tumblr-music-scraper/fmfridays/pipelines.py

32 lines
1 KiB
Python
Raw Normal View History

2015-05-06 14:16:39 +00:00
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import scrapy
from scrapy.contrib.pipeline.media import MediaPipeline
from scrapy.exceptions import DropItem
from fmfridays.settings import TUMBLR
import os
import re
class MP3DownloadPipeline(MediaPipeline):
    """Download the file behind ``item['url']`` and store it as an MP3.

    The destination is ``<DOWNLOAD_ROOT>/<TUMBLR>/<sanitised title>.mp3``.
    Items whose destination file already exists on disk are not requested
    again.
    """

    # Root folder that receives the downloads. Kept as a class attribute so a
    # subclass can redirect the output without touching the path logic.
    DOWNLOAD_ROOT = '/Users/jpenner/Music/downloads/'

    # Every character outside this whitelist is replaced by '_' when a title
    # is turned into a file name.
    UNSAFE_TITLE_CHARS = re.compile(r'[^-_!\(\),\'& a-zA-Z0-9]')

    def get_media_requests(self, item, info):
        """Yield a download request for ``item['url']`` unless already saved.

        Nothing is yielded when the target file from ``path_from_item``
        already exists, so no download is issued for that item.
        """
        if not os.path.exists(self.path_from_item(item)):
            yield scrapy.Request(item['url'])

    def path_from_item(self, item):
        """Return the on-disk path where the MP3 for ``item`` is stored.

        The file name is ``item['title']`` with every non-whitelisted
        character replaced by an underscore, followed by ``.mp3``.
        """
        safe_title = self.UNSAFE_TITLE_CHARS.sub('_', item['title'])
        return os.path.join(self.DOWNLOAD_ROOT, TUMBLR, safe_title + '.mp3')

    def item_completed(self, results, item, info):
        """Write each successful download to disk and record its path.

        ``results`` is iterated as ``(ok, response)`` pairs; entries whose
        ``ok`` flag is falsy are skipped. For successful entries the
        ``response.body`` bytes are written to the item's target path and
        that path is stored in ``item['path']``.

        The item is always returned, including when nothing was downloaded
        (for example because the file already existed), in which case
        ``item['path']`` is left untouched.
        """
        for ok, response in results:
            if not ok:
                continue

            path = self.path_from_item(item)
            directory = os.path.dirname(path)

            # Create the folder without a separate existence check first:
            # attempt it and only re-raise when the directory really is
            # missing afterwards. Works on both Python 2 and Python 3.
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

            with open(path, 'wb') as f:
                f.write(response.body)
            item['path'] = path

        return item