# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

import os
import re

import scrapy
from scrapy.contrib.pipeline.media import MediaPipeline
from scrapy.exceptions import DropItem

from fmfridays.settings import TUMBLR

# Characters outside this whitelist are replaced with '_' when an item title
# is turned into a file name. Compiled once at import time.
_UNSAFE_FILENAME_CHARS = re.compile(r'[^-_!\(\),\'& a-zA-Z0-9]')


class MP3DownloadPipeline(MediaPipeline):
    """Download the file at ``item['url']`` and store it on disk as an .mp3.

    The destination is ``DOWNLOAD_ROOT/<TUMBLR>/<sanitised title>.mp3``.
    Items whose destination file already exists are not requested again.
    """

    # Root directory for all downloads; override in a subclass to relocate.
    DOWNLOAD_ROOT = '/Users/jpenner/Music/downloads/'

    def get_media_requests(self, item, info):
        """Yield a request for ``item['url']`` unless the file already exists.

        When the destination file is already on disk nothing is yielded, so
        no download is issued for that item.
        """
        if not os.path.exists(self.path_from_item(item)):
            yield scrapy.Request(item['url'])

    def path_from_item(self, item):
        """Return the destination path for *item*.

        The file name is ``item['title']`` with every character outside the
        whitelist (letters, digits, space and ``-_!(),'&``) replaced by
        ``_``, followed by the ``.mp3`` extension.
        """
        filename = _UNSAFE_FILENAME_CHARS.sub('_', item['title']) + '.mp3'
        return os.path.join(self.DOWNLOAD_ROOT, TUMBLR, filename)

    def item_completed(self, results, item, info):
        """Write each successful download to disk and record its path.

        *results* is iterated as ``(ok, response)`` pairs. For every pair
        with a truthy ``ok`` the response body is written to the item's
        destination path and ``item['path']`` is set to that path. Failed
        entries are skipped. The item is always returned.

        NOTE: when there is no successful entry (including the case where
        no request was issued because the file already existed),
        ``item['path']`` is left untouched.
        """
        for ok, response in results:
            if not ok:
                continue

            path = self.path_from_item(item)
            directory = os.path.dirname(path)

            # Try to create the directory and tolerate "already exists"
            # rather than checking first: the check-then-create sequence can
            # fail if the directory appears between the two calls.
            try:
                os.makedirs(directory)
            except OSError:
                if not os.path.isdir(directory):
                    raise

            # Binary mode: the body is written exactly as received.
            with open(path, 'wb') as f:
                f.write(response.body)

            item['path'] = path

        return item