32 lines
1 KiB
Python
32 lines
1 KiB
Python
# -*- coding: utf-8 -*-
|
|
|
|
# Define your item pipelines here
|
|
#
|
|
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
|
|
import scrapy
|
|
from scrapy.contrib.pipeline.media import MediaPipeline
|
|
from scrapy.exceptions import DropItem
|
|
from fmfridays.settings import TUMBLR
|
|
import os
|
|
import re
|
|
|
|
class MP3DownloadPipeline(MediaPipeline):
    """Media pipeline that downloads an item's MP3 and stores it on disk.

    The destination is ``<download_root>/<TUMBLR>/<sanitized title>.mp3``.
    No download request is issued for an item whose destination file
    already exists.
    """

    # Root directory for downloaded files; override in a subclass (or on the
    # class) to store downloads somewhere else.
    download_root = '/Users/jpenner/Music/downloads/'

    # Any character NOT in this whitelist is replaced by '_' when the item
    # title is turned into a file name. Compiled once instead of per call.
    _unsafe_filename_chars = re.compile(r'[^-_!\(\),\'& a-zA-Z0-9]')

    def get_media_requests(self, item, info):
        """Yield a request for ``item['url']`` unless the file already exists."""
        if not os.path.exists(self.path_from_item(item)):
            yield scrapy.Request(item['url'])

    def path_from_item(self, item):
        """Return the destination path derived from ``item['title']``."""
        filename = self._unsafe_filename_chars.sub('_', item['title']) + '.mp3'
        return os.path.join(self.download_root, TUMBLR, filename)

    def item_completed(self, results, item, info):
        """Save every successful response to disk and record its path.

        ``results`` is iterated as ``(ok, response)`` pairs; for each pair
        with a truthy ``ok`` the response body is written to the path given
        by ``path_from_item`` and that path is stored in ``item['path']``.
        The item is always returned.

        NOTE(review): when the file already existed, ``get_media_requests``
        yields nothing, so ``item['path']`` is never set here -- confirm
        whether downstream consumers rely on that key being present.
        """
        for ok, response in results:
            if not ok:
                continue
            path = self.path_from_item(item)
            self._ensure_directory(os.path.dirname(path))
            with open(path, 'wb') as f:
                f.write(response.body)
            item['path'] = path
        return item

    @staticmethod
    def _ensure_directory(directory):
        """Create ``directory`` (and parents), tolerating it already existing."""
        try:
            os.makedirs(directory)
        except OSError:
            # Creating first and checking afterwards avoids the window in
            # which another process could create the directory between an
            # existence check and makedirs. Any other failure is re-raised.
            if not os.path.isdir(directory):
                raise
|