#!/usr/bin/env python import internetarchive import json import sys import os from glob import glob as pyglob import sh import re import requests from urllib import urlretrieve from urlparse import urljoin import unicodedata from flask import Flask, render_template, request, make_response, send_from_directory import requests from bs4 import BeautifulSoup from binascii import a2b_base64 from ConfigParser import SafeConfigParser VALID_OPTIONS = set(['user', 'dbdir', 'db', 'dosbox_install', 'default_collection']) def load_config(): config = SafeConfigParser() files = [os.path.expanduser('~/.config/sw.ini')] path = os.getcwd() while path != '/': files.insert(1, os.path.join(path, 'sw.ini')) path = os.path.dirname(path) config.read(files) for k, v in config.items('config'): if k.lower() in VALID_OPTIONS: globals()[k.upper()] = v load_config() if 'DB' not in globals(): DB=os.path.join(DBDIR, 'ia.json') # DEFAULT_COLLECTION = 'softwarelibrary_msdos_shareware' # DEFAULT_COLLECTION = 'open_source_software' # DEFAULT_COLLECTION = 'softwarelibrary_win3_shareware' # DEFAULT_COLLECTION = 'glorious_trainwrecks' def genid(name, spec): # return 'msdos_' + name + '_shareware' # return 'actionpoint_' + name # return 'win3_' + name # + '_knp' return 'gtrwx_' + name def gen_metadata(spec, orig_file): return def fixklik(spec): klikfiles = {'exe': None, 'gam': None, 'img': None} for line in sh.unzip('-l', spec['upload']).splitlines(): match = re.match(r"\s*\d+\s+\S+\s\S+\s+(.+)", line) file = match.group(1) if match else '' for ext in klikfiles.iterkeys(): if file.lower().endswith('.' + ext): if klikfiles[ext]: return klikfiles[ext] = file for fn in klikfiles.itervalues(): if not fn or not klikfiles['exe'] or fn[:-3] != klikfiles['exe'][:-3]: return exe = klikfiles['exe'] if '/' in exe: unzipdir = os.path.join(DBDIR, 'tmp') if os.path.exists(unzipdir): sh.rm('-r', unzipdir) sh.mkdir('-p', unzipdir) dirOld = os.getcwd() zipfile = os.path.abspath(spec['upload']) os.chdir(unzipdir) sh.unzip('-q', zipfile) print exe, exe[:exe.rfind('/')], zipfile zipdir(exe[:exe.rfind('/')], zipfile) exe = exe[exe.rfind('/') + 1:] spec['upload'] = zipfile os.chdir(dirOld) spec['emulator_start'] = 'd:\\runapp ' + exe spec['dosbox_drive_d'] = 'emularity_win31/win31.zip' def gen_metadata_json(spec, orig_file): spec['subject'] = 'klik & play;Geocities' print orig_file, os.path.split(orig_file)[1] try: gamesjson_path = os.path.join(os.path.split(orig_file)[0], "games.json") print gamesjson_path with open(gamesjson_path, 'rt') as f: meta = json.load(f) print meta if os.path.split(orig_file)[1] in meta: vals = meta[os.path.split(orig_file)[1]] if len(vals) >= 1: spec['title'] = vals[0] if len(vals) >= 2: spec['description'] = vals[1] if len(vals) >= 3: spec['creator'] = vals[2] site = None if '_AUTHOR' in meta: if 'creator' not in spec: spec['creator'] = meta['_AUTHOR'] site = meta['_AUTHOR'] if '_SITE' in meta: site = meta['_SITE'] if '_URL' in meta: link = '' if site: link += site else: link += 'this Geocities site' link += '' spec['description'] = (spec.get('description', '') + """ (Retrieved from """ + link + """.)""") except: pass SPEC_DEFAULTS = { 'emulator': 'dosbox', 'emulator_ext': 'zip', 'collection': DEFAULT_COLLECTION, 'mediatype': 'software', 'dosbox_drive_d': 'emularity_win31/win31.zip' } def genid_piratekart(name, spec): spec['creator'] = 'ShaperMC' default_tags = ["glorious trainwrecks", "klik & play", "The 100-in-1 Klik & Play Pirate Kart"] def matchheader(start, line): match = re.match(start + ".*: *(.*)", line) if match: return match.group(1) gametxt = patchstr(sh.unzip('-p', '-C', spec['upload'], 'game.txt', _encoding='cp437')) for line in gametxt.split('\n'): if matchheader("D", line): spec['description'] = matchheader("D", line).strip() elif matchheader("N", line): spec['title'] = matchheader("N", line).strip() elif matchheader("(?:G|Tag)", line): tags = [tag.strip() for tag in matchheader("(?:G|Tag)", line).split(',')] print tags tags = tags + default_tags spec['subject'] = ";".join(tags) for tag in tags: if tag.startswith("kotmk"): name = tag + "_" + name break def loaddb(): with open(DB, 'rt') as f: return json.load(f) def savedb(db): with open(DB, 'wt') as f: json.dump(db, f) def scan(db): """Finds everything I've uploaded and adds it to the DB.""" for result in internetarchive.search.Search('uploader:(jeremy@sporktania.com)', ['identifier', 'title', 'collection']): id = result['identifier'] if id not in db: db[id] = {'status': 'unknown'} db[id]['title'] = result['title'] db[id]['collection'] = result['collection'] def diff(db, newdb_filename): """Returns a db with all of the items in newdb that don't exist in db.""" with open(newdb_filename, 'rt') as f: newdb = json.load(f) diffdb = {} for id, val in newdb.iteritems(): if db.get(id) != val: diffdb[id] = val return diffdb def check(db, collection=None, ignore=set(['fine', 'dark'])): """Prints items and their status, for reporting non-working items to Jason.""" status = {'reshoot': []} for id, val in db.iteritems(): if collection != None and collection not in val.get('collection', []): continue if val['status'] not in status: status[val['status']] = [] if val.get('reshoot'): status['reshoot'].append(id) status[val['status']].append(id) for s, ids in status.iteritems(): if s not in ignore: print "\n" print s + ":" for id in ids: print "http://archive.org/details/" + id def setstatus(db, id, status, title=None): if id not in db: db[id] = {} db[id]['status'] = status if title: db[id]['title'] = title savedb(db) def patchstr(s): """Remove line-drawing (non-ascii) characters from a string""" return "".join(ch for ch in unicode(s) if unicodedata.category(ch)[0]!="C" or ch == '\n') def safefilename(s): return re.sub('[^a-zA-Z0-9_-]', '_', s) def fix(spec): """Tries to generate an ID and description""" print "fix", spec if 'upload' in spec: fullname, ext = os.path.splitext(spec['upload']) spec['upload'] = fullname + ext.lower() name = safefilename(os.path.basename(fullname)) print name if 'id' not in spec: spec['id'] = genid(name, spec) if 'description' not in spec: try: spec['description'] = patchstr(sh.unzip('-p', '-C', spec['upload'], 'FILE_ID.DIZ', _encoding='cp437')) except sh.ErrorReturnCode: pass def defprompt(name): def decorator(func): func.name = name return func return decorator def mvprompt(name): @defprompt(name) def mv(spec): destpath = os.path.join(os.path.dirname(spec['upload']), name) sh.mkdir('-p', destpath) sh.mv(spec['upload'], destpath) return mv def dbinst(zip): sh.rm('-r', DOSBOX_INSTALL) sh.mkdir('-p', os.path.join(DOSBOX_INSTALL, 'INSTALL')) sh.cp(zip, DOSBOX_INSTALL) dirOld = os.getcwd() os.chdir(os.path.join(DOSBOX_INSTALL, 'INSTALL')) sh.unzip('-q', '../' + os.path.basename(zip)) sh.open('../..') os.chdir(dirOld) def dbinstexe(exe): sh.rm('-r', DOSBOX_INSTALL) sh.mkdir('-p', os.path.join(DOSBOX_INSTALL, 'INSTALL')) sh.cp(exe, os.path.join(DOSBOX_INSTALL, 'INSTALL')) sh.open(os.path.join(DOSBOX_INSTALL, '..')) @defprompt('pkginst') def pkginstprompt(spec): if pkginst(spec): return "sync" if prompt(spec) else "ignore" return "ignore" @defprompt('dbinst') def dbinstprompt(spec): dbinst(spec['upload']) x = raw_input("Hit Enter when done (x to cancel): ") if x == 'x': return 'ignore' return pkginstprompt(spec) @defprompt('skip') def skipprompt(spec): pass @defprompt('metadata') def sync_only_metadata(spec): del spec['upload'] sync(spec, ensure_new=False) @defprompt('win31') def win31prompt(spec): spec['dosbox_drive_d'] = 'emularity_win31/win31.zip' return 'ignore' @defprompt('dos') def dosprompt(spec): spec['dosbox_drive_d'] = None return 'ignore' @defprompt('maxcpu') def maxcpuprompt(spec): unzipdir = os.path.join(DBDIR, 'tmp') if os.path.exists(unzipdir): sh.rm('-r', unzipdir) sh.mkdir('-p', unzipdir) dirOld = os.getcwd() zipfile = os.path.abspath(spec['upload']) os.chdir(unzipdir) sh.unzip('-q', zipfile) sh.cp(os.path.join(DBDIR, 'maxcpu.dosbox.conf'), 'dosbox.conf') zipdir('.', zipfile) os.chdir(dirOld) return 'ignore' def markprompt(db, status): @defprompt(status) def mark(spec): setstatus(db, spec['id'], status, spec.get('title')) return mark def run_prompts(input, spec, prompts): while True: if prompts: for k, v in prompts.iteritems(): print k + '=' + v.name, print '' result = raw_input(input) if result in prompts: result = prompts[result](spec) if result != "ignore": return (False, result) else: return (True, result) def prompt(spec, prompts={}, prompt_title=True): if 'upload' in spec and ('title' not in spec or 'emulator_start' not in spec): print spec['id'] if 'description' in spec: print spec['description'] try: print sh.grep(sh.unzip('-l', spec['upload']), '-i', '.[bec][axo][tem]') except sh.ErrorReturnCode: print "Unexpected error:", sys.exc_info()[0] print "no exe found, skipping" return None (isexe, exe) = run_prompts('EXE: ', spec, prompts) if isexe: if spec.get('dosbox_drive_d', SPEC_DEFAULTS.get('dosbox_drive_d')): spec['emulator_start'] = 'd:\\runapp ' + exe else: spec['emulator_start'] = exe if 'title' not in spec and prompt_title: spec['title'] = raw_input('Title: ') return True elif exe == 'sync': return True if 'upload' in spec and 'emulator_start' in spec and 'title' in spec: return True def pkginst(spec, uploadPath=None, prompts={}): print sh.ls('-al', DOSBOX_INSTALL) (success, d) = run_prompts('Dir: ', spec, prompts) if not success: return d == 'sync' dirToZip = os.path.join(DOSBOX_INSTALL, d) if not os.path.exists(dirToZip): return False name = spec.get('id') if not name: name = raw_input('ID: ') if not uploadPath: uploadPath = os.path.join(DBDIR, name + '.zip') zipdir(dirToZip, uploadPath) spec['upload'] = uploadPath return True def zipdir(dirToZip, zipPath): dirOld = os.getcwd() zipPath = os.path.abspath(zipPath) os.chdir(dirToZip) if os.path.exists(zipPath): sh.rm(zipPath) sh.zip('-r', zipPath, sh.glob('./*')) os.chdir(dirOld) def sync(spec, db=None, ensure_new=True): print json.dumps(spec) item = internetarchive.get_item(spec['id']) if ensure_new and item.exists: raise Exception("Item " + spec['id'] + " already exists!") mdOld = item.metadata.get('metadata', {}) mdNew = {} for key in ['title', 'description', 'emulator_start', 'emulator', 'emulator_ext', 'dosbox_drive_d', 'subject', 'creator']: if key in spec and (key not in mdOld or mdOld[key] != spec[key]): mdNew[key] = spec[key] for key, value in SPEC_DEFAULTS.iteritems(): if key not in mdOld and key not in spec: mdNew[key] = value try: if 'upload' in spec: print "uploading", spec['upload'], "to", spec['id'] item.upload(spec['upload'], metadata=mdNew) if mdNew and mdOld: print "updating metadata for", spec['id'], mdNew item.modify_metadata(mdNew) except requests.exceptions.HTTPError as e: print e print e.response raise if db: setstatus(db, spec['id'], 'unknown', spec['title']) def fixprompt(spec, extraprompts={}): prompts = {'s': skipprompt, 'i': dbinstprompt, 'p': pkginstprompt, 'z': sync_only_metadata, 'd': dosprompt}#, 'w': win31prompt} prompts.update(extraprompts) fix(spec) return prompt(spec, prompts) def fixpromptsync(spec, db=None, extraprompts={}): if fixprompt(spec, extraprompts): sync(spec, db) return True def reinstall(id, db): # download the zip item = internetarchive.get_item(id) zipfile = item.get_files(formats='ZIP')[0] zipfilename = os.path.join(DBDIR, zipfile.name) if os.path.exists(zipfilename): os.unlink(zipfilename) print id, db.get(id, {}).get('title') print "downloading", zipfile.name zipfile.download(zipfilename) # install and reupload dbinst(zipfilename) raw_input('Hit enter when finished:') spec = {'id': id} @defprompt('exe') def exeprompt(spec): spec['upload'] = zipfilename prompt(spec, prompt_title=False) del spec['upload'] sync(spec, db) prompts = {'w': markprompt(db, 'windows'), 'i': markprompt(db, 'inappropriate'), 'b': markprompt(db, 'busted'), 'e': exeprompt} if pkginst(spec, zipfilename, prompts): prompt(spec, prompt_title=False, prompts={'m': maxcpuprompt}) sync(spec) setstatus(db, id, 'unknown') savedb(db) def installers(db): for id, val in db.iteritems(): if val.get('status') == 'installer': reinstall(id, db) def batchfix(db): for id, val in db.iteritems(): if val['status'] == 'sync': sync({'id': id, 'emulator': 'dosbox-sync'}) setstatus(db, id, 'unknown') db[id]['reshoot'] = True savedb(db) def updatestatus(db, statusfrom, statusto): for id, val in db.iteritems(): if val['status'] == statusfrom: val['status'] = statusto savedb(db) def clearreshoot(db): for id, val in db.iteritems(): if val.get('reshoot'): del val['reshoot'] savedb(db) def iterfiles(ext, dir='.'): return [fn for fn in os.listdir(dir) if re.search(r'\.' + ext + r'$', fn, re.IGNORECASE)] def iterzipfiles(dir='.'): return iterfiles('zip') def zips(db, prefix, ext='zip'): prompts = { 'u': mvprompt('uploaded'), 'm': mvprompt('multi'), 'n': mvprompt('notapprop'), 'd': mvprompt('dup'), 'z': sync_only_metadata } for zipfile in iterfiles(ext): spec = {'upload': zipfile} gen_metadata(spec, zipfile) filename = os.path.basename(zipfile) description = None # description = patchstr(sh.sed(sh.grep('-i', '^' + filename, 'FILES1.BBS'), '-e', 's/^' + filename + r'[^ ]* *//')) print description if fixprompt(spec, extraprompts=prompts): spec['title'] = prefix + spec['title'] if not spec.get('description'): spec['description'] = description print spec['description'] sync(spec, db) mvprompt('uploaded')(spec) def exes(db, prefix): prompts = { 'u': mvprompt('uploaded'), 'm': mvprompt('multi'), 'n': mvprompt('notapprop'), 'd': mvprompt('dup') } for exefile in iterfiles('exe'): dbinstexe(exefile) x = raw_input("Hit Enter when done (x to cancel): ") if x == 'x': continue spec = {'upload': exefile} fix(spec) if pkginst(spec, prompts=prompts): if prompt(spec): sync(spec, db) mvprompt('uploaded')(spec) def serve(db): app = Flask(__name__) app.config['DEBUG'] = True app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0 ctx = {'db': db} def gen_tmpl(page): def from_tmpl(): return render_template(page + '.html', **ctx) from_tmpl.__name__ = "from_tmpl_" + page app.route('/' + page)(from_tmpl) for page in ['check']: gen_tmpl(page) @app.route('/db', methods=['PUT']) def putdb(): if request.json: ctx['db'] = request.json savedb(ctx['db']) return "OK cool" @app.route('/', defaults={'filename': 'index.html'}) @app.route('/static/') def staticfile(filename): return send_from_directory(os.path.join(DBDIR, 'static'), filename) @app.route('/screenshot', methods=['POST']) def postscreenshot(): dataUris = request.form.getlist('image') url = request.form['url'] name = re.match('.*/([^/]+)/?$', url).group(1) dirname = os.path.join(DBDIR, name) if len(dataUris) > 1: dirindex = 1 while os.path.exists(os.path.join(dirname, 'animation' + str(dirindex))): dirindex += 1 dirname = os.path.join(dirname, 'animation' + str(dirindex)) if not os.path.exists(dirname): os.makedirs(dirname) print dirname, len(dataUris) filenames = [] index = 1 for dataUri in dataUris: data = a2b_base64(dataUri[dataUri.index(',')+1:]) while os.path.exists(os.path.join(dirname, 'screen' + str(index) + '.png')): index += 1 filename = os.path.join(dirname, 'screen' + str(index) + '.png') with open(filename, 'wb') as f: f.write(data) filenames.append(filename) if len(filenames) > 1: # generate an animated giiiffff gifname = dirname + '.gif' filenames.append(gifname) sh.gm('convert', '-delay', 5, '-loop', 0, *filenames) sh.gifsicle('--batch', '-O3', gifname) return "yup good" import ssl context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) context.load_cert_chain(os.path.join(DBDIR, 'server.crt'), os.path.join(DBDIR, 'server.key')) app.run(ssl_context=context) def join_elements(elements): text = '' for element in elements: textAdd = '' if element.name == 'p': if text: textAdd = '\n\n' textAdd += join_elements(element.contents) elif element.name == 'br': textAdd = '' elif unicode(element).strip(): textAdd = unicode(element) if textAdd: text += textAdd return text def scrapefile(project, url, spec): fn = os.path.join(DBDIR, project, url.split('/')[-1]) if not os.path.exists(fn): dirn = os.path.split(fn)[0] if not os.path.exists(dirn): os.makedirs(dirn) urlretrieve(url, fn) if not spec: spec = {} spec['upload'] = fn return spec def scrapegame(url, spec=None): soup = BeautifulSoup(requests.get(url).text) content = soup.find('div', 'node').find('div', 'content') descElems = [e for e in content.contents if e.name != 'div' and not (e.name == 'table' and e['id'] == 'attachments')] desc = join_elements(descElems) desc += '\n\n(Retrieved from Glorious Trainwrecks.)' title = soup.find('div', id='center').h2.contents[0] if content.find('div', 'field-field-gamefile'): dlurl = content.find('div', 'field-field-gamefile').a['href'] else: for attachment_link in content.find('table', id="attachments").find_all("a"): dlurl = attachment_link['href'] if dlurl.lower().endswith('.zip'): break user = soup.find('span', 'submitted').a.contents[0] tags = 'glorious trainwrecks;Klik & Play' legacy_event = content.find('div', 'field-field-field-event') if legacy_event and legacy_event.find('div', 'field-item').contents[0] == 'Pirate Kart 2': tags += ";The 529 in 1 Klik and Play Pirate Kart Part II: Klik Harder" event = content.find('div', 'field-field-event-created-for') if event and not event.a.contents[0].startswith("THE 371-"): tags += ';' + event.a.contents[0] if soup.find('div', 'terms'): for taglink in soup.find('div', 'terms').find_all('a'): tags += ';' + taglink.contents[0] spec = scrapefile('gtrwx', dlurl, spec) spec.update({ 'title': title, 'creator': user, 'description': desc, 'subject': tags }) return spec def scrapecomment(url, spec=None): comment_id = url.split("#")[1] soup = BeautifulSoup(requests.get(url).text) comment = soup.find('a', id=comment_id).find_next_sibling('div', 'comment') user = comment.find('span', 'submitted').a.contents[0] title = comment.h3.a.contents[0] event_title = soup.find('div', id='center').h2.contents[0] content = comment.find('div', 'content') desc = join_elements([e for e in content.contents if not (e.name == 'table' and e['id'] == 'attachments')]) desc += '\n\n(Retrieved from Glorious Trainwrecks.)' attachments = [a['href'] for a in content.find_all('a') if a['href'].split('.')[-1].lower() not in ['png', 'jpg']] if len(attachments) == 0: return None if len(attachments) > 1: iattach = 1 for attachment in attachments: print iattach, attachment iattach += 1 iattach = raw_input('Which #: ') try: iattach = int(iattach) attachment = attachments[iattach - 1] except: return None else: attachment = attachments[0] spec = scrapefile('gtrwx', urljoin(url, attachment), spec) spec.update({ 'title': title, 'creator': user, 'description': desc, 'subject': 'glorious trainwrecks;Klik & Play;' + event_title }) return spec def scrapeuser(username): games = [] url = 'http://www.glorioustrainwrecks.com/games/*/' + username while url: soup = BeautifulSoup(requests.get(url).text) for td in soup.find_all('td', 'view-field-node-title'): games.append((urljoin(url, td.a['href']), td.a.contents[0])) nextLink = soup.find(title='Go to next page') if nextLink: url = urljoin(url, nextLink['href']) else: url = None return games def runcmd(db, cmd, args): if cmd == 'scan': scan(db) savedb(db) elif cmd == 'check': check(db) elif cmd == 'diff': diffdb = diff(db, args[0]) runcmd(diffdb, args[1], args[2:]) savedb(db) elif cmd == 'mergedb': with open(args[0], 'rt') as f: dbnew = json.load(f) db.update(dbnew) savedb(db) elif cmd == 'dumpdb': print json.dumps(db) elif cmd == 'db': global DB db_old = DB DB = os.path.abspath(args[0]) runcmd(loaddb(), args[1], args[2:]) DB = db_old elif cmd == 'sync': sync(json.loads(args[0])) elif cmd == 'pkginst': spec = {} pkginst(spec) fixpromptsync(spec, db) elif cmd == 'zip': spec = {'upload': args[0]} gen_metadata(spec, args[0]) fixpromptsync(spec, db) elif cmd == 'gtgame': if '#' in args[0]: spec = scrapecomment(args[0]) else: spec = scrapegame(args[0]) fixklik(spec) fixpromptsync(spec, db) elif cmd == 'gtuser': games = scrapeuser(args[0]) for url, title in games: spec = scrapegame(url) if spec: fixklik(spec) fixpromptsync(spec, db) elif cmd == 'zips': prefix = args[0] + ": " if len(args) > 0 else "" zips(db, prefix) elif cmd == 'exezips': prefix = args[0] + ": " if len(args) > 0 else "" zips(db, prefix, 'exe') elif cmd == 'exes': prefix = args[0] + ": " if len(args) > 0 else "" exes(db, prefix) elif cmd == 'dir': d = args[0] if d.endswith('/'): d = d[:-1] zipf = os.path.join(DBDIR, safefilename(os.path.basename(d)) + '.zip') zipdir(d, zipf) spec = {'upload': zipf} gen_metadata(spec, d) if len(args) > 1: spec['id'] = args[1] fixpromptsync(spec, db) elif cmd == 'dirs': for d in os.listdir('.'): if os.path.isdir(d): zipf = os.path.join(DBDIR, safefilename(os.path.basename(d)) + '.zip') zipdir(d, zipf) spec = {'upload': zipf} gen_metadata(spec, d) fixpromptsync(spec, db) elif cmd == 'installers': try: installers(db) except (KeyboardInterrupt, SystemExit): pass savedb(db) elif cmd == 'batchfix': batchfix(db) elif cmd == 'setstatus': updatestatus(db, args[0], args[1]) elif cmd == 'clearreshoot': clearreshoot(db) elif cmd == 'fixext': for id, val in db.iteritems(): if val.get('status') == 'unknown': item = internetarchive.get_item(id) zipfile = item.get_files(formats='ZIP')[0] md = item.metadata.get('metadata', {}) print "checking", id, zipfile.name, md.get('emulator_ext') if os.path.splitext(zipfile.name)[1] == '.ZIP' and md.get('emulator_ext') == 'zip': print "fixing", id spec = {'id': id, 'emulator_ext': 'ZIP'} sync(spec) elif cmd == 'reinstall': reinstall(args[0], db) savedb(db) elif cmd == 'serve': serve(db) if __name__ == '__main__': runcmd(loaddb(), sys.argv[1], sys.argv[2:])