#!/usr/bin/env python
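# Helper script for uploading emulated software (MS-DOS / Windows 3.x shareware,
# Klik & Play games, Glorious Trainwrecks releases) to the Internet Archive's
# software library, and for tracking the status of those items in a local JSON
# "db" file. Which collection and ID prefix are used is controlled by the
# configuration below and by genid().
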
import internetarchive
import json
import sys
import os
from glob import glob as pyglob
import sh
import re
import requests
from urllib import urlretrieve
from urlparse import urljoin
import unicodedata
from flask import Flask, render_template, request, make_response, send_from_directory
from bs4 import BeautifulSoup
from binascii import a2b_base64
from ConfigParser import SafeConfigParser

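# Configuration is read from sw.ini files (see load_config below). Any option
# listed in VALID_OPTIONS becomes an upper-case module global, e.g. DBDIR,
# DOSBOX_INSTALL and DEFAULT_COLLECTION, which the rest of the script assumes
# are set.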
VALID_OPTIONS = set(['user', 'dbdir', 'db', 'dosbox_install', 'default_collection'])

def load_config():
    config = SafeConfigParser()
    files = [os.path.expanduser('~/.config/sw.ini')]
    path = os.getcwd()
    while path != '/':
        files.insert(1, os.path.join(path, 'sw.ini'))
        path = os.path.dirname(path)
    config.read(files)
    for k, v in config.items('config'):
        if k.lower() in VALID_OPTIONS:
            globals()[k.upper()] = v

load_config()

if 'DB' not in globals():
    DB = os.path.join(DBDIR, 'ia.json')


# DEFAULT_COLLECTION = 'softwarelibrary_msdos_shareware'
# DEFAULT_COLLECTION = 'open_source_software'
# DEFAULT_COLLECTION = 'softwarelibrary_win3_shareware'
# DEFAULT_COLLECTION = 'glorious_trainwrecks'

def genid(name, spec):
    # return 'msdos_' + name + '_shareware'
    # return 'actionpoint_' + name
    # return 'win3_' + name # + '_knp'
    return 'gtrwx_' + name

def gen_metadata(spec, orig_file):
    return

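# fixklik: if the upload zip looks like a single Klik & Play game (exactly one
# .exe/.gam/.img sharing the same base name), point emulator_start at the exe
# via the runapp helper on drive D: and attach the Win3.1 system disk. If the
# files live in a subdirectory of the zip, the zip is rebuilt from that
# subdirectory so the game sits at the root.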
def fixklik(spec):
    klikfiles = {'exe': None, 'gam': None, 'img': None}
    for line in sh.unzip('-l', spec['upload']).splitlines():
        match = re.match(r"\s*\d+\s+\S+\s\S+\s+(.+)", line)
        file = match.group(1) if match else ''
        for ext in klikfiles.iterkeys():
            if file.lower().endswith('.' + ext):
                if klikfiles[ext]:
                    return
                klikfiles[ext] = file
    for fn in klikfiles.itervalues():
        if not fn or not klikfiles['exe'] or fn[:-3] != klikfiles['exe'][:-3]:
            return
    exe = klikfiles['exe']
    if '/' in exe:
        unzipdir = os.path.join(DBDIR, 'tmp')
        if os.path.exists(unzipdir):
            sh.rm('-r', unzipdir)
        sh.mkdir('-p', unzipdir)
        dirOld = os.getcwd()
        zipfile = os.path.abspath(spec['upload'])
        os.chdir(unzipdir)
        sh.unzip('-q', zipfile)
        print exe, exe[:exe.rfind('/')], zipfile
        zipdir(exe[:exe.rfind('/')], zipfile)
        exe = exe[exe.rfind('/') + 1:]
        spec['upload'] = zipfile
        os.chdir(dirOld)
    spec['emulator_start'] = 'd:\\runapp ' + exe
    spec['dosbox_drive_d'] = 'emularity_win31/win31.zip'


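# gen_metadata_json is an alternate gen_metadata hook, apparently for Geocities
# rips: it looks for a games.json next to the original file and pulls title,
# description and creator from it, plus _AUTHOR/_SITE/_URL entries used to
# build a "Retrieved from ..." attribution link.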
def gen_metadata_json(spec, orig_file):
    spec['subject'] = 'klik & play;Geocities'
    print orig_file, os.path.split(orig_file)[1]
    try:
        gamesjson_path = os.path.join(os.path.split(orig_file)[0], "games.json")
        print gamesjson_path
        with open(gamesjson_path, 'rt') as f:
            meta = json.load(f)
        print meta
        if os.path.split(orig_file)[1] in meta:
            vals = meta[os.path.split(orig_file)[1]]
            if len(vals) >= 1:
                spec['title'] = vals[0]
            if len(vals) >= 2:
                spec['description'] = vals[1]
            if len(vals) >= 3:
                spec['creator'] = vals[2]
        site = None
        if '_AUTHOR' in meta:
            if 'creator' not in spec:
                spec['creator'] = meta['_AUTHOR']
            site = meta['_AUTHOR']
        if '_SITE' in meta:
            site = meta['_SITE']
        if '_URL' in meta:
            link = '<a href="' + meta['_URL'] + '">'
            if site:
                link += site
            else:
                link += 'this Geocities site'
            link += '</a>'
            spec['description'] = (spec.get('description', '') + """

(Retrieved from """ + link + """.)""")
    except:
        pass

SPEC_DEFAULTS = {
    'emulator': 'dosbox',
    'emulator_ext': 'zip',
    'collection': DEFAULT_COLLECTION,
    'mediatype': 'software',
    'dosbox_drive_d': 'emularity_win31/win31.zip'
}

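# genid_piratekart appears to have been used for the 100-in-1 Pirate Kart:
# each game zip carries a game.txt whose "Name:", "Description:" and
# "Genre:"/"Tags:" style headers are parsed into the spec. Note that, as
# written, it computes a kotmk-prefixed name but never returns it.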
def genid_piratekart(name, spec):
    spec['creator'] = 'ShaperMC'
    default_tags = ["glorious trainwrecks", "klik & play", "The 100-in-1 Klik & Play Pirate Kart"]

    def matchheader(start, line):
        match = re.match(start + ".*: *(.*)", line)
        if match:
            return match.group(1)

    gametxt = patchstr(sh.unzip('-p', '-C', spec['upload'], 'game.txt', _encoding='cp437'))
    for line in gametxt.split('\n'):
        if matchheader("D", line):
            spec['description'] = matchheader("D", line).strip()
        elif matchheader("N", line):
            spec['title'] = matchheader("N", line).strip()
        elif matchheader("(?:G|Tag)", line):
            tags = [tag.strip() for tag in matchheader("(?:G|Tag)", line).split(',')]
            print tags
            tags = tags + default_tags
            spec['subject'] = ";".join(tags)
            for tag in tags:
                if tag.startswith("kotmk"):
                    name = tag + "_" + name
                    break

def loaddb():
    with open(DB, 'rt') as f:
        return json.load(f)

def savedb(db):
    with open(DB, 'wt') as f:
        json.dump(db, f)

def scan(db):
    """Finds everything I've uploaded and adds it to the DB."""
    for result in internetarchive.search.Search('uploader:(jeremy@sporktania.com)', ['identifier', 'title', 'collection']):
        id = result['identifier']
        if id not in db:
            db[id] = {'status': 'unknown'}
        db[id]['title'] = result['title']
        db[id]['collection'] = result['collection']

def diff(db, newdb_filename):
    """Returns a db with all of the items in newdb that are missing from db or differ from it."""
    with open(newdb_filename, 'rt') as f:
        newdb = json.load(f)
    diffdb = {}
    for id, val in newdb.iteritems():
        if db.get(id) != val:
            diffdb[id] = val
    return diffdb

def check(db, collection=None, ignore=set(['fine', 'dark'])):
    """Prints items and their status, for reporting non-working items to Jason."""
    status = {'reshoot': []}
    for id, val in db.iteritems():
        if collection is not None and collection not in val.get('collection', []):
            continue
        if val['status'] not in status:
            status[val['status']] = []
        if val.get('reshoot'):
            status['reshoot'].append(id)
        status[val['status']].append(id)
    for s, ids in status.iteritems():
        if s not in ignore:
            print "\n"
            print s + ":"
            for id in ids:
                print "http://archive.org/details/" + id

def setstatus(db, id, status, title=None):
    if id not in db:
        db[id] = {}
    db[id]['status'] = status
    if title:
        db[id]['title'] = title
    savedb(db)

def patchstr(s):
    """Strip control and other non-printable characters from a string (newlines are kept)."""
    return "".join(ch for ch in unicode(s) if unicodedata.category(ch)[0] != "C" or ch == '\n')

def safefilename(s):
    return re.sub('[^a-zA-Z0-9_-]', '_', s)

def fix(spec):
    """Tries to generate an ID and description"""
    print "fix", spec
    if 'upload' in spec:
        fullname, ext = os.path.splitext(spec['upload'])
        spec['upload'] = fullname + ext.lower()
        name = safefilename(os.path.basename(fullname))
        print name
        if 'id' not in spec:
            spec['id'] = genid(name, spec)
        if 'description' not in spec:
            try:
                spec['description'] = patchstr(sh.unzip('-p', '-C', spec['upload'], 'FILE_ID.DIZ', _encoding='cp437'))
            except sh.ErrorReturnCode:
                pass

def defprompt(name):
    def decorator(func):
        func.name = name
        return func
    return decorator

def mvprompt(name):
    @defprompt(name)
    def mv(spec):
        destpath = os.path.join(os.path.dirname(spec['upload']), name)
        sh.mkdir('-p', destpath)
        sh.mv(spec['upload'], destpath)
    return mv

def dbinst(zip):
    sh.rm('-r', DOSBOX_INSTALL)
    sh.mkdir('-p', os.path.join(DOSBOX_INSTALL, 'INSTALL'))
    sh.cp(zip, DOSBOX_INSTALL)
    dirOld = os.getcwd()
    os.chdir(os.path.join(DOSBOX_INSTALL, 'INSTALL'))
    sh.unzip('-q', '../' + os.path.basename(zip))
    sh.open('../..')
    os.chdir(dirOld)

def dbinstexe(exe):
    sh.rm('-r', DOSBOX_INSTALL)
    sh.mkdir('-p', os.path.join(DOSBOX_INSTALL, 'INSTALL'))
    sh.cp(exe, os.path.join(DOSBOX_INSTALL, 'INSTALL'))
    sh.open(os.path.join(DOSBOX_INSTALL, '..'))

@defprompt('pkginst')
def pkginstprompt(spec):
    if pkginst(spec):
        return "sync" if prompt(spec) else "ignore"
    return "ignore"

@defprompt('dbinst')
def dbinstprompt(spec):
    dbinst(spec['upload'])
    x = raw_input("Hit Enter when done (x to cancel): ")
    if x == 'x':
        return 'ignore'
    return pkginstprompt(spec)

@defprompt('skip')
def skipprompt(spec):
    pass

@defprompt('metadata')
def sync_only_metadata(spec):
    del spec['upload']
    sync(spec, ensure_new=False)

@defprompt('win31')
def win31prompt(spec):
    spec['dosbox_drive_d'] = 'emularity_win31/win31.zip'
    return 'ignore'

@defprompt('dos')
def dosprompt(spec):
    spec['dosbox_drive_d'] = None
    return 'ignore'

@defprompt('maxcpu')
def maxcpuprompt(spec):
    unzipdir = os.path.join(DBDIR, 'tmp')
    if os.path.exists(unzipdir):
        sh.rm('-r', unzipdir)
    sh.mkdir('-p', unzipdir)
    dirOld = os.getcwd()
    zipfile = os.path.abspath(spec['upload'])
    os.chdir(unzipdir)
    sh.unzip('-q', zipfile)
    sh.cp(os.path.join(DBDIR, 'maxcpu.dosbox.conf'), 'dosbox.conf')
    zipdir('.', zipfile)
    os.chdir(dirOld)
    return 'ignore'

def markprompt(db, status):
    @defprompt(status)
    def mark(spec):
        setstatus(db, spec['id'], status, spec.get('title'))
    return mark

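# Interactive prompt machinery: run_prompts shows the available single-letter
# commands (each a @defprompt-decorated handler) and reads a line. If the line
# matches a command key, that handler runs and its return value decides whether
# to keep prompting ("ignore") or bail out; any other input is returned as the
# literal answer (e.g. the EXE to run, or the install directory name).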
def run_prompts(input, spec, prompts):
    while True:
        if prompts:
            for k, v in prompts.iteritems():
                print k + '=' + v.name,
            print ''
        result = raw_input(input)
        if result in prompts:
            result = prompts[result](spec)
            if result != "ignore":
                return (False, result)
        else:
            return (True, result)

def prompt(spec, prompts={}, prompt_title=True):
    if 'upload' in spec and ('title' not in spec or 'emulator_start' not in spec):
        print spec['id']
        if 'description' in spec:
            print spec['description']
        try:
            print sh.grep(sh.unzip('-l', spec['upload']), '-i', '.[bec][axo][tem]')
        except sh.ErrorReturnCode:
            print "Unexpected error:", sys.exc_info()[0]
            print "no exe found, skipping"
            return None

        (isexe, exe) = run_prompts('EXE: ', spec, prompts)
        if isexe:
            if spec.get('dosbox_drive_d', SPEC_DEFAULTS.get('dosbox_drive_d')):
                spec['emulator_start'] = 'd:\\runapp ' + exe
            else:
                spec['emulator_start'] = exe
            if 'title' not in spec and prompt_title:
                spec['title'] = raw_input('Title: ')
            return True
        elif exe == 'sync':
            return True
    if 'upload' in spec and 'emulator_start' in spec and 'title' in spec:
        return True

def pkginst(spec, uploadPath=None, prompts={}):
    print sh.ls('-al', DOSBOX_INSTALL)
    (success, d) = run_prompts('Dir: ', spec, prompts)
    if not success:
        return d == 'sync'

    dirToZip = os.path.join(DOSBOX_INSTALL, d)
    if not os.path.exists(dirToZip):
        return False
    name = spec.get('id')
    if not name:
        name = raw_input('ID: ')
    if not uploadPath:
        uploadPath = os.path.join(DBDIR, name + '.zip')
    zipdir(dirToZip, uploadPath)
    spec['upload'] = uploadPath
    return True

def zipdir(dirToZip, zipPath):
    dirOld = os.getcwd()
    zipPath = os.path.abspath(zipPath)
    os.chdir(dirToZip)
    if os.path.exists(zipPath):
        sh.rm(zipPath)
    sh.zip('-r', zipPath, sh.glob('./*'))
    os.chdir(dirOld)

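# sync pushes a spec to archive.org: it diffs the spec (plus SPEC_DEFAULTS)
# against the item's existing metadata, uploads the zip if the spec has an
# 'upload' path, and issues a metadata-only update for any changed keys on an
# existing item. With ensure_new=True it refuses to touch an item that already
# exists.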
def sync(spec, db=None, ensure_new=True):
    print json.dumps(spec)

    item = internetarchive.get_item(spec['id'])
    if ensure_new and item.exists:
        raise Exception("Item " + spec['id'] + " already exists!")

    mdOld = item.metadata.get('metadata', {})
    mdNew = {}
    for key in ['title', 'description', 'emulator_start', 'emulator', 'emulator_ext', 'dosbox_drive_d', 'subject', 'creator']:
        if key in spec and (key not in mdOld or mdOld[key] != spec[key]):
            mdNew[key] = spec[key]

    for key, value in SPEC_DEFAULTS.iteritems():
        if key not in mdOld and key not in spec:
            mdNew[key] = value

    try:
        if 'upload' in spec:
            print "uploading", spec['upload'], "to", spec['id']
            item.upload(spec['upload'], metadata=mdNew)
        if mdNew and mdOld:
            print "updating metadata for", spec['id'], mdNew
            item.modify_metadata(mdNew)
    except requests.exceptions.HTTPError as e:
        print e
        print e.response
        raise

    if db:
        setstatus(db, spec['id'], 'unknown', spec['title'])

def fixprompt(spec, extraprompts={}):
    prompts = {'s': skipprompt, 'i': dbinstprompt, 'p': pkginstprompt, 'z': sync_only_metadata, 'd': dosprompt}  # , 'w': win31prompt}
    prompts.update(extraprompts)
    fix(spec)
    return prompt(spec, prompts)

def fixpromptsync(spec, db=None, extraprompts={}):
    if fixprompt(spec, extraprompts):
        sync(spec, db)
        return True

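# reinstall re-does a previously uploaded installer item: download its ZIP,
# unpack it into the local DOSBox install tree, let the user run the installer
# by hand, then re-zip the chosen directory and re-upload it.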
def reinstall(id, db):
    # download the zip
    item = internetarchive.get_item(id)
    zipfile = item.get_files(formats='ZIP')[0]
    zipfilename = os.path.join(DBDIR, zipfile.name)
    if os.path.exists(zipfilename):
        os.unlink(zipfilename)
    print id, db.get(id, {}).get('title')
    print "downloading", zipfile.name
    zipfile.download(zipfilename)

    # install and reupload
    dbinst(zipfilename)
    raw_input('Hit enter when finished:')
    spec = {'id': id}

    @defprompt('exe')
    def exeprompt(spec):
        spec['upload'] = zipfilename
        prompt(spec, prompt_title=False)
        del spec['upload']
        sync(spec, db, ensure_new=False)  # the item already exists

    prompts = {'w': markprompt(db, 'windows'),
               'i': markprompt(db, 'inappropriate'),
               'b': markprompt(db, 'busted'),
               'e': exeprompt}
    if pkginst(spec, zipfilename, prompts):
        prompt(spec, prompt_title=False, prompts={'m': maxcpuprompt})
        sync(spec, ensure_new=False)  # the item already exists

    setstatus(db, id, 'unknown')
    savedb(db)

def installers(db):
    for id, val in db.iteritems():
        if val.get('status') == 'installer':
            reinstall(id, db)

def batchfix(db):
    for id, val in db.iteritems():
        if val['status'] == 'sync':
            sync({'id': id, 'emulator': 'dosbox-sync'}, ensure_new=False)  # updating an existing item
            setstatus(db, id, 'unknown')
            db[id]['reshoot'] = True
    savedb(db)

def updatestatus(db, statusfrom, statusto):
    for id, val in db.iteritems():
        if val['status'] == statusfrom:
            val['status'] = statusto
    savedb(db)

def clearreshoot(db):
    for id, val in db.iteritems():
        if val.get('reshoot'):
            del val['reshoot']
    savedb(db)

def iterfiles(ext, dir='.'):
    return [fn for fn in os.listdir(dir) if re.search(r'\.' + ext + r'$', fn, re.IGNORECASE)]

def iterzipfiles(dir='.'):
    return iterfiles('zip', dir)

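# zips / exes: batch-upload every .zip (or .exe) in the current directory.
# Each file goes through fix()/prompt() interactively; the mvprompt handlers
# file it away into uploaded/, multi/, notapprop/ or dup/ subdirectories.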
def zips(db, prefix, ext='zip'):
    prompts = {
        'u': mvprompt('uploaded'),
        'm': mvprompt('multi'),
        'n': mvprompt('notapprop'),
        'd': mvprompt('dup'),
        'z': sync_only_metadata
    }
    for zipfile in iterfiles(ext):
        spec = {'upload': zipfile}
        gen_metadata(spec, zipfile)
        filename = os.path.basename(zipfile)
        description = None
        # description = patchstr(sh.sed(sh.grep('-i', '^' + filename, 'FILES1.BBS'), '-e', 's/^' + filename + r'[^ ]* *//'))
        print description
        if fixprompt(spec, extraprompts=prompts):
            spec['title'] = prefix + spec['title']
            if not spec.get('description'):
                spec['description'] = description
            print spec['description']
            sync(spec, db)
            mvprompt('uploaded')(spec)

def exes(db, prefix):
    prompts = {
        'u': mvprompt('uploaded'),
        'm': mvprompt('multi'),
        'n': mvprompt('notapprop'),
        'd': mvprompt('dup')
    }
    for exefile in iterfiles('exe'):
        dbinstexe(exefile)
        x = raw_input("Hit Enter when done (x to cancel): ")
        if x == 'x':
            continue
        spec = {'upload': exefile}
        fix(spec)
        if pkginst(spec, prompts=prompts):
            if prompt(spec):
                sync(spec, db)
                mvprompt('uploaded')(spec)

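# serve runs a small HTTPS Flask app, apparently driven from the pages under
# DBDIR/static: it serves the 'check' template and static files, accepts an
# edited copy of the db via PUT /db, and accepts screenshots via POST
# /screenshot, writing numbered PNGs per item and assembling multi-frame posts
# into an animated GIF with GraphicsMagick and gifsicle.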
def serve(db):
    app = Flask(__name__)
    app.config['DEBUG'] = True
    app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0
    ctx = {'db': db}

    def gen_tmpl(page):
        def from_tmpl():
            return render_template(page + '.html', **ctx)
        from_tmpl.__name__ = "from_tmpl_" + page
        app.route('/' + page)(from_tmpl)

    for page in ['check']:
        gen_tmpl(page)

    @app.route('/db', methods=['PUT'])
    def putdb():
        if request.json:
            ctx['db'] = request.json
            savedb(ctx['db'])
        return "OK cool"

    @app.route('/', defaults={'filename': 'index.html'})
    @app.route('/static/<filename>')
    def staticfile(filename):
        return send_from_directory(os.path.join(DBDIR, 'static'), filename)

    @app.route('/screenshot', methods=['POST'])
    def postscreenshot():
        dataUris = request.form.getlist('image')
        url = request.form['url']

        name = re.match('.*/([^/]+)/?$', url).group(1)
        dirname = os.path.join(DBDIR, name)

        if len(dataUris) > 1:
            dirindex = 1
            while os.path.exists(os.path.join(dirname, 'animation' + str(dirindex))):
                dirindex += 1
            dirname = os.path.join(dirname, 'animation' + str(dirindex))

        if not os.path.exists(dirname):
            os.makedirs(dirname)

        print dirname, len(dataUris)
        filenames = []
        index = 1
        for dataUri in dataUris:
            data = a2b_base64(dataUri[dataUri.index(',')+1:])
            while os.path.exists(os.path.join(dirname, 'screen' + str(index) + '.png')):
                index += 1
            filename = os.path.join(dirname, 'screen' + str(index) + '.png')
            with open(filename, 'wb') as f:
                f.write(data)
            filenames.append(filename)

        if len(filenames) > 1:
            # generate an animated giiiffff
            gifname = dirname + '.gif'
            filenames.append(gifname)
            sh.gm('convert', '-delay', 5, '-loop', 0, *filenames)
            sh.gifsicle('--batch', '-O3', gifname)

        return "yup good"

    import ssl
    context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2)
    context.load_cert_chain(os.path.join(DBDIR, 'server.crt'), os.path.join(DBDIR, 'server.key'))
    app.run(ssl_context=context)

def join_elements(elements):
    text = ''
    for element in elements:
        textAdd = ''
        if element.name == 'p':
            if text:
                textAdd = '\n\n'
            textAdd += join_elements(element.contents)
        elif element.name == 'br':
            textAdd = ''
        elif unicode(element).strip():
            textAdd = unicode(element)
        if textAdd:
            text += textAdd

    return text

def scrapefile(project, url, spec):
    fn = os.path.join(DBDIR, project, url.split('/')[-1])
    if not os.path.exists(fn):
        dirn = os.path.split(fn)[0]
        if not os.path.exists(dirn):
            os.makedirs(dirn)
        urlretrieve(url, fn)
    if not spec:
        spec = {}
    spec['upload'] = fn
    return spec

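# Glorious Trainwrecks scraping: scrapegame pulls title, author, description,
# tags and the game download link from a game node page; scrapecomment does the
# same for a game attached to an event comment (URL with a #comment-id
# fragment); scrapeuser walks a user's paginated games listing and returns
# (url, title) pairs.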
def scrapegame(url, spec=None):
    soup = BeautifulSoup(requests.get(url).text)
    content = soup.find('div', 'node').find('div', 'content')
    descElems = [e for e in content.contents if e.name != 'div' and not (e.name == 'table' and e['id'] == 'attachments')]
    desc = join_elements(descElems)
    desc += '\n\n(Retrieved from <a href="' + url + '">Glorious Trainwrecks</a>.)'
    title = soup.find('div', id='center').h2.contents[0]
    if content.find('div', 'field-field-gamefile'):
        dlurl = content.find('div', 'field-field-gamefile').a['href']
    else:
        for attachment_link in content.find('table', id="attachments").find_all("a"):
            dlurl = attachment_link['href']
            if dlurl.lower().endswith('.zip'):
                break
    user = soup.find('span', 'submitted').a.contents[0]
    tags = 'glorious trainwrecks;Klik & Play'
    legacy_event = content.find('div', 'field-field-field-event')
    if legacy_event and legacy_event.find('div', 'field-item').contents[0] == 'Pirate Kart 2':
        tags += ";The 529 in 1 Klik and Play Pirate Kart Part II: Klik Harder"
    event = content.find('div', 'field-field-event-created-for')
    if event and not event.a.contents[0].startswith("THE 371-"):
        tags += ';' + event.a.contents[0]
    if soup.find('div', 'terms'):
        for taglink in soup.find('div', 'terms').find_all('a'):
            tags += ';' + taglink.contents[0]
    spec = scrapefile('gtrwx', dlurl, spec)
    spec.update({
        'title': title,
        'creator': user,
        'description': desc,
        'subject': tags
    })
    return spec

def scrapecomment(url, spec=None):
    comment_id = url.split("#")[1]
    soup = BeautifulSoup(requests.get(url).text)
    comment = soup.find('a', id=comment_id).find_next_sibling('div', 'comment')
    user = comment.find('span', 'submitted').a.contents[0]
    title = comment.h3.a.contents[0]
    event_title = soup.find('div', id='center').h2.contents[0]
    content = comment.find('div', 'content')
    desc = join_elements([e for e in content.contents if not (e.name == 'table' and e['id'] == 'attachments')])
    desc += '\n\n(Retrieved from <a href="' + url + '">Glorious Trainwrecks</a>.)'
    attachments = [a['href'] for a in content.find_all('a') if a['href'].split('.')[-1].lower() not in ['png', 'jpg']]
    if len(attachments) == 0:
        return None
    if len(attachments) > 1:
        iattach = 1
        for attachment in attachments:
            print iattach, attachment
            iattach += 1
        iattach = raw_input('Which #: ')
        try:
            iattach = int(iattach)
            attachment = attachments[iattach - 1]
        except:
            return None
    else:
        attachment = attachments[0]

    spec = scrapefile('gtrwx', urljoin(url, attachment), spec)
    spec.update({
        'title': title,
        'creator': user,
        'description': desc,
        'subject': 'glorious trainwrecks;Klik & Play;' + event_title
    })
    return spec

def scrapeuser(username):
    games = []
    url = 'http://www.glorioustrainwrecks.com/games/*/' + username
    while url:
        soup = BeautifulSoup(requests.get(url).text)
        for td in soup.find_all('td', 'view-field-node-title'):
            games.append((urljoin(url, td.a['href']), td.a.contents[0]))
        nextLink = soup.find(title='Go to next page')
        if nextLink:
            url = urljoin(url, nextLink['href'])
        else:
            url = None
    return games

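# Command dispatcher: runcmd(db, cmd, args) handles the sub-commands below;
# it is invoked from __main__ as <script> <command> [args...], where <command>
# is one of: scan, check, diff, mergedb, dumpdb, db, sync, pkginst, zip,
# gtgame, gtuser, zips, exezips, exes, dir, dirs, installers, batchfix,
# setstatus, clearreshoot, fixext, reinstall, serve.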
def runcmd(db, cmd, args):
    if cmd == 'scan':
        scan(db)
        savedb(db)

    elif cmd == 'check':
        check(db)

    elif cmd == 'diff':
        diffdb = diff(db, args[0])
        runcmd(diffdb, args[1], args[2:])
        savedb(db)

    elif cmd == 'mergedb':
        with open(args[0], 'rt') as f:
            dbnew = json.load(f)
        db.update(dbnew)
        savedb(db)

    elif cmd == 'dumpdb':
        print json.dumps(db)

    elif cmd == 'db':
        global DB
        db_old = DB
        DB = os.path.abspath(args[0])
        runcmd(loaddb(), args[1], args[2:])
        DB = db_old

    elif cmd == 'sync':
        sync(json.loads(args[0]))

    elif cmd == 'pkginst':
        spec = {}
        pkginst(spec)
        fixpromptsync(spec, db)

    elif cmd == 'zip':
        spec = {'upload': args[0]}
        gen_metadata(spec, args[0])
        fixpromptsync(spec, db)

    elif cmd == 'gtgame':
        if '#' in args[0]:
            spec = scrapecomment(args[0])
        else:
            spec = scrapegame(args[0])
        fixklik(spec)
        fixpromptsync(spec, db)

    elif cmd == 'gtuser':
        games = scrapeuser(args[0])
        for url, title in games:
            spec = scrapegame(url)
            if spec:
                fixklik(spec)
                fixpromptsync(spec, db)

    elif cmd == 'zips':
        prefix = args[0] + ": " if len(args) > 0 else ""
        zips(db, prefix)

    elif cmd == 'exezips':
        prefix = args[0] + ": " if len(args) > 0 else ""
        zips(db, prefix, 'exe')

    elif cmd == 'exes':
        prefix = args[0] + ": " if len(args) > 0 else ""
        exes(db, prefix)

    elif cmd == 'dir':
        d = args[0]
        if d.endswith('/'):
            d = d[:-1]
        zipf = os.path.join(DBDIR, safefilename(os.path.basename(d)) + '.zip')
        zipdir(d, zipf)
        spec = {'upload': zipf}
        gen_metadata(spec, d)
        if len(args) > 1:
            spec['id'] = args[1]
        fixpromptsync(spec, db)

    elif cmd == 'dirs':
        for d in os.listdir('.'):
            if os.path.isdir(d):
                zipf = os.path.join(DBDIR, safefilename(os.path.basename(d)) + '.zip')
                zipdir(d, zipf)
                spec = {'upload': zipf}
                gen_metadata(spec, d)
                fixpromptsync(spec, db)

    elif cmd == 'installers':
        try:
            installers(db)
        except (KeyboardInterrupt, SystemExit):
            pass
        savedb(db)

    elif cmd == 'batchfix':
        batchfix(db)

    elif cmd == 'setstatus':
        updatestatus(db, args[0], args[1])

    elif cmd == 'clearreshoot':
        clearreshoot(db)

    elif cmd == 'fixext':
        for id, val in db.iteritems():
            if val.get('status') == 'unknown':
                item = internetarchive.get_item(id)
                zipfile = item.get_files(formats='ZIP')[0]
                md = item.metadata.get('metadata', {})
                print "checking", id, zipfile.name, md.get('emulator_ext')
                if os.path.splitext(zipfile.name)[1] == '.ZIP' and md.get('emulator_ext') == 'zip':
                    print "fixing", id
                    spec = {'id': id, 'emulator_ext': 'ZIP'}
                    sync(spec, ensure_new=False)  # updating an existing item

    elif cmd == 'reinstall':
        reinstall(args[0], db)
        savedb(db)

    elif cmd == 'serve':
        serve(db)

if __name__ == '__main__':
    runcmd(loaddb(), sys.argv[1], sys.argv[2:])