# File format notes (archived pages from the rewiki.regengedanken.de wiki):
# https://web.archive.org/web/20130904192356/http://rewiki.regengedanken.de/wiki/.TXT_(Discworld)
# https://web.archive.org/web/20130903115000/http://rewiki.regengedanken.de/wiki/.SCN
# https://web.archive.org/web/20080331031010/http://rewiki.regengedanken.de/wiki/.IDX
# https://web.archive.org/web/20160319050238/http://rewiki.regengedanken.de/wiki/.SMP
|
|
|
|
import os
|
|
import pprint
|
|
from struct import unpack, calcsize
|
|
|
|
def read(f, format):
    """Read one `struct` record from a binary file and decode it.

    Args:
        f: Binary file-like object positioned at the start of the record.
        format: `struct` format string describing the record layout.

    Returns:
        The tuple produced by `struct.unpack` (a 1-tuple for a single field).

    Raises:
        struct.error: If the file yields fewer bytes than `format` requires,
            for example when the end of the file has been reached.
    """
    record_size = calcsize(format)
    raw = f.read(record_size)
    return unpack(format, raw)
|
|
|
|
def dialogue_chunk(f, size):
    """Decode the body of a dialogue chunk.

    The body is read as 64 consecutive length-prefixed strings.

    Args:
        f: Binary file positioned at the first length prefix of the chunk.
        size: Chunk body size as computed by the caller. Not used here; it is
            accepted because decoders are invoked as ``decoder(f, size)``.

    Returns:
        A list whose first item is the tag ``"dialogue"`` followed by the 64
        decoded strings (empty strings included).

    Raises:
        ValueError: If a two-byte length prefix has any of its low four bits
            set, which contradicts the encoding assumed below.
        struct.error: If the file ends while a length byte is being read.
    """
    result = ["dialogue"]
    for _ in range(64):
        # looks like there is a variable-length encoding for dialogue length, not documented on the wiki.
        # if the high-bit is set, the length is encoded in two bytes.
        # most of the time, this ends up looking like `80 CF` - a one-byte value encoded in two bytes.
        # AFAICT there is only one line in english.txt that is longer than 255 bytes - it looks like `90 02`
        # and refers to a piece of text 0x102 bytes long.
        # Therefore, if the length byte is >= 0x80, we assume the bottom four bits are 0, the top bit is ignored,
        # and the 3 bits in between are used as the high portion of an 11-bit unsigned integer, with the lower 8 bits
        # following.
        length = read(f, "B")[0]
        if length >= 0x80:
            # Explicit check instead of `assert`: assertions are removed under
            # `python -O`, which would let a bad prefix decode to a wrong length.
            if length & 0x0f:
                raise ValueError(
                    f"unexpected dialogue length prefix 0x{length:02x} "
                    f"before offset {f.tell()}: low four bits are not zero"
                )
            length = ((length & 0x7f) << 4) | read(f, "B")[0]
        entry = f.read(length)

        # the dialogue in english.txt appears to be in latin-1 (the accented 'e' in "Café Ankh" is encoded as 0xe9)
        result.append(entry.decode("latin-1"))

    return result
|
|
|
|
# Dispatch table: chunk type id -> decoder, invoked as decoder(f, size).
# Chunk types without an entry are kept as raw "unknown" records by decode_chunks.
chunk_decoders = {
    0x0001: dialogue_chunk,
}
|
|
|
|
def decode_chunks(f):
    """Walk the chunk chain of an open file and decode each chunk.

    Every chunk starts with an 8-byte little-endian header
    ``(chunk type: u16, magic: u16, next_offset: u32)``. The magic must be
    0x3334. `next_offset` is used as the absolute file offset of the next
    chunk header; a value of 0 ends the walk without decoding that chunk.

    Args:
        f: Binary file object opened on a real file (``f.fileno()`` is passed
            to `os.fstat` to learn the file size), positioned at the first
            chunk header.

    Returns:
        A list with one item per chunk: whatever the registered decoder in
        `chunk_decoders` returned, or ``["unknown", chunktype, raw_bytes]``
        when no decoder is registered for the chunk type.

    Raises:
        ValueError: If a chunk header does not carry the 0x3334 magic.
        struct.error: If the file ends in the middle of a chunk header.
    """
    chunks = []
    filesize = os.fstat(f.fileno()).st_size
    while True:
        chunktype, magic, next_offset = read(f, "<HHI")
        # Explicit check instead of `assert`: assertions vanish under
        # `python -O`, and this guards against parsing a non-chunk file.
        if magic != 0x3334:
            raise ValueError(
                f"bad chunk magic 0x{magic:04x} in header at offset "
                f"{f.tell() - 8} (expected 0x3334)"
            )
        if next_offset == 0:
            break

        # A next_offset that points backwards would give a negative size, and
        # f.read(negative) means "read to end of file"; clamp to zero instead.
        chunksize = max(next_offset - f.tell(), 0)
        decoder = chunk_decoders.get(chunktype)
        if decoder:
            chunks.append(decoder(f, chunksize))
        else:
            chunks.append(["unknown", chunktype, f.read(chunksize)])

        # Stop rather than seek backwards (the decoder consumed more than the
        # header announced) or past the end of the file.
        if next_offset < f.tell() or next_offset >= filesize:
            break
        f.seek(next_offset, os.SEEK_SET)
    return chunks
|
|
|
|
def read_dialogue(filename):
    """Open `filename` in binary mode and return its decoded chunk list.

    Args:
        filename: Path of the chunked dialogue file to parse.

    Returns:
        The list produced by `decode_chunks` for that file.
    """
    with open(filename, "rb") as stream:
        parsed = decode_chunks(stream)
    return parsed
|
|
|
|
def read_speechindex(filename):
    """Load an index file as a flat list of unsigned 32-bit integers.

    The file is read as consecutive little-endian u32 values until it is
    exhausted. Trailing bytes that do not form a complete value are ignored.

    Args:
        filename: Path of the index file.

    Returns:
        List of ints in file order (empty for an empty file).
        NOTE(review): presumably these are offsets into the speech sample
        file, as consumed by `extract_speech` -- confirm against the format
        notes.

    Raises:
        OSError: If the file cannot be opened or read. The loop ends on a
            short read rather than on a caught exception, so real I/O errors
            and KeyboardInterrupt are no longer swallowed.
    """
    entry_format = "<I"
    entry_size = calcsize(entry_format)
    index = []
    with open(filename, "rb") as f:
        # A short (or empty) read means end of file.
        while len(raw := f.read(entry_size)) == entry_size:
            index.append(unpack(entry_format, raw)[0])
    return index
|
|
|
|
def link_speech(chunks, index):
    """Pair every dialogue line with the index entry at the same position.

    Lines are taken, in order, from every chunk tagged ``"dialogue"``; the
    n-th line overall is matched with ``index[n]``. Slots where the line is
    empty and the index entry is 0 are left out of the result, but they still
    consume an index position.

    Args:
        chunks: Decoded chunks; each is a list whose first item is its tag.
        index: Sequence of integers, one per dialogue line.

    Returns:
        List of ``(line, index_entry)`` tuples.

    Raises:
        IndexError: If `index` has fewer entries than there are dialogue lines.
    """
    all_lines = [
        text
        for chunk in chunks
        if chunk[0] == "dialogue"
        for text in chunk[1:]
    ]

    linked = []
    for slot, text in enumerate(all_lines):
        entry = index[slot]
        if text == '' and entry == 0:
            continue
        linked.append((text, entry))
    return linked
|
|
|
|
def extract_speech(filename, filename_out, offset):
    """Copy one length-prefixed record out of `filename` into a new file.

    At `offset` the code reads two little-endian u32 values: the first is
    ignored (its meaning is unknown), the second is the number of payload
    bytes that follow. That payload is written verbatim to `filename_out`.

    Args:
        filename: Path of the source file.
        filename_out: Path of the file to create or overwrite.
        offset: Absolute byte offset of the record header in `filename`.

    Raises:
        struct.error: If fewer than 8 header bytes are available at `offset`.
    """
    with open(filename, "rb") as archive, open(filename_out, "wb") as sample:
        archive.seek(offset, os.SEEK_SET)
        _unknown, payload_size = read(archive, "<II")
        payload = archive.read(payload_size)
        sample.write(payload)
|
|
|
|
# Script body: parse the dialogue and index files from the current directory
# and pretty-print every (line, index entry) pair.
pprint.pp(
    link_speech(
        read_dialogue("english.txt"),
        read_speechindex("english.idx"),
    )
)
|
|
|
|
# extract_speech("english.smp", "rightyho.mp2", 115990421)
|