Initial commit
This commit is contained in:
commit
90ec2d25b5
50
README.md
Normal file
50
README.md
Normal file
|
@ -0,0 +1,50 @@
|
|||
# Discworld Noir audio extraction script
|
||||
|
||||
This is a quick hack I threw together to extract the audio for the "Righty ho" easter egg from Discworld Noir.
|
||||
You could extend it to extract other audio, if you like. It has no interface; if you want the script to do
|
||||
something different than it currently does, scroll to the bottom of the file and change it to call different functions.
|
||||
|
||||
This code should work for other language files as well, and probably will work for the earlier Discworld games,
|
||||
but I haven't tested any of that. But if you've always wanted a robot that constantly says "That doesn't work"
|
||||
in Eric Idle's voice, you could probably tweak this script to achieve that dream.
|
||||
|
||||
There are three important files that it needs:
|
||||
|
||||
* `english.txt` - contains all the text in the script (including notes for translators and voice actors!)
|
||||
* `english.smp` - contains all of the voice lines and sound effects in compressed .mp2 format
|
||||
* `english.idx` - maps from lines of text in the `.txt` file to offsets in the `.smp` file
|
||||
|
||||
`english.txt` and `english.idx` are included in this repo because it's kind of fun to poke around in them,
|
||||
and they're small. The script in its current form parses all of the text and prints out a list of lines, along
|
||||
with the offsets into `english.smp` that you'd need to extract the sound for those lines. You'll have to
|
||||
find your own copy of `english.smp` and modify the script if you want to try to actually rip the audio.
|
||||
|
||||
# Fun voice acting directions hidden in `english.txt`
|
||||
|
||||
* in an over-the-top nautical fashion
|
||||
* Columbo-style acting dumb
|
||||
* lapsing into rather bad gangster-speak
|
||||
* confusion melting into insane realisation
|
||||
* with non-sexual innuendo
|
||||
* Hard boiled monologue - after being turned into a toad
|
||||
* Important: to speak Gable's part you must keep your mouth rigidly open - possibly by putting a closed fist in your mouth. Nothing you will say will be intelligible: don't worry about it. That's just the way gargoyles speak. The subtitles will convey meaning.
|
||||
* dogmatically, if you'll excuse the pun
|
||||
* last bit as a Shakespearean Bugs Bunny
|
||||
|
||||
# License
|
||||
|
||||
```
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
Version 2, December 2004
|
||||
|
||||
Copyright (C) 2004 Sam Hocevar <sam@hocevar.net>
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim or modified
|
||||
copies of this license document, and changing it is allowed as long
|
||||
as the name is changed.
|
||||
|
||||
DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. You just DO WHAT THE FUCK YOU WANT TO.
|
||||
```
|
BIN
english.idx
Normal file
BIN
english.idx
Normal file
Binary file not shown.
BIN
english.txt
Normal file
BIN
english.txt
Normal file
Binary file not shown.
95
extract.py
Normal file
95
extract.py
Normal file
|
@ -0,0 +1,95 @@
|
|||
# file format notes:
|
||||
# https://web.archive.org/web/20130904192356/http://rewiki.regengedanken.de/wiki/.TXT_(Discworld)
|
||||
# https://web.archive.org/web/20130903115000/http://rewiki.regengedanken.de/wiki/.SCN
|
||||
# https://web.archive.org/web/20080331031010/http://rewiki.regengedanken.de/wiki/.IDX
|
||||
# https://web.archive.org/web/20160319050238/http://rewiki.regengedanken.de/wiki/.SMP
|
||||
|
||||
import os
|
||||
import pprint
|
||||
from struct import unpack, calcsize
|
||||
|
||||
def read(f, format):
    """Read one packed record from the binary file `f`.

    Exactly `calcsize(format)` bytes are consumed from the current position and
    handed to `struct.unpack`, whose result tuple is returned. If fewer bytes
    are available (for example at end of file), `unpack` raises `struct.error`.
    """
    record_size = calcsize(format)
    return unpack(format, f.read(record_size))
|
||||
|
||||
def dialogue_chunk(f, size):
    """Decode a dialogue chunk from `f` into `["dialogue", line, line, ...]`.

    Exactly 64 length-prefixed strings are read, starting at the current file
    position. `size` is accepted but not used by this decoder.
    """
    lines = ["dialogue"]
    for _ in range(64):
        # The length prefix is variable-width (not documented on the wiki):
        #   * first byte < 0x80: that byte is the length.
        #   * first byte >= 0x80: a second byte follows. The top bit is only a
        #     marker, the low four bits are expected to be zero, and the three
        #     bits in between are the high bits of an 11-bit length whose low
        #     eight bits are the second byte.
        # Most two-byte prefixes look like `80 CF` (a one-byte value spelled in
        # two bytes). AFAICT only one line in english.txt exceeds 255 bytes: it
        # is encoded `90 02`, i.e. 0x102 bytes of text.
        (prefix,) = read(f, "B")
        if prefix < 0x80:
            text_length = prefix
        else:
            assert((prefix & 0x0f) == 0)
            (low_byte,) = read(f, "B")
            text_length = ((prefix & 0x7f) << 4) | low_byte
        raw_text = f.read(text_length)

        # The text appears to be latin-1: the accented 'e' in "Café Ankh" is
        # stored as the single byte 0xe9.
        lines.append(raw_text.decode("latin-1"))

    return lines
|
||||
|
||||
# Chunk type id -> decoder, called as decoder(f, size). Chunk types missing
# from this table are kept by decode_chunks as raw "unknown" entries.
chunk_decoders = {0x0001: dialogue_chunk}
|
||||
|
||||
def decode_chunks(f):
    """Walk the linked list of chunks in the open binary file `f`.

    Each chunk starts with an 8-byte little-endian header: a 16-bit chunk
    type, a 16-bit magic value that must be 0x3334, and a 32-bit absolute
    offset of the next chunk header. `f` must be backed by a real file
    descriptor because its size is taken from `os.fstat`.

    Returns a list with one entry per decoded chunk: whatever the registered
    decoder returned, or `["unknown", chunktype, raw_bytes]` when no decoder
    is registered for that type.
    """
    filesize = os.fstat(f.fileno()).st_size
    decoded = []
    while True:
        chunktype, magic, next_offset = read(f, "<HHI")
        assert(magic == 0x3334)
        if next_offset == 0:
            # A zero link ends the walk; this header's payload is not decoded.
            break

        # The payload runs from just after this header up to the next header.
        payload_size = next_offset - f.tell()
        decoder = chunk_decoders.get(chunktype)
        if decoder:
            decoded.append(decoder(f, payload_size))
        else:
            decoded.append(["unknown", chunktype, f.read(payload_size)])

        # Stop if decoding ran past the next header, or if the link points at
        # or beyond the end of the file.
        if not (f.tell() <= next_offset < filesize):
            break
        f.seek(next_offset, os.SEEK_SET)
    return decoded
|
||||
|
||||
def read_dialogue(filename):
    """Open the script text file at `filename` and return its decoded chunks."""
    with open(filename, mode="rb") as script_file:
        chunks = decode_chunks(script_file)
    return chunks
|
||||
|
||||
def read_speechindex(filename):
    """Load the speech index file as a list of unsigned 32-bit integers.

    The file is treated as a flat array of little-endian 32-bit values and is
    read in one go. Up to three trailing bytes that do not form a complete
    value are ignored. Errors opening or reading the file propagate to the
    caller instead of being silently turned into a truncated index.
    """
    # Local import: the file's top-level import only brings in unpack/calcsize.
    from struct import iter_unpack

    with open(filename, "rb") as f:
        data = f.read()

    # iter_unpack requires a whole number of records, so drop any partial tail.
    whole = len(data) - len(data) % 4
    return [value for (value,) in iter_unpack("<I", data[:whole])]
|
||||
|
||||
def link_speech(chunks, index):
    """Pair each dialogue line with its entry in the speech index.

    Lines from every `"dialogue"` chunk are numbered consecutively in file
    order (other chunk kinds are skipped and do not consume a number), and line
    number N is matched with `index[N]`. A line is dropped only when its text
    is empty *and* its index entry is 0.

    Returns a list of `(line, index_entry)` tuples. Raises `IndexError` if
    `index` has fewer entries than there are dialogue lines.
    """
    dialogue_lines = (
        text
        for chunk in chunks
        if chunk[0] == "dialogue"
        for text in chunk[1:]
    )

    linked = []
    for position, text in enumerate(dialogue_lines):
        offset = index[position]
        if text == '' and offset == 0:
            continue
        linked.append((text, offset))
    return linked
|
||||
|
||||
def extract_speech(filename, filename_out, offset):
    """Copy one sample out of the sound archive `filename` into `filename_out`.

    At byte `offset` the archive is expected to hold an 8-byte little-endian
    header: a 32-bit field whose meaning is unknown, then a 32-bit length.
    The `length` bytes that follow the header are written to `filename_out`,
    which is created or overwritten.
    """
    with open(filename, "rb") as archive, open(filename_out, "wb") as sample_file:
        archive.seek(offset, os.SEEK_SET)
        _unknown, sample_length = read(archive, "<II")
        sample_file.write(archive.read(sample_length))
|
||||
|
||||
# Print every dialogue line next to its entry from english.idx.
pprint.pp(
    link_speech(
        read_dialogue("english.txt"),
        read_speechindex("english.idx"),
    )
)

# To rip a clip, uncomment this and supply your own copy of english.smp:
# extract_speech("english.smp", "rightyho.mp2", 115990421)
|
BIN
rightyho.mp2
Normal file
BIN
rightyho.mp2
Normal file
Binary file not shown.
Loading…
Reference in a new issue