diff --git a/README.md b/README.md index be64c6d..b39a209 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ As soon as I read this, I had to know more. `word-frequency-analysis.txt` lists the number of times words were either added or removed from a line. This allows us to see that, for example, the word "gnarly" was added to two lines in the US release, but removed from two other lines. +`dumpspeech.py` is a Python script that reads `norm.exe` and `digi/sound.raw` (not included in this repository) and generates .wav files for each line of dialogue, with filenames that correspond to line numbers in `lang.dat`. + ## Preliminary Findings The writer in both releases is credited as [Ade Carless](https://www.mobygames.com/person/5209/adrian-carless/). However, the US release has an additional credit for "Additional Script Writing", attributed to [Dennis M. Miller](https://www.mobygames.com/person/183890/dennis-m-miller/), who is also credited as the US producer. This suggests to me that he is the person primarily responsible for the changes to the US script. If anyone wanted to interview someone to get the full scoop, he would be the guy. 
"""Dump speech samples referenced by ``norm.exe`` into individual .wav files.

The script reads a table of ``(line, offset, size)`` records out of a known
``norm.exe`` build, pulls the matching byte range from the raw speech file,
and writes one ``<line>.wav`` per record into an output directory.
"""

import hashlib
import os
import struct
import wave

# SHA-256 of each recognized norm.exe -> byte offset of its speech table.
SPEECH_TABLES = {
    # US norm.exe
    "c40f3d32d1d4e11c97f8c960e6011495566fb9f7e61b437b82dbe49194edbb7a": 0xbd978,
    # UK norm.exe, from https://archive.org/details/NormalityUKDOS
    "49dcc2085369106113cabe1319046272827817bedb30b1765d42be8e5d1ce3f7": 0xbd8d0,
}
# Number of records read from the speech table.
LINE_COUNT = 6501

# NOTE(review): the format string was lost in the copy of the script this was
# restored from. Three little-endian signed 32-bit integers (12 bytes) are
# assumed because both `line` and `offset` are compared against zero --
# confirm against the original script before relying on the output.
_RECORD = struct.Struct("<iii")

_HASH_CHUNK_SIZE = 65536


def hashfile(filename: str) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *filename*."""
    digest = hashlib.sha256()
    with open(filename, 'rb') as f:
        # Feed the hash in chunks so the whole file is never held in memory.
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def dumpspeech(exefilename: str, speechfilename: str, outputdir: str) -> None:
    """Write one .wav file per usable speech-table record.

    Args:
        exefilename: Path to a ``norm.exe`` whose SHA-256 is in
            ``SPEECH_TABLES``.
        speechfilename: Path to the raw speech data the table points into.
        outputdir: Directory that receives ``<line>.wav`` files; created if
            it does not exist.

    Raises:
        ValueError: If the executable's hash is not in ``SPEECH_TABLES``.
        struct.error: If the table is cut short by the end of the file.
    """
    table_offset = SPEECH_TABLES.get(hashfile(exefilename))
    if table_offset is None:
        raise ValueError("Unrecognized norm.exe, sorry!")

    # Only touch the filesystem once the executable has been recognized, so a
    # failed run does not leave an empty output directory behind.
    os.makedirs(outputdir, exist_ok=True)

    with open(exefilename, 'rb') as f, open(speechfilename, 'rb') as s:
        f.seek(table_offset)
        for _ in range(LINE_COUNT):
            line, offset, size = _RECORD.unpack(f.read(_RECORD.size))
            # Records with a negative field, or with at most one byte of
            # data, are skipped.
            if line < 0 or offset < 0 or size <= 1:
                continue
            s.seek(offset)
            with wave.open(os.path.join(outputdir, f'{line}.wav'), 'wb') as w:
                # Samples are written out as mono, 1 byte per sample,
                # 11025 frames per second.
                w.setnchannels(1)
                w.setsampwidth(1)
                w.setframerate(11025)
                w.writeframes(s.read(size))


if __name__ == "__main__":
    dumpspeech("norm.exe.us", "sound.raw.us", "lines-us")
    dumpspeech("norm.exe.uk", "sound.raw.uk", "lines-uk")