From ec1bc42d840aad2b8aa676b88c272eebe2d83da9 Mon Sep 17 00:00:00 2001 From: Jeremy Penner Date: Sat, 24 Aug 2024 01:05:23 -0400 Subject: [PATCH] spoken dialog audio dumper --- README.md | 2 ++ dumpspeech.py | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 dumpspeech.py diff --git a/README.md b/README.md index be64c6d..b39a209 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ As soon as I read this, I had to know more. `word-frequency-analysis.txt` lists the number of times words were either added or removed from a line. This allows us to see that, for example, the word "gnarly" was added to two lines in the US release, but removed from two other lines. +`dumpspeech.py` is a Python script that reads `norm.exe` and `digi/sound.raw` (not included in this repository) and generates .wav files for each line of dialogue, with filenames that correspond to line numbers in `lang.dat`. + ## Preliminary Findings The writer in both releases is credited as [Ade Carless](https://www.mobygames.com/person/5209/adrian-carless/). However, the US release has an additional credit for "Additional Script Writing", attributed to [Dennis M. Miller](https://www.mobygames.com/person/183890/dennis-m-miller/), who is also credited as the US producer. This suggests to me that he is the person primarily responsible for the changes to the US script. If anyone wanted to interview someone to get the full scoop, he would be the guy. 
# Source: diff --git a/dumpspeech.py b/dumpspeech.py (new file mode 100644, index 0000000..cad3160)
"""Dump spoken dialogue from the game's sound archive to .wav files.

Reads a table of (line, offset, size) records out of ``norm.exe`` and, for
every usable record, copies ``size`` bytes starting at ``offset`` of the raw
speech file into ``<line>.wav`` inside the output directory.
"""
import hashlib
import os
import struct
import wave

# SHA-256 of a known norm.exe -> byte offset of the speech table inside it.
SPEECH_TABLES = {
    # US norm.exe
    "c40f3d32d1d4e11c97f8c960e6011495566fb9f7e61b437b82dbe49194edbb7a": 0xbd978,
    # UK norm.exe, from https://archive.org/details/NormalityUKDOS
    "49dcc2085369106113cabe1319046272827817bedb30b1765d42be8e5d1ce3f7": 0xbd8d0,
}
# Number of table records read from the executable.
LINE_COUNT = 6501

# Layout of one speech-table record: (line, offset, size).
# NOTE(review): the original format string was lost when this patch was
# extracted (everything between '<' and the next '>' was stripped). Three
# little-endian signed 32-bit integers are assumed, which matches the three
# unpacked names and the ">= 0" sign checks -- confirm against upstream.
TABLE_ENTRY = struct.Struct("<iii")

# Read size used while hashing, so the file is never loaded whole into memory.
_HASH_CHUNK_SIZE = 1 << 16


def hashfile(filename):
    """Return the hexadecimal SHA-256 digest of the file at *filename*."""
    digest = hashlib.sha256()
    with open(filename, 'rb') as f:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for chunk in iter(lambda: f.read(_HASH_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def dumpspeech(exefilename, speechfilename, outputdir, *,
               table_offset=None, line_count=LINE_COUNT):
    """Write one .wav file per usable speech-table record.

    Args:
        exefilename: Path of the executable that contains the speech table.
        speechfilename: Path of the raw speech data file.
        outputdir: Directory that receives the ``<line>.wav`` files; it is
            created if it does not exist.
        table_offset: Byte offset of the speech table in *exefilename*.
            When ``None`` (the default) the offset is looked up in
            ``SPEECH_TABLES`` by the SHA-256 of the executable.
        line_count: Number of table records to read.

    Raises:
        ValueError: If *table_offset* is not given and the executable's hash
            is not listed in ``SPEECH_TABLES``.
        struct.error: If the table ends before *line_count* records are read.
    """
    if table_offset is None:
        table_offset = SPEECH_TABLES.get(hashfile(exefilename))
        if table_offset is None:
            raise ValueError("Unrecognized norm.exe, sorry!")

    # Created only after the executable is accepted, so a rejected input
    # does not leave an empty directory behind.
    os.makedirs(outputdir, exist_ok=True)

    with open(exefilename, 'rb') as f, open(speechfilename, 'rb') as s:
        f.seek(table_offset)
        for _ in range(line_count):
            line, offset, size = TABLE_ENTRY.unpack(f.read(TABLE_ENTRY.size))
            # Records with a negative line/offset or at most one byte of
            # data are skipped.
            if line < 0 or offset < 0 or size <= 1:
                continue
            s.seek(offset)
            with wave.open(os.path.join(outputdir, f'{line}.wav'), 'wb') as w:
                # Output is written as mono, 1 byte per sample, 11025 Hz.
                w.setnchannels(1)
                w.setsampwidth(1)
                w.setframerate(11025)
                w.writeframes(s.read(size))


def main():
    """Dump the US and UK releases from their fixed input file names."""
    dumpspeech("norm.exe.us", "sound.raw.us", "lines-us")
    dumpspeech("norm.exe.uk", "sound.raw.uk", "lines-uk")


if __name__ == "__main__":
    main()