spoken dialog audio dumper

2024-08-24 01:05:23 -04:00 · 2024-08-24 01:05:23 -04:00 · ec1bc42d84
parent 512453d748
commit ec1bc42d84
2 changed files with 42 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@ As soon as I read this, I had to know more.

 `word-frequency-analysis.txt` lists the number of times words were either added or removed from a line. This allows us to see that, for example, the word "gnarly" was added to two lines in the US release, but removed from two other lines.

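+`dumpspeech.py` is a Python script that reads `norm.exe` and `digi/sound.raw` (not included in this repository) and generates a .wav file for each line of dialogue, with filenames that correspond to line numbers in `lang.dat`. As written, it expects the inputs to be named `norm.exe.us`/`sound.raw.us` and `norm.exe.uk`/`sound.raw.uk` in the working directory, and writes its output to `lines-us/` and `lines-uk/`.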
+
 ## Preliminary Findings

 The writer in both releases is credited as [Ade Carless](https://www.mobygames.com/person/5209/adrian-carless/). However, the US release has an additional credit for "Additional Script Writing", attributed to [Dennis M. Miller](https://www.mobygames.com/person/183890/dennis-m-miller/), who is also credited as the US producer. This suggests to me that he is the person primarily responsible for the changes to the US script. If anyone wanted to interview someone to get the full scoop, he would be the guy.
--- a/dumpspeech.py
+++ b/dumpspeech.py
@@ -0,0 +1,40 @@
+import hashlib
+import os
+import struct
+import wave
+
+# Maps the SHA-256 hex digest of a known norm.exe (as returned by hashfile)
+# to the byte offset inside that executable that dumpspeech seeks to before
+# reading the speech table.
+SPEECH_TABLES = {
+    # US norm.exe
+    "c40f3d32d1d4e11c97f8c960e6011495566fb9f7e61b437b82dbe49194edbb7a": 0xbd978,
+    # UK norm.exe, from https://archive.org/details/NormalityUKDOS
+    "49dcc2085369106113cabe1319046272827817bedb30b1765d42be8e5d1ce3f7": 0xbd8d0
+}
+# Number of 12-byte table records dumpspeech reads starting at that offset.
+LINE_COUNT = 6501
+
+def hashfile(filename):
+    """Return the SHA-256 hex digest of the file at *filename*.
+
+    The file is read in fixed-size chunks so memory use stays constant
+    no matter how large the file is.
+
+    Raises:
+        OSError: if the file cannot be opened or read.
+    """
+    m = hashlib.sha256()
+    with open(filename, 'rb') as f:
+        # The sentinel form of iter() stops on the empty bytes object that
+        # read() returns at end of file.
+        for chunk in iter(lambda: f.read(65536), b''):
+            m.update(chunk)
+    return m.hexdigest()
+
+def dumpspeech(exefilename, speechfilename, outputdir):
+    """Write one .wav file per usable speech-table entry into *outputdir*.
+
+    The executable's SHA-256 digest selects the file offset of its speech
+    table from SPEECH_TABLES. LINE_COUNT records of three little-endian
+    signed 32-bit integers (line, offset, size) are read from that offset.
+    For each record with a non-negative line and offset and a size greater
+    than one, *size* bytes are read from *speechfilename* at *offset* and
+    written to ``<line>.wav`` as single-channel, one-byte-per-sample audio
+    at 11025 Hz.
+
+    Args:
+        exefilename: path to the game executable holding the speech table.
+        speechfilename: path to the raw sound data the table points into.
+        outputdir: directory for the .wav files; created if missing.
+
+    Raises:
+        Exception: if the executable's digest is not in SPEECH_TABLES.
+        struct.error: if the executable ends before the table does.
+        OSError: if a file cannot be opened, read, or written.
+    """
+    table_offset = SPEECH_TABLES.get(hashfile(exefilename))
+    if table_offset is None:
+        raise Exception("Unrecognized norm.exe, sorry!")
+    # Create the output directory only after the executable is recognized, so
+    # a rejected input leaves nothing behind on disk.
+    os.makedirs(outputdir, exist_ok=True)
+    entry = struct.Struct("<iii")
+    with open(exefilename, 'rb') as f, open(speechfilename, 'rb') as s:
+        f.seek(table_offset)
+        for index in range(LINE_COUNT):
+            record = f.read(entry.size)
+            if len(record) != entry.size:
+                # Same exception type unpack() would raise on a short read,
+                # but with a message that says where the table ended.
+                raise struct.error(
+                    f"speech table truncated at entry {index} of {LINE_COUNT}"
+                )
+            line, offset, size = entry.unpack(record)
+            # Entries with a negative line or offset, or a size of at most
+            # one byte, are skipped.
+            if line < 0 or offset < 0 or size <= 1:
+                continue
+            s.seek(offset)
+            with wave.open(os.path.join(outputdir, f'{line}.wav'), 'wb') as w:
+                w.setnchannels(1)
+                w.setsampwidth(1)
+                w.setframerate(11025)
+                w.writeframes(s.read(size))
+
+# Dump both releases in turn: (executable, raw sound data, output directory).
+for job in (
+    ("norm.exe.us", "sound.raw.us", "lines-us"),
+    ("norm.exe.uk", "sound.raw.uk", "lines-uk"),
+):
+    dumpspeech(*job)
+