spoken dialog audio dumper

This commit is contained in:
Jeremy Penner 2024-08-24 01:05:23 -04:00
parent 512453d748
commit ec1bc42d84
2 changed files with 42 additions and 0 deletions

View file

@ -27,6 +27,8 @@ As soon as I read this, I had to know more.
`word-frequency-analysis.txt` lists the number of times words were either added or removed from a line. This allows us to see that, for example, the word "gnarly" was added to two lines in the US release, but removed from two other lines.
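`dumpspeech.py` is a Python script that reads `norm.exe` and `digi/sound.raw` (not included in this repository) and generates a .wav file for each line of dialogue, with filenames that correspond to line numbers in `lang.dat`. The script expects the inputs in the working directory, named `norm.exe.us` / `sound.raw.us` for the US release and `norm.exe.uk` / `sound.raw.uk` for the UK release, and writes its output to `lines-us/` and `lines-uk/`.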
## Preliminary Findings
The writer in both releases is credited as [Ade Carless](https://www.mobygames.com/person/5209/adrian-carless/). However, the US release has an additional credit for "Additional Script Writing", attributed to [Dennis M. Miller](https://www.mobygames.com/person/183890/dennis-m-miller/), who is also credited as the US producer. This suggests to me that he is the person primarily responsible for the changes to the US script. If anyone wanted to interview someone to get the full scoop, he would be the guy.

40
dumpspeech.py Normal file
View file

@ -0,0 +1,40 @@
import hashlib
import os
import struct
import wave
# Maps the SHA-256 hex digest of a known norm.exe to the byte offset at which
# dumpspeech() starts reading that executable's speech table.
SPEECH_TABLES = {
    # US norm.exe
    "c40f3d32d1d4e11c97f8c960e6011495566fb9f7e61b437b82dbe49194edbb7a": 0xBD978,
    # UK norm.exe, from https://archive.org/details/NormalityUKDOS
    "49dcc2085369106113cabe1319046272827817bedb30b1765d42be8e5d1ce3f7": 0xBD8D0,
}

# Number of 12-byte table entries dumpspeech() reads from the executable.
LINE_COUNT = 6501
def hashfile(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    The file is opened in binary mode and read into memory in a single call
    before being hashed.
    """
    with open(filename, 'rb') as stream:
        contents = stream.read()
    return hashlib.sha256(contents).hexdigest()
def dumpspeech(exefilename, speechfilename, outputdir):
    """Write one .wav file per usable speech-table entry into *outputdir*.

    The table offset is looked up in SPEECH_TABLES using the SHA-256 digest
    of *exefilename*. Starting at that offset, LINE_COUNT entries are read,
    each consisting of three little-endian signed 32-bit integers
    ``(line, offset, size)``. For every entry with a non-negative line and
    offset and a size greater than 1, *size* bytes are read from
    *speechfilename* at *offset* and written to ``<outputdir>/<line>.wav``
    with 1 channel, 1 byte per sample and a frame rate of 11025 Hz.

    Args:
        exefilename: Path to the executable that contains the speech table.
        speechfilename: Path to the raw sound data the table points into.
        outputdir: Directory for the generated files; created if missing.

    Raises:
        ValueError: If the executable's digest is not in SPEECH_TABLES.
    """
    table_offset = SPEECH_TABLES.get(hashfile(exefilename))
    if table_offset is None:
        raise ValueError("Unrecognized norm.exe, sorry!")

    # Create the directory only after the executable has been recognised, so
    # a rejected input does not leave an empty output directory behind.
    os.makedirs(outputdir, exist_ok=True)

    entry = struct.Struct("<iii")
    with open(exefilename, 'rb') as exe, open(speechfilename, 'rb') as speech:
        exe.seek(table_offset)
        for _ in range(LINE_COUNT):
            line, offset, size = entry.unpack(exe.read(entry.size))
            # Skip entries with a negative line/offset or a size of at most 1
            # (presumably unused table slots -- not confirmed here).
            if line < 0 or offset < 0 or size <= 1:
                continue
            speech.seek(offset)
            wav_path = os.path.join(outputdir, f'{line}.wav')
            with wave.open(wav_path, 'wb') as out:
                out.setnchannels(1)
                out.setsampwidth(1)
                out.setframerate(11025)
                out.writeframes(speech.read(size))
if __name__ == "__main__":
    # Run the dump only when executed as a script, so that importing this
    # module does not trigger file I/O. Input files are resolved relative to
    # the current working directory.
    dumpspeech("norm.exe.us", "sound.raw.us", "lines-us")
    dumpspeech("norm.exe.uk", "sound.raw.uk", "lines-uk")