# Reads "cp437_html.txt", extracts the decimal numeric character reference
# (the NNN in "&#NNN;") from each line, collects the codes in ``mpch_entity``
# and prints the resulting list as a ready-to-paste Python assignment.
# NOTE(review): the file name suggests one line per code-page-437 character,
# in order -- confirm against the data file.
import re

# Captures the digits of a "&#NNN;" reference.  The leading ".*" is greedy,
# so if a line contains several references the LAST one is captured.
reEnt = re.compile(r"^.*&#(\d+);.*$")

# Entry 0 is seeded with 0; every entity line appends the next entry, so the
# list index of an entry is its (1-based) position among the entity lines.
mpch_entity = [0]
with open("cp437_html.txt") as fp:
    for lineno, line in enumerate(fp, 1):
        # Blank lines (e.g. a trailing newline at end of file) carry no
        # entity and are ignored.
        if not line.strip():
            continue
        match = reEnt.match(line)
        if match is None:
            # Silently skipping a malformed line would shift every later
            # index in the table, so fail loudly and say where.
            raise ValueError(
                "cp437_html.txt line %d has no numeric entity: %r"
                % (lineno, line)
            )
        ent = match.group(1)
        mpch_entity.append(int(ent))

# A single parenthesised argument prints identically under Python 2 and 3.
# The two spaces after "=" reproduce the output of the original
# `print "mpch_entity = ", mpch_entity` statement byte for byte.
print("mpch_entity =  %s" % (mpch_entity,))