-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLMProcessor.py
More file actions
51 lines (36 loc) · 1.22 KB
/
LMProcessor.py
File metadata and controls
51 lines (36 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def dict_preprocess():
    """Extract the headwords of ``dict_1.8w.txt`` into ``refer_dict.txt``.

    Each input line that contains the separator ``' / '`` contributes the
    text found before the first separator; lines without the separator are
    skipped. The collected headwords are sorted and written one per line to
    ``refer_dict.txt`` in the current working directory, after which a
    completion banner is printed.

    Raises:
        OSError: if the input file cannot be read or the output file cannot
            be written.
    """
    separator = ' / '

    # The context manager closes the input handle even if reading fails;
    # the previous version opened the file and never closed it.
    with open('dict_1.8w.txt', 'r') as dict_file:
        outputs = sorted(
            '%s\n' % line.split(separator)[0]
            for line in dict_file
            if separator in line
        )

    with open('refer_dict.txt', 'w') as refer_dict:
        refer_dict.writelines(outputs)

    print('*** DONE ***')
def dict_process():
    """Prune ``cmudict-en-us.dict`` to the words listed in ``refer_dict.txt``.

    For every line of the pronunciation dictionary, the headword is the text
    before the first ``'('`` and before the first space. A line is kept
    (verbatim) when its headword matches, case-insensitively, one of the
    words in ``refer_dict.txt``. Kept lines are written in their original
    order to ``cmudict-en-us-pruned.dict``.

    Progress is reported on stdout: whenever the first character of the
    headword differs from that of the previous line, the headword and the
    number of lines kept so far are printed. A completion banner is printed
    at the end.

    Raises:
        OSError: if either input file cannot be read or the output file
            cannot be written.
    """
    outputs = []
    latest_first_letter = ''

    # Both inputs are opened in context managers so the handles are always
    # released; the previous version never closed them.
    with open('cmudict-en-us.dict', 'r') as dict_file, \
            open('refer_dict.txt', 'r') as refer_dict:
        # Build the lookup set once: membership tests become O(1) instead of
        # rescanning the whole reference list for every dictionary line.
        wanted = {word.lower().strip('\n') for word in refer_dict}

        for line in dict_file:
            item = line.split('(')[0].split(' ')[0]
            if not item:
                # A line starting with ' ' or '(' has no headword; skip it
                # rather than fail with IndexError on item[0].
                continue

            if latest_first_letter != item[0]:
                latest_first_letter = item[0]
                print('%s %d' % (item, len(outputs)))

            if item.lower() in wanted:
                outputs.append(line)

    with open('cmudict-en-us-pruned.dict', 'w') as output_dict:
        output_dict.writelines(outputs)

    print('*** DONE ***')
def prune_language_model():
    """Placeholder for language-model pruning.

    Not implemented yet: the function performs no work and returns ``None``.
    """
    return None
# Script entry point: when this file is executed directly (not imported),
# run the dictionary pruning step.
if __name__ == '__main__':
    dict_process()