-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathferup_genIG.py
More file actions
72 lines (60 loc) · 2.24 KB
/
Copy pathferup_genIG.py
File metadata and controls
72 lines (60 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#-*- coding:utf-8 -*-
#!\usr\bin\env py3
import os
import re
import codecs
"""
Program: ferup_genIG.py
Function: generate IG tree model
Author: Junjie Pan (junjie.pan@nuance.com)
"""
class genIGTR(object):
    """Generate one IG tree model text file per entry of a default-phone list.

    ``options`` is a mapping that must provide:

    * ``'default'`` -- path of the UTF-8 input list; every non-blank line
      holds three whitespace-separated fields: ``orth hex lhp``.
    * ``'fout'`` -- output directory; one ``<hex>.txt`` file is written
      into it for every input line.

    ``logger`` is any object exposing a ``logging.Logger``-style ``info``.
    """

    # Fixed text written before and after the single forcing rule of each
    # generated file.  These strings are written verbatim (never passed
    # through ``%`` formatting), so the doubled percent signs are literal.
    _PREFIX = "{\n\tstring lhplus;\n\tstring char0;\n}\n%% forcing_rules\n"
    _SUFFIX = "%% data\nNOMATCH\t=\n"

    def __init__(self, options, logger):
        """Store ``options`` and ``logger``; on Windows switch the console to UTF-8."""
        # ``chcp`` is a Windows console command; running it elsewhere only
        # produces a "command not found" message from the shell.
        if os.name == "nt":
            os.system("chcp 65001")
        self.logger = logger
        self.options = options

    def process(self):
        """Read the default list and write one model file per entry.

        If the output directory already exists the user is asked for
        confirmation first; any answer other than ``y``/``yes`` ends the
        program with exit status 0.

        Raises:
            SystemExit: the user declined to overwrite an existing directory.
            ValueError: a non-blank input line does not have exactly three
                whitespace-separated fields.
        """
        out_dir = self.options['fout']
        if not os.path.exists(out_dir):
            # makedirs (not mkdir) so that a nested output path works too.
            os.makedirs(out_dir)
        else:
            flag = input("%s exists, new model will replace old one. Continue? (y/n)" % out_dir)
            if flag.strip().lower() not in ('y', 'yes'):
                # Same effect as the site-provided exit(0), but does not
                # depend on the ``site`` module having been loaded.
                raise SystemExit(0)

        src = self.options['default']
        # 'utf-8-sig' transparently drops a leading BOM (common for files
        # saved on Windows) that would otherwise stick to the first field.
        with codecs.open(src, 'r', 'utf-8-sig') as fi:
            for lineno, line in enumerate(fi, 1):
                fields = line.split()
                if not fields:
                    # Blank (or whitespace-only) line: nothing to generate.
                    continue
                if len(fields) != 3:
                    raise ValueError(
                        "%s:%d: expected 3 whitespace-separated fields "
                        "(orth, hex, lhp), got %d"
                        % (src, lineno, len(fields)))
                orth, _hex, lhp = fields
                self.logger.info("Process: %s(%s)", orth, _hex)
                fn = os.path.join(out_dir, "%s.txt" % _hex)
                with codecs.open(fn, 'w', 'utf-8') as fo:
                    fo.write(self._PREFIX)
                    fo.write("%s\t%s\n" % (lhp, orth))
                    fo.write(self._SUFFIX)
if __name__ == '__main__':
    # Script entry point: parse the command line, set up logging to both a
    # log file and the console, then run the generator.
    import time
    import logging
    from argparse import ArgumentParser

    parser = ArgumentParser(description='ferup_genIG')
    parser.add_argument("--version", action="version", version="ferup_genIG 1.0")
    # Positional 1: input list (required).
    parser.add_argument(action="store", dest="default", default="", help='homograph default phone lists')
    # Positional 2: output directory.  nargs="?" makes the "ig" default
    # effective; without it argparse treats the positional as required and
    # silently ignores the default.
    parser.add_argument(action="store", dest="fout", nargs="?", default="ig",
                        help='output directory for the generated IG tree model files (default: ig)')
    parser.add_argument("-d", "--dict", action="store", dest="dct", default="", help='dictionary')
    args = parser.parse_args()
    options = vars(args)

    # Root logger writing the same format to LOG-ferup_genIG.txt and stderr.
    logger = logging.getLogger()
    formatter = logging.Formatter('[%(asctime)s][*%(levelname)s*][%(filename)s:%(lineno)d|%(funcName)s] - %(message)s', '%Y%m%d-%H:%M:%S')
    file_handler = logging.FileHandler('LOG-ferup_genIG.txt', 'w', 'utf-8')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    logger.setLevel(logging.INFO)

    allStartTP = time.time()
    appInst = genIGTR(options, logger)
    appInst.process()
    allEndTP = time.time()
    # Report the wall-clock duration measured above.
    logger.info("Finished in %.2f seconds", allEndTP - allStartTP)