forked from drgulevich/gtexfix
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathto.py
executable file
·127 lines (116 loc) · 4.5 KB
/
to.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/usr/bin/env python
#-----------------------------------------
# Google translate fix for LaTeX documents
# Copyright (c) Dmitry R. Gulevich 2020
# GNU General Public License v3.0
#-----------------------------------------
import re
import sys
import pickle
import argparse
# Command line: a single positional argument, the path of the LaTeX file.
parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()

# Plain suffix test. The former regex '.tex$' had an unescaped dot, which
# matches any character, so names such as 'notex' were accepted as well.
if not args.filename.endswith('.tex'):
    sys.exit('The input should be .tex file. Exit.')

print('LaTeX file:', args.filename)

# Read the whole document into memory; every later step works on this string.
# NOTE(review): no encoding is passed, so the platform default is used.
with open(args.filename, 'r') as source_file:
    source = source_file.read()
### Search for possible token conflicts
# The placeholders inserted further down have the shapes '[1.N]' and '[2.N]'.
# Bracketed numbers of a similar shape that already occur in the source could
# be mistaken for them, so the script refuses to continue when any are found.
# The pattern is now a raw string: the old non-raw literal relied on invalid
# escape sequences ('\[', '\.', ...), which newer Python versions warn about.
# The pattern itself is unchanged.
conflicts = re.findall(r'\[ *[012][\.\,][0-9]+\]', source)
if conflicts:
    print('Token conflicts detected: ', conflicts)
    sys.exit('Tokens may overlap with the content. Change tokens or remove the source of conflict.')
print('No token conflicts detected. Proceeding.')
### Hide everything that is beyond \begin{document} ... \end{document}
# `latex` collects the hidden fragments; a token '[1.N]' in `text` stands for
# latex[N]. `postamble` stays an empty list when there is nothing to hide
# after the document body.
latex = []
postamble = []
doc_begin = re.search(r'\\begin{document}', source)
doc_end = re.search(r'\\end{document}', source)

if doc_begin is None:
    # No \begin{document}: the whole file is treated as body text.
    text = source
else:
    # Everything up to and including \begin{document} becomes latex[0].
    latex.append(source[:doc_begin.end()])
    if doc_end is None:
        text = '[1.0]' + source[doc_begin.end():]
    else:
        text = '[1.0]' + source[doc_begin.end():doc_end.start()]
        # \end{document} and whatever follows it is hidden later on.
        postamble = source[doc_end.start():]
### Hide all comments
# A comment starts at a '%' that is not preceded by a backslash and runs to
# the end of its line.
recomment = re.compile(r'(?<!\\)[%].*')

# Comment texts in document order; the k-th entry belongs to placeholder k.
comments = [found.group() for found in recomment.finditer(text)]

ncomment = 0  # number of placeholders handed out so far


def repl_comment(obj):
    """Return the next placeholder '___GTEXFIXCOMMENT<k>___', with k counted from 0.

    `obj` is the match object supplied by re.sub; it is not inspected.
    """
    global ncomment
    placeholder = '___GTEXFIXCOMMENT%d___' % ncomment
    ncomment += 1
    return placeholder


text = recomment.sub(repl_comment, text)

# Store the original comments in a pickle file.
with open('gtexfix_comments', 'wb') as comments_file:
    pickle.dump(comments, comments_file)
### Hide LaTeX constructs \begin{...} ... \end{...}
# One pattern for openers and closers of the environments that are hidden as a
# whole, plus display math \[ ... \]. The lookbehind on '\[' keeps a line
# break with optional spacing such as '\\[2mm]' from being read as an opener.
reconstruct = re.compile(
    r'\\(?P<kind>begin|end)\{ *(?:equation\**|figure\**|eqnarray\**|multline\**'
    r'|thebibliography|verbatim\**|table\**|subequations\**|align\**'
    r'|displaymath\**|gather\**) *\}'
    r'|(?<!\\)\\\[|\\\]')

# Record only the outermost spans. Pairing the i-th opener with the i-th
# closer (as was done before) yields overlapping spans as soon as constructs
# are nested, e.g. an align inside subequations, and the hidden text would
# then be duplicated when the tokens are substituted back.
start_values = []
end_values = []
depth = 0
for m in reconstruct.finditer(text):
    token = m.group()
    if m.group('kind') == 'begin' or token == '\\[':
        if depth == 0:
            start_values.append(m.start())
        depth += 1
    else:
        if depth == 0:
            # Explicit exit instead of an assert: asserts vanish under -O
            # and carry no explanation for the user.
            sys.exit('Unbalanced LaTeX constructs: "%s" has no matching opening. Exit.' % token)
        depth -= 1
        if depth == 0:
            end_values.append(m.end())
if depth != 0:
    sys.exit('Unbalanced LaTeX constructs: %d opened but never closed. Exit.' % depth)

# Replace every outermost span by a token '[1.N]', N being its index in `latex`.
pieces = []
cursor = 0
for span_start, span_end in zip(start_values, end_values):
    pieces.append(text[cursor:span_start])
    latex.append(text[span_start:span_end])
    pieces.append('[1.%d]' % (len(latex) - 1))
    cursor = span_end
pieces.append(text[cursor:])
text = ''.join(pieces)

# `postamble` is an empty list when nothing follows the document body.
if postamble:
    latex.append(postamble)
    text += '[1.%d]' % (len(latex) - 1)

# Store the hidden fragments in a pickle file.
with open('gtexfix_latex', 'wb') as fp:
    pickle.dump(latex, fp)
### Replace LaTeX commands, formulas and comments by tokens
# Regular expression r'(\$+)(?:(?!\1)[\s\S])*\1' for treatment of $...$ and $$...$$ from:
# https://stackoverflow.com/questions/54663900/how-to-use-regular-expression-to-remove-all-math-expression-in-latex-file
# Alternatives, tried in this order: comment placeholders, sectioning command
# names, \footnote, $-delimited math, commands with two brace groups, commands
# with one brace group, bare commands.
recommand = re.compile(r'___GTEXFIXCOMMENT[0-9]*___|\\title|\\chapter\**|\\section\**|\\subsection\**|\\subsubsection\**|~*\\footnote[0-9]*|(\$+)(?:(?!\1)[\s\S])*\1|~*\\\w*\s*{[^}]*}\s*{[^}]*}|~*\\\w*\s*{[^}]*}|~*\\\w*')

# Matched fragments in document order; the k-th entry belongs to token '[2.k]'.
commands = [found.group() for found in recommand.finditer(text)]

nc = 0  # number of '[2.k]' tokens handed out so far


def repl_f(obj):
    """Return the next token '[2.<k>]', with k counted from 0.

    `obj` is the match object supplied by re.sub; it is not inspected.
    """
    global nc
    token = '[2.%d]' % nc
    nc += 1
    return token


text = recommand.sub(repl_f, text)

# Store the replaced fragments in a pickle file.
with open('gtexfix_commands', 'wb') as commands_file:
    pickle.dump(commands, commands_file)
### Save the processed output to .txt file
limit = 30000  # Estimated Google Translate character limit
# Strip a literal '.tex' suffix; the dot is escaped so that it no longer
# matches an arbitrary character.
filebase = re.sub(r'\.tex$', '', args.filename)


def _write_part(index, chunk):
    """Write `chunk` to '<filebase>_<index>.txt' and print the file name."""
    output_filename = filebase + '_%d.txt' % index
    with open(output_filename, 'w') as txt_file:
        txt_file.write(chunk)
    print('Output file:', output_filename)


# The text is cut only right after a full stop followed by a newline, so that
# sentences are never split between two files.
start = 0   # where the chunk being assembled begins
end = 0     # most recent cut point seen; initialised so it is never unbound
npart = 0   # index of the next output file
for m in re.finditer(r'\.\n', text):
    # Flush once adding this sentence would reach the limit. When no earlier
    # cut point exists (end == start) the chunk cannot be shortened, so it is
    # simply allowed to grow; previously this case raised a NameError.
    if m.end() - start >= limit and end > start:
        _write_part(npart, text[start:end])
        npart += 1
        start = end
    end = m.end()

# The remainder after the last cut point counts towards the limit too: split
# once more when that keeps the final file below the limit.
if start < end < len(text) and len(text) - start >= limit:
    _write_part(npart, text[start:end])
    npart += 1
    start = end

_write_part(npart, text[start:])
print('Supply the output file(s) to Google Translate')