-
-
Notifications
You must be signed in to change notification settings - Fork 127
/
Copy pathcli.py
104 lines (82 loc) · 3.02 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import argparse
import io
import os
import shutil
import sys
import mammoth
from . import writers
def main():
    """Run the command-line interface: convert a .docx file and write the result.

    Parses the command-line arguments, optionally loads a style map file,
    converts the document with ``mammoth.convert``, writes every conversion
    message to stderr (one per line), and finally writes the converted
    document.

    When ``--output-dir`` is given, images are written to that directory by
    an ``ImageWriter`` and the document is written next to them as
    ``<input name without extension>.html``. Otherwise no custom image
    handler is passed and the document goes to the positional output path
    (which may be None).
    """
    args = _parse_args()

    if args.style_map is None:
        style_map = None
    else:
        # Decode explicitly as UTF-8 rather than relying on the platform's
        # default encoding, matching how the generated document is written.
        with io.open(args.style_map, encoding="utf-8") as style_map_fileobj:
            style_map = style_map_fileobj.read()

    with open(args.path, "rb") as docx_fileobj:
        if args.output_dir is None:
            convert_image = None
            output_path = args.output
        else:
            convert_image = mammoth.images.img_element(ImageWriter(args.output_dir))
            # splitext keeps the full name when the input has no extension;
            # rpartition(".")[0] would yield "" and produce a file named
            # just ".html".
            document_name = os.path.splitext(os.path.basename(args.path))[0]
            output_filename = "{0}.html".format(document_name)
            output_path = os.path.join(args.output_dir, output_filename)

        result = mammoth.convert(
            docx_fileobj,
            style_map=style_map,
            convert_image=convert_image,
            output_format=args.output_format,
        )
        for message in result.messages:
            sys.stderr.write(message.message)
            sys.stderr.write("\n")

        _write_output(output_path, result.value)
class ImageWriter(object):
    """Callable that saves each image it is given into an output directory.

    Files are named with a running counter that starts at 1, followed by the
    part of the image's content type after the first "/" as the extension.
    """

    def __init__(self, output_dir):
        """Remember the target directory and start numbering images at 1."""
        self._output_dir = output_dir
        self._image_number = 1

    def __call__(self, element):
        """Copy the image's bytes to the next numbered file.

        ``element`` must expose a ``content_type`` string and an ``open()``
        method returning a context manager that yields a readable binary
        stream.

        Returns a dict whose "src" entry is the bare file name (without the
        directory) of the image that was written.
        """
        _, _, suffix = element.content_type.partition("/")
        filename = "{0}.{1}".format(self._image_number, suffix)
        destination = os.path.join(self._output_dir, filename)

        # The destination is opened before the source, so the file is
        # created even if opening the source fails.
        with open(destination, "wb") as sink, element.open() as source:
            shutil.copyfileobj(source, sink)

        # The counter only advances once the copy has completed.
        self._image_number += 1
        return {"src": filename}
def _write_output(path, contents):
if path is None:
if sys.version_info[0] <= 2:
stdout = sys.stdout
else:
stdout = sys.stdout.buffer
stdout.write(contents.encode("utf-8"))
stdout.flush()
else:
with io.open(path, "w", encoding="utf-8") as fileobj:
fileobj.write(contents)
def _parse_args():
    """Build the command-line parser and parse ``sys.argv``.

    Returns the ``argparse`` namespace with these attributes:

    - ``path``: the .docx file to convert (required positional).
    - ``output``: optional positional output path; None when omitted.
    - ``output_dir``: value of ``--output-dir``; None when omitted.
      ``output`` and ``output_dir`` are mutually exclusive.
    - ``output_format``: value of ``--output-format``, restricted to the
      names returned by ``writers.formats()``; None when omitted.
    - ``style_map``: path given with ``--style-map``; None when omitted.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "path",
        metavar="docx-path",
        help="Path to the .docx file to convert.")

    # The single-file output path and the output directory are alternative
    # ways of choosing where the result goes, so argparse rejects both at once.
    output_group = parser.add_mutually_exclusive_group()
    output_group.add_argument(
        "output",
        nargs="?",
        metavar="output-path",
        help=(
            "Output path for the generated document. "
            "Images will be stored inline in the output document. "
            "Output is written to stdout if not set."
        ))
    output_group.add_argument(
        "--output-dir",
        help=(
            "Output directory for generated HTML and images. "
            "Images will be stored in separate files. "
            "Mutually exclusive with output-path."
        ))

    parser.add_argument(
        "--output-format",
        required=False,
        choices=writers.formats(),
        help="Output format.")
    parser.add_argument(
        "--style-map",
        required=False,
        help="File containing a style map.")
    return parser.parse_args()
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()