Skip to content

Commit 7ea711a

Browse files
authored
Merge pull request #806 from sillsdev/quotation_denormalization_with_machine
Quotation denormalization using Machine.py
2 parents (a221510 + 276f182); commit 7ea711a

File tree

11 files changed: +839 / -454 lines changed

11 files changed

+839
-454
lines changed

poetry.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ tqdm = "^4.62.2"
7070
sacrebleu = "^2.3.1"
7171
ctranslate2 = "^3.5.1"
7272
libclang = "14.0.6"
73-
sil-machine = {extras = ["thot"], version = "1.7.4"}
73+
sil-machine = {extras = ["thot"], version = "1.8.2"}
7474
datasets = "^2.7.1"
7575
torch = {version = "^2.4", source = "torch"}
7676
sacremoses = "^0.0.53"

silnlp/common/postprocess_draft.py

Lines changed: 43 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,7 @@
44

55
from ..nmt.clearml_connection import SILClearML
66
from ..nmt.config_utils import load_config
7-
from ..nmt.postprocess import get_draft_paths_from_exp, postprocess_draft, postprocess_experiment
8-
from .paratext import get_project_dir
7+
from ..nmt.postprocess import postprocess_experiment
98
from .postprocesser import PostprocessConfig, PostprocessHandler
109
from .utils import get_mt_exp_dir
1110

@@ -19,28 +18,11 @@ def main() -> None:
1918
)
2019
parser.add_argument(
2120
"--experiment",
21+
required=True,
2222
default=None,
2323
help="Name of an experiment directory in MT/experiments. \
2424
If this option is used, the experiment's translate config will be used to find source and draft files.",
2525
)
26-
parser.add_argument(
27-
"--source",
28-
default=None,
29-
help="Path of the source USFM file. \
30-
If in a Paratext project, the project settings will be used when reading the files.",
31-
)
32-
parser.add_argument(
33-
"--draft",
34-
default=None,
35-
help="Path of the draft USFM file that postprocessing will be applied to. \
36-
Must have the exact same USFM structure as 'source', which it will if it is a draft from that source.",
37-
)
38-
parser.add_argument(
39-
"--book",
40-
default=None,
41-
help="3-letter book id of book being evaluated, e.g. MAT. \
42-
Only necessary if the source file is not in a Paratext project directory.",
43-
)
4426
parser.add_argument(
4527
"--output-folder",
4628
default=None,
@@ -63,6 +45,30 @@ def main() -> None:
6345
action="store_true",
6446
help="Carry over embeds from the source project to the output without translating them",
6547
)
48+
parser.add_argument(
49+
"--denormalize-quotation-marks",
50+
default=False,
51+
action="store_true",
52+
help="For files in USFM format, attempt to change the draft's quotation marks to match the target project's quote convention",
53+
)
54+
parser.add_argument(
55+
"--source-quote-convention",
56+
default="detect",
57+
type=str,
58+
help="The quote convention for the source project. If not specified, it will be detected automatically.",
59+
)
60+
parser.add_argument(
61+
"--target-quote-convention",
62+
default="detect",
63+
type=str,
64+
help="The quote convention for the target project. If not specified, it will be detected automatically.",
65+
)
66+
parser.add_argument(
67+
"--source-project",
68+
default="",
69+
help="The name of the Paratext project used as the source. When the source quote convention is set to 'detect' or not specified,"
70+
+ " this project will be used to detect the source quote convention.",
71+
)
6672
parser.add_argument(
6773
"--clearml-queue",
6874
default=None,
@@ -72,52 +78,30 @@ def main() -> None:
7278
)
7379
args = parser.parse_args()
7480

75-
experiment = args.experiment.replace("\\", "/") if args.experiment else None
81+
experiment = args.experiment.replace("\\", "/")
7682
args.output_folder = Path(args.output_folder.replace("\\", "/")) if args.output_folder else None
7783
postprocess_config = PostprocessConfig(vars(args))
7884

79-
if args.experiment and (args.source or args.draft or args.book):
80-
LOGGER.info("--experiment option used. --source, --draft, and --book will be ignored.")
81-
if not (args.experiment or (args.source and args.draft)):
82-
raise ValueError("Not enough options used. Please use --experiment OR --source and --draft.")
85+
if not get_mt_exp_dir(experiment).exists():
86+
raise ValueError(f"Experiment {experiment} not found.")
8387

84-
if experiment:
85-
if not get_mt_exp_dir(experiment).exists():
86-
raise ValueError(f"Experiment {experiment} not found.")
87-
88-
if args.clearml_queue is not None:
89-
if "cpu" not in args.clearml_queue:
90-
raise ValueError("Running this script on a GPU queue will not speed it up. Please only use CPU queues.")
91-
clearml = SILClearML(experiment, args.clearml_queue)
92-
config = clearml.config
93-
else:
94-
config = load_config(experiment)
95-
96-
if not (config.exp_dir / "translate_config.yml").exists():
97-
raise ValueError("Experiment translate_config.yml not found.")
98-
99-
if not postprocess_config.is_base_config():
100-
src_paths, draft_paths, _ = get_draft_paths_from_exp(config)
101-
else:
102-
LOGGER.info("No postprocessing options used. Applying postprocessing requests from translate config.")
103-
postprocess_experiment(config, args.output_folder)
104-
exit()
105-
elif args.clearml_queue is not None:
106-
raise ValueError("Must use --experiment option to use ClearML.")
88+
if args.clearml_queue is not None:
89+
if "cpu" not in args.clearml_queue:
90+
raise ValueError("Running this script on a GPU queue will not speed it up. Please only use CPU queues.")
91+
clearml = SILClearML(experiment, args.clearml_queue)
92+
config = clearml.config
10793
else:
108-
src_paths = [Path(args.source.replace("\\", "/"))]
109-
draft_paths = [Path(args.draft.replace("\\", "/"))]
110-
if not str(src_paths[0]).startswith(str(get_project_dir(""))) and args.book is None:
111-
raise ValueError(
112-
"--book argument must be passed if the source file is not in a Paratext project directory."
113-
)
94+
config = load_config(experiment)
11495

115-
if postprocess_config.is_base_config():
116-
raise ValueError("Please use at least one postprocessing option.")
117-
postprocess_handler = PostprocessHandler([postprocess_config], include_base=False)
96+
if not (config.exp_dir / "translate_config.yml").exists():
97+
raise ValueError("Experiment translate_config.yml not found.")
11898

119-
for src_path, draft_path in zip(src_paths, draft_paths):
120-
postprocess_draft(src_path, draft_path, postprocess_handler, args.book, args.output_folder)
99+
if postprocess_config.is_base_config():
100+
LOGGER.info("No postprocessing options used. Applying postprocessing requests from translate config.")
101+
postprocess_experiment(config, out_dir=args.output_folder)
102+
else:
103+
postprocess_handler = PostprocessHandler([postprocess_config], include_base=False)
104+
postprocess_experiment(config, postprocess_handler=postprocess_handler, out_dir=args.output_folder)
121105

122106

123107
if __name__ == "__main__":

0 commit comments

Comments
 (0)