Skip to content

Commit f93a887

Browse files
committed
Fixed issues with reading in and setting translations for groups.
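A group can now appear in more than one place in the group files; when it does, the new translations are appended to the group's existing translations instead of replacing them. Reversed translations and explicit target->source translations no longer conflict when they overlap: a reversed translation is skipped if the group already has a translation to the same group.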
1 parent d083b22 commit f93a887

17 files changed

+63
-40
lines changed
File renamed without changes.
File renamed without changes.

logo.odg graphics/logo.odg

File renamed without changes.
File renamed without changes.
File renamed without changes.

gyez.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,10 @@ def load(reverse=False, bidir=False):
4646
s, t = iwqet.Language.load_trans(src, targ, bidir=bidir)
4747
return s, t
4848

49-
def tra(oracion, reverse=False, html=False, user=None, choose=False, verbosity=0):
50-
return sent(oracion, user=user, max_sols=2, translate=True,
51-
connect=True, generate=True, html=html, choose=choose,
52-
verbosity=verbosity)
49+
def tra(sentence, reverse=False, html=False, user=None, choose=False, verbosity=0):
50+
return sent(sentence, user=user, max_sols=2, translate=True, reverse=reverse,
51+
connect=True, generate=True, html=html, choose=choose,
52+
verbosity=verbosity)
5353

5454
# test sentences, including named entities
5555
T1 = "በቀለም ደረሰ።"
@@ -74,8 +74,9 @@ def document(text, process=True):
7474
def sent(text, reverse=False, user=None, max_sols=3, translate=True,
7575
connect=True, generate=False, html=False, choose=False, verbosity=0):
7676
return iwqet.አረፍተነገር(text, user=user, max_sols=max_sols, translate=translate,
77-
connect=connect, generate=generate, html=html, choose=choose,
78-
verbosity=verbosity)
77+
reverse=reverse,
78+
connect=connect, generate=generate, html=html, choose=choose,
79+
verbosity=verbosity)
7980

8081
def sentence(sentence, ambig=False, solve=True, user=None, segment=True,
8182
max_sols=1, verbosity=0):

iwqet/__init__.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -173,18 +173,22 @@ def gui_trans(gui, session=None, choose=False, return_string=False,
173173
verbosity=verbosity, terse=terse)
174174

175175
def አረፍተነገር(text='', src=None, targ=None, user=None, session=None,
176-
sentence=None,
177-
max_sols=2, translate=True, connect=True, generate=True,
178-
html=False, choose=False,
176+
sentence=None, reverse=False,
177+
max_sols=3, translate=True, connect=True, generate=True,
178+
html=False, choose=False, finalize=False,
179179
return_string=False, verbosity=0, terse=False):
180180
"""
181181
Analyze and possibly also translate a sentence from Amharic to Chaha.
182182
"""
183183
if not src and not targ:
184+
if reverse:
185+
s, t = 'sgw','amh'
186+
else:
187+
s, t = 'amh', 'sgw'
184188
# src = iwqet.Language.languages.get('amh')
185189
# targ = iwqet.Language.languages.get('sgw')
186190
# if not src:
187-
src, targ = Language.load_trans('amh', 'sgw', bidir=False)
191+
src, targ = Language.load_trans(s, t, bidir=False)
188192
if not session:
189193
session = make_session(src, targ, user, create_memory=True)
190194
s = Sentence.solve_sentence(src, targ, text=text, session=session,
@@ -195,6 +199,7 @@ def አረፍተነገር(text='', src=None, targ=None, user=None, session=None,
195199
segmentations = s.get_all_segmentations(translate=translate,
196200
generate=generate,
197201
agree_dflt=False, choose=choose,
202+
finalize=finalize,
198203
connect=connect, html=html,
199204
terse=terse)
200205
print("SEGMENTATIONS: {}".format(segmentations))

iwqet/entry.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,11 @@ def match_segments(self, segments, startindex, seglimit=8, verbosity=1):
570570
return Match(self, matches)
571571

572572
def reverse_trans(self, sgroup, sfeats):
573+
if not self.trans:
574+
self.trans = []
575+
if any([sgroup == sg for sg, ft in self.trans]):
576+
# print("*** {} already has trans {}".format(self, sgroup))
577+
return
573578
rev_feats = sgroup.reverse_feats(self, sfeats)
574579
self.trans.append((sgroup, rev_feats))
575580

@@ -1144,8 +1149,17 @@ def from_string(string, language, trans_strings=None, target=None,
11441149
trans_strings=tstrings, cat=cat, comment=comment,
11451150
intervening=intervening)
11461151
if target and not trans:
1147-
# Add translation to source group
1148-
g.trans = tgroups or []
1152+
if not g.trans:
1153+
g.trans = []
1154+
gt = g.trans
1155+
if tgroups:
1156+
if gt:
1157+
existing_tg = [t[0] for t in gt]
1158+
tgroups = [tg for tg in tgroups if tg[0] not in existing_tg]
1159+
gt.extend(tgroups)
1160+
# if target and not trans:
1161+
# # Add translation to source group
1162+
# g.trans = tgroups or []
11491163
if not existing_group:
11501164
# Add group to its language in the appropriate POS groups
11511165
language.add_group(g, posindex=posindex, cat=cat)

iwqet/language.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1492,7 +1492,7 @@ def load_morpho(self, generate=False, analyze=True, segment=False,
14921492
Load words and FSTs for morphological analysis and/or generation.
14931493
"""
14941494
if verbose:
1495-
print('Loading morphological data for {} {}'.format(self.name, "(gen)" if generate else "(anal)"))
1495+
print('Loading morphological data for {} (anal:{}, gen:{})'.format(self.name, analyze, generate))
14961496
# Load pre-analyzed words
14971497
self.set_analyzed()
14981498
if analyze:
@@ -2557,7 +2557,7 @@ def load_trans(source, target, bidir=False):
25572557
targlang = Language.languages.get(target)
25582558
src_loaded = False
25592559
targ_loaded = False
2560-
srcuse = SOURCE
2560+
srcuse = BIDIR if bidir else SOURCE
25612561
targuse = BIDIR if bidir else TARGET
25622562
if srclang:
25632563
# print("Srclang use: {}".format(srclang.use))

iwqet/languages/amh/fst/n.pkl

0 Bytes
Binary file not shown.

iwqet/languages/amh/fst/n0.pkl

0 Bytes
Binary file not shown.

iwqet/languages/amh/fst/nG.pkl

29.8 MB
Binary file not shown.

iwqet/languages/amh/fst/v.pkl

0 Bytes
Binary file not shown.

iwqet/languages/amh/fst/v0.pkl

-3 Bytes
Binary file not shown.

iwqet/languages/amh/fst/vG.pkl

12.2 MB
Binary file not shown.

iwqet/sentence.py

+29-26
Original file line numberDiff line numberDiff line change
@@ -2117,28 +2117,29 @@ def create_segmentation(self, dstore=None, verbosity=0, terse=False):
21172117
gnode = self.gnodes[gn]
21182118
gn_group = gnode.ginst.index
21192119
if gn_group not in tree_attribs:
2120-
tree_attribs[gn_group] = [[], []]
2121-
tree_attribs[gn_group][0].append(snindex)
2122-
if len(sg) == 2:
2123-
# Record group merger when an snode is associated with two gnodes
2124-
gn0, gn1 = self.gnodes[sg[0]], self.gnodes[sg[1]]
2125-
group0, group1 = gn0.ginst.index, gn1.ginst.index
2126-
if gn0.cat:
2127-
# Group for gnode0 is merged with group for gnode1
2128-
tree_attribs[group0][1].append(group1)
2129-
else:
2130-
tree_attribs[group1][1].append(group0)
2131-
for gindex, sn in tree_attribs.items():
2132-
# First store the group's own tree as a set of sn indices and
2133-
# the third element of sn
2134-
sn.append(set(sn[0]))
2135-
# Next check for mergers
2136-
Sentence.update_tree(tree_attribs, gindex, sn[2])
2137-
# Convert the dict to a list and sort by group indices
2138-
trees = list(tree_attribs.items())
2139-
trees.sort(key=lambda x: x[0])
2140-
# Just keep the snode indices in each tree
2141-
trees = [x[1][2] for x in trees]
2120+
tree_attribs[gn_group] = [] # [[]] # [[], []]
2121+
tree_attribs[gn_group].append(snindex)
2122+
# if len(sg) == 2:
2123+
# # Record group merger when an snode is associated with two gnodes
2124+
# gn0, gn1 = self.gnodes[sg[0]], self.gnodes[sg[1]]
2125+
# group0, group1 = gn0.ginst.index, gn1.ginst.index
2126+
# if gn0.cat:
2127+
# # Group for gnode0 is merged with group for gnode1
2128+
# tree_attribs[group0][1].append(group1)
2129+
# else:
2130+
# tree_attribs[group1][1].append(group0)
2131+
# for gindex, sn in tree_attribs.items():
2132+
# # First store the group's own tree as a set of sn indices and
2133+
# # the third element of sn
2134+
# sn.append(set(sn[0]))
2135+
# # Next check for mergers
2136+
# Sentence.update_tree(tree_attribs, gindex, sn[2])
2137+
# # Convert the dict to a list and sort by group indices
2138+
# trees = list(tree_attribs.items())
2139+
# trees.sort(key=lambda x: x[0])
2140+
# # Just keep the snode indices in each tree
2141+
# trees = [x[1][2] for x in trees]
2142+
trees = [set(x) for x in tree_attribs.values()]
21422143
# Get the indices of the GNodes for each SNode
21432144
segmentation = Segmentation(self, ginsts, s2gnodes, len(self.segmentations),
21442145
trees=trees, dstore=dstore, session=self.session,
@@ -2148,7 +2149,8 @@ def create_segmentation(self, dstore=None, verbosity=0, terse=False):
21482149
return segmentation
21492150

21502151
def get_all_segmentations(self, translate=True, generate=True,
2151-
connect=False, html=False, agree_dflt=True, choose=False,
2152+
connect=False, html=False, agree_dflt=True,
2153+
choose=False, finalize=False,
21522154
verbosity=0, terse=False):
21532155
"""After a sentence has been translated and segmented, collect all the
21542156
segmentations, including those resulting from altsyn sentences."""
@@ -2178,7 +2180,8 @@ def get_all_segmentations(self, translate=True, generate=True,
21782180
# Realize target morphology
21792181
segmentation.generate()
21802182
# Generate the final translation strings and HTML for the GUI
2181-
segmentation.finalize_segments(html=html, agree_dflt=agree_dflt, choose=choose)
2183+
segmentation.finalize_segments(html=html, agree_dflt=agree_dflt,
2184+
choose=choose, finalize=finalize)
21822185
if generate and choose:
21832186
# Set the final output sentence string.
21842187
final = ' '.join([seg.final for seg in segmentation.segments])
@@ -2695,7 +2698,7 @@ def get_segs(self, terse=False):
26952698
#######
26962699

26972700
def finalize_segments(self, html=True, user_input=None, agree_dflt=True,
2698-
choose=False, verbosity=0):
2701+
choose=False, finalize=False, verbosity=0):
26992702
"""Set the final strings and morphology for each segment in this
27002703
segmentation and the HTML too if html is True."""
27012704
for i, segment in enumerate(self.segments):
@@ -2711,7 +2714,7 @@ def finalize_segments(self, html=True, user_input=None, agree_dflt=True,
27112714
first = False
27122715
self.do_seg_feat_agreement(user_input=user_input, agree_dflt=agree_dflt,
27132716
verbosity=verbosity)
2714-
if choose:
2717+
if choose or finalize:
27152718
self.choose_final(html=html, verbosity=verbosity)
27162719

27172720
def choose_final(self, html=True, verbosity=0):

notes.txt notes/notes.txt

File renamed without changes.

0 commit comments

Comments
 (0)