@@ -31,9 +31,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 
 using namespace std;
 
-namespace MosesTuning {
+namespace MosesTuning
+{
 
-std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
+std::ostream& operator<<(std::ostream& out, const WordVec& wordVec)
+{
   out << "[";
   for (size_t i = 0; i < wordVec.size(); ++i) {
     out << wordVec[i]->first;
@@ -44,7 +46,8 @@ std::ostream& operator<<(std::ostream& out, const WordVec& wordVec) {
 }
 
 
-void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
+void ReferenceSet::Load(const vector<string>& files, Vocab& vocab)
+{
   for (size_t i = 0; i < files.size(); ++i) {
     util::FilePiece fh(files[i].c_str());
     size_t sentenceId = 0;
@@ -55,14 +58,15 @@ void ReferenceSet::Load(const vector<string>& files, Vocab& vocab) {
       } catch (util::EndOfFileException &e) {
         break;
       }
-      AddLine(sentenceId, line, vocab);
-      ++sentenceId;
+      AddLine(sentenceId, line, vocab);
+      ++sentenceId;
     }
   }
 
 }
 
-void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
+void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab)
+{
   // cerr << line << endl;
   NgramCounter ngramCounts;
   list<WordVec> openNgrams;
@@ -74,14 +78,14 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
     openNgrams.push_front(WordVec());
     for (list<WordVec>::iterator k = openNgrams.begin(); k != openNgrams.end(); ++k) {
       k->push_back(nextTok);
-      ++ngramCounts[*k];
+      ++ngramCounts[*k];
     }
     if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
   }
 
   // merge into overall ngram map
   for (NgramCounter::const_iterator ni = ngramCounts.begin();
-      ni != ngramCounts.end(); ++ni) {
+       ni != ngramCounts.end(); ++ni) {
     size_t count = ni->second;
     // cerr << *ni << " " << count << endl;
     if (ngramCounts_.size() <= sentenceId) ngramCounts_.resize(sentenceId+1);
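The loop in AddLine() above counts every n-gram up to kBleuNgramOrder in one pass over the tokens: each token opens a fresh empty n-gram, every still-open n-gram is extended by that token and counted, and the oldest n-gram is retired once it reaches the maximum order. A minimal standalone sketch of the same rolling-window technique (hypothetical names, not the Moses code):

    // Sketch of the rolling "open n-grams" counting seen in AddLine().
    // Standalone and illustrative -- not the Moses implementation.
    #include <iostream>
    #include <list>
    #include <map>
    #include <string>
    #include <vector>

    typedef std::vector<std::string> Ngram;

    int main() {
      const size_t kMaxOrder = 4;  // stands in for kBleuNgramOrder
      const char* toks[] = {"the", "cat", "sat"};
      std::map<Ngram, size_t> counts;
      std::list<Ngram> open;  // n-grams still short enough to extend
      for (size_t i = 0; i < sizeof(toks)/sizeof(toks[0]); ++i) {
        open.push_front(Ngram());  // start a new n-gram at this token
        for (std::list<Ngram>::iterator k = open.begin(); k != open.end(); ++k) {
          k->push_back(toks[i]);   // extend every open n-gram
          ++counts[*k];            // and count the result
        }
        if (open.size() >= kMaxOrder) open.pop_back();  // cap the order
      }
      // "the","cat","sat","the cat","cat sat","the cat sat" -> 6
      std::cout << counts.size() << " distinct ngrams" << std::endl;
      return 0;
    }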
@@ -104,8 +108,9 @@ void ReferenceSet::AddLine(size_t sentenceId, const StringPiece& line, Vocab& vocab) {
   // cerr << endl;
 
 }
-
-size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
+
+size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const
+{
   const NgramMap& ngramCounts = ngramCounts_.at(sentenceId);
   NgramMap::const_iterator ngi = ngramCounts.find(ngram);
   if (ngi == ngramCounts.end()) return 0;
@@ -114,7 +119,8 @@ size_t ReferenceSet::NgramMatches(size_t sentenceId, const WordVec& ngram, bool clip) const {
 
 VertexState::VertexState(): bleuStats(kBleuNgramOrder), targetLength(0) {}
 
-void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats) const {
+void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats) const
+{
   for (NgramCounter::const_iterator ngi = counts.begin(); ngi != counts.end(); ++ngi) {
     // cerr << "Checking: " << *ngi << " matches " << references_.NgramMatches(sentenceId_,*ngi,false) << endl;
     size_t order = ngi->first.size();
@@ -124,7 +130,8 @@ void HgBleuScorer::UpdateMatches(const NgramCounter& counts, vector<FeatureStatsType>& bleuStats) const {
   }
 }
 
-size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
+size_t HgBleuScorer::GetTargetLength(const Edge& edge) const
+{
   size_t targetLength = 0;
   for (size_t i = 0; i < edge.Words().size(); ++i) {
     const Vocab::Entry* word = edge.Words()[i];
@@ -137,7 +144,8 @@ size_t HgBleuScorer::GetTargetLength(const Edge& edge) const {
   return targetLength;
 }
 
-FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
+FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats)
+{
   NgramCounter ngramCounts;
   size_t childId = 0;
   size_t wordId = 0;
@@ -147,7 +155,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
   bool inRightContext = false;
   list<WordVec> openNgrams;
   const Vocab::Entry* currentWord = NULL;
-  while (wordId < edge.Words().size()) {
+  while (wordId < edge.Words().size()) {
     currentWord = edge.Words()[wordId];
     if (currentWord != NULL) {
       ++wordId;
@@ -214,7 +222,7 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
     }
     if (openNgrams.size() >= kBleuNgramOrder) openNgrams.pop_back();
   }
-
+
   // Collect matches
   // This edge
   // cerr << "edge ngrams" << endl;
@@ -227,26 +235,27 @@ FeatureStatsType HgBleuScorer::Score(const Edge& edge, const Vertex& head, vector<FeatureStatsType>& bleuStats) {
       bleuStats[j] += vertexStates_[edge.Children()[i]].bleuStats[j];
     }
   }
-
+
 
   FeatureStatsType sourceLength = head.SourceCovered();
   size_t referenceLength = references_.Length(sentenceId_);
-  FeatureStatsType effectiveReferenceLength =
+  FeatureStatsType effectiveReferenceLength =
     sourceLength / totalSourceLength_ * referenceLength;
 
   bleuStats[bleuStats.size()-1] = effectiveReferenceLength;
-  // backgroundBleu_[backgroundBleu_.size()-1] =
+  // backgroundBleu_[backgroundBleu_.size()-1] =
   //   backgroundRefLength_ * sourceLength / totalSourceLength_;
   FeatureStatsType bleu = sentenceLevelBackgroundBleu(bleuStats, backgroundBleu_);
 
   return bleu;
 }
 
-void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
+void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats)
+{
   // TODO: Maybe more efficient to absorb into the Score() method
   VertexState& vertexState = vertexStates_[vertexId];
   // cerr << "Updating state for " << vertexId << endl;
-
+
   // leftContext
   int wi = 0;
   const VertexState* childState = NULL;
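In Score() above, a partial hypothesis is judged against a reference prefix of proportional size: covering 5 of 20 source words against a 24-word reference gives an effective reference length of 5/20 * 24 = 6, stored in the last slot of the stats vector (which holds two entries per n-gram order plus that length). The sketch below computes a smoothed sentence-level BLEU from such a vector; the interleaved [match1, total1, ..., matchN, totalN, refLen] layout and the additive smoothing are assumptions for illustration, not the actual sentenceLevelBackgroundBleu():

    // Hypothetical sketch of BLEU-from-stats with background smoothing.
    // Assumed layout: [match1, total1, ..., matchN, totalN, refLen].
    #include <cmath>
    #include <cstdio>
    #include <vector>

    float SmoothedBleu(const std::vector<float>& stats,
                       const std::vector<float>& background) {
      const size_t order = (stats.size() - 1) / 2;
      double logPrecision = 0.0;
      for (size_t i = 0; i < order; ++i) {
        // background pseudo-counts keep zero-match orders finite
        double match = stats[2*i] + background[2*i];
        double total = stats[2*i+1] + background[2*i+1];
        logPrecision += std::log(match / total);
      }
      logPrecision /= order;
      // brevity penalty exponent: min(0, 1 - refLen/hypLen)
      double hypLen = stats[1] + background[1];  // smoothed unigram total
      double refLen = stats[stats.size()-1] + background[background.size()-1];
      double bp = (hypLen < refLen) ? 1.0 - refLen / hypLen : 0.0;
      return static_cast<float>(std::exp(logPrecision + bp));
    }

    int main() {
      std::vector<float> stats(5), background(5, 0.1f);
      stats[0] = 3; stats[1] = 4;  // 3 of 4 unigrams match
      stats[2] = 1; stats[3] = 3;  // 1 of 3 bigrams matches
      stats[4] = 4;                // effective reference length
      std::printf("bleu = %f\n", SmoothedBleu(stats, background));
      return 0;
    }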
@@ -263,9 +272,9 @@ void HgBleuScorer::UpdateState(const Edge& winnerEdge, size_t vertexId, const vector<FeatureStatsType>& bleuStats) {
       // start of child state
       childState = &(vertexStates_[winnerEdge.Children()[childi++]]);
       contexti = 0;
-    }
+    }
     if ((size_t)contexti < childState->leftContext.size()) {
-      vertexState.leftContext.push_back(childState->leftContext[contexti++]);
+      vertexState.leftContext.push_back(childState->leftContext[contexti++]);
     } else {
       // end of child context
       childState = NULL;
@@ -314,7 +323,8 @@ typedef pair<const Edge*,FeatureStatsType> BackPointer;
  * Recurse through back pointers
  **/
 static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
-                              HgHypothesis* bestHypo) {
+                              HgHypothesis* bestHypo)
+{
   // cerr << "Expanding " << vertexId << " Score: " << bps[vertexId].second << endl;
   // UTIL_THROW_IF(bps[vertexId].second == kMinScore+1, HypergraphException, "Landed at vertex " << vertexId << " which is a dead end");
   if (!bps[vertexId].first) return;
@@ -334,7 +344,7 @@ static void GetBestHypothesis(size_t vertexId, const Graph& graph, const vector<BackPointer>& bps,
   }
 }
 
-void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
+void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
 {
   BackPointer init(NULL,kMinScore);
   vector<BackPointer> backPointers(graph.VertexSize(),init);
@@ -349,7 +359,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
       // UTIL_THROW(HypergraphException, "Vertex " << vi << " has no incoming edges");
       // If no incoming edges, vertex is a dead end
       backPointers[vi].first = NULL;
-      backPointers[vi].second = kMinScore;
+      backPointers[vi].second = kMinScore;
     } else {
       // cerr << "\nVertex: " << vi << endl;
       for (size_t ei = 0; ei < incoming.size(); ++ei) {
@@ -362,10 +372,10 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
           incomingScore = max(incomingScore + backPointers[childId].second, kMinScore);
         }
         vector<FeatureStatsType> bleuStats(kBleuNgramOrder*2+1);
-        // cerr << "Score: " << incomingScore << " Bleu: ";
-        // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
+        // cerr << "Score: " << incomingScore << " Bleu: ";
+        // if (incomingScore > nonbleuscore) {nonbleuscore = incomingScore; nonbleuid = ei;}
         FeatureStatsType totalScore = incomingScore;
-        if (bleuWeight) {
+        if (bleuWeight) {
          FeatureStatsType bleuScore = bleuScorer.Score(*(incoming[ei]), vertex, bleuStats);
           if (isnan(bleuScore)) {
             cerr << "WARN: bleu score undefined" << endl;
@@ -379,7 +389,7 @@ void Viterbi(const Graph& graph, const SparseVector& weights, float bleuWeight, const ReferenceSet& references, size_t sentenceId, const std::vector<FeatureStatsType>& backgroundBleu, HgHypothesis* bestHypo)
           }
           // UTIL_THROW_IF(isnan(bleuScore), util::Exception, "Bleu score undefined, smoothing problem?");
           totalScore += bleuWeight * bleuScore;
-          // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
+          // cerr << bleuScore << " Total: " << incomingScore << endl << endl;
           // cerr << "is " << incomingScore << " bs " << bleuScore << endl;
         }
         if (totalScore >= winnerScore) {
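The Viterbi() loop shown in these hunks visits vertices in topological order and keeps one back pointer per vertex: the best incoming edge plus its accumulated score, with kMinScore marking dead ends. A self-contained sketch of that dynamic program on a toy graph (the Edge layout and scores here are hypothetical, not the Moses types):

    // Hypothetical sketch of Viterbi over a hypergraph with back pointers.
    #include <cstddef>
    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Edge {
      std::vector<int> children;  // tail vertices, all with smaller ids
      float score;                // local weighted-feature score
    };

    int main() {
      // Vertices 0 and 1 are leaves; vertex 2 has two incoming edges.
      std::vector<std::vector<Edge> > incoming(3);
      Edge a = {std::vector<int>(1, 0), 0.5f};
      Edge b = {std::vector<int>(1, 1), 0.7f};
      incoming[2].push_back(a);
      incoming[2].push_back(b);

      const float kMinScore = -1e30f;  // sentinel for dead ends
      std::vector<std::pair<const Edge*, float> > bp(
          3, std::make_pair(static_cast<const Edge*>(NULL), kMinScore));
      bp[0].second = 0.0f;  // leaves start with a zero score
      bp[1].second = 0.0f;

      // Process vertices in topological order (ids are already sorted).
      for (size_t vi = 0; vi < incoming.size(); ++vi) {
        for (size_t ei = 0; ei < incoming[vi].size(); ++ei) {
          const Edge& e = incoming[vi][ei];
          float total = e.score;
          for (size_t ci = 0; ci < e.children.size(); ++ci)
            total += bp[e.children[ci]].second;  // best score of each tail
          if (total >= bp[vi].second)            // keep the best incoming edge
            bp[vi] = std::make_pair(&e, total);
        }
      }
      std::printf("best score at root: %.1f\n", bp[2].second);  // 0.7
      return 0;
    }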