Skip to content

Commit 34d1d3a

Browse files
author
Matthias Huck
committed
sentence-bleu-nbest
1 parent e98a2fc commit 34d1d3a

File tree

9 files changed

+130
-93
lines changed

9 files changed

+130
-93
lines changed

mert/BleuScorer.cpp

Lines changed: 51 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,14 @@ BleuScorer::BleuScorer(const string& config)
4545
} else if (reflen == REFLEN_CLOSEST) {
4646
m_ref_length_type = CLOSEST;
4747
} else {
48-
throw runtime_error("Unknown reference length strategy: " + reflen);
48+
UTIL_THROW2("Unknown reference length strategy: " + reflen);
4949
}
5050
}
5151

5252
BleuScorer::~BleuScorer() {}
5353

5454
size_t BleuScorer::CountNgrams(const string& line, NgramCounts& counts,
55-
unsigned int n, bool is_testing)
55+
unsigned int n, bool is_testing) const
5656
{
5757
assert(n > 0);
5858
vector<int> encoded_tokens;
@@ -94,41 +94,46 @@ void BleuScorer::setReferenceFiles(const vector<string>& referenceFiles)
9494
mert::VocabularyFactory::GetVocabulary()->clear();
9595

9696
//load reference data
97-
for (size_t i = 0; i < referenceFiles.size(); ++i) {
97+
for (size_t i = 0; i < referenceFiles.size(); ++i)
98+
{
9899
TRACE_ERR("Loading reference from " << referenceFiles[i] << endl);
99100

100-
if (!OpenReference(referenceFiles[i].c_str(), i)) {
101-
throw runtime_error("Unable to open " + referenceFiles[i]);
101+
ifstream ifs(referenceFiles[i].c_str());
102+
UTIL_THROW_IF2(!ifs, "Cannot open " << referenceFiles[i]);
103+
if (!OpenReferenceStream(&ifs, i)) {
104+
UTIL_THROW2("Unable to open " + referenceFiles[i]);
102105
}
103106
}
104107
}
105108

106-
bool BleuScorer::OpenReference(const char* filename, size_t file_id)
107-
{
108-
ifstream ifs(filename);
109-
if (!ifs) {
110-
cerr << "Cannot open " << filename << endl;
111-
return false;
112-
}
113-
return OpenReferenceStream(&ifs, file_id);
114-
}
115-
116109
/**
 * Reads one reference file from a stream, one sentence per line, and merges
 * every line into the reference entry that has the same sentence index.
 *
 * While reading the first file (file_id == 0) a new Reference is appended to
 * m_references for each line; lines of later files are merged into the entry
 * at the same index.
 *
 * NOTE: this function is used for unit testing.
 *
 * @param is       stream to read from; a null pointer makes the call return false.
 * @param file_id  zero-based index of the reference file being read.
 * @return false if is is null, true once the stream has been read to its end.
 *         Raises through UTIL_THROW_IF2 when the stream holds a line for which
 *         no reference entry exists.
 */
bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
{
  if (is == NULL) return false;

  string line;
  size_t sid = 0;
  while (getline(*is, line)) {
    // TODO: rather than loading the whole reference corpus into memory, can we stream it line by line?
    //  (loading the whole reference corpus can take gigabytes of RAM if done with millions of sentences)
    line = preprocessSentence(line);
    if (file_id == 0) {
      Reference* ref = new Reference;
      m_references.push_back(ref); // Take ownership of the Reference object.
    }
    // The separating space before "has" keeps the file id readable in the message.
    UTIL_THROW_IF2(m_references.size() <= sid, "Reference " << file_id << " has too many sentences.");

    ProcessReferenceLine(line, m_references[sid]);

    // Progress indicator: one dot per 100 sentences.
    if (sid > 0 && sid % 100 == 0) {
      TRACE_ERR(".");
    }
    ++sid;
  }
  return true;
}
134+
135+
void BleuScorer::ProcessReferenceLine(const std::string& line, Reference* ref) const
136+
{
132137
NgramCounts counts;
133138
size_t length = CountNgrams(line, counts, kBleuNgramOrder);
134139

@@ -138,35 +143,30 @@ bool BleuScorer::OpenReferenceStream(istream* is, size_t file_id)
138143
const NgramCounts::Value newcount = ci->second;
139144

140145
NgramCounts::Value oldcount = 0;
141-
m_references[sid]->get_counts()->Lookup(ngram, &oldcount);
146+
ref->get_counts()->Lookup(ngram, &oldcount);
142147
if (newcount > oldcount) {
143-
m_references[sid]->get_counts()->operator[](ngram) = newcount;
148+
ref->get_counts()->operator[](ngram) = newcount;
144149
}
145150
}
146151
//add in the length
147-
m_references[sid]->push_back(length);
148-
if (sid > 0 && sid % 100 == 0) {
149-
TRACE_ERR(".");
150-
}
151-
++sid;
152-
}
153-
return true;
152+
ref->push_back(length);
154153
}
155154

156155
void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
157156
{
158-
if (sid >= m_references.size()) {
159-
stringstream msg;
160-
msg << "Sentence id (" << sid << ") not found in reference set";
161-
throw runtime_error(msg.str());
162-
}
157+
UTIL_THROW_IF2(sid >= m_references.size(), "Sentence id (" << sid << ") not found in reference set");
158+
CalcBleuStats(m_references[sid], text, entry);
159+
}
160+
161+
void BleuScorer::CalcBleuStats(const Reference* ref, const std::string& text, ScoreStats& entry) const
162+
{
163163
NgramCounts testcounts;
164164
// stats for this line
165165
vector<ScoreStatsType> stats(kBleuNgramOrder * 2);
166166
string sentence = preprocessSentence(text);
167167
const size_t length = CountNgrams(sentence, testcounts, kBleuNgramOrder, true);
168168

169-
const int reference_len = CalcReferenceLength(sid, length);
169+
const int reference_len = CalcReferenceLength(ref, length);
170170
stats.push_back(reference_len);
171171

172172
//precision on each ngram type
@@ -177,7 +177,7 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
177177
NgramCounts::Value correct = 0;
178178

179179
NgramCounts::Value v = 0;
180-
if (m_references[sid]->get_counts()->Lookup(testcounts_it->first, &v)) {
180+
if (ref->get_counts()->Lookup(testcounts_it->first, &v)) {
181181
correct = min(v, guess);
182182
}
183183
stats[len * 2 - 2] += correct;
@@ -207,21 +207,20 @@ statscore_t BleuScorer::calculateScore(const vector<ScoreStatsType>& comps) cons
207207
return exp(logbleu);
208208
}
209209

210-
int BleuScorer::CalcReferenceLength(size_t sentence_id, size_t length)
210+
int BleuScorer::CalcReferenceLength(const Reference* ref, std::size_t length) const
211211
{
212212
switch (m_ref_length_type) {
213213
case AVERAGE:
214-
return m_references[sentence_id]->CalcAverage();
214+
return ref->CalcAverage();
215215
break;
216216
case CLOSEST:
217-
return m_references[sentence_id]->CalcClosest(length);
217+
return ref->CalcClosest(length);
218218
break;
219219
case SHORTEST:
220-
return m_references[sentence_id]->CalcShortest();
220+
return ref->CalcShortest();
221221
break;
222222
default:
223-
cerr << "unknown reference types." << endl;
224-
exit(1);
223+
UTIL_THROW2("Unknown reference types");
225224
}
226225
}
227226

@@ -298,29 +297,23 @@ vector<float> BleuScorer::ScoreNbestList(const string& scoreFile, const string&
298297

299298
vector<FeatureDataIterator> featureDataIters;
300299
vector<ScoreDataIterator> scoreDataIters;
301-
for (size_t i = 0; i < featureFiles.size(); ++i) {
300+
for (size_t i = 0; i < featureFiles.size(); ++i)
301+
{
302302
featureDataIters.push_back(FeatureDataIterator(featureFiles[i]));
303303
scoreDataIters.push_back(ScoreDataIterator(scoreFiles[i]));
304304
}
305305

306306
vector<pair<size_t,size_t> > hypotheses;
307-
if (featureDataIters[0] == FeatureDataIterator::end()) {
308-
cerr << "Error: at the end of feature data iterator" << endl;
309-
exit(1);
310-
}
311-
for (size_t i = 0; i < featureFiles.size(); ++i) {
312-
if (featureDataIters[i] == FeatureDataIterator::end()) {
313-
cerr << "Error: Feature file " << i << " ended prematurely" << endl;
314-
exit(1);
315-
}
316-
if (scoreDataIters[i] == ScoreDataIterator::end()) {
317-
cerr << "Error: Score file " << i << " ended prematurely" << endl;
318-
exit(1);
319-
}
320-
if (featureDataIters[i]->size() != scoreDataIters[i]->size()) {
321-
cerr << "Error: features and scores have different size" << endl;
322-
exit(1);
323-
}
307+
UTIL_THROW_IF2(featureDataIters[0] == FeatureDataIterator::end(),
308+
"At the end of feature data iterator");
309+
for (size_t i = 0; i < featureFiles.size(); ++i)
310+
{
311+
UTIL_THROW_IF2(featureDataIters[i] == FeatureDataIterator::end(),
312+
"Feature file " << i << " ended prematurely");
313+
UTIL_THROW_IF2(scoreDataIters[i] == ScoreDataIterator::end(),
314+
"Score file " << i << " ended prematurely");
315+
UTIL_THROW_IF2(featureDataIters[i]->size() != scoreDataIters[i]->size(),
316+
"Features and scores have different size");
324317
for (size_t j = 0; j < featureDataIters[i]->size(); ++j) {
325318
hypotheses.push_back(pair<size_t,size_t>(i,j));
326319
}

mert/BleuScorer.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,14 @@ class BleuScorer: public StatisticsBasedScorer
4242
return 2 * kBleuNgramOrder + 1;
4343
}
4444

45-
int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
45+
void CalcBleuStats(const Reference* ref, const std::string& text, ScoreStats& entry) const;
46+
47+
int CalcReferenceLength(const Reference* ref, std::size_t length) const;
4648

4749
ReferenceLengthType GetReferenceLengthType() const {
4850
return m_ref_length_type;
4951
}
52+
5053
void SetReferenceLengthType(ReferenceLengthType type) {
5154
m_ref_length_type = type;
5255
}
@@ -62,14 +65,14 @@ class BleuScorer: public StatisticsBasedScorer
6265
/**
6366
* Count the ngrams of each type, up to the given length in the input line.
6467
*/
65-
std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false);
68+
std::size_t CountNgrams(const std::string& line, NgramCounts& counts, unsigned int n, bool is_testing=false) const;
6669

6770
void DumpCounts(std::ostream* os, const NgramCounts& counts) const;
6871

69-
bool OpenReference(const char* filename, std::size_t file_id);
70-
7172
// NOTE: this function is used for unit testing.
72-
virtual bool OpenReferenceStream(std::istream* is, std::size_t file_id);
73+
bool OpenReferenceStream(std::istream* is, std::size_t file_id);
74+
75+
void ProcessReferenceLine(const std::string& line, Reference* ref) const;
7376

7477
//private:
7578
protected:

mert/Jamfile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,13 @@ exe evaluator : evaluator.cpp mert_lib ;
6666

6767
exe sentence-bleu : sentence-bleu.cpp mert_lib ;
6868

69+
exe sentence-bleu-nbest : sentence-bleu-nbest.cpp mert_lib ;
70+
6971
exe pro : pro.cpp mert_lib ..//boost_program_options ;
7072

7173
exe kbmira : kbmira.cpp mert_lib ..//boost_program_options ..//boost_filesystem ;
7274

73-
alias programs : mert extractor evaluator pro kbmira sentence-bleu ;
75+
alias programs : mert extractor evaluator pro kbmira sentence-bleu sentence-bleu-nbest ;
7476

7577
unit-test bleu_scorer_test : BleuScorerTest.cpp mert_lib ..//boost_unit_test_framework ;
7678
unit-test feature_data_test : FeatureDataTest.cpp mert_lib ..//boost_unit_test_framework ;

mert/Scorer.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ void Scorer::InitConfig(const string& config)
6464
}
6565
}
6666

67-
void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded)
67+
void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded) const
6868
{
6969
for (util::TokenIter<util::AnyCharacter, true> it(line, util::AnyCharacter(" "));
7070
it; ++it) {
@@ -81,7 +81,7 @@ void Scorer::TokenizeAndEncode(const string& line, vector<int>& encoded)
8181
}
8282
}
8383

84-
void Scorer::TokenizeAndEncodeTesting(const string& line, vector<int>& encoded)
84+
void Scorer::TokenizeAndEncodeTesting(const string& line, vector<int>& encoded) const
8585
{
8686
for (util::TokenIter<util::AnyCharacter, true> it(line, util::AnyCharacter(" "));
8787
it; ++it) {

mert/Scorer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,12 +187,12 @@ class Scorer
187187
* Tokenise line and encode.
188188
* Note: We assume that all tokens are separated by whitespaces.
189189
*/
190-
void TokenizeAndEncode(const std::string& line, std::vector<int>& encoded);
190+
void TokenizeAndEncode(const std::string& line, std::vector<int>& encoded) const;
191191

192192
/*
193193
* Tokenize functions for testing only.
194194
*/
195-
void TokenizeAndEncodeTesting(const std::string& line, std::vector<int>& encoded);
195+
void TokenizeAndEncodeTesting(const std::string& line, std::vector<int>& encoded) const;
196196

197197
/**
198198
* Every inherited scorer should call this function for each sentence

mert/sentence-bleu-nbest.cpp

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#include <iostream>
2+
#include <vector>
3+
#include <string>
4+
5+
#include "BleuScorer.h"
6+
#include "moses/Util.h"
7+
8+
using namespace MosesTuning;
9+
10+
int main(int argc, char **argv)
11+
{
12+
if (argc == 1) {
13+
std::cerr << "Usage: ./sentence-bleu-nbest ref1 [ref2 ...] < plain-nbest > bleu-scores" << std::endl;
14+
return 1;
15+
}
16+
17+
std::vector<std::string> refFiles(argv + 1, argv + argc);
18+
19+
// TODO all of these are empty for now
20+
std::string config;
21+
std::string factors;
22+
std::string filter;
23+
24+
BleuScorer scorer(config);
25+
scorer.setFactors(factors);
26+
scorer.setFilter(filter);
27+
scorer.setReferenceFiles(refFiles); // TODO: we don't need to load the whole reference corpus into memory (this can take gigabytes of RAM if done with millions of sentences)
28+
29+
// Loading sentences and preparing statistics
30+
std::string nbestLine;
31+
while ( getline(std::cin, nbestLine) )
32+
{
33+
std::vector<std::string> items;
34+
Moses::TokenizeMultiCharSeparator(items, nbestLine, " ||| ");
35+
size_t sid = Moses::Scan<size_t>(items[0]);
36+
37+
ScoreStats scoreStats;
38+
scorer.prepareStats(sid, items[1], scoreStats);
39+
std::vector<float> stats(scoreStats.getArray(), scoreStats.getArray() + scoreStats.size());
40+
std::cout << smoothedSentenceBleu(stats) << std::endl;
41+
}
42+
43+
return 0;
44+
}

mert/sentence-bleu.cpp

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,22 +23,19 @@ int main(int argc, char **argv)
2323
BleuScorer scorer(config);
2424
scorer.setFactors(factors);
2525
scorer.setFilter(filter);
26-
scorer.setReferenceFiles(refFiles);
27-
28-
vector<ScoreStats> entries;
26+
scorer.setReferenceFiles(refFiles); // TODO: we don't need to load the whole reference corpus into memory (this can take gigabytes of RAM if done with millions of sentences)
2927

3028
// Loading sentences and preparing statistics
31-
ScoreStats scoreentry;
32-
string line;
33-
while (getline(cin, line)) {
34-
scorer.prepareStats(entries.size(), line, scoreentry);
35-
entries.push_back(scoreentry);
29+
string hypothesisLine;
30+
size_t sid = 0;
31+
while (getline(std::cin, hypothesisLine))
32+
{
33+
ScoreStats scoreStats;
34+
scorer.prepareStats(sid, hypothesisLine, scoreStats);
35+
vector<float> stats(scoreStats.getArray(), scoreStats.getArray() + scoreStats.size());
36+
std::cout << smoothedSentenceBleu(stats) << std::endl;
37+
++sid;
3638
}
3739

38-
vector<ScoreStats>::const_iterator sentIt;
39-
for (sentIt = entries.begin(); sentIt != entries.end(); sentIt++) {
40-
vector<float> stats(sentIt->getArray(), sentIt->getArray() + sentIt->size());
41-
cout << smoothedSentenceBleu(stats) << "\n";
42-
}
4340
return 0;
4441
}

moses/Util.cpp

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,6 @@ bool FileExists(const std::string& filePath)
9090
return !ifs.fail();
9191
}
9292

93-
const std::string Trim(const std::string& str, const std::string dropChars)
94-
{
95-
std::string res = str;
96-
res.erase(str.find_last_not_of(dropChars)+1);
97-
return res.erase(0, res.find_first_not_of(dropChars));
98-
}
99-
10093
void ResetUserTime()
10194
{
10295
g_timer.start();

0 commit comments

Comments
 (0)