Skip to content

Commit 38d790c

Browse files
author
Jeroen Vermeulen
committed
Add cross-platform randomizer module.
The code uses two mechanisms for generating random numbers: srand()/rand(), which is not thread-safe, and srandom()/random(), which is POSIX-specific. Here I add a util/random.cc module that centralizes these calls, and unifies some common usage patterns. If the implementation is not good enough, we can now change it in a single place. To keep things simple, this uses the portable srand()/rand() but protects them with a lock to avoid concurrency problems. The hard part was to keep the regression tests passing: they rely on fixed sequences of random numbers, so a small code change could break them very thoroughly. Utils::rand(), for wide types like size_t, calls std::rand() not once but twice. This behaviour was generalized into util::wide_rand() and friends.
1 parent 4b47e11 commit 38d790c

24 files changed

+452
-117
lines changed

contrib/mira/Main.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2525
#include "moses/Word.h"
2626
#include "moses/FF/FeatureFunction.h"
2727
#include "Decoder.h"
28+
#include "util/random.hh"
2829

2930
typedef std::map<const Moses::FeatureFunction*, std::vector< float > > ProducerWeightMap;
3031
typedef std::pair<const Moses::FeatureFunction*, std::vector< float > > ProducerWeightPair;
@@ -37,8 +38,11 @@ template <class T> bool from_string(T& t, const std::string& s, std::ios_base& (
3738

3839
struct RandomIndex {
3940
ptrdiff_t operator()(ptrdiff_t max) {
40-
srand(time(0)); // Initialize random number generator with current time.
41-
return static_cast<ptrdiff_t> (rand() % max);
41+
// TODO: Don't seed the randomizer here. If this function gets called
42+
// multiple times in the same second, it will return the same value on
43+
// each of those calls.
44+
util::rand_init();
45+
return util::rand_excl(max);
4246
}
4347
};
4448

contrib/relent-filter/src/Main.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
4242
#include "RelativeEntropyCalc.h"
4343
#include "LexicalReordering.h"
4444
#include "LexicalReorderingState.h"
45+
#include "util/random.hh"
4546

4647
#ifdef HAVE_PROTOBUF
4748
#include "hypergraph.pb.h"
@@ -205,7 +206,7 @@ int main(int argc, char** argv)
205206

206207

207208
//initialise random numbers
208-
srand(time(NULL));
209+
rand_init();
209210

210211
// set up read/writing class
211212
IOWrapper* ioWrapper = GetIOWrapper(staticData);

mert/Data.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -287,7 +287,7 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
287287
} else {
288288
//create shards by randomly sampling
289289
for (size_t i = 0; i < floor(shard_size+0.5); ++i) {
290-
shard_contents.push_back(util::rand_int() % data_size);
290+
shard_contents.push_back(util::rand_excl(data_size));
291291
}
292292
}
293293

mert/Point.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,8 @@ void Point::Randomize()
5858
UTIL_THROW_IF(m_min.size() != Point::m_dim, util::Exception, "Error");
5959
UTIL_THROW_IF(m_max.size() != Point::m_dim, util::Exception, "Error");
6060

61-
for (unsigned int i = 0; i < size(); i++) {
62-
const float scale = (m_max[i] - m_min[i]) / float(RAND_MAX);
63-
operator[](i) = m_min[i] + util::rand_int() * scale;
64-
}
61+
for (unsigned int i = 0; i < size(); i++)
62+
operator[](i) = util::rand_incl(m_min[i], m_max[i]);
6563
}
6664

6765
double Point::operator*(const FeatureStats& F) const

mert/TODO

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,8 @@
55

66
- check that --pairwise-ranked is compatible with all optimization metrics
77

8-
- Replace the standard rand() currently used in MERT and PRO with better
9-
random generators such as Boost's random generators (e.g., boost::mt19937).
10-
- create a Random class to hide the details, i.e., how to generate
11-
random numbers, which allows us to use custom random generators more
12-
easily.
8+
- Use better random generators in util/random.cc, e.g. boost::mt19937.
9+
- Support plugging in custom random generators.
1310

1411
Pros:
1512
- In MERT, you might want to use the random restarting technique to avoid

mert/evaluator.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
9595
for (int i = 0; i < bootstrap; ++i) {
9696
ScoreData scoredata(g_scorer);
9797
for (int j = 0; j < n; ++j) {
98-
int randomIndex = util::rand_int() % n;
98+
const int randomIndex = util::rand_excl(n);
9999
scoredata.add(entries[randomIndex], j);
100100
}
101101
g_scorer->setScoreData(&scoredata);
@@ -285,10 +285,10 @@ void InitSeed(const ProgramOption *opt)
285285
{
286286
if (opt->has_seed) {
287287
cerr << "Seeding random numbers with " << opt->seed << endl;
288-
util::rand_int_init(opt->seed);
288+
util::rand_init(opt->seed);
289289
} else {
290290
cerr << "Seeding random numbers with system clock " << endl;
291-
util::rand_int_init();
291+
util::rand_init();
292292
}
293293
}
294294

mert/kbmira.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ de recherches du Canada
4040
#include <boost/scoped_ptr.hpp>
4141

4242
#include "util/exception.hh"
43+
#include "util/random.hh"
4344

4445
#include "BleuScorer.h"
4546
#include "HopeFearDecoder.h"
@@ -122,10 +123,10 @@ int main(int argc, char** argv)
122123

123124
if (vm.count("random-seed")) {
124125
cerr << "Initialising random seed to " << seed << endl;
125-
srand(seed);
126+
util::rand_init(seed);
126127
} else {
127128
cerr << "Initialising random seed from system clock" << endl;
128-
srand(time(NULL));
129+
util::rand_init();
129130
}
130131

131132
// Initialize weights

mert/mert.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,10 +290,10 @@ int main(int argc, char **argv)
290290

291291
if (option.has_seed) {
292292
cerr << "Seeding random numbers with " << option.seed << endl;
293-
util::rand_int_init(option.seed);
293+
util::rand_init(option.seed);
294294
} else {
295295
cerr << "Seeding random numbers with system clock " << endl;
296-
util::rand_int_init();
296+
util::rand_init();
297297
}
298298

299299
if (option.sparse_weights_file.size()) ++option.pdim;

mert/pro.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
4343
#include "ScoreDataIterator.h"
4444
#include "BleuScorer.h"
4545
#include "Util.h"
46+
#include "util/random.hh"
4647

4748
using namespace std;
4849
using namespace MosesTuning;
@@ -141,10 +142,10 @@ int main(int argc, char** argv)
141142

142143
if (vm.count("random-seed")) {
143144
cerr << "Initialising random seed to " << seed << endl;
144-
srand(seed);
145+
util::rand_init(seed);
145146
} else {
146147
cerr << "Initialising random seed from system clock" << endl;
147-
srand(time(NULL));
148+
util::rand_init();
148149
}
149150

150151
if (scoreFiles.size() == 0 || featureFiles.size() == 0) {
@@ -211,11 +212,11 @@ int main(int argc, char** argv)
211212
vector<float> scores;
212213
size_t n_translations = hypotheses.size();
213214
for(size_t i=0; i<n_candidates; i++) {
214-
size_t rand1 = rand() % n_translations;
215+
size_t rand1 = util::rand_excl(n_translations);
215216
pair<size_t,size_t> translation1 = hypotheses[rand1];
216217
float bleu1 = smoothedSentenceBleu(scoreDataIters[translation1.first]->operator[](translation1.second), bleuSmoothing, smoothBP);
217218

218-
size_t rand2 = rand() % n_translations;
219+
size_t rand2 = util::rand_excl(n_translations);
219220
pair<size_t,size_t> translation2 = hypotheses[rand2];
220221
float bleu2 = smoothedSentenceBleu(scoreDataIters[translation2.first]->operator[](translation2.second), bleuSmoothing, smoothBP);
221222

moses-cmd/MainVW.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
4545
#include "moses/FF/StatefulFeatureFunction.h"
4646
#include "moses/FF/StatelessFeatureFunction.h"
4747
#include "moses/TrainingTask.h"
48+
#include "util/random.hh"
4849

4950
#ifdef HAVE_PROTOBUF
5051
#include "hypergraph.pb.h"
@@ -117,7 +118,7 @@ int main(int argc, char** argv)
117118

118119

119120
//initialise random numbers
120-
srand(time(NULL));
121+
util::rand_init();
121122

122123
// set up read/writing class
123124
IFVERBOSE(1) {

moses/ExportInterface.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
2727
#include <sstream>
2828
#include <vector>
2929

30+
#include "util/random.hh"
3031
#include "util/usage.hh"
3132

3233
#ifdef WIN32
@@ -91,7 +92,7 @@ SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni):
9192
exit(1);
9293
}
9394

94-
srand(time(NULL));
95+
util::rand_init();
9596

9697
}
9798

@@ -185,7 +186,7 @@ batch_run()
185186
const StaticData& staticData = StaticData::Instance();
186187

187188
//initialise random numbers
188-
srand(time(NULL));
189+
util::rand_init();
189190

190191
IFVERBOSE(1) PrintUserTime("Created input-output object");
191192

moses/Manager.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
5454
#endif
5555

5656
#include "util/exception.hh"
57+
#include "util/random.hh"
5758

5859
using namespace std;
5960

@@ -418,7 +419,7 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
418419
//cerr << endl;
419420

420421
//draw the sample
421-
float frandom = log((float)rand()/RAND_MAX);
422+
const float frandom = log(util::rand_incl(0.0f, 1.0f));
422423
size_t position = 1;
423424
float sum = candidateScores[0];
424425
for (; position < candidateScores.size() && sum < frandom; ++position) {

moses/Parameter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
3131
#include "InputFileStream.h"
3232
#include "StaticData.h"
3333
#include "util/exception.hh"
34+
#include "util/random.hh"
3435
#include <boost/program_options.hpp>
3536

3637

@@ -1392,7 +1393,7 @@ struct Credit {
13921393
this->contact = contact ;
13931394
this->currentPursuits = currentPursuits ;
13941395
this->areaResponsibility = areaResponsibility;
1395-
this->sortId = rand() % 1000;
1396+
this->sortId = util::rand_excl(1000);
13961397
}
13971398

13981399
bool operator<(const Credit &other) const {

moses/TranslationModel/DynSAInclude/hash.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "utils.h"
77
#include "FileHandler.h"
88
#include "util/exception.hh"
9+
#include "util/random.hh"
910

1011
using namespace Moses;
1112
typedef uint64_t P; // largest input range is 2^64
@@ -162,7 +163,7 @@ void Hash_shiftAddXOR<T>::initSeeds()
162163
{
163164
v_ = new T[this->H_];
164165
for(count_t i=0; i < this->H_; i++)
165-
v_[i] = Utils::rand<T>() + 1;
166+
v_[i] = util::wide_rand<T>() + 1;
166167
}
167168
template <typename T>
168169
T Hash_shiftAddXOR<T>::hash(const char* s, count_t h)
@@ -187,9 +188,8 @@ void UnivHash_tableXOR<T>::initSeeds()
187188
// fill with random values
188189
for(count_t j=0; j < this->H_; j++) {
189190
table_[j] = new T[tblLen_];
190-
for(count_t i=0; i < tblLen_; i++) {
191-
table_[j][i] = Utils::rand<T>(this->m_-1);
192-
}
191+
for(count_t i=0; i < tblLen_; i++)
192+
table_[j][i] = util::wide_rand_excl(this->m_-1);
193193
}
194194
}
195195
template <typename T>
@@ -218,7 +218,7 @@ void UnivHash_noPrimes<T>::initSeeds()
218218
{
219219
a_ = new P[this->H_];
220220
for(T i=0; i < this->H_; i++) {
221-
a_[i] = Utils::rand<P>();
221+
a_[i] = util::wide_rand<P>();
222222
if(a_[i] % 2 == 0) a_[i]++; // a must be odd
223223
}
224224
}
@@ -284,8 +284,8 @@ void UnivHash_linear<T>::initSeeds()
284284
a_[i] = new T[MAX_NGRAM_ORDER];
285285
b_[i] = new T[MAX_NGRAM_ORDER];
286286
for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
287-
a_[i][j] = 1 + Utils::rand<T>();
288-
b_[i][j] = Utils::rand<T>();
287+
a_[i][j] = 1 + util::wide_rand<T>();
288+
b_[i][j] = util::wide_rand<T>();
289289
}
290290
}
291291
}

moses/TranslationModel/DynSAInclude/utils.h

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -62,22 +62,6 @@ class Utils
6262
str[i] = tolower(str[i]);
6363
}
6464
}
65-
// TODO: interface with decent PRG
66-
template<typename T>
67-
static T rand(T mod_bnd = 0) {
68-
T random = 0;
69-
if(sizeof(T) <= 4) {
70-
random = static_cast<T>(std::rand());
71-
} else if(sizeof(T) == 8) {
72-
random = static_cast<T>(std::rand());
73-
random <<= 31;
74-
random <<= 1;
75-
random |= static_cast<T>(std::rand());
76-
}
77-
if(mod_bnd != 0)
78-
return random % mod_bnd;
79-
else return random;
80-
}
8165
};
8266

8367
#endif

moses/TranslationModel/DynSuffixArray.cpp

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#include "DynSuffixArray.h"
2+
#include "util/random.hh"
3+
24
#include <iostream>
35
#include <boost/foreach.hpp>
46

@@ -315,33 +317,31 @@ int DynSuffixArray::Compare(int pos1, int pos2, int max)
315317
return 0;
316318
}
317319

320+
namespace
321+
{
322+
/// Helper: swap two entries in an int array.
323+
inline void swap_ints(int array[], int one, int other)
324+
{
325+
const int tmp = array[one];
326+
array[one] = array[other];
327+
array[other] = tmp;
328+
}
329+
}
330+
318331
void DynSuffixArray::Qsort(int* array, int begin, int end)
319332
{
320333
if(end > begin) {
321-
int index;
334+
int index = util::rand_incl(begin, end);
322335
{
323-
index = begin + (rand() % (end - begin + 1));
324-
int pivot = array[index];
325-
{
326-
int tmp = array[index];
327-
array[index] = array[end];
328-
array[end] = tmp;
329-
}
336+
const int pivot = array[index];
337+
swap_ints(array, index, end);
330338
for(int i=index=begin; i < end; ++i) {
331339
if (Compare(array[i], pivot, 20) <= 0) {
332-
{
333-
int tmp = array[index];
334-
array[index] = array[i];
335-
array[i] = tmp;
336-
index++;
337-
}
340+
swap_ints(array, index, i);
341+
index++;
338342
}
339343
}
340-
{
341-
int tmp = array[index];
342-
array[index] = array[end];
343-
array[end] = tmp;
344-
}
344+
swap_ints(array, index, end);
345345
}
346346
Qsort(array, begin, index - 1);
347347
Qsort(array, index + 1, end);

moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "moses/TranslationModel/fuzzy-match/SentenceAlignment.h"
4646
#include "util/file.hh"
4747
#include "util/exception.hh"
48+
#include "util/random.hh"
4849

4950
using namespace std;
5051

@@ -62,8 +63,8 @@ char *mkdtemp(char *tempbuf)
6263
return NULL;
6364
}
6465

65-
srand((unsigned)time(0));
66-
rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);
66+
util::rand_init();
67+
rand_value = rand_excl(1e6);
6768
tempbase = strrchr(tempbuf, '/');
6869
tempbase = tempbase ? tempbase+1 : tempbuf;
6970
strcpy(tempbasebuf, tempbase);

0 commit comments

Comments
 (0)