Skip to content

Commit 497563b

Browse files
authored
Merge pull request #10715 from AnthonyLatsis/stable/20250402
[stable/20250402] Bring back deterministic hashing
2 parents 2ea28e7 + 0b0e42f commit 497563b

File tree

5 files changed

+136
-11
lines changed

5 files changed

+136
-11
lines changed

llvm/include/llvm/ADT/Hashing.h

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
#ifndef LLVM_ADT_HASHING_H
4545
#define LLVM_ADT_HASHING_H
4646

47-
#include "llvm/Config/abi-breaking.h"
4847
#include "llvm/Support/DataTypes.h"
4948
#include "llvm/Support/ErrorHandling.h"
5049
#include "llvm/Support/SwapByteOrder.h"
@@ -127,6 +126,23 @@ hash_code hash_value(const std::basic_string<T> &arg);
127126
/// Compute a hash_code for a std::optional.
128127
template <typename T> hash_code hash_value(const std::optional<T> &arg);
129128

129+
/// Override the execution seed with a fixed value.
130+
///
131+
/// This hashing library uses a per-execution seed designed to change on each
132+
/// run with high probability in order to ensure that the hash codes are not
133+
/// attackable and to ensure that output which is intended to be stable does
134+
/// not rely on the particulars of the hash codes produced.
135+
///
136+
/// That said, there are use cases where it is important to be able to
137+
/// reproduce *exactly* a specific behavior. To that end, we provide a function
138+
/// which will forcibly set the seed to a fixed value. This must be done at the
139+
/// start of the program, before any hashes are computed. Also, it cannot be
140+
/// undone. This makes it thread-hostile and very hard to use outside of
141+
/// immediately on start of a simple program designed for reproducible
142+
/// behavior.
143+
void set_fixed_execution_hash_seed(uint64_t fixed_value);
144+
145+
130146
// All of the implementation details of actually computing the various hash
131147
// code values are held within this namespace. These routines are included in
132148
// the header file mainly to allow inlining and constant propagation.
@@ -306,17 +322,24 @@ struct hash_state {
306322
}
307323
};
308324

309-
/// In LLVM_ENABLE_ABI_BREAKING_CHECKS builds, the seed is non-deterministic
310-
/// per process (address of a function in LLVMSupport) to prevent having users
311-
/// depend on the particular hash values. On platforms without ASLR, this is
312-
/// still likely non-deterministic per build.
325+
326+
/// A global, fixed seed-override variable.
327+
///
328+
/// This variable can be set using the \see llvm::set_fixed_execution_hash_seed
329+
/// function. See that function for details. Do not, under any circumstances,
330+
/// set or read this variable.
331+
extern uint64_t fixed_seed_override;
332+
313333
/// Return the execution seed: the fixed override if a non-zero one was set
/// before the first call, otherwise the built-in placeholder constant.
inline uint64_t get_execution_seed() {
314-
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
315-
return static_cast<uint64_t>(
316-
reinterpret_cast<uintptr_t>(&install_fatal_error_handler));
317-
#else
318-
return 0xff51afd7ed558ccdULL;
319-
#endif
334+
// FIXME: This needs to be a per-execution seed. This is just a placeholder
335+
// implementation. Switching to a per-execution seed is likely to flush out
336+
// instability bugs and so will happen as its own commit.
337+
//
338+
// However, if there is a fixed seed override set the first time this is
339+
// called, return that instead of the per-execution seed.
340+
// Placeholder seed returned whenever no override is in effect.
const uint64_t seed_prime = 0xff51afd7ed558ccdULL;
341+
// Function-local static: the initializer runs only once, on the first call,
// so later writes to fixed_seed_override are not observed. An override value
// of zero is indistinguishable from "not set" and selects seed_prime.
static uint64_t seed = fixed_seed_override ? fixed_seed_override : seed_prime;
342+
return seed;
320343
}
321344

322345

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ add_llvm_component_library(LLVMSupport
199199
FormatVariadic.cpp
200200
GlobPattern.cpp
201201
GraphWriter.cpp
202+
Hashing.cpp
202203
HexagonAttributeParser.cpp
203204
HexagonAttributes.cpp
204205
InitLLVM.cpp

llvm/lib/Support/Hashing.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
//===-------------- lib/Support/Hashing.cpp -------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file provides implementation bits for the LLVM common hashing
10+
// infrastructure. Documentation and most of the other information is in the
11+
// header file.
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
#include "llvm/ADT/Hashing.h"
16+
17+
using namespace llvm;
18+
19+
// Provide a definition and static initializer for the fixed seed. This
20+
// initializer should always be zero to ensure its value can never appear to be
21+
// non-zero, even during dynamic initialization.
22+
uint64_t llvm::hashing::detail::fixed_seed_override = 0;
23+
24+
// Implement the function for forced setting of the fixed seed.
25+
// FIXME: Use atomic operations here so that there is no data race.
26+
void llvm::set_fixed_execution_hash_seed(uint64_t fixed_value) {
  // Record the requested seed in the override global. This is a plain,
  // non-atomic store. get_execution_seed() reads the override when it
  // initializes its cached seed on first use, and treats zero as "no
  // override".
  llvm::hashing::detail::fixed_seed_override = fixed_value;
}

llvm/unittests/ADT/HashingTest.cpp

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,78 @@ TEST(HashingTest, HashCombineRangeLengthDiff) {
235235
}
236236
}
237237

238+
// Golden test for hash_combine_range: each input string must hash to the
// recorded value. The comparison is done after converting both sides to
// size_t, so on 32-bit hosts only the low bits of the recorded 64-bit value
// are checked.
TEST(HashingTest, HashCombineRangeGoldenTest) {
  struct { const char *s; uint64_t hash; } golden_data[] = {
#if SIZE_MAX == UINT64_MAX || SIZE_MAX == UINT32_MAX
    { "a", 0xaeb6f9d5517c61f8ULL },
    { "ab", 0x7ab1edb96be496b4ULL },
    { "abc", 0xe38e60bf19c71a3fULL },
    { "abcde", 0xd24461a66de97f6eULL },
    { "abcdefgh", 0x4ef872ec411dec9dULL },
    { "abcdefghijklm", 0xe8a865539f4eadfeULL },
    { "abcdefghijklmnopqrstu", 0x261cdf85faaf4e79ULL },
    { "abcdefghijklmnopqrstuvwxyzabcdef", 0x43ba70e4198e3b2aULL },
    { "abcdefghijklmnopqrstuvwxyzabcdef"
      "abcdefghijklmnopqrstuvwxyzghijkl"
      "abcdefghijklmnopqrstuvwxyzmnopqr"
      "abcdefghijklmnopqrstuvwxyzstuvwx"
      "abcdefghijklmnopqrstuvwxyzyzabcd", 0xdcd57fb2afdf72beULL },
    { "a", 0xaeb6f9d5517c61f8ULL },
    { "aa", 0xf2b3b69a9736a1ebULL },
    { "aaa", 0xf752eb6f07b1cafeULL },
    { "aaaaa", 0x812bd21e1236954cULL },
    { "aaaaaaaa", 0xff07a2cff08ac587ULL },
    { "aaaaaaaaaaaaa", 0x84ac949d54d704ecULL },
    { "aaaaaaaaaaaaaaaaaaaaa", 0xcb2c8fb6be8f5648ULL },
    { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xcc40ab7f164091b6ULL },
    { "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
      "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0xc58e174c1e78ffe9ULL },
    { "z", 0x1ba160d7e8f8785cULL },
    { "zz", 0x2c5c03172f1285d7ULL },
    { "zzz", 0x9d2c4f4b507a2ac3ULL },
    { "zzzzz", 0x0f03b9031735693aULL },
    { "zzzzzzzz", 0xe674147c8582c08eULL },
    { "zzzzzzzzzzzzz", 0x3162d9fa6938db83ULL },
    { "zzzzzzzzzzzzzzzzzzzzz", 0x37b9a549e013620cULL },
    { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0x8921470aff885016ULL },
    { "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
      "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
      "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
      "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz"
      "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", 0xf60fdcd9beb08441ULL },
    { "a", 0xaeb6f9d5517c61f8ULL },
    { "ab", 0x7ab1edb96be496b4ULL },
    { "aba", 0x3edb049950884d0aULL },
    { "ababa", 0x8f2de9e73a97714bULL },
    { "abababab", 0xee14a29ddf0ce54cULL },
    { "ababababababa", 0x38b3ddaada2d52b4ULL },
    { "ababababababababababa", 0xd3665364219f2b85ULL },
    { "abababababababababababababababab", 0xa75cd6afbf1bc972ULL },
    { "abababababababababababababababab"
      "abababababababababababababababab"
      "abababababababababababababababab"
      "abababababababababababababababab"
      "abababababababababababababababab", 0x840192d129f7a22bULL }
#else
#error This test only supports 64-bit and 32-bit systems.
#endif
  };
  // Iterate the table directly instead of indexing with sizeof arithmetic.
  for (const auto &entry : golden_data) {
    StringRef str = entry.s;
    hash_code hash = hash_combine_range(str.begin(), str.end());
#if 0 // Enable this to generate paste-able text for the above structure.
    std::string member_str = "\"" + str.str() + "\",";
    // %llx requires an unsigned long long argument; uint64_t is a different
    // type (unsigned long) on LP64 targets, so cast to the exact type.
    fprintf(stderr, " { %-35s 0x%016llxULL },\n",
            member_str.c_str(),
            static_cast<unsigned long long>(static_cast<size_t>(hash)));
#endif
    EXPECT_EQ(static_cast<size_t>(entry.hash), static_cast<size_t>(hash));
  }
}
309+
238310
TEST(HashingTest, HashCombineBasicTest) {
239311
// Hashing a sequence of homogenous types matches range hashing.
240312
const int i1 = 42, i2 = 43, i3 = 123, i4 = 999, i5 = 0, i6 = 79;

llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ static_library("Support") {
9494
"FormattedStream.cpp",
9595
"GlobPattern.cpp",
9696
"GraphWriter.cpp",
97+
"Hashing.cpp",
9798
"HexagonAttributeParser.cpp",
9899
"HexagonAttributes.cpp",
99100
"InitLLVM.cpp",

0 commit comments

Comments
 (0)