Skip to content

Commit f76be05

Browse files
apoelstra authored and roconnor-blockstream committed
bech32: expose the character conversion functionality
In the next commit we will implement a new checksum, codex32, which uses the same encoding and HRP rules as bech32 and bech32m, but has a substantially different checksum verification procedure. To minimize duplicated code, we expose the character conversion in a new bech32::internal namespace.
1 parent b23cefb commit f76be05

File tree

2 files changed

+78
-44
lines changed

2 files changed

+78
-44
lines changed

src/bech32.cpp

Lines changed: 59 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,7 @@ namespace bech32
1717
namespace
1818
{
1919

20-
typedef std::vector<uint8_t> data;
21-
22-
/** The Bech32 and Bech32m character set for encoding. */
23-
const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
24-
25-
/** The Bech32 and Bech32m character set for decoding. */
26-
const int8_t CHARSET_REV[128] = {
27-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
28-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
29-
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
30-
15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
31-
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
32-
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
33-
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
34-
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
35-
};
20+
typedef internal::data data;
3621

3722
/** We work with the finite field GF(1024) defined as a degree 2 extension of the base field GF(32)
3823
* The defining polynomial of the extension is x^2 + 9x + 23.
@@ -308,21 +293,6 @@ bool CheckCharacters(const std::string& str, std::vector<int>& errors)
308293
return errors.empty();
309294
}
310295

311-
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values)
312-
{
313-
data ret;
314-
ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE);
315-
316-
/** Expand a HRP for use in checksum computation. */
317-
for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5);
318-
ret.push_back(0);
319-
for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f);
320-
321-
ret.insert(ret.end(), values.begin(), values.end());
322-
323-
return ret;
324-
}
325-
326296
/** Verify a checksum. */
327297
Encoding VerifyChecksum(const std::string& hrp, const data& values)
328298
{
@@ -331,7 +301,7 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
331301
// list of values would result in a new valid list. For that reason, Bech32 requires the
332302
// resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
333303
// https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
334-
auto enc = PreparePolynomialCoefficients(hrp, values);
304+
auto enc = internal::PreparePolynomialCoefficients(hrp, values, 0);
335305
const uint32_t check = PolyMod(enc);
336306
if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
337307
if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
@@ -341,7 +311,7 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
341311
/** Create a checksum. */
342312
data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
343313
{
344-
auto enc = PreparePolynomialCoefficients(hrp, values);
314+
auto enc = internal::PreparePolynomialCoefficients(hrp, values, CHECKSUM_SIZE);
345315
enc.insert(enc.end(), CHECKSUM_SIZE, 0x00);
346316
uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
347317
data ret(CHECKSUM_SIZE);
@@ -354,29 +324,61 @@ data CreateChecksum(Encoding encoding, const std::string& hrp, const data& value
354324

355325
} // namespace
356326

357-
/** Encode a Bech32 or Bech32m string. */
358-
std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
327+
namespace internal {
328+
329+
/** The Bech32 and Bech32m character set for encoding. */
330+
const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
331+
332+
/** The Bech32 and Bech32m character set for decoding. */
333+
const int8_t CHARSET_REV[128] = {
334+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
335+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
336+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
337+
15, -1, 10, 17, 21, 20, 26, 30, 7, 5, -1, -1, -1, -1, -1, -1,
338+
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
339+
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1,
340+
-1, 29, -1, 24, 13, 25, 9, 8, 23, -1, 18, 22, 31, 27, 19, -1,
341+
1, 0, 3, 16, 11, 28, 12, 14, 6, 4, 2, -1, -1, -1, -1, -1
342+
};
343+
344+
/** Build the coefficient list that is fed to the checksum polynomial:
 *  the expanded HRP followed by `values`.
 *
 *  @param hrp     human-readable part to expand
 *  @param values  data symbols appended after the expanded HRP
 *  @param extra   capacity hint only: room for this many further elements is
 *                 reserved, but nothing is appended for it here
 *  @return        expanded HRP (2 * hrp.size() + 1 entries) followed by `values`
 */
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values, size_t extra)
345+
{
346+
data ret;
347+
// Reserve the full final size up front (plus the caller's `extra`) to avoid reallocations.
ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + extra);
348+
349+
/** Expand a HRP for use in checksum computation. */
350+
// First the bits above the low five of every HRP character...
for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5);
351+
// ...then a single zero separating the two halves...
ret.push_back(0);
352+
// ...then the low five bits of every HRP character.
for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f);
353+
354+
ret.insert(ret.end(), values.begin(), values.end());
355+
356+
return ret;
357+
}
358+
359+
/** Encode a hrpstring without concerning ourselves with checksum validity.
 *
 *  The result is hrp + SEPARATOR + one CHARSET character per entry of `values`,
 *  followed by one CHARSET character per entry of `checksum`. The checksum is
 *  emitted exactly as supplied; it is neither computed nor verified here.
 */
360+
std::string Encode(const std::string& hrp, const data& values, const data& checksum) {
359361
// First ensure that the HRP is all lowercase. BIP-173 and BIP350 require an encoder
360362
// to return a lowercase Bech32/Bech32m string, but if given an uppercase HRP, the
361363
// result will always be invalid.
362364
for (const char& c : hrp) assert(c < 'A' || c > 'Z');
363365

364366
std::string ret;
365-
ret.reserve(hrp.size() + 1 + values.size() + CHECKSUM_SIZE);
367+
// Size the output from the checksum actually supplied rather than a fixed CHECKSUM_SIZE.
ret.reserve(hrp.size() + 1 + values.size() + checksum.size());
366368
ret += hrp;
367369
ret += SEPARATOR;
368370
// NOTE(review): entries index the 32-character CHARSET directly with no range check;
// presumably callers only pass 5-bit values (< 32) -- confirm against callers.
for (const uint8_t& i : values) ret += CHARSET[i];
369-
for (const uint8_t& i : CreateChecksum(encoding, hrp, values)) ret += CHARSET[i];
371+
for (const uint8_t& i : checksum) ret += CHARSET[i];
370372
return ret;
371373
}
372374

373-
/** Decode a Bech32 or Bech32m string. */
374-
DecodeResult Decode(const std::string& str, CharLimit limit) {
375+
/** Decode a hrpstring without concerning ourselves with checksum validity */
376+
std::pair<std::string, data> Decode(const std::string& str, CharLimit limit, size_t checksum_size) {
375377
std::vector<int> errors;
376378
if (!CheckCharacters(str, errors)) return {};
377379
size_t pos = str.rfind(SEPARATOR);
378380
if (str.size() > limit) return {};
379-
if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
381+
if (pos == str.npos || pos == 0 || pos + checksum_size >= str.size()) {
380382
return {};
381383
}
382384
data values(str.size() - 1 - pos);
@@ -394,9 +396,22 @@ DecodeResult Decode(const std::string& str, CharLimit limit) {
394396
for (size_t i = 0; i < pos; ++i) {
395397
hrp += LowerCase(str[i]);
396398
}
397-
Encoding result = VerifyChecksum(hrp, values);
399+
return std::make_pair(hrp, values);
400+
}
401+
402+
} // namespace internal
403+
404+
/** Encode a Bech32 or Bech32m string.
 *
 *  Computes the checksum for the requested `encoding` over `hrp` and `values`,
 *  then delegates the string assembly to internal::Encode.
 */
405+
std::string Encode(Encoding encoding, const std::string& hrp, const data& values) {
406+
return internal::Encode(hrp, values, CreateChecksum(encoding, hrp, values));
407+
}
408+
409+
/** Decode a Bech32 or Bech32m string.
 *
 *  Parsing is delegated to internal::Decode; this wrapper then verifies the
 *  checksum and strips it from the returned data. Returns a default-constructed
 *  DecodeResult when the checksum check reports Encoding::INVALID.
 */
410+
DecodeResult Decode(const std::string& str, CharLimit limit) {
411+
// internal::Decode signals every parse failure by returning a default-constructed
// (empty) pair; there is no explicit failure check on `res` here.
// NOTE(review): this relies on VerifyChecksum reporting INVALID for that empty
// hrp/data pair -- confirm, or test `res` explicitly before verifying.
auto res = internal::Decode(str, limit, CHECKSUM_SIZE);
412+
Encoding result = VerifyChecksum(res.first, res.second);
398413
if (result == Encoding::INVALID) return {};
399-
return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)};
414+
// Drop the trailing CHECKSUM_SIZE checksum symbols. internal::Decode rejects inputs with
// pos + checksum_size >= str.size(), so a parsed data part holds at least that many symbols.
return {result, std::move(res.first), data(res.second.begin(), res.second.end() - CHECKSUM_SIZE)};
400415
}
401416

402417
/** Find index of an incorrect character in a Bech32 string. */
@@ -432,7 +447,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, Ch
432447
data values(length);
433448
for (size_t i = pos + 1; i < str.size(); ++i) {
434449
unsigned char c = str[i];
435-
int8_t rev = CHARSET_REV[c];
450+
int8_t rev = internal::CHARSET_REV[c];
436451
if (rev == -1) {
437452
error_locations.push_back(i);
438453
return std::make_pair("Invalid Base 32 character", std::move(error_locations));
@@ -447,7 +462,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, Ch
447462
std::vector<int> possible_errors;
448463
// Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial
449464
// over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
450-
auto enc = PreparePolynomialCoefficients(hrp, values);
465+
auto enc = internal::PreparePolynomialCoefficients(hrp, values, 0);
451466
uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding);
452467

453468
// All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding

src/bech32.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,25 @@ DecodeResult Decode(const std::string& str, CharLimit limit = CharLimit::BECH32)
5959
/** Return the positions of errors in a Bech32 string. */
6060
std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit = CharLimit::BECH32);
6161

62+
// The internal namespace is used for things shared between bech32(m) and codex32.
63+
// These functions should not be used except by other hrpstring-encoded codes.
64+
namespace internal {
65+
typedef std::vector<uint8_t> data;
66+
67+
extern const char* CHARSET;
68+
extern const int8_t CHARSET_REV[128];
69+
70+
/** Expand a HRP for use in checksum computation, then append `values`.
 *  `extra` is only a capacity hint: room is reserved for that many additional
 *  elements, but none are appended by this function. */
71+
std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values, size_t extra);
72+
73+
/** Encode a hrpstring without concerning ourselves with checksum validity */
74+
std::string Encode(const std::string& hrp, const data& values, const data& checksum);
75+
76+
/** Decode a hrpstring without concerning ourselves with checksum validity */
77+
std::pair<std::string, data> Decode(const std::string& str, size_t max_length, size_t checksum_length);
78+
79+
} // namespace internal
80+
6281
} // namespace bech32
6382

6483
#endif // BITCOIN_BECH32_H

0 commit comments

Comments
 (0)