Skip to content

Improve Base62 encoding/decoding performance and robustness #3391

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
81 changes: 50 additions & 31 deletions base62/base62.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,75 @@ package base62
import (
	"errors"
	"fmt"
	"math"
	"strings"
)

const (
alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
base = uint32(len(alphabet))
alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
base = uint32(len(alphabet))
maxBase62Digits = 6 // max number of digits required to encode MaxUint32

)

// Encode encodes a uint32 value to a base62 string.
func Encode(num uint32) string {
if num == 0 {
return string(alphabet[0])
// Sentinel errors returned (possibly wrapped) by Decode. Callers should match
// them with errors.Is rather than comparing error strings.
var (
	// ErrEmptyString reports that the input to Decode was the empty string.
	ErrEmptyString = errors.New("empty string")
	// ErrInvalidChar reports that the input contained a character outside
	// the base62 alphabet.
	ErrInvalidChar = errors.New("invalid character")
	// ErrOverflow reports that the decoded value does not fit in a uint32.
	ErrOverflow = errors.New("integer overflow")
)

// charToIndex maps an ASCII character code to its position in alphabet, or
// holds -1 when that code is not a base62 digit. A fixed-size array is used
// instead of a map because the set is small and static; it spans '\0'
// through 'z', the highest character in alphabet.
var charToIndex ['z' + 1]int8

// init populates charToIndex: every slot starts out as -1, then the slot for
// each alphabet character is set to that character's index within alphabet.
func init() {
	for slot := 0; slot < len(charToIndex); slot++ {
		charToIndex[slot] = -1
	}
	for pos := 0; pos < len(alphabet); pos++ {
		charToIndex[alphabet[pos]] = int8(pos)
	}
}

var encoded strings.Builder
// Encode encodes a uint32 value to a base62 string.
// The returned string will be between 1-6 characters long.
func Encode(n uint32) string {
if n < base {
return string(alphabet[n])
}
// avoid dynamic memory usage for small, fixed size data
buf := [maxBase62Digits]byte{}
idx := len(buf)

for num > 0 {
remainder := num % base
encoded.WriteByte(alphabet[remainder])
num /= base
for n > 0 {
idx--
buf[idx] = alphabet[n%base]
n /= base
}

// Reverse the encoded string
encodedString := encoded.String()
reversed := reverse(encodedString)
return reversed
return string(buf[idx:])
}

// Decode decodes a base62 string to a uint32 value.
// Returns an error if the input string is empty, contains invalid characters,
// or would result in integer overflow.
func Decode(encoded string) (uint32, error) {
if len(encoded) == 0 {
return 0, ErrEmptyString
}
var decoded uint32
strLen := len(encoded)

for i, char := range encoded {
index := strings.IndexRune(alphabet, char)
for _, char := range encoded {
index := int8(-1)
if int(char) < len(charToIndex) {
index = charToIndex[char]
}
if index < 0 {
return 0, fmt.Errorf("invalid character: %c", char)
return 0, fmt.Errorf("%w: %c", ErrInvalidChar, char)
}
// Add overflow check when calculating the decoded value to prevent silent overflow of uint32
if decoded > (math.MaxUint32-uint32(index))/base {
return 0, fmt.Errorf("%w: %s", ErrOverflow, encoded)
}

decoded += uint32(index) * uint32(math.Pow(float64(base), float64(strLen-i-1)))
decoded = decoded*base + uint32(index)
}

return decoded, nil
}

// reverse returns s with its runes in the opposite order. The string is
// converted to a rune slice first, so multi-byte characters are kept whole.
func reverse(s string) string {
	chars := []rune(s)
	n := len(chars)
	// Swap each rune in the first half with its mirror in the second half.
	for lo := 0; lo < n/2; lo++ {
		hi := n - 1 - lo
		chars[lo], chars[hi] = chars[hi], chars[lo]
	}
	return string(chars)
}
64 changes: 50 additions & 14 deletions base62/base62_test.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,67 @@
package base62

import (
"errors"
"math"
"testing"
)

func TestEncodeDecode(t *testing.T) {
tests := []struct {
num uint32
testCases := []struct {
input uint32
expected string
}{
{0},
{1},
{42},
{12345},
{99999},
{123456789},
{0, "0"},
{1, "1"},
{5, "5"},
{9, "9"},
{10, "A"},
{42, "g"},
{61, "z"},
{62, "10"},
{'0', "m"},
{'9', "v"},
{'A', "13"},
{'Z', "1S"},
{'a', "1Z"},
{'z', "1y"},
{99999, "Q0t"},
{12345, "3D7"},
{123456789, "8M0kX"},
{math.MaxUint32, "4gfFC3"},
}

for _, tt := range tests {
encoded := Encode(tt.num)
for _, tc := range testCases {
encoded := Encode(tc.input)
if encoded != tc.expected {
t.Errorf("Encode(%d) = %s; want %s", tc.input, encoded, tc.expected)
}
decoded, err := Decode(encoded)

if err != nil {
t.Errorf("Decode error: %v", err)
t.Errorf("Expected error nil, got %v", err)
}

if decoded != tt.num {
t.Errorf("Decode(%v) = %v, want %v", encoded, decoded, tt.num)
if decoded != tc.input {
t.Errorf("Decode(%v) = %v, want %v", encoded, decoded, tc.input)
}
}
}

// TestDecodeEmptyString checks that Decode rejects the empty string with an
// error matching ErrEmptyString.
func TestDecodeEmptyString(t *testing.T) {
	_, err := Decode("")
	if errors.Is(err, ErrEmptyString) {
		return
	}
	t.Errorf("Expected error %v, got %v", ErrEmptyString, err)
}

// TestDecodeOverflow checks that Decode reports ErrOverflow for "4gfFC4",
// an input whose last digit is one past "4gfFC3", the string the round-trip
// test expects for math.MaxUint32.
func TestDecodeOverflow(t *testing.T) {
	_, err := Decode("4gfFC4")
	if errors.Is(err, ErrOverflow) {
		return
	}
	t.Errorf("Expected error %v, got %v", ErrOverflow, err)
}

// TestDecodeInvalid checks that Decode reports ErrInvalidChar for "/", a
// character that is not part of the base62 alphabet.
func TestDecodeInvalid(t *testing.T) {
	_, err := Decode("/")
	if errors.Is(err, ErrInvalidChar) {
		return
	}
	t.Errorf("Expected error %v, got %v", ErrInvalidChar, err)
}
Loading