From dc8eb409653cd2a1e531058bf69d26709e776dd6 Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 10:07:26 +0100 Subject: [PATCH 1/7] Enhance Base62 unit tests by explicitly verifying encoding and decoding accuracy --- base62/base62_test.go | 55 ++++++++++++++++++++++++++++++++----------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/base62/base62_test.go b/base62/base62_test.go index 00da2124a8e..608c7c9a979 100644 --- a/base62/base62_test.go +++ b/base62/base62_test.go @@ -1,31 +1,58 @@ package base62 import ( + "math" "testing" ) func TestEncodeDecode(t *testing.T) { - tests := []struct { - num uint32 + testCases := []struct { + input uint32 + expected string }{ - {0}, - {1}, - {42}, - {12345}, - {99999}, - {123456789}, + {0, "0"}, + {1, "1"}, + {5, "5"}, + {9, "9"}, + {10, "A"}, + {42, "g"}, + {'0', "m"}, + {'9', "v"}, + {'A', "13"}, + {'Z', "1S"}, + {'a', "1Z"}, + {'z', "1y"}, + {99999, "Q0t"}, + {12345, "3D7"}, + {123456789, "8M0kX"}, + {math.MaxUint32, "4gfFC3"}, } - for _, tt := range tests { - encoded := Encode(tt.num) + for _, tc := range testCases { + encoded := Encode(tc.input) + if encoded != tc.expected { + t.Errorf("Encode(%d) = %s; want %s", tc.input, encoded, tc.expected) + } decoded, err := Decode(encoded) - if err != nil { - t.Errorf("Decode error: %v", err) + t.Errorf("Expected error nil, got %v", err) } - if decoded != tt.num { - t.Errorf("Decode(%v) = %v, want %v", encoded, decoded, tt.num) + if decoded != tc.input { + t.Errorf("Decode(%v) = %v, want %v", encoded, decoded, tc.input) } } } + +// Decode handles empty string input with appropriate error +func TestDecodeEmptyString(t *testing.T) { + if _, err := Decode(""); err == nil { + t.Error("Expected error for empty string, got nil") + } +} + +func TestDecodeOverflow(t *testing.T) { + if _, err := Decode("4gfFC4"); err == nil { + t.Error("Expected overflow error, got nil") + } +} From 
757e1783442497c21813c282787dd29a6512c49a Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 10:29:41 +0100 Subject: [PATCH 2/7] Reduce heap allocations and improve performance of base62.Encode by avoiding dynamic memory usage for small, fixed-size data --- base62/base62.go | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/base62/base62.go b/base62/base62.go index efafbc76836..d139e98785a 100644 --- a/base62/base62.go +++ b/base62/base62.go @@ -12,23 +12,21 @@ const ( ) // Encode encodes a uint32 value to a base62 string. -func Encode(num uint32) string { - if num == 0 { - return string(alphabet[0]) +func Encode(n uint32) string { + if n < base { + return string(alphabet[n]) } - - var encoded strings.Builder - - for num > 0 { - remainder := num % base - encoded.WriteByte(alphabet[remainder]) - num /= base + // avoid dynamic memory usage for small, fixed size data + buf := [6]byte{} // 6 is max number of digits required to encode MaxUint32 + idx := len(buf) + + for n > 0 { + idx-- + buf[idx] = alphabet[n%base] + n /= base } - // Reverse the encoded string - encodedString := encoded.String() - reversed := reverse(encodedString) - return reversed + return string(buf[idx:]) } // Decode decodes a base62 string to a uint32 value. 
From 88189c580e382bdad244259a4a005a16a7402ff6 Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 10:37:59 +0100 Subject: [PATCH 3/7] [Base62.Decode] use iterative multiplication instead of math.Pow() to avoid less precise floating point operations --- base62/base62.go | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/base62/base62.go b/base62/base62.go index d139e98785a..7ec2416251b 100644 --- a/base62/base62.go +++ b/base62/base62.go @@ -2,7 +2,6 @@ package base62 import ( "fmt" - "math" "strings" ) @@ -32,25 +31,14 @@ func Encode(n uint32) string { // Decode decodes a base62 string to a uint32 value. func Decode(encoded string) (uint32, error) { var decoded uint32 - strLen := len(encoded) - - for i, char := range encoded { + for _, char := range encoded { index := strings.IndexRune(alphabet, char) if index < 0 { return 0, fmt.Errorf("invalid character: %c", char) } - decoded += uint32(index) * uint32(math.Pow(float64(base), float64(strLen-i-1))) + decoded = decoded*base + uint32(index) } return decoded, nil } - -// Reverse a string. -func reverse(s string) string { - runes := []rune(s) - for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { - runes[i], runes[j] = runes[j], runes[i] - } - return string(runes) -} From 8fa60211f861756861a57ba1c1e9dad73edc3819 Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 10:52:04 +0100 Subject: [PATCH 4/7] [Base62.Decode] return error if input string is empty --- base62/base62.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/base62/base62.go b/base62/base62.go index 7ec2416251b..924aefc2bbf 100644 --- a/base62/base62.go +++ b/base62/base62.go @@ -30,6 +30,9 @@ func Encode(n uint32) string { // Decode decodes a base62 string to a uint32 value. 
func Decode(encoded string) (uint32, error) { + if len(encoded) == 0 { + return 0, fmt.Errorf("empty string") + } var decoded uint32 for _, char := range encoded { index := strings.IndexRune(alphabet, char) From 1bad7712375b07b4ad815bcdb351599a0d551efc Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 10:55:23 +0100 Subject: [PATCH 5/7] Add overflow check when calculating the decoded value to prevent silent overflow of uint32 The current implementation could produce incorrect results for very long input strings --- base62/base62.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/base62/base62.go b/base62/base62.go index 924aefc2bbf..69982ddd2f0 100644 --- a/base62/base62.go +++ b/base62/base62.go @@ -2,6 +2,7 @@ package base62 import ( "fmt" + "math" "strings" ) @@ -39,6 +40,10 @@ func Decode(encoded string) (uint32, error) { if index < 0 { return 0, fmt.Errorf("invalid character: %c", char) } + // Add overflow check when calculating the decoded value to prevent silent overflow of uint32 + if decoded > (math.MaxUint32-uint32(index))/base { + return 0, fmt.Errorf("integer overflow") + } decoded = decoded*base + uint32(index) } From 2981ce514386b9d0183c8321682eae558ef18d9f Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 11:46:09 +0100 Subject: [PATCH 6/7] [Base62.Decode] add unit test for input strings containing invalid character --- base62/base62_test.go | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/base62/base62_test.go b/base62/base62_test.go index 608c7c9a979..8501ea71eef 100644 --- a/base62/base62_test.go +++ b/base62/base62_test.go @@ -16,6 +16,8 @@ func TestEncodeDecode(t *testing.T) { {9, "9"}, {10, "A"}, {42, "g"}, + {61, "z"}, + {62, "10"}, {'0', "m"}, {'9', "v"}, {'A', "13"}, @@ -53,6 +55,12 @@ func TestDecodeEmptyString(t *testing.T) { func TestDecodeOverflow(t 
*testing.T) { if _, err := Decode("4gfFC4"); err == nil { - t.Error("Expected overflow error, got nil") + t.Error("Expected error overflow , got nil") + } +} + +func TestDecodeInvalid(t *testing.T) { + if _, err := Decode("/"); err == nil { + t.Error("Expected error invalid character, got nil") } } From 0c6e9f1075d01da51ab50ed26a0ce3aece497d7c Mon Sep 17 00:00:00 2001 From: Redouan El Rhazouani <81578195+redouan-rhazouani@users.noreply.github.com> Date: Wed, 26 Feb 2025 12:15:15 +0100 Subject: [PATCH 7/7] [Base62.Decode] optimize decoding by replacing strings.IndexRune with a fixed-size array for O(1) lookups, improving performance and reducing memory overhead --- base62/base62.go | 41 +++++++++++++++++++++++++++++++++-------- base62/base62_test.go | 13 +++++++------ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/base62/base62.go b/base62/base62.go index 69982ddd2f0..1a02e98e263 100644 --- a/base62/base62.go +++ b/base62/base62.go @@ -3,21 +3,41 @@ package base62 import ( "fmt" "math" - "strings" ) const ( - alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - base = uint32(len(alphabet)) + alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + base = uint32(len(alphabet)) + maxBase62Digits = 6 // max number of digits required to encode MaxUint32 + +) + +var ( + ErrEmptyString = fmt.Errorf("empty string") + ErrInvalidChar = fmt.Errorf("invalid character") + ErrOverflow = fmt.Errorf("integer overflow") ) +// Fixed-size arrays have better performance and lower memory overhead compared to maps for small static sets of data +var charToIndex [123]int8 // Assuming ASCII from '\0' - 'z' + +func init() { + for i := range charToIndex { + charToIndex[i] = -1 + } + for i, c := range alphabet { + charToIndex[c] = int8(i) + } +} + // Encode encodes a uint32 value to a base62 string. +// The returned string will be between 1-6 characters long. 
func Encode(n uint32) string { if n < base { return string(alphabet[n]) } // avoid dynamic memory usage for small, fixed size data - buf := [6]byte{} // 6 is max number of digits required to encode MaxUint32 + buf := [maxBase62Digits]byte{} idx := len(buf) for n > 0 { @@ -30,19 +50,24 @@ func Encode(n uint32) string { } // Decode decodes a base62 string to a uint32 value. +// Returns an error if the input string is empty, contains invalid characters, +// or would result in integer overflow. func Decode(encoded string) (uint32, error) { if len(encoded) == 0 { - return 0, fmt.Errorf("empty string") + return 0, ErrEmptyString } var decoded uint32 for _, char := range encoded { - index := strings.IndexRune(alphabet, char) + index := int8(-1) + if int(char) < len(charToIndex) { + index = charToIndex[char] + } if index < 0 { - return 0, fmt.Errorf("invalid character: %c", char) + return 0, fmt.Errorf("%w: %c", ErrInvalidChar, char) } // Add overflow check when calculating the decoded value to prevent silent overflow of uint32 if decoded > (math.MaxUint32-uint32(index))/base { - return 0, fmt.Errorf("integer overflow") + return 0, fmt.Errorf("%w: %s", ErrOverflow, encoded) } decoded = decoded*base + uint32(index) diff --git a/base62/base62_test.go b/base62/base62_test.go index 8501ea71eef..f2ad06d6fb1 100644 --- a/base62/base62_test.go +++ b/base62/base62_test.go @@ -1,6 +1,7 @@ package base62 import ( + "errors" "math" "testing" ) @@ -48,19 +49,19 @@ func TestEncodeDecode(t *testing.T) { // Decode handles empty string input with appropriate error func TestDecodeEmptyString(t *testing.T) { - if _, err := Decode(""); err == nil { - t.Error("Expected error for empty string, got nil") + if _, err := Decode(""); !errors.Is(err, ErrEmptyString) { + t.Errorf("Expected error %v, got %v", ErrEmptyString, err) } } func TestDecodeOverflow(t *testing.T) { - if _, err := Decode("4gfFC4"); err == nil { - t.Error("Expected error overflow , got nil") + if _, err := Decode("4gfFC4"); 
!errors.Is(err, ErrOverflow) { + t.Errorf("Expected error %v, got %v", ErrOverflow, err) } } func TestDecodeInvalid(t *testing.T) { - if _, err := Decode("/"); err == nil { - t.Error("Expected error invalid character, got nil") + if _, err := Decode("/"); !errors.Is(err, ErrInvalidChar) { + t.Errorf("Expected error %v, got %v", ErrInvalidChar, err) } }