From 8cf1bb2d0d7f509d9f17bfba0e2fa522437cb185 Mon Sep 17 00:00:00 2001 From: heppu Date: Thu, 1 May 2025 20:24:55 +0300 Subject: [PATCH 1/3] Add test for batch.Split bug --- batch/batch_test.go | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/batch/batch_test.go b/batch/batch_test.go index 51978283..00921770 100644 --- a/batch/batch_test.go +++ b/batch/batch_test.go @@ -67,6 +67,15 @@ select top 1 1`, select top 1 1`, }, }, + testItem{ + Sql: `PRINT 1 +GOTO Bookmark +GO +PRINT 2 +Bookmark: +GO`, + Expect: []string{"PRINT 1\nGOTO Bookmark\n", "\nPRINT 2\nBookmark:\n"}, + }, testItem{Sql: `"0'"`, Expect: []string{`"0'"`}}, testItem{Sql: "0'", Expect: []string{"0'"}}, testItem{Sql: "--", Expect: []string{"--"}}, From 760489e7f2c2c0953f638755acc6f3556ed91631 Mon Sep 17 00:00:00 2001 From: heppu Date: Thu, 1 May 2025 23:19:29 +0300 Subject: [PATCH 2/3] Fix GOTO batch parsing bug --- batch/batch.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/batch/batch.go b/batch/batch.go index 5b793dcb..01e99f3d 100644 --- a/batch/batch.go +++ b/batch/batch.go @@ -46,6 +46,12 @@ func hasPrefixFold(s, sep string) bool { if len(s) < len(sep) { return false } + + // Forward lookup to see if the word continues. + if len(s) > len(sep) && unicode.IsLetter(rune(s[len(sep)])) { + return false + } + return strings.EqualFold(s[:len(sep)], sep) } From fcbee96cf37603f9ea7c7352e89040d2c31ff049 Mon Sep 17 00:00:00 2001 From: David Levy Date: Tue, 12 May 2026 12:00:18 -0500 Subject: [PATCH 3/3] fix: use DecodeRuneInString for word-boundary check Addresses review feedback on the GO/GOTO word-boundary fix: - Use utf8.DecodeRuneInString so the follower-char letter check sees the full rune, not the leading byte of a multi-byte UTF-8 sequence. Casting rune(byte) misclassifies multi-byte runes (for example Hebrew aleph U+05D0 has leading byte 0xD7 which is the MULTIPLICATION SIGN, not a letter). 
- Extend TestHasPrefixFold to cover word-boundary cases (GOTO, gotoflag, GO1, GO_FOO), Latin-1 follower, and the Hebrew-aleph case that distinguishes the two implementations. Co-authored-by: Henri Koski --- batch/batch.go | 13 ++++++++++--- batch/batch_test.go | 12 ++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/batch/batch.go b/batch/batch.go index 01e99f3d..4944a5d1 100644 --- a/batch/batch.go +++ b/batch/batch.go @@ -12,6 +12,7 @@ import ( "strconv" "strings" "unicode" + "unicode/utf8" ) // Split the provided SQL into multiple sql scripts based on a given @@ -47,9 +48,15 @@ func hasPrefixFold(s, sep string) bool { return false } - // Forward lookup to see if the word continues. - if len(s) > len(sep) && unicode.IsLetter(rune(s[len(sep)])) { - return false + // Reject matches where the separator is followed by another letter, + // so e.g. "GO" does not match the start of "GOTO". Use DecodeRuneInString + // to handle multi-byte runes correctly; a bare rune(s[i]) cast would + // misclassify the leading byte of a multi-byte sequence. + if len(s) > len(sep) { + r, _ := utf8.DecodeRuneInString(s[len(sep):]) + if unicode.IsLetter(r) { + return false + } } return strings.EqualFold(s[:len(sep)], sep) diff --git a/batch/batch_test.go b/batch/batch_test.go index 00921770..dcaa7e27 100644 --- a/batch/batch_test.go +++ b/batch/batch_test.go @@ -119,6 +119,18 @@ func TestHasPrefixFold(t *testing.T) { {"h", "H", true}, {"h", "K", false}, {"go 5\n", "go", true}, + // Word-boundary checks: separator must not be followed by another letter. + {"GOTO foo", "GO", false}, + {"gotoflag", "go", false}, + {"GO1\n", "GO", true}, + {"GO_FOO\n", "GO", true}, + // Multi-byte UTF-8 follower. Hebrew aleph (U+05D0) is encoded as + // 0xD7 0x90; a bare rune(s[i]) cast would see 0xD7 (× MULTIPLICATION + // SIGN, not a letter) and incorrectly allow the match. Decoding the + // rune correctly sees U+05D0 (a letter) and rejects. 
+ {"GO\u05D0test", "GO", false}, + // Latin-1 letter follower: é (U+00E9) is two bytes in UTF-8 (0xC3 0xA9); a byte cast also rejects it because 0xC3 maps to U+00C3 (Ã), a letter. + {"GOé", "GO", false}, } for _, item := range list { is := hasPrefixFold(item.s, item.pre)