Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions segmenter/unicode14_rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func (cr *cursor) ruleLB30ab(breakOp *breakOpportunity) {
*breakOp = breakProhibited
}
// [\p{Extended_Pictographic}&\p{Cn}] × EM
if unicode.Is(ucd.Extended_Pictographic, cr.prev) && ucd.LookupType(cr.prev) == nil &&
if unicode.Is(ucd.Extended_Pictographic, cr.prev) && !ucd.IsAssigned(cr.prev) &&
cr.line == ucd.BreakEM {
*breakOp = breakProhibited
}
Expand Down Expand Up @@ -313,8 +313,7 @@ func (cr *cursor) ruleLB1() {
case ucd.BreakAI, ucd.BreakSG, ucd.BreakXX:
cr.line = ucd.BreakAL
case ucd.BreakSA:
generalCategory := ucd.LookupType(cr.r)
if generalCategory == unicode.Mn || generalCategory == unicode.Mc {
if unicode.Is(unicode.Mn, cr.r) || unicode.Is(unicode.Mc, cr.r) {
cr.line = ucd.BreakCM
} else {
cr.line = ucd.BreakAL
Expand Down
75 changes: 51 additions & 24 deletions unicodedata/general_category.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 6 additions & 16 deletions unicodedata/unicode.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,15 @@ import (
"github.com/go-text/typesetting/language"
)

var categories []*unicode.RangeTable

func init() {
for cat, table := range unicode.Categories {
if len(cat) == 2 {
categories = append(categories, table)
}
}
}

// LookupType returns the unicode general categorie of the rune,
// or nil if not found.
func LookupType(r rune) *unicode.RangeTable {
for _, table := range categories {
// IsAssigned returns [true] for runes whose general category
// is anything other than Cn (unassigned).
func IsAssigned(r rune) bool {
for _, table := range allCategories {
if unicode.Is(table, r) {
return table
return true
}
}
return nil
return false
}

// LookupCombiningClass returns the class used for the Canonical Ordering Algorithm in the Unicode Standard,
Expand Down
23 changes: 12 additions & 11 deletions unicodedata/unicode_test.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package unicodedata

import (
"reflect"
"testing"
"unicode"

Expand Down Expand Up @@ -91,21 +90,23 @@ func TestBreakClass(t *testing.T) {
}
}

func TestLookupType(t *testing.T) {
func TestIsAssigned(t *testing.T) {
// some manual test cases
tests := []struct {
args rune
want *unicode.RangeTable
want bool
}{
{'a', unicode.Ll},
{'.', unicode.Po},
{'カ', unicode.Lo},
{'🦳', unicode.So},
{-1, nil},
{'a', true},
{'.', true},
{'カ', true},
{'🦳', true},
{'\U0001F3FF', true},
{'\U0001F02C', false},
{-1, false},
}
for _, tt := range tests {
if got := LookupType(tt.args); got != tt.want {
t.Errorf("LookupType(%s) = %v, want %v", string(tt.args), got, tt.want)
if got := IsAssigned(tt.args); got != tt.want {
t.Errorf("IsAssigned(%s) = %v, want %v", string(tt.args), got, tt.want)
}
}
}
Expand Down Expand Up @@ -508,7 +509,7 @@ func TestLookupLineBreakClass(t *testing.T) {

}
for _, tt := range tests {
if got := LookupLineBreakClass(tt.args); !reflect.DeepEqual(got, tt.want) {
if got := LookupLineBreakClass(tt.args); got != tt.want {
t.Errorf("LookupLineBreakClass(U+%x) = %p, want %p", tt.args, got, tt.want)
}
}
Expand Down