@@ -76,6 +76,16 @@ static wb_property property(code_point)
76
76
}
77
77
/**********************************************************************/
78
78
79
/* Returns the word-break property of the code point held in `node->value`,
 * as computed by property().  A NULL `node` yields `eot` instead of being
 * dereferenced, so callers can pass the result of a lookup that ran off the
 * end of the list without checking it first. */
+ __attribute__ ((pure ))
80
+ static wb_property char_property (node )
81
+ const Char * node ;
82
+ {
83
+ if (node == NULL ) {
84
+ return eot ;
85
+ }
86
+ return property (node -> value );
87
+ }
88
+
79
89
/* Returns the next character, skipping Extend and Format characters.
80
90
* WB4: Skip over Extend and Format characters. */
81
91
__attribute__ ((pure ))
@@ -88,7 +98,7 @@ static Char* skip_to_next(from)
88
98
89
99
do {
90
100
from = from -> next ;
91
- } while (from != NULL && ExtendOrFormat (property (from -> value )));
101
+ } while (from != NULL && ExtendOrFormat (char_property (from )));
92
102
return from ;
93
103
}
94
104
@@ -99,14 +109,21 @@ static Char* skip_twice(from)
99
109
return skip_to_next (skip_to_next (from ));
100
110
}
101
111
102
- __attribute__ ((pure ))
103
- static wb_property char_property (node )
104
- const Char * node ;
112
+ /* Returns the last node of the run that starts at `from` and extends over
113
+ * every immediately following Extend or Format node; NULL if `from` is NULL. */
114
+ static Char * skip_to_end_of_extend (from )
115
+ Char * from ;
105
116
{
106
- if (node == NULL ) {
107
- return eot ;
117
+ if (from == NULL ) {
118
+ return NULL ;
108
119
}
109
- return property (node -> value );
120
+
121
+ /* Advance while the NEXT node is Extend or Format, so `from` ends on the last such node (or is unchanged if there is none). */
122
+ while (from -> next != NULL && ExtendOrFormat (char_property (from -> next ))) {
123
+ from = from -> next ;
124
+ }
125
+
126
+ return from ;
110
127
}
111
128
112
129
/*
@@ -131,13 +148,12 @@ static Char* find_next_boundary(start)
131
148
return NULL ;
132
149
}
133
150
134
-
135
- /* Loop to find next word break. */
151
+ /* Loop to find the next word break. */
136
152
137
153
/* WB2: Break at the start and end of text. */
138
154
while (current -> next != NULL ) {
139
155
/* Advance all the pointers. */
140
- current = current -> next ;
156
+ current = skip_to_next ( current ) ;
141
157
lookbehind = left ;
142
158
left = char_property (current );
143
159
right = char_property (skip_to_next (current ));
@@ -147,9 +163,9 @@ static Char* find_next_boundary(start)
147
163
if (left == CR && right == LF ) continue ;
148
164
149
165
/* WB3a: Otherwise break before and after newlines */
150
- if (left == Newline || left == CR || left == LF ) return current ;
166
+ if (left == Newline || left == CR || left == LF ) break ;
151
167
/* WB3b */
152
- if (right == Newline || right == CR || right == LF ) return current ;
168
+ if (right == Newline || right == CR || right == LF ) break ;
153
169
154
170
/* Ignore Format and Extend characters, except when they appear at the
155
171
* beginning of a region of text. */
@@ -160,7 +176,7 @@ static Char* find_next_boundary(start)
160
176
161
177
/* WB6: Do not break letters across certain punctuation. */
162
178
if (AHLetter (left ) &&
163
- (right == MidLetter || MidNumLetQ (right )) &&
179
+ (right == MidLetter || MidNumLetQ (right )) &&
164
180
AHLetter (left )) continue ;
165
181
/* WB7 */
166
182
if (AHLetter (lookbehind ) &&
@@ -215,10 +231,10 @@ static Char* find_next_boundary(start)
215
231
right == Regional_Indicator ) continue ;
216
232
217
233
/* WB14: Otherwise, break everywhere (including around ideographs). */
218
- return current ;
234
+ break ;
219
235
}
220
236
221
- return current ;
237
+ return skip_to_end_of_extend ( current ) ;
222
238
}
223
239
/**********************************************************************/
224
240
0 commit comments