7
7
import com .ibm .icu .text .UTF16 .StringComparator ;
8
8
import com .ibm .icu .text .UnicodeSet ;
9
9
import com .ibm .icu .util .ULocale ;
10
- import com .ibm .icu .util .VersionInfo ;
11
10
import java .text .ParsePosition ;
12
11
import java .util .Comparator ;
13
- import java .util .List ;
14
- import java .util .Map ;
15
12
import java .util .regex .Pattern ;
16
13
import org .unicode .cldr .util .MultiComparator ;
17
- import org .unicode .props .IndexUnicodeProperties ;
18
- import org .unicode .props .UcdProperty ;
19
- import org .unicode .props .UcdPropertyValues ;
20
- import org .unicode .props .UnicodeProperty ;
21
- import org .unicode .props .UnicodeProperty .PatternMatcher ;
22
- import org .unicode .props .UnicodePropertySymbolTable ;
14
+ import org .unicode .text .UCD .VersionedSymbolTable ;
23
15
24
16
public class UnicodeSetUtilities {
25
17
@@ -114,7 +106,12 @@ public static UnicodeSet parseUnicodeSet(String input) {
114
106
input = input .trim () + "]]]]]" ;
115
107
String parseInput = "[" + input + "]]]]]" ;
116
108
ParsePosition parsePosition = new ParsePosition (0 );
117
- UnicodeSet result = new UnicodeSet (parseInput , parsePosition , fullSymbolTable );
109
+ UnicodeSet result =
110
+ new UnicodeSet (
111
+ parseInput ,
112
+ parsePosition ,
113
+ VersionedSymbolTable .forReview (UcdLoader ::getOldestLoadedUcd )
114
+ .setUnversionedExtensions (XPropertyFactory .make ()));
118
115
int parseEnd = parsePosition .getIndex ();
119
116
if (parseEnd != parseInput .length ()
120
117
&& !UnicodeSetUtilities .OK_AT_END .containsAll (parseInput .substring (parseEnd ))) {
@@ -129,262 +126,4 @@ public static UnicodeSet parseUnicodeSet(String input) {
129
126
}
130
127
return result ;
131
128
}
132
-
133
- static UnicodeSet .XSymbolTable fullSymbolTable = new MySymbolTable ();
134
-
135
- private static class MySymbolTable extends UnicodeSet .XSymbolTable {
136
- UnicodeRegex unicodeRegex ;
137
- XPropertyFactory factory ;
138
-
139
// Constructor of the symbol table that this diff removes.
// Builds a UnicodeRegex and registers this symbol table on it through setSymbolTable(this);
// the value returned by that call is what gets stored in unicodeRegex.
// The factory field is left unset here; applyPropertyAlias assigns it on first use.
- public MySymbolTable () {
140
- unicodeRegex = new UnicodeRegex ().setSymbolTable (this );
141
- }
142
-
143
- // public boolean applyPropertyAlias0(String propertyName,
144
- // String propertyValue, UnicodeSet result) {
145
- // if (!propertyName.contains("*")) {
146
- // return applyPropertyAlias(propertyName, propertyValue, result);
147
- // }
148
- // String[] propertyNames = propertyName.split("[*]");
149
- // for (int i = propertyNames.length - 1; i >= 0; ++i) {
150
- // String pname = propertyNames[i];
151
- //
152
- // }
153
- // return null;
154
- // }
155
-
156
// Resolves one property query (propertyName, propertyValue) into result. Removed by this diff.
// Query syntax handled here, as visible in the code below:
//   * a U+2260 (not-equal sign) inside propertyName splits it into name and value and inverts the query;
//   * a trailing ! on the name toggles the inversion;
//   * everything up to and including the first colon is kept as a prefix for the gc and sc lookups.
// With a non-empty value the named property is queried directly.
// With an empty value the (prefix-less) name is tried, in order, as a value of the gc property,
// as a value of the sc property, as the property itself with value No under the opposite
// inversion (only for binary/enumerated/catalog properties), and finally with the empty value.
// Returns whatever status applyPropertyAlias0 reported for the attempt that was used.
- @ Override
157
- public boolean applyPropertyAlias (
158
- String propertyName , String propertyValue , UnicodeSet result ) {
159
- boolean status = false ;
160
- boolean invert = false ;
161
// Form name<U+2260>value: the text after the sign becomes (the start of) the value.
- int posNotEqual = propertyName .indexOf ('\u2260' );
162
- if (posNotEqual >= 0 ) {
163
- propertyValue =
164
- propertyValue .length () == 0
165
- ? propertyName .substring (posNotEqual + 1 )
166
- : propertyName .substring (posNotEqual + 1 ) + "=" + propertyValue ;
167
- propertyName = propertyName .substring (0 , posNotEqual );
168
- invert = true ;
169
- }
170
// A trailing ! flips the inversion, so it cancels an inversion set by the not-equal form above.
- if (propertyName .endsWith ("!" )) {
171
- propertyName = propertyName .substring (0 , propertyName .length () - 1 );
172
- invert = !invert ;
173
- }
174
// versionPrefix keeps the colon; it is only used to build the gc and sc property names below.
- int posColon = propertyName .indexOf (':' );
175
- String versionPrefix = "" ;
176
- String versionlessPropertyName = propertyName ;
177
- if (posColon >= 0 ) {
178
- versionPrefix = propertyName .substring (0 , posColon + 1 );
179
- versionlessPropertyName = propertyName .substring (posColon + 1 );
180
- }
181
-
182
// The factory is created on first use rather than in the constructor.
- if (factory == null ) {
183
- factory = XPropertyFactory .make ();
184
- }
185
-
186
- var gcProp = factory .getProperty (versionPrefix + "gc" );
187
- var scProp = factory .getProperty (versionPrefix + "sc" );
188
-
189
// Looked up with the full name, prefix included.
- UnicodeProperty prop = factory .getProperty (propertyName );
190
- if (propertyValue .length () != 0 ) {
191
// NOTE(review): a null prop is passed on to applyPropertyAlias0, which calls prop.getName()
// before its own null check.
- if (prop == null ) {
192
- propertyValue = propertyValue .trim ();
193
- } else if (prop .isTrimmable ()) {
194
- propertyValue = propertyValue .trim ();
195
- } else {
196
// NOTE(review): unused local with no effect; looks like a leftover debugger anchor.
- int debug = 0 ;
197
- }
198
- status = applyPropertyAlias0 (prop , propertyValue , result , invert );
199
- } else {
200
// Empty value: first interpret the name as a value of the gc property.
- try {
201
- status = applyPropertyAlias0 (gcProp , versionlessPropertyName , result , invert );
202
// Any exception is discarded so that the next interpretation can be tried.
- } catch (Exception e ) {
203
- }
204
// NOTE(review): stray empty statement.
- ;
205
- if (!status ) {
206
// Next, interpret the name as a value of the sc property; failures are again discarded.
- try {
207
- status =
208
- applyPropertyAlias0 (
209
- scProp , versionlessPropertyName , result , invert );
210
- } catch (Exception e ) {
211
- }
212
- if (!status ) {
213
// NOTE(review): prop is dereferenced without a null check here, unlike in the non-empty-value branch.
- if (prop .isType (UnicodeProperty .BINARY_OR_ENUMERATED_OR_CATALOG_MASK )) {
214
// Query value No with the inversion flipped, i.e. select what is not No.
- try {
215
- status = applyPropertyAlias0 (prop , "No" , result , !invert );
216
- } catch (Exception e ) {
217
- }
218
- }
219
// Last resort: query the property with the empty value; exceptions propagate from this call.
- if (!status ) {
220
- status = applyPropertyAlias0 (prop , "" , result , invert );
221
- }
222
- }
223
- }
224
- }
225
- return status ;
226
- }
227
-
228
// Table of coarse General_Category values (Other, Letter, Cased_Letter, Mark, Number,
// Punctuation, Symbol, Separator), each mapped to the two-letter values it groups.
// applyPropertyAlias0 iterates over it when the queried property is named General_Category
// and, on a matching alias, adds the set of every listed two-letter value to the result.
// Built with Map.of, so the map cannot be modified after creation. Removed by this diff.
- private static Map <UcdPropertyValues .General_Category_Values , String []>
229
- COARSE_GENERAL_CATEGORIES =
230
- Map .of (
231
- UcdPropertyValues .General_Category_Values .Other ,
232
- new String [] {"Cc" , "Cf" , "Cn" , "Co" , "Cs" },
233
- UcdPropertyValues .General_Category_Values .Letter ,
234
- new String [] {"Ll" , "Lm" , "Lo" , "Lt" , "Lu" },
235
- UcdPropertyValues .General_Category_Values .Cased_Letter ,
236
- new String [] {"Ll" , "Lt" , "Lu" },
237
- UcdPropertyValues .General_Category_Values .Mark ,
238
- new String [] {"Mc" , "Me" , "Mn" },
239
- UcdPropertyValues .General_Category_Values .Number ,
240
- new String [] {"Nd" , "Nl" , "No" },
241
- UcdPropertyValues .General_Category_Values .Punctuation ,
242
- new String [] {"Pc" , "Pd" , "Pe" , "Pf" , "Pi" , "Po" , "Ps" },
243
- UcdPropertyValues .General_Category_Values .Symbol ,
244
- new String [] {"Sc" , "Sk" , "Sm" , "So" },
245
- UcdPropertyValues .General_Category_Values .Separator ,
246
- new String [] {"Zl" , "Zp" , "Zs" });
247
-
248
// Computes the set of code points selected by prop and propertyValue and stores it in result,
// which is cleared first. Removed by this diff.
// Value forms recognised after trimming:
//   * /regex/       the text between the slashes is transformed by unicodeRegex and matched
//                   against property values;
//   * @code point@  compares each code point with its own property value;
//   * @none@        selects through UnicodeProperty.NULL_MATCHER;
//   * @other@       compares, per code point, the values of prop and of the property named other;
//   * anything else is validated with isValid and looked up as a literal value.
// The property named age gets ordered comparison instead of plain equality.
// invert requests the opposite selection.
// Throws IllegalArgumentException for a literal value rejected by isValid.
- // TODO(eggrobin): I think this function only ever returns true; might as well make it void.
249
- private boolean applyPropertyAlias0 (
250
- UnicodeProperty prop , String propertyValue , UnicodeSet result , boolean invert ) {
251
- result .clear ();
252
// NOTE(review): prop is dereferenced here, before the null check further down, so a null prop
// fails at this line and never reaches the final throw of this method.
- String propertyName = prop .getName ();
253
- String trimmedPropertyValue = propertyValue .trim ();
254
- PatternMatcher patternMatcher = null ;
255
// The length test rejects a lone slash, which both starts and ends with a slash.
- if (trimmedPropertyValue .length () > 1
256
- && trimmedPropertyValue .startsWith ("/" )
257
- && trimmedPropertyValue .endsWith ("/" )) {
258
- String fixedRegex =
259
- unicodeRegex .transform (
260
- trimmedPropertyValue .substring (
261
- 1 , trimmedPropertyValue .length () - 1 ));
262
- patternMatcher = new UnicodeProperty .RegexMatcher ().set (fixedRegex );
263
- }
264
- UnicodeProperty otherProperty = null ;
265
- boolean testCp = false ;
266
- boolean testNone = false ;
267
// The @name@ form: the two reserved names are checked before falling back to a property lookup.
- if (trimmedPropertyValue .length () > 1
268
- && trimmedPropertyValue .startsWith ("@" )
269
- && trimmedPropertyValue .endsWith ("@" )) {
270
- String otherPropName =
271
- trimmedPropertyValue .substring (1 , trimmedPropertyValue .length () - 1 ).trim ();
272
- if (UnicodeProperty .equalNames ("code point" , otherPropName )) {
273
- testCp = true ;
274
- } else if (UnicodeProperty .equalNames ("none" , otherPropName )) {
275
- testNone = true ;
276
- } else {
277
// If this lookup returns null, none of the @ branches below is taken and the value is
// handled as a literal instead.
- otherProperty = factory .getProperty (otherPropName );
278
- }
279
- }
280
- // TODO(egg): Name and Name_Alias require special handling (UAX44-LM2), and
281
- // treating Name_Alias as aliases for Name.
282
- boolean isAge = UnicodeProperty .equalNames ("age" , propertyName );
283
- if (prop != null ) {
284
- UnicodeSet set ;
285
- if (testCp ) {
286
- set = new UnicodeSet ();
287
// Scans every code point; the inversion is folded into the comparison, hence the reset below.
- for (int i = 0 ; i <= 0x10FFFF ; ++i ) {
288
- if (invert != UnicodeProperty .equals (i , prop .getValue (i ))) {
289
- set .add (i );
290
- }
291
- }
292
- invert = false ;
293
- } else if (testNone ) {
294
// invert is left untouched here, so the generic inversion step at the end still applies.
- set = prop .getSet (UnicodeProperty .NULL_MATCHER );
295
- } else if (otherProperty != null ) {
296
// NOTE(review): writes to standard error on every such query; looks like leftover debug output.
- System .err .println (otherProperty + ", " + invert );
297
- set = new UnicodeSet ();
298
// Same scheme as the code point comparison: inversion is applied inside the loop.
- for (int i = 0 ; i <= 0x10FFFF ; ++i ) {
299
- String v1 = prop .getValue (i );
300
- String v2 = otherProperty .getValue (i );
301
- if (invert != UnicodeProperty .equals (v1 , v2 )) {
302
- set .add (i );
303
- }
304
- }
305
- invert = false ;
306
- } else if (patternMatcher == null ) {
307
// Literal value: validated with the untrimmed propertyValue.
- if (!isValid (prop , propertyValue )) {
308
- throw new IllegalArgumentException (
309
- "The value '"
310
- + propertyValue
311
- + "' is illegal. Values for "
312
- + propertyName
313
- + " must be in "
314
- + prop .getAvailableValues ()
315
- + " or in "
316
- + prop .getValueAliases ());
317
- }
318
- if (isAge ) {
319
// Age is matched by version comparison (relation geq, nulls ordered first) instead of equality.
// NOTE(review): presumably this selects code points assigned in or before the given
// version; confirm against ComparisonMatcher.
- set =
320
- prop .getSet (
321
- new UnicodePropertySymbolTable .ComparisonMatcher <
322
- VersionInfo >(
323
- UnicodePropertySymbolTable .parseVersionInfoOrMax (
324
- propertyValue ),
325
- UnicodePropertySymbolTable .Relation .geq ,
326
- Comparator .nullsFirst (Comparator .naturalOrder ()),
327
- UnicodePropertySymbolTable ::parseVersionInfoOrMax ));
328
- } else {
329
// Coarse General_Category values are expanded into the union of their two-letter values.
- if (prop .getName ().equals ("General_Category" )) {
330
- for (var entry : COARSE_GENERAL_CATEGORIES .entrySet ()) {
331
- final var aliases = entry .getKey ().getNames ().getAllNames ();
332
- if (aliases .stream ()
333
- .anyMatch (
334
- a ->
335
- UnicodeProperty .equalNames (
336
- propertyValue , a ))) {
337
- for (var value : entry .getValue ()) {
338
- prop .getSet (value , result );
339
- }
340
// NOTE(review): this early return skips the inversion step at the end of the method,
// so invert has no effect on coarse categories.
- return true ;
341
- }
342
- }
343
- }
344
- set = prop .getSet (propertyValue );
345
// An empty result for the Name property falls back to a Name_Alias lookup in the same factory.
- if (set .isEmpty ()
346
- && prop instanceof IndexUnicodeProperties .IndexUnicodeProperty
347
- && prop .getName ().equals ("Name" )) {
348
- set =
349
- ((IndexUnicodeProperties .IndexUnicodeProperty ) prop )
350
- .getFactory ()
351
- .getProperty (UcdProperty .Name_Alias )
352
- .getSet (propertyValue );
353
- }
354
- }
355
- } else if (isAge ) {
356
// Regex on age: for each available value matching the regex, add the sets of all values
// that compare less than or equal to it.
- set = new UnicodeSet ();
357
- List <String > values = prop .getAvailableValues ();
358
- for (String value : values ) {
359
- if (patternMatcher .test (value )) {
360
- for (String other : values ) {
361
// NOTE(review): String.compareTo orders text lexicographically, not numerically by version;
// check how multi-digit versions are spelled in getAvailableValues.
- if (other .compareTo (value ) <= 0 ) {
362
- set .addAll (prop .getSet (other ));
363
- }
364
- }
365
- }
366
- }
367
- } else {
368
- set = prop .getSet (patternMatcher );
369
- }
370
// Generic inversion: age is complemented in place; any other property is complemented
// relative to the key set of its UnicodeMap.
- if (invert ) {
371
- if (isAge ) {
372
- set .complement ();
373
- } else {
374
- set = prop .getUnicodeMap ().keySet ().removeAll (set );
375
- }
376
- }
377
- result .addAll (set );
378
- return true ;
379
- }
380
- throw new IllegalArgumentException ("Illegal property: " + propertyName );
381
- }
382
-
383
// Reports whether propertyValue is an acceptable value for prop by delegating to
// prop.isValidValue. Removed by this diff.
// The commented-out lines inside are an unfinished special case for General_Category.
- private boolean isValid (UnicodeProperty prop , String propertyValue ) {
384
- // if (prop.getName().equals("General_Category")) {
385
- // if (propertyValue)
386
- // }
387
- return prop .isValidValue (propertyValue );
388
- }
389
- }
390
129
}
0 commit comments