Skip to content

Commit ccbca10

Browse files
committed
Merge remote-tracking branch 'la-vache/main' into propertiness
2 parents 6ede68f + 7628438 commit ccbca10

File tree

160 files changed

+27178
-1345
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

160 files changed

+27178
-1345
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ perf-*.xml
4343
test-*.xml
4444

4545
# Directories
46+
.idea/
4647
.settings/
4748
.vs/
4849
.vscode/

UnicodeJsps/src/main/java/org/unicode/jsp/BidiCharMap.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class BidiCharMap {
4848

4949
for (UnicodeSetIterator it =
5050
new UnicodeSetIterator(
51-
new UnicodeSet("[[:ascii:]-[[:cc:]-[:whitespace:]]]"));
51+
new UnicodeSet("[[:Block=ASCII:]-[[:cc:]-[:whitespace:]]]"));
5252
it.next(); ) {
5353
asciiHackMap.put(
5454
it.codepoint,

UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeSetUtilities.java

Lines changed: 7 additions & 268 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,11 @@
77
import com.ibm.icu.text.UTF16.StringComparator;
88
import com.ibm.icu.text.UnicodeSet;
99
import com.ibm.icu.util.ULocale;
10-
import com.ibm.icu.util.VersionInfo;
1110
import java.text.ParsePosition;
1211
import java.util.Comparator;
13-
import java.util.List;
14-
import java.util.Map;
1512
import java.util.regex.Pattern;
1613
import org.unicode.cldr.util.MultiComparator;
17-
import org.unicode.props.IndexUnicodeProperties;
18-
import org.unicode.props.UcdProperty;
19-
import org.unicode.props.UcdPropertyValues;
20-
import org.unicode.props.UnicodeProperty;
21-
import org.unicode.props.UnicodeProperty.PatternMatcher;
22-
import org.unicode.props.UnicodePropertySymbolTable;
14+
import org.unicode.text.UCD.VersionedSymbolTable;
2315

2416
public class UnicodeSetUtilities {
2517

@@ -114,7 +106,12 @@ public static UnicodeSet parseUnicodeSet(String input) {
114106
input = input.trim() + "]]]]]";
115107
String parseInput = "[" + input + "]]]]]";
116108
ParsePosition parsePosition = new ParsePosition(0);
117-
UnicodeSet result = new UnicodeSet(parseInput, parsePosition, fullSymbolTable);
109+
UnicodeSet result =
110+
new UnicodeSet(
111+
parseInput,
112+
parsePosition,
113+
VersionedSymbolTable.forReview(UcdLoader::getOldestLoadedUcd)
114+
.setUnversionedExtensions(XPropertyFactory.make()));
118115
int parseEnd = parsePosition.getIndex();
119116
if (parseEnd != parseInput.length()
120117
&& !UnicodeSetUtilities.OK_AT_END.containsAll(parseInput.substring(parseEnd))) {
@@ -129,262 +126,4 @@ public static UnicodeSet parseUnicodeSet(String input) {
129126
}
130127
return result;
131128
}
132-
133-
static UnicodeSet.XSymbolTable fullSymbolTable = new MySymbolTable();
134-
135-
private static class MySymbolTable extends UnicodeSet.XSymbolTable {
136-
UnicodeRegex unicodeRegex;
137-
XPropertyFactory factory;
138-
139-
public MySymbolTable() {
140-
unicodeRegex = new UnicodeRegex().setSymbolTable(this);
141-
}
142-
143-
// public boolean applyPropertyAlias0(String propertyName,
144-
// String propertyValue, UnicodeSet result) {
145-
// if (!propertyName.contains("*")) {
146-
// return applyPropertyAlias(propertyName, propertyValue, result);
147-
// }
148-
// String[] propertyNames = propertyName.split("[*]");
149-
// for (int i = propertyNames.length - 1; i >= 0; ++i) {
150-
// String pname = propertyNames[i];
151-
//
152-
// }
153-
// return null;
154-
// }
155-
156-
@Override
157-
public boolean applyPropertyAlias(
158-
String propertyName, String propertyValue, UnicodeSet result) {
159-
boolean status = false;
160-
boolean invert = false;
161-
int posNotEqual = propertyName.indexOf('\u2260');
162-
if (posNotEqual >= 0) {
163-
propertyValue =
164-
propertyValue.length() == 0
165-
? propertyName.substring(posNotEqual + 1)
166-
: propertyName.substring(posNotEqual + 1) + "=" + propertyValue;
167-
propertyName = propertyName.substring(0, posNotEqual);
168-
invert = true;
169-
}
170-
if (propertyName.endsWith("!")) {
171-
propertyName = propertyName.substring(0, propertyName.length() - 1);
172-
invert = !invert;
173-
}
174-
int posColon = propertyName.indexOf(':');
175-
String versionPrefix = "";
176-
String versionlessPropertyName = propertyName;
177-
if (posColon >= 0) {
178-
versionPrefix = propertyName.substring(0, posColon + 1);
179-
versionlessPropertyName = propertyName.substring(posColon + 1);
180-
}
181-
182-
if (factory == null) {
183-
factory = XPropertyFactory.make();
184-
}
185-
186-
var gcProp = factory.getProperty(versionPrefix + "gc");
187-
var scProp = factory.getProperty(versionPrefix + "sc");
188-
189-
UnicodeProperty prop = factory.getProperty(propertyName);
190-
if (propertyValue.length() != 0) {
191-
if (prop == null) {
192-
propertyValue = propertyValue.trim();
193-
} else if (prop.isTrimmable()) {
194-
propertyValue = propertyValue.trim();
195-
} else {
196-
int debug = 0;
197-
}
198-
status = applyPropertyAlias0(prop, propertyValue, result, invert);
199-
} else {
200-
try {
201-
status = applyPropertyAlias0(gcProp, versionlessPropertyName, result, invert);
202-
} catch (Exception e) {
203-
}
204-
;
205-
if (!status) {
206-
try {
207-
status =
208-
applyPropertyAlias0(
209-
scProp, versionlessPropertyName, result, invert);
210-
} catch (Exception e) {
211-
}
212-
if (!status) {
213-
if (prop.isType(UnicodeProperty.BINARY_OR_ENUMERATED_OR_CATALOG_MASK)) {
214-
try {
215-
status = applyPropertyAlias0(prop, "No", result, !invert);
216-
} catch (Exception e) {
217-
}
218-
}
219-
if (!status) {
220-
status = applyPropertyAlias0(prop, "", result, invert);
221-
}
222-
}
223-
}
224-
}
225-
return status;
226-
}
227-
228-
private static Map<UcdPropertyValues.General_Category_Values, String[]>
229-
COARSE_GENERAL_CATEGORIES =
230-
Map.of(
231-
UcdPropertyValues.General_Category_Values.Other,
232-
new String[] {"Cc", "Cf", "Cn", "Co", "Cs"},
233-
UcdPropertyValues.General_Category_Values.Letter,
234-
new String[] {"Ll", "Lm", "Lo", "Lt", "Lu"},
235-
UcdPropertyValues.General_Category_Values.Cased_Letter,
236-
new String[] {"Ll", "Lt", "Lu"},
237-
UcdPropertyValues.General_Category_Values.Mark,
238-
new String[] {"Mc", "Me", "Mn"},
239-
UcdPropertyValues.General_Category_Values.Number,
240-
new String[] {"Nd", "Nl", "No"},
241-
UcdPropertyValues.General_Category_Values.Punctuation,
242-
new String[] {"Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps"},
243-
UcdPropertyValues.General_Category_Values.Symbol,
244-
new String[] {"Sc", "Sk", "Sm", "So"},
245-
UcdPropertyValues.General_Category_Values.Separator,
246-
new String[] {"Zl", "Zp", "Zs"});
247-
248-
// TODO(eggrobin): I think this function only ever returns true; might as well make it void.
249-
private boolean applyPropertyAlias0(
250-
UnicodeProperty prop, String propertyValue, UnicodeSet result, boolean invert) {
251-
result.clear();
252-
String propertyName = prop.getName();
253-
String trimmedPropertyValue = propertyValue.trim();
254-
PatternMatcher patternMatcher = null;
255-
if (trimmedPropertyValue.length() > 1
256-
&& trimmedPropertyValue.startsWith("/")
257-
&& trimmedPropertyValue.endsWith("/")) {
258-
String fixedRegex =
259-
unicodeRegex.transform(
260-
trimmedPropertyValue.substring(
261-
1, trimmedPropertyValue.length() - 1));
262-
patternMatcher = new UnicodeProperty.RegexMatcher().set(fixedRegex);
263-
}
264-
UnicodeProperty otherProperty = null;
265-
boolean testCp = false;
266-
boolean testNone = false;
267-
if (trimmedPropertyValue.length() > 1
268-
&& trimmedPropertyValue.startsWith("@")
269-
&& trimmedPropertyValue.endsWith("@")) {
270-
String otherPropName =
271-
trimmedPropertyValue.substring(1, trimmedPropertyValue.length() - 1).trim();
272-
if (UnicodeProperty.equalNames("code point", otherPropName)) {
273-
testCp = true;
274-
} else if (UnicodeProperty.equalNames("none", otherPropName)) {
275-
testNone = true;
276-
} else {
277-
otherProperty = factory.getProperty(otherPropName);
278-
}
279-
}
280-
// TODO(egg): Name and Name_Alias require special handling (UAX44-LM2), and
281-
// treating Name_Alias as aliases for Name.
282-
boolean isAge = UnicodeProperty.equalNames("age", propertyName);
283-
if (prop != null) {
284-
UnicodeSet set;
285-
if (testCp) {
286-
set = new UnicodeSet();
287-
for (int i = 0; i <= 0x10FFFF; ++i) {
288-
if (invert != UnicodeProperty.equals(i, prop.getValue(i))) {
289-
set.add(i);
290-
}
291-
}
292-
invert = false;
293-
} else if (testNone) {
294-
set = prop.getSet(UnicodeProperty.NULL_MATCHER);
295-
} else if (otherProperty != null) {
296-
System.err.println(otherProperty + ", " + invert);
297-
set = new UnicodeSet();
298-
for (int i = 0; i <= 0x10FFFF; ++i) {
299-
String v1 = prop.getValue(i);
300-
String v2 = otherProperty.getValue(i);
301-
if (invert != UnicodeProperty.equals(v1, v2)) {
302-
set.add(i);
303-
}
304-
}
305-
invert = false;
306-
} else if (patternMatcher == null) {
307-
if (!isValid(prop, propertyValue)) {
308-
throw new IllegalArgumentException(
309-
"The value '"
310-
+ propertyValue
311-
+ "' is illegal. Values for "
312-
+ propertyName
313-
+ " must be in "
314-
+ prop.getAvailableValues()
315-
+ " or in "
316-
+ prop.getValueAliases());
317-
}
318-
if (isAge) {
319-
set =
320-
prop.getSet(
321-
new UnicodePropertySymbolTable.ComparisonMatcher<
322-
VersionInfo>(
323-
UnicodePropertySymbolTable.parseVersionInfoOrMax(
324-
propertyValue),
325-
UnicodePropertySymbolTable.Relation.geq,
326-
Comparator.nullsFirst(Comparator.naturalOrder()),
327-
UnicodePropertySymbolTable::parseVersionInfoOrMax));
328-
} else {
329-
if (prop.getName().equals("General_Category")) {
330-
for (var entry : COARSE_GENERAL_CATEGORIES.entrySet()) {
331-
final var aliases = entry.getKey().getNames().getAllNames();
332-
if (aliases.stream()
333-
.anyMatch(
334-
a ->
335-
UnicodeProperty.equalNames(
336-
propertyValue, a))) {
337-
for (var value : entry.getValue()) {
338-
prop.getSet(value, result);
339-
}
340-
return true;
341-
}
342-
}
343-
}
344-
set = prop.getSet(propertyValue);
345-
if (set.isEmpty()
346-
&& prop instanceof IndexUnicodeProperties.IndexUnicodeProperty
347-
&& prop.getName().equals("Name")) {
348-
set =
349-
((IndexUnicodeProperties.IndexUnicodeProperty) prop)
350-
.getFactory()
351-
.getProperty(UcdProperty.Name_Alias)
352-
.getSet(propertyValue);
353-
}
354-
}
355-
} else if (isAge) {
356-
set = new UnicodeSet();
357-
List<String> values = prop.getAvailableValues();
358-
for (String value : values) {
359-
if (patternMatcher.test(value)) {
360-
for (String other : values) {
361-
if (other.compareTo(value) <= 0) {
362-
set.addAll(prop.getSet(other));
363-
}
364-
}
365-
}
366-
}
367-
} else {
368-
set = prop.getSet(patternMatcher);
369-
}
370-
if (invert) {
371-
if (isAge) {
372-
set.complement();
373-
} else {
374-
set = prop.getUnicodeMap().keySet().removeAll(set);
375-
}
376-
}
377-
result.addAll(set);
378-
return true;
379-
}
380-
throw new IllegalArgumentException("Illegal property: " + propertyName);
381-
}
382-
383-
private boolean isValid(UnicodeProperty prop, String propertyValue) {
384-
// if (prop.getName().equals("General_Category")) {
385-
// if (propertyValue)
386-
// }
387-
return prop.isValidValue(propertyValue);
388-
}
389-
}
390129
}

UnicodeJsps/src/test/java/org/unicode/jsptest/TestJsp.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -743,9 +743,9 @@ public void TestMapping() {
743743
assertContains(sample, "abcd");
744744
sample = UnicodeJsp.showTransform("bc > CB; X > xx;", "Abcd");
745745
assertContains(sample, "ACBd");
746-
sample = UnicodeJsp.showTransform("lower", "[[:ascii:]{Abcd}]");
746+
sample = UnicodeJsp.showTransform("lower", "[[:Block=ASCII:]{Abcd}]");
747747
assertContains(sample, "\u00A0A\u00A0");
748-
sample = UnicodeJsp.showTransform("bc > CB; X > xx;", "[[:ascii:]{Abcd}]");
748+
sample = UnicodeJsp.showTransform("bc > CB; X > xx;", "[[:Block=ASCII:]{Abcd}]");
749749
assertContains(sample, "\u00A0ACBd\u00A0");
750750
sample = UnicodeJsp.showTransform("casefold", "[\\u0000-\\u00FF]");
751751
assertContains(sample, "\u00A0\u00E1\u00A0");
@@ -791,7 +791,7 @@ public void TestStuff() throws IOException {
791791
UnicodeJsp.showSet(
792792
"sc gc",
793793
"",
794-
new UnicodeSet("[[:ascii:]{123}{ab}{456}]"),
794+
new UnicodeSet("[[:Block=ASCII:]{123}{ab}{456}]"),
795795
false,
796796
true,
797797
true,
@@ -912,7 +912,7 @@ public void TestParameters() {
912912

913913
@Test
914914
public void TestRegex() {
915-
final String fix = UnicodeRegex.fix("ab[[:ascii:]&[:Ll:]]*c");
915+
final String fix = UnicodeRegex.fix("ab[[:Block=ASCII:]&[:Ll:]]*c");
916916
assertEquals("", "ab[a-z]*c", fix);
917917
assertEquals(
918918
"",
@@ -1028,7 +1028,7 @@ public void TestBnf() {
10281028
+ "host = \\/\\/ reserved+;\n"
10291029
+ "query = [\\=reserved]+;\n"
10301030
+ "fragment = reserved+;\n"
1031-
+ "reserved = [[:ascii:][:sc=grek:]&[:alphabetic:]];\n",
1031+
+ "reserved = [[:Block=ASCII:][:sc=grek:]&[:alphabetic:]];\n",
10321032
"http://αβγ?huh=hi#there"
10331033
},
10341034
// {

0 commit comments

Comments
 (0)