Skip to content

Propertiness #1064

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Apr 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 2 additions & 27 deletions UnicodeJsps/src/main/java/org/unicode/jsp/Common.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,7 @@
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.text.StringTransform;
import com.ibm.icu.text.Transform;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;
import java.util.Arrays;
import java.util.List;
import org.unicode.jsp.XPropertyFactory.HanType.HanTypeValues;
Expand Down Expand Up @@ -47,18 +45,7 @@ public String transform(String source) {
};

static List<String> XPROPERTY_NAMES =
Arrays.asList(
new String[] {
"toNFC",
"toNFD",
"toNFKC",
"toNFKD",
"toCasefold",
"toLowercase",
"toUppercase",
"toTitlecase",
"subhead"
});
Arrays.asList(new String[] {"toNFC", "toNFD", "toNFKC", "toNFKD", "subhead"});
static final int XSTRING_START = UProperty.STRING_LIMIT;

public static String getXStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
Expand All @@ -72,14 +59,6 @@ public static String getXStringPropertyValue(int propertyEnum, int codepoint, in
return Common.MyNormalize(codepoint, Normalizer.NFKC);
case Common.TO_NFKD:
return Common.MyNormalize(codepoint, Normalizer.NFKD);
case Common.TO_CASEFOLD:
return UCharacter.foldCase(UTF16.valueOf(codepoint), true);
case Common.TO_LOWERCASE:
return UCharacter.toLowerCase(ULocale.ROOT, UTF16.valueOf(codepoint));
case Common.TO_UPPERCASE:
return UCharacter.toUpperCase(ULocale.ROOT, UTF16.valueOf(codepoint));
case Common.TO_TITLECASE:
return UCharacter.toTitleCase(ULocale.ROOT, UTF16.valueOf(codepoint), null);
case Common.SUBHEAD:
return UnicodeUtilities.getSubheader().getSubheader(codepoint);
}
Expand Down Expand Up @@ -120,11 +99,7 @@ static String MyNormalize(String string, Mode mode) {
static final int TO_NFD = UProperty.STRING_LIMIT + 1;
static final int TO_NFKC = UProperty.STRING_LIMIT + 2;
static final int TO_NFKD = UProperty.STRING_LIMIT + 3;
static final int TO_CASEFOLD = UProperty.STRING_LIMIT + 4;
static final int TO_LOWERCASE = UProperty.STRING_LIMIT + 5;
static final int TO_UPPERCASE = UProperty.STRING_LIMIT + 6;
static final int TO_TITLECASE = UProperty.STRING_LIMIT + 7;
public static final int SUBHEAD = TO_TITLECASE + 1;
public static final int SUBHEAD = TO_NFKD + 1;
static final int XSTRING_LIMIT = SUBHEAD + 1;
// static UnicodeSet isCaseFolded = new UnicodeSet();
// static UnicodeSet isLowercase = new UnicodeSet();
Expand Down
167 changes: 112 additions & 55 deletions UnicodeJsps/src/main/java/org/unicode/jsp/UnicodeUtilities.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
import org.unicode.idna.IdnaTypes;
import org.unicode.idna.Punycode;
import org.unicode.idna.Uts46;
import org.unicode.props.DerivedPropertyStatus;
import org.unicode.props.UcdProperty;
import org.unicode.props.UcdPropertyValues.Age_Values;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.UnicodeMapProperty;
Expand Down Expand Up @@ -1440,6 +1442,42 @@ public static void showProperties(

String kRSUnicode = getFactory().getProperty("kRSUnicode").getValue(cp);
boolean isUnihan = kRSUnicode != null;
List<UcdProperty> indexedProperties =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

optional -- might simplify something:

How about, rather than just building separate lists of properties, you add an enum PropCategory { UCD, NON_UCD, ... CJK, ...}, and create a Map<PropCategory, List<UcdProperty>>?

You could then also use maps from PropCategory to table headings and such.


Does it matter if these lists are List's? Or do you just need Collection's?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it matter if these lists are List's?

Not really, I convert them to lists of String below anyway (because one of them is not a list of UcdProperty, namely the list of stuff that gets added in the tools).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will merge this now to unblock John and see if I can come up with something cleaner in a subsequent PR.

sortedProps.stream()
.map(UcdProperty::forString)
.filter(p -> p != null)
.collect(Collectors.toList());
List<UcdProperty> ucdProperties =
indexedProperties.stream()
.filter(
p ->
p.getDerivedStatus() == DerivedPropertyStatus.Approved
|| p.getDerivedStatus()
== DerivedPropertyStatus.Provisional)
.collect(Collectors.toList());
List<UcdProperty> nonUCDProperties =
indexedProperties.stream()
.filter(p -> p.getDerivedStatus() == DerivedPropertyStatus.NonUCDProperty)
.collect(Collectors.toList());
List<UcdProperty> ucdNonProperties =
indexedProperties.stream()
.filter(p -> p.getDerivedStatus() == DerivedPropertyStatus.UCDNonProperty)
.collect(Collectors.toList());
// Non-UCD non-properties, and things added directly in the tools.
List<String> otherData =
sortedProps.stream()
.filter(
p ->
UcdProperty.forString(p) == null
|| UcdProperty.forString(p).getDerivedStatus()
== DerivedPropertyStatus.NonUCDNonProperty)
.collect(Collectors.toList());

List<UcdProperty> cjkProperties =
ucdProperties.stream()
.filter(p -> p.getNames().getShortName().startsWith("cjk"))
.collect(Collectors.toList());
ucdProperties.removeIf(p -> p.getNames().getShortName().startsWith("cjk"));

Age_Values age = Age_Values.forName(getFactory().getProperty("Age").getValue(cp));
VersionInfo minVersion =
Expand All @@ -1456,71 +1494,86 @@ public static void showProperties(
+ "</p>");
}

out.append(
"<table class='propTable'>"
+ "<caption>"
+ (isUnihan ? "non-Unihan properties for U+" : "Properties for U+")
+ hex
+ "</caption>"
+ "<tr><th>With Non-Default Values</th><th>With Default Values</th></tr>"
+ "<tr><td width='50%'>\n");
out.append("<table width='100%'>\n");

List<String> unihanProperties = new ArrayList<>();
VersionInfo maxVersion =
showDevProperties ? Settings.LATEST_VERSION_INFO : Settings.LAST_VERSION_INFO;
for (String propName : sortedProps) {
UnicodeProperty prop = getFactory().getProperty(propName);
out.append("<table class='propTable'>");
showProperties(
ucdProperties.stream().map(UcdProperty::toString).collect(Collectors.toList()),
(isUnihan ? "Non-Unihan " : "")
+ "Normative, Informative, Contributory, and (Provisional) UCD properties for U+"
+ hex,
cp,
minVersion,
maxVersion,
showDevProperties,
out);
showProperties(
nonUCDProperties.stream().map(UcdProperty::toString).collect(Collectors.toList()),
"Non-UCD properties for U+" + hex,
cp,
minVersion,
maxVersion,
showDevProperties,
out);
showProperties(
ucdNonProperties.stream().map(UcdProperty::toString).collect(Collectors.toList()),
"Other " + (isUnihan ? "non-Unihan " : "") + "UCD data for U+" + hex,
cp,
minVersion,
maxVersion,
showDevProperties,
out);
if (isUnihan) {
showProperties(
cjkProperties.stream().map(UcdProperty::toString).collect(Collectors.toList()),
"Unihan Normative, Informative, and (Provisional) properties for U+" + hex,
cp,
minVersion,
maxVersion,
showDevProperties,
out);
}
showProperties(
otherData,
"Other information on U+" + hex,
cp,
minVersion,
maxVersion,
showDevProperties,
out);
out.append("</table>\n");
}

private static void showProperties(
List<String> properties,
String title,
int cp,
VersionInfo minVersion,
VersionInfo maxVersion,
boolean showDevProperties,
Appendable out)
throws IOException {
out.append("<tr><th colspan=2>" + title + "</th></tr>" + "<tr><td width='50%'>\n");
out.append("<table width='100%'>\n");
for (int i = 0; i < properties.size() / 2; ++i) {
UnicodeProperty prop = getFactory().getProperty(properties.get(i));
if (prop.getName().equals("confusable")) continue;
if (prop.getFirstNameAlias().startsWith("cjk")) {
unihanProperties.add(propName);
continue;
}

boolean isDefault = prop.isDefault(cp);
if (isDefault) continue;
showPropertyValue(propName, cp, minVersion, maxVersion, isDefault, out);
showPropertyValue(properties.get(i), cp, minVersion, maxVersion, out);
}
out.append("</table>\n");

out.append("</td><td width='50%'>\n");

out.append("<table width='100%'>\n");
for (String propName : sortedProps) {
UnicodeProperty prop = getFactory().getProperty(propName);
for (int i = properties.size() / 2; i < properties.size(); ++i) {
UnicodeProperty prop = getFactory().getProperty(properties.get(i));
if (prop.getName().equals("confusable")) continue;
if (prop.getFirstNameAlias().startsWith("cjk")) {
continue;
}

boolean isDefault = prop.isDefault(cp);
if (!isDefault) continue;
showPropertyValue(propName, cp, minVersion, maxVersion, isDefault, out);
showPropertyValue(properties.get(i), cp, minVersion, maxVersion, out);
}
out.append("</table>\n");

out.append("</td></tr></table>\n");
if (isUnihan) {
out.append(
"<table class='propTable'>"
+ "<caption>"
+ "Unihan properties for U+"
+ hex
+ "</caption>"
+ "<tr><td width='50%'>\n");
out.append("<table width='100%'>\n");
for (int i = 0; i < unihanProperties.size() / 2; ++i) {
showPropertyValue(unihanProperties.get(i), cp, minVersion, maxVersion, false, out);
}
out.append("</table>\n");
out.append("</td><td width='50%'>\n");
out.append("<table width='100%'>\n");
for (int i = unihanProperties.size() / 2; i < unihanProperties.size(); ++i) {
showPropertyValue(unihanProperties.get(i), cp, minVersion, maxVersion, false, out);
}
out.append("</table>\n");
out.append("</td></tr></table>\n");
}
out.append("</td></tr>\n");
}

private static StringBuilder displayConfusables(int codepoint) {
Expand Down Expand Up @@ -1648,10 +1701,14 @@ private static void showPropertyValue(
int codePoint,
VersionInfo minVersion,
VersionInfo maxVersion,
boolean isDefault,
Appendable out)
throws IOException {
String defaultClass = isDefault ? " class='default'" : "";
String defaultClass =
getFactory().getProperty(propName).isDefault(codePoint) ? " class='default'" : "";
var indexedProperty = UcdProperty.forString(propName);
final boolean provisional =
indexedProperty != null
&& indexedProperty.getDerivedStatus() == DerivedPropertyStatus.Provisional;
class PropertyAssignment {
VersionInfo first;
VersionInfo last;
Expand Down Expand Up @@ -1709,12 +1766,12 @@ class PropertyAssignment {
history.add(current);
}
out.append(
"<tr><th><a target='c' href='properties.jsp?a="
"<tr><th width='50%'><a target='c' href='properties.jsp?a="
+ propName
+ "#"
+ propName
+ "'>"
+ propName
+ (provisional ? "(" + propName + ")" : propName)
+ "</a></th>");
for (PropertyAssignment assignment : history) {
String first =
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package org.unicode.props;

/**
* A property status that can be derived from the data files defining the properties. Unlike the
* statuses in PropertyStatus.java, this does not reflect distinctions such as Normative vs.
* Informative vs. Contributory vs. Deprecated etc., as all of those are equal in the eyes of
* PropertyAliases.txt. It does, however, distinguish Provisional properties from approved ones.
*/
public enum DerivedPropertyStatus {
/**
* Properties that are part of the UCD and subject to UTC decisions. These are the ones listed in
* PropertyAliases.txt. Their actual status may be Normative, Informative, or Contributory.
*/
Approved,
/**
* Provisional properties. These are actual UCD properties, but they are not listed in
* PropertyAliases.txt, and changes to them need not be approved by the UTC. They may be removed
* entirely from the UCD (though they remain in the tools, as the tools have history).
*/
Provisional,
/**
* Data in UCD files that does not specify character properties. Some of this data is exposed in
* the form of properties in the tools, because all we have is a hammer.
*/
UCDNonProperty,
/**
* Properties defined outside the UCD, e.g., in UTS #39 or UTS #51. These are explicitly
* described as properties in those documents.
*/
NonUCDProperty,
/** Non-property data defined outside the UCD. */
NonUCDNonProperty,
}
Loading
Loading