diff --git a/UnicodeJsps/src/main/java/org/unicode/jsp/CreateInversions.java b/UnicodeJsps/src/main/java/org/unicode/jsp/CreateInversions.java deleted file mode 100644 index 7534933423..0000000000 --- a/UnicodeJsps/src/main/java/org/unicode/jsp/CreateInversions.java +++ /dev/null @@ -1,249 +0,0 @@ -package org.unicode.jsp; - -import com.ibm.icu.impl.UnicodeMap; -import com.ibm.icu.impl.UnicodeMapIterator; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.text.UnicodeSetIterator; -import java.io.IOException; - -public class CreateInversions { - - // testing - - public static void main(String[] args) { - UnicodeSet ignorables = - new UnicodeSet("[[:Cn:][:Cs:][:Co:]]") - .freeze(); // exclude unassigned, surrogates, and private use - CreateInversions createInversions = - new CreateInversions().setIgnorables(ignorables).setDelta(true); - - // check the code (by inspection) to make sure it works - // later do unit test - UnicodeSet[] tests = { - new UnicodeSet("[abcxyz]"), - new UnicodeSet("[:whitespace:]"), - new UnicodeSet("[:deprecated:]"), - }; - for (UnicodeSet test : tests) { - showSet(createInversions, test); - } - - UnicodeMap testMap = new UnicodeMap(); - testMap.putAll(new UnicodeSet("[abcxyz]"), "foo"); - showMap(createInversions, testMap); - - // check with names - for (UnicodeSet test : tests) { - testMap.clear(); - for (UnicodeSetIterator it = new UnicodeSetIterator(test); it.next(); ) { - testMap.put(it.codepoint, UCharacter.getName(it.codepoint)); - } - showMap(createInversions, testMap); - } - - // check with properties - ICUPropertyFactory propFactory = ICUPropertyFactory.make(); - UnicodeMap[] testProperties = { - propFactory.getProperty("numeric_type").getUnicodeMap(), - propFactory.getProperty("block").getUnicodeMap(), - propFactory.getProperty("word_break").getUnicodeMap(), - propFactory - .getProperty("grapheme_cluster_break") - .getUnicodeMap() - .putAll(new UnicodeSet(0xAC00, 0xD7A3), "LVT"), - // note: separating out the LV from LVT can be done more compactly with an algorithm. - // it is periodic: AC00, AC1C, AC38... - }; - for (UnicodeMap test : testProperties) { - showMap(createInversions, test); - } - - // further compaction can be done by assigning each property value to a number, and using - // that instead. - UnicodeMap source = - propFactory - .getProperty("grapheme_cluster_break") - .getUnicodeMap() - .putAll(new UnicodeSet(0xAC00, 0xD7A3), "LVT"); - UnicodeMap target = new UnicodeMap(); - int numberForValue = 0; - // iterate through the values, assigning each a number - for (Object value : source.getAvailableValues()) { - target.putAll(source.keySet(value), numberForValue++); - } - showMap(createInversions, target); - } - - private static void showSet(CreateInversions createInversions, UnicodeSet test) { - System.out.println("** Source:"); - System.out.println(test); - System.out.println("** Result:"); - System.out.println(createInversions.create("testName", test)); - System.out.println("Inversions: " + createInversions.getInversions()); - System.out.println(); - } - - private static void showMap(CreateInversions createInversions, UnicodeMap testMap) { - System.out.println("** Source:"); - System.out.println(testMap); - System.out.println("** Result:"); - System.out.println(createInversions.create("testName", testMap)); - System.out.println("Inversions: " + createInversions.getInversions()); - System.out.println(); - } - - // guts - - private UnicodeSet ignorables; - - private boolean delta; - - private int inversions; - - private int getInversions() { - return inversions; - } - - private CreateInversions setDelta(boolean b) { - delta = b; - return this; - } - - private CreateInversions setIgnorables(UnicodeSet ignorables) { - this.ignorables = ignorables; - return this; - } - - public String create(String name, UnicodeSet source) { - try { - return create(name, source, new StringBuilder()).toString(); - } catch (IOException e) { - throw (RuntimeException) new IllegalArgumentException("Should not happen").initCause(e); - } - } - - public String create(String name, UnicodeMap source) { - try { - return create(name, source, new StringBuilder()).toString(); - } catch (IOException e) { - throw (RuntimeException) new IllegalArgumentException("Should not happen").initCause(e); - } - } - - // public String createInversions(UnicodeSet source, String name, String - // filename) throws IOException { - // return createInversions(source, name, new StringBuilder()).close(); - // } - // - // public String createInversions(UnicodeMap source, String name, String - // filename) throws IOException { - // return createInversions(source, name, new StringBuilder()).toString(); - // } - - public Appendable create(String name, UnicodeSet source, Appendable target) throws IOException { - initShortestForm(); - target.append("var " + name + " = new Inversion([\n"); - boolean first = true; - for (UnicodeSetIterator it = new UnicodeSetIterator(source); it.nextRange(); ) { - if (first) { - first = false; - } else { - target.append(",\n"); // the linebreak is not needed, but easier to read - } - target.append(shortestForm(it.codepoint, delta)); - if (it.codepointEnd != 0x10FFFF) { - target.append(",").append(shortestForm(it.codepointEnd + 1, delta)); - } - } - target.append("\n]"); - if (delta) { - target.append(",true"); - } - target.append(");"); - return target; - } - - public Appendable create(String name, UnicodeMap source, Appendable target) throws IOException { - initShortestForm(); - target.append("var " + name + " = new Inversion([\n"); - StringBuilder valueArray = new StringBuilder(); - boolean first = true; - for (UnicodeMapIterator it = new UnicodeMapIterator(source); it.nextRange(); ) { - // skip ignorable range - if (ignorables.contains(it.codepoint, it.codepointEnd)) { - continue; - } - // also skip adjacent rows with same value - final String valueString = shortestForm(source.getValue(it.codepoint)); - if (lastValue == valueString || lastValue != null && lastValue.equals(valueString)) { - continue; - } - lastValue = valueString; - if (first) { - first = false; - } else { - target.append(",\n"); // the linebreak is not needed, but easier to read - valueArray.append(",\n"); // the linebreak is not needed, but easier to - // read - } - target.append(shortestForm(it.codepoint, delta)); - valueArray.append(valueString); - } - target.append("\n],[\n").append(valueArray).append("\n]"); - if (delta) { - target.append(",true"); - } - target.append(");"); - return target; - } - - long lastNumber; - String lastValue; - - private void initShortestForm() { - lastNumber = 0; - inversions = 0; - lastValue = null; - } - - private String shortestForm(Object value) { - String result; - if (value == null) { - result = "null"; - } else if (value instanceof Byte - || value instanceof Short - || value instanceof Integer - || value instanceof Long) { - --inversions; // don't add inversion in this case - result = shortestForm(((Number) value).longValue(), false); - } else if (value instanceof Float || value instanceof Double) { - result = value.toString(); - } else { - result = value.toString(); - // TODO optimize this - result.replace("\b", "\\\b"); // quote - result.replace("\t", "\\\t"); // quote - result.replace("\n", "\\\n"); // quote - result.replace("\u000B", "\\v"); // quote - result.replace("\f", "\\\f"); // quote - result.replace("\r", "\\\r"); // quote - result.replace("\"", "\\\""); // quote - result.replace("\\", "\\\\"); // quote - result = "\"" + result + "\""; - } - return result; - } - - private String shortestForm(long number, boolean useDelta) { - if (useDelta) { - long temp = number; - number -= lastNumber; - lastNumber = temp; - } - ++inversions; - String decimal = String.valueOf(number); - String hex = "0x" + Long.toHexString(number); - return decimal.length() < hex.length() ? decimal : hex; - } -} diff --git a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java b/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java deleted file mode 100644 index 215bdb8be2..0000000000 --- a/UnicodeJsps/src/test/java/org/unicode/jsptest/TestIcuProperties.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.unicode.jsptest; - -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.lang.UProperty.NameChoice; -import java.util.Arrays; -import java.util.List; -import org.unicode.jsp.ICUPropertyFactory; -import org.unicode.props.UnicodeProperty; - -/** "Not really a test, move" */ -public class TestIcuProperties extends TestFmwk2 { - public void testProps() { - ICUPropertyFactory factory = ICUPropertyFactory.make(); - String sample = "🤩"; - - int nameChoice = NameChoice.LONG; - List propRanges = - Arrays.asList( - UProperty.BINARY_START, UProperty.BINARY_LIMIT, - UProperty.INT_START, UProperty.INT_LIMIT, - UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT, - UProperty.STRING_START, UProperty.STRING_LIMIT); - for (int range = 0; range < propRanges.size(); range += 2) { - final int rangeStart = propRanges.get(range); - final int rangeLimit = propRanges.get(range + 1); - for (int property = rangeStart; property < rangeLimit; ++property) { - String name = UCharacter.getPropertyName(property, nameChoice); - UnicodeProperty prop = factory.getInternalProperty(name); - String value = prop.getValue(sample.codePointAt(0)); - assertTrue("property exists", prop != null); - - System.out.println(property + "\t" + name + "\tvalue('" + sample + "'): " + value); - if (rangeStart == UProperty.INT_START) { - String gap = "\t "; - for (int i = UCharacter.getIntPropertyMinValue(property); - i <= UCharacter.getIntPropertyMaxValue(property); - ++i) { - String propertyValueName = - UCharacter.getPropertyValueName(property, i, nameChoice); - System.out.print(gap + propertyValueName); - gap = ", "; - } - } - System.out.println(); - } - } - } -} diff --git a/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java b/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java index 870f68d63f..07e497cf0f 100644 --- a/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java +++ b/unicodetools/src/main/java/org/unicode/draft/FrequencyData2.java @@ -23,7 +23,8 @@ import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.Counter; import org.unicode.cldr.util.PatternCache; -import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.IndexUnicodeProperties; +import org.unicode.props.UcdProperty; import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Utility; @@ -426,7 +427,7 @@ public static void main(String[] args) throws IOException { data.showData2( "Age", - ICUPropertyFactory.make().getProperty("age"), + IndexUnicodeProperties.make().getProperty(UcdProperty.Age), new UnicodeSet("[[:cn:][:co:]]"), true); data.showData("Script/Cat", UCharacter.getPropertyEnum("script"), NO_SCRIPT); diff --git a/unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java b/unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java deleted file mode 100644 index 26769d1ab4..0000000000 --- a/unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java +++ /dev/null @@ -1,616 +0,0 @@ -/* - ******************************************************************************* - * Copyright (C) 2002-2009, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -package org.unicode.jsp; - -import com.ibm.icu.impl.Row; -import com.ibm.icu.impl.Row.R2; -import com.ibm.icu.impl.UnicodeMap; -import com.ibm.icu.lang.UCharacter; -import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.UTF16; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.VersionInfo; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Set; -import java.util.TreeSet; -import org.unicode.props.UnicodeProperty; - -/** - * Provides a general interface for Unicode Properties, and extracting sets based on those values. - * - * @author Davis - */ -public class ICUPropertyFactory extends UnicodeProperty.Factory { - - static class ICUProperty extends UnicodeProperty { - protected int propEnum = Integer.MIN_VALUE; - - protected ICUProperty(String propName, int propEnum) { - setName(propName); - this.propEnum = propEnum; - setType(internalGetPropertyType(propEnum)); - if (propEnum == UProperty.DEFAULT_IGNORABLE_CODE_POINT - || propEnum == UProperty.BIDI_CLASS - || propEnum == UProperty.BLOCK - || propEnum == UProperty.EAST_ASIAN_WIDTH - || propEnum == UProperty.LINE_BREAK - || propEnum == UProperty.NONCHARACTER_CODE_POINT - || propEnum == UProperty.PATTERN_SYNTAX - || propEnum == UProperty.PATTERN_WHITE_SPACE - || propEnum == UProperty.CHANGES_WHEN_CASEFOLDED - || propEnum == UProperty.EMOJI - || propEnum == UProperty.EMOJI_MODIFIER - || propEnum == UProperty.EMOJI_MODIFIER_BASE - || propEnum == UProperty.EMOJI_PRESENTATION - || propEnum == UProperty.EXTENDED_PICTOGRAPHIC) { - setUniformUnassigned(false); - } - } - - boolean shownException = false; - - public String _getValue(int codePoint) { - switch (propEnum) { - case UProperty.AGE: - return getAge(codePoint); - case UProperty.BIDI_MIRRORING_GLYPH: - return UTF16.valueOf(UCharacter.getMirror(codePoint)); - case UProperty.CASE_FOLDING: - return UCharacter.foldCase(UTF16.valueOf(codePoint), true); - case UProperty.ISO_COMMENT: - return UCharacter.getISOComment(codePoint); - case UProperty.LOWERCASE_MAPPING: - return UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)); - case UProperty.NAME: - return UCharacter.getName(codePoint); - case UProperty.SIMPLE_CASE_FOLDING: - return UTF16.valueOf(UCharacter.foldCase(codePoint, true)); - case UProperty.SIMPLE_LOWERCASE_MAPPING: - return UTF16.valueOf(UCharacter.toLowerCase(codePoint)); - case UProperty.SIMPLE_TITLECASE_MAPPING: - return UTF16.valueOf(UCharacter.toTitleCase(codePoint)); - case UProperty.SIMPLE_UPPERCASE_MAPPING: - return UTF16.valueOf(UCharacter.toUpperCase(codePoint)); - case UProperty.TITLECASE_MAPPING: - return UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null); - case UProperty.UNICODE_1_NAME: - return UCharacter.getName1_0(codePoint); - case UProperty.UPPERCASE_MAPPING: - return UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)); - // case NFC: return Normalizer.normalize(codePoint, Normalizer.NFC); - // case NFD: return Normalizer.normalize(codePoint, Normalizer.NFD); - // case NFKC: return Normalizer.normalize(codePoint, Normalizer.NFKC); - // case NFKD: return Normalizer.normalize(codePoint, Normalizer.NFKD); - case isNFC: - return String.valueOf( - Normalizer.normalize(codePoint, Normalizer.NFC) - .equals(UTF16.valueOf(codePoint))); - case isNFD: - return String.valueOf( - Normalizer.normalize(codePoint, Normalizer.NFD) - .equals(UTF16.valueOf(codePoint))); - case isNFKC: - return String.valueOf( - Normalizer.normalize(codePoint, Normalizer.NFKC) - .equals(UTF16.valueOf(codePoint))); - case isNFKD: - return String.valueOf( - Normalizer.normalize(codePoint, Normalizer.NFKD) - .equals(UTF16.valueOf(codePoint))); - case isLowercase: - return String.valueOf( - UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)) - .equals(UTF16.valueOf(codePoint))); - case isUppercase: - return String.valueOf( - UCharacter.toUpperCase(Locale.ENGLISH, UTF16.valueOf(codePoint)) - .equals(UTF16.valueOf(codePoint))); - case isTitlecase: - return String.valueOf( - UCharacter.toTitleCase(Locale.ENGLISH, UTF16.valueOf(codePoint), null) - .equals(UTF16.valueOf(codePoint))); - case isCasefolded: - return String.valueOf( - UCharacter.foldCase(UTF16.valueOf(codePoint), true) - .equals(UTF16.valueOf(codePoint))); - case isCased: - return String.valueOf( - UCharacter.toLowerCase(Locale.ENGLISH, UTF16.valueOf(codePoint)) - .equals(UTF16.valueOf(codePoint))); - } - if (propEnum < UProperty.INT_LIMIT) { - int enumValue = -1; - String value = null; - try { - enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum); - if (enumValue >= 0) - value = - fixedGetPropertyValueName( - propEnum, enumValue, UProperty.NameChoice.LONG); - } catch (IllegalArgumentException e) { - if (!shownException) { - System.out.println( - "Fail: " + getName() + ", " + Integer.toHexString(codePoint)); - shownException = true; - } - } - return value != null ? value : String.valueOf(enumValue); - } else if (propEnum < UProperty.DOUBLE_LIMIT) { - double num = UCharacter.getUnicodeNumericValue(codePoint); - if (num == UCharacter.NO_NUMERIC_VALUE) return null; - return Double.toString(num); - // TODO: Fix HACK -- API deficient - } - return null; - } - - @Override - protected String _getValue(String string) { - throw new UnsupportedOperationException(); - } - - private String getAge(int codePoint) { - String temp = UCharacter.getAge(codePoint).toString(); - if (temp.equals("0.0.0.0")) return "unassigned"; - if (temp.endsWith(".0.0")) return temp.substring(0, temp.length() - 4); - return temp; - } - - /** - * @param propId TODO - * @param valueAlias null if unused. - * @param valueEnum -1 if unused - * @param nameChoice - * @return - */ - private String getFixedValueAlias( - int propId, String valueAlias, int valueEnum, int nameChoice) { - if (propId >= UProperty.STRING_START) { - if (nameChoice > UProperty.NameChoice.LONG) throw new IllegalArgumentException(); - if (nameChoice != UProperty.NameChoice.LONG) return null; - return ""; - } else if (propId >= UProperty.DOUBLE_START) { - if (nameChoice > UProperty.NameChoice.LONG) throw new IllegalArgumentException(); - if (nameChoice != UProperty.NameChoice.LONG) return null; - return ""; - } - if (valueAlias != null && !valueAlias.equals("")) { - valueEnum = fixedGetPropertyValueEnum(propId, valueAlias); - } - // because these are defined badly, there may be no normal (long) name. - // if there is - String result = fixedGetPropertyValueName(propId, valueEnum, nameChoice); - if (result != null) return result; - // HACK try other namechoice - if (nameChoice == UProperty.NameChoice.LONG) { - result = fixedGetPropertyValueName(propId, valueEnum, UProperty.NameChoice.SHORT); - if (result != null) return result; - if (isCombiningClassProperty()) return null; - return ""; - } - return null; - } - - public boolean isCombiningClassProperty() { - return (propEnum == UProperty.CANONICAL_COMBINING_CLASS - || propEnum == UProperty.LEAD_CANONICAL_COMBINING_CLASS - || propEnum == UProperty.TRAIL_CANONICAL_COMBINING_CLASS); - } - - private static int fixedGetPropertyValueEnum(int propEnum, String valueAlias) { - try { - if (propEnum < BINARY_LIMIT) { - propEnum = UProperty.ALPHABETIC; - } - return UCharacter.getPropertyValueEnum(propEnum, valueAlias); - } catch (Exception e) { - return Integer.parseInt(valueAlias); - } - } - - static Map fixSkeleton = new HashMap(); - - private static String fixedGetPropertyValueName( - int propEnum, int valueEnum, int nameChoice) { - - String value = UCharacter.getPropertyValueName(propEnum, valueEnum, nameChoice); - String newValue = (String) fixSkeleton.get(value); - if (newValue == null) { - newValue = value; - if (propEnum == UProperty.JOINING_GROUP) { - newValue = newValue == null ? null : newValue.toLowerCase(Locale.ENGLISH); - } - newValue = regularize(newValue, true); - if (propEnum == UProperty.BLOCK && newValue.equals("Sutton_Sign_Writing")) { - newValue = "Sutton_SignWriting"; - } - fixSkeleton.put(value, newValue); - } - return newValue; - } - - public List _getNameAliases(List result) { - if (result == null) result = new ArrayList(); - // String alias = String_Extras.get(propEnum); - // if (alias == null) - String alias = Binary_Extras.get(propEnum); - if (alias != null) { - addUnique(alias, result); - } else { - addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.SHORT), result); - addUnique(getFixedPropertyName(propEnum, UProperty.NameChoice.LONG), result); - } - return result; - } - - public String getFixedPropertyName(int propName, int nameChoice) { - try { - return UCharacter.getPropertyName(propEnum, nameChoice); - } catch (IllegalArgumentException e) { - return null; - } - } - - private static Map cccHack = new HashMap(); - private static Set cccExtras = new HashSet(); - - static { - int start = UCharacter.getIntPropertyMinValue(UProperty.CANONICAL_COMBINING_CLASS); - int end = UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS); - for (int i = 0; i <= 255; ++i) { - String alias = - UCharacter.getPropertyValueName( - UProperty.CANONICAL_COMBINING_CLASS, i, UProperty.NameChoice.LONG); - String numStr = String.valueOf(i); - if (alias != null) { - cccHack.put(alias, numStr); - } else { - cccHack.put(numStr, numStr); - cccExtras.add(numStr); - } - } - } - - public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { - result = super.getSet(matcher, result); - if (propEnum == UProperty.GENERAL_CATEGORY) { - for (String multiprop : SPECIAL_GC.keySet()) { - R2 value = SPECIAL_GC.get(multiprop); - if (matcher.test(multiprop) || matcher.test(value.get0())) { - result.addAll(value.get1()); - } - } - } - return result; - } - - static Map> SPECIAL_GC = - new LinkedHashMap>(); - - static { - String[][] extras = { - {"Other", "C", "[[:Cc:][:Cf:][:Cn:][:Co:][:Cs:]]"}, - {"Letter", "L", "[[:Ll:][:Lm:][:Lo:][:Lt:][:Lu:]]"}, - {"Cased_Letter", "LC", "[[:Ll:][:Lt:][:Lu:]]"}, - {"Mark", "M", "[[:Mc:][:Me:][:Mn:]]"}, - {"Number", "N", "[[:Nd:][:Nl:][:No:]]"}, - {"Punctuation", "P", "[[:Pc:][:Pd:][:Pe:][:Pf:][:Pi:][:Po:][:Ps:]]"}, - {"Symbol", "S", "[[:Sc:][:Sk:][:Sm:][:So:]]"}, - {"Separator", "Z", "[[:Zl:][:Zp:][:Zs:]]"}, - }; - for (String[] extra : extras) { - SPECIAL_GC.put( - extra[0], - (R2) - Row.of(extra[1], new UnicodeSet(extra[2]).freeze()).freeze()); - } - } - - public List _getAvailableValues(List result) { - if (result == null) result = new ArrayList(); - if (propEnum == UProperty.AGE) { - addAllUnique(getAges(), result); - return result; - } - if (propEnum < UProperty.INT_LIMIT) { - if (Binary_Extras.isInRange(propEnum)) { - propEnum = UProperty.BINARY_START; // HACK - } - addValues(propEnum, result); - if (propEnum == UProperty.GENERAL_CATEGORY) { - for (String item : SPECIAL_GC.keySet()) { - addUnique(item, result); - } - } - } else if (propEnum >= UProperty.DOUBLE_START && propEnum < UProperty.DOUBLE_LIMIT) { - UnicodeMap map = getUnicodeMap(); - Collection values = map.values(); - addAllUnique(values, result); - } else { - String alias = getFixedValueAlias(propEnum, null, -1, UProperty.NameChoice.LONG); - addUnique(alias, result); - } - return result; - } - - private void addValues(int propertyId, List result) { - int start = UCharacter.getIntPropertyMinValue(propertyId); - int end = UCharacter.getIntPropertyMaxValue(propertyId); - for (int i = start; i <= end; ++i) { - String alias = getFixedValueAlias(propEnum, null, i, UProperty.NameChoice.LONG); - String alias2 = getFixedValueAlias(propEnum, null, i, UProperty.NameChoice.SHORT); - if (alias == null) { - alias = alias2; - if (alias == null && isCombiningClassProperty()) { - alias = String.valueOf(i); - } - } - // System.out.println(propertyAlias + "\t" + i + ":\t" + alias); - addUnique(alias, result); - } - } - - static String[] AGES = null; - - private String[] getAges() { - if (AGES == null) { - Set ages = new TreeSet(); - for (int i = 0; i < 0x10FFFF; ++i) { - ages.add(getAge(i)); - } - AGES = (String[]) ages.toArray(new String[ages.size()]); - } - return AGES; - } - - public List _getValueAliases(String valueAlias, List result) { - if (result == null) result = new ArrayList(); - if (propEnum == UProperty.AGE) { - addUnique(valueAlias, result); - return result; - } - if (isCombiningClassProperty()) { - addUnique(cccHack.get(valueAlias), result); // add number - } - int type = getType(); - if (type == UnicodeProperty.NUMERIC || type == EXTENDED_NUMERIC) { - addUnique(valueAlias, result); - if (valueAlias.endsWith(".0")) { - addUnique(valueAlias.substring(0, valueAlias.length() - 2), result); - } - } else { - R2 temp; - if (propEnum == UProperty.GENERAL_CATEGORY - && (temp = SPECIAL_GC.get(valueAlias)) != null) { - addUnique(valueAlias, result); - addUnique(temp.get0(), result); - } else { - addAliases(propEnum, valueAlias, result); - } - } - return result; - } - - private void addAliases(int propId, String valueAlias, List result) { - for (int nameChoice = UProperty.NameChoice.SHORT; ; ++nameChoice) { - try { - addUnique(getFixedValueAlias(propId, valueAlias, -1, nameChoice), result); - } catch (Exception e) { - break; - } - } - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodePropertySource#getPropertyType() - */ - private int internalGetPropertyType(int prop) { - switch (prop) { - case UProperty.AGE: - case UProperty.BLOCK: - case UProperty.SCRIPT: - return UnicodeProperty.CATALOG; - case UProperty.ISO_COMMENT: - case UProperty.NAME: - case UProperty.UNICODE_1_NAME: - return UnicodeProperty.MISC; - case UProperty.BIDI_MIRRORING_GLYPH: - case UProperty.CASE_FOLDING: - case UProperty.LOWERCASE_MAPPING: - case UProperty.SIMPLE_CASE_FOLDING: - case UProperty.SIMPLE_LOWERCASE_MAPPING: - case UProperty.SIMPLE_TITLECASE_MAPPING: - case UProperty.SIMPLE_UPPERCASE_MAPPING: - case UProperty.TITLECASE_MAPPING: - case UProperty.UPPERCASE_MAPPING: - return UnicodeProperty.EXTENDED_STRING; - } - if (prop < UProperty.BINARY_START) return UnicodeProperty.UNKNOWN; - if (prop < UProperty.BINARY_LIMIT) return UnicodeProperty.BINARY; - if (prop < UProperty.INT_START) return UnicodeProperty.EXTENDED_BINARY; - if (prop < UProperty.INT_LIMIT) return UnicodeProperty.ENUMERATED; - if (prop < UProperty.DOUBLE_START) return UnicodeProperty.EXTENDED_ENUMERATED; - if (prop < UProperty.DOUBLE_LIMIT) return UnicodeProperty.NUMERIC; - if (prop < UProperty.STRING_START) return UnicodeProperty.EXTENDED_NUMERIC; - if (prop < UProperty.STRING_LIMIT) return UnicodeProperty.STRING; - return UnicodeProperty.EXTENDED_STRING; - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getVersion() - */ - public String _getVersion() { - return VersionInfo.ICU_VERSION.toString(); - } - } - - /*{ - matchIterator = new UnicodeSetIterator( - new UnicodeSet("[^[:Cn:]-[:Default_Ignorable_Code_Point:]]")); - }*/ - - /* - * Other Missing Functions: - Expands_On_NFC - Expands_On_NFD - Expands_On_NFKC - Expands_On_NFKD - Composition_Exclusion - Decomposition_Mapping - FC_NFKC_Closure - ISO_Comment - NFC_Quick_Check - NFD_Quick_Check - NFKC_Quick_Check - NFKD_Quick_Check - Special_Case_Condition - Unicode_Radical_Stroke - */ - - static final Names Binary_Extras = - new Names( - UProperty.BINARY_LIMIT, - new String[] { - "isNFC", - "isNFD", - "isNFKC", - "isNFKD", - "isLowercase", - "isUppercase", - "isTitlecase", - "isCasefolded", - "isCased", - }); - - // static final Names String_Extras = new Names(UProperty.STRING_LIMIT, - // new String[] { - // "toNFC", "toNFD", "toNFKC", "toNKFD", - // }); - - static final int isNFC = UProperty.BINARY_LIMIT, - isNFD = UProperty.BINARY_LIMIT + 1, - isNFKC = UProperty.BINARY_LIMIT + 2, - isNFKD = UProperty.BINARY_LIMIT + 3, - isLowercase = UProperty.BINARY_LIMIT + 4, - isUppercase = UProperty.BINARY_LIMIT + 5, - isTitlecase = UProperty.BINARY_LIMIT + 6, - isCasefolded = UProperty.BINARY_LIMIT + 7, - isCased = UProperty.BINARY_LIMIT + 8, - BINARY_LIMIT = UProperty.BINARY_LIMIT + 9 - - // NFC = UProperty.STRING_LIMIT, - // NFD = UProperty.STRING_LIMIT+1, - // NFKC = UProperty.STRING_LIMIT+2, - // NFKD = UProperty.STRING_LIMIT+3 - ; - - private ICUPropertyFactory() { - Collection c = getInternalAvailablePropertyAliases(new ArrayList()); - Iterator it = c.iterator(); - while (it.hasNext()) { - add(getInternalProperty((String) it.next())); - } - } - - private static ICUPropertyFactory singleton = null; - - public static synchronized ICUPropertyFactory make() { - if (singleton != null) return singleton; - singleton = new ICUPropertyFactory(); - return singleton; - } - - public List getInternalAvailablePropertyAliases(List result) { - int[][] ranges = { - {UProperty.BINARY_START, UProperty.BINARY_LIMIT}, - {UProperty.INT_START, UProperty.INT_LIMIT}, - {UProperty.DOUBLE_START, UProperty.DOUBLE_LIMIT}, - {UProperty.STRING_START, UProperty.STRING_LIMIT}, - }; - for (int i = 0; i < ranges.length; ++i) { - for (int j = ranges[i][0]; j < ranges[i][1]; ++j) { - String alias; - try { - alias = UCharacter.getPropertyName(j, UProperty.NameChoice.LONG); - } catch (Exception e) { - continue; // probably mismatch in ICU version - } - UnicodeProperty.addUnique(alias, result); - if (!result.contains(alias)) result.add(alias); - } - } - // result.addAll(String_Extras.getNames()); - result.addAll(Binary_Extras.getNames()); - return result; - } - - public UnicodeProperty getInternalProperty(String propertyAlias) { - int propEnum; - main: - { - int possibleItem = Binary_Extras.get(propertyAlias); - if (possibleItem >= 0) { - propEnum = possibleItem; - break main; - } - // possibleItem = String_Extras.get(propertyAlias); - // if (possibleItem >= 0) { - // propEnum = possibleItem; - // break main; - // } - propEnum = UCharacter.getPropertyEnum(propertyAlias); - } - return new ICUProperty(propertyAlias, propEnum); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodePropertySource#getProperty(java.lang.String) - */ - // TODO file bug on getPropertyValueName for Canonical_Combining_Class - - public static class Names { - private String[] names; - private int base; - - public Names(int base, String[] names) { - this.base = base; - this.names = names; - } - - public int get(String name) { - for (int i = 0; i < names.length; ++i) { - if (name.equalsIgnoreCase(names[i])) return base + i; - } - return -1; - } - - public String get(int number) { - number -= base; - if (number < 0 || names.length <= number) return null; - return names[number]; - } - - public boolean isInRange(int number) { - number -= base; - return (0 <= number && number < names.length); - } - - public List getNames() { - return Arrays.asList(names); - } - } -} diff --git a/unicodetools/src/main/java/org/unicode/props/BagFormatter.java b/unicodetools/src/main/java/org/unicode/props/BagFormatter.java index fa0279affa..1f9ae45c24 100644 --- a/unicodetools/src/main/java/org/unicode/props/BagFormatter.java +++ b/unicodetools/src/main/java/org/unicode/props/BagFormatter.java @@ -23,7 +23,6 @@ import org.unicode.cldr.util.Tabber; import org.unicode.cldr.util.Visitor; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.jsp.ICUPropertyFactory; public class BagFormatter { static final boolean DEBUG = false; @@ -395,7 +394,7 @@ public BagFormatter setUnicodePropertyFactory(UnicodeProperty.Factory source) { } private UnicodeProperty.Factory getUnicodePropertyFactory() { - if (source == null) source = ICUPropertyFactory.make(); + if (source == null) source = IndexUnicodeProperties.make(); return source; } diff --git a/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java b/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java index aed733d009..cb2e6c96e0 100644 --- a/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java +++ b/unicodetools/src/main/java/org/unicode/props/RandomStringGenerator.java @@ -14,7 +14,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Random; -import org.unicode.jsp.ICUPropertyFactory; public class RandomStringGenerator { @@ -58,7 +57,9 @@ public RandomStringGenerator( factory, factory.getProperty(propertyName).getUnicodeMap(), useShortName - ? ICUPropertyFactory.make().getProperty(propertyName).getUnicodeMap(true) + ? IndexUnicodeProperties.make() + .getProperty(propertyName) + .getUnicodeMap(true) : null, addGCStuff); } diff --git a/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java b/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java deleted file mode 100644 index cb16e493c1..0000000000 --- a/unicodetools/src/main/java/org/unicode/temp/UnicodePropertyX.java +++ /dev/null @@ -1,249 +0,0 @@ -/* - ******************************************************************************* - * Copyright (C) 1996-2014, International Business Machines Corporation and * - * others. All Rights Reserved. * - ******************************************************************************* - */ -package org.unicode.temp; - -import com.ibm.icu.impl.UnicodeMap; -import com.ibm.icu.text.UnicodeSet; -import java.util.List; -import org.unicode.props.UnicodeProperty; - -public abstract class UnicodePropertyX extends UnicodeProperty { - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getTypeName() - */ - @Override - public String getTypeName() { - // TODO Auto-generated method stub - return super.getTypeName(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getVersion() - */ - @Override - public String getVersion() { - // TODO Auto-generated method stub - return super.getVersion(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getValue(int) - */ - @Override - public String getValue(int codepoint) { - // TODO Auto-generated method stub - return super.getValue(codepoint); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getNameAliases(java.util.List) - */ - @Override - public List getNameAliases(List result) { - // TODO Auto-generated method stub - return super.getNameAliases(result); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getValueAliases(java.lang.String, java.util.List) - */ - @Override - public List getValueAliases(String valueAlias, List result) { - // TODO Auto-generated method stub - return super.getValueAliases(valueAlias, result); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getAvailableValues(java.util.List) - */ - @Override - public List getAvailableValues(List result) { - // TODO Auto-generated method stub - return super.getAvailableValues(result); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#_getVersion() - */ - @Override - protected String _getVersion() { - // TODO Auto-generated method stub - return null; - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#_getValue(int) - */ - @Override - protected String _getValue(int codepoint) { - // TODO Auto-generated method stub - return null; - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#_getNameAliases(java.util.List) - */ - @Override - protected List _getNameAliases(List result) { - // TODO Auto-generated method stub - return null; - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#_getValueAliases(java.lang.String, java.util.List) - */ - @Override - protected List _getValueAliases(String valueAlias, List result) { - // TODO Auto-generated method stub - return null; - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#_getAvailableValues(java.util.List) - */ - @Override - protected List _getAvailableValues(List result) { - // TODO Auto-generated method stub - return null; - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getMaxWidth(boolean) - */ - @Override - public int getMaxWidth(boolean getShortest) { - // TODO Auto-generated method stub - return super.getMaxWidth(getShortest); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getSet(com.ibm.icu.dev.util.UnicodeProperty.PatternMatcher, com.ibm.icu.text.UnicodeSet) - */ - @Override - public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) { - // TODO Auto-generated method stub - return super.getSet(matcher, result); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getUnicodeMap() - */ - @Override - public UnicodeMap getUnicodeMap() { - // TODO Auto-generated method stub - return super.getUnicodeMap(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getUnicodeMap(boolean) - */ - @Override - public UnicodeMap getUnicodeMap(boolean getShortest) { - // TODO Auto-generated method stub - return super.getUnicodeMap(getShortest); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getUnicodeMap_internal() - */ - @Override - public UnicodeMap getUnicodeMap_internal() { - // TODO Auto-generated method stub - return super.getUnicodeMap_internal(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#_getUnicodeMap() - */ - @Override - protected UnicodeMap _getUnicodeMap() { - // TODO Auto-generated method stub - return super._getUnicodeMap(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#isValidValue(java.lang.String) - */ - @Override - public boolean isValidValue(String propertyValue) { - // TODO Auto-generated method stub - return super.isValidValue(propertyValue); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#getValueAliases() - */ - @Override - public List getValueAliases() { - // TODO Auto-generated method stub - return super.getValueAliases(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#isDefault(int) - */ - @Override - public boolean isDefault(int cp) { - // TODO Auto-generated method stub - return super.isDefault(cp); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#hasUniformUnassigned() - */ - @Override - public boolean hasUniformUnassigned() { - // TODO Auto-generated method stub - return super.hasUniformUnassigned(); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeProperty#setUniformUnassigned(boolean) - */ - @Override - protected UnicodeProperty setUniformUnassigned(boolean hasUniformUnassigned) { - // TODO Auto-generated method stub - return super.setUniformUnassigned(hasUniformUnassigned); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeLabel#transform(java.lang.Integer) - */ - @Override - public String transform(Integer codepoint) { - // TODO Auto-generated method stub - return super.transform(codepoint); - } - - /* (non-Javadoc) - * @see com.ibm.icu.dev.util.UnicodeLabel#getValue(java.lang.String, java.lang.String, boolean) - */ - @Override - public String getValue(String s, String separator, boolean withCodePoint) { - // TODO Auto-generated method stub - return super.getValue(s, separator, withCodePoint); - } - - /* (non-Javadoc) - * @see java.lang.Object#hashCode() - */ - @Override - public int hashCode() { - // TODO Auto-generated method stub - return super.hashCode(); - } - - /* (non-Javadoc) - * @see java.lang.Object#equals(java.lang.Object) - */ - @Override - public boolean equals(Object obj) { - // TODO Auto-generated method stub - return super.equals(obj); - } -} diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/CheckICU.java b/unicodetools/src/main/java/org/unicode/text/UCD/CheckICU.java deleted file mode 100644 index 87b7314eb3..0000000000 --- a/unicodetools/src/main/java/org/unicode/text/UCD/CheckICU.java +++ /dev/null @@ -1,363 +0,0 @@ -package org.unicode.text.UCD; - -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.ULocale; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; -import java.util.TreeSet; -import org.unicode.cldr.draft.FileUtilities; -import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.jsp.ICUPropertyFactory; -import org.unicode.props.BagFormatter; -import org.unicode.props.UnicodeProperty; -import org.unicode.text.utility.Settings; -import org.unicode.text.utility.Utility; - -public class CheckICU { - static final BagFormatter bf = new BagFormatter(); - - public static void main(String[] args) throws IOException { - System.out.println("Start"); - test(); - System.out.println("End"); - } - - static UnicodeSet itemFailures; - static ICUPropertyFactory icuFactory; - static ToolUnicodePropertySource toolFactory; - - static class ReplaceLabel extends UnicodeLabel { - UnicodeProperty p; - - ReplaceLabel(UnicodeProperty p) { - this.p = p; - } - - @Override - public String getValue(int codepoint, boolean isShort) { - // TODO Auto-generated method stub - return p.getValue(codepoint, isShort).replace('_', ' '); - } - - @Override - public int getMaxWidth(boolean v) { - return p.getMaxWidth(v); - } - } - - public static void test() throws IOException { - checkAvailable(); - if (true) { - return; - } - checkUCD(); - itemFailures = new UnicodeSet(); - icuFactory = ICUPropertyFactory.make(); - toolFactory = ToolUnicodePropertySource.make("4.0.0"); - - final String[] quickList = { - // "Canonical_Combining_Class", - // "Script", "Bidi_Mirroring_Glyph", "Case_Folding", - // "Numeric_Value" - }; - for (final String element : quickList) { - testProperty(element, -1); - } - if (quickList.length > 0) { - return; - } - - final Collection availableTool = toolFactory.getAvailableNames(); - - final Collection availableICU = icuFactory.getAvailableNames(); - System.out.println( - showDifferences("Property Aliases", "ICU", availableICU, "Tool", availableTool)); - final Collection common = new TreeSet(availableICU); - common.retainAll(availableTool); - - for (int j = UnicodeProperty.BINARY; j < UnicodeProperty.LIMIT_TYPE; ++j) { - System.out.println(); - System.out.println(UnicodeProperty.getTypeName(j)); - final Iterator it = common.iterator(); - while (it.hasNext()) { - final String prop = (String) it.next(); - testProperty(prop, j); - } - } - } - - /** */ - private static void checkAvailable() { - // generateFile("4.0.0", "DerivedCombiningClass"); - // generateFile("4.0.0", "DerivedCoreProperties"); - final ULocale[] locales = Collator.getAvailableULocales(); - - System.out.println("Collation"); - System.out.println("Possible keyword=values pairs:"); - { - final String[] keywords = Collator.getKeywords(); - for (int i = 0; i < Collator.getKeywords().length; ++i) { - final String[] values = Collator.getKeywordValues(keywords[i]); - for (final String value : values) { - System.out.println("\t" + keywords[i] + "=" + value); - } - } - } - System.out.println("Differing Collators:"); - final Set testSet = new HashSet(Arrays.asList(new String[] {"nl", "de", "de_DE", "zh_TW"})); - for (int k = 0; k < locales.length; ++k) { - if (!testSet.contains(locales[k].toString())) { - continue; - } - showCollationVariants(locales[k]); - } - } - - /** */ - private static void showCollationVariants(ULocale locale) { - final String[] keywords = Collator.getKeywords(); - System.out.println(locale.getDisplayName(ULocale.ENGLISH) + " [" + locale + "]"); - for (int i = 0; i < Collator.getKeywords().length; ++i) { - final ULocale base = - Collator.getFunctionalEquivalent( - keywords[i], locale - // new ULocale(locale + "@" + keywords[i] + "=standard") - ); - if (true) { - System.out.println( - "\"" - + base - + "\" == Collator.getFunctionalEquivalent(\"" - + keywords[i] - + "\", \"" - + locale - + "\");"); - } - final String[] values = Collator.getKeywordValues(keywords[i]); - for (int j = 0; j < Collator.getKeywordValues(keywords[i]).length; ++j) { - final ULocale other = - Collator.getFunctionalEquivalent( - keywords[i], - new ULocale(locale + "@" + keywords[i] + "=" + values[j])); - if (true) { - System.out.println( - "\"" - + other - + "\" == Collator.getFunctionalEquivalent(\"" - + keywords[i] - + "\", new ULocale(\"" - + locale - + "@" - + keywords[i] - + "=" - + values[j] - + "\");"); - } - // HACK: commented line should work but doesn't - if (!other.equals(base)) { - // if (other.toString().indexOf("@") >= 0) { - System.out.println( - "\t" + keywords[i] + "=" + values[j] + "; \t" + base + "; \t" + other); - } - } - } - } - - /** - * Sample code that prints out the variants that 'make a difference' for a given locale. To - * iterate through the locales, use Collator.getVariant - */ - private static void showCollationVariants2(ULocale locale) { - final String[] keywords = Collator.getKeywords(); - System.out.println(locale.getDisplayName(ULocale.ENGLISH) + " [" + locale + "]"); - for (int i = 0; i < Collator.getKeywords().length; ++i) { - final ULocale base = Collator.getFunctionalEquivalent(keywords[i], locale); - final String[] values = Collator.getKeywordValues(keywords[i]); - for (int j = 0; j < Collator.getKeywordValues(keywords[i]).length; ++j) { - final ULocale other = - Collator.getFunctionalEquivalent( - keywords[i], - new ULocale(locale + "@" + keywords[i] + "=" + values[j])); - if (!other.equals(base)) { - System.out.println( - "\t" + keywords[i] + "=" + values[j] + "; \t" + base + "; \t" + other); - } - } - } - } - - private static void checkUCD() throws IOException { - final UCD myUCD = UCD.make("4.0.0"); - final Normalizer nfc = new Normalizer(UCD_Types.NFC, "4.0.0"); - final UnicodeSet leading = new UnicodeSet(); - final UnicodeSet trailing = new UnicodeSet(); - final UnicodeSet starter = new UnicodeSet(); - for (int i = 0; i <= 0x10FFFF; ++i) { - if (myUCD.getCombiningClass(i) == 0) { - starter.add(i); - } - if (nfc.isTrailing(i)) { - trailing.add(i); - } - if (nfc.isLeading(i)) { - leading.add(i); - } - } - final PrintWriter pw = - FileUtilities.openUTF8Writer(Settings.Output.GEN_DIR, "Trailing.txt"); - pw.println("+Trailing+Starter"); - bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(starter)); - pw.println("+Trailing-Starter"); - bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(starter)); - pw.println("-Trailing-Starter"); - bf.showSetNames(pw, new UnicodeSet(trailing).complement().removeAll(starter)); - pw.println("+Trailing+Leading"); - bf.showSetNames(pw, new UnicodeSet(trailing).retainAll(leading)); - pw.println("+Trailing-Leading"); - bf.showSetNames(pw, new UnicodeSet(trailing).removeAll(leading)); - pw.close(); - } - - /* - * int icuType; - int toolType; - Collection icuAliases; - Collection toolAliases; - String firstDiffICU; - String firstDiffTool; - String firstDiffCP; - String icuProp; - String toolProp; - - */ - - private static void testProperty(String prop, int typeFilter) { - final UnicodeProperty icuProp = icuFactory.getProperty(prop); - final int icuType = icuProp.getType(); - - if (typeFilter >= 0 && icuType != typeFilter) { - return; - } - - System.out.println(); - System.out.println("Testing: " + prop); - final UnicodeProperty toolProp = toolFactory.getProperty(prop); - - final int toolType = toolProp.getType(); - if (icuType != toolType) { - System.out.println( - "FAILURE Type: ICU: " - + UnicodeProperty.getTypeName(icuType) - + "\tTool: " - + UnicodeProperty.getTypeName(toolType)); - } - - Collection icuAliases = icuProp.getNameAliases(new ArrayList()); - Collection toolAliases = toolProp.getNameAliases(new ArrayList()); - System.out.println(showDifferences("Aliases", "ICU", icuAliases, "Tool", toolAliases)); - - icuAliases = icuProp.getAvailableValues(new ArrayList()); - toolAliases = toolProp.getAvailableValues(new ArrayList()); - System.out.println( - showDifferences("Value Aliases", "ICU", icuAliases, "Tool", toolAliases)); - - // TODO do property value aliases - itemFailures.clear(); - String firstDiffICU = null, firstDiffTool = null, firstDiffCP = null; - for (int i = 0; i <= 0x10FFFF; ++i) { - /*if (i == 0x0237) { - System.out.println(); - } - */ - final String icuValue = icuProp.getValue(i); - final String toolValue = toolProp.getValue(i); - if (!equals(icuValue, toolValue)) { - itemFailures.add(i); - if (firstDiffCP == null) { - firstDiffICU = icuValue; - firstDiffTool = toolValue; - firstDiffCP = Utility.hex(i); - } - } - } - if (itemFailures.size() != 0) { - System.out.println("FAILURE " + itemFailures.size() + " Differences: "); - System.out.println(itemFailures.toPattern(true)); - if (firstDiffICU != null) { - firstDiffICU = BagFormatter.hex.transliterate(firstDiffICU); - } - if (firstDiffTool != null) { - firstDiffTool = BagFormatter.hex.transliterate(firstDiffTool); - } - System.out.println( - firstDiffCP + "\tICU: <" + firstDiffICU + ">\tTool: <" + firstDiffTool + ">"); - } - System.out.println("done"); - - // do values later, and their aliases - /* - System.out.println("-Values"); - UnicodeSet - System.out.println(showDifferences("ICU", availableICU, "Tool", availableTool)); - */ - } - - static boolean equals(Object a, Object b) { - if (a == null) { - return b == null; - } - return a.equals(b); - } - - public static String showDifferences( - String title, String name1, Collection set1, String name2, Collection set2) { - - final Collection temp = new TreeSet(set1); - temp.retainAll(set2); - - if (set1.size() == temp.size()) { - return title + ": " + name1 + " == " + name2 + ": " + bf.join(set1); - } - - final StringBuffer result = new StringBuffer(); - result.append(title + "\tFAILURE\n"); - result.append("\t" + name1 + " = " + bf.join(set1) + "\n"); - result.append("\t" + name2 + " = " + bf.join(set2) + "\n"); - - // damn'd collection doesn't have a clone, so - // we go with Set, even though that - // may not preserve order and duplicates - if (temp.size() != 0) { - result.append("\t" + name2 + " & " + name1 + ":\n"); - result.append("\t" + bf.join(temp)); - result.append("\n"); - } - - temp.clear(); - temp.addAll(set1); - temp.removeAll(set2); - if (temp.size() != 0) { - result.append("\t" + name1 + " - " + name2 + ":\n"); - result.append("\t" + bf.join(temp)); - result.append("\n"); - } - - temp.clear(); - temp.addAll(set2); - temp.removeAll(set1); - if (temp.size() != 0) { - result.append("\t" + name2 + " - " + name1 + ":\n"); - result.append("\t" + bf.join(temp)); - result.append("\n"); - } - - return result.toString(); - } -} diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestData.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestData.java index 3a806da093..0b7f08399e 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestData.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestData.java @@ -42,8 +42,8 @@ import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.Counter; import org.unicode.cldr.util.Pair; -import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.BagFormatter; +import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UnicodeProperty; import org.unicode.text.utility.Settings; import org.unicode.text.utility.UTF32; @@ -191,7 +191,7 @@ public static void main(String[] args) throws IOException { foo(); System.out.println("main: " + Default.getDate()); - upf = ICUPropertyFactory.make(); + upf = IndexUnicodeProperties.make(); System.out.println("after factory: " + Default.getDate()); showPropDiff( diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index 8b842ed96a..82f7f53503 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -31,7 +31,6 @@ import org.unicode.cldr.util.Tabber; import org.unicode.cldr.util.Tabber.HTMLTabber; import org.unicode.cldr.util.props.UnicodeLabel; -import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.BagFormatter; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.IndexUnicodeProperties.DefaultValueType; @@ -43,7 +42,6 @@ public class TestUnicodeInvariants { private static final boolean DEBUG = false; - private static final boolean ICU_VERSION = false; // ignore the versions if this is true private static final Factory LATEST_PROPS = getProperties(Settings.latestVersion); private static final boolean SHOW_LOOKUP = false; private static int showRangeLimit = 20; @@ -1988,7 +1986,7 @@ private static int scan(UnicodeSet allowed, CharSequence line, int start, boolea } private static Factory getProperties(final String version) { - return ICU_VERSION ? ICUPropertyFactory.make() : ToolUnicodePropertySource.make(version); + return ToolUnicodePropertySource.make(version); } // Some of our parse exceptions are thrown with a parse position before the problem. diff --git a/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java b/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java index 13e80b92a4..3c03767347 100644 --- a/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java +++ b/unicodetools/src/main/java/org/unicode/text/tools/VerifyXmlUcd.java @@ -19,7 +19,6 @@ import java.util.regex.Pattern; import org.unicode.cldr.util.XMLFileReader; import org.unicode.cldr.util.XPathParts; -import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.UnicodeProperty; import org.unicode.props.UnicodeProperty.Factory; import org.unicode.text.UCD.Default; @@ -27,16 +26,12 @@ import org.unicode.text.utility.Settings; public class VerifyXmlUcd { - public static final boolean USE_ICU = false; public static final boolean ABBREVIATED = true; private static Factory factory; static Factory getFactory() { if (factory == null) { - factory = - USE_ICU - ? ICUPropertyFactory.make() - : ToolUnicodePropertySource.make(Default.ucdVersion()); + factory = ToolUnicodePropertySource.make(Default.ucdVersion()); } return factory; } @@ -343,11 +338,11 @@ private String matchEricsValues(int cp, String property, UnicodeProperty toolPro // get my values if (toolProperty == null) { - return USE_ICU ? null : "MISSING"; + return "MISSING"; } String toolValue = toolProperty.getValue(cp, true); if (toolValue == null) { - return USE_ICU ? null : ""; // for ICU, only test a subset + return ""; } final int type = toolProperty.getType(); diff --git a/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java b/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java index 483dad07ab..13736b638e 100644 --- a/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java +++ b/unicodetools/src/main/java/org/unicode/text/utility/TestUtility.java @@ -34,7 +34,6 @@ import java.util.zip.GZIPInputStream; import java.util.zip.GZIPOutputStream; import org.unicode.cldr.util.Counter; -import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UcdProperty; import org.unicode.props.UnicodeProperty; @@ -78,7 +77,7 @@ public static void main(String[] args) throws Exception { // if (true) return; // UnicodeLabel ul; - final ICUPropertyFactory p = ICUPropertyFactory.make(); + final var p = IndexUnicodeProperties.make(); total = 0; final BreakIterator bk = BreakIterator.getWordInstance(Locale.ENGLISH); final Matcher nameMatch = Pattern.compile("Name").matcher(""); diff --git a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java index ead5331b79..0e26ab2c2c 100644 --- a/unicodetools/src/main/java/org/unicode/tools/TestSegments.java +++ b/unicodetools/src/main/java/org/unicode/tools/TestSegments.java @@ -19,7 +19,7 @@ import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; import org.unicode.cldr.util.Log; -import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.RandomStringGenerator; import org.unicode.props.UnicodeProperty; @@ -67,7 +67,7 @@ public static void main(String[] args) throws IOException { args = new String[] {"GraphemeClusterBreak", "LineBreak", "SentenceBreak", "WordBreak"}; List testChoice = Arrays.asList(args); - UnicodeProperty.Factory propFactory = ICUPropertyFactory.make(); + UnicodeProperty.Factory propFactory = IndexUnicodeProperties.make(); // grab the rules, build a RuleList, and run against the test samples. diff --git a/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java b/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java index 3aea9efe04..70afbcccc7 100644 --- a/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java +++ b/unicodetools/src/test/java/org/unicode/draft/TestCompressed.java @@ -21,7 +21,7 @@ import org.junit.jupiter.api.Test; import org.unicode.draft.UnicodeDataInput.StringReader; import org.unicode.draft.UnicodeDataOutput.StringWriter; -import org.unicode.jsp.ICUPropertyFactory; +import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UnicodeProperty; import org.unicode.unittest.TestFmwkMinusMinus; @@ -401,7 +401,7 @@ private void writeObject(Object item, DataOutput out) throws IOException { */ @Test public void TestUnicodePropAccess() throws IOException { - final UnicodeProperty.Factory f = ICUPropertyFactory.make(); + final UnicodeProperty.Factory f = IndexUnicodeProperties.make(); for (final String property : (Iterable) f.getAvailableNames()) { final UnicodeProperty prop = f.getProperty(property); if (prop.isType(UnicodeProperty.STRING_OR_MISC_MASK)) { diff --git a/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java b/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java index 8ceb0ef4fa..0596c83745 100644 --- a/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java +++ b/unicodetools/src/test/java/org/unicode/propstest/CheckProperties.java @@ -28,7 +28,6 @@ import org.unicode.cldr.util.With; import org.unicode.draft.UnicodeDataOutput; import org.unicode.draft.UnicodeDataOutput.ItemWriter; -import org.unicode.jsp.ICUPropertyFactory; import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.PropertyNames; import org.unicode.props.PropertyNames.NameMatcher; @@ -48,16 +47,12 @@ public class CheckProperties { private static final int DEBUG_CODE_POINT = 0x0600; - private static final boolean LATEST_ICU = true; - static LinkedHashSet PROPNAMEDIFFERENCES = new LinkedHashSet(); static LinkedHashSet SKIPPING = new LinkedHashSet(); - static LinkedHashSet NOT_IN_ICU = new LinkedHashSet(); enum Action { SHOW, COMPARE, - ICU, EMPTY, INFO, SPACES, @@ -221,16 +216,6 @@ public static void main(String[] args) throws Exception { showSummary(summary); } break; - case ICU: - { - out.println("Property\tICU-Value\tDirect-Value\tChars-Affected"); - final Set summary = new LinkedHashSet(); - for (final UcdProperty prop : values) { - compareICU(prop, LATEST_ICU ? latest : last, summary); - } - showSummary(summary); - } - break; case DEFAULTS: for (final UcdProperty prop : values) { showDefaults(prop); @@ -309,7 +294,6 @@ public static void main(String[] args) throws Exception { showInfo("No Differences", SKIPPING, out); showInfo("Property Enum Canonical Form wrong", PROPNAMEDIFFERENCES, outLog); - showInfo("Not In ICU", NOT_IN_ICU, outLog); showInfo("Cache File Sizes", latest.getCacheFileSize().entrySet(), outLog); final Set>> dataLoadingErrors = @@ -563,39 +547,6 @@ public static void checkEmpty(IndexUnicodeProperties latest, UcdProperty prop) { // } } - private static void compareICU( - UcdProperty prop, IndexUnicodeProperties direct, Set summary) { - PropertyNames names = prop.getNames(); - if (VERBOSE) { - System.out.println(prop); - } - if (prop == UcdProperty.Unicode_1_Name) { - NOT_IN_ICU.add(prop.toString()); - return; - } - - final ICUPropertyFactory propFactory = ICUPropertyFactory.make(); - final UnicodeProperty icuProp = propFactory.getProperty(prop.toString()); - if (icuProp == null) { - NOT_IN_ICU.add(prop.toString()); - return; - } - final UnicodeMap icuMap = icuProp.getUnicodeMap(); - if (prop == UcdProperty.Numeric_Value) { - icuMap.setMissing("NaN"); - } - - final UnicodeMap directMap = direct.load(prop); - showChanges( - prop, - new UnicodeSet("[^[:cn:][:co:][:cs:]]"), - null, - icuMap, - direct, - directMap, - summary); - } - private static void addAll(UnicodeSet toSet, UnicodeSet set) { if (set.contains('\u5427')) { final int y = 3;