Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions icu4c/source/common/static_unicode_sets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,13 @@ void U_CALLCONV initNumberParseUniSets(UErrorCode& status) {
U_ASSERT(gUnicodeSets[PERCENT_SIGN] != nullptr);
U_ASSERT(gUnicodeSets[PERMILLE_SIGN] != nullptr);

// The following don't currently have parseLenients in data.
U_ASSERT(gUnicodeSets[INFINITY_SIGN] == nullptr);
gUnicodeSets[INFINITY_SIGN] = new UnicodeSet(u"[∞]", status);
U_ASSERT(gUnicodeSets[APPROXIMATELY_SIGN] == nullptr);
// This set of characters was manually curated from the
// values of the approximatelySign element of CLDR common/main/*.xml files.
gUnicodeSets[APPROXIMATELY_SIGN] = new UnicodeSet(u"[∼~≈≃約]", status);
if (U_FAILURE(status)) { return; }

U_ASSERT(gUnicodeSets[DOLLAR_SIGN] != nullptr);
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/common/static_unicode_sets.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ enum Key {
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_SIGN,
APPROXIMATELY_SIGN,

// Currency Symbols
DOLLAR_SIGN,
Expand Down
8 changes: 8 additions & 0 deletions icu4c/source/i18n/numparse_affixes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
case TYPE_PLUS_SIGN:
addMatcher(fWarehouse.plusSign());
break;
case TYPE_APPROXIMATELY_SIGN:
addMatcher(fWarehouse.approximatelySign());
break;
case TYPE_PERCENT:
addMatcher(fWarehouse.percent());
break;
Expand All @@ -97,6 +100,7 @@ void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp,
case TYPE_CURRENCY_TRIPLE:
case TYPE_CURRENCY_QUAD:
case TYPE_CURRENCY_QUINT:
case TYPE_CURRENCY_OVERFLOW:
// All currency symbols use the same matcher
addMatcher(fWarehouse.currency(status));
break;
Expand Down Expand Up @@ -142,6 +146,10 @@ NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() {
return fPlusSign = {fSetupData->dfs, true};
}

// Returns the warehouse-owned approximately-sign matcher, re-initializing it
// from the setup data's DecimalFormatSymbols on every call. The `true`
// argument is the constructor's allowTrailing flag.
NumberParseMatcher& AffixTokenMatcherWarehouse::approximatelySign() {
    fApproximatelySign = {fSetupData->dfs, true};
    return fApproximatelySign;
}

// Returns the warehouse-owned percent matcher, re-initializing it from the
// setup data's DecimalFormatSymbols on every call.
NumberParseMatcher& AffixTokenMatcherWarehouse::percent() {
    fPercent = {fSetupData->dfs};
    return fPercent;
}
Expand Down
3 changes: 3 additions & 0 deletions icu4c/source/i18n/numparse_affixes.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {

NumberParseMatcher& plusSign();

NumberParseMatcher& approximatelySign();

NumberParseMatcher& percent();

NumberParseMatcher& permille();
Expand All @@ -108,6 +110,7 @@ class U_I18N_API AffixTokenMatcherWarehouse : public UMemory {
// NOTE: These are default-constructed and should not be used until initialized.
MinusSignMatcher fMinusSign;
PlusSignMatcher fPlusSign;
ApproximatelySignMatcher fApproximatelySign;
PercentMatcher fPercent;
PermilleMatcher fPermille;
CombinedCurrencyMatcher fCurrency;
Expand Down
2 changes: 2 additions & 0 deletions icu4c/source/i18n/numparse_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ NumberParserImpl::createSimpleParser(const Locale& locale, const UnicodeString&
parser->addMatcher(parser->fLocalMatchers.decimal = {symbols, grouper, parseFlags});
parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.percent = {symbols});
parser->addMatcher(parser->fLocalMatchers.permille = {symbols});
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
Expand Down Expand Up @@ -164,6 +165,7 @@ NumberParserImpl::createParserFromProperties(const number::impl::DecimalFormatPr
if (!isStrict) {
parser->addMatcher(parser->fLocalMatchers.plusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.minusSign = {symbols, false});
parser->addMatcher(parser->fLocalMatchers.approximatelySign = {symbols, false});
}
parser->addMatcher(parser->fLocalMatchers.nan = {symbols});
parser->addMatcher(parser->fLocalMatchers.infinity = {symbols});
Expand Down
1 change: 1 addition & 0 deletions icu4c/source/i18n/numparse_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class U_I18N_API NumberParserImpl : public MutableMatcherCollection, public UMem
PercentMatcher percent;
PermilleMatcher permille;
PlusSignMatcher plusSign;
ApproximatelySignMatcher approximatelySign;
DecimalMatcher decimal;
ScientificMatcher scientific;
CombinedCurrencyMatcher currency;
Expand Down
17 changes: 17 additions & 0 deletions icu4c/source/i18n/numparse_symbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,4 +195,21 @@ void PlusSignMatcher::accept(StringSegment& segment, ParsedNumber& result) const
}


// Constructs a matcher for the approximately sign. The base SymbolMatcher is
// given the locale's kApproximatelySignSymbol string together with the static
// APPROXIMATELY_SIGN set; allowTrailing is stored for use by isDisabled().
ApproximatelySignMatcher::ApproximatelySignMatcher(const DecimalFormatSymbols& dfs,
                                                   bool allowTrailing)
        : SymbolMatcher(dfs.getConstSymbol(DecimalFormatSymbols::kApproximatelySignSymbol),
                        unisets::APPROXIMATELY_SIGN),
          fAllowTrailing(allowTrailing) {}

// Reports whether this matcher must be skipped for the current parse state.
// When trailing signs are allowed the matcher is never disabled; otherwise it
// is disabled as soon as the result has already seen a number.
bool ApproximatelySignMatcher::isDisabled(const ParsedNumber& result) const {
    if (fAllowTrailing) {
        return false;
    }
    return result.seenNumber();
}

// Accept hook for the approximately sign. Its only action is to pass the
// segment to ParsedNumber::setCharsConsumed(); nothing else on the result is
// modified here.
// NOTE(review): presumably invoked by SymbolMatcher after the symbol has been
// matched -- confirm against the base class.
void ApproximatelySignMatcher::accept(StringSegment& segment, ParsedNumber& result) const {
result.setCharsConsumed(segment);
}


#endif /* #if !UCONFIG_NO_FORMATTING */
17 changes: 17 additions & 0 deletions icu4c/source/i18n/numparse_symbols.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,23 @@ class U_I18N_API PlusSignMatcher : public SymbolMatcher {
bool fAllowTrailing;
};


/**
 * SymbolMatcher for the "approximately" sign.
 *
 * The two-argument constructor binds the matcher to the locale's
 * kApproximatelySignSymbol and to the static APPROXIMATELY_SIGN set.
 * When allowTrailing is false, isDisabled() reports the matcher as disabled
 * once the ParsedNumber has already seen a number.
 */
// Exported as U_I18N_API for tests
class U_I18N_API ApproximatelySignMatcher : public SymbolMatcher {
  public:
    ApproximatelySignMatcher() = default;  // WARNING: Leaves the object in an unusable state

    ApproximatelySignMatcher(const DecimalFormatSymbols& dfs, bool allowTrailing);

  protected:
    bool isDisabled(const ParsedNumber& result) const override;

    void accept(StringSegment& segment, ParsedNumber& result) const override;

  private:
    // In-class initializer so that a default-constructed matcher never carries
    // an indeterminate flag, even though it must still be assigned before use.
    bool fAllowTrailing = false;
};

} // namespace numparse::impl
U_NAMESPACE_END

Expand Down
8 changes: 8 additions & 0 deletions icu4c/source/test/intltest/numbertest_parse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ void NumberParserTest::testBasic() {
{3, u" 1,234", u"a0", 35, 1234.}, // should not hang
{3, u"NaN", u"0", 3, NAN},
{3, u"NaN E5", u"0", 6, NAN},
{3, u"~100", u"~0", 4, 100.0},
{3, u" ~ 100", u"~0", 6, 100.0},
{3, u"≈100", u"~0", 4, 100.0},
{3, u"100≈", u"~0", 3, 100.0},
{3, u"0", u"0", 1, 0.0}};

parse_flags_t parseFlags = PARSE_FLAG_IGNORE_CASE | PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES;
Expand Down Expand Up @@ -180,6 +184,10 @@ void NumberParserTest::testBasic() {
assertEquals("Strict Parse failed: " + message,
cas.expectedResultDouble, resultObject.getDouble(status));
}

if (status.errDataIfFailureAndReset("parsing test failed")) {
continue;
}
}
}

Expand Down
29 changes: 27 additions & 2 deletions icu4c/source/test/intltest/numfmtst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ void NumberFormatTest::runIndexedTest( int32_t index, UBool exec, const char* &n
TESTCASE_AUTO(TestFormatAttributes);
TESTCASE_AUTO(TestFieldPositionIterator);
TESTCASE_AUTO(TestDecimal);
TESTCASE_AUTO(TestDecimalFormatParse7E);
TESTCASE_AUTO(TestCurrencyFractionDigits);
TESTCASE_AUTO(TestExponentParse);
TESTCASE_AUTO(TestExplicitParents);
Expand Down Expand Up @@ -6981,6 +6982,30 @@ void NumberFormatTest::TestDecimal() {

}

void NumberFormatTest::TestDecimalFormatParse7E() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString testdata = u"~";
icu::Formattable result;
icu::DecimalFormat dfmt(testdata, status);
if (U_SUCCESS(status)) {
dfmt.parse(testdata, result, status);
}

// Test basic behavior
status = U_ZERO_ERROR;
dfmt = icu::DecimalFormat(u"~0", status);
ASSERT_SUCCESS(status);
dfmt.parse(u"200", result, status);
ASSERT_EQUALS(status, U_INVALID_FORMAT_ERROR);
status = U_ZERO_ERROR;
dfmt.parse(u"≈200", result, status);
ASSERT_SUCCESS(status);
if (result.getInt64() != 200) {
errln(UnicodeString(u"Got unexpected parse result: ") +
DoubleToUnicodeString(result.getInt64()));
}
}

void NumberFormatTest::TestCurrencyFractionDigits() {
UErrorCode status = U_ZERO_ERROR;
UnicodeString text1, text2;
Expand Down Expand Up @@ -10048,7 +10073,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() {
parsedStrictValue = ca_strict->getNumber().getInt64();
}
assertEquals("Strict parse of " + inputString + " using " + patternString,
parsedStrictValue, cas.expectedStrictParse);
cas.expectedStrictParse, parsedStrictValue);

ppos.setIndex(0);
df.setLenient(true);
Expand All @@ -10058,7 +10083,7 @@ void NumberFormatTest::Test13733_StrictAndLenient() {
parsedLenientValue = ca_lenient->getNumber().getInt64();
}
assertEquals("Lenient parse of " + inputString + " using " + patternString,
parsedLenientValue, cas.expectedLenientParse);
cas.expectedLenientParse, parsedLenientValue);
}
}

Expand Down
1 change: 1 addition & 0 deletions icu4c/source/test/intltest/numfmtst.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ class NumberFormatTest: public CalendarTimeZoneTest {
void TestLenientParse();

void TestDecimal();
void TestDecimalFormatParse7E();
void TestCurrencyFractionDigits();

void TestExponentParse();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3682,6 +3682,32 @@ public void TestLenientSymbolParsing() {
}
}

/**
 * Exercises DecimalFormat parsing with the "~" pattern character.
 * <ol>
 * <li>Parsing "~" with the pattern "~" must fail with a ParseException.</li>
 * <li>With the strict pattern "~0", "200" must be rejected and "≈200" must
 *     parse to 200.</li>
 * </ol>
 */
@Test
public void TestDecimalFormatParse7E() {
    String testdata = "~";
    DecimalFormat dfmt = new DecimalFormat(testdata);
    try {
        dfmt.parse(testdata);
        errln("parsing ~ should fail with a handled exception");
    } catch (ParseException e) {
        // Expected: the failure must surface as a ParseException.
    }

    // Test basic behavior
    dfmt = new DecimalFormat("~0");
    dfmt.setParseStrict(true);
    try {
        dfmt.parse("200");
        errln("parsing 200 should fail");
    } catch (ParseException e) {
        // Expected: strict parsing rejects input without the approximately sign.
    }
    try {
        Number result = dfmt.parse("≈200");
        // Expected value first, then actual, matching the other assertions in this file.
        assertEquals("parsing with approximately should succeed", 200, result.longValue());
    } catch (ParseException e) {
        errln(e.toString());
    }
}

/*
* Testing currency driven max/min fraction digits problem
* reported by ticket#7282
Expand Down Expand Up @@ -6917,7 +6943,7 @@ public void test13733_StrictAndLenient() {
parsedStrictValue = ca_strict.getNumber().intValue();
}
assertEquals("Strict parse of " + inputString + " using " + patternString,
parsedStrictValue, expectedStrictParse);
expectedStrictParse, parsedStrictValue);

ppos.setIndex(0);
df.setParseStrict(false);
Expand All @@ -6926,7 +6952,7 @@ public void test13733_StrictAndLenient() {
parsedLenientValue = ca_lenient.getNumber().intValue();
}
assertEquals("Strict parse of " + inputString + " using " + patternString,
parsedLenientValue, expectedLenientParse);
expectedLenientParse, parsedLenientValue);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,10 @@ public void testBasic() {
{ 3, " 1,234", "a0", 35, 1234. }, // should not hang
{ 3, "NaN", "0", 3, Double.NaN },
{ 3, "NaN E5", "0", 6, Double.NaN },
{ 3, "~100", "~0", 4, 100.0 },
{ 3, " ~ 100", "~0", 6, 100.0 },
{ 3, "≈100", "~0", 4, 100.0 },
{ 3, "100≈", "~0", 3, 100.0 },
{ 3, "0", "0", 1, 0.0 } };

int parseFlags = ParsingUtils.PARSE_FLAG_IGNORE_CASE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ public static enum Key {
PERCENT_SIGN,
PERMILLE_SIGN,
INFINITY_SIGN,
APPROXIMATELY_SIGN,

// Currency Symbols
DOLLAR_SIGN,
Expand Down Expand Up @@ -263,7 +264,11 @@ public void put(com.ibm.icu.impl.UResource.Key key, Value value, boolean noFallb
assert unicodeSets.containsKey(Key.PERCENT_SIGN);
assert unicodeSets.containsKey(Key.PERMILLE_SIGN);

// The following don't currently have parseLenients in data.
unicodeSets.put(Key.INFINITY_SIGN, new UnicodeSet("[∞]").freeze());
// This set of characters was manually curated from the
// values of the approximatelySign element of CLDR common/main/*.xml files.
unicodeSets.put(Key.APPROXIMATELY_SIGN, new UnicodeSet("[∼~≈≃約]").freeze());

assert unicodeSets.containsKey(Key.DOLLAR_SIGN);
assert unicodeSets.containsKey(Key.POUND_SIGN);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ && length() > 0
case AffixUtils.TYPE_PLUS_SIGN:
addMatcher(factory.plusSign());
break;
case AffixUtils.TYPE_APPROXIMATELY_SIGN:
addMatcher(factory.approximatelySign());
break;
case AffixUtils.TYPE_PERCENT:
addMatcher(factory.percent());
break;
Expand All @@ -87,6 +90,7 @@ && length() > 0
case AffixUtils.TYPE_CURRENCY_TRIPLE:
case AffixUtils.TYPE_CURRENCY_QUAD:
case AffixUtils.TYPE_CURRENCY_QUINT:
case AffixUtils.TYPE_CURRENCY_OVERFLOW:
// All currency symbols use the same matcher
addMatcher(factory.currency());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ public PlusSignMatcher plusSign() {
return PlusSignMatcher.getInstance(symbols, true);
}

/** Returns an approximately-sign matcher for this factory's symbols, with trailing signs allowed. */
public ApproximatelySignMatcher approximatelySign() {
    final boolean allowTrailing = true;
    return ApproximatelySignMatcher.getInstance(symbols, allowTrailing);
}

/** Returns a percent matcher for this factory's symbols. */
public PercentMatcher percent() {
    PercentMatcher matcher = PercentMatcher.getInstance(symbols);
    return matcher;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package com.ibm.icu.impl.number.parse;

import com.ibm.icu.impl.StaticUnicodeSets;
import com.ibm.icu.impl.StringSegment;
import com.ibm.icu.text.DecimalFormatSymbols;

/**
 * SymbolMatcher for the "approximately" sign.
 *
 * <p>Two shared instances, backed by {@link StaticUnicodeSets.Key#APPROXIMATELY_SIGN}, are reused
 * whenever the locale's approximately-sign string is contained in that set; otherwise a dedicated
 * instance is created for the locale's string.
 *
 * @author sffc
 */
public class ApproximatelySignMatcher extends SymbolMatcher {

    /** Shared instance that is disabled once a number has been seen. */
    private static final ApproximatelySignMatcher DEFAULT = new ApproximatelySignMatcher(false);
    /** Shared instance that is never disabled by a previously seen number. */
    private static final ApproximatelySignMatcher DEFAULT_ALLOW_TRAILING = new ApproximatelySignMatcher(true);

    /**
     * Obtains a matcher for the given symbols.
     *
     * @param symbols supplies the approximately-sign string to match
     * @param allowTrailing whether the sign may still match after a number has been seen
     * @return a shared instance when the symbol string is in the default set, else a new instance
     */
    public static ApproximatelySignMatcher getInstance(DecimalFormatSymbols symbols, boolean allowTrailing) {
        String symbolString = symbols.getApproximatelySignString();
        if (!DEFAULT.uniSet.contains(symbolString)) {
            return new ApproximatelySignMatcher(symbolString, allowTrailing);
        }
        if (allowTrailing) {
            return DEFAULT_ALLOW_TRAILING;
        }
        return DEFAULT;
    }

    private final boolean allowTrailing;

    /** Custom-symbol instance: matches symbolString in addition to the default set. */
    private ApproximatelySignMatcher(String symbolString, boolean allowTrailing) {
        super(symbolString, DEFAULT.uniSet);
        this.allowTrailing = allowTrailing;
    }

    /** Default instance backed only by the static APPROXIMATELY_SIGN set. */
    private ApproximatelySignMatcher(boolean allowTrailing) {
        super(StaticUnicodeSets.Key.APPROXIMATELY_SIGN);
        this.allowTrailing = allowTrailing;
    }

    @Override
    protected boolean isDisabled(ParsedNumber result) {
        if (allowTrailing) {
            return false;
        }
        return result.seenNumber();
    }

    @Override
    protected void accept(StringSegment segment, ParsedNumber result) {
        // The only action taken is to pass the segment to setCharsConsumed.
        result.setCharsConsumed(segment);
    }

    @Override
    public String toString() {
        return "<ApproximatelySignMatcher>";
    }

}
Loading