From 249c4c5ea9376c24572daf9c2effa7484a282f14 Mon Sep 17 00:00:00 2001 From: Apple Date: Wed, 29 Jan 2020 22:34:02 +0000 Subject: [PATCH] ICU-62141.0.1.tar.gz --- icuSources/i18n/decimfmtimpl.cpp | 1 + icuSources/i18n/inputext.cpp | 20 ++++++++++++--- icuSources/i18n/precision.cpp | 1 + icuSources/i18n/unicode/ucsdet.h | 5 +++- icuSources/test/cintltst/cnumtst.c | 39 +++++++++++++++++++++++++++++ icuSources/test/testdata/format.txt | 10 ++++---- 6 files changed, 66 insertions(+), 10 deletions(-) diff --git a/icuSources/i18n/decimfmtimpl.cpp b/icuSources/i18n/decimfmtimpl.cpp index 0a0bedc6..789250cf 100644 --- a/icuSources/i18n/decimfmtimpl.cpp +++ b/icuSources/i18n/decimfmtimpl.cpp @@ -587,6 +587,7 @@ DecimalFormatImpl::initVisibleDigitsWithExponent( return digits; } if (fUseScientific) { + digits.setFormatFullPrecision(fFormatFullPrecision); // Apple fEffPrecision.initVisibleDigitsWithExponent( number, digits, status); } else { diff --git a/icuSources/i18n/inputext.cpp b/icuSources/i18n/inputext.cpp index 2d4f8a38..e74d636c 100644 --- a/icuSources/i18n/inputext.cpp +++ b/icuSources/i18n/inputext.cpp @@ -75,7 +75,8 @@ UBool InputText::isSet() const /** * MungeInput - after getting a set of raw input data to be analyzed, preprocess -* it by removing what appears to be html markup. +* it by removing what appears to be html markup. Currently only used +* by CharsetDetector::detectAll. * * @internal */ @@ -84,6 +85,7 @@ void InputText::MungeInput(UBool fStripTags) { int dsti = 0; uint8_t b; bool inMarkup = FALSE; + bool inCSSDecl = FALSE; int32_t openTags = 0; int32_t badTags = 0; @@ -98,22 +100,32 @@ void InputText::MungeInput(UBool fStripTags) { for (srci = 0; srci < fRawLength && dsti < BUFFER_SIZE; srci += 1) { b = fRawInput[srci]; - if (b == (uint8_t)0x3C) { /* Check for the ASCII '<' */ + if ((b == (uint8_t)0x3C) && !inCSSDecl) { /* Check for the ASCII '<' */ if (inMarkup) { badTags += 1; } - inMarkup = TRUE; openTags += 1; } - if (! 
inMarkup) { + if ((b == (uint8_t)0x7B) && !inMarkup) { /* Check for the ASCII '{' */ + if (inCSSDecl) { + badTags += 1; + } + inCSSDecl = TRUE; + openTags += 1; + } + + if (!inMarkup && !inCSSDecl) { fInputBytes[dsti++] = b; } if (b == (uint8_t)0x3E) { /* Check for the ASCII '>' */ inMarkup = FALSE; } + if (b == (uint8_t)0x7D) { /* Check for the ASCII '}' */ + inCSSDecl = FALSE; + } } fInputLen = dsti; diff --git a/icuSources/i18n/precision.cpp b/icuSources/i18n/precision.cpp index 17517cbb..a2c530da 100644 --- a/icuSources/i18n/precision.cpp +++ b/icuSources/i18n/precision.cpp @@ -423,6 +423,7 @@ ScientificPrecision::initVisibleDigitsWithExponent( return digits; } DigitList digitList; + digitList.fFormatFullPrecision = digits.fMantissa.formatFullPrecision(); // Apple digitList.set(value); return initVisibleDigitsWithExponent(digitList, digits, status); } diff --git a/icuSources/i18n/unicode/ucsdet.h b/icuSources/i18n/unicode/ucsdet.h index 2a955ea0..892f3ee4 100644 --- a/icuSources/i18n/unicode/ucsdet.h +++ b/icuSources/i18n/unicode/ucsdet.h @@ -354,7 +354,8 @@ ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd, UErrorCode *statu * Test whether input filtering is enabled for this charset detector. * Input filtering removes text that appears to be HTML or xml * markup from the input before applying the code page detection - * heuristics. + * heuristics. Apple addition: Will also + * remove text that appears to be CSS declaration blocks. * * @param ucsd The charset detector to check. * @return TRUE if filtering is enabled. @@ -369,6 +370,8 @@ ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd); * Enable filtering of input text. If filtering is enabled, * text within angle brackets ("<" and ">") will be removed * before detection, which will remove most HTML or xml markup. + * Apple addition: Will also + * remove text between '{' and '}', e.g. CSS declaration blocks. * * @param ucsd the charset detector to be modified. 
* @param filter true to enable input text filtering. diff --git a/icuSources/test/cintltst/cnumtst.c b/icuSources/test/cintltst/cnumtst.c index 2b72aa94..360ef7c9 100644 --- a/icuSources/test/cintltst/cnumtst.c +++ b/icuSources/test/cintltst/cnumtst.c @@ -70,6 +70,7 @@ static void TestParseAltNum(void); static void TestParseCurrPatternWithDecStyle(void); static void TestParseCases(void); static void TestFormatPrecision(void); +static void TestSciNotationRound(void); // Apple #define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x) @@ -107,6 +108,7 @@ void addNumForTest(TestNode** root) TESTCASE(TestParseCurrPatternWithDecStyle); TESTCASE(TestParseCases); TESTCASE(TestFormatPrecision); + TESTCASE(TestSciNotationRound); } /* test Parse int 64 */ @@ -3273,4 +3275,41 @@ static void TestFormatPrecision(void) { } } +// Currently Apple only: formats successive powers of ten with a scientific pattern and checks that each result starts with "1E+" +enum { kBBufMax = 128 }; +static const UChar* pat1 = u"#.##E+00;-#.##E+00"; +static void TestSciNotationRound(void) { + UErrorCode status = U_ZERO_ERROR; + UNumberFormat* unum = unum_open(UNUM_PATTERN_DECIMAL, NULL, 0, "en_US", NULL, &status); + if ( U_FAILURE(status) ) { + log_data_err("unum_open UNUM_PATTERN_DECIMAL with null pattern for \"en_US\" fails with %s\n", u_errorName(status)); + } else { + unum_applyPattern(unum, FALSE, pat1, u_strlen(pat1), NULL, &status); + if ( U_FAILURE(status) ) { + log_err("unum_applyPattern fails with %s\n", u_errorName(status)); + } else { + double value; + UChar ubuf[kUBufMax]; + char bbuf[kBBufMax]; + int32_t ulen; + + unum_setAttribute(unum, UNUM_ROUNDING_MODE, UNUM_ROUND_HALFUP); + unum_setAttribute(unum, UNUM_MIN_FRACTION_DIGITS, 0); + unum_setAttribute(unum, UNUM_MAX_FRACTION_DIGITS, 50); // problem happens at 15 or more + + for (value = 10000000000000000000000.0; value < 1000000000000000000000000000000000000000.0; value *= 10.0) { + status = U_ZERO_ERROR; + ulen = unum_formatDouble(unum, value, ubuf, kUBufMax, NULL, &status); + if ( U_FAILURE(status) ) { + log_err("unum_formatDouble 
value %.1f status %s\n", value, u_errorName(status)); /* log_err, not printf, so a formatting failure is counted as a test failure */ + } else if (u_strncmp(ubuf,u"1E+",3) != 0) { + u_strToUTF8(bbuf, kBBufMax, NULL, ubuf, ulen, &status); + log_err("unum_formatDouble value %.1f expected result to begin with 1E+, got %s\n", value, bbuf); + } + } + } + unum_close(unum); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ diff --git a/icuSources/test/testdata/format.txt b/icuSources/test/testdata/format.txt index b3492d51..2cd8e895 100644 --- a/icuSources/test/testdata/format.txt +++ b/icuSources/test/testdata/format.txt @@ -494,35 +494,35 @@ format:table(nofallback) { "", "PATTERN=G y", "YEAR=8", - "Heisei 8" + "Reiwa 8" }, { "en_US@calendar=japanese", "", "PATTERN=G yy", "YEAR=8", - "Heisei 08" + "Reiwa 08" }, { "en_US@calendar=japanese", "", "PATTERN=G yyy", "YEAR=8", - "Heisei 008" + "Reiwa 008" }, { "en_US@calendar=japanese", "", "PATTERN=G yyyy", "YEAR=8", - "Heisei 0008" + "Reiwa 0008" }, { "en_US@calendar=japanese", "", "PATTERN=G yyyyy", "YEAR=8", - "Heisei 00008" + "Reiwa 00008" }, } -- 2.47.2