ICU-62141.0.1.tar.gz

author Apple <opensource@apple.com>

Wed, 29 Jan 2020 22:34:02 +0000 (22:34 +0000)

committer Apple <opensource@apple.com>

Wed, 29 Jan 2020 22:34:02 +0000 (22:34 +0000)
author Apple <opensource@apple.com>
Wed, 29 Jan 2020 22:34:02 +0000 (22:34 +0000)
committer Apple <opensource@apple.com>
Wed, 29 Jan 2020 22:34:02 +0000 (22:34 +0000)
diff --git a/icuSources/i18n/decimfmtimpl.cpp b/icuSources/i18n/decimfmtimpl.cpp

index 0a0bedc64e1b95fc98808628f26474e8356c55ba..789250cf56ca998db00a02300d7ec4e25ed26d38 100644 (file)
--- a/icuSources/i18n/decimfmtimpl.cpp
+++ b/icuSources/i18n/decimfmtimpl.cpp
@@ -587,6 +587,7 @@ DecimalFormatImpl::initVisibleDigitsWithExponent(
          return digits;
      }
      if (fUseScientific) {
+        digits.setFormatFullPrecision(fFormatFullPrecision); // Apple
          fEffPrecision.initVisibleDigitsWithExponent(
                  number, digits, status);
      } else {
diff --git a/icuSources/i18n/inputext.cpp b/icuSources/i18n/inputext.cpp

index 2d4f8a388af6126f69bae095dc84c7158f2b38aa..e74d636c76c3c61c9c2631d512d2d979073dc0ca 100644 (file)
--- a/icuSources/i18n/inputext.cpp
+++ b/icuSources/i18n/inputext.cpp
@@ -75,7 +75,8 @@ UBool InputText::isSet() const
  
  /**
  *  MungeInput - after getting a set of raw input data to be analyzed, preprocess
-*               it by removing what appears to be html markup.
+*               it by removing what appears to be html markup. Currently only used
+*               by CharsetDetector::detectAll.
  * 
  * @internal
  */
@@ -84,6 +85,7 @@ void InputText::MungeInput(UBool fStripTags) {
      int     dsti = 0;
      uint8_t b;
      bool    inMarkup = FALSE;
+    bool    inCSSDecl = FALSE;
      int32_t openTags = 0;
      int32_t badTags  = 0;
  
@@ -98,22 +100,32 @@ void InputText::MungeInput(UBool fStripTags) {
          for (srci = 0; srci < fRawLength && dsti < BUFFER_SIZE; srci += 1) {
              b = fRawInput[srci];
  
-            if (b == (uint8_t)0x3C) { /* Check for the ASCII '<' */
+            if ((b == (uint8_t)0x3C) && !inCSSDecl) { /* Check for the ASCII '<' */
                  if (inMarkup) {
                      badTags += 1;
                  }
-
                  inMarkup = TRUE;
                  openTags += 1;
              }
  
-            if (! inMarkup) {
+            if ((b == (uint8_t)0x7B) && !inMarkup) { /* Check for the ASCII '{' */
+                if (inCSSDecl) {
+                    badTags += 1;
+                }
+                inCSSDecl = TRUE;
+                openTags += 1;
+            }
+
+            if (!inMarkup && !inCSSDecl) {
                  fInputBytes[dsti++] = b;
              }
  
              if (b == (uint8_t)0x3E) { /* Check for the ASCII '>' */
                  inMarkup = FALSE;
              }
+            if (b == (uint8_t)0x7D) { /* Check for the ASCII '}' */
+                inCSSDecl = FALSE;
+            }
          }
  
          fInputLen = dsti;
diff --git a/icuSources/i18n/precision.cpp b/icuSources/i18n/precision.cpp

index 17517cbba730127633acd29856657a6a0710ca8d..a2c530dafe2643805355d2d4d7960b01ab522988 100644 (file)
--- a/icuSources/i18n/precision.cpp
+++ b/icuSources/i18n/precision.cpp
@@ -423,6 +423,7 @@ ScientificPrecision::initVisibleDigitsWithExponent(
          return digits;
      }
      DigitList digitList;
+    digitList.fFormatFullPrecision = digits.fMantissa.formatFullPrecision(); // Apple
      digitList.set(value);
      return initVisibleDigitsWithExponent(digitList, digits, status);
  }
diff --git a/icuSources/i18n/unicode/ucsdet.h b/icuSources/i18n/unicode/ucsdet.h

index 2a955ea0472220f410c7dde8aab3eb412db7f2e2..892f3ee41265db4c6e970e5e9139dd1be6202ba5 100644 (file)
--- a/icuSources/i18n/unicode/ucsdet.h
+++ b/icuSources/i18n/unicode/ucsdet.h
@@ -354,7 +354,8 @@ ucsdet_getAllDetectableCharsets(const UCharsetDetector *ucsd,  UErrorCode *statu
    *  Test whether input filtering is enabled for this charset detector.
    *  Input filtering removes text that appears to be HTML or xml
    *  markup from the input before applying the code page detection
-  *  heuristics.
+  *  heuristics. Apple addition per <rdar://problem/48093252>: Will also
+  *  remove text that appears to be CSS declaration blocks.
    *
    *  @param ucsd  The charset detector to check.
    *  @return TRUE if filtering is enabled.
@@ -369,6 +370,8 @@ ucsdet_isInputFilterEnabled(const UCharsetDetector *ucsd);
   * Enable filtering of input text. If filtering is enabled,
   * text within angle brackets ("<" and ">") will be removed
   * before detection, which will remove most HTML or xml markup.
+ * Apple addition per <rdar://problem/48093252>: Will also
+ * remove text between '{' and '}', e.g. CSS declaration blocks.
   *
   * @param ucsd   the charset detector to be modified.
   * @param filter <code>true</code> to enable input text filtering.
diff --git a/icuSources/test/cintltst/cnumtst.c b/icuSources/test/cintltst/cnumtst.c

index 2b72aa94d2c8c004f821a5838376146c7a17c16f..360ef7c9528df14e3408aade32b6e2df40b926c3 100644 (file)
--- a/icuSources/test/cintltst/cnumtst.c
+++ b/icuSources/test/cintltst/cnumtst.c
@@ -70,6 +70,7 @@ static void TestParseAltNum(void);
  static void TestParseCurrPatternWithDecStyle(void);
  static void TestParseCases(void);
  static void TestFormatPrecision(void);
+static void TestSciNotationRound(void); // Apple <rdar://problem/49159521>
  
  #define TESTCASE(x) addTest(root, &x, "tsformat/cnumtst/" #x)
  
@@ -107,6 +108,7 @@ void addNumForTest(TestNode** root)
      TESTCASE(TestParseCurrPatternWithDecStyle);
      TESTCASE(TestParseCases);
      TESTCASE(TestFormatPrecision);
+    TESTCASE(TestSciNotationRound);
  }
  
  /* test Parse int 64 */
@@ -3273,4 +3275,41 @@ static void TestFormatPrecision(void) {
      }
  }
  
+// Currently Apple only for <rdar://problem/49159521>
+enum { kBBufMax = 128  };
+static const UChar* pat1 = u"#.##E+00;-#.##E+00";
+// Formats a run of doubles (each 10x the previous) with pat1 and expects every result to begin with "1E+".
+static void TestSciNotationRound(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    UNumberFormat* unum = unum_open(UNUM_PATTERN_DECIMAL, NULL, 0, "en_US", NULL, &status);
+    if ( U_FAILURE(status) ) {
+        log_data_err("unum_open UNUM_PATTERN_DECIMAL with null pattern for \"en_US\" fails with %s\n", u_errorName(status));
+    } else {
+        unum_applyPattern(unum, FALSE, pat1, u_strlen(pat1), NULL, &status);
+        if ( U_FAILURE(status) ) {
+            log_err("unum_applyPattern fails with %s\n", u_errorName(status));
+        } else {
+            double value;
+            UChar ubuf[kUBufMax];
+            char bbuf[kBBufMax]; // UTF-8 copy of ubuf, used only for the failure message
+            int32_t ulen;
+            unum_setAttribute(unum, UNUM_ROUNDING_MODE, UNUM_ROUND_HALFUP);
+            unum_setAttribute(unum, UNUM_MIN_FRACTION_DIGITS, 0);
+            unum_setAttribute(unum, UNUM_MAX_FRACTION_DIGITS, 50); // problem happens at 15 or more
+            // Formatting failures go through log_err (not printf) so the harness counts them as test failures.
+            for (value = 10000000000000000000000.0; value < 1000000000000000000000000000000000000000.0; value *= 10.0) {
+                status = U_ZERO_ERROR; // reset so an error in one iteration does not carry into the next
+                ulen = unum_formatDouble(unum, value, ubuf, kUBufMax, NULL, &status);
+                if ( U_FAILURE(status) ) {
+                    log_err("unum_formatDouble value %.1f status %s\n", value, u_errorName(status));
+                } else if (u_strncmp(ubuf,u"1E+",3) != 0) {
+                    u_strToUTF8(bbuf, kBBufMax, NULL, ubuf, ulen, &status);
+                    log_err("unum_formatDouble value %.1f expected result to begin with 1E+, got %s\n", value, bbuf);
+                }
+            }
+        }
+        unum_close(unum);
+    }
+}
+
  #endif /* #if !UCONFIG_NO_FORMATTING */
diff --git a/icuSources/test/testdata/format.txt b/icuSources/test/testdata/format.txt

index b3492d51e71a389ac44c4d9c0511294482e50974..2cd8e89542b6bf6735bd077d83bedcb9a8fc76b8 100644 (file)
--- a/icuSources/test/testdata/format.txt
+++ b/icuSources/test/testdata/format.txt
@@ -494,35 +494,35 @@ format:table(nofallback) {
                      "",
                      "PATTERN=G y",
                      "YEAR=8",
-                    "Heisei 8"
+                    "Reiwa 8"
                 },
                 {
                      "en_US@calendar=japanese",         
                      "",
                      "PATTERN=G yy",
                      "YEAR=8",
-                    "Heisei 08"
+                    "Reiwa 08"
                 },
                 {
                      "en_US@calendar=japanese",         
                      "",
                      "PATTERN=G yyy",
                      "YEAR=8",
-                    "Heisei 008"
+                    "Reiwa 008"
                 },
                 {
                      "en_US@calendar=japanese",         
                      "",
                      "PATTERN=G yyyy",
                      "YEAR=8",
-                    "Heisei 0008"
+                    "Reiwa 0008"
                 },
                 {
                      "en_US@calendar=japanese",         
                      "",
                      "PATTERN=G yyyyy",
                      "YEAR=8",
-                    "Heisei 00008"
+                    "Reiwa 00008"
                 },
  
              }
author	Apple <opensource@apple.com>
	Wed, 29 Jan 2020 22:34:02 +0000 (22:34 +0000)
committer	Apple <opensource@apple.com>
	Wed, 29 Jan 2020 22:34:02 +0000 (22:34 +0000)
icuSources/i18n/decimfmtimpl.cpp		patch \| blob \| blame \| history
icuSources/i18n/inputext.cpp		patch \| blob \| blame \| history
icuSources/i18n/precision.cpp		patch \| blob \| blame \| history
icuSources/i18n/unicode/ucsdet.h		patch \| blob \| blame \| history
icuSources/test/cintltst/cnumtst.c		patch \| blob \| blame \| history
icuSources/test/testdata/format.txt		patch \| blob \| blame \| history