ICU-531.31.tar.gz

[apple/icu.git] / icuSources / i18n / uspoof_conf.cpp
diff --git a/icuSources/i18n/uspoof_conf.cpp b/icuSources/i18n/uspoof_conf.cpp

index 7a8f2070764693478d53fd25c5a2bd3d7ae3e5e5..2b2db9fe661c2bad65101d41ece80029bcbf895f 100644 (file)
--- a/icuSources/i18n/uspoof_conf.cpp
+++ b/icuSources/i18n/uspoof_conf.cpp
@@ -1,7 +1,7 @@
  /*
  ******************************************************************************
  *
-*   Copyright (C) 2008-2010, International Business Machines
+*   Copyright (C) 2008-2013, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -219,6 +219,7 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL
      fInput = static_cast<UChar *>(uprv_malloc((inputLen+1) * sizeof(UChar)));
      if (fInput == NULL) {
          status = U_MEMORY_ALLOCATION_ERROR;
+        return;
      }
      u_strFromUTF8(fInput, inputLen+1, NULL, confusables, confusablesLen, &status);
  
@@ -232,19 +233,21 @@ void ConfusabledataBuilder::build(const char * confusables, int32_t confusablesL
      //   Capture Group 8:  A syntactically invalid line.  Anything that didn't match before.
      // Example Line from the confusables.txt source file:
      //   "1D702 ;      006E 0329 ;     SL      # MATHEMATICAL ITALIC SMALL ETA ... "
-    fParseLine = uregex_openC(
+    UnicodeString pattern(
          "(?m)^[ \\t]*([0-9A-Fa-f]+)[ \\t]+;"      // Match the source char
          "[ \\t]*([0-9A-Fa-f]+"                    // Match the replacement char(s)
             "(?:[ \\t]+[0-9A-Fa-f]+)*)[ \\t]*;"    //     (continued)
          "\\s*(?:(SL)|(SA)|(ML)|(MA))"             // Match the table type
          "[ \\t]*(?:#.*?)?$"                       // Match any trailing #comment
          "|^([ \\t]*(?:#.*?)?)$"       // OR match empty lines or lines with only a #comment
-        "|^(.*?)$",                   // OR match any line, which catches illegal lines.
-        0, NULL, &status);
+        "|^(.*?)$", -1, US_INV);      // OR match any line, which catches illegal lines.
+    // TODO: Why are we using the regex C API here? C++ would just take UnicodeString...
+    fParseLine = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
  
      // Regular expression for parsing a hex number out of a space-separated list of them.
      //   Capture group 1 gets the number, with spaces removed.
-    fParseHexNum = uregex_openC("\\s*([0-9A-F]+)", 0, NULL, &status);
+    pattern = UNICODE_STRING_SIMPLE("\\s*([0-9A-F]+)");
+    fParseHexNum = uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, &status);
  
      // Zap any Byte Order Mark at the start of input.  Changing it to a space is benign
      //   given the syntax of the input.
@@ -412,6 +415,7 @@ void ConfusabledataBuilder::outputData(UErrorCode &status) {
      int32_t previousKey = 0;
      for (i=0; i<numKeys; i++) {
          int32_t key =  fKeyVec->elementAti(i);
+        (void)previousKey;         // Suppress unused variable warning on gcc.
          U_ASSERT((key & 0x00ffffff) >= (previousKey & 0x00ffffff));
          U_ASSERT((key & 0xff000000) != 0);
          keys[i] = key;
@@ -476,6 +480,7 @@ void ConfusabledataBuilder::outputData(UErrorCode &status) {
          uint32_t length = static_cast<uint32_t>(fStringLengthsTable->elementAti(i+1));
          U_ASSERT(offset < stringsLength);
          U_ASSERT(length < 40);
+        (void)previousLength;  // Suppress unused variable warning on gcc.
          U_ASSERT(length > previousLength);
          stringLengths[destIndex++] = static_cast<uint16_t>(offset);
          stringLengths[destIndex++] = static_cast<uint16_t>(length);