ICU-8.11.tar.gz

[apple/icu.git] / icuSources / common / rbbiscan.cpp
diff --git a/icuSources/common/rbbiscan.cpp b/icuSources/common/rbbiscan.cpp

index 13c44c42ba0dc6351d57c68ec568300ec7c0a29a..75c69f525e9b67596bec6cbec68e4692d0b73f2e 100644 (file)
--- a/icuSources/common/rbbiscan.cpp
+++ b/icuSources/common/rbbiscan.cpp
@@ -2,7 +2,7 @@
  //
  //  file:  rbbiscan.cpp
  //
-//  Copyright (C) 2002-2004, International Business Machines Corporation and others.
+//  Copyright (C) 2002-2006, International Business Machines Corporation and others.
  //  All Rights Reserved.
  //
  //  This file contains the Rule Based Break Iterator Rule Builder functions for
@@ -71,7 +71,7 @@ static const UChar kAny[] = {0x61, 0x6e, 0x79, 0x00};  // "any"
  
  
  U_CDECL_BEGIN
-static void  U_EXPORT2 U_CALLCONV RBBISetTable_deleter(void *p) {
+static void U_CALLCONV RBBISetTable_deleter(void *p) {
      RBBISetTableEl *px = (RBBISetTableEl *)p;
      delete px->key;
      // Note:  px->val is owned by the linked list "fSetsListHead" in scanner.
@@ -142,7 +142,7 @@ RBBIRuleScanner::RBBIRuleScanner(RBBIRuleBuilder *rb)
      }
  
      fSymbolTable = new RBBISymbolTable(this, rb->fRules, *rb->fStatus);
-    fSetTable    = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, rb->fStatus);
+    fSetTable    = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, rb->fStatus);
      uhash_setValueDeleter(fSetTable, RBBISetTable_deleter);
  }
  
@@ -289,6 +289,13 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action)
  
              // Make a symbol table entry for the $variableRef node.
              fSymbolTable->addEntry(varRefNode->fText, varRefNode, *fRB->fStatus);
+            if (U_FAILURE(*fRB->fStatus)) {
+                // This is a round-about way to get the parse position set
+                //  so that duplicate symbols error messages include a line number.
+                UErrorCode t = *fRB->fStatus;
+                *fRB->fStatus = U_ZERO_ERROR;
+                error(t);
+            }
  
              // Clean up the stack.
              delete startExprNode;
@@ -302,7 +309,9 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action)
          if (U_FAILURE(*fRB->fStatus)) {       //   parse tree rooted in TOS node.
              break;
          }
+#ifdef RBBI_DEBUG
          if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rtree")) {printNodeStack("end of rule");}
+#endif
          U_ASSERT(fNodeStackPtr == 1);
  
          // If this rule includes a look-ahead '/', add a endMark node to the
@@ -426,7 +435,6 @@ UBool RBBIRuleScanner::doParseActions(EParseAction action)
              fRB->fRules.extractBetween(n->fFirstPos, n->fLastPos, n->fText);
              break;
          }
-        break;
  
      case doSlash:
          // Scanned a '/', which identifies a look-ahead break position in a rule.
@@ -939,7 +947,9 @@ void RBBIRuleScanner::parse() {
          #endif
  
          for (;;) {
-            if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf(".");}
+            #ifdef RBBI_DEBUG
+                if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf(".");}
+            #endif
              if (tableEl->fCharClass < 127 && fC.fEscaped == FALSE &&   tableEl->fCharClass == fC.fChar) {
                  // Table row specified an individual character, not a set, and
                  //   the input character is not escaped, and
@@ -978,7 +988,7 @@ void RBBIRuleScanner::parse() {
              // No match on this row, advance to the next  row for this state,
              tableEl++;
          }
-        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPrintf("\n");}
+        if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "scan")) { RBBIDebugPuts("");}
  
          //
          // We've found the row of the state table that matches the current input
@@ -1062,8 +1072,6 @@ void RBBIRuleScanner::parse() {
  //
  //---------------------------------------------------------------------------------
  #ifdef RBBI_DEBUG
-void RBBIRuleScanner::printNodeStack(const char *) {}
-#else
  void RBBIRuleScanner::printNodeStack(const char *title) {
      int i;
      RBBIDebugPrintf("%s.  Dumping node stack...\n", title);
@@ -1130,7 +1138,9 @@ void RBBIRuleScanner::scanSet() {
      if (U_FAILURE(localStatus)) {
          //  TODO:  Get more accurate position of the error from UnicodeSet's return info.
          //         UnicodeSet appears to not be reporting correctly at this time.
-        RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
+        #ifdef RBBI_DEBUG
+            RBBIDebugPrintf("UnicodeSet parse postion.ErrorIndex = %d\n", pos.getIndex());
+        #endif
          error(localStatus);
          delete uset;
          return;
@@ -1138,7 +1148,7 @@ void RBBIRuleScanner::scanSet() {
  
      // Verify that the set contains at least one code point.
      //
-    if (uset->charAt(0) == -1) {
+    if (uset->isEmpty()) {
          // This set is empty.
          //  Make it an error, because it almost certainly is not what the user wanted.
          //  Also, avoids having to think about corner cases in the tree manipulation code