ICU-461.18.tar.gz

[apple/icu.git] / icuSources / i18n / cpdtrans.cpp
diff --git a/icuSources/i18n/cpdtrans.cpp b/icuSources/i18n/cpdtrans.cpp

index fa3b558956348bb38ad6025c41ef1dac985ff4df..ebe5d44bf6c25e5a65fbd32afe883db26ee3a887 100644 (file)
--- a/icuSources/i18n/cpdtrans.cpp
+++ b/icuSources/i18n/cpdtrans.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 1999-2004, International Business Machines
+*   Copyright (C) 1999-2008, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -20,7 +20,7 @@
  #include "cmemory.h"
  
  // keep in sync with Transliterator
-static const UChar ID_SEP   = 0x002D; /*-*/
+//static const UChar ID_SEP   = 0x002D; /*-*/
  static const UChar ID_DELIM = 0x003B; /*;*/
  static const UChar NEWLINE  = 10;
  
@@ -30,6 +30,8 @@ static const UChar COLON_COLON[] = {0x3A, 0x3A, 0}; //"::"
  
  U_NAMESPACE_BEGIN
  
+const UChar CompoundTransliterator::PASS_STRING[] = { 0x0025, 0x0050, 0x0061, 0x0073, 0x0073, 0 }; // "%Pass"
+
  UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompoundTransliterator)
  
  /**
@@ -51,7 +53,7 @@ CompoundTransliterator::CompoundTransliterator(
                             int32_t transliteratorCount,
                             UnicodeFilter* adoptedFilter) :
      Transliterator(joinIDs(transliterators, transliteratorCount), adoptedFilter),
-    trans(0), count(0), compoundRBTIndex(-1)  {
+    trans(0), count(0), numAnonymousRBTs(0)  {
      setTransliterators(transliterators, transliteratorCount);
  }
  
@@ -68,20 +70,36 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
                                UParseError& /*parseError*/,
                                UErrorCode& status) :
      Transliterator(id, adoptedFilter),
-    trans(0), compoundRBTIndex(-1) {
+    trans(0), numAnonymousRBTs(0) {
      // TODO add code for parseError...currently unused, but
      // later may be used by parsing code...
-    init(id, direction, -1, 0, TRUE, status);
+    init(id, direction, TRUE, status);
  }
  
  CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
                                UParseError& /*parseError*/,
                                UErrorCode& status) :
      Transliterator(id, 0), // set filter to 0 here!
-    trans(0), compoundRBTIndex(-1) {
+    trans(0), numAnonymousRBTs(0) {
      // TODO add code for parseError...currently unused, but
      // later may be used by parsing code...
-    init(id, UTRANS_FORWARD, -1, 0, TRUE, status);
+    init(id, UTRANS_FORWARD, TRUE, status);
+}
+
+
+/**
+ * Private constructor for use of TransliteratorAlias: initializes from a prebuilt list (forward direction, no reverse-ID fixup) and stores anonymousRBTs as numAnonymousRBTs.
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
+                                              UVector& list,
+                                              UnicodeFilter* adoptedFilter,
+                                              int32_t anonymousRBTs,
+                                              UParseError& /*parseError*/,
+                                              UErrorCode& status) :
+    Transliterator(newID, adoptedFilter),
+    trans(0), numAnonymousRBTs(anonymousRBTs)
+{
+    init(list, UTRANS_FORWARD, FALSE, status);
  }
  
  /**
@@ -93,7 +111,7 @@ CompoundTransliterator::CompoundTransliterator(UVector& list,
                                                 UParseError& /*parseError*/,
                                                 UErrorCode& status) :
      Transliterator(EMPTY, NULL),
-    trans(0), compoundRBTIndex(-1)
+    trans(0), numAnonymousRBTs(0)
  {
      // TODO add code for parseError...currently unused, but
      // later may be used by parsing code...
@@ -101,20 +119,14 @@ CompoundTransliterator::CompoundTransliterator(UVector& list,
      // assume caller will fixup ID
  }
  
-/**
- * Private constructor for compound RBTs.  Construct a compound
- * transliterator using the given idBlock, with the adoptedTrans
- * inserted at the idSplitPoint.
- */
-CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
-                                               const UnicodeString& idBlock,
-                                               int32_t idSplitPoint,
-                                               Transliterator *adoptedTrans,
+CompoundTransliterator::CompoundTransliterator(UVector& list,
+                                               int32_t anonymousRBTs,
+                                               UParseError& /*parseError*/,
                                                 UErrorCode& status) :
-    Transliterator(newID, 0),
-    trans(0), compoundRBTIndex(-1)
+    Transliterator(EMPTY, NULL),
+    trans(0), numAnonymousRBTs(anonymousRBTs)
  {
-    init(idBlock, UTRANS_FORWARD, idSplitPoint, adoptedTrans, FALSE, status);
+    init(list, UTRANS_FORWARD, FALSE, status);
  }
  
  /**
@@ -135,14 +147,11 @@ CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
   */
  void CompoundTransliterator::init(const UnicodeString& id,
                                    UTransDirection direction,
-                                  int32_t idSplitPoint,
-                                  Transliterator *adoptedSplitTrans,
                                    UBool fixReverseID,
                                    UErrorCode& status) {
      // assert(trans == 0);
  
      if (U_FAILURE(status)) {
-        delete adoptedSplitTrans;
          return;
      }
  
@@ -152,12 +161,11 @@ void CompoundTransliterator::init(const UnicodeString& id,
      if (!TransliteratorIDParser::parseCompoundID(id, direction,
                                        regenID, list, compoundFilter)) {
          status = U_INVALID_ID;
-        delete adoptedSplitTrans;
          delete compoundFilter;
          return;
      }
  
-    compoundRBTIndex = TransliteratorIDParser::instantiateList(list, adoptedSplitTrans, idSplitPoint, status);
+    TransliteratorIDParser::instantiateList(list, status);
  
      init(list, direction, fixReverseID, status);
  
@@ -209,11 +217,6 @@ void CompoundTransliterator::init(UVector& list,
          trans[i] = (Transliterator*) list.elementAt(j);
      }
  
-    // Fix compoundRBTIndex for REVERSE transliterators
-    if (compoundRBTIndex >= 0 && direction == UTRANS_REVERSE) {
-        compoundRBTIndex = count - 1 - compoundRBTIndex;
-    }
-
      // If the direction is UTRANS_REVERSE then we may need to fix the
      // ID.
      if (direction == UTRANS_REVERSE && fixReverseID) {
@@ -251,7 +254,7 @@ UnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterat
   * Copy constructor.
   */
  CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) :
-    Transliterator(t), trans(0), count(0), compoundRBTIndex(-1) {
+    Transliterator(t), trans(0), count(0), numAnonymousRBTs(-1) {
      *this = t;
  }
  
@@ -277,22 +280,43 @@ void CompoundTransliterator::freeTransliterators(void) {
   * Assignment operator.
   */
  CompoundTransliterator& CompoundTransliterator::operator=(
-                                             const CompoundTransliterator& t) {
+                                             const CompoundTransliterator& t)
+{
      Transliterator::operator=(t);
-    int32_t i;
-    for (i=0; i<count; ++i) {
-        delete trans[i];
-        trans[i] = 0;
+    int32_t i = 0;
+    UBool failed = FALSE;
+    if (trans != NULL) {
+        for (i=0; i<count; ++i) {
+            delete trans[i];
+            trans[i] = 0;
+        }
      }
      if (t.count > count) {
-        uprv_free(trans);
+        if (trans != NULL) {
+            uprv_free(trans);
+        }
          trans = (Transliterator **)uprv_malloc(t.count * sizeof(Transliterator *));
      }
      count = t.count;
-    for (i=0; i<count; ++i) {
-        trans[i] = t.trans[i]->clone();
+    if (trans != NULL) {
+        for (i=0; i<count; ++i) {
+            trans[i] = t.trans[i]->clone();
+            if (trans[i] == NULL) {
+                failed = TRUE;
+                break;
+            }
+        }
+    }
+
+    // if memory allocation failed delete backwards trans array
+    if (failed && i > 0) {
+        int32_t n;
+        for (n = i-1; n >= 0; n--) {
+            uprv_free(trans[n]);
+            trans[n] = NULL;
+        }
      }
-    compoundRBTIndex = t.compoundRBTIndex;
+    numAnonymousRBTs = t.numAnonymousRBTs;
      return *this;
  }
  
@@ -323,8 +347,25 @@ const Transliterator& CompoundTransliterator::getTransliterator(int32_t index) c
  void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[],
                                                  int32_t transCount) {
      Transliterator** a = (Transliterator **)uprv_malloc(transCount * sizeof(Transliterator *));
-    for (int32_t i=0; i<transCount; ++i) {
+    if (a == NULL) {
+        return;
+    }
+    int32_t i = 0;
+    UBool failed = FALSE;
+    for (i=0; i<transCount; ++i) {
          a[i] = transliterators[i]->clone();
+        if (a[i] == NULL) {
+            failed = TRUE;
+            break;
+        }
+    }
+    if (failed && i > 0) {
+        int32_t n;
+        for (n = i-1; n >= 0; n--) {
+            uprv_free(a[n]);
+            a[n] = NULL;
+        }
+        return;
      }
      adoptTransliterators(a, transCount);
  }
@@ -359,7 +400,7 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
      // compoundRBTIndex >= 0.  For the transliterator at compoundRBTIndex,
      // we do call toRules() recursively.
      rulesSource.truncate(0);
-    if (compoundRBTIndex >= 0 && getFilter() != NULL) {
+    if (numAnonymousRBTs >= 1 && getFilter() != NULL) {
          // If we are a compound RBT and if we have a global
          // filter, then emit it at the top.
          UnicodeString pat;
@@ -367,8 +408,24 @@ UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource,
      }
      for (int32_t i=0; i<count; ++i) {
          UnicodeString rule;
-        if (i == compoundRBTIndex) {
+
+        // Anonymous RuleBasedTransliterators (inline rules and
+        // ::BEGIN/::END blocks) are given IDs that begin with
+        // "%Pass": use toRules() to write all the rules to the output
+        // (and insert "::Null;" if we have two in a row)
+        if (trans[i]->getID().startsWith(PASS_STRING)) {
              trans[i]->toRules(rule, escapeUnprintable);
+            if (numAnonymousRBTs > 1 && i > 0 && trans[i - 1]->getID().startsWith(PASS_STRING))
+                rule = UNICODE_STRING_SIMPLE("::Null;") + rule;
+
+        // we also use toRules() on CompoundTransliterators (which we
+        // check for by looking for a semicolon in the ID)-- this gets
+        // the list of their child transliterators output in the right
+        // format
+        } else if (trans[i]->getID().indexOf(ID_DELIM) >= 0) {
+            trans[i]->toRules(rule, escapeUnprintable);
+
+        // for everything else, use Transliterator::toRules()
          } else {
              trans[i]->Transliterator::toRules(rule, escapeUnprintable);
          }