ICU-64232.0.1.tar.gz

[apple/icu.git] / icuSources / i18n / translit.cpp
diff --git a/icuSources/i18n/translit.cpp b/icuSources/i18n/translit.cpp

index f5e8a56e722ebb4787eef1521b1c27fd39d71d93..aaaee8c9206b633aa3b4d635a222475d6deda91c 100644 (file)
--- a/icuSources/i18n/translit.cpp
+++ b/icuSources/i18n/translit.cpp
@@ -1,12 +1,16 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
-**********************************************************************
-*   Copyright (C) 1999-2003, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   Date        Name        Description
-*   11/17/99    aliu        Creation.
-**********************************************************************
-*/
+ **********************************************************************
+ *   Copyright (C) 1999-2016, International Business Machines
+ *   Corporation and others.  All Rights Reserved.
+ **********************************************************************
+ *   Date        Name        Description
+ *   11/17/99    aliu        Creation.
+ **********************************************************************
+ */
+
+#include "utypeinfo.h"  // for 'typeid' to work
  
  #include "unicode/utypes.h"
  
@@ -19,9 +23,10 @@
  #include "unicode/rep.h"
  #include "unicode/resbund.h"
  #include "unicode/unifilt.h"
-#include "unicode/unifltlg.h"
  #include "unicode/uniset.h"
  #include "unicode/uscript.h"
+#include "unicode/strenum.h"
+#include "unicode/utf16.h"
  #include "cpdtrans.h"
  #include "nultrans.h"
  #include "rbt_data.h"
@@ -35,6 +40,7 @@
  #include "tolowtrn.h"
  #include "toupptrn.h"
  #include "uni2name.h"
+#include "brktrans.h"
  #include "esctrn.h"
  #include "unesctrn.h"
  #include "tridpars.h"
@@ -46,6 +52,7 @@
  #include "uassert.h"
  #include "cmemory.h"
  #include "cstring.h"
+#include "uinvchar.h"
  
  static const UChar TARGET_SEP  = 0x002D; /*-*/
  static const UChar ID_DELIM    = 0x003B; /*;*/
@@ -84,33 +91,29 @@ static const char RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs";
  /**
   * The mutex controlling access to registry object.
   */
-static UMTX registryMutex = 0;
+static icu::UMutex *registryMutex() {
+    static icu::UMutex *m = STATIC_NEW(icu::UMutex);
+    return m;
+}
  
  /**
   * System transliterator registry; non-null when initialized.
   */
-static TransliteratorRegistry* registry = 0;
+static icu::TransliteratorRegistry* registry = 0;
  
  // Macro to check/initialize the registry. ONLY USE WITHIN
  // MUTEX. Avoids function call when registry is initialized.
-#define HAVE_REGISTRY (registry!=0 || initializeRegistry())
-
-// Empty string
-static const UChar EMPTY[] = {0}; //""
+#define HAVE_REGISTRY(status) (registry!=0 || initializeRegistry(status))
  
  U_NAMESPACE_BEGIN
  
-/**
- * Class identifier for subclasses of Transliterator that do not
- * define their class (anonymous subclasses).
- */
-const char Transliterator::fgClassID = 0; // Value is irrelevant
+UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator)
  
  /**
   * Return TRUE if the given UTransPosition is valid for text of
   * the given length.
   */
-inline UBool positionIsValid(UTransPosition& index, int32_t len) {
+static inline UBool positionIsValid(UTransPosition& index, int32_t len) {
      return !(index.contextStart < 0 ||
               index.start < index.contextStart ||
               index.limit < index.start ||
@@ -129,13 +132,20 @@ inline UBool positionIsValid(UTransPosition& index, int32_t len) {
  Transliterator::Transliterator(const UnicodeString& theID,
                                 UnicodeFilter* adoptedFilter) :
      UObject(), ID(theID), filter(adoptedFilter),
-    maximumContextLength(0) {}
+    maximumContextLength(0)
+{
+    // NUL-terminate the ID string, which is a non-aliased copy.
+    ID.append((UChar)0);
+    ID.truncate(ID.length()-1);
+}
  
  /**
   * Destructor.
   */
  Transliterator::~Transliterator() {
-    delete filter;
+    if (filter) {
+        delete filter;
+    }
  }
  
  /**
@@ -143,18 +153,30 @@ Transliterator::~Transliterator() {
   */
  Transliterator::Transliterator(const Transliterator& other) :
      UObject(other), ID(other.ID), filter(0),
-    maximumContextLength(other.maximumContextLength) {
+    maximumContextLength(other.maximumContextLength)
+{
+    // NUL-terminate the ID string, which is a non-aliased copy.
+    ID.append((UChar)0);
+    ID.truncate(ID.length()-1);
+
      if (other.filter != 0) {
          // We own the filter, so we must have our own copy
          filter = (UnicodeFilter*) other.filter->clone();
      }
  }
  
+Transliterator* Transliterator::clone() const {
+    return NULL;
+}
+
  /**
   * Assignment operator.
   */
  Transliterator& Transliterator::operator=(const Transliterator& other) {
      ID = other.ID;
+    // NUL-terminate the ID string
+    ID.getTerminatedBuffer();
+
      maximumContextLength = other.maximumContextLength;
      adoptFilter((other.filter == 0) ? 0 : (UnicodeFilter*) other.filter->clone());
      return *this;
@@ -352,7 +374,7 @@ void Transliterator::_transliterate(Replaceable& text,
      }
  
      if (index.limit > 0 &&
-        UTF_IS_LEAD(text.charAt(index.limit - 1))) {
+        U16_IS_LEAD(text.charAt(index.limit - 1))) {
          // Oops, there is a dangling lead surrogate in the buffer.
          // This will break most transliterators, since they will
          // assume it is part of a pair.  Don't transliterate until
@@ -391,7 +413,7 @@ void Transliterator::_transliterate(Replaceable& text,
      int32_t n = getMaximumContextLength();
      while (newCS > originalStart && n-- > 0) {
          --newCS;
-        newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1;
+        newCS -= U16_LENGTH(text.char32At(newCS)) - 1;
      }
      index.contextStart = uprv_max(newCS, originalStart);
  #endif
@@ -462,14 +484,14 @@ void Transliterator::filteredTransliterate(Replaceable& text,
              UChar32 c;
              while (index.start < globalLimit &&
                     !filter->contains(c=text.char32At(index.start))) {
-                index.start += UTF_CHAR_LENGTH(c);
+                index.start += U16_LENGTH(c);
              }
  
              // Find the end of this run of unfiltered chars
              index.limit = index.start;
              while (index.limit < globalLimit &&
                     filter->contains(c=text.char32At(index.limit))) {
-                index.limit += UTF_CHAR_LENGTH(c);
+                index.limit += U16_LENGTH(c);
              }
          }
  
@@ -552,8 +574,7 @@ void Transliterator::filteredTransliterate(Replaceable& text,
              // transliterations and commit complete transliterations.
              for (;;) {
                  // Length of additional code point, either one or two
-                int32_t charLength =
-                    UTF_CHAR_LENGTH(text.char32At(passLimit));
+                int32_t charLength = U16_LENGTH(text.char32At(passLimit));
                  passLimit += charLength;
                  if (passLimit > runLimit) {
                      break;
@@ -579,7 +600,7 @@ void Transliterator::filteredTransliterate(Replaceable& text,
                      int32_t rs = rollbackStart + delta - (index.limit - passStart);
  
                      // Delete the partially transliterated text
-                    text.handleReplaceBetween(passStart, index.limit, EMPTY);
+                    text.handleReplaceBetween(passStart, index.limit, UnicodeString());
  
                      // Copy the rollback text back
                      text.copy(rs, rs + uncommittedLength, passStart);
@@ -617,7 +638,7 @@ void Transliterator::filteredTransliterate(Replaceable& text,
              globalLimit += totalDelta;
  
              // Delete the rollback copy
-            text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY);
+            text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, UnicodeString());
  
              // Move start past committed text
              index.start = passStart;
@@ -690,7 +711,7 @@ const UnicodeString& Transliterator::getID(void) const {
   * display to the user in the default locale.  See {@link
   * #getDisplayName(Locale)} for details.
   */
-UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
+UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& ID,
                                                UnicodeString& result) {
      return getDisplayName(ID, Locale::getDefault(), result);
  }
@@ -713,12 +734,12 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& ID,
   * localized.
   * @see java.text.MessageFormat
   */
-UnicodeString& Transliterator::getDisplayName(const UnicodeString& id,
+UnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id,
                                                const Locale& inLocale,
                                                UnicodeString& result) {
      UErrorCode status = U_ZERO_ERROR;
  
-    ResourceBundle bundle(u_getDataDirectory(), inLocale, status);
+    ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status);
  
      // Suspend checking status until later...
  
@@ -739,65 +760,69 @@ UnicodeString& Transliterator::getDisplayName(const UnicodeString& id,
      ID.append(TARGET_SEP).append(target).append(variant);
  
      // build the char* key
-    char key[200];
-    uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
-    int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
-    ID.extract(0, (int32_t)(sizeof(key)-length), key+length, "");
+    if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) {
+        char key[200];
+        uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX);
+        int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX);
+        ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV);
  
-    // Try to retrieve a UnicodeString from the bundle.
-    UnicodeString resString = bundle.getStringEx(key, status);
+        // Try to retrieve a UnicodeString from the bundle.
+        UnicodeString resString = bundle.getStringEx(key, status);
  
-    if (U_SUCCESS(status) && resString.length() != 0) {
-        return result = resString; // [sic] assign & return
-    }
+        if (U_SUCCESS(status) && resString.length() != 0) {
+            return result = resString; // [sic] assign & return
+        }
  
  #if !UCONFIG_NO_FORMATTING
-    // We have failed to get a name from the locale data.  This is
-    // typical, since most transliterators will not have localized
-    // name data.  The next step is to retrieve the MessageFormat
-    // pattern from the locale data and to use it to synthesize the
-    // name from the ID.
-
-    status = U_ZERO_ERROR;
-    resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);
-
-    if (U_SUCCESS(status) && resString.length() != 0) {
-        MessageFormat msg(resString, inLocale, status);
-        // Suspend checking status until later...
-
-        // We pass either 2 or 3 Formattable objects to msg.
-        Formattable args[3];
-        int32_t nargs;
-        args[0].setLong(2); // # of args to follow
-        args[1].setString(source);
-        args[2].setString(target);
-        nargs = 3;
-
-        // Use display names for the scripts, if they exist
-        UnicodeString s;
-        length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
-        for (int j=1; j<=2; ++j) {
-            status = U_ZERO_ERROR;
-            uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
-            args[j].getString(s);
-            s.extract(0, sizeof(key)-length-1, key+length, "");
+        // We have failed to get a name from the locale data.  This is
+        // typical, since most transliterators will not have localized
+        // name data.  The next step is to retrieve the MessageFormat
+        // pattern from the locale data and to use it to synthesize the
+        // name from the ID.
  
-            resString = bundle.getStringEx(key, status);
+        status = U_ZERO_ERROR;
+        resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status);
+
+        if (U_SUCCESS(status) && resString.length() != 0) {
+            MessageFormat msg(resString, inLocale, status);
+            // Suspend checking status until later...
+
+            // We pass either 2 or 3 Formattable objects to msg.
+            Formattable args[3];
+            int32_t nargs;
+            args[0].setLong(2); // # of args to follow
+            args[1].setString(source);
+            args[2].setString(target);
+            nargs = 3;
+
+            // Use display names for the scripts, if they exist
+            UnicodeString s;
+            length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX);
+            for (int j=1; j<=2; ++j) {
+                status = U_ZERO_ERROR;
+                uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX);
+                args[j].getString(s);
+                if (uprv_isInvariantUString(s.getBuffer(), s.length())) {
+                    s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV);
+
+                    resString = bundle.getStringEx(key, status);
+
+                    if (U_SUCCESS(status)) {
+                        args[j] = resString;
+                    }
+                }
+            }
  
+            status = U_ZERO_ERROR;
+            FieldPosition pos; // ignored by msg
+            msg.format(args, nargs, result, pos, status);
              if (U_SUCCESS(status)) {
-                args[j] = resString;
+                result.append(variant);
+                return result;
              }
          }
-        
-        status = U_ZERO_ERROR;
-        FieldPosition pos; // ignored by msg
-        msg.format(args, nargs, result, pos, status);
-        if (U_SUCCESS(status)) {
-            result.append(variant);
-            return result;
-        }
-    }
  #endif
+    }
  
      // We should not reach this point unless there is something
      // wrong with the build or the RB_DISPLAY_NAME_PATTERN has
@@ -865,9 +890,11 @@ Transliterator* Transliterator::createInverse(UErrorCode& status) const {
      return Transliterator::createInstance(ID, UTRANS_REVERSE,parseError,status);
  }
  
-Transliterator* Transliterator::createInstance(const UnicodeString& ID,
-                                               UTransDirection dir,
-                                               UErrorCode& status) {
+Transliterator* U_EXPORT2
+Transliterator::createInstance(const UnicodeString& ID,
+                                UTransDirection dir,
+                                UErrorCode& status)
+{
      UParseError parseError;
      return createInstance(ID, dir, parseError, status);
  }
@@ -883,10 +910,12 @@ Transliterator* Transliterator::createInstance(const UnicodeString& ID,
   * @see #getAvailableIDs
   * @see #getID
   */
-Transliterator* Transliterator::createInstance(const UnicodeString& ID,
-                                               UTransDirection dir,
-                                               UParseError& parseError,
-                                               UErrorCode& status) {
+Transliterator* U_EXPORT2
+Transliterator::createInstance(const UnicodeString& ID,
+                                UTransDirection dir,
+                                UParseError& parseError,
+                                UErrorCode& status)
+{
      if (U_FAILURE(status)) {
          return 0;
      }
@@ -905,33 +934,34 @@ Transliterator* Transliterator::createInstance(const UnicodeString& ID,
          return NULL;
      }
      
-    TransliteratorIDParser::instantiateList(list, NULL, -1, status);
+    TransliteratorIDParser::instantiateList(list, status);
      if (U_FAILURE(status)) {
          return NULL;
      }
      
      U_ASSERT(list.size() > 0);
      Transliterator* t = NULL;
-    switch (list.size()) {
-    case 1:
-        t = (Transliterator*) list.elementAt(0);
-        break;
-    default:
+    
+    if (list.size() > 1 || canonID.indexOf(ID_DELIM) >= 0) {
+        // [NOTE: If it's a compound ID, we instantiate a CompoundTransliterator even if it only
+        // has one child transliterator.  This is so that toRules() will return the right thing
+        // (without any inactive ID), but our main ID still comes out correct.  That is, if we
+        // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;"
+        // even though the ID is "(Lower);Latin-Greek;".]
          t = new CompoundTransliterator(list, parseError, status);
-        /* test for NULL */
-        if (t == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-        if (U_FAILURE(status)) {
-            delete t;
-            return NULL;
+    }
+    else {
+        t = (Transliterator*)list.elementAt(0);
+    }
+    // Check null pointer
+    if (t != NULL) {
+        t->setID(canonID);
+        if (globalFilter != NULL) {
+            t->adoptFilter(globalFilter);
          }
-        break;
      }
-    t->setID(canonID);
-    if (globalFilter != NULL) {
-        t->adoptFilter(globalFilter);
+    else if (U_SUCCESS(status)) {
+        status = U_MEMORY_ALLOCATION_ERROR;
      }
      return t;
  }
@@ -950,28 +980,57 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
      UErrorCode ec = U_ZERO_ERROR;
      TransliteratorAlias* alias = 0;
      Transliterator* t = 0;
-    
-    umtx_init(&registryMutex);
-    umtx_lock(&registryMutex);
-    if (HAVE_REGISTRY) {
-        t = registry->get(id, alias, pe, ec);
+
+    umtx_lock(registryMutex());
+    if (HAVE_REGISTRY(ec)) {
+        t = registry->get(id, alias, ec);
      }
-    umtx_unlock(&registryMutex);
+    umtx_unlock(registryMutex());
  
      if (U_FAILURE(ec)) {
          delete t;
          delete alias;
-        return NULL;
+        return 0;
      }
  
-    if (alias != 0) {
-        // Instantiate an alias
+    // We may not have gotten a transliterator: because we can't
+    // instantiate a transliterator from inside TransliteratorRegistry::
+    // get() (that would deadlock), we sometimes pass back an alias.  This
+    // contains the data we need to finish the instantiation outside the
+    // registry mutex.  The alias may, in turn, generate another alias, so
+    // we handle aliases in a loop.  The loop runs at most twice.
+    // [alan]
+    while (alias != 0) {
          U_ASSERT(t==0);
-        t = alias->create(pe, ec);
-        delete alias;
+        // Rule-based aliases are handled with TransliteratorAlias::
+        // parse(), followed by TransliteratorRegistry::reget().
+        // Other aliases are handled with TransliteratorAlias::create().
+        if (alias->isRuleBased()) {
+            // Step 1. parse
+            TransliteratorParser parser(ec);
+            alias->parse(parser, pe, ec);
+            delete alias;
+            alias = 0;
+
+            // Step 2. reget
+            umtx_lock(registryMutex());
+            if (HAVE_REGISTRY(ec)) {
+                t = registry->reget(id, parser, alias, ec);
+            }
+            umtx_unlock(registryMutex());
+
+            // Step 3. Loop back around!
+        } else {
+            t = alias->create(pe, ec);
+            delete alias;
+            alias = 0;
+            break;
+        }
          if (U_FAILURE(ec)) {
              delete t;
+            delete alias;
              t = NULL;
+            break;
          }
      }
  
@@ -990,14 +1049,16 @@ Transliterator* Transliterator::createBasicInstance(const UnicodeString& id,
   * NullTransliterator, if it contains ID blocks which parse as
   * empty for the given direction.
   */
-Transliterator* Transliterator::createFromRules(const UnicodeString& ID,
-                                                const UnicodeString& rules,
-                                                UTransDirection dir,
-                                                UParseError& parseError,
-                                                UErrorCode& status) {
+Transliterator* U_EXPORT2
+Transliterator::createFromRules(const UnicodeString& ID,
+                                const UnicodeString& rules,
+                                UTransDirection dir,
+                                UParseError& parseError,
+                                UErrorCode& status)
+{
      Transliterator* t = NULL;
  
-    TransliteratorParser parser;
+    TransliteratorParser parser(status);
      parser.parse(rules, dir, parseError, status);
  
      if (U_FAILURE(status)) {
@@ -1005,59 +1066,74 @@ Transliterator* Transliterator::createFromRules(const UnicodeString& ID,
      }
  
      // NOTE: The logic here matches that in TransliteratorRegistry.
-    if (parser.idBlock.length() == 0) {
-        if (parser.data == NULL) {
-            // No idBlock, no data -- this is just an
-            // alias for Null
-            t = new NullTransliterator();
-        } else {
-            // No idBlock, data != 0 -- this is an
-            // ordinary RBT_DATA.
-            t = new RuleBasedTransliterator(ID, parser.orphanData(), TRUE); // TRUE == adopt data object
+    if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) {
+        t = new NullTransliterator();
+    }
+    else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) {
+        t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE);
+    }
+    else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) {
+        // idBlock, no data -- this is an alias.  The ID has
+        // been munged from reverse into forward mode, if
+        // necessary, so instantiate the ID in the forward
+        // direction.
+        if (parser.compoundFilter != NULL) {
+            UnicodeString filterPattern;
+            parser.compoundFilter->toPattern(filterPattern, FALSE);
+            t = createInstance(filterPattern + UnicodeString(ID_DELIM)
+                    + *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
          }
-        /* test for NULL */
-        if (t == 0) {
-            status = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
+        else
+            t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status);
+
+
+        if (t != NULL) {
+            t->setID(ID);
          }
-    } else {
-        if (parser.data == NULL) {
-            // idBlock, no data -- this is an alias.  The ID has
-            // been munged from reverse into forward mode, if
-            // necessary, so instantiate the ID in the forward
-            // direction.
-            t = createInstance(parser.idBlock, UTRANS_FORWARD, parseError, status);
-            if (t != NULL) {
-                t->setID(ID);
-            }
-        } else {
-            // idBlock and data -- this is a compound
-            // RBT
-            UnicodeString id("_", "");
-            t = new RuleBasedTransliterator(id, parser.orphanData(), TRUE); // TRUE == adopt data object
-            /* test for NULL */
-            if (t == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-            t = new CompoundTransliterator(ID, parser.idBlock, parser.idSplitPoint,
-                                           t, status);
-            /* test for NULL */
-            if (t == 0) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-                return 0;
-            }
-            if (U_FAILURE(status)) {
-                delete t;
-                t = 0;
+    }
+    else {
+        UVector transliterators(status);
+        int32_t passNumber = 1;
+
+        int32_t limit = parser.idBlockVector.size();
+        if (parser.dataVector.size() > limit)
+            limit = parser.dataVector.size();
+
+        for (int32_t i = 0; i < limit; i++) {
+            if (i < parser.idBlockVector.size()) {
+                UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i);
+                if (!idBlock->isEmpty()) {
+                    Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status);
+                    if (temp != NULL && typeid(*temp) != typeid(NullTransliterator))
+                        transliterators.addElement(temp, status);
+                    else
+                        delete temp;
+                }
              }
-            if (parser.compoundFilter != NULL) {
-                t->adoptFilter(parser.orphanCompoundFilter());
+            if (!parser.dataVector.isEmpty()) {
+                TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0);
+                // TODO: Should passNumber be turned into a decimal-string representation (1 -> "1")?
+                RuleBasedTransliterator* temprbt = new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + UnicodeString(passNumber++),
+                        data, TRUE);
+                // Check if NULL before adding it to transliterators to avoid future usage of NULL pointer.
+                if (temprbt == NULL) {
+                    status = U_MEMORY_ALLOCATION_ERROR;
+                    return t;
+                }
+                transliterators.addElement(temprbt, status);
              }
-            return t;
          }
-    }
  
+        t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status);
+        // Null pointer check
+        if (t != NULL) {
+            t->setID(ID);
+            t->adoptFilter(parser.orphanCompoundFilter());
+        }
+    }
+    if (U_SUCCESS(status) && t == NULL) {
+        status = U_MEMORY_ALLOCATION_ERROR;
+    }
      return t;
  }
  
@@ -1073,7 +1149,7 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
              if (!ICU_Utility::escapeUnprintable(rulesSource, c)) {
                  rulesSource.append(c);
              }
-            i += UTF_CHAR_LENGTH(c);
+            i += U16_LENGTH(c);
          }
      } else {
          rulesSource = getID();
@@ -1084,24 +1160,45 @@ UnicodeString& Transliterator::toRules(UnicodeString& rulesSource,
      return rulesSource;
  }
  
+int32_t Transliterator::countElements() const {
+    const CompoundTransliterator* ct = dynamic_cast<const CompoundTransliterator*>(this);
+    return ct != NULL ? ct->getCount() : 0;
+}
+
+const Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const {
+    if (U_FAILURE(ec)) {
+        return *this;
+    }
+    const CompoundTransliterator* cpd = dynamic_cast<const CompoundTransliterator*>(this);
+    int32_t n = (cpd == NULL) ? 1 : cpd->getCount();
+    if (index < 0 || index >= n) {
+        ec = U_INDEX_OUTOFBOUNDS_ERROR;
+        return *this;
+    } else {
+        return (n == 1) ? *this : cpd->getTransliterator(index);
+    }
+}
+
  UnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const {
      handleGetSourceSet(result);
      if (filter != NULL) {
-    UnicodeSet* filterSet;
-    UBool deleteFilterSet = FALSE;
-    // Most, but not all filters will be UnicodeSets.  Optimize for
-    // the high-runner case.
-    if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) {
-        filterSet = (UnicodeSet*) filter;
-    } else {
-        filterSet = new UnicodeSet();
-        deleteFilterSet = TRUE;
-        filter->addMatchSetTo(*filterSet);
-    }
-    result.retainAll(*filterSet);
-    if (deleteFilterSet) {
-        delete filterSet;
-    }
+        UnicodeSet* filterSet = dynamic_cast<UnicodeSet*>(filter);
+        UBool deleteFilterSet = FALSE;
+        // Most, but not all, filters will be UnicodeSets.  Optimize for
+        // the high-runner case.
+        if (filterSet == NULL) {
+            filterSet = new UnicodeSet();
+            // Check null pointer
+            if (filterSet == NULL) {
+                return result;
+            }
+            deleteFilterSet = TRUE;
+            filter->addMatchSetTo(*filterSet);
+        }
+        result.retainAll(*filterSet);
+        if (deleteFilterSet) {
+            delete filterSet;
+        }
      }
      return result;
  }
@@ -1115,12 +1212,12 @@ UnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const {
  }
  
  // For public consumption
-void Transliterator::registerFactory(const UnicodeString& id,
+void U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id,
                                       Transliterator::Factory factory,
                                       Transliterator::Token context) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          _registerFactory(id, factory, context);
      }
  }
@@ -1130,7 +1227,8 @@ void Transliterator::registerFactory(const UnicodeString& id,
  void Transliterator::_registerFactory(const UnicodeString& id,
                                        Transliterator::Factory factory,
                                        Transliterator::Token context) {
-    registry->put(id, factory, context, TRUE);
+    UErrorCode ec = U_ZERO_ERROR;
+    registry->put(id, factory, context, TRUE, ec);
  }
  
  // To be called only by Transliterator subclasses that are called
@@ -1138,7 +1236,8 @@ void Transliterator::_registerFactory(const UnicodeString& id,
  void Transliterator::_registerSpecialInverse(const UnicodeString& target,
                                               const UnicodeString& inverseTarget,
                                               UBool bidirectional) {
-    TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidirectional);
+    UErrorCode status = U_ZERO_ERROR;
+    TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidirectional, status);
  }
  
  /**
@@ -1154,109 +1253,145 @@ void Transliterator::_registerSpecialInverse(const UnicodeString& target,
   * @see #getInstance
   * @see #unregister
   */
-void Transliterator::registerInstance(Transliterator* adoptedPrototype) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+void U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          _registerInstance(adoptedPrototype);
      }
  }
  
  void Transliterator::_registerInstance(Transliterator* adoptedPrototype) {
-    registry->put(adoptedPrototype, TRUE);
+    UErrorCode ec = U_ZERO_ERROR;
+    registry->put(adoptedPrototype, TRUE, ec);
+}
+
+void U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID,
+                                             const UnicodeString& realID) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
+        _registerAlias(aliasID, realID);
+    }
+}
+
+void Transliterator::_registerAlias(const UnicodeString& aliasID,
+                                    const UnicodeString& realID) {
+    UErrorCode ec = U_ZERO_ERROR;
+    registry->put(aliasID, realID, FALSE, TRUE, ec);
  }
  
  /**
   * Unregisters a transliterator or class.  This may be either
   * a system transliterator or a user transliterator or class.
- * 
+ *
   * @param ID the ID of the transliterator or class
   * @see #registerInstance
  
   */
-void Transliterator::unregister(const UnicodeString& ID) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+void U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          registry->remove(ID);
      }
  }
  
  /**
+ * == OBSOLETE - remove in ICU 3.4 ==
   * Return the number of IDs currently registered with the system.
   * To retrieve the actual IDs, call getAvailableID(i) with
   * i from 0 to countAvailableIDs() - 1.
   */
-int32_t Transliterator::countAvailableIDs(void) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    return HAVE_REGISTRY ? registry->countAvailableIDs() : 0;
+int32_t U_EXPORT2 Transliterator::countAvailableIDs(void) {
+    int32_t retVal = 0;
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
+        retVal = registry->countAvailableIDs();
+    }
+    return retVal;
  }
  
  /**
+ * == OBSOLETE - remove in ICU 3.4 ==
   * Return the index-th available ID.  index must be between 0
   * and countAvailableIDs() - 1, inclusive.  If index is out of
   * range, the result of getAvailableID(0) is returned.
   */
-const UnicodeString& Transliterator::getAvailableID(int32_t index) {
+const UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) {
      const UnicodeString* result = NULL;
-    umtx_init(&registryMutex);
-    umtx_lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+    umtx_lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          result = &registry->getAvailableID(index);
      }
-    umtx_unlock(&registryMutex);
+    umtx_unlock(registryMutex());
      U_ASSERT(result != NULL); // fail if no registry
      return *result;
  }
  
-int32_t Transliterator::countAvailableSources(void) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    return HAVE_REGISTRY ? _countAvailableSources() : 0;
+StringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) {
+    if (U_FAILURE(ec)) return NULL;
+    StringEnumeration* result = NULL;
+    umtx_lock(registryMutex());
+    if (HAVE_REGISTRY(ec)) {
+        result = registry->getAvailableIDs();
+    }
+    umtx_unlock(registryMutex());
+    if (result == NULL) {
+        ec = U_INTERNAL_TRANSLITERATOR_ERROR;
+    }
+    return result;
+}
+
+int32_t U_EXPORT2 Transliterator::countAvailableSources(void) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    return HAVE_REGISTRY(ec) ? _countAvailableSources() : 0;
  }
  
-UnicodeString& Transliterator::getAvailableSource(int32_t index,
+UnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index,
                                                    UnicodeString& result) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          _getAvailableSource(index, result);
      }
      return result;
  }
  
-int32_t Transliterator::countAvailableTargets(const UnicodeString& source) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    return HAVE_REGISTRY ? _countAvailableTargets(source) : 0;
+int32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    return HAVE_REGISTRY(ec) ? _countAvailableTargets(source) : 0;
  }
  
-UnicodeString& Transliterator::getAvailableTarget(int32_t index,
+UnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index,
                                                    const UnicodeString& source,
                                                    UnicodeString& result) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          _getAvailableTarget(index, source, result);
      }
      return result;
  }
  
-int32_t Transliterator::countAvailableVariants(const UnicodeString& source,
+int32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source,
                                                 const UnicodeString& target) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    return HAVE_REGISTRY ? _countAvailableVariants(source, target) : 0;
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    return HAVE_REGISTRY(ec) ? _countAvailableVariants(source, target) : 0;
  }
  
-UnicodeString& Transliterator::getAvailableVariant(int32_t index,
+UnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index,
                                                     const UnicodeString& source,
                                                     const UnicodeString& target,
                                                     UnicodeString& result) {
-    umtx_init(&registryMutex);
-    Mutex lock(&registryMutex);
-    if (HAVE_REGISTRY) {
+    Mutex lock(registryMutex());
+    UErrorCode ec = U_ZERO_ERROR;
+    if (HAVE_REGISTRY(ec)) {
          _getAvailableVariant(index, source, target, result);
      }
      return result;
@@ -1315,18 +1450,16 @@ UChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const {
   * and return TRUE.  If the registry cannot be initialized, return
   * FALSE (rare).
   *
- * IMPORTANT: Upon entry, registryMutex must be LOCKED.  The entirely
+ * IMPORTANT: Upon entry, registryMutex must be LOCKED.  The entire
   * initialization is done with the lock held.  There is NO REASON to
   * unlock, since no other thread that is waiting on the registryMutex
   * cannot itself proceed until the registry is initialized.
   */
-UBool Transliterator::initializeRegistry() {
+UBool Transliterator::initializeRegistry(UErrorCode &status) {
      if (registry != 0) {
          return TRUE;
      }
  
-    UErrorCode status = U_ZERO_ERROR;
-
      registry = new TransliteratorRegistry(status);
      if (registry == 0 || U_FAILURE(status)) {
          delete registry;
@@ -1335,71 +1468,88 @@ UBool Transliterator::initializeRegistry() {
      }
  
      /* The following code parses the index table located in
-     * icu/data/translit_index.txt.  The index is an n x 4 table
+     * icu/data/translit/root.txt.  The index is an n x 4 table
       * that follows this format:
-     *
-     *   <id>:file:<resource>:<direction>
-     *   <id>:internal:<resource>:<direction>
-     *   <id>:alias:<getInstanceArg>:
-     *  
+     *  <id>{
+     *      file{
+     *          resource{"<resource>"}
+     *          direction{"<direction>"}
+     *      }
+     *  }
+     *  <id>{
+     *      internal{
+     *          resource{"<resource>"}
+     *          direction{"<direction>"}
+     *       }
+     *  }
+     *  <id>{
+     *      alias{"<getInstanceArg>"}
+     *  }
       * <id> is the ID of the system transliterator being defined.  These
       * are public IDs enumerated by Transliterator.getAvailableIDs(),
       * unless the second field is "internal".
-     * 
+     *
       * <resource> is a ResourceReader resource name.  Currently these refer
       * to file names under com/ibm/text/resources.  This string is passed
       * directly to ResourceReader, together with <encoding>.
-     * 
+     *
       * <direction> is either "FORWARD" or "REVERSE".
-     * 
+     *
       * <getInstanceArg> is a string to be passed directly to
       * Transliterator.getInstance().  The returned Transliterator object
       * then has its ID changed to <id> and is returned.
       *
       * The extra blank field on "alias" lines is to make the array square.
       */
-    static const char translit_index[] = "translit_index";
-
-    UResourceBundle *bundle, *transIDs, *colBund;
-    bundle = ures_openDirect(0, translit_index, &status);
-    transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
+    //static const char translit_index[] = "translit_index";
  
-    int32_t row, maxRows;
+    UResourceBundle *bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status);
+    UResourceBundle *transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status);
      if (U_SUCCESS(status)) {
-        maxRows = ures_getSize(transIDs);
+        UResourceBundle *colBund = NULL;
+        UResourceBundle* res = NULL;
+        int32_t row, maxRows = ures_getSize(transIDs);
          for (row = 0; row < maxRows; row++) {
-            colBund = ures_getByIndex(transIDs, row, 0, &status);
-
-            if (U_SUCCESS(status) && ures_getSize(colBund) == 4) {
-                UnicodeString id = ures_getUnicodeStringByIndex(colBund, 0, &status);
-                UChar type = ures_getUnicodeStringByIndex(colBund, 1, &status).charAt(0);
-                UnicodeString resString = ures_getUnicodeStringByIndex(colBund, 2, &status);
-
-                if (U_SUCCESS(status)) {
-                    switch (type) {
-                    case 0x66: // 'f'
-                    case 0x69: // 'i'
-                        // 'file' or 'internal';
-                        // row[2]=resource, row[3]=direction
-                        {
-                            UBool visible = (type == 0x0066 /*f*/);
-                            UTransDirection dir = 
-                                (ures_getUnicodeStringByIndex(colBund, 3, &status).charAt(0) ==
-                                 0x0046 /*F*/) ?
-                                UTRANS_FORWARD : UTRANS_REVERSE;
-                            registry->put(id, resString, dir, visible);
-                        }
-                        break;
-                    case 0x61: // 'a'
-                        // 'alias'; row[2]=createInstance argument
-                        registry->put(id, resString, TRUE);
-                        break;
+            colBund = ures_getByIndex(transIDs, row, colBund, &status);
+            if (U_FAILURE(status)) {
+                break;
+            }
+            const char *tridKey = ures_getKey(colBund);
+            if (tridKey == NULL || uprv_strstr(tridKey, "-t-") != NULL) {
+                continue; // Apple version should not get any of these, eliminated the root.txt entries
+            }
+            res = ures_getNextResource(colBund, res, &status);
+            if (U_FAILURE(status)) {
+                break;
+            }
+            UnicodeString trID(tridKey, -1, US_INV);
+            const char* typeStr = ures_getKey(res);
+            int32_t len = 0, dlen = 0;
+            UBool visible = FALSE;
+            const UChar *resString;
+            switch (typeStr[0]) {
+                case 'f': // "file"
+                    visible = TRUE;
+                    // FALLTHROUGH
+                case 'i': // "internal" => visible = FALSE
+                    // child resources are resource and direction
+                    {
+                        resString = ures_getStringByKey(res, "resource", &len, &status);
+                        const UChar* dirString = ures_getStringByKey(res, "direction", &dlen, &status);
+                        UTransDirection dir = (dlen <= 0 || dirString[0] ==  0x0046 /*F*/)? UTRANS_FORWARD : UTRANS_REVERSE;
+                        registry->put(trID, UnicodeString(TRUE, resString, len), dir, TRUE, visible, status);
                      }
-                }
+                    break;
+                case 'a': // "alias", string argument is alias
+                    resString = ures_getString(res, &len, &status);
+                    registry->put(trID, UnicodeString(TRUE, resString, len), TRUE, TRUE, status);
+                    break;
+                default: // do nothing
+                    break;
              }
-
-            ures_close(colBund);
          }
+        ures_close(res);
+        ures_close(colBund);
      }
  
      ures_close(transIDs);
@@ -1409,12 +1559,51 @@ UBool Transliterator::initializeRegistry() {
      // cache.  This is how new non-rule-based transliterators are
      // added to the system.
  
-    registry->put(new NullTransliterator(), TRUE);
-    registry->put(new LowercaseTransliterator(), TRUE);
-    registry->put(new UppercaseTransliterator(), TRUE);
-    registry->put(new TitlecaseTransliterator(), TRUE);
-    registry->put(new UnicodeNameTransliterator(), TRUE);
-    registry->put(new NameUnicodeTransliterator(), TRUE);
+    // This is to allow for null pointer check
+    NullTransliterator* tempNullTranslit = new NullTransliterator();
+    LowercaseTransliterator* tempLowercaseTranslit = new LowercaseTransliterator();
+    UppercaseTransliterator* tempUppercaseTranslit = new UppercaseTransliterator();
+    TitlecaseTransliterator* tempTitlecaseTranslit = new TitlecaseTransliterator();
+    UnicodeNameTransliterator* tempUnicodeTranslit = new UnicodeNameTransliterator();
+    NameUnicodeTransliterator* tempNameUnicodeTranslit = new NameUnicodeTransliterator();
+#if !UCONFIG_NO_BREAK_ITERATION
+     // TODO: could or should these transliterators be referenced polymorphically once constructed?
+     BreakTransliterator* tempBreakTranslit         = new BreakTransliterator();
+#endif
+    // Check for null pointers
+    if (tempNullTranslit == NULL || tempLowercaseTranslit == NULL || tempUppercaseTranslit == NULL ||
+        tempTitlecaseTranslit == NULL || tempUnicodeTranslit == NULL ||
+#if !UCONFIG_NO_BREAK_ITERATION
+        tempBreakTranslit == NULL ||
+#endif
+        tempNameUnicodeTranslit == NULL )
+    {
+        delete tempNullTranslit;
+        delete tempLowercaseTranslit;
+        delete tempUppercaseTranslit;
+        delete tempTitlecaseTranslit;
+        delete tempUnicodeTranslit;
+        delete tempNameUnicodeTranslit;
+#if !UCONFIG_NO_BREAK_ITERATION
+        delete tempBreakTranslit;
+#endif
+        // Since there was an error, remove registry
+        delete registry;
+        registry = NULL;
+
+        status = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
+
+    registry->put(tempNullTranslit, TRUE, status);
+    registry->put(tempLowercaseTranslit, TRUE, status);
+    registry->put(tempUppercaseTranslit, TRUE, status);
+    registry->put(tempTitlecaseTranslit, TRUE, status);
+    registry->put(tempUnicodeTranslit, TRUE, status);
+    registry->put(tempNameUnicodeTranslit, TRUE, status);
+#if !UCONFIG_NO_BREAK_ITERATION
+    registry->put(tempBreakTranslit, FALSE, status);   // FALSE means invisible.
+#endif
  
      RemoveTransliterator::registerIDs(); // Must be within mutex
      EscapeTransliterator::registerIDs();
@@ -1422,33 +1611,34 @@ UBool Transliterator::initializeRegistry() {
      NormalizationTransliterator::registerIDs();
      AnyTransliterator::registerIDs();
  
-    _registerSpecialInverse(NullTransliterator::SHORT_ID,
-                            NullTransliterator::SHORT_ID, FALSE);
-    _registerSpecialInverse("Upper", "Lower", TRUE);
-    _registerSpecialInverse("Title", "Lower", FALSE);
+    _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"),
+                            UNICODE_STRING_SIMPLE("Null"), FALSE);
+    _registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"),
+                            UNICODE_STRING_SIMPLE("Lower"), TRUE);
+    _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"),
+                            UNICODE_STRING_SIMPLE("Lower"), FALSE);
  
-    ucln_i18n_registerCleanup();
+    ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup);
  
      return TRUE;
  }
  
  U_NAMESPACE_END
  
-// Defined in ucln_in.h:
+// Defined in transreg.h:
  
  /**
   * Release all static memory held by transliterator.  This will
   * necessarily invalidate any rule-based transliterators held by the
   * user, because RBTs hold pointers to common data objects.
   */
-U_CFUNC UBool transliterator_cleanup(void) {
-    TitlecaseTransliterator::cleanup();
+U_CFUNC UBool utrans_transliterator_cleanup(void) {
+    U_NAMESPACE_USE
      TransliteratorIDParser::cleanup();
      if (registry) {
          delete registry;
          registry = NULL;
      }
-    umtx_destroy(&registryMutex);
      return TRUE;
  }