]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/i18n/anytrans.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / anytrans.cpp
index cdfea1b4ff0ee602bf49295bc1dcb8b579f4940f..6e382b824b95b7cd6ba8932c3b18c2cee0756c19 100644 (file)
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 *****************************************************************
 /*
 *****************************************************************
-* Copyright (c) 2002-2003, International Business Machines Corporation
+* Copyright (c) 2002-2014, International Business Machines Corporation
 * and others.  All Rights Reserved.
 *****************************************************************
 * Date        Name        Description
 * and others.  All Rights Reserved.
 *****************************************************************
 * Date        Name        Description
 
 #include "unicode/uobject.h"
 #include "unicode/uscript.h"
 
 #include "unicode/uobject.h"
 #include "unicode/uscript.h"
-#include "nultrans.h"
+
 #include "anytrans.h"
 #include "anytrans.h"
-#include "uvector.h"
-#include "tridpars.h"
 #include "hash.h"
 #include "hash.h"
+#include "mutex.h"
+#include "nultrans.h"
+#include "putilimp.h"
+#include "tridpars.h"
+#include "uinvchar.h"
+#include "uvector.h"
 
 //------------------------------------------------------------
 // Constants
 
 static const UChar TARGET_SEP = 45; // '-'
 static const UChar VARIANT_SEP = 47; // '/'
 
 //------------------------------------------------------------
 // Constants
 
 static const UChar TARGET_SEP = 45; // '-'
 static const UChar VARIANT_SEP = 47; // '/'
-static const UChar ANY[] = {65,110,121,0}; // "Any"
+static const UChar ANY[] = {0x41,0x6E,0x79,0}; // "Any"
 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
 static const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
-static const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
+static const UChar LATIN_PIVOT[] = {0x2D,0x4C,0x61,0x74,0x6E,0x3B,0x4C,0x61,0x74,0x6E,0x2D,0}; // "-Latn;Latn-"
+
+// initial size for an Any-XXXX transform's cache of script-XXXX transforms
+// (will grow as necessary, but we don't expect to have source text with more than 7 scripts)
+#define ANY_TRANS_CACHE_INIT_SIZE 7
 
 //------------------------------------------------------------
 
 
 //------------------------------------------------------------
 
@@ -37,7 +47,7 @@ U_CDECL_BEGIN
  */
 static void U_CALLCONV
 _deleteTransliterator(void *obj) {
  */
 static void U_CALLCONV
 _deleteTransliterator(void *obj) {
-    delete (Transliterator*) obj;    
+    delete (icu::Transliterator*) obj;
 }
 U_CDECL_END
 
 }
 U_CDECL_END
 
@@ -83,7 +93,7 @@ public:
      * The end of the run, exclusive, valid after next() returns.
      */
     int32_t limit;
      * The end of the run, exclusive, valid after next() returns.
      */
     int32_t limit;
-    
+
     /**
      * Constructs a run iterator over the given text from start
      * (inclusive) to limit (exclusive).
     /**
      * Constructs a run iterator over the given text from start
      * (inclusive) to limit (exclusive).
@@ -170,7 +180,7 @@ void ScriptRunIterator::adjustLimit(int32_t delta) {
 //------------------------------------------------------------
 // AnyTransliterator
 
 //------------------------------------------------------------
 // AnyTransliterator
 
-const char AnyTransliterator::fgClassID=0;
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator)
 
 AnyTransliterator::AnyTransliterator(const UnicodeString& id,
                                      const UnicodeString& theTarget,
 
 AnyTransliterator::AnyTransliterator(const UnicodeString& id,
                                      const UnicodeString& theTarget,
@@ -178,9 +188,12 @@ AnyTransliterator::AnyTransliterator(const UnicodeString& id,
                                      UScriptCode theTargetScript,
                                      UErrorCode& ec) :
     Transliterator(id, NULL),
                                      UScriptCode theTargetScript,
                                      UErrorCode& ec) :
     Transliterator(id, NULL),
-    targetScript(theTargetScript) 
+    targetScript(theTargetScript)
 {
 {
-    cache = uhash_open(uhash_hashLong, uhash_compareLong, &ec);
+    cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
+    if (U_FAILURE(ec)) {
+        return;
+    }
     uhash_setValueDeleter(cache, _deleteTransliterator);
 
     target = theTarget;
     uhash_setValueDeleter(cache, _deleteTransliterator);
 
     target = theTarget;
@@ -203,7 +216,10 @@ AnyTransliterator::AnyTransliterator(const AnyTransliterator& o) :
 {
     // Don't copy the cache contents
     UErrorCode ec = U_ZERO_ERROR;
 {
     // Don't copy the cache contents
     UErrorCode ec = U_ZERO_ERROR;
-    cache = uhash_open(uhash_hashLong, uhash_compareLong, &ec);
+    cache = uhash_openSize(uhash_hashLong, uhash_compareLong, NULL, ANY_TRANS_CACHE_INIT_SIZE, &ec);
+    if (U_FAILURE(ec)) {
+        return;
+    }
     uhash_setValueDeleter(cache, _deleteTransliterator);
 }
 
     uhash_setValueDeleter(cache, _deleteTransliterator);
 }
 
@@ -231,7 +247,7 @@ void AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& p
         // Try to instantiate transliterator from it.scriptCode to
         // our target or target/variant
         Transliterator* t = getTransliterator(it.scriptCode);
         // Try to instantiate transliterator from it.scriptCode to
         // our target or target/variant
         Transliterator* t = getTransliterator(it.scriptCode);
-       
+
         if (t == NULL) {
             // We have no transliterator.  Do nothing, but keep
             // pos.start up to date.
         if (t == NULL) {
             // We have no transliterator.  Do nothing, but keep
             // pos.start up to date.
@@ -243,7 +259,7 @@ void AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& p
         // a non-incremental transliteration.  Otherwise do an
         // incremental one.
         UBool incremental = isIncremental && (it.limit >= allLimit);
         // a non-incremental transliteration.  Otherwise do an
         // incremental one.
         UBool incremental = isIncremental && (it.limit >= allLimit);
-        
+
         pos.start = uprv_max(allStart, it.start);
         pos.limit = uprv_min(allLimit, it.limit);
         int32_t limit = pos.limit;
         pos.start = uprv_max(allStart, it.start);
         pos.limit = uprv_min(allLimit, it.limit);
         int32_t limit = pos.limit;
@@ -267,20 +283,24 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
         return NULL;
     }
 
         return NULL;
     }
 
-    Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source);
+    Transliterator* t = NULL;
+    {
+        Mutex m(NULL);
+        t = (Transliterator*) uhash_iget(cache, (int32_t) source);
+    }
     if (t == NULL) {
         UErrorCode ec = U_ZERO_ERROR;
     if (t == NULL) {
         UErrorCode ec = U_ZERO_ERROR;
-        UnicodeString sourceName(uscript_getName(source), "");
+        UnicodeString sourceName(uscript_getShortName(source), -1, US_INV);
         UnicodeString id(sourceName);
         id.append(TARGET_SEP).append(target);
         UnicodeString id(sourceName);
         id.append(TARGET_SEP).append(target);
-        
+
         t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
         if (U_FAILURE(ec) || t == NULL) {
             delete t;
         t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
         if (U_FAILURE(ec) || t == NULL) {
             delete t;
-            
+
             // Try to pivot around Latin, our most common script
             id = sourceName;
             // Try to pivot around Latin, our most common script
             id = sourceName;
-            id.append(LATIN_PIVOT).append(target);
+            id.append(LATIN_PIVOT, -1).append(target);
             t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
             if (U_FAILURE(ec) || t == NULL) {
                 delete t;
             t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
             if (U_FAILURE(ec) || t == NULL) {
                 delete t;
@@ -289,24 +309,42 @@ Transliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
         }
 
         if (t != NULL) {
         }
 
         if (t != NULL) {
-            uhash_iput(cache, (int32_t) source, t, &ec);
+            Transliterator *rt = NULL;
+            {
+                Mutex m(NULL);
+                rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t) source));
+                if (rt == NULL) {
+                    // Common case, no race to cache this new transliterator.
+                    uhash_iput(cache, (int32_t) source, t, &ec);
+                } else {
+                    // Race case, some other thread beat us to caching this transliterator.
+                    Transliterator *temp = rt;
+                    rt = t;    // Our newly created transliterator that lost the race & now needs deleting.
+                    t  = temp; // The transliterator from the cache that we will return.
+                }
+            }
+            delete rt;    // will be non-null only in case of races.
         }
     }
         }
     }
-
     return t;
 }
 
 /**
  * Return the script code for a given name, or -1 if not found.
  */
     return t;
 }
 
 /**
  * Return the script code for a given name, or -1 if not found.
  */
-UScriptCode AnyTransliterator::scriptNameToCode(const UnicodeString& name) {
+static UScriptCode scriptNameToCode(const UnicodeString& name) {
     char buf[128];
     UScriptCode code;
     UErrorCode ec = U_ZERO_ERROR;
     char buf[128];
     UScriptCode code;
     UErrorCode ec = U_ZERO_ERROR;
+    int32_t nameLen = name.length();
+    UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen);
 
 
-    name.extract(0, 128, buf, 128, "");
-    if (uscript_getCode(buf, &code, 1, &ec) != 1 ||
-        U_FAILURE(ec)) {
+    if (isInvariant) {
+        name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV);
+        buf[127] = 0;   // Make sure that we NULL terminate the string.
+    }
+    if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec))
+    {
         code = USCRIPT_INVALID_CODE;
     }
     return code;
         code = USCRIPT_INVALID_CODE;
     }
     return code;
@@ -319,8 +357,8 @@ UScriptCode AnyTransliterator::scriptNameToCode(const UnicodeString& name) {
  */
 void AnyTransliterator::registerIDs() {
 
  */
 void AnyTransliterator::registerIDs() {
 
-    UErrorCode ec;
-    Hashtable seen(TRUE);
+    UErrorCode ec = U_ZERO_ERROR;
+    Hashtable seen(TRUE, ec);
 
     int32_t sourceCount = Transliterator::_countAvailableSources();
     for (int32_t s=0; s<sourceCount; ++s) {
 
     int32_t sourceCount = Transliterator::_countAvailableSources();
     for (int32_t s=0; s<sourceCount; ++s) {
@@ -328,7 +366,7 @@ void AnyTransliterator::registerIDs() {
         Transliterator::_getAvailableSource(s, source);
 
         // Ignore the "Any" source
         Transliterator::_getAvailableSource(s, source);
 
         // Ignore the "Any" source
-        if (source.caseCompare(ANY, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue;
+        if (source.caseCompare(ANY, 3, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue;
 
         int32_t targetCount = Transliterator::_countAvailableTargets(source);
         for (int32_t t=0; t<targetCount; ++t) {
 
         int32_t targetCount = Transliterator::_countAvailableTargets(source);
         for (int32_t t=0; t<targetCount; ++t) {
@@ -339,7 +377,7 @@ void AnyTransliterator::registerIDs() {
             if (seen.geti(target) != 0) continue;
             ec = U_ZERO_ERROR;
             seen.puti(target, 1, ec);
             if (seen.geti(target) != 0) continue;
             ec = U_ZERO_ERROR;
             seen.puti(target, 1, ec);
-            
+
             // Get the script code for the target.  If not a script, ignore.
             UScriptCode targetScript = scriptNameToCode(target);
             if (targetScript == USCRIPT_INVALID_CODE) continue;
             // Get the script code for the target.  If not a script, ignore.
             UScriptCode targetScript = scriptNameToCode(target);
             if (targetScript == USCRIPT_INVALID_CODE) continue;
@@ -349,17 +387,17 @@ void AnyTransliterator::registerIDs() {
             for (int32_t v=0; v<variantCount; ++v) {
                 UnicodeString variant;
                 Transliterator::_getAvailableVariant(v, source, target, variant);
             for (int32_t v=0; v<variantCount; ++v) {
                 UnicodeString variant;
                 Transliterator::_getAvailableVariant(v, source, target, variant);
-                
+
                 UnicodeString id;
                 UnicodeString id;
-                TransliteratorIDParser::STVtoID(ANY, target, variant, id);
+                TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), target, variant, id);
                 ec = U_ZERO_ERROR;
                 ec = U_ZERO_ERROR;
-                AnyTransliterator* t = new AnyTransliterator(id, target, variant,
+                AnyTransliterator* tl = new AnyTransliterator(id, target, variant,
                                                              targetScript, ec);
                 if (U_FAILURE(ec)) {
                                                              targetScript, ec);
                 if (U_FAILURE(ec)) {
-                    delete t;
+                    delete tl;
                 } else {
                 } else {
-                    Transliterator::_registerInstance(t);
-                    Transliterator::_registerSpecialInverse(target, NULL_ID, FALSE);
+                    Transliterator::_registerInstance(tl);
+                    Transliterator::_registerSpecialInverse(target, UnicodeString(TRUE, NULL_ID, 4), FALSE);
                 }
             }
         }
                 }
             }
         }