ICU-8.11.4.tar.gz

[apple/icu.git] / icuSources / i18n / name2uni.cpp
diff --git a/icuSources/i18n/name2uni.cpp b/icuSources/i18n/name2uni.cpp

index 8a6f2d9ca8e91962d09d68f1abcb0ba5d35827a7..d2b898d893270d5f045f6524b8a0f16fd6d4db22 100644 (file)
--- a/icuSources/i18n/name2uni.cpp
+++ b/icuSources/i18n/name2uni.cpp
@@ -1,6 +1,6 @@
  /*
  **********************************************************************
-*   Copyright (C) 2001, International Business Machines
+*   Copyright (C) 2001-2006, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *   Date        Name        Description
@@ -18,25 +18,56 @@
  #include "name2uni.h"
  #include "cmemory.h"
  #include "uprops.h"
+#include "uinvchar.h"
  #include "util.h"
  
  U_NAMESPACE_BEGIN
  
-const char NameUnicodeTransliterator::fgClassID=0;
-
-const char NameUnicodeTransliterator::_ID[] = "Name-Any";
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(NameUnicodeTransliterator)
  
  static const UChar OPEN[] = {92,78,126,123,126,0}; // "\N~{~"
  static const UChar OPEN_DELIM  = 92;  // '\\' first char of OPEN
  static const UChar CLOSE_DELIM = 125; // '}'
  static const UChar SPACE       = 32;  // ' '
  
+U_CDECL_BEGIN
+
+// USetAdder implementation
+// Does not use uset.h to reduce code dependencies
+static void U_CALLCONV
+_set_add(USet *set, UChar32 c) { // USetAdder callback: add the single code point c to the target set
+    ((UnicodeSet *)set)->add(c); // cast back: the constructor stores a UnicodeSet* in the USet* slot
+}
+
+static void U_CALLCONV
+_set_addRange(USet *set, UChar32 start, UChar32 end) { // USetAdder callback: add the code points from start to end
+    ((UnicodeSet *)set)->add(start, end); // forwards to the two-argument UnicodeSet::add on the underlying set
+}
+
+static void U_CALLCONV
+_set_addString(USet *set, const UChar *str, int32_t length) { // USetAdder callback: add a string of UChars to the target set
+    ((UnicodeSet *)set)->add(UnicodeString((UBool)(length<0), str, length)); // first ctor arg is TRUE exactly when length<0 -- presumably the "NUL-terminated" flag of the aliasing constructor; confirm in unistr.h
+}
+
+U_CDECL_END
+
  /**
   * Constructs a transliterator with the default delimiters '{' and
   * '}'.
   */
  NameUnicodeTransliterator::NameUnicodeTransliterator(UnicodeFilter* adoptedFilter) :
-    Transliterator(_ID, adoptedFilter) {
+    Transliterator(UNICODE_STRING("Name-Any", 8), adoptedFilter) { // 8 == number of characters in "Name-Any"
+
+    UnicodeSet *legalPtr = &legal; // the member set is filled in place through this pointer
+    // Get the legal character set
+    USetAdder sa = {
+        (USet *)legalPtr, // USet* == UnicodeSet*
+        _set_add, // callback for a single code point
+        _set_addRange, // callback for a code point range
+        _set_addString, // callback for a string
+        NULL // don't need remove()
+    };
+    uprv_getCharNameCharacters(&sa); // presumably reports the characters through the callbacks in sa -- confirm in uprops.h
  }
  
  /**
@@ -48,7 +79,7 @@ NameUnicodeTransliterator::~NameUnicodeTransliterator() {}
   * Copy constructor.
   */
  NameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliterator& o) :
-    Transliterator(o) {}
+    Transliterator(o), legal(o.legal) {} // copies the base part and o's legal set; nothing else to do in the body
  
  /**
   * Assignment operator.
@@ -56,6 +87,7 @@ NameUnicodeTransliterator::NameUnicodeTransliterator(const NameUnicodeTransliter
  NameUnicodeTransliterator& NameUnicodeTransliterator::operator=(
                               const NameUnicodeTransliterator& o) {
      Transliterator::operator=(o);
+    // Deliberately no "legal=o.legal;": every instance fills legal the same way in its constructor, so the sets should already be equal.
      return *this;
  }
  
@@ -92,10 +124,6 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
      UnicodeString openPat(TRUE, OPEN, -1);
      UnicodeString str, name;
  
-    // Get the legal character set
-    UnicodeSet legal;
-    uprv_getCharNameCharacters((USet*) &legal); // USet* == UnicodeSet*
-    
      int32_t cursor = offsets.start;
      int32_t limit = offsets.limit;
  
@@ -147,7 +175,6 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
              }
  
              if (c == CLOSE_DELIM) {
-
                  int32_t len = name.length();
  
                  // Delete trailing space, if any
@@ -156,27 +183,29 @@ void NameUnicodeTransliterator::handleTransliterate(Replaceable& text, UTransPos
                      --len;
                  }
  
-                name.extract(0, len, cbuf, "");
+                if (uprv_isInvariantUString(name.getBuffer(), len)) {
+                    name.extract(0, len, cbuf, maxLen, US_INV);
  
-                UErrorCode status = U_ZERO_ERROR;
-                c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);
-                if (U_SUCCESS(status)) {
-                    // Lookup succeeded
+                    UErrorCode status = U_ZERO_ERROR;
+                    c = u_charFromName(U_EXTENDED_CHAR_NAME, cbuf, &status);
+                    if (U_SUCCESS(status)) {
+                        // Lookup succeeded
  
-                    // assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1);
-                    cursor++; // advance over CLOSE_DELIM
+                        // assert(UTF_CHAR_LENGTH(CLOSE_DELIM) == 1);
+                        cursor++; // advance over CLOSE_DELIM
  
-                    str.truncate(0);
-                    str.append(c);
-                    text.handleReplaceBetween(openPos, cursor, str);
+                        str.truncate(0);
+                        str.append(c);
+                        text.handleReplaceBetween(openPos, cursor, str);
  
-                    // Adjust indices for the change in the length of
-                    // the string.  Do not assume that str.length() ==
-                    // 1, in case of surrogates.
-                    int32_t delta = cursor - openPos - str.length();
-                    cursor -= delta;
-                    limit -= delta;
-                    // assert(cursor == openPos + str.length());
+                        // Adjust indices for the change in the length of
+                        // the string.  Do not assume that str.length() ==
+                        // 1, in case of surrogates.
+                        int32_t delta = cursor - openPos - str.length();
+                        cursor -= delta;
+                        limit -= delta;
+                        // assert(cursor == openPos + str.length());
+                    }
                  }
                  // If the lookup failed, we leave things as-is and
                  // still switch to mode 0 and continue.