ICU-400.37.tar.gz

[apple/icu.git] / icuSources / common / unames.c
diff --git a/icuSources/common/unames.c b/icuSources/common/unames.c

index 9d4fe257e9373585912ec792406a0536f8ef641d..c12033fa47dae9171fd8fc750b28eb68bf2c4327 100644 (file)
--- a/icuSources/common/unames.c
+++ b/icuSources/common/unames.c
@@ -1,8 +1,7 @@
-
  /*
  ******************************************************************************
  *
-*   Copyright (C) 1999-2003, International Business Machines
+*   Copyright (C) 1999-2007, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  ******************************************************************************
@@ -15,20 +14,16 @@
  *   created by: Markus W. Scherer
  */
  
-/* set import/export definitions */
-#ifndef U_COMMON_IMPLEMENTATION
-#   define U_COMMON_IMPLEMENTATION
-#endif
-
  #include "unicode/utypes.h"
+#include "unicode/putil.h"
  #include "unicode/uchar.h"
  #include "unicode/udata.h"
-#include "unicode/uset.h"
  #include "ustr_imp.h"
  #include "umutex.h"
  #include "cmemory.h"
  #include "cstring.h"
  #include "ucln_cmn.h"
+#include "udataswp.h"
  #include "uprops.h"
  
  /* prototypes ------------------------------------------------------------- */
@@ -70,626 +65,201 @@ static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
  
  /*
   * Maximum length of character names (regular & 1.0).
- * Maximum length of ISO comments.
   */
-static int32_t gMaxNameLength=0, gMaxISOCommentLength=0;
+static int32_t gMaxNameLength=0;
  
  /*
   * Set of chars used in character names (regular & 1.0).
- * Set of chars used in ISO comments.
   * Chars are platform-dependent (can be EBCDIC).
   */
-static uint32_t gNameSet[8]={ 0 }, gISOCommentSet[8]={ 0 };
-
-static UBool
-isDataLoaded(UErrorCode *pErrorCode);
-
-static UBool U_CALLCONV
-isAcceptable(void *context,
-             const char *type, const char *name,
-             const UDataInfo *pInfo);
-
-static Group *
-getGroup(UCharNames *names, uint32_t code);
-
-static uint16_t
-getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
-        char *buffer, uint16_t bufferLength);
-
-static const uint8_t *
-expandGroupLengths(const uint8_t *s,
-                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]);
+static uint32_t gNameSet[8]={ 0 };
  
-static uint16_t
-expandGroupName(UCharNames *names, Group *group,
-                uint16_t lineNumber, UCharNameChoice nameChoice,
-                char *buffer, uint16_t bufferLength);
-
-static uint16_t
-expandName(UCharNames *names,
-           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
-           char *buffer, uint16_t bufferLength);
-
-static UBool
-compareName(UCharNames *names,
-            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
-            const char *otherName);
+/*
+ * Pseudo categories returned by getCharCat() for code points that
+ * u_charType() does not distinguish; their values continue directly after
+ * the regular categories.
+ * Each expansion is parenthesized so that it stays a single operand in any
+ * expression it is used in.
+ */
+#define U_NONCHARACTER_CODE_POINT (U_CHAR_CATEGORY_COUNT)
+#define U_LEAD_SURROGATE (U_CHAR_CATEGORY_COUNT + 1)
+#define U_TRAIL_SURROGATE (U_CHAR_CATEGORY_COUNT + 2)
  
-static UBool
-enumGroupNames(UCharNames *names, Group *group,
-               UChar32 start, UChar32 end,
-               UEnumCharNamesFn *fn, void *context,
-               UCharNameChoice nameChoice);
+#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3) /* regular categories plus the three pseudo categories defined above */
  
-static UBool
-enumExtNames(UChar32 start, UChar32 end,
-             UEnumCharNamesFn *fn, void *context);
+/*
+ * Category names, indexed by the value that getCharCat() returns.
+ * getExtName() writes one of these between '<' and '-' of an extended name.
+ * The last three entries belong to U_NONCHARACTER_CODE_POINT,
+ * U_LEAD_SURROGATE and U_TRAIL_SURROGATE.
+ */
+static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
+    "unassigned",
+    "uppercase letter",
+    "lowercase letter",
+    "titlecase letter",
+    "modifier letter",
+    "other letter",
+    "non spacing mark",
+    "enclosing mark",
+    "combining spacing mark",
+    "decimal digit number",
+    "letter number",
+    "other number",
+    "space separator",
+    "line separator",
+    "paragraph separator",
+    "control",
+    "format",
+    "private use area",
+    "surrogate",
+    "dash punctuation",   
+    "start punctuation",
+    "end punctuation",
+    "connector punctuation",
+    "other punctuation",
+    "math symbol",
+    "currency symbol",
+    "modifier symbol",
+    "other symbol",
+    "initial punctuation",
+    "final punctuation",
+    "noncharacter",
+    "lead surrogate",
+    "trail surrogate"
+};
  
-static UBool
-enumNames(UCharNames *names,
-          UChar32 start, UChar32 limit,
-          UEnumCharNamesFn *fn, void *context,
-          UCharNameChoice nameChoice);
+/* implementation ----------------------------------------------------------- */
  
-static uint16_t
-getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
-        char *buffer, uint16_t bufferLength);
+/*
+ * Cleanup callback that isDataLoaded() registers with
+ * ucln_common_registerCleanup(): closes the names data if it is open and
+ * resets the cached pointers and the maximum name length.
+ * Always returns TRUE.
+ */
+static UBool U_CALLCONV unames_cleanup(void)
+{
+    if(uCharNamesData!=NULL) {
+        udata_close(uCharNamesData);
+        uCharNamesData=NULL;
+    }
+    /* uCharNames came from udata_getMemory() on that data; only forget the pointer */
+    uCharNames=NULL;
+    gMaxNameLength=0;
+    return TRUE;
+}
  
-static uint16_t
-writeFactorSuffix(const uint16_t *factors, uint16_t count,
-                  const char *s, /* suffix elements */
-                  uint32_t code,
-                  uint16_t indexes[8], /* output fields from here */
-                  const char *elementBases[8], const char *elements[8],
-                  char *buffer, uint16_t bufferLength);
+/*
+ * Acceptance callback passed to udata_openChoice() by isDataLoaded().
+ * Accepts a data item only if its UDataInfo is at least 20 bytes long,
+ * matches this platform's endianness and charset family, and declares
+ * data format "unam" with major format version 1.
+ * context, type and name are not examined.
+ */
+static UBool U_CALLCONV
+isAcceptable(void *context,
+             const char *type, const char *name,
+             const UDataInfo *pInfo) {
+    /* check the size first, before looking at any other field */
+    if(pInfo->size<20) {
+        return FALSE;
+    }
+    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN) {
+        return FALSE;
+    }
+    if(pInfo->charsetFamily!=U_CHARSET_FAMILY) {
+        return FALSE;
+    }
+    /* dataFormat="unam" */
+    if(pInfo->dataFormat[0]!=0x75 || pInfo->dataFormat[1]!=0x6e ||
+       pInfo->dataFormat[2]!=0x61 || pInfo->dataFormat[3]!=0x6d) {
+        return FALSE;
+    }
+    return (UBool)(pInfo->formatVersion[0]==1);
+}
  
+/*
+ * Makes sure that the character names data is available in uCharNames.
+ * Returns TRUE if the data was already cached or has just been opened.
+ * Returns FALSE and sets *pErrorCode if opening fails now, or if an earlier
+ * attempt failed (its error code is kept in gLoadErrorCode and reported again).
+ * The thread that installs the data also registers unames_cleanup().
+ */
  static UBool
-enumAlgNames(AlgorithmicRange *range,
-             UChar32 start, UChar32 limit,
-             UEnumCharNamesFn *fn, void *context,
-             UCharNameChoice nameChoice);
-
-static UChar32
-findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName);
-
-static uint16_t 
-getExtName(uint32_t code, char *buffer, uint16_t bufferLength);
+isDataLoaded(UErrorCode *pErrorCode) {
+    /* load UCharNames from file if necessary */
+    UBool isCached;
  
-#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
-#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
-#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
+    /* do this because double-checked locking is broken */
+    UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
  
-#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
+    if(!isCached) {
+        UCharNames *names;
+        UDataMemory *data;
  
-static const char * const
-charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT];
+        /* check error code from previous attempt */
+        /* NOTE(review): gLoadErrorCode is read here and written below without holding the mutex */
+        if(U_FAILURE(gLoadErrorCode)) {
+            *pErrorCode=gLoadErrorCode;
+            return FALSE;
+        }
  
-static uint8_t
-getCharCat(UChar32 cp);
+        /* open the data outside the mutex block */
+        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            gLoadErrorCode=*pErrorCode;
+            return FALSE;
+        }
  
-static const char *
-getCharCatName(UChar32 cp);
- 
-/* public API --------------------------------------------------------------- */
+        names=(UCharNames *)udata_getMemory(data);
  
-U_CAPI int32_t U_EXPORT2
-u_charName(UChar32 code, UCharNameChoice nameChoice,
-           char *buffer, int32_t bufferLength,
-           UErrorCode *pErrorCode) {
-    AlgorithmicRange *algRange;
-    uint32_t *p;
-    uint32_t i;
-    int32_t length;
+        /* in the mutex block, set the data for this process */
+        {
+            umtx_lock(NULL);
+            if(uCharNames==NULL) {
+                uCharNames=names;
+                uCharNamesData=data;
+                /* ownership moved to the globals; NULL marks that nothing is left to close below */
+                data=NULL;
+                names=NULL;
+                ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
+            }
+            umtx_unlock(NULL);
+        }
  
-    /* check the argument values */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
-              bufferLength<0 || (bufferLength>0 && buffer==NULL)
-    ) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
+        /* if a different thread set it first, then close the extra data */
+        if(data!=NULL) {
+            udata_close(data); /* NULL if it was set correctly */
+        }
      }
+    /* the data is available, either from before or from the load above */
+    return TRUE;
+}
  
-    if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
-        return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
-    }
+/*
+ * Appends c to the output: stores it at buffer only while bufferLength>0
+ * (then advances buffer and decrements bufferLength), and always increments
+ * bufferPos, so bufferPos counts the full output length even when the
+ * buffer is too short.
+ * buffer, bufferLength and bufferPos are modified, so they must be lvalues.
+ */
+#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
+    if((bufferLength)>0) { \
+        *(buffer)++=c; \
+        --(bufferLength); \
+    } \
+    ++(bufferPos); \
+}
  
-    length=0;
+#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT /* internal name choice: expandName() skips two ';'-separated fields for it */
  
-    /* try algorithmic names first */
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
-    i=*p;
-    algRange=(AlgorithmicRange *)(p+1);
-    while(i>0) {
-        if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
-            length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
-            break;
-        }
-        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
-        --i;
-    }
+/*
+ * Important: expandName() and compareName() are almost the same -
+ * apply fixes to both.
+ *
+ * UnicodeData.txt uses ';' as a field separator, so no
+ * field can contain ';' as part of its contents.
+ * In unames.dat, it is marked as token[';']==-1 only if the
+ * semicolon is used in the data file - which is iff we
+ * have Unicode 1.0 names or ISO comments.
+ * So, it will be token[';']==-1 if we store U1.0 names/ISO comments
+ * although we know that it will never be part of a name.
+ */
+static uint16_t
+expandName(UCharNames *names,
+           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
+           char *buffer, uint16_t bufferLength) {
+    uint16_t *tokens=(uint16_t *)names+8;
+    uint16_t token, tokenCount=*tokens++, bufferPos=0;
+    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
+    uint8_t c;
  
-    if(i==0) {
-        if (nameChoice == U_EXTENDED_CHAR_NAME) {
-            length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
-            if (!length) {
-                /* extended character name */
-                length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
+    if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==U_ISO_COMMENT) {
+        /*
+         * skip the modern name if it is not requested _and_
+         * if the semicolon byte value is a character, not a token number
+         */
+        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
+            while(nameLength>0) {
+                --nameLength;
+                if(*name++==';') {
+                    break;
+                }
+            }
+            if(nameChoice==U_ISO_COMMENT) {
+                /* skip the Unicode 1.0 name as well to get the ISO comment */
+                while(nameLength>0) {
+                    --nameLength;
+                    if(*name++==';') {
+                        break;
+                    }
+                }
              }
          } else {
-            /* normal character name */
-            length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
+            /*
+             * the semicolon byte value is a token number, therefore
+             * only modern names are stored in unames.dat and there is no
+             * such requested Unicode 1.0 name here
+             */
+            nameLength=0;
          }
      }
  
-    return u_terminateChars(buffer, bufferLength, length, pErrorCode);
-}
-
-#define _U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
-
-U_CAPI int32_t U_EXPORT2
-u_getISOComment(UChar32 c,
-                char *dest, int32_t destCapacity,
-                UErrorCode *pErrorCode) {
-    int32_t length;
-
-    /* check the argument values */
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return 0;
-    } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return 0;
-    }
-
-    if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
-        return u_terminateChars(dest, destCapacity, 0, pErrorCode);
-    }
-
-    /* the ISO comment is stored like a normal character name */
-    length=getName(uCharNames, (uint32_t)c, _U_ISO_COMMENT, dest, (uint16_t)destCapacity);
-    return u_terminateChars(dest, destCapacity, length, pErrorCode);
-}
-
-U_CAPI UChar32 U_EXPORT2
-u_charFromName(UCharNameChoice nameChoice,
-               const char *name,
-               UErrorCode *pErrorCode) {
-    char upper[120], lower[120];
-    FindName findName;
-    AlgorithmicRange *algRange;
-    uint32_t *p;
-    uint32_t i;
-    UChar32 cp = 0;
-    char c0;
-    UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return error;
-    }
-
-    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return error;
-    }
-
-    if(!isDataLoaded(pErrorCode)) {
-        return error;
-    }
-
-    /* construct the uppercase and lowercase of the name first */
-    for(i=0; i<sizeof(upper); ++i) {
-        if((c0=*name++)!=0) {
-            upper[i]=uprv_toupper(c0);
-            lower[i]=uprv_tolower(c0);
-        } else {
-            upper[i]=lower[i]=0;
-            break;
-        }
-    }
-    if(i==sizeof(upper)) {
-        /* name too long, there is no such character */
-        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
-        return error;
-    }
-
-    /* try extended names first */
-    if (lower[0] == '<') {
-        if (nameChoice == U_EXTENDED_CHAR_NAME) {
-            if (lower[--i] == '>') {
-                for (--i; lower[i] && lower[i] != '-'; --i);
-
-                if (lower[i] == '-') { /* We've got a category. */
-                    uint32_t cIdx;
-
-                    lower[i] = 0;
-
-                    for (++i; lower[i] != '>'; ++i) {
-                        if (lower[i] >= '0' && lower[i] <= '9') {
-                            cp = (cp << 4) + lower[i] - '0';
-                        } else if (lower[i] >= 'a' && lower[i] <= 'f') {
-                            cp = (cp << 4) + lower[i] - 'a' + 10;
-                        } else {
-                            *pErrorCode = U_ILLEGAL_CHAR_FOUND;
-                            return error;
-                        }
-                    }
-
-                    /* Now validate the category name.
-                       We could use a binary search, or a trie, if
-                       we really wanted to. */
-
-                    for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
-
-                        if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
-                            if (getCharCat(cp) == cIdx) {
-                                return cp;
-                            }
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
-        return error;
-    }
-
-    /* try algorithmic names now */
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
-    i=*p;
-    algRange=(AlgorithmicRange *)(p+1);
-    while(i>0) {
-        if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
-            return cp;
-        }
-        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
-        --i;
-    }
-
-    /* normal character name */
-    findName.otherName=upper;
-    findName.code=error;
-    enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
-    if (findName.code == error) {
-         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
-    }
-    return findName.code;
-}
-
-U_CAPI void U_EXPORT2
-u_enumCharNames(UChar32 start, UChar32 limit,
-                UEnumCharNamesFn *fn,
-                void *context,
-                UCharNameChoice nameChoice,
-                UErrorCode *pErrorCode) {
-    AlgorithmicRange *algRange;
-    uint32_t *p;
-    uint32_t i;
-
-    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
-        return;
-    }
-
-    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
-        return;
-    }
-
-    if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
-        limit = UCHAR_MAX_VALUE + 1;
-    }
-    if((uint32_t)start>=(uint32_t)limit) {
-        return;
-    }
-
-    if(!isDataLoaded(pErrorCode)) {
-        return;
-    }
-
-    /* interleave the data-driven ones with the algorithmic ones */
-    /* iterate over all algorithmic ranges; assume that they are in ascending order */
-    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
-    i=*p;
-    algRange=(AlgorithmicRange *)(p+1);
-    while(i>0) {
-        /* enumerate the character names before the current algorithmic range */
-        /* here: start<limit */
-        if((uint32_t)start<algRange->start) {
-            if((uint32_t)limit<=algRange->start) {
-                enumNames(uCharNames, start, limit, fn, context, nameChoice);
-                return;
-            }
-            if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
-                return;
-            }
-            start=(UChar32)algRange->start;
-        }
-        /* enumerate the character names in the current algorithmic range */
-        /* here: algRange->start<=start<limit */
-        if((uint32_t)start<=algRange->end) {
-            if((uint32_t)limit<=(algRange->end+1)) {
-                enumAlgNames(algRange, start, limit, fn, context, nameChoice);
-                return;
-            }
-            if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
-                return;
-            }
-            start=(UChar32)algRange->end+1;
-        }
-        /* continue to the next algorithmic range (here: start<limit) */
-        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
-        --i;
-    }
-    /* enumerate the character names after the last algorithmic range */
-    enumNames(uCharNames, start, limit, fn, context, nameChoice);
-}
-
-/* implementation ----------------------------------------------------------- */
-
-UBool
-unames_cleanup()
-{
-    if(uCharNamesData) {
-        udata_close(uCharNamesData);
-        uCharNamesData = NULL;
-    }
-    if(uCharNames) {
-        uCharNames = NULL;
-    }
-    gMaxNameLength=0;
-    return TRUE;
-}
-
-static UBool
-isDataLoaded(UErrorCode *pErrorCode) {
-    /* load UCharNames from file if necessary */
-    UBool isCached;
-
-    /* do this because double-checked locking is broken */
-    umtx_lock(NULL);
-    isCached=uCharNames!=NULL;
-    umtx_unlock(NULL);
-
-    if(!isCached) {
-        UCharNames *names;
-        UDataMemory *data;
-
-        /* check error code from previous attempt */
-        if(U_FAILURE(gLoadErrorCode)) {
-            *pErrorCode=gLoadErrorCode;
-            return FALSE;
-        }
-
-        /* open the data outside the mutex block */
-        data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            gLoadErrorCode=*pErrorCode;
-            return FALSE;
-        }
-
-        names=(UCharNames *)udata_getMemory(data);
-
-        /* in the mutex block, set the data for this process */
-        {
-            umtx_lock(NULL);
-            if(uCharNames==NULL) {
-                uCharNames=names;
-                uCharNamesData=data;
-                data=NULL;
-                names=NULL;
-            }
-            umtx_unlock(NULL);
-        }
-
-        /* if a different thread set it first, then close the extra data */
-        if(data!=NULL) {
-            udata_close(data); /* NULL if it was set correctly */
-        }
-    }
-    return TRUE;
-}
-
-static UBool U_CALLCONV
-isAcceptable(void *context,
-             const char *type, const char *name,
-             const UDataInfo *pInfo) {
-    return (UBool)(
-        pInfo->size>=20 &&
-        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
-        pInfo->charsetFamily==U_CHARSET_FAMILY &&
-        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
-        pInfo->dataFormat[1]==0x6e &&
-        pInfo->dataFormat[2]==0x61 &&
-        pInfo->dataFormat[3]==0x6d &&
-        pInfo->formatVersion[0]==1);
-}
-
-/*
- * getGroup() does a binary search for the group that contains the
- * Unicode code point "code".
- * The return value is always a valid Group* that may contain "code"
- * or else is the highest group before "code".
- * If the lowest group is after "code", then that one is returned.
- */
-static Group *
-getGroup(UCharNames *names, uint32_t code) {
-    uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
-             start=0,
-             limit=*(uint16_t *)((char *)names+names->groupsOffset),
-             number;
-    Group *groups=(Group *)((char *)names+names->groupsOffset+2);
-
-    /* binary search for the group of names that contains the one for code */
-    while(start<limit-1) {
-        number=(uint16_t)((start+limit)/2);
-        if(groupMSB<groups[number].groupMSB) {
-            limit=number;
-        } else {
-            start=number;
-        }
-    }
-
-    /* return this regardless of whether it is an exact match */
-    return groups+start;
-}
-
-static uint16_t
-getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
-        char *buffer, uint16_t bufferLength) {
-    Group *group=getGroup(names, code);
-    if((uint16_t)(code>>GROUP_SHIFT)==group->groupMSB) {
-        return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
-                               buffer, bufferLength);
-    } else {
-        /* group not found */
-        /* zero-terminate */
-        if(bufferLength>0) {
-            *buffer=0;
-        }
-        return 0;
-    }
-}
-
-/*
- * expandGroupLengths() reads a block of compressed lengths of 32 strings and
- * expands them into offsets and lengths for each string.
- * Lengths are stored with a variable-width encoding in consecutive nibbles:
- * If a nibble<0xc, then it is the length itself (0=empty string).
- * If a nibble>=0xc, then it forms a length value with the following nibble.
- * Calculation see below.
- * The offsets and lengths arrays must be at least 33 (one more) long because
- * there is no check here at the end if the last nibble is still used.
- */
-static const uint8_t *
-expandGroupLengths(const uint8_t *s,
-                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
-    /* read the lengths of the 32 strings in this group and get each string's offset */
-    uint16_t i=0, offset=0, length=0;
-    uint8_t lengthByte;
-
-    /* all 32 lengths must be read to get the offset of the first group string */
-    while(i<LINES_PER_GROUP) {
-        lengthByte=*s++;
-
-        /* read even nibble - MSBs of lengthByte */
-        if(length>=12) {
-            /* double-nibble length spread across two bytes */
-            length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
-            lengthByte&=0xf;
-        } else if((lengthByte /* &0xf0 */)>=0xc0) {
-            /* double-nibble length spread across this one byte */
-            length=(uint16_t)((lengthByte&0x3f)+12);
-        } else {
-            /* single-nibble length in MSBs */
-            length=(uint16_t)(lengthByte>>4);
-            lengthByte&=0xf;
-        }
-
-        *offsets++=offset;
-        *lengths++=length;
-
-        offset+=length;
-        ++i;
-
-        /* read odd nibble - LSBs of lengthByte */
-        if((lengthByte&0xf0)==0) {
-            /* this nibble was not consumed for a double-nibble length above */
-            length=lengthByte;
-            if(length<12) {
-                /* single-nibble length in LSBs */
-                *offsets++=offset;
-                *lengths++=length;
-
-                offset+=length;
-                ++i;
-            }
-        } else {
-            length=0;   /* prevent double-nibble detection in the next iteration */
-        }
-    }
-
-    /* now, s is at the first group string */
-    return s;
-}
-
-static uint16_t
-expandGroupName(UCharNames *names, Group *group,
-                uint16_t lineNumber, UCharNameChoice nameChoice,
-                char *buffer, uint16_t bufferLength) {
-    uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
-    const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
-                                    (group->offsetHigh<<16|group->offsetLow);
-    s=expandGroupLengths(s, offsets, lengths);
-    return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
-                      buffer, bufferLength);
-}
-
-#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
-    if((bufferLength)>0) { \
-        *(buffer)++=c; \
-        --(bufferLength); \
-    } \
-    ++(bufferPos); \
-}
-
-/*
- * Important: expandName() and compareName() are almost the same -
- * apply fixes to both.
- *
- * UnicodeData.txt uses ';' as a field separator, so no
- * field can contain ';' as part of its contents.
- * In unames.dat, it is marked as token[';']==-1 only if the
- * semicolon is used in the data file - which is iff we
- * have Unicode 1.0 names or ISO comments.
- * So, it will be token[';']==-1 if we store U1.0 names/ISO comments
- * although we know that it will never be part of a name.
- */
-static uint16_t
-expandName(UCharNames *names,
-           const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
-           char *buffer, uint16_t bufferLength) {
-    uint16_t *tokens=(uint16_t *)names+8;
-    uint16_t token, tokenCount=*tokens++, bufferPos=0;
-    uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
-    uint8_t c;
-
-    if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==_U_ISO_COMMENT) {
-        /*
-         * skip the modern name if it is not requested _and_
-         * if the semicolon byte value is a character, not a token number
-         */
-        if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
-            while(nameLength>0) {
-                --nameLength;
-                if(*name++==';') {
-                    break;
-                }
-            }
-            if(nameChoice==_U_ISO_COMMENT) {
-                /* skip the Unicode 1.0 name as well to get the ISO comment */
-                while(nameLength>0) {
-                    --nameLength;
-                    if(*name++==';') {
-                        break;
-                    }
-                }
-            }
-        } else {
-            /*
-             * the semicolon byte value is a token number, therefore
-             * only modern names are stored in unames.dat and there is no
-             * such requested Unicode 1.0 name here
-             */
-            nameLength=0;
-        }
-    }
-
-    /* write each letter directly, and write a token word per token */
-    while(nameLength>0) {
-        --nameLength;
-        c=*name++;
+    /* write each letter directly, and write a token word per token */
+    while(nameLength>0) {
+        --nameLength;
+        c=*name++;
  
          if(c>=tokenCount) {
              if(c!=';') {
@@ -828,9 +398,183 @@ compareName(UCharNames *names,
              }
          }
      }
-
-    /* complete match? */
-    return (UBool)(*otherName==0);
+
+    /* complete match? */
+    return (UBool)(*otherName==0);
+}
+
+/*
+ * Returns the category of cp as reported by u_charType(), except that
+ * noncharacters yield U_NONCHARACTER_CODE_POINT and surrogates yield
+ * U_LEAD_SURROGATE or U_TRAIL_SURROGATE.
+ */
+static uint8_t getCharCat(UChar32 cp) {
+    uint8_t category;
+
+    /* noncharacters are recognized before the category lookup */
+    if (UTF_IS_UNICODE_NONCHAR(cp)) {
+        return U_NONCHARACTER_CODE_POINT;
+    }
+
+    category = u_charType(cp);
+    if (category != U_SURROGATE) {
+        return category;
+    }
+
+    /* split the single surrogate category into lead and trail */
+    if (UTF_IS_LEAD(cp)) {
+        return U_LEAD_SURROGATE;
+    }
+    return U_TRAIL_SURROGATE;
+}
+
+/*
+ * Returns the charCatNames entry for the category of cp, or "unknown" if
+ * getCharCat() yields a value beyond the end of that table.
+ */
+static const char *getCharCatName(UChar32 cp) {
+    uint8_t catIndex = getCharCat(cp);
+
+    /* the table of names may not be up to date with the set of categories */
+    return catIndex < LENGTHOF(charCatNames) ? charCatNames[catIndex] : "unknown";
+}
+
+/*
+ * Writes the extended name "<category-XXXX>" for code: the name from
+ * getCharCatName(), a hyphen, and the code point in uppercase hexadecimal
+ * with at least 4 digits.
+ * At most bufferLength chars are stored; any further output is dropped.
+ * Returns the full length of the name, which can be larger than
+ * bufferLength. The output is not zero-terminated here.
+ */
+static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
+    const char *catname = getCharCatName(code);
+    uint16_t length = 0;
+
+    uint32_t cp;
+    int ndigits, i;
+
+    WRITE_CHAR(buffer, bufferLength, length, '<');
+    /* length is 1 after the '<', so length - 1 indexes the next catname char */
+    while (catname[length - 1]) {
+        WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
+    }
+    WRITE_CHAR(buffer, bufferLength, length, '-');
+    /* count the significant hex digits, but use at least 4 */
+    for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
+        ;
+    if (ndigits < 4)
+        ndigits = 4;
+    /*
+     * Emit the digits from most to least significant through WRITE_CHAR.
+     * Storing them back to front at buffer[ndigits-1] downwards would write
+     * past the end of the buffer when fewer than ndigits chars are left.
+     */
+    for (i = ndigits; i > 0; --i) {
+        uint8_t v = (uint8_t)((code >> (4 * (i - 1))) & 0xf);
+        WRITE_CHAR(buffer, bufferLength, length, (char)(v < 10 ? '0' + v : 'A' + v - 10));
+    }
+    WRITE_CHAR(buffer, bufferLength, length, '>');
+
+    return length;
+}
+
+/*
+ * getGroup() searches the groups table (binary search on groupMSB) for the
+ * group that covers the code point "code".
+ * The result is the group for "code" if there is one, otherwise the highest
+ * group before "code", or the lowest group if all groups are after "code".
+ * Callers must compare groupMSB themselves to detect an exact match.
+ */
+static Group *
+getGroup(UCharNames *names, uint32_t code) {
+    /* the table begins with a 16-bit count; the Group entries follow it */
+    Group *groups=(Group *)((char *)names+names->groupsOffset+2);
+    uint16_t wanted=(uint16_t)(code>>GROUP_SHIFT);
+    uint16_t low=0;
+    uint16_t high=*(uint16_t *)((char *)names+names->groupsOffset);
+    uint16_t mid;
+
+    /* narrow the range [low, high) down to a single group */
+    while(low<high-1) {
+        mid=(uint16_t)((low+high)/2);
+        if(wanted<groups[mid].groupMSB) {
+            high=mid;
+        } else {
+            low=mid;
+        }
+    }
+
+    /* this may or may not be an exact match */
+    return groups+low;
+}
+
+/*
+ * expandGroupLengths() reads a block of compressed lengths of 32 strings and
+ * expands them into offsets and lengths for each string.
+ * Lengths are stored with a variable-width encoding in consecutive nibbles:
+ * If a nibble<0xc, then it is the length itself (0=empty string).
+ * If a nibble>=0xc, then it forms a length value with the following nibble.
+ * Calculation see below.
+ * The offsets and lengths arrays must be at least 33 (one more) long because
+ * there is no check here at the end if the last nibble is still used.
+ *
+ * s points to the first length byte of the group.
+ * offsets[i] receives the sum of the lengths before string i, and
+ * lengths[i] the length of string i.
+ * Returns the pointer just after the last length byte that was read.
+ */
+static const uint8_t *
+expandGroupLengths(const uint8_t *s,
+                   uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
+    /* read the lengths of the 32 strings in this group and get each string's offset */
+    uint16_t i=0, offset=0, length=0;
+    uint8_t lengthByte;
+
+    /* all 32 lengths must be read to get the offset of the first group string */
+    while(i<LINES_PER_GROUP) {
+        lengthByte=*s++;
+
+        /* read even nibble - MSBs of lengthByte */
+        /* length>=12 here means that the previous byte's low nibble started a double-nibble length */
+        if(length>=12) {
+            /* double-nibble length spread across two bytes */
+            length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
+            lengthByte&=0xf;
+        } else if((lengthByte /* &0xf0 */)>=0xc0) {
+            /* double-nibble length spread across this one byte */
+            length=(uint16_t)((lengthByte&0x3f)+12);
+        } else {
+            /* single-nibble length in MSBs */
+            length=(uint16_t)(lengthByte>>4);
+            lengthByte&=0xf;
+        }
+
+        *offsets++=offset;
+        *lengths++=length;
+
+        offset+=length;
+        ++i;
+
+        /* read odd nibble - LSBs of lengthByte */
+        /* the high nibble was cleared above unless the whole byte was used for one length */
+        if((lengthByte&0xf0)==0) {
+            /* this nibble was not consumed for a double-nibble length above */
+            length=lengthByte;
+            if(length<12) {
+                /* single-nibble length in LSBs */
+                *offsets++=offset;
+                *lengths++=length;
+
+                offset+=length;
+                ++i;
+            }
+            /* otherwise length stays >=12 and is completed with the next byte */
+        } else {
+            length=0;   /* prevent double-nibble detection in the next iteration */
+        }
+    }
+
+    /* now, s is at the first group string */
+    return s;
+}
+
+/*
+ * Expands the name stored on line lineNumber of a group into buffer.
+ * The group's data is found at groupStringOffset plus the offset that is
+ * combined from the group's offsetHigh and offsetLow; it starts with the
+ * compressed line lengths, followed by the line strings.
+ * Returns the result of expandName() for the selected line.
+ */
+static uint16_t
+expandGroupName(UCharNames *names, Group *group,
+                uint16_t lineNumber, UCharNameChoice nameChoice,
+                char *buffer, uint16_t bufferLength) {
+    uint16_t lineOffsets[LINES_PER_GROUP+2];
+    uint16_t lineLengths[LINES_PER_GROUP+2];
+    const uint8_t *groupStrings=(uint8_t *)names+names->groupStringOffset;
+    const uint8_t *lines;
+
+    /* decode the lengths block; the returned pointer is at the first line string */
+    lines=expandGroupLengths(groupStrings+(group->offsetHigh<<16|group->offsetLow),
+                             lineOffsets, lineLengths);
+    return expandName(names, lines+lineOffsets[lineNumber], lineLengths[lineNumber],
+                      nameChoice, buffer, bufferLength);
+}
+
+/*
+ * Looks up the data-driven name of code for the given nameChoice and
+ * expands it into buffer.
+ * Returns the length from expandGroupName(), or 0 with an empty,
+ * zero-terminated buffer if no group is stored for this code point.
+ */
+static uint16_t
+getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
+        char *buffer, uint16_t bufferLength) {
+    Group *group=getGroup(names, code);
+
+    if((uint16_t)(code>>GROUP_SHIFT)!=group->groupMSB) {
+        /* group not found: zero-terminate and report an empty name */
+        if(bufferLength>0) {
+            *buffer=0;
+        }
+        return 0;
+    }
+
+    return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
+                           buffer, bufferLength);
  }
  
  /*
@@ -996,6 +740,80 @@ enumNames(UCharNames *names,
      return TRUE;
  }
  
+static uint16_t
+writeFactorSuffix(const uint16_t *factors, uint16_t count,
+                  const char *s, /* suffix elements */
+                  uint32_t code,
+                  uint16_t indexes[8], /* output fields from here */
+                  const char *elementBases[8], const char *elements[8],
+                  char *buffer, uint16_t bufferLength) { /* returns bufferPos as advanced by WRITE_CHAR (macro defined elsewhere) */
+    uint16_t i, factor, bufferPos=0;
+    char c;
+
+    /* write elements according to the factors */
+
+    /*
+     * the factorized elements are determined by modulo arithmetic
+     * with the factors of this algorithm
+     *
+     * note that for fewer operations, count is decremented here
+     */
+    --count; /* count is now the index of the last factor; NOTE(review): assumes the caller passes count>=1 */
+    for(i=count; i>0; --i) {
+        factor=factors[i];
+        indexes[i]=(uint16_t)(code%factor);
+        code/=factor;
+    }
+    /*
+     * we don't need to calculate the last modulus because start<=code<=end
+     * guarantees here that code<=factors[0] (NOTE(review): used as an element index below, so presumably <factors[0])
+     */
+    indexes[0]=(uint16_t)code;
+
+    /* write each element */
+    for(;;) { /* i is 0 here: the factor loop above only ends once i has reached 0 */
+        if(elementBases!=NULL) {
+            *elementBases++=s; /* record the first string of the current factor */
+        }
+
+        /* skip indexes[i] strings */
+        factor=indexes[i];
+        while(factor>0) {
+            while(*s++!=0) {} /* advance s past one NUL-terminated string */
+            --factor;
+        }
+        if(elements!=NULL) {
+            *elements++=s; /* record the string selected for the current factor */
+        }
+
+        /* write element */
+        while((c=*s++)!=0) {
+            WRITE_CHAR(buffer, bufferLength, bufferPos, c);
+        }
+
+        /* we do not need to perform the rest of this loop for i==count - break here */
+        if(i>=count) {
+            break;
+        }
+
+        /* skip the rest of the strings for this factors[i] */
+        factor=(uint16_t)(factors[i]-indexes[i]-1);
+        while(factor>0) {
+            while(*s++!=0) {}
+            --factor;
+        }
+
+        ++i;
+    }
+
+    /* zero-terminate */
+    if(bufferLength>0) {
+        *buffer=0; /* presumably WRITE_CHAR advanced buffer, so this lands after the last char written */
+    }
+
+    return bufferPos;
+}
+
  /*
   * Important:
   * Parts of findAlgName() are almost the same as some of getAlgName().
@@ -1086,80 +904,6 @@ getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
      return bufferPos;
  }
  
-static uint16_t
-writeFactorSuffix(const uint16_t *factors, uint16_t count,
-                  const char *s, /* suffix elements */
-                  uint32_t code,
-                  uint16_t indexes[8], /* output fields from here */
-                  const char *elementBases[8], const char *elements[8],
-                  char *buffer, uint16_t bufferLength) {
-    uint16_t i, factor, bufferPos=0;
-    char c;
-
-    /* write elements according to the factors */
-
-    /*
-     * the factorized elements are determined by modulo arithmetic
-     * with the factors of this algorithm
-     *
-     * note that for fewer operations, count is decremented here
-     */
-    --count;
-    for(i=count; i>0; --i) {
-        factor=factors[i];
-        indexes[i]=(uint16_t)(code%factor);
-        code/=factor;
-    }
-    /*
-     * we don't need to calculate the last modulus because start<=code<=end
-     * guarantees here that code<=factors[0]
-     */
-    indexes[0]=(uint16_t)code;
-
-    /* write each element */
-    for(;;) {
-        if(elementBases!=NULL) {
-            *elementBases++=s;
-        }
-
-        /* skip indexes[i] strings */
-        factor=indexes[i];
-        while(factor>0) {
-            while(*s++!=0) {}
-            --factor;
-        }
-        if(elements!=NULL) {
-            *elements++=s;
-        }
-
-        /* write element */
-        while((c=*s++)!=0) {
-            WRITE_CHAR(buffer, bufferLength, bufferPos, c);
-        }
-
-        /* we do not need to perform the rest of this loop for i==count - break here */
-        if(i>=count) {
-            break;
-        }
-
-        /* skip the rest of the strings for this factors[i] */
-        factor=(uint16_t)(factors[i]-indexes[i]-1);
-        while(factor>0) {
-            while(*s++!=0) {}
-            --factor;
-        }
-
-        ++i;
-    }
-
-    /* zero-terminate */
-    if(bufferLength>0) {
-        *buffer=0;
-    }
-
-    return bufferPos;
-}
-
  /*
   * Important: enumAlgNames() and findAlgName() are almost the same.
   * Any fix must be applied to both.
@@ -1388,133 +1132,43 @@ findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *oth
              i=count;
              for (;;) {
                  index=(uint16_t)(indexes[--i]+1);
-                if(index<factors[i]) {
-                    /* skip one index and its element string */
-                    indexes[i]=index;
-                    s=elements[i];
-                    while(*s++!=0) {}
-                    elements[i]=s;
-                    break;
-                } else {
-                    /* reset this index to 0 and its element string to the first one */
-                    indexes[i]=0;
-                    elements[i]=elementBases[i];
-                }
-            }
-
-            /* to make matters a little easier, just compare all elements of the suffix */
-            t=otherName;
-            for(i=0; i<count; ++i) {
-                s=elements[i];
-                while((c=*s++)!=0) {
-                    if(c!=*t++) {
-                        s=""; /* does not match */
-                        i=99;
-                    }
-                }
-            }
-            if(i<99 && *t==0) {
-                return start;
-            }
-        }
-        break;
-    }
-    default:
-        /* undefined type */
-        break;
-    }
-
-    return 0xffff;
-}
-
-static uint8_t getCharCat(UChar32 cp) {
-    uint8_t cat;
-
-    if (UTF_IS_UNICODE_NONCHAR(cp)) {
-        return U_NONCHARACTER_CODE_POINT;
-    }
-
-    if ((cat = u_charType(cp)) == U_SURROGATE) {
-        cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
-    }
-
-    return cat;
-}
-
-static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
-    "unassigned",
-    "uppercase letter",
-    "lowercase letter",
-    "titlecase letter",
-    "modifier letter",
-    "other letter",
-    "non spacing mark",
-    "enclosing mark",
-    "combining spacing mark",
-    "decimal digit number",
-    "letter number",
-    "other number",
-    "space separator",
-    "line separator",
-    "paragraph separator",
-    "control",
-    "format",
-    "private use area",
-    "surrogate",
-    "dash punctuation",   
-    "start punctuation",
-    "end punctuation",
-    "connector punctuation",
-    "other punctuation",
-    "math symbol",
-    "currency symbol",
-    "modifier symbol",
-    "other symbol",
-    "initial punctuation",
-    "final punctuation",
-    "noncharacter",
-    "lead surrogate",
-    "trail surrogate"
-};
-
-static const char *getCharCatName(UChar32 cp) {
-    uint8_t cat = getCharCat(cp);
-
-    /* Return unknown if the table of names above is not up to
-       date. */
-
-    if (cat >= LENGTHOF(charCatNames)) {
-        return "unknown";
-    } else {
-        return charCatNames[cat];
-    }
-}
-
-static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
-    const char *catname = getCharCatName(code);
-    uint16_t length = 0;
+                if(index<factors[i]) {
+                    /* skip one index and its element string */
+                    indexes[i]=index;
+                    s=elements[i];
+                    while(*s++!=0) {}
+                    elements[i]=s;
+                    break;
+                } else {
+                    /* reset this index to 0 and its element string to the first one */
+                    indexes[i]=0;
+                    elements[i]=elementBases[i];
+                }
+            }
  
-    UChar32 cp;
-    int ndigits, i;
-    
-    WRITE_CHAR(buffer, bufferLength, length, '<');
-    while (catname[length - 1]) {
-        WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
+            /* to make matters a little easier, just compare all elements of the suffix */
+            t=otherName;
+            for(i=0; i<count; ++i) {
+                s=elements[i];
+                while((c=*s++)!=0) {
+                    if(c!=*t++) {
+                        s=""; /* does not match */
+                        i=99;
+                    }
+                }
+            }
+            if(i<99 && *t==0) {
+                return start;
+            }
+        }
+        break;
      }
-    WRITE_CHAR(buffer, bufferLength, length, '-');
-    for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
-        ;
-    if (ndigits < 4)
-        ndigits = 4;
-    for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
-        uint8_t v = (uint8_t)(cp & 0xf);
-        buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
+    default:
+        /* undefined type */
+        break;
      }
-    buffer += ndigits;
-    length += ndigits;
-    WRITE_CHAR(buffer, bufferLength, length, '>');
  
-    return length;
+    return 0xffff;
  }
  
  /* sets of name characters, maximum name lengths ---------------------------- */
@@ -1674,7 +1328,6 @@ calcGroupNameSetsLengths(int32_t maxNameLength) {
      Group *group;
      const uint8_t *s, *line, *lineLimit;
  
-    int32_t maxISOCommentLength=0;
      int32_t groupCount, lineNumber, length;
  
      tokenLengths=(int8_t *)uprv_malloc(tokenCount);
@@ -1702,72 +1355,323 @@ calcGroupNameSetsLengths(int32_t maxNameLength) {
  
              lineLimit=line+length;
  
-            /* read regular name */
-            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
-            if(length>maxNameLength) {
-                maxNameLength=length;
-            }
-            if(line==lineLimit) {
-                continue;
-            }
+            /* read regular name */
+            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
+            if(length>maxNameLength) {
+                maxNameLength=length;
+            }
+            if(line==lineLimit) {
+                continue;
+            }
+
+            /* read Unicode 1.0 name */
+            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
+            if(length>maxNameLength) {
+                maxNameLength=length;
+            }
+            if(line==lineLimit) {
+                continue;
+            }
+
+            /* read ISO comment */
+            /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
+        }
+
+        ++group;
+        --groupCount;
+    }
+
+    if(tokenLengths!=NULL) {
+        uprv_free(tokenLengths);
+    }
+
+    /* set gMax... - name length last for threading */
+    gMaxNameLength=maxNameLength;
+}
+
+/* Fills gNameSet and, through calcGroupNameSetsLengths(), gMaxNameLength on first use.
+   Returns FALSE only when isDataLoaded() reports that the names data is unavailable. */
+static UBool
+calcNameSetsLengths(UErrorCode *pErrorCode) {
+    static const char extChars[]="0123456789ABCDEF<>-";
+    int32_t i, maxNameLength;
+
+    if(gMaxNameLength!=0) {
+        return TRUE; /* already calculated */
+    }
+    if(!isDataLoaded(pErrorCode)) {
+        return FALSE;
+    }
+
+    /* set hex digits, used in various names, and <>-, used in extended names */
+    for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) { /* cast: keep the comparison signed; -1 skips the NUL */
+        SET_ADD(gNameSet, extChars[i]);
+    }
+
+    /* set sets and lengths from algorithmic names */
+    maxNameLength=calcAlgNameSetsLengths(0);
+
+    /* set sets and lengths from extended names */
+    maxNameLength=calcExtNameSetsLengths(maxNameLength);
+
+    /* set sets and lengths from group names, set global maximum values */
+    calcGroupNameSetsLengths(maxNameLength);
+    return TRUE;
+}
+
+/* public API --------------------------------------------------------------- */
+
+/* Writes the name of code selected by nameChoice into buffer; the result goes through u_terminateChars(). */
+U_CAPI int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+           char *buffer, int32_t bufferLength,
+           UErrorCode *pErrorCode) {
+    AlgorithmicRange *algRange;
+    uint32_t *p, i;
+    int32_t length=0;
+    uint16_t capacity;
+
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
+              bufferLength<0 || (bufferLength>0 && buffer==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
+        return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
+    }
+    /* the name writers take a uint16_t capacity: clamp it, a plain cast would wrap (65536 -> 0) */
+    capacity=(uint16_t)(bufferLength>0xffff ? 0xffff : bufferLength);
+
+    /* try algorithmic names first */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p;
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
+            length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, capacity);
+            break;
+        }
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
+        --i;
+    }
+
+    if(i==0) { /* no algorithmic range contains code */
+        if (nameChoice == U_EXTENDED_CHAR_NAME) {
+            length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, capacity);
+            if (!length) {
+                /* extended character name */
+                length = getExtName((uint32_t) code, buffer, capacity);
+            }
+        } else {
+            /* normal character name */
+            length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, capacity);
+        }
+    }
+
+    return u_terminateChars(buffer, bufferLength, length, pErrorCode);
+}
+
+/* Writes the ISO comment stored for c into dest; the result goes through u_terminateChars(). */
+U_CAPI int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+                char *dest, int32_t destCapacity,
+                UErrorCode *pErrorCode) {
+    int32_t length;
+    /* check the argument values */
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
+        return u_terminateChars(dest, destCapacity, 0, pErrorCode);
+    }
+    /* getName() takes a uint16_t capacity: clamp it, a plain cast would wrap (65536 -> 0) */
+    /* the ISO comment is stored like a normal character name */
+    length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)(destCapacity>0xffff ? 0xffff : destCapacity));
+    return u_terminateChars(dest, destCapacity, length, pErrorCode);
+}
+
+/* Maps a character name to its code point; returns 0xffff if nothing matches or an argument is invalid.
+   With U_EXTENDED_CHAR_NAME, "<category-HHHH>" names are parsed and checked against getCharCat(). */
+U_CAPI UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+               const char *name,
+               UErrorCode *pErrorCode) {
+    char upper[120], lower[120];
+    FindName findName;
+    AlgorithmicRange *algRange;
+    uint32_t *p, i;
+    UChar32 cp = 0;
+    char c0;
+    UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
+
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return error;
+    }
+
+    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return error;
+    }
+
+    if(!isDataLoaded(pErrorCode)) {
+        return error;
+    }
+
+    /* construct the uppercase and lowercase of the name first */
+    for(i=0; i<sizeof(upper); ++i) {
+        if((c0=*name++)!=0) {
+            upper[i]=uprv_toupper(c0);
+            lower[i]=uprv_tolower(c0);
+        } else {
+            upper[i]=lower[i]=0;
+            break;
+        }
+    }
+    if(i==sizeof(upper)) {
+        /* name too long, there is no such character */
+        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+        return error;
+    }
+
+    /* try extended names first */
+    if (lower[0] == '<') {
+        if (nameChoice == U_EXTENDED_CHAR_NAME) {
+            if (lower[--i] == '>') {
+                uint32_t limit = i; /* index of the closing '>' */
+                while (i > 0 && lower[--i] != '-') {} /* stops at the '<' in lower[0]: i cannot wrap around */
+
+                if (lower[i] == '-' && i + 1 < limit) { /* We've got a category and at least one hex digit. */
+                    uint32_t cIdx;
+
+                    lower[i] = 0;
+                    for (++i; i < limit; ++i) {
+                        if (lower[i] >= '0' && lower[i] <= '9') {
+                            cp = (cp << 4) + lower[i] - '0';
+                        } else if (lower[i] >= 'a' && lower[i] <= 'f') {
+                            cp = (cp << 4) + lower[i] - 'a' + 10;
+                        } else {
+                            cp = UCHAR_MAX_VALUE + 1; /* not a hex digit: rejected just below */
+                        }
+                        if (cp > UCHAR_MAX_VALUE) { /* out of range; also keeps cp << 4 from overflowing */
+                            *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+                            return error;
+                        }
+                    }
+                    /* Now validate the category name. We could use a binary search or a trie if we really wanted to. */
  
-            /* read Unicode 1.0 name */
-            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
-            if(length>maxNameLength) {
-                maxNameLength=length;
-            }
-            if(line==lineLimit) {
-                continue;
-            }
+                    for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
  
-            /* read ISO comment */
-            length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);
-            if(length>maxISOCommentLength) {
-                maxISOCommentLength=length;
+                        if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
+                            if (getCharCat(cp) == cIdx) {
+                                return cp;
+                            }
+                            break;
+                        }
+                    }
+                }
              }
          }
  
-        ++group;
-        --groupCount;
+        *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+        return error;
      }
  
-    if(tokenLengths!=NULL) {
-        uprv_free(tokenLengths);
+    /* try algorithmic names now */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p;
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
+            return cp;
+        }
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
+        --i;
      }
  
-    /* set gMax... - name length last for threading */
-    gMaxISOCommentLength=maxISOCommentLength;
-    gMaxNameLength=maxNameLength;
+    /* normal character name */
+    findName.otherName=upper;
+    findName.code=error;
+    enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
+    if (findName.code == error) {
+         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
+    }
+    return findName.code;
  }
  
-static UBool
-calcNameSetsLengths(UErrorCode *pErrorCode) {
-    static const char extChars[]="0123456789ABCDEF<>-";
-    int32_t i, maxNameLength;
+U_CAPI void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+                UEnumCharNamesFn *fn,
+                void *context,
+                UCharNameChoice nameChoice,
+                UErrorCode *pErrorCode) { /* passes fn/context to enumNames()/enumAlgNames() for [start, limit) */
+    AlgorithmicRange *algRange;
+    uint32_t *p;
+    uint32_t i; /* number of algorithmic ranges still to visit */
  
-    if(gMaxNameLength!=0) {
-        return TRUE;
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return;
      }
  
-    if(!isDataLoaded(pErrorCode)) {
-        return FALSE;
+    if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
+        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        return;
      }
  
-    /* set hex digits, used in various names, and <>-, used in extended names */
-    for(i=0; i<sizeof(extChars)-1; ++i) {
-        SET_ADD(gNameSet, extChars[i]);
+    if((uint32_t) limit > UCHAR_MAX_VALUE + 1) { /* the unsigned compare also clamps a negative limit */
+        limit = UCHAR_MAX_VALUE + 1;
+    }
+    if((uint32_t)start>=(uint32_t)limit) { /* empty range: nothing to enumerate */
+        return;
      }
  
-    /* set sets and lengths from algorithmic names */
-    maxNameLength=calcAlgNameSetsLengths(0);
-
-    /* set sets and lengths from extended names */
-    maxNameLength=calcExtNameSetsLengths(maxNameLength);
-
-    /* set sets and lengths from group names, set global maximum values */
-    calcGroupNameSetsLengths(maxNameLength);
+    if(!isDataLoaded(pErrorCode)) {
+        return;
+    }
  
-    return TRUE;
+    /* interleave the data-driven ones with the algorithmic ones */
+    /* iterate over all algorithmic ranges; assume that they are in ascending order */
+    p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
+    i=*p; /* the first 32-bit word at algNamesOffset is the number of ranges */
+    algRange=(AlgorithmicRange *)(p+1);
+    while(i>0) {
+        /* enumerate the character names before the current algorithmic range */
+        /* here: start<limit */
+        if((uint32_t)start<algRange->start) {
+            if((uint32_t)limit<=algRange->start) {
+                enumNames(uCharNames, start, limit, fn, context, nameChoice); /* the request ends before this range */
+                return;
+            }
+            if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
+                return; /* enumNames() returned FALSE: stop the whole enumeration */
+            }
+            start=(UChar32)algRange->start;
+        }
+        /* enumerate the character names in the current algorithmic range */
+        /* here: algRange->start<=start<limit */
+        if((uint32_t)start<=algRange->end) {
+            if((uint32_t)limit<=(algRange->end+1)) {
+                enumAlgNames(algRange, start, limit, fn, context, nameChoice); /* the request ends inside this range */
+                return;
+            }
+            if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
+                return; /* enumAlgNames() returned FALSE: stop the whole enumeration */
+            }
+            start=(UChar32)algRange->end+1;
+        }
+        /* continue to the next algorithmic range (here: start<limit) */
+        algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size); /* ranges are variable-sized records */
+        --i;
+    }
+    /* enumerate the character names after the last algorithmic range */
+    enumNames(uCharNames, start, limit, fn, context, nameChoice);
  }
  
  U_CAPI int32_t U_EXPORT2
@@ -1780,29 +1684,13 @@ uprv_getMaxCharNameLength() {
      }
  }
  
-#if 0
-/* 
-Currently not used but left for future use. Probably by UnicodeSet. 
-urename.h and uprops.h changed accordingly. 
-*/
-U_CAPI int32_t U_EXPORT2
-uprv_getMaxISOCommentLength() {
-    UErrorCode errorCode=U_ZERO_ERROR;
-    if(calcNameSetsLengths(&errorCode)) {
-        return gMaxISOCommentLength;
-    } else {
-        return 0;
-    }
-}
-#endif
-
  /**
   * Converts the char set cset into a Unicode set uset.
   * @param cset Set of 256 bit flags corresponding to a set of chars.
   * @param uset USet to receive characters. Existing contents are deleted.
   */
  static void
-charSetToUSet(uint32_t cset[8], USet* uset) {
+charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
      UChar us[256];
      char cs[256];
  
@@ -1810,7 +1698,6 @@ charSetToUSet(uint32_t cset[8], USet* uset) {
      UErrorCode errorCode;
  
      errorCode=U_ZERO_ERROR;
-    uset_clear(uset);
  
      if(!calcNameSetsLengths(&errorCode)) {
          return;
@@ -1830,34 +1717,361 @@ charSetToUSet(uint32_t cset[8], USet* uset) {
      /* add each UChar to the USet */
      for(i=0; i<length; ++i) {
          if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
-            uset_add(uset, us[i]);
+            sa->add(sa->set, us[i]);
          }
      }
  }
  
  /**
   * Fills set with characters that are used in Unicode character names.
- * @param set USet to receive characters. Existing contents are deleted.
+ * @param sa USetAdder that receives the characters; it is passed on to charSetToUSet() together with gNameSet.
   */
  U_CAPI void U_EXPORT2
-uprv_getCharNameCharacters(USet* set) {
-    charSetToUSet(gNameSet, set);
+uprv_getCharNameCharacters(const USetAdder *sa) {
+    charSetToUSet(gNameSet, sa);
  }
  
-#if 0
-/* 
-Currently not used but left for future use. Probably by UnicodeSet. 
-urename.h and uprops.h changed accordingly. 
-*/
-/**
- * Fills set with characters that are used in Unicode character names.
- * @param set USet to receive characters. Existing contents are deleted.
+/* data swapping ------------------------------------------------------------ */
+
+/*
+ * The token table contains non-negative entries for token bytes,
+ * and -1 for bytes that represent themselves in the data file's charset.
+ * -2 entries are used for lead bytes.
+ *
+ * Direct bytes (-1 entries) must be translated from the input charset family
+ * to the output charset family.
+ * makeTokenMap() writes a permutation mapping for this.
+ * Use it once for single-/lead-byte tokens and once more for all trail byte
+ * tokens. (';' is an unused trail byte marked with -1.)
   */
-U_CAPI void U_EXPORT2
-uprv_getISOCommentCharacters(USet* set) {
-    charSetToUSet(gISOCommentSet, set);
+static void
+makeTokenMap(const UDataSwapper *ds,
+             int16_t tokens[], uint16_t tokenCount,
+             uint8_t map[256],
+             UErrorCode *pErrorCode) { /* fills map[] with a byte permutation; does nothing if *pErrorCode already failed */
+    UBool usedOutChar[256]; /* flags the output byte values already taken by direct bytes */
+    uint16_t i, j;
+    uint8_t c1, c2;
+
+    if(U_FAILURE(*pErrorCode)) {
+        return;
+    }
+
+    if(ds->inCharset==ds->outCharset) {
+        /* Same charset family: identity permutation */
+        for(i=0; i<256; ++i) {
+            map[i]=(uint8_t)i;
+        }
+    } else {
+        uprv_memset(map, 0, 256); /* 0 doubles as "not mapped yet" for the indexes >=1 tested below */
+        uprv_memset(usedOutChar, 0, 256);
+
+        if(tokenCount>256) {
+            tokenCount=256; /* only the first 256 entries of tokens[] are looked at */
+        }
+
+        /* set the direct bytes (byte 0 always maps to itself) */
+        for(i=1; i<tokenCount; ++i) {
+            if(tokens[i]==-1) {
+                /* convert the direct byte character */
+                c1=(uint8_t)i;
+                ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode); /* c2 receives the converted byte */
+                if(U_FAILURE(*pErrorCode)) {
+                    udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
+                                     i, ds->inCharset);
+                    return;
+                }
+
+                /* enter the converted character into the map and mark it used */
+                map[c1]=c2;
+                usedOutChar[c2]=TRUE;
+            }
+        }
+
+        /* set the mappings for the rest of the permutation */
+        for(i=j=1; i<tokenCount; ++i) { /* j only moves forward over the output byte values */
+            /* set mappings that were not set for direct bytes */
+            if(map[i]==0) {
+                /* set an output byte value that was not used as an output byte above */
+                while(usedOutChar[j]) {
+                    ++j;
+                }
+                map[i]=(uint8_t)j++;
+            }
+        }
+
+        /*
+         * leave mappings at tokenCount and above unset if tokenCount<256
+         * because they won't be used
+         */
+    }
+}
+
+U_CAPI int32_t U_EXPORT2
+uchar_swapNames(const UDataSwapper *ds,
+                const void *inData, int32_t length, void *outData,
+                UErrorCode *pErrorCode) {
+    const UDataInfo *pInfo;
+    int32_t headerSize;
+
+    const uint8_t *inBytes;
+    uint8_t *outBytes;
+
+    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
+             offset, i, count, stringsCount;
+
+    const AlgorithmicRange *inRange;
+    AlgorithmicRange *outRange;
+
+    /* udata_swapDataHeader checks the arguments */
+    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
+    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+
+    /* check data format and format version */
+    pInfo=(const UDataInfo *)((const char *)inData+4);
+    if(!(
+        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
+        pInfo->dataFormat[1]==0x6e &&
+        pInfo->dataFormat[2]==0x61 &&
+        pInfo->dataFormat[3]==0x6d &&
+        pInfo->formatVersion[0]==1
+    )) {
+        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
+                         pInfo->dataFormat[0], pInfo->dataFormat[1],
+                         pInfo->dataFormat[2], pInfo->dataFormat[3],
+                         pInfo->formatVersion[0]);
+        *pErrorCode=U_UNSUPPORTED_ERROR;
+        return 0;
+    }
+
+    inBytes=(const uint8_t *)inData+headerSize;
+    outBytes=(uint8_t *)outData+headerSize;
+    if(length<0) {
+        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
+    } else {
+        length-=headerSize;
+        if( length<20 ||
+            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
+        ) {
+            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
+                             length);
+            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
+        }
+    }
+
+    if(length<0) {
+        /* preflighting: iterate through algorithmic ranges */
+        offset=algNamesOffset;
+        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
+        offset+=4;
+
+        for(i=0; i<count; ++i) {
+            inRange=(const AlgorithmicRange *)(inBytes+offset);
+            offset+=ds->readUInt16(inRange->size);
+        }
+    } else {
+        /* swap data */
+        const uint16_t *p;
+        uint16_t *q, *temp;
+
+        int16_t tokens[512];
+        uint16_t tokenCount;
+
+        uint8_t map[256], trailMap[256];
+
+        /* copy the data for inaccessible bytes */
+        if(inBytes!=outBytes) {
+            uprv_memcpy(outBytes, inBytes, length);
+        }
+
+        /* the initial 4 offsets first */
+        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
+        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
+        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
+        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
+
+        /*
+         * now the tokens table
+         * it needs to be permutated along with the compressed name strings
+         */
+        p=(const uint16_t *)(inBytes+16);
+        q=(uint16_t *)(outBytes+16);
+
+        /* read and swap the tokenCount */
+        tokenCount=ds->readUInt16(*p);
+        ds->swapArray16(ds, p, 2, q, pErrorCode);
+        ++p;
+        ++q;
+
+        /* read the first 512 tokens and make the token maps */
+        if(tokenCount<=512) {
+            count=tokenCount;
+        } else {
+            count=512;
+        }
+        for(i=0; i<count; ++i) {
+            tokens[i]=udata_readInt16(ds, p[i]);
+        }
+        for(; i<512; ++i) {
+            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
+        }
+        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
+        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            return 0;
+        }
+
+        /*
+         * swap and permutate the tokens
+         * go through a temporary array to support in-place swapping
+         */
+        temp=(uint16_t *)uprv_malloc(tokenCount*2);
+        if(temp==NULL) {
+            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
+                             tokenCount);
+            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+            return 0;
+        }
+
+        /* swap and permutate single-/lead-byte tokens */
+        for(i=0; i<tokenCount && i<256; ++i) {
+            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
+        }
+
+        /* swap and permutate trail-byte tokens */
+        for(; i<tokenCount; ++i) {
+            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
+        }
+
+        /* copy the result into the output and free the temporary array */
+        uprv_memcpy(q, temp, tokenCount*2);
+        uprv_free(temp);
+
+        /*
+         * swap the token strings but not a possible padding byte after
+         * the terminating NUL of the last string
+         */
+        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
+                                    outBytes+tokenStringOffset, pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            udata_printError(ds, "uchar_swapNames(token strings) failed\n");
+            return 0;
+        }
+
+        /* swap the group table */
+        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
+        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
+                           outBytes+groupsOffset, pErrorCode);
+
+        /*
+         * swap the group strings
+         * swap the string bytes but not the nibble-encoded string lengths
+         */
+        if(ds->inCharset!=ds->outCharset) {
+            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
+
+            const uint8_t *inStrings, *nextInStrings;
+            uint8_t *outStrings;
+
+            uint8_t c;
+
+            inStrings=inBytes+groupStringOffset;
+            outStrings=outBytes+groupStringOffset;
+
+            stringsCount=algNamesOffset-groupStringOffset;
+
+            /* iterate through string groups until only a few padding bytes are left */
+            while(stringsCount>32) {
+                nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
+
+                /* move past the length bytes */
+                stringsCount-=(uint32_t)(nextInStrings-inStrings);
+                outStrings+=nextInStrings-inStrings;
+                inStrings=nextInStrings;
+
+                count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
+                stringsCount-=count;
+
+                /* swap the string bytes using map[] and trailMap[] */
+                while(count>0) {
+                    c=*inStrings++;
+                    *outStrings++=map[c];
+                    if(tokens[c]!=-2) {
+                        --count;
+                    } else {
+                        /* token lead byte: swap the trail byte, too */
+                        *outStrings++=trailMap[*inStrings++];
+                        count-=2;
+                    }
+                }
+            }
+        }
+
+        /* swap the algorithmic ranges */
+        offset=algNamesOffset;
+        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
+        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
+        offset+=4;
+
+        for(i=0; i<count; ++i) {
+            if(offset>(uint32_t)length) {
+                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
+                                 length, i);
+                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                return 0;
+            }
+
+            inRange=(const AlgorithmicRange *)(inBytes+offset);
+            outRange=(AlgorithmicRange *)(outBytes+offset);
+            offset+=ds->readUInt16(inRange->size);
+
+            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
+            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
+            switch(inRange->type) {
+            case 0:
+                /* swap prefix string */
+                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
+                                    outRange+1, pErrorCode);
+                if(U_FAILURE(*pErrorCode)) {
+                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
+                                     i);
+                    return 0;
+                }
+                break;
+            case 1:
+                {
+                    /* swap factors and the prefix and factor strings */
+                    uint32_t factorsCount;
+
+                    factorsCount=inRange->variant;
+                    p=(const uint16_t *)(inRange+1);
+                    q=(uint16_t *)(outRange+1);
+                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
+
+                    /* swap the strings, up to the last terminating NUL */
+                    p+=factorsCount;
+                    q+=factorsCount;
+                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
+                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
+                        --stringsCount;
+                    }
+                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
+                }
+                break;
+            default:
+                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
+                                 inRange->type, i);
+                *pErrorCode=U_UNSUPPORTED_ERROR;
+                return 0;
+            }
+        }
+    }
+
+    return headerSize+(int32_t)offset;
  }
-#endif
  
  /*
   * Hey, Emacs, please set the following: