icuSources/common/unames.cpp

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2014, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *   file name:  unames.c
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 1999oct04
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18 #include "unicode/putil.h"
  19 #include "unicode/uchar.h"
  20 #include "unicode/udata.h"
  21 #include "unicode/utf.h"
  22 #include "unicode/utf16.h"
  23 #include "uassert.h"
  24 #include "ustr_imp.h"
  25 #include "umutex.h"
  26 #include "cmemory.h"
  27 #include "cstring.h"
  28 #include "ucln_cmn.h"
  29 #include "udataswp.h"
  30 #include "uprops.h"
  31
  32 U_NAMESPACE_BEGIN
  33
  34 /* prototypes ------------------------------------------------------------- */
  35
  36 static const char DATA_NAME[] = "unames";
  37 static const char DATA_TYPE[] = "icu";
  38
  39 #define GROUP_SHIFT 5
  40 #define LINES_PER_GROUP (1L<<GROUP_SHIFT)
  41 #define GROUP_MASK (LINES_PER_GROUP-1)
  42
  43 /*
  44  * This struct was replaced by explicitly accessing equivalent
  45  * fields from triples of uint16_t.
  46  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
  47  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
  48  * would advance by 6 bytes (3 uint16_t).
  49  *
  50  * We can't just change the data structure because it's loaded from a data file,
  51  * and we don't want to make it less compact, so we changed the access code.
  52  *
  53  * For details see ICU tickets 6331 and 6008.
  54 typedef struct {
  55     uint16_t groupMSB,
  56              offsetHigh, offsetLow; / * avoid padding * /
  57 } Group;
  58  */
  59 enum {
  60     GROUP_MSB,
  61     GROUP_OFFSET_HIGH,
  62     GROUP_OFFSET_LOW,
  63     GROUP_LENGTH
  64 };
  65
  66 /*
  67  * Get the 32-bit group offset.
  68  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
  69  * @return group offset (int32_t)
  70  */
  71 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
  72
  73 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
  74 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
  75
  76 typedef struct {
  77     uint32_t start, end;
  78     uint8_t type, variant;
  79     uint16_t size;
  80 } AlgorithmicRange;
  81
  82 typedef struct {
  83     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
  84 } UCharNames;
  85
  86 /*
  87  * Get the groups table from a UCharNames struct.
  88  * The groups table consists of one uint16_t groupCount followed by
  89  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
  90  * and the comment for the old struct Group above.
  91  *
  92  * @param names (const UCharNames *) pointer to the UCharNames indexes
  93  * @return (const uint16_t *) pointer to the groups table
  94  */
  95 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
  96
  97 typedef struct {
  98     const char *otherName;
  99     UChar32 code;
 100 } FindName;
 101
 102 #define DO_FIND_NAME NULL
 103
 104 static UDataMemory *uCharNamesData=NULL;
 105 static UCharNames *uCharNames=NULL;
 106 static icu::UInitOnce gCharNamesInitOnce = U_INITONCE_INITIALIZER;
 107
 108 /*
 109  * Maximum length of character names (regular & 1.0).
 110  */
 111 static int32_t gMaxNameLength=0;
 112
 113 /*
 114  * Set of chars used in character names (regular & 1.0).
 115  * Chars are platform-dependent (can be EBCDIC).
 116  */
 117 static uint32_t gNameSet[8]={ 0 };
 118
 119 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
 120 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
 121 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
 122
 123 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
 124
 125 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
 126     "unassigned",
 127     "uppercase letter",
 128     "lowercase letter",
 129     "titlecase letter",
 130     "modifier letter",
 131     "other letter",
 132     "non spacing mark",
 133     "enclosing mark",
 134     "combining spacing mark",
 135     "decimal digit number",
 136     "letter number",
 137     "other number",
 138     "space separator",
 139     "line separator",
 140     "paragraph separator",
 141     "control",
 142     "format",
 143     "private use area",
 144     "surrogate",
 145     "dash punctuation",
 146     "start punctuation",
 147     "end punctuation",
 148     "connector punctuation",
 149     "other punctuation",
 150     "math symbol",
 151     "currency symbol",
 152     "modifier symbol",
 153     "other symbol",
 154     "initial punctuation",
 155     "final punctuation",
 156     "noncharacter",
 157     "lead surrogate",
 158     "trail surrogate"
 159 };
 160
 161 /* implementation ----------------------------------------------------------- */
 162
 163 static UBool U_CALLCONV unames_cleanup(void)
 164 {
 165     if(uCharNamesData) {
 166         udata_close(uCharNamesData);
 167         uCharNamesData = NULL;
 168     }
 169     if(uCharNames) {
 170         uCharNames = NULL;
 171     }
 172     gCharNamesInitOnce.reset();
 173     gMaxNameLength=0;
 174     return TRUE;
 175 }
 176
 177 static UBool U_CALLCONV
 178 isAcceptable(void * /*context*/,
 179              const char * /*type*/, const char * /*name*/,
 180              const UDataInfo *pInfo) {
 181     return (UBool)(
 182         pInfo->size>=20 &&
 183         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
 184         pInfo->charsetFamily==U_CHARSET_FAMILY &&
 185         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
 186         pInfo->dataFormat[1]==0x6e &&
 187         pInfo->dataFormat[2]==0x61 &&
 188         pInfo->dataFormat[3]==0x6d &&
 189         pInfo->formatVersion[0]==1);
 190 }
 191
 192 static void U_CALLCONV
 193 loadCharNames(UErrorCode &status) {
 194     U_ASSERT(uCharNamesData == NULL);
 195     U_ASSERT(uCharNames == NULL);
 196
 197     uCharNamesData = udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, &status);
 198     if(U_FAILURE(status)) {
 199         uCharNamesData = NULL;
 200     } else {
 201         uCharNames = (UCharNames *)udata_getMemory(uCharNamesData);
 202     }
 203     ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
 204 }
 205
 206
 207 static UBool
 208 isDataLoaded(UErrorCode *pErrorCode) {
 209     umtx_initOnce(gCharNamesInitOnce, &loadCharNames, *pErrorCode);
 210     return U_SUCCESS(*pErrorCode);
 211 }
 212
 213 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
 214     if((bufferLength)>0) { \
 215         *(buffer)++=c; \
 216         --(bufferLength); \
 217     } \
 218     ++(bufferPos); \
 219 }
 220
 221 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
 222
 223 /*
 224  * Important: expandName() and compareName() are almost the same -
 225  * apply fixes to both.
 226  *
 227  * UnicodeData.txt uses ';' as a field separator, so no
 228  * field can contain ';' as part of its contents.
 229  * In unames.dat, it is marked as token[';']==-1 only if the
 230  * semicolon is used in the data file - which is iff we
 231  * have Unicode 1.0 names or ISO comments or aliases.
 232  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
 233  * although we know that it will never be part of a name.
 234  */
 235 static uint16_t
 236 expandName(UCharNames *names,
 237            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 238            char *buffer, uint16_t bufferLength) {
 239     uint16_t *tokens=(uint16_t *)names+8;
 240     uint16_t token, tokenCount=*tokens++, bufferPos=0;
 241     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 242     uint8_t c;
 243
 244     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 245         /*
 246          * skip the modern name if it is not requested _and_
 247          * if the semicolon byte value is a character, not a token number
 248          */
 249         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 250             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
 251             do {
 252                 while(nameLength>0) {
 253                     --nameLength;
 254                     if(*name++==';') {
 255                         break;
 256                     }
 257                 }
 258             } while(--fieldIndex>0);
 259         } else {
 260             /*
 261              * the semicolon byte value is a token number, therefore
 262              * only modern names are stored in unames.dat and there is no
 263              * such requested alternate name here
 264              */
 265             nameLength=0;
 266         }
 267     }
 268
 269     /* write each letter directly, and write a token word per token */
 270     while(nameLength>0) {
 271         --nameLength;
 272         c=*name++;
 273
 274         if(c>=tokenCount) {
 275             if(c!=';') {
 276                 /* implicit letter */
 277                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 278             } else {
 279                 /* finished */
 280                 break;
 281             }
 282         } else {
 283             token=tokens[c];
 284             if(token==(uint16_t)(-2)) {
 285                 /* this is a lead byte for a double-byte token */
 286                 token=tokens[c<<8|*name++];
 287                 --nameLength;
 288             }
 289             if(token==(uint16_t)(-1)) {
 290                 if(c!=';') {
 291                     /* explicit letter */
 292                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 293                 } else {
 294                     /* stop, but skip the semicolon if we are seeking
 295                        extended names and there was no 2.0 name but there
 296                        is a 1.0 name. */
 297                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
 298                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 299                             continue;
 300                         }
 301                     }
 302                     /* finished */
 303                     break;
 304                 }
 305             } else {
 306                 /* write token word */
 307                 uint8_t *tokenString=tokenStrings+token;
 308                 while((c=*tokenString++)!=0) {
 309                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 310                 }
 311             }
 312         }
 313     }
 314
 315     /* zero-terminate */
 316     if(bufferLength>0) {
 317         *buffer=0;
 318     }
 319
 320     return bufferPos;
 321 }
 322
 323 /*
 324  * compareName() is almost the same as expandName() except that it compares
 325  * the currently expanded name to an input name.
 326  * It returns the match/no match result as soon as possible.
 327  */
 328 static UBool
 329 compareName(UCharNames *names,
 330             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 331             const char *otherName) {
 332     uint16_t *tokens=(uint16_t *)names+8;
 333     uint16_t token, tokenCount=*tokens++;
 334     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 335     uint8_t c;
 336     const char *origOtherName = otherName;
 337
 338     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 339         /*
 340          * skip the modern name if it is not requested _and_
 341          * if the semicolon byte value is a character, not a token number
 342          */
 343         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 344             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
 345             do {
 346                 while(nameLength>0) {
 347                     --nameLength;
 348                     if(*name++==';') {
 349                         break;
 350                     }
 351                 }
 352             } while(--fieldIndex>0);
 353         } else {
 354             /*
 355              * the semicolon byte value is a token number, therefore
 356              * only modern names are stored in unames.dat and there is no
 357              * such requested alternate name here
 358              */
 359             nameLength=0;
 360         }
 361     }
 362
 363     /* compare each letter directly, and compare a token word per token */
 364     while(nameLength>0) {
 365         --nameLength;
 366         c=*name++;
 367
 368         if(c>=tokenCount) {
 369             if(c!=';') {
 370                 /* implicit letter */
 371                 if((char)c!=*otherName++) {
 372                     return FALSE;
 373                 }
 374             } else {
 375                 /* finished */
 376                 break;
 377             }
 378         } else {
 379             token=tokens[c];
 380             if(token==(uint16_t)(-2)) {
 381                 /* this is a lead byte for a double-byte token */
 382                 token=tokens[c<<8|*name++];
 383                 --nameLength;
 384             }
 385             if(token==(uint16_t)(-1)) {
 386                 if(c!=';') {
 387                     /* explicit letter */
 388                     if((char)c!=*otherName++) {
 389                         return FALSE;
 390                     }
 391                 } else {
 392                     /* stop, but skip the semicolon if we are seeking
 393                        extended names and there was no 2.0 name but there
 394                        is a 1.0 name. */
 395                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
 396                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 397                             continue;
 398                         }
 399                     }
 400                     /* finished */
 401                     break;
 402                 }
 403             } else {
 404                 /* write token word */
 405                 uint8_t *tokenString=tokenStrings+token;
 406                 while((c=*tokenString++)!=0) {
 407                     if((char)c!=*otherName++) {
 408                         return FALSE;
 409                     }
 410                 }
 411             }
 412         }
 413     }
 414
 415     /* complete match? */
 416     return (UBool)(*otherName==0);
 417 }
 418
 419 static uint8_t getCharCat(UChar32 cp) {
 420     uint8_t cat;
 421
 422     if (U_IS_UNICODE_NONCHAR(cp)) {
 423         return U_NONCHARACTER_CODE_POINT;
 424     }
 425
 426     if ((cat = u_charType(cp)) == U_SURROGATE) {
 427         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
 428     }
 429
 430     return cat;
 431 }
 432
 433 static const char *getCharCatName(UChar32 cp) {
 434     uint8_t cat = getCharCat(cp);
 435
 436     /* Return unknown if the table of names above is not up to
 437        date. */
 438
 439     if (cat >= UPRV_LENGTHOF(charCatNames)) {
 440         return "unknown";
 441     } else {
 442         return charCatNames[cat];
 443     }
 444 }
 445
 446 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
 447     const char *catname = getCharCatName(code);
 448     uint16_t length = 0;
 449
 450     UChar32 cp;
 451     int ndigits, i;
 452
 453     WRITE_CHAR(buffer, bufferLength, length, '<');
 454     while (catname[length - 1]) {
 455         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
 456     }
 457     WRITE_CHAR(buffer, bufferLength, length, '-');
 458     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
 459         ;
 460     if (ndigits < 4)
 461         ndigits = 4;
 462     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
 463         uint8_t v = (uint8_t)(cp & 0xf);
 464         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
 465     }
 466     buffer += ndigits;
 467     length += ndigits;
 468     WRITE_CHAR(buffer, bufferLength, length, '>');
 469
 470     return length;
 471 }
 472
 473 /*
 474  * getGroup() does a binary search for the group that contains the
 475  * Unicode code point "code".
 476  * The return value is always a valid Group* that may contain "code"
 477  * or else is the highest group before "code".
 478  * If the lowest group is after "code", then that one is returned.
 479  */
 480 static const uint16_t *
 481 getGroup(UCharNames *names, uint32_t code) {
 482     const uint16_t *groups=GET_GROUPS(names);
 483     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
 484              start=0,
 485              limit=*groups++,
 486              number;
 487
 488     /* binary search for the group of names that contains the one for code */
 489     while(start<limit-1) {
 490         number=(uint16_t)((start+limit)/2);
 491         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
 492             limit=number;
 493         } else {
 494             start=number;
 495         }
 496     }
 497
 498     /* return this regardless of whether it is an exact match */
 499     return groups+start*GROUP_LENGTH;
 500 }
 501
 502 /*
 503  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 504  * expands them into offsets and lengths for each string.
 505  * Lengths are stored with a variable-width encoding in consecutive nibbles:
 506  * If a nibble<0xc, then it is the length itself (0=empty string).
 507  * If a nibble>=0xc, then it forms a length value with the following nibble.
 508  * Calculation see below.
 509  * The offsets and lengths arrays must be at least 33 (one more) long because
 510  * there is no check here at the end if the last nibble is still used.
 511  */
 512 static const uint8_t *
 513 expandGroupLengths(const uint8_t *s,
 514                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
 515     /* read the lengths of the 32 strings in this group and get each string's offset */
 516     uint16_t i=0, offset=0, length=0;
 517     uint8_t lengthByte;
 518
 519     /* all 32 lengths must be read to get the offset of the first group string */
 520     while(i<LINES_PER_GROUP) {
 521         lengthByte=*s++;
 522
 523         /* read even nibble - MSBs of lengthByte */
 524         if(length>=12) {
 525             /* double-nibble length spread across two bytes */
 526             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
 527             lengthByte&=0xf;
 528         } else if((lengthByte /* &0xf0 */)>=0xc0) {
 529             /* double-nibble length spread across this one byte */
 530             length=(uint16_t)((lengthByte&0x3f)+12);
 531         } else {
 532             /* single-nibble length in MSBs */
 533             length=(uint16_t)(lengthByte>>4);
 534             lengthByte&=0xf;
 535         }
 536
 537         *offsets++=offset;
 538         *lengths++=length;
 539
 540         offset+=length;
 541         ++i;
 542
 543         /* read odd nibble - LSBs of lengthByte */
 544         if((lengthByte&0xf0)==0) {
 545             /* this nibble was not consumed for a double-nibble length above */
 546             length=lengthByte;
 547             if(length<12) {
 548                 /* single-nibble length in LSBs */
 549                 *offsets++=offset;
 550                 *lengths++=length;
 551
 552                 offset+=length;
 553                 ++i;
 554             }
 555         } else {
 556             length=0;   /* prevent double-nibble detection in the next iteration */
 557         }
 558     }
 559
 560     /* now, s is at the first group string */
 561     return s;
 562 }
 563
 564 static uint16_t
 565 expandGroupName(UCharNames *names, const uint16_t *group,
 566                 uint16_t lineNumber, UCharNameChoice nameChoice,
 567                 char *buffer, uint16_t bufferLength) {
 568     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 569     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
 570     s=expandGroupLengths(s, offsets, lengths);
 571     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
 572                       buffer, bufferLength);
 573 }
 574
 575 static uint16_t
 576 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
 577         char *buffer, uint16_t bufferLength) {
 578     const uint16_t *group=getGroup(names, code);
 579     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
 580         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
 581                                buffer, bufferLength);
 582     } else {
 583         /* group not found */
 584         /* zero-terminate */
 585         if(bufferLength>0) {
 586             *buffer=0;
 587         }
 588         return 0;
 589     }
 590 }
 591
 592 /*
 593  * enumGroupNames() enumerates all the names in a 32-group
 594  * and either calls the enumerator function or finds a given input name.
 595  */
 596 static UBool
 597 enumGroupNames(UCharNames *names, const uint16_t *group,
 598                UChar32 start, UChar32 end,
 599                UEnumCharNamesFn *fn, void *context,
 600                UCharNameChoice nameChoice) {
 601     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 602     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
 603
 604     s=expandGroupLengths(s, offsets, lengths);
 605     if(fn!=DO_FIND_NAME) {
 606         char buffer[200];
 607         uint16_t length;
 608
 609         while(start<=end) {
 610             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
 611             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
 612                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 613             }
 614             /* here, we assume that the buffer is large enough */
 615             if(length>0) {
 616                 if(!fn(context, start, nameChoice, buffer, length)) {
 617                     return FALSE;
 618                 }
 619             }
 620             ++start;
 621         }
 622     } else {
 623         const char *otherName=((FindName *)context)->otherName;
 624         while(start<=end) {
 625             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
 626                 ((FindName *)context)->code=start;
 627                 return FALSE;
 628             }
 629             ++start;
 630         }
 631     }
 632     return TRUE;
 633 }
 634
 635 /*
 636  * enumExtNames enumerate extended names.
 637  * It only needs to do it if it is called with a real function and not
 638  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
 639  * for extended names by itself.
 640  */
 641 static UBool
 642 enumExtNames(UChar32 start, UChar32 end,
 643              UEnumCharNamesFn *fn, void *context)
 644 {
 645     if(fn!=DO_FIND_NAME) {
 646         char buffer[200];
 647         uint16_t length;
 648
 649         while(start<=end) {
 650             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 651             /* here, we assume that the buffer is large enough */
 652             if(length>0) {
 653                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
 654                     return FALSE;
 655                 }
 656             }
 657             ++start;
 658         }
 659     }
 660
 661     return TRUE;
 662 }
 663
  /*
   * enumNames() walks the stored name groups for the code points from start
   * up to, but not including, limit (limit-1 is used as the last code point).
   * Code points inside a stored group are handed to enumGroupNames().
   * For U_EXTENDED_CHAR_NAME, code points before, between or after the stored
   * groups are handed to enumExtNames() instead; for other name choices
   * they are skipped.
   * Returns FALSE as soon as one of these helpers returns FALSE,
   * otherwise TRUE.
   */
  664 static UBool
  665 enumNames(UCharNames *names,
  666           UChar32 start, UChar32 limit,
  667           UEnumCharNamesFn *fn, void *context,
  668           UCharNameChoice nameChoice) {
  669     uint16_t startGroupMSB, endGroupMSB, groupCount;
  670     const uint16_t *group, *groupLimit;
  671
          /* group numbers of the first and of the last code point of the range */
  672     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
  673     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
  674
  675     /* find the group that contains start, or the highest before it */
  676     group=getGroup(names, start);
  677
  678     if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
  679         /* enumerate synthetic names between start and the group start */
  680         UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
  681         if(extLimit>limit) {
  682             extLimit=limit;
  683         }
  684         if(!enumExtNames(start, extLimit-1, fn, context)) {
  685             return FALSE;
  686         }
              /* continue behind the part that was just enumerated */
  687         start=extLimit;
  688     }
  689
  690     if(startGroupMSB==endGroupMSB) {
  691         if(startGroupMSB==group[GROUP_MSB]) {
  692             /* if start and limit-1 are in the same group, then enumerate only in that one */
  693             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
  694         }
              /* otherwise fall through to the extended-names handling at the end */
  695     } else {
              /* groupLimit points behind the last group record of the table */
  696         const uint16_t *groups=GET_GROUPS(names);
  697         groupCount=*groups++;
  698         groupLimit=groups+groupCount*GROUP_LENGTH;
  699
  700         if(startGroupMSB==group[GROUP_MSB]) {
  701             /* enumerate characters in the partial start group */
  702             if((start&GROUP_MASK)!=0) {
  703                 if(!enumGroupNames(names, group,
  704                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
  705                                    fn, context, nameChoice)) {
  706                     return FALSE;
  707                 }
  708                 group=NEXT_GROUP(group); /* continue with the next group */
  709             }
  710         } else if(startGroupMSB>group[GROUP_MSB]) {
  711             /* make sure that we start enumerating with the first group after start */
  712             const uint16_t *nextGroup=NEXT_GROUP(group);
  713             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
                      /* synthetic names from start up to the next stored group, capped at limit */
  714                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
  715                 if (end > limit) {
  716                     end = limit;
  717                 }
  718                 if (!enumExtNames(start, end - 1, fn, context)) {
  719                     return FALSE;
  720                 }
  721             }
  722             group=nextGroup;
  723         }
  724
  725         /* enumerate entire groups between the start- and end-groups */
  726         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
  727             const uint16_t *nextGroup;
  728             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
  729             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
  730                 return FALSE;
  731             }
  732             nextGroup=NEXT_GROUP(group);
  733             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
                      /* there is a gap before the next stored group: fill it with synthetic names, capped at limit */
  734                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
  735                 if (end > limit) {
  736                     end = limit;
  737                 }
  738                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
  739                     return FALSE;
  740                 }
  741             }
  742             group=nextGroup;
  743         }
  744
  745         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
  746         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
  747             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
  748         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
                  /* ran off the end of the table: resume behind the last stored group, but never before start */
  749             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
  750             if (next > start) {
  751                 start = next;
  752             }
  753         } else {
  754             return TRUE;
  755         }
  756     }
  757
  758     /* we have not found a group, which means everything is made of
  759        extended names. */
  760     if (nameChoice == U_EXTENDED_CHAR_NAME) {
              /* do not enumerate beyond the last Unicode code point */
  761         if (limit > UCHAR_MAX_VALUE + 1) {
  762             limit = UCHAR_MAX_VALUE + 1;
  763         }
  764         return enumExtNames(start, limit - 1, fn, context);
  765     }
  766
  767     return TRUE;
  768 }
 769
 770 static uint16_t
 771 writeFactorSuffix(const uint16_t *factors, uint16_t count,
 772                   const char *s, /* suffix elements */
 773                   uint32_t code,
 774                   uint16_t indexes[8], /* output fields from here */
 775                   const char *elementBases[8], const char *elements[8],
 776                   char *buffer, uint16_t bufferLength) {
 777     uint16_t i, factor, bufferPos=0;
 778     char c;
 779
 780     /* write elements according to the factors */
 781
 782     /*
 783      * the factorized elements are determined by modulo arithmetic
 784      * with the factors of this algorithm
 785      *
 786      * note that for fewer operations, count is decremented here
 787      */
 788     --count;
 789     for(i=count; i>0; --i) {
 790         factor=factors[i];
 791         indexes[i]=(uint16_t)(code%factor);
 792         code/=factor;
 793     }
 794     /*
 795      * we don't need to calculate the last modulus because start<=code<=end
 796      * guarantees here that code<=factors[0]
 797      */
 798     indexes[0]=(uint16_t)code;
 799
 800     /* write each element */
 801     for(;;) {
 802         if(elementBases!=NULL) {
 803             *elementBases++=s;
 804         }
 805
 806         /* skip indexes[i] strings */
 807         factor=indexes[i];
 808         while(factor>0) {
 809             while(*s++!=0) {}
 810             --factor;
 811         }
 812         if(elements!=NULL) {
 813             *elements++=s;
 814         }
 815
 816         /* write element */
 817         while((c=*s++)!=0) {
 818             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 819         }
 820
 821         /* we do not need to perform the rest of this loop for i==count - break here */
 822         if(i>=count) {
 823             break;
 824         }
 825
 826         /* skip the rest of the strings for this factors[i] */
 827         factor=(uint16_t)(factors[i]-indexes[i]-1);
 828         while(factor>0) {
 829             while(*s++!=0) {}
 830             --factor;
 831         }
 832
 833         ++i;
 834     }
 835
 836     /* zero-terminate */
 837     if(bufferLength>0) {
 838         *buffer=0;
 839     }
 840
 841     return bufferPos;
 842 }
 843
 844 /*
 845  * Important:
 846  * Parts of findAlgName() are almost the same as some of getAlgName().
 847  * Fixes must be applied to both.
 848  */
 849 static uint16_t
 850 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
 851         char *buffer, uint16_t bufferLength) {
 852     uint16_t bufferPos=0;
 853
 854     /* Only the normative character name can be algorithmic. */
 855     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 856         /* zero-terminate */
 857         if(bufferLength>0) {
 858             *buffer=0;
 859         }
 860         return 0;
 861     }
 862
 863     switch(range->type) {
 864     case 0: {
 865         /* name = prefix hex-digits */
 866         const char *s=(const char *)(range+1);
 867         char c;
 868
 869         uint16_t i, count;
 870
 871         /* copy prefix */
 872         while((c=*s++)!=0) {
 873             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 874         }
 875
 876         /* write hexadecimal code point value */
 877         count=range->variant;
 878
 879         /* zero-terminate */
 880         if(count<bufferLength) {
 881             buffer[count]=0;
 882         }
 883
 884         for(i=count; i>0;) {
 885             if(--i<bufferLength) {
 886                 c=(char)(code&0xf);
 887                 if(c<10) {
 888                     c+='0';
 889                 } else {
 890                     c+='A'-10;
 891                 }
 892                 buffer[i]=c;
 893             }
 894             code>>=4;
 895         }
 896
 897         bufferPos+=count;
 898         break;
 899     }
 900     case 1: {
 901         /* name = prefix factorized-elements */
 902         uint16_t indexes[8];
 903         const uint16_t *factors=(const uint16_t *)(range+1);
 904         uint16_t count=range->variant;
 905         const char *s=(const char *)(factors+count);
 906         char c;
 907
 908         /* copy prefix */
 909         while((c=*s++)!=0) {
 910             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 911         }
 912
 913         bufferPos+=writeFactorSuffix(factors, count,
 914                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
 915         break;
 916     }
 917     default:
 918         /* undefined type */
 919         /* zero-terminate */
 920         if(bufferLength>0) {
 921             *buffer=0;
 922         }
 923         break;
 924     }
 925
 926     return bufferPos;
 927 }
 928
 929 /*
 930  * Important: enumAlgNames() and findAlgName() are almost the same.
 931  * Any fix must be applied to both.
 932  */
 933 static UBool
 934 enumAlgNames(AlgorithmicRange *range,
 935              UChar32 start, UChar32 limit,
 936              UEnumCharNamesFn *fn, void *context,
 937              UCharNameChoice nameChoice) {
 938     char buffer[200];
 939     uint16_t length;
 940
 941     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 942         return TRUE;
 943     }
 944
 945     switch(range->type) {
 946     case 0: {
 947         char *s, *end;
 948         char c;
 949
 950         /* get the full name of the start character */
 951         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
 952         if(length<=0) {
 953             return TRUE;
 954         }
 955
 956         /* call the enumerator function with this first character */
 957         if(!fn(context, start, nameChoice, buffer, length)) {
 958             return FALSE;
 959         }
 960
 961         /* go to the end of the name; all these names have the same length */
 962         end=buffer;
 963         while(*end!=0) {
 964             ++end;
 965         }
 966
 967         /* enumerate the rest of the names */
 968         while(++start<limit) {
 969             /* increment the hexadecimal number on a character-basis */
 970             s=end;
 971             for (;;) {
 972                 c=*--s;
 973                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
 974                     *s=(char)(c+1);
 975                     break;
 976                 } else if(c=='9') {
 977                     *s='A';
 978                     break;
 979                 } else if(c=='F') {
 980                     *s='0';
 981                 }
 982             }
 983
 984             if(!fn(context, start, nameChoice, buffer, length)) {
 985                 return FALSE;
 986             }
 987         }
 988         break;
 989     }
 990     case 1: {
 991         uint16_t indexes[8];
 992         const char *elementBases[8], *elements[8];
 993         const uint16_t *factors=(const uint16_t *)(range+1);
 994         uint16_t count=range->variant;
 995         const char *s=(const char *)(factors+count);
 996         char *suffix, *t;
 997         uint16_t prefixLength, i, idx;
 998
 999         char c;
1000
1001         /* name = prefix factorized-elements */
1002
1003         /* copy prefix */
1004         suffix=buffer;
1005         prefixLength=0;
1006         while((c=*s++)!=0) {
1007             *suffix++=c;
1008             ++prefixLength;
1009         }
1010
1011         /* append the suffix of the start character */
1012         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
1013                                               s, (uint32_t)start-range->start,
1014                                               indexes, elementBases, elements,
1015                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
1016
1017         /* call the enumerator function with this first character */
1018         if(!fn(context, start, nameChoice, buffer, length)) {
1019             return FALSE;
1020         }
1021
1022         /* enumerate the rest of the names */
1023         while(++start<limit) {
1024             /* increment the indexes in lexical order bound by the factors */
1025             i=count;
1026             for (;;) {
1027                 idx=(uint16_t)(indexes[--i]+1);
1028                 if(idx<factors[i]) {
1029                     /* skip one index and its element string */
1030                     indexes[i]=idx;
1031                     s=elements[i];
1032                     while(*s++!=0) {
1033                     }
1034                     elements[i]=s;
1035                     break;
1036                 } else {
1037                     /* reset this index to 0 and its element string to the first one */
1038                     indexes[i]=0;
1039                     elements[i]=elementBases[i];
1040                 }
1041             }
1042
1043             /* to make matters a little easier, just append all elements to the suffix */
1044             t=suffix;
1045             length=prefixLength;
1046             for(i=0; i<count; ++i) {
1047                 s=elements[i];
1048                 while((c=*s++)!=0) {
1049                     *t++=c;
1050                     ++length;
1051                 }
1052             }
1053             /* zero-terminate */
1054             *t=0;
1055
1056             if(!fn(context, start, nameChoice, buffer, length)) {
1057                 return FALSE;
1058             }
1059         }
1060         break;
1061     }
1062     default:
1063         /* undefined type */
1064         break;
1065     }
1066
1067     return TRUE;
1068 }
1069
1070 /*
1071  * findAlgName() is almost the same as enumAlgNames() except that it
1072  * returns the code point for a name if it fits into the range.
1073  * It returns 0xffff otherwise.
1074  */
1075 static UChar32
1076 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
1077     UChar32 code;
1078
1079     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
1080         return 0xffff;
1081     }
1082
1083     switch(range->type) {
1084     case 0: {
1085         /* name = prefix hex-digits */
1086         const char *s=(const char *)(range+1);
1087         char c;
1088
1089         uint16_t i, count;
1090
1091         /* compare prefix */
1092         while((c=*s++)!=0) {
1093             if((char)c!=*otherName++) {
1094                 return 0xffff;
1095             }
1096         }
1097
1098         /* read hexadecimal code point value */
1099         count=range->variant;
1100         code=0;
1101         for(i=0; i<count; ++i) {
1102             c=*otherName++;
1103             if('0'<=c && c<='9') {
1104                 code=(code<<4)|(c-'0');
1105             } else if('A'<=c && c<='F') {
1106                 code=(code<<4)|(c-'A'+10);
1107             } else {
1108                 return 0xffff;
1109             }
1110         }
1111
1112         /* does it fit into the range? */
1113         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1114             return code;
1115         }
1116         break;
1117     }
1118     case 1: {
1119         char buffer[64];
1120         uint16_t indexes[8];
1121         const char *elementBases[8], *elements[8];
1122         const uint16_t *factors=(const uint16_t *)(range+1);
1123         uint16_t count=range->variant;
1124         const char *s=(const char *)(factors+count), *t;
1125         UChar32 start, limit;
1126         uint16_t i, idx;
1127
1128         char c;
1129
1130         /* name = prefix factorized-elements */
1131
1132         /* compare prefix */
1133         while((c=*s++)!=0) {
1134             if((char)c!=*otherName++) {
1135                 return 0xffff;
1136             }
1137         }
1138
1139         start=(UChar32)range->start;
1140         limit=(UChar32)(range->end+1);
1141
1142         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
1143         writeFactorSuffix(factors, count, s, 0,
1144                           indexes, elementBases, elements, buffer, sizeof(buffer));
1145
1146         /* compare the first suffix */
1147         if(0==uprv_strcmp(otherName, buffer)) {
1148             return start;
1149         }
1150
1151         /* enumerate and compare the rest of the suffixes */
1152         while(++start<limit) {
1153             /* increment the indexes in lexical order bound by the factors */
1154             i=count;
1155             for (;;) {
1156                 idx=(uint16_t)(indexes[--i]+1);
1157                 if(idx<factors[i]) {
1158                     /* skip one index and its element string */
1159                     indexes[i]=idx;
1160                     s=elements[i];
1161                     while(*s++!=0) {}
1162                     elements[i]=s;
1163                     break;
1164                 } else {
1165                     /* reset this index to 0 and its element string to the first one */
1166                     indexes[i]=0;
1167                     elements[i]=elementBases[i];
1168                 }
1169             }
1170
1171             /* to make matters a little easier, just compare all elements of the suffix */
1172             t=otherName;
1173             for(i=0; i<count; ++i) {
1174                 s=elements[i];
1175                 while((c=*s++)!=0) {
1176                     if(c!=*t++) {
1177                         s=""; /* does not match */
1178                         i=99;
1179                     }
1180                 }
1181             }
1182             if(i<99 && *t==0) {
1183                 return start;
1184             }
1185         }
1186         break;
1187     }
1188     default:
1189         /* undefined type */
1190         break;
1191     }
1192
1193     return 0xffff;
1194 }
1195
1196 /* sets of name characters, maximum name lengths ---------------------------- */
1197
/*
 * Bit-set helpers for a set of byte values kept in an array of uint32_t:
 * the byte value is truncated to uint8_t, its upper 3 bits select the array
 * element and its lower 5 bits select the bit inside that element.
 * SET_ADD sets the bit for c, SET_CONTAINS tests it.
 * The c argument is parenthesized so that expression arguments
 * (for example x&0xff) are converted as a whole; c is evaluated twice.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1200
1201 static int32_t
1202 calcStringSetLength(uint32_t set[8], const char *s) {
1203     int32_t length=0;
1204     char c;
1205
1206     while((c=*s++)!=0) {
1207         SET_ADD(set, c);
1208         ++length;
1209     }
1210     return length;
1211 }
1212
1213 static int32_t
1214 calcAlgNameSetsLengths(int32_t maxNameLength) {
1215     AlgorithmicRange *range;
1216     uint32_t *p;
1217     uint32_t rangeCount;
1218     int32_t length;
1219
1220     /* enumerate algorithmic ranges */
1221     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1222     rangeCount=*p;
1223     range=(AlgorithmicRange *)(p+1);
1224     while(rangeCount>0) {
1225         switch(range->type) {
1226         case 0:
1227             /* name = prefix + (range->variant times) hex-digits */
1228             /* prefix */
1229             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1230             if(length>maxNameLength) {
1231                 maxNameLength=length;
1232             }
1233             break;
1234         case 1: {
1235             /* name = prefix factorized-elements */
1236             const uint16_t *factors=(const uint16_t *)(range+1);
1237             const char *s;
1238             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1239
1240             /* prefix length */
1241             s=(const char *)(factors+count);
1242             length=calcStringSetLength(gNameSet, s);
1243             s+=length+1; /* start of factor suffixes */
1244
1245             /* get the set and maximum factor suffix length for each factor */
1246             for(i=0; i<count; ++i) {
1247                 maxFactorLength=0;
1248                 for(factor=factors[i]; factor>0; --factor) {
1249                     factorLength=calcStringSetLength(gNameSet, s);
1250                     s+=factorLength+1;
1251                     if(factorLength>maxFactorLength) {
1252                         maxFactorLength=factorLength;
1253                     }
1254                 }
1255                 length+=maxFactorLength;
1256             }
1257
1258             if(length>maxNameLength) {
1259                 maxNameLength=length;
1260             }
1261             break;
1262         }
1263         default:
1264             /* unknown type */
1265             break;
1266         }
1267
1268         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1269         --rangeCount;
1270     }
1271     return maxNameLength;
1272 }
1273
1274 static int32_t
1275 calcExtNameSetsLengths(int32_t maxNameLength) {
1276     int32_t i, length;
1277
1278     for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
1279         /*
1280          * for each category, count the length of the category name
1281          * plus 9=
1282          * 2 for <>
1283          * 1 for -
1284          * 6 for most hex digits per code point
1285          */
1286         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
1287         if(length>maxNameLength) {
1288             maxNameLength=length;
1289         }
1290     }
1291     return maxNameLength;
1292 }
1293
1294 static int32_t
1295 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
1296                   uint32_t set[8],
1297                   const uint8_t **pLine, const uint8_t *lineLimit) {
1298     const uint8_t *line=*pLine;
1299     int32_t length=0, tokenLength;
1300     uint16_t c, token;
1301
1302     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
1303         if(c>=tokenCount) {
1304             /* implicit letter */
1305             SET_ADD(set, c);
1306             ++length;
1307         } else {
1308             token=tokens[c];
1309             if(token==(uint16_t)(-2)) {
1310                 /* this is a lead byte for a double-byte token */
1311                 c=c<<8|*line++;
1312                 token=tokens[c];
1313             }
1314             if(token==(uint16_t)(-1)) {
1315                 /* explicit letter */
1316                 SET_ADD(set, c);
1317                 ++length;
1318             } else {
1319                 /* count token word */
1320                 if(tokenLengths!=NULL) {
1321                     /* use cached token length */
1322                     tokenLength=tokenLengths[c];
1323                     if(tokenLength==0) {
1324                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1325                         tokenLengths[c]=(int8_t)tokenLength;
1326                     }
1327                 } else {
1328                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1329                 }
1330                 length+=tokenLength;
1331             }
1332         }
1333     }
1334
1335     *pLine=line;
1336     return length;
1337 }
1338
1339 static void
1340 calcGroupNameSetsLengths(int32_t maxNameLength) {
1341     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
1342
1343     uint16_t *tokens=(uint16_t *)uCharNames+8;
1344     uint16_t tokenCount=*tokens++;
1345     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
1346
1347     int8_t *tokenLengths;
1348
1349     const uint16_t *group;
1350     const uint8_t *s, *line, *lineLimit;
1351
1352     int32_t groupCount, lineNumber, length;
1353
1354     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1355     if(tokenLengths!=NULL) {
1356         uprv_memset(tokenLengths, 0, tokenCount);
1357     }
1358
1359     group=GET_GROUPS(uCharNames);
1360     groupCount=*group++;
1361
1362     /* enumerate all groups */
1363     while(groupCount>0) {
1364         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
1365         s=expandGroupLengths(s, offsets, lengths);
1366
1367         /* enumerate all lines in each group */
1368         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1369             line=s+offsets[lineNumber];
1370             length=lengths[lineNumber];
1371             if(length==0) {
1372                 continue;
1373             }
1374
1375             lineLimit=line+length;
1376
1377             /* read regular name */
1378             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1379             if(length>maxNameLength) {
1380                 maxNameLength=length;
1381             }
1382             if(line==lineLimit) {
1383                 continue;
1384             }
1385
1386             /* read Unicode 1.0 name */
1387             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1388             if(length>maxNameLength) {
1389                 maxNameLength=length;
1390             }
1391             if(line==lineLimit) {
1392                 continue;
1393             }
1394
1395             /* read ISO comment */
1396             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
1397         }
1398
1399         group=NEXT_GROUP(group);
1400         --groupCount;
1401     }
1402
1403     if(tokenLengths!=NULL) {
1404         uprv_free(tokenLengths);
1405     }
1406
1407     /* set gMax... - name length last for threading */
1408     gMaxNameLength=maxNameLength;
1409 }
1410
1411 static UBool
1412 calcNameSetsLengths(UErrorCode *pErrorCode) {
1413     static const char extChars[]="0123456789ABCDEF<>-";
1414     int32_t i, maxNameLength;
1415
1416     if(gMaxNameLength!=0) {
1417         return TRUE;
1418     }
1419
1420     if(!isDataLoaded(pErrorCode)) {
1421         return FALSE;
1422     }
1423
1424     /* set hex digits, used in various names, and <>-, used in extended names */
1425     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
1426         SET_ADD(gNameSet, extChars[i]);
1427     }
1428
1429     /* set sets and lengths from algorithmic names */
1430     maxNameLength=calcAlgNameSetsLengths(0);
1431
1432     /* set sets and lengths from extended names */
1433     maxNameLength=calcExtNameSetsLengths(maxNameLength);
1434
1435     /* set sets and lengths from group names, set global maximum values */
1436     calcGroupNameSetsLengths(maxNameLength);
1437
1438     return TRUE;
1439 }
1440
1441 U_NAMESPACE_END
1442
1443 /* public API --------------------------------------------------------------- */
1444
1445 U_NAMESPACE_USE
1446
1447 U_CAPI int32_t U_EXPORT2
1448 u_charName(UChar32 code, UCharNameChoice nameChoice,
1449            char *buffer, int32_t bufferLength,
1450            UErrorCode *pErrorCode) {
1451      AlgorithmicRange *algRange;
1452     uint32_t *p;
1453     uint32_t i;
1454     int32_t length;
1455
1456     /* check the argument values */
1457     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1458         return 0;
1459     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1460               bufferLength<0 || (bufferLength>0 && buffer==NULL)
1461     ) {
1462         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1463         return 0;
1464     }
1465
1466     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1467         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1468     }
1469
1470     length=0;
1471
1472     /* try algorithmic names first */
1473     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1474     i=*p;
1475     algRange=(AlgorithmicRange *)(p+1);
1476     while(i>0) {
1477         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1478             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1479             break;
1480         }
1481         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1482         --i;
1483     }
1484
1485     if(i==0) {
1486         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1487             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1488             if (!length) {
1489                 /* extended character name */
1490                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1491             }
1492         } else {
1493             /* normal character name */
1494             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1495         }
1496     }
1497
1498     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1499 }
1500
1501 U_CAPI int32_t U_EXPORT2
1502 u_getISOComment(UChar32 /*c*/,
1503                 char *dest, int32_t destCapacity,
1504                 UErrorCode *pErrorCode) {
1505     /* check the argument values */
1506     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1507         return 0;
1508     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1509         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1510         return 0;
1511     }
1512
1513     return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1514 }
1515
1516 U_CAPI UChar32 U_EXPORT2
1517 u_charFromName(UCharNameChoice nameChoice,
1518                const char *name,
1519                UErrorCode *pErrorCode) {
1520     char upper[120], lower[120];
1521     FindName findName;
1522     AlgorithmicRange *algRange;
1523     uint32_t *p;
1524     uint32_t i;
1525     UChar32 cp = 0;
1526     char c0;
1527     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
1528
1529     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1530         return error;
1531     }
1532
1533     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1534         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1535         return error;
1536     }
1537
1538     if(!isDataLoaded(pErrorCode)) {
1539         return error;
1540     }
1541
1542     /* construct the uppercase and lowercase of the name first */
1543     for(i=0; i<sizeof(upper); ++i) {
1544         if((c0=*name++)!=0) {
1545             upper[i]=uprv_toupper(c0);
1546             lower[i]=uprv_tolower(c0);
1547         } else {
1548             upper[i]=lower[i]=0;
1549             break;
1550         }
1551     }
1552     if(i==sizeof(upper)) {
1553         /* name too long, there is no such character */
1554         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1555         return error;
1556     }
1557     // i==strlen(name)==strlen(lower)==strlen(upper)
1558
1559     /* try extended names first */
1560     if (lower[0] == '<') {
1561         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1562             // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
1563             if (lower[--i] == '>' && i >= 3 && lower[--i] != '-') {
1564                 while (i >= 3 && lower[--i] != '-') {}
1565
1566                 if (i >= 2 && lower[i] == '-') {
1567                     uint32_t cIdx;
1568
1569                     lower[i] = 0;
1570
1571                     for (++i; lower[i] != '>'; ++i) {
1572                         if (lower[i] >= '0' && lower[i] <= '9') {
1573                             cp = (cp << 4) + lower[i] - '0';
1574                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1575                             cp = (cp << 4) + lower[i] - 'a' + 10;
1576                         } else {
1577                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1578                             return error;
1579                         }
1580                     }
1581
1582                     /* Now validate the category name.
1583                        We could use a binary search, or a trie, if
1584                        we really wanted to. */
1585
1586                     for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
1587
1588                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
1589                             if (getCharCat(cp) == cIdx) {
1590                                 return cp;
1591                             }
1592                             break;
1593                         }
1594                     }
1595                 }
1596             }
1597         }
1598
1599         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1600         return error;
1601     }
1602
1603     /* try algorithmic names now */
1604     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1605     i=*p;
1606     algRange=(AlgorithmicRange *)(p+1);
1607     while(i>0) {
1608         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1609             return cp;
1610         }
1611         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1612         --i;
1613     }
1614
1615     /* normal character name */
1616     findName.otherName=upper;
1617     findName.code=error;
1618     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
1619     if (findName.code == error) {
1620          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1621     }
1622     return findName.code;
1623 }
1624
1625 U_CAPI void U_EXPORT2
1626 u_enumCharNames(UChar32 start, UChar32 limit,
1627                 UEnumCharNamesFn *fn,
1628                 void *context,
1629                 UCharNameChoice nameChoice,
1630                 UErrorCode *pErrorCode) {
1631     AlgorithmicRange *algRange;
1632     uint32_t *p;
1633     uint32_t i;
1634
1635     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1636         return;
1637     }
1638
1639     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1640         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1641         return;
1642     }
1643
1644     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1645         limit = UCHAR_MAX_VALUE + 1;
1646     }
1647     if((uint32_t)start>=(uint32_t)limit) {
1648         return;
1649     }
1650
1651     if(!isDataLoaded(pErrorCode)) {
1652         return;
1653     }
1654
1655     /* interleave the data-driven ones with the algorithmic ones */
1656     /* iterate over all algorithmic ranges; assume that they are in ascending order */
1657     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1658     i=*p;
1659     algRange=(AlgorithmicRange *)(p+1);
1660     while(i>0) {
1661         /* enumerate the character names before the current algorithmic range */
1662         /* here: start<limit */
1663         if((uint32_t)start<algRange->start) {
1664             if((uint32_t)limit<=algRange->start) {
1665                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
1666                 return;
1667             }
1668             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1669                 return;
1670             }
1671             start=(UChar32)algRange->start;
1672         }
1673         /* enumerate the character names in the current algorithmic range */
1674         /* here: algRange->start<=start<limit */
1675         if((uint32_t)start<=algRange->end) {
1676             if((uint32_t)limit<=(algRange->end+1)) {
1677                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1678                 return;
1679             }
1680             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1681                 return;
1682             }
1683             start=(UChar32)algRange->end+1;
1684         }
1685         /* continue to the next algorithmic range (here: start<limit) */
1686         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1687         --i;
1688     }
1689     /* enumerate the character names after the last algorithmic range */
1690     enumNames(uCharNames, start, limit, fn, context, nameChoice);
1691 }
1692
1693 U_CAPI int32_t U_EXPORT2
1694 uprv_getMaxCharNameLength() {
1695     UErrorCode errorCode=U_ZERO_ERROR;
1696     if(calcNameSetsLengths(&errorCode)) {
1697         return gMaxNameLength;
1698     } else {
1699         return 0;
1700     }
1701 }
1702
1703 /**
1704  * Converts the char set cset into a Unicode set uset.
1705  * @param cset Set of 256 bit flags corresponding to a set of chars.
1706  * @param uset USet to receive characters. Existing contents are deleted.
1707  */
1708 static void
1709 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
1710     UChar us[256];
1711     char cs[256];
1712
1713     int32_t i, length;
1714     UErrorCode errorCode;
1715
1716     errorCode=U_ZERO_ERROR;
1717
1718     if(!calcNameSetsLengths(&errorCode)) {
1719         return;
1720     }
1721
1722     /* build a char string with all chars that are used in character names */
1723     length=0;
1724     for(i=0; i<256; ++i) {
1725         if(SET_CONTAINS(cset, i)) {
1726             cs[length++]=(char)i;
1727         }
1728     }
1729
1730     /* convert the char string to a UChar string */
1731     u_charsToUChars(cs, us, length);
1732
1733     /* add each UChar to the USet */
1734     for(i=0; i<length; ++i) {
1735         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
1736             sa->add(sa->set, us[i]);
1737         }
1738     }
1739 }
1740
1741 /**
1742  * Fills set with characters that are used in Unicode character names.
1743  * @param set USet to receive characters.
1744  */
1745 U_CAPI void U_EXPORT2
1746 uprv_getCharNameCharacters(const USetAdder *sa) {
1747     charSetToUSet(gNameSet, sa);
1748 }
1749
1750 /* data swapping ------------------------------------------------------------ */
1751
1752 /*
1753  * The token table contains non-negative entries for token bytes,
1754  * and -1 for bytes that represent themselves in the data file's charset.
1755  * -2 entries are used for lead bytes.
1756  *
1757  * Direct bytes (-1 entries) must be translated from the input charset family
1758  * to the output charset family.
1759  * makeTokenMap() writes a permutation mapping for this.
1760  * Use it once for single-/lead-byte tokens and once more for all trail byte
1761  * tokens. (';' is an unused trail byte marked with -1.)
1762  */
1763 static void
1764 makeTokenMap(const UDataSwapper *ds,
1765              int16_t tokens[], uint16_t tokenCount,
1766              uint8_t map[256],
1767              UErrorCode *pErrorCode) {
1768     UBool usedOutChar[256];
1769     uint16_t i, j;
1770     uint8_t c1, c2;
1771
1772     if(U_FAILURE(*pErrorCode)) {
1773         return;
1774     }
1775
1776     if(ds->inCharset==ds->outCharset) {
1777         /* Same charset family: identity permutation */
1778         for(i=0; i<256; ++i) {
1779             map[i]=(uint8_t)i;
1780         }
1781     } else {
1782         uprv_memset(map, 0, 256);
1783         uprv_memset(usedOutChar, 0, 256);
1784
1785         if(tokenCount>256) {
1786             tokenCount=256;
1787         }
1788
1789         /* set the direct bytes (byte 0 always maps to itself) */
1790         for(i=1; i<tokenCount; ++i) {
1791             if(tokens[i]==-1) {
1792                 /* convert the direct byte character */
1793                 c1=(uint8_t)i;
1794                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1795                 if(U_FAILURE(*pErrorCode)) {
1796                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
1797                                      i, ds->inCharset);
1798                     return;
1799                 }
1800
1801                 /* enter the converted character into the map and mark it used */
1802                 map[c1]=c2;
1803                 usedOutChar[c2]=TRUE;
1804             }
1805         }
1806
1807         /* set the mappings for the rest of the permutation */
1808         for(i=j=1; i<tokenCount; ++i) {
1809             /* set mappings that were not set for direct bytes */
1810             if(map[i]==0) {
1811                 /* set an output byte value that was not used as an output byte above */
1812                 while(usedOutChar[j]) {
1813                     ++j;
1814                 }
1815                 map[i]=(uint8_t)j++;
1816             }
1817         }
1818
1819         /*
1820          * leave mappings at tokenCount and above unset if tokenCount<256
1821          * because they won't be used
1822          */
1823     }
1824 }
1825
/**
 * Swaps (byte order and/or charset family) the contents of a unames.icu
 * data item, or preflights its size.
 *
 * Steps in the order performed below: the data header, the four leading
 * 32-bit offsets, the token table (permuted through makeTokenMap()), the
 * token strings, the group table, the group strings (only if the charset
 * families differ), and the algorithmic ranges.
 *
 * @param ds         swapper providing the read/swap callbacks and charset families
 * @param inData     input data, starting with the data header
 * @param length     number of input bytes including the header; a negative
 *                   value requests preflighting, in which case nothing is
 *                   written and only the algorithmic ranges are walked to
 *                   compute the size
 * @param outData    output buffer; the body is only copied when the input and
 *                   output positions after the header differ, otherwise the
 *                   data is swapped in place
 * @param pErrorCode in/out ICU error code; set here to U_UNSUPPORTED_ERROR
 *                   (unrecognized data format/version or algorithmic range
 *                   type), U_INDEX_OUTOFBOUNDS_ERROR (too few bytes) or
 *                   U_MEMORY_ALLOCATION_ERROR (temporary token array)
 * @return header size plus the offset just past the last algorithmic range,
 *         or 0 on failure
 */
U_CAPI int32_t U_EXPORT2
uchar_swapNames(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
             offset, i, count, stringsCount;

    const AlgorithmicRange *inRange;
    AlgorithmicRange *outRange;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* from here on, all offsets are relative to the end of the header */
    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;
    if(length<0) {
        /* preflighting: only the offset of the algorithmic ranges is needed */
        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
    } else {
        /*
         * need at least 20 bytes after the header, and the algorithmic-names
         * offset (the fourth 32-bit value) must not exceed the remaining length
         */
        length-=headerSize;
        if( length<20 ||
            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
        ) {
            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length<0) {
        /* preflighting: iterate through algorithmic ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        offset+=4;

        /* each range stores its own total size; add them up */
        for(i=0; i<count; ++i) {
            inRange=(const AlgorithmicRange *)(inBytes+offset);
            offset+=ds->readUInt16(inRange->size);
        }
    } else {
        /* swap data */
        const uint16_t *p;
        uint16_t *q, *temp;

        /* tokens[0..255]: single-/lead-byte entries; tokens[256..511]: first block of trail-byte entries */
        int16_t tokens[512];
        uint16_t tokenCount;

        /* byte permutations for single-/lead bytes and for trail bytes */
        uint8_t map[256], trailMap[256];

        /* copy the data for inaccessible bytes */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, length);
        }

        /*
         * the initial 4 offsets first
         * (read the first three before swapping because the swap may be in place;
         * the fourth, algNamesOffset, was already read above)
         *
         * NOTE(review): tokenStringOffset, groupsOffset and groupStringOffset are
         * used below without being checked against length -- confirm that only
         * well-formed data reaches this function.
         */
        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);

        /*
         * now the tokens table
         * it needs to be permuted along with the compressed name strings
         */
        p=(const uint16_t *)(inBytes+16);
        q=(uint16_t *)(outBytes+16);

        /* read and swap the tokenCount */
        tokenCount=ds->readUInt16(*p);
        ds->swapArray16(ds, p, 2, q, pErrorCode);
        ++p;
        ++q;

        /* read the first 512 tokens and make the token maps */
        if(tokenCount<=512) {
            count=tokenCount;
        } else {
            count=512;
        }
        for(i=0; i<count; ++i) {
            tokens[i]=udata_readInt16(ds, p[i]);
        }
        for(; i<512; ++i) {
            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
        }
        /* map[] from the first 256 entries, trailMap[] from the entries at 256 and above */
        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return 0;
        }

        /*
         * swap and permute the tokens
         * go through a temporary array to support in-place swapping
         */
        temp=(uint16_t *)uprv_malloc(tokenCount*2);
        if(temp==NULL) {
            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
                             tokenCount);
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }

        /*
         * swap and permute single-/lead-byte tokens
         *
         * NOTE(review): temp holds tokenCount entries but map[i]/trailMap[] values
         * can be any byte value when the charset families differ; presumably the
         * data guarantees the permuted index stays below tokenCount -- confirm.
         */
        for(i=0; i<tokenCount && i<256; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
        }

        /* swap and permute trail-byte tokens: keep the 256-block, permute the low byte */
        for(; i<tokenCount; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
        }

        /* copy the result into the output and free the temporary array */
        uprv_memcpy(q, temp, tokenCount*2);
        uprv_free(temp);

        /*
         * swap the token strings but not a possible padding byte after
         * the terminating NUL of the last string
         */
        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
                                    outBytes+tokenStringOffset, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "uchar_swapNames(token strings) failed\n");
            return 0;
        }

        /* swap the group table: one 16-bit count followed by count groups of three 16-bit values */
        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
                           outBytes+groupsOffset, pErrorCode);

        /*
         * swap the group strings
         * swap the string bytes but not the nibble-encoded string lengths
         * (nothing to do when the charset families are the same)
         */
        if(ds->inCharset!=ds->outCharset) {
            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];

            const uint8_t *inStrings, *nextInStrings;
            uint8_t *outStrings;

            uint8_t c;

            inStrings=inBytes+groupStringOffset;
            outStrings=outBytes+groupStringOffset;

            /* the group strings extend up to the algorithmic ranges */
            stringsCount=algNamesOffset-groupStringOffset;

            /* iterate through string groups until only a few padding bytes are left */
            while(stringsCount>32) {
                nextInStrings=expandGroupLengths(inStrings, offsets, lengths);

                /* move past the length bytes */
                stringsCount-=(uint32_t)(nextInStrings-inStrings);
                outStrings+=nextInStrings-inStrings;
                inStrings=nextInStrings;

                count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
                stringsCount-=count;

                /*
                 * swap the string bytes using map[] and trailMap[]
                 *
                 * NOTE(review): count and stringsCount are unsigned; this relies on
                 * well-formed data so that the subtractions never wrap around.
                 */
                while(count>0) {
                    c=*inStrings++;
                    *outStrings++=map[c];
                    if(tokens[c]!=-2) {
                        --count;
                    } else {
                        /* token lead byte: swap the trail byte, too */
                        *outStrings++=trailMap[*inStrings++];
                        count-=2;
                    }
                }
            }
        }

        /* swap the algorithmic ranges: a 32-bit count followed by the ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
        offset+=4;

        for(i=0; i<count; ++i) {
            if(offset>(uint32_t)length) {
                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
                                 length, i);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            inRange=(const AlgorithmicRange *)(inBytes+offset);
            outRange=(AlgorithmicRange *)(outBytes+offset);
            /* read the size before swapping it: advance offset to the next range */
            offset+=ds->readUInt16(inRange->size);

            /* first 8 bytes as 32-bit units, then the 16-bit size field */
            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
            switch(inRange->type) {
            case 0:
                /* swap prefix string */
                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
                                    outRange+1, pErrorCode);
                if(U_FAILURE(*pErrorCode)) {
                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
                                     i);
                    return 0;
                }
                break;
            case 1:
                {
                    /* swap factors and the prefix and factor strings */
                    uint32_t factorsCount;

                    /* the variant field holds the number of 16-bit factors */
                    factorsCount=inRange->variant;
                    p=(const uint16_t *)(inRange+1);
                    q=(uint16_t *)(outRange+1);
                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);

                    /* swap the strings, up to the last terminating NUL */
                    p+=factorsCount;
                    q+=factorsCount;
                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
                    /* drop trailing non-NUL bytes so that only complete strings are swapped */
                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
                        --stringsCount;
                    }
                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
                }
                break;
            default:
                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
                                 inRange->type, i);
                *pErrorCode=U_UNSUPPORTED_ERROR;
                return 0;
            }
        }
    }

    /* offset now points just past the last algorithmic range */
    return headerSize+(int32_t)offset;
}
2091
2092 /*
2093  * Hey, Emacs, please set the following:
2094  *
2095  * Local Variables:
2096  * indent-tabs-mode: nil
2097  * End:
2098  *
2099  */