icuSources/common/unames.cpp

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2011, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *   file name:  unames.cpp
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 1999oct04
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18 #include "unicode/putil.h"
  19 #include "unicode/uchar.h"
  20 #include "unicode/udata.h"
  21 #include "unicode/utf.h"
  22 #include "unicode/utf16.h"
  23 #include "ustr_imp.h"
  24 #include "umutex.h"
  25 #include "cmemory.h"
  26 #include "cstring.h"
  27 #include "ucln_cmn.h"
  28 #include "udataswp.h"
  29 #include "uprops.h"
  30
  31 /* prototypes ------------------------------------------------------------- */
  32
     /* Number of elements of a real array (not a pointer), cast to int32_t. */
  33 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  34
     /* Name and type of the data item that isDataLoaded() opens via udata_openChoice(). */
  35 static const char DATA_NAME[] = "unames";
  36 static const char DATA_TYPE[] = "icu";
  37
     /*
      * Names are stored in groups of LINES_PER_GROUP (1<<5 == 32) lines:
      * code>>GROUP_SHIFT selects the group, code&GROUP_MASK the line inside it.
      * Note that LINES_PER_GROUP and GROUP_MASK have type long (1L).
      */
  38 #define GROUP_SHIFT 5
  39 #define LINES_PER_GROUP (1L<<GROUP_SHIFT)
  40 #define GROUP_MASK (LINES_PER_GROUP-1)
  41
  42 /*
  43  * This struct was replaced by explicitly accessing equivalent
  44  * fields from triples of uint16_t.
  45  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
  46  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
  47  * would advance by 6 bytes (3 uint16_t).
  48  *
  49  * We can't just change the data structure because it's loaded from a data file,
  50  * and we don't want to make it less compact, so we changed the access code.
  51  *
  52  * For details see ICU tickets 6331 and 6008.
  53 typedef struct {
  54     uint16_t groupMSB,
  55              offsetHigh, offsetLow; / * avoid padding * /
  56 } Group;
  57  */
  58 enum {
  59     GROUP_MSB,
  60     GROUP_OFFSET_HIGH,
  61     GROUP_OFFSET_LOW,
  62     GROUP_LENGTH
  63 };
  64
  65 /*
  66  * Get the 32-bit group offset.
  67  * @param group (const uint16_t *) pointer to a Group triple of uint16_t
  68  * @return group offset (int32_t)
  69  */
  70 #define GET_GROUP_OFFSET(group) ((int32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW])
  71
  72 #define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
  73 #define PREV_GROUP(group) ((group)-GROUP_LENGTH)
  74
  75 typedef struct {
  76     uint32_t start, end;
  77     uint8_t type, variant;
  78     uint16_t size;
  79 } AlgorithmicRange;
  80
  81 typedef struct {
  82     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
  83 } UCharNames;
  84
  85 /*
  86  * Get the groups table from a UCharNames struct.
  87  * The groups table consists of one uint16_t groupCount followed by
  88  * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
  89  * and the comment for the old struct Group above.
  90  *
  91  * @param names (const UCharNames *) pointer to the UCharNames indexes
  92  * @return (const uint16_t *) pointer to the groups table
  93  */
  94 #define GET_GROUPS(names) (const uint16_t *)((const char *)names+names->groupsOffset)
  95
     /*
      * Context for a name lookup: enumGroupNames() compares each expanded name
      * with otherName and, on a match, stores the code point in code.
      */
  96 typedef struct {
  97     const char *otherName;
  98     UChar32 code;
  99 } FindName;
 100
     /*
      * Passed in place of an enumerator function to request "find a name"
      * mode; the context pointer is then interpreted as a FindName *.
      */
 101 #define DO_FIND_NAME NULL
 102
     /*
      * Loaded names data, set once by isDataLoaded() and released by
      * unames_cleanup(). gLoadErrorCode remembers a failed load attempt so
      * that later calls report the same error without trying again.
      */
 103 static UDataMemory *uCharNamesData=NULL;
 104 static UCharNames *uCharNames=NULL;
 105 static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
 106
 107 /*
 108  * Maximum length of character names (regular & 1.0).
      * Reset to 0 by unames_cleanup().
 109  */
 110 static int32_t gMaxNameLength=0;
 111
 112 /*
 113  * Set of chars used in character names (regular & 1.0).
 114  * Chars are platform-dependent (can be EBCDIC).
      * 8 x 32 bits - presumably one bit per byte value; TODO confirm against the code that fills it.
 115  */
 116 static uint32_t gNameSet[8]={ 0 };
 117
 118 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
 119 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
 120 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
 121
 122 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
 123
     /*
      * Category names used for synthetic "extended" names of the form
      * <category-XXXX>, see getExtName().
      * Indexed by the value returned from getCharCat(): a u_charType() value,
      * or one of U_NONCHARACTER_CODE_POINT, U_LEAD_SURROGATE, U_TRAIL_SURROGATE
      * (the last three entries).
      */
 124 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
 125     "unassigned",
 126     "uppercase letter",
 127     "lowercase letter",
 128     "titlecase letter",
 129     "modifier letter",
 130     "other letter",
 131     "non spacing mark",
 132     "enclosing mark",
 133     "combining spacing mark",
 134     "decimal digit number",
 135     "letter number",
 136     "other number",
 137     "space separator",
 138     "line separator",
 139     "paragraph separator",
 140     "control",
 141     "format",
 142     "private use area",
 143     "surrogate",
 144     "dash punctuation",
 145     "start punctuation",
 146     "end punctuation",
 147     "connector punctuation",
 148     "other punctuation",
 149     "math symbol",
 150     "currency symbol",
 151     "modifier symbol",
 152     "other symbol",
 153     "initial punctuation",
 154     "final punctuation",
     /* extended values, not general categories */
 155     "noncharacter",
 156     "lead surrogate",
 157     "trail surrogate"
 158 };
 159
 160 /* implementation ----------------------------------------------------------- */
 161
 162 static UBool U_CALLCONV unames_cleanup(void)
 163 {
 164     if(uCharNamesData) {
 165         udata_close(uCharNamesData);
 166         uCharNamesData = NULL;
 167     }
 168     if(uCharNames) {
 169         uCharNames = NULL;
 170     }
 171     gMaxNameLength=0;
 172     return TRUE;
 173 }
 174
 175 static UBool U_CALLCONV
 176 isAcceptable(void * /*context*/,
 177              const char * /*type*/, const char * /*name*/,
 178              const UDataInfo *pInfo) {
 179     return (UBool)(
 180         pInfo->size>=20 &&
 181         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
 182         pInfo->charsetFamily==U_CHARSET_FAMILY &&
 183         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
 184         pInfo->dataFormat[1]==0x6e &&
 185         pInfo->dataFormat[2]==0x61 &&
 186         pInfo->dataFormat[3]==0x6d &&
 187         pInfo->formatVersion[0]==1);
 188 }
 189
     /*
      * Makes sure that the names data is available in uCharNames, opening the
      * "unames" data item on first use.
      *
      * @param pErrorCode receives the error if the data cannot be opened, either
      *                   from this attempt or from an earlier failed one
      * @return TRUE if uCharNames is set, FALSE if loading failed
      *
      * A failure is stored in gLoadErrorCode and reported again on every later
      * call; the load is not retried.
      * NOTE(review): a failure code already present in *pErrorCode on entry is
      * also recorded as the load error after udata_openChoice() returns -
      * confirm that callers check their error code first.
      */
 190 static UBool
 191 isDataLoaded(UErrorCode *pErrorCode) {
 192     /* load UCharNames from file if necessary */
 193     UBool isCached;
 194
 195     /* do this because double-checked locking is broken */
 196     UMTX_CHECK(NULL, (uCharNames!=NULL), isCached);
 197
 198     if(!isCached) {
 199         UCharNames *names;
 200         UDataMemory *data;
 201
 202         /* check error code from previous attempt */
 203         if(U_FAILURE(gLoadErrorCode)) {
 204             *pErrorCode=gLoadErrorCode;
 205             return FALSE;
 206         }
 207
 208         /* open the data outside the mutex block */
 209         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
 210         if(U_FAILURE(*pErrorCode)) {
 211             gLoadErrorCode=*pErrorCode;
 212             return FALSE;
 213         }
 214
 215         names=(UCharNames *)udata_getMemory(data);
 216
 217         /* in the mutex block, set the data for this process */
 218         {
 219             umtx_lock(NULL);
 220             if(uCharNames==NULL) {
 221                 uCharNamesData=data;
 222                 uCharNames=names;
                     /* ownership moved to the globals; data==NULL tells the code below not to close it */
 223                 data=NULL;
 224                 names=NULL;
 225                 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
 226             }
 227             umtx_unlock(NULL);
 228         }
 229
 230         /* if a different thread set it first, then close the extra data */
 231         if(data!=NULL) {
 232             udata_close(data); /* NULL if it was set correctly */
 233         }
 234     }
 235     return TRUE;
 236 }
 237
 238 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
 239     if((bufferLength)>0) { \
 240         *(buffer)++=c; \
 241         --(bufferLength); \
 242     } \
 243     ++(bufferPos); \
 244 }
 245
 246 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
 247
 248 /*
 249  * Important: expandName() and compareName() are almost the same -
 250  * apply fixes to both.
 251  *
 252  * UnicodeData.txt uses ';' as a field separator, so no
 253  * field can contain ';' as part of its contents.
 254  * In unames.dat, it is marked as token[';']==-1 only if the
 255  * semicolon is used in the data file - which is iff we
 256  * have Unicode 1.0 names or ISO comments or aliases.
 257  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
 258  * although we know that it will never be part of a name.
 259  */
     /*
      * Expands one compressed name line into a char string.
      *
      * @param names        names data header; the token table starts at uint16_t
      *                     index 8 with the token count, followed by the tokens
      * @param name         compressed bytes of the line (all ';'-separated fields)
      * @param nameLength   number of bytes at name
      * @param nameChoice   selects the field: the modern name for
      *                     U_UNICODE_CHAR_NAME/U_EXTENDED_CHAR_NAME, otherwise a
      *                     later field (U_ISO_COMMENT maps to field index 2)
      * @param buffer       output buffer
      * @param bufferLength capacity of buffer
      * @return length of the complete expanded name, even if it did not fit;
      *         the output is NUL-terminated only if there is room left
      *
      * Token table values: (uint16_t)(-2) marks the lead byte of a double-byte
      * token, (uint16_t)(-1) means the byte stands for itself, anything else is
      * the offset of a NUL-terminated word in the token strings.
      */
 260 static uint16_t
 261 expandName(UCharNames *names,
 262            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 263            char *buffer, uint16_t bufferLength) {
 264     uint16_t *tokens=(uint16_t *)names+8;
 265     uint16_t token, tokenCount=*tokens++, bufferPos=0;
 266     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 267     uint8_t c;
 268
 269     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 270         /*
 271          * skip the modern name if it is not requested _and_
 272          * if the semicolon byte value is a character, not a token number
 273          */
 274         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 275             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
                 /* skip fieldIndex fields, each one ends with a ';' */
 276             do {
 277                 while(nameLength>0) {
 278                     --nameLength;
 279                     if(*name++==';') {
 280                         break;
 281                     }
 282                 }
 283             } while(--fieldIndex>0);
 284         } else {
 285             /*
 286              * the semicolon byte value is a token number, therefore
 287              * only modern names are stored in unames.dat and there is no
 288              * such requested alternate name here
 289              */
 290             nameLength=0;
 291         }
 292     }
 293
 294     /* write each letter directly, and write a token word per token */
 295     while(nameLength>0) {
 296         --nameLength;
 297         c=*name++;
 298
 299         if(c>=tokenCount) {
 300             if(c!=';') {
 301                 /* implicit letter */
 302                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 303             } else {
 304                 /* finished */
 305                 break;
 306             }
 307         } else {
 308             token=tokens[c];
 309             if(token==(uint16_t)(-2)) {
 310                 /* this is a lead byte for a double-byte token */
                     /* NOTE(review): the trail byte is read without checking nameLength>0 - relies on well-formed data */
 311                 token=tokens[c<<8|*name++];
 312                 --nameLength;
 313             }
 314             if(token==(uint16_t)(-1)) {
 315                 if(c!=';') {
 316                     /* explicit letter */
 317                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 318                 } else {
 319                     /* stop, but skip the semicolon if we are seeking
 320                        extended names and there was no 2.0 name but there
 321                        is a 1.0 name. */
 322                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
 323                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 324                             continue;
 325                         }
 326                     }
 327                     /* finished */
 328                     break;
 329                 }
 330             } else {
 331                 /* write token word */
 332                 uint8_t *tokenString=tokenStrings+token;
 333                 while((c=*tokenString++)!=0) {
 334                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 335                 }
 336             }
 337         }
 338     }
 339
 340     /* zero-terminate */
 341     if(bufferLength>0) {
 342         *buffer=0;
 343     }
 344
 345     return bufferPos;
 346 }
 347
 348 /*
 349  * compareName() is almost the same as expandName() except that it compares
 350  * the currently expanded name to an input name.
 351  * It returns the match/no match result as soon as possible.
 352  */
     /*
      * Expands one compressed name line on the fly and compares it with otherName.
      *
      * @param names      names data header (token table and token strings)
      * @param name       compressed bytes of the line
      * @param nameLength number of bytes at name
      * @param nameChoice selects the field to compare, as in expandName()
      * @param otherName  NUL-terminated name to compare with
      * @return TRUE only if the expanded field and otherName are equal
      *         char for char and otherName ends where the field ends
      */
 353 static UBool
 354 compareName(UCharNames *names,
 355             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 356             const char *otherName) {
 357     uint16_t *tokens=(uint16_t *)names+8;
 358     uint16_t token, tokenCount=*tokens++;
 359     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 360     uint8_t c;
         /* start of otherName; otherName==origOtherName means nothing has been matched yet */
 361     const char *origOtherName = otherName;
 362
 363     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 364         /*
 365          * skip the modern name if it is not requested _and_
 366          * if the semicolon byte value is a character, not a token number
 367          */
 368         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 369             int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
 370             do {
 371                 while(nameLength>0) {
 372                     --nameLength;
 373                     if(*name++==';') {
 374                         break;
 375                     }
 376                 }
 377             } while(--fieldIndex>0);
 378         } else {
 379             /*
 380              * the semicolon byte value is a token number, therefore
 381              * only modern names are stored in unames.dat and there is no
 382              * such requested alternate name here
 383              */
 384             nameLength=0;
 385         }
 386     }
 387
 388     /* compare each letter directly, and compare a token word per token */
 389     while(nameLength>0) {
 390         --nameLength;
 391         c=*name++;
 392
 393         if(c>=tokenCount) {
 394             if(c!=';') {
 395                 /* implicit letter */
 396                 if((char)c!=*otherName++) {
 397                     return FALSE;
 398                 }
 399             } else {
 400                 /* finished */
 401                 break;
 402             }
 403         } else {
 404             token=tokens[c];
 405             if(token==(uint16_t)(-2)) {
 406                 /* this is a lead byte for a double-byte token */
 407                 token=tokens[c<<8|*name++];
 408                 --nameLength;
 409             }
 410             if(token==(uint16_t)(-1)) {
 411                 if(c!=';') {
 412                     /* explicit letter */
 413                     if((char)c!=*otherName++) {
 414                         return FALSE;
 415                     }
 416                 } else {
 417                     /* stop, but skip the semicolon if we are seeking
 418                        extended names and there was no 2.0 name but there
 419                        is a 1.0 name. */
 420                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
 421                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 422                             continue;
 423                         }
 424                     }
 425                     /* finished */
 426                     break;
 427                 }
 428             } else {
 429                 /* write token word */
                     /* (here: compare the token word with the next chars of otherName) */
 430                 uint8_t *tokenString=tokenStrings+token;
 431                 while((c=*tokenString++)!=0) {
 432                     if((char)c!=*otherName++) {
 433                         return FALSE;
 434                     }
 435                 }
 436             }
 437         }
 438     }
 439
 440     /* complete match? */
 441     return (UBool)(*otherName==0);
 442 }
 443
 444 static uint8_t getCharCat(UChar32 cp) {
 445     uint8_t cat;
 446
 447     if (U_IS_UNICODE_NONCHAR(cp)) {
 448         return U_NONCHARACTER_CODE_POINT;
 449     }
 450
 451     if ((cat = u_charType(cp)) == U_SURROGATE) {
 452         cat = U_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
 453     }
 454
 455     return cat;
 456 }
 457
 458 static const char *getCharCatName(UChar32 cp) {
 459     uint8_t cat = getCharCat(cp);
 460
 461     /* Return unknown if the table of names above is not up to
 462        date. */
 463
 464     if (cat >= LENGTHOF(charCatNames)) {
 465         return "unknown";
 466     } else {
 467         return charCatNames[cat];
 468     }
 469 }
 470
 471 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
 472     const char *catname = getCharCatName(code);
 473     uint16_t length = 0;
 474
 475     UChar32 cp;
 476     int ndigits, i;
 477
 478     WRITE_CHAR(buffer, bufferLength, length, '<');
 479     while (catname[length - 1]) {
 480         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
 481     }
 482     WRITE_CHAR(buffer, bufferLength, length, '-');
 483     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
 484         ;
 485     if (ndigits < 4)
 486         ndigits = 4;
 487     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
 488         uint8_t v = (uint8_t)(cp & 0xf);
 489         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
 490     }
 491     buffer += ndigits;
 492     length += ndigits;
 493     WRITE_CHAR(buffer, bufferLength, length, '>');
 494
 495     return length;
 496 }
 497
 498 /*
 499  * getGroup() does a binary search for the group that contains the
 500  * Unicode code point "code".
 501  * The return value is always a valid Group* that may contain "code"
 502  * or else is the highest group before "code".
 503  * If the lowest group is after "code", then that one is returned.
 504  */
 505 static const uint16_t *
 506 getGroup(UCharNames *names, uint32_t code) {
 507     const uint16_t *groups=GET_GROUPS(names);
 508     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
 509              start=0,
 510              limit=*groups++,
 511              number;
 512
 513     /* binary search for the group of names that contains the one for code */
 514     while(start<limit-1) {
 515         number=(uint16_t)((start+limit)/2);
 516         if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
 517             limit=number;
 518         } else {
 519             start=number;
 520         }
 521     }
 522
 523     /* return this regardless of whether it is an exact match */
 524     return groups+start*GROUP_LENGTH;
 525 }
 526
 527 /*
 528  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 529  * expands them into offsets and lengths for each string.
 530  * Lengths are stored with a variable-width encoding in consecutive nibbles:
 531  * If a nibble<0xc, then it is the length itself (0=empty string).
 532  * If a nibble>=0xc, then it forms a length value with the following nibble.
 533  * Calculation see below.
 534  * The offsets and lengths arrays must be at least 33 (one more) long because
 535  * there is no check here at the end if the last nibble is still used.
 536  */
     /*
      * @param s       start of a group's data: the nibble-encoded lengths,
      *                directly followed by the name strings
      * @param offsets output: offset of each line's string, counted from the
      *                returned pointer
      * @param lengths output: byte length of each line's string
      * @return pointer to the first name string of the group
      */
 537 static const uint8_t *
 538 expandGroupLengths(const uint8_t *s,
 539                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
 540     /* read the lengths of the 32 strings in this group and get each string's offset */
         /* length also carries state between bytes: >=12 means "first half of a double-nibble length" */
 541     uint16_t i=0, offset=0, length=0;
 542     uint8_t lengthByte;
 543
 544     /* all 32 lengths must be read to get the offset of the first group string */
 545     while(i<LINES_PER_GROUP) {
 546         lengthByte=*s++;
 547
 548         /* read even nibble - MSBs of lengthByte */
 549         if(length>=12) {
 550             /* double-nibble length spread across two bytes */
 551             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
 552             lengthByte&=0xf;
 553         } else if((lengthByte /* &0xf0 */)>=0xc0) {
 554             /* double-nibble length spread across this one byte */
                 /* lengthByte keeps its high bits here, which marks the low nibble as consumed below */
 555             length=(uint16_t)((lengthByte&0x3f)+12);
 556         } else {
 557             /* single-nibble length in MSBs */
 558             length=(uint16_t)(lengthByte>>4);
 559             lengthByte&=0xf;
 560         }
 561
 562         *offsets++=offset;
 563         *lengths++=length;
 564
 565         offset+=length;
 566         ++i;
 567
 568         /* read odd nibble - LSBs of lengthByte */
 569         if((lengthByte&0xf0)==0) {
 570             /* this nibble was not consumed for a double-nibble length above */
 571             length=lengthByte;
 572             if(length<12) {
 573                 /* single-nibble length in LSBs */
                     /* may be the 33rd entry when i was already 32 - hence the extra array element */
 574                 *offsets++=offset;
 575                 *lengths++=length;
 576
 577                 offset+=length;
 578                 ++i;
 579             }
                 /* else: length>=12 stays set and is completed with the next byte */
 580         } else {
 581             length=0;   /* prevent double-nibble detection in the next iteration */
 582         }
 583     }
 584
 585     /* now, s is at the first group string */
 586     return s;
 587 }
 588
 589 static uint16_t
 590 expandGroupName(UCharNames *names, const uint16_t *group,
 591                 uint16_t lineNumber, UCharNameChoice nameChoice,
 592                 char *buffer, uint16_t bufferLength) {
 593     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 594     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
 595     s=expandGroupLengths(s, offsets, lengths);
 596     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
 597                       buffer, bufferLength);
 598 }
 599
 600 static uint16_t
 601 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
 602         char *buffer, uint16_t bufferLength) {
 603     const uint16_t *group=getGroup(names, code);
 604     if((uint16_t)(code>>GROUP_SHIFT)==group[GROUP_MSB]) {
 605         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
 606                                buffer, bufferLength);
 607     } else {
 608         /* group not found */
 609         /* zero-terminate */
 610         if(bufferLength>0) {
 611             *buffer=0;
 612         }
 613         return 0;
 614     }
 615 }
 616
 617 /*
 618  * enumGroupNames() enumerates all the names in one group of 32 code points
 619  * and either calls the enumerator function or finds a given input name.
 620  */
 621 static UBool
 622 enumGroupNames(UCharNames *names, const uint16_t *group,
 623                UChar32 start, UChar32 end,
 624                UEnumCharNamesFn *fn, void *context,
 625                UCharNameChoice nameChoice) {
 626     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 627     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
 628
 629     s=expandGroupLengths(s, offsets, lengths);
 630     if(fn!=DO_FIND_NAME) {
 631         char buffer[200];
 632         uint16_t length;
 633
 634         while(start<=end) {
 635             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
 636             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
 637                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 638             }
 639             /* here, we assume that the buffer is large enough */
 640             if(length>0) {
 641                 if(!fn(context, start, nameChoice, buffer, length)) {
 642                     return FALSE;
 643                 }
 644             }
 645             ++start;
 646         }
 647     } else {
 648         const char *otherName=((FindName *)context)->otherName;
 649         while(start<=end) {
 650             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
 651                 ((FindName *)context)->code=start;
 652                 return FALSE;
 653             }
 654             ++start;
 655         }
 656     }
 657     return TRUE;
 658 }
 659
 660 /*
 661  * enumExtNames enumerate extended names.
 662  * It only needs to do it if it is called with a real function and not
 663  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
 664  * for extended names by itself.
 665  */
 666 static UBool
 667 enumExtNames(UChar32 start, UChar32 end,
 668              UEnumCharNamesFn *fn, void *context)
 669 {
 670     if(fn!=DO_FIND_NAME) {
 671         char buffer[200];
 672         uint16_t length;
 673
 674         while(start<=end) {
 675             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 676             /* here, we assume that the buffer is large enough */
 677             if(length>0) {
 678                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
 679                     return FALSE;
 680                 }
 681             }
 682             ++start;
 683         }
 684     }
 685
 686     return TRUE;
 687 }
 688
     /*
      * Enumerates the names of the code points from start up to limit-1,
      * group by group.
      *
      * @param names      names data header
      * @param start      first code point
      * @param limit      one past the last code point
      * @param fn         enumerator function, or DO_FIND_NAME to search for a name
      * @param context    passed through to fn (a FindName * for DO_FIND_NAME)
      * @param nameChoice which names; for U_EXTENDED_CHAR_NAME, synthetic names
      *                   are also produced for code points outside the stored groups
      * @return FALSE if enumGroupNames()/enumExtNames() asked to stop, otherwise TRUE
      */
 689 static UBool
 690 enumNames(UCharNames *names,
 691           UChar32 start, UChar32 limit,
 692           UEnumCharNamesFn *fn, void *context,
 693           UCharNameChoice nameChoice) {
 694     uint16_t startGroupMSB, endGroupMSB, groupCount;
 695     const uint16_t *group, *groupLimit;
 696
 697     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
 698     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
 699
 700     /* find the group that contains start, or the highest before it */
 701     group=getGroup(names, start);
 702
 703     if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
 704         /* enumerate synthetic names between start and the group start */
 705         UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
 706         if(extLimit>limit) {
 707             extLimit=limit;
 708         }
 709         if(!enumExtNames(start, extLimit-1, fn, context)) {
 710             return FALSE;
 711         }
 712         start=extLimit;
 713     }
 714
 715     if(startGroupMSB==endGroupMSB) {
 716         if(startGroupMSB==group[GROUP_MSB]) {
 717             /* if start and limit-1 are in the same group, then enumerate only in that one */
 718             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
 719         }
             /* otherwise fall through to the synthetic names at the end of the function */
 720     } else {
 721         const uint16_t *groups=GET_GROUPS(names);
 722         groupCount=*groups++;
 723         groupLimit=groups+groupCount*GROUP_LENGTH;
 724
 725         if(startGroupMSB==group[GROUP_MSB]) {
 726             /* enumerate characters in the partial start group */
                 /* (if start is the first line of its group, the loop below handles the whole group) */
 727             if((start&GROUP_MASK)!=0) {
 728                 if(!enumGroupNames(names, group,
 729                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
 730                                    fn, context, nameChoice)) {
 731                     return FALSE;
 732                 }
                     /*
                      * NOTE(review): unlike the loop below, no synthetic names are
                      * emitted here for a gap between this partial start group and
                      * the next stored group - confirm whether that is intended.
                      */
 733                 group=NEXT_GROUP(group); /* continue with the next group */
 734             }
 735         } else if(startGroupMSB>group[GROUP_MSB]) {
 736             /* make sure that we start enumerating with the first group after start */
 737             const uint16_t *nextGroup=NEXT_GROUP(group);
 738             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
                     /* synthetic names from start up to the next stored group (or limit) */
 739                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
 740                 if (end > limit) {
 741                     end = limit;
 742                 }
 743                 if (!enumExtNames(start, end - 1, fn, context)) {
 744                     return FALSE;
 745                 }
 746             }
 747             group=nextGroup;
 748         }
 749
 750         /* enumerate entire groups between the start- and end-groups */
 751         while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
 752             const uint16_t *nextGroup;
 753             start=(UChar32)group[GROUP_MSB]<<GROUP_SHIFT;
 754             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
 755                 return FALSE;
 756             }
 757             nextGroup=NEXT_GROUP(group);
 758             if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
                     /* fill the gap between this group and the next stored group with synthetic names */
 759                 UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
 760                 if (end > limit) {
 761                     end = limit;
 762                 }
 763                 if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
 764                     return FALSE;
 765                 }
 766             }
 767             group=nextGroup;
 768         }
 769
 770         /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
 771         if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
 772             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
 773         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
                 /* ran past the last stored group: continue after it with synthetic names below */
 774             UChar32 next = (PREV_GROUP(group)[GROUP_MSB] + 1) << GROUP_SHIFT;
 775             if (next > start) {
 776                 start = next;
 777             }
 778         } else {
 779             return TRUE;
 780         }
 781     }
 782
 783     /* we have not found a group, which means everything is made of
 784        extended names. */
 785     if (nameChoice == U_EXTENDED_CHAR_NAME) {
 786         if (limit > UCHAR_MAX_VALUE + 1) {
 787             limit = UCHAR_MAX_VALUE + 1;
 788         }
 789         return enumExtNames(start, limit - 1, fn, context);
 790     }
 791
 792     return TRUE;
 793 }
 794
     /*
      * Writes the factorized suffix of an algorithmic name: code is split into
      * one index per factor (mixed-radix digits, last factor varies fastest),
      * and for each factor the element string with that index is appended.
      *
      * @param factors      the factors
      * @param count        number of factors; assumed to be at least 1 and at
      *                     most 8 (size of the output arrays) - not checked here
      * @param s            the element strings: for each factor in turn, that
      *                     many NUL-terminated strings
      * @param code         value to factorize
      * @param indexes      output: the index chosen for each factor
      * @param elementBases output, may be NULL: first element string of each factor
      * @param elements     output, may be NULL: the chosen element string of each factor
      * @param buffer       output buffer for the suffix
      * @param bufferLength capacity of buffer
      * @return length of the complete suffix, even if it did not fit;
      *         NUL-terminated only if there is room left
      */
 795 static uint16_t
 796 writeFactorSuffix(const uint16_t *factors, uint16_t count,
 797                   const char *s, /* suffix elements */
 798                   uint32_t code,
 799                   uint16_t indexes[8], /* output fields from here */
 800                   const char *elementBases[8], const char *elements[8],
 801                   char *buffer, uint16_t bufferLength) {
 802     uint16_t i, factor, bufferPos=0;
 803     char c;
 804
 805     /* write elements according to the factors */
 806
 807     /*
 808      * the factorized elements are determined by modulo arithmetic
 809      * with the factors of this algorithm
 810      *
 811      * note that for fewer operations, count is decremented here
 812      */
 813     --count;
 814     for(i=count; i>0; --i) {
 815         factor=factors[i];
 816         indexes[i]=(uint16_t)(code%factor);
 817         code/=factor;
 818     }
 819     /*
 820      * we don't need to calculate the last modulus because start<=code<=end
 821      * guarantees here that code<=factors[0]
 822      */
 823     indexes[0]=(uint16_t)code;
 824
 825     /* write each element */
         /* i is 0 here (left over from the loop above) and counts up to count */
 826     for(;;) {
 827         if(elementBases!=NULL) {
 828             *elementBases++=s;
 829         }
 830
 831         /* skip indexes[i] strings */
 832         factor=indexes[i];
 833         while(factor>0) {
 834             while(*s++!=0) {}
 835             --factor;
 836         }
 837         if(elements!=NULL) {
 838             *elements++=s;
 839         }
 840
 841         /* write element */
 842         while((c=*s++)!=0) {
 843             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 844         }
 845
 846         /* we do not need to perform the rest of this loop for i==count - break here */
 847         if(i>=count) {
 848             break;
 849         }
 850
 851         /* skip the rest of the strings for this factors[i] */
 852         factor=(uint16_t)(factors[i]-indexes[i]-1);
 853         while(factor>0) {
 854             while(*s++!=0) {}
 855             --factor;
 856         }
 857
 858         ++i;
 859     }
 860
 861     /* zero-terminate */
 862     if(bufferLength>0) {
 863         *buffer=0;
 864     }
 865
 866     return bufferPos;
 867 }
 868
 869 /*
 870  * Important:
 871  * Parts of findAlgName() are almost the same as some of getAlgName().
 872  * Fixes must be applied to both.
 873  */
 874 static uint16_t
 875 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
 876         char *buffer, uint16_t bufferLength) {
 877     uint16_t bufferPos=0;
 878
 879     /* Only the normative character name can be algorithmic. */
 880     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 881         /* zero-terminate */
 882         if(bufferLength>0) {
 883             *buffer=0;
 884         }
 885         return 0;
 886     }
 887
 888     switch(range->type) {
 889     case 0: {
 890         /* name = prefix hex-digits */
 891         const char *s=(const char *)(range+1);
 892         char c;
 893
 894         uint16_t i, count;
 895
 896         /* copy prefix */
 897         while((c=*s++)!=0) {
 898             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 899         }
 900
 901         /* write hexadecimal code point value */
 902         count=range->variant;
 903
 904         /* zero-terminate */
 905         if(count<bufferLength) {
 906             buffer[count]=0;
 907         }
 908
 909         for(i=count; i>0;) {
 910             if(--i<bufferLength) {
 911                 c=(char)(code&0xf);
 912                 if(c<10) {
 913                     c+='0';
 914                 } else {
 915                     c+='A'-10;
 916                 }
 917                 buffer[i]=c;
 918             }
 919             code>>=4;
 920         }
 921
 922         bufferPos+=count;
 923         break;
 924     }
 925     case 1: {
 926         /* name = prefix factorized-elements */
 927         uint16_t indexes[8];
 928         const uint16_t *factors=(const uint16_t *)(range+1);
 929         uint16_t count=range->variant;
 930         const char *s=(const char *)(factors+count);
 931         char c;
 932
 933         /* copy prefix */
 934         while((c=*s++)!=0) {
 935             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 936         }
 937
 938         bufferPos+=writeFactorSuffix(factors, count,
 939                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
 940         break;
 941     }
 942     default:
 943         /* undefined type */
 944         /* zero-terminate */
 945         if(bufferLength>0) {
 946             *buffer=0;
 947         }
 948         break;
 949     }
 950
 951     return bufferPos;
 952 }
 953
 954 /*
 955  * Important: enumAlgNames() and findAlgName() are almost the same.
 956  * Any fix must be applied to both.
 957  */
 958 static UBool
 959 enumAlgNames(AlgorithmicRange *range,
 960              UChar32 start, UChar32 limit,
 961              UEnumCharNamesFn *fn, void *context,
 962              UCharNameChoice nameChoice) {
 963     char buffer[200];
 964     uint16_t length;
 965
 966     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
 967         return TRUE;
 968     }
 969
 970     switch(range->type) {
 971     case 0: {
 972         char *s, *end;
 973         char c;
 974
 975         /* get the full name of the start character */
 976         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
 977         if(length<=0) {
 978             return TRUE;
 979         }
 980
 981         /* call the enumerator function with this first character */
 982         if(!fn(context, start, nameChoice, buffer, length)) {
 983             return FALSE;
 984         }
 985
 986         /* go to the end of the name; all these names have the same length */
 987         end=buffer;
 988         while(*end!=0) {
 989             ++end;
 990         }
 991
 992         /* enumerate the rest of the names */
 993         while(++start<limit) {
 994             /* increment the hexadecimal number on a character-basis */
 995             s=end;
 996             for (;;) {
 997                 c=*--s;
 998                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
 999                     *s=(char)(c+1);
1000                     break;
1001                 } else if(c=='9') {
1002                     *s='A';
1003                     break;
1004                 } else if(c=='F') {
1005                     *s='0';
1006                 }
1007             }
1008
1009             if(!fn(context, start, nameChoice, buffer, length)) {
1010                 return FALSE;
1011             }
1012         }
1013         break;
1014     }
1015     case 1: {
1016         uint16_t indexes[8];
1017         const char *elementBases[8], *elements[8];
1018         const uint16_t *factors=(const uint16_t *)(range+1);
1019         uint16_t count=range->variant;
1020         const char *s=(const char *)(factors+count);
1021         char *suffix, *t;
1022         uint16_t prefixLength, i, idx;
1023
1024         char c;
1025
1026         /* name = prefix factorized-elements */
1027
1028         /* copy prefix */
1029         suffix=buffer;
1030         prefixLength=0;
1031         while((c=*s++)!=0) {
1032             *suffix++=c;
1033             ++prefixLength;
1034         }
1035
1036         /* append the suffix of the start character */
1037         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
1038                                               s, (uint32_t)start-range->start,
1039                                               indexes, elementBases, elements,
1040                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
1041
1042         /* call the enumerator function with this first character */
1043         if(!fn(context, start, nameChoice, buffer, length)) {
1044             return FALSE;
1045         }
1046
1047         /* enumerate the rest of the names */
1048         while(++start<limit) {
1049             /* increment the indexes in lexical order bound by the factors */
1050             i=count;
1051             for (;;) {
1052                 idx=(uint16_t)(indexes[--i]+1);
1053                 if(idx<factors[i]) {
1054                     /* skip one index and its element string */
1055                     indexes[i]=idx;
1056                     s=elements[i];
1057                     while(*s++!=0) {
1058                     }
1059                     elements[i]=s;
1060                     break;
1061                 } else {
1062                     /* reset this index to 0 and its element string to the first one */
1063                     indexes[i]=0;
1064                     elements[i]=elementBases[i];
1065                 }
1066             }
1067
1068             /* to make matters a little easier, just append all elements to the suffix */
1069             t=suffix;
1070             length=prefixLength;
1071             for(i=0; i<count; ++i) {
1072                 s=elements[i];
1073                 while((c=*s++)!=0) {
1074                     *t++=c;
1075                     ++length;
1076                 }
1077             }
1078             /* zero-terminate */
1079             *t=0;
1080
1081             if(!fn(context, start, nameChoice, buffer, length)) {
1082                 return FALSE;
1083             }
1084         }
1085         break;
1086     }
1087     default:
1088         /* undefined type */
1089         break;
1090     }
1091
1092     return TRUE;
1093 }
1094
1095 /*
1096  * findAlgName() is almost the same as enumAlgNames() except that it
1097  * returns the code point for a name if it fits into the range.
1098  * It returns 0xffff otherwise.
1099  */
1100 static UChar32
1101 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
1102     UChar32 code;
1103
1104     if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
1105         return 0xffff;
1106     }
1107
1108     switch(range->type) {
1109     case 0: {
1110         /* name = prefix hex-digits */
1111         const char *s=(const char *)(range+1);
1112         char c;
1113
1114         uint16_t i, count;
1115
1116         /* compare prefix */
1117         while((c=*s++)!=0) {
1118             if((char)c!=*otherName++) {
1119                 return 0xffff;
1120             }
1121         }
1122
1123         /* read hexadecimal code point value */
1124         count=range->variant;
1125         code=0;
1126         for(i=0; i<count; ++i) {
1127             c=*otherName++;
1128             if('0'<=c && c<='9') {
1129                 code=(code<<4)|(c-'0');
1130             } else if('A'<=c && c<='F') {
1131                 code=(code<<4)|(c-'A'+10);
1132             } else {
1133                 return 0xffff;
1134             }
1135         }
1136
1137         /* does it fit into the range? */
1138         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1139             return code;
1140         }
1141         break;
1142     }
1143     case 1: {
1144         char buffer[64];
1145         uint16_t indexes[8];
1146         const char *elementBases[8], *elements[8];
1147         const uint16_t *factors=(const uint16_t *)(range+1);
1148         uint16_t count=range->variant;
1149         const char *s=(const char *)(factors+count), *t;
1150         UChar32 start, limit;
1151         uint16_t i, idx;
1152
1153         char c;
1154
1155         /* name = prefix factorized-elements */
1156
1157         /* compare prefix */
1158         while((c=*s++)!=0) {
1159             if((char)c!=*otherName++) {
1160                 return 0xffff;
1161             }
1162         }
1163
1164         start=(UChar32)range->start;
1165         limit=(UChar32)(range->end+1);
1166
1167         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
1168         writeFactorSuffix(factors, count, s, 0,
1169                           indexes, elementBases, elements, buffer, sizeof(buffer));
1170
1171         /* compare the first suffix */
1172         if(0==uprv_strcmp(otherName, buffer)) {
1173             return start;
1174         }
1175
1176         /* enumerate and compare the rest of the suffixes */
1177         while(++start<limit) {
1178             /* increment the indexes in lexical order bound by the factors */
1179             i=count;
1180             for (;;) {
1181                 idx=(uint16_t)(indexes[--i]+1);
1182                 if(idx<factors[i]) {
1183                     /* skip one index and its element string */
1184                     indexes[i]=idx;
1185                     s=elements[i];
1186                     while(*s++!=0) {}
1187                     elements[i]=s;
1188                     break;
1189                 } else {
1190                     /* reset this index to 0 and its element string to the first one */
1191                     indexes[i]=0;
1192                     elements[i]=elementBases[i];
1193                 }
1194             }
1195
1196             /* to make matters a little easier, just compare all elements of the suffix */
1197             t=otherName;
1198             for(i=0; i<count; ++i) {
1199                 s=elements[i];
1200                 while((c=*s++)!=0) {
1201                     if(c!=*t++) {
1202                         s=""; /* does not match */
1203                         i=99;
1204                     }
1205                 }
1206             }
1207             if(i<99 && *t==0) {
1208                 return start;
1209             }
1210         }
1211         break;
1212     }
1213     default:
1214         /* undefined type */
1215         break;
1216     }
1217
1218     return 0xffff;
1219 }
1220
1221 /* sets of name characters, maximum name lengths ---------------------------- */
1222
/*
 * Bit-set helpers for a set of 256 byte values stored as uint32_t[8]:
 * bits 7..5 of the byte select the array word, bits 4..0 the bit in that word.
 * c is converted to uint8_t first, so only its low 8 bits are used.
 * Note: c is evaluated twice; do not pass expressions with side effects.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1225
1226 static int32_t
1227 calcStringSetLength(uint32_t set[8], const char *s) {
1228     int32_t length=0;
1229     char c;
1230
1231     while((c=*s++)!=0) {
1232         SET_ADD(set, c);
1233         ++length;
1234     }
1235     return length;
1236 }
1237
1238 static int32_t
1239 calcAlgNameSetsLengths(int32_t maxNameLength) {
1240     AlgorithmicRange *range;
1241     uint32_t *p;
1242     uint32_t rangeCount;
1243     int32_t length;
1244
1245     /* enumerate algorithmic ranges */
1246     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1247     rangeCount=*p;
1248     range=(AlgorithmicRange *)(p+1);
1249     while(rangeCount>0) {
1250         switch(range->type) {
1251         case 0:
1252             /* name = prefix + (range->variant times) hex-digits */
1253             /* prefix */
1254             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1255             if(length>maxNameLength) {
1256                 maxNameLength=length;
1257             }
1258             break;
1259         case 1: {
1260             /* name = prefix factorized-elements */
1261             const uint16_t *factors=(const uint16_t *)(range+1);
1262             const char *s;
1263             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1264
1265             /* prefix length */
1266             s=(const char *)(factors+count);
1267             length=calcStringSetLength(gNameSet, s);
1268             s+=length+1; /* start of factor suffixes */
1269
1270             /* get the set and maximum factor suffix length for each factor */
1271             for(i=0; i<count; ++i) {
1272                 maxFactorLength=0;
1273                 for(factor=factors[i]; factor>0; --factor) {
1274                     factorLength=calcStringSetLength(gNameSet, s);
1275                     s+=factorLength+1;
1276                     if(factorLength>maxFactorLength) {
1277                         maxFactorLength=factorLength;
1278                     }
1279                 }
1280                 length+=maxFactorLength;
1281             }
1282
1283             if(length>maxNameLength) {
1284                 maxNameLength=length;
1285             }
1286             break;
1287         }
1288         default:
1289             /* unknown type */
1290             break;
1291         }
1292
1293         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1294         --rangeCount;
1295     }
1296     return maxNameLength;
1297 }
1298
1299 static int32_t
1300 calcExtNameSetsLengths(int32_t maxNameLength) {
1301     int32_t i, length;
1302
1303     for(i=0; i<LENGTHOF(charCatNames); ++i) {
1304         /*
1305          * for each category, count the length of the category name
1306          * plus 9=
1307          * 2 for <>
1308          * 1 for -
1309          * 6 for most hex digits per code point
1310          */
1311         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
1312         if(length>maxNameLength) {
1313             maxNameLength=length;
1314         }
1315     }
1316     return maxNameLength;
1317 }
1318
1319 static int32_t
1320 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
1321                   uint32_t set[8],
1322                   const uint8_t **pLine, const uint8_t *lineLimit) {
1323     const uint8_t *line=*pLine;
1324     int32_t length=0, tokenLength;
1325     uint16_t c, token;
1326
1327     while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
1328         if(c>=tokenCount) {
1329             /* implicit letter */
1330             SET_ADD(set, c);
1331             ++length;
1332         } else {
1333             token=tokens[c];
1334             if(token==(uint16_t)(-2)) {
1335                 /* this is a lead byte for a double-byte token */
1336                 c=c<<8|*line++;
1337                 token=tokens[c];
1338             }
1339             if(token==(uint16_t)(-1)) {
1340                 /* explicit letter */
1341                 SET_ADD(set, c);
1342                 ++length;
1343             } else {
1344                 /* count token word */
1345                 if(tokenLengths!=NULL) {
1346                     /* use cached token length */
1347                     tokenLength=tokenLengths[c];
1348                     if(tokenLength==0) {
1349                         tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1350                         tokenLengths[c]=(int8_t)tokenLength;
1351                     }
1352                 } else {
1353                     tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1354                 }
1355                 length+=tokenLength;
1356             }
1357         }
1358     }
1359
1360     *pLine=line;
1361     return length;
1362 }
1363
1364 static void
1365 calcGroupNameSetsLengths(int32_t maxNameLength) {
1366     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
1367
1368     uint16_t *tokens=(uint16_t *)uCharNames+8;
1369     uint16_t tokenCount=*tokens++;
1370     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
1371
1372     int8_t *tokenLengths;
1373
1374     const uint16_t *group;
1375     const uint8_t *s, *line, *lineLimit;
1376
1377     int32_t groupCount, lineNumber, length;
1378
1379     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1380     if(tokenLengths!=NULL) {
1381         uprv_memset(tokenLengths, 0, tokenCount);
1382     }
1383
1384     group=GET_GROUPS(uCharNames);
1385     groupCount=*group++;
1386
1387     /* enumerate all groups */
1388     while(groupCount>0) {
1389         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+GET_GROUP_OFFSET(group);
1390         s=expandGroupLengths(s, offsets, lengths);
1391
1392         /* enumerate all lines in each group */
1393         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1394             line=s+offsets[lineNumber];
1395             length=lengths[lineNumber];
1396             if(length==0) {
1397                 continue;
1398             }
1399
1400             lineLimit=line+length;
1401
1402             /* read regular name */
1403             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1404             if(length>maxNameLength) {
1405                 maxNameLength=length;
1406             }
1407             if(line==lineLimit) {
1408                 continue;
1409             }
1410
1411             /* read Unicode 1.0 name */
1412             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1413             if(length>maxNameLength) {
1414                 maxNameLength=length;
1415             }
1416             if(line==lineLimit) {
1417                 continue;
1418             }
1419
1420             /* read ISO comment */
1421             /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
1422         }
1423
1424         group=NEXT_GROUP(group);
1425         --groupCount;
1426     }
1427
1428     if(tokenLengths!=NULL) {
1429         uprv_free(tokenLengths);
1430     }
1431
1432     /* set gMax... - name length last for threading */
1433     gMaxNameLength=maxNameLength;
1434 }
1435
1436 static UBool
1437 calcNameSetsLengths(UErrorCode *pErrorCode) {
1438     static const char extChars[]="0123456789ABCDEF<>-";
1439     int32_t i, maxNameLength;
1440
1441     if(gMaxNameLength!=0) {
1442         return TRUE;
1443     }
1444
1445     if(!isDataLoaded(pErrorCode)) {
1446         return FALSE;
1447     }
1448
1449     /* set hex digits, used in various names, and <>-, used in extended names */
1450     for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
1451         SET_ADD(gNameSet, extChars[i]);
1452     }
1453
1454     /* set sets and lengths from algorithmic names */
1455     maxNameLength=calcAlgNameSetsLengths(0);
1456
1457     /* set sets and lengths from extended names */
1458     maxNameLength=calcExtNameSetsLengths(maxNameLength);
1459
1460     /* set sets and lengths from group names, set global maximum values */
1461     calcGroupNameSetsLengths(maxNameLength);
1462
1463     return TRUE;
1464 }
1465
1466 /* public API --------------------------------------------------------------- */
1467
1468 U_CAPI int32_t U_EXPORT2
1469 u_charName(UChar32 code, UCharNameChoice nameChoice,
1470            char *buffer, int32_t bufferLength,
1471            UErrorCode *pErrorCode) {
1472     AlgorithmicRange *algRange;
1473     uint32_t *p;
1474     uint32_t i;
1475     int32_t length;
1476
1477     /* check the argument values */
1478     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1479         return 0;
1480     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1481               bufferLength<0 || (bufferLength>0 && buffer==NULL)
1482     ) {
1483         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1484         return 0;
1485     }
1486
1487     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1488         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1489     }
1490
1491     length=0;
1492
1493     /* try algorithmic names first */
1494     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1495     i=*p;
1496     algRange=(AlgorithmicRange *)(p+1);
1497     while(i>0) {
1498         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1499             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1500             break;
1501         }
1502         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1503         --i;
1504     }
1505
1506     if(i==0) {
1507         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1508             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1509             if (!length) {
1510                 /* extended character name */
1511                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1512             }
1513         } else {
1514             /* normal character name */
1515             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1516         }
1517     }
1518
1519     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1520 }
1521
1522 U_CAPI int32_t U_EXPORT2
1523 u_getISOComment(UChar32 /*c*/,
1524                 char *dest, int32_t destCapacity,
1525                 UErrorCode *pErrorCode) {
1526     /* check the argument values */
1527     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1528         return 0;
1529     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1530         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1531         return 0;
1532     }
1533
1534     return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1535 }
1536
1537 U_CAPI UChar32 U_EXPORT2
1538 u_charFromName(UCharNameChoice nameChoice,
1539                const char *name,
1540                UErrorCode *pErrorCode) {
1541     char upper[120], lower[120];
1542     FindName findName;
1543     AlgorithmicRange *algRange;
1544     uint32_t *p;
1545     uint32_t i;
1546     UChar32 cp = 0;
1547     char c0;
1548     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
1549
1550     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1551         return error;
1552     }
1553
1554     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1555         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1556         return error;
1557     }
1558
1559     if(!isDataLoaded(pErrorCode)) {
1560         return error;
1561     }
1562
1563     /* construct the uppercase and lowercase of the name first */
1564     for(i=0; i<sizeof(upper); ++i) {
1565         if((c0=*name++)!=0) {
1566             upper[i]=uprv_toupper(c0);
1567             lower[i]=uprv_tolower(c0);
1568         } else {
1569             upper[i]=lower[i]=0;
1570             break;
1571         }
1572     }
1573     if(i==sizeof(upper)) {
1574         /* name too long, there is no such character */
1575         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1576         return error;
1577     }
1578
1579     /* try extended names first */
1580     if (lower[0] == '<') {
1581         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1582             if (lower[--i] == '>') {
1583                 for (--i; lower[i] && lower[i] != '-'; --i) {
1584                 }
1585
1586                 if (lower[i] == '-') { /* We've got a category. */
1587                     uint32_t cIdx;
1588
1589                     lower[i] = 0;
1590
1591                     for (++i; lower[i] != '>'; ++i) {
1592                         if (lower[i] >= '0' && lower[i] <= '9') {
1593                             cp = (cp << 4) + lower[i] - '0';
1594                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1595                             cp = (cp << 4) + lower[i] - 'a' + 10;
1596                         } else {
1597                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1598                             return error;
1599                         }
1600                     }
1601
1602                     /* Now validate the category name.
1603                        We could use a binary search, or a trie, if
1604                        we really wanted to. */
1605
1606                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
1607
1608                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
1609                             if (getCharCat(cp) == cIdx) {
1610                                 return cp;
1611                             }
1612                             break;
1613                         }
1614                     }
1615                 }
1616             }
1617         }
1618
1619         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1620         return error;
1621     }
1622
1623     /* try algorithmic names now */
1624     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1625     i=*p;
1626     algRange=(AlgorithmicRange *)(p+1);
1627     while(i>0) {
1628         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1629             return cp;
1630         }
1631         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1632         --i;
1633     }
1634
1635     /* normal character name */
1636     findName.otherName=upper;
1637     findName.code=error;
1638     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
1639     if (findName.code == error) {
1640          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1641     }
1642     return findName.code;
1643 }
1644
1645 U_CAPI void U_EXPORT2
1646 u_enumCharNames(UChar32 start, UChar32 limit,
1647                 UEnumCharNamesFn *fn,
1648                 void *context,
1649                 UCharNameChoice nameChoice,
1650                 UErrorCode *pErrorCode) {
1651     AlgorithmicRange *algRange;
1652     uint32_t *p;
1653     uint32_t i;
1654
1655     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1656         return;
1657     }
1658
1659     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1660         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1661         return;
1662     }
1663
1664     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1665         limit = UCHAR_MAX_VALUE + 1;
1666     }
1667     if((uint32_t)start>=(uint32_t)limit) {
1668         return;
1669     }
1670
1671     if(!isDataLoaded(pErrorCode)) {
1672         return;
1673     }
1674
1675     /* interleave the data-driven ones with the algorithmic ones */
1676     /* iterate over all algorithmic ranges; assume that they are in ascending order */
1677     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1678     i=*p;
1679     algRange=(AlgorithmicRange *)(p+1);
1680     while(i>0) {
1681         /* enumerate the character names before the current algorithmic range */
1682         /* here: start<limit */
1683         if((uint32_t)start<algRange->start) {
1684             if((uint32_t)limit<=algRange->start) {
1685                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
1686                 return;
1687             }
1688             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1689                 return;
1690             }
1691             start=(UChar32)algRange->start;
1692         }
1693         /* enumerate the character names in the current algorithmic range */
1694         /* here: algRange->start<=start<limit */
1695         if((uint32_t)start<=algRange->end) {
1696             if((uint32_t)limit<=(algRange->end+1)) {
1697                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1698                 return;
1699             }
1700             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1701                 return;
1702             }
1703             start=(UChar32)algRange->end+1;
1704         }
1705         /* continue to the next algorithmic range (here: start<limit) */
1706         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1707         --i;
1708     }
1709     /* enumerate the character names after the last algorithmic range */
1710     enumNames(uCharNames, start, limit, fn, context, nameChoice);
1711 }
1712
1713 U_CAPI int32_t U_EXPORT2
1714 uprv_getMaxCharNameLength() {
1715     UErrorCode errorCode=U_ZERO_ERROR;
1716     if(calcNameSetsLengths(&errorCode)) {
1717         return gMaxNameLength;
1718     } else {
1719         return 0;
1720     }
1721 }
1722
1723 /**
1724  * Converts the char set cset into a Unicode set uset.
1725  * @param cset Set of 256 bit flags corresponding to a set of chars.
1726  * @param uset USet to receive characters. Existing contents are deleted.
1727  */
1728 static void
1729 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
1730     UChar us[256];
1731     char cs[256];
1732
1733     int32_t i, length;
1734     UErrorCode errorCode;
1735
1736     errorCode=U_ZERO_ERROR;
1737
1738     if(!calcNameSetsLengths(&errorCode)) {
1739         return;
1740     }
1741
1742     /* build a char string with all chars that are used in character names */
1743     length=0;
1744     for(i=0; i<256; ++i) {
1745         if(SET_CONTAINS(cset, i)) {
1746             cs[length++]=(char)i;
1747         }
1748     }
1749
1750     /* convert the char string to a UChar string */
1751     u_charsToUChars(cs, us, length);
1752
1753     /* add each UChar to the USet */
1754     for(i=0; i<length; ++i) {
1755         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
1756             sa->add(sa->set, us[i]);
1757         }
1758     }
1759 }
1760
1761 /**
1762  * Fills set with characters that are used in Unicode character names.
1763  * @param set USet to receive characters.
1764  */
1765 U_CAPI void U_EXPORT2
1766 uprv_getCharNameCharacters(const USetAdder *sa) {
1767     charSetToUSet(gNameSet, sa);
1768 }
1769
1770 /* data swapping ------------------------------------------------------------ */
1771
1772 /*
1773  * The token table contains non-negative entries for token bytes,
1774  * and -1 for bytes that represent themselves in the data file's charset.
1775  * -2 entries are used for lead bytes.
1776  *
1777  * Direct bytes (-1 entries) must be translated from the input charset family
1778  * to the output charset family.
1779  * makeTokenMap() writes a permutation mapping for this.
1780  * Use it once for single-/lead-byte tokens and once more for all trail byte
1781  * tokens. (';' is an unused trail byte marked with -1.)
1782  */
1783 static void
1784 makeTokenMap(const UDataSwapper *ds,
1785              int16_t tokens[], uint16_t tokenCount,
1786              uint8_t map[256],
1787              UErrorCode *pErrorCode) {
1788     UBool usedOutChar[256];
1789     uint16_t i, j;
1790     uint8_t c1, c2;
1791
1792     if(U_FAILURE(*pErrorCode)) {
1793         return;
1794     }
1795
1796     if(ds->inCharset==ds->outCharset) {
1797         /* Same charset family: identity permutation */
1798         for(i=0; i<256; ++i) {
1799             map[i]=(uint8_t)i;
1800         }
1801     } else {
1802         uprv_memset(map, 0, 256);
1803         uprv_memset(usedOutChar, 0, 256);
1804
1805         if(tokenCount>256) {
1806             tokenCount=256;
1807         }
1808
1809         /* set the direct bytes (byte 0 always maps to itself) */
1810         for(i=1; i<tokenCount; ++i) {
1811             if(tokens[i]==-1) {
1812                 /* convert the direct byte character */
1813                 c1=(uint8_t)i;
1814                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1815                 if(U_FAILURE(*pErrorCode)) {
1816                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
1817                                      i, ds->inCharset);
1818                     return;
1819                 }
1820
1821                 /* enter the converted character into the map and mark it used */
1822                 map[c1]=c2;
1823                 usedOutChar[c2]=TRUE;
1824             }
1825         }
1826
1827         /* set the mappings for the rest of the permutation */
1828         for(i=j=1; i<tokenCount; ++i) {
1829             /* set mappings that were not set for direct bytes */
1830             if(map[i]==0) {
1831                 /* set an output byte value that was not used as an output byte above */
1832                 while(usedOutChar[j]) {
1833                     ++j;
1834                 }
1835                 map[i]=(uint8_t)j++;
1836             }
1837         }
1838
1839         /*
1840          * leave mappings at tokenCount and above unset if tokenCount<256
1841          * because they won't be used
1842          */
1843     }
1844 }
1845
1846 U_CAPI int32_t U_EXPORT2
1847 uchar_swapNames(const UDataSwapper *ds,
1848                 const void *inData, int32_t length, void *outData,
1849                 UErrorCode *pErrorCode) {
1850     const UDataInfo *pInfo;
1851     int32_t headerSize;
1852
1853     const uint8_t *inBytes;
1854     uint8_t *outBytes;
1855
1856     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
1857              offset, i, count, stringsCount;
1858
1859     const AlgorithmicRange *inRange;
1860     AlgorithmicRange *outRange;
1861
1862     /* udata_swapDataHeader checks the arguments */
1863     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1864     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1865         return 0;
1866     }
1867
1868     /* check data format and format version */
1869     pInfo=(const UDataInfo *)((const char *)inData+4);
1870     if(!(
1871         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
1872         pInfo->dataFormat[1]==0x6e &&
1873         pInfo->dataFormat[2]==0x61 &&
1874         pInfo->dataFormat[3]==0x6d &&
1875         pInfo->formatVersion[0]==1
1876     )) {
1877         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
1878                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1879                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1880                          pInfo->formatVersion[0]);
1881         *pErrorCode=U_UNSUPPORTED_ERROR;
1882         return 0;
1883     }
1884
1885     inBytes=(const uint8_t *)inData+headerSize;
1886     outBytes=(uint8_t *)outData+headerSize;
1887     if(length<0) {
1888         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
1889     } else {
1890         length-=headerSize;
1891         if( length<20 ||
1892             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
1893         ) {
1894             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
1895                              length);
1896             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1897             return 0;
1898         }
1899     }
1900
1901     if(length<0) {
1902         /* preflighting: iterate through algorithmic ranges */
1903         offset=algNamesOffset;
1904         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
1905         offset+=4;
1906
1907         for(i=0; i<count; ++i) {
1908             inRange=(const AlgorithmicRange *)(inBytes+offset);
1909             offset+=ds->readUInt16(inRange->size);
1910         }
1911     } else {
1912         /* swap data */
1913         const uint16_t *p;
1914         uint16_t *q, *temp;
1915
1916         int16_t tokens[512];
1917         uint16_t tokenCount;
1918
1919         uint8_t map[256], trailMap[256];
1920
1921         /* copy the data for inaccessible bytes */
1922         if(inBytes!=outBytes) {
1923             uprv_memcpy(outBytes, inBytes, length);
1924         }
1925
1926         /* the initial 4 offsets first */
1927         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
1928         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
1929         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
1930         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
1931
1932         /*
1933          * now the tokens table
1934          * it needs to be permutated along with the compressed name strings
1935          */
1936         p=(const uint16_t *)(inBytes+16);
1937         q=(uint16_t *)(outBytes+16);
1938
1939         /* read and swap the tokenCount */
1940         tokenCount=ds->readUInt16(*p);
1941         ds->swapArray16(ds, p, 2, q, pErrorCode);
1942         ++p;
1943         ++q;
1944
1945         /* read the first 512 tokens and make the token maps */
1946         if(tokenCount<=512) {
1947             count=tokenCount;
1948         } else {
1949             count=512;
1950         }
1951         for(i=0; i<count; ++i) {
1952             tokens[i]=udata_readInt16(ds, p[i]);
1953         }
1954         for(; i<512; ++i) {
1955             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
1956         }
1957         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
1958         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
1959         if(U_FAILURE(*pErrorCode)) {
1960             return 0;
1961         }
1962
1963         /*
1964          * swap and permutate the tokens
1965          * go through a temporary array to support in-place swapping
1966          */
1967         temp=(uint16_t *)uprv_malloc(tokenCount*2);
1968         if(temp==NULL) {
1969             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
1970                              tokenCount);
1971             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1972             return 0;
1973         }
1974
1975         /* swap and permutate single-/lead-byte tokens */
1976         for(i=0; i<tokenCount && i<256; ++i) {
1977             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
1978         }
1979
1980         /* swap and permutate trail-byte tokens */
1981         for(; i<tokenCount; ++i) {
1982             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
1983         }
1984
1985         /* copy the result into the output and free the temporary array */
1986         uprv_memcpy(q, temp, tokenCount*2);
1987         uprv_free(temp);
1988
1989         /*
1990          * swap the token strings but not a possible padding byte after
1991          * the terminating NUL of the last string
1992          */
1993         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
1994                                     outBytes+tokenStringOffset, pErrorCode);
1995         if(U_FAILURE(*pErrorCode)) {
1996             udata_printError(ds, "uchar_swapNames(token strings) failed\n");
1997             return 0;
1998         }
1999
2000         /* swap the group table */
2001         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
2002         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
2003                            outBytes+groupsOffset, pErrorCode);
2004
2005         /*
2006          * swap the group strings
2007          * swap the string bytes but not the nibble-encoded string lengths
2008          */
2009         if(ds->inCharset!=ds->outCharset) {
2010             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
2011
2012             const uint8_t *inStrings, *nextInStrings;
2013             uint8_t *outStrings;
2014
2015             uint8_t c;
2016
2017             inStrings=inBytes+groupStringOffset;
2018             outStrings=outBytes+groupStringOffset;
2019
2020             stringsCount=algNamesOffset-groupStringOffset;
2021
2022             /* iterate through string groups until only a few padding bytes are left */
2023             while(stringsCount>32) {
2024                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
2025
2026                 /* move past the length bytes */
2027                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
2028                 outStrings+=nextInStrings-inStrings;
2029                 inStrings=nextInStrings;
2030
2031                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
2032                 stringsCount-=count;
2033
2034                 /* swap the string bytes using map[] and trailMap[] */
2035                 while(count>0) {
2036                     c=*inStrings++;
2037                     *outStrings++=map[c];
2038                     if(tokens[c]!=-2) {
2039                         --count;
2040                     } else {
2041                         /* token lead byte: swap the trail byte, too */
2042                         *outStrings++=trailMap[*inStrings++];
2043                         count-=2;
2044                     }
2045                 }
2046             }
2047         }
2048
2049         /* swap the algorithmic ranges */
2050         offset=algNamesOffset;
2051         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
2052         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
2053         offset+=4;
2054
2055         for(i=0; i<count; ++i) {
2056             if(offset>(uint32_t)length) {
2057                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
2058                                  length, i);
2059                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2060                 return 0;
2061             }
2062
2063             inRange=(const AlgorithmicRange *)(inBytes+offset);
2064             outRange=(AlgorithmicRange *)(outBytes+offset);
2065             offset+=ds->readUInt16(inRange->size);
2066
2067             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
2068             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
2069             switch(inRange->type) {
2070             case 0:
2071                 /* swap prefix string */
2072                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
2073                                     outRange+1, pErrorCode);
2074                 if(U_FAILURE(*pErrorCode)) {
2075                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
2076                                      i);
2077                     return 0;
2078                 }
2079                 break;
2080             case 1:
2081                 {
2082                     /* swap factors and the prefix and factor strings */
2083                     uint32_t factorsCount;
2084
2085                     factorsCount=inRange->variant;
2086                     p=(const uint16_t *)(inRange+1);
2087                     q=(uint16_t *)(outRange+1);
2088                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
2089
2090                     /* swap the strings, up to the last terminating NUL */
2091                     p+=factorsCount;
2092                     q+=factorsCount;
2093                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
2094                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
2095                         --stringsCount;
2096                     }
2097                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
2098                 }
2099                 break;
2100             default:
2101                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
2102                                  inRange->type, i);
2103                 *pErrorCode=U_UNSUPPORTED_ERROR;
2104                 return 0;
2105             }
2106         }
2107     }
2108
2109     return headerSize+(int32_t)offset;
2110 }
2111
2112 /*
2113  * Hey, Emacs, please set the following:
2114  *
2115  * Local Variables:
2116  * indent-tabs-mode: nil
2117  * End:
2118  *
2119  */