icuSources/common/unames.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1999-2004, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *   file name:  unames.c
   9 *   encoding:   US-ASCII
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 1999oct04
  14 *   created by: Markus W. Scherer
  15 */
  16
  17 #include "unicode/utypes.h"
  18 #include "unicode/putil.h"
  19 #include "unicode/uchar.h"
  20 #include "unicode/udata.h"
  21 #include "ustr_imp.h"
  22 #include "umutex.h"
  23 #include "cmemory.h"
  24 #include "cstring.h"
  25 #include "ucln_cmn.h"
  26 #include "udataswp.h"
  27 #include "uprops.h"
  28
  29 /* prototypes ------------------------------------------------------------- */
  30
  31 #define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
  32
  33 static const char DATA_NAME[] = "unames";
  34 static const char DATA_TYPE[] = "icu";
  35
  36 #define GROUP_SHIFT 5
  37 #define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
  38 #define GROUP_MASK (LINES_PER_GROUP-1)
  39
  40 typedef struct {
  41     uint16_t groupMSB,
  42              offsetHigh, offsetLow; /* avoid padding */
  43 } Group;
  44
  45 typedef struct {
  46     uint32_t start, end;
  47     uint8_t type, variant;
  48     uint16_t size;
  49 } AlgorithmicRange;
  50
  51 typedef struct {
  52     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset;
  53 } UCharNames;
  54
  55 typedef struct {
  56     const char *otherName;
  57     UChar32 code;
  58 } FindName;
  59
  60 #define DO_FIND_NAME NULL
  61
  62 static UDataMemory *uCharNamesData=NULL;
  63 static UCharNames *uCharNames=NULL;
  64 static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
  65
  66 /*
  67  * Maximum length of character names (regular & 1.0).
  68  * Maximum length of ISO comments.
  69  */
  70 static int32_t gMaxNameLength=0, gMaxISOCommentLength=0;
  71
  72 /*
  73  * Set of chars used in character names (regular & 1.0).
  74  * Set of chars used in ISO comments.
  75  * Chars are platform-dependent (can be EBCDIC).
  76  */
  77 static uint32_t gNameSet[8]={ 0 }, gISOCommentSet[8]={ 0 };
  78
  79 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
  80 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
  81 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
  82
  83 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
  84
  85 static const char * const
  86 charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT];
  87
  88 /* implementation ----------------------------------------------------------- */
  89
  90 static UBool U_CALLCONV unames_cleanup(void)
  91 {
  92     if(uCharNamesData) {
  93         udata_close(uCharNamesData);
  94         uCharNamesData = NULL;
  95     }
  96     if(uCharNames) {
  97         uCharNames = NULL;
  98     }
  99     gMaxNameLength=0;
 100     return TRUE;
 101 }
 102
 103 static UBool U_CALLCONV
 104 isAcceptable(void *context,
 105              const char *type, const char *name,
 106              const UDataInfo *pInfo) {
 107     return (UBool)(
 108         pInfo->size>=20 &&
 109         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
 110         pInfo->charsetFamily==U_CHARSET_FAMILY &&
 111         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
 112         pInfo->dataFormat[1]==0x6e &&
 113         pInfo->dataFormat[2]==0x61 &&
 114         pInfo->dataFormat[3]==0x6d &&
 115         pInfo->formatVersion[0]==1);
 116 }
 117
 118 static UBool
 119 isDataLoaded(UErrorCode *pErrorCode) {
 120     /* load UCharNames from file if necessary */
 121     UBool isCached;
 122
 123     /* do this because double-checked locking is broken */
 124     umtx_lock(NULL);
 125     isCached=uCharNames!=NULL;
 126     umtx_unlock(NULL);
 127
 128     if(!isCached) {
 129         UCharNames *names;
 130         UDataMemory *data;
 131
 132         /* check error code from previous attempt */
 133         if(U_FAILURE(gLoadErrorCode)) {
 134             *pErrorCode=gLoadErrorCode;
 135             return FALSE;
 136         }
 137
 138         /* open the data outside the mutex block */
 139         data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
 140         if(U_FAILURE(*pErrorCode)) {
 141             gLoadErrorCode=*pErrorCode;
 142             return FALSE;
 143         }
 144
 145         names=(UCharNames *)udata_getMemory(data);
 146
 147         /* in the mutex block, set the data for this process */
 148         {
 149             umtx_lock(NULL);
 150             if(uCharNames==NULL) {
 151                 uCharNames=names;
 152                 uCharNamesData=data;
 153                 data=NULL;
 154                 names=NULL;
 155                 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
 156             }
 157             umtx_unlock(NULL);
 158         }
 159
 160         /* if a different thread set it first, then close the extra data */
 161         if(data!=NULL) {
 162             udata_close(data); /* NULL if it was set correctly */
 163         }
 164     }
 165     return TRUE;
 166 }
 167
 168 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) { \
 169     if((bufferLength)>0) { \
 170         *(buffer)++=c; \
 171         --(bufferLength); \
 172     } \
 173     ++(bufferPos); \
 174 }
 175
 176 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
 177
 178 /*
 179  * Important: expandName() and compareName() are almost the same -
 180  * apply fixes to both.
 181  *
 182  * UnicodeData.txt uses ';' as a field separator, so no
 183  * field can contain ';' as part of its contents.
 184  * In unames.dat, it is marked as token[';']==-1 only if the
 185  * semicolon is used in the data file - which is iff we
 186  * have Unicode 1.0 names or ISO comments.
 187  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments
 188  * although we know that it will never be part of a name.
 189  */
 190 static uint16_t
 191 expandName(UCharNames *names,
 192            const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 193            char *buffer, uint16_t bufferLength) {
 194     uint16_t *tokens=(uint16_t *)names+8;
 195     uint16_t token, tokenCount=*tokens++, bufferPos=0;
 196     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 197     uint8_t c;
 198
 199     if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==U_ISO_COMMENT) {
 200         /*
 201          * skip the modern name if it is not requested _and_
 202          * if the semicolon byte value is a character, not a token number
 203          */
 204         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 205             while(nameLength>0) {
 206                 --nameLength;
 207                 if(*name++==';') {
 208                     break;
 209                 }
 210             }
 211             if(nameChoice==U_ISO_COMMENT) {
 212                 /* skip the Unicode 1.0 name as well to get the ISO comment */
 213                 while(nameLength>0) {
 214                     --nameLength;
 215                     if(*name++==';') {
 216                         break;
 217                     }
 218                 }
 219             }
 220         } else {
 221             /*
 222              * the semicolon byte value is a token number, therefore
 223              * only modern names are stored in unames.dat and there is no
 224              * such requested Unicode 1.0 name here
 225              */
 226             nameLength=0;
 227         }
 228     }
 229
 230     /* write each letter directly, and write a token word per token */
 231     while(nameLength>0) {
 232         --nameLength;
 233         c=*name++;
 234
 235         if(c>=tokenCount) {
 236             if(c!=';') {
 237                 /* implicit letter */
 238                 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 239             } else {
 240                 /* finished */
 241                 break;
 242             }
 243         } else {
 244             token=tokens[c];
 245             if(token==(uint16_t)(-2)) {
 246                 /* this is a lead byte for a double-byte token */
 247                 token=tokens[c<<8|*name++];
 248                 --nameLength;
 249             }
 250             if(token==(uint16_t)(-1)) {
 251                 if(c!=';') {
 252                     /* explicit letter */
 253                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 254                 } else {
 255                     /* stop, but skip the semicolon if we are seeking
 256                        extended names and there was no 2.0 name but there
 257                        is a 1.0 name. */
 258                     if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
 259                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 260                             continue;
 261                         }
 262                     }
 263                     /* finished */
 264                     break;
 265                 }
 266             } else {
 267                 /* write token word */
 268                 uint8_t *tokenString=tokenStrings+token;
 269                 while((c=*tokenString++)!=0) {
 270                     WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 271                 }
 272             }
 273         }
 274     }
 275
 276     /* zero-terminate */
 277     if(bufferLength>0) {
 278         *buffer=0;
 279     }
 280
 281     return bufferPos;
 282 }
 283
 284 /*
 285  * compareName() is almost the same as expandName() except that it compares
 286  * the currently expanded name to an input name.
 287  * It returns the match/no match result as soon as possible.
 288  */
 289 static UBool
 290 compareName(UCharNames *names,
 291             const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
 292             const char *otherName) {
 293     uint16_t *tokens=(uint16_t *)names+8;
 294     uint16_t token, tokenCount=*tokens++;
 295     uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
 296     uint8_t c;
 297     const char *origOtherName = otherName;
 298
 299     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
 300         /*
 301          * skip the modern name if it is not requested _and_
 302          * if the semicolon byte value is a character, not a token number
 303          */
 304         if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 305             while(nameLength>0) {
 306                 --nameLength;
 307                 if(*name++==';') {
 308                     break;
 309                 }
 310             }
 311         } else {
 312             /*
 313              * the semicolon byte value is a token number, therefore
 314              * only modern names are stored in unames.dat and there is no
 315              * such requested Unicode 1.0 name here
 316              */
 317             nameLength=0;
 318         }
 319     }
 320
 321     /* compare each letter directly, and compare a token word per token */
 322     while(nameLength>0) {
 323         --nameLength;
 324         c=*name++;
 325
 326         if(c>=tokenCount) {
 327             if(c!=';') {
 328                 /* implicit letter */
 329                 if((char)c!=*otherName++) {
 330                     return FALSE;
 331                 }
 332             } else {
 333                 /* finished */
 334                 break;
 335             }
 336         } else {
 337             token=tokens[c];
 338             if(token==(uint16_t)(-2)) {
 339                 /* this is a lead byte for a double-byte token */
 340                 token=tokens[c<<8|*name++];
 341                 --nameLength;
 342             }
 343             if(token==(uint16_t)(-1)) {
 344                 if(c!=';') {
 345                     /* explicit letter */
 346                     if((char)c!=*otherName++) {
 347                         return FALSE;
 348                     }
 349                 } else {
 350                     /* stop, but skip the semicolon if we are seeking
 351                        extended names and there was no 2.0 name but there
 352                        is a 1.0 name. */
 353                     if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
 354                         if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
 355                             continue;
 356                         }
 357                     }
 358                     /* finished */
 359                     break;
 360                 }
 361             } else {
 362                 /* write token word */
 363                 uint8_t *tokenString=tokenStrings+token;
 364                 while((c=*tokenString++)!=0) {
 365                     if((char)c!=*otherName++) {
 366                         return FALSE;
 367                     }
 368                 }
 369             }
 370         }
 371     }
 372
 373     /* complete match? */
 374     return (UBool)(*otherName==0);
 375 }
 376
 377 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
 378     "unassigned",
 379     "uppercase letter",
 380     "lowercase letter",
 381     "titlecase letter",
 382     "modifier letter",
 383     "other letter",
 384     "non spacing mark",
 385     "enclosing mark",
 386     "combining spacing mark",
 387     "decimal digit number",
 388     "letter number",
 389     "other number",
 390     "space separator",
 391     "line separator",
 392     "paragraph separator",
 393     "control",
 394     "format",
 395     "private use area",
 396     "surrogate",
 397     "dash punctuation",
 398     "start punctuation",
 399     "end punctuation",
 400     "connector punctuation",
 401     "other punctuation",
 402     "math symbol",
 403     "currency symbol",
 404     "modifier symbol",
 405     "other symbol",
 406     "initial punctuation",
 407     "final punctuation",
 408     "noncharacter",
 409     "lead surrogate",
 410     "trail surrogate"
 411 };
 412
 413 static uint8_t getCharCat(UChar32 cp) {
 414     uint8_t cat;
 415
 416     if (UTF_IS_UNICODE_NONCHAR(cp)) {
 417         return U_NONCHARACTER_CODE_POINT;
 418     }
 419
 420     if ((cat = u_charType(cp)) == U_SURROGATE) {
 421         cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
 422     }
 423
 424     return cat;
 425 }
 426
 427 static const char *getCharCatName(UChar32 cp) {
 428     uint8_t cat = getCharCat(cp);
 429
 430     /* Return unknown if the table of names above is not up to
 431        date. */
 432
 433     if (cat >= LENGTHOF(charCatNames)) {
 434         return "unknown";
 435     } else {
 436         return charCatNames[cat];
 437     }
 438 }
 439
 440 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
 441     const char *catname = getCharCatName(code);
 442     uint16_t length = 0;
 443
 444     UChar32 cp;
 445     int ndigits, i;
 446
 447     WRITE_CHAR(buffer, bufferLength, length, '<');
 448     while (catname[length - 1]) {
 449         WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
 450     }
 451     WRITE_CHAR(buffer, bufferLength, length, '-');
 452     for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
 453         ;
 454     if (ndigits < 4)
 455         ndigits = 4;
 456     for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
 457         uint8_t v = (uint8_t)(cp & 0xf);
 458         buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
 459     }
 460     buffer += ndigits;
 461     length += ndigits;
 462     WRITE_CHAR(buffer, bufferLength, length, '>');
 463
 464     return length;
 465 }
 466
 467 /*
 468  * getGroup() does a binary search for the group that contains the
 469  * Unicode code point "code".
 470  * The return value is always a valid Group* that may contain "code"
 471  * or else is the highest group before "code".
 472  * If the lowest group is after "code", then that one is returned.
 473  */
 474 static Group *
 475 getGroup(UCharNames *names, uint32_t code) {
 476     uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
 477              start=0,
 478              limit=*(uint16_t *)((char *)names+names->groupsOffset),
 479              number;
 480     Group *groups=(Group *)((char *)names+names->groupsOffset+2);
 481
 482     /* binary search for the group of names that contains the one for code */
 483     while(start<limit-1) {
 484         number=(uint16_t)((start+limit)/2);
 485         if(groupMSB<groups[number].groupMSB) {
 486             limit=number;
 487         } else {
 488             start=number;
 489         }
 490     }
 491
 492     /* return this regardless of whether it is an exact match */
 493     return groups+start;
 494 }
 495
 496 /*
 497  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
 498  * expands them into offsets and lengths for each string.
 499  * Lengths are stored with a variable-width encoding in consecutive nibbles:
 500  * If a nibble<0xc, then it is the length itself (0=empty string).
 501  * If a nibble>=0xc, then it forms a length value with the following nibble.
 502  * Calculation see below.
 503  * The offsets and lengths arrays must be at least 33 (one more) long because
 504  * there is no check here at the end if the last nibble is still used.
 505  */
 506 static const uint8_t *
 507 expandGroupLengths(const uint8_t *s,
 508                    uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
 509     /* read the lengths of the 32 strings in this group and get each string's offset */
 510     uint16_t i=0, offset=0, length=0;
 511     uint8_t lengthByte;
 512
 513     /* all 32 lengths must be read to get the offset of the first group string */
 514     while(i<LINES_PER_GROUP) {
 515         lengthByte=*s++;
 516
 517         /* read even nibble - MSBs of lengthByte */
 518         if(length>=12) {
 519             /* double-nibble length spread across two bytes */
 520             length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
 521             lengthByte&=0xf;
 522         } else if((lengthByte /* &0xf0 */)>=0xc0) {
 523             /* double-nibble length spread across this one byte */
 524             length=(uint16_t)((lengthByte&0x3f)+12);
 525         } else {
 526             /* single-nibble length in MSBs */
 527             length=(uint16_t)(lengthByte>>4);
 528             lengthByte&=0xf;
 529         }
 530
 531         *offsets++=offset;
 532         *lengths++=length;
 533
 534         offset+=length;
 535         ++i;
 536
 537         /* read odd nibble - LSBs of lengthByte */
 538         if((lengthByte&0xf0)==0) {
 539             /* this nibble was not consumed for a double-nibble length above */
 540             length=lengthByte;
 541             if(length<12) {
 542                 /* single-nibble length in LSBs */
 543                 *offsets++=offset;
 544                 *lengths++=length;
 545
 546                 offset+=length;
 547                 ++i;
 548             }
 549         } else {
 550             length=0;   /* prevent double-nibble detection in the next iteration */
 551         }
 552     }
 553
 554     /* now, s is at the first group string */
 555     return s;
 556 }
 557
 558 static uint16_t
 559 expandGroupName(UCharNames *names, Group *group,
 560                 uint16_t lineNumber, UCharNameChoice nameChoice,
 561                 char *buffer, uint16_t bufferLength) {
 562     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 563     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
 564                                     (group->offsetHigh<<16|group->offsetLow);
 565     s=expandGroupLengths(s, offsets, lengths);
 566     return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
 567                       buffer, bufferLength);
 568 }
 569
 570 static uint16_t
 571 getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
 572         char *buffer, uint16_t bufferLength) {
 573     Group *group=getGroup(names, code);
 574     if((uint16_t)(code>>GROUP_SHIFT)==group->groupMSB) {
 575         return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
 576                                buffer, bufferLength);
 577     } else {
 578         /* group not found */
 579         /* zero-terminate */
 580         if(bufferLength>0) {
 581             *buffer=0;
 582         }
 583         return 0;
 584     }
 585 }
 586
 587 /*
 588  * enumGroupNames() enumerates all the names in a 32-group
 589  * and either calls the enumerator function or finds a given input name.
 590  */
 591 static UBool
 592 enumGroupNames(UCharNames *names, Group *group,
 593                UChar32 start, UChar32 end,
 594                UEnumCharNamesFn *fn, void *context,
 595                UCharNameChoice nameChoice) {
 596     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
 597     const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
 598                                     (group->offsetHigh<<16|group->offsetLow);
 599
 600     s=expandGroupLengths(s, offsets, lengths);
 601     if(fn!=DO_FIND_NAME) {
 602         char buffer[200];
 603         uint16_t length;
 604
 605         while(start<=end) {
 606             length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
 607             if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
 608                 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 609             }
 610             /* here, we assume that the buffer is large enough */
 611             if(length>0) {
 612                 if(!fn(context, start, nameChoice, buffer, length)) {
 613                     return FALSE;
 614                 }
 615             }
 616             ++start;
 617         }
 618     } else {
 619         const char *otherName=((FindName *)context)->otherName;
 620         while(start<=end) {
 621             if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
 622                 ((FindName *)context)->code=start;
 623                 return FALSE;
 624             }
 625             ++start;
 626         }
 627     }
 628     return TRUE;
 629 }
 630
 631 /*
 632  * enumExtNames enumerate extended names.
 633  * It only needs to do it if it is called with a real function and not
 634  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
 635  * for extended names by itself.
 636  */
 637 static UBool
 638 enumExtNames(UChar32 start, UChar32 end,
 639              UEnumCharNamesFn *fn, void *context)
 640 {
 641     if(fn!=DO_FIND_NAME) {
 642         char buffer[200];
 643         uint16_t length;
 644
 645         while(start<=end) {
 646             buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
 647             /* here, we assume that the buffer is large enough */
 648             if(length>0) {
 649                 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
 650                     return FALSE;
 651                 }
 652             }
 653             ++start;
 654         }
 655     }
 656
 657     return TRUE;
 658 }
 659
 660 static UBool
 661 enumNames(UCharNames *names,
 662           UChar32 start, UChar32 limit,
 663           UEnumCharNamesFn *fn, void *context,
 664           UCharNameChoice nameChoice) {
 665     uint16_t startGroupMSB, endGroupMSB, groupCount;
 666     Group *group, *groupLimit;
 667
 668     startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
 669     endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
 670
 671     /* find the group that contains start, or the highest before it */
 672     group=getGroup(names, start);
 673
 674     if(startGroupMSB==endGroupMSB) {
 675         if(startGroupMSB==group->groupMSB) {
 676             /* if start and limit-1 are in the same group, then enumerate only in that one */
 677             return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
 678         }
 679     } else {
 680         groupCount=*(uint16_t *)((char *)names+names->groupsOffset);
 681         groupLimit=(Group *)((char *)names+names->groupsOffset+2)+groupCount;
 682
 683         if(startGroupMSB==group->groupMSB) {
 684             /* enumerate characters in the partial start group */
 685             if((start&GROUP_MASK)!=0) {
 686                 if(!enumGroupNames(names, group,
 687                                    start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
 688                                    fn, context, nameChoice)) {
 689                     return FALSE;
 690                 }
 691                 ++group; /* continue with the next group */
 692             }
 693         } else if(startGroupMSB>group->groupMSB) {
 694             /* make sure that we start enumerating with the first group after start */
 695             if (group + 1 < groupLimit && (group + 1)->groupMSB > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
 696                 UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT;
 697                 if (end > limit) {
 698                     end = limit;
 699                 }
 700                 if (!enumExtNames(start, end - 1, fn, context)) {
 701                     return FALSE;
 702                 }
 703             }
 704             ++group;
 705         }
 706
 707         /* enumerate entire groups between the start- and end-groups */
 708         while(group<groupLimit && group->groupMSB<endGroupMSB) {
 709             start=(UChar32)group->groupMSB<<GROUP_SHIFT;
 710             if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
 711                 return FALSE;
 712             }
 713             if (group + 1 < groupLimit && (group + 1)->groupMSB > group->groupMSB + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
 714                 UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT;
 715                 if (end > limit) {
 716                     end = limit;
 717                 }
 718                 if (!enumExtNames((group->groupMSB + 1) << GROUP_SHIFT, end - 1, fn, context)) {
 719                     return FALSE;
 720                 }
 721             }
 722             ++group;
 723         }
 724
 725         /* enumerate within the end group (group->groupMSB==endGroupMSB) */
 726         if(group<groupLimit && group->groupMSB==endGroupMSB) {
 727             return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
 728         } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
 729             UChar32 next = ((group - 1)->groupMSB + 1) << GROUP_SHIFT;
 730             if (next > start) {
 731                 start = next;
 732             }
 733         } else {
 734             return TRUE;
 735         }
 736     }
 737
 738     /* we have not found a group, which means everything is made of
 739        extended names. */
 740     if (nameChoice == U_EXTENDED_CHAR_NAME) {
 741         if (limit > UCHAR_MAX_VALUE + 1) {
 742             limit = UCHAR_MAX_VALUE + 1;
 743         }
 744         return enumExtNames(start, limit - 1, fn, context);
 745     }
 746
 747     return TRUE;
 748 }
 749
 750 static uint16_t
 751 writeFactorSuffix(const uint16_t *factors, uint16_t count,
 752                   const char *s, /* suffix elements */
 753                   uint32_t code,
 754                   uint16_t indexes[8], /* output fields from here */
 755                   const char *elementBases[8], const char *elements[8],
 756                   char *buffer, uint16_t bufferLength) {
 757     uint16_t i, factor, bufferPos=0;
 758     char c;
 759
 760     /* write elements according to the factors */
 761
 762     /*
 763      * the factorized elements are determined by modulo arithmetic
 764      * with the factors of this algorithm
 765      *
 766      * note that for fewer operations, count is decremented here
 767      */
 768     --count;
 769     for(i=count; i>0; --i) {
 770         factor=factors[i];
 771         indexes[i]=(uint16_t)(code%factor);
 772         code/=factor;
 773     }
 774     /*
 775      * we don't need to calculate the last modulus because start<=code<=end
 776      * guarantees here that code<=factors[0]
 777      */
 778     indexes[0]=(uint16_t)code;
 779
 780     /* write each element */
 781     for(;;) {
 782         if(elementBases!=NULL) {
 783             *elementBases++=s;
 784         }
 785
 786         /* skip indexes[i] strings */
 787         factor=indexes[i];
 788         while(factor>0) {
 789             while(*s++!=0) {}
 790             --factor;
 791         }
 792         if(elements!=NULL) {
 793             *elements++=s;
 794         }
 795
 796         /* write element */
 797         while((c=*s++)!=0) {
 798             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 799         }
 800
 801         /* we do not need to perform the rest of this loop for i==count - break here */
 802         if(i>=count) {
 803             break;
 804         }
 805
 806         /* skip the rest of the strings for this factors[i] */
 807         factor=(uint16_t)(factors[i]-indexes[i]-1);
 808         while(factor>0) {
 809             while(*s++!=0) {}
 810             --factor;
 811         }
 812
 813         ++i;
 814     }
 815
 816     /* zero-terminate */
 817     if(bufferLength>0) {
 818         *buffer=0;
 819     }
 820
 821     return bufferPos;
 822 }
 823
 824 /*
 825  * Important:
 826  * Parts of findAlgName() are almost the same as some of getAlgName().
 827  * Fixes must be applied to both.
 828  */
 829 static uint16_t
 830 getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
 831         char *buffer, uint16_t bufferLength) {
 832     uint16_t bufferPos=0;
 833
 834     /*
 835      * Do not write algorithmic Unicode 1.0 names because
 836      * Unihan names are the same as the modern ones,
 837      * extension A was only introduced with Unicode 3.0, and
 838      * the Hangul syllable block was moved and changed around Unicode 1.1.5.
 839      */
 840     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
 841         /* zero-terminate */
 842         if(bufferLength>0) {
 843             *buffer=0;
 844         }
 845         return 0;
 846     }
 847
 848     switch(range->type) {
 849     case 0: {
 850         /* name = prefix hex-digits */
 851         const char *s=(const char *)(range+1);
 852         char c;
 853
 854         uint16_t i, count;
 855
 856         /* copy prefix */
 857         while((c=*s++)!=0) {
 858             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 859         }
 860
 861         /* write hexadecimal code point value */
 862         count=range->variant;
 863
 864         /* zero-terminate */
 865         if(count<bufferLength) {
 866             buffer[count]=0;
 867         }
 868
 869         for(i=count; i>0;) {
 870             if(--i<bufferLength) {
 871                 c=(char)(code&0xf);
 872                 if(c<10) {
 873                     c+='0';
 874                 } else {
 875                     c+='A'-10;
 876                 }
 877                 buffer[i]=c;
 878             }
 879             code>>=4;
 880         }
 881
 882         bufferPos+=count;
 883         break;
 884     }
 885     case 1: {
 886         /* name = prefix factorized-elements */
 887         uint16_t indexes[8];
 888         const uint16_t *factors=(const uint16_t *)(range+1);
 889         uint16_t count=range->variant;
 890         const char *s=(const char *)(factors+count);
 891         char c;
 892
 893         /* copy prefix */
 894         while((c=*s++)!=0) {
 895             WRITE_CHAR(buffer, bufferLength, bufferPos, c);
 896         }
 897
 898         bufferPos+=writeFactorSuffix(factors, count,
 899                                      s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
 900         break;
 901     }
 902     default:
 903         /* undefined type */
 904         /* zero-terminate */
 905         if(bufferLength>0) {
 906             *buffer=0;
 907         }
 908         break;
 909     }
 910
 911     return bufferPos;
 912 }
 913
 914 /*
 915  * Important: enumAlgNames() and findAlgName() are almost the same.
 916  * Any fix must be applied to both.
 917  */
 918 static UBool
 919 enumAlgNames(AlgorithmicRange *range,
 920              UChar32 start, UChar32 limit,
 921              UEnumCharNamesFn *fn, void *context,
 922              UCharNameChoice nameChoice) {
 923     char buffer[200];
 924     uint16_t length;
 925
 926     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
 927         return TRUE;
 928     }
 929
 930     switch(range->type) {
 931     case 0: {
 932         char *s, *end;
 933         char c;
 934
 935         /* get the full name of the start character */
 936         length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
 937         if(length<=0) {
 938             return TRUE;
 939         }
 940
 941         /* call the enumerator function with this first character */
 942         if(!fn(context, start, nameChoice, buffer, length)) {
 943             return FALSE;
 944         }
 945
 946         /* go to the end of the name; all these names have the same length */
 947         end=buffer;
 948         while(*end!=0) {
 949             ++end;
 950         }
 951
 952         /* enumerate the rest of the names */
 953         while(++start<limit) {
 954             /* increment the hexadecimal number on a character-basis */
 955             s=end;
 956             for (;;) {
 957                 c=*--s;
 958                 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
 959                     *s=(char)(c+1);
 960                     break;
 961                 } else if(c=='9') {
 962                     *s='A';
 963                     break;
 964                 } else if(c=='F') {
 965                     *s='0';
 966                 }
 967             }
 968
 969             if(!fn(context, start, nameChoice, buffer, length)) {
 970                 return FALSE;
 971             }
 972         }
 973         break;
 974     }
 975     case 1: {
 976         uint16_t indexes[8];
 977         const char *elementBases[8], *elements[8];
 978         const uint16_t *factors=(const uint16_t *)(range+1);
 979         uint16_t count=range->variant;
 980         const char *s=(const char *)(factors+count);
 981         char *suffix, *t;
 982         uint16_t prefixLength, i, index;
 983
 984         char c;
 985
 986         /* name = prefix factorized-elements */
 987
 988         /* copy prefix */
 989         suffix=buffer;
 990         prefixLength=0;
 991         while((c=*s++)!=0) {
 992             *suffix++=c;
 993             ++prefixLength;
 994         }
 995
 996         /* append the suffix of the start character */
 997         length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
 998                                               s, (uint32_t)start-range->start,
 999                                               indexes, elementBases, elements,
1000                                               suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
1001
1002         /* call the enumerator function with this first character */
1003         if(!fn(context, start, nameChoice, buffer, length)) {
1004             return FALSE;
1005         }
1006
1007         /* enumerate the rest of the names */
1008         while(++start<limit) {
1009             /* increment the indexes in lexical order bound by the factors */
1010             i=count;
1011             for (;;) {
1012                 index=(uint16_t)(indexes[--i]+1);
1013                 if(index<factors[i]) {
1014                     /* skip one index and its element string */
1015                     indexes[i]=index;
1016                     s=elements[i];
1017                     while(*s++!=0) {
1018                     }
1019                     elements[i]=s;
1020                     break;
1021                 } else {
1022                     /* reset this index to 0 and its element string to the first one */
1023                     indexes[i]=0;
1024                     elements[i]=elementBases[i];
1025                 }
1026             }
1027
1028             /* to make matters a little easier, just append all elements to the suffix */
1029             t=suffix;
1030             length=prefixLength;
1031             for(i=0; i<count; ++i) {
1032                 s=elements[i];
1033                 while((c=*s++)!=0) {
1034                     *t++=c;
1035                     ++length;
1036                 }
1037             }
1038             /* zero-terminate */
1039             *t=0;
1040
1041             if(!fn(context, start, nameChoice, buffer, length)) {
1042                 return FALSE;
1043             }
1044         }
1045         break;
1046     }
1047     default:
1048         /* undefined type */
1049         break;
1050     }
1051
1052     return TRUE;
1053 }
1054
1055 /*
1056  * findAlgName() is almost the same as enumAlgNames() except that it
1057  * returns the code point for a name if it fits into the range.
1058  * It returns 0xffff otherwise.
1059  */
1060 static UChar32
1061 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
1062     UChar32 code;
1063
1064     if(nameChoice==U_UNICODE_10_CHAR_NAME) {
1065         return 0xffff;
1066     }
1067
1068     switch(range->type) {
1069     case 0: {
1070         /* name = prefix hex-digits */
1071         const char *s=(const char *)(range+1);
1072         char c;
1073
1074         uint16_t i, count;
1075
1076         /* compare prefix */
1077         while((c=*s++)!=0) {
1078             if((char)c!=*otherName++) {
1079                 return 0xffff;
1080             }
1081         }
1082
1083         /* read hexadecimal code point value */
1084         count=range->variant;
1085         code=0;
1086         for(i=0; i<count; ++i) {
1087             c=*otherName++;
1088             if('0'<=c && c<='9') {
1089                 code=(code<<4)|(c-'0');
1090             } else if('A'<=c && c<='F') {
1091                 code=(code<<4)|(c-'A'+10);
1092             } else {
1093                 return 0xffff;
1094             }
1095         }
1096
1097         /* does it fit into the range? */
1098         if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1099             return code;
1100         }
1101         break;
1102     }
1103     case 1: {
1104         char buffer[64];
1105         uint16_t indexes[8];
1106         const char *elementBases[8], *elements[8];
1107         const uint16_t *factors=(const uint16_t *)(range+1);
1108         uint16_t count=range->variant;
1109         const char *s=(const char *)(factors+count), *t;
1110         UChar32 start, limit;
1111         uint16_t i, index;
1112
1113         char c;
1114
1115         /* name = prefix factorized-elements */
1116
1117         /* compare prefix */
1118         while((c=*s++)!=0) {
1119             if((char)c!=*otherName++) {
1120                 return 0xffff;
1121             }
1122         }
1123
1124         start=(UChar32)range->start;
1125         limit=(UChar32)(range->end+1);
1126
1127         /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
1128         writeFactorSuffix(factors, count, s, 0,
1129                           indexes, elementBases, elements, buffer, sizeof(buffer));
1130
1131         /* compare the first suffix */
1132         if(0==uprv_strcmp(otherName, buffer)) {
1133             return start;
1134         }
1135
1136         /* enumerate and compare the rest of the suffixes */
1137         while(++start<limit) {
1138             /* increment the indexes in lexical order bound by the factors */
1139             i=count;
1140             for (;;) {
1141                 index=(uint16_t)(indexes[--i]+1);
1142                 if(index<factors[i]) {
1143                     /* skip one index and its element string */
1144                     indexes[i]=index;
1145                     s=elements[i];
1146                     while(*s++!=0) {}
1147                     elements[i]=s;
1148                     break;
1149                 } else {
1150                     /* reset this index to 0 and its element string to the first one */
1151                     indexes[i]=0;
1152                     elements[i]=elementBases[i];
1153                 }
1154             }
1155
1156             /* to make matters a little easier, just compare all elements of the suffix */
1157             t=otherName;
1158             for(i=0; i<count; ++i) {
1159                 s=elements[i];
1160                 while((c=*s++)!=0) {
1161                     if(c!=*t++) {
1162                         s=""; /* does not match */
1163                         i=99;
1164                     }
1165                 }
1166             }
1167             if(i<99 && *t==0) {
1168                 return start;
1169             }
1170         }
1171         break;
1172     }
1173     default:
1174         /* undefined type */
1175         break;
1176     }
1177
1178     return 0xffff;
1179 }
1180
1181 /* sets of name characters, maximum name lengths ---------------------------- */
1182
/*
 * Bit-set helpers for a set of 256 flags stored in uint32_t set[8]:
 * the upper 3 bits of the byte value select the word, the lower 5 bits
 * select the bit inside that word.
 * The argument c is converted to uint8_t first; it is fully parenthesized
 * so that expression arguments (e.g. x&0xff) are evaluated as a whole.
 * c is evaluated twice - do not pass expressions with side effects.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1185
/*
 * Adds every char of the NUL-terminated string s to the 256-bit set
 * and returns the length of s (not counting the terminator).
 */
static int32_t
calcStringSetLength(uint32_t set[8], const char *s) {
    const char *p;
    char ch;

    for(p=s; (ch=*p)!=0; ++p) {
        SET_ADD(set, ch);
    }
    return (int32_t)(p-s);
}
1197
1198 static int32_t
1199 calcAlgNameSetsLengths(int32_t maxNameLength) {
1200     AlgorithmicRange *range;
1201     uint32_t *p;
1202     uint32_t rangeCount;
1203     int32_t length;
1204
1205     /* enumerate algorithmic ranges */
1206     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1207     rangeCount=*p;
1208     range=(AlgorithmicRange *)(p+1);
1209     while(rangeCount>0) {
1210         switch(range->type) {
1211         case 0:
1212             /* name = prefix + (range->variant times) hex-digits */
1213             /* prefix */
1214             length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1215             if(length>maxNameLength) {
1216                 maxNameLength=length;
1217             }
1218             break;
1219         case 1: {
1220             /* name = prefix factorized-elements */
1221             const uint16_t *factors=(const uint16_t *)(range+1);
1222             const char *s;
1223             int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1224
1225             /* prefix length */
1226             s=(const char *)(factors+count);
1227             length=calcStringSetLength(gNameSet, s);
1228             s+=length+1; /* start of factor suffixes */
1229
1230             /* get the set and maximum factor suffix length for each factor */
1231             for(i=0; i<count; ++i) {
1232                 maxFactorLength=0;
1233                 for(factor=factors[i]; factor>0; --factor) {
1234                     factorLength=calcStringSetLength(gNameSet, s);
1235                     s+=factorLength+1;
1236                     if(factorLength>maxFactorLength) {
1237                         maxFactorLength=factorLength;
1238                     }
1239                 }
1240                 length+=maxFactorLength;
1241             }
1242
1243             if(length>maxNameLength) {
1244                 maxNameLength=length;
1245             }
1246             break;
1247         }
1248         default:
1249             /* unknown type */
1250             break;
1251         }
1252
1253         range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1254         --rangeCount;
1255     }
1256     return maxNameLength;
1257 }
1258
1259 static int32_t
1260 calcExtNameSetsLengths(int32_t maxNameLength) {
1261     int32_t i, length;
1262
1263     for(i=0; i<LENGTHOF(charCatNames); ++i) {
1264         /*
1265          * for each category, count the length of the category name
1266          * plus 9=
1267          * 2 for <>
1268          * 1 for -
1269          * 6 for most hex digits per code point
1270          */
1271         length=9+calcStringSetLength(gNameSet, charCatNames[i]);
1272         if(length>maxNameLength) {
1273             maxNameLength=length;
1274         }
1275     }
1276     return maxNameLength;
1277 }
1278
/*
 * Measures one field of a compressed name line and collects its chars.
 *
 * Reads bytes from *pLine until lineLimit or until a ';' byte is consumed.
 * Bytes at or above tokenCount and entries marked -1 in tokens[] stand for
 * themselves; an entry of -2 marks a lead byte that is combined with the
 * next byte into a double-byte token index; every other entry is an offset
 * into tokenStrings for a token word.
 *
 * tokenLengths  optional cache of token word lengths indexed by token
 *               number (0 = not yet computed); may be NULL
 * set           receives the chars that occur in the field
 * pLine         in: start of the field; out: position after the field
 *
 * Returns the expanded length of the field.
 */
static int32_t
calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
                  uint32_t set[8],
                  const uint8_t **pLine, const uint8_t *lineLimit) {
    const uint8_t *line=*pLine;
    int32_t length=0, tokenLength;
    uint16_t c, token;

    for(;;) {
        if(line==lineLimit) {
            break;
        }
        c=*line++;
        if(c==(uint8_t)';') {
            /* field separator, already consumed */
            break;
        }

        if(c<tokenCount) {
            token=tokens[c];
            if(token==(uint16_t)(-2)) {
                /* lead byte: fetch the trail byte and look up the double-byte token */
                c=(uint16_t)((c<<8)|*line++);
                token=tokens[c];
            }
            if(token!=(uint16_t)(-1)) {
                /* token word: add its length, using the cache when there is one */
                if(tokenLengths==NULL) {
                    tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
                } else {
                    tokenLength=tokenLengths[c];
                    if(tokenLength==0) {
                        tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
                        tokenLengths[c]=(int8_t)tokenLength;
                    }
                }
                length+=tokenLength;
                continue;
            }
        }

        /* implicit (c>=tokenCount) or explicit (-1 entry) letter */
        SET_ADD(set, c);
        ++length;
    }

    *pLine=line;
    return length;
}
1323
1324 static void
1325 calcGroupNameSetsLengths(int32_t maxNameLength) {
1326     uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
1327
1328     uint16_t *tokens=(uint16_t *)uCharNames+8;
1329     uint16_t tokenCount=*tokens++;
1330     uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
1331
1332     int8_t *tokenLengths;
1333
1334     uint16_t *groups;
1335     Group *group;
1336     const uint8_t *s, *line, *lineLimit;
1337
1338     int32_t maxISOCommentLength=0;
1339     int32_t groupCount, lineNumber, length;
1340
1341     tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1342     if(tokenLengths!=NULL) {
1343         uprv_memset(tokenLengths, 0, tokenCount);
1344     }
1345
1346     groups=(uint16_t *)((char *)uCharNames+uCharNames->groupsOffset);
1347     groupCount=*groups++;
1348     group=(Group *)groups;
1349
1350     /* enumerate all groups */
1351     while(groupCount>0) {
1352         s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+
1353                                     ((int32_t)group->offsetHigh<<16|group->offsetLow);
1354         s=expandGroupLengths(s, offsets, lengths);
1355
1356         /* enumerate all lines in each group */
1357         for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1358             line=s+offsets[lineNumber];
1359             length=lengths[lineNumber];
1360             if(length==0) {
1361                 continue;
1362             }
1363
1364             lineLimit=line+length;
1365
1366             /* read regular name */
1367             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1368             if(length>maxNameLength) {
1369                 maxNameLength=length;
1370             }
1371             if(line==lineLimit) {
1372                 continue;
1373             }
1374
1375             /* read Unicode 1.0 name */
1376             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1377             if(length>maxNameLength) {
1378                 maxNameLength=length;
1379             }
1380             if(line==lineLimit) {
1381                 continue;
1382             }
1383
1384             /* read ISO comment */
1385             length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);
1386             if(length>maxISOCommentLength) {
1387                 maxISOCommentLength=length;
1388             }
1389         }
1390
1391         ++group;
1392         --groupCount;
1393     }
1394
1395     if(tokenLengths!=NULL) {
1396         uprv_free(tokenLengths);
1397     }
1398
1399     /* set gMax... - name length last for threading */
1400     gMaxISOCommentLength=maxISOCommentLength;
1401     gMaxNameLength=maxNameLength;
1402 }
1403
1404 static UBool
1405 calcNameSetsLengths(UErrorCode *pErrorCode) {
1406     static const char extChars[]="0123456789ABCDEF<>-";
1407     int32_t i, maxNameLength;
1408
1409     if(gMaxNameLength!=0) {
1410         return TRUE;
1411     }
1412
1413     if(!isDataLoaded(pErrorCode)) {
1414         return FALSE;
1415     }
1416
1417     /* set hex digits, used in various names, and <>-, used in extended names */
1418     for(i=0; i<sizeof(extChars)-1; ++i) {
1419         SET_ADD(gNameSet, extChars[i]);
1420     }
1421
1422     /* set sets and lengths from algorithmic names */
1423     maxNameLength=calcAlgNameSetsLengths(0);
1424
1425     /* set sets and lengths from extended names */
1426     maxNameLength=calcExtNameSetsLengths(maxNameLength);
1427
1428     /* set sets and lengths from group names, set global maximum values */
1429     calcGroupNameSetsLengths(maxNameLength);
1430
1431     return TRUE;
1432 }
1433
1434 /* public API --------------------------------------------------------------- */
1435
1436 U_CAPI int32_t U_EXPORT2
1437 u_charName(UChar32 code, UCharNameChoice nameChoice,
1438            char *buffer, int32_t bufferLength,
1439            UErrorCode *pErrorCode) {
1440     AlgorithmicRange *algRange;
1441     uint32_t *p;
1442     uint32_t i;
1443     int32_t length;
1444
1445     /* check the argument values */
1446     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1447         return 0;
1448     } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1449               bufferLength<0 || (bufferLength>0 && buffer==NULL)
1450     ) {
1451         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1452         return 0;
1453     }
1454
1455     if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1456         return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1457     }
1458
1459     length=0;
1460
1461     /* try algorithmic names first */
1462     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1463     i=*p;
1464     algRange=(AlgorithmicRange *)(p+1);
1465     while(i>0) {
1466         if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1467             length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1468             break;
1469         }
1470         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1471         --i;
1472     }
1473
1474     if(i==0) {
1475         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1476             length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1477             if (!length) {
1478                 /* extended character name */
1479                 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1480             }
1481         } else {
1482             /* normal character name */
1483             length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1484         }
1485     }
1486
1487     return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1488 }
1489
1490 U_CAPI int32_t U_EXPORT2
1491 u_getISOComment(UChar32 c,
1492                 char *dest, int32_t destCapacity,
1493                 UErrorCode *pErrorCode) {
1494     int32_t length;
1495
1496     /* check the argument values */
1497     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1498         return 0;
1499     } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1500         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1501         return 0;
1502     }
1503
1504     if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1505         return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1506     }
1507
1508     /* the ISO comment is stored like a normal character name */
1509     length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
1510     return u_terminateChars(dest, destCapacity, length, pErrorCode);
1511 }
1512
1513 U_CAPI UChar32 U_EXPORT2
1514 u_charFromName(UCharNameChoice nameChoice,
1515                const char *name,
1516                UErrorCode *pErrorCode) {
1517     char upper[120], lower[120];
1518     FindName findName;
1519     AlgorithmicRange *algRange;
1520     uint32_t *p;
1521     uint32_t i;
1522     UChar32 cp = 0;
1523     char c0;
1524     UChar32 error = 0xffff;     /* Undefined, but use this for backwards compatibility. */
1525
1526     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1527         return error;
1528     }
1529
1530     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1531         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1532         return error;
1533     }
1534
1535     if(!isDataLoaded(pErrorCode)) {
1536         return error;
1537     }
1538
1539     /* construct the uppercase and lowercase of the name first */
1540     for(i=0; i<sizeof(upper); ++i) {
1541         if((c0=*name++)!=0) {
1542             upper[i]=uprv_toupper(c0);
1543             lower[i]=uprv_tolower(c0);
1544         } else {
1545             upper[i]=lower[i]=0;
1546             break;
1547         }
1548     }
1549     if(i==sizeof(upper)) {
1550         /* name too long, there is no such character */
1551         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1552         return error;
1553     }
1554
1555     /* try extended names first */
1556     if (lower[0] == '<') {
1557         if (nameChoice == U_EXTENDED_CHAR_NAME) {
1558             if (lower[--i] == '>') {
1559                 for (--i; lower[i] && lower[i] != '-'; --i) {
1560                 }
1561
1562                 if (lower[i] == '-') { /* We've got a category. */
1563                     uint32_t cIdx;
1564
1565                     lower[i] = 0;
1566
1567                     for (++i; lower[i] != '>'; ++i) {
1568                         if (lower[i] >= '0' && lower[i] <= '9') {
1569                             cp = (cp << 4) + lower[i] - '0';
1570                         } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1571                             cp = (cp << 4) + lower[i] - 'a' + 10;
1572                         } else {
1573                             *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1574                             return error;
1575                         }
1576                     }
1577
1578                     /* Now validate the category name.
1579                        We could use a binary search, or a trie, if
1580                        we really wanted to. */
1581
1582                     for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
1583
1584                         if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
1585                             if (getCharCat(cp) == cIdx) {
1586                                 return cp;
1587                             }
1588                             break;
1589                         }
1590                     }
1591                 }
1592             }
1593         }
1594
1595         *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1596         return error;
1597     }
1598
1599     /* try algorithmic names now */
1600     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1601     i=*p;
1602     algRange=(AlgorithmicRange *)(p+1);
1603     while(i>0) {
1604         if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1605             return cp;
1606         }
1607         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1608         --i;
1609     }
1610
1611     /* normal character name */
1612     findName.otherName=upper;
1613     findName.code=error;
1614     enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
1615     if (findName.code == error) {
1616          *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1617     }
1618     return findName.code;
1619 }
1620
1621 U_CAPI void U_EXPORT2
1622 u_enumCharNames(UChar32 start, UChar32 limit,
1623                 UEnumCharNamesFn *fn,
1624                 void *context,
1625                 UCharNameChoice nameChoice,
1626                 UErrorCode *pErrorCode) {
1627     AlgorithmicRange *algRange;
1628     uint32_t *p;
1629     uint32_t i;
1630
1631     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1632         return;
1633     }
1634
1635     if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1636         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1637         return;
1638     }
1639
1640     if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1641         limit = UCHAR_MAX_VALUE + 1;
1642     }
1643     if((uint32_t)start>=(uint32_t)limit) {
1644         return;
1645     }
1646
1647     if(!isDataLoaded(pErrorCode)) {
1648         return;
1649     }
1650
1651     /* interleave the data-driven ones with the algorithmic ones */
1652     /* iterate over all algorithmic ranges; assume that they are in ascending order */
1653     p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1654     i=*p;
1655     algRange=(AlgorithmicRange *)(p+1);
1656     while(i>0) {
1657         /* enumerate the character names before the current algorithmic range */
1658         /* here: start<limit */
1659         if((uint32_t)start<algRange->start) {
1660             if((uint32_t)limit<=algRange->start) {
1661                 enumNames(uCharNames, start, limit, fn, context, nameChoice);
1662                 return;
1663             }
1664             if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1665                 return;
1666             }
1667             start=(UChar32)algRange->start;
1668         }
1669         /* enumerate the character names in the current algorithmic range */
1670         /* here: algRange->start<=start<limit */
1671         if((uint32_t)start<=algRange->end) {
1672             if((uint32_t)limit<=(algRange->end+1)) {
1673                 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1674                 return;
1675             }
1676             if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1677                 return;
1678             }
1679             start=(UChar32)algRange->end+1;
1680         }
1681         /* continue to the next algorithmic range (here: start<limit) */
1682         algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1683         --i;
1684     }
1685     /* enumerate the character names after the last algorithmic range */
1686     enumNames(uCharNames, start, limit, fn, context, nameChoice);
1687 }
1688
1689 U_CAPI int32_t U_EXPORT2
1690 uprv_getMaxCharNameLength() {
1691     UErrorCode errorCode=U_ZERO_ERROR;
1692     if(calcNameSetsLengths(&errorCode)) {
1693         return gMaxNameLength;
1694     } else {
1695         return 0;
1696     }
1697 }
1698
#if 0
/*
Compiled out: currently not used but left for future use, probably by UnicodeSet.
urename.h and uprops.h changed accordingly.
*/
/*
 * Returns gMaxISOCommentLength after making sure that it has been
 * calculated, or 0 if calcNameSetsLengths() fails.
 */
U_CAPI int32_t U_EXPORT2
uprv_getMaxISOCommentLength() {
    UErrorCode errorCode=U_ZERO_ERROR;

    return calcNameSetsLengths(&errorCode) ? gMaxISOCommentLength : 0;
}
#endif
1714
1715 /**
1716  * Converts the char set cset into a Unicode set uset.
1717  * @param cset Set of 256 bit flags corresponding to a set of chars.
1718  * @param uset USet to receive characters. Existing contents are deleted.
1719  */
1720 static void
1721 charSetToUSet(uint32_t cset[8], USetAdder *sa) {
1722     UChar us[256];
1723     char cs[256];
1724
1725     int32_t i, length;
1726     UErrorCode errorCode;
1727
1728     errorCode=U_ZERO_ERROR;
1729
1730     if(!calcNameSetsLengths(&errorCode)) {
1731         return;
1732     }
1733
1734     /* build a char string with all chars that are used in character names */
1735     length=0;
1736     for(i=0; i<256; ++i) {
1737         if(SET_CONTAINS(cset, i)) {
1738             cs[length++]=(char)i;
1739         }
1740     }
1741
1742     /* convert the char string to a UChar string */
1743     u_charsToUChars(cs, us, length);
1744
1745     /* add each UChar to the USet */
1746     for(i=0; i<length; ++i) {
1747         if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
1748             sa->add(sa->set, us[i]);
1749         }
1750     }
1751 }
1752
1753 /**
1754  * Fills set with characters that are used in Unicode character names.
1755  * @param set USet to receive characters.
1756  */
1757 U_CAPI void U_EXPORT2
1758 uprv_getCharNameCharacters(USetAdder *sa) {
1759     charSetToUSet(gNameSet, sa);
1760 }
1761
#if 0
/*
Compiled out: currently not used but left for future use, probably by UnicodeSet.
urename.h and uprops.h changed accordingly.
*/
/**
 * Adds the characters that are used in ISO comments
 * (the contents of gISOCommentSet) to a set.
 * @param sa USetAdder that receives the characters.
 */
U_CAPI void U_EXPORT2
uprv_getISOCommentCharacters(USetAdder *sa) {
    charSetToUSet(gISOCommentSet, sa);
}
#endif
1776
1777 /* data swapping ------------------------------------------------------------ */
1778
1779 /*
1780  * The token table contains non-negative entries for token bytes,
1781  * and -1 for bytes that represent themselves in the data file's charset.
1782  * -2 entries are used for lead bytes.
1783  *
1784  * Direct bytes (-1 entries) must be translated from the input charset family
1785  * to the output charset family.
1786  * makeTokenMap() writes a permutation mapping for this.
1787  * Use it once for single-/lead-byte tokens and once more for all trail byte
1788  * tokens. (';' is an unused trail byte marked with -1.)
1789  */
1790 static void
1791 makeTokenMap(const UDataSwapper *ds,
1792              int16_t tokens[], uint16_t tokenCount,
1793              uint8_t map[256],
1794              UErrorCode *pErrorCode) {
1795     UBool usedOutChar[256];
1796     uint16_t i, j;
1797     uint8_t c1, c2;
1798
1799     if(U_FAILURE(*pErrorCode)) {
1800         return;
1801     }
1802
1803     if(ds->inCharset==ds->outCharset) {
1804         /* Same charset family: identity permutation */
1805         for(i=0; i<256; ++i) {
1806             map[i]=(uint8_t)i;
1807         }
1808     } else {
1809         uprv_memset(map, 0, 256);
1810         uprv_memset(usedOutChar, 0, 256);
1811
1812         if(tokenCount>256) {
1813             tokenCount=256;
1814         }
1815
1816         /* set the direct bytes (byte 0 always maps to itself) */
1817         for(i=1; i<tokenCount; ++i) {
1818             if(tokens[i]==-1) {
1819                 /* convert the direct byte character */
1820                 c1=(uint8_t)i;
1821                 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1822                 if(U_FAILURE(*pErrorCode)) {
1823                     udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d) - %s\n",
1824                                      i, ds->inCharset, u_errorName(*pErrorCode));
1825                     return;
1826                 }
1827
1828                 /* enter the converted character into the map and mark it used */
1829                 map[c1]=c2;
1830                 usedOutChar[c2]=TRUE;
1831             }
1832         }
1833
1834         /* set the mappings for the rest of the permutation */
1835         for(i=j=1; i<tokenCount; ++i) {
1836             /* set mappings that were not set for direct bytes */
1837             if(map[i]==0) {
1838                 /* set an output byte value that was not used as an output byte above */
1839                 while(usedOutChar[j]) {
1840                     ++j;
1841                 }
1842                 map[i]=(uint8_t)j++;
1843             }
1844         }
1845
1846         /*
1847          * leave mappings at tokenCount and above unset if tokenCount<256
1848          * because they won't be used
1849          */
1850     }
1851 }
1852
1853 U_CAPI int32_t U_EXPORT2
1854 uchar_swapNames(const UDataSwapper *ds,
1855                 const void *inData, int32_t length, void *outData,
1856                 UErrorCode *pErrorCode) {
1857     const UDataInfo *pInfo;
1858     int32_t headerSize;
1859
1860     const uint8_t *inBytes;
1861     uint8_t *outBytes;
1862
1863     uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
1864              offset, i, count, stringsCount;
1865
1866     const AlgorithmicRange *inRange;
1867     AlgorithmicRange *outRange;
1868
1869     /* udata_swapDataHeader checks the arguments */
1870     headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1871     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1872         return 0;
1873     }
1874
1875     /* check data format and format version */
1876     pInfo=(const UDataInfo *)((const char *)inData+4);
1877     if(!(
1878         pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
1879         pInfo->dataFormat[1]==0x6e &&
1880         pInfo->dataFormat[2]==0x61 &&
1881         pInfo->dataFormat[3]==0x6d &&
1882         pInfo->formatVersion[0]==1
1883     )) {
1884         udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
1885                          pInfo->dataFormat[0], pInfo->dataFormat[1],
1886                          pInfo->dataFormat[2], pInfo->dataFormat[3],
1887                          pInfo->formatVersion[0]);
1888         *pErrorCode=U_UNSUPPORTED_ERROR;
1889         return 0;
1890     }
1891
1892     inBytes=(const uint8_t *)inData+headerSize;
1893     outBytes=(uint8_t *)outData+headerSize;
1894     if(length<0) {
1895         algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
1896     } else {
1897         length-=headerSize;
1898         if( length<20 ||
1899             (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
1900         ) {
1901             udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
1902                              length);
1903             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1904             return 0;
1905         }
1906     }
1907
1908     if(length<0) {
1909         /* preflighting: iterate through algorithmic ranges */
1910         offset=algNamesOffset;
1911         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
1912         offset+=4;
1913
1914         for(i=0; i<count; ++i) {
1915             inRange=(const AlgorithmicRange *)(inBytes+offset);
1916             offset+=ds->readUInt16(inRange->size);
1917         }
1918     } else {
1919         /* swap data */
1920         const uint16_t *p;
1921         uint16_t *q, *temp;
1922
1923         int16_t tokens[512];
1924         uint16_t tokenCount;
1925
1926         uint8_t map[256], trailMap[256];
1927
1928         /* copy the data for inaccessible bytes */
1929         if(inBytes!=outBytes) {
1930             uprv_memcpy(outBytes, inBytes, length);
1931         }
1932
1933         /* the initial 4 offsets first */
1934         tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
1935         groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
1936         groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
1937         ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
1938
1939         /*
1940          * now the tokens table
1941          * it needs to be permutated along with the compressed name strings
1942          */
1943         p=(const uint16_t *)(inBytes+16);
1944         q=(uint16_t *)(outBytes+16);
1945
1946         /* read and swap the tokenCount */
1947         tokenCount=ds->readUInt16(*p);
1948         ds->swapArray16(ds, p, 2, q, pErrorCode);
1949         ++p;
1950         ++q;
1951
1952         /* read the first 512 tokens and make the token maps */
1953         if(tokenCount<=512) {
1954             count=tokenCount;
1955         } else {
1956             count=512;
1957         }
1958         for(i=0; i<count; ++i) {
1959             tokens[i]=udata_readInt16(ds, p[i]);
1960         }
1961         for(; i<512; ++i) {
1962             tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
1963         }
1964         makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
1965         makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
1966         if(U_FAILURE(*pErrorCode)) {
1967             return 0;
1968         }
1969
1970         /*
1971          * swap and permutate the tokens
1972          * go through a temporary array to support in-place swapping
1973          */
1974         temp=(uint16_t *)uprv_malloc(tokenCount*2);
1975         if(temp==NULL) {
1976             udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
1977                              tokenCount);
1978             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1979             return 0;
1980         }
1981
1982         /* swap and permutate single-/lead-byte tokens */
1983         for(i=0; i<tokenCount && i<256; ++i) {
1984             ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
1985         }
1986
1987         /* swap and permutate trail-byte tokens */
1988         for(; i<tokenCount; ++i) {
1989             ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
1990         }
1991
1992         /* copy the result into the output and free the temporary array */
1993         uprv_memcpy(q, temp, tokenCount*2);
1994         uprv_free(temp);
1995
1996         /*
1997          * swap the token strings but not a possible padding byte after
1998          * the terminating NUL of the last string
1999          */
2000         udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
2001                                     outBytes+tokenStringOffset, pErrorCode);
2002         if(U_FAILURE(*pErrorCode)) {
2003             udata_printError(ds, "uchar_swapNames(token strings) failed - %s\n",
2004                              u_errorName(*pErrorCode));
2005             return 0;
2006         }
2007
2008         /* swap the group table */
2009         count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
2010         ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
2011                            outBytes+groupsOffset, pErrorCode);
2012
2013         /*
2014          * swap the group strings
2015          * swap the string bytes but not the nibble-encoded string lengths
2016          */
2017         if(ds->inCharset!=ds->outCharset) {
2018             uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];
2019
2020             const uint8_t *inStrings, *nextInStrings;
2021             uint8_t *outStrings;
2022
2023             uint8_t c;
2024
2025             inStrings=inBytes+groupStringOffset;
2026             outStrings=outBytes+groupStringOffset;
2027
2028             stringsCount=algNamesOffset-groupStringOffset;
2029
2030             /* iterate through string groups until only a few padding bytes are left */
2031             while(stringsCount>32) {
2032                 nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
2033
2034                 /* move past the length bytes */
2035                 stringsCount-=(uint32_t)(nextInStrings-inStrings);
2036                 outStrings+=nextInStrings-inStrings;
2037                 inStrings=nextInStrings;
2038
2039                 count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
2040                 stringsCount-=count;
2041
2042                 /* swap the string bytes using map[] and trailMap[] */
2043                 while(count>0) {
2044                     c=*inStrings++;
2045                     *outStrings++=map[c];
2046                     if(tokens[c]!=-2) {
2047                         --count;
2048                     } else {
2049                         /* token lead byte: swap the trail byte, too */
2050                         *outStrings++=trailMap[*inStrings++];
2051                         count-=2;
2052                     }
2053                 }
2054             }
2055         }
2056
2057         /* swap the algorithmic ranges */
2058         offset=algNamesOffset;
2059         count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
2060         ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
2061         offset+=4;
2062
2063         for(i=0; i<count; ++i) {
2064             if(offset>(uint32_t)length) {
2065                 udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
2066                                  length, i);
2067                 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2068                 return 0;
2069             }
2070
2071             inRange=(const AlgorithmicRange *)(inBytes+offset);
2072             outRange=(AlgorithmicRange *)(outBytes+offset);
2073             offset+=ds->readUInt16(inRange->size);
2074
2075             ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
2076             ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
2077             switch(inRange->type) {
2078             case 0:
2079                 /* swap prefix string */
2080                 ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
2081                                     outRange+1, pErrorCode);
2082                 if(U_FAILURE(*pErrorCode)) {
2083                     udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed - %s\n",
2084                                      i, u_errorName(*pErrorCode));
2085                     return 0;
2086                 }
2087                 break;
2088             case 1:
2089                 {
2090                     /* swap factors and the prefix and factor strings */
2091                     uint16_t factors[8];
2092                     uint32_t j, factorsCount;
2093
2094                     factorsCount=inRange->variant;
2095                     if(factorsCount==0 || factorsCount>LENGTHOF(factors)) {
2096                         udata_printError(ds, "uchar_swapNames(): too many factors (%u) in algorithmic range %u\n",
2097                                          factorsCount, i);
2098                         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2099                         return 0;
2100                     }
2101
2102                     /* read and swap the factors */
2103                     p=(const uint16_t *)(inRange+1);
2104                     q=(uint16_t *)(outRange+1);
2105                     for(j=0; j<factorsCount; ++j) {
2106                         factors[j]=ds->readUInt16(p[j]);
2107                     }
2108                     ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
2109
2110                     /* swap the strings, up to the last terminating NUL */
2111                     p+=factorsCount;
2112                     q+=factorsCount;
2113                     stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
2114                     while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
2115                         --stringsCount;
2116                     }
2117                     ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
2118                 }
2119                 break;
2120             default:
2121                 udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
2122                                  inRange->type, i);
2123                 *pErrorCode=U_UNSUPPORTED_ERROR;
2124                 return 0;
2125             }
2126         }
2127     }
2128
2129     return headerSize+(int32_t)offset;
2130 }
2131
2132 /*
2133  * Hey, Emacs, please set the following:
2134  *
2135  * Local Variables:
2136  * indent-tabs-mode: nil
2137  * End:
2138  *
2139  */