]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unames.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / unames.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
374ca955 4* Copyright (C) 1999-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8* file name: unames.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 1999oct04
14* created by: Markus W. Scherer
15*/
16
b75a7d8f 17#include "unicode/utypes.h"
374ca955 18#include "unicode/putil.h"
b75a7d8f
A
19#include "unicode/uchar.h"
20#include "unicode/udata.h"
b75a7d8f
A
21#include "ustr_imp.h"
22#include "umutex.h"
23#include "cmemory.h"
24#include "cstring.h"
25#include "ucln_cmn.h"
374ca955 26#include "udataswp.h"
b75a7d8f
A
27#include "uprops.h"
28
29/* prototypes ------------------------------------------------------------- */
30
31#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
32
33static const char DATA_NAME[] = "unames";
34static const char DATA_TYPE[] = "icu";
35
36#define GROUP_SHIFT 5
37#define LINES_PER_GROUP (1UL<<GROUP_SHIFT)
38#define GROUP_MASK (LINES_PER_GROUP-1)
39
/*
 * Index entry for one group of LINES_PER_GROUP consecutive code points.
 * groupMSB is the code point shifted right by GROUP_SHIFT.
 * The offset of the group's data, relative to groupStringOffset, is
 * (offsetHigh<<16)|offsetLow; it is stored as two 16-bit halves so that
 * the structure needs no padding.
 */
typedef struct {
    uint16_t groupMSB;
    uint16_t offsetHigh;
    uint16_t offsetLow;
} Group;
44
/*
 * Header of one algorithmic name range; the range-specific data
 * (prefix string, factors, suffix elements) follows it directly in memory.
 *
 * type 0: name = prefix + "variant" hexadecimal digits of the code point
 * type 1: name = prefix + factorized elements, with "variant" factors
 */
typedef struct {
    uint32_t start;     /* first code point of the range */
    uint32_t end;       /* last code point of the range (inclusive) */
    uint8_t type;
    uint8_t variant;
    uint16_t size;      /* NOTE(review): presumably the byte size of this range record - confirm */
} AlgorithmicRange;
50
/*
 * Header of the names data.
 * Each field is a byte offset from the start of this structure
 * to the corresponding section of the data.
 */
typedef struct {
    uint32_t tokenStringOffset;
    uint32_t groupsOffset;
    uint32_t groupStringOffset;
    uint32_t algNamesOffset;
} UCharNames;
54
55typedef struct {
56 const char *otherName;
57 UChar32 code;
58} FindName;
59
60#define DO_FIND_NAME NULL
61
62static UDataMemory *uCharNamesData=NULL;
63static UCharNames *uCharNames=NULL;
64static UErrorCode gLoadErrorCode=U_ZERO_ERROR;
65
66/*
67 * Maximum length of character names (regular & 1.0).
68 * Maximum length of ISO comments.
69 */
70static int32_t gMaxNameLength=0, gMaxISOCommentLength=0;
71
72/*
73 * Set of chars used in character names (regular & 1.0).
74 * Set of chars used in ISO comments.
75 * Chars are platform-dependent (can be EBCDIC).
76 */
77static uint32_t gNameSet[8]={ 0 }, gISOCommentSet[8]={ 0 };
78
b75a7d8f
A
79#define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
80#define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
81#define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
82
83#define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
84
85static const char * const
86charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT];
87
374ca955 88/* implementation ----------------------------------------------------------- */
b75a7d8f 89
374ca955
A
90static UBool U_CALLCONV unames_cleanup(void)
91{
92 if(uCharNamesData) {
93 udata_close(uCharNamesData);
94 uCharNamesData = NULL;
95 }
96 if(uCharNames) {
97 uCharNames = NULL;
98 }
99 gMaxNameLength=0;
100 return TRUE;
101}
b75a7d8f 102
374ca955
A
103static UBool U_CALLCONV
104isAcceptable(void *context,
105 const char *type, const char *name,
106 const UDataInfo *pInfo) {
107 return (UBool)(
108 pInfo->size>=20 &&
109 pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
110 pInfo->charsetFamily==U_CHARSET_FAMILY &&
111 pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
112 pInfo->dataFormat[1]==0x6e &&
113 pInfo->dataFormat[2]==0x61 &&
114 pInfo->dataFormat[3]==0x6d &&
115 pInfo->formatVersion[0]==1);
116}
b75a7d8f 117
374ca955
A
118static UBool
119isDataLoaded(UErrorCode *pErrorCode) {
120 /* load UCharNames from file if necessary */
121 UBool isCached;
b75a7d8f 122
374ca955
A
123 /* do this because double-checked locking is broken */
124 umtx_lock(NULL);
125 isCached=uCharNames!=NULL;
126 umtx_unlock(NULL);
b75a7d8f 127
374ca955
A
128 if(!isCached) {
129 UCharNames *names;
130 UDataMemory *data;
b75a7d8f 131
374ca955
A
132 /* check error code from previous attempt */
133 if(U_FAILURE(gLoadErrorCode)) {
134 *pErrorCode=gLoadErrorCode;
135 return FALSE;
b75a7d8f 136 }
b75a7d8f 137
374ca955
A
138 /* open the data outside the mutex block */
139 data=udata_openChoice(NULL, DATA_TYPE, DATA_NAME, isAcceptable, NULL, pErrorCode);
140 if(U_FAILURE(*pErrorCode)) {
141 gLoadErrorCode=*pErrorCode;
142 return FALSE;
b75a7d8f 143 }
b75a7d8f 144
374ca955 145 names=(UCharNames *)udata_getMemory(data);
b75a7d8f 146
374ca955
A
147 /* in the mutex block, set the data for this process */
148 {
149 umtx_lock(NULL);
150 if(uCharNames==NULL) {
151 uCharNames=names;
152 uCharNamesData=data;
153 data=NULL;
154 names=NULL;
155 ucln_common_registerCleanup(UCLN_COMMON_UNAMES, unames_cleanup);
156 }
157 umtx_unlock(NULL);
158 }
b75a7d8f 159
374ca955
A
160 /* if a different thread set it first, then close the extra data */
161 if(data!=NULL) {
162 udata_close(data); /* NULL if it was set correctly */
163 }
b75a7d8f 164 }
374ca955
A
165 return TRUE;
166}
b75a7d8f 167
374ca955
A
/*
 * Append the character c to the output if there is still room, and always
 * count it: buffer and bufferLength are advanced/decremented only while
 * bufferLength>0, bufferPos is incremented unconditionally.
 *
 * c is evaluated only when it is actually written, so it must not have
 * side effects. The do/while(0) wrapper makes the macro behave like a
 * single statement, e.g. in an unbraced if/else.
 */
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c) do { \
    if((bufferLength)>0) { \
        *(buffer)++=(c); \
        --(bufferLength); \
    } \
    ++(bufferPos); \
} while(0)
175
374ca955 176#define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
b75a7d8f 177
374ca955
A
178/*
179 * Important: expandName() and compareName() are almost the same -
180 * apply fixes to both.
181 *
182 * UnicodeData.txt uses ';' as a field separator, so no
183 * field can contain ';' as part of its contents.
184 * In unames.dat, it is marked as token[';']==-1 only if the
185 * semicolon is used in the data file - which is iff we
186 * have Unicode 1.0 names or ISO comments.
187 * So, it will be token[';']==-1 if we store U1.0 names/ISO comments
188 * although we know that it will never be part of a name.
189 */
190static uint16_t
191expandName(UCharNames *names,
192 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
193 char *buffer, uint16_t bufferLength) {
194 uint16_t *tokens=(uint16_t *)names+8;
195 uint16_t token, tokenCount=*tokens++, bufferPos=0;
196 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
197 uint8_t c;
198
199 if(nameChoice==U_UNICODE_10_CHAR_NAME || nameChoice==U_ISO_COMMENT) {
200 /*
201 * skip the modern name if it is not requested _and_
202 * if the semicolon byte value is a character, not a token number
203 */
204 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
205 while(nameLength>0) {
206 --nameLength;
207 if(*name++==';') {
208 break;
209 }
210 }
211 if(nameChoice==U_ISO_COMMENT) {
212 /* skip the Unicode 1.0 name as well to get the ISO comment */
213 while(nameLength>0) {
214 --nameLength;
215 if(*name++==';') {
216 break;
217 }
218 }
219 }
220 } else {
221 /*
222 * the semicolon byte value is a token number, therefore
223 * only modern names are stored in unames.dat and there is no
224 * such requested Unicode 1.0 name here
225 */
226 nameLength=0;
227 }
b75a7d8f
A
228 }
229
230 /* write each letter directly, and write a token word per token */
231 while(nameLength>0) {
232 --nameLength;
233 c=*name++;
234
235 if(c>=tokenCount) {
236 if(c!=';') {
237 /* implicit letter */
238 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
239 } else {
240 /* finished */
241 break;
242 }
243 } else {
244 token=tokens[c];
245 if(token==(uint16_t)(-2)) {
246 /* this is a lead byte for a double-byte token */
247 token=tokens[c<<8|*name++];
248 --nameLength;
249 }
250 if(token==(uint16_t)(-1)) {
251 if(c!=';') {
252 /* explicit letter */
253 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
254 } else {
255 /* stop, but skip the semicolon if we are seeking
256 extended names and there was no 2.0 name but there
257 is a 1.0 name. */
258 if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
259 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
260 continue;
261 }
262 }
263 /* finished */
264 break;
265 }
266 } else {
267 /* write token word */
268 uint8_t *tokenString=tokenStrings+token;
269 while((c=*tokenString++)!=0) {
270 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
271 }
272 }
273 }
274 }
275
276 /* zero-terminate */
277 if(bufferLength>0) {
278 *buffer=0;
279 }
280
281 return bufferPos;
282}
283
284/*
285 * compareName() is almost the same as expandName() except that it compares
286 * the currently expanded name to an input name.
287 * It returns the match/no match result as soon as possible.
288 */
289static UBool
290compareName(UCharNames *names,
291 const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
292 const char *otherName) {
293 uint16_t *tokens=(uint16_t *)names+8;
294 uint16_t token, tokenCount=*tokens++;
295 uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
296 uint8_t c;
297 const char *origOtherName = otherName;
298
299 if(nameChoice==U_UNICODE_10_CHAR_NAME) {
300 /*
301 * skip the modern name if it is not requested _and_
302 * if the semicolon byte value is a character, not a token number
303 */
304 if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
305 while(nameLength>0) {
306 --nameLength;
307 if(*name++==';') {
308 break;
309 }
310 }
311 } else {
312 /*
313 * the semicolon byte value is a token number, therefore
314 * only modern names are stored in unames.dat and there is no
315 * such requested Unicode 1.0 name here
316 */
317 nameLength=0;
318 }
319 }
320
321 /* compare each letter directly, and compare a token word per token */
322 while(nameLength>0) {
323 --nameLength;
324 c=*name++;
325
326 if(c>=tokenCount) {
327 if(c!=';') {
328 /* implicit letter */
329 if((char)c!=*otherName++) {
330 return FALSE;
331 }
332 } else {
333 /* finished */
334 break;
335 }
336 } else {
337 token=tokens[c];
338 if(token==(uint16_t)(-2)) {
339 /* this is a lead byte for a double-byte token */
340 token=tokens[c<<8|*name++];
341 --nameLength;
342 }
343 if(token==(uint16_t)(-1)) {
344 if(c!=';') {
345 /* explicit letter */
346 if((char)c!=*otherName++) {
347 return FALSE;
348 }
349 } else {
350 /* stop, but skip the semicolon if we are seeking
351 extended names and there was no 2.0 name but there
352 is a 1.0 name. */
353 if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
354 if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
355 continue;
356 }
357 }
358 /* finished */
359 break;
360 }
361 } else {
362 /* write token word */
363 uint8_t *tokenString=tokenStrings+token;
364 while((c=*tokenString++)!=0) {
365 if((char)c!=*otherName++) {
366 return FALSE;
367 }
368 }
369 }
370 }
371 }
374ca955
A
372
373 /* complete match? */
374 return (UBool)(*otherName==0);
375}
376
377static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
378 "unassigned",
379 "uppercase letter",
380 "lowercase letter",
381 "titlecase letter",
382 "modifier letter",
383 "other letter",
384 "non spacing mark",
385 "enclosing mark",
386 "combining spacing mark",
387 "decimal digit number",
388 "letter number",
389 "other number",
390 "space separator",
391 "line separator",
392 "paragraph separator",
393 "control",
394 "format",
395 "private use area",
396 "surrogate",
397 "dash punctuation",
398 "start punctuation",
399 "end punctuation",
400 "connector punctuation",
401 "other punctuation",
402 "math symbol",
403 "currency symbol",
404 "modifier symbol",
405 "other symbol",
406 "initial punctuation",
407 "final punctuation",
408 "noncharacter",
409 "lead surrogate",
410 "trail surrogate"
411};
412
413static uint8_t getCharCat(UChar32 cp) {
414 uint8_t cat;
415
416 if (UTF_IS_UNICODE_NONCHAR(cp)) {
417 return U_NONCHARACTER_CODE_POINT;
418 }
419
420 if ((cat = u_charType(cp)) == U_SURROGATE) {
421 cat = UTF_IS_LEAD(cp) ? U_LEAD_SURROGATE : U_TRAIL_SURROGATE;
422 }
423
424 return cat;
425}
426
427static const char *getCharCatName(UChar32 cp) {
428 uint8_t cat = getCharCat(cp);
429
430 /* Return unknown if the table of names above is not up to
431 date. */
432
433 if (cat >= LENGTHOF(charCatNames)) {
434 return "unknown";
435 } else {
436 return charCatNames[cat];
437 }
438}
439
/*
 * Write the extended name "<category-XXXX>" for code, where XXXX is the
 * code point in uppercase hexadecimal with at least 4 digits.
 *
 * At most bufferLength characters are written; the output is NOT
 * zero-terminated here (callers store the terminator themselves).
 * Returns the full length of the extended name, which may be larger
 * than bufferLength.
 *
 * The hexadecimal digits are stored by position, and only at positions
 * that are inside the remaining buffer; a buffer that is too small gets
 * the leading digits that fit and nothing is written past its end.
 */
static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
    const char *catname = getCharCatName(code);
    uint16_t length = 0;
    uint16_t digitsWritten;

    uint32_t cp;
    int ndigits, i;

    WRITE_CHAR(buffer, bufferLength, length, '<');
    /* WRITE_CHAR may not evaluate its last argument: advance catname separately */
    while (*catname != 0) {
        WRITE_CHAR(buffer, bufferLength, length, *catname);
        ++catname;
    }
    WRITE_CHAR(buffer, bufferLength, length, '-');

    /* count the significant hexadecimal digits, use at least 4 */
    for (cp = code, ndigits = 0; cp != 0; ++ndigits, cp >>= 4) {
    }
    if (ndigits < 4) {
        ndigits = 4;
    }

    /* produce the digits from the least significant one, right to left */
    for (cp = code, i = ndigits; i > 0; cp >>= 4) {
        --i;
        if (i < (int)bufferLength) {
            uint8_t v = (uint8_t)(cp & 0xf);
            buffer[i] = (char)(v < 10 ? '0' + v : 'A' + v - 10);
        }
    }

    /* advance only over what was actually stored */
    digitsWritten = (uint16_t)(ndigits < (int)bufferLength ? ndigits : bufferLength);
    buffer += digitsWritten;
    bufferLength = (uint16_t)(bufferLength - digitsWritten);
    length = (uint16_t)(length + ndigits);

    WRITE_CHAR(buffer, bufferLength, length, '>');

    return length;
}
466
467/*
468 * getGroup() does a binary search for the group that contains the
469 * Unicode code point "code".
470 * The return value is always a valid Group* that may contain "code"
471 * or else is the highest group before "code".
472 * If the lowest group is after "code", then that one is returned.
473 */
474static Group *
475getGroup(UCharNames *names, uint32_t code) {
476 uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
477 start=0,
478 limit=*(uint16_t *)((char *)names+names->groupsOffset),
479 number;
480 Group *groups=(Group *)((char *)names+names->groupsOffset+2);
481
482 /* binary search for the group of names that contains the one for code */
483 while(start<limit-1) {
484 number=(uint16_t)((start+limit)/2);
485 if(groupMSB<groups[number].groupMSB) {
486 limit=number;
487 } else {
488 start=number;
489 }
490 }
491
492 /* return this regardless of whether it is an exact match */
493 return groups+start;
494}
495
496/*
497 * expandGroupLengths() reads a block of compressed lengths of 32 strings and
498 * expands them into offsets and lengths for each string.
499 * Lengths are stored with a variable-width encoding in consecutive nibbles:
500 * If a nibble<0xc, then it is the length itself (0=empty string).
501 * If a nibble>=0xc, then it forms a length value with the following nibble.
502 * Calculation see below.
503 * The offsets and lengths arrays must be at least 33 (one more) long because
504 * there is no check here at the end if the last nibble is still used.
505 */
506static const uint8_t *
507expandGroupLengths(const uint8_t *s,
508 uint16_t offsets[LINES_PER_GROUP+1], uint16_t lengths[LINES_PER_GROUP+1]) {
509 /* read the lengths of the 32 strings in this group and get each string's offset */
510 uint16_t i=0, offset=0, length=0;
511 uint8_t lengthByte;
512
513 /* all 32 lengths must be read to get the offset of the first group string */
514 while(i<LINES_PER_GROUP) {
515 lengthByte=*s++;
516
517 /* read even nibble - MSBs of lengthByte */
518 if(length>=12) {
519 /* double-nibble length spread across two bytes */
520 length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
521 lengthByte&=0xf;
522 } else if((lengthByte /* &0xf0 */)>=0xc0) {
523 /* double-nibble length spread across this one byte */
524 length=(uint16_t)((lengthByte&0x3f)+12);
525 } else {
526 /* single-nibble length in MSBs */
527 length=(uint16_t)(lengthByte>>4);
528 lengthByte&=0xf;
529 }
530
531 *offsets++=offset;
532 *lengths++=length;
533
534 offset+=length;
535 ++i;
536
537 /* read odd nibble - LSBs of lengthByte */
538 if((lengthByte&0xf0)==0) {
539 /* this nibble was not consumed for a double-nibble length above */
540 length=lengthByte;
541 if(length<12) {
542 /* single-nibble length in LSBs */
543 *offsets++=offset;
544 *lengths++=length;
545
546 offset+=length;
547 ++i;
548 }
549 } else {
550 length=0; /* prevent double-nibble detection in the next iteration */
551 }
552 }
553
554 /* now, s is at the first group string */
555 return s;
556}
557
558static uint16_t
559expandGroupName(UCharNames *names, Group *group,
560 uint16_t lineNumber, UCharNameChoice nameChoice,
561 char *buffer, uint16_t bufferLength) {
562 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
563 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
564 (group->offsetHigh<<16|group->offsetLow);
565 s=expandGroupLengths(s, offsets, lengths);
566 return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
567 buffer, bufferLength);
568}
569
570static uint16_t
571getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice,
572 char *buffer, uint16_t bufferLength) {
573 Group *group=getGroup(names, code);
574 if((uint16_t)(code>>GROUP_SHIFT)==group->groupMSB) {
575 return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
576 buffer, bufferLength);
577 } else {
578 /* group not found */
579 /* zero-terminate */
580 if(bufferLength>0) {
581 *buffer=0;
582 }
583 return 0;
584 }
b75a7d8f
A
585}
586
587/*
588 * enumGroupNames() enumerates all the names in a 32-group
589 * and either calls the enumerator function or finds a given input name.
590 */
591static UBool
592enumGroupNames(UCharNames *names, Group *group,
593 UChar32 start, UChar32 end,
594 UEnumCharNamesFn *fn, void *context,
595 UCharNameChoice nameChoice) {
596 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
597 const uint8_t *s=(uint8_t *)names+names->groupStringOffset+
598 (group->offsetHigh<<16|group->offsetLow);
599
600 s=expandGroupLengths(s, offsets, lengths);
601 if(fn!=DO_FIND_NAME) {
602 char buffer[200];
603 uint16_t length;
604
605 while(start<=end) {
606 length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
607 if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
608 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
609 }
610 /* here, we assume that the buffer is large enough */
611 if(length>0) {
612 if(!fn(context, start, nameChoice, buffer, length)) {
613 return FALSE;
614 }
615 }
616 ++start;
617 }
618 } else {
619 const char *otherName=((FindName *)context)->otherName;
620 while(start<=end) {
621 if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
622 ((FindName *)context)->code=start;
623 return FALSE;
624 }
625 ++start;
626 }
627 }
628 return TRUE;
629}
630
631/*
632 * enumExtNames enumerate extended names.
633 * It only needs to do it if it is called with a real function and not
634 * with the dummy DO_FIND_NAME, because u_charFromName() does a check
635 * for extended names by itself.
636 */
637static UBool
638enumExtNames(UChar32 start, UChar32 end,
639 UEnumCharNamesFn *fn, void *context)
640{
641 if(fn!=DO_FIND_NAME) {
642 char buffer[200];
643 uint16_t length;
644
645 while(start<=end) {
646 buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
647 /* here, we assume that the buffer is large enough */
648 if(length>0) {
649 if(!fn(context, start, U_EXTENDED_CHAR_NAME, buffer, length)) {
650 return FALSE;
651 }
652 }
653 ++start;
654 }
655 }
656
657 return TRUE;
658}
659
660static UBool
661enumNames(UCharNames *names,
662 UChar32 start, UChar32 limit,
663 UEnumCharNamesFn *fn, void *context,
664 UCharNameChoice nameChoice) {
665 uint16_t startGroupMSB, endGroupMSB, groupCount;
666 Group *group, *groupLimit;
667
668 startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
669 endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
670
671 /* find the group that contains start, or the highest before it */
672 group=getGroup(names, start);
673
674 if(startGroupMSB==endGroupMSB) {
675 if(startGroupMSB==group->groupMSB) {
676 /* if start and limit-1 are in the same group, then enumerate only in that one */
677 return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
678 }
679 } else {
680 groupCount=*(uint16_t *)((char *)names+names->groupsOffset);
681 groupLimit=(Group *)((char *)names+names->groupsOffset+2)+groupCount;
682
683 if(startGroupMSB==group->groupMSB) {
684 /* enumerate characters in the partial start group */
685 if((start&GROUP_MASK)!=0) {
686 if(!enumGroupNames(names, group,
687 start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
688 fn, context, nameChoice)) {
689 return FALSE;
690 }
691 ++group; /* continue with the next group */
692 }
693 } else if(startGroupMSB>group->groupMSB) {
694 /* make sure that we start enumerating with the first group after start */
695 if (group + 1 < groupLimit && (group + 1)->groupMSB > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
696 UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT;
697 if (end > limit) {
698 end = limit;
699 }
700 if (!enumExtNames(start, end - 1, fn, context)) {
701 return FALSE;
702 }
703 }
704 ++group;
705 }
706
707 /* enumerate entire groups between the start- and end-groups */
708 while(group<groupLimit && group->groupMSB<endGroupMSB) {
709 start=(UChar32)group->groupMSB<<GROUP_SHIFT;
710 if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
711 return FALSE;
712 }
713 if (group + 1 < groupLimit && (group + 1)->groupMSB > group->groupMSB + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
714 UChar32 end = (group + 1)->groupMSB << GROUP_SHIFT;
715 if (end > limit) {
716 end = limit;
717 }
718 if (!enumExtNames((group->groupMSB + 1) << GROUP_SHIFT, end - 1, fn, context)) {
719 return FALSE;
720 }
721 }
722 ++group;
723 }
724
725 /* enumerate within the end group (group->groupMSB==endGroupMSB) */
726 if(group<groupLimit && group->groupMSB==endGroupMSB) {
727 return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
728 } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
729 UChar32 next = ((group - 1)->groupMSB + 1) << GROUP_SHIFT;
730 if (next > start) {
731 start = next;
732 }
733 } else {
734 return TRUE;
735 }
736 }
737
738 /* we have not found a group, which means everything is made of
739 extended names. */
740 if (nameChoice == U_EXTENDED_CHAR_NAME) {
741 if (limit > UCHAR_MAX_VALUE + 1) {
742 limit = UCHAR_MAX_VALUE + 1;
743 }
744 return enumExtNames(start, limit - 1, fn, context);
745 }
746
747 return TRUE;
748}
749
374ca955
A
/*
 * Write the factorized suffix of an algorithmic name.
 *
 * code (an offset inside the range) is split into one index per factor by
 * modulo arithmetic. s points to the suffix element strings: for each
 * factor, factors[i] consecutive zero-terminated strings. The element
 * selected by each index is appended to buffer.
 *
 * Output: indexes[] receives the index for each factor; elementBases[] and
 * elements[] (each may be NULL) receive, per factor, the pointer to its
 * first element string and to its selected element string.
 * At most bufferLength characters are written, the result is
 * zero-terminated if there is room, and the full suffix length is returned.
 */
static uint16_t
writeFactorSuffix(const uint16_t *factors, uint16_t count,
                  const char *s, /* suffix elements */
                  uint32_t code,
                  uint16_t indexes[8], /* output fields from here */
                  const char *elementBases[8], const char *elements[8],
                  char *buffer, uint16_t bufferLength) {
    uint16_t i, factor, toSkip, written=0;
    char c;

    /* from here on, count is the index of the last factor */
    --count;

    /* split code into the indexes, last factor first */
    for(i=count; i>0; --i) {
        factor=factors[i];
        indexes[i]=(uint16_t)(code%factor);
        code/=factor;
    }
    /*
     * no modulo for the first factor: the caller is expected to pass
     * a code that lies inside the range
     */
    indexes[0]=(uint16_t)code;

    /* write one element per factor */
    for(i=0;; ++i) {
        if(elementBases!=NULL) {
            elementBases[i]=s;
        }

        /* skip indexes[i] strings to get to the selected element */
        for(toSkip=indexes[i]; toSkip>0; --toSkip) {
            while(*s++!=0) {}
        }
        if(elements!=NULL) {
            elements[i]=s;
        }

        /* write element */
        while((c=*s++)!=0) {
            WRITE_CHAR(buffer, bufferLength, written, c);
        }

        /* after the last factor there is nothing left to skip */
        if(i>=count) {
            break;
        }

        /* skip the remaining strings of this factor */
        for(toSkip=(uint16_t)(factors[i]-indexes[i]-1); toSkip>0; --toSkip) {
            while(*s++!=0) {}
        }
    }

    /* zero-terminate */
    if(bufferLength>0) {
        *buffer=0;
    }

    return written;
}
823
b75a7d8f
A
824/*
825 * Important:
826 * Parts of findAlgName() are almost the same as some of getAlgName().
827 * Fixes must be applied to both.
828 */
829static uint16_t
830getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice,
831 char *buffer, uint16_t bufferLength) {
832 uint16_t bufferPos=0;
833
834 /*
835 * Do not write algorithmic Unicode 1.0 names because
836 * Unihan names are the same as the modern ones,
837 * extension A was only introduced with Unicode 3.0, and
838 * the Hangul syllable block was moved and changed around Unicode 1.1.5.
839 */
840 if(nameChoice==U_UNICODE_10_CHAR_NAME) {
841 /* zero-terminate */
842 if(bufferLength>0) {
843 *buffer=0;
844 }
845 return 0;
846 }
847
848 switch(range->type) {
849 case 0: {
850 /* name = prefix hex-digits */
851 const char *s=(const char *)(range+1);
852 char c;
853
854 uint16_t i, count;
855
856 /* copy prefix */
857 while((c=*s++)!=0) {
858 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
859 }
860
861 /* write hexadecimal code point value */
862 count=range->variant;
863
864 /* zero-terminate */
865 if(count<bufferLength) {
866 buffer[count]=0;
867 }
868
869 for(i=count; i>0;) {
870 if(--i<bufferLength) {
871 c=(char)(code&0xf);
872 if(c<10) {
873 c+='0';
874 } else {
875 c+='A'-10;
876 }
877 buffer[i]=c;
878 }
879 code>>=4;
880 }
881
882 bufferPos+=count;
883 break;
884 }
885 case 1: {
886 /* name = prefix factorized-elements */
887 uint16_t indexes[8];
888 const uint16_t *factors=(const uint16_t *)(range+1);
889 uint16_t count=range->variant;
890 const char *s=(const char *)(factors+count);
891 char c;
892
893 /* copy prefix */
894 while((c=*s++)!=0) {
895 WRITE_CHAR(buffer, bufferLength, bufferPos, c);
896 }
897
898 bufferPos+=writeFactorSuffix(factors, count,
899 s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
900 break;
901 }
902 default:
903 /* undefined type */
904 /* zero-terminate */
905 if(bufferLength>0) {
906 *buffer=0;
907 }
908 break;
909 }
910
911 return bufferPos;
912}
913
b75a7d8f
A
914/*
915 * Important: enumAlgNames() and findAlgName() are almost the same.
916 * Any fix must be applied to both.
917 */
918static UBool
919enumAlgNames(AlgorithmicRange *range,
920 UChar32 start, UChar32 limit,
921 UEnumCharNamesFn *fn, void *context,
922 UCharNameChoice nameChoice) {
923 char buffer[200];
924 uint16_t length;
925
926 if(nameChoice==U_UNICODE_10_CHAR_NAME) {
927 return TRUE;
928 }
929
930 switch(range->type) {
931 case 0: {
932 char *s, *end;
933 char c;
934
935 /* get the full name of the start character */
936 length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
937 if(length<=0) {
938 return TRUE;
939 }
940
941 /* call the enumerator function with this first character */
942 if(!fn(context, start, nameChoice, buffer, length)) {
943 return FALSE;
944 }
945
946 /* go to the end of the name; all these names have the same length */
947 end=buffer;
948 while(*end!=0) {
949 ++end;
950 }
951
952 /* enumerate the rest of the names */
953 while(++start<limit) {
954 /* increment the hexadecimal number on a character-basis */
955 s=end;
956 for (;;) {
957 c=*--s;
958 if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
959 *s=(char)(c+1);
960 break;
961 } else if(c=='9') {
962 *s='A';
963 break;
964 } else if(c=='F') {
965 *s='0';
966 }
967 }
968
969 if(!fn(context, start, nameChoice, buffer, length)) {
970 return FALSE;
971 }
972 }
973 break;
974 }
975 case 1: {
976 uint16_t indexes[8];
977 const char *elementBases[8], *elements[8];
978 const uint16_t *factors=(const uint16_t *)(range+1);
979 uint16_t count=range->variant;
980 const char *s=(const char *)(factors+count);
981 char *suffix, *t;
982 uint16_t prefixLength, i, index;
983
984 char c;
985
986 /* name = prefix factorized-elements */
987
988 /* copy prefix */
989 suffix=buffer;
990 prefixLength=0;
991 while((c=*s++)!=0) {
992 *suffix++=c;
993 ++prefixLength;
994 }
995
996 /* append the suffix of the start character */
997 length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
998 s, (uint32_t)start-range->start,
999 indexes, elementBases, elements,
1000 suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
1001
1002 /* call the enumerator function with this first character */
1003 if(!fn(context, start, nameChoice, buffer, length)) {
1004 return FALSE;
1005 }
1006
1007 /* enumerate the rest of the names */
1008 while(++start<limit) {
1009 /* increment the indexes in lexical order bound by the factors */
1010 i=count;
1011 for (;;) {
1012 index=(uint16_t)(indexes[--i]+1);
1013 if(index<factors[i]) {
1014 /* skip one index and its element string */
1015 indexes[i]=index;
1016 s=elements[i];
1017 while(*s++!=0) {
1018 }
1019 elements[i]=s;
1020 break;
1021 } else {
1022 /* reset this index to 0 and its element string to the first one */
1023 indexes[i]=0;
1024 elements[i]=elementBases[i];
1025 }
1026 }
1027
1028 /* to make matters a little easier, just append all elements to the suffix */
1029 t=suffix;
1030 length=prefixLength;
1031 for(i=0; i<count; ++i) {
1032 s=elements[i];
1033 while((c=*s++)!=0) {
1034 *t++=c;
1035 ++length;
1036 }
1037 }
1038 /* zero-terminate */
1039 *t=0;
1040
1041 if(!fn(context, start, nameChoice, buffer, length)) {
1042 return FALSE;
1043 }
1044 }
1045 break;
1046 }
1047 default:
1048 /* undefined type */
1049 break;
1050 }
1051
1052 return TRUE;
1053}
1054
1055/*
1056 * findAlgName() is almost the same as enumAlgNames() except that it
1057 * returns the code point for a name if it fits into the range.
1058 * It returns 0xffff otherwise.
1059 */
1060static UChar32
1061findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
1062 UChar32 code;
1063
1064 if(nameChoice==U_UNICODE_10_CHAR_NAME) {
1065 return 0xffff;
1066 }
1067
1068 switch(range->type) {
1069 case 0: {
1070 /* name = prefix hex-digits */
1071 const char *s=(const char *)(range+1);
1072 char c;
1073
1074 uint16_t i, count;
1075
1076 /* compare prefix */
1077 while((c=*s++)!=0) {
1078 if((char)c!=*otherName++) {
1079 return 0xffff;
1080 }
1081 }
1082
1083 /* read hexadecimal code point value */
1084 count=range->variant;
1085 code=0;
1086 for(i=0; i<count; ++i) {
1087 c=*otherName++;
1088 if('0'<=c && c<='9') {
1089 code=(code<<4)|(c-'0');
1090 } else if('A'<=c && c<='F') {
1091 code=(code<<4)|(c-'A'+10);
1092 } else {
1093 return 0xffff;
1094 }
1095 }
1096
1097 /* does it fit into the range? */
1098 if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1099 return code;
1100 }
1101 break;
1102 }
1103 case 1: {
1104 char buffer[64];
1105 uint16_t indexes[8];
1106 const char *elementBases[8], *elements[8];
1107 const uint16_t *factors=(const uint16_t *)(range+1);
1108 uint16_t count=range->variant;
1109 const char *s=(const char *)(factors+count), *t;
1110 UChar32 start, limit;
1111 uint16_t i, index;
1112
1113 char c;
1114
1115 /* name = prefix factorized-elements */
1116
1117 /* compare prefix */
1118 while((c=*s++)!=0) {
1119 if((char)c!=*otherName++) {
1120 return 0xffff;
1121 }
1122 }
1123
1124 start=(UChar32)range->start;
1125 limit=(UChar32)(range->end+1);
1126
1127 /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
1128 writeFactorSuffix(factors, count, s, 0,
1129 indexes, elementBases, elements, buffer, sizeof(buffer));
1130
1131 /* compare the first suffix */
1132 if(0==uprv_strcmp(otherName, buffer)) {
1133 return start;
1134 }
1135
1136 /* enumerate and compare the rest of the suffixes */
1137 while(++start<limit) {
1138 /* increment the indexes in lexical order bound by the factors */
1139 i=count;
1140 for (;;) {
1141 index=(uint16_t)(indexes[--i]+1);
1142 if(index<factors[i]) {
1143 /* skip one index and its element string */
374ca955
A
1144 indexes[i]=index;
1145 s=elements[i];
1146 while(*s++!=0) {}
1147 elements[i]=s;
1148 break;
1149 } else {
1150 /* reset this index to 0 and its element string to the first one */
1151 indexes[i]=0;
1152 elements[i]=elementBases[i];
1153 }
1154 }
b75a7d8f 1155
374ca955
A
1156 /* to make matters a little easier, just compare all elements of the suffix */
1157 t=otherName;
1158 for(i=0; i<count; ++i) {
1159 s=elements[i];
1160 while((c=*s++)!=0) {
1161 if(c!=*t++) {
1162 s=""; /* does not match */
1163 i=99;
1164 }
1165 }
1166 }
1167 if(i<99 && *t==0) {
1168 return start;
1169 }
1170 }
1171 break;
b75a7d8f 1172 }
374ca955
A
1173 default:
1174 /* undefined type */
1175 break;
b75a7d8f 1176 }
b75a7d8f 1177
374ca955 1178 return 0xffff;
b75a7d8f
A
1179}
1180
1181/* sets of name characters, maximum name lengths ---------------------------- */
1182
/*
 * Bit-set helpers for a set of 256 byte values stored in eight 32-bit words.
 * The argument c is reduced to uint8_t; its upper 3 bits select the word and
 * its lower 5 bits select the bit inside that word.
 * The parameter c is fully parenthesized so that an expression argument
 * (for example base+1) is cast as a whole and cannot index outside the set.
 * Note: c is evaluated twice; do not pass expressions with side effects.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1185
/*
 * Adds every char of the NUL-terminated string s to the 256-bit set
 * and returns the number of chars that precede the terminating NUL.
 */
static int32_t
calcStringSetLength(uint32_t set[8], const char *s) {
    const char *start=s;
    char c;

    for(c=*s; c!=0; c=*++s) {
        SET_ADD(set, c);
    }
    return (int32_t)(s-start);
}
1197
1198static int32_t
1199calcAlgNameSetsLengths(int32_t maxNameLength) {
1200 AlgorithmicRange *range;
1201 uint32_t *p;
1202 uint32_t rangeCount;
1203 int32_t length;
1204
1205 /* enumerate algorithmic ranges */
1206 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1207 rangeCount=*p;
1208 range=(AlgorithmicRange *)(p+1);
1209 while(rangeCount>0) {
1210 switch(range->type) {
1211 case 0:
1212 /* name = prefix + (range->variant times) hex-digits */
1213 /* prefix */
1214 length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1215 if(length>maxNameLength) {
1216 maxNameLength=length;
1217 }
1218 break;
1219 case 1: {
1220 /* name = prefix factorized-elements */
1221 const uint16_t *factors=(const uint16_t *)(range+1);
1222 const char *s;
1223 int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1224
1225 /* prefix length */
1226 s=(const char *)(factors+count);
1227 length=calcStringSetLength(gNameSet, s);
1228 s+=length+1; /* start of factor suffixes */
1229
1230 /* get the set and maximum factor suffix length for each factor */
1231 for(i=0; i<count; ++i) {
1232 maxFactorLength=0;
1233 for(factor=factors[i]; factor>0; --factor) {
1234 factorLength=calcStringSetLength(gNameSet, s);
1235 s+=factorLength+1;
1236 if(factorLength>maxFactorLength) {
1237 maxFactorLength=factorLength;
1238 }
1239 }
1240 length+=maxFactorLength;
1241 }
1242
1243 if(length>maxNameLength) {
1244 maxNameLength=length;
1245 }
1246 break;
1247 }
1248 default:
1249 /* unknown type */
1250 break;
1251 }
1252
1253 range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1254 --rangeCount;
1255 }
1256 return maxNameLength;
1257}
1258
1259static int32_t
1260calcExtNameSetsLengths(int32_t maxNameLength) {
1261 int32_t i, length;
1262
1263 for(i=0; i<LENGTHOF(charCatNames); ++i) {
1264 /*
1265 * for each category, count the length of the category name
1266 * plus 9=
1267 * 2 for <>
1268 * 1 for -
1269 * 6 for most hex digits per code point
1270 */
1271 length=9+calcStringSetLength(gNameSet, charCatNames[i]);
1272 if(length>maxNameLength) {
1273 maxNameLength=length;
1274 }
1275 }
1276 return maxNameLength;
1277}
1278
/*
 * Reads one ';'-terminated field of a compressed name line, adds all of the
 * characters of its expansion to set, and returns the expanded length.
 * Reading stops at lineLimit or after consuming a ';' byte.
 * On return, *pLine points to where reading stopped.
 * tokenLengths may be NULL; otherwise it caches token word lengths,
 * indexed by token number, with 0 meaning "not computed yet".
 */
static int32_t
calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
                  uint32_t set[8],
                  const uint8_t **pLine, const uint8_t *lineLimit) {
    const uint8_t *cursor=*pLine;
    int32_t total=0;

    for(;;) {
        uint16_t c, token;
        int32_t wordLength;

        if(cursor==lineLimit) {
            break;
        }
        c=*cursor++;
        if(c==(uint8_t)';') {
            break;
        }

        if(c<tokenCount) {
            token=tokens[c];
            if(token==(uint16_t)(-2)) {
                /* this is a lead byte for a double-byte token */
                c=(uint16_t)(c<<8|*cursor++);
                token=tokens[c];
            }
            if(token!=(uint16_t)(-1)) {
                /* token word: use the cached length if there is one */
                if(tokenLengths==NULL) {
                    wordLength=calcStringSetLength(set, (const char *)tokenStrings+token);
                } else {
                    wordLength=tokenLengths[c];
                    if(wordLength==0) {
                        wordLength=calcStringSetLength(set, (const char *)tokenStrings+token);
                        tokenLengths[c]=(int8_t)wordLength;
                    }
                }
                total+=wordLength;
                continue;
            }
        }

        /* implicit letter (c>=tokenCount) or explicit letter (token==-1) */
        SET_ADD(set, c);
        ++total;
    }

    *pLine=cursor;
    return total;
}
1323
1324static void
1325calcGroupNameSetsLengths(int32_t maxNameLength) {
1326 uint16_t offsets[LINES_PER_GROUP+2], lengths[LINES_PER_GROUP+2];
1327
1328 uint16_t *tokens=(uint16_t *)uCharNames+8;
1329 uint16_t tokenCount=*tokens++;
1330 uint8_t *tokenStrings=(uint8_t *)uCharNames+uCharNames->tokenStringOffset;
1331
1332 int8_t *tokenLengths;
1333
1334 uint16_t *groups;
1335 Group *group;
1336 const uint8_t *s, *line, *lineLimit;
1337
1338 int32_t maxISOCommentLength=0;
1339 int32_t groupCount, lineNumber, length;
1340
1341 tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1342 if(tokenLengths!=NULL) {
1343 uprv_memset(tokenLengths, 0, tokenCount);
1344 }
1345
1346 groups=(uint16_t *)((char *)uCharNames+uCharNames->groupsOffset);
1347 groupCount=*groups++;
1348 group=(Group *)groups;
1349
1350 /* enumerate all groups */
1351 while(groupCount>0) {
1352 s=(uint8_t *)uCharNames+uCharNames->groupStringOffset+
1353 ((int32_t)group->offsetHigh<<16|group->offsetLow);
1354 s=expandGroupLengths(s, offsets, lengths);
1355
1356 /* enumerate all lines in each group */
1357 for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1358 line=s+offsets[lineNumber];
1359 length=lengths[lineNumber];
1360 if(length==0) {
1361 continue;
1362 }
1363
1364 lineLimit=line+length;
1365
374ca955
A
1366 /* read regular name */
1367 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1368 if(length>maxNameLength) {
1369 maxNameLength=length;
1370 }
1371 if(line==lineLimit) {
1372 continue;
1373 }
1374
1375 /* read Unicode 1.0 name */
1376 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1377 if(length>maxNameLength) {
1378 maxNameLength=length;
1379 }
1380 if(line==lineLimit) {
1381 continue;
1382 }
1383
1384 /* read ISO comment */
1385 length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);
1386 if(length>maxISOCommentLength) {
1387 maxISOCommentLength=length;
1388 }
1389 }
1390
1391 ++group;
1392 --groupCount;
1393 }
1394
1395 if(tokenLengths!=NULL) {
1396 uprv_free(tokenLengths);
1397 }
1398
1399 /* set gMax... - name length last for threading */
1400 gMaxISOCommentLength=maxISOCommentLength;
1401 gMaxNameLength=maxNameLength;
1402}
1403
1404static UBool
1405calcNameSetsLengths(UErrorCode *pErrorCode) {
1406 static const char extChars[]="0123456789ABCDEF<>-";
1407 int32_t i, maxNameLength;
1408
1409 if(gMaxNameLength!=0) {
1410 return TRUE;
1411 }
1412
1413 if(!isDataLoaded(pErrorCode)) {
1414 return FALSE;
1415 }
1416
1417 /* set hex digits, used in various names, and <>-, used in extended names */
1418 for(i=0; i<sizeof(extChars)-1; ++i) {
1419 SET_ADD(gNameSet, extChars[i]);
1420 }
1421
1422 /* set sets and lengths from algorithmic names */
1423 maxNameLength=calcAlgNameSetsLengths(0);
1424
1425 /* set sets and lengths from extended names */
1426 maxNameLength=calcExtNameSetsLengths(maxNameLength);
1427
1428 /* set sets and lengths from group names, set global maximum values */
1429 calcGroupNameSetsLengths(maxNameLength);
1430
1431 return TRUE;
1432}
1433
1434/* public API --------------------------------------------------------------- */
1435
1436U_CAPI int32_t U_EXPORT2
1437u_charName(UChar32 code, UCharNameChoice nameChoice,
1438 char *buffer, int32_t bufferLength,
1439 UErrorCode *pErrorCode) {
1440 AlgorithmicRange *algRange;
1441 uint32_t *p;
1442 uint32_t i;
1443 int32_t length;
1444
1445 /* check the argument values */
1446 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1447 return 0;
1448 } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1449 bufferLength<0 || (bufferLength>0 && buffer==NULL)
1450 ) {
1451 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1452 return 0;
1453 }
1454
1455 if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1456 return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1457 }
1458
1459 length=0;
1460
1461 /* try algorithmic names first */
1462 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1463 i=*p;
1464 algRange=(AlgorithmicRange *)(p+1);
1465 while(i>0) {
1466 if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1467 length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1468 break;
1469 }
1470 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1471 --i;
1472 }
1473
1474 if(i==0) {
1475 if (nameChoice == U_EXTENDED_CHAR_NAME) {
1476 length = getName(uCharNames, (uint32_t )code, U_EXTENDED_CHAR_NAME, buffer, (uint16_t) bufferLength);
1477 if (!length) {
1478 /* extended character name */
1479 length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1480 }
1481 } else {
1482 /* normal character name */
1483 length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1484 }
1485 }
1486
1487 return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1488}
1489
1490U_CAPI int32_t U_EXPORT2
1491u_getISOComment(UChar32 c,
1492 char *dest, int32_t destCapacity,
1493 UErrorCode *pErrorCode) {
1494 int32_t length;
1495
1496 /* check the argument values */
1497 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1498 return 0;
1499 } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1500 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1501 return 0;
1502 }
1503
1504 if((uint32_t)c>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1505 return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1506 }
1507
1508 /* the ISO comment is stored like a normal character name */
1509 length=getName(uCharNames, (uint32_t)c, U_ISO_COMMENT, dest, (uint16_t)destCapacity);
1510 return u_terminateChars(dest, destCapacity, length, pErrorCode);
1511}
1512
1513U_CAPI UChar32 U_EXPORT2
1514u_charFromName(UCharNameChoice nameChoice,
1515 const char *name,
1516 UErrorCode *pErrorCode) {
1517 char upper[120], lower[120];
1518 FindName findName;
1519 AlgorithmicRange *algRange;
1520 uint32_t *p;
1521 uint32_t i;
1522 UChar32 cp = 0;
1523 char c0;
1524 UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
1525
1526 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1527 return error;
1528 }
1529
1530 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1531 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1532 return error;
1533 }
1534
1535 if(!isDataLoaded(pErrorCode)) {
1536 return error;
1537 }
1538
1539 /* construct the uppercase and lowercase of the name first */
1540 for(i=0; i<sizeof(upper); ++i) {
1541 if((c0=*name++)!=0) {
1542 upper[i]=uprv_toupper(c0);
1543 lower[i]=uprv_tolower(c0);
1544 } else {
1545 upper[i]=lower[i]=0;
1546 break;
1547 }
1548 }
1549 if(i==sizeof(upper)) {
1550 /* name too long, there is no such character */
1551 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1552 return error;
1553 }
1554
1555 /* try extended names first */
1556 if (lower[0] == '<') {
1557 if (nameChoice == U_EXTENDED_CHAR_NAME) {
1558 if (lower[--i] == '>') {
1559 for (--i; lower[i] && lower[i] != '-'; --i) {
1560 }
1561
1562 if (lower[i] == '-') { /* We've got a category. */
1563 uint32_t cIdx;
1564
1565 lower[i] = 0;
1566
1567 for (++i; lower[i] != '>'; ++i) {
1568 if (lower[i] >= '0' && lower[i] <= '9') {
1569 cp = (cp << 4) + lower[i] - '0';
1570 } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1571 cp = (cp << 4) + lower[i] - 'a' + 10;
1572 } else {
1573 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1574 return error;
1575 }
1576 }
1577
1578 /* Now validate the category name.
1579 We could use a binary search, or a trie, if
1580 we really wanted to. */
b75a7d8f 1581
374ca955 1582 for (lower[i] = 0, cIdx = 0; cIdx < LENGTHOF(charCatNames); ++cIdx) {
b75a7d8f 1583
374ca955
A
1584 if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
1585 if (getCharCat(cp) == cIdx) {
1586 return cp;
1587 }
1588 break;
1589 }
1590 }
1591 }
b75a7d8f
A
1592 }
1593 }
1594
374ca955
A
1595 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1596 return error;
b75a7d8f
A
1597 }
1598
374ca955
A
1599 /* try algorithmic names now */
1600 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1601 i=*p;
1602 algRange=(AlgorithmicRange *)(p+1);
1603 while(i>0) {
1604 if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1605 return cp;
1606 }
1607 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1608 --i;
b75a7d8f
A
1609 }
1610
374ca955
A
1611 /* normal character name */
1612 findName.otherName=upper;
1613 findName.code=error;
1614 enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
1615 if (findName.code == error) {
1616 *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1617 }
1618 return findName.code;
b75a7d8f
A
1619}
1620
374ca955
A
1621U_CAPI void U_EXPORT2
1622u_enumCharNames(UChar32 start, UChar32 limit,
1623 UEnumCharNamesFn *fn,
1624 void *context,
1625 UCharNameChoice nameChoice,
1626 UErrorCode *pErrorCode) {
1627 AlgorithmicRange *algRange;
1628 uint32_t *p;
1629 uint32_t i;
b75a7d8f 1630
374ca955
A
1631 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1632 return;
b75a7d8f
A
1633 }
1634
374ca955
A
1635 if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1636 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1637 return;
b75a7d8f
A
1638 }
1639
374ca955
A
1640 if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1641 limit = UCHAR_MAX_VALUE + 1;
1642 }
1643 if((uint32_t)start>=(uint32_t)limit) {
1644 return;
b75a7d8f
A
1645 }
1646
374ca955
A
1647 if(!isDataLoaded(pErrorCode)) {
1648 return;
1649 }
b75a7d8f 1650
374ca955
A
1651 /* interleave the data-driven ones with the algorithmic ones */
1652 /* iterate over all algorithmic ranges; assume that they are in ascending order */
1653 p=(uint32_t *)((uint8_t *)uCharNames+uCharNames->algNamesOffset);
1654 i=*p;
1655 algRange=(AlgorithmicRange *)(p+1);
1656 while(i>0) {
1657 /* enumerate the character names before the current algorithmic range */
1658 /* here: start<limit */
1659 if((uint32_t)start<algRange->start) {
1660 if((uint32_t)limit<=algRange->start) {
1661 enumNames(uCharNames, start, limit, fn, context, nameChoice);
1662 return;
1663 }
1664 if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1665 return;
1666 }
1667 start=(UChar32)algRange->start;
1668 }
1669 /* enumerate the character names in the current algorithmic range */
1670 /* here: algRange->start<=start<limit */
1671 if((uint32_t)start<=algRange->end) {
1672 if((uint32_t)limit<=(algRange->end+1)) {
1673 enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1674 return;
1675 }
1676 if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1677 return;
1678 }
1679 start=(UChar32)algRange->end+1;
1680 }
1681 /* continue to the next algorithmic range (here: start<limit) */
1682 algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1683 --i;
1684 }
1685 /* enumerate the character names after the last algorithmic range */
1686 enumNames(uCharNames, start, limit, fn, context, nameChoice);
b75a7d8f
A
1687}
1688
1689U_CAPI int32_t U_EXPORT2
1690uprv_getMaxCharNameLength() {
1691 UErrorCode errorCode=U_ZERO_ERROR;
1692 if(calcNameSetsLengths(&errorCode)) {
1693 return gMaxNameLength;
1694 } else {
1695 return 0;
1696 }
1697}
1698
1699#if 0
1700/*
1701Currently not used but left for future use. Probably by UnicodeSet.
1702urename.h and uprops.h changed accordingly.
1703*/
1704U_CAPI int32_t U_EXPORT2
1705uprv_getMaxISOCommentLength() {
1706 UErrorCode errorCode=U_ZERO_ERROR;
1707 if(calcNameSetsLengths(&errorCode)) {
1708 return gMaxISOCommentLength;
1709 } else {
1710 return 0;
1711 }
1712}
1713#endif
1714
1715/**
1716 * Converts the char set cset into a Unicode set uset.
1717 * @param cset Set of 256 bit flags corresponding to a set of chars.
1718 * @param uset USet to receive characters. Existing contents are deleted.
1719 */
1720static void
374ca955 1721charSetToUSet(uint32_t cset[8], USetAdder *sa) {
b75a7d8f
A
1722 UChar us[256];
1723 char cs[256];
1724
1725 int32_t i, length;
1726 UErrorCode errorCode;
1727
1728 errorCode=U_ZERO_ERROR;
b75a7d8f
A
1729
1730 if(!calcNameSetsLengths(&errorCode)) {
1731 return;
1732 }
1733
1734 /* build a char string with all chars that are used in character names */
1735 length=0;
1736 for(i=0; i<256; ++i) {
1737 if(SET_CONTAINS(cset, i)) {
1738 cs[length++]=(char)i;
1739 }
1740 }
1741
1742 /* convert the char string to a UChar string */
1743 u_charsToUChars(cs, us, length);
1744
1745 /* add each UChar to the USet */
1746 for(i=0; i<length; ++i) {
1747 if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
374ca955 1748 sa->add(sa->set, us[i]);
b75a7d8f
A
1749 }
1750 }
1751}
1752
/**
 * Adds the characters that are used in Unicode character names
 * (the contents of gNameSet) via the given USetAdder.
 * @param sa USetAdder to receive characters.
 */
U_CAPI void U_EXPORT2
uprv_getCharNameCharacters(USetAdder *sa) {
    charSetToUSet(gNameSet, sa);
}
1761
1762#if 0
1763/*
1764Currently not used but left for future use. Probably by UnicodeSet.
1765urename.h and uprops.h changed accordingly.
1766*/
/**
 * Adds the characters that are used in ISO comments
 * (the contents of gISOCommentSet) via the given USetAdder.
 * @param sa USetAdder to receive characters.
 */
U_CAPI void U_EXPORT2
uprv_getISOCommentCharacters(USetAdder *sa) {
    charSetToUSet(gISOCommentSet, sa);
}
1775#endif
1776
374ca955
A
1777/* data swapping ------------------------------------------------------------ */
1778
1779/*
1780 * The token table contains non-negative entries for token bytes,
1781 * and -1 for bytes that represent themselves in the data file's charset.
1782 * -2 entries are used for lead bytes.
1783 *
1784 * Direct bytes (-1 entries) must be translated from the input charset family
1785 * to the output charset family.
1786 * makeTokenMap() writes a permutation mapping for this.
1787 * Use it once for single-/lead-byte tokens and once more for all trail byte
1788 * tokens. (';' is an unused trail byte marked with -1.)
1789 */
1790static void
1791makeTokenMap(const UDataSwapper *ds,
1792 int16_t tokens[], uint16_t tokenCount,
1793 uint8_t map[256],
1794 UErrorCode *pErrorCode) {
1795 UBool usedOutChar[256];
1796 uint16_t i, j;
1797 uint8_t c1, c2;
1798
1799 if(U_FAILURE(*pErrorCode)) {
1800 return;
1801 }
1802
1803 if(ds->inCharset==ds->outCharset) {
1804 /* Same charset family: identity permutation */
1805 for(i=0; i<256; ++i) {
1806 map[i]=(uint8_t)i;
1807 }
1808 } else {
1809 uprv_memset(map, 0, 256);
1810 uprv_memset(usedOutChar, 0, 256);
1811
1812 if(tokenCount>256) {
1813 tokenCount=256;
1814 }
1815
1816 /* set the direct bytes (byte 0 always maps to itself) */
1817 for(i=1; i<tokenCount; ++i) {
1818 if(tokens[i]==-1) {
1819 /* convert the direct byte character */
1820 c1=(uint8_t)i;
1821 ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1822 if(U_FAILURE(*pErrorCode)) {
1823 udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d) - %s\n",
1824 i, ds->inCharset, u_errorName(*pErrorCode));
1825 return;
1826 }
1827
1828 /* enter the converted character into the map and mark it used */
1829 map[c1]=c2;
1830 usedOutChar[c2]=TRUE;
1831 }
1832 }
1833
1834 /* set the mappings for the rest of the permutation */
1835 for(i=j=1; i<tokenCount; ++i) {
1836 /* set mappings that were not set for direct bytes */
1837 if(map[i]==0) {
1838 /* set an output byte value that was not used as an output byte above */
1839 while(usedOutChar[j]) {
1840 ++j;
1841 }
1842 map[i]=(uint8_t)j++;
1843 }
1844 }
1845
1846 /*
1847 * leave mappings at tokenCount and above unset if tokenCount<256
1848 * because they won't be used
1849 */
1850 }
1851}
1852
/*
 * Swaps unames.icu data between byte orders and charset families.
 *
 * ds          swapper with the read/swap functions and the charset families
 * inData      input data, starting with the data header
 * length      number of input bytes; a negative value requests preflighting,
 *             where only the data size is computed and nothing is written
 * outData     output buffer; the code handles inData==outData
 *             (the token table goes through a temporary array for that)
 * pErrorCode  in/out error code
 *
 * Returns the header size plus the size of the names data up to the end of
 * the last algorithmic range, or 0 if an error is detected.
 *
 * Steps when swapping: the four 32-bit offsets, the token table (swapped and
 * permutated), the token strings, the group table, the group strings (only if
 * the charset families differ), and finally the algorithmic ranges.
 */
U_CAPI int32_t U_EXPORT2
uchar_swapNames(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const UDataInfo *pInfo;
    int32_t headerSize;

    const uint8_t *inBytes;
    uint8_t *outBytes;

    uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
             offset, i, count, stringsCount;

    const AlgorithmicRange *inRange;
    AlgorithmicRange *outRange;

    /* udata_swapDataHeader checks the arguments */
    headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* check data format and format version */
    pInfo=(const UDataInfo *)((const char *)inData+4);
    if(!(
        pInfo->dataFormat[0]==0x75 &&   /* dataFormat="unam" */
        pInfo->dataFormat[1]==0x6e &&
        pInfo->dataFormat[2]==0x61 &&
        pInfo->dataFormat[3]==0x6d &&
        pInfo->formatVersion[0]==1
    )) {
        udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
                         pInfo->dataFormat[0], pInfo->dataFormat[1],
                         pInfo->dataFormat[2], pInfo->dataFormat[3],
                         pInfo->formatVersion[0]);
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return 0;
    }

    /* the names data proper follows the header */
    inBytes=(const uint8_t *)inData+headerSize;
    outBytes=(uint8_t *)outData+headerSize;
    if(length<0) {
        /* preflighting: only the offset of the algorithmic ranges is needed */
        algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
    } else {
        /* from here on, length counts only the bytes after the header */
        length-=headerSize;
        /* NOTE(review): 20 presumably covers the four offsets plus the start of the token table - confirm */
        if( length<20 ||
            (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
        ) {
            udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
                             length);
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    if(length<0) {
        /* preflighting: iterate through algorithmic ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        offset+=4;

        /* add up the range sizes; offset ends up just after the last range */
        for(i=0; i<count; ++i) {
            inRange=(const AlgorithmicRange *)(inBytes+offset);
            offset+=ds->readUInt16(inRange->size);
        }
    } else {
        /* swap data */
        const uint16_t *p;
        uint16_t *q, *temp;

        /* the first 512 token table entries in platform byte order: 256 single-/lead-byte + 256 trail-byte */
        int16_t tokens[512];
        uint16_t tokenCount;

        /* byte permutations for single-/lead bytes and for trail bytes */
        uint8_t map[256], trailMap[256];

        /* copy the data for inaccessible bytes */
        if(inBytes!=outBytes) {
            uprv_memcpy(outBytes, inBytes, length);
        }

        /* the initial 4 offsets first */
        /* read them before swapping because inBytes and outBytes may be the same */
        tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
        groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
        groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
        ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);

        /*
         * now the tokens table
         * it needs to be permutated along with the compressed name strings
         */
        p=(const uint16_t *)(inBytes+16);
        q=(uint16_t *)(outBytes+16);

        /* read and swap the tokenCount */
        tokenCount=ds->readUInt16(*p);
        ds->swapArray16(ds, p, 2, q, pErrorCode);
        ++p;
        ++q;

        /* read the first 512 tokens and make the token maps */
        if(tokenCount<=512) {
            count=tokenCount;
        } else {
            count=512;
        }
        for(i=0; i<count; ++i) {
            tokens[i]=udata_readInt16(ds, p[i]);
        }
        for(; i<512; ++i) {
            tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
        }
        /* both calls share pErrorCode; a failure of either one is detected below */
        makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
        makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            return 0;
        }

        /*
         * swap and permutate the tokens
         * go through a temporary array to support in-place swapping
         */
        temp=(uint16_t *)uprv_malloc(tokenCount*2);
        if(temp==NULL) {
            udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
                             tokenCount);
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
            return 0;
        }

        /* swap and permutate single-/lead-byte tokens */
        for(i=0; i<tokenCount && i<256; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
        }

        /* swap and permutate trail-byte tokens */
        /* the upper bits of i select the block of 256 tokens, trailMap[] permutes within the block */
        for(; i<tokenCount; ++i) {
            ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
        }

        /* copy the result into the output and free the temporary array */
        uprv_memcpy(q, temp, tokenCount*2);
        uprv_free(temp);

        /*
         * swap the token strings but not a possible padding byte after
         * the terminating NUL of the last string
         */
        udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
                                 outBytes+tokenStringOffset, pErrorCode);
        if(U_FAILURE(*pErrorCode)) {
            udata_printError(ds, "uchar_swapNames(token strings) failed - %s\n",
                             u_errorName(*pErrorCode));
            return 0;
        }

        /* swap the group table */
        /* one 16-bit count followed by count entries of three 16-bit units each */
        count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
        ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
                           outBytes+groupsOffset, pErrorCode);

        /*
         * swap the group strings
         * swap the string bytes but not the nibble-encoded string lengths
         */
        if(ds->inCharset!=ds->outCharset) {
            uint16_t offsets[LINES_PER_GROUP+1], lengths[LINES_PER_GROUP+1];

            const uint8_t *inStrings, *nextInStrings;
            uint8_t *outStrings;

            uint8_t c;

            inStrings=inBytes+groupStringOffset;
            outStrings=outBytes+groupStringOffset;

            /* the group strings extend up to the algorithmic ranges */
            stringsCount=algNamesOffset-groupStringOffset;

            /* iterate through string groups until only a few padding bytes are left */
            while(stringsCount>32) {
                nextInStrings=expandGroupLengths(inStrings, offsets, lengths);

                /* move past the length bytes */
                stringsCount-=(uint32_t)(nextInStrings-inStrings);
                outStrings+=nextInStrings-inStrings;
                inStrings=nextInStrings;

                count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
                stringsCount-=count;

                /* swap the string bytes using map[] and trailMap[] */
                while(count>0) {
                    c=*inStrings++;
                    *outStrings++=map[c];
                    if(tokens[c]!=-2) {
                        --count;
                    } else {
                        /* token lead byte: swap the trail byte, too */
                        *outStrings++=trailMap[*inStrings++];
                        count-=2;
                    }
                }
            }
        }

        /* swap the algorithmic ranges */
        offset=algNamesOffset;
        count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
        ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
        offset+=4;

        for(i=0; i<count; ++i) {
            /* the current range must start inside the data */
            if(offset>(uint32_t)length) {
                udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
                                 length, i);
                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                return 0;
            }

            inRange=(const AlgorithmicRange *)(inBytes+offset);
            outRange=(AlgorithmicRange *)(outBytes+offset);
            /* read the size before it is swapped; offset now points past this range */
            offset+=ds->readUInt16(inRange->size);

            /* start and end are the two 32-bit fields; type and variant are single bytes and need no swapping */
            ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
            ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
            switch(inRange->type) {
            case 0:
                /* swap prefix string */
                ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
                                    outRange+1, pErrorCode);
                if(U_FAILURE(*pErrorCode)) {
                    udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed - %s\n",
                                     i, u_errorName(*pErrorCode));
                    return 0;
                }
                break;
            case 1:
                {
                    /* swap factors and the prefix and factor strings */
                    uint16_t factors[8];
                    uint32_t j, factorsCount;

                    /* the variant byte holds the number of factors */
                    factorsCount=inRange->variant;
                    if(factorsCount==0 || factorsCount>LENGTHOF(factors)) {
                        udata_printError(ds, "uchar_swapNames(): too many factors (%u) in algorithmic range %u\n",
                                         factorsCount, i);
                        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                        return 0;
                    }

                    /* read and swap the factors */
                    /* NOTE(review): factors[] is filled here but not read again in this function */
                    p=(const uint16_t *)(inRange+1);
                    q=(uint16_t *)(outRange+1);
                    for(j=0; j<factorsCount; ++j) {
                        factors[j]=ds->readUInt16(p[j]);
                    }
                    ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);

                    /* swap the strings, up to the last terminating NUL */
                    p+=factorsCount;
                    q+=factorsCount;
                    /* bytes from the first string to the end of this range, minus trailing non-NUL bytes */
                    stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
                    while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
                        --stringsCount;
                    }
                    ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
                }
                break;
            default:
                udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
                                 inRange->type, i);
                *pErrorCode=U_UNSUPPORTED_ERROR;
                return 0;
            }
        }
    }

    /* in both modes, offset is now the size of the names data after the header */
    return headerSize+(int32_t)offset;
}
2131
b75a7d8f
A
2132/*
2133 * Hey, Emacs, please set the following:
2134 *
2135 * Local Variables:
2136 * indent-tabs-mode: nil
2137 * End:
2138 *
2139 */