]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2004, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: store.c | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2004aug28 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Store Unicode case mapping properties efficiently for | |
17 | * random access. | |
18 | */ | |
19 | ||
20 | #include <stdio.h> | |
21 | #include <stdlib.h> | |
22 | #include "unicode/utypes.h" | |
23 | #include "unicode/uchar.h" | |
24 | #include "unicode/ustring.h" | |
25 | #include "cmemory.h" | |
26 | #include "cstring.h" | |
27 | #include "filestrm.h" | |
28 | #include "utrie.h" | |
29 | #include "unicode/udata.h" | |
30 | #include "unewdata.h" | |
31 | #include "propsvec.h" | |
32 | #include "gencase.h" | |
33 | ||
34 | /* Unicode case mapping properties file format --------------------------------- | |
35 | ||
36 | The file format prepared and written here contains several data | |
37 | structures that store indexes or data. | |
38 | ||
39 | Before the data contents described below, there are the headers required by | |
40 | the udata API for loading ICU data. Especially, a UDataInfo structure | |
41 | precedes the actual data. It contains platform properties values and the | |
42 | file format version. | |
43 | ||
44 | The following is a description of format version 1. | |
45 | ||
46 | The file contains the following structures: | |
47 | ||
48 | const int32_t indexes[i0] with values i0, i1, ...: | |
49 | (see UCASE_IX_... constants for names of indexes) | |
50 | ||
51 | i0 indexLength; -- length of indexes[] (UCASE_IX_TOP) | |
52 | i1 dataLength; -- length in bytes of the post-header data (incl. indexes[]) | |
53 | i2 trieSize; -- size in bytes of the case mapping properties trie | |
54 | i3 exceptionsLength; -- length in uint16_t of the exceptions array | |
55 | ||
56 | i4..i14 reservedIndexes; -- reserved values; 0 for now | |
57 | ||
58 | i15 maxFullLength; -- maximum length of a full case mapping/folding string | |
59 | ||
60 | ||
61 | Serialized trie, see utrie.h; | |
62 | ||
63 | const uint16_t exceptions[exceptionsLength]; | |
64 | ||
65 | ||
66 | Trie data word: | |
67 | Bits | |
68 | if(exception) { | |
69 | 15..4 unsigned exception index | |
70 | } else { | |
71 | if(not uncased) { | |
72 | 15..6 signed delta to simple case mapping code point | |
73 | (add delta to input code point) | |
74 | } else { | |
75 | 6 the code point is case-ignorable | |
76 | (U+0307 is also case-ignorable but has an exception) | |
77 | } | |
78 | 5..4 0 normal character with cc=0 | |
79 | 1 soft-dotted character | |
80 | 2 cc=230 | |
81 | 3 other cc | |
82 | } | |
83 | 3 exception | |
84 | 2 case sensitive | |
85 | 1..0 0 uncased | |
86 | 1 lowercase | |
87 | 2 uppercase | |
88 | 3 titlecase | |
89 | ||
90 | ||
91 | Exceptions: | |
92 | A sub-array of the exceptions array is indexed by the exception index in a | |
93 | trie word. | |
94 | The sub-array consists of the following fields: | |
95 | uint16_t excWord; | |
96 | uint16_t optional values []; | |
97 | UTF-16 strings for full (string) mappings for lowercase, case folding, uppercase, titlecase | |
98 | ||
99 | excWord: (see UCASE_EXC_...) | |
100 | Bits | |
101 | 15 conditional case folding | |
102 | 14 conditional special casing | |
103 | 13..12 same as non-exception trie data bits 5..4 | |
104 | moved here because the exception index needs more bits than the delta | |
105 | 0 normal character with cc=0 | |
106 | 1 soft-dotted character | |
107 | 2 cc=230 | |
108 | 3 other cc | |
109 | 11.. 9 reserved | |
110 | 8 if set, then for each optional-value slot there are 2 uint16_t values | |
111 | (high and low parts of 32-bit values) | |
112 | instead of single ones | |
113 | 7.. 0 bits for which optional value is present | |
114 | ||
115 | Optional-value slots: | |
116 | 0 lowercase mapping (code point) | |
117 | 1 case folding (code point) | |
118 | 2 uppercase mapping (code point) | |
119 | 3 titlecase mapping (code point) | |
120 | 4..6 reserved | |
121 | 7 there is at least one full (string) case mapping | |
122 | the length of each is encoded in a nibble of this optional value, | |
123 | and the strings follow this optional value in the same order: | |
124 | lower/fold/upper/title | |
125 | ||
126 | For space saving, some values are not stored. Lookups are as follows: | |
127 | - If special casing is conditional, then no full lower/upper/title mapping | |
128 | strings are stored. | |
129 | - If case folding is conditional, then no simple or full case foldings are | |
130 | stored. | |
131 | - Fall back in this order: | |
132 | full (string) mapping -- if full mappings are used | |
133 | simple (code point) mapping of the same type | |
134 | simple fold->simple lower | |
135 | simple title->simple upper | |
136 | finally, the original code point (no mapping) | |
137 | ||
138 | ----------------------------------------------------------------------------- */ | |
139 | ||
140 | /* UDataInfo cf. udata.h */ | |
141 | static UDataInfo dataInfo={ | |
142 | sizeof(UDataInfo), | |
143 | 0, | |
144 | ||
145 | U_IS_BIG_ENDIAN, | |
146 | U_CHARSET_FAMILY, | |
147 | U_SIZEOF_UCHAR, | |
148 | 0, | |
149 | ||
150 | /* dataFormat="cAsE" */ | |
151 | { UCASE_FMT_0, UCASE_FMT_1, UCASE_FMT_2, UCASE_FMT_3 }, | |
152 | { 1, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ | |
153 | { 4, 0, 1, 0 } /* dataVersion */ | |
154 | }; | |
155 | ||
enum {
    /*
     * Capacity of the excProps[] staging array, i.e. the maximum number
     * of code points with exception data that this tool expects.
     */
    MAX_EXC_COUNT=1000
};
160 | ||
161 | /* exceptions values */ | |
162 | static uint16_t exceptions[UCASE_MAX_EXCEPTIONS+100]; | |
163 | static uint16_t exceptionsTop=0; | |
164 | static Props excProps[MAX_EXC_COUNT]; | |
165 | static uint16_t exceptionsCount=0; | |
166 | ||
167 | /* becomes indexes[UCASE_IX_MAX_FULL_LENGTH] */ | |
168 | static int32_t maxFullLength=U16_MAX_LENGTH; | |
169 | ||
170 | /* -------------------------------------------------------------------------- */ | |
171 | ||
172 | extern void | |
173 | setUnicodeVersion(const char *v) { | |
174 | UVersionInfo version; | |
175 | u_versionFromString(version, v); | |
176 | uprv_memcpy(dataInfo.dataVersion, version, 4); | |
177 | } | |
178 | ||
179 | /* store a character's properties ------------------------------------------- */ | |
180 | ||
181 | extern void | |
182 | setProps(Props *p) { | |
183 | UErrorCode errorCode; | |
184 | uint32_t value, oldValue; | |
185 | int32_t delta; | |
186 | ||
187 | /* get the non-UnicodeData.txt properties */ | |
188 | value=oldValue=upvec_getValue(pv, p->code, 0); | |
189 | ||
190 | /* default: map to self */ | |
191 | delta=0; | |
192 | ||
193 | if(p->gc==U_TITLECASE_LETTER) { | |
194 | /* the Titlecase property is read late, from UnicodeData.txt */ | |
195 | value|=UCASE_TITLE; | |
196 | } | |
197 | ||
198 | if(p->upperCase!=0) { | |
199 | /* uppercase mapping as delta if the character is lowercase */ | |
200 | if((value&UCASE_TYPE_MASK)==UCASE_LOWER) { | |
201 | delta=p->upperCase-p->code; | |
202 | } else { | |
203 | value|=UCASE_EXCEPTION; | |
204 | } | |
205 | } | |
206 | if(p->lowerCase!=0) { | |
207 | /* lowercase mapping as delta if the character is uppercase or titlecase */ | |
208 | if((value&UCASE_TYPE_MASK)>=UCASE_UPPER) { | |
209 | delta=p->lowerCase-p->code; | |
210 | } else { | |
211 | value|=UCASE_EXCEPTION; | |
212 | } | |
213 | } | |
214 | if(p->upperCase!=p->titleCase) { | |
215 | value|=UCASE_EXCEPTION; | |
216 | } | |
217 | if(p->specialCasing!=NULL) { | |
218 | value|=UCASE_EXCEPTION; | |
219 | } | |
220 | if(p->caseFolding!=NULL) { | |
221 | value|=UCASE_EXCEPTION; | |
222 | } | |
223 | ||
224 | if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) { | |
225 | value|=UCASE_EXCEPTION; | |
226 | } | |
227 | ||
228 | if(p->cc!=0) { | |
229 | if(value&UCASE_DOT_MASK) { | |
230 | fprintf(stderr, "gencase: a soft-dotted character has cc!=0\n"); | |
231 | exit(U_INTERNAL_PROGRAM_ERROR); | |
232 | } | |
233 | if(p->cc==230) { | |
234 | value|=UCASE_ABOVE; | |
235 | } else { | |
236 | value|=UCASE_OTHER_ACCENT; | |
237 | } | |
238 | } | |
239 | ||
240 | /* encode case-ignorable as delta==1 on uncased characters */ | |
241 | if( | |
242 | (value&UCASE_TYPE_MASK)==UCASE_NONE && | |
243 | p->code!=0x307 && | |
244 | ((U_MASK(p->gc)&(U_GC_MN_MASK|U_GC_ME_MASK|U_GC_CF_MASK|U_GC_LM_MASK|U_GC_SK_MASK))!=0 || | |
245 | p->code==0x27 || p->code==0xad || p->code==0x2019) | |
246 | ) { | |
247 | /* | |
248 | * We use one of the delta/exception bits, which works because we only | |
249 | * store the case-ignorable flag for uncased characters. | |
250 | * There is no delta for uncased characters (see checks above). | |
251 | * If there is an exception for an uncased, case-ignorable character | |
252 | * (although there should not be any case mappings if it's uncased) | |
253 | * then we have a problem. | |
254 | * There is one character which is case-ignorable but has an exception: | |
255 | * U+0307 is uncased, Mn, has conditional special casing and | |
256 | * is therefore handled in code instead. | |
257 | */ | |
258 | if(value&UCASE_EXCEPTION) { | |
259 | fprintf(stderr, "gencase error: unable to encode case-ignorable for U+%04lx with exceptions\n", | |
260 | (unsigned long)p->code); | |
261 | exit(U_INTERNAL_PROGRAM_ERROR); | |
262 | } | |
263 | ||
264 | delta=1; | |
265 | } | |
266 | ||
267 | /* handle exceptions */ | |
268 | if(value&UCASE_EXCEPTION) { | |
269 | /* simply store exceptions for later processing and encoding */ | |
270 | value|=(uint32_t)exceptionsCount<<UGENCASE_EXC_SHIFT; | |
271 | uprv_memcpy(excProps+exceptionsCount, p, sizeof(*p)); | |
272 | if(++exceptionsCount==MAX_EXC_COUNT) { | |
273 | fprintf(stderr, "gencase: too many exceptions\n"); | |
274 | exit(U_INDEX_OUTOFBOUNDS_ERROR); | |
275 | } | |
276 | } else { | |
277 | /* store the simple case mapping delta */ | |
278 | value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK; | |
279 | } | |
280 | ||
281 | errorCode=U_ZERO_ERROR; | |
282 | if( value!=oldValue && | |
283 | !upvec_setValue(pv, p->code, p->code+1, 0, value, 0xffffffff, &errorCode) | |
284 | ) { | |
285 | fprintf(stderr, "gencase error: unable to set case mapping values, code: %s\n", | |
286 | u_errorName(errorCode)); | |
287 | exit(errorCode); | |
288 | } | |
289 | } | |
290 | ||
291 | extern void | |
292 | addCaseSensitive(UChar32 first, UChar32 last) { | |
293 | UErrorCode errorCode=U_ZERO_ERROR; | |
294 | if(!upvec_setValue(pv, first, last+1, 0, UCASE_SENSITIVE, UCASE_SENSITIVE, &errorCode)) { | |
295 | fprintf(stderr, "gencase error: unable to set UCASE_SENSITIVE, code: %s\n", | |
296 | u_errorName(errorCode)); | |
297 | exit(errorCode); | |
298 | } | |
299 | } | |
300 | ||
/*
 * Placeholder: building the case closure is not implemented yet.
 * The function currently does nothing.
 */
extern void
makeCaseClosure() {
    /* TODO: not implemented */
}
305 | ||
306 | /* exceptions --------------------------------------------------------------- */ | |
307 | ||
308 | static UBool | |
309 | fullMappingEqualsSimple(const UChar *s, UChar32 simple, UChar32 c) { | |
310 | int32_t i, length; | |
311 | UChar32 full; | |
312 | ||
313 | length=*s++; | |
314 | if(length==0 || length>U16_MAX_LENGTH) { | |
315 | return FALSE; | |
316 | } | |
317 | i=0; | |
318 | U16_NEXT(s, i, length, full); | |
319 | ||
320 | if(simple==0) { | |
321 | simple=c; /* UCD has no simple mapping if it's the same as the code point itself */ | |
322 | } | |
323 | return (UBool)(i==length && full==simple); | |
324 | } | |
325 | ||
326 | static uint16_t | |
327 | makeException(uint32_t value, Props *p) { | |
328 | uint32_t slots[8]; | |
329 | uint32_t slotBits; | |
330 | uint16_t excWord, excIndex, excTop, i, count, length, fullLengths; | |
331 | UBool doubleSlots; | |
332 | ||
333 | /* excIndex will be returned for storing in the trie word */ | |
334 | excIndex=exceptionsTop; | |
335 | if(excIndex>=UCASE_MAX_EXCEPTIONS) { | |
336 | fprintf(stderr, "gencase error: too many exceptions words\n"); | |
337 | exit(U_BUFFER_OVERFLOW_ERROR); | |
338 | } | |
339 | ||
340 | excTop=excIndex+1; /* +1 for excWord which will be stored at excIndex */ | |
341 | ||
342 | /* copy and shift the soft-dotted bits */ | |
343 | excWord=((uint16_t)value&UCASE_DOT_MASK)<<UCASE_EXC_DOT_SHIFT; | |
344 | ||
345 | /* update maxFullLength */ | |
346 | if(p->specialCasing!=NULL) { | |
347 | length=p->specialCasing->lowerCase[0]; | |
348 | if(length>maxFullLength) { | |
349 | maxFullLength=length; | |
350 | } | |
351 | length=p->specialCasing->upperCase[0]; | |
352 | if(length>maxFullLength) { | |
353 | maxFullLength=length; | |
354 | } | |
355 | length=p->specialCasing->titleCase[0]; | |
356 | if(length>maxFullLength) { | |
357 | maxFullLength=length; | |
358 | } | |
359 | } | |
360 | if(p->caseFolding!=NULL) { | |
361 | length=p->caseFolding->full[0]; | |
362 | if(length>maxFullLength) { | |
363 | maxFullLength=length; | |
364 | } | |
365 | } | |
366 | ||
367 | /* set the bits for conditional mappings */ | |
368 | if(p->specialCasing!=NULL && p->specialCasing->isComplex) { | |
369 | excWord|=UCASE_EXC_CONDITIONAL_SPECIAL; | |
370 | p->specialCasing=NULL; | |
371 | } | |
372 | if(p->caseFolding!=NULL && p->caseFolding->simple==0 && p->caseFolding->full[0]==0) { | |
373 | excWord|=UCASE_EXC_CONDITIONAL_FOLD; | |
374 | p->caseFolding=NULL; | |
375 | } | |
376 | ||
377 | /* | |
378 | * Note: | |
379 | * UCD stores no simple mappings when they are the same as the code point itself. | |
380 | * SpecialCasing and CaseFolding do store simple mappings even if they are | |
381 | * the same as the code point itself. | |
382 | * Comparisons between simple regular mappings and simple special/folding | |
383 | * mappings need to compensate for the difference by comparing with the | |
384 | * original code point if a simple UCD mapping is missing (0). | |
385 | */ | |
386 | ||
387 | /* remove redundant data */ | |
388 | if(p->specialCasing!=NULL) { | |
389 | /* do not store full mappings if they are the same as the simple ones */ | |
390 | if(fullMappingEqualsSimple(p->specialCasing->lowerCase, p->lowerCase, p->code)) { | |
391 | p->specialCasing->lowerCase[0]=0; | |
392 | } | |
393 | if(fullMappingEqualsSimple(p->specialCasing->upperCase, p->upperCase, p->code)) { | |
394 | p->specialCasing->upperCase[0]=0; | |
395 | } | |
396 | if(fullMappingEqualsSimple(p->specialCasing->titleCase, p->titleCase, p->code)) { | |
397 | p->specialCasing->titleCase[0]=0; | |
398 | } | |
399 | } | |
400 | if( p->caseFolding!=NULL && | |
401 | fullMappingEqualsSimple(p->caseFolding->full, p->caseFolding->simple, p->code) | |
402 | ) { | |
403 | p->caseFolding->full[0]=0; | |
404 | } | |
405 | ||
406 | /* write the optional slots */ | |
407 | slotBits=0; | |
408 | count=0; | |
409 | ||
410 | if(p->lowerCase!=0) { | |
411 | slots[count]=(uint32_t)p->lowerCase; | |
412 | slotBits|=slots[count]; | |
413 | ++count; | |
414 | excWord|=U_MASK(UCASE_EXC_LOWER); | |
415 | } | |
416 | if( p->caseFolding!=NULL && | |
417 | p->caseFolding->simple!=0 && | |
418 | (p->lowerCase!=0 ? | |
419 | p->caseFolding->simple!=p->lowerCase : | |
420 | p->caseFolding->simple!=p->code) | |
421 | ) { | |
422 | slots[count]=(uint32_t)p->caseFolding->simple; | |
423 | slotBits|=slots[count]; | |
424 | ++count; | |
425 | excWord|=U_MASK(UCASE_EXC_FOLD); | |
426 | } | |
427 | if(p->upperCase!=0) { | |
428 | slots[count]=(uint32_t)p->upperCase; | |
429 | slotBits|=slots[count]; | |
430 | ++count; | |
431 | excWord|=U_MASK(UCASE_EXC_UPPER); | |
432 | } | |
433 | if(p->upperCase!=p->titleCase) { | |
434 | if(p->titleCase!=0) { | |
435 | slots[count]=(uint32_t)p->titleCase; | |
436 | } else { | |
437 | slots[count]=(uint32_t)p->code; | |
438 | } | |
439 | slotBits|=slots[count]; | |
440 | ++count; | |
441 | excWord|=U_MASK(UCASE_EXC_TITLE); | |
442 | } | |
443 | ||
444 | /* lengths of full case mapping strings, stored in the last slot */ | |
445 | fullLengths=0; | |
446 | if(p->specialCasing!=NULL) { | |
447 | fullLengths=p->specialCasing->lowerCase[0]; | |
448 | fullLengths|=p->specialCasing->upperCase[0]<<8; | |
449 | fullLengths|=p->specialCasing->titleCase[0]<<12; | |
450 | } | |
451 | if(p->caseFolding!=NULL) { | |
452 | fullLengths|=p->caseFolding->full[0]<<4; | |
453 | } | |
454 | if(fullLengths!=0) { | |
455 | slots[count]=fullLengths; | |
456 | slotBits|=slots[count]; | |
457 | ++count; | |
458 | excWord|=U_MASK(UCASE_EXC_FULL_MAPPINGS); | |
459 | } | |
460 | ||
461 | /* write slots */ | |
462 | doubleSlots=(UBool)(slotBits>0xffff); | |
463 | if(!doubleSlots) { | |
464 | for(i=0; i<count; ++i) { | |
465 | exceptions[excTop++]=(uint16_t)slots[i]; | |
466 | } | |
467 | } else { | |
468 | excWord|=UCASE_EXC_DOUBLE_SLOTS; | |
469 | for(i=0; i<count; ++i) { | |
470 | exceptions[excTop++]=(uint16_t)(slots[i]>>16); | |
471 | exceptions[excTop++]=(uint16_t)slots[i]; | |
472 | } | |
473 | } | |
474 | ||
475 | /* write the full case mapping strings */ | |
476 | if(p->specialCasing!=NULL) { | |
477 | length=(uint16_t)p->specialCasing->lowerCase[0]; | |
478 | u_memcpy((UChar *)exceptions+excTop, p->specialCasing->lowerCase+1, length); | |
479 | excTop+=length; | |
480 | } | |
481 | if(p->caseFolding!=NULL) { | |
482 | length=(uint16_t)p->caseFolding->full[0]; | |
483 | u_memcpy((UChar *)exceptions+excTop, p->caseFolding->full+1, length); | |
484 | excTop+=length; | |
485 | } | |
486 | if(p->specialCasing!=NULL) { | |
487 | length=(uint16_t)p->specialCasing->upperCase[0]; | |
488 | u_memcpy((UChar *)exceptions+excTop, p->specialCasing->upperCase+1, length); | |
489 | excTop+=length; | |
490 | ||
491 | length=(uint16_t)p->specialCasing->titleCase[0]; | |
492 | u_memcpy((UChar *)exceptions+excTop, p->specialCasing->titleCase+1, length); | |
493 | excTop+=length; | |
494 | } | |
495 | ||
496 | exceptionsTop=excTop; | |
497 | ||
498 | /* write the main exceptions word */ | |
499 | exceptions[excIndex]=excWord; | |
500 | ||
501 | return excIndex; | |
502 | } | |
503 | ||
504 | extern void | |
505 | makeExceptions() { | |
506 | uint32_t *row; | |
507 | uint32_t value; | |
508 | int32_t i; | |
509 | uint16_t excIndex; | |
510 | ||
511 | i=0; | |
512 | while((row=upvec_getRow(pv, i, NULL, NULL))!=NULL) { | |
513 | value=*row; | |
514 | if(value&UCASE_EXCEPTION) { | |
515 | excIndex=makeException(value, excProps+(value>>UGENCASE_EXC_SHIFT)); | |
516 | *row=(value&~(UGENCASE_EXC_MASK|UCASE_EXC_MASK))|(excIndex<<UCASE_EXC_SHIFT); | |
517 | } | |
518 | ++i; | |
519 | } | |
520 | } | |
521 | ||
522 | /* generate output data ----------------------------------------------------- */ | |
523 | ||
524 | extern void | |
525 | generateData(const char *dataDir) { | |
526 | static int32_t indexes[UCASE_IX_TOP]={ | |
527 | UCASE_IX_TOP | |
528 | }; | |
529 | static uint8_t trieBlock[40000]; | |
530 | ||
531 | const uint32_t *row; | |
532 | UChar32 start, limit; | |
533 | int32_t i; | |
534 | ||
535 | UNewDataMemory *pData; | |
536 | UNewTrie *pTrie; | |
537 | UErrorCode errorCode=U_ZERO_ERROR; | |
538 | int32_t trieSize; | |
539 | long dataLength; | |
540 | ||
541 | pTrie=utrie_open(NULL, NULL, 20000, 0, 0, TRUE); | |
542 | if(pTrie==NULL) { | |
543 | fprintf(stderr, "gencase error: unable to create a UNewTrie\n"); | |
544 | exit(U_MEMORY_ALLOCATION_ERROR); | |
545 | } | |
546 | ||
547 | for(i=0; (row=upvec_getRow(pv, i, &start, &limit))!=NULL; ++i) { | |
548 | if(!utrie_setRange32(pTrie, start, limit, *row, TRUE)) { | |
549 | fprintf(stderr, "gencase error: unable to set trie value (overflow)\n"); | |
550 | exit(U_BUFFER_OVERFLOW_ERROR); | |
551 | } | |
552 | } | |
553 | ||
554 | trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode); | |
555 | if(U_FAILURE(errorCode)) { | |
556 | fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize); | |
557 | exit(errorCode); | |
558 | } | |
559 | ||
560 | indexes[UCASE_IX_EXC_LENGTH]=exceptionsTop; | |
561 | indexes[UCASE_IX_TRIE_SIZE]=trieSize; | |
562 | indexes[UCASE_IX_LENGTH]=(int32_t)sizeof(indexes)+trieSize+2*exceptionsTop; | |
563 | ||
564 | indexes[UCASE_IX_MAX_FULL_LENGTH]=maxFullLength; | |
565 | ||
566 | if(beVerbose) { | |
567 | printf("trie size in bytes: %5d\n", (int)trieSize); | |
568 | printf("number of code points with exceptions: %5d\n", exceptionsCount); | |
569 | printf("size in bytes of exceptions: %5d\n", 2*exceptionsTop); | |
570 | printf("data size: %5d\n", (int)indexes[UCASE_IX_LENGTH]); | |
571 | } | |
572 | ||
573 | /* write the data */ | |
574 | pData=udata_create(dataDir, UCASE_DATA_TYPE, UCASE_DATA_NAME, &dataInfo, | |
575 | haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode); | |
576 | if(U_FAILURE(errorCode)) { | |
577 | fprintf(stderr, "gencase: unable to create data memory, %s\n", u_errorName(errorCode)); | |
578 | exit(errorCode); | |
579 | } | |
580 | ||
581 | udata_writeBlock(pData, indexes, sizeof(indexes)); | |
582 | udata_writeBlock(pData, trieBlock, trieSize); | |
583 | udata_writeBlock(pData, exceptions, 2*exceptionsTop); | |
584 | ||
585 | /* finish up */ | |
586 | dataLength=udata_finish(pData, &errorCode); | |
587 | if(U_FAILURE(errorCode)) { | |
588 | fprintf(stderr, "gencase: error %d writing the output file\n", errorCode); | |
589 | exit(errorCode); | |
590 | } | |
591 | ||
592 | if(dataLength!=indexes[UCASE_IX_LENGTH]) { | |
593 | fprintf(stderr, "gencase: data length %ld != calculated size %d\n", | |
594 | dataLength, (int)indexes[UCASE_IX_LENGTH]); | |
595 | exit(U_INTERNAL_PROGRAM_ERROR); | |
596 | } | |
597 | ||
598 | utrie_close(pTrie); | |
599 | } | |
600 | ||
601 | /* | |
602 | * Hey, Emacs, please set the following: | |
603 | * | |
604 | * Local Variables: | |
605 | * indent-tabs-mode: nil | |
606 | * End: | |
607 | * | |
608 | */ |