]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gensprep/store.c
ICU-461.13.tar.gz
[apple/icu.git] / icuSources / tools / gensprep / store.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2009, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: store.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003-02-06
14 * created by: Ram Viswanadha
15 *
16 */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "unicode/utypes.h"
21 #include "cmemory.h"
22 #include "cstring.h"
23 #include "filestrm.h"
24 #include "unicode/udata.h"
25 #include "utrie.h"
26 #include "unewdata.h"
27 #include "gensprep.h"
28 #include "uhash.h"
29
30
31 #define DO_DEBUG_OUT 0
32
33
34 /*
35 * StringPrep profile file format ------------------------------------
36 *
37 * The file format prepared and written here contains a 16-bit trie and a mapping table.
38 *
39 * Before the data contents described below, there are the headers required by
40 * the udata API for loading ICU data. Especially, a UDataInfo structure
41 * precedes the actual data. It contains platform properties values and the
42 * file format version.
43 *
44 * The following is a description of format version 2.
45 *
46 * Data contents:
47 *
48 * The contents is a parsed, binary form of RFC3454 and possibly
49 * NormalizationCorrections.txt depending on the options specified on the profile.
50 *
51 * Any Unicode code point from 0 to 0x10ffff can be looked up to get
52 * the trie-word, if any, for that code point. This means that the input
53 * to the lookup are 21-bit unsigned integers, with not all of the
54 * 21-bit range used.
55 *
56 * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
57 * After that there are the following structures:
58 *
59 * int32_t indexes[_SPREP_INDEX_TOP]; -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
60 *
61 * UTrie stringPrepTrie; -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
62 *
63 * uint16_t mappingTable[]; -- Contains the sequence of code units that the code point maps to
64 * size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
65 *
66 * The indexes array contains the following values:
67 * indexes[_SPREP_INDEX_TRIE_SIZE] -- The size of the StringPrep trie in bytes
68 * indexes[_SPREP_INDEX_MAPPING_DATA_SIZE] -- The size of the mappingTable in bytes
69 * indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] -- The index of Unicode version of last entry in NormalizationCorrections.txt
70 * indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] -- The starting index of 1 UChar mapping index in the mapping table
71 * indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] -- The starting index of 2 UChars mapping index in the mapping table
72 * indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
73 * indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] -- The starting index of 4 UChars mapping index in the mapping table
74 * indexes[_SPREP_OPTIONS] -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
75 *
76 *
77 * StringPrep Trie :
78 *
79 * The StringPrep trie is a 16-bit trie that contains data for the profile.
80 * Each code point is associated with a value (trie-word) in the trie.
81 *
82 * - structure of data words from the trie
83 *
84 * i) A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
85 * represents the type associated with the code point
86 * if(trieWord >= _SPREP_TYPE_THRESHOLD){
87 * type = trieWord - 0xFFF0;
88 * }
89 * The type can be :
90 * USPREP_UNASSIGNED
91 * USPREP_PROHIBITED
92 * USPREP_DELETE
93 *
94 * ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
95 * contains distribution described below
96 *
97 * 0 - ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
98 * 1 - ON : The value in the next 14 bits is an index into the mapping table
99 * OFF: The value in the next 14 bits is a delta value from the code point
100 * 2..15 - Contains data as described by bit 1. If all bits are set
101 * (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
102 *
103 *
104 * Mapping Table:
105 * The data in mapping table is sorted according to the length of the mapping sequence.
106 * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
107 * is compared with start indexes of sequence length start to figure out the length according to
108 * the following algorithm:
109 *
110 * if( index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
111 * index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
112 * length = 1;
113 * }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
114 * index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
115 * length = 2;
116 * }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
117 * index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
118 * length = 3;
119 * }else{
120 * // The first position in the mapping table contains the length
121 * // of the sequence
122 * length = mappingTable[index++];
123 *
124 * }
125 *
126 */
127
128 /* file data ---------------------------------------------------------------- */
129 /* indexes[] value names */
130
131 #if UCONFIG_NO_IDNA
132
133 /* dummy UDataInfo cf. udata.h */
134 static UDataInfo dataInfo = {
135 sizeof(UDataInfo),
136 0,
137
138 U_IS_BIG_ENDIAN,
139 U_CHARSET_FAMILY,
140 U_SIZEOF_UCHAR,
141 0,
142
143 { 0, 0, 0, 0 }, /* dummy dataFormat */
144 { 0, 0, 0, 0 }, /* dummy formatVersion */
145 { 0, 0, 0, 0 } /* dummy dataVersion */
146 };
147
148 #else
149
150 static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
151
152 static uint16_t* mappingData= NULL;
153 static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
154 static int16_t currentIndex = 0; /* the current index into the data trie */
155 static int32_t maxLength = 0; /* maximum length of mapping string */
156
157
158 /* UDataInfo cf. udata.h */
159 static UDataInfo dataInfo={
160 sizeof(UDataInfo),
161 0,
162
163 U_IS_BIG_ENDIAN,
164 U_CHARSET_FAMILY,
165 U_SIZEOF_UCHAR,
166 0,
167
168 { 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */
169 { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
170 { 3, 2, 0, 0 } /* dataVersion (Unicode version) */
171 };
172 void
173 setUnicodeVersion(const char *v) {
174 UVersionInfo version;
175 u_versionFromString(version, v);
176 uprv_memcpy(dataInfo.dataVersion, version, 4);
177 }
178
179 void
180 setUnicodeVersionNC(UVersionInfo version){
181 uint32_t univer = version[0] << 24;
182 univer += version[1] << 16;
183 univer += version[2] << 8;
184 univer += version[3];
185 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
186 }
187 static UNewTrie *sprepTrie;
188
189 #define MAX_DATA_LENGTH 11500
190
191
192 #define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191
193 #define SPREP_DELTA_RANGE_NEGATIVE_LIMIT -8192
194
195
196 extern void
197 init() {
198
199 sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
200 uprv_memset(sprepTrie, 0, sizeof(UNewTrie));
201
202 /* initialize the two tries */
203 if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
204 fprintf(stderr, "error: failed to initialize tries\n");
205 exit(U_MEMORY_ALLOCATION_ERROR);
206 }
207 }
208
209 static UHashtable* hashTable = NULL;
210
211
212 typedef struct ValueStruct {
213 UChar* mapping;
214 int16_t length;
215 UStringPrepType type;
216 } ValueStruct;
217
218 /* Callback for deleting the value from the hashtable */
219 static void U_CALLCONV valueDeleter(void* obj){
220 ValueStruct* value = (ValueStruct*) obj;
221 uprv_free(value->mapping);
222 uprv_free(value);
223 }
224
225 /* Callback for hashing the entry */
226 static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
227 return parm.integer;
228 }
229
230 /* Callback for comparing two entries */
231 static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
232 return (UBool)(p1.integer != p2.integer);
233 }
234
235
236 static void
237 storeMappingData(){
238
239 int32_t pos = -1;
240 const UHashElement* element = NULL;
241 ValueStruct* value = NULL;
242 int32_t codepoint = 0;
243 int32_t elementCount = 0;
244 int32_t writtenElementCount = 0;
245 int32_t mappingLength = 1; /* minimum mapping length */
246 int32_t oldMappingLength = 0;
247 uint16_t trieWord =0;
248 int32_t limitIndex = 0;
249
250 if (hashTable == NULL) {
251 return;
252 }
253 elementCount = uhash_count(hashTable);
254
255 /*initialize the mapping data */
256 mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
257
258 uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
259
260 while(writtenElementCount < elementCount){
261
262 while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
263
264 codepoint = element->key.integer;
265 value = (ValueStruct*)element->value.pointer;
266
267 /* store the start of indexes */
268 if(oldMappingLength != mappingLength){
269 /* Assume that index[] is used according to the enums defined */
270 if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
271 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
272 }
273 if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
274 mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
275
276 limitIndex = currentIndex;
277
278 }
279 oldMappingLength = mappingLength;
280 }
281
282 if(value->length == mappingLength){
283 uint32_t savedTrieWord = 0;
284 trieWord = currentIndex << 2;
285 /* turn on the 2nd bit to signal that the following bits contain an index */
286 trieWord += 0x02;
287
288 if(trieWord > _SPREP_TYPE_THRESHOLD){
289 fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
290 exit(U_ILLEGAL_CHAR_FOUND);
291 }
292 /* figure out if the code point has type already stored */
293 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
294 if(savedTrieWord!=0){
295 if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
296 /* turn on the first bit in trie word */
297 trieWord += 0x01;
298 }else{
299 /*
300 * the codepoint has value something other than prohibited
301 * and a mapping .. error!
302 */
303 fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
304 exit(U_ILLEGAL_ARGUMENT_ERROR);
305 }
306 }
307
308 /* now set the value in the trie */
309 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
310 fprintf(stderr,"Could not set the value for code point.\n");
311 exit(U_ILLEGAL_ARGUMENT_ERROR);
312 }
313
314 /* written the trie word for the codepoint... increment the count*/
315 writtenElementCount++;
316
317 /* sanity check are we exceeding the max number allowed */
318 if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
319 fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
320 exit(U_INDEX_OUTOFBOUNDS_ERROR);
321 }
322
323 /* copy the mapping data */
324 if(currentIndex+value->length+1 <= mappingDataCapacity){
325 /* write the length */
326 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
327 /* the cast here is safe since we donot expect the length to be > 65535 */
328 mappingData[currentIndex++] = (uint16_t) mappingLength;
329 }
330 /* copy the contents to mappindData array */
331 uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
332 currentIndex += value->length;
333
334 }else{
335 /* realloc */
336 UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
337 if(newMappingData == NULL){
338 fprintf(stderr, "Could not realloc the mapping data!\n");
339 exit(U_MEMORY_ALLOCATION_ERROR);
340 }
341 uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
342 mappingDataCapacity *= 2;
343 uprv_free(mappingData);
344 mappingData = newMappingData;
345 /* write the length */
346 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
347 /* the cast here is safe since we donot expect the length to be > 65535 */
348 mappingData[currentIndex++] = (uint16_t) mappingLength;
349 }
350 /* continue copying */
351 uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
352 currentIndex += value->length;
353 }
354
355 }
356 }
357 mappingLength++;
358 pos = -1;
359 }
360 /* set the last length for range check */
361 if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
362 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
363 }else{
364 indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
365 }
366
367 }
368
369 extern void setOptions(int32_t options){
370 indexes[_SPREP_OPTIONS] = options;
371 }
372 extern void
373 storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
374 UStringPrepType type, UErrorCode* status){
375
376
377 UChar* map = NULL;
378 int16_t adjustedLen=0, i;
379 uint16_t trieWord = 0;
380 ValueStruct *value = NULL;
381 uint32_t savedTrieWord = 0;
382
383 /* initialize the hashtable */
384 if(hashTable==NULL){
385 hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
386 uhash_setValueDeleter(hashTable, valueDeleter);
387 }
388
389 /* figure out if the code point has type already stored */
390 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
391 if(savedTrieWord!=0){
392 if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
393 /* turn on the first bit in trie word */
394 trieWord += 0x01;
395 }else{
396 /*
397 * the codepoint has value something other than prohibited
398 * and a mapping .. error!
399 */
400 fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
401 exit(U_ILLEGAL_ARGUMENT_ERROR);
402 }
403 }
404
405 /* figure out the real length */
406 for(i=0; i<length; i++){
407 if(mapping[i] > 0xFFFF){
408 adjustedLen +=2;
409 }else{
410 adjustedLen++;
411 }
412 }
413
414 if(adjustedLen == 0){
415 trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
416 /* make sure that the value of trieWord is less than the threshold */
417 if(trieWord < _SPREP_TYPE_THRESHOLD){
418 /* now set the value in the trie */
419 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
420 fprintf(stderr,"Could not set the value for code point.\n");
421 exit(U_ILLEGAL_ARGUMENT_ERROR);
422 }
423 /* value is set so just return */
424 return;
425 }else{
426 fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
427 exit(U_ILLEGAL_CHAR_FOUND);
428 }
429 }
430
431 if(adjustedLen == 1){
432 /* calculate the delta */
433 int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
434 if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
435
436 trieWord = delta << 2;
437
438
439 /* make sure that the second bit is OFF */
440 if((trieWord & 0x02) != 0 ){
441 fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
442 exit(U_INTERNAL_PROGRAM_ERROR);
443 }
444 /* make sure that the value of trieWord is less than the threshold */
445 if(trieWord < _SPREP_TYPE_THRESHOLD){
446 /* now set the value in the trie */
447 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
448 fprintf(stderr,"Could not set the value for code point.\n");
449 exit(U_ILLEGAL_ARGUMENT_ERROR);
450 }
451 /* value is set so just return */
452 return;
453 }
454 }
455 /*
456 * if the delta is not in the given range or if the trieWord is larger than the threshold
457 * just fall through for storing the mapping in the mapping table
458 */
459 }
460
461 map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
462 uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));
463
464 i=0;
465
466 while(i<length){
467 if(mapping[i] <= 0xFFFF){
468 map[i] = (uint16_t)mapping[i];
469 }else{
470 map[i] = UTF16_LEAD(mapping[i]);
471 map[i+1] = UTF16_TRAIL(mapping[i]);
472 }
473 i++;
474 }
475
476 value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
477 value->mapping = map;
478 value->type = type;
479 value->length = adjustedLen;
480 if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
481 mappingDataCapacity++;
482 }
483 if(maxLength < value->length){
484 maxLength = value->length;
485 }
486 uhash_iput(hashTable,codepoint,value,status);
487 mappingDataCapacity += adjustedLen;
488
489 if(U_FAILURE(*status)){
490 fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
491 exit(*status);
492 }
493 }
494
495
496 extern void
497 storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
498 uint16_t trieWord = 0;
499
500 if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
501 fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
502 exit(U_ILLEGAL_CHAR_FOUND);
503 }
504 trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
505 if(start == end){
506 uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
507 if(savedTrieWord>0){
508 if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
509 /*
510 * A mapping is stored in the trie word
511 * and the only other possible type that a
512 * code point can have is USPREP_PROHIBITED
513 *
514 */
515
516 /* turn on the 0th bit in the savedTrieWord */
517 savedTrieWord += 0x01;
518
519 /* the downcast is safe since we only save 16 bit values */
520 trieWord = (uint16_t)savedTrieWord;
521
522 /* make sure that the value of trieWord is less than the threshold */
523 if(trieWord < _SPREP_TYPE_THRESHOLD){
524 /* now set the value in the trie */
525 if(!utrie_set32(sprepTrie,start,trieWord)){
526 fprintf(stderr,"Could not set the value for code point.\n");
527 exit(U_ILLEGAL_ARGUMENT_ERROR);
528 }
529 /* value is set so just return */
530 return;
531 }else{
532 fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
533 exit(U_ILLEGAL_CHAR_FOUND);
534 }
535
536 }else if(savedTrieWord != trieWord){
537 fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
538 exit(U_ILLEGAL_ARGUMENT_ERROR);
539 }
540 /* if savedTrieWord == trieWord .. fall through and set the value */
541 }
542 if(!utrie_set32(sprepTrie,start,trieWord)){
543 fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
544 exit(U_ILLEGAL_ARGUMENT_ERROR);
545 }
546 }else{
547 if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
548 fprintf(stderr,"Value for certain codepoint already set.\n");
549 exit(U_ILLEGAL_CHAR_FOUND);
550 }
551 }
552
553 }
554
555 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
556 static uint32_t U_CALLCONV
557 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
558 uint32_t foldedValue, value;
559 UChar32 limit=0;
560 UBool inBlockZero;
561
562 foldedValue=0;
563
564 limit=start+0x400;
565 while(start<limit) {
566 value=utrie_get32(trie, start, &inBlockZero);
567 if(inBlockZero) {
568 start+=UTRIE_DATA_BLOCK_LENGTH;
569 } else if(value!=0) {
570 return (uint32_t)offset;
571 } else {
572 ++start;
573 }
574 }
575 return 0;
576
577 }
578
579 #endif /* #if !UCONFIG_NO_IDNA */
580
581 extern void
582 generateData(const char *dataDir, const char* bundleName) {
583 static uint8_t sprepTrieBlock[100000];
584
585 UNewDataMemory *pData;
586 UErrorCode errorCode=U_ZERO_ERROR;
587 int32_t size, dataLength;
588 char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
589
590 #if UCONFIG_NO_IDNA
591
592 size=0;
593
594 #else
595
596 int32_t sprepTrieSize;
597
598 /* sort and add mapping data */
599 storeMappingData();
600
601 sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
602 if(U_FAILURE(errorCode)) {
603 fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
604 exit(errorCode);
605 }
606
607 size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
608 if(beVerbose) {
609 printf("size of sprep trie %5u bytes\n", (int)sprepTrieSize);
610 printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
611 printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
612 printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
613 printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
614 }
615
616 #endif
617
618 fileName[0]=0;
619 uprv_strcat(fileName,bundleName);
620 /* write the data */
621 pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
622 haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
623 if(U_FAILURE(errorCode)) {
624 fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
625 exit(errorCode);
626 }
627
628 #if !UCONFIG_NO_IDNA
629
630 indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
631 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
632
633 udata_writeBlock(pData, indexes, sizeof(indexes));
634 udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
635 udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
636
637
638 #endif
639
640 /* finish up */
641 dataLength=udata_finish(pData, &errorCode);
642 if(U_FAILURE(errorCode)) {
643 fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
644 exit(errorCode);
645 }
646
647 if(dataLength!=size) {
648 fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
649 (long)dataLength, (long)size);
650 exit(U_INTERNAL_PROGRAM_ERROR);
651 }
652
653 #if !UCONFIG_NO_IDNA
654 /* done with writing the data .. close the hashtable */
655 if (hashTable != NULL) {
656 uhash_close(hashTable);
657 }
658 #endif
659 }
660
661 #if !UCONFIG_NO_IDNA
662
663 extern void
664 cleanUpData(void) {
665
666 utrie_close(sprepTrie);
667 uprv_free(sprepTrie);
668 }
669
670 #endif /* #if !UCONFIG_NO_IDNA */
671
672 /*
673 * Hey, Emacs, please set the following:
674 *
675 * Local Variables:
676 * indent-tabs-mode: nil
677 * End:
678 *
679 */