]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/gensprep/store.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / tools / gensprep / store.c
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1999-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: store.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003-02-06
14 * created by: Ram Viswanadha
15 *
16 */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "unicode/utypes.h"
21 #include "cmemory.h"
22 #include "cstring.h"
23 #include "filestrm.h"
24 #include "unicode/udata.h"
25 #include "utrie.h"
26 #include "unewdata.h"
27 #include "gensprep.h"
28 #include "uhash.h"
29
30
31 #ifdef WIN32
32 # pragma warning(disable: 4100)
33 #endif
34
35 #define DO_DEBUG_OUT 0
36
37
38 /*
39 * StringPrep profile file format ------------------------------------
40 *
41 * The file format prepared and written here contains a 16-bit trie and a mapping table.
42 *
43 * Before the data contents described below, there are the headers required by
44 * the udata API for loading ICU data. Especially, a UDataInfo structure
45 * precedes the actual data. It contains platform properties values and the
46 * file format version.
47 *
48 * The following is a description of format version 2.
49 *
50 * Data contents:
51 *
52 * The contents is a parsed, binary form of RFC3454 and possibly
53 * NormalizationCorrections.txt depending on the options specified on the profile.
54 *
55 * Any Unicode code point from 0 to 0x10ffff can be looked up to get
56 * the trie-word, if any, for that code point. This means that the input
57 * to the lookup are 21-bit unsigned integers, with not all of the
58 * 21-bit range used.
59 *
60 * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
61 * After that there are the following structures:
62 *
63 * int32_t indexes[_SPREP_INDEX_TOP]; -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
64 *
65 * UTrie stringPrepTrie; -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
66 *
67 * uint16_t mappingTable[]; -- Contains the sequence of code units that the code point maps to
68 * size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
69 *
70 * The indexes array contains the following values:
71 * indexes[_SPREP_INDEX_TRIE_SIZE] -- The size of the StringPrep trie in bytes
72 * indexes[_SPREP_INDEX_MAPPING_DATA_SIZE] -- The size of the mappingTable in bytes
73 * indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] -- The index of Unicode version of last entry in NormalizationCorrections.txt
74 * indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] -- The starting index of 1 UChar mapping index in the mapping table
75 * indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] -- The starting index of 2 UChars mapping index in the mapping table
76 * indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
77 * indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] -- The starting index of 4 UChars mapping index in the mapping table
78 * indexes[_SPREP_OPTIONS] -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
79 *
80 *
81 * StringPrep Trie :
82 *
83 * The StringPrep trie is a 16-bit trie that contains data for the profile.
84 * Each code point is associated with a value (trie-word) in the trie.
85 *
86 * - structure of data words from the trie
87 *
88 * i) A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
89 * represents the type associated with the code point
90 * if(trieWord >= _SPREP_TYPE_THRESHOLD){
91 * type = trieWord - 0xFFF0;
92 * }
93 * The type can be :
94 * USPREP_UNASSIGNED
95 * USPREP_PROHIBITED
96 * USPREP_DELETE
97 *
98 * ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
99 * contains distribution described below
100 *
101 * 0 - ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
102 * 1 - ON : The value in the next 14 bits is an index into the mapping table
103 * OFF: The value in the next 14 bits is a delta value from the code point
104 * 2..15 - Contains data as described by bit 1. If all bits are set
105 * (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
106 *
107 *
108 * Mapping Table:
109 * The data in mapping table is sorted according to the length of the mapping sequence.
110 * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
111 * is compared with start indexes of sequence length start to figure out the length according to
112 * the following algorithm:
113 *
114 * if( index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
115 * index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
116 * length = 1;
117 * }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
118 * index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
119 * length = 2;
120 * }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
121 * index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
122 * length = 3;
123 * }else{
124 * // The first position in the mapping table contains the length
125 * // of the sequence
126 * length = mappingTable[index++];
127 *
128 * }
129 *
130 */
131
132 /* file data ---------------------------------------------------------------- */
133 /* indexes[] value names */
134
135 #if UCONFIG_NO_IDNA
136
137 /* dummy UDataInfo cf. udata.h */
138 static UDataInfo dataInfo = {
139 sizeof(UDataInfo),
140 0,
141
142 U_IS_BIG_ENDIAN,
143 U_CHARSET_FAMILY,
144 U_SIZEOF_UCHAR,
145 0,
146
147 { 0, 0, 0, 0 }, /* dummy dataFormat */
148 { 0, 0, 0, 0 }, /* dummy formatVersion */
149 { 0, 0, 0, 0 } /* dummy dataVersion */
150 };
151
152 #else
153
154 static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
155
156 static uint16_t* mappingData= NULL;
157 static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
158 static int16_t currentIndex = 0; /* the current index into the data trie */
159 static int32_t maxLength = 0; /* maximum length of mapping string */
160
161
162 /* UDataInfo cf. udata.h */
163 static UDataInfo dataInfo={
164 sizeof(UDataInfo),
165 0,
166
167 U_IS_BIG_ENDIAN,
168 U_CHARSET_FAMILY,
169 U_SIZEOF_UCHAR,
170 0,
171
172 { 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */
173 { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */
174 { 3, 2, 0, 0 } /* dataVersion (Unicode version) */
175 };
176 void
177 setUnicodeVersion(const char *v) {
178 UVersionInfo version;
179 u_versionFromString(version, v);
180 uprv_memcpy(dataInfo.dataVersion, version, 4);
181 }
182
183 void
184 setUnicodeVersionNC(UVersionInfo version){
185 uint32_t univer = version[0] << 24;
186 univer += version[1] << 16;
187 univer += version[2] << 8;
188 univer += version[3];
189 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
190 }
191 static UNewTrie *sprepTrie;
192
/* Data length passed as the third argument of utrie_open() in init(). */
#define MAX_DATA_LENGTH 11500

/*
 * Inclusive bounds of a code point delta that storeMapping() may keep
 * directly in a trie word.  The negative value is parenthesized so the
 * macro expands safely inside any expression.
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT (-8192)
198
199
200 extern void
201 init() {
202
203 sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
204 uprv_memset(sprepTrie, 0, sizeof(UNewTrie));
205
206 /* initialize the two tries */
207 if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
208 fprintf(stderr, "error: failed to initialize tries\n");
209 exit(U_MEMORY_ALLOCATION_ERROR);
210 }
211 }
212
213 static UHashtable* hashTable = NULL;
214
215
216 typedef struct ValueStruct {
217 UChar* mapping;
218 int16_t length;
219 UStringPrepType type;
220 } ValueStruct;
221
222 /* Callback for deleting the value from the hashtable */
223 static void U_CALLCONV valueDeleter(void* obj){
224 ValueStruct* value = (ValueStruct*) obj;
225 uprv_free(value->mapping);
226 uprv_free(value);
227 }
228
229 /* Callback for hashing the entry */
230 static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
231 return parm.integer;
232 }
233
234 /* Callback for comparing two entries */
235 static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
236 return (UBool)(p1.integer != p2.integer);
237 }
238
239
240 static void
241 storeMappingData(){
242
243 int32_t pos = -1;
244 const UHashElement* element = NULL;
245 ValueStruct* value = NULL;
246 int32_t codepoint = 0;
247 int32_t elementCount = uhash_count(hashTable);
248 int32_t writtenElementCount = 0;
249 int32_t mappingLength = 1; /* minimum mapping length */
250 int32_t oldMappingLength = 0;
251 uint16_t trieWord =0;
252 int32_t limitIndex = 0;
253
254 /*initialize the mapping data */
255 mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
256
257 uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
258
259 while(writtenElementCount < elementCount){
260
261 while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
262
263 codepoint = element->key.integer;
264 value = (ValueStruct*)element->value.pointer;
265
266 /* store the start of indexes */
267 if(oldMappingLength != mappingLength){
268 /* Assume that index[] is used according to the enums defined */
269 if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
270 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
271 }
272 if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
273 mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
274
275 limitIndex = currentIndex;
276
277 }
278 oldMappingLength = mappingLength;
279 }
280
281 if(value->length == mappingLength){
282 uint32_t savedTrieWord = 0;
283 trieWord = currentIndex << 2;
284 /* turn on the 2nd bit to signal that the following bits contain an index */
285 trieWord += 0x02;
286
287 if(trieWord > _SPREP_TYPE_THRESHOLD){
288 fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
289 exit(U_ILLEGAL_CHAR_FOUND);
290 }
291 /* figure out if the code point has type already stored */
292 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
293 if(savedTrieWord!=0){
294 if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
295 /* turn on the first bit in trie word */
296 trieWord += 0x01;
297 }else{
298 /*
299 * the codepoint has value something other than prohibited
300 * and a mapping .. error!
301 */
302 fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
303 exit(U_ILLEGAL_ARGUMENT_ERROR);
304 }
305 }
306
307 /* now set the value in the trie */
308 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
309 fprintf(stderr,"Could not set the value for code point.\n");
310 exit(U_ILLEGAL_ARGUMENT_ERROR);
311 }
312
313 /* written the trie word for the codepoint... increment the count*/
314 writtenElementCount++;
315
316 /* sanity check are we exceeding the max number allowed */
317 if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
318 fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
319 exit(U_INDEX_OUTOFBOUNDS_ERROR);
320 }
321
322 /* copy the mapping data */
323 if(currentIndex+value->length+1 <= mappingDataCapacity){
324 /* write the length */
325 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
326 /* the cast here is safe since we donot expect the length to be > 65535 */
327 mappingData[currentIndex++] = (uint16_t) mappingLength;
328 }
329 /* copy the contents to mappindData array */
330 uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
331 currentIndex += value->length;
332
333 }else{
334 /* realloc */
335 UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
336 if(newMappingData == NULL){
337 fprintf(stderr, "Could not realloc the mapping data!\n");
338 exit(U_MEMORY_ALLOCATION_ERROR);
339 }
340 uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
341 mappingDataCapacity *= 2;
342 uprv_free(mappingData);
343 mappingData = newMappingData;
344 /* write the length */
345 if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
346 /* the cast here is safe since we donot expect the length to be > 65535 */
347 mappingData[currentIndex++] = (uint16_t) mappingLength;
348 }
349 /* continue copying */
350 uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
351 currentIndex += value->length;
352 }
353
354 }
355 }
356 mappingLength++;
357 pos = -1;
358 }
359 /* set the last length for range check */
360 if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
361 indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
362 }else{
363 indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
364 }
365
366 }
367
368 extern void setOptions(int32_t options){
369 indexes[_SPREP_OPTIONS] = options;
370 }
371 extern void
372 storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
373 UStringPrepType type, UErrorCode* status){
374
375
376 UChar* map = NULL;
377 int16_t adjustedLen=0, i;
378 uint16_t trieWord = 0;
379 ValueStruct *value = NULL;
380 uint32_t savedTrieWord = 0;
381
382 /* initialize the hashtable */
383 if(hashTable==NULL){
384 hashTable = uhash_open(hashEntry, compareEntries, status);
385 uhash_setValueDeleter(hashTable, valueDeleter);
386 }
387
388 /* figure out if the code point has type already stored */
389 savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
390 if(savedTrieWord!=0){
391 if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
392 /* turn on the first bit in trie word */
393 trieWord += 0x01;
394 }else{
395 /*
396 * the codepoint has value something other than prohibited
397 * and a mapping .. error!
398 */
399 fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
400 exit(U_ILLEGAL_ARGUMENT_ERROR);
401 }
402 }
403
404 /* figure out the real length */
405 for(i=0; i<length; i++){
406 if(mapping[i] > 0xFFFF){
407 adjustedLen +=2;
408 }else{
409 adjustedLen++;
410 }
411 }
412
413 if(adjustedLen == 0){
414 trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
415 /* make sure that the value of trieWord is less than the threshold */
416 if(trieWord < _SPREP_TYPE_THRESHOLD){
417 /* now set the value in the trie */
418 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
419 fprintf(stderr,"Could not set the value for code point.\n");
420 exit(U_ILLEGAL_ARGUMENT_ERROR);
421 }
422 /* value is set so just return */
423 return;
424 }else{
425 fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
426 exit(U_ILLEGAL_CHAR_FOUND);
427 }
428 }
429
430 if(adjustedLen == 1){
431 /* calculate the delta */
432 int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
433 if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
434
435 trieWord = delta << 2;
436
437
438 /* make sure that the second bit is OFF */
439 if((trieWord & 0x02) != 0 ){
440 fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
441 exit(U_INTERNAL_PROGRAM_ERROR);
442 }
443 /* make sure that the value of trieWord is less than the threshold */
444 if(trieWord < _SPREP_TYPE_THRESHOLD){
445 /* now set the value in the trie */
446 if(!utrie_set32(sprepTrie,codepoint,trieWord)){
447 fprintf(stderr,"Could not set the value for code point.\n");
448 exit(U_ILLEGAL_ARGUMENT_ERROR);
449 }
450 /* value is set so just return */
451 return;
452 }
453 }
454 /*
455 * if the delta is not in the given range or if the trieWord is larger than the threshold
456 * just fall through for storing the mapping in the mapping table
457 */
458 }
459
460 map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
461 uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));
462
463 i=0;
464
465 while(i<length){
466 if(mapping[i] <= 0xFFFF){
467 map[i] = (uint16_t)mapping[i];
468 }else{
469 map[i] = UTF16_LEAD(mapping[i]);
470 map[i+1] = UTF16_TRAIL(mapping[i]);
471 }
472 i++;
473 }
474
475 value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
476 value->mapping = map;
477 value->type = type;
478 value->length = adjustedLen;
479 if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
480 mappingDataCapacity++;
481 }
482 if(maxLength < value->length){
483 maxLength = value->length;
484 }
485 uhash_iput(hashTable,codepoint,value,status);
486 mappingDataCapacity += adjustedLen;
487
488 if(U_FAILURE(*status)){
489 fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
490 exit(*status);
491 }
492 }
493
494
495 extern void
496 storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
497 uint16_t trieWord = 0;
498
499 trieWord += (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
500 if(trieWord > 0xFFFF){
501 fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
502 exit(U_ILLEGAL_CHAR_FOUND);
503 }
504 if(start == end){
505 uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
506 if(savedTrieWord>0){
507 if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
508 /*
509 * A mapping is stored in the trie word
510 * and the only other possible type that a
511 * code point can have is USPREP_PROHIBITED
512 *
513 */
514
515 /* turn on the 0th bit in the savedTrieWord */
516 savedTrieWord += 0x01;
517
518 /* the downcast is safe since we only save 16 bit values */
519 trieWord = (uint16_t)savedTrieWord;
520
521 /* make sure that the value of trieWord is less than the threshold */
522 if(trieWord < _SPREP_TYPE_THRESHOLD){
523 /* now set the value in the trie */
524 if(!utrie_set32(sprepTrie,start,trieWord)){
525 fprintf(stderr,"Could not set the value for code point.\n");
526 exit(U_ILLEGAL_ARGUMENT_ERROR);
527 }
528 /* value is set so just return */
529 return;
530 }else{
531 fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
532 exit(U_ILLEGAL_CHAR_FOUND);
533 }
534
535 }else if(savedTrieWord != trieWord){
536 fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
537 exit(U_ILLEGAL_ARGUMENT_ERROR);
538 }
539 /* if savedTrieWord == trieWord .. fall through and set the value */
540 }
541 if(!utrie_set32(sprepTrie,start,trieWord)){
542 fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
543 exit(U_ILLEGAL_ARGUMENT_ERROR);
544 }
545 }else{
546 if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
547 fprintf(stderr,"Value for certain codepoint already set.\n");
548 exit(U_ILLEGAL_CHAR_FOUND);
549 }
550 }
551
552 }
553
554 /* folding value: just store the offset (16 bits) if there is any non-0 entry */
555 static uint32_t U_CALLCONV
556 getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
557 uint32_t foldedValue, value;
558 UChar32 limit=0;
559 UBool inBlockZero;
560
561 foldedValue=0;
562
563 limit=start+0x400;
564 while(start<limit) {
565 value=utrie_get32(trie, start, &inBlockZero);
566 if(inBlockZero) {
567 start+=UTRIE_DATA_BLOCK_LENGTH;
568 } else if(value!=0) {
569 return (uint32_t)offset;
570 } else {
571 ++start;
572 }
573 }
574 return 0;
575
576 }
577
578 #endif /* #if !UCONFIG_NO_IDNA */
579
580 extern void
581 generateData(const char *dataDir, const char *packageName, const char* bundleName) {
582 static uint8_t sprepTrieBlock[100000];
583
584 UNewDataMemory *pData;
585 UErrorCode errorCode=U_ZERO_ERROR;
586 int32_t size, dataLength;
587 char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
588
589 #if UCONFIG_NO_IDNA
590
591 size=0;
592
593 #else
594
595 int32_t sprepTrieSize;
596
597 /* sort and add mapping data */
598 storeMappingData();
599
600 sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
601 if(U_FAILURE(errorCode)) {
602 fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
603 exit(errorCode);
604 }
605
606 size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
607 if(beVerbose) {
608 printf("size of sprep trie %5u bytes\n", (int)sprepTrieSize);
609 printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
610 printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
611 printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
612 printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
613 }
614
615 #endif
616
617 if(packageName != NULL) {
618 uprv_strcpy(fileName,packageName);
619 uprv_strcat(fileName,"_");
620 } else {
621 fileName[0]=0;
622 }
623 uprv_strcat(fileName,bundleName);
624 /* write the data */
625 pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
626 haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
627 if(U_FAILURE(errorCode)) {
628 fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
629 exit(errorCode);
630 }
631
632 #if !UCONFIG_NO_IDNA
633
634 indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
635 indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
636
637 udata_writeBlock(pData, indexes, sizeof(indexes));
638 udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
639 udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
640
641
642 #endif
643
644 /* finish up */
645 dataLength=udata_finish(pData, &errorCode);
646 if(U_FAILURE(errorCode)) {
647 fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
648 exit(errorCode);
649 }
650
651 if(dataLength!=size) {
652 fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
653 (long)dataLength, (long)size);
654 exit(U_INTERNAL_PROGRAM_ERROR);
655 }
656
657 #if !UCONFIG_NO_IDNA
658 /* done with writing the data .. close the hashtable */
659 uhash_close(hashTable);
660 #endif
661 }
662
663 #if !UCONFIG_NO_IDNA
664
665 extern void
666 cleanUpData(void) {
667
668 utrie_close(sprepTrie);
669 uprv_free(sprepTrie);
670 }
671
672 #endif /* #if !UCONFIG_NO_IDNA */
673
674 /*
675 * Hey, Emacs, please set the following:
676 *
677 * Local Variables:
678 * indent-tabs-mode: nil
679 * End:
680 *
681 */