]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/tools/gensprep/store.c
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / tools / gensprep / store.c
diff --git a/icuSources/tools/gensprep/store.c b/icuSources/tools/gensprep/store.c
new file mode 100644 (file)
index 0000000..6a70d61
--- /dev/null
@@ -0,0 +1,681 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 1999-2004, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  store.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2003-02-06
+*   created by: Ram Viswanadha
+*
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "cstring.h"
+#include "filestrm.h"
+#include "unicode/udata.h"
+#include "utrie.h"
+#include "unewdata.h"
+#include "gensprep.h"
+#include "uhash.h"
+
+
+#ifdef WIN32
+#   pragma warning(disable: 4100)
+#endif
+
+#define DO_DEBUG_OUT 0
+
+
+/* 
+ * StringPrep profile file format ------------------------------------
+ * 
+ * The file format prepared and written here contains a 16-bit trie and a mapping table.
+ * 
+ * Before the data contents described below, there are the headers required by
+ * the udata API for loading ICU data. Especially, a UDataInfo structure
+ * precedes the actual data. It contains platform properties values and the
+ * file format version.
+ * 
+ * The following is a description of format version 2.
+ * 
+ * Data contents:
+ * 
+ * The contents is a parsed, binary form of RFC3454 and possibly
+ * NormalizationCorrections.txt depending on the options specified on the profile.
+ * 
+ * Any Unicode code point from 0 to 0x10ffff can be looked up to get
+ * the trie-word, if any, for that code point. This means that the input
+ * to the lookup are 21-bit unsigned integers, with not all of the
+ * 21-bit range used.
+ * 
+ * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
+ * After that there are the following structures:
+ *
+ * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
+ *
+ * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
+ * 
+ * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to 
+ *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
+ *
+ * The indexes array contains the following values:
+ *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
+ *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes 
+ *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt 
+ *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table 
+ *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
+ *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
+ *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
+ *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
+ *    
+ *
+ * StringPrep Trie :
+ *
+ * The StringPrep trie is a 16-bit trie that contains data for the profile. 
+ * Each code point is associated with a value (trie-word) in the trie.
+ *
+ * - structure of data words from the trie
+ * 
+ *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0) 
+ *      represents the type associated with the code point
+ *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
+ *          type = trieWord - 0xFFF0;
+ *      }
+ *      The type can be :
+ *             USPREP_UNASSIGNED                     
+ *             USPREP_PROHIBITED       
+ *             USPREP_DELETE     
+ *     
+ *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
+ *      the bits of the trie word are laid out as described below
+ *      
+ *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
+ *      1       -  ON : The value in the next 14 bits is an index into the mapping table
+ *                 OFF: The value in the next 14 bits is a delta value from the code point
+ *      2..15   -  Contains data as described by bit 1. If all bits are set 
+ *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
+ *
+ *  
+ * Mapping Table:
+ * The data in mapping table is sorted according to the length of the mapping sequence.
+ * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
+ * is compared with start indexes of sequence length start to figure out the length according to
+ * the following algorithm:
+ *
+ *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
+ *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
+ *                   length = 1;
+ *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
+ *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
+ *                   length = 2;
+ *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
+ *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
+ *                   length = 3;
+ *               }else{
+ *                   // The first position in the mapping table contains the length 
+ *                   // of the sequence
+ *                   length = mappingTable[index++];
+ *        
+ *               }
+ *
+ */
+
+/* file data ---------------------------------------------------------------- */
+/* indexes[] value names */
+
+#if UCONFIG_NO_IDNA
+
+/* dummy UDataInfo cf. udata.h */
+static UDataInfo dataInfo = {
+    sizeof(UDataInfo),              /* size of this structure */
+    0,                              /* reserved (presumably reservedWord, cf. udata.h) */
+
+    U_IS_BIG_ENDIAN,                /* endianness of the build platform */
+    U_CHARSET_FAMILY,               /* charset family of the build platform */
+    U_SIZEOF_UCHAR,                 /* size of a UChar on the build platform */
+    0,                              /* reserved (presumably reservedByte, cf. udata.h) */
+
+    { 0, 0, 0, 0 },                 /* dummy dataFormat */
+    { 0, 0, 0, 0 },                 /* dummy formatVersion */
+    { 0, 0, 0, 0 }                  /* dummy dataVersion */
+};
+
+#else
+
+static int32_t indexes[_SPREP_INDEX_TOP]={ 0 }; /* index array written ahead of the trie by generateData(); see the format description above */
+
+static uint16_t* mappingData= NULL; /* the mapping table; allocated and filled by storeMappingData() */
+static int32_t mappingDataCapacity = 0; /* capacity of mappingData in code units; accumulated by storeMapping() */
+static int16_t currentIndex = 0; /* the next free position in mappingData */
+static int32_t maxLength = 0;  /* maximum length of mapping string */
+
+
+/* UDataInfo cf. udata.h */
+static UDataInfo dataInfo={
+    sizeof(UDataInfo),                          /* size of this structure */
+    0,                                          /* reserved (presumably reservedWord, cf. udata.h) */
+
+    U_IS_BIG_ENDIAN,                            /* endianness of the build platform */
+    U_CHARSET_FAMILY,                           /* charset family of the build platform */
+    U_SIZEOF_UCHAR,                             /* size of a UChar on the build platform */
+    0,                                          /* reserved (presumably reservedByte, cf. udata.h) */
+
+    { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
+    { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
+    { 3, 2, 0, 0 }                              /* dataVersion (Unicode version); overwritten by setUnicodeVersion() */
+};
+/*
+ * Parses the version string v and stores the resulting four version fields
+ * as the dataVersion of the UDataInfo header.
+ */
+void
+setUnicodeVersion(const char *v) {
+    UVersionInfo parsed;
+
+    u_versionFromString(parsed, v);
+
+    /* dataVersion holds exactly the four version fields */
+    uprv_memcpy(dataInfo.dataVersion, parsed, 4);
+}
+
+/*
+ * Packs the four fields of version into one 32-bit value (first field in the
+ * most significant byte) and stores it in
+ * indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION].
+ */
+void
+setUnicodeVersionNC(UVersionInfo version){
+    /*
+     * Widen each field before shifting: shifting the promoted (signed) int
+     * by 24 is undefined once the first field is 0x80 or larger.
+     */
+    uint32_t univer = ((uint32_t)version[0] << 24) |
+                      ((uint32_t)version[1] << 16) |
+                      ((uint32_t)version[2] << 8)  |
+                       (uint32_t)version[3];
+
+    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = (int32_t)univer;
+}
+static UNewTrie *sprepTrie; /* build-time trie: allocated in init(), freed in cleanUpData() */
+/* data length limit handed to utrie_open() in init() */
+#define MAX_DATA_LENGTH 11500
+
+/* storeMapping() stores a mapping as a delta only if it lies within these limits (the signed 14-bit range) */
+#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191 
+#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              -8192
+
+
+/*
+ * Allocates and opens the build-time StringPrep trie (sprepTrie).
+ * Prints a message and exits with U_MEMORY_ALLOCATION_ERROR if the trie
+ * cannot be allocated or opened.
+ */
+extern void
+init() {
+
+    sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
+    if(sprepTrie == NULL) {
+        /* do not hand a NULL pointer to uprv_memset()/utrie_open() */
+        fprintf(stderr, "error: failed to allocate memory for the trie\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    uprv_memset(sprepTrie, 0, sizeof(UNewTrie));
+
+    /* initialize the trie */
+    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
+        fprintf(stderr, "error: failed to initialize tries\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+}
+
+static UHashtable* hashTable = NULL; /* code point -> ValueStruct*; created lazily by storeMapping() */
+
+
+typedef struct ValueStruct {
+    UChar* mapping;        /* code units the code point maps to; freed by valueDeleter() */
+    int16_t length;        /* number of code units in mapping */
+    UStringPrepType type;  /* the type passed to storeMapping() */
+} ValueStruct;
+
+/* Hashtable value deleter: frees a ValueStruct together with its mapping buffer */
+static void U_CALLCONV valueDeleter(void* obj){
+    ValueStruct* entry = (ValueStruct*) obj;
+
+    /* the mapping buffer is owned by the entry, so release it first */
+    uprv_free(entry->mapping);
+    uprv_free(entry);
+}
+
+/* Hashtable key hasher: the integer key is used directly as its hash code */
+static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
+    const int32_t hashCode = parm.integer;
+
+    return hashCode;
+}
+
+/*
+ * Callback for comparing two entries.
+ * A uhash key comparator must return TRUE when the two keys are EQUAL; the
+ * previous "!=" test reported equal integer keys as different, so a lookup
+ * could never match and a repeated key was added as a second element
+ * instead of replacing the first.
+ */
+static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
+    return (UBool)(p1.integer == p2.integer);
+}
+
+
+/*
+ * Lays out every mapping collected by storeMapping() in mappingData and
+ * stores an "index" trie word for each mapped code point.
+ *
+ * The hashtable is scanned once per mapping length, shortest first, so that
+ * mappings of equal length are contiguous in the table. The start index of
+ * each length group is recorded in indexes[]; mappings longer than
+ * _SPREP_MAX_INDEX_TOP_LENGTH are preceded by their length in the table.
+ *
+ * Exits the program on allocation failure, if a trie word or table index
+ * would be out of range, or if a code point already has a type other than
+ * USPREP_PROHIBITED.
+ */
+static void 
+storeMappingData(){
+
+    int32_t pos = -1;
+    const UHashElement* element = NULL;
+    ValueStruct* value  = NULL;
+    int32_t codepoint = 0;
+    /* storeMapping() creates the table lazily: no table means nothing to lay out */
+    int32_t elementCount = (hashTable != NULL) ? uhash_count(hashTable) : 0;
+    int32_t writtenElementCount = 0;
+    int32_t mappingLength = 1; /* minimum mapping length */
+    int32_t oldMappingLength = 0;
+    uint16_t trieWord =0;
+    int32_t limitIndex = 0;
+
+    /*initialize the mapping data */
+    mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
+    if(mappingData != NULL){
+        uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
+    }else if(mappingDataCapacity > 0){
+        /* a NULL result is only acceptable for a zero-sized request */
+        fprintf(stderr, "Could not allocate the mapping data!\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+    while(writtenElementCount < elementCount){
+
+        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
+            
+            codepoint = element->key.integer;
+            value = (ValueStruct*)element->value.pointer;
+            
+            /* store the start of indexes */
+            if(oldMappingLength != mappingLength){
+                /* Assume that index[] is used according to the enums defined */
+                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
+                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
+                }
+                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
+                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
+                   
+                    limitIndex = currentIndex;
+                     
+                }
+                oldMappingLength = mappingLength;
+            }
+
+            if(value->length == mappingLength){
+                uint32_t savedTrieWord = 0;
+                /*
+                 * index in bits 2..15, with the 2nd bit turned on to signal
+                 * that the following bits contain an index; built in 32 bits
+                 * so that the range check sees the value before truncation
+                 */
+                uint32_t indexWord = ((uint32_t)currentIndex << 2) + 0x02;
+                /* one slot more than the mapping itself, for an optional length prefix */
+                int32_t requiredLength = currentIndex+value->length+1;
+            
+                if(indexWord > _SPREP_TYPE_THRESHOLD){
+                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
+                    exit(U_ILLEGAL_CHAR_FOUND);
+                }
+                trieWord = (uint16_t)indexWord;
+
+                /* figure out if the code point has type already stored */
+                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
+                if(savedTrieWord!=0){
+                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
+                        /* turn on the first bit in trie word */
+                        trieWord += 0x01;
+                    }else{
+                        /* 
+                         * the codepoint has value something other than prohibited
+                         * and a mapping .. error! 
+                         */
+                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
+                        exit(U_ILLEGAL_ARGUMENT_ERROR); 
+                    } 
+                } 
+                
+                /* now set the value in the trie */
+                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
+                    fprintf(stderr,"Could not set the value for code point.\n");
+                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
+                }
+
+                /* written the trie word for the codepoint... increment the count*/
+                writtenElementCount++;
+
+                /* sanity check are we exceeding the max number allowed */
+                if(requiredLength > _SPREP_MAX_INDEX_VALUE){
+                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
+                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
+                }
+
+                /* grow the table first if this entry does not fit */
+                if(requiredLength > mappingDataCapacity){
+                    int32_t newCapacity = mappingDataCapacity*2;
+                    uint16_t* newMappingData = NULL;
+
+                    if(newCapacity < requiredLength){
+                        newCapacity = requiredLength;
+                    }
+                    newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * newCapacity);
+                    if(newMappingData == NULL){
+                        fprintf(stderr, "Could not realloc the mapping data!\n");
+                        exit(U_MEMORY_ALLOCATION_ERROR);
+                    }
+                    /*
+                     * generateData() writes the whole capacity to the data file,
+                     * so the unused tail must not contain uninitialized memory
+                     */
+                    uprv_memset(newMappingData, 0, U_SIZEOF_UCHAR * newCapacity);
+                    if(mappingData != NULL){
+                        uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
+                    }
+                    uprv_free(mappingData);
+                    mappingData = newMappingData;
+                    mappingDataCapacity = newCapacity;
+                }
+
+                /* write the length */
+                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
+                     /* the cast here is safe since we donot expect the length to be > 65535 */
+                     mappingData[currentIndex++] = (uint16_t) mappingLength;
+                }
+                /* copy the contents to mappingData array */
+                uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
+                currentIndex += value->length;
+            }
+        }
+        mappingLength++;
+        pos = -1;
+    }
+    /* set the last length for range check */
+    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
+        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
+    }else{
+        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
+    }
+    
+}
+
+/* Records the profile's option bit set in indexes[_SPREP_OPTIONS] */
+extern void
+setOptions(int32_t options){
+    indexes[_SPREP_OPTIONS] = options;
+}
+/*
+ * Records the mapping of one code point.
+ *
+ *  codepoint  the code point being mapped
+ *  mapping    the code points it maps to (UTF-32)
+ *  length     number of code points in mapping; 0 means "delete"
+ *  type       stored with the hashtable entry
+ *  status     ICU error code used for the hashtable operations
+ *
+ * An empty mapping is stored in the trie as _SPREP_MAX_INDEX_VALUE. A single
+ * code unit whose distance from codepoint fits the delta range is stored in
+ * the trie as a delta. Everything else is converted to UTF-16 and kept in the
+ * hashtable until storeMappingData() lays out the mapping table.
+ *
+ * Exits the program on allocation/hashtable failure or if the code point
+ * already has a type other than USPREP_PROHIBITED.
+ */
+extern void
+storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
+             UStringPrepType type, UErrorCode* status){
+    
+    UChar* map = NULL;
+    int16_t adjustedLen=0;
+    int32_t i = 0;
+    int32_t dest = 0;
+    uint16_t trieWord = 0;
+    uint16_t prohibitedBit = 0;
+    ValueStruct *value = NULL;
+    uint32_t savedTrieWord = 0;
+
+    /* initialize the hashtable */
+    if(hashTable==NULL){
+        hashTable = uhash_open(hashEntry, compareEntries, status);
+        if(U_FAILURE(*status) || hashTable==NULL){
+            fprintf(stderr, "Failed to create the hashtable. Error: %s\n", u_errorName(*status));
+            exit(U_FAILURE(*status) ? *status : U_MEMORY_ALLOCATION_ERROR);
+        }
+        uhash_setValueDeleter(hashTable, valueDeleter);
+    }
+    
+    /* figure out if the code point has type already stored */
+    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
+    if(savedTrieWord!=0){
+        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
+            /* remember to turn on the first bit in the trie word */
+            prohibitedBit = 0x01;
+        }else{
+            /* 
+             * the codepoint has value something other than prohibited
+             * and a mapping .. error! 
+             */
+            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
+            exit(U_ILLEGAL_ARGUMENT_ERROR); 
+        } 
+    }
+
+    /* figure out the real length */ 
+    for(i=0; i<length; i++){
+        if(mapping[i] > 0xFFFF){
+            adjustedLen +=2;
+        }else{
+            adjustedLen++;
+        }      
+    }
+
+    if(adjustedLen == 0){
+        /* keep the prohibited bit instead of overwriting it */
+        trieWord = (uint16_t)((_SPREP_MAX_INDEX_VALUE << 2) | prohibitedBit);
+        /* make sure that the value of trieWord is less than the threshold */
+        if(trieWord < _SPREP_TYPE_THRESHOLD){   
+            /* now set the value in the trie */
+            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
+                fprintf(stderr,"Could not set the value for code point.\n");
+                exit(U_ILLEGAL_ARGUMENT_ERROR);   
+            }
+            /* value is set so just return */
+            return;
+        }else{
+            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
+            exit(U_ILLEGAL_CHAR_FOUND);
+        }
+    }
+
+    if(adjustedLen == 1){
+        /*
+         * calculate the delta in 32 bits: truncating to 16 bits before the
+         * range check lets an out-of-range delta (e.g. 0xFFD8) wrap into the
+         * range and be stored as a wrong value
+         */
+        int32_t delta = (int32_t)codepoint - (int32_t)mapping[0];
+        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
+
+            /* shift as unsigned: left-shifting a negative value is undefined */
+            trieWord = (uint16_t)(((uint32_t)delta << 2) | prohibitedBit);
+
+            /* make sure that the second bit is OFF */
+            if((trieWord & 0x02) != 0 ){
+                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
+                exit(U_INTERNAL_PROGRAM_ERROR);
+            }
+            /* make sure that the value of trieWord is less than the threshold */
+            if(trieWord < _SPREP_TYPE_THRESHOLD){   
+                /* now set the value in the trie */
+                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
+                    fprintf(stderr,"Could not set the value for code point.\n");
+                    exit(U_ILLEGAL_ARGUMENT_ERROR);   
+                }
+                /* value is set so just return */
+                return;
+            }
+        }
+        /* 
+         * if the delta is not in the given range or if the trieWord is larger than the threshold
+         * just fall through for storing the mapping in the mapping table
+         */
+    }
+
+    map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
+    if(map == NULL){
+        fprintf(stderr, "Could not allocate memory for the mapping.\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));
+
+    /*
+     * convert to UTF-16; the destination index advances by two for a
+     * supplementary code point, so it must be tracked separately from
+     * the source index
+     */
+    dest = 0;
+    for(i=0; i<length; i++){
+        if(mapping[i] <= 0xFFFF){
+            map[dest++] = (UChar)mapping[i];
+        }else{
+            map[dest++] = UTF16_LEAD(mapping[i]);
+            map[dest++] = UTF16_TRAIL(mapping[i]);
+        }
+    }
+    
+    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
+    if(value == NULL){
+        fprintf(stderr, "Could not allocate memory for the mapping.\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+    value->mapping = map;
+    value->type   = type;
+    value->length  = adjustedLen;
+    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
+        /* long mappings are preceded by their length in the mapping table */
+        mappingDataCapacity++;
+    }
+    if(maxLength < value->length){
+        maxLength = value->length;
+    }
+    uhash_iput(hashTable,codepoint,value,status);
+    mappingDataCapacity += adjustedLen;
+
+    if(U_FAILURE(*status)){
+        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
+        exit(*status);
+    }
+}
+
+
+/*
+ * Stores the type of the code points start..end (inclusive) in the trie as
+ * _SPREP_TYPE_THRESHOLD + type.
+ *
+ * For a single code point that already carries a mapping word, the only
+ * type that can be added is USPREP_PROHIBITED, which turns on bit 0 of the
+ * existing word; any other conflicting value is an error.
+ *
+ * status is not used. Errors are reported on stderr and end the program.
+ */
+extern void
+storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
+    /* computed in 32 bits so that the overflow check below can actually fire */
+    uint32_t typeWord = (uint32_t)_SPREP_TYPE_THRESHOLD + (uint32_t)type;
+    uint16_t trieWord = 0;
+
+    (void)status;
+
+    if(typeWord > 0xFFFF){
+        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
+        exit(U_ILLEGAL_CHAR_FOUND);
+    }
+    trieWord = (uint16_t)typeWord; /* the top 4 bits contain the value */
+
+    if(start == end){
+        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
+        if(savedTrieWord>0){
+            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
+                /* 
+                 * A mapping is stored in the trie word 
+                 * and the only other possible type that a 
+                 * code point can have is USPREP_PROHIBITED
+                 *
+                 */
+
+                /*
+                 * turn on the 0th bit in the savedTrieWord; OR instead of
+                 * add, so that a bit that is already on does not carry into
+                 * the index/delta flag
+                 */
+                savedTrieWord |= 0x01;
+
+                /* the downcast is safe since we only save 16 bit values */
+                trieWord = (uint16_t)savedTrieWord;
+
+                /* make sure that the value of trieWord is less than the threshold */
+                if(trieWord < _SPREP_TYPE_THRESHOLD){   
+                    /* now set the value in the trie */
+                    if(!utrie_set32(sprepTrie,start,trieWord)){
+                        fprintf(stderr,"Could not set the value for code point.\n");
+                        exit(U_ILLEGAL_ARGUMENT_ERROR);   
+                    }
+                    /* value is set so just return */
+                    return;
+                }else{
+                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
+                    exit(U_ILLEGAL_CHAR_FOUND);
+                }
+            }else if(savedTrieWord != trieWord){
+                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
+                exit(U_ILLEGAL_ARGUMENT_ERROR);
+            }
+            /* if savedTrieWord == trieWord .. fall through and set the value */
+        }
+        if(!utrie_set32(sprepTrie,start,trieWord)){
+            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
+            exit(U_ILLEGAL_ARGUMENT_ERROR);   
+        }
+    }else{
+        /* the range limit is exclusive, hence end+1 */
+        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
+            fprintf(stderr,"Value for certain codepoint already set.\n");
+            exit(U_ILLEGAL_CHAR_FOUND);   
+        }
+    }
+
+}
+
+/*
+ * folding value: just store the offset (16 bits) if there is any non-0 entry
+ *
+ * Scans the 0x400 code points starting at start and returns offset as soon
+ * as one of them has a non-zero trie value; returns 0 if all are zero.
+ */
+static uint32_t U_CALLCONV
+getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
+    const UChar32 limit=start+0x400;
+    uint32_t value;
+    UBool inBlockZero;
+
+    while(start<limit) {
+        value=utrie_get32(trie, start, &inBlockZero);
+        if(inBlockZero) {
+            /* skip the rest of the data block in one step */
+            start+=UTRIE_DATA_BLOCK_LENGTH;
+        } else if(value!=0) {
+            return (uint32_t)offset;
+        } else {
+            ++start;
+        }
+    }
+    return 0;
+
+}
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+/*
+ * Writes the data file: UDataInfo header, then (unless UCONFIG_NO_IDNA)
+ * indexes[], the serialized trie and the mapping table.
+ *
+ *  dataDir      output directory handed to udata_create()
+ *  packageName  optional prefix; if not NULL the name is "<packageName>_<bundleName>"
+ *  bundleName   name of the bundle
+ *
+ * Exits the program if memory cannot be allocated, the trie cannot be
+ * serialized, the file cannot be created or written, or the number of bytes
+ * written differs from the calculated size.
+ */
+extern void
+generateData(const char *dataDir, const char *packageName, const char* bundleName) {
+    static uint8_t sprepTrieBlock[100000];
+
+    UNewDataMemory *pData;
+    UErrorCode errorCode=U_ZERO_ERROR;
+    int32_t size, dataLength;
+    size_t fileNameCapacity;
+    char* fileName;
+#if !UCONFIG_NO_IDNA
+    int32_t sprepTrieSize;
+#endif
+
+    /*
+     * room for "<packageName>_<bundleName>" plus the terminating NUL;
+     * a fixed amount of slack would overflow with a long package name
+     */
+    fileNameCapacity = uprv_strlen(bundleName) + 1;
+    if(packageName != NULL) {
+        fileNameCapacity += uprv_strlen(packageName) + 1;
+    }
+    fileName = (char*) uprv_malloc(fileNameCapacity);
+    if(fileName == NULL) {
+        fprintf(stderr, "gensprep: unable to allocate memory for the output file name\n");
+        exit(U_MEMORY_ALLOCATION_ERROR);
+    }
+
+#if UCONFIG_NO_IDNA
+
+    size=0;
+
+#else
+
+    /* sort and add mapping data */
+    storeMappingData();
+    
+    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
+        exit(errorCode);
+    }
+    
+    size = (int32_t)(sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes));
+    if(beVerbose) {
+        /* %u expects an unsigned argument */
+        printf("size of sprep trie              %5u bytes\n", (unsigned int)sprepTrieSize);
+        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
+        printf("size of mapping data array %5u bytes\n",(unsigned int)(mappingDataCapacity * U_SIZEOF_UCHAR));
+        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
+        printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
+    }
+
+#endif
+
+    if(packageName != NULL) {
+      uprv_strcpy(fileName,packageName);
+      uprv_strcat(fileName,"_");
+    } else {
+      fileName[0]=0;
+    }
+    uprv_strcat(fileName,bundleName);
+    /* write the data */
+    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
+                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
+        exit(errorCode);
+    }
+
+#if !UCONFIG_NO_IDNA
+
+    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
+    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
+    
+    udata_writeBlock(pData, indexes, sizeof(indexes));
+    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
+    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
+    
+
+#endif
+
+    /* finish up */
+    dataLength=udata_finish(pData, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
+        exit(errorCode);
+    }
+
+    if(dataLength!=size) {
+        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
+            (long)dataLength, (long)size);
+        exit(U_INTERNAL_PROGRAM_ERROR);
+    }
+
+#if !UCONFIG_NO_IDNA
+    /*
+     * done with writing the data .. close the hashtable;
+     * it only exists if storeMapping() was called
+     */
+    if(hashTable != NULL) {
+        uhash_close(hashTable);
+        hashTable = NULL;
+    }
+#endif
+
+    /* the name is no longer needed once the file has been finished */
+    uprv_free(fileName);
+}
+
+#if !UCONFIG_NO_IDNA
+
+/* Releases the build-time trie: closes it, then frees the UNewTrie allocated in init() */
+extern void
+cleanUpData(void) {
+    UNewTrie *trie = sprepTrie;
+
+    utrie_close(trie);
+    uprv_free(trie);
+}
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+/*
+ * Hey, Emacs, please set the following:
+ *
+ * Local Variables:
+ * indent-tabs-mode: nil
+ * End:
+ *
+ */