/*
*******************************************************************************
*
-* Copyright (C) 2001-2003, International Business Machines
+* Copyright (C) 2001-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
}
U_CAPI tempUCATable* U_EXPORT2
-uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UErrorCode *status) {
+uprv_uca_initTempTable(UCATableHeader *image, UColOptionSet *opts, const UCollator *UCA, UColCETags initTag, UColCETags supplementaryInitTag, UErrorCode *status) {
tempUCATable *t = (tempUCATable *)uprv_malloc(sizeof(tempUCATable));
/* test for NULL */
if (t == NULL) {
}
uprv_memset(t->expansions, 0, sizeof(ExpansionTable));
/*t->mapping = ucmpe32_open(UCOL_SPECIAL_FLAG | (initTag<<24), UCOL_SPECIAL_FLAG | (SURROGATE_TAG<<24), UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG<<24), status);*/
- t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
+ /*t->mapping = utrie_open(NULL, NULL, 0x100000, UCOL_SPECIAL_FLAG | (initTag<<24), TRUE); // Do your own mallocs for the structure, array and have linear Latin 1*/
+
+ t->mapping = utrie_open(NULL, NULL, 0x100000,
+ UCOL_SPECIAL_FLAG | (initTag<<24),
+ UCOL_SPECIAL_FLAG | (supplementaryInitTag << 24),
+ TRUE); // Do your own mallocs for the structure, array and have linear Latin 1
t->prefixLookup = uhash_open(prefixLookupHash, prefixLookupComp, status);
uhash_setValueDeleter(t->prefixLookup, uhash_freeBlock);
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->size);
+ uprv_memcpy(r->expansions->CEs, t->expansions->CEs, sizeof(uint32_t)*t->expansions->position);
} else {
r->expansions->CEs = NULL;
}
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->size*sizeof(uint32_t));
+ uprv_memcpy(r->maxExpansions->endExpansionCE, t->maxExpansions->endExpansionCE, t->maxExpansions->position*sizeof(uint32_t));
} else {
r->maxExpansions->endExpansionCE = NULL;
}
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->size*sizeof(uint8_t));
+ uprv_memcpy(r->maxExpansions->expansionCESize, t->maxExpansions->expansionCESize, t->maxExpansions->position*sizeof(uint8_t));
} else {
r->maxExpansions->expansionCESize = NULL;
}
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->size*sizeof(uint32_t));
+ uprv_memcpy(r->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->endExpansionCE, t->maxJamoExpansions->position*sizeof(uint32_t));
r->maxJamoExpansions->isV = (UBool *)uprv_malloc(sizeof(UBool)*t->maxJamoExpansions->size);
/* test for NULL */
if (r->maxJamoExpansions->isV == NULL) {
*status = U_MEMORY_ALLOCATION_ERROR;
return NULL;
}
- uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->size*sizeof(UBool));
+ uprv_memcpy(r->maxJamoExpansions->isV, t->maxJamoExpansions->isV, t->maxJamoExpansions->position*sizeof(UBool));
} else {
r->maxJamoExpansions->endExpansionCE = NULL;
r->maxJamoExpansions->isV = NULL;
* @param status error status
* @returns size of the maxexpansion and maxsize used.
*/
-int uprv_uca_setMaxExpansion(uint32_t endexpansion,
+static int uprv_uca_setMaxExpansion(uint32_t endexpansion,
uint8_t expansionsize,
MaxExpansionTable *maxexpansion,
UErrorCode *status)
start = mid;
}
}
-
+
if (*start == endexpansion) {
result = start - pendexpansionce;
}
if (*limit == endexpansion) {
result = limit - pendexpansionce;
}
-
+
if (result > -1) {
/* found the ce in expansion, we'll just modify the size if it is
smaller */
int shiftsize = (pendexpansionce + pos) - start;
uint32_t *shiftpos = start + 1;
uint8_t *sizeshiftpos = pexpansionsize + (shiftpos - pendexpansionce);
-
+
/* okay need to rearrange the array into sorted order */
- if (shiftsize == 0 || *(pendexpansionce + pos) < endexpansion) {
+ if (shiftsize == 0 /*|| *(pendexpansionce + pos) < endexpansion*/) { /* the commented part is actually both redundant and dangerous */
*(pendexpansionce + pos + 1) = endexpansion;
*(pexpansionsize + pos + 1) = expansionsize;
}
* @param status error status
* @returns size of the maxexpansion and maxsize used.
*/
-int uprv_uca_setMaxJamoExpansion(UChar ch,
+static int uprv_uca_setMaxJamoExpansion(UChar ch,
uint32_t endexpansion,
uint8_t expansionsize,
MaxJamoExpansionTable *maxexpansion,
*(pendexpansionce + maxexpansion->position) = endexpansion;
*(maxexpansion->isV + maxexpansion->position) = isV;
maxexpansion->position ++;
-
+
return maxexpansion->position;
}
}
}
-uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
+static uint32_t uprv_uca_addPrefix(tempUCATable *t, uint32_t CE,
UCAElements *element, UErrorCode *status) {
// currently the longest prefix we're supporting in Japanese is two characters
// long. Although this table could quite easily mimic complete contraction stuff
// in the contraction, it is going to be handled as a pair of code units,
// as it doesn't affect the performance AND handling surrogates specially
// would complicate code way too much.
-uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
+static uint32_t uprv_uca_addContraction(tempUCATable *t, uint32_t CE,
UCAElements *element, UErrorCode *status) {
CntTable *contractions = t->contractions;
UChar32 cp;
if(U_FAILURE(*status)) {
return 0xFFFF;
}
+
+ element->mapCE = 0; // clear mapCE so that we can catch expansions
+
if(element->noOfCEs == 1) {
if(element->isThai == FALSE) {
- UChar32 uniChar = 0;
- //printElement(element);
- if ((element->cSize == 2) && U16_IS_LEAD(element->uchars[0])){
- uniChar = U16_GET_SUPPLEMENTARY(element->uchars[0], element->uchars[1]);
-
- } else if (element->cSize == 1){
- uniChar = element->uchars[0];
-
- }
-
- if (uniChar != 0 && u_isdigit(uniChar)){
- expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT)
- | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
- | 0x1);
- element->mapCE = expansion;
- unsafeCPSet(t->unsafeCP, uniChar);
- }else
- element->mapCE = element->CEs[0];
+ element->mapCE = element->CEs[0];
} else { /* add thai - totally bad here */
expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (THAI_TAG<<UCOL_TAG_SHIFT)
| ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
| ((element->CEs[0]>>8) & 0xFFFF00) // first and second byte of primary
| ((element->CEs[1]>>24) & 0xFF); // third byte of primary
} else {
-
- /* Checking here to see if we should insert the DIGIT_TAG or the EXPANSION_TAG */
- UChar32 uniChar = 0;
-
- if ((element->cSize == 2) && U16_IS_LEAD(element->uchars[0])){
- uniChar = U16_GET_SUPPLEMENTARY(element->uchars[0], element->uchars[1]);
- } else if (element->cSize == 1){
- uniChar = element->uchars[0];
- }
-
- if (uniChar != 0 && u_isdigit(uniChar)){
- expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT)
- | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
- | 0x1);
- unsafeCPSet(t->unsafeCP, uniChar);
- }else{
- expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
- | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
- & 0xFFFFF0);
- }
-
+ expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (EXPANSION_TAG<<UCOL_TAG_SHIFT)
+ | ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4)
+ & 0xFFFFF0);
+
for(i = 1; i<element->noOfCEs; i++) {
uprv_uca_addExpansion(expansions, element->CEs[i], status);
}
}
}
+ // We treat digits differently - they are "uber special" and should be
+ // processed differently if numeric collation is on.
+ UChar32 uniChar = 0;
+ //printElement(element);
+ if ((element->cSize == 2) && U16_IS_LEAD(element->uchars[0])){
+ uniChar = U16_GET_SUPPLEMENTARY(element->uchars[0], element->uchars[1]);
+ } else if (element->cSize == 1){
+ uniChar = element->uchars[0];
+ }
+
+ // Here, we have either one normal CE or mapCE is set. Therefore, we put only
+ // one element into the expansion buffer. When we encounter a digit and numeric
+ // collation is off, we just pick the CE we have and break out of the case
+ // (see ucol_prv_getSpecialCE and ucol_prv_getSpecialPrevCE in ucol.cpp). If we
+ // picked a special CE, further processing will occur. If it is a simple CE, we
+ // return because of how the loop is constructed.
+ if (uniChar != 0 && u_isdigit(uniChar)){
+ expansion = (uint32_t)(UCOL_SPECIAL_FLAG | (DIGIT_TAG<<UCOL_TAG_SHIFT) | 1); // prepare the element
+ if(element->mapCE) { // if there is an expansion, we'll pick it here
+ expansion |= ((uprv_uca_addExpansion(expansions, element->mapCE, status)+(headersize>>2))<<4);
+ } else {
+ expansion |= ((uprv_uca_addExpansion(expansions, element->CEs[0], status)+(headersize>>2))<<4);
+ }
+ element->mapCE = expansion;
+
+ // Mark the digit as unsafe: when we start in the middle of a digit string, we need to go back to its beginning!
+ if(uniChar <= 0xFFFF) { // supplementaries are always unsafe. API takes UChars
+ unsafeCPSet(t->unsafeCP, (UChar)uniChar);
+ }
+ }
+
// here we want to add the prefix structure.
// I will try to process it as a reverse contraction, if possible.
// prefix buffer is already reversed.
/*void uprv_uca_getMaxExpansionJamo(CompactEIntArray *mapping, */
-void uprv_uca_getMaxExpansionJamo(UNewTrie *mapping,
+static void uprv_uca_getMaxExpansionJamo(UNewTrie *mapping,
MaxExpansionTable *maxexpansion,
MaxJamoExpansionTable *maxjamoexpansion,
UBool jamospecial,
const uint32_t TBASE = 0x11A8;
const uint32_t VCOUNT = 21;
const uint32_t TCOUNT = 28;
-
+
uint32_t v = VBASE + VCOUNT - 1;
uint32_t t = TBASE + TCOUNT - 1;
uint32_t ce;
}
UCATableHeader *myData = (UCATableHeader *)dataStart;
- uprv_memcpy(myData, t->image, sizeof(UCATableHeader));
+ // Reset all the fields by zeroing the allocated data before anything is filled in.
+ uprv_memset(dataStart, 0, toAllocate);
+ // Make sure we know this is reset
+ myData->magic = UCOL_HEADER_MAGIC;
+ myData->isBigEndian = U_IS_BIG_ENDIAN;
+ myData->charSetFamily = U_CHARSET_FAMILY;
+ myData->formatVersion[0] = UCA_FORMAT_VERSION_0;
+ myData->formatVersion[1] = UCA_FORMAT_VERSION_1;
+ myData->formatVersion[2] = UCA_FORMAT_VERSION_2;
+ myData->formatVersion[3] = UCA_FORMAT_VERSION_3;
+ myData->jamoSpecial = t->image->jamoSpecial;
+
+ // Do not copy the header contents over from the UCA header!
+ //uprv_memcpy(myData, t->image, sizeof(UCATableHeader));
myData->contractionSize = contractionsSize;
tableOffset += (uint32_t)(paddedsize(contractionsSize*sizeof(uint32_t)));
} else {
myData->contractionIndex = 0;
- myData->contractionIndex = 0;
+ myData->contractionCEs = 0;
}
/* copy mapping table */
// This is debug code to dump the contents of the trie. It needs two functions defined above
{
UTrie UCAt = { 0 };
+ uint32_t trieWord;
utrie_unserialize(&UCAt, dataStart+tableOffset, 9999999, status);
UCAt.getFoldingOffset = myGetFoldingOffset;
if(U_SUCCESS(*status)) {
utrie_enum(&UCAt, NULL, enumRange, NULL);
}
+ trieWord = UTRIE_GET32_FROM_LEAD(UCAt, 0xDC01)
}
#endif
tableOffset += paddedsize(mappingSize);
el.prefixSize = 0;
el.noOfCEs = 0;
ucol_setText(colEl, decomp, noOfDec, status);
- while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != UCOL_NULLORDER) {
+ while((el.CEs[el.noOfCEs] = ucol_next(colEl, status)) != (uint32_t)UCOL_NULLORDER) {
el.noOfCEs++;
}
} else {
tempUCATable *tempTable = uprv_uca_cloneTempTable(t, status);
UCATableHeader *tempData = uprv_uca_assembleTable(tempTable, status);
- tempColl = ucol_initCollator(tempData, 0, status);
+ tempColl = ucol_initCollator(tempData, 0, t->UCA, status);
uprv_uca_closeTempTable(tempTable);
if(U_SUCCESS(*status)) {
U_NAMESPACE_END
-#endif /* #if !UCONFIG_NO_COLLATION */
\ No newline at end of file
+#endif /* #if !UCONFIG_NO_COLLATION */
+
+