/*
 *******************************************************************************
 * Copyright (C) 2001-2008, International Business Machines
 * Corporation and others. All Rights Reserved.
 *******************************************************************************
 * file name: ucol_cnt.cpp
 * tab size: 8 (not used)
 * created by: Vladimir Weinstein
 *
 * This module maintains a contraction table structure in expanded form
 * and provides means to flatten this structure.
 */
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_COLLATION
25 #include "unicode/uchar.h"
29 static void uprv_growTable(ContractionTable
*tbl
, UErrorCode
*status
) {
30 if(tbl
->position
== tbl
->size
) {
31 uint32_t *newData
= (uint32_t *)uprv_realloc(tbl
->CEs
, 2*tbl
->size
*sizeof(uint32_t));
33 *status
= U_MEMORY_ALLOCATION_ERROR
;
36 UChar
*newCPs
= (UChar
*)uprv_realloc(tbl
->codePoints
, 2*tbl
->size
*sizeof(UChar
));
39 *status
= U_MEMORY_ALLOCATION_ERROR
;
43 tbl
->codePoints
= newCPs
;
48 U_CAPI CntTable
* U_EXPORT2
49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
50 uprv_cnttab_open(UNewTrie
*mapping
, UErrorCode
*status
) {
51 if(U_FAILURE(*status
)) {
54 CntTable
*tbl
= (CntTable
*)uprv_malloc(sizeof(CntTable
));
56 *status
= U_MEMORY_ALLOCATION_ERROR
;
59 tbl
->mapping
= mapping
;
60 tbl
->elements
= (ContractionTable
**)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(ContractionTable
*));
61 if(tbl
->elements
== NULL
) {
62 *status
= U_MEMORY_ALLOCATION_ERROR
;
66 tbl
->capacity
= INIT_EXP_TABLE_SIZE
;
67 uprv_memset(tbl
->elements
, 0, INIT_EXP_TABLE_SIZE
*sizeof(ContractionTable
*));
71 tbl
->codePoints
= NULL
;
73 tbl
->currentTag
= NOT_FOUND_TAG
;
77 static ContractionTable
*addATableElement(CntTable
*table
, uint32_t *key
, UErrorCode
*status
) {
78 ContractionTable
*el
= (ContractionTable
*)uprv_malloc(sizeof(ContractionTable
));
82 el
->CEs
= (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(uint32_t));
87 el
->codePoints
= (UChar
*)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(UChar
));
88 if(el
->codePoints
== NULL
) {
94 el
->size
= INIT_EXP_TABLE_SIZE
;
95 uprv_memset(el
->CEs
, 0, INIT_EXP_TABLE_SIZE
*sizeof(uint32_t));
96 uprv_memset(el
->codePoints
, 0, INIT_EXP_TABLE_SIZE
*sizeof(UChar
));
98 table
->elements
[table
->size
] = el
;
100 //uhash_put(table->elements, (void *)table->size, el, status);
102 *key
= table
->size
++;
104 if(table
->size
== table
->capacity
) {
105 ContractionTable
**newElements
= (ContractionTable
**)uprv_malloc(table
->capacity
*2*sizeof(ContractionTable
*));
107 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
108 if(newElements
== NULL
) {
109 uprv_free(el
->codePoints
);
113 ContractionTable
**oldElements
= table
->elements
;
114 uprv_memcpy(newElements
, oldElements
, table
->capacity
*sizeof(ContractionTable
*));
115 uprv_memset(newElements
+table
->capacity
, 0, table
->capacity
*sizeof(ContractionTable
*));
116 table
->capacity
*= 2;
117 table
->elements
= newElements
;
118 uprv_free(oldElements
);
124 *status
= U_MEMORY_ALLOCATION_ERROR
;
125 if (el
) uprv_free(el
);
129 U_CAPI
int32_t U_EXPORT2
130 uprv_cnttab_constructTable(CntTable
*table
, uint32_t mainOffset
, UErrorCode
*status
) {
131 int32_t i
= 0, j
= 0;
132 if(U_FAILURE(*status
) || table
->size
== 0) {
138 if(table
->offsets
!= NULL
) {
139 uprv_free(table
->offsets
);
141 table
->offsets
= (int32_t *)uprv_malloc(table
->size
*sizeof(int32_t));
142 if(table
->offsets
== NULL
) {
143 *status
= U_MEMORY_ALLOCATION_ERROR
;
148 /* See how much memory we need */
149 for(i
= 0; i
<table
->size
; i
++) {
150 table
->offsets
[i
] = table
->position
+mainOffset
;
151 table
->position
+= table
->elements
[i
]->position
;
155 if(table
->CEs
!= NULL
) {
156 uprv_free(table
->CEs
);
158 table
->CEs
= (uint32_t *)uprv_malloc(table
->position
*sizeof(uint32_t));
159 if(table
->CEs
== NULL
) {
160 *status
= U_MEMORY_ALLOCATION_ERROR
;
161 uprv_free(table
->offsets
);
162 table
->offsets
= NULL
;
165 uprv_memset(table
->CEs
, '?', table
->position
*sizeof(uint32_t));
167 if(table
->codePoints
!= NULL
) {
168 uprv_free(table
->codePoints
);
170 table
->codePoints
= (UChar
*)uprv_malloc(table
->position
*sizeof(UChar
));
171 if(table
->codePoints
== NULL
) {
172 *status
= U_MEMORY_ALLOCATION_ERROR
;
173 uprv_free(table
->offsets
);
174 table
->offsets
= NULL
;
175 uprv_free(table
->CEs
);
179 uprv_memset(table
->codePoints
, '?', table
->position
*sizeof(UChar
));
181 /* Now stuff the things in*/
183 UChar
*cpPointer
= table
->codePoints
;
184 uint32_t *CEPointer
= table
->CEs
;
185 for(i
= 0; i
<table
->size
; i
++) {
186 int32_t size
= table
->elements
[i
]->position
;
187 uint8_t ccMax
= 0, ccMin
= 255, cc
= 0;
188 for(j
= 1; j
<size
; j
++) {
189 cc
= u_getCombiningClass(table
->elements
[i
]->codePoints
[j
]);
196 *(cpPointer
+j
) = table
->elements
[i
]->codePoints
[j
];
198 *cpPointer
= ((ccMin
==ccMax
)?1:0 << 8) | ccMax
;
200 uprv_memcpy(CEPointer
, table
->elements
[i
]->CEs
, size
*sizeof(uint32_t));
201 for(j
= 0; j
<size
; j
++) {
202 if(isCntTableElement(*(CEPointer
+j
))) {
203 *(CEPointer
+j
) = constructContractCE(getCETag(*(CEPointer
+j
)), table
->offsets
[getContractOffset(*(CEPointer
+j
))]);
210 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
211 // start of the flat file). However, what is done below is just wrong and it affects building of
212 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
213 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
216 for(i
= 0; i
<=0x10FFFF; i
++) {
217 /*CE = ucmpe32_get(table->mapping, i);*/
218 CE
= utrie_get32(table
->mapping
, i
, NULL
);
219 if(isCntTableElement(CE
)) {
220 CE
= constructContractCE(getCETag(CE
), table
->offsets
[getContractOffset(CE
)]);
221 /*ucmpe32_set(table->mapping, i, CE);*/
222 utrie_set32(table
->mapping
, i
, CE
);
227 return table
->position
;
230 static ContractionTable
*uprv_cnttab_cloneContraction(ContractionTable
*t
, UErrorCode
*status
) {
231 ContractionTable
*r
= (ContractionTable
*)uprv_malloc(sizeof(ContractionTable
));
236 r
->position
= t
->position
;
239 r
->codePoints
= (UChar
*)uprv_malloc(sizeof(UChar
)*t
->size
);
240 if(r
->codePoints
== NULL
) {
243 r
->CEs
= (uint32_t *)uprv_malloc(sizeof(uint32_t)*t
->size
);
245 uprv_free(r
->codePoints
);
248 uprv_memcpy(r
->codePoints
, t
->codePoints
, sizeof(UChar
)*t
->size
);
249 uprv_memcpy(r
->CEs
, t
->CEs
, sizeof(uint32_t)*t
->size
);
254 *status
= U_MEMORY_ALLOCATION_ERROR
;
259 U_CAPI CntTable
* U_EXPORT2
260 uprv_cnttab_clone(CntTable
*t
, UErrorCode
*status
) {
261 if(U_FAILURE(*status
)) {
265 CntTable
*r
= (CntTable
*)uprv_malloc(sizeof(CntTable
));
270 r
->position
= t
->position
;
272 r
->capacity
= t
->capacity
;
274 r
->mapping
= t
->mapping
;
276 r
->elements
= (ContractionTable
**)uprv_malloc(t
->capacity
*sizeof(ContractionTable
*));
278 if (r
->elements
== NULL
) {
281 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
283 for(i
= 0; i
<t
->size
; i
++) {
284 r
->elements
[i
] = uprv_cnttab_cloneContraction(t
->elements
[i
], status
);
288 r
->CEs
= (uint32_t *)uprv_malloc(t
->position
*sizeof(uint32_t));
290 if (r
->CEs
== NULL
) {
291 uprv_free(r
->elements
);
294 uprv_memcpy(r
->CEs
, t
->CEs
, t
->position
*sizeof(uint32_t));
299 if(t
->codePoints
!= NULL
) {
300 r
->codePoints
= (UChar
*)uprv_malloc(t
->position
*sizeof(UChar
));
302 if (r
->codePoints
== NULL
) {
304 uprv_free(r
->elements
);
307 uprv_memcpy(r
->codePoints
, t
->codePoints
, t
->position
*sizeof(UChar
));
309 r
->codePoints
= NULL
;
312 if(t
->offsets
!= NULL
) {
313 r
->offsets
= (int32_t *)uprv_malloc(t
->size
*sizeof(int32_t));
315 if (r
->offsets
== NULL
) {
316 uprv_free(r
->codePoints
);
318 uprv_free(r
->elements
);
321 uprv_memcpy(r
->offsets
, t
->offsets
, t
->size
*sizeof(int32_t));
329 *status
= U_MEMORY_ALLOCATION_ERROR
;
334 U_CAPI
void U_EXPORT2
335 uprv_cnttab_close(CntTable
*table
) {
337 for(i
= 0; i
<table
->size
; i
++) {
338 uprv_free(table
->elements
[i
]->CEs
);
339 uprv_free(table
->elements
[i
]->codePoints
);
340 uprv_free(table
->elements
[i
]);
342 uprv_free(table
->elements
);
343 uprv_free(table
->CEs
);
344 uprv_free(table
->offsets
);
345 uprv_free(table
->codePoints
);
349 /* this is for adding non contractions */
350 U_CAPI
uint32_t U_EXPORT2
351 uprv_cnttab_changeLastCE(CntTable
*table
, uint32_t element
, uint32_t value
, UErrorCode
*status
) {
354 ContractionTable
*tbl
= NULL
;
355 if(U_FAILURE(*status
)) {
359 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
363 tbl
->CEs
[tbl
->position
-1] = value
;
365 return(constructContractCE(table
->currentTag
, element
));
369 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
370 U_CAPI
uint32_t U_EXPORT2
371 uprv_cnttab_insertContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
373 ContractionTable
*tbl
= NULL
;
375 if(U_FAILURE(*status
)) {
380 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
381 tbl
= addATableElement(table
, &element
, status
);
382 if (U_FAILURE(*status
)) {
387 uprv_growTable(tbl
, status
);
392 while(tbl
->codePoints
[offset
] < codePoint
&& offset
<tbl
->position
) {
396 uint32_t i
= tbl
->position
;
397 for(i
= tbl
->position
; i
> offset
; i
--) {
398 tbl
->CEs
[i
] = tbl
->CEs
[i
-1];
399 tbl
->codePoints
[i
] = tbl
->codePoints
[i
-1];
402 tbl
->CEs
[offset
] = value
;
403 tbl
->codePoints
[offset
] = codePoint
;
407 return(constructContractCE(table
->currentTag
, element
));
411 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
412 U_CAPI
uint32_t U_EXPORT2
413 uprv_cnttab_addContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
417 ContractionTable
*tbl
= NULL
;
419 if(U_FAILURE(*status
)) {
423 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
424 tbl
= addATableElement(table
, &element
, status
);
425 if (U_FAILURE(*status
)) {
430 uprv_growTable(tbl
, status
);
432 tbl
->CEs
[tbl
->position
] = value
;
433 tbl
->codePoints
[tbl
->position
] = codePoint
;
437 return(constructContractCE(table
->currentTag
, element
));
440 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
441 U_CAPI
uint32_t U_EXPORT2
442 uprv_cnttab_setContraction(CntTable
*table
, uint32_t element
, uint32_t offset
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
445 ContractionTable
*tbl
= NULL
;
447 if(U_FAILURE(*status
)) {
451 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
452 tbl
= addATableElement(table
, &element
, status
);
453 if (U_FAILURE(*status
)) {
459 if(offset
>= tbl
->size
) {
460 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
463 tbl
->CEs
[offset
] = value
;
464 tbl
->codePoints
[offset
] = codePoint
;
467 return(constructContractCE(table
->currentTag
, element
));
470 static ContractionTable
*_cnttab_getContractionTable(CntTable
*table
, uint32_t element
) {
472 ContractionTable
*tbl
= NULL
;
474 if(element
!= 0xFFFFFF) {
475 tbl
= table
->elements
[element
]; /* This could also return NULL */
480 static int32_t _cnttab_findCP(ContractionTable
*tbl
, UChar codePoint
) {
481 uint32_t position
= 0;
486 while(codePoint
> tbl
->codePoints
[position
]) {
488 if(position
> tbl
->position
) {
492 if (codePoint
== tbl
->codePoints
[position
]) {
499 static uint32_t _cnttab_getCE(ContractionTable
*tbl
, int32_t position
) {
501 return UCOL_NOT_FOUND
;
503 if((uint32_t)position
> tbl
->position
|| position
== -1) {
504 return UCOL_NOT_FOUND
;
506 return tbl
->CEs
[position
];
510 U_CAPI
int32_t U_EXPORT2
511 uprv_cnttab_findCP(CntTable
*table
, uint32_t element
, UChar codePoint
, UErrorCode
*status
) {
513 if(U_FAILURE(*status
)) {
517 return _cnttab_findCP(_cnttab_getContractionTable(table
, element
), codePoint
);
520 U_CAPI
uint32_t U_EXPORT2
521 uprv_cnttab_getCE(CntTable
*table
, uint32_t element
, uint32_t position
, UErrorCode
*status
) {
522 if(U_FAILURE(*status
)) {
523 return UCOL_NOT_FOUND
;
526 return(_cnttab_getCE(_cnttab_getContractionTable(table
, element
), position
));
529 U_CAPI
uint32_t U_EXPORT2
530 uprv_cnttab_findCE(CntTable
*table
, uint32_t element
, UChar codePoint
, UErrorCode
*status
) {
531 if(U_FAILURE(*status
)) {
532 return UCOL_NOT_FOUND
;
534 ContractionTable
*tbl
= _cnttab_getContractionTable(table
, element
);
535 return _cnttab_getCE(tbl
, _cnttab_findCP(tbl
, codePoint
));
538 U_CAPI UBool U_EXPORT2
539 uprv_cnttab_isTailored(CntTable
*table
, uint32_t element
, UChar
*ztString
, UErrorCode
*status
) {
540 if(U_FAILURE(*status
)) {
544 while(*(ztString
)!=0) {
545 element
= uprv_cnttab_findCE(table
, element
, *(ztString
), status
);
546 if(element
== UCOL_NOT_FOUND
) {
549 if(!isCntTableElement(element
)) {
554 return (UBool
)(uprv_cnttab_getCE(table
, element
, 0, status
) != UCOL_NOT_FOUND
);
557 U_CAPI
uint32_t U_EXPORT2
558 uprv_cnttab_changeContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t newCE
, UErrorCode
*status
) {
561 ContractionTable
*tbl
= NULL
;
563 if(U_FAILURE(*status
)) {
567 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
571 uint32_t position
= 0;
573 while(codePoint
> tbl
->codePoints
[position
]) {
575 if(position
> tbl
->position
) {
576 return UCOL_NOT_FOUND
;
579 if (codePoint
== tbl
->codePoints
[position
]) {
580 tbl
->CEs
[position
] = newCE
;
583 return UCOL_NOT_FOUND
;
587 #endif /* #if !UCONFIG_NO_COLLATION */