2 *******************************************************************************
4 * Copyright (C) 2001-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucol_cnt.cpp
10 * tab size: 8 (not used)
14 * created by: Vladimir Weinstein
16 * This module maintains a contraction table structure in expanded form
17 * and provides means to flatten this structure
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_COLLATION
25 #include "unicode/uchar.h"
29 static void uprv_growTable(ContractionTable
*tbl
, UErrorCode
*status
) {
30 if(tbl
->position
== tbl
->size
) {
31 uint32_t *newData
= (uint32_t *)uprv_realloc(tbl
->CEs
, 2*tbl
->size
*sizeof(uint32_t));
33 *status
= U_MEMORY_ALLOCATION_ERROR
;
36 UChar
*newCPs
= (UChar
*)uprv_realloc(tbl
->codePoints
, 2*tbl
->size
*sizeof(UChar
));
39 *status
= U_MEMORY_ALLOCATION_ERROR
;
43 tbl
->codePoints
= newCPs
;
48 U_CAPI CntTable
* U_EXPORT2
49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
50 uprv_cnttab_open(UNewTrie
*mapping
, UErrorCode
*status
) {
51 if(U_FAILURE(*status
)) {
54 CntTable
*tbl
= (CntTable
*)uprv_malloc(sizeof(CntTable
));
56 *status
= U_MEMORY_ALLOCATION_ERROR
;
59 tbl
->mapping
= mapping
;
60 tbl
->elements
= (ContractionTable
**)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(ContractionTable
*));
61 if(tbl
->elements
== NULL
) {
62 *status
= U_MEMORY_ALLOCATION_ERROR
;
66 tbl
->capacity
= INIT_EXP_TABLE_SIZE
;
67 uprv_memset(tbl
->elements
, 0, INIT_EXP_TABLE_SIZE
*sizeof(ContractionTable
*));
71 tbl
->codePoints
= NULL
;
73 tbl
->currentTag
= NOT_FOUND_TAG
;
77 static ContractionTable
*addATableElement(CntTable
*table
, uint32_t *key
, UErrorCode
*status
) {
78 ContractionTable
*el
= (ContractionTable
*)uprv_malloc(sizeof(ContractionTable
));
80 *status
= U_MEMORY_ALLOCATION_ERROR
;
83 el
->CEs
= (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(uint32_t));
85 *status
= U_MEMORY_ALLOCATION_ERROR
;
90 el
->codePoints
= (UChar
*)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(UChar
));
91 if(el
->codePoints
== NULL
) {
92 *status
= U_MEMORY_ALLOCATION_ERROR
;
99 el
->size
= INIT_EXP_TABLE_SIZE
;
100 uprv_memset(el
->CEs
, 0, INIT_EXP_TABLE_SIZE
*sizeof(uint32_t));
101 uprv_memset(el
->codePoints
, 0, INIT_EXP_TABLE_SIZE
*sizeof(UChar
));
103 table
->elements
[table
->size
] = el
;
105 //uhash_put(table->elements, (void *)table->size, el, status);
107 *key
= table
->size
++;
109 if(table
->size
== table
->capacity
) {
110 ContractionTable
**newElements
= (ContractionTable
**)uprv_malloc(table
->capacity
*2*sizeof(ContractionTable
*));
112 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
113 if(newElements
== NULL
) {
114 *status
= U_MEMORY_ALLOCATION_ERROR
;
115 uprv_free(el
->codePoints
);
120 ContractionTable
**oldElements
= table
->elements
;
121 uprv_memcpy(newElements
, oldElements
, table
->capacity
*sizeof(ContractionTable
*));
122 uprv_memset(newElements
+table
->capacity
, 0, table
->capacity
*sizeof(ContractionTable
*));
123 table
->capacity
*= 2;
124 table
->elements
= newElements
;
125 uprv_free(oldElements
);
132 U_CAPI
int32_t U_EXPORT2
133 uprv_cnttab_constructTable(CntTable
*table
, uint32_t mainOffset
, UErrorCode
*status
) {
134 int32_t i
= 0, j
= 0;
135 if(U_FAILURE(*status
) || table
->size
== 0) {
141 if(table
->offsets
!= NULL
) {
142 uprv_free(table
->offsets
);
144 table
->offsets
= (int32_t *)uprv_malloc(table
->size
*sizeof(int32_t));
145 if(table
->offsets
== NULL
) {
146 *status
= U_MEMORY_ALLOCATION_ERROR
;
151 /* See how much memory we need */
152 for(i
= 0; i
<table
->size
; i
++) {
153 table
->offsets
[i
] = table
->position
+mainOffset
;
154 table
->position
+= table
->elements
[i
]->position
;
158 if(table
->CEs
!= NULL
) {
159 uprv_free(table
->CEs
);
161 table
->CEs
= (uint32_t *)uprv_malloc(table
->position
*sizeof(uint32_t));
162 if(table
->CEs
== NULL
) {
163 *status
= U_MEMORY_ALLOCATION_ERROR
;
164 uprv_free(table
->offsets
);
165 table
->offsets
= NULL
;
168 uprv_memset(table
->CEs
, '?', table
->position
*sizeof(uint32_t));
170 if(table
->codePoints
!= NULL
) {
171 uprv_free(table
->codePoints
);
173 table
->codePoints
= (UChar
*)uprv_malloc(table
->position
*sizeof(UChar
));
174 if(table
->codePoints
== NULL
) {
175 *status
= U_MEMORY_ALLOCATION_ERROR
;
176 uprv_free(table
->offsets
);
177 table
->offsets
= NULL
;
178 uprv_free(table
->CEs
);
182 uprv_memset(table
->codePoints
, '?', table
->position
*sizeof(UChar
));
184 /* Now stuff the things in*/
186 UChar
*cpPointer
= table
->codePoints
;
187 uint32_t *CEPointer
= table
->CEs
;
188 for(i
= 0; i
<table
->size
; i
++) {
189 int32_t size
= table
->elements
[i
]->position
;
190 uint8_t ccMax
= 0, ccMin
= 255, cc
= 0;
191 for(j
= 1; j
<size
; j
++) {
192 cc
= u_getCombiningClass(table
->elements
[i
]->codePoints
[j
]);
199 *(cpPointer
+j
) = table
->elements
[i
]->codePoints
[j
];
201 *cpPointer
= ((ccMin
==ccMax
)?1:0 << 8) | ccMax
;
203 uprv_memcpy(CEPointer
, table
->elements
[i
]->CEs
, size
*sizeof(uint32_t));
204 for(j
= 0; j
<size
; j
++) {
205 if(isCntTableElement(*(CEPointer
+j
))) {
206 *(CEPointer
+j
) = constructContractCE(getCETag(*(CEPointer
+j
)), table
->offsets
[getContractOffset(*(CEPointer
+j
))]);
213 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
214 // start of the flat file). However, what is done below is just wrong and it affects building of
215 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
216 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
219 for(i
= 0; i
<=0x10FFFF; i
++) {
220 /*CE = ucmpe32_get(table->mapping, i);*/
221 CE
= utrie_get32(table
->mapping
, i
, NULL
);
222 if(isCntTableElement(CE
)) {
223 CE
= constructContractCE(getCETag(CE
), table
->offsets
[getContractOffset(CE
)]);
224 /*ucmpe32_set(table->mapping, i, CE);*/
225 utrie_set32(table
->mapping
, i
, CE
);
230 return table
->position
;
233 static ContractionTable
*uprv_cnttab_cloneContraction(ContractionTable
*t
, UErrorCode
*status
) {
234 ContractionTable
*r
= (ContractionTable
*)uprv_malloc(sizeof(ContractionTable
));
236 *status
= U_MEMORY_ALLOCATION_ERROR
;
240 r
->position
= t
->position
;
243 r
->codePoints
= (UChar
*)uprv_malloc(sizeof(UChar
)*t
->size
);
244 if(r
->codePoints
== NULL
) {
245 *status
= U_MEMORY_ALLOCATION_ERROR
;
249 r
->CEs
= (uint32_t *)uprv_malloc(sizeof(uint32_t)*t
->size
);
251 *status
= U_MEMORY_ALLOCATION_ERROR
;
252 uprv_free(r
->codePoints
);
256 uprv_memcpy(r
->codePoints
, t
->codePoints
, sizeof(UChar
)*t
->size
);
257 uprv_memcpy(r
->CEs
, t
->CEs
, sizeof(uint32_t)*t
->size
);
263 U_CAPI CntTable
* U_EXPORT2
264 uprv_cnttab_clone(CntTable
*t
, UErrorCode
*status
) {
265 if(U_FAILURE(*status
)) {
269 CntTable
*r
= (CntTable
*)uprv_malloc(sizeof(CntTable
));
272 *status
= U_MEMORY_ALLOCATION_ERROR
;
275 r
->position
= t
->position
;
277 r
->capacity
= t
->capacity
;
279 r
->mapping
= t
->mapping
;
281 r
->elements
= (ContractionTable
**)uprv_malloc(t
->capacity
*sizeof(ContractionTable
*));
283 if (r
->elements
== NULL
) {
284 *status
= U_MEMORY_ALLOCATION_ERROR
;
288 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
290 for(i
= 0; i
<t
->size
; i
++) {
291 r
->elements
[i
] = uprv_cnttab_cloneContraction(t
->elements
[i
], status
);
295 r
->CEs
= (uint32_t *)uprv_malloc(t
->position
*sizeof(uint32_t));
297 if (r
->CEs
== NULL
) {
298 *status
= U_MEMORY_ALLOCATION_ERROR
;
299 uprv_free(r
->elements
);
303 uprv_memcpy(r
->CEs
, t
->CEs
, t
->position
*sizeof(uint32_t));
308 if(t
->codePoints
!= NULL
) {
309 r
->codePoints
= (UChar
*)uprv_malloc(t
->position
*sizeof(UChar
));
311 if (r
->codePoints
== NULL
) {
312 *status
= U_MEMORY_ALLOCATION_ERROR
;
314 uprv_free(r
->elements
);
318 uprv_memcpy(r
->codePoints
, t
->codePoints
, t
->position
*sizeof(UChar
));
320 r
->codePoints
= NULL
;
323 if(t
->offsets
!= NULL
) {
324 r
->offsets
= (int32_t *)uprv_malloc(t
->size
*sizeof(int32_t));
326 if (r
->offsets
== NULL
) {
327 *status
= U_MEMORY_ALLOCATION_ERROR
;
328 uprv_free(r
->codePoints
);
330 uprv_free(r
->elements
);
334 uprv_memcpy(r
->offsets
, t
->offsets
, t
->size
*sizeof(int32_t));
342 U_CAPI
void U_EXPORT2
343 uprv_cnttab_close(CntTable
*table
) {
345 for(i
= 0; i
<table
->size
; i
++) {
346 uprv_free(table
->elements
[i
]->CEs
);
347 uprv_free(table
->elements
[i
]->codePoints
);
348 uprv_free(table
->elements
[i
]);
350 uprv_free(table
->elements
);
351 uprv_free(table
->CEs
);
352 uprv_free(table
->offsets
);
353 uprv_free(table
->codePoints
);
357 /* this is for adding non contractions */
358 U_CAPI
uint32_t U_EXPORT2
359 uprv_cnttab_changeLastCE(CntTable
*table
, uint32_t element
, uint32_t value
, UErrorCode
*status
) {
362 ContractionTable
*tbl
= NULL
;
363 if(U_FAILURE(*status
)) {
367 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
371 tbl
->CEs
[tbl
->position
-1] = value
;
373 return(constructContractCE(table
->currentTag
, element
));
377 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
378 U_CAPI
uint32_t U_EXPORT2
379 uprv_cnttab_insertContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
381 ContractionTable
*tbl
= NULL
;
383 if(U_FAILURE(*status
)) {
388 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
389 tbl
= addATableElement(table
, &element
, status
);
392 uprv_growTable(tbl
, status
);
397 while(tbl
->codePoints
[offset
] < codePoint
&& offset
<tbl
->position
) {
401 uint32_t i
= tbl
->position
;
402 for(i
= tbl
->position
; i
> offset
; i
--) {
403 tbl
->CEs
[i
] = tbl
->CEs
[i
-1];
404 tbl
->codePoints
[i
] = tbl
->codePoints
[i
-1];
407 tbl
->CEs
[offset
] = value
;
408 tbl
->codePoints
[offset
] = codePoint
;
412 return(constructContractCE(table
->currentTag
, element
));
416 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
417 U_CAPI
uint32_t U_EXPORT2
418 uprv_cnttab_addContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
422 ContractionTable
*tbl
= NULL
;
424 if(U_FAILURE(*status
)) {
428 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
429 tbl
= addATableElement(table
, &element
, status
);
432 uprv_growTable(tbl
, status
);
434 tbl
->CEs
[tbl
->position
] = value
;
435 tbl
->codePoints
[tbl
->position
] = codePoint
;
439 return(constructContractCE(table
->currentTag
, element
));
442 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
443 U_CAPI
uint32_t U_EXPORT2
444 uprv_cnttab_setContraction(CntTable
*table
, uint32_t element
, uint32_t offset
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
447 ContractionTable
*tbl
= NULL
;
449 if(U_FAILURE(*status
)) {
453 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
454 tbl
= addATableElement(table
, &element
, status
);
457 if(offset
>= tbl
->size
) {
458 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
461 tbl
->CEs
[offset
] = value
;
462 tbl
->codePoints
[offset
] = codePoint
;
465 return(constructContractCE(table
->currentTag
, element
));
468 static ContractionTable
*_cnttab_getContractionTable(CntTable
*table
, uint32_t element
) {
470 ContractionTable
*tbl
= NULL
;
472 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
479 static int32_t _cnttab_findCP(ContractionTable
*tbl
, UChar codePoint
) {
480 uint32_t position
= 0;
485 while(codePoint
> tbl
->codePoints
[position
]) {
487 if(position
> tbl
->position
) {
491 if (codePoint
== tbl
->codePoints
[position
]) {
498 static uint32_t _cnttab_getCE(ContractionTable
*tbl
, int32_t position
) {
500 return UCOL_NOT_FOUND
;
502 if((uint32_t)position
> tbl
->position
|| position
== -1) {
503 return UCOL_NOT_FOUND
;
505 return tbl
->CEs
[position
];
509 U_CAPI
int32_t U_EXPORT2
510 uprv_cnttab_findCP(CntTable
*table
, uint32_t element
, UChar codePoint
, UErrorCode
*status
) {
512 if(U_FAILURE(*status
)) {
516 return _cnttab_findCP(_cnttab_getContractionTable(table
, element
), codePoint
);
519 U_CAPI
uint32_t U_EXPORT2
520 uprv_cnttab_getCE(CntTable
*table
, uint32_t element
, uint32_t position
, UErrorCode
*status
) {
521 if(U_FAILURE(*status
)) {
522 return UCOL_NOT_FOUND
;
525 return(_cnttab_getCE(_cnttab_getContractionTable(table
, element
), position
));
528 U_CAPI
uint32_t U_EXPORT2
529 uprv_cnttab_findCE(CntTable
*table
, uint32_t element
, UChar codePoint
, UErrorCode
*status
) {
530 if(U_FAILURE(*status
)) {
531 return UCOL_NOT_FOUND
;
533 ContractionTable
*tbl
= _cnttab_getContractionTable(table
, element
);
534 return _cnttab_getCE(tbl
, _cnttab_findCP(tbl
, codePoint
));
537 U_CAPI UBool U_EXPORT2
538 uprv_cnttab_isTailored(CntTable
*table
, uint32_t element
, UChar
*ztString
, UErrorCode
*status
) {
539 if(U_FAILURE(*status
)) {
543 while(*(ztString
)!=0) {
544 element
= uprv_cnttab_findCE(table
, element
, *(ztString
), status
);
545 if(element
== UCOL_NOT_FOUND
) {
548 if(!isCntTableElement(element
)) {
553 return (UBool
)(uprv_cnttab_getCE(table
, element
, 0, status
) != UCOL_NOT_FOUND
);
556 U_CAPI
uint32_t U_EXPORT2
557 uprv_cnttab_changeContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t newCE
, UErrorCode
*status
) {
560 ContractionTable
*tbl
= NULL
;
562 if(U_FAILURE(*status
)) {
566 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
570 uint32_t position
= 0;
572 while(codePoint
> tbl
->codePoints
[position
]) {
574 if(position
> tbl
->position
) {
575 return UCOL_NOT_FOUND
;
578 if (codePoint
== tbl
->codePoints
[position
]) {
579 tbl
->CEs
[position
] = newCE
;
582 return UCOL_NOT_FOUND
;
586 #endif /* #if !UCONFIG_NO_COLLATION */