2 *******************************************************************************
4 * Copyright (C) 2001-2004, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucol_cnt.cpp
10 * tab size: 8 (not used)
14 * created by: Vladimir Weinstein
16 * This module maintains a contraction table structure in expanded form
17 * and provides means to flatten this structure
21 #include "unicode/utypes.h"
23 #if !UCONFIG_NO_COLLATION
25 #include "unicode/uchar.h"
/*
 * Grows one contraction table when it is full (tbl->position == tbl->size):
 * both parallel arrays, tbl->CEs and tbl->codePoints, are reallocated to hold
 * 2*tbl->size entries.
 *
 * tbl    - contraction table whose arrays may be enlarged
 * status - receives U_MEMORY_ALLOCATION_ERROR (presumably when a realloc
 *          returns NULL; the tests themselves are not visible in this listing)
 *
 * NOTE(review): the NULL checks, early returns and the updates of tbl->CEs and
 * tbl->size are not visible here - confirm against the complete source.
 */
31 static void uprv_growTable(ContractionTable
*tbl
, UErrorCode
*status
) {
/* Only a completely filled table needs to grow. */
32 if(tbl
->position
== tbl
->size
) {
/* Result goes into a temporary so tbl->CEs is not overwritten by the realloc call itself. */
33 uint32_t *newData
= (uint32_t *)uprv_realloc(tbl
->CEs
, 2*tbl
->size
*sizeof(uint32_t));
35 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* Same doubling for the code point array. */
38 UChar
*newCPs
= (UChar
*)uprv_realloc(tbl
->codePoints
, 2*tbl
->size
*sizeof(UChar
));
41 *status
= U_MEMORY_ALLOCATION_ERROR
;
/* Publish the enlarged code point array. */
45 tbl
->codePoints
= newCPs
;
/*
 * Creates a new, empty CntTable bound to the given trie.
 *
 * Visible steps: allocate the CntTable, remember `mapping`, allocate and zero
 * an array of INIT_EXP_TABLE_SIZE ContractionTable pointers, record that number
 * as the capacity, and initialise codePoints to NULL and currentTag to
 * NOT_FOUND_TAG.
 *
 * mapping - trie stored in tbl->mapping (the pointer is stored, not copied)
 * status  - checked with U_FAILURE on entry; set to U_MEMORY_ALLOCATION_ERROR
 *           on the allocation-failure paths
 *
 * NOTE(review): the return statements and the initialisation of the remaining
 * fields are not visible in this listing.
 */
50 U_CAPI CntTable
* U_EXPORT2
51 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
52 uprv_cnttab_open(UNewTrie
*mapping
, UErrorCode
*status
) {
/* Do nothing if the caller already carries an error. */
53 if(U_FAILURE(*status
)) {
56 CntTable
*tbl
= (CntTable
*)uprv_malloc(sizeof(CntTable
));
58 *status
= U_MEMORY_ALLOCATION_ERROR
;
61 tbl
->mapping
= mapping
;
/* Pointer array holding one ContractionTable per contraction start. */
62 tbl
->elements
= (ContractionTable
**)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(ContractionTable
*));
63 if(tbl
->elements
== NULL
) {
64 *status
= U_MEMORY_ALLOCATION_ERROR
;
68 tbl
->capacity
= INIT_EXP_TABLE_SIZE
;
/* All slots start out NULL; other functions test elements[i] against NULL. */
69 uprv_memset(tbl
->elements
, 0, INIT_EXP_TABLE_SIZE
*sizeof(ContractionTable
*));
73 tbl
->codePoints
= NULL
;
75 tbl
->currentTag
= NOT_FOUND_TAG
;
79 static ContractionTable
*addATableElement(CntTable
*table
, uint32_t *key
, UErrorCode
*status
) {
80 ContractionTable
*el
= (ContractionTable
*)uprv_malloc(sizeof(ContractionTable
));
82 *status
= U_MEMORY_ALLOCATION_ERROR
;
85 el
->CEs
= (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(uint32_t));
87 *status
= U_MEMORY_ALLOCATION_ERROR
;
92 el
->codePoints
= (UChar
*)uprv_malloc(INIT_EXP_TABLE_SIZE
*sizeof(UChar
));
93 if(el
->codePoints
== NULL
) {
94 *status
= U_MEMORY_ALLOCATION_ERROR
;
101 el
->size
= INIT_EXP_TABLE_SIZE
;
102 uprv_memset(el
->CEs
, 0, INIT_EXP_TABLE_SIZE
*sizeof(uint32_t));
103 uprv_memset(el
->codePoints
, 0, INIT_EXP_TABLE_SIZE
*sizeof(UChar
));
105 table
->elements
[table
->size
] = el
;
107 //uhash_put(table->elements, (void *)table->size, el, status);
109 *key
= table
->size
++;
111 if(table
->size
== table
->capacity
) {
112 ContractionTable
**newElements
= (ContractionTable
**)uprv_malloc(table
->capacity
*2*sizeof(ContractionTable
*));
114 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
115 if(newElements
== NULL
) {
116 *status
= U_MEMORY_ALLOCATION_ERROR
;
117 uprv_free(el
->codePoints
);
122 ContractionTable
**oldElements
= table
->elements
;
123 uprv_memcpy(newElements
, oldElements
, table
->capacity
*sizeof(ContractionTable
*));
124 uprv_memset(newElements
+table
->capacity
, 0, table
->capacity
*sizeof(ContractionTable
*));
125 table
->capacity
*= 2;
126 table
->elements
= newElements
;
127 uprv_free(oldElements
);
134 U_CAPI
int32_t U_EXPORT2
135 uprv_cnttab_constructTable(CntTable
*table
, uint32_t mainOffset
, UErrorCode
*status
) {
136 int32_t i
= 0, j
= 0;
137 if(U_FAILURE(*status
) || table
->size
== 0) {
143 if(table
->offsets
!= NULL
) {
144 uprv_free(table
->offsets
);
146 table
->offsets
= (int32_t *)uprv_malloc(table
->size
*sizeof(int32_t));
147 if(table
->offsets
== NULL
) {
148 *status
= U_MEMORY_ALLOCATION_ERROR
;
153 /* See how much memory we need */
154 for(i
= 0; i
<table
->size
; i
++) {
155 table
->offsets
[i
] = table
->position
+mainOffset
;
156 table
->position
+= table
->elements
[i
]->position
;
160 if(table
->CEs
!= NULL
) {
161 uprv_free(table
->CEs
);
163 table
->CEs
= (uint32_t *)uprv_malloc(table
->position
*sizeof(uint32_t));
164 if(table
->CEs
== NULL
) {
165 *status
= U_MEMORY_ALLOCATION_ERROR
;
166 uprv_free(table
->offsets
);
167 table
->offsets
= NULL
;
170 uprv_memset(table
->CEs
, '?', table
->position
*sizeof(uint32_t));
172 if(table
->codePoints
!= NULL
) {
173 uprv_free(table
->codePoints
);
175 table
->codePoints
= (UChar
*)uprv_malloc(table
->position
*sizeof(UChar
));
176 if(table
->codePoints
== NULL
) {
177 *status
= U_MEMORY_ALLOCATION_ERROR
;
178 uprv_free(table
->offsets
);
179 table
->offsets
= NULL
;
180 uprv_free(table
->CEs
);
184 uprv_memset(table
->codePoints
, '?', table
->position
*sizeof(UChar
));
186 /* Now stuff the things in*/
188 UChar
*cpPointer
= table
->codePoints
;
189 uint32_t *CEPointer
= table
->CEs
;
190 for(i
= 0; i
<table
->size
; i
++) {
191 int32_t size
= table
->elements
[i
]->position
;
192 uint8_t ccMax
= 0, ccMin
= 255, cc
= 0;
193 for(j
= 1; j
<size
; j
++) {
194 cc
= u_getCombiningClass(table
->elements
[i
]->codePoints
[j
]);
201 *(cpPointer
+j
) = table
->elements
[i
]->codePoints
[j
];
203 *cpPointer
= ((ccMin
==ccMax
)?1:0 << 8) | ccMax
;
205 uprv_memcpy(CEPointer
, table
->elements
[i
]->CEs
, size
*sizeof(uint32_t));
206 for(j
= 0; j
<size
; j
++) {
207 if(isCntTableElement(*(CEPointer
+j
))) {
208 *(CEPointer
+j
) = constructContractCE(getCETag(*(CEPointer
+j
)), table
->offsets
[getContractOffset(*(CEPointer
+j
))]);
215 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
216 // start of the flat file). However, what is done below is just wrong and it affects building of
217 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
218 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
221 for(i
= 0; i
<=0x10FFFF; i
++) {
222 /*CE = ucmpe32_get(table->mapping, i);*/
223 CE
= utrie_get32(table
->mapping
, i
, NULL
);
224 if(isCntTableElement(CE
)) {
225 CE
= constructContractCE(getCETag(CE
), table
->offsets
[getContractOffset(CE
)]);
226 /*ucmpe32_set(table->mapping, i, CE);*/
227 utrie_set32(table
->mapping
, i
, CE
);
232 return table
->position
;
/*
 * Makes a deep copy of one contraction table: a new ContractionTable is
 * allocated, position is copied, and fresh codePoints/CEs arrays of t->size
 * entries are allocated and filled from t with uprv_memcpy.
 *
 * t      - table to copy
 * status - set to U_MEMORY_ALLOCATION_ERROR on the allocation-failure paths
 *
 * NOTE(review): return statements and the copy of `size` are not visible in
 * this listing.  Both arrays are allocated before either is tested; check that
 * the failure path releases `r` and whichever array did get allocated.
 */
235 static ContractionTable
*uprv_cnttab_cloneContraction(ContractionTable
*t
, UErrorCode
*status
) {
236 ContractionTable
*r
= (ContractionTable
*)uprv_malloc(sizeof(ContractionTable
));
238 *status
= U_MEMORY_ALLOCATION_ERROR
;
242 r
->position
= t
->position
;
/* The copies get the full capacity (t->size), not just the used part. */
245 r
->codePoints
= (UChar
*)uprv_malloc(sizeof(UChar
)*t
->size
);
246 r
->CEs
= (uint32_t *)uprv_malloc(sizeof(uint32_t)*t
->size
);
/* Either allocation failing is reported as an out-of-memory error. */
249 if((r
->codePoints
== NULL
) || (r
->CEs
== NULL
)) {
250 *status
= U_MEMORY_ALLOCATION_ERROR
;
253 uprv_memcpy(r
->codePoints
, t
->codePoints
, sizeof(UChar
)*t
->size
);
254 uprv_memcpy(r
->CEs
, t
->CEs
, sizeof(uint32_t)*t
->size
);
/*
 * Clones a CntTable.  Visible steps: allocate the new CntTable, copy position
 * and capacity, share the trie pointer (r->mapping = t->mapping), allocate an
 * element array of t->capacity pointers, deep-copy the first t->size elements
 * with uprv_cnttab_cloneContraction, and duplicate the flattened CEs,
 * codePoints (both t->position entries) and offsets (t->size entries).
 *
 * t      - table to clone
 * status - checked with U_FAILURE on entry; set to U_MEMORY_ALLOCATION_ERROR
 *          on allocation-failure paths
 *
 * NOTE(review): return statements, the declaration of `i`, the else branches
 * and the copies of any further fields (e.g. size, currentTag) are not visible
 * in this listing - verify them in the full source.  Things worth checking
 * there: (1) element slots from t->size up to t->capacity are never written in
 * the visible code (the memcpy of the whole array is commented out);
 * (2) `status` is not inspected after each uprv_cnttab_cloneContraction call
 * in the visible code; (3) whether failure paths free what was already
 * allocated.
 */
260 U_CAPI CntTable
* U_EXPORT2
261 uprv_cnttab_clone(CntTable
*t
, UErrorCode
*status
) {
262 if(U_FAILURE(*status
)) {
266 CntTable
*r
= (CntTable
*)uprv_malloc(sizeof(CntTable
));
269 *status
= U_MEMORY_ALLOCATION_ERROR
;
272 r
->position
= t
->position
;
274 r
->capacity
= t
->capacity
;
/* The trie is shared between original and clone, not duplicated. */
276 r
->mapping
= t
->mapping
;
278 r
->elements
= (ContractionTable
**)uprv_malloc(t
->capacity
*sizeof(ContractionTable
*));
280 if (r
->elements
== NULL
) {
281 *status
= U_MEMORY_ALLOCATION_ERROR
;
284 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
/* Deep-copy every used element instead of copying the pointers. */
286 for(i
= 0; i
<t
->size
; i
++) {
287 r
->elements
[i
] = uprv_cnttab_cloneContraction(t
->elements
[i
], status
);
/* Flattened CE array: t->position entries. */
291 r
->CEs
= (uint32_t *)uprv_malloc(t
->position
*sizeof(uint32_t));
293 if (r
->CEs
== NULL
) {
294 *status
= U_MEMORY_ALLOCATION_ERROR
;
297 uprv_memcpy(r
->CEs
, t
->CEs
, t
->position
*sizeof(uint32_t));
/* Flattened code point array, only if the source has one. */
302 if(t
->codePoints
!= NULL
) {
303 r
->codePoints
= (UChar
*)uprv_malloc(t
->position
*sizeof(UChar
));
305 if (r
->codePoints
== NULL
) {
306 *status
= U_MEMORY_ALLOCATION_ERROR
;
309 uprv_memcpy(r
->codePoints
, t
->codePoints
, t
->position
*sizeof(UChar
));
311 r
->codePoints
= NULL
;
/* Offsets: one per element, only if the source has them. */
314 if(t
->offsets
!= NULL
) {
315 r
->offsets
= (int32_t *)uprv_malloc(t
->size
*sizeof(int32_t));
317 if (r
->offsets
== NULL
) {
318 *status
= U_MEMORY_ALLOCATION_ERROR
;
321 uprv_memcpy(r
->offsets
, t
->offsets
, t
->size
*sizeof(int32_t));
/*
 * Releases the memory owned by a CntTable: for each of the first table->size
 * elements its CEs array, its codePoints array and the element itself, then
 * the element pointer array and the flattened CEs, offsets and codePoints
 * arrays.
 *
 * table - table set to tear down
 *
 * NOTE(review): the declaration of `i` and whatever follows the last visible
 * free (e.g. releasing `table` itself) are not visible in this listing.  The
 * visible loop dereferences table->elements[i] without a NULL test.
 */
329 U_CAPI
void U_EXPORT2
330 uprv_cnttab_close(CntTable
*table
) {
332 for(i
= 0; i
<table
->size
; i
++) {
/* Arrays first, then the struct that owns them. */
333 uprv_free(table
->elements
[i
]->CEs
);
334 uprv_free(table
->elements
[i
]->codePoints
);
335 uprv_free(table
->elements
[i
]);
337 uprv_free(table
->elements
);
338 uprv_free(table
->CEs
);
339 uprv_free(table
->offsets
);
340 uprv_free(table
->codePoints
);
/*
 * Overwrites the CE stored in the last used slot (index position-1) of the
 * contraction table selected by `element` and returns a contraction CE built
 * from table->currentTag and `element`.
 *
 * table   - table set
 * element - index into table->elements; 0xFFFFFF means "no table"
 * value   - CE to store
 * status  - checked with U_FAILURE on entry
 *
 * NOTE(review): the statements executed for an incoming error or a missing
 * table are not visible in this listing.  With tbl->position == 0 the index
 * position-1 is not a valid slot - confirm callers never do that.
 */
344 /* this is for adding non contractions */
345 U_CAPI
uint32_t U_EXPORT2
346 uprv_cnttab_changeLastCE(CntTable
*table
, uint32_t element
, uint32_t value
, UErrorCode
*status
) {
349 ContractionTable
*tbl
= NULL
;
350 if(U_FAILURE(*status
)) {
/* Look the table up; the assignment inside the condition also fills tbl. */
354 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
358 tbl
->CEs
[tbl
->position
-1] = value
;
360 return(constructContractCE(table
->currentTag
, element
));
364 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
365 U_CAPI
uint32_t U_EXPORT2
366 uprv_cnttab_insertContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
369 ContractionTable
*tbl
= NULL
;
371 if(U_FAILURE(*status
)) {
375 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
376 tbl
= addATableElement(table
, &element
, status
);
379 uprv_growTable(tbl
, status
);
384 while(tbl
->codePoints
[offset
] < codePoint
&& offset
<tbl
->position
) {
388 uint32_t i
= tbl
->position
;
389 for(i
= tbl
->position
; i
> offset
; i
--) {
390 tbl
->CEs
[i
] = tbl
->CEs
[i
-1];
391 tbl
->codePoints
[i
] = tbl
->codePoints
[i
-1];
394 tbl
->CEs
[offset
] = value
;
395 tbl
->codePoints
[offset
] = codePoint
;
399 return(constructContractCE(table
->currentTag
, element
));
403 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
404 U_CAPI
uint32_t U_EXPORT2
405 uprv_cnttab_addContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
409 ContractionTable
*tbl
= NULL
;
411 if(U_FAILURE(*status
)) {
415 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
416 tbl
= addATableElement(table
, &element
, status
);
419 uprv_growTable(tbl
, status
);
421 tbl
->CEs
[tbl
->position
] = value
;
422 tbl
->codePoints
[tbl
->position
] = codePoint
;
426 return(constructContractCE(table
->currentTag
, element
));
429 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
430 U_CAPI
uint32_t U_EXPORT2
431 uprv_cnttab_setContraction(CntTable
*table
, uint32_t element
, uint32_t offset
, UChar codePoint
, uint32_t value
, UErrorCode
*status
) {
434 ContractionTable
*tbl
= NULL
;
436 if(U_FAILURE(*status
)) {
440 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
441 tbl
= addATableElement(table
, &element
, status
);
444 if(offset
>= tbl
->size
) {
445 *status
= U_INDEX_OUTOFBOUNDS_ERROR
;
448 tbl
->CEs
[offset
] = value
;
449 tbl
->codePoints
[offset
] = codePoint
;
452 return(constructContractCE(table
->currentTag
, element
));
/*
 * Looks up the ContractionTable that `element` refers to in table->elements.
 * 0xFFFFFF is treated as "no table", as is a NULL slot.
 *
 * NOTE(review): the statements before the lookup and the return statements
 * are not visible in this listing; `element` is used as an array index
 * without a visible range check against table->size.
 */
455 static ContractionTable
*_cnttab_getContractionTable(CntTable
*table
, uint32_t element
) {
457 ContractionTable
*tbl
= NULL
;
/* The assignment inside the condition fetches the table and tests it in one go. */
459 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
466 static int32_t _cnttab_findCP(ContractionTable
*tbl
, UChar codePoint
) {
467 uint32_t position
= 0;
472 while(codePoint
> tbl
->codePoints
[position
]) {
474 if(position
> tbl
->position
) {
478 if (codePoint
== tbl
->codePoints
[position
]) {
/*
 * Returns the CE stored at `position` in one contraction table, or
 * UCOL_NOT_FOUND when the position is rejected.
 *
 * tbl      - table to read
 * position - slot index; -1 means "not found"
 *
 * NOTE(review): the condition guarding the first `return UCOL_NOT_FOUND` is
 * not visible in this listing.  The visible range test uses `>`, so an index
 * equal to tbl->position is accepted - check whether `>=` was intended.  The
 * `position == -1` test is already covered by the unsigned comparison.
 */
485 static uint32_t _cnttab_getCE(ContractionTable
*tbl
, int32_t position
) {
487 return UCOL_NOT_FOUND
;
/* The cast makes negative positions compare as huge values. */
489 if((uint32_t)position
> tbl
->position
|| position
== -1) {
490 return UCOL_NOT_FOUND
;
492 return tbl
->CEs
[position
];
/*
 * Public wrapper: resolves `element` to its contraction table and searches it
 * for `codePoint` via _cnttab_findCP, returning that function's result.
 *
 * status - checked with U_FAILURE on entry (the value returned in that case is
 *          not visible in this listing)
 */
496 U_CAPI
int32_t U_EXPORT2
497 uprv_cnttab_findCP(CntTable
*table
, uint32_t element
, UChar codePoint
, UErrorCode
*status
) {
499 if(U_FAILURE(*status
)) {
503 return _cnttab_findCP(_cnttab_getContractionTable(table
, element
), codePoint
);
/*
 * Public wrapper: resolves `element` to its contraction table and returns the
 * CE at `position` via _cnttab_getCE.  Returns UCOL_NOT_FOUND if `status`
 * already indicates a failure.
 *
 * Note that `position` is unsigned here but is passed on to a parameter of
 * type int32_t.
 */
506 U_CAPI
uint32_t U_EXPORT2
507 uprv_cnttab_getCE(CntTable
*table
, uint32_t element
, uint32_t position
, UErrorCode
*status
) {
508 if(U_FAILURE(*status
)) {
509 return UCOL_NOT_FOUND
;
512 return(_cnttab_getCE(_cnttab_getContractionTable(table
, element
), position
));
/*
 * Returns the CE associated with `codePoint` in the contraction table selected
 * by `element`: the table is resolved once, searched with _cnttab_findCP, and
 * the resulting index is handed to _cnttab_getCE.  Returns UCOL_NOT_FOUND if
 * `status` already indicates a failure.
 */
515 U_CAPI
uint32_t U_EXPORT2
516 uprv_cnttab_findCE(CntTable
*table
, uint32_t element
, UChar codePoint
, UErrorCode
*status
) {
517 if(U_FAILURE(*status
)) {
518 return UCOL_NOT_FOUND
;
520 ContractionTable
*tbl
= _cnttab_getContractionTable(table
, element
);
/* A failed search (-1) is mapped to UCOL_NOT_FOUND inside _cnttab_getCE. */
521 return _cnttab_getCE(tbl
, _cnttab_findCP(tbl
, codePoint
));
/*
 * Walks a zero-terminated UChar string through nested contraction tables,
 * starting at `element`: each character is looked up with uprv_cnttab_findCE
 * and the CE found becomes the next `element`.  The walk has early exits when
 * the lookup yields UCOL_NOT_FOUND and when the CE found is not a contraction
 * table element; after the last character, slot 0 of the current table is
 * probed with uprv_cnttab_getCE.
 *
 * table    - table set
 * element  - handle of the table to start from
 * ztString - zero-terminated string to follow
 * status   - checked with U_FAILURE on entry
 *
 * NOTE(review): the values returned on each path and the statement advancing
 * ztString are not visible in this listing.
 */
524 U_CAPI UBool U_EXPORT2
525 uprv_cnttab_isTailored(CntTable
*table
, uint32_t element
, UChar
*ztString
, UErrorCode
*status
) {
526 if(U_FAILURE(*status
)) {
530 while(*(ztString
)!=0) {
/* Follow the contraction one character deeper. */
531 element
= uprv_cnttab_findCE(table
, element
, *(ztString
), status
);
532 if(element
== UCOL_NOT_FOUND
) {
535 if(!isCntTableElement(element
)) {
/* String exhausted: look at the table's first entry. */
540 if(uprv_cnttab_getCE(table
, element
, 0, status
) != UCOL_NOT_FOUND
) {
547 U_CAPI
uint32_t U_EXPORT2
548 uprv_cnttab_changeContraction(CntTable
*table
, uint32_t element
, UChar codePoint
, uint32_t newCE
, UErrorCode
*status
) {
551 ContractionTable
*tbl
= NULL
;
553 if(U_FAILURE(*status
)) {
557 if((element
== 0xFFFFFF) || (tbl
= table
->elements
[element
]) == NULL
) {
561 uint32_t position
= 0;
563 while(codePoint
> tbl
->codePoints
[position
]) {
565 if(position
> tbl
->position
) {
566 return UCOL_NOT_FOUND
;
569 if (codePoint
== tbl
->codePoints
[position
]) {
570 tbl
->CEs
[position
] = newCE
;
573 return UCOL_NOT_FOUND
;
579 #endif /* #if !UCONFIG_NO_COLLATION */