]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_cnt.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_cnt.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
73c04bcf 4* Copyright (C) 2001-2006, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: ucol_cnt.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created 02/22/2001
14* created by: Vladimir Weinstein
15*
16* This module maintains a contraction table structure in expanded form
17* and provides means to flatten this structure
18*
19*/
20
21#include "unicode/utypes.h"
22
23#if !UCONFIG_NO_COLLATION
24
25#include "unicode/uchar.h"
26#include "ucol_cnt.h"
27#include "cmemory.h"
28
374ca955 29static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
b75a7d8f
A
30 if(tbl->position == tbl->size) {
31 uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
32 if(newData == NULL) {
33 *status = U_MEMORY_ALLOCATION_ERROR;
34 return;
35 }
36 UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
37 if(newCPs == NULL) {
38 uprv_free(newData);
39 *status = U_MEMORY_ALLOCATION_ERROR;
40 return;
41 }
42 tbl->CEs = newData;
43 tbl->codePoints = newCPs;
44 tbl->size *= 2;
45 }
46}
47
48U_CAPI CntTable* U_EXPORT2
49/*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
50uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
51 if(U_FAILURE(*status)) {
52 return 0;
53 }
54 CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
55 if(tbl == NULL) {
73c04bcf
A
56 *status = U_MEMORY_ALLOCATION_ERROR;
57 return NULL;
b75a7d8f
A
58 }
59 tbl->mapping = mapping;
60 tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
61 if(tbl->elements == NULL) {
73c04bcf
A
62 *status = U_MEMORY_ALLOCATION_ERROR;
63 uprv_free(tbl);
64 return NULL;
b75a7d8f
A
65 }
66 tbl->capacity = INIT_EXP_TABLE_SIZE;
67 uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
68 tbl->size = 0;
69 tbl->position = 0;
70 tbl->CEs = NULL;
71 tbl->codePoints = NULL;
72 tbl->offsets = NULL;
73 tbl->currentTag = NOT_FOUND_TAG;
74 return tbl;
75}
76
77static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
78 ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
79 if(el == NULL) {
73c04bcf
A
80 *status = U_MEMORY_ALLOCATION_ERROR;
81 return NULL;
b75a7d8f
A
82 }
83 el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
84 if(el->CEs == NULL) {
73c04bcf
A
85 *status = U_MEMORY_ALLOCATION_ERROR;
86 uprv_free(el);
87 return NULL;
b75a7d8f
A
88 }
89
90 el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
91 if(el->codePoints == NULL) {
73c04bcf
A
92 *status = U_MEMORY_ALLOCATION_ERROR;
93 uprv_free(el->CEs);
94 uprv_free(el);
95 return NULL;
b75a7d8f
A
96 }
97
98 el->position = 0;
99 el->size = INIT_EXP_TABLE_SIZE;
100 uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
101 uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
102
103 table->elements[table->size] = el;
104
105 //uhash_put(table->elements, (void *)table->size, el, status);
106
107 *key = table->size++;
108
109 if(table->size == table->capacity) {
110 ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
111 // do realloc
73c04bcf 112 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
b75a7d8f 113 if(newElements == NULL) {
73c04bcf
A
114 *status = U_MEMORY_ALLOCATION_ERROR;
115 uprv_free(el->codePoints);
116 uprv_free(el->CEs);
117 uprv_free(el);
118 return NULL;
b75a7d8f 119 } else {
73c04bcf
A
120 ContractionTable **oldElements = table->elements;
121 uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
122 uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
123 table->capacity *= 2;
124 table->elements = newElements;
125 uprv_free(oldElements);
b75a7d8f
A
126 }
127 }
128
129 return el;
130}
131
132U_CAPI int32_t U_EXPORT2
133uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
134 int32_t i = 0, j = 0;
135 if(U_FAILURE(*status) || table->size == 0) {
136 return 0;
137 }
138
139 table->position = 0;
140
141 if(table->offsets != NULL) {
142 uprv_free(table->offsets);
143 }
144 table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
145 if(table->offsets == NULL) {
73c04bcf
A
146 *status = U_MEMORY_ALLOCATION_ERROR;
147 return 0;
b75a7d8f
A
148 }
149
150
151 /* See how much memory we need */
152 for(i = 0; i<table->size; i++) {
153 table->offsets[i] = table->position+mainOffset;
154 table->position += table->elements[i]->position;
155 }
156
157 /* Allocate it */
158 if(table->CEs != NULL) {
159 uprv_free(table->CEs);
160 }
161 table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
162 if(table->CEs == NULL) {
73c04bcf
A
163 *status = U_MEMORY_ALLOCATION_ERROR;
164 uprv_free(table->offsets);
165 table->offsets = NULL;
166 return 0;
b75a7d8f
A
167 }
168 uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
169
170 if(table->codePoints != NULL) {
171 uprv_free(table->codePoints);
172 }
173 table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
174 if(table->codePoints == NULL) {
73c04bcf
A
175 *status = U_MEMORY_ALLOCATION_ERROR;
176 uprv_free(table->offsets);
177 table->offsets = NULL;
178 uprv_free(table->CEs);
179 table->CEs = NULL;
180 return 0;
b75a7d8f
A
181 }
182 uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
183
184 /* Now stuff the things in*/
185
186 UChar *cpPointer = table->codePoints;
187 uint32_t *CEPointer = table->CEs;
188 for(i = 0; i<table->size; i++) {
189 int32_t size = table->elements[i]->position;
190 uint8_t ccMax = 0, ccMin = 255, cc = 0;
191 for(j = 1; j<size; j++) {
73c04bcf
A
192 cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
193 if(cc>ccMax) {
194 ccMax = cc;
195 }
196 if(cc<ccMin) {
197 ccMin = cc;
198 }
199 *(cpPointer+j) = table->elements[i]->codePoints[j];
b75a7d8f
A
200 }
201 *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
202
203 uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
204 for(j = 0; j<size; j++) {
205 if(isCntTableElement(*(CEPointer+j))) {
206 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
207 }
208 }
209 cpPointer += size;
210 CEPointer += size;
211 }
212
374ca955
A
213 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
214 // start of the flat file). However, what is done below is just wrong and it affects building of
215 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
216 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
217 // feasible.
b75a7d8f
A
218 uint32_t CE;
219 for(i = 0; i<=0x10FFFF; i++) {
220 /*CE = ucmpe32_get(table->mapping, i);*/
221 CE = utrie_get32(table->mapping, i, NULL);
222 if(isCntTableElement(CE)) {
223 CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
224 /*ucmpe32_set(table->mapping, i, CE);*/
225 utrie_set32(table->mapping, i, CE);
226 }
227 }
228
229
230 return table->position;
231}
232
374ca955 233static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
73c04bcf
A
234 ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
235 if(r == NULL) {
236 *status = U_MEMORY_ALLOCATION_ERROR;
237 return NULL;
238 }
239
240 r->position = t->position;
241 r->size = t->size;
242
243 r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
244 if(r->codePoints == NULL) {
245 *status = U_MEMORY_ALLOCATION_ERROR;
246 uprv_free(r);
247 return NULL;
248 }
249 r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
250 if(r->CEs == NULL) {
251 *status = U_MEMORY_ALLOCATION_ERROR;
252 uprv_free(r->codePoints);
253 uprv_free(r);
254 return NULL;
255 }
256 uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
257 uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
258
259 return r;
b75a7d8f
A
260
261}
262
263U_CAPI CntTable* U_EXPORT2
264uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
73c04bcf 265 if(U_FAILURE(*status)) {
b75a7d8f
A
266 return NULL;
267 }
73c04bcf
A
268 int32_t i = 0;
269 CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
b75a7d8f 270 /* test for NULL */
73c04bcf 271 if (r == NULL) {
b75a7d8f
A
272 *status = U_MEMORY_ALLOCATION_ERROR;
273 return NULL;
274 }
73c04bcf
A
275 r->position = t->position;
276 r->size = t->size;
277 r->capacity = t->capacity;
278
279 r->mapping = t->mapping;
b75a7d8f 280
73c04bcf 281 r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
b75a7d8f 282 /* test for NULL */
73c04bcf 283 if (r->elements == NULL) {
b75a7d8f 284 *status = U_MEMORY_ALLOCATION_ERROR;
73c04bcf 285 uprv_free(r);
b75a7d8f
A
286 return NULL;
287 }
73c04bcf 288 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
b75a7d8f 289
73c04bcf
A
290 for(i = 0; i<t->size; i++) {
291 r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
292 }
293
294 if(t->CEs != NULL) {
295 r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
296 /* test for NULL */
297 if (r->CEs == NULL) {
298 *status = U_MEMORY_ALLOCATION_ERROR;
299 uprv_free(r->elements);
300 uprv_free(r);
301 return NULL;
302 }
303 uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
304 } else {
305 r->CEs = NULL;
306 }
307
308 if(t->codePoints != NULL) {
309 r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
310 /* test for NULL */
311 if (r->codePoints == NULL) {
312 *status = U_MEMORY_ALLOCATION_ERROR;
313 uprv_free(r->CEs);
314 uprv_free(r->elements);
315 uprv_free(r);
316 return NULL;
317 }
318 uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
319 } else {
320 r->codePoints = NULL;
321 }
322
323 if(t->offsets != NULL) {
324 r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
325 /* test for NULL */
326 if (r->offsets == NULL) {
327 *status = U_MEMORY_ALLOCATION_ERROR;
328 uprv_free(r->codePoints);
329 uprv_free(r->CEs);
330 uprv_free(r->elements);
331 uprv_free(r);
332 return NULL;
333 }
334 uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
335 } else {
336 r->offsets = NULL;
337 }
338
339 return r;
b75a7d8f
A
340}
341
342U_CAPI void U_EXPORT2
343uprv_cnttab_close(CntTable *table) {
344 int32_t i = 0;
345 for(i = 0; i<table->size; i++) {
346 uprv_free(table->elements[i]->CEs);
347 uprv_free(table->elements[i]->codePoints);
348 uprv_free(table->elements[i]);
349 }
350 uprv_free(table->elements);
351 uprv_free(table->CEs);
352 uprv_free(table->offsets);
353 uprv_free(table->codePoints);
354 uprv_free(table);
355}
356
357/* this is for adding non contractions */
358U_CAPI uint32_t U_EXPORT2
359uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
360 element &= 0xFFFFFF;
361
362 ContractionTable *tbl = NULL;
363 if(U_FAILURE(*status)) {
364 return 0;
365 }
366
367 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
73c04bcf 368 return 0;
b75a7d8f
A
369 }
370
371 tbl->CEs[tbl->position-1] = value;
372
373 return(constructContractCE(table->currentTag, element));
374}
375
376
377/* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
378U_CAPI uint32_t U_EXPORT2
379uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
380
b75a7d8f
A
381 ContractionTable *tbl = NULL;
382
383 if(U_FAILURE(*status)) {
384 return 0;
385 }
73c04bcf 386 element &= 0xFFFFFF;
b75a7d8f
A
387
388 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
389 tbl = addATableElement(table, &element, status);
390 }
391
392 uprv_growTable(tbl, status);
393
394 uint32_t offset = 0;
395
396
397 while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
398 offset++;
399 }
400
401 uint32_t i = tbl->position;
402 for(i = tbl->position; i > offset; i--) {
403 tbl->CEs[i] = tbl->CEs[i-1];
404 tbl->codePoints[i] = tbl->codePoints[i-1];
405 }
406
407 tbl->CEs[offset] = value;
408 tbl->codePoints[offset] = codePoint;
409
410 tbl->position++;
411
412 return(constructContractCE(table->currentTag, element));
413}
414
415
416/* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
417U_CAPI uint32_t U_EXPORT2
418uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
419
420 element &= 0xFFFFFF;
421
422 ContractionTable *tbl = NULL;
423
424 if(U_FAILURE(*status)) {
425 return 0;
426 }
427
428 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
429 tbl = addATableElement(table, &element, status);
430 }
431
432 uprv_growTable(tbl, status);
433
434 tbl->CEs[tbl->position] = value;
435 tbl->codePoints[tbl->position] = codePoint;
436
437 tbl->position++;
438
439 return(constructContractCE(table->currentTag, element));
440}
441
442/* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
443U_CAPI uint32_t U_EXPORT2
444uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
445
446 element &= 0xFFFFFF;
447 ContractionTable *tbl = NULL;
448
449 if(U_FAILURE(*status)) {
450 return 0;
451 }
452
453 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
454 tbl = addATableElement(table, &element, status);
455 }
456
457 if(offset >= tbl->size) {
458 *status = U_INDEX_OUTOFBOUNDS_ERROR;
459 return 0;
460 }
461 tbl->CEs[offset] = value;
462 tbl->codePoints[offset] = codePoint;
463
464 //return(offset);
465 return(constructContractCE(table->currentTag, element));
466}
467
468static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
469 element &= 0xFFFFFF;
470 ContractionTable *tbl = NULL;
471
472 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
73c04bcf 473 return NULL;
b75a7d8f 474 } else {
73c04bcf 475 return tbl;
b75a7d8f
A
476 }
477}
478
479static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
480 uint32_t position = 0;
481 if(tbl == NULL) {
73c04bcf 482 return -1;
b75a7d8f
A
483 }
484
485 while(codePoint > tbl->codePoints[position]) {
73c04bcf
A
486 position++;
487 if(position > tbl->position) {
488 return -1;
489 }
b75a7d8f
A
490 }
491 if (codePoint == tbl->codePoints[position]) {
73c04bcf 492 return position;
b75a7d8f 493 } else {
73c04bcf 494 return -1;
b75a7d8f
A
495 }
496}
497
498static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
73c04bcf
A
499 if(tbl == NULL) {
500 return UCOL_NOT_FOUND;
501 }
502 if((uint32_t)position > tbl->position || position == -1) {
503 return UCOL_NOT_FOUND;
504 } else {
505 return tbl->CEs[position];
506 }
b75a7d8f
A
507}
508
509U_CAPI int32_t U_EXPORT2
510uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
511
512 if(U_FAILURE(*status)) {
513 return 0;
514 }
515
516 return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
517}
518
519U_CAPI uint32_t U_EXPORT2
520uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
521 if(U_FAILURE(*status)) {
522 return UCOL_NOT_FOUND;
523 }
524
525 return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
526}
527
528U_CAPI uint32_t U_EXPORT2
529uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
530 if(U_FAILURE(*status)) {
531 return UCOL_NOT_FOUND;
532 }
533 ContractionTable *tbl = _cnttab_getContractionTable(table, element);
534 return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
535}
536
537U_CAPI UBool U_EXPORT2
538uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
539 if(U_FAILURE(*status)) {
540 return FALSE;
541 }
542
543 while(*(ztString)!=0) {
73c04bcf
A
544 element = uprv_cnttab_findCE(table, element, *(ztString), status);
545 if(element == UCOL_NOT_FOUND) {
546 return FALSE;
547 }
548 if(!isCntTableElement(element)) {
549 return TRUE;
550 }
551 ztString++;
b75a7d8f 552 }
73c04bcf 553 return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
b75a7d8f
A
554}
555
556U_CAPI uint32_t U_EXPORT2
557uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
558
559 element &= 0xFFFFFF;
560 ContractionTable *tbl = NULL;
561
562 if(U_FAILURE(*status)) {
563 return 0;
564 }
565
566 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
73c04bcf 567 return 0;
b75a7d8f
A
568 }
569
570 uint32_t position = 0;
571
572 while(codePoint > tbl->codePoints[position]) {
73c04bcf
A
573 position++;
574 if(position > tbl->position) {
575 return UCOL_NOT_FOUND;
576 }
b75a7d8f
A
577 }
578 if (codePoint == tbl->codePoints[position]) {
73c04bcf
A
579 tbl->CEs[position] = newCE;
580 return element;
b75a7d8f 581 } else {
73c04bcf 582 return UCOL_NOT_FOUND;
b75a7d8f
A
583 }
584}
585
b75a7d8f 586#endif /* #if !UCONFIG_NO_COLLATION */