]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/ucol_cnt.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_cnt.cpp
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2001-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: ucol_cnt.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created 02/22/2001
14 * created by: Vladimir Weinstein
15 *
16 * This module maintains a contraction table structure in expanded form
17 * and provides means to flatten this structure
18 *
19 */
20
21 #include "unicode/utypes.h"
22
23 #if !UCONFIG_NO_COLLATION
24
25 #include "unicode/uchar.h"
26 #include "ucol_cnt.h"
27 #include "cmemory.h"
28
29 static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
30 if(tbl->position == tbl->size) {
31 uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
32 if(newData == NULL) {
33 *status = U_MEMORY_ALLOCATION_ERROR;
34 return;
35 }
36 UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
37 if(newCPs == NULL) {
38 uprv_free(newData);
39 *status = U_MEMORY_ALLOCATION_ERROR;
40 return;
41 }
42 tbl->CEs = newData;
43 tbl->codePoints = newCPs;
44 tbl->size *= 2;
45 }
46 }
47
48 U_CAPI CntTable* U_EXPORT2
49 /*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
50 uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
51 if(U_FAILURE(*status)) {
52 return 0;
53 }
54 CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
55 if(tbl == NULL) {
56 *status = U_MEMORY_ALLOCATION_ERROR;
57 return NULL;
58 }
59 tbl->mapping = mapping;
60 tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
61 if(tbl->elements == NULL) {
62 *status = U_MEMORY_ALLOCATION_ERROR;
63 uprv_free(tbl);
64 return NULL;
65 }
66 tbl->capacity = INIT_EXP_TABLE_SIZE;
67 uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
68 tbl->size = 0;
69 tbl->position = 0;
70 tbl->CEs = NULL;
71 tbl->codePoints = NULL;
72 tbl->offsets = NULL;
73 tbl->currentTag = NOT_FOUND_TAG;
74 return tbl;
75 }
76
77 static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
78 ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
79 if(el == NULL) {
80 *status = U_MEMORY_ALLOCATION_ERROR;
81 return NULL;
82 }
83 el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
84 if(el->CEs == NULL) {
85 *status = U_MEMORY_ALLOCATION_ERROR;
86 uprv_free(el);
87 return NULL;
88 }
89
90 el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
91 if(el->codePoints == NULL) {
92 *status = U_MEMORY_ALLOCATION_ERROR;
93 uprv_free(el->CEs);
94 uprv_free(el);
95 return NULL;
96 }
97
98 el->position = 0;
99 el->size = INIT_EXP_TABLE_SIZE;
100 uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
101 uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
102
103 table->elements[table->size] = el;
104
105 //uhash_put(table->elements, (void *)table->size, el, status);
106
107 *key = table->size++;
108
109 if(table->size == table->capacity) {
110 ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
111 // do realloc
112 /* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
113 if(newElements == NULL) {
114 *status = U_MEMORY_ALLOCATION_ERROR;
115 uprv_free(el->codePoints);
116 uprv_free(el->CEs);
117 uprv_free(el);
118 return NULL;
119 } else {
120 ContractionTable **oldElements = table->elements;
121 uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
122 uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
123 table->capacity *= 2;
124 table->elements = newElements;
125 uprv_free(oldElements);
126 }
127 }
128
129 return el;
130 }
131
132 U_CAPI int32_t U_EXPORT2
133 uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
134 int32_t i = 0, j = 0;
135 if(U_FAILURE(*status) || table->size == 0) {
136 return 0;
137 }
138
139 table->position = 0;
140
141 if(table->offsets != NULL) {
142 uprv_free(table->offsets);
143 }
144 table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
145 if(table->offsets == NULL) {
146 *status = U_MEMORY_ALLOCATION_ERROR;
147 return 0;
148 }
149
150
151 /* See how much memory we need */
152 for(i = 0; i<table->size; i++) {
153 table->offsets[i] = table->position+mainOffset;
154 table->position += table->elements[i]->position;
155 }
156
157 /* Allocate it */
158 if(table->CEs != NULL) {
159 uprv_free(table->CEs);
160 }
161 table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
162 if(table->CEs == NULL) {
163 *status = U_MEMORY_ALLOCATION_ERROR;
164 uprv_free(table->offsets);
165 table->offsets = NULL;
166 return 0;
167 }
168 uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
169
170 if(table->codePoints != NULL) {
171 uprv_free(table->codePoints);
172 }
173 table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
174 if(table->codePoints == NULL) {
175 *status = U_MEMORY_ALLOCATION_ERROR;
176 uprv_free(table->offsets);
177 table->offsets = NULL;
178 uprv_free(table->CEs);
179 table->CEs = NULL;
180 return 0;
181 }
182 uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
183
184 /* Now stuff the things in*/
185
186 UChar *cpPointer = table->codePoints;
187 uint32_t *CEPointer = table->CEs;
188 for(i = 0; i<table->size; i++) {
189 int32_t size = table->elements[i]->position;
190 uint8_t ccMax = 0, ccMin = 255, cc = 0;
191 for(j = 1; j<size; j++) {
192 cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
193 if(cc>ccMax) {
194 ccMax = cc;
195 }
196 if(cc<ccMin) {
197 ccMin = cc;
198 }
199 *(cpPointer+j) = table->elements[i]->codePoints[j];
200 }
201 *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
202
203 uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
204 for(j = 0; j<size; j++) {
205 if(isCntTableElement(*(CEPointer+j))) {
206 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
207 }
208 }
209 cpPointer += size;
210 CEPointer += size;
211 }
212
213 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
214 // start of the flat file). However, what is done below is just wrong and it affects building of
215 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
216 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
217 // feasible.
218 uint32_t CE;
219 for(i = 0; i<=0x10FFFF; i++) {
220 /*CE = ucmpe32_get(table->mapping, i);*/
221 CE = utrie_get32(table->mapping, i, NULL);
222 if(isCntTableElement(CE)) {
223 CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
224 /*ucmpe32_set(table->mapping, i, CE);*/
225 utrie_set32(table->mapping, i, CE);
226 }
227 }
228
229
230 return table->position;
231 }
232
233 static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
234 ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
235 if(r == NULL) {
236 *status = U_MEMORY_ALLOCATION_ERROR;
237 return NULL;
238 }
239
240 r->position = t->position;
241 r->size = t->size;
242
243 r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
244 if(r->codePoints == NULL) {
245 *status = U_MEMORY_ALLOCATION_ERROR;
246 uprv_free(r);
247 return NULL;
248 }
249 r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
250 if(r->CEs == NULL) {
251 *status = U_MEMORY_ALLOCATION_ERROR;
252 uprv_free(r->codePoints);
253 uprv_free(r);
254 return NULL;
255 }
256 uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
257 uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
258
259 return r;
260
261 }
262
263 U_CAPI CntTable* U_EXPORT2
264 uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
265 if(U_FAILURE(*status)) {
266 return NULL;
267 }
268 int32_t i = 0;
269 CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
270 /* test for NULL */
271 if (r == NULL) {
272 *status = U_MEMORY_ALLOCATION_ERROR;
273 return NULL;
274 }
275 r->position = t->position;
276 r->size = t->size;
277 r->capacity = t->capacity;
278
279 r->mapping = t->mapping;
280
281 r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
282 /* test for NULL */
283 if (r->elements == NULL) {
284 *status = U_MEMORY_ALLOCATION_ERROR;
285 uprv_free(r);
286 return NULL;
287 }
288 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
289
290 for(i = 0; i<t->size; i++) {
291 r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
292 }
293
294 if(t->CEs != NULL) {
295 r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
296 /* test for NULL */
297 if (r->CEs == NULL) {
298 *status = U_MEMORY_ALLOCATION_ERROR;
299 uprv_free(r->elements);
300 uprv_free(r);
301 return NULL;
302 }
303 uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
304 } else {
305 r->CEs = NULL;
306 }
307
308 if(t->codePoints != NULL) {
309 r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
310 /* test for NULL */
311 if (r->codePoints == NULL) {
312 *status = U_MEMORY_ALLOCATION_ERROR;
313 uprv_free(r->CEs);
314 uprv_free(r->elements);
315 uprv_free(r);
316 return NULL;
317 }
318 uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
319 } else {
320 r->codePoints = NULL;
321 }
322
323 if(t->offsets != NULL) {
324 r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
325 /* test for NULL */
326 if (r->offsets == NULL) {
327 *status = U_MEMORY_ALLOCATION_ERROR;
328 uprv_free(r->codePoints);
329 uprv_free(r->CEs);
330 uprv_free(r->elements);
331 uprv_free(r);
332 return NULL;
333 }
334 uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
335 } else {
336 r->offsets = NULL;
337 }
338
339 return r;
340 }
341
342 U_CAPI void U_EXPORT2
343 uprv_cnttab_close(CntTable *table) {
344 int32_t i = 0;
345 for(i = 0; i<table->size; i++) {
346 uprv_free(table->elements[i]->CEs);
347 uprv_free(table->elements[i]->codePoints);
348 uprv_free(table->elements[i]);
349 }
350 uprv_free(table->elements);
351 uprv_free(table->CEs);
352 uprv_free(table->offsets);
353 uprv_free(table->codePoints);
354 uprv_free(table);
355 }
356
357 /* this is for adding non contractions */
358 U_CAPI uint32_t U_EXPORT2
359 uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
360 element &= 0xFFFFFF;
361
362 ContractionTable *tbl = NULL;
363 if(U_FAILURE(*status)) {
364 return 0;
365 }
366
367 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
368 return 0;
369 }
370
371 tbl->CEs[tbl->position-1] = value;
372
373 return(constructContractCE(table->currentTag, element));
374 }
375
376
377 /* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
378 U_CAPI uint32_t U_EXPORT2
379 uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
380
381 ContractionTable *tbl = NULL;
382
383 if(U_FAILURE(*status)) {
384 return 0;
385 }
386 element &= 0xFFFFFF;
387
388 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
389 tbl = addATableElement(table, &element, status);
390 }
391
392 uprv_growTable(tbl, status);
393
394 uint32_t offset = 0;
395
396
397 while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
398 offset++;
399 }
400
401 uint32_t i = tbl->position;
402 for(i = tbl->position; i > offset; i--) {
403 tbl->CEs[i] = tbl->CEs[i-1];
404 tbl->codePoints[i] = tbl->codePoints[i-1];
405 }
406
407 tbl->CEs[offset] = value;
408 tbl->codePoints[offset] = codePoint;
409
410 tbl->position++;
411
412 return(constructContractCE(table->currentTag, element));
413 }
414
415
416 /* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
417 U_CAPI uint32_t U_EXPORT2
418 uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
419
420 element &= 0xFFFFFF;
421
422 ContractionTable *tbl = NULL;
423
424 if(U_FAILURE(*status)) {
425 return 0;
426 }
427
428 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
429 tbl = addATableElement(table, &element, status);
430 }
431
432 uprv_growTable(tbl, status);
433
434 tbl->CEs[tbl->position] = value;
435 tbl->codePoints[tbl->position] = codePoint;
436
437 tbl->position++;
438
439 return(constructContractCE(table->currentTag, element));
440 }
441
442 /* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
443 U_CAPI uint32_t U_EXPORT2
444 uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
445
446 element &= 0xFFFFFF;
447 ContractionTable *tbl = NULL;
448
449 if(U_FAILURE(*status)) {
450 return 0;
451 }
452
453 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
454 tbl = addATableElement(table, &element, status);
455 }
456
457 if(offset >= tbl->size) {
458 *status = U_INDEX_OUTOFBOUNDS_ERROR;
459 return 0;
460 }
461 tbl->CEs[offset] = value;
462 tbl->codePoints[offset] = codePoint;
463
464 //return(offset);
465 return(constructContractCE(table->currentTag, element));
466 }
467
468 static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
469 element &= 0xFFFFFF;
470 ContractionTable *tbl = NULL;
471
472 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
473 return NULL;
474 } else {
475 return tbl;
476 }
477 }
478
479 static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
480 uint32_t position = 0;
481 if(tbl == NULL) {
482 return -1;
483 }
484
485 while(codePoint > tbl->codePoints[position]) {
486 position++;
487 if(position > tbl->position) {
488 return -1;
489 }
490 }
491 if (codePoint == tbl->codePoints[position]) {
492 return position;
493 } else {
494 return -1;
495 }
496 }
497
498 static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
499 if(tbl == NULL) {
500 return UCOL_NOT_FOUND;
501 }
502 if((uint32_t)position > tbl->position || position == -1) {
503 return UCOL_NOT_FOUND;
504 } else {
505 return tbl->CEs[position];
506 }
507 }
508
509 U_CAPI int32_t U_EXPORT2
510 uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
511
512 if(U_FAILURE(*status)) {
513 return 0;
514 }
515
516 return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
517 }
518
519 U_CAPI uint32_t U_EXPORT2
520 uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
521 if(U_FAILURE(*status)) {
522 return UCOL_NOT_FOUND;
523 }
524
525 return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
526 }
527
528 U_CAPI uint32_t U_EXPORT2
529 uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
530 if(U_FAILURE(*status)) {
531 return UCOL_NOT_FOUND;
532 }
533 ContractionTable *tbl = _cnttab_getContractionTable(table, element);
534 return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
535 }
536
537 U_CAPI UBool U_EXPORT2
538 uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
539 if(U_FAILURE(*status)) {
540 return FALSE;
541 }
542
543 while(*(ztString)!=0) {
544 element = uprv_cnttab_findCE(table, element, *(ztString), status);
545 if(element == UCOL_NOT_FOUND) {
546 return FALSE;
547 }
548 if(!isCntTableElement(element)) {
549 return TRUE;
550 }
551 ztString++;
552 }
553 return (UBool)(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND);
554 }
555
556 U_CAPI uint32_t U_EXPORT2
557 uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
558
559 element &= 0xFFFFFF;
560 ContractionTable *tbl = NULL;
561
562 if(U_FAILURE(*status)) {
563 return 0;
564 }
565
566 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
567 return 0;
568 }
569
570 uint32_t position = 0;
571
572 while(codePoint > tbl->codePoints[position]) {
573 position++;
574 if(position > tbl->position) {
575 return UCOL_NOT_FOUND;
576 }
577 }
578 if (codePoint == tbl->codePoints[position]) {
579 tbl->CEs[position] = newCE;
580 return element;
581 } else {
582 return UCOL_NOT_FOUND;
583 }
584 }
585
586 #endif /* #if !UCONFIG_NO_COLLATION */