]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_cnt.cpp
ICU-6.2.9.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_cnt.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
374ca955 4* Copyright (C) 2001-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: ucol_cnt.cpp
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created 02/22/2001
14* created by: Vladimir Weinstein
15*
16* This module maintains a contraction table structure in expanded form
17* and provides means to flatten this structure
18*
19*/
20
21#include "unicode/utypes.h"
22
23#if !UCONFIG_NO_COLLATION
24
25#include "unicode/uchar.h"
26#include "ucol_cnt.h"
27#include "cmemory.h"
28
29U_NAMESPACE_BEGIN
30
374ca955 31static void uprv_growTable(ContractionTable *tbl, UErrorCode *status) {
b75a7d8f
A
32 if(tbl->position == tbl->size) {
33 uint32_t *newData = (uint32_t *)uprv_realloc(tbl->CEs, 2*tbl->size*sizeof(uint32_t));
34 if(newData == NULL) {
35 *status = U_MEMORY_ALLOCATION_ERROR;
36 return;
37 }
38 UChar *newCPs = (UChar *)uprv_realloc(tbl->codePoints, 2*tbl->size*sizeof(UChar));
39 if(newCPs == NULL) {
40 uprv_free(newData);
41 *status = U_MEMORY_ALLOCATION_ERROR;
42 return;
43 }
44 tbl->CEs = newData;
45 tbl->codePoints = newCPs;
46 tbl->size *= 2;
47 }
48}
49
50U_CAPI CntTable* U_EXPORT2
51/*uprv_cnttab_open(CompactEIntArray *mapping, UErrorCode *status) {*/
52uprv_cnttab_open(UNewTrie *mapping, UErrorCode *status) {
53 if(U_FAILURE(*status)) {
54 return 0;
55 }
56 CntTable *tbl = (CntTable *)uprv_malloc(sizeof(CntTable));
57 if(tbl == NULL) {
58 *status = U_MEMORY_ALLOCATION_ERROR;
59 return NULL;
60 }
61 tbl->mapping = mapping;
62 tbl->elements = (ContractionTable **)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
63 if(tbl->elements == NULL) {
64 *status = U_MEMORY_ALLOCATION_ERROR;
65 uprv_free(tbl);
66 return NULL;
67 }
68 tbl->capacity = INIT_EXP_TABLE_SIZE;
69 uprv_memset(tbl->elements, 0, INIT_EXP_TABLE_SIZE*sizeof(ContractionTable *));
70 tbl->size = 0;
71 tbl->position = 0;
72 tbl->CEs = NULL;
73 tbl->codePoints = NULL;
74 tbl->offsets = NULL;
75 tbl->currentTag = NOT_FOUND_TAG;
76 return tbl;
77}
78
79static ContractionTable *addATableElement(CntTable *table, uint32_t *key, UErrorCode *status) {
80 ContractionTable *el = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
81 if(el == NULL) {
82 *status = U_MEMORY_ALLOCATION_ERROR;
83 return NULL;
84 }
85 el->CEs = (uint32_t *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
86 if(el->CEs == NULL) {
87 *status = U_MEMORY_ALLOCATION_ERROR;
88 uprv_free(el);
89 return NULL;
90 }
91
92 el->codePoints = (UChar *)uprv_malloc(INIT_EXP_TABLE_SIZE*sizeof(UChar));
93 if(el->codePoints == NULL) {
94 *status = U_MEMORY_ALLOCATION_ERROR;
95 uprv_free(el->CEs);
96 uprv_free(el);
97 return NULL;
98 }
99
100 el->position = 0;
101 el->size = INIT_EXP_TABLE_SIZE;
102 uprv_memset(el->CEs, 0, INIT_EXP_TABLE_SIZE*sizeof(uint32_t));
103 uprv_memset(el->codePoints, 0, INIT_EXP_TABLE_SIZE*sizeof(UChar));
104
105 table->elements[table->size] = el;
106
107 //uhash_put(table->elements, (void *)table->size, el, status);
108
109 *key = table->size++;
110
111 if(table->size == table->capacity) {
112 ContractionTable **newElements = (ContractionTable **)uprv_malloc(table->capacity*2*sizeof(ContractionTable *));
113 // do realloc
114/* table->elements = (ContractionTable **)realloc(table->elements, table->capacity*2*sizeof(ContractionTable *));*/
115 if(newElements == NULL) {
116 *status = U_MEMORY_ALLOCATION_ERROR;
117 uprv_free(el->codePoints);
118 uprv_free(el->CEs);
119 uprv_free(el);
120 return NULL;
121 } else {
122 ContractionTable **oldElements = table->elements;
123 uprv_memcpy(newElements, oldElements, table->capacity*sizeof(ContractionTable *));
124 uprv_memset(newElements+table->capacity, 0, table->capacity*sizeof(ContractionTable *));
125 table->capacity *= 2;
126 table->elements = newElements;
127 uprv_free(oldElements);
128 }
129 }
130
131 return el;
132}
133
134U_CAPI int32_t U_EXPORT2
135uprv_cnttab_constructTable(CntTable *table, uint32_t mainOffset, UErrorCode *status) {
136 int32_t i = 0, j = 0;
137 if(U_FAILURE(*status) || table->size == 0) {
138 return 0;
139 }
140
141 table->position = 0;
142
143 if(table->offsets != NULL) {
144 uprv_free(table->offsets);
145 }
146 table->offsets = (int32_t *)uprv_malloc(table->size*sizeof(int32_t));
147 if(table->offsets == NULL) {
148 *status = U_MEMORY_ALLOCATION_ERROR;
149 return 0;
150 }
151
152
153 /* See how much memory we need */
154 for(i = 0; i<table->size; i++) {
155 table->offsets[i] = table->position+mainOffset;
156 table->position += table->elements[i]->position;
157 }
158
159 /* Allocate it */
160 if(table->CEs != NULL) {
161 uprv_free(table->CEs);
162 }
163 table->CEs = (uint32_t *)uprv_malloc(table->position*sizeof(uint32_t));
164 if(table->CEs == NULL) {
165 *status = U_MEMORY_ALLOCATION_ERROR;
166 uprv_free(table->offsets);
167 table->offsets = NULL;
168 return 0;
169 }
170 uprv_memset(table->CEs, '?', table->position*sizeof(uint32_t));
171
172 if(table->codePoints != NULL) {
173 uprv_free(table->codePoints);
174 }
175 table->codePoints = (UChar *)uprv_malloc(table->position*sizeof(UChar));
176 if(table->codePoints == NULL) {
177 *status = U_MEMORY_ALLOCATION_ERROR;
178 uprv_free(table->offsets);
179 table->offsets = NULL;
180 uprv_free(table->CEs);
181 table->CEs = NULL;
182 return 0;
183 }
184 uprv_memset(table->codePoints, '?', table->position*sizeof(UChar));
185
186 /* Now stuff the things in*/
187
188 UChar *cpPointer = table->codePoints;
189 uint32_t *CEPointer = table->CEs;
190 for(i = 0; i<table->size; i++) {
191 int32_t size = table->elements[i]->position;
192 uint8_t ccMax = 0, ccMin = 255, cc = 0;
193 for(j = 1; j<size; j++) {
194 cc = u_getCombiningClass(table->elements[i]->codePoints[j]);
195 if(cc>ccMax) {
196 ccMax = cc;
197 }
198 if(cc<ccMin) {
199 ccMin = cc;
200 }
201 *(cpPointer+j) = table->elements[i]->codePoints[j];
202 }
203 *cpPointer = ((ccMin==ccMax)?1:0 << 8) | ccMax;
204
205 uprv_memcpy(CEPointer, table->elements[i]->CEs, size*sizeof(uint32_t));
206 for(j = 0; j<size; j++) {
207 if(isCntTableElement(*(CEPointer+j))) {
208 *(CEPointer+j) = constructContractCE(getCETag(*(CEPointer+j)), table->offsets[getContractOffset(*(CEPointer+j))]);
209 }
210 }
211 cpPointer += size;
212 CEPointer += size;
213 }
214
374ca955
A
215 // TODO: this one apparently updates the contraction CEs to point to a real address (relative to the
216 // start of the flat file). However, what is done below is just wrong and it affects building of
217 // tailorings that have constructions in a bad way. At least, one should enumerate the trie. Also,
218 // keeping a list of code points that are contractions might be smart, although I'm not sure if it's
219 // feasible.
b75a7d8f
A
220 uint32_t CE;
221 for(i = 0; i<=0x10FFFF; i++) {
222 /*CE = ucmpe32_get(table->mapping, i);*/
223 CE = utrie_get32(table->mapping, i, NULL);
224 if(isCntTableElement(CE)) {
225 CE = constructContractCE(getCETag(CE), table->offsets[getContractOffset(CE)]);
226 /*ucmpe32_set(table->mapping, i, CE);*/
227 utrie_set32(table->mapping, i, CE);
228 }
229 }
230
231
232 return table->position;
233}
234
374ca955 235static ContractionTable *uprv_cnttab_cloneContraction(ContractionTable *t, UErrorCode *status) {
b75a7d8f
A
236 ContractionTable *r = (ContractionTable *)uprv_malloc(sizeof(ContractionTable));
237 if(r == NULL) {
238 *status = U_MEMORY_ALLOCATION_ERROR;
239 return NULL;
240 }
241
242 r->position = t->position;
243 r->size = t->size;
244
245 r->codePoints = (UChar *)uprv_malloc(sizeof(UChar)*t->size);
246 r->CEs = (uint32_t *)uprv_malloc(sizeof(uint32_t)*t->size);
247
248 /* test for NULL */
249 if((r->codePoints == NULL) || (r->CEs == NULL)) {
250 *status = U_MEMORY_ALLOCATION_ERROR;
251 return NULL;
252 }
253 uprv_memcpy(r->codePoints, t->codePoints, sizeof(UChar)*t->size);
254 uprv_memcpy(r->CEs, t->CEs, sizeof(uint32_t)*t->size);
255
256 return r;
257
258}
259
260U_CAPI CntTable* U_EXPORT2
261uprv_cnttab_clone(CntTable *t, UErrorCode *status) {
262 if(U_FAILURE(*status)) {
263 return NULL;
264 }
265 int32_t i = 0;
266 CntTable *r = (CntTable *)uprv_malloc(sizeof(CntTable));
267 /* test for NULL */
268 if (r == NULL) {
269 *status = U_MEMORY_ALLOCATION_ERROR;
270 return NULL;
271 }
272 r->position = t->position;
273 r->size = t->size;
274 r->capacity = t->capacity;
275
276 r->mapping = t->mapping;
277
278 r->elements = (ContractionTable **)uprv_malloc(t->capacity*sizeof(ContractionTable *));
279 /* test for NULL */
280 if (r->elements == NULL) {
281 *status = U_MEMORY_ALLOCATION_ERROR;
282 return NULL;
283 }
284 //uprv_memcpy(r->elements, t->elements, t->capacity*sizeof(ContractionTable *));
285
286 for(i = 0; i<t->size; i++) {
287 r->elements[i] = uprv_cnttab_cloneContraction(t->elements[i], status);
288 }
289
290 if(t->CEs != NULL) {
291 r->CEs = (uint32_t *)uprv_malloc(t->position*sizeof(uint32_t));
292 /* test for NULL */
293 if (r->CEs == NULL) {
294 *status = U_MEMORY_ALLOCATION_ERROR;
295 return NULL;
296 }
297 uprv_memcpy(r->CEs, t->CEs, t->position*sizeof(uint32_t));
298 } else {
299 r->CEs = NULL;
300 }
301
302 if(t->codePoints != NULL) {
303 r->codePoints = (UChar *)uprv_malloc(t->position*sizeof(UChar));
304 /* test for NULL */
305 if (r->codePoints == NULL) {
306 *status = U_MEMORY_ALLOCATION_ERROR;
307 return NULL;
308 }
309 uprv_memcpy(r->codePoints, t->codePoints, t->position*sizeof(UChar));
310 } else {
311 r->codePoints = NULL;
312 }
313
314 if(t->offsets != NULL) {
315 r->offsets = (int32_t *)uprv_malloc(t->size*sizeof(int32_t));
316 /* test for NULL */
317 if (r->offsets == NULL) {
318 *status = U_MEMORY_ALLOCATION_ERROR;
319 return NULL;
320 }
321 uprv_memcpy(r->offsets, t->offsets, t->size*sizeof(int32_t));
322 } else {
323 r->offsets = NULL;
324 }
325
326 return r;
327}
328
329U_CAPI void U_EXPORT2
330uprv_cnttab_close(CntTable *table) {
331 int32_t i = 0;
332 for(i = 0; i<table->size; i++) {
333 uprv_free(table->elements[i]->CEs);
334 uprv_free(table->elements[i]->codePoints);
335 uprv_free(table->elements[i]);
336 }
337 uprv_free(table->elements);
338 uprv_free(table->CEs);
339 uprv_free(table->offsets);
340 uprv_free(table->codePoints);
341 uprv_free(table);
342}
343
344/* this is for adding non contractions */
345U_CAPI uint32_t U_EXPORT2
346uprv_cnttab_changeLastCE(CntTable *table, uint32_t element, uint32_t value, UErrorCode *status) {
347 element &= 0xFFFFFF;
348
349 ContractionTable *tbl = NULL;
350 if(U_FAILURE(*status)) {
351 return 0;
352 }
353
354 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
355 return 0;
356 }
357
358 tbl->CEs[tbl->position-1] = value;
359
360 return(constructContractCE(table->currentTag, element));
361}
362
363
364/* inserts a part of contraction sequence in table. Sequences behind the offset are moved back. If element is non existent, it creates on. Returns element handle */
365U_CAPI uint32_t U_EXPORT2
366uprv_cnttab_insertContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
367
368 element &= 0xFFFFFF;
369 ContractionTable *tbl = NULL;
370
371 if(U_FAILURE(*status)) {
372 return 0;
373 }
374
375 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
376 tbl = addATableElement(table, &element, status);
377 }
378
379 uprv_growTable(tbl, status);
380
381 uint32_t offset = 0;
382
383
384 while(tbl->codePoints[offset] < codePoint && offset<tbl->position) {
385 offset++;
386 }
387
388 uint32_t i = tbl->position;
389 for(i = tbl->position; i > offset; i--) {
390 tbl->CEs[i] = tbl->CEs[i-1];
391 tbl->codePoints[i] = tbl->codePoints[i-1];
392 }
393
394 tbl->CEs[offset] = value;
395 tbl->codePoints[offset] = codePoint;
396
397 tbl->position++;
398
399 return(constructContractCE(table->currentTag, element));
400}
401
402
403/* adds more contractions in table. If element is non existant, it creates on. Returns element handle */
404U_CAPI uint32_t U_EXPORT2
405uprv_cnttab_addContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t value, UErrorCode *status) {
406
407 element &= 0xFFFFFF;
408
409 ContractionTable *tbl = NULL;
410
411 if(U_FAILURE(*status)) {
412 return 0;
413 }
414
415 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
416 tbl = addATableElement(table, &element, status);
417 }
418
419 uprv_growTable(tbl, status);
420
421 tbl->CEs[tbl->position] = value;
422 tbl->codePoints[tbl->position] = codePoint;
423
424 tbl->position++;
425
426 return(constructContractCE(table->currentTag, element));
427}
428
429/* sets a part of contraction sequence in table. If element is non existant, it creates on. Returns element handle */
430U_CAPI uint32_t U_EXPORT2
431uprv_cnttab_setContraction(CntTable *table, uint32_t element, uint32_t offset, UChar codePoint, uint32_t value, UErrorCode *status) {
432
433 element &= 0xFFFFFF;
434 ContractionTable *tbl = NULL;
435
436 if(U_FAILURE(*status)) {
437 return 0;
438 }
439
440 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
441 tbl = addATableElement(table, &element, status);
442 }
443
444 if(offset >= tbl->size) {
445 *status = U_INDEX_OUTOFBOUNDS_ERROR;
446 return 0;
447 }
448 tbl->CEs[offset] = value;
449 tbl->codePoints[offset] = codePoint;
450
451 //return(offset);
452 return(constructContractCE(table->currentTag, element));
453}
454
455static ContractionTable *_cnttab_getContractionTable(CntTable *table, uint32_t element) {
456 element &= 0xFFFFFF;
457 ContractionTable *tbl = NULL;
458
459 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
460 return NULL;
461 } else {
462 return tbl;
463 }
464}
465
466static int32_t _cnttab_findCP(ContractionTable *tbl, UChar codePoint) {
467 uint32_t position = 0;
468 if(tbl == NULL) {
469 return -1;
470 }
471
472 while(codePoint > tbl->codePoints[position]) {
473 position++;
474 if(position > tbl->position) {
475 return -1;
476 }
477 }
478 if (codePoint == tbl->codePoints[position]) {
479 return position;
480 } else {
481 return -1;
482 }
483}
484
485static uint32_t _cnttab_getCE(ContractionTable *tbl, int32_t position) {
486 if(tbl == NULL) {
487 return UCOL_NOT_FOUND;
488 }
489 if((uint32_t)position > tbl->position || position == -1) {
490 return UCOL_NOT_FOUND;
491 } else {
492 return tbl->CEs[position];
493 }
494}
495
496U_CAPI int32_t U_EXPORT2
497uprv_cnttab_findCP(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
498
499 if(U_FAILURE(*status)) {
500 return 0;
501 }
502
503 return _cnttab_findCP(_cnttab_getContractionTable(table, element), codePoint);
504}
505
506U_CAPI uint32_t U_EXPORT2
507uprv_cnttab_getCE(CntTable *table, uint32_t element, uint32_t position, UErrorCode *status) {
508 if(U_FAILURE(*status)) {
509 return UCOL_NOT_FOUND;
510 }
511
512 return(_cnttab_getCE(_cnttab_getContractionTable(table, element), position));
513}
514
515U_CAPI uint32_t U_EXPORT2
516uprv_cnttab_findCE(CntTable *table, uint32_t element, UChar codePoint, UErrorCode *status) {
517 if(U_FAILURE(*status)) {
518 return UCOL_NOT_FOUND;
519 }
520 ContractionTable *tbl = _cnttab_getContractionTable(table, element);
521 return _cnttab_getCE(tbl, _cnttab_findCP(tbl, codePoint));
522}
523
524U_CAPI UBool U_EXPORT2
525uprv_cnttab_isTailored(CntTable *table, uint32_t element, UChar *ztString, UErrorCode *status) {
526 if(U_FAILURE(*status)) {
527 return FALSE;
528 }
529
530 while(*(ztString)!=0) {
531 element = uprv_cnttab_findCE(table, element, *(ztString), status);
532 if(element == UCOL_NOT_FOUND) {
533 return FALSE;
534 }
535 if(!isCntTableElement(element)) {
536 return TRUE;
537 }
538 ztString++;
539 }
540 if(uprv_cnttab_getCE(table, element, 0, status) != UCOL_NOT_FOUND) {
541 return TRUE;
542 } else {
543 return FALSE;
544 }
545}
546
547U_CAPI uint32_t U_EXPORT2
548uprv_cnttab_changeContraction(CntTable *table, uint32_t element, UChar codePoint, uint32_t newCE, UErrorCode *status) {
549
550 element &= 0xFFFFFF;
551 ContractionTable *tbl = NULL;
552
553 if(U_FAILURE(*status)) {
554 return 0;
555 }
556
557 if((element == 0xFFFFFF) || (tbl = table->elements[element]) == NULL) {
558 return 0;
559 }
560
561 uint32_t position = 0;
562
563 while(codePoint > tbl->codePoints[position]) {
564 position++;
565 if(position > tbl->position) {
566 return UCOL_NOT_FOUND;
567 }
568 }
569 if (codePoint == tbl->codePoints[position]) {
570 tbl->CEs[position] = newCE;
571 return element;
572 } else {
573 return UCOL_NOT_FOUND;
574 }
575}
576
577U_NAMESPACE_END
578
579#endif /* #if !UCONFIG_NO_COLLATION */