]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucmp8.c
ICU-6.2.8.tar.gz
[apple/icu.git] / icuSources / common / ucmp8.c
1 /*
2 ********************************************************************
3 * COPYRIGHT:
4 * Copyright (c) 1997-2004, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************
7 */
8
9 #include "ucmp8.h"
10 #include "cmemory.h"
11
12 /* internal constants*/
13
14
15 U_CAPI int32_t U_EXPORT2
16 ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount;}
17
18 U_CAPI int32_t U_EXPORT2
19 ucmp8_getkBlockCount() { return UCMP8_kBlockCount;}
20
21 U_CAPI void U_EXPORT2
22 ucmp8_initBogus(CompactByteArray* array)
23 {
24 CompactByteArray* this_obj = array;
25
26 if (this_obj == NULL) return;
27
28 this_obj->fStructSize = sizeof(CompactByteArray);
29 this_obj->fArray = NULL;
30 this_obj->fIndex = NULL;
31 this_obj->fCount = UCMP8_kUnicodeCount;
32 this_obj->fCompact = FALSE;
33 this_obj->fBogus = TRUE;
34 this_obj->fAlias = FALSE;
35 this_obj->fIAmOwned = TRUE;
36 }
37
38 /* debug flags*/
39 /*=======================================================*/
40 U_CAPI void U_EXPORT2
41 ucmp8_init(CompactByteArray* array, int8_t defaultValue)
42 {
43 /* set up the index array and the data array.
44 * the index array always points into particular parts of the data array
45 * it is initially set up to point at regular block boundaries
46 * The following example uses blocks of 4 for simplicity
47 * Example: Expanded
48 * INDEX# 0 1 2 3 4
49 * INDEX 0 4 8 12 16 ...
50 * ARRAY abcdeababcedzyabcdea...
51 * | | | | | |...
52 * whenever you set an element in the array, it unpacks to this state
53 * After compression, the index will point to various places in the data array
54 * wherever there is a runs of the same elements as in the original
55 * Example: Compressed
56 * INDEX# 0 1 2 3 4
57 * INDEX 0 4 1 8 2 ...
58 * ARRAY abcdeabazyabc...
59 * If you look at the example, index# 2 in the expanded version points
60 * to data position number 8, which has elements "bced". In the compressed
61 * version, index# 2 points to data position 1, which also has "bced"
62 */
63 CompactByteArray* this_obj = array;
64 int32_t i;
65
66 if (this_obj == NULL) return;
67
68 this_obj->fStructSize = sizeof(CompactByteArray);
69 this_obj->fArray = NULL;
70 this_obj->fIndex = NULL;
71 this_obj->fCount = UCMP8_kUnicodeCount;
72 this_obj->fCompact = FALSE;
73 this_obj->fBogus = FALSE;
74 this_obj->fAlias = FALSE;
75 this_obj->fIAmOwned = TRUE;
76
77
78 this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
79 if (!this_obj->fArray)
80 {
81 this_obj->fBogus = TRUE;
82 return;
83 }
84 this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
85 if (!this_obj->fIndex)
86 {
87 uprv_free(this_obj->fArray);
88 this_obj->fArray = NULL;
89 this_obj->fBogus = TRUE;
90 return;
91 }
92 for (i = 0; i < UCMP8_kUnicodeCount; ++i)
93 {
94 this_obj->fArray[i] = defaultValue;
95 }
96 for (i = 0; i < UCMP8_kIndexCount; ++i)
97 {
98 this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
99 }
100 }
101
102 U_CAPI CompactByteArray* U_EXPORT2
103 ucmp8_open(int8_t defaultValue)
104 {
105 /* set up the index array and the data array.
106 * the index array always points into particular parts of the data array
107 * it is initially set up to point at regular block boundaries
108 * The following example uses blocks of 4 for simplicity
109 * Example: Expanded
110 * INDEX# 0 1 2 3 4
111 * INDEX 0 4 8 12 16 ...
112 * ARRAY abcdeababcedzyabcdea...
113 * | | | | | |...
114 * whenever you set an element in the array, it unpacks to this state
115 * After compression, the index will point to various places in the data array
116 * wherever there is a runs of the same elements as in the original
117 * Example: Compressed
118 * INDEX# 0 1 2 3 4
119 * INDEX 0 4 1 8 2 ...
120 * ARRAY abcdeabazyabc...
121 * If you look at the example, index# 2 in the expanded version points
122 * to data position number 8, which has elements "bced". In the compressed
123 * version, index# 2 points to data position 1, which also has "bced"
124 */
125 CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
126 int32_t i;
127
128 if (this_obj == NULL) return NULL;
129
130 this_obj->fStructSize = sizeof(CompactByteArray);
131 this_obj->fArray = NULL;
132 this_obj->fIndex = NULL;
133 this_obj->fCount = UCMP8_kUnicodeCount;
134 this_obj->fCompact = FALSE;
135 this_obj->fBogus = FALSE;
136 this_obj->fAlias = FALSE;
137 this_obj->fIAmOwned = FALSE;
138
139
140 this_obj->fArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
141 if (!this_obj->fArray)
142 {
143 this_obj->fBogus = TRUE;
144 return NULL;
145 }
146 this_obj->fIndex = (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount);
147 if (!this_obj->fIndex)
148 {
149 uprv_free(this_obj->fArray);
150 this_obj->fArray = NULL;
151 this_obj->fBogus = TRUE;
152 return NULL;
153 }
154 for (i = 0; i < UCMP8_kUnicodeCount; ++i)
155 {
156 this_obj->fArray[i] = defaultValue;
157 }
158 for (i = 0; i < UCMP8_kIndexCount; ++i)
159 {
160 this_obj->fIndex[i] = (uint16_t)(i << UCMP8_kBlockShift);
161 }
162
163 return this_obj;
164 }
165
166 U_CAPI CompactByteArray* U_EXPORT2
167 ucmp8_openAdopt(uint16_t *indexArray,
168 int8_t *newValues,
169 int32_t count)
170 {
171 CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
172 /* test for NULL */
173 if(this_obj == NULL)
174 return NULL;
175 ucmp8_initAdopt(this_obj, indexArray, newValues, count);
176 this_obj->fIAmOwned = FALSE;
177 return this_obj;
178 }
179
180 U_CAPI CompactByteArray* U_EXPORT2
181 ucmp8_openAlias(uint16_t *indexArray,
182 int8_t *newValues,
183 int32_t count)
184 {
185 CompactByteArray* this_obj = (CompactByteArray*) uprv_malloc(sizeof(CompactByteArray));
186 /* test for NULL */
187 if(this_obj == NULL)
188 return NULL;
189 ucmp8_initAlias(this_obj, indexArray, newValues, count);
190 this_obj->fIAmOwned = FALSE;
191 return this_obj;
192 }
193
194 /*=======================================================*/
195
196 U_CAPI CompactByteArray* U_EXPORT2
197 ucmp8_initAdopt(CompactByteArray *this_obj,
198 uint16_t *indexArray,
199 int8_t *newValues,
200 int32_t count)
201 {
202 if (this_obj) {
203 this_obj->fCount = count;
204 this_obj->fBogus = FALSE;
205 this_obj->fStructSize = sizeof(CompactByteArray);
206
207 this_obj->fArray = newValues;
208 this_obj->fIndex = indexArray;
209 this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE);
210 this_obj->fAlias = FALSE;
211 this_obj->fIAmOwned = TRUE;
212 }
213
214 return this_obj;
215 }
216
217 U_CAPI CompactByteArray* U_EXPORT2
218 ucmp8_initAlias(CompactByteArray *this_obj,
219 uint16_t *indexArray,
220 int8_t *newValues,
221 int32_t count)
222 {
223 if (this_obj) {
224 this_obj->fArray = NULL;
225 this_obj->fIndex = NULL;
226 this_obj->fCount = count;
227 this_obj->fBogus = FALSE;
228 this_obj->fStructSize = sizeof(CompactByteArray);
229
230 this_obj->fArray = newValues;
231 this_obj->fIndex = indexArray;
232 this_obj->fCompact = (UBool)((count < UCMP8_kUnicodeCount) ? TRUE : FALSE);
233 this_obj->fAlias = TRUE;
234 this_obj->fIAmOwned = TRUE;
235 }
236
237 return this_obj;
238 }
239
240 /*=======================================================*/
241
242 U_CAPI void U_EXPORT2
243 ucmp8_close(CompactByteArray* this_obj)
244 {
245 if(this_obj != NULL) {
246 if(!this_obj->fAlias) {
247 if(this_obj->fArray != NULL) {
248 uprv_free(this_obj->fArray);
249 }
250 if(this_obj->fIndex != NULL) {
251 uprv_free(this_obj->fIndex);
252 }
253 }
254 if(!this_obj->fIAmOwned) /* Called if 'init' was called instead of 'open'. */
255 {
256 uprv_free(this_obj);
257 }
258 }
259 }
260
261
262 /*=======================================================*/
263
264 U_CAPI void U_EXPORT2
265 ucmp8_expand(CompactByteArray* this_obj)
266 {
267 /* can optimize later.
268 * if we have to expand, then walk through the blocks instead of using Get
269 * this code unpacks the array by copying the blocks to the normalized position.
270 * Example: Compressed
271 * INDEX# 0 1 2 3 4
272 * INDEX 0 4 1 8 2 ...
273 * ARRAY abcdeabazyabc...
274 * turns into
275 * Example: Expanded
276 * INDEX# 0 1 2 3 4
277 * INDEX 0 4 8 12 16 ...
278 * ARRAY abcdeababcedzyabcdea...
279 */
280 int32_t i;
281 if (this_obj->fCompact)
282 {
283 int8_t* tempArray;
284 tempArray = (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount);
285 if (!tempArray)
286 {
287 this_obj->fBogus = TRUE;
288 return;
289 }
290 for (i = 0; i < UCMP8_kUnicodeCount; ++i)
291 {
292 tempArray[i] = ucmp8_get(this_obj,(UChar)i); /* HSYS : How expand?*/
293 }
294 for (i = 0; i < UCMP8_kIndexCount; ++i)
295 {
296 this_obj->fIndex[i] = (uint16_t)(i<< UCMP8_kBlockShift);
297 }
298 uprv_free(this_obj->fArray);
299 this_obj->fArray = tempArray;
300 this_obj->fCompact = FALSE;
301 this_obj->fAlias = FALSE;
302
303 }
304 }
305
306
307 /*=======================================================*/
308 /* this_obj->fArray: an array to be overlapped
309 * start and count: specify the block to be overlapped
310 * tempIndex: the overlapped array (actually indices back into inputContents)
311 * inputHash: an index of hashes for tempIndex, where
312 * inputHash[i] = XOR of values from i-count+1 to i
313 */
314 static int32_t
315 findOverlappingPosition(CompactByteArray* this_obj,
316 uint32_t start,
317 const UChar* tempIndex,
318 int32_t tempIndexCount,
319 uint32_t cycle)
320 {
321 /* this_obj is a utility routine for finding blocks that overlap.
322 * IMPORTANT: the cycle number is very important. Small cycles take a lot
323 * longer to work. In some cases, they may be able to get better compaction.
324 */
325
326 int32_t i;
327 int32_t j;
328 int32_t currentCount;
329
330 for (i = 0; i < tempIndexCount; i += cycle)
331 {
332 currentCount = UCMP8_kBlockCount;
333 if (i + UCMP8_kBlockCount > tempIndexCount)
334 {
335 currentCount = tempIndexCount - i;
336 }
337 for (j = 0; j < currentCount; ++j)
338 {
339 if (this_obj->fArray[start + j] != this_obj->fArray[tempIndex[i + j]])
340 break;
341 }
342 if (j == currentCount)
343 break;
344 }
345
346 return i;
347 }
348
349 U_CAPI UBool U_EXPORT2
350 ucmp8_isBogus(const CompactByteArray* this_obj)
351 {
352 return (UBool)(this_obj == NULL || this_obj->fBogus);
353 }
354
355 U_CAPI const int8_t* U_EXPORT2
356 ucmp8_getArray(const CompactByteArray* this_obj)
357 {
358 return this_obj->fArray;
359 }
360
361 U_CAPI const uint16_t* U_EXPORT2
362 ucmp8_getIndex(const CompactByteArray* this_obj)
363 {
364 return this_obj->fIndex;
365 }
366
367 U_CAPI int32_t U_EXPORT2
368 ucmp8_getCount(const CompactByteArray* this_obj)
369 {
370 return this_obj->fCount;
371 }
372
373
374 U_CAPI void U_EXPORT2
375 ucmp8_set(CompactByteArray* this_obj,
376 UChar c,
377 int8_t value)
378 {
379 if (this_obj->fCompact == TRUE)
380 {
381 ucmp8_expand(this_obj);
382 if (this_obj->fBogus) return;
383 }
384 this_obj->fArray[(int32_t)c] = value;
385 }
386
387
388 U_CAPI void U_EXPORT2
389 ucmp8_setRange(CompactByteArray* this_obj,
390 UChar start,
391 UChar end,
392 int8_t value)
393 {
394 int32_t i;
395 if (this_obj->fCompact == TRUE)
396 {
397 ucmp8_expand(this_obj);
398 if (this_obj->fBogus)
399 return;
400 }
401 for (i = start; i <= end; ++i)
402 {
403 this_obj->fArray[i] = value;
404 }
405 }
406
407
408 /*=======================================================*/
409
410 U_CAPI void U_EXPORT2
411 ucmp8_compact(CompactByteArray* this_obj,
412 uint32_t cycle)
413 {
414 if (!this_obj->fCompact)
415 {
416 /* this_obj actually does the compaction.
417 * it walks throught the contents of the expanded array, finding the
418 * first block in the data that matches the contents of the current index.
419 * As it works, it keeps an updated pointer to the last position,
420 * so that it knows how big to make the final array
421 * If the matching succeeds, then the index will point into the data
422 * at some earlier position.
423 * If the matching fails, then last position pointer will be bumped,
424 * and the index will point to that last block of data.
425 */
426 UChar* tempIndex;
427 int32_t tempIndexCount;
428 int8_t* tempArray;
429 int32_t iBlock, iIndex;
430
431 /* fix cycle, must be 0 < cycle <= blockcount*/
432 if (cycle <= 0)
433 cycle = 1;
434 else if (cycle > (uint32_t)UCMP8_kBlockCount)
435 cycle = UCMP8_kBlockCount;
436
437 /* make temp storage, larger than we need*/
438 tempIndex = (UChar*) uprv_malloc(sizeof(UChar)* UCMP8_kUnicodeCount);
439 if (!tempIndex)
440 {
441 this_obj->fBogus = TRUE;
442 return;
443 }
444 /* set up first block.*/
445 tempIndexCount = UCMP8_kBlockCount;
446 for (iIndex = 0; iIndex < UCMP8_kBlockCount; ++iIndex)
447 {
448 tempIndex[iIndex] = (uint16_t)iIndex;
449 } /* endfor (iIndex = 0; .....)*/
450 this_obj->fIndex[0] = 0;
451
452 /* for each successive block, find out its first position in the compacted array*/
453 for (iBlock = 1; iBlock < UCMP8_kIndexCount; ++iBlock)
454 {
455 int32_t newCount, firstPosition, block;
456 block = iBlock << UCMP8_kBlockShift;
457 /* if (debugSmall) if (block > debugSmallLimit) break;*/
458 firstPosition = findOverlappingPosition(this_obj,
459 block,
460 tempIndex,
461 tempIndexCount,
462 cycle);
463
464 /* if not contained in the current list, copy the remainder
465 * invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
466 * we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount]
467 */
468 newCount = firstPosition + UCMP8_kBlockCount;
469 if (newCount > tempIndexCount)
470 {
471 for (iIndex = tempIndexCount; iIndex < newCount; ++iIndex)
472 {
473 tempIndex[iIndex] = (uint16_t)(iIndex - firstPosition + block);
474 } /* endfor (iIndex = tempIndexCount....)*/
475 tempIndexCount = newCount;
476 } /* endif (newCount > tempIndexCount)*/
477 this_obj->fIndex[iBlock] = (uint16_t)firstPosition;
478 } /* endfor (iBlock = 1.....)*/
479
480 /* now allocate and copy the items into the array*/
481 tempArray = (int8_t*) uprv_malloc(tempIndexCount * sizeof(int8_t));
482 if (!tempArray)
483 {
484 this_obj->fBogus = TRUE;
485 uprv_free(tempIndex);
486 return;
487 }
488 for (iIndex = 0; iIndex < tempIndexCount; ++iIndex)
489 {
490 tempArray[iIndex] = this_obj->fArray[tempIndex[iIndex]];
491 }
492 uprv_free(this_obj->fArray);
493 this_obj->fArray = tempArray;
494 this_obj->fCount = tempIndexCount;
495
496
497 /* free up temp storage*/
498 uprv_free(tempIndex);
499 this_obj->fCompact = TRUE;
500 } /* endif (!this_obj->fCompact)*/
501 }
502
503 #define MEMORY_WRITE(destAddr, source, sizeSoFar, len) \
504 if (destAddr) {\
505 uprv_memcpy(destAddr+sizeSoFar, source, len);\
506 }\
507 sizeSoFar += (len)
508
509 U_CAPI uint32_t U_EXPORT2 ucmp8_flattenMem (const CompactByteArray* array, uint8_t *MS)
510 {
511 int32_t size = 0;
512 static const int32_t version = ICU_UCMP8_VERSION;
513
514 MEMORY_WRITE(MS, &version, size, 4);
515
516 MEMORY_WRITE(MS, &array->fCount, size, 4);
517
518 MEMORY_WRITE(MS, array->fIndex, size, sizeof(array->fIndex[0])*UCMP8_kIndexCount);
519
520 MEMORY_WRITE(MS, array->fArray, size, sizeof(array->fArray[0])*array->fCount);
521
522 while(size%4) /* end padding */
523 {
524 uint8_t pad = 0;
525 MEMORY_WRITE(MS, &pad, size, 1);
526 }
527
528 return size;
529 }
530
531 /* We use sizeof(*array), etc so that this code can be as portable as
532 possible between the ucmpX_ family.
533 */
534
535 U_CAPI void U_EXPORT2 ucmp8_initFromData(CompactByteArray *this_obj, const uint8_t **source, UErrorCode *status)
536 {
537 uint32_t i;
538 const uint8_t *oldSource = *source;
539
540 if(U_FAILURE(*status))
541 return;
542
543 this_obj->fArray = NULL;
544 this_obj->fIndex = NULL;
545 this_obj->fBogus = FALSE;
546 this_obj->fStructSize = sizeof(CompactByteArray);
547 this_obj->fCompact = TRUE;
548 this_obj->fAlias = TRUE;
549 this_obj->fIAmOwned = TRUE;
550
551 i = * ((const uint32_t*) *source);
552 (*source) += 4;
553
554 if(i != ICU_UCMP8_VERSION)
555 {
556 *status = U_INVALID_FORMAT_ERROR;
557 return;
558 }
559
560 this_obj->fCount = * ((const uint32_t*)*source);
561 (*source) += 4;
562
563 this_obj->fIndex = (uint16_t*) *source;
564 (*source) += sizeof(this_obj->fIndex[0])*UCMP8_kIndexCount;
565
566 this_obj->fArray = (int8_t*) *source;
567 (*source) += sizeof(this_obj->fArray[0])*this_obj->fCount;
568
569 /* eat up padding */
570 while((*source-(oldSource))%4)
571 (*source)++;
572 }