]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/ucmp8.c
2 ********************************************************************
4 * Copyright (c) 1997-2004, International Business Machines Corporation and
5 * others. All Rights Reserved.
6 ********************************************************************
12 /* internal constants*/
15 U_CAPI
int32_t U_EXPORT2
16 ucmp8_getkUnicodeCount() { return UCMP8_kUnicodeCount
;}
18 U_CAPI
int32_t U_EXPORT2
19 ucmp8_getkBlockCount() { return UCMP8_kBlockCount
;}
22 ucmp8_initBogus(CompactByteArray
* array
)
24 CompactByteArray
* this_obj
= array
;
26 if (this_obj
== NULL
) return;
28 this_obj
->fStructSize
= sizeof(CompactByteArray
);
29 this_obj
->fArray
= NULL
;
30 this_obj
->fIndex
= NULL
;
31 this_obj
->fCount
= UCMP8_kUnicodeCount
;
32 this_obj
->fCompact
= FALSE
;
33 this_obj
->fBogus
= TRUE
;
34 this_obj
->fAlias
= FALSE
;
35 this_obj
->fIAmOwned
= TRUE
;
39 /*=======================================================*/
41 ucmp8_init(CompactByteArray
* array
, int8_t defaultValue
)
43 /* set up the index array and the data array.
44 * the index array always points into particular parts of the data array
45 * it is initially set up to point at regular block boundaries
46 * The following example uses blocks of 4 for simplicity
49 * INDEX 0 4 8 12 16 ...
50 * ARRAY abcdeababcedzyabcdea...
52 * whenever you set an element in the array, it unpacks to this state
53 * After compression, the index will point to various places in the data array
54 * wherever there is a runs of the same elements as in the original
58 * ARRAY abcdeabazyabc...
59 * If you look at the example, index# 2 in the expanded version points
60 * to data position number 8, which has elements "bced". In the compressed
61 * version, index# 2 points to data position 1, which also has "bced"
63 CompactByteArray
* this_obj
= array
;
66 if (this_obj
== NULL
) return;
68 this_obj
->fStructSize
= sizeof(CompactByteArray
);
69 this_obj
->fArray
= NULL
;
70 this_obj
->fIndex
= NULL
;
71 this_obj
->fCount
= UCMP8_kUnicodeCount
;
72 this_obj
->fCompact
= FALSE
;
73 this_obj
->fBogus
= FALSE
;
74 this_obj
->fAlias
= FALSE
;
75 this_obj
->fIAmOwned
= TRUE
;
78 this_obj
->fArray
= (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount
);
79 if (!this_obj
->fArray
)
81 this_obj
->fBogus
= TRUE
;
84 this_obj
->fIndex
= (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount
);
85 if (!this_obj
->fIndex
)
87 uprv_free(this_obj
->fArray
);
88 this_obj
->fArray
= NULL
;
89 this_obj
->fBogus
= TRUE
;
92 for (i
= 0; i
< UCMP8_kUnicodeCount
; ++i
)
94 this_obj
->fArray
[i
] = defaultValue
;
96 for (i
= 0; i
< UCMP8_kIndexCount
; ++i
)
98 this_obj
->fIndex
[i
] = (uint16_t)(i
<< UCMP8_kBlockShift
);
102 U_CAPI CompactByteArray
* U_EXPORT2
103 ucmp8_open(int8_t defaultValue
)
105 /* set up the index array and the data array.
106 * the index array always points into particular parts of the data array
107 * it is initially set up to point at regular block boundaries
108 * The following example uses blocks of 4 for simplicity
111 * INDEX 0 4 8 12 16 ...
112 * ARRAY abcdeababcedzyabcdea...
114 * whenever you set an element in the array, it unpacks to this state
115 * After compression, the index will point to various places in the data array
116 * wherever there is a runs of the same elements as in the original
117 * Example: Compressed
119 * INDEX 0 4 1 8 2 ...
120 * ARRAY abcdeabazyabc...
121 * If you look at the example, index# 2 in the expanded version points
122 * to data position number 8, which has elements "bced". In the compressed
123 * version, index# 2 points to data position 1, which also has "bced"
125 CompactByteArray
* this_obj
= (CompactByteArray
*) uprv_malloc(sizeof(CompactByteArray
));
128 if (this_obj
== NULL
) return NULL
;
130 this_obj
->fStructSize
= sizeof(CompactByteArray
);
131 this_obj
->fArray
= NULL
;
132 this_obj
->fIndex
= NULL
;
133 this_obj
->fCount
= UCMP8_kUnicodeCount
;
134 this_obj
->fCompact
= FALSE
;
135 this_obj
->fBogus
= FALSE
;
136 this_obj
->fAlias
= FALSE
;
137 this_obj
->fIAmOwned
= FALSE
;
140 this_obj
->fArray
= (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount
);
141 if (!this_obj
->fArray
)
143 this_obj
->fBogus
= TRUE
;
146 this_obj
->fIndex
= (uint16_t*) uprv_malloc(sizeof(uint16_t) * UCMP8_kIndexCount
);
147 if (!this_obj
->fIndex
)
149 uprv_free(this_obj
->fArray
);
150 this_obj
->fArray
= NULL
;
151 this_obj
->fBogus
= TRUE
;
154 for (i
= 0; i
< UCMP8_kUnicodeCount
; ++i
)
156 this_obj
->fArray
[i
] = defaultValue
;
158 for (i
= 0; i
< UCMP8_kIndexCount
; ++i
)
160 this_obj
->fIndex
[i
] = (uint16_t)(i
<< UCMP8_kBlockShift
);
166 U_CAPI CompactByteArray
* U_EXPORT2
167 ucmp8_openAdopt(uint16_t *indexArray
,
171 CompactByteArray
* this_obj
= (CompactByteArray
*) uprv_malloc(sizeof(CompactByteArray
));
175 ucmp8_initAdopt(this_obj
, indexArray
, newValues
, count
);
176 this_obj
->fIAmOwned
= FALSE
;
180 U_CAPI CompactByteArray
* U_EXPORT2
181 ucmp8_openAlias(uint16_t *indexArray
,
185 CompactByteArray
* this_obj
= (CompactByteArray
*) uprv_malloc(sizeof(CompactByteArray
));
189 ucmp8_initAlias(this_obj
, indexArray
, newValues
, count
);
190 this_obj
->fIAmOwned
= FALSE
;
194 /*=======================================================*/
196 U_CAPI CompactByteArray
* U_EXPORT2
197 ucmp8_initAdopt(CompactByteArray
*this_obj
,
198 uint16_t *indexArray
,
203 this_obj
->fCount
= count
;
204 this_obj
->fBogus
= FALSE
;
205 this_obj
->fStructSize
= sizeof(CompactByteArray
);
207 this_obj
->fArray
= newValues
;
208 this_obj
->fIndex
= indexArray
;
209 this_obj
->fCompact
= (UBool
)((count
< UCMP8_kUnicodeCount
) ? TRUE
: FALSE
);
210 this_obj
->fAlias
= FALSE
;
211 this_obj
->fIAmOwned
= TRUE
;
217 U_CAPI CompactByteArray
* U_EXPORT2
218 ucmp8_initAlias(CompactByteArray
*this_obj
,
219 uint16_t *indexArray
,
224 this_obj
->fArray
= NULL
;
225 this_obj
->fIndex
= NULL
;
226 this_obj
->fCount
= count
;
227 this_obj
->fBogus
= FALSE
;
228 this_obj
->fStructSize
= sizeof(CompactByteArray
);
230 this_obj
->fArray
= newValues
;
231 this_obj
->fIndex
= indexArray
;
232 this_obj
->fCompact
= (UBool
)((count
< UCMP8_kUnicodeCount
) ? TRUE
: FALSE
);
233 this_obj
->fAlias
= TRUE
;
234 this_obj
->fIAmOwned
= TRUE
;
240 /*=======================================================*/
242 U_CAPI
void U_EXPORT2
243 ucmp8_close(CompactByteArray
* this_obj
)
245 if(this_obj
!= NULL
) {
246 if(!this_obj
->fAlias
) {
247 if(this_obj
->fArray
!= NULL
) {
248 uprv_free(this_obj
->fArray
);
250 if(this_obj
->fIndex
!= NULL
) {
251 uprv_free(this_obj
->fIndex
);
254 if(!this_obj
->fIAmOwned
) /* Called if 'init' was called instead of 'open'. */
262 /*=======================================================*/
264 U_CAPI
void U_EXPORT2
265 ucmp8_expand(CompactByteArray
* this_obj
)
267 /* can optimize later.
268 * if we have to expand, then walk through the blocks instead of using Get
269 * this code unpacks the array by copying the blocks to the normalized position.
270 * Example: Compressed
272 * INDEX 0 4 1 8 2 ...
273 * ARRAY abcdeabazyabc...
277 * INDEX 0 4 8 12 16 ...
278 * ARRAY abcdeababcedzyabcdea...
281 if (this_obj
->fCompact
)
284 tempArray
= (int8_t*) uprv_malloc(sizeof(int8_t) * UCMP8_kUnicodeCount
);
287 this_obj
->fBogus
= TRUE
;
290 for (i
= 0; i
< UCMP8_kUnicodeCount
; ++i
)
292 tempArray
[i
] = ucmp8_get(this_obj
,(UChar
)i
); /* HSYS : How expand?*/
294 for (i
= 0; i
< UCMP8_kIndexCount
; ++i
)
296 this_obj
->fIndex
[i
] = (uint16_t)(i
<< UCMP8_kBlockShift
);
298 uprv_free(this_obj
->fArray
);
299 this_obj
->fArray
= tempArray
;
300 this_obj
->fCompact
= FALSE
;
301 this_obj
->fAlias
= FALSE
;
307 /*=======================================================*/
308 /* this_obj->fArray: an array to be overlapped
309 * start and count: specify the block to be overlapped
310 * tempIndex: the overlapped array (actually indices back into inputContents)
311 * inputHash: an index of hashes for tempIndex, where
312 * inputHash[i] = XOR of values from i-count+1 to i
315 findOverlappingPosition(CompactByteArray
* this_obj
,
317 const UChar
* tempIndex
,
318 int32_t tempIndexCount
,
321 /* this_obj is a utility routine for finding blocks that overlap.
322 * IMPORTANT: the cycle number is very important. Small cycles take a lot
323 * longer to work. In some cases, they may be able to get better compaction.
328 int32_t currentCount
;
330 for (i
= 0; i
< tempIndexCount
; i
+= cycle
)
332 currentCount
= UCMP8_kBlockCount
;
333 if (i
+ UCMP8_kBlockCount
> tempIndexCount
)
335 currentCount
= tempIndexCount
- i
;
337 for (j
= 0; j
< currentCount
; ++j
)
339 if (this_obj
->fArray
[start
+ j
] != this_obj
->fArray
[tempIndex
[i
+ j
]])
342 if (j
== currentCount
)
349 U_CAPI UBool U_EXPORT2
350 ucmp8_isBogus(const CompactByteArray
* this_obj
)
352 return (UBool
)(this_obj
== NULL
|| this_obj
->fBogus
);
355 U_CAPI
const int8_t* U_EXPORT2
356 ucmp8_getArray(const CompactByteArray
* this_obj
)
358 return this_obj
->fArray
;
361 U_CAPI
const uint16_t* U_EXPORT2
362 ucmp8_getIndex(const CompactByteArray
* this_obj
)
364 return this_obj
->fIndex
;
367 U_CAPI
int32_t U_EXPORT2
368 ucmp8_getCount(const CompactByteArray
* this_obj
)
370 return this_obj
->fCount
;
374 U_CAPI
void U_EXPORT2
375 ucmp8_set(CompactByteArray
* this_obj
,
379 if (this_obj
->fCompact
== TRUE
)
381 ucmp8_expand(this_obj
);
382 if (this_obj
->fBogus
) return;
384 this_obj
->fArray
[(int32_t)c
] = value
;
388 U_CAPI
void U_EXPORT2
389 ucmp8_setRange(CompactByteArray
* this_obj
,
395 if (this_obj
->fCompact
== TRUE
)
397 ucmp8_expand(this_obj
);
398 if (this_obj
->fBogus
)
401 for (i
= start
; i
<= end
; ++i
)
403 this_obj
->fArray
[i
] = value
;
408 /*=======================================================*/
410 U_CAPI
void U_EXPORT2
411 ucmp8_compact(CompactByteArray
* this_obj
,
414 if (!this_obj
->fCompact
)
416 /* this_obj actually does the compaction.
417 * it walks throught the contents of the expanded array, finding the
418 * first block in the data that matches the contents of the current index.
419 * As it works, it keeps an updated pointer to the last position,
420 * so that it knows how big to make the final array
421 * If the matching succeeds, then the index will point into the data
422 * at some earlier position.
423 * If the matching fails, then last position pointer will be bumped,
424 * and the index will point to that last block of data.
427 int32_t tempIndexCount
;
429 int32_t iBlock
, iIndex
;
431 /* fix cycle, must be 0 < cycle <= blockcount*/
434 else if (cycle
> (uint32_t)UCMP8_kBlockCount
)
435 cycle
= UCMP8_kBlockCount
;
437 /* make temp storage, larger than we need*/
438 tempIndex
= (UChar
*) uprv_malloc(sizeof(UChar
)* UCMP8_kUnicodeCount
);
441 this_obj
->fBogus
= TRUE
;
444 /* set up first block.*/
445 tempIndexCount
= UCMP8_kBlockCount
;
446 for (iIndex
= 0; iIndex
< UCMP8_kBlockCount
; ++iIndex
)
448 tempIndex
[iIndex
] = (uint16_t)iIndex
;
449 } /* endfor (iIndex = 0; .....)*/
450 this_obj
->fIndex
[0] = 0;
452 /* for each successive block, find out its first position in the compacted array*/
453 for (iBlock
= 1; iBlock
< UCMP8_kIndexCount
; ++iBlock
)
455 int32_t newCount
, firstPosition
, block
;
456 block
= iBlock
<< UCMP8_kBlockShift
;
457 /* if (debugSmall) if (block > debugSmallLimit) break;*/
458 firstPosition
= findOverlappingPosition(this_obj
,
464 /* if not contained in the current list, copy the remainder
465 * invariant; cumulativeHash[iBlock] = XOR of values from iBlock-kBlockCount+1 to iBlock
466 * we do this_obj by XORing out cumulativeHash[iBlock-kBlockCount]
468 newCount
= firstPosition
+ UCMP8_kBlockCount
;
469 if (newCount
> tempIndexCount
)
471 for (iIndex
= tempIndexCount
; iIndex
< newCount
; ++iIndex
)
473 tempIndex
[iIndex
] = (uint16_t)(iIndex
- firstPosition
+ block
);
474 } /* endfor (iIndex = tempIndexCount....)*/
475 tempIndexCount
= newCount
;
476 } /* endif (newCount > tempIndexCount)*/
477 this_obj
->fIndex
[iBlock
] = (uint16_t)firstPosition
;
478 } /* endfor (iBlock = 1.....)*/
480 /* now allocate and copy the items into the array*/
481 tempArray
= (int8_t*) uprv_malloc(tempIndexCount
* sizeof(int8_t));
484 this_obj
->fBogus
= TRUE
;
485 uprv_free(tempIndex
);
488 for (iIndex
= 0; iIndex
< tempIndexCount
; ++iIndex
)
490 tempArray
[iIndex
] = this_obj
->fArray
[tempIndex
[iIndex
]];
492 uprv_free(this_obj
->fArray
);
493 this_obj
->fArray
= tempArray
;
494 this_obj
->fCount
= tempIndexCount
;
497 /* free up temp storage*/
498 uprv_free(tempIndex
);
499 this_obj
->fCompact
= TRUE
;
500 } /* endif (!this_obj->fCompact)*/
503 #define MEMORY_WRITE(destAddr, source, sizeSoFar, len) \
505 uprv_memcpy(destAddr+sizeSoFar, source, len);\
509 U_CAPI
uint32_t U_EXPORT2
ucmp8_flattenMem (const CompactByteArray
* array
, uint8_t *MS
)
512 static const int32_t version
= ICU_UCMP8_VERSION
;
514 MEMORY_WRITE(MS
, &version
, size
, 4);
516 MEMORY_WRITE(MS
, &array
->fCount
, size
, 4);
518 MEMORY_WRITE(MS
, array
->fIndex
, size
, sizeof(array
->fIndex
[0])*UCMP8_kIndexCount
);
520 MEMORY_WRITE(MS
, array
->fArray
, size
, sizeof(array
->fArray
[0])*array
->fCount
);
522 while(size%4
) /* end padding */
525 MEMORY_WRITE(MS
, &pad
, size
, 1);
531 /* We use sizeof(*array), etc so that this code can be as portable as
532 possible between the ucmpX_ family.
535 U_CAPI
void U_EXPORT2
ucmp8_initFromData(CompactByteArray
*this_obj
, const uint8_t **source
, UErrorCode
*status
)
538 const uint8_t *oldSource
= *source
;
540 if(U_FAILURE(*status
))
543 this_obj
->fArray
= NULL
;
544 this_obj
->fIndex
= NULL
;
545 this_obj
->fBogus
= FALSE
;
546 this_obj
->fStructSize
= sizeof(CompactByteArray
);
547 this_obj
->fCompact
= TRUE
;
548 this_obj
->fAlias
= TRUE
;
549 this_obj
->fIAmOwned
= TRUE
;
551 i
= * ((const uint32_t*) *source
);
554 if(i
!= ICU_UCMP8_VERSION
)
556 *status
= U_INVALID_FORMAT_ERROR
;
560 this_obj
->fCount
= * ((const uint32_t*)*source
);
563 this_obj
->fIndex
= (uint16_t*) *source
;
564 (*source
) += sizeof(this_obj
->fIndex
[0])*UCMP8_kIndexCount
;
566 this_obj
->fArray
= (int8_t*) *source
;
567 (*source
) += sizeof(this_obj
->fArray
[0])*this_obj
->fCount
;
570 while((*source
-(oldSource
))%4
)