2 *******************************************************************************
4 * Copyright (C) 2002-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: propsvec.c
10 * tab size: 8 (not used)
13 * created on: 2002feb22
14 * created by: Markus W. Scherer
16 * Store bits (Unicode character properties) in bit set vectors.
20 #include "unicode/utypes.h"
28 struct UPropsVectors
{
30 int32_t columns
; /* number of columns, plus two for start & limit values */
33 int32_t prevRow
; /* search optimization: remember last row seen */
37 #define UPVEC_INITIAL_ROWS (1<<12)
38 #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
39 #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
41 U_CAPI UPropsVectors
* U_EXPORT2
42 upvec_open(int32_t columns
, UErrorCode
*pErrorCode
) {
47 if(U_FAILURE(*pErrorCode
)) {
51 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
54 columns
+=2; /* count range start and limit columns */
56 pv
=(UPropsVectors
*)uprv_malloc(sizeof(UPropsVectors
));
57 v
=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS
*columns
*4);
58 if(pv
==NULL
|| v
==NULL
) {
61 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
64 uprv_memset(pv
, 0, sizeof(UPropsVectors
));
67 pv
->maxRows
=UPVEC_INITIAL_ROWS
;
68 pv
->rows
=2+(UPVEC_MAX_CP
-UPVEC_FIRST_SPECIAL_CP
);
70 /* set the all-Unicode row and the special-value rows */
72 uprv_memset(row
, 0, pv
->rows
*columns
*4);
76 for(cp
=UPVEC_FIRST_SPECIAL_CP
; cp
<=UPVEC_MAX_CP
; ++cp
) {
85 upvec_close(UPropsVectors
*pv
) {
93 _findRow(UPropsVectors
*pv
, UChar32 rangeStart
) {
95 int32_t columns
, i
, start
, limit
, prevRow
;
101 /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
102 row
=pv
->v
+prevRow
*columns
;
103 if(rangeStart
>=(UChar32
)row
[0]) {
104 if(rangeStart
<(UChar32
)row
[1]) {
105 /* same row as last seen */
107 } else if(rangeStart
<(UChar32
)(row
+=columns
)[1]) {
108 /* next row after the last one */
109 pv
->prevRow
=prevRow
+1;
111 } else if(rangeStart
<(UChar32
)(row
+=columns
)[1]) {
112 /* second row after the last one */
113 pv
->prevRow
=prevRow
+2;
115 } else if((rangeStart
-(UChar32
)row
[1])<10) {
116 /* we are close, continue looping */
121 } while(rangeStart
>=(UChar32
)row
[1]);
125 } else if(rangeStart
<(UChar32
)pv
->v
[1]) {
126 /* the very first row */
131 /* do a binary search for the start of the range */
133 while(start
<limit
-1) {
136 if(rangeStart
<(UChar32
)row
[0]) {
138 } else if(rangeStart
<(UChar32
)row
[1]) {
146 /* must be found because all ranges together always cover all of Unicode */
148 return pv
->v
+start
*columns
;
151 U_CAPI
void U_EXPORT2
152 upvec_setValue(UPropsVectors
*pv
,
153 UChar32 start
, UChar32 end
,
155 uint32_t value
, uint32_t mask
,
156 UErrorCode
*pErrorCode
) {
157 uint32_t *firstRow
, *lastRow
;
160 UBool splitFirstRow
, splitLastRow
;
162 /* argument checking */
163 if(U_FAILURE(*pErrorCode
)) {
167 start
<0 || start
>end
|| end
>UPVEC_MAX_CP
||
168 column
<0 || column
>=(pv
->columns
-2)
170 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
173 if(pv
->isCompacted
) {
174 *pErrorCode
=U_NO_WRITE_PERMISSION
;
181 column
+=2; /* skip range start and limit columns */
184 /* find the rows whose ranges overlap with the input range */
186 /* find the first and last rows, always successful */
187 firstRow
=_findRow(pv
, start
);
188 lastRow
=_findRow(pv
, end
);
191 * Rows need to be split if they partially overlap with the
192 * input range (only possible for the first and last rows)
193 * and if their value differs from the input value.
195 splitFirstRow
= (UBool
)(start
!=(UChar32
)firstRow
[0] && value
!=(firstRow
[column
]&mask
));
196 splitLastRow
= (UBool
)(limit
!=(UChar32
)lastRow
[1] && value
!=(lastRow
[column
]&mask
));
198 /* split first/last rows if necessary */
199 if(splitFirstRow
|| splitLastRow
) {
203 if((rows
+splitFirstRow
+splitLastRow
)>pv
->maxRows
) {
204 uint32_t *newVectors
;
207 if(pv
->maxRows
<UPVEC_MEDIUM_ROWS
) {
208 newMaxRows
=UPVEC_MEDIUM_ROWS
;
209 } else if(pv
->maxRows
<UPVEC_MAX_ROWS
) {
210 newMaxRows
=UPVEC_MAX_ROWS
;
212 /* Implementation bug, or UPVEC_MAX_ROWS too low. */
213 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
216 newVectors
=(uint32_t *)uprv_malloc(newMaxRows
*columns
*4);
217 if(newVectors
==NULL
) {
218 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
221 uprv_memcpy(newVectors
, pv
->v
, rows
*columns
*4);
222 firstRow
=newVectors
+(firstRow
-pv
->v
);
223 lastRow
=newVectors
+(lastRow
-pv
->v
);
226 pv
->maxRows
=newMaxRows
;
229 /* count the number of row cells to move after the last row, and move them */
230 count
= (int32_t)((pv
->v
+rows
*columns
)-(lastRow
+columns
));
233 lastRow
+(1+splitFirstRow
+splitLastRow
)*columns
,
237 pv
->rows
=rows
+splitFirstRow
+splitLastRow
;
239 /* split the first row, and move the firstRow pointer to the second part */
241 /* copy all affected rows up one and move the lastRow pointer */
242 count
= (int32_t)((lastRow
-firstRow
)+columns
);
243 uprv_memmove(firstRow
+columns
, firstRow
, count
*4);
246 /* split the range and move the firstRow pointer */
247 firstRow
[1]=firstRow
[columns
]=(uint32_t)start
;
251 /* split the last row */
253 /* copy the last row data */
254 uprv_memcpy(lastRow
+columns
, lastRow
, columns
*4);
256 /* split the range and move the firstRow pointer */
257 lastRow
[1]=lastRow
[columns
]=(uint32_t)limit
;
261 /* set the "row last seen" to the last row for the range */
262 pv
->prevRow
=(int32_t)((lastRow
-(pv
->v
))/columns
);
264 /* set the input value in all remaining rows */
269 *firstRow
=(*firstRow
&mask
)|value
;
270 if(firstRow
==lastRow
) {
277 U_CAPI
uint32_t U_EXPORT2
278 upvec_getValue(const UPropsVectors
*pv
, UChar32 c
, int32_t column
) {
282 if(pv
->isCompacted
|| c
<0 || c
>UPVEC_MAX_CP
|| column
<0 || column
>=(pv
->columns
-2)) {
285 ncpv
=(UPropsVectors
*)pv
;
286 row
=_findRow(ncpv
, c
);
287 return row
[2+column
];
290 U_CAPI
uint32_t * U_EXPORT2
291 upvec_getRow(const UPropsVectors
*pv
, int32_t rowIndex
,
292 UChar32
*pRangeStart
, UChar32
*pRangeEnd
) {
296 if(pv
->isCompacted
|| rowIndex
<0 || rowIndex
>=pv
->rows
) {
301 row
=pv
->v
+rowIndex
*columns
;
302 if(pRangeStart
!=NULL
) {
303 *pRangeStart
=(UChar32
)row
[0];
305 if(pRangeEnd
!=NULL
) {
306 *pRangeEnd
=(UChar32
)row
[1]-1;
311 static int32_t U_CALLCONV
312 upvec_compareRows(const void *context
, const void *l
, const void *r
) {
313 const uint32_t *left
=(const uint32_t *)l
, *right
=(const uint32_t *)r
;
314 const UPropsVectors
*pv
=(const UPropsVectors
*)context
;
315 int32_t i
, count
, columns
;
317 count
=columns
=pv
->columns
; /* includes start/limit columns */
319 /* start comparing after start/limit but wrap around to them */
322 if(left
[i
]!=right
[i
]) {
323 return left
[i
]<right
[i
] ? -1 : 1;
333 U_CAPI
void U_EXPORT2
334 upvec_compact(UPropsVectors
*pv
, UPVecCompactHandler
*handler
, void *context
, UErrorCode
*pErrorCode
) {
336 int32_t i
, columns
, valueColumns
, rows
, count
;
337 UChar32 start
, limit
;
339 /* argument checking */
340 if(U_FAILURE(*pErrorCode
)) {
344 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
347 if(pv
->isCompacted
) {
351 /* Set the flag now: Sorting and compacting destroys the builder data structure. */
352 pv
->isCompacted
=TRUE
;
356 U_ASSERT(columns
>=3); /* upvec_open asserts this */
357 valueColumns
=columns
-2; /* not counting start & limit */
359 /* sort the properties vectors to find unique vector values */
360 uprv_sortArray(pv
->v
, rows
, columns
*4,
361 upvec_compareRows
, pv
, FALSE
, pErrorCode
);
362 if(U_FAILURE(*pErrorCode
)) {
367 * Find and set the special values.
368 * This has to do almost the same work as the compaction below,
369 * to find the indexes where the special-value rows will move.
373 for(i
=0; i
<rows
; ++i
) {
374 start
=(UChar32
)row
[0];
376 /* count a new values vector if it is different from the current one */
377 if(count
<0 || 0!=uprv_memcmp(row
+2, row
-valueColumns
, valueColumns
*4)) {
381 if(start
>=UPVEC_FIRST_SPECIAL_CP
) {
382 handler(context
, start
, start
, count
, row
+2, valueColumns
, pErrorCode
);
383 if(U_FAILURE(*pErrorCode
)) {
391 /* count is at the beginning of the last vector, add valueColumns to include that last vector */
394 /* Call the handler once more to signal the start of delivering real values. */
395 handler(context
, UPVEC_START_REAL_VALUES_CP
, UPVEC_START_REAL_VALUES_CP
,
396 count
, row
-valueColumns
, valueColumns
, pErrorCode
);
397 if(U_FAILURE(*pErrorCode
)) {
402 * Move vector contents up to a contiguous array with only unique
403 * vector values, and call the handler function for each vector.
405 * This destroys the Properties Vector structure and replaces it
406 * with an array of just vector values.
410 for(i
=0; i
<rows
; ++i
) {
411 /* fetch these first before memmove() may overwrite them */
412 start
=(UChar32
)row
[0];
413 limit
=(UChar32
)row
[1];
415 /* add a new values vector if it is different from the current one */
416 if(count
<0 || 0!=uprv_memcmp(row
+2, pv
->v
+count
, valueColumns
*4)) {
418 uprv_memmove(pv
->v
+count
, row
+2, valueColumns
*4);
421 if(start
<UPVEC_FIRST_SPECIAL_CP
) {
422 handler(context
, start
, limit
-1, count
, pv
->v
+count
, valueColumns
, pErrorCode
);
423 if(U_FAILURE(*pErrorCode
)) {
431 /* count is at the beginning of the last vector, add one to include that last vector */
432 pv
->rows
=count
/valueColumns
+1;
435 U_CAPI
const uint32_t * U_EXPORT2
436 upvec_getArray(const UPropsVectors
*pv
, int32_t *pRows
, int32_t *pColumns
) {
437 if(!pv
->isCompacted
) {
444 *pColumns
=pv
->columns
-2;
449 U_CAPI
uint32_t * U_EXPORT2
450 upvec_cloneArray(const UPropsVectors
*pv
,
451 int32_t *pRows
, int32_t *pColumns
, UErrorCode
*pErrorCode
) {
452 uint32_t *clonedArray
;
455 if(U_FAILURE(*pErrorCode
)) {
458 if(!pv
->isCompacted
) {
459 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
462 byteLength
=pv
->rows
*(pv
->columns
-2)*4;
463 clonedArray
=(uint32_t *)uprv_malloc(byteLength
);
464 if(clonedArray
==NULL
) {
465 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
468 uprv_memcpy(clonedArray
, pv
->v
, byteLength
);
473 *pColumns
=pv
->columns
-2;
478 U_CAPI UTrie2
* U_EXPORT2
479 upvec_compactToUTrie2WithRowIndexes(UPropsVectors
*pv
, UErrorCode
*pErrorCode
) {
480 UPVecToUTrie2Context toUTrie2
={ NULL
};
481 upvec_compact(pv
, upvec_compactToUTrie2Handler
, &toUTrie2
, pErrorCode
);
482 utrie2_freeze(toUTrie2
.trie
, UTRIE2_16_VALUE_BITS
, pErrorCode
);
483 if(U_FAILURE(*pErrorCode
)) {
484 utrie2_close(toUTrie2
.trie
);
487 return toUTrie2
.trie
;
491 * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
492 * some 16-bit field and builds and returns a UTrie2.
495 U_CAPI
void U_CALLCONV
496 upvec_compactToUTrie2Handler(void *context
,
497 UChar32 start
, UChar32 end
,
498 int32_t rowIndex
, uint32_t *row
, int32_t columns
,
499 UErrorCode
*pErrorCode
) {
500 UPVecToUTrie2Context
*toUTrie2
=(UPVecToUTrie2Context
*)context
;
501 if(start
<UPVEC_FIRST_SPECIAL_CP
) {
502 utrie2_setRange32(toUTrie2
->trie
, start
, end
, (uint32_t)rowIndex
, TRUE
, pErrorCode
);
505 case UPVEC_INITIAL_VALUE_CP
:
506 toUTrie2
->initialValue
=rowIndex
;
508 case UPVEC_ERROR_VALUE_CP
:
509 toUTrie2
->errorValue
=rowIndex
;
511 case UPVEC_START_REAL_VALUES_CP
:
512 toUTrie2
->maxValue
=rowIndex
;
513 if(rowIndex
>0xffff) {
514 /* too many rows for a 16-bit trie */
515 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
517 toUTrie2
->trie
=utrie2_open(toUTrie2
->initialValue
,
518 toUTrie2
->errorValue
, pErrorCode
);