1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 2002-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: propsvec.c
12 * tab size: 8 (not used)
15 * created on: 2002feb22
16 * created by: Markus W. Scherer
18 * Store bits (Unicode character properties) in bit set vectors.
22 #include "unicode/utypes.h"
30 struct UPropsVectors
{
32 int32_t columns
; /* number of columns, plus two for start & limit values */
35 int32_t prevRow
; /* search optimization: remember last row seen */
39 #define UPVEC_INITIAL_ROWS (1<<12)
40 #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16)
41 #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1)
43 U_CAPI UPropsVectors
* U_EXPORT2
44 upvec_open(int32_t columns
, UErrorCode
*pErrorCode
) {
49 if(U_FAILURE(*pErrorCode
)) {
53 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
56 columns
+=2; /* count range start and limit columns */
58 pv
=(UPropsVectors
*)uprv_malloc(sizeof(UPropsVectors
));
59 v
=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS
*columns
*4);
60 if(pv
==NULL
|| v
==NULL
) {
63 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
66 uprv_memset(pv
, 0, sizeof(UPropsVectors
));
69 pv
->maxRows
=UPVEC_INITIAL_ROWS
;
70 pv
->rows
=2+(UPVEC_MAX_CP
-UPVEC_FIRST_SPECIAL_CP
);
72 /* set the all-Unicode row and the special-value rows */
74 uprv_memset(row
, 0, pv
->rows
*columns
*4);
78 for(cp
=UPVEC_FIRST_SPECIAL_CP
; cp
<=UPVEC_MAX_CP
; ++cp
) {
87 upvec_close(UPropsVectors
*pv
) {
95 _findRow(UPropsVectors
*pv
, UChar32 rangeStart
) {
97 int32_t columns
, i
, start
, limit
, prevRow
;
103 /* check the vicinity of the last-seen row (start searching with an unrolled loop) */
104 row
=pv
->v
+prevRow
*columns
;
105 if(rangeStart
>=(UChar32
)row
[0]) {
106 if(rangeStart
<(UChar32
)row
[1]) {
107 /* same row as last seen */
109 } else if(rangeStart
<(UChar32
)(row
+=columns
)[1]) {
110 /* next row after the last one */
111 pv
->prevRow
=prevRow
+1;
113 } else if(rangeStart
<(UChar32
)(row
+=columns
)[1]) {
114 /* second row after the last one */
115 pv
->prevRow
=prevRow
+2;
117 } else if((rangeStart
-(UChar32
)row
[1])<10) {
118 /* we are close, continue looping */
123 } while(rangeStart
>=(UChar32
)row
[1]);
127 } else if(rangeStart
<(UChar32
)pv
->v
[1]) {
128 /* the very first row */
133 /* do a binary search for the start of the range */
135 while(start
<limit
-1) {
138 if(rangeStart
<(UChar32
)row
[0]) {
140 } else if(rangeStart
<(UChar32
)row
[1]) {
148 /* must be found because all ranges together always cover all of Unicode */
150 return pv
->v
+start
*columns
;
153 U_CAPI
void U_EXPORT2
154 upvec_setValue(UPropsVectors
*pv
,
155 UChar32 start
, UChar32 end
,
157 uint32_t value
, uint32_t mask
,
158 UErrorCode
*pErrorCode
) {
159 uint32_t *firstRow
, *lastRow
;
162 UBool splitFirstRow
, splitLastRow
;
164 /* argument checking */
165 if(U_FAILURE(*pErrorCode
)) {
169 start
<0 || start
>end
|| end
>UPVEC_MAX_CP
||
170 column
<0 || column
>=(pv
->columns
-2)
172 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
175 if(pv
->isCompacted
) {
176 *pErrorCode
=U_NO_WRITE_PERMISSION
;
183 column
+=2; /* skip range start and limit columns */
186 /* find the rows whose ranges overlap with the input range */
188 /* find the first and last rows, always successful */
189 firstRow
=_findRow(pv
, start
);
190 lastRow
=_findRow(pv
, end
);
193 * Rows need to be split if they partially overlap with the
194 * input range (only possible for the first and last rows)
195 * and if their value differs from the input value.
197 splitFirstRow
= (UBool
)(start
!=(UChar32
)firstRow
[0] && value
!=(firstRow
[column
]&mask
));
198 splitLastRow
= (UBool
)(limit
!=(UChar32
)lastRow
[1] && value
!=(lastRow
[column
]&mask
));
200 /* split first/last rows if necessary */
201 if(splitFirstRow
|| splitLastRow
) {
205 if((rows
+splitFirstRow
+splitLastRow
)>pv
->maxRows
) {
206 uint32_t *newVectors
;
209 if(pv
->maxRows
<UPVEC_MEDIUM_ROWS
) {
210 newMaxRows
=UPVEC_MEDIUM_ROWS
;
211 } else if(pv
->maxRows
<UPVEC_MAX_ROWS
) {
212 newMaxRows
=UPVEC_MAX_ROWS
;
214 /* Implementation bug, or UPVEC_MAX_ROWS too low. */
215 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
218 newVectors
=(uint32_t *)uprv_malloc(newMaxRows
*columns
*4);
219 if(newVectors
==NULL
) {
220 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
223 uprv_memcpy(newVectors
, pv
->v
, (size_t)rows
*columns
*4);
224 firstRow
=newVectors
+(firstRow
-pv
->v
);
225 lastRow
=newVectors
+(lastRow
-pv
->v
);
228 pv
->maxRows
=newMaxRows
;
231 /* count the number of row cells to move after the last row, and move them */
232 count
= (int32_t)((pv
->v
+rows
*columns
)-(lastRow
+columns
));
235 lastRow
+(1+splitFirstRow
+splitLastRow
)*columns
,
239 pv
->rows
=rows
+splitFirstRow
+splitLastRow
;
241 /* split the first row, and move the firstRow pointer to the second part */
243 /* copy all affected rows up one and move the lastRow pointer */
244 count
= (int32_t)((lastRow
-firstRow
)+columns
);
245 uprv_memmove(firstRow
+columns
, firstRow
, (size_t)count
*4);
248 /* split the range and move the firstRow pointer */
249 firstRow
[1]=firstRow
[columns
]=(uint32_t)start
;
253 /* split the last row */
255 /* copy the last row data */
256 uprv_memcpy(lastRow
+columns
, lastRow
, (size_t)columns
*4);
258 /* split the range and move the firstRow pointer */
259 lastRow
[1]=lastRow
[columns
]=(uint32_t)limit
;
263 /* set the "row last seen" to the last row for the range */
264 pv
->prevRow
=(int32_t)((lastRow
-(pv
->v
))/columns
);
266 /* set the input value in all remaining rows */
271 *firstRow
=(*firstRow
&mask
)|value
;
272 if(firstRow
==lastRow
) {
279 U_CAPI
uint32_t U_EXPORT2
280 upvec_getValue(const UPropsVectors
*pv
, UChar32 c
, int32_t column
) {
284 if(pv
->isCompacted
|| c
<0 || c
>UPVEC_MAX_CP
|| column
<0 || column
>=(pv
->columns
-2)) {
287 ncpv
=(UPropsVectors
*)pv
;
288 row
=_findRow(ncpv
, c
);
289 return row
[2+column
];
292 U_CAPI
uint32_t * U_EXPORT2
293 upvec_getRow(const UPropsVectors
*pv
, int32_t rowIndex
,
294 UChar32
*pRangeStart
, UChar32
*pRangeEnd
) {
298 if(pv
->isCompacted
|| rowIndex
<0 || rowIndex
>=pv
->rows
) {
303 row
=pv
->v
+rowIndex
*columns
;
304 if(pRangeStart
!=NULL
) {
305 *pRangeStart
=(UChar32
)row
[0];
307 if(pRangeEnd
!=NULL
) {
308 *pRangeEnd
=(UChar32
)row
[1]-1;
313 static int32_t U_CALLCONV
314 upvec_compareRows(const void *context
, const void *l
, const void *r
) {
315 const uint32_t *left
=(const uint32_t *)l
, *right
=(const uint32_t *)r
;
316 const UPropsVectors
*pv
=(const UPropsVectors
*)context
;
317 int32_t i
, count
, columns
;
319 count
=columns
=pv
->columns
; /* includes start/limit columns */
321 /* start comparing after start/limit but wrap around to them */
324 if(left
[i
]!=right
[i
]) {
325 return left
[i
]<right
[i
] ? -1 : 1;
335 U_CAPI
void U_EXPORT2
336 upvec_compact(UPropsVectors
*pv
, UPVecCompactHandler
*handler
, void *context
, UErrorCode
*pErrorCode
) {
338 int32_t i
, columns
, valueColumns
, rows
, count
;
339 UChar32 start
, limit
;
341 /* argument checking */
342 if(U_FAILURE(*pErrorCode
)) {
346 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
349 if(pv
->isCompacted
) {
353 /* Set the flag now: Sorting and compacting destroys the builder data structure. */
354 pv
->isCompacted
=TRUE
;
358 U_ASSERT(columns
>=3); /* upvec_open asserts this */
359 valueColumns
=columns
-2; /* not counting start & limit */
361 /* sort the properties vectors to find unique vector values */
362 uprv_sortArray(pv
->v
, rows
, columns
*4,
363 upvec_compareRows
, pv
, FALSE
, pErrorCode
);
364 if(U_FAILURE(*pErrorCode
)) {
369 * Find and set the special values.
370 * This has to do almost the same work as the compaction below,
371 * to find the indexes where the special-value rows will move.
375 for(i
=0; i
<rows
; ++i
) {
376 start
=(UChar32
)row
[0];
378 /* count a new values vector if it is different from the current one */
379 if(count
<0 || 0!=uprv_memcmp(row
+2, row
-valueColumns
, valueColumns
*4)) {
383 if(start
>=UPVEC_FIRST_SPECIAL_CP
) {
384 handler(context
, start
, start
, count
, row
+2, valueColumns
, pErrorCode
);
385 if(U_FAILURE(*pErrorCode
)) {
393 /* count is at the beginning of the last vector, add valueColumns to include that last vector */
396 /* Call the handler once more to signal the start of delivering real values. */
397 handler(context
, UPVEC_START_REAL_VALUES_CP
, UPVEC_START_REAL_VALUES_CP
,
398 count
, row
-valueColumns
, valueColumns
, pErrorCode
);
399 if(U_FAILURE(*pErrorCode
)) {
404 * Move vector contents up to a contiguous array with only unique
405 * vector values, and call the handler function for each vector.
407 * This destroys the Properties Vector structure and replaces it
408 * with an array of just vector values.
412 for(i
=0; i
<rows
; ++i
) {
413 /* fetch these first before memmove() may overwrite them */
414 start
=(UChar32
)row
[0];
415 limit
=(UChar32
)row
[1];
417 /* add a new values vector if it is different from the current one */
418 if(count
<0 || 0!=uprv_memcmp(row
+2, pv
->v
+count
, valueColumns
*4)) {
420 uprv_memmove(pv
->v
+count
, row
+2, (size_t)valueColumns
*4);
423 if(start
<UPVEC_FIRST_SPECIAL_CP
) {
424 handler(context
, start
, limit
-1, count
, pv
->v
+count
, valueColumns
, pErrorCode
);
425 if(U_FAILURE(*pErrorCode
)) {
433 /* count is at the beginning of the last vector, add one to include that last vector */
434 pv
->rows
=count
/valueColumns
+1;
437 U_CAPI
const uint32_t * U_EXPORT2
438 upvec_getArray(const UPropsVectors
*pv
, int32_t *pRows
, int32_t *pColumns
) {
439 if(!pv
->isCompacted
) {
446 *pColumns
=pv
->columns
-2;
451 U_CAPI
uint32_t * U_EXPORT2
452 upvec_cloneArray(const UPropsVectors
*pv
,
453 int32_t *pRows
, int32_t *pColumns
, UErrorCode
*pErrorCode
) {
454 uint32_t *clonedArray
;
457 if(U_FAILURE(*pErrorCode
)) {
460 if(!pv
->isCompacted
) {
461 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
464 byteLength
=pv
->rows
*(pv
->columns
-2)*4;
465 clonedArray
=(uint32_t *)uprv_malloc(byteLength
);
466 if(clonedArray
==NULL
) {
467 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
;
470 uprv_memcpy(clonedArray
, pv
->v
, byteLength
);
475 *pColumns
=pv
->columns
-2;
480 U_CAPI UTrie2
* U_EXPORT2
481 upvec_compactToUTrie2WithRowIndexes(UPropsVectors
*pv
, UErrorCode
*pErrorCode
) {
482 UPVecToUTrie2Context toUTrie2
={ NULL
, 0, 0, 0 };
483 upvec_compact(pv
, upvec_compactToUTrie2Handler
, &toUTrie2
, pErrorCode
);
484 utrie2_freeze(toUTrie2
.trie
, UTRIE2_16_VALUE_BITS
, pErrorCode
);
485 if(U_FAILURE(*pErrorCode
)) {
486 utrie2_close(toUTrie2
.trie
);
489 return toUTrie2
.trie
;
493 * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts
494 * some 16-bit field and builds and returns a UTrie2.
497 U_CAPI
void U_CALLCONV
498 upvec_compactToUTrie2Handler(void *context
,
499 UChar32 start
, UChar32 end
,
500 int32_t rowIndex
, uint32_t *row
, int32_t columns
,
501 UErrorCode
*pErrorCode
) {
504 UPVecToUTrie2Context
*toUTrie2
=(UPVecToUTrie2Context
*)context
;
505 if(start
<UPVEC_FIRST_SPECIAL_CP
) {
506 utrie2_setRange32(toUTrie2
->trie
, start
, end
, (uint32_t)rowIndex
, TRUE
, pErrorCode
);
509 case UPVEC_INITIAL_VALUE_CP
:
510 toUTrie2
->initialValue
=rowIndex
;
512 case UPVEC_ERROR_VALUE_CP
:
513 toUTrie2
->errorValue
=rowIndex
;
515 case UPVEC_START_REAL_VALUES_CP
:
516 toUTrie2
->maxValue
=rowIndex
;
517 if(rowIndex
>0xffff) {
518 /* too many rows for a 16-bit trie */
519 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
521 toUTrie2
->trie
=utrie2_open(toUTrie2
->initialValue
,
522 toUTrie2
->errorValue
, pErrorCode
);