/*
*******************************************************************************
*
*   Copyright (C) 2002-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  propsvec.c
*   tab size:   8 (not used)
*
*   created on: 2002feb22
*   created by: Markus W. Scherer
*
*   Store additional Unicode character properties in bit set vectors.
*/

#include "unicode/utypes.h"
#include "cmemory.h"
#include "utrie.h"
#include "uarrsort.h"
#include "propsvec.h"

27 _findRow(uint32_t *pv
, UChar32 rangeStart
) {
30 int32_t columns
, i
, start
, limit
, prevRow
, rows
;
33 columns
=hdr
[UPVEC_COLUMNS
];
34 limit
=hdr
[UPVEC_ROWS
];
35 prevRow
=hdr
[UPVEC_PREV_ROW
];
37 pv
+=UPVEC_HEADER_LENGTH
;
39 /* check the vicinity of the last-seen row */
41 row
=pv
+prevRow
*columns
;
42 if(rangeStart
>=(UChar32
)row
[0]) {
43 if(rangeStart
<(UChar32
)row
[1]) {
44 /* same row as last seen */
48 rangeStart
>=(UChar32
)(row
+=columns
)[0] && rangeStart
<(UChar32
)row
[1]
50 /* next row after the last one */
51 hdr
[UPVEC_PREV_ROW
]=prevRow
;
57 /* do a binary search for the start of the range */
59 while(start
<limit
-1) {
62 if(rangeStart
<(UChar32
)row
[0]) {
64 } else if(rangeStart
<(UChar32
)row
[1]) {
65 hdr
[UPVEC_PREV_ROW
]=i
;
72 /* must be found because all ranges together always cover all of Unicode */
73 hdr
[UPVEC_PREV_ROW
]=start
;
74 return pv
+start
*columns
;
77 U_CAPI
uint32_t * U_EXPORT2
78 upvec_open(int32_t columns
, int32_t maxRows
) {
82 if(columns
<1 || maxRows
<1) {
86 columns
+=2; /* count range start and limit columns */
87 length
=UPVEC_HEADER_LENGTH
+maxRows
*columns
;
88 pv
=(uint32_t *)uprv_malloc(length
*4);
91 pv
[UPVEC_COLUMNS
]=(uint32_t)columns
;
92 pv
[UPVEC_MAXROWS
]=(uint32_t)maxRows
;
97 row
=pv
+UPVEC_HEADER_LENGTH
;
103 } while(--columns
>0);
108 U_CAPI
void U_EXPORT2
109 upvec_close(uint32_t *pv
) {
115 U_CAPI UBool U_EXPORT2
116 upvec_setValue(uint32_t *pv
,
117 UChar32 start
, UChar32 limit
,
119 uint32_t value
, uint32_t mask
,
120 UErrorCode
*pErrorCode
) {
121 uint32_t *firstRow
, *lastRow
;
123 UBool splitFirstRow
, splitLastRow
;
125 /* argument checking */
126 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
131 start
<0 || start
>limit
|| limit
>0x110000 ||
132 column
<0 || (uint32_t)(column
+1)>=pv
[UPVEC_COLUMNS
]
134 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
138 /* empty range, nothing to do */
143 columns
=(int32_t)pv
[UPVEC_COLUMNS
];
144 column
+=2; /* skip range start and limit columns */
147 /* find the rows whose ranges overlap with the input range */
149 /* find the first row, always successful */
150 firstRow
=_findRow(pv
, start
);
152 /* find the last row, always successful */
154 while(limit
>(UChar32
)lastRow
[1]) {
159 * Rows need to be split if they partially overlap with the
160 * input range (only possible for the first and last rows)
161 * and if their value differs from the input value.
163 splitFirstRow
= (UBool
)(start
!=(UChar32
)firstRow
[0] && value
!=(firstRow
[column
]&mask
));
164 splitLastRow
= (UBool
)(limit
!=(UChar32
)lastRow
[1] && value
!=(lastRow
[column
]&mask
));
166 /* split first/last rows if necessary */
167 if(splitFirstRow
|| splitLastRow
) {
170 rows
=(int32_t)pv
[UPVEC_ROWS
];
171 if((rows
+splitFirstRow
+splitLastRow
)>(int32_t)pv
[UPVEC_MAXROWS
]) {
172 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
;
176 /* count the number of row cells to move after the last row, and move them */
177 count
= (int32_t)((pv
+UPVEC_HEADER_LENGTH
+rows
*columns
)-(lastRow
+columns
));
180 lastRow
+(1+splitFirstRow
+splitLastRow
)*columns
,
184 pv
[UPVEC_ROWS
]=rows
+splitFirstRow
+splitLastRow
;
186 /* split the first row, and move the firstRow pointer to the second part */
188 /* copy all affected rows up one and move the lastRow pointer */
189 count
= (int32_t)((lastRow
-firstRow
)+columns
);
190 uprv_memmove(firstRow
+columns
, firstRow
, count
*4);
193 /* split the range and move the firstRow pointer */
194 firstRow
[1]=firstRow
[columns
]=(uint32_t)start
;
198 /* split the last row */
200 /* copy the last row data */
201 uprv_memcpy(lastRow
+columns
, lastRow
, columns
*4);
203 /* split the range and move the firstRow pointer */
204 lastRow
[1]=lastRow
[columns
]=(uint32_t)limit
;
208 /* set the "row last seen" to the last row for the range */
209 pv
[UPVEC_PREV_ROW
]=(uint32_t)((lastRow
-(pv
+UPVEC_HEADER_LENGTH
))/columns
);
211 /* set the input value in all remaining rows */
216 *firstRow
=(*firstRow
&mask
)|value
;
217 if(firstRow
==lastRow
) {
225 U_CAPI
uint32_t U_EXPORT2
226 upvec_getValue(uint32_t *pv
, UChar32 c
, int32_t column
) {
229 if(pv
==NULL
|| c
<0 || c
>=0x110000) {
233 return row
[2+column
];
236 U_CAPI
uint32_t * U_EXPORT2
237 upvec_getRow(uint32_t *pv
, int32_t rowIndex
,
238 UChar32
*pRangeStart
, UChar32
*pRangeLimit
) {
242 if(pv
==NULL
|| rowIndex
<0 || rowIndex
>=(int32_t)pv
[UPVEC_ROWS
]) {
246 columns
=(int32_t)pv
[UPVEC_COLUMNS
];
247 row
=pv
+UPVEC_HEADER_LENGTH
+rowIndex
*columns
;
248 if(pRangeStart
!=NULL
) {
251 if(pRangeLimit
!=NULL
) {
257 static int32_t U_CALLCONV
258 upvec_compareRows(const void *context
, const void *l
, const void *r
) {
259 const uint32_t *left
=(const uint32_t *)l
, *right
=(const uint32_t *)r
;
260 const uint32_t *pv
=(const uint32_t *)context
;
261 int32_t i
, count
, columns
;
263 count
=columns
=(int32_t)pv
[UPVEC_COLUMNS
]; /* includes start/limit columns */
265 /* start comparing after start/limit but wrap around to them */
268 if(left
[i
]!=right
[i
]) {
269 return left
[i
]<right
[i
] ? -1 : 1;
279 U_CAPI
int32_t U_EXPORT2
280 upvec_toTrie(uint32_t *pv
, UNewTrie
*trie
, UErrorCode
*pErrorCode
) {
282 int32_t columns
, valueColumns
, rows
, count
;
284 /* argument checking */
285 if(pErrorCode
==NULL
|| U_FAILURE(*pErrorCode
)) {
289 if(pv
==NULL
|| trie
==NULL
) {
290 *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
;
294 row
=pv
+UPVEC_HEADER_LENGTH
;
295 columns
=(int32_t)pv
[UPVEC_COLUMNS
];
296 rows
=(int32_t)pv
[UPVEC_ROWS
];
298 /* sort the properties vectors to find unique vector values */
300 uprv_sortArray(pv
+UPVEC_HEADER_LENGTH
, rows
, columns
*4,
301 upvec_compareRows
, pv
, FALSE
, pErrorCode
);
303 if(U_FAILURE(*pErrorCode
)) {
308 * Move vector contents up to a contiguous array with only unique
309 * vector values, and set indexes to those values into the trie.
311 * This destroys the Properties Vector structure and replaces it
312 * with an array of just vector values.
314 valueColumns
=columns
-2; /* not counting start & limit */
318 /* add a new values vector if it is different from the current one */
319 if(count
<0 || 0!=uprv_memcmp(row
+2, pv
+count
, valueColumns
*4)) {
321 uprv_memmove(pv
+count
, row
+2, valueColumns
*4);
324 if(count
>0 && !utrie_setRange32(trie
, (UChar32
)row
[0], (UChar32
)row
[1], (uint32_t)count
, FALSE
)) {
325 *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
;
332 return count
+valueColumns
;