]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/tools/toolutil/propsvec.c
ICU-8.11.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / propsvec.c
... / ...
CommitLineData
1/*
2*******************************************************************************
3*
4* Copyright (C) 2002-2005, International Business Machines
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: propsvec.c
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2002feb22
14* created by: Markus W. Scherer
15*
16* Store additional Unicode character properties in bit set vectors.
17*/
18
19#include <stdlib.h>
20#include "unicode/utypes.h"
21#include "cmemory.h"
22#include "utrie.h"
23#include "uarrsort.h"
24#include "propsvec.h"
25
26static uint32_t *
27_findRow(uint32_t *pv, UChar32 rangeStart) {
28 uint32_t *row;
29 int32_t *hdr;
30 int32_t columns, i, start, limit, prevRow, rows;
31
32 hdr=(int32_t *)pv;
33 columns=hdr[UPVEC_COLUMNS];
34 limit=hdr[UPVEC_ROWS];
35 prevRow=hdr[UPVEC_PREV_ROW];
36 rows=hdr[UPVEC_ROWS];
37 pv+=UPVEC_HEADER_LENGTH;
38
39 /* check the vicinity of the last-seen row */
40 if(prevRow<rows) {
41 row=pv+prevRow*columns;
42 if(rangeStart>=(UChar32)row[0]) {
43 if(rangeStart<(UChar32)row[1]) {
44 /* same row as last seen */
45 return row;
46 } else if(
47 ++prevRow<rows &&
48 rangeStart>=(UChar32)(row+=columns)[0] && rangeStart<(UChar32)row[1]
49 ) {
50 /* next row after the last one */
51 hdr[UPVEC_PREV_ROW]=prevRow;
52 return row;
53 }
54 }
55 }
56
57 /* do a binary search for the start of the range */
58 start=0;
59 while(start<limit-1) {
60 i=(start+limit)/2;
61 row=pv+i*columns;
62 if(rangeStart<(UChar32)row[0]) {
63 limit=i;
64 } else if(rangeStart<(UChar32)row[1]) {
65 hdr[UPVEC_PREV_ROW]=i;
66 return row;
67 } else {
68 start=i;
69 }
70 }
71
72 /* must be found because all ranges together always cover all of Unicode */
73 hdr[UPVEC_PREV_ROW]=start;
74 return pv+start*columns;
75}
76
77U_CAPI uint32_t * U_EXPORT2
78upvec_open(int32_t columns, int32_t maxRows) {
79 uint32_t *pv, *row;
80 int32_t length;
81
82 if(columns<1 || maxRows<1) {
83 return NULL;
84 }
85
86 columns+=2; /* count range start and limit columns */
87 length=UPVEC_HEADER_LENGTH+maxRows*columns;
88 pv=(uint32_t *)uprv_malloc(length*4);
89 if(pv!=NULL) {
90 /* set header */
91 pv[UPVEC_COLUMNS]=(uint32_t)columns;
92 pv[UPVEC_MAXROWS]=(uint32_t)maxRows;
93 pv[UPVEC_ROWS]=1;
94 pv[UPVEC_PREV_ROW]=0;
95
96 /* set initial row */
97 row=pv+UPVEC_HEADER_LENGTH;
98 *row++=0;
99 *row++=0x110000;
100 columns-=2;
101 do {
102 *row++=0;
103 } while(--columns>0);
104 }
105 return pv;
106}
107
108U_CAPI void U_EXPORT2
109upvec_close(uint32_t *pv) {
110 if(pv!=NULL) {
111 uprv_free(pv);
112 }
113}
114
115U_CAPI UBool U_EXPORT2
116upvec_setValue(uint32_t *pv,
117 UChar32 start, UChar32 limit,
118 int32_t column,
119 uint32_t value, uint32_t mask,
120 UErrorCode *pErrorCode) {
121 uint32_t *firstRow, *lastRow;
122 int32_t columns;
123 UBool splitFirstRow, splitLastRow;
124
125 /* argument checking */
126 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
127 return FALSE;
128 }
129
130 if( pv==NULL ||
131 start<0 || start>limit || limit>0x110000 ||
132 column<0 || (uint32_t)(column+1)>=pv[UPVEC_COLUMNS]
133 ) {
134 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
135 return FALSE;
136 }
137 if(start==limit) {
138 /* empty range, nothing to do */
139 return TRUE;
140 }
141
142 /* initialize */
143 columns=(int32_t)pv[UPVEC_COLUMNS];
144 column+=2; /* skip range start and limit columns */
145 value&=mask;
146
147 /* find the rows whose ranges overlap with the input range */
148
149 /* find the first row, always successful */
150 firstRow=_findRow(pv, start);
151
152 /* find the last row, always successful */
153 lastRow=firstRow;
154 while(limit>(UChar32)lastRow[1]) {
155 lastRow+=columns;
156 }
157
158 /*
159 * Rows need to be split if they partially overlap with the
160 * input range (only possible for the first and last rows)
161 * and if their value differs from the input value.
162 */
163 splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask));
164 splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask));
165
166 /* split first/last rows if necessary */
167 if(splitFirstRow || splitLastRow) {
168 int32_t count, rows;
169
170 rows=(int32_t)pv[UPVEC_ROWS];
171 if((rows+splitFirstRow+splitLastRow)>(int32_t)pv[UPVEC_MAXROWS]) {
172 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
173 return FALSE;
174 }
175
176 /* count the number of row cells to move after the last row, and move them */
177 count = (int32_t)((pv+UPVEC_HEADER_LENGTH+rows*columns)-(lastRow+columns));
178 if(count>0) {
179 uprv_memmove(
180 lastRow+(1+splitFirstRow+splitLastRow)*columns,
181 lastRow+columns,
182 count*4);
183 }
184 pv[UPVEC_ROWS]=rows+splitFirstRow+splitLastRow;
185
186 /* split the first row, and move the firstRow pointer to the second part */
187 if(splitFirstRow) {
188 /* copy all affected rows up one and move the lastRow pointer */
189 count = (int32_t)((lastRow-firstRow)+columns);
190 uprv_memmove(firstRow+columns, firstRow, count*4);
191 lastRow+=columns;
192
193 /* split the range and move the firstRow pointer */
194 firstRow[1]=firstRow[columns]=(uint32_t)start;
195 firstRow+=columns;
196 }
197
198 /* split the last row */
199 if(splitLastRow) {
200 /* copy the last row data */
201 uprv_memcpy(lastRow+columns, lastRow, columns*4);
202
203 /* split the range and move the firstRow pointer */
204 lastRow[1]=lastRow[columns]=(uint32_t)limit;
205 }
206 }
207
208 /* set the "row last seen" to the last row for the range */
209 pv[UPVEC_PREV_ROW]=(uint32_t)((lastRow-(pv+UPVEC_HEADER_LENGTH))/columns);
210
211 /* set the input value in all remaining rows */
212 firstRow+=column;
213 lastRow+=column;
214 mask=~mask;
215 for(;;) {
216 *firstRow=(*firstRow&mask)|value;
217 if(firstRow==lastRow) {
218 break;
219 }
220 firstRow+=columns;
221 }
222 return TRUE;
223}
224
225U_CAPI uint32_t U_EXPORT2
226upvec_getValue(uint32_t *pv, UChar32 c, int32_t column) {
227 uint32_t *row;
228
229 if(pv==NULL || c<0 || c>=0x110000) {
230 return 0;
231 }
232 row=_findRow(pv, c);
233 return row[2+column];
234}
235
236U_CAPI uint32_t * U_EXPORT2
237upvec_getRow(uint32_t *pv, int32_t rowIndex,
238 UChar32 *pRangeStart, UChar32 *pRangeLimit) {
239 uint32_t *row;
240 int32_t columns;
241
242 if(pv==NULL || rowIndex<0 || rowIndex>=(int32_t)pv[UPVEC_ROWS]) {
243 return NULL;
244 }
245
246 columns=(int32_t)pv[UPVEC_COLUMNS];
247 row=pv+UPVEC_HEADER_LENGTH+rowIndex*columns;
248 if(pRangeStart!=NULL) {
249 *pRangeStart=row[0];
250 }
251 if(pRangeLimit!=NULL) {
252 *pRangeLimit=row[1];
253 }
254 return row+2;
255}
256
257static int32_t U_CALLCONV
258upvec_compareRows(const void *context, const void *l, const void *r) {
259 const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r;
260 const uint32_t *pv=(const uint32_t *)context;
261 int32_t i, count, columns;
262
263 count=columns=(int32_t)pv[UPVEC_COLUMNS]; /* includes start/limit columns */
264
265 /* start comparing after start/limit but wrap around to them */
266 i=2;
267 do {
268 if(left[i]!=right[i]) {
269 return left[i]<right[i] ? -1 : 1;
270 }
271 if(++i==columns) {
272 i=0;
273 }
274 } while(--count>0);
275
276 return 0;
277}
278
279U_CAPI int32_t U_EXPORT2
280upvec_compact(uint32_t *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode) {
281 uint32_t *row;
282 int32_t columns, valueColumns, rows, count;
283 UChar32 start, limit;
284
285 /* argument checking */
286 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
287 return 0;
288 }
289
290 if(pv==NULL || handler==NULL) {
291 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
292 return 0;
293 }
294
295 row=pv+UPVEC_HEADER_LENGTH;
296 columns=(int32_t)pv[UPVEC_COLUMNS];
297 rows=(int32_t)pv[UPVEC_ROWS];
298
299 if(rows==0) {
300 return 0;
301 }
302
303 /* sort the properties vectors to find unique vector values */
304 if(rows>1) {
305 uprv_sortArray(pv+UPVEC_HEADER_LENGTH, rows, columns*4,
306 upvec_compareRows, pv, FALSE, pErrorCode);
307 }
308 if(U_FAILURE(*pErrorCode)) {
309 return 0;
310 }
311
312 /*
313 * Move vector contents up to a contiguous array with only unique
314 * vector values, and call the handler function for each vector.
315 *
316 * This destroys the Properties Vector structure and replaces it
317 * with an array of just vector values.
318 */
319 valueColumns=columns-2; /* not counting start & limit */
320 count=-valueColumns;
321
322 do {
323 /* fetch these first before memmove() may overwrite them */
324 start=(UChar32)row[0];
325 limit=(UChar32)row[1];
326
327 /* add a new values vector if it is different from the current one */
328 if(count<0 || 0!=uprv_memcmp(row+2, pv+count, valueColumns*4)) {
329 count+=valueColumns;
330 uprv_memmove(pv+count, row+2, valueColumns*4);
331 }
332
333 handler(context, start, limit, count, pv+count, valueColumns, pErrorCode);
334 if(U_FAILURE(*pErrorCode)) {
335 return 0;
336 }
337
338 row+=columns;
339 } while(--rows>0);
340
341 /* count is at the beginning of the last vector, add valueColumns to include that last vector */
342 return count+valueColumns;
343}
344
345U_CAPI void U_CALLCONV
346upvec_compactToTrieHandler(void *context,
347 UChar32 start, UChar32 limit,
348 int32_t rowIndex, uint32_t *row, int32_t columns,
349 UErrorCode *pErrorCode) {
350 if(!utrie_setRange32((UNewTrie *)context, start, limit, (uint32_t)rowIndex, FALSE)) {
351 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
352 }
353}