X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..374ca955a76ecab1204ca8bfa63ff9238d998416:/icuSources/tools/toolutil/propsvec.c diff --git a/icuSources/tools/toolutil/propsvec.c b/icuSources/tools/toolutil/propsvec.c new file mode 100644 index 00000000..84fb3311 --- /dev/null +++ b/icuSources/tools/toolutil/propsvec.c @@ -0,0 +1,333 @@ +/* +******************************************************************************* +* +* Copyright (C) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: propsvec.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002feb22 +* created by: Markus W. Scherer +* +* Store additional Unicode character properties in bit set vectors. +*/ + +#include +#include "unicode/utypes.h" +#include "cmemory.h" +#include "utrie.h" +#include "uarrsort.h" +#include "propsvec.h" + +static uint32_t * +_findRow(uint32_t *pv, UChar32 rangeStart) { + uint32_t *row; + int32_t *hdr; + int32_t columns, i, start, limit, prevRow, rows; + + hdr=(int32_t *)pv; + columns=hdr[UPVEC_COLUMNS]; + limit=hdr[UPVEC_ROWS]; + prevRow=hdr[UPVEC_PREV_ROW]; + rows=hdr[UPVEC_ROWS]; + pv+=UPVEC_HEADER_LENGTH; + + /* check the vicinity of the last-seen row */ + if(prevRow=(UChar32)row[0]) { + if(rangeStart<(UChar32)row[1]) { + /* same row as last seen */ + return row; + } else if( + ++prevRow=(UChar32)(row+=columns)[0] && rangeStart<(UChar32)row[1] + ) { + /* next row after the last one */ + hdr[UPVEC_PREV_ROW]=prevRow; + return row; + } + } + } + + /* do a binary search for the start of the range */ + start=0; + while(start0); + } + return pv; +} + +U_CAPI void U_EXPORT2 +upvec_close(uint32_t *pv) { + if(pv!=NULL) { + uprv_free(pv); + } +} + +U_CAPI UBool U_EXPORT2 +upvec_setValue(uint32_t *pv, + UChar32 start, UChar32 limit, + int32_t column, + uint32_t value, uint32_t mask, + UErrorCode *pErrorCode) { + uint32_t *firstRow, *lastRow; + int32_t columns; + UBool splitFirstRow, splitLastRow; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return FALSE; + } + + if( pv==NULL || + start<0 || start>limit || limit>0x110000 || + column<0 || (uint32_t)(column+1)>=pv[UPVEC_COLUMNS] + ) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + if(start==limit) { + /* empty range, nothing to do */ + return TRUE; + } + + /* initialize */ + columns=(int32_t)pv[UPVEC_COLUMNS]; + column+=2; /* skip range start and limit columns */ + value&=mask; + + /* find the rows whose ranges overlap with the input range */ + + /* find the first row, always successful */ + firstRow=_findRow(pv, start); + + /* find the last row, always successful */ + lastRow=firstRow; + while(limit>(UChar32)lastRow[1]) { + lastRow+=columns; + } + + /* + * Rows need to be split if they partially overlap with the + * input range (only possible for the first and last rows) + * and if their value differs from the input value. + */ + splitFirstRow= (UBool)(start!=(UChar32)firstRow[0] && value!=(firstRow[column]&mask)); + splitLastRow= (UBool)(limit!=(UChar32)lastRow[1] && value!=(lastRow[column]&mask)); + + /* split first/last rows if necessary */ + if(splitFirstRow || splitLastRow) { + int32_t count, rows; + + rows=(int32_t)pv[UPVEC_ROWS]; + if((rows+splitFirstRow+splitLastRow)>(int32_t)pv[UPVEC_MAXROWS]) { + *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; + return FALSE; + } + + /* count the number of row cells to move after the last row, and move them */ + count = (int32_t)((pv+UPVEC_HEADER_LENGTH+rows*columns)-(lastRow+columns)); + if(count>0) { + uprv_memmove( + lastRow+(1+splitFirstRow+splitLastRow)*columns, + lastRow+columns, + count*4); + } + pv[UPVEC_ROWS]=rows+splitFirstRow+splitLastRow; + + /* split the first row, and move the firstRow pointer to the second part */ + if(splitFirstRow) { + /* copy all affected rows up one and move the lastRow pointer */ + count = (int32_t)((lastRow-firstRow)+columns); + uprv_memmove(firstRow+columns, firstRow, count*4); + lastRow+=columns; + + /* split the range and move the firstRow pointer */ + firstRow[1]=firstRow[columns]=(uint32_t)start; + firstRow+=columns; + } + + /* split the last row */ + if(splitLastRow) { + /* copy the last row data */ + uprv_memcpy(lastRow+columns, lastRow, columns*4); + + /* split the range and move the firstRow pointer */ + lastRow[1]=lastRow[columns]=(uint32_t)limit; + } + } + + /* set the "row last seen" to the last row for the range */ + pv[UPVEC_PREV_ROW]=(uint32_t)((lastRow-(pv+UPVEC_HEADER_LENGTH))/columns); + + /* set the input value in all remaining rows */ + firstRow+=column; + lastRow+=column; + mask=~mask; + for(;;) { + *firstRow=(*firstRow&mask)|value; + if(firstRow==lastRow) { + break; + } + firstRow+=columns; + } + return TRUE; +} + +U_CAPI uint32_t U_EXPORT2 +upvec_getValue(uint32_t *pv, UChar32 c, int32_t column) { + uint32_t *row; + + if(pv==NULL || c<0 || c>=0x110000) { + return 0; + } + row=_findRow(pv, c); + return row[2+column]; +} + +U_CAPI uint32_t * U_EXPORT2 +upvec_getRow(uint32_t *pv, int32_t rowIndex, + UChar32 *pRangeStart, UChar32 *pRangeLimit) { + uint32_t *row; + int32_t columns; + + if(pv==NULL || rowIndex<0 || rowIndex>=(int32_t)pv[UPVEC_ROWS]) { + return NULL; + } + + columns=(int32_t)pv[UPVEC_COLUMNS]; + row=pv+UPVEC_HEADER_LENGTH+rowIndex*columns; + if(pRangeStart!=NULL) { + *pRangeStart=row[0]; + } + if(pRangeLimit!=NULL) { + *pRangeLimit=row[1]; + } + return row+2; +} + +static int32_t U_CALLCONV +upvec_compareRows(const void *context, const void *l, const void *r) { + const uint32_t *left=(const uint32_t *)l, *right=(const uint32_t *)r; + const uint32_t *pv=(const uint32_t *)context; + int32_t i, count, columns; + + count=columns=(int32_t)pv[UPVEC_COLUMNS]; /* includes start/limit columns */ + + /* start comparing after start/limit but wrap around to them */ + i=2; + do { + if(left[i]!=right[i]) { + return left[i]0); + + return 0; +} + +U_CAPI int32_t U_EXPORT2 +upvec_toTrie(uint32_t *pv, UNewTrie *trie, UErrorCode *pErrorCode) { + uint32_t *row; + int32_t columns, valueColumns, rows, count; + + /* argument checking */ + if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { + return 0; + } + + if(pv==NULL || trie==NULL) { + *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + row=pv+UPVEC_HEADER_LENGTH; + columns=(int32_t)pv[UPVEC_COLUMNS]; + rows=(int32_t)pv[UPVEC_ROWS]; + + /* sort the properties vectors to find unique vector values */ + if(rows>1) { + uprv_sortArray(pv+UPVEC_HEADER_LENGTH, rows, columns*4, + upvec_compareRows, pv, FALSE, pErrorCode); + } + if(U_FAILURE(*pErrorCode)) { + return 0; + } + + /* + * Move vector contents up to a contiguous array with only unique + * vector values, and set indexes to those values into the trie. + * + * This destroys the Properties Vector structure and replaces it + * with an array of just vector values. + */ + valueColumns=columns-2; /* not counting start & limit */ + count=-valueColumns; + + do { + /* add a new values vector if it is different from the current one */ + if(count<0 || 0!=uprv_memcmp(row+2, pv+count, valueColumns*4)) { + count+=valueColumns; + uprv_memmove(pv+count, row+2, valueColumns*4); + } + + if(count>0 && !utrie_setRange32(trie, (UChar32)row[0], (UChar32)row[1], (uint32_t)count, FALSE)) { + *pErrorCode=U_BUFFER_OVERFLOW_ERROR; + return 0; + } + + row+=columns; + } while(--rows>0); + + return count+valueColumns; +}