2 ******************************************************************************* 
   4 *   Copyright (C) 2002-2011, International Business Machines 
   5 *   Corporation and others.  All Rights Reserved. 
   7 ******************************************************************************* 
   8 *   file name:  propsvec.c 
  10 *   tab size:   8 (not used) 
  13 *   created on: 2002feb22 
  14 *   created by: Markus W. Scherer 
  16 *   Store bits (Unicode character properties) in bit set vectors. 
  20 #include "unicode/utypes.h" 
  28 struct UPropsVectors 
{ 
  30     int32_t columns
;  /* number of columns, plus two for start & limit values */ 
  33     int32_t prevRow
;  /* search optimization: remember last row seen */ 
  37 #define UPVEC_INITIAL_ROWS (1<<12) 
  38 #define UPVEC_MEDIUM_ROWS ((int32_t)1<<16) 
  39 #define UPVEC_MAX_ROWS (UPVEC_MAX_CP+1) 
  41 U_CAPI UPropsVectors 
* U_EXPORT2
 
  42 upvec_open(int32_t columns
, UErrorCode 
*pErrorCode
) { 
  47     if(U_FAILURE(*pErrorCode
)) { 
  51         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
  54     columns
+=2; /* count range start and limit columns */ 
  56     pv
=(UPropsVectors 
*)uprv_malloc(sizeof(UPropsVectors
)); 
  57     v
=(uint32_t *)uprv_malloc(UPVEC_INITIAL_ROWS
*columns
*4); 
  58     if(pv
==NULL 
|| v
==NULL
) { 
  61         *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
; 
  64     uprv_memset(pv
, 0, sizeof(UPropsVectors
)); 
  67     pv
->maxRows
=UPVEC_INITIAL_ROWS
; 
  68     pv
->rows
=2+(UPVEC_MAX_CP
-UPVEC_FIRST_SPECIAL_CP
); 
  70     /* set the all-Unicode row and the special-value rows */ 
  72     uprv_memset(row
, 0, pv
->rows
*columns
*4); 
  76     for(cp
=UPVEC_FIRST_SPECIAL_CP
; cp
<=UPVEC_MAX_CP
; ++cp
) { 
  85 upvec_close(UPropsVectors 
*pv
) { 
  93 _findRow(UPropsVectors 
*pv
, UChar32 rangeStart
) { 
  95     int32_t columns
, i
, start
, limit
, prevRow
; 
 101     /* check the vicinity of the last-seen row (start searching with an unrolled loop) */ 
 102     row
=pv
->v
+prevRow
*columns
; 
 103     if(rangeStart
>=(UChar32
)row
[0]) { 
 104         if(rangeStart
<(UChar32
)row
[1]) { 
 105             /* same row as last seen */ 
 107         } else if(rangeStart
<(UChar32
)(row
+=columns
)[1]) { 
 108             /* next row after the last one */ 
 109             pv
->prevRow
=prevRow
+1; 
 111         } else if(rangeStart
<(UChar32
)(row
+=columns
)[1]) { 
 112             /* second row after the last one */ 
 113             pv
->prevRow
=prevRow
+2; 
 115         } else if((rangeStart
-(UChar32
)row
[1])<10) { 
 116             /* we are close, continue looping */ 
 121             } while(rangeStart
>=(UChar32
)row
[1]); 
 125     } else if(rangeStart
<(UChar32
)pv
->v
[1]) { 
 126         /* the very first row */ 
 131     /* do a binary search for the start of the range */ 
 133     while(start
<limit
-1) { 
 136         if(rangeStart
<(UChar32
)row
[0]) { 
 138         } else if(rangeStart
<(UChar32
)row
[1]) { 
 146     /* must be found because all ranges together always cover all of Unicode */ 
 148     return pv
->v
+start
*columns
; 
 151 U_CAPI 
void U_EXPORT2
 
 152 upvec_setValue(UPropsVectors 
*pv
, 
 153                UChar32 start
, UChar32 end
, 
 155                uint32_t value
, uint32_t mask
, 
 156                UErrorCode 
*pErrorCode
) { 
 157     uint32_t *firstRow
, *lastRow
; 
 160     UBool splitFirstRow
, splitLastRow
; 
 162     /* argument checking */ 
 163     if(U_FAILURE(*pErrorCode
)) { 
 167         start
<0 || start
>end 
|| end
>UPVEC_MAX_CP 
|| 
 168         column
<0 || column
>=(pv
->columns
-2) 
 170         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 173     if(pv
->isCompacted
) { 
 174         *pErrorCode
=U_NO_WRITE_PERMISSION
; 
 181     column
+=2; /* skip range start and limit columns */ 
 184     /* find the rows whose ranges overlap with the input range */ 
 186     /* find the first and last rows, always successful */ 
 187     firstRow
=_findRow(pv
, start
); 
 188     lastRow
=_findRow(pv
, end
); 
 191      * Rows need to be split if they partially overlap with the 
 192      * input range (only possible for the first and last rows) 
 193      * and if their value differs from the input value. 
 195     splitFirstRow
= (UBool
)(start
!=(UChar32
)firstRow
[0] && value
!=(firstRow
[column
]&mask
)); 
 196     splitLastRow
= (UBool
)(limit
!=(UChar32
)lastRow
[1] && value
!=(lastRow
[column
]&mask
)); 
 198     /* split first/last rows if necessary */ 
 199     if(splitFirstRow 
|| splitLastRow
) { 
 203         if((rows
+splitFirstRow
+splitLastRow
)>pv
->maxRows
) { 
 204             uint32_t *newVectors
; 
 207             if(pv
->maxRows
<UPVEC_MEDIUM_ROWS
) { 
 208                 newMaxRows
=UPVEC_MEDIUM_ROWS
; 
 209             } else if(pv
->maxRows
<UPVEC_MAX_ROWS
) { 
 210                 newMaxRows
=UPVEC_MAX_ROWS
; 
 212                 /* Implementation bug, or UPVEC_MAX_ROWS too low. */ 
 213                 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 216             newVectors
=(uint32_t *)uprv_malloc(newMaxRows
*columns
*4); 
 217             if(newVectors
==NULL
) { 
 218                 *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 221             uprv_memcpy(newVectors
, pv
->v
, rows
*columns
*4); 
 222             firstRow
=newVectors
+(firstRow
-pv
->v
); 
 223             lastRow
=newVectors
+(lastRow
-pv
->v
); 
 226             pv
->maxRows
=newMaxRows
; 
 229         /* count the number of row cells to move after the last row, and move them */ 
 230         count 
= (int32_t)((pv
->v
+rows
*columns
)-(lastRow
+columns
)); 
 233                 lastRow
+(1+splitFirstRow
+splitLastRow
)*columns
, 
 237         pv
->rows
=rows
+splitFirstRow
+splitLastRow
; 
 239         /* split the first row, and move the firstRow pointer to the second part */ 
 241             /* copy all affected rows up one and move the lastRow pointer */ 
 242             count 
= (int32_t)((lastRow
-firstRow
)+columns
); 
 243             uprv_memmove(firstRow
+columns
, firstRow
, count
*4); 
 246             /* split the range and move the firstRow pointer */ 
 247             firstRow
[1]=firstRow
[columns
]=(uint32_t)start
; 
 251         /* split the last row */ 
 253             /* copy the last row data */ 
 254             uprv_memcpy(lastRow
+columns
, lastRow
, columns
*4); 
 256             /* split the range and move the firstRow pointer */ 
 257             lastRow
[1]=lastRow
[columns
]=(uint32_t)limit
; 
 261     /* set the "row last seen" to the last row for the range */ 
 262     pv
->prevRow
=(int32_t)((lastRow
-(pv
->v
))/columns
); 
 264     /* set the input value in all remaining rows */ 
 269         *firstRow
=(*firstRow
&mask
)|value
; 
 270         if(firstRow
==lastRow
) { 
 277 U_CAPI 
uint32_t U_EXPORT2
 
 278 upvec_getValue(const UPropsVectors 
*pv
, UChar32 c
, int32_t column
) { 
 282     if(pv
->isCompacted 
|| c
<0 || c
>UPVEC_MAX_CP 
|| column
<0 || column
>=(pv
->columns
-2)) { 
 285     ncpv
=(UPropsVectors 
*)pv
; 
 286     row
=_findRow(ncpv
, c
); 
 287     return row
[2+column
]; 
 290 U_CAPI 
uint32_t * U_EXPORT2
 
 291 upvec_getRow(const UPropsVectors 
*pv
, int32_t rowIndex
, 
 292              UChar32 
*pRangeStart
, UChar32 
*pRangeEnd
) { 
 296     if(pv
->isCompacted 
|| rowIndex
<0 || rowIndex
>=pv
->rows
) { 
 301     row
=pv
->v
+rowIndex
*columns
; 
 302     if(pRangeStart
!=NULL
) { 
 303         *pRangeStart
=(UChar32
)row
[0]; 
 305     if(pRangeEnd
!=NULL
) { 
 306         *pRangeEnd
=(UChar32
)row
[1]-1; 
 311 static int32_t U_CALLCONV
 
 312 upvec_compareRows(const void *context
, const void *l
, const void *r
) { 
 313     const uint32_t *left
=(const uint32_t *)l
, *right
=(const uint32_t *)r
; 
 314     const UPropsVectors 
*pv
=(const UPropsVectors 
*)context
; 
 315     int32_t i
, count
, columns
; 
 317     count
=columns
=pv
->columns
; /* includes start/limit columns */ 
 319     /* start comparing after start/limit but wrap around to them */ 
 322         if(left
[i
]!=right
[i
]) { 
 323             return left
[i
]<right
[i
] ? -1 : 1; 
 333 U_CAPI 
void U_EXPORT2
 
 334 upvec_compact(UPropsVectors 
*pv
, UPVecCompactHandler 
*handler
, void *context
, UErrorCode 
*pErrorCode
) { 
 336     int32_t i
, columns
, valueColumns
, rows
, count
; 
 337     UChar32 start
, limit
; 
 339     /* argument checking */ 
 340     if(U_FAILURE(*pErrorCode
)) { 
 344         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 347     if(pv
->isCompacted
) { 
 351     /* Set the flag now: Sorting and compacting destroys the builder data structure. */ 
 352     pv
->isCompacted
=TRUE
; 
 356     U_ASSERT(columns
>=3); /* upvec_open asserts this */ 
 357     valueColumns
=columns
-2; /* not counting start & limit */ 
 359     /* sort the properties vectors to find unique vector values */ 
 360     uprv_sortArray(pv
->v
, rows
, columns
*4, 
 361                    upvec_compareRows
, pv
, FALSE
, pErrorCode
); 
 362     if(U_FAILURE(*pErrorCode
)) { 
 367      * Find and set the special values. 
 368      * This has to do almost the same work as the compaction below, 
 369      * to find the indexes where the special-value rows will move. 
 373     for(i
=0; i
<rows
; ++i
) { 
 374         start
=(UChar32
)row
[0]; 
 376         /* count a new values vector if it is different from the current one */ 
 377         if(count
<0 || 0!=uprv_memcmp(row
+2, row
-valueColumns
, valueColumns
*4)) { 
 381         if(start
>=UPVEC_FIRST_SPECIAL_CP
) { 
 382             handler(context
, start
, start
, count
, row
+2, valueColumns
, pErrorCode
); 
 383             if(U_FAILURE(*pErrorCode
)) { 
 391     /* count is at the beginning of the last vector, add valueColumns to include that last vector */ 
 394     /* Call the handler once more to signal the start of delivering real values. */ 
 395     handler(context
, UPVEC_START_REAL_VALUES_CP
, UPVEC_START_REAL_VALUES_CP
, 
 396             count
, row
-valueColumns
, valueColumns
, pErrorCode
); 
 397     if(U_FAILURE(*pErrorCode
)) { 
 402      * Move vector contents up to a contiguous array with only unique 
 403      * vector values, and call the handler function for each vector. 
 405      * This destroys the Properties Vector structure and replaces it 
 406      * with an array of just vector values. 
 410     for(i
=0; i
<rows
; ++i
) { 
 411         /* fetch these first before memmove() may overwrite them */ 
 412         start
=(UChar32
)row
[0]; 
 413         limit
=(UChar32
)row
[1]; 
 415         /* add a new values vector if it is different from the current one */ 
 416         if(count
<0 || 0!=uprv_memcmp(row
+2, pv
->v
+count
, valueColumns
*4)) { 
 418             uprv_memmove(pv
->v
+count
, row
+2, valueColumns
*4); 
 421         if(start
<UPVEC_FIRST_SPECIAL_CP
) { 
 422             handler(context
, start
, limit
-1, count
, pv
->v
+count
, valueColumns
, pErrorCode
); 
 423             if(U_FAILURE(*pErrorCode
)) { 
 431     /* count is at the beginning of the last vector, add one to include that last vector */ 
 432     pv
->rows
=count
/valueColumns
+1; 
 435 U_CAPI 
const uint32_t * U_EXPORT2
 
 436 upvec_getArray(const UPropsVectors 
*pv
, int32_t *pRows
, int32_t *pColumns
) { 
 437     if(!pv
->isCompacted
) { 
 444         *pColumns
=pv
->columns
-2; 
 449 U_CAPI 
uint32_t * U_EXPORT2
 
 450 upvec_cloneArray(const UPropsVectors 
*pv
, 
 451                  int32_t *pRows
, int32_t *pColumns
, UErrorCode 
*pErrorCode
) { 
 452     uint32_t *clonedArray
; 
 455     if(U_FAILURE(*pErrorCode
)) { 
 458     if(!pv
->isCompacted
) { 
 459         *pErrorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 462     byteLength
=pv
->rows
*(pv
->columns
-2)*4; 
 463     clonedArray
=(uint32_t *)uprv_malloc(byteLength
); 
 464     if(clonedArray
==NULL
) { 
 465         *pErrorCode
=U_MEMORY_ALLOCATION_ERROR
; 
 468     uprv_memcpy(clonedArray
, pv
->v
, byteLength
); 
 473         *pColumns
=pv
->columns
-2; 
 478 U_CAPI UTrie2 
* U_EXPORT2
 
 479 upvec_compactToUTrie2WithRowIndexes(UPropsVectors 
*pv
, UErrorCode 
*pErrorCode
) { 
 480     UPVecToUTrie2Context toUTrie2
={ NULL 
}; 
 481     upvec_compact(pv
, upvec_compactToUTrie2Handler
, &toUTrie2
, pErrorCode
); 
 482     utrie2_freeze(toUTrie2
.trie
, UTRIE2_16_VALUE_BITS
, pErrorCode
); 
 483     if(U_FAILURE(*pErrorCode
)) { 
 484         utrie2_close(toUTrie2
.trie
); 
 487     return toUTrie2
.trie
; 
 491  * TODO(markus): Add upvec_16BitsToUTrie2() function that enumerates all rows, extracts 
 492  * some 16-bit field and builds and returns a UTrie2. 
 495 U_CAPI 
void U_CALLCONV
 
 496 upvec_compactToUTrie2Handler(void *context
, 
 497                              UChar32 start
, UChar32 end
, 
 498                              int32_t rowIndex
, uint32_t *row
, int32_t columns
, 
 499                              UErrorCode 
*pErrorCode
) { 
 500     UPVecToUTrie2Context 
*toUTrie2
=(UPVecToUTrie2Context 
*)context
; 
 501     if(start
<UPVEC_FIRST_SPECIAL_CP
) { 
 502         utrie2_setRange32(toUTrie2
->trie
, start
, end
, (uint32_t)rowIndex
, TRUE
, pErrorCode
); 
 505         case UPVEC_INITIAL_VALUE_CP
: 
 506             toUTrie2
->initialValue
=rowIndex
; 
 508         case UPVEC_ERROR_VALUE_CP
: 
 509             toUTrie2
->errorValue
=rowIndex
; 
 511         case UPVEC_START_REAL_VALUES_CP
: 
 512             toUTrie2
->maxValue
=rowIndex
; 
 513             if(rowIndex
>0xffff) { 
 514                 /* too many rows for a 16-bit trie */ 
 515                 *pErrorCode
=U_INDEX_OUTOFBOUNDS_ERROR
; 
 517                 toUTrie2
->trie
=utrie2_open(toUTrie2
->initialValue
, 
 518                                            toUTrie2
->errorValue
, pErrorCode
);