2 *******************************************************************************
4 * Copyright (C) 2002-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: propsvec.h
10 * tab size: 8 (not used)
13 * created on: 2002feb22
14 * created by: Markus W. Scherer
16 * Store bits (Unicode character properties) in bit set vectors.
19 #ifndef __UPROPSVEC_H__
20 #define __UPROPSVEC_H__
22 #include "unicode/utypes.h"
29 * Unicode Properties Vectors associated with code point ranges.
31 * Rows of uint32_t integers in a contiguous array store
32 * the range limits and the properties vectors.
34 * Logically, each row has a certain number of uint32_t values,
35 * which is set via the upvec_open() "columns" parameter.
37 * Internally, two additional columns are stored.
38 * In each internal row,
39 * row[0] contains the start code point and
40 * row[1] contains the limit code point,
41 * which is the start of the next range.
43 * Initially, there is only one "normal" row for
44 * range [0..0x110000[ with values 0.
45 * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP.
47 * It would be possible to store only one range boundary per row,
48 * but self-contained rows allow them to be sorted later by their contents.
/*
 * Handle type for a properties-vectors object.
 * Only the struct tag is declared in this part of the header; no members are
 * visible here, so callers work with UPropsVectors pointers that they pass
 * to the upvec_*() functions.
 */
51 typedef struct UPropsVectors UPropsVectors
;
54 * Special pseudo code points for storing the initialValue and the errorValue,
55 * which are used to initialize a UTrie2 or similar.
/*
 * First pseudo code point used for special-purpose rows.
 * It equals the (exclusive) limit of the initial "normal" range
 * [0..0x110000[, so special rows sit directly after the real code points.
 */
57 #define UPVEC_FIRST_SPECIAL_CP 0x110000
/* Pseudo code point of the row that stores the initialValue. */
58 #define UPVEC_INITIAL_VALUE_CP 0x110000
/* Pseudo code point of the row that stores the errorValue. */
59 #define UPVEC_ERROR_VALUE_CP 0x110001
/*
 * Highest pseudo code point currently defined (same value as
 * UPVEC_ERROR_VALUE_CP). This value might grow over time, so code that
 * needs a stable marker should not depend on it.
 */
60 #define UPVEC_MAX_CP 0x110001
63 * Special pseudo code point used in upvec_compact() signalling the end of
64 * delivering special values and the beginning of delivering real ones.
65 * Stable value, unlike UPVEC_MAX_CP which might grow over time.
/*
 * Marker passed to the compaction handler (start==end==this value) between
 * the special-values phase and the real-values phase of upvec_compact().
 * Stable value, deliberately far above UPVEC_MAX_CP (0x110001).
 */
67 #define UPVEC_START_REAL_VALUES_CP 0x200000
70 * Open a UPropsVectors object.
71 * @param columns Number of value integers (uint32_t) per row.
/*
 * Open (create) a UPropsVectors object.
 *
 * columns:    number of uint32_t value integers per logical row; two further
 *             columns (range start and limit) are stored internally per row.
 * pErrorCode: pointer to a UErrorCode.
 *             NOTE(review): presumably the usual ICU in/out error code
 *             (checked on entry, set on failure) - confirm against the
 *             implementation.
 *
 * Returns a pointer to the new object. Initially it holds one "normal" row
 * for the range [0..0x110000[ with all values 0, plus special-purpose rows.
 * NOTE(review): presumably the result is released with upvec_close() and is
 * NULL on failure - confirm against the implementation.
 */
73 U_CAPI UPropsVectors
* U_EXPORT2
74 upvec_open(int32_t columns
, UErrorCode
*pErrorCode
);
/*
 * upvec_close(pv): counterpart of upvec_open().
 * NOTE(review): presumably releases the object and its memory; this section
 * carries no description for the function - confirm against the
 * implementation, including whether pv may be NULL.
 * NOTE(review): the "U_CAPI <type> U_EXPORT2" line that precedes the other
 * prototypes in this header is not visible in front of this one; verify the
 * return type and linkage macros against the original header.
 */
77 upvec_close(UPropsVectors
*pv
);
80 * In rows for code points [start..end], select the column,
81 * reset the mask bits and set the value bits (ANDed with the mask).
83 * Will set U_NO_WRITE_PERMISSION if called after upvec_compact().
/*
 * Set property bits for every code point in the inclusive range
 * [start..end]: in the affected rows, the bits selected by mask are reset
 * and then the bits of value (ANDed with mask) are set.
 *
 * pv:         the properties-vectors object to modify.
 * start, end: first and last code point of the range.
 * value:      new bit values; only bits that are also in mask are applied.
 * mask:       selects which bits of the column are replaced.
 * pErrorCode: receives U_NO_WRITE_PERMISSION if this is called after
 *             upvec_compact().
 *
 * NOTE(review): the description says a column is selected, but no column
 * parameter is visible in this parameter list, and the
 * "U_CAPI <type> U_EXPORT2" line is not visible either; verify the full
 * prototype against the original header.
 */
86 upvec_setValue(UPropsVectors
*pv
,
87 UChar32 start
, UChar32 end
,
89 uint32_t value
, uint32_t mask
,
90 UErrorCode
*pErrorCode
);
93 * Logically const but must not be used on the same pv concurrently!
94 * Always returns 0 if called after upvec_compact().
/*
 * Read a value for code point c from the given column.
 *
 * pv:     the properties-vectors object; declared const, but the function is
 *         only logically const and must not be called concurrently on the
 *         same pv.
 * c:      the code point to look up.
 * column: index of the value column to read.
 *         NOTE(review): presumably 0-based and < the columns value given to
 *         upvec_open() - confirm.
 *
 * Returns the uint32_t stored for c in that column; always 0 once
 * upvec_compact() has been called.
 */
96 U_CAPI
uint32_t U_EXPORT2
97 upvec_getValue(const UPropsVectors
*pv
, UChar32 c
, int32_t column
);
100 * pRangeStart and pRangeEnd can be NULL.
101 * @return NULL if rowIndex is out of range, for illegal arguments,
102 * or if called after upvec_compact().
/*
 * Get a pointer to the values of one row, selected by rowIndex.
 *
 * pv:          the properties-vectors object.
 * rowIndex:    index of the requested row.
 * pRangeStart: optional output, may be NULL.
 * pRangeEnd:   optional output, may be NULL.
 *              NOTE(review): presumably these receive the first and last
 *              code point of the row's range - confirm whether the end is
 *              inclusive.
 *
 * Returns NULL if rowIndex is out of range, for illegal arguments, or if
 * called after upvec_compact().
 * NOTE(review): the result is a non-const uint32_t * although pv is const;
 * check whether callers are expected to write through it.
 */
104 U_CAPI
uint32_t * U_EXPORT2
105 upvec_getRow(const UPropsVectors
*pv
, int32_t rowIndex
,
106 UChar32
*pRangeStart
, UChar32
*pRangeEnd
);
109 * Compact the vectors:
110 * - modify the memory
111 * - keep only unique vectors
112 * - store them contiguously from the beginning of the memory
113 * - for each (non-unique) row, call the handler function
115 * The handler's rowIndex is the index of the row in the compacted vectors array.
117 * (Therefore, it starts at 0 and increases in increments of the columns value.)
119 * In a first phase, only special values are delivered (each exactly once),
120 * with start==end both equalling a special pseudo code point.
121 * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP
122 * where rowIndex is the length of the compacted array,
123 * and the row is arbitrary (but not NULL).
124 * Then, in the second phase, the handler is called for each row of real values.
/*
 * Function type of the callback that upvec_compact() invokes.
 *
 * context:    caller-defined pointer.
 *             NOTE(review): presumably the context argument of
 *             upvec_compact() passed through unchanged - confirm.
 * start, end: code point range of the delivered row; in the special-values
 *             phase and for the phase marker, start==end is a pseudo code
 *             point (a special one, or UPVEC_START_REAL_VALUES_CP).
 * rowIndex:   index of the row in the compacted array; for the
 *             UPVEC_START_REAL_VALUES_CP call it is the length of the
 *             compacted array.
 * row:        pointer to the row's values; arbitrary but not NULL for the
 *             UPVEC_START_REAL_VALUES_CP call.
 * columns:    number of values per row.
 * pErrorCode: pointer to a UErrorCode.
 *             NOTE(review): presumably lets the handler report a failure
 *             back to upvec_compact() - confirm.
 */
126 typedef void U_CALLCONV
127 UPVecCompactHandler(void *context
,
128 UChar32 start
, UChar32 end
,
129 int32_t rowIndex
, uint32_t *row
, int32_t columns
,
130 UErrorCode
*pErrorCode
);
/*
 * Compact the vectors in place: keep only unique vectors, store them
 * contiguously from the beginning of the memory, and report rows to the
 * handler (special values first, then a UPVEC_START_REAL_VALUES_CP marker
 * call, then the rows of real values).
 *
 * pv:         the object to compact; its memory is modified.
 * handler:    callback of type UPVecCompactHandler.
 * context:    caller-defined pointer for the handler.
 *             NOTE(review): presumably forwarded as the handler's first
 *             argument - confirm.
 * pErrorCode: pointer to a UErrorCode.
 *
 * After this call, per the notes on the other functions in this header:
 * upvec_setValue() sets U_NO_WRITE_PERMISSION, upvec_getValue() returns 0,
 * upvec_getRow() returns NULL, and upvec_getArray()/upvec_cloneArray()
 * no longer return NULL for being called too early.
 */
132 U_CAPI
void U_EXPORT2
133 upvec_compact(UPropsVectors
*pv
, UPVecCompactHandler
*handler
, void *context
, UErrorCode
*pErrorCode
);
136 * Get the vectors array after calling upvec_compact().
137 * The caller must not modify nor release the returned array.
138 * Returns NULL if called before upvec_compact().
/*
 * Get the compacted vectors array without copying it.
 *
 * pv:       the properties-vectors object, after upvec_compact().
 * pRows:    pointer to an int32_t.
 * pColumns: pointer to an int32_t.
 *           NOTE(review): presumably outputs for the number of rows and the
 *           number of columns; confirm, and whether they may be NULL.
 *
 * Returns a const pointer that the caller must neither modify nor release.
 * Returns NULL if called before upvec_compact().
 */
140 U_CAPI
const uint32_t * U_EXPORT2
141 upvec_getArray(const UPropsVectors
*pv
, int32_t *pRows
, int32_t *pColumns
);
144 * Get a clone of the vectors array after calling upvec_compact().
145 * The caller owns the returned array and must uprv_free() it.
146 * Returns NULL if called before upvec_compact().
/*
 * Get a caller-owned copy of the compacted vectors array.
 *
 * pv:         the properties-vectors object, after upvec_compact().
 * pRows:      pointer to an int32_t.
 * pColumns:   pointer to an int32_t.
 *             NOTE(review): presumably outputs for the number of rows and
 *             the number of columns; confirm, and whether they may be NULL.
 * pErrorCode: pointer to a UErrorCode.
 *             NOTE(review): presumably reports allocation failure - confirm.
 *
 * Returns an array that the caller owns and must release with uprv_free().
 * Returns NULL if called before upvec_compact().
 */
148 U_CAPI
uint32_t * U_EXPORT2
149 upvec_cloneArray(const UPropsVectors
*pv
,
150 int32_t *pRows
, int32_t *pColumns
, UErrorCode
*pErrorCode
);
153 * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted
154 * vectors array, and freeze the trie.
/*
 * Convenience wrapper: calls upvec_compact(), creates a 16-bit UTrie2 whose
 * values are indexes into the compacted vectors array, and freezes the trie.
 *
 * pv:         the object to compact; since this runs upvec_compact(), the
 *             post-compaction restrictions of the other functions apply
 *             afterwards.
 * pErrorCode: pointer to a UErrorCode.
 *
 * Returns the UTrie2.
 * NOTE(review): ownership of the returned trie and the value returned on
 * failure are not stated in this header - confirm against the
 * implementation.
 */
156 U_CAPI UTrie2
* U_EXPORT2
157 upvec_compactToUTrie2WithRowIndexes(UPropsVectors
*pv
, UErrorCode
*pErrorCode
);
/*
 * Context structure for upvec_compactToUTrie2Handler().
 * NOTE(review): only the initialValue member is visible here and no closing
 * brace precedes the typedef below; the handler's comment says it "creates
 * the trie", so further members (e.g. for the trie itself) are presumably
 * missing - verify against the original header.
 */
159 struct UPVecToUTrie2Context
{
/* NOTE(review): presumably the value delivered for UPVEC_INITIAL_VALUE_CP - confirm. */
161 int32_t initialValue
;
/* Short name so the context can be declared without the struct keyword. */
165 typedef struct UPVecToUTrie2Context UPVecToUTrie2Context
;
167 /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */
/*
 * Compaction callback; its parameter list matches UPVecCompactHandler, so
 * it can be passed as the handler argument of upvec_compact().
 *
 * context:    must point to a UPVecToUTrie2Context.
 * start, end: code point range (or pseudo code point) of the delivered row.
 * rowIndex:   index of the row in the compacted array; this is the value
 *             stored for the range.
 * row:        pointer to the row's values.
 * columns:    number of values per row.
 * pErrorCode: pointer to a UErrorCode.
 *             NOTE(review): presumably set if building the trie fails -
 *             confirm against the implementation.
 */
168 U_CAPI
void U_CALLCONV
169 upvec_compactToUTrie2Handler(void *context
,
170 UChar32 start
, UChar32 end
,
171 int32_t rowIndex
, uint32_t *row
, int32_t columns
,
172 UErrorCode
*pErrorCode
);