]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2002-2010, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: propsvec.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2002feb22 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * Store bits (Unicode character properties) in bit set vectors. | |
17 | */ | |
18 | ||
19 | #ifndef __UPROPSVEC_H__ | |
20 | #define __UPROPSVEC_H__ | |
21 | ||
22 | #include "unicode/utypes.h" | |
23 | #include "utrie.h" | |
24 | #include "utrie2.h" | |
25 | ||
26 | U_CDECL_BEGIN | |
27 | ||
28 | /** | |
29 | * Unicode Properties Vectors associated with code point ranges. | |
30 | * | |
31 | * Rows of uint32_t integers in a contiguous array store | |
32 | * the range limits and the properties vectors. | |
33 | * | |
34 | * Logically, each row has a certain number of uint32_t values, | |
35 | * which is set via the upvec_open() "columns" parameter. | |
36 | * | |
37 | * Internally, two additional columns are stored. | |
38 | * In each internal row, | |
39 | * row[0] contains the start code point and | |
40 | * row[1] contains the limit code point, | |
41 | * which is the start of the next range. | |
42 | * | |
43 | * Initially, there is only one "normal" row for | |
44 | * range [0..0x110000[ with values 0. | |
45 | * There are additional rows for special purposes, see UPVEC_FIRST_SPECIAL_CP. | |
46 | * | |
47 | * It would be possible to store only one range boundary per row, | |
48 | * but self-contained rows allow sorting them later by contents. | |
49 | */ | |
50 | struct UPropsVectors; | |
51 | typedef struct UPropsVectors UPropsVectors; | |
52 | ||
53 | /* | |
54 | * Special pseudo code points for storing the initialValue and the errorValue, | |
55 | * which are used to initialize a UTrie2 or similar. | |
56 | */ | |
57 | #define UPVEC_FIRST_SPECIAL_CP 0x110000 | |
58 | #define UPVEC_INITIAL_VALUE_CP 0x110000 | |
59 | #define UPVEC_ERROR_VALUE_CP 0x110001 | |
60 | #define UPVEC_MAX_CP 0x110001 | |
61 | ||
62 | /* | |
63 | * Special pseudo code point used in upvec_compact() signalling the end of | |
64 | * delivering special values and the beginning of delivering real ones. | |
65 | * Stable value, unlike UPVEC_MAX_CP which might grow over time. | |
66 | */ | |
67 | #define UPVEC_START_REAL_VALUES_CP 0x200000 | |
68 | ||
69 | /* | |
70 | * Open a UPropsVectors object. | |
71 | * @param columns Number of value integers (uint32_t) per row. | |
72 | */ | |
73 | U_CAPI UPropsVectors * U_EXPORT2 | |
74 | upvec_open(int32_t columns, UErrorCode *pErrorCode); | |
75 | ||
76 | U_CAPI void U_EXPORT2 | |
77 | upvec_close(UPropsVectors *pv); | |
78 | ||
79 | /* | |
80 | * In rows for code points [start..end], select the column, | |
81 | * reset the mask bits and set the value bits (ANDed with the mask). | |
82 | * | |
83 | * Will set U_NO_WRITE_PERMISSION if called after upvec_compact(). | |
84 | */ | |
85 | U_CAPI void U_EXPORT2 | |
86 | upvec_setValue(UPropsVectors *pv, | |
87 | UChar32 start, UChar32 end, | |
88 | int32_t column, | |
89 | uint32_t value, uint32_t mask, | |
90 | UErrorCode *pErrorCode); | |
91 | ||
92 | /* | |
93 | * Logically const but must not be used on the same pv concurrently! | |
94 | * Always returns 0 if called after upvec_compact(). | |
95 | */ | |
96 | U_CAPI uint32_t U_EXPORT2 | |
97 | upvec_getValue(const UPropsVectors *pv, UChar32 c, int32_t column); | |
98 | ||
99 | /* | |
100 | * pRangeStart and pRangeEnd can be NULL. | |
101 | * @return NULL if rowIndex out of range and for illegal arguments, | |
102 | * or if called after upvec_compact() | |
103 | */ | |
104 | U_CAPI uint32_t * U_EXPORT2 | |
105 | upvec_getRow(const UPropsVectors *pv, int32_t rowIndex, | |
106 | UChar32 *pRangeStart, UChar32 *pRangeEnd); | |
107 | ||
108 | /* | |
109 | * Compact the vectors: | |
110 | * - modify the memory | |
111 | * - keep only unique vectors | |
112 | * - store them contiguously from the beginning of the memory | |
113 | * - for each (non-unique) row, call the handler function | |
114 | * | |
115 | * The handler's rowIndex is the index of the row in the compacted | |
116 | * memory block. | |
117 | * (Therefore, it starts at 0 and increases in increments of the columns value.) | |
118 | * | |
119 | * In a first phase, only special values are delivered (each exactly once), | |
120 | * with start==end both equalling a special pseudo code point. | |
121 | * Then the handler is called once more with start==end==UPVEC_START_REAL_VALUES_CP | |
122 | * where rowIndex is the length of the compacted array, | |
123 | * and the row is arbitrary (but not NULL). | |
124 | * Then, in the second phase, the handler is called for each row of real values. | |
125 | */ | |
126 | typedef void U_CALLCONV | |
127 | UPVecCompactHandler(void *context, | |
128 | UChar32 start, UChar32 end, | |
129 | int32_t rowIndex, uint32_t *row, int32_t columns, | |
130 | UErrorCode *pErrorCode); | |
131 | ||
132 | U_CAPI void U_EXPORT2 | |
133 | upvec_compact(UPropsVectors *pv, UPVecCompactHandler *handler, void *context, UErrorCode *pErrorCode); | |
134 | ||
135 | /* | |
136 | * Get the vectors array after calling upvec_compact(). | |
137 | * The caller must neither modify nor release the returned array. | |
138 | * Returns NULL if called before upvec_compact(). | |
139 | */ | |
140 | U_CAPI const uint32_t * U_EXPORT2 | |
141 | upvec_getArray(const UPropsVectors *pv, int32_t *pRows, int32_t *pColumns); | |
142 | ||
143 | /* | |
144 | * Get a clone of the vectors array after calling upvec_compact(). | |
145 | * The caller owns the returned array and must uprv_free() it. | |
146 | * Returns NULL if called before upvec_compact(). | |
147 | */ | |
148 | U_CAPI uint32_t * U_EXPORT2 | |
149 | upvec_cloneArray(const UPropsVectors *pv, | |
150 | int32_t *pRows, int32_t *pColumns, UErrorCode *pErrorCode); | |
151 | ||
152 | /* | |
153 | * Call upvec_compact(), create a 16-bit UTrie2 with indexes into the compacted | |
154 | * vectors array, and freeze the trie. | |
155 | */ | |
156 | U_CAPI UTrie2 * U_EXPORT2 | |
157 | upvec_compactToUTrie2WithRowIndexes(UPropsVectors *pv, UErrorCode *pErrorCode); | |
158 | ||
159 | struct UPVecToUTrie2Context { /* context object passed to upvec_compactToUTrie2Handler() */ | |
160 | UTrie2 *trie; /* trie that the handler creates and fills with rowIndex values */ | |
161 | int32_t initialValue; /* presumably the rowIndex delivered for UPVEC_INITIAL_VALUE_CP -- TODO confirm in propsvec.c */ | |
162 | int32_t errorValue; /* presumably the rowIndex delivered for UPVEC_ERROR_VALUE_CP -- TODO confirm in propsvec.c */ | |
163 | int32_t maxValue; /* NOTE(review): meaning not shown in this header; presumably the largest rowIndex stored -- confirm */ | |
164 | }; | |
165 | typedef struct UPVecToUTrie2Context UPVecToUTrie2Context; | |
166 | ||
167 | /* context=UPVecToUTrie2Context, creates the trie and stores the rowIndex values */ | |
168 | U_CAPI void U_CALLCONV | |
169 | upvec_compactToUTrie2Handler(void *context, | |
170 | UChar32 start, UChar32 end, | |
171 | int32_t rowIndex, uint32_t *row, int32_t columns, | |
172 | UErrorCode *pErrorCode); | |
173 | ||
174 | U_CDECL_END | |
175 | ||
176 | #endif |