]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4388f060 | 4 | * Copyright (C) 2001-2011, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: ucol_tok.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created 02/22/2001 | |
14 | * created by: Vladimir Weinstein | |
15 | * | |
16 | * This module reads a tailoring rule string and produces a list of | |
17 | * tokens that will be turned into collation elements | |
18 | * | |
19 | */ | |
20 | ||
21 | #ifndef UCOL_TOKENS_H | |
22 | #define UCOL_TOKENS_H | |
23 | ||
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/uset.h" | |
26 | ||
27 | #if !UCONFIG_NO_COLLATION | |
28 | ||
29 | #include "ucol_imp.h" | |
30 | #include "uhash.h" | |
31 | #include "unicode/parseerr.h" | |
32 | ||
33 | #define UCOL_TOK_UNSET 0xFFFFFFFF /* all 32 bits set; presumably the not-set marker -- confirm at use sites */ | |
34 | #define UCOL_TOK_RESET 0xDEADBEEF /* distinctive 32-bit marker; its use is not visible in this header */ | |
35 | ||
36 | #define UCOL_TOK_POLARITY_NEGATIVE 0 /* polarity codes; presumably stored in UColToken.polarity -- confirm */ | |
37 | #define UCOL_TOK_POLARITY_POSITIVE 1 | |
38 | ||
39 | #define UCOL_TOK_TOP 0x04 /* 0x04, 0x08 and 0x10 are single bits; presumably OR-ed into the uint16_t flags fields -- confirm */ | |
40 | #define UCOL_TOK_VARIABLE_TOP 0x08 | |
41 | #define UCOL_TOK_BEFORE 0x03 /* covers the two low bits (a mask rather than a single bit) */ | |
42 | #define UCOL_TOK_SUCCESS 0x10 | |
43 | ||
44 | /* Size of the space reserved for the extra strings that need to be unquoted */ | |
45 | /* while the rules are parsed (the unit, bytes or UChars, is not stated here). */ | |
73c04bcf | 46 | #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 |
b75a7d8f A |
47 | typedef struct UColToken UColToken; /* forward declaration: the list header below stores UColToken pointers before struct UColToken is defined */ |
48 | ||
49 | typedef struct { /* Header record for one token list: token pointers, CE values and per-strength arrays. */ | |
50 | UColToken* first; /* first/last/reset tokens of this list (ownership is not visible in this header) */ | |
51 | UColToken* last; | |
52 | UColToken* reset; | |
53 | UBool indirect; | |
54 | uint32_t baseCE; /* base, next and previous CE values, each paired with a ...ContCE (presumably the continuation CE -- confirm) */ | |
55 | uint32_t baseContCE; | |
56 | uint32_t nextCE; | |
57 | uint32_t nextContCE; | |
58 | uint32_t previousCE; | |
59 | uint32_t previousContCE; | |
60 | int32_t pos[UCOL_STRENGTH_LIMIT]; /* note: sized by UCOL_STRENGTH_LIMIT, unlike the UCOL_CE_STRENGTH_LIMIT arrays below */ | |
61 | uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* three entries per CE strength slot (layout presumably [strength][3] -- confirm) */ | |
62 | uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; | |
63 | uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; /* one entry per CE strength */ | |
64 | UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; /* one token pointer per CE strength (presumably first/last token of that strength -- confirm) */ | |
65 | UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; | |
66 | } UColTokListHeader; | |
67 | ||
68 | struct UColToken { /* One token of a tailoring rule; linked to its neighbours and to its list header. */ | |
69 | UChar debugSource; /* single UChar values; judging by the names, kept for debugging only -- confirm */ | |
70 | UChar debugExpansion; | |
71 | UChar debugPrefix; | |
72 | uint32_t CEs[128]; /* fixed capacity of 128 entries; noOfCEs is presumably the number in use -- confirm */ | |
73 | uint32_t noOfCEs; | |
74 | uint32_t expCEs[128]; /* fixed capacity of 128 entries; noOfExpCEs is presumably the number in use -- confirm */ | |
75 | uint32_t noOfExpCEs; | |
76 | uint32_t source; /* NOTE(review): source/expansion/prefix are 32-bit integers, not pointers; their encoding is not visible in this header */ | |
77 | uint32_t expansion; | |
78 | uint32_t prefix; | |
79 | uint32_t strength; | |
80 | uint32_t toInsert; | |
81 | uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>>. NOTE(review): the field is unsigned and UCOL_TOK_POLARITY_NEGATIVE is defined as 0, so the -1 here looks stale -- confirm. */ | |
82 | UColTokListHeader *listHeader; /* header of the list this token is attached to */ | |
83 | UColToken* previous; /* doubly linked: previous and next token */ | |
84 | UColToken* next; | |
729e4ab9 | 85 | UChar **rulesToParseHdl; /* pointer to a UChar pointer (handle); presumably refers to the rule buffer being parsed -- confirm */ |
374ca955 | 86 | uint16_t flags; |
b75a7d8f A |
87 | }; |
88 | ||
89 | /* | |
90 | * This is a token that has been parsed | |
91 | * but not yet processed. It is used to reduce | |
92 | * the number of arguments passed around in the parser. | |
93 | */ | |
94 | typedef struct { | |
95 | uint32_t strength; | |
96 | uint32_t charsOffset; /* offset/length pairs for the chars, extension and prefix parts (presumably relative to the rule buffer -- confirm) */ | |
97 | uint32_t charsLen; | |
98 | uint32_t extensionOffset; | |
99 | uint32_t extensionLen; | |
100 | uint32_t prefixOffset; | |
101 | uint32_t prefixLen; | |
102 | uint16_t flags; | |
103 | uint16_t indirectIndex; | |
104 | } UColParsedToken; | |
105 | ||
106 | ||
107 | typedef struct { /* Parser state shared by the ucol_tok_* functions declared in this header. */ | |
108 | UColParsedToken parsedToken; /* embedded by value, not a pointer */ | |
109 | UChar *source; /* source/end and the cursor pointers below refer to UChar buffers (exact roles not visible in this header) */ | |
110 | UChar *end; | |
111 | const UChar *current; | |
112 | UChar *sourceCurrent; | |
113 | UChar *extraCurrent; | |
114 | UChar *extraEnd; | |
115 | const InverseUCATableHeader *invUCA; | |
116 | const UCollator *UCA; | |
117 | UHashtable *tailored; | |
118 | UColOptionSet *opts; | |
119 | uint32_t resultLen; | |
120 | uint32_t listCapacity; | |
121 | UColTokListHeader *lh; /* presumably an array of list headers sized by listCapacity -- confirm */ | |
122 | UColToken *varTop; | |
123 | USet *copySet; | |
124 | USet *removeSet; | |
46f4442e | 125 | UBool buildCCTabFlag; /* Tailoring rule requires building the combining class table. */ |
729e4ab9 A |
126 | |
127 | UChar32 previousCp; /* Previous code point. */ | |
128 | /* For processing starred lists. */ | |
129 | UBool isStarred; /* Are we processing a starred token? */ | |
130 | UBool savedIsStarred; | |
131 | uint32_t currentStarredCharIndex; /* Index of the current character in the starred expression. */ | |
132 | uint32_t lastStarredCharIndex; /* Index of the last character in the starred expression. */ | |
133 | ||
134 | /* For processing ranges. */ | |
135 | UBool inRange; /* Are we in a range? */ | |
136 | UChar32 currentRangeCp; /* Current code point in the range. */ | |
137 | UChar32 lastRangeCp; /* The last code point in the range. */ | |
138 | ||
139 | /* Reorder codes for collation reordering. */ | |
140 | int32_t* reorderCodes; | |
141 | int32_t reorderCodesLength; /* presumably the number of entries in reorderCodes -- confirm */ | |
142 | ||
b75a7d8f A |
143 | } UColTokenParser; |
144 | ||
145 | typedef struct { /* One sub-option: a name (pointer plus length) paired with a UColAttributeValue. */ | |
146 | const UChar *subName; | |
147 | int32_t subLen; /* presumably the length of subName in UChars -- confirm */ | |
148 | UColAttributeValue attrVal; | |
149 | } ucolTokSuboption; | |
150 | ||
151 | typedef struct { /* One option: a name (pointer plus length), its sub-option table and a UColAttribute. */ | |
152 | const UChar *optionName; | |
153 | int32_t optionLen; /* presumably the length of optionName in UChars -- confirm */ | |
154 | const ucolTokSuboption *subopts; | |
155 | int32_t subSize; /* presumably the number of entries in subopts -- confirm */ | |
156 | UColAttribute attr; | |
157 | } ucolTokOption; | |
158 | ||
159 | #define ucol_tok_isSpecialChar(ch) /* Nonzero when ch is in 0x0020-0x002F, 0x003A-0x003F, 0x005B-0x0060 or 0x007D-0x007E, or equals 0x007B; 0x007C is not matched. ch appears in every comparison, so avoid arguments with side effects. */ \ | |
160 | (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ | |
161 | (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ | |
162 | (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ | |
163 | (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ | |
164 | (ch) == 0x007B)) | |
165 | ||
166 | ||
167 | U_CFUNC /* Takes the parser state plus parseError and status out-parameters and returns a uint32_t (presumably a count of token lists -- confirm in the implementation). */ | |
168 | uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, | |
169 | UParseError *parseError, | |
170 | UErrorCode *status); | |
171 | ||
172 | U_CFUNC /* Sets up *src from the rule text given by rules and rulesLength; returns nothing, so failures can only be reported through status. */ | |
729e4ab9 A |
173 | void ucol_tok_initTokenList(UColTokenParser *src, |
174 | const UChar *rules, | |
175 | const uint32_t rulesLength, | |
176 | const UCollator *UCA, | |
177 | GetCollationRulesFunction importFunc, /* callback; context is presumably handed back to it unchanged -- confirm */ | |
178 | void* context, | |
179 | UErrorCode *status); | |
b75a7d8f A |
180 | |
181 | U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); /* presumably releases what ucol_tok_initTokenList set up in *src -- confirm; takes no status parameter */ | |
182 | ||
183 | U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, /* exported; returns a const UChar pointer (its meaning, and whether NULL marks the end, is not visible here -- confirm) */ | |
184 | UBool startOfRules, | |
185 | UParseError *parseError, | |
186 | UErrorCode *status); | |
187 | ||
729e4ab9 | 188 | |
b75a7d8f A |
189 | U_CAPI const UChar * U_EXPORT2 /* exported; works on the UChar range delimited by start and end and has non-const attrib and value pointers (presumably outputs -- confirm) */ |
190 | ucol_tok_getNextArgument(const UChar *start, const UChar *end, | |
191 | UColAttribute *attrib, UColAttributeValue *value, | |
192 | UErrorCode *status); | |
374ca955 A |
193 | U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, /* reads *src (const); nextCE and nextContCE are non-const pointers, presumably outputs -- confirm; no UErrorCode parameter */ |
194 | uint32_t CE, uint32_t contCE, | |
195 | uint32_t *nextCE, uint32_t *nextContCE, | |
196 | uint32_t strength); | |
46f4442e | 197 | U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, /* NOTE(review): declared U_CFUNC with U_EXPORT2, while ucol_inv_getNextCE uses U_CAPI -- confirm this difference is intended */ |
374ca955 A |
198 | uint32_t CE, uint32_t contCE, |
199 | uint32_t *prevCE, uint32_t *prevContCE, | |
200 | uint32_t strength); | |
201 | ||
4388f060 | 202 | const UChar* U_CALLCONV ucol_tok_getRulesFromBundle( /* NOTE(review): unlike the other declarations here this one has no U_CFUNC or U_CAPI specifier; presumably meant to be passed as a GetCollationRulesFunction -- confirm */ |
729e4ab9 A |
203 | void* context, |
204 | const char* locale, | |
205 | const char* type, | |
206 | int32_t* pLength, /* non-const pointer; presumably receives the length of the returned rules -- confirm */ | |
207 | UErrorCode* status); | |
b75a7d8f A |
208 | |
209 | #endif /* #if !UCONFIG_NO_COLLATION */ | |
210 | ||
211 | #endif |