]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
46f4442e | 4 | * Copyright (C) 2001-2008, International Business Machines |
b75a7d8f A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: ucol_tok.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created 02/22/2001 | |
14 | * created by: Vladimir Weinstein | |
15 | * | |
16 | * This module reads a tailoring rule string and produces a list of | |
17 | * tokens that will be turned into collation elements | |
18 | * | |
19 | */ | |
20 | ||
21 | #ifndef UCOL_TOKENS_H | |
22 | #define UCOL_TOKENS_H | |
23 | ||
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/uset.h" | |
26 | ||
27 | #if !UCONFIG_NO_COLLATION | |
28 | ||
29 | #include "ucol_imp.h" | |
30 | #include "uhash.h" | |
31 | #include "unicode/parseerr.h" | |
32 | ||
33 | #define UCOL_TOK_UNSET 0xFFFFFFFF /* all-ones 32-bit marker; presumably means no value has been set - confirm at use sites */ | |
34 | #define UCOL_TOK_RESET 0xDEADBEEF /* distinctive 32-bit marker; presumably tags a reset - confirm at use sites */ | |
35 | ||
36 | #define UCOL_TOK_POLARITY_NEGATIVE 0 /* NOTE(review): presumably the values stored in UColToken.polarity - confirm */ | |
37 | #define UCOL_TOK_POLARITY_POSITIVE 1 | |
38 | ||
39 | #define UCOL_TOK_TOP 0x04 /* 0x04, 0x08 and 0x10 are single bits; presumably OR-ed into the flags fields - confirm */ | |
40 | #define UCOL_TOK_VARIABLE_TOP 0x08 | |
41 | #define UCOL_TOK_BEFORE 0x03 /* covers the two low bits (0x01 and 0x02), unlike the single-bit values around it */ | |
42 | #define UCOL_TOK_SUCCESS 0x10 | |
43 | ||
44 | /* Size of the extra space reserved for strings that need to be unquoted */ | |
45 | /* during the parsing of the rules. */ | |
73c04bcf | 46 | #define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096 |
b75a7d8f A |
47 | typedef struct UColToken UColToken; /* forward declaration: the list header below holds UColToken pointers before the struct is defined */ |
48 | ||
49 | typedef struct { /* bookkeeping for one list of UColToken objects */ | |
50 | UColToken* first; /* presumably the first token of the list - confirm */ | |
51 | UColToken* last; /* presumably the last token of the list - confirm */ | |
52 | UColToken* reset; /* presumably the token this list was reset on - confirm */ | |
53 | UBool indirect; | |
54 | uint32_t baseCE; /* the CE fields come in pairs: a CE value and a matching ContCE value */ | |
55 | uint32_t baseContCE; | |
56 | uint32_t nextCE; | |
57 | uint32_t nextContCE; | |
58 | uint32_t previousCE; | |
59 | uint32_t previousContCE; | |
60 | int32_t pos[UCOL_STRENGTH_LIMIT]; /* sized by UCOL_STRENGTH_LIMIT, unlike the arrays below which use UCOL_CE_STRENGTH_LIMIT */ | |
61 | uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT]; /* array length is three times UCOL_CE_STRENGTH_LIMIT */ | |
62 | uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT]; /* same length as gapsLo */ | |
63 | uint32_t numStr[UCOL_CE_STRENGTH_LIMIT]; /* one entry per index below UCOL_CE_STRENGTH_LIMIT */ | |
64 | UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT]; /* presumably first token per strength - confirm */ | |
65 | UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT]; /* presumably last token per strength - confirm */ | |
66 | } UColTokListHeader; | |
67 | ||
68 | struct UColToken { /* one token; refers to its neighbours through previous/next and to its list through listHeader */ | |
69 | UChar debugSource; | |
70 | UChar debugExpansion; | |
71 | UChar debugPrefix; | |
72 | uint32_t CEs[128]; /* fixed capacity of 128 entries; noOfCEs is presumably the number in use - confirm */ | |
73 | uint32_t noOfCEs; | |
74 | uint32_t expCEs[128]; /* fixed capacity of 128 entries; noOfExpCEs is presumably the number in use - confirm */ | |
75 | uint32_t noOfExpCEs; | |
76 | uint32_t source; | |
77 | uint32_t expansion; | |
78 | uint32_t prefix; | |
79 | uint32_t strength; | |
80 | uint32_t toInsert; | |
81 | uint32_t polarity; /* presumably UCOL_TOK_POLARITY_POSITIVE (1) for <, <<, <<<, , ; and UCOL_TOK_POLARITY_NEGATIVE (0) for >, >>, >>>; the field is unsigned, so not -1 as this comment used to say - confirm */ | |
82 | UColTokListHeader *listHeader; | |
83 | UColToken* previous; | |
84 | UColToken* next; | |
85 | UChar *rulesToParse; | |
374ca955 | 86 | uint16_t flags; |
b75a7d8f A |
87 | }; |
88 | ||
89 | /* | |
90 | * A token that has been parsed but not yet | |
91 | * processed. Grouping these fields in one struct | |
92 | * reduces the number of arguments in the parser. | |
93 | */ | |
94 | typedef struct { | |
95 | uint32_t strength; | |
96 | uint32_t charsOffset; /* each Offset/Len pair presumably locates a span of the rule text - confirm */ | |
97 | uint32_t charsLen; | |
98 | uint32_t extensionOffset; | |
99 | uint32_t extensionLen; | |
100 | uint32_t prefixOffset; | |
101 | uint32_t prefixLen; | |
102 | uint16_t flags; /* same 16-bit width as UColToken.flags */ | |
103 | uint16_t indirectIndex; | |
104 | } UColParsedToken; | |
105 | ||
106 | ||
107 | typedef struct { /* parser state passed as src to the ucol_tok_* and ucol_inv_* functions declared in this header */ | |
108 | UColParsedToken parsedToken; /* embedded by value, not a pointer */ | |
109 | UChar *source; | |
110 | UChar *end; | |
111 | const UChar *current; /* the only const-qualified UChar pointer in this struct */ | |
112 | UChar *sourceCurrent; | |
113 | UChar *extraCurrent; /* NOTE(review): presumably tracks the extra space sized by UCOL_TOK_EXTRA_RULE_SPACE_SIZE - confirm */ | |
114 | UChar *extraEnd; | |
115 | const InverseUCATableHeader *invUCA; | |
116 | const UCollator *UCA; | |
117 | UHashtable *tailored; | |
118 | UColOptionSet *opts; | |
119 | uint32_t resultLen; | |
120 | uint32_t listCapacity; | |
121 | UColTokListHeader *lh; /* presumably an array of list headers with listCapacity slots - confirm */ | |
122 | UColToken *varTop; | |
123 | USet *copySet; | |
124 | USet *removeSet; | |
46f4442e | 125 | UBool buildCCTabFlag; /* Tailoring rule requires building combining class table. */ |
b75a7d8f A |
126 | } UColTokenParser; |
127 | ||
128 | typedef struct { /* a named sub-option together with a UColAttributeValue */ | |
129 | const UChar *subName; /* name as UChar text; subLen is presumably its length - confirm */ | |
130 | int32_t subLen; | |
131 | UColAttributeValue attrVal; /* presumably the value selected by this sub-option - confirm */ | |
132 | } ucolTokSuboption; | |
133 | ||
134 | typedef struct { /* a named option together with its sub-options and a UColAttribute */ | |
135 | const UChar *optionName; /* name as UChar text; optionLen is presumably its length - confirm */ | |
136 | int32_t optionLen; | |
137 | const ucolTokSuboption *subopts; /* read-only pointer; subSize is presumably the number of entries - confirm */ | |
138 | int32_t subSize; | |
139 | UColAttribute attr; | |
140 | } ucolTokOption; | |
141 | ||
142 | #define ucol_tok_isSpecialChar(ch) \ | |
143 | (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \ | |
144 | (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \ | |
145 | (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \ | |
146 | (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \ | |
147 | (ch) == 0x007B)) /* Nonzero when ch is in 0x20-0x2F, 0x3A-0x3F, 0x5B-0x60, 0x7D-0x7E, or equals 0x7B; 0x7C is not matched. The argument is evaluated several times, so pass an expression without side effects. */ | |
148 | ||
149 | ||
150 | U_CFUNC /* presumably builds the token lists from the rules held in src and returns a count - confirm in the implementation */ | |
151 | uint32_t ucol_tok_assembleTokenList(UColTokenParser *src, | |
152 | UParseError *parseError, | |
153 | UErrorCode *status); | |
154 | ||
155 | U_CFUNC | |
374ca955 | 156 | void ucol_tok_initTokenList(UColTokenParser *src, const UChar *rules, const uint32_t rulesLength, const UCollator *UCA, UErrorCode *status); /* takes the rule text as pointer plus length; presumably prepares src for parsing - confirm */ |
b75a7d8f A |
157 | |
158 | U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src); /* presumably releases what ucol_tok_initTokenList set up in src - confirm */ | |
159 | ||
160 | U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src, /* returns a const UChar pointer; its meaning is not visible from this header */ | |
161 | UBool startOfRules, | |
162 | UParseError *parseError, | |
163 | UErrorCode *status); | |
164 | ||
165 | U_CAPI const UChar * U_EXPORT2 | |
166 | ucol_tok_getNextArgument(const UChar *start, const UChar *end, /* input is delimited by the start and end pointers */ | |
167 | UColAttribute *attrib, UColAttributeValue *value, /* non-const pointers; presumably outputs - confirm */ | |
168 | UErrorCode *status); | |
374ca955 A |
169 | U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src, |
170 | uint32_t CE, uint32_t contCE, | |
171 | uint32_t *nextCE, uint32_t *nextContCE, /* non-const pointers; presumably outputs - confirm */ | |
172 | uint32_t strength); | |
46f4442e | 173 | U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src, /* NOTE(review): declared U_CFUNC while ucol_inv_getNextCE is U_CAPI - confirm the difference is intended */ |
374ca955 A |
174 | uint32_t CE, uint32_t contCE, |
175 | uint32_t *prevCE, uint32_t *prevContCE, /* non-const pointers; presumably outputs - confirm */ | |
176 | uint32_t strength); | |
177 | ||
b75a7d8f A |
178 | |
179 | #endif /* #if !UCONFIG_NO_COLLATION */ | |
180 | ||
181 | #endif |