]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/ucol_tok.h
ICU-400.38.tar.gz
[apple/icu.git] / icuSources / i18n / ucol_tok.h
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
46f4442e 4* Copyright (C) 2001-2008, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: ucol_tok.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created 02/22/2001
14* created by: Vladimir Weinstein
15*
16* This module reads a tailoring rule string and produces a list of
17* tokens that will be turned into collation elements
18*
19*/
20
21#ifndef UCOL_TOKENS_H
22#define UCOL_TOKENS_H
23
24#include "unicode/utypes.h"
25#include "unicode/uset.h"
26
27#if !UCONFIG_NO_COLLATION
28
29#include "ucol_imp.h"
30#include "uhash.h"
31#include "unicode/parseerr.h"
32
/* Sentinel values; the names suggest "not set" and "reset" markers. */
#define UCOL_TOK_UNSET 0xFFFFFFFF
#define UCOL_TOK_RESET 0xDEADBEEF

/* Polarity values: negative is 0 (not -1), positive is 1. */
#define UCOL_TOK_POLARITY_NEGATIVE 0
#define UCOL_TOK_POLARITY_POSITIVE 1

/*
 * Small bit values, presumably stored in the 16-bit `flags` fields of the
 * token structures -- TODO confirm.  UCOL_TOK_BEFORE covers the two low
 * bits (0x03); the others are single bits.
 */
#define UCOL_TOK_TOP 0x04
#define UCOL_TOK_VARIABLE_TOP 0x08
#define UCOL_TOK_BEFORE 0x03
#define UCOL_TOK_SUCCESS 0x10

/* this is space for the extra strings that need to be unquoted */
/* during the parsing of the rules */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096

/* Forward declaration: UColTokListHeader and UColToken refer to each other. */
typedef struct UColToken UColToken;
48
49typedef struct {
50 UColToken* first;
51 UColToken* last;
52 UColToken* reset;
53 UBool indirect;
54 uint32_t baseCE;
55 uint32_t baseContCE;
56 uint32_t nextCE;
57 uint32_t nextContCE;
58 uint32_t previousCE;
59 uint32_t previousContCE;
60 int32_t pos[UCOL_STRENGTH_LIMIT];
61 uint32_t gapsLo[3*UCOL_CE_STRENGTH_LIMIT];
62 uint32_t gapsHi[3*UCOL_CE_STRENGTH_LIMIT];
63 uint32_t numStr[UCOL_CE_STRENGTH_LIMIT];
64 UColToken* fStrToken[UCOL_CE_STRENGTH_LIMIT];
65 UColToken* lStrToken[UCOL_CE_STRENGTH_LIMIT];
66} UColTokListHeader;
67
68struct UColToken {
69 UChar debugSource;
70 UChar debugExpansion;
71 UChar debugPrefix;
72 uint32_t CEs[128];
73 uint32_t noOfCEs;
74 uint32_t expCEs[128];
75 uint32_t noOfExpCEs;
76 uint32_t source;
77 uint32_t expansion;
78 uint32_t prefix;
79 uint32_t strength;
80 uint32_t toInsert;
81 uint32_t polarity; /* 1 for <, <<, <<<, , ; and -1 for >, >>, >>> */
82 UColTokListHeader *listHeader;
83 UColToken* previous;
84 UColToken* next;
85 UChar *rulesToParse;
374ca955 86 uint16_t flags;
b75a7d8f
A
87};
88
/*
 * This is a token that has been parsed
 * but not yet processed. Used to reduce
 * the number of arguments in the parser.
 *
 * The chars, extension and prefix parts are each described by an
 * offset/length pair.  What buffer the offsets are relative to is not
 * visible in this header -- TODO confirm.
 */
typedef struct {
    uint32_t strength;
    uint32_t charsOffset;
    uint32_t charsLen;
    uint32_t extensionOffset;
    uint32_t extensionLen;
    uint32_t prefixOffset;
    uint32_t prefixLen;
    uint16_t flags;
    uint16_t indirectIndex;
} UColParsedToken;
105
106
107typedef struct {
108 UColParsedToken parsedToken;
109 UChar *source;
110 UChar *end;
111 const UChar *current;
112 UChar *sourceCurrent;
113 UChar *extraCurrent;
114 UChar *extraEnd;
115 const InverseUCATableHeader *invUCA;
116 const UCollator *UCA;
117 UHashtable *tailored;
118 UColOptionSet *opts;
119 uint32_t resultLen;
120 uint32_t listCapacity;
121 UColTokListHeader *lh;
122 UColToken *varTop;
123 USet *copySet;
124 USet *removeSet;
46f4442e 125 UBool buildCCTabFlag; /* Tailoring rule requirs building combining class table. */
b75a7d8f
A
126} UColTokenParser;
127
128typedef struct {
129 const UChar *subName;
130 int32_t subLen;
131 UColAttributeValue attrVal;
132} ucolTokSuboption;
133
134typedef struct {
135 const UChar *optionName;
136 int32_t optionLen;
137 const ucolTokSuboption *subopts;
138 int32_t subSize;
139 UColAttribute attr;
140} ucolTokOption;
141
/*
 * Evaluates to nonzero when ch lies in one of these ASCII ranges:
 *   U+0020..U+002F, U+003A..U+003F, U+005B..U+0060, U+007B, U+007D..U+007E.
 * U+0040 ('@') and U+007C ('|') are not matched.
 *
 * The argument is evaluated several times, so do not pass an expression
 * with side effects.
 */
#define ucol_tok_isSpecialChar(ch)              \
    (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
      (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
      (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
      (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
      (ch) == 0x007B))
148
149
150U_CFUNC
151uint32_t ucol_tok_assembleTokenList(UColTokenParser *src,
152 UParseError *parseError,
153 UErrorCode *status);
154
155U_CFUNC
374ca955 156void ucol_tok_initTokenList(UColTokenParser *src, const UChar *rules, const uint32_t rulesLength, const UCollator *UCA, UErrorCode *status);
b75a7d8f
A
157
158U_CFUNC void ucol_tok_closeTokenList(UColTokenParser *src);
159
160U_CAPI const UChar* U_EXPORT2 ucol_tok_parseNextToken(UColTokenParser *src,
161 UBool startOfRules,
162 UParseError *parseError,
163 UErrorCode *status);
164
165U_CAPI const UChar * U_EXPORT2
166ucol_tok_getNextArgument(const UChar *start, const UChar *end,
167 UColAttribute *attrib, UColAttributeValue *value,
168 UErrorCode *status);
374ca955
A
169U_CAPI int32_t U_EXPORT2 ucol_inv_getNextCE(const UColTokenParser *src,
170 uint32_t CE, uint32_t contCE,
171 uint32_t *nextCE, uint32_t *nextContCE,
172 uint32_t strength);
46f4442e 173U_CFUNC int32_t U_EXPORT2 ucol_inv_getPrevCE(const UColTokenParser *src,
374ca955
A
174 uint32_t CE, uint32_t contCE,
175 uint32_t *prevCE, uint32_t *prevContCE,
176 uint32_t strength);
177
b75a7d8f
A
178
179#endif /* #if !UCONFIG_NO_COLLATION */
180
181#endif