2 *******************************************************************************
4 * Copyright (C) 2001-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucol_tok.h
10 * tab size: 8 (not used)
14 * created by: Vladimir Weinstein
16 * This module reads a tailoring rule string and produces a list of
17 * tokens that will be turned into collation elements
24 #include "unicode/utypes.h"
25 #include "unicode/uset.h"
27 #if !UCONFIG_NO_COLLATION
31 #include "unicode/parseerr.h"
/*
 * Constants used by the tailoring-rule tokenizer.
 * The stray numeric tokens that preceded each directive have been removed so
 * that every '#define' is a valid preprocessor line; all values are unchanged.
 */

/* Two distinct 32-bit marker values (all bits set, and 0xDEADBEEF). */
#define UCOL_TOK_UNSET 0xFFFFFFFF
#define UCOL_TOK_RESET 0xDEADBEEF

/* Polarity values: negative is 0 and positive is 1. */
#define UCOL_TOK_POLARITY_NEGATIVE 0
#define UCOL_TOK_POLARITY_POSITIVE 1

/*
 * TOP (0x04), VARIABLE_TOP (0x08) and SUCCESS (0x10) each occupy one distinct
 * bit; BEFORE (0x03) covers the two lowest bits.
 * NOTE(review): presumably these are OR-ed together into one flags word --
 * confirm against the parser implementation.
 */
#define UCOL_TOK_TOP 0x04
#define UCOL_TOK_VARIABLE_TOP 0x08
#define UCOL_TOK_BEFORE 0x03
#define UCOL_TOK_SUCCESS 0x10

/*
 * This is space for the extra strings that need to be unquoted
 * during the parsing of the rules.
 */
#define UCOL_TOK_EXTRA_RULE_SPACE_SIZE 4096
/*
 * Forward declaration of the token type so that it can be referred to by
 * pointer before the full structure definition appears.
 */
typedef struct UColToken UColToken;
59 uint32_t previousContCE
;
60 int32_t pos
[UCOL_STRENGTH_LIMIT
];
61 uint32_t gapsLo
[3*UCOL_CE_STRENGTH_LIMIT
];
62 uint32_t gapsHi
[3*UCOL_CE_STRENGTH_LIMIT
];
63 uint32_t numStr
[UCOL_CE_STRENGTH_LIMIT
];
64 UColToken
* fStrToken
[UCOL_CE_STRENGTH_LIMIT
];
65 UColToken
* lStrToken
[UCOL_CE_STRENGTH_LIMIT
];
81 uint32_t polarity
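; /* 1 for <, <<, <<<, ',' and ';'; -1 for >, >>, >>>. NOTE(review): UCOL_TOK_POLARITY_NEGATIVE is defined as 0, not -1 -- confirm which value is stored here */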
82 UColTokListHeader
*listHeader
;
/*
 * This is a token that has been parsed
 * but not yet processed. Used to reduce
 * the number of arguments in the parser.
 */
98 uint32_t extensionOffset
;
99 uint32_t extensionLen
;
100 uint32_t prefixOffset
;
103 uint16_t indirectIndex
;
108 UColParsedToken parsedToken
;
111 const UChar
*current
;
112 UChar
*sourceCurrent
;
115 const InverseUCATableHeader
*invUCA
;
116 const UCollator
*UCA
;
117 UHashtable
*tailored
;
120 uint32_t listCapacity
;
121 UColTokListHeader
*lh
;
128 const UChar
*subName
;
130 UColAttributeValue attrVal
;
134 const UChar
*optionName
;
136 const ucolTokSuboption
*subopts
;
141 #define ucol_tok_isSpecialChar(ch) \
142 (((((ch) <= 0x002F) && ((ch) >= 0x0020)) || \
143 (((ch) <= 0x003F) && ((ch) >= 0x003A)) || \
144 (((ch) <= 0x0060) && ((ch) >= 0x005B)) || \
145 (((ch) <= 0x007E) && ((ch) >= 0x007D)) || \
150 uint32_t ucol_tok_assembleTokenList(UColTokenParser
*src
,
151 UParseError
*parseError
,
155 void ucol_tok_initTokenList(UColTokenParser
*src
, const UChar
*rules
, const uint32_t rulesLength
, const UCollator
*UCA
, UErrorCode
*status
);
157 U_CFUNC
void ucol_tok_closeTokenList(UColTokenParser
*src
);
159 U_CAPI
const UChar
* U_EXPORT2
ucol_tok_parseNextToken(UColTokenParser
*src
,
161 UParseError
*parseError
,
164 U_CAPI
const UChar
* U_EXPORT2
165 ucol_tok_getNextArgument(const UChar
*start
, const UChar
*end
,
166 UColAttribute
*attrib
, UColAttributeValue
*value
,
168 U_CAPI
int32_t U_EXPORT2
ucol_inv_getNextCE(const UColTokenParser
*src
,
169 uint32_t CE
, uint32_t contCE
,
170 uint32_t *nextCE
, uint32_t *nextContCE
,
172 U_CAPI
int32_t U_EXPORT2
ucol_inv_getPrevCE(const UColTokenParser
*src
,
173 uint32_t CE
, uint32_t contCE
,
174 uint32_t *prevCE
, uint32_t *prevContCE
,
177 U_CAPI
uint32_t U_EXPORT2
ucol_getCEStrengthDifference(uint32_t CE
, uint32_t contCE
,
178 uint32_t prevCE
, uint32_t prevContCE
);
181 #endif /* #if !UCONFIG_NO_COLLATION */