]>
Commit | Line | Data |
---|---|---|
4388f060 A |
1 | /* |
2 | ******************************************************************************* | |
3 | * Copyright (C) 2010-2012, International Business Machines | |
4 | * Corporation and others. All Rights Reserved. | |
5 | ******************************************************************************* | |
6 | * file name: ucharstrie.h | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2010nov14 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #ifndef __UCHARSTRIE_H__ | |
16 | #define __UCHARSTRIE_H__ | |
17 | ||
18 | /** | |
19 | * \file | |
20 | * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences) | |
21 | * to integer values. | |
22 | */ | |
23 | ||
24 | #include "unicode/utypes.h" | |
25 | #include "unicode/unistr.h" | |
26 | #include "unicode/uobject.h" | |
27 | #include "unicode/ustringtrie.h" | |
28 | ||
29 | U_NAMESPACE_BEGIN | |
30 | ||
31 | class Appendable; | |
32 | class UCharsTrieBuilder; | |
33 | class UVector32; | |
34 | ||
35 | /** | |
36 | * Light-weight, non-const reader class for a UCharsTrie. | |
37 | * Traverses a UChar-serialized data structure with minimal state, | |
38 | * for mapping strings (16-bit-unit sequences) to non-negative integer values. | |
39 | * | |
40 | * This class owns the serialized trie data only if it was constructed by | |
41 | * the builder's build() method. | |
42 | * The public constructor and the copy constructor only alias the data (only copy the pointer). | |
43 | * There is no assignment operator. | |
44 | * | |
45 | * This class is not intended for public subclassing. | |
46 | * @stable ICU 4.8 | |
47 | */ | |
48 | class U_COMMON_API UCharsTrie : public UMemory { | |
49 | public: | |
50 | /** | |
51 | * Constructs a UCharsTrie reader instance. | |
52 | * | |
53 | * The trieUChars must contain a copy of a UChar sequence from the UCharsTrieBuilder, | |
54 | * starting with the first UChar of that sequence. | |
55 | * The UCharsTrie object will not read more UChars than | |
56 | * the UCharsTrieBuilder generated in the corresponding build() call. | |
57 | * | |
58 | * The array is not copied/cloned and must not be modified while | |
59 | * the UCharsTrie object is in use. | |
60 | * | |
61 | * @param trieUChars The UChar array that contains the serialized trie. | |
62 | * @stable ICU 4.8 | |
63 | */ | |
64 | UCharsTrie(const UChar *trieUChars) | |
65 | : ownedArray_(NULL), uchars_(trieUChars), | |
66 | pos_(uchars_), remainingMatchLength_(-1) {} | |
67 | ||
68 | /** | |
69 | * Destructor. | |
70 | * @stable ICU 4.8 | |
71 | */ | |
72 | ~UCharsTrie(); | |
73 | ||
74 | /** | |
75 | * Copy constructor, copies the other trie reader object and its state, | |
76 | * but not the UChar array which will be shared. (Shallow copy.) | |
77 | * @param other Another UCharsTrie object. | |
78 | * @stable ICU 4.8 | |
79 | */ | |
80 | UCharsTrie(const UCharsTrie &other) | |
81 | : ownedArray_(NULL), uchars_(other.uchars_), | |
82 | pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {} | |
83 | ||
84 | /** | |
85 | * Resets this trie to its initial state. | |
86 | * @return *this | |
87 | * @stable ICU 4.8 | |
88 | */ | |
89 | UCharsTrie &reset() { | |
90 | pos_=uchars_; | |
91 | remainingMatchLength_=-1; | |
92 | return *this; | |
93 | } | |
94 | ||
95 | /** | |
96 | * UCharsTrie state object, for saving a trie's current state | |
97 | * and resetting the trie back to this state later. | |
98 | * @stable ICU 4.8 | |
99 | */ | |
100 | class State : public UMemory { | |
101 | public: | |
102 | /** | |
103 | * Constructs an empty State. | |
104 | * @stable ICU 4.8 | |
105 | */ | |
106 | State() { uchars=NULL; } | |
107 | private: | |
108 | friend class UCharsTrie; | |
109 | ||
110 | const UChar *uchars; | |
111 | const UChar *pos; | |
112 | int32_t remainingMatchLength; | |
113 | }; | |
114 | ||
115 | /** | |
116 | * Saves the state of this trie. | |
117 | * @param state The State object to hold the trie's state. | |
118 | * @return *this | |
119 | * @see resetToState | |
120 | * @stable ICU 4.8 | |
121 | */ | |
122 | const UCharsTrie &saveState(State &state) const { | |
123 | state.uchars=uchars_; | |
124 | state.pos=pos_; | |
125 | state.remainingMatchLength=remainingMatchLength_; | |
126 | return *this; | |
127 | } | |
128 | ||
129 | /** | |
130 | * Resets this trie to the saved state. | |
131 | * If the state object contains no state, or the state of a different trie, | |
132 | * then this trie remains unchanged. | |
133 | * @param state The State object which holds a saved trie state. | |
134 | * @return *this | |
135 | * @see saveState | |
136 | * @see reset | |
137 | * @stable ICU 4.8 | |
138 | */ | |
139 | UCharsTrie &resetToState(const State &state) { | |
140 | if(uchars_==state.uchars && uchars_!=NULL) { | |
141 | pos_=state.pos; | |
142 | remainingMatchLength_=state.remainingMatchLength; | |
143 | } | |
144 | return *this; | |
145 | } | |
146 | ||
147 | /** | |
148 | * Determines whether the string so far matches, whether it has a value, | |
149 | * and whether another input UChar can continue a matching string. | |
150 | * @return The match/value Result. | |
151 | * @stable ICU 4.8 | |
152 | */ | |
153 | UStringTrieResult current() const; | |
154 | ||
155 | /** | |
156 | * Traverses the trie from the initial state for this input UChar. | |
157 | * Equivalent to reset().next(uchar). | |
158 | * @param uchar Input char value. Values below 0 and above 0xffff will never match. | |
159 | * @return The match/value Result. | |
160 | * @stable ICU 4.8 | |
161 | */ | |
162 | inline UStringTrieResult first(int32_t uchar) { | |
163 | remainingMatchLength_=-1; | |
164 | return nextImpl(uchars_, uchar); | |
165 | } | |
166 | ||
167 | /** | |
168 | * Traverses the trie from the initial state for the | |
169 | * one or two UTF-16 code units for this input code point. | |
170 | * Equivalent to reset().nextForCodePoint(cp). | |
171 | * @param cp A Unicode code point 0..0x10ffff. | |
172 | * @return The match/value Result. | |
173 | * @stable ICU 4.8 | |
174 | */ | |
175 | UStringTrieResult firstForCodePoint(UChar32 cp); | |
176 | ||
177 | /** | |
178 | * Traverses the trie from the current state for this input UChar. | |
179 | * @param uchar Input char value. Values below 0 and above 0xffff will never match. | |
180 | * @return The match/value Result. | |
181 | * @stable ICU 4.8 | |
182 | */ | |
183 | UStringTrieResult next(int32_t uchar); | |
184 | ||
185 | /** | |
186 | * Traverses the trie from the current state for the | |
187 | * one or two UTF-16 code units for this input code point. | |
188 | * @param cp A Unicode code point 0..0x10ffff. | |
189 | * @return The match/value Result. | |
190 | * @stable ICU 4.8 | |
191 | */ | |
192 | UStringTrieResult nextForCodePoint(UChar32 cp); | |
193 | ||
194 | /** | |
195 | * Traverses the trie from the current state for this string. | |
196 | * Equivalent to | |
197 | * \code | |
198 | * Result result=current(); | |
199 | * for(each c in s) | |
200 | * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH; | |
201 | * result=next(c); | |
202 | * return result; | |
203 | * \endcode | |
204 | * @param s A string. Can be NULL if length is 0. | |
205 | * @param length The length of the string. Can be -1 if NUL-terminated. | |
206 | * @return The match/value Result. | |
207 | * @stable ICU 4.8 | |
208 | */ | |
209 | UStringTrieResult next(const UChar *s, int32_t length); | |
210 | ||
211 | /** | |
212 | * Returns a matching string's value if called immediately after | |
213 | * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE. | |
214 | * getValue() can be called multiple times. | |
215 | * | |
216 | * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE! | |
217 | * @return The value for the string so far. | |
218 | * @stable ICU 4.8 | |
219 | */ | |
220 | inline int32_t getValue() const { | |
221 | const UChar *pos=pos_; | |
222 | int32_t leadUnit=*pos++; | |
223 | // U_ASSERT(leadUnit>=kMinValueLead); | |
224 | return leadUnit&kValueIsFinal ? | |
225 | readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit); | |
226 | } | |
227 | ||
228 | /** | |
229 | * Determines whether all strings reachable from the current state | |
230 | * map to the same value. | |
231 | * @param uniqueValue Receives the unique value, if this function returns TRUE. | |
232 | * (output-only) | |
233 | * @return TRUE if all strings reachable from the current state | |
234 | * map to the same value. | |
235 | * @stable ICU 4.8 | |
236 | */ | |
237 | inline UBool hasUniqueValue(int32_t &uniqueValue) const { | |
238 | const UChar *pos=pos_; | |
239 | // Skip the rest of a pending linear-match node. | |
240 | return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue); | |
241 | } | |
242 | ||
243 | /** | |
244 | * Finds each UChar which continues the string from the current state. | |
245 | * That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now. | |
246 | * @param out Each next UChar is appended to this object. | |
247 | * @return the number of UChars which continue the string from here | |
248 | * @stable ICU 4.8 | |
249 | */ | |
250 | int32_t getNextUChars(Appendable &out) const; | |
251 | ||
252 | /** | |
253 | * Iterator for all of the (string, value) pairs in a UCharsTrie. | |
254 | * @stable ICU 4.8 | |
255 | */ | |
256 | class U_COMMON_API Iterator : public UMemory { | |
257 | public: | |
258 | /** | |
259 | * Iterates from the root of a UChar-serialized UCharsTrie. | |
260 | * @param trieUChars The trie UChars. | |
261 | * @param maxStringLength If 0, the iterator returns full strings. | |
262 | * Otherwise, the iterator returns strings with this maximum length. | |
263 | * @param errorCode Standard ICU error code. Its input value must | |
264 | * pass the U_SUCCESS() test, or else the function returns | |
265 | * immediately. Check for U_FAILURE() on output or use with | |
266 | * function chaining. (See User Guide for details.) | |
267 | * @stable ICU 4.8 | |
268 | */ | |
269 | Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode); | |
270 | ||
271 | /** | |
272 | * Iterates from the current state of the specified UCharsTrie. | |
273 | * @param trie The trie whose state will be copied for iteration. | |
274 | * @param maxStringLength If 0, the iterator returns full strings. | |
275 | * Otherwise, the iterator returns strings with this maximum length. | |
276 | * @param errorCode Standard ICU error code. Its input value must | |
277 | * pass the U_SUCCESS() test, or else the function returns | |
278 | * immediately. Check for U_FAILURE() on output or use with | |
279 | * function chaining. (See User Guide for details.) | |
280 | * @stable ICU 4.8 | |
281 | */ | |
282 | Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode); | |
283 | ||
284 | /** | |
285 | * Destructor. | |
286 | * @stable ICU 4.8 | |
287 | */ | |
288 | ~Iterator(); | |
289 | ||
290 | /** | |
291 | * Resets this iterator to its initial state. | |
292 | * @return *this | |
293 | * @stable ICU 4.8 | |
294 | */ | |
295 | Iterator &reset(); | |
296 | ||
297 | /** | |
298 | * @return TRUE if there are more elements. | |
299 | * @stable ICU 4.8 | |
300 | */ | |
301 | UBool hasNext() const; | |
302 | ||
303 | /** | |
304 | * Finds the next (string, value) pair if there is one. | |
305 | * | |
306 | * If the string is truncated to the maximum length and does not | |
307 | * have a real value, then the value is set to -1. | |
308 | * In this case, this "not a real value" is indistinguishable from | |
309 | * a real value of -1. | |
310 | * @param errorCode Standard ICU error code. Its input value must | |
311 | * pass the U_SUCCESS() test, or else the function returns | |
312 | * immediately. Check for U_FAILURE() on output or use with | |
313 | * function chaining. (See User Guide for details.) | |
314 | * @return TRUE if there is another element. | |
315 | * @stable ICU 4.8 | |
316 | */ | |
317 | UBool next(UErrorCode &errorCode); | |
318 | ||
319 | /** | |
320 | * @return The string for the last successful next(). | |
321 | * @stable ICU 4.8 | |
322 | */ | |
323 | const UnicodeString &getString() const { return str_; } | |
324 | /** | |
325 | * @return The value for the last successful next(). | |
326 | * @stable ICU 4.8 | |
327 | */ | |
328 | int32_t getValue() const { return value_; } | |
329 | ||
330 | private: | |
331 | UBool truncateAndStop() { | |
332 | pos_=NULL; | |
333 | value_=-1; // no real value for str | |
334 | return TRUE; | |
335 | } | |
336 | ||
337 | const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode); | |
338 | ||
339 | const UChar *uchars_; | |
340 | const UChar *pos_; | |
341 | const UChar *initialPos_; | |
342 | int32_t remainingMatchLength_; | |
343 | int32_t initialRemainingMatchLength_; | |
344 | UBool skipValue_; // Skip intermediate value which was already delivered. | |
345 | ||
346 | UnicodeString str_; | |
347 | int32_t maxLength_; | |
348 | int32_t value_; | |
349 | ||
350 | // The stack stores pairs of integers for backtracking to another | |
351 | // outbound edge of a branch node. | |
352 | // The first integer is an offset from uchars_. | |
353 | // The second integer has the str_.length() from before the node in bits 15..0, | |
354 | // and the remaining branch length in bits 31..16. | |
355 | // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit, | |
356 | // but the code looks more confusing that way.) | |
357 | UVector32 *stack_; | |
358 | }; | |
359 | ||
360 | private: | |
361 | friend class UCharsTrieBuilder; | |
362 | ||
363 | /** | |
364 | * Constructs a UCharsTrie reader instance. | |
365 | * Unlike the public constructor which just aliases an array, | |
366 | * this constructor adopts the builder's array. | |
367 | * This constructor is only called by the builder. | |
368 | */ | |
369 | UCharsTrie(UChar *adoptUChars, const UChar *trieUChars) | |
370 | : ownedArray_(adoptUChars), uchars_(trieUChars), | |
371 | pos_(uchars_), remainingMatchLength_(-1) {} | |
372 | ||
373 | // No assignment operator. | |
374 | UCharsTrie &operator=(const UCharsTrie &other); | |
375 | ||
376 | inline void stop() { | |
377 | pos_=NULL; | |
378 | } | |
379 | ||
380 | // Reads a compact 32-bit integer. | |
381 | // pos is already after the leadUnit, and the lead unit has bit 15 reset. | |
382 | static inline int32_t readValue(const UChar *pos, int32_t leadUnit) { | |
383 | int32_t value; | |
384 | if(leadUnit<kMinTwoUnitValueLead) { | |
385 | value=leadUnit; | |
386 | } else if(leadUnit<kThreeUnitValueLead) { | |
387 | value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos; | |
388 | } else { | |
389 | value=(pos[0]<<16)|pos[1]; | |
390 | } | |
391 | return value; | |
392 | } | |
393 | static inline const UChar *skipValue(const UChar *pos, int32_t leadUnit) { | |
394 | if(leadUnit>=kMinTwoUnitValueLead) { | |
395 | if(leadUnit<kThreeUnitValueLead) { | |
396 | ++pos; | |
397 | } else { | |
398 | pos+=2; | |
399 | } | |
400 | } | |
401 | return pos; | |
402 | } | |
403 | static inline const UChar *skipValue(const UChar *pos) { | |
404 | int32_t leadUnit=*pos++; | |
405 | return skipValue(pos, leadUnit&0x7fff); | |
406 | } | |
407 | ||
408 | static inline int32_t readNodeValue(const UChar *pos, int32_t leadUnit) { | |
409 | // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal); | |
410 | int32_t value; | |
411 | if(leadUnit<kMinTwoUnitNodeValueLead) { | |
412 | value=(leadUnit>>6)-1; | |
413 | } else if(leadUnit<kThreeUnitNodeValueLead) { | |
414 | value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos; | |
415 | } else { | |
416 | value=(pos[0]<<16)|pos[1]; | |
417 | } | |
418 | return value; | |
419 | } | |
420 | static inline const UChar *skipNodeValue(const UChar *pos, int32_t leadUnit) { | |
421 | // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal); | |
422 | if(leadUnit>=kMinTwoUnitNodeValueLead) { | |
423 | if(leadUnit<kThreeUnitNodeValueLead) { | |
424 | ++pos; | |
425 | } else { | |
426 | pos+=2; | |
427 | } | |
428 | } | |
429 | return pos; | |
430 | } | |
431 | ||
432 | static inline const UChar *jumpByDelta(const UChar *pos) { | |
433 | int32_t delta=*pos++; | |
434 | if(delta>=kMinTwoUnitDeltaLead) { | |
435 | if(delta==kThreeUnitDeltaLead) { | |
436 | delta=(pos[0]<<16)|pos[1]; | |
437 | pos+=2; | |
438 | } else { | |
439 | delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++; | |
440 | } | |
441 | } | |
442 | return pos+delta; | |
443 | } | |
444 | ||
445 | static const UChar *skipDelta(const UChar *pos) { | |
446 | int32_t delta=*pos++; | |
447 | if(delta>=kMinTwoUnitDeltaLead) { | |
448 | if(delta==kThreeUnitDeltaLead) { | |
449 | pos+=2; | |
450 | } else { | |
451 | ++pos; | |
452 | } | |
453 | } | |
454 | return pos; | |
455 | } | |
456 | ||
457 | static inline UStringTrieResult valueResult(int32_t node) { | |
458 | return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15)); | |
459 | } | |
460 | ||
461 | // Handles a branch node for both next(uchar) and next(string). | |
462 | UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar); | |
463 | ||
464 | // Requires remainingLength_<0. | |
465 | UStringTrieResult nextImpl(const UChar *pos, int32_t uchar); | |
466 | ||
467 | // Helper functions for hasUniqueValue(). | |
468 | // Recursively finds a unique value (or whether there is not a unique one) | |
469 | // from a branch. | |
470 | static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length, | |
471 | UBool haveUniqueValue, int32_t &uniqueValue); | |
472 | // Recursively finds a unique value (or whether there is not a unique one) | |
473 | // starting from a position on a node lead unit. | |
474 | static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue); | |
475 | ||
476 | // Helper functions for getNextUChars(). | |
477 | // getNextUChars() when pos is on a branch node. | |
478 | static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out); | |
479 | ||
480 | // UCharsTrie data structure | |
481 | // | |
482 | // The trie consists of a series of UChar-serialized nodes for incremental | |
483 | // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer) | |
484 | // The root node is at the beginning of the trie data. | |
485 | // | |
486 | // Types of nodes are distinguished by their node lead unit ranges. | |
487 | // After each node, except a final-value node, another node follows to | |
488 | // encode match values or continue matching further units. | |
489 | // | |
490 | // Node types: | |
491 | // - Final-value node: Stores a 32-bit integer in a compact, variable-length format. | |
492 | // The value is for the string/UChar sequence so far. | |
493 | // - Match node, optionally with an intermediate value in a different compact format. | |
494 | // The value, if present, is for the string/UChar sequence so far. | |
495 | // | |
496 | // Aside from the value, which uses the node lead unit's high bits: | |
497 | // | |
498 | // - Linear-match node: Matches a number of units. | |
499 | // - Branch node: Branches to other nodes according to the current input unit. | |
500 | // The node unit is the length of the branch (number of units to select from) | |
501 | // minus 1. It is followed by a sub-node: | |
502 | // - If the length is at most kMaxBranchLinearSubNodeLength, then | |
503 | // there are length-1 (key, value) pairs and then one more comparison unit. | |
504 | // If one of the key units matches, then the value is either a final value for | |
505 | // the string so far, or a "jump" delta to the next node. | |
506 | // If the last unit matches, then matching continues with the next node. | |
507 | // (Values have the same encoding as final-value nodes.) | |
508 | // - If the length is greater than kMaxBranchLinearSubNodeLength, then | |
509 | // there is one unit and one "jump" delta. | |
510 | // If the input unit is less than the sub-node unit, then "jump" by delta to | |
511 | // the next sub-node which will have a length of length/2. | |
512 | // (The delta has its own compact encoding.) | |
513 | // Otherwise, skip the "jump" delta to the next sub-node | |
514 | // which will have a length of length-length/2. | |
515 | ||
516 | // Match-node lead unit values, after masking off intermediate-value bits: | |
517 | ||
518 | // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise | |
519 | // the length is one more than the next unit. | |
520 | ||
521 | // For a branch sub-node with at most this many entries, we drop down | |
522 | // to a linear search. | |
523 | static const int32_t kMaxBranchLinearSubNodeLength=5; | |
524 | ||
525 | // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node. | |
526 | static const int32_t kMinLinearMatch=0x30; | |
527 | static const int32_t kMaxLinearMatchLength=0x10; | |
528 | ||
529 | // Match-node lead unit bits 14..6 for the optional intermediate value. | |
530 | // If these bits are 0, then there is no intermediate value. | |
531 | // Otherwise, see the *NodeValue* constants below. | |
532 | static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040 | |
533 | static const int32_t kNodeTypeMask=kMinValueLead-1; // 0x003f | |
534 | ||
535 | // A final-value node has bit 15 set. | |
536 | static const int32_t kValueIsFinal=0x8000; | |
537 | ||
538 | // Compact value: After testing and masking off bit 15, use the following thresholds. | |
539 | static const int32_t kMaxOneUnitValue=0x3fff; | |
540 | ||
541 | static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000 | |
542 | static const int32_t kThreeUnitValueLead=0x7fff; | |
543 | ||
544 | static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff | |
545 | ||
546 | // Compact intermediate-value integer, lead unit shared with a branch or linear-match node. | |
547 | static const int32_t kMaxOneUnitNodeValue=0xff; | |
548 | static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040 | |
549 | static const int32_t kThreeUnitNodeValueLead=0x7fc0; | |
550 | ||
551 | static const int32_t kMaxTwoUnitNodeValue= | |
552 | ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff | |
553 | ||
554 | // Compact delta integers. | |
555 | static const int32_t kMaxOneUnitDelta=0xfbff; | |
556 | static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00 | |
557 | static const int32_t kThreeUnitDeltaLead=0xffff; | |
558 | ||
559 | static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff | |
560 | ||
561 | UChar *ownedArray_; | |
562 | ||
563 | // Fixed value referencing the UCharsTrie words. | |
564 | const UChar *uchars_; | |
565 | ||
566 | // Iterator variables. | |
567 | ||
568 | // Pointer to next trie unit to read. NULL if no more matches. | |
569 | const UChar *pos_; | |
570 | // Remaining length of a linear-match node, minus 1. Negative if not in such a node. | |
571 | int32_t remainingMatchLength_; | |
572 | }; | |
573 | ||
574 | U_NAMESPACE_END | |
575 | ||
576 | #endif // __UCHARSTRIE_H__ |