2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucharstrie.h
8 * tab size: 8 (not used)
11 * created on: 2010nov14
12 * created by: Markus W. Scherer
15 #include "unicode/utypes.h"
16 #include "unicode/appendable.h"
17 #include "unicode/ucharstrie.h"
18 #include "unicode/uobject.h"
19 #include "unicode/utf16.h"
25 UCharsTrie::~UCharsTrie() {
26 uprv_free(ownedArray_
);
30 UCharsTrie::current() const {
31 const UChar
*pos
=pos_
;
33 return USTRINGTRIE_NO_MATCH
;
36 return (remainingMatchLength_
<0 && (node
=*pos
)>=kMinValueLead
) ?
37 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
42 UCharsTrie::firstForCodePoint(UChar32 cp
) {
45 (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp
))) ?
47 USTRINGTRIE_NO_MATCH
);
51 UCharsTrie::nextForCodePoint(UChar32 cp
) {
54 (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp
))) ?
56 USTRINGTRIE_NO_MATCH
);
60 UCharsTrie::branchNext(const UChar
*pos
, int32_t length
, int32_t uchar
) {
61 // Branch according to the current unit.
66 // The length of the branch is the number of units to select from.
67 // The data structure encodes a binary search.
68 while(length
>kMaxBranchLinearSubNodeLength
) {
73 length
=length
-(length
>>1);
77 // Drop down to linear search for the last few units.
78 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
79 // and divides length by 2.
82 UStringTrieResult result
;
84 if(node
&kValueIsFinal
) {
85 // Leave the final value for getValue() to read.
86 result
=USTRINGTRIE_FINAL_VALUE
;
88 // Use the non-final value as the jump delta.
90 // int32_t delta=readValue(pos, node);
92 if(node
<kMinTwoUnitValueLead
) {
94 } else if(node
<kThreeUnitValueLead
) {
95 delta
=((node
-kMinTwoUnitValueLead
)<<16)|*pos
++;
97 delta
=(pos
[0]<<16)|pos
[1];
103 result
= node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
114 return node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
117 return USTRINGTRIE_NO_MATCH
;
122 UCharsTrie::nextImpl(const UChar
*pos
, int32_t uchar
) {
125 if(node
<kMinLinearMatch
) {
126 return branchNext(pos
, node
, uchar
);
127 } else if(node
<kMinValueLead
) {
128 // Match the first of length+1 units.
129 int32_t length
=node
-kMinLinearMatch
; // Actual match length minus 1.
131 remainingMatchLength_
=--length
;
133 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
134 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
139 } else if(node
&kValueIsFinal
) {
140 // No further matching units.
143 // Skip intermediate value.
144 pos
=skipNodeValue(pos
, node
);
149 return USTRINGTRIE_NO_MATCH
;
153 UCharsTrie::next(int32_t uchar
) {
154 const UChar
*pos
=pos_
;
156 return USTRINGTRIE_NO_MATCH
;
158 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
160 // Remaining part of a linear-match node.
162 remainingMatchLength_
=--length
;
165 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
166 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
169 return USTRINGTRIE_NO_MATCH
;
172 return nextImpl(pos
, uchar
);
176 UCharsTrie::next(const UChar
*s
, int32_t sLength
) {
177 if(sLength
<0 ? *s
==0 : sLength
==0) {
181 const UChar
*pos
=pos_
;
183 return USTRINGTRIE_NO_MATCH
;
185 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
187 // Fetch the next input unit, if there is one.
188 // Continue a linear-match node without rechecking sLength<0.
192 if((uchar
=*s
++)==0) {
193 remainingMatchLength_
=length
;
196 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
197 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
200 remainingMatchLength_
=length
;
205 return USTRINGTRIE_NO_MATCH
;
213 remainingMatchLength_
=length
;
216 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
217 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
222 remainingMatchLength_
=length
;
227 return USTRINGTRIE_NO_MATCH
;
235 if(node
<kMinLinearMatch
) {
236 UStringTrieResult result
=branchNext(pos
, node
, uchar
);
237 if(result
==USTRINGTRIE_NO_MATCH
) {
238 return USTRINGTRIE_NO_MATCH
;
240 // Fetch the next input unit, if there is one.
242 if((uchar
=*s
++)==0) {
252 if(result
==USTRINGTRIE_FINAL_VALUE
) {
253 // No further matching units.
255 return USTRINGTRIE_NO_MATCH
;
257 pos
=pos_
; // branchNext() advanced pos and wrote it to pos_ .
259 } else if(node
<kMinValueLead
) {
260 // Match length+1 units.
261 length
=node
-kMinLinearMatch
; // Actual match length minus 1.
264 return USTRINGTRIE_NO_MATCH
;
269 } else if(node
&kValueIsFinal
) {
270 // No further matching units.
272 return USTRINGTRIE_NO_MATCH
;
274 // Skip intermediate value.
275 pos
=skipNodeValue(pos
, node
);
283 UCharsTrie::findUniqueValueFromBranch(const UChar
*pos
, int32_t length
,
284 UBool haveUniqueValue
, int32_t &uniqueValue
) {
285 while(length
>kMaxBranchLinearSubNodeLength
) {
286 ++pos
; // ignore the comparison unit
287 if(NULL
==findUniqueValueFromBranch(jumpByDelta(pos
), length
>>1, haveUniqueValue
, uniqueValue
)) {
290 length
=length
-(length
>>1);
294 ++pos
; // ignore a comparison unit
297 UBool isFinal
=(UBool
)(node
>>15);
299 int32_t value
=readValue(pos
, node
);
300 pos
=skipValue(pos
, node
);
302 if(haveUniqueValue
) {
303 if(value
!=uniqueValue
) {
308 haveUniqueValue
=TRUE
;
311 if(!findUniqueValue(pos
+value
, haveUniqueValue
, uniqueValue
)) {
314 haveUniqueValue
=TRUE
;
317 return pos
+1; // ignore the last comparison unit
321 UCharsTrie::findUniqueValue(const UChar
*pos
, UBool haveUniqueValue
, int32_t &uniqueValue
) {
324 if(node
<kMinLinearMatch
) {
328 pos
=findUniqueValueFromBranch(pos
, node
+1, haveUniqueValue
, uniqueValue
);
332 haveUniqueValue
=TRUE
;
334 } else if(node
<kMinValueLead
) {
336 pos
+=node
-kMinLinearMatch
+1; // Ignore the match units.
339 UBool isFinal
=(UBool
)(node
>>15);
342 value
=readValue(pos
, node
&0x7fff);
344 value
=readNodeValue(pos
, node
);
346 if(haveUniqueValue
) {
347 if(value
!=uniqueValue
) {
352 haveUniqueValue
=TRUE
;
357 pos
=skipNodeValue(pos
, node
);
364 UCharsTrie::getNextUChars(Appendable
&out
) const {
365 const UChar
*pos
=pos_
;
369 if(remainingMatchLength_
>=0) {
370 out
.appendCodeUnit(*pos
); // Next unit of a pending linear-match node.
374 if(node
>=kMinValueLead
) {
375 if(node
&kValueIsFinal
) {
378 pos
=skipNodeValue(pos
, node
);
382 if(node
<kMinLinearMatch
) {
386 out
.reserveAppendCapacity(++node
);
387 getNextBranchUChars(pos
, node
, out
);
390 // First unit of the linear-match node.
391 out
.appendCodeUnit(*pos
);
397 UCharsTrie::getNextBranchUChars(const UChar
*pos
, int32_t length
, Appendable
&out
) {
398 while(length
>kMaxBranchLinearSubNodeLength
) {
399 ++pos
; // ignore the comparison unit
400 getNextBranchUChars(jumpByDelta(pos
), length
>>1, out
);
401 length
=length
-(length
>>1);
405 out
.appendCodeUnit(*pos
++);
408 out
.appendCodeUnit(*pos
);