1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucharstrie.h
10 * tab size: 8 (not used)
13 * created on: 2010nov14
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/appendable.h"
19 #include "unicode/ucharstrie.h"
20 #include "unicode/uobject.h"
21 #include "unicode/utf16.h"
27 UCharsTrie::~UCharsTrie() {
28 uprv_free(ownedArray_
);
32 UCharsTrie::current() const {
33 const UChar
*pos
=pos_
;
35 return USTRINGTRIE_NO_MATCH
;
38 return (remainingMatchLength_
<0 && (node
=*pos
)>=kMinValueLead
) ?
39 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
44 UCharsTrie::firstForCodePoint(UChar32 cp
) {
47 (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp
))) ?
49 USTRINGTRIE_NO_MATCH
);
53 UCharsTrie::nextForCodePoint(UChar32 cp
) {
56 (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp
))) ?
58 USTRINGTRIE_NO_MATCH
);
62 UCharsTrie::branchNext(const UChar
*pos
, int32_t length
, int32_t uchar
) {
63 // Branch according to the current unit.
68 // The length of the branch is the number of units to select from.
69 // The data structure encodes a binary search.
70 while(length
>kMaxBranchLinearSubNodeLength
) {
75 length
=length
-(length
>>1);
79 // Drop down to linear search for the last few units.
80 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
81 // and divides length by 2.
84 UStringTrieResult result
;
86 if(node
&kValueIsFinal
) {
87 // Leave the final value for getValue() to read.
88 result
=USTRINGTRIE_FINAL_VALUE
;
90 // Use the non-final value as the jump delta.
92 // int32_t delta=readValue(pos, node);
94 if(node
<kMinTwoUnitValueLead
) {
96 } else if(node
<kThreeUnitValueLead
) {
97 delta
=((node
-kMinTwoUnitValueLead
)<<16)|*pos
++;
99 delta
=(pos
[0]<<16)|pos
[1];
105 result
= node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
116 return node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
119 return USTRINGTRIE_NO_MATCH
;
124 UCharsTrie::nextImpl(const UChar
*pos
, int32_t uchar
) {
127 if(node
<kMinLinearMatch
) {
128 return branchNext(pos
, node
, uchar
);
129 } else if(node
<kMinValueLead
) {
130 // Match the first of length+1 units.
131 int32_t length
=node
-kMinLinearMatch
; // Actual match length minus 1.
133 remainingMatchLength_
=--length
;
135 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
136 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
141 } else if(node
&kValueIsFinal
) {
142 // No further matching units.
145 // Skip intermediate value.
146 pos
=skipNodeValue(pos
, node
);
151 return USTRINGTRIE_NO_MATCH
;
155 UCharsTrie::next(int32_t uchar
) {
156 const UChar
*pos
=pos_
;
158 return USTRINGTRIE_NO_MATCH
;
160 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
162 // Remaining part of a linear-match node.
164 remainingMatchLength_
=--length
;
167 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
168 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
171 return USTRINGTRIE_NO_MATCH
;
174 return nextImpl(pos
, uchar
);
178 UCharsTrie::next(ConstChar16Ptr ptr
, int32_t sLength
) {
180 if(sLength
<0 ? *s
==0 : sLength
==0) {
184 const UChar
*pos
=pos_
;
186 return USTRINGTRIE_NO_MATCH
;
188 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
190 // Fetch the next input unit, if there is one.
191 // Continue a linear-match node without rechecking sLength<0.
195 if((uchar
=*s
++)==0) {
196 remainingMatchLength_
=length
;
199 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
200 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
203 remainingMatchLength_
=length
;
208 return USTRINGTRIE_NO_MATCH
;
216 remainingMatchLength_
=length
;
219 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
220 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
225 remainingMatchLength_
=length
;
230 return USTRINGTRIE_NO_MATCH
;
238 if(node
<kMinLinearMatch
) {
239 UStringTrieResult result
=branchNext(pos
, node
, uchar
);
240 if(result
==USTRINGTRIE_NO_MATCH
) {
241 return USTRINGTRIE_NO_MATCH
;
243 // Fetch the next input unit, if there is one.
245 if((uchar
=*s
++)==0) {
255 if(result
==USTRINGTRIE_FINAL_VALUE
) {
256 // No further matching units.
258 return USTRINGTRIE_NO_MATCH
;
260 pos
=pos_
; // branchNext() advanced pos and wrote it to pos_ .
262 } else if(node
<kMinValueLead
) {
263 // Match length+1 units.
264 length
=node
-kMinLinearMatch
; // Actual match length minus 1.
267 return USTRINGTRIE_NO_MATCH
;
272 } else if(node
&kValueIsFinal
) {
273 // No further matching units.
275 return USTRINGTRIE_NO_MATCH
;
277 // Skip intermediate value.
278 pos
=skipNodeValue(pos
, node
);
286 UCharsTrie::findUniqueValueFromBranch(const UChar
*pos
, int32_t length
,
287 UBool haveUniqueValue
, int32_t &uniqueValue
) {
288 while(length
>kMaxBranchLinearSubNodeLength
) {
289 ++pos
; // ignore the comparison unit
290 if(NULL
==findUniqueValueFromBranch(jumpByDelta(pos
), length
>>1, haveUniqueValue
, uniqueValue
)) {
293 length
=length
-(length
>>1);
297 ++pos
; // ignore a comparison unit
300 UBool isFinal
=(UBool
)(node
>>15);
302 int32_t value
=readValue(pos
, node
);
303 pos
=skipValue(pos
, node
);
305 if(haveUniqueValue
) {
306 if(value
!=uniqueValue
) {
311 haveUniqueValue
=TRUE
;
314 if(!findUniqueValue(pos
+value
, haveUniqueValue
, uniqueValue
)) {
317 haveUniqueValue
=TRUE
;
320 return pos
+1; // ignore the last comparison unit
324 UCharsTrie::findUniqueValue(const UChar
*pos
, UBool haveUniqueValue
, int32_t &uniqueValue
) {
327 if(node
<kMinLinearMatch
) {
331 pos
=findUniqueValueFromBranch(pos
, node
+1, haveUniqueValue
, uniqueValue
);
335 haveUniqueValue
=TRUE
;
337 } else if(node
<kMinValueLead
) {
339 pos
+=node
-kMinLinearMatch
+1; // Ignore the match units.
342 UBool isFinal
=(UBool
)(node
>>15);
345 value
=readValue(pos
, node
&0x7fff);
347 value
=readNodeValue(pos
, node
);
349 if(haveUniqueValue
) {
350 if(value
!=uniqueValue
) {
355 haveUniqueValue
=TRUE
;
360 pos
=skipNodeValue(pos
, node
);
367 UCharsTrie::getNextUChars(Appendable
&out
) const {
368 const UChar
*pos
=pos_
;
372 if(remainingMatchLength_
>=0) {
373 out
.appendCodeUnit(*pos
); // Next unit of a pending linear-match node.
377 if(node
>=kMinValueLead
) {
378 if(node
&kValueIsFinal
) {
381 pos
=skipNodeValue(pos
, node
);
385 if(node
<kMinLinearMatch
) {
389 out
.reserveAppendCapacity(++node
);
390 getNextBranchUChars(pos
, node
, out
);
393 // First unit of the linear-match node.
394 out
.appendCodeUnit(*pos
);
400 UCharsTrie::getNextBranchUChars(const UChar
*pos
, int32_t length
, Appendable
&out
) {
401 while(length
>kMaxBranchLinearSubNodeLength
) {
402 ++pos
; // ignore the comparison unit
403 getNextBranchUChars(jumpByDelta(pos
), length
>>1, out
);
404 length
=length
-(length
>>1);
408 out
.appendCodeUnit(*pos
++);
411 out
.appendCodeUnit(*pos
);