1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytestrie.cpp
10 * tab size: 8 (not used)
13 * created on: 2010sep25
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/bytestream.h"
19 #include "unicode/bytestrie.h"
20 #include "unicode/uobject.h"
26 BytesTrie::~BytesTrie() {
27 uprv_free(ownedArray_
);
30 // lead byte already shifted right by 1.
32 BytesTrie::readValue(const uint8_t *pos
, int32_t leadByte
) {
34 if(leadByte
<kMinTwoByteValueLead
) {
35 value
=leadByte
-kMinOneByteValueLead
;
36 } else if(leadByte
<kMinThreeByteValueLead
) {
37 value
=((leadByte
-kMinTwoByteValueLead
)<<8)|*pos
;
38 } else if(leadByte
<kFourByteValueLead
) {
39 value
=((leadByte
-kMinThreeByteValueLead
)<<16)|(pos
[0]<<8)|pos
[1];
40 } else if(leadByte
==kFourByteValueLead
) {
41 value
=(pos
[0]<<16)|(pos
[1]<<8)|pos
[2];
43 value
=(pos
[0]<<24)|(pos
[1]<<16)|(pos
[2]<<8)|pos
[3];
49 BytesTrie::jumpByDelta(const uint8_t *pos
) {
51 if(delta
<kMinTwoByteDeltaLead
) {
53 } else if(delta
<kMinThreeByteDeltaLead
) {
54 delta
=((delta
-kMinTwoByteDeltaLead
)<<8)|*pos
++;
55 } else if(delta
<kFourByteDeltaLead
) {
56 delta
=((delta
-kMinThreeByteDeltaLead
)<<16)|(pos
[0]<<8)|pos
[1];
58 } else if(delta
==kFourByteDeltaLead
) {
59 delta
=(pos
[0]<<16)|(pos
[1]<<8)|pos
[2];
62 delta
=(pos
[0]<<24)|(pos
[1]<<16)|(pos
[2]<<8)|pos
[3];
69 BytesTrie::current() const {
70 const uint8_t *pos
=pos_
;
72 return USTRINGTRIE_NO_MATCH
;
75 return (remainingMatchLength_
<0 && (node
=*pos
)>=kMinValueLead
) ?
76 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
81 BytesTrie::branchNext(const uint8_t *pos
, int32_t length
, int32_t inByte
) {
82 // Branch according to the current byte.
87 // The length of the branch is the number of bytes to select from.
88 // The data structure encodes a binary search.
89 while(length
>kMaxBranchLinearSubNodeLength
) {
94 length
=length
-(length
>>1);
98 // Drop down to linear search for the last few bytes.
99 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
100 // and divides length by 2.
103 UStringTrieResult result
;
105 U_ASSERT(node
>=kMinValueLead
);
106 if(node
&kValueIsFinal
) {
107 // Leave the final value for getValue() to read.
108 result
=USTRINGTRIE_FINAL_VALUE
;
110 // Use the non-final value as the jump delta.
112 // int32_t delta=readValue(pos, node>>1);
115 if(node
<kMinTwoByteValueLead
) {
116 delta
=node
-kMinOneByteValueLead
;
117 } else if(node
<kMinThreeByteValueLead
) {
118 delta
=((node
-kMinTwoByteValueLead
)<<8)|*pos
++;
119 } else if(node
<kFourByteValueLead
) {
120 delta
=((node
-kMinThreeByteValueLead
)<<16)|(pos
[0]<<8)|pos
[1];
122 } else if(node
==kFourByteValueLead
) {
123 delta
=(pos
[0]<<16)|(pos
[1]<<8)|pos
[2];
126 delta
=(pos
[0]<<24)|(pos
[1]<<16)|(pos
[2]<<8)|pos
[3];
132 result
= node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
143 return node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
146 return USTRINGTRIE_NO_MATCH
;
151 BytesTrie::nextImpl(const uint8_t *pos
, int32_t inByte
) {
154 if(node
<kMinLinearMatch
) {
155 return branchNext(pos
, node
, inByte
);
156 } else if(node
<kMinValueLead
) {
157 // Match the first of length+1 bytes.
158 int32_t length
=node
-kMinLinearMatch
; // Actual match length minus 1.
160 remainingMatchLength_
=--length
;
162 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
163 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
168 } else if(node
&kValueIsFinal
) {
169 // No further matching bytes.
172 // Skip intermediate value.
173 pos
=skipValue(pos
, node
);
174 // The next node must not also be a value node.
175 U_ASSERT(*pos
<kMinValueLead
);
179 return USTRINGTRIE_NO_MATCH
;
183 BytesTrie::next(int32_t inByte
) {
184 const uint8_t *pos
=pos_
;
186 return USTRINGTRIE_NO_MATCH
;
191 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
193 // Remaining part of a linear-match node.
195 remainingMatchLength_
=--length
;
198 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
199 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
202 return USTRINGTRIE_NO_MATCH
;
205 return nextImpl(pos
, inByte
);
209 BytesTrie::next(const char *s
, int32_t sLength
) {
210 if(sLength
<0 ? *s
==0 : sLength
==0) {
214 const uint8_t *pos
=pos_
;
216 return USTRINGTRIE_NO_MATCH
;
218 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
220 // Fetch the next input byte, if there is one.
221 // Continue a linear-match node without rechecking sLength<0.
225 if((inByte
=*s
++)==0) {
226 remainingMatchLength_
=length
;
229 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
230 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
233 remainingMatchLength_
=length
;
238 return USTRINGTRIE_NO_MATCH
;
246 remainingMatchLength_
=length
;
249 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
250 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
255 remainingMatchLength_
=length
;
260 return USTRINGTRIE_NO_MATCH
;
268 if(node
<kMinLinearMatch
) {
269 UStringTrieResult result
=branchNext(pos
, node
, inByte
);
270 if(result
==USTRINGTRIE_NO_MATCH
) {
271 return USTRINGTRIE_NO_MATCH
;
273 // Fetch the next input byte, if there is one.
275 if((inByte
=*s
++)==0) {
285 if(result
==USTRINGTRIE_FINAL_VALUE
) {
286 // No further matching bytes.
288 return USTRINGTRIE_NO_MATCH
;
290 pos
=pos_
; // branchNext() advanced pos and wrote it to pos_ .
291 } else if(node
<kMinValueLead
) {
292 // Match length+1 bytes.
293 length
=node
-kMinLinearMatch
; // Actual match length minus 1.
296 return USTRINGTRIE_NO_MATCH
;
301 } else if(node
&kValueIsFinal
) {
302 // No further matching bytes.
304 return USTRINGTRIE_NO_MATCH
;
306 // Skip intermediate value.
307 pos
=skipValue(pos
, node
);
308 // The next node must not also be a value node.
309 U_ASSERT(*pos
<kMinValueLead
);
316 BytesTrie::findUniqueValueFromBranch(const uint8_t *pos
, int32_t length
,
317 UBool haveUniqueValue
, int32_t &uniqueValue
) {
318 while(length
>kMaxBranchLinearSubNodeLength
) {
319 ++pos
; // ignore the comparison byte
320 if(NULL
==findUniqueValueFromBranch(jumpByDelta(pos
), length
>>1, haveUniqueValue
, uniqueValue
)) {
323 length
=length
-(length
>>1);
327 ++pos
; // ignore a comparison byte
330 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
331 int32_t value
=readValue(pos
, node
>>1);
332 pos
=skipValue(pos
, node
);
334 if(haveUniqueValue
) {
335 if(value
!=uniqueValue
) {
340 haveUniqueValue
=TRUE
;
343 if(!findUniqueValue(pos
+value
, haveUniqueValue
, uniqueValue
)) {
346 haveUniqueValue
=TRUE
;
349 return pos
+1; // ignore the last comparison byte
353 BytesTrie::findUniqueValue(const uint8_t *pos
, UBool haveUniqueValue
, int32_t &uniqueValue
) {
356 if(node
<kMinLinearMatch
) {
360 pos
=findUniqueValueFromBranch(pos
, node
+1, haveUniqueValue
, uniqueValue
);
364 haveUniqueValue
=TRUE
;
365 } else if(node
<kMinValueLead
) {
367 pos
+=node
-kMinLinearMatch
+1; // Ignore the match bytes.
369 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
370 int32_t value
=readValue(pos
, node
>>1);
371 if(haveUniqueValue
) {
372 if(value
!=uniqueValue
) {
377 haveUniqueValue
=TRUE
;
382 pos
=skipValue(pos
, node
);
388 BytesTrie::getNextBytes(ByteSink
&out
) const {
389 const uint8_t *pos
=pos_
;
393 if(remainingMatchLength_
>=0) {
394 append(out
, *pos
); // Next byte of a pending linear-match node.
398 if(node
>=kMinValueLead
) {
399 if(node
&kValueIsFinal
) {
402 pos
=skipValue(pos
, node
);
404 U_ASSERT(node
<kMinValueLead
);
407 if(node
<kMinLinearMatch
) {
411 getNextBranchBytes(pos
, ++node
, out
);
414 // First byte of the linear-match node.
421 BytesTrie::getNextBranchBytes(const uint8_t *pos
, int32_t length
, ByteSink
&out
) {
422 while(length
>kMaxBranchLinearSubNodeLength
) {
423 ++pos
; // ignore the comparison byte
424 getNextBranchBytes(jumpByDelta(pos
), length
>>1, out
);
425 length
=length
-(length
>>1);
436 BytesTrie::append(ByteSink
&out
, int c
) {