2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytestrie.cpp
8 * tab size: 8 (not used)
11 * created on: 2010sep25
12 * created by: Markus W. Scherer
15 #include "unicode/utypes.h"
16 #include "unicode/bytestream.h"
17 #include "unicode/bytestrie.h"
18 #include "unicode/uobject.h"
24 BytesTrie::~BytesTrie() {
25 uprv_free(ownedArray_
);
28 // lead byte already shifted right by 1.
30 BytesTrie::readValue(const uint8_t *pos
, int32_t leadByte
) {
32 if(leadByte
<kMinTwoByteValueLead
) {
33 value
=leadByte
-kMinOneByteValueLead
;
34 } else if(leadByte
<kMinThreeByteValueLead
) {
35 value
=((leadByte
-kMinTwoByteValueLead
)<<8)|*pos
;
36 } else if(leadByte
<kFourByteValueLead
) {
37 value
=((leadByte
-kMinThreeByteValueLead
)<<16)|(pos
[0]<<8)|pos
[1];
38 } else if(leadByte
==kFourByteValueLead
) {
39 value
=(pos
[0]<<16)|(pos
[1]<<8)|pos
[2];
41 value
=(pos
[0]<<24)|(pos
[1]<<16)|(pos
[2]<<8)|pos
[3];
47 BytesTrie::jumpByDelta(const uint8_t *pos
) {
49 if(delta
<kMinTwoByteDeltaLead
) {
51 } else if(delta
<kMinThreeByteDeltaLead
) {
52 delta
=((delta
-kMinTwoByteDeltaLead
)<<8)|*pos
++;
53 } else if(delta
<kFourByteDeltaLead
) {
54 delta
=((delta
-kMinThreeByteDeltaLead
)<<16)|(pos
[0]<<8)|pos
[1];
56 } else if(delta
==kFourByteDeltaLead
) {
57 delta
=(pos
[0]<<16)|(pos
[1]<<8)|pos
[2];
60 delta
=(pos
[0]<<24)|(pos
[1]<<16)|(pos
[2]<<8)|pos
[3];
67 BytesTrie::current() const {
68 const uint8_t *pos
=pos_
;
70 return USTRINGTRIE_NO_MATCH
;
73 return (remainingMatchLength_
<0 && (node
=*pos
)>=kMinValueLead
) ?
74 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
79 BytesTrie::branchNext(const uint8_t *pos
, int32_t length
, int32_t inByte
) {
80 // Branch according to the current byte.
85 // The length of the branch is the number of bytes to select from.
86 // The data structure encodes a binary search.
87 while(length
>kMaxBranchLinearSubNodeLength
) {
92 length
=length
-(length
>>1);
96 // Drop down to linear search for the last few bytes.
97 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
98 // and divides length by 2.
101 UStringTrieResult result
;
103 U_ASSERT(node
>=kMinValueLead
);
104 if(node
&kValueIsFinal
) {
105 // Leave the final value for getValue() to read.
106 result
=USTRINGTRIE_FINAL_VALUE
;
108 // Use the non-final value as the jump delta.
110 // int32_t delta=readValue(pos, node>>1);
113 if(node
<kMinTwoByteValueLead
) {
114 delta
=node
-kMinOneByteValueLead
;
115 } else if(node
<kMinThreeByteValueLead
) {
116 delta
=((node
-kMinTwoByteValueLead
)<<8)|*pos
++;
117 } else if(node
<kFourByteValueLead
) {
118 delta
=((node
-kMinThreeByteValueLead
)<<16)|(pos
[0]<<8)|pos
[1];
120 } else if(node
==kFourByteValueLead
) {
121 delta
=(pos
[0]<<16)|(pos
[1]<<8)|pos
[2];
124 delta
=(pos
[0]<<24)|(pos
[1]<<16)|(pos
[2]<<8)|pos
[3];
130 result
= node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
141 return node
>=kMinValueLead
? valueResult(node
) : USTRINGTRIE_NO_VALUE
;
144 return USTRINGTRIE_NO_MATCH
;
149 BytesTrie::nextImpl(const uint8_t *pos
, int32_t inByte
) {
152 if(node
<kMinLinearMatch
) {
153 return branchNext(pos
, node
, inByte
);
154 } else if(node
<kMinValueLead
) {
155 // Match the first of length+1 bytes.
156 int32_t length
=node
-kMinLinearMatch
; // Actual match length minus 1.
158 remainingMatchLength_
=--length
;
160 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
161 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
166 } else if(node
&kValueIsFinal
) {
167 // No further matching bytes.
170 // Skip intermediate value.
171 pos
=skipValue(pos
, node
);
172 // The next node must not also be a value node.
173 U_ASSERT(*pos
<kMinValueLead
);
177 return USTRINGTRIE_NO_MATCH
;
181 BytesTrie::next(int32_t inByte
) {
182 const uint8_t *pos
=pos_
;
184 return USTRINGTRIE_NO_MATCH
;
189 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
191 // Remaining part of a linear-match node.
193 remainingMatchLength_
=--length
;
196 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
197 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
200 return USTRINGTRIE_NO_MATCH
;
203 return nextImpl(pos
, inByte
);
207 BytesTrie::next(const char *s
, int32_t sLength
) {
208 if(sLength
<0 ? *s
==0 : sLength
==0) {
212 const uint8_t *pos
=pos_
;
214 return USTRINGTRIE_NO_MATCH
;
216 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
218 // Fetch the next input byte, if there is one.
219 // Continue a linear-match node without rechecking sLength<0.
223 if((inByte
=*s
++)==0) {
224 remainingMatchLength_
=length
;
227 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
228 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
231 remainingMatchLength_
=length
;
236 return USTRINGTRIE_NO_MATCH
;
244 remainingMatchLength_
=length
;
247 return (length
<0 && (node
=*pos
)>=kMinValueLead
) ?
248 valueResult(node
) : USTRINGTRIE_NO_VALUE
;
253 remainingMatchLength_
=length
;
258 return USTRINGTRIE_NO_MATCH
;
266 if(node
<kMinLinearMatch
) {
267 UStringTrieResult result
=branchNext(pos
, node
, inByte
);
268 if(result
==USTRINGTRIE_NO_MATCH
) {
269 return USTRINGTRIE_NO_MATCH
;
271 // Fetch the next input byte, if there is one.
273 if((inByte
=*s
++)==0) {
283 if(result
==USTRINGTRIE_FINAL_VALUE
) {
284 // No further matching bytes.
286 return USTRINGTRIE_NO_MATCH
;
288 pos
=pos_
; // branchNext() advanced pos and wrote it to pos_ .
289 } else if(node
<kMinValueLead
) {
290 // Match length+1 bytes.
291 length
=node
-kMinLinearMatch
; // Actual match length minus 1.
294 return USTRINGTRIE_NO_MATCH
;
299 } else if(node
&kValueIsFinal
) {
300 // No further matching bytes.
302 return USTRINGTRIE_NO_MATCH
;
304 // Skip intermediate value.
305 pos
=skipValue(pos
, node
);
306 // The next node must not also be a value node.
307 U_ASSERT(*pos
<kMinValueLead
);
314 BytesTrie::findUniqueValueFromBranch(const uint8_t *pos
, int32_t length
,
315 UBool haveUniqueValue
, int32_t &uniqueValue
) {
316 while(length
>kMaxBranchLinearSubNodeLength
) {
317 ++pos
; // ignore the comparison byte
318 if(NULL
==findUniqueValueFromBranch(jumpByDelta(pos
), length
>>1, haveUniqueValue
, uniqueValue
)) {
321 length
=length
-(length
>>1);
325 ++pos
; // ignore a comparison byte
328 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
329 int32_t value
=readValue(pos
, node
>>1);
330 pos
=skipValue(pos
, node
);
332 if(haveUniqueValue
) {
333 if(value
!=uniqueValue
) {
338 haveUniqueValue
=TRUE
;
341 if(!findUniqueValue(pos
+value
, haveUniqueValue
, uniqueValue
)) {
344 haveUniqueValue
=TRUE
;
347 return pos
+1; // ignore the last comparison byte
351 BytesTrie::findUniqueValue(const uint8_t *pos
, UBool haveUniqueValue
, int32_t &uniqueValue
) {
354 if(node
<kMinLinearMatch
) {
358 pos
=findUniqueValueFromBranch(pos
, node
+1, haveUniqueValue
, uniqueValue
);
362 haveUniqueValue
=TRUE
;
363 } else if(node
<kMinValueLead
) {
365 pos
+=node
-kMinLinearMatch
+1; // Ignore the match bytes.
367 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
368 int32_t value
=readValue(pos
, node
>>1);
369 if(haveUniqueValue
) {
370 if(value
!=uniqueValue
) {
375 haveUniqueValue
=TRUE
;
380 pos
=skipValue(pos
, node
);
386 BytesTrie::getNextBytes(ByteSink
&out
) const {
387 const uint8_t *pos
=pos_
;
391 if(remainingMatchLength_
>=0) {
392 append(out
, *pos
); // Next byte of a pending linear-match node.
396 if(node
>=kMinValueLead
) {
397 if(node
&kValueIsFinal
) {
400 pos
=skipValue(pos
, node
);
402 U_ASSERT(node
<kMinValueLead
);
405 if(node
<kMinLinearMatch
) {
409 getNextBranchBytes(pos
, ++node
, out
);
412 // First byte of the linear-match node.
419 BytesTrie::getNextBranchBytes(const uint8_t *pos
, int32_t length
, ByteSink
&out
) {
420 while(length
>kMaxBranchLinearSubNodeLength
) {
421 ++pos
; // ignore the comparison byte
422 getNextBranchBytes(jumpByDelta(pos
), length
>>1, out
);
423 length
=length
-(length
>>1);
434 BytesTrie::append(ByteSink
&out
, int c
) {