1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2012, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytestrieiterator.cpp
10 * tab size: 8 (not used)
13 * created on: 2010nov03
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/stringpiece.h"
25 BytesTrie::Iterator::Iterator(const void *trieBytes
, int32_t maxStringLength
,
26 UErrorCode
&errorCode
)
27 : bytes_(static_cast<const uint8_t *>(trieBytes
)),
28 pos_(bytes_
), initialPos_(bytes_
),
29 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30 str_(NULL
), maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
31 if(U_FAILURE(errorCode
)) {
34 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35 // a public API header for which we would want it to depend only on
36 // other public headers.
37 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38 // via the CharString and UVector32 implementations, so this additional
40 str_
=new CharString();
41 stack_
=new UVector32(errorCode
);
42 if(U_SUCCESS(errorCode
) && (str_
==NULL
|| stack_
==NULL
)) {
43 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
47 BytesTrie::Iterator::Iterator(const BytesTrie
&trie
, int32_t maxStringLength
,
48 UErrorCode
&errorCode
)
49 : bytes_(trie
.bytes_
), pos_(trie
.pos_
), initialPos_(trie
.pos_
),
50 remainingMatchLength_(trie
.remainingMatchLength_
),
51 initialRemainingMatchLength_(trie
.remainingMatchLength_
),
52 str_(NULL
), maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
53 if(U_FAILURE(errorCode
)) {
56 str_
=new CharString();
57 stack_
=new UVector32(errorCode
);
58 if(U_FAILURE(errorCode
)) {
61 if(str_
==NULL
|| stack_
==NULL
) {
62 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
65 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
67 // Pending linear-match node, append remaining bytes to str_.
69 if(maxLength_
>0 && length
>maxLength_
) {
70 length
=maxLength_
; // This will leave remainingMatchLength>=0 as a signal.
72 str_
->append(reinterpret_cast<const char *>(pos_
), length
, errorCode
);
74 remainingMatchLength_
-=length
;
78 BytesTrie::Iterator::~Iterator() {
84 BytesTrie::Iterator::reset() {
86 remainingMatchLength_
=initialRemainingMatchLength_
;
87 int32_t length
=remainingMatchLength_
+1; // Remaining match length.
88 if(maxLength_
>0 && length
>maxLength_
) {
91 str_
->truncate(length
);
93 remainingMatchLength_
-=length
;
99 BytesTrie::Iterator::hasNext() const { return pos_
!=NULL
|| !stack_
->isEmpty(); }
102 BytesTrie::Iterator::next(UErrorCode
&errorCode
) {
103 if(U_FAILURE(errorCode
)) {
106 const uint8_t *pos
=pos_
;
108 if(stack_
->isEmpty()) {
111 // Pop the state off the stack and continue with the next outbound edge of
113 int32_t stackSize
=stack_
->size();
114 int32_t length
=stack_
->elementAti(stackSize
-1);
115 pos
=bytes_
+stack_
->elementAti(stackSize
-2);
116 stack_
->setSize(stackSize
-2);
117 str_
->truncate(length
&0xffff);
118 length
=(int32_t)((uint32_t)length
>>16);
120 pos
=branchNext(pos
, length
, errorCode
);
122 return TRUE
; // Reached a final value.
125 str_
->append((char)*pos
++, errorCode
);
128 if(remainingMatchLength_
>=0) {
129 // We only get here if we started in a pending linear-match node
130 // with more than maxLength remaining bytes.
131 return truncateAndStop();
135 if(node
>=kMinValueLead
) {
136 // Deliver value for the byte sequence so far.
137 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
138 value_
=readValue(pos
, node
>>1);
139 if(isFinal
|| (maxLength_
>0 && str_
->length()==maxLength_
)) {
142 pos_
=skipValue(pos
, node
);
146 if(maxLength_
>0 && str_
->length()==maxLength_
) {
147 return truncateAndStop();
149 if(node
<kMinLinearMatch
) {
153 pos
=branchNext(pos
, node
+1, errorCode
);
155 return TRUE
; // Reached a final value.
158 // Linear-match node, append length bytes to str_.
159 int32_t length
=node
-kMinLinearMatch
+1;
160 if(maxLength_
>0 && str_
->length()+length
>maxLength_
) {
161 str_
->append(reinterpret_cast<const char *>(pos
),
162 maxLength_
-str_
->length(), errorCode
);
163 return truncateAndStop();
165 str_
->append(reinterpret_cast<const char *>(pos
), length
, errorCode
);
172 BytesTrie::Iterator::getString() const {
173 return str_
== NULL
? StringPiece() : str_
->toStringPiece();
177 BytesTrie::Iterator::truncateAndStop() {
179 value_
=-1; // no real value for str
183 // Branch node, needs to take the first outbound edge and push state for the rest.
185 BytesTrie::Iterator::branchNext(const uint8_t *pos
, int32_t length
, UErrorCode
&errorCode
) {
186 while(length
>kMaxBranchLinearSubNodeLength
) {
187 ++pos
; // ignore the comparison byte
188 // Push state for the greater-or-equal edge.
189 stack_
->addElement((int32_t)(skipDelta(pos
)-bytes_
), errorCode
);
190 stack_
->addElement(((length
-(length
>>1))<<16)|str_
->length(), errorCode
);
191 // Follow the less-than edge.
193 pos
=jumpByDelta(pos
);
195 // List of key-value pairs where values are either final values or jump deltas.
196 // Read the first (key, value) pair.
197 uint8_t trieByte
=*pos
++;
199 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
200 int32_t value
=readValue(pos
, node
>>1);
201 pos
=skipValue(pos
, node
);
202 stack_
->addElement((int32_t)(pos
-bytes_
), errorCode
);
203 stack_
->addElement(((length
-1)<<16)|str_
->length(), errorCode
);
204 str_
->append((char)trieByte
, errorCode
);