2 *******************************************************************************
3 * Copyright (C) 2010-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytestrieiterator.cpp
8 * tab size: 8 (not used)
11 * created on: 2010nov03
12 * created by: Markus W. Scherer
15 #include "unicode/utypes.h"
16 #include "unicode/bytestrie.h"
17 #include "unicode/stringpiece.h"
23 BytesTrie::Iterator::Iterator(const void *trieBytes
, int32_t maxStringLength
,
24 UErrorCode
&errorCode
)
25 : bytes_(static_cast<const uint8_t *>(trieBytes
)),
26 pos_(bytes_
), initialPos_(bytes_
),
27 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
28 str_(NULL
), maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
29 if(U_FAILURE(errorCode
)) {
32 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
33 // a public API header for which we would want it to depend only on
34 // other public headers.
35 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
36 // via the CharString and UVector32 implementations, so this additional
38 str_
=new CharString();
39 stack_
=new UVector32(errorCode
);
40 if(U_SUCCESS(errorCode
) && (str_
==NULL
|| stack_
==NULL
)) {
41 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
45 BytesTrie::Iterator::Iterator(const BytesTrie
&trie
, int32_t maxStringLength
,
46 UErrorCode
&errorCode
)
47 : bytes_(trie
.bytes_
), pos_(trie
.pos_
), initialPos_(trie
.pos_
),
48 remainingMatchLength_(trie
.remainingMatchLength_
),
49 initialRemainingMatchLength_(trie
.remainingMatchLength_
),
50 str_(NULL
), maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
51 if(U_FAILURE(errorCode
)) {
54 str_
=new CharString();
55 stack_
=new UVector32(errorCode
);
56 if(U_FAILURE(errorCode
)) {
59 if(str_
==NULL
|| stack_
==NULL
) {
60 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
63 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
65 // Pending linear-match node, append remaining bytes to str_.
67 if(maxLength_
>0 && length
>maxLength_
) {
68 length
=maxLength_
; // This will leave remainingMatchLength>=0 as a signal.
70 str_
->append(reinterpret_cast<const char *>(pos_
), length
, errorCode
);
72 remainingMatchLength_
-=length
;
76 BytesTrie::Iterator::~Iterator() {
82 BytesTrie::Iterator::reset() {
84 remainingMatchLength_
=initialRemainingMatchLength_
;
85 int32_t length
=remainingMatchLength_
+1; // Remaining match length.
86 if(maxLength_
>0 && length
>maxLength_
) {
89 str_
->truncate(length
);
91 remainingMatchLength_
-=length
;
97 BytesTrie::Iterator::hasNext() const { return pos_
!=NULL
|| !stack_
->isEmpty(); }
100 BytesTrie::Iterator::next(UErrorCode
&errorCode
) {
101 if(U_FAILURE(errorCode
)) {
104 const uint8_t *pos
=pos_
;
106 if(stack_
->isEmpty()) {
109 // Pop the state off the stack and continue with the next outbound edge of
111 int32_t stackSize
=stack_
->size();
112 int32_t length
=stack_
->elementAti(stackSize
-1);
113 pos
=bytes_
+stack_
->elementAti(stackSize
-2);
114 stack_
->setSize(stackSize
-2);
115 str_
->truncate(length
&0xffff);
116 length
=(int32_t)((uint32_t)length
>>16);
118 pos
=branchNext(pos
, length
, errorCode
);
120 return TRUE
; // Reached a final value.
123 str_
->append((char)*pos
++, errorCode
);
126 if(remainingMatchLength_
>=0) {
127 // We only get here if we started in a pending linear-match node
128 // with more than maxLength remaining bytes.
129 return truncateAndStop();
133 if(node
>=kMinValueLead
) {
134 // Deliver value for the byte sequence so far.
135 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
136 value_
=readValue(pos
, node
>>1);
137 if(isFinal
|| (maxLength_
>0 && str_
->length()==maxLength_
)) {
140 pos_
=skipValue(pos
, node
);
142 sp_
.set(str_
->data(), str_
->length());
145 if(maxLength_
>0 && str_
->length()==maxLength_
) {
146 return truncateAndStop();
148 if(node
<kMinLinearMatch
) {
152 pos
=branchNext(pos
, node
+1, errorCode
);
154 return TRUE
; // Reached a final value.
157 // Linear-match node, append length bytes to str_.
158 int32_t length
=node
-kMinLinearMatch
+1;
159 if(maxLength_
>0 && str_
->length()+length
>maxLength_
) {
160 str_
->append(reinterpret_cast<const char *>(pos
),
161 maxLength_
-str_
->length(), errorCode
);
162 return truncateAndStop();
164 str_
->append(reinterpret_cast<const char *>(pos
), length
, errorCode
);
171 BytesTrie::Iterator::truncateAndStop() {
173 sp_
.set(str_
->data(), str_
->length());
174 value_
=-1; // no real value for str
178 // Branch node, needs to take the first outbound edge and push state for the rest.
180 BytesTrie::Iterator::branchNext(const uint8_t *pos
, int32_t length
, UErrorCode
&errorCode
) {
181 while(length
>kMaxBranchLinearSubNodeLength
) {
182 ++pos
; // ignore the comparison byte
183 // Push state for the greater-or-equal edge.
184 stack_
->addElement((int32_t)(skipDelta(pos
)-bytes_
), errorCode
);
185 stack_
->addElement(((length
-(length
>>1))<<16)|str_
->length(), errorCode
);
186 // Follow the less-than edge.
188 pos
=jumpByDelta(pos
);
190 // List of key-value pairs where values are either final values or jump deltas.
191 // Read the first (key, value) pair.
192 uint8_t trieByte
=*pos
++;
194 UBool isFinal
=(UBool
)(node
&kValueIsFinal
);
195 int32_t value
=readValue(pos
, node
>>1);
196 pos
=skipValue(pos
, node
);
197 stack_
->addElement((int32_t)(pos
-bytes_
), errorCode
);
198 stack_
->addElement(((length
-1)<<16)|str_
->length(), errorCode
);
199 str_
->append((char)trieByte
, errorCode
);
202 sp_
.set(str_
->data(), str_
->length());