2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: ucharstrieiterator.h
8 * tab size: 8 (not used)
11 * created on: 2010nov15
12 * created by: Markus W. Scherer
15 #include "unicode/utypes.h"
16 #include "unicode/ucharstrie.h"
17 #include "unicode/unistr.h"
22 UCharsTrie::Iterator::Iterator(const UChar
*trieUChars
, int32_t maxStringLength
,
23 UErrorCode
&errorCode
)
24 : uchars_(trieUChars
),
25 pos_(uchars_
), initialPos_(uchars_
),
26 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
28 maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
29 if(U_FAILURE(errorCode
)) {
32 // stack_ is a pointer so that it's easy to turn ucharstrie.h into
33 // a public API header for which we would want it to depend only on
34 // other public headers.
35 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
36 // via the UnicodeString and UVector32 implementations, so this additional
38 stack_
=new UVector32(errorCode
);
40 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
44 UCharsTrie::Iterator::Iterator(const UCharsTrie
&trie
, int32_t maxStringLength
,
45 UErrorCode
&errorCode
)
46 : uchars_(trie
.uchars_
), pos_(trie
.pos_
), initialPos_(trie
.pos_
),
47 remainingMatchLength_(trie
.remainingMatchLength_
),
48 initialRemainingMatchLength_(trie
.remainingMatchLength_
),
50 maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
51 if(U_FAILURE(errorCode
)) {
54 stack_
=new UVector32(errorCode
);
55 if(U_FAILURE(errorCode
)) {
59 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
62 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
64 // Pending linear-match node, append remaining UChars to str_.
66 if(maxLength_
>0 && length
>maxLength_
) {
67 length
=maxLength_
; // This will leave remainingMatchLength>=0 as a signal.
69 str_
.append(pos_
, length
);
71 remainingMatchLength_
-=length
;
75 UCharsTrie::Iterator::~Iterator() {
79 UCharsTrie::Iterator
&
80 UCharsTrie::Iterator::reset() {
82 remainingMatchLength_
=initialRemainingMatchLength_
;
84 int32_t length
=remainingMatchLength_
+1; // Remaining match length.
85 if(maxLength_
>0 && length
>maxLength_
) {
88 str_
.truncate(length
);
90 remainingMatchLength_
-=length
;
96 UCharsTrie::Iterator::hasNext() const { return pos_
!=NULL
|| !stack_
->isEmpty(); }
99 UCharsTrie::Iterator::next(UErrorCode
&errorCode
) {
100 if(U_FAILURE(errorCode
)) {
103 const UChar
*pos
=pos_
;
105 if(stack_
->isEmpty()) {
108 // Pop the state off the stack and continue with the next outbound edge of
110 int32_t stackSize
=stack_
->size();
111 int32_t length
=stack_
->elementAti(stackSize
-1);
112 pos
=uchars_
+stack_
->elementAti(stackSize
-2);
113 stack_
->setSize(stackSize
-2);
114 str_
.truncate(length
&0xffff);
115 length
=(int32_t)((uint32_t)length
>>16);
117 pos
=branchNext(pos
, length
, errorCode
);
119 return TRUE
; // Reached a final value.
125 if(remainingMatchLength_
>=0) {
126 // We only get here if we started in a pending linear-match node
127 // with more than maxLength remaining units.
128 return truncateAndStop();
132 if(node
>=kMinValueLead
) {
134 pos
=skipNodeValue(pos
, node
);
138 // Deliver value for the string so far.
139 UBool isFinal
=(UBool
)(node
>>15);
141 value_
=readValue(pos
, node
&0x7fff);
143 value_
=readNodeValue(pos
, node
);
145 if(isFinal
|| (maxLength_
>0 && str_
.length()==maxLength_
)) {
148 // We cannot skip the value right here because it shares its
149 // lead unit with a match node which we have to evaluate
151 // Instead, keep pos_ on the node lead unit itself.
158 if(maxLength_
>0 && str_
.length()==maxLength_
) {
159 return truncateAndStop();
161 if(node
<kMinLinearMatch
) {
165 pos
=branchNext(pos
, node
+1, errorCode
);
167 return TRUE
; // Reached a final value.
170 // Linear-match node, append length units to str_.
171 int32_t length
=node
-kMinLinearMatch
+1;
172 if(maxLength_
>0 && str_
.length()+length
>maxLength_
) {
173 str_
.append(pos
, maxLength_
-str_
.length());
174 return truncateAndStop();
176 str_
.append(pos
, length
);
182 // Branch node, needs to take the first outbound edge and push state for the rest.
184 UCharsTrie::Iterator::branchNext(const UChar
*pos
, int32_t length
, UErrorCode
&errorCode
) {
185 while(length
>kMaxBranchLinearSubNodeLength
) {
186 ++pos
; // ignore the comparison unit
187 // Push state for the greater-or-equal edge.
188 stack_
->addElement((int32_t)(skipDelta(pos
)-uchars_
), errorCode
);
189 stack_
->addElement(((length
-(length
>>1))<<16)|str_
.length(), errorCode
);
190 // Follow the less-than edge.
192 pos
=jumpByDelta(pos
);
194 // List of key-value pairs where values are either final values or jump deltas.
195 // Read the first (key, value) pair.
196 UChar trieUnit
=*pos
++;
198 UBool isFinal
=(UBool
)(node
>>15);
199 int32_t value
=readValue(pos
, node
&=0x7fff);
200 pos
=skipValue(pos
, node
);
201 stack_
->addElement((int32_t)(pos
-uchars_
), errorCode
);
202 stack_
->addElement(((length
-1)<<16)|str_
.length(), errorCode
);
203 str_
.append(trieUnit
);