1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: ucharstrieiterator.h
10 * tab size: 8 (not used)
13 * created on: 2010nov15
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/ucharstrie.h"
19 #include "unicode/unistr.h"
24 UCharsTrie::Iterator::Iterator(ConstChar16Ptr trieUChars
, int32_t maxStringLength
,
25 UErrorCode
&errorCode
)
26 : uchars_(trieUChars
),
27 pos_(uchars_
), initialPos_(uchars_
),
28 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30 maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
31 if(U_FAILURE(errorCode
)) {
34 // stack_ is a pointer so that it's easy to turn ucharstrie.h into
35 // a public API header for which we would want it to depend only on
36 // other public headers.
37 // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
38 // via the UnicodeString and UVector32 implementations, so this additional
40 stack_
=new UVector32(errorCode
);
42 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
46 UCharsTrie::Iterator::Iterator(const UCharsTrie
&trie
, int32_t maxStringLength
,
47 UErrorCode
&errorCode
)
48 : uchars_(trie
.uchars_
), pos_(trie
.pos_
), initialPos_(trie
.pos_
),
49 remainingMatchLength_(trie
.remainingMatchLength_
),
50 initialRemainingMatchLength_(trie
.remainingMatchLength_
),
52 maxLength_(maxStringLength
), value_(0), stack_(NULL
) {
53 if(U_FAILURE(errorCode
)) {
56 stack_
=new UVector32(errorCode
);
57 if(U_FAILURE(errorCode
)) {
61 errorCode
=U_MEMORY_ALLOCATION_ERROR
;
64 int32_t length
=remainingMatchLength_
; // Actual remaining match length minus 1.
66 // Pending linear-match node, append remaining UChars to str_.
68 if(maxLength_
>0 && length
>maxLength_
) {
69 length
=maxLength_
; // This will leave remainingMatchLength>=0 as a signal.
71 str_
.append(pos_
, length
);
73 remainingMatchLength_
-=length
;
77 UCharsTrie::Iterator::~Iterator() {
81 UCharsTrie::Iterator
&
82 UCharsTrie::Iterator::reset() {
84 remainingMatchLength_
=initialRemainingMatchLength_
;
86 int32_t length
=remainingMatchLength_
+1; // Remaining match length.
87 if(maxLength_
>0 && length
>maxLength_
) {
90 str_
.truncate(length
);
92 remainingMatchLength_
-=length
;
98 UCharsTrie::Iterator::hasNext() const { return pos_
!=NULL
|| !stack_
->isEmpty(); }
101 UCharsTrie::Iterator::next(UErrorCode
&errorCode
) {
102 if(U_FAILURE(errorCode
)) {
105 const UChar
*pos
=pos_
;
107 if(stack_
->isEmpty()) {
110 // Pop the state off the stack and continue with the next outbound edge of
112 int32_t stackSize
=stack_
->size();
113 int32_t length
=stack_
->elementAti(stackSize
-1);
114 pos
=uchars_
+stack_
->elementAti(stackSize
-2);
115 stack_
->setSize(stackSize
-2);
116 str_
.truncate(length
&0xffff);
117 length
=(int32_t)((uint32_t)length
>>16);
119 pos
=branchNext(pos
, length
, errorCode
);
121 return TRUE
; // Reached a final value.
127 if(remainingMatchLength_
>=0) {
128 // We only get here if we started in a pending linear-match node
129 // with more than maxLength remaining units.
130 return truncateAndStop();
134 if(node
>=kMinValueLead
) {
136 pos
=skipNodeValue(pos
, node
);
140 // Deliver value for the string so far.
141 UBool isFinal
=(UBool
)(node
>>15);
143 value_
=readValue(pos
, node
&0x7fff);
145 value_
=readNodeValue(pos
, node
);
147 if(isFinal
|| (maxLength_
>0 && str_
.length()==maxLength_
)) {
150 // We cannot skip the value right here because it shares its
151 // lead unit with a match node which we have to evaluate
153 // Instead, keep pos_ on the node lead unit itself.
160 if(maxLength_
>0 && str_
.length()==maxLength_
) {
161 return truncateAndStop();
163 if(node
<kMinLinearMatch
) {
167 pos
=branchNext(pos
, node
+1, errorCode
);
169 return TRUE
; // Reached a final value.
172 // Linear-match node, append length units to str_.
173 int32_t length
=node
-kMinLinearMatch
+1;
174 if(maxLength_
>0 && str_
.length()+length
>maxLength_
) {
175 str_
.append(pos
, maxLength_
-str_
.length());
176 return truncateAndStop();
178 str_
.append(pos
, length
);
184 // Branch node, needs to take the first outbound edge and push state for the rest.
186 UCharsTrie::Iterator::branchNext(const UChar
*pos
, int32_t length
, UErrorCode
&errorCode
) {
187 while(length
>kMaxBranchLinearSubNodeLength
) {
188 ++pos
; // ignore the comparison unit
189 // Push state for the greater-or-equal edge.
190 stack_
->addElement((int32_t)(skipDelta(pos
)-uchars_
), errorCode
);
191 stack_
->addElement(((length
-(length
>>1))<<16)|str_
.length(), errorCode
);
192 // Follow the less-than edge.
194 pos
=jumpByDelta(pos
);
196 // List of key-value pairs where values are either final values or jump deltas.
197 // Read the first (key, value) pair.
198 UChar trieUnit
=*pos
++;
200 UBool isFinal
=(UBool
)(node
>>15);
201 int32_t value
=readValue(pos
, node
&=0x7fff);
202 pos
=skipValue(pos
, node
);
203 stack_
->addElement((int32_t)(pos
-uchars_
), errorCode
);
204 stack_
->addElement(((length
-1)<<16)|str_
.length(), errorCode
);
205 str_
.append(trieUnit
);