]>
Commit | Line | Data |
---|---|---|
4388f060 A |
1 | /* |
2 | ******************************************************************************* | |
51004dcb | 3 | * Copyright (C) 2010-2012, International Business Machines |
4388f060 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * file name: bytestrieiterator.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2010nov03 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
15 | #include "unicode/utypes.h" | |
16 | #include "unicode/bytestrie.h" | |
17 | #include "unicode/stringpiece.h" | |
18 | #include "charstr.h" | |
19 | #include "uvectr32.h" | |
20 | ||
21 | U_NAMESPACE_BEGIN | |
22 | ||
23 | BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, | |
24 | UErrorCode &errorCode) | |
51004dcb | 25 | : bytes_(static_cast<const uint8_t *>(trieBytes)), |
4388f060 A |
26 | pos_(bytes_), initialPos_(bytes_), |
27 | remainingMatchLength_(-1), initialRemainingMatchLength_(-1), | |
28 | str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { | |
29 | if(U_FAILURE(errorCode)) { | |
30 | return; | |
31 | } | |
32 | // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into | |
33 | // a public API header for which we would want it to depend only on | |
34 | // other public headers. | |
35 | // Unlike BytesTrie itself, its Iterator performs memory allocations anyway | |
36 | // via the CharString and UVector32 implementations, so this additional | |
37 | // cost is minimal. | |
38 | str_=new CharString(); | |
39 | stack_=new UVector32(errorCode); | |
40 | if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { | |
41 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
42 | } | |
43 | } | |
44 | ||
45 | BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, | |
46 | UErrorCode &errorCode) | |
47 | : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), | |
48 | remainingMatchLength_(trie.remainingMatchLength_), | |
49 | initialRemainingMatchLength_(trie.remainingMatchLength_), | |
50 | str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { | |
51 | if(U_FAILURE(errorCode)) { | |
52 | return; | |
53 | } | |
54 | str_=new CharString(); | |
55 | stack_=new UVector32(errorCode); | |
56 | if(U_FAILURE(errorCode)) { | |
57 | return; | |
58 | } | |
59 | if(str_==NULL || stack_==NULL) { | |
60 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
61 | return; | |
62 | } | |
63 | int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. | |
64 | if(length>=0) { | |
65 | // Pending linear-match node, append remaining bytes to str_. | |
66 | ++length; | |
67 | if(maxLength_>0 && length>maxLength_) { | |
68 | length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. | |
69 | } | |
70 | str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); | |
71 | pos_+=length; | |
72 | remainingMatchLength_-=length; | |
73 | } | |
74 | } | |
75 | ||
76 | BytesTrie::Iterator::~Iterator() { | |
77 | delete str_; | |
78 | delete stack_; | |
79 | } | |
80 | ||
81 | BytesTrie::Iterator & | |
82 | BytesTrie::Iterator::reset() { | |
83 | pos_=initialPos_; | |
84 | remainingMatchLength_=initialRemainingMatchLength_; | |
85 | int32_t length=remainingMatchLength_+1; // Remaining match length. | |
86 | if(maxLength_>0 && length>maxLength_) { | |
87 | length=maxLength_; | |
88 | } | |
89 | str_->truncate(length); | |
90 | pos_+=length; | |
91 | remainingMatchLength_-=length; | |
92 | stack_->setSize(0); | |
93 | return *this; | |
94 | } | |
95 | ||
96 | UBool | |
97 | BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } | |
98 | ||
99 | UBool | |
100 | BytesTrie::Iterator::next(UErrorCode &errorCode) { | |
101 | if(U_FAILURE(errorCode)) { | |
102 | return FALSE; | |
103 | } | |
104 | const uint8_t *pos=pos_; | |
105 | if(pos==NULL) { | |
106 | if(stack_->isEmpty()) { | |
107 | return FALSE; | |
108 | } | |
109 | // Pop the state off the stack and continue with the next outbound edge of | |
110 | // the branch node. | |
111 | int32_t stackSize=stack_->size(); | |
112 | int32_t length=stack_->elementAti(stackSize-1); | |
113 | pos=bytes_+stack_->elementAti(stackSize-2); | |
114 | stack_->setSize(stackSize-2); | |
115 | str_->truncate(length&0xffff); | |
116 | length=(int32_t)((uint32_t)length>>16); | |
117 | if(length>1) { | |
118 | pos=branchNext(pos, length, errorCode); | |
119 | if(pos==NULL) { | |
120 | return TRUE; // Reached a final value. | |
121 | } | |
122 | } else { | |
123 | str_->append((char)*pos++, errorCode); | |
124 | } | |
125 | } | |
126 | if(remainingMatchLength_>=0) { | |
127 | // We only get here if we started in a pending linear-match node | |
128 | // with more than maxLength remaining bytes. | |
129 | return truncateAndStop(); | |
130 | } | |
131 | for(;;) { | |
132 | int32_t node=*pos++; | |
133 | if(node>=kMinValueLead) { | |
134 | // Deliver value for the byte sequence so far. | |
135 | UBool isFinal=(UBool)(node&kValueIsFinal); | |
136 | value_=readValue(pos, node>>1); | |
137 | if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { | |
138 | pos_=NULL; | |
139 | } else { | |
140 | pos_=skipValue(pos, node); | |
141 | } | |
142 | sp_.set(str_->data(), str_->length()); | |
143 | return TRUE; | |
144 | } | |
145 | if(maxLength_>0 && str_->length()==maxLength_) { | |
146 | return truncateAndStop(); | |
147 | } | |
148 | if(node<kMinLinearMatch) { | |
149 | if(node==0) { | |
150 | node=*pos++; | |
151 | } | |
152 | pos=branchNext(pos, node+1, errorCode); | |
153 | if(pos==NULL) { | |
154 | return TRUE; // Reached a final value. | |
155 | } | |
156 | } else { | |
157 | // Linear-match node, append length bytes to str_. | |
158 | int32_t length=node-kMinLinearMatch+1; | |
159 | if(maxLength_>0 && str_->length()+length>maxLength_) { | |
160 | str_->append(reinterpret_cast<const char *>(pos), | |
161 | maxLength_-str_->length(), errorCode); | |
162 | return truncateAndStop(); | |
163 | } | |
164 | str_->append(reinterpret_cast<const char *>(pos), length, errorCode); | |
165 | pos+=length; | |
166 | } | |
167 | } | |
168 | } | |
169 | ||
170 | UBool | |
171 | BytesTrie::Iterator::truncateAndStop() { | |
172 | pos_=NULL; | |
173 | sp_.set(str_->data(), str_->length()); | |
174 | value_=-1; // no real value for str | |
175 | return TRUE; | |
176 | } | |
177 | ||
178 | // Branch node, needs to take the first outbound edge and push state for the rest. | |
179 | const uint8_t * | |
180 | BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { | |
181 | while(length>kMaxBranchLinearSubNodeLength) { | |
182 | ++pos; // ignore the comparison byte | |
183 | // Push state for the greater-or-equal edge. | |
184 | stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); | |
185 | stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); | |
186 | // Follow the less-than edge. | |
187 | length>>=1; | |
188 | pos=jumpByDelta(pos); | |
189 | } | |
190 | // List of key-value pairs where values are either final values or jump deltas. | |
191 | // Read the first (key, value) pair. | |
192 | uint8_t trieByte=*pos++; | |
193 | int32_t node=*pos++; | |
194 | UBool isFinal=(UBool)(node&kValueIsFinal); | |
195 | int32_t value=readValue(pos, node>>1); | |
196 | pos=skipValue(pos, node); | |
197 | stack_->addElement((int32_t)(pos-bytes_), errorCode); | |
198 | stack_->addElement(((length-1)<<16)|str_->length(), errorCode); | |
199 | str_->append((char)trieByte, errorCode); | |
200 | if(isFinal) { | |
201 | pos_=NULL; | |
202 | sp_.set(str_->data(), str_->length()); | |
203 | value_=value; | |
204 | return NULL; | |
205 | } else { | |
206 | return pos+value; | |
207 | } | |
208 | } | |
209 | ||
210 | U_NAMESPACE_END |