]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/bytestrie.cpp
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / common / bytestrie.cpp
CommitLineData
4388f060
A
1/*
2*******************************************************************************
3* Copyright (C) 2010-2011, International Business Machines
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
6* file name: bytestrie.cpp
7* encoding: US-ASCII
8* tab size: 8 (not used)
9* indentation:4
10*
11* created on: 2010sep25
12* created by: Markus W. Scherer
13*/
14
15#include "unicode/utypes.h"
16#include "unicode/bytestream.h"
17#include "unicode/bytestrie.h"
18#include "unicode/uobject.h"
19#include "cmemory.h"
20#include "uassert.h"
21
22U_NAMESPACE_BEGIN
23
24BytesTrie::~BytesTrie() {
25 uprv_free(ownedArray_);
26}
27
28// lead byte already shifted right by 1.
29int32_t
30BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
31 int32_t value;
32 if(leadByte<kMinTwoByteValueLead) {
33 value=leadByte-kMinOneByteValueLead;
34 } else if(leadByte<kMinThreeByteValueLead) {
35 value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
36 } else if(leadByte<kFourByteValueLead) {
37 value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
38 } else if(leadByte==kFourByteValueLead) {
39 value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
40 } else {
41 value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
42 }
43 return value;
44}
45
46const uint8_t *
47BytesTrie::jumpByDelta(const uint8_t *pos) {
48 int32_t delta=*pos++;
49 if(delta<kMinTwoByteDeltaLead) {
50 // nothing to do
51 } else if(delta<kMinThreeByteDeltaLead) {
52 delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
53 } else if(delta<kFourByteDeltaLead) {
54 delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
55 pos+=2;
56 } else if(delta==kFourByteDeltaLead) {
57 delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
58 pos+=3;
59 } else {
60 delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
61 pos+=4;
62 }
63 return pos+delta;
64}
65
66UStringTrieResult
67BytesTrie::current() const {
68 const uint8_t *pos=pos_;
69 if(pos==NULL) {
70 return USTRINGTRIE_NO_MATCH;
71 } else {
72 int32_t node;
73 return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
74 valueResult(node) : USTRINGTRIE_NO_VALUE;
75 }
76}
77
78UStringTrieResult
79BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
80 // Branch according to the current byte.
81 if(length==0) {
82 length=*pos++;
83 }
84 ++length;
85 // The length of the branch is the number of bytes to select from.
86 // The data structure encodes a binary search.
87 while(length>kMaxBranchLinearSubNodeLength) {
88 if(inByte<*pos++) {
89 length>>=1;
90 pos=jumpByDelta(pos);
91 } else {
92 length=length-(length>>1);
93 pos=skipDelta(pos);
94 }
95 }
96 // Drop down to linear search for the last few bytes.
97 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
98 // and divides length by 2.
99 do {
100 if(inByte==*pos++) {
101 UStringTrieResult result;
102 int32_t node=*pos;
103 U_ASSERT(node>=kMinValueLead);
104 if(node&kValueIsFinal) {
105 // Leave the final value for getValue() to read.
106 result=USTRINGTRIE_FINAL_VALUE;
107 } else {
108 // Use the non-final value as the jump delta.
109 ++pos;
110 // int32_t delta=readValue(pos, node>>1);
111 node>>=1;
112 int32_t delta;
113 if(node<kMinTwoByteValueLead) {
114 delta=node-kMinOneByteValueLead;
115 } else if(node<kMinThreeByteValueLead) {
116 delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
117 } else if(node<kFourByteValueLead) {
118 delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
119 pos+=2;
120 } else if(node==kFourByteValueLead) {
121 delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
122 pos+=3;
123 } else {
124 delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
125 pos+=4;
126 }
127 // end readValue()
128 pos+=delta;
129 node=*pos;
130 result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
131 }
132 pos_=pos;
133 return result;
134 }
135 --length;
136 pos=skipValue(pos);
137 } while(length>1);
138 if(inByte==*pos++) {
139 pos_=pos;
140 int32_t node=*pos;
141 return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
142 } else {
143 stop();
144 return USTRINGTRIE_NO_MATCH;
145 }
146}
147
148UStringTrieResult
149BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
150 for(;;) {
151 int32_t node=*pos++;
152 if(node<kMinLinearMatch) {
153 return branchNext(pos, node, inByte);
154 } else if(node<kMinValueLead) {
155 // Match the first of length+1 bytes.
156 int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
157 if(inByte==*pos++) {
158 remainingMatchLength_=--length;
159 pos_=pos;
160 return (length<0 && (node=*pos)>=kMinValueLead) ?
161 valueResult(node) : USTRINGTRIE_NO_VALUE;
162 } else {
163 // No match.
164 break;
165 }
166 } else if(node&kValueIsFinal) {
167 // No further matching bytes.
168 break;
169 } else {
170 // Skip intermediate value.
171 pos=skipValue(pos, node);
172 // The next node must not also be a value node.
173 U_ASSERT(*pos<kMinValueLead);
174 }
175 }
176 stop();
177 return USTRINGTRIE_NO_MATCH;
178}
179
180UStringTrieResult
181BytesTrie::next(int32_t inByte) {
182 const uint8_t *pos=pos_;
183 if(pos==NULL) {
184 return USTRINGTRIE_NO_MATCH;
185 }
186 if(inByte<0) {
187 inByte+=0x100;
188 }
189 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
190 if(length>=0) {
191 // Remaining part of a linear-match node.
192 if(inByte==*pos++) {
193 remainingMatchLength_=--length;
194 pos_=pos;
195 int32_t node;
196 return (length<0 && (node=*pos)>=kMinValueLead) ?
197 valueResult(node) : USTRINGTRIE_NO_VALUE;
198 } else {
199 stop();
200 return USTRINGTRIE_NO_MATCH;
201 }
202 }
203 return nextImpl(pos, inByte);
204}
205
206UStringTrieResult
207BytesTrie::next(const char *s, int32_t sLength) {
208 if(sLength<0 ? *s==0 : sLength==0) {
209 // Empty input.
210 return current();
211 }
212 const uint8_t *pos=pos_;
213 if(pos==NULL) {
214 return USTRINGTRIE_NO_MATCH;
215 }
216 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
217 for(;;) {
218 // Fetch the next input byte, if there is one.
219 // Continue a linear-match node without rechecking sLength<0.
220 int32_t inByte;
221 if(sLength<0) {
222 for(;;) {
223 if((inByte=*s++)==0) {
224 remainingMatchLength_=length;
225 pos_=pos;
226 int32_t node;
227 return (length<0 && (node=*pos)>=kMinValueLead) ?
228 valueResult(node) : USTRINGTRIE_NO_VALUE;
229 }
230 if(length<0) {
231 remainingMatchLength_=length;
232 break;
233 }
234 if(inByte!=*pos) {
235 stop();
236 return USTRINGTRIE_NO_MATCH;
237 }
238 ++pos;
239 --length;
240 }
241 } else {
242 for(;;) {
243 if(sLength==0) {
244 remainingMatchLength_=length;
245 pos_=pos;
246 int32_t node;
247 return (length<0 && (node=*pos)>=kMinValueLead) ?
248 valueResult(node) : USTRINGTRIE_NO_VALUE;
249 }
250 inByte=*s++;
251 --sLength;
252 if(length<0) {
253 remainingMatchLength_=length;
254 break;
255 }
256 if(inByte!=*pos) {
257 stop();
258 return USTRINGTRIE_NO_MATCH;
259 }
260 ++pos;
261 --length;
262 }
263 }
264 for(;;) {
265 int32_t node=*pos++;
266 if(node<kMinLinearMatch) {
267 UStringTrieResult result=branchNext(pos, node, inByte);
268 if(result==USTRINGTRIE_NO_MATCH) {
269 return USTRINGTRIE_NO_MATCH;
270 }
271 // Fetch the next input byte, if there is one.
272 if(sLength<0) {
273 if((inByte=*s++)==0) {
274 return result;
275 }
276 } else {
277 if(sLength==0) {
278 return result;
279 }
280 inByte=*s++;
281 --sLength;
282 }
283 if(result==USTRINGTRIE_FINAL_VALUE) {
284 // No further matching bytes.
285 stop();
286 return USTRINGTRIE_NO_MATCH;
287 }
288 pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
289 } else if(node<kMinValueLead) {
290 // Match length+1 bytes.
291 length=node-kMinLinearMatch; // Actual match length minus 1.
292 if(inByte!=*pos) {
293 stop();
294 return USTRINGTRIE_NO_MATCH;
295 }
296 ++pos;
297 --length;
298 break;
299 } else if(node&kValueIsFinal) {
300 // No further matching bytes.
301 stop();
302 return USTRINGTRIE_NO_MATCH;
303 } else {
304 // Skip intermediate value.
305 pos=skipValue(pos, node);
306 // The next node must not also be a value node.
307 U_ASSERT(*pos<kMinValueLead);
308 }
309 }
310 }
311}
312
313const uint8_t *
314BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
315 UBool haveUniqueValue, int32_t &uniqueValue) {
316 while(length>kMaxBranchLinearSubNodeLength) {
317 ++pos; // ignore the comparison byte
318 if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
319 return NULL;
320 }
321 length=length-(length>>1);
322 pos=skipDelta(pos);
323 }
324 do {
325 ++pos; // ignore a comparison byte
326 // handle its value
327 int32_t node=*pos++;
328 UBool isFinal=(UBool)(node&kValueIsFinal);
329 int32_t value=readValue(pos, node>>1);
330 pos=skipValue(pos, node);
331 if(isFinal) {
332 if(haveUniqueValue) {
333 if(value!=uniqueValue) {
334 return NULL;
335 }
336 } else {
337 uniqueValue=value;
338 haveUniqueValue=TRUE;
339 }
340 } else {
341 if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
342 return NULL;
343 }
344 haveUniqueValue=TRUE;
345 }
346 } while(--length>1);
347 return pos+1; // ignore the last comparison byte
348}
349
350UBool
351BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
352 for(;;) {
353 int32_t node=*pos++;
354 if(node<kMinLinearMatch) {
355 if(node==0) {
356 node=*pos++;
357 }
358 pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
359 if(pos==NULL) {
360 return FALSE;
361 }
362 haveUniqueValue=TRUE;
363 } else if(node<kMinValueLead) {
364 // linear-match node
365 pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
366 } else {
367 UBool isFinal=(UBool)(node&kValueIsFinal);
368 int32_t value=readValue(pos, node>>1);
369 if(haveUniqueValue) {
370 if(value!=uniqueValue) {
371 return FALSE;
372 }
373 } else {
374 uniqueValue=value;
375 haveUniqueValue=TRUE;
376 }
377 if(isFinal) {
378 return TRUE;
379 }
380 pos=skipValue(pos, node);
381 }
382 }
383}
384
385int32_t
386BytesTrie::getNextBytes(ByteSink &out) const {
387 const uint8_t *pos=pos_;
388 if(pos==NULL) {
389 return 0;
390 }
391 if(remainingMatchLength_>=0) {
392 append(out, *pos); // Next byte of a pending linear-match node.
393 return 1;
394 }
395 int32_t node=*pos++;
396 if(node>=kMinValueLead) {
397 if(node&kValueIsFinal) {
398 return 0;
399 } else {
400 pos=skipValue(pos, node);
401 node=*pos++;
402 U_ASSERT(node<kMinValueLead);
403 }
404 }
405 if(node<kMinLinearMatch) {
406 if(node==0) {
407 node=*pos++;
408 }
409 getNextBranchBytes(pos, ++node, out);
410 return node;
411 } else {
412 // First byte of the linear-match node.
413 append(out, *pos);
414 return 1;
415 }
416}
417
418void
419BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
420 while(length>kMaxBranchLinearSubNodeLength) {
421 ++pos; // ignore the comparison byte
422 getNextBranchBytes(jumpByDelta(pos), length>>1, out);
423 length=length-(length>>1);
424 pos=skipDelta(pos);
425 }
426 do {
427 append(out, *pos++);
428 pos=skipValue(pos);
429 } while(--length>1);
430 append(out, *pos);
431}
432
433void
434BytesTrie::append(ByteSink &out, int c) {
435 char ch=(char)c;
436 out.Append(&ch, 1);
437}
438
439U_NAMESPACE_END