]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
4388f060 A |
3 | /* |
4 | ******************************************************************************* | |
51004dcb | 5 | * Copyright (C) 2010-2012, International Business Machines |
4388f060 A |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************* | |
8 | * file name: stringtriebuilder.cpp | |
f3c0d7a5 | 9 | * encoding: UTF-8 |
4388f060 A |
10 | * tab size: 8 (not used) |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2010dec24 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
51004dcb | 17 | #include "utypeinfo.h" // for 'typeid' to work |
4388f060 A |
18 | #include "unicode/utypes.h" |
19 | #include "unicode/stringtriebuilder.h" | |
20 | #include "uassert.h" | |
21 | #include "uhash.h" | |
22 | ||
23 | U_CDECL_BEGIN | |
24 | ||
25 | static int32_t U_CALLCONV | |
26 | hashStringTrieNode(const UHashTok key) { | |
27 | return icu::StringTrieBuilder::hashNode(key.pointer); | |
28 | } | |
29 | ||
30 | static UBool U_CALLCONV | |
31 | equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { | |
32 | return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); | |
33 | } | |
34 | ||
35 | U_CDECL_END | |
36 | ||
37 | U_NAMESPACE_BEGIN | |
38 | ||
39 | StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} | |
40 | ||
41 | StringTrieBuilder::~StringTrieBuilder() { | |
42 | deleteCompactBuilder(); | |
43 | } | |
44 | ||
45 | void | |
46 | StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { | |
47 | if(U_FAILURE(errorCode)) { | |
48 | return; | |
49 | } | |
50 | nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, | |
51 | sizeGuess, &errorCode); | |
52 | if(U_SUCCESS(errorCode)) { | |
53 | if(nodes==NULL) { | |
54 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
55 | } else { | |
56 | uhash_setKeyDeleter(nodes, uprv_deleteUObject); | |
57 | } | |
58 | } | |
59 | } | |
60 | ||
61 | void | |
62 | StringTrieBuilder::deleteCompactBuilder() { | |
63 | uhash_close(nodes); | |
64 | nodes=NULL; | |
65 | } | |
66 | ||
67 | void | |
68 | StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, | |
69 | UErrorCode &errorCode) { | |
70 | if(buildOption==USTRINGTRIE_BUILD_FAST) { | |
71 | writeNode(0, elementsLength, 0); | |
72 | } else /* USTRINGTRIE_BUILD_SMALL */ { | |
73 | createCompactBuilder(2*elementsLength, errorCode); | |
74 | Node *root=makeNode(0, elementsLength, 0, errorCode); | |
75 | if(U_SUCCESS(errorCode)) { | |
76 | root->markRightEdgesFirst(-1); | |
77 | root->write(*this); | |
78 | } | |
79 | deleteCompactBuilder(); | |
80 | } | |
81 | } | |
82 | ||
83 | // Requires start<limit, | |
84 | // and all strings of the [start..limit[ elements must be sorted and | |
85 | // have a common prefix of length unitIndex. | |
86 | int32_t | |
87 | StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { | |
88 | UBool hasValue=FALSE; | |
89 | int32_t value=0; | |
90 | int32_t type; | |
91 | if(unitIndex==getElementStringLength(start)) { | |
92 | // An intermediate or final value. | |
93 | value=getElementValue(start++); | |
94 | if(start==limit) { | |
95 | return writeValueAndFinal(value, TRUE); // final-value node | |
96 | } | |
97 | hasValue=TRUE; | |
98 | } | |
99 | // Now all [start..limit[ strings are longer than unitIndex. | |
100 | int32_t minUnit=getElementUnit(start, unitIndex); | |
101 | int32_t maxUnit=getElementUnit(limit-1, unitIndex); | |
102 | if(minUnit==maxUnit) { | |
103 | // Linear-match node: All strings have the same character at unitIndex. | |
104 | int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); | |
105 | writeNode(start, limit, lastUnitIndex); | |
106 | // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. | |
107 | int32_t length=lastUnitIndex-unitIndex; | |
108 | int32_t maxLinearMatchLength=getMaxLinearMatchLength(); | |
109 | while(length>maxLinearMatchLength) { | |
110 | lastUnitIndex-=maxLinearMatchLength; | |
111 | length-=maxLinearMatchLength; | |
112 | writeElementUnits(start, lastUnitIndex, maxLinearMatchLength); | |
113 | write(getMinLinearMatch()+maxLinearMatchLength-1); | |
114 | } | |
115 | writeElementUnits(start, unitIndex, length); | |
116 | type=getMinLinearMatch()+length-1; | |
117 | } else { | |
118 | // Branch node. | |
119 | int32_t length=countElementUnits(start, limit, unitIndex); | |
120 | // length>=2 because minUnit!=maxUnit. | |
121 | writeBranchSubNode(start, limit, unitIndex, length); | |
122 | if(--length<getMinLinearMatch()) { | |
123 | type=length; | |
124 | } else { | |
125 | write(length); | |
126 | type=0; | |
127 | } | |
128 | } | |
129 | return writeValueAndType(hasValue, value, type); | |
130 | } | |
131 | ||
132 | // start<limit && all strings longer than unitIndex && | |
133 | // length different units at unitIndex | |
134 | int32_t | |
135 | StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) { | |
136 | UChar middleUnits[kMaxSplitBranchLevels]; | |
137 | int32_t lessThan[kMaxSplitBranchLevels]; | |
138 | int32_t ltLength=0; | |
139 | while(length>getMaxBranchLinearSubNodeLength()) { | |
140 | // Branch on the middle unit. | |
141 | // First, find the middle unit. | |
142 | int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); | |
143 | // Encode the less-than branch first. | |
144 | middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit | |
145 | lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); | |
146 | ++ltLength; | |
147 | // Continue for the greater-or-equal branch. | |
148 | start=i; | |
149 | length=length-length/2; | |
150 | } | |
151 | // For each unit, find its elements array start and whether it has a final value. | |
152 | int32_t starts[kMaxBranchLinearSubNodeLength]; | |
153 | UBool isFinal[kMaxBranchLinearSubNodeLength-1]; | |
154 | int32_t unitNumber=0; | |
155 | do { | |
156 | int32_t i=starts[unitNumber]=start; | |
157 | UChar unit=getElementUnit(i++, unitIndex); | |
158 | i=indexOfElementWithNextUnit(i, unitIndex, unit); | |
159 | isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); | |
160 | start=i; | |
161 | } while(++unitNumber<length-1); | |
162 | // unitNumber==length-1, and the maxUnit elements range is [start..limit[ | |
163 | starts[unitNumber]=start; | |
164 | ||
165 | // Write the sub-nodes in reverse order: The jump lengths are deltas from | |
166 | // after their own positions, so if we wrote the minUnit sub-node first, | |
167 | // then its jump delta would be larger. | |
168 | // Instead we write the minUnit sub-node last, for a shorter delta. | |
169 | int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; | |
170 | do { | |
171 | --unitNumber; | |
172 | if(!isFinal[unitNumber]) { | |
173 | jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1); | |
174 | } | |
175 | } while(unitNumber>0); | |
176 | // The maxUnit sub-node is written as the very last one because we do | |
177 | // not jump for it at all. | |
178 | unitNumber=length-1; | |
179 | writeNode(start, limit, unitIndex+1); | |
180 | int32_t offset=write(getElementUnit(start, unitIndex)); | |
181 | // Write the rest of this node's unit-value pairs. | |
182 | while(--unitNumber>=0) { | |
183 | start=starts[unitNumber]; | |
184 | int32_t value; | |
185 | if(isFinal[unitNumber]) { | |
186 | // Write the final value for the one string ending with this unit. | |
187 | value=getElementValue(start); | |
188 | } else { | |
189 | // Write the delta to the start position of the sub-node. | |
190 | value=offset-jumpTargets[unitNumber]; | |
191 | } | |
192 | writeValueAndFinal(value, isFinal[unitNumber]); | |
193 | offset=write(getElementUnit(start, unitIndex)); | |
194 | } | |
195 | // Write the split-branch nodes. | |
196 | while(ltLength>0) { | |
197 | --ltLength; | |
198 | writeDeltaTo(lessThan[ltLength]); | |
199 | offset=write(middleUnits[ltLength]); | |
200 | } | |
201 | return offset; | |
202 | } | |
203 | ||
204 | // Requires start<limit, | |
205 | // and all strings of the [start..limit[ elements must be sorted and | |
206 | // have a common prefix of length unitIndex. | |
207 | StringTrieBuilder::Node * | |
208 | StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { | |
209 | if(U_FAILURE(errorCode)) { | |
210 | return NULL; | |
211 | } | |
212 | UBool hasValue=FALSE; | |
213 | int32_t value=0; | |
214 | if(unitIndex==getElementStringLength(start)) { | |
215 | // An intermediate or final value. | |
216 | value=getElementValue(start++); | |
217 | if(start==limit) { | |
218 | return registerFinalValue(value, errorCode); | |
219 | } | |
220 | hasValue=TRUE; | |
221 | } | |
222 | Node *node; | |
223 | // Now all [start..limit[ strings are longer than unitIndex. | |
224 | int32_t minUnit=getElementUnit(start, unitIndex); | |
225 | int32_t maxUnit=getElementUnit(limit-1, unitIndex); | |
226 | if(minUnit==maxUnit) { | |
227 | // Linear-match node: All strings have the same character at unitIndex. | |
228 | int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); | |
229 | Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); | |
230 | // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. | |
231 | int32_t length=lastUnitIndex-unitIndex; | |
232 | int32_t maxLinearMatchLength=getMaxLinearMatchLength(); | |
233 | while(length>maxLinearMatchLength) { | |
234 | lastUnitIndex-=maxLinearMatchLength; | |
235 | length-=maxLinearMatchLength; | |
236 | node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); | |
237 | nextNode=registerNode(node, errorCode); | |
238 | } | |
239 | node=createLinearMatchNode(start, unitIndex, length, nextNode); | |
240 | } else { | |
241 | // Branch node. | |
242 | int32_t length=countElementUnits(start, limit, unitIndex); | |
243 | // length>=2 because minUnit!=maxUnit. | |
244 | Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); | |
245 | node=new BranchHeadNode(length, subNode); | |
246 | } | |
247 | if(hasValue && node!=NULL) { | |
248 | if(matchNodesCanHaveValues()) { | |
249 | ((ValueNode *)node)->setValue(value); | |
250 | } else { | |
251 | node=new IntermediateValueNode(value, registerNode(node, errorCode)); | |
252 | } | |
253 | } | |
254 | return registerNode(node, errorCode); | |
255 | } | |
256 | ||
257 | // start<limit && all strings longer than unitIndex && | |
258 | // length different units at unitIndex | |
259 | StringTrieBuilder::Node * | |
260 | StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, | |
261 | int32_t length, UErrorCode &errorCode) { | |
262 | if(U_FAILURE(errorCode)) { | |
263 | return NULL; | |
264 | } | |
265 | UChar middleUnits[kMaxSplitBranchLevels]; | |
266 | Node *lessThan[kMaxSplitBranchLevels]; | |
267 | int32_t ltLength=0; | |
268 | while(length>getMaxBranchLinearSubNodeLength()) { | |
269 | // Branch on the middle unit. | |
270 | // First, find the middle unit. | |
271 | int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); | |
272 | // Create the less-than branch. | |
273 | middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit | |
274 | lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); | |
275 | ++ltLength; | |
276 | // Continue for the greater-or-equal branch. | |
277 | start=i; | |
278 | length=length-length/2; | |
279 | } | |
280 | if(U_FAILURE(errorCode)) { | |
281 | return NULL; | |
282 | } | |
283 | ListBranchNode *listNode=new ListBranchNode(); | |
284 | if(listNode==NULL) { | |
285 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
286 | return NULL; | |
287 | } | |
288 | // For each unit, find its elements array start and whether it has a final value. | |
289 | int32_t unitNumber=0; | |
290 | do { | |
291 | int32_t i=start; | |
292 | UChar unit=getElementUnit(i++, unitIndex); | |
293 | i=indexOfElementWithNextUnit(i, unitIndex, unit); | |
294 | if(start==i-1 && unitIndex+1==getElementStringLength(start)) { | |
295 | listNode->add(unit, getElementValue(start)); | |
296 | } else { | |
297 | listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); | |
298 | } | |
299 | start=i; | |
300 | } while(++unitNumber<length-1); | |
301 | // unitNumber==length-1, and the maxUnit elements range is [start..limit[ | |
302 | UChar unit=getElementUnit(start, unitIndex); | |
303 | if(start==limit-1 && unitIndex+1==getElementStringLength(start)) { | |
304 | listNode->add(unit, getElementValue(start)); | |
305 | } else { | |
306 | listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); | |
307 | } | |
308 | Node *node=registerNode(listNode, errorCode); | |
309 | // Create the split-branch nodes. | |
310 | while(ltLength>0) { | |
311 | --ltLength; | |
312 | node=registerNode( | |
313 | new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); | |
314 | } | |
315 | return node; | |
316 | } | |
317 | ||
318 | StringTrieBuilder::Node * | |
319 | StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { | |
320 | if(U_FAILURE(errorCode)) { | |
321 | delete newNode; | |
322 | return NULL; | |
323 | } | |
324 | if(newNode==NULL) { | |
325 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
326 | return NULL; | |
327 | } | |
328 | const UHashElement *old=uhash_find(nodes, newNode); | |
329 | if(old!=NULL) { | |
330 | delete newNode; | |
331 | return (Node *)old->key.pointer; | |
332 | } | |
333 | // If uhash_puti() returns a non-zero value from an equivalent, previously | |
334 | // registered node, then uhash_find() failed to find that and we will leak newNode. | |
335 | #if U_DEBUG | |
336 | int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. | |
337 | #endif | |
338 | uhash_puti(nodes, newNode, 1, &errorCode); | |
339 | U_ASSERT(oldValue==0); | |
340 | if(U_FAILURE(errorCode)) { | |
341 | delete newNode; | |
342 | return NULL; | |
343 | } | |
344 | return newNode; | |
345 | } | |
346 | ||
347 | StringTrieBuilder::Node * | |
348 | StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { | |
349 | if(U_FAILURE(errorCode)) { | |
350 | return NULL; | |
351 | } | |
352 | FinalValueNode key(value); | |
353 | const UHashElement *old=uhash_find(nodes, &key); | |
354 | if(old!=NULL) { | |
355 | return (Node *)old->key.pointer; | |
356 | } | |
357 | Node *newNode=new FinalValueNode(value); | |
358 | if(newNode==NULL) { | |
359 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
360 | return NULL; | |
361 | } | |
362 | // If uhash_puti() returns a non-zero value from an equivalent, previously | |
363 | // registered node, then uhash_find() failed to find that and we will leak newNode. | |
364 | #if U_DEBUG | |
365 | int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. | |
366 | #endif | |
367 | uhash_puti(nodes, newNode, 1, &errorCode); | |
368 | U_ASSERT(oldValue==0); | |
369 | if(U_FAILURE(errorCode)) { | |
370 | delete newNode; | |
371 | return NULL; | |
372 | } | |
373 | return newNode; | |
374 | } | |
375 | ||
376 | UBool | |
377 | StringTrieBuilder::hashNode(const void *node) { | |
378 | return ((const Node *)node)->hashCode(); | |
379 | } | |
380 | ||
381 | UBool | |
382 | StringTrieBuilder::equalNodes(const void *left, const void *right) { | |
383 | return *(const Node *)left==*(const Node *)right; | |
384 | } | |
385 | ||
4388f060 A |
386 | UBool |
387 | StringTrieBuilder::Node::operator==(const Node &other) const { | |
388 | return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); | |
389 | } | |
390 | ||
391 | int32_t | |
392 | StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { | |
393 | if(offset==0) { | |
394 | offset=edgeNumber; | |
395 | } | |
396 | return edgeNumber; | |
397 | } | |
398 | ||
4388f060 A |
399 | UBool |
400 | StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { | |
401 | if(this==&other) { | |
402 | return TRUE; | |
403 | } | |
404 | if(!Node::operator==(other)) { | |
405 | return FALSE; | |
406 | } | |
407 | const FinalValueNode &o=(const FinalValueNode &)other; | |
408 | return value==o.value; | |
409 | } | |
410 | ||
411 | void | |
412 | StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { | |
413 | offset=builder.writeValueAndFinal(value, TRUE); | |
414 | } | |
415 | ||
416 | UBool | |
417 | StringTrieBuilder::ValueNode::operator==(const Node &other) const { | |
418 | if(this==&other) { | |
419 | return TRUE; | |
420 | } | |
421 | if(!Node::operator==(other)) { | |
422 | return FALSE; | |
423 | } | |
424 | const ValueNode &o=(const ValueNode &)other; | |
425 | return hasValue==o.hasValue && (!hasValue || value==o.value); | |
426 | } | |
427 | ||
428 | UBool | |
429 | StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { | |
430 | if(this==&other) { | |
431 | return TRUE; | |
432 | } | |
433 | if(!ValueNode::operator==(other)) { | |
434 | return FALSE; | |
435 | } | |
436 | const IntermediateValueNode &o=(const IntermediateValueNode &)other; | |
437 | return next==o.next; | |
438 | } | |
439 | ||
440 | int32_t | |
441 | StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { | |
442 | if(offset==0) { | |
443 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); | |
444 | } | |
445 | return edgeNumber; | |
446 | } | |
447 | ||
448 | void | |
449 | StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { | |
450 | next->write(builder); | |
451 | offset=builder.writeValueAndFinal(value, FALSE); | |
452 | } | |
453 | ||
454 | UBool | |
455 | StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { | |
456 | if(this==&other) { | |
457 | return TRUE; | |
458 | } | |
459 | if(!ValueNode::operator==(other)) { | |
460 | return FALSE; | |
461 | } | |
462 | const LinearMatchNode &o=(const LinearMatchNode &)other; | |
463 | return length==o.length && next==o.next; | |
464 | } | |
465 | ||
466 | int32_t | |
467 | StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { | |
468 | if(offset==0) { | |
469 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); | |
470 | } | |
471 | return edgeNumber; | |
472 | } | |
473 | ||
474 | UBool | |
475 | StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { | |
476 | if(this==&other) { | |
477 | return TRUE; | |
478 | } | |
479 | if(!Node::operator==(other)) { | |
480 | return FALSE; | |
481 | } | |
482 | const ListBranchNode &o=(const ListBranchNode &)other; | |
483 | for(int32_t i=0; i<length; ++i) { | |
484 | if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { | |
485 | return FALSE; | |
486 | } | |
487 | } | |
488 | return TRUE; | |
489 | } | |
490 | ||
491 | int32_t | |
492 | StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { | |
493 | if(offset==0) { | |
494 | firstEdgeNumber=edgeNumber; | |
495 | int32_t step=0; | |
496 | int32_t i=length; | |
497 | do { | |
498 | Node *edge=equal[--i]; | |
499 | if(edge!=NULL) { | |
500 | edgeNumber=edge->markRightEdgesFirst(edgeNumber-step); | |
501 | } | |
502 | // For all but the rightmost edge, decrement the edge number. | |
503 | step=1; | |
504 | } while(i>0); | |
505 | offset=edgeNumber; | |
506 | } | |
507 | return edgeNumber; | |
508 | } | |
509 | ||
510 | void | |
511 | StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { | |
512 | // Write the sub-nodes in reverse order: The jump lengths are deltas from | |
513 | // after their own positions, so if we wrote the minUnit sub-node first, | |
514 | // then its jump delta would be larger. | |
515 | // Instead we write the minUnit sub-node last, for a shorter delta. | |
516 | int32_t unitNumber=length-1; | |
517 | Node *rightEdge=equal[unitNumber]; | |
518 | int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); | |
519 | do { | |
520 | --unitNumber; | |
521 | if(equal[unitNumber]!=NULL) { | |
522 | equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); | |
523 | } | |
524 | } while(unitNumber>0); | |
525 | // The maxUnit sub-node is written as the very last one because we do | |
526 | // not jump for it at all. | |
527 | unitNumber=length-1; | |
528 | if(rightEdge==NULL) { | |
529 | builder.writeValueAndFinal(values[unitNumber], TRUE); | |
530 | } else { | |
531 | rightEdge->write(builder); | |
532 | } | |
533 | offset=builder.write(units[unitNumber]); | |
534 | // Write the rest of this node's unit-value pairs. | |
535 | while(--unitNumber>=0) { | |
536 | int32_t value; | |
537 | UBool isFinal; | |
538 | if(equal[unitNumber]==NULL) { | |
539 | // Write the final value for the one string ending with this unit. | |
540 | value=values[unitNumber]; | |
541 | isFinal=TRUE; | |
542 | } else { | |
543 | // Write the delta to the start position of the sub-node. | |
544 | U_ASSERT(equal[unitNumber]->getOffset()>0); | |
545 | value=offset-equal[unitNumber]->getOffset(); | |
546 | isFinal=FALSE; | |
547 | } | |
548 | builder.writeValueAndFinal(value, isFinal); | |
549 | offset=builder.write(units[unitNumber]); | |
550 | } | |
551 | } | |
552 | ||
553 | UBool | |
554 | StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { | |
555 | if(this==&other) { | |
556 | return TRUE; | |
557 | } | |
558 | if(!Node::operator==(other)) { | |
559 | return FALSE; | |
560 | } | |
561 | const SplitBranchNode &o=(const SplitBranchNode &)other; | |
562 | return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; | |
563 | } | |
564 | ||
565 | int32_t | |
566 | StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { | |
567 | if(offset==0) { | |
568 | firstEdgeNumber=edgeNumber; | |
569 | edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); | |
570 | offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); | |
571 | } | |
572 | return edgeNumber; | |
573 | } | |
574 | ||
575 | void | |
576 | StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { | |
577 | // Encode the less-than branch first. | |
578 | lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); | |
579 | // Encode the greater-or-equal branch last because we do not jump for it at all. | |
580 | greaterOrEqual->write(builder); | |
581 | // Write this node. | |
582 | U_ASSERT(lessThan->getOffset()>0); | |
583 | builder.writeDeltaTo(lessThan->getOffset()); // less-than | |
584 | offset=builder.write(unit); | |
585 | } | |
586 | ||
587 | UBool | |
588 | StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { | |
589 | if(this==&other) { | |
590 | return TRUE; | |
591 | } | |
592 | if(!ValueNode::operator==(other)) { | |
593 | return FALSE; | |
594 | } | |
595 | const BranchHeadNode &o=(const BranchHeadNode &)other; | |
596 | return length==o.length && next==o.next; | |
597 | } | |
598 | ||
599 | int32_t | |
600 | StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { | |
601 | if(offset==0) { | |
602 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); | |
603 | } | |
604 | return edgeNumber; | |
605 | } | |
606 | ||
607 | void | |
608 | StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { | |
609 | next->write(builder); | |
610 | if(length<=builder.getMinLinearMatch()) { | |
611 | offset=builder.writeValueAndType(hasValue, value, length-1); | |
612 | } else { | |
613 | builder.write(length-1); | |
614 | offset=builder.writeValueAndType(hasValue, value, 0); | |
615 | } | |
616 | } | |
617 | ||
618 | U_NAMESPACE_END |