]>
Commit | Line | Data |
---|---|---|
4388f060 A |
1 | /* |
2 | ******************************************************************************* | |
51004dcb | 3 | * Copyright (C) 2010-2012, International Business Machines |
4388f060 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ******************************************************************************* | |
6 | * file name: stringtriebuilder.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2010dec24 | |
12 | * created by: Markus W. Scherer | |
13 | */ | |
14 | ||
51004dcb | 15 | #include "utypeinfo.h" // for 'typeid' to work |
4388f060 A |
16 | #include "unicode/utypes.h" |
17 | #include "unicode/stringtriebuilder.h" | |
18 | #include "uassert.h" | |
19 | #include "uhash.h" | |
20 | ||
21 | U_CDECL_BEGIN | |
22 | ||
23 | static int32_t U_CALLCONV | |
24 | hashStringTrieNode(const UHashTok key) { | |
25 | return icu::StringTrieBuilder::hashNode(key.pointer); | |
26 | } | |
27 | ||
28 | static UBool U_CALLCONV | |
29 | equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { | |
30 | return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); | |
31 | } | |
32 | ||
33 | U_CDECL_END | |
34 | ||
35 | U_NAMESPACE_BEGIN | |
36 | ||
37 | StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} | |
38 | ||
39 | StringTrieBuilder::~StringTrieBuilder() { | |
40 | deleteCompactBuilder(); | |
41 | } | |
42 | ||
43 | void | |
44 | StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { | |
45 | if(U_FAILURE(errorCode)) { | |
46 | return; | |
47 | } | |
48 | nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, | |
49 | sizeGuess, &errorCode); | |
50 | if(U_SUCCESS(errorCode)) { | |
51 | if(nodes==NULL) { | |
52 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
53 | } else { | |
54 | uhash_setKeyDeleter(nodes, uprv_deleteUObject); | |
55 | } | |
56 | } | |
57 | } | |
58 | ||
59 | void | |
60 | StringTrieBuilder::deleteCompactBuilder() { | |
61 | uhash_close(nodes); | |
62 | nodes=NULL; | |
63 | } | |
64 | ||
65 | void | |
66 | StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, | |
67 | UErrorCode &errorCode) { | |
68 | if(buildOption==USTRINGTRIE_BUILD_FAST) { | |
69 | writeNode(0, elementsLength, 0); | |
70 | } else /* USTRINGTRIE_BUILD_SMALL */ { | |
71 | createCompactBuilder(2*elementsLength, errorCode); | |
72 | Node *root=makeNode(0, elementsLength, 0, errorCode); | |
73 | if(U_SUCCESS(errorCode)) { | |
74 | root->markRightEdgesFirst(-1); | |
75 | root->write(*this); | |
76 | } | |
77 | deleteCompactBuilder(); | |
78 | } | |
79 | } | |
80 | ||
81 | // Requires start<limit, | |
82 | // and all strings of the [start..limit[ elements must be sorted and | |
83 | // have a common prefix of length unitIndex. | |
84 | int32_t | |
85 | StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { | |
86 | UBool hasValue=FALSE; | |
87 | int32_t value=0; | |
88 | int32_t type; | |
89 | if(unitIndex==getElementStringLength(start)) { | |
90 | // An intermediate or final value. | |
91 | value=getElementValue(start++); | |
92 | if(start==limit) { | |
93 | return writeValueAndFinal(value, TRUE); // final-value node | |
94 | } | |
95 | hasValue=TRUE; | |
96 | } | |
97 | // Now all [start..limit[ strings are longer than unitIndex. | |
98 | int32_t minUnit=getElementUnit(start, unitIndex); | |
99 | int32_t maxUnit=getElementUnit(limit-1, unitIndex); | |
100 | if(minUnit==maxUnit) { | |
101 | // Linear-match node: All strings have the same character at unitIndex. | |
102 | int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); | |
103 | writeNode(start, limit, lastUnitIndex); | |
104 | // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. | |
105 | int32_t length=lastUnitIndex-unitIndex; | |
106 | int32_t maxLinearMatchLength=getMaxLinearMatchLength(); | |
107 | while(length>maxLinearMatchLength) { | |
108 | lastUnitIndex-=maxLinearMatchLength; | |
109 | length-=maxLinearMatchLength; | |
110 | writeElementUnits(start, lastUnitIndex, maxLinearMatchLength); | |
111 | write(getMinLinearMatch()+maxLinearMatchLength-1); | |
112 | } | |
113 | writeElementUnits(start, unitIndex, length); | |
114 | type=getMinLinearMatch()+length-1; | |
115 | } else { | |
116 | // Branch node. | |
117 | int32_t length=countElementUnits(start, limit, unitIndex); | |
118 | // length>=2 because minUnit!=maxUnit. | |
119 | writeBranchSubNode(start, limit, unitIndex, length); | |
120 | if(--length<getMinLinearMatch()) { | |
121 | type=length; | |
122 | } else { | |
123 | write(length); | |
124 | type=0; | |
125 | } | |
126 | } | |
127 | return writeValueAndType(hasValue, value, type); | |
128 | } | |
129 | ||
130 | // start<limit && all strings longer than unitIndex && | |
131 | // length different units at unitIndex | |
132 | int32_t | |
133 | StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) { | |
134 | UChar middleUnits[kMaxSplitBranchLevels]; | |
135 | int32_t lessThan[kMaxSplitBranchLevels]; | |
136 | int32_t ltLength=0; | |
137 | while(length>getMaxBranchLinearSubNodeLength()) { | |
138 | // Branch on the middle unit. | |
139 | // First, find the middle unit. | |
140 | int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); | |
141 | // Encode the less-than branch first. | |
142 | middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit | |
143 | lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); | |
144 | ++ltLength; | |
145 | // Continue for the greater-or-equal branch. | |
146 | start=i; | |
147 | length=length-length/2; | |
148 | } | |
149 | // For each unit, find its elements array start and whether it has a final value. | |
150 | int32_t starts[kMaxBranchLinearSubNodeLength]; | |
151 | UBool isFinal[kMaxBranchLinearSubNodeLength-1]; | |
152 | int32_t unitNumber=0; | |
153 | do { | |
154 | int32_t i=starts[unitNumber]=start; | |
155 | UChar unit=getElementUnit(i++, unitIndex); | |
156 | i=indexOfElementWithNextUnit(i, unitIndex, unit); | |
157 | isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); | |
158 | start=i; | |
159 | } while(++unitNumber<length-1); | |
160 | // unitNumber==length-1, and the maxUnit elements range is [start..limit[ | |
161 | starts[unitNumber]=start; | |
162 | ||
163 | // Write the sub-nodes in reverse order: The jump lengths are deltas from | |
164 | // after their own positions, so if we wrote the minUnit sub-node first, | |
165 | // then its jump delta would be larger. | |
166 | // Instead we write the minUnit sub-node last, for a shorter delta. | |
167 | int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; | |
168 | do { | |
169 | --unitNumber; | |
170 | if(!isFinal[unitNumber]) { | |
171 | jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1); | |
172 | } | |
173 | } while(unitNumber>0); | |
174 | // The maxUnit sub-node is written as the very last one because we do | |
175 | // not jump for it at all. | |
176 | unitNumber=length-1; | |
177 | writeNode(start, limit, unitIndex+1); | |
178 | int32_t offset=write(getElementUnit(start, unitIndex)); | |
179 | // Write the rest of this node's unit-value pairs. | |
180 | while(--unitNumber>=0) { | |
181 | start=starts[unitNumber]; | |
182 | int32_t value; | |
183 | if(isFinal[unitNumber]) { | |
184 | // Write the final value for the one string ending with this unit. | |
185 | value=getElementValue(start); | |
186 | } else { | |
187 | // Write the delta to the start position of the sub-node. | |
188 | value=offset-jumpTargets[unitNumber]; | |
189 | } | |
190 | writeValueAndFinal(value, isFinal[unitNumber]); | |
191 | offset=write(getElementUnit(start, unitIndex)); | |
192 | } | |
193 | // Write the split-branch nodes. | |
194 | while(ltLength>0) { | |
195 | --ltLength; | |
196 | writeDeltaTo(lessThan[ltLength]); | |
197 | offset=write(middleUnits[ltLength]); | |
198 | } | |
199 | return offset; | |
200 | } | |
201 | ||
202 | // Requires start<limit, | |
203 | // and all strings of the [start..limit[ elements must be sorted and | |
204 | // have a common prefix of length unitIndex. | |
205 | StringTrieBuilder::Node * | |
206 | StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { | |
207 | if(U_FAILURE(errorCode)) { | |
208 | return NULL; | |
209 | } | |
210 | UBool hasValue=FALSE; | |
211 | int32_t value=0; | |
212 | if(unitIndex==getElementStringLength(start)) { | |
213 | // An intermediate or final value. | |
214 | value=getElementValue(start++); | |
215 | if(start==limit) { | |
216 | return registerFinalValue(value, errorCode); | |
217 | } | |
218 | hasValue=TRUE; | |
219 | } | |
220 | Node *node; | |
221 | // Now all [start..limit[ strings are longer than unitIndex. | |
222 | int32_t minUnit=getElementUnit(start, unitIndex); | |
223 | int32_t maxUnit=getElementUnit(limit-1, unitIndex); | |
224 | if(minUnit==maxUnit) { | |
225 | // Linear-match node: All strings have the same character at unitIndex. | |
226 | int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); | |
227 | Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); | |
228 | // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. | |
229 | int32_t length=lastUnitIndex-unitIndex; | |
230 | int32_t maxLinearMatchLength=getMaxLinearMatchLength(); | |
231 | while(length>maxLinearMatchLength) { | |
232 | lastUnitIndex-=maxLinearMatchLength; | |
233 | length-=maxLinearMatchLength; | |
234 | node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); | |
235 | nextNode=registerNode(node, errorCode); | |
236 | } | |
237 | node=createLinearMatchNode(start, unitIndex, length, nextNode); | |
238 | } else { | |
239 | // Branch node. | |
240 | int32_t length=countElementUnits(start, limit, unitIndex); | |
241 | // length>=2 because minUnit!=maxUnit. | |
242 | Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); | |
243 | node=new BranchHeadNode(length, subNode); | |
244 | } | |
245 | if(hasValue && node!=NULL) { | |
246 | if(matchNodesCanHaveValues()) { | |
247 | ((ValueNode *)node)->setValue(value); | |
248 | } else { | |
249 | node=new IntermediateValueNode(value, registerNode(node, errorCode)); | |
250 | } | |
251 | } | |
252 | return registerNode(node, errorCode); | |
253 | } | |
254 | ||
255 | // start<limit && all strings longer than unitIndex && | |
256 | // length different units at unitIndex | |
257 | StringTrieBuilder::Node * | |
258 | StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, | |
259 | int32_t length, UErrorCode &errorCode) { | |
260 | if(U_FAILURE(errorCode)) { | |
261 | return NULL; | |
262 | } | |
263 | UChar middleUnits[kMaxSplitBranchLevels]; | |
264 | Node *lessThan[kMaxSplitBranchLevels]; | |
265 | int32_t ltLength=0; | |
266 | while(length>getMaxBranchLinearSubNodeLength()) { | |
267 | // Branch on the middle unit. | |
268 | // First, find the middle unit. | |
269 | int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); | |
270 | // Create the less-than branch. | |
271 | middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit | |
272 | lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); | |
273 | ++ltLength; | |
274 | // Continue for the greater-or-equal branch. | |
275 | start=i; | |
276 | length=length-length/2; | |
277 | } | |
278 | if(U_FAILURE(errorCode)) { | |
279 | return NULL; | |
280 | } | |
281 | ListBranchNode *listNode=new ListBranchNode(); | |
282 | if(listNode==NULL) { | |
283 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
284 | return NULL; | |
285 | } | |
286 | // For each unit, find its elements array start and whether it has a final value. | |
287 | int32_t unitNumber=0; | |
288 | do { | |
289 | int32_t i=start; | |
290 | UChar unit=getElementUnit(i++, unitIndex); | |
291 | i=indexOfElementWithNextUnit(i, unitIndex, unit); | |
292 | if(start==i-1 && unitIndex+1==getElementStringLength(start)) { | |
293 | listNode->add(unit, getElementValue(start)); | |
294 | } else { | |
295 | listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); | |
296 | } | |
297 | start=i; | |
298 | } while(++unitNumber<length-1); | |
299 | // unitNumber==length-1, and the maxUnit elements range is [start..limit[ | |
300 | UChar unit=getElementUnit(start, unitIndex); | |
301 | if(start==limit-1 && unitIndex+1==getElementStringLength(start)) { | |
302 | listNode->add(unit, getElementValue(start)); | |
303 | } else { | |
304 | listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); | |
305 | } | |
306 | Node *node=registerNode(listNode, errorCode); | |
307 | // Create the split-branch nodes. | |
308 | while(ltLength>0) { | |
309 | --ltLength; | |
310 | node=registerNode( | |
311 | new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); | |
312 | } | |
313 | return node; | |
314 | } | |
315 | ||
316 | StringTrieBuilder::Node * | |
317 | StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { | |
318 | if(U_FAILURE(errorCode)) { | |
319 | delete newNode; | |
320 | return NULL; | |
321 | } | |
322 | if(newNode==NULL) { | |
323 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
324 | return NULL; | |
325 | } | |
326 | const UHashElement *old=uhash_find(nodes, newNode); | |
327 | if(old!=NULL) { | |
328 | delete newNode; | |
329 | return (Node *)old->key.pointer; | |
330 | } | |
331 | // If uhash_puti() returns a non-zero value from an equivalent, previously | |
332 | // registered node, then uhash_find() failed to find that and we will leak newNode. | |
333 | #if U_DEBUG | |
334 | int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. | |
335 | #endif | |
336 | uhash_puti(nodes, newNode, 1, &errorCode); | |
337 | U_ASSERT(oldValue==0); | |
338 | if(U_FAILURE(errorCode)) { | |
339 | delete newNode; | |
340 | return NULL; | |
341 | } | |
342 | return newNode; | |
343 | } | |
344 | ||
345 | StringTrieBuilder::Node * | |
346 | StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { | |
347 | if(U_FAILURE(errorCode)) { | |
348 | return NULL; | |
349 | } | |
350 | FinalValueNode key(value); | |
351 | const UHashElement *old=uhash_find(nodes, &key); | |
352 | if(old!=NULL) { | |
353 | return (Node *)old->key.pointer; | |
354 | } | |
355 | Node *newNode=new FinalValueNode(value); | |
356 | if(newNode==NULL) { | |
357 | errorCode=U_MEMORY_ALLOCATION_ERROR; | |
358 | return NULL; | |
359 | } | |
360 | // If uhash_puti() returns a non-zero value from an equivalent, previously | |
361 | // registered node, then uhash_find() failed to find that and we will leak newNode. | |
362 | #if U_DEBUG | |
363 | int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. | |
364 | #endif | |
365 | uhash_puti(nodes, newNode, 1, &errorCode); | |
366 | U_ASSERT(oldValue==0); | |
367 | if(U_FAILURE(errorCode)) { | |
368 | delete newNode; | |
369 | return NULL; | |
370 | } | |
371 | return newNode; | |
372 | } | |
373 | ||
374 | UBool | |
375 | StringTrieBuilder::hashNode(const void *node) { | |
376 | return ((const Node *)node)->hashCode(); | |
377 | } | |
378 | ||
379 | UBool | |
380 | StringTrieBuilder::equalNodes(const void *left, const void *right) { | |
381 | return *(const Node *)left==*(const Node *)right; | |
382 | } | |
383 | ||
4388f060 A |
384 | UBool |
385 | StringTrieBuilder::Node::operator==(const Node &other) const { | |
386 | return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); | |
387 | } | |
388 | ||
389 | int32_t | |
390 | StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { | |
391 | if(offset==0) { | |
392 | offset=edgeNumber; | |
393 | } | |
394 | return edgeNumber; | |
395 | } | |
396 | ||
4388f060 A |
397 | UBool |
398 | StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { | |
399 | if(this==&other) { | |
400 | return TRUE; | |
401 | } | |
402 | if(!Node::operator==(other)) { | |
403 | return FALSE; | |
404 | } | |
405 | const FinalValueNode &o=(const FinalValueNode &)other; | |
406 | return value==o.value; | |
407 | } | |
408 | ||
409 | void | |
410 | StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { | |
411 | offset=builder.writeValueAndFinal(value, TRUE); | |
412 | } | |
413 | ||
414 | UBool | |
415 | StringTrieBuilder::ValueNode::operator==(const Node &other) const { | |
416 | if(this==&other) { | |
417 | return TRUE; | |
418 | } | |
419 | if(!Node::operator==(other)) { | |
420 | return FALSE; | |
421 | } | |
422 | const ValueNode &o=(const ValueNode &)other; | |
423 | return hasValue==o.hasValue && (!hasValue || value==o.value); | |
424 | } | |
425 | ||
426 | UBool | |
427 | StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { | |
428 | if(this==&other) { | |
429 | return TRUE; | |
430 | } | |
431 | if(!ValueNode::operator==(other)) { | |
432 | return FALSE; | |
433 | } | |
434 | const IntermediateValueNode &o=(const IntermediateValueNode &)other; | |
435 | return next==o.next; | |
436 | } | |
437 | ||
438 | int32_t | |
439 | StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { | |
440 | if(offset==0) { | |
441 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); | |
442 | } | |
443 | return edgeNumber; | |
444 | } | |
445 | ||
446 | void | |
447 | StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { | |
448 | next->write(builder); | |
449 | offset=builder.writeValueAndFinal(value, FALSE); | |
450 | } | |
451 | ||
452 | UBool | |
453 | StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { | |
454 | if(this==&other) { | |
455 | return TRUE; | |
456 | } | |
457 | if(!ValueNode::operator==(other)) { | |
458 | return FALSE; | |
459 | } | |
460 | const LinearMatchNode &o=(const LinearMatchNode &)other; | |
461 | return length==o.length && next==o.next; | |
462 | } | |
463 | ||
464 | int32_t | |
465 | StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { | |
466 | if(offset==0) { | |
467 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); | |
468 | } | |
469 | return edgeNumber; | |
470 | } | |
471 | ||
472 | UBool | |
473 | StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { | |
474 | if(this==&other) { | |
475 | return TRUE; | |
476 | } | |
477 | if(!Node::operator==(other)) { | |
478 | return FALSE; | |
479 | } | |
480 | const ListBranchNode &o=(const ListBranchNode &)other; | |
481 | for(int32_t i=0; i<length; ++i) { | |
482 | if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { | |
483 | return FALSE; | |
484 | } | |
485 | } | |
486 | return TRUE; | |
487 | } | |
488 | ||
489 | int32_t | |
490 | StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { | |
491 | if(offset==0) { | |
492 | firstEdgeNumber=edgeNumber; | |
493 | int32_t step=0; | |
494 | int32_t i=length; | |
495 | do { | |
496 | Node *edge=equal[--i]; | |
497 | if(edge!=NULL) { | |
498 | edgeNumber=edge->markRightEdgesFirst(edgeNumber-step); | |
499 | } | |
500 | // For all but the rightmost edge, decrement the edge number. | |
501 | step=1; | |
502 | } while(i>0); | |
503 | offset=edgeNumber; | |
504 | } | |
505 | return edgeNumber; | |
506 | } | |
507 | ||
508 | void | |
509 | StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { | |
510 | // Write the sub-nodes in reverse order: The jump lengths are deltas from | |
511 | // after their own positions, so if we wrote the minUnit sub-node first, | |
512 | // then its jump delta would be larger. | |
513 | // Instead we write the minUnit sub-node last, for a shorter delta. | |
514 | int32_t unitNumber=length-1; | |
515 | Node *rightEdge=equal[unitNumber]; | |
516 | int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); | |
517 | do { | |
518 | --unitNumber; | |
519 | if(equal[unitNumber]!=NULL) { | |
520 | equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); | |
521 | } | |
522 | } while(unitNumber>0); | |
523 | // The maxUnit sub-node is written as the very last one because we do | |
524 | // not jump for it at all. | |
525 | unitNumber=length-1; | |
526 | if(rightEdge==NULL) { | |
527 | builder.writeValueAndFinal(values[unitNumber], TRUE); | |
528 | } else { | |
529 | rightEdge->write(builder); | |
530 | } | |
531 | offset=builder.write(units[unitNumber]); | |
532 | // Write the rest of this node's unit-value pairs. | |
533 | while(--unitNumber>=0) { | |
534 | int32_t value; | |
535 | UBool isFinal; | |
536 | if(equal[unitNumber]==NULL) { | |
537 | // Write the final value for the one string ending with this unit. | |
538 | value=values[unitNumber]; | |
539 | isFinal=TRUE; | |
540 | } else { | |
541 | // Write the delta to the start position of the sub-node. | |
542 | U_ASSERT(equal[unitNumber]->getOffset()>0); | |
543 | value=offset-equal[unitNumber]->getOffset(); | |
544 | isFinal=FALSE; | |
545 | } | |
546 | builder.writeValueAndFinal(value, isFinal); | |
547 | offset=builder.write(units[unitNumber]); | |
548 | } | |
549 | } | |
550 | ||
551 | UBool | |
552 | StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { | |
553 | if(this==&other) { | |
554 | return TRUE; | |
555 | } | |
556 | if(!Node::operator==(other)) { | |
557 | return FALSE; | |
558 | } | |
559 | const SplitBranchNode &o=(const SplitBranchNode &)other; | |
560 | return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; | |
561 | } | |
562 | ||
563 | int32_t | |
564 | StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { | |
565 | if(offset==0) { | |
566 | firstEdgeNumber=edgeNumber; | |
567 | edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); | |
568 | offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); | |
569 | } | |
570 | return edgeNumber; | |
571 | } | |
572 | ||
573 | void | |
574 | StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { | |
575 | // Encode the less-than branch first. | |
576 | lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); | |
577 | // Encode the greater-or-equal branch last because we do not jump for it at all. | |
578 | greaterOrEqual->write(builder); | |
579 | // Write this node. | |
580 | U_ASSERT(lessThan->getOffset()>0); | |
581 | builder.writeDeltaTo(lessThan->getOffset()); // less-than | |
582 | offset=builder.write(unit); | |
583 | } | |
584 | ||
585 | UBool | |
586 | StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { | |
587 | if(this==&other) { | |
588 | return TRUE; | |
589 | } | |
590 | if(!ValueNode::operator==(other)) { | |
591 | return FALSE; | |
592 | } | |
593 | const BranchHeadNode &o=(const BranchHeadNode &)other; | |
594 | return length==o.length && next==o.next; | |
595 | } | |
596 | ||
597 | int32_t | |
598 | StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { | |
599 | if(offset==0) { | |
600 | offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); | |
601 | } | |
602 | return edgeNumber; | |
603 | } | |
604 | ||
605 | void | |
606 | StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { | |
607 | next->write(builder); | |
608 | if(length<=builder.getMinLinearMatch()) { | |
609 | offset=builder.writeValueAndType(hasValue, value, length-1); | |
610 | } else { | |
611 | builder.write(length-1); | |
612 | offset=builder.writeValueAndType(hasValue, value, 0); | |
613 | } | |
614 | } | |
615 | ||
616 | U_NAMESPACE_END |