]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/triedict.cpp
ICU-461.12.tar.gz
[apple/icu.git] / icuSources / common / triedict.cpp
1 /**
2 *******************************************************************************
3 * Copyright (C) 2006-2008, International Business Machines Corporation *
4 * and others. All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_BREAK_ITERATION
11
12 #include "triedict.h"
13 #include "unicode/chariter.h"
14 #include "unicode/uchriter.h"
15 #include "unicode/strenum.h"
16 #include "unicode/uenum.h"
17 #include "unicode/udata.h"
18 #include "cmemory.h"
19 #include "udataswp.h"
20 #include "uvector.h"
21 #include "uvectr32.h"
22 #include "uarrsort.h"
23
24 //#define DEBUG_TRIE_DICT 1
25
26 #ifdef DEBUG_TRIE_DICT
27 #include <sys/times.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #endif
31
32 U_NAMESPACE_BEGIN
33
34 /*******************************************************************
35 * TrieWordDictionary
36 */
37
38 TrieWordDictionary::TrieWordDictionary() {
39 }
40
41 TrieWordDictionary::~TrieWordDictionary() {
42 }
43
44 /*******************************************************************
45 * MutableTrieDictionary
46 */
47
48 // Node structure for the ternary, uncompressed trie
49 struct TernaryNode : public UMemory {
50 UChar ch; // UTF-16 code unit
51 uint16_t flags; // Flag word
52 TernaryNode *low; // Less-than link
53 TernaryNode *equal; // Equal link
54 TernaryNode *high; // Greater-than link
55
56 TernaryNode(UChar uc);
57 ~TernaryNode();
58 };
59
60 enum MutableTrieNodeFlags {
61 kEndsWord = 0x0001 // This node marks the end of a valid word
62 };
63
64 inline
65 TernaryNode::TernaryNode(UChar uc) {
66 ch = uc;
67 flags = 0;
68 low = NULL;
69 equal = NULL;
70 high = NULL;
71 }
72
73 // Not inline since it's recursive
74 TernaryNode::~TernaryNode() {
75 delete low;
76 delete equal;
77 delete high;
78 }
79
80 MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) {
81 // Start the trie off with something. Having the root node already present
82 // cuts a special case out of the search/insertion functions.
83 // Making it a median character cuts the worse case for searches from
84 // 4x a balanced trie to 2x a balanced trie. It's best to choose something
85 // that starts a word that is midway in the list.
86 fTrie = new TernaryNode(median);
87 if (fTrie == NULL) {
88 status = U_MEMORY_ALLOCATION_ERROR;
89 }
90 fIter = utext_openUChars(NULL, NULL, 0, &status);
91 if (U_SUCCESS(status) && fIter == NULL) {
92 status = U_MEMORY_ALLOCATION_ERROR;
93 }
94 }
95
96 MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) {
97 fTrie = NULL;
98 fIter = utext_openUChars(NULL, NULL, 0, &status);
99 if (U_SUCCESS(status) && fIter == NULL) {
100 status = U_MEMORY_ALLOCATION_ERROR;
101 }
102 }
103
104 MutableTrieDictionary::~MutableTrieDictionary() {
105 delete fTrie;
106 utext_close(fIter);
107 }
108
109 int32_t
110 MutableTrieDictionary::search( UText *text,
111 int32_t maxLength,
112 int32_t *lengths,
113 int &count,
114 int limit,
115 TernaryNode *&parent,
116 UBool &pMatched ) const {
117 // TODO: current implementation works in UTF-16 space
118 const TernaryNode *up = NULL;
119 const TernaryNode *p = fTrie;
120 int mycount = 0;
121 pMatched = TRUE;
122 int i;
123
124 UChar uc = utext_current32(text);
125 for (i = 0; i < maxLength && p != NULL; ++i) {
126 while (p != NULL) {
127 if (uc < p->ch) {
128 up = p;
129 p = p->low;
130 }
131 else if (uc == p->ch) {
132 break;
133 }
134 else {
135 up = p;
136 p = p->high;
137 }
138 }
139 if (p == NULL) {
140 pMatched = FALSE;
141 break;
142 }
143 // Must be equal to get here
144 if (limit > 0 && (p->flags & kEndsWord)) {
145 lengths[mycount++] = i+1;
146 --limit;
147 }
148 up = p;
149 p = p->equal;
150 uc = utext_next32(text);
151 uc = utext_current32(text);
152 }
153
154 // Note that there is no way to reach here with up == 0 unless
155 // maxLength is 0 coming in.
156 parent = (TernaryNode *)up;
157 count = mycount;
158 return i;
159 }
160
161 void
162 MutableTrieDictionary::addWord( const UChar *word,
163 int32_t length,
164 UErrorCode &status ) {
165 #if 0
166 if (length <= 0) {
167 status = U_ILLEGAL_ARGUMENT_ERROR;
168 return;
169 }
170 #endif
171 TernaryNode *parent;
172 UBool pMatched;
173 int count;
174 fIter = utext_openUChars(fIter, word, length, &status);
175
176 int matched;
177 matched = search(fIter, length, NULL, count, 0, parent, pMatched);
178
179 while (matched++ < length) {
180 UChar32 uc = utext_next32(fIter); // TODO: supplemetary support?
181 U_ASSERT(uc != U_SENTINEL);
182 TernaryNode *newNode = new TernaryNode(uc);
183 if (newNode == NULL) {
184 status = U_MEMORY_ALLOCATION_ERROR;
185 return;
186 }
187 if (pMatched) {
188 parent->equal = newNode;
189 }
190 else {
191 pMatched = TRUE;
192 if (uc < parent->ch) {
193 parent->low = newNode;
194 }
195 else {
196 parent->high = newNode;
197 }
198 }
199 parent = newNode;
200 }
201
202 parent->flags |= kEndsWord;
203 }
204
205 #if 0
206 void
207 MutableTrieDictionary::addWords( UEnumeration *words,
208 UErrorCode &status ) {
209 int32_t length;
210 const UChar *word;
211 while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) {
212 addWord(word, length, status);
213 }
214 }
215 #endif
216
217 int32_t
218 MutableTrieDictionary::matches( UText *text,
219 int32_t maxLength,
220 int32_t *lengths,
221 int &count,
222 int limit ) const {
223 TernaryNode *parent;
224 UBool pMatched;
225 return search(text, maxLength, lengths, count, limit, parent, pMatched);
226 }
227
228 // Implementation of iteration for MutableTrieDictionary
229 class MutableTrieEnumeration : public StringEnumeration {
230 private:
231 UStack fNodeStack; // Stack of nodes to process
232 UVector32 fBranchStack; // Stack of which branch we are working on
233 TernaryNode *fRoot; // Root node
234 enum StackBranch {
235 kLessThan,
236 kEqual,
237 kGreaterThan,
238 kDone
239 };
240
241 public:
242 static UClassID U_EXPORT2 getStaticClassID(void);
243 virtual UClassID getDynamicClassID(void) const;
244 public:
245 MutableTrieEnumeration(TernaryNode *root, UErrorCode &status)
246 : fNodeStack(status), fBranchStack(status) {
247 fRoot = root;
248 fNodeStack.push(root, status);
249 fBranchStack.push(kLessThan, status);
250 unistr.remove();
251 }
252
253 virtual ~MutableTrieEnumeration() {
254 }
255
256 virtual StringEnumeration *clone() const {
257 UErrorCode status = U_ZERO_ERROR;
258 return new MutableTrieEnumeration(fRoot, status);
259 }
260
261 virtual const UnicodeString *snext(UErrorCode &status) {
262 if (fNodeStack.empty() || U_FAILURE(status)) {
263 return NULL;
264 }
265 TernaryNode *node = (TernaryNode *) fNodeStack.peek();
266 StackBranch where = (StackBranch) fBranchStack.peeki();
267 while (!fNodeStack.empty() && U_SUCCESS(status)) {
268 UBool emit;
269 UBool equal;
270
271 switch (where) {
272 case kLessThan:
273 if (node->low != NULL) {
274 fBranchStack.setElementAt(kEqual, fBranchStack.size()-1);
275 node = (TernaryNode *) fNodeStack.push(node->low, status);
276 where = (StackBranch) fBranchStack.push(kLessThan, status);
277 break;
278 }
279 case kEqual:
280 emit = (node->flags & kEndsWord) != 0;
281 equal = (node->equal != NULL);
282 // If this node should be part of the next emitted string, append
283 // the UChar to the string, and make sure we pop it when we come
284 // back to this node. The character should only be in the string
285 // for as long as we're traversing the equal subtree of this node
286 if (equal || emit) {
287 unistr.append(node->ch);
288 fBranchStack.setElementAt(kGreaterThan, fBranchStack.size()-1);
289 }
290 if (equal) {
291 node = (TernaryNode *) fNodeStack.push(node->equal, status);
292 where = (StackBranch) fBranchStack.push(kLessThan, status);
293 }
294 if (emit) {
295 return &unistr;
296 }
297 if (equal) {
298 break;
299 }
300 case kGreaterThan:
301 // If this node's character is in the string, remove it.
302 if (node->equal != NULL || (node->flags & kEndsWord)) {
303 unistr.truncate(unistr.length()-1);
304 }
305 if (node->high != NULL) {
306 fBranchStack.setElementAt(kDone, fBranchStack.size()-1);
307 node = (TernaryNode *) fNodeStack.push(node->high, status);
308 where = (StackBranch) fBranchStack.push(kLessThan, status);
309 break;
310 }
311 case kDone:
312 fNodeStack.pop();
313 fBranchStack.popi();
314 node = (TernaryNode *) fNodeStack.peek();
315 where = (StackBranch) fBranchStack.peeki();
316 break;
317 default:
318 return NULL;
319 }
320 }
321 return NULL;
322 }
323
324 // Very expensive, but this should never be used.
325 virtual int32_t count(UErrorCode &status) const {
326 MutableTrieEnumeration counter(fRoot, status);
327 int32_t result = 0;
328 while (counter.snext(status) != NULL && U_SUCCESS(status)) {
329 ++result;
330 }
331 return result;
332 }
333
334 virtual void reset(UErrorCode &status) {
335 fNodeStack.removeAllElements();
336 fBranchStack.removeAllElements();
337 fNodeStack.push(fRoot, status);
338 fBranchStack.push(kLessThan, status);
339 unistr.remove();
340 }
341 };
342
343 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MutableTrieEnumeration)
344
345 StringEnumeration *
346 MutableTrieDictionary::openWords( UErrorCode &status ) const {
347 if (U_FAILURE(status)) {
348 return NULL;
349 }
350 return new MutableTrieEnumeration(fTrie, status);
351 }
352
353 /*******************************************************************
354 * CompactTrieDictionary
355 */
356
357 struct CompactTrieHeader {
358 uint32_t size; // Size of the data in bytes
359 uint32_t magic; // Magic number (including version)
360 uint16_t nodeCount; // Number of entries in offsets[]
361 uint16_t root; // Node number of the root node
362 uint32_t offsets[1]; // Offsets to nodes from start of data
363 };
364
365 // Note that to avoid platform-specific alignment issues, all members of the node
366 // structures should be the same size, or should contain explicit padding to
367 // natural alignment boundaries.
368
369 // We can't use a bitfield for the flags+count field, because the layout of those
370 // is not portable. 12 bits of count allows for up to 4096 entries in a node.
371 struct CompactTrieNode {
372 uint16_t flagscount; // Count of sub-entries, plus flags
373 };
374
375 enum CompactTrieNodeFlags {
376 kVerticalNode = 0x1000, // This is a vertical node
377 kParentEndsWord = 0x2000, // The node whose equal link points to this ends a word
378 kReservedFlag1 = 0x4000,
379 kReservedFlag2 = 0x8000,
380 kCountMask = 0x0FFF, // The count portion of flagscount
381 kFlagMask = 0xF000 // The flags portion of flagscount
382 };
383
384 // The two node types are distinguished by the kVerticalNode flag.
385
386 struct CompactTrieHorizontalEntry {
387 uint16_t ch; // UChar
388 uint16_t equal; // Equal link node index
389 };
390
391 // We don't use inheritance here because C++ does not guarantee that the
392 // base class comes first in memory!!
393
394 struct CompactTrieHorizontalNode {
395 uint16_t flagscount; // Count of sub-entries, plus flags
396 CompactTrieHorizontalEntry entries[1];
397 };
398
399 struct CompactTrieVerticalNode {
400 uint16_t flagscount; // Count of sub-entries, plus flags
401 uint16_t equal; // Equal link node index
402 uint16_t chars[1]; // Code units
403 };
404
405 // {'Dic', 1}, version 1
406 #define COMPACT_TRIE_MAGIC_1 0x44696301
407
408 CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj,
409 UErrorCode &status )
410 : fUData(dataObj)
411 {
412 fData = (const CompactTrieHeader *) udata_getMemory(dataObj);
413 fOwnData = FALSE;
414 if (fData->magic != COMPACT_TRIE_MAGIC_1) {
415 status = U_ILLEGAL_ARGUMENT_ERROR;
416 fData = NULL;
417 }
418 }
419 CompactTrieDictionary::CompactTrieDictionary( const void *data,
420 UErrorCode &status )
421 : fUData(NULL)
422 {
423 fData = (const CompactTrieHeader *) data;
424 fOwnData = FALSE;
425 if (fData->magic != COMPACT_TRIE_MAGIC_1) {
426 status = U_ILLEGAL_ARGUMENT_ERROR;
427 fData = NULL;
428 }
429 }
430
431 CompactTrieDictionary::CompactTrieDictionary( const MutableTrieDictionary &dict,
432 UErrorCode &status )
433 : fUData(NULL)
434 {
435 fData = compactMutableTrieDictionary(dict, status);
436 fOwnData = !U_FAILURE(status);
437 }
438
439 CompactTrieDictionary::~CompactTrieDictionary() {
440 if (fOwnData) {
441 uprv_free((void *)fData);
442 }
443 if (fUData) {
444 udata_close(fUData);
445 }
446 }
447
448 uint32_t
449 CompactTrieDictionary::dataSize() const {
450 return fData->size;
451 }
452
453 const void *
454 CompactTrieDictionary::data() const {
455 return fData;
456 }
457
458 // This function finds the address of a node for us, given its node ID
459 static inline const CompactTrieNode *
460 getCompactNode(const CompactTrieHeader *header, uint16_t node) {
461 return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]);
462 }
463
464 int32_t
465 CompactTrieDictionary::matches( UText *text,
466 int32_t maxLength,
467 int32_t *lengths,
468 int &count,
469 int limit ) const {
470 // TODO: current implementation works in UTF-16 space
471 const CompactTrieNode *node = getCompactNode(fData, fData->root);
472 int mycount = 0;
473
474 UChar uc = utext_current32(text);
475 int i = 0;
476
477 while (node != NULL) {
478 // Check if the node we just exited ends a word
479 if (limit > 0 && (node->flagscount & kParentEndsWord)) {
480 lengths[mycount++] = i;
481 --limit;
482 }
483 // Check that we haven't exceeded the maximum number of input characters.
484 // We have to do that here rather than in the while condition so that
485 // we can check for ending a word, above.
486 if (i >= maxLength) {
487 break;
488 }
489
490 int nodeCount = (node->flagscount & kCountMask);
491 if (nodeCount == 0) {
492 // Special terminal node; return now
493 break;
494 }
495 if (node->flagscount & kVerticalNode) {
496 // Vertical node; check all the characters in it
497 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
498 for (int j = 0; j < nodeCount && i < maxLength; ++j) {
499 if (uc != vnode->chars[j]) {
500 // We hit a non-equal character; return
501 goto exit;
502 }
503 utext_next32(text);
504 uc = utext_current32(text);
505 ++i;
506 }
507 // To get here we must have come through the whole list successfully;
508 // go on to the next node. Note that a word cannot end in the middle
509 // of a vertical node.
510 node = getCompactNode(fData, vnode->equal);
511 }
512 else {
513 // Horizontal node; do binary search
514 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
515 int low = 0;
516 int high = nodeCount-1;
517 int middle;
518 node = NULL; // If we don't find a match, we'll fall out of the loop
519 while (high >= low) {
520 middle = (high+low)/2;
521 if (uc == hnode->entries[middle].ch) {
522 // We hit a match; get the next node and next character
523 node = getCompactNode(fData, hnode->entries[middle].equal);
524 utext_next32(text);
525 uc = utext_current32(text);
526 ++i;
527 break;
528 }
529 else if (uc < hnode->entries[middle].ch) {
530 high = middle-1;
531 }
532 else {
533 low = middle+1;
534 }
535 }
536 }
537 }
538 exit:
539 count = mycount;
540 return i;
541 }
542
543 // Implementation of iteration for CompactTrieDictionary
544 class CompactTrieEnumeration : public StringEnumeration {
545 private:
546 UVector32 fNodeStack; // Stack of nodes to process
547 UVector32 fIndexStack; // Stack of where in node we are
548 const CompactTrieHeader *fHeader; // Trie data
549
550 public:
551 static UClassID U_EXPORT2 getStaticClassID(void);
552 virtual UClassID getDynamicClassID(void) const;
553 public:
554 CompactTrieEnumeration(const CompactTrieHeader *header, UErrorCode &status)
555 : fNodeStack(status), fIndexStack(status) {
556 fHeader = header;
557 fNodeStack.push(header->root, status);
558 fIndexStack.push(0, status);
559 unistr.remove();
560 }
561
562 virtual ~CompactTrieEnumeration() {
563 }
564
565 virtual StringEnumeration *clone() const {
566 UErrorCode status = U_ZERO_ERROR;
567 return new CompactTrieEnumeration(fHeader, status);
568 }
569
570 virtual const UnicodeString * snext(UErrorCode &status);
571
572 // Very expensive, but this should never be used.
573 virtual int32_t count(UErrorCode &status) const {
574 CompactTrieEnumeration counter(fHeader, status);
575 int32_t result = 0;
576 while (counter.snext(status) != NULL && U_SUCCESS(status)) {
577 ++result;
578 }
579 return result;
580 }
581
582 virtual void reset(UErrorCode &status) {
583 fNodeStack.removeAllElements();
584 fIndexStack.removeAllElements();
585 fNodeStack.push(fHeader->root, status);
586 fIndexStack.push(0, status);
587 unistr.remove();
588 }
589 };
590
591 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactTrieEnumeration)
592
593 const UnicodeString *
594 CompactTrieEnumeration::snext(UErrorCode &status) {
595 if (fNodeStack.empty() || U_FAILURE(status)) {
596 return NULL;
597 }
598 const CompactTrieNode *node = getCompactNode(fHeader, fNodeStack.peeki());
599 int where = fIndexStack.peeki();
600 while (!fNodeStack.empty() && U_SUCCESS(status)) {
601 int nodeCount = (node->flagscount & kCountMask);
602 UBool goingDown = FALSE;
603 if (nodeCount == 0) {
604 // Terminal node; go up immediately
605 fNodeStack.popi();
606 fIndexStack.popi();
607 node = getCompactNode(fHeader, fNodeStack.peeki());
608 where = fIndexStack.peeki();
609 }
610 else if (node->flagscount & kVerticalNode) {
611 // Vertical node
612 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
613 if (where == 0) {
614 // Going down
615 unistr.append((const UChar *)vnode->chars, (int32_t) nodeCount);
616 fIndexStack.setElementAt(1, fIndexStack.size()-1);
617 node = getCompactNode(fHeader, fNodeStack.push(vnode->equal, status));
618 where = fIndexStack.push(0, status);
619 goingDown = TRUE;
620 }
621 else {
622 // Going up
623 unistr.truncate(unistr.length()-nodeCount);
624 fNodeStack.popi();
625 fIndexStack.popi();
626 node = getCompactNode(fHeader, fNodeStack.peeki());
627 where = fIndexStack.peeki();
628 }
629 }
630 else {
631 // Horizontal node
632 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
633 if (where > 0) {
634 // Pop previous char
635 unistr.truncate(unistr.length()-1);
636 }
637 if (where < nodeCount) {
638 // Push on next node
639 unistr.append((UChar)hnode->entries[where].ch);
640 fIndexStack.setElementAt(where+1, fIndexStack.size()-1);
641 node = getCompactNode(fHeader, fNodeStack.push(hnode->entries[where].equal, status));
642 where = fIndexStack.push(0, status);
643 goingDown = TRUE;
644 }
645 else {
646 // Going up
647 fNodeStack.popi();
648 fIndexStack.popi();
649 node = getCompactNode(fHeader, fNodeStack.peeki());
650 where = fIndexStack.peeki();
651 }
652 }
653 // Check if the parent of the node we've just gone down to ends a
654 // word. If so, return it.
655 if (goingDown && (node->flagscount & kParentEndsWord)) {
656 return &unistr;
657 }
658 }
659 return NULL;
660 }
661
662 StringEnumeration *
663 CompactTrieDictionary::openWords( UErrorCode &status ) const {
664 if (U_FAILURE(status)) {
665 return NULL;
666 }
667 return new CompactTrieEnumeration(fData, status);
668 }
669
670 //
671 // Below here is all code related to converting a ternary trie to a compact trie
672 // and back again
673 //
674
675 // Helper classes to construct the compact trie
676 class BuildCompactTrieNode: public UMemory {
677 public:
678 UBool fParentEndsWord;
679 UBool fVertical;
680 UBool fHasDuplicate;
681 int32_t fNodeID;
682 UnicodeString fChars;
683
684 public:
685 BuildCompactTrieNode(UBool parentEndsWord, UBool vertical, UStack &nodes, UErrorCode &status) {
686 fParentEndsWord = parentEndsWord;
687 fHasDuplicate = FALSE;
688 fVertical = vertical;
689 fNodeID = nodes.size();
690 nodes.push(this, status);
691 }
692
693 virtual ~BuildCompactTrieNode() {
694 }
695
696 virtual uint32_t size() {
697 return sizeof(uint16_t);
698 }
699
700 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &/*translate*/) {
701 // Write flag/count
702 *((uint16_t *)(bytes+offset)) = (fChars.length() & kCountMask)
703 | (fVertical ? kVerticalNode : 0) | (fParentEndsWord ? kParentEndsWord : 0 );
704 offset += sizeof(uint16_t);
705 }
706 };
707
708 class BuildCompactTrieHorizontalNode: public BuildCompactTrieNode {
709 public:
710 UStack fLinks;
711
712 public:
713 BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
714 : BuildCompactTrieNode(parentEndsWord, FALSE, nodes, status), fLinks(status) {
715 }
716
717 virtual ~BuildCompactTrieHorizontalNode() {
718 }
719
720 virtual uint32_t size() {
721 return offsetof(CompactTrieHorizontalNode,entries) +
722 (fChars.length()*sizeof(CompactTrieHorizontalEntry));
723 }
724
725 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
726 BuildCompactTrieNode::write(bytes, offset, translate);
727 int32_t count = fChars.length();
728 for (int32_t i = 0; i < count; ++i) {
729 CompactTrieHorizontalEntry *entry = (CompactTrieHorizontalEntry *)(bytes+offset);
730 entry->ch = fChars[i];
731 entry->equal = translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID);
732 #ifdef DEBUG_TRIE_DICT
733 if (entry->equal == 0) {
734 fprintf(stderr, "ERROR: horizontal link %d, logical node %d maps to physical node zero\n",
735 i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID);
736 }
737 #endif
738 offset += sizeof(CompactTrieHorizontalEntry);
739 }
740 }
741
742 void addNode(UChar ch, BuildCompactTrieNode *link, UErrorCode &status) {
743 fChars.append(ch);
744 fLinks.push(link, status);
745 }
746 };
747
748 class BuildCompactTrieVerticalNode: public BuildCompactTrieNode {
749 public:
750 BuildCompactTrieNode *fEqual;
751
752 public:
753 BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
754 : BuildCompactTrieNode(parentEndsWord, TRUE, nodes, status) {
755 fEqual = NULL;
756 }
757
758 virtual ~BuildCompactTrieVerticalNode() {
759 }
760
761 virtual uint32_t size() {
762 return offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t));
763 }
764
765 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
766 CompactTrieVerticalNode *node = (CompactTrieVerticalNode *)(bytes+offset);
767 BuildCompactTrieNode::write(bytes, offset, translate);
768 node->equal = translate.elementAti(fEqual->fNodeID);
769 offset += sizeof(node->equal);
770 #ifdef DEBUG_TRIE_DICT
771 if (node->equal == 0) {
772 fprintf(stderr, "ERROR: vertical link, logical node %d maps to physical node zero\n",
773 fEqual->fNodeID);
774 }
775 #endif
776 fChars.extract(0, fChars.length(), (UChar *)node->chars);
777 offset += sizeof(uint16_t)*fChars.length();
778 }
779
780 void addChar(UChar ch) {
781 fChars.append(ch);
782 }
783
784 void setLink(BuildCompactTrieNode *node) {
785 fEqual = node;
786 }
787 };
788
789 // Forward declaration
790 static void walkHorizontal(const TernaryNode *node,
791 BuildCompactTrieHorizontalNode *building,
792 UStack &nodes,
793 UErrorCode &status);
794
795 // Convert one node. Uses recursion.
796
797 static BuildCompactTrieNode *
798 compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes, UErrorCode &status) {
799 if (U_FAILURE(status)) {
800 return NULL;
801 }
802 BuildCompactTrieNode *result = NULL;
803 UBool horizontal = (node->low != NULL || node->high != NULL);
804 if (horizontal) {
805 BuildCompactTrieHorizontalNode *hResult =
806 new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status);
807 if (hResult == NULL) {
808 status = U_MEMORY_ALLOCATION_ERROR;
809 return NULL;
810 }
811 if (U_SUCCESS(status)) {
812 walkHorizontal(node, hResult, nodes, status);
813 result = hResult;
814 }
815 }
816 else {
817 BuildCompactTrieVerticalNode *vResult =
818 new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status);
819 if (vResult == NULL) {
820 status = U_MEMORY_ALLOCATION_ERROR;
821 }
822 else if (U_SUCCESS(status)) {
823 UBool endsWord = FALSE;
824 // Take up nodes until we end a word, or hit a node with < or > links
825 do {
826 vResult->addChar(node->ch);
827 endsWord = (node->flags & kEndsWord) != 0;
828 node = node->equal;
829 }
830 while(node != NULL && !endsWord && node->low == NULL && node->high == NULL);
831 if (node == NULL) {
832 if (!endsWord) {
833 status = U_ILLEGAL_ARGUMENT_ERROR; // Corrupt input trie
834 }
835 else {
836 vResult->setLink((BuildCompactTrieNode *)nodes[1]);
837 }
838 }
839 else {
840 vResult->setLink(compactOneNode(node, endsWord, nodes, status));
841 }
842 result = vResult;
843 }
844 }
845 return result;
846 }
847
848 // Walk the set of peers at the same level, to build a horizontal node.
849 // Uses recursion.
850
851 static void walkHorizontal(const TernaryNode *node,
852 BuildCompactTrieHorizontalNode *building,
853 UStack &nodes,
854 UErrorCode &status) {
855 while (U_SUCCESS(status) && node != NULL) {
856 if (node->low != NULL) {
857 walkHorizontal(node->low, building, nodes, status);
858 }
859 BuildCompactTrieNode *link = NULL;
860 if (node->equal != NULL) {
861 link = compactOneNode(node->equal, (node->flags & kEndsWord) != 0, nodes, status);
862 }
863 else if (node->flags & kEndsWord) {
864 link = (BuildCompactTrieNode *)nodes[1];
865 }
866 if (U_SUCCESS(status) && link != NULL) {
867 building->addNode(node->ch, link, status);
868 }
869 // Tail recurse manually instead of leaving it to the compiler.
870 //if (node->high != NULL) {
871 // walkHorizontal(node->high, building, nodes, status);
872 //}
873 node = node->high;
874 }
875 }
876
877 U_NAMESPACE_END
878 U_NAMESPACE_USE
879 U_CDECL_BEGIN
880 static int32_t U_CALLCONV
881 _sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) {
882 BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl;
883 BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr;
884 // Check for comparing a node to itself, to avoid spurious duplicates
885 if (left == right) {
886 return 0;
887 }
888 // Most significant is type of node. Can never coalesce.
889 if (left->fVertical != right->fVertical) {
890 return left->fVertical - right->fVertical;
891 }
892 // Next, the "parent ends word" flag. If that differs, we cannot coalesce.
893 if (left->fParentEndsWord != right->fParentEndsWord) {
894 return left->fParentEndsWord - right->fParentEndsWord;
895 }
896 // Next, the string. If that differs, we can never coalesce.
897 int32_t result = left->fChars.compare(right->fChars);
898 if (result != 0) {
899 return result;
900 }
901 // We know they're both the same node type, so branch for the two cases.
902 if (left->fVertical) {
903 result = ((BuildCompactTrieVerticalNode *)left)->fEqual->fNodeID
904 - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID;
905 }
906 else {
907 // We need to compare the links vectors. They should be the
908 // same size because the strings were equal.
909 // We compare the node IDs instead of the pointers, to handle
910 // coalesced nodes.
911 BuildCompactTrieHorizontalNode *hleft, *hright;
912 hleft = (BuildCompactTrieHorizontalNode *)left;
913 hright = (BuildCompactTrieHorizontalNode *)right;
914 int32_t count = hleft->fLinks.size();
915 for (int32_t i = 0; i < count && result == 0; ++i) {
916 result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID -
917 ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID;
918 }
919 }
920 // If they are equal to each other, mark them (speeds coalescing)
921 if (result == 0) {
922 left->fHasDuplicate = TRUE;
923 right->fHasDuplicate = TRUE;
924 }
925 return result;
926 }
927 U_CDECL_END
928 U_NAMESPACE_BEGIN
929
930 static void coalesceDuplicates(UStack &nodes, UErrorCode &status) {
931 // We sort the array of nodes to place duplicates next to each other
932 if (U_FAILURE(status)) {
933 return;
934 }
935 int32_t size = nodes.size();
936 void **array = (void **)uprv_malloc(sizeof(void *)*size);
937 if (array == NULL) {
938 status = U_MEMORY_ALLOCATION_ERROR;
939 return;
940 }
941 (void) nodes.toArray(array);
942
943 // Now repeatedly identify duplicates until there are no more
944 int32_t dupes = 0;
945 long passCount = 0;
946 #ifdef DEBUG_TRIE_DICT
947 long totalDupes = 0;
948 #endif
949 do {
950 BuildCompactTrieNode *node;
951 BuildCompactTrieNode *first = NULL;
952 BuildCompactTrieNode **p;
953 BuildCompactTrieNode **pFirst = NULL;
954 int32_t counter = size - 2;
955 // Sort the array, skipping nodes 0 and 1. Use quicksort for the first
956 // pass for speed. For the second and subsequent passes, we use stable
957 // (insertion) sort for two reasons:
958 // 1. The array is already mostly ordered, so we get better performance.
959 // 2. The way we find one and only one instance of a set of duplicates is to
960 // check that the node ID equals the array index. If we used an unstable
961 // sort for the second or later passes, it's possible that none of the
962 // duplicates would wind up with a node ID equal to its array index.
963 // The sort stability guarantees that, because as we coalesce more and
964 // more groups, the first element of the resultant group will be one of
965 // the first elements of the groups being coalesced.
966 // To use quicksort for the second and subsequent passes, we would have to
967 // find the minimum of the node numbers in a group, and set all the nodes
968 // in the group to that node number.
969 uprv_sortArray(array+2, counter, sizeof(void *), _sortBuildNodes, NULL, (passCount > 0), &status);
970 dupes = 0;
971 for (p = (BuildCompactTrieNode **)array + 2; counter > 0; --counter, ++p) {
972 node = *p;
973 if (node->fHasDuplicate) {
974 if (first == NULL) {
975 first = node;
976 pFirst = p;
977 }
978 else if (_sortBuildNodes(NULL, pFirst, p) != 0) {
979 // Starting a new run of dupes
980 first = node;
981 pFirst = p;
982 }
983 else if (node->fNodeID != first->fNodeID) {
984 // Slave one to the other, note duplicate
985 node->fNodeID = first->fNodeID;
986 dupes += 1;
987 }
988 }
989 else {
990 // This node has no dupes
991 first = NULL;
992 pFirst = NULL;
993 }
994 }
995 passCount += 1;
996 #ifdef DEBUG_TRIE_DICT
997 totalDupes += dupes;
998 fprintf(stderr, "Trie node dupe removal, pass %d: %d nodes tagged\n", passCount, dupes);
999 #endif
1000 }
1001 while (dupes > 0);
1002 #ifdef DEBUG_TRIE_DICT
1003 fprintf(stderr, "Trie node dupe removal complete: %d tagged in %d passes\n", totalDupes, passCount);
1004 #endif
1005
1006 // We no longer need the temporary array, as the nodes have all been marked appropriately.
1007 uprv_free(array);
1008 }
1009
1010 U_NAMESPACE_END
1011 U_CDECL_BEGIN
1012 static void U_CALLCONV _deleteBuildNode(void *obj) {
1013 delete (BuildCompactTrieNode *) obj;
1014 }
1015 U_CDECL_END
1016 U_NAMESPACE_BEGIN
1017
1018 CompactTrieHeader *
1019 CompactTrieDictionary::compactMutableTrieDictionary( const MutableTrieDictionary &dict,
1020 UErrorCode &status ) {
1021 if (U_FAILURE(status)) {
1022 return NULL;
1023 }
1024 #ifdef DEBUG_TRIE_DICT
1025 struct tms timing;
1026 struct tms previous;
1027 (void) ::times(&previous);
1028 #endif
1029 UStack nodes(_deleteBuildNode, NULL, status); // Index of nodes
1030
1031 // Add node 0, used as the NULL pointer/sentinel.
1032 nodes.addElement((int32_t)0, status);
1033
1034 // Start by creating the special empty node we use to indicate that the parent
1035 // terminates a word. This must be node 1, because the builder assumes
1036 // that.
1037 if (U_FAILURE(status)) {
1038 return NULL;
1039 }
1040 BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status);
1041 if (terminal == NULL) {
1042 status = U_MEMORY_ALLOCATION_ERROR;
1043 }
1044
1045 // This call does all the work of building the new trie structure. The root
1046 // will be node 2.
1047 BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status);
1048 #ifdef DEBUG_TRIE_DICT
1049 (void) ::times(&timing);
1050 fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n",
1051 nodes.size(), (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1052 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1053 previous = timing;
1054 #endif
1055
1056 // Now coalesce all duplicate nodes.
1057 coalesceDuplicates(nodes, status);
1058 #ifdef DEBUG_TRIE_DICT
1059 (void) ::times(&timing);
1060 fprintf(stderr, "Duplicates coalesced, time user %f system %f\n",
1061 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1062 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1063 previous = timing;
1064 #endif
1065
1066 // Next, build the output trie.
1067 // First we compute all the sizes and build the node ID translation table.
1068 uint32_t totalSize = offsetof(CompactTrieHeader,offsets);
1069 int32_t count = nodes.size();
1070 int32_t nodeCount = 1; // The sentinel node we already have
1071 BuildCompactTrieNode *node;
1072 int32_t i;
1073 UVector32 translate(count, status); // Should be no growth needed after this
1074 translate.push(0, status); // The sentinel node
1075
1076 if (U_FAILURE(status)) {
1077 return NULL;
1078 }
1079
1080 for (i = 1; i < count; ++i) {
1081 node = (BuildCompactTrieNode *)nodes[i];
1082 if (node->fNodeID == i) {
1083 // Only one node out of each duplicate set is used
1084 if (i >= translate.size()) {
1085 // Logically extend the mapping table
1086 translate.setSize(i+1);
1087 }
1088 translate.setElementAt(nodeCount++, i);
1089 totalSize += node->size();
1090 }
1091 }
1092
1093 // Check for overflowing 16 bits worth of nodes.
1094 if (nodeCount > 0x10000) {
1095 status = U_ILLEGAL_ARGUMENT_ERROR;
1096 return NULL;
1097 }
1098
1099 // Add enough room for the offsets.
1100 totalSize += nodeCount*sizeof(uint32_t);
1101 #ifdef DEBUG_TRIE_DICT
1102 (void) ::times(&timing);
1103 fprintf(stderr, "Sizes/mapping done, time user %f system %f\n",
1104 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1105 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1106 previous = timing;
1107 fprintf(stderr, "%d nodes, %d unique, %d bytes\n", nodes.size(), nodeCount, totalSize);
1108 #endif
1109 uint8_t *bytes = (uint8_t *)uprv_malloc(totalSize);
1110 if (bytes == NULL) {
1111 status = U_MEMORY_ALLOCATION_ERROR;
1112 return NULL;
1113 }
1114
1115 CompactTrieHeader *header = (CompactTrieHeader *)bytes;
1116 header->size = totalSize;
1117 header->nodeCount = nodeCount;
1118 header->offsets[0] = 0; // Sentinel
1119 header->root = translate.elementAti(root->fNodeID);
1120 #ifdef DEBUG_TRIE_DICT
1121 if (header->root == 0) {
1122 fprintf(stderr, "ERROR: root node %d translate to physical zero\n", root->fNodeID);
1123 }
1124 #endif
1125 uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t));
1126 nodeCount = 1;
1127 // Now write the data
1128 for (i = 1; i < count; ++i) {
1129 node = (BuildCompactTrieNode *)nodes[i];
1130 if (node->fNodeID == i) {
1131 header->offsets[nodeCount++] = offset;
1132 node->write(bytes, offset, translate);
1133 }
1134 }
1135 #ifdef DEBUG_TRIE_DICT
1136 (void) ::times(&timing);
1137 fprintf(stderr, "Trie built, time user %f system %f\n",
1138 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1139 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1140 previous = timing;
1141 fprintf(stderr, "Final offset is %d\n", offset);
1142
1143 // Collect statistics on node types and sizes
1144 int hCount = 0;
1145 int vCount = 0;
1146 size_t hSize = 0;
1147 size_t vSize = 0;
1148 size_t hItemCount = 0;
1149 size_t vItemCount = 0;
1150 uint32_t previousOff = offset;
1151 for (uint16_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) {
1152 const CompactTrieNode *node = getCompactNode(header, nodeIdx);
1153 if (node->flagscount & kVerticalNode) {
1154 vCount += 1;
1155 vItemCount += (node->flagscount & kCountMask);
1156 vSize += previousOff-header->offsets[nodeIdx];
1157 }
1158 else {
1159 hCount += 1;
1160 hItemCount += (node->flagscount & kCountMask);
1161 hSize += previousOff-header->offsets[nodeIdx];
1162 }
1163 previousOff = header->offsets[nodeIdx];
1164 }
1165 fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount,
1166 (double)hSize/hCount, (double)hItemCount/hCount);
1167 fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount,
1168 (double)vSize/vCount, (double)vItemCount/vCount);
1169 #endif
1170
1171 if (U_FAILURE(status)) {
1172 uprv_free(bytes);
1173 header = NULL;
1174 }
1175 else {
1176 header->magic = COMPACT_TRIE_MAGIC_1;
1177 }
1178 return header;
1179 }
1180
1181 // Forward declaration
1182 static TernaryNode *
1183 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status );
1184
1185
1186 // Convert a horizontal node (or subarray thereof) into a ternary subtrie
1187 static TernaryNode *
1188 unpackHorizontalArray( const CompactTrieHeader *header, const CompactTrieHorizontalEntry *array,
1189 int low, int high, UErrorCode &status ) {
1190 if (U_FAILURE(status) || low > high) {
1191 return NULL;
1192 }
1193 int middle = (low+high)/2;
1194 TernaryNode *result = new TernaryNode(array[middle].ch);
1195 if (result == NULL) {
1196 status = U_MEMORY_ALLOCATION_ERROR;
1197 return NULL;
1198 }
1199 const CompactTrieNode *equal = getCompactNode(header, array[middle].equal);
1200 if (equal->flagscount & kParentEndsWord) {
1201 result->flags |= kEndsWord;
1202 }
1203 result->low = unpackHorizontalArray(header, array, low, middle-1, status);
1204 result->high = unpackHorizontalArray(header, array, middle+1, high, status);
1205 result->equal = unpackOneNode(header, equal, status);
1206 return result;
1207 }
1208
1209 // Convert one compact trie node into a ternary subtrie
1210 static TernaryNode *
1211 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ) {
1212 int nodeCount = (node->flagscount & kCountMask);
1213 if (nodeCount == 0 || U_FAILURE(status)) {
1214 // Failure, or terminal node
1215 return NULL;
1216 }
1217 if (node->flagscount & kVerticalNode) {
1218 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
1219 TernaryNode *head = NULL;
1220 TernaryNode *previous = NULL;
1221 TernaryNode *latest = NULL;
1222 for (int i = 0; i < nodeCount; ++i) {
1223 latest = new TernaryNode(vnode->chars[i]);
1224 if (latest == NULL) {
1225 status = U_MEMORY_ALLOCATION_ERROR;
1226 break;
1227 }
1228 if (head == NULL) {
1229 head = latest;
1230 }
1231 if (previous != NULL) {
1232 previous->equal = latest;
1233 }
1234 previous = latest;
1235 }
1236 if (latest != NULL) {
1237 const CompactTrieNode *equal = getCompactNode(header, vnode->equal);
1238 if (equal->flagscount & kParentEndsWord) {
1239 latest->flags |= kEndsWord;
1240 }
1241 latest->equal = unpackOneNode(header, equal, status);
1242 }
1243 return head;
1244 }
1245 else {
1246 // Horizontal node
1247 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
1248 return unpackHorizontalArray(header, &hnode->entries[0], 0, nodeCount-1, status);
1249 }
1250 }
1251
1252 MutableTrieDictionary *
1253 CompactTrieDictionary::cloneMutable( UErrorCode &status ) const {
1254 MutableTrieDictionary *result = new MutableTrieDictionary( status );
1255 if (result == NULL) {
1256 status = U_MEMORY_ALLOCATION_ERROR;
1257 return NULL;
1258 }
1259 TernaryNode *root = unpackOneNode(fData, getCompactNode(fData, fData->root), status);
1260 if (U_FAILURE(status)) {
1261 delete root; // Clean up
1262 delete result;
1263 return NULL;
1264 }
1265 result->fTrie = root;
1266 return result;
1267 }
1268
1269 U_NAMESPACE_END
1270
1271 U_CAPI int32_t U_EXPORT2
1272 triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
1273 UErrorCode *status) {
1274
1275 if (status == NULL || U_FAILURE(*status)) {
1276 return 0;
1277 }
1278 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
1279 *status=U_ILLEGAL_ARGUMENT_ERROR;
1280 return 0;
1281 }
1282
1283 //
1284 // Check that the data header is for for dictionary data.
1285 // (Header contents are defined in genxxx.cpp)
1286 //
1287 const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4);
1288 if(!( pInfo->dataFormat[0]==0x54 && /* dataFormat="TrDc" */
1289 pInfo->dataFormat[1]==0x72 &&
1290 pInfo->dataFormat[2]==0x44 &&
1291 pInfo->dataFormat[3]==0x63 &&
1292 pInfo->formatVersion[0]==1 )) {
1293 udata_printError(ds, "triedict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
1294 pInfo->dataFormat[0], pInfo->dataFormat[1],
1295 pInfo->dataFormat[2], pInfo->dataFormat[3],
1296 pInfo->formatVersion[0]);
1297 *status=U_UNSUPPORTED_ERROR;
1298 return 0;
1299 }
1300
1301 //
1302 // Swap the data header. (This is the generic ICU Data Header, not the
1303 // CompactTrieHeader). This swap also conveniently gets us
1304 // the size of the ICU d.h., which lets us locate the start
1305 // of the RBBI specific data.
1306 //
1307 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
1308
1309 //
1310 // Get the CompactTrieHeader, and check that it appears to be OK.
1311 //
1312 const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
1313 const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes;
1314 if (ds->readUInt32(header->magic) != COMPACT_TRIE_MAGIC_1
1315 || ds->readUInt32(header->size) < sizeof(CompactTrieHeader))
1316 {
1317 udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n");
1318 *status=U_UNSUPPORTED_ERROR;
1319 return 0;
1320 }
1321
1322 //
1323 // Prefight operation? Just return the size
1324 //
1325 uint32_t totalSize = ds->readUInt32(header->size);
1326 int32_t sizeWithUData = (int32_t)totalSize + headerSize;
1327 if (length < 0) {
1328 return sizeWithUData;
1329 }
1330
1331 //
1332 // Check that length passed in is consistent with length from RBBI data header.
1333 //
1334 if (length < sizeWithUData) {
1335 udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n",
1336 totalSize);
1337 *status=U_INDEX_OUTOFBOUNDS_ERROR;
1338 return 0;
1339 }
1340
1341 //
1342 // Swap the Data. Do the data itself first, then the CompactTrieHeader, because
1343 // we need to reference the header to locate the data, and an
1344 // inplace swap of the header leaves it unusable.
1345 //
1346 uint8_t *outBytes = (uint8_t *)outData + headerSize;
1347 CompactTrieHeader *outputHeader = (CompactTrieHeader *)outBytes;
1348
1349 #if 0
1350 //
1351 // If not swapping in place, zero out the output buffer before starting.
1352 //
1353 if (inBytes != outBytes) {
1354 uprv_memset(outBytes, 0, totalSize);
1355 }
1356
1357 // We need to loop through all the nodes in the offset table, and swap each one.
1358 uint16_t nodeCount = ds->readUInt16(header->nodeCount);
1359 // Skip node 0, which should always be 0.
1360 for (int i = 1; i < nodeCount; ++i) {
1361 uint32_t nodeOff = ds->readUInt32(header->offsets[i]);
1362 const CompactTrieNode *inNode = (const CompactTrieNode *)(inBytes + nodeOff);
1363 CompactTrieNode *outNode = (CompactTrieNode *)(outBytes + nodeOff);
1364 uint16_t flagscount = ds->readUInt16(inNode->flagscount);
1365 uint16_t itemCount = flagscount & kCountMask;
1366 ds->writeUInt16(&outNode->flagscount, flagscount);
1367 if (itemCount > 0) {
1368 if (flagscount & kVerticalNode) {
1369 ds->swapArray16(ds, inBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars),
1370 itemCount*sizeof(uint16_t),
1371 outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status);
1372 uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal);
1373 ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal));
1374 }
1375 else {
1376 const CompactTrieHorizontalNode *inHNode = (const CompactTrieHorizontalNode *)inNode;
1377 CompactTrieHorizontalNode *outHNode = (CompactTrieHorizontalNode *)outNode;
1378 for (int j = 0; j < itemCount; ++j) {
1379 uint16_t word = ds->readUInt16(inHNode->entries[j].ch);
1380 ds->writeUInt16(&outHNode->entries[j].ch, word);
1381 word = ds->readUInt16(inHNode->entries[j].equal);
1382 ds->writeUInt16(&outHNode->entries[j].equal, word);
1383 }
1384 }
1385 }
1386 }
1387 #endif
1388
1389 // All the data in all the nodes consist of 16 bit items. Swap them all at once.
1390 uint16_t nodeCount = ds->readUInt16(header->nodeCount);
1391 uint32_t nodesOff = offsetof(CompactTrieHeader,offsets)+((uint32_t)nodeCount*sizeof(uint32_t));
1392 ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status);
1393
1394 // Swap the header
1395 ds->writeUInt32(&outputHeader->size, totalSize);
1396 uint32_t magic = ds->readUInt32(header->magic);
1397 ds->writeUInt32(&outputHeader->magic, magic);
1398 ds->writeUInt16(&outputHeader->nodeCount, nodeCount);
1399 uint16_t root = ds->readUInt16(header->root);
1400 ds->writeUInt16(&outputHeader->root, root);
1401 ds->swapArray32(ds, inBytes+offsetof(CompactTrieHeader,offsets),
1402 sizeof(uint32_t)*(int32_t)nodeCount,
1403 outBytes+offsetof(CompactTrieHeader,offsets), status);
1404
1405 return sizeWithUData;
1406 }
1407
1408 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */