]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/triedict.cpp
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / common / triedict.cpp
1 /**
2 *******************************************************************************
3 * Copyright (C) 2006-2011, International Business Machines Corporation *
4 * and others. All Rights Reserved. *
5 *******************************************************************************
6 */
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_BREAK_ITERATION
11
12 #include "triedict.h"
13 #include "unicode/chariter.h"
14 #include "unicode/uchriter.h"
15 #include "unicode/strenum.h"
16 #include "unicode/uenum.h"
17 #include "unicode/udata.h"
18 #include "cmemory.h"
19 #include "udataswp.h"
20 #include "uvector.h"
21 #include "uvectr32.h"
22 #include "uarrsort.h"
23
24 //#define DEBUG_TRIE_DICT 1
25
26 #ifdef DEBUG_TRIE_DICT
27 #include <sys/times.h>
28 #include <limits.h>
29 #include <stdio.h>
30 #endif
31
32 U_NAMESPACE_BEGIN
33
34 /*******************************************************************
35 * TrieWordDictionary
36 */
37
38 TrieWordDictionary::TrieWordDictionary() {
39 }
40
41 TrieWordDictionary::~TrieWordDictionary() {
42 }
43
44 /*******************************************************************
45 * MutableTrieDictionary
46 */
47
48 // Node structure for the ternary, uncompressed trie
49 struct TernaryNode : public UMemory {
50 UChar ch; // UTF-16 code unit
51 uint16_t flags; // Flag word
52 TernaryNode *low; // Less-than link
53 TernaryNode *equal; // Equal link
54 TernaryNode *high; // Greater-than link
55
56 TernaryNode(UChar uc);
57 ~TernaryNode();
58 };
59
60 enum MutableTrieNodeFlags {
61 kEndsWord = 0x0001 // This node marks the end of a valid word
62 };
63
64 inline
65 TernaryNode::TernaryNode(UChar uc) {
66 ch = uc;
67 flags = 0;
68 low = NULL;
69 equal = NULL;
70 high = NULL;
71 }
72
73 // Not inline since it's recursive
74 TernaryNode::~TernaryNode() {
75 delete low;
76 delete equal;
77 delete high;
78 }
79
80 MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) {
81 // Start the trie off with something. Having the root node already present
82 // cuts a special case out of the search/insertion functions.
83 // Making it a median character cuts the worse case for searches from
84 // 4x a balanced trie to 2x a balanced trie. It's best to choose something
85 // that starts a word that is midway in the list.
86 fTrie = new TernaryNode(median);
87 if (fTrie == NULL) {
88 status = U_MEMORY_ALLOCATION_ERROR;
89 }
90 fIter = utext_openUChars(NULL, NULL, 0, &status);
91 if (U_SUCCESS(status) && fIter == NULL) {
92 status = U_MEMORY_ALLOCATION_ERROR;
93 }
94 }
95
96 MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) {
97 fTrie = NULL;
98 fIter = utext_openUChars(NULL, NULL, 0, &status);
99 if (U_SUCCESS(status) && fIter == NULL) {
100 status = U_MEMORY_ALLOCATION_ERROR;
101 }
102 }
103
104 MutableTrieDictionary::~MutableTrieDictionary() {
105 delete fTrie;
106 utext_close(fIter);
107 }
108
109 int32_t
110 MutableTrieDictionary::search( UText *text,
111 int32_t maxLength,
112 int32_t *lengths,
113 int &count,
114 int limit,
115 TernaryNode *&parent,
116 UBool &pMatched ) const {
117 // TODO: current implementation works in UTF-16 space
118 const TernaryNode *up = NULL;
119 const TernaryNode *p = fTrie;
120 int mycount = 0;
121 pMatched = TRUE;
122 int i;
123
124 UChar uc = utext_current32(text);
125 for (i = 0; i < maxLength && p != NULL; ++i) {
126 while (p != NULL) {
127 if (uc < p->ch) {
128 up = p;
129 p = p->low;
130 }
131 else if (uc == p->ch) {
132 break;
133 }
134 else {
135 up = p;
136 p = p->high;
137 }
138 }
139 if (p == NULL) {
140 pMatched = FALSE;
141 break;
142 }
143 // Must be equal to get here
144 if (limit > 0 && (p->flags & kEndsWord)) {
145 lengths[mycount++] = i+1;
146 --limit;
147 }
148 up = p;
149 p = p->equal;
150 uc = utext_next32(text);
151 uc = utext_current32(text);
152 }
153
154 // Note that there is no way to reach here with up == 0 unless
155 // maxLength is 0 coming in.
156 parent = (TernaryNode *)up;
157 count = mycount;
158 return i;
159 }
160
161 void
162 MutableTrieDictionary::addWord( const UChar *word,
163 int32_t length,
164 UErrorCode &status ) {
165 #if 0
166 if (length <= 0) {
167 status = U_ILLEGAL_ARGUMENT_ERROR;
168 return;
169 }
170 #endif
171 TernaryNode *parent;
172 UBool pMatched;
173 int count;
174 fIter = utext_openUChars(fIter, word, length, &status);
175
176 int matched;
177 matched = search(fIter, length, NULL, count, 0, parent, pMatched);
178
179 while (matched++ < length) {
180 UChar32 uc = utext_next32(fIter); // TODO: supplemetary support?
181 U_ASSERT(uc != U_SENTINEL);
182 TernaryNode *newNode = new TernaryNode(uc);
183 if (newNode == NULL) {
184 status = U_MEMORY_ALLOCATION_ERROR;
185 return;
186 }
187 if (pMatched) {
188 parent->equal = newNode;
189 }
190 else {
191 pMatched = TRUE;
192 if (uc < parent->ch) {
193 parent->low = newNode;
194 }
195 else {
196 parent->high = newNode;
197 }
198 }
199 parent = newNode;
200 }
201
202 parent->flags |= kEndsWord;
203 }
204
205 #if 0
206 void
207 MutableTrieDictionary::addWords( UEnumeration *words,
208 UErrorCode &status ) {
209 int32_t length;
210 const UChar *word;
211 while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) {
212 addWord(word, length, status);
213 }
214 }
215 #endif
216
217 int32_t
218 MutableTrieDictionary::matches( UText *text,
219 int32_t maxLength,
220 int32_t *lengths,
221 int &count,
222 int limit ) const {
223 TernaryNode *parent;
224 UBool pMatched;
225 return search(text, maxLength, lengths, count, limit, parent, pMatched);
226 }
227
228 // Implementation of iteration for MutableTrieDictionary
229 class MutableTrieEnumeration : public StringEnumeration {
230 private:
231 UStack fNodeStack; // Stack of nodes to process
232 UVector32 fBranchStack; // Stack of which branch we are working on
233 TernaryNode *fRoot; // Root node
234 enum StackBranch {
235 kLessThan,
236 kEqual,
237 kGreaterThan,
238 kDone
239 };
240
241 public:
242 static UClassID U_EXPORT2 getStaticClassID(void);
243 virtual UClassID getDynamicClassID(void) const;
244 public:
245 MutableTrieEnumeration(TernaryNode *root, UErrorCode &status)
246 : fNodeStack(status), fBranchStack(status) {
247 fRoot = root;
248 fNodeStack.push(root, status);
249 fBranchStack.push(kLessThan, status);
250 unistr.remove();
251 }
252
253 virtual ~MutableTrieEnumeration();
254
255 virtual StringEnumeration *clone() const {
256 UErrorCode status = U_ZERO_ERROR;
257 return new MutableTrieEnumeration(fRoot, status);
258 }
259
260 virtual const UnicodeString *snext(UErrorCode &status) {
261 if (fNodeStack.empty() || U_FAILURE(status)) {
262 return NULL;
263 }
264 TernaryNode *node = (TernaryNode *) fNodeStack.peek();
265 StackBranch where = (StackBranch) fBranchStack.peeki();
266 while (!fNodeStack.empty() && U_SUCCESS(status)) {
267 UBool emit;
268 UBool equal;
269
270 switch (where) {
271 case kLessThan:
272 if (node->low != NULL) {
273 fBranchStack.setElementAt(kEqual, fBranchStack.size()-1);
274 node = (TernaryNode *) fNodeStack.push(node->low, status);
275 where = (StackBranch) fBranchStack.push(kLessThan, status);
276 break;
277 }
278 case kEqual: /*fall through*/
279 emit = (node->flags & kEndsWord) != 0;
280 equal = (node->equal != NULL);
281 // If this node should be part of the next emitted string, append
282 // the UChar to the string, and make sure we pop it when we come
283 // back to this node. The character should only be in the string
284 // for as long as we're traversing the equal subtree of this node
285 if (equal || emit) {
286 unistr.append(node->ch);
287 fBranchStack.setElementAt(kGreaterThan, fBranchStack.size()-1);
288 }
289 if (equal) {
290 node = (TernaryNode *) fNodeStack.push(node->equal, status);
291 where = (StackBranch) fBranchStack.push(kLessThan, status);
292 }
293 if (emit) {
294 return &unistr;
295 }
296 if (equal) {
297 break;
298 }
299 case kGreaterThan: /*fall through*/
300 // If this node's character is in the string, remove it.
301 if (node->equal != NULL || (node->flags & kEndsWord)) {
302 unistr.truncate(unistr.length()-1);
303 }
304 if (node->high != NULL) {
305 fBranchStack.setElementAt(kDone, fBranchStack.size()-1);
306 node = (TernaryNode *) fNodeStack.push(node->high, status);
307 where = (StackBranch) fBranchStack.push(kLessThan, status);
308 break;
309 }
310 case kDone: /*fall through*/
311 fNodeStack.pop();
312 fBranchStack.popi();
313 node = (TernaryNode *) fNodeStack.peek();
314 where = (StackBranch) fBranchStack.peeki();
315 break;
316 default:
317 return NULL;
318 }
319 }
320 return NULL;
321 }
322
323 // Very expensive, but this should never be used.
324 virtual int32_t count(UErrorCode &status) const {
325 MutableTrieEnumeration counter(fRoot, status);
326 int32_t result = 0;
327 while (counter.snext(status) != NULL && U_SUCCESS(status)) {
328 ++result;
329 }
330 return result;
331 }
332
333 virtual void reset(UErrorCode &status) {
334 fNodeStack.removeAllElements();
335 fBranchStack.removeAllElements();
336 fNodeStack.push(fRoot, status);
337 fBranchStack.push(kLessThan, status);
338 unistr.remove();
339 }
340 };
341
342 MutableTrieEnumeration::~MutableTrieEnumeration() {}
343
344 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MutableTrieEnumeration)
345
346 StringEnumeration *
347 MutableTrieDictionary::openWords( UErrorCode &status ) const {
348 if (U_FAILURE(status)) {
349 return NULL;
350 }
351 return new MutableTrieEnumeration(fTrie, status);
352 }
353
354 /*******************************************************************
355 * CompactTrieDictionary
356 */
357
358 struct CompactTrieHeader {
359 uint32_t size; // Size of the data in bytes
360 uint32_t magic; // Magic number (including version)
361 uint16_t nodeCount; // Number of entries in offsets[]
362 uint16_t root; // Node number of the root node
363 uint32_t offsets[1]; // Offsets to nodes from start of data
364 };
365
366 // Note that to avoid platform-specific alignment issues, all members of the node
367 // structures should be the same size, or should contain explicit padding to
368 // natural alignment boundaries.
369
370 // We can't use a bitfield for the flags+count field, because the layout of those
371 // is not portable. 12 bits of count allows for up to 4096 entries in a node.
372 struct CompactTrieNode {
373 uint16_t flagscount; // Count of sub-entries, plus flags
374 };
375
376 enum CompactTrieNodeFlags {
377 kVerticalNode = 0x1000, // This is a vertical node
378 kParentEndsWord = 0x2000, // The node whose equal link points to this ends a word
379 kReservedFlag1 = 0x4000,
380 kReservedFlag2 = 0x8000,
381 kCountMask = 0x0FFF, // The count portion of flagscount
382 kFlagMask = 0xF000 // The flags portion of flagscount
383 };
384
385 // The two node types are distinguished by the kVerticalNode flag.
386
387 struct CompactTrieHorizontalEntry {
388 uint16_t ch; // UChar
389 uint16_t equal; // Equal link node index
390 };
391
392 // We don't use inheritance here because C++ does not guarantee that the
393 // base class comes first in memory!!
394
395 struct CompactTrieHorizontalNode {
396 uint16_t flagscount; // Count of sub-entries, plus flags
397 CompactTrieHorizontalEntry entries[1];
398 };
399
400 struct CompactTrieVerticalNode {
401 uint16_t flagscount; // Count of sub-entries, plus flags
402 uint16_t equal; // Equal link node index
403 uint16_t chars[1]; // Code units
404 };
405
406 // {'Dic', 1}, version 1
407 #define COMPACT_TRIE_MAGIC_1 0x44696301
408
409 CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj,
410 UErrorCode &status )
411 : fUData(dataObj)
412 {
413 fData = (const CompactTrieHeader *) udata_getMemory(dataObj);
414 fOwnData = FALSE;
415 if (fData->magic != COMPACT_TRIE_MAGIC_1) {
416 status = U_ILLEGAL_ARGUMENT_ERROR;
417 fData = NULL;
418 }
419 }
420 CompactTrieDictionary::CompactTrieDictionary( const void *data,
421 UErrorCode &status )
422 : fUData(NULL)
423 {
424 fData = (const CompactTrieHeader *) data;
425 fOwnData = FALSE;
426 if (fData->magic != COMPACT_TRIE_MAGIC_1) {
427 status = U_ILLEGAL_ARGUMENT_ERROR;
428 fData = NULL;
429 }
430 }
431
432 CompactTrieDictionary::CompactTrieDictionary( const MutableTrieDictionary &dict,
433 UErrorCode &status )
434 : fUData(NULL)
435 {
436 fData = compactMutableTrieDictionary(dict, status);
437 fOwnData = !U_FAILURE(status);
438 }
439
440 CompactTrieDictionary::~CompactTrieDictionary() {
441 if (fOwnData) {
442 uprv_free((void *)fData);
443 }
444 if (fUData) {
445 udata_close(fUData);
446 }
447 }
448
449 uint32_t
450 CompactTrieDictionary::dataSize() const {
451 return fData->size;
452 }
453
454 const void *
455 CompactTrieDictionary::data() const {
456 return fData;
457 }
458
459 // This function finds the address of a node for us, given its node ID
460 static inline const CompactTrieNode *
461 getCompactNode(const CompactTrieHeader *header, uint16_t node) {
462 return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]);
463 }
464
465 int32_t
466 CompactTrieDictionary::matches( UText *text,
467 int32_t maxLength,
468 int32_t *lengths,
469 int &count,
470 int limit ) const {
471 // TODO: current implementation works in UTF-16 space
472 const CompactTrieNode *node = getCompactNode(fData, fData->root);
473 int mycount = 0;
474
475 UChar uc = utext_current32(text);
476 int i = 0;
477
478 while (node != NULL) {
479 // Check if the node we just exited ends a word
480 if (limit > 0 && (node->flagscount & kParentEndsWord)) {
481 lengths[mycount++] = i;
482 --limit;
483 }
484 // Check that we haven't exceeded the maximum number of input characters.
485 // We have to do that here rather than in the while condition so that
486 // we can check for ending a word, above.
487 if (i >= maxLength) {
488 break;
489 }
490
491 int nodeCount = (node->flagscount & kCountMask);
492 if (nodeCount == 0) {
493 // Special terminal node; return now
494 break;
495 }
496 if (node->flagscount & kVerticalNode) {
497 // Vertical node; check all the characters in it
498 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
499 for (int j = 0; j < nodeCount && i < maxLength; ++j) {
500 if (uc != vnode->chars[j]) {
501 // We hit a non-equal character; return
502 goto exit;
503 }
504 utext_next32(text);
505 uc = utext_current32(text);
506 ++i;
507 }
508 // To get here we must have come through the whole list successfully;
509 // go on to the next node. Note that a word cannot end in the middle
510 // of a vertical node.
511 node = getCompactNode(fData, vnode->equal);
512 }
513 else {
514 // Horizontal node; do binary search
515 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
516 int low = 0;
517 int high = nodeCount-1;
518 int middle;
519 node = NULL; // If we don't find a match, we'll fall out of the loop
520 while (high >= low) {
521 middle = (high+low)/2;
522 if (uc == hnode->entries[middle].ch) {
523 // We hit a match; get the next node and next character
524 node = getCompactNode(fData, hnode->entries[middle].equal);
525 utext_next32(text);
526 uc = utext_current32(text);
527 ++i;
528 break;
529 }
530 else if (uc < hnode->entries[middle].ch) {
531 high = middle-1;
532 }
533 else {
534 low = middle+1;
535 }
536 }
537 }
538 }
539 exit:
540 count = mycount;
541 return i;
542 }
543
544 // Implementation of iteration for CompactTrieDictionary
545 class CompactTrieEnumeration : public StringEnumeration {
546 private:
547 UVector32 fNodeStack; // Stack of nodes to process
548 UVector32 fIndexStack; // Stack of where in node we are
549 const CompactTrieHeader *fHeader; // Trie data
550
551 public:
552 static UClassID U_EXPORT2 getStaticClassID(void);
553 virtual UClassID getDynamicClassID(void) const;
554 public:
555 CompactTrieEnumeration(const CompactTrieHeader *header, UErrorCode &status)
556 : fNodeStack(status), fIndexStack(status) {
557 fHeader = header;
558 fNodeStack.push(header->root, status);
559 fIndexStack.push(0, status);
560 unistr.remove();
561 }
562
563 virtual ~CompactTrieEnumeration();
564
565 virtual StringEnumeration *clone() const {
566 UErrorCode status = U_ZERO_ERROR;
567 return new CompactTrieEnumeration(fHeader, status);
568 }
569
570 virtual const UnicodeString * snext(UErrorCode &status);
571
572 // Very expensive, but this should never be used.
573 virtual int32_t count(UErrorCode &status) const {
574 CompactTrieEnumeration counter(fHeader, status);
575 int32_t result = 0;
576 while (counter.snext(status) != NULL && U_SUCCESS(status)) {
577 ++result;
578 }
579 return result;
580 }
581
582 virtual void reset(UErrorCode &status) {
583 fNodeStack.removeAllElements();
584 fIndexStack.removeAllElements();
585 fNodeStack.push(fHeader->root, status);
586 fIndexStack.push(0, status);
587 unistr.remove();
588 }
589 };
590
591 CompactTrieEnumeration::~CompactTrieEnumeration() {}
592
593 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompactTrieEnumeration)
594
595 const UnicodeString *
596 CompactTrieEnumeration::snext(UErrorCode &status) {
597 if (fNodeStack.empty() || U_FAILURE(status)) {
598 return NULL;
599 }
600 const CompactTrieNode *node = getCompactNode(fHeader, fNodeStack.peeki());
601 int where = fIndexStack.peeki();
602 while (!fNodeStack.empty() && U_SUCCESS(status)) {
603 int nodeCount = (node->flagscount & kCountMask);
604 UBool goingDown = FALSE;
605 if (nodeCount == 0) {
606 // Terminal node; go up immediately
607 fNodeStack.popi();
608 fIndexStack.popi();
609 node = getCompactNode(fHeader, fNodeStack.peeki());
610 where = fIndexStack.peeki();
611 }
612 else if (node->flagscount & kVerticalNode) {
613 // Vertical node
614 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
615 if (where == 0) {
616 // Going down
617 unistr.append((const UChar *)vnode->chars, (int32_t) nodeCount);
618 fIndexStack.setElementAt(1, fIndexStack.size()-1);
619 node = getCompactNode(fHeader, fNodeStack.push(vnode->equal, status));
620 where = fIndexStack.push(0, status);
621 goingDown = TRUE;
622 }
623 else {
624 // Going up
625 unistr.truncate(unistr.length()-nodeCount);
626 fNodeStack.popi();
627 fIndexStack.popi();
628 node = getCompactNode(fHeader, fNodeStack.peeki());
629 where = fIndexStack.peeki();
630 }
631 }
632 else {
633 // Horizontal node
634 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
635 if (where > 0) {
636 // Pop previous char
637 unistr.truncate(unistr.length()-1);
638 }
639 if (where < nodeCount) {
640 // Push on next node
641 unistr.append((UChar)hnode->entries[where].ch);
642 fIndexStack.setElementAt(where+1, fIndexStack.size()-1);
643 node = getCompactNode(fHeader, fNodeStack.push(hnode->entries[where].equal, status));
644 where = fIndexStack.push(0, status);
645 goingDown = TRUE;
646 }
647 else {
648 // Going up
649 fNodeStack.popi();
650 fIndexStack.popi();
651 node = getCompactNode(fHeader, fNodeStack.peeki());
652 where = fIndexStack.peeki();
653 }
654 }
655 // Check if the parent of the node we've just gone down to ends a
656 // word. If so, return it.
657 if (goingDown && (node->flagscount & kParentEndsWord)) {
658 return &unistr;
659 }
660 }
661 return NULL;
662 }
663
664 StringEnumeration *
665 CompactTrieDictionary::openWords( UErrorCode &status ) const {
666 if (U_FAILURE(status)) {
667 return NULL;
668 }
669 return new CompactTrieEnumeration(fData, status);
670 }
671
672 //
673 // Below here is all code related to converting a ternary trie to a compact trie
674 // and back again
675 //
676
677 // Helper classes to construct the compact trie
678 class BuildCompactTrieNode: public UMemory {
679 public:
680 UBool fParentEndsWord;
681 UBool fVertical;
682 UBool fHasDuplicate;
683 int32_t fNodeID;
684 UnicodeString fChars;
685
686 public:
687 BuildCompactTrieNode(UBool parentEndsWord, UBool vertical, UStack &nodes, UErrorCode &status) {
688 fParentEndsWord = parentEndsWord;
689 fHasDuplicate = FALSE;
690 fVertical = vertical;
691 fNodeID = nodes.size();
692 nodes.push(this, status);
693 }
694
695 virtual ~BuildCompactTrieNode();
696
697 virtual uint32_t size() {
698 return sizeof(uint16_t);
699 }
700
701 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &/*translate*/) {
702 // Write flag/count
703 *((uint16_t *)(bytes+offset)) = (fChars.length() & kCountMask)
704 | (fVertical ? kVerticalNode : 0) | (fParentEndsWord ? kParentEndsWord : 0 );
705 offset += sizeof(uint16_t);
706 }
707 };
708
709 BuildCompactTrieNode::~BuildCompactTrieNode() {}
710
711 class BuildCompactTrieHorizontalNode: public BuildCompactTrieNode {
712 public:
713 UStack fLinks;
714
715 public:
716 BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
717 : BuildCompactTrieNode(parentEndsWord, FALSE, nodes, status), fLinks(status) {
718 }
719
720 virtual ~BuildCompactTrieHorizontalNode();
721
722 virtual uint32_t size() {
723 return offsetof(CompactTrieHorizontalNode,entries) +
724 (fChars.length()*sizeof(CompactTrieHorizontalEntry));
725 }
726
727 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
728 BuildCompactTrieNode::write(bytes, offset, translate);
729 int32_t count = fChars.length();
730 for (int32_t i = 0; i < count; ++i) {
731 CompactTrieHorizontalEntry *entry = (CompactTrieHorizontalEntry *)(bytes+offset);
732 entry->ch = fChars[i];
733 entry->equal = translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID);
734 #ifdef DEBUG_TRIE_DICT
735 if (entry->equal == 0) {
736 fprintf(stderr, "ERROR: horizontal link %d, logical node %d maps to physical node zero\n",
737 i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID);
738 }
739 #endif
740 offset += sizeof(CompactTrieHorizontalEntry);
741 }
742 }
743
744 void addNode(UChar ch, BuildCompactTrieNode *link, UErrorCode &status) {
745 fChars.append(ch);
746 fLinks.push(link, status);
747 }
748 };
749
750 BuildCompactTrieHorizontalNode::~BuildCompactTrieHorizontalNode() {}
751
752 class BuildCompactTrieVerticalNode: public BuildCompactTrieNode {
753 public:
754 BuildCompactTrieNode *fEqual;
755
756 public:
757 BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
758 : BuildCompactTrieNode(parentEndsWord, TRUE, nodes, status) {
759 fEqual = NULL;
760 }
761
762 virtual ~BuildCompactTrieVerticalNode();
763
764 virtual uint32_t size() {
765 return offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t));
766 }
767
768 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
769 CompactTrieVerticalNode *node = (CompactTrieVerticalNode *)(bytes+offset);
770 BuildCompactTrieNode::write(bytes, offset, translate);
771 node->equal = translate.elementAti(fEqual->fNodeID);
772 offset += sizeof(node->equal);
773 #ifdef DEBUG_TRIE_DICT
774 if (node->equal == 0) {
775 fprintf(stderr, "ERROR: vertical link, logical node %d maps to physical node zero\n",
776 fEqual->fNodeID);
777 }
778 #endif
779 fChars.extract(0, fChars.length(), (UChar *)node->chars);
780 offset += sizeof(uint16_t)*fChars.length();
781 }
782
783 void addChar(UChar ch) {
784 fChars.append(ch);
785 }
786
787 void setLink(BuildCompactTrieNode *node) {
788 fEqual = node;
789 }
790 };
791
792 BuildCompactTrieVerticalNode::~BuildCompactTrieVerticalNode() {}
793
794 // Forward declaration
795 static void walkHorizontal(const TernaryNode *node,
796 BuildCompactTrieHorizontalNode *building,
797 UStack &nodes,
798 UErrorCode &status);
799
800 // Convert one node. Uses recursion.
801
802 static BuildCompactTrieNode *
803 compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes, UErrorCode &status) {
804 if (U_FAILURE(status)) {
805 return NULL;
806 }
807 BuildCompactTrieNode *result = NULL;
808 UBool horizontal = (node->low != NULL || node->high != NULL);
809 if (horizontal) {
810 BuildCompactTrieHorizontalNode *hResult =
811 new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status);
812 if (hResult == NULL) {
813 status = U_MEMORY_ALLOCATION_ERROR;
814 return NULL;
815 }
816 if (U_SUCCESS(status)) {
817 walkHorizontal(node, hResult, nodes, status);
818 result = hResult;
819 }
820 }
821 else {
822 BuildCompactTrieVerticalNode *vResult =
823 new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status);
824 if (vResult == NULL) {
825 status = U_MEMORY_ALLOCATION_ERROR;
826 }
827 else if (U_SUCCESS(status)) {
828 UBool endsWord = FALSE;
829 // Take up nodes until we end a word, or hit a node with < or > links
830 do {
831 vResult->addChar(node->ch);
832 endsWord = (node->flags & kEndsWord) != 0;
833 node = node->equal;
834 }
835 while(node != NULL && !endsWord && node->low == NULL && node->high == NULL);
836 if (node == NULL) {
837 if (!endsWord) {
838 status = U_ILLEGAL_ARGUMENT_ERROR; // Corrupt input trie
839 }
840 else {
841 vResult->setLink((BuildCompactTrieNode *)nodes[1]);
842 }
843 }
844 else {
845 vResult->setLink(compactOneNode(node, endsWord, nodes, status));
846 }
847 result = vResult;
848 }
849 }
850 return result;
851 }
852
853 // Walk the set of peers at the same level, to build a horizontal node.
854 // Uses recursion.
855
856 static void walkHorizontal(const TernaryNode *node,
857 BuildCompactTrieHorizontalNode *building,
858 UStack &nodes,
859 UErrorCode &status) {
860 while (U_SUCCESS(status) && node != NULL) {
861 if (node->low != NULL) {
862 walkHorizontal(node->low, building, nodes, status);
863 }
864 BuildCompactTrieNode *link = NULL;
865 if (node->equal != NULL) {
866 link = compactOneNode(node->equal, (node->flags & kEndsWord) != 0, nodes, status);
867 }
868 else if (node->flags & kEndsWord) {
869 link = (BuildCompactTrieNode *)nodes[1];
870 }
871 if (U_SUCCESS(status) && link != NULL) {
872 building->addNode(node->ch, link, status);
873 }
874 // Tail recurse manually instead of leaving it to the compiler.
875 //if (node->high != NULL) {
876 // walkHorizontal(node->high, building, nodes, status);
877 //}
878 node = node->high;
879 }
880 }
881
882 U_NAMESPACE_END
883 U_NAMESPACE_USE
884 U_CDECL_BEGIN
885 static int32_t U_CALLCONV
886 _sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) {
887 BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl;
888 BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr;
889 // Check for comparing a node to itself, to avoid spurious duplicates
890 if (left == right) {
891 return 0;
892 }
893 // Most significant is type of node. Can never coalesce.
894 if (left->fVertical != right->fVertical) {
895 return left->fVertical - right->fVertical;
896 }
897 // Next, the "parent ends word" flag. If that differs, we cannot coalesce.
898 if (left->fParentEndsWord != right->fParentEndsWord) {
899 return left->fParentEndsWord - right->fParentEndsWord;
900 }
901 // Next, the string. If that differs, we can never coalesce.
902 int32_t result = left->fChars.compare(right->fChars);
903 if (result != 0) {
904 return result;
905 }
906 // We know they're both the same node type, so branch for the two cases.
907 if (left->fVertical) {
908 result = ((BuildCompactTrieVerticalNode *)left)->fEqual->fNodeID
909 - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID;
910 }
911 else {
912 // We need to compare the links vectors. They should be the
913 // same size because the strings were equal.
914 // We compare the node IDs instead of the pointers, to handle
915 // coalesced nodes.
916 BuildCompactTrieHorizontalNode *hleft, *hright;
917 hleft = (BuildCompactTrieHorizontalNode *)left;
918 hright = (BuildCompactTrieHorizontalNode *)right;
919 int32_t count = hleft->fLinks.size();
920 for (int32_t i = 0; i < count && result == 0; ++i) {
921 result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID -
922 ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID;
923 }
924 }
925 // If they are equal to each other, mark them (speeds coalescing)
926 if (result == 0) {
927 left->fHasDuplicate = TRUE;
928 right->fHasDuplicate = TRUE;
929 }
930 return result;
931 }
932 U_CDECL_END
933 U_NAMESPACE_BEGIN
934
935 static void coalesceDuplicates(UStack &nodes, UErrorCode &status) {
936 // We sort the array of nodes to place duplicates next to each other
937 if (U_FAILURE(status)) {
938 return;
939 }
940 int32_t size = nodes.size();
941 void **array = (void **)uprv_malloc(sizeof(void *)*size);
942 if (array == NULL) {
943 status = U_MEMORY_ALLOCATION_ERROR;
944 return;
945 }
946 (void) nodes.toArray(array);
947
948 // Now repeatedly identify duplicates until there are no more
949 int32_t dupes = 0;
950 long passCount = 0;
951 #ifdef DEBUG_TRIE_DICT
952 long totalDupes = 0;
953 #endif
954 do {
955 BuildCompactTrieNode *node;
956 BuildCompactTrieNode *first = NULL;
957 BuildCompactTrieNode **p;
958 BuildCompactTrieNode **pFirst = NULL;
959 int32_t counter = size - 2;
960 // Sort the array, skipping nodes 0 and 1. Use quicksort for the first
961 // pass for speed. For the second and subsequent passes, we use stable
962 // (insertion) sort for two reasons:
963 // 1. The array is already mostly ordered, so we get better performance.
964 // 2. The way we find one and only one instance of a set of duplicates is to
965 // check that the node ID equals the array index. If we used an unstable
966 // sort for the second or later passes, it's possible that none of the
967 // duplicates would wind up with a node ID equal to its array index.
968 // The sort stability guarantees that, because as we coalesce more and
969 // more groups, the first element of the resultant group will be one of
970 // the first elements of the groups being coalesced.
971 // To use quicksort for the second and subsequent passes, we would have to
972 // find the minimum of the node numbers in a group, and set all the nodes
973 // in the group to that node number.
974 uprv_sortArray(array+2, counter, sizeof(void *), _sortBuildNodes, NULL, (passCount > 0), &status);
975 dupes = 0;
976 for (p = (BuildCompactTrieNode **)array + 2; counter > 0; --counter, ++p) {
977 node = *p;
978 if (node->fHasDuplicate) {
979 if (first == NULL) {
980 first = node;
981 pFirst = p;
982 }
983 else if (_sortBuildNodes(NULL, pFirst, p) != 0) {
984 // Starting a new run of dupes
985 first = node;
986 pFirst = p;
987 }
988 else if (node->fNodeID != first->fNodeID) {
989 // Slave one to the other, note duplicate
990 node->fNodeID = first->fNodeID;
991 dupes += 1;
992 }
993 }
994 else {
995 // This node has no dupes
996 first = NULL;
997 pFirst = NULL;
998 }
999 }
1000 passCount += 1;
1001 #ifdef DEBUG_TRIE_DICT
1002 totalDupes += dupes;
1003 fprintf(stderr, "Trie node dupe removal, pass %d: %d nodes tagged\n", passCount, dupes);
1004 #endif
1005 }
1006 while (dupes > 0);
1007 #ifdef DEBUG_TRIE_DICT
1008 fprintf(stderr, "Trie node dupe removal complete: %d tagged in %d passes\n", totalDupes, passCount);
1009 #endif
1010
1011 // We no longer need the temporary array, as the nodes have all been marked appropriately.
1012 uprv_free(array);
1013 }
1014
1015 U_NAMESPACE_END
1016 U_CDECL_BEGIN
1017 static void U_CALLCONV _deleteBuildNode(void *obj) {
1018 delete (BuildCompactTrieNode *) obj;
1019 }
1020 U_CDECL_END
1021 U_NAMESPACE_BEGIN
1022
1023 CompactTrieHeader *
1024 CompactTrieDictionary::compactMutableTrieDictionary( const MutableTrieDictionary &dict,
1025 UErrorCode &status ) {
1026 if (U_FAILURE(status)) {
1027 return NULL;
1028 }
1029 #ifdef DEBUG_TRIE_DICT
1030 struct tms timing;
1031 struct tms previous;
1032 (void) ::times(&previous);
1033 #endif
1034 UStack nodes(_deleteBuildNode, NULL, status); // Index of nodes
1035
1036 // Add node 0, used as the NULL pointer/sentinel.
1037 nodes.addElement((int32_t)0, status);
1038
1039 // Start by creating the special empty node we use to indicate that the parent
1040 // terminates a word. This must be node 1, because the builder assumes
1041 // that.
1042 if (U_FAILURE(status)) {
1043 return NULL;
1044 }
1045 BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status);
1046 if (terminal == NULL) {
1047 status = U_MEMORY_ALLOCATION_ERROR;
1048 }
1049
1050 // This call does all the work of building the new trie structure. The root
1051 // will be node 2.
1052 BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status);
1053 #ifdef DEBUG_TRIE_DICT
1054 (void) ::times(&timing);
1055 fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n",
1056 nodes.size(), (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1057 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1058 previous = timing;
1059 #endif
1060
1061 // Now coalesce all duplicate nodes.
1062 coalesceDuplicates(nodes, status);
1063 #ifdef DEBUG_TRIE_DICT
1064 (void) ::times(&timing);
1065 fprintf(stderr, "Duplicates coalesced, time user %f system %f\n",
1066 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1067 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1068 previous = timing;
1069 #endif
1070
1071 // Next, build the output trie.
1072 // First we compute all the sizes and build the node ID translation table.
1073 uint32_t totalSize = offsetof(CompactTrieHeader,offsets);
1074 int32_t count = nodes.size();
1075 int32_t nodeCount = 1; // The sentinel node we already have
1076 BuildCompactTrieNode *node;
1077 int32_t i;
1078 UVector32 translate(count, status); // Should be no growth needed after this
1079 translate.push(0, status); // The sentinel node
1080
1081 if (U_FAILURE(status)) {
1082 return NULL;
1083 }
1084
1085 for (i = 1; i < count; ++i) {
1086 node = (BuildCompactTrieNode *)nodes[i];
1087 if (node->fNodeID == i) {
1088 // Only one node out of each duplicate set is used
1089 if (i >= translate.size()) {
1090 // Logically extend the mapping table
1091 translate.setSize(i+1);
1092 }
1093 translate.setElementAt(nodeCount++, i);
1094 totalSize += node->size();
1095 }
1096 }
1097
1098 // Check for overflowing 16 bits worth of nodes.
1099 if (nodeCount > 0x10000) {
1100 status = U_ILLEGAL_ARGUMENT_ERROR;
1101 return NULL;
1102 }
1103
1104 // Add enough room for the offsets.
1105 totalSize += nodeCount*sizeof(uint32_t);
1106 #ifdef DEBUG_TRIE_DICT
1107 (void) ::times(&timing);
1108 fprintf(stderr, "Sizes/mapping done, time user %f system %f\n",
1109 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1110 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1111 previous = timing;
1112 fprintf(stderr, "%d nodes, %d unique, %d bytes\n", nodes.size(), nodeCount, totalSize);
1113 #endif
1114 uint8_t *bytes = (uint8_t *)uprv_malloc(totalSize);
1115 if (bytes == NULL) {
1116 status = U_MEMORY_ALLOCATION_ERROR;
1117 return NULL;
1118 }
1119
1120 CompactTrieHeader *header = (CompactTrieHeader *)bytes;
1121 header->size = totalSize;
1122 header->nodeCount = nodeCount;
1123 header->offsets[0] = 0; // Sentinel
1124 header->root = translate.elementAti(root->fNodeID);
1125 #ifdef DEBUG_TRIE_DICT
1126 if (header->root == 0) {
1127 fprintf(stderr, "ERROR: root node %d translate to physical zero\n", root->fNodeID);
1128 }
1129 #endif
1130 uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t));
1131 nodeCount = 1;
1132 // Now write the data
1133 for (i = 1; i < count; ++i) {
1134 node = (BuildCompactTrieNode *)nodes[i];
1135 if (node->fNodeID == i) {
1136 header->offsets[nodeCount++] = offset;
1137 node->write(bytes, offset, translate);
1138 }
1139 }
1140 #ifdef DEBUG_TRIE_DICT
1141 (void) ::times(&timing);
1142 fprintf(stderr, "Trie built, time user %f system %f\n",
1143 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1144 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1145 previous = timing;
1146 fprintf(stderr, "Final offset is %d\n", offset);
1147
1148 // Collect statistics on node types and sizes
1149 int hCount = 0;
1150 int vCount = 0;
1151 size_t hSize = 0;
1152 size_t vSize = 0;
1153 size_t hItemCount = 0;
1154 size_t vItemCount = 0;
1155 uint32_t previousOff = offset;
1156 for (uint16_t nodeIdx = nodeCount-1; nodeIdx >= 2; --nodeIdx) {
1157 const CompactTrieNode *node = getCompactNode(header, nodeIdx);
1158 if (node->flagscount & kVerticalNode) {
1159 vCount += 1;
1160 vItemCount += (node->flagscount & kCountMask);
1161 vSize += previousOff-header->offsets[nodeIdx];
1162 }
1163 else {
1164 hCount += 1;
1165 hItemCount += (node->flagscount & kCountMask);
1166 hSize += previousOff-header->offsets[nodeIdx];
1167 }
1168 previousOff = header->offsets[nodeIdx];
1169 }
1170 fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount,
1171 (double)hSize/hCount, (double)hItemCount/hCount);
1172 fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount,
1173 (double)vSize/vCount, (double)vItemCount/vCount);
1174 #endif
1175
1176 if (U_FAILURE(status)) {
1177 uprv_free(bytes);
1178 header = NULL;
1179 }
1180 else {
1181 header->magic = COMPACT_TRIE_MAGIC_1;
1182 }
1183 return header;
1184 }
1185
1186 // Forward declaration
1187 static TernaryNode *
1188 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status );
1189
1190
1191 // Convert a horizontal node (or subarray thereof) into a ternary subtrie
1192 static TernaryNode *
1193 unpackHorizontalArray( const CompactTrieHeader *header, const CompactTrieHorizontalEntry *array,
1194 int low, int high, UErrorCode &status ) {
1195 if (U_FAILURE(status) || low > high) {
1196 return NULL;
1197 }
1198 int middle = (low+high)/2;
1199 TernaryNode *result = new TernaryNode(array[middle].ch);
1200 if (result == NULL) {
1201 status = U_MEMORY_ALLOCATION_ERROR;
1202 return NULL;
1203 }
1204 const CompactTrieNode *equal = getCompactNode(header, array[middle].equal);
1205 if (equal->flagscount & kParentEndsWord) {
1206 result->flags |= kEndsWord;
1207 }
1208 result->low = unpackHorizontalArray(header, array, low, middle-1, status);
1209 result->high = unpackHorizontalArray(header, array, middle+1, high, status);
1210 result->equal = unpackOneNode(header, equal, status);
1211 return result;
1212 }
1213
1214 // Convert one compact trie node into a ternary subtrie
1215 static TernaryNode *
1216 unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ) {
1217 int nodeCount = (node->flagscount & kCountMask);
1218 if (nodeCount == 0 || U_FAILURE(status)) {
1219 // Failure, or terminal node
1220 return NULL;
1221 }
1222 if (node->flagscount & kVerticalNode) {
1223 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
1224 TernaryNode *head = NULL;
1225 TernaryNode *previous = NULL;
1226 TernaryNode *latest = NULL;
1227 for (int i = 0; i < nodeCount; ++i) {
1228 latest = new TernaryNode(vnode->chars[i]);
1229 if (latest == NULL) {
1230 status = U_MEMORY_ALLOCATION_ERROR;
1231 break;
1232 }
1233 if (head == NULL) {
1234 head = latest;
1235 }
1236 if (previous != NULL) {
1237 previous->equal = latest;
1238 }
1239 previous = latest;
1240 }
1241 if (latest != NULL) {
1242 const CompactTrieNode *equal = getCompactNode(header, vnode->equal);
1243 if (equal->flagscount & kParentEndsWord) {
1244 latest->flags |= kEndsWord;
1245 }
1246 latest->equal = unpackOneNode(header, equal, status);
1247 }
1248 return head;
1249 }
1250 else {
1251 // Horizontal node
1252 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
1253 return unpackHorizontalArray(header, &hnode->entries[0], 0, nodeCount-1, status);
1254 }
1255 }
1256
1257 MutableTrieDictionary *
1258 CompactTrieDictionary::cloneMutable( UErrorCode &status ) const {
1259 MutableTrieDictionary *result = new MutableTrieDictionary( status );
1260 if (result == NULL) {
1261 status = U_MEMORY_ALLOCATION_ERROR;
1262 return NULL;
1263 }
1264 TernaryNode *root = unpackOneNode(fData, getCompactNode(fData, fData->root), status);
1265 if (U_FAILURE(status)) {
1266 delete root; // Clean up
1267 delete result;
1268 return NULL;
1269 }
1270 result->fTrie = root;
1271 return result;
1272 }
1273
1274 U_NAMESPACE_END
1275
1276 U_CAPI int32_t U_EXPORT2
1277 triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
1278 UErrorCode *status) {
1279
1280 if (status == NULL || U_FAILURE(*status)) {
1281 return 0;
1282 }
1283 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
1284 *status=U_ILLEGAL_ARGUMENT_ERROR;
1285 return 0;
1286 }
1287
1288 //
1289 // Check that the data header is for for dictionary data.
1290 // (Header contents are defined in genxxx.cpp)
1291 //
1292 const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4);
1293 if(!( pInfo->dataFormat[0]==0x54 && /* dataFormat="TrDc" */
1294 pInfo->dataFormat[1]==0x72 &&
1295 pInfo->dataFormat[2]==0x44 &&
1296 pInfo->dataFormat[3]==0x63 &&
1297 pInfo->formatVersion[0]==1 )) {
1298 udata_printError(ds, "triedict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
1299 pInfo->dataFormat[0], pInfo->dataFormat[1],
1300 pInfo->dataFormat[2], pInfo->dataFormat[3],
1301 pInfo->formatVersion[0]);
1302 *status=U_UNSUPPORTED_ERROR;
1303 return 0;
1304 }
1305
1306 //
1307 // Swap the data header. (This is the generic ICU Data Header, not the
1308 // CompactTrieHeader). This swap also conveniently gets us
1309 // the size of the ICU d.h., which lets us locate the start
1310 // of the RBBI specific data.
1311 //
1312 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
1313
1314 //
1315 // Get the CompactTrieHeader, and check that it appears to be OK.
1316 //
1317 const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
1318 const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes;
1319 if (ds->readUInt32(header->magic) != COMPACT_TRIE_MAGIC_1
1320 || ds->readUInt32(header->size) < sizeof(CompactTrieHeader))
1321 {
1322 udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n");
1323 *status=U_UNSUPPORTED_ERROR;
1324 return 0;
1325 }
1326
1327 //
1328 // Prefight operation? Just return the size
1329 //
1330 uint32_t totalSize = ds->readUInt32(header->size);
1331 int32_t sizeWithUData = (int32_t)totalSize + headerSize;
1332 if (length < 0) {
1333 return sizeWithUData;
1334 }
1335
1336 //
1337 // Check that length passed in is consistent with length from RBBI data header.
1338 //
1339 if (length < sizeWithUData) {
1340 udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n",
1341 totalSize);
1342 *status=U_INDEX_OUTOFBOUNDS_ERROR;
1343 return 0;
1344 }
1345
1346 //
1347 // Swap the Data. Do the data itself first, then the CompactTrieHeader, because
1348 // we need to reference the header to locate the data, and an
1349 // inplace swap of the header leaves it unusable.
1350 //
1351 uint8_t *outBytes = (uint8_t *)outData + headerSize;
1352 CompactTrieHeader *outputHeader = (CompactTrieHeader *)outBytes;
1353
1354 #if 0
1355 //
1356 // If not swapping in place, zero out the output buffer before starting.
1357 //
1358 if (inBytes != outBytes) {
1359 uprv_memset(outBytes, 0, totalSize);
1360 }
1361
1362 // We need to loop through all the nodes in the offset table, and swap each one.
1363 uint16_t nodeCount = ds->readUInt16(header->nodeCount);
1364 // Skip node 0, which should always be 0.
1365 for (int i = 1; i < nodeCount; ++i) {
1366 uint32_t nodeOff = ds->readUInt32(header->offsets[i]);
1367 const CompactTrieNode *inNode = (const CompactTrieNode *)(inBytes + nodeOff);
1368 CompactTrieNode *outNode = (CompactTrieNode *)(outBytes + nodeOff);
1369 uint16_t flagscount = ds->readUInt16(inNode->flagscount);
1370 uint16_t itemCount = flagscount & kCountMask;
1371 ds->writeUInt16(&outNode->flagscount, flagscount);
1372 if (itemCount > 0) {
1373 if (flagscount & kVerticalNode) {
1374 ds->swapArray16(ds, inBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars),
1375 itemCount*sizeof(uint16_t),
1376 outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status);
1377 uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal);
1378 ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal));
1379 }
1380 else {
1381 const CompactTrieHorizontalNode *inHNode = (const CompactTrieHorizontalNode *)inNode;
1382 CompactTrieHorizontalNode *outHNode = (CompactTrieHorizontalNode *)outNode;
1383 for (int j = 0; j < itemCount; ++j) {
1384 uint16_t word = ds->readUInt16(inHNode->entries[j].ch);
1385 ds->writeUInt16(&outHNode->entries[j].ch, word);
1386 word = ds->readUInt16(inHNode->entries[j].equal);
1387 ds->writeUInt16(&outHNode->entries[j].equal, word);
1388 }
1389 }
1390 }
1391 }
1392 #endif
1393
1394 // All the data in all the nodes consist of 16 bit items. Swap them all at once.
1395 uint16_t nodeCount = ds->readUInt16(header->nodeCount);
1396 uint32_t nodesOff = offsetof(CompactTrieHeader,offsets)+((uint32_t)nodeCount*sizeof(uint32_t));
1397 ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status);
1398
1399 // Swap the header
1400 ds->writeUInt32(&outputHeader->size, totalSize);
1401 uint32_t magic = ds->readUInt32(header->magic);
1402 ds->writeUInt32(&outputHeader->magic, magic);
1403 ds->writeUInt16(&outputHeader->nodeCount, nodeCount);
1404 uint16_t root = ds->readUInt16(header->root);
1405 ds->writeUInt16(&outputHeader->root, root);
1406 ds->swapArray32(ds, inBytes+offsetof(CompactTrieHeader,offsets),
1407 sizeof(uint32_t)*(int32_t)nodeCount,
1408 outBytes+offsetof(CompactTrieHeader,offsets), status);
1409
1410 return sizeWithUData;
1411 }
1412
1413 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */