]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/triedict.cpp
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / common / triedict.cpp
CommitLineData
73c04bcf
A
1/**
2 *******************************************************************************
3 * Copyright (C) 2006, International Business Machines Corporation and others. *
4 * All Rights Reserved. *
5 *******************************************************************************
6 */
7
8#include "unicode/utypes.h"
9
10#if !UCONFIG_NO_BREAK_ITERATION
11
12#include "triedict.h"
13#include "unicode/chariter.h"
14#include "unicode/uchriter.h"
15#include "unicode/strenum.h"
16#include "unicode/uenum.h"
17#include "unicode/udata.h"
18#include "cmemory.h"
19#include "udataswp.h"
20#include "uvector.h"
21#include "uvectr32.h"
22#include "uarrsort.h"
23
24//#define DEBUG_TRIE_DICT 1
25
26#ifdef DEBUG_TRIE_DICT
27#include <sys/times.h>
28#include <limits.h>
29#include <stdio.h>
30#endif
31
32U_NAMESPACE_BEGIN
33
34/*******************************************************************
35 * TrieWordDictionary
36 */
37
38TrieWordDictionary::TrieWordDictionary() {
39}
40
41TrieWordDictionary::~TrieWordDictionary() {
42}
43
44/*******************************************************************
45 * MutableTrieDictionary
46 */
47
48// Node structure for the ternary, uncompressed trie
49struct TernaryNode : public UMemory {
50 UChar ch; // UTF-16 code unit
51 uint16_t flags; // Flag word
52 TernaryNode *low; // Less-than link
53 TernaryNode *equal; // Equal link
54 TernaryNode *high; // Greater-than link
55
56 TernaryNode(UChar uc);
57 ~TernaryNode();
58};
59
60enum MutableTrieNodeFlags {
61 kEndsWord = 0x0001 // This node marks the end of a valid word
62};
63
64inline
65TernaryNode::TernaryNode(UChar uc) {
66 ch = uc;
67 flags = 0;
68 low = NULL;
69 equal = NULL;
70 high = NULL;
71}
72
73// Not inline since it's recursive
74TernaryNode::~TernaryNode() {
75 delete low;
76 delete equal;
77 delete high;
78}
79
80MutableTrieDictionary::MutableTrieDictionary( UChar median, UErrorCode &status ) {
81 // Start the trie off with something. Having the root node already present
82 // cuts a special case out of the search/insertion functions.
83 // Making it a median character cuts the worse case for searches from
84 // 4x a balanced trie to 2x a balanced trie. It's best to choose something
85 // that starts a word that is midway in the list.
86 fTrie = new TernaryNode(median);
87 if (fTrie == NULL) {
88 status = U_MEMORY_ALLOCATION_ERROR;
89 }
90 fIter = utext_openUChars(NULL, NULL, 0, &status);
91 if (U_SUCCESS(status) && fIter == NULL) {
92 status = U_MEMORY_ALLOCATION_ERROR;
93 }
94}
95
96MutableTrieDictionary::MutableTrieDictionary( UErrorCode &status ) {
97 fTrie = NULL;
98 fIter = utext_openUChars(NULL, NULL, 0, &status);
99 if (U_SUCCESS(status) && fIter == NULL) {
100 status = U_MEMORY_ALLOCATION_ERROR;
101 }
102}
103
104MutableTrieDictionary::~MutableTrieDictionary() {
105 delete fTrie;
106 utext_close(fIter);
107}
108
109int32_t
110MutableTrieDictionary::search( UText *text,
111 int32_t maxLength,
112 int32_t *lengths,
113 int &count,
114 int limit,
115 TernaryNode *&parent,
116 UBool &pMatched ) const {
117 // TODO: current implementation works in UTF-16 space
118 const TernaryNode *up = NULL;
119 const TernaryNode *p = fTrie;
120 int mycount = 0;
121 pMatched = TRUE;
122 int i;
123
124 UChar uc = utext_current32(text);
125 for (i = 0; i < maxLength && p != NULL; ++i) {
126 while (p != NULL) {
127 if (uc < p->ch) {
128 up = p;
129 p = p->low;
130 }
131 else if (uc == p->ch) {
132 break;
133 }
134 else {
135 up = p;
136 p = p->high;
137 }
138 }
139 if (p == NULL) {
140 pMatched = FALSE;
141 break;
142 }
143 // Must be equal to get here
144 if (limit > 0 && (p->flags & kEndsWord)) {
145 lengths[mycount++] = i+1;
146 --limit;
147 }
148 up = p;
149 p = p->equal;
150 uc = utext_next32(text);
151 uc = utext_current32(text);
152 }
153
154 // Note that there is no way to reach here with up == 0 unless
155 // maxLength is 0 coming in.
156 parent = (TernaryNode *)up;
157 count = mycount;
158 return i;
159}
160
161void
162MutableTrieDictionary::addWord( const UChar *word,
163 int32_t length,
164 UErrorCode &status ) {
165#if 0
166 if (length <= 0) {
167 status = U_ILLEGAL_ARGUMENT_ERROR;
168 return;
169 }
170#endif
171 TernaryNode *parent;
172 UBool pMatched;
173 int count;
174 fIter = utext_openUChars(fIter, word, length, &status);
175
176 int matched;
177 matched = search(fIter, length, NULL, count, 0, parent, pMatched);
178
179 while (matched++ < length) {
180 UChar32 uc = utext_next32(fIter); // TODO: supplemetary support?
181 U_ASSERT(uc != U_SENTINEL);
182 TernaryNode *newNode = new TernaryNode(uc);
183 if (newNode == NULL) {
184 status = U_MEMORY_ALLOCATION_ERROR;
185 return;
186 }
187 if (pMatched) {
188 parent->equal = newNode;
189 }
190 else {
191 pMatched = TRUE;
192 if (uc < parent->ch) {
193 parent->low = newNode;
194 }
195 else {
196 parent->high = newNode;
197 }
198 }
199 parent = newNode;
200 }
201
202 parent->flags |= kEndsWord;
203}
204
205#if 0
206void
207MutableTrieDictionary::addWords( UEnumeration *words,
208 UErrorCode &status ) {
209 int32_t length;
210 const UChar *word;
211 while ((word = uenum_unext(words, &length, &status)) && U_SUCCESS(status)) {
212 addWord(word, length, status);
213 }
214}
215#endif
216
217int32_t
218MutableTrieDictionary::matches( UText *text,
219 int32_t maxLength,
220 int32_t *lengths,
221 int &count,
222 int limit ) const {
223 TernaryNode *parent;
224 UBool pMatched;
225 return search(text, maxLength, lengths, count, limit, parent, pMatched);
226}
227
228// Implementation of iteration for MutableTrieDictionary
229class MutableTrieEnumeration : public StringEnumeration {
230private:
231 UStack fNodeStack; // Stack of nodes to process
232 UVector32 fBranchStack; // Stack of which branch we are working on
233 TernaryNode *fRoot; // Root node
234 static const char fgClassID;
235 enum StackBranch {
236 kLessThan,
237 kEqual,
238 kGreaterThan,
239 kDone
240 };
241
242public:
243 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
244 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
245public:
246 MutableTrieEnumeration(TernaryNode *root, UErrorCode &status)
247 : fNodeStack(status), fBranchStack(status) {
248 fRoot = root;
249 fNodeStack.push(root, status);
250 fBranchStack.push(kLessThan, status);
251 unistr.remove();
252 }
253
254 virtual ~MutableTrieEnumeration() {
255 }
256
257 virtual StringEnumeration *clone() const {
258 UErrorCode status = U_ZERO_ERROR;
259 return new MutableTrieEnumeration(fRoot, status);
260 }
261
262 virtual const UnicodeString *snext(UErrorCode &status) {
263 if (fNodeStack.empty() || U_FAILURE(status)) {
264 return NULL;
265 }
266 TernaryNode *node = (TernaryNode *) fNodeStack.peek();
267 StackBranch where = (StackBranch) fBranchStack.peeki();
268 while (!fNodeStack.empty() && U_SUCCESS(status)) {
269 UBool emit;
270 UBool equal;
271
272 switch (where) {
273 case kLessThan:
274 if (node->low != NULL) {
275 fBranchStack.setElementAt(kEqual, fBranchStack.size()-1);
276 node = (TernaryNode *) fNodeStack.push(node->low, status);
277 where = (StackBranch) fBranchStack.push(kLessThan, status);
278 break;
279 }
280 case kEqual:
281 emit = (node->flags & kEndsWord) != 0;
282 equal = (node->equal != NULL);
283 // If this node should be part of the next emitted string, append
284 // the UChar to the string, and make sure we pop it when we come
285 // back to this node. The character should only be in the string
286 // for as long as we're traversing the equal subtree of this node
287 if (equal || emit) {
288 unistr.append(node->ch);
289 fBranchStack.setElementAt(kGreaterThan, fBranchStack.size()-1);
290 }
291 if (equal) {
292 node = (TernaryNode *) fNodeStack.push(node->equal, status);
293 where = (StackBranch) fBranchStack.push(kLessThan, status);
294 }
295 if (emit) {
296 return &unistr;
297 }
298 if (equal) {
299 break;
300 }
301 case kGreaterThan:
302 // If this node's character is in the string, remove it.
303 if (node->equal != NULL || (node->flags & kEndsWord)) {
304 unistr.truncate(unistr.length()-1);
305 }
306 if (node->high != NULL) {
307 fBranchStack.setElementAt(kDone, fBranchStack.size()-1);
308 node = (TernaryNode *) fNodeStack.push(node->high, status);
309 where = (StackBranch) fBranchStack.push(kLessThan, status);
310 break;
311 }
312 case kDone:
313 fNodeStack.pop();
314 fBranchStack.popi();
315 node = (TernaryNode *) fNodeStack.peek();
316 where = (StackBranch) fBranchStack.peeki();
317 break;
318 default:
319 return NULL;
320 }
321 }
322 return NULL;
323 }
324
325 // Very expensive, but this should never be used.
326 virtual int32_t count(UErrorCode &status) const {
327 MutableTrieEnumeration counter(fRoot, status);
328 int32_t result = 0;
329 while (counter.snext(status) != NULL && U_SUCCESS(status)) {
330 ++result;
331 }
332 return result;
333 }
334
335 virtual void reset(UErrorCode &status) {
336 fNodeStack.removeAllElements();
337 fBranchStack.removeAllElements();
338 fNodeStack.push(fRoot, status);
339 fBranchStack.push(kLessThan, status);
340 unistr.remove();
341 }
342};
343
344const char MutableTrieEnumeration::fgClassID = '\0';
345
346StringEnumeration *
347MutableTrieDictionary::openWords( UErrorCode &status ) const {
348 if (U_FAILURE(status)) {
349 return NULL;
350 }
351 return new MutableTrieEnumeration(fTrie, status);
352}
353
354/*******************************************************************
355 * CompactTrieDictionary
356 */
357
358struct CompactTrieHeader {
359 uint32_t size; // Size of the data in bytes
360 uint32_t magic; // Magic number (including version)
361 uint16_t nodeCount; // Number of entries in offsets[]
362 uint16_t root; // Node number of the root node
363 uint32_t offsets[1]; // Offsets to nodes from start of data
364};
365
366// Note that to avoid platform-specific alignment issues, all members of the node
367// structures should be the same size, or should contain explicit padding to
368// natural alignment boundaries.
369
370// We can't use a bitfield for the flags+count field, because the layout of those
371// is not portable. 12 bits of count allows for up to 4096 entries in a node.
372struct CompactTrieNode {
373 uint16_t flagscount; // Count of sub-entries, plus flags
374};
375
376enum CompactTrieNodeFlags {
377 kVerticalNode = 0x1000, // This is a vertical node
378 kParentEndsWord = 0x2000, // The node whose equal link points to this ends a word
379 kReservedFlag1 = 0x4000,
380 kReservedFlag2 = 0x8000,
381 kCountMask = 0x0FFF, // The count portion of flagscount
382 kFlagMask = 0xF000 // The flags portion of flagscount
383};
384
385// The two node types are distinguished by the kVerticalNode flag.
386
387struct CompactTrieHorizontalEntry {
388 uint16_t ch; // UChar
389 uint16_t equal; // Equal link node index
390};
391
392// We don't use inheritance here because C++ does not guarantee that the
393// base class comes first in memory!!
394
395struct CompactTrieHorizontalNode {
396 uint16_t flagscount; // Count of sub-entries, plus flags
397 CompactTrieHorizontalEntry entries[1];
398};
399
400struct CompactTrieVerticalNode {
401 uint16_t flagscount; // Count of sub-entries, plus flags
402 uint16_t equal; // Equal link node index
403 uint16_t chars[1]; // Code units
404};
405
406// {'Dic', 1}, version 1
407#define COMPACT_TRIE_MAGIC_1 0x44696301
408
409CompactTrieDictionary::CompactTrieDictionary(UDataMemory *dataObj,
410 UErrorCode &status )
411: fUData(dataObj)
412{
413 fData = (const CompactTrieHeader *) udata_getMemory(dataObj);
414 fOwnData = FALSE;
415 if (fData->magic != COMPACT_TRIE_MAGIC_1) {
416 status = U_ILLEGAL_ARGUMENT_ERROR;
417 fData = NULL;
418 }
419}
420CompactTrieDictionary::CompactTrieDictionary( const void *data,
421 UErrorCode &status )
422: fUData(NULL)
423{
424 fData = (const CompactTrieHeader *) data;
425 fOwnData = FALSE;
426 if (fData->magic != COMPACT_TRIE_MAGIC_1) {
427 status = U_ILLEGAL_ARGUMENT_ERROR;
428 fData = NULL;
429 }
430}
431
432CompactTrieDictionary::CompactTrieDictionary( const MutableTrieDictionary &dict,
433 UErrorCode &status )
434: fUData(NULL)
435{
436 fData = compactMutableTrieDictionary(dict, status);
437 fOwnData = !U_FAILURE(status);
438}
439
440CompactTrieDictionary::~CompactTrieDictionary() {
441 if (fOwnData) {
442 uprv_free((void *)fData);
443 }
444 if (fUData) {
445 udata_close(fUData);
446 }
447}
448
449uint32_t
450CompactTrieDictionary::dataSize() const {
451 return fData->size;
452}
453
454const void *
455CompactTrieDictionary::data() const {
456 return fData;
457}
458
459// This function finds the address of a node for us, given its node ID
460static inline const CompactTrieNode *
461getCompactNode(const CompactTrieHeader *header, uint16_t node) {
462 return (const CompactTrieNode *)((const uint8_t *)header + header->offsets[node]);
463}
464
465int32_t
466CompactTrieDictionary::matches( UText *text,
467 int32_t maxLength,
468 int32_t *lengths,
469 int &count,
470 int limit ) const {
471 // TODO: current implementation works in UTF-16 space
472 const CompactTrieNode *node = getCompactNode(fData, fData->root);
473 int mycount = 0;
474
475 UChar uc = utext_current32(text);
476 int i = 0;
477
478 while (node != NULL) {
479 // Check if the node we just exited ends a word
480 if (limit > 0 && (node->flagscount & kParentEndsWord)) {
481 lengths[mycount++] = i;
482 --limit;
483 }
484 // Check that we haven't exceeded the maximum number of input characters.
485 // We have to do that here rather than in the while condition so that
486 // we can check for ending a word, above.
487 if (i >= maxLength) {
488 break;
489 }
490
491 int nodeCount = (node->flagscount & kCountMask);
492 if (nodeCount == 0) {
493 // Special terminal node; return now
494 break;
495 }
496 if (node->flagscount & kVerticalNode) {
497 // Vertical node; check all the characters in it
498 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
499 for (int j = 0; j < nodeCount && i < maxLength; ++j) {
500 if (uc != vnode->chars[j]) {
501 // We hit a non-equal character; return
502 goto exit;
503 }
504 utext_next32(text);
505 uc = utext_current32(text);
506 ++i;
507 }
508 // To get here we must have come through the whole list successfully;
509 // go on to the next node. Note that a word cannot end in the middle
510 // of a vertical node.
511 node = getCompactNode(fData, vnode->equal);
512 }
513 else {
514 // Horizontal node; do binary search
515 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
516 int low = 0;
517 int high = nodeCount-1;
518 int middle;
519 node = NULL; // If we don't find a match, we'll fall out of the loop
520 while (high >= low) {
521 middle = (high+low)/2;
522 if (uc == hnode->entries[middle].ch) {
523 // We hit a match; get the next node and next character
524 node = getCompactNode(fData, hnode->entries[middle].equal);
525 utext_next32(text);
526 uc = utext_current32(text);
527 ++i;
528 break;
529 }
530 else if (uc < hnode->entries[middle].ch) {
531 high = middle-1;
532 }
533 else {
534 low = middle+1;
535 }
536 }
537 }
538 }
539exit:
540 count = mycount;
541 return i;
542}
543
544// Implementation of iteration for CompactTrieDictionary
545class CompactTrieEnumeration : public StringEnumeration {
546private:
547 UVector32 fNodeStack; // Stack of nodes to process
548 UVector32 fIndexStack; // Stack of where in node we are
549 const CompactTrieHeader *fHeader; // Trie data
550 static const char fgClassID;
551
552public:
553 static UClassID U_EXPORT2 getStaticClassID(void) { return (UClassID)&fgClassID; }
554 virtual UClassID getDynamicClassID(void) const { return getStaticClassID(); }
555public:
556 CompactTrieEnumeration(const CompactTrieHeader *header, UErrorCode &status)
557 : fNodeStack(status), fIndexStack(status) {
558 fHeader = header;
559 fNodeStack.push(header->root, status);
560 fIndexStack.push(0, status);
561 unistr.remove();
562 }
563
564 virtual ~CompactTrieEnumeration() {
565 }
566
567 virtual StringEnumeration *clone() const {
568 UErrorCode status = U_ZERO_ERROR;
569 return new CompactTrieEnumeration(fHeader, status);
570 }
571
572 virtual const UnicodeString * snext(UErrorCode &status);
573
574 // Very expensive, but this should never be used.
575 virtual int32_t count(UErrorCode &status) const {
576 CompactTrieEnumeration counter(fHeader, status);
577 int32_t result = 0;
578 while (counter.snext(status) != NULL && U_SUCCESS(status)) {
579 ++result;
580 }
581 return result;
582 }
583
584 virtual void reset(UErrorCode &status) {
585 fNodeStack.removeAllElements();
586 fIndexStack.removeAllElements();
587 fNodeStack.push(fHeader->root, status);
588 fIndexStack.push(0, status);
589 unistr.remove();
590 }
591};
592
593const char CompactTrieEnumeration::fgClassID = '\0';
594
595const UnicodeString *
596CompactTrieEnumeration::snext(UErrorCode &status) {
597 if (fNodeStack.empty() || U_FAILURE(status)) {
598 return NULL;
599 }
600 const CompactTrieNode *node = getCompactNode(fHeader, fNodeStack.peeki());
601 int where = fIndexStack.peeki();
602 while (!fNodeStack.empty() && U_SUCCESS(status)) {
603 int nodeCount = (node->flagscount & kCountMask);
604 UBool goingDown = FALSE;
605 if (nodeCount == 0) {
606 // Terminal node; go up immediately
607 fNodeStack.popi();
608 fIndexStack.popi();
609 node = getCompactNode(fHeader, fNodeStack.peeki());
610 where = fIndexStack.peeki();
611 }
612 else if (node->flagscount & kVerticalNode) {
613 // Vertical node
614 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
615 if (where == 0) {
616 // Going down
617 unistr.append((const UChar *)vnode->chars, (int32_t) nodeCount);
618 fIndexStack.setElementAt(1, fIndexStack.size()-1);
619 node = getCompactNode(fHeader, fNodeStack.push(vnode->equal, status));
620 where = fIndexStack.push(0, status);
621 goingDown = TRUE;
622 }
623 else {
624 // Going up
625 unistr.truncate(unistr.length()-nodeCount);
626 fNodeStack.popi();
627 fIndexStack.popi();
628 node = getCompactNode(fHeader, fNodeStack.peeki());
629 where = fIndexStack.peeki();
630 }
631 }
632 else {
633 // Horizontal node
634 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
635 if (where > 0) {
636 // Pop previous char
637 unistr.truncate(unistr.length()-1);
638 }
639 if (where < nodeCount) {
640 // Push on next node
641 unistr.append((UChar)hnode->entries[where].ch);
642 fIndexStack.setElementAt(where+1, fIndexStack.size()-1);
643 node = getCompactNode(fHeader, fNodeStack.push(hnode->entries[where].equal, status));
644 where = fIndexStack.push(0, status);
645 goingDown = TRUE;
646 }
647 else {
648 // Going up
649 fNodeStack.popi();
650 fIndexStack.popi();
651 node = getCompactNode(fHeader, fNodeStack.peeki());
652 where = fIndexStack.peeki();
653 }
654 }
655 // Check if the parent of the node we've just gone down to ends a
656 // word. If so, return it.
657 if (goingDown && (node->flagscount & kParentEndsWord)) {
658 return &unistr;
659 }
660 }
661 return NULL;
662}
663
664StringEnumeration *
665CompactTrieDictionary::openWords( UErrorCode &status ) const {
666 if (U_FAILURE(status)) {
667 return NULL;
668 }
669 return new CompactTrieEnumeration(fData, status);
670}
671
672//
673// Below here is all code related to converting a ternary trie to a compact trie
674// and back again
675//
676
677// Helper classes to construct the compact trie
678class BuildCompactTrieNode: public UMemory {
679 public:
680 UBool fParentEndsWord;
681 UBool fVertical;
682 UBool fHasDuplicate;
683 int32_t fNodeID;
684 UnicodeString fChars;
685
686 public:
687 BuildCompactTrieNode(UBool parentEndsWord, UBool vertical, UStack &nodes, UErrorCode &status) {
688 fParentEndsWord = parentEndsWord;
689 fHasDuplicate = FALSE;
690 fVertical = vertical;
691 fNodeID = nodes.size();
692 nodes.push(this, status);
693 }
694
695 virtual ~BuildCompactTrieNode() {
696 }
697
698 virtual uint32_t size() {
699 return sizeof(uint16_t);
700 }
701
702 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &/*translate*/) {
703 // Write flag/count
704 *((uint16_t *)(bytes+offset)) = (fChars.length() & kCountMask)
705 | (fVertical ? kVerticalNode : 0) | (fParentEndsWord ? kParentEndsWord : 0 );
706 offset += sizeof(uint16_t);
707 }
708};
709
710class BuildCompactTrieHorizontalNode: public BuildCompactTrieNode {
711 public:
712 UStack fLinks;
713
714 public:
715 BuildCompactTrieHorizontalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
716 : BuildCompactTrieNode(parentEndsWord, FALSE, nodes, status), fLinks(status) {
717 }
718
719 virtual ~BuildCompactTrieHorizontalNode() {
720 }
721
722 virtual uint32_t size() {
723 return offsetof(CompactTrieHorizontalNode,entries) +
724 (fChars.length()*sizeof(CompactTrieHorizontalEntry));
725 }
726
727 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
728 BuildCompactTrieNode::write(bytes, offset, translate);
729 int32_t count = fChars.length();
730 for (int32_t i = 0; i < count; ++i) {
731 CompactTrieHorizontalEntry *entry = (CompactTrieHorizontalEntry *)(bytes+offset);
732 entry->ch = fChars[i];
733 entry->equal = translate.elementAti(((BuildCompactTrieNode *)fLinks[i])->fNodeID);
734#ifdef DEBUG_TRIE_DICT
735 if (entry->equal == 0) {
736 fprintf(stderr, "ERROR: horizontal link %d, logical node %d maps to physical node zero\n",
737 i, ((BuildCompactTrieNode *)fLinks[i])->fNodeID);
738 }
739#endif
740 offset += sizeof(CompactTrieHorizontalEntry);
741 }
742 }
743
744 void addNode(UChar ch, BuildCompactTrieNode *link, UErrorCode &status) {
745 fChars.append(ch);
746 fLinks.push(link, status);
747 }
748};
749
750class BuildCompactTrieVerticalNode: public BuildCompactTrieNode {
751 public:
752 BuildCompactTrieNode *fEqual;
753
754 public:
755 BuildCompactTrieVerticalNode(UBool parentEndsWord, UStack &nodes, UErrorCode &status)
756 : BuildCompactTrieNode(parentEndsWord, TRUE, nodes, status) {
757 fEqual = NULL;
758 }
759
760 virtual ~BuildCompactTrieVerticalNode() {
761 }
762
763 virtual uint32_t size() {
764 return offsetof(CompactTrieVerticalNode,chars) + (fChars.length()*sizeof(uint16_t));
765 }
766
767 virtual void write(uint8_t *bytes, uint32_t &offset, const UVector32 &translate) {
768 CompactTrieVerticalNode *node = (CompactTrieVerticalNode *)(bytes+offset);
769 BuildCompactTrieNode::write(bytes, offset, translate);
770 node->equal = translate.elementAti(fEqual->fNodeID);
771 offset += sizeof(node->equal);
772#ifdef DEBUG_TRIE_DICT
773 if (node->equal == 0) {
774 fprintf(stderr, "ERROR: vertical link, logical node %d maps to physical node zero\n",
775 fEqual->fNodeID);
776 }
777#endif
778 fChars.extract(0, fChars.length(), (UChar *)node->chars);
779 offset += sizeof(uint16_t)*fChars.length();
780 }
781
782 void addChar(UChar ch) {
783 fChars.append(ch);
784 }
785
786 void setLink(BuildCompactTrieNode *node) {
787 fEqual = node;
788 }
789};
790
791// Forward declaration
792static void walkHorizontal(const TernaryNode *node,
793 BuildCompactTrieHorizontalNode *building,
794 UStack &nodes,
795 UErrorCode &status);
796
797// Convert one node. Uses recursion.
798
799static BuildCompactTrieNode *
800compactOneNode(const TernaryNode *node, UBool parentEndsWord, UStack &nodes, UErrorCode &status) {
801 if (U_FAILURE(status)) {
802 return NULL;
803 }
804 BuildCompactTrieNode *result = NULL;
805 UBool horizontal = (node->low != NULL || node->high != NULL);
806 if (horizontal) {
807 BuildCompactTrieHorizontalNode *hResult =
808 new BuildCompactTrieHorizontalNode(parentEndsWord, nodes, status);
809 if (hResult == NULL) {
810 status = U_MEMORY_ALLOCATION_ERROR;
811 }
812 if (U_SUCCESS(status)) {
813 walkHorizontal(node, hResult, nodes, status);
814 result = hResult;
815 }
816 }
817 else {
818 BuildCompactTrieVerticalNode *vResult =
819 new BuildCompactTrieVerticalNode(parentEndsWord, nodes, status);
820 if (vResult == NULL) {
821 status = U_MEMORY_ALLOCATION_ERROR;
822 }
823 if (U_SUCCESS(status)) {
824 UBool endsWord = FALSE;
825 // Take up nodes until we end a word, or hit a node with < or > links
826 do {
827 vResult->addChar(node->ch);
828 endsWord = (node->flags & kEndsWord) != 0;
829 node = node->equal;
830 }
831 while(node != NULL && !endsWord && node->low == NULL && node->high == NULL);
832 if (node == NULL) {
833 if (!endsWord) {
834 status = U_ILLEGAL_ARGUMENT_ERROR; // Corrupt input trie
835 }
836 else {
837 vResult->setLink((BuildCompactTrieNode *)nodes[1]);
838 }
839 }
840 else {
841 vResult->setLink(compactOneNode(node, endsWord, nodes, status));
842 }
843 result = vResult;
844 }
845 }
846 return result;
847}
848
849// Walk the set of peers at the same level, to build a horizontal node.
850// Uses recursion.
851
852static void walkHorizontal(const TernaryNode *node,
853 BuildCompactTrieHorizontalNode *building,
854 UStack &nodes,
855 UErrorCode &status) {
856 while (U_SUCCESS(status) && node != NULL) {
857 if (node->low != NULL) {
858 walkHorizontal(node->low, building, nodes, status);
859 }
860 BuildCompactTrieNode *link = NULL;
861 if (node->equal != NULL) {
862 link = compactOneNode(node->equal, (node->flags & kEndsWord) != 0, nodes, status);
863 }
864 else if (node->flags & kEndsWord) {
865 link = (BuildCompactTrieNode *)nodes[1];
866 }
867 if (U_SUCCESS(status) && link != NULL) {
868 building->addNode(node->ch, link, status);
869 }
870 // Tail recurse manually instead of leaving it to the compiler.
871 //if (node->high != NULL) {
872 // walkHorizontal(node->high, building, nodes, status);
873 //}
874 node = node->high;
875 }
876}
877
878U_NAMESPACE_END
879U_CDECL_BEGIN
880static int32_t U_CALLCONV
881_sortBuildNodes(const void * /*context*/, const void *voidl, const void *voidr) {
882 BuildCompactTrieNode *left = *(BuildCompactTrieNode **)voidl;
883 BuildCompactTrieNode *right = *(BuildCompactTrieNode **)voidr;
884 // Check for comparing a node to itself, to avoid spurious duplicates
885 if (left == right) {
886 return 0;
887 }
888 // Most significant is type of node. Can never coalesce.
889 if (left->fVertical != right->fVertical) {
890 return left->fVertical - right->fVertical;
891 }
892 // Next, the "parent ends word" flag. If that differs, we cannot coalesce.
893 if (left->fParentEndsWord != right->fParentEndsWord) {
894 return left->fParentEndsWord - right->fParentEndsWord;
895 }
896 // Next, the string. If that differs, we can never coalesce.
897 int32_t result = left->fChars.compare(right->fChars);
898 if (result != 0) {
899 return result;
900 }
901 // We know they're both the same node type, so branch for the two cases.
902 if (left->fVertical) {
903 result = ((BuildCompactTrieVerticalNode *)left)->fEqual->fNodeID
904 - ((BuildCompactTrieVerticalNode *)right)->fEqual->fNodeID;
905 }
906 else {
907 // We need to compare the links vectors. They should be the
908 // same size because the strings were equal.
909 // We compare the node IDs instead of the pointers, to handle
910 // coalesced nodes.
911 BuildCompactTrieHorizontalNode *hleft, *hright;
912 hleft = (BuildCompactTrieHorizontalNode *)left;
913 hright = (BuildCompactTrieHorizontalNode *)right;
914 int32_t count = hleft->fLinks.size();
915 for (int32_t i = 0; i < count && result == 0; ++i) {
916 result = ((BuildCompactTrieNode *)(hleft->fLinks[i]))->fNodeID -
917 ((BuildCompactTrieNode *)(hright->fLinks[i]))->fNodeID;
918 }
919 }
920 // If they are equal to each other, mark them (speeds coalescing)
921 if (result == 0) {
922 left->fHasDuplicate = TRUE;
923 right->fHasDuplicate = TRUE;
924 }
925 return result;
926}
927U_CDECL_END
928U_NAMESPACE_BEGIN
929
930static void coalesceDuplicates(UStack &nodes, UErrorCode &status) {
931 // We sort the array of nodes to place duplicates next to each other
932 if (U_FAILURE(status)) {
933 return;
934 }
935 int32_t size = nodes.size();
936 void **array = (void **)uprv_malloc(sizeof(void *)*size);
937 if (array == NULL) {
938 status = U_MEMORY_ALLOCATION_ERROR;
939 return;
940 }
941 (void) nodes.toArray(array);
942
943 // Now repeatedly identify duplicates until there are no more
944 int32_t dupes = 0;
945 long passCount = 0;
946#ifdef DEBUG_TRIE_DICT
947 long totalDupes = 0;
948#endif
949 do {
950 BuildCompactTrieNode *node;
951 BuildCompactTrieNode *first = NULL;
952 BuildCompactTrieNode **p;
953 BuildCompactTrieNode **pFirst = NULL;
954 int32_t counter = size - 2;
955 // Sort the array, skipping nodes 0 and 1. Use quicksort for the first
956 // pass for speed. For the second and subsequent passes, we use stable
957 // (insertion) sort for two reasons:
958 // 1. The array is already mostly ordered, so we get better performance.
959 // 2. The way we find one and only one instance of a set of duplicates is to
960 // check that the node ID equals the array index. If we used an unstable
961 // sort for the second or later passes, it's possible that none of the
962 // duplicates would wind up with a node ID equal to its array index.
963 // The sort stability guarantees that, because as we coalesce more and
964 // more groups, the first element of the resultant group will be one of
965 // the first elements of the groups being coalesced.
966 // To use quicksort for the second and subsequent passes, we would have to
967 // find the minimum of the node numbers in a group, and set all the nodes
968 // in the group to that node number.
969 uprv_sortArray(array+2, counter, sizeof(void *), _sortBuildNodes, NULL, (passCount > 0), &status);
970 dupes = 0;
971 for (p = (BuildCompactTrieNode **)array + 2; counter > 0; --counter, ++p) {
972 node = *p;
973 if (node->fHasDuplicate) {
974 if (first == NULL) {
975 first = node;
976 pFirst = p;
977 }
978 else if (_sortBuildNodes(NULL, pFirst, p) != 0) {
979 // Starting a new run of dupes
980 first = node;
981 pFirst = p;
982 }
983 else if (node->fNodeID != first->fNodeID) {
984 // Slave one to the other, note duplicate
985 node->fNodeID = first->fNodeID;
986 dupes += 1;
987 }
988 }
989 else {
990 // This node has no dupes
991 first = NULL;
992 pFirst = NULL;
993 }
994 }
995 passCount += 1;
996#ifdef DEBUG_TRIE_DICT
997 totalDupes += dupes;
998 fprintf(stderr, "Trie node dupe removal, pass %d: %d nodes tagged\n", passCount, dupes);
999#endif
1000 }
1001 while (dupes > 0);
1002#ifdef DEBUG_TRIE_DICT
1003 fprintf(stderr, "Trie node dupe removal complete: %d tagged in %d passes\n", totalDupes, passCount);
1004#endif
1005
1006 // We no longer need the temporary array, as the nodes have all been marked appropriately.
1007 uprv_free(array);
1008}
1009
1010U_NAMESPACE_END
1011U_CDECL_BEGIN
1012static void U_CALLCONV _deleteBuildNode(void *obj) {
1013 delete (BuildCompactTrieNode *) obj;
1014}
1015U_CDECL_END
1016U_NAMESPACE_BEGIN
1017
1018CompactTrieHeader *
1019CompactTrieDictionary::compactMutableTrieDictionary( const MutableTrieDictionary &dict,
1020 UErrorCode &status ) {
1021 if (U_FAILURE(status)) {
1022 return NULL;
1023 }
1024#ifdef DEBUG_TRIE_DICT
1025 struct tms timing;
1026 struct tms previous;
1027 (void) ::times(&previous);
1028#endif
1029 UStack nodes(_deleteBuildNode, NULL, status); // Index of nodes
1030
1031 // Add node 0, used as the NULL pointer/sentinel.
1032 nodes.addElement((int32_t)0, status);
1033
1034 // Start by creating the special empty node we use to indicate that the parent
1035 // terminates a word. This must be node 1, because the builder assumes
1036 // that.
1037 if (U_FAILURE(status)) {
1038 return NULL;
1039 }
1040 BuildCompactTrieNode *terminal = new BuildCompactTrieNode(TRUE, FALSE, nodes, status);
1041 if (terminal == NULL) {
1042 status = U_MEMORY_ALLOCATION_ERROR;
1043 }
1044
1045 // This call does all the work of building the new trie structure. The root
1046 // will be node 2.
1047 BuildCompactTrieNode *root = compactOneNode(dict.fTrie, FALSE, nodes, status);
1048#ifdef DEBUG_TRIE_DICT
1049 (void) ::times(&timing);
1050 fprintf(stderr, "Compact trie built, %d nodes, time user %f system %f\n",
1051 nodes.size(), (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1052 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1053 previous = timing;
1054#endif
1055
1056 // Now coalesce all duplicate nodes.
1057 coalesceDuplicates(nodes, status);
1058#ifdef DEBUG_TRIE_DICT
1059 (void) ::times(&timing);
1060 fprintf(stderr, "Duplicates coalesced, time user %f system %f\n",
1061 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1062 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1063 previous = timing;
1064#endif
1065
1066 // Next, build the output trie.
1067 // First we compute all the sizes and build the node ID translation table.
1068 uint32_t totalSize = offsetof(CompactTrieHeader,offsets);
1069 int32_t count = nodes.size();
1070 int32_t nodeCount = 1; // The sentinel node we already have
1071 BuildCompactTrieNode *node;
1072 UVector32 translate(count, status); // Should be no growth needed after this
1073 translate.push(0, status); // The sentinel node
1074
1075 if (U_FAILURE(status)) {
1076 return NULL;
1077 }
1078
1079 for (int32_t i = 1; i < count; ++i) {
1080 node = (BuildCompactTrieNode *)nodes[i];
1081 if (node->fNodeID == i) {
1082 // Only one node out of each duplicate set is used
1083 if (i >= translate.size()) {
1084 // Logically extend the mapping table
1085 translate.setSize(i+1);
1086 }
1087 translate.setElementAt(nodeCount++, i);
1088 totalSize += node->size();
1089 }
1090 }
1091
1092 // Check for overflowing 16 bits worth of nodes.
1093 if (nodeCount > 0x10000) {
1094 status = U_ILLEGAL_ARGUMENT_ERROR;
1095 return NULL;
1096 }
1097
1098 // Add enough room for the offsets.
1099 totalSize += nodeCount*sizeof(uint32_t);
1100#ifdef DEBUG_TRIE_DICT
1101 (void) ::times(&timing);
1102 fprintf(stderr, "Sizes/mapping done, time user %f system %f\n",
1103 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1104 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1105 previous = timing;
1106 fprintf(stderr, "%d nodes, %d unique, %d bytes\n", nodes.size(), nodeCount, totalSize);
1107#endif
1108 uint8_t *bytes = (uint8_t *)uprv_malloc(totalSize);
1109 if (bytes == NULL) {
1110 status = U_MEMORY_ALLOCATION_ERROR;
1111 return NULL;
1112 }
1113
1114 CompactTrieHeader *header = (CompactTrieHeader *)bytes;
1115 header->size = totalSize;
1116 header->nodeCount = nodeCount;
1117 header->offsets[0] = 0; // Sentinel
1118 header->root = translate.elementAti(root->fNodeID);
1119#ifdef DEBUG_TRIE_DICT
1120 if (header->root == 0) {
1121 fprintf(stderr, "ERROR: root node %d translate to physical zero\n", root->fNodeID);
1122 }
1123#endif
1124 uint32_t offset = offsetof(CompactTrieHeader,offsets)+(nodeCount*sizeof(uint32_t));
1125 nodeCount = 1;
1126 // Now write the data
1127 for (int32_t i = 1; i < count; ++i) {
1128 node = (BuildCompactTrieNode *)nodes[i];
1129 if (node->fNodeID == i) {
1130 header->offsets[nodeCount++] = offset;
1131 node->write(bytes, offset, translate);
1132 }
1133 }
1134#ifdef DEBUG_TRIE_DICT
1135 (void) ::times(&timing);
1136 fprintf(stderr, "Trie built, time user %f system %f\n",
1137 (double)(timing.tms_utime-previous.tms_utime)/CLK_TCK,
1138 (double)(timing.tms_stime-previous.tms_stime)/CLK_TCK);
1139 previous = timing;
1140 fprintf(stderr, "Final offset is %d\n", offset);
1141
1142 // Collect statistics on node types and sizes
1143 int hCount = 0;
1144 int vCount = 0;
1145 size_t hSize = 0;
1146 size_t vSize = 0;
1147 size_t hItemCount = 0;
1148 size_t vItemCount = 0;
1149 uint32_t previousOff = offset;
1150 for (uint16_t i = nodeCount-1; i >= 2; --i) {
1151 const CompactTrieNode *node = getCompactNode(header, i);
1152 if (node->flagscount & kVerticalNode) {
1153 vCount += 1;
1154 vItemCount += (node->flagscount & kCountMask);
1155 vSize += previousOff-header->offsets[i];
1156 }
1157 else {
1158 hCount += 1;
1159 hItemCount += (node->flagscount & kCountMask);
1160 hSize += previousOff-header->offsets[i];
1161 }
1162 previousOff = header->offsets[i];
1163 }
1164 fprintf(stderr, "Horizontal nodes: %d total, average %f bytes with %f items\n", hCount,
1165 (double)hSize/hCount, (double)hItemCount/hCount);
1166 fprintf(stderr, "Vertical nodes: %d total, average %f bytes with %f items\n", vCount,
1167 (double)vSize/vCount, (double)vItemCount/vCount);
1168#endif
1169
1170 if (U_FAILURE(status)) {
1171 uprv_free(bytes);
1172 header = NULL;
1173 }
1174 else {
1175 header->magic = COMPACT_TRIE_MAGIC_1;
1176 }
1177 return header;
1178}
1179
1180// Forward declaration
1181static TernaryNode *
1182unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status );
1183
1184
1185// Convert a horizontal node (or subarray thereof) into a ternary subtrie
1186static TernaryNode *
1187unpackHorizontalArray( const CompactTrieHeader *header, const CompactTrieHorizontalEntry *array,
1188 int low, int high, UErrorCode &status ) {
1189 if (U_FAILURE(status) || low > high) {
1190 return NULL;
1191 }
1192 int middle = (low+high)/2;
1193 TernaryNode *result = new TernaryNode(array[middle].ch);
1194 if (result == NULL) {
1195 status = U_MEMORY_ALLOCATION_ERROR;
1196 return NULL;
1197 }
1198 const CompactTrieNode *equal = getCompactNode(header, array[middle].equal);
1199 if (equal->flagscount & kParentEndsWord) {
1200 result->flags |= kEndsWord;
1201 }
1202 result->low = unpackHorizontalArray(header, array, low, middle-1, status);
1203 result->high = unpackHorizontalArray(header, array, middle+1, high, status);
1204 result->equal = unpackOneNode(header, equal, status);
1205 return result;
1206}
1207
1208// Convert one compact trie node into a ternary subtrie
1209static TernaryNode *
1210unpackOneNode( const CompactTrieHeader *header, const CompactTrieNode *node, UErrorCode &status ) {
1211 int nodeCount = (node->flagscount & kCountMask);
1212 if (nodeCount == 0 || U_FAILURE(status)) {
1213 // Failure, or terminal node
1214 return NULL;
1215 }
1216 if (node->flagscount & kVerticalNode) {
1217 const CompactTrieVerticalNode *vnode = (const CompactTrieVerticalNode *)node;
1218 TernaryNode *head = NULL;
1219 TernaryNode *previous = NULL;
1220 TernaryNode *latest = NULL;
1221 for (int i = 0; i < nodeCount; ++i) {
1222 latest = new TernaryNode(vnode->chars[i]);
1223 if (latest == NULL) {
1224 status = U_MEMORY_ALLOCATION_ERROR;
1225 break;
1226 }
1227 if (head == NULL) {
1228 head = latest;
1229 }
1230 if (previous != NULL) {
1231 previous->equal = latest;
1232 }
1233 previous = latest;
1234 }
1235 if (latest != NULL) {
1236 const CompactTrieNode *equal = getCompactNode(header, vnode->equal);
1237 if (equal->flagscount & kParentEndsWord) {
1238 latest->flags |= kEndsWord;
1239 }
1240 latest->equal = unpackOneNode(header, equal, status);
1241 }
1242 return head;
1243 }
1244 else {
1245 // Horizontal node
1246 const CompactTrieHorizontalNode *hnode = (const CompactTrieHorizontalNode *)node;
1247 return unpackHorizontalArray(header, &hnode->entries[0], 0, nodeCount-1, status);
1248 }
1249}
1250
1251MutableTrieDictionary *
1252CompactTrieDictionary::cloneMutable( UErrorCode &status ) const {
1253 MutableTrieDictionary *result = new MutableTrieDictionary( status );
1254 if (result == NULL) {
1255 status = U_MEMORY_ALLOCATION_ERROR;
1256 return NULL;
1257 }
1258 TernaryNode *root = unpackOneNode(fData, getCompactNode(fData, fData->root), status);
1259 if (U_FAILURE(status)) {
1260 delete root; // Clean up
1261 delete result;
1262 return NULL;
1263 }
1264 result->fTrie = root;
1265 return result;
1266}
1267
1268U_NAMESPACE_END
1269
1270U_CAPI int32_t U_EXPORT2
1271triedict_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outData,
1272 UErrorCode *status) {
1273
1274 if (status == NULL || U_FAILURE(*status)) {
1275 return 0;
1276 }
1277 if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
1278 *status=U_ILLEGAL_ARGUMENT_ERROR;
1279 return 0;
1280 }
1281
1282 //
1283 // Check that the data header is for for dictionary data.
1284 // (Header contents are defined in genxxx.cpp)
1285 //
1286 const UDataInfo *pInfo = (const UDataInfo *)((const uint8_t *)inData+4);
1287 if(!( pInfo->dataFormat[0]==0x54 && /* dataFormat="TrDc" */
1288 pInfo->dataFormat[1]==0x72 &&
1289 pInfo->dataFormat[2]==0x44 &&
1290 pInfo->dataFormat[3]==0x63 &&
1291 pInfo->formatVersion[0]==1 )) {
1292 udata_printError(ds, "triedict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n",
1293 pInfo->dataFormat[0], pInfo->dataFormat[1],
1294 pInfo->dataFormat[2], pInfo->dataFormat[3],
1295 pInfo->formatVersion[0]);
1296 *status=U_UNSUPPORTED_ERROR;
1297 return 0;
1298 }
1299
1300 //
1301 // Swap the data header. (This is the generic ICU Data Header, not the
1302 // CompactTrieHeader). This swap also conveniently gets us
1303 // the size of the ICU d.h., which lets us locate the start
1304 // of the RBBI specific data.
1305 //
1306 int32_t headerSize=udata_swapDataHeader(ds, inData, length, outData, status);
1307
1308 //
1309 // Get the CompactTrieHeader, and check that it appears to be OK.
1310 //
1311 const uint8_t *inBytes =(const uint8_t *)inData+headerSize;
1312 const CompactTrieHeader *header = (const CompactTrieHeader *)inBytes;
1313 if (ds->readUInt32(header->magic) != COMPACT_TRIE_MAGIC_1
1314 || ds->readUInt32(header->size) < sizeof(CompactTrieHeader))
1315 {
1316 udata_printError(ds, "triedict_swap(): CompactTrieHeader is invalid.\n");
1317 *status=U_UNSUPPORTED_ERROR;
1318 return 0;
1319 }
1320
1321 //
1322 // Prefight operation? Just return the size
1323 //
1324 uint32_t totalSize = ds->readUInt32(header->size);
1325 int32_t sizeWithUData = (int32_t)totalSize + headerSize;
1326 if (length < 0) {
1327 return sizeWithUData;
1328 }
1329
1330 //
1331 // Check that length passed in is consistent with length from RBBI data header.
1332 //
1333 if (length < sizeWithUData) {
1334 udata_printError(ds, "triedict_swap(): too few bytes (%d after ICU Data header) for trie data.\n",
1335 totalSize);
1336 *status=U_INDEX_OUTOFBOUNDS_ERROR;
1337 return 0;
1338 }
1339
1340 //
1341 // Swap the Data. Do the data itself first, then the CompactTrieHeader, because
1342 // we need to reference the header to locate the data, and an
1343 // inplace swap of the header leaves it unusable.
1344 //
1345 uint8_t *outBytes = (uint8_t *)outData + headerSize;
1346 CompactTrieHeader *outputHeader = (CompactTrieHeader *)outBytes;
1347
1348#if 0
1349 //
1350 // If not swapping in place, zero out the output buffer before starting.
1351 //
1352 if (inBytes != outBytes) {
1353 uprv_memset(outBytes, 0, totalSize);
1354 }
1355
1356 // We need to loop through all the nodes in the offset table, and swap each one.
1357 uint16_t nodeCount = ds->readUInt16(header->nodeCount);
1358 // Skip node 0, which should always be 0.
1359 for (int i = 1; i < nodeCount; ++i) {
1360 uint32_t nodeOff = ds->readUInt32(header->offsets[i]);
1361 const CompactTrieNode *inNode = (const CompactTrieNode *)(inBytes + nodeOff);
1362 CompactTrieNode *outNode = (CompactTrieNode *)(outBytes + nodeOff);
1363 uint16_t flagscount = ds->readUInt16(inNode->flagscount);
1364 uint16_t itemCount = flagscount & kCountMask;
1365 ds->writeUInt16(&outNode->flagscount, flagscount);
1366 if (itemCount > 0) {
1367 if (flagscount & kVerticalNode) {
1368 ds->swapArray16(ds, inBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars),
1369 itemCount*sizeof(uint16_t),
1370 outBytes+nodeOff+offsetof(CompactTrieVerticalNode,chars), status);
1371 uint16_t equal = ds->readUInt16(inBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal);
1372 ds->writeUInt16(outBytes+nodeOff+offsetof(CompactTrieVerticalNode,equal));
1373 }
1374 else {
1375 const CompactTrieHorizontalNode *inHNode = (const CompactTrieHorizontalNode *)inNode;
1376 CompactTrieHorizontalNode *outHNode = (CompactTrieHorizontalNode *)outNode;
1377 for (int j = 0; j < itemCount; ++j) {
1378 uint16_t word = ds->readUInt16(inHNode->entries[j].ch);
1379 ds->writeUInt16(&outHNode->entries[j].ch, word);
1380 word = ds->readUInt16(inHNode->entries[j].equal);
1381 ds->writeUInt16(&outHNode->entries[j].equal, word);
1382 }
1383 }
1384 }
1385 }
1386#endif
1387
1388 // All the data in all the nodes consist of 16 bit items. Swap them all at once.
1389 uint16_t nodeCount = ds->readUInt16(header->nodeCount);
1390 uint32_t nodesOff = offsetof(CompactTrieHeader,offsets)+((uint32_t)nodeCount*sizeof(uint32_t));
1391 ds->swapArray16(ds, inBytes+nodesOff, totalSize-nodesOff, outBytes+nodesOff, status);
1392
1393 // Swap the header
1394 ds->writeUInt32(&outputHeader->size, totalSize);
1395 uint32_t magic = ds->readUInt32(header->magic);
1396 ds->writeUInt32(&outputHeader->magic, magic);
1397 ds->writeUInt16(&outputHeader->nodeCount, nodeCount);
1398 uint16_t root = ds->readUInt16(header->root);
1399 ds->writeUInt16(&outputHeader->root, root);
1400 ds->swapArray32(ds, inBytes+offsetof(CompactTrieHeader,offsets),
1401 sizeof(uint32_t)*(int32_t)nodeCount,
1402 outBytes+offsetof(CompactTrieHeader,offsets), status);
1403
1404 return sizeWithUData;
1405}
1406
1407#endif /* #if !UCONFIG_NO_BREAK_ITERATION */