]>
git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbisetb57.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2001-2005, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
9 **********************************************************************
10 * Legacy version of RangeDescriptor and RBBISetBuilder from ICU 57,
11 * only for use by Apple RuleBasedTokenizer
12 **********************************************************************
18 #include "unicode/utypes.h"
19 #include "unicode/uobject.h"
28 class RBBIRuleBuilder57
;
31 // RBBISetBuilder57 Derives the character categories used by the runtime RBBI engine
32 // from the Unicode Sets appearing in the source RBBI rules, and
33 // creates the TRIE table used to map from Unicode to the
34 // character categories.
41 // Each of the non-overlapping character ranges gets one of these descriptors.
42 // All of them are strung together in a linked list, which is kept in order
45 class RangeDescriptor57
: public UMemory
{
47 UChar32 fStartChar
; // Start of range, unicode 32 bit value.
48 UChar32 fEndChar
; // End of range, unicode 32 bit value.
49 int32_t fNum
; // runtime-mapped input value for this range.
50 UVector
*fIncludesSets
; // vector of the original
51 // Unicode sets that include this range.
52 // (Contains ptrs to uset nodes)
53 RangeDescriptor57
*fNext
; // Next RangeDescriptor57 in the linked list.
55 RangeDescriptor57(UErrorCode
&status
);
56 RangeDescriptor57(const RangeDescriptor57
&other
, UErrorCode
&status
);
58 void split(UChar32 where
, UErrorCode
&status
); // Split this range in two at "where", with
59 // where appearing in the second (higher) part.
60 void setDictionaryFlag(); // Check whether this range appears as part of
61 // the Unicode set named "dictionary"
64 RangeDescriptor57(const RangeDescriptor57
&other
); // forbid copying of this class
65 RangeDescriptor57
&operator=(const RangeDescriptor57
&other
); // forbid copying of this class
70 // RBBISetBuilder57 Handles processing of Unicode Sets from RBBI rules.
72 // Starting with the rules parse tree from the scanner,
74 // - Enumerate the set of UnicodeSets that are referenced
76 // - compute a derived set of non-overlapping UnicodeSets
77 // that will correspond to columns in the state table for
78 // the RBBI execution engine.
79 // - construct the trie table that maps input characters
80 // to set numbers in the non-overlapping set of sets.
84 class RBBISetBuilder57
: public UMemory
{
86 RBBISetBuilder57(RBBIRuleBuilder57
*rb
);
90 void addValToSets(UVector
*sets
, uint32_t val
);
91 void addValToSet (RBBINode
*usetNode
, uint32_t val
);
92 int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
93 // runtime state machine, which are the same as
94 // columns in the DFA state table
95 int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie.
96 void serializeTrie(uint8_t *where
); // write out the serialized Trie.
97 UChar32
getFirstChar(int32_t val
) const;
98 UBool
sawBOF() const; // Indicate whether any references to the {bof} pseudo
99 // character were encountered.
103 void printRangeGroups();
106 #define printRanges()
107 #define printRangeGroups()
113 RBBIRuleBuilder57
*fRB
; // The RBBI Rule Compiler that owns us.
116 RangeDescriptor57
*fRangeList
; // Head of the linked list of RangeDescriptors
118 UNewTrie
*fTrie
; // The mapping TRIE that is the end result of processing
119 uint32_t fTrieSize
; // the Unicode Sets.
121 // Groups correspond to character categories -
122 // groups of ranges that are in the same original UnicodeSets.
123 // fGroupCount is the index of the last used group.
124 // fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
125 // State table column 0 is not used. Column 1 is for end-of-input.
126 // Column 2 is for group 0. Funny counting.
131 RBBISetBuilder57(const RBBISetBuilder57
&other
); // forbid copying of this class
132 RBBISetBuilder57
&operator=(const RBBISetBuilder57
&other
); // forbid copying of this class