]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | // |
2 | // rbbisetb.h | |
3 | /* | |
4 | ********************************************************************** | |
374ca955 | 5 | * Copyright (c) 2001-2004, International Business Machines |
b75a7d8f A |
6 | * Corporation and others. All Rights Reserved. |
7 | ********************************************************************** | |
8 | */ | |
9 | ||
10 | #ifndef RBBISETB_H | |
11 | #define RBBISETB_H | |
12 | ||
13 | #include "unicode/utypes.h" | |
14 | #include "unicode/uobject.h" | |
15 | #include "rbbirb.h" | |
16 | #include "uvector.h" | |
b75a7d8f A |
17 | |
18 | struct UNewTrie; | |
19 | ||
20 | U_NAMESPACE_BEGIN | |
21 | ||
22 | // | |
23 | // RBBISetBuilder Derives the character categories used by the runtime RBBI engine | |
24 | // from the Unicode Sets appearing in the source RBBI rules, and | |
25 | // creates the TRIE table used to map from Unicode to the | |
26 | // character categories. | |
27 | // | |
28 | ||
29 | ||
30 | // | |
31 | // RangeDescriptor | |
32 | // | |
33 | // Each of the non-overlapping character ranges gets one of these descriptors. | |
34 | // All of them are strung together in a linked list, which is kept in order | |
35 | // (by character) | |
36 | // | |
37 | class RangeDescriptor : public UMemory { | |
38 | public: | |
39 | UChar32 fStartChar; // Start of range, Unicode 32 bit value. |
40 | UChar32 fEndChar; // End of range, Unicode 32 bit value. |
41 | int32_t fNum; // runtime-mapped input value for this range. |
42 | UVector *fIncludesSets; // vector of the original |
43 | // Unicode sets that include this range. | |
44 | // (Contains ptrs to uset nodes) | |
45 | RangeDescriptor *fNext; // Next RangeDescriptor in the linked list. |
46 | ||
47 | RangeDescriptor(UErrorCode &status); | |
48 | RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); | |
49 | ~RangeDescriptor(); | |
50 | void split(UChar32 where, UErrorCode &status); // Split this range in two at "where", with |
51 | // "where" appearing in the second (higher) part. | |
52 | void setDictionaryFlag(); // Check whether this range appears as part of |
53 | // the Unicode set named "dictionary" | |
54 | ||
55 | private: | |
56 | RangeDescriptor(const RangeDescriptor &other); // declared private to forbid copying of this class |
57 | RangeDescriptor &operator=(const RangeDescriptor &other); // declared private to forbid copying of this class |
58 | }; | |
59 | ||
60 | ||
61 | // | |
62 | // RBBISetBuilder Handles processing of Unicode Sets from RBBI rules. | |
63 | // | |
64 | // Starting with the rules parse tree from the scanner, | |
65 | // | |
66 | // - Enumerate the set of UnicodeSets that are referenced | |
67 | // by the RBBI rules. | |
68 | // - compute a derived set of non-overlapping UnicodeSets | |
69 | // that will correspond to columns in the state table for | |
70 | // the RBBI execution engine. | |
71 | // - construct the trie table that maps input characters | |
72 | // to set numbers in the non-overlapping set of sets. | |
73 | // | |
74 | ||
75 | ||
76 | class RBBISetBuilder : public UMemory { | |
77 | public: | |
78 | RBBISetBuilder(RBBIRuleBuilder *rb); | |
79 | ~RBBISetBuilder(); | |
80 | ||
81 | void build(); | |
82 | void addValToSets(UVector *sets, uint32_t val); | |
374ca955 | 83 | int32_t getNumCharCategories() const; // CharCategories are the same as the input symbol set of the |
b75a7d8f A |
84 | // runtime state machine, which are the same as |
85 | // columns in the DFA state table | |
374ca955 | 86 | int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie. |
b75a7d8f | 87 | void serializeTrie(uint8_t *where); // write out the serialized Trie. |
374ca955 A |
88 | UChar32 getFirstChar(int32_t val) const; |
89 | #ifdef RBBI_DEBUG | |
b75a7d8f A |
90 | void printSets(); |
91 | void printRanges(); | |
92 | void printRangeGroups(); | |
374ca955 A |
93 | #else |
94 | #define printSets() | |
95 | #define printRanges() | |
96 | #define printRangeGroups() | |
97 | #endif | |
b75a7d8f A |
98 | |
99 | private: | |
100 | void numberSets(); | |
101 | ||
102 | RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns this set builder. |
103 | UErrorCode *fStatus; | |
104 | ||
105 | RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors | |
106 | ||
107 | UNewTrie *fTrie; // The mapping TRIE that is the end result of processing | |
108 | uint32_t fTrieSize; // the Unicode Sets. | |
109 | ||
110 | // Groups correspond to character categories - | |
111 | // groups of ranges that are in the same original UnicodeSets. | |
112 | // fGroupCount is the index of the last used group. | |
113 | // The value is also the number of columns in the RBBI state table being compiled. | |
114 | // Index 0 is not used. Funny counting. | |
115 | int32_t fGroupCount; | |
116 | ||
117 | RBBISetBuilder(const RBBISetBuilder &other); // declared private to forbid copying of this class |
118 | RBBISetBuilder &operator=(const RBBISetBuilder &other); // declared private to forbid copying of this class |
119 | }; | |
120 | ||
121 | ||
122 | ||
123 | U_NAMESPACE_END | |
124 | #endif |