]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/common/rbbisetb.h
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / rbbisetb.h
... / ...
CommitLineData
1//
2// rbbisetb.h
3/*
4**********************************************************************
5* Copyright (c) 2001-2004, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*/
9
10#ifndef RBBISETB_H
11#define RBBISETB_H
12
13#include "unicode/utypes.h"
14#include "unicode/uobject.h"
15#include "rbbirb.h"
16#include "uvector.h"
17
18struct UNewTrie;
19
20U_NAMESPACE_BEGIN
21
22//
23// RBBISetBuilder Derives the character categories used by the runtime RBBI engine
24// from the Unicode Sets appearing in the source RBBI rules, and
25// creates the TRIE table used to map from Unicode to the
26// character categories.
27//
28
29
30//
31// RangeDescriptor
32//
33// Each of the non-overlapping character ranges gets one of these descriptors.
34// All of them are strung together in a linked list, which is kept in order
35// (by character)
36//
37class RangeDescriptor : public UMemory {
38public:
39 UChar32 fStartChar; // Start of range, unicode 32 bit value.
40 UChar32 fEndChar; // End of range, unicode 32 bit value. NOTE(review): presumably inclusive - confirm in rbbisetb.cpp.
41 int32_t fNum; // runtime-mapped input value for this range.
42 UVector *fIncludesSets; // vector of the original
43 // Unicode sets that include this range.
44 // (Contains ptrs to uset nodes)
45 RangeDescriptor *fNext; // Next RangeDescriptor in the linked list.
46
47 RangeDescriptor(UErrorCode &status); // Presumably reports failure through status (ICU UErrorCode convention) - confirm.
48 RangeDescriptor(const RangeDescriptor &other, UErrorCode &status); // Status-taking copy from "other"; the plain copy constructor below is private.
49 ~RangeDescriptor();
50 void split(UChar32 where, UErrorCode &status); // Split this range in two at "where", with
51 // where appearing in the second (higher) part.
52 void setDictionaryFlag(); // Check whether this range appears as part of
53 // the Unicode set named "dictionary"
54
55private:
56 RangeDescriptor(const RangeDescriptor &other); // forbid copying of this class (declared private; no definition visible in this header)
57 RangeDescriptor &operator=(const RangeDescriptor &other); // forbid copying of this class (declared private; no definition visible in this header)
58};
59
60
61//
62// RBBISetBuilder Handles processing of Unicode Sets from RBBI rules.
63//
64// Starting with the rules parse tree from the scanner,
65//
66// - Enumerate the set of UnicodeSets that are referenced
67// by the RBBI rules.
68// - compute a derived set of non-overlapping UnicodeSets
69// that will correspond to columns in the state table for
70// the RBBI execution engine.
71// - construct the trie table that maps input characters
72// to set numbers in the non-overlapping set of sets.
73//
74
75
76class RBBISetBuilder : public UMemory {
77public:
78 RBBISetBuilder(RBBIRuleBuilder *rb); // rb: the rule builder; presumably saved in fRB - confirm in rbbisetb.cpp.
79 ~RBBISetBuilder();
80
81 void build(); // NOTE(review): presumably carries out the processing steps listed in the comment preceding this class - confirm.
82 void addValToSets(UVector *sets, uint32_t val); // NOTE(review): presumably records val on each set in "sets" - confirm.
83 int32_t getNumCharCategories() const; // CharCategories are the same as input symbol set to the
84 // runtime state machine, which are the same as
85 // columns in the DFA state table
86 int32_t getTrieSize() /*const*/; // Size in bytes of the serialized Trie. Declared non-const (const is commented out).
87 void serializeTrie(uint8_t *where); // write out the serialized Trie. NOTE(review): "where" presumably needs room for getTrieSize() bytes - confirm.
88 UChar32 getFirstChar(int32_t val) const; // NOTE(review): presumably the first character that maps to category val - confirm.
89#ifdef RBBI_DEBUG // Debug builds declare real print member functions.
90 void printSets();
91 void printRanges();
92 void printRangeGroups();
93#else // Otherwise the names become empty function-like macros, so calls expand to nothing. The macros are not scoped to this class.
94 #define printSets()
95 #define printRanges()
96 #define printRangeGroups()
97#endif
98
99private:
100 void numberSets();
101
102 RBBIRuleBuilder *fRB; // The RBBI Rule Compiler that owns us.
103 UErrorCode *fStatus; // NOTE(review): presumably points at the error code of the owning rule builder - confirm.
104
105 RangeDescriptor *fRangeList; // Head of the linked list of RangeDescriptors
106
107 UNewTrie *fTrie; // The mapping TRIE that is the end result of processing
108 uint32_t fTrieSize; // the Unicode Sets.
 // NOTE(review): fTrieSize is unsigned, while getTrieSize() returns int32_t.
109
110 // Groups correspond to character categories -
111 // groups of ranges that are in the same original UnicodeSets.
112 // fGroupCount is the index of the last used group.
113 // The value is also the number of columns in the RBBI state table being compiled.
114 // Index 0 is not used. Funny counting.
115 int32_t fGroupCount;
116
117 RBBISetBuilder(const RBBISetBuilder &other); // forbid copying of this class (declared private; no definition visible in this header)
118 RBBISetBuilder &operator=(const RBBISetBuilder &other); // forbid copying of this class (declared private; no definition visible in this header)
119};
120
121
122
123U_NAMESPACE_END
124#endif