4 // Copyright (C) 2002-2003, International Business Machines Corporation and others.
5 // All Rights Reserved.
7 // This file contains the RBBIRuleBuilder class implementation. This is the main class for
8 // building (compiling) break rules into the tables required by the RBBI runtime engine.
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/brkiter.h"
17 #include "unicode/rbbi.h"
18 #include "unicode/ubrk.h"
19 #include "unicode/unistr.h"
20 #include "unicode/uniset.h"
21 #include "unicode/uchar.h"
22 #include "unicode/uchriter.h"
23 #include "unicode/parsepos.h"
24 #include "unicode/parseerr.h"
40 //----------------------------------------------------------------------------------------
44 //----------------------------------------------------------------------------------------
// Constructor.
//   rules    - the rule source, passed as a UnicodeString (not referenced in the lines
//              of this constructor that are visible here).
//   parseErr - parse error structure; its address is stored in fParseError.
// Initializes the builder's members and allocates the helper objects it owns.
// NOTE(review): this listing has lost lines from the definition - the embedded line
//   numbers jump from 46 to 51, and `status` is used below without a visible
//   declaration (presumably a third, by-reference error-code parameter - confirm).
//   The closing braces are not visible either.
45 RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString
&rules
,
46 UParseError
&parseErr
,
// Keep a pointer to the parse error structure that was passed in.
51 fParseError
= &parseErr
;
// Debug settings are read from the U_RBBIDEBUG environment variable
// (getenv yields NULL when the variable is not set).
54 fDebugEnv
= getenv("U_RBBIDEBUG");
// The table builders do not exist yet; they start out as NULL here.
60 fForwardTables
= NULL
;
61 fReverseTables
= NULL
;
// Allocate the helper objects. The scanner and the set builder each receive
// a pointer back to this builder.
62 fUSetNodes
= new UVector(status
);
63 fScanner
= new RBBIRuleScanner(this);
64 fSetBuilder
= new RBBISetBuilder(this);
// Any null result from the three allocations above is reported through status.
// NOTE(review): only status is set here; the members that failed to allocate stay
//   null, so code that uses this object needs to look at status before touching them.
65 if(fSetBuilder
== 0 || fScanner
== 0 || fUSetNodes
== 0) {
66 status
= U_MEMORY_ALLOCATION_ERROR
;
72 //----------------------------------------------------------------------------------------
76 //----------------------------------------------------------------------------------------
// Destructor.
// NOTE(review): this listing shows only part of the body (the embedded line numbers
//   jump 77 -> 81 -> 90); the declaration of `i`, whatever loop surrounds the
//   elementAt() call, and the closing brace are not visible.
77 RBBIRuleBuilder::~RBBIRuleBuilder() {
// Fetch element i of fUSetNodes and treat it as an RBBINode pointer.
// NOTE(review): fUSetNodes is dereferenced without a null test in the visible lines,
//   although the constructor's allocation check allows for fUSetNodes == 0 - confirm
//   that this path cannot be reached with a null vector.
81 RBBINode
*n
= (RBBINode
*)fUSetNodes
->elementAt(i
);
// Release the forward and reverse table builders (deleting a null pointer is harmless).
90 delete fForwardTables
;
91 delete fReverseTables
;
101 //----------------------------------------------------------------------------------------
103 // flattenData() - Collect up the compiled RBBI rule data and put it into
104 // the format for saving in ICU data files,
105 // which is also the format needed by the RBBI runtime engine.
107 //----------------------------------------------------------------------------------------
// align8() - round a size up to the next multiple of 8.
//            A value that is already a multiple of 8 is returned unchanged.
//            The mask is the signed constant ~7 rather than the unsigned literal
//            0xfffffff8, so the whole expression stays in signed arithmetic and
//            no signed -> unsigned -> signed conversion takes place.
static int32_t align8(int32_t i) {
    return (i + 7) & ~7;
}
// Lays out one contiguous, zero-filled memory image in this order:
//     header | forward table | reverse table | trie | stripped rule source
// and records each section's offset and length in the header.
// NOTE(review): the return statements, the test guarding the allocation-failure
//   assignment, and the closing braces are not visible in this listing; presumably
//   the function returns `data` on success - confirm against the full file.
110 RBBIDataHeader
*RBBIRuleBuilder::flattenData() {
// Bail out early when an earlier step has already recorded a failure.
111 if (U_FAILURE(*fStatus
)) {
115 // Remove comments and whitespace from the rules to make it smaller.
116 UnicodeString
strippedRules(RBBIRuleScanner::stripRules(fRules
));
118 // Calculate the size of each section in the data.
119 // Sizes here are padded up to a multiple of 8 for better memory alignment.
120 // Sections sizes actually stored in the header are for the actual data
121 // without the padding.
// NOTE(review): the statement above does not match the code below for the two state
//   tables - fFTableLen and fRTableLen are assigned the padded sizes, while fTrieLen
//   and fRuleSourceLen are assigned unpadded sizes. Confirm which is intended.
123 int32_t headerSize
= align8(sizeof(RBBIDataHeader
));
124 int32_t forwardTableSize
= align8(fForwardTables
->getTableSize());
125 int32_t reverseTableSize
= align8(fReverseTables
->getTableSize());
126 int32_t trieSize
= align8(fSetBuilder
->getTrieSize());
// The rule text is sized with one extra UChar beyond its length.
127 int32_t rulesSize
= align8((strippedRules
.length()+1) * sizeof(UChar
));
129 int32_t totalSize
= headerSize
+ forwardTableSize
+ reverseTableSize
130 + trieSize
+ rulesSize
;
// A single allocation holds the header and every section.
131 RBBIDataHeader
*data
= (RBBIDataHeader
*)uprv_malloc(totalSize
);
// Allocation failure is reported through *fStatus (the guarding test is not visible here).
133 *fStatus
= U_MEMORY_ALLOCATION_ERROR
;
// Zero the whole image, which also clears the padding between sections.
136 uprv_memset(data
, 0, totalSize
);
// Fill in the header fields.
139 data
->fMagic
= 0xb1a0;
141 data
->fLength
= totalSize
;
142 data
->fCatCount
= fSetBuilder
->getNumCharCategories();
// Section offsets are byte offsets from the start of the image; each section
// begins where the padded previous one ends.
144 data
->fFTable
= headerSize
;
145 data
->fFTableLen
= forwardTableSize
;
146 data
->fRTable
= data
->fFTable
+ forwardTableSize
;
147 data
->fRTableLen
= reverseTableSize
;
148 data
->fTrie
= data
->fRTable
+ reverseTableSize
;
149 data
->fTrieLen
= fSetBuilder
->getTrieSize();
150 data
->fRuleSource
= data
->fTrie
+ trieSize
;
// Length in bytes of the rule text, without the extra UChar counted in rulesSize.
151 data
->fRuleSourceLen
= strippedRules
.length() * sizeof(UChar
);
153 uprv_memset(data
->fReserved
, 0, sizeof(data
->fReserved
));
// Write each section's contents at its recorded offset.
155 fForwardTables
->exportTable((uint8_t *)data
+ data
->fFTable
);
156 fReverseTables
->exportTable((uint8_t *)data
+ data
->fRTable
);
157 fSetBuilder
->serializeTrie ((uint8_t *)data
+ data
->fTrie
);
// NOTE(review): the capacity passed here, rulesSize/2+1 UChars, is one more than the
//   rulesSize bytes reserved for this section (assuming sizeof(UChar) == 2 - confirm).
//   The text plus one extra UChar still fits in rulesSize, but the stated capacity
//   overstates the space available at the end of the image.
158 strippedRules
.extract((UChar
*)((uint8_t *)data
+data
->fRuleSource
), rulesSize
/2+1, *fStatus
);
168 //----------------------------------------------------------------------------------------
170 // createRuleBasedBreakIterator - construct a break iterator from source rules
171 // that are passed in as a UnicodeString.
173 //----------------------------------------------------------------------------------------
// Compiles the rule source in `rules` and constructs a RuleBasedBreakIterator from
// the resulting memory image. The steps are: scan/parse the rules, build the
// character categories, build the forward and reverse state tables, flatten
// everything into the run-time format, then create the iterator from that image.
//   rules      - rule source text.
//   parseError - passed on to the RBBIRuleBuilder constructor.
// NOTE(review): this listing has lost lines from the definition - the return type,
//   the declaration of `status`, the return statements and the closing braces are
//   not visible; confirm details against the full file.
175 RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString
&rules
,
176 UParseError
&parseError
,
// Nothing is attempted when the caller's status already indicates failure.
179 if (U_FAILURE(status
)) {
184 // Read the input rules, generate a parse tree, symbol table,
185 // and list of all Unicode Sets referenced by the rules.
// NOTE(review): parse() is invoked directly after construction (listing lines 187 and
//   188 are consecutive), with no status test in between. The constructor's allocation
//   check allows for fScanner == 0, in which case this call would dereference null.
187 RBBIRuleBuilder
builder(rules
, parseError
, status
);
188 builder
.fScanner
->parse();
189 if (U_FAILURE(status
)) {
194 // UnicodeSet processing.
195 // Munge the Unicode Sets to create a set of character categories.
196 // Generate the mapping tables (TRIE) from input 32-bit characters to
197 // the character categories.
199 builder
.fSetBuilder
->build();
203 // Generate the DFA state transition table.
// One table builder is created for the forward tree and one for the reverse tree;
// each is handed a pointer to the rule builder and the address of its tree.
205 builder
.fForwardTables
= new RBBITableBuilder(&builder
, &builder
.fForwardTree
);
206 builder
.fReverseTables
= new RBBITableBuilder(&builder
, &builder
.fReverseTree
);
// A null result from either allocation is reported through status.
207 if(builder
.fForwardTables
== NULL
|| builder
.fReverseTables
== NULL
) {
208 status
= U_MEMORY_ALLOCATION_ERROR
;
// Both tables are built before status is examined.
212 builder
.fForwardTables
->build();
213 builder
.fReverseTables
->build();
214 if (U_FAILURE(status
)) {
220 // Package up the compiled data into a memory image
221 // in the run-time format.
223 RBBIDataHeader
*data
;
224 data
= builder
.flattenData();
228 // Clean up the compiler related stuff
233 // Create a break iterator from the compiled rules.
234 // (Identical to creation from stored pre-compiled rules)
236 RuleBasedBreakIterator
*This
= new RuleBasedBreakIterator(data
, status
);
// Failure to allocate the iterator is reported through status
// (the guarding test is not visible in this listing).
239 status
= U_MEMORY_ALLOCATION_ERROR
;
// Final status test; what happens on failure is not visible in this listing.
243 if (U_FAILURE(status
)) {
252 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */