]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbirb.cpp
ICU-57163.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbirb.cpp
CommitLineData
b75a7d8f
A
//
//  file:  rbbirb.cpp
//
//  Copyright (C) 2002-2011, International Business Machines Corporation and others.
//  All Rights Reserved.
//
//  This file contains the RBBIRuleBuilder class implementation.  This is the main class for
//    building (compiling) break rules into the tables required by the runtime
//    RBBI engine.
//
11
12#include "unicode/utypes.h"
13
14#if !UCONFIG_NO_BREAK_ITERATION
15
16#include "unicode/brkiter.h"
17#include "unicode/rbbi.h"
18#include "unicode/ubrk.h"
19#include "unicode/unistr.h"
20#include "unicode/uniset.h"
21#include "unicode/uchar.h"
22#include "unicode/uchriter.h"
23#include "unicode/parsepos.h"
24#include "unicode/parseerr.h"
25#include "cmemory.h"
26#include "cstring.h"
27
28#include "rbbirb.h"
29#include "rbbinode.h"
30
31#include "rbbiscan.h"
32#include "rbbisetb.h"
33#include "rbbitblb.h"
34#include "rbbidata.h"
35
36
37U_NAMESPACE_BEGIN
38
39
40//----------------------------------------------------------------------------------------
41//
42// Constructor.
43//
44//----------------------------------------------------------------------------------------
45RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
46f4442e 46 UParseError *parseErr,
b75a7d8f
A
47 UErrorCode &status)
48 : fRules(rules)
49{
374ca955 50 fStatus = &status; // status is checked below
46f4442e 51 fParseError = parseErr;
b75a7d8f
A
52 fDebugEnv = NULL;
53#ifdef RBBI_DEBUG
54 fDebugEnv = getenv("U_RBBIDEBUG");
55#endif
56
57
58 fForwardTree = NULL;
59 fReverseTree = NULL;
374ca955
A
60 fSafeFwdTree = NULL;
61 fSafeRevTree = NULL;
62 fDefaultTree = &fForwardTree;
b75a7d8f
A
63 fForwardTables = NULL;
64 fReverseTables = NULL;
374ca955
A
65 fSafeFwdTables = NULL;
66 fSafeRevTables = NULL;
67 fRuleStatusVals = NULL;
68 fChainRules = FALSE;
69 fLBCMNoChain = FALSE;
70 fLookAheadHardBreak = FALSE;
b331163b 71 fRINoChain = FALSE;
374ca955
A
72 fUSetNodes = NULL;
73 fRuleStatusVals = NULL;
74 fScanner = NULL;
75 fSetBuilder = NULL;
46f4442e
A
76 if (parseErr) {
77 uprv_memset(parseErr, 0, sizeof(UParseError));
78 }
374ca955
A
79
80 if (U_FAILURE(status)) {
81 return;
82 }
83
84 fUSetNodes = new UVector(status); // bcos status gets overwritten here
85 fRuleStatusVals = new UVector(status);
b75a7d8f
A
86 fScanner = new RBBIRuleScanner(this);
87 fSetBuilder = new RBBISetBuilder(this);
374ca955
A
88 if (U_FAILURE(status)) {
89 return;
90 }
91 if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
b75a7d8f
A
92 status = U_MEMORY_ALLOCATION_ERROR;
93 }
94}
95
96
97
98//----------------------------------------------------------------------------------------
99//
100// Destructor
101//
102//----------------------------------------------------------------------------------------
103RBBIRuleBuilder::~RBBIRuleBuilder() {
104
105 int i;
106 for (i=0; ; i++) {
107 RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
108 if (n==NULL) {
109 break;
110 }
111 delete n;
112 }
113
114 delete fUSetNodes;
115 delete fSetBuilder;
116 delete fForwardTables;
117 delete fReverseTables;
374ca955
A
118 delete fSafeFwdTables;
119 delete fSafeRevTables;
120
b75a7d8f
A
121 delete fForwardTree;
122 delete fReverseTree;
374ca955
A
123 delete fSafeFwdTree;
124 delete fSafeRevTree;
b75a7d8f 125 delete fScanner;
374ca955 126 delete fRuleStatusVals;
b75a7d8f
A
127}
128
129
130
131
132
//----------------------------------------------------------------------------------------
//
//   flattenData() -  Collect up the compiled RBBI rule data and put it into
//                    the format for saving in ICU data files,
//                    which is also the format needed by the RBBI runtime engine.
//
//----------------------------------------------------------------------------------------
// Round i up to the nearest multiple of 8; a value that is already a multiple
// of 8 is returned unchanged.  Adding 7 and clearing the low three bits does the
// rounding; the mask is written as ~7 so the whole expression stays in signed int.
static int32_t align8(int32_t i) {
    return (i + 7) & ~7;
}
141
142RBBIDataHeader *RBBIRuleBuilder::flattenData() {
374ca955
A
143 int32_t i;
144
b75a7d8f
A
145 if (U_FAILURE(*fStatus)) {
146 return NULL;
147 }
148
149 // Remove comments and whitespace from the rules to make it smaller.
73c04bcf 150 UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));
b75a7d8f
A
151
152 // Calculate the size of each section in the data.
153 // Sizes here are padded up to a multiple of 8 for better memory alignment.
154 // Sections sizes actually stored in the header are for the actual data
155 // without the padding.
156 //
157 int32_t headerSize = align8(sizeof(RBBIDataHeader));
158 int32_t forwardTableSize = align8(fForwardTables->getTableSize());
159 int32_t reverseTableSize = align8(fReverseTables->getTableSize());
374ca955
A
160 int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize());
161 int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
b75a7d8f 162 int32_t trieSize = align8(fSetBuilder->getTrieSize());
374ca955 163 int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
b75a7d8f
A
164 int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
165
166 int32_t totalSize = headerSize + forwardTableSize + reverseTableSize
374ca955
A
167 + safeFwdTableSize + safeRevTableSize
168 + statusTableSize + trieSize + rulesSize;
169
b75a7d8f
A
170 RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
171 if (data == NULL) {
172 *fStatus = U_MEMORY_ALLOCATION_ERROR;
173 return NULL;
174 }
175 uprv_memset(data, 0, totalSize);
176
177
73c04bcf
A
178 data->fMagic = 0xb1a0;
179 data->fFormatVersion[0] = 3;
180 data->fFormatVersion[1] = 1;
181 data->fFormatVersion[2] = 0;
182 data->fFormatVersion[3] = 0;
183 data->fLength = totalSize;
184 data->fCatCount = fSetBuilder->getNumCharCategories();
b75a7d8f
A
185
186 data->fFTable = headerSize;
187 data->fFTableLen = forwardTableSize;
374ca955 188 data->fRTable = data->fFTable + forwardTableSize;
b75a7d8f 189 data->fRTableLen = reverseTableSize;
374ca955
A
190 data->fSFTable = data->fRTable + reverseTableSize;
191 data->fSFTableLen = safeFwdTableSize;
192 data->fSRTable = data->fSFTable + safeFwdTableSize;
193 data->fSRTableLen = safeRevTableSize;
194
195 data->fTrie = data->fSRTable + safeRevTableSize;
b75a7d8f 196 data->fTrieLen = fSetBuilder->getTrieSize();
374ca955
A
197 data->fStatusTable = data->fTrie + trieSize;
198 data->fStatusTableLen= statusTableSize;
199 data->fRuleSource = data->fStatusTable + statusTableSize;
b75a7d8f
A
200 data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
201
202 uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
203
204 fForwardTables->exportTable((uint8_t *)data + data->fFTable);
205 fReverseTables->exportTable((uint8_t *)data + data->fRTable);
374ca955
A
206 fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
207 fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
b75a7d8f 208 fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
374ca955
A
209
210 int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
211 for (i=0; i<fRuleStatusVals->size(); i++) {
212 ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
213 }
214
b75a7d8f
A
215 strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
216
217 return data;
218}
219
220
221
222
223
224
225//----------------------------------------------------------------------------------------
226//
227// createRuleBasedBreakIterator construct from source rules that are passed in
228// in a UnicodeString
229//
230//----------------------------------------------------------------------------------------
231BreakIterator *
232RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
46f4442e 233 UParseError *parseError,
b75a7d8f
A
234 UErrorCode &status)
235{
374ca955 236 // status checked below
b75a7d8f
A
237
238 //
239 // Read the input rules, generate a parse tree, symbol table,
240 // and list of all Unicode Sets referenced by the rules.
241 //
242 RBBIRuleBuilder builder(rules, parseError, status);
374ca955 243 if (U_FAILURE(status)) { // status checked here bcos build below doesn't
b75a7d8f
A
244 return NULL;
245 }
46f4442e 246 builder.fScanner->parse();
b75a7d8f
A
247
248 //
249 // UnicodeSet processing.
250 // Munge the Unicode Sets to create a set of character categories.
251 // Generate the mapping tables (TRIE) from input 32-bit characters to
252 // the character categories.
253 //
254 builder.fSetBuilder->build();
255
256
257 //
258 // Generate the DFA state transition table.
259 //
260 builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
261 builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
374ca955
A
262 builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
263 builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
4388f060
A
264 if (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
265 builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)
374ca955 266 {
b75a7d8f 267 status = U_MEMORY_ALLOCATION_ERROR;
4388f060
A
268 delete builder.fForwardTables; builder.fForwardTables = NULL;
269 delete builder.fReverseTables; builder.fReverseTables = NULL;
270 delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
271 delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
b75a7d8f
A
272 return NULL;
273 }
274
275 builder.fForwardTables->build();
276 builder.fReverseTables->build();
374ca955
A
277 builder.fSafeFwdTables->build();
278 builder.fSafeRevTables->build();
b75a7d8f 279
374ca955
A
280#ifdef RBBI_DEBUG
281 if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
282 builder.fForwardTables->printRuleStatusTable();
283 }
284#endif
b75a7d8f
A
285
286 //
287 // Package up the compiled data into a memory image
288 // in the run-time format.
289 //
374ca955 290 RBBIDataHeader *data = builder.flattenData(); // returns NULL if error
46f4442e
A
291 if (U_FAILURE(*builder.fStatus)) {
292 return NULL;
293 }
b75a7d8f
A
294
295
296 //
297 // Clean up the compiler related stuff
298 //
299
300
301 //
302 // Create a break iterator from the compiled rules.
303 // (Identical to creation from stored pre-compiled rules)
304 //
374ca955 305 // status is checked after init in construction.
b75a7d8f 306 RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
b75a7d8f
A
307 if (U_FAILURE(status)) {
308 delete This;
309 This = NULL;
374ca955
A
310 }
311 else if(This == NULL) { // test for NULL
312 status = U_MEMORY_ALLOCATION_ERROR;
b75a7d8f
A
313 }
314 return This;
315}
316
317U_NAMESPACE_END
318
319#endif /* #if !UCONFIG_NO_BREAK_ITERATION */