]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/rbbirb57.cpp
ICU-64260.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbirb57.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 //
4 // Copyright (C) 2002-2011, International Business Machines Corporation and others.
5 // All Rights Reserved.
6 //
7
8 #include "unicode/utypes.h"
9
10 #if !UCONFIG_NO_BREAK_ITERATION
11
12 #include "unicode/brkiter.h"
13 #include "unicode/ubrk.h"
14 #include "unicode/unistr.h"
15 #include "unicode/uniset.h"
16 #include "unicode/uchar.h"
17 #include "unicode/uchriter.h"
18 #include "unicode/parsepos.h"
19 #include "unicode/parseerr.h"
20 #include "cmemory.h"
21 #include "cstring.h"
22
23 #include "rbbidata57.h"
24 #include "rbbirb57.h"
25 #include "rbbisetb57.h"
26 #include "rbbi57.h"
27 #include "rbbinode.h"
28 #include "rbbiscan57.h"
29 #include "rbbitblb57.h"
30
31
32 U_NAMESPACE_BEGIN
33
34
35 //----------------------------------------------------------------------------------------
36 //
37 // Constructor.
38 //
39 //----------------------------------------------------------------------------------------
40 RBBIRuleBuilder57::RBBIRuleBuilder57(const UnicodeString &rules,
41 UParseError *parseErr,
42 UErrorCode &status)
43 : fRules(rules)
44 {
45 fStatus = &status; // status is checked below
46 fParseError = parseErr;
47 fDebugEnv = NULL;
48 #ifdef RBBI_DEBUG
49 fDebugEnv = getenv("U_RBBIDEBUG");
50 #endif
51
52
53 fForwardTree = NULL;
54 fReverseTree = NULL;
55 fSafeFwdTree = NULL;
56 fSafeRevTree = NULL;
57 fDefaultTree = &fForwardTree;
58 fForwardTables = NULL;
59 fReverseTables = NULL;
60 fSafeFwdTables = NULL;
61 fSafeRevTables = NULL;
62 fRuleStatusVals = NULL;
63 fChainRules = FALSE;
64 fLBCMNoChain = FALSE;
65 fLookAheadHardBreak = FALSE;
66 fRINoChain = FALSE;
67 fUSetNodes = NULL;
68 fRuleStatusVals = NULL;
69 fScanner = NULL;
70 fSetBuilder = NULL;
71 if (parseErr) {
72 uprv_memset(parseErr, 0, sizeof(UParseError));
73 }
74
75 if (U_FAILURE(status)) {
76 return;
77 }
78
79 fUSetNodes = new UVector(status); // bcos status gets overwritten here
80 fRuleStatusVals = new UVector(status);
81 fScanner = new RBBIRuleScanner57(this);
82 fSetBuilder = new RBBISetBuilder57(this);
83 if (U_FAILURE(status)) {
84 return;
85 }
86 if(fSetBuilder == 0 || fScanner == 0 || fUSetNodes == 0 || fRuleStatusVals == 0) {
87 status = U_MEMORY_ALLOCATION_ERROR;
88 }
89 }
90
91
92
93 //----------------------------------------------------------------------------------------
94 //
95 // Destructor
96 //
97 //----------------------------------------------------------------------------------------
98 RBBIRuleBuilder57::~RBBIRuleBuilder57() {
99
100 int i;
101 for (i=0; ; i++) {
102 RBBINode *n = (RBBINode *)fUSetNodes->elementAt(i);
103 if (n==NULL) {
104 break;
105 }
106 delete n;
107 }
108
109 delete fUSetNodes;
110 delete fSetBuilder;
111 delete fForwardTables;
112 delete fReverseTables;
113 delete fSafeFwdTables;
114 delete fSafeRevTables;
115
116 delete fForwardTree;
117 delete fReverseTree;
118 delete fSafeFwdTree;
119 delete fSafeRevTree;
120 delete fScanner;
121 delete fRuleStatusVals;
122 }
123
124
125
126
127
128 //----------------------------------------------------------------------------------------
129 //
130 // flattenData() - Collect up the compiled RBBI rule data and put it into
131 // the format for saving in ICU data files,
132 // which is also the format needed by the RBBI runtime engine.
133 //
134 //----------------------------------------------------------------------------------------
// Round i up to the next multiple of 8 by adding 7 and clearing the low three bits.
static int32_t align8(int32_t i) {
    const uint32_t kLowBitsClearedMask = 0xfffffff8;
    return (i + 7) & kLowBitsClearedMask;
}
136
137 RBBIDataHeader57 *RBBIRuleBuilder57::flattenData() {
138 int32_t i;
139
140 if (U_FAILURE(*fStatus)) {
141 return NULL;
142 }
143
144 // Remove comments and whitespace from the rules to make it smaller.
145 UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner57::stripRules(fRules));
146
147 // Calculate the size of each section in the data.
148 // Sizes here are padded up to a multiple of 8 for better memory alignment.
149 // Sections sizes actually stored in the header are for the actual data
150 // without the padding.
151 //
152 int32_t headerSize = align8(sizeof(RBBIDataHeader57));
153 int32_t forwardTableSize = align8(fForwardTables->getTableSize());
154 int32_t reverseTableSize = align8(fReverseTables->getTableSize());
155 int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize());
156 int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
157 int32_t trieSize = align8(fSetBuilder->getTrieSize());
158 int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
159 int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
160
161 int32_t totalSize = headerSize + forwardTableSize + reverseTableSize
162 + safeFwdTableSize + safeRevTableSize
163 + statusTableSize + trieSize + rulesSize;
164
165 RBBIDataHeader57 *data = (RBBIDataHeader57 *)uprv_malloc(totalSize);
166 if (data == NULL) {
167 *fStatus = U_MEMORY_ALLOCATION_ERROR;
168 return NULL;
169 }
170 uprv_memset(data, 0, totalSize);
171
172
173 data->fMagic = 0xb1a0;
174 data->fFormatVersion[0] = 3;
175 data->fFormatVersion[1] = 1;
176 data->fFormatVersion[2] = 0;
177 data->fFormatVersion[3] = 0;
178 data->fLength = totalSize;
179 data->fCatCount = fSetBuilder->getNumCharCategories();
180
181 data->fFTable = headerSize;
182 data->fFTableLen = forwardTableSize;
183 data->fRTable = data->fFTable + forwardTableSize;
184 data->fRTableLen = reverseTableSize;
185 data->fSFTable = data->fRTable + reverseTableSize;
186 data->fSFTableLen = safeFwdTableSize;
187 data->fSRTable = data->fSFTable + safeFwdTableSize;
188 data->fSRTableLen = safeRevTableSize;
189
190 data->fTrie = data->fSRTable + safeRevTableSize;
191 data->fTrieLen = fSetBuilder->getTrieSize();
192 data->fStatusTable = data->fTrie + trieSize;
193 data->fStatusTableLen= statusTableSize;
194 data->fRuleSource = data->fStatusTable + statusTableSize;
195 data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
196
197 uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
198
199 fForwardTables->exportTable((uint8_t *)data + data->fFTable);
200 fReverseTables->exportTable((uint8_t *)data + data->fRTable);
201 fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
202 fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
203 fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
204
205 int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
206 for (i=0; i<fRuleStatusVals->size(); i++) {
207 ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
208 }
209
210 strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
211
212 return data;
213 }
214
215
216
217
218
219
220 //----------------------------------------------------------------------------------------
221 //
222 // createRuleBasedBreakIterator construct from source rules that are passed in
223 // in a UnicodeString
224 //
225 //----------------------------------------------------------------------------------------
226 BreakIterator *
227 RBBIRuleBuilder57::createRuleBasedBreakIterator( const UnicodeString &rules,
228 UParseError *parseError,
229 UErrorCode &status)
230 {
231 // status checked below
232
233 //
234 // Read the input rules, generate a parse tree, symbol table,
235 // and list of all Unicode Sets referenced by the rules.
236 //
237 RBBIRuleBuilder57 builder(rules, parseError, status);
238 if (U_FAILURE(status)) { // status checked here bcos build below doesn't
239 return NULL;
240 }
241 builder.fScanner->parse();
242
243 //
244 // UnicodeSet processing.
245 // Munge the Unicode Sets to create a set of character categories.
246 // Generate the mapping tables (TRIE) from input 32-bit characters to
247 // the character categories.
248 //
249 builder.fSetBuilder->build();
250
251
252 //
253 // Generate the DFA state transition table.
254 //
255 builder.fForwardTables = new RBBITableBuilder57(&builder, &builder.fForwardTree);
256 builder.fReverseTables = new RBBITableBuilder57(&builder, &builder.fReverseTree);
257 builder.fSafeFwdTables = new RBBITableBuilder57(&builder, &builder.fSafeFwdTree);
258 builder.fSafeRevTables = new RBBITableBuilder57(&builder, &builder.fSafeRevTree);
259 if (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
260 builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)
261 {
262 status = U_MEMORY_ALLOCATION_ERROR;
263 delete builder.fForwardTables; builder.fForwardTables = NULL;
264 delete builder.fReverseTables; builder.fReverseTables = NULL;
265 delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
266 delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
267 return NULL;
268 }
269
270 builder.fForwardTables->build();
271 builder.fReverseTables->build();
272 builder.fSafeFwdTables->build();
273 builder.fSafeRevTables->build();
274
275 #ifdef RBBI_DEBUG
276 if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
277 builder.fForwardTables->printRuleStatusTable();
278 }
279 #endif
280
281 //
282 // Package up the compiled data into a memory image
283 // in the run-time format.
284 //
285 RBBIDataHeader57 *data = builder.flattenData(); // returns NULL if error
286 if (U_FAILURE(*builder.fStatus)) {
287 return NULL;
288 }
289
290
291 //
292 // Clean up the compiler related stuff
293 //
294
295
296 //
297 // Create a break iterator from the compiled rules.
298 // (Identical to creation from stored pre-compiled rules)
299 //
300 // status is checked after init in construction.
301 RuleBasedBreakIterator57 *This = new RuleBasedBreakIterator57(data, status);
302 if (U_FAILURE(status)) {
303 delete This;
304 This = NULL;
305 }
306 else if(This == NULL) { // test for NULL
307 status = U_MEMORY_ALLOCATION_ERROR;
308 }
309 return This;
310 }
311
312 U_NAMESPACE_END
313
314 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */