]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
b75a7d8f A |
3 | /* |
4 | *************************************************************************** | |
2ca993e8 | 5 | * Copyright (C) 2002-2016 International Business Machines Corporation * |
b75a7d8f A |
6 | * and others. All rights reserved. * |
7 | *************************************************************************** | |
8 | */ | |
9 | ||
10 | // | |
11 | // File: rbbinode.cpp | |
12 | // | |
13 | // Implementation of class RBBINode, which represents a node in the | |
14 | // tree generated when parsing the Rules Based Break Iterator rules. | |
15 | // | |
16 | // This "Class" is actually closer to a struct. | |
17 | // Code using it is expected to directly access fields much of the time. | |
18 | // | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | ||
22 | #if !UCONFIG_NO_BREAK_ITERATION | |
23 | ||
24 | #include "unicode/unistr.h" | |
25 | #include "unicode/uniset.h" | |
26 | #include "unicode/uchar.h" | |
27 | #include "unicode/parsepos.h" | |
f3c0d7a5 A |
28 | |
29 | #include "cstr.h" | |
b75a7d8f A |
30 | #include "uvector.h" |
31 | ||
32 | #include "rbbirb.h" | |
33 | #include "rbbinode.h" | |
34 | ||
35 | #include "uassert.h" | |
36 | ||
37 | ||
38 | U_NAMESPACE_BEGIN | |
39 | ||
73c04bcf A |
#if defined(RBBI_DEBUG)
// Debug builds only: the serial number most recently handed out to a node.
// Both RBBINode constructors pre-increment it, so the first node created
// gets serial number 1.
static int gLastSerial = 0;
#endif
b75a7d8f A |
43 | |
44 | ||
45 | //------------------------------------------------------------------------- | |
46 | // | |
47 | // Constructor. Just set the fields to reasonable default values. | |
48 | // | |
49 | //------------------------------------------------------------------------- | |
50 | RBBINode::RBBINode(NodeType t) : UMemory() { | |
73c04bcf | 51 | #ifdef RBBI_DEBUG |
b75a7d8f | 52 | fSerialNum = ++gLastSerial; |
73c04bcf | 53 | #endif |
b75a7d8f A |
54 | fType = t; |
55 | fParent = NULL; | |
56 | fLeftChild = NULL; | |
57 | fRightChild = NULL; | |
58 | fInputSet = NULL; | |
59 | fFirstPos = 0; | |
60 | fLastPos = 0; | |
61 | fNullable = FALSE; | |
62 | fLookAheadEnd = FALSE; | |
2ca993e8 A |
63 | fRuleRoot = FALSE; |
64 | fChainIn = FALSE; | |
b75a7d8f | 65 | fVal = 0; |
374ca955 | 66 | fPrecedence = precZero; |
b75a7d8f A |
67 | |
68 | UErrorCode status = U_ZERO_ERROR; | |
69 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere | |
70 | fLastPosSet = new UVector(status); | |
71 | fFollowPos = new UVector(status); | |
72 | if (t==opCat) {fPrecedence = precOpCat;} | |
73 | else if (t==opOr) {fPrecedence = precOpOr;} | |
74 | else if (t==opStart) {fPrecedence = precStart;} | |
75 | else if (t==opLParen) {fPrecedence = precLParen;} | |
76 | ||
77 | } | |
78 | ||
79 | ||
80 | RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { | |
73c04bcf | 81 | #ifdef RBBI_DEBUG |
b75a7d8f | 82 | fSerialNum = ++gLastSerial; |
73c04bcf | 83 | #endif |
b75a7d8f A |
84 | fType = other.fType; |
85 | fParent = NULL; | |
86 | fLeftChild = NULL; | |
87 | fRightChild = NULL; | |
88 | fInputSet = other.fInputSet; | |
89 | fPrecedence = other.fPrecedence; | |
90 | fText = other.fText; | |
91 | fFirstPos = other.fFirstPos; | |
92 | fLastPos = other.fLastPos; | |
93 | fNullable = other.fNullable; | |
94 | fVal = other.fVal; | |
2ca993e8 A |
95 | fRuleRoot = FALSE; |
96 | fChainIn = other.fChainIn; | |
b75a7d8f A |
97 | UErrorCode status = U_ZERO_ERROR; |
98 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere | |
99 | fLastPosSet = new UVector(status); | |
100 | fFollowPos = new UVector(status); | |
101 | } | |
102 | ||
103 | ||
104 | //------------------------------------------------------------------------- | |
105 | // | |
106 | // Destructor. Deletes both this node AND any child nodes, | |
107 | // except in the case of variable reference nodes. For | |
108 | // these, the l. child points back to the definition, which | |
109 | // is common for all references to the variable, meaning | |
110 | // it can't be deleted here. | |
111 | // | |
112 | //------------------------------------------------------------------------- | |
113 | RBBINode::~RBBINode() { | |
114 | // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); | |
115 | delete fInputSet; | |
116 | fInputSet = NULL; | |
117 | ||
118 | switch (this->fType) { | |
119 | case varRef: | |
120 | case setRef: | |
121 | // for these node types, multiple instances point to the same "children" | |
122 | // Storage ownership of children handled elsewhere. Don't delete here. | |
123 | break; | |
124 | ||
125 | default: | |
126 | delete fLeftChild; | |
127 | fLeftChild = NULL; | |
128 | delete fRightChild; | |
129 | fRightChild = NULL; | |
130 | } | |
131 | ||
132 | ||
133 | delete fFirstPosSet; | |
134 | delete fLastPosSet; | |
135 | delete fFollowPos; | |
136 | ||
137 | } | |
138 | ||
139 | ||
140 | //------------------------------------------------------------------------- | |
141 | // | |
142 | // cloneTree Make a copy of the subtree rooted at this node. | |
143 | // Discard any variable references encountered along the way, | |
144 | // and replace with copies of the variable's definitions. | |
145 | // Used to replicate the expression underneath variable | |
146 | // references in preparation for generating the DFA tables. | |
147 | // | |
148 | //------------------------------------------------------------------------- | |
149 | RBBINode *RBBINode::cloneTree() { | |
150 | RBBINode *n; | |
151 | ||
152 | if (fType == RBBINode::varRef) { | |
153 | // If the current node is a variable reference, skip over it | |
154 | // and clone the definition of the variable instead. | |
155 | n = fLeftChild->cloneTree(); | |
156 | } else if (fType == RBBINode::uset) { | |
157 | n = this; | |
158 | } else { | |
159 | n = new RBBINode(*this); | |
46f4442e A |
160 | // Check for null pointer. |
161 | if (n != NULL) { | |
162 | if (fLeftChild != NULL) { | |
163 | n->fLeftChild = fLeftChild->cloneTree(); | |
164 | n->fLeftChild->fParent = n; | |
165 | } | |
166 | if (fRightChild != NULL) { | |
167 | n->fRightChild = fRightChild->cloneTree(); | |
168 | n->fRightChild->fParent = n; | |
169 | } | |
b75a7d8f A |
170 | } |
171 | } | |
172 | return n; | |
173 | } | |
174 | ||
175 | ||
176 | ||
177 | //------------------------------------------------------------------------- | |
178 | // | |
179 | // flattenVariables Walk a parse tree, replacing any variable | |
180 | // references with a copy of the variable's definition. | |
181 | // Aside from variables, the tree is not changed. | |
182 | // | |
183 | // Return the root of the tree. If the root was not a variable | |
184 | // reference, it remains unchanged - the root we started with | |
185 | // is the root we return. If, however, the root was a variable | |
186 | // reference, the root of the newly cloned replacement tree will | |
187 | // be returned, and the original tree deleted. | |
188 | // | |
189 | // This function works by recursively walking the tree | |
190 | // without doing anything until a variable reference is | |
191 | // found, then calling cloneTree() at that point. Any | |
192 | // nested references are handled by cloneTree(), not here. | |
193 | // | |
194 | //------------------------------------------------------------------------- | |
195 | RBBINode *RBBINode::flattenVariables() { | |
196 | if (fType == varRef) { | |
f3c0d7a5 A |
197 | RBBINode *retNode = fLeftChild->cloneTree(); |
198 | if (retNode != NULL) { | |
199 | retNode->fRuleRoot = this->fRuleRoot; | |
200 | retNode->fChainIn = this->fChainIn; | |
201 | } | |
202 | delete this; // TODO: undefined behavior. Fix. | |
b75a7d8f A |
203 | return retNode; |
204 | } | |
205 | ||
206 | if (fLeftChild != NULL) { | |
207 | fLeftChild = fLeftChild->flattenVariables(); | |
208 | fLeftChild->fParent = this; | |
209 | } | |
210 | if (fRightChild != NULL) { | |
211 | fRightChild = fRightChild->flattenVariables(); | |
212 | fRightChild->fParent = this; | |
213 | } | |
214 | return this; | |
215 | } | |
216 | ||
217 | ||
218 | //------------------------------------------------------------------------- | |
219 | // | |
220 | // flattenSets Walk the parse tree, replacing any nodes of type setRef | |
221 | // with a copy of the expression tree for the set. A set's | |
222 | // equivalent expression tree is precomputed and saved as | |
223 | // the left child of the uset node. | |
224 | // | |
225 | //------------------------------------------------------------------------- | |
226 | void RBBINode::flattenSets() { | |
227 | U_ASSERT(fType != setRef); | |
228 | ||
229 | if (fLeftChild != NULL) { | |
230 | if (fLeftChild->fType==setRef) { | |
231 | RBBINode *setRefNode = fLeftChild; | |
232 | RBBINode *usetNode = setRefNode->fLeftChild; | |
233 | RBBINode *replTree = usetNode->fLeftChild; | |
234 | fLeftChild = replTree->cloneTree(); | |
235 | fLeftChild->fParent = this; | |
236 | delete setRefNode; | |
237 | } else { | |
238 | fLeftChild->flattenSets(); | |
239 | } | |
240 | } | |
241 | ||
242 | if (fRightChild != NULL) { | |
243 | if (fRightChild->fType==setRef) { | |
244 | RBBINode *setRefNode = fRightChild; | |
245 | RBBINode *usetNode = setRefNode->fLeftChild; | |
246 | RBBINode *replTree = usetNode->fLeftChild; | |
247 | fRightChild = replTree->cloneTree(); | |
248 | fRightChild->fParent = this; | |
249 | delete setRefNode; | |
250 | } else { | |
251 | fRightChild->flattenSets(); | |
252 | } | |
253 | } | |
254 | } | |
255 | ||
256 | ||
257 | ||
258 | //------------------------------------------------------------------------- | |
259 | // | |
260 | // findNodes() Locate all the nodes of the specified type, starting | |
261 | // at the specified root. | |
262 | // | |
263 | //------------------------------------------------------------------------- | |
264 | void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { | |
265 | /* test for buffer overflows */ | |
266 | if (U_FAILURE(status)) { | |
267 | return; | |
268 | } | |
269 | if (fType == kind) { | |
270 | dest->addElement(this, status); | |
271 | } | |
272 | if (fLeftChild != NULL) { | |
273 | fLeftChild->findNodes(dest, kind, status); | |
274 | } | |
275 | if (fRightChild != NULL) { | |
276 | fRightChild->findNodes(dest, kind, status); | |
277 | } | |
278 | } | |
279 | ||
280 | ||
//-------------------------------------------------------------------------
//
//    printNode.     Print out a single node, for debugging.
//                   Only available when RBBI_DEBUG is defined.
//
//-------------------------------------------------------------------------
b75a7d8f | 286 | #ifdef RBBI_DEBUG |
2ca993e8 A |
287 | |
288 | static int32_t serial(const RBBINode *node) { | |
289 | return (node == NULL? -1 : node->fSerialNum); | |
290 | } | |
291 | ||
292 | ||
f3c0d7a5 | 293 | void RBBINode::printNode(const RBBINode *node) { |
b75a7d8f A |
294 | static const char * const nodeTypeNames[] = { |
295 | "setRef", | |
296 | "uset", | |
297 | "varRef", | |
298 | "leafChar", | |
299 | "lookAhead", | |
300 | "tag", | |
301 | "endMark", | |
302 | "opStart", | |
303 | "opCat", | |
304 | "opOr", | |
305 | "opStar", | |
306 | "opPlus", | |
307 | "opQuestion", | |
308 | "opBreak", | |
309 | "opReverse", | |
310 | "opLParen" | |
311 | }; | |
312 | ||
f3c0d7a5 A |
313 | if (node==NULL) { |
314 | RBBIDebugPrintf("%10p", (void *)node); | |
374ca955 | 315 | } else { |
2ca993e8 | 316 | RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", |
f3c0d7a5 A |
317 | (void *)node, node->fSerialNum, nodeTypeNames[node->fType], |
318 | node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', | |
319 | serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), | |
320 | node->fFirstPos, node->fVal); | |
321 | if (node->fType == varRef) { | |
322 | RBBI_DEBUG_printUnicodeString(node->fText); | |
374ca955 | 323 | } |
b75a7d8f A |
324 | } |
325 | RBBIDebugPrintf("\n"); | |
b75a7d8f | 326 | } |
374ca955 | 327 | #endif |
b75a7d8f A |
328 | |
329 | ||
330 | #ifdef RBBI_DEBUG | |
f3c0d7a5 A |
331 | U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { |
332 | RBBIDebugPrintf("%*s", minWidth, CStr(s)()); | |
b75a7d8f A |
333 | } |
334 | #endif | |
335 | ||
336 | ||
//-------------------------------------------------------------------------
//
//    printTree.     Print out the tree of nodes rooted at the given node,
//                   for debugging.  Only available when RBBI_DEBUG is
//                   defined.
//
//-------------------------------------------------------------------------
342 | #ifdef RBBI_DEBUG | |
2ca993e8 A |
343 | void RBBINode::printNodeHeader() { |
344 | RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); | |
345 | } | |
346 | ||
f3c0d7a5 | 347 | void RBBINode::printTree(const RBBINode *node, UBool printHeading) { |
b75a7d8f | 348 | if (printHeading) { |
2ca993e8 | 349 | printNodeHeader(); |
b75a7d8f | 350 | } |
f3c0d7a5 A |
351 | printNode(node); |
352 | if (node != NULL) { | |
374ca955 A |
353 | // Only dump the definition under a variable reference if asked to. |
354 | // Unconditinally dump children of all other node types. | |
f3c0d7a5 A |
355 | if (node->fType != varRef) { |
356 | if (node->fLeftChild != NULL) { | |
357 | printTree(node->fLeftChild, FALSE); | |
374ca955 A |
358 | } |
359 | ||
f3c0d7a5 A |
360 | if (node->fRightChild != NULL) { |
361 | printTree(node->fRightChild, FALSE); | |
374ca955 | 362 | } |
b75a7d8f A |
363 | } |
364 | } | |
365 | } | |
366 | #endif | |
367 | ||
368 | ||
369 | ||
370 | U_NAMESPACE_END | |
371 | ||
372 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |