]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | *************************************************************************** | |
2ca993e8 | 3 | * Copyright (C) 2002-2016 International Business Machines Corporation * |
b75a7d8f A |
4 | * and others. All rights reserved. * |
5 | *************************************************************************** | |
6 | */ | |
7 | ||
8 | // | |
9 | // File: rbbinode.cpp | |
10 | // | |
11 | // Implementation of class RBBINode, which represents a node in the | |
12 | // tree generated when parsing the Rules Based Break Iterator rules. | |
13 | // | |
14 | // This "Class" is actually closer to a struct. | |
15 | // Code using it is expected to directly access fields much of the time. | |
16 | // | |
17 | ||
18 | #include "unicode/utypes.h" | |
19 | ||
20 | #if !UCONFIG_NO_BREAK_ITERATION | |
21 | ||
22 | #include "unicode/unistr.h" | |
23 | #include "unicode/uniset.h" | |
24 | #include "unicode/uchar.h" | |
25 | #include "unicode/parsepos.h" | |
26 | #include "uvector.h" | |
27 | ||
28 | #include "rbbirb.h" | |
29 | #include "rbbinode.h" | |
30 | ||
31 | #include "uassert.h" | |
32 | ||
33 | ||
34 | U_NAMESPACE_BEGIN | |
35 | ||
73c04bcf A |
36 | #ifdef RBBI_DEBUG |
37 | static int gLastSerial = 0; | |
38 | #endif | |
b75a7d8f A |
39 | |
40 | ||
41 | //------------------------------------------------------------------------- | |
42 | // | |
43 | // Constructor. Just set the fields to reasonable default values. | |
44 | // | |
45 | //------------------------------------------------------------------------- | |
46 | RBBINode::RBBINode(NodeType t) : UMemory() { | |
73c04bcf | 47 | #ifdef RBBI_DEBUG |
b75a7d8f | 48 | fSerialNum = ++gLastSerial; |
73c04bcf | 49 | #endif |
b75a7d8f A |
50 | fType = t; |
51 | fParent = NULL; | |
52 | fLeftChild = NULL; | |
53 | fRightChild = NULL; | |
54 | fInputSet = NULL; | |
55 | fFirstPos = 0; | |
56 | fLastPos = 0; | |
57 | fNullable = FALSE; | |
58 | fLookAheadEnd = FALSE; | |
2ca993e8 A |
59 | fRuleRoot = FALSE; |
60 | fChainIn = FALSE; | |
b75a7d8f | 61 | fVal = 0; |
374ca955 | 62 | fPrecedence = precZero; |
b75a7d8f A |
63 | |
64 | UErrorCode status = U_ZERO_ERROR; | |
65 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere | |
66 | fLastPosSet = new UVector(status); | |
67 | fFollowPos = new UVector(status); | |
68 | if (t==opCat) {fPrecedence = precOpCat;} | |
69 | else if (t==opOr) {fPrecedence = precOpOr;} | |
70 | else if (t==opStart) {fPrecedence = precStart;} | |
71 | else if (t==opLParen) {fPrecedence = precLParen;} | |
72 | ||
73 | } | |
74 | ||
75 | ||
76 | RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { | |
73c04bcf | 77 | #ifdef RBBI_DEBUG |
b75a7d8f | 78 | fSerialNum = ++gLastSerial; |
73c04bcf | 79 | #endif |
b75a7d8f A |
80 | fType = other.fType; |
81 | fParent = NULL; | |
82 | fLeftChild = NULL; | |
83 | fRightChild = NULL; | |
84 | fInputSet = other.fInputSet; | |
85 | fPrecedence = other.fPrecedence; | |
86 | fText = other.fText; | |
87 | fFirstPos = other.fFirstPos; | |
88 | fLastPos = other.fLastPos; | |
89 | fNullable = other.fNullable; | |
90 | fVal = other.fVal; | |
2ca993e8 A |
91 | fRuleRoot = FALSE; |
92 | fChainIn = other.fChainIn; | |
b75a7d8f A |
93 | UErrorCode status = U_ZERO_ERROR; |
94 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere | |
95 | fLastPosSet = new UVector(status); | |
96 | fFollowPos = new UVector(status); | |
97 | } | |
98 | ||
99 | ||
100 | //------------------------------------------------------------------------- | |
101 | // | |
102 | // Destructor. Deletes both this node AND any child nodes, | |
103 | // except in the case of variable reference nodes. For | |
104 | // these, the l. child points back to the definition, which | |
105 | // is common for all references to the variable, meaning | |
106 | // it can't be deleted here. | |
107 | // | |
108 | //------------------------------------------------------------------------- | |
109 | RBBINode::~RBBINode() { | |
110 | // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); | |
111 | delete fInputSet; | |
112 | fInputSet = NULL; | |
113 | ||
114 | switch (this->fType) { | |
115 | case varRef: | |
116 | case setRef: | |
117 | // for these node types, multiple instances point to the same "children" | |
118 | // Storage ownership of children handled elsewhere. Don't delete here. | |
119 | break; | |
120 | ||
121 | default: | |
122 | delete fLeftChild; | |
123 | fLeftChild = NULL; | |
124 | delete fRightChild; | |
125 | fRightChild = NULL; | |
126 | } | |
127 | ||
128 | ||
129 | delete fFirstPosSet; | |
130 | delete fLastPosSet; | |
131 | delete fFollowPos; | |
132 | ||
133 | } | |
134 | ||
135 | ||
136 | //------------------------------------------------------------------------- | |
137 | // | |
138 | // cloneTree Make a copy of the subtree rooted at this node. | |
139 | // Discard any variable references encountered along the way, | |
140 | // and replace with copies of the variable's definitions. | |
141 | // Used to replicate the expression underneath variable | |
142 | // references in preparation for generating the DFA tables. | |
143 | // | |
144 | //------------------------------------------------------------------------- | |
145 | RBBINode *RBBINode::cloneTree() { | |
146 | RBBINode *n; | |
147 | ||
148 | if (fType == RBBINode::varRef) { | |
149 | // If the current node is a variable reference, skip over it | |
150 | // and clone the definition of the variable instead. | |
151 | n = fLeftChild->cloneTree(); | |
152 | } else if (fType == RBBINode::uset) { | |
153 | n = this; | |
154 | } else { | |
155 | n = new RBBINode(*this); | |
46f4442e A |
156 | // Check for null pointer. |
157 | if (n != NULL) { | |
158 | if (fLeftChild != NULL) { | |
159 | n->fLeftChild = fLeftChild->cloneTree(); | |
160 | n->fLeftChild->fParent = n; | |
161 | } | |
162 | if (fRightChild != NULL) { | |
163 | n->fRightChild = fRightChild->cloneTree(); | |
164 | n->fRightChild->fParent = n; | |
165 | } | |
b75a7d8f A |
166 | } |
167 | } | |
2ca993e8 A |
168 | n->fRuleRoot = this->fRuleRoot; |
169 | n->fChainIn = this->fChainIn; | |
b75a7d8f A |
170 | return n; |
171 | } | |
172 | ||
173 | ||
174 | ||
175 | //------------------------------------------------------------------------- | |
176 | // | |
177 | // flattenVariables Walk a parse tree, replacing any variable | |
178 | // references with a copy of the variable's definition. | |
179 | // Aside from variables, the tree is not changed. | |
180 | // | |
181 | // Return the root of the tree. If the root was not a variable | |
182 | // reference, it remains unchanged - the root we started with | |
183 | // is the root we return. If, however, the root was a variable | |
184 | // reference, the root of the newly cloned replacement tree will | |
185 | // be returned, and the original tree deleted. | |
186 | // | |
187 | // This function works by recursively walking the tree | |
188 | // without doing anything until a variable reference is | |
189 | // found, then calling cloneTree() at that point. Any | |
190 | // nested references are handled by cloneTree(), not here. | |
191 | // | |
192 | //------------------------------------------------------------------------- | |
193 | RBBINode *RBBINode::flattenVariables() { | |
194 | if (fType == varRef) { | |
195 | RBBINode *retNode = fLeftChild->cloneTree(); | |
196 | delete this; | |
197 | return retNode; | |
198 | } | |
199 | ||
200 | if (fLeftChild != NULL) { | |
201 | fLeftChild = fLeftChild->flattenVariables(); | |
202 | fLeftChild->fParent = this; | |
203 | } | |
204 | if (fRightChild != NULL) { | |
205 | fRightChild = fRightChild->flattenVariables(); | |
206 | fRightChild->fParent = this; | |
207 | } | |
208 | return this; | |
209 | } | |
210 | ||
211 | ||
212 | //------------------------------------------------------------------------- | |
213 | // | |
214 | // flattenSets Walk the parse tree, replacing any nodes of type setRef | |
215 | // with a copy of the expression tree for the set. A set's | |
216 | // equivalent expression tree is precomputed and saved as | |
217 | // the left child of the uset node. | |
218 | // | |
219 | //------------------------------------------------------------------------- | |
220 | void RBBINode::flattenSets() { | |
221 | U_ASSERT(fType != setRef); | |
222 | ||
223 | if (fLeftChild != NULL) { | |
224 | if (fLeftChild->fType==setRef) { | |
225 | RBBINode *setRefNode = fLeftChild; | |
226 | RBBINode *usetNode = setRefNode->fLeftChild; | |
227 | RBBINode *replTree = usetNode->fLeftChild; | |
228 | fLeftChild = replTree->cloneTree(); | |
229 | fLeftChild->fParent = this; | |
230 | delete setRefNode; | |
231 | } else { | |
232 | fLeftChild->flattenSets(); | |
233 | } | |
234 | } | |
235 | ||
236 | if (fRightChild != NULL) { | |
237 | if (fRightChild->fType==setRef) { | |
238 | RBBINode *setRefNode = fRightChild; | |
239 | RBBINode *usetNode = setRefNode->fLeftChild; | |
240 | RBBINode *replTree = usetNode->fLeftChild; | |
241 | fRightChild = replTree->cloneTree(); | |
242 | fRightChild->fParent = this; | |
243 | delete setRefNode; | |
244 | } else { | |
245 | fRightChild->flattenSets(); | |
246 | } | |
247 | } | |
248 | } | |
249 | ||
250 | ||
251 | ||
252 | //------------------------------------------------------------------------- | |
253 | // | |
254 | // findNodes() Locate all the nodes of the specified type, starting | |
255 | // at the specified root. | |
256 | // | |
257 | //------------------------------------------------------------------------- | |
258 | void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { | |
259 | /* test for buffer overflows */ | |
260 | if (U_FAILURE(status)) { | |
261 | return; | |
262 | } | |
263 | if (fType == kind) { | |
264 | dest->addElement(this, status); | |
265 | } | |
266 | if (fLeftChild != NULL) { | |
267 | fLeftChild->findNodes(dest, kind, status); | |
268 | } | |
269 | if (fRightChild != NULL) { | |
270 | fRightChild->findNodes(dest, kind, status); | |
271 | } | |
272 | } | |
273 | ||
274 | ||
275 | //------------------------------------------------------------------------- | |
276 | // | |
277 | // print. Print out a single node, for debugging. | |
278 | // | |
279 | //------------------------------------------------------------------------- | |
b75a7d8f | 280 | #ifdef RBBI_DEBUG |
2ca993e8 A |
281 | |
282 | static int32_t serial(const RBBINode *node) { | |
283 | return (node == NULL? -1 : node->fSerialNum); | |
284 | } | |
285 | ||
286 | ||
374ca955 | 287 | void RBBINode::printNode() { |
b75a7d8f A |
288 | static const char * const nodeTypeNames[] = { |
289 | "setRef", | |
290 | "uset", | |
291 | "varRef", | |
292 | "leafChar", | |
293 | "lookAhead", | |
294 | "tag", | |
295 | "endMark", | |
296 | "opStart", | |
297 | "opCat", | |
298 | "opOr", | |
299 | "opStar", | |
300 | "opPlus", | |
301 | "opQuestion", | |
302 | "opBreak", | |
303 | "opReverse", | |
304 | "opLParen" | |
305 | }; | |
306 | ||
374ca955 A |
307 | if (this==NULL) { |
308 | RBBIDebugPrintf("%10p", (void *)this); | |
309 | } else { | |
2ca993e8 A |
310 | RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", |
311 | (void *)this, fSerialNum, nodeTypeNames[fType], fRuleRoot?'R':' ', fChainIn?'C':' ', | |
312 | serial(fLeftChild), serial(fRightChild), serial(fParent), | |
313 | fFirstPos, fVal); | |
374ca955 A |
314 | if (fType == varRef) { |
315 | RBBI_DEBUG_printUnicodeString(fText); | |
316 | } | |
b75a7d8f A |
317 | } |
318 | RBBIDebugPrintf("\n"); | |
b75a7d8f | 319 | } |
374ca955 | 320 | #endif |
b75a7d8f A |
321 | |
322 | ||
323 | #ifdef RBBI_DEBUG | |
374ca955 | 324 | U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) |
b75a7d8f A |
325 | { |
326 | int i; | |
327 | for (i=0; i<s.length(); i++) { | |
328 | RBBIDebugPrintf("%c", s.charAt(i)); | |
329 | // putc(s.charAt(i), stdout); | |
330 | } | |
331 | for (i=s.length(); i<minWidth; i++) { | |
332 | RBBIDebugPrintf(" "); | |
333 | } | |
334 | } | |
335 | #endif | |
336 | ||
337 | ||
338 | //------------------------------------------------------------------------- | |
339 | // | |
340 | // print. Print out the tree of nodes rooted at "this" | |
341 | // | |
342 | //------------------------------------------------------------------------- | |
343 | #ifdef RBBI_DEBUG | |
2ca993e8 A |
344 | void RBBINode::printNodeHeader() { |
345 | RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); | |
346 | } | |
347 | ||
374ca955 | 348 | void RBBINode::printTree(UBool printHeading) { |
b75a7d8f | 349 | if (printHeading) { |
2ca993e8 | 350 | printNodeHeader(); |
b75a7d8f | 351 | } |
374ca955 A |
352 | this->printNode(); |
353 | if (this != NULL) { | |
354 | // Only dump the definition under a variable reference if asked to. | |
355 | // Unconditinally dump children of all other node types. | |
356 | if (fType != varRef) { | |
357 | if (fLeftChild != NULL) { | |
358 | fLeftChild->printTree(FALSE); | |
359 | } | |
360 | ||
361 | if (fRightChild != NULL) { | |
362 | fRightChild->printTree(FALSE); | |
363 | } | |
b75a7d8f A |
364 | } |
365 | } | |
366 | } | |
367 | #endif | |
368 | ||
369 | ||
370 | ||
371 | U_NAMESPACE_END | |
372 | ||
373 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |