]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/rbbinode.cpp
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / common / rbbinode.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4***************************************************************************
2ca993e8 5* Copyright (C) 2002-2016 International Business Machines Corporation *
b75a7d8f
A
6* and others. All rights reserved. *
7***************************************************************************
8*/
9
10//
11// File: rbbinode.cpp
12//
13// Implementation of class RBBINode, which represents a node in the
14// tree generated when parsing the Rules Based Break Iterator rules.
15//
16// This "Class" is actually closer to a struct.
17// Code using it is expected to directly access fields much of the time.
18//
19
20#include "unicode/utypes.h"
21
22#if !UCONFIG_NO_BREAK_ITERATION
23
24#include "unicode/unistr.h"
25#include "unicode/uniset.h"
26#include "unicode/uchar.h"
27#include "unicode/parsepos.h"
f3c0d7a5
A
28
29#include "cstr.h"
b75a7d8f
A
30#include "uvector.h"
31
32#include "rbbirb.h"
33#include "rbbinode.h"
34
35#include "uassert.h"
36
37
38U_NAMESPACE_BEGIN
39
73c04bcf
A
#ifdef RBBI_DEBUG
// Debug builds only: running counter from which each newly constructed
// RBBINode takes its fSerialNum. It is pre-incremented on use, so the first
// node created receives serial number 1.
static int gLastSerial = 0;
#endif
b75a7d8f
A
43
44
45//-------------------------------------------------------------------------
46//
47// Constructor. Just set the fields to reasonable default values.
48//
49//-------------------------------------------------------------------------
50RBBINode::RBBINode(NodeType t) : UMemory() {
73c04bcf 51#ifdef RBBI_DEBUG
b75a7d8f 52 fSerialNum = ++gLastSerial;
73c04bcf 53#endif
b75a7d8f
A
54 fType = t;
55 fParent = NULL;
56 fLeftChild = NULL;
57 fRightChild = NULL;
58 fInputSet = NULL;
59 fFirstPos = 0;
60 fLastPos = 0;
61 fNullable = FALSE;
62 fLookAheadEnd = FALSE;
2ca993e8
A
63 fRuleRoot = FALSE;
64 fChainIn = FALSE;
b75a7d8f 65 fVal = 0;
374ca955 66 fPrecedence = precZero;
b75a7d8f
A
67
68 UErrorCode status = U_ZERO_ERROR;
69 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
70 fLastPosSet = new UVector(status);
71 fFollowPos = new UVector(status);
72 if (t==opCat) {fPrecedence = precOpCat;}
73 else if (t==opOr) {fPrecedence = precOpOr;}
74 else if (t==opStart) {fPrecedence = precStart;}
75 else if (t==opLParen) {fPrecedence = precLParen;}
76
77}
78
79
80RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
73c04bcf 81#ifdef RBBI_DEBUG
b75a7d8f 82 fSerialNum = ++gLastSerial;
73c04bcf 83#endif
b75a7d8f
A
84 fType = other.fType;
85 fParent = NULL;
86 fLeftChild = NULL;
87 fRightChild = NULL;
88 fInputSet = other.fInputSet;
89 fPrecedence = other.fPrecedence;
90 fText = other.fText;
91 fFirstPos = other.fFirstPos;
92 fLastPos = other.fLastPos;
93 fNullable = other.fNullable;
94 fVal = other.fVal;
2ca993e8
A
95 fRuleRoot = FALSE;
96 fChainIn = other.fChainIn;
b75a7d8f
A
97 UErrorCode status = U_ZERO_ERROR;
98 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
99 fLastPosSet = new UVector(status);
100 fFollowPos = new UVector(status);
101}
102
103
104//-------------------------------------------------------------------------
105//
106// Destructor. Deletes both this node AND any child nodes,
107// except in the case of variable reference nodes. For
108// these, the l. child points back to the definition, which
109// is common for all references to the variable, meaning
110// it can't be deleted here.
111//
112//-------------------------------------------------------------------------
113RBBINode::~RBBINode() {
114 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
115 delete fInputSet;
116 fInputSet = NULL;
117
118 switch (this->fType) {
119 case varRef:
120 case setRef:
121 // for these node types, multiple instances point to the same "children"
122 // Storage ownership of children handled elsewhere. Don't delete here.
123 break;
124
125 default:
126 delete fLeftChild;
127 fLeftChild = NULL;
128 delete fRightChild;
129 fRightChild = NULL;
130 }
131
132
133 delete fFirstPosSet;
134 delete fLastPosSet;
135 delete fFollowPos;
136
137}
138
139
140//-------------------------------------------------------------------------
141//
142// cloneTree Make a copy of the subtree rooted at this node.
143// Discard any variable references encountered along the way,
144// and replace with copies of the variable's definitions.
145// Used to replicate the expression underneath variable
146// references in preparation for generating the DFA tables.
147//
148//-------------------------------------------------------------------------
149RBBINode *RBBINode::cloneTree() {
150 RBBINode *n;
151
152 if (fType == RBBINode::varRef) {
153 // If the current node is a variable reference, skip over it
154 // and clone the definition of the variable instead.
155 n = fLeftChild->cloneTree();
156 } else if (fType == RBBINode::uset) {
157 n = this;
158 } else {
159 n = new RBBINode(*this);
46f4442e
A
160 // Check for null pointer.
161 if (n != NULL) {
162 if (fLeftChild != NULL) {
163 n->fLeftChild = fLeftChild->cloneTree();
164 n->fLeftChild->fParent = n;
165 }
166 if (fRightChild != NULL) {
167 n->fRightChild = fRightChild->cloneTree();
168 n->fRightChild->fParent = n;
169 }
b75a7d8f
A
170 }
171 }
172 return n;
173}
174
175
176
177//-------------------------------------------------------------------------
178//
179// flattenVariables Walk a parse tree, replacing any variable
180// references with a copy of the variable's definition.
181// Aside from variables, the tree is not changed.
182//
183// Return the root of the tree. If the root was not a variable
184// reference, it remains unchanged - the root we started with
185// is the root we return. If, however, the root was a variable
186// reference, the root of the newly cloned replacement tree will
187// be returned, and the original tree deleted.
188//
189// This function works by recursively walking the tree
190// without doing anything until a variable reference is
191// found, then calling cloneTree() at that point. Any
192// nested references are handled by cloneTree(), not here.
193//
194//-------------------------------------------------------------------------
195RBBINode *RBBINode::flattenVariables() {
196 if (fType == varRef) {
f3c0d7a5
A
197 RBBINode *retNode = fLeftChild->cloneTree();
198 if (retNode != NULL) {
199 retNode->fRuleRoot = this->fRuleRoot;
200 retNode->fChainIn = this->fChainIn;
201 }
202 delete this; // TODO: undefined behavior. Fix.
b75a7d8f
A
203 return retNode;
204 }
205
206 if (fLeftChild != NULL) {
207 fLeftChild = fLeftChild->flattenVariables();
208 fLeftChild->fParent = this;
209 }
210 if (fRightChild != NULL) {
211 fRightChild = fRightChild->flattenVariables();
212 fRightChild->fParent = this;
213 }
214 return this;
215}
216
217
218//-------------------------------------------------------------------------
219//
220// flattenSets Walk the parse tree, replacing any nodes of type setRef
221// with a copy of the expression tree for the set. A set's
222// equivalent expression tree is precomputed and saved as
223// the left child of the uset node.
224//
225//-------------------------------------------------------------------------
226void RBBINode::flattenSets() {
227 U_ASSERT(fType != setRef);
228
229 if (fLeftChild != NULL) {
230 if (fLeftChild->fType==setRef) {
231 RBBINode *setRefNode = fLeftChild;
232 RBBINode *usetNode = setRefNode->fLeftChild;
233 RBBINode *replTree = usetNode->fLeftChild;
234 fLeftChild = replTree->cloneTree();
235 fLeftChild->fParent = this;
236 delete setRefNode;
237 } else {
238 fLeftChild->flattenSets();
239 }
240 }
241
242 if (fRightChild != NULL) {
243 if (fRightChild->fType==setRef) {
244 RBBINode *setRefNode = fRightChild;
245 RBBINode *usetNode = setRefNode->fLeftChild;
246 RBBINode *replTree = usetNode->fLeftChild;
247 fRightChild = replTree->cloneTree();
248 fRightChild->fParent = this;
249 delete setRefNode;
250 } else {
251 fRightChild->flattenSets();
252 }
253 }
254}
255
256
257
258//-------------------------------------------------------------------------
259//
260// findNodes() Locate all the nodes of the specified type, starting
261// at the specified root.
262//
263//-------------------------------------------------------------------------
264void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
265 /* test for buffer overflows */
266 if (U_FAILURE(status)) {
267 return;
268 }
269 if (fType == kind) {
270 dest->addElement(this, status);
271 }
272 if (fLeftChild != NULL) {
273 fLeftChild->findNodes(dest, kind, status);
274 }
275 if (fRightChild != NULL) {
276 fRightChild->findNodes(dest, kind, status);
277 }
278}
279
280
281//-------------------------------------------------------------------------
282//
283// print. Print out a single node, for debugging.
284//
285//-------------------------------------------------------------------------
b75a7d8f 286#ifdef RBBI_DEBUG
2ca993e8
A
287
288static int32_t serial(const RBBINode *node) {
289 return (node == NULL? -1 : node->fSerialNum);
290}
291
292
f3c0d7a5 293void RBBINode::printNode(const RBBINode *node) {
b75a7d8f
A
294 static const char * const nodeTypeNames[] = {
295 "setRef",
296 "uset",
297 "varRef",
298 "leafChar",
299 "lookAhead",
300 "tag",
301 "endMark",
302 "opStart",
303 "opCat",
304 "opOr",
305 "opStar",
306 "opPlus",
307 "opQuestion",
308 "opBreak",
309 "opReverse",
310 "opLParen"
311 };
312
f3c0d7a5
A
313 if (node==NULL) {
314 RBBIDebugPrintf("%10p", (void *)node);
374ca955 315 } else {
2ca993e8 316 RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
f3c0d7a5
A
317 (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
318 node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
319 serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
320 node->fFirstPos, node->fVal);
321 if (node->fType == varRef) {
322 RBBI_DEBUG_printUnicodeString(node->fText);
374ca955 323 }
b75a7d8f
A
324 }
325 RBBIDebugPrintf("\n");
b75a7d8f 326}
374ca955 327#endif
b75a7d8f
A
328
329
330#ifdef RBBI_DEBUG
f3c0d7a5
A
331U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
332 RBBIDebugPrintf("%*s", minWidth, CStr(s)());
b75a7d8f
A
333}
334#endif
335
336
337//-------------------------------------------------------------------------
338//
339// print. Print out the tree of nodes rooted at "this"
340//
341//-------------------------------------------------------------------------
342#ifdef RBBI_DEBUG
2ca993e8
A
343void RBBINode::printNodeHeader() {
344 RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
345}
346
f3c0d7a5 347void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
b75a7d8f 348 if (printHeading) {
2ca993e8 349 printNodeHeader();
b75a7d8f 350 }
f3c0d7a5
A
351 printNode(node);
352 if (node != NULL) {
374ca955
A
353 // Only dump the definition under a variable reference if asked to.
354 // Unconditinally dump children of all other node types.
f3c0d7a5
A
355 if (node->fType != varRef) {
356 if (node->fLeftChild != NULL) {
357 printTree(node->fLeftChild, FALSE);
374ca955
A
358 }
359
f3c0d7a5
A
360 if (node->fRightChild != NULL) {
361 printTree(node->fRightChild, FALSE);
374ca955 362 }
b75a7d8f
A
363 }
364 }
365}
366#endif
367
368
369
370U_NAMESPACE_END
371
372#endif /* #if !UCONFIG_NO_BREAK_ITERATION */