6 **********************************************************************
7 * Copyright (c) 2002-2004, International Business Machines
8 * Corporation and others. All Rights Reserved.
9 **********************************************************************
12 #include "unicode/utypes.h"
14 #if !UCONFIG_NO_BREAK_ITERATION
16 #include "unicode/unistr.h"
26 RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder
*rb
, RBBINode
**rootNode
) :
29 fStatus
= fRB
->fStatus
;
30 UErrorCode status
= U_ZERO_ERROR
;
31 fDStates
= new UVector(status
);
32 if (U_FAILURE(*fStatus
)) {
35 if (U_FAILURE(status
)) {
39 if (fDStates
== NULL
) {
40 *fStatus
= U_MEMORY_ALLOCATION_ERROR
;;
46 RBBITableBuilder::~RBBITableBuilder() {
48 for (i
=0; i
<fDStates
->size(); i
++) {
49 delete (RBBIStateDescriptor
*)fDStates
->elementAt(i
);
55 //-----------------------------------------------------------------------------
57 // RBBITableBuilder::build - This is the main function for building the DFA state transtion
58 // table from the RBBI rules parse tree.
60 //-----------------------------------------------------------------------------
61 void RBBITableBuilder::build() {
63 if (U_FAILURE(*fStatus
)) {
67 // If there were no rules, just return. This situation can easily arise
68 // for the reverse rules.
74 // Walk through the tree, replacing any references to $variables with a copy of the
75 // parse tree for the substition expression.
77 fTree
= fTree
->flattenVariables();
78 if (fRB
->fDebugEnv
&& uprv_strstr(fRB
->fDebugEnv
, "ftree")) {
79 RBBIDebugPuts("Parse tree after flattening variable references.");
80 fTree
->printTree(TRUE
);
84 // Add a unique right-end marker to the expression.
85 // Appears as a cat-node, left child being the original tree,
86 // right child being the end marker.
88 RBBINode
*cn
= new RBBINode(RBBINode::opCat
);
89 cn
->fLeftChild
= fTree
;
91 cn
->fRightChild
= new RBBINode(RBBINode::endMark
);
92 cn
->fRightChild
->fParent
= cn
;
96 // Replace all references to UnicodeSets with the tree for the equivalent
100 if (fRB
->fDebugEnv
&& uprv_strstr(fRB
->fDebugEnv
, "stree")) {
101 RBBIDebugPuts("Parse tree after flattening Unicode Set references.");
102 fTree
->printTree(TRUE
);
107 // calculate the functions nullable, firstpos, lastpos and followpos on
108 // nodes in the parse tree.
109 // See the alogrithm description in Aho.
110 // Understanding how this works by looking at the code alone will be
111 // nearly impossible.
116 calcFollowPos(fTree
);
117 if (fRB
->fDebugEnv
&& uprv_strstr(fRB
->fDebugEnv
, "pos")) {
123 // For "chained" rules, modify the followPos sets
125 if (fRB
->fChainRules
) {
126 calcChainedFollowPos(fTree
);
130 // Build the DFA state transition tables.
133 flagAcceptingStates();
134 flagLookAheadStates();
138 // Update the global table of rule status {tag} values
139 // The rule builder has a global vector of status values that are common
140 // for all tables. Merge the ones from this table into the global set.
142 mergeRuleStatusVals();
144 if (fRB
->fDebugEnv
&& uprv_strstr(fRB
->fDebugEnv
, "states")) {printStates();};
149 //-----------------------------------------------------------------------------
151 // calcNullable. Impossible to explain succinctly. See Aho, section 3.9
153 //-----------------------------------------------------------------------------
154 void RBBITableBuilder::calcNullable(RBBINode
*n
) {
158 if (n
->fType
== RBBINode::setRef
||
159 n
->fType
== RBBINode::endMark
) {
160 // These are non-empty leaf node types.
161 n
->fNullable
= FALSE
;
165 if (n
->fType
== RBBINode::lookAhead
|| n
->fType
== RBBINode::tag
) {
166 // Lookahead marker node. It's a leaf, so no recursion on children.
167 // It's nullable because it does not match any literal text from the input stream.
173 // The node is not a leaf.
174 // Calculate nullable on its children.
175 calcNullable(n
->fLeftChild
);
176 calcNullable(n
->fRightChild
);
178 // Apply functions from table 3.40 in Aho
179 if (n
->fType
== RBBINode::opOr
) {
180 n
->fNullable
= n
->fLeftChild
->fNullable
|| n
->fRightChild
->fNullable
;
182 else if (n
->fType
== RBBINode::opCat
) {
183 n
->fNullable
= n
->fLeftChild
->fNullable
&& n
->fRightChild
->fNullable
;
185 else if (n
->fType
== RBBINode::opStar
|| n
->fType
== RBBINode::opQuestion
) {
189 n
->fNullable
= FALSE
;
196 //-----------------------------------------------------------------------------
198 // calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
200 //-----------------------------------------------------------------------------
201 void RBBITableBuilder::calcFirstPos(RBBINode
*n
) {
205 if (n
->fType
== RBBINode::leafChar
||
206 n
->fType
== RBBINode::endMark
||
207 n
->fType
== RBBINode::lookAhead
||
208 n
->fType
== RBBINode::tag
) {
209 // These are non-empty leaf node types.
210 n
->fFirstPosSet
->addElement(n
, *fStatus
);
214 // The node is not a leaf.
215 // Calculate firstPos on its children.
216 calcFirstPos(n
->fLeftChild
);
217 calcFirstPos(n
->fRightChild
);
219 // Apply functions from table 3.40 in Aho
220 if (n
->fType
== RBBINode::opOr
) {
221 setAdd(n
->fFirstPosSet
, n
->fLeftChild
->fFirstPosSet
);
222 setAdd(n
->fFirstPosSet
, n
->fRightChild
->fFirstPosSet
);
224 else if (n
->fType
== RBBINode::opCat
) {
225 setAdd(n
->fFirstPosSet
, n
->fLeftChild
->fFirstPosSet
);
226 if (n
->fLeftChild
->fNullable
) {
227 setAdd(n
->fFirstPosSet
, n
->fRightChild
->fFirstPosSet
);
230 else if (n
->fType
== RBBINode::opStar
||
231 n
->fType
== RBBINode::opQuestion
||
232 n
->fType
== RBBINode::opPlus
) {
233 setAdd(n
->fFirstPosSet
, n
->fLeftChild
->fFirstPosSet
);
239 //-----------------------------------------------------------------------------
241 // calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
243 //-----------------------------------------------------------------------------
244 void RBBITableBuilder::calcLastPos(RBBINode
*n
) {
248 if (n
->fType
== RBBINode::leafChar
||
249 n
->fType
== RBBINode::endMark
||
250 n
->fType
== RBBINode::lookAhead
||
251 n
->fType
== RBBINode::tag
) {
252 // These are non-empty leaf node types.
253 n
->fLastPosSet
->addElement(n
, *fStatus
);
257 // The node is not a leaf.
258 // Calculate lastPos on its children.
259 calcLastPos(n
->fLeftChild
);
260 calcLastPos(n
->fRightChild
);
262 // Apply functions from table 3.40 in Aho
263 if (n
->fType
== RBBINode::opOr
) {
264 setAdd(n
->fLastPosSet
, n
->fLeftChild
->fLastPosSet
);
265 setAdd(n
->fLastPosSet
, n
->fRightChild
->fLastPosSet
);
267 else if (n
->fType
== RBBINode::opCat
) {
268 setAdd(n
->fLastPosSet
, n
->fRightChild
->fLastPosSet
);
269 if (n
->fRightChild
->fNullable
) {
270 setAdd(n
->fLastPosSet
, n
->fLeftChild
->fLastPosSet
);
273 else if (n
->fType
== RBBINode::opStar
||
274 n
->fType
== RBBINode::opQuestion
||
275 n
->fType
== RBBINode::opPlus
) {
276 setAdd(n
->fLastPosSet
, n
->fLeftChild
->fLastPosSet
);
282 //-----------------------------------------------------------------------------
284 // calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
286 //-----------------------------------------------------------------------------
287 void RBBITableBuilder::calcFollowPos(RBBINode
*n
) {
289 n
->fType
== RBBINode::leafChar
||
290 n
->fType
== RBBINode::endMark
) {
294 calcFollowPos(n
->fLeftChild
);
295 calcFollowPos(n
->fRightChild
);
298 if (n
->fType
== RBBINode::opCat
) {
299 RBBINode
*i
; // is 'i' in Aho's description
302 UVector
*LastPosOfLeftChild
= n
->fLeftChild
->fLastPosSet
;
304 for (ix
=0; ix
<(uint32_t)LastPosOfLeftChild
->size(); ix
++) {
305 i
= (RBBINode
*)LastPosOfLeftChild
->elementAt(ix
);
306 setAdd(i
->fFollowPos
, n
->fRightChild
->fFirstPosSet
);
311 if (n
->fType
== RBBINode::opStar
||
312 n
->fType
== RBBINode::opPlus
) {
313 RBBINode
*i
; // again, n and i are the names from Aho's description.
316 for (ix
=0; ix
<(uint32_t)n
->fLastPosSet
->size(); ix
++) {
317 i
= (RBBINode
*)n
->fLastPosSet
->elementAt(ix
);
318 setAdd(i
->fFollowPos
, n
->fFirstPosSet
);
327 //-----------------------------------------------------------------------------
329 // calcChainedFollowPos. Modify the previously calculated followPos sets
330 // to implement rule chaining. NOT described by Aho
332 //-----------------------------------------------------------------------------
333 void RBBITableBuilder::calcChainedFollowPos(RBBINode
*tree
) {
335 UVector
endMarkerNodes(*fStatus
);
336 UVector
leafNodes(*fStatus
);
339 if (U_FAILURE(*fStatus
)) {
343 // get a list of all endmarker nodes.
344 tree
->findNodes(&endMarkerNodes
, RBBINode::endMark
, *fStatus
);
346 // get a list all leaf nodes
347 tree
->findNodes(&leafNodes
, RBBINode::leafChar
, *fStatus
);
348 if (U_FAILURE(*fStatus
)) {
352 // Get all nodes that can be the start a match, which is FirstPosition(root)
353 UVector
*matchStartNodes
= tree
->fFirstPosSet
;
356 // Iteratate over all leaf nodes,
361 for (endNodeIx
=0; endNodeIx
<leafNodes
.size(); endNodeIx
++) {
362 RBBINode
*tNode
= (RBBINode
*)leafNodes
.elementAt(endNodeIx
);
363 RBBINode
*endNode
= NULL
;
365 // Identify leaf nodes that correspond to overall rule match positions.
366 // These include an endMarkerNode in their followPos sets.
367 for (i
=0; i
<endMarkerNodes
.size(); i
++) {
368 if (tNode
->fFollowPos
->contains(endMarkerNodes
.elementAt(i
))) {
373 if (endNode
== NULL
) {
374 // node wasn't an end node. Try again with the next.
378 // We've got a node that can end a match.
380 // Line Break Specific hack: If this node's val correspond to the $CM char class,
381 // don't chain from it.
382 // TODO: Add rule syntax for this behavior, get specifics out of here and
383 // into the rule file.
384 if (fRB
->fLBCMNoChain
) {
385 UChar32 c
= this->fRB
->fSetBuilder
->getFirstChar(endNode
->fVal
);
387 ULineBreak cLBProp
= (ULineBreak
)u_getIntPropertyValue(c
, UCHAR_LINE_BREAK
);
388 if (cLBProp
== U_LB_COMBINING_MARK
) {
394 // Now iterate over the nodes that can start a match, looking for ones
395 // with the same char class as our ending node.
397 for (startNodeIx
= 0; startNodeIx
<matchStartNodes
->size(); startNodeIx
++) {
398 startNode
= (RBBINode
*)matchStartNodes
->elementAt(startNodeIx
);
399 if (startNode
->fType
!= RBBINode::leafChar
) {
403 if (endNode
->fVal
== startNode
->fVal
) {
404 // The end val (character class) of one possible match is the
405 // same as the start of another.
407 // Add all nodes from the followPos of the start node to the
408 // followPos set of the end node, which will have the effect of
409 // letting matches transition from a match state at endNode
410 // to the second char of a match starting with startNode.
411 setAdd(endNode
->fFollowPos
, startNode
->fFollowPos
);
418 //-----------------------------------------------------------------------------
420 // buildStateTable() Determine the set of runtime DFA states and the
421 // transition tables for these states, by the algorithm
422 // of fig. 3.44 in Aho.
424 // Most of the comments are quotes of Aho's psuedo-code.
426 //-----------------------------------------------------------------------------
427 void RBBITableBuilder::buildStateTable() {
428 if (U_FAILURE(*fStatus
)) {
432 // Add a dummy state 0 - the stop state. Not from Aho.
433 int lastInputSymbol
= fRB
->fSetBuilder
->getNumCharCategories() - 1;
434 RBBIStateDescriptor
*failState
= new RBBIStateDescriptor(lastInputSymbol
, fStatus
);
435 failState
->fPositions
= new UVector(*fStatus
);
436 if (U_FAILURE(*fStatus
)) {
439 fDStates
->addElement(failState
, *fStatus
);
440 if (U_FAILURE(*fStatus
)) {
444 // initially, the only unmarked state in Dstates is firstpos(root),
445 // where toot is the root of the syntax tree for (r)#;
446 RBBIStateDescriptor
*initialState
= new RBBIStateDescriptor(lastInputSymbol
, fStatus
);
447 if (U_FAILURE(*fStatus
)) {
450 initialState
->fPositions
= new UVector(*fStatus
);
451 if (U_FAILURE(*fStatus
)) {
454 setAdd(initialState
->fPositions
, fTree
->fFirstPosSet
);
455 fDStates
->addElement(initialState
, *fStatus
);
456 if (U_FAILURE(*fStatus
)) {
460 // while there is an unmarked state T in Dstates do begin
462 RBBIStateDescriptor
*T
= NULL
;
464 for (tx
=1; tx
<fDStates
->size(); tx
++) {
465 RBBIStateDescriptor
*temp
;
466 temp
= (RBBIStateDescriptor
*)fDStates
->elementAt(tx
);
467 if (temp
->fMarked
== FALSE
) {
479 // for each input symbol a do begin
481 for (a
= 1; a
<=lastInputSymbol
; a
++) {
482 // let U be the set of positions that are in followpos(p)
483 // for some position p in T
484 // such that the symbol at position p is a;
488 for (px
=0; px
<T
->fPositions
->size(); px
++) {
489 p
= (RBBINode
*)T
->fPositions
->elementAt(px
);
490 if ((p
->fType
== RBBINode::leafChar
) && (p
->fVal
== a
)) {
492 U
= new UVector(*fStatus
);
494 setAdd(U
, p
->fFollowPos
);
498 // if U is not empty and not in DStates then
500 UBool UinDstates
= FALSE
;
502 U_ASSERT(U
->size() > 0);
504 for (ix
=0; ix
<fDStates
->size(); ix
++) {
505 RBBIStateDescriptor
*temp2
;
506 temp2
= (RBBIStateDescriptor
*)fDStates
->elementAt(ix
);
507 if (setEquals(U
, temp2
->fPositions
)) {
509 U
= temp2
->fPositions
;
516 // Add U as an unmarked state to Dstates
519 RBBIStateDescriptor
*newState
= new RBBIStateDescriptor(lastInputSymbol
, fStatus
);
520 if (U_FAILURE(*fStatus
)) {
523 newState
->fPositions
= U
;
524 fDStates
->addElement(newState
, *fStatus
);
525 if (U_FAILURE(*fStatus
)) {
528 ux
= fDStates
->size()-1;
532 T
->fDtran
->setElementAt(ux
, a
);
540 //-----------------------------------------------------------------------------
542 // flagAcceptingStates Identify accepting states.
543 // First get a list of all of the end marker nodes.
544 // Then, for each state s,
545 // if s contains one of the end marker nodes in its list of tree positions then
546 // s is an accepting state.
548 //-----------------------------------------------------------------------------
549 void RBBITableBuilder::flagAcceptingStates() {
550 if (U_FAILURE(*fStatus
)) {
553 UVector
endMarkerNodes(*fStatus
);
558 if (U_FAILURE(*fStatus
)) {
562 fTree
->findNodes(&endMarkerNodes
, RBBINode::endMark
, *fStatus
);
563 if (U_FAILURE(*fStatus
)) {
567 for (i
=0; i
<endMarkerNodes
.size(); i
++) {
568 endMarker
= (RBBINode
*)endMarkerNodes
.elementAt(i
);
569 for (n
=0; n
<fDStates
->size(); n
++) {
570 RBBIStateDescriptor
*sd
= (RBBIStateDescriptor
*)fDStates
->elementAt(n
);
571 if (sd
->fPositions
->indexOf(endMarker
) >= 0) {
572 // Any non-zero value for fAccepting means this is an accepting node.
573 // The value is what will be returned to the user as the break status.
574 // If no other value was specified, force it to -1.
575 sd
->fAccepting
= endMarker
->fVal
;
576 if (sd
->fAccepting
== 0) {
580 // If the end marker node is from a look-ahead rule, set
581 // the fLookAhead field or this state also.
582 if (endMarker
->fLookAheadEnd
) {
583 sd
->fLookAhead
= sd
->fAccepting
;
591 //-----------------------------------------------------------------------------
593 // flagLookAheadStates Very similar to flagAcceptingStates, above.
595 //-----------------------------------------------------------------------------
596 void RBBITableBuilder::flagLookAheadStates() {
597 if (U_FAILURE(*fStatus
)) {
600 UVector
lookAheadNodes(*fStatus
);
601 RBBINode
*lookAheadNode
;
605 fTree
->findNodes(&lookAheadNodes
, RBBINode::lookAhead
, *fStatus
);
606 if (U_FAILURE(*fStatus
)) {
609 for (i
=0; i
<lookAheadNodes
.size(); i
++) {
610 lookAheadNode
= (RBBINode
*)lookAheadNodes
.elementAt(i
);
612 for (n
=0; n
<fDStates
->size(); n
++) {
613 RBBIStateDescriptor
*sd
= (RBBIStateDescriptor
*)fDStates
->elementAt(n
);
614 if (sd
->fPositions
->indexOf(lookAheadNode
) >= 0) {
615 sd
->fLookAhead
= lookAheadNode
->fVal
;
624 //-----------------------------------------------------------------------------
628 //-----------------------------------------------------------------------------
629 void RBBITableBuilder::flagTaggedStates() {
630 if (U_FAILURE(*fStatus
)) {
633 UVector
tagNodes(*fStatus
);
638 if (U_FAILURE(*fStatus
)) {
641 fTree
->findNodes(&tagNodes
, RBBINode::tag
, *fStatus
);
642 if (U_FAILURE(*fStatus
)) {
645 for (i
=0; i
<tagNodes
.size(); i
++) { // For each tag node t (all of 'em)
646 tagNode
= (RBBINode
*)tagNodes
.elementAt(i
);
648 for (n
=0; n
<fDStates
->size(); n
++) { // For each state s (row in the state table)
649 RBBIStateDescriptor
*sd
= (RBBIStateDescriptor
*)fDStates
->elementAt(n
);
650 if (sd
->fPositions
->indexOf(tagNode
) >= 0) { // if s include the tag node t
651 sortedAdd(&sd
->fTagVals
, tagNode
->fVal
);
660 //-----------------------------------------------------------------------------
662 // mergeRuleStatusVals
664 // Update the global table of rule status {tag} values
665 // The rule builder has a global vector of status values that are common
666 // for all tables. Merge the ones from this table into the global set.
668 //-----------------------------------------------------------------------------
669 void RBBITableBuilder::mergeRuleStatusVals() {
671 // The basic outline of what happens here is this...
673 // for each state in this state table
674 // if the status tag list for this state is in the global statuses list
676 // continue with the next state
678 // add the tag list for this state to the global list.
683 // Pre-set a single tag of {0} into the table.
684 // We will need this as a default, for rule sets with no explicit tagging.
685 if (fRB
->fRuleStatusVals
->size() == 0) {
686 fRB
->fRuleStatusVals
->addElement(1, *fStatus
); // Num of statuses in group
687 fRB
->fRuleStatusVals
->addElement((int32_t)0, *fStatus
); // and our single status of zero
691 for (n
=0; n
<fDStates
->size(); n
++) {
692 RBBIStateDescriptor
*sd
= (RBBIStateDescriptor
*)fDStates
->elementAt(n
);
693 UVector
*thisStatesTagValues
= sd
->fTagVals
;
694 if (thisStatesTagValues
== NULL
) {
695 // No tag values are explicitly associated with this state.
696 // Set the default tag value.
701 // There are tag(s) associated with this state.
702 // fTagsIdx will be the index into the global tag list for this state's tag values.
703 // Initial value of -1 flags that we haven't got it set yet.
705 int32_t thisTagGroupStart
= 0; // indexes into the global rule status vals list
706 int32_t nextTagGroupStart
= 0;
708 // Loop runs once per group of tags in the global list
709 while (nextTagGroupStart
< fRB
->fRuleStatusVals
->size()) {
710 thisTagGroupStart
= nextTagGroupStart
;
711 nextTagGroupStart
+= fRB
->fRuleStatusVals
->elementAti(thisTagGroupStart
) + 1;
712 if (thisStatesTagValues
->size() != fRB
->fRuleStatusVals
->elementAti(thisTagGroupStart
)) {
713 // The number of tags for this state is different from
714 // the number of tags in this group from the global list.
715 // Continue with the next group from the global list.
718 // The lengths match, go ahead and compare the actual tag values
719 // between this state and the group from the global list.
720 for (i
=0; i
<thisStatesTagValues
->size(); i
++) {
721 if (thisStatesTagValues
->elementAti(i
) !=
722 fRB
->fRuleStatusVals
->elementAti(thisTagGroupStart
+ 1 + i
) ) {
728 if (i
== thisStatesTagValues
->size()) {
729 // We found a set of tag values in the global list that match
730 // those for this state. Use them.
731 sd
->fTagsIdx
= thisTagGroupStart
;
736 if (sd
->fTagsIdx
== -1) {
737 // No suitable entry in the global tag list already. Add one
738 sd
->fTagsIdx
= fRB
->fRuleStatusVals
->size();
739 fRB
->fRuleStatusVals
->addElement(thisStatesTagValues
->size(), *fStatus
);
740 for (i
=0; i
<thisStatesTagValues
->size(); i
++) {
741 fRB
->fRuleStatusVals
->addElement(thisStatesTagValues
->elementAti(i
), *fStatus
);
753 //-----------------------------------------------------------------------------
755 // sortedAdd Add a value to a vector of sorted values (ints).
756 // Do not replicate entries; if the value is already there, do not
758 // Lazily create the vector if it does not already exist.
760 //-----------------------------------------------------------------------------
761 void RBBITableBuilder::sortedAdd(UVector
**vector
, int32_t val
) {
764 if (*vector
== NULL
) {
765 *vector
= new UVector(*fStatus
);
767 if (*vector
== NULL
|| U_FAILURE(*fStatus
)) {
770 UVector
*vec
= *vector
;
771 int32_t vSize
= vec
->size();
772 for (i
=0; i
<vSize
; i
++) {
773 int32_t valAtI
= vec
->elementAti(i
);
775 // The value is already in the vector. Don't add it again.
782 vec
->insertElementAt(val
, i
, *fStatus
);
787 //-----------------------------------------------------------------------------
789 // setAdd Set operation on UVector
790 // dest = dest union source
791 // Elements may only appear once. Order is unimportant.
793 //-----------------------------------------------------------------------------
794 void RBBITableBuilder::setAdd(UVector
*dest
, UVector
*source
) {
795 int destOriginalSize
= dest
->size();
796 int sourceSize
= source
->size();
799 for (si
=0; si
<sourceSize
&& U_SUCCESS(*fStatus
); si
++) {
800 void *elToAdd
= source
->elementAt(si
);
801 for (di
=0; di
<destOriginalSize
; di
++) {
802 if (dest
->elementAt(di
) == elToAdd
) {
803 goto elementAlreadyInDest
;
806 dest
->addElement(elToAdd
, *fStatus
);
807 elementAlreadyInDest
: ;
813 //-----------------------------------------------------------------------------
815 // setEqual Set operation on UVector.
816 // Compare for equality.
817 // Elements may appear only once.
818 // Elements may appear in any order.
820 //-----------------------------------------------------------------------------
821 UBool
RBBITableBuilder::setEquals(UVector
*a
, UVector
*b
) {
822 int32_t aSize
= a
->size();
823 int32_t bSize
= b
->size();
825 if (aSize
!= bSize
) {
835 for (ax
=0; ax
<aSize
; ax
++) {
836 aVal
= a
->elementAt(ax
);
837 for (bx
=firstBx
; bx
<bSize
; bx
++) {
838 bVal
= b
->elementAt(bx
);
854 //-----------------------------------------------------------------------------
856 // printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos
857 // for each node in the tree.
859 //-----------------------------------------------------------------------------
861 void RBBITableBuilder::printPosSets(RBBINode
*n
) {
866 RBBIDebugPrintf(" Nullable: %s\n", n
->fNullable
?"TRUE":"FALSE");
868 RBBIDebugPrintf(" firstpos: ");
869 printSet(n
->fFirstPosSet
);
871 RBBIDebugPrintf(" lastpos: ");
872 printSet(n
->fLastPosSet
);
874 RBBIDebugPrintf(" followpos: ");
875 printSet(n
->fFollowPos
);
877 printPosSets(n
->fLeftChild
);
878 printPosSets(n
->fRightChild
);
884 //-----------------------------------------------------------------------------
886 // getTableSize() Calculate the size of the runtime form of this
887 // state transition table.
889 //-----------------------------------------------------------------------------
890 int32_t RBBITableBuilder::getTableSize() const {
900 size
= sizeof(RBBIStateTable
) - 4; // The header, with no rows to the table.
902 numRows
= fDStates
->size();
903 numCols
= fRB
->fSetBuilder
->getNumCharCategories();
905 // Note The declaration of RBBIStateTableRow is for a table of two columns.
906 // Therefore we subtract two from numCols when determining
907 // how much storage to add to a row for the total columns.
908 rowSize
= sizeof(RBBIStateTableRow
) + sizeof(uint16_t)*(numCols
-2);
909 size
+= numRows
* rowSize
;
915 //-----------------------------------------------------------------------------
917 // exportTable() export the state transition table in the format required
918 // by the runtime engine. getTableSize() bytes of memory
919 // must be available at the output address "where".
921 //-----------------------------------------------------------------------------
922 void RBBITableBuilder::exportTable(void *where
) {
923 RBBIStateTable
*table
= (RBBIStateTable
*)where
;
927 if (U_FAILURE(*fStatus
) || fTree
== NULL
) {
931 if (fRB
->fSetBuilder
->getNumCharCategories() > 0x7fff ||
932 fDStates
->size() > 0x7fff) {
933 *fStatus
= U_BRK_INTERNAL_ERROR
;
937 table
->fRowLen
= sizeof(RBBIStateTableRow
) +
938 sizeof(uint16_t) * (fRB
->fSetBuilder
->getNumCharCategories() - 2);
939 table
->fNumStates
= fDStates
->size();
941 if (fRB
->fLookAheadHardBreak
) {
942 table
->fFlags
|= RBBI_LOOKAHEAD_HARD_BREAK
;
944 table
->fReserved
= 0;
946 for (state
=0; state
<table
->fNumStates
; state
++) {
947 RBBIStateDescriptor
*sd
= (RBBIStateDescriptor
*)fDStates
->elementAt(state
);
948 RBBIStateTableRow
*row
= (RBBIStateTableRow
*)(table
->fTableData
+ state
*table
->fRowLen
);
949 U_ASSERT (-32768 < sd
->fAccepting
&& sd
->fAccepting
<= 32767);
950 U_ASSERT (-32768 < sd
->fLookAhead
&& sd
->fLookAhead
<= 32767);
951 row
->fAccepting
= (int16_t)sd
->fAccepting
;
952 row
->fLookAhead
= (int16_t)sd
->fLookAhead
;
953 row
->fTagIdx
= (int16_t)sd
->fTagsIdx
;
954 for (col
=0; col
<fRB
->fSetBuilder
->getNumCharCategories(); col
++) {
955 row
->fNextState
[col
] = (uint16_t)sd
->fDtran
->elementAti(col
);
962 //-----------------------------------------------------------------------------
964 // printSet Debug function. Print the contents of a UVector
966 //-----------------------------------------------------------------------------
968 void RBBITableBuilder::printSet(UVector
*s
) {
970 for (i
=0; i
<s
->size(); i
++) {
971 void *v
= s
->elementAt(i
);
972 RBBIDebugPrintf("%10p", v
);
974 RBBIDebugPrintf("\n");
979 //-----------------------------------------------------------------------------
981 // printStates Debug Function. Dump the fully constructed state transition table.
983 //-----------------------------------------------------------------------------
985 void RBBITableBuilder::printStates() {
986 int c
; // input "character"
987 int n
; // state number
989 RBBIDebugPrintf("state | i n p u t s y m b o l s \n");
990 RBBIDebugPrintf(" | Acc LA Tag");
991 for (c
=0; c
<fRB
->fSetBuilder
->getNumCharCategories(); c
++) {
992 RBBIDebugPrintf(" %2d", c
);
994 RBBIDebugPrintf("\n");
995 RBBIDebugPrintf(" |---------------");
996 for (c
=0; c
<fRB
->fSetBuilder
->getNumCharCategories(); c
++) {
997 RBBIDebugPrintf("---");
999 RBBIDebugPrintf("\n");
1001 for (n
=0; n
<fDStates
->size(); n
++) {
1002 RBBIStateDescriptor
*sd
= (RBBIStateDescriptor
*)fDStates
->elementAt(n
);
1003 RBBIDebugPrintf(" %3d | " , n
);
1004 RBBIDebugPrintf("%3d %3d %5d ", sd
->fAccepting
, sd
->fLookAhead
, sd
->fTagsIdx
);
1005 for (c
=0; c
<fRB
->fSetBuilder
->getNumCharCategories(); c
++) {
1006 RBBIDebugPrintf(" %2d", sd
->fDtran
->elementAti(c
));
1008 RBBIDebugPrintf("\n");
1010 RBBIDebugPrintf("\n\n");
1016 //-----------------------------------------------------------------------------
1018 // printRuleStatusTable Debug Function. Dump the common rule status table
1020 //-----------------------------------------------------------------------------
1022 void RBBITableBuilder::printRuleStatusTable() {
1023 int32_t thisRecord
= 0;
1024 int32_t nextRecord
= 0;
1026 UVector
*tbl
= fRB
->fRuleStatusVals
;
1028 RBBIDebugPrintf("index | tags \n");
1029 RBBIDebugPrintf("-------------------\n");
1031 while (nextRecord
< tbl
->size()) {
1032 thisRecord
= nextRecord
;
1033 nextRecord
= thisRecord
+ tbl
->elementAti(thisRecord
) + 1;
1034 RBBIDebugPrintf("%4d ", thisRecord
);
1035 for (i
=thisRecord
+1; i
<nextRecord
; i
++) {
1036 RBBIDebugPrintf(" %5d", tbl
->elementAti(i
));
1038 RBBIDebugPrintf("\n");
1040 RBBIDebugPrintf("\n\n");
1045 //-----------------------------------------------------------------------------
1047 // RBBIStateDescriptor Methods. This is a very struct-like class
1048 // Most access is directly to the fields.
1050 //-----------------------------------------------------------------------------
1052 RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol
, UErrorCode
*fStatus
) {
1061 fDtran
= new UVector(lastInputSymbol
+1, *fStatus
);
1062 if (U_FAILURE(*fStatus
)) {
1065 if (fDtran
== NULL
) {
1066 *fStatus
= U_MEMORY_ALLOCATION_ERROR
;
1069 fDtran
->setSize(lastInputSymbol
+1); // fDtran needs to be pre-sized.
1070 // It is indexed by input symbols, and will
1071 // hold the next state number for each
1076 RBBIStateDescriptor::~RBBIStateDescriptor() {
1087 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */