]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ****************************************************************************** | |
3 | * Copyright (C) 2005, International Business Machines Corporation and * | |
4 | * others. All Rights Reserved. * | |
5 | ****************************************************************************** | |
6 | */ | |
7 | /* | |
8 | WBNF, Weighted BNF, is an extended BNF. The main difference between WBNF | |
9 | and standard BNF is that WBNF accepts weights for its alternation items. | |
10 | A weight specifies the chance that its item will be selected. | |
11 | ||
12 | The purpose of WBNF is to help generate a random string from a given grammar | |
13 | which can be described with standard BNF. The introduction of 'weight' | |
14 | is to guide the generator to give the specific parts different chances to be | |
15 | generated. | |
16 | ||
17 | Usually, the user gives LanguageGenerator the grammar description in WBNF, | |
18 | then LanguageGenerator will generate a random string on every next() call. | |
19 | The return code of parseBNF() can help user to determine the error, | |
20 | either in the grammar description or in the WBNF parser itself. | |
21 | ||
22 | ||
23 | The grammar of WBNF itself can be described in standard BNF, | |
24 | ||
25 | escaping = _single character with a leading back slash, either inside or outside quoting_ | |
26 | quoting = _quoted with a pair of single quotation marks_ | |
27 | string = string alphabet | string digit | string quoting | string escaping | | |
28 | alphabet | quoting | escaping | |
29 | alphabet = | |
30 | digit = | |
31 | integer = integer digit | digit | |
32 | weight = integer % | |
33 | weight-list = weight-list weight | weight | |
34 | var = var alphabet | var digit | $ alphabet | |
35 | ||
36 | var-defs = var-defs var-def | var-def | |
37 | var-def = var '=' definition; | |
38 | ||
39 | alternation = alternation '|' alt-item | alt-item | |
40 | alt-item = sequence | sequence weight | |
41 | ||
42 | sequence = sequence modified | modified | |
43 | ||
44 | modified = core | morph | quote | repeat | |
45 | morph = modified ~ | |
46 | quote = modified @ | |
47 | repeat = modified quantifier | modified quantifier weight-list | |
48 | quantifier = ? | * | + | { integer , integer} | {integer, } | {integer} | |
49 | ||
50 | core = var | string | '(' definition ')' | |
51 | ||
52 | definition = core | modified | sequence | alternation | |
53 | definition = alternation | |
54 | ||
55 | Remarks: | |
56 | o Following characters are literals in preceding definition | |
57 | but are syntax symbols in WBNF | |
58 | ||
59 | % $ ~ @ ? * + { } , | |
60 | ||
61 | o Following characters are syntax symbols in the preceding definition | |
62 | (space) concatenation operation, or separator to increase readability | |
63 | = definition | |
64 | | selection operation | |
65 | ( ) precedence select | |
66 | ' ' override special-character to plain character | |
67 | ||
68 | o the definitions of 'escaping' and 'quoting' are given as descriptive text in the preceding definition | |
69 | o infinite is actually a predefined value, PSEUDO_INFINIT, defined in this file | |
70 | o if a weight is not present in 'alt-item' or 'repeat', | |
71 | a default weight, DEFAULT_WEIGHT, defined in this file is used | |
72 | ||
73 | o * == {0, } | |
74 | + == {1, } | |
75 | ? == {0, 1} | |
76 | ||
77 | o the weight-list for repeat assigns the weights to the repeated items one by one | |
78 | ||
79 | demo{1,3} 30% 40% 100% == (demo)30% | (demodemo)40% | (demodemodemo)100% | |
80 | ||
81 | For more explanation of the weight-list, please see the LIMITATION of the grammar | |
82 | ||
83 | o but the weight-list for question mark has different meaning | |
84 | ||
85 | demo ? 30% != demo{0,1} 30% 100% | |
86 | demo ? 30% == demo{0,1} 70% 30% | |
87 | ||
88 | the 70% is calculated from (DEFAULT_WEIGHT - weight) | |
89 | ||
90 | ||
91 | Known LIMITATION of the grammar | |
92 | For 'repeat', the parser will consume as many weights as possible at one time, | |
93 | discard superfluous weights if there are too many, | |
94 | and fill in missing weights with the default weight if there are too few. | |
95 | This behavior means the following definitions are equivalent | |
96 | ||
97 | demo{1,3} 30% 40% 100% | |
98 | demo{1,3} 30% 40% 100% 50% | |
99 | demo{1,3} 30% 40% | |
100 | ||
101 | This behavior will cause a little confusion when defining an alternation | |
102 | ||
103 | demo{1,3} 30% 40% 100% 50% | show 20% | |
104 | ||
105 | is interpreted as | |
106 | ||
107 | (demo{1,3} 30% 40% 100%) 100% | show 20% | |
108 | ||
109 | not | |
110 | ||
111 | (demo{1,3} 30% 40% 100%) 50% | show 20% | |
112 | ||
113 | to get an expected definition, please use parentheses. | |
114 | ||
115 | Known LIMITATION of the current implementation | |
116 | Due to the well-known pointer aliasing problem, the current parser will | |
117 | crash if the definition looks like | |
118 | ||
119 | $a = demo; | |
120 | $b = $a; | |
121 | $c = $a; | |
122 | or | |
123 | $a = demo; | |
124 | $b = $a $a; | |
125 | or | |
126 | $a = demo; | |
127 | $b = $b $a; | |
128 | ||
129 | The crash will occur at a delete operation in a destructor or in other memory release code. | |
130 | Several plans are at hand to fix the problem: use a reference-counted smart pointer, | |
131 | or use a central memory management solution. For now, it works well with the collation | |
132 | monkey test, which is the only user of WBNF. | |
133 | */ | |
134 | ||
135 | #ifndef _WBNF | |
136 | #define _WBNF | |
137 | ||
138 | #include "unicode/utypes.h" | |
139 | ||
140 | const int DEFAULT_WEIGHT = 100; /* Weight used when 'alt-item' or 'repeat' carries no explicit weight (see the remarks in the header comment). */ | |
141 | const int PSEUDO_INFINIT = 200; /* Finite value standing in for "infinite" (see the remarks in the header comment). */ | |
142 | ||
143 | class LanguageGenerator_impl; | |
144 | ||
145 | class LanguageGenerator{ /* Takes a grammar description in WBNF and generates a random string from it on every next() call. */ | |
146 | LanguageGenerator_impl * lang_gen; /* Pointer to the implementation class, which is only forward-declared in this header. */ | |
147 | public: | |
148 | enum PARSE_RESULT {OK, BNF_DEF_WRONG, INCOMPLETE, NO_TOP_NODE}; /* Return codes of parseBNF(); they help the caller determine the error. */ | |
149 | LanguageGenerator(); | |
150 | ~LanguageGenerator(); | |
151 | PARSE_RESULT parseBNF(const char *const bnf_definition /*in*/, const char *const top_node/*in*/, UBool debug=FALSE); /* Parse the WBNF grammar text. NOTE(review): top_node presumably names the start variable, and debug presumably enables diagnostic output -- confirm against the implementation. */ | |
152 | const char *next(); /* Return a null-terminated c-string. The buffer is owned by callee. */ | |
153 | }; | |
154 | ||
155 | void TestWbnf(void); /* Declared here, defined elsewhere. NOTE(review): presumably the self-test entry point for the WBNF parser -- confirm. */ | |
156 | ||
157 | #endif /* _WBNF */ |