]>
Commit | Line | Data |
---|---|---|
2ca993e8 A |
1 | /************************************************************************* |
2 | * Copyright (c) 2016, International Business Machines | |
3 | * Corporation and others. All Rights Reserved. | |
4 | ************************************************************************* | |
5 | */ | |
6 | #ifndef RBBIMONKEYTEST_H | |
7 | #define RBBIMONKEYTEST_H | |
8 | ||
9 | #include "unicode/utypes.h" | |
10 | ||
11 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING | |
12 | ||
13 | #include "intltest.h" | |
14 | ||
15 | #include "unicode/rbbi.h" | |
16 | #include "unicode/regex.h" | |
17 | #include "unicode/uniset.h" | |
18 | #include "unicode/unistr.h" | |
19 | #include "unicode/uobject.h" | |
20 | ||
21 | #include "simplethread.h" | |
22 | #include "ucbuf.h" | |
23 | #include "uhash.h" | |
24 | #include "uvector.h" | |
25 | ||
26 | // | |
27 | // TODO: | |
28 | // Develop a tailoring format. | |
29 | // Hook to old tests that use monkey impl to get expected data. | |
30 | // Remove old tests. | |
31 | ||
32 | class BreakRules; // Forward declarations; both classes are defined later in this header. | |
33 | class RBBIMonkeyImpl; | |
34 | ||
35 | /** | |
36 | * Test the RuleBasedBreakIterator class giving different rules. Derives from IntlTest and declares the runIndexedTest() entry point. | |
37 | */ | |
38 | class RBBIMonkeyTest: public IntlTest { | |
39 | public: | |
40 | RBBIMonkeyTest(); | |
41 | virtual ~RBBIMonkeyTest(); | |
42 | ||
43 | void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL ); | |
44 | void testMonkey(); | |
45 | ||
46 | ||
47 | private: | |
48 | const char *fParams; // Copy of user parameters passed in from IntlTest. | |
49 | ||
50 | ||
51 | void testRules(const char *ruleFile); // NOTE(review): presumably runs the monkey test for one rule file - confirm in the implementation. | |
52 | static UBool getIntParam(UnicodeString name, UnicodeString &params, int64_t &val, UErrorCode &status); // get*Param helpers: parameter name and parameter string in, value out through the non-const reference (val/dest); only declared here. | |
53 | static UBool getStringParam(UnicodeString name, UnicodeString &params, CharString &dest, UErrorCode &status); | |
54 | static UBool getBoolParam(UnicodeString name, UnicodeString &params, UBool &dest, UErrorCode &status); | |
55 | ||
56 | }; | |
57 | ||
58 | // The following classes are internal to the RBBI Monkey Test implementation. | |
59 | ||
60 | ||
61 | ||
62 | // class CharClass represents a single character class from the source break rules. | |
63 | // Inherits from UObject because instances are adopted by UHashtable, which ultimately | |
64 | // deletes them using the hash table's object deleter function. | |
65 | ||
66 | class CharClass: public UObject { | |
67 | public: | |
68 | UnicodeString fName; /* Name of the character class, copied from the constructor's "name" argument. */ | |
69 | UnicodeString fOriginalDef; // Set definition as it appeared in the user-supplied rules. | |
70 | UnicodeString fExpandedDef; // Set definition with any embedded named sets replaced by their definitions, recursively. | |
71 | LocalPointer<const UnicodeSet> fSet; /* Initialized from the constructor's "set" pointer. */ | |
72 | CharClass(const UnicodeString &name, const UnicodeString &originalDef, const UnicodeString &expandedDef, const UnicodeSet *set) : | |
73 | fName(name), fOriginalDef(originalDef), fExpandedDef(expandedDef), fSet(set) {} | |
74 | }; | |
75 | ||
76 | ||
77 | // class BreakRule represents a single rule from a set of break rules. | |
78 | // Each rule keeps its source text (fRule), the text with set definitions expanded (fExpandedRule), | |
79 | // and a RegexMatcher for the rule (fRuleMatcher). | |
80 | ||
81 | class BreakRule: public UObject { | |
82 | public: | |
83 | BreakRule(); /* Constructor and destructor are declared here without bodies. */ | |
84 | ~BreakRule(); | |
85 | UnicodeString fName; // Name of the rule. | |
86 | UnicodeString fRule; // Rule expression, excluding the name, as written in the user's source. | |
87 | UnicodeString fExpandedRule; // Rule expression after expanding the set definitions. | |
88 | LocalPointer<RegexMatcher> fRuleMatcher; // Regular expression that matches the rule. | |
89 | }; | |
90 | ||
91 | ||
92 | // class BreakRules represents a complete set of break rules, possibly tailored, | |
93 | // compiled from testdata break rules. Holds the rules, the character classes, and the matchers declared below. | |
94 | ||
95 | class BreakRules: public UObject { | |
96 | public: | |
97 | BreakRules(RBBIMonkeyImpl *monkeyImpl, UErrorCode &status); | |
98 | ~BreakRules(); | |
99 | ||
100 | void compileRules(UCHARBUF *rules, UErrorCode &status); /* Takes the rule source as a UCHARBUF plus a UErrorCode; only declared here. */ | |
101 | ||
102 | const CharClass *getClassForChar(UChar32 c, int32_t *iter=NULL) const; /* NOTE(review): the meaning of "iter" is not visible in this header - presumably an iteration cursor; confirm in the implementation. */ | |
103 | ||
104 | ||
105 | RBBIMonkeyImpl *fMonkeyImpl; // Pointer back to the owning MonkeyImpl instance. | |
106 | icu::UVector fBreakRules; // Contents are of type (BreakRule *). | |
107 | ||
108 | LocalUHashtablePointer fCharClasses; // Key is the set name (UnicodeString). | |
109 | // Value is (CharClass *). | |
110 | LocalPointer<UVector> fCharClassList; // Char classes, same contents as the fCharClasses values, | |
111 | // but in a vector so they can be accessed by index. | |
112 | UnicodeSet fDictionarySet; // Dictionary set, empty if none is defined. | |
113 | Locale fLocale; | |
114 | UBreakIteratorType fType; | |
115 | ||
116 | CharClass *addCharClass(const UnicodeString &name, const UnicodeString &def, UErrorCode &status); | |
117 | void addRule(const UnicodeString &name, const UnicodeString &def, UErrorCode &status); | |
118 | bool setKeywordParameter(const UnicodeString &keyword, const UnicodeString &value, UErrorCode &status); | |
119 | RuleBasedBreakIterator *createICUBreakIterator(UErrorCode &status); | |
120 | ||
121 | LocalPointer<RegexMatcher> fSetRefsMatcher; /* NOTE(review): the four matchers presumably parse pieces of the rule syntax, per their names; the patterns are not visible in this header. */ | |
122 | LocalPointer<RegexMatcher> fCommentsMatcher; | |
123 | LocalPointer<RegexMatcher> fClassDefMatcher; | |
124 | LocalPointer<RegexMatcher> fRuleDefMatcher; | |
125 | }; | |
126 | ||
127 | ||
128 | // class MonkeyTestData represents a randomly synthesized test data string together | |
129 | // with the expected break positions obtained by applying | |
130 | // the test break rules. | |
131 | ||
132 | class MonkeyTestData: public UObject { | |
133 | public: | |
134 | MonkeyTestData() : fRandomSeed(0), fBkRules(NULL) {}; // Scalar members start at defined values instead of being left indeterminate. | |
135 | ~MonkeyTestData() {}; | |
136 | void set(BreakRules *rules, IntlTest::icu_rand &rand, UErrorCode &status); | |
137 | void clearActualBreaks(); | |
138 | void dump(int32_t around = -1) const; | |
139 | ||
140 | uint32_t fRandomSeed; // The initial seed value from the random number generator. | |
141 | const BreakRules *fBkRules; // The break rules used to generate this data. | |
142 | UnicodeString fString; // The text. | |
143 | UnicodeString fExpectedBreaks; // Breaks as found by the reference rules. | |
144 | // Parallel to fString. Non-zero if break preceding. | |
145 | UnicodeString fActualBreaks; // Breaks as found by ICU break iterator. | |
146 | UnicodeString fRuleForPosition; // Index into BreakRules.fBreakRules of rule that applied at each position. | |
147 | // Also parallel to fString. | |
148 | UnicodeString f2ndRuleForPos; // As above. A 2nd rule applies when the preceding rule | |
149 | // didn't cause a break, and a subsequent rule match starts | |
150 | // on the last code point of the preceding match. | |
151 | ||
152 | }; | |
153 | ||
154 | ||
155 | ||
156 | ||
157 | // class RBBIMonkeyImpl holds (some indirectly) everything associated with running a monkey | |
158 | // test for one set of break rules. | |
159 | // | |
160 | // When running RBBIMonkeyTest with multiple threads, there is a 1:1 correspondence | |
161 | // between instances of RBBIMonkeyImpl and threads. | |
162 | // Each instance embeds its own RBBIMonkeyThread (fThread), whose run() calls runTest(). | |
163 | class RBBIMonkeyImpl: public UObject { | |
164 | public: | |
165 | RBBIMonkeyImpl(UErrorCode &status); | |
166 | ~RBBIMonkeyImpl(); | |
167 | ||
168 | void setup(const char *ruleFileName, UErrorCode &status); | |
169 | ||
170 | void startTest(); | |
171 | void runTest(); | |
172 | void join(); | |
173 | ||
174 | LocalUCHARBUFPointer fRuleCharBuffer; // Source file contents of the reference rules. | |
175 | LocalPointer<BreakRules> fRuleSet; | |
176 | LocalPointer<RuleBasedBreakIterator> fBI; | |
177 | LocalPointer<MonkeyTestData> fTestData; | |
178 | IntlTest::icu_rand fRandomGenerator; | |
179 | const char *fRuleFileName; | |
180 | UBool fVerbose; // True to do a long dump of failing data. | |
181 | int32_t fLoopCount; | |
182 | ||
183 | UBool fDumpExpansions; // Debug flag to output the expanded form of rules and sets. | |
184 | ||
185 | enum CheckDirection { | |
186 | FORWARD = 1, | |
187 | REVERSE = 2 | |
188 | }; | |
189 | void clearActualBreaks(); | |
190 | void testForwards(UErrorCode &status); | |
191 | void testPrevious(UErrorCode &status); | |
192 | void testFollowing(UErrorCode &status); | |
193 | void testPreceding(UErrorCode &status); | |
194 | void testIsBoundary(UErrorCode &status); | |
195 | void checkResults(const char *msg, CheckDirection dir, UErrorCode &status); | |
196 | ||
197 | class RBBIMonkeyThread: public SimpleThread { /* Thread object whose run() calls runTest() on the RBBIMonkeyImpl it was constructed with. */ | |
198 | private: | |
199 | RBBIMonkeyImpl *fMonkeyImpl; | |
200 | public: | |
201 | RBBIMonkeyThread(RBBIMonkeyImpl *impl) : fMonkeyImpl(impl) {}; | |
202 | void run() U_OVERRIDE { fMonkeyImpl->runTest(); }; | |
203 | }; | |
204 | private: | |
205 | void openBreakRules(const char *fileName, UErrorCode &status); | |
206 | RBBIMonkeyThread fThread; | |
207 | ||
208 | }; | |
209 | ||
210 | #endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */ | |
211 | ||
212 | #endif // RBBIMONKEYTEST_H |