]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/intltest/rbbimonkeytest.h
ICU-59117.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbimonkeytest.h
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*************************************************************************
4 * Copyright (c) 2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *************************************************************************
7 */
8 #ifndef RBBIMONKEYTEST_H
9 #define RBBIMONKEYTEST_H
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING
14
15 #include "intltest.h"
16
17 #include "unicode/rbbi.h"
18 #include "unicode/regex.h"
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
21 #include "unicode/uobject.h"
22
23 #include "simplethread.h"
24 #include "ucbuf.h"
25 #include "uhash.h"
26 #include "uvector.h"
27
28 //
29 // TODO:
30 // Develop a tailoring format.
31 // Hook to old tests that use monkey impl to get expected data.
32 // Remove old tests.
33
34 class BreakRules; // Forward declaration
35 class RBBIMonkeyImpl;
36
37 /**
38 * Test the RuleBasedBreakIterator class giving different rules
39 */
40 class RBBIMonkeyTest: public IntlTest {
41 public:
42 RBBIMonkeyTest();
43 virtual ~RBBIMonkeyTest();
44
45 void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL );
46 void testMonkey();
47
48
49 private:
50 const char *fParams; // Copy of user parameters passed in from IntlTest.
51
52
53 void testRules(const char *ruleFile);
54 static UBool getIntParam(UnicodeString name, UnicodeString &params, int64_t &val, UErrorCode &status);
55 static UBool getStringParam(UnicodeString name, UnicodeString &params, CharString &dest, UErrorCode &status);
56 static UBool getBoolParam(UnicodeString name, UnicodeString &params, UBool &dest, UErrorCode &status);
57
58 };
59
60 // The following classes are internal to the RBBI Monkey Test implementation.
61
62
63
64 // class CharClass Represents a single character class from the source break rules.
65 // Inherits from UObject because instances are adopted by UHashtable, which ultimately
66 // deletes them using hash's object deleter function.
67
68 class CharClass: public UObject {
69 public:
70 UnicodeString fName;
71 UnicodeString fOriginalDef; // set definition as it appeared in user supplied rules.
72 UnicodeString fExpandedDef; // set definition with any embedded named sets replaced by their defs, recursively.
73 LocalPointer<const UnicodeSet> fSet;
74 CharClass(const UnicodeString &name, const UnicodeString &originalDef, const UnicodeString &expandedDef, const UnicodeSet *set) :
75 fName(name), fOriginalDef(originalDef), fExpandedDef(expandedDef), fSet(set) {}
76 };
77
78
79 // class BreakRule represents a single rule from a set of break rules.
80 // Each rule has the set definitions expanded, and
81 // is compiled to a regular expression.
82
83 class BreakRule: public UObject {
84 public:
85 BreakRule();
86 ~BreakRule();
87 UnicodeString fName; // Name of the rule.
88 UnicodeString fRule; // Rule expression, excluding the name, as written in user source.
89 UnicodeString fExpandedRule; // Rule expression after expanding the set definitions.
90 LocalPointer<RegexMatcher> fRuleMatcher; // Regular expression that matches the rule.
91 };
92
93
94 // class BreakRules represents a complete set of break rules, possibly tailored,
95 // compiled from testdata break rules.
96
97 class BreakRules: public UObject {
98 public:
99 BreakRules(RBBIMonkeyImpl *monkeyImpl, UErrorCode &status);
100 ~BreakRules();
101
102 void compileRules(UCHARBUF *rules, UErrorCode &status);
103
104 const CharClass *getClassForChar(UChar32 c, int32_t *iter=NULL) const;
105
106
107 RBBIMonkeyImpl *fMonkeyImpl; // Pointer back to the owning MonkeyImpl instance.
108 icu::UVector fBreakRules; // Contents are of type (BreakRule *).
109
110 LocalUHashtablePointer fCharClasses; // Key is set name (UnicodeString).
111 // Value is (CharClass *)
112 LocalPointer<UVector> fCharClassList; // Char Classes, same contents as fCharClasses values,
113 // but in a vector so they can be accessed by index.
114 UnicodeSet fDictionarySet; // Dictionary set, empty if none is defined.
115 Locale fLocale;
116 UBreakIteratorType fType;
117
118 CharClass *addCharClass(const UnicodeString &name, const UnicodeString &def, UErrorCode &status);
119 void addRule(const UnicodeString &name, const UnicodeString &def, UErrorCode &status);
120 bool setKeywordParameter(const UnicodeString &keyword, const UnicodeString &value, UErrorCode &status);
121 RuleBasedBreakIterator *createICUBreakIterator(UErrorCode &status);
122
123 LocalPointer<RegexMatcher> fSetRefsMatcher;
124 LocalPointer<RegexMatcher> fCommentsMatcher;
125 LocalPointer<RegexMatcher> fClassDefMatcher;
126 LocalPointer<RegexMatcher> fRuleDefMatcher;
127 };
128
129
130 // class MonkeyTestData represents a randomly synthesized test data string together
131 // with the expected break positions obtained by applying
132 // the test break rules.
133
134 class MonkeyTestData: public UObject {
135 public:
136 MonkeyTestData() {};
137 ~MonkeyTestData() {};
138 void set(BreakRules *rules, IntlTest::icu_rand &rand, UErrorCode &status);
139 void clearActualBreaks();
140 void dump(int32_t around = -1) const;
141
142 uint32_t fRandomSeed; // The initial seed value from the random number genererator.
143 const BreakRules *fBkRules; // The break rules used to generate this data.
144 UnicodeString fString; // The text.
145 UnicodeString fExpectedBreaks; // Breaks as found by the reference rules.
146 // Parallel to fString. Non-zero if break preceding.
147 UnicodeString fActualBreaks; // Breaks as found by ICU break iterator.
148 UnicodeString fRuleForPosition; // Index into BreakRules.fBreakRules of rule that applied at each position.
149 // Also parallel to fString.
150 UnicodeString f2ndRuleForPos; // As above. A 2nd rule applies when the preceding rule
151 // didn't cause a break, and a subsequent rule match starts
152 // on the last code point of the preceding match.
153
154 };
155
156
157
158
159 // class RBBIMonkeyImpl holds (some indirectly) everything associated with running a monkey
160 // test for one set of break rules.
161 //
162 // When running RBBIMonkeyTest with multiple threads, there is a 1:1 correspondence
163 // between instances of RBBIMonkeyImpl and threads.
164 //
165 class RBBIMonkeyImpl: public UObject {
166 public:
167 RBBIMonkeyImpl(UErrorCode &status);
168 ~RBBIMonkeyImpl();
169
170 void setup(const char *ruleFileName, UErrorCode &status);
171
172 void startTest();
173 void runTest();
174 void join();
175
176 LocalUCHARBUFPointer fRuleCharBuffer; // source file contents of the reference rules.
177 LocalPointer<BreakRules> fRuleSet;
178 LocalPointer<RuleBasedBreakIterator> fBI;
179 LocalPointer<MonkeyTestData> fTestData;
180 IntlTest::icu_rand fRandomGenerator;
181 const char *fRuleFileName;
182 UBool fVerbose; // True to do long dump of failing data.
183 int32_t fLoopCount;
184
185 UBool fDumpExpansions; // Debug flag to output epananded form of rules and sets.
186
187 enum CheckDirection {
188 FORWARD = 1,
189 REVERSE = 2
190 };
191 void clearActualBreaks();
192 void testForwards(UErrorCode &status);
193 void testPrevious(UErrorCode &status);
194 void testFollowing(UErrorCode &status);
195 void testPreceding(UErrorCode &status);
196 void testIsBoundary(UErrorCode &status);
197 void checkResults(const char *msg, CheckDirection dir, UErrorCode &status);
198
199 class RBBIMonkeyThread: public SimpleThread {
200 private:
201 RBBIMonkeyImpl *fMonkeyImpl;
202 public:
203 RBBIMonkeyThread(RBBIMonkeyImpl *impl) : fMonkeyImpl(impl) {};
204 void run() U_OVERRIDE { fMonkeyImpl->runTest(); };
205 };
206 private:
207 void openBreakRules(const char *fileName, UErrorCode &status);
208 RBBIMonkeyThread fThread;
209
210 };
211
212 #endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */
213
214 #endif // RBBIMONKEYTEST_H