]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/rbbimonkeytest.h
ICU-57131.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbimonkeytest.h
CommitLineData
2ca993e8
A
1/*************************************************************************
2 * Copyright (c) 2016, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *************************************************************************
5*/
6#ifndef RBBIMONKEYTEST_H
7#define RBBIMONKEYTEST_H
8
9#include "unicode/utypes.h"
10
11#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING
12
13#include "intltest.h"
14
15#include "unicode/rbbi.h"
16#include "unicode/regex.h"
17#include "unicode/uniset.h"
18#include "unicode/unistr.h"
19#include "unicode/uobject.h"
20
21#include "simplethread.h"
22#include "ucbuf.h"
23#include "uhash.h"
24#include "uvector.h"
25
26//
27// TODO:
28// Develop a tailoring format.
29// Hook to old tests that use monkey impl to get expected data.
30// Remove old tests.
31
32class BreakRules; // Forward declaration
33class RBBIMonkeyImpl;
34
35/**
36 * Test the RuleBasedBreakIterator class giving different rules
37 */
38class RBBIMonkeyTest: public IntlTest {
39 public:
40 RBBIMonkeyTest();
41 virtual ~RBBIMonkeyTest();
42
43 void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL );
44 void testMonkey();
45
46
47 private:
48 const char *fParams; // Copy of user parameters passed in from IntlTest.
49
50
51 void testRules(const char *ruleFile);
52 static UBool getIntParam(UnicodeString name, UnicodeString &params, int64_t &val, UErrorCode &status);
53 static UBool getStringParam(UnicodeString name, UnicodeString &params, CharString &dest, UErrorCode &status);
54 static UBool getBoolParam(UnicodeString name, UnicodeString &params, UBool &dest, UErrorCode &status);
55
56};
57
58// The following classes are internal to the RBBI Monkey Test implementation.
59
60
61
62// class CharClass Represents a single character class from the source break rules.
63// Inherits from UObject because instances are adopted by UHashtable, which ultimately
64// deletes them using hash's object deleter function.
65
66class CharClass: public UObject {
67 public:
68 UnicodeString fName;
69 UnicodeString fOriginalDef; // set definition as it appeared in user supplied rules.
70 UnicodeString fExpandedDef; // set definition with any embedded named sets replaced by their defs, recursively.
71 LocalPointer<const UnicodeSet> fSet;
72 CharClass(const UnicodeString &name, const UnicodeString &originalDef, const UnicodeString &expandedDef, const UnicodeSet *set) :
73 fName(name), fOriginalDef(originalDef), fExpandedDef(expandedDef), fSet(set) {}
74};
75
76
77// class BreakRule represents a single rule from a set of break rules.
78// Each rule has the set definitions expanded, and
79// is compiled to a regular expression.
80
81class BreakRule: public UObject {
82 public:
83 BreakRule();
84 ~BreakRule();
85 UnicodeString fName; // Name of the rule.
86 UnicodeString fRule; // Rule expression, excluding the name, as written in user source.
87 UnicodeString fExpandedRule; // Rule expression after expanding the set definitions.
88 LocalPointer<RegexMatcher> fRuleMatcher; // Regular expression that matches the rule.
89};
90
91
92// class BreakRules represents a complete set of break rules, possibly tailored,
93// compiled from testdata break rules.
94
95class BreakRules: public UObject {
96 public:
97 BreakRules(RBBIMonkeyImpl *monkeyImpl, UErrorCode &status);
98 ~BreakRules();
99
100 void compileRules(UCHARBUF *rules, UErrorCode &status);
101
102 const CharClass *getClassForChar(UChar32 c, int32_t *iter=NULL) const;
103
104
105 RBBIMonkeyImpl *fMonkeyImpl; // Pointer back to the owning MonkeyImpl instance.
106 icu::UVector fBreakRules; // Contents are of type (BreakRule *).
107
108 LocalUHashtablePointer fCharClasses; // Key is set name (UnicodeString).
109 // Value is (CharClass *)
110 LocalPointer<UVector> fCharClassList; // Char Classes, same contents as fCharClasses values,
111 // but in a vector so they can be accessed by index.
112 UnicodeSet fDictionarySet; // Dictionary set, empty if none is defined.
113 Locale fLocale;
114 UBreakIteratorType fType;
115
116 CharClass *addCharClass(const UnicodeString &name, const UnicodeString &def, UErrorCode &status);
117 void addRule(const UnicodeString &name, const UnicodeString &def, UErrorCode &status);
118 bool setKeywordParameter(const UnicodeString &keyword, const UnicodeString &value, UErrorCode &status);
119 RuleBasedBreakIterator *createICUBreakIterator(UErrorCode &status);
120
121 LocalPointer<RegexMatcher> fSetRefsMatcher;
122 LocalPointer<RegexMatcher> fCommentsMatcher;
123 LocalPointer<RegexMatcher> fClassDefMatcher;
124 LocalPointer<RegexMatcher> fRuleDefMatcher;
125};
126
127
128// class MonkeyTestData represents a randomly synthesized test data string together
129// with the expected break positions obtained by applying
130// the test break rules.
131
132class MonkeyTestData: public UObject {
133 public:
134 MonkeyTestData() {};
135 ~MonkeyTestData() {};
136 void set(BreakRules *rules, IntlTest::icu_rand &rand, UErrorCode &status);
137 void clearActualBreaks();
138 void dump(int32_t around = -1) const;
139
140 uint32_t fRandomSeed; // The initial seed value from the random number genererator.
141 const BreakRules *fBkRules; // The break rules used to generate this data.
142 UnicodeString fString; // The text.
143 UnicodeString fExpectedBreaks; // Breaks as found by the reference rules.
144 // Parallel to fString. Non-zero if break preceding.
145 UnicodeString fActualBreaks; // Breaks as found by ICU break iterator.
146 UnicodeString fRuleForPosition; // Index into BreakRules.fBreakRules of rule that applied at each position.
147 // Also parallel to fString.
148 UnicodeString f2ndRuleForPos; // As above. A 2nd rule applies when the preceding rule
149 // didn't cause a break, and a subsequent rule match starts
150 // on the last code point of the preceding match.
151
152};
153
154
155
156
157// class RBBIMonkeyImpl holds (some indirectly) everything associated with running a monkey
158// test for one set of break rules.
159//
160// When running RBBIMonkeyTest with multiple threads, there is a 1:1 correspondence
161// between instances of RBBIMonkeyImpl and threads.
162//
163class RBBIMonkeyImpl: public UObject {
164 public:
165 RBBIMonkeyImpl(UErrorCode &status);
166 ~RBBIMonkeyImpl();
167
168 void setup(const char *ruleFileName, UErrorCode &status);
169
170 void startTest();
171 void runTest();
172 void join();
173
174 LocalUCHARBUFPointer fRuleCharBuffer; // source file contents of the reference rules.
175 LocalPointer<BreakRules> fRuleSet;
176 LocalPointer<RuleBasedBreakIterator> fBI;
177 LocalPointer<MonkeyTestData> fTestData;
178 IntlTest::icu_rand fRandomGenerator;
179 const char *fRuleFileName;
180 UBool fVerbose; // True to do long dump of failing data.
181 int32_t fLoopCount;
182
183 UBool fDumpExpansions; // Debug flag to output epananded form of rules and sets.
184
185 enum CheckDirection {
186 FORWARD = 1,
187 REVERSE = 2
188 };
189 void clearActualBreaks();
190 void testForwards(UErrorCode &status);
191 void testPrevious(UErrorCode &status);
192 void testFollowing(UErrorCode &status);
193 void testPreceding(UErrorCode &status);
194 void testIsBoundary(UErrorCode &status);
195 void checkResults(const char *msg, CheckDirection dir, UErrorCode &status);
196
197 class RBBIMonkeyThread: public SimpleThread {
198 private:
199 RBBIMonkeyImpl *fMonkeyImpl;
200 public:
201 RBBIMonkeyThread(RBBIMonkeyImpl *impl) : fMonkeyImpl(impl) {};
202 void run() U_OVERRIDE { fMonkeyImpl->runTest(); };
203 };
204 private:
205 void openBreakRules(const char *fileName, UErrorCode &status);
206 RBBIMonkeyThread fThread;
207
208};
209
210#endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */
211
212#endif // RBBIMONKEYTEST_H