]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/rbbimonkeytest.h
ICU-62109.0.1.tar.gz
[apple/icu.git] / icuSources / test / intltest / rbbimonkeytest.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
2ca993e8
A
3/*************************************************************************
4 * Copyright (c) 2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *************************************************************************
7*/
8#ifndef RBBIMONKEYTEST_H
9#define RBBIMONKEYTEST_H
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING
14
15#include "intltest.h"
16
17#include "unicode/rbbi.h"
18#include "unicode/regex.h"
19#include "unicode/uniset.h"
20#include "unicode/unistr.h"
21#include "unicode/uobject.h"
22
23#include "simplethread.h"
24#include "ucbuf.h"
25#include "uhash.h"
26#include "uvector.h"
27
0f5d89e8
A
28// RBBI Monkey Test. Run break iterators against randomly generated strings, compare results with
29// an independent reference implementation.
30//
31// The monkey test can be run with parameters, e.g.
32// intltest rbbi/RBBIMonkeyTest@loop=-1,rules=word.txt
33// will run word break testing in an infinite loop.
34// Summary of options
35// rules=name Test against the named reference rule file.
36// Files are found in source/test/testdata/break_rules
37// loop=nnn Loop nnn times. -1 for no limit. loop of 1 is useful for debugging.
38// seed=nnnn Random number generator seed. Allows recreation of a failure.
39// Error messages include the necessary seed value.
40// verbose Display details of a failure. Useful for debugging. Use with loop=1.
41// expansions Debug option, show expansions of rules and sets.
2ca993e8
A
42//
43// TODO:
44// Develop a tailoring format.
45// Hook to old tests that use monkey impl to get expected data.
46// Remove old tests.
47
48class BreakRules; // Forward declaration
49class RBBIMonkeyImpl;
50
51/**
52 * Test the RuleBasedBreakIterator class giving different rules
53 */
54class RBBIMonkeyTest: public IntlTest {
55 public:
56 RBBIMonkeyTest();
57 virtual ~RBBIMonkeyTest();
58
59 void runIndexedTest( int32_t index, UBool exec, const char* &name, char* par = NULL );
60 void testMonkey();
61
62
63 private:
64 const char *fParams; // Copy of user parameters passed in from IntlTest.
65
66
67 void testRules(const char *ruleFile);
68 static UBool getIntParam(UnicodeString name, UnicodeString &params, int64_t &val, UErrorCode &status);
69 static UBool getStringParam(UnicodeString name, UnicodeString &params, CharString &dest, UErrorCode &status);
70 static UBool getBoolParam(UnicodeString name, UnicodeString &params, UBool &dest, UErrorCode &status);
71
72};
73
74// The following classes are internal to the RBBI Monkey Test implementation.
75
76
77
78// class CharClass Represents a single character class from the source break rules.
79// Inherits from UObject because instances are adopted by UHashtable, which ultimately
80// deletes them using hash's object deleter function.
81
82class CharClass: public UObject {
83 public:
84 UnicodeString fName;
85 UnicodeString fOriginalDef; // set definition as it appeared in user supplied rules.
86 UnicodeString fExpandedDef; // set definition with any embedded named sets replaced by their defs, recursively.
87 LocalPointer<const UnicodeSet> fSet;
88 CharClass(const UnicodeString &name, const UnicodeString &originalDef, const UnicodeString &expandedDef, const UnicodeSet *set) :
89 fName(name), fOriginalDef(originalDef), fExpandedDef(expandedDef), fSet(set) {}
90};
91
92
93// class BreakRule represents a single rule from a set of break rules.
94// Each rule has the set definitions expanded, and
95// is compiled to a regular expression.
96
97class BreakRule: public UObject {
98 public:
99 BreakRule();
100 ~BreakRule();
101 UnicodeString fName; // Name of the rule.
102 UnicodeString fRule; // Rule expression, excluding the name, as written in user source.
103 UnicodeString fExpandedRule; // Rule expression after expanding the set definitions.
104 LocalPointer<RegexMatcher> fRuleMatcher; // Regular expression that matches the rule.
105};
106
107
108// class BreakRules represents a complete set of break rules, possibly tailored,
109// compiled from testdata break rules.
110
111class BreakRules: public UObject {
112 public:
113 BreakRules(RBBIMonkeyImpl *monkeyImpl, UErrorCode &status);
114 ~BreakRules();
115
116 void compileRules(UCHARBUF *rules, UErrorCode &status);
117
118 const CharClass *getClassForChar(UChar32 c, int32_t *iter=NULL) const;
119
120
121 RBBIMonkeyImpl *fMonkeyImpl; // Pointer back to the owning MonkeyImpl instance.
122 icu::UVector fBreakRules; // Contents are of type (BreakRule *).
123
124 LocalUHashtablePointer fCharClasses; // Key is set name (UnicodeString).
125 // Value is (CharClass *)
126 LocalPointer<UVector> fCharClassList; // Char Classes, same contents as fCharClasses values,
127 // but in a vector so they can be accessed by index.
128 UnicodeSet fDictionarySet; // Dictionary set, empty if none is defined.
129 Locale fLocale;
130 UBreakIteratorType fType;
131
132 CharClass *addCharClass(const UnicodeString &name, const UnicodeString &def, UErrorCode &status);
133 void addRule(const UnicodeString &name, const UnicodeString &def, UErrorCode &status);
134 bool setKeywordParameter(const UnicodeString &keyword, const UnicodeString &value, UErrorCode &status);
135 RuleBasedBreakIterator *createICUBreakIterator(UErrorCode &status);
136
137 LocalPointer<RegexMatcher> fSetRefsMatcher;
138 LocalPointer<RegexMatcher> fCommentsMatcher;
139 LocalPointer<RegexMatcher> fClassDefMatcher;
140 LocalPointer<RegexMatcher> fRuleDefMatcher;
141};
142
143
144// class MonkeyTestData represents a randomly synthesized test data string together
145// with the expected break positions obtained by applying
146// the test break rules.
147
148class MonkeyTestData: public UObject {
149 public:
150 MonkeyTestData() {};
151 ~MonkeyTestData() {};
152 void set(BreakRules *rules, IntlTest::icu_rand &rand, UErrorCode &status);
153 void clearActualBreaks();
154 void dump(int32_t around = -1) const;
155
156 uint32_t fRandomSeed; // The initial seed value from the random number genererator.
157 const BreakRules *fBkRules; // The break rules used to generate this data.
158 UnicodeString fString; // The text.
159 UnicodeString fExpectedBreaks; // Breaks as found by the reference rules.
160 // Parallel to fString. Non-zero if break preceding.
161 UnicodeString fActualBreaks; // Breaks as found by ICU break iterator.
162 UnicodeString fRuleForPosition; // Index into BreakRules.fBreakRules of rule that applied at each position.
163 // Also parallel to fString.
164 UnicodeString f2ndRuleForPos; // As above. A 2nd rule applies when the preceding rule
165 // didn't cause a break, and a subsequent rule match starts
166 // on the last code point of the preceding match.
167
168};
169
170
171
172
173// class RBBIMonkeyImpl holds (some indirectly) everything associated with running a monkey
174// test for one set of break rules.
175//
176// When running RBBIMonkeyTest with multiple threads, there is a 1:1 correspondence
177// between instances of RBBIMonkeyImpl and threads.
178//
179class RBBIMonkeyImpl: public UObject {
180 public:
181 RBBIMonkeyImpl(UErrorCode &status);
182 ~RBBIMonkeyImpl();
183
184 void setup(const char *ruleFileName, UErrorCode &status);
185
186 void startTest();
187 void runTest();
188 void join();
189
190 LocalUCHARBUFPointer fRuleCharBuffer; // source file contents of the reference rules.
191 LocalPointer<BreakRules> fRuleSet;
192 LocalPointer<RuleBasedBreakIterator> fBI;
193 LocalPointer<MonkeyTestData> fTestData;
194 IntlTest::icu_rand fRandomGenerator;
195 const char *fRuleFileName;
196 UBool fVerbose; // True to do long dump of failing data.
197 int32_t fLoopCount;
198
199 UBool fDumpExpansions; // Debug flag to output epananded form of rules and sets.
200
201 enum CheckDirection {
202 FORWARD = 1,
203 REVERSE = 2
204 };
205 void clearActualBreaks();
206 void testForwards(UErrorCode &status);
207 void testPrevious(UErrorCode &status);
208 void testFollowing(UErrorCode &status);
209 void testPreceding(UErrorCode &status);
210 void testIsBoundary(UErrorCode &status);
0f5d89e8 211 void testIsBoundaryRandom(UErrorCode &status);
2ca993e8
A
212 void checkResults(const char *msg, CheckDirection dir, UErrorCode &status);
213
214 class RBBIMonkeyThread: public SimpleThread {
215 private:
216 RBBIMonkeyImpl *fMonkeyImpl;
217 public:
218 RBBIMonkeyThread(RBBIMonkeyImpl *impl) : fMonkeyImpl(impl) {};
219 void run() U_OVERRIDE { fMonkeyImpl->runTest(); };
220 };
221 private:
222 void openBreakRules(const char *fileName, UErrorCode &status);
223 RBBIMonkeyThread fThread;
224
225};
226
227#endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */
228
229#endif // RBBIMONKEYTEST_H