1 /*************************************************************************
2 * Copyright (c) 2016, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *************************************************************************
6 #ifndef RBBIMONKEYTEST_H
7 #define RBBIMONKEYTEST_H
9 #include "unicode/utypes.h"
11 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING
15 #include "unicode/rbbi.h"
16 #include "unicode/regex.h"
17 #include "unicode/uniset.h"
18 #include "unicode/unistr.h"
19 #include "unicode/uobject.h"
21 #include "simplethread.h"
28 // Develop a tailoring format.
29 // Hook to old tests that use monkey impl to get expected data.
32 class BreakRules
; // Forward declaration
36 * Test the RuleBasedBreakIterator class giving different rules
38 class RBBIMonkeyTest
: public IntlTest
{
41 virtual ~RBBIMonkeyTest();
// Indexed test dispatch entry point: receives the test index, an exec flag,
// a by-reference slot for the test name, and an optional parameter string
// (`par` defaults to NULL).
// NOTE(review): presumably this overrides IntlTest::runIndexedTest and `par`
// is the source of fParams — confirm against the implementation file.
43 void runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
48 const char *fParams
; // Copy of user parameters passed in from IntlTest.
51 void testRules(const char *ruleFile
);
52 static UBool
getIntParam(UnicodeString name
, UnicodeString
¶ms
, int64_t &val
, UErrorCode
&status
);
53 static UBool
getStringParam(UnicodeString name
, UnicodeString
¶ms
, CharString
&dest
, UErrorCode
&status
);
54 static UBool
getBoolParam(UnicodeString name
, UnicodeString
¶ms
, UBool
&dest
, UErrorCode
&status
);
58 // The following classes are internal to the RBBI Monkey Test implementation.
62 // class CharClass Represents a single character class from the source break rules.
63 // Inherits from UObject because instances are adopted by UHashtable, which ultimately
64 // deletes them using hash's object deleter function.
66 class CharClass
: public UObject
{
69 UnicodeString fOriginalDef
; // set definition as it appeared in user supplied rules.
70 UnicodeString fExpandedDef
; // set definition with any embedded named sets replaced by their defs, recursively.
71 LocalPointer
<const UnicodeSet
> fSet
;
// Constructor: copies `name`, `originalDef` and `expandedDef` into fName,
// fOriginalDef and fExpandedDef, and hands the raw `set` pointer to the fSet
// LocalPointer. The body itself is empty.
// NOTE(review): because fSet is a LocalPointer, the new CharClass presumably
// takes ownership of `set` (callers must not delete it) — confirm.
72 CharClass(const UnicodeString
&name
, const UnicodeString
&originalDef
, const UnicodeString
&expandedDef
, const UnicodeSet
*set
) :
73 fName(name
), fOriginalDef(originalDef
), fExpandedDef(expandedDef
), fSet(set
) {}
77 // class BreakRule represents a single rule from a set of break rules.
78 // Each rule has the set definitions expanded, and
79 // is compiled to a regular expression.
81 class BreakRule
: public UObject
{
85 UnicodeString fName
; // Name of the rule.
86 UnicodeString fRule
; // Rule expression, excluding the name, as written in user source.
87 UnicodeString fExpandedRule
; // Rule expression after expanding the set definitions.
88 LocalPointer
<RegexMatcher
> fRuleMatcher
; // Regular expression that matches the rule.
92 // class BreakRules represents a complete set of break rules, possibly tailored,
93 // compiled from testdata break rules.
95 class BreakRules
: public UObject
{
// Constructor. Takes the RBBIMonkeyImpl instance this rule set belongs to
// (see fMonkeyImpl) and an ICU error code.
// NOTE(review): presumably also creates the regex matchers and containers
// held as members — confirm against the implementation.
97 BreakRules(RBBIMonkeyImpl
*monkeyImpl
, UErrorCode
&status
);
// Compile the break rules supplied through the UCHARBUF `rules`.
// NOTE(review): presumably populates fBreakRules / fCharClasses and reports
// failures through `status` — confirm.
100 void compileRules(UCHARBUF
*rules
, UErrorCode
&status
);
// Const lookup returning a pointer to the (const) CharClass for code point `c`.
// `iter` is optional and defaults to NULL.
// NOTE(review): the meaning of `iter` and whether NULL can be returned are
// not visible from this declaration — check the implementation.
102 const CharClass
*getClassForChar(UChar32 c
, int32_t *iter
=NULL
) const;
105 RBBIMonkeyImpl
*fMonkeyImpl
; // Pointer back to the owning MonkeyImpl instance.
106 icu::UVector fBreakRules
; // Contents are of type (BreakRule *).
108 LocalUHashtablePointer fCharClasses
; // Key is set name (UnicodeString).
109 // Value is (CharClass *)
110 LocalPointer
<UVector
> fCharClassList
; // Char Classes, same contents as fCharClasses values,
111 // but in a vector so they can be accessed by index.
112 UnicodeSet fDictionarySet
; // Dictionary set, empty if none is defined.
114 UBreakIteratorType fType
;
// Register a character class with the given name and definition; returns a
// pointer to a CharClass.
// NOTE(review): presumably the returned object is stored in fCharClasses /
// fCharClassList rather than handed to the caller to own — confirm.
116 CharClass
*addCharClass(const UnicodeString
&name
, const UnicodeString
&def
, UErrorCode
&status
);
// Register a break rule with the given name and definition.
// NOTE(review): presumably appends a BreakRule to fBreakRules — confirm.
117 void addRule(const UnicodeString
&name
, const UnicodeString
&def
, UErrorCode
&status
);
// Handle a keyword/value pair from the rules source.
// Note the return type is C++ `bool`, unlike the UBool used elsewhere in
// this header.
// NOTE(review): presumably returns whether the keyword was recognized — confirm.
118 bool setKeywordParameter(const UnicodeString
&keyword
, const UnicodeString
&value
, UErrorCode
&status
);
// Create a RuleBasedBreakIterator and return it as a raw pointer.
// NOTE(review): the caller presumably owns the returned iterator, and fType
// presumably selects which kind is created — confirm.
119 RuleBasedBreakIterator
*createICUBreakIterator(UErrorCode
&status
);
121 LocalPointer
<RegexMatcher
> fSetRefsMatcher
;
122 LocalPointer
<RegexMatcher
> fCommentsMatcher
;
123 LocalPointer
<RegexMatcher
> fClassDefMatcher
;
124 LocalPointer
<RegexMatcher
> fRuleDefMatcher
;
128 // class MonkeyTestData represents a randomly synthesized test data string together
129 // with the expected break positions obtained by applying
130 // the test break rules.
132 class MonkeyTestData
: public UObject
{
135 ~MonkeyTestData() {};
// (Re)initialize this test data object from the break rules `rules`, using
// `rand` as the random source.
// NOTE(review): presumably synthesizes fString and fills fExpectedBreaks /
// fRuleForPosition by applying the reference rules — confirm.
136 void set(BreakRules
*rules
, IntlTest::icu_rand
&rand
, UErrorCode
&status
);
// NOTE(review): presumably resets fActualBreaks before a new iterator pass — confirm.
137 void clearActualBreaks();
// Const debugging dump. `around` defaults to -1.
// NOTE(review): presumably -1 means "dump everything" and any other value
// limits output to the neighbourhood of that index — confirm.
138 void dump(int32_t around
= -1) const;
140 uint32_t fRandomSeed
; // The initial seed value from the random number generator.
141 const BreakRules
*fBkRules
; // The break rules used to generate this data.
142 UnicodeString fString
; // The text.
143 UnicodeString fExpectedBreaks
; // Breaks as found by the reference rules.
144 // Parallel to fString. Non-zero if break preceding.
145 UnicodeString fActualBreaks
; // Breaks as found by ICU break iterator.
146 UnicodeString fRuleForPosition
; // Index into BreakRules.fBreakRules of rule that applied at each position.
147 // Also parallel to fString.
148 UnicodeString f2ndRuleForPos
; // As above. A 2nd rule applies when the preceding rule
149 // didn't cause a break, and a subsequent rule match starts
150 // on the last code point of the preceding match.
157 // class RBBIMonkeyImpl holds (some indirectly) everything associated with running a monkey
158 // test for one set of break rules.
160 // When running RBBIMonkeyTest with multiple threads, there is a 1:1 correspondence
161 // between instances of RBBIMonkeyImpl and threads.
163 class RBBIMonkeyImpl
: public UObject
{
// Constructor; takes only an ICU error code.
// NOTE(review): rule loading appears to be deferred to setup() — confirm.
165 RBBIMonkeyImpl(UErrorCode
&status
);
// Prepare this instance for the break rules file named by `ruleFileName`.
// NOTE(review): presumably records fRuleFileName and builds fRuleSet, fBI and
// fTestData — confirm against the implementation.
168 void setup(const char *ruleFileName
, UErrorCode
&status
);
174 LocalUCHARBUFPointer fRuleCharBuffer
; // source file contents of the reference rules.
175 LocalPointer
<BreakRules
> fRuleSet
;
176 LocalPointer
<RuleBasedBreakIterator
> fBI
;
177 LocalPointer
<MonkeyTestData
> fTestData
;
178 IntlTest::icu_rand fRandomGenerator
;
179 const char *fRuleFileName
;
180 UBool fVerbose
; // True to do long dump of failing data.
183 UBool fDumpExpansions
; // Debug flag to output expanded form of rules and sets.
185 enum CheckDirection
{
// NOTE(review): presumably clears the actual-break record held in fTestData
// before each of the passes below — confirm.
189 void clearActualBreaks();
// One check per traversal style; each takes only an ICU error code.
// NOTE(review): judging by the names these presumably drive fBI via next(),
// previous(), following(), preceding() and isBoundary() respectively, then
// compare against the expected breaks — confirm against the implementation.
190 void testForwards(UErrorCode
&status
);
191 void testPrevious(UErrorCode
&status
);
192 void testFollowing(UErrorCode
&status
);
193 void testPreceding(UErrorCode
&status
);
194 void testIsBoundary(UErrorCode
&status
);
// Result check shared by the tests above. `msg` labels the check and `dir` is
// a CheckDirection value.
// NOTE(review): presumably compares fTestData's expected and actual breaks
// and reports mismatches — confirm.
195 void checkResults(const char *msg
, CheckDirection dir
, UErrorCode
&status
);
197 class RBBIMonkeyThread
: public SimpleThread
{
199 RBBIMonkeyImpl
*fMonkeyImpl
;
201 RBBIMonkeyThread(RBBIMonkeyImpl
*impl
) : fMonkeyImpl(impl
) {};
202 void run() U_OVERRIDE
{ fMonkeyImpl
->runTest(); };
// Open the break rules file named by `fileName`.
// NOTE(review): presumably loads the file into fRuleCharBuffer ("source file
// contents of the reference rules") and reports failures via `status` — confirm.
205 void openBreakRules(const char *fileName
, UErrorCode
&status
);
206 RBBIMonkeyThread fThread
;
210 #endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */
212 #endif // RBBIMONKEYTEST_H