1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
/*************************************************************************
 * Copyright (c) 2016, International Business Machines
 * Corporation and others. All Rights Reserved.
 *************************************************************************
 */
8 #ifndef RBBIMONKEYTEST_H
9 #define RBBIMONKEYTEST_H
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING
17 #include "unicode/rbbi.h"
18 #include "unicode/regex.h"
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
21 #include "unicode/uobject.h"
23 #include "simplethread.h"
30 // Develop a tailoring format.
31 // Hook to old tests that use monkey impl to get expected data.
// Forward declaration only; the BreakRules class itself is defined later in this header.
class BreakRules;
// Test the RuleBasedBreakIterator class giving different rules.
40 class RBBIMonkeyTest
: public IntlTest
{
43 virtual ~RBBIMonkeyTest();
45 void runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
// Copy of the user parameters that were passed in from IntlTest.
const char *fParams;
// Declaration only. Takes the name of a rule file.
// NOTE(review): presumably runs the monkey test against that file - confirm
// against the implementation.
void testRules(const char *ruleFile);
54 static UBool
getIntParam(UnicodeString name
, UnicodeString
¶ms
, int64_t &val
, UErrorCode
&status
);
55 static UBool
getStringParam(UnicodeString name
, UnicodeString
¶ms
, CharString
&dest
, UErrorCode
&status
);
56 static UBool
getBoolParam(UnicodeString name
, UnicodeString
¶ms
, UBool
&dest
, UErrorCode
&status
);
60 // The following classes are internal to the RBBI Monkey Test implementation.
64 // class CharClass Represents a single character class from the source break rules.
65 // Inherits from UObject because instances are adopted by UHashtable, which ultimately
66 // deletes them using hash's object deleter function.
68 class CharClass
: public UObject
{
71 UnicodeString fOriginalDef
; // set definition as it appeared in user supplied rules.
72 UnicodeString fExpandedDef
; // set definition with any embedded named sets replaced by their defs, recursively.
73 LocalPointer
<const UnicodeSet
> fSet
;
74 CharClass(const UnicodeString
&name
, const UnicodeString
&originalDef
, const UnicodeString
&expandedDef
, const UnicodeSet
*set
) :
75 fName(name
), fOriginalDef(originalDef
), fExpandedDef(expandedDef
), fSet(set
) {}
79 // class BreakRule represents a single rule from a set of break rules.
80 // Each rule has the set definitions expanded, and
81 // is compiled to a regular expression.
83 class BreakRule
: public UObject
{
87 UnicodeString fName
; // Name of the rule.
88 UnicodeString fRule
; // Rule expression, excluding the name, as written in user source.
89 UnicodeString fExpandedRule
; // Rule expression after expanding the set definitions.
90 LocalPointer
<RegexMatcher
> fRuleMatcher
; // Regular expression that matches the rule.
94 // class BreakRules represents a complete set of break rules, possibly tailored,
95 // compiled from testdata break rules.
97 class BreakRules
: public UObject
{
99 BreakRules(RBBIMonkeyImpl
*monkeyImpl
, UErrorCode
&status
);
102 void compileRules(UCHARBUF
*rules
, UErrorCode
&status
);
104 const CharClass
*getClassForChar(UChar32 c
, int32_t *iter
=NULL
) const;
107 RBBIMonkeyImpl
*fMonkeyImpl
; // Pointer back to the owning MonkeyImpl instance.
108 icu::UVector fBreakRules
; // Contents are of type (BreakRule *).
110 LocalUHashtablePointer fCharClasses
; // Key is set name (UnicodeString).
111 // Value is (CharClass *)
112 LocalPointer
<UVector
> fCharClassList
; // Char Classes, same contents as fCharClasses values,
113 // but in a vector so they can be accessed by index.
114 UnicodeSet fDictionarySet
; // Dictionary set, empty if none is defined.
116 UBreakIteratorType fType
;
118 CharClass
*addCharClass(const UnicodeString
&name
, const UnicodeString
&def
, UErrorCode
&status
);
119 void addRule(const UnicodeString
&name
, const UnicodeString
&def
, UErrorCode
&status
);
120 bool setKeywordParameter(const UnicodeString
&keyword
, const UnicodeString
&value
, UErrorCode
&status
);
121 RuleBasedBreakIterator
*createICUBreakIterator(UErrorCode
&status
);
123 LocalPointer
<RegexMatcher
> fSetRefsMatcher
;
124 LocalPointer
<RegexMatcher
> fCommentsMatcher
;
125 LocalPointer
<RegexMatcher
> fClassDefMatcher
;
126 LocalPointer
<RegexMatcher
> fRuleDefMatcher
;
130 // class MonkeyTestData represents a randomly synthesized test data string together
131 // with the expected break positions obtained by applying
132 // the test break rules.
134 class MonkeyTestData
: public UObject
{
137 ~MonkeyTestData() {};
138 void set(BreakRules
*rules
, IntlTest::icu_rand
&rand
, UErrorCode
&status
);
// Declaration only.
// NOTE(review): presumably resets fActualBreaks - confirm.
void clearActualBreaks();
140 void dump(int32_t around
= -1) const;
142 uint32_t fRandomSeed
; // The initial seed value from the random number genererator.
143 const BreakRules
*fBkRules
; // The break rules used to generate this data.
144 UnicodeString fString
; // The text.
145 UnicodeString fExpectedBreaks
; // Breaks as found by the reference rules.
146 // Parallel to fString. Non-zero if break preceding.
147 UnicodeString fActualBreaks
; // Breaks as found by ICU break iterator.
148 UnicodeString fRuleForPosition
; // Index into BreakRules.fBreakRules of rule that applied at each position.
149 // Also parallel to fString.
150 UnicodeString f2ndRuleForPos
; // As above. A 2nd rule applies when the preceding rule
151 // didn't cause a break, and a subsequent rule match starts
152 // on the last code point of the preceding match.
159 // class RBBIMonkeyImpl holds (some indirectly) everything associated with running a monkey
160 // test for one set of break rules.
162 // When running RBBIMonkeyTest with multiple threads, there is a 1:1 correspondence
163 // between instances of RBBIMonkeyImpl and threads.
165 class RBBIMonkeyImpl
: public UObject
{
167 RBBIMonkeyImpl(UErrorCode
&status
);
170 void setup(const char *ruleFileName
, UErrorCode
&status
);
176 LocalUCHARBUFPointer fRuleCharBuffer
; // source file contents of the reference rules.
177 LocalPointer
<BreakRules
> fRuleSet
;
178 LocalPointer
<RuleBasedBreakIterator
> fBI
;
179 LocalPointer
<MonkeyTestData
> fTestData
;
180 IntlTest::icu_rand fRandomGenerator
;
181 const char *fRuleFileName
;
182 UBool fVerbose
; // True to do long dump of failing data.
185 UBool fDumpExpansions
; // Debug flag to output epananded form of rules and sets.
187 enum CheckDirection
{
// Declaration only.
// NOTE(review): presumably clears the actual-break data held in fTestData - confirm.
void clearActualBreaks();
192 void testForwards(UErrorCode
&status
);
193 void testPrevious(UErrorCode
&status
);
194 void testFollowing(UErrorCode
&status
);
195 void testPreceding(UErrorCode
&status
);
196 void testIsBoundary(UErrorCode
&status
);
197 void checkResults(const char *msg
, CheckDirection dir
, UErrorCode
&status
);
199 class RBBIMonkeyThread
: public SimpleThread
{
201 RBBIMonkeyImpl
*fMonkeyImpl
;
203 RBBIMonkeyThread(RBBIMonkeyImpl
*impl
) : fMonkeyImpl(impl
) {};
204 void run() U_OVERRIDE
{ fMonkeyImpl
->runTest(); };
207 void openBreakRules(const char *fileName
, UErrorCode
&status
);
208 RBBIMonkeyThread fThread
;
212 #endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */
214 #endif // RBBIMONKEYTEST_H