]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
2ca993e8 A |
3 | /******************************************************************** |
4 | * Copyright (c) 2016, International Business Machines Corporation and | |
5 | * others. All Rights Reserved. | |
6 | ********************************************************************/ | |
7 | ||
8 | ||
9 | #include "unicode/utypes.h" | |
10 | ||
11 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING | |
12 | ||
13 | #include "rbbimonkeytest.h" | |
14 | #include "unicode/utypes.h" | |
15 | #include "unicode/brkiter.h" | |
16 | #include "unicode/utf16.h" | |
17 | #include "unicode/uniset.h" | |
18 | #include "unicode/unistr.h" | |
19 | ||
20 | #include "charstr.h" | |
21 | #include "cmemory.h" | |
22 | #include "cstr.h" | |
23 | #include "uelement.h" | |
24 | #include "uhash.h" | |
25 | #include "cstring.h" | |
26 | ||
f3c0d7a5 A |
27 | #include <iostream> |
28 | #include <stdio.h> | |
29 | #include <stdlib.h> | |
30 | #include <string> | |
2ca993e8 A |
31 | |
32 | using namespace icu; | |
33 | ||
34 | ||
35 | void RBBIMonkeyTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* params) { | |
36 | fParams = params; // Work around TESTCASE_AUTO not being able to pass params to test function. | |
37 | ||
38 | TESTCASE_AUTO_BEGIN; | |
39 | TESTCASE_AUTO(testMonkey); | |
40 | TESTCASE_AUTO_END; | |
41 | } | |
42 | ||
43 | //--------------------------------------------------------------------------------------- | |
44 | // | |
45 | // class BreakRule implementation. | |
46 | // | |
47 | //--------------------------------------------------------------------------------------- | |
48 | ||
49 | BreakRule::BreakRule() // : all field default initialized. | |
50 | { | |
51 | } | |
52 | ||
53 | BreakRule::~BreakRule() {} | |
54 | ||
55 | ||
56 | //--------------------------------------------------------------------------------------- | |
57 | // | |
58 | // class BreakRules implementation. | |
59 | // | |
60 | //--------------------------------------------------------------------------------------- | |
61 | BreakRules::BreakRules(RBBIMonkeyImpl *monkeyImpl, UErrorCode &status) : | |
62 | fMonkeyImpl(monkeyImpl), fBreakRules(status), fType(UBRK_COUNT) { | |
63 | fCharClasses.adoptInstead(uhash_open(uhash_hashUnicodeString, | |
64 | uhash_compareUnicodeString, | |
65 | NULL, // value comparator. | |
66 | &status)); | |
67 | if (U_FAILURE(status)) { | |
68 | return; | |
69 | } | |
70 | uhash_setKeyDeleter(fCharClasses.getAlias(), uprv_deleteUObject); | |
71 | uhash_setValueDeleter(fCharClasses.getAlias(), uprv_deleteUObject); | |
72 | fBreakRules.setDeleter(uprv_deleteUObject); | |
73 | ||
74 | fCharClassList.adoptInstead(new UVector(status)); | |
75 | ||
76 | fSetRefsMatcher.adoptInstead(new RegexMatcher(UnicodeString( | |
77 | "(?!(?:\\{|=|\\[:)[ \\t]{0,4})" // Negative lookbehind for '{' or '=' or '[:' | |
78 | // (the identifier is a unicode property name or value) | |
79 | "(?<ClassName>[A-Za-z_][A-Za-z0-9_]*)"), // The char class name | |
80 | 0, status)); | |
81 | ||
82 | // Match comments and blank lines. Matches will be replaced with "", stripping the comments from the rules. | |
83 | fCommentsMatcher.adoptInstead(new RegexMatcher(UnicodeString( | |
84 | "(^|(?<=;))" // Start either at start of line, or just after a ';' (look-behind for ';') | |
85 | "[ \\t]*+" // Match white space. | |
86 | "(#.*)?+" // Optional # plus whatever follows | |
87 | "\\R$" // new-line at end of line. | |
88 | ), 0, status)); | |
89 | ||
90 | // Match (initial parse) of a character class defintion line. | |
91 | fClassDefMatcher.adoptInstead(new RegexMatcher(UnicodeString( | |
92 | "[ \\t]*" // leading white space | |
93 | "(?<ClassName>[A-Za-z_][A-Za-z0-9_]*)" // The char class name | |
94 | "[ \\t]*=[ \\t]*" // = | |
95 | "(?<ClassDef>.*?)" // The char class UnicodeSet expression | |
96 | "[ \\t]*;$"), // ; <end of line> | |
97 | 0, status)); | |
98 | ||
99 | // Match (initial parse) of a break rule line. | |
100 | fRuleDefMatcher.adoptInstead(new RegexMatcher(UnicodeString( | |
101 | "[ \\t]*" // leading white space | |
102 | "(?<RuleName>[A-Za-z_][A-Za-z0-9_.]*)" // The rule name | |
103 | "[ \\t]*:[ \\t]*" // : | |
104 | "(?<RuleDef>.*?)" // The rule definition | |
105 | "[ \\t]*;$"), // ; <end of line> | |
106 | 0, status)); | |
107 | ||
108 | } | |
109 | ||
110 | ||
111 | BreakRules::~BreakRules() {} | |
112 | ||
113 | ||
114 | CharClass *BreakRules::addCharClass(const UnicodeString &name, const UnicodeString &definition, UErrorCode &status) { | |
115 | ||
116 | // Create the expanded definition for this char class, | |
117 | // replacing any set references with the corresponding definition. | |
118 | ||
119 | UnicodeString expandedDef; | |
120 | UnicodeString emptyString; | |
121 | fSetRefsMatcher->reset(definition); | |
122 | while (fSetRefsMatcher->find() && U_SUCCESS(status)) { | |
123 | const UnicodeString name = | |
124 | fSetRefsMatcher->group(fSetRefsMatcher->pattern().groupNumberFromName("ClassName", status), status); | |
125 | CharClass *nameClass = static_cast<CharClass *>(uhash_get(fCharClasses.getAlias(), &name)); | |
126 | const UnicodeString &expansionForName = nameClass ? nameClass->fExpandedDef : name; | |
127 | ||
128 | fSetRefsMatcher->appendReplacement(expandedDef, emptyString, status); | |
129 | expandedDef.append(expansionForName); | |
130 | } | |
131 | fSetRefsMatcher->appendTail(expandedDef); | |
132 | ||
133 | // Verify that the expanded set defintion is valid. | |
134 | ||
135 | if (fMonkeyImpl->fDumpExpansions) { | |
136 | printf("epandedDef: %s\n", CStr(expandedDef)()); | |
137 | } | |
138 | ||
139 | UnicodeSet *s = new UnicodeSet(expandedDef, USET_IGNORE_SPACE, NULL, status); | |
140 | if (U_FAILURE(status)) { | |
141 | IntlTest::gTest->errln("%s:%d: error %s creating UnicodeSet %s", __FILE__, __LINE__, | |
142 | u_errorName(status), CStr(name)()); | |
143 | return NULL; | |
144 | } | |
145 | CharClass *cclass = new CharClass(name, definition, expandedDef, s); | |
146 | CharClass *previousClass = static_cast<CharClass *>(uhash_put(fCharClasses.getAlias(), | |
147 | new UnicodeString(name), // Key, owned by hash table. | |
148 | cclass, // Value, owned by hash table. | |
149 | &status)); | |
150 | ||
151 | if (previousClass != NULL) { | |
152 | // Duplicate class def. | |
153 | // These are legitimate, they are adustments of an existing class. | |
154 | // TODO: will need to keep the old around when we handle tailorings. | |
155 | IntlTest::gTest->logln("Redefinition of character class %s\n", CStr(cclass->fName)()); | |
156 | delete previousClass; | |
157 | } | |
158 | return cclass; | |
159 | } | |
160 | ||
161 | ||
162 | void BreakRules::addRule(const UnicodeString &name, const UnicodeString &definition, UErrorCode &status) { | |
163 | LocalPointer<BreakRule> thisRule(new BreakRule); | |
164 | thisRule->fName = name; | |
165 | thisRule->fRule = definition; | |
166 | ||
167 | // If the rule name contains embedded digits, pad the first numeric field to a fixed length with leading zeroes, | |
168 | // This gives a numeric sort order that matches Unicode UAX rule numbering conventions. | |
169 | UnicodeString emptyString; | |
170 | ||
171 | // Expand the char class definitions within the rule. | |
172 | fSetRefsMatcher->reset(definition); | |
173 | while (fSetRefsMatcher->find() && U_SUCCESS(status)) { | |
174 | const UnicodeString name = | |
175 | fSetRefsMatcher->group(fSetRefsMatcher->pattern().groupNumberFromName("ClassName", status), status); | |
176 | CharClass *nameClass = static_cast<CharClass *>(uhash_get(fCharClasses.getAlias(), &name)); | |
177 | if (!nameClass) { | |
178 | IntlTest::gTest->errln("%s:%d char class \"%s\" unrecognized in rule \"%s\"", | |
179 | __FILE__, __LINE__, CStr(name)(), CStr(definition)()); | |
180 | } | |
181 | const UnicodeString &expansionForName = nameClass ? nameClass->fExpandedDef : name; | |
182 | ||
183 | fSetRefsMatcher->appendReplacement(thisRule->fExpandedRule, emptyString, status); | |
184 | thisRule->fExpandedRule.append(expansionForName); | |
185 | } | |
186 | fSetRefsMatcher->appendTail(thisRule->fExpandedRule); | |
187 | ||
188 | // Replace the divide sign (\u00f7) with a regular expression named capture. | |
189 | // When running the rules, a match that includes this group means we found a break position. | |
190 | ||
191 | int32_t dividePos = thisRule->fExpandedRule.indexOf((UChar)0x00f7); | |
192 | if (dividePos >= 0) { | |
193 | thisRule->fExpandedRule.replace(dividePos, 1, UnicodeString("(?<BreakPosition>)")); | |
194 | } | |
195 | if (thisRule->fExpandedRule.indexOf((UChar)0x00f7) != -1) { | |
196 | status = U_ILLEGAL_ARGUMENT_ERROR; // TODO: produce a good error message. | |
197 | } | |
198 | ||
199 | // UAX break rule set definitions can be empty, just []. | |
200 | // Regular expression set expressions don't accept this. Substitute with [^\u0000-\U0010ffff], which | |
201 | // also matches nothing. | |
202 | ||
203 | static const UChar emptySet[] = {(UChar)0x5b, (UChar)0x5d, 0}; | |
204 | int32_t where = 0; | |
205 | while ((where = thisRule->fExpandedRule.indexOf(emptySet, 2, 0)) >= 0) { | |
206 | thisRule->fExpandedRule.replace(where, 2, UnicodeString("[^\\u0000-\\U0010ffff]")); | |
207 | } | |
208 | if (fMonkeyImpl->fDumpExpansions) { | |
209 | printf("fExpandedRule: %s\n", CStr(thisRule->fExpandedRule)()); | |
210 | } | |
211 | ||
212 | // Compile a regular expression for this rule. | |
213 | thisRule->fRuleMatcher.adoptInstead(new RegexMatcher(thisRule->fExpandedRule, UREGEX_COMMENTS | UREGEX_DOTALL, status)); | |
214 | if (U_FAILURE(status)) { | |
215 | IntlTest::gTest->errln("%s:%d Error creating regular expression for %s", | |
216 | __FILE__, __LINE__, CStr(thisRule->fExpandedRule)()); | |
217 | return; | |
218 | } | |
219 | ||
220 | // Put this new rule into the vector of all Rules. | |
221 | fBreakRules.addElement(thisRule.orphan(), status); | |
222 | } | |
223 | ||
224 | ||
225 | bool BreakRules::setKeywordParameter(const UnicodeString &keyword, const UnicodeString &value, UErrorCode &status) { | |
226 | if (keyword == UnicodeString("locale")) { | |
227 | CharString localeName; | |
228 | localeName.append(CStr(value)(), -1, status); | |
229 | fLocale = Locale::createFromName(localeName.data()); | |
230 | return true; | |
231 | } | |
232 | if (keyword == UnicodeString("type")) { | |
233 | if (value == UnicodeString("grapheme")) { | |
234 | fType = UBRK_CHARACTER; | |
235 | } else if (value == UnicodeString("word")) { | |
236 | fType = UBRK_WORD; | |
237 | } else if (value == UnicodeString("line")) { | |
238 | fType = UBRK_LINE; | |
239 | } else if (value == UnicodeString("sentence")) { | |
240 | fType = UBRK_SENTENCE; | |
241 | } else { | |
242 | IntlTest::gTest->errln("%s:%d Unrecognized break type %s", __FILE__, __LINE__, CStr(value)()); | |
243 | } | |
244 | return true; | |
245 | } | |
246 | // TODO: add tailoring base setting here. | |
247 | return false; | |
248 | } | |
249 | ||
250 | RuleBasedBreakIterator *BreakRules::createICUBreakIterator(UErrorCode &status) { | |
251 | if (U_FAILURE(status)) { | |
252 | return NULL; | |
253 | } | |
254 | RuleBasedBreakIterator *bi = NULL; | |
255 | switch(fType) { | |
256 | case UBRK_CHARACTER: | |
257 | bi = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createCharacterInstance(fLocale, status)); | |
258 | break; | |
259 | case UBRK_WORD: | |
260 | bi = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createWordInstance(fLocale, status)); | |
261 | break; | |
262 | case UBRK_LINE: | |
263 | bi = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createLineInstance(fLocale, status)); | |
264 | break; | |
265 | case UBRK_SENTENCE: | |
266 | bi = dynamic_cast<RuleBasedBreakIterator *>(BreakIterator::createSentenceInstance(fLocale, status)); | |
267 | break; | |
268 | default: | |
269 | IntlTest::gTest->errln("%s:%d Bad break iterator type of %d", __FILE__, __LINE__, fType); | |
270 | status = U_ILLEGAL_ARGUMENT_ERROR; | |
271 | } | |
272 | return bi; | |
273 | } | |
274 | ||
275 | ||
276 | void BreakRules::compileRules(UCHARBUF *rules, UErrorCode &status) { | |
277 | if (U_FAILURE(status)) { | |
278 | return; | |
279 | } | |
280 | ||
281 | UnicodeString emptyString; | |
282 | for (int32_t lineNumber=0; ;lineNumber++) { // Loop once per input line. | |
283 | if (U_FAILURE(status)) { | |
284 | return; | |
285 | } | |
286 | int32_t lineLength = 0; | |
287 | const UChar *lineBuf = ucbuf_readline(rules, &lineLength, &status); | |
288 | if (lineBuf == NULL) { | |
289 | break; | |
290 | } | |
291 | UnicodeString line(lineBuf, lineLength); | |
292 | ||
293 | // Strip comment lines. | |
294 | fCommentsMatcher->reset(line); | |
295 | line = fCommentsMatcher->replaceFirst(emptyString, status); | |
296 | if (line.isEmpty()) { | |
297 | continue; | |
298 | } | |
299 | ||
300 | // Recognize character class definition and keyword lines | |
301 | fClassDefMatcher->reset(line); | |
302 | if (fClassDefMatcher->matches(status)) { | |
303 | UnicodeString className = fClassDefMatcher->group(fClassDefMatcher->pattern().groupNumberFromName("ClassName", status), status); | |
304 | UnicodeString classDef = fClassDefMatcher->group(fClassDefMatcher->pattern().groupNumberFromName("ClassDef", status), status); | |
305 | if (fMonkeyImpl->fDumpExpansions) { | |
306 | printf("scanned class: %s = %s\n", CStr(className)(), CStr(classDef)()); | |
307 | } | |
308 | if (setKeywordParameter(className, classDef, status)) { | |
309 | // The scanned item was "type = ..." or "locale = ...", etc. | |
310 | // which are not actual character classes. | |
311 | continue; | |
312 | } | |
313 | addCharClass(className, classDef, status); | |
314 | continue; | |
315 | } | |
316 | ||
317 | // Recognize rule lines. | |
318 | fRuleDefMatcher->reset(line); | |
319 | if (fRuleDefMatcher->matches(status)) { | |
320 | UnicodeString ruleName = fRuleDefMatcher->group(fRuleDefMatcher->pattern().groupNumberFromName("RuleName", status), status); | |
321 | UnicodeString ruleDef = fRuleDefMatcher->group(fRuleDefMatcher->pattern().groupNumberFromName("RuleDef", status), status); | |
322 | if (fMonkeyImpl->fDumpExpansions) { | |
323 | printf("scanned rule: %s : %s\n", CStr(ruleName)(), CStr(ruleDef)()); | |
324 | } | |
325 | addRule(ruleName, ruleDef, status); | |
326 | continue; | |
327 | } | |
328 | ||
329 | IntlTest::gTest->errln("%s:%d: Unrecognized line in rule file %s: \"%s\"\n", | |
330 | __FILE__, __LINE__, fMonkeyImpl->fRuleFileName, CStr(line)()); | |
331 | } | |
332 | ||
333 | // Build the vector of char classes, omitting the dictionary class if there is one. | |
334 | // This will be used when constructing the random text to be tested. | |
335 | ||
336 | // Also compute the "other" set, consisting of any characters not included in | |
337 | // one or more of the user defined sets. | |
338 | ||
339 | UnicodeSet otherSet((UChar32)0, 0x10ffff); | |
340 | int32_t pos = UHASH_FIRST; | |
341 | const UHashElement *el = NULL; | |
342 | while ((el = uhash_nextElement(fCharClasses.getAlias(), &pos)) != NULL) { | |
343 | const UnicodeString *ccName = static_cast<const UnicodeString *>(el->key.pointer); | |
344 | CharClass *cclass = static_cast<CharClass *>(el->value.pointer); | |
345 | // printf(" Adding %s\n", CStr(*ccName)()); | |
346 | if (*ccName != cclass->fName) { | |
347 | IntlTest::gTest->errln("%s:%d: internal error, set names (%s, %s) inconsistent.\n", | |
348 | __FILE__, __LINE__, CStr(*ccName)(), CStr(cclass->fName)()); | |
349 | } | |
350 | const UnicodeSet *set = cclass->fSet.getAlias(); | |
351 | otherSet.removeAll(*set); | |
352 | if (*ccName == UnicodeString("dictionary")) { | |
353 | fDictionarySet = *set; | |
354 | } else { | |
355 | fCharClassList->addElement(cclass, status); | |
356 | } | |
357 | } | |
358 | ||
359 | if (!otherSet.isEmpty()) { | |
360 | // fprintf(stderr, "have an other set.\n"); | |
361 | UnicodeString pattern; | |
362 | CharClass *cclass = addCharClass(UnicodeString("__Others"), otherSet.toPattern(pattern), status); | |
363 | fCharClassList->addElement(cclass, status); | |
364 | } | |
365 | } | |
366 | ||
367 | ||
368 | const CharClass *BreakRules::getClassForChar(UChar32 c, int32_t *iter) const { | |
369 | int32_t localIter = 0; | |
370 | int32_t &it = iter? *iter : localIter; | |
371 | ||
372 | while (it < fCharClassList->size()) { | |
373 | const CharClass *cc = static_cast<const CharClass *>(fCharClassList->elementAt(it)); | |
374 | ++it; | |
375 | if (cc->fSet->contains(c)) { | |
376 | return cc; | |
377 | } | |
378 | } | |
379 | return NULL; | |
380 | } | |
381 | ||
382 | //--------------------------------------------------------------------------------------- | |
383 | // | |
384 | // class MonkeyTestData implementation. | |
385 | // | |
386 | //--------------------------------------------------------------------------------------- | |
387 | ||
388 | void MonkeyTestData::set(BreakRules *rules, IntlTest::icu_rand &rand, UErrorCode &status) { | |
389 | const int32_t dataLength = 1000; | |
390 | ||
391 | // Fill the test string with random characters. | |
392 | // First randomly pick a char class, then randomly pick a character from that class. | |
393 | // Exclude any characters from the dictionary set. | |
394 | ||
395 | // std::cout << "Populating Test Data" << std::endl; | |
396 | fRandomSeed = rand.getSeed(); // Save initial seed for use in error messages, | |
397 | // allowing recreation of failing data. | |
398 | fBkRules = rules; | |
399 | fString.remove(); | |
400 | for (int32_t n=0; n<dataLength;) { | |
401 | int charClassIndex = rand() % rules->fCharClassList->size(); | |
402 | const CharClass *cclass = static_cast<CharClass *>(rules->fCharClassList->elementAt(charClassIndex)); | |
403 | if (cclass->fSet->size() == 0) { | |
404 | // Some rules or tailorings do end up with empty char classes. | |
405 | continue; | |
406 | } | |
407 | int32_t charIndex = rand() % cclass->fSet->size(); | |
408 | UChar32 c = cclass->fSet->charAt(charIndex); | |
409 | if (U16_IS_TRAIL(c) && fString.length() > 0 && U16_IS_LEAD(fString.charAt(fString.length()-1))) { | |
410 | // Character classes may contain unpaired surrogates, e.g. Grapheme_Cluster_Break = Control. | |
411 | // Don't let random unpaired surrogates combine in the test data because they might | |
412 | // produce an unwanted dictionary character. | |
413 | continue; | |
414 | } | |
415 | ||
416 | if (!rules->fDictionarySet.contains(c)) { | |
417 | fString.append(c); | |
418 | ++n; | |
419 | } | |
420 | } | |
421 | ||
422 | // Reset each rule matcher regex with this new string. | |
423 | // (Although we are always using the same string object, ICU regular expressions | |
424 | // don't like the underlying string data changing without doing a reset). | |
425 | ||
426 | for (int32_t ruleNum=0; ruleNum<rules->fBreakRules.size(); ruleNum++) { | |
427 | BreakRule *rule = static_cast<BreakRule *>(rules->fBreakRules.elementAt(ruleNum)); | |
428 | rule->fRuleMatcher->reset(fString); | |
429 | } | |
430 | ||
431 | // Init the expectedBreaks, actualBreaks and ruleForPosition strings (used as arrays). | |
432 | // Expected and Actual breaks are one longer than the input string; a non-zero value | |
433 | // will indicate a boundary preceding that position. | |
434 | ||
435 | clearActualBreaks(); | |
436 | fExpectedBreaks = fActualBreaks; | |
437 | fRuleForPosition = fActualBreaks; | |
438 | f2ndRuleForPos = fActualBreaks; | |
439 | ||
440 | // Apply reference rules to find the expected breaks. | |
441 | ||
442 | fExpectedBreaks.setCharAt(0, (UChar)1); // Force an expected break before the start of the text. | |
443 | // ICU always reports a break there. | |
444 | // The reference rules do not have a means to do so. | |
445 | int32_t strIdx = 0; | |
446 | while (strIdx < fString.length()) { | |
447 | BreakRule *matchingRule = NULL; | |
448 | UBool hasBreak = FALSE; | |
449 | int32_t ruleNum = 0; | |
450 | int32_t matchStart = 0; | |
451 | int32_t matchEnd = 0; | |
452 | int32_t breakGroup = 0; | |
453 | for (ruleNum=0; ruleNum<rules->fBreakRules.size(); ruleNum++) { | |
454 | BreakRule *rule = static_cast<BreakRule *>(rules->fBreakRules.elementAt(ruleNum)); | |
455 | rule->fRuleMatcher->reset(); | |
456 | if (rule->fRuleMatcher->lookingAt(strIdx, status)) { | |
457 | // A candidate rule match, check further to see if we take it or continue to check other rules. | |
458 | // Matches of zero or one codepoint count only if they also specify a break. | |
459 | matchStart = rule->fRuleMatcher->start(status); | |
460 | matchEnd = rule->fRuleMatcher->end(status); | |
461 | breakGroup = rule->fRuleMatcher->pattern().groupNumberFromName("BreakPosition", status); | |
462 | hasBreak = U_SUCCESS(status); | |
463 | if (status == U_REGEX_INVALID_CAPTURE_GROUP_NAME) { | |
464 | status = U_ZERO_ERROR; | |
465 | } | |
466 | if (hasBreak || fString.moveIndex32(matchStart, 1) < matchEnd) { | |
467 | matchingRule = rule; | |
468 | break; | |
469 | } | |
470 | } | |
471 | } | |
472 | if (matchingRule == NULL) { | |
473 | // No reference rule matched. This is an error in the rules that should never happen. | |
474 | IntlTest::gTest->errln("%s:%d Trouble with monkey test reference rules at position %d. ", | |
475 | __FILE__, __LINE__, strIdx); | |
476 | dump(strIdx); | |
477 | status = U_INVALID_FORMAT_ERROR; | |
478 | return; | |
479 | } | |
480 | if (matchingRule->fRuleMatcher->group(status).length() == 0) { | |
481 | // Zero length rule match. This is also an error in the rule expressions. | |
482 | IntlTest::gTest->errln("%s:%d Zero length rule match.", | |
483 | __FILE__, __LINE__); | |
484 | status = U_INVALID_FORMAT_ERROR; | |
485 | return; | |
486 | } | |
487 | ||
488 | // Record which rule matched over the length of the match. | |
489 | for (int i = matchStart; i < matchEnd; i++) { | |
490 | if (fRuleForPosition.charAt(i) == 0) { | |
491 | fRuleForPosition.setCharAt(i, (UChar)ruleNum); | |
492 | } else { | |
493 | f2ndRuleForPos.setCharAt(i, (UChar)ruleNum); | |
494 | } | |
495 | } | |
496 | ||
497 | // Break positions appear in rules as a matching named capture of zero length at the break position, | |
498 | // the adjusted pattern contains (?<BreakPosition>) | |
499 | if (hasBreak) { | |
500 | int32_t breakPos = matchingRule->fRuleMatcher->start(breakGroup, status); | |
501 | if (U_FAILURE(status) || breakPos < 0) { | |
502 | // Rule specified a break, but that break wasn't part of the match, even | |
503 | // though the rule as a whole matched. | |
504 | // Can't happen with regular expressions derived from (equivalent to) ICU break rules. | |
505 | // Shouldn't get here. | |
506 | IntlTest::gTest->errln("%s:%d Internal Rule Error.", __FILE__, __LINE__); | |
507 | status = U_INVALID_FORMAT_ERROR; | |
508 | break; | |
509 | } | |
510 | fExpectedBreaks.setCharAt(breakPos, (UChar)1); | |
511 | // printf("recording break at %d\n", breakPos); | |
512 | // For the next iteration, pick up applying rules immediately after the break, | |
513 | // which may differ from end of the match. The matching rule may have included | |
514 | // context following the boundary that needs to be looked at again. | |
515 | strIdx = matchingRule->fRuleMatcher->end(breakGroup, status); | |
516 | } else { | |
517 | // Original rule didn't specify a break. | |
518 | // Continue applying rules starting on the last code point of this match. | |
519 | strIdx = fString.moveIndex32(matchEnd, -1); | |
520 | if (strIdx == matchStart) { | |
521 | // Match was only one code point, no progress if we continue. | |
522 | // Shouldn't get here, case is filtered out at top of loop. | |
523 | CharString ruleName; | |
524 | ruleName.appendInvariantChars(matchingRule->fName, status); | |
525 | IntlTest::gTest->errln("%s:%d Rule %s internal error", | |
526 | __FILE__, __LINE__, ruleName.data()); | |
527 | status = U_INVALID_FORMAT_ERROR; | |
528 | break; | |
529 | } | |
530 | } | |
531 | if (U_FAILURE(status)) { | |
532 | IntlTest::gTest->errln("%s:%d status = %s. Unexpected failure, perhaps problem internal to test.", | |
533 | __FILE__, __LINE__, u_errorName(status)); | |
534 | break; | |
535 | } | |
536 | } | |
537 | } | |
538 | ||
539 | void MonkeyTestData::clearActualBreaks() { | |
540 | fActualBreaks.remove(); | |
541 | // Actual Breaks length is one longer than the data string length, allowing | |
542 | // for breaks before the first and after the last character in the data. | |
543 | for (int32_t i=0; i<=fString.length(); i++) { | |
544 | fActualBreaks.append((UChar)0); | |
545 | } | |
546 | } | |
547 | ||
548 | void MonkeyTestData::dump(int32_t around) const { | |
549 | printf("\n" | |
550 | " char break Rule Character\n" | |
551 | " pos code class R I name name\n" | |
552 | "---------------------------------------------------------------------------------------------\n"); | |
553 | ||
554 | int32_t start; | |
555 | int32_t end; | |
556 | ||
557 | if (around == -1) { | |
558 | start = 0; | |
559 | end = fString.length(); | |
560 | } else { | |
561 | // Display context around a failure. | |
562 | start = fString.moveIndex32(around, -30); | |
563 | end = fString.moveIndex32(around, +30); | |
564 | } | |
565 | ||
566 | for (int charIdx = start; charIdx < end; charIdx=fString.moveIndex32(charIdx, 1)) { | |
567 | UErrorCode status = U_ZERO_ERROR; | |
568 | UChar32 c = fString.char32At(charIdx); | |
569 | const CharClass *cc = fBkRules->getClassForChar(c); | |
570 | CharString ccName; | |
571 | ccName.appendInvariantChars(cc->fName, status); | |
572 | CharString ruleName, secondRuleName; | |
573 | const BreakRule *rule = static_cast<BreakRule *>(fBkRules->fBreakRules.elementAt(fRuleForPosition.charAt(charIdx))); | |
574 | ruleName.appendInvariantChars(rule->fName, status); | |
575 | if (f2ndRuleForPos.charAt(charIdx) > 0) { | |
576 | const BreakRule *secondRule = static_cast<BreakRule *>(fBkRules->fBreakRules.elementAt(f2ndRuleForPos.charAt(charIdx))); | |
577 | secondRuleName.appendInvariantChars(secondRule->fName, status); | |
578 | } | |
579 | char cName[200]; | |
580 | u_charName(c, U_EXTENDED_CHAR_NAME, cName, sizeof(cName), &status); | |
581 | ||
582 | printf(" %4.1d %6.4x %-20s %c %c %-10s %-10s %s\n", | |
583 | charIdx, c, ccName.data(), | |
584 | fExpectedBreaks.charAt(charIdx) ? '*' : '.', | |
585 | fActualBreaks.charAt(charIdx) ? '*' : '.', | |
586 | ruleName.data(), secondRuleName.data(), cName | |
587 | ); | |
588 | } | |
589 | } | |
590 | ||
591 | ||
592 | //--------------------------------------------------------------------------------------- | |
593 | // | |
594 | // class RBBIMonkeyImpl | |
595 | // | |
596 | //--------------------------------------------------------------------------------------- | |
597 | ||
598 | RBBIMonkeyImpl::RBBIMonkeyImpl(UErrorCode &status) : fDumpExpansions(FALSE), fThread(this) { | |
599 | (void)status; // suppress unused parameter compiler warning. | |
600 | } | |
601 | ||
602 | ||
603 | // RBBIMonkeyImpl setup does all of the setup for a single rule set - compiling the | |
604 | // reference rules and creating the icu breakiterator to test, | |
605 | // with its type and locale coming from the reference rules. | |
606 | ||
607 | void RBBIMonkeyImpl::setup(const char *ruleFile, UErrorCode &status) { | |
608 | fRuleFileName = ruleFile; | |
609 | openBreakRules(ruleFile, status); | |
610 | if (U_FAILURE(status)) { | |
611 | IntlTest::gTest->errln("%s:%d Error %s opening file %s.", __FILE__, __LINE__, u_errorName(status), ruleFile); | |
612 | return; | |
613 | } | |
614 | fRuleSet.adoptInstead(new BreakRules(this, status)); | |
615 | fRuleSet->compileRules(fRuleCharBuffer.getAlias(), status); | |
616 | if (U_FAILURE(status)) { | |
617 | IntlTest::gTest->errln("%s:%d Error %s processing file %s.", __FILE__, __LINE__, u_errorName(status), ruleFile); | |
618 | return; | |
619 | } | |
620 | fBI.adoptInstead(fRuleSet->createICUBreakIterator(status)); | |
621 | fTestData.adoptInstead(new MonkeyTestData()); | |
622 | } | |
623 | ||
624 | ||
625 | RBBIMonkeyImpl::~RBBIMonkeyImpl() { | |
626 | } | |
627 | ||
628 | ||
629 | void RBBIMonkeyImpl::openBreakRules(const char *fileName, UErrorCode &status) { | |
630 | CharString path; | |
631 | path.append(IntlTest::getSourceTestData(status), status); | |
632 | path.append("break_rules" U_FILE_SEP_STRING, status); | |
633 | path.appendPathPart(fileName, status); | |
634 | const char *codePage = "UTF-8"; | |
635 | fRuleCharBuffer.adoptInstead(ucbuf_open(path.data(), &codePage, TRUE, FALSE, &status)); | |
636 | } | |
637 | ||
638 | ||
639 | void RBBIMonkeyImpl::startTest() { | |
640 | fThread.start(); // invokes runTest() in a separate thread. | |
641 | } | |
642 | ||
643 | void RBBIMonkeyImpl::join() { | |
644 | fThread.join(); | |
645 | } | |
646 | ||
647 | ||
// Report a monkey test failure, including the parameters needed to reproduce it,
// dump the data around the failing index when running verbose, and set the local
// 'status' to U_INVALID_STATE_ERROR.
// Expects 'status' plus the RBBIMonkeyImpl members fRuleFileName, fTestData and
// fVerbose to be in scope at the point of use.
#define MONKEY_ERROR(msg, index) { \
    IntlTest::gTest->errln( \
        "%s:%d %s at index %d. Parameters to reproduce: @rules=%s,seed=%u,loop=1,verbose ", \
        __FILE__, __LINE__, msg, index, fRuleFileName, fTestData->fRandomSeed); \
    if (fVerbose) { \
        fTestData->dump(index); \
    } \
    status = U_INVALID_STATE_ERROR; \
}
654 | ||
655 | void RBBIMonkeyImpl::runTest() { | |
656 | UErrorCode status = U_ZERO_ERROR; | |
657 | int32_t errorCount = 0; | |
658 | for (int64_t loopCount = 0; fLoopCount < 0 || loopCount < fLoopCount; loopCount++) { | |
659 | status = U_ZERO_ERROR; | |
660 | fTestData->set(fRuleSet.getAlias(), fRandomGenerator, status); | |
661 | if (fBI.isNull()) { | |
662 | IntlTest::gTest->dataerrln("Unable to run test because fBI is null."); | |
663 | return; | |
664 | } | |
665 | if ( uprv_strcmp(fRuleFileName,"line_loose_cj.txt") == 0 && fTestData->fRandomSeed==1712915859 ) { | |
666 | continue; // known bug around index 103-104, break expected/actual 0/1, fwd 0020 200D | FDFC, rev 1325A 0020 | 200D | |
667 | } | |
668 | // fTestData->dump(); | |
669 | testForwards(status); | |
670 | testPrevious(status); | |
671 | testFollowing(status); | |
672 | testPreceding(status); | |
673 | testIsBoundary(status); | |
674 | ||
675 | if (fLoopCount < 0 && loopCount % 100 == 0) { | |
676 | fprintf(stderr, "."); | |
677 | } | |
678 | if (U_FAILURE(status)) { | |
679 | if (++errorCount > 10) { | |
680 | return; | |
681 | } | |
682 | } | |
683 | } | |
684 | } | |
685 | ||
686 | void RBBIMonkeyImpl::testForwards(UErrorCode &status) { | |
687 | if (U_FAILURE(status)) { | |
688 | return; | |
689 | } | |
690 | fTestData->clearActualBreaks(); | |
691 | fBI->setText(fTestData->fString); | |
692 | int32_t previousBreak = -2; | |
693 | for (int32_t bk=fBI->first(); bk != BreakIterator::DONE; bk=fBI->next()) { | |
694 | if (bk <= previousBreak) { | |
695 | MONKEY_ERROR("Break Iterator Stall", bk); | |
696 | return; | |
697 | } | |
698 | if (bk < 0 || bk > fTestData->fString.length()) { | |
699 | MONKEY_ERROR("Boundary out of bounds", bk); | |
700 | return; | |
701 | } | |
702 | fTestData->fActualBreaks.setCharAt(bk, 1); | |
703 | } | |
704 | checkResults("testForwards", FORWARD, status); | |
705 | } | |
706 | ||
707 | void RBBIMonkeyImpl::testFollowing(UErrorCode &status) { | |
708 | if (U_FAILURE(status)) { | |
709 | return; | |
710 | } | |
711 | fTestData->clearActualBreaks(); | |
712 | fBI->setText(fTestData->fString); | |
713 | int32_t nextBreak = -1; | |
714 | for (int32_t i=-1 ; i<fTestData->fString.length(); ++i) { | |
715 | int32_t bk = fBI->following(i); | |
716 | if (bk == BreakIterator::DONE && i == fTestData->fString.length()) { | |
717 | continue; | |
718 | } | |
719 | if (bk == nextBreak && bk > i) { | |
720 | // i is in the gap between two breaks. | |
721 | continue; | |
722 | } | |
723 | if (i == nextBreak && bk > nextBreak) { | |
724 | fTestData->fActualBreaks.setCharAt(bk, 1); | |
725 | nextBreak = bk; | |
726 | continue; | |
727 | } | |
728 | MONKEY_ERROR("following(i)", i); | |
729 | return; | |
730 | } | |
731 | checkResults("testFollowing", FORWARD, status); | |
732 | } | |
733 | ||
734 | ||
735 | ||
736 | void RBBIMonkeyImpl::testPrevious(UErrorCode &status) { | |
737 | if (U_FAILURE(status)) {return;} | |
738 | ||
739 | fTestData->clearActualBreaks(); | |
740 | fBI->setText(fTestData->fString); | |
741 | int32_t previousBreak = INT32_MAX; | |
742 | for (int32_t bk=fBI->last(); bk != BreakIterator::DONE; bk=fBI->previous()) { | |
743 | if (bk >= previousBreak) { | |
744 | MONKEY_ERROR("Break Iterator Stall", bk); | |
745 | return; | |
746 | } | |
747 | if (bk < 0 || bk > fTestData->fString.length()) { | |
748 | MONKEY_ERROR("Boundary out of bounds", bk); | |
749 | return; | |
750 | } | |
751 | fTestData->fActualBreaks.setCharAt(bk, 1); | |
752 | } | |
753 | checkResults("testPrevious", REVERSE, status); | |
754 | } | |
755 | ||
756 | ||
757 | void RBBIMonkeyImpl::testPreceding(UErrorCode &status) { | |
758 | if (U_FAILURE(status)) { | |
759 | return; | |
760 | } | |
761 | fTestData->clearActualBreaks(); | |
762 | fBI->setText(fTestData->fString); | |
763 | int32_t nextBreak = fTestData->fString.length()+1; | |
764 | for (int32_t i=fTestData->fString.length()+1 ; i>=0; --i) { | |
765 | int32_t bk = fBI->preceding(i); | |
766 | // printf("i:%d bk:%d nextBreak:%d\n", i, bk, nextBreak); | |
767 | if (bk == BreakIterator::DONE && i == 0) { | |
768 | continue; | |
769 | } | |
770 | if (bk == nextBreak && bk < i) { | |
771 | // i is in the gap between two breaks. | |
772 | continue; | |
773 | } | |
774 | if (i<fTestData->fString.length() && fTestData->fString.getChar32Start(i) < i) { | |
775 | // i indexes to a trailing surrogate. | |
776 | // Break Iterators treat an index to either half as referring to the supplemental code point, | |
777 | // with preceding going to some preceding code point. | |
778 | if (fBI->preceding(i) != fBI->preceding(fTestData->fString.getChar32Start(i))) { | |
779 | MONKEY_ERROR("preceding of trailing surrogate error", i); | |
780 | } | |
781 | continue; | |
782 | } | |
783 | if (i == nextBreak && bk < nextBreak) { | |
784 | fTestData->fActualBreaks.setCharAt(bk, 1); | |
785 | nextBreak = bk; | |
786 | continue; | |
787 | } | |
788 | MONKEY_ERROR("preceding(i)", i); | |
789 | return; | |
790 | } | |
791 | checkResults("testPreceding", REVERSE, status); | |
792 | } | |
793 | ||
794 | ||
795 | void RBBIMonkeyImpl::testIsBoundary(UErrorCode &status) { | |
796 | if (U_FAILURE(status)) { | |
797 | return; | |
798 | } | |
799 | fTestData->clearActualBreaks(); | |
800 | fBI->setText(fTestData->fString); | |
801 | for (int i=fTestData->fString.length(); i>=0; --i) { | |
802 | if (fBI->isBoundary(i)) { | |
803 | fTestData->fActualBreaks.setCharAt(i, 1); | |
804 | } | |
805 | } | |
806 | checkResults("testForwards", FORWARD, status); | |
807 | } | |
808 | ||
809 | void RBBIMonkeyImpl::checkResults(const char *msg, CheckDirection direction, UErrorCode &status) { | |
810 | if (U_FAILURE(status)) { | |
811 | return; | |
812 | } | |
813 | if (direction == FORWARD) { | |
814 | for (int i=0; i<=fTestData->fString.length(); ++i) { | |
815 | if (fTestData->fExpectedBreaks.charAt(i) != fTestData->fActualBreaks.charAt(i)) { | |
816 | if (i > 1) { | |
817 | IntlTest::gTest->errln("%s:%d %s failure at index %d, %04X %04X | %04X, break expected/actual %d/%d. Parameters to reproduce: @rules=%s,seed=%u,loop=1,verbose ", | |
818 | __FILE__, __LINE__, msg, i, fTestData->fString.char32At(i-2), fTestData->fString.char32At(i-1), fTestData->fString.char32At(i), fTestData->fExpectedBreaks.charAt(i), fTestData->fActualBreaks.charAt(i), | |
819 | fRuleFileName, fTestData->fRandomSeed); | |
820 | } else { | |
821 | IntlTest::gTest->errln("%s:%d %s failure at index %d, %04X | %04X, break expected/actual %d/%d. Parameters to reproduce: @rules=%s,seed=%u,loop=1,verbose ", | |
822 | __FILE__, __LINE__, msg, i, fTestData->fString.char32At(i-1), fTestData->fString.char32At(i), fTestData->fExpectedBreaks.charAt(i), fTestData->fActualBreaks.charAt(i), | |
823 | fRuleFileName, fTestData->fRandomSeed); | |
824 | } | |
825 | if (fVerbose) { | |
826 | fTestData->dump(i); | |
827 | } | |
828 | status = U_INVALID_STATE_ERROR; // Prevent the test from continuing, which would likely | |
829 | break; // produce many redundant errors. | |
830 | } | |
831 | } | |
832 | } else { | |
833 | for (int i=fTestData->fString.length(); i>=0; i--) { | |
834 | if (fTestData->fExpectedBreaks.charAt(i) != fTestData->fActualBreaks.charAt(i)) { | |
835 | if (i > 1) { | |
836 | IntlTest::gTest->errln("%s:%d %s failure at index %d, %04X %04X | %04X, break expected/actual %d/%d. Parameters to reproduce: @rules=%s,seed=%u,loop=1,verbose ", | |
837 | __FILE__, __LINE__, msg, i, fTestData->fString.char32At(i-2), fTestData->fString.char32At(i-1), fTestData->fString.char32At(i), fTestData->fExpectedBreaks.charAt(i), fTestData->fActualBreaks.charAt(i), | |
838 | fRuleFileName, fTestData->fRandomSeed); | |
839 | } else { | |
840 | IntlTest::gTest->errln("%s:%d %s failure at index %d, %04X | %04X, break expected/actual %d/%d. Parameters to reproduce: @rules=%s,seed=%u,loop=1,verbose ", | |
841 | __FILE__, __LINE__, msg, i, fTestData->fString.char32At(i-1), fTestData->fString.char32At(i), fTestData->fExpectedBreaks.charAt(i), fTestData->fActualBreaks.charAt(i), | |
842 | fRuleFileName, fTestData->fRandomSeed); | |
843 | } | |
844 | if (fVerbose) { | |
845 | fTestData->dump(i); | |
846 | } | |
847 | status = U_INVALID_STATE_ERROR; | |
848 | break; | |
849 | } | |
850 | } | |
851 | } | |
852 | } | |
853 | ||
854 | ||
855 | ||
856 | //--------------------------------------------------------------------------------------- | |
857 | // | |
858 | // class RBBIMonkeyTest implementation. | |
859 | // | |
860 | //--------------------------------------------------------------------------------------- | |
861 | RBBIMonkeyTest::RBBIMonkeyTest() { | |
862 | } | |
863 | ||
864 | RBBIMonkeyTest::~RBBIMonkeyTest() { | |
865 | } | |
866 | ||
867 | ||
868 | // params, taken from this->fParams. | |
869 | // rules=file_name Name of file containing the reference rules. | |
870 | // seed=nnnnn Random number starting seed. | |
871 | // Setting the seed allows errors to be reproduced. | |
872 | // loop=nnn Looping count. Controls running time. | |
873 | // -1: run forever. | |
874 | // 0 or greater: run length. | |
875 | // expansions debug option, show expansions of rules and sets. | |
876 | // verbose Display details of the failure. | |
877 | // | |
878 | // Parameters on the intltest command line follow the test name, and are preceded by '@'. | |
879 | // For example, | |
880 | // intltest rbbi/RBBIMonkeyTest/testMonkey@rules=line.txt,loop=-1 | |
881 | // | |
882 | void RBBIMonkeyTest::testMonkey() { | |
883 | // printf("Test parameters: %s\n", fParams); | |
884 | UnicodeString params(fParams); | |
885 | UErrorCode status = U_ZERO_ERROR; | |
886 | ||
887 | const char *tests[] = {"grapheme.txt", "word.txt", "line.txt", "sentence.txt", "line_normal.txt", | |
888 | "line_normal_cj.txt", "line_loose.txt", "line_loose_cj.txt", "word_POSIX.txt", | |
889 | NULL }; | |
890 | CharString testNameFromParams; | |
891 | if (getStringParam("rules", params, testNameFromParams, status)) { | |
892 | tests[0] = testNameFromParams.data(); | |
893 | tests[1] = NULL; | |
894 | } | |
895 | ||
896 | int64_t loopCount = quick? 100 : 5000; | |
897 | getIntParam("loop", params, loopCount, status); | |
898 | ||
899 | UBool dumpExpansions = FALSE; | |
900 | getBoolParam("expansions", params, dumpExpansions, status); | |
901 | ||
902 | UBool verbose = FALSE; | |
903 | getBoolParam("verbose", params, verbose, status); | |
904 | ||
905 | int64_t seed = 0; | |
906 | getIntParam("seed", params, seed, status); | |
907 | ||
908 | if (params.length() != 0) { | |
909 | // Options processing did not consume all of the parameters. Something unrecognized was present. | |
910 | CharString unrecognizedParameters; | |
911 | unrecognizedParameters.append(CStr(params)(), -1, status); | |
912 | errln("%s:%d unrecognized test parameter(s) \"%s\"", __FILE__, __LINE__, unrecognizedParameters.data()); | |
913 | return; | |
914 | } | |
915 | ||
916 | UVector startedTests(status); | |
917 | if (U_FAILURE(status)) { | |
918 | errln("%s:%d: error %s while setting up test.", __FILE__, __LINE__, u_errorName(status)); | |
919 | return; | |
920 | } | |
921 | ||
922 | // Monkey testing is multi-threaded. | |
923 | // Each set of break rules to be tested is run in a separate thread. | |
924 | // Each thread/set of rules gets a separate RBBIMonkeyImpl object. | |
925 | int32_t i; | |
926 | for (i=0; tests[i] != NULL; ++i) { | |
927 | logln("beginning testing of %s", tests[i]); | |
928 | RBBIMonkeyImpl *test = new RBBIMonkeyImpl(status); | |
f3c0d7a5 A |
929 | if (U_FAILURE(status)) { |
930 | errln("%s:%d: error %s while starting test %s.", __FILE__, __LINE__, u_errorName(status), tests[i]); | |
931 | break; | |
932 | } | |
2ca993e8 A |
933 | test->fDumpExpansions = dumpExpansions; |
934 | test->fVerbose = verbose; | |
935 | test->fRandomGenerator.seed((uint32_t)seed); | |
936 | test->fLoopCount = loopCount; | |
937 | test->setup(tests[i], status); | |
f3c0d7a5 A |
938 | if (U_FAILURE(status)) { |
939 | errln("%s:%d: error %s while starting test %s.", __FILE__, __LINE__, u_errorName(status), tests[i]); | |
940 | break; | |
941 | } | |
2ca993e8 A |
942 | test->startTest(); |
943 | startedTests.addElement(test, status); | |
944 | if (U_FAILURE(status)) { | |
f3c0d7a5 | 945 | errln("%s:%d: error %s while starting test %s.", __FILE__, __LINE__, u_errorName(status), tests[i]); |
2ca993e8 A |
946 | break; |
947 | } | |
948 | } | |
949 | ||
2ca993e8 A |
950 | for (i=0; i<startedTests.size(); ++i) { |
951 | RBBIMonkeyImpl *test = static_cast<RBBIMonkeyImpl *>(startedTests.elementAt(i)); | |
952 | test->join(); | |
953 | delete test; | |
954 | } | |
955 | } | |
956 | ||
957 | ||
958 | UBool RBBIMonkeyTest::getIntParam(UnicodeString name, UnicodeString ¶ms, int64_t &val, UErrorCode &status) { | |
959 | name.append(" *= *(-?\\d+) *,? *"); | |
960 | RegexMatcher m(name, params, 0, status); | |
961 | if (m.find()) { | |
962 | // The param exists. Convert the string to an int. | |
963 | CharString str; | |
964 | str.append(CStr(m.group(1, status))(), -1, status); | |
965 | val = strtol(str.data(), NULL, 10); | |
966 | ||
967 | // Delete this parameter from the params string. | |
968 | m.reset(); | |
969 | params = m.replaceFirst(UnicodeString(), status); | |
970 | return TRUE; | |
971 | } | |
972 | return FALSE; | |
973 | } | |
974 | ||
975 | UBool RBBIMonkeyTest::getStringParam(UnicodeString name, UnicodeString ¶ms, CharString &dest, UErrorCode &status) { | |
976 | name.append(" *= *([^ ,]*) *,? *"); | |
977 | RegexMatcher m(name, params, 0, status); | |
978 | if (m.find()) { | |
979 | // The param exists. | |
980 | dest.append(CStr(m.group(1, status))(), -1, status); | |
981 | ||
982 | // Delete this parameter from the params string. | |
983 | m.reset(); | |
984 | params = m.replaceFirst(UnicodeString(), status); | |
985 | return TRUE; | |
986 | } | |
987 | return FALSE; | |
988 | } | |
989 | ||
990 | UBool RBBIMonkeyTest::getBoolParam(UnicodeString name, UnicodeString ¶ms, UBool &dest, UErrorCode &status) { | |
991 | name.append("(?: *= *(true|false))? *,? *"); | |
992 | RegexMatcher m(name, params, UREGEX_CASE_INSENSITIVE, status); | |
993 | if (m.find()) { | |
994 | if (m.start(1, status) > 0) { | |
995 | // user option included a value. | |
996 | dest = m.group(1, status).caseCompare(UnicodeString("true"), U_FOLD_CASE_DEFAULT) == 0; | |
997 | } else { | |
998 | // No explicit user value, implies true. | |
999 | dest = TRUE; | |
1000 | } | |
1001 | ||
1002 | // Delete this parameter from the params string. | |
1003 | m.reset(); | |
1004 | params = m.replaceFirst(UnicodeString(), status); | |
1005 | return TRUE; | |
1006 | } | |
1007 | return FALSE; | |
1008 | } | |
1009 | ||
1010 | #endif /* !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_REGULAR_EXPRESSIONS && !UCONFIG_NO_FORMATTING */ |