2 *****************************************************************************
3 * Copyright (C) 2001-2004, International Business Machines Corporation
4 * and others. All Rights Reserved.
5 ****************************************************************************/
7 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_COLLATION
12 #include "../cintltst/usrchdat.c"
13 #include "unicode/stsearch.h"
14 #include "unicode/ustring.h"
15 #include "unicode/schriter.h"
19 // private definitions -----------------------------------------------------
// Helper macro used by runIndexedTest as CASE(index, testMethod).
// NOTE(review): only two lines of the macro body are visible in this extract
// (the one shown logs an empty line); the other continuation lines are missing.
21 #define CASE(id,test) \
26 logln((UnicodeString)""); \
31 // public constructors and destructors --------------------------------------
// Constructor: builds the fixtures used by the tests in this file.
//  - Creates collators for en_US, fr_FR, de_DE and es_ES.
//  - Re-creates the de and es collators from their own rules with
//    EXTRACOLLATIONRULE appended.
//  - Creates English word and character break iterators when break
//    iteration is compiled in; both members start out as NULL.
// NOTE(review): one UErrorCode is shared by all four createInstance() calls
// and is only tested once afterwards, so the first failing locale is not
// identified individually.
33 StringSearchTest::StringSearchTest() :
34 m_en_wordbreaker_(NULL
), m_en_characterbreaker_(NULL
)
36 UErrorCode status
= U_ZERO_ERROR
;
38 m_en_us_
= (RuleBasedCollator
*)Collator::createInstance("en_US", status
);
39 m_fr_fr_
= (RuleBasedCollator
*)Collator::createInstance("fr_FR", status
);
40 m_de_
= (RuleBasedCollator
*)Collator::createInstance("de_DE", status
);
41 m_es_
= (RuleBasedCollator
*)Collator::createInstance("es_ES", status
);
42 if(U_FAILURE(status
)) {
51 errln("Collator creation failed with %s", u_errorName(status
));
// German collator: take its current rules and append the extra rule text.
57 rules
.setTo(((RuleBasedCollator
*)m_de_
)->getRules());
58 UChar extrarules
[128];
59 u_unescape(EXTRACOLLATIONRULE
, extrarules
, 128);
60 rules
.append(extrarules
, u_strlen(extrarules
));
// NOTE(review): m_de_ is overwritten here; the release of the previous
// collator is not visible in this extract - confirm it is deleted first.
63 m_de_
= new RuleBasedCollator(rules
, status
);
// Spanish collator: same treatment, reusing the already unescaped extra rules.
65 rules
.setTo(((RuleBasedCollator
*)m_es_
)->getRules());
66 rules
.append(extrarules
, u_strlen(extrarules
));
70 m_es_
= new RuleBasedCollator(rules
, status
);
72 #if !UCONFIG_NO_BREAK_ITERATION
73 m_en_wordbreaker_
= BreakIterator::createWordInstance(
74 Locale::getEnglish(), status
);
75 m_en_characterbreaker_
= BreakIterator::createCharacterInstance(
76 Locale::getEnglish(), status
);
// Destructor: deletes the two break iterators created by the constructor
// (only when break iteration is compiled in).
// NOTE(review): the clean-up of the collators is not visible in this extract.
80 StringSearchTest::~StringSearchTest()
86 #if !UCONFIG_NO_BREAK_ITERATION
87 delete m_en_wordbreaker_
;
88 delete m_en_characterbreaker_
;
92 // public methods ----------------------------------------------------------
// Test dispatcher: selects a test method by `index` through the CASE macro.
// `name` is an output parameter; an unknown index sets it to the empty string.
// `exec` takes part in the fixture check below; its further use is inside the
// CASE macro, which is not fully visible here.
// Indices 5 and 23 (the break-iterator tests) only set `name` when
// UCONFIG_NO_BREAK_ITERATION is defined.
// NOTE(review): some indices (2, 7, 9, 10, 16) have no visible CASE line in
// this extract; presumably those lines were lost - confirm against the file.
94 void StringSearchTest::runIndexedTest(int32_t index
, UBool exec
,
95 const char* &name
, char* )
// NOTE(review): every NULL check is joined with &&, so the error is reported
// only when ALL collators and break iterators are missing. If any single
// missing fixture should stop the run, || may have been intended - confirm.
97 if (m_en_us_
== NULL
&& m_fr_fr_
== NULL
&& m_de_
== NULL
&&
98 m_es_
== NULL
&& m_en_wordbreaker_
== NULL
&&
99 m_en_characterbreaker_
== NULL
&& exec
) {
100 errln(__FILE__
" cannot test - failed to create collator.");
106 CASE(0, TestOpenClose
)
107 CASE(1, TestInitialization
)
109 CASE(3, TestNormExact
)
110 CASE(4, TestStrength
)
111 #if UCONFIG_NO_BREAK_ITERATION
113 name
= "TestBreakIterator";
116 CASE(5, TestBreakIterator
)
118 CASE(6, TestVariable
)
120 CASE(8, TestCollator
)
123 CASE(11, TestCompositeBoundaries
)
124 CASE(12, TestGetSetOffset
)
125 CASE(13, TestGetSetAttribute
)
126 CASE(14, TestGetMatch
)
127 CASE(15, TestSetMatch
)
129 CASE(17, TestSupplementary
)
130 CASE(18, TestContraction
)
131 CASE(19, TestIgnorable
)
132 CASE(20, TestCanonical
)
133 CASE(21, TestNormCanonical
)
134 CASE(22, TestStrengthCanonical
)
135 #if UCONFIG_NO_BREAK_ITERATION
137 name
= "TestBreakIteratorCanonical";
140 CASE(23, TestBreakIteratorCanonical
)
142 CASE(24, TestVariableCanonical
)
143 CASE(25, TestOverlapCanonical
)
144 CASE(26, TestCollatorCanonical
)
145 CASE(27, TestPatternCanonical
)
146 CASE(28, TestTextCanonical
)
147 CASE(29, TestCompositeBoundariesCanonical
)
148 CASE(30, TestGetSetOffsetCanonical
)
149 CASE(31, TestSupplementaryCanonical
)
150 CASE(32, TestContractionCanonical
)
151 CASE(33, TestUClassID
)
152 CASE(34, TestSubclass
)
153 default: name
= ""; break;
157 // private methods ------------------------------------------------------
// Maps the collator key stored in a SearchData entry to a RuleBasedCollator.
// The visible branches distinguish a NULL key and the keys "fr", "de", "es".
// NOTE(review): the return statements are missing from this extract;
// presumably each branch returns the matching m_..._ fixture and NULL/other
// keys fall back to the English collator - confirm against the full file.
159 RuleBasedCollator
* StringSearchTest::getCollator(const char *collator
)
161 if (collator
== NULL
) {
164 if (strcmp(collator
, "fr") == 0) {
167 else if (strcmp(collator
, "de") == 0) {
170 else if (strcmp(collator
, "es") == 0) {
// Maps the break-iterator key stored in a SearchData entry to a fixture:
// "wordbreaker" selects m_en_wordbreaker_, and the other visible return
// yields m_en_characterbreaker_.
// NOTE(review): the results for a NULL key and for builds with
// UCONFIG_NO_BREAK_ITERATION are not visible in this extract.
178 BreakIterator
* StringSearchTest::getBreakIterator(const char *breaker
)
180 #if UCONFIG_NO_BREAK_ITERATION
183 if (breaker
== NULL
) {
186 if (strcmp(breaker
, "wordbreaker") == 0) {
187 return m_en_wordbreaker_
;
190 return m_en_characterbreaker_
;
// Renders `text` as printable ASCII for diagnostic output: code units in the
// range 0x20..0x7e are copied as-is, every other code unit is written as a
// six-character backslash-u escape with four hex digits.
// The output is built in a function-static buffer, so the content written by
// one call is overwritten by the next; callers in this file print the string
// before calling again.
// NOTE(review): no bounds check against the 1024-byte buffer is visible in
// this extract; a long or heavily escaped text could overrun it - confirm.
195 char * StringSearchTest::toCharString(const UnicodeString
&text
)
197 static char result
[1024];
200 int length
= text
.length();
202 for (; count
< length
; count
++) {
203 UChar ch
= text
[count
];
204 if (ch
>= 0x20 && ch
<= 0x7e) {
205 result
[index
++] = (char)ch
;
208 sprintf(result
+index
, "\\u%04x", ch
);
209 index
+= 6; /* \uxxxx */
// Converts a C-API UCollationStrength value into the C++ enum
// Collator::ECollationStrength. The visible returns cover PRIMARY, SECONDARY
// (for UCOL_SECONDARY), TERTIARY and IDENTICAL.
// NOTE(review): most case labels are missing from this extract, so which
// input maps to TERTIARY versus IDENTICAL cannot be confirmed from here.
217 Collator::ECollationStrength
StringSearchTest::getECollationStrength(
218 const UCollationStrength
&strength
) const
223 return Collator::PRIMARY
;
224 case UCOL_SECONDARY
:
225 return Collator::SECONDARY
;
227 return Collator::TERTIARY
;
229 return Collator::IDENTICAL
;
// Core verification helper: drives `strsrch` through all matches and compares
// them with the expectations stored in `search`.
//  1. Before any iteration the matched start must be USEARCH_DONE and the
//     matched length 0.
//  2. Forward pass: next() is called once per expected match; start and
//     length must equal search->offset[count] / search->size[count], and the
//     matched text must compare equal to the corresponding range of the text.
//     The expected list ends at the first negative offset.
//  3. One more next() must report USEARCH_DONE with length 0.
//  4. Backward pass: the same checks using previous(), walking the expected
//     list from its last entry down to index 0, followed by one more
//     previous() that must again report USEARCH_DONE.
// On a mismatch the text and pattern are logged via toCharString().
// NOTE(review): the return statements and the updates of `count` inside the
// loops are missing from this extract; presumably FALSE is returned on the
// first failure and TRUE at the end - confirm against the full file.
233 UBool
StringSearchTest::assertEqualWithStringSearch(StringSearch
*strsrch
,
234 const SearchData
*search
)
237 UErrorCode status
= U_ZERO_ERROR
;
238 int32_t matchindex
= search
->offset
[count
];
239 UnicodeString matchtext
;
241 if (strsrch
->getMatchedStart() != USEARCH_DONE
||
242 strsrch
->getMatchedLength() != 0) {
243 errln("Error with the initialization of match start and length");
245 // start of following matches
246 while (U_SUCCESS(status
) && matchindex
>= 0) {
247 int32_t matchlength
= search
->size
[count
];
248 strsrch
->next(status
);
249 if (matchindex
!= strsrch
->getMatchedStart() ||
250 matchlength
!= strsrch
->getMatchedLength()) {
251 char *str
= toCharString(strsrch
->getText());
252 errln("Text: %s", str
);
253 str
= toCharString(strsrch
->getPattern());
254 errln("Pattern: %s", str
);
255 errln("Error following match found at %d %d",
256 strsrch
->getMatchedStart(), strsrch
->getMatchedLength());
261 strsrch
->getMatchedText(matchtext
);
263 if (U_FAILURE(status
) ||
264 strsrch
->getText().compareBetween(matchindex
,
265 matchindex
+ matchlength
,
267 matchtext
.length())) {
268 errln("Error getting following matched text");
271 matchindex
= search
->offset
[count
];
// After the last expected match the iterator must report "no match".
273 strsrch
->next(status
);
274 if (strsrch
->getMatchedStart() != USEARCH_DONE
||
275 strsrch
->getMatchedLength() != 0) {
276 char *str
= toCharString(strsrch
->getText());
277 errln("Text: %s", str
);
278 str
= toCharString(strsrch
->getPattern());
279 errln("Pattern: %s", str
);
280 errln("Error following match found at %d %d",
281 strsrch
->getMatchedStart(), strsrch
->getMatchedLength());
284 // start of preceding matches
// Step back to the last valid expectation (count stays 0 if there is none).
285 count
= count
== 0 ? 0 : count
- 1;
286 matchindex
= search
->offset
[count
];
287 while (U_SUCCESS(status
) && matchindex
>= 0) {
288 int32_t matchlength
= search
->size
[count
];
289 strsrch
->previous(status
);
290 if (matchindex
!= strsrch
->getMatchedStart() ||
291 matchlength
!= strsrch
->getMatchedLength()) {
292 char *str
= toCharString(strsrch
->getText());
293 errln("Text: %s", str
);
294 str
= toCharString(strsrch
->getPattern());
295 errln("Pattern: %s", str
);
// NOTE(review): the messages in this backward pass still say "following";
// they appear to be copied from the forward pass.
296 errln("Error following match found at %d %d",
297 strsrch
->getMatchedStart(), strsrch
->getMatchedLength());
301 strsrch
->getMatchedText(matchtext
);
303 if (U_FAILURE(status
) ||
304 strsrch
->getText().compareBetween(matchindex
,
305 matchindex
+ matchlength
,
307 matchtext
.length())) {
308 errln("Error getting following matched text");
// Move to the previous expectation, or -1 to end the loop at the first one.
311 matchindex
= count
> 0 ? search
->offset
[count
- 1] : -1;
314 strsrch
->previous(status
);
315 if (strsrch
->getMatchedStart() != USEARCH_DONE
||
316 strsrch
->getMatchedLength() != 0) {
317 char *str
= toCharString(strsrch
->getText());
318 errln("Text: %s", str
);
319 str
= toCharString(strsrch
->getPattern());
320 errln("Pattern: %s", str
);
321 errln("Error following match found at %d %d",
322 strsrch
->getMatchedStart(), strsrch
->getMatchedLength());
// Runs a single SearchData case with default search attributes:
// looks up the collator and break iterator named by the entry, unescapes
// text and pattern, applies the entry's collation strength, opens a
// StringSearch and verifies it with assertEqualWithStringSearch(). A clone()
// of the search object is verified the same way.
// The collator strength is set back to tertiary on the visible exit paths.
// When break iteration is compiled out, entries that name a break iterator
// are skipped by returning TRUE.
// NOTE(review): declarations of `temp`/`text`, the delete of the search
// objects and most return statements are missing from this extract.
328 UBool
StringSearchTest::assertEqual(const SearchData
*search
)
330 UErrorCode status
= U_ZERO_ERROR
;
332 Collator
*collator
= getCollator(search
->collator
);
333 BreakIterator
*breaker
= getBreakIterator(search
->breaker
);
334 StringSearch
*strsrch
, *strsrch2
;
337 #if UCONFIG_NO_BREAK_ITERATION
338 if(search
->breaker
) {
339 return TRUE
; /* skip test */
342 u_unescape(search
->text
, temp
, 128);
345 u_unescape(search
->pattern
, temp
, 128);
346 UnicodeString pattern
;
349 #if !UCONFIG_NO_BREAK_ITERATION
350 if (breaker
!= NULL
) {
351 breaker
->setText(text
);
354 collator
->setStrength(getECollationStrength(search
->strength
));
355 strsrch
= new StringSearch(pattern
, text
, (RuleBasedCollator
*)collator
,
357 if (U_FAILURE(status
)) {
358 errln("Error opening string search %s", u_errorName(status
));
362 if (!assertEqualWithStringSearch(strsrch
, search
)) {
363 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
// The clone must be a distinct object that compares equal to the original
// and produces the same matches.
369 strsrch2
= strsrch
->clone();
370 if( strsrch2
== strsrch
|| *strsrch2
!= *strsrch
||
371 !assertEqualWithStringSearch(strsrch2
, search
)
373 errln("failure with StringSearch.clone()");
374 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
381 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
// Same flow as a plain single-case check, but with canonical matching
// switched on: the StringSearch is opened for the entry's text, pattern,
// collator and break iterator, USEARCH_CANONICAL_MATCH is set to USEARCH_ON,
// and the result is verified with assertEqualWithStringSearch().
// The collator strength is set back to tertiary on the visible exit paths.
// When break iteration is compiled out, entries that name a break iterator
// are skipped by returning TRUE.
// NOTE(review): declarations of `temp`/`text`, the delete of the search
// object and most return statements are missing from this extract.
386 UBool
StringSearchTest::assertCanonicalEqual(const SearchData
*search
)
388 UErrorCode status
= U_ZERO_ERROR
;
389 Collator
*collator
= getCollator(search
->collator
);
390 BreakIterator
*breaker
= getBreakIterator(search
->breaker
);
391 StringSearch
*strsrch
;
394 #if UCONFIG_NO_BREAK_ITERATION
395 if(search
->breaker
) {
396 return TRUE
; /* skip test */
400 u_unescape(search
->text
, temp
, 128);
403 u_unescape(search
->pattern
, temp
, 128);
404 UnicodeString pattern
;
407 #if !UCONFIG_NO_BREAK_ITERATION
408 if (breaker
!= NULL
) {
409 breaker
->setText(text
);
412 collator
->setStrength(getECollationStrength(search
->strength
));
413 strsrch
= new StringSearch(pattern
, text
, (RuleBasedCollator
*)collator
,
415 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
// `status` covers both the construction and the attribute change above.
416 if (U_FAILURE(status
)) {
417 errln("Error opening string search %s", u_errorName(status
));
421 if (!assertEqualWithStringSearch(strsrch
, search
)) {
422 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
426 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
// Runs a single SearchData case with caller-chosen search attributes:
// `canonical` is applied to USEARCH_CANONICAL_MATCH and `overlap` to
// USEARCH_OVERLAP before the matches are verified with
// assertEqualWithStringSearch().
// The collator strength is set back to tertiary on the visible exit paths.
// When break iteration is compiled out, entries that name a break iterator
// are skipped by returning TRUE.
// NOTE(review): declarations of `temp`/`text`, the delete of the search
// object and most return statements are missing from this extract.
431 UBool
StringSearchTest::assertEqualWithAttribute(const SearchData
*search
,
432 USearchAttributeValue canonical
,
433 USearchAttributeValue overlap
)
435 UErrorCode status
= U_ZERO_ERROR
;
436 Collator
*collator
= getCollator(search
->collator
);
437 BreakIterator
*breaker
= getBreakIterator(search
->breaker
);
438 StringSearch
*strsrch
;
442 #if UCONFIG_NO_BREAK_ITERATION
443 if(search
->breaker
) {
444 return TRUE
; /* skip test */
448 u_unescape(search
->text
, temp
, 128);
451 u_unescape(search
->pattern
, temp
, 128);
452 UnicodeString pattern
;
455 #if !UCONFIG_NO_BREAK_ITERATION
456 if (breaker
!= NULL
) {
457 breaker
->setText(text
);
460 collator
->setStrength(getECollationStrength(search
->strength
));
461 strsrch
= new StringSearch(pattern
, text
, (RuleBasedCollator
*)collator
,
463 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, canonical
, status
);
464 strsrch
->setAttribute(USEARCH_OVERLAP
, overlap
, status
);
// `status` covers the construction and both attribute changes above.
466 if (U_FAILURE(status
)) {
467 errln("Error opening string search %s", u_errorName(status
));
471 if (!assertEqualWithStringSearch(strsrch
, search
)) {
472 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
476 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
// Exercises the StringSearch constructors, once with a UnicodeString text and
// once with a CharacterIterator text for every scenario:
//  - a NULL collator must yield a failing status (with empty pattern, empty
//    text and with both set, as described by the error messages);
//  - a valid collator or Locale with a NULL break iterator must succeed;
//  - a valid collator with a break iterator must succeed.
// `status` is reset to U_ZERO_ERROR before each construction so that each
// scenario is judged independently.
// NOTE(review): the declaration of `text`, the statements that change it
// between scenarios and the deletes of `result` are missing from this extract.
481 void StringSearchTest::TestOpenClose()
483 UErrorCode status
= U_ZERO_ERROR
;
484 StringSearch
*result
;
485 BreakIterator
*breakiter
= m_en_wordbreaker_
;
486 UnicodeString pattern
;
488 UnicodeString
temp("a");
489 StringCharacterIterator
chariter(text
);
491 /* testing null arguments */
492 result
= new StringSearch(pattern
, text
, NULL
, NULL
, status
);
493 if (U_SUCCESS(status
)) {
494 errln("Error: NULL arguments should produce an error");
498 chariter
.setText(text
);
499 status
= U_ZERO_ERROR
;
500 result
= new StringSearch(pattern
, chariter
, NULL
, NULL
, status
);
501 if (U_SUCCESS(status
)) {
502 errln("Error: NULL arguments should produce an error");
507 status
= U_ZERO_ERROR
;
508 result
= new StringSearch(pattern
, text
, NULL
, NULL
, status
);
509 if (U_SUCCESS(status
)) {
510 errln("Error: Empty pattern should produce an error");
514 chariter
.setText(text
);
515 status
= U_ZERO_ERROR
;
516 result
= new StringSearch(pattern
, chariter
, NULL
, NULL
, status
);
517 if (U_SUCCESS(status
)) {
518 errln("Error: Empty pattern should produce an error");
523 pattern
.append(temp
);
524 status
= U_ZERO_ERROR
;
525 result
= new StringSearch(pattern
, text
, NULL
, NULL
, status
);
526 if (U_SUCCESS(status
)) {
527 errln("Error: Empty text should produce an error");
531 chariter
.setText(text
);
532 status
= U_ZERO_ERROR
;
533 result
= new StringSearch(pattern
, chariter
, NULL
, NULL
, status
);
534 if (U_SUCCESS(status
)) {
535 errln("Error: Empty text should produce an error");
540 status
= U_ZERO_ERROR
;
541 result
= new StringSearch(pattern
, text
, NULL
, NULL
, status
);
542 if (U_SUCCESS(status
)) {
543 errln("Error: NULL arguments should produce an error");
547 chariter
.setText(text
);
548 status
= U_ZERO_ERROR
;
549 result
= new StringSearch(pattern
, chariter
, NULL
, NULL
, status
);
550 if (U_SUCCESS(status
)) {
551 errln("Error: NULL arguments should produce an error");
// From here on a valid collator (or Locale) is supplied, so construction is
// expected to succeed.
555 status
= U_ZERO_ERROR
;
556 result
= new StringSearch(pattern
, text
, m_en_us_
, NULL
, status
);
557 if (U_FAILURE(status
)) {
558 errln("Error: NULL break iterator is valid for opening search");
562 status
= U_ZERO_ERROR
;
563 result
= new StringSearch(pattern
, chariter
, m_en_us_
, NULL
, status
);
564 if (U_FAILURE(status
)) {
565 errln("Error: NULL break iterator is valid for opening search");
569 status
= U_ZERO_ERROR
;
570 result
= new StringSearch(pattern
, text
, Locale::getEnglish(), NULL
, status
);
571 if (U_FAILURE(status
) || result
== NULL
) {
572 errln("Error: NULL break iterator is valid for opening search");
576 status
= U_ZERO_ERROR
;
577 result
= new StringSearch(pattern
, chariter
, Locale::getEnglish(), NULL
, status
);
578 if (U_FAILURE(status
)) {
579 errln("Error: NULL break iterator is valid for opening search");
583 status
= U_ZERO_ERROR
;
584 result
= new StringSearch(pattern
, text
, m_en_us_
, breakiter
, status
);
585 if (U_FAILURE(status
)) {
586 errln("Error: Break iterator is valid for opening search");
590 status
= U_ZERO_ERROR
;
// NOTE(review): this call passes NULL although the message below refers to a
// break iterator; `breakiter` may have been intended here - confirm.
591 result
= new StringSearch(pattern
, chariter
, m_en_us_
, NULL
, status
);
592 if (U_FAILURE(status
)) {
593 errln("Error: Break iterator is valid for opening search");
// Checks copying and equality of StringSearch objects:
//  - a copy made with the copy constructor must agree with the original on
//    collator, break iterator, matched start/length, offset, pattern, text
//    and operator!=;
//  - a copy made with safeClone() must satisfy the same comparisons;
//  - after 512 more pattern units are appended, a new search object must
//    equal itself and must differ from the earlier copy.
// NOTE(review): the declaration of `text` and `count`, the deletes and the
// statement that refreshes `copy` before the final comparison block are
// missing from this extract.
598 void StringSearchTest::TestInitialization()
600 UErrorCode status
= U_ZERO_ERROR
;
601 UnicodeString pattern
;
603 UnicodeString
temp("a");
604 StringSearch
*result
;
607 /* simple test on the pattern ce construction */
608 pattern
.append(temp
);
609 pattern
.append(temp
);
613 result
= new StringSearch(pattern
, text
, m_en_us_
, NULL
, status
);
614 if (U_FAILURE(status
)) {
615 errln("Error opening search %s", u_errorName(status
));
617 StringSearch
*copy
= new StringSearch(*result
);
618 if (*(copy
->getCollator()) != *(result
->getCollator()) ||
619 copy
->getBreakIterator() != result
->getBreakIterator() ||
620 copy
->getMatchedLength() != result
->getMatchedLength() ||
621 copy
->getMatchedStart() != result
->getMatchedStart() ||
622 copy
->getOffset() != result
->getOffset() ||
623 copy
->getPattern() != result
->getPattern() ||
624 copy
->getText() != result
->getText() ||
625 *(copy
) != *(result
))
627 errln("Error copying StringSearch");
631 copy
= (StringSearch
*)result
->safeClone();
632 if (*(copy
->getCollator()) != *(result
->getCollator()) ||
633 copy
->getBreakIterator() != result
->getBreakIterator() ||
634 copy
->getMatchedLength() != result
->getMatchedLength() ||
635 copy
->getMatchedStart() != result
->getMatchedStart() ||
636 copy
->getOffset() != result
->getOffset() ||
637 copy
->getPattern() != result
->getPattern() ||
638 copy
->getText() != result
->getText() ||
639 *(copy
) != *(result
)) {
640 errln("Error copying StringSearch");
644 /* testing if an extremely large pattern will fail the initialization */
645 for (count
= 0; count
< 512; count
++) {
646 pattern
.append(temp
);
648 result
= new StringSearch(pattern
, text
, m_en_us_
, NULL
, status
);
649 if (*result
!= *result
) {
650 errln("Error: string search object expected to match itself");
// `copy` still holds the short pattern here, so it must not compare equal.
652 if (*result
== *copy
) {
653 errln("Error: string search objects are not expected to match");
656 if (*(copy
->getCollator()) != *(result
->getCollator()) ||
657 copy
->getBreakIterator() != result
->getBreakIterator() ||
658 copy
->getMatchedLength() != result
->getMatchedLength() ||
659 copy
->getMatchedStart() != result
->getMatchedStart() ||
660 copy
->getOffset() != result
->getOffset() ||
661 copy
->getPattern() != result
->getPattern() ||
662 copy
->getText() != result
->getText() ||
663 *(copy
) != *(result
)) {
664 errln("Error copying StringSearch");
666 if (U_FAILURE(status
)) {
667 errln("Error opening search %s", u_errorName(status
));
// Runs assertEqual() over every entry of the BASIC table; the table ends at
// the first entry whose `text` is NULL. A failing entry is reported with its
// index.
// NOTE(review): the declaration and increment of `count` are missing from
// this extract.
673 void StringSearchTest::TestBasic()
676 while (BASIC
[count
].text
!= NULL
) {
677 //printf("count %d", count);
678 if (!assertEqual(&BASIC
[count
])) {
679 errln("Error at test number %d", count
);
// Exact-match tests with respect to collator normalization:
//  - with UCOL_NORMALIZATION_MODE switched on for the en_US collator, the
//    BASIC and NORMEXACT tables are run through assertEqual();
//  - normalization is then switched off again and the NONNORMEXACT table is
//    run.
// Each table ends at the first entry whose `text` is NULL.
// NOTE(review): the declaration, reset and increment of `count` are missing
// from this extract.
685 void StringSearchTest::TestNormExact()
688 UErrorCode status
= U_ZERO_ERROR
;
689 m_en_us_
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
690 if (U_FAILURE(status
)) {
691 errln("Error setting collation normalization %s",
692 u_errorName(status
));
694 while (BASIC
[count
].text
!= NULL
) {
695 if (!assertEqual(&BASIC
[count
])) {
696 errln("Error at test number %d", count
);
701 while (NORMEXACT
[count
].text
!= NULL
) {
702 if (!assertEqual(&NORMEXACT
[count
])) {
703 errln("Error at test number %d", count
);
// Restore the shared collator before running the non-normalized table.
707 m_en_us_
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
709 while (NONNORMEXACT
[count
].text
!= NULL
) {
710 if (!assertEqual(&NONNORMEXACT
[count
])) {
711 errln("Error at test number %d", count
);
// Runs assertEqual() over every entry of the STRENGTH table; the table ends
// at the first entry whose `text` is NULL. A failing entry is reported with
// its index.
// NOTE(review): the declaration and increment of `count` are missing from
// this extract.
717 void StringSearchTest::TestStrength()
720 while (STRENGTH
[count
].text
!= NULL
) {
721 if (!assertEqual(&STRENGTH
[count
])) {
722 errln("Error at test number %d", count
);
728 #if !UCONFIG_NO_BREAK_ITERATION
// Tests break-iterator handling of StringSearch:
//  1. setBreakIterator()/getBreakIterator() round trip with NULL, the
//     character break iterator and the word break iterator.
//  2. For the leading BREAKITERATOREXACT entries, a search is opened with the
//     entry's break iterator and verified; the break iterator of the
//     following entry (count + 1) is then installed on the same search
//     object and verified against that entry.
//  3. Finally every BREAKITERATOREXACT entry is run through assertEqual().
// NOTE(review): the loop header for step 2, the declarations of `temp`,
// `text` and `count`, and the deletes are missing from this extract.
730 void StringSearchTest::TestBreakIterator()
733 u_unescape(BREAKITERATOREXACT
[0].text
, temp
, 128);
735 text
.setTo(temp
, u_strlen(temp
));
736 u_unescape(BREAKITERATOREXACT
[0].pattern
, temp
, 128);
737 UnicodeString pattern
;
738 pattern
.setTo(temp
, u_strlen(temp
));
740 UErrorCode status
= U_ZERO_ERROR
;
741 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
743 if (U_FAILURE(status
)) {
744 errln("Error opening string search %s", u_errorName(status
));
747 strsrch
->setBreakIterator(NULL
, status
);
748 if (U_FAILURE(status
) || strsrch
->getBreakIterator() != NULL
) {
749 errln("Error usearch_getBreakIterator returned wrong object");
752 strsrch
->setBreakIterator(m_en_characterbreaker_
, status
);
753 if (U_FAILURE(status
) ||
754 strsrch
->getBreakIterator() != m_en_characterbreaker_
) {
755 errln("Error usearch_getBreakIterator returned wrong object");
758 strsrch
->setBreakIterator(m_en_wordbreaker_
, status
);
759 if (U_FAILURE(status
) ||
760 strsrch
->getBreakIterator() != m_en_wordbreaker_
) {
761 errln("Error usearch_getBreakIterator returned wrong object");
768 // special purposes for tests numbers 0-3
769 const SearchData
*search
= &(BREAKITERATOREXACT
[count
]);
770 RuleBasedCollator
*collator
= getCollator(search
->collator
);
771 BreakIterator
*breaker
= getBreakIterator(search
->breaker
);
772 StringSearch
*strsrch
;
774 u_unescape(search
->text
, temp
, 128);
775 text
.setTo(temp
, u_strlen(temp
));
776 u_unescape(search
->pattern
, temp
, 128);
777 pattern
.setTo(temp
, u_strlen(temp
));
778 if (breaker
!= NULL
) {
779 breaker
->setText(text
);
781 collator
->setStrength(getECollationStrength(search
->strength
));
783 strsrch
= new StringSearch(pattern
, text
, collator
, breaker
, status
);
784 if (U_FAILURE(status
) ||
785 strsrch
->getBreakIterator() != breaker
) {
786 errln("Error setting break iterator");
787 if (strsrch
!= NULL
) {
791 if (!assertEqualWithStringSearch(strsrch
, search
)) {
792 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
// Switch the same search object to the break iterator of the next entry.
795 search
= &(BREAKITERATOREXACT
[count
+ 1]);
796 breaker
= getBreakIterator(search
->breaker
);
797 if (breaker
!= NULL
) {
798 breaker
->setText(text
);
800 strsrch
->setBreakIterator(breaker
, status
);
801 if (U_FAILURE(status
) ||
802 strsrch
->getBreakIterator() != breaker
) {
803 errln("Error setting break iterator");
807 if (!assertEqualWithStringSearch(strsrch
, search
)) {
808 errln("Error at test number %d", count
);
814 while (BREAKITERATOREXACT
[count
].text
!= NULL
) {
815 if (!assertEqual(&BREAKITERATOREXACT
[count
])) {
816 errln("Error at test number %d", count
);
// Runs the VARIABLE table through assertEqual() while the en_US collator has
// UCOL_ALTERNATE_HANDLING set to UCOL_SHIFTED, then sets the attribute to
// UCOL_NON_IGNORABLE afterwards. The table ends at the first entry whose
// `text` is NULL.
// NOTE(review): the declaration and increment of `count` and the tail of the
// final setAttribute() call are missing from this extract.
824 void StringSearchTest::TestVariable()
827 UErrorCode status
= U_ZERO_ERROR
;
828 m_en_us_
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, status
);
829 if (U_FAILURE(status
)) {
830 errln("Error setting collation alternate attribute %s",
831 u_errorName(status
));
833 while (VARIABLE
[count
].text
!= NULL
) {
834 logln("variable %d", count
);
835 if (!assertEqual(&VARIABLE
[count
])) {
836 errln("Error at test number %d", count
);
840 m_en_us_
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
,
// Tests the USEARCH_OVERLAP attribute:
//  1. every OVERLAP entry is run through assertEqualWithAttribute() with
//     canonical matching off (the overlap argument is on a line missing from
//     this extract);
//  2. every NONOVERLAP entry is run through assertEqual();
//  3. for paired OVERLAP/NONOVERLAP entries a single search object is
//     verified with overlap on, then switched to overlap off and verified
//     against the NONOVERLAP expectations.
// NOTE(review): the loop header of step 3, the declarations of `temp`, `text`
// and `count`, and the deletes are missing from this extract.
844 void StringSearchTest::TestOverlap()
847 while (OVERLAP
[count
].text
!= NULL
) {
848 if (!assertEqualWithAttribute(&OVERLAP
[count
], USEARCH_OFF
,
850 errln("Error at overlap test number %d", count
);
855 while (NONOVERLAP
[count
].text
!= NULL
) {
856 if (!assertEqual(&NONOVERLAP
[count
])) {
857 errln("Error at non overlap test number %d", count
);
864 const SearchData
*search
= &(OVERLAP
[count
]);
866 u_unescape(search
->text
, temp
, 128);
868 text
.setTo(temp
, u_strlen(temp
));
869 u_unescape(search
->pattern
, temp
, 128);
870 UnicodeString pattern
;
871 pattern
.setTo(temp
, u_strlen(temp
));
873 RuleBasedCollator
*collator
= getCollator(search
->collator
);
874 UErrorCode status
= U_ZERO_ERROR
;
875 StringSearch
*strsrch
= new StringSearch(pattern
, text
,
879 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_ON
, status
);
880 if (U_FAILURE(status
) ||
881 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_ON
) {
882 errln("Error setting overlap option");
884 if (!assertEqualWithStringSearch(strsrch
, search
)) {
// Same search object, now checked against the non-overlapping expectations.
889 search
= &(NONOVERLAP
[count
]);
890 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_OFF
, status
);
891 if (U_FAILURE(status
) ||
892 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_OFF
) {
893 errln("Error setting overlap option");
896 if (!assertEqualWithStringSearch(strsrch
, search
)) {
898 errln("Error at test number %d", count
);
// Tests replacing the collator of an existing StringSearch:
//  1. a search opened with the en_US collator is verified against COLLATOR[0];
//  2. a RuleBasedCollator built from TESTCOLLATORRULE (with the strength of
//     COLLATOR[1]) is installed via setCollator() and the search is verified
//     against COLLATOR[1];
//  3. the en_US collator is installed again and the search is verified
//     against COLLATOR[0] once more.
// The repeated `tailored != NULL` checks guard statements that are missing
// from this extract (presumably clean-up on the error paths - confirm).
// NOTE(review): tailored->setStrength() is called before `status` from the
// RuleBasedCollator constructor is tested.
906 void StringSearchTest::TestCollator()
908 // test collator that thinks "o" and "p" are the same thing
910 u_unescape(COLLATOR
[0].text
, temp
, 128);
912 text
.setTo(temp
, u_strlen(temp
));
913 u_unescape(COLLATOR
[0].pattern
, temp
, 128);
914 UnicodeString pattern
;
915 pattern
.setTo(temp
, u_strlen(temp
));
917 UErrorCode status
= U_ZERO_ERROR
;
918 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
920 if (U_FAILURE(status
)) {
921 errln("Error opening string search %s", u_errorName(status
));
925 if (!assertEqualWithStringSearch(strsrch
, &COLLATOR
[0])) {
930 u_unescape(TESTCOLLATORRULE
, temp
, 128);
932 rules
.setTo(temp
, u_strlen(temp
));
933 RuleBasedCollator
*tailored
= new RuleBasedCollator(rules
, status
);
934 tailored
->setStrength(getECollationStrength(COLLATOR
[1].strength
));
936 if (U_FAILURE(status
)) {
937 errln("Error opening rule based collator %s", u_errorName(status
));
939 if (tailored
!= NULL
) {
945 strsrch
->setCollator(tailored
, status
);
946 if (U_FAILURE(status
) || (*strsrch
->getCollator()) != (*tailored
)) {
947 errln("Error setting rule based collator");
949 if (tailored
!= NULL
) {
954 if (!assertEqualWithStringSearch(strsrch
, &COLLATOR
[1])) {
956 if (tailored
!= NULL
) {
// Switch back to the shared en_US collator and re-check the first data set.
962 strsrch
->setCollator(m_en_us_
, status
);
964 if (U_FAILURE(status
) || (*strsrch
->getCollator()) != (*m_en_us_
)) {
965 errln("Error setting rule based collator");
967 if (tailored
!= NULL
) {
971 if (!assertEqualWithStringSearch(strsrch
, &COLLATOR
[0])) {
972 errln("Error searching collator test");
975 if (tailored
!= NULL
) {
// Tests setPattern()/getPattern() on an existing StringSearch:
//  1. the search is opened with PATTERN[0] and verified;
//  2. the pattern of PATTERN[1] is installed and the search is verified
//     against PATTERN[1];
//  3. the pattern of PATTERN[0] is installed again and verified;
//  4. a 511-unit pattern consisting of 0x61 is installed to make sure a very
//     large pattern does not fail.
// The en_US collator strength is taken from PATTERN[0] at the start and set
// back to tertiary on every visible exit path.
// The repeated `strsrch != NULL` checks guard statements that are missing
// from this extract (presumably the delete of the search object - confirm).
980 void StringSearchTest::TestPattern()
985 u_unescape(PATTERN
[0].text
, temp
, 512);
987 text
.setTo(temp
, u_strlen(temp
));
988 u_unescape(PATTERN
[0].pattern
, temp
, 512);
989 UnicodeString pattern
;
990 pattern
.setTo(temp
, u_strlen(temp
));
992 m_en_us_
->setStrength(getECollationStrength(PATTERN
[0].strength
));
993 UErrorCode status
= U_ZERO_ERROR
;
994 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
997 if (U_FAILURE(status
)) {
998 errln("Error opening string search %s", u_errorName(status
));
999 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1000 if (strsrch
!= NULL
) {
1005 if (strsrch
->getPattern() != pattern
) {
1006 errln("Error setting pattern");
1008 if (!assertEqualWithStringSearch(strsrch
, &PATTERN
[0])) {
1009 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1010 if (strsrch
!= NULL
) {
1016 u_unescape(PATTERN
[1].pattern
, temp
, 512);
1017 pattern
.setTo(temp
, u_strlen(temp
));
1018 strsrch
->setPattern(pattern
, status
);
1019 if (pattern
!= strsrch
->getPattern()) {
1020 errln("Error setting pattern");
1021 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1022 if (strsrch
!= NULL
) {
1028 if (U_FAILURE(status
)) {
1029 errln("Error setting pattern %s", u_errorName(status
));
1031 if (!assertEqualWithStringSearch(strsrch
, &PATTERN
[1])) {
1032 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1033 if (strsrch
!= NULL
) {
1039 u_unescape(PATTERN
[0].pattern
, temp
, 512);
1040 pattern
.setTo(temp
, u_strlen(temp
));
1041 strsrch
->setPattern(pattern
, status
);
1042 if (pattern
!= strsrch
->getPattern()) {
1043 errln("Error setting pattern");
1044 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1045 if (strsrch
!= NULL
) {
1051 if (U_FAILURE(status
)) {
1052 errln("Error setting pattern %s", u_errorName(status
));
1054 if (!assertEqualWithStringSearch(strsrch
, &PATTERN
[0])) {
1055 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1056 if (strsrch
!= NULL
) {
1061 /* enormous pattern size to see if this crashes */
1062 for (templength
= 0; templength
!= 512; templength
++) {
1063 temp
[templength
] = 0x61;
// Only the first 511 units of the filled buffer are used as the pattern.
1066 pattern
.setTo(temp
, 511);
1067 strsrch
->setPattern(pattern
, status
);
1068 if (U_FAILURE(status
)) {
1069 errln("Error setting pattern with size 512, %s", u_errorName(status
));
1071 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1072 if (strsrch
!= NULL
) {
// Tests setText()/getText() on an existing StringSearch:
//  1. the search is opened with TEXT[0] and verified;
//  2. the text of TEXT[1] is installed as a UnicodeString and the search is
//     verified against TEXT[1];
//  3. the text of TEXT[0] is installed through a StringCharacterIterator and
//     the search is verified against TEXT[0] again.
// NOTE(review): the declarations of `temp` and `text`, the deletes and the
// return statements are missing from this extract. The message at the third
// status check says "pattern" although the preceding call sets the text.
1077 void StringSearchTest::TestText()
1080 u_unescape(TEXT
[0].text
, temp
, 128);
1082 text
.setTo(temp
, u_strlen(temp
));
1083 u_unescape(TEXT
[0].pattern
, temp
, 128);
1084 UnicodeString pattern
;
1085 pattern
.setTo(temp
, u_strlen(temp
));
1087 UErrorCode status
= U_ZERO_ERROR
;
1088 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1090 if (U_FAILURE(status
)) {
1091 errln("Error opening string search %s", u_errorName(status
));
1094 if (text
!= strsrch
->getText()) {
1095 errln("Error setting text");
1097 if (!assertEqualWithStringSearch(strsrch
, &TEXT
[0])) {
1102 u_unescape(TEXT
[1].text
, temp
, 128);
1103 text
.setTo(temp
, u_strlen(temp
));
1104 strsrch
->setText(text
, status
);
1105 if (text
!= strsrch
->getText()) {
1106 errln("Error setting text");
1110 if (U_FAILURE(status
)) {
1111 errln("Error setting text %s", u_errorName(status
));
1113 if (!assertEqualWithStringSearch(strsrch
, &TEXT
[1])) {
1118 u_unescape(TEXT
[0].text
, temp
, 128);
1119 text
.setTo(temp
, u_strlen(temp
));
// Same text as step 1, but supplied through the CharacterIterator overload.
1120 StringCharacterIterator
chariter(text
);
1121 strsrch
->setText(chariter
, status
);
1122 if (text
!= strsrch
->getText()) {
1123 errln("Error setting text");
1127 if (U_FAILURE(status
)) {
1128 errln("Error setting pattern %s", u_errorName(status
));
1130 if (!assertEqualWithStringSearch(strsrch
, &TEXT
[0])) {
1131 errln("Error searching within set text");
// Runs assertEqual() over every entry of the COMPOSITEBOUNDARIES table,
// logging the index of each entry first; the table ends at the first entry
// whose `text` is NULL.
// NOTE(review): the declaration and increment of `count` are missing from
// this extract.
1136 void StringSearchTest::TestCompositeBoundaries()
1139 while (COMPOSITEBOUNDARIES
[count
].text
!= NULL
) {
1140 logln("composite %d", count
);
1141 if (!assertEqual(&COMPOSITEBOUNDARIES
[count
])) {
1142 errln("Error at test number %d", count
);
// Tests setOffset()/getOffset():
//  1. offsets -1 and 128 on a 32-unit text must be rejected with a failing
//     status;
//  2. for every BASIC entry the search object is re-targeted with setText()
//     and setPattern(); after each verified match the offset is moved to
//     one past the start of the next expected match, and the match after
//     that (index count + 2) becomes the next expectation;
//  3. once the expectations are exhausted, next() must report USEARCH_DONE.
// NOTE(review): the inner `status` declared in the loop shadows the outer
// one. The declarations of `temp`, `index` and `count`, the increments of
// `count`, and the arguments of both getCollator()->setStrength() calls are
// missing from this extract.
1148 void StringSearchTest::TestGetSetOffset()
1150 UErrorCode status
= U_ZERO_ERROR
;
1151 UnicodeString
pattern("1234567890123456");
1152 UnicodeString
text("12345678901234567890123456789012");
1153 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
,
1155 /* testing out of bounds error */
1156 strsrch
->setOffset(-1, status
);
1157 if (U_SUCCESS(status
)) {
1158 errln("Error expecting set offset error");
1160 strsrch
->setOffset(128, status
);
1161 if (U_SUCCESS(status
)) {
1162 errln("Error expecting set offset error");
1165 while (BASIC
[index
].text
!= NULL
) {
1166 UErrorCode status
= U_ZERO_ERROR
;
1167 SearchData search
= BASIC
[index
++];
1170 u_unescape(search
.text
, temp
, 128);
1171 text
.setTo(temp
, u_strlen(temp
));
1172 u_unescape(search
.pattern
, temp
, 128);
1173 pattern
.setTo(temp
, u_strlen(temp
));
1174 strsrch
->setText(text
, status
);
1175 strsrch
->setPattern(pattern
, status
);
1176 strsrch
->getCollator()->setStrength(getECollationStrength(
1181 int32_t matchindex
= search
.offset
[count
];
1182 while (U_SUCCESS(status
) && matchindex
>= 0) {
1183 int32_t matchlength
= search
.size
[count
];
1184 strsrch
->next(status
);
1185 if (matchindex
!= strsrch
->getMatchedStart() ||
1186 matchlength
!= strsrch
->getMatchedLength()) {
1187 char *str
= toCharString(strsrch
->getText());
1188 errln("Text: %s", str
);
1189 str
= toCharString(strsrch
->getPattern());
1190 errln("Pattern: %s", str
);
1191 errln("Error match found at %d %d",
1192 strsrch
->getMatchedStart(),
1193 strsrch
->getMatchedLength());
// If there is a next expected match, the following expectation is the one
// after it, because the offset is about to be moved past its start.
1196 matchindex
= search
.offset
[count
+ 1] == -1 ? -1 :
1197 search
.offset
[count
+ 2];
1198 if (search
.offset
[count
+ 1] != -1) {
1199 strsrch
->setOffset(search
.offset
[count
+ 1] + 1, status
);
1200 if (strsrch
->getOffset() != search
.offset
[count
+ 1] + 1) {
1201 errln("Error setting offset\n");
1208 strsrch
->next(status
);
1209 if (strsrch
->getMatchedStart() != USEARCH_DONE
) {
1210 char *str
= toCharString(strsrch
->getText());
1211 errln("Text: %s", str
);
1212 str
= toCharString(strsrch
->getPattern());
1213 errln("Pattern: %s", str
);
1214 errln("Error match found at %d %d",
1215 strsrch
->getMatchedStart(),
1216 strsrch
->getMatchedLength());
1220 strsrch
->getCollator()->setStrength(getECollationStrength(
// Tests setAttribute()/getAttribute() for USEARCH_OVERLAP and
// USEARCH_CANONICAL_MATCH:
//  - USEARCH_DEFAULT must read back as USEARCH_OFF;
//  - USEARCH_ON and USEARCH_OFF must read back unchanged;
//  - USEARCH_ATTRIBUTE_VALUE_COUNT is an illegal value and must fail;
//  - USEARCH_ATTRIBUTE_COUNT is an illegal attribute and must fail.
// `status` is reset to U_ZERO_ERROR after each expected failure so that the
// next group of checks starts clean.
// NOTE(review): the tail of the constructor call, the delete of the search
// object and the return statements are missing from this extract.
1225 void StringSearchTest::TestGetSetAttribute()
1227 UErrorCode status
= U_ZERO_ERROR
;
1228 UnicodeString
pattern("pattern");
1229 UnicodeString
text("text");
1230 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1232 if (U_FAILURE(status
)) {
1233 errln("Error opening search %s", u_errorName(status
));
1237 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_DEFAULT
, status
);
1238 if (U_FAILURE(status
) ||
1239 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_OFF
) {
1240 errln("Error setting overlap to the default");
1242 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_ON
, status
);
1243 if (U_FAILURE(status
) ||
1244 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_ON
) {
1245 errln("Error setting overlap true");
1247 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_OFF
, status
);
1248 if (U_FAILURE(status
) ||
1249 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_OFF
) {
1250 errln("Error setting overlap false");
1252 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_ATTRIBUTE_VALUE_COUNT
,
1254 if (U_SUCCESS(status
)) {
1255 errln("Error setting overlap to illegal value");
1257 status
= U_ZERO_ERROR
;
1258 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_DEFAULT
, status
);
1259 if (U_FAILURE(status
) ||
1260 strsrch
->getAttribute(USEARCH_CANONICAL_MATCH
) != USEARCH_OFF
) {
1261 errln("Error setting canonical match to the default");
1263 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1264 if (U_FAILURE(status
) ||
1265 strsrch
->getAttribute(USEARCH_CANONICAL_MATCH
) != USEARCH_ON
) {
1266 errln("Error setting canonical match true");
1268 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_OFF
, status
);
1269 if (U_FAILURE(status
) ||
1270 strsrch
->getAttribute(USEARCH_CANONICAL_MATCH
) != USEARCH_OFF
) {
1271 errln("Error setting canonical match false");
1273 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
,
1274 USEARCH_ATTRIBUTE_VALUE_COUNT
, status
);
1275 if (U_SUCCESS(status
)) {
1276 errln("Error setting canonical match to illegal value");
1278 status
= U_ZERO_ERROR
;
1279 strsrch
->setAttribute(USEARCH_ATTRIBUTE_COUNT
, USEARCH_DEFAULT
, status
);
1280 if (U_SUCCESS(status
)) {
1281 errln("Error setting illegal attribute success");
1287 void StringSearchTest::TestGetMatch()
1290 SearchData search
= MATCH
[0];
1291 u_unescape(search
.text
, temp
, 128);
1293 text
.setTo(temp
, u_strlen(temp
));
1294 u_unescape(search
.pattern
, temp
, 128);
1295 UnicodeString pattern
;
1296 pattern
.setTo(temp
, u_strlen(temp
));
1298 UErrorCode status
= U_ZERO_ERROR
;
1299 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1301 if (U_FAILURE(status
)) {
1302 errln("Error opening string search %s", u_errorName(status
));
1303 if (strsrch
!= NULL
) {
1310 int32_t matchindex
= search
.offset
[count
];
1311 UnicodeString matchtext
;
1312 while (U_SUCCESS(status
) && matchindex
>= 0) {
1313 int32_t matchlength
= search
.size
[count
];
1314 strsrch
->next(status
);
1315 if (matchindex
!= strsrch
->getMatchedStart() ||
1316 matchlength
!= strsrch
->getMatchedLength()) {
1317 char *str
= toCharString(strsrch
->getText());
1318 errln("Text: %s", str
);
1319 str
= toCharString(strsrch
->getPattern());
1320 errln("Pattern: %s", str
);
1321 errln("Error match found at %d %d", strsrch
->getMatchedStart(),
1322 strsrch
->getMatchedLength());
1327 status
= U_ZERO_ERROR
;
1328 strsrch
->getMatchedText(matchtext
);
1329 if (matchtext
.length() != matchlength
|| U_FAILURE(status
)){
1330 errln("Error getting match text");
1332 matchindex
= search
.offset
[count
];
1334 status
= U_ZERO_ERROR
;
1335 strsrch
->next(status
);
1336 if (strsrch
->getMatchedStart() != USEARCH_DONE
||
1337 strsrch
->getMatchedLength() != 0) {
1338 errln("Error end of match not found");
1340 status
= U_ZERO_ERROR
;
1341 strsrch
->getMatchedText(matchtext
);
1342 if (matchtext
.length() != 0) {
1343 errln("Error getting null matches");
1348 void StringSearchTest::TestSetMatch()
1351 while (MATCH
[count
].text
!= NULL
) {
1352 SearchData search
= MATCH
[count
];
1354 UErrorCode status
= U_ZERO_ERROR
;
1355 u_unescape(search
.text
, temp
, 128);
1357 text
.setTo(temp
, u_strlen(temp
));
1358 u_unescape(search
.pattern
, temp
, 128);
1359 UnicodeString pattern
;
1360 pattern
.setTo(temp
, u_strlen(temp
));
1362 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
,
1364 if (U_FAILURE(status
)) {
1365 errln("Error opening string search %s", u_errorName(status
));
1366 if (strsrch
!= NULL
) {
1373 while (search
.offset
[size
] != -1) {
1377 if (strsrch
->first(status
) != search
.offset
[0] || U_FAILURE(status
)) {
1378 errln("Error getting first match");
1380 if (strsrch
->last(status
) != search
.offset
[size
-1] ||
1381 U_FAILURE(status
)) {
1382 errln("Error getting last match");
1386 while (index
< size
) {
1387 if (index
+ 2 < size
) {
1388 if (strsrch
->following(search
.offset
[index
+ 2] - 1, status
)
1389 != search
.offset
[index
+ 2] || U_FAILURE(status
)) {
1390 errln("Error getting following match at index %d",
1391 search
.offset
[index
+ 2] - 1);
1394 if (index
+ 1 < size
) {
1395 if (strsrch
->preceding(search
.offset
[index
+ 1] +
1396 search
.size
[index
+ 1] + 1,
1397 status
) != search
.offset
[index
+ 1] ||
1398 U_FAILURE(status
)) {
1399 errln("Error getting preceeding match at index %d",
1400 search
.offset
[index
+ 1] + 1);
1405 status
= U_ZERO_ERROR
;
1406 if (strsrch
->following(text
.length(), status
) != USEARCH_DONE
) {
1407 errln("Error expecting out of bounds match");
1409 if (strsrch
->preceding(0, status
) != USEARCH_DONE
) {
1410 errln("Error expecting out of bounds match");
1417 void StringSearchTest::TestReset()
1419 UErrorCode status
= U_ZERO_ERROR
;
1420 UnicodeString
text("fish fish");
1421 UnicodeString
pattern("s");
1422 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1424 if (U_FAILURE(status
)) {
1425 errln("Error opening string search %s", u_errorName(status
));
1426 if (strsrch
!= NULL
) {
1431 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_ON
, status
);
1432 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1433 strsrch
->setOffset(9, status
);
1434 if (U_FAILURE(status
)) {
1435 errln("Error setting attributes and offsets");
1439 if (strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_OFF
||
1440 strsrch
->getAttribute(USEARCH_CANONICAL_MATCH
) != USEARCH_OFF
||
1441 strsrch
->getOffset() != 0 || strsrch
->getMatchedLength() != 0 ||
1442 strsrch
->getMatchedStart() != USEARCH_DONE
) {
1443 errln("Error resetting string search");
1445 strsrch
->previous(status
);
1446 if (strsrch
->getMatchedStart() != 7 ||
1447 strsrch
->getMatchedLength() != 1) {
1448 errln("Error resetting string search\n");
1454 void StringSearchTest::TestSupplementary()
1457 while (SUPPLEMENTARY
[count
].text
!= NULL
) {
1458 if (!assertEqual(&SUPPLEMENTARY
[count
])) {
1459 errln("Error at test number %d", count
);
1465 void StringSearchTest::TestContraction()
1468 UErrorCode status
= U_ZERO_ERROR
;
1470 u_unescape(CONTRACTIONRULE
, temp
, 128);
1471 UnicodeString rules
;
1472 rules
.setTo(temp
, u_strlen(temp
));
1473 RuleBasedCollator
*collator
= new RuleBasedCollator(rules
,
1474 getECollationStrength(UCOL_TERTIARY
), UCOL_ON
, status
);
1475 if (U_FAILURE(status
)) {
1476 errln("Error opening collator %s", u_errorName(status
));
1478 UnicodeString
text("text");
1479 UnicodeString
pattern("pattern");
1480 StringSearch
*strsrch
= new StringSearch(pattern
, text
, collator
, NULL
,
1482 if (U_FAILURE(status
)) {
1483 errln("Error opening string search %s", u_errorName(status
));
1487 while (CONTRACTION
[count
].text
!= NULL
) {
1488 u_unescape(CONTRACTION
[count
].text
, temp
, 128);
1489 text
.setTo(temp
, u_strlen(temp
));
1490 u_unescape(CONTRACTION
[count
].pattern
, temp
, 128);
1491 pattern
.setTo(temp
, u_strlen(temp
));
1492 strsrch
->setText(text
, status
);
1493 strsrch
->setPattern(pattern
, status
);
1494 if (!assertEqualWithStringSearch(strsrch
, &CONTRACTION
[count
])) {
1495 errln("Error at test number %d", count
);
1503 void StringSearchTest::TestIgnorable()
1506 u_unescape(IGNORABLERULE
, temp
, 128);
1507 UnicodeString rules
;
1508 rules
.setTo(temp
, u_strlen(temp
));
1509 UErrorCode status
= U_ZERO_ERROR
;
1511 RuleBasedCollator
*collator
= new RuleBasedCollator(rules
,
1512 getECollationStrength(IGNORABLE
[count
].strength
),
1514 if (U_FAILURE(status
)) {
1515 errln("Error opening collator %s", u_errorName(status
));
1518 UnicodeString
pattern("pattern");
1519 UnicodeString
text("text");
1520 StringSearch
*strsrch
= new StringSearch(pattern
, text
, collator
, NULL
,
1522 if (U_FAILURE(status
)) {
1523 errln("Error opening string search %s", u_errorName(status
));
1528 while (IGNORABLE
[count
].text
!= NULL
) {
1529 u_unescape(IGNORABLE
[count
].text
, temp
, 128);
1530 text
.setTo(temp
, u_strlen(temp
));
1531 u_unescape(IGNORABLE
[count
].pattern
, temp
, 128);
1532 pattern
.setTo(temp
, u_strlen(temp
));
1533 strsrch
->setText(text
, status
);
1534 strsrch
->setPattern(pattern
, status
);
1535 if (!assertEqualWithStringSearch(strsrch
, &IGNORABLE
[count
])) {
1536 errln("Error at test number %d", count
);
1544 void StringSearchTest::TestCanonical()
1547 while (BASICCANONICAL
[count
].text
!= NULL
) {
1548 if (!assertCanonicalEqual(&BASICCANONICAL
[count
])) {
1549 errln("Error at test number %d", count
);
1555 void StringSearchTest::TestNormCanonical()
1557 UErrorCode status
= U_ZERO_ERROR
;
1558 m_en_us_
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_ON
, status
);
1560 while (NORMCANONICAL
[count
].text
!= NULL
) {
1561 if (!assertCanonicalEqual(&NORMCANONICAL
[count
])) {
1562 errln("Error at test number %d", count
);
1566 m_en_us_
->setAttribute(UCOL_NORMALIZATION_MODE
, UCOL_OFF
, status
);
1569 void StringSearchTest::TestStrengthCanonical()
1572 while (STRENGTHCANONICAL
[count
].text
!= NULL
) {
1573 if (!assertCanonicalEqual(&STRENGTHCANONICAL
[count
])) {
1574 errln("Error at test number %d", count
);
1580 #if !UCONFIG_NO_BREAK_ITERATION
1582 void StringSearchTest::TestBreakIteratorCanonical()
1584 UErrorCode status
= U_ZERO_ERROR
;
1588 // special purposes for tests numbers 0-3
1590 const SearchData
*search
= &(BREAKITERATORCANONICAL
[count
]);
1592 u_unescape(search
->text
, temp
, 128);
1594 text
.setTo(temp
, u_strlen(temp
));
1595 u_unescape(search
->pattern
, temp
, 128);
1596 UnicodeString pattern
;
1597 pattern
.setTo(temp
, u_strlen(temp
));
1598 RuleBasedCollator
*collator
= getCollator(search
->collator
);
1599 collator
->setStrength(getECollationStrength(search
->strength
));
1601 BreakIterator
*breaker
= getBreakIterator(search
->breaker
);
1602 StringSearch
*strsrch
= new StringSearch(pattern
, text
, collator
,
1604 if (U_FAILURE(status
)) {
1605 errln("Error creating string search data");
1608 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1609 if (U_FAILURE(status
) ||
1610 strsrch
->getBreakIterator() != breaker
) {
1611 errln("Error setting break iterator");
1615 if (!assertEqualWithStringSearch(strsrch
, search
)) {
1616 collator
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1620 search
= &(BREAKITERATOREXACT
[count
+ 1]);
1621 breaker
= getBreakIterator(search
->breaker
);
1622 if (breaker
== NULL
) {
1623 errln("Error creating BreakIterator");
1626 breaker
->setText(strsrch
->getText());
1627 strsrch
->setBreakIterator(breaker
, status
);
1628 if (U_FAILURE(status
) || strsrch
->getBreakIterator() != breaker
) {
1629 errln("Error setting break iterator");
1634 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1635 if (!assertEqualWithStringSearch(strsrch
, search
)) {
1636 errln("Error at test number %d", count
);
1643 while (BREAKITERATORCANONICAL
[count
].text
!= NULL
) {
1644 if (!assertEqual(&BREAKITERATORCANONICAL
[count
])) {
1645 errln("Error at test number %d", count
);
1654 void StringSearchTest::TestVariableCanonical()
1657 UErrorCode status
= U_ZERO_ERROR
;
1658 m_en_us_
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, status
);
1659 if (U_FAILURE(status
)) {
1660 errln("Error setting collation alternate attribute %s",
1661 u_errorName(status
));
1663 while (VARIABLE
[count
].text
!= NULL
) {
1664 logln("variable %d", count
);
1665 if (!assertCanonicalEqual(&VARIABLE
[count
])) {
1666 errln("Error at test number %d", count
);
1670 m_en_us_
->setAttribute(UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
,
1674 void StringSearchTest::TestOverlapCanonical()
1677 while (OVERLAPCANONICAL
[count
].text
!= NULL
) {
1678 if (!assertEqualWithAttribute(&OVERLAPCANONICAL
[count
], USEARCH_ON
,
1680 errln("Error at overlap test number %d", count
);
1685 while (NONOVERLAP
[count
].text
!= NULL
) {
1686 if (!assertCanonicalEqual(&NONOVERLAPCANONICAL
[count
])) {
1687 errln("Error at non overlap test number %d", count
);
1695 const SearchData
*search
= &(OVERLAPCANONICAL
[count
]);
1696 UErrorCode status
= U_ZERO_ERROR
;
1698 u_unescape(search
->text
, temp
, 128);
1700 text
.setTo(temp
, u_strlen(temp
));
1701 u_unescape(search
->pattern
, temp
, 128);
1702 UnicodeString pattern
;
1703 pattern
.setTo(temp
, u_strlen(temp
));
1704 RuleBasedCollator
*collator
= getCollator(search
->collator
);
1705 StringSearch
*strsrch
= new StringSearch(pattern
, text
, collator
,
1707 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1708 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_ON
, status
);
1709 if (U_FAILURE(status
) ||
1710 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_ON
) {
1711 errln("Error setting overlap option");
1713 if (!assertEqualWithStringSearch(strsrch
, search
)) {
1717 search
= &(NONOVERLAPCANONICAL
[count
]);
1718 strsrch
->setAttribute(USEARCH_OVERLAP
, USEARCH_OFF
, status
);
1719 if (U_FAILURE(status
) ||
1720 strsrch
->getAttribute(USEARCH_OVERLAP
) != USEARCH_OFF
) {
1721 errln("Error setting overlap option");
1724 if (!assertEqualWithStringSearch(strsrch
, search
)) {
1726 errln("Error at test number %d", count
);
1734 void StringSearchTest::TestCollatorCanonical()
1736 /* test collator that thinks "o" and "p" are the same thing */
1738 u_unescape(COLLATORCANONICAL
[0].text
, temp
, 128);
1740 text
.setTo(temp
, u_strlen(temp
));
1741 u_unescape(COLLATORCANONICAL
[0].pattern
, temp
, 128);
1742 UnicodeString pattern
;
1743 pattern
.setTo(temp
, u_strlen(temp
));
1745 UErrorCode status
= U_ZERO_ERROR
;
1746 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
,
1748 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1749 if (U_FAILURE(status
)) {
1750 errln("Error opening string search %s", u_errorName(status
));
1752 if (!assertEqualWithStringSearch(strsrch
, &COLLATORCANONICAL
[0])) {
1757 u_unescape(TESTCOLLATORRULE
, temp
, 128);
1758 UnicodeString rules
;
1759 rules
.setTo(temp
, u_strlen(temp
));
1760 RuleBasedCollator
*tailored
= new RuleBasedCollator(rules
,
1761 getECollationStrength(COLLATORCANONICAL
[1].strength
),
1764 if (U_FAILURE(status
)) {
1765 errln("Error opening rule based collator %s", u_errorName(status
));
1768 strsrch
->setCollator(tailored
, status
);
1769 if (U_FAILURE(status
) || *(strsrch
->getCollator()) != *tailored
) {
1770 errln("Error setting rule based collator");
1773 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1774 if (!assertEqualWithStringSearch(strsrch
, &COLLATORCANONICAL
[1])) {
1776 if (tailored
!= NULL
) {
1781 strsrch
->setCollator(m_en_us_
, status
);
1783 if (U_FAILURE(status
) || *(strsrch
->getCollator()) != *m_en_us_
) {
1784 errln("Error setting rule based collator");
1786 if (!assertEqualWithStringSearch(strsrch
, &COLLATORCANONICAL
[0])) {
1789 if (tailored
!= NULL
) {
1794 void StringSearchTest::TestPatternCanonical()
1799 u_unescape(PATTERNCANONICAL
[0].text
, temp
, 128);
1801 text
.setTo(temp
, u_strlen(temp
));
1802 u_unescape(PATTERNCANONICAL
[0].pattern
, temp
, 128);
1803 UnicodeString pattern
;
1804 pattern
.setTo(temp
, u_strlen(temp
));
1806 m_en_us_
->setStrength(
1807 getECollationStrength(PATTERNCANONICAL
[0].strength
));
1809 UErrorCode status
= U_ZERO_ERROR
;
1810 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1812 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1813 if (U_FAILURE(status
)) {
1814 errln("Error opening string search %s", u_errorName(status
));
1815 goto ENDTESTPATTERN
;
1817 if (pattern
!= strsrch
->getPattern()) {
1818 errln("Error setting pattern");
1820 if (!assertEqualWithStringSearch(strsrch
, &PATTERNCANONICAL
[0])) {
1821 goto ENDTESTPATTERN
;
1824 u_unescape(PATTERNCANONICAL
[1].pattern
, temp
, 128);
1825 pattern
.setTo(temp
, u_strlen(temp
));
1826 strsrch
->setPattern(pattern
, status
);
1827 if (pattern
!= strsrch
->getPattern()) {
1828 errln("Error setting pattern");
1829 goto ENDTESTPATTERN
;
1832 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1833 if (U_FAILURE(status
)) {
1834 errln("Error setting pattern %s", u_errorName(status
));
1836 if (!assertEqualWithStringSearch(strsrch
, &PATTERNCANONICAL
[1])) {
1837 goto ENDTESTPATTERN
;
1840 u_unescape(PATTERNCANONICAL
[0].pattern
, temp
, 128);
1841 pattern
.setTo(temp
, u_strlen(temp
));
1842 strsrch
->setPattern(pattern
, status
);
1843 if (pattern
!= strsrch
->getPattern()) {
1844 errln("Error setting pattern");
1845 goto ENDTESTPATTERN
;
1848 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1849 if (U_FAILURE(status
)) {
1850 errln("Error setting pattern %s", u_errorName(status
));
1852 if (!assertEqualWithStringSearch(strsrch
, &PATTERNCANONICAL
[0])) {
1853 goto ENDTESTPATTERN
;
1856 m_en_us_
->setStrength(getECollationStrength(UCOL_TERTIARY
));
1857 if (strsrch
!= NULL
) {
1862 void StringSearchTest::TestTextCanonical()
1865 u_unescape(TEXTCANONICAL
[0].text
, temp
, 128);
1867 text
.setTo(temp
, u_strlen(temp
));
1868 u_unescape(TEXTCANONICAL
[0].pattern
, temp
, 128);
1869 UnicodeString pattern
;
1870 pattern
.setTo(temp
, u_strlen(temp
));
1872 UErrorCode status
= U_ZERO_ERROR
;
1873 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1875 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1877 if (U_FAILURE(status
)) {
1878 errln("Error opening string search %s", u_errorName(status
));
1879 goto ENDTESTPATTERN
;
1881 if (text
!= strsrch
->getText()) {
1882 errln("Error setting text");
1884 if (!assertEqualWithStringSearch(strsrch
, &TEXTCANONICAL
[0])) {
1885 goto ENDTESTPATTERN
;
1888 u_unescape(TEXTCANONICAL
[1].text
, temp
, 128);
1889 text
.setTo(temp
, u_strlen(temp
));
1890 strsrch
->setText(text
, status
);
1891 if (text
!= strsrch
->getText()) {
1892 errln("Error setting text");
1893 goto ENDTESTPATTERN
;
1895 if (U_FAILURE(status
)) {
1896 errln("Error setting text %s", u_errorName(status
));
1898 if (!assertEqualWithStringSearch(strsrch
, &TEXTCANONICAL
[1])) {
1899 goto ENDTESTPATTERN
;
1902 u_unescape(TEXTCANONICAL
[0].text
, temp
, 128);
1903 text
.setTo(temp
, u_strlen(temp
));
1904 strsrch
->setText(text
, status
);
1905 if (text
!= strsrch
->getText()) {
1906 errln("Error setting text");
1907 goto ENDTESTPATTERN
;
1909 if (U_FAILURE(status
)) {
1910 errln("Error setting pattern %s", u_errorName(status
));
1912 if (!assertEqualWithStringSearch(strsrch
, &TEXTCANONICAL
[0])) {
1913 goto ENDTESTPATTERN
;
1916 if (strsrch
!= NULL
) {
1921 void StringSearchTest::TestCompositeBoundariesCanonical()
1924 while (COMPOSITEBOUNDARIESCANONICAL
[count
].text
!= NULL
) {
1925 logln("composite %d", count
);
1926 if (!assertCanonicalEqual(&COMPOSITEBOUNDARIESCANONICAL
[count
])) {
1927 errln("Error at test number %d", count
);
1933 void StringSearchTest::TestGetSetOffsetCanonical()
1936 UErrorCode status
= U_ZERO_ERROR
;
1937 UnicodeString
text("text");
1938 UnicodeString
pattern("pattern");
1939 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
1941 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
1942 /* testing out of bounds error */
1943 strsrch
->setOffset(-1, status
);
1944 if (U_SUCCESS(status
)) {
1945 errln("Error expecting set offset error");
1947 strsrch
->setOffset(128, status
);
1948 if (U_SUCCESS(status
)) {
1949 errln("Error expecting set offset error");
1953 while (BASICCANONICAL
[index
].text
!= NULL
) {
1954 SearchData search
= BASICCANONICAL
[index
++];
1955 if (BASICCANONICAL
[index
].text
== NULL
) {
1956 /* skip the last one */
1960 u_unescape(search
.text
, temp
, 128);
1961 text
.setTo(temp
, u_strlen(temp
));
1962 u_unescape(search
.pattern
, temp
, 128);
1963 pattern
.setTo(temp
, u_strlen(temp
));
1965 UErrorCode status
= U_ZERO_ERROR
;
1966 strsrch
->setText(text
, status
);
1968 strsrch
->setPattern(pattern
, status
);
1971 int32_t matchindex
= search
.offset
[count
];
1972 while (U_SUCCESS(status
) && matchindex
>= 0) {
1973 int32_t matchlength
= search
.size
[count
];
1974 strsrch
->next(status
);
1975 if (matchindex
!= strsrch
->getMatchedStart() ||
1976 matchlength
!= strsrch
->getMatchedLength()) {
1977 char *str
= toCharString(strsrch
->getText());
1978 errln("Text: %s", str
);
1979 str
= toCharString(strsrch
->getPattern());
1980 errln("Pattern: %s", str
);
1981 errln("Error match found at %d %d",
1982 strsrch
->getMatchedStart(),
1983 strsrch
->getMatchedLength());
1986 matchindex
= search
.offset
[count
+ 1] == -1 ? -1 :
1987 search
.offset
[count
+ 2];
1988 if (search
.offset
[count
+ 1] != -1) {
1989 strsrch
->setOffset(search
.offset
[count
+ 1] + 1, status
);
1990 if (strsrch
->getOffset() != search
.offset
[count
+ 1] + 1) {
1991 errln("Error setting offset");
1998 strsrch
->next(status
);
1999 if (strsrch
->getMatchedStart() != USEARCH_DONE
) {
2000 char *str
= toCharString(strsrch
->getText());
2001 errln("Text: %s", str
);
2002 str
= toCharString(strsrch
->getPattern());
2003 errln("Pattern: %s", str
);
2004 errln("Error match found at %d %d", strsrch
->getMatchedStart(),
2005 strsrch
->getMatchedLength());
2012 void StringSearchTest::TestSupplementaryCanonical()
2015 while (SUPPLEMENTARYCANONICAL
[count
].text
!= NULL
) {
2016 if (!assertCanonicalEqual(&SUPPLEMENTARYCANONICAL
[count
])) {
2017 errln("Error at test number %d", count
);
2023 void StringSearchTest::TestContractionCanonical()
2027 u_unescape(CONTRACTIONRULE
, temp
, 128);
2028 UnicodeString rules
;
2029 rules
.setTo(temp
, u_strlen(temp
));
2031 UErrorCode status
= U_ZERO_ERROR
;
2032 RuleBasedCollator
*collator
= new RuleBasedCollator(rules
,
2033 getECollationStrength(UCOL_TERTIARY
), UCOL_ON
, status
);
2034 if (U_FAILURE(status
)) {
2035 errln("Error opening collator %s", u_errorName(status
));
2037 UnicodeString
text("text");
2038 UnicodeString
pattern("pattern");
2039 StringSearch
*strsrch
= new StringSearch(pattern
, text
, collator
, NULL
,
2041 strsrch
->setAttribute(USEARCH_CANONICAL_MATCH
, USEARCH_ON
, status
);
2042 if (U_FAILURE(status
)) {
2043 errln("Error opening string search %s", u_errorName(status
));
2047 while (CONTRACTIONCANONICAL
[count
].text
!= NULL
) {
2048 u_unescape(CONTRACTIONCANONICAL
[count
].text
, temp
, 128);
2049 text
.setTo(temp
, u_strlen(temp
));
2050 u_unescape(CONTRACTIONCANONICAL
[count
].pattern
, temp
, 128);
2051 pattern
.setTo(temp
, u_strlen(temp
));
2052 strsrch
->setText(text
, status
);
2053 strsrch
->setPattern(pattern
, status
);
2054 if (!assertEqualWithStringSearch(strsrch
,
2055 &CONTRACTIONCANONICAL
[count
])) {
2056 errln("Error at test number %d", count
);
2064 void StringSearchTest::TestUClassID()
2066 char id
= *((char *)StringSearch::getStaticClassID());
2068 errln("Static class id for StringSearch should be 0");
2070 UErrorCode status
= U_ZERO_ERROR
;
2071 UnicodeString
text("text");
2072 UnicodeString
pattern("pattern");
2073 StringSearch
*strsrch
= new StringSearch(pattern
, text
, m_en_us_
, NULL
,
2075 id
= *((char *)strsrch
->getDynamicClassID());
2077 errln("Dynamic class id for StringSearch should be 0");
2082 class TestSearch
: public SearchIterator
2085 TestSearch(const TestSearch
&obj
);
2086 TestSearch(const UnicodeString
&text
,
2087 BreakIterator
*breakiter
,
2088 const UnicodeString
&pattern
);
2091 void setOffset(int32_t position
, UErrorCode
&status
);
2092 int32_t getOffset() const;
2093 SearchIterator
* safeClone() const;
2097 * ICU "poor man's RTTI", returns a UClassID for the actual class.
2101 virtual inline UClassID
getDynamicClassID() const { return getStaticClassID(); }
2104 * ICU "poor man's RTTI", returns a UClassID for this class.
2108 static inline UClassID
getStaticClassID() { return (UClassID
)&fgClassID
; }
2110 UBool
operator!=(const TestSearch
&that
) const;
2112 UnicodeString m_pattern_
;
2115 int32_t handleNext(int32_t position
, UErrorCode
&status
);
2116 int32_t handlePrev(int32_t position
, UErrorCode
&status
);
2117 TestSearch
& operator=(const TestSearch
&that
);
2122 * The address of this static class variable serves as this class's ID
2123 * for ICU "poor man's RTTI".
2125 static const char fgClassID
;
2129 const char TestSearch::fgClassID
=0;
2131 TestSearch::TestSearch(const TestSearch
&obj
) : SearchIterator(obj
)
2133 m_offset_
= obj
.m_offset_
;
2134 m_pattern_
= obj
.m_pattern_
;
2137 TestSearch::TestSearch(const UnicodeString
&text
,
2138 BreakIterator
*breakiter
,
2139 const UnicodeString
&pattern
) : SearchIterator()
2141 m_breakiterator_
= breakiter
;
2142 m_pattern_
= pattern
;
2145 m_pattern_
= pattern
;
2148 TestSearch::~TestSearch()
2153 void TestSearch::setOffset(int32_t position
, UErrorCode
&status
)
2155 if (position
>= 0 && position
<= m_text_
.length()) {
2156 m_offset_
= position
;
2159 status
= U_INDEX_OUTOFBOUNDS_ERROR
;
2163 int32_t TestSearch::getOffset() const
2168 SearchIterator
* TestSearch::safeClone() const
2170 return new TestSearch(m_text_
, m_breakiterator_
, m_pattern_
);
2173 UBool
TestSearch::operator!=(const TestSearch
&that
) const
2175 if (SearchIterator::operator !=(that
)) {
2178 return m_offset_
!= that
.m_offset_
|| m_pattern_
!= that
.m_pattern_
;
2181 int32_t TestSearch::handleNext(int32_t start
, UErrorCode
&status
)
2183 if(U_SUCCESS(status
)) {
2184 int match
= m_text_
.indexOf(m_pattern_
, start
);
2186 m_offset_
= m_text_
.length();
2187 setMatchStart(m_offset_
);
2189 return USEARCH_DONE
;
2191 setMatchStart(match
);
2193 setMatchLength(m_pattern_
.length());
2196 return USEARCH_DONE
;
2200 int32_t TestSearch::handlePrev(int32_t start
, UErrorCode
&status
)
2202 if(U_SUCCESS(status
)) {
2203 int match
= m_text_
.lastIndexOf(m_pattern_
, 0, start
);
2206 setMatchStart(m_offset_
);
2208 return USEARCH_DONE
;
2210 setMatchStart(match
);
2212 setMatchLength(m_pattern_
.length());
2215 return USEARCH_DONE
;
2219 TestSearch
& TestSearch::operator=(const TestSearch
&that
)
2221 SearchIterator::operator=(that
);
2222 m_offset_
= that
.m_offset_
;
2223 m_pattern_
= that
.m_pattern_
;
2227 void StringSearchTest::TestSubclass()
2229 UnicodeString
text("abc abcd abc");
2230 UnicodeString
pattern("abc");
2231 TestSearch
search(text
, NULL
, pattern
);
2232 TestSearch
search2(search
);
2233 int expected
[] = {0, 4, 9};
2234 UErrorCode status
= U_ZERO_ERROR
;
2236 StringCharacterIterator
chariter(text
);
2238 search
.setText(text
, status
);
2239 if (search
.getText() != search2
.getText()) {
2240 errln("Error setting text");
2243 search
.setText(chariter
, status
);
2244 if (search
.getText() != search2
.getText()) {
2245 errln("Error setting text");
2249 // comparing constructors
2251 for (i
= 0; i
< (int)(sizeof(expected
) / sizeof(expected
[0])); i
++) {
2252 if (search
.next(status
) != expected
[i
]) {
2253 errln("Error getting next match");
2255 if (search
.getMatchedLength() != search
.m_pattern_
.length()) {
2256 errln("Error getting next match length");
2259 if (search
.next(status
) != USEARCH_DONE
) {
2260 errln("Error should have reached the end of the iteration");
2262 for (i
= sizeof(expected
) / sizeof(expected
[0]) - 1; i
>= 0; i
--) {
2263 if (search
.previous(status
) != expected
[i
]) {
2264 errln("Error getting previous match");
2266 if (search
.getMatchedLength() != search
.m_pattern_
.length()) {
2267 errln("Error getting previous match length");
2270 if (search
.previous(status
) != USEARCH_DONE
) {
2271 errln("Error should have reached the start of the iteration");
2275 #endif /* #if !UCONFIG_NO_COLLATION */