1 /********************************************************************
2 * Copyright (c) 2001-2011 International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
6 * Modification History:
7 * Name Date Description
8 * synwee July 19 2001 creation
9 ********************************************************************/
11 #include "unicode/utypes.h"
13 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
15 #include "unicode/usearch.h"
16 #include "unicode/ustring.h"
21 #include "unicode/ubrk.h"
24 static UBool TOCLOSE_
= TRUE
;
25 static UCollator
*EN_US_
;
26 static UCollator
*FR_FR_
;
27 static UCollator
*DE_
;
28 static UCollator
*ES_
;
31 * CHECK_BREAK(char *brk)
32 * Test if a break iterator is passed in AND break iteration is disabled.
33 * Skip the test if so.
34 * CHECK_BREAK_BOOL(char *brk)
35 * Same as above, but returns 'TRUE' as a passing result
38 #if !UCONFIG_NO_BREAK_ITERATION
39 static UBreakIterator
*EN_WORDBREAKER_
;
40 static UBreakIterator
*EN_CHARACTERBREAKER_
;
41 #define CHECK_BREAK(x)
42 #define CHECK_BREAK_BOOL(x)
44 #define CHECK_BREAK(x) if(x) { log_info("Skipping test on %s:%d because UCONFIG_NO_BREAK_ITERATION is on\n", __FILE__, __LINE__); return; }
45 #define CHECK_BREAK_BOOL(x) if(x) { log_info("Skipping test on %s:%d because UCONFIG_NO_BREAK_ITERATION is on\n", __FILE__, __LINE__); return TRUE; }
49 * Opening all static collators and break iterators
51 static void open(UErrorCode
* status
)
55 int32_t rulelength
= 0;
56 *status
= U_ZERO_ERROR
;
58 EN_US_
= ucol_open("en_US", status
);
59 if(U_FAILURE(*status
)) {
60 log_err_status(*status
, "Error opening collator\n");
63 FR_FR_
= ucol_open("fr_FR", status
);
64 DE_
= ucol_open("de_DE", status
);
65 ES_
= ucol_open("es_ES", status
);
67 u_strcpy(rules
, ucol_getRules(DE_
, &rulelength
));
68 u_unescape(EXTRACOLLATIONRULE
, rules
+ rulelength
, 1024 - rulelength
);
72 DE_
= ucol_openRules(rules
, u_strlen(rules
), UCOL_ON
, UCOL_TERTIARY
,
73 (UParseError
*)NULL
, status
);
74 u_strcpy(rules
, ucol_getRules(ES_
, &rulelength
));
75 u_unescape(EXTRACOLLATIONRULE
, rules
+ rulelength
, 1024 - rulelength
);
78 ES_
= ucol_openRules(rules
, u_strlen(rules
), UCOL_ON
, UCOL_TERTIARY
,
80 #if !UCONFIG_NO_BREAK_ITERATION
81 EN_WORDBREAKER_
= ubrk_open(UBRK_WORD
, "en_US", NULL
, 0, status
);
82 EN_CHARACTERBREAKER_
= ubrk_open(UBRK_CHARACTER
, "en_US", NULL
, 0,
90 * Start opening all static collators and break iterators
92 static void TestStart(void)
94 UErrorCode status
= U_ZERO_ERROR
;
96 if (U_FAILURE(status
)) {
97 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
104 * Closing all static collators and break iterators
106 static void close(void)
113 #if !UCONFIG_NO_BREAK_ITERATION
114 ubrk_close(EN_WORDBREAKER_
);
115 ubrk_close(EN_CHARACTERBREAKER_
);
122 * End closing all static collators and break iterators
124 static void TestEnd(void)
132 * output UChar strings for printing.
134 static char *toCharString(const UChar
* unichars
)
136 static char result
[1024];
139 int length
= u_strlen(unichars
);
141 for (; count
< length
; count
++) {
142 UChar ch
= unichars
[count
];
143 if (ch
>= 0x20 && ch
<= 0x7e) {
147 sprintf(temp
, "\\u%04x", ch
);
148 temp
+= 6; /* \uxxxx */
157 * Getting the collator
159 static UCollator
*getCollator(const char *collator
)
161 if (collator
== NULL
) {
164 if (strcmp(collator
, "fr") == 0) {
167 else if (strcmp(collator
, "de") == 0) {
170 else if (strcmp(collator
, "es") == 0) {
179 * Getting the breakiterator
181 static UBreakIterator
*getBreakIterator(const char *breaker
)
183 if (breaker
== NULL
) {
186 #if !UCONFIG_NO_BREAK_ITERATION
187 if (strcmp(breaker
, "wordbreaker") == 0) {
188 return EN_WORDBREAKER_
;
191 return EN_CHARACTERBREAKER_
;
198 static void TestOpenClose(void)
200 UErrorCode status
= U_ZERO_ERROR
;
201 UStringSearch
*result
;
202 const UChar pattern
[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66};
203 const UChar text
[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67};
204 #if !UCONFIG_NO_BREAK_ITERATION
205 UBreakIterator
*breakiter
= ubrk_open(UBRK_WORD
, "en_US",
208 /* testing null arguments */
209 result
= usearch_open(NULL
, 0, NULL
, 0, NULL
, NULL
, &status
);
210 if (U_SUCCESS(status
) || result
!= NULL
) {
211 log_err("Error: NULL arguments should produce an error and a NULL result\n");
213 status
= U_ZERO_ERROR
;
214 result
= usearch_openFromCollator(NULL
, 0, NULL
, 0, NULL
, NULL
, &status
);
215 if (U_SUCCESS(status
) || result
!= NULL
) {
216 log_err("Error: NULL arguments should produce an error and a NULL result\n");
219 status
= U_ZERO_ERROR
;
220 result
= usearch_open(pattern
, 3, NULL
, 0, NULL
, NULL
, &status
);
221 if (U_SUCCESS(status
) || result
!= NULL
) {
222 log_err("Error: NULL arguments should produce an error and a NULL result\n");
224 status
= U_ZERO_ERROR
;
225 result
= usearch_openFromCollator(pattern
, 3, NULL
, 0, NULL
, NULL
,
227 if (U_SUCCESS(status
) || result
!= NULL
) {
228 log_err("Error: NULL arguments should produce an error and a NULL result\n");
231 status
= U_ZERO_ERROR
;
232 result
= usearch_open(pattern
, 3, text
, 6, NULL
, NULL
, &status
);
233 if (U_SUCCESS(status
) || result
!= NULL
) {
234 log_err("Error: NULL arguments should produce an error and a NULL result\n");
236 status
= U_ZERO_ERROR
;
237 result
= usearch_openFromCollator(pattern
, 3, text
, 6, NULL
, NULL
,
239 if (U_SUCCESS(status
) || result
!= NULL
) {
240 log_err("Error: NULL arguments should produce an error and a NULL result\n");
243 status
= U_ZERO_ERROR
;
244 result
= usearch_open(pattern
, 3, text
, 6, "en_US", NULL
, &status
);
245 if (U_FAILURE(status
) || result
== NULL
) {
246 log_err_status(status
, "Error: NULL break iterator is valid for opening search\n");
249 usearch_close(result
);
252 if (U_FAILURE(status
)) {
253 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
256 status
= U_ZERO_ERROR
;
257 result
= usearch_openFromCollator(pattern
, 3, text
, 6, EN_US_
, NULL
,
259 if (U_FAILURE(status
) || result
== NULL
) {
260 if (EN_US_
== NULL
) {
261 log_data_err("Opening collator failed.\n");
263 log_err("Error: NULL break iterator is valid for opening search\n");
267 usearch_close(result
);
271 status
= U_ZERO_ERROR
;
272 #if !UCONFIG_NO_BREAK_ITERATION
274 result
= usearch_open(pattern
, 3, text
, 6, "en_US", breakiter
, &status
);
275 if (U_FAILURE(status
) || result
== NULL
) {
276 log_err_status(status
, "Error: Break iterator is valid for opening search\n");
279 usearch_close(result
);
281 status
= U_ZERO_ERROR
;
282 result
= usearch_openFromCollator(pattern
, 3, text
, 6, EN_US_
, breakiter
,
284 if (U_FAILURE(status
) || result
== NULL
) {
285 if (EN_US_
== NULL
) {
286 log_data_err("Opening collator failed.\n");
288 log_err("Error: Break iterator is valid for opening search\n");
292 usearch_close(result
);
294 ubrk_close(breakiter
);
299 static void TestInitialization(void)
301 UErrorCode status
= U_ZERO_ERROR
;
303 const UChar text
[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66};
305 UStringSearch
*result
;
307 /* simple test on the pattern ce construction */
311 if (U_FAILURE(status
)) {
312 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
315 result
= usearch_openFromCollator(pattern
, 2, text
, 3, EN_US_
, NULL
,
317 if (U_FAILURE(status
)) {
318 log_err("Error opening search %s\n", u_errorName(status
));
320 usearch_close(result
);
322 /* testing if an extremely large pattern will fail the initialization */
323 for(i
= 0; i
< 512; i
++) {
326 /*uprv_memset(pattern, 0x41, 512);*/
327 result
= usearch_openFromCollator(pattern
, 512, text
, 3, EN_US_
, NULL
,
329 if (U_FAILURE(status
)) {
330 log_err("Error opening search %s\n", u_errorName(status
));
332 usearch_close(result
);
336 static UBool
assertEqualWithUStringSearch( UStringSearch
*strsrch
,
337 const SearchData search
)
340 UErrorCode status
= U_ZERO_ERROR
;
341 int32_t matchindex
= search
.offset
[count
];
343 UChar matchtext
[128];
348 usearch_setAttribute(strsrch
, USEARCH_ELEMENT_COMPARISON
, search
.elemCompare
, &status
);
349 if (U_FAILURE(status
)) {
350 log_err("Error setting USEARCH_ELEMENT_COMPARISON attribute %s\n", u_errorName(status
));
354 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
||
355 usearch_getMatchedLength(strsrch
) != 0) {
356 log_err("Error with the initialization of match start and length\n");
358 /* start of next matches */
359 while (U_SUCCESS(status
) && matchindex
>= 0) {
360 matchlength
= search
.size
[count
];
361 usearch_next(strsrch
, &status
);
362 if (matchindex
!= usearch_getMatchedStart(strsrch
) ||
363 matchlength
!= (uint32_t)usearch_getMatchedLength(strsrch
)) {
364 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
365 log_err("Text: %s\n", str
);
366 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
367 log_err("Pattern: %s\n", str
);
368 log_err("Error next match found at idx %d (len:%d); expected %d (len:%d)\n",
369 usearch_getMatchedStart(strsrch
), usearch_getMatchedLength(strsrch
),
370 matchindex
, matchlength
);
375 if (usearch_getMatchedText(strsrch
, matchtext
, 128, &status
) !=
376 (int32_t) matchlength
|| U_FAILURE(status
) ||
378 usearch_getText(strsrch
, &textlength
) + matchindex
,
379 matchlength
* sizeof(UChar
)) != 0) {
380 log_err("Error getting next matched text\n");
383 matchindex
= search
.offset
[count
];
385 usearch_next(strsrch
, &status
);
386 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
||
387 usearch_getMatchedLength(strsrch
) != 0) {
388 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
389 log_err("Text: %s\n", str
);
390 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
391 log_err("Pattern: %s\n", str
);
392 log_err("Error next match found at %d (len:%d); expected <NO MATCH>\n",
393 usearch_getMatchedStart(strsrch
),
394 usearch_getMatchedLength(strsrch
));
397 /* start of previous matches */
398 count
= count
== 0 ? 0 : count
- 1;
399 matchindex
= search
.offset
[count
];
401 while (U_SUCCESS(status
) && matchindex
>= 0) {
402 matchlength
= search
.size
[count
];
403 usearch_previous(strsrch
, &status
);
404 if (matchindex
!= usearch_getMatchedStart(strsrch
) ||
405 matchlength
!= (uint32_t)usearch_getMatchedLength(strsrch
)) {
406 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
407 log_err("Text: %s\n", str
);
408 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
409 log_err("Pattern: %s\n", str
);
410 log_err("Error previous match found at %d (len:%d); expected %d (len:%d)\n",
411 usearch_getMatchedStart(strsrch
), usearch_getMatchedLength(strsrch
),
412 matchindex
, matchlength
);
416 if (usearch_getMatchedText(strsrch
, matchtext
, 128, &status
) !=
417 (int32_t) matchlength
|| U_FAILURE(status
) ||
419 usearch_getText(strsrch
, &textlength
) + matchindex
,
420 matchlength
* sizeof(UChar
)) != 0) {
421 log_err("Error getting previous matched text\n");
424 matchindex
= count
> 0 ? search
.offset
[count
- 1] : -1;
427 usearch_previous(strsrch
, &status
);
428 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
||
429 usearch_getMatchedLength(strsrch
) != 0) {
430 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
431 log_err("Text: %s\n", str
);
432 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
433 log_err("Pattern: %s\n", str
);
434 log_err("Error previous match found at %d (len:%d); expected <NO MATCH>\n",
435 usearch_getMatchedStart(strsrch
),
436 usearch_getMatchedLength(strsrch
));
441 isOverlap
= (usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) == USEARCH_ON
);
443 /* start of following matches */
445 matchindex
= search
.offset
[count
];
449 usearch_following(strsrch
, nextStart
, &status
);
451 if (matchindex
< 0) {
452 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
|| usearch_getMatchedLength(strsrch
) != 0) {
453 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
454 log_err("Text: %s\n", str
);
455 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
456 log_err("Pattern: %s\n", str
);
457 log_err("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>\n",
458 nextStart
, isOverlap
,
459 usearch_getMatchedStart(strsrch
),
460 usearch_getMatchedLength(strsrch
));
463 /* no more matches */
467 matchlength
= search
.size
[count
];
468 if (usearch_getMatchedStart(strsrch
) != matchindex
469 || usearch_getMatchedLength(strsrch
) != matchlength
470 || U_FAILURE(status
)) {
471 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
472 log_err("Text: %s\n", str
);
473 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
474 log_err("Pattern: %s\n", str
);
475 log_err("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
476 nextStart
, isOverlap
,
477 usearch_getMatchedStart(strsrch
), usearch_getMatchedLength(strsrch
),
478 matchindex
, matchlength
);
482 if (isOverlap
|| usearch_getMatchedLength(strsrch
) == 0) {
483 nextStart
= usearch_getMatchedStart(strsrch
) + 1;
485 nextStart
= usearch_getMatchedStart(strsrch
) + usearch_getMatchedLength(strsrch
);
489 matchindex
= search
.offset
[count
];
492 /* start of preceding matches */
493 count
= -1; /* last non-negative offset index, could be -1 if no match */
494 while (search
.offset
[count
+ 1] >= 0) {
497 usearch_getText(strsrch
, &nextStart
);
500 usearch_preceding(strsrch
, nextStart
, &status
);
503 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
|| usearch_getMatchedLength(strsrch
) != 0) {
504 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
505 log_err("Text: %s\n", str
);
506 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
507 log_err("Pattern: %s\n", str
);
508 log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
509 nextStart
, isOverlap
,
510 usearch_getMatchedStart(strsrch
),
511 usearch_getMatchedLength(strsrch
));
514 /* no more matches */
518 matchindex
= search
.offset
[count
];
519 matchlength
= search
.size
[count
];
520 if (usearch_getMatchedStart(strsrch
) != matchindex
521 || usearch_getMatchedLength(strsrch
) != matchlength
522 || U_FAILURE(status
)) {
523 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
524 log_err("Text: %s\n", str
);
525 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
526 log_err("Pattern: %s\n", str
);
527 log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
528 nextStart
, isOverlap
,
529 usearch_getMatchedStart(strsrch
), usearch_getMatchedLength(strsrch
),
530 matchindex
, matchlength
);
534 nextStart
= matchindex
;
538 usearch_setAttribute(strsrch
, USEARCH_ELEMENT_COMPARISON
, USEARCH_STANDARD_ELEMENT_COMPARISON
, &status
);
542 static UBool
assertEqual(const SearchData search
)
544 UErrorCode status
= U_ZERO_ERROR
;
547 UCollator
*collator
= getCollator(search
.collator
);
548 UBreakIterator
*breaker
= getBreakIterator(search
.breaker
);
549 UStringSearch
*strsrch
;
551 CHECK_BREAK_BOOL(search
.breaker
);
553 u_unescape(search
.text
, text
, 128);
554 u_unescape(search
.pattern
, pattern
, 32);
555 ucol_setStrength(collator
, search
.strength
);
556 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
558 if (U_FAILURE(status
)) {
559 log_err("Error opening string search %s\n", u_errorName(status
));
563 if (!assertEqualWithUStringSearch(strsrch
, search
)) {
564 ucol_setStrength(collator
, UCOL_TERTIARY
);
565 usearch_close(strsrch
);
568 ucol_setStrength(collator
, UCOL_TERTIARY
);
569 usearch_close(strsrch
);
573 static UBool
assertCanonicalEqual(const SearchData search
)
575 UErrorCode status
= U_ZERO_ERROR
;
578 UCollator
*collator
= getCollator(search
.collator
);
579 UBreakIterator
*breaker
= getBreakIterator(search
.breaker
);
580 UStringSearch
*strsrch
;
583 CHECK_BREAK_BOOL(search
.breaker
);
584 u_unescape(search
.text
, text
, 128);
585 u_unescape(search
.pattern
, pattern
, 32);
586 ucol_setStrength(collator
, search
.strength
);
587 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
588 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
590 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
592 if (U_FAILURE(status
)) {
593 log_err("Error opening string search %s\n", u_errorName(status
));
598 if (!assertEqualWithUStringSearch(strsrch
, search
)) {
599 ucol_setStrength(collator
, UCOL_TERTIARY
);
600 usearch_close(strsrch
);
606 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
607 ucol_setStrength(collator
, UCOL_TERTIARY
);
608 usearch_close(strsrch
);
612 static UBool
assertEqualWithAttribute(const SearchData search
,
613 USearchAttributeValue canonical
,
614 USearchAttributeValue overlap
)
616 UErrorCode status
= U_ZERO_ERROR
;
619 UCollator
*collator
= getCollator(search
.collator
);
620 UBreakIterator
*breaker
= getBreakIterator(search
.breaker
);
621 UStringSearch
*strsrch
;
623 CHECK_BREAK_BOOL(search
.breaker
);
624 u_unescape(search
.text
, text
, 128);
625 u_unescape(search
.pattern
, pattern
, 32);
626 ucol_setStrength(collator
, search
.strength
);
627 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
629 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, canonical
,
631 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, overlap
, &status
);
633 if (U_FAILURE(status
)) {
634 log_err("Error opening string search %s\n", u_errorName(status
));
638 if (!assertEqualWithUStringSearch(strsrch
, search
)) {
639 ucol_setStrength(collator
, UCOL_TERTIARY
);
640 usearch_close(strsrch
);
643 ucol_setStrength(collator
, UCOL_TERTIARY
);
644 usearch_close(strsrch
);
648 static void TestBasic(void)
651 UErrorCode status
= U_ZERO_ERROR
;
653 if (U_FAILURE(status
)) {
654 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
657 while (BASIC
[count
].text
!= NULL
) {
658 if (!assertEqual(BASIC
[count
])) {
659 log_err("Error at test number %d\n", count
);
666 static void TestNormExact(void)
669 UErrorCode status
= U_ZERO_ERROR
;
671 if (U_FAILURE(status
)) {
672 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
675 ucol_setAttribute(EN_US_
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
676 if (U_FAILURE(status
)) {
677 log_err("Error setting collation normalization %s\n",
678 u_errorName(status
));
680 while (BASIC
[count
].text
!= NULL
) {
681 if (!assertEqual(BASIC
[count
])) {
682 log_err("Error at test number %d\n", count
);
687 while (NORMEXACT
[count
].text
!= NULL
) {
688 if (!assertEqual(NORMEXACT
[count
])) {
689 log_err("Error at test number %d\n", count
);
693 ucol_setAttribute(EN_US_
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
695 while (NONNORMEXACT
[count
].text
!= NULL
) {
696 if (!assertEqual(NONNORMEXACT
[count
])) {
697 log_err("Error at test number %d\n", count
);
704 static void TestStrength(void)
707 UErrorCode status
= U_ZERO_ERROR
;
709 if (U_FAILURE(status
)) {
710 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
713 while (STRENGTH
[count
].text
!= NULL
) {
714 if (!assertEqual(STRENGTH
[count
])) {
715 log_err("Error at test number %d\n", count
);
722 static void TestBreakIterator(void) {
723 UErrorCode status
= U_ZERO_ERROR
;
724 UStringSearch
*strsrch
;
731 #if !UCONFIG_NO_BREAK_ITERATION
733 if (U_FAILURE(status
)) {
734 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
737 if (usearch_getBreakIterator(NULL
) != NULL
) {
738 log_err("Expected NULL breakiterator from NULL string search\n");
740 u_unescape(BREAKITERATOREXACT
[0].text
, text
, 128);
741 u_unescape(BREAKITERATOREXACT
[0].pattern
, pattern
, 32);
742 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
, NULL
,
744 if (U_FAILURE(status
)) {
745 log_err("Error opening string search %s\n", u_errorName(status
));
746 goto ENDTESTBREAKITERATOR
;
749 usearch_setBreakIterator(strsrch
, NULL
, &status
);
750 if (U_FAILURE(status
) || usearch_getBreakIterator(strsrch
) != NULL
) {
751 log_err("Error usearch_getBreakIterator returned wrong object");
752 goto ENDTESTBREAKITERATOR
;
755 usearch_setBreakIterator(strsrch
, EN_CHARACTERBREAKER_
, &status
);
756 if (U_FAILURE(status
) ||
757 usearch_getBreakIterator(strsrch
) != EN_CHARACTERBREAKER_
) {
758 log_err("Error usearch_getBreakIterator returned wrong object");
759 goto ENDTESTBREAKITERATOR
;
762 usearch_setBreakIterator(strsrch
, EN_WORDBREAKER_
, &status
);
763 if (U_FAILURE(status
) ||
764 usearch_getBreakIterator(strsrch
) != EN_WORDBREAKER_
) {
765 log_err("Error usearch_getBreakIterator returned wrong object");
766 goto ENDTESTBREAKITERATOR
;
769 usearch_close(strsrch
);
773 /* 0-3 test are fixed */
774 const SearchData
*search
= &(BREAKITERATOREXACT
[count
]);
775 UCollator
*collator
= getCollator(search
->collator
);
776 UBreakIterator
*breaker
= getBreakIterator(search
->breaker
);
778 u_unescape(search
->text
, text
, 128);
779 u_unescape(search
->pattern
, pattern
, 32);
780 ucol_setStrength(collator
, search
->strength
);
782 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
784 if (U_FAILURE(status
) ||
785 usearch_getBreakIterator(strsrch
) != breaker
) {
786 log_err("Error setting break iterator\n");
787 if (strsrch
!= NULL
) {
788 usearch_close(strsrch
);
791 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
792 ucol_setStrength(collator
, UCOL_TERTIARY
);
793 usearch_close(strsrch
);
794 goto ENDTESTBREAKITERATOR
;
796 search
= &(BREAKITERATOREXACT
[count
+ 1]);
797 breaker
= getBreakIterator(search
->breaker
);
798 usearch_setBreakIterator(strsrch
, breaker
, &status
);
799 if (U_FAILURE(status
) || usearch_getBreakIterator(strsrch
) != breaker
) {
800 log_err("Error setting break iterator\n");
801 usearch_close(strsrch
);
802 goto ENDTESTBREAKITERATOR
;
804 usearch_reset(strsrch
);
805 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
806 log_err("Error at test number %d\n", count
);
807 usearch_close(strsrch
);
808 goto ENDTESTBREAKITERATOR
;
810 usearch_close(strsrch
);
814 while (BREAKITERATOREXACT
[count
].text
!= NULL
) {
815 if (!assertEqual(BREAKITERATOREXACT
[count
])) {
816 log_err("Error at test number %d\n", count
);
817 goto ENDTESTBREAKITERATOR
;
822 ENDTESTBREAKITERATOR
:
827 static void TestVariable(void)
830 UErrorCode status
= U_ZERO_ERROR
;
832 if (U_FAILURE(status
)) {
833 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
836 ucol_setAttribute(EN_US_
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
837 if (U_FAILURE(status
)) {
838 log_err("Error setting collation alternate attribute %s\n",
839 u_errorName(status
));
841 while (VARIABLE
[count
].text
!= NULL
) {
842 log_verbose("variable %d\n", count
);
843 if (!assertEqual(VARIABLE
[count
])) {
844 log_err("Error at test number %d\n", count
);
848 ucol_setAttribute(EN_US_
, UCOL_ALTERNATE_HANDLING
,
849 UCOL_NON_IGNORABLE
, &status
);
853 static void TestOverlap(void)
856 UErrorCode status
= U_ZERO_ERROR
;
858 if (U_FAILURE(status
)) {
859 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
862 while (OVERLAP
[count
].text
!= NULL
) {
863 if (!assertEqualWithAttribute(OVERLAP
[count
], USEARCH_OFF
,
865 log_err("Error at overlap test number %d\n", count
);
870 while (NONOVERLAP
[count
].text
!= NULL
) {
871 if (!assertEqual(NONOVERLAP
[count
])) {
872 log_err("Error at non overlap test number %d\n", count
);
881 const SearchData
*search
= &(OVERLAP
[count
]);
882 UCollator
*collator
= getCollator(search
->collator
);
883 UStringSearch
*strsrch
;
884 status
= U_ZERO_ERROR
;
886 u_unescape(search
->text
, text
, 128);
887 u_unescape(search
->pattern
, pattern
, 32);
888 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
890 if(status
== U_FILE_ACCESS_ERROR
) {
891 log_data_err("Is your data around?\n");
893 } else if(U_FAILURE(status
)) {
894 log_err("Error opening searcher\n");
897 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_ON
, &status
);
898 if (U_FAILURE(status
) ||
899 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_ON
) {
900 log_err("Error setting overlap option\n");
902 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
903 usearch_close(strsrch
);
906 search
= &(NONOVERLAP
[count
]);
907 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_OFF
, &status
);
908 if (U_FAILURE(status
) ||
909 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_OFF
) {
910 log_err("Error setting overlap option\n");
912 usearch_reset(strsrch
);
913 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
914 usearch_close(strsrch
);
915 log_err("Error at test number %d\n", count
);
919 usearch_close(strsrch
);
924 static void TestCollator(void)
926 /* test collator that thinks "o" and "p" are the same thing */
928 UCollator
*tailored
= NULL
;
929 UErrorCode status
= U_ZERO_ERROR
;
932 UStringSearch
*strsrch
;
941 strsrch
= usearch_open(pattern
, 2, text
, 5, "en_US", NULL
, &status
);
942 if(status
== U_FILE_ACCESS_ERROR
) {
943 log_data_err("Is your data around?\n");
945 } else if(U_FAILURE(status
)) {
946 log_err("Error opening searcher\n");
949 tailored
= usearch_getCollator(strsrch
);
950 if (usearch_next(strsrch
, &status
) != -1) {
951 log_err("Error: Found case insensitive match, when we shouldn't\n");
953 ucol_setStrength(tailored
, UCOL_PRIMARY
);
954 usearch_reset(strsrch
);
955 if (usearch_next(strsrch
, &status
) != 1) {
956 log_err("Error: Found case insensitive match not found\n");
958 usearch_close(strsrch
);
962 if (usearch_getCollator(NULL
) != NULL
) {
963 log_err("Expected NULL collator from NULL string search\n");
965 u_unescape(COLLATOR
[0].text
, text
, 128);
966 u_unescape(COLLATOR
[0].pattern
, pattern
, 32);
968 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
970 if (U_FAILURE(status
)) {
971 log_err("Error opening string search %s\n", u_errorName(status
));
973 if (!assertEqualWithUStringSearch(strsrch
, COLLATOR
[0])) {
974 goto ENDTESTCOLLATOR
;
977 u_unescape(TESTCOLLATORRULE
, rules
, 32);
978 tailored
= ucol_openRules(rules
, -1, UCOL_ON
, COLLATOR
[1].strength
,
980 if (U_FAILURE(status
)) {
981 log_err("Error opening rule based collator %s\n", u_errorName(status
));
984 usearch_setCollator(strsrch
, tailored
, &status
);
985 if (U_FAILURE(status
) || usearch_getCollator(strsrch
) != tailored
) {
986 log_err("Error setting rule based collator\n");
988 usearch_reset(strsrch
);
989 if (!assertEqualWithUStringSearch(strsrch
, COLLATOR
[1])) {
990 goto ENDTESTCOLLATOR
;
993 usearch_setCollator(strsrch
, EN_US_
, &status
);
994 usearch_reset(strsrch
);
995 if (U_FAILURE(status
) || usearch_getCollator(strsrch
) != EN_US_
) {
996 log_err("Error setting rule based collator\n");
998 if (!assertEqualWithUStringSearch(strsrch
, COLLATOR
[0])) {
999 goto ENDTESTCOLLATOR
;
1003 usearch_close(strsrch
);
1004 if (tailored
!= NULL
) {
1005 ucol_close(tailored
);
1010 static void TestPattern(void)
1012 UStringSearch
*strsrch
;
1014 UChar bigpattern
[512];
1018 UErrorCode status
= U_ZERO_ERROR
;
1021 if (U_FAILURE(status
)) {
1022 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1025 if (usearch_getPattern(NULL
, &templength
) != NULL
) {
1026 log_err("Error NULL string search expected returning NULL pattern\n");
1028 usearch_setPattern(NULL
, pattern
, 3, &status
);
1029 if (U_SUCCESS(status
)) {
1030 log_err("Error expected setting pattern in NULL strings search\n");
1032 status
= U_ZERO_ERROR
;
1033 u_unescape(PATTERN
[0].text
, text
, 128);
1034 u_unescape(PATTERN
[0].pattern
, pattern
, 32);
1036 ucol_setStrength(EN_US_
, PATTERN
[0].strength
);
1037 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
1039 if(status
== U_FILE_ACCESS_ERROR
) {
1040 log_data_err("Is your data around?\n");
1042 } else if(U_FAILURE(status
)) {
1043 log_err("Error opening searcher\n");
1047 status
= U_ZERO_ERROR
;
1048 usearch_setPattern(strsrch
, NULL
, 3, &status
);
1049 if (U_SUCCESS(status
)) {
1050 log_err("Error expected setting NULL pattern in strings search\n");
1052 status
= U_ZERO_ERROR
;
1053 usearch_setPattern(strsrch
, pattern
, 0, &status
);
1054 if (U_SUCCESS(status
)) {
1055 log_err("Error expected setting pattern with length 0 in strings search\n");
1057 status
= U_ZERO_ERROR
;
1058 if (U_FAILURE(status
)) {
1059 log_err("Error opening string search %s\n", u_errorName(status
));
1060 goto ENDTESTPATTERN
;
1062 temp
= usearch_getPattern(strsrch
, &templength
);
1063 if (u_strcmp(pattern
, temp
) != 0) {
1064 log_err("Error setting pattern\n");
1066 if (!assertEqualWithUStringSearch(strsrch
, PATTERN
[0])) {
1067 goto ENDTESTPATTERN
;
1070 u_unescape(PATTERN
[1].pattern
, pattern
, 32);
1071 usearch_setPattern(strsrch
, pattern
, -1, &status
);
1072 temp
= usearch_getPattern(strsrch
, &templength
);
1073 if (u_strcmp(pattern
, temp
) != 0) {
1074 log_err("Error setting pattern\n");
1075 goto ENDTESTPATTERN
;
1077 usearch_reset(strsrch
);
1078 if (U_FAILURE(status
)) {
1079 log_err("Error setting pattern %s\n", u_errorName(status
));
1081 if (!assertEqualWithUStringSearch(strsrch
, PATTERN
[1])) {
1082 goto ENDTESTPATTERN
;
1085 u_unescape(PATTERN
[0].pattern
, pattern
, 32);
1086 usearch_setPattern(strsrch
, pattern
, -1, &status
);
1087 temp
= usearch_getPattern(strsrch
, &templength
);
1088 if (u_strcmp(pattern
, temp
) != 0) {
1089 log_err("Error setting pattern\n");
1090 goto ENDTESTPATTERN
;
1092 usearch_reset(strsrch
);
1093 if (U_FAILURE(status
)) {
1094 log_err("Error setting pattern %s\n", u_errorName(status
));
1096 if (!assertEqualWithUStringSearch(strsrch
, PATTERN
[0])) {
1097 goto ENDTESTPATTERN
;
1099 /* enormous pattern size to see if this crashes */
1100 for (templength
= 0; templength
!= 512; templength
++) {
1101 bigpattern
[templength
] = 0x61;
1103 bigpattern
[511] = 0;
1104 usearch_setPattern(strsrch
, bigpattern
, -1, &status
);
1105 if (U_FAILURE(status
)) {
1106 log_err("Error setting pattern with size 512, %s \n",
1107 u_errorName(status
));
1110 ucol_setStrength(EN_US_
, UCOL_TERTIARY
);
1111 if (strsrch
!= NULL
) {
1112 usearch_close(strsrch
);
1117 static void TestText(void)
1119 UStringSearch
*strsrch
;
1124 UErrorCode status
= U_ZERO_ERROR
;
1126 u_unescape(TEXT
[0].text
, text
, 128);
1127 u_unescape(TEXT
[0].pattern
, pattern
, 32);
1130 if (U_FAILURE(status
)) {
1131 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1135 if (usearch_getText(NULL
, &templength
) != NULL
) {
1136 log_err("Error NULL string search should return NULL text\n");
1139 usearch_setText(NULL
, text
, 10, &status
);
1140 if (U_SUCCESS(status
)) {
1141 log_err("Error NULL string search should have an error when setting text\n");
1144 status
= U_ZERO_ERROR
;
1145 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
1148 if (U_FAILURE(status
)) {
1149 log_err("Error opening string search %s\n", u_errorName(status
));
1150 goto ENDTESTPATTERN
;
1152 temp
= usearch_getText(strsrch
, &templength
);
1153 if (u_strcmp(text
, temp
) != 0) {
1154 log_err("Error setting text\n");
1156 if (!assertEqualWithUStringSearch(strsrch
, TEXT
[0])) {
1157 goto ENDTESTPATTERN
;
1160 u_unescape(TEXT
[1].text
, text
, 32);
1161 usearch_setText(strsrch
, text
, -1, &status
);
1162 temp
= usearch_getText(strsrch
, &templength
);
1163 if (u_strcmp(text
, temp
) != 0) {
1164 log_err("Error setting text\n");
1165 goto ENDTESTPATTERN
;
1167 if (U_FAILURE(status
)) {
1168 log_err("Error setting text %s\n", u_errorName(status
));
1170 if (!assertEqualWithUStringSearch(strsrch
, TEXT
[1])) {
1171 goto ENDTESTPATTERN
;
1174 u_unescape(TEXT
[0].text
, text
, 32);
1175 usearch_setText(strsrch
, text
, -1, &status
);
1176 temp
= usearch_getText(strsrch
, &templength
);
1177 if (u_strcmp(text
, temp
) != 0) {
1178 log_err("Error setting text\n");
1179 goto ENDTESTPATTERN
;
1181 if (U_FAILURE(status
)) {
1182 log_err("Error setting pattern %s\n", u_errorName(status
));
1184 if (!assertEqualWithUStringSearch(strsrch
, TEXT
[0])) {
1185 goto ENDTESTPATTERN
;
1188 if (strsrch
!= NULL
) {
1189 usearch_close(strsrch
);
1194 static void TestCompositeBoundaries(void)
1197 UErrorCode status
= U_ZERO_ERROR
;
1199 if (U_FAILURE(status
)) {
1200 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1203 while (COMPOSITEBOUNDARIES
[count
].text
!= NULL
) {
1204 log_verbose("composite %d\n", count
);
1205 if (!assertEqual(COMPOSITEBOUNDARIES
[count
])) {
1206 log_err("Error at test number %d\n", count
);
1213 static void TestGetSetOffset(void)
1215 int searchDataIndex
= 0;
1218 UErrorCode status
= U_ZERO_ERROR
;
1219 UStringSearch
*strsrch
;
1220 memset(pattern
, 0, 32*sizeof(UChar
));
1221 memset(text
, 0, 128*sizeof(UChar
));
1224 if (U_FAILURE(status
)) {
1225 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1228 if (usearch_getOffset(NULL
) != USEARCH_DONE
) {
1229 log_err("usearch_getOffset(NULL) expected USEARCH_DONE\n");
1231 strsrch
= usearch_openFromCollator(pattern
, 16, text
, 32, EN_US_
, NULL
,
1233 /* testing out of bounds error */
1234 usearch_setOffset(strsrch
, -1, &status
);
1235 if (U_SUCCESS(status
)) {
1236 log_err("Error expecting set offset error\n");
1238 usearch_setOffset(strsrch
, 128, &status
);
1239 if (U_SUCCESS(status
)) {
1240 log_err("Error expecting set offset error\n");
1242 while (BASIC
[searchDataIndex
].text
!= NULL
) {
1244 SearchData search
= BASIC
[searchDataIndex
++];
1245 int32_t matchindex
= search
.offset
[count
];
1248 u_unescape(search
.text
, text
, 128);
1249 u_unescape(search
.pattern
, pattern
, 32);
1250 status
= U_ZERO_ERROR
;
1251 usearch_setText(strsrch
, text
, -1, &status
);
1252 usearch_setPattern(strsrch
, pattern
, -1, &status
);
1253 ucol_setStrength(usearch_getCollator(strsrch
), search
.strength
);
1254 usearch_reset(strsrch
);
1255 while (U_SUCCESS(status
) && matchindex
>= 0) {
1256 uint32_t matchlength
= search
.size
[count
];
1257 usearch_next(strsrch
, &status
);
1258 if (matchindex
!= usearch_getMatchedStart(strsrch
) ||
1259 matchlength
!= (uint32_t)usearch_getMatchedLength(strsrch
)) {
1260 char *str
= toCharString(usearch_getText(strsrch
,
1262 log_err("Text: %s\n", str
);
1263 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
1264 log_err("Pattern: %s\n", str
);
1265 log_err("Error match found at %d %d\n",
1266 usearch_getMatchedStart(strsrch
),
1267 usearch_getMatchedLength(strsrch
));
1270 usearch_setOffset(strsrch
, matchindex
+ matchlength
, &status
);
1271 usearch_previous(strsrch
, &status
);
1272 if (matchindex
!= usearch_getMatchedStart(strsrch
) ||
1273 matchlength
!= (uint32_t)usearch_getMatchedLength(strsrch
)) {
1274 char *str
= toCharString(usearch_getText(strsrch
,
1276 log_err("Text: %s\n", str
);
1277 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
1278 log_err("Pattern: %s\n", str
);
1279 log_err("Error match found at %d %d\n",
1280 usearch_getMatchedStart(strsrch
),
1281 usearch_getMatchedLength(strsrch
));
1284 usearch_setOffset(strsrch
, matchindex
+ matchlength
, &status
);
1285 matchindex
= search
.offset
[count
+ 1] == -1 ? -1 :
1286 search
.offset
[count
+ 2];
1287 if (search
.offset
[count
+ 1] != -1) {
1288 usearch_setOffset(strsrch
, search
.offset
[count
+ 1] + 1,
1290 if (usearch_getOffset(strsrch
) != search
.offset
[count
+ 1] + 1) {
1291 log_err("Error setting offset\n");
1298 usearch_next(strsrch
, &status
);
1299 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
) {
1300 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
1301 log_err("Text: %s\n", str
);
1302 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
1303 log_err("Pattern: %s\n", str
);
1304 log_err("Error match found at %d %d\n",
1305 usearch_getMatchedStart(strsrch
),
1306 usearch_getMatchedLength(strsrch
));
1310 ucol_setStrength(usearch_getCollator(strsrch
), UCOL_TERTIARY
);
1311 usearch_close(strsrch
);
1315 static void TestGetSetAttribute(void)
1317 UErrorCode status
= U_ZERO_ERROR
;
1320 UStringSearch
*strsrch
;
1322 memset(pattern
, 0, 32*sizeof(UChar
));
1323 memset(text
, 0, 128*sizeof(UChar
));
1326 if (U_FAILURE(status
)) {
1327 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1330 if (usearch_getAttribute(NULL
, USEARCH_OVERLAP
) != USEARCH_DEFAULT
||
1331 usearch_getAttribute(NULL
, USEARCH_CANONICAL_MATCH
) !=
1334 "Attributes for NULL string search should be USEARCH_DEFAULT\n");
1336 strsrch
= usearch_openFromCollator(pattern
, 16, text
, 32, EN_US_
, NULL
,
1338 if (U_FAILURE(status
)) {
1339 log_err("Error opening search %s\n", u_errorName(status
));
1343 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_DEFAULT
, &status
);
1344 if (U_FAILURE(status
) ||
1345 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_OFF
) {
1346 log_err("Error setting overlap to the default\n");
1348 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_ON
, &status
);
1349 if (U_FAILURE(status
) ||
1350 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_ON
) {
1351 log_err("Error setting overlap true\n");
1353 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_OFF
, &status
);
1354 if (U_FAILURE(status
) ||
1355 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_OFF
) {
1356 log_err("Error setting overlap false\n");
1358 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
,
1359 USEARCH_ATTRIBUTE_VALUE_COUNT
, &status
);
1360 if (U_SUCCESS(status
)) {
1361 log_err("Error setting overlap to illegal value\n");
1363 status
= U_ZERO_ERROR
;
1364 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_DEFAULT
,
1366 if (U_FAILURE(status
) ||
1367 usearch_getAttribute(strsrch
, USEARCH_CANONICAL_MATCH
) !=
1369 log_err("Error setting canonical match to the default\n");
1371 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
1373 if (U_FAILURE(status
) ||
1374 usearch_getAttribute(strsrch
, USEARCH_CANONICAL_MATCH
) !=
1376 log_err("Error setting canonical match true\n");
1378 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_OFF
,
1380 if (U_FAILURE(status
) ||
1381 usearch_getAttribute(strsrch
, USEARCH_CANONICAL_MATCH
) !=
1383 log_err("Error setting canonical match false\n");
1385 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
,
1386 USEARCH_ATTRIBUTE_VALUE_COUNT
, &status
);
1387 if (U_SUCCESS(status
)) {
1388 log_err("Error setting canonical match to illegal value\n");
1390 status
= U_ZERO_ERROR
;
1391 usearch_setAttribute(strsrch
, USEARCH_ATTRIBUTE_COUNT
, USEARCH_DEFAULT
,
1393 if (U_SUCCESS(status
)) {
1394 log_err("Error setting illegal attribute success\n");
1397 usearch_close(strsrch
);
1401 static void TestGetMatch(void)
1404 UErrorCode status
= U_ZERO_ERROR
;
1407 SearchData search
= MATCH
[0];
1408 int32_t matchindex
= search
.offset
[count
];
1409 UStringSearch
*strsrch
;
1411 UChar matchtext
[128];
1414 if (U_FAILURE(status
)) {
1415 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1419 if (usearch_getMatchedStart(NULL
) != USEARCH_DONE
||
1420 usearch_getMatchedLength(NULL
) != USEARCH_DONE
) {
1422 "Expected start and length of NULL string search should be USEARCH_DONE\n");
1425 u_unescape(search
.text
, text
, 128);
1426 u_unescape(search
.pattern
, pattern
, 32);
1427 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
1429 if (U_FAILURE(status
)) {
1430 log_err("Error opening string search %s\n", u_errorName(status
));
1431 if (strsrch
!= NULL
) {
1432 usearch_close(strsrch
);
1437 while (U_SUCCESS(status
) && matchindex
>= 0) {
1438 int32_t matchlength
= search
.size
[count
];
1439 usearch_next(strsrch
, &status
);
1440 if (matchindex
!= usearch_getMatchedStart(strsrch
) ||
1441 matchlength
!= usearch_getMatchedLength(strsrch
)) {
1442 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
1443 log_err("Text: %s\n", str
);
1444 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
1445 log_err("Pattern: %s\n", str
);
1446 log_err("Error match found at %d %d\n",
1447 usearch_getMatchedStart(strsrch
),
1448 usearch_getMatchedLength(strsrch
));
1453 status
= U_ZERO_ERROR
;
1454 if (usearch_getMatchedText(NULL
, matchtext
, 128, &status
) !=
1455 USEARCH_DONE
|| U_SUCCESS(status
)){
1456 log_err("Error expecting errors with NULL string search\n");
1458 status
= U_ZERO_ERROR
;
1459 if (usearch_getMatchedText(strsrch
, NULL
, 0, &status
) !=
1460 (int32_t)matchlength
|| U_SUCCESS(status
)){
1461 log_err("Error pre-flighting match length\n");
1463 status
= U_ZERO_ERROR
;
1464 if (usearch_getMatchedText(strsrch
, matchtext
, 0, &status
) !=
1465 (int32_t)matchlength
|| U_SUCCESS(status
)){
1466 log_err("Error getting match text with buffer size 0\n");
1468 status
= U_ZERO_ERROR
;
1469 if (usearch_getMatchedText(strsrch
, matchtext
, matchlength
, &status
)
1470 != (int32_t)matchlength
|| matchtext
[matchlength
- 1] == 0 ||
1472 log_err("Error getting match text with exact size\n");
1474 status
= U_ZERO_ERROR
;
1475 if (usearch_getMatchedText(strsrch
, matchtext
, 128, &status
) !=
1476 (int32_t) matchlength
|| U_FAILURE(status
) ||
1478 usearch_getText(strsrch
, &textlength
) + matchindex
,
1479 matchlength
* sizeof(UChar
)) != 0 ||
1480 matchtext
[matchlength
] != 0) {
1481 log_err("Error getting matched text\n");
1484 matchindex
= search
.offset
[count
];
1486 status
= U_ZERO_ERROR
;
1487 usearch_next(strsrch
, &status
);
1488 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
||
1489 usearch_getMatchedLength(strsrch
) != 0) {
1490 log_err("Error end of match not found\n");
1492 status
= U_ZERO_ERROR
;
1493 if (usearch_getMatchedText(strsrch
, matchtext
, 128, &status
) !=
1495 log_err("Error getting null matches\n");
1497 usearch_close(strsrch
);
1501 static void TestSetMatch(void)
1504 UErrorCode status
= U_ZERO_ERROR
;
1506 if (U_FAILURE(status
)) {
1507 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1510 while (MATCH
[count
].text
!= NULL
) {
1511 SearchData search
= MATCH
[count
];
1513 int offsetIndex
= 0;
1516 UStringSearch
*strsrch
;
1517 status
= U_ZERO_ERROR
;
1519 if (usearch_first(NULL
, &status
) != USEARCH_DONE
||
1520 usearch_last(NULL
, &status
) != USEARCH_DONE
) {
1521 log_err("Error getting the first and last match of a NULL string search\n");
1523 u_unescape(search
.text
, text
, 128);
1524 u_unescape(search
.pattern
, pattern
, 32);
1525 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
1527 if (U_FAILURE(status
)) {
1528 log_err("Error opening string search %s\n", u_errorName(status
));
1529 if (strsrch
!= NULL
) {
1530 usearch_close(strsrch
);
1536 while (search
.offset
[size
] != -1) {
1540 if (usearch_first(strsrch
, &status
) != search
.offset
[0] ||
1541 U_FAILURE(status
)) {
1542 log_err("Error getting first match\n");
1544 if (usearch_last(strsrch
, &status
) != search
.offset
[size
-1] ||
1545 U_FAILURE(status
)) {
1546 log_err("Error getting last match\n");
1549 while (offsetIndex
< size
) {
1550 if (offsetIndex
+ 2 < size
) {
1551 if (usearch_following(strsrch
, search
.offset
[offsetIndex
+ 2] - 1,
1552 &status
) != search
.offset
[offsetIndex
+ 2] ||
1553 U_FAILURE(status
)) {
1554 log_err("Error getting following match at index %d\n",
1555 search
.offset
[offsetIndex
+ 2] - 1);
1558 if (offsetIndex
+ 1 < size
) {
1559 if (usearch_preceding(strsrch
, search
.offset
[offsetIndex
+ 1] +
1560 search
.size
[offsetIndex
+ 1] + 1,
1561 &status
) != search
.offset
[offsetIndex
+ 1] ||
1562 U_FAILURE(status
)) {
1563 log_err("Error getting preceeding match at index %d\n",
1564 search
.offset
[offsetIndex
+ 1] + 1);
1569 status
= U_ZERO_ERROR
;
1570 if (usearch_following(strsrch
, u_strlen(text
), &status
) !=
1572 log_err("Error expecting out of bounds match\n");
1574 if (usearch_preceding(strsrch
, 0, &status
) != USEARCH_DONE
) {
1575 log_err("Error expecting out of bounds match\n");
1578 usearch_close(strsrch
);
1583 static void TestReset(void)
1585 UErrorCode status
= U_ZERO_ERROR
;
1586 UChar text
[] = {0x66, 0x69, 0x73, 0x68, 0x20,
1587 0x66, 0x69, 0x73, 0x68};
1588 UChar pattern
[] = {0x73};
1589 UStringSearch
*strsrch
;
1592 if (U_FAILURE(status
)) {
1593 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1596 strsrch
= usearch_openFromCollator(pattern
, 1, text
, 9,
1597 EN_US_
, NULL
, &status
);
1598 if (U_FAILURE(status
)) {
1599 log_err("Error opening string search %s\n", u_errorName(status
));
1600 if (strsrch
!= NULL
) {
1601 usearch_close(strsrch
);
1605 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_ON
, &status
);
1606 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
1608 usearch_setOffset(strsrch
, 9, &status
);
1609 if (U_FAILURE(status
)) {
1610 log_err("Error setting attributes and offsets\n");
1613 usearch_reset(strsrch
);
1614 if (usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_OFF
||
1615 usearch_getAttribute(strsrch
, USEARCH_CANONICAL_MATCH
) !=
1617 usearch_getOffset(strsrch
) != 0 ||
1618 usearch_getMatchedLength(strsrch
) != 0 ||
1619 usearch_getMatchedStart(strsrch
) != USEARCH_DONE
) {
1620 log_err("Error resetting string search\n");
1622 usearch_previous(strsrch
, &status
);
1623 if (usearch_getMatchedStart(strsrch
) != 7 ||
1624 usearch_getMatchedLength(strsrch
) != 1) {
1625 log_err("Error resetting string search\n");
1628 usearch_close(strsrch
);
1632 static void TestSupplementary(void)
1635 UErrorCode status
= U_ZERO_ERROR
;
1637 if (U_FAILURE(status
)) {
1638 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1641 while (SUPPLEMENTARY
[count
].text
!= NULL
) {
1642 if (!assertEqual(SUPPLEMENTARY
[count
])) {
1643 log_err("Error at test number %d\n", count
);
1650 static void TestContraction(void)
1655 UCollator
*collator
;
1656 UErrorCode status
= U_ZERO_ERROR
;
1658 UStringSearch
*strsrch
;
1659 memset(rules
, 0, 128*sizeof(UChar
));
1660 memset(pattern
, 0, 128*sizeof(UChar
));
1661 memset(text
, 0, 128*sizeof(UChar
));
1663 u_unescape(CONTRACTIONRULE
, rules
, 128);
1664 collator
= ucol_openRules(rules
, u_strlen(rules
), UCOL_ON
,
1665 UCOL_TERTIARY
, NULL
, &status
);
1666 if(status
== U_FILE_ACCESS_ERROR
) {
1667 log_data_err("Is your data around?\n");
1669 } else if(U_FAILURE(status
)) {
1670 log_err("Error opening collator %s\n", u_errorName(status
));
1673 strsrch
= usearch_openFromCollator(pattern
, 1, text
, 1, collator
, NULL
,
1675 if (U_FAILURE(status
)) {
1676 log_err("Error opening string search %s\n", u_errorName(status
));
1679 while (CONTRACTION
[count
].text
!= NULL
) {
1680 u_unescape(CONTRACTION
[count
].text
, text
, 128);
1681 u_unescape(CONTRACTION
[count
].pattern
, pattern
, 128);
1682 usearch_setText(strsrch
, text
, -1, &status
);
1683 usearch_setPattern(strsrch
, pattern
, -1, &status
);
1684 if (!assertEqualWithUStringSearch(strsrch
, CONTRACTION
[count
])) {
1685 log_err("Error at test number %d\n", count
);
1689 usearch_close(strsrch
);
1690 ucol_close(collator
);
1693 static void TestIgnorable(void)
1698 UCollator
*collator
;
1699 UErrorCode status
= U_ZERO_ERROR
;
1700 UStringSearch
*strsrch
;
1703 memset(rules
, 0, 128*sizeof(UChar
));
1704 memset(pattern
, 0, 128*sizeof(UChar
));
1705 memset(text
, 0, 128*sizeof(UChar
));
1707 u_unescape(IGNORABLERULE
, rules
, 128);
1708 collator
= ucol_openRules(rules
, u_strlen(rules
), UCOL_ON
,
1709 IGNORABLE
[count
].strength
, NULL
, &status
);
1710 if(status
== U_FILE_ACCESS_ERROR
) {
1711 log_data_err("Is your data around?\n");
1713 } else if(U_FAILURE(status
)) {
1714 log_err("Error opening collator %s\n", u_errorName(status
));
1717 strsrch
= usearch_openFromCollator(pattern
, 1, text
, 1, collator
, NULL
,
1719 if (U_FAILURE(status
)) {
1720 log_err("Error opening string search %s\n", u_errorName(status
));
1723 while (IGNORABLE
[count
].text
!= NULL
) {
1724 u_unescape(IGNORABLE
[count
].text
, text
, 128);
1725 u_unescape(IGNORABLE
[count
].pattern
, pattern
, 128);
1726 usearch_setText(strsrch
, text
, -1, &status
);
1727 usearch_setPattern(strsrch
, pattern
, -1, &status
);
1728 if (!assertEqualWithUStringSearch(strsrch
, IGNORABLE
[count
])) {
1729 log_err("Error at test number %d\n", count
);
1733 usearch_close(strsrch
);
1734 ucol_close(collator
);
1737 static void TestDiacriticMatch(void)
1741 UErrorCode status
= U_ZERO_ERROR
;
1742 UStringSearch
*strsrch
= NULL
;
1743 UCollator
*coll
= NULL
;
1747 memset(pattern
, 0, 128*sizeof(UChar
));
1748 memset(text
, 0, 128*sizeof(UChar
));
1750 strsrch
= usearch_open(pattern
, 1, text
, 1, uloc_getDefault(), NULL
, &status
);
1751 if (U_FAILURE(status
)) {
1752 log_err_status(status
, "Error opening string search %s\n", u_errorName(status
));
1756 search
= DIACRITICMATCH
[count
];
1757 while (search
.text
!= NULL
) {
1758 if (search
.collator
!= NULL
) {
1759 coll
= ucol_openFromShortString(search
.collator
, FALSE
, NULL
, &status
);
1761 /* Always use "en_US" because some of these tests fail in Danish locales. */
1762 coll
= ucol_open("en_US"/*uloc_getDefault()*/, &status
);
1763 ucol_setStrength(coll
, search
.strength
);
1765 if (U_FAILURE(status
)) {
1766 log_err("Error opening string search collator(\"%s\") %s\n", search
.collator
, u_errorName(status
));
1770 usearch_setCollator(strsrch
, coll
, &status
);
1771 if (U_FAILURE(status
)) {
1772 log_err("Error setting string search collator %s\n", u_errorName(status
));
1776 u_unescape(search
.text
, text
, 128);
1777 u_unescape(search
.pattern
, pattern
, 128);
1778 usearch_setText(strsrch
, text
, -1, &status
);
1779 usearch_setPattern(strsrch
, pattern
, -1, &status
);
1780 if (!assertEqualWithUStringSearch(strsrch
, search
)) {
1781 log_err("Error at test number %d\n", count
);
1785 search
= DIACRITICMATCH
[++count
];
1787 usearch_close(strsrch
);
1790 static void TestCanonical(void)
1793 UErrorCode status
= U_ZERO_ERROR
;
1795 if (U_FAILURE(status
)) {
1796 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1799 while (BASICCANONICAL
[count
].text
!= NULL
) {
1800 if (!assertCanonicalEqual(BASICCANONICAL
[count
])) {
1801 log_err("Error at test number %d\n", count
);
1808 static void TestNormCanonical(void)
1811 UErrorCode status
= U_ZERO_ERROR
;
1813 if (U_FAILURE(status
)) {
1814 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1817 ucol_setAttribute(EN_US_
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
1819 while (NORMCANONICAL
[count
].text
!= NULL
) {
1820 if (!assertCanonicalEqual(NORMCANONICAL
[count
])) {
1821 log_err("Error at test number %d\n", count
);
1825 ucol_setAttribute(EN_US_
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
1829 static void TestStrengthCanonical(void)
1832 UErrorCode status
= U_ZERO_ERROR
;
1834 if (U_FAILURE(status
)) {
1835 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1838 while (STRENGTHCANONICAL
[count
].text
!= NULL
) {
1839 if (!assertCanonicalEqual(STRENGTHCANONICAL
[count
])) {
1840 log_err("Error at test number %d\n", count
);
1847 static void TestBreakIteratorCanonical(void) {
1848 UErrorCode status
= U_ZERO_ERROR
;
1853 #if !UCONFIG_NO_BREAK_ITERATION
1856 if (U_FAILURE(status
)) {
1857 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1861 /* 0-3 test are fixed */
1864 const SearchData
*search
= &(BREAKITERATORCANONICAL
[count
]);
1865 UCollator
*collator
= getCollator(search
->collator
);
1866 UBreakIterator
*breaker
= getBreakIterator(search
->breaker
);
1867 UStringSearch
*strsrch
;
1869 u_unescape(search
->text
, text
, 128);
1870 u_unescape(search
->pattern
, pattern
, 32);
1871 ucol_setStrength(collator
, search
->strength
);
1873 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
1875 if(status
== U_FILE_ACCESS_ERROR
) {
1876 log_data_err("Is your data around?\n");
1877 goto ENDTESTBREAKITERATOR
;
1878 } else if(U_FAILURE(status
)) {
1879 log_err("Error opening searcher\n");
1880 goto ENDTESTBREAKITERATOR
;
1882 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
1884 if (U_FAILURE(status
) ||
1885 usearch_getBreakIterator(strsrch
) != breaker
) {
1886 log_err("Error setting break iterator\n");
1887 usearch_close(strsrch
);
1888 goto ENDTESTBREAKITERATOR
;
1890 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
1891 ucol_setStrength(collator
, UCOL_TERTIARY
);
1892 usearch_close(strsrch
);
1893 goto ENDTESTBREAKITERATOR
;
1895 search
= &(BREAKITERATOREXACT
[count
+ 1]);
1896 breaker
= getBreakIterator(search
->breaker
);
1897 usearch_setBreakIterator(strsrch
, breaker
, &status
);
1898 if (U_FAILURE(status
) || usearch_getBreakIterator(strsrch
) != breaker
) {
1899 log_err("Error setting break iterator\n");
1900 usearch_close(strsrch
);
1901 goto ENDTESTBREAKITERATOR
;
1903 usearch_reset(strsrch
);
1904 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
1906 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
1907 log_err("Error at test number %d\n", count
);
1908 usearch_close(strsrch
);
1909 goto ENDTESTBREAKITERATOR
;
1911 usearch_close(strsrch
);
1915 while (BREAKITERATORCANONICAL
[count
].text
!= NULL
) {
1916 if (!assertEqual(BREAKITERATORCANONICAL
[count
])) {
1917 log_err("Error at test number %d\n", count
);
1918 goto ENDTESTBREAKITERATOR
;
1923 ENDTESTBREAKITERATOR
:
1928 static void TestVariableCanonical(void)
1931 UErrorCode status
= U_ZERO_ERROR
;
1933 if (U_FAILURE(status
)) {
1934 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1937 ucol_setAttribute(EN_US_
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
1938 if (U_FAILURE(status
)) {
1939 log_err("Error setting collation alternate attribute %s\n",
1940 u_errorName(status
));
1942 while (VARIABLE
[count
].text
!= NULL
) {
1943 log_verbose("variable %d\n", count
);
1944 if (!assertCanonicalEqual(VARIABLE
[count
])) {
1945 log_err("Error at test number %d\n", count
);
1949 ucol_setAttribute(EN_US_
, UCOL_ALTERNATE_HANDLING
,
1950 UCOL_NON_IGNORABLE
, &status
);
1954 static void TestOverlapCanonical(void)
1957 UErrorCode status
= U_ZERO_ERROR
;
1959 if (U_FAILURE(status
)) {
1960 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
1963 while (OVERLAPCANONICAL
[count
].text
!= NULL
) {
1964 if (!assertEqualWithAttribute(OVERLAPCANONICAL
[count
], USEARCH_ON
,
1966 log_err("Error at overlap test number %d\n", count
);
1971 while (NONOVERLAP
[count
].text
!= NULL
) {
1972 if (!assertCanonicalEqual(NONOVERLAPCANONICAL
[count
])) {
1973 log_err("Error at non overlap test number %d\n", count
);
1982 const SearchData
*search
= &(OVERLAPCANONICAL
[count
]);
1983 UCollator
*collator
= getCollator(search
->collator
);
1984 UStringSearch
*strsrch
;
1985 status
= U_ZERO_ERROR
;
1987 u_unescape(search
->text
, text
, 128);
1988 u_unescape(search
->pattern
, pattern
, 32);
1989 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, collator
,
1991 if(status
== U_FILE_ACCESS_ERROR
) {
1992 log_data_err("Is your data around?\n");
1994 } else if(U_FAILURE(status
)) {
1995 log_err("Error opening searcher\n");
1998 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2000 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_ON
, &status
);
2001 if (U_FAILURE(status
) ||
2002 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_ON
) {
2003 log_err("Error setting overlap option\n");
2005 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
2006 usearch_close(strsrch
);
2009 search
= &(NONOVERLAPCANONICAL
[count
]);
2010 usearch_setAttribute(strsrch
, USEARCH_OVERLAP
, USEARCH_OFF
, &status
);
2011 if (U_FAILURE(status
) ||
2012 usearch_getAttribute(strsrch
, USEARCH_OVERLAP
) != USEARCH_OFF
) {
2013 log_err("Error setting overlap option\n");
2015 usearch_reset(strsrch
);
2016 if (!assertEqualWithUStringSearch(strsrch
, *search
)) {
2017 usearch_close(strsrch
);
2018 log_err("Error at test number %d\n", count
);
2022 usearch_close(strsrch
);
2027 static void TestCollatorCanonical(void)
2029 /* test collator that thinks "o" and "p" are the same thing */
2031 UCollator
*tailored
= NULL
;
2032 UErrorCode status
= U_ZERO_ERROR
;
2035 UStringSearch
*strsrch
;
2038 if (U_FAILURE(status
)) {
2039 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
2042 u_unescape(COLLATORCANONICAL
[0].text
, text
, 128);
2043 u_unescape(COLLATORCANONICAL
[0].pattern
, pattern
, 32);
2045 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
2047 if(status
== U_FILE_ACCESS_ERROR
) {
2048 log_data_err("Is your data around?\n");
2050 } else if(U_FAILURE(status
)) {
2051 log_err("Error opening searcher\n");
2054 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2056 if (U_FAILURE(status
)) {
2057 log_err("Error opening string search %s\n", u_errorName(status
));
2059 if (!assertEqualWithUStringSearch(strsrch
, COLLATORCANONICAL
[0])) {
2060 goto ENDTESTCOLLATOR
;
2063 u_unescape(TESTCOLLATORRULE
, rules
, 32);
2064 tailored
= ucol_openRules(rules
, -1, UCOL_ON
,
2065 COLLATORCANONICAL
[1].strength
, NULL
, &status
);
2066 if (U_FAILURE(status
)) {
2067 log_err("Error opening rule based collator %s\n", u_errorName(status
));
2070 usearch_setCollator(strsrch
, tailored
, &status
);
2071 if (U_FAILURE(status
) || usearch_getCollator(strsrch
) != tailored
) {
2072 log_err("Error setting rule based collator\n");
2074 usearch_reset(strsrch
);
2075 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2077 if (!assertEqualWithUStringSearch(strsrch
, COLLATORCANONICAL
[1])) {
2078 goto ENDTESTCOLLATOR
;
2081 usearch_setCollator(strsrch
, EN_US_
, &status
);
2082 usearch_reset(strsrch
);
2083 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2085 if (U_FAILURE(status
) || usearch_getCollator(strsrch
) != EN_US_
) {
2086 log_err("Error setting rule based collator\n");
2088 if (!assertEqualWithUStringSearch(strsrch
, COLLATORCANONICAL
[0])) {
2089 goto ENDTESTCOLLATOR
;
2093 usearch_close(strsrch
);
2094 if (tailored
!= NULL
) {
2095 ucol_close(tailored
);
2100 static void TestPatternCanonical(void)
2102 UStringSearch
*strsrch
;
2107 UErrorCode status
= U_ZERO_ERROR
;
2110 if (U_FAILURE(status
)) {
2111 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
2114 u_unescape(PATTERNCANONICAL
[0].text
, text
, 128);
2115 u_unescape(PATTERNCANONICAL
[0].pattern
, pattern
, 32);
2117 ucol_setStrength(EN_US_
, PATTERNCANONICAL
[0].strength
);
2118 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
2120 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2122 if (U_FAILURE(status
)) {
2123 log_err("Error opening string search %s\n", u_errorName(status
));
2124 goto ENDTESTPATTERN
;
2126 temp
= usearch_getPattern(strsrch
, &templength
);
2127 if (u_strcmp(pattern
, temp
) != 0) {
2128 log_err("Error setting pattern\n");
2130 if (!assertEqualWithUStringSearch(strsrch
, PATTERNCANONICAL
[0])) {
2131 goto ENDTESTPATTERN
;
2134 u_unescape(PATTERNCANONICAL
[1].pattern
, pattern
, 32);
2135 usearch_setPattern(strsrch
, pattern
, -1, &status
);
2136 temp
= usearch_getPattern(strsrch
, &templength
);
2137 if (u_strcmp(pattern
, temp
) != 0) {
2138 log_err("Error setting pattern\n");
2139 goto ENDTESTPATTERN
;
2141 usearch_reset(strsrch
);
2142 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2144 if (U_FAILURE(status
)) {
2145 log_err("Error setting pattern %s\n", u_errorName(status
));
2147 if (!assertEqualWithUStringSearch(strsrch
, PATTERNCANONICAL
[1])) {
2148 goto ENDTESTPATTERN
;
2151 u_unescape(PATTERNCANONICAL
[0].pattern
, pattern
, 32);
2152 usearch_setPattern(strsrch
, pattern
, -1, &status
);
2153 temp
= usearch_getPattern(strsrch
, &templength
);
2154 if (u_strcmp(pattern
, temp
) != 0) {
2155 log_err("Error setting pattern\n");
2156 goto ENDTESTPATTERN
;
2158 usearch_reset(strsrch
);
2159 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2161 if (U_FAILURE(status
)) {
2162 log_err("Error setting pattern %s\n", u_errorName(status
));
2164 if (!assertEqualWithUStringSearch(strsrch
, PATTERNCANONICAL
[0])) {
2165 goto ENDTESTPATTERN
;
2168 ucol_setStrength(EN_US_
, UCOL_TERTIARY
);
2169 if (strsrch
!= NULL
) {
2170 usearch_close(strsrch
);
2175 static void TestTextCanonical(void)
2177 UStringSearch
*strsrch
;
2182 UErrorCode status
= U_ZERO_ERROR
;
2184 u_unescape(TEXTCANONICAL
[0].text
, text
, 128);
2185 u_unescape(TEXTCANONICAL
[0].pattern
, pattern
, 32);
2188 if (U_FAILURE(status
)) {
2189 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
2192 strsrch
= usearch_openFromCollator(pattern
, -1, text
, -1, EN_US_
,
2194 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2197 if (U_FAILURE(status
)) {
2198 log_err("Error opening string search %s\n", u_errorName(status
));
2199 goto ENDTESTPATTERN
;
2201 temp
= usearch_getText(strsrch
, &templength
);
2202 if (u_strcmp(text
, temp
) != 0) {
2203 log_err("Error setting text\n");
2205 if (!assertEqualWithUStringSearch(strsrch
, TEXTCANONICAL
[0])) {
2206 goto ENDTESTPATTERN
;
2209 u_unescape(TEXTCANONICAL
[1].text
, text
, 32);
2210 usearch_setText(strsrch
, text
, -1, &status
);
2211 temp
= usearch_getText(strsrch
, &templength
);
2212 if (u_strcmp(text
, temp
) != 0) {
2213 log_err("Error setting text\n");
2214 goto ENDTESTPATTERN
;
2216 if (U_FAILURE(status
)) {
2217 log_err("Error setting text %s\n", u_errorName(status
));
2219 if (!assertEqualWithUStringSearch(strsrch
, TEXTCANONICAL
[1])) {
2220 goto ENDTESTPATTERN
;
2223 u_unescape(TEXTCANONICAL
[0].text
, text
, 32);
2224 usearch_setText(strsrch
, text
, -1, &status
);
2225 temp
= usearch_getText(strsrch
, &templength
);
2226 if (u_strcmp(text
, temp
) != 0) {
2227 log_err("Error setting text\n");
2228 goto ENDTESTPATTERN
;
2230 if (U_FAILURE(status
)) {
2231 log_err("Error setting pattern %s\n", u_errorName(status
));
2233 if (!assertEqualWithUStringSearch(strsrch
, TEXTCANONICAL
[0])) {
2234 goto ENDTESTPATTERN
;
2237 if (strsrch
!= NULL
) {
2238 usearch_close(strsrch
);
2243 static void TestCompositeBoundariesCanonical(void)
2246 UErrorCode status
= U_ZERO_ERROR
;
2248 if (U_FAILURE(status
)) {
2249 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
2252 while (COMPOSITEBOUNDARIESCANONICAL
[count
].text
!= NULL
) {
2253 log_verbose("composite %d\n", count
);
2254 if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL
[count
])) {
2255 log_err("Error at test number %d\n", count
);
/*
 * Exercises usearch_setOffset()/usearch_getOffset() on a search object that
 * has USEARCH_CANONICAL_MATCH switched on.
 *
 * Visible steps:
 *  - opens a search from EN_US_ over zero-filled pattern/text buffers and
 *    turns UCOL_NORMALIZATION_MODE on for the collator the search reports;
 *  - expects usearch_setOffset() to fail for the offsets -1 and 128;
 *  - for each BASICCANONICAL entry (the last one is skipped, per the
 *    in-line comment) loads text and pattern, checks start and length of the
 *    expected matches and repositions the search between them;
 *  - sets UCOL_NORMALIZATION_MODE back to UCOL_OFF and closes the search.
 *
 * NOTE(review): the declarations of pattern, text, count and textlength are
 * not visible in this listing.
 */
2262 static void TestGetSetOffsetCanonical(void)
2264 int searchDataIndex
= 0;
2267 UErrorCode status
= U_ZERO_ERROR
;
2268 UStringSearch
*strsrch
;
2269 UCollator
*collator
;
2271 memset(pattern
, 0, 32*sizeof(UChar
));
2272 memset(text
, 0, 128*sizeof(UChar
));
2275 if (U_FAILURE(status
)) {
2276 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
2279 strsrch
= usearch_openFromCollator(pattern
, 16, text
, 32, EN_US_
, NULL
,
/* presumably this is EN_US_ itself, which is why the mode is reset at the
 * end of the test — TODO confirm */
2282 collator
= usearch_getCollator(strsrch
);
2283 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
2285 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2288 /* testing out of bounds error */
2289 usearch_setOffset(strsrch
, -1, &status
);
2290 if (U_SUCCESS(status
)) {
2291 log_err("Error expecting set offset error\n");
/* 128 lies beyond the text length of 32 given when the search was opened. */
2293 usearch_setOffset(strsrch
, 128, &status
);
2294 if (U_SUCCESS(status
)) {
2295 log_err("Error expecting set offset error\n");
2297 while (BASICCANONICAL
[searchDataIndex
].text
!= NULL
) {
/* The entry is copied and the index advanced in the same statement, so the
 * NULL test below looks at the entry that follows the copy. */
2299 SearchData search
= BASICCANONICAL
[searchDataIndex
++];
2300 int32_t matchindex
= search
.offset
[count
];
2303 if (BASICCANONICAL
[searchDataIndex
].text
== NULL
) {
2304 /* skip the last one */
2308 u_unescape(search
.text
, text
, 128);
2309 u_unescape(search
.pattern
, pattern
, 32);
/* Clear whatever status earlier calls left behind before loading this entry. */
2310 status
= U_ZERO_ERROR
;
2311 usearch_setText(strsrch
, text
, -1, &status
);
2312 usearch_setPattern(strsrch
, pattern
, -1, &status
);
2313 while (U_SUCCESS(status
) && matchindex
>= 0) {
2314 uint32_t matchlength
= search
.size
[count
];
2315 usearch_next(strsrch
, &status
);
2316 if (matchindex
!= usearch_getMatchedStart(strsrch
) ||
2317 matchlength
!= (uint32_t)usearch_getMatchedLength(strsrch
)) {
2318 char *str
= toCharString(usearch_getText(strsrch
,
2320 log_err("Text: %s\n", str
);
2321 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
2322 log_err("Pattern: %s\n", str
);
2323 log_err("Error match found at %d %d\n",
2324 usearch_getMatchedStart(strsrch
),
2325 usearch_getMatchedLength(strsrch
));
/* When another match is expected (offset[count + 1] != -1) the search is
 * moved to one position past its start, so the next expectation becomes
 * offset[count + 2]. */
2328 matchindex
= search
.offset
[count
+ 1] == -1 ? -1 :
2329 search
.offset
[count
+ 2];
2330 if (search
.offset
[count
+ 1] != -1) {
2331 usearch_setOffset(strsrch
, search
.offset
[count
+ 1] + 1,
/* The offset read back must be the value that was just set. */
2333 if (usearch_getOffset(strsrch
) != search
.offset
[count
+ 1] + 1) {
2334 log_err("Error setting offset\n");
/* One more step after the expected matches must not report a match. */
2341 usearch_next(strsrch
, &status
);
2342 if (usearch_getMatchedStart(strsrch
) != USEARCH_DONE
) {
2343 char *str
= toCharString(usearch_getText(strsrch
, &textlength
));
2344 log_err("Text: %s\n", str
);
2345 str
= toCharString(usearch_getPattern(strsrch
, &textlength
));
2346 log_err("Pattern: %s\n", str
);
2347 log_err("Error match found at %d %d\n",
2348 usearch_getMatchedStart(strsrch
),
2349 usearch_getMatchedLength(strsrch
));
/* Undo the normalization setting made at the top of the test. */
2355 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_OFF
, &status
);
2356 usearch_close(strsrch
);
/*
 * Canonical-match test driven by the SUPPLEMENTARYCANONICAL table.
 * Every entry up to the one whose text is NULL is handed to
 * assertCanonicalEqual(); a failing entry is reported with its index.
 * NOTE(review): the declaration of count and the statement that can change
 * status before the check below are not visible in this listing.
 */
2360 static void TestSupplementaryCanonical(void)
2363 UErrorCode status
= U_ZERO_ERROR
;
2365 if (U_FAILURE(status
)) {
2366 log_err_status(status
, "Unable to open static collators %s\n", u_errorName(status
));
/* Walk the table until its NULL-text terminator. */
2369 while (SUPPLEMENTARYCANONICAL
[count
].text
!= NULL
) {
2370 if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL
[count
])) {
2371 log_err("Error at test number %d\n", count
);
/*
 * Canonical-match test with a rule-based collator.
 *
 * Builds a collator from CONTRACTIONRULE, opens a search on it with
 * USEARCH_CANONICAL_MATCH switched on, then for every CONTRACTIONCANONICAL
 * entry (up to the NULL-text terminator) loads text and pattern and checks
 * the result with assertEqualWithUStringSearch(). The search and the
 * collator are closed at the end.
 *
 * NOTE(review): the declarations of rules, pattern, text and count are not
 * visible in this listing.
 */
2378 static void TestContractionCanonical(void)
2383 UCollator
*collator
= NULL
;
2384 UErrorCode status
= U_ZERO_ERROR
;
2386 UStringSearch
*strsrch
= NULL
;
2387 memset(rules
, 0, 128*sizeof(UChar
));
2388 memset(pattern
, 0, 128*sizeof(UChar
));
2389 memset(text
, 0, 128*sizeof(UChar
));
2391 u_unescape(CONTRACTIONRULE
, rules
, 128);
/* UCOL_ON and UCOL_TERTIARY are the normalization-mode and strength
 * arguments of ucol_openRules(). */
2392 collator
= ucol_openRules(rules
, u_strlen(rules
), UCOL_ON
,
2393 UCOL_TERTIARY
, NULL
, &status
);
/* A file-access failure is logged as a data problem, any other failure as
 * a test error. */
2394 if(status
== U_FILE_ACCESS_ERROR
) {
2395 log_data_err("Is your data around?\n");
2397 } else if(U_FAILURE(status
)) {
2398 log_err("Error opening collator %s\n", u_errorName(status
));
/* Opened with length-1 views of the zero-filled buffers; the real text and
 * pattern are installed inside the loop below. */
2401 strsrch
= usearch_openFromCollator(pattern
, 1, text
, 1, collator
, NULL
,
2403 usearch_setAttribute(strsrch
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
2405 if (U_FAILURE(status
)) {
2406 log_err("Error opening string search %s\n", u_errorName(status
));
2409 while (CONTRACTIONCANONICAL
[count
].text
!= NULL
) {
2410 u_unescape(CONTRACTIONCANONICAL
[count
].text
, text
, 128);
2411 u_unescape(CONTRACTIONCANONICAL
[count
].pattern
, pattern
, 128);
2412 usearch_setText(strsrch
, text
, -1, &status
);
2413 usearch_setPattern(strsrch
, pattern
, -1, &status
);
2414 if (!assertEqualWithUStringSearch(strsrch
,
2415 CONTRACTIONCANONICAL
[count
])) {
2416 log_err("Error at test number %d\n", count
);
2420 usearch_close(strsrch
);
2421 ucol_close(collator
);
/*
 * Checks that usearch_openFromCollator() reports U_UNSUPPORTED_ERROR when
 * the collator has UCOL_NUMERIC_COLLATION switched on.
 * NOTE(review): the declarations of pattern and text are not visible in
 * this listing.
 */
2424 static void TestNumeric(void) {
2425 UCollator
*coll
= NULL
;
2426 UStringSearch
*strsrch
= NULL
;
2427 UErrorCode status
= U_ZERO_ERROR
;
2431 memset(pattern
, 0, 128*sizeof(UChar
));
2432 memset(text
, 0, 128*sizeof(UChar
));
/* The empty locale name is used to open the collator. */
2434 coll
= ucol_open("", &status
);
2435 if(U_FAILURE(status
)) {
2436 log_data_err("Could not open UCA. Is your data around?\n");
2440 ucol_setAttribute(coll
, UCOL_NUMERIC_COLLATION
, UCOL_ON
, &status
);
2442 strsrch
= usearch_openFromCollator(pattern
, 1, text
, 1, coll
, NULL
, &status
);
/* NOTE(review): the U_SUCCESS() term looks redundant, assuming
 * U_UNSUPPORTED_ERROR is a failure code — any success value already differs
 * from it. */
2444 if(status
!= U_UNSUPPORTED_ERROR
|| U_SUCCESS(status
)) {
2445 log_err("Expected U_UNSUPPORTED_ERROR when trying to instantiate a search object from a CODAN collator, got %s instead\n", u_errorName(status
));
2447 usearch_close(strsrch
);
2455 /* Regression test for ticket 4038: backward searching gave incorrect results for certain patterns with a length > 1 */
/*
 * Searches for "::" in "QBitArray::bitarr_data" with an en_GB collator set
 * to primary strength, case level on and non-ignorable alternate handling.
 * usearch_first() and usearch_last() must both return 9, the index of the
 * only "::" in the text. All failures jump to the shared exit label.
 * NOTE(review): the declaration of pos is not visible in this listing.
 */
2456 static void TestForwardBackward(void) {
2457 UErrorCode status
= U_ZERO_ERROR
;
2458 UCollator
*coll
= NULL
;
2459 UStringSearch
*search
= NULL
;
2460 UChar usrcstr
[32], value
[4];
2462 int32_t expectedPos
= 9;
2464 coll
= ucol_open("en_GB", &status
);
2465 if (U_FAILURE(status
)) {
2466 log_err_status(status
, "ucol_open failed: %s\n", u_errorName(status
));
2467 goto exitTestForwardBackward
;
2469 ucol_setAttribute(coll
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
2470 ucol_setAttribute(coll
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
2471 ucol_setAttribute(coll
, UCOL_ALTERNATE_HANDLING
, UCOL_NON_IGNORABLE
, &status
);
2473 u_uastrcpy(usrcstr
, "QBitArray::bitarr_data"); /* text */
2474 u_uastrcpy(value
, "::"); /* pattern */
/* 2 and 22 are the lengths of the pattern and text literals above. */
2476 search
= usearch_openFromCollator(value
, 2, usrcstr
, 22, coll
, NULL
, &status
);
2477 if (U_FAILURE(status
)) {
2478 log_err("usearch_openFromCollator failed: %s\n", u_errorName(status
));
2479 goto exitTestForwardBackward
;
2482 usearch_reset(search
);
2483 /* forward search */
2484 pos
= usearch_first(search
, &status
);
2485 if (pos
!= expectedPos
) {
2486 log_err("Expected search result: %d; Got instead: %d\n", expectedPos
, pos
);
2487 goto exitTestForwardBackward
;
2491 usearch_reset(search
);
2492 /* backward search */
2493 pos
= usearch_last(search
, &status
);
2494 if (pos
!= expectedPos
) {
2495 log_err("Expected search result: %d; Got instead: %d\n", expectedPos
, pos
);
/* Shared exit: reached by fall-through and by every goto above. */
2498 exitTestForwardBackward
:
2502 if (search
!= NULL
) {
2503 usearch_close(search
);
/*
 * TEST_ASSERT(x): when x is a failing UErrorCode, logs file and line through
 * log_err_status().
 * NOTE(review): the remainder of the macro body is not visible in this
 * listing.
 */
2507 #define TEST_ASSERT(x) \
2508 {if (U_FAILURE(x)) {log_err_status(x, "%s:%d: FAIL: test assertion failure \n", __FILE__, __LINE__);\
2511 static void TestSearchForNull(void) {
/*
 * TestSearchForNull: searches for a pattern that contains U+0000.
 * The literals "IS 0" and "_0IS 0 OK?" are copied into pattern and text with
 * every digit zero (U+0030) replaced by U+0000; the search then runs with an
 * en_US collator at UCOL_IDENTICAL strength. First-match position, match
 * length and the total number of matches are compared with expectedPos,
 * expectedLen and expectedNum.
 * NOTE(review): the declarations of coll, ec, pos, len, count, TEXT_LEN and
 * the expected* values are not visible in this listing.
 */
2514 UStringSearch
*search
;
2521 const UChar zerodigit
= 0x0030; /* 0 */
2522 const UChar nulldigit
= 0x0000; /* null */
2524 /* static const UChar var[(length)+1]=U_DECLARE_UTF16(cs) */
2525 #define PATTERN_LEN 4
2528 U_STRING_DECL(_pattern
, "IS 0", PATTERN_LEN
);
2529 U_STRING_DECL(_text
, "_0IS 0 OK?", TEXT_LEN
);
/* One extra slot in each buffer for the terminator written below. */
2530 UChar pattern
[PATTERN_LEN
+ 1], text
[TEXT_LEN
+ 1];
2532 U_STRING_INIT(_pattern
, "IS 0", PATTERN_LEN
);
2533 U_STRING_INIT(_text
, "_0IS 0 OK?", TEXT_LEN
);
/* Copy the pattern, swapping each digit zero for U+0000. */
2538 for (pos
= 0; pos
< PATTERN_LEN
; pos
++) {
2539 if (_pattern
[pos
] == zerodigit
) {
2540 pattern
[pos
] = nulldigit
;
2542 pattern
[pos
] = _pattern
[pos
];
2545 pattern
[PATTERN_LEN
] = 0x0000;
/* Same substitution for the text. */
2547 for (pos
= 0; pos
< TEXT_LEN
; pos
++) {
2548 if (_text
[pos
] == zerodigit
) {
2549 text
[pos
] = nulldigit
;
2551 text
[pos
] = _text
[pos
];
2554 text
[TEXT_LEN
] = 0x0000;
2558 /* create a US-English collator */
2559 coll
= ucol_open("en_US", &ec
);
2561 /* make sure we didn't fail. */
2564 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2566 /* open a search for the pattern that contains the embedded U+0000 */
/* Explicit lengths are passed, so the embedded U+0000 does not end the
 * pattern or the text. */
2567 search
= usearch_openFromCollator(pattern
, PATTERN_LEN
, text
,
2568 TEXT_LEN
, coll
, NULL
, &ec
);
2571 if (coll
!= NULL
&& search
!= NULL
) {
2572 pos
= usearch_first(search
, &ec
);
2573 len
= usearch_getMatchedLength(search
);
2574 if (pos
!= expectedPos
) {
2575 log_err("Expected search result: %d; Got instead: %d\n", expectedPos
,
2579 if (len
!= expectedLen
) {
2580 log_err("Expected search result length: %d; Got instead: %d\n",
/* Iterate over every match from the start of the text. */
2584 for (pos
= usearch_first(search
, &ec
); pos
!= USEARCH_DONE
; pos
2585 = usearch_next(search
, &ec
)) {
2586 log_verbose("Match at %d\n", pos
);
2590 if (count
!= expectedNum
) {
2591 log_err("Expected %d search hits, found %d\n", expectedNum
, count
);
2596 usearch_close(search
);
/*
 * Compares search results at two collation strengths.
 * Pattern and text differ only in their middle code unit (0x0591 versus
 * 0x0592). With the en_US collator at UCOL_TERTIARY a match at position 0
 * of length 3 is expected. After switching the same collator to
 * UCOL_IDENTICAL and resetting the search, the log message indicates that
 * the search is expected to fail.
 * NOTE(review): the declarations of coll, pos and len and the condition
 * guarding the final log_err() are not visible in this listing.
 */
2599 static void TestStrengthIdentical(void)
2602 UErrorCode ec
= U_ZERO_ERROR
;
2603 UStringSearch
*search
;
2605 UChar pattern
[] = {0x05E9, 0x0591, 0x05E9};
2606 UChar text
[] = {0x05E9, 0x0592, 0x05E9};
/* Element counts of the two arrays above. */
2607 int32_t pLen
= sizeof (pattern
) / sizeof(pattern
[0]);
2608 int32_t tLen
= sizeof(text
) / sizeof (text
[0]);
2609 int32_t expectedPos
= 0;
2610 int32_t expectedLen
= 3;
2615 /* create a US-English collator */
2616 coll
= ucol_open ("en_US", &ec
);
2618 /* make sure we didn't fail. */
2621 ucol_setStrength( coll
, UCOL_TERTIARY
);
2623 /* open a search for pattern within text */
2624 search
= usearch_openFromCollator (pattern
, pLen
, text
, tLen
, coll
, NULL
, &ec
);
2627 if (coll
!= NULL
&& search
!= NULL
) {
2628 pos
= usearch_first(search
, &ec
);
2629 len
= usearch_getMatchedLength(search
);
2631 if(pos
!= expectedPos
) {
2632 log_err("Expected search result: %d; Got instead: %d\n", expectedPos
, pos
);
2635 if(len
!= expectedLen
) {
2636 log_err("Expected search result length: %d; Got instead: %d\n", expectedLen
, len
);
2639 /* Now try it at strength == UCOL_IDENTICAL */
2640 ucol_setStrength(coll
, UCOL_IDENTICAL
);
2641 usearch_reset(search
);
2643 pos
= usearch_first(search
, &ec
);
2644 len
= usearch_getMatchedLength(search
);
/* NOTE(review): "strentgh" in the message below is a typo for "strength";
 * it is runtime text and is left untouched here. */
2647 log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos
);
2651 usearch_close(search
);
/*
 * Data for TestUsingSearchCollator: one shared text, six patterns, and two
 * sets of expected match offsets (the scKoStnd* tables are paired with
 * "root", the scKoSrch* tables with the "@collation=search" locales).
 * NOTE(review): several lines of these definitions (struct openers, the
 * offsetsLen field, table terminators, the first text element) are not
 * visible in this listing.
 */
2656 * TestUsingSearchCollator
/* Element count of a true array; not valid for a pointer. */
2659 #define ARRAY_LENGTH(array) (sizeof(array)/sizeof(array[0]))
/* PatternAndOffsets: a pattern together with the offsets at which it is
 * expected to match. */
2662 const UChar
* pattern
;
2663 const int32_t * offsets
;
2665 } PatternAndOffsets
;
/* Text searched by every test item. Each visible row starts with its index
 * in the array and ends with a U+0020 separator. */
2667 static const UChar scKoText
[] = {
2669 /*01*/ 0xAC00, 0x0020, /* simple LV Hangul */
2670 /*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */
2671 /*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */
2672 /*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */
2673 /*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */
2674 /*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */
2675 /*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */
2676 /*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */
2677 /*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */
2678 /*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */
2679 /*34*/ 0x00E6, 0x0020, /* small letter ae, expands */
2680 /*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */
/* Zero-terminated patterns; each pair (0/1, 2/3, 4/5) spells the same
 * syllable, first precomposed and then as jamo. */
2684 static const UChar scKoPat0
[] = { 0xAC01, 0 };
2685 static const UChar scKoPat1
[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */
2686 static const UChar scKoPat2
[] = { 0xAC0F, 0 };
2687 static const UChar scKoPat3
[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */
2688 static const UChar scKoPat4
[] = { 0xAFFF, 0 };
2689 static const UChar scKoPat5
[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */
/* Expected offsets into scKoText for the search-collation items. */
2691 static const int32_t scKoSrchOff01
[] = { 3, 9, 13 };
2692 static const int32_t scKoSrchOff23
[] = { 5, 21, 25 };
2693 static const int32_t scKoSrchOff45
[] = { 7, 30 };
2695 static const PatternAndOffsets scKoSrchPatternsOffsets
[] = {
2696 { scKoPat0
, scKoSrchOff01
, ARRAY_LENGTH(scKoSrchOff01
) },
2697 { scKoPat1
, scKoSrchOff01
, ARRAY_LENGTH(scKoSrchOff01
) },
2698 { scKoPat2
, scKoSrchOff23
, ARRAY_LENGTH(scKoSrchOff23
) },
2699 { scKoPat3
, scKoSrchOff23
, ARRAY_LENGTH(scKoSrchOff23
) },
2700 { scKoPat4
, scKoSrchOff45
, ARRAY_LENGTH(scKoSrchOff45
) },
2701 { scKoPat5
, scKoSrchOff45
, ARRAY_LENGTH(scKoSrchOff45
) },
/* Expected offsets into scKoText for the "root" item; patterns 2 and 3 have
 * separate lists here. */
2705 static const int32_t scKoStndOff01
[] = { 3, 9 };
2706 static const int32_t scKoStndOff2
[] = { 5, 21 };
2707 static const int32_t scKoStndOff3
[] = { 25 };
2708 static const int32_t scKoStndOff45
[] = { 7, 30 };
2710 static const PatternAndOffsets scKoStndPatternsOffsets
[] = {
2711 { scKoPat0
, scKoStndOff01
, ARRAY_LENGTH(scKoStndOff01
) },
2712 { scKoPat1
, scKoStndOff01
, ARRAY_LENGTH(scKoStndOff01
) },
2713 { scKoPat2
, scKoStndOff2
, ARRAY_LENGTH(scKoStndOff2
) },
2714 { scKoPat3
, scKoStndOff3
, ARRAY_LENGTH(scKoStndOff3
) },
2715 { scKoPat4
, scKoStndOff45
, ARRAY_LENGTH(scKoStndOff45
) },
2716 { scKoPat5
, scKoStndOff45
, ARRAY_LENGTH(scKoStndOff45
) },
/* TUSCItem: a locale name plus the pattern/offset table to use with it. */
2721 const char * locale
;
2723 const PatternAndOffsets
* patternsAndOffsets
;
/* Test items; the all-NULL entry ends the list. */
2726 static const TUSCItem tuscItems
[] = {
2727 { "root", scKoText
, scKoStndPatternsOffsets
},
2728 { "root@collation=search", scKoText
, scKoSrchPatternsOffsets
},
2729 { "ko@collation=search", scKoText
, scKoSrchPatternsOffsets
},
2730 { NULL
, NULL
, NULL
}
/* Placeholder pattern ("a") used only to open the search object. */
2733 static const UChar dummyPat
[] = { 0x0061, 0 };
/*
 * For every tuscItems entry: opens a collator for the entry's locale, opens
 * a search over the entry's text, and for each pattern in the entry's table
 * checks the matches twice — forward with usearch_next() against the
 * expected offsets in order, then backward with usearch_previous() against
 * the same offsets in reverse. Mismatched offsets, surplus matches, missing
 * matches and API failures are each logged with the locale name.
 * NOTE(review): the declaration of offset and the loop constructs around
 * the usearch_next()/usearch_previous() calls are not visible in this
 * listing.
 */
2735 static void TestUsingSearchCollator(void)
2737 const TUSCItem
* tuscItemPtr
;
2738 for (tuscItemPtr
= tuscItems
; tuscItemPtr
->locale
!= NULL
; tuscItemPtr
++) {
2739 UErrorCode status
= U_ZERO_ERROR
;
2740 UCollator
* ucol
= ucol_open(tuscItemPtr
->locale
, &status
);
2741 if ( U_SUCCESS(status
) ) {
/* Opened with the placeholder pattern; the real one is set per iteration. */
2742 UStringSearch
* usrch
= usearch_openFromCollator(dummyPat
, -1, tuscItemPtr
->text
, -1, ucol
, NULL
, &status
);
2743 if ( U_SUCCESS(status
) ) {
2744 const PatternAndOffsets
* patternsOffsetsPtr
;
2745 for ( patternsOffsetsPtr
= tuscItemPtr
->patternsAndOffsets
; patternsOffsetsPtr
->pattern
!= NULL
; patternsOffsetsPtr
++) {
2746 usearch_setPattern(usrch
, patternsOffsetsPtr
->pattern
, -1, &status
);
2747 if ( U_SUCCESS(status
) ) {
2749 const int32_t * nextOffsetPtr
;
2750 const int32_t * limitOffsetPtr
;
/* Forward pass: nextOffsetPtr starts at the first expected offset and
 * limitOffsetPtr is one past the last. */
2752 usearch_reset(usrch
);
2753 nextOffsetPtr
= patternsOffsetsPtr
->offsets
;
2754 limitOffsetPtr
= patternsOffsetsPtr
->offsets
+ patternsOffsetsPtr
->offsetsLen
;
2756 offset
= usearch_next(usrch
, &status
);
2757 if ( U_FAILURE(status
) || offset
== USEARCH_DONE
) {
2760 if ( nextOffsetPtr
< limitOffsetPtr
) {
2761 if (offset
!= *nextOffsetPtr
) {
2762 log_err("error, locale %s, expected usearch_next %d, got %d\n", tuscItemPtr
->locale
, *nextOffsetPtr
, offset
);
/* presumably set to the limit so the "fewer matches" check below does not
 * also fire after a mismatch — TODO confirm */
2763 nextOffsetPtr
= limitOffsetPtr
;
2768 log_err("error, locale %s, usearch_next returned more matches than expected\n", tuscItemPtr
->locale
);
2771 if ( U_FAILURE(status
) ) {
2772 log_err("error, locale %s, usearch_next failed: %s\n", tuscItemPtr
->locale
, u_errorName(status
) );
2773 } else if ( nextOffsetPtr
< limitOffsetPtr
) {
2774 log_err("error, locale %s, usearch_next returned fewer matches than expected\n", tuscItemPtr
->locale
);
/* Backward pass: start one past the last expected offset and stop at the
 * first. */
2777 status
= U_ZERO_ERROR
;
2778 usearch_reset(usrch
);
2779 nextOffsetPtr
= patternsOffsetsPtr
->offsets
+ patternsOffsetsPtr
->offsetsLen
;
2780 limitOffsetPtr
= patternsOffsetsPtr
->offsets
;
2782 offset
= usearch_previous(usrch
, &status
);
2783 if ( U_FAILURE(status
) || offset
== USEARCH_DONE
) {
2786 if ( nextOffsetPtr
> limitOffsetPtr
) {
2788 if (offset
!= *nextOffsetPtr
) {
2789 log_err("error, locale %s, expected usearch_previous %d, got %d\n", tuscItemPtr
->locale
, *nextOffsetPtr
, offset
);
2790 nextOffsetPtr
= limitOffsetPtr
;
2794 log_err("error, locale %s, usearch_previous returned more matches than expected\n", tuscItemPtr
->locale
);
2797 if ( U_FAILURE(status
) ) {
2798 log_err("error, locale %s, usearch_previous failed: %s\n", tuscItemPtr
->locale
, u_errorName(status
) );
2799 } else if ( nextOffsetPtr
> limitOffsetPtr
) {
2800 log_err("error, locale %s, usearch_previous returned fewer matches than expected\n", tuscItemPtr
->locale
);
2804 log_err("error, locale %s, usearch_setPattern failed: %s\n", tuscItemPtr
->locale
, u_errorName(status
) );
2807 usearch_close(usrch
);
2809 log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr
->locale
, u_errorName(status
) );
/* A collator that cannot be opened is logged as a data error. */
2813 log_data_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr
->locale
, u_errorName(status
) );
/*
 * Shared driver for the TestPCEBuffer_* tests.
 *
 * Opens a collator from the short string "LSK_AS_CX_EX_FX_HX_NX_S4", a
 * character break iterator and a string search, then calls usearch_first().
 * Per the log messages, a match at position 0 is the expected outcome.
 *
 * search/searchLen: pattern code units and their count.
 * source/sourceLen: text code units and their count.
 *
 * NOTE(review): the declarations of coll, locale and match, the trailing
 * arguments of the multi-line calls, and the cleanup other than
 * usearch_close() are not visible in this listing.
 */
2819 static void TestPCEBuffer_with(const UChar
*search
, uint32_t searchLen
, const UChar
*source
, uint32_t sourceLen
) {
2820 UErrorCode icuStatus
= U_ZERO_ERROR
;
2823 UBreakIterator
*ubrk
;
2824 UStringSearch
*usearch
;
2828 coll
= ucol_openFromShortString( "LSK_AS_CX_EX_FX_HX_NX_S4",
/* Failure to open the collator is logged as a data error; the later
 * failures in this function use log_err. */
2832 if ( U_FAILURE(icuStatus
) )
2834 log_data_err( "ucol_openFromShortString error %s\n" , u_errorName(icuStatus
));
2838 locale
= ucol_getLocaleByType( coll
,
2841 if ( U_FAILURE(icuStatus
) )
2843 log_err( "ucol_getLocaleByType error %s\n", u_errorName(icuStatus
) );
2847 log_verbose("locale=%s\n", locale
);
2849 ubrk
= ubrk_open( UBRK_CHARACTER
,
2854 if ( U_FAILURE(icuStatus
) )
2856 log_err( "ubrk_open error %s\n", u_errorName(icuStatus
) );
2860 usearch
= usearch_openFromCollator( search
,
2867 if ( U_FAILURE(icuStatus
) )
2869 log_err( "usearch_openFromCollator error %s\n", u_errorName(icuStatus
) );
2873 match
= usearch_first( usearch
,
2875 if ( U_FAILURE(icuStatus
) )
2877 log_err( "usearch_first error %s\n", u_errorName(icuStatus
) );
2882 log_verbose("OK: match=%d\n", match
);
2884 log_err("Err: match expected 0 got %d\n", match
);
2887 usearch_close(usearch
);
/*
 * Runs TestPCEBuffer_with() with identical search and source data in which
 * every group of code units ends in 0x00df.
 * NOTE(review): the "UChar search[] =" / "UChar source[] =" lines that
 * introduce the two initializers are not visible in this listing.
 */
2896 static void TestPCEBuffer_100df(void) {
2898 { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df }; /* 37 UTF-16 code units; 0x00df occurs 11 times and the only surrogates (0xd8fa 0xdeae) form a pair */
2900 { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df };
/* Lengths are element counts of the arrays, not byte sizes. */
2901 uint32_t searchLen
= sizeof(search
)/sizeof(UChar
);
2902 uint32_t sourceLen
= sizeof(source
)/sizeof(UChar
);
2903 TestPCEBuffer_with(search
,searchLen
,source
,sourceLen
);
/*
 * Runs TestPCEBuffer_with() with identical search and source data in which
 * every group of code units ends in the trail surrogate 0xdfff.
 * NOTE(review): the "UChar search[] =" / "UChar source[] =" lines that
 * introduce the two initializers are not visible in this listing.
 */
2907 static void TestPCEBuffer_2surr(void) {
2909 { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff }; /* 37 UTF-16 code units; the 11 occurrences of 0xdfff are unpaired trail surrogates */
2911 { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff };
/* Lengths are element counts of the arrays, not byte sizes. */
2912 uint32_t searchLen
= sizeof(search
)/sizeof(UChar
);
2913 uint32_t sourceLen
= sizeof(source
)/sizeof(UChar
);
2914 TestPCEBuffer_with(search
,searchLen
,source
,sourceLen
);
/*
 * Searches for the single code unit 0x00c9 in { 0x00c9, 0x0000, 0x0041 }
 * using a collator opened from "LHR_AN_CX_EX_FX_HX_NX_S3" and a character
 * break iterator, and checks that the matched length is 1.
 * NOTE(review): the declarations of coll, locale, match, searchLen and
 * sourceLen, the trailing arguments of the multi-line calls, and the cleanup
 * other than usearch_close() are not visible in this listing.
 */
2917 static void TestMatchFollowedByIgnorables(void) {
2918 /* test case for ticket#8482 */
2919 UChar search
[] = { 0x00c9 };
2920 UChar source
[] = { 0x00c9, 0x0000, 0x0041 };
2923 UErrorCode icuStatus
= U_ZERO_ERROR
;
2926 UBreakIterator
*ubrk
;
2927 UStringSearch
*usearch
;
2929 int32_t matchLength
= 0;
2930 const int32_t expectedMatchLength
= 1;
/* Lengths are element counts of the arrays, not byte sizes. */
2932 searchLen
= sizeof(search
)/sizeof(UChar
);
2933 sourceLen
= sizeof(source
)/sizeof(UChar
);
2935 coll
= ucol_openFromShortString("LHR_AN_CX_EX_FX_HX_NX_S3",
/* NOTE(review): every failure in this function is reported through
 * log_data_err, including the break-iterator and search calls, whereas
 * TestPCEBuffer_with reports those through log_err — confirm this is
 * intended. */
2939 if (U_FAILURE(icuStatus
)) {
2940 log_data_err("ucol_openFromShortString error - %s\n", u_errorName(icuStatus
));
2943 locale
= ucol_getLocaleByType(coll
,
2946 if (U_FAILURE(icuStatus
)) {
2947 log_data_err("ucol_getLocaleByType error - %s\n", u_errorName(icuStatus
));
2950 ubrk
= ubrk_open(UBRK_CHARACTER
,
2955 if (U_FAILURE(icuStatus
)) {
2956 log_data_err("ubrk_open error - %s\n", u_errorName(icuStatus
));
2959 usearch
= usearch_openFromCollator(search
,
2966 if (U_FAILURE(icuStatus
)) {
2967 log_data_err("usearch_openFromCollator error - %s\n", u_errorName(icuStatus
));
2970 match
= usearch_first(usearch
,
2972 if (U_FAILURE(icuStatus
)) {
2973 log_data_err("usearch_first error - %s\n", u_errorName(icuStatus
));
2976 log_verbose("match=%d\n", match
);
/* The matched length must be 1, i.e. only the 0x00c9 code unit. */
2978 matchLength
= usearch_getMatchedLength(usearch
);
2980 if (matchLength
!= expectedMatchLength
) {
2981 log_err("Error: matchLength=%d, expected=%d\n", matchLength
, expectedMatchLength
);
2985 usearch_close(usearch
);
/*
 * Registers every string-search test of this file under the
 * "tscoll/usrchtst/" path of the test tree rooted at *root.
 * TestStart is registered first. TestEnd is registered after
 * TestContractionCanonical; the tests added after TestEnd are registered
 * later than it.
 * NOTE(review): presumably registration order is also execution order, so
 * the tests after TestEnd must not rely on state that TestEnd tears down —
 * confirm against the test framework.
 */
2994 void addSearchTest(TestNode
** root
)
2996 addTest(root
, &TestStart
, "tscoll/usrchtst/TestStart");
2997 addTest(root
, &TestOpenClose
, "tscoll/usrchtst/TestOpenClose");
2998 addTest(root
, &TestInitialization
, "tscoll/usrchtst/TestInitialization");
2999 addTest(root
, &TestBasic
, "tscoll/usrchtst/TestBasic");
3000 addTest(root
, &TestNormExact
, "tscoll/usrchtst/TestNormExact");
3001 addTest(root
, &TestStrength
, "tscoll/usrchtst/TestStrength");
3002 addTest(root
, &TestBreakIterator
, "tscoll/usrchtst/TestBreakIterator");
3003 addTest(root
, &TestVariable
, "tscoll/usrchtst/TestVariable");
3004 addTest(root
, &TestOverlap
, "tscoll/usrchtst/TestOverlap");
3005 addTest(root
, &TestCollator
, "tscoll/usrchtst/TestCollator");
3006 addTest(root
, &TestPattern
, "tscoll/usrchtst/TestPattern");
3007 addTest(root
, &TestText
, "tscoll/usrchtst/TestText");
3008 addTest(root
, &TestCompositeBoundaries
,
3009 "tscoll/usrchtst/TestCompositeBoundaries");
3010 addTest(root
, &TestGetSetOffset
, "tscoll/usrchtst/TestGetSetOffset");
3011 addTest(root
, &TestGetSetAttribute
,
3012 "tscoll/usrchtst/TestGetSetAttribute");
3013 addTest(root
, &TestGetMatch
, "tscoll/usrchtst/TestGetMatch");
3014 addTest(root
, &TestSetMatch
, "tscoll/usrchtst/TestSetMatch");
3015 addTest(root
, &TestReset
, "tscoll/usrchtst/TestReset");
3016 addTest(root
, &TestSupplementary
, "tscoll/usrchtst/TestSupplementary");
3017 addTest(root
, &TestContraction
, "tscoll/usrchtst/TestContraction");
3018 addTest(root
, &TestIgnorable
, "tscoll/usrchtst/TestIgnorable");
/* Canonical-match variants of the tests above. */
3019 addTest(root
, &TestCanonical
, "tscoll/usrchtst/TestCanonical");
3020 addTest(root
, &TestNormCanonical
, "tscoll/usrchtst/TestNormCanonical");
3021 addTest(root
, &TestStrengthCanonical
,
3022 "tscoll/usrchtst/TestStrengthCanonical");
3023 addTest(root
, &TestBreakIteratorCanonical
,
3024 "tscoll/usrchtst/TestBreakIteratorCanonical");
3025 addTest(root
, &TestVariableCanonical
,
3026 "tscoll/usrchtst/TestVariableCanonical");
3027 addTest(root
, &TestOverlapCanonical
,
3028 "tscoll/usrchtst/TestOverlapCanonical");
3029 addTest(root
, &TestCollatorCanonical
,
3030 "tscoll/usrchtst/TestCollatorCanonical");
3031 addTest(root
, &TestPatternCanonical
,
3032 "tscoll/usrchtst/TestPatternCanonical");
3033 addTest(root
, &TestTextCanonical
, "tscoll/usrchtst/TestTextCanonical");
3034 addTest(root
, &TestCompositeBoundariesCanonical
,
3035 "tscoll/usrchtst/TestCompositeBoundariesCanonical");
3036 addTest(root
, &TestGetSetOffsetCanonical
,
3037 "tscoll/usrchtst/TestGetSetOffsetCanonical");
3038 addTest(root
, &TestSupplementaryCanonical
,
3039 "tscoll/usrchtst/TestSupplementaryCanonical");
3040 addTest(root
, &TestContractionCanonical
,
3041 "tscoll/usrchtst/TestContractionCanonical");
3042 addTest(root
, &TestEnd
, "tscoll/usrchtst/TestEnd");
/* Tests registered after TestEnd. */
3043 addTest(root
, &TestNumeric
, "tscoll/usrchtst/TestNumeric");
3044 addTest(root
, &TestDiacriticMatch
, "tscoll/usrchtst/TestDiacriticMatch");
3045 addTest(root
, &TestForwardBackward
, "tscoll/usrchtst/TestForwardBackward");
3046 addTest(root
, &TestSearchForNull
, "tscoll/usrchtst/TestSearchForNull");
3047 addTest(root
, &TestStrengthIdentical
, "tscoll/usrchtst/TestStrengthIdentical");
3048 addTest(root
, &TestUsingSearchCollator
, "tscoll/usrchtst/TestUsingSearchCollator");
3049 addTest(root
, &TestPCEBuffer_100df
, "tscoll/usrchtst/TestPCEBuffer/1_00df");
3050 addTest(root
, &TestPCEBuffer_2surr
, "tscoll/usrchtst/TestPCEBuffer/2_dfff");
3051 addTest(root
, &TestMatchFollowedByIgnorables
, "tscoll/usrchtst/TestMatchFollowedByIgnorables");
3054 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO */