]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/usrchtst.c
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usrchtst.c
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 2001-2016 International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 * File usrchtst.c
8 * Modification History:
9 * Name Date Description
10 * synwee July 19 2001 creation
11 ********************************************************************/
12
13 #include "unicode/utypes.h"
14
15 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILE_IO
16
17 #include "unicode/usearch.h"
18 #include "unicode/ustring.h"
19 #include "ccolltst.h"
20 #include "cmemory.h"
21 #include <stdio.h>
22 #include "usrchdat.c"
23 #include "unicode/ubrk.h"
24 #include <assert.h>
25
26 static UBool TOCLOSE_ = TRUE;
27 static UCollator *EN_US_;
28 static UCollator *FR_FR_;
29 static UCollator *DE_;
30 static UCollator *ES_;
31
32 /**
33 * CHECK_BREAK(char *brk)
34 * Test if a break iterator is passed in AND break iteration is disabled.
35 * Skip the test if so.
36 * CHECK_BREAK_BOOL(char *brk)
37 * Same as above, but returns 'TRUE' as a passing result
38 */
39
40 #if !UCONFIG_NO_BREAK_ITERATION
41 static UBreakIterator *EN_WORDBREAKER_;
42 static UBreakIterator *EN_CHARACTERBREAKER_;
43 #define CHECK_BREAK(x)
44 #define CHECK_BREAK_BOOL(x)
45 #else
46 #define CHECK_BREAK(x) if(x) { log_info("Skipping test on %s:%d because UCONFIG_NO_BREAK_ITERATION is on\n", __FILE__, __LINE__); return; }
47 #define CHECK_BREAK_BOOL(x) if(x) { log_info("Skipping test on %s:%d because UCONFIG_NO_BREAK_ITERATION is on\n", __FILE__, __LINE__); return TRUE; }
48 #endif
49
50 /**
51 * Opening all static collators and break iterators
52 */
53 static void open(UErrorCode* status)
54 {
55 if (TOCLOSE_) {
56 UChar rules[1024];
57 int32_t rulelength = 0;
58 *status = U_ZERO_ERROR;
59
60 EN_US_ = ucol_open("en_US", status);
61 if(U_FAILURE(*status)) {
62 log_err_status(*status, "Error opening collator\n");
63 return;
64 }
65 FR_FR_ = ucol_open("fr_FR", status);
66 DE_ = ucol_open("de_DE", status);
67 ES_ = ucol_open("es_ES", status);
68
69 u_strcpy(rules, ucol_getRules(DE_, &rulelength));
70 u_unescape(EXTRACOLLATIONRULE, rules + rulelength, 1024 - rulelength);
71
72 ucol_close(DE_);
73
74 DE_ = ucol_openRules(rules, u_strlen(rules), UCOL_ON, UCOL_TERTIARY,
75 (UParseError *)NULL, status);
76 u_strcpy(rules, ucol_getRules(ES_, &rulelength));
77 u_unescape(EXTRACOLLATIONRULE, rules + rulelength, 1024 - rulelength);
78
79 ucol_close(ES_);
80 ES_ = ucol_openRules(rules, u_strlen(rules), UCOL_ON, UCOL_TERTIARY,
81 NULL, status);
82 #if !UCONFIG_NO_BREAK_ITERATION
83 EN_WORDBREAKER_ = ubrk_open(UBRK_WORD, "en_US", NULL, 0, status);
84 EN_CHARACTERBREAKER_ = ubrk_open(UBRK_CHARACTER, "en_US", NULL, 0,
85 status);
86 #endif
87 TOCLOSE_ = TRUE;
88 }
89 }
90
91 /**
92 * Start opening all static collators and break iterators
93 */
94 static void TestStart(void)
95 {
96 UErrorCode status = U_ZERO_ERROR;
97 open(&status);
98 if (U_FAILURE(status)) {
99 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
100 return;
101 }
102 TOCLOSE_ = FALSE;
103 }
104
105 /**
106 * Closing all static collators and break iterators
107 */
108 static void close(void)
109 {
110 if (TOCLOSE_) {
111 ucol_close(EN_US_);
112 ucol_close(FR_FR_);
113 ucol_close(DE_);
114 ucol_close(ES_);
115 #if !UCONFIG_NO_BREAK_ITERATION
116 ubrk_close(EN_WORDBREAKER_);
117 ubrk_close(EN_CHARACTERBREAKER_);
118 #endif
119 }
120 TOCLOSE_ = FALSE;
121 }
122
123 /**
124 * End closing all static collators and break iterators
125 */
126 static void TestEnd(void)
127 {
128 TOCLOSE_ = TRUE;
129 close();
130 TOCLOSE_ = TRUE;
131 }
132
133 /**
134 * output UChar strings for printing.
135 */
136 static char *toCharString(const UChar* unichars)
137 {
138 static char result[1024];
139 char *temp = result;
140 int count = 0;
141 int length = u_strlen(unichars);
142
143 for (; count < length; count ++) {
144 UChar ch = unichars[count];
145 if (ch >= 0x20 && ch <= 0x7e) {
146 *temp ++ = (char)ch;
147 }
148 else {
149 sprintf(temp, "\\u%04x", ch);
150 temp += 6; /* \uxxxx */
151 }
152 }
153 *temp = 0;
154
155 return result;
156 }
157
158 /**
159 * Getting the collator
160 */
161 static UCollator *getCollator(const char *collator)
162 {
163 if (collator == NULL) {
164 return EN_US_;
165 }
166 if (strcmp(collator, "fr") == 0) {
167 return FR_FR_;
168 }
169 else if (strcmp(collator, "de") == 0) {
170 return DE_;
171 }
172 else if (strcmp(collator, "es") == 0) {
173 return ES_;
174 }
175 else {
176 return EN_US_;
177 }
178 }
179
180 /**
181 * Getting the breakiterator
182 */
183 static UBreakIterator *getBreakIterator(const char *breaker)
184 {
185 if (breaker == NULL) {
186 return NULL;
187 }
188 #if !UCONFIG_NO_BREAK_ITERATION
189 if (strcmp(breaker, "wordbreaker") == 0) {
190 return EN_WORDBREAKER_;
191 }
192 else {
193 return EN_CHARACTERBREAKER_;
194 }
195 #else
196 return NULL;
197 #endif
198 }
199
200 static void TestOpenClose(void)
201 {
202 UErrorCode status = U_ZERO_ERROR;
203 UStringSearch *result;
204 const UChar pattern[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66};
205 const UChar text[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67};
206 #if !UCONFIG_NO_BREAK_ITERATION
207 UBreakIterator *breakiter = ubrk_open(UBRK_WORD, "en_US",
208 text, 6, &status);
209 #endif
210 /* testing null arguments */
211 result = usearch_open(NULL, 0, NULL, 0, NULL, NULL, &status);
212 if (U_SUCCESS(status) || result != NULL) {
213 log_err("Error: NULL arguments should produce an error and a NULL result\n");
214 }
215 status = U_ZERO_ERROR;
216 result = usearch_openFromCollator(NULL, 0, NULL, 0, NULL, NULL, &status);
217 if (U_SUCCESS(status) || result != NULL) {
218 log_err("Error: NULL arguments should produce an error and a NULL result\n");
219 }
220
221 status = U_ZERO_ERROR;
222 result = usearch_open(pattern, 3, NULL, 0, NULL, NULL, &status);
223 if (U_SUCCESS(status) || result != NULL) {
224 log_err("Error: NULL arguments should produce an error and a NULL result\n");
225 }
226 status = U_ZERO_ERROR;
227 result = usearch_openFromCollator(pattern, 3, NULL, 0, NULL, NULL,
228 &status);
229 if (U_SUCCESS(status) || result != NULL) {
230 log_err("Error: NULL arguments should produce an error and a NULL result\n");
231 }
232
233 status = U_ZERO_ERROR;
234 result = usearch_open(pattern, 3, text, 6, NULL, NULL, &status);
235 if (U_SUCCESS(status) || result != NULL) {
236 log_err("Error: NULL arguments should produce an error and a NULL result\n");
237 }
238 status = U_ZERO_ERROR;
239 result = usearch_openFromCollator(pattern, 3, text, 6, NULL, NULL,
240 &status);
241 if (U_SUCCESS(status) || result != NULL) {
242 log_err("Error: NULL arguments should produce an error and a NULL result\n");
243 }
244
245 status = U_ZERO_ERROR;
246 result = usearch_open(pattern, 3, text, 6, "en_US", NULL, &status);
247 if (U_FAILURE(status) || result == NULL) {
248 log_err_status(status, "Error: NULL break iterator is valid for opening search\n");
249 }
250 else {
251 usearch_close(result);
252 }
253 open(&status);
254 if (U_FAILURE(status)) {
255 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
256 return;
257 }
258 status = U_ZERO_ERROR;
259 result = usearch_openFromCollator(pattern, 3, text, 6, EN_US_, NULL,
260 &status);
261 if (U_FAILURE(status) || result == NULL) {
262 if (EN_US_ == NULL) {
263 log_data_err("Opening collator failed.\n");
264 } else {
265 log_err("Error: NULL break iterator is valid for opening search\n");
266 }
267 }
268 else {
269 usearch_close(result);
270 }
271
272
273 status = U_ZERO_ERROR;
274 #if !UCONFIG_NO_BREAK_ITERATION
275
276 result = usearch_open(pattern, 3, text, 6, "en_US", breakiter, &status);
277 if (U_FAILURE(status) || result == NULL) {
278 log_err_status(status, "Error: Break iterator is valid for opening search\n");
279 }
280 else {
281 usearch_close(result);
282 }
283 status = U_ZERO_ERROR;
284 result = usearch_openFromCollator(pattern, 3, text, 6, EN_US_, breakiter,
285 &status);
286 if (U_FAILURE(status) || result == NULL) {
287 if (EN_US_ == NULL) {
288 log_data_err("Opening collator failed.\n");
289 } else {
290 log_err("Error: Break iterator is valid for opening search\n");
291 }
292 }
293 else {
294 usearch_close(result);
295 }
296 ubrk_close(breakiter);
297 #endif
298 close();
299 }
300
301 static void TestInitialization(void)
302 {
303 UErrorCode status = U_ZERO_ERROR;
304 UChar pattern[512];
305 const UChar text[] = {0x61, 0x62, 0x63, 0x64, 0x65, 0x66};
306 int32_t i = 0;
307 UStringSearch *result;
308
309 /* simple test on the pattern ce construction */
310 pattern[0] = 0x41;
311 pattern[1] = 0x42;
312 open(&status);
313 if (U_FAILURE(status)) {
314 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
315 return;
316 }
317 result = usearch_openFromCollator(pattern, 2, text, 3, EN_US_, NULL,
318 &status);
319 if (U_FAILURE(status)) {
320 log_err("Error opening search %s\n", u_errorName(status));
321 }
322 usearch_close(result);
323
324 /* testing if an extremely large pattern will fail the initialization */
325 for(i = 0; i < 512; i++) {
326 pattern[i] = 0x41;
327 }
328 /*uprv_memset(pattern, 0x41, 512);*/
329 result = usearch_openFromCollator(pattern, 512, text, 3, EN_US_, NULL,
330 &status);
331 if (U_FAILURE(status)) {
332 log_err("Error opening search %s\n", u_errorName(status));
333 }
334 usearch_close(result);
335 close();
336 }
337
338 static UBool assertEqualWithUStringSearch( UStringSearch *strsrch,
339 const SearchData search)
340 {
341 int count = 0;
342 UErrorCode status = U_ZERO_ERROR;
343 int32_t matchindex = search.offset[count];
344 int32_t textlength;
345 UChar matchtext[128];
346 int32_t matchlength;
347 int32_t nextStart;
348 UBool isOverlap;
349
350 usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, search.elemCompare, &status);
351 if (U_FAILURE(status)) {
352 log_err("Error setting USEARCH_ELEMENT_COMPARISON attribute %s\n", u_errorName(status));
353 return FALSE;
354 }
355
356 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
357 usearch_getMatchedLength(strsrch) != 0) {
358 log_err("Error with the initialization of match start and length\n");
359 }
360 /* start of next matches */
361 while (U_SUCCESS(status) && matchindex >= 0) {
362 matchlength = search.size[count];
363 usearch_next(strsrch, &status);
364 if (matchindex != usearch_getMatchedStart(strsrch) ||
365 matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
366 char *str = toCharString(usearch_getText(strsrch, &textlength));
367 log_err("Text: %s\n", str);
368 str = toCharString(usearch_getPattern(strsrch, &textlength));
369 log_err("Pattern: %s\n", str);
370 log_err("Error next match found at idx %d (len:%d); expected %d (len:%d)\n",
371 usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
372 matchindex, matchlength);
373 return FALSE;
374 }
375 count ++;
376
377 if (usearch_getMatchedText(strsrch, matchtext, 128, &status) !=
378 (int32_t) matchlength || U_FAILURE(status) ||
379 memcmp(matchtext,
380 usearch_getText(strsrch, &textlength) + matchindex,
381 matchlength * sizeof(UChar)) != 0) {
382 log_err("Error getting next matched text\n");
383 }
384
385 matchindex = search.offset[count];
386 }
387 usearch_next(strsrch, &status);
388 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
389 usearch_getMatchedLength(strsrch) != 0) {
390 char *str = toCharString(usearch_getText(strsrch, &textlength));
391 log_err("Text: %s\n", str);
392 str = toCharString(usearch_getPattern(strsrch, &textlength));
393 log_err("Pattern: %s\n", str);
394 log_err("Error next match found at %d (len:%d); expected <NO MATCH>\n",
395 usearch_getMatchedStart(strsrch),
396 usearch_getMatchedLength(strsrch));
397 return FALSE;
398 }
399 /* start of previous matches */
400 count = count == 0 ? 0 : count - 1;
401 matchindex = search.offset[count];
402
403 while (U_SUCCESS(status) && matchindex >= 0) {
404 matchlength = search.size[count];
405 usearch_previous(strsrch, &status);
406 if (matchindex != usearch_getMatchedStart(strsrch) ||
407 matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
408 char *str = toCharString(usearch_getText(strsrch, &textlength));
409 log_err("Text: %s\n", str);
410 str = toCharString(usearch_getPattern(strsrch, &textlength));
411 log_err("Pattern: %s\n", str);
412 log_err("Error previous match found at %d (len:%d); expected %d (len:%d)\n",
413 usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
414 matchindex, matchlength);
415 return FALSE;
416 }
417
418 if (usearch_getMatchedText(strsrch, matchtext, 128, &status) !=
419 (int32_t) matchlength || U_FAILURE(status) ||
420 memcmp(matchtext,
421 usearch_getText(strsrch, &textlength) + matchindex,
422 matchlength * sizeof(UChar)) != 0) {
423 log_err("Error getting previous matched text\n");
424 }
425
426 matchindex = count > 0 ? search.offset[count - 1] : -1;
427 count --;
428 }
429 usearch_previous(strsrch, &status);
430 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
431 usearch_getMatchedLength(strsrch) != 0) {
432 char *str = toCharString(usearch_getText(strsrch, &textlength));
433 log_err("Text: %s\n", str);
434 str = toCharString(usearch_getPattern(strsrch, &textlength));
435 log_err("Pattern: %s\n", str);
436 log_err("Error previous match found at %d (len:%d); expected <NO MATCH>\n",
437 usearch_getMatchedStart(strsrch),
438 usearch_getMatchedLength(strsrch));
439 return FALSE;
440 }
441
442
443 isOverlap = (usearch_getAttribute(strsrch, USEARCH_OVERLAP) == USEARCH_ON);
444
445 /* start of following matches */
446 count = 0;
447 matchindex = search.offset[count];
448 nextStart = 0;
449
450 while (TRUE) {
451 usearch_following(strsrch, nextStart, &status);
452
453 if (matchindex < 0) {
454 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) {
455 char *str = toCharString(usearch_getText(strsrch, &textlength));
456 log_err("Text: %s\n", str);
457 str = toCharString(usearch_getPattern(strsrch, &textlength));
458 log_err("Pattern: %s\n", str);
459 log_err("Error following match starting at %d (overlap:%d) found at %d (len:%d); expected <NO MATCH>\n",
460 nextStart, isOverlap,
461 usearch_getMatchedStart(strsrch),
462 usearch_getMatchedLength(strsrch));
463 return FALSE;
464 }
465 /* no more matches */
466 break;
467 }
468
469 matchlength = search.size[count];
470 if (usearch_getMatchedStart(strsrch) != matchindex
471 || usearch_getMatchedLength(strsrch) != matchlength
472 || U_FAILURE(status)) {
473 char *str = toCharString(usearch_getText(strsrch, &textlength));
474 log_err("Text: %s\n", str);
475 str = toCharString(usearch_getPattern(strsrch, &textlength));
476 log_err("Pattern: %s\n", str);
477 log_err("Error following match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
478 nextStart, isOverlap,
479 usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
480 matchindex, matchlength);
481 return FALSE;
482 }
483
484 if (isOverlap || usearch_getMatchedLength(strsrch) == 0) {
485 nextStart = usearch_getMatchedStart(strsrch) + 1;
486 } else {
487 nextStart = usearch_getMatchedStart(strsrch) + usearch_getMatchedLength(strsrch);
488 }
489
490 count++;
491 matchindex = search.offset[count];
492 }
493
494 /* start of preceding matches */
495 count = -1; /* last non-negative offset index, could be -1 if no match */
496 while (search.offset[count + 1] >= 0) {
497 count++;
498 }
499 usearch_getText(strsrch, &nextStart);
500
501 while (TRUE) {
502 usearch_preceding(strsrch, nextStart, &status);
503
504 if (count < 0) {
505 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE || usearch_getMatchedLength(strsrch) != 0) {
506 char *str = toCharString(usearch_getText(strsrch, &textlength));
507 log_err("Text: %s\n", str);
508 str = toCharString(usearch_getPattern(strsrch, &textlength));
509 log_err("Pattern: %s\n", str);
510 log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected <NO MATCH>\n",
511 nextStart, isOverlap,
512 usearch_getMatchedStart(strsrch),
513 usearch_getMatchedLength(strsrch));
514 return FALSE;
515 }
516 /* no more matches */
517 break;
518 }
519
520 matchindex = search.offset[count];
521 matchlength = search.size[count];
522 if (usearch_getMatchedStart(strsrch) != matchindex
523 || usearch_getMatchedLength(strsrch) != matchlength
524 || U_FAILURE(status)) {
525 char *str = toCharString(usearch_getText(strsrch, &textlength));
526 log_err("Text: %s\n", str);
527 str = toCharString(usearch_getPattern(strsrch, &textlength));
528 log_err("Pattern: %s\n", str);
529 log_err("Error preceding match starting at %d (overlap: %d) found at %d (len:%d); expected %d (len:%d)\n",
530 nextStart, isOverlap,
531 usearch_getMatchedStart(strsrch), usearch_getMatchedLength(strsrch),
532 matchindex, matchlength);
533 return FALSE;
534 }
535
536 nextStart = matchindex;
537 count--;
538 }
539
540 usearch_setAttribute(strsrch, USEARCH_ELEMENT_COMPARISON, USEARCH_STANDARD_ELEMENT_COMPARISON, &status);
541 return TRUE;
542 }
543
544 static UBool assertEqual(const SearchData search)
545 {
546 UErrorCode status = U_ZERO_ERROR;
547 UChar pattern[32];
548 UChar text[128];
549 UCollator *collator = getCollator(search.collator);
550 UBreakIterator *breaker = getBreakIterator(search.breaker);
551 UStringSearch *strsrch;
552
553 CHECK_BREAK_BOOL(search.breaker);
554
555 u_unescape(search.text, text, 128);
556 u_unescape(search.pattern, pattern, 32);
557 ucol_setStrength(collator, search.strength);
558 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
559 breaker, &status);
560 if (U_FAILURE(status)) {
561 log_err("Error opening string search %s\n", u_errorName(status));
562 return FALSE;
563 }
564
565 if (!assertEqualWithUStringSearch(strsrch, search)) {
566 ucol_setStrength(collator, UCOL_TERTIARY);
567 usearch_close(strsrch);
568 return FALSE;
569 }
570 ucol_setStrength(collator, UCOL_TERTIARY);
571 usearch_close(strsrch);
572 return TRUE;
573 }
574
575 static UBool assertCanonicalEqual(const SearchData search)
576 {
577 UErrorCode status = U_ZERO_ERROR;
578 UChar pattern[32];
579 UChar text[128];
580 UCollator *collator = getCollator(search.collator);
581 UBreakIterator *breaker = getBreakIterator(search.breaker);
582 UStringSearch *strsrch;
583 UBool result = TRUE;
584
585 CHECK_BREAK_BOOL(search.breaker);
586 u_unescape(search.text, text, 128);
587 u_unescape(search.pattern, pattern, 32);
588 ucol_setStrength(collator, search.strength);
589 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
590 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
591 breaker, &status);
592 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
593 &status);
594 if (U_FAILURE(status)) {
595 log_err("Error opening string search %s\n", u_errorName(status));
596 result = FALSE;
597 goto bail;
598 }
599
600 if (!assertEqualWithUStringSearch(strsrch, search)) {
601 ucol_setStrength(collator, UCOL_TERTIARY);
602 usearch_close(strsrch);
603 result = FALSE;
604 goto bail;
605 }
606
607 bail:
608 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
609 ucol_setStrength(collator, UCOL_TERTIARY);
610 usearch_close(strsrch);
611 return result;
612 }
613
614 static UBool assertEqualWithAttribute(const SearchData search,
615 USearchAttributeValue canonical,
616 USearchAttributeValue overlap)
617 {
618 UErrorCode status = U_ZERO_ERROR;
619 UChar pattern[32];
620 UChar text[128];
621 UCollator *collator = getCollator(search.collator);
622 UBreakIterator *breaker = getBreakIterator(search.breaker);
623 UStringSearch *strsrch;
624
625 CHECK_BREAK_BOOL(search.breaker);
626 u_unescape(search.text, text, 128);
627 u_unescape(search.pattern, pattern, 32);
628 ucol_setStrength(collator, search.strength);
629 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
630 breaker, &status);
631 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, canonical,
632 &status);
633 usearch_setAttribute(strsrch, USEARCH_OVERLAP, overlap, &status);
634
635 if (U_FAILURE(status)) {
636 log_err("Error opening string search %s\n", u_errorName(status));
637 return FALSE;
638 }
639
640 if (!assertEqualWithUStringSearch(strsrch, search)) {
641 ucol_setStrength(collator, UCOL_TERTIARY);
642 usearch_close(strsrch);
643 return FALSE;
644 }
645 ucol_setStrength(collator, UCOL_TERTIARY);
646 usearch_close(strsrch);
647 return TRUE;
648 }
649
650 static void TestBasic(void)
651 {
652 int count = 0;
653 UErrorCode status = U_ZERO_ERROR;
654 open(&status);
655 if (U_FAILURE(status)) {
656 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
657 return;
658 }
659 while (BASIC[count].text != NULL) {
660 if (!assertEqual(BASIC[count])) {
661 log_err("Error at test number %d\n", count);
662 }
663 count ++;
664 }
665 close();
666 }
667
668 static void TestNormExact(void)
669 {
670 int count = 0;
671 UErrorCode status = U_ZERO_ERROR;
672 open(&status);
673 if (U_FAILURE(status)) {
674 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
675 return;
676 }
677 ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
678 if (U_FAILURE(status)) {
679 log_err("Error setting collation normalization %s\n",
680 u_errorName(status));
681 }
682 while (BASIC[count].text != NULL) {
683 if (!assertEqual(BASIC[count])) {
684 log_err("Error at test number %d\n", count);
685 }
686 count ++;
687 }
688 count = 0;
689 while (NORMEXACT[count].text != NULL) {
690 if (!assertEqual(NORMEXACT[count])) {
691 log_err("Error at test number %d\n", count);
692 }
693 count ++;
694 }
695 ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
696 count = 0;
697 while (NONNORMEXACT[count].text != NULL) {
698 if (!assertEqual(NONNORMEXACT[count])) {
699 log_err("Error at test number %d\n", count);
700 }
701 count ++;
702 }
703 close();
704 }
705
706 static void TestStrength(void)
707 {
708 int count = 0;
709 UErrorCode status = U_ZERO_ERROR;
710 open(&status);
711 if (U_FAILURE(status)) {
712 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
713 return;
714 }
715 while (STRENGTH[count].text != NULL) {
716 if (!assertEqual(STRENGTH[count])) {
717 log_err("Error at test number %d\n", count);
718 }
719 count ++;
720 }
721 close();
722 }
723
724 static void TestBreakIterator(void) {
725 UErrorCode status = U_ZERO_ERROR;
726 UStringSearch *strsrch;
727 UChar text[128];
728 UChar pattern[32];
729 int count = 0;
730
731 CHECK_BREAK("x");
732
733 #if !UCONFIG_NO_BREAK_ITERATION
734 open(&status);
735 if (U_FAILURE(status)) {
736 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
737 return;
738 }
739 if (usearch_getBreakIterator(NULL) != NULL) {
740 log_err("Expected NULL breakiterator from NULL string search\n");
741 }
742 u_unescape(BREAKITERATOREXACT[0].text, text, 128);
743 u_unescape(BREAKITERATOREXACT[0].pattern, pattern, 32);
744 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_, NULL,
745 &status);
746 if (U_FAILURE(status)) {
747 log_err("Error opening string search %s\n", u_errorName(status));
748 goto ENDTESTBREAKITERATOR;
749 }
750
751 usearch_setBreakIterator(strsrch, NULL, &status);
752 if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != NULL) {
753 log_err("Error usearch_getBreakIterator returned wrong object");
754 goto ENDTESTBREAKITERATOR;
755 }
756
757 usearch_setBreakIterator(strsrch, EN_CHARACTERBREAKER_, &status);
758 if (U_FAILURE(status) ||
759 usearch_getBreakIterator(strsrch) != EN_CHARACTERBREAKER_) {
760 log_err("Error usearch_getBreakIterator returned wrong object");
761 goto ENDTESTBREAKITERATOR;
762 }
763
764 usearch_setBreakIterator(strsrch, EN_WORDBREAKER_, &status);
765 if (U_FAILURE(status) ||
766 usearch_getBreakIterator(strsrch) != EN_WORDBREAKER_) {
767 log_err("Error usearch_getBreakIterator returned wrong object");
768 goto ENDTESTBREAKITERATOR;
769 }
770
771 usearch_close(strsrch);
772
773 count = 0;
774 while (count < 4) {
775 /* 0-3 test are fixed */
776 const SearchData *search = &(BREAKITERATOREXACT[count]);
777 UCollator *collator = getCollator(search->collator);
778 UBreakIterator *breaker = getBreakIterator(search->breaker);
779
780 u_unescape(search->text, text, 128);
781 u_unescape(search->pattern, pattern, 32);
782 ucol_setStrength(collator, search->strength);
783
784 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
785 breaker, &status);
786 if (U_FAILURE(status) ||
787 usearch_getBreakIterator(strsrch) != breaker) {
788 log_err("Error setting break iterator\n");
789 if (strsrch != NULL) {
790 usearch_close(strsrch);
791 }
792 }
793 if (!assertEqualWithUStringSearch(strsrch, *search)) {
794 ucol_setStrength(collator, UCOL_TERTIARY);
795 usearch_close(strsrch);
796 goto ENDTESTBREAKITERATOR;
797 }
798 search = &(BREAKITERATOREXACT[count + 1]);
799 breaker = getBreakIterator(search->breaker);
800 usearch_setBreakIterator(strsrch, breaker, &status);
801 if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != breaker) {
802 log_err("Error setting break iterator\n");
803 usearch_close(strsrch);
804 goto ENDTESTBREAKITERATOR;
805 }
806 usearch_reset(strsrch);
807 if (!assertEqualWithUStringSearch(strsrch, *search)) {
808 log_err("Error at test number %d\n", count);
809 usearch_close(strsrch);
810 goto ENDTESTBREAKITERATOR;
811 }
812 usearch_close(strsrch);
813 count += 2;
814 }
815 count = 0;
816 while (BREAKITERATOREXACT[count].text != NULL) {
817 if (!assertEqual(BREAKITERATOREXACT[count])) {
818 log_err("Error at test number %d\n", count);
819 goto ENDTESTBREAKITERATOR;
820 }
821 count ++;
822 }
823
824 ENDTESTBREAKITERATOR:
825 close();
826 #endif
827 }
828
829 static void TestVariable(void)
830 {
831 int count = 0;
832 UErrorCode status = U_ZERO_ERROR;
833 open(&status);
834 if (U_FAILURE(status)) {
835 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
836 return;
837 }
838 ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
839 if (U_FAILURE(status)) {
840 log_err("Error setting collation alternate attribute %s\n",
841 u_errorName(status));
842 }
843 while (VARIABLE[count].text != NULL) {
844 log_verbose("variable %d\n", count);
845 if (!assertEqual(VARIABLE[count])) {
846 log_err("Error at test number %d\n", count);
847 }
848 count ++;
849 }
850 ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING,
851 UCOL_NON_IGNORABLE, &status);
852 close();
853 }
854
855 static void TestOverlap(void)
856 {
857 int count = 0;
858 UErrorCode status = U_ZERO_ERROR;
859 open(&status);
860 if (U_FAILURE(status)) {
861 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
862 return;
863 }
864 while (OVERLAP[count].text != NULL) {
865 if (!assertEqualWithAttribute(OVERLAP[count], USEARCH_OFF,
866 USEARCH_ON)) {
867 log_err("Error at overlap test number %d\n", count);
868 }
869 count ++;
870 }
871 count = 0;
872 while (NONOVERLAP[count].text != NULL) {
873 if (!assertEqual(NONOVERLAP[count])) {
874 log_err("Error at non overlap test number %d\n", count);
875 }
876 count ++;
877 }
878
879 count = 0;
880 while (count < 1) {
881 UChar pattern[32];
882 UChar text[128];
883 const SearchData *search = &(OVERLAP[count]);
884 UCollator *collator = getCollator(search->collator);
885 UStringSearch *strsrch;
886 status = U_ZERO_ERROR;
887
888 u_unescape(search->text, text, 128);
889 u_unescape(search->pattern, pattern, 32);
890 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
891 NULL, &status);
892 if(status == U_FILE_ACCESS_ERROR) {
893 log_data_err("Is your data around?\n");
894 return;
895 } else if(U_FAILURE(status)) {
896 log_err("Error opening searcher\n");
897 return;
898 }
899 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status);
900 if (U_FAILURE(status) ||
901 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_ON) {
902 log_err("Error setting overlap option\n");
903 }
904 if (!assertEqualWithUStringSearch(strsrch, *search)) {
905 usearch_close(strsrch);
906 return;
907 }
908 search = &(NONOVERLAP[count]);
909 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_OFF, &status);
910 if (U_FAILURE(status) ||
911 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) {
912 log_err("Error setting overlap option\n");
913 }
914 usearch_reset(strsrch);
915 if (!assertEqualWithUStringSearch(strsrch, *search)) {
916 usearch_close(strsrch);
917 log_err("Error at test number %d\n", count);
918 }
919
920 count ++;
921 usearch_close(strsrch);
922 }
923 close();
924 }
925
926 static void TestCollator(void)
927 {
928 /* test collator that thinks "o" and "p" are the same thing */
929 UChar rules[32];
930 UCollator *tailored = NULL;
931 UErrorCode status = U_ZERO_ERROR;
932 UChar pattern[32];
933 UChar text[128];
934 UStringSearch *strsrch;
935
936 text[0] = 0x41;
937 text[1] = 0x42;
938 text[2] = 0x43;
939 text[3] = 0x44;
940 text[4] = 0x45;
941 pattern[0] = 0x62;
942 pattern[1] = 0x63;
943 strsrch = usearch_open(pattern, 2, text, 5, "en_US", NULL, &status);
944 if(status == U_FILE_ACCESS_ERROR) {
945 log_data_err("Is your data around?\n");
946 return;
947 } else if(U_FAILURE(status)) {
948 log_err("Error opening searcher\n");
949 return;
950 }
951 tailored = usearch_getCollator(strsrch);
952 if (usearch_next(strsrch, &status) != -1) {
953 log_err("Error: Found case insensitive match, when we shouldn't\n");
954 }
955 ucol_setStrength(tailored, UCOL_PRIMARY);
956 usearch_reset(strsrch);
957 if (usearch_next(strsrch, &status) != 1) {
958 log_err("Error: Found case insensitive match not found\n");
959 }
960 usearch_close(strsrch);
961
962 open(&status);
963
964 if (usearch_getCollator(NULL) != NULL) {
965 log_err("Expected NULL collator from NULL string search\n");
966 }
967 u_unescape(COLLATOR[0].text, text, 128);
968 u_unescape(COLLATOR[0].pattern, pattern, 32);
969
970 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
971 NULL, &status);
972 if (U_FAILURE(status)) {
973 log_err("Error opening string search %s\n", u_errorName(status));
974 }
975 if (!assertEqualWithUStringSearch(strsrch, COLLATOR[0])) {
976 goto ENDTESTCOLLATOR;
977 }
978
979 u_unescape(TESTCOLLATORRULE, rules, 32);
980 tailored = ucol_openRules(rules, -1, UCOL_ON, COLLATOR[1].strength,
981 NULL, &status);
982 if (U_FAILURE(status)) {
983 log_err("Error opening rule based collator %s\n", u_errorName(status));
984 }
985
986 usearch_setCollator(strsrch, tailored, &status);
987 if (U_FAILURE(status) || usearch_getCollator(strsrch) != tailored) {
988 log_err("Error setting rule based collator\n");
989 }
990 usearch_reset(strsrch);
991 if (!assertEqualWithUStringSearch(strsrch, COLLATOR[1])) {
992 goto ENDTESTCOLLATOR;
993 }
994
995 usearch_setCollator(strsrch, EN_US_, &status);
996 usearch_reset(strsrch);
997 if (U_FAILURE(status) || usearch_getCollator(strsrch) != EN_US_) {
998 log_err("Error setting rule based collator\n");
999 }
1000 if (!assertEqualWithUStringSearch(strsrch, COLLATOR[0])) {
1001 goto ENDTESTCOLLATOR;
1002 }
1003
1004 ENDTESTCOLLATOR:
1005 usearch_close(strsrch);
1006 if (tailored != NULL) {
1007 ucol_close(tailored);
1008 }
1009 close();
1010 }
1011
1012 static void TestPattern(void)
1013 {
1014 UStringSearch *strsrch;
1015 UChar pattern[32];
1016 UChar bigpattern[512];
1017 UChar text[128];
1018 const UChar *temp;
1019 int32_t templength;
1020 UErrorCode status = U_ZERO_ERROR;
1021
1022 open(&status);
1023 if (U_FAILURE(status)) {
1024 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1025 return;
1026 }
1027 if (usearch_getPattern(NULL, &templength) != NULL) {
1028 log_err("Error NULL string search expected returning NULL pattern\n");
1029 }
1030 usearch_setPattern(NULL, pattern, 3, &status);
1031 if (U_SUCCESS(status)) {
1032 log_err("Error expected setting pattern in NULL strings search\n");
1033 }
1034 status = U_ZERO_ERROR;
1035 u_unescape(PATTERN[0].text, text, 128);
1036 u_unescape(PATTERN[0].pattern, pattern, 32);
1037
1038 ucol_setStrength(EN_US_, PATTERN[0].strength);
1039 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
1040 NULL, &status);
1041 if(status == U_FILE_ACCESS_ERROR) {
1042 log_data_err("Is your data around?\n");
1043 return;
1044 } else if(U_FAILURE(status)) {
1045 log_err("Error opening searcher\n");
1046 return;
1047 }
1048
1049 status = U_ZERO_ERROR;
1050 usearch_setPattern(strsrch, NULL, 3, &status);
1051 if (U_SUCCESS(status)) {
1052 log_err("Error expected setting NULL pattern in strings search\n");
1053 }
1054 status = U_ZERO_ERROR;
1055 usearch_setPattern(strsrch, pattern, 0, &status);
1056 if (U_SUCCESS(status)) {
1057 log_err("Error expected setting pattern with length 0 in strings search\n");
1058 }
1059 status = U_ZERO_ERROR;
1060 if (U_FAILURE(status)) {
1061 log_err("Error opening string search %s\n", u_errorName(status));
1062 goto ENDTESTPATTERN;
1063 }
1064 temp = usearch_getPattern(strsrch, &templength);
1065 if (u_strcmp(pattern, temp) != 0) {
1066 log_err("Error setting pattern\n");
1067 }
1068 if (!assertEqualWithUStringSearch(strsrch, PATTERN[0])) {
1069 goto ENDTESTPATTERN;
1070 }
1071
1072 u_unescape(PATTERN[1].pattern, pattern, 32);
1073 usearch_setPattern(strsrch, pattern, -1, &status);
1074 temp = usearch_getPattern(strsrch, &templength);
1075 if (u_strcmp(pattern, temp) != 0) {
1076 log_err("Error setting pattern\n");
1077 goto ENDTESTPATTERN;
1078 }
1079 usearch_reset(strsrch);
1080 if (U_FAILURE(status)) {
1081 log_err("Error setting pattern %s\n", u_errorName(status));
1082 }
1083 if (!assertEqualWithUStringSearch(strsrch, PATTERN[1])) {
1084 goto ENDTESTPATTERN;
1085 }
1086
1087 u_unescape(PATTERN[0].pattern, pattern, 32);
1088 usearch_setPattern(strsrch, pattern, -1, &status);
1089 temp = usearch_getPattern(strsrch, &templength);
1090 if (u_strcmp(pattern, temp) != 0) {
1091 log_err("Error setting pattern\n");
1092 goto ENDTESTPATTERN;
1093 }
1094 usearch_reset(strsrch);
1095 if (U_FAILURE(status)) {
1096 log_err("Error setting pattern %s\n", u_errorName(status));
1097 }
1098 if (!assertEqualWithUStringSearch(strsrch, PATTERN[0])) {
1099 goto ENDTESTPATTERN;
1100 }
1101 /* enormous pattern size to see if this crashes */
1102 for (templength = 0; templength != 512; templength ++) {
1103 bigpattern[templength] = 0x61;
1104 }
1105 bigpattern[511] = 0;
1106 usearch_setPattern(strsrch, bigpattern, -1, &status);
1107 if (U_FAILURE(status)) {
1108 log_err("Error setting pattern with size 512, %s \n",
1109 u_errorName(status));
1110 }
1111 ENDTESTPATTERN:
1112 ucol_setStrength(EN_US_, UCOL_TERTIARY);
1113 if (strsrch != NULL) {
1114 usearch_close(strsrch);
1115 }
1116 close();
1117 }
1118
1119 static void TestText(void)
1120 {
1121 UStringSearch *strsrch;
1122 UChar pattern[32];
1123 UChar text[128];
1124 const UChar *temp;
1125 int32_t templength;
1126 UErrorCode status = U_ZERO_ERROR;
1127
1128 u_unescape(TEXT[0].text, text, 128);
1129 u_unescape(TEXT[0].pattern, pattern, 32);
1130
1131 open(&status);
1132 if (U_FAILURE(status)) {
1133 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1134 return;
1135 }
1136
1137 if (usearch_getText(NULL, &templength) != NULL) {
1138 log_err("Error NULL string search should return NULL text\n");
1139 }
1140
1141 usearch_setText(NULL, text, 10, &status);
1142 if (U_SUCCESS(status)) {
1143 log_err("Error NULL string search should have an error when setting text\n");
1144 }
1145
1146 status = U_ZERO_ERROR;
1147 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
1148 NULL, &status);
1149
1150 if (U_FAILURE(status)) {
1151 log_err("Error opening string search %s\n", u_errorName(status));
1152 goto ENDTESTPATTERN;
1153 }
1154 temp = usearch_getText(strsrch, &templength);
1155 if (u_strcmp(text, temp) != 0) {
1156 log_err("Error setting text\n");
1157 }
1158 if (!assertEqualWithUStringSearch(strsrch, TEXT[0])) {
1159 goto ENDTESTPATTERN;
1160 }
1161
1162 u_unescape(TEXT[1].text, text, 32);
1163 usearch_setText(strsrch, text, -1, &status);
1164 temp = usearch_getText(strsrch, &templength);
1165 if (u_strcmp(text, temp) != 0) {
1166 log_err("Error setting text\n");
1167 goto ENDTESTPATTERN;
1168 }
1169 if (U_FAILURE(status)) {
1170 log_err("Error setting text %s\n", u_errorName(status));
1171 }
1172 if (!assertEqualWithUStringSearch(strsrch, TEXT[1])) {
1173 goto ENDTESTPATTERN;
1174 }
1175
1176 u_unescape(TEXT[0].text, text, 32);
1177 usearch_setText(strsrch, text, -1, &status);
1178 temp = usearch_getText(strsrch, &templength);
1179 if (u_strcmp(text, temp) != 0) {
1180 log_err("Error setting text\n");
1181 goto ENDTESTPATTERN;
1182 }
1183 if (U_FAILURE(status)) {
1184 log_err("Error setting pattern %s\n", u_errorName(status));
1185 }
1186 if (!assertEqualWithUStringSearch(strsrch, TEXT[0])) {
1187 goto ENDTESTPATTERN;
1188 }
1189 ENDTESTPATTERN:
1190 if (strsrch != NULL) {
1191 usearch_close(strsrch);
1192 }
1193 close();
1194 }
1195
1196 static void TestCompositeBoundaries(void)
1197 {
1198 int count = 0;
1199 UErrorCode status = U_ZERO_ERROR;
1200 open(&status);
1201 if (U_FAILURE(status)) {
1202 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1203 return;
1204 }
1205 while (COMPOSITEBOUNDARIES[count].text != NULL) {
1206 log_verbose("composite %d\n", count);
1207 if (!assertEqual(COMPOSITEBOUNDARIES[count])) {
1208 log_err("Error at test number %d\n", count);
1209 }
1210 count ++;
1211 }
1212 close();
1213 }
1214
1215 static void TestGetSetOffset(void)
1216 {
1217 int searchDataIndex = 0;
1218 UChar pattern[32];
1219 UChar text[128];
1220 UErrorCode status = U_ZERO_ERROR;
1221 UStringSearch *strsrch;
1222 memset(pattern, 0, 32*sizeof(UChar));
1223 memset(text, 0, 128*sizeof(UChar));
1224
1225 open(&status);
1226 if (U_FAILURE(status)) {
1227 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1228 return;
1229 }
1230 if (usearch_getOffset(NULL) != USEARCH_DONE) {
1231 log_err("usearch_getOffset(NULL) expected USEARCH_DONE\n");
1232 }
1233 strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL,
1234 &status);
1235 /* testing out of bounds error */
1236 usearch_setOffset(strsrch, -1, &status);
1237 if (U_SUCCESS(status)) {
1238 log_err("Error expecting set offset error\n");
1239 }
1240 usearch_setOffset(strsrch, 128, &status);
1241 if (U_SUCCESS(status)) {
1242 log_err("Error expecting set offset error\n");
1243 }
1244 while (BASIC[searchDataIndex].text != NULL) {
1245 int count = 0;
1246 SearchData search = BASIC[searchDataIndex ++];
1247 int32_t matchindex = search.offset[count];
1248 int32_t textlength;
1249
1250 u_unescape(search.text, text, 128);
1251 u_unescape(search.pattern, pattern, 32);
1252 status = U_ZERO_ERROR;
1253 usearch_setText(strsrch, text, -1, &status);
1254 usearch_setPattern(strsrch, pattern, -1, &status);
1255 ucol_setStrength(usearch_getCollator(strsrch), search.strength);
1256 usearch_reset(strsrch);
1257 while (U_SUCCESS(status) && matchindex >= 0) {
1258 uint32_t matchlength = search.size[count];
1259 usearch_next(strsrch, &status);
1260 if (matchindex != usearch_getMatchedStart(strsrch) ||
1261 matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
1262 char *str = toCharString(usearch_getText(strsrch,
1263 &textlength));
1264 log_err("Text: %s\n", str);
1265 str = toCharString(usearch_getPattern(strsrch, &textlength));
1266 log_err("Pattern: %s\n", str);
1267 log_err("Error match found at %d %d\n",
1268 usearch_getMatchedStart(strsrch),
1269 usearch_getMatchedLength(strsrch));
1270 return;
1271 }
1272 usearch_setOffset(strsrch, matchindex + matchlength, &status);
1273 usearch_previous(strsrch, &status);
1274 if (matchindex != usearch_getMatchedStart(strsrch) ||
1275 matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
1276 char *str = toCharString(usearch_getText(strsrch,
1277 &textlength));
1278 log_err("Text: %s\n", str);
1279 str = toCharString(usearch_getPattern(strsrch, &textlength));
1280 log_err("Pattern: %s\n", str);
1281 log_err("Error match found at %d %d\n",
1282 usearch_getMatchedStart(strsrch),
1283 usearch_getMatchedLength(strsrch));
1284 return;
1285 }
1286 usearch_setOffset(strsrch, matchindex + matchlength, &status);
1287 matchindex = search.offset[count + 1] == -1 ? -1 :
1288 search.offset[count + 2];
1289 if (search.offset[count + 1] != -1) {
1290 usearch_setOffset(strsrch, search.offset[count + 1] + 1,
1291 &status);
1292 if (usearch_getOffset(strsrch) != search.offset[count + 1] + 1) {
1293 log_err("Error setting offset\n");
1294 return;
1295 }
1296 }
1297
1298 count += 2;
1299 }
1300 usearch_next(strsrch, &status);
1301 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE) {
1302 char *str = toCharString(usearch_getText(strsrch, &textlength));
1303 log_err("Text: %s\n", str);
1304 str = toCharString(usearch_getPattern(strsrch, &textlength));
1305 log_err("Pattern: %s\n", str);
1306 log_err("Error match found at %d %d\n",
1307 usearch_getMatchedStart(strsrch),
1308 usearch_getMatchedLength(strsrch));
1309 return;
1310 }
1311 }
1312 ucol_setStrength(usearch_getCollator(strsrch), UCOL_TERTIARY);
1313 usearch_close(strsrch);
1314 close();
1315 }
1316
1317 static void TestGetSetAttribute(void)
1318 {
1319 UErrorCode status = U_ZERO_ERROR;
1320 UChar pattern[32];
1321 UChar text[128];
1322 UStringSearch *strsrch;
1323
1324 memset(pattern, 0, 32*sizeof(UChar));
1325 memset(text, 0, 128*sizeof(UChar));
1326
1327 open(&status);
1328 if (U_FAILURE(status)) {
1329 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1330 return;
1331 }
1332 if (usearch_getAttribute(NULL, USEARCH_OVERLAP) != USEARCH_DEFAULT ||
1333 usearch_getAttribute(NULL, USEARCH_CANONICAL_MATCH) !=
1334 USEARCH_DEFAULT) {
1335 log_err(
1336 "Attributes for NULL string search should be USEARCH_DEFAULT\n");
1337 }
1338 strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL,
1339 &status);
1340 if (U_FAILURE(status)) {
1341 log_err("Error opening search %s\n", u_errorName(status));
1342 return;
1343 }
1344
1345 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_DEFAULT, &status);
1346 if (U_FAILURE(status) ||
1347 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) {
1348 log_err("Error setting overlap to the default\n");
1349 }
1350 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status);
1351 if (U_FAILURE(status) ||
1352 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_ON) {
1353 log_err("Error setting overlap true\n");
1354 }
1355 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_OFF, &status);
1356 if (U_FAILURE(status) ||
1357 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) {
1358 log_err("Error setting overlap false\n");
1359 }
1360 usearch_setAttribute(strsrch, USEARCH_OVERLAP,
1361 USEARCH_ATTRIBUTE_VALUE_COUNT, &status);
1362 if (U_SUCCESS(status)) {
1363 log_err("Error setting overlap to illegal value\n");
1364 }
1365 status = U_ZERO_ERROR;
1366 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_DEFAULT,
1367 &status);
1368 if (U_FAILURE(status) ||
1369 usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) !=
1370 USEARCH_OFF) {
1371 log_err("Error setting canonical match to the default\n");
1372 }
1373 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
1374 &status);
1375 if (U_FAILURE(status) ||
1376 usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) !=
1377 USEARCH_ON) {
1378 log_err("Error setting canonical match true\n");
1379 }
1380 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_OFF,
1381 &status);
1382 if (U_FAILURE(status) ||
1383 usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) !=
1384 USEARCH_OFF) {
1385 log_err("Error setting canonical match false\n");
1386 }
1387 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH,
1388 USEARCH_ATTRIBUTE_VALUE_COUNT, &status);
1389 if (U_SUCCESS(status)) {
1390 log_err("Error setting canonical match to illegal value\n");
1391 }
1392 status = U_ZERO_ERROR;
1393 usearch_setAttribute(strsrch, USEARCH_ATTRIBUTE_COUNT, USEARCH_DEFAULT,
1394 &status);
1395 if (U_SUCCESS(status)) {
1396 log_err("Error setting illegal attribute success\n");
1397 }
1398
1399 usearch_close(strsrch);
1400 close();
1401 }
1402
1403 static void TestGetMatch(void)
1404 {
1405 int count = 0;
1406 UErrorCode status = U_ZERO_ERROR;
1407 UChar text[128];
1408 UChar pattern[32];
1409 SearchData search = MATCH[0];
1410 int32_t matchindex = search.offset[count];
1411 UStringSearch *strsrch;
1412 int32_t textlength;
1413 UChar matchtext[128];
1414
1415 open(&status);
1416 if (U_FAILURE(status)) {
1417 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1418 return;
1419 }
1420
1421 if (usearch_getMatchedStart(NULL) != USEARCH_DONE ||
1422 usearch_getMatchedLength(NULL) != USEARCH_DONE) {
1423 log_err(
1424 "Expected start and length of NULL string search should be USEARCH_DONE\n");
1425 }
1426
1427 u_unescape(search.text, text, 128);
1428 u_unescape(search.pattern, pattern, 32);
1429 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
1430 NULL, &status);
1431 if (U_FAILURE(status)) {
1432 log_err("Error opening string search %s\n", u_errorName(status));
1433 if (strsrch != NULL) {
1434 usearch_close(strsrch);
1435 }
1436 return;
1437 }
1438
1439 while (U_SUCCESS(status) && matchindex >= 0) {
1440 int32_t matchlength = search.size[count];
1441 usearch_next(strsrch, &status);
1442 if (matchindex != usearch_getMatchedStart(strsrch) ||
1443 matchlength != usearch_getMatchedLength(strsrch)) {
1444 char *str = toCharString(usearch_getText(strsrch, &textlength));
1445 log_err("Text: %s\n", str);
1446 str = toCharString(usearch_getPattern(strsrch, &textlength));
1447 log_err("Pattern: %s\n", str);
1448 log_err("Error match found at %d %d\n",
1449 usearch_getMatchedStart(strsrch),
1450 usearch_getMatchedLength(strsrch));
1451 return;
1452 }
1453 count ++;
1454
1455 status = U_ZERO_ERROR;
1456 if (usearch_getMatchedText(NULL, matchtext, 128, &status) !=
1457 USEARCH_DONE || U_SUCCESS(status)){
1458 log_err("Error expecting errors with NULL string search\n");
1459 }
1460 status = U_ZERO_ERROR;
1461 if (usearch_getMatchedText(strsrch, NULL, 0, &status) !=
1462 (int32_t)matchlength || U_SUCCESS(status)){
1463 log_err("Error pre-flighting match length\n");
1464 }
1465 status = U_ZERO_ERROR;
1466 if (usearch_getMatchedText(strsrch, matchtext, 0, &status) !=
1467 (int32_t)matchlength || U_SUCCESS(status)){
1468 log_err("Error getting match text with buffer size 0\n");
1469 }
1470 status = U_ZERO_ERROR;
1471 if (usearch_getMatchedText(strsrch, matchtext, matchlength, &status)
1472 != (int32_t)matchlength || matchtext[matchlength - 1] == 0 ||
1473 U_FAILURE(status)){
1474 log_err("Error getting match text with exact size\n");
1475 }
1476 status = U_ZERO_ERROR;
1477 if (usearch_getMatchedText(strsrch, matchtext, 128, &status) !=
1478 (int32_t) matchlength || U_FAILURE(status) ||
1479 memcmp(matchtext,
1480 usearch_getText(strsrch, &textlength) + matchindex,
1481 matchlength * sizeof(UChar)) != 0 ||
1482 matchtext[matchlength] != 0) {
1483 log_err("Error getting matched text\n");
1484 }
1485
1486 matchindex = search.offset[count];
1487 }
1488 status = U_ZERO_ERROR;
1489 usearch_next(strsrch, &status);
1490 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE ||
1491 usearch_getMatchedLength(strsrch) != 0) {
1492 log_err("Error end of match not found\n");
1493 }
1494 status = U_ZERO_ERROR;
1495 if (usearch_getMatchedText(strsrch, matchtext, 128, &status) !=
1496 USEARCH_DONE) {
1497 log_err("Error getting null matches\n");
1498 }
1499 usearch_close(strsrch);
1500 close();
1501 }
1502
1503 static void TestSetMatch(void)
1504 {
1505 int count = 0;
1506 UErrorCode status = U_ZERO_ERROR;
1507 open(&status);
1508 if (U_FAILURE(status)) {
1509 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1510 return;
1511 }
1512 while (MATCH[count].text != NULL) {
1513 SearchData search = MATCH[count];
1514 int size = 0;
1515 int offsetIndex = 0;
1516 UChar text[128];
1517 UChar pattern[32];
1518 UStringSearch *strsrch;
1519 status = U_ZERO_ERROR;
1520
1521 if (usearch_first(NULL, &status) != USEARCH_DONE ||
1522 usearch_last(NULL, &status) != USEARCH_DONE) {
1523 log_err("Error getting the first and last match of a NULL string search\n");
1524 }
1525 u_unescape(search.text, text, 128);
1526 u_unescape(search.pattern, pattern, 32);
1527 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
1528 NULL, &status);
1529 if (U_FAILURE(status)) {
1530 log_err("Error opening string search %s\n", u_errorName(status));
1531 if (strsrch != NULL) {
1532 usearch_close(strsrch);
1533 }
1534 return;
1535 }
1536
1537 size = 0;
1538 while (search.offset[size] != -1) {
1539 size ++;
1540 }
1541
1542 if (usearch_first(strsrch, &status) != search.offset[0] ||
1543 U_FAILURE(status)) {
1544 log_err("Error getting first match\n");
1545 }
1546 if (usearch_last(strsrch, &status) != search.offset[size -1] ||
1547 U_FAILURE(status)) {
1548 log_err("Error getting last match\n");
1549 }
1550
1551 while (offsetIndex < size) {
1552 if (offsetIndex + 2 < size) {
1553 if (usearch_following(strsrch, search.offset[offsetIndex + 2] - 1,
1554 &status) != search.offset[offsetIndex + 2] ||
1555 U_FAILURE(status)) {
1556 log_err("Error getting following match at index %d\n",
1557 search.offset[offsetIndex + 2] - 1);
1558 }
1559 }
1560 if (offsetIndex + 1 < size) {
1561 if (usearch_preceding(strsrch, search.offset[offsetIndex + 1] +
1562 search.size[offsetIndex + 1] + 1,
1563 &status) != search.offset[offsetIndex + 1] ||
1564 U_FAILURE(status)) {
1565 log_err("Error getting preceeding match at index %d\n",
1566 search.offset[offsetIndex + 1] + 1);
1567 }
1568 }
1569 offsetIndex += 2;
1570 }
1571 status = U_ZERO_ERROR;
1572 if (usearch_following(strsrch, u_strlen(text), &status) !=
1573 USEARCH_DONE) {
1574 log_err("Error expecting out of bounds match\n");
1575 }
1576 if (usearch_preceding(strsrch, 0, &status) != USEARCH_DONE) {
1577 log_err("Error expecting out of bounds match\n");
1578 }
1579 count ++;
1580 usearch_close(strsrch);
1581 }
1582 close();
1583 }
1584
1585 static void TestReset(void)
1586 {
1587 UErrorCode status = U_ZERO_ERROR;
1588 UChar text[] = {0x66, 0x69, 0x73, 0x68, 0x20,
1589 0x66, 0x69, 0x73, 0x68};
1590 UChar pattern[] = {0x73};
1591 UStringSearch *strsrch;
1592
1593 open(&status);
1594 if (U_FAILURE(status)) {
1595 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1596 return;
1597 }
1598 strsrch = usearch_openFromCollator(pattern, 1, text, 9,
1599 EN_US_, NULL, &status);
1600 if (U_FAILURE(status)) {
1601 log_err("Error opening string search %s\n", u_errorName(status));
1602 if (strsrch != NULL) {
1603 usearch_close(strsrch);
1604 }
1605 return;
1606 }
1607 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status);
1608 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
1609 &status);
1610 usearch_setOffset(strsrch, 9, &status);
1611 if (U_FAILURE(status)) {
1612 log_err("Error setting attributes and offsets\n");
1613 }
1614 else {
1615 usearch_reset(strsrch);
1616 if (usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF ||
1617 usearch_getAttribute(strsrch, USEARCH_CANONICAL_MATCH) !=
1618 USEARCH_OFF ||
1619 usearch_getOffset(strsrch) != 0 ||
1620 usearch_getMatchedLength(strsrch) != 0 ||
1621 usearch_getMatchedStart(strsrch) != USEARCH_DONE) {
1622 log_err("Error resetting string search\n");
1623 }
1624 usearch_previous(strsrch, &status);
1625 if (usearch_getMatchedStart(strsrch) != 7 ||
1626 usearch_getMatchedLength(strsrch) != 1) {
1627 log_err("Error resetting string search\n");
1628 }
1629 }
1630 usearch_close(strsrch);
1631 close();
1632 }
1633
1634 static void TestSupplementary(void)
1635 {
1636 int count = 0;
1637 UErrorCode status = U_ZERO_ERROR;
1638 open(&status);
1639 if (U_FAILURE(status)) {
1640 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1641 return;
1642 }
1643 while (SUPPLEMENTARY[count].text != NULL) {
1644 if (!assertEqual(SUPPLEMENTARY[count])) {
1645 log_err("Error at test number %d\n", count);
1646 }
1647 count ++;
1648 }
1649 close();
1650 }
1651
1652 static void TestContraction(void)
1653 {
1654 UChar rules[128];
1655 UChar pattern[128];
1656 UChar text[128];
1657 UCollator *collator;
1658 UErrorCode status = U_ZERO_ERROR;
1659 int count = 0;
1660 UStringSearch *strsrch;
1661 memset(rules, 0, 128*sizeof(UChar));
1662 memset(pattern, 0, 128*sizeof(UChar));
1663 memset(text, 0, 128*sizeof(UChar));
1664
1665 u_unescape(CONTRACTIONRULE, rules, 128);
1666 collator = ucol_openRules(rules, u_strlen(rules), UCOL_ON,
1667 UCOL_TERTIARY, NULL, &status);
1668 if(status == U_FILE_ACCESS_ERROR) {
1669 log_data_err("Is your data around?\n");
1670 return;
1671 } else if(U_FAILURE(status)) {
1672 log_err("Error opening collator %s\n", u_errorName(status));
1673 return;
1674 }
1675 strsrch = usearch_openFromCollator(pattern, 1, text, 1, collator, NULL,
1676 &status);
1677 if (U_FAILURE(status)) {
1678 log_err("Error opening string search %s\n", u_errorName(status));
1679 }
1680
1681 while (CONTRACTION[count].text != NULL) {
1682 u_unescape(CONTRACTION[count].text, text, 128);
1683 u_unescape(CONTRACTION[count].pattern, pattern, 128);
1684 usearch_setText(strsrch, text, -1, &status);
1685 usearch_setPattern(strsrch, pattern, -1, &status);
1686 if (!assertEqualWithUStringSearch(strsrch, CONTRACTION[count])) {
1687 log_err("Error at test number %d\n", count);
1688 }
1689 count ++;
1690 }
1691 usearch_close(strsrch);
1692 ucol_close(collator);
1693 }
1694
1695 static void TestIgnorable(void)
1696 {
1697 UChar rules[128];
1698 UChar pattern[128];
1699 UChar text[128];
1700 UCollator *collator;
1701 UErrorCode status = U_ZERO_ERROR;
1702 UStringSearch *strsrch;
1703 uint32_t count = 0;
1704
1705 memset(rules, 0, 128*sizeof(UChar));
1706 memset(pattern, 0, 128*sizeof(UChar));
1707 memset(text, 0, 128*sizeof(UChar));
1708
1709 u_unescape(IGNORABLERULE, rules, 128);
1710 collator = ucol_openRules(rules, u_strlen(rules), UCOL_ON,
1711 IGNORABLE[count].strength, NULL, &status);
1712 if(status == U_FILE_ACCESS_ERROR) {
1713 log_data_err("Is your data around?\n");
1714 return;
1715 } else if(U_FAILURE(status)) {
1716 log_err("Error opening collator %s\n", u_errorName(status));
1717 return;
1718 }
1719 strsrch = usearch_openFromCollator(pattern, 1, text, 1, collator, NULL,
1720 &status);
1721 if (U_FAILURE(status)) {
1722 log_err("Error opening string search %s\n", u_errorName(status));
1723 }
1724
1725 while (IGNORABLE[count].text != NULL) {
1726 u_unescape(IGNORABLE[count].text, text, 128);
1727 u_unescape(IGNORABLE[count].pattern, pattern, 128);
1728 usearch_setText(strsrch, text, -1, &status);
1729 usearch_setPattern(strsrch, pattern, -1, &status);
1730 if (!assertEqualWithUStringSearch(strsrch, IGNORABLE[count])) {
1731 log_err("Error at test number %d\n", count);
1732 }
1733 count ++;
1734 }
1735 usearch_close(strsrch);
1736 ucol_close(collator);
1737 }
1738
1739 static void TestDiacriticMatch(void)
1740 {
1741 UChar pattern[128];
1742 UChar text[128];
1743 UErrorCode status = U_ZERO_ERROR;
1744 UStringSearch *strsrch = NULL;
1745 UCollator *coll = NULL;
1746 uint32_t count = 0;
1747 SearchData search;
1748
1749 memset(pattern, 0, 128*sizeof(UChar));
1750 memset(text, 0, 128*sizeof(UChar));
1751
1752 strsrch = usearch_open(pattern, 1, text, 1, uloc_getDefault(), NULL, &status);
1753 if (U_FAILURE(status)) {
1754 log_err_status(status, "Error opening string search %s\n", u_errorName(status));
1755 return;
1756 }
1757
1758 search = DIACRITICMATCH[count];
1759 while (search.text != NULL) {
1760 if (search.collator != NULL) {
1761 coll = ucol_openFromShortString(search.collator, FALSE, NULL, &status);
1762 } else {
1763 /* Always use "en_US" because some of these tests fail in Danish locales. */
1764 coll = ucol_open("en_US"/*uloc_getDefault()*/, &status);
1765 ucol_setStrength(coll, search.strength);
1766 }
1767 if (U_FAILURE(status)) {
1768 log_err("Error opening string search collator(\"%s\") %s\n", search.collator, u_errorName(status));
1769 return;
1770 }
1771
1772 usearch_setCollator(strsrch, coll, &status);
1773 if (U_FAILURE(status)) {
1774 log_err("Error setting string search collator %s\n", u_errorName(status));
1775 return;
1776 }
1777
1778 u_unescape(search.text, text, 128);
1779 u_unescape(search.pattern, pattern, 128);
1780 usearch_setText(strsrch, text, -1, &status);
1781 usearch_setPattern(strsrch, pattern, -1, &status);
1782 if (!assertEqualWithUStringSearch(strsrch, search)) {
1783 log_err("Error at test number %d\n", count);
1784 }
1785 ucol_close(coll);
1786
1787 search = DIACRITICMATCH[++count];
1788 }
1789 usearch_close(strsrch);
1790 }
1791
1792 static void TestCanonical(void)
1793 {
1794 int count = 0;
1795 UErrorCode status = U_ZERO_ERROR;
1796 open(&status);
1797 if (U_FAILURE(status)) {
1798 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1799 return;
1800 }
1801 while (BASICCANONICAL[count].text != NULL) {
1802 if (!assertCanonicalEqual(BASICCANONICAL[count])) {
1803 log_err("Error at test number %d\n", count);
1804 }
1805 count ++;
1806 }
1807 close();
1808 }
1809
1810 static void TestNormCanonical(void)
1811 {
1812 int count = 0;
1813 UErrorCode status = U_ZERO_ERROR;
1814 open(&status);
1815 if (U_FAILURE(status)) {
1816 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1817 return;
1818 }
1819 ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
1820 count = 0;
1821 while (NORMCANONICAL[count].text != NULL) {
1822 if (!assertCanonicalEqual(NORMCANONICAL[count])) {
1823 log_err("Error at test number %d\n", count);
1824 }
1825 count ++;
1826 }
1827 ucol_setAttribute(EN_US_, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
1828 close();
1829 }
1830
1831 static void TestStrengthCanonical(void)
1832 {
1833 int count = 0;
1834 UErrorCode status = U_ZERO_ERROR;
1835 open(&status);
1836 if (U_FAILURE(status)) {
1837 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1838 return;
1839 }
1840 while (STRENGTHCANONICAL[count].text != NULL) {
1841 if (!assertCanonicalEqual(STRENGTHCANONICAL[count])) {
1842 log_err("Error at test number %d\n", count);
1843 }
1844 count ++;
1845 }
1846 close();
1847 }
1848
1849 static void TestBreakIteratorCanonical(void) {
1850 UErrorCode status = U_ZERO_ERROR;
1851 int count = 0;
1852
1853 CHECK_BREAK("x");
1854
1855 #if !UCONFIG_NO_BREAK_ITERATION
1856
1857 open(&status);
1858 if (U_FAILURE(status)) {
1859 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1860 return;
1861 }
1862 while (count < 4) {
1863 /* 0-3 test are fixed */
1864 UChar pattern[32];
1865 UChar text[128];
1866 const SearchData *search = &(BREAKITERATORCANONICAL[count]);
1867 UCollator *collator = getCollator(search->collator);
1868 UBreakIterator *breaker = getBreakIterator(search->breaker);
1869 UStringSearch *strsrch;
1870
1871 u_unescape(search->text, text, 128);
1872 u_unescape(search->pattern, pattern, 32);
1873 ucol_setStrength(collator, search->strength);
1874
1875 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
1876 breaker, &status);
1877 if(status == U_FILE_ACCESS_ERROR) {
1878 log_data_err("Is your data around?\n");
1879 goto ENDTESTBREAKITERATOR;
1880 } else if(U_FAILURE(status)) {
1881 log_err("Error opening searcher\n");
1882 goto ENDTESTBREAKITERATOR;
1883 }
1884 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
1885 &status);
1886 if (U_FAILURE(status) ||
1887 usearch_getBreakIterator(strsrch) != breaker) {
1888 log_err("Error setting break iterator\n");
1889 usearch_close(strsrch);
1890 goto ENDTESTBREAKITERATOR;
1891 }
1892 if (!assertEqualWithUStringSearch(strsrch, *search)) {
1893 ucol_setStrength(collator, UCOL_TERTIARY);
1894 usearch_close(strsrch);
1895 goto ENDTESTBREAKITERATOR;
1896 }
1897 search = &(BREAKITERATOREXACT[count + 1]);
1898 breaker = getBreakIterator(search->breaker);
1899 usearch_setBreakIterator(strsrch, breaker, &status);
1900 if (U_FAILURE(status) || usearch_getBreakIterator(strsrch) != breaker) {
1901 log_err("Error setting break iterator\n");
1902 usearch_close(strsrch);
1903 goto ENDTESTBREAKITERATOR;
1904 }
1905 usearch_reset(strsrch);
1906 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
1907 &status);
1908 if (!assertEqualWithUStringSearch(strsrch, *search)) {
1909 log_err("Error at test number %d\n", count);
1910 usearch_close(strsrch);
1911 goto ENDTESTBREAKITERATOR;
1912 }
1913 usearch_close(strsrch);
1914 count += 2;
1915 }
1916 count = 0;
1917 while (BREAKITERATORCANONICAL[count].text != NULL) {
1918 if (!assertEqual(BREAKITERATORCANONICAL[count])) {
1919 log_err("Error at test number %d\n", count);
1920 goto ENDTESTBREAKITERATOR;
1921 }
1922 count ++;
1923 }
1924
1925 ENDTESTBREAKITERATOR:
1926 close();
1927 #endif
1928 }
1929
1930 static void TestVariableCanonical(void)
1931 {
1932 int count = 0;
1933 UErrorCode status = U_ZERO_ERROR;
1934 open(&status);
1935 if (U_FAILURE(status)) {
1936 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1937 return;
1938 }
1939 ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
1940 if (U_FAILURE(status)) {
1941 log_err("Error setting collation alternate attribute %s\n",
1942 u_errorName(status));
1943 }
1944 while (VARIABLE[count].text != NULL) {
1945 log_verbose("variable %d\n", count);
1946 if (!assertCanonicalEqual(VARIABLE[count])) {
1947 log_err("Error at test number %d\n", count);
1948 }
1949 count ++;
1950 }
1951 ucol_setAttribute(EN_US_, UCOL_ALTERNATE_HANDLING,
1952 UCOL_NON_IGNORABLE, &status);
1953 close();
1954 }
1955
1956 static void TestOverlapCanonical(void)
1957 {
1958 int count = 0;
1959 UErrorCode status = U_ZERO_ERROR;
1960 open(&status);
1961 if (U_FAILURE(status)) {
1962 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
1963 return;
1964 }
1965 while (OVERLAPCANONICAL[count].text != NULL) {
1966 if (!assertEqualWithAttribute(OVERLAPCANONICAL[count], USEARCH_ON,
1967 USEARCH_ON)) {
1968 log_err("Error at overlap test number %d\n", count);
1969 }
1970 count ++;
1971 }
1972 count = 0;
1973 while (NONOVERLAP[count].text != NULL) {
1974 if (!assertCanonicalEqual(NONOVERLAPCANONICAL[count])) {
1975 log_err("Error at non overlap test number %d\n", count);
1976 }
1977 count ++;
1978 }
1979
1980 count = 0;
1981 while (count < 1) {
1982 UChar pattern[32];
1983 UChar text[128];
1984 const SearchData *search = &(OVERLAPCANONICAL[count]);
1985 UCollator *collator = getCollator(search->collator);
1986 UStringSearch *strsrch;
1987 status = U_ZERO_ERROR;
1988
1989 u_unescape(search->text, text, 128);
1990 u_unescape(search->pattern, pattern, 32);
1991 strsrch = usearch_openFromCollator(pattern, -1, text, -1, collator,
1992 NULL, &status);
1993 if(status == U_FILE_ACCESS_ERROR) {
1994 log_data_err("Is your data around?\n");
1995 return;
1996 } else if(U_FAILURE(status)) {
1997 log_err("Error opening searcher\n");
1998 return;
1999 }
2000 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2001 &status);
2002 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_ON, &status);
2003 if (U_FAILURE(status) ||
2004 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_ON) {
2005 log_err("Error setting overlap option\n");
2006 }
2007 if (!assertEqualWithUStringSearch(strsrch, *search)) {
2008 usearch_close(strsrch);
2009 return;
2010 }
2011 search = &(NONOVERLAPCANONICAL[count]);
2012 usearch_setAttribute(strsrch, USEARCH_OVERLAP, USEARCH_OFF, &status);
2013 if (U_FAILURE(status) ||
2014 usearch_getAttribute(strsrch, USEARCH_OVERLAP) != USEARCH_OFF) {
2015 log_err("Error setting overlap option\n");
2016 }
2017 usearch_reset(strsrch);
2018 if (!assertEqualWithUStringSearch(strsrch, *search)) {
2019 usearch_close(strsrch);
2020 log_err("Error at test number %d\n", count);
2021 }
2022
2023 count ++;
2024 usearch_close(strsrch);
2025 }
2026 close();
2027 }
2028
2029 static void TestCollatorCanonical(void)
2030 {
2031 /* test collator that thinks "o" and "p" are the same thing */
2032 UChar rules[32];
2033 UCollator *tailored = NULL;
2034 UErrorCode status = U_ZERO_ERROR;
2035 UChar pattern[32];
2036 UChar text[128];
2037 UStringSearch *strsrch;
2038
2039 open(&status);
2040 if (U_FAILURE(status)) {
2041 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
2042 return;
2043 }
2044 u_unescape(COLLATORCANONICAL[0].text, text, 128);
2045 u_unescape(COLLATORCANONICAL[0].pattern, pattern, 32);
2046
2047 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
2048 NULL, &status);
2049 if(status == U_FILE_ACCESS_ERROR) {
2050 log_data_err("Is your data around?\n");
2051 return;
2052 } else if(U_FAILURE(status)) {
2053 log_err("Error opening searcher\n");
2054 return;
2055 }
2056 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2057 &status);
2058 if (U_FAILURE(status)) {
2059 log_err("Error opening string search %s\n", u_errorName(status));
2060 }
2061 if (!assertEqualWithUStringSearch(strsrch, COLLATORCANONICAL[0])) {
2062 goto ENDTESTCOLLATOR;
2063 }
2064
2065 u_unescape(TESTCOLLATORRULE, rules, 32);
2066 tailored = ucol_openRules(rules, -1, UCOL_ON,
2067 COLLATORCANONICAL[1].strength, NULL, &status);
2068 if (U_FAILURE(status)) {
2069 log_err("Error opening rule based collator %s\n", u_errorName(status));
2070 }
2071
2072 usearch_setCollator(strsrch, tailored, &status);
2073 if (U_FAILURE(status) || usearch_getCollator(strsrch) != tailored) {
2074 log_err("Error setting rule based collator\n");
2075 }
2076 usearch_reset(strsrch);
2077 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2078 &status);
2079 if (!assertEqualWithUStringSearch(strsrch, COLLATORCANONICAL[1])) {
2080 goto ENDTESTCOLLATOR;
2081 }
2082
2083 usearch_setCollator(strsrch, EN_US_, &status);
2084 usearch_reset(strsrch);
2085 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2086 &status);
2087 if (U_FAILURE(status) || usearch_getCollator(strsrch) != EN_US_) {
2088 log_err("Error setting rule based collator\n");
2089 }
2090 if (!assertEqualWithUStringSearch(strsrch, COLLATORCANONICAL[0])) {
2091 goto ENDTESTCOLLATOR;
2092 }
2093
2094 ENDTESTCOLLATOR:
2095 usearch_close(strsrch);
2096 if (tailored != NULL) {
2097 ucol_close(tailored);
2098 }
2099 close();
2100 }
2101
2102 static void TestPatternCanonical(void)
2103 {
2104 UStringSearch *strsrch;
2105 UChar pattern[32];
2106 UChar text[128];
2107 const UChar *temp;
2108 int32_t templength;
2109 UErrorCode status = U_ZERO_ERROR;
2110
2111 open(&status);
2112 if (U_FAILURE(status)) {
2113 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
2114 return;
2115 }
2116 u_unescape(PATTERNCANONICAL[0].text, text, 128);
2117 u_unescape(PATTERNCANONICAL[0].pattern, pattern, 32);
2118
2119 ucol_setStrength(EN_US_, PATTERNCANONICAL[0].strength);
2120 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
2121 NULL, &status);
2122 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2123 &status);
2124 if (U_FAILURE(status)) {
2125 log_err("Error opening string search %s\n", u_errorName(status));
2126 goto ENDTESTPATTERN;
2127 }
2128 temp = usearch_getPattern(strsrch, &templength);
2129 if (u_strcmp(pattern, temp) != 0) {
2130 log_err("Error setting pattern\n");
2131 }
2132 if (!assertEqualWithUStringSearch(strsrch, PATTERNCANONICAL[0])) {
2133 goto ENDTESTPATTERN;
2134 }
2135
2136 u_unescape(PATTERNCANONICAL[1].pattern, pattern, 32);
2137 usearch_setPattern(strsrch, pattern, -1, &status);
2138 temp = usearch_getPattern(strsrch, &templength);
2139 if (u_strcmp(pattern, temp) != 0) {
2140 log_err("Error setting pattern\n");
2141 goto ENDTESTPATTERN;
2142 }
2143 usearch_reset(strsrch);
2144 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2145 &status);
2146 if (U_FAILURE(status)) {
2147 log_err("Error setting pattern %s\n", u_errorName(status));
2148 }
2149 if (!assertEqualWithUStringSearch(strsrch, PATTERNCANONICAL[1])) {
2150 goto ENDTESTPATTERN;
2151 }
2152
2153 u_unescape(PATTERNCANONICAL[0].pattern, pattern, 32);
2154 usearch_setPattern(strsrch, pattern, -1, &status);
2155 temp = usearch_getPattern(strsrch, &templength);
2156 if (u_strcmp(pattern, temp) != 0) {
2157 log_err("Error setting pattern\n");
2158 goto ENDTESTPATTERN;
2159 }
2160 usearch_reset(strsrch);
2161 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2162 &status);
2163 if (U_FAILURE(status)) {
2164 log_err("Error setting pattern %s\n", u_errorName(status));
2165 }
2166 if (!assertEqualWithUStringSearch(strsrch, PATTERNCANONICAL[0])) {
2167 goto ENDTESTPATTERN;
2168 }
2169 ENDTESTPATTERN:
2170 ucol_setStrength(EN_US_, UCOL_TERTIARY);
2171 if (strsrch != NULL) {
2172 usearch_close(strsrch);
2173 }
2174 close();
2175 }
2176
2177 static void TestTextCanonical(void)
2178 {
2179 UStringSearch *strsrch;
2180 UChar pattern[32];
2181 UChar text[128];
2182 const UChar *temp;
2183 int32_t templength;
2184 UErrorCode status = U_ZERO_ERROR;
2185
2186 u_unescape(TEXTCANONICAL[0].text, text, 128);
2187 u_unescape(TEXTCANONICAL[0].pattern, pattern, 32);
2188
2189 open(&status);
2190 if (U_FAILURE(status)) {
2191 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
2192 return;
2193 }
2194 strsrch = usearch_openFromCollator(pattern, -1, text, -1, EN_US_,
2195 NULL, &status);
2196 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2197 &status);
2198
2199 if (U_FAILURE(status)) {
2200 log_err("Error opening string search %s\n", u_errorName(status));
2201 goto ENDTESTPATTERN;
2202 }
2203 temp = usearch_getText(strsrch, &templength);
2204 if (u_strcmp(text, temp) != 0) {
2205 log_err("Error setting text\n");
2206 }
2207 if (!assertEqualWithUStringSearch(strsrch, TEXTCANONICAL[0])) {
2208 goto ENDTESTPATTERN;
2209 }
2210
2211 u_unescape(TEXTCANONICAL[1].text, text, 32);
2212 usearch_setText(strsrch, text, -1, &status);
2213 temp = usearch_getText(strsrch, &templength);
2214 if (u_strcmp(text, temp) != 0) {
2215 log_err("Error setting text\n");
2216 goto ENDTESTPATTERN;
2217 }
2218 if (U_FAILURE(status)) {
2219 log_err("Error setting text %s\n", u_errorName(status));
2220 }
2221 if (!assertEqualWithUStringSearch(strsrch, TEXTCANONICAL[1])) {
2222 goto ENDTESTPATTERN;
2223 }
2224
2225 u_unescape(TEXTCANONICAL[0].text, text, 32);
2226 usearch_setText(strsrch, text, -1, &status);
2227 temp = usearch_getText(strsrch, &templength);
2228 if (u_strcmp(text, temp) != 0) {
2229 log_err("Error setting text\n");
2230 goto ENDTESTPATTERN;
2231 }
2232 if (U_FAILURE(status)) {
2233 log_err("Error setting pattern %s\n", u_errorName(status));
2234 }
2235 if (!assertEqualWithUStringSearch(strsrch, TEXTCANONICAL[0])) {
2236 goto ENDTESTPATTERN;
2237 }
2238 ENDTESTPATTERN:
2239 if (strsrch != NULL) {
2240 usearch_close(strsrch);
2241 }
2242 close();
2243 }
2244
2245 static void TestCompositeBoundariesCanonical(void)
2246 {
2247 int count = 0;
2248 UErrorCode status = U_ZERO_ERROR;
2249 open(&status);
2250 if (U_FAILURE(status)) {
2251 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
2252 return;
2253 }
2254 while (COMPOSITEBOUNDARIESCANONICAL[count].text != NULL) {
2255 log_verbose("composite %d\n", count);
2256 if (!assertCanonicalEqual(COMPOSITEBOUNDARIESCANONICAL[count])) {
2257 log_err("Error at test number %d\n", count);
2258 }
2259 count ++;
2260 }
2261 close();
2262 }
2263
2264 static void TestGetSetOffsetCanonical(void)
2265 {
2266 int searchDataIndex = 0;
2267 UChar pattern[32];
2268 UChar text[128];
2269 UErrorCode status = U_ZERO_ERROR;
2270 UStringSearch *strsrch;
2271 UCollator *collator;
2272
2273 memset(pattern, 0, 32*sizeof(UChar));
2274 memset(text, 0, 128*sizeof(UChar));
2275
2276 open(&status);
2277 if (U_FAILURE(status)) {
2278 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
2279 return;
2280 }
2281 strsrch = usearch_openFromCollator(pattern, 16, text, 32, EN_US_, NULL,
2282 &status);
2283
2284 collator = usearch_getCollator(strsrch);
2285 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
2286
2287 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2288 &status);
2289
2290 /* testing out of bounds error */
2291 usearch_setOffset(strsrch, -1, &status);
2292 if (U_SUCCESS(status)) {
2293 log_err("Error expecting set offset error\n");
2294 }
2295 usearch_setOffset(strsrch, 128, &status);
2296 if (U_SUCCESS(status)) {
2297 log_err("Error expecting set offset error\n");
2298 }
2299 while (BASICCANONICAL[searchDataIndex].text != NULL) {
2300 int count = 0;
2301 SearchData search = BASICCANONICAL[searchDataIndex ++];
2302 int32_t matchindex = search.offset[count];
2303 int32_t textlength;
2304
2305 if (BASICCANONICAL[searchDataIndex].text == NULL) {
2306 /* skip the last one */
2307 break;
2308 }
2309
2310 u_unescape(search.text, text, 128);
2311 u_unescape(search.pattern, pattern, 32);
2312 status = U_ZERO_ERROR;
2313 usearch_setText(strsrch, text, -1, &status);
2314 usearch_setPattern(strsrch, pattern, -1, &status);
2315 while (U_SUCCESS(status) && matchindex >= 0) {
2316 uint32_t matchlength = search.size[count];
2317 usearch_next(strsrch, &status);
2318 if (matchindex != usearch_getMatchedStart(strsrch) ||
2319 matchlength != (uint32_t)usearch_getMatchedLength(strsrch)) {
2320 char *str = toCharString(usearch_getText(strsrch,
2321 &textlength));
2322 log_err("Text: %s\n", str);
2323 str = toCharString(usearch_getPattern(strsrch, &textlength));
2324 log_err("Pattern: %s\n", str);
2325 log_err("Error match found at %d %d\n",
2326 usearch_getMatchedStart(strsrch),
2327 usearch_getMatchedLength(strsrch));
2328 goto bail;
2329 }
2330 matchindex = search.offset[count + 1] == -1 ? -1 :
2331 search.offset[count + 2];
2332 if (search.offset[count + 1] != -1) {
2333 usearch_setOffset(strsrch, search.offset[count + 1] + 1,
2334 &status);
2335 if (usearch_getOffset(strsrch) != search.offset[count + 1] + 1) {
2336 log_err("Error setting offset\n");
2337 goto bail;
2338 }
2339 }
2340
2341 count += 2;
2342 }
2343 usearch_next(strsrch, &status);
2344 if (usearch_getMatchedStart(strsrch) != USEARCH_DONE) {
2345 char *str = toCharString(usearch_getText(strsrch, &textlength));
2346 log_err("Text: %s\n", str);
2347 str = toCharString(usearch_getPattern(strsrch, &textlength));
2348 log_err("Pattern: %s\n", str);
2349 log_err("Error match found at %d %d\n",
2350 usearch_getMatchedStart(strsrch),
2351 usearch_getMatchedLength(strsrch));
2352 goto bail;
2353 }
2354 }
2355
2356 bail:
2357 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_OFF, &status);
2358 usearch_close(strsrch);
2359 close();
2360 }
2361
2362 static void TestSupplementaryCanonical(void)
2363 {
2364 int count = 0;
2365 UErrorCode status = U_ZERO_ERROR;
2366 open(&status);
2367 if (U_FAILURE(status)) {
2368 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
2369 return;
2370 }
2371 while (SUPPLEMENTARYCANONICAL[count].text != NULL) {
2372 if (!assertCanonicalEqual(SUPPLEMENTARYCANONICAL[count])) {
2373 log_err("Error at test number %d\n", count);
2374 }
2375 count ++;
2376 }
2377 close();
2378 }
2379
2380 static void TestContractionCanonical(void)
2381 {
2382 UChar rules[128];
2383 UChar pattern[128];
2384 UChar text[128];
2385 UCollator *collator = NULL;
2386 UErrorCode status = U_ZERO_ERROR;
2387 int count = 0;
2388 UStringSearch *strsrch = NULL;
2389 memset(rules, 0, 128*sizeof(UChar));
2390 memset(pattern, 0, 128*sizeof(UChar));
2391 memset(text, 0, 128*sizeof(UChar));
2392
2393 u_unescape(CONTRACTIONRULE, rules, 128);
2394 collator = ucol_openRules(rules, u_strlen(rules), UCOL_ON,
2395 UCOL_TERTIARY, NULL, &status);
2396 if(status == U_FILE_ACCESS_ERROR) {
2397 log_data_err("Is your data around?\n");
2398 return;
2399 } else if(U_FAILURE(status)) {
2400 log_err("Error opening collator %s\n", u_errorName(status));
2401 return;
2402 }
2403 strsrch = usearch_openFromCollator(pattern, 1, text, 1, collator, NULL,
2404 &status);
2405 usearch_setAttribute(strsrch, USEARCH_CANONICAL_MATCH, USEARCH_ON,
2406 &status);
2407 if (U_FAILURE(status)) {
2408 log_err("Error opening string search %s\n", u_errorName(status));
2409 }
2410
2411 while (CONTRACTIONCANONICAL[count].text != NULL) {
2412 u_unescape(CONTRACTIONCANONICAL[count].text, text, 128);
2413 u_unescape(CONTRACTIONCANONICAL[count].pattern, pattern, 128);
2414 usearch_setText(strsrch, text, -1, &status);
2415 usearch_setPattern(strsrch, pattern, -1, &status);
2416 if (!assertEqualWithUStringSearch(strsrch,
2417 CONTRACTIONCANONICAL[count])) {
2418 log_err("Error at test number %d\n", count);
2419 }
2420 count ++;
2421 }
2422 usearch_close(strsrch);
2423 ucol_close(collator);
2424 }
2425
2426 static void TestNumeric(void) {
2427 UCollator *coll = NULL;
2428 UStringSearch *strsrch = NULL;
2429 UErrorCode status = U_ZERO_ERROR;
2430
2431 UChar pattern[128];
2432 UChar text[128];
2433 memset(pattern, 0, 128*sizeof(UChar));
2434 memset(text, 0, 128*sizeof(UChar));
2435
2436 coll = ucol_open("", &status);
2437 if(U_FAILURE(status)) {
2438 log_data_err("Could not open UCA. Is your data around?\n");
2439 return;
2440 }
2441
2442 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, UCOL_ON, &status);
2443
2444 strsrch = usearch_openFromCollator(pattern, 1, text, 1, coll, NULL, &status);
2445
2446 if(status != U_UNSUPPORTED_ERROR || U_SUCCESS(status)) {
2447 log_err("Expected U_UNSUPPORTED_ERROR when trying to instantiate a search object from a CODAN collator, got %s instead\n", u_errorName(status));
2448 if(strsrch) {
2449 usearch_close(strsrch);
2450 }
2451 }
2452
2453 ucol_close(coll);
2454
2455 }
2456
2457 /* This test is for ticket 4038 due to incorrect backward searching when certain patterns have a length > 1 */
2458 static void TestForwardBackward(void) {
2459 UErrorCode status = U_ZERO_ERROR;
2460 UCollator *coll = NULL;
2461 UStringSearch *search = NULL;
2462 UChar usrcstr[32], value[4];
2463 int32_t pos= -1;
2464 int32_t expectedPos = 9;
2465
2466 coll = ucol_open("en_GB", &status);
2467 if (U_FAILURE(status)) {
2468 log_err_status(status, "ucol_open failed: %s\n", u_errorName(status));
2469 goto exitTestForwardBackward;
2470 }
2471 ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_PRIMARY, &status);
2472 ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
2473 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, UCOL_NON_IGNORABLE, &status);
2474
2475 u_uastrcpy(usrcstr, "QBitArray::bitarr_data"); /* text */
2476 u_uastrcpy(value, "::"); /* pattern */
2477
2478 search = usearch_openFromCollator(value, 2, usrcstr, 22, coll, NULL, &status);
2479 if (U_FAILURE(status)) {
2480 log_err("usearch_openFromCollator failed: %s\n", u_errorName(status));
2481 goto exitTestForwardBackward;
2482 }
2483
2484 usearch_reset(search);
2485 /* forward search */
2486 pos = usearch_first(search, &status);
2487 if (pos != expectedPos) {
2488 log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
2489 goto exitTestForwardBackward;
2490 }
2491
2492 pos = -1;
2493 usearch_reset(search);
2494 /* backward search */
2495 pos = usearch_last(search, &status);
2496 if (pos != expectedPos) {
2497 log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
2498 }
2499
2500 exitTestForwardBackward :
2501 if (coll != NULL) {
2502 ucol_close(coll);
2503 }
2504 if (search != NULL) {
2505 usearch_close(search);
2506 }
2507 }
2508
/*
 * TEST_ASSERT(x): if the UErrorCode x is a failure, log it through
 * log_err_status() together with the current file and line. The macro only
 * logs; it does not return from or otherwise leave the calling function.
 * Note that x is expanded twice, and that the expansion is a plain brace
 * block rather than a do { } while (0) statement.
 */
2509 #define TEST_ASSERT(x) \
2510 {if (U_FAILURE(x)) {log_err_status(x, "%s:%d: FAIL: test assertion failure \n", __FILE__, __LINE__);\
2511 }}
2512
2513 static void TestSearchForNull(void) {
2514 UCollator *coll;
2515 UErrorCode ec;
2516 UStringSearch *search;
2517 int pos;
2518 int len;
2519 int expectedPos;
2520 int expectedLen;
2521 int expectedNum;
2522 int count = 0;
2523 const UChar zerodigit = 0x0030; /* 0 */
2524 const UChar nulldigit = 0x0000; /* null */
2525
2526 /* static const UChar var[(length)+1]=U_DECLARE_UTF16(cs) */
2527 #define PATTERN_LEN 4
2528 #define TEXT_LEN 10
2529
2530 U_STRING_DECL(_pattern, "IS 0", PATTERN_LEN);
2531 U_STRING_DECL(_text, "_0IS 0 OK?", TEXT_LEN);
2532 UChar pattern[PATTERN_LEN + 1], text[TEXT_LEN + 1];
2533
2534 U_STRING_INIT(_pattern, "IS 0", PATTERN_LEN);
2535 U_STRING_INIT(_text, "_0IS 0 OK?", TEXT_LEN);
2536 expectedPos = 2;
2537 expectedLen = 4;
2538 expectedNum = 1;
2539
2540 for (pos = 0; pos < PATTERN_LEN; pos++) {
2541 if (_pattern[pos] == zerodigit) {
2542 pattern[pos] = nulldigit;
2543 } else {
2544 pattern[pos] = _pattern[pos];
2545 }
2546 }
2547 pattern[PATTERN_LEN] = 0x0000;
2548
2549 for (pos = 0; pos < TEXT_LEN; pos++) {
2550 if (_text[pos] == zerodigit) {
2551 text[pos] = nulldigit;
2552 } else {
2553 text[pos] = _text[pos];
2554 }
2555 }
2556 text[TEXT_LEN] = 0x0000;
2557
2558 ec = U_ZERO_ERROR;
2559
2560 /* create a US-English collator */
2561 coll = ucol_open("en_US", &ec);
2562
2563 /* make sure we didn't fail. */
2564 TEST_ASSERT (ec);
2565
2566 ucol_setStrength(coll, UCOL_IDENTICAL);
2567
2568 /* open a search looking for 0 */
2569 search = usearch_openFromCollator(pattern, PATTERN_LEN, text,
2570 TEXT_LEN, coll, NULL, &ec);
2571 TEST_ASSERT (ec);
2572
2573 if (coll != NULL && search != NULL) {
2574 pos = usearch_first(search, &ec);
2575 len = usearch_getMatchedLength(search);
2576 if (pos != expectedPos) {
2577 log_err("Expected search result: %d; Got instead: %d\n", expectedPos,
2578 pos);
2579 }
2580
2581 if (len != expectedLen) {
2582 log_err("Expected search result length: %d; Got instead: %d\n",
2583 expectedLen, len);
2584 }
2585
2586 for (pos = usearch_first(search, &ec); pos != USEARCH_DONE; pos
2587 = usearch_next(search, &ec)) {
2588 log_verbose("Match at %d\n", pos);
2589 count += 1;
2590 }
2591
2592 if (count != expectedNum) {
2593 log_err("Expected %d search hits, found %d\n", expectedNum, count);
2594 }
2595 }
2596
2597 ucol_close(coll);
2598 usearch_close(search);
2599 }
2600
2601 static void TestStrengthIdentical(void)
2602 {
2603 UCollator *coll;
2604 UErrorCode ec = U_ZERO_ERROR;
2605 UStringSearch *search;
2606
2607 UChar pattern[] = {0x05E9, 0x0591, 0x05E9};
2608 UChar text[] = {0x05E9, 0x0592, 0x05E9};
2609 int32_t pLen = UPRV_LENGTHOF(pattern);
2610 int32_t tLen = UPRV_LENGTHOF(text);
2611 int32_t expectedPos = 0;
2612 int32_t expectedLen = 3;
2613
2614 int32_t pos;
2615 int32_t len;
2616
2617 /* create a US-English collator */
2618 coll = ucol_open ("en_US", &ec);
2619
2620 /* make sure we didn't fail. */
2621 TEST_ASSERT (ec);
2622
2623 ucol_setStrength( coll, UCOL_TERTIARY);
2624
2625 /* open a search looking for 0 */
2626 search = usearch_openFromCollator (pattern, pLen, text, tLen, coll, NULL, &ec);
2627 TEST_ASSERT (ec);
2628
2629 if (coll != NULL && search != NULL) {
2630 pos = usearch_first(search, &ec);
2631 len = usearch_getMatchedLength(search);
2632
2633 if(pos != expectedPos) {
2634 log_err("Expected search result: %d; Got instead: %d\n", expectedPos, pos);
2635 }
2636
2637 if(len != expectedLen) {
2638 log_err("Expected search result length: %d; Got instead: %d\n", expectedLen, len);
2639 }
2640
2641 /* Now try it at strength == UCOL_IDENTICAL */
2642 ucol_setStrength(coll, UCOL_IDENTICAL);
2643 usearch_reset(search);
2644
2645 pos = usearch_first(search, &ec);
2646 len = usearch_getMatchedLength(search);
2647
2648 if(pos != -1) {
2649 log_err("Expected failure for strentgh = UCOL_IDENTICAL: got %d instead.\n", pos);
2650 }
2651 }
2652
2653 usearch_close(search);
2654 ucol_close(coll);
2655 }
2656
2657 /**
2658 * TestUsingSearchCollator
2659 */
2660
/* One search pattern together with the results TestUsingSearchCollator
 * expects for it. Tables of these end with an entry whose pattern is NULL. */
2661 typedef struct {
2662 const UChar * pattern; /* NUL-terminated pattern (its length is taken with u_strlen) */
2663 const int32_t * offsets; /* expected match offsets, in the order usearch_next reports them */
2664 int32_t offsetsLen; /* number of entries in offsets */
2665 const int32_t * matchLens; /* expected match lengths parallel to offsets, or NULL to skip the length check */
2666 } PatternAndOffsets;
2667
/* Korean test text. The leading NN markers give the index of the first
 * code unit of each space-separated group; the expected-offset arrays
 * below refer to these indexes. */
2668 static const UChar scKoText[] = {
2669 0x0020,
2670 /*01*/ 0xAC00, 0x0020, /* simple LV Hangul */
2671 /*03*/ 0xAC01, 0x0020, /* simple LVT Hangul */
2672 /*05*/ 0xAC0F, 0x0020, /* LVTT, last jamo expands for search */
2673 /*07*/ 0xAFFF, 0x0020, /* LLVVVTT, every jamo expands for search */
2674 /*09*/ 0x1100, 0x1161, 0x11A8, 0x0020, /* 0xAC01 as conjoining jamo */
2675 /*13*/ 0x1100, 0x1161, 0x1100, 0x0020, /* 0xAC01 as basic conjoining jamo (per search rules) */
2676 /*17*/ 0x3131, 0x314F, 0x3131, 0x0020, /* 0xAC01 as compatibility jamo */
2677 /*21*/ 0x1100, 0x1161, 0x11B6, 0x0020, /* 0xAC0F as conjoining jamo; last expands for search */
2678 /*25*/ 0x1100, 0x1161, 0x1105, 0x1112, 0x0020, /* 0xAC0F as basic conjoining jamo; last expands for search */
2679 /*30*/ 0x1101, 0x1170, 0x11B6, 0x0020, /* 0xAFFF as conjoining jamo; all expand for search */
2680 /*34*/ 0x00E6, 0x0020, /* small letter ae, expands */
2681 /*36*/ 0x1E4D, 0x0020, /* small letter o with tilde and acute, decomposes */
2682 0
2683 };
2684
/* Patterns searched for in scKoText: each syllable in precomposed form and
 * as a jamo sequence. */
2685 static const UChar scKoPat0[] = { 0xAC01, 0 };
2686 static const UChar scKoPat1[] = { 0x1100, 0x1161, 0x11A8, 0 }; /* 0xAC01 as conjoining jamo */
2687 static const UChar scKoPat2[] = { 0xAC0F, 0 };
2688 static const UChar scKoPat3[] = { 0x1100, 0x1161, 0x1105, 0x1112, 0 }; /* 0xAC0F as basic conjoining jamo */
2689 static const UChar scKoPat4[] = { 0xAFFF, 0 };
2690 static const UChar scKoPat5[] = { 0x1101, 0x1170, 0x11B6, 0 }; /* 0xAFFF as conjoining jamo */
2691
/* Expected offsets for the locales that tuscItems pairs with
 * scKoSrchPatternsOffsets ("root@collation=search", "ko@collation=search");
 * the digits in each name are the scKoPatN patterns sharing the array. */
2692 static const int32_t scKoSrchOff01[] = { 3, 9, 13 };
2693 static const int32_t scKoSrchOff23[] = { 5, 21, 25 };
2694 static const int32_t scKoSrchOff45[] = { 7, 30 };
2695
2696 static const PatternAndOffsets scKoSrchPatternsOffsets[] = {
2697 { scKoPat0, scKoSrchOff01, UPRV_LENGTHOF(scKoSrchOff01), NULL },
2698 { scKoPat1, scKoSrchOff01, UPRV_LENGTHOF(scKoSrchOff01), NULL },
2699 { scKoPat2, scKoSrchOff23, UPRV_LENGTHOF(scKoSrchOff23), NULL },
2700 { scKoPat3, scKoSrchOff23, UPRV_LENGTHOF(scKoSrchOff23), NULL },
2701 { scKoPat4, scKoSrchOff45, UPRV_LENGTHOF(scKoSrchOff45), NULL },
2702 { scKoPat5, scKoSrchOff45, UPRV_LENGTHOF(scKoSrchOff45), NULL },
2703 { NULL, NULL, 0, NULL }
2704 };
2705
/* Expected offsets for the locale that tuscItems pairs with
 * scKoStndPatternsOffsets ("root"). */
2706 static const int32_t scKoStndOff01[] = { 3, 9 };
2707 static const int32_t scKoStndOff2[] = { 5, 21 };
2708 static const int32_t scKoStndOff3[] = { 25 };
2709 static const int32_t scKoStndOff45[] = { 7, 30 };
2710
2711 static const PatternAndOffsets scKoStndPatternsOffsets[] = {
2712 { scKoPat0, scKoStndOff01, UPRV_LENGTHOF(scKoStndOff01), NULL },
2713 { scKoPat1, scKoStndOff01, UPRV_LENGTHOF(scKoStndOff01), NULL },
2714 { scKoPat2, scKoStndOff2, UPRV_LENGTHOF(scKoStndOff2), NULL },
2715 { scKoPat3, scKoStndOff3, UPRV_LENGTHOF(scKoStndOff3), NULL },
2716 { scKoPat4, scKoStndOff45, UPRV_LENGTHOF(scKoStndOff45), NULL },
2717 { scKoPat5, scKoStndOff45, UPRV_LENGTHOF(scKoStndOff45), NULL },
2718 { NULL, NULL, 0, NULL }
2719 };
2720
/* Japanese test text: five words, each written in hiragana and then in
 * katakana, every spelling followed by a space. The leading NN markers
 * give the index of the first code unit of each row. */
2721 static const UChar scJaText[] = {
2722 /*00*/ 0x304D,0x305F,0x0020,0x30AD,0x30BF,0x0020, /* kita, hiragana and katakana */
2723 /*06*/ 0x304D,0x3060,0x0020,0x30AD,0x30C0,0x0020, /* kida, hiragana and katakana */
2724 /*12*/ 0x306F,0x306D,0x0020,0x30CF,0x30CD,0x0020, /* hane, hiragana and katakana */
2725 /*18*/ 0x3070,0x306D,0x0020,0x30D0,0x30CD,0x0020, /* bane, hiragana and katakana */
2726 /*24*/ 0x3071,0x306D,0x0020,0x30D1,0x30CD,0x0020, /* pane, hiragana and katakana */
2727 /*30*/ 0
2728 };
2729
/* Patterns: H = hiragana spelling, K = katakana spelling; the digit is the
 * row of scJaText the word comes from. */
2730 static const UChar scJaPatH0[] = { 0x304D,0x305F,0 }; /* kita, hiragana */
2731 static const UChar scJaPatK0[] = { 0x30AD,0x30BF,0 }; /* kita, katakana */
2732 static const UChar scJaPatH1[] = { 0x304D,0x3060,0 }; /* kida, hiragana */
2733 static const UChar scJaPatK1[] = { 0x30AD,0x30C0,0 }; /* kida, katakana */
2734 static const UChar scJaPatH2[] = { 0x306F,0x306D,0 }; /* hane, hiragana */
2735 static const UChar scJaPatK2[] = { 0x30CF,0x30CD,0 }; /* hane, katakana */
2736 static const UChar scJaPatH3[] = { 0x3070,0x306D,0 }; /* bane, hiragana */
2737 static const UChar scJaPatK3[] = { 0x30D0,0x30CD,0 }; /* bane, katakana */
2738 static const UChar scJaPatH4[] = { 0x3071,0x306D,0 }; /* pane, hiragana */
2739 static const UChar scJaPatK4[] = { 0x30D1,0x30CD,0 }; /* pane, katakana */
2740
/* Expected offsets used by scJaStndPatternsOffsets: rows 0-1 share one
 * list and rows 2-4 share another. */
2741 static const int32_t scJaStndOff01[] = { 0, 3, 6, 9 };
2742 static const int32_t scJaStndOff234[] = { 12, 15, 18, 21, 24, 27 };
2743
/* Expected offsets used by scJaSrchPatternsOffsets: one list per row, i.e.
 * only the two spellings of the same word. */
2744 static const int32_t scJaSrchOff0[] = { 0, 3 };
2745 static const int32_t scJaSrchOff1[] = { 6, 9 };
2746 static const int32_t scJaSrchOff2[] = { 12, 15 };
2747 static const int32_t scJaSrchOff3[] = { 18, 21 };
2748 static const int32_t scJaSrchOff4[] = { 24, 27 };
2749
/* Paired in tuscItems with "root@colStrength=primary" and
 * "ja@colStrength=primary". */
2750 static const PatternAndOffsets scJaStndPatternsOffsets[] = {
2751 { scJaPatH0, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
2752 { scJaPatK0, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
2753 { scJaPatH1, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
2754 { scJaPatK1, scJaStndOff01, UPRV_LENGTHOF(scJaStndOff01), NULL },
2755 { scJaPatH2, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
2756 { scJaPatK2, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
2757 { scJaPatH3, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
2758 { scJaPatK3, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
2759 { scJaPatH4, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
2760 { scJaPatK4, scJaStndOff234, UPRV_LENGTHOF(scJaStndOff234), NULL },
2761 { NULL, NULL, 0, NULL }
2762 };
2763
/* Paired in tuscItems with "root@collation=search;colStrength=primary" and
 * "ja@collation=search;colStrength=primary". */
2764 static const PatternAndOffsets scJaSrchPatternsOffsets[] = {
2765 { scJaPatH0, scJaSrchOff0, UPRV_LENGTHOF(scJaSrchOff0), NULL },
2766 { scJaPatK0, scJaSrchOff0, UPRV_LENGTHOF(scJaSrchOff0), NULL },
2767 { scJaPatH1, scJaSrchOff1, UPRV_LENGTHOF(scJaSrchOff1), NULL },
2768 { scJaPatK1, scJaSrchOff1, UPRV_LENGTHOF(scJaSrchOff1), NULL },
2769 { scJaPatH2, scJaSrchOff2, UPRV_LENGTHOF(scJaSrchOff2), NULL },
2770 { scJaPatK2, scJaSrchOff2, UPRV_LENGTHOF(scJaSrchOff2), NULL },
2771 { scJaPatH3, scJaSrchOff3, UPRV_LENGTHOF(scJaSrchOff3), NULL },
2772 { scJaPatK3, scJaSrchOff3, UPRV_LENGTHOF(scJaSrchOff3), NULL },
2773 { scJaPatH4, scJaSrchOff4, UPRV_LENGTHOF(scJaSrchOff4), NULL },
2774 { scJaPatK4, scJaSrchOff4, UPRV_LENGTHOF(scJaSrchOff4), NULL },
2775 { NULL, NULL, 0, NULL }
2776 };
2777
/* Test text with the surrogate pair D83D DC4D four times: alone, followed
 * by D83C DFFC, followed by D83C DFFF, and followed by U+0300. Each
 * occurrence is preceded by a space; the leading NN markers give the index
 * of that space. */
2778 static const UChar scModsText[] = {
2779 /*00*/ 0x0020,0xD83D,0xDC4D,
2780 /*03*/ 0x0020,0xD83D,0xDC4D,0xD83C,0xDFFC,
2781 /*08*/ 0x0020,0xD83D,0xDC4D,0xD83C,0xDFFF,
2782 /*13*/ 0x0020,0xD83D,0xDC4D,0x0300,
2783 /*17*/ 0x0020,0
2784 };
2785
2786 static const UChar scMods0[] = { 0xD83D,0xDC4D,0 }; /* hand with no mods */
2787 static const UChar scMods1[] = { 0xD83D,0xDC4D,0xD83C,0xDFFC,0 }; /* hand with fitz 3 */
2788 static const UChar scMods2[] = { 0xD83D,0xDC4D,0xD83C,0xDFFF,0 }; /* hand with fitz 6 */
2789 static const UChar scMods3[] = { 0xD83D,0xDC4D,0x0300,0 }; /* hand with grave */
2790
/* Expected match offsets and the parallel expected match lengths; the same
 * pair of arrays is used for all four patterns in the table below. */
2791 static const int32_t scMods012[] = { 1, 4, 9, 14 };
2792 static const int32_t scModsLens012[] = { 2, 4, 4, 3 };
2793
/* Paired in tuscItems with "root@collation=search;colStrength=primary". */
2794 static const PatternAndOffsets scModsPatternsOffsets[] = {
2795 { scMods0, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
2796 { scMods1, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
2797 { scMods2, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
2798 { scMods3, scMods012, UPRV_LENGTHOF(scMods012), scModsLens012 },
2799 { NULL, NULL, 0, NULL }
2800 };
2801
/* Test text made of flag sequences with "abc" in between; the leading NN
 * markers give the index of the first code unit of each row. */
2802 static const UChar scFlagText[] = {
2803 /*00*/ 0xD83C,0xDDF3,0xD83C,0xDDFF, /*NZ*/
2804 /*04*/ 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0xDB40,0xDC7F, /*gbeng*/
2805 /*18*/ 0x0061,0x0062,0x0063,
2806 /*21*/ 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC73,0xDB40,0xDC63,0xDB40,0xDC74,0xDB40,0xDC7F, /*gbsct*/
2807 /*35*/ 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC77,0xDB40,0xDC6C,0xDB40,0xDC73,0xDB40,0xDC7F, /*gbwls*/
2808 /*49*/ 0x0020,0
2809 };
2810
/* The pattern is the 14-unit gbsct sequence; it is expected once, at
 * offset 21 of scFlagText, with match length 14. */
2811 static const UChar scFlag1[] = { 0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC73,0xDB40,0xDC63,0xDB40,0xDC74,0xDB40,0xDC7F,0 }; /* gbsct */
2812 static const int32_t scOffFlag1[] = { 21 };
2813 static const int32_t scLenFlag1[] = { 14 };
2814
/* Paired in tuscItems with "root@collation=search;colStrength=primary". */
2815 static const PatternAndOffsets scFlagPatternsOffsets[] = {
2816 { scFlag1, scOffFlag1, UPRV_LENGTHOF(scOffFlag1), scLenFlag1 },
2817 { NULL, NULL, 0, NULL }
2818 };
2819
2820
/* One TestUsingSearchCollator case: a collator locale, the text to search,
 * and the table of patterns with their expected results for that text. */
2821 typedef struct {
2822 const char * locale; /* locale ID passed to ucol_open */
2823 const UChar * text; /* NUL-terminated text to search (opened with length -1) */
2824 const PatternAndOffsets * patternsAndOffsets; /* table ended by an entry with a NULL pattern */
2825 } TUSCItem;
2826
/* The cases run by TestUsingSearchCollator; the list ends at the entry
 * whose locale is NULL. */
2827 static const TUSCItem tuscItems[] = {
2828 { "root", scKoText, scKoStndPatternsOffsets },
2829 { "root@collation=search", scKoText, scKoSrchPatternsOffsets },
2830 { "ko@collation=search", scKoText, scKoSrchPatternsOffsets },
2831 { "root@colStrength=primary", scJaText, scJaStndPatternsOffsets },
2832 { "root@collation=search;colStrength=primary", scJaText, scJaSrchPatternsOffsets },
2833 { "ja@colStrength=primary", scJaText, scJaStndPatternsOffsets },
2834 { "ja@collation=search;colStrength=primary", scJaText, scJaSrchPatternsOffsets },
2835 { "root@collation=search;colStrength=primary", scModsText, scModsPatternsOffsets },
2836 { "root@collation=search;colStrength=primary", scFlagText, scFlagPatternsOffsets },
2837 { NULL, NULL, NULL }
2838 };
2839
/* Placeholder pattern ("a") given to usearch_openFromCollator; the real
 * patterns are installed afterwards with usearch_setPattern. */
2840 static const UChar dummyPat[] = { 0x0061, 0 };
2841
2842 static void TestUsingSearchCollator(void)
2843 {
2844 const TUSCItem * tuscItemPtr;
2845 for (tuscItemPtr = tuscItems; tuscItemPtr->locale != NULL; tuscItemPtr++) {
2846 UErrorCode status = U_ZERO_ERROR;
2847 UCollator* ucol = ucol_open(tuscItemPtr->locale, &status);
2848 if ( U_SUCCESS(status) ) {
2849 UStringSearch* usrch = usearch_openFromCollator(dummyPat, -1, tuscItemPtr->text, -1, ucol, NULL, &status);
2850 if ( U_SUCCESS(status) ) {
2851 const PatternAndOffsets * patternsOffsetsPtr;
2852 for ( patternsOffsetsPtr = tuscItemPtr->patternsAndOffsets; patternsOffsetsPtr->pattern != NULL; patternsOffsetsPtr++) {
2853 int32_t patLen = u_strlen(patternsOffsetsPtr->pattern);
2854 usearch_setPattern(usrch, patternsOffsetsPtr->pattern, patLen, &status);
2855 if ( U_SUCCESS(status) ) {
2856 int32_t offset;
2857 const int32_t * nextOffsetPtr;
2858 const int32_t * limitOffsetPtr;
2859 const int32_t * nextMatchLenPtr;
2860
2861 usearch_reset(usrch);
2862 nextOffsetPtr = patternsOffsetsPtr->offsets;
2863 limitOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
2864 nextMatchLenPtr = patternsOffsetsPtr->matchLens;
2865 while (TRUE) {
2866 offset = usearch_next(usrch, &status);
2867 if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
2868 break;
2869 }
2870 if ( nextOffsetPtr < limitOffsetPtr ) {
2871 if (offset != *nextOffsetPtr) {
2872 log_err("error, locale %s, patn (%d) %04X %04X..., expected usearch_next %d, got %d\n",
2873 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], *nextOffsetPtr, offset);
2874 nextOffsetPtr = limitOffsetPtr;
2875 break;
2876 } else if (nextMatchLenPtr != NULL) {
2877 int32_t matchLen = usearch_getMatchedLength(usrch);
2878 if (matchLen != *nextMatchLenPtr) {
2879 log_err("error, locale %s, patn (%d) %04X %04X..., offset %d, expected matchLen %d, got %d\n",
2880 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], offset, *nextMatchLenPtr, matchLen);
2881 }
2882 nextMatchLenPtr++;
2883 }
2884 nextOffsetPtr++;
2885 } else {
2886 log_err("error, locale %s, patn (%d) %04X %04X..., usearch_next returned more matches than expected\n",
2887 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
2888 }
2889 }
2890 if ( U_FAILURE(status) ) {
2891 log_err("error, locale %s, patn (%d) %04X %04X..., usearch_next failed: %s\n",
2892 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], u_errorName(status) );
2893 } else if ( nextOffsetPtr < limitOffsetPtr ) {
2894 log_err("error, locale %s, patn (%d) %04X %04X..., usearch_next returned fewer matches than expected\n",
2895 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
2896 }
2897
2898 status = U_ZERO_ERROR;
2899 usearch_reset(usrch);
2900 nextOffsetPtr = patternsOffsetsPtr->offsets + patternsOffsetsPtr->offsetsLen;
2901 limitOffsetPtr = patternsOffsetsPtr->offsets;
2902 while (TRUE) {
2903 offset = usearch_previous(usrch, &status);
2904 if ( U_FAILURE(status) || offset == USEARCH_DONE ) {
2905 break;
2906 }
2907 if ( nextOffsetPtr > limitOffsetPtr ) {
2908 nextOffsetPtr--;
2909 if (offset != *nextOffsetPtr) {
2910 log_err("error, locale %s, patn (%d) %04X %04X..., expected usearch_previous %d, got %d\n",
2911 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], *nextOffsetPtr, offset);
2912 nextOffsetPtr = limitOffsetPtr;
2913 break;
2914 }
2915 } else {
2916 log_err("error, locale %s, patn (%d) %04X %04X..., usearch_previous returned more matches than expected\n",
2917 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
2918 }
2919 }
2920 if ( U_FAILURE(status) ) {
2921 log_err("error, locale %s, patn (%d) %04X %04X..., usearch_previous failed: %s\n",
2922 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1], u_errorName(status) );
2923 } else if ( nextOffsetPtr > limitOffsetPtr ) {
2924 log_err("error, locale %s, patn (%d) %04X %04X..., usearch_previous returned fewer matches than expected\n",
2925 tuscItemPtr->locale, patLen, patternsOffsetsPtr->pattern[0], patternsOffsetsPtr->pattern[1] );
2926 }
2927
2928 } else {
2929 log_err("error, locale %s, usearch_setPattern failed: %s\n",
2930 tuscItemPtr->locale, u_errorName(status) );
2931 }
2932 }
2933 usearch_close(usrch);
2934 } else {
2935 log_err("error, locale %s, usearch_openFromCollator failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
2936 }
2937 ucol_close(ucol);
2938 } else {
2939 log_data_err("error, locale %s, ucol_open failed: %s\n", tuscItemPtr->locale, u_errorName(status) );
2940 }
2941 }
2942 }
2943
2944
2945 static void TestPCEBuffer_with(const UChar *search, uint32_t searchLen, const UChar *source, uint32_t sourceLen) {
2946 UErrorCode icuStatus = U_ZERO_ERROR;
2947 UCollator *coll;
2948 const char *locale;
2949 UBreakIterator *ubrk;
2950 UStringSearch *usearch;
2951 int32_t match = 0;
2952
2953
2954 coll = ucol_openFromShortString( "LSK_AS_CX_EX_FX_HX_NX_S4",
2955 FALSE,
2956 NULL,
2957 &icuStatus );
2958 if ( U_FAILURE(icuStatus) )
2959 {
2960 log_data_err( "ucol_openFromShortString error %s\n" , u_errorName(icuStatus));
2961 goto exit;
2962 }
2963
2964 locale = ucol_getLocaleByType( coll,
2965 ULOC_VALID_LOCALE,
2966 &icuStatus );
2967 if ( U_FAILURE(icuStatus) )
2968 {
2969 log_err( "ucol_getLocaleByType error %s\n", u_errorName(icuStatus) );
2970 goto exit;
2971 }
2972
2973 log_verbose("locale=%s\n", locale);
2974
2975 ubrk = ubrk_open( UBRK_CHARACTER,
2976 locale,
2977 source,
2978 sourceLen,
2979 &icuStatus );
2980 if ( U_FAILURE(icuStatus) )
2981 {
2982 log_err( "ubrk_open error %s\n", u_errorName(icuStatus) );
2983 goto exit;
2984 }
2985
2986 usearch = usearch_openFromCollator( search,
2987 searchLen,
2988 source,
2989 sourceLen,
2990 coll,
2991 ubrk,
2992 &icuStatus );
2993 if ( U_FAILURE(icuStatus) )
2994 {
2995 log_err( "usearch_openFromCollator error %s\n", u_errorName(icuStatus) );
2996 goto exit;
2997 }
2998
2999 match = usearch_first( usearch,
3000 &icuStatus );
3001 if ( U_FAILURE(icuStatus) )
3002 {
3003 log_err( "usearch_first error %s\n", u_errorName(icuStatus) );
3004 goto exit;
3005 }
3006
3007 if(match==0) {
3008 log_verbose("OK: match=%d\n", match);
3009 } else {
3010 log_err("Err: match expected 0 got %d\n", match);
3011 }
3012
3013 usearch_close(usearch);
3014 ubrk_close(ubrk);
3015 ucol_close(coll);
3016
3017 exit:
3018 return;
3019 }
3020
3021
3022 static void TestPCEBuffer_100df(void) {
3023 UChar search[] =
3024 { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df }; /* 38 cp, 9 of them unpaired surrogates */
3025 UChar source[] =
3026 { 0x0020, 0x0020, 0x00df, 0x0020, 0x0041, 0x00df, 0x0020, 0x0061, 0x00df, 0x0020, 0x00c5, 0x00df, 0x0020, 0x212b, 0x00df, 0x0020, 0x0041, 0x030a, 0x00df, 0x0020, 0x00e5, 0x00df, 0x0020, 0x0061, 0x02da, 0x00df, 0x0020, 0x0061, 0x030a, 0x00df, 0x0020, 0xd8fa, 0xdeae, 0x00df, 0x0020, 0x2027, 0x00df };
3027 uint32_t searchLen = UPRV_LENGTHOF(search);
3028 uint32_t sourceLen = UPRV_LENGTHOF(source);
3029 TestPCEBuffer_with(search,searchLen,source,sourceLen);
3030 }
3031
3032
3033 static void TestPCEBuffer_2surr(void) {
3034 UChar search[] =
3035 { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff }; /* 38 cp, 9 of them unpaired surrogates */
3036 UChar source[] =
3037 { 0x0020, 0x0020, 0xdfff, 0x0020, 0x0041, 0xdfff, 0x0020, 0x0061, 0xdfff, 0x0020, 0x00c5, 0xdfff, 0x0020, 0x212b, 0xdfff, 0x0020, 0x0041, 0x030a, 0xdfff, 0x0020, 0x00e5, 0xdfff, 0x0020, 0x0061, 0x02da, 0xdfff, 0x0020, 0x0061, 0x030a, 0xdfff, 0x0020, 0xd8fa, 0xdeae, 0xdfff, 0x0020, 0x2027, 0xdfff };
3038 uint32_t searchLen = UPRV_LENGTHOF(search);
3039 uint32_t sourceLen = UPRV_LENGTHOF(source);
3040 TestPCEBuffer_with(search,searchLen,source,sourceLen);
3041 }
3042
3043 static void TestMatchFollowedByIgnorables(void) {
3044 /* test case for ticket#8482 */
3045 UChar search[] = { 0x00c9 };
3046 UChar source[] = { 0x00c9, 0x0000, 0x0041 };
3047 int32_t searchLen;
3048 int32_t sourceLen;
3049 UErrorCode icuStatus = U_ZERO_ERROR;
3050 UCollator *coll;
3051 const char *locale;
3052 UBreakIterator *ubrk;
3053 UStringSearch *usearch;
3054 int32_t match = 0;
3055 int32_t matchLength = 0;
3056 const int32_t expectedMatchLength = 1;
3057
3058 searchLen = UPRV_LENGTHOF(search);
3059 sourceLen = UPRV_LENGTHOF(source);
3060
3061 coll = ucol_openFromShortString("LHR_AN_CX_EX_FX_HX_NX_S3",
3062 FALSE,
3063 NULL,
3064 &icuStatus);
3065 if (U_FAILURE(icuStatus)) {
3066 log_data_err("ucol_openFromShortString error - %s\n", u_errorName(icuStatus));
3067 }
3068
3069 locale = ucol_getLocaleByType(coll,
3070 ULOC_VALID_LOCALE,
3071 &icuStatus);
3072 if (U_FAILURE(icuStatus)) {
3073 log_data_err("ucol_getLocaleByType error - %s\n", u_errorName(icuStatus));
3074 }
3075
3076 ubrk = ubrk_open(UBRK_CHARACTER,
3077 locale,
3078 source,
3079 sourceLen,
3080 &icuStatus);
3081 if (U_FAILURE(icuStatus)) {
3082 log_data_err("ubrk_open error - %s\n", u_errorName(icuStatus));
3083 }
3084
3085 usearch = usearch_openFromCollator(search,
3086 searchLen,
3087 source,
3088 sourceLen,
3089 coll,
3090 ubrk,
3091 &icuStatus);
3092 if (U_FAILURE(icuStatus)) {
3093 log_data_err("usearch_openFromCollator error - %s\n", u_errorName(icuStatus));
3094 }
3095
3096 match = usearch_first(usearch,
3097 &icuStatus);
3098 if (U_FAILURE(icuStatus)) {
3099 log_data_err("usearch_first error - %s\n", u_errorName(icuStatus));
3100 } else {
3101
3102 log_verbose("match=%d\n", match);
3103
3104 matchLength = usearch_getMatchedLength(usearch);
3105
3106 if (matchLength != expectedMatchLength) {
3107 log_err("Error: matchLength=%d, expected=%d\n", matchLength, expectedMatchLength);
3108 }
3109 }
3110
3111 usearch_close(usearch);
3112 ubrk_close(ubrk);
3113 ucol_close(coll);
3114 }
3115
3116 static void TestIndicPrefixMatch(void)
3117 {
3118 int count = 0;
3119 UErrorCode status = U_ZERO_ERROR;
3120 open(&status);
3121 if (U_FAILURE(status)) {
3122 log_err_status(status, "Unable to open static collators %s\n", u_errorName(status));
3123 return;
3124 }
3125 while (INDICPREFIXMATCH[count].text != NULL) {
3126 if (!assertEqual(INDICPREFIXMATCH[count])) {
3127 log_err("Error at test number %d\n", count);
3128 }
3129 count ++;
3130 }
3131 close();
3132 }
3133
3134 /**
3135 * addSearchTest
3136 */
3137
3138 void addSearchTest(TestNode** root)
3139 {
3140 addTest(root, &TestStart, "tscoll/usrchtst/TestStart");
3141 addTest(root, &TestOpenClose, "tscoll/usrchtst/TestOpenClose");
3142 addTest(root, &TestInitialization, "tscoll/usrchtst/TestInitialization");
3143 addTest(root, &TestBasic, "tscoll/usrchtst/TestBasic");
3144 addTest(root, &TestNormExact, "tscoll/usrchtst/TestNormExact");
3145 addTest(root, &TestStrength, "tscoll/usrchtst/TestStrength");
3146 addTest(root, &TestBreakIterator, "tscoll/usrchtst/TestBreakIterator");
3147 addTest(root, &TestVariable, "tscoll/usrchtst/TestVariable");
3148 addTest(root, &TestOverlap, "tscoll/usrchtst/TestOverlap");
3149 addTest(root, &TestCollator, "tscoll/usrchtst/TestCollator");
3150 addTest(root, &TestPattern, "tscoll/usrchtst/TestPattern");
3151 addTest(root, &TestText, "tscoll/usrchtst/TestText");
3152 addTest(root, &TestCompositeBoundaries,
3153 "tscoll/usrchtst/TestCompositeBoundaries");
3154 addTest(root, &TestGetSetOffset, "tscoll/usrchtst/TestGetSetOffset");
3155 addTest(root, &TestGetSetAttribute,
3156 "tscoll/usrchtst/TestGetSetAttribute");
3157 addTest(root, &TestGetMatch, "tscoll/usrchtst/TestGetMatch");
3158 addTest(root, &TestSetMatch, "tscoll/usrchtst/TestSetMatch");
3159 addTest(root, &TestReset, "tscoll/usrchtst/TestReset");
3160 addTest(root, &TestSupplementary, "tscoll/usrchtst/TestSupplementary");
3161 addTest(root, &TestContraction, "tscoll/usrchtst/TestContraction");
3162 addTest(root, &TestIgnorable, "tscoll/usrchtst/TestIgnorable");
3163 addTest(root, &TestCanonical, "tscoll/usrchtst/TestCanonical");
3164 addTest(root, &TestNormCanonical, "tscoll/usrchtst/TestNormCanonical");
3165 addTest(root, &TestStrengthCanonical,
3166 "tscoll/usrchtst/TestStrengthCanonical");
3167 addTest(root, &TestBreakIteratorCanonical,
3168 "tscoll/usrchtst/TestBreakIteratorCanonical");
3169 addTest(root, &TestVariableCanonical,
3170 "tscoll/usrchtst/TestVariableCanonical");
3171 addTest(root, &TestOverlapCanonical,
3172 "tscoll/usrchtst/TestOverlapCanonical");
3173 addTest(root, &TestCollatorCanonical,
3174 "tscoll/usrchtst/TestCollatorCanonical");
3175 addTest(root, &TestPatternCanonical,
3176 "tscoll/usrchtst/TestPatternCanonical");
3177 addTest(root, &TestTextCanonical, "tscoll/usrchtst/TestTextCanonical");
3178 addTest(root, &TestCompositeBoundariesCanonical,
3179 "tscoll/usrchtst/TestCompositeBoundariesCanonical");
3180 addTest(root, &TestGetSetOffsetCanonical,
3181 "tscoll/usrchtst/TestGetSetOffsetCanonical");
3182 addTest(root, &TestSupplementaryCanonical,
3183 "tscoll/usrchtst/TestSupplementaryCanonical");
3184 addTest(root, &TestContractionCanonical,
3185 "tscoll/usrchtst/TestContractionCanonical");
3186 addTest(root, &TestEnd, "tscoll/usrchtst/TestEnd");
3187 addTest(root, &TestNumeric, "tscoll/usrchtst/TestNumeric");
3188 addTest(root, &TestDiacriticMatch, "tscoll/usrchtst/TestDiacriticMatch");
3189 addTest(root, &TestForwardBackward, "tscoll/usrchtst/TestForwardBackward");
3190 addTest(root, &TestSearchForNull, "tscoll/usrchtst/TestSearchForNull");
3191 addTest(root, &TestStrengthIdentical, "tscoll/usrchtst/TestStrengthIdentical");
3192 addTest(root, &TestUsingSearchCollator, "tscoll/usrchtst/TestUsingSearchCollator");
3193 addTest(root, &TestPCEBuffer_100df, "tscoll/usrchtst/TestPCEBuffer/1_00df");
3194 addTest(root, &TestPCEBuffer_2surr, "tscoll/usrchtst/TestPCEBuffer/2_dfff");
3195 addTest(root, &TestMatchFollowedByIgnorables, "tscoll/usrchtst/TestMatchFollowedByIgnorables");
3196 addTest(root, &TestIndicPrefixMatch, "tscoll/usrchtst/TestIndicPrefixMatch");
3197 }
3198
3199 #endif /* #if !UCONFIG_NO_COLLATION */