]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /******************************************************************** |
2 | * COPYRIGHT: | |
3 | * Copyright (c) 2004, International Business Machines Corporation and | |
4 | * others. All Rights Reserved. | |
5 | ********************************************************************/ | |
6 | /******************************************************************************** | |
7 | * | |
8 | * File reapits.c | |
9 | * | |
10 | *********************************************************************************/ | |
11 | /*C API TEST FOR Regular Expressions */ | |
12 | /** | |
13 | * This is an API test for ICU regular expressions in C. It doesn't test very many cases, and doesn't | |
14 | * try to test the full functionality. It just calls each function and verifies that it | |
15 | * works on a basic level. | |
16 | * | |
17 | * More complete testing of regular expression functionality is done with the C++ tests. | |
18 | **/ | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | ||
22 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS | |
23 | ||
24 | #include <stdlib.h> | |
25 | #include <string.h> | |
26 | #include "unicode/uloc.h" | |
27 | #include "unicode/uregex.h" | |
28 | #include "unicode/ustring.h" | |
29 | #include "cintltst.h" | |
30 | ||
/*
 * TEST_ASSERT_SUCCESS(status)
 *
 * Logs an error (source file, line, and the name returned by u_errorName())
 * when U_FAILURE(status) is true.  Does nothing otherwise; the test keeps
 * running either way.
 *
 * Wrapped in do { } while (0) so that the invocation plus its trailing ';'
 * forms exactly one statement and is safe inside an unbraced if/else.
 * Note that `status` is evaluated more than once.
 */
#define TEST_ASSERT_SUCCESS(status) do { \
    if (U_FAILURE(status)) { \
        log_err("Failure at file %s, line %d, error = %s\n", \
                __FILE__, __LINE__, u_errorName(status)); \
    } \
} while (0)
33 | ||
/*
 * TEST_ASSERT(expr)
 *
 * Logs a test failure (source file and line) when `expr` evaluates to zero.
 * Execution continues after the report.
 *
 * Wrapped in do { } while (0) so that the invocation plus its trailing ';'
 * forms exactly one statement and is safe inside an unbraced if/else.
 */
#define TEST_ASSERT(expr) do { \
    if (!(expr)) { \
        log_err("Test Failure at file %s, line %d\n", __FILE__, __LINE__); \
    } \
} while (0)
36 | ||
/*
 * TEST_ASSERT_STRING(expected, actual, nulTerm)
 *
 *   expected  char string holding the expected text.  It must be short
 *             enough that strlen(expected)+2 bytes fit in the 120-byte
 *             scratch buffer.
 *   actual    UChar string to check; it is copied into the scratch buffer
 *             with u_austrncpy() before comparing.
 *   nulTerm   when true, `actual` must equal `expected` and end where
 *             `expected` ends (strlen(expected)+1 units are copied and
 *             compared with strcmp).  When false, only the first
 *             strlen(expected) units are compared (strncmp).
 *
 * On mismatch an error with file, line, expected and converted actual text
 * is logged; execution continues.
 *
 * The scratch buffer is always explicitly NUL-terminated after the copy, so
 * neither strcmp() nor the "%s" in the failure message can read past the
 * characters that were copied, even when `actual` is longer than `expected`
 * or is not NUL-terminated.
 */
#define TEST_ASSERT_STRING(expected, actual, nulTerm) do { \
    char    buf_inside_macro[120]; \
    int32_t len = (int32_t)strlen(expected); \
    UBool   success; \
    if (nulTerm) { \
        u_austrncpy(buf_inside_macro, (actual), len+1); \
        buf_inside_macro[len+1] = 0; \
        success = (strcmp((expected), buf_inside_macro) == 0); \
    } else { \
        u_austrncpy(buf_inside_macro, (actual), len); \
        buf_inside_macro[len] = 0; \
        success = (strncmp((expected), buf_inside_macro, len) == 0); \
    } \
    if (!success) { \
        log_err("Failure at file %s, line %d, expected \"%s\", got \"%s\"\n", \
                __FILE__, __LINE__, (expected), buf_inside_macro); \
    } \
} while (0)
53 | ||
54 | ||
55 | ||
56 | ||
57 | ||
58 | ||
59 | static void TestRegexCAPI(void); | |
60 | ||
61 | void addURegexTest(TestNode** root); | |
62 | ||
63 | void addURegexTest(TestNode** root) | |
64 | { | |
65 | addTest(root, &TestRegexCAPI, "regex/TestRegexCAPI"); | |
66 | /* addTest(root, &TestBreakIteratorSafeClone, "tstxtbd/cbiapts/TestBreakIteratorSafeClone"); */ | |
67 | } | |
68 | ||
69 | ||
70 | static void TestRegexCAPI(void) { | |
71 | UErrorCode status = U_ZERO_ERROR; | |
72 | URegularExpression *re; | |
73 | UChar pat[200]; | |
74 | UChar *minus1; | |
75 | ||
76 | memset(&minus1, -1, sizeof(minus1)); | |
77 | ||
78 | /* Mimimalist open/close */ | |
79 | u_uastrncpy(pat, "abc*", sizeof(pat)/2); | |
80 | re = uregex_open(pat, -1, 0, 0, &status); | |
81 | TEST_ASSERT_SUCCESS(status); | |
82 | uregex_close(re); | |
83 | ||
84 | /* Open with all flag values set */ | |
85 | status = U_ZERO_ERROR; | |
86 | re = uregex_open(pat, -1, | |
87 | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD, | |
88 | 0, &status); | |
89 | TEST_ASSERT_SUCCESS(status); | |
90 | uregex_close(re); | |
91 | ||
92 | /* Open with an invalid flag */ | |
93 | status = U_ZERO_ERROR; | |
94 | re = uregex_open(pat, -1, 0x40000000, 0, &status); | |
95 | TEST_ASSERT(status == U_REGEX_INVALID_FLAG); | |
96 | uregex_close(re); | |
97 | ||
98 | ||
99 | /* openC open from a C string */ | |
100 | { | |
101 | const UChar *p; | |
102 | int32_t len; | |
103 | status = U_ZERO_ERROR; | |
104 | re = uregex_openC("abc*", 0, 0, &status); | |
105 | TEST_ASSERT_SUCCESS(status); | |
106 | p = uregex_pattern(re, &len, &status); | |
107 | TEST_ASSERT_SUCCESS(status); | |
108 | u_uastrncpy(pat, "abc*", sizeof(pat)/2); | |
109 | TEST_ASSERT(u_strcmp(pat, p) == 0); | |
110 | TEST_ASSERT(len==(int32_t)strlen("abc*")); | |
111 | ||
112 | uregex_close(re); | |
113 | ||
114 | /* TODO: Open with ParseError parameter */ | |
115 | } | |
116 | ||
117 | /* | |
118 | * clone | |
119 | */ | |
120 | { | |
121 | URegularExpression *clone1; | |
122 | URegularExpression *clone2; | |
123 | URegularExpression *clone3; | |
124 | UChar testString1[30]; | |
125 | UChar testString2[30]; | |
126 | UBool result; | |
127 | ||
128 | ||
129 | status = U_ZERO_ERROR; | |
130 | re = uregex_openC("abc*", 0, 0, &status); | |
131 | TEST_ASSERT_SUCCESS(status); | |
132 | clone1 = uregex_clone(re, &status); | |
133 | TEST_ASSERT_SUCCESS(status); | |
134 | TEST_ASSERT(clone1 != NULL); | |
135 | ||
136 | status = U_ZERO_ERROR; | |
137 | clone2 = uregex_clone(re, &status); | |
138 | TEST_ASSERT_SUCCESS(status); | |
139 | TEST_ASSERT(clone2 != NULL); | |
140 | uregex_close(re); | |
141 | ||
142 | status = U_ZERO_ERROR; | |
143 | clone3 = uregex_clone(clone2, &status); | |
144 | TEST_ASSERT_SUCCESS(status); | |
145 | TEST_ASSERT(clone3 != NULL); | |
146 | ||
147 | u_uastrncpy(testString1, "abcccd", sizeof(pat)/2); | |
148 | u_uastrncpy(testString2, "xxxabcccd", sizeof(pat)/2); | |
149 | ||
150 | status = U_ZERO_ERROR; | |
151 | uregex_setText(clone1, testString1, -1, &status); | |
152 | TEST_ASSERT_SUCCESS(status); | |
153 | result = uregex_lookingAt(clone1, 0, &status); | |
154 | TEST_ASSERT_SUCCESS(status); | |
155 | TEST_ASSERT(result==TRUE); | |
156 | ||
157 | status = U_ZERO_ERROR; | |
158 | uregex_setText(clone2, testString2, -1, &status); | |
159 | TEST_ASSERT_SUCCESS(status); | |
160 | result = uregex_lookingAt(clone2, 0, &status); | |
161 | TEST_ASSERT_SUCCESS(status); | |
162 | TEST_ASSERT(result==FALSE); | |
163 | result = uregex_find(clone2, 0, &status); | |
164 | TEST_ASSERT_SUCCESS(status); | |
165 | TEST_ASSERT(result==TRUE); | |
166 | ||
167 | uregex_close(clone1); | |
168 | uregex_close(clone2); | |
169 | uregex_close(clone3); | |
170 | ||
171 | } | |
172 | ||
173 | /* | |
174 | * pattern() | |
175 | */ | |
176 | { | |
177 | const UChar *resultPat; | |
178 | int32_t resultLen; | |
179 | u_uastrncpy(pat, "hello", sizeof(pat)/2); | |
180 | status = U_ZERO_ERROR; | |
181 | re = uregex_open(pat, -1, 0, NULL, &status); | |
182 | resultPat = uregex_pattern(re, &resultLen, &status); | |
183 | TEST_ASSERT_SUCCESS(status); | |
184 | TEST_ASSERT(resultLen == -1); | |
185 | TEST_ASSERT(u_strcmp(resultPat, pat) == 0); | |
186 | uregex_close(re); | |
187 | ||
188 | status = U_ZERO_ERROR; | |
189 | re = uregex_open(pat, 3, 0, NULL, &status); | |
190 | resultPat = uregex_pattern(re, &resultLen, &status); | |
191 | TEST_ASSERT_SUCCESS(status); | |
192 | TEST_ASSERT(resultLen == 3); | |
193 | TEST_ASSERT(u_strncmp(resultPat, pat, 3) == 0); | |
194 | TEST_ASSERT(u_strlen(resultPat) == 3); | |
195 | uregex_close(re); | |
196 | } | |
197 | ||
198 | /* | |
199 | * flags() | |
200 | */ | |
201 | { | |
202 | int32_t t; | |
203 | ||
204 | status = U_ZERO_ERROR; | |
205 | re = uregex_open(pat, -1, 0, NULL, &status); | |
206 | t = uregex_flags(re, &status); | |
207 | TEST_ASSERT_SUCCESS(status); | |
208 | TEST_ASSERT(t == 0); | |
209 | uregex_close(re); | |
210 | ||
211 | status = U_ZERO_ERROR; | |
212 | re = uregex_open(pat, -1, 0, NULL, &status); | |
213 | t = uregex_flags(re, &status); | |
214 | TEST_ASSERT_SUCCESS(status); | |
215 | TEST_ASSERT(t == 0); | |
216 | uregex_close(re); | |
217 | ||
218 | status = U_ZERO_ERROR; | |
219 | re = uregex_open(pat, -1, UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL, NULL, &status); | |
220 | t = uregex_flags(re, &status); | |
221 | TEST_ASSERT_SUCCESS(status); | |
222 | TEST_ASSERT(t == (UREGEX_CASE_INSENSITIVE | UREGEX_DOTALL)); | |
223 | uregex_close(re); | |
224 | } | |
225 | ||
226 | /* | |
227 | * setText() and lookingAt() | |
228 | */ | |
229 | { | |
230 | UChar text1[50]; | |
231 | UChar text2[50]; | |
232 | UBool result; | |
233 | ||
234 | u_uastrncpy(text1, "abcccd", sizeof(text1)/2); | |
235 | u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); | |
236 | status = U_ZERO_ERROR; | |
237 | u_uastrncpy(pat, "abc*d", sizeof(pat)/2); | |
238 | re = uregex_open(pat, -1, 0, NULL, &status); | |
239 | TEST_ASSERT_SUCCESS(status); | |
240 | ||
241 | /* Operation before doing a setText should fail... */ | |
242 | status = U_ZERO_ERROR; | |
243 | uregex_lookingAt(re, 0, &status); | |
244 | TEST_ASSERT( status== U_REGEX_INVALID_STATE); | |
245 | ||
246 | status = U_ZERO_ERROR; | |
247 | uregex_setText(re, text1, -1, &status); | |
248 | result = uregex_lookingAt(re, 0, &status); | |
249 | TEST_ASSERT(result == TRUE); | |
250 | TEST_ASSERT_SUCCESS(status); | |
251 | ||
252 | status = U_ZERO_ERROR; | |
253 | uregex_setText(re, text2, -1, &status); | |
254 | result = uregex_lookingAt(re, 0, &status); | |
255 | TEST_ASSERT(result == FALSE); | |
256 | TEST_ASSERT_SUCCESS(status); | |
257 | ||
258 | status = U_ZERO_ERROR; | |
259 | uregex_setText(re, text1, -1, &status); | |
260 | result = uregex_lookingAt(re, 0, &status); | |
261 | TEST_ASSERT(result == TRUE); | |
262 | TEST_ASSERT_SUCCESS(status); | |
263 | ||
264 | status = U_ZERO_ERROR; | |
265 | uregex_setText(re, text1, 5, &status); | |
266 | result = uregex_lookingAt(re, 0, &status); | |
267 | TEST_ASSERT(result == FALSE); | |
268 | TEST_ASSERT_SUCCESS(status); | |
269 | ||
270 | status = U_ZERO_ERROR; | |
271 | uregex_setText(re, text1, 6, &status); | |
272 | result = uregex_lookingAt(re, 0, &status); | |
273 | TEST_ASSERT(result == TRUE); | |
274 | TEST_ASSERT_SUCCESS(status); | |
275 | ||
276 | uregex_close(re); | |
277 | } | |
278 | ||
279 | ||
280 | /* | |
281 | * getText() | |
282 | */ | |
283 | { | |
284 | UChar text1[50]; | |
285 | UChar text2[50]; | |
286 | const UChar *result; | |
287 | int32_t textLength; | |
288 | ||
289 | u_uastrncpy(text1, "abcccd", sizeof(text1)/2); | |
290 | u_uastrncpy(text2, "abcccxd", sizeof(text2)/2); | |
291 | status = U_ZERO_ERROR; | |
292 | u_uastrncpy(pat, "abc*d", sizeof(pat)/2); | |
293 | re = uregex_open(pat, -1, 0, NULL, &status); | |
294 | ||
295 | uregex_setText(re, text1, -1, &status); | |
296 | result = uregex_getText(re, &textLength, &status); | |
297 | TEST_ASSERT(result == text1); | |
298 | TEST_ASSERT(textLength == -1); | |
299 | TEST_ASSERT_SUCCESS(status); | |
300 | ||
301 | status = U_ZERO_ERROR; | |
302 | uregex_setText(re, text2, 7, &status); | |
303 | result = uregex_getText(re, &textLength, &status); | |
304 | TEST_ASSERT(result == text2); | |
305 | TEST_ASSERT(textLength == 7); | |
306 | TEST_ASSERT_SUCCESS(status); | |
307 | ||
308 | status = U_ZERO_ERROR; | |
309 | uregex_setText(re, text2, 4, &status); | |
310 | result = uregex_getText(re, &textLength, &status); | |
311 | TEST_ASSERT(result == text2); | |
312 | TEST_ASSERT(textLength == 4); | |
313 | TEST_ASSERT_SUCCESS(status); | |
314 | uregex_close(re); | |
315 | } | |
316 | ||
317 | /* | |
318 | * matches() | |
319 | */ | |
320 | { | |
321 | UChar text1[50]; | |
322 | UBool result; | |
323 | int len; | |
324 | UChar nullString[] = {0,0,0}; | |
325 | ||
326 | u_uastrncpy(text1, "abcccde", sizeof(text1)/2); | |
327 | status = U_ZERO_ERROR; | |
328 | u_uastrncpy(pat, "abc*d", sizeof(pat)/2); | |
329 | re = uregex_open(pat, -1, 0, NULL, &status); | |
330 | ||
331 | uregex_setText(re, text1, -1, &status); | |
332 | result = uregex_matches(re, 0, &status); | |
333 | TEST_ASSERT(result == FALSE); | |
334 | TEST_ASSERT_SUCCESS(status); | |
335 | ||
336 | status = U_ZERO_ERROR; | |
337 | uregex_setText(re, text1, 6, &status); | |
338 | result = uregex_matches(re, 0, &status); | |
339 | TEST_ASSERT(result == TRUE); | |
340 | TEST_ASSERT_SUCCESS(status); | |
341 | ||
342 | status = U_ZERO_ERROR; | |
343 | uregex_setText(re, text1, 6, &status); | |
344 | result = uregex_matches(re, 1, &status); | |
345 | TEST_ASSERT(result == FALSE); | |
346 | TEST_ASSERT_SUCCESS(status); | |
347 | uregex_close(re); | |
348 | ||
349 | status = U_ZERO_ERROR; | |
350 | re = uregex_openC(".?", 0, NULL, &status); | |
351 | uregex_setText(re, text1, -1, &status); | |
352 | len = u_strlen(text1); | |
353 | result = uregex_matches(re, len, &status); | |
354 | TEST_ASSERT(result == TRUE); | |
355 | TEST_ASSERT_SUCCESS(status); | |
356 | ||
357 | status = U_ZERO_ERROR; | |
358 | uregex_setText(re, nullString, -1, &status); | |
359 | TEST_ASSERT_SUCCESS(status); | |
360 | result = uregex_matches(re, 0, &status); | |
361 | TEST_ASSERT(result == TRUE); | |
362 | TEST_ASSERT_SUCCESS(status); | |
363 | uregex_close(re); | |
364 | } | |
365 | ||
366 | ||
367 | /* | |
368 | * lookingAt() Used in setText test. | |
369 | */ | |
370 | ||
371 | ||
372 | /* | |
373 | * find(), findNext, start, end, reset | |
374 | */ | |
375 | { | |
376 | UChar text1[50]; | |
377 | UBool result; | |
378 | u_uastrncpy(text1, "012rx5rx890rxrx...", sizeof(text1)/2); | |
379 | status = U_ZERO_ERROR; | |
380 | re = uregex_openC("rx", 0, NULL, &status); | |
381 | ||
382 | uregex_setText(re, text1, -1, &status); | |
383 | result = uregex_find(re, 0, &status); | |
384 | TEST_ASSERT(result == TRUE); | |
385 | TEST_ASSERT(uregex_start(re, 0, &status) == 3); | |
386 | TEST_ASSERT(uregex_end(re, 0, &status) == 5); | |
387 | TEST_ASSERT_SUCCESS(status); | |
388 | ||
389 | result = uregex_find(re, 9, &status); | |
390 | TEST_ASSERT(result == TRUE); | |
391 | TEST_ASSERT(uregex_start(re, 0, &status) == 11); | |
392 | TEST_ASSERT(uregex_end(re, 0, &status) == 13); | |
393 | TEST_ASSERT_SUCCESS(status); | |
394 | ||
395 | result = uregex_find(re, 14, &status); | |
396 | TEST_ASSERT(result == FALSE); | |
397 | TEST_ASSERT_SUCCESS(status); | |
398 | ||
399 | status = U_ZERO_ERROR; | |
400 | uregex_reset(re, 0, &status); | |
401 | ||
402 | result = uregex_findNext(re, &status); | |
403 | TEST_ASSERT(result == TRUE); | |
404 | TEST_ASSERT(uregex_start(re, 0, &status) == 3); | |
405 | TEST_ASSERT(uregex_end(re, 0, &status) == 5); | |
406 | TEST_ASSERT_SUCCESS(status); | |
407 | ||
408 | result = uregex_findNext(re, &status); | |
409 | TEST_ASSERT(result == TRUE); | |
410 | TEST_ASSERT(uregex_start(re, 0, &status) == 6); | |
411 | TEST_ASSERT(uregex_end(re, 0, &status) == 8); | |
412 | TEST_ASSERT_SUCCESS(status); | |
413 | ||
414 | status = U_ZERO_ERROR; | |
415 | uregex_reset(re, 12, &status); | |
416 | ||
417 | result = uregex_findNext(re, &status); | |
418 | TEST_ASSERT(result == TRUE); | |
419 | TEST_ASSERT(uregex_start(re, 0, &status) == 13); | |
420 | TEST_ASSERT(uregex_end(re, 0, &status) == 15); | |
421 | TEST_ASSERT_SUCCESS(status); | |
422 | ||
423 | result = uregex_findNext(re, &status); | |
424 | TEST_ASSERT(result == FALSE); | |
425 | TEST_ASSERT_SUCCESS(status); | |
426 | ||
427 | uregex_close(re); | |
428 | } | |
429 | ||
430 | /* | |
431 | * groupCount | |
432 | */ | |
433 | { | |
434 | int32_t result; | |
435 | ||
436 | status = U_ZERO_ERROR; | |
437 | re = uregex_openC("abc", 0, NULL, &status); | |
438 | result = uregex_groupCount(re, &status); | |
439 | TEST_ASSERT_SUCCESS(status); | |
440 | TEST_ASSERT(result == 0); | |
441 | uregex_close(re); | |
442 | ||
443 | status = U_ZERO_ERROR; | |
444 | re = uregex_openC("abc(def)(ghi(j))", 0, NULL, &status); | |
445 | result = uregex_groupCount(re, &status); | |
446 | TEST_ASSERT_SUCCESS(status); | |
447 | TEST_ASSERT(result == 3); | |
448 | uregex_close(re); | |
449 | ||
450 | } | |
451 | ||
452 | ||
453 | /* | |
454 | * group() | |
455 | */ | |
456 | { | |
457 | UChar text1[80]; | |
458 | UChar buf[80]; | |
459 | UBool result; | |
460 | int32_t resultSz; | |
461 | u_uastrncpy(text1, "noise abc interior def, and this is off the end", sizeof(text1)/2); | |
462 | ||
463 | status = U_ZERO_ERROR; | |
464 | re = uregex_openC("abc(.*?)def", 0, NULL, &status); | |
465 | TEST_ASSERT_SUCCESS(status); | |
466 | ||
467 | ||
468 | uregex_setText(re, text1, -1, &status); | |
469 | result = uregex_find(re, 0, &status); | |
470 | TEST_ASSERT(result==TRUE); | |
471 | ||
472 | /* Capture Group 0, the full match. Should succeed. */ | |
473 | status = U_ZERO_ERROR; | |
474 | resultSz = uregex_group(re, 0, buf, sizeof(buf)/2, &status); | |
475 | TEST_ASSERT_SUCCESS(status); | |
476 | TEST_ASSERT_STRING("abc interior def", buf, TRUE); | |
477 | TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
478 | ||
479 | /* Capture group #1. Should succeed. */ | |
480 | status = U_ZERO_ERROR; | |
481 | resultSz = uregex_group(re, 1, buf, sizeof(buf)/2, &status); | |
482 | TEST_ASSERT_SUCCESS(status); | |
483 | TEST_ASSERT_STRING(" interior ", buf, TRUE); | |
484 | TEST_ASSERT(resultSz == (int32_t)strlen(" interior ")); | |
485 | ||
486 | /* Capture group out of range. Error. */ | |
487 | status = U_ZERO_ERROR; | |
488 | uregex_group(re, 2, buf, sizeof(buf)/2, &status); | |
489 | TEST_ASSERT(status == U_INDEX_OUTOFBOUNDS_ERROR); | |
490 | ||
491 | /* NULL buffer, pure pre-flight */ | |
492 | status = U_ZERO_ERROR; | |
493 | resultSz = uregex_group(re, 0, NULL, 0, &status); | |
494 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
495 | TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
496 | ||
497 | /* Too small buffer, truncated string */ | |
498 | status = U_ZERO_ERROR; | |
499 | memset(buf, -1, sizeof(buf)); | |
500 | resultSz = uregex_group(re, 0, buf, 5, &status); | |
501 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
502 | TEST_ASSERT_STRING("abc i", buf, FALSE); | |
503 | TEST_ASSERT(buf[5] == (UChar)0xffff); | |
504 | TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
505 | ||
506 | /* Output string just fits buffer, no NUL term. */ | |
507 | status = U_ZERO_ERROR; | |
508 | resultSz = uregex_group(re, 0, buf, (int32_t)strlen("abc interior def"), &status); | |
509 | TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
510 | TEST_ASSERT_STRING("abc interior def", buf, FALSE); | |
511 | TEST_ASSERT(resultSz == (int32_t)strlen("abc interior def")); | |
512 | TEST_ASSERT(buf[strlen("abc interior def")] == (UChar)0xffff); | |
513 | ||
514 | uregex_close(re); | |
515 | ||
516 | } | |
517 | ||
518 | /* | |
519 | * replaceFirst() | |
520 | */ | |
521 | { | |
522 | UChar text1[80]; | |
523 | UChar text2[80]; | |
524 | UChar replText[80]; | |
525 | UChar buf[80]; | |
526 | int32_t resultSz; | |
527 | u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); | |
528 | u_uastrncpy(text2, "No match here.", sizeof(text2)/2); | |
529 | u_uastrncpy(replText, "<$1>", sizeof(replText)/2); | |
530 | ||
531 | status = U_ZERO_ERROR; | |
532 | re = uregex_openC("x(.*?)x", 0, NULL, &status); | |
533 | TEST_ASSERT_SUCCESS(status); | |
534 | ||
535 | /* Normal case, with match */ | |
536 | uregex_setText(re, text1, -1, &status); | |
537 | resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); | |
538 | TEST_ASSERT_SUCCESS(status); | |
539 | TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, TRUE); | |
540 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
541 | ||
542 | /* No match. Text should copy to output with no changes. */ | |
543 | status = U_ZERO_ERROR; | |
544 | uregex_setText(re, text2, -1, &status); | |
545 | resultSz = uregex_replaceFirst(re, replText, -1, buf, sizeof(buf)/2, &status); | |
546 | TEST_ASSERT_SUCCESS(status); | |
547 | TEST_ASSERT_STRING("No match here.", buf, TRUE); | |
548 | TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); | |
549 | ||
550 | /* Match, output just fills buffer, no termination warning. */ | |
551 | status = U_ZERO_ERROR; | |
552 | uregex_setText(re, text1, -1, &status); | |
553 | memset(buf, -1, sizeof(buf)); | |
554 | resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); | |
555 | TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
556 | TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); | |
557 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
558 | TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
559 | ||
560 | /* Do the replaceFirst again, without first resetting anything. | |
561 | * Should give the same results. | |
562 | */ | |
563 | status = U_ZERO_ERROR; | |
564 | memset(buf, -1, sizeof(buf)); | |
565 | resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x."), &status); | |
566 | TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
567 | TEST_ASSERT_STRING("Replace <aa> x1x x...x.", buf, FALSE); | |
568 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
569 | TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
570 | ||
571 | /* NULL buffer, zero buffer length */ | |
572 | status = U_ZERO_ERROR; | |
573 | resultSz = uregex_replaceFirst(re, replText, -1, NULL, 0, &status); | |
574 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
575 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
576 | ||
577 | /* Buffer too small by one */ | |
578 | status = U_ZERO_ERROR; | |
579 | memset(buf, -1, sizeof(buf)); | |
580 | resultSz = uregex_replaceFirst(re, replText, -1, buf, strlen("Replace <aa> x1x x...x.")-1, &status); | |
581 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
582 | TEST_ASSERT_STRING("Replace <aa> x1x x...x", buf, FALSE); | |
583 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
584 | TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
585 | ||
586 | uregex_close(re); | |
587 | } | |
588 | ||
589 | ||
590 | /* | |
591 | * replaceAll() | |
592 | */ | |
593 | { | |
594 | UChar text1[80]; | |
595 | UChar text2[80]; | |
596 | UChar replText[80]; | |
597 | UChar buf[80]; | |
598 | int32_t resultSz; | |
599 | int32_t expectedResultSize; | |
600 | int32_t i; | |
601 | ||
602 | u_uastrncpy(text1, "Replace xaax x1x x...x.", sizeof(text1)/2); | |
603 | u_uastrncpy(text2, "No match here.", sizeof(text2)/2); | |
604 | u_uastrncpy(replText, "<$1>", sizeof(replText)/2); | |
605 | expectedResultSize = u_strlen(text1); | |
606 | ||
607 | status = U_ZERO_ERROR; | |
608 | re = uregex_openC("x(.*?)x", 0, NULL, &status); | |
609 | TEST_ASSERT_SUCCESS(status); | |
610 | ||
611 | /* Normal case, with match */ | |
612 | uregex_setText(re, text1, -1, &status); | |
613 | resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); | |
614 | TEST_ASSERT_SUCCESS(status); | |
615 | TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, TRUE); | |
616 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace xaax x1x x...x.")); | |
617 | ||
618 | /* No match. Text should copy to output with no changes. */ | |
619 | status = U_ZERO_ERROR; | |
620 | uregex_setText(re, text2, -1, &status); | |
621 | resultSz = uregex_replaceAll(re, replText, -1, buf, sizeof(buf)/2, &status); | |
622 | TEST_ASSERT_SUCCESS(status); | |
623 | TEST_ASSERT_STRING("No match here.", buf, TRUE); | |
624 | TEST_ASSERT(resultSz == (int32_t)strlen("No match here.")); | |
625 | ||
626 | /* Match, output just fills buffer, no termination warning. */ | |
627 | status = U_ZERO_ERROR; | |
628 | uregex_setText(re, text1, -1, &status); | |
629 | memset(buf, -1, sizeof(buf)); | |
630 | resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); | |
631 | TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
632 | TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); | |
633 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>.")); | |
634 | TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
635 | ||
636 | /* Do the replaceFirst again, without first resetting anything. | |
637 | * Should give the same results. | |
638 | */ | |
639 | status = U_ZERO_ERROR; | |
640 | memset(buf, -1, sizeof(buf)); | |
641 | resultSz = uregex_replaceAll(re, replText, -1, buf, strlen("Replace xaax x1x x...x."), &status); | |
642 | TEST_ASSERT(status == U_STRING_NOT_TERMINATED_WARNING); | |
643 | TEST_ASSERT_STRING("Replace <aa> <1> <...>.", buf, FALSE); | |
644 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>.")); | |
645 | TEST_ASSERT(buf[resultSz] == (UChar)0xffff); | |
646 | ||
647 | /* NULL buffer, zero buffer length */ | |
648 | status = U_ZERO_ERROR; | |
649 | resultSz = uregex_replaceAll(re, replText, -1, NULL, 0, &status); | |
650 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
651 | TEST_ASSERT(resultSz == (int32_t)strlen("Replace <aa> <1> <...>.")); | |
652 | ||
653 | /* Buffer too small. Try every size, which will tickle edge cases | |
654 | * in uregex_appendReplacement (used by replaceAll) */ | |
655 | for (i=0; i<expectedResultSize; i++) { | |
656 | char expected[80]; | |
657 | status = U_ZERO_ERROR; | |
658 | memset(buf, -1, sizeof(buf)); | |
659 | resultSz = uregex_replaceAll(re, replText, -1, buf, i, &status); | |
660 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
661 | strcpy(expected, "Replace <aa> <1> <...>."); | |
662 | expected[i] = 0; | |
663 | TEST_ASSERT_STRING(expected, buf, FALSE); | |
664 | TEST_ASSERT(resultSz == expectedResultSize); | |
665 | TEST_ASSERT(buf[i] == (UChar)0xffff); | |
666 | } | |
667 | ||
668 | uregex_close(re); | |
669 | } | |
670 | ||
671 | ||
672 | /* | |
673 | * appendReplacement() | |
674 | */ | |
675 | { | |
676 | UChar text[100]; | |
677 | UChar repl[100]; | |
678 | UChar buf[100]; | |
679 | UChar *bufPtr; | |
680 | int32_t bufCap; | |
681 | ||
682 | ||
683 | status = U_ZERO_ERROR; | |
684 | re = uregex_openC(".*", 0, 0, &status); | |
685 | TEST_ASSERT_SUCCESS(status); | |
686 | ||
687 | u_uastrncpy(text, "whatever", sizeof(text)/2); | |
688 | u_uastrncpy(repl, "some other", sizeof(repl)/2); | |
689 | uregex_setText(re, text, -1, &status); | |
690 | ||
691 | /* match covers whole target string */ | |
692 | uregex_find(re, 0, &status); | |
693 | TEST_ASSERT_SUCCESS(status); | |
694 | bufPtr = buf; | |
695 | bufCap = sizeof(buf) / 2; | |
696 | uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); | |
697 | TEST_ASSERT_SUCCESS(status); | |
698 | TEST_ASSERT_STRING("some other", buf, TRUE); | |
699 | ||
700 | /* Match has \u \U escapes */ | |
701 | uregex_find(re, 0, &status); | |
702 | TEST_ASSERT_SUCCESS(status); | |
703 | bufPtr = buf; | |
704 | bufCap = sizeof(buf) / 2; | |
705 | u_uastrncpy(repl, "abc\\u0041\\U00000042 \\\\ $ \\abc", sizeof(repl)/2); | |
706 | uregex_appendReplacement(re, repl, -1, &bufPtr, &bufCap, &status); | |
707 | TEST_ASSERT_SUCCESS(status); | |
708 | TEST_ASSERT_STRING("abcAB \\ $ abc", buf, TRUE); | |
709 | ||
710 | uregex_close(re); | |
711 | } | |
712 | ||
713 | ||
714 | /* | |
715 | * appendTail(). Checked in ReplaceFirst(), replaceAll(). | |
716 | */ | |
717 | ||
718 | /* | |
719 | * split() | |
720 | */ | |
721 | { | |
722 | UChar textToSplit[80]; | |
723 | UChar text2[80]; | |
724 | UChar buf[200]; | |
725 | UChar *fields[10]; | |
726 | int32_t numFields; | |
727 | int32_t requiredCapacity; | |
728 | int32_t spaceNeeded; | |
729 | int32_t sz; | |
730 | ||
731 | u_uastrncpy(textToSplit, "first : second: third", sizeof(textToSplit)/2); | |
732 | u_uastrncpy(text2, "No match here.", sizeof(text2)/2); | |
733 | ||
734 | status = U_ZERO_ERROR; | |
735 | re = uregex_openC(":", 0, NULL, &status); | |
736 | ||
737 | ||
738 | /* Simple split */ | |
739 | ||
740 | uregex_setText(re, textToSplit, -1, &status); | |
741 | TEST_ASSERT_SUCCESS(status); | |
742 | ||
743 | memset(fields, -1, sizeof(fields)); | |
744 | numFields = | |
745 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); | |
746 | TEST_ASSERT_SUCCESS(status); | |
747 | TEST_ASSERT(numFields == 3); | |
748 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
749 | TEST_ASSERT_STRING(" second", fields[1], TRUE); | |
750 | TEST_ASSERT_STRING(" third", fields[2], TRUE); | |
751 | TEST_ASSERT(fields[3] == NULL); | |
752 | ||
753 | spaceNeeded = u_strlen(textToSplit) - | |
754 | (numFields - 1) + /* Field delimiters do not appear in output */ | |
755 | numFields; /* Each field gets a NUL terminator */ | |
756 | ||
757 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
758 | uregex_close(re); | |
759 | ||
760 | ||
761 | /* Split with too few output strings available */ | |
762 | status = U_ZERO_ERROR; | |
763 | re = uregex_openC(":", 0, NULL, &status); | |
764 | uregex_setText(re, textToSplit, -1, &status); | |
765 | TEST_ASSERT_SUCCESS(status); | |
766 | ||
767 | memset(fields, -1, sizeof(fields)); | |
768 | numFields = | |
769 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); | |
770 | TEST_ASSERT_SUCCESS(status); | |
771 | TEST_ASSERT(numFields == 2); | |
772 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
773 | TEST_ASSERT_STRING(" second: third", fields[1], TRUE); | |
774 | TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); | |
775 | ||
776 | spaceNeeded = u_strlen(textToSplit) - | |
777 | (numFields - 1) + /* Field delimiters do not appear in output */ | |
778 | numFields; /* Each field gets a NUL terminator */ | |
779 | ||
780 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
781 | ||
782 | /* Split with a range of output buffer sizes. */ | |
783 | spaceNeeded = u_strlen(textToSplit) - | |
784 | (numFields - 1) + /* Field delimiters do not appear in output */ | |
785 | numFields; /* Each field gets a NUL terminator */ | |
786 | ||
787 | for (sz=0; sz < spaceNeeded+1; sz++) { | |
788 | memset(fields, -1, sizeof(fields)); | |
789 | status = U_ZERO_ERROR; | |
790 | numFields = | |
791 | uregex_split(re, buf, sz, &requiredCapacity, fields, 10, &status); | |
792 | if (sz >= spaceNeeded) { | |
793 | TEST_ASSERT_SUCCESS(status); | |
794 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
795 | TEST_ASSERT_STRING(" second", fields[1], TRUE); | |
796 | TEST_ASSERT_STRING(" third", fields[2], TRUE); | |
797 | } else { | |
798 | TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR); | |
799 | } | |
800 | TEST_ASSERT(numFields == 3); | |
801 | TEST_ASSERT(fields[3] == NULL); | |
802 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
803 | } | |
804 | uregex_close(re); | |
805 | } | |
806 | ||
807 | ||
808 | ||
809 | ||
810 | /* Split(), part 2. Patterns with capture groups. The capture group text | |
811 | * comes out as additional fields. */ | |
812 | { | |
813 | UChar textToSplit[80]; | |
814 | UChar buf[200]; | |
815 | UChar *fields[10]; | |
816 | int32_t numFields; | |
817 | int32_t requiredCapacity; | |
818 | int32_t spaceNeeded; | |
819 | int32_t sz; | |
820 | ||
821 | u_uastrncpy(textToSplit, "first <tag-a> second<tag-b> third", sizeof(textToSplit)/2); | |
822 | ||
823 | status = U_ZERO_ERROR; | |
824 | re = uregex_openC("<(.*?)>", 0, NULL, &status); | |
825 | ||
826 | uregex_setText(re, textToSplit, -1, &status); | |
827 | TEST_ASSERT_SUCCESS(status); | |
828 | ||
829 | memset(fields, -1, sizeof(fields)); | |
830 | numFields = | |
831 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 10, &status); | |
832 | TEST_ASSERT_SUCCESS(status); | |
833 | TEST_ASSERT(numFields == 5); | |
834 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
835 | TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
836 | TEST_ASSERT_STRING(" second", fields[2], TRUE); | |
837 | TEST_ASSERT_STRING("tag-b", fields[3], TRUE); | |
838 | TEST_ASSERT_STRING(" third", fields[4], TRUE); | |
839 | TEST_ASSERT(fields[5] == NULL); | |
840 | spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ | |
841 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
842 | ||
843 | ||
844 | /* Split with too few output strings available (2) */ | |
845 | status = U_ZERO_ERROR; | |
846 | memset(fields, -1, sizeof(fields)); | |
847 | numFields = | |
848 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 2, &status); | |
849 | TEST_ASSERT_SUCCESS(status); | |
850 | TEST_ASSERT(numFields == 2); | |
851 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
852 | TEST_ASSERT_STRING(" second<tag-b> third", fields[1], TRUE); | |
853 | TEST_ASSERT(!memcmp(&fields[2],&minus1,sizeof(UChar*))); | |
854 | ||
855 | spaceNeeded = strlen("first . second<tag-b> third."); /* "." at NUL positions */ | |
856 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
857 | ||
858 | /* Split with too few output strings available (3) */ | |
859 | status = U_ZERO_ERROR; | |
860 | memset(fields, -1, sizeof(fields)); | |
861 | numFields = | |
862 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 3, &status); | |
863 | TEST_ASSERT_SUCCESS(status); | |
864 | TEST_ASSERT(numFields == 3); | |
865 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
866 | TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
867 | TEST_ASSERT_STRING(" second<tag-b> third", fields[2], TRUE); | |
868 | TEST_ASSERT(!memcmp(&fields[3],&minus1,sizeof(UChar*))); | |
869 | ||
870 | spaceNeeded = strlen("first .tag-a. second<tag-b> third."); /* "." at NUL positions */ | |
871 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
872 | ||
873 | /* Split with just enough output strings available (5) */ | |
874 | status = U_ZERO_ERROR; | |
875 | memset(fields, -1, sizeof(fields)); | |
876 | numFields = | |
877 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 5, &status); | |
878 | TEST_ASSERT_SUCCESS(status); | |
879 | TEST_ASSERT(numFields == 5); | |
880 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
881 | TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
882 | TEST_ASSERT_STRING(" second", fields[2], TRUE); | |
883 | TEST_ASSERT_STRING("tag-b", fields[3], TRUE); | |
884 | TEST_ASSERT_STRING(" third", fields[4], TRUE); | |
885 | TEST_ASSERT(!memcmp(&fields[5],&minus1,sizeof(UChar*))); | |
886 | ||
887 | spaceNeeded = strlen("first .tag-a. second.tag-b. third."); /* "." at NUL positions */ | |
888 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
889 | ||
890 | ||
891 | /* Split, end of text is a field delimiter. */ | |
892 | status = U_ZERO_ERROR; | |
893 | sz = strlen("first <tag-a> second<tag-b>"); | |
894 | uregex_setText(re, textToSplit, sz, &status); | |
895 | TEST_ASSERT_SUCCESS(status); | |
896 | memset(fields, -1, sizeof(fields)); | |
897 | numFields = | |
898 | uregex_split(re, buf, sizeof(buf)/2, &requiredCapacity, fields, 9, &status); | |
899 | TEST_ASSERT_SUCCESS(status); | |
900 | TEST_ASSERT(numFields == 4); | |
901 | TEST_ASSERT_STRING("first ", fields[0], TRUE); | |
902 | TEST_ASSERT_STRING("tag-a", fields[1], TRUE); | |
903 | TEST_ASSERT_STRING(" second", fields[2], TRUE); | |
904 | TEST_ASSERT_STRING("tag-b", fields[3], TRUE); | |
905 | TEST_ASSERT(fields[4] == NULL); | |
906 | TEST_ASSERT(fields[8] == NULL); | |
907 | TEST_ASSERT(!memcmp(&fields[9],&minus1,sizeof(UChar*))); | |
908 | spaceNeeded = strlen("first .tag-a. second.tag-b."); /* "." at NUL positions */ | |
909 | TEST_ASSERT(spaceNeeded == requiredCapacity); | |
910 | ||
911 | uregex_close(re); | |
912 | } | |
913 | ||
914 | } | |
915 | ||
916 | #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |