2 **********************************************************************
3 * Copyright (C) 1999-2003 Alan Liu ,International Business Machines Corporation and
4 * others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 10/20/99 alan Creation.
8 * 03/22/2000 Madhu Added additional tests
9 **********************************************************************
12 #include "unicode/utypes.h"
14 #include "unicode/uniset.h"
15 #include "unicode/uchar.h"
16 #include "unicode/usetiter.h"
17 #include "unicode/ustring.h"
19 UnicodeString
operator+(const UnicodeString
& left
, const UnicodeSet
& set
) {
22 return left
+ UnicodeSetTest::escape(pat
);
25 #define CASE(id,test) case id: \
29 logln((UnicodeString)""); \
35 UnicodeSetTest::runIndexedTest(int32_t index
, UBool exec
,
36 const char* &name
, char* /*par*/) {
37 // if (exec) logln((UnicodeString)"TestSuite UnicodeSetTest");
40 CASE(1,TestAddRemove
);
41 CASE(2,TestCategories
);
42 CASE(3,TestCloneEqualHash
);
43 CASE(4,TestMinimalRep
);
45 CASE(6,TestScriptSet
);
46 CASE(7,TestPropertySet
);
48 CASE(9,TestExhaustive
);
49 CASE(10,TestToPattern
);
52 CASE(13,TestStringPatterns
);
54 CASE(15,TestCloseOver
);
55 CASE(16,TestEscapePattern
);
56 CASE(17,TestInvalidCodePoint
);
57 default: name
= ""; break;
62 * UVector was improperly copying contents
63 * This code will crash if this is still true
65 void UnicodeSetTest::Testj2268() {
67 t
.add(UnicodeString("abc"));
69 UnicodeString ustrPat
;
70 test
.toPattern(ustrPat
, TRUE
);
74 * Test that toPattern() round trips with syntax characters and
77 void UnicodeSetTest::TestToPattern() {
78 static const char* OTHER_TOPATTERN_TESTS
[] = {
79 "[[:latin:]&[:greek:]]",
80 "[[:latin:]-[:greek:]]",
81 "[:nonspacing mark:]",
85 for (int32_t j
=0; OTHER_TOPATTERN_TESTS
[j
]!=NULL
; ++j
) {
86 UErrorCode ec
= U_ZERO_ERROR
;
87 UnicodeSet
s(OTHER_TOPATTERN_TESTS
[j
], ec
);
89 errln((UnicodeString
)"FAIL: bad pattern " + OTHER_TOPATTERN_TESTS
[j
]);
92 checkPat(OTHER_TOPATTERN_TESTS
[j
], s
);
95 for (UChar32 i
= 0; i
<= 0x10FFFF; ++i
) {
96 if ((i
<= 0xFF && !u_isalpha(i
)) || u_isspace(i
)) {
98 // check various combinations to make sure they all work.
99 if (i
!= 0 && !toPatternAux(i
, i
)){
102 if (!toPatternAux(0, i
)){
105 if (!toPatternAux(i
, 0xFFFF)){
// Helper used by TestToPattern: checks toPattern() round-tripping for the
// single range [start, end].
//   start, end - bounds of the range that is added to testSet.
//   returns    - whatever checkPat(source, testSet) returns.
// `source` is a human-readable label built from the decimal values of the
// bounds; it is only used in log/failure messages.
// NOTE(review): the declaration of testSet and the condition guarding the
// "..end" suffix are not visible in this excerpt (the commented-out Java
// below suggests `start != end`) -- confirm against the full file.
112 UBool
UnicodeSetTest::toPatternAux(UChar32 start
, UChar32 end
) {
// The next comment and the commented-out lines are leftovers from the Java
// version of this test (Integer.toString / String are Java APIs).
114 // use Integer.toString because Utility.hex doesn't handle ints
115 UnicodeString pat
= "";
116 // TODO do these in hex
117 //String source = "0x" + Integer.toString(start,16).toUpperCase();
118 //if (start != end) source += "..0x" + Integer.toString(end,16).toUpperCase();
119 UnicodeString source
;
// Bounds are cast to uint32_t before being appended to the label.
120 source
= source
+ (uint32_t)start
;
122 source
= source
+ ".." + (uint32_t)end
;
124 testSet
.add(start
, end
);
125 return checkPat(source
, testSet
);
// Two-argument overload: generates patterns from testSet and checks that
// each one can be parsed back into an equal set.
//   source  - label used in log/failure messages.
//   testSet - the set whose toPattern() output is being verified.
//   returns - FALSE as soon as either three-argument checkPat() call fails.
// NOTE(review): the declarations of pat0/pat2 and the final return are not
// visible in this excerpt; presumably TRUE is returned on success -- confirm.
128 UBool
UnicodeSetTest::checkPat(const UnicodeString
& source
,
129 const UnicodeSet
& testSet
) {
130 // What we want to make sure of is that a pattern generated
131 // by toPattern(), with or without escaped unprintables, can
132 // be passed back into the UnicodeSet constructor.
// First pass: toPattern() called with TRUE as its second argument; the
// label gets an " (escaped)" suffix.
135 testSet
.toPattern(pat0
, TRUE
);
137 if (!checkPat(source
+ " (escaped)", testSet
, pat0
)) return FALSE
;
139 //String pat1 = unescapeLeniently(pat0);
140 //if (!checkPat(source + " (in code)", testSet, pat1)) return false;
// Second pass: toPattern() called with FALSE as its second argument.
143 testSet
.toPattern(pat2
, FALSE
);
144 if (!checkPat(source
, testSet
, pat2
)) return FALSE
;
146 //String pat3 = unescapeLeniently(pat2);
147 // if (!checkPat(source + " (in code)", testSet, pat3)) return false;
149 //logln(source + " => " + pat0 + ", " + pat1 + ", " + pat2 + ", " + pat3);
// Both passes succeeded: log the label and the two generated patterns.
150 logln((UnicodeString
)source
+ " => " + pat0
+ ", " + pat2
);
// Three-argument overload: verifies that one pattern string parses back to
// the expected set.
//   source  - label used in the failure message.
//   testSet - the expected set.
//   pat     - the pattern to parse.
// Builds testSet2 from `pat` and compares it with `testSet`; on mismatch it
// reports "Fail toPattern: " with `source` and `pat` through errln().
// NOTE(review): the return statements are not visible in this excerpt;
// presumably FALSE on mismatch and TRUE otherwise -- confirm.
// NOTE(review): `ec` is not examined in the visible lines, so a pattern
// that fails to parse is only caught indirectly by the set comparison.
154 UBool
UnicodeSetTest::checkPat(const UnicodeString
& source
,
155 const UnicodeSet
& testSet
,
156 const UnicodeString
& pat
) {
157 UErrorCode ec
= U_ZERO_ERROR
;
158 UnicodeSet
testSet2(pat
, ec
);
159 if (testSet2
!= testSet
) {
160 errln((UnicodeString
)"Fail toPattern: " + source
+ " => " + pat
);
167 UnicodeSetTest::TestPatterns(void) {
169 expectPattern(set
, UnicodeString("[[a-m]&[d-z]&[k-y]]", ""), "km");
170 expectPattern(set
, UnicodeString("[[a-z]-[m-y]-[d-r]]", ""), "aczz");
171 expectPattern(set
, UnicodeString("[a\\-z]", ""), "--aazz");
172 expectPattern(set
, UnicodeString("[-az]", ""), "--aazz");
173 expectPattern(set
, UnicodeString("[az-]", ""), "--aazz");
174 expectPattern(set
, UnicodeString("[[[a-z]-[aeiou]i]]", ""), "bdfnptvz");
176 // Throw in a test of complement
179 exp
.append((UChar
)0x0000).append("aeeoouu").append((UChar
)(0x007a+1)).append((UChar
)0xFFFF);
180 expectPairs(set
, exp
);
184 UnicodeSetTest::TestCategories(void) {
185 UErrorCode status
= U_ZERO_ERROR
;
186 const char* pat
= " [:Lu:] "; // Whitespace ok outside [:..:]
187 UnicodeSet
set(pat
, status
);
188 if (U_FAILURE(status
)) {
189 errln((UnicodeString
)"Fail: Can't construct set with " + pat
);
191 expectContainment(set
, pat
, "ABC", "abc");
195 int32_t failures
= 0;
196 // Make sure generation of L doesn't pollute cached Lu set
197 // First generate L, then Lu
198 set
.applyPattern("[:L:]", status
);
199 if (U_FAILURE(status
)) { errln("FAIL"); return; }
200 for (i
=0; i
<0x200; ++i
) {
201 UBool l
= u_isalpha((UChar
)i
);
202 if (l
!= set
.contains(i
)) {
203 errln((UnicodeString
)"FAIL: L contains " + (unsigned short)i
+ " = " +
205 if (++failures
== 10) break;
209 set
.applyPattern("[:Lu:]", status
);
210 if (U_FAILURE(status
)) { errln("FAIL"); return; }
211 for (i
=0; i
<0x200; ++i
) {
212 UBool lu
= (u_charType((UChar
)i
) == U_UPPERCASE_LETTER
);
213 if (lu
!= set
.contains(i
)) {
214 errln((UnicodeString
)"FAIL: Lu contains " + (unsigned short)i
+ " = " +
216 if (++failures
== 20) break;
221 UnicodeSetTest::TestCloneEqualHash(void) {
222 UErrorCode status
= U_ZERO_ERROR
;
223 // set1 and set2 used to be built with the obsolete constructor taking
224 // UCharCategory values; replaced with pattern constructors
226 UnicodeSet
*set1
=new UnicodeSet("\\p{Lowercase Letter}", status
); // :Ll: Letter, lowercase
227 UnicodeSet
*set1a
=new UnicodeSet("[:Ll:]", status
); // Letter, lowercase
228 if (U_FAILURE(status
)){
229 errln((UnicodeString
)"FAIL: Can't construst set with category->Ll");
232 UnicodeSet
*set2
=new UnicodeSet("\\p{Decimal Number}", status
); //Number, Decimal digit
233 UnicodeSet
*set2a
=new UnicodeSet("[:Nd:]", status
); //Number, Decimal digit
234 if (U_FAILURE(status
)){
235 errln((UnicodeString
)"FAIL: Can't construct set with category->Nd");
239 if (*set1
!= *set1a
) {
240 errln("FAIL: category constructor for Ll broken");
242 if (*set2
!= *set2a
) {
243 errln("FAIL: category constructor for Nd broken");
248 logln("Testing copy construction");
249 UnicodeSet
*set1copy
=new UnicodeSet(*set1
);
250 if(*set1
!= *set1copy
|| *set1
== *set2
||
251 getPairs(*set1
) != getPairs(*set1copy
) ||
252 set1
->hashCode() != set1copy
->hashCode()){
253 errln("FAIL : Error in copy construction");
257 logln("Testing =operator");
258 UnicodeSet set1equal
=*set1
;
259 UnicodeSet set2equal
=*set2
;
260 if(set1equal
!= *set1
|| set1equal
!= *set1copy
|| set2equal
!= *set2
||
261 set2equal
== *set1
|| set2equal
== *set1copy
|| set2equal
== set1equal
){
262 errln("FAIL: Error in =operator");
265 logln("Testing clone()");
266 UnicodeSet
*set1clone
=(UnicodeSet
*)set1
->clone();
267 UnicodeSet
*set2clone
=(UnicodeSet
*)set2
->clone();
268 if(*set1clone
!= *set1
|| *set1clone
!= *set1copy
|| *set1clone
!= set1equal
||
269 *set2clone
!= *set2
|| *set2clone
== *set1copy
|| *set2clone
!= set2equal
||
270 *set2clone
== *set1
|| *set2clone
== set1equal
|| *set2clone
== *set1clone
){
271 errln("FAIL: Error in clone");
274 logln("Testing hashcode");
275 if(set1
->hashCode() != set1equal
.hashCode() || set1
->hashCode() != set1clone
->hashCode() ||
276 set2
->hashCode() != set2equal
.hashCode() || set2
->hashCode() != set2clone
->hashCode() ||
277 set1copy
->hashCode() != set1equal
.hashCode() || set1copy
->hashCode() != set1clone
->hashCode() ||
278 set1
->hashCode() == set2
->hashCode() || set1copy
->hashCode() == set2
->hashCode() ||
279 set2
->hashCode() == set1clone
->hashCode() || set2
->hashCode() == set1equal
.hashCode() ){
280 errln("FAIL: Error in hashCode()");
292 UnicodeSetTest::TestAddRemove(void) {
293 UnicodeSet set
; // Construct empty set
294 doAssert(set
.isEmpty() == TRUE
, "set should be empty");
295 doAssert(set
.size() == 0, "size should be 0");
296 set
.add(0x0061, 0x007a);
297 expectPairs(set
, "az");
298 doAssert(set
.isEmpty() == FALSE
, "set should not be empty");
299 doAssert(set
.size() != 0, "size should not be equal to 0");
300 doAssert(set
.size() == 26, "size should be equal to 26");
301 set
.remove(0x006d, 0x0070);
302 expectPairs(set
, "alqz");
303 doAssert(set
.size() == 22, "size should be equal to 22");
304 set
.remove(0x0065, 0x0067);
305 expectPairs(set
, "adhlqz");
306 doAssert(set
.size() == 19, "size should be equal to 19");
307 set
.remove(0x0064, 0x0069);
308 expectPairs(set
, "acjlqz");
309 doAssert(set
.size() == 16, "size should be equal to 16");
310 set
.remove(0x0063, 0x0072);
311 expectPairs(set
, "absz");
312 doAssert(set
.size() == 10, "size should be equal to 10");
313 set
.add(0x0066, 0x0071);
314 expectPairs(set
, "abfqsz");
315 doAssert(set
.size() == 22, "size should be equal to 22");
316 set
.remove(0x0061, 0x0067);
317 expectPairs(set
, "hqsz");
318 set
.remove(0x0061, 0x007a);
319 expectPairs(set
, "");
320 doAssert(set
.isEmpty() == TRUE
, "set should be empty");
321 doAssert(set
.size() == 0, "size should be 0");
323 doAssert(set
.isEmpty() == FALSE
, "set should not be empty");
324 doAssert(set
.size() == 1, "size should not be equal to 1");
327 expectPairs(set
, "ac");
328 doAssert(set
.size() == 3, "size should not be equal to 3");
331 expectPairs(set
, "acpq");
332 doAssert(set
.size() == 5, "size should not be equal to 5");
334 expectPairs(set
, "");
335 doAssert(set
.isEmpty() == TRUE
, "set should be empty");
336 doAssert(set
.size() == 0, "size should be 0");
338 // Try removing an entire set from another set
339 expectPattern(set
, "[c-x]", "cx");
341 expectPattern(set2
, "[f-ky-za-bc[vw]]", "acfkvwyz");
343 expectPairs(set
, "deluxx");
345 // Try adding an entire set to another set
346 expectPattern(set
, "[jackiemclean]", "aacceein");
347 expectPattern(set2
, "[hitoshinamekatajamesanderson]", "aadehkmort");
349 expectPairs(set
, "aacehort");
350 doAssert(set
.containsAll(set2
) == TRUE
, "set should contain all the elements in set2");
352 // Try retaining a set of elements contained in another set (intersection)
354 expectPattern(set3
, "[a-c]", "ac");
355 doAssert(set
.containsAll(set3
) == FALSE
, "set doesn't contain all the elements in set3");
357 expectPairs(set3
, "aacc");
358 doAssert(set
.containsAll(set3
) == TRUE
, "set should contain all the elements in set3");
360 expectPairs(set
, "aacc");
361 doAssert(set
.size() == set3
.size(), "set.size() should be set3.size()");
362 doAssert(set
.containsAll(set3
) == TRUE
, "set should contain all the elements in set3");
364 doAssert(set
.size() != set3
.size(), "set.size() != set3.size()");
366 // Test commutativity
367 expectPattern(set
, "[hitoshinamekatajamesanderson]", "aadehkmort");
368 expectPattern(set2
, "[jackiemclean]", "aacceein");
370 expectPairs(set
, "aacehort");
371 doAssert(set
.containsAll(set2
) == TRUE
, "set should contain all the elements in set2");
379 * Make sure minimal representation is maintained.
381 void UnicodeSetTest::TestMinimalRep() {
382 UErrorCode status
= U_ZERO_ERROR
;
383 // This is pretty thoroughly tested by checkCanonicalRep()
384 // run against the exhaustive operation results. Use the code
385 // here for debugging specific spot problems.
387 // 1 overlap against 2
388 UnicodeSet
set("[h-km-q]", status
);
389 if (U_FAILURE(status
)) { errln("FAIL"); return; }
390 UnicodeSet
set2("[i-o]", status
);
391 if (U_FAILURE(status
)) { errln("FAIL"); return; }
393 expectPairs(set
, "hq");
395 set
.applyPattern("[a-m]", status
);
396 if (U_FAILURE(status
)) { errln("FAIL"); return; }
397 set2
.applyPattern("[e-o]", status
);
398 if (U_FAILURE(status
)) { errln("FAIL"); return; }
400 expectPairs(set
, "ao");
402 set
.applyPattern("[e-o]", status
);
403 if (U_FAILURE(status
)) { errln("FAIL"); return; }
404 set2
.applyPattern("[a-m]", status
);
405 if (U_FAILURE(status
)) { errln("FAIL"); return; }
407 expectPairs(set
, "ao");
408 // 1 overlap against 3
409 set
.applyPattern("[a-eg-mo-w]", status
);
410 if (U_FAILURE(status
)) { errln("FAIL"); return; }
411 set2
.applyPattern("[d-q]", status
);
412 if (U_FAILURE(status
)) { errln("FAIL"); return; }
414 expectPairs(set
, "aw");
417 void UnicodeSetTest::TestAPI() {
418 UErrorCode status
= U_ZERO_ERROR
;
421 if (!set
.isEmpty() || set
.getRangeCount() != 0) {
422 errln((UnicodeString
)"FAIL, set should be empty but isn't: " +
426 // clear(), isEmpty()
429 errln((UnicodeString
)"FAIL, set shouldn't be empty but is: " +
433 if (!set
.isEmpty()) {
434 errln((UnicodeString
)"FAIL, set should be empty but isn't: " +
440 if (set
.size() != 0) {
441 errln((UnicodeString
)"FAIL, size should be 0, but is " + set
.size() +
445 if (set
.size() != 1) {
446 errln((UnicodeString
)"FAIL, size should be 1, but is " + set
.size() +
449 set
.add(0x0031, 0x0039);
450 if (set
.size() != 10) {
451 errln((UnicodeString
)"FAIL, size should be 10, but is " + set
.size() +
455 // contains(first, last)
457 set
.applyPattern("[A-Y 1-8 b-d l-y]", status
);
458 if (U_FAILURE(status
)) { errln("FAIL"); return; }
459 for (int32_t i
= 0; i
<set
.getRangeCount(); ++i
) {
460 UChar32 a
= set
.getRangeStart(i
);
461 UChar32 b
= set
.getRangeEnd(i
);
462 if (!set
.contains(a
, b
)) {
463 errln((UnicodeString
)"FAIL, should contain " + (unsigned short)a
+ '-' + (unsigned short)b
+
464 " but doesn't: " + set
);
466 if (set
.contains((UChar32
)(a
-1), b
)) {
467 errln((UnicodeString
)"FAIL, shouldn't contain " +
468 (unsigned short)(a
-1) + '-' + (unsigned short)b
+
469 " but does: " + set
);
471 if (set
.contains(a
, (UChar32
)(b
+1))) {
472 errln((UnicodeString
)"FAIL, shouldn't contain " +
473 (unsigned short)a
+ '-' + (unsigned short)(b
+1) +
474 " but does: " + set
);
478 // Ported InversionList test.
479 UnicodeSet
a((UChar32
)3,(UChar32
)10);
480 UnicodeSet
b((UChar32
)7,(UChar32
)15);
483 logln((UnicodeString
)"a [3-10]: " + a
);
484 logln((UnicodeString
)"b [7-15]: " + b
);
486 UnicodeSet
exp((UChar32
)3,(UChar32
)15);
488 logln((UnicodeString
)"c.set(a).add(b): " + c
);
490 errln((UnicodeString
)"FAIL: c.set(a).add(b) = " + c
+ ", expect " + exp
);
493 exp
.set((UChar32
)0, (UChar32
)2);
494 exp
.add((UChar32
)16, UnicodeSet::MAX_VALUE
);
496 logln((UnicodeString
)"c.complement(): " + c
);
498 errln((UnicodeString
)"FAIL: c.complement() = " + c
+ ", expect " + exp
);
501 exp
.set((UChar32
)3, (UChar32
)15);
503 logln((UnicodeString
)"c.complement(): " + c
);
505 errln((UnicodeString
)"FAIL: c.complement() = " + c
+ ", expect " + exp
);
507 c
= a
; c
.complementAll(b
);
508 exp
.set((UChar32
)3,(UChar32
)6);
509 exp
.add((UChar32
)11,(UChar32
) 15);
511 logln((UnicodeString
)"c.set(a).exclusiveOr(b): " + c
);
513 errln((UnicodeString
)"FAIL: c.set(a).exclusiveOr(b) = " + c
+ ", expect " + exp
);
517 bitsToSet(setToBits(c
), c
);
519 logln((UnicodeString
)"bitsToSet(setToBits(c)): " + c
);
521 errln((UnicodeString
)"FAIL: bitsToSet(setToBits(c)) = " + c
+ ", expect " + exp
);
524 // Additional tests for coverage JB#2118
525 //UnicodeSet::complement(class UnicodeString const &)
526 //UnicodeSet::complementAll(class UnicodeString const &)
527 //UnicodeSet::containsNone(class UnicodeSet const &)
528 //UnicodeSet::containsNone(long,long)
529 //UnicodeSet::containsSome(class UnicodeSet const &)
530 //UnicodeSet::containsSome(long,long)
531 //UnicodeSet::removeAll(class UnicodeString const &)
532 //UnicodeSet::retain(long)
533 //UnicodeSet::retainAll(class UnicodeString const &)
534 //UnicodeSet::serialize(unsigned short *,long,enum UErrorCode &)
535 //UnicodeSetIterator::getString(void)
537 set
.complement("ab");
538 exp
.applyPattern("[{ab}]", status
);
539 if (U_FAILURE(status
)) { errln("FAIL"); return; }
540 if (set
!= exp
) { errln("FAIL: complement(\"ab\")"); return; }
542 UnicodeSetIterator
iset(set
);
543 if (!iset
.next() || !iset
.isString()) {
544 errln("FAIL: UnicodeSetIterator::next/isString");
545 } else if (iset
.getString() != "ab") {
546 errln("FAIL: UnicodeSetIterator::getString");
549 set
.add((UChar32
)0x61, (UChar32
)0x7A);
550 set
.complementAll("alan");
551 exp
.applyPattern("[{ab}b-kmo-z]", status
);
552 if (U_FAILURE(status
)) { errln("FAIL"); return; }
553 if (set
!= exp
) { errln("FAIL: complementAll(\"alan\")"); return; }
555 exp
.applyPattern("[a-z]", status
);
556 if (U_FAILURE(status
)) { errln("FAIL"); return; }
557 if (set
.containsNone(exp
)) { errln("FAIL: containsNone(UnicodeSet)"); }
558 if (!set
.containsSome(exp
)) { errln("FAIL: containsSome(UnicodeSet)"); }
559 exp
.applyPattern("[aln]", status
);
560 if (U_FAILURE(status
)) { errln("FAIL"); return; }
561 if (!set
.containsNone(exp
)) { errln("FAIL: containsNone(UnicodeSet)"); }
562 if (set
.containsSome(exp
)) { errln("FAIL: containsSome(UnicodeSet)"); }
564 if (set
.containsNone((UChar32
)0x61, (UChar32
)0x7A)) {
565 errln("FAIL: containsNone(UChar32, UChar32)");
567 if (!set
.containsSome((UChar32
)0x61, (UChar32
)0x7A)) {
568 errln("FAIL: containsSome(UChar32, UChar32)");
570 if (!set
.containsNone((UChar32
)0x41, (UChar32
)0x5A)) {
571 errln("FAIL: containsNone(UChar32, UChar32)");
573 if (set
.containsSome((UChar32
)0x41, (UChar32
)0x5A)) {
574 errln("FAIL: containsSome(UChar32, UChar32)");
577 set
.removeAll("liu");
578 exp
.applyPattern("[{ab}b-hj-kmo-tv-z]", status
);
579 if (U_FAILURE(status
)) { errln("FAIL"); return; }
580 if (set
!= exp
) { errln("FAIL: removeAll(\"liu\")"); return; }
582 set
.retainAll("star");
583 exp
.applyPattern("[rst]", status
);
584 if (U_FAILURE(status
)) { errln("FAIL"); return; }
585 if (set
!= exp
) { errln("FAIL: retainAll(\"star\")"); return; }
587 set
.retain((UChar32
)0x73);
588 exp
.applyPattern("[s]", status
);
589 if (U_FAILURE(status
)) { errln("FAIL"); return; }
590 if (set
!= exp
) { errln("FAIL: retain('s')"); return; }
593 int32_t slen
= set
.serialize(buf
, sizeof(buf
)/sizeof(buf
[0]), status
);
594 if (U_FAILURE(status
)) { errln("FAIL: serialize"); return; }
595 if (slen
!= 3 || buf
[0] != 2 || buf
[1] != 0x73 || buf
[2] != 0x74) {
596 errln("FAIL: serialize");
// Checks that sets built programmatically (createFrom / createFromAll /
// chained add, retain, complement) equal sets built from an equivalent
// pattern string.
// testList holds the sets in pairs: entry i is the programmatic set and
// entry i+1 is the pattern-constructed set it is compared against.
// NOTE(review): the terminating NULL entry and the status check guarding
// the "couldn't construct test sets" message are not visible in this
// excerpt -- confirm against the full file.
601 void UnicodeSetTest::TestStrings() {
602 UErrorCode ec
= U_ZERO_ERROR
;
604 UnicodeSet
* testList
[] = {
605 UnicodeSet::createFromAll("abc"),
606 new UnicodeSet("[a-c]", ec
),
608 &(UnicodeSet::createFrom("ch")->add('a','z').add("ll")),
609 new UnicodeSet("[{ll}{ch}a-z]", ec
),
611 UnicodeSet::createFrom("ab}c"),
612 new UnicodeSet("[{ab\\}c}]", ec
),
614 &((new UnicodeSet('a','z'))->add('A', 'Z').retain('M','m').complement('X')),
615 new UnicodeSet("[[a-zA-Z]&[M-m]-[X]]", ec
),
621 errln("FAIL: couldn't construct test sets");
// Walk the list two entries at a time until a NULL entry is reached.
624 for (int32_t i
= 0; testList
[i
] != NULL
; i
+=2) {
626 UnicodeString pat0
, pat1
;
627 testList
[i
]->toPattern(pat0
, TRUE
);
628 testList
[i
+1]->toPattern(pat1
, TRUE
);
629 if (*testList
[i
] == *testList
[i
+1]) {
630 logln((UnicodeString
)"Ok: " + pat0
+ " == " + pat1
);
// NOTE(review): the "FAIL:" message below is emitted with logln(), not
// errln(); as far as this excerpt shows, a mismatch would therefore not be
// reported as a test failure -- confirm whether errln() was intended.
632 logln((UnicodeString
)"FAIL: " + pat0
+ " != " + pat1
);
// NOTE(review): only the delete of entry i+1 is visible here; presumably
// entry i is deleted on an elided line -- confirm.
636 delete testList
[i
+1];
640 static const char NOT
[] = "%%%%";
643 * Test pattern behavior of multicharacter strings.
645 void UnicodeSetTest::TestStringPatterns() {
646 UErrorCode ec
= U_ZERO_ERROR
;
647 UnicodeSet
* s
= new UnicodeSet("[a-z {aa} {ab}]", ec
);
649 // This loop isn't a loop. It's here to make the compiler happy.
650 // If you're curious, try removing it and changing the 'break'
651 // statements (except for the last) to goto's.
653 if (U_FAILURE(ec
)) break;
654 const char* exp1
[] = {"aa", "ab", NOT
, "ac", NULL
};
655 expectToPattern(*s
, "[a-z{aa}{ab}]", exp1
);
658 const char* exp2
[] = {"aa", "ab", "ac", NOT
, "xy", NULL
};
659 expectToPattern(*s
, "[a-z{aa}{ab}{ac}]", exp2
);
661 s
->applyPattern("[a-z {\\{l} {r\\}}]", ec
);
662 if (U_FAILURE(ec
)) break;
663 const char* exp3
[] = {"{l", "r}", NOT
, "xy", NULL
};
664 expectToPattern(*s
, "[a-z{\\{l}{r\\}}]", exp3
);
667 const char* exp4
[] = {"{l", "r}", "[]", NOT
, "xy", NULL
};
668 expectToPattern(*s
, "[a-z{\\[\\]}{r\\}}{\\{l}]", exp4
);
670 s
->applyPattern("[a-z {\\u4E01\\u4E02}{\\n\\r}]", ec
);
671 if (U_FAILURE(ec
)) break;
672 const char* exp5
[] = {"\\u4E01\\u4E02", "\n\r", NULL
};
673 expectToPattern(*s
, "[a-z{\\u4E01\\u4E02}{\\n\\r}]", exp5
);
677 s
->add(UnicodeString("abc", ""));
678 s
->add(UnicodeString("abc", ""));
679 const char* exp6
[] = {"abc", NOT
, "ab", NULL
};
680 expectToPattern(*s
, "[{abc}]", exp6
);
685 if (U_FAILURE(ec
)) errln("FAIL: pattern parse error");
690 * Test the [:Latin:] syntax.
692 void UnicodeSetTest::TestScriptSet() {
693 expectContainment("[:Latin:]", "aA", CharsToUnicodeString("\\u0391\\u03B1"));
695 expectContainment("[:Greek:]", CharsToUnicodeString("\\u0391\\u03B1"), "aA");
698 expectContainment("[[:Common:][:Inherited:]]", CharsToUnicodeString("\\U00003099\\U0001D169\\u0000"), "aA");
703 * Test property-based set patterns such as \p{...}, \P{...}, [:...:] and \N{...}.
705 void UnicodeSetTest::TestPropertySet() {
706 static const char* DATA
[] = {
707 // Pattern, Chars IN, Chars NOT in
717 "\\P{ GENERAL Category = upper case letter }",
721 // Combining class: @since ICU 2.2
722 // Check both symbolic and numeric
727 "\\p{Canonical Combining Class = 11}",
731 "[:c c c = iota subscript :]",
735 // Bidi class: @since ICU 2.2
736 "\\p{bidiclass=lefttoright}",
740 // Binary properties: @since ICU 2.2
749 // JB#1767 \N{}, \p{ASCII}
754 "[\\N{ latin small letter a }[:name= latin small letter z:]]",
770 "\\u03D8\\u03D9", // 3.2
773 "\\u1800\\u3400\\U0002f800",
774 "\\u0220\\u034f\\u30ff\\u33ff\\ufe73\\U00010000\\U00050000",
776 // JB#2350: Case_Sensitive
777 "[:Case Sensitive:]",
778 "A\\u1FFC\\U00010410",
779 ";\\u00B4\\U00010500",
781 // JB#2832: C99-compatibility props
788 " \\u0003\\u0007\\u0009\\u000A\\u000D",
791 "!@#%&*()[]{}-_\\/;:,.?'\"",
798 // Regex compatibility test
799 "[-b]", // leading '-' is literal
803 "[^-b]", // leading '-' is literal
807 "[b-]", // trailing '-' is literal
811 "[^b-]", // trailing '-' is literal
816 static const int32_t DATA_LEN
= sizeof(DATA
)/sizeof(DATA
[0]);
818 for (int32_t i
=0; i
<DATA_LEN
; i
+=3) {
819 expectContainment(DATA
[i
], CharsToUnicodeString(DATA
[i
+1]),
820 CharsToUnicodeString(DATA
[i
+2]));
825 * Test cloning of UnicodeSet. For C++, we test the copy constructor.
827 void UnicodeSetTest::TestClone() {
828 UErrorCode ec
= U_ZERO_ERROR
;
829 UnicodeSet
s("[abcxyz]", ec
);
831 expectContainment(t
, "abc", "def");
835 * Test the indexOf() and charAt() methods.
837 void UnicodeSetTest::TestIndexOf() {
838 UErrorCode ec
= U_ZERO_ERROR
;
839 UnicodeSet
set("[a-cx-y3578]", ec
);
841 errln("FAIL: UnicodeSet constructor");
844 for (int32_t i
=0; i
<set
.size(); ++i
) {
845 UChar32 c
= set
.charAt(i
);
846 if (set
.indexOf(c
) != i
) {
847 errln("FAIL: charAt(%d) = %X => indexOf() => %d",
848 i
, c
, set
.indexOf(c
));
851 UChar32 c
= set
.charAt(set
.size());
853 errln("FAIL: charAt(<out of range>) = %X", c
);
855 int32_t j
= set
.indexOf((UChar32
)0x71/*'q'*/);
857 errln((UnicodeString
)"FAIL: indexOf('q') = " + j
);
864 void UnicodeSetTest::TestCloseOver() {
865 UErrorCode ec
= U_ZERO_ERROR
;
867 char CASE
[] = {(char)USET_CASE
};
868 const char* DATA
[] = {
869 // selector, input, output
871 "[aq\\u00DF{Bc}{bC}{Fi}]",
872 "[aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]",
876 "[\\u01F1\\u01F2\\u01F3]",
880 "[\\u1FB4{\\u03AC\\u03B9}]",
886 CASE
, // make sure binary search finds limits
888 "[aA\\uFF3A\\uFF5A]",
891 "[a-z]","[A-Za-z\\u017F\\u212A]",
902 for (int32_t i
=0; DATA
[i
]!=NULL
; i
+=3) {
903 int32_t selector
= DATA
[i
][0];
904 UnicodeString
pat(DATA
[i
+1]);
905 UnicodeString
exp(DATA
[i
+2]);
906 s
.applyPattern(pat
, ec
);
907 s
.closeOver(selector
);
908 t
.applyPattern(exp
, ec
);
910 errln("FAIL: applyPattern failed");
914 logln((UnicodeString
)"Ok: " + pat
+ ".closeOver(" + selector
+ ") => " + exp
);
917 errln((UnicodeString
)"FAIL: " + pat
+ ".closeOver(" + selector
+ ") => " +
918 s
.toPattern(buf
, TRUE
) + ", expected " + exp
);
922 // Test the pattern API
923 s
.applyPattern("[abc]", USET_CASE_INSENSITIVE
, ec
);
925 errln("FAIL: applyPattern failed");
927 expectContainment(s
, "abcABC", "defDEF");
929 UnicodeSet
v("[^abc]", USET_CASE_INSENSITIVE
, ec
);
931 errln("FAIL: constructor failed");
933 expectContainment(v
, "defDEF", "abcABC");
// Exercises parsing and re-generation of a pattern whose members are written
// as \u / \U escapes.
// The test runs two passes over the same pattern. isPatternValid is TRUE
// only on pass 1, and a failure is reported when U_SUCCESS(ec) disagrees
// with it. Per the original comment below, pass 2 pre-unescapes the pattern
// and is expected to fail to parse.
// NOTE(review): several statements (the declaration owning the second
// string literal, the condition guarding unescape(), and the comparison
// that selects between the logln/errln of newpat) are not visible in this
// excerpt -- confirm against the full file.
937 void UnicodeSetTest::TestEscapePattern() {
938 const char pattern
[] =
939 "[\\uFEFF \\uFFF9-\\uFFFC \\U0001D173-\\U0001D17A \\U000F0000-\\U000FFFFD ]";
// Same ranges as above but without the embedded spaces; presumably the
// pattern expected back from toPattern() -- confirm.
941 "[\\uFEFF\\uFFF9-\\uFFFC\\U0001D173-\\U0001D17A\\U000F0000-\\U000FFFFD]";
942 // We test this with two passes; in the second pass we
943 // pre-unescape the pattern. Since U+FEFF and several other code
944 // points are rule whitespace, this fails -- which is what we
946 for (int32_t pass
=1; pass
<=2; ++pass
) {
947 UErrorCode ec
= U_ZERO_ERROR
;
948 UnicodeString
pat(pattern
);
950 pat
= pat
.unescape();
952 // Pattern is only good for pass 1
953 UBool isPatternValid
= (pass
==1);
955 UnicodeSet
set(pat
, ec
);
956 if (U_SUCCESS(ec
) != isPatternValid
){
957 errln((UnicodeString
)"FAIL: applyPattern(" +
958 escape(pat
) + ") => " +
// NOTE(review): the code tests code point 0x0644 but the failure message
// says "U+0664"; one of the two looks like a typo -- confirm which value
// was intended before changing either.
965 if (set
.contains((UChar
)0x0644)){
966 errln((UnicodeString
)"FAIL: " + escape(pat
) + " contains(U+0664)");
969 UnicodeString newpat
;
970 set
.toPattern(newpat
, TRUE
);
972 logln(escape(pat
) + " => " + newpat
);
974 errln((UnicodeString
)"FAIL: " + escape(pat
) + " => " + newpat
);
// For each range, build a description string and report a failure if the
// range start is negative.
977 for (int32_t i
=0; i
<set
.getRangeCount(); ++i
) {
978 UnicodeString
str("Range ");
// 0x30 + i is the digit character for the range index (valid for i < 10).
979 str
.append((UChar
)(0x30 + i
))
981 .append((UChar32
)set
.getRangeStart(i
))
983 .append((UChar32
)set
.getRangeEnd(i
));
984 str
= str
+ " (" + set
.getRangeStart(i
) + " - " +
985 set
.getRangeEnd(i
) + ")";
986 if (set
.getRangeStart(i
) < 0) {
987 errln((UnicodeString
)"FAIL: " + escape(str
));
// Reports whether `set` matches the single range [start, end].
//   label      - text placed at the front of the log/failure message.
//   set        - the set under test.
//   start, end - bounds used to construct the expected set `exp`.
// One branch logs the pattern of `set`; the other reports a failure showing
// the patterns of both `set` and `exp`.
// NOTE(review): the declarations of pat/xpat and the comparison choosing
// between the two branches are not visible in this excerpt; presumably the
// test is `set == exp` -- confirm.
995 void UnicodeSetTest::expectRange(const UnicodeString
& label
,
996 const UnicodeSet
& set
,
997 UChar32 start
, UChar32 end
) {
998 UnicodeSet
exp(start
, end
);
1001 logln(label
+ " => " + set
.toPattern(pat
, TRUE
));
1004 errln((UnicodeString
)"FAIL: " + label
+ " => " +
1005 set
.toPattern(pat
, TRUE
) +
1006 ", expected " + exp
.toPattern(xpat
, TRUE
));
1010 void UnicodeSetTest::TestInvalidCodePoint() {
1012 const UChar32 DATA
[] = {
1013 // Test range Expected range
1014 0, 0x10FFFF, 0, 0x10FFFF,
1015 (UChar32
)-1, 8, 0, 8,
1016 8, 0x110000, 8, 0x10FFFF
1018 const int32_t DATA_LENGTH
= sizeof(DATA
)/sizeof(DATA
[0]);
1023 for (i
=0; i
<DATA_LENGTH
; i
+=4) {
1024 UChar32 start
= DATA
[i
];
1025 UChar32 end
= DATA
[i
+1];
1026 UChar32 xstart
= DATA
[i
+2];
1027 UChar32 xend
= DATA
[i
+3];
1029 // Try various API using the test code points
1031 UnicodeSet
set(start
, end
);
1032 expectRange((UnicodeString
)"ct(" + start
+ "," + end
+ ")",
1036 set
.set(start
, end
);
1037 expectRange((UnicodeString
)"set(" + start
+ "," + end
+ ")",
1040 UBool b
= set
.contains(start
);
1041 b
= set
.contains(start
, end
);
1042 b
= set
.containsNone(start
, end
);
1043 b
= set
.containsSome(start
, end
);
1045 int32_t index
= set
.indexOf(start
);
1049 set
.add(start
, end
);
1050 expectRange((UnicodeString
)"add(" + start
+ "," + end
+ ")",
1053 set
.set(0, 0x10FFFF);
1054 set
.retain(start
, end
);
1055 expectRange((UnicodeString
)"retain(" + start
+ "," + end
+ ")",
1059 set
.set(0, 0x10FFFF);
1061 set
.remove(start
, end
);
1063 expectRange((UnicodeString
)"!remove(" + start
+ "," + end
+ ")",
1066 set
.set(0, 0x10FFFF);
1067 set
.complement(start
, end
);
1069 expectRange((UnicodeString
)"!complement(" + start
+ "," + end
+ ")",
1071 set
.complement(start
);
1074 const UChar32 DATA2
[] = {
1080 const int32_t DATA2_LENGTH
= sizeof(DATA2
)/sizeof(DATA2
[0]);
1082 for (i
=0; i
<DATA2_LENGTH
; ++i
) {
1083 UChar32 c
= DATA2
[i
], end
= 0x10FFFF;
1084 UBool valid
= (c
>= 0 && c
<= 0x10FFFF);
1086 UnicodeSet
set(0, 0x10FFFF);
1088 // For single-codepoint contains, invalid codepoints are NOT contained
1089 UBool b
= set
.contains(c
);
1091 logln((UnicodeString
)"[\\u0000-\\U0010FFFF].contains(" + c
+
1094 errln((UnicodeString
)"FAIL: [\\u0000-\\U0010FFFF].contains(" + c
+
1098 // For codepoint range contains, containsNone, and containsSome,
1099 // invalid or empty (start > end) ranges have UNDEFINED behavior.
1100 b
= set
.contains(c
, end
);
1101 logln((UnicodeString
)"* [\\u0000-\\U0010FFFF].contains(" + c
+
1102 "," + end
+ ") = " + b
);
1104 b
= set
.containsNone(c
, end
);
1105 logln((UnicodeString
)"* [\\u0000-\\U0010FFFF].containsNone(" + c
+
1106 "," + end
+ ") = " + b
);
1108 b
= set
.containsSome(c
, end
);
1109 logln((UnicodeString
)"* [\\u0000-\\U0010FFFF].containsSome(" + c
+
1110 "," + end
+ ") = " + b
);
1112 int32_t index
= set
.indexOf(c
);
1113 if ((index
>= 0) == valid
) {
1114 logln((UnicodeString
)"[\\u0000-\\U0010FFFF].indexOf(" + c
+
1117 errln((UnicodeString
)"FAIL: [\\u0000-\\U0010FFFF].indexOf(" + c
+
1123 void UnicodeSetTest::TestExhaustive() {
1124 // exhaustive tests. Simulate UnicodeSets with integers.
1125 // That gives us very solid tests (except for large memory tests).
1127 int32_t limit
= 128;
1129 UnicodeSet x
, y
, z
, aa
;
1131 for (int32_t i
= 0; i
< limit
; ++i
) {
1133 logln((UnicodeString
)"Testing " + i
+ ", " + x
);
1134 _testComplement(i
, x
, y
);
1136 // AS LONG AS WE ARE HERE, check roundtrip
1137 checkRoundTrip(bitsToSet(i
, aa
));
1139 for (int32_t j
= 0; j
< limit
; ++j
) {
1140 _testAdd(i
,j
, x
,y
,z
);
1141 _testXor(i
,j
, x
,y
,z
);
1142 _testRetain(i
,j
, x
,y
,z
);
1143 _testRemove(i
,j
, x
,y
,z
);
1148 void UnicodeSetTest::_testComplement(int32_t a
, UnicodeSet
& x
, UnicodeSet
& z
) {
1152 int32_t c
= setToBits(z
);
1154 errln((UnicodeString
)"FAILED: add: ~" + x
+ " != " + z
);
1155 errln((UnicodeString
)"FAILED: add: ~" + a
+ " != " + c
);
1157 checkCanonicalRep(z
, (UnicodeString
)"complement " + a
);
1160 void UnicodeSetTest::_testAdd(int32_t a
, int32_t b
, UnicodeSet
& x
, UnicodeSet
& y
, UnicodeSet
& z
) {
1165 int32_t c
= setToBits(z
);
1167 errln((UnicodeString
)"FAILED: add: " + x
+ " | " + y
+ " != " + z
);
1168 errln((UnicodeString
)"FAILED: add: " + a
+ " | " + b
+ " != " + c
);
1170 checkCanonicalRep(z
, (UnicodeString
)"add " + a
+ "," + b
);
1173 void UnicodeSetTest::_testRetain(int32_t a
, int32_t b
, UnicodeSet
& x
, UnicodeSet
& y
, UnicodeSet
& z
) {
1178 int32_t c
= setToBits(z
);
1180 errln((UnicodeString
)"FAILED: retain: " + x
+ " & " + y
+ " != " + z
);
1181 errln((UnicodeString
)"FAILED: retain: " + a
+ " & " + b
+ " != " + c
);
1183 checkCanonicalRep(z
, (UnicodeString
)"retain " + a
+ "," + b
);
1186 void UnicodeSetTest::_testRemove(int32_t a
, int32_t b
, UnicodeSet
& x
, UnicodeSet
& y
, UnicodeSet
& z
) {
1191 int32_t c
= setToBits(z
);
1192 if (c
!= (a
&~ b
)) {
1193 errln((UnicodeString
)"FAILED: remove: " + x
+ " &~ " + y
+ " != " + z
);
1194 errln((UnicodeString
)"FAILED: remove: " + a
+ " &~ " + b
+ " != " + c
);
1196 checkCanonicalRep(z
, (UnicodeString
)"remove " + a
+ "," + b
);
1199 void UnicodeSetTest::_testXor(int32_t a
, int32_t b
, UnicodeSet
& x
, UnicodeSet
& y
, UnicodeSet
& z
) {
1204 int32_t c
= setToBits(z
);
1206 errln((UnicodeString
)"FAILED: complement: " + x
+ " ^ " + y
+ " != " + z
);
1207 errln((UnicodeString
)"FAILED: complement: " + a
+ " ^ " + b
+ " != " + c
);
1209 checkCanonicalRep(z
, (UnicodeString
)"complement " + a
+ "," + b
);
1213 * Check that ranges are monotonically increasing and non-
// Sanity-checks the range list of `set` and reports problems with errln().
//   set - the set whose ranges are inspected.
//   msg - description of the operation that produced `set`; included in
//         every failure message.
// The failure messages cover three conditions: a range count below zero,
// a range whose start is greater than its end, and a range that overlaps
// the previous one.
// NOTE(review): the conditions for the first two messages, and the
// declaration and update of `last`, are not visible in this excerpt;
// presumably `last` holds the end of the previous range -- confirm.
1216 void UnicodeSetTest::checkCanonicalRep(const UnicodeSet
& set
, const UnicodeString
& msg
) {
1217 int32_t n
= set
.getRangeCount();
1219 errln((UnicodeString
)"FAIL result of " + msg
+
1220 ": range count should be >= 0 but is " +
1221 n
/*+ " for " + set.toPattern())*/);
1225 for (int32_t i
=0; i
<n
; ++i
) {
1226 UChar32 start
= set
.getRangeStart(i
);
1227 UChar32 end
= set
.getRangeEnd(i
);
// Ranges are numbered from 1 in the messages, hence (i+1).
1229 errln((UnicodeString
)"FAIL result of " + msg
+
1230 ": range " + (i
+1) +
1231 " start > end: " + (int)start
+ ", " + (int)end
+
// Only ranges after the first are compared against `last`.
1234 if (i
> 0 && start
<= last
) {
1235 errln((UnicodeString
)"FAIL result of " + msg
+
1236 ": range " + (i
+1) +
1237 " overlaps previous range: " + (int)start
+ ", " + (int)end
+
1245 * Convert a bitmask to a UnicodeSet.
/**
 * Build a set from a 32-bit mask: bit i of the mask is tested for each
 * code point i in the range 0..31.
 *
 * @param a       the bitmask to convert
 * @param result  the set that receives the conversion
 * @return a reference to a UnicodeSet (presumably `result`; the return
 *         statement is not visible in this excerpt -- confirm)
 *
 * NOTE(review): the statement executed for a set bit is not visible here;
 * presumably it adds code point i to `result` -- confirm.
 * NOTE(review): for i == 31 the expression 1<<i shifts into the sign bit of
 * the int literal (assuming a 32-bit int); an unsigned operand would avoid
 * relying on that -- confirm before changing.
 */
1247 UnicodeSet
& UnicodeSetTest::bitsToSet(int32_t a
, UnicodeSet
& result
) {
1249 for (UChar32 i
= 0; i
< 32; ++i
) {
// Is bit i set in the mask?
1250 if ((a
& (1<<i
)) != 0) {
1258 * Convert a UnicodeSet to a bitmask. Only the characters
1259 * U+0000 to U+001F are represented in the bitmask.
/**
 * Convert a set to a 32-bit mask by testing membership of each code point
 * i in the range 0..31 (the loop bound is i < 32).
 *
 * @param x  the set to convert; it is only queried with contains()
 * @return the resulting bitmask
 *
 * NOTE(review): the accumulation statement and the return are not visible
 * in this excerpt; presumably bit i is set when x contains code point i --
 * confirm.
 */
1261 int32_t UnicodeSetTest::setToBits(const UnicodeSet
& x
) {
1263 for (int32_t i
= 0; i
< 32; ++i
) {
// Membership test for code point i.
1264 if (x
.contains((UChar32
)i
)) {
1272 * Return the representation of an inversion list based UnicodeSet
1273 * as a pairs list. Ranges are listed in ascending Unicode order.
1274 * For example, the set [a-zA-M3] is represented as "33AMaz".
/**
 * Build the "pairs" string for a set: for each range, in index order, the
 * range start followed by the range end is appended as two UChar values.
 *
 * @param set  the set whose ranges are read via getRangeCount(),
 *             getRangeStart() and getRangeEnd()
 * @return the accumulated pairs string (the return statement itself is not
 *         visible in this excerpt)
 *
 * NOTE(review): start and end are UChar32 but are appended through a
 * (UChar) cast; presumably values that do not fit in UChar are handled by
 * the guard that is not visible here (the one that forces the loop to
 * terminate) -- confirm.
 */
1276 UnicodeString
UnicodeSetTest::getPairs(const UnicodeSet
& set
) {
1277 UnicodeString pairs
;
1278 for (int32_t i
=0; i
<set
.getRangeCount(); ++i
) {
1279 UChar32 start
= set
.getRangeStart(i
);
1280 UChar32 end
= set
.getRangeEnd(i
);
// Setting i to the range count makes this the final loop iteration.
1283 i
= set
.getRangeCount(); // Should be unnecessary
// Append the range as two consecutive UChar values: start, then end.
1285 pairs
.append((UChar
)start
).append((UChar
)end
);
1291 * Basic consistency check for a few items.
1292 * That the iterator works, and that we can create a pattern and
1293 * get the same thing back
/**
 * Run a series of round-trip checks on a set.  After each step the set `t`
 * is compared with `s` through checkEqual(), using a label that names the
 * step: "copy ct", "operator=", "iterator roundtrip" (twice, without and
 * with ranges), "toPattern(false)" and "toPattern(true)".
 *
 * @param s  the reference set; it is only read
 *
 * NOTE(review): the declaration of `t` and the statements that assign to it
 * before the first two checkEqual() calls are not visible in this excerpt --
 * presumably a copy construction and an assignment, as the labels suggest;
 * confirm.
 */
1295 void UnicodeSetTest::checkRoundTrip(const UnicodeSet
& s
) {
1296 UErrorCode ec
= U_ZERO_ERROR
;
1299 checkEqual(s
, t
, "copy ct");
1302 checkEqual(s
, t
, "operator=");
// Rebuild t from s through the iterator, without ranges.
1304 copyWithIterator(t
, s
, FALSE
);
1305 checkEqual(s
, t
, "iterator roundtrip");
1307 copyWithIterator(t
, s
, TRUE
); // try range
1308 checkEqual(s
, t
, "iterator roundtrip");
// Pattern round trip with the second toPattern() argument FALSE.
1310 UnicodeString pat
; s
.toPattern(pat
, FALSE
);
1311 t
.applyPattern(pat
, ec
);
1312 if (U_FAILURE(ec
)) {
1313 errln("FAIL: applyPattern");
1316 checkEqual(s
, t
, "toPattern(false)");
// Pattern round trip again, this time with the argument TRUE.
1319 s
.toPattern(pat
, TRUE
);
1320 t
.applyPattern(pat
, ec
);
1321 if (U_FAILURE(ec
)) {
1322 errln("FAIL: applyPattern");
1325 checkEqual(s
, t
, "toPattern(true)");
/**
 * Copy the contents of one set into another by walking it with a
 * UnicodeSetIterator.  Two copy strategies are present: one driven by
 * nextRange(), adding whole ranges (getCodepoint()..getCodepointEnd()),
 * and one adding single code points (getCodepoint()).  In both, string
 * elements are added with getString().
 *
 * @param t          destination set; elements are add()ed to it
 * @param s          source set; it is only iterated
 * @param withRange  selects the strategy (presumably TRUE picks the
 *                   nextRange() loop; the branch on this flag is not
 *                   visible in this excerpt -- confirm)
 *
 * NOTE(review): any clearing of `t` before the copy and the loop header of
 * the single-code-point strategy are not visible here -- confirm.
 */
1329 void UnicodeSetTest::copyWithIterator(UnicodeSet
& t
, const UnicodeSet
& s
, UBool withRange
) {
1331 UnicodeSetIterator
it(s
);
// Range-wise copy.
1333 while (it
.nextRange()) {
1334 if (it
.isString()) {
1335 t
.add(it
.getString());
1337 t
.add(it
.getCodepoint(), it
.getCodepointEnd());
// Element-wise copy.
1342 if (it
.isString()) {
1343 t
.add(it
.getString());
1345 t
.add(it
.getCodepoint());
/**
 * Compare two sets and log the outcome.  Both sets are rendered with
 * toPattern(..., TRUE) so the message can show them; a mismatch is reported
 * through errln() ("FAIL: ..."), a match through logln() ("Ok: ...").
 *
 * @param s        the source (expected) set
 * @param t        the result (actual) set
 * @param message  label identifying the check; prefixed to the log line
 * @return a UBool (presumably TRUE when the sets are equal; the comparison
 *         and the return statements are not visible in this excerpt --
 *         confirm)
 */
1351 UBool
UnicodeSetTest::checkEqual(const UnicodeSet
& s
, const UnicodeSet
& t
, const char* message
) {
// Render both sets as patterns for the diagnostic output.
1352 UnicodeString source
; s
.toPattern(source
, TRUE
);
1353 UnicodeString result
; t
.toPattern(result
, TRUE
);
1355 errln((UnicodeString
)"FAIL: " + message
1356 + "; source = " + source
1357 + "; result = " + result
1361 logln((UnicodeString
)"Ok: " + message
1362 + "; source = " + source
1363 + "; result = " + result
/**
 * Containment check starting from a pattern string.  The pattern is parsed
 * into a UnicodeSet; a parse failure is reported through errln() together
 * with u_errorName(ec).  The work is then delegated to the four-argument
 * overload, with the pattern itself used as the set name.
 *
 * @param pat       pattern used to construct the set under test
 * @param charsIn   characters expected to be contained in the set
 * @param charsOut  characters expected not to be contained in the set
 *
 * NOTE(review): whether the function returns early after a parse failure is
 * not visible in this excerpt -- confirm.
 */
1370 UnicodeSetTest::expectContainment(const UnicodeString
& pat
,
1371 const UnicodeString
& charsIn
,
1372 const UnicodeString
& charsOut
) {
1373 UErrorCode ec
= U_ZERO_ERROR
;
1374 UnicodeSet
set(pat
, ec
);
1375 if (U_FAILURE(ec
)) {
1376 errln((UnicodeString
)"FAIL: pattern \"" +
1377 pat
+ "\" => " + u_errorName(ec
));
// Delegate, using the pattern text as the set's display name.
1380 expectContainment(set
, pat
, charsIn
, charsOut
);
/**
 * Containment check for an existing set without an explicit name.
 * Delegates to the four-argument overload, passing `pat` as the set name.
 *
 * @param set       the set under test
 * @param charsIn   characters expected to be contained in the set
 * @param charsOut  characters expected not to be contained in the set
 *
 * NOTE(review): the declaration of `pat` is not visible in this excerpt;
 * presumably it is the pattern produced from `set` -- confirm.
 */
1384 UnicodeSetTest::expectContainment(const UnicodeSet
& set
,
1385 const UnicodeString
& charsIn
,
1386 const UnicodeString
& charsOut
) {
1389 expectContainment(set
, pat
, charsIn
, charsOut
);
/**
 * Core containment check.  Every code point of charsIn is tested with
 * set.contains() and is expected to be present; every code point of
 * charsOut is tested and is expected to be absent.  For each of the two
 * lists one line is logged: errln() with the offending characters when
 * `bad` is non-empty, otherwise logln().
 *
 * @param set       the set under test
 * @param setName   display name of the set, used in the log messages
 * @param charsIn   characters expected to be contained in the set
 * @param charsOut  characters expected not to be contained in the set
 *
 * NOTE(review): the declarations of `bad`, `i` and `c`, and the statements
 * that collect offending characters into `bad` (and reset it between the
 * two passes), are not visible in this excerpt -- confirm.
 */
1393 UnicodeSetTest::expectContainment(const UnicodeSet
& set
,
1394 const UnicodeString
& setName
,
1395 const UnicodeString
& charsIn
,
1396 const UnicodeString
& charsOut
) {
// Pass 1: step through charsIn by code point (index advances by
// U16_LENGTH(c)); each one should be in the set.
1401 for (i
=0; i
<charsIn
.length(); i
+=U16_LENGTH(c
)) {
1402 c
= charsIn
.char32At(i
);
1403 if (!set
.contains(c
)) {
1407 if (bad
.length() > 0) {
1408 errln((UnicodeString
)"Fail: set " + setName
+ " does not contain " + prettify(bad
) +
1409 ", expected containment of " + prettify(charsIn
));
1411 logln((UnicodeString
)"Ok: set " + setName
+ " contains " + prettify(charsIn
));
// Pass 2: step through charsOut the same way; none should be in the set.
1415 for (i
=0; i
<charsOut
.length(); i
+=U16_LENGTH(c
)) {
1416 c
= charsOut
.char32At(i
);
1417 if (set
.contains(c
)) {
1421 if (bad
.length() > 0) {
1422 errln((UnicodeString
)"Fail: set " + setName
+ " contains " + prettify(bad
) +
1423 ", expected non-containment of " + prettify(charsOut
));
1425 logln((UnicodeString
)"Ok: set " + setName
+ " does not contain " + prettify(charsOut
));
/**
 * Apply a pattern to a set and verify the outcome in two stages.
 *
 * Stage 1: after set.applyPattern(pattern), getPairs(set) must equal
 * expectedPairs; failures (including a failing applyPattern) go to errln(),
 * success goes to logln().
 *
 * Stage 2: the pattern produced by set.toPattern() is fed to a freshly
 * constructed UnicodeSet, which must compare equal to `set` and yield the
 * same pairs string.
 *
 * @param set            the set to modify and check (it is overwritten by
 *                       applyPattern)
 * @param pattern        the pattern to apply
 * @param expectedPairs  the pairs string (see getPairs) the set must produce
 *
 * NOTE(review): `tempset` is allocated with new and no matching delete is
 * visible in this excerpt -- confirm it is released on every path.
 * NOTE(review): whether the function returns early after a failed
 * applyPattern is not visible here -- confirm.
 */
1430 UnicodeSetTest::expectPattern(UnicodeSet
& set
,
1431 const UnicodeString
& pattern
,
1432 const UnicodeString
& expectedPairs
){
1433 UErrorCode status
= U_ZERO_ERROR
;
1434 set
.applyPattern(pattern
, status
);
1435 if (U_FAILURE(status
)) {
1436 errln(UnicodeString("FAIL: applyPattern(\"") + pattern
+
// Stage 1: compare the resulting ranges against the expected pairs string.
1440 if (getPairs(set
) != expectedPairs
) {
1441 errln(UnicodeString("FAIL: applyPattern(\"") + pattern
+
1443 escape(getPairs(set
)) + "\", expected \"" +
1444 escape(expectedPairs
) + "\"");
1446 logln(UnicodeString("Ok: applyPattern(\"") + pattern
+
1448 escape(getPairs(set
)) + "\"");
1451 // the result of calling set.toPattern(), which is the string representation of
1452 // this set(set), is passed to a UnicodeSet constructor, and tested that it
1453 // will produce another set that is equal to this one.
1454 UnicodeString temppattern
;
1455 set
.toPattern(temppattern
);
1456 UnicodeSet
*tempset
=new UnicodeSet(temppattern
, status
);
1457 if (U_FAILURE(status
)) {
1458 errln(UnicodeString("FAIL: applyPattern(\""+ pattern
+ "\").toPattern() => " + temppattern
+ " => invalid pattern"));
// Stage 2: the reparsed set must be equal both by operator and by pairs.
1461 if(*tempset
!= set
|| getPairs(*tempset
) != getPairs(set
)){
1462 errln(UnicodeString("FAIL: applyPattern(\""+ pattern
+ "\").toPattern() => " + temppattern
+ " => pairs \""+ escape(getPairs(*tempset
)) + "\", expected pairs \"" +
1463 escape(getPairs(set
)) + "\""));
1465 logln(UnicodeString("Ok: applyPattern(\""+ pattern
+ "\").toPattern() => " + temppattern
+ " => pairs \"" + escape(getPairs(*tempset
)) + "\""));
/**
 * Verify that the pairs string of a set (see getPairs) matches the expected
 * value; on mismatch, report both strings, escaped, through errln().
 *
 * @param set            the set under test
 * @param expectedPairs  the pairs string the set is expected to produce
 */
1473 UnicodeSetTest::expectPairs(const UnicodeSet
& set
, const UnicodeString
& expectedPairs
) {
1474 if (getPairs(set
) != expectedPairs
) {
1475 errln(UnicodeString("FAIL: Expected pair list \"") +
1476 escape(expectedPairs
) + "\", got \"" +
1477 escape(getPairs(set
)) + "\"");
/**
 * Verify the pattern generated for a set and, optionally, the membership of
 * a list of strings.
 *
 * First, set.toPattern(pat, TRUE) must equal expPat (logln on success,
 * errln on failure).  Then each entry of expStrings, up to the NULL
 * terminator, is converted with CharsToUnicodeString() and tested with
 * set.contains(); the result is compared with the flag `in`.
 *
 * @param set         the set under test
 * @param expPat      the expected toPattern() output; also used as the set's
 *                    name in the membership messages
 * @param expStrings  NULL-terminated array of strings to test.  An entry
 *                    that is the very pointer NOT (compared by address, not
 *                    by content) is a marker rather than a string.
 *
 * NOTE(review): the declarations of `pat` and `in`, and what happens when
 * the NOT marker is encountered, are not visible in this excerpt --
 * presumably the marker flips `in` so that following strings are expected
 * to be absent; confirm.  A NULL expStrings would be dereferenced by the
 * loop as shown unless a guard outside this excerpt prevents it.
 */
1481 void UnicodeSetTest::expectToPattern(const UnicodeSet
& set
,
1482 const UnicodeString
& expPat
,
1483 const char** expStrings
) {
1485 set
.toPattern(pat
, TRUE
);
1486 if (pat
== expPat
) {
1487 logln((UnicodeString
)"Ok: toPattern() => \"" + pat
+ "\"");
1489 errln((UnicodeString
)"FAIL: toPattern() => \"" + pat
+ "\", expected \"" + expPat
+ "\"");
// Membership checks, driven by the NULL-terminated string list.
1493 for (int32_t i
=0; expStrings
[i
] != NULL
; ++i
) {
1494 if (expStrings
[i
] == NOT
) { // sic; pointer comparison
1498 UnicodeString s
= CharsToUnicodeString(expStrings
[i
]);
1499 UBool contained
= set
.contains(s
);
// Actual membership must agree with the currently expected membership.
1500 if (contained
== in
) {
1501 logln((UnicodeString
)"Ok: " + expPat
+
1502 (contained
? " contains {" : " does not contain {") +
1503 escape(expStrings
[i
]) + "}");
1505 errln((UnicodeString
)"FAIL: " + expPat
+
1506 (contained
? " contains {" : " does not contain {") +
1507 escape(expStrings
[i
]) + "}");
1512 static UChar
toHexString(int32_t i
) { return (UChar
)(i
+ (i
< 10 ? 0x30 : (0x41 - 10))); }
/**
 * Assertion helper: reports "ERROR : " followed by the message through
 * errln().
 *
 * @param condition  the asserted condition
 * @param message    text appended to the "ERROR : " prefix in the report
 *
 * NOTE(review): the test on `condition` that guards the errln() call is not
 * visible in this excerpt; presumably the error is reported only when the
 * condition is false -- confirm.
 */
1515 UnicodeSetTest::doAssert(UBool condition
, const char *message
)
1518 errln(UnicodeString("ERROR : ") + message
);
1523 UnicodeSetTest::escape(const UnicodeString
& s
) {
1525 for (int32_t i
=0; i
<s
.length(); )
1527 UChar32 c
= s
.char32At(i
);
1528 if (0x0020 <= c
&& c
<= 0x007F) {
1532 buf
+= (UChar
)0x5c; buf
+= (UChar
)0x75;
1534 buf
+= (UChar
)0x5c; buf
+= (UChar
)0x55;
1535 buf
+= toHexString((c
& 0xF0000000) >> 28);
1536 buf
+= toHexString((c
& 0x0F000000) >> 24);
1537 buf
+= toHexString((c
& 0x00F00000) >> 20);
1538 buf
+= toHexString((c
& 0x000F0000) >> 16);
1540 buf
+= toHexString((c
& 0xF000) >> 12);
1541 buf
+= toHexString((c
& 0x0F00) >> 8);
1542 buf
+= toHexString((c
& 0x00F0) >> 4);
1543 buf
+= toHexString(c
& 0x000F);