1 /********************************************************************
3 * Copyright (c) 2005-2006, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
6 /************************************************************************
7 * Tests for the UText and UTextIterator text abstraction classes
9 ************************************************************************/
11 #include "unicode/utypes.h"
16 #include <unicode/utext.h>
17 #include <unicode/utf8.h>
18 #include <unicode/ustring.h>
19 #include <unicode/uchriter.h>
22 static UBool gFailed
= FALSE
;
23 static int gTestNum
= 0;
26 UText
*openFragmentedUnicodeString(UText
*ut
, UnicodeString
*s
, UErrorCode
*status
);
28 #define TEST_ASSERT(x) \
29 { if ((x)==FALSE) {errln("Test #%d failure in file %s at line %d\n", gTestNum, __FILE__, __LINE__);\
34 #define TEST_SUCCESS(status) \
35 { if (U_FAILURE(status)) {errln("Test #%d failure in file %s at line %d. Error = \"%s\"\n", \
36 gTestNum, __FILE__, __LINE__, u_errorName(status)); \
40 UTextTest::UTextTest() {
43 UTextTest::~UTextTest() {
48 UTextTest::runIndexedTest(int32_t index
, UBool exec
,
49 const char* &name
, char* /*par*/) {
51 case 0: name
= "TextTest";
52 if (exec
) TextTest(); break;
53 case 1: name
= "ErrorTest";
54 if (exec
) ErrorTest(); break;
55 case 2: name
= "FreezeTest";
56 if (exec
) FreezeTest(); break;
57 default: name
= ""; break;
62 // Quick and dirty random number generator.
63 // (Don't use the library generator, so that results are portable.)
// m_seed holds the generator state; it starts at 1.
64 static uint32_t m_seed
= 1;
// m_rand() advances m_seed with one multiply-and-add step
// (m_seed * 1103515245 + 12345), then returns (m_seed / 65536) % 32768,
// i.e. a value in the range 0..32767.
65 static uint32_t m_rand()
67 m_seed
= m_seed
* 1103515245 + 12345;
68 return (uint32_t)(m_seed
/65536) % 32768;
75 // Top Level function for UText testing.
76 // Specifies the strings to be tested, with the actual testing itself
77 // being carried out in another function, TestString().
79 void UTextTest::TextTest() {
82 TestString("abcd\\U00010001xyz");
85 // Supplementary chars at start or end
86 TestString("\\U00010001");
87 TestString("abc\\U00010001");
88 TestString("\\U00010001abc");
90 // Test simple strings of lengths 1 to 60, looking for glitches at buffer boundaries
92 for (i
=1; i
<60; i
++) {
96 // backslash. Needs to be escaped
97 s
.append((UChar
)0x5c);
99 s
.append(UChar(j
+0x30));
104 // Test strings with odd-aligned supplementary chars,
105 // looking for glitches at buffer boundaries
106 for (i
=1; i
<60; i
++) {
108 s
.append((UChar
)0x41);
109 for (j
=0; j
<i
; j
++) {
110 s
.append(UChar32(j
+0x11000));
115 // String of chars of randomly varying size in utf-8 representation.
116 // Exercise the mapping, and the varying sized buffer.
122 UChar32 c4
= 0x11000;
123 for (i
=0; i
<1000; i
++) {
124 int len8
= m_rand()%4
+ 1;
128 // don't put 0 into string (0 terminated strings for some tests)
129 // don't put '\', will cause unescape() to fail.
130 if (c1
==0x5c || c1
==0) {
151 // TestString() Run a suite of UText tests on a string.
152 // The test string is unescaped before use.
154 void UTextTest::TestString(const UnicodeString
&s
) {
159 UErrorCode status
= U_ZERO_ERROR
;
163 UnicodeString sa
= s
.unescape();
167 // Build up a mapping between code points and UTF-16 code unit indexes.
169 m
*cpMap
= new m
[sa
.length() + 1];
171 for (i
=0; i
<sa
.length(); i
=sa
.moveIndex32(i
, 1)) {
173 cpMap
[j
].nativeIdx
= i
;
178 cpMap
[j
].nativeIdx
= i
; // position following the last char in utf-16 string.
181 // UChar * test, null terminated
182 status
= U_ZERO_ERROR
;
183 UChar
*buf
= new UChar
[saLen
+1];
184 sa
.extract(buf
, saLen
+1, status
);
185 TEST_SUCCESS(status
);
186 ut
= utext_openUChars(NULL
, buf
, -1, &status
);
187 TEST_SUCCESS(status
);
188 TestAccess(sa
, ut
, cpCount
, cpMap
);
192 // UChar * test, with length
193 status
= U_ZERO_ERROR
;
194 buf
= new UChar
[saLen
+1];
195 sa
.extract(buf
, saLen
+1, status
);
196 TEST_SUCCESS(status
);
197 ut
= utext_openUChars(NULL
, buf
, saLen
, &status
);
198 TEST_SUCCESS(status
);
199 TestAccess(sa
, ut
, cpCount
, cpMap
);
204 // UnicodeString test
205 status
= U_ZERO_ERROR
;
206 ut
= utext_openUnicodeString(NULL
, &sa
, &status
);
207 TEST_SUCCESS(status
);
208 TestAccess(sa
, ut
, cpCount
, cpMap
);
209 TestCMR(sa
, ut
, cpCount
, cpMap
, cpMap
);
213 // Const UnicodeString test
214 status
= U_ZERO_ERROR
;
215 ut
= utext_openConstUnicodeString(NULL
, &sa
, &status
);
216 TEST_SUCCESS(status
);
217 TestAccess(sa
, ut
, cpCount
, cpMap
);
221 // Replaceable test. (UnicodeString inherits Replaceable)
222 status
= U_ZERO_ERROR
;
223 ut
= utext_openReplaceable(NULL
, &sa
, &status
);
224 TEST_SUCCESS(status
);
225 TestAccess(sa
, ut
, cpCount
, cpMap
);
226 TestCMR(sa
, ut
, cpCount
, cpMap
, cpMap
);
229 // Character Iterator Tests
230 status
= U_ZERO_ERROR
;
231 const UChar
*cbuf
= sa
.getBuffer();
232 CharacterIterator
*ci
= new UCharCharacterIterator(cbuf
, saLen
, status
);
233 TEST_SUCCESS(status
);
234 ut
= utext_openCharacterIterator(NULL
, ci
, &status
);
235 TEST_SUCCESS(status
);
236 TestAccess(sa
, ut
, cpCount
, cpMap
);
241 // Fragmented UnicodeString (Chunk size of one)
243 status
= U_ZERO_ERROR
;
244 ut
= openFragmentedUnicodeString(NULL
, &sa
, &status
);
245 TEST_SUCCESS(status
);
246 TestAccess(sa
, ut
, cpCount
, cpMap
);
253 // Convert the test string from UnicodeString to (char *) in utf-8 format
254 int32_t u8Len
= sa
.extract(0, sa
.length(), NULL
, 0, "utf-8");
255 char *u8String
= new char[u8Len
+ 1];
256 sa
.extract(0, sa
.length(), u8String
, u8Len
+1, "utf-8");
258 // Build up the map of code point indices in the utf-8 string
259 m
* u8Map
= new m
[sa
.length() + 1];
260 i
= 0; // native utf-8 index
261 for (j
=0; j
<cpCount
; j
++) { // code point number
262 u8Map
[j
].nativeIdx
= i
;
263 U8_NEXT(u8String
, i
, u8Len
, c
)
266 u8Map
[cpCount
].nativeIdx
= u8Len
; // position following the last char in utf-8 string.
268 // Do the test itself
269 status
= U_ZERO_ERROR
;
270 ut
= utext_openUTF8(NULL
, u8String
, -1, &status
);
271 TEST_SUCCESS(status
);
272 TestAccess(sa
, ut
, cpCount
, u8Map
);
282 // TestCMR test Copy, Move and Replace operations.
283 // us UnicodeString containing the test text.
284 // ut UText containing the same test text.
285 // cpCount number of code points in the test text.
286 // nativeMap Mapping from code points to native indexes for the UText.
287 // u16Map Mapping from code points to UTF-16 indexes, for use with the UnicodeString.
289 // This function runs a whole series of operations on each incoming UText.
290 // The UText is deep-cloned prior to each operation, so that the original UText remains unchanged.
292 void UTextTest::TestCMR(const UnicodeString
&us
, UText
*ut
, int cpCount
, m
*nativeMap
, m
*u16Map
) {
293 TEST_ASSERT(utext_isWritable(ut
) == TRUE
);
295 int srcLengthType
; // Loop variables for selecting the position and length
296 int srcPosType
; // of the block to operate on within the source text.
299 int srcIndex
= 0; // Code Point indexes of the block to operate on for
300 int srcLength
= 0; // a specific test.
302 int destIndex
= 0; // Code point index of the destination for a copy/move test.
304 int32_t nativeStart
= 0; // Native unit indexes for a test.
305 int32_t nativeLimit
= 0;
306 int32_t nativeDest
= 0;
308 int32_t u16Start
= 0; // UTF-16 indexes for a test.
309 int32_t u16Limit
= 0; // used when performing the same operation in a Unicode String
312 // Iterate over a whole series of source indexes, lengths and target indexes.
313 // This is done with code point indexes; these will be later translated to native
314 // indexes using the cpMap.
315 for (srcLengthType
=1; srcLengthType
<=3; srcLengthType
++) {
316 switch (srcLengthType
) {
317 case 1: srcLength
= 1; break;
318 case 2: srcLength
= 5; break;
319 case 3: srcLength
= cpCount
/ 3;
321 for (srcPosType
=1; srcPosType
<=5; srcPosType
++) {
322 switch (srcPosType
) {
323 case 1: srcIndex
= 0; break;
324 case 2: srcIndex
= 1; break;
325 case 3: srcIndex
= cpCount
- srcLength
; break;
326 case 4: srcIndex
= cpCount
- srcLength
- 1; break;
327 case 5: srcIndex
= cpCount
/ 2; break;
329 if (srcIndex
< 0 || srcIndex
+ srcLength
> cpCount
) {
330 // filter out bogus test cases -
331 // those with a source range that falls off an edge of the string.
336 // Copy and move tests.
337 // iterate over a variety of destination positions.
339 for (destPosType
=1; destPosType
<=4; destPosType
++) {
340 switch (destPosType
) {
341 case 1: destIndex
= 0; break;
342 case 2: destIndex
= 1; break;
343 case 3: destIndex
= srcIndex
- 1; break;
344 case 4: destIndex
= srcIndex
+ srcLength
+ 1; break;
345 case 5: destIndex
= cpCount
-1; break;
346 case 6: destIndex
= cpCount
; break;
348 if (destIndex
<0 || destIndex
>cpCount
) {
349 // filter out bogus test cases.
353 nativeStart
= nativeMap
[srcIndex
].nativeIdx
;
354 nativeLimit
= nativeMap
[srcIndex
+srcLength
].nativeIdx
;
355 nativeDest
= nativeMap
[destIndex
].nativeIdx
;
357 u16Start
= u16Map
[srcIndex
].nativeIdx
;
358 u16Limit
= u16Map
[srcIndex
+srcLength
].nativeIdx
;
359 u16Dest
= u16Map
[destIndex
].nativeIdx
;
362 TestCopyMove(us
, ut
, FALSE
,
363 nativeStart
, nativeLimit
, nativeDest
,
364 u16Start
, u16Limit
, u16Dest
);
366 TestCopyMove(us
, ut
, TRUE
,
367 nativeStart
, nativeLimit
, nativeDest
,
368 u16Start
, u16Limit
, u16Dest
);
378 UnicodeString
fullRepString("This is an arbitrary string that will be used as replacement text");
379 for (int32_t replStrLen
=0; replStrLen
<20; replStrLen
++) {
380 UnicodeString
repStr(fullRepString
, 0, replStrLen
);
382 nativeStart
, nativeLimit
,
396 // TestCopyMove run a single test case for utext_copy.
397 // Test cases are created in TestCMR and dispatched here for execution.
399 void UTextTest::TestCopyMove(const UnicodeString
&us
, UText
*ut
, UBool move
,
400 int32_t nativeStart
, int32_t nativeLimit
, int32_t nativeDest
,
401 int32_t u16Start
, int32_t u16Limit
, int32_t u16Dest
)
403 UErrorCode status
= U_ZERO_ERROR
;
404 UText
*targetUT
= NULL
;
409 // clone the UText. The test will be run in the cloned copy
410 // so that we don't alter the original.
412 targetUT
= utext_clone(NULL
, ut
, TRUE
, FALSE
, &status
);
413 TEST_SUCCESS(status
);
414 UnicodeString
targetUS(us
); // And copy the reference string.
416 // do the test operation first in the reference
417 targetUS
.copy(u16Start
, u16Limit
, u16Dest
);
419 // delete out the source range.
420 if (u16Limit
< u16Dest
) {
421 targetUS
.removeBetween(u16Start
, u16Limit
);
423 int32_t amtCopied
= u16Limit
- u16Start
;
424 targetUS
.removeBetween(u16Start
+amtCopied
, u16Limit
+amtCopied
);
428 // Do the same operation in the UText under test
429 utext_copy(targetUT
, nativeStart
, nativeLimit
, nativeDest
, move
, &status
);
430 if (nativeDest
> nativeStart
&& nativeDest
< nativeLimit
) {
431 TEST_ASSERT(status
== U_INDEX_OUTOFBOUNDS_ERROR
);
433 TEST_SUCCESS(status
);
435 // Compare the results of the two parallel tests
436 int32_t usi
= 0; // UnicodeString position, utf-16 index.
437 int64_t uti
= 0; // UText position, native index.
438 int32_t cpi
; // char32 position (code point index)
439 UChar32 usc
; // code point from Unicode String
440 UChar32 utc
; // code point from UText
441 utext_setNativeIndex(targetUT
, 0);
442 for (cpi
=0; ; cpi
++) {
443 usc
= targetUS
.char32At(usi
);
444 utc
= utext_next32(targetUT
);
448 TEST_ASSERT(uti
== usi
);
449 TEST_ASSERT(utc
== usc
);
450 usi
= targetUS
.moveIndex32(usi
, 1);
451 uti
= utext_getNativeIndex(targetUT
);
453 goto cleanupAndReturn
;
456 int64_t expectedNativeLength
= utext_nativeLength(ut
);
458 expectedNativeLength
+= nativeLimit
- nativeStart
;
460 uti
= utext_getNativeIndex(targetUT
);
461 TEST_ASSERT(uti
== expectedNativeLength
);
465 utext_close(targetUT
);
470 // TestReplace Test a single Replace operation.
472 void UTextTest::TestReplace(
473 const UnicodeString
&us
, // reference UnicodeString in which to do the replace
474 UText
*ut
, // UnicodeText object under test.
475 int32_t nativeStart
, // Range to be replaced, in UText native units.
477 int32_t u16Start
, // Range to be replaced, in UTF-16 units
478 int32_t u16Limit
, // for use in the reference UnicodeString.
479 const UnicodeString
&repStr
) // The replacement string
481 UErrorCode status
= U_ZERO_ERROR
;
482 UText
*targetUT
= NULL
;
487 // clone the target UText. The test will be run in the cloned copy
488 // so that we don't alter the original.
490 targetUT
= utext_clone(NULL
, ut
, TRUE
, FALSE
, &status
);
491 TEST_SUCCESS(status
);
492 UnicodeString
targetUS(us
); // And copy the reference string.
495 // Do the replace operation in the Unicode String, to
496 // produce a reference result.
498 targetUS
.replace(u16Start
, u16Limit
-u16Start
, repStr
);
501 // Do the replace on the UText under test
503 const UChar
*rs
= repStr
.getBuffer();
504 int32_t rsLen
= repStr
.length();
505 int32_t actualDelta
= utext_replace(targetUT
, nativeStart
, nativeLimit
, rs
, rsLen
, &status
);
506 int32_t expectedDelta
= repStr
.length() - (nativeLimit
- nativeStart
);
507 TEST_ASSERT(actualDelta
== expectedDelta
);
510 // Compare the results
512 int32_t usi
= 0; // UnicodeString position, utf-16 index.
513 int64_t uti
= 0; // UText position, native index.
514 int32_t cpi
; // char32 position (code point index)
515 UChar32 usc
; // code point from Unicode String
516 UChar32 utc
; // code point from UText
517 int64_t expectedNativeLength
= 0;
518 utext_setNativeIndex(targetUT
, 0);
519 for (cpi
=0; ; cpi
++) {
520 usc
= targetUS
.char32At(usi
);
521 utc
= utext_next32(targetUT
);
525 TEST_ASSERT(uti
== usi
);
526 TEST_ASSERT(utc
== usc
);
527 usi
= targetUS
.moveIndex32(usi
, 1);
528 uti
= utext_getNativeIndex(targetUT
);
530 goto cleanupAndReturn
;
533 expectedNativeLength
= utext_nativeLength(ut
) + expectedDelta
;
534 uti
= utext_getNativeIndex(targetUT
);
535 TEST_ASSERT(uti
== expectedNativeLength
);
538 utext_close(targetUT
);
542 // TestAccess() Test the read only access functions on a UText.
543 // The text is accessed in a variety of ways, and compared with
544 // the reference UnicodeString.
546 void UTextTest::TestAccess(const UnicodeString
&us
, UText
*ut
, int cpCount
, m
*cpMap
) {
547 UErrorCode status
= U_ZERO_ERROR
;
551 // Check the length from the UText
553 int64_t expectedLen
= cpMap
[cpCount
].nativeIdx
;
554 int64_t utlen
= utext_nativeLength(ut
);
555 TEST_ASSERT(expectedLen
== utlen
);
558 // Iterate forwards, verify that we get the correct code points
559 // at the correct native offsets.
563 int64_t expectedIndex
= 0;
564 int64_t foundIndex
= 0;
569 for (i
=0; i
<cpCount
; i
++) {
570 expectedIndex
= cpMap
[i
].nativeIdx
;
571 foundIndex
= utext_getNativeIndex(ut
);
572 TEST_ASSERT(expectedIndex
== foundIndex
);
573 expectedC
= cpMap
[i
].cp
;
574 foundC
= utext_next32(ut
);
575 TEST_ASSERT(expectedC
== foundC
);
576 foundIndex
= utext_getPreviousNativeIndex(ut
);
577 TEST_ASSERT(expectedIndex
== foundIndex
);
582 foundC
= utext_next32(ut
);
583 TEST_ASSERT(foundC
== U_SENTINEL
);
585 // Repeat above, using macros
586 utext_setNativeIndex(ut
, 0);
587 for (i
=0; i
<cpCount
; i
++) {
588 expectedIndex
= cpMap
[i
].nativeIdx
;
589 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
590 TEST_ASSERT(expectedIndex
== foundIndex
);
591 expectedC
= cpMap
[i
].cp
;
592 foundC
= UTEXT_NEXT32(ut
);
593 TEST_ASSERT(expectedC
== foundC
);
598 foundC
= UTEXT_NEXT32(ut
);
599 TEST_ASSERT(foundC
== U_SENTINEL
);
602 // Forward iteration (above) should have left index at the
603 // end of the input, which should == length().
605 len
= utext_nativeLength(ut
);
606 foundIndex
= utext_getNativeIndex(ut
);
607 TEST_ASSERT(len
== foundIndex
);
610 // Iterate backwards over entire test string
612 len
= utext_getNativeIndex(ut
);
613 utext_setNativeIndex(ut
, len
);
614 for (i
=cpCount
-1; i
>=0; i
--) {
615 expectedC
= cpMap
[i
].cp
;
616 expectedIndex
= cpMap
[i
].nativeIdx
;
617 int64_t prevIndex
= utext_getPreviousNativeIndex(ut
);
618 foundC
= utext_previous32(ut
);
619 foundIndex
= utext_getNativeIndex(ut
);
620 TEST_ASSERT(expectedIndex
== foundIndex
);
621 TEST_ASSERT(expectedC
== foundC
);
622 TEST_ASSERT(prevIndex
== foundIndex
);
629 // Backwards iteration, above, should have left our iterator
630 // position at zero, and continued backwards iteration should fail.
632 foundIndex
= utext_getNativeIndex(ut
);
633 TEST_ASSERT(foundIndex
== 0);
634 foundIndex
= utext_getPreviousNativeIndex(ut
);
635 TEST_ASSERT(foundIndex
== 0);
638 foundC
= utext_previous32(ut
);
639 TEST_ASSERT(foundC
== U_SENTINEL
);
640 foundIndex
= utext_getNativeIndex(ut
);
641 TEST_ASSERT(foundIndex
== 0);
642 foundIndex
= utext_getPreviousNativeIndex(ut
);
643 TEST_ASSERT(foundIndex
== 0);
646 // And again, with the macros
647 utext_setNativeIndex(ut
, len
);
648 for (i
=cpCount
-1; i
>=0; i
--) {
649 expectedC
= cpMap
[i
].cp
;
650 expectedIndex
= cpMap
[i
].nativeIdx
;
651 foundC
= UTEXT_PREVIOUS32(ut
);
652 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
653 TEST_ASSERT(expectedIndex
== foundIndex
);
654 TEST_ASSERT(expectedC
== foundC
);
661 // Backwards iteration, above, should have left our iterator
662 // position at zero, and continued backwards iteration should fail.
664 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
665 TEST_ASSERT(foundIndex
== 0);
667 foundC
= UTEXT_PREVIOUS32(ut
);
668 TEST_ASSERT(foundC
== U_SENTINEL
);
669 foundIndex
= UTEXT_GETNATIVEINDEX(ut
);
670 TEST_ASSERT(foundIndex
== 0);
676 // next32From(), previous32From(), Iterate in a somewhat random order.
679 for (i
=0; i
<cpCount
; i
++) {
680 cpIndex
= (cpIndex
+ 9973) % cpCount
;
681 index
= cpMap
[cpIndex
].nativeIdx
;
682 expectedC
= cpMap
[cpIndex
].cp
;
683 foundC
= utext_next32From(ut
, index
);
684 TEST_ASSERT(expectedC
== foundC
);
691 for (i
=0; i
<cpCount
; i
++) {
692 cpIndex
= (cpIndex
+ 9973) % cpCount
;
693 index
= cpMap
[cpIndex
+1].nativeIdx
;
694 expectedC
= cpMap
[cpIndex
].cp
;
695 foundC
= utext_previous32From(ut
, index
);
696 TEST_ASSERT(expectedC
== foundC
);
704 // moveIndex(int32_t delta);
707 // Walk through frontwards, incrementing by one
708 utext_setNativeIndex(ut
, 0);
709 for (i
=1; i
<=cpCount
; i
++) {
710 utext_moveIndex32(ut
, 1);
711 index
= utext_getNativeIndex(ut
);
712 expectedIndex
= cpMap
[i
].nativeIdx
;
713 TEST_ASSERT(expectedIndex
== index
);
714 index
= UTEXT_GETNATIVEINDEX(ut
);
715 TEST_ASSERT(expectedIndex
== index
);
718 // Walk through frontwards, incrementing by two
719 utext_setNativeIndex(ut
, 0);
720 for (i
=2; i
<cpCount
; i
+=2) {
721 utext_moveIndex32(ut
, 2);
722 index
= utext_getNativeIndex(ut
);
723 expectedIndex
= cpMap
[i
].nativeIdx
;
724 TEST_ASSERT(expectedIndex
== index
);
725 index
= UTEXT_GETNATIVEINDEX(ut
);
726 TEST_ASSERT(expectedIndex
== index
);
729 // walk through the string backwards, decrementing by one.
730 i
= cpMap
[cpCount
].nativeIdx
;
731 utext_setNativeIndex(ut
, i
);
732 for (i
=cpCount
; i
>=0; i
--) {
733 expectedIndex
= cpMap
[i
].nativeIdx
;
734 index
= utext_getNativeIndex(ut
);
735 TEST_ASSERT(expectedIndex
== index
);
736 index
= UTEXT_GETNATIVEINDEX(ut
);
737 TEST_ASSERT(expectedIndex
== index
);
738 utext_moveIndex32(ut
, -1);
742 // walk through backwards, decrementing by three
743 i
= cpMap
[cpCount
].nativeIdx
;
744 utext_setNativeIndex(ut
, i
);
745 for (i
=cpCount
; i
>=0; i
-=3) {
746 expectedIndex
= cpMap
[i
].nativeIdx
;
747 index
= utext_getNativeIndex(ut
);
748 TEST_ASSERT(expectedIndex
== index
);
749 index
= UTEXT_GETNATIVEINDEX(ut
);
750 TEST_ASSERT(expectedIndex
== index
);
751 utext_moveIndex32(ut
, -3);
758 int bufSize
= us
.length() + 10;
759 UChar
*buf
= new UChar
[bufSize
];
760 status
= U_ZERO_ERROR
;
761 expectedLen
= us
.length();
762 len
= utext_extract(ut
, 0, utlen
, buf
, bufSize
, &status
);
763 TEST_SUCCESS(status
);
764 TEST_ASSERT(len
== expectedLen
);
765 int compareResult
= us
.compare(buf
, -1);
766 TEST_ASSERT(compareResult
== 0);
768 status
= U_ZERO_ERROR
;
769 len
= utext_extract(ut
, 0, utlen
, NULL
, 0, &status
);
771 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
773 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
775 TEST_ASSERT(len
== expectedLen
);
777 status
= U_ZERO_ERROR
;
778 u_memset(buf
, 0x5555, bufSize
);
779 len
= utext_extract(ut
, 0, utlen
, buf
, 1, &status
);
780 if (us
.length() == 0) {
781 TEST_SUCCESS(status
);
782 TEST_ASSERT(buf
[0] == 0);
784 // Buf len == 1, extracting a single 16 bit value.
785 // If the data char is supplementary, it doesn't matter whether the buffer remains unchanged,
786 // or whether the lead surrogate of the pair is extracted.
787 // It's a buffer overflow error in either case.
788 TEST_ASSERT(buf
[0] == us
.charAt(0) ||
789 buf
[0] == 0x5555 && U_IS_SUPPLEMENTARY(us
.char32At(0)));
790 TEST_ASSERT(buf
[1] == 0x5555);
791 if (us
.length() == 1) {
792 TEST_ASSERT(status
== U_STRING_NOT_TERMINATED_WARNING
);
794 TEST_ASSERT(status
== U_BUFFER_OVERFLOW_ERROR
);
804 // ErrorTest() Check various error and edge cases.
806 void UTextTest::ErrorTest()
808 // Close of an uninitialized UText. Shouldn't blow up.
811 memset(&ut
, 0, sizeof(UText
));
816 // Double-close of a UText. Shouldn't blow up. UText should still be usable.
818 UErrorCode status
= U_ZERO_ERROR
;
819 UText ut
= UTEXT_INITIALIZER
;
820 UnicodeString
s("Hello, World");
821 UText
*ut2
= utext_openUnicodeString(&ut
, &s
, &status
);
822 TEST_SUCCESS(status
);
823 TEST_ASSERT(ut2
== &ut
);
825 UText
*ut3
= utext_close(&ut
);
826 TEST_ASSERT(ut3
== &ut
);
828 UText
*ut4
= utext_close(&ut
);
829 TEST_ASSERT(ut4
== &ut
);
831 utext_openUnicodeString(&ut
, &s
, &status
);
832 TEST_SUCCESS(status
);
836 // Re-use of a UText, chaining through each of the types of UText
837 // (If it doesn't blow up, and doesn't leak, it's probably working fine)
839 UErrorCode status
= U_ZERO_ERROR
;
840 UText ut
= UTEXT_INITIALIZER
;
842 UnicodeString
s1("Hello, World");
843 UChar s2
[] = {(UChar
)0x41, (UChar
)0x42, (UChar
)0};
844 const char *s3
= "\x66\x67\x68";
846 utp
= utext_openUnicodeString(&ut
, &s1
, &status
);
847 TEST_SUCCESS(status
);
848 TEST_ASSERT(utp
== &ut
);
850 utp
= utext_openConstUnicodeString(&ut
, &s1
, &status
);
851 TEST_SUCCESS(status
);
852 TEST_ASSERT(utp
== &ut
);
854 utp
= utext_openUTF8(&ut
, s3
, -1, &status
);
855 TEST_SUCCESS(status
);
856 TEST_ASSERT(utp
== &ut
);
858 utp
= utext_openUChars(&ut
, s2
, -1, &status
);
859 TEST_SUCCESS(status
);
860 TEST_ASSERT(utp
== &ut
);
862 utp
= utext_close(&ut
);
863 TEST_ASSERT(utp
== &ut
);
865 utp
= utext_openUnicodeString(&ut
, &s1
, &status
);
866 TEST_SUCCESS(status
);
867 TEST_ASSERT(utp
== &ut
);
871 // UTF-8 with malformed sequences.
872 // These should come through as the Unicode replacement char, \ufffd
875 UErrorCode status
= U_ZERO_ERROR
;
877 const char *badUTF8
= "\x41\x81\x42\xf0\x81\x81\x43";
880 ut
= utext_openUTF8(NULL
, badUTF8
, -1, &status
);
881 TEST_SUCCESS(status
);
882 c
= utext_char32At(ut
, 1);
883 TEST_ASSERT(c
== 0xfffd);
884 c
= utext_char32At(ut
, 3);
885 TEST_ASSERT(c
== 0xfffd);
886 c
= utext_char32At(ut
, 5);
887 TEST_ASSERT(c
== 0xfffd);
888 c
= utext_char32At(ut
, 6);
889 TEST_ASSERT(c
== 0x43);
892 int n
= utext_extract(ut
, 0, 9, buf
, 10, &status
);
893 TEST_SUCCESS(status
);
895 TEST_ASSERT(buf
[1] == 0xfffd);
896 TEST_ASSERT(buf
[3] == 0xfffd);
897 TEST_ASSERT(buf
[2] == 0x42);
903 // isLengthExpensive - does it make the expected transitions after
904 // getting the length of a nul terminated string?
907 UErrorCode status
= U_ZERO_ERROR
;
908 UnicodeString
sa("Hello, this is a string");
912 memset(sb
, 0x20, sizeof(sb
));
915 UText
*uta
= utext_openUnicodeString(NULL
, &sa
, &status
);
916 TEST_SUCCESS(status
);
917 isExpensive
= utext_isLengthExpensive(uta
);
918 TEST_ASSERT(isExpensive
== FALSE
);
921 UText
*utb
= utext_openUChars(NULL
, sb
, -1, &status
);
922 TEST_SUCCESS(status
);
923 isExpensive
= utext_isLengthExpensive(utb
);
924 TEST_ASSERT(isExpensive
== TRUE
);
925 int64_t len
= utext_nativeLength(utb
);
926 TEST_ASSERT(len
== 99);
927 isExpensive
= utext_isLengthExpensive(utb
);
928 TEST_ASSERT(isExpensive
== FALSE
);
933 // Index to positions not on code point boundaries.
936 const char *u8str
= "\xc8\x81\xe1\x82\x83\xf1\x84\x85\x86";
937 int32_t startMap
[] = { 0, 0, 2, 2, 2, 5, 5, 5, 5, 9, 9};
938 int32_t nextMap
[] = { 2, 2, 5, 5, 5, 9, 9, 9, 9, 9, 9};
939 int32_t prevMap
[] = { 0, 0, 0, 0, 0, 2, 2, 2, 2, 5, 5};
940 UChar32 c32Map
[] = {0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146, 0x044146, 0x044146, -1, -1};
941 UChar32 pr32Map
[] = { -1, -1, 0x201, 0x201, 0x201, 0x1083, 0x1083, 0x1083, 0x1083, 0x044146, 0x044146};
943 // exLen is the size, in UChars, of what will be extracted between index and index+1.
944 // It is zero when both index positions lie within the same code point.
945 int32_t exLen
[] = { 0, 1, 0, 0, 1, 0, 0, 0, 2, 0, 0};
948 UErrorCode status
= U_ZERO_ERROR
;
949 UText
*ut
= utext_openUTF8(NULL
, u8str
, -1, &status
);
950 TEST_SUCCESS(status
);
954 int32_t startMapLimit
= sizeof(startMap
) / sizeof(int32_t);
955 for (i
=0; i
<startMapLimit
; i
++) {
956 utext_setNativeIndex(ut
, i
);
957 int64_t cpIndex
= utext_getNativeIndex(ut
);
958 TEST_ASSERT(cpIndex
== startMap
[i
]);
959 cpIndex
= UTEXT_GETNATIVEINDEX(ut
);
960 TEST_ASSERT(cpIndex
== startMap
[i
]);
964 for (i
=0; i
<startMapLimit
; i
++) {
965 UChar32 c32
= utext_char32At(ut
, i
);
966 TEST_ASSERT(c32
== c32Map
[i
]);
967 int64_t cpIndex
= utext_getNativeIndex(ut
);
968 TEST_ASSERT(cpIndex
== startMap
[i
]);
971 // Check utext_next32From
972 for (i
=0; i
<startMapLimit
; i
++) {
973 UChar32 c32
= utext_next32From(ut
, i
);
974 TEST_ASSERT(c32
== c32Map
[i
]);
975 int64_t cpIndex
= utext_getNativeIndex(ut
);
976 TEST_ASSERT(cpIndex
== nextMap
[i
]);
979 // check utext_previous32From
980 for (i
=0; i
<startMapLimit
; i
++) {
982 UChar32 c32
= utext_previous32From(ut
, i
);
983 TEST_ASSERT(c32
== pr32Map
[i
]);
984 int64_t cpIndex
= utext_getNativeIndex(ut
);
985 TEST_ASSERT(cpIndex
== prevMap
[i
]);
989 // Extract from i to i+1, which may be zero or one code points,
990 // depending on whether the indices straddle a cp boundary.
991 for (i
=0; i
<startMapLimit
; i
++) {
993 status
= U_ZERO_ERROR
;
994 int32_t extractedLen
= utext_extract(ut
, i
, i
+1, buf
, 3, &status
);
995 TEST_SUCCESS(status
);
996 TEST_ASSERT(extractedLen
== exLen
[i
]);
997 if (extractedLen
> 0) {
999 U16_GET(buf
, 0, 0, extractedLen
, c32
);
1000 TEST_ASSERT(c32
== c32Map
[i
]);
1008 { // Similar test, with utf16 instead of utf8
1009 // TODO: merge the common parts of these tests.
1011 UnicodeString
u16str("\\u1000\\U00011000\\u2000\\U00022000");
1012 int32_t startMap
[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
1013 int32_t nextMap
[] = { 1, 3, 3, 4, 6, 6, 6, 6};
1014 int32_t prevMap
[] = { 0, 0, 0, 1, 3, 3, 4, 4};
1015 UChar32 c32Map
[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
1016 UChar32 pr32Map
[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
1017 int32_t exLen
[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
1019 u16str
= u16str
.unescape();
1020 UErrorCode status
= U_ZERO_ERROR
;
1021 UText
*ut
= utext_openUnicodeString(NULL
, &u16str
, &status
);
1022 TEST_SUCCESS(status
);
1024 int32_t startMapLimit
= sizeof(startMap
) / sizeof(int32_t);
1026 for (i
=0; i
<startMapLimit
; i
++) {
1027 utext_setNativeIndex(ut
, i
);
1028 int64_t cpIndex
= utext_getNativeIndex(ut
);
1029 TEST_ASSERT(cpIndex
== startMap
[i
]);
1033 for (i
=0; i
<startMapLimit
; i
++) {
1034 UChar32 c32
= utext_char32At(ut
, i
);
1035 TEST_ASSERT(c32
== c32Map
[i
]);
1036 int64_t cpIndex
= utext_getNativeIndex(ut
);
1037 TEST_ASSERT(cpIndex
== startMap
[i
]);
1040 // Check utext_next32From
1041 for (i
=0; i
<startMapLimit
; i
++) {
1042 UChar32 c32
= utext_next32From(ut
, i
);
1043 TEST_ASSERT(c32
== c32Map
[i
]);
1044 int64_t cpIndex
= utext_getNativeIndex(ut
);
1045 TEST_ASSERT(cpIndex
== nextMap
[i
]);
1048 // check utext_previous32From
1049 for (i
=0; i
<startMapLimit
; i
++) {
1050 UChar32 c32
= utext_previous32From(ut
, i
);
1051 TEST_ASSERT(c32
== pr32Map
[i
]);
1052 int64_t cpIndex
= utext_getNativeIndex(ut
);
1053 TEST_ASSERT(cpIndex
== prevMap
[i
]);
1057 // Extract from i to i+1, which may be zero or one code points,
1058 // depending on whether the indices straddle a cp boundary.
1059 for (i
=0; i
<startMapLimit
; i
++) {
1061 status
= U_ZERO_ERROR
;
1062 int32_t extractedLen
= utext_extract(ut
, i
, i
+1, buf
, 3, &status
);
1063 TEST_SUCCESS(status
);
1064 TEST_ASSERT(extractedLen
== exLen
[i
]);
1065 if (extractedLen
> 0) {
1067 U16_GET(buf
, 0, 0, extractedLen
, c32
);
1068 TEST_ASSERT(c32
== c32Map
[i
]);
1075 { // Similar test, with UText over Replaceable
1076 // TODO: merge the common parts of these tests.
1078 UnicodeString
u16str("\\u1000\\U00011000\\u2000\\U00022000");
1079 int32_t startMap
[] ={ 0, 1, 1, 3, 4, 4, 6, 6};
1080 int32_t nextMap
[] = { 1, 3, 3, 4, 6, 6, 6, 6};
1081 int32_t prevMap
[] = { 0, 0, 0, 1, 3, 3, 4, 4};
1082 UChar32 c32Map
[] = {0x1000, 0x11000, 0x11000, 0x2000, 0x22000, 0x22000, -1, -1};
1083 UChar32 pr32Map
[] = { -1, 0x1000, 0x1000, 0x11000, 0x2000, 0x2000, 0x22000, 0x22000};
1084 int32_t exLen
[] = { 1, 0, 2, 1, 0, 2, 0, 0,};
1086 u16str
= u16str
.unescape();
1087 UErrorCode status
= U_ZERO_ERROR
;
1088 UText
*ut
= utext_openReplaceable(NULL
, &u16str
, &status
);
1089 TEST_SUCCESS(status
);
1091 int32_t startMapLimit
= sizeof(startMap
) / sizeof(int32_t);
1093 for (i
=0; i
<startMapLimit
; i
++) {
1094 utext_setNativeIndex(ut
, i
);
1095 int64_t cpIndex
= utext_getNativeIndex(ut
);
1096 TEST_ASSERT(cpIndex
== startMap
[i
]);
1100 for (i
=0; i
<startMapLimit
; i
++) {
1101 UChar32 c32
= utext_char32At(ut
, i
);
1102 TEST_ASSERT(c32
== c32Map
[i
]);
1103 int64_t cpIndex
= utext_getNativeIndex(ut
);
1104 TEST_ASSERT(cpIndex
== startMap
[i
]);
1107 // Check utext_next32From
1108 for (i
=0; i
<startMapLimit
; i
++) {
1109 UChar32 c32
= utext_next32From(ut
, i
);
1110 TEST_ASSERT(c32
== c32Map
[i
]);
1111 int64_t cpIndex
= utext_getNativeIndex(ut
);
1112 TEST_ASSERT(cpIndex
== nextMap
[i
]);
1115 // check utext_previous32From
1116 for (i
=0; i
<startMapLimit
; i
++) {
1117 UChar32 c32
= utext_previous32From(ut
, i
);
1118 TEST_ASSERT(c32
== pr32Map
[i
]);
1119 int64_t cpIndex
= utext_getNativeIndex(ut
);
1120 TEST_ASSERT(cpIndex
== prevMap
[i
]);
1124 // Extract from i to i+1, which may be zero or one code points,
1125 // depending on whether the indices straddle a cp boundary.
1126 for (i
=0; i
<startMapLimit
; i
++) {
1128 status
= U_ZERO_ERROR
;
1129 int32_t extractedLen
= utext_extract(ut
, i
, i
+1, buf
, 3, &status
);
1130 TEST_SUCCESS(status
);
1131 TEST_ASSERT(extractedLen
== exLen
[i
]);
1132 if (extractedLen
> 0) {
1134 U16_GET(buf
, 0, 0, extractedLen
, c32
);
1135 TEST_ASSERT(c32
== c32Map
[i
]);
1144 void UTextTest::FreezeTest() {
1145 // Check isWritable() and freeze() behavior.
1148 UnicodeString
ustr("Hello, World.");
1149 const char u8str
[] = {char(0x31), (char)0x32, (char)0x33, 0};
1150 const UChar u16str
[] = {(UChar
)0x31, (UChar
)0x32, (UChar
)0x44, 0};
1152 UErrorCode status
= U_ZERO_ERROR
;
1156 ut
= utext_openUTF8(ut
, u8str
, -1, &status
);
1157 TEST_SUCCESS(status
);
1158 UBool writable
= utext_isWritable(ut
);
1159 TEST_ASSERT(writable
== FALSE
);
1160 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1161 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1163 status
= U_ZERO_ERROR
;
1164 ut
= utext_openUChars(ut
, u16str
, -1, &status
);
1165 TEST_SUCCESS(status
);
1166 writable
= utext_isWritable(ut
);
1167 TEST_ASSERT(writable
== FALSE
);
1168 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1169 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1171 status
= U_ZERO_ERROR
;
1172 ut
= utext_openUnicodeString(ut
, &ustr
, &status
);
1173 TEST_SUCCESS(status
);
1174 writable
= utext_isWritable(ut
);
1175 TEST_ASSERT(writable
== TRUE
);
1177 writable
= utext_isWritable(ut
);
1178 TEST_ASSERT(writable
== FALSE
);
1179 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1180 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1182 status
= U_ZERO_ERROR
;
1183 ut
= utext_openUnicodeString(ut
, &ustr
, &status
);
1184 TEST_SUCCESS(status
);
1185 ut2
= utext_clone(ut2
, ut
, FALSE
, FALSE
, &status
); // clone with readonly = false
1186 TEST_SUCCESS(status
);
1187 writable
= utext_isWritable(ut2
);
1188 TEST_ASSERT(writable
== TRUE
);
1189 ut2
= utext_clone(ut2
, ut
, FALSE
, TRUE
, &status
); // clone with readonly = true
1190 TEST_SUCCESS(status
);
1191 writable
= utext_isWritable(ut2
);
1192 TEST_ASSERT(writable
== FALSE
);
1193 utext_copy(ut2
, 1, 2, 0, TRUE
, &status
);
1194 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1196 status
= U_ZERO_ERROR
;
1197 ut
= utext_openConstUnicodeString(ut
, (const UnicodeString
*)&ustr
, &status
);
1198 TEST_SUCCESS(status
);
1199 writable
= utext_isWritable(ut
);
1200 TEST_ASSERT(writable
== FALSE
);
1201 utext_copy(ut
, 1, 2, 0, TRUE
, &status
);
1202 TEST_ASSERT(status
== U_NO_WRITE_PERMISSION
);
1204 // Deep Clone of a frozen UText should re-enable writing in the copy.
1205 status
= U_ZERO_ERROR
;
1206 ut
= utext_openUnicodeString(ut
, &ustr
, &status
);
1207 TEST_SUCCESS(status
);
1209 ut2
= utext_clone(ut2
, ut
, TRUE
, FALSE
, &status
); // deep clone
1210 TEST_SUCCESS(status
);
1211 writable
= utext_isWritable(ut2
);
1212 TEST_ASSERT(writable
== TRUE
);
1215 // Deep clone of a frozen UText, where the base type is intrinsically non-writable,
1216 // should NOT enable writing in the copy.
1217 status
= U_ZERO_ERROR
;
1218 ut
= utext_openUChars(ut
, u16str
, -1, &status
);
1219 TEST_SUCCESS(status
);
1221 ut2
= utext_clone(ut2
, ut
, TRUE
, FALSE
, &status
); // deep clone
1222 TEST_SUCCESS(status
);
1223 writable
= utext_isWritable(ut2
);
1224 TEST_ASSERT(writable
== FALSE
);
1234 // A UText type that works with a chunk size of 1.
1235 // Intended to test for edge cases.
1236 // Input comes from a UnicodeString.
1238 // ut.b the character. Put into both halves.
//
// fragTextAccess() is the access function that openFragmentedUnicodeString()
// installs in the function table. It exposes at most one UTF-16 code unit
// per chunk, taken from the UnicodeString stored in ut->context.
//   ut       the UText being accessed.
//   index    the requested native (UTF-16) index.
//   forward  direction of the intended iteration from index.
1242 static UBool U_CALLCONV
1243 fragTextAccess(UText
*ut
, int64_t index
, UBool forward
) {
1244 const UnicodeString
*us
= (const UnicodeString
*)ut
->context
;
1246 int32_t length
= us
->length();
// Forward access at a valid index: the chunk is the single code unit
// occupying [index, index+1), with the chunk offset at its start.
1247 if (forward
&& index
>=0 && index
<length
) {
1248 c
= us
->charAt((int32_t)index
);
1250 ut
->chunkOffset
= 0;
1251 ut
->chunkLength
= 1;
1252 ut
->chunkNativeStart
= index
;
1253 ut
->chunkNativeLimit
= index
+1;
// Backward access with a code unit preceding index: the chunk is the single
// code unit occupying [index-1, index), with the chunk offset at its end.
1256 if (!forward
&& index
>0 && index
<=length
) {
1257 c
= us
->charAt((int32_t)index
-1);
1259 ut
->chunkOffset
= 1;
1260 ut
->chunkLength
= 1;
1261 ut
->chunkNativeStart
= index
-1;
1262 ut
->chunkNativeLimit
= index
;
// Neither case applied: present an empty chunk (length 0), positioned either
// at native index 0 or at the end of the string.
// NOTE(review): the condition choosing between the two positions is not
// visible here - confirm against the full source.
1266 ut
->chunkOffset
= 0;
1267 ut
->chunkLength
= 0;
1269 ut
->chunkNativeStart
= 0;
1270 ut
->chunkNativeLimit
= 0;
1272 ut
->chunkNativeStart
= length
;
1273 ut
->chunkNativeLimit
= length
;
1279 // Function table to be used with this fragmented text provider.
1280 // Initialized in the open function.
// This is a single file-level table: every call to the open function
// overwrites it with a fresh copy.
1281 UTextFuncs fragmentFuncs
;
1283 // Open function for the fragmented text provider.
// Opens an ordinary UnicodeString UText over s (reusing ut if supplied), then
// redirects its function table so that text access goes through fragTextAccess.
//   ut      UText to (re)use, passed straight to utext_openUnicodeString().
//   s       the string supplying the text.
//   status  ICU error code, set by utext_openUnicodeString().
1285 openFragmentedUnicodeString(UText
*ut
, UnicodeString
*s
, UErrorCode
*status
) {
1286 ut
= utext_openUnicodeString(ut
, s
, status
);
1287 if (U_FAILURE(*status
)) {
1291 // Make a copy of the function table from the stock UnicodeString UText,
1292 // and replace the entry for the access function.
1293 memcpy(&fragmentFuncs
, ut
->pFuncs
, sizeof(fragmentFuncs
));
1294 fragmentFuncs
.access
= fragTextAccess
;
1295 ut
->pFuncs
= &fragmentFuncs
;
// Use the UText's own 'b' field as the chunk storage, then load the first
// chunk with a forward access at index 0.
1297 ut
->chunkContents
= (UChar
*)&ut
->b
;
1298 ut
->pFuncs
->access(ut
, 0, TRUE
);