1 /********************************************************************
3 * Copyright (c) 1997-2008, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 ********************************************************************/
7 #include "unicode/ustring.h"
8 #include "unicode/uchar.h"
9 #include "unicode/uniset.h"
10 #include "unicode/putil.h"
// Element count of a true array, as int32_t: the array's total byte size
// divided by the byte size of its first element.
// Only meaningful when `array` is an actual array; applied to a pointer,
// sizeof(array) would be the pointer's size and the result would be wrong.
15 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof(array[0]))
// Constructor of the UnicodeTest suite. Only the declarator is visible here;
// the body is not shown, so nothing is claimed about what it initializes.
17 UnicodeTest::UnicodeTest()
// Destructor of the UnicodeTest suite. Only the declarator is visible here.
21 UnicodeTest::~UnicodeTest()
// Test dispatcher: maps a numeric test index to a test name and, when asked
// to, runs that test.
//   index - selects the test; 0 and 1 are handled, any other value reaches
//           the default label
//   exec  - when true, the selected test is executed; when false only `name`
//           is filled in
//   name  - output: receives the selected test's name, or "" from the
//           default label
//   par   - unused (the parameter name is commented out in the signature)
25 void UnicodeTest::runIndexedTest( int32_t index
, UBool exec
, const char* &name
, char* /*par*/ )
// Announce the suite only when tests are actually being executed.
27 if (exec
) logln("TestSuite UnicodeTest: ");
// NOTE(review): the case labels below presumably belong to a switch on
// `index`; the switch line itself is not visible here - confirm.
29 case 0: name
= "TestAdditionalProperties"; if(exec
) TestAdditionalProperties(); break;
30 case 1: name
= "TestBinaryValues"; if(exec
) TestBinaryValues(); break;
// The empty name is the terminator the existing comment refers to.
31 default: name
= ""; break; //needed to end loop
35 //====================================================
36 // private data used by the tests
37 //====================================================
39 // test DerivedCoreProperties.txt -------------------------------------------
41 // copied from genprops.c
// Looks for the text at `s` among tokens[0..countTokens).
//   tokens      - array of candidate token strings
//   countTokens - number of entries in `tokens`
//   s           - input text; leading whitespace is skipped first
// The caller in derivedCorePropsLineFn uses the result as an index into
// derivedCoreProps[]. The return statements and the value returned when no
// token matches are not visible here - TODO confirm.
43 getTokenIndex(const char *const tokens
[], int32_t countTokens
, const char *s
) {
// Ignore whitespace in front of the token text.
47 s
=u_skipWhitespace(s
);
// Consider each candidate token in order.
48 for(i
=0; i
<countTokens
; ++i
) {
// Skip whitespace starting at offset j in the input. Presumably j is the
// number of characters matched against tokens[i]; its computation is not
// visible here.
57 z
=u_skipWhitespace(s
+j
);
// Test whether the next non-whitespace character is a ';' field delimiter
// or the end of the string. Presumably this is the condition for accepting
// the match; the guarded statements are not visible here.
58 if(*z
==';' || *z
==0) {
// Property names to recognize in field 1 of DerivedCoreProperties.txt lines
// (see derivedCorePropsLineFn, which passes this table to getTokenIndex).
// Used index-parallel with derivedCorePropsIndex[]: TestAdditionalProperties
// indexes both tables with the same i and reports an error if their lengths
// differ, so entries must be added to both tables in the same position.
70 static const char *const
71 derivedCorePropsNames
[]={
80 "Default_Ignorable_Code_Point",
82 "Grapheme_Link", /* Unicode 5 moves this property here from PropList.txt */
// UProperty selectors passed to u_hasBinaryProperty() by
// TestAdditionalProperties. Entry i is used together with
// derivedCorePropsNames[i], so the two tables must stay the same length and
// in the same order.
86 static const UProperty
87 derivedCorePropsIndex
[]={
96 UCHAR_DEFAULT_IGNORABLE_CODE_POINT
,
97 UCHAR_GRAPHEME_EXTEND
,
// Per-line callback handed to u_parseDelimitedFile() by
// TestAdditionalProperties.
//   context    - the UnicodeTest instance (passed as `this` by the caller)
//   fields     - per-field pointer pairs; only element [0] of field 0 and of
//                field 1 is read here
//   fieldCount - unused (parameter name commented out)
//   pErrorCode - in/out ICU error code; receives the range-parsing status
// Effect: adds the line's code point range to the UnicodeSet that belongs to
// the property named on the line.
102 U_CFUNC
void U_CALLCONV
103 derivedCorePropsLineFn(void *context
,
104 char *fields
[][2], int32_t /* fieldCount */,
105 UErrorCode
*pErrorCode
)
// Recover the test object so errors can be reported through it and its sets
// can be updated.
107 UnicodeTest
*me
=(UnicodeTest
*)context
;
// Field 0 holds the code point or range that the line applies to.
111 u_parseCodePointRange(fields
[0][0], &start
, &end
, pErrorCode
);
112 if(U_FAILURE(*pErrorCode
)) {
113 me
->errln("UnicodeTest: syntax error in DerivedCoreProperties.txt field 0 at %s\n", fields
[0][0]);
117 /* parse derived binary property name, ignore unknown names */
118 i
=getTokenIndex(derivedCorePropsNames
, LENGTHOF(derivedCorePropsNames
), fields
[1][0]);
// NOTE(review): the condition guarding this report is not visible here.
// The message text ends with "in " and nothing after it, which looks
// truncated, and it says "warning" while being reported through errln() -
// confirm the intent.
120 me
->errln("UnicodeTest warning: unknown property name '%s' in \n", fields
[1][0]);
// Record the range in the set for property i.
124 me
->derivedCoreProps
[i
].add(start
, end
);
// Cross-checks u_hasBinaryProperty() against DerivedCoreProperties.txt.
// Steps visible in this function:
//   1. sanity-check the sizes of the property tables and of derivedCoreProps[]
//   2. parse the data file (primary path, then a fallback path) so that
//      derivedCorePropsLineFn fills one UnicodeSet per property
//   3. for every code point in each set, expect u_hasBinaryProperty()==TRUE
//   4. complement every set and expect u_hasBinaryProperty()==FALSE for every
//      code point in the complemented sets
127 void UnicodeTest::TestAdditionalProperties() {
128 // test DerivedCoreProperties.txt
// There must be at least one UnicodeSet per known property name.
129 if(LENGTHOF(derivedCoreProps
)<LENGTHOF(derivedCorePropsNames
)) {
130 errln("error: UnicodeTest::derivedCoreProps[] too short, need at least %d UnicodeSets\n",
131 LENGTHOF(derivedCorePropsNames
));
// The name table and the UProperty table are indexed in parallel below, so
// they must have the same number of entries.
134 if(LENGTHOF(derivedCorePropsIndex
)!=LENGTHOF(derivedCorePropsNames
)) {
135 errln("error in ucdtest.cpp: LENGTHOF(derivedCorePropsIndex)!=LENGTHOF(derivedCorePropsNames)\n");
// NOTE(review): backupPath is a fixed 256-byte buffer that is filled below
// with unchecked strcpy()/strcat(); a long source or test-data path would
// overflow it. The declaration of newPath is not visible here.
140 char backupPath
[256];
142 UErrorCode errorCode
=U_ZERO_ERROR
;
144 /* Look inside ICU_DATA first */
// Primary location: pathToDataDirectory() followed by
// "unidata" U_FILE_SEP_STRING "DerivedCoreProperties.txt".
145 strcpy(newPath
, pathToDataDirectory());
146 strcat(newPath
, "unidata" U_FILE_SEP_STRING
"DerivedCoreProperties.txt");
148 // As a fallback, try to guess where the source data was located
149 // at the time ICU was built, and look there.
// NOTE(review): the two strcpy(backupPath, ...) calls below are presumably
// alternatives chosen by preprocessor conditionals that are not visible
// here; as shown, the second would simply overwrite the first - confirm.
151 strcpy(backupPath
, U_TOPSRCDIR U_FILE_SEP_STRING
"data");
// Alternative base: four directory levels above the loadTestData() result,
// then "data".
153 strcpy(backupPath
, loadTestData(errorCode
));
154 strcat(backupPath
, U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
".." U_FILE_SEP_STRING
"data");
156 strcat(backupPath
, U_FILE_SEP_STRING
);
157 strcat(backupPath
, "unidata" U_FILE_SEP_STRING
"DerivedCoreProperties.txt");
// Parse the primary file with ';' as the delimiter and a field count of 2.
// derivedCorePropsLineFn receives `this` as its context.
159 u_parseDelimitedFile(newPath
, ';', fields
, 2, derivedCorePropsLineFn
, this, &errorCode
);
// Only a file-access failure triggers the fallback: clear the error and
// retry with backupPath.
161 if(errorCode
==U_FILE_ACCESS_ERROR
) {
162 errorCode
=U_ZERO_ERROR
;
163 u_parseDelimitedFile(backupPath
, ';', fields
, 2, derivedCorePropsLineFn
, this, &errorCode
);
// Report any failure that remains after the attempt(s) above.
165 if(U_FAILURE(errorCode
)) {
166 errln("error parsing DerivedCoreProperties.txt: %s\n", u_errorName(errorCode
));
170 // now we have all derived core properties in the UnicodeSets
171 // run them all through the API
172 int32_t rangeCount
, range
;
// Mismatch counter; the same counter is used by both the TRUE pass and the
// FALSE pass below.
175 int32_t noErrors
= 0;
177 // test all TRUE properties
178 for(i
=0; i
<LENGTHOF(derivedCorePropsNames
); ++i
) {
179 rangeCount
=derivedCoreProps
[i
].getRangeCount();
// Iterate range by range and visit each code point inside the range.
180 for(range
=0; range
<rangeCount
; ++range
) {
181 start
=derivedCoreProps
[i
].getRangeStart(range
);
182 end
=derivedCoreProps
[i
].getRangeEnd(range
);
183 for(; start
<=end
; ++start
) {
// Every code point listed in the data file must have the property.
184 if(!u_hasBinaryProperty(start
, derivedCorePropsIndex
[i
])) {
// NOTE(review): `start` is printed with %04lx; its declaration is not
// visible here - confirm the argument type matches a long format.
185 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==FALSE is wrong\n", start
, derivedCorePropsNames
[i
]);
// Limit the output: once more than 100 mismatches have been counted, a
// final message is printed. What follows it (presumably leaving the loops)
// is not visible here.
186 if(noErrors
++ > 100) {
187 errln("Too many errors, moving to the next test");
196 // invert all properties
// After complement() each set holds the code points that the data file did
// NOT list for that property.
197 for(i
=0; i
<LENGTHOF(derivedCorePropsNames
); ++i
) {
198 derivedCoreProps
[i
].complement();
201 // test all FALSE properties
202 for(i
=0; i
<LENGTHOF(derivedCorePropsNames
); ++i
) {
203 rangeCount
=derivedCoreProps
[i
].getRangeCount();
204 for(range
=0; range
<rangeCount
; ++range
) {
205 start
=derivedCoreProps
[i
].getRangeStart(range
);
206 end
=derivedCoreProps
[i
].getRangeEnd(range
);
207 for(; start
<=end
; ++start
) {
// No code point in a complemented set may have the property.
208 if(u_hasBinaryProperty(start
, derivedCorePropsIndex
[i
])) {
209 errln("UnicodeTest error: u_hasBinaryProperty(U+%04lx, %s)==TRUE is wrong\n", start
, derivedCorePropsNames
[i
]);
// Same cap as in the TRUE pass; noErrors carries over from that pass.
210 if(noErrors
++ > 100) {
211 errln("Too many errors, moving to the next test");
// Checks that UnicodeSet patterns accept the binary property value aliases
// (N/No/F/False and Y/Yes/T/True) for the Alphabetic property, using the
// set built from "[:Alphabetic:]" as the reference.
220 void UnicodeTest::TestBinaryValues() {
222 * Unicode 5.1 explicitly defines binary property value aliases.
223 * Verify that they are all recognized.
225 UErrorCode errorCode
=U_ZERO_ERROR
;
// Reference set that every aliased pattern is compared against.
226 UnicodeSet
alpha(UNICODE_STRING_SIMPLE("[:Alphabetic:]"), errorCode
);
227 if(U_FAILURE(errorCode
)) {
228 errln("UnicodeSet([:Alphabetic:]) failed - %s\n", u_errorName(errorCode
));
// Spellings expected to mean "false" and "true" respectively.
232 static const char *const falseValues
[]={ "N", "No", "F", "False" };
233 static const char *const trueValues
[]={ "Y", "Yes", "T", "True" };
// FALSE aliases: build "[:Alphabetic=<value>:]" for each spelling.
235 for(i
=0; i
<LENGTHOF(falseValues
); ++i
) {
236 UnicodeString pattern
=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
// Insert the value two characters before the end, i.e. just in front of the
// closing ":]".
237 pattern
.insert(pattern
.length()-2, UnicodeString(falseValues
[i
], -1, US_INV
));
// Start each pattern with a clean error code.
238 errorCode
=U_ZERO_ERROR
;
239 UnicodeSet
set(pattern
, errorCode
);
240 if(U_FAILURE(errorCode
)) {
241 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", falseValues
[i
], u_errorName(errorCode
));
// Per the message, the complement of a "false" set is expected to equal the
// reference set. The complement() call and the comparison are not visible
// here - confirm.
246 errln("UnicodeSet([:Alphabetic=%s:]).complement()!=UnicodeSet([:Alphabetic:])\n", falseValues
[i
]);
// TRUE aliases: same construction, one pattern per spelling.
249 for(i
=0; i
<LENGTHOF(trueValues
); ++i
) {
250 UnicodeString pattern
=UNICODE_STRING_SIMPLE("[:Alphabetic=:]");
251 pattern
.insert(pattern
.length()-2, UnicodeString(trueValues
[i
], -1, US_INV
));
252 errorCode
=U_ZERO_ERROR
;
253 UnicodeSet
set(pattern
, errorCode
);
254 if(U_FAILURE(errorCode
)) {
255 errln("UnicodeSet([:Alphabetic=%s:]) failed - %s\n", trueValues
[i
], u_errorName(errorCode
));
// Per the message, a "true" set is expected to equal the reference set
// directly. The comparison itself is not visible here - confirm.
259 errln("UnicodeSet([:Alphabetic=%s:])!=UnicodeSet([:Alphabetic:])\n", trueValues
[i
]);