2 **********************************************************************
3 * Copyright (C) 2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: unisetperf.cpp
8 * tab size: 8 (not used)
11 * created on: 2007jan31
12 * created by: Markus Scherer
18 #include "unicode/uperf.h"
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
// Element count of a C array, as an int32_t. The argument must be an actual
// array (not a pointer), because the count is derived from sizeof.
23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
25 // Command-line options specific to unisetperf.
26 // Options do not have abbreviations: Force readable command lines.
27 // (Using U+0001 for abbreviation characters.)
// UNISETPERF_OPTIONS_COUNT is used as the size of the options[] array.
// NOTE(review): only this one name is visible in this view; SET_PATTERN and
// FAST_TYPE are used elsewhere in this file as indexes into options[], so they
// are presumably declared together with it -- confirm.
31 UNISETPERF_OPTIONS_COUNT
// Table of the unisetperf-specific command-line options; it is handed to the
// UPerfTest base-class constructor together with its element count.
// Both entries take an argument (UOPT_REQUIRES_ARG) and use '\x01' as the
// abbreviation character, i.e. they have no usable short form.
// NOTE(review): the entry order ("pattern", then "type") presumably has to
// match the SET_PATTERN / FAST_TYPE indexes used to read and preset .value
// -- confirm.
34 static UOption options
[UNISETPERF_OPTIONS_COUNT
]={
35 UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG
),
36 UOPTION_DEF("type", '\x01', UOPT_REQUIRES_ARG
)
// Help text describing the --pattern and --type options; passed to the
// UPerfTest base-class constructor as the usage string.
39 static const char *const unisetperf_usage
=
40 "\t--pattern UnicodeSet pattern for instantiation.\n"
41 "\t Default: [:ID_Continue:]\n"
42 "\t--type Type of UnicodeSet: slow fast\n"
45 // Test object with setup data.
// Derived from UPerfTest. The constructor builds the UnicodeSet under test
// from the command-line options, loads the input text, and prepares the values
// that the individual test functions read (code point count, UTF-8 copy of the
// input, span count).
// NOTE(review): parts of this class are not visible in this view; the comments
// below describe only the statements that are shown.
46 class UnicodeSetPerformanceTest
: public UPerfTest
{
// Constructor.
//   argc, argv: command line; forwarded to UPerfTest along with options[],
//               its element count and the usage text.
//   status:     ICU error code; the setup below runs only if it indicates
//               success after the base-class constructor.
48 UnicodeSetPerformanceTest(int32_t argc
, const char *argv
[], UErrorCode
&status
)
49 : UPerfTest(argc
, argv
, options
, LENGTHOF(options
), unisetperf_usage
, status
),
50 utf8(NULL
), utf8Length(0), countInputCodePoints(0), spanCount(0) {
51 if (U_SUCCESS(status
)) {
// Build the set: the --pattern value is read as invariant characters
// (US_INV), unescaped, and applied to the set as a UnicodeSet pattern.
52 UnicodeString pattern
=UnicodeString(options
[SET_PATTERN
].value
, -1, US_INV
).unescape();
53 set
.applyPattern(pattern
, status
);
// Branch taken when --type is exactly "fast".
// NOTE(review): the body of this branch is not visible here; presumably it
// freezes the set -- confirm.
55 if(0==strcmp(options
[FAST_TYPE
].value
, "fast")) {
// Load the input text through the base class.
// NOTE(review): inputLength presumably receives the input length -- confirm.
60 UPerfTest::getBuffer(inputLength
, status
);
61 if(U_SUCCESS(status
) && inputLength
>0) {
// Number of code points (not UTF-16 code units) in the input.
62 countInputCodePoints
= u_countChar32(buffer
, bufferLen
);
66 // Preflight the UTF-8 length and allocate utf8.
67 u_strToUTF8(NULL
, 0, &utf8Length
, buffer
, bufferLen
, &status
);
// The preflight call is expected to report U_BUFFER_OVERFLOW_ERROR because
// it was given no destination buffer.
68 if(status
==U_BUFFER_OVERFLOW_ERROR
) {
69 utf8
=(char *)malloc(utf8Length
);
// Second call performs the actual conversion. The capacity equals the
// preflighted length, so there is no room for a terminating NUL.
72 u_strToUTF8(utf8
, utf8Length
, NULL
, buffer
, bufferLen
, &status
);
// NOTE(review): presumably the allocation-failure branch; its condition is
// not visible in this view.
74 status
=U_MEMORY_ALLOCATION_ERROR
;
// Print input statistics.
// NOTE(review): the ratios divide by spanCount and countInputCodePoints;
// confirm that neither can be 0 at this point.
79 printf("code points:%ld len16:%ld len8:%ld spans:%ld "
80 "cp/span:%.3g UChar/span:%.3g B/span:%.3g B/cp:%.3g\n",
81 (long)countInputCodePoints
, (long)bufferLen
, (long)utf8Length
, (long)spanCount
,
82 (double)countInputCodePoints
/spanCount
, (double)bufferLen
/spanCount
, (double)utf8Length
/spanCount
,
83 (double)utf8Length
/countInputCodePoints
);
// Maps a test index to its name and, when exec is true, to a new test
// function object. Defined out of line later in this file.
89 virtual UPerfFunction
* runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
91 // Count spans of characters that are in the set,
92 // and spans of characters that are not in the set.
93 // If the very first character is in the set, then one additional
94 // not-span is counted.
// Input text and its length in UTF-16 code units.
96 const UChar
*s
=getBuffer();
97 int32_t length
=getBufferLen();
// Move i past the next span; tf selects which kind of span is scanned.
// NOTE(review): the surrounding loop is not visible in this view.
101 i
=span(s
, length
, i
, tf
);
// Scans s from index start towards length, one code point at a time
// (U16_NEXT), comparing tf with set.contains(c) for each code point.
// NOTE(review): the branch body and the return statement are not visible;
// the caller assigns the result to its index, so this presumably returns the
// index at which the span ends -- confirm.
106 int32_t span(const UChar
*s
, int32_t length
, int32_t start
, UBool tf
) const {
109 while((prev
=start
)<length
) {
110 U16_NEXT(s
, start
, length
, c
);
111 if(tf
!=set
.contains(c
)) {
// Read-only accessors for the input text (buffer, bufferLen).
// NOTE(review): buffer and bufferLen are not declared in the visible part of
// this class; presumably inherited from UPerfTest.
118 const UChar
*getBuffer() const { return buffer
; }
119 int32_t getBufferLen() const { return bufferLen
; }
124 // Number of code points in the input text.
125 int32_t countInputCodePoints
;
// NOTE(review): the purpose of prefrozen is not shown by the visible code.
129 UnicodeSet prefrozen
;
132 // Performance test function object.
// Common base class of the test functions in this file: it stores a reference
// to the shared test object and reports per-iteration counts derived from it.
133 class Command
: public UPerfFunction
{
// Binds this function object to the test object. Only a reference is kept,
// so the test object must outlive this object.
135 Command(const UnicodeSetPerformanceTest
&testcase
) : testcase(testcase
) {}
138 virtual ~Command() {}
140 // virtual void call(UErrorCode* pErrorCode) { ... }
// Operation count reported for one iteration.
142 virtual long getOperationsPerIteration() {
143 // Number of code points tested:
144 // Input code points, plus one for the end of each span except the last span.
145 return testcase
.countInputCodePoints
+testcase
.spanCount
-1;
// Event count reported for one iteration: the number of spans.
148 virtual long getEventsPerIteration() {
149 return testcase
.spanCount
;
// The shared test object (input text, set, precomputed counts).
152 const UnicodeSetPerformanceTest
&testcase
;
// Test function that walks the UTF-16 input and calls UnicodeSet::contains()
// for every code point.
155 class Contains
: public Command
{
// Constructor: besides binding to testcase, runs a one-time self-check that
// probes testcase.set.contains() for every code point U+0000..U+10FFFF and
// compares the result with testcase.set, reporting a mismatch on stderr.
// NOTE(review): the statement inside the contains() branch is not visible;
// presumably it adds c to the local set that is compared below.
157 Contains(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
158 // Verify that the frozen set is equal to the unfrozen one.
162 for(c
=0; c
<=0x10ffff; ++c
) {
163 if(testcase
.set
.contains(c
)) {
167 if(set
!=testcase
.set
) {
168 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a heap-allocated Contains bound to testcase.
// NOTE(review): presumably the caller takes ownership -- confirm.
172 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
173 return new Contains(testcase
);
// One timed iteration over the whole input. pErrorCode is not used by the
// statements visible here.
175 virtual void call(UErrorCode
* pErrorCode
) {
176 const UnicodeSet
&set
=testcase
.set
;
177 const UChar
*s
=testcase
.getBuffer();
178 int32_t length
=testcase
.getBufferLen();
// Advance by the length of the next span, measured on the remaining text.
// NOTE(review): the surrounding loop is not visible in this view.
183 i
+=span(set
, s
+i
, length
-i
, tf
);
// Sanity check: the number of spans found must match the precomputed
// testcase.spanCount.
187 if(count
!=testcase
.spanCount
) {
188 fprintf(stderr
, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
189 (long)count
, (long)testcase
.spanCount
);
// Scans s from its beginning, one code point at a time (U16_NEXT), comparing
// tf with set.contains(c) for each code point.
// NOTE(review): the branch body and the return statement are not visible;
// call() adds the result to its index, so this presumably returns the length
// of the span at the start of s -- confirm.
192 static int32_t span(const UnicodeSet
&set
, const UChar
*s
, int32_t length
, UBool tf
) {
194 int32_t start
=0, prev
;
195 while((prev
=start
)<length
) {
196 U16_NEXT(s
, start
, length
, c
);
197 if(tf
!=set
.contains(c
)) {
// Test function that walks the UTF-16 input with UnicodeSet::span().
205 class SpanUTF16
: public Command
{
// Constructor: besides binding to testcase, runs a one-time self-check that
// probes testcase.set.span() with short strings and compares the result with
// testcase.set, reporting a mismatch on stderr.
207 SpanUTF16(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
208 // Verify that the frozen set is equal to the unfrozen one.
// Every 16-bit value 0..0xFFFF is probed as a one-unit string; a positive
// span result is the "contained" case.
// NOTE(review): the statements that fill utf16 and the branch body are not
// visible in this view.
213 for(c
=0; c
<=0xffff; ++c
) {
215 if(testcase
.set
.span(utf16
, 1, USET_SPAN_CONTAINED
)>0) {
// Every lead surrogate (0xD800..0xDBFF) combined with every trail surrogate
// (0xDC00..0xDFFF) is probed as a two-unit string; when the span is positive
// the pair's supplementary code point is added to the local set.
219 for(c
=0xd800; c
<=0xdbff; ++c
) {
221 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
223 if(testcase
.set
.span(utf16
, 2, USET_SPAN_CONTAINED
)>0) {
224 set
.add(U16_GET_SUPPLEMENTARY(c
, c2
));
229 if(set
!=testcase
.set
) {
230 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a heap-allocated SpanUTF16 bound to testcase.
// NOTE(review): presumably the caller takes ownership -- confirm.
234 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
235 return new SpanUTF16(testcase
);
// One timed iteration over the whole input. pErrorCode is not used by the
// statements visible here.
237 virtual void call(UErrorCode
* pErrorCode
) {
238 const UnicodeSet
&set
=testcase
.set
;
239 const UChar
*s
=testcase
.getBuffer();
240 int32_t length
=testcase
.getBufferLen();
// Advance by the length of the next span on the remaining text; tf is cast
// to the span condition.
// NOTE(review): the surrounding loop is not visible in this view.
245 i
+=set
.span(s
+i
, length
-i
, (USetSpanCondition
)tf
);
// Sanity check: the number of spans found must match the precomputed
// testcase.spanCount.
249 if(count
!=testcase
.spanCount
) {
250 fprintf(stderr
, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
251 (long)count
, (long)testcase
.spanCount
);
// Test function that walks the UTF-16 input backwards with
// UnicodeSet::spanBack().
256 class SpanBackUTF16
: public Command
{
// Constructor: besides binding to testcase, runs a one-time self-check that
// probes testcase.set.spanBack() with short strings and compares the result
// with testcase.set, reporting a mismatch on stderr.
258 SpanBackUTF16(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
259 // Verify that the frozen set is equal to the unfrozen one.
// Every 16-bit value 0..0xFFFF is probed as a one-unit string; a spanBack()
// result of 0 is the "contained" case here.
// NOTE(review): the statements that fill utf16 and the branch body are not
// visible in this view.
264 for(c
=0; c
<=0xffff; ++c
) {
266 if(testcase
.set
.spanBack(utf16
, 1, USET_SPAN_CONTAINED
)==0) {
// Every lead surrogate (0xD800..0xDBFF) combined with every trail surrogate
// (0xDC00..0xDFFF) is probed as a two-unit string; when spanBack() returns 0
// the pair's supplementary code point is added to the local set.
270 for(c
=0xd800; c
<=0xdbff; ++c
) {
272 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
274 if(testcase
.set
.spanBack(utf16
, 2, USET_SPAN_CONTAINED
)==0) {
275 set
.add(U16_GET_SUPPLEMENTARY(c
, c2
));
280 if(set
!=testcase
.set
) {
281 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a heap-allocated SpanBackUTF16 bound to testcase.
// NOTE(review): presumably the caller takes ownership -- confirm.
285 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
286 return new SpanBackUTF16(testcase
);
// One timed iteration over the whole input, from the end towards the start.
// pErrorCode is not used by the statements visible here.
288 virtual void call(UErrorCode
* pErrorCode
) {
289 const UnicodeSet
&set
=testcase
.set
;
290 const UChar
*s
=testcase
.getBuffer();
291 int32_t length
=testcase
.getBufferLen();
294 * Get the same spans as with span() where we always start with a not-contained span.
295 * If testcase.spanCount is an odd number, then the last span() was not-contained.
296 * The last spanBack() must be not-contained to match the first span().
// The starting condition is derived from the parity of spanCount: tf is true
// (contained) when spanCount is even, false when it is odd.
298 UBool tf
=(UBool
)((testcase
.spanCount
&1)==0);
// Continue while text remains, or while tf is still false at length 0.
// NOTE(review): presumably this lets a final, possibly empty, not-contained
// span be counted; the statements that update tf and count are not visible.
299 while(length
>0 || !tf
) {
300 length
=set
.spanBack(s
, length
, (USetSpanCondition
)tf
);
// Sanity check: the number of spans found must match the precomputed
// testcase.spanCount.
304 if(count
!=testcase
.spanCount
) {
305 fprintf(stderr
, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
306 (long)count
, (long)testcase
.spanCount
);
// Test function that walks the UTF-8 copy of the input with
// UnicodeSet::spanUTF8().
311 class SpanUTF8
: public Command
{
// Constructor: besides binding to testcase, runs a one-time self-check that
// probes testcase.set.spanUTF8() with single encoded code points and compares
// the result with testcase.set, reporting a mismatch on stderr.
313 SpanUTF8(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
314 // Verify that the frozen set is equal to the unfrozen one.
// For each code point up to U+10FFFF: encode it into utf8 and treat a
// positive spanUTF8() result as the "contained" case.
// NOTE(review): the statements between the loop header and U8_APPEND_UNSAFE,
// and the branch body, are not visible in this view.
320 for(c
=0; c
<=0x10ffff; ++c
) {
325 U8_APPEND_UNSAFE(utf8
, length
, c
);
326 if(testcase
.set
.spanUTF8(utf8
, length
, USET_SPAN_CONTAINED
)>0) {
330 if(set
!=testcase
.set
) {
331 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a heap-allocated SpanUTF8 bound to testcase.
// NOTE(review): presumably the caller takes ownership -- confirm.
335 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
336 return new SpanUTF8(testcase
);
// One timed iteration over the whole UTF-8 input. pErrorCode is not used by
// the statements visible here.
338 virtual void call(UErrorCode
* pErrorCode
) {
339 const UnicodeSet
&set
=testcase
.set
;
// UTF-8 text and its length in bytes, as prepared by the test object.
340 const char *s
=testcase
.utf8
;
341 int32_t length
=testcase
.utf8Length
;
// Advance by the length of the next span on the remaining text; tf is cast
// to the span condition.
// NOTE(review): the surrounding loop is not visible in this view.
346 i
+=set
.spanUTF8(s
+i
, length
-i
, (USetSpanCondition
)tf
);
// Sanity check: the number of spans found must match the precomputed
// testcase.spanCount.
350 if(count
!=testcase
.spanCount
) {
351 fprintf(stderr
, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
352 (long)count
, (long)testcase
.spanCount
);
// Test function that walks the UTF-8 copy of the input backwards with
// UnicodeSet::spanBackUTF8().
357 class SpanBackUTF8
: public Command
{
// Constructor: besides binding to testcase, runs a one-time self-check that
// probes testcase.set.spanBackUTF8() with single encoded code points and
// compares the result with testcase.set, reporting a mismatch on stderr.
359 SpanBackUTF8(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
360 // Verify that the frozen set is equal to the unfrozen one.
// For each code point up to U+10FFFF: encode it into utf8 and treat a
// spanBackUTF8() result of 0 as the "contained" case.
// NOTE(review): the statements between the loop header and U8_APPEND_UNSAFE,
// and the branch body, are not visible in this view.
366 for(c
=0; c
<=0x10ffff; ++c
) {
371 U8_APPEND_UNSAFE(utf8
, length
, c
);
372 if(testcase
.set
.spanBackUTF8(utf8
, length
, USET_SPAN_CONTAINED
)==0) {
376 if(set
!=testcase
.set
) {
377 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a heap-allocated SpanBackUTF8 bound to testcase.
// NOTE(review): presumably the caller takes ownership -- confirm.
381 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
382 return new SpanBackUTF8(testcase
);
// One timed iteration over the whole UTF-8 input, from the end towards the
// start. pErrorCode is not used by the statements visible here.
384 virtual void call(UErrorCode
* pErrorCode
) {
385 const UnicodeSet
&set
=testcase
.set
;
// UTF-8 text and its length in bytes, as prepared by the test object.
386 const char *s
=testcase
.utf8
;
387 int32_t length
=testcase
.utf8Length
;
390 * Get the same spans as with span() where we always start with a not-contained span.
391 * If testcase.spanCount is an odd number, then the last span() was not-contained.
392 * The last spanBack() must be not-contained to match the first span().
// The starting condition is derived from the parity of spanCount: tf is true
// (contained) when spanCount is even, false when it is odd.
394 UBool tf
=(UBool
)((testcase
.spanCount
&1)==0);
// Continue while text remains, or while tf is still false at length 0.
// NOTE(review): presumably this lets a final, possibly empty, not-contained
// span be counted; the statements that update tf and count are not visible.
395 while(length
>0 || !tf
) {
396 length
=set
.spanBackUTF8(s
, length
, (USetSpanCondition
)tf
);
// Sanity check: the number of spans found must match the precomputed
// testcase.spanCount.
400 if(count
!=testcase
.spanCount
) {
401 fprintf(stderr
, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
402 (long)count
, (long)testcase
.spanCount
);
// Test dispatcher: maps a test index to its name and test function object.
//   index: test number; 0..4 select the five tests listed below.
//   exec:  when true, the matching case returns a newly created function
//          object; when false, only name is set.
//   name:  output; receives the test name, or "" for any other index.
//   par:   not used by the statements visible here.
// NOTE(review): the switch header and the return statement that follows the
// cases are not visible in this view.
407 UPerfFunction
* UnicodeSetPerformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
) {
409 case 0: name
= "Contains"; if (exec
) return Contains::get(*this); break;
410 case 1: name
= "SpanUTF16"; if (exec
) return SpanUTF16::get(*this); break;
411 case 2: name
= "SpanBackUTF16";if (exec
) return SpanBackUTF16::get(*this); break;
412 case 3: name
= "SpanUTF8"; if (exec
) return SpanUTF8::get(*this); break;
413 case 4: name
= "SpanBackUTF8"; if (exec
) return SpanBackUTF8::get(*this); break;
// Any other index: no test with that number.
414 default: name
= ""; break;
419 int main(int argc
, const char *argv
[])
421 // Default values for command-line options.
422 options
[SET_PATTERN
].value
= "[:ID_Continue:]";
423 options
[FAST_TYPE
].value
= "slow";
425 UErrorCode status
= U_ZERO_ERROR
;
426 UnicodeSetPerformanceTest
test(argc
, argv
, status
);
428 if (U_FAILURE(status
)){
429 printf("The error is %s\n", u_errorName(status
));
434 if (test
.run() == FALSE
){
435 fprintf(stderr
, "FAILED: Tests could not be run, please check the "