2 **********************************************************************
3 * Copyright (C) 2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: unisetperf.cpp
8 * tab size: 8 (not used)
11 * created on: 2007jan31
12 * created by: Markus Scherer
18 #include "unicode/uperf.h"
19 #include "unicode/uniset.h"
20 #include "unicode/unistr.h"
22 #include "cmemory.h" // for UPRV_LENGTHOF
24 // Command-line options specific to unisetperf.
25 // Options do not have abbreviations: Force readable command lines.
26 // (Using U+0001 for abbreviation characters.)
30 UNISETPERF_OPTIONS_COUNT
// Table of the unisetperf-specific command-line options. It is handed, together
// with its length (UPRV_LENGTHOF), to the UPerfTest constructor, and its entries
// are indexed with SET_PATTERN and FAST_TYPE elsewhere in this file.
// Both entries require an argument and use '\x01' as their abbreviation character.
33 static UOption options
[UNISETPERF_OPTIONS_COUNT
]={
34 UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG
),
35 UOPTION_DEF("type", '\x01', UOPT_REQUIRES_ARG
)
// Usage text describing the --pattern and --type options; it is passed to the
// UPerfTest constructor alongside the options table.
38 static const char *const unisetperf_usage
=
39 "\t--pattern UnicodeSet pattern for instantiation.\n"
40 "\t Default: [:ID_Continue:]\n"
41 "\t--type Type of UnicodeSet: slow fast\n"
44 // Test object with setup data.
// UPerfTest subclass that holds the UnicodeSet under test plus the data derived
// from the input text: its code point count, its UTF-8 form and the span count
// that the individual test commands compare their own counts against.
45 class UnicodeSetPerformanceTest
: public UPerfTest
{
// Constructor: forwards the command line, the options table and the usage text
// to UPerfTest, and starts with utf8==NULL and all lengths/counters at 0.
// Errors are reported through the status reference.
47 UnicodeSetPerformanceTest(int32_t argc
, const char *argv
[], UErrorCode
&status
)
48 : UPerfTest(argc
, argv
, options
, UPRV_LENGTHOF(options
), unisetperf_usage
, status
),
49 utf8(NULL
), utf8Length(0), countInputCodePoints(0), spanCount(0) {
// Everything below only runs when the base-class construction succeeded.
50 if (U_SUCCESS(status
)) {
// Build the set from the --pattern option value, after unescape() has been
// applied to the pattern string.
51 UnicodeString pattern
=UnicodeString(options
[SET_PATTERN
].value
, -1, US_INV
).unescape();
52 set
.applyPattern(pattern
, status
);
// Special handling when the --type option value is exactly "fast"
// (the body of this branch is not visible in this excerpt).
54 if(0==strcmp(options
[FAST_TYPE
].value
, "fast")) {
// Fetch the input text through the base class; the statistics and the UTF-8
// copy are only produced for non-empty input.
59 UPerfTest::getBuffer(inputLength
, status
);
60 if(U_SUCCESS(status
) && inputLength
>0) {
61 countInputCodePoints
= u_countChar32(buffer
, bufferLen
);
65 // Preflight the UTF-8 length and allocate utf8.
// The preflight call passes a NULL destination with capacity 0, so the
// U_BUFFER_OVERFLOW_ERROR checked next is the expected outcome and
// utf8Length receives the required size.
66 u_strToUTF8(NULL
, 0, &utf8Length
, buffer
, bufferLen
, &status
);
67 if(status
==U_BUFFER_OVERFLOW_ERROR
) {
// NOTE(review): no matching free(utf8) is visible in this excerpt — confirm
// that the buffer is released elsewhere.
68 utf8
=(char *)malloc(utf8Length
);
// Second call performs the actual conversion into the allocated buffer.
71 u_strToUTF8(utf8
, utf8Length
, NULL
, buffer
, bufferLen
, &status
);
// Presumably the malloc-failure branch; the enclosing if/else is not visible here.
73 status
=U_MEMORY_ALLOCATION_ERROR
;
// Print the input statistics: the raw counts, then per-span and per-code-point
// ratios computed in double arithmetic.
78 printf("code points:%ld len16:%ld len8:%ld spans:%ld "
79 "cp/span:%.3g UChar/span:%.3g B/span:%.3g B/cp:%.3g\n",
80 (long)countInputCodePoints
, (long)bufferLen
, (long)utf8Length
, (long)spanCount
,
81 (double)countInputCodePoints
/spanCount
, (double)bufferLen
/spanCount
, (double)utf8Length
/spanCount
,
82 (double)utf8Length
/countInputCodePoints
);
// Test dispatcher; defined out of line near the end of this file.
88 virtual UPerfFunction
* runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
90 // Count spans of characters that are in the set,
91 // and spans of characters that are not in the set.
92 // If the very first character is in the set, then one additional
93 // not-span is counted.
// The counting works on the UTF-16 input buffer and advances i with the
// span() helper below (the loop itself is not visible in this excerpt).
95 const UChar
*s
=getBuffer();
96 int32_t length
=getBufferLen();
100 i
=span(s
, length
, i
, tf
);
// Helper for the span counting: walks s code point by code point (U16_NEXT)
// beginning at start and tests each code point against the set, comparing the
// result with tf. prev remembers the index before each code point is read.
// The return statement is not visible here; presumably the index where the
// span ends — confirm against the full source.
105 int32_t span(const UChar
*s
, int32_t length
, int32_t start
, UBool tf
) const {
108 while((prev
=start
)<length
) {
109 U16_NEXT(s
, start
, length
, c
);
110 if(tf
!=set
.contains(c
)) {
// Read-only accessors for the UTF-16 input text and its length.
117 const UChar
*getBuffer() const { return buffer
; }
118 int32_t getBufferLen() const { return bufferLen
; }
123 // Number of code points in the input text.
124 int32_t countInputCodePoints
;
// A further UnicodeSet member; its use is not visible in this excerpt
// (presumably a copy of the set taken before any freezing — TODO confirm).
128 UnicodeSet prefrozen
;
131 // Performance test function object.
// Common base of the test function objects below: it keeps a reference to the
// owning UnicodeSetPerformanceTest and derives the per-iteration operation and
// event counts from that object's counters.
132 class Command
: public UPerfFunction
{
// Stores only a reference, so the testcase object must outlive the command.
134 Command(const UnicodeSetPerformanceTest
&testcase
) : testcase(testcase
) {}
137 virtual ~Command() {}
139 // virtual void call(UErrorCode* pErrorCode) { ... }
141 virtual long getOperationsPerIteration() {
142 // Number of code points tested:
143 // Input code points, plus one for the end of each span except the last span.
144 return testcase
.countInputCodePoints
+testcase
.spanCount
-1;
// One event per span.
147 virtual long getEventsPerIteration() {
148 return testcase
.spanCount
;
// The test object whose set, buffers and counters the commands use.
151 const UnicodeSetPerformanceTest
&testcase
;
// Test command that finds spans by calling UnicodeSet::contains() on each
// code point of the UTF-16 input text.
154 class Contains
: public Command
{
156 Contains(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
157 // Verify that the frozen set is equal to the unfrozen one.
// Probes testcase.set with contains() for every code point U+0000..U+10FFFF,
// then compares a local set with testcase.set and reports a mismatch on stderr.
// (The declaration and filling of the local set are not visible in this excerpt.)
161 for(c
=0; c
<=0x10ffff; ++c
) {
162 if(testcase
.set
.contains(c
)) {
166 if(set
!=testcase
.set
) {
167 fprintf(stderr
, "error: frozen set != original!\n");
// Factory used by runIndexedTest(); returns a heap-allocated command.
171 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
172 return new Contains(testcase
);
// Timed operation: advances through the text with the static span() helper
// and finally checks the resulting span count against testcase.spanCount,
// printing an error on a mismatch.
174 virtual void call(UErrorCode
* pErrorCode
) {
175 const UnicodeSet
&set
=testcase
.set
;
176 const UChar
*s
=testcase
.getBuffer();
177 int32_t length
=testcase
.getBufferLen();
// span() is given the remaining text and its result is added to the offset i.
182 i
+=span(set
, s
+i
, length
-i
, tf
);
186 if(count
!=testcase
.spanCount
) {
187 fprintf(stderr
, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
188 (long)count
, (long)testcase
.spanCount
);
// Walks s code point by code point (U16_NEXT) from index 0 and compares
// set.contains(c) with tf; prev remembers the index before each code point.
// The return statement is not visible here; since call() adds the result to
// its offset, presumably a length relative to s — confirm against the full source.
191 static int32_t span(const UnicodeSet
&set
, const UChar
*s
, int32_t length
, UBool tf
) {
193 int32_t start
=0, prev
;
194 while((prev
=start
)<length
) {
195 U16_NEXT(s
, start
, length
, c
);
196 if(tf
!=set
.contains(c
)) {
// Test command that finds spans by calling UnicodeSet::span() on the
// UTF-16 input text.
204 class SpanUTF16
: public Command
{
206 SpanUTF16(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
207 // Verify that the frozen set is equal to the unfrozen one.
// First pass: every code unit value 0..0xffff is probed with span() on a
// string of length 1.
212 for(c
=0; c
<=0xffff; ++c
) {
214 if(testcase
.set
.span(utf16
, 1, USET_SPAN_CONTAINED
)>0) {
// Second pass: every lead surrogate (0xd800..0xdbff) combined with every trail
// surrogate (0xdc00..0xdfff) is probed as a string of length 2; pairs that span
// are added to the local set as their supplementary code point.
218 for(c
=0xd800; c
<=0xdbff; ++c
) {
220 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
222 if(testcase
.set
.span(utf16
, 2, USET_SPAN_CONTAINED
)>0) {
223 set
.add(U16_GET_SUPPLEMENTARY(c
, c2
));
// The set rebuilt from the span() results must equal the set under test.
228 if(set
!=testcase
.set
) {
229 fprintf(stderr
, "error: frozen set != original!\n");
// Factory used by runIndexedTest(); returns a heap-allocated command.
233 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
234 return new SpanUTF16(testcase
);
// Timed operation: advances through the text with set.span() and finally
// checks the resulting span count against testcase.spanCount.
236 virtual void call(UErrorCode
* pErrorCode
) {
237 const UnicodeSet
&set
=testcase
.set
;
238 const UChar
*s
=testcase
.getBuffer();
239 int32_t length
=testcase
.getBufferLen();
// tf is cast directly to USetSpanCondition.
// NOTE(review): this relies on the numeric values of the span conditions
// lining up with the UBool values — confirm against uset.h.
244 i
+=set
.span(s
+i
, length
-i
, (USetSpanCondition
)tf
);
248 if(count
!=testcase
.spanCount
) {
249 fprintf(stderr
, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
250 (long)count
, (long)testcase
.spanCount
);
// Test command that finds spans by calling UnicodeSet::spanBack() on the
// UTF-16 input text, working from the end of the text towards the start.
255 class SpanBackUTF16
: public Command
{
257 SpanBackUTF16(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
258 // Verify that the frozen set is equal to the unfrozen one.
// First pass: every code unit value 0..0xffff is probed with spanBack() on a
// string of length 1; a result of 0 is what this check looks for.
263 for(c
=0; c
<=0xffff; ++c
) {
265 if(testcase
.set
.spanBack(utf16
, 1, USET_SPAN_CONTAINED
)==0) {
// Second pass: every lead surrogate (0xd800..0xdbff) combined with every trail
// surrogate (0xdc00..0xdfff) is probed as a string of length 2; pairs for which
// spanBack() returns 0 are added to the local set as their supplementary code point.
269 for(c
=0xd800; c
<=0xdbff; ++c
) {
271 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
273 if(testcase
.set
.spanBack(utf16
, 2, USET_SPAN_CONTAINED
)==0) {
274 set
.add(U16_GET_SUPPLEMENTARY(c
, c2
));
// The set rebuilt from the spanBack() results must equal the set under test.
279 if(set
!=testcase
.set
) {
280 fprintf(stderr
, "error: frozen set != original!\n");
// Factory used by runIndexedTest(); returns a heap-allocated command.
284 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
285 return new SpanBackUTF16(testcase
);
// Timed operation: repeatedly shortens length with set.spanBack() and finally
// checks the resulting span count against testcase.spanCount.
287 virtual void call(UErrorCode
* pErrorCode
) {
288 const UnicodeSet
&set
=testcase
.set
;
289 const UChar
*s
=testcase
.getBuffer();
290 int32_t length
=testcase
.getBufferLen();
293 * Get the same spans as with span() where we always start with a not-contained span.
294 * If testcase.spanCount is an odd number, then the last span() was not-contained.
295 * The last spanBack() must be not-contained to match the first span().
// tf starts out TRUE exactly when spanCount is even. The loop condition keeps
// iterating while text remains or while tf is FALSE.
297 UBool tf
=(UBool
)((testcase
.spanCount
&1)==0);
298 while(length
>0 || !tf
) {
// tf is cast directly to USetSpanCondition.
// NOTE(review): this relies on the numeric values of the span conditions
// lining up with the UBool values — confirm against uset.h.
299 length
=set
.spanBack(s
, length
, (USetSpanCondition
)tf
);
303 if(count
!=testcase
.spanCount
) {
304 fprintf(stderr
, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
305 (long)count
, (long)testcase
.spanCount
);
// Test command that finds spans by calling UnicodeSet::spanUTF8() on the
// UTF-8 copy of the input text.
310 class SpanUTF8
: public Command
{
312 SpanUTF8(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
313 // Verify that the frozen set is equal to the unfrozen one.
// Each code point U+0000..U+10FFFF is encoded into a local UTF-8 buffer with
// U8_APPEND_UNSAFE and probed with spanUTF8(); a local set is then compared
// with testcase.set and a mismatch is reported on stderr.
// (Any filtering between the loop header and the append is not visible here.)
319 for(c
=0; c
<=0x10ffff; ++c
) {
324 U8_APPEND_UNSAFE(utf8
, length
, c
);
325 if(testcase
.set
.spanUTF8(utf8
, length
, USET_SPAN_CONTAINED
)>0) {
329 if(set
!=testcase
.set
) {
330 fprintf(stderr
, "error: frozen set != original!\n");
// Factory used by runIndexedTest(); returns a heap-allocated command.
334 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
335 return new SpanUTF8(testcase
);
// Timed operation: advances through the UTF-8 text with set.spanUTF8() and
// finally checks the resulting span count against testcase.spanCount.
337 virtual void call(UErrorCode
* pErrorCode
) {
338 const UnicodeSet
&set
=testcase
.set
;
339 const char *s
=testcase
.utf8
;
340 int32_t length
=testcase
.utf8Length
;
// tf is cast directly to USetSpanCondition.
// NOTE(review): this relies on the numeric values of the span conditions
// lining up with the UBool values — confirm against uset.h.
345 i
+=set
.spanUTF8(s
+i
, length
-i
, (USetSpanCondition
)tf
);
349 if(count
!=testcase
.spanCount
) {
350 fprintf(stderr
, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
351 (long)count
, (long)testcase
.spanCount
);
// Test command that finds spans by calling UnicodeSet::spanBackUTF8() on the
// UTF-8 copy of the input text, working from the end towards the start.
356 class SpanBackUTF8
: public Command
{
358 SpanBackUTF8(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
359 // Verify that the frozen set is equal to the unfrozen one.
// Each code point U+0000..U+10FFFF is encoded into a local UTF-8 buffer with
// U8_APPEND_UNSAFE and probed with spanBackUTF8() (a result of 0 is what the
// check looks for); a local set is then compared with testcase.set and a
// mismatch is reported on stderr.
// (Any filtering between the loop header and the append is not visible here.)
365 for(c
=0; c
<=0x10ffff; ++c
) {
370 U8_APPEND_UNSAFE(utf8
, length
, c
);
371 if(testcase
.set
.spanBackUTF8(utf8
, length
, USET_SPAN_CONTAINED
)==0) {
375 if(set
!=testcase
.set
) {
376 fprintf(stderr
, "error: frozen set != original!\n");
// Factory used by runIndexedTest(); returns a heap-allocated command.
380 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
381 return new SpanBackUTF8(testcase
);
// Timed operation: repeatedly shortens length with set.spanBackUTF8() and
// finally checks the resulting span count against testcase.spanCount.
383 virtual void call(UErrorCode
* pErrorCode
) {
384 const UnicodeSet
&set
=testcase
.set
;
385 const char *s
=testcase
.utf8
;
386 int32_t length
=testcase
.utf8Length
;
389 * Get the same spans as with span() where we always start with a not-contained span.
390 * If testcase.spanCount is an odd number, then the last span() was not-contained.
391 * The last spanBack() must be not-contained to match the first span().
// tf starts out TRUE exactly when spanCount is even. The loop condition keeps
// iterating while text remains or while tf is FALSE.
393 UBool tf
=(UBool
)((testcase
.spanCount
&1)==0);
394 while(length
>0 || !tf
) {
// tf is cast directly to USetSpanCondition.
// NOTE(review): this relies on the numeric values of the span conditions
// lining up with the UBool values — confirm against uset.h.
395 length
=set
.spanBackUTF8(s
, length
, (USetSpanCondition
)tf
);
399 if(count
!=testcase
.spanCount
) {
400 fprintf(stderr
, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
401 (long)count
, (long)testcase
.spanCount
);
// Maps a test index to a test name and, when exec is set, to a newly created
// command object for that test:
//   0 Contains, 1 SpanUTF16, 2 SpanBackUTF16, 3 SpanUTF8, 4 SpanBackUTF8.
// Any other index sets name to the empty string.
// The switch header and the fall-through return value are not visible in this
// excerpt; par is not used in the visible lines.
406 UPerfFunction
* UnicodeSetPerformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
) {
408 case 0: name
= "Contains"; if (exec
) return Contains::get(*this); break;
409 case 1: name
= "SpanUTF16"; if (exec
) return SpanUTF16::get(*this); break;
410 case 2: name
= "SpanBackUTF16";if (exec
) return SpanBackUTF16::get(*this); break;
411 case 3: name
= "SpanUTF8"; if (exec
) return SpanUTF8::get(*this); break;
412 case 4: name
= "SpanBackUTF8"; if (exec
) return SpanBackUTF8::get(*this); break;
413 default: name
= ""; break;
418 int main(int argc
, const char *argv
[])
420 // Default values for command-line options.
421 options
[SET_PATTERN
].value
= "[:ID_Continue:]";
422 options
[FAST_TYPE
].value
= "slow";
424 UErrorCode status
= U_ZERO_ERROR
;
425 UnicodeSetPerformanceTest
test(argc
, argv
, status
);
427 if (U_FAILURE(status
)){
428 printf("The error is %s\n", u_errorName(status
));
433 if (test
.run() == FALSE
){
434 fprintf(stderr
, "FAILED: Tests could not be run, please check the "