2 **************************************************************************
3 * © 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html#License
5 **************************************************************************
6 **************************************************************************
7 * Copyright (C) 2014, International Business Machines
8 * Corporation and others. All Rights Reserved.
9 **************************************************************************
10 * file name: unisetperf.cpp
12 * tab size: 8 (not used)
15 * created on: 2007jan31
16 * created by: Markus Scherer
22 #include "unicode/uperf.h"
23 #include "unicode/uniset.h"
24 #include "unicode/unistr.h"
26 #include "cmemory.h" // for UPRV_LENGTHOF
28 // Command-line options specific to unisetperf.
29 // Options do not have abbreviations: Force readable command lines.
30 // (Using U+0001 for abbreviation characters.)
// Used below as the element count of options[].
// NOTE(review): the enum opener and the other enumerators (SET_PATTERN and
// FAST_TYPE are used as indexes elsewhere in this file) are not visible in
// this excerpt.
34 UNISETPERF_OPTIONS_COUNT
// Option table that the UnicodeSetPerformanceTest constructor hands to
// UPerfTest. Both entries require an argument and put '\x01' in the
// abbreviation slot, so only the long names --pattern and --type are usable.
37 static UOption options
[UNISETPERF_OPTIONS_COUNT
]={
38 UOPTION_DEF("pattern", '\x01', UOPT_REQUIRES_ARG
),
39 UOPTION_DEF("type", '\x01', UOPT_REQUIRES_ARG
)
// Usage text for the two unisetperf-specific options. It is passed to the
// UPerfTest constructor together with the option table.
// NOTE(review): the end of this string literal sequence is not visible in
// this excerpt.
42 static const char *const unisetperf_usage
=
43 "\t--pattern UnicodeSet pattern for instantiation.\n"
44 "\t Default: [:ID_Continue:]\n"
45 "\t--type Type of UnicodeSet: slow fast\n"
48 // Test object with setup data.
// Shared fixture for the benchmark commands in this file: the UnicodeSet under
// test, access to the UTF-16 input, a UTF-8 copy of that input, and the code
// point and span counts derived from it.
49 class UnicodeSetPerformanceTest
: public UPerfTest
{
// Forwards the command line, the option table and the usage text to UPerfTest
// and zero-initializes the UTF-8 buffer and the counters. When status reports
// success, the set and the input-derived data are prepared below.
// status is an in/out ICU error code.
51 UnicodeSetPerformanceTest(int32_t argc
, const char *argv
[], UErrorCode
&status
)
52 : UPerfTest(argc
, argv
, options
, UPRV_LENGTHOF(options
), unisetperf_usage
, status
),
53 utf8(NULL
), utf8Length(0), countInputCodePoints(0), spanCount(0) {
54 if (U_SUCCESS(status
)) {
// The --pattern value is converted with US_INV and unescaped before it is
// applied to the set.
55 UnicodeString pattern
=UnicodeString(options
[SET_PATTERN
].value
, -1, US_INV
).unescape();
56 set
.applyPattern(pattern
, status
);
// Taken when the --type value is exactly "fast".
// NOTE(review): the body of this branch is not visible in this excerpt;
// presumably it freezes the set - confirm.
58 if(0==strcmp(options
[FAST_TYPE
].value
, "fast")) {
// Ask UPerfTest for the input text. The remaining setup only runs when that
// call succeeded and reported a positive length.
63 UPerfTest::getBuffer(inputLength
, status
);
64 if(U_SUCCESS(status
) && inputLength
>0) {
65 countInputCodePoints
= u_countChar32(buffer
, bufferLen
);
69 // Preflight the UTF-8 length and allocate utf8.
// The preflight call passes a NULL destination with capacity 0 and receives
// the required size in utf8Length; U_BUFFER_OVERFLOW_ERROR is the status this
// code treats as the go-ahead for the real conversion.
70 u_strToUTF8(NULL
, 0, &utf8Length
, buffer
, bufferLen
, &status
);
71 if(status
==U_BUFFER_OVERFLOW_ERROR
) {
// Exactly utf8Length bytes are requested, so there is no room for a
// terminating NUL.
// NOTE(review): no free(utf8) is visible anywhere in this excerpt - confirm
// whether the buffer is released or intentionally kept for the process lifetime.
72 utf8
=(char *)malloc(utf8Length
);
75 u_strToUTF8(utf8
, utf8Length
, NULL
, buffer
, bufferLen
, &status
);
// NOTE(review): the condition guarding this assignment is not visible here;
// presumably it is the failure branch of a NULL check on the malloc() result.
77 status
=U_MEMORY_ALLOCATION_ERROR
;
// Summary of the input: code points, UTF-16 length, UTF-8 length, span count,
// and the per-span and per-code-point ratios. The ratios are computed in
// double arithmetic.
// NOTE(review): nothing visible here guards against spanCount or
// countInputCodePoints being zero.
82 printf("code points:%ld len16:%ld len8:%ld spans:%ld "
83 "cp/span:%.3g UChar/span:%.3g B/span:%.3g B/cp:%.3g\n",
84 (long)countInputCodePoints
, (long)bufferLen
, (long)utf8Length
, (long)spanCount
,
85 (double)countInputCodePoints
/spanCount
, (double)bufferLen
/spanCount
, (double)utf8Length
/spanCount
,
86 (double)utf8Length
/countInputCodePoints
);
// Maps a test index to a test name and, on request, a benchmark command.
// Defined out of line further down in this file.
92 virtual UPerfFunction
* runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
94 // Count spans of characters that are in the set,
95 // and spans of characters that are not in the set.
96 // If the very first character is in the set, then one additional
97 // not-span is counted.
// NOTE(review): the function header for the span-counting code below is not
// visible in this excerpt. Each span() result becomes the start index of the
// next call.
99 const UChar
*s
=getBuffer();
100 int32_t length
=getBufferLen();
104 i
=span(s
, length
, i
, tf
);
// Walks the UTF-16 text forward from start one code point at a time
// (U16_NEXT), keeping in prev the index at which the current code point
// began, and reacts to the first code point whose membership in set differs
// from tf.
// NOTE(review): the statements inside that branch and the return statement
// are not visible in this excerpt.
109 int32_t span(const UChar
*s
, int32_t length
, int32_t start
, UBool tf
) const {
112 while((prev
=start
)<length
) {
113 U16_NEXT(s
, start
, length
, c
);
114 if(tf
!=set
.contains(c
)) {
// Read-only accessors for the UTF-16 input (buffer) and its length (bufferLen).
121 const UChar
*getBuffer() const { return buffer
; }
122 int32_t getBufferLen() const { return bufferLen
; }
127 // Number of code points in the input text.
128 int32_t countInputCodePoints
;
// NOTE(review): nothing visible in this excerpt assigns or reads this member;
// presumably it is a copy of set taken before any freezing - confirm.
132 UnicodeSet prefrozen
;
135 // Performance test function object.
// Common base of the benchmark commands below. It stores a reference to the
// shared test fixture and derives the reported operation and event counts
// from the fixture's precomputed totals.
136 class Command
: public UPerfFunction
{
// The fixture is held by reference, so it must outlive the command.
138 Command(const UnicodeSetPerformanceTest
&testcase
) : testcase(testcase
) {}
141 virtual ~Command() {}
143 // virtual void call(UErrorCode* pErrorCode) { ... }
145 virtual long getOperationsPerIteration() {
146 // Number of code points tested:
147 // Input code points, plus one for the end of each span except the last span.
148 return testcase
.countInputCodePoints
+testcase
.spanCount
-1;
// One event per span counted in the fixture.
151 virtual long getEventsPerIteration() {
152 return testcase
.spanCount
;
155 const UnicodeSetPerformanceTest
&testcase
;
// Benchmark command that walks the UTF-16 input one code point at a time and
// queries UnicodeSet::contains() for each of them.
158 class Contains
: public Command
{
// Self-check at construction: probes every code point U+0000..U+10FFFF with
// contains(), then compares a local set against testcase.set and reports a
// mismatch on stderr.
// NOTE(review): the declaration of the local set and the statement that
// records each hit are not visible in this excerpt.
160 Contains(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
161 // Verify that the frozen set is equal to the unfrozen one.
165 for(c
=0; c
<=0x10ffff; ++c
) {
166 if(testcase
.set
.contains(c
)) {
170 if(set
!=testcase
.set
) {
171 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a newly allocated Contains bound to testcase.
175 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
176 return new Contains(testcase
);
// One benchmark iteration: advances through the input by the length that each
// span() call returns, then checks the resulting span count against
// testcase.spanCount and reports a mismatch on stderr.
// NOTE(review): the loop header and the declarations/updates of i, tf and
// count are not visible in this excerpt.
178 virtual void call(UErrorCode
* pErrorCode
) {
179 const UnicodeSet
&set
=testcase
.set
;
180 const UChar
*s
=testcase
.getBuffer();
181 int32_t length
=testcase
.getBufferLen();
186 i
+=span(set
, s
+i
, length
-i
, tf
);
190 if(count
!=testcase
.spanCount
) {
191 fprintf(stderr
, "error: Contains() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
192 (long)count
, (long)testcase
.spanCount
);
// Scans the (s, length) slice from offset 0 one code point at a time
// (U16_NEXT), keeping in prev the offset at which the current code point
// began, and reacts to the first code point whose membership in set differs
// from tf.
// NOTE(review): the statements inside that branch and the return statement
// are not visible in this excerpt.
195 static int32_t span(const UnicodeSet
&set
, const UChar
*s
, int32_t length
, UBool tf
) {
197 int32_t start
=0, prev
;
198 while((prev
=start
)<length
) {
199 U16_NEXT(s
, start
, length
, c
);
200 if(tf
!=set
.contains(c
)) {
// Benchmark command that steps through the UTF-16 input with
// UnicodeSet::span().
208 class SpanUTF16
: public Command
{
// Self-check at construction, done through span() itself. A first loop covers
// the code units 0..0xffff as one-unit strings; a second, nested loop covers
// every lead surrogate (0xd800..0xdbff) / trail surrogate (0xdc00..0xdfff)
// pair as a two-unit string and adds the combined supplementary code point
// to a local set when span() returns a positive length. The local set is then
// compared against testcase.set, and a mismatch is reported on stderr.
// NOTE(review): the lines that fill the utf16 array and the body of the
// first loop's branch are not visible in this excerpt.
210 SpanUTF16(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
211 // Verify that the frozen set is equal to the unfrozen one.
216 for(c
=0; c
<=0xffff; ++c
) {
218 if(testcase
.set
.span(utf16
, 1, USET_SPAN_CONTAINED
)>0) {
222 for(c
=0xd800; c
<=0xdbff; ++c
) {
224 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
226 if(testcase
.set
.span(utf16
, 2, USET_SPAN_CONTAINED
)>0) {
227 set
.add(U16_GET_SUPPLEMENTARY(c
, c2
));
232 if(set
!=testcase
.set
) {
233 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a newly allocated SpanUTF16 bound to testcase.
237 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
238 return new SpanUTF16(testcase
);
// One benchmark iteration: advances through the input by the length that each
// set.span() call returns, with tf cast to USetSpanCondition, then checks the
// resulting span count against testcase.spanCount.
// NOTE(review): the loop header and the declarations/updates of i, tf and
// count are not visible in this excerpt.
240 virtual void call(UErrorCode
* pErrorCode
) {
241 const UnicodeSet
&set
=testcase
.set
;
242 const UChar
*s
=testcase
.getBuffer();
243 int32_t length
=testcase
.getBufferLen();
248 i
+=set
.span(s
+i
, length
-i
, (USetSpanCondition
)tf
);
252 if(count
!=testcase
.spanCount
) {
253 fprintf(stderr
, "error: SpanUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
254 (long)count
, (long)testcase
.spanCount
);
// Benchmark command that steps backward through the UTF-16 input with
// UnicodeSet::spanBack().
259 class SpanBackUTF16
: public Command
{
// Self-check at construction, done through spanBack() itself. A first loop
// covers the code units 0..0xffff as one-unit strings; a second, nested loop
// covers every lead/trail surrogate pair as a two-unit string and adds the
// combined supplementary code point to a local set when spanBack() returns 0.
// The local set is then compared against testcase.set, and a mismatch is
// reported on stderr.
// NOTE(review): the lines that fill the utf16 array and the body of the
// first loop's branch are not visible in this excerpt.
261 SpanBackUTF16(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
262 // Verify that the frozen set is equal to the unfrozen one.
267 for(c
=0; c
<=0xffff; ++c
) {
269 if(testcase
.set
.spanBack(utf16
, 1, USET_SPAN_CONTAINED
)==0) {
273 for(c
=0xd800; c
<=0xdbff; ++c
) {
275 for(c2
=0xdc00; c2
<=0xdfff; ++c2
) {
277 if(testcase
.set
.spanBack(utf16
, 2, USET_SPAN_CONTAINED
)==0) {
278 set
.add(U16_GET_SUPPLEMENTARY(c
, c2
));
283 if(set
!=testcase
.set
) {
284 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a newly allocated SpanBackUTF16 bound to testcase.
288 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
289 return new SpanBackUTF16(testcase
);
// One benchmark iteration: repeatedly replaces length with the value returned
// by set.spanBack(), then checks the resulting span count against
// testcase.spanCount. The initial tf is true exactly when testcase.spanCount
// is even, and the loop keeps running while text remains or tf is false.
// NOTE(review): the updates of tf and count inside the loop are not visible
// in this excerpt.
291 virtual void call(UErrorCode
* pErrorCode
) {
292 const UnicodeSet
&set
=testcase
.set
;
293 const UChar
*s
=testcase
.getBuffer();
294 int32_t length
=testcase
.getBufferLen();
297 * Get the same spans as with span() where we always start with a not-contained span.
298 * If testcase.spanCount is an odd number, then the last span() was not-contained.
299 * The last spanBack() must be not-contained to match the first span().
301 UBool tf
=(UBool
)((testcase
.spanCount
&1)==0);
302 while(length
>0 || !tf
) {
303 length
=set
.spanBack(s
, length
, (USetSpanCondition
)tf
);
307 if(count
!=testcase
.spanCount
) {
308 fprintf(stderr
, "error: SpanBackUTF16() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
309 (long)count
, (long)testcase
.spanCount
);
// Benchmark command that steps through the UTF-8 copy of the input with
// UnicodeSet::spanUTF8().
314 class SpanUTF8
: public Command
{
// Self-check at construction, done through spanUTF8() itself. For the code
// points in U+0000..U+10FFFF, each one is encoded with U8_APPEND_UNSAFE and
// spanUTF8() is asked whether it returns a positive length. A local set is
// then compared against testcase.set, and a mismatch is reported on stderr.
// NOTE(review): lines between the loop header and the encoding step, and the
// body of the branch, are not visible in this excerpt.
316 SpanUTF8(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
317 // Verify that the frozen set is equal to the unfrozen one.
323 for(c
=0; c
<=0x10ffff; ++c
) {
328 U8_APPEND_UNSAFE(utf8
, length
, c
);
329 if(testcase
.set
.spanUTF8(utf8
, length
, USET_SPAN_CONTAINED
)>0) {
333 if(set
!=testcase
.set
) {
334 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a newly allocated SpanUTF8 bound to testcase.
338 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
339 return new SpanUTF8(testcase
);
// One benchmark iteration over testcase.utf8 / testcase.utf8Length: advances
// by the length that each set.spanUTF8() call returns, with tf cast to
// USetSpanCondition, then checks the resulting span count against
// testcase.spanCount.
// NOTE(review): the loop header and the declarations/updates of i, tf and
// count are not visible in this excerpt.
341 virtual void call(UErrorCode
* pErrorCode
) {
342 const UnicodeSet
&set
=testcase
.set
;
343 const char *s
=testcase
.utf8
;
344 int32_t length
=testcase
.utf8Length
;
349 i
+=set
.spanUTF8(s
+i
, length
-i
, (USetSpanCondition
)tf
);
353 if(count
!=testcase
.spanCount
) {
354 fprintf(stderr
, "error: SpanUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
355 (long)count
, (long)testcase
.spanCount
);
// Benchmark command that steps backward through the UTF-8 copy of the input
// with UnicodeSet::spanBackUTF8().
360 class SpanBackUTF8
: public Command
{
// Self-check at construction, done through spanBackUTF8() itself. For the
// code points in U+0000..U+10FFFF, each one is encoded with U8_APPEND_UNSAFE
// and spanBackUTF8() is asked whether it returns 0. A local set is then
// compared against testcase.set, and a mismatch is reported on stderr.
// NOTE(review): lines between the loop header and the encoding step, and the
// body of the branch, are not visible in this excerpt.
362 SpanBackUTF8(const UnicodeSetPerformanceTest
&testcase
) : Command(testcase
) {
363 // Verify that the frozen set is equal to the unfrozen one.
369 for(c
=0; c
<=0x10ffff; ++c
) {
374 U8_APPEND_UNSAFE(utf8
, length
, c
);
375 if(testcase
.set
.spanBackUTF8(utf8
, length
, USET_SPAN_CONTAINED
)==0) {
379 if(set
!=testcase
.set
) {
380 fprintf(stderr
, "error: frozen set != original!\n");
// Factory: returns a newly allocated SpanBackUTF8 bound to testcase.
384 static UPerfFunction
* get(const UnicodeSetPerformanceTest
&testcase
) {
385 return new SpanBackUTF8(testcase
);
// One benchmark iteration over testcase.utf8 / testcase.utf8Length:
// repeatedly replaces length with the value returned by set.spanBackUTF8(),
// then checks the resulting span count against testcase.spanCount. The
// initial tf is true exactly when testcase.spanCount is even, and the loop
// keeps running while text remains or tf is false.
// NOTE(review): the updates of tf and count inside the loop are not visible
// in this excerpt.
387 virtual void call(UErrorCode
* pErrorCode
) {
388 const UnicodeSet
&set
=testcase
.set
;
389 const char *s
=testcase
.utf8
;
390 int32_t length
=testcase
.utf8Length
;
393 * Get the same spans as with span() where we always start with a not-contained span.
394 * If testcase.spanCount is an odd number, then the last span() was not-contained.
395 * The last spanBack() must be not-contained to match the first span().
397 UBool tf
=(UBool
)((testcase
.spanCount
&1)==0);
398 while(length
>0 || !tf
) {
399 length
=set
.spanBackUTF8(s
, length
, (USetSpanCondition
)tf
);
403 if(count
!=testcase
.spanCount
) {
404 fprintf(stderr
, "error: SpanBackUTF8() count=%ld != %ld=UnicodeSetPerformanceTest.spanCount\n",
405 (long)count
, (long)testcase
.spanCount
);
// Test dispatch: each case sets name to the test's label and, when exec is
// true, returns a new command object bound to this fixture. Indexes 0..4 map
// to Contains, SpanUTF16, SpanBackUTF16, SpanUTF8 and SpanBackUTF8; any other
// index sets name to the empty string.
// NOTE(review): the switch header and the statement that follows the switch
// are not visible in this excerpt.
410 UPerfFunction
* UnicodeSetPerformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
) {
412 case 0: name
= "Contains"; if (exec
) return Contains::get(*this); break;
413 case 1: name
= "SpanUTF16"; if (exec
) return SpanUTF16::get(*this); break;
414 case 2: name
= "SpanBackUTF16";if (exec
) return SpanBackUTF16::get(*this); break;
415 case 3: name
= "SpanUTF8"; if (exec
) return SpanUTF8::get(*this); break;
416 case 4: name
= "SpanBackUTF8"; if (exec
) return SpanBackUTF8::get(*this); break;
417 default: name
= ""; break;
422 int main(int argc
, const char *argv
[])
424 // Default values for command-line options.
425 options
[SET_PATTERN
].value
= "[:ID_Continue:]";
426 options
[FAST_TYPE
].value
= "slow";
428 UErrorCode status
= U_ZERO_ERROR
;
429 UnicodeSetPerformanceTest
test(argc
, argv
, status
);
431 if (U_FAILURE(status
)){
432 printf("The error is %s\n", u_errorName(status
));
437 if (test
.run() == FALSE
){
438 fprintf(stderr
, "FAILED: Tests could not be run, please check the "