2 **************************************************************************
3 * © 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html#License
5 *************************************************************************
6 *************************************************************************
7 * Copyright (C) 2002-2014, International Business Machines
8 * Corporation and others. All Rights Reserved.
9 *************************************************************************
10 * file name: utfperf.cpp
12 * tab size: 8 (not used)
15 * created on: 2005Nov17
16 * created by: Raymond Yang
18 * Ported from utfperf.c created by Markus W. Scherer
19 * Performance test program for Unicode converters
24 #include "unicode/uperf.h"
25 #include "cmemory.h" // for UPRV_LENGTHOF
28 /* definitions and text buffers */
30 #define INPUT_CAPACITY (1024*1024)
31 #define INTERMEDIATE_CAPACITY 4096
32 #define INTERMEDIATE_SMALL_CAPACITY 20
33 #define PIVOT_CAPACITY 1024
34 #define OUTPUT_CAPACITY INPUT_CAPACITY
36 static char utf8
[INPUT_CAPACITY
];
37 static UChar pivot
[INTERMEDIATE_CAPACITY
];
39 static UChar output
[OUTPUT_CAPACITY
];
40 static char intermediate
[OUTPUT_CAPACITY
];
42 static int32_t utf8Length
, encodedLength
, outputLength
, countInputCodePoints
;
44 static int32_t fromUCallbackCount
;
46 // Command-line options specific to utfperf.
47 // Options do not have abbreviations: Force readable command lines.
48 // (Using U+0001 for abbreviation characters.)
56 static UOption options
[UTFPERF_OPTIONS_COUNT
]={
57 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG
),
58 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG
),
59 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG
)
62 static const char *const utfperf_usage
=
63 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
65 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
66 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
70 class UtfPerformanceTest
: public UPerfTest
{
72 UtfPerformanceTest(int32_t argc
, const char *argv
[], UErrorCode
&status
)
73 : UPerfTest(argc
, argv
, options
, UPRV_LENGTHOF(options
), utfperf_usage
, status
) {
74 if (U_SUCCESS(status
)) {
75 charset
= options
[CHARSET
].value
;
77 chunkLength
= atoi(options
[CHUNK_LENGTH
].value
);
78 if (chunkLength
< 1 || OUTPUT_CAPACITY
< chunkLength
) {
79 fprintf(stderr
, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY
);
80 status
= U_ILLEGAL_ARGUMENT_ERROR
;
83 pivotLength
= atoi(options
[PIVOT_LENGTH
].value
);
84 if (pivotLength
< 1 || PIVOT_CAPACITY
< pivotLength
) {
85 fprintf(stderr
, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY
);
86 status
= U_ILLEGAL_ARGUMENT_ERROR
;
90 UPerfTest::getBuffer(inputLength
, status
);
91 countInputCodePoints
= u_countChar32(buffer
, bufferLen
);
92 u_strToUTF8(utf8
, (int32_t)sizeof(utf8
), &utf8Length
, buffer
, bufferLen
, &status
);
96 virtual UPerfFunction
* runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
98 const UChar
*getBuffer() const { return buffer
; }
99 int32_t getBufferLen() const { return bufferLen
; }
102 int32_t chunkLength
, pivotLength
;
106 // Custom callback for counting callback calls.
107 static void U_CALLCONV
108 fromUCallback(const void *context
,
109 UConverterFromUnicodeArgs
*fromUArgs
,
110 const UChar
*codeUnits
,
113 UConverterCallbackReason reason
,
114 UErrorCode
*pErrorCode
) {
115 if (reason
<= UCNV_IRREGULAR
) {
116 ++fromUCallbackCount
;
118 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context
, fromUArgs
, codeUnits
, length
, codePoint
, reason
, pErrorCode
);
122 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
123 class Command
: public UPerfFunction
{
125 Command(const UtfPerformanceTest
&testcase
)
126 : testcase(testcase
),
127 input(testcase
.getBuffer()), inputLength(testcase
.getBufferLen()),
128 errorCode(U_ZERO_ERROR
) {
129 cnv
=ucnv_open(testcase
.charset
, &errorCode
);
130 if (U_FAILURE(errorCode
)) {
131 fprintf(stderr
, "error opening converter for \"%s\" - %s\n", testcase
.charset
, u_errorName(errorCode
));
133 ucnv_setFromUCallBack(cnv
, fromUCallback
, NULL
, NULL
, NULL
, &errorCode
);
137 if(U_SUCCESS(errorCode
)) {
141 // virtual void call(UErrorCode* pErrorCode) { ... }
142 virtual long getOperationsPerIteration(){
143 return countInputCodePoints
;
146 const UtfPerformanceTest
&testcase
;
149 UErrorCode errorCode
;
153 // Test roundtrip UTF-16->encoding->UTF-16.
154 class Roundtrip
: public Command
{
156 Roundtrip(const UtfPerformanceTest
&testcase
) : Command(testcase
) {}
158 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
159 Roundtrip
* t
= new Roundtrip(testcase
);
160 if (U_SUCCESS(t
->errorCode
)){
167 virtual void call(UErrorCode
* pErrorCode
){
168 const UChar
*pIn
, *pInLimit
;
169 UChar
*pOut
, *pOutLimit
;
170 char *pInter
, *pInterLimit
;
175 fromUCallbackCount
=0;
178 pInLimit
=input
+inputLength
;
181 pOutLimit
=output
+OUTPUT_CAPACITY
;
183 pInterLimit
=intermediate
+testcase
.chunkLength
;
185 encodedLength
=outputLength
=0;
189 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
191 ucnv_fromUnicode(cnv
, &pInter
, pInterLimit
, &pIn
, pInLimit
, NULL
, TRUE
, pErrorCode
);
192 encodedLength
+=(int32_t)(pInter
-intermediate
);
194 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
195 /* make sure that we convert once more to really flush */
196 *pErrorCode
=U_ZERO_ERROR
;
197 } else if(U_FAILURE(*pErrorCode
)) {
199 } else if(pIn
==pInLimit
) {
203 /* convert the block [intermediate..pInter[ back to UTF-16 */
205 ucnv_toUnicode(cnv
, &pOut
, pOutLimit
,&p
, pInter
,NULL
, flush
,pErrorCode
);
206 if(U_FAILURE(*pErrorCode
)) {
209 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
212 outputLength
=pOut
-output
;
213 if(inputLength
!=outputLength
) {
214 fprintf(stderr
, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength
, outputLength
);
215 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
220 // Test one-way conversion UTF-16->encoding.
221 class FromUnicode
: public Command
{
223 FromUnicode(const UtfPerformanceTest
&testcase
) : Command(testcase
) {}
225 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
226 FromUnicode
* t
= new FromUnicode(testcase
);
227 if (U_SUCCESS(t
->errorCode
)){
234 virtual void call(UErrorCode
* pErrorCode
){
235 const UChar
*pIn
, *pInLimit
;
236 char *pInter
, *pInterLimit
;
238 ucnv_resetFromUnicode(cnv
);
239 fromUCallbackCount
=0;
242 pInLimit
=input
+inputLength
;
244 pInterLimit
=intermediate
+testcase
.chunkLength
;
250 ucnv_fromUnicode(cnv
, &pInter
, pInterLimit
, &pIn
, pInLimit
, NULL
, TRUE
, pErrorCode
);
251 encodedLength
+=(int32_t)(pInter
-intermediate
);
253 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
254 /* make sure that we convert once more to really flush */
255 *pErrorCode
=U_ZERO_ERROR
;
256 } else if(U_FAILURE(*pErrorCode
)) {
265 // Test one-way conversion UTF-8->encoding.
266 class FromUTF8
: public Command
{
268 FromUTF8(const UtfPerformanceTest
&testcase
)
271 input8(utf8
), input8Length(utf8Length
) {
272 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
275 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
276 FromUTF8
* t
= new FromUTF8(testcase
);
277 if (U_SUCCESS(t
->errorCode
)){
287 virtual void call(UErrorCode
* pErrorCode
){
288 const char *pIn
, *pInLimit
;
289 char *pInter
, *pInterLimit
;
290 UChar
*pivotSource
, *pivotTarget
, *pivotLimit
;
292 ucnv_resetToUnicode(utf8Cnv
);
293 ucnv_resetFromUnicode(cnv
);
294 fromUCallbackCount
=0;
297 pInLimit
=input8
+input8Length
;
299 pInterLimit
=intermediate
+testcase
.chunkLength
;
301 pivotSource
=pivotTarget
=pivot
;
302 pivotLimit
=pivot
+testcase
.pivotLength
;
308 ucnv_convertEx(cnv
, utf8Cnv
,
309 &pInter
, pInterLimit
,
311 pivot
, &pivotSource
, &pivotTarget
, pivotLimit
,
312 FALSE
, TRUE
, pErrorCode
);
313 encodedLength
+=(int32_t)(pInter
-intermediate
);
315 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
316 /* make sure that we convert once more to really flush */
317 *pErrorCode
=U_ZERO_ERROR
;
318 } else if(U_FAILURE(*pErrorCode
)) {
328 int32_t input8Length
;
331 UPerfFunction
* UtfPerformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
) {
333 case 0: name
= "Roundtrip"; if (exec
) return Roundtrip::get(*this); break;
334 case 1: name
= "FromUnicode"; if (exec
) return FromUnicode::get(*this); break;
335 case 2: name
= "FromUTF8"; if (exec
) return FromUTF8::get(*this); break;
336 default: name
= ""; break;
341 int main(int argc
, const char *argv
[])
343 // Default values for command-line options.
344 options
[CHARSET
].value
= "UTF-8";
345 options
[CHUNK_LENGTH
].value
= "4096";
346 options
[PIVOT_LENGTH
].value
= "1024";
348 UErrorCode status
= U_ZERO_ERROR
;
349 UtfPerformanceTest
test(argc
, argv
, status
);
351 if (U_FAILURE(status
)){
352 printf("The error is %s\n", u_errorName(status
));
357 if (test
.run() == FALSE
){
358 fprintf(stderr
, "FAILED: Tests could not be run please check the "
363 if (fromUCallbackCount
> 0) {
364 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount
);