/*
**********************************************************************
* Copyright (C) 2002-2014, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* file name: utfperf.cpp
* tab size: 8 (not used)
* created on: 2005Nov17
* created by: Raymond Yang
*
* Ported from utfper.c created by Markus W. Scherer
* Performance test program for Unicode converters
*/
20 #include "unicode/uperf.h"
21 #include "cmemory.h" // for UPRV_LENGTHOF
/* definitions and text buffers */

/* Size of the utf8[] buffer in bytes; OUTPUT_CAPACITY is defined in terms of it. */
#define INPUT_CAPACITY (1024*1024)
/* Size of the pivot[] buffer in UChars. */
#define INTERMEDIATE_CAPACITY 4096
/* NOTE(review): no use of this macro is visible in this file section. */
#define INTERMEDIATE_SMALL_CAPACITY 20
/* Largest value accepted for the --pivot option. */
#define PIVOT_CAPACITY 1024
/* Size of output[] and intermediate[]; also the largest value accepted for --chunk. */
#define OUTPUT_CAPACITY INPUT_CAPACITY
32 static char utf8
[INPUT_CAPACITY
];
33 static UChar pivot
[INTERMEDIATE_CAPACITY
];
35 static UChar output
[OUTPUT_CAPACITY
];
36 static char intermediate
[OUTPUT_CAPACITY
];
38 static int32_t utf8Length
, encodedLength
, outputLength
, countInputCodePoints
;
40 static int32_t fromUCallbackCount
;
42 // Command-line options specific to utfperf.
43 // Options do not have abbreviations: Force readable command lines.
44 // (Using U+0001 for abbreviation characters.)
52 static UOption options
[UTFPERF_OPTIONS_COUNT
]={
53 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG
),
54 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG
),
55 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG
)
// Usage text for the utfperf-specific options; UtfPerformanceTest passes it to
// the UPerfTest constructor.
// NOTE(review): the embedded listing numbers jump (59 -> 61, 62 -> 66), so
// continuation lines of this literal and its terminating semicolon appear to be
// missing from this view - confirm against the original file before editing.
58 static const char *const utfperf_usage
=
59 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
61 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
62 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
// Test object: reads the utfperf options, validates the chunk and pivot sizes,
// and prepares the shared input data (code point count and UTF-8 copy).
// NOTE(review): the embedded listing numbers skip (67, 72, 77-78, 83-85, 89-91,
// 93, 96-97, 99-101), so access specifiers, closing braces and some
// declarations are not visible here - confirm against the original file.
66 class UtfPerformanceTest
: public UPerfTest
{
// Forwards argc/argv plus the utfperf option table and usage text to UPerfTest;
// status is an in/out error code.
68 UtfPerformanceTest(int32_t argc
, const char *argv
[], UErrorCode
&status
)
69 : UPerfTest(argc
, argv
, options
, UPRV_LENGTHOF(options
), utfperf_usage
, status
) {
// Nothing below runs if the base-class constructor reported a failure.
70 if (U_SUCCESS(status
)) {
// Charset name as given on the command line (or the default set in main()).
71 charset
= options
[CHARSET
].value
;
// Chunk length: parsed with atoi and required to lie in 1..OUTPUT_CAPACITY,
// which is the size of intermediate[].
73 chunkLength
= atoi(options
[CHUNK_LENGTH
].value
);
74 if (chunkLength
< 1 || OUTPUT_CAPACITY
< chunkLength
) {
75 fprintf(stderr
, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY
);
76 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Pivot length: parsed with atoi and required to lie in 1..PIVOT_CAPACITY.
79 pivotLength
= atoi(options
[PIVOT_LENGTH
].value
);
80 if (pivotLength
< 1 || PIVOT_CAPACITY
< pivotLength
) {
81 fprintf(stderr
, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY
);
82 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// Ask the base class for the input text; the code below then reads the
// buffer/bufferLen members. NOTE(review): the declaration of inputLength is
// not visible in this listing.
86 UPerfTest::getBuffer(inputLength
, status
);
// Code point count of the input; Command reports it as operations per iteration.
87 countInputCodePoints
= u_countChar32(buffer
, bufferLen
);
// UTF-8 copy of the input for the FromUTF8 test; failures land in status.
88 u_strToUTF8(utf8
, (int32_t)sizeof(utf8
), &utf8Length
, buffer
, bufferLen
, &status
);
// Test dispatcher; defined out of line near the end of the file.
92 virtual UPerfFunction
* runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
// Read-only accessors for the input text, used by Command.
94 const UChar
*getBuffer() const { return buffer
; }
95 int32_t getBufferLen() const { return bufferLen
; }
// Validated --chunk and --pivot values. NOTE(review): the charset member
// assigned in the constructor is declared on a line not visible here.
98 int32_t chunkLength
, pivotLength
;
102 // Custom callback for counting callback calls.
103 static void U_CALLCONV
104 fromUCallback(const void *context
,
105 UConverterFromUnicodeArgs
*fromUArgs
,
106 const UChar
*codeUnits
,
109 UConverterCallbackReason reason
,
110 UErrorCode
*pErrorCode
) {
111 if (reason
<= UCNV_IRREGULAR
) {
112 ++fromUCallbackCount
;
114 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context
, fromUArgs
, codeUnits
, length
, codePoint
, reason
, pErrorCode
);
118 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
// NOTE(review): the embedded listing numbers skip (120, 128, 130-132, 134-136,
// 140-141, 143-144, 146-148), so access specifiers, closing braces, the
// destructor header and some member declarations are not visible here.
119 class Command
: public UPerfFunction
{
// Captures the test object and its input text, then opens the converter for the
// requested charset. errorCode starts as U_ZERO_ERROR and records any failure;
// the derived classes' get() factories inspect it.
121 Command(const UtfPerformanceTest
&testcase
)
122 : testcase(testcase
),
123 input(testcase
.getBuffer()), inputLength(testcase
.getBufferLen()),
124 errorCode(U_ZERO_ERROR
) {
125 cnv
=ucnv_open(testcase
.charset
, &errorCode
);
// Report an unusable charset name on stderr.
126 if (U_FAILURE(errorCode
)) {
127 fprintf(stderr
, "error opening converter for \"%s\" - %s\n", testcase
.charset
, u_errorName(errorCode
));
// Install the counting callback; the old-callback out-parameters are not needed.
129 ucnv_setFromUCallBack(cnv
, fromUCallback
, NULL
, NULL
, NULL
, &errorCode
);
// NOTE(review): presumably the guard inside the destructor around closing cnv -
// neither the enclosing function header nor the guarded statement is visible.
133 if(U_SUCCESS(errorCode
)) {
137 // virtual void call(UErrorCode* pErrorCode) { ... }
// One operation per input code point, as counted in the UtfPerformanceTest constructor.
138 virtual long getOperationsPerIteration(){
139 return countInputCodePoints
;
// Shared state read by the derived classes. NOTE(review): the declarations of
// input, inputLength and cnv are on lines not visible here.
142 const UtfPerformanceTest
&testcase
;
145 UErrorCode errorCode
;
149 // Test roundtrip UTF-16->encoding->UTF-16.
// NOTE(review): the embedded listing numbers skip repeatedly inside this class
// (e.g. 157-162, 167-170, 182-184, 194, 196-198, 203-204, 206-207), so the loop
// construct, several assignments and the early exits are not visible here.
150 class Roundtrip
: public Command
{
// All setup happens in Command.
152 Roundtrip(const UtfPerformanceTest
&testcase
) : Command(testcase
) {}
// Factory: constructs a Roundtrip and tests the errorCode left by its
// constructor. The statements taken on success and on failure are not visible
// in this listing.
154 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
155 Roundtrip
* t
= new Roundtrip(testcase
);
156 if (U_SUCCESS(t
->errorCode
)){
// One timed iteration: convert the input to the charset in chunks and convert
// each chunk back to UTF-16, then check that the UTF-16 length is unchanged.
// Errors are reported through *pErrorCode.
163 virtual void call(UErrorCode
* pErrorCode
){
164 const UChar
*pIn
, *pInLimit
;
165 UChar
*pOut
, *pOutLimit
;
166 char *pInter
, *pInterLimit
;
// Start every iteration with a fresh callback count.
171 fromUCallbackCount
=0;
// End of the UTF-16 input.
174 pInLimit
=input
+inputLength
;
// End of the UTF-16 output buffer.
177 pOutLimit
=output
+OUTPUT_CAPACITY
;
// Only the first chunkLength bytes of intermediate[] are offered per call.
179 pInterLimit
=intermediate
+testcase
.chunkLength
;
181 encodedLength
=outputLength
=0;
185 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
187 ucnv_fromUnicode(cnv
, &pInter
, pInterLimit
, &pIn
, pInLimit
, NULL
, TRUE
, pErrorCode
);
// Add the bytes written by this call to the running total.
188 encodedLength
+=(int32_t)(pInter
-intermediate
);
// A full chunk is reported as a buffer overflow; the error is cleared so
// conversion can continue.
190 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
191 /* make sure that we convert once more to really flush */
192 *pErrorCode
=U_ZERO_ERROR
;
193 } else if(U_FAILURE(*pErrorCode
)) {
// All input consumed without overflow. NOTE(review): the statement guarded by
// this branch is not visible; presumably it sets the flush flag used below.
195 } else if(pIn
==pInLimit
) {
199 /* convert the block [intermediate..pInter[ back to UTF-16 */
201 ucnv_toUnicode(cnv
, &pOut
, pOutLimit
,&p
, pInter
,NULL
, flush
,pErrorCode
);
202 if(U_FAILURE(*pErrorCode
)) {
205 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
// The roundtrip must reproduce the input length in UChars; a mismatch is
// reported on stderr and flagged as U_INTERNAL_PROGRAM_ERROR.
208 outputLength
=pOut
-output
;
209 if(inputLength
!=outputLength
) {
210 fprintf(stderr
, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength
, outputLength
);
211 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
216 // Test one-way conversion UTF-16->encoding.
// NOTE(review): the embedded listing numbers skip inside this class (e.g.
// 224-229, 236-237, 241-245, 253-260), so the loop construct, some assignments
// and the exits from call() are not visible here.
217 class FromUnicode
: public Command
{
// All setup happens in Command.
219 FromUnicode(const UtfPerformanceTest
&testcase
) : Command(testcase
) {}
// Factory: constructs a FromUnicode and tests the errorCode left by its
// constructor. The statements taken on success and on failure are not visible
// in this listing.
221 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
222 FromUnicode
* t
= new FromUnicode(testcase
);
223 if (U_SUCCESS(t
->errorCode
)){
// One timed iteration: convert the UTF-16 input to the charset in chunks of at
// most chunkLength bytes. Errors are reported through *pErrorCode.
230 virtual void call(UErrorCode
* pErrorCode
){
231 const UChar
*pIn
, *pInLimit
;
232 char *pInter
, *pInterLimit
;
// Start every iteration from a clean from-Unicode state and callback count.
234 ucnv_resetFromUnicode(cnv
);
235 fromUCallbackCount
=0;
// End of the UTF-16 input.
238 pInLimit
=input
+inputLength
;
// Only the first chunkLength bytes of intermediate[] are offered per call.
240 pInterLimit
=intermediate
+testcase
.chunkLength
;
246 ucnv_fromUnicode(cnv
, &pInter
, pInterLimit
, &pIn
, pInLimit
, NULL
, TRUE
, pErrorCode
);
// Add the bytes written by this call to the running total.
247 encodedLength
+=(int32_t)(pInter
-intermediate
);
// A full chunk is reported as a buffer overflow; the error is cleared so
// conversion can continue.
249 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
250 /* make sure that we convert once more to really flush */
251 *pErrorCode
=U_ZERO_ERROR
;
252 } else if(U_FAILURE(*pErrorCode
)) {
261 // Test one-way conversion UTF-8->encoding.
// NOTE(review): the embedded listing numbers skip inside this class (e.g.
// 265-266, 269-270, 274-282, 291-292, 299-303, 306, 315-323, 325-326), so parts
// of the initializer list, the loop construct, two ucnv_convertEx arguments and
// some member declarations are not visible here.
262 class FromUTF8
: public Command
{
// Besides the Command setup, remembers the UTF-8 copy of the input and opens a
// second converter for UTF-8; failures are recorded in the inherited errorCode.
264 FromUTF8(const UtfPerformanceTest
&testcase
)
267 input8(utf8
), input8Length(utf8Length
) {
268 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
// Factory: constructs a FromUTF8 and tests the errorCode left by its
// constructor. The statements taken on success and on failure are not visible
// in this listing.
271 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
272 FromUTF8
* t
= new FromUTF8(testcase
);
273 if (U_SUCCESS(t
->errorCode
)){
// One timed iteration: convert the UTF-8 input to the charset through the
// UTF-16 pivot buffer, producing at most chunkLength bytes per call. Errors are
// reported through *pErrorCode.
283 virtual void call(UErrorCode
* pErrorCode
){
284 const char *pIn
, *pInLimit
;
285 char *pInter
, *pInterLimit
;
286 UChar
*pivotSource
, *pivotTarget
, *pivotLimit
;
// Start every iteration from clean converter states and callback count.
288 ucnv_resetToUnicode(utf8Cnv
);
289 ucnv_resetFromUnicode(cnv
);
290 fromUCallbackCount
=0;
// End of the UTF-8 input.
293 pInLimit
=input8
+input8Length
;
// Only the first chunkLength bytes of intermediate[] are offered per call.
295 pInterLimit
=intermediate
+testcase
.chunkLength
;
// Empty pivot to begin with; only pivotLength UChars of pivot[] are used.
297 pivotSource
=pivotTarget
=pivot
;
298 pivotLimit
=pivot
+testcase
.pivotLength
;
// cnv is the target converter and utf8Cnv the source converter. The last two
// flags are reset=FALSE and flush=TRUE in ucnv_convertEx's parameter order.
304 ucnv_convertEx(cnv
, utf8Cnv
,
305 &pInter
, pInterLimit
,
307 pivot
, &pivotSource
, &pivotTarget
, pivotLimit
,
308 FALSE
, TRUE
, pErrorCode
);
// Add the bytes written by this call to the running total.
309 encodedLength
+=(int32_t)(pInter
-intermediate
);
// A full chunk is reported as a buffer overflow; the error is cleared so
// conversion can continue.
311 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
312 /* make sure that we convert once more to really flush */
313 *pErrorCode
=U_ZERO_ERROR
;
314 } else if(U_FAILURE(*pErrorCode
)) {
// Length in bytes of the UTF-8 input. NOTE(review): the declarations of input8
// and utf8Cnv are on lines not visible here.
324 int32_t input8Length
;
327 UPerfFunction
* UtfPerformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
) {
329 case 0: name
= "Roundtrip"; if (exec
) return Roundtrip::get(*this); break;
330 case 1: name
= "FromUnicode"; if (exec
) return FromUnicode::get(*this); break;
331 case 2: name
= "FromUTF8"; if (exec
) return FromUTF8::get(*this); break;
332 default: name
= ""; break;
// Program entry point: installs option defaults, builds the test object, runs
// the tests and reports the callback count of the last iteration.
// NOTE(review): the embedded listing numbers skip (338, 343, 346, 349-352,
// 355-358, 361 onward), so the braces, the return statements and the rest of
// the FAILED message are not visible here.
337 int main(int argc
, const char *argv
[])
339 // Default values for command-line options.
// Assigned before the UtfPerformanceTest is constructed, whose constructor
// reads options[...].value.
340 options
[CHARSET
].value
= "UTF-8";
341 options
[CHUNK_LENGTH
].value
= "4096";
342 options
[PIVOT_LENGTH
].value
= "1024";
344 UErrorCode status
= U_ZERO_ERROR
;
345 UtfPerformanceTest
test(argc
, argv
, status
);
// Construction failed (option parsing, size validation or input preparation).
347 if (U_FAILURE(status
)){
348 printf("The error is %s\n", u_errorName(status
));
// Run the tests and report on stderr if the run fails.
353 if (test
.run() == FALSE
){
354 fprintf(stderr
, "FAILED: Tests could not be run please check the "
// Only mention callbacks when the counting callback fired at least once.
359 if (fromUCallbackCount
> 0) {
360 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount
);