2 **********************************************************************
3 * Copyright (C) 2002-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: utfperf.cpp
8 * tab size: 8 (not used)
11 * created on: 2005Nov17
12 * created by: Raymond Yang
14 * Ported from utfper.c created by Markus W. Scherer
15 * Performance test program for Unicode converters
20 #include "unicode/uperf.h"
// Element count of a fixed-size array, cast to int32_t.
// Computed with sizeof, so the argument must be a real array, not a pointer.
23 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
25 /* definitions and text buffers */
// Capacity (in elements) of utf8[]; also the value of OUTPUT_CAPACITY below.
27 #define INPUT_CAPACITY (1024*1024)
// Capacity (in UChars) with which pivot[] is declared.
28 #define INTERMEDIATE_CAPACITY 4096
// NOTE(review): no use of this macro is visible in this excerpt.
29 #define INTERMEDIATE_SMALL_CAPACITY 20
// Upper bound accepted for the pivot option in the UtfPerformanceTest constructor.
30 #define PIVOT_CAPACITY 1024
// Capacity of output[] and intermediate[]; also the upper bound accepted for the chunk option.
31 #define OUTPUT_CAPACITY INPUT_CAPACITY
// UTF-8 form of the input text; written by u_strToUTF8() in the
// UtfPerformanceTest constructor and read by FromUTF8 as its source.
33 static char utf8
[INPUT_CAPACITY
];
// UTF-16 pivot buffer handed to ucnv_convertEx() in FromUTF8::call().
// Declared with INTERMEDIATE_CAPACITY elements, while the pivot option is
// validated against PIVOT_CAPACITY.
34 static UChar pivot
[INTERMEDIATE_CAPACITY
];
// Destination of the ucnv_toUnicode() call in Roundtrip::call().
36 static UChar output
[OUTPUT_CAPACITY
];
// Destination of the charset bytes produced by the tests; each test sets its
// write limit to intermediate+chunkLength.
37 static char intermediate
[OUTPUT_CAPACITY
];
// Shared lengths and counters:
//   utf8Length           - output length reported by u_strToUTF8() for utf8[]
//   encodedLength        - accumulated number of bytes written to intermediate[]
//   outputLength         - number of UChars written to output[] by Roundtrip::call()
//   countInputCodePoints - u_countChar32() of the input; returned by
//                          Command::getOperationsPerIteration()
39 static int32_t utf8Length
, encodedLength
, outputLength
, countInputCodePoints
;
// Incremented by fromUCallback(), reset to 0 at the start of every call(),
// and printed by main() when it is greater than 0.
41 static int32_t fromUCallbackCount
;
43 // Command-line options specific to utfperf.
44 // Options do not have abbreviations: Force readable command lines.
45 // (Using U+0001 for abbreviation characters.)
// Option table passed to the UPerfTest base-class constructor together with
// LENGTHOF(options). Other code in this file reads entries as options[CHARSET],
// options[CHUNK_LENGTH] and options[PIVOT_LENGTH].
// NOTE(review): the declaration of those index constants and of
// UTFPERF_OPTIONS_COUNT is not visible in this excerpt - presumably an enum in
// the same order as the entries below; confirm.
// main() stores default strings into the .value fields before the test object
// is constructed.
53 static UOption options
[UTFPERF_OPTIONS_COUNT
]={
54 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG
),
55 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG
),
56 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG
)
// Usage text describing the utfperf-specific options; passed to the UPerfTest
// base-class constructor by UtfPerformanceTest.
// NOTE(review): the literal appears truncated in this excerpt (no terminating
// ';' is visible); the text itself is runtime output and is left unchanged.
59 static const char *const utfperf_usage
=
60 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
62 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
63 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
// Test driver object. Reads the utfperf-specific options, prepares the shared
// input data (code point count and UTF-8 copy), and maps test indexes to the
// Roundtrip / FromUnicode / FromUTF8 test functions via runIndexedTest().
67 class UtfPerformanceTest
: public UPerfTest
{
// Constructor.
//   argc, argv - command line, forwarded to UPerfTest along with the options
//                table and usage text.
//   status     - in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR when the
//                chunk or pivot value is out of range, and also passed to
//                getBuffer() and u_strToUTF8().
69 UtfPerformanceTest(int32_t argc
, const char *argv
[], UErrorCode
&status
)
70 : UPerfTest(argc
, argv
, options
, LENGTHOF(options
), utfperf_usage
, status
) {
// Option values are only examined if the base-class constructor succeeded.
71 if (U_SUCCESS(status
)) {
72 charset
= options
[CHARSET
].value
;
// atoi() has no error reporting; a value without a leading number converts
// to 0 and is then rejected by the range check that follows.
74 chunkLength
= atoi(options
[CHUNK_LENGTH
].value
);
// The chunk length must fit into intermediate[] (OUTPUT_CAPACITY bytes).
75 if (chunkLength
< 1 || OUTPUT_CAPACITY
< chunkLength
) {
76 fprintf(stderr
, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY
);
77 status
= U_ILLEGAL_ARGUMENT_ERROR
;
80 pivotLength
= atoi(options
[PIVOT_LENGTH
].value
);
// The pivot length is limited to PIVOT_CAPACITY UChars.
81 if (pivotLength
< 1 || PIVOT_CAPACITY
< pivotLength
) {
82 fprintf(stderr
, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY
);
83 status
= U_ILLEGAL_ARGUMENT_ERROR
;
// NOTE(review): presumably UPerfTest::getBuffer() loads the input text into
// buffer/bufferLen - confirm against uperf.h. The declaration of inputLength
// is not visible in this excerpt.
87 UPerfTest::getBuffer(inputLength
, status
);
// Count the code points of the UTF-16 input (used as operations per iteration).
88 countInputCodePoints
= u_countChar32(buffer
, bufferLen
);
// Produce the UTF-8 copy of the input in utf8[]; its length goes to utf8Length.
89 u_strToUTF8(utf8
, (int32_t)sizeof(utf8
), &utf8Length
, buffer
, bufferLen
, &status
);
// Selects a test by index; defined out of line after the test classes.
93 virtual UPerfFunction
* runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
= NULL
);
// Read-only accessors for buffer and bufferLen; Command uses them to obtain
// the UTF-16 input pointer and length.
95 const UChar
*getBuffer() const { return buffer
; }
96 int32_t getBufferLen() const { return bufferLen
; }
// Values of the chunk and pivot options as parsed in the constructor; read by
// the test classes through their testcase reference.
99 int32_t chunkLength
, pivotLength
;
103 // Custom callback for counting callback calls.
// Installed on the target converter by the Command constructor via
// ucnv_setFromUCallBack(). Increments fromUCallbackCount when
// reason <= UCNV_IRREGULAR and passes its arguments on to
// UCNV_FROM_U_CALLBACK_SUBSTITUTE.
// NOTE(review): `length` and `codePoint` are forwarded below, but their
// parameter declarations are not visible in this excerpt.
104 static void U_CALLCONV
105 fromUCallback(const void *context
,
106 UConverterFromUnicodeArgs
*fromUArgs
,
107 const UChar
*codeUnits
,
110 UConverterCallbackReason reason
,
111 UErrorCode
*pErrorCode
) {
// Only reasons up to and including UCNV_IRREGULAR are counted.
112 if (reason
<= UCNV_IRREGULAR
) {
113 ++fromUCallbackCount
;
// Hand the same arguments to the substitution callback.
115 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context
, fromUArgs
, codeUnits
, length
, codePoint
, reason
, pErrorCode
);
119 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
// Holds the target converter (cnv), the UTF-16 input pointer and length taken
// from the test case, and an errorCode that records setup failures.
120 class Command
: public UPerfFunction
{
// Constructor: keeps a reference to the test case, copies its input pointer
// and length, opens a converter for testcase.charset and installs
// fromUCallback on it. Failures are left in errorCode (an open failure is
// also printed to stderr); the get() factories of the subclasses test
// errorCode after construction.
122 Command(const UtfPerformanceTest
&testcase
)
123 : testcase(testcase
),
124 input(testcase
.getBuffer()), inputLength(testcase
.getBufferLen()),
125 errorCode(U_ZERO_ERROR
) {
126 cnv
=ucnv_open(testcase
.charset
, &errorCode
);
127 if (U_FAILURE(errorCode
)) {
128 fprintf(stderr
, "error opening converter for \"%s\" - %s\n", testcase
.charset
, u_errorName(errorCode
));
// Install the counting callback; errorCode is passed along as the in/out status.
130 ucnv_setFromUCallBack(cnv
, fromUCallback
, NULL
, NULL
, NULL
, &errorCode
);
// NOTE(review): the enclosing function header and the statement guarded by
// this check are not visible in this excerpt - presumably the destructor
// closing cnv; confirm.
134 if(U_SUCCESS(errorCode
)) {
138 // virtual void call(UErrorCode* pErrorCode) { ... }
// Operations per iteration = number of code points in the input text.
139 virtual long getOperationsPerIteration(){
140 return countInputCodePoints
;
// Test case that supplies charset, chunkLength and pivotLength.
143 const UtfPerformanceTest
&testcase
;
// Status of the setup done in the constructor(s).
146 UErrorCode errorCode
;
150 // Test roundtrip UTF-16->encoding->UTF-16.
151 class Roundtrip
: public Command
{
// All setup is done by the Command constructor.
153 Roundtrip(const UtfPerformanceTest
&testcase
) : Command(testcase
) {}
// Factory used by runIndexedTest(): allocates a Roundtrip and tests the
// errorCode left by construction.
// NOTE(review): both outcomes of the check lie outside this excerpt; verify
// that the failure path releases `t`.
155 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
156 Roundtrip
* t
= new Roundtrip(testcase
);
157 if (U_SUCCESS(t
->errorCode
)){
// One timed iteration: converts the UTF-16 input to the charset into
// intermediate[] (at most testcase.chunkLength bytes per ucnv_fromUnicode
// call) and converts the bytes back to UTF-16 into output[]. Sets
// *pErrorCode to U_INTERNAL_PROGRAM_ERROR if the resulting length differs
// from inputLength.
//   pErrorCode - in/out status used by the converter calls.
// NOTE(review): the loop structure and the declarations/initialization of
// `p`, `flush`, pIn, pOut and pInter are not visible in this excerpt.
164 virtual void call(UErrorCode
* pErrorCode
){
165 const UChar
*pIn
, *pInLimit
;
166 UChar
*pOut
, *pOutLimit
;
167 char *pInter
, *pInterLimit
;
// Start each iteration with a fresh callback count.
172 fromUCallbackCount
=0;
175 pInLimit
=input
+inputLength
;
178 pOutLimit
=output
+OUTPUT_CAPACITY
;
// Restrict the charset output to chunkLength bytes of intermediate[].
180 pInterLimit
=intermediate
+testcase
.chunkLength
;
182 encodedLength
=outputLength
=0;
186 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
188 ucnv_fromUnicode(cnv
, &pInter
, pInterLimit
, &pIn
, pInLimit
, NULL
, TRUE
, pErrorCode
);
// Add the number of bytes produced by this call to the running total.
189 encodedLength
+=(int32_t)(pInter
-intermediate
);
// A buffer overflow is expected when the chunk fills up: clear it and go on.
191 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
192 /* make sure that we convert once more to really flush */
193 *pErrorCode
=U_ZERO_ERROR
;
194 } else if(U_FAILURE(*pErrorCode
)) {
// No error and all input consumed.
196 } else if(pIn
==pInLimit
) {
200 /* convert the block [intermediate..pInter[ back to UTF-16 */
202 ucnv_toUnicode(cnv
, &pOut
, pOutLimit
,&p
, pInter
,NULL
, flush
,pErrorCode
);
203 if(U_FAILURE(*pErrorCode
)) {
206 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
// Pointer difference stored into an int32_t without the explicit cast that
// the encodedLength update uses.
209 outputLength
=pOut
-output
;
// The round trip is considered failed when the UTF-16 lengths differ.
210 if(inputLength
!=outputLength
) {
211 fprintf(stderr
, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength
, outputLength
);
212 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
;
217 // Test one-way conversion UTF-16->encoding.
218 class FromUnicode
: public Command
{
// All setup is done by the Command constructor.
220 FromUnicode(const UtfPerformanceTest
&testcase
) : Command(testcase
) {}
// Factory used by runIndexedTest(): allocates a FromUnicode and tests the
// errorCode left by construction.
// NOTE(review): both outcomes of the check lie outside this excerpt; verify
// that the failure path releases `t`.
222 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
223 FromUnicode
* t
= new FromUnicode(testcase
);
224 if (U_SUCCESS(t
->errorCode
)){
// One timed iteration: converts the UTF-16 input to the charset into
// intermediate[], at most testcase.chunkLength bytes per ucnv_fromUnicode call.
//   pErrorCode - in/out status used by the converter call.
// NOTE(review): the loop structure, the initialization of pIn/pInter and any
// reset of encodedLength are not visible in this excerpt.
231 virtual void call(UErrorCode
* pErrorCode
){
232 const UChar
*pIn
, *pInLimit
;
233 char *pInter
, *pInterLimit
;
// Reset the converter's from-Unicode state and the callback count before
// each iteration.
235 ucnv_resetFromUnicode(cnv
);
236 fromUCallbackCount
=0;
239 pInLimit
=input
+inputLength
;
// Restrict the charset output to chunkLength bytes of intermediate[].
241 pInterLimit
=intermediate
+testcase
.chunkLength
;
247 ucnv_fromUnicode(cnv
, &pInter
, pInterLimit
, &pIn
, pInLimit
, NULL
, TRUE
, pErrorCode
);
// Add the number of bytes produced by this call to the running total.
248 encodedLength
+=(int32_t)(pInter
-intermediate
);
// A buffer overflow is expected when the chunk fills up: clear it and go on.
250 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
251 /* make sure that we convert once more to really flush */
252 *pErrorCode
=U_ZERO_ERROR
;
253 } else if(U_FAILURE(*pErrorCode
)) {
262 // Test one-way conversion UTF-8->encoding.
263 class FromUTF8
: public Command
{
// Constructor: in addition to the Command setup, points input8/input8Length
// at the file-level utf8[] / utf8Length and opens a "UTF-8" source converter,
// reporting through the same errorCode.
// NOTE(review): the first part of the initializer list is not visible in this
// excerpt.
265 FromUTF8(const UtfPerformanceTest
&testcase
)
268 input8(utf8
), input8Length(utf8Length
) {
269 utf8Cnv
=ucnv_open("UTF-8", &errorCode
);
// Factory used by runIndexedTest(): allocates a FromUTF8 and tests the
// errorCode left by construction.
// NOTE(review): both outcomes of the check lie outside this excerpt; verify
// that the failure path releases `t`.
272 static UPerfFunction
* get(const UtfPerformanceTest
&testcase
) {
273 FromUTF8
* t
= new FromUTF8(testcase
);
274 if (U_SUCCESS(t
->errorCode
)){
// One timed iteration: converts the UTF-8 input to the charset with
// ucnv_convertEx(), writing at most testcase.chunkLength bytes of
// intermediate[] per call and using the first testcase.pivotLength UChars
// of pivot[] as the pivot buffer.
//   pErrorCode - in/out status used by the converter call.
// NOTE(review): the loop structure, the initialization of pIn/pInter and any
// reset of encodedLength are not visible in this excerpt.
284 virtual void call(UErrorCode
* pErrorCode
){
285 const char *pIn
, *pInLimit
;
286 char *pInter
, *pInterLimit
;
287 UChar
*pivotSource
, *pivotTarget
, *pivotLimit
;
// Reset both converters and the callback count before each iteration.
289 ucnv_resetToUnicode(utf8Cnv
);
290 ucnv_resetFromUnicode(cnv
);
291 fromUCallbackCount
=0;
294 pInLimit
=input8
+input8Length
;
// Restrict the charset output to chunkLength bytes of intermediate[].
296 pInterLimit
=intermediate
+testcase
.chunkLength
;
// Start with an empty pivot buffer and limit it to pivotLength UChars.
298 pivotSource
=pivotTarget
=pivot
;
299 pivotLimit
=pivot
+testcase
.pivotLength
;
// cnv is the target converter, utf8Cnv the source converter.
// NOTE(review): the source pointer/limit arguments are not visible in this
// excerpt; per the ICU ucnv_convertEx signature the trailing FALSE/TRUE are
// the reset and flush flags - confirm.
305 ucnv_convertEx(cnv
, utf8Cnv
,
306 &pInter
, pInterLimit
,
308 pivot
, &pivotSource
, &pivotTarget
, pivotLimit
,
309 FALSE
, TRUE
, pErrorCode
);
// Add the number of bytes produced by this call to the running total.
310 encodedLength
+=(int32_t)(pInter
-intermediate
);
// A buffer overflow is expected when the chunk fills up: clear it and go on.
312 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) {
313 /* make sure that we convert once more to really flush */
314 *pErrorCode
=U_ZERO_ERROR
;
315 } else if(U_FAILURE(*pErrorCode
)) {
// Length of the UTF-8 input, initialized from utf8Length in the constructor.
325 int32_t input8Length
;
// Maps a test index to its name and, when exec is set, to a newly created
// test function object.
//   index - 0: Roundtrip, 1: FromUnicode, 2: FromUTF8; any other value sets
//           name to "".
//   exec  - when true, the matching get() factory is called and its result
//           returned; otherwise only name is set.
//   name  - out: receives the test name.
//   par   - not used in the visible part of this function.
// NOTE(review): the switch header and the return taken after a `break` are
// not visible in this excerpt.
328 UPerfFunction
* UtfPerformanceTest::runIndexedTest(int32_t index
, UBool exec
, const char* &name
, char* par
) {
330 case 0: name
= "Roundtrip"; if (exec
) return Roundtrip::get(*this); break;
331 case 1: name
= "FromUnicode"; if (exec
) return FromUnicode::get(*this); break;
332 case 2: name
= "FromUTF8"; if (exec
) return FromUTF8::get(*this); break;
333 default: name
= ""; break;
// Program entry point: stores the option defaults, constructs the test
// object from the command line, runs the tests, and prints the number of
// fromUnicode callback calls counted in the last iteration when it is
// non-zero.
// NOTE(review): the braces, return statements and the remainder of the
// "FAILED" message are not visible in this excerpt.
338 int main(int argc
, const char *argv
[])
340 // Default values for command-line options.
// These must be set before the UtfPerformanceTest constructor reads
// options[...].value with atoi().
341 options
[CHARSET
].value
= "UTF-8";
342 options
[CHUNK_LENGTH
].value
= "4096";
343 options
[PIVOT_LENGTH
].value
= "1024";
345 UErrorCode status
= U_ZERO_ERROR
;
346 UtfPerformanceTest
test(argc
, argv
, status
);
// Report construction/option errors by name.
348 if (U_FAILURE(status
)){
349 printf("The error is %s\n", u_errorName(status
));
354 if (test
.run() == FALSE
){
355 fprintf(stderr
, "FAILED: Tests could not be run please check the "
// fromUCallbackCount is reset at the start of every call(), so the value
// seen here belongs to the most recent iteration.
360 if (fromUCallbackCount
> 0) {
361 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount
);