]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/perf/utfperf/utfperf.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / perf / utfperf / utfperf.cpp
CommitLineData
73c04bcf 1/*
f3c0d7a5
A
2**************************************************************************
3 * © 2016 and later: Unicode, Inc. and others.
4 * License & terms of use: http://www.unicode.org/copyright.html#License
5 *************************************************************************
6 *************************************************************************
b331163b 7 * Copyright (C) 2002-2014, International Business Machines
46f4442e 8 * Corporation and others. All Rights Reserved.
f3c0d7a5 9 *************************************************************************
46f4442e 10 * file name: utfperf.cpp
f3c0d7a5 11 * encoding: UTF-8
46f4442e
A
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2005Nov17
16 * created by: Raymond Yang
17 *
18 * Ported from utfperf.c created by Markus W. Scherer
19 * Performance test program for Unicode converters
20 */
73c04bcf
A
21
22#include <stdio.h>
46f4442e 23#include <stdlib.h>
73c04bcf 24#include "unicode/uperf.h"
b331163b 25#include "cmemory.h" // for UPRV_LENGTHOF
46f4442e 26#include "uoptions.h"
73c04bcf 27
73c04bcf
A
28/* definitions and text buffers */
29
30#define INPUT_CAPACITY (1024*1024)
31#define INTERMEDIATE_CAPACITY 4096
32#define INTERMEDIATE_SMALL_CAPACITY 20
46f4442e 33#define PIVOT_CAPACITY 1024
73c04bcf
A
34#define OUTPUT_CAPACITY INPUT_CAPACITY
35
46f4442e
A
36static char utf8[INPUT_CAPACITY];
37static UChar pivot[INTERMEDIATE_CAPACITY];
38
73c04bcf 39static UChar output[OUTPUT_CAPACITY];
46f4442e
A
40static char intermediate[OUTPUT_CAPACITY];
41
42static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
43
44static int32_t fromUCallbackCount;
45
46// Command-line options specific to utfperf.
47// Options do not have abbreviations: Force readable command lines.
48// (Using U+0001 for abbreviation characters.)
49enum {
50 CHARSET,
51 CHUNK_LENGTH,
52 PIVOT_LENGTH,
53 UTFPERF_OPTIONS_COUNT
54};
55
56static UOption options[UTFPERF_OPTIONS_COUNT]={
57 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG),
58 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG),
59 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG)
60};
61
62static const char *const utfperf_usage =
63 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
64 "\t Default: UTF-8\n"
65 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
66 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
67 "\t [1024]\n";
68
69// Test object.
70class UtfPerformanceTest : public UPerfTest{
71public:
72 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
b331163b 73 : UPerfTest(argc, argv, options, UPRV_LENGTHOF(options), utfperf_usage, status) {
46f4442e
A
74 if (U_SUCCESS(status)) {
75 charset = options[CHARSET].value;
76
77 chunkLength = atoi(options[CHUNK_LENGTH].value);
78 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
79 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
80 status = U_ILLEGAL_ARGUMENT_ERROR;
81 }
82
83 pivotLength = atoi(options[PIVOT_LENGTH].value);
84 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
85 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
86 status = U_ILLEGAL_ARGUMENT_ERROR;
87 }
88
89 int32_t inputLength;
90 UPerfTest::getBuffer(inputLength, status);
91 countInputCodePoints = u_countChar32(buffer, bufferLen);
92 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
93 }
94 }
73c04bcf 95
46f4442e
A
96 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
97
98 const UChar *getBuffer() const { return buffer; }
99 int32_t getBufferLen() const { return bufferLen; }
100
101 const char *charset;
102 int32_t chunkLength, pivotLength;
103};
73c04bcf 104
46f4442e
A
105U_CDECL_BEGIN
106// Custom callback for counting callback calls.
107static void U_CALLCONV
108fromUCallback(const void *context,
109 UConverterFromUnicodeArgs *fromUArgs,
110 const UChar *codeUnits,
111 int32_t length,
112 UChar32 codePoint,
113 UConverterCallbackReason reason,
114 UErrorCode *pErrorCode) {
115 if (reason <= UCNV_IRREGULAR) {
116 ++fromUCallbackCount;
117 }
118 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
119}
120U_CDECL_END
73c04bcf 121
46f4442e 122// Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
73c04bcf 123class Command : public UPerfFunction {
46f4442e
A
124protected:
125 Command(const UtfPerformanceTest &testcase)
126 : testcase(testcase),
127 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
128 errorCode(U_ZERO_ERROR) {
129 cnv=ucnv_open(testcase.charset, &errorCode);
130 if (U_FAILURE(errorCode)) {
131 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
132 }
133 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
73c04bcf
A
134 }
135public:
46f4442e
A
136 virtual ~Command(){
137 if(U_SUCCESS(errorCode)) {
138 ucnv_close(cnv);
139 }
140 }
141 // virtual void call(UErrorCode* pErrorCode) { ... }
142 virtual long getOperationsPerIteration(){
143 return countInputCodePoints;
144 }
145
146 const UtfPerformanceTest &testcase;
147 const UChar *input;
148 int32_t inputLength;
149 UErrorCode errorCode;
150 UConverter *cnv;
151};
152
153// Test roundtrip UTF-16->encoding->UTF-16.
154class Roundtrip : public Command {
155protected:
156 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
157public:
158 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
159 Roundtrip * t = new Roundtrip(testcase);
73c04bcf
A
160 if (U_SUCCESS(t->errorCode)){
161 return t;
162 } else {
73c04bcf
A
163 delete t;
164 return NULL;
165 }
166 }
73c04bcf
A
167 virtual void call(UErrorCode* pErrorCode){
168 const UChar *pIn, *pInLimit;
169 UChar *pOut, *pOutLimit;
170 char *pInter, *pInterLimit;
171 const char *p;
172 UBool flush;
173
174 ucnv_reset(cnv);
46f4442e 175 fromUCallbackCount=0;
73c04bcf
A
176
177 pIn=input;
178 pInLimit=input+inputLength;
179
180 pOut=output;
181 pOutLimit=output+OUTPUT_CAPACITY;
182
46f4442e 183 pInterLimit=intermediate+testcase.chunkLength;
73c04bcf
A
184
185 encodedLength=outputLength=0;
186 flush=FALSE;
187
46f4442e 188 do {
73c04bcf
A
189 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
190 pInter=intermediate;
46f4442e 191 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
73c04bcf
A
192 encodedLength+=(int32_t)(pInter-intermediate);
193
194 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
46f4442e 195 /* make sure that we convert once more to really flush */
73c04bcf
A
196 *pErrorCode=U_ZERO_ERROR;
197 } else if(U_FAILURE(*pErrorCode)) {
198 return;
46f4442e
A
199 } else if(pIn==pInLimit) {
200 flush=TRUE;
73c04bcf
A
201 }
202
203 /* convert the block [intermediate..pInter[ back to UTF-16 */
204 p=intermediate;
205 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
206 if(U_FAILURE(*pErrorCode)) {
207 return;
208 }
209 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
46f4442e 210 } while(!flush);
73c04bcf
A
211
212 outputLength=pOut-output;
213 if(inputLength!=outputLength) {
214 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
215 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
216 }
217 }
46f4442e
A
218};
219
220// Test one-way conversion UTF-16->encoding.
221class FromUnicode : public Command {
222protected:
223 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
224public:
225 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
226 FromUnicode * t = new FromUnicode(testcase);
227 if (U_SUCCESS(t->errorCode)){
228 return t;
229 } else {
230 delete t;
231 return NULL;
232 }
233 }
234 virtual void call(UErrorCode* pErrorCode){
235 const UChar *pIn, *pInLimit;
236 char *pInter, *pInterLimit;
237
238 ucnv_resetFromUnicode(cnv);
239 fromUCallbackCount=0;
240
241 pIn=input;
242 pInLimit=input+inputLength;
243
244 pInterLimit=intermediate+testcase.chunkLength;
245
246 encodedLength=0;
247
248 for(;;) {
249 pInter=intermediate;
250 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
251 encodedLength+=(int32_t)(pInter-intermediate);
252
253 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
254 /* make sure that we convert once more to really flush */
255 *pErrorCode=U_ZERO_ERROR;
256 } else if(U_FAILURE(*pErrorCode)) {
257 return;
258 } else {
259 break; // all done
260 }
261 }
73c04bcf 262 }
73c04bcf
A
263};
264
46f4442e
A
265// Test one-way conversion UTF-8->encoding.
266class FromUTF8 : public Command {
267protected:
268 FromUTF8(const UtfPerformanceTest &testcase)
269 : Command(testcase),
270 utf8Cnv(NULL),
271 input8(utf8), input8Length(utf8Length) {
272 utf8Cnv=ucnv_open("UTF-8", &errorCode);
273 }
73c04bcf 274public:
46f4442e
A
275 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
276 FromUTF8 * t = new FromUTF8(testcase);
277 if (U_SUCCESS(t->errorCode)){
278 return t;
279 } else {
280 delete t;
281 return NULL;
73c04bcf 282 }
73c04bcf 283 }
46f4442e
A
284 ~FromUTF8() {
285 ucnv_close(utf8Cnv);
286 }
287 virtual void call(UErrorCode* pErrorCode){
288 const char *pIn, *pInLimit;
289 char *pInter, *pInterLimit;
290 UChar *pivotSource, *pivotTarget, *pivotLimit;
291
292 ucnv_resetToUnicode(utf8Cnv);
293 ucnv_resetFromUnicode(cnv);
294 fromUCallbackCount=0;
295
296 pIn=input8;
297 pInLimit=input8+input8Length;
298
299 pInterLimit=intermediate+testcase.chunkLength;
300
301 pivotSource=pivotTarget=pivot;
302 pivotLimit=pivot+testcase.pivotLength;
303
304 encodedLength=0;
305
306 for(;;) {
307 pInter=intermediate;
308 ucnv_convertEx(cnv, utf8Cnv,
309 &pInter, pInterLimit,
310 &pIn, pInLimit,
311 pivot, &pivotSource, &pivotTarget, pivotLimit,
312 FALSE, TRUE, pErrorCode);
313 encodedLength+=(int32_t)(pInter-intermediate);
314
315 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
316 /* make sure that we convert once more to really flush */
317 *pErrorCode=U_ZERO_ERROR;
318 } else if(U_FAILURE(*pErrorCode)) {
319 return;
320 } else {
321 break; // all done
322 }
323 }
324 }
325protected:
326 UConverter *utf8Cnv;
327 const char *input8;
328 int32_t input8Length;
73c04bcf
A
329};
330
46f4442e
A
331UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
332 switch (index) {
333 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break;
334 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break;
335 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break;
336 default: name = ""; break;
337 }
338 return NULL;
339}
73c04bcf
A
340
341int main(int argc, const char *argv[])
342{
46f4442e
A
343 // Default values for command-line options.
344 options[CHARSET].value = "UTF-8";
345 options[CHUNK_LENGTH].value = "4096";
346 options[PIVOT_LENGTH].value = "1024";
347
73c04bcf
A
348 UErrorCode status = U_ZERO_ERROR;
349 UtfPerformanceTest test(argc, argv, status);
350
351 if (U_FAILURE(status)){
352 printf("The error is %s\n", u_errorName(status));
46f4442e 353 test.usage();
73c04bcf
A
354 return status;
355 }
356
357 if (test.run() == FALSE){
358 fprintf(stderr, "FAILED: Tests could not be run please check the "
359 "arguments.\n");
360 return -1;
361 }
46f4442e
A
362
363 if (fromUCallbackCount > 0) {
364 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
365 }
366
73c04bcf
A
367 return 0;
368}