]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/perf/utfperf/utfperf.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / test / perf / utfperf / utfperf.cpp
1 /*
2 **********************************************************************
3 * Copyright (C) 2002-2007, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * file name: utfperf.cpp
7 * encoding: US-ASCII
8 * tab size: 8 (not used)
9 * indentation:4
10 *
11 * created on: 2005Nov17
12 * created by: Raymond Yang
13 *
14 * Ported from utfperf.c, created by Markus W. Scherer
15 * Performance test program for Unicode converters
16 */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include "unicode/uperf.h"
21 #include "uoptions.h"
22
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast expression behaves
 * as a single operand wherever the macro is used (e.g. as a sizeof operand).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
24
25 /* definitions and text buffers */
26
27 #define INPUT_CAPACITY (1024*1024)
28 #define INTERMEDIATE_CAPACITY 4096
29 #define INTERMEDIATE_SMALL_CAPACITY 20
30 #define PIVOT_CAPACITY 1024
31 #define OUTPUT_CAPACITY INPUT_CAPACITY
32
33 static char utf8[INPUT_CAPACITY];
34 static UChar pivot[INTERMEDIATE_CAPACITY];
35
36 static UChar output[OUTPUT_CAPACITY];
37 static char intermediate[OUTPUT_CAPACITY];
38
39 static int32_t utf8Length, encodedLength, outputLength, countInputCodePoints;
40
41 static int32_t fromUCallbackCount;
42
43 // Command-line options specific to utfperf.
44 // Options do not have abbreviations: Force readable command lines.
45 // (Using U+0001 for abbreviation characters.)
46 enum {
47 CHARSET,
48 CHUNK_LENGTH,
49 PIVOT_LENGTH,
50 UTFPERF_OPTIONS_COUNT
51 };
52
53 static UOption options[UTFPERF_OPTIONS_COUNT]={
54 UOPTION_DEF("charset", '\x01', UOPT_REQUIRES_ARG),
55 UOPTION_DEF("chunk", '\x01', UOPT_REQUIRES_ARG),
56 UOPTION_DEF("pivot", '\x01', UOPT_REQUIRES_ARG)
57 };
58
59 static const char *const utfperf_usage =
60 "\t--charset Charset for which to test performance, e.g. windows-1251.\n"
61 "\t Default: UTF-8\n"
62 "\t--chunk Length (in bytes) of charset output chunks. [4096]\n"
63 "\t--pivot Length (in UChars) of the UTF-16 pivot buffer, if applicable.\n"
64 "\t [1024]\n";
65
66 // Test object.
67 class UtfPerformanceTest : public UPerfTest{
68 public:
69 UtfPerformanceTest(int32_t argc, const char *argv[], UErrorCode &status)
70 : UPerfTest(argc, argv, options, LENGTHOF(options), utfperf_usage, status) {
71 if (U_SUCCESS(status)) {
72 charset = options[CHARSET].value;
73
74 chunkLength = atoi(options[CHUNK_LENGTH].value);
75 if (chunkLength < 1 || OUTPUT_CAPACITY < chunkLength) {
76 fprintf(stderr, "error: chunk length must be 1..%ld\n", (long)OUTPUT_CAPACITY);
77 status = U_ILLEGAL_ARGUMENT_ERROR;
78 }
79
80 pivotLength = atoi(options[PIVOT_LENGTH].value);
81 if (pivotLength < 1 || PIVOT_CAPACITY < pivotLength) {
82 fprintf(stderr, "error: pivot length must be 1..%ld\n", (long)PIVOT_CAPACITY);
83 status = U_ILLEGAL_ARGUMENT_ERROR;
84 }
85
86 int32_t inputLength;
87 UPerfTest::getBuffer(inputLength, status);
88 countInputCodePoints = u_countChar32(buffer, bufferLen);
89 u_strToUTF8(utf8, (int32_t)sizeof(utf8), &utf8Length, buffer, bufferLen, &status);
90 }
91 }
92
93 virtual UPerfFunction* runIndexedTest(int32_t index, UBool exec, const char* &name, char* par = NULL);
94
95 const UChar *getBuffer() const { return buffer; }
96 int32_t getBufferLen() const { return bufferLen; }
97
98 const char *charset;
99 int32_t chunkLength, pivotLength;
100 };
101
102 U_CDECL_BEGIN
103 // Custom callback for counting callback calls.
104 static void U_CALLCONV
105 fromUCallback(const void *context,
106 UConverterFromUnicodeArgs *fromUArgs,
107 const UChar *codeUnits,
108 int32_t length,
109 UChar32 codePoint,
110 UConverterCallbackReason reason,
111 UErrorCode *pErrorCode) {
112 if (reason <= UCNV_IRREGULAR) {
113 ++fromUCallbackCount;
114 }
115 UCNV_FROM_U_CALLBACK_SUBSTITUTE(context, fromUArgs, codeUnits, length, codePoint, reason, pErrorCode);
116 }
117 U_CDECL_END
118
119 // Base class for Roundtrip, FromUnicode and FromUTF8 with common setup.
120 class Command : public UPerfFunction {
121 protected:
122 Command(const UtfPerformanceTest &testcase)
123 : testcase(testcase),
124 input(testcase.getBuffer()), inputLength(testcase.getBufferLen()),
125 errorCode(U_ZERO_ERROR) {
126 cnv=ucnv_open(testcase.charset, &errorCode);
127 if (U_FAILURE(errorCode)) {
128 fprintf(stderr, "error opening converter for \"%s\" - %s\n", testcase.charset, u_errorName(errorCode));
129 }
130 ucnv_setFromUCallBack(cnv, fromUCallback, NULL, NULL, NULL, &errorCode);
131 }
132 public:
133 virtual ~Command(){
134 if(U_SUCCESS(errorCode)) {
135 ucnv_close(cnv);
136 }
137 }
138 // virtual void call(UErrorCode* pErrorCode) { ... }
139 virtual long getOperationsPerIteration(){
140 return countInputCodePoints;
141 }
142
143 const UtfPerformanceTest &testcase;
144 const UChar *input;
145 int32_t inputLength;
146 UErrorCode errorCode;
147 UConverter *cnv;
148 };
149
150 // Test roundtrip UTF-16->encoding->UTF-16.
151 class Roundtrip : public Command {
152 protected:
153 Roundtrip(const UtfPerformanceTest &testcase) : Command(testcase) {}
154 public:
155 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
156 Roundtrip * t = new Roundtrip(testcase);
157 if (U_SUCCESS(t->errorCode)){
158 return t;
159 } else {
160 delete t;
161 return NULL;
162 }
163 }
164 virtual void call(UErrorCode* pErrorCode){
165 const UChar *pIn, *pInLimit;
166 UChar *pOut, *pOutLimit;
167 char *pInter, *pInterLimit;
168 const char *p;
169 UBool flush;
170
171 ucnv_reset(cnv);
172 fromUCallbackCount=0;
173
174 pIn=input;
175 pInLimit=input+inputLength;
176
177 pOut=output;
178 pOutLimit=output+OUTPUT_CAPACITY;
179
180 pInterLimit=intermediate+testcase.chunkLength;
181
182 encodedLength=outputLength=0;
183 flush=FALSE;
184
185 do {
186 /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
187 pInter=intermediate;
188 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
189 encodedLength+=(int32_t)(pInter-intermediate);
190
191 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
192 /* make sure that we convert once more to really flush */
193 *pErrorCode=U_ZERO_ERROR;
194 } else if(U_FAILURE(*pErrorCode)) {
195 return;
196 } else if(pIn==pInLimit) {
197 flush=TRUE;
198 }
199
200 /* convert the block [intermediate..pInter[ back to UTF-16 */
201 p=intermediate;
202 ucnv_toUnicode(cnv, &pOut, pOutLimit,&p, pInter,NULL, flush,pErrorCode);
203 if(U_FAILURE(*pErrorCode)) {
204 return;
205 }
206 /* intermediate must have been consumed (p==pInter) because of the converter semantics */
207 } while(!flush);
208
209 outputLength=pOut-output;
210 if(inputLength!=outputLength) {
211 fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
212 *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
213 }
214 }
215 };
216
217 // Test one-way conversion UTF-16->encoding.
218 class FromUnicode : public Command {
219 protected:
220 FromUnicode(const UtfPerformanceTest &testcase) : Command(testcase) {}
221 public:
222 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
223 FromUnicode * t = new FromUnicode(testcase);
224 if (U_SUCCESS(t->errorCode)){
225 return t;
226 } else {
227 delete t;
228 return NULL;
229 }
230 }
231 virtual void call(UErrorCode* pErrorCode){
232 const UChar *pIn, *pInLimit;
233 char *pInter, *pInterLimit;
234
235 ucnv_resetFromUnicode(cnv);
236 fromUCallbackCount=0;
237
238 pIn=input;
239 pInLimit=input+inputLength;
240
241 pInterLimit=intermediate+testcase.chunkLength;
242
243 encodedLength=0;
244
245 for(;;) {
246 pInter=intermediate;
247 ucnv_fromUnicode(cnv, &pInter, pInterLimit, &pIn, pInLimit, NULL, TRUE, pErrorCode);
248 encodedLength+=(int32_t)(pInter-intermediate);
249
250 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
251 /* make sure that we convert once more to really flush */
252 *pErrorCode=U_ZERO_ERROR;
253 } else if(U_FAILURE(*pErrorCode)) {
254 return;
255 } else {
256 break; // all done
257 }
258 }
259 }
260 };
261
262 // Test one-way conversion UTF-8->encoding.
263 class FromUTF8 : public Command {
264 protected:
265 FromUTF8(const UtfPerformanceTest &testcase)
266 : Command(testcase),
267 utf8Cnv(NULL),
268 input8(utf8), input8Length(utf8Length) {
269 utf8Cnv=ucnv_open("UTF-8", &errorCode);
270 }
271 public:
272 static UPerfFunction* get(const UtfPerformanceTest &testcase) {
273 FromUTF8 * t = new FromUTF8(testcase);
274 if (U_SUCCESS(t->errorCode)){
275 return t;
276 } else {
277 delete t;
278 return NULL;
279 }
280 }
281 ~FromUTF8() {
282 ucnv_close(utf8Cnv);
283 }
284 virtual void call(UErrorCode* pErrorCode){
285 const char *pIn, *pInLimit;
286 char *pInter, *pInterLimit;
287 UChar *pivotSource, *pivotTarget, *pivotLimit;
288
289 ucnv_resetToUnicode(utf8Cnv);
290 ucnv_resetFromUnicode(cnv);
291 fromUCallbackCount=0;
292
293 pIn=input8;
294 pInLimit=input8+input8Length;
295
296 pInterLimit=intermediate+testcase.chunkLength;
297
298 pivotSource=pivotTarget=pivot;
299 pivotLimit=pivot+testcase.pivotLength;
300
301 encodedLength=0;
302
303 for(;;) {
304 pInter=intermediate;
305 ucnv_convertEx(cnv, utf8Cnv,
306 &pInter, pInterLimit,
307 &pIn, pInLimit,
308 pivot, &pivotSource, &pivotTarget, pivotLimit,
309 FALSE, TRUE, pErrorCode);
310 encodedLength+=(int32_t)(pInter-intermediate);
311
312 if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
313 /* make sure that we convert once more to really flush */
314 *pErrorCode=U_ZERO_ERROR;
315 } else if(U_FAILURE(*pErrorCode)) {
316 return;
317 } else {
318 break; // all done
319 }
320 }
321 }
322 protected:
323 UConverter *utf8Cnv;
324 const char *input8;
325 int32_t input8Length;
326 };
327
328 UPerfFunction* UtfPerformanceTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* par) {
329 switch (index) {
330 case 0: name = "Roundtrip"; if (exec) return Roundtrip::get(*this); break;
331 case 1: name = "FromUnicode"; if (exec) return FromUnicode::get(*this); break;
332 case 2: name = "FromUTF8"; if (exec) return FromUTF8::get(*this); break;
333 default: name = ""; break;
334 }
335 return NULL;
336 }
337
338 int main(int argc, const char *argv[])
339 {
340 // Default values for command-line options.
341 options[CHARSET].value = "UTF-8";
342 options[CHUNK_LENGTH].value = "4096";
343 options[PIVOT_LENGTH].value = "1024";
344
345 UErrorCode status = U_ZERO_ERROR;
346 UtfPerformanceTest test(argc, argv, status);
347
348 if (U_FAILURE(status)){
349 printf("The error is %s\n", u_errorName(status));
350 test.usage();
351 return status;
352 }
353
354 if (test.run() == FALSE){
355 fprintf(stderr, "FAILED: Tests could not be run please check the "
356 "arguments.\n");
357 return -1;
358 }
359
360 if (fromUCallbackCount > 0) {
361 printf("Number of fromUnicode callback calls in the last iteration: %ld\n", (long)fromUCallbackCount);
362 }
363
364 return 0;
365 }