]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/perf/collperf2/collperf2.cpp
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / test / perf / collperf2 / collperf2.cpp
CommitLineData
57a6839d
A
1/*
2**********************************************************************
3* Copyright (c) 2013-2014, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*/
7
8#include <string.h>
9#include "unicode/localpointer.h"
10#include "unicode/uperf.h"
11#include "unicode/ucol.h"
12#include "unicode/coll.h"
13#include "unicode/uiter.h"
14#include "unicode/ustring.h"
15#include "unicode/sortkey.h"
16#include "uarrsort.h"
17#include "uoptions.h"
18#include "ustr_imp.h"
19
/*
 * COMPACT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * stores many strings of UNIT code units back to back in one heap buffer.
 *
 * index holds count + 1 offsets into data; string i occupies
 * data[index[i]] .. data[index[i + 1] - 1], its last unit being the terminator.
 * Allocation results are not checked.
 */
#define COMPACT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays { \
    /* Copy operations are declared without a body in this struct. */ \
    CompactArrays(const CompactArrays &); \
    CompactArrays & operator=(const CompactArrays &); \
    int32_t count;   /* total number of strings */ \
    int32_t * index; /* offsets into data, one more entry than count */ \
    UNIT * data;     /* storage for all strings */ \
    \
    CompactArrays() : count(0), index(NULL), data(NULL) { \
        index = (int32_t *) malloc(sizeof(int32_t)); \
        index[0] = 0; \
    } \
    ~CompactArrays() { \
        free(index); \
        free(data); \
    } \
    /* Grows the storage by one string of theLen units (terminator included). */ \
    /* The new units are left for the caller to fill in through last(). */ \
    void append_one(int32_t theLen) { \
        const int32_t start = index[count]; \
        ++count; \
        index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
        index[count] = start + theLen; \
        data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
    } \
    /* Start of the most recently appended string. */ \
    UNIT * last() { \
        return data + index[count - 1]; \
    } \
    /* Start of string i. */ \
    const UNIT * dataOf(int32_t i) const { \
        return data + index[i]; \
    } \
    /* Length of string i, terminator excluded. */ \
    int32_t lengthOf(int i) const { \
        return index[i + 1] - index[i] - 1; \
    } \
};
43
44COMPACT_ARRAY(CA_uchar, UChar)
45COMPACT_ARRAY(CA_char, char)
46
47#define MAX_TEST_STRINGS_FOR_PERMUTING 1000
48
49// C API test cases
50
51//
52// Test case taking a single test data array, calling ucol_strcoll by permuting the test data
53//
54class Strcoll : public UPerfFunction
55{
56public:
57 Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen);
58 ~Strcoll();
59 virtual void call(UErrorCode* status);
60 virtual long getOperationsPerIteration();
61
62private:
63 const UCollator *coll;
64 const CA_uchar *source;
65 UBool useLen;
66 int32_t maxTestStrings;
67};
68
69Strcoll::Strcoll(const UCollator* coll, const CA_uchar* source, UBool useLen)
70 : coll(coll),
71 source(source),
72 useLen(useLen)
73{
74 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
75}
76
77Strcoll::~Strcoll()
78{
79}
80
81void Strcoll::call(UErrorCode* status)
82{
83 if (U_FAILURE(*status)) return;
84
85 // call strcoll for permutation
86 int32_t divisor = source->count / maxTestStrings;
87 int32_t srcLen, tgtLen;
88 int32_t cmp = 0;
89 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
90 if (i % divisor) continue;
91 numTestStringsI++;
92 srcLen = useLen ? source->lengthOf(i) : -1;
93 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
94 if (j % divisor) continue;
95 numTestStringsJ++;
96 tgtLen = useLen ? source->lengthOf(j) : -1;
97 cmp += ucol_strcoll(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
98 }
99 }
100 // At the end, cmp must be 0
101 if (cmp != 0) {
102 *status = U_INTERNAL_PROGRAM_ERROR;
103 }
104}
105
106long Strcoll::getOperationsPerIteration()
107{
108 return maxTestStrings * maxTestStrings;
109}
110
111//
112// Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
113//
114class Strcoll_2 : public UPerfFunction
115{
116public:
117 Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
118 ~Strcoll_2();
119 virtual void call(UErrorCode* status);
120 virtual long getOperationsPerIteration();
121
122private:
123 const UCollator *coll;
124 const CA_uchar *source;
125 const CA_uchar *target;
126 UBool useLen;
127};
128
129Strcoll_2::Strcoll_2(const UCollator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
130 : coll(coll),
131 source(source),
132 target(target),
133 useLen(useLen)
134{
135}
136
137Strcoll_2::~Strcoll_2()
138{
139}
140
141void Strcoll_2::call(UErrorCode* status)
142{
143 if (U_FAILURE(*status)) return;
144
145 // call strcoll for two strings at the same index
146 if (source->count < target->count) {
147 *status = U_ILLEGAL_ARGUMENT_ERROR;
148 } else {
149 for (int32_t i = 0; i < source->count; i++) {
150 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
151 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
152 ucol_strcoll(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
153 }
154 }
155}
156
157long Strcoll_2::getOperationsPerIteration()
158{
159 return source->count;
160}
161
162
163//
164// Test case taking a single test data array, calling ucol_strcollUTF8 by permuting the test data
165//
166class StrcollUTF8 : public UPerfFunction
167{
168public:
169 StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen);
170 ~StrcollUTF8();
171 virtual void call(UErrorCode* status);
172 virtual long getOperationsPerIteration();
173
174private:
175 const UCollator *coll;
176 const CA_char *source;
177 UBool useLen;
178 int32_t maxTestStrings;
179};
180
181StrcollUTF8::StrcollUTF8(const UCollator* coll, const CA_char* source, UBool useLen)
182 : coll(coll),
183 source(source),
184 useLen(useLen)
185{
186 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
187}
188
189StrcollUTF8::~StrcollUTF8()
190{
191}
192
193void StrcollUTF8::call(UErrorCode* status)
194{
195 if (U_FAILURE(*status)) return;
196
197 // call strcollUTF8 for permutation
198 int32_t divisor = source->count / maxTestStrings;
199 int32_t srcLen, tgtLen;
200 int32_t cmp = 0;
201 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
202 if (i % divisor) continue;
203 numTestStringsI++;
204 srcLen = useLen ? source->lengthOf(i) : -1;
205 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
206 if (j % divisor) continue;
207 numTestStringsJ++;
208 tgtLen = useLen ? source->lengthOf(j) : -1;
209 cmp += ucol_strcollUTF8(coll, source->dataOf(i), srcLen, source->dataOf(j), tgtLen, status);
210 }
211 }
212 // At the end, cmp must be 0
213 if (cmp != 0) {
214 *status = U_INTERNAL_PROGRAM_ERROR;
215 }
216}
217
218long StrcollUTF8::getOperationsPerIteration()
219{
220 return maxTestStrings * maxTestStrings;
221}
222
223//
224// Test case taking two test data arrays, calling ucol_strcoll for strings at a same index
225//
226class StrcollUTF8_2 : public UPerfFunction
227{
228public:
229 StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen);
230 ~StrcollUTF8_2();
231 virtual void call(UErrorCode* status);
232 virtual long getOperationsPerIteration();
233
234private:
235 const UCollator *coll;
236 const CA_char *source;
237 const CA_char *target;
238 UBool useLen;
239};
240
241StrcollUTF8_2::StrcollUTF8_2(const UCollator* coll, const CA_char* source, const CA_char* target, UBool useLen)
242 : coll(coll),
243 source(source),
244 target(target),
245 useLen(useLen)
246{
247}
248
249StrcollUTF8_2::~StrcollUTF8_2()
250{
251}
252
253void StrcollUTF8_2::call(UErrorCode* status)
254{
255 if (U_FAILURE(*status)) return;
256
257 // call strcoll for two strings at the same index
258 if (source->count < target->count) {
259 *status = U_ILLEGAL_ARGUMENT_ERROR;
260 } else {
261 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
262 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
263 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
264 ucol_strcollUTF8(coll, source->dataOf(i), srcLen, target->dataOf(i), tgtLen, status);
265 }
266 }
267}
268
269long StrcollUTF8_2::getOperationsPerIteration()
270{
271 return source->count;
272}
273
274//
275// Test case taking a single test data array, calling ucol_getSortKey for each
276//
277class GetSortKey : public UPerfFunction
278{
279public:
280 GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen);
281 ~GetSortKey();
282 virtual void call(UErrorCode* status);
283 virtual long getOperationsPerIteration();
284
285private:
286 const UCollator *coll;
287 const CA_uchar *source;
288 UBool useLen;
289};
290
291GetSortKey::GetSortKey(const UCollator* coll, const CA_uchar* source, UBool useLen)
292 : coll(coll),
293 source(source),
294 useLen(useLen)
295{
296}
297
298GetSortKey::~GetSortKey()
299{
300}
301
302#define KEY_BUF_SIZE 512
303
304void GetSortKey::call(UErrorCode* status)
305{
306 if (U_FAILURE(*status)) return;
307
308 uint8_t key[KEY_BUF_SIZE];
309 int32_t len;
310
311 if (useLen) {
312 for (int32_t i = 0; i < source->count; i++) {
313 len = ucol_getSortKey(coll, source->dataOf(i), source->lengthOf(i), key, KEY_BUF_SIZE);
314 }
315 } else {
316 for (int32_t i = 0; i < source->count; i++) {
317 len = ucol_getSortKey(coll, source->dataOf(i), -1, key, KEY_BUF_SIZE);
318 }
319 }
320}
321
322long GetSortKey::getOperationsPerIteration()
323{
324 return source->count;
325}
326
327//
328// Test case taking a single test data array in UTF-16, calling ucol_nextSortKeyPart for each for the
329// given buffer size
330//
331class NextSortKeyPart : public UPerfFunction
332{
333public:
334 NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration = -1);
335 ~NextSortKeyPart();
336 virtual void call(UErrorCode* status);
337 virtual long getOperationsPerIteration();
338 virtual long getEventsPerIteration();
339
340private:
341 const UCollator *coll;
342 const CA_uchar *source;
343 int32_t bufSize;
344 int32_t maxIteration;
345 long events;
346};
347
348// Note: maxIteration = -1 -> repeat until the end of collation key
349NextSortKeyPart::NextSortKeyPart(const UCollator* coll, const CA_uchar* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
350 : coll(coll),
351 source(source),
352 bufSize(bufSize),
353 maxIteration(maxIteration),
354 events(0)
355{
356}
357
358NextSortKeyPart::~NextSortKeyPart()
359{
360}
361
362void NextSortKeyPart::call(UErrorCode* status)
363{
364 if (U_FAILURE(*status)) return;
365
366 uint8_t *part = (uint8_t *)malloc(bufSize);
367 uint32_t state[2];
368 UCharIterator iter;
369
370 events = 0;
371 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
372 uiter_setString(&iter, source->dataOf(i), source->lengthOf(i));
373 state[0] = 0;
374 state[1] = 0;
375 int32_t partLen = bufSize;
376 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
377 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
378 events++;
379 }
380 }
381 free(part);
382}
383
384long NextSortKeyPart::getOperationsPerIteration()
385{
386 return source->count;
387}
388
389long NextSortKeyPart::getEventsPerIteration()
390{
391 return events;
392}
393
394//
395// Test case taking a single test data array in UTF-8, calling ucol_nextSortKeyPart for each for the
396// given buffer size
397//
398class NextSortKeyPartUTF8 : public UPerfFunction
399{
400public:
401 NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration = -1);
402 ~NextSortKeyPartUTF8();
403 virtual void call(UErrorCode* status);
404 virtual long getOperationsPerIteration();
405 virtual long getEventsPerIteration();
406
407private:
408 const UCollator *coll;
409 const CA_char *source;
410 int32_t bufSize;
411 int32_t maxIteration;
412 long events;
413};
414
415// Note: maxIteration = -1 -> repeat until the end of collation key
416NextSortKeyPartUTF8::NextSortKeyPartUTF8(const UCollator* coll, const CA_char* source, int32_t bufSize, int32_t maxIteration /* = -1 */)
417 : coll(coll),
418 source(source),
419 bufSize(bufSize),
420 maxIteration(maxIteration),
421 events(0)
422{
423}
424
425NextSortKeyPartUTF8::~NextSortKeyPartUTF8()
426{
427}
428
429void NextSortKeyPartUTF8::call(UErrorCode* status)
430{
431 if (U_FAILURE(*status)) return;
432
433 uint8_t *part = (uint8_t *)malloc(bufSize);
434 uint32_t state[2];
435 UCharIterator iter;
436
437 events = 0;
438 for (int i = 0; i < source->count && U_SUCCESS(*status); i++) {
439 uiter_setUTF8(&iter, source->dataOf(i), source->lengthOf(i));
440 state[0] = 0;
441 state[1] = 0;
442 int32_t partLen = bufSize;
443 for (int32_t n = 0; U_SUCCESS(*status) && partLen == bufSize && (maxIteration < 0 || n < maxIteration); n++) {
444 partLen = ucol_nextSortKeyPart(coll, &iter, state, part, bufSize, status);
445 events++;
446 }
447 }
448 free(part);
449}
450
451long NextSortKeyPartUTF8::getOperationsPerIteration()
452{
453 return source->count;
454}
455
456long NextSortKeyPartUTF8::getEventsPerIteration()
457{
458 return events;
459}
460
461// CPP API test cases
462
463//
464// Test case taking a single test data array, calling Collator::compare by permuting the test data
465//
466class CppCompare : public UPerfFunction
467{
468public:
469 CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen);
470 ~CppCompare();
471 virtual void call(UErrorCode* status);
472 virtual long getOperationsPerIteration();
473
474private:
475 const Collator *coll;
476 const CA_uchar *source;
477 UBool useLen;
478 int32_t maxTestStrings;
479};
480
481CppCompare::CppCompare(const Collator* coll, const CA_uchar* source, UBool useLen)
482 : coll(coll),
483 source(source),
484 useLen(useLen)
485{
486 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
487}
488
489CppCompare::~CppCompare()
490{
491}
492
493void CppCompare::call(UErrorCode* status) {
494 if (U_FAILURE(*status)) return;
495
496 // call compare for permutation of test data
497 int32_t divisor = source->count / maxTestStrings;
498 int32_t srcLen, tgtLen;
499 int32_t cmp = 0;
500 for (int32_t i = 0, numTestStringsI = 0; i < source->count && numTestStringsI < maxTestStrings; i++) {
501 if (i % divisor) continue;
502 numTestStringsI++;
503 srcLen = useLen ? source->lengthOf(i) : -1;
504 for (int32_t j = 0, numTestStringsJ = 0; j < source->count && numTestStringsJ < maxTestStrings; j++) {
505 if (j % divisor) continue;
506 numTestStringsJ++;
507 tgtLen = useLen ? source->lengthOf(j) : -1;
508 cmp += coll->compare(source->dataOf(i), srcLen, source->dataOf(j), tgtLen);
509 }
510 }
511 // At the end, cmp must be 0
512 if (cmp != 0) {
513 *status = U_INTERNAL_PROGRAM_ERROR;
514 }
515}
516
517long CppCompare::getOperationsPerIteration()
518{
519 return maxTestStrings * maxTestStrings;
520}
521
522//
523// Test case taking two test data arrays, calling Collator::compare for strings at a same index
524//
525class CppCompare_2 : public UPerfFunction
526{
527public:
528 CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen);
529 ~CppCompare_2();
530 virtual void call(UErrorCode* status);
531 virtual long getOperationsPerIteration();
532
533private:
534 const Collator *coll;
535 const CA_uchar *source;
536 const CA_uchar *target;
537 UBool useLen;
538};
539
540CppCompare_2::CppCompare_2(const Collator* coll, const CA_uchar* source, const CA_uchar* target, UBool useLen)
541 : coll(coll),
542 source(source),
543 target(target),
544 useLen(useLen)
545{
546}
547
548CppCompare_2::~CppCompare_2()
549{
550}
551
552void CppCompare_2::call(UErrorCode* status) {
553 if (U_FAILURE(*status)) return;
554
555 // call strcoll for two strings at the same index
556 if (source->count < target->count) {
557 *status = U_ILLEGAL_ARGUMENT_ERROR;
558 } else {
559 for (int32_t i = 0; i < source->count; i++) {
560 int32_t srcLen = useLen ? source->lengthOf(i) : -1;
561 int32_t tgtLen = useLen ? target->lengthOf(i) : -1;
562 coll->compare(source->dataOf(i), srcLen, target->dataOf(i), tgtLen);
563 }
564 }
565}
566
567long CppCompare_2::getOperationsPerIteration()
568{
569 return source->count;
570}
571
572
573//
574// Test case taking a single test data array, calling Collator::compareUTF8 by permuting the test data
575//
576class CppCompareUTF8 : public UPerfFunction
577{
578public:
579 CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen);
580 ~CppCompareUTF8();
581 virtual void call(UErrorCode* status);
582 virtual long getOperationsPerIteration();
583
584private:
585 const Collator *coll;
586 const CA_char *source;
587 UBool useLen;
588 int32_t maxTestStrings;
589};
590
591CppCompareUTF8::CppCompareUTF8(const Collator* coll, const CA_char* source, UBool useLen)
592 : coll(coll),
593 source(source),
594 useLen(useLen)
595{
596 maxTestStrings = source->count > MAX_TEST_STRINGS_FOR_PERMUTING ? MAX_TEST_STRINGS_FOR_PERMUTING : source->count;
597}
598
599CppCompareUTF8::~CppCompareUTF8()
600{
601}
602
603void CppCompareUTF8::call(UErrorCode* status) {
604 if (U_FAILURE(*status)) return;
605
606 // call compareUTF8 for all permutations
607 int32_t divisor = source->count / maxTestStrings;
608 StringPiece src, tgt;
609 int32_t cmp = 0;
610 for (int32_t i = 0, numTestStringsI = 0; U_SUCCESS(*status) && i < source->count && numTestStringsI < maxTestStrings; i++) {
611 if (i % divisor) continue;
612 numTestStringsI++;
613
614 if (useLen) {
615 src.set(source->dataOf(i), source->lengthOf(i));
616 } else {
617 src.set(source->dataOf(i));
618 }
619 for (int32_t j = 0, numTestStringsJ = 0; U_SUCCESS(*status) && j < source->count && numTestStringsJ < maxTestStrings; j++) {
620 if (j % divisor) continue;
621 numTestStringsJ++;
622
623 if (useLen) {
624 tgt.set(source->dataOf(i), source->lengthOf(i));
625 } else {
626 tgt.set(source->dataOf(i));
627 }
628 cmp += coll->compareUTF8(src, tgt, *status);
629 }
630 }
631 // At the end, cmp must be 0
632 if (cmp != 0) {
633 *status = U_INTERNAL_PROGRAM_ERROR;
634 }
635}
636
637long CppCompareUTF8::getOperationsPerIteration()
638{
639 return maxTestStrings * maxTestStrings;
640}
641
642
643//
644// Test case taking two test data arrays, calling Collator::compareUTF8 for strings at a same index
645//
646class CppCompareUTF8_2 : public UPerfFunction
647{
648public:
649 CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen);
650 ~CppCompareUTF8_2();
651 virtual void call(UErrorCode* status);
652 virtual long getOperationsPerIteration();
653
654private:
655 const Collator *coll;
656 const CA_char *source;
657 const CA_char *target;
658 UBool useLen;
659};
660
661CppCompareUTF8_2::CppCompareUTF8_2(const Collator* coll, const CA_char* source, const CA_char* target, UBool useLen)
662 : coll(coll),
663 source(source),
664 target(target),
665 useLen(useLen)
666{
667}
668
669CppCompareUTF8_2::~CppCompareUTF8_2()
670{
671}
672
673void CppCompareUTF8_2::call(UErrorCode* status) {
674 if (U_FAILURE(*status)) return;
675
676 // call strcoll for two strings at the same index
677 StringPiece src, tgt;
678 if (source->count < target->count) {
679 *status = U_ILLEGAL_ARGUMENT_ERROR;
680 } else {
681 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
682 if (useLen) {
683 src.set(source->dataOf(i), source->lengthOf(i));
684 tgt.set(target->dataOf(i), target->lengthOf(i));
685 } else {
686 src.set(source->dataOf(i));
687 tgt.set(target->dataOf(i));
688 }
689 coll->compareUTF8(src, tgt, *status);
690 }
691 }
692}
693
694long CppCompareUTF8_2::getOperationsPerIteration()
695{
696 return source->count;
697}
698
699
700//
701// Test case taking a single test data array, calling Collator::getCollationKey for each
702//
703class CppGetCollationKey : public UPerfFunction
704{
705public:
706 CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen);
707 ~CppGetCollationKey();
708 virtual void call(UErrorCode* status);
709 virtual long getOperationsPerIteration();
710
711private:
712 const Collator *coll;
713 const CA_uchar *source;
714 UBool useLen;
715};
716
717CppGetCollationKey::CppGetCollationKey(const Collator* coll, const CA_uchar* source, UBool useLen)
718 : coll(coll),
719 source(source),
720 useLen(useLen)
721{
722}
723
724CppGetCollationKey::~CppGetCollationKey()
725{
726}
727
728void CppGetCollationKey::call(UErrorCode* status)
729{
730 if (U_FAILURE(*status)) return;
731
732 CollationKey key;
733 for (int32_t i = 0; U_SUCCESS(*status) && i < source->count; i++) {
734 coll->getCollationKey(source->dataOf(i), source->lengthOf(i), key, *status);
735 }
736}
737
738long CppGetCollationKey::getOperationsPerIteration() {
739 return source->count;
740}
741
742namespace {
743
744struct CollatorAndCounter {
745 CollatorAndCounter(const Collator& coll) : coll(coll), ucoll(NULL), counter(0) {}
746 CollatorAndCounter(const Collator& coll, const UCollator *ucoll)
747 : coll(coll), ucoll(ucoll), counter(0) {}
748 const Collator& coll;
749 const UCollator *ucoll;
750 int32_t counter;
751};
752
753int32_t U_CALLCONV
754UniStrCollatorComparator(const void* context, const void* left, const void* right) {
755 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
756 const UnicodeString& leftString = **(const UnicodeString**)left;
757 const UnicodeString& rightString = **(const UnicodeString**)right;
758 UErrorCode errorCode = U_ZERO_ERROR;
759 ++cc.counter;
760 return cc.coll.compare(leftString, rightString, errorCode);
761}
762
763} // namespace
764
765class CollPerfFunction : public UPerfFunction {
766public:
767 CollPerfFunction(const Collator& coll, const UCollator *ucoll)
768 : coll(coll), ucoll(ucoll), ops(0) {}
769 virtual ~CollPerfFunction();
770 /** Calls call() to set the ops field, and returns that. */
771 virtual long getOperationsPerIteration();
772
773protected:
774 const Collator& coll;
775 const UCollator *ucoll;
776 int32_t ops;
777};
778
779CollPerfFunction::~CollPerfFunction() {}
780
781long CollPerfFunction::getOperationsPerIteration() {
782 UErrorCode errorCode = U_ZERO_ERROR;
783 call(&errorCode);
784 return U_SUCCESS(errorCode) ? ops : 0;
785}
786
787class UniStrCollPerfFunction : public CollPerfFunction {
788public:
789 UniStrCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
790 : CollPerfFunction(coll, ucoll), d16(data16),
791 source(new UnicodeString*[d16->count]) {
792 for (int32_t i = 0; i < d16->count; ++i) {
793 source[i] = new UnicodeString(TRUE, d16->dataOf(i), d16->lengthOf(i));
794 }
795 }
796 virtual ~UniStrCollPerfFunction();
797
798protected:
799 const CA_uchar* d16;
800 UnicodeString** source;
801};
802
803UniStrCollPerfFunction::~UniStrCollPerfFunction() {
804 for (int32_t i = 0; i < d16->count; ++i) {
805 delete source[i];
806 }
807 delete[] source;
808}
809
810//
811// Test case sorting an array of UnicodeString pointers.
812//
813class UniStrSort : public UniStrCollPerfFunction {
814public:
815 UniStrSort(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
816 : UniStrCollPerfFunction(coll, ucoll, data16),
817 dest(new UnicodeString*[d16->count]) {}
818 virtual ~UniStrSort();
819 virtual void call(UErrorCode* status);
820
821private:
822 UnicodeString** dest; // aliases only
823};
824
825UniStrSort::~UniStrSort() {
826 delete[] dest;
827}
828
829void UniStrSort::call(UErrorCode* status) {
830 if (U_FAILURE(*status)) return;
831
832 CollatorAndCounter cc(coll);
833 int32_t count = d16->count;
834 memcpy(dest, source, count * sizeof(UnicodeString *));
835 uprv_sortArray(dest, count, (int32_t)sizeof(UnicodeString *),
836 UniStrCollatorComparator, &cc, TRUE, status);
837 ops = cc.counter;
838}
839
840namespace {
841
842int32_t U_CALLCONV
843StringPieceCollatorComparator(const void* context, const void* left, const void* right) {
844 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
845 const StringPiece& leftString = *(const StringPiece*)left;
846 const StringPiece& rightString = *(const StringPiece*)right;
847 UErrorCode errorCode = U_ZERO_ERROR;
848 ++cc.counter;
849 return cc.coll.compareUTF8(leftString, rightString, errorCode);
850}
851
852int32_t U_CALLCONV
853StringPieceUCollatorComparator(const void* context, const void* left, const void* right) {
854 CollatorAndCounter& cc = *(CollatorAndCounter*)context;
855 const StringPiece& leftString = *(const StringPiece*)left;
856 const StringPiece& rightString = *(const StringPiece*)right;
857 UErrorCode errorCode = U_ZERO_ERROR;
858 ++cc.counter;
859 return ucol_strcollUTF8(cc.ucoll,
860 leftString.data(), leftString.length(),
861 rightString.data(), rightString.length(), &errorCode);
862}
863
864} // namespace
865
866class StringPieceCollPerfFunction : public CollPerfFunction {
867public:
868 StringPieceCollPerfFunction(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
869 : CollPerfFunction(coll, ucoll), d8(data8),
870 source(new StringPiece[d8->count]) {
871 for (int32_t i = 0; i < d8->count; ++i) {
872 source[i].set(d8->dataOf(i), d8->lengthOf(i));
873 }
874 }
875 virtual ~StringPieceCollPerfFunction();
876
877protected:
878 const CA_char* d8;
879 StringPiece* source;
880};
881
882StringPieceCollPerfFunction::~StringPieceCollPerfFunction() {
883 delete[] source;
884}
885
886class StringPieceSort : public StringPieceCollPerfFunction {
887public:
888 StringPieceSort(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
889 : StringPieceCollPerfFunction(coll, ucoll, data8),
890 dest(new StringPiece[d8->count]) {}
891 virtual ~StringPieceSort();
892
893protected:
894 StringPiece* dest;
895};
896
897StringPieceSort::~StringPieceSort() {
898 delete[] dest;
899}
900
901//
902// Test case sorting an array of UTF-8 StringPiece's with Collator::compareUTF8().
903//
904class StringPieceSortCpp : public StringPieceSort {
905public:
906 StringPieceSortCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
907 : StringPieceSort(coll, ucoll, data8) {}
908 virtual ~StringPieceSortCpp();
909 virtual void call(UErrorCode* status);
910};
911
912StringPieceSortCpp::~StringPieceSortCpp() {}
913
914void StringPieceSortCpp::call(UErrorCode* status) {
915 if (U_FAILURE(*status)) return;
916
917 CollatorAndCounter cc(coll);
918 int32_t count = d8->count;
919 memcpy(dest, source, count * sizeof(StringPiece));
920 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
921 StringPieceCollatorComparator, &cc, TRUE, status);
922 ops = cc.counter;
923}
924
925//
926// Test case sorting an array of UTF-8 StringPiece's with ucol_strcollUTF8().
927//
928class StringPieceSortC : public StringPieceSort {
929public:
930 StringPieceSortC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
931 : StringPieceSort(coll, ucoll, data8) {}
932 virtual ~StringPieceSortC();
933 virtual void call(UErrorCode* status);
934};
935
936StringPieceSortC::~StringPieceSortC() {}
937
938void StringPieceSortC::call(UErrorCode* status) {
939 if (U_FAILURE(*status)) return;
940
941 CollatorAndCounter cc(coll, ucoll);
942 int32_t count = d8->count;
943 memcpy(dest, source, count * sizeof(StringPiece));
944 uprv_sortArray(dest, count, (int32_t)sizeof(StringPiece),
945 StringPieceUCollatorComparator, &cc, TRUE, status);
946 ops = cc.counter;
947}
948
949//
950// Test case performing binary searches in a sorted array of UnicodeString pointers.
951//
952class UniStrBinSearch : public UniStrCollPerfFunction {
953public:
954 UniStrBinSearch(const Collator& coll, const UCollator *ucoll, const CA_uchar* data16)
955 : UniStrCollPerfFunction(coll, ucoll, data16) {}
956 virtual ~UniStrBinSearch();
957 virtual void call(UErrorCode* status);
958};
959
960UniStrBinSearch::~UniStrBinSearch() {}
961
962void UniStrBinSearch::call(UErrorCode* status) {
963 if (U_FAILURE(*status)) return;
964
965 CollatorAndCounter cc(coll);
966 int32_t count = d16->count;
967 for (int32_t i = 0; i < count; ++i) {
968 (void)uprv_stableBinarySearch((char *)source, count,
969 source + i, (int32_t)sizeof(UnicodeString *),
970 UniStrCollatorComparator, &cc);
971 }
972 ops = cc.counter;
973}
974
975class StringPieceBinSearch : public StringPieceCollPerfFunction {
976public:
977 StringPieceBinSearch(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
978 : StringPieceCollPerfFunction(coll, ucoll, data8) {}
979 virtual ~StringPieceBinSearch();
980};
981
982StringPieceBinSearch::~StringPieceBinSearch() {}
983
984//
985// Test case performing binary searches in a sorted array of UTF-8 StringPiece's
986// with Collator::compareUTF8().
987//
988class StringPieceBinSearchCpp : public StringPieceBinSearch {
989public:
990 StringPieceBinSearchCpp(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
991 : StringPieceBinSearch(coll, ucoll, data8) {}
992 virtual ~StringPieceBinSearchCpp();
993 virtual void call(UErrorCode* status);
994};
995
996StringPieceBinSearchCpp::~StringPieceBinSearchCpp() {}
997
998void StringPieceBinSearchCpp::call(UErrorCode* status) {
999 if (U_FAILURE(*status)) return;
1000
1001 CollatorAndCounter cc(coll);
1002 int32_t count = d8->count;
1003 for (int32_t i = 0; i < count; ++i) {
1004 (void)uprv_stableBinarySearch((char *)source, count,
1005 source + i, (int32_t)sizeof(StringPiece),
1006 StringPieceCollatorComparator, &cc);
1007 }
1008 ops = cc.counter;
1009}
1010
1011//
1012// Test case performing binary searches in a sorted array of UTF-8 StringPiece's
1013// with ucol_strcollUTF8().
1014//
1015class StringPieceBinSearchC : public StringPieceBinSearch {
1016public:
1017 StringPieceBinSearchC(const Collator& coll, const UCollator *ucoll, const CA_char* data8)
1018 : StringPieceBinSearch(coll, ucoll, data8) {}
1019 virtual ~StringPieceBinSearchC();
1020 virtual void call(UErrorCode* status);
1021};
1022
1023StringPieceBinSearchC::~StringPieceBinSearchC() {}
1024
1025void StringPieceBinSearchC::call(UErrorCode* status) {
1026 if (U_FAILURE(*status)) return;
1027
1028 CollatorAndCounter cc(coll, ucoll);
1029 int32_t count = d8->count;
1030 for (int32_t i = 0; i < count; ++i) {
1031 (void)uprv_stableBinarySearch((char *)source, count,
1032 source + i, (int32_t)sizeof(StringPiece),
1033 StringPieceUCollatorComparator, &cc);
1034 }
1035 ops = cc.counter;
1036}
1037
1038
1039class CollPerf2Test : public UPerfTest
1040{
1041public:
1042 CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status);
1043 ~CollPerf2Test();
1044 virtual UPerfFunction* runIndexedTest(
1045 int32_t index, UBool exec, const char *&name, char *par = NULL);
1046
1047private:
1048 UCollator* coll;
1049 Collator* collObj;
1050
1051 int32_t count;
1052 CA_uchar* data16;
1053 CA_char* data8;
1054
1055 CA_uchar* modData16;
1056 CA_char* modData8;
1057
1058 CA_uchar* sortedData16;
1059 CA_char* sortedData8;
1060
1061 CA_uchar* randomData16;
1062 CA_char* randomData8;
1063
1064 const CA_uchar* getData16(UErrorCode &status);
1065 const CA_char* getData8(UErrorCode &status);
1066
1067 const CA_uchar* getModData16(UErrorCode &status);
1068 const CA_char* getModData8(UErrorCode &status);
1069
1070 const CA_uchar* getSortedData16(UErrorCode &status);
1071 const CA_char* getSortedData8(UErrorCode &status);
1072
1073 const CA_uchar* getRandomData16(UErrorCode &status);
1074 const CA_char* getRandomData8(UErrorCode &status);
1075
1076 static CA_uchar* sortData16(
1077 const CA_uchar* d16,
1078 UComparator *cmp, const void *context,
1079 UErrorCode &status);
1080 static CA_char* getData8FromData16(const CA_uchar* d16, UErrorCode &status);
1081
1082 UPerfFunction* TestStrcoll();
1083 UPerfFunction* TestStrcollNull();
1084 UPerfFunction* TestStrcollSimilar();
1085
1086 UPerfFunction* TestStrcollUTF8();
1087 UPerfFunction* TestStrcollUTF8Null();
1088 UPerfFunction* TestStrcollUTF8Similar();
1089
1090 UPerfFunction* TestGetSortKey();
1091 UPerfFunction* TestGetSortKeyNull();
1092
1093 UPerfFunction* TestNextSortKeyPart_4All();
1094 UPerfFunction* TestNextSortKeyPart_4x2();
1095 UPerfFunction* TestNextSortKeyPart_4x4();
1096 UPerfFunction* TestNextSortKeyPart_4x8();
1097 UPerfFunction* TestNextSortKeyPart_32All();
1098 UPerfFunction* TestNextSortKeyPart_32x2();
1099
1100 UPerfFunction* TestNextSortKeyPartUTF8_4All();
1101 UPerfFunction* TestNextSortKeyPartUTF8_4x2();
1102 UPerfFunction* TestNextSortKeyPartUTF8_4x4();
1103 UPerfFunction* TestNextSortKeyPartUTF8_4x8();
1104 UPerfFunction* TestNextSortKeyPartUTF8_32All();
1105 UPerfFunction* TestNextSortKeyPartUTF8_32x2();
1106
1107 UPerfFunction* TestCppCompare();
1108 UPerfFunction* TestCppCompareNull();
1109 UPerfFunction* TestCppCompareSimilar();
1110
1111 UPerfFunction* TestCppCompareUTF8();
1112 UPerfFunction* TestCppCompareUTF8Null();
1113 UPerfFunction* TestCppCompareUTF8Similar();
1114
1115 UPerfFunction* TestCppGetCollationKey();
1116 UPerfFunction* TestCppGetCollationKeyNull();
1117
1118 UPerfFunction* TestUniStrSort();
1119 UPerfFunction* TestStringPieceSortCpp();
1120 UPerfFunction* TestStringPieceSortC();
1121
1122 UPerfFunction* TestUniStrBinSearch();
1123 UPerfFunction* TestStringPieceBinSearchCpp();
1124 UPerfFunction* TestStringPieceBinSearchC();
1125};
1126
1127CollPerf2Test::CollPerf2Test(int32_t argc, const char *argv[], UErrorCode &status) :
1128 UPerfTest(argc, argv, status),
1129 coll(NULL),
1130 collObj(NULL),
1131 count(0),
1132 data16(NULL),
1133 data8(NULL),
1134 modData16(NULL),
1135 modData8(NULL),
1136 sortedData16(NULL),
1137 sortedData8(NULL),
1138 randomData16(NULL),
1139 randomData8(NULL)
1140{
1141 if (U_FAILURE(status)) {
1142 return;
1143 }
1144
1145 if (locale == NULL){
1146 locale = "en_US"; // set default locale
1147 }
1148
1149 // Set up an ICU collator
1150 coll = ucol_open(locale, &status);
1151 collObj = Collator::createInstance(locale, status);
1152
1153 // Keyword support should be actually a part of ICU collator, see ICU ticket #8260.
1154 char keyBuffer[256];
1155 UColAttributeValue val;
1156 if (uloc_getKeywordValue(locale, "strength", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1157 if (strcmp(keyBuffer, "primary") == 0) {
1158 val = UCOL_PRIMARY;
1159 } else if (strcmp(keyBuffer, "secondary") == 0) {
1160 val = UCOL_SECONDARY;
1161 } else if (strcmp(keyBuffer, "tertiary") == 0) {
1162 val = UCOL_TERTIARY;
1163 } else if (strcmp(keyBuffer, "quaternary") == 0) {
1164 val = UCOL_QUATERNARY;
1165 } else if (strcmp(keyBuffer, "identical") == 0) {
1166 val = UCOL_IDENTICAL;
1167 } else {
1168 status = U_ILLEGAL_ARGUMENT_ERROR;
1169 }
1170 if (U_SUCCESS(status)) {
1171 ucol_setAttribute(coll, UCOL_STRENGTH, val, &status);
1172 collObj->setAttribute(UCOL_STRENGTH, val, status);
1173 }
1174 }
1175 if (uloc_getKeywordValue(locale, "alternate", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1176 if (strcmp(keyBuffer, "non-ignorable") == 0) {
1177 val = UCOL_NON_IGNORABLE;
1178 } else if (strcmp(keyBuffer, "shifted") == 0) {
1179 val = UCOL_SHIFTED;
1180 } else {
1181 status = U_ILLEGAL_ARGUMENT_ERROR;
1182 }
1183 if (U_SUCCESS(status)) {
1184 ucol_setAttribute(coll, UCOL_ALTERNATE_HANDLING, val, &status);
1185 collObj->setAttribute(UCOL_ALTERNATE_HANDLING, val, status);
1186 }
1187 }
1188 if (uloc_getKeywordValue(locale, "backwards", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1189 if (strcmp(keyBuffer, "on") == 0) {
1190 val = UCOL_ON;
1191 } else if (strcmp(keyBuffer, "off") == 0) {
1192 val = UCOL_OFF;
1193 } else {
1194 status = U_ILLEGAL_ARGUMENT_ERROR;
1195 }
1196 if (U_SUCCESS(status)) {
1197 ucol_setAttribute(coll, UCOL_FRENCH_COLLATION, val, &status);
1198 collObj->setAttribute(UCOL_FRENCH_COLLATION, val, status);
1199 }
1200 }
1201 if (uloc_getKeywordValue(locale, "normalization", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1202 if (strcmp(keyBuffer, "on") == 0) {
1203 val = UCOL_ON;
1204 } else if (strcmp(keyBuffer, "off") == 0) {
1205 val = UCOL_OFF;
1206 } else {
1207 status = U_ILLEGAL_ARGUMENT_ERROR;
1208 }
1209 if (U_SUCCESS(status)) {
1210 ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, val, &status);
1211 collObj->setAttribute(UCOL_NORMALIZATION_MODE, val, status);
1212 }
1213 }
1214 if (uloc_getKeywordValue(locale, "caseLevel", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1215 if (strcmp(keyBuffer, "on") == 0) {
1216 val = UCOL_ON;
1217 } else if (strcmp(keyBuffer, "off") == 0) {
1218 val = UCOL_OFF;
1219 } else {
1220 status = U_ILLEGAL_ARGUMENT_ERROR;
1221 }
1222 if (U_SUCCESS(status)) {
1223 ucol_setAttribute(coll, UCOL_CASE_LEVEL, val, &status);
1224 collObj->setAttribute(UCOL_CASE_LEVEL, val, status);
1225 }
1226 }
1227 if (uloc_getKeywordValue(locale, "caseFirst", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1228 if (strcmp(keyBuffer, "upper") == 0) {
1229 val = UCOL_UPPER_FIRST;
1230 } else if (strcmp(keyBuffer, "lower") == 0) {
1231 val = UCOL_LOWER_FIRST;
1232 } else if (strcmp(keyBuffer, "off") == 0) {
1233 val = UCOL_OFF;
1234 } else {
1235 status = U_ILLEGAL_ARGUMENT_ERROR;
1236 }
1237 if (U_SUCCESS(status)) {
1238 ucol_setAttribute(coll, UCOL_CASE_FIRST, val, &status);
1239 collObj->setAttribute(UCOL_CASE_FIRST, val, status);
1240 }
1241 }
1242 if (uloc_getKeywordValue(locale, "hiraganaQuaternary", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1243 if (strcmp(keyBuffer, "on") == 0) {
1244 val = UCOL_ON;
1245 } else if (strcmp(keyBuffer, "off") == 0) {
1246 val = UCOL_OFF;
1247 } else {
1248 status = U_ILLEGAL_ARGUMENT_ERROR;
1249 }
1250 if (U_SUCCESS(status)) {
1251 ucol_setAttribute(coll, UCOL_HIRAGANA_QUATERNARY_MODE, val, &status);
1252 collObj->setAttribute(UCOL_HIRAGANA_QUATERNARY_MODE, val, status);
1253 }
1254 }
1255 if (uloc_getKeywordValue(locale, "numeric", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1256 if (strcmp(keyBuffer, "on") == 0) {
1257 val = UCOL_ON;
1258 } else if (strcmp(keyBuffer, "off") == 0) {
1259 val = UCOL_OFF;
1260 } else {
1261 status = U_ILLEGAL_ARGUMENT_ERROR;
1262 }
1263 if (U_SUCCESS(status)) {
1264 ucol_setAttribute(coll, UCOL_NUMERIC_COLLATION, val, &status);
1265 collObj->setAttribute(UCOL_NUMERIC_COLLATION, val, status);
1266 }
1267 }
1268 if (uloc_getKeywordValue(locale, "variableTop", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1269 // no support for now
1270 status = U_UNSUPPORTED_ERROR;
1271 }
1272 if (uloc_getKeywordValue(locale, "reorder", keyBuffer, sizeof(keyBuffer)/sizeof(keyBuffer[0]), &status)) {
1273 // no support for now
1274 status = U_UNSUPPORTED_ERROR;
1275 }
1276}
1277
1278CollPerf2Test::~CollPerf2Test()
1279{
1280 ucol_close(coll);
1281 delete collObj;
1282
1283 delete data16;
1284 delete data8;
1285 delete modData16;
1286 delete modData8;
1287 delete sortedData16;
1288 delete sortedData8;
1289 delete randomData16;
1290 delete randomData8;
1291}
1292
1293#define MAX_NUM_DATA 10000
1294
1295const CA_uchar* CollPerf2Test::getData16(UErrorCode &status)
1296{
1297 if (U_FAILURE(status)) return NULL;
1298 if (data16) return data16;
1299
1300 CA_uchar* d16 = new CA_uchar();
1301 const UChar *line = NULL;
1302 int32_t len = 0;
1303 int32_t numData = 0;
1304
1305 for (;;) {
1306 line = ucbuf_readline(ucharBuf, &len, &status);
1307 if (line == NULL || U_FAILURE(status)) break;
1308
1309 // Refer to the source code of ucbuf_readline()
1310 // 1. 'len' includes the line terminal symbols
1311 // 2. The length of the line terminal symbols is only one character
1312 // 3. The Windows CR LF line terminal symbols will be converted to CR
1313
1314 if (len == 1 || line[0] == 0x23 /* '#' */) {
1315 continue; // skip empty/comment line
1316 } else {
1317 d16->append_one(len);
1318 u_memcpy(d16->last(), line, len);
1319
1320 numData++;
1321 if (numData >= MAX_NUM_DATA) break;
1322 }
1323 }
1324
1325 if (U_SUCCESS(status)) {
1326 data16 = d16;
1327 } else {
1328 delete d16;
1329 }
1330
1331 return data16;
1332}
1333
1334const CA_char* CollPerf2Test::getData8(UErrorCode &status)
1335{
1336 if (U_FAILURE(status)) return NULL;
1337 if (data8) return data8;
1338 return data8 = getData8FromData16(getData16(status), status);
1339}
1340
1341const CA_uchar* CollPerf2Test::getModData16(UErrorCode &status)
1342{
1343 if (U_FAILURE(status)) return NULL;
1344 if (modData16) return modData16;
1345
1346 const CA_uchar* d16 = getData16(status);
1347 if (U_FAILURE(status)) return NULL;
1348
1349 CA_uchar* modData16 = new CA_uchar();
1350
1351 for (int32_t i = 0; i < d16->count; i++) {
1352 const UChar *s = d16->dataOf(i);
1353 int32_t len = d16->lengthOf(i) + 1; // including NULL terminator
1354
1355 modData16->append_one(len);
1356 u_memcpy(modData16->last(), s, len);
1357
1358 // replacing the last character with a different character
1359 UChar *lastChar = &modData16->last()[len -2];
1360 for (int32_t j = i + 1; j != i; j++) {
1361 if (j >= d16->count) {
1362 j = 0;
1363 }
1364 const UChar *s1 = d16->dataOf(j);
1365 UChar lastChar1 = s1[d16->lengthOf(j) - 1];
1366 if (*lastChar != lastChar1) {
1367 *lastChar = lastChar1;
1368 break;
1369 }
1370 }
1371 }
1372
1373 return modData16;
1374}
1375
1376const CA_char* CollPerf2Test::getModData8(UErrorCode &status)
1377{
1378 if (U_FAILURE(status)) return NULL;
1379 if (modData8) return modData8;
1380 return modData8 = getData8FromData16(getModData16(status), status);
1381}
1382
1383namespace {
1384
1385struct ArrayAndColl {
1386 ArrayAndColl(const CA_uchar* a, const Collator& c) : d16(a), coll(c) {}
1387 const CA_uchar* d16;
1388 const Collator& coll;
1389};
1390
1391int32_t U_CALLCONV
1392U16CollatorComparator(const void* context, const void* left, const void* right) {
1393 const ArrayAndColl& ac = *(const ArrayAndColl*)context;
1394 const CA_uchar* d16 = ac.d16;
1395 int32_t leftIndex = *(const int32_t*)left;
1396 int32_t rightIndex = *(const int32_t*)right;
1397 UErrorCode errorCode = U_ZERO_ERROR;
1398 return ac.coll.compare(d16->dataOf(leftIndex), d16->lengthOf(leftIndex),
1399 d16->dataOf(rightIndex), d16->lengthOf(rightIndex),
1400 errorCode);
1401}
1402
1403int32_t U_CALLCONV
1404U16HashComparator(const void* context, const void* left, const void* right) {
1405 const CA_uchar* d16 = (const CA_uchar*)context;
1406 int32_t leftIndex = *(const int32_t*)left;
1407 int32_t rightIndex = *(const int32_t*)right;
1408 int32_t leftHash = ustr_hashUCharsN(d16->dataOf(leftIndex), d16->lengthOf(leftIndex));
1409 int32_t rightHash = ustr_hashUCharsN(d16->dataOf(rightIndex), d16->lengthOf(rightIndex));
1410 return leftHash < rightHash ? -1 : leftHash == rightHash ? 0 : 1;
1411}
1412
1413} // namespace
1414
1415const CA_uchar* CollPerf2Test::getSortedData16(UErrorCode &status) {
1416 if (U_FAILURE(status)) return NULL;
1417 if (sortedData16) return sortedData16;
1418
1419 ArrayAndColl ac(getData16(status), *collObj);
1420 return sortedData16 = sortData16(ac.d16, U16CollatorComparator, &ac, status);
1421}
1422
1423const CA_char* CollPerf2Test::getSortedData8(UErrorCode &status) {
1424 if (U_FAILURE(status)) return NULL;
1425 if (sortedData8) return sortedData8;
1426 return sortedData8 = getData8FromData16(getSortedData16(status), status);
1427}
1428
1429const CA_uchar* CollPerf2Test::getRandomData16(UErrorCode &status) {
1430 if (U_FAILURE(status)) return NULL;
1431 if (randomData16) return randomData16;
1432
1433 // Sort the strings by their hash codes, which should be a reasonably pseudo-random order.
1434 const CA_uchar* d16 = getData16(status);
1435 return randomData16 = sortData16(d16, U16HashComparator, d16, status);
1436}
1437
1438const CA_char* CollPerf2Test::getRandomData8(UErrorCode &status) {
1439 if (U_FAILURE(status)) return NULL;
1440 if (randomData8) return randomData8;
1441 return randomData8 = getData8FromData16(getRandomData16(status), status);
1442}
1443
1444CA_uchar* CollPerf2Test::sortData16(const CA_uchar* d16,
1445 UComparator *cmp, const void *context,
1446 UErrorCode &status) {
1447 if (U_FAILURE(status)) return NULL;
1448
1449 LocalArray<int32_t> indexes(new int32_t[d16->count]);
1450 for (int32_t i = 0; i < d16->count; ++i) {
1451 indexes[i] = i;
1452 }
1453 uprv_sortArray(indexes.getAlias(), d16->count, 4, cmp, context, TRUE, &status);
1454 if (U_FAILURE(status)) return NULL;
1455
1456 // Copy the strings in sorted order into a new array.
1457 LocalPointer<CA_uchar> newD16(new CA_uchar());
1458 for (int32_t i = 0; i < d16->count; i++) {
1459 const UChar* s = d16->dataOf(i);
1460 int32_t len = d16->lengthOf(i);
1461 int32_t capacity = len + 1; // including NULL terminator
1462 newD16->append_one(capacity);
1463 u_memcpy(newD16->last(), s, capacity);
1464 }
1465
1466 if (U_SUCCESS(status)) {
1467 return newD16.orphan();
1468 } else {
1469 return NULL;
1470 }
1471}
1472
1473CA_char* CollPerf2Test::getData8FromData16(const CA_uchar* d16, UErrorCode &status) {
1474 if (U_FAILURE(status)) return NULL;
1475
1476 // UTF-16 -> UTF-8 conversion
1477 LocalPointer<CA_char> d8(new CA_char());
1478 for (int32_t i = 0; i < d16->count; i++) {
1479 const UChar *s16 = d16->dataOf(i);
1480 int32_t length16 = d16->lengthOf(i);
1481
1482 // get length in UTF-8
1483 int32_t length8;
1484 u_strToUTF8(NULL, 0, &length8, s16, length16, &status);
1485 if (status == U_BUFFER_OVERFLOW_ERROR || status == U_ZERO_ERROR){
1486 status = U_ZERO_ERROR;
1487 } else {
1488 break;
1489 }
1490 int32_t capacity8 = length8 + 1; // plus terminal NULL
1491 d8->append_one(capacity8);
1492
1493 // convert to UTF-8
1494 u_strToUTF8(d8->last(), capacity8, NULL, s16, length16, &status);
1495 if (U_FAILURE(status)) break;
1496 }
1497
1498 if (U_SUCCESS(status)) {
1499 return d8.orphan();
1500 } else {
1501 return NULL;
1502 }
1503}
1504
1505UPerfFunction*
1506CollPerf2Test::runIndexedTest(int32_t index, UBool exec, const char *&name, char *par /*= NULL*/)
1507{
1508 (void)par;
1509 TESTCASE_AUTO_BEGIN;
1510
1511 TESTCASE_AUTO(TestStrcoll);
1512 TESTCASE_AUTO(TestStrcollNull);
1513 TESTCASE_AUTO(TestStrcollSimilar);
1514
1515 TESTCASE_AUTO(TestStrcollUTF8);
1516 TESTCASE_AUTO(TestStrcollUTF8Null);
1517 TESTCASE_AUTO(TestStrcollUTF8Similar);
1518
1519 TESTCASE_AUTO(TestGetSortKey);
1520 TESTCASE_AUTO(TestGetSortKeyNull);
1521
1522 TESTCASE_AUTO(TestNextSortKeyPart_4All);
1523 TESTCASE_AUTO(TestNextSortKeyPart_4x4);
1524 TESTCASE_AUTO(TestNextSortKeyPart_4x8);
1525 TESTCASE_AUTO(TestNextSortKeyPart_32All);
1526 TESTCASE_AUTO(TestNextSortKeyPart_32x2);
1527
1528 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4All);
1529 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x4);
1530 TESTCASE_AUTO(TestNextSortKeyPartUTF8_4x8);
1531 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32All);
1532 TESTCASE_AUTO(TestNextSortKeyPartUTF8_32x2);
1533
1534 TESTCASE_AUTO(TestCppCompare);
1535 TESTCASE_AUTO(TestCppCompareNull);
1536 TESTCASE_AUTO(TestCppCompareSimilar);
1537
1538 TESTCASE_AUTO(TestCppCompareUTF8);
1539 TESTCASE_AUTO(TestCppCompareUTF8Null);
1540 TESTCASE_AUTO(TestCppCompareUTF8Similar);
1541
1542 TESTCASE_AUTO(TestCppGetCollationKey);
1543 TESTCASE_AUTO(TestCppGetCollationKeyNull);
1544
1545 TESTCASE_AUTO(TestUniStrSort);
1546 TESTCASE_AUTO(TestStringPieceSortCpp);
1547 TESTCASE_AUTO(TestStringPieceSortC);
1548
1549 TESTCASE_AUTO(TestUniStrBinSearch);
1550 TESTCASE_AUTO(TestStringPieceBinSearchCpp);
1551 TESTCASE_AUTO(TestStringPieceBinSearchC);
1552
1553 TESTCASE_AUTO_END;
1554 return NULL;
1555}
1556
1557
1558
1559UPerfFunction* CollPerf2Test::TestStrcoll()
1560{
1561 UErrorCode status = U_ZERO_ERROR;
1562 Strcoll *testCase = new Strcoll(coll, getData16(status), TRUE /* useLen */);
1563 if (U_FAILURE(status)) {
1564 delete testCase;
1565 return NULL;
1566 }
1567 return testCase;
1568}
1569
1570UPerfFunction* CollPerf2Test::TestStrcollNull()
1571{
1572 UErrorCode status = U_ZERO_ERROR;
1573 Strcoll *testCase = new Strcoll(coll, getData16(status), FALSE /* useLen */);
1574 if (U_FAILURE(status)) {
1575 delete testCase;
1576 return NULL;
1577 }
1578 return testCase;
1579}
1580
1581UPerfFunction* CollPerf2Test::TestStrcollSimilar()
1582{
1583 UErrorCode status = U_ZERO_ERROR;
1584 Strcoll_2 *testCase = new Strcoll_2(coll, getData16(status), getModData16(status), TRUE /* useLen */);
1585 if (U_FAILURE(status)) {
1586 delete testCase;
1587 return NULL;
1588 }
1589 return testCase;
1590}
1591
1592UPerfFunction* CollPerf2Test::TestStrcollUTF8()
1593{
1594 UErrorCode status = U_ZERO_ERROR;
1595 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status), TRUE /* useLen */);
1596 if (U_FAILURE(status)) {
1597 delete testCase;
1598 return NULL;
1599 }
1600 return testCase;
1601}
1602
1603UPerfFunction* CollPerf2Test::TestStrcollUTF8Null()
1604{
1605 UErrorCode status = U_ZERO_ERROR;
1606 StrcollUTF8 *testCase = new StrcollUTF8(coll, getData8(status),FALSE /* useLen */);
1607 if (U_FAILURE(status)) {
1608 delete testCase;
1609 return NULL;
1610 }
1611 return testCase;
1612}
1613
1614UPerfFunction* CollPerf2Test::TestStrcollUTF8Similar()
1615{
1616 UErrorCode status = U_ZERO_ERROR;
1617 StrcollUTF8_2 *testCase = new StrcollUTF8_2(coll, getData8(status), getModData8(status), TRUE /* useLen */);
1618 if (U_FAILURE(status)) {
1619 delete testCase;
1620 return NULL;
1621 }
1622 return testCase;
1623}
1624
1625UPerfFunction* CollPerf2Test::TestGetSortKey()
1626{
1627 UErrorCode status = U_ZERO_ERROR;
1628 GetSortKey *testCase = new GetSortKey(coll, getData16(status), TRUE /* useLen */);
1629 if (U_FAILURE(status)) {
1630 delete testCase;
1631 return NULL;
1632 }
1633 return testCase;
1634}
1635
1636UPerfFunction* CollPerf2Test::TestGetSortKeyNull()
1637{
1638 UErrorCode status = U_ZERO_ERROR;
1639 GetSortKey *testCase = new GetSortKey(coll, getData16(status), FALSE /* useLen */);
1640 if (U_FAILURE(status)) {
1641 delete testCase;
1642 return NULL;
1643 }
1644 return testCase;
1645}
1646
1647UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4All()
1648{
1649 UErrorCode status = U_ZERO_ERROR;
1650 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */);
1651 if (U_FAILURE(status)) {
1652 delete testCase;
1653 return NULL;
1654 }
1655 return testCase;
1656}
1657
1658UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x4()
1659{
1660 UErrorCode status = U_ZERO_ERROR;
1661 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 4 /* maxIteration */);
1662 if (U_FAILURE(status)) {
1663 delete testCase;
1664 return NULL;
1665 }
1666 return testCase;
1667}
1668
1669UPerfFunction* CollPerf2Test::TestNextSortKeyPart_4x8()
1670{
1671 UErrorCode status = U_ZERO_ERROR;
1672 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 4 /* bufSize */, 8 /* maxIteration */);
1673 if (U_FAILURE(status)) {
1674 delete testCase;
1675 return NULL;
1676 }
1677 return testCase;
1678}
1679
1680UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32All()
1681{
1682 UErrorCode status = U_ZERO_ERROR;
1683 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */);
1684 if (U_FAILURE(status)) {
1685 delete testCase;
1686 return NULL;
1687 }
1688 return testCase;
1689}
1690
1691UPerfFunction* CollPerf2Test::TestNextSortKeyPart_32x2()
1692{
1693 UErrorCode status = U_ZERO_ERROR;
1694 NextSortKeyPart *testCase = new NextSortKeyPart(coll, getData16(status), 32 /* bufSize */, 2 /* maxIteration */);
1695 if (U_FAILURE(status)) {
1696 delete testCase;
1697 return NULL;
1698 }
1699 return testCase;
1700}
1701
1702UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4All()
1703{
1704 UErrorCode status = U_ZERO_ERROR;
1705 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */);
1706 if (U_FAILURE(status)) {
1707 delete testCase;
1708 return NULL;
1709 }
1710 return testCase;
1711}
1712
1713UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x4()
1714{
1715 UErrorCode status = U_ZERO_ERROR;
1716 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 4 /* maxIteration */);
1717 if (U_FAILURE(status)) {
1718 delete testCase;
1719 return NULL;
1720 }
1721 return testCase;
1722}
1723
1724UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_4x8()
1725{
1726 UErrorCode status = U_ZERO_ERROR;
1727 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 4 /* bufSize */, 8 /* maxIteration */);
1728 if (U_FAILURE(status)) {
1729 delete testCase;
1730 return NULL;
1731 }
1732 return testCase;
1733}
1734
1735UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32All()
1736{
1737 UErrorCode status = U_ZERO_ERROR;
1738 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */);
1739 if (U_FAILURE(status)) {
1740 delete testCase;
1741 return NULL;
1742 }
1743 return testCase;
1744}
1745
1746UPerfFunction* CollPerf2Test::TestNextSortKeyPartUTF8_32x2()
1747{
1748 UErrorCode status = U_ZERO_ERROR;
1749 NextSortKeyPartUTF8 *testCase = new NextSortKeyPartUTF8(coll, getData8(status), 32 /* bufSize */, 2 /* maxIteration */);
1750 if (U_FAILURE(status)) {
1751 delete testCase;
1752 return NULL;
1753 }
1754 return testCase;
1755}
1756
1757UPerfFunction* CollPerf2Test::TestCppCompare()
1758{
1759 UErrorCode status = U_ZERO_ERROR;
1760 CppCompare *testCase = new CppCompare(collObj, getData16(status), TRUE /* useLen */);
1761 if (U_FAILURE(status)) {
1762 delete testCase;
1763 return NULL;
1764 }
1765 return testCase;
1766}
1767
1768UPerfFunction* CollPerf2Test::TestCppCompareNull()
1769{
1770 UErrorCode status = U_ZERO_ERROR;
1771 CppCompare *testCase = new CppCompare(collObj, getData16(status), FALSE /* useLen */);
1772 if (U_FAILURE(status)) {
1773 delete testCase;
1774 return NULL;
1775 }
1776 return testCase;
1777}
1778
1779UPerfFunction* CollPerf2Test::TestCppCompareSimilar()
1780{
1781 UErrorCode status = U_ZERO_ERROR;
1782 CppCompare_2 *testCase = new CppCompare_2(collObj, getData16(status), getModData16(status), TRUE /* useLen */);
1783 if (U_FAILURE(status)) {
1784 delete testCase;
1785 return NULL;
1786 }
1787 return testCase;
1788}
1789
1790UPerfFunction* CollPerf2Test::TestCppCompareUTF8()
1791{
1792 UErrorCode status = U_ZERO_ERROR;
1793 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), TRUE /* useLen */);
1794 if (U_FAILURE(status)) {
1795 delete testCase;
1796 return NULL;
1797 }
1798 return testCase;
1799}
1800
1801UPerfFunction* CollPerf2Test::TestCppCompareUTF8Null()
1802{
1803 UErrorCode status = U_ZERO_ERROR;
1804 CppCompareUTF8 *testCase = new CppCompareUTF8(collObj, getData8(status), FALSE /* useLen */);
1805 if (U_FAILURE(status)) {
1806 delete testCase;
1807 return NULL;
1808 }
1809 return testCase;
1810}
1811
1812UPerfFunction* CollPerf2Test::TestCppCompareUTF8Similar()
1813{
1814 UErrorCode status = U_ZERO_ERROR;
1815 CppCompareUTF8_2 *testCase = new CppCompareUTF8_2(collObj, getData8(status), getModData8(status), TRUE /* useLen */);
1816 if (U_FAILURE(status)) {
1817 delete testCase;
1818 return NULL;
1819 }
1820 return testCase;
1821}
1822
1823UPerfFunction* CollPerf2Test::TestCppGetCollationKey()
1824{
1825 UErrorCode status = U_ZERO_ERROR;
1826 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), TRUE /* useLen */);
1827 if (U_FAILURE(status)) {
1828 delete testCase;
1829 return NULL;
1830 }
1831 return testCase;
1832}
1833
1834UPerfFunction* CollPerf2Test::TestCppGetCollationKeyNull()
1835{
1836 UErrorCode status = U_ZERO_ERROR;
1837 CppGetCollationKey *testCase = new CppGetCollationKey(collObj, getData16(status), FALSE /* useLen */);
1838 if (U_FAILURE(status)) {
1839 delete testCase;
1840 return NULL;
1841 }
1842 return testCase;
1843}
1844
1845UPerfFunction* CollPerf2Test::TestUniStrSort() {
1846 UErrorCode status = U_ZERO_ERROR;
1847 UPerfFunction *testCase = new UniStrSort(*collObj, coll, getRandomData16(status));
1848 if (U_FAILURE(status)) {
1849 delete testCase;
1850 return NULL;
1851 }
1852 return testCase;
1853}
1854
1855UPerfFunction* CollPerf2Test::TestStringPieceSortCpp() {
1856 UErrorCode status = U_ZERO_ERROR;
1857 UPerfFunction *testCase = new StringPieceSortCpp(*collObj, coll, getRandomData8(status));
1858 if (U_FAILURE(status)) {
1859 delete testCase;
1860 return NULL;
1861 }
1862 return testCase;
1863}
1864
1865UPerfFunction* CollPerf2Test::TestStringPieceSortC() {
1866 UErrorCode status = U_ZERO_ERROR;
1867 UPerfFunction *testCase = new StringPieceSortC(*collObj, coll, getRandomData8(status));
1868 if (U_FAILURE(status)) {
1869 delete testCase;
1870 return NULL;
1871 }
1872 return testCase;
1873}
1874
1875UPerfFunction* CollPerf2Test::TestUniStrBinSearch() {
1876 UErrorCode status = U_ZERO_ERROR;
1877 UPerfFunction *testCase = new UniStrBinSearch(*collObj, coll, getSortedData16(status));
1878 if (U_FAILURE(status)) {
1879 delete testCase;
1880 return NULL;
1881 }
1882 return testCase;
1883}
1884
1885UPerfFunction* CollPerf2Test::TestStringPieceBinSearchCpp() {
1886 UErrorCode status = U_ZERO_ERROR;
1887 UPerfFunction *testCase = new StringPieceBinSearchCpp(*collObj, coll, getSortedData8(status));
1888 if (U_FAILURE(status)) {
1889 delete testCase;
1890 return NULL;
1891 }
1892 return testCase;
1893}
1894
1895UPerfFunction* CollPerf2Test::TestStringPieceBinSearchC() {
1896 UErrorCode status = U_ZERO_ERROR;
1897 UPerfFunction *testCase = new StringPieceBinSearchC(*collObj, coll, getSortedData8(status));
1898 if (U_FAILURE(status)) {
1899 delete testCase;
1900 return NULL;
1901 }
1902 return testCase;
1903}
1904
1905
1906int main(int argc, const char *argv[])
1907{
1908 UErrorCode status = U_ZERO_ERROR;
1909 CollPerf2Test test(argc, argv, status);
1910
1911 if (U_FAILURE(status)){
1912 printf("The error is %s\n", u_errorName(status));
1913 //TODO: print usage here
1914 return status;
1915 }
1916
1917 if (test.run() == FALSE){
1918 fprintf(stderr, "FAILED: Tests could not be run please check the arguments.\n");
1919 return -1;
1920 }
1921 return 0;
1922}