1 /********************************************************************
3 * Copyright (C) 2001-2016 IBM, Inc. All Rights Reserved.
5 ********************************************************************/
13 #include "unicode/uperf.h"
15 #include "unicode/coll.h"
16 #include <unicode/ucoleitr.h>
18 #if !U_PLATFORM_HAS_WIN32_API
19 #define DWORD uint32_t
23 /* To store an array of string<UNIT> in contiguous space.
24 Since string<UNIT> itself is treated as an array of UNIT, this
25 class will ease our memory management for an array of string<UNIT>.
28 //template<typename UNIT>
29 #define COMPATCT_ARRAY(CompactArrays, UNIT) \
30 struct CompactArrays{\
31 CompactArrays(const CompactArrays & );\
32 CompactArrays & operator=(const CompactArrays & );\
33 int32_t count;/*total number of the strings*/ \
34 int32_t * index;/*relative offset in data*/ \
35 UNIT * data; /*the real space to hold strings*/ \
37 ~CompactArrays(){free(index);free(data);} \
38 CompactArrays():data(NULL), index(NULL), count(0){ \
39 index = (int32_t *) realloc(index, sizeof(int32_t)); \
42 void append_one(int32_t theLen){ /*include terminal NULL*/ \
44 index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
45 index[count] = index[count - 1] + theLen; \
46 data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
48 UNIT * last(){return data + index[count - 1];} \
49 UNIT * dataOf(int32_t i){return data + index[i];} \
50 int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
53 //typedef CompactArrays<UChar> CA_uchar;
54 //typedef CompactArrays<char> CA_char;
55 //typedef CompactArrays<uint8_t> CA_uint8;
56 //typedef CompactArrays<WCHAR> CA_win_wchar;
58 COMPATCT_ARRAY(CA_uchar
, UChar
)
59 COMPATCT_ARRAY(CA_char
, char)
60 COMPATCT_ARRAY(CA_uint8
, uint8_t)
61 COMPATCT_ARRAY(CA_win_wchar
, WCHAR
)
65 static DWORD win_langid
; // for qsort callback function
66 static UCollator
* col
; // for qsort callback function
72 int32_t posix_data_len
;
77 DWORD
DataIndex::win_langid
;
78 UCollator
* DataIndex::col
;
82 class CmdKeyGen
: public UPerfFunction
{
83 typedef void (CmdKeyGen::* Func
)(int32_t);
84 enum{MAX_KEY_LENGTH
= 5000};
91 union { // to save sapce
92 uint8_t icu_key
[MAX_KEY_LENGTH
];
93 char posix_key
[MAX_KEY_LENGTH
];
94 WCHAR win_key
[MAX_KEY_LENGTH
];
97 CmdKeyGen(UErrorCode
, UCollator
* col
,DWORD win_langid
, int32_t count
, DataIndex
* data
,Func fn
,int32_t)
98 :col(col
),win_langid(win_langid
), count(count
), data(data
), fn(fn
){}
100 virtual long getOperationsPerIteration(){return count
;}
102 virtual void call(UErrorCode
* status
){
103 for(int32_t i
= 0; i
< count
; i
++){
108 void icu_key_null(int32_t i
){
109 ucol_getSortKey(col
, data
[i
].icu_data
, -1, icu_key
, MAX_KEY_LENGTH
);
112 void icu_key_len(int32_t i
){
113 ucol_getSortKey(col
, data
[i
].icu_data
, data
[i
].icu_data_len
, icu_key
, MAX_KEY_LENGTH
);
116 #if U_PLATFORM_HAS_WIN32_API
117 // pre-generated in CollPerfTest::prepareData(), need not to check error here
118 void win_key_null(int32_t i
){
119 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize)
120 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, -1, win_key
, MAX_KEY_LENGTH
);
123 void win_key_len(int32_t i
){
124 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, data
[i
].win_data_len
, win_key
, MAX_KEY_LENGTH
);
128 void posix_key_null(int32_t i
){
129 strxfrm(posix_key
, data
[i
].posix_data
, MAX_KEY_LENGTH
);
134 class CmdIter
: public UPerfFunction
{
135 typedef void (CmdIter::* Func
)(UErrorCode
* , int32_t );
139 UCollationElements
*iter
;
142 CmdIter(UErrorCode
& status
, UCollator
* col
, int32_t count
, CA_uchar
*data
, Func fn
, int32_t,int32_t)
143 :count(count
), data(data
), fn(fn
){
145 UChar dummytext
[] = {0, 0};
146 iter
= ucol_openElements(col
, NULL
, 0, &status
);
147 ucol_setText(iter
, dummytext
, 1, &status
);
150 ucol_closeElements(iter
);
153 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
155 virtual void call(UErrorCode
* status
){
157 for(int32_t i
= 0; i
< count
; i
++){
158 (this->*fn
)(status
, i
);
162 void icu_forward_null(UErrorCode
* status
, int32_t i
){
163 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
164 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
167 void icu_forward_len(UErrorCode
* status
, int32_t i
){
168 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
169 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
172 void icu_backward_null(UErrorCode
* status
, int32_t i
){
173 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
174 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
177 void icu_backward_len(UErrorCode
* status
, int32_t i
){
178 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
179 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
183 class CmdIterAll
: public UPerfFunction
{
184 typedef void (CmdIterAll::* Func
)(UErrorCode
* status
);
188 UCollationElements
*iter
;
192 enum CALL
{forward_null
, forward_len
, backward_null
, backward_len
};
195 ucol_closeElements(iter
);
197 CmdIterAll(UErrorCode
& status
, UCollator
* col
, int32_t count
, UChar
* data
, CALL call
,int32_t,int32_t)
198 :count(count
),data(data
)
201 if (call
== forward_null
|| call
== backward_null
) {
202 iter
= ucol_openElements(col
, data
, -1, &status
);
204 iter
= ucol_openElements(col
, data
, count
, &status
);
207 if (call
== forward_null
|| call
== forward_len
){
208 fn
= &CmdIterAll::icu_forward_all
;
210 fn
= &CmdIterAll::icu_backward_all
;
213 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
215 virtual void call(UErrorCode
* status
){
219 void icu_forward_all(UErrorCode
* status
){
220 int strlen
= count
- 5;
223 ucol_setOffset(iter
, strindex
, status
);
225 if (ucol_next(iter
, status
) == UCOL_NULLORDER
) {
232 if (strindex
> strlen
) {
235 ucol_setOffset(iter
, strindex
, status
);
241 void icu_backward_all(UErrorCode
* status
){
245 ucol_setOffset(iter
, strindex
, status
);
247 if (ucol_previous(iter
, status
) == UCOL_NULLORDER
) {
254 if (strindex
> strlen
) {
257 ucol_setOffset(iter
, strindex
, status
);
265 struct CmdQsort
: public UPerfFunction
{
267 static int q_random(const void * a
, const void * b
){
268 uint8_t * key_a
= ((DataIndex
*)a
)->icu_key
;
269 uint8_t * key_b
= ((DataIndex
*)b
)->icu_key
;
273 while (*key_a
!= 0) {val_a
+= val_a
*37 + *key_a
++;}
274 while (*key_b
!= 0) {val_b
+= val_b
*37 + *key_b
++;}
275 return val_a
- val_b
;
279 DataIndex * da = (DataIndex *) a; \
280 DataIndex * db = (DataIndex *) b; \
283 static int icu_strcoll_null(const void *a
, const void *b
){
285 return ucol_strcoll(da
->col
, da
->icu_data
, -1, db
->icu_data
, -1) - UCOL_EQUAL
;
288 static int icu_strcoll_len(const void *a
, const void *b
){
290 return ucol_strcoll(da
->col
, da
->icu_data
, da
->icu_data_len
, db
->icu_data
, db
->icu_data_len
) - UCOL_EQUAL
;
293 static int icu_cmpkey (const void *a
, const void *b
){
295 return strcmp((char *) da
->icu_key
, (char *) db
->icu_key
);
298 #if U_PLATFORM_HAS_WIN32_API
299 static int win_cmp_null(const void *a
, const void *b
) {
303 //CSTR_GREATER_THAN 3
304 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, -1, db
->win_data
, -1);
306 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
309 return t
- CSTR_EQUAL
;
313 static int win_cmp_len(const void *a
, const void *b
) {
315 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, da
->win_data_len
, db
->win_data
, db
->win_data_len
);
317 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
320 return t
- CSTR_EQUAL
;
325 #define QFUNC(name, func, data) \
326 static int name (const void *a, const void *b){ \
328 return func(da->data, db->data); \
331 QFUNC(posix_strcoll_null
, strcoll
, posix_data
)
332 QFUNC(posix_cmpkey
, strcmp
, posix_key
)
333 #if U_PLATFORM_HAS_WIN32_API
334 QFUNC(win_cmpkey
, strcmp
, win_key
)
335 QFUNC(win_wcscmp
, wcscmp
, win_data
)
337 QFUNC(icu_strcmp
, u_strcmp
, icu_data
)
338 QFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
341 static int32_t exec_count
; // potential muilt-thread problem
343 typedef int (* Func
)(const void *, const void *);
346 void * base
; //Start of target array.
347 int32_t num
; //Array size in elements.
348 int32_t width
; //Element size in bytes.
350 void * backup
; //copy source of base
352 CmdQsort(UErrorCode
& status
,void *theBase
, int32_t num
, int32_t width
, Func fn
, int32_t,int32_t)
353 :backup(theBase
),num(num
),width(width
),fn(fn
){
354 base
= malloc(num
* width
);
355 time_empty(100, &status
); // warm memory/cache
364 memcpy(base
, backup
, num
* width
);
367 double time_empty(int32_t n
, UErrorCode
* status
) {
369 utimer_getTime(&start
);
373 utimer_getTime(&stop
);
374 return utimer_getDeltaSeconds(&start
,&stop
); // ms
377 virtual void call(UErrorCode
* status
){
379 memcpy(base
, backup
, num
* width
);
380 qsort(base
, num
, width
, fn
);
382 virtual double time(int32_t n
, UErrorCode
* status
) {
383 double t1
= time_empty(n
,status
);
384 double t2
= UPerfFunction::time(n
, status
);
385 return t2
-t1
;// < 0 ? t2 : t2-t1;
388 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
390 int32_t CmdQsort::exec_count
;
393 class CmdBinSearch
: public UPerfFunction
{
395 typedef int (CmdBinSearch::* Func
)(int, int);
405 CmdBinSearch(UErrorCode
, UCollator
* col
,DWORD win_langid
,int32_t count
,DataIndex
* rnd
,DataIndex
* ord
,Func fn
)
406 :col(col
),win_langid(win_langid
), count(count
), rnd(rnd
), ord(ord
), fn(fn
),exec_count(0){}
409 virtual void call(UErrorCode
* status
){
411 for(int32_t i
= 0; i
< count
; i
++){ // search all data
415 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
417 void binary_search(int32_t random
) {
419 int high
= count
- 1;
424 guess
= (high
+ low
)/2;
425 if (last_guess
== guess
) break; // nothing to search
427 r
= (this->*fn
)(random
, guess
);
431 return; // found, search end.
441 int icu_strcoll_null(int32_t i
, int32_t j
){
442 return ucol_strcoll(col
, rnd
[i
].icu_data
, -1, ord
[j
].icu_data
,-1);
445 int icu_strcoll_len(int32_t i
, int32_t j
){
446 return ucol_strcoll(col
, rnd
[i
].icu_data
, rnd
[i
].icu_data_len
, ord
[j
].icu_data
, ord
[j
].icu_data_len
);
449 int icu_cmpkey(int32_t i
, int32_t j
) {
450 return strcmp( (char *) rnd
[i
].icu_key
, (char *) ord
[j
].icu_key
);
453 #if U_PLATFORM_HAS_WIN32_API
454 int win_cmp_null(int32_t i
, int32_t j
) {
455 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, -1, ord
[j
].win_data
, -1);
457 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
460 return t
- CSTR_EQUAL
;
464 int win_cmp_len(int32_t i
, int32_t j
) {
465 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, rnd
[i
].win_data_len
, ord
[j
].win_data
, ord
[j
].win_data_len
);
467 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
470 return t
- CSTR_EQUAL
;
475 #define BFUNC(name, func, data) \
476 int name(int32_t i, int32_t j) { \
477 return func(rnd[i].data, ord[j].data); \
480 BFUNC(posix_strcoll_null
, strcoll
, posix_data
)
481 BFUNC(posix_cmpkey
, strcmp
, posix_key
)
482 BFUNC(win_cmpkey
, strcmp
, win_key
)
483 BFUNC(win_wcscmp
, wcscmp
, win_data
)
484 BFUNC(icu_strcmp
, u_strcmp
, icu_data
)
485 BFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
488 class CollPerfTest
: public UPerfTest
{
493 UChar
* icu_data_all
;
494 int32_t icu_data_all_len
;
499 CA_char
* posix_data
;
501 CA_win_wchar
* win_data
;
504 DataIndex
* rnd_index
; // random by icu key
505 DataIndex
* ord_win_data
;
506 DataIndex
* ord_win_key
;
507 DataIndex
* ord_posix_data
;
508 DataIndex
* ord_posix_key
;
509 DataIndex
* ord_icu_data
;
510 DataIndex
* ord_icu_key
;
511 DataIndex
* ord_win_wcscmp
;
512 DataIndex
* ord_icu_strcmp
;
513 DataIndex
* ord_icu_cmpcpo
;
515 virtual ~CollPerfTest(){
517 delete [] icu_data_all
;
525 delete[] ord_win_data
;
526 delete[] ord_win_key
;
527 delete[] ord_posix_data
;
528 delete[] ord_posix_key
;
529 delete[] ord_icu_data
;
530 delete[] ord_icu_key
;
531 delete[] ord_win_wcscmp
;
532 delete[] ord_icu_strcmp
;
533 delete[] ord_icu_cmpcpo
;
536 CollPerfTest(int32_t argc
, const char* argv
[], UErrorCode
& status
):UPerfTest(argc
, argv
, status
){
549 ord_posix_data
= NULL
;
553 ord_win_wcscmp
= NULL
;
554 ord_icu_strcmp
= NULL
;
555 ord_icu_cmpcpo
= NULL
;
557 if (U_FAILURE(status
)){
561 // Parse additional arguments
563 UOption options
[] = {
564 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG
), // Windows Language ID number.
565 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG
), // --rulefile <filename>
566 // Collation related arguments. All are optional.
567 // To simplify parsing, two choice arguments are designed as NO_ARG.
568 // The default value is UPPER word in the comment
569 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG
), // --french <on | OFF>
570 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG
), // --alternate <NON_IGNORE | shifted>
571 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG
), // --casefirst <lower | upper | OFF>
572 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG
), // --caselevel <on | OFF>
573 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG
), // --normal <on | OFF>
574 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG
), // --strength <1-5>
576 int32_t opt_len
= UPRV_LENGTHOF(options
);
577 enum {i
, r
,f
,a
,c
,l
,n
,s
}; // The buffer between the option items' order and their references
579 _remainingArgc
= u_parseArgs(_remainingArgc
, (char**)argv
, opt_len
, options
);
581 if (_remainingArgc
< 0){
582 status
= U_ILLEGAL_ARGUMENT_ERROR
;
587 locale
= "en_US"; // set default locale
590 #if U_PLATFORM_HAS_WIN32_API
591 if (options
[i
].doesOccur
) {
593 int tmp
= strtol(options
[i
].value
, &endp
, 0);
594 if (endp
== options
[i
].value
) {
595 status
= U_ILLEGAL_ARGUMENT_ERROR
;
598 win_langid
= MAKELCID(tmp
, SORT_DEFAULT
);
600 win_langid
= uloc_getLCID(locale
);
604 // Set up an ICU collator
605 if (options
[r
].doesOccur
) {
606 // TODO: implement it
608 col
= ucol_open(locale
, &status
);
609 if (U_FAILURE(status
)) {
614 if (options
[f
].doesOccur
) {
615 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
617 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
620 if (options
[a
].doesOccur
) {
621 ucol_setAttribute(col
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
624 if (options
[c
].doesOccur
) { // strcmp() has i18n encoding problem
625 if (strcmp("lower", options
[c
].value
) == 0){
626 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, &status
);
627 } else if (strcmp("upper", options
[c
].value
) == 0) {
628 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, &status
);
630 status
= U_ILLEGAL_ARGUMENT_ERROR
;
635 if (options
[l
].doesOccur
){
636 ucol_setAttribute(col
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
639 if (options
[n
].doesOccur
){
640 ucol_setAttribute(col
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
643 if (options
[s
].doesOccur
) {
645 int tmp
= strtol(options
[l
].value
, &endp
, 0);
646 if (endp
== options
[l
].value
) {
647 status
= U_ILLEGAL_ARGUMENT_ERROR
;
651 case 1: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
); break;
652 case 2: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_SECONDARY
, &status
); break;
653 case 3: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
); break;
654 case 4: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
); break;
655 case 5: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
); break;
656 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
662 //to avoid use the annoying 'id' in TESTCASE(id,test) macro or the like
663 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
667 UErrorCode status = U_ZERO_ERROR;\
668 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
669 if (U_FAILURE(status)) {\
682 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
685 #define TEST_KEYGEN(testname, func)\
686 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
687 TEST_KEYGEN(TestIcu_KeyGen_null
, icu_key_null
);
688 TEST_KEYGEN(TestIcu_KeyGen_len
, icu_key_len
);
689 TEST_KEYGEN(TestPosix_KeyGen_null
, posix_key_null
);
690 #if U_PLATFORM_HAS_WIN32_API
691 TEST_KEYGEN(TestWin_KeyGen_null
, win_key_null
);
692 TEST_KEYGEN(TestWin_KeyGen_len
, win_key_len
);
695 #define TEST_ITER(testname, func)\
696 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
697 TEST_ITER(TestIcu_ForwardIter_null
, icu_forward_null
);
698 TEST_ITER(TestIcu_ForwardIter_len
, icu_forward_len
);
699 TEST_ITER(TestIcu_BackwardIter_null
, icu_backward_null
);
700 TEST_ITER(TestIcu_BackwardIter_len
, icu_backward_len
);
702 #define TEST_ITER_ALL(testname, func)\
703 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
704 TEST_ITER_ALL(TestIcu_ForwardIter_all_null
, forward_null
);
705 TEST_ITER_ALL(TestIcu_ForwardIter_all_len
, forward_len
);
706 TEST_ITER_ALL(TestIcu_BackwardIter_all_null
, backward_null
);
707 TEST_ITER_ALL(TestIcu_BackwardIter_all_len
, backward_len
);
709 #define TEST_QSORT(testname, func)\
710 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
711 TEST_QSORT(TestIcu_qsort_strcoll_null
, icu_strcoll_null
);
712 TEST_QSORT(TestIcu_qsort_strcoll_len
, icu_strcoll_len
);
713 TEST_QSORT(TestIcu_qsort_usekey
, icu_cmpkey
);
714 TEST_QSORT(TestPosix_qsort_strcoll_null
, posix_strcoll_null
);
715 TEST_QSORT(TestPosix_qsort_usekey
, posix_cmpkey
);
716 #if U_PLATFORM_HAS_WIN32_API
717 TEST_QSORT(TestWin_qsort_CompareStringW_null
, win_cmp_null
);
718 TEST_QSORT(TestWin_qsort_CompareStringW_len
, win_cmp_len
);
719 TEST_QSORT(TestWin_qsort_usekey
, win_cmpkey
);
722 #define TEST_BIN(testname, func)\
723 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
724 TEST_BIN(TestIcu_BinarySearch_strcoll_null
, icu_strcoll_null
);
725 TEST_BIN(TestIcu_BinarySearch_strcoll_len
, icu_strcoll_len
);
726 TEST_BIN(TestIcu_BinarySearch_usekey
, icu_cmpkey
);
727 TEST_BIN(TestIcu_BinarySearch_strcmp
, icu_strcmp
);
728 TEST_BIN(TestIcu_BinarySearch_cmpCPO
, icu_cmpcpo
);
729 TEST_BIN(TestPosix_BinarySearch_strcoll_null
, posix_strcoll_null
);
730 TEST_BIN(TestPosix_BinarySearch_usekey
, posix_cmpkey
);
731 #if U_PLATFORM_HAS_WIN32_API
732 TEST_BIN(TestWin_BinarySearch_CompareStringW_null
, win_cmp_null
);
733 TEST_BIN(TestWin_BinarySearch_CompareStringW_len
, win_cmp_len
);
735 TEST_BIN(TestWin_BinarySearch_usekey
, win_cmpkey
);
736 TEST_BIN(TestWin_BinarySearch_wcscmp
, win_wcscmp
);
744 void prepareData(UErrorCode
& status
){
745 if(U_FAILURE(status
)) return;
746 if (icu_data
) return; // prepared
748 icu_data
= new CA_uchar();
750 // Following code is borrowed from UPerfTest::getLines();
751 const UChar
* line
=NULL
;
754 line
= ucbuf_readline(ucharBuf
,&len
,&status
);
755 if(line
== NULL
|| U_FAILURE(status
)){break;}
757 // Refer to the source code of ucbuf_readline()
758 // 1. 'len' includes the line terminal symbols
759 // 2. The length of the line terminal symbols is only one character
760 // 3. The Windows CR LF line terminal symbols will be converted to CR
763 continue; //skip empty line
765 icu_data
->append_one(len
);
766 memcpy(icu_data
->last(), line
, len
* sizeof(UChar
));
767 icu_data
->last()[len
-1] = NULL
;
770 if(U_FAILURE(status
)) return;
772 // UTF-16 -> UTF-8 conversion.
773 UConverter
*conv
= ucnv_open("utf-8", &status
); // just UTF-8 for now.
774 if (U_FAILURE(status
)) return;
776 count
= icu_data
->count
;
778 icu_data_all_len
= icu_data
->index
[count
]; // includes all NULLs
779 icu_data_all_len
-= count
; // excludes all NULLs
780 icu_data_all_len
+= 1; // the terminal NULL
781 icu_data_all
= new UChar
[icu_data_all_len
];
782 icu_data_all
[icu_data_all_len
- 1] = 0; //the terminal NULL
784 icu_key
= new CA_uint8
;
785 win_data
= new CA_win_wchar
;
786 win_key
= new CA_char
;
787 posix_data
= new CA_char
;
788 posix_key
= new CA_char
;
789 rnd_index
= new DataIndex
[count
];
790 DataIndex::win_langid
= win_langid
;
791 DataIndex::col
= col
;
794 UChar
* p
= icu_data_all
;
797 for (int i
=0; i
< count
; i
++) {
799 s
= sizeof(UChar
) * icu_data
->lengthOf(i
);
800 memcpy(p
, icu_data
->dataOf(i
), s
);
801 p
+= icu_data
->lengthOf(i
);
806 s
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,NULL
, 0);
807 icu_key
->append_one(s
);
808 t
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,icu_key
->last(), s
);
809 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
812 s
= ucnv_fromUChars(conv
,NULL
, 0, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
813 if (status
== U_BUFFER_OVERFLOW_ERROR
|| status
== U_ZERO_ERROR
){
814 status
= U_ZERO_ERROR
;
818 posix_data
->append_one(s
+ 1); // plus terminal NULL
819 t
= ucnv_fromUChars(conv
,posix_data
->last(), s
, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
820 if (U_FAILURE(status
)) return;
821 if ( t
!= s
){status
= U_INVALID_FORMAT_ERROR
;return;}
822 posix_data
->last()[s
] = 0;
825 s
= strxfrm(NULL
, posix_data
->dataOf(i
), 0);
826 if (s
== INT_MAX
){status
= U_INVALID_FORMAT_ERROR
;return;}
827 posix_key
->append_one(s
);
828 t
= strxfrm(posix_key
->last(), posix_data
->dataOf(i
), s
);
829 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
831 #if U_PLATFORM_HAS_WIN32_API
833 s
= icu_data
->lengthOf(i
) + 1; // plus terminal NULL
834 win_data
->append_one(s
);
835 memcpy(win_data
->last(), icu_data
->dataOf(i
), sizeof(WCHAR
) * s
);
838 s
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), NULL
,0);
839 if (s
== 0) {status
= U_INVALID_FORMAT_ERROR
;return;}
840 win_key
->append_one(s
);
841 t
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), (WCHAR
*)(win_key
->last()),s
);
842 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
846 // append_one() may reallocate and move the buffers, so pointers into them are taken only after all appends; do not merge the following code into the previous loop
847 for (int i
=0; i
< count
; i
++) {
848 rnd_index
[i
].icu_key
= icu_key
->dataOf(i
);
849 rnd_index
[i
].icu_data
= icu_data
->dataOf(i
);
850 rnd_index
[i
].icu_data_len
= icu_data
->lengthOf(i
);
851 rnd_index
[i
].posix_key
= posix_key
->last();
852 rnd_index
[i
].posix_data
= posix_data
->dataOf(i
);
853 rnd_index
[i
].posix_data_len
= posix_data
->lengthOf(i
);
854 #if U_PLATFORM_HAS_WIN32_API
855 rnd_index
[i
].win_key
= win_key
->dataOf(i
);
856 rnd_index
[i
].win_data
= win_data
->dataOf(i
);
857 rnd_index
[i
].win_data_len
= win_data
->lengthOf(i
);
862 qsort(rnd_index
, count
, sizeof(DataIndex
), CmdQsort::q_random
);
864 #define SORT(data, func) \
865 data = new DataIndex[count];\
866 memcpy(data, rnd_index, count * sizeof(DataIndex));\
867 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
869 SORT(ord_icu_data
, icu_strcoll_len
);
870 SORT(ord_icu_key
, icu_cmpkey
);
871 SORT(ord_posix_data
, posix_strcoll_null
);
872 SORT(ord_posix_key
, posix_cmpkey
);
873 #if U_PLATFORM_HAS_WIN32_API
874 SORT(ord_win_data
, win_cmp_len
);
875 SORT(ord_win_key
, win_cmpkey
);
876 SORT(ord_win_wcscmp
, win_wcscmp
);
878 SORT(ord_icu_strcmp
, icu_strcmp
);
879 SORT(ord_icu_cmpcpo
, icu_cmpcpo
);
884 int main(int argc
, const char *argv
[])
887 UErrorCode status
= U_ZERO_ERROR
;
888 CollPerfTest
test(argc
, argv
, status
);
890 if (U_FAILURE(status
)){
891 printf("The error is %s\n", u_errorName(status
));
892 //TODO: print usage here
896 if (test
.run() == FALSE
){
897 fprintf(stderr
, "FAILED: Tests could not be run please check the "