1 /********************************************************************
3 * Copyright (C) 2001-2012 IBM, Inc. All Rights Reserved.
5 ********************************************************************/
12 #include "unicode/uperf.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
17 #if !U_PLATFORM_HAS_WIN32_API
// No Win32 API on this platform: alias DWORD so declarations that use it (e.g. win_langid) still compile.
18 #define DWORD uint32_t
22 /* Stores an array of string<UNIT> in one contiguous block of memory.
23 Since a string<UNIT> is itself treated as an array of UNIT, this
24 class eases memory management for an array of string<UNIT>.
27 //template<typename UNIT>
28 #define COMPATCT_ARRAY(CompactArrays, UNIT) \
29 struct CompactArrays{\
30 CompactArrays(const CompactArrays & );\
31 CompactArrays & operator=(const CompactArrays & );\
32 int32_t count;/*total number of the strings*/ \
33 int32_t * index;/*relative offset in data*/ \
34 UNIT * data; /*the real space to hold strings*/ \
36 ~CompactArrays(){free(index);free(data);} \
37 CompactArrays():data(NULL), index(NULL), count(0){ \
38 index = (int32_t *) realloc(index, sizeof(int32_t)); \
41 void append_one(int32_t theLen){ /*include terminal NULL*/ \
43 index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
44 index[count] = index[count - 1] + theLen; \
45 data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
47 UNIT * last(){return data + index[count - 1];} \
48 UNIT * dataOf(int32_t i){return data + index[i];} \
49 int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
52 //typedef CompactArrays<UChar> CA_uchar;
53 //typedef CompactArrays<char> CA_char;
54 //typedef CompactArrays<uint8_t> CA_uint8;
55 //typedef CompactArrays<WCHAR> CA_win_wchar;
// Instantiate the CompactArrays struct once per element type via the macro
// (the macro stands in for the commented-out template typedefs).
57 COMPATCT_ARRAY(CA_uchar
, UChar
)
58 COMPATCT_ARRAY(CA_char
, char)
59 COMPATCT_ARRAY(CA_uint8
, uint8_t)
60 COMPATCT_ARRAY(CA_win_wchar
, WCHAR
)
64 static DWORD win_langid
; // for qsort callback function
65 static UCollator
* col
; // for qsort callback function
71 int32_t posix_data_len
;
// Out-of-class definitions providing storage for DataIndex's static members;
// prepareData() assigns both before the qsort comparators read them.
76 DWORD
DataIndex::win_langid
;
77 UCollator
* DataIndex::col
;
81 class CmdKeyGen
: public UPerfFunction
{
82 typedef void (CmdKeyGen::* Func
)(int32_t);
83 enum{MAX_KEY_LENGTH
= 5000};
90 union { // to save space
91 uint8_t icu_key
[MAX_KEY_LENGTH
];
92 char posix_key
[MAX_KEY_LENGTH
];
93 WCHAR win_key
[MAX_KEY_LENGTH
];
96 CmdKeyGen(UErrorCode
, UCollator
* col
,DWORD win_langid
, int32_t count
, DataIndex
* data
,Func fn
,int32_t)
97 :col(col
),win_langid(win_langid
), count(count
), data(data
), fn(fn
){}
99 virtual long getOperationsPerIteration(){return count
;}
101 virtual void call(UErrorCode
* status
){
102 for(int32_t i
= 0; i
< count
; i
++){
107 void icu_key_null(int32_t i
){
108 ucol_getSortKey(col
, data
[i
].icu_data
, -1, icu_key
, MAX_KEY_LENGTH
);
111 void icu_key_len(int32_t i
){
112 ucol_getSortKey(col
, data
[i
].icu_data
, data
[i
].icu_data_len
, icu_key
, MAX_KEY_LENGTH
);
115 #if U_PLATFORM_HAS_WIN32_API
116 // pre-generated in CollPerfTest::prepareData(); no need to check for errors here
117 void win_key_null(int32_t i
){
118 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize)
119 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, -1, win_key
, MAX_KEY_LENGTH
);
122 void win_key_len(int32_t i
){
123 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, data
[i
].win_data_len
, win_key
, MAX_KEY_LENGTH
);
127 void posix_key_null(int32_t i
){
128 strxfrm(posix_key
, data
[i
].posix_data
, MAX_KEY_LENGTH
);
133 class CmdIter
: public UPerfFunction
{
134 typedef void (CmdIter::* Func
)(UErrorCode
* , int32_t );
138 UCollationElements
*iter
;
141 CmdIter(UErrorCode
& status
, UCollator
* col
, int32_t count
, CA_uchar
*data
, Func fn
, int32_t,int32_t)
142 :count(count
), data(data
), fn(fn
){
144 UChar dummytext
[] = {0, 0};
145 iter
= ucol_openElements(col
, NULL
, 0, &status
);
146 ucol_setText(iter
, dummytext
, 1, &status
);
149 ucol_closeElements(iter
);
152 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
154 virtual void call(UErrorCode
* status
){
156 for(int32_t i
= 0; i
< count
; i
++){
157 (this->*fn
)(status
, i
);
161 void icu_forward_null(UErrorCode
* status
, int32_t i
){
162 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
163 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
166 void icu_forward_len(UErrorCode
* status
, int32_t i
){
167 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
168 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
171 void icu_backward_null(UErrorCode
* status
, int32_t i
){
172 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
173 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
176 void icu_backward_len(UErrorCode
* status
, int32_t i
){
177 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
178 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
182 class CmdIterAll
: public UPerfFunction
{
183 typedef void (CmdIterAll::* Func
)(UErrorCode
* status
);
187 UCollationElements
*iter
;
191 enum CALL
{forward_null
, forward_len
, backward_null
, backward_len
};
194 ucol_closeElements(iter
);
196 CmdIterAll(UErrorCode
& status
, UCollator
* col
, int32_t count
, UChar
* data
, CALL call
,int32_t,int32_t)
197 :count(count
),data(data
)
200 if (call
== forward_null
|| call
== backward_null
) {
201 iter
= ucol_openElements(col
, data
, -1, &status
);
203 iter
= ucol_openElements(col
, data
, count
, &status
);
206 if (call
== forward_null
|| call
== forward_len
){
207 fn
= &CmdIterAll::icu_forward_all
;
209 fn
= &CmdIterAll::icu_backward_all
;
212 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
214 virtual void call(UErrorCode
* status
){
218 void icu_forward_all(UErrorCode
* status
){
219 int strlen
= count
- 5;
222 ucol_setOffset(iter
, strindex
, status
);
224 if (ucol_next(iter
, status
) == UCOL_NULLORDER
) {
231 if (strindex
> strlen
) {
234 ucol_setOffset(iter
, strindex
, status
);
240 void icu_backward_all(UErrorCode
* status
){
244 ucol_setOffset(iter
, strindex
, status
);
246 if (ucol_previous(iter
, status
) == UCOL_NULLORDER
) {
253 if (strindex
> strlen
) {
256 ucol_setOffset(iter
, strindex
, status
);
264 struct CmdQsort
: public UPerfFunction
{
266 static int q_random(const void * a
, const void * b
){
267 uint8_t * key_a
= ((DataIndex
*)a
)->icu_key
;
268 uint8_t * key_b
= ((DataIndex
*)b
)->icu_key
;
272 while (*key_a
!= 0) {val_a
+= val_a
*37 + *key_a
++;}
273 while (*key_b
!= 0) {val_b
+= val_b
*37 + *key_b
++;}
274 return val_a
- val_b
;
278 DataIndex * da = (DataIndex *) a; \
279 DataIndex * db = (DataIndex *) b; \
282 static int icu_strcoll_null(const void *a
, const void *b
){
284 return ucol_strcoll(da
->col
, da
->icu_data
, -1, db
->icu_data
, -1) - UCOL_EQUAL
;
287 static int icu_strcoll_len(const void *a
, const void *b
){
289 return ucol_strcoll(da
->col
, da
->icu_data
, da
->icu_data_len
, db
->icu_data
, db
->icu_data_len
) - UCOL_EQUAL
;
292 static int icu_cmpkey (const void *a
, const void *b
){
294 return strcmp((char *) da
->icu_key
, (char *) db
->icu_key
);
297 #if U_PLATFORM_HAS_WIN32_API
298 static int win_cmp_null(const void *a
, const void *b
) {
302 //CSTR_GREATER_THAN 3
303 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, -1, db
->win_data
, -1);
305 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
308 return t
- CSTR_EQUAL
;
312 static int win_cmp_len(const void *a
, const void *b
) {
314 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, da
->win_data_len
, db
->win_data
, db
->win_data_len
);
316 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
319 return t
- CSTR_EQUAL
;
324 #define QFUNC(name, func, data) \
325 static int name (const void *a, const void *b){ \
327 return func(da->data, db->data); \
330 QFUNC(posix_strcoll_null
, strcoll
, posix_data
)
331 QFUNC(posix_cmpkey
, strcmp
, posix_key
)
332 #if U_PLATFORM_HAS_WIN32_API
333 QFUNC(win_cmpkey
, strcmp
, win_key
)
334 QFUNC(win_wcscmp
, wcscmp
, win_data
)
336 QFUNC(icu_strcmp
, u_strcmp
, icu_data
)
337 QFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
// Static counter shared by all CmdQsort instances; getOperationsPerIteration()
// reports it, substituting 1 when it is zero.
340 static int32_t exec_count
; // potential multi-thread problem
342 typedef int (* Func
)(const void *, const void *);
345 void * base
; //Start of target array.
346 int32_t num
; //Array size in elements.
347 int32_t width
; //Element size in bytes.
349 void * backup
; //copy source of base
351 CmdQsort(UErrorCode
& status
,void *theBase
, int32_t num
, int32_t width
, Func fn
, int32_t,int32_t)
352 :backup(theBase
),num(num
),width(width
),fn(fn
){
353 base
= malloc(num
* width
);
354 time_empty(100, &status
); // warm memory/cache
363 memcpy(base
, backup
, num
* width
);
366 double time_empty(int32_t n
, UErrorCode
* status
) {
368 utimer_getTime(&start
);
372 utimer_getTime(&stop
);
373 return utimer_getDeltaSeconds(&start
,&stop
); // ms
376 virtual void call(UErrorCode
* status
){
378 memcpy(base
, backup
, num
* width
);
379 qsort(base
, num
, width
, fn
);
381 virtual double time(int32_t n
, UErrorCode
* status
) {
382 double t1
= time_empty(n
,status
);
383 double t2
= UPerfFunction::time(n
, status
);
384 return t2
-t1
;// < 0 ? t2 : t2-t1;
387 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
// Out-of-class definition providing storage for CmdQsort's static exec_count.
389 int32_t CmdQsort::exec_count
;
392 class CmdBinSearch
: public UPerfFunction
{
394 typedef int (CmdBinSearch::* Func
)(int, int);
404 CmdBinSearch(UErrorCode
, UCollator
* col
,DWORD win_langid
,int32_t count
,DataIndex
* rnd
,DataIndex
* ord
,Func fn
)
405 :col(col
),win_langid(win_langid
), count(count
), rnd(rnd
), ord(ord
), fn(fn
),exec_count(0){}
408 virtual void call(UErrorCode
* status
){
410 for(int32_t i
= 0; i
< count
; i
++){ // search all data
414 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
416 void binary_search(int32_t random
) {
418 int high
= count
- 1;
423 guess
= (high
+ low
)/2;
424 if (last_guess
== guess
) break; // nothing to search
426 r
= (this->*fn
)(random
, guess
);
430 return; // found, search end.
440 int icu_strcoll_null(int32_t i
, int32_t j
){
441 return ucol_strcoll(col
, rnd
[i
].icu_data
, -1, ord
[j
].icu_data
,-1);
444 int icu_strcoll_len(int32_t i
, int32_t j
){
445 return ucol_strcoll(col
, rnd
[i
].icu_data
, rnd
[i
].icu_data_len
, ord
[j
].icu_data
, ord
[j
].icu_data_len
);
448 int icu_cmpkey(int32_t i
, int32_t j
) {
449 return strcmp( (char *) rnd
[i
].icu_key
, (char *) ord
[j
].icu_key
);
452 #if U_PLATFORM_HAS_WIN32_API
453 int win_cmp_null(int32_t i
, int32_t j
) {
454 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, -1, ord
[j
].win_data
, -1);
456 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
459 return t
- CSTR_EQUAL
;
463 int win_cmp_len(int32_t i
, int32_t j
) {
464 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, rnd
[i
].win_data_len
, ord
[j
].win_data
, ord
[j
].win_data_len
);
466 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
469 return t
- CSTR_EQUAL
;
474 #define BFUNC(name, func, data) \
475 int name(int32_t i, int32_t j) { \
476 return func(rnd[i].data, ord[j].data); \
479 BFUNC(posix_strcoll_null
, strcoll
, posix_data
)
480 BFUNC(posix_cmpkey
, strcmp
, posix_key
)
481 BFUNC(win_cmpkey
, strcmp
, win_key
)
482 BFUNC(win_wcscmp
, wcscmp
, win_data
)
483 BFUNC(icu_strcmp
, u_strcmp
, icu_data
)
484 BFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
487 class CollPerfTest
: public UPerfTest
{
492 UChar
* icu_data_all
;
493 int32_t icu_data_all_len
;
498 CA_char
* posix_data
;
500 CA_win_wchar
* win_data
;
503 DataIndex
* rnd_index
; // random by icu key
504 DataIndex
* ord_win_data
;
505 DataIndex
* ord_win_key
;
506 DataIndex
* ord_posix_data
;
507 DataIndex
* ord_posix_key
;
508 DataIndex
* ord_icu_data
;
509 DataIndex
* ord_icu_key
;
510 DataIndex
* ord_win_wcscmp
;
511 DataIndex
* ord_icu_strcmp
;
512 DataIndex
* ord_icu_cmpcpo
;
514 virtual ~CollPerfTest(){
516 delete [] icu_data_all
;
524 delete[] ord_win_data
;
525 delete[] ord_win_key
;
526 delete[] ord_posix_data
;
527 delete[] ord_posix_key
;
528 delete[] ord_icu_data
;
529 delete[] ord_icu_key
;
530 delete[] ord_win_wcscmp
;
531 delete[] ord_icu_strcmp
;
532 delete[] ord_icu_cmpcpo
;
535 CollPerfTest(int32_t argc
, const char* argv
[], UErrorCode
& status
):UPerfTest(argc
, argv
, status
){
548 ord_posix_data
= NULL
;
552 ord_win_wcscmp
= NULL
;
553 ord_icu_strcmp
= NULL
;
554 ord_icu_cmpcpo
= NULL
;
556 if (U_FAILURE(status
)){
560 // Parse additional arguments
562 UOption options
[] = {
563 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG
), // Windows Language ID number.
564 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG
), // --rulefile <filename>
565 // Collation related arguments. All are optional.
566 // To simplify parsing, two-choice arguments are designed as NO_ARG.
567 // The default value is the UPPER-CASE word in the comment.
568 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG
), // --french <on | OFF>
569 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG
), // --alternate <NON_IGNORE | shifted>
570 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG
), // --casefirst <lower | upper | OFF>
571 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG
), // --caselevel <on | OFF>
572 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG
), // --normal <on | OFF>
573 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG
), // --strength <1-5>
575 int32_t opt_len
= (sizeof(options
)/sizeof(options
[0]));
576 enum {i
, r
,f
,a
,c
,l
,n
,s
}; // The buffer between the option items' order and their references
578 _remainingArgc
= u_parseArgs(_remainingArgc
, (char**)argv
, opt_len
, options
);
580 if (_remainingArgc
< 0){
581 status
= U_ILLEGAL_ARGUMENT_ERROR
;
586 locale
= "en_US"; // set default locale
589 #if U_PLATFORM_HAS_WIN32_API
590 if (options
[i
].doesOccur
) {
592 int tmp
= strtol(options
[i
].value
, &endp
, 0);
593 if (endp
== options
[i
].value
) {
594 status
= U_ILLEGAL_ARGUMENT_ERROR
;
597 win_langid
= MAKELCID(tmp
, SORT_DEFAULT
);
599 win_langid
= uloc_getLCID(locale
);
603 // Set up an ICU collator
604 if (options
[r
].doesOccur
) {
605 // TODO: implement it
607 col
= ucol_open(locale
, &status
);
608 if (U_FAILURE(status
)) {
613 if (options
[f
].doesOccur
) {
614 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
616 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
619 if (options
[a
].doesOccur
) {
620 ucol_setAttribute(col
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
623 if (options
[c
].doesOccur
) { // strcmp() has i18n encoding problem
624 if (strcmp("lower", options
[c
].value
) == 0){
625 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, &status
);
626 } else if (strcmp("upper", options
[c
].value
) == 0) {
627 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, &status
);
629 status
= U_ILLEGAL_ARGUMENT_ERROR
;
634 if (options
[l
].doesOccur
){
635 ucol_setAttribute(col
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
638 if (options
[n
].doesOccur
){
639 ucol_setAttribute(col
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
642 if (options
[s
].doesOccur
) {
644 int tmp
= strtol(options
[l
].value
, &endp
, 0);
645 if (endp
== options
[l
].value
) {
646 status
= U_ILLEGAL_ARGUMENT_ERROR
;
650 case 1: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
); break;
651 case 2: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_SECONDARY
, &status
); break;
652 case 3: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
); break;
653 case 4: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
); break;
654 case 5: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
); break;
655 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
661 // Avoids having to use the annoying 'id' in the TESTCASE(id,test) macro or the like
662 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
666 UErrorCode status = U_ZERO_ERROR;\
667 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
668 if (U_FAILURE(status)) {\
681 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
684 #define TEST_KEYGEN(testname, func)\
685 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
686 TEST_KEYGEN(TestIcu_KeyGen_null
, icu_key_null
);
687 TEST_KEYGEN(TestIcu_KeyGen_len
, icu_key_len
);
688 TEST_KEYGEN(TestPosix_KeyGen_null
, posix_key_null
);
689 #if U_PLATFORM_HAS_WIN32_API
690 TEST_KEYGEN(TestWin_KeyGen_null
, win_key_null
);
691 TEST_KEYGEN(TestWin_KeyGen_len
, win_key_len
);
694 #define TEST_ITER(testname, func)\
695 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
696 TEST_ITER(TestIcu_ForwardIter_null
, icu_forward_null
);
697 TEST_ITER(TestIcu_ForwardIter_len
, icu_forward_len
);
698 TEST_ITER(TestIcu_BackwardIter_null
, icu_backward_null
);
699 TEST_ITER(TestIcu_BackwardIter_len
, icu_backward_len
);
701 #define TEST_ITER_ALL(testname, func)\
702 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
703 TEST_ITER_ALL(TestIcu_ForwardIter_all_null
, forward_null
);
704 TEST_ITER_ALL(TestIcu_ForwardIter_all_len
, forward_len
);
705 TEST_ITER_ALL(TestIcu_BackwardIter_all_null
, backward_null
);
706 TEST_ITER_ALL(TestIcu_BackwardIter_all_len
, backward_len
);
708 #define TEST_QSORT(testname, func)\
709 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
710 TEST_QSORT(TestIcu_qsort_strcoll_null
, icu_strcoll_null
);
711 TEST_QSORT(TestIcu_qsort_strcoll_len
, icu_strcoll_len
);
712 TEST_QSORT(TestIcu_qsort_usekey
, icu_cmpkey
);
713 TEST_QSORT(TestPosix_qsort_strcoll_null
, posix_strcoll_null
);
714 TEST_QSORT(TestPosix_qsort_usekey
, posix_cmpkey
);
715 #if U_PLATFORM_HAS_WIN32_API
716 TEST_QSORT(TestWin_qsort_CompareStringW_null
, win_cmp_null
);
717 TEST_QSORT(TestWin_qsort_CompareStringW_len
, win_cmp_len
);
718 TEST_QSORT(TestWin_qsort_usekey
, win_cmpkey
);
721 #define TEST_BIN(testname, func)\
722 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
723 TEST_BIN(TestIcu_BinarySearch_strcoll_null
, icu_strcoll_null
);
724 TEST_BIN(TestIcu_BinarySearch_strcoll_len
, icu_strcoll_len
);
725 TEST_BIN(TestIcu_BinarySearch_usekey
, icu_cmpkey
);
726 TEST_BIN(TestIcu_BinarySearch_strcmp
, icu_strcmp
);
727 TEST_BIN(TestIcu_BinarySearch_cmpCPO
, icu_cmpcpo
);
728 TEST_BIN(TestPosix_BinarySearch_strcoll_null
, posix_strcoll_null
);
729 TEST_BIN(TestPosix_BinarySearch_usekey
, posix_cmpkey
);
730 #if U_PLATFORM_HAS_WIN32_API
731 TEST_BIN(TestWin_BinarySearch_CompareStringW_null
, win_cmp_null
);
732 TEST_BIN(TestWin_BinarySearch_CompareStringW_len
, win_cmp_len
);
734 TEST_BIN(TestWin_BinarySearch_usekey
, win_cmpkey
);
735 TEST_BIN(TestWin_BinarySearch_wcscmp
, win_wcscmp
);
743 void prepareData(UErrorCode
& status
){
744 if(U_FAILURE(status
)) return;
745 if (icu_data
) return; // prepared
747 icu_data
= new CA_uchar();
749 // Following code is borrowed from UPerfTest::getLines();
750 const UChar
* line
=NULL
;
753 line
= ucbuf_readline(ucharBuf
,&len
,&status
);
754 if(line
== NULL
|| U_FAILURE(status
)){break;}
756 // Refer to the source code of ucbuf_readline()
757 // 1. 'len' includes the line terminator
758 // 2. The length of the line terminator is only one character
759 // 3. The Windows CR LF line terminator will be converted to CR
762 continue; //skip empty line
764 icu_data
->append_one(len
);
765 memcpy(icu_data
->last(), line
, len
* sizeof(UChar
));
766 icu_data
->last()[len
-1] = NULL
;
769 if(U_FAILURE(status
)) return;
771 // UTF-16 -> UTF-8 conversion.
772 UConverter
*conv
= ucnv_open("utf-8", &status
); // just UTF-8 for now.
773 if (U_FAILURE(status
)) return;
775 count
= icu_data
->count
;
777 icu_data_all_len
= icu_data
->index
[count
]; // includes all NULLs
778 icu_data_all_len
-= count
; // excludes all NULLs
779 icu_data_all_len
+= 1; // the terminal NULL
780 icu_data_all
= new UChar
[icu_data_all_len
];
781 icu_data_all
[icu_data_all_len
- 1] = 0; //the terminal NULL
783 icu_key
= new CA_uint8
;
784 win_data
= new CA_win_wchar
;
785 win_key
= new CA_char
;
786 posix_data
= new CA_char
;
787 posix_key
= new CA_char
;
788 rnd_index
= new DataIndex
[count
];
789 DataIndex::win_langid
= win_langid
;
790 DataIndex::col
= col
;
793 UChar
* p
= icu_data_all
;
796 for (int i
=0; i
< count
; i
++) {
798 s
= sizeof(UChar
) * icu_data
->lengthOf(i
);
799 memcpy(p
, icu_data
->dataOf(i
), s
);
800 p
+= icu_data
->lengthOf(i
);
805 s
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,NULL
, 0);
806 icu_key
->append_one(s
);
807 t
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,icu_key
->last(), s
);
808 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
811 s
= ucnv_fromUChars(conv
,NULL
, 0, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
812 if (status
== U_BUFFER_OVERFLOW_ERROR
|| status
== U_ZERO_ERROR
){
813 status
= U_ZERO_ERROR
;
817 posix_data
->append_one(s
+ 1); // plus terminal NULL
818 t
= ucnv_fromUChars(conv
,posix_data
->last(), s
, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
819 if (U_FAILURE(status
)) return;
820 if ( t
!= s
){status
= U_INVALID_FORMAT_ERROR
;return;}
821 posix_data
->last()[s
] = 0;
824 s
= strxfrm(NULL
, posix_data
->dataOf(i
), 0);
825 if (s
== INT_MAX
){status
= U_INVALID_FORMAT_ERROR
;return;}
826 posix_key
->append_one(s
);
827 t
= strxfrm(posix_key
->last(), posix_data
->dataOf(i
), s
);
828 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
830 #if U_PLATFORM_HAS_WIN32_API
832 s
= icu_data
->lengthOf(i
) + 1; // plus terminal NULL
833 win_data
->append_one(s
);
834 memcpy(win_data
->last(), icu_data
->dataOf(i
), sizeof(WCHAR
) * s
);
837 s
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), NULL
,0);
838 if (s
== 0) {status
= U_INVALID_FORMAT_ERROR
;return;}
839 win_key
->append_one(s
);
840 t
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), (WCHAR
*)(win_key
->last()),s
);
841 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
845 // append_one() reallocates its buffers, which can move them and invalidate earlier pointers, so the following code must not be merged into the previous loop
846 for (int i
=0; i
< count
; i
++) {
847 rnd_index
[i
].icu_key
= icu_key
->dataOf(i
);
848 rnd_index
[i
].icu_data
= icu_data
->dataOf(i
);
849 rnd_index
[i
].icu_data_len
= icu_data
->lengthOf(i
);
850 rnd_index
[i
].posix_key
= posix_key
->last();
851 rnd_index
[i
].posix_data
= posix_data
->dataOf(i
);
852 rnd_index
[i
].posix_data_len
= posix_data
->lengthOf(i
);
853 #if U_PLATFORM_HAS_WIN32_API
854 rnd_index
[i
].win_key
= win_key
->dataOf(i
);
855 rnd_index
[i
].win_data
= win_data
->dataOf(i
);
856 rnd_index
[i
].win_data_len
= win_data
->lengthOf(i
);
861 qsort(rnd_index
, count
, sizeof(DataIndex
), CmdQsort::q_random
);
863 #define SORT(data, func) \
864 data = new DataIndex[count];\
865 memcpy(data, rnd_index, count * sizeof(DataIndex));\
866 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
868 SORT(ord_icu_data
, icu_strcoll_len
);
869 SORT(ord_icu_key
, icu_cmpkey
);
870 SORT(ord_posix_data
, posix_strcoll_null
);
871 SORT(ord_posix_key
, posix_cmpkey
);
872 #if U_PLATFORM_HAS_WIN32_API
873 SORT(ord_win_data
, win_cmp_len
);
874 SORT(ord_win_key
, win_cmpkey
);
875 SORT(ord_win_wcscmp
, win_wcscmp
);
877 SORT(ord_icu_strcmp
, icu_strcmp
);
878 SORT(ord_icu_cmpcpo
, icu_cmpcpo
);
883 int main(int argc
, const char *argv
[])
886 UErrorCode status
= U_ZERO_ERROR
;
887 CollPerfTest
test(argc
, argv
, status
);
889 if (U_FAILURE(status
)){
890 printf("The error is %s\n", u_errorName(status
));
891 //TODO: print usage here
895 if (test
.run() == FALSE
){
896 fprintf(stderr
, "FAILED: Tests could not be run please check the "