1 /********************************************************************
3 * Copyright (C) 2001-2011 IBM, Inc. All Rights Reserved.
5 ********************************************************************/
12 #include "unicode/uperf.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
19 /* To store an array of string<UNIT> in contiguous space.
20 Since string<UNIT> itself is treated as an array of UNIT, this
21 class will ease our memory management for an array of string<UNIT>.
24 //template<typename UNIT>
/*
 * COMPATCT_ARRAY(CompactArrays, UNIT) defines a struct named CompactArrays that
 * stores `count` terminated strings of UNIT back to back in one heap buffer.
 *   index[i]     offset (in UNITs) of string i inside data; index[count] is the
 *                total number of UNITs in use.
 *   append_one   reserves room for one more string of theLen UNITs (terminator
 *                included); the caller then fills it through last().
 *   lengthOf(i)  length of string i without its terminator.
 * Copy construction/assignment are declared but never defined, so instances
 * cannot be copied. append_one() may move both buffers: pointers obtained from
 * last()/dataOf() are only valid until the next append_one().
 * A failed allocation aborts instead of leaking the old buffer and
 * dereferencing a null pointer.
 */
#define COMPATCT_ARRAY(CompactArrays, UNIT) \
struct CompactArrays{\
    CompactArrays(const CompactArrays & );\
    CompactArrays & operator=(const CompactArrays & );\
    int32_t   count;/*total number of the strings*/ \
    int32_t * index;/*relative offset in data*/ \
    UNIT    * data; /*the real space to hold strings*/ \
    \
    ~CompactArrays(){free(index);free(data);} \
    CompactArrays():count(0), index(NULL), data(NULL){ /*same order as the declarations*/ \
        index = (int32_t *) malloc(sizeof(int32_t)); \
        if (index == NULL) abort(); \
        index[0] = 0; /*the first string starts at offset 0*/ \
    } \
    void append_one(int32_t theLen){ /*include terminal NULL*/ \
        int32_t * grownIndex = (int32_t *) realloc(index, sizeof(int32_t) * (count + 2)); \
        if (grownIndex == NULL) abort(); \
        index = grownIndex; \
        index[count + 1] = index[count] + theLen; \
        UNIT * grownData = (UNIT *) realloc(data, sizeof(UNIT) * index[count + 1]); \
        if (grownData == NULL && index[count + 1] > 0) abort(); \
        data = grownData; \
        count++; \
    } \
    UNIT * last(){return data + index[count - 1];} /*start of the newest string*/ \
    UNIT * dataOf(int32_t i){return data + index[i];} \
    int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
};
49 //typedef CompactArrays<UChar> CA_uchar;
50 //typedef CompactArrays<char> CA_char;
51 //typedef CompactArrays<uint8_t> CA_uint8;
52 //typedef CompactArrays<WCHAR> CA_win_wchar;
54 COMPATCT_ARRAY(CA_uchar
, UChar
)
55 COMPATCT_ARRAY(CA_char
, char)
56 COMPATCT_ARRAY(CA_uint8
, uint8_t)
57 COMPATCT_ARRAY(CA_win_wchar
, WCHAR
)
// Fragment of DataIndex. The two static members give the plain-C qsort
// comparators access to the Windows locale id and the ICU collator.
61 static DWORD win_langid
; // for qsort callback function
62 static UCollator
* col
; // for qsort callback function
68 int32_t posix_data_len
;
// Out-of-class definitions of the two static DataIndex members.
73 DWORD
DataIndex::win_langid
;
74 UCollator
* DataIndex::col
;
// Benchmark function for sort-key generation. The member function stored in
// fn is the key generator under test; the ICU, Windows and POSIX variants
// below each write into their own member of the shared key buffer.
78 class CmdKeyGen
: public UPerfFunction
{
79 typedef void (CmdKeyGen::* Func
)(int32_t);
80 enum{MAX_KEY_LENGTH
= 5000};
87 union { // to save space: the three key buffers overlap
88 uint8_t icu_key
[MAX_KEY_LENGTH
];
89 char posix_key
[MAX_KEY_LENGTH
];
90 WCHAR win_key
[MAX_KEY_LENGTH
];
// The UErrorCode and the trailing int32_t parameters are unnamed and unused.
93 CmdKeyGen(UErrorCode
, UCollator
* col
,DWORD win_langid
, int32_t count
, DataIndex
* data
,Func fn
,int32_t)
94 :col(col
),win_langid(win_langid
), count(count
), data(data
), fn(fn
){}
// One operation per data entry.
96 virtual long getOperationsPerIteration(){return count
;}
// Visits every index in [0, count).
// NOTE(review): the loop body is not visible here; presumably it invokes
// (this->*fn)(i) - confirm.
98 virtual void call(UErrorCode
* status
){
99 for(int32_t i
= 0; i
< count
; i
++){
// ICU sort key of entry i; the source length is passed as -1.
104 void icu_key_null(int32_t i
){
105 ucol_getSortKey(col
, data
[i
].icu_data
, -1, icu_key
, MAX_KEY_LENGTH
);
// ICU sort key of entry i with the explicit length icu_data_len.
108 void icu_key_len(int32_t i
){
109 ucol_getSortKey(col
, data
[i
].icu_data
, data
[i
].icu_data_len
, icu_key
, MAX_KEY_LENGTH
);
112 // pre-generated in CollPerfTest::prepareData(), need not to check error here
// Windows sort key of entry i; the source length is passed as -1.
113 void win_key_null(int32_t i
){
114 //LCMAP_SORTsk 0x00000400 // WC sort sk (normalize)
115 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, -1, win_key
, MAX_KEY_LENGTH
);
// Windows sort key of entry i with the explicit length win_data_len.
118 void win_key_len(int32_t i
){
119 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, data
[i
].win_data_len
, win_key
, MAX_KEY_LENGTH
);
// POSIX key of entry i: strxfrm() writes at most MAX_KEY_LENGTH bytes to posix_key.
122 void posix_key_null(int32_t i
){
123 strxfrm(posix_key
, data
[i
].posix_data
, MAX_KEY_LENGTH
);
// Benchmark function for collation-element iteration over each string of a
// CA_uchar array. fn selects the direction (forward/backward) and whether the
// text length is passed explicitly or as -1.
128 class CmdIter
: public UPerfFunction
{
129 typedef void (CmdIter::* Func
)(UErrorCode
* , int32_t );
133 UCollationElements
*iter
;
// Opens the iterator without text, then attaches a one-unit dummy text; the
// real text is set per string by the icu_* methods below.
// The two trailing int32_t parameters are unnamed and unused.
136 CmdIter(UErrorCode
& status
, UCollator
* col
, int32_t count
, CA_uchar
*data
, Func fn
, int32_t,int32_t)
137 :count(count
), data(data
), fn(fn
){
139 UChar dummytext
[] = {0, 0};
140 iter
= ucol_openElements(col
, NULL
, 0, &status
);
141 ucol_setText(iter
, dummytext
, 1, &status
);
// Releases the iterator (the enclosing function header is not visible here).
144 ucol_closeElements(iter
);
// Reports the number of collation elements counted, or 1 if none were counted.
147 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
// Runs the selected iteration function once for every string index.
149 virtual void call(UErrorCode
* status
){
151 for(int32_t i
= 0; i
< count
; i
++){
152 (this->*fn
)(status
, i
);
// Forward iteration over string i, length passed as -1; counts every element.
156 void icu_forward_null(UErrorCode
* status
, int32_t i
){
157 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
158 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
// Forward iteration over string i with its explicit length.
161 void icu_forward_len(UErrorCode
* status
, int32_t i
){
162 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
163 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
// Backward iteration over string i, length passed as -1.
166 void icu_backward_null(UErrorCode
* status
, int32_t i
){
167 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
168 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
// Backward iteration over string i with its explicit length.
171 void icu_backward_len(UErrorCode
* status
, int32_t i
){
172 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
173 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
// Benchmark function that iterates collation elements over one single text
// buffer. The CALL value given to the constructor selects the direction and
// whether the iterator is opened with an explicit length or with -1.
177 class CmdIterAll
: public UPerfFunction
{
178 typedef void (CmdIterAll::* Func
)(UErrorCode
* status
);
182 UCollationElements
*iter
;
186 enum CALL
{forward_null
, forward_len
, backward_null
, backward_len
};
// Releases the iterator (the enclosing function header is not visible here).
189 ucol_closeElements(iter
);
// *_null variants open the iterator with length -1, the others pass count as
// the text length. forward_* variants select icu_forward_all, the others
// icu_backward_all. The two trailing int32_t parameters are unnamed and unused.
191 CmdIterAll(UErrorCode
& status
, UCollator
* col
, int32_t count
, UChar
* data
, CALL call
,int32_t,int32_t)
192 :count(count
),data(data
)
195 if (call
== forward_null
|| call
== backward_null
) {
196 iter
= ucol_openElements(col
, data
, -1, &status
);
198 iter
= ucol_openElements(col
, data
, count
, &status
);
201 if (call
== forward_null
|| call
== forward_len
){
202 fn
= &CmdIterAll::icu_forward_all
;
204 fn
= &CmdIterAll::icu_backward_all
;
// Reports the number of elements counted, or 1 if none were counted.
207 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
// NOTE(review): body not visible here; presumably dispatches through fn - confirm.
209 virtual void call(UErrorCode
* status
){
// Steps forward with ucol_next(), repositioning the iterator at strindex with
// ucol_setOffset(); strindex is compared against the local `strlen`
// (count - 5), which shadows the C library function of the same name.
// NOTE(review): the loop structure and how strindex advances are not visible here.
213 void icu_forward_all(UErrorCode
* status
){
214 int strlen
= count
- 5;
217 ucol_setOffset(iter
, strindex
, status
);
219 if (ucol_next(iter
, status
) == UCOL_NULLORDER
) {
226 if (strindex
> strlen
) {
229 ucol_setOffset(iter
, strindex
, status
);
// Same scheme as icu_forward_all but stepping with ucol_previous().
// NOTE(review): the initial values of strlen/strindex are not visible here.
235 void icu_backward_all(UErrorCode
* status
){
239 ucol_setOffset(iter
, strindex
, status
);
241 if (ucol_previous(iter
, status
) == UCOL_NULLORDER
) {
248 if (strindex
> strlen
) {
251 ucol_setOffset(iter
, strindex
, status
);
// Benchmark function that times qsort() over an array of DataIndex records
// with one of the static comparators below.
259 struct CmdQsort
: public UPerfFunction
{
// Comparator used to put the records into a key-derived pseudo-random order:
// each ICU key is folded byte by byte (val += val*37 + byte, up to the first
// zero byte) and the two folded values are subtracted.
// NOTE(review): the declarations of val_a/val_b are not visible here.
261 static int q_random(const void * a
, const void * b
){
262 uint8_t * key_a
= ((DataIndex
*)a
)->icu_key
;
263 uint8_t * key_b
= ((DataIndex
*)b
)->icu_key
;
267 while (*key_a
!= 0) {val_a
+= val_a
*37 + *key_a
++;}
268 while (*key_b
!= 0) {val_b
+= val_b
*37 + *key_b
++;}
269 return val_a
- val_b
;
// Body lines of a helper macro (its #define line is not visible here): they
// cast the qsort arguments a and b to the DataIndex pointers da and db used
// by the comparators below.
273 DataIndex * da = (DataIndex *) a; \
274 DataIndex * db = (DataIndex *) b; \
277 static int icu_strcoll_null(const void *a
, const void *b
){
// ICU comparison with both lengths passed as -1; the result is offset by UCOL_EQUAL.
279 return ucol_strcoll(da
->col
, da
->icu_data
, -1, db
->icu_data
, -1) - UCOL_EQUAL
;
// ICU comparison with explicit lengths.
282 static int icu_strcoll_len(const void *a
, const void *b
){
284 return ucol_strcoll(da
->col
, da
->icu_data
, da
->icu_data_len
, db
->icu_data
, db
->icu_data_len
) - UCOL_EQUAL
;
// Byte-wise comparison of the pre-generated ICU sort keys.
287 static int icu_cmpkey (const void *a
, const void *b
){
289 return strcmp((char *) da
->icu_key
, (char *) db
->icu_key
);
// Windows comparison with both lengths passed as -1. The result is offset by
// CSTR_EQUAL so it can be used as a qsort result.
// NOTE(review): the condition guarding the error message is not visible here.
292 static int win_cmp_null(const void *a
, const void *b
) {
296 //CSTR_GREATER_THAN 3
297 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, -1, db
->win_data
, -1);
299 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
302 return t
- CSTR_EQUAL
;
// Windows comparison with explicit lengths; same result mapping as win_cmp_null.
306 static int win_cmp_len(const void *a
, const void *b
) {
308 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, da
->win_data_len
, db
->win_data
, db
->win_data_len
);
310 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
313 return t
- CSTR_EQUAL
;
// QFUNC(name, func, data) generates a qsort comparator `name` that applies
// `func` to the `data` member of both records.
317 #define QFUNC(name, func, data) \
318 static int name (const void *a, const void *b){ \
320 return func(da->data, db->data); \
323 QFUNC(posix_strcoll_null
, strcoll
, posix_data
)
324 QFUNC(posix_cmpkey
, strcmp
, posix_key
)
325 QFUNC(win_cmpkey
, strcmp
, win_key
)
326 QFUNC(win_wcscmp
, wcscmp
, win_data
)
327 QFUNC(icu_strcmp
, u_strcmp
, icu_data
)
328 QFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
331 static int32_t exec_count
; // potential multi-thread problem
333 typedef int (* Func
)(const void *, const void *);
336 void * base
; //Start of target array.
337 int32_t num
; //Array size in elements.
338 int32_t width
; //Element size in bytes.
340 void * backup
; //copy source of base
// Keeps theBase as the pristine copy source, allocates the working array and
// runs time_empty() once as a warm-up. The two trailing int32_t parameters are
// unnamed and unused.
342 CmdQsort(UErrorCode
& status
,void *theBase
, int32_t num
, int32_t width
, Func fn
, int32_t,int32_t)
343 :backup(theBase
),num(num
),width(width
),fn(fn
){
344 base
= malloc(num
* width
);
345 time_empty(100, &status
); // warm memory/cache
// Restores the unsorted input into base (the enclosing function header is not
// visible here).
354 memcpy(base
, backup
, num
* width
);
// Measures a baseline and returns the elapsed time between start and stop.
// NOTE(review): the timed statements are not visible here, and the trailing
// "ms" remark disagrees with the name utimer_getDeltaSeconds - confirm the unit.
357 double time_empty(int32_t n
, UErrorCode
* status
) {
359 utimer_getTime(&start
);
363 utimer_getTime(&stop
);
364 return utimer_getDeltaSeconds(&start
,&stop
); // ms
// Restores the unsorted input, then sorts it with the comparator under test.
367 virtual void call(UErrorCode
* status
){
369 memcpy(base
, backup
, num
* width
);
370 qsort(base
, num
, width
, fn
);
// Timing of n iterations minus the time_empty() baseline for the same n.
372 virtual double time(int32_t n
, UErrorCode
* status
) {
373 double t1
= time_empty(n
,status
);
374 double t2
= UPerfFunction::time(n
, status
);
375 return t2
-t1
;// < 0 ? t2 : t2-t1;
// Reports exec_count, or 1 when it is zero.
378 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
// Out-of-class definition of the static counter.
380 int32_t CmdQsort::exec_count
;
// Benchmark function for binary search: entries of rnd are looked up in the
// array ord, comparing through the member function stored in fn.
383 class CmdBinSearch
: public UPerfFunction
{
385 typedef int (CmdBinSearch::* Func
)(int, int);
// The UErrorCode parameter is unnamed and unused.
395 CmdBinSearch(UErrorCode
, UCollator
* col
,DWORD win_langid
,int32_t count
,DataIndex
* rnd
,DataIndex
* ord
,Func fn
)
396 :col(col
),win_langid(win_langid
), count(count
), rnd(rnd
), ord(ord
), fn(fn
),exec_count(0){}
// NOTE(review): the loop body is not visible here; per the existing comment it
// searches for every entry.
399 virtual void call(UErrorCode
* status
){
401 for(int32_t i
= 0; i
< count
; i
++){ // search all data
// Reports exec_count, or 1 when it is zero.
405 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
// Looks for rnd[random] in ord: guess is the midpoint of [low, high], r is the
// comparison result from fn, and the search stops when the guess repeats or
// the entry is found.
// NOTE(review): how low/high are updated from r is not visible here.
407 void binary_search(int32_t random
) {
409 int high
= count
- 1;
414 guess
= (high
+ low
)/2;
415 if (last_guess
== guess
) break; // nothing to search
417 r
= (this->*fn
)(random
, guess
);
421 return; // found, search end.
// ICU comparison of rnd[i] with ord[j], both lengths passed as -1.
431 int icu_strcoll_null(int32_t i
, int32_t j
){
432 return ucol_strcoll(col
, rnd
[i
].icu_data
, -1, ord
[j
].icu_data
,-1);
// ICU comparison with explicit lengths.
435 int icu_strcoll_len(int32_t i
, int32_t j
){
436 return ucol_strcoll(col
, rnd
[i
].icu_data
, rnd
[i
].icu_data_len
, ord
[j
].icu_data
, ord
[j
].icu_data_len
);
// Byte-wise comparison of the pre-generated ICU sort keys.
439 int icu_cmpkey(int32_t i
, int32_t j
) {
440 return strcmp( (char *) rnd
[i
].icu_key
, (char *) ord
[j
].icu_key
);
// Windows comparison, both lengths passed as -1; the result is offset by CSTR_EQUAL.
// NOTE(review): the condition guarding the error message is not visible here.
443 int win_cmp_null(int32_t i
, int32_t j
) {
444 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, -1, ord
[j
].win_data
, -1);
446 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
449 return t
- CSTR_EQUAL
;
// Windows comparison with explicit lengths; same result mapping as win_cmp_null.
453 int win_cmp_len(int32_t i
, int32_t j
) {
454 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, rnd
[i
].win_data_len
, ord
[j
].win_data
, ord
[j
].win_data_len
);
456 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
459 return t
- CSTR_EQUAL
;
// BFUNC(name, func, data) generates a member comparator `name` that applies
// `func` to the `data` member of rnd[i] and ord[j].
463 #define BFUNC(name, func, data) \
464 int name(int32_t i, int32_t j) { \
465 return func(rnd[i].data, ord[j].data); \
468 BFUNC(posix_strcoll_null
, strcoll
, posix_data
)
469 BFUNC(posix_cmpkey
, strcmp
, posix_key
)
470 BFUNC(win_cmpkey
, strcmp
, win_key
)
471 BFUNC(win_wcscmp
, wcscmp
, win_data
)
472 BFUNC(icu_strcmp
, u_strcmp
, icu_data
)
473 BFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
// Test driver: owns the input data in its ICU, POSIX and Windows forms and the
// DataIndex arrays handed to the benchmark functions.
476 class CollPerfTest
: public UPerfTest
{
// All input strings concatenated into one buffer, and that buffer's length in
// UChars (both filled in by prepareData()).
481 UChar
* icu_data_all
;
482 int32_t icu_data_all_len
;
487 CA_char
* posix_data
;
489 CA_win_wchar
* win_data
;
492 DataIndex
* rnd_index
; // random by icu key
// Copies of rnd_index, each sorted by a different comparator (see SORT() in
// prepareData()).
493 DataIndex
* ord_win_data
;
494 DataIndex
* ord_win_key
;
495 DataIndex
* ord_posix_data
;
496 DataIndex
* ord_posix_key
;
497 DataIndex
* ord_icu_data
;
498 DataIndex
* ord_icu_key
;
499 DataIndex
* ord_win_wcscmp
;
500 DataIndex
* ord_icu_strcmp
;
501 DataIndex
* ord_icu_cmpcpo
;
// Releases the concatenated text buffer and the sorted index arrays.
// NOTE(review): some statements of this destructor are not visible here, so
// the release of the remaining members cannot be confirmed from this listing.
503 virtual ~CollPerfTest(){
505 delete [] icu_data_all
;
513 delete[] ord_win_data
;
514 delete[] ord_win_key
;
515 delete[] ord_posix_data
;
516 delete[] ord_posix_key
;
517 delete[] ord_icu_data
;
518 delete[] ord_icu_key
;
519 delete[] ord_win_wcscmp
;
520 delete[] ord_icu_strcmp
;
521 delete[] ord_icu_cmpcpo
;
524 CollPerfTest(int32_t argc
, const char* argv
[], UErrorCode
& status
):UPerfTest(argc
, argv
, status
){
537 ord_posix_data
= NULL
;
541 ord_win_wcscmp
= NULL
;
542 ord_icu_strcmp
= NULL
;
543 ord_icu_cmpcpo
= NULL
;
545 if (U_FAILURE(status
)){
549 // Parse additional arguments
551 UOption options
[] = {
552 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG
), // Windows Language ID number.
553 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG
), // --rulefile <filename>
554 // Collation related arguments. All are optional.
555 // To simplify parsing, two choice arguments are disigned as NO_ARG.
556 // The default value is UPPER word in the comment
557 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG
), // --french <on | OFF>
558 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG
), // --alternate <NON_IGNORE | shifted>
559 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG
), // --casefirst <lower | upper | OFF>
560 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG
), // --caselevel <on | OFF>
561 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG
), // --normal <on | OFF>
562 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG
), // --strength <1-5>
564 int32_t opt_len
= (sizeof(options
)/sizeof(options
[0]));
565 enum {i
, r
,f
,a
,c
,l
,n
,s
}; // The buffer between the option items' order and their references
567 _remainingArgc
= u_parseArgs(_remainingArgc
, (char**)argv
, opt_len
, options
);
569 if (_remainingArgc
< 0){
570 status
= U_ILLEGAL_ARGUMENT_ERROR
;
575 locale
= "en_US"; // set default locale
578 //#if U_PLATFORM_USES_ONLY_WIN32_API
579 if (options
[i
].doesOccur
) {
581 int tmp
= strtol(options
[i
].value
, &endp
, 0);
582 if (endp
== options
[i
].value
) {
583 status
= U_ILLEGAL_ARGUMENT_ERROR
;
586 win_langid
= MAKELCID(tmp
, SORT_DEFAULT
);
588 win_langid
= uloc_getLCID(locale
);
592 // Set up an ICU collator
593 if (options
[r
].doesOccur
) {
594 // TODO: implement it
596 col
= ucol_open(locale
, &status
);
597 if (U_FAILURE(status
)) {
602 if (options
[f
].doesOccur
) {
603 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
605 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
608 if (options
[a
].doesOccur
) {
609 ucol_setAttribute(col
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
612 if (options
[c
].doesOccur
) { // strcmp() has i18n encoding problem
613 if (strcmp("lower", options
[c
].value
) == 0){
614 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, &status
);
615 } else if (strcmp("upper", options
[c
].value
) == 0) {
616 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, &status
);
618 status
= U_ILLEGAL_ARGUMENT_ERROR
;
623 if (options
[l
].doesOccur
){
624 ucol_setAttribute(col
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
627 if (options
[n
].doesOccur
){
628 ucol_setAttribute(col
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
631 if (options
[s
].doesOccur
) {
633 int tmp
= strtol(options
[l
].value
, &endp
, 0);
634 if (endp
== options
[l
].value
) {
635 status
= U_ILLEGAL_ARGUMENT_ERROR
;
639 case 1: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
); break;
640 case 2: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_SECONDARY
, &status
); break;
641 case 3: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
); break;
642 case 4: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
); break;
643 case 5: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
); break;
644 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
650 // to avoid using the annoying 'id' in the TESTCASE(id,test) macro or the like
651 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
655 UErrorCode status = U_ZERO_ERROR;\
656 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
657 if (U_FAILURE(status)) {\
670 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
// Test table: every TEST_* line below expands, through TEST(), to the
// construction of one benchmark function object with the arguments listed in
// the corresponding #define.
// Key generation tests over rnd_index.
673 #define TEST_KEYGEN(testname, func)\
674 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, &CmdKeyGen::func, 0)
675 TEST_KEYGEN(TestIcu_KeyGen_null
, icu_key_null
);
676 TEST_KEYGEN(TestIcu_KeyGen_len
, icu_key_len
);
677 TEST_KEYGEN(TestPosix_KeyGen_null
, posix_key_null
);
678 TEST_KEYGEN(TestWin_KeyGen_null
, win_key_null
);
679 TEST_KEYGEN(TestWin_KeyGen_len
, win_key_len
);
// Per-string collation element iteration tests over icu_data.
681 #define TEST_ITER(testname, func)\
682 TEST(testname, CmdIter, col, count, icu_data, &CmdIter::func,0,0)
683 TEST_ITER(TestIcu_ForwardIter_null
, icu_forward_null
);
684 TEST_ITER(TestIcu_ForwardIter_len
, icu_forward_len
);
685 TEST_ITER(TestIcu_BackwardIter_null
, icu_backward_null
);
686 TEST_ITER(TestIcu_BackwardIter_len
, icu_backward_len
);
// Iteration tests over the single concatenated buffer icu_data_all.
688 #define TEST_ITER_ALL(testname, func)\
689 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
690 TEST_ITER_ALL(TestIcu_ForwardIter_all_null
, forward_null
);
691 TEST_ITER_ALL(TestIcu_ForwardIter_all_len
, forward_len
);
692 TEST_ITER_ALL(TestIcu_BackwardIter_all_null
, backward_null
);
693 TEST_ITER_ALL(TestIcu_BackwardIter_all_len
, backward_len
);
// qsort tests: sort rnd_index with the named CmdQsort comparator.
695 #define TEST_QSORT(testname, func)\
696 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
697 TEST_QSORT(TestIcu_qsort_strcoll_null
, icu_strcoll_null
);
698 TEST_QSORT(TestIcu_qsort_strcoll_len
, icu_strcoll_len
);
699 TEST_QSORT(TestIcu_qsort_usekey
, icu_cmpkey
);
700 TEST_QSORT(TestPosix_qsort_strcoll_null
, posix_strcoll_null
);
701 TEST_QSORT(TestPosix_qsort_usekey
, posix_cmpkey
);
702 TEST_QSORT(TestWin_qsort_CompareStringW_null
, win_cmp_null
);
703 TEST_QSORT(TestWin_qsort_CompareStringW_len
, win_cmp_len
);
704 TEST_QSORT(TestWin_qsort_usekey
, win_cmpkey
);
// Binary search tests: look up the entries of rnd_index in a sorted array.
// NOTE(review): every test is handed ord_icu_key as the sorted array, including
// the POSIX and Windows comparators - confirm that this is intended.
706 #define TEST_BIN(testname, func)\
707 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key, &CmdBinSearch::func)
708 TEST_BIN(TestIcu_BinarySearch_strcoll_null
, icu_strcoll_null
);
709 TEST_BIN(TestIcu_BinarySearch_strcoll_len
, icu_strcoll_len
);
710 TEST_BIN(TestIcu_BinarySearch_usekey
, icu_cmpkey
);
711 TEST_BIN(TestIcu_BinarySearch_strcmp
, icu_strcmp
);
712 TEST_BIN(TestIcu_BinarySearch_cmpCPO
, icu_cmpcpo
);
713 TEST_BIN(TestPosix_BinarySearch_strcoll_null
, posix_strcoll_null
);
714 TEST_BIN(TestPosix_BinarySearch_usekey
, posix_cmpkey
);
715 TEST_BIN(TestWin_BinarySearch_CompareStringW_null
, win_cmp_null
);
716 TEST_BIN(TestWin_BinarySearch_CompareStringW_len
, win_cmp_len
);
717 TEST_BIN(TestWin_BinarySearch_usekey
, win_cmpkey
);
718 TEST_BIN(TestWin_BinarySearch_wcscmp
, win_wcscmp
);
726 void prepareData(UErrorCode
& status
){
727 if(U_FAILURE(status
)) return;
728 if (icu_data
) return; // prepared
730 icu_data
= new CA_uchar();
732 // Following code is borrowed from UPerfTest::getLines();
733 const UChar
* line
=NULL
;
736 line
= ucbuf_readline(ucharBuf
,&len
,&status
);
737 if(line
== NULL
|| U_FAILURE(status
)){break;}
739 // Refer to the source code of ucbuf_readline()
740 // 1. 'len' includs the line terminal symbols
741 // 2. The length of the line terminal symbols is only one character
742 // 3. The Windows CR LF line terminal symbols will be converted to CR
745 continue; //skip empty line
747 icu_data
->append_one(len
);
748 memcpy(icu_data
->last(), line
, len
* sizeof(UChar
));
749 icu_data
->last()[len
-1] = NULL
;
752 if(U_FAILURE(status
)) return;
754 // UTF-16 -> UTF-8 conversion.
755 UConverter
*conv
= ucnv_open("utf-8", &status
); // just UTF-8 for now.
756 if (U_FAILURE(status
)) return;
758 count
= icu_data
->count
;
760 icu_data_all_len
= icu_data
->index
[count
]; // includes all NULLs
761 icu_data_all_len
-= count
; // excludes all NULLs
762 icu_data_all_len
+= 1; // the terminal NULL
763 icu_data_all
= new UChar
[icu_data_all_len
];
764 icu_data_all
[icu_data_all_len
- 1] = 0; //the terminal NULL
766 icu_key
= new CA_uint8
;
767 win_data
= new CA_win_wchar
;
768 win_key
= new CA_char
;
769 posix_data
= new CA_char
;
770 posix_key
= new CA_char
;
771 rnd_index
= new DataIndex
[count
];
772 DataIndex::win_langid
= win_langid
;
773 DataIndex::col
= col
;
776 UChar
* p
= icu_data_all
;
779 for (int i
=0; i
< count
; i
++) {
781 s
= sizeof(UChar
) * icu_data
->lengthOf(i
);
782 memcpy(p
, icu_data
->dataOf(i
), s
);
783 p
+= icu_data
->lengthOf(i
);
788 s
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,NULL
, 0);
789 icu_key
->append_one(s
);
790 t
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,icu_key
->last(), s
);
791 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
794 s
= ucnv_fromUChars(conv
,NULL
, 0, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
795 if (status
== U_BUFFER_OVERFLOW_ERROR
|| status
== U_ZERO_ERROR
){
796 status
= U_ZERO_ERROR
;
800 posix_data
->append_one(s
+ 1); // plus terminal NULL
801 t
= ucnv_fromUChars(conv
,posix_data
->last(), s
, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
802 if (U_FAILURE(status
)) return;
803 if ( t
!= s
){status
= U_INVALID_FORMAT_ERROR
;return;}
804 posix_data
->last()[s
] = 0;
807 s
= strxfrm(NULL
, posix_data
->dataOf(i
), 0);
808 if (s
== INT_MAX
){status
= U_INVALID_FORMAT_ERROR
;return;}
809 posix_key
->append_one(s
);
810 t
= strxfrm(posix_key
->last(), posix_data
->dataOf(i
), s
);
811 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
814 s
= icu_data
->lengthOf(i
) + 1; // plus terminal NULL
815 win_data
->append_one(s
);
816 memcpy(win_data
->last(), icu_data
->dataOf(i
), sizeof(WCHAR
) * s
);
819 s
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), NULL
,0);
820 if (s
== 0) {status
= U_INVALID_FORMAT_ERROR
;return;}
821 win_key
->append_one(s
);
822 t
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), (WCHAR
*)(win_key
->last()),s
);
823 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
827 // append_one() will make points shifting, should not merge following code into previous iteration
828 for (int i
=0; i
< count
; i
++) {
829 rnd_index
[i
].icu_key
= icu_key
->dataOf(i
);
830 rnd_index
[i
].icu_data
= icu_data
->dataOf(i
);
831 rnd_index
[i
].icu_data_len
= icu_data
->lengthOf(i
);
832 rnd_index
[i
].posix_key
= posix_key
->last();
833 rnd_index
[i
].posix_data
= posix_data
->dataOf(i
);
834 rnd_index
[i
].posix_data_len
= posix_data
->lengthOf(i
);
835 rnd_index
[i
].win_key
= win_key
->dataOf(i
);
836 rnd_index
[i
].win_data
= win_data
->dataOf(i
);
837 rnd_index
[i
].win_data_len
= win_data
->lengthOf(i
);
841 qsort(rnd_index
, count
, sizeof(DataIndex
), CmdQsort::q_random
);
843 #define SORT(data, func) \
844 data = new DataIndex[count];\
845 memcpy(data, rnd_index, count * sizeof(DataIndex));\
846 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
848 SORT(ord_icu_data
, icu_strcoll_len
);
849 SORT(ord_icu_key
, icu_cmpkey
);
850 SORT(ord_posix_data
, posix_strcoll_null
);
851 SORT(ord_posix_key
, posix_cmpkey
);
852 SORT(ord_win_data
, win_cmp_len
);
853 SORT(ord_win_key
, win_cmpkey
);
854 SORT(ord_win_wcscmp
, win_wcscmp
);
855 SORT(ord_icu_strcmp
, icu_strcmp
);
856 SORT(ord_icu_cmpcpo
, icu_cmpcpo
);
861 int main(int argc
, const char *argv
[])
864 UErrorCode status
= U_ZERO_ERROR
;
865 CollPerfTest
test(argc
, argv
, status
);
867 if (U_FAILURE(status
)){
868 printf("The error is %s\n", u_errorName(status
));
869 //TODO: print usage here
873 if (test
.run() == FALSE
){
874 fprintf(stderr
, "FAILED: Tests could not be run please check the "