1 /********************************************************************
3 * Copyright (C) 2001-2006 IBM, Inc. All Rights Reserved.
5 ********************************************************************/
12 #include "unicode/uperf.h"
14 #include "unicode/coll.h"
15 #include <unicode/ucoleitr.h>
19 /* Stores an array of string<UNIT> in one contiguous block of memory.
20 Since a string<UNIT> is itself treated as an array of UNIT, this
21 class eases memory management for an array of string<UNIT>.
24 //template<typename UNIT>
25 #define COMPATCT_ARRAY(CompactArrays, UNIT) \
26 struct CompactArrays{\
27 CompactArrays(const CompactArrays & );\
28 CompactArrays & operator=(const CompactArrays & );\
29 int32_t count;/*total number of the strings*/ \
30 int32_t * index;/*relative offset in data*/ \
31 UNIT * data; /*the real space to hold strings*/ \
33 ~CompactArrays(){free(index);free(data);} \
34 CompactArrays():data(NULL), index(NULL), count(0){ \
35 index = (int32_t *) realloc(index, sizeof(int32_t)); \
38 void append_one(int32_t theLen){ /*include terminal NULL*/ \
40 index = (int32_t *) realloc(index, sizeof(int32_t) * (count + 1)); \
41 index[count] = index[count - 1] + theLen; \
42 data = (UNIT *) realloc(data, sizeof(UNIT) * index[count]); \
44 UNIT * last(){return data + index[count - 1];} \
45 UNIT * dataOf(int32_t i){return data + index[i];} \
46 int32_t lengthOf(int i){return index[i+1] - index[i] - 1; } /*exclude terminating NULL*/ \
49 //typedef CompactArrays<UChar> CA_uchar;
50 //typedef CompactArrays<char> CA_char;
51 //typedef CompactArrays<uint8_t> CA_uint8;
52 //typedef CompactArrays<WCHAR> CA_win_wchar;
54 COMPATCT_ARRAY(CA_uchar
, UChar
)
55 COMPATCT_ARRAY(CA_char
, char)
56 COMPATCT_ARRAY(CA_uint8
, uint8_t)
57 COMPATCT_ARRAY(CA_win_wchar
, WCHAR
)
61 static DWORD win_langid
; // for qsort callback function
62 static UCollator
* col
; // for qsort callback function
68 int32_t posix_data_len
;
73 DWORD
DataIndex::win_langid
;
74 UCollator
* DataIndex::col
;
78 class CmdKeyGen
: public UPerfFunction
{
79 typedef void (CmdKeyGen::* Func
)(int32_t);
80 enum{MAX_KEY_LENGTH
= 5000};
87 union { // to save space: the three key buffers overlap
88 uint8_t icu_key
[MAX_KEY_LENGTH
];
89 char posix_key
[MAX_KEY_LENGTH
];
90 WCHAR win_key
[MAX_KEY_LENGTH
];
93 CmdKeyGen(UErrorCode
, UCollator
* col
,DWORD win_langid
, int32_t count
, DataIndex
* data
,Func fn
,int32_t)
94 :col(col
),win_langid(win_langid
), count(count
), data(data
), fn(fn
){}
96 virtual long getOperationsPerIteration(){return count
;}
98 virtual void call(UErrorCode
* status
){
99 for(int32_t i
= 0; i
< count
; i
++){
104 void icu_key_null(int32_t i
){
105 ucol_getSortKey(col
, data
[i
].icu_data
, -1, icu_key
, MAX_KEY_LENGTH
);
108 void icu_key_len(int32_t i
){
109 ucol_getSortKey(col
, data
[i
].icu_data
, data
[i
].icu_data_len
, icu_key
, MAX_KEY_LENGTH
);
112 // Already pre-generated in CollPerfTest::prepareData(), so errors need not be checked here
113 void win_key_null(int32_t i
){
114 //LCMAP_SORTKEY 0x00000400 // WC sort key (normalize)
115 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, -1, win_key
, MAX_KEY_LENGTH
);
118 void win_key_len(int32_t i
){
119 LCMapStringW(win_langid
, LCMAP_SORTKEY
, data
[i
].win_data
, data
[i
].win_data_len
, win_key
, MAX_KEY_LENGTH
);
122 void posix_key_null(int32_t i
){
123 strxfrm(posix_key
, data
[i
].posix_data
, MAX_KEY_LENGTH
);
128 class CmdIter
: public UPerfFunction
{
129 typedef void (CmdIter::* Func
)(UErrorCode
* , int32_t );
133 UCollationElements
*iter
;
136 CmdIter(UErrorCode
& status
, UCollator
* col
, int32_t count
, CA_uchar
*data
, Func fn
, int32_t,int32_t)
137 :count(count
), data(data
), fn(fn
){
139 UChar dummytext
[] = {0, 0};
140 iter
= ucol_openElements(col
, NULL
, 0, &status
);
141 ucol_setText(iter
, dummytext
, 1, &status
);
144 ucol_closeElements(iter
);
147 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
149 virtual void call(UErrorCode
* status
){
151 for(int32_t i
= 0; i
< count
; i
++){
152 (this->*fn
)(status
, i
);
156 void icu_forward_null(UErrorCode
* status
, int32_t i
){
157 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
158 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
161 void icu_forward_len(UErrorCode
* status
, int32_t i
){
162 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
163 while (ucol_next(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
166 void icu_backward_null(UErrorCode
* status
, int32_t i
){
167 ucol_setText(iter
, data
->dataOf(i
), -1, status
);
168 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
171 void icu_backward_len(UErrorCode
* status
, int32_t i
){
172 ucol_setText(iter
, data
->dataOf(i
), data
->lengthOf(i
) , status
);
173 while (ucol_previous(iter
, status
) != UCOL_NULLORDER
) exec_count
++;
177 class CmdIterAll
: public UPerfFunction
{
178 typedef void (CmdIterAll::* Func
)(UErrorCode
* status
);
182 UCollationElements
*iter
;
186 enum CALL
{forward_null
, forward_len
, backward_null
, backward_len
};
189 ucol_closeElements(iter
);
191 CmdIterAll(UErrorCode
& status
, UCollator
* col
, int32_t count
, UChar
* data
, CALL call
,int32_t,int32_t)
192 :count(count
),data(data
){
194 if (call
== forward_null
|| call
== backward_null
) {
195 iter
= ucol_openElements(col
, data
, -1, &status
);
197 iter
= ucol_openElements(col
, data
, count
, &status
);
200 if (call
== forward_null
|| call
== forward_len
){
201 fn
= icu_forward_all
;
203 fn
= icu_backward_all
;
206 virtual long getOperationsPerIteration(){return exec_count
? exec_count
: 1;}
208 virtual void call(UErrorCode
* status
){
212 void icu_forward_all(UErrorCode
* status
){
213 int strlen
= count
- 5;
216 ucol_setOffset(iter
, strindex
, status
);
218 if (ucol_next(iter
, status
) == UCOL_NULLORDER
) {
225 if (strindex
> strlen
) {
228 ucol_setOffset(iter
, strindex
, status
);
234 void icu_backward_all(UErrorCode
* status
){
238 ucol_setOffset(iter
, strindex
, status
);
240 if (ucol_previous(iter
, status
) == UCOL_NULLORDER
) {
247 if (strindex
> strlen
) {
250 ucol_setOffset(iter
, strindex
, status
);
258 struct CmdQsort
: public UPerfFunction
{
260 static int q_random(const void * a
, const void * b
){
261 uint8_t * key_a
= ((DataIndex
*)a
)->icu_key
;
262 uint8_t * key_b
= ((DataIndex
*)b
)->icu_key
;
266 while (*key_a
!= 0) {val_a
+= val_a
*37 + *key_a
++;}
267 while (*key_b
!= 0) {val_b
+= val_b
*37 + *key_b
++;}
268 return val_a
- val_b
;
272 DataIndex * da = (DataIndex *) a; \
273 DataIndex * db = (DataIndex *) b; \
276 static int icu_strcoll_null(const void *a
, const void *b
){
278 return ucol_strcoll(da
->col
, da
->icu_data
, -1, db
->icu_data
, -1) - UCOL_EQUAL
;
281 static int icu_strcoll_len(const void *a
, const void *b
){
283 return ucol_strcoll(da
->col
, da
->icu_data
, da
->icu_data_len
, db
->icu_data
, db
->icu_data_len
) - UCOL_EQUAL
;
286 static int icu_cmpkey (const void *a
, const void *b
){
288 return strcmp((char *) da
->icu_key
, (char *) db
->icu_key
);
291 static int win_cmp_null(const void *a
, const void *b
) {
295 //CSTR_GREATER_THAN 3
296 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, -1, db
->win_data
, -1);
298 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
301 return t
- CSTR_EQUAL
;
305 static int win_cmp_len(const void *a
, const void *b
) {
307 int t
= CompareStringW(da
->win_langid
, 0, da
->win_data
, da
->win_data_len
, db
->win_data
, db
->win_data_len
);
309 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
312 return t
- CSTR_EQUAL
;
316 #define QFUNC(name, func, data) \
317 static int name (const void *a, const void *b){ \
319 return func(da->data, db->data); \
322 QFUNC(posix_strcoll_null
, strcoll
, posix_data
)
323 QFUNC(posix_cmpkey
, strcmp
, posix_key
)
324 QFUNC(win_cmpkey
, strcmp
, win_key
)
325 QFUNC(win_wcscmp
, wcscmp
, win_data
)
326 QFUNC(icu_strcmp
, u_strcmp
, icu_data
)
327 QFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
330 static int32_t exec_count
; // potential multi-thread problem: static, so one counter is shared by every CmdQsort
332 typedef int (* Func
)(const void *, const void *);
335 void * base
; //Start of target array.
336 int32_t num
; //Array size in elements.
337 int32_t width
; //Element size in bytes.
339 void * backup
; //copy source of base
341 CmdQsort(UErrorCode
& status
,void *theBase
, int32_t num
, int32_t width
, Func fn
, int32_t,int32_t)
342 :backup(theBase
),num(num
),width(width
),fn(fn
){
343 base
= malloc(num
* width
);
344 time_empty(100, &status
); // warm memory/cache
353 memcpy(base
, backup
, num
* width
);
356 double time_empty(int32_t n
, UErrorCode
* status
) {
358 utimer_getTime(&start
);
362 utimer_getTime(&stop
);
363 return utimer_getDeltaSeconds(&start
,&stop
); // NOTE(review): marked "ms", but the helper is named utimer_getDeltaSeconds — confirm the unit
366 virtual void call(UErrorCode
* status
){
368 memcpy(base
, backup
, num
* width
);
369 qsort(base
, num
, width
, fn
);
371 virtual double time(int32_t n
, UErrorCode
* status
) {
372 double t1
= time_empty(n
,status
);
373 double t2
= UPerfFunction::time(n
, status
);
374 return t2
-t1
;// < 0 ? t2 : t2-t1;
377 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
379 int32_t CmdQsort::exec_count
;
382 class CmdBinSearch
: public UPerfFunction
{
384 typedef int (CmdBinSearch::* Func
)(int, int);
394 CmdBinSearch(UErrorCode
, UCollator
* col
,DWORD win_langid
,int32_t count
,DataIndex
* rnd
,DataIndex
* ord
,Func fn
)
395 :col(col
),win_langid(win_langid
), count(count
), rnd(rnd
), ord(ord
), fn(fn
),exec_count(0){}
398 virtual void call(UErrorCode
* status
){
400 for(int32_t i
= 0; i
< count
; i
++){ // search all data
404 virtual long getOperationsPerIteration(){ return exec_count
?exec_count
:1;}
406 void binary_search(int32_t random
) {
408 int high
= count
- 1;
413 guess
= (high
+ low
)/2;
414 if (last_guess
== guess
) break; // nothing to search
416 r
= (this->*fn
)(random
, guess
);
420 return; // found, search end.
430 int icu_strcoll_null(int32_t i
, int32_t j
){
431 return ucol_strcoll(col
, rnd
[i
].icu_data
, -1, ord
[j
].icu_data
,-1);
434 int icu_strcoll_len(int32_t i
, int32_t j
){
435 return ucol_strcoll(col
, rnd
[i
].icu_data
, rnd
[i
].icu_data_len
, ord
[j
].icu_data
, ord
[j
].icu_data_len
);
438 int icu_cmpkey(int32_t i
, int32_t j
) {
439 return strcmp( (char *) rnd
[i
].icu_key
, (char *) ord
[j
].icu_key
);
442 int win_cmp_null(int32_t i
, int32_t j
) {
443 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, -1, ord
[j
].win_data
, -1);
445 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
448 return t
- CSTR_EQUAL
;
452 int win_cmp_len(int32_t i
, int32_t j
) {
453 int t
= CompareStringW(win_langid
, 0, rnd
[i
].win_data
, rnd
[i
].win_data_len
, ord
[j
].win_data
, ord
[j
].win_data_len
);
455 fprintf(stderr
, "CompareStringW error, error number %x\n", GetLastError());
458 return t
- CSTR_EQUAL
;
462 #define BFUNC(name, func, data) \
463 int name(int32_t i, int32_t j) { \
464 return func(rnd[i].data, ord[j].data); \
467 BFUNC(posix_strcoll_null
, strcoll
, posix_data
)
468 BFUNC(posix_cmpkey
, strcmp
, posix_key
)
469 BFUNC(win_cmpkey
, strcmp
, win_key
)
470 BFUNC(win_wcscmp
, wcscmp
, win_data
)
471 BFUNC(icu_strcmp
, u_strcmp
, icu_data
)
472 BFUNC(icu_cmpcpo
, u_strcmpCodePointOrder
, icu_data
)
475 class CollPerfTest
: public UPerfTest
{
480 UChar
* icu_data_all
;
481 int32_t icu_data_all_len
;
486 CA_char
* posix_data
;
488 CA_win_wchar
* win_data
;
491 DataIndex
* rnd_index
; // random by icu key
492 DataIndex
* ord_win_data
;
493 DataIndex
* ord_win_key
;
494 DataIndex
* ord_posix_data
;
495 DataIndex
* ord_posix_key
;
496 DataIndex
* ord_icu_data
;
497 DataIndex
* ord_icu_key
;
498 DataIndex
* ord_win_wcscmp
;
499 DataIndex
* ord_icu_strcmp
;
500 DataIndex
* ord_icu_cmpcpo
;
502 virtual ~CollPerfTest(){
504 delete [] icu_data_all
;
512 delete[] ord_win_data
;
513 delete[] ord_win_key
;
514 delete[] ord_posix_data
;
515 delete[] ord_posix_key
;
516 delete[] ord_icu_data
;
517 delete[] ord_icu_key
;
518 delete[] ord_win_wcscmp
;
519 delete[] ord_icu_strcmp
;
520 delete[] ord_icu_cmpcpo
;
523 CollPerfTest(int32_t argc
, const char* argv
[], UErrorCode
& status
):UPerfTest(argc
, argv
, status
){
536 ord_posix_data
= NULL
;
540 ord_win_wcscmp
= NULL
;
541 ord_icu_strcmp
= NULL
;
542 ord_icu_cmpcpo
= NULL
;
544 if (U_FAILURE(status
)){
548 // Parse additional arguments
550 UOption options
[] = {
551 UOPTION_DEF("langid", 'i', UOPT_REQUIRES_ARG
), // Windows Language ID number.
552 UOPTION_DEF("rulefile", 'r', UOPT_REQUIRES_ARG
), // --rulefile <filename>
553 // Collation-related arguments. All are optional.
554 // To simplify parsing, two-choice arguments are designed as NO_ARG.
555 // The default value is the UPPERCASE word in each comment.
556 UOPTION_DEF("c_french", 'f', UOPT_NO_ARG
), // --french <on | OFF>
557 UOPTION_DEF("c_alternate", 'a', UOPT_NO_ARG
), // --alternate <NON_IGNORE | shifted>
558 UOPTION_DEF("c_casefirst", 'c', UOPT_REQUIRES_ARG
), // --casefirst <lower | upper | OFF>
559 UOPTION_DEF("c_caselevel", 'l', UOPT_NO_ARG
), // --caselevel <on | OFF>
560 UOPTION_DEF("c_normal", 'n', UOPT_NO_ARG
), // --normal <on | OFF>
561 UOPTION_DEF("c_strength", 's', UOPT_REQUIRES_ARG
), // --strength <1-5>
563 int32_t opt_len
= (sizeof(options
)/sizeof(options
[0]));
564 enum {i
, r
,f
,a
,c
,l
,n
,s
}; // The buffer between the option items' order and their references
566 _remainingArgc
= u_parseArgs(_remainingArgc
, (char**)argv
, opt_len
, options
);
568 if (_remainingArgc
< 0){
569 status
= U_ILLEGAL_ARGUMENT_ERROR
;
574 locale
= "en_US"; // set default locale
578 if (options
[i
].doesOccur
) {
580 int tmp
= strtol(options
[i
].value
, &endp
, 0);
581 if (endp
== options
[i
].value
) {
582 status
= U_ILLEGAL_ARGUMENT_ERROR
;
585 win_langid
= MAKELCID(tmp
, SORT_DEFAULT
);
587 win_langid
= uloc_getLCID(locale
);
591 // Set up an ICU collator
592 if (options
[r
].doesOccur
) {
593 // TODO: implement it
595 col
= ucol_open(locale
, &status
);
596 if (U_FAILURE(status
)) {
601 if (options
[f
].doesOccur
) {
602 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
604 ucol_setAttribute(col
, UCOL_FRENCH_COLLATION
, UCOL_OFF
, &status
);
607 if (options
[a
].doesOccur
) {
608 ucol_setAttribute(col
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
, &status
);
611 if (options
[c
].doesOccur
) { // strcmp() has i18n encoding problem
612 if (strcmp("lower", options
[c
].value
) == 0){
613 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
, &status
);
614 } else if (strcmp("upper", options
[c
].value
) == 0) {
615 ucol_setAttribute(col
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
, &status
);
617 status
= U_ILLEGAL_ARGUMENT_ERROR
;
622 if (options
[l
].doesOccur
){
623 ucol_setAttribute(col
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
626 if (options
[n
].doesOccur
){
627 ucol_setAttribute(col
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
630 if (options
[s
].doesOccur
) {
632 int tmp
= strtol(options
[l
].value
, &endp
, 0);
633 if (endp
== options
[l
].value
) {
634 status
= U_ILLEGAL_ARGUMENT_ERROR
;
638 case 1: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
); break;
639 case 2: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_SECONDARY
, &status
); break;
640 case 3: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
); break;
641 case 4: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_QUATERNARY
, &status
); break;
642 case 5: ucol_setAttribute(col
, UCOL_STRENGTH
, UCOL_IDENTICAL
, &status
); break;
643 default: status
= U_ILLEGAL_ARGUMENT_ERROR
; return;
649 // Avoids having to use the annoying 'id' in the TESTCASE(id,test) macro or the like.
650 #define TEST(testname, classname, arg1, arg2, arg3, arg4, arg5, arg6) \
654 UErrorCode status = U_ZERO_ERROR;\
655 UPerfFunction * t = new classname(status,arg1, arg2, arg3, arg4, arg5, arg6);\
656 if (U_FAILURE(status)) {\
669 virtual UPerfFunction* runIndexedTest( /*[in]*/int32_t index, /*[in]*/UBool exec, /*[out]*/const char* &name, /*[in]*/ char* par = NULL ){
672 #define TEST_KEYGEN(testname, func)\
673 TEST(testname, CmdKeyGen, col, win_langid, count, rnd_index, CmdKeyGen::func, 0)
674 TEST_KEYGEN(TestIcu_KeyGen_null
, icu_key_null
);
675 TEST_KEYGEN(TestIcu_KeyGen_len
, icu_key_len
);
676 TEST_KEYGEN(TestPosix_KeyGen_null
, posix_key_null
);
677 TEST_KEYGEN(TestWin_KeyGen_null
, win_key_null
);
678 TEST_KEYGEN(TestWin_KeyGen_len
, win_key_len
);
680 #define TEST_ITER(testname, func)\
681 TEST(testname, CmdIter, col, count, icu_data, CmdIter::func,0,0)
682 TEST_ITER(TestIcu_ForwardIter_null
, icu_forward_null
);
683 TEST_ITER(TestIcu_ForwardIter_len
, icu_forward_len
);
684 TEST_ITER(TestIcu_BackwardIter_null
, icu_backward_null
);
685 TEST_ITER(TestIcu_BackwardIter_len
, icu_backward_len
);
687 #define TEST_ITER_ALL(testname, func)\
688 TEST(testname, CmdIterAll, col, icu_data_all_len, icu_data_all, CmdIterAll::func,0,0)
689 TEST_ITER_ALL(TestIcu_ForwardIter_all_null
, forward_null
);
690 TEST_ITER_ALL(TestIcu_ForwardIter_all_len
, forward_len
);
691 TEST_ITER_ALL(TestIcu_BackwardIter_all_null
, backward_null
);
692 TEST_ITER_ALL(TestIcu_BackwardIter_all_len
, backward_len
);
694 #define TEST_QSORT(testname, func)\
695 TEST(testname, CmdQsort, rnd_index, count, sizeof(DataIndex), CmdQsort::func,0,0)
696 TEST_QSORT(TestIcu_qsort_strcoll_null
, icu_strcoll_null
);
697 TEST_QSORT(TestIcu_qsort_strcoll_len
, icu_strcoll_len
);
698 TEST_QSORT(TestIcu_qsort_usekey
, icu_cmpkey
);
699 TEST_QSORT(TestPosix_qsort_strcoll_null
, posix_strcoll_null
);
700 TEST_QSORT(TestPosix_qsort_usekey
, posix_cmpkey
);
701 TEST_QSORT(TestWin_qsort_CompareStringW_null
, win_cmp_null
);
702 TEST_QSORT(TestWin_qsort_CompareStringW_len
, win_cmp_len
);
703 TEST_QSORT(TestWin_qsort_usekey
, win_cmpkey
);
705 #define TEST_BIN(testname, func)\
706 TEST(testname, CmdBinSearch, col, win_langid, count, rnd_index, ord_icu_key,CmdBinSearch::func)
707 TEST_BIN(TestIcu_BinarySearch_strcoll_null
, icu_strcoll_null
);
708 TEST_BIN(TestIcu_BinarySearch_strcoll_len
, icu_strcoll_len
);
709 TEST_BIN(TestIcu_BinarySearch_usekey
, icu_cmpkey
);
710 TEST_BIN(TestIcu_BinarySearch_strcmp
, icu_strcmp
);
711 TEST_BIN(TestIcu_BinarySearch_cmpCPO
, icu_cmpcpo
);
712 TEST_BIN(TestPosix_BinarySearch_strcoll_null
, posix_strcoll_null
);
713 TEST_BIN(TestPosix_BinarySearch_usekey
, posix_cmpkey
);
714 TEST_BIN(TestWin_BinarySearch_CompareStringW_null
, win_cmp_null
);
715 TEST_BIN(TestWin_BinarySearch_CompareStringW_len
, win_cmp_len
);
716 TEST_BIN(TestWin_BinarySearch_usekey
, win_cmpkey
);
717 TEST_BIN(TestWin_BinarySearch_wcscmp
, win_wcscmp
);
725 void prepareData(UErrorCode
& status
){
726 if(U_FAILURE(status
)) return;
727 if (icu_data
) return; // prepared
729 icu_data
= new CA_uchar();
731 // Following code is borrowed from UPerfTest::getLines();
732 const UChar
* line
=NULL
;
735 line
= ucbuf_readline(ucharBuf
,&len
,&status
);
736 if(line
== NULL
|| U_FAILURE(status
)){break;}
738 // Refer to the source code of ucbuf_readline():
739 // 1. 'len' includes the line terminator.
740 // 2. The line terminator is only one character long.
741 // 3. The Windows CR LF line terminator is converted to CR.
744 continue; //skip empty line
746 icu_data
->append_one(len
);
747 memcpy(icu_data
->last(), line
, len
* sizeof(UChar
));
748 icu_data
->last()[len
-1] = NULL
;
751 if(U_FAILURE(status
)) return;
753 // UTF-16 -> UTF-8 conversion.
754 UConverter
*conv
= ucnv_open("utf-8", &status
); // just UTF-8 for now.
755 if (U_FAILURE(status
)) return;
757 count
= icu_data
->count
;
759 icu_data_all_len
= icu_data
->index
[count
]; // includes all NULLs
760 icu_data_all_len
-= count
; // excludes all NULLs
761 icu_data_all_len
+= 1; // the terminal NULL
762 icu_data_all
= new UChar
[icu_data_all_len
];
763 icu_data_all
[icu_data_all_len
- 1] = 0; //the terminal NULL
765 icu_key
= new CA_uint8
;
766 win_data
= new CA_win_wchar
;
767 win_key
= new CA_char
;
768 posix_data
= new CA_char
;
769 posix_key
= new CA_char
;
770 rnd_index
= new DataIndex
[count
];
771 DataIndex::win_langid
= win_langid
;
772 DataIndex::col
= col
;
775 UChar
* p
= icu_data_all
;
778 for (int i
=0; i
< count
; i
++) {
780 s
= sizeof(UChar
) * icu_data
->lengthOf(i
);
781 memcpy(p
, icu_data
->dataOf(i
), s
);
782 p
+= icu_data
->lengthOf(i
);
787 s
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,NULL
, 0);
788 icu_key
->append_one(s
);
789 t
= ucol_getSortKey(col
, icu_data
->dataOf(i
), -1,icu_key
->last(), s
);
790 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
793 s
= ucnv_fromUChars(conv
,NULL
, 0, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
794 if (status
== U_BUFFER_OVERFLOW_ERROR
|| status
== U_ZERO_ERROR
){
795 status
= U_ZERO_ERROR
;
799 posix_data
->append_one(s
+ 1); // plus terminal NULL
800 t
= ucnv_fromUChars(conv
,posix_data
->last(), s
, icu_data
->dataOf(i
), icu_data
->lengthOf(i
), &status
);
801 if (U_FAILURE(status
)) return;
802 if ( t
!= s
){status
= U_INVALID_FORMAT_ERROR
;return;}
803 posix_data
->last()[s
] = 0;
806 s
= strxfrm(NULL
, posix_data
->dataOf(i
), 0);
807 if (s
== INT_MAX
){status
= U_INVALID_FORMAT_ERROR
;return;}
808 posix_key
->append_one(s
);
809 t
= strxfrm(posix_key
->last(), posix_data
->dataOf(i
), s
);
810 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
813 s
= icu_data
->lengthOf(i
) + 1; // plus terminal NULL
814 win_data
->append_one(s
);
815 memcpy(win_data
->last(), icu_data
->dataOf(i
), sizeof(WCHAR
) * s
);
818 s
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), NULL
,0);
819 if (s
== 0) {status
= U_INVALID_FORMAT_ERROR
;return;}
820 win_key
->append_one(s
);
821 t
= LCMapStringW(win_langid
, LCMAP_SORTKEY
, win_data
->dataOf(i
), win_data
->lengthOf(i
), (WCHAR
*)(win_key
->last()),s
);
822 if (t
!= s
) {status
= U_INVALID_FORMAT_ERROR
;return;}
826 // append_one() reallocates its buffers, which can move previously returned pointers, so do not merge the following loop into the previous one.
827 for (int i
=0; i
< count
; i
++) {
828 rnd_index
[i
].icu_key
= icu_key
->dataOf(i
);
829 rnd_index
[i
].icu_data
= icu_data
->dataOf(i
);
830 rnd_index
[i
].icu_data_len
= icu_data
->lengthOf(i
);
831 rnd_index
[i
].posix_key
= posix_key
->last();
832 rnd_index
[i
].posix_data
= posix_data
->dataOf(i
);
833 rnd_index
[i
].posix_data_len
= posix_data
->lengthOf(i
);
834 rnd_index
[i
].win_key
= win_key
->dataOf(i
);
835 rnd_index
[i
].win_data
= win_data
->dataOf(i
);
836 rnd_index
[i
].win_data_len
= win_data
->lengthOf(i
);
840 qsort(rnd_index
, count
, sizeof(DataIndex
), CmdQsort::q_random
);
842 #define SORT(data, func) \
843 data = new DataIndex[count];\
844 memcpy(data, rnd_index, count * sizeof(DataIndex));\
845 qsort(data, count, sizeof(DataIndex), CmdQsort::func)
847 SORT(ord_icu_data
, icu_strcoll_len
);
848 SORT(ord_icu_key
, icu_cmpkey
);
849 SORT(ord_posix_data
, posix_strcoll_null
);
850 SORT(ord_posix_key
, posix_cmpkey
);
851 SORT(ord_win_data
, win_cmp_len
);
852 SORT(ord_win_key
, win_cmpkey
);
853 SORT(ord_win_wcscmp
, win_wcscmp
);
854 SORT(ord_icu_strcmp
, icu_strcmp
);
855 SORT(ord_icu_cmpcpo
, icu_cmpcpo
);
860 int main(int argc
, const char *argv
[])
863 UErrorCode status
= U_ZERO_ERROR
;
864 CollPerfTest
test(argc
, argv
, status
);
866 if (U_FAILURE(status
)){
867 printf("The error is %s\n", u_errorName(status
));
868 //TODO: print usage here
872 if (test
.run() == FALSE
){
873 fprintf(stderr
, "FAILED: Tests could not be run please check the "