diff --git a/icuSources/test/collperf/collperf.cpp b/icuSources/test/collperf/collperf.cpp
new file mode 100644
index 00000000..bd916c5c
--- /dev/null
+++ b/icuSources/test/collperf/collperf.cpp
@@ -0,0 +1,1749 @@
+ * Copyright (C) 2001 IBM, Inc. All Rights Reserved.
+ *
+ ********************************************************************/
+* Modification History:
+* Name Description
+* Andy Heninger First Version
+// This program tests string collation and sort key generation performance.
+// Three APIs can be tested: ICU C, Unix strcoll/strxfrm, and Windows LCMapString.
+// A file of names is required as input, one per line. It must be in utf-8 or utf-16 format,
+// and include a byte order mark. Either LE or BE format is OK.
+// Usage text printed by main() when -help is given, when option parsing fails,
+// or when no -file option was supplied.
+const char gUsageString[] =
+ "usage: collperf options...\n"
+ "-help Display this message.\n"
+ "-file file_name utf-8 or utf-16 format file of names (byte order mark required).\n"
+ "-locale name ICU locale to use. Default is en_US\n"
+ "-rules file_name Collation rules file (overrides locale)\n"
+ "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n"
+ " see\n"
+ "-win Run test using Windows native services. (ICU is default)\n"
+ "-unix Run test using Unix strxfrm, strcoll services.\n"
+ "-uselen Use API with string lengths. Default is null-terminated strings\n"
+ "-usekeys Run tests using sortkeys rather than strcoll\n"
+ "-strcmp Run tests using u_strcmp rather than strcoll\n"
+ "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n"
+ "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n"
+ "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n"
+ " under test at each call point. For measuring test overhead.\n"
+ "-terse Terse numbers-only output. Intended for use by scripts.\n"
+ "-french French accent ordering\n"
+ "-frenchoff No French accent ordering (for use with French locales.)\n"
+ "-norm Normalizing mode on\n"
+ "-shifted Shifted mode\n"
+ "-lower Lower case first\n"
+ "-upper Upper case first\n"
+ "-case Enable separate case level\n"
+ "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
+ "-keyhist Produce a table sort key size vs. string length\n"
+ "-binsearch Binary Search timing test\n"
+ "-keygen Sort Key Generation timing test\n"
+ "-qsort Quicksort timing test\n"
+ "-iter Iteration Performance Test\n"
+ "-dump Display strings, sort keys and CEs.\n"
+ ;
+#ifdef WIN32
+// Stubs for Windows API functions when building on UNIXes.
+// NOTE(review): these stubs follow `#ifdef WIN32` directly and no `#else` / `#endif` is
+// visible in this part of the file -- confirm the preprocessor structure before building.
+typedef int DWORD;
+// Stub: ignores all arguments and always returns 0.
+inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+// Stand-in for the Windows timeGetTime(): a millisecond count built from gettimeofday().
+// NOTE(review): no closing brace for this function is visible before the next declaration.
+unsigned long timeGetTime() {
+ struct timeval t;
+ gettimeofday(&t, 0);
+ unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares.
+ val += t.tv_usec / 1000;
+ return val;
+// Stub: ignores all arguments and always returns 0.
+inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+// Placeholder values so that code written against the Windows API still compiles.
+const int LCMAP_SORTKEY = 0;
+#define MAKELCID(a,b) 0
+const int SORT_DEFAULT = 0;
+// Command line option variables
+// These global variables are set according to the options specified
+// on the command line by the user.
+// Each one is bound to its option name in the opts[] table and written by ProcessOptions().
+char * opt_fName = 0; // -file: input file of names; main() prints usage if it stays 0.
+char * opt_locale = "en_US"; // -locale: ICU locale name.
+int opt_langid = 0; // Defaults to value corresponding to opt_locale.
+char * opt_rules = 0; // -rules: collation rules file; when set, used instead of the locale.
+UBool opt_help = FALSE; // -help or -?
+int opt_loopCount = 1; // -loop
+int opt_iLoopCount = 1; // -iloop
+UBool opt_terse = FALSE; // -terse
+UBool opt_qsort = FALSE; // -qsort
+UBool opt_binsearch = FALSE; // -binsearch
+UBool opt_icu = TRUE; // Not a command line option; main() clears it when -win or -unix is given.
+UBool opt_win = FALSE; // Run with Windows native functions.
+UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions.
+UBool opt_uselen = FALSE; // -uselen
+UBool opt_usekeys = FALSE; // -usekeys
+UBool opt_strcmp = FALSE; // -strcmp
+UBool opt_strcmpCPO = FALSE; // -strcmpCPO
+UBool opt_norm = FALSE; // -norm
+UBool opt_keygen = FALSE; // -keygen
+UBool opt_french = FALSE; // -french; main() rejects it together with -frenchoff.
+UBool opt_frenchoff = FALSE; // -frenchoff
+UBool opt_shifted = FALSE; // -shifted
+UBool opt_lower = FALSE; // -lower
+UBool opt_upper = FALSE; // -upper
+UBool opt_case = FALSE; // -case
+int opt_level = 0; // -level: 0 means "not specified"; main() accepts 1 through 5.
+UBool opt_keyhist = FALSE; // -keyhist
+UBool opt_itertest = FALSE; // -iter
+UBool opt_dump = FALSE; // -dump
+// Definitions for the command line options
+//
+// Each OptSpec binds an option name to the global variable that receives its value.
+// ProcessOptions() writes through pVar as a UBool* for FLAG, an int* for NUM and a
+// char** for STRING, so pVar must point at a variable of the matching type.
+struct OptSpec {
+    const char *name;               // Option text as typed on the command line, e.g. "-file".
+    enum {FLAG, NUM, STRING} type;  // FLAG takes no value; NUM and STRING consume the next argument.
+    void *pVar;                     // Address of the opt_xxx variable to set.
+};
+
+// Option table. The entry with name == 0 terminates the table.
+OptSpec opts[] = {
+    {"-file", OptSpec::STRING, &opt_fName},
+    {"-locale", OptSpec::STRING, &opt_locale},
+    {"-langid", OptSpec::NUM, &opt_langid},
+    {"-rules", OptSpec::STRING, &opt_rules},
+    {"-qsort", OptSpec::FLAG, &opt_qsort},
+    {"-binsearch", OptSpec::FLAG, &opt_binsearch},
+    {"-iter", OptSpec::FLAG, &opt_itertest},
+    {"-win", OptSpec::FLAG, &opt_win},
+    {"-unix", OptSpec::FLAG, &opt_unix},
+    {"-uselen", OptSpec::FLAG, &opt_uselen},
+    {"-usekeys", OptSpec::FLAG, &opt_usekeys},
+    {"-strcmp", OptSpec::FLAG, &opt_strcmp},
+    {"-strcmpCPO", OptSpec::FLAG, &opt_strcmpCPO},
+    {"-norm", OptSpec::FLAG, &opt_norm},
+    {"-french", OptSpec::FLAG, &opt_french},
+    {"-frenchoff", OptSpec::FLAG, &opt_frenchoff},
+    {"-shifted", OptSpec::FLAG, &opt_shifted},
+    {"-lower", OptSpec::FLAG, &opt_lower},
+    {"-upper", OptSpec::FLAG, &opt_upper},
+    {"-case", OptSpec::FLAG, &opt_case},
+    {"-level", OptSpec::NUM, &opt_level},
+    {"-keyhist", OptSpec::FLAG, &opt_keyhist},
+    {"-keygen", OptSpec::FLAG, &opt_keygen},
+    {"-loop", OptSpec::NUM, &opt_loopCount},
+    {"-iloop", OptSpec::NUM, &opt_iLoopCount},
+    {"-terse", OptSpec::FLAG, &opt_terse},
+    {"-dump", OptSpec::FLAG, &opt_dump},
+    {"-help", OptSpec::FLAG, &opt_help},
+    {"-?", OptSpec::FLAG, &opt_help},
+    {0, OptSpec::FLAG, 0}
+};
+// Global variables pointing to and describing the test file
+// struct Line
+// Each line from the source file (containing a name, presumably) gets
+// one of these structs.
+struct Line {
+    UChar *name;        // UTF-16 text of the line; main() stores it with a terminating 0.
+    int len;            // Number of UChars in name, not counting the terminator.
+    char *winSortKey;   // Sort key used by the Windows comparison paths.
+    char *icuSortKey;   // Sort key used by the ICU comparison paths.
+    char *unixSortKey;  // Sort key used by the UNIX comparison paths.
+    char *unixName;     // UTF-8 copy of name, filled in by UnixConvert().
+};
+
+Line *gFileLines; // Ptr to array of Line structs, one per line in the file.
+int gNumFileLines; // Number of entries of gFileLines that are in use.
+UCollator *gCol; // The ICU collator under test; opened in main().
+Line **gSortedLines; // Array of pointers into gFileLines, allocated in main().
+Line **gRandomLines; // NOTE(review): presumably the lines in randomized order -- confirm at the point of use.
+int gCount; // Running count of compare / iteration operations, bumped by the test functions.
+// ProcessOptions() Function to read the command line options.
+//
+// Walks argv[1..argc-1], looks each argument up in opts[] (terminated by an entry
+// whose name is 0) and stores the result through that entry's pVar:
+//   FLAG   - sets the UBool to TRUE
+//   STRING - stores a pointer to the following argument
+//   NUM    - parses the following argument with strtol (base auto-detected)
+// Returns TRUE on success. On an unknown option, a missing value or a non-numeric
+// NUM value, prints a message to stderr and returns FALSE.
+//
+// NOTE(review): the loop header below was damaged in this copy of the file and has been
+// restored from the surrounding code (pArgName, pOpt and the post-loop
+// "pOpt->name == 0" test) -- confirm against the upstream source.
+UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
+{
+    int i;
+    int argNum;
+    const char *pArgName;
+    OptSpec *pOpt;
+
+    for (argNum=1; argNum<argc; argNum++) {
+        pArgName = argv[argNum];
+        for (pOpt = opts; pOpt->name != 0; pOpt++) {
+            if (strcmp(pOpt->name, pArgName) == 0) {
+                switch (pOpt->type) {
+                case OptSpec::FLAG:
+                    *(UBool *)(pOpt->pVar) = TRUE;
+                    break;
+                case OptSpec::STRING:
+                    argNum ++;
+                    if (argNum >= argc) {
+                        fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
+                        return FALSE;
+                    }
+                    *(const char **)(pOpt->pVar) = argv[argNum];
+                    break;
+                case OptSpec::NUM:
+                    argNum ++;
+                    if (argNum >= argc) {
+                        fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
+                        return FALSE;
+                    }
+                    char *endp;
+                    i = strtol(argv[argNum], &endp, 0);
+                    if (endp == argv[argNum]) {
+                        // strtol consumed nothing, so the argument is not a number.
+                        fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
+                        return FALSE;
+                    }
+                    *(int *)(pOpt->pVar) = i;
+                }
+                break;
+            }
+        }
+        // Falling off the end of the table means no entry matched this argument.
+        if (pOpt->name == 0)
+        {
+            fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
+            return FALSE;
+        }
+    }
+    return TRUE;
+}
+// Comparison functions for use by qsort.
+// Seven flavors: ICU, Windows or UNIX; SortKey or String Compare; Strings with length
+// or null terminated. Each takes pointers to two (Line *) array elements and
+// increments gCount once per call.
+
+// ICUstrcmpK: order two lines by a byte-wise strcmp of their ICU sort keys.
+int ICUstrcmpK(const void *a, const void *b) {
+    gCount++;
+    int t = strcmp((*(Line **)a)->icuSortKey, (*(Line **)b)->icuSortKey);
+    return t;
+}
+// ICUstrcmpL: order two lines with ucol_strcoll, passing explicit string lengths.
+// Returns -1, 0 or +1.
+int ICUstrcmpL(const void *a, const void *b) {
+    gCount++;
+    UCollationResult t;
+    t = ucol_strcoll(gCol, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len);
+    if (t == UCOL_LESS) return -1;
+    if (t == UCOL_GREATER) return +1;
+    return 0;
+}
+// ICUstrcmp: order two lines with ucol_strcoll, passing -1 as the length so the
+// strings are treated as null terminated. Returns -1, 0 or +1.
+int ICUstrcmp(const void *a, const void *b) {
+    gCount++;
+    UCollationResult t;
+    t = ucol_strcoll(gCol, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1);
+    if (t == UCOL_LESS) return -1;
+    if (t == UCOL_GREATER) return +1;
+    return 0;
+}
+// Winstrcmp: order two null-terminated lines with CompareStringW.
+// The rest of this file treats a CompareStringW result of 1 as "less", 2 as "equal"
+// and 0 as an error, so subtracting 2 gives the sign convention qsort expects.
+int Winstrcmp(const void *a, const void *b) {
+    gCount++;
+    int t;
+    t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1);
+    return t-2;
+}
+// UNIXstrcmp: order two lines with the C library strcoll on their unixName copies.
+int UNIXstrcmp(const void *a, const void *b) {
+    gCount++;
+    int t;
+    t = strcoll((*(Line **)a)->unixName, (*(Line **)b)->unixName);
+    return t;
+}
+// WinstrcmpL: as Winstrcmp, but passes the explicit length of each line to CompareStringW.
+int WinstrcmpL(const void *a, const void *b) {
+    gCount++;
+    int t;
+    t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len);
+    return t-2;
+}
+// WinstrcmpK: order two lines by a byte-wise strcmp of their Windows sort keys.
+int WinstrcmpK(const void *a, const void *b) {
+    gCount++;
+    int t = strcmp((*(Line **)a)->winSortKey, (*(Line **)b)->winSortKey);
+    return t;
+}
+// Function for sorting the names (lines) into a random order.
+// Order is based on a hash of the ICU Sort key for the lines
+// The randomized order is used as input for the sorting timing tests.
+//
+// a, b: pointers to two (Line *) array elements, as passed by qsort.
+// Returns -1, 0 or 1 according to how the two hash values compare.
+int ICURandomCmp(const void *a, const void *b) {
+    const char *ask = (*(Line **)a)->icuSortKey;
+    const char *bsk = (*(Line **)b)->icuSortKey;
+
+    // Accumulate in unsigned arithmetic: the hash is expected to wrap, and wrapping a
+    // signed int is undefined behaviour. Each key byte is still read as a plain char.
+    unsigned int aHash = 0;
+    unsigned int bHash = 0;
+    while (*ask != 0) {
+        aHash += aHash*37 + (unsigned int)*ask++;
+    }
+    while (*bsk != 0) {
+        bHash += bHash*37 + (unsigned int)*bsk++;
+    }
+
+    // Compare as signed values, matching the ordering of the original int accumulators.
+    int aVal = (int)aHash;
+    int bVal = (int)bHash;
+    if (aVal == bVal) {
+        return 0;
+    }
+    return (aVal > bVal) ? 1 : -1;
+}
+// doKeyGen() Key Generation Timing Test
+void doKeyGen()
+ int line;
+ int loops;
+ int iLoop;
+ int t;
+ int len=-1;
+ // Adjust loop count to compensate for file size. Should be order n
+ double dLoopCount = double(opt_loopCount) * (1000. / double(gNumFileLines));
+ int adj_loopCount = int(dLoopCount);
+ if (adj_loopCount < 1) adj_loopCount = 1;
+ unsigned long startTime = timeGetTime();
+ if (opt_win) {
+ for (loops=0; loopsname, (gSortedLines[guess])->name);
+ }
+ gCount++;
+ if (r== 0)
+ break;
+ if (r < 0)
+ hi = guess;
+ else
+ lo = guess;
+ }
+ }
+ }
+ elapsedTime = timeGetTime() - startTime;
+ break;
+ }
+ if (opt_icu)
+ {
+ unsigned long startTime = timeGetTime();
+ UCollationResult r;
+ for (loops=0; loopslen;
+ }
+ int hi = gNumFileLines-1;
+ int lo = 0;
+ int guess = -1;
+ for (;;) {
+ int newGuess = (hi + lo) / 2;
+ if (newGuess == guess)
+ break;
+ guess = newGuess;
+ int ri;
+ if (opt_usekeys) {
+ for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+ ri = strcmp((gSortedLines[line])->icuSortKey, (gSortedLines[guess])->icuSortKey);
+ }
+ gCount++;
+ r=UCOL_GREATER; if(ri<0) {r=UCOL_LESS;} else if (ri==0) {r=UCOL_EQUAL;}
+ }
+ else
+ {
+ if (opt_uselen) {
+ guessLen = (gSortedLines[guess])->len;
+ }
+ for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+ r = ucol_strcoll(gCol, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen);
+ }
+ gCount++;
+ }
+ if (r== UCOL_EQUAL)
+ break;
+ if (r == UCOL_LESS)
+ hi = guess;
+ else
+ lo = guess;
+ }
+ }
+ }
+ elapsedTime = timeGetTime() - startTime;
+ break;
+ }
+ if (opt_win)
+ {
+ unsigned long startTime = timeGetTime();
+ int r;
+ for (loops=0; loopslen;
+ }
+ int hi = gNumFileLines-1;
+ int lo = 0;
+ int guess = -1;
+ for (;;) {
+ int newGuess = (hi + lo) / 2;
+ if (newGuess == guess)
+ break;
+ guess = newGuess;
+ if (opt_usekeys) {
+ for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+ r = strcmp((gSortedLines[line])->winSortKey, (gSortedLines[guess])->winSortKey);
+ }
+ gCount++;
+ r+=2;
+ }
+ else
+ {
+ if (opt_uselen) {
+ guessLen = (gSortedLines[guess])->len;
+ }
+ for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+ r = CompareStringW(gWinLCID, 0, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen);
+ }
+ if (r == 0) {
+ if (opt_terse == FALSE) {
+ fprintf(stderr, "Error returned from Windows CompareStringW.\n");
+ }
+ exit(-1);
+ }
+ gCount++;
+ }
+ if (r== 2) // strings ==
+ break;
+ if (r == 1) // line < guess
+ hi = guess;
+ else // line > guess
+ lo = guess;
+ }
+ }
+ }
+ elapsedTime = timeGetTime() - startTime;
+ break;
+ }
+ if (opt_unix)
+ {
+ unsigned long startTime = timeGetTime();
+ int r;
+ for (loops=0; loopsunixSortKey, (gSortedLines[guess])->unixSortKey);
+ }
+ gCount++;
+ }
+ else
+ {
+ for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+ r = strcoll((gSortedLines[line])->unixName, (gSortedLines[guess])->unixName);
+ }
+ errno = 0;
+ if (errno != 0) {
+ fprintf(stderr, "Error %d returned from strcoll.\n", errno);
+ exit(-1);
+ }
+ gCount++;
+ }
+ if (r == 0) // strings ==
+ break;
+ if (r < 0) // line < guess
+ hi = guess;
+ else // line > guess
+ lo = guess;
+ }
+ }
+ }
+ elapsedTime = timeGetTime() - startTime;
+ break;
+ }
+ break;
+ }
+ int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+ if (opt_terse == FALSE) {
+ printf("binary search: total # of string compares = %d\n", gCount);
+ printf("binary search: compares per loop = %d\n", gCount / loops);
+ printf("binary search: time per compare = %d ns\n", ns);
+ } else {
+ printf("%d, ", ns);
+ }
+// doQSort() The quick sort timing test. Uses the C library qsort function.
+void doQSort() {
+ int i;
+ Line **sortBuf = new Line *[gNumFileLines];
+ // Adjust loop count to compensate for file size. QSort should be n log(n)
+ double dLoopCount = double(opt_loopCount) * 3000. / (log10(gNumFileLines) * double(gNumFileLines));
+ if (opt_usekeys) dLoopCount *= 5;
+ int adj_loopCount = int(dLoopCount);
+ if (adj_loopCount < 1) adj_loopCount = 1;
+ gCount = 0;
+ unsigned long startTime = timeGetTime();
+ if (opt_win && opt_usekeys) {
+ for (i=0; i maxLen) maxLen = gFileLines[i].len;
+ }
+ // Allocate arrays to hold the histogram data
+ int *accumulatedLen = new int[maxLen+1];
+ int *numKeysOfSize = new int[maxLen+1];
+ for (i=0; i<=maxLen; i++) {
+ accumulatedLen[i] = 0;
+ numKeysOfSize[i] = 0;
+ }
+ // Fill the arrays...
+ for (i=0; i 0) {
+ printf("%d, %f, %f\n", i, (float)accumulatedLen[i] / (float)numKeysOfSize[i],
+ (float)accumulatedLen[i] / (float)(numKeysOfSize[i] * i));
+ }
+ }
+// doForwardIterTest(UBool) Forward iteration test
+//
+// Times ucol_next() in two passes and prints the results:
+//   1. over every line of the file, one ucol_setText() per line;
+//   2. over all lines concatenated into one buffer, moving the iterator 10 code units
+//      further on after every 5 collation elements.
+// After each pass the same loop is run again without the ucol_next() calls and that
+// time is subtracted as loop overhead.
+//
+// haslen: TRUE to pass explicit string lengths to the collator, FALSE to pass -1 and
+//         rely on null termination.
+void doForwardIterTest(UBool haslen) {
+    int count = 0;
+
+    UErrorCode error = U_ZERO_ERROR;
+    printf("\n\nPerforming forward iteration performance test with ");
+
+    if (haslen) {
+        printf("non-null terminated data -----------\n");
+    }
+    else {
+        printf("null terminated data -----------\n");
+    }
+    printf("performance test on strings from file -----------\n");
+
+    UChar dummytext[] = {0, 0};
+    UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error);
+    ucol_setText(iter, dummytext, 1, &error);
+
+    gCount = 0;
+    unsigned long startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *line = gFileLines[linecount].name;
+            int lineLen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, line, lineLen, &error);
+            while (ucol_next(iter, &error) != UCOL_NULLORDER) {
+                gCount++;
+            }
+            linecount ++;
+        }
+        count ++;
+    }
+    unsigned long elapsedTime = timeGetTime() - startTime;
+    // elapsedTime is unsigned long; print it through a matching conversion.
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+
+    // empty loop recalculation
+    count = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *line = gFileLines[linecount].name;
+            int lineLen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, line, lineLen, &error);
+            linecount ++;
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+
+    ucol_closeElements(iter);
+
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Total number of strings compared %d in %d loops\n", gNumFileLines,
+           opt_loopCount);
+    printf("Average time per ucol_next() nano seconds %d\n", ns);
+
+    printf("performance test on skipped-5 concatenated strings from file -----------\n");
+
+    UChar *str;
+    int textLen = 0;
+    // appending all the strings
+    int linecount = 0;
+    while (linecount < gNumFileLines) {
+        textLen += haslen?gFileLines[linecount].len:
+                          u_strlen(gFileLines[linecount].name);
+        linecount ++;
+    }
+    // One extra UChar for a terminator: in null-terminated mode the buffer is handed to
+    // ucol_openElements() with length -1 and measured with u_strlen() below.
+    str = (UChar *)malloc(sizeof(UChar) * (textLen + 1));
+    if (str == NULL) {
+        fprintf(stderr, "Out of memory building the concatenated test string.\n");
+        exit(-1);
+    }
+    int strindex = 0;
+    linecount = 0;
+    while (strindex < textLen) {
+        int len = 0;
+        len += haslen?gFileLines[linecount].len:
+                      u_strlen(gFileLines[linecount].name);
+        memcpy(str + strindex, gFileLines[linecount].name,
+               sizeof(UChar) * len);
+        strindex += len;
+        linecount ++;
+    }
+    str[textLen] = 0;
+
+    printf("Total size of strings %d\n", textLen);
+
+    gCount = 0;
+    count = 0;
+
+    if (!haslen) {
+        textLen = -1;
+    }
+    iter = ucol_openElements(gCol, str, textLen, &error);
+    if (!haslen) {
+        textLen = u_strlen(str);
+    }
+    textLen -= 5; // any left over characters are not iterated,
+                  // this is to ensure the backwards and forwards iterators
+                  // gets the same position
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 0;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            if (ucol_next(iter, &error) == UCOL_NULLORDER) {
+                break;
+            }
+            gCount++;
+            count5 --;
+            if (count5 == 0) {
+                strindex += 10;
+                if (strindex > textLen) {
+                    break;
+                }
+                ucol_setOffset(iter, strindex, &error);
+                count5 = 5;
+            }
+        }
+        count ++;
+    }
+
+    elapsedTime = timeGetTime() - startTime;
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+
+    // empty loop recalculation
+    int tempgCount = 0;
+    count = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 0;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            tempgCount ++;
+            count5 --;
+            if (count5 == 0) {
+                strindex += 10;
+                if (strindex > textLen) {
+                    break;
+                }
+                ucol_setOffset(iter, strindex, &error);
+                count5 = 5;
+            }
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+
+    ucol_closeElements(iter);
+    free(str);
+
+    printf("gCount %d\n", gCount);
+    ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Average time per ucol_next() nano seconds %d\n", ns);
+}
+// doBackwardIterTest(UBool) Backwards iteration test
+//
+// Times ucol_previous() in two passes and prints the results:
+//   1. over every line of the file, one ucol_setText() per line;
+//   2. over all lines concatenated into one buffer, starting at offset 5 and moving the
+//      iterator 10 code units further on after every 5 collation elements.
+// After each pass the same loop is run again without the ucol_previous() calls and
+// that time is subtracted as loop overhead.
+//
+// haslen: TRUE to pass explicit string lengths to the collator, FALSE to pass -1 and
+//         rely on null termination.
+void doBackwardIterTest(UBool haslen) {
+    int count = 0;
+    UErrorCode error = U_ZERO_ERROR;
+    printf("\n\nPerforming backward iteration performance test with ");
+
+    if (haslen) {
+        printf("non-null terminated data -----------\n");
+    }
+    else {
+        printf("null terminated data -----------\n");
+    }
+
+    printf("performance test on strings from file -----------\n");
+
+    UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error);
+    UChar dummytext[] = {0, 0};
+    ucol_setText(iter, dummytext, 1, &error);
+
+    gCount = 0;
+    unsigned long startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *line = gFileLines[linecount].name;
+            int lineLen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, line, lineLen, &error);
+            while (ucol_previous(iter, &error) != UCOL_NULLORDER) {
+                gCount ++;
+            }
+            linecount ++;
+        }
+        count ++;
+    }
+    unsigned long elapsedTime = timeGetTime() - startTime;
+    // elapsedTime is unsigned long; print it through a matching conversion.
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+
+    // empty loop recalculation
+    count = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *line = gFileLines[linecount].name;
+            int lineLen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, line, lineLen, &error);
+            linecount ++;
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+    ucol_closeElements(iter);
+
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Total number of strings compared %d in %d loops\n", gNumFileLines,
+           opt_loopCount);
+    printf("Average time per ucol_previous() nano seconds %d\n", ns);
+
+    printf("performance test on skipped-5 concatenated strings from file -----------\n");
+
+    UChar *str;
+    int textLen = 0;
+    // appending all the strings
+    int linecount = 0;
+    while (linecount < gNumFileLines) {
+        textLen += haslen?gFileLines[linecount].len:
+                          u_strlen(gFileLines[linecount].name);
+        linecount ++;
+    }
+    // One extra UChar for a terminator: in null-terminated mode the buffer is handed to
+    // ucol_openElements() with length -1 and measured with u_strlen() below.
+    str = (UChar *)malloc(sizeof(UChar) * (textLen + 1));
+    if (str == NULL) {
+        fprintf(stderr, "Out of memory building the concatenated test string.\n");
+        exit(-1);
+    }
+    int strindex = 0;
+    linecount = 0;
+    while (strindex < textLen) {
+        int len = 0;
+        len += haslen?gFileLines[linecount].len:
+                      u_strlen(gFileLines[linecount].name);
+        memcpy(str + strindex, gFileLines[linecount].name,
+               sizeof(UChar) * len);
+        strindex += len;
+        linecount ++;
+    }
+    str[textLen] = 0;
+
+    printf("Total size of strings %d\n", textLen);
+
+    gCount = 0;
+    count = 0;
+
+    if (!haslen) {
+        textLen = -1;
+    }
+
+    iter = ucol_openElements(gCol, str, textLen, &error);
+    if (!haslen) {
+        textLen = u_strlen(str);
+    }
+
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 5;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            if (ucol_previous(iter, &error) == UCOL_NULLORDER) {
+                break;
+            }
+            gCount ++;
+            count5 --;
+            if (count5 == 0) {
+                strindex += 10;
+                if (strindex > textLen) {
+                    break;
+                }
+                ucol_setOffset(iter, strindex, &error);
+                count5 = 5;
+            }
+        }
+        count ++;
+    }
+
+    elapsedTime = timeGetTime() - startTime;
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+
+    // empty loop recalculation
+    count = 0;
+    int tempgCount = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 5;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            tempgCount ++;
+            count5 --;
+            if (count5 == 0) {
+                strindex += 10;
+                if (strindex > textLen) {
+                    break;
+                }
+                ucol_setOffset(iter, strindex, &error);
+                count5 = 5;
+            }
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %ld\n", (long)elapsedTime);
+    ucol_closeElements(iter);
+    free(str);
+
+    printf("gCount %d\n", gCount);
+    ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Average time per ucol_previous() nano seconds %d\n", ns);
+}
+// doIterTest() Iteration test
+// Runs the forward and then the backward iteration timing tests, both in the
+// string-length mode selected by opt_uselen.
+void doIterTest() {
+    doForwardIterTest(opt_uselen);
+    doBackwardIterTest(opt_uselen);
+}
+// UnixConvert -- Convert the lines of the file to the encoding for UNIX
+// Since it appears that Unicode support is going in the general
+// direction of the use of UTF-8 locales, that is the approach
+// that is used here.
+//
+// For every entry of gFileLines, allocates unixName with new[] and fills it with the
+// null-terminated UTF-8 conversion of name. Exits the program on any conversion failure.
+void UnixConvert() {
+    int line;
+
+    UConverter *cvrtr; // An ICU code page converter.
+    UErrorCode status = U_ZERO_ERROR;
+
+    cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now.
+    if (U_FAILURE(status)) {
+        // Print the error code itself, not its address.
+        fprintf(stderr, "ICU Converter open failed.: %d\n", status);
+        exit(-1);
+    }
+
+    for (line=0; line < gNumFileLines; line++) {
+        // First call with no target buffer only measures the converted size; it is
+        // expected to report U_BUFFER_OVERFLOW_ERROR.
+        int sizeNeeded = ucnv_fromUChars(cvrtr,
+                                         0, // ptr to target buffer.
+                                         0, // length of target buffer.
+                                         gFileLines[line].name,
+                                         -1, // source is null terminated
+                                         &status);
+        if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) {
+            fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
+            exit(-1);
+        }
+        status = U_ZERO_ERROR;
+        gFileLines[line].unixName = new char[sizeNeeded+1];
+        sizeNeeded = ucnv_fromUChars(cvrtr,
+                                     gFileLines[line].unixName, // ptr to target buffer.
+                                     sizeNeeded+1, // length of target buffer.
+                                     gFileLines[line].name,
+                                     -1, // source is null terminated
+                                     &status);
+        if (U_FAILURE(status)) {
+            fprintf(stderr, "ICU Conversion Failed.: %d\n", status);
+            exit(-1);
+        }
+        gFileLines[line].unixName[sizeNeeded] = 0;
+    }
+    ucnv_close(cvrtr);
+}
+// class UCharFile Class to hide all the gorp to read a file in
+// and produce a stream of UChars.
+//
+// The constructor opens the file and detects its encoding from the byte order mark;
+// get() then returns one UTF-16 code unit per call. Check error() after construction
+// and after get(); check eof() after get().
+//
+// NOTE(review): the access specifiers were missing from this copy of the file. The split
+// below keeps the members that other functions in this file call public and everything
+// else private -- confirm against the upstream source.
+class UCharFile {
+public:
+    UCharFile(const char *fileName);
+    ~UCharFile();
+    UChar get();
+    UBool eof() {return fEof;};
+    UBool error() {return fError;};
+
+private:
+    UCharFile (const UCharFile &other) {}; // No copy constructor.
+    UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op
+
+    FILE *fFile;                 // Underlying stream; NULL if the open failed.
+    const char *fName;           // File name as passed to the constructor (not copied).
+    UBool fEof;                  // Set by get() once the end of the file is reached.
+    UBool fError;                // Set on open failure, missing BOM or corrupt UTF-8.
+    UChar fPending2ndSurrogate;  // Trail surrogate held back for the next get(); 0 if none.
+    enum {UTF16LE, UTF16BE, UTF8} fEncoding;
+};
+// Opens fileName for binary reading and determines its encoding from the byte order
+// mark. On failure (file cannot be opened, or no UTF-8 / UTF-16 BOM) a message is
+// written to stderr and error() subsequently returns TRUE.
+UCharFile::UCharFile(const char * fileName) {
+    fEof = FALSE;
+    fError = FALSE;
+    fName = fileName;
+    fPending2ndSurrogate = 0;
+    fEncoding = UTF8; // Placeholder so the member is never left uninitialized; set for real below.
+    fFile = fopen(fName, "rb");
+    if (fFile == NULL) {
+        // Report the file actually being opened; this class also reads the -rules file.
+        fprintf(stderr, "Can not open file \"%s\"\n", fileName);
+        fError = TRUE;
+        return;
+    }
+    //
+    // Look for the byte order mark at the start of the file.
+    //
+    int BOMC1, BOMC2, BOMC3;
+    BOMC1 = fgetc(fFile);
+    BOMC2 = fgetc(fFile);
+
+    if (BOMC1 == 0xff && BOMC2 == 0xfe) {
+        fEncoding = UTF16LE; }
+    else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
+        fEncoding = UTF16BE; }
+    else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
+        // The third byte is only read once the first two match the UTF-8 BOM.
+        fEncoding = UTF8; }
+    else
+    {
+        fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and "
+                        "must include a BOM.\n", fileName);
+        fError = TRUE;
+        return;
+    }
+}
+// Closes the underlying stream. fFile is NULL when the constructor failed to open the
+// file, and passing NULL to fclose() is undefined, so that case is skipped.
+UCharFile::~UCharFile() {
+    if (fFile != NULL) {
+        fclose(fFile);
+    }
+}
+// Returns the next UTF-16 code unit from the file.
+// At end of file returns 0 and makes eof() return TRUE. On corrupt UTF-8 input prints a
+// message, makes error() return TRUE and returns 0. A UTF-8 sequence that encodes a
+// supplementary code point is delivered as two calls: lead surrogate, then trail surrogate.
+//
+// NOTE(review): the header of the continuation-byte loop was damaged in this copy of the
+// file and has been restored from the surrounding code (nBytes, bytes[], the 0xc0 bound)
+// -- confirm against the upstream source.
+UChar UCharFile::get() {
+    UChar c = 0;
+    switch (fEncoding) {
+    case UTF16LE:
+        {
+            int cL, cH;
+            cL = fgetc(fFile);
+            cH = fgetc(fFile);
+            // Test for EOF before combining: shifting the negative EOF value is undefined.
+            if (cH == EOF) {
+                c = 0;
+                fEof = TRUE;
+            } else {
+                c = (UChar)(cL | (cH << 8));
+            }
+            break;
+        }
+    case UTF16BE:
+        {
+            int cL, cH;
+            cH = fgetc(fFile);
+            cL = fgetc(fFile);
+            // cL is read second, so it is EOF whenever either byte is missing.
+            if (cL == EOF) {
+                c = 0;
+                fEof = TRUE;
+            } else {
+                c = (UChar)(cL | (cH << 8));
+            }
+            break;
+        }
+    case UTF8:
+        {
+            if (fPending2ndSurrogate != 0) {
+                c = fPending2ndSurrogate;
+                fPending2ndSurrogate = 0;
+                break;
+            }
+
+            int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type.
+            if (ch == EOF) {
+                c = 0;
+                fEof = TRUE;
+                break;
+            }
+
+            if (ch <= 0x7f) {
+                // It's ascii. No further utf-8 conversion.
+                c = ch;
+                break;
+            }
+
+            // Figure out the length of the char and read the rest of the bytes
+            // into a temp array.
+            int nBytes;
+            if (ch >= 0xF0) {nBytes=4;}
+            else if (ch >= 0xE0) {nBytes=3;}
+            else if (ch >= 0xC0) {nBytes=2;}
+            else {
+                fprintf(stderr, "utf-8 encoded file contains corrupt data.\n");
+                fError = TRUE;
+                return 0;
+            }
+
+            unsigned char bytes[10];
+            bytes[0] = (unsigned char)ch;
+            int i;
+            for (i=1; i<nBytes; i++) {
+                bytes[i] = (unsigned char)fgetc(fFile);
+                // Every trail byte must lie in 0x80..0xBF. An EOF here becomes 0xFF and
+                // is rejected by the same test.
+                if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {
+                    fprintf(stderr, "utf-8 encoded file contains corrupt data.\n");
+                    fError = TRUE;
+                    return 0;
+                }
+            }
+
+            // Convert the bytes from the temp array to a Unicode char.
+            i = 0;
+            uint32_t cp;
+            UTF8_NEXT_CHAR_UNSAFE(bytes, i, cp);
+            c = (UChar)cp;
+
+            if (cp >= 0x10000) {
+                // The code point needs to be broken up into a utf-16 surrogate pair.
+                // Process first half this time through the main loop, and
+                // remember the other half for the next time through.
+                UChar utf16Buf[3];
+                i = 0;
+                UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
+                fPending2ndSurrogate = utf16Buf[1];
+                c = utf16Buf[0];
+            }
+            break;
+        }
+    }
+    return c;
+}
+// openRulesCollator - Command line specified a rules file. Read it in
+// and open a collator with it.
+//
+// Reads the whole of the file named by opt_rules into a growable UTF-16 buffer and
+// passes it to ucol_openRules(). Returns the new collator, or 0 if the file cannot be
+// read, memory runs out, or ICU rejects the rules.
+//
+// NOTE(review): the ucol_openRules() call was cut off after UCOL_OFF in this copy of the
+// file. The remaining arguments (default strength, no parse-error output, &status) follow
+// the public ICU signature -- confirm against the upstream source.
+UCollator *openRulesCollator() {
+    UCharFile f(opt_rules);
+    if (f.error()) {
+        return 0;
+    }
+
+    int bufLen = 10000;
+    UChar *buf = (UChar *)malloc(bufLen * sizeof(UChar));
+    if (buf == NULL) {
+        fprintf(stderr, "Out of memory reading collation rules.\n");
+        return 0;
+    }
+    int i = 0;
+
+    for(;;) {
+        buf[i] = f.get();
+        if (f.eof()) {
+            break;
+        }
+        if (f.error()) {
+            free(buf);
+            return 0;
+        }
+        i++;
+        if (i >= bufLen) {
+            bufLen += 10000;
+            // bufLen counts UChars, so the byte size must be scaled by sizeof(UChar).
+            // Keep the old pointer until realloc has succeeded so it can still be freed.
+            UChar *grown = (UChar *)realloc(buf, bufLen * sizeof(UChar));
+            if (grown == NULL) {
+                fprintf(stderr, "Out of memory reading collation rules.\n");
+                free(buf);
+                return 0;
+            }
+            buf = grown;
+        }
+    }
+    buf[i] = 0;
+
+    UErrorCode status = U_ZERO_ERROR;
+    UCollator *coll = ucol_openRules(buf, u_strlen(buf), UCOL_OFF,
+                                     UCOL_DEFAULT_STRENGTH, NULL, &status);
+    // The buffer is released on both the success and the failure path.
+    free(buf);
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "ICU ucol_openRules() open failed.: %d\n", status);
+        return 0;
+    }
+    return coll;
+}
+// Main -- process command line, read in and pre-process the test file,
+// call other functions to do the actual tests.
+int main(int argc, const char** argv) {
+ if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {
+ printf(gUsageString);
+ exit (1);
+ }
+ // Make sure that we've only got one API selected.
+ if (opt_unix || opt_win) opt_icu = FALSE;
+ if (opt_unix) opt_win = FALSE;
+ //
+ // Set up an ICU collator
+ //
+ UErrorCode status = U_ZERO_ERROR;
+ if (opt_rules != 0) {
+ gCol = openRulesCollator();
+ if (gCol == 0) {return -1;}
+ }
+ else {
+ gCol = ucol_open(opt_locale, &status);
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "Collator creation failed.: %d\n", status);
+ return -1;
+ }
+ }
+ if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
+ fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
+ }
+ if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
+ fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale);
+ }
+ if (opt_norm) {
+ ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+ }
+ if (opt_french && opt_frenchoff) {
+ fprintf(stderr, "collperf: Error, specified both -french and -frenchoff options.");
+ exit(-1);
+ }
+ if (opt_french) {
+ ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
+ }
+ if (opt_frenchoff) {
+ ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
+ }
+ if (opt_lower) {
+ ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
+ }
+ if (opt_upper) {
+ ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
+ }
+ if (opt_case) {
+ ucol_setAttribute(gCol, UCOL_CASE_LEVEL, UCOL_ON, &status);
+ }
+ if (opt_shifted) {
+ ucol_setAttribute(gCol, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
+ }
+ if (opt_level != 0) {
+ switch (opt_level) {
+ case 1:
+ ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_PRIMARY, &status);
+ break;
+ case 2:
+ ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_SECONDARY, &status);
+ break;
+ case 3:
+ ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_TERTIARY, &status);
+ break;
+ case 4:
+ ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
+ break;
+ case 5:
+ ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
+ break;
+ default:
+ fprintf(stderr, "-level param must be between 1 and 5\n");
+ exit(-1);
+ }
+ }
+ if (U_FAILURE(status)) {
+ fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
+ return -1;
+ }
+ //
+ // Set up a Windows LCID
+ //
+ if (opt_langid != 0) {
+ gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
+ }
+ else {
+ gWinLCID = uloc_getLCID(opt_locale);
+ }
+ //
+ // Set the UNIX locale
+ //
+ if (opt_unix) {
+ if (setlocale(LC_ALL, opt_locale) == 0) {
+ fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
+ exit(-1);
+ }
+ }
+ // Read in the input file.
+ // File assumed to be utf-16.
+ // Lines go onto heap buffers. Global index array to line starts is created.
+ // Lines themselves are null terminated.
+ //
+ UCharFile f(opt_fName);
+ if (f.error()) {
+ exit(-1);
+ }
+ const int MAXLINES = 40000;
+ gFileLines = new Line[MAXLINES];
+ UChar buf[1024];
+ int column = 0;
+ // Read the file, split into lines, and save in memory.
+ // Loop runs once per utf-16 value from the input file,
+ // (The number of bytes read from file per loop iteration depends on external encoding.)
+ for (;;) {
+ UChar c = f.get();
+ if (f.error()){
+ exit(-1);
+ }
+ // We now have a good UTF-16 value in c.
+ // Watch for CR, LF, EOF; these finish off a line.
+ if (c == 0xd) {
+ continue;
+ }
+ if (f.eof() || c == 0x0a || c==0x2028) { // Unipad inserts 2028 line separators!
+ buf[column++] = 0;
+ if (column > 1) {
+ gFileLines[gNumFileLines].name = new UChar[column];
+ gFileLines[gNumFileLines].len = column-1;
+ memcpy(gFileLines[gNumFileLines].name, buf, column * sizeof(UChar));
+ gNumFileLines++;
+ column = 0;
+ if (gNumFileLines >= MAXLINES) {
+ fprintf(stderr, "File too big. Max number of lines is %d\n", MAXLINES);
+ exit(-1);
+ }
+ }
+ if (c == 0xa || c == 0x2028)
+ continue;
+ else
+ break; // EOF
+ }
+ buf[column++] = c;
+ if (column >= 1023)
+ {
+ static UBool warnFlag = TRUE;
+ if (warnFlag) {
+ fprintf(stderr, "Warning - file line longer than 1023 chars truncated.\n");
+ warnFlag = FALSE;
+ }
+ column--;
+ }
+ }
+ if (opt_terse == FALSE) {
+ printf("file \"%s\", %d lines.\n", opt_fName, gNumFileLines);
+ }
+ // Convert the lines to the UNIX encoding.
+ if (opt_unix) {
+ UnixConvert();
+ }
+ //
+ // Pre-compute ICU sort keys for the lines of the file.
+ //
+ int line;
+ int t;
+ for (line=0; line sizeof(buf)) {
+ t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)gFileLines[line].icuSortKey , t);
+ }
+ else
+ {
+ memcpy(gFileLines[line].icuSortKey, buf, t);
+ }
+ }
+ //
+ // Pre-compute Windows sort keys for the lines of the file.
+ //
+ for (line=0; line sizeof(buf)) {
+ t = LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, (unsigned short *)(gFileLines[line].winSortKey), t);
+ }
+ else
+ {
+ memcpy(gFileLines[line].winSortKey, buf, t);
+ }
+ }
+ //
+ // Pre-compute UNIX sort keys for the lines of the file.
+ //
+ if (opt_unix) {
+ for (line=0; line sizeof(buf)) {
+ t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, sizeof(buf));
+ }
+ else
+ {
+ memcpy(gFileLines[line].unixSortKey, buf, t);
+ }
+ }
+ }
+ //
+ // Dump file lines, CEs, Sort Keys if requested.
+ //
+ if (opt_dump) {
+ int i;
+ for (line=0; line 0x7e) {
+ printf("\\u%.4x", c);
+ }
+ else {
+ printf("%c", c);
+ }
+ }
+ printf("\n");
+ printf(" CEs: ");
+ UCollationElements *CEiter = ucol_openElements(gCol, gFileLines[line].name, -1, &status);
+ int32_t ce;
+ i = 0;
+ for (;;) {
+ ce = ucol_next(CEiter, &status);
+ if (ce == UCOL_NULLORDER) {
+ break;
+ }
+ printf(" %.8x", ce);
+ if (++i > 8) {
+ printf("\n ");
+ i = 0;
+ }
+ }
+ printf("\n");
+ ucol_closeElements(CEiter);
+ printf(" ICU Sort Key: ");
+ for (i=0; ; i++) {
+ unsigned char c = gFileLines[line].icuSortKey[i];
+ printf("%02x ", c);
+ if (c == 0) {
+ break;
+ }
+ if (i > 0 && i % 20 == 0) {
+ printf("\n ");
+ }
+ }
+ printf("\n");
+ }
+ }
+ //
+ // Pre-sort the lines.
+ //
+ int i;
+ gSortedLines = new Line *[gNumFileLines];
+ for (i=0; i
diff --git a/icuSources/test/unalignedtest/readme b/icuSources/test/unalignedtest/readme
new file mode 100644
index 00000000..acf10a1c
--- /dev/null
+++ b/icuSources/test/unalignedtest/readme
@@ -0,0 +1,27 @@
+Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
+This is a test to find out whether ICU is 64-bit clean. It runs cintltst and intltest through gdb and raises a SIGBUS fault
+whenever the kernel encounters an unaligned trap.
+Build and Usage
+To build this test:
+i) Build and test ICU
+ii) cd to /source and run the following command to build the Makefile
+CONFIG_FILES=./test/unalignedtest/Makefile CONFIG_HEADERS= ./config.status
+iii) cd to /source/test/unalignedtest and run 'make' to build the executable
+iv) For testing cintltst run 'make check-cintltst'
+v) For testing intltest run 'make check-intltest'
+vi) To find out if any of the tools are performing unaligned traps:
+ a) cd to /source/data
+ b) run 'make clean'
+ c) run 'env LEAK_CHECKER="/source/test/unalignedtest/unalignedtest -b" make'
diff --git a/icuSources/test/unalignedtest/unaligned.c b/icuSources/test/unalignedtest/unaligned.c
new file mode 100644
index 00000000..69137a30
--- /dev/null
+++ b/icuSources/test/unalignedtest/unaligned.c
@@ -0,0 +1,304 @@
+ This program is a wrapper to assist in debugging unaligned traps on the Alpha
+ architectures.
+ Copyright (c) 2002 Sean Hunter
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, and/or sell copies of the Software, and to permit persons
+ to whom the Software is furnished to do so, provided that the above
+ copyright notice(s) and this permission notice appear in all copies of
+ the Software and that both the above copyright notice(s) and this
+ permission notice appear in supporting documentation.
+ Except as contained in this notice, the name of a copyright holder
+ shall not be used in advertising or otherwise to promote the sale, use
+ or other dealings in this Software without prior written authorization
+ of the copyright holder.
+ --------------------------------------------------------------------------------
+ All trademarks and registered trademarks mentioned herein are the property
+ of their respective owners.
+#define TMP_PATH_MAX 1024
+static int /* thin wrapper that forwards its six arguments to the osf_setsysinfo system call */
+setsysinfo(unsigned long op, void *buffer, unsigned long size,
+ int *start, void *arg, unsigned long flag)
+ syscall(__NR_osf_setsysinfo, op, buffer, size, start, arg, flag); /* NOTE(review): the syscall result is not returned although the function is declared int */
+ unsigned int buf[2]; /* NOTE(review): no function header is visible above this line; main() calls trap_unaligned(), presumably this is its body -- confirm */
+ buf[0] = SSIN_UACPROC; /* NOTE(review): buf[1] is never assigned in the visible text -- confirm against upstream */
+ setsysinfo(SSI_NVPAIRS, buf, 1, 0, 0, 0); /* passes buf as one name/value pair; the remaining arguments are zero */
+static void /* NOTE(review): the function-name line is not visible here; main() calls usage(), presumably this is its definition -- confirm */
+ fprintf(stderr, /* the help text goes to stderr, not stdout */
+ "usage: unaligned [-b] [command-args...]\n\n"
+ " This program is designed to assist debugging of\n"
+ " unaligned traps by running the program in gdb\n"
+ " and causing it to get SIGBUS when it encounters\n"
+ " an unaligned trap.\n\n"
+ " It is free software written by Sean Hunter \n"
+ " based on code by Richard Henderson and Andrew Morgan.\n\n"
+ );
+ exit(1); /* never returns: always terminates the process with status 1 */
+/*
+ * Entry point: call trap_unaligned(), then replace this process with gdb
+ * running the requested command.
+ *
+ * Command line: unaligned [-b] command [command-args...]
+ *   -b   run gdb in batch mode (gdb -batch)
+ *
+ * With a bare command (no -b, no arguments) gdb is exec'd directly on it.
+ * Otherwise a bash command line is built that feeds gdb a script through
+ * process substitution: "file <command>", "set args <command-args>" and,
+ * unless NOAUTORUN is defined, "run".
+ *
+ * Does not return on success.  If the exec fails, the error is reported and
+ * the process exits through usage().
+ */
+int
+main(int argc, char **argv)
+{
+    const char my_debugger[] = "/usr/bin/gdb";
+    const char *command;
+    char *temp_str;
+    char *curr;
+    size_t size = 0;
+    int curr_arg;
+    int first_arg;          /* index of the first argument that belongs to the command */
+    int isBatchMode = 0;
+
+    /* check that we have at least 1 argument */
+    if (argc < 2) {
+        usage();
+    }
+    if (strcmp("-b", argv[1]) == 0) {
+        /* "-b" alone names no command to debug */
+        if (argc < 3) {
+            usage();
+        }
+        isBatchMode = 1;
+        curr_arg = 2;
+    } else {
+        curr_arg = 1;
+    }
+    command = argv[curr_arg];
+    first_arg = curr_arg + 1;
+
+    trap_unaligned();
+
+    if (argc > 2) {
+        /* We're going to use bash process redirection to create a "file" for gdb to read
+         * containing the arguments we need */
+
+        /* 2048 covers the fixed text; the command is written twice below but
+         * visited only once by the loop, and every argument needs a separator. */
+        size = 2048 + strlen(command);
+        for (; curr_arg < argc; curr_arg++) {
+            size += strlen(argv[curr_arg]) + 1;
+        }
+        temp_str = (char *) malloc(sizeof(char) * size);
+        if (!temp_str) {
+            fprintf(stderr, "Unable to malloc memory for string use: %s\n", strerror(errno));
+            exit(255);
+        }
+
+        /* NOTE(review): the command and its arguments are pasted unquoted into
+         * a bash command line, so shell metacharacters in them are interpreted. */
+        curr = temp_str;
+        if (isBatchMode == 1) {
+            curr += sprintf(curr, "%s -batch %s -x <( echo file %s; echo set args", my_debugger, command, command);
+        } else {
+            curr += sprintf(curr, "%s %s -x <( echo file %s; echo set args", my_debugger, command, command);
+        }
+        /* Start after the command itself, so that in batch mode the command
+         * is not handed to itself as its first argument. */
+        for (curr_arg = first_arg; curr_arg < argc; curr_arg++) {
+            curr += sprintf(curr, " %s", argv[curr_arg]);
+        }
+#ifndef NOAUTORUN
+        curr += sprintf(curr, "; echo run");
+#endif
+        sprintf(curr, ")");
+
+        execlp("/bin/bash", "/bin/bash", "-c", temp_str, (char *) NULL);
+    }
+    else {
+        execlp(my_debugger, my_debugger, command, (char *) NULL);
+    }
+
+    /* if we fall through to here, our exec failed -- announce the fact */
+    fprintf(stderr, "Unable to execute command: %s\n", strerror(errno));
+    usage();
+    return 1; /* not reached: usage() exits */
+}
+/* use gcc unaligned.c -o unaligned to compile. Add -DNOAUTORUN if you
+don't want gdb to automatically run the program */
diff --git a/icuSources/test/usetperf/bitset.cpp b/icuSources/test/usetperf/bitset.cpp
index 00000000..cdff2adb
--- /dev/null
+++ b/icuSources/test/usetperf/usetperf.cpp
@@ -0,0 +1,122 @@
+* Copyright (c) 2002-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+* 2002-09-20 aliu Created.
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/usetiter.h"
+#include "bitset.h"
+#include "timer.h"
+#define LENGTH(a) (sizeof(a)/sizeof(a[0]))
+int main(int argc, const char *argv[]) {
+ Timer timer;
+ BitSet bs;
+ UnicodeSet us;
+ int32_t i, j, n, temp;
+ UChar32 cp;
+ double t;
+ int32_t PROPS[] = {
+ // category iterations for add, contains, iterator
+ U_TITLECASE_LETTER, 100, 100, 20000000,
+ U_UNASSIGNED, 30, 100, 20000000,
+ };
+ for (j=0; j %f ms/loop\n", t, t*1e3/n);
+ // contains()
+ n = PROPS[j+2];
+ printf("Testing contains() x %d...", n);
+ temp = 0;
+ timer.start();
+ for (i=0; i %f ms/loop\n", t, t*1e3/n);
+ // iterator
+ n = PROPS[j+3];
+ printf("Testing iterator x %d...", n);
+ temp = 0;
+ timer.start();
+ for (i=0; i %f ns/loop\n", t, t*1e9/n);
+ }
+ char* PAT[] = {
+ "['A-Za-z\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u01FB\\u0200-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u212A-\\u212B]",
+ "['.0-9A-Za-z~\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u0301\\u0303-\\u0304\\u0306-\\u0307\\u0310\\u0314-\\u0315\\u0323\\u0325\\u0331\\u0341\\u0344\\u0385-\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u0390\\u03AC-\\u03B0\\u03CC-\\u03CE\\u03D3\\u0403\\u040C\\u040E\\u0419\\u0439\\u0453\\u045C\\u045E\\u04C1-\\u04C2\\u04D0-\\u04D1\\u04D6-\\u04D7\\u04E2-\\u04E3\\u04EE-\\u04EF\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u1F01\\u1F03-\\u1F05\\u1F07\\u1F09\\u1F0B-\\u1F0D\\u1F0F\\u1F11\\u1F13-\\u1F15\\u1F19\\u1F1B-\\u1F1D\\u1F21\\u1F23-\\u1F25\\u1F27\\u1F29\\u1F2B-\\u1F2D\\u1F2F\\u1F31\\u1F33-\\u1F35\\u1F37\\u1F39\\u1F3B-\\u1F3D\\u1F3F\\u1F41\\u1F43-\\u1F45\\u1F49\\u1F4B-\\u1F4D\\u1F51\\u1F53-\\u1F55\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F\\u1F61\\u1F63-\\u1F65\\u1F67\\u1F69\\u1F6B-\\u1F6D\\u1F6F\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79\\u1F7B\\u1F7D\\u1F81\\u1F83-\\u1F85\\u1F87\\u1F89\\u1F8B-\\u1F8D\\u1F8F\\u1F91\\u1F93-\\u1F95\\u1F97\\u1F99\\u1F9B-\\u1F9D\\u1F9F\\u1FA1\\u1FA3-\\u1FA5\\u1FA7\\u1FA9\\u1FAB-\\u1FAD\\u1FAF-\\u1FB1\\u1FB4\\u1FB8-\\u1FB9\\u1FBB\\u1FC4\\u1FC9\\u1FCB\\u1FCE\\u1FD0-\\u1FD1\\u1FD3\\u1FD8-\\u1FD9\\u1FDB\\u1FDE\\u1FE0-\\u1FE1\\u1FE3\\u1FE5\\u1FE8-\\u1FE9\\u1FEB-\\u1FEC\\u1FEE\\u1FF4\\u1FF9\\u1FFB\\u212A-\\u212B\\uE04D\\uE064]",
+ "[\\u0901-\\u0903\\u0905-\\u0939\\u093C-\\u094D\\u0950-\\u0954\\u0958-\\u096F]",
+ };
+ UErrorCode ec = U_ZERO_ERROR;
+ n = 2000;
+ for (j=0; j %f us/loop\n", t, t*1e6/n);
+ }
+ return 0;
diff --git a/icuSources/test/usetperf/usetperf.dsp b/icuSources/test/usetperf/usetperf.dsp
+static void
+roundtrip(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) {
+ const UChar *pIn, *pInLimit;
+ UChar *pOut, *pOutLimit;
+ char *pInter, *pInterLimit, *p;
+ UBool flush;
+ ucnv_reset(cnv);
+ pIn=input;
+ pInLimit=input+inputLength;
+ pOut=output;
+ pOutLimit=output+OUTPUT_CAPACITY;
+ pInterLimit=intermediate+intermediateCapacity;
+ encodedLength=outputLength=0;
+ flush=FALSE;
+ while(pIn0);
+ _time=timeGetTime()-_time;
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "error in roundtrip conversion (%s): %s\n", encName, u_errorName(errorCode));
+ return 0x7fffffff;
+ }
+ if(0!=u_memcmp(input, output, inputLength)) {
+ fprintf(stderr, "error: roundtrip failed, input[]!=output[]\n");
+ return 0x7fffffff;
+ }
+ return _time;
+static void
+perEncAndCapacity(UConverter *cnv, const char *encName, int32_t intermediateCapacity) {
+ double rtTime;
+ unsigned long _time;
+ int32_t n;
+ /*printf("test performance for %s with intermediate capacity %d\n", encName, intermediateCapacity);*/
+ /* warm up caches and estimate loop time */
+ n=10;
+ for(;;) {
+ _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n);
+ if(_time<500 && _time0) {
+ /* convert the block */
+ p=intermediate;
+ limit=p+length;
+ ucnv_toUnicode(cnv,
+ &pOut, pOutLimit,
+ &p, limit,
+ &errorCode);
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode));
+ ucnv_close(cnv);
+ return FALSE;
+ }
+ /* read the next block */
+ length=readBlock(in);
+ if(length<0) {
+ ucnv_close(cnv);
+ return FALSE;
+ }
+ }
+ /* flush the converter */
+ ucnv_toUnicode(cnv,
+ &pOut, pOutLimit,
+ &p, p,
+ &errorCode);
+ ucnv_close(cnv);
+ if(U_FAILURE(errorCode)) {
+ fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode));
+ return FALSE;
+ }
+ inputLength=(int32_t)(pOut-input);
+ countInputCodePoints=u_countChar32(input, inputLength);
+ if(inputLength<=0) {
+ fprintf(stderr, "warning: input is empty\n");
+ return FALSE;
+ }
+ return TRUE;
+/*
+ * Write the command-line synopsis to stderr.
+ * myName is substituted for the program name on the synopsis line.
+ */
+static void
+showUsage(const char *myName) {
+    static const char synopsis[]=
+        "Usage:\n"
+        "%s [-e encoding-name] filename | '-'\n"
+        " encoding-name must be the name of an encoding supported by ICU\n"
+        " the filename of the input file with text to be used\n"
+        " can be a dash (-) for standard input\n";
+
+    fprintf(stderr, synopsis, myName);
+}
+ * Read file using some encoding, convert to 1M UTF-16 input buffer.
+ * For each UTF to be tested:
+ * n times:
+ * convert from UTF-16 input buffer to UTF, 4kB buffer
+ * convert from 4kB buffer to 1M UTF-16 output buffer
+ * adjust n so that time elapsed is 10s (#define)
+ * ->divide 10s by time, increase n by that factor, run 2nd time
+ * n times:
+ * empty function
+ * subtract out loop/function overhead
+ * display #code points - #UTF bytes - time per roundtrip
+ *
+ * * do the same again with an intermediate buffer size of 20 instead of 4kB
+ *
+ * Test following UTFs:
+ * UTF-16BE, UTF-16LE, UTF-8, SCSU, BOCU-1, CESU-8
+ *
+ * Command-line arguments:
+ * - encoding (default UTF-8, detect BOM)
+ * - filename (allow "-")
+ */
+/*
+ * Entry point: parse "[-e encoding-name] filename | -", load the input text
+ * with readInput(), then run testPerformance().
+ *
+ * Returns 0 on success, 1 on a command-line usage error, and 2 when the input
+ * cannot be opened or read.
+ */
+extern int
+main(int argc, const char *argv[]) {
+    FILE *in;
+    const char *myName, *encName, *filename, *basename;
+
+    myName=argv[0];
+    if(argc<2) {
+        showUsage(myName);
+        return 1;
+    }
+
+    /* get encoding name argument; both "-eNAME" and "-e NAME" are accepted */
+    if(argv[1][0]=='-' && argv[1][1]=='e') {
+        encName=argv[1]+2;
+        --argc;
+        ++argv;
+        if(*encName==0) {
+            /* the name was not attached to -e, so it must be the next argument */
+            if(argc<2) {
+                showUsage(myName);
+                return 1;
+            }
+            encName=argv[1];
+            --argc;
+            ++argv;
+        }
+    } else {
+        /* no -e option: readInput() receives NULL for the encoding */
+        encName=NULL;
+    }
+
+    /* get filename argument */
+    if(argc<2) {
+        showUsage(myName);
+        return 1;
+    }
+    filename=argv[1];
+    if(filename[0]=='-' && filename[1]==0) {
+        filename="(standard input)";
+        in=stdin;
+        /* set stdin to binary mode */
+        _setmode(_fileno(stdin), _O_BINARY);
+    } else {
+        in=fopen(filename, "rb");
+        if(in==NULL) {
+            fprintf(stderr, "error opening \"%s\"\n", filename);
+            showUsage(myName);
+            return 2;
+        }
+    }
+
+    /* read input */
+    basename=strrchr(filename, U_FILE_SEP_CHAR);
+    if(basename!=NULL) {
+        ++basename;
+    } else {
+        basename=filename;
+    }
+    printf("# testing converter performance with file \"%s\"\n", basename);
+    if(!readInput(in, encName)) {
+        /* encName is NULL when -e was not given; passing NULL for %s is undefined behavior */
+        fprintf(stderr, "error reading \"%s\" (encoding %s)\n", filename, encName!=NULL ? encName : "(default)");
+        if(in!=stdin) {
+            fclose(in);
+        }
+        showUsage(myName);
+        return 2;
+    }
+    if(in!=stdin) {
+        fclose(in);
+    }
+
+    /* test performance */
+    testPerformance();
+    return 0;
+}
diff --git a/icuSources/test/utfperf/utfperf.dsp b/icuSources/test/utfperf/utfperf.dsp
+* T*MAP: codepage->Unicode
+* To compile, just call a C compiler/linker with this source file.
+* On Windows: cl rptp2ucm.c
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+/* see */
diff --git a/icuSources/tools/makeconv/misc/ucmmerge.c b/icuSources/tools/makeconv/misc/ucmmerge.c
new file mode 100644
index 00000000..a8035bba
--- /dev/null
+++ b/icuSources/tools/makeconv/misc/ucmmerge.c
@@ -0,0 +1,26 @@
+* Copyright (C) 2000, International Business Machines
+* Corporation and others. All Rights Reserved.
+* file name: ucmmerge.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+* created on: 2000nov09
+* created by: Markus W. Scherer
+* This tool reads two .ucm files and merges them.
+* Merging the files allows updating the ICU data while keeping ICU-specific
+* changes like "MBCS"->"EBCDIC_STATEFUL" or adding .
+* To compile, just call a C compiler/linker with this source file.
+* On Windows: cl ucmmerge.c
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+/* see */
diff --git a/icuSources/tools/makeconv/misc/ucmstrip.c b/icuSources/tools/makeconv/misc/ucmstrip.c
new file mode 100644
index 00000000..751af780
--- /dev/null
+++ b/icuSources/tools/makeconv/misc/ucmstrip.c
@@ -0,0 +1,28 @@
+* Copyright (C) 2000, International Business Machines
+* Corporation and others. All Rights Reserved.
+* file name: ucmstrip.c
+* encoding: US-ASCII
+* tab size: 8 (not used)
+* indentation:4
+* created on: 2000nov09
+* created by: Markus W. Scherer
+* This tool reads a .ucm file, expects there to be a line in the header with
+* "File created on..." and removes the lines before and including that.
+* Then it removes lines with and and .
+* This helps comparing .ucm files with different copyright statements and
+* different state specifications.
+* To compile, just call a C compiler/linker with this source file.
+* On Windows: cl ucmstrip.c
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+/* see */
