X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/03115e547b9a040702fdddc7f9bf4c315a8c62a7..724c79ffa17a77cdef4b9590621d31d99a5a3745:/icuSources/test/utfperf/utfperf.c?ds=sidebyside diff --git a/icuSources/test/utfperf/utfperf.c b/icuSources/test/utfperf/utfperf.c new file mode 100644 index 00000000..dfe7e3f8 --- /dev/null +++ b/icuSources/test/utfperf/utfperf.c @@ -0,0 +1,450 @@ +/* +********************************************************************** +* Copyright (C) 2002, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* file name: utfperf.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002apr17 +* created by: Markus W. Scherer +* +* Performance test program for Unicode converters +* (converters that support all Unicode code points). +* Takes a UTF-8 file as input. +*/ + +#include +#include + +#include /* for _O_BINARY */ +#include /* for _setmode() */ + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# include +#else +# include + static unsigned long + timeGetTime() { + struct timeval t; + + gettimeofday(&t, 0); + return t.tv_sec*1000+t.tv_usec/1000; + }; +#endif + +#include "unicode/utypes.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" + +/* definitions and text buffers */ + +#define INPUT_CAPACITY (1024*1024) +#define INTERMEDIATE_CAPACITY 4096 +#define INTERMEDIATE_SMALL_CAPACITY 20 +#define OUTPUT_CAPACITY INPUT_CAPACITY + +#define TARGET_MEASURE_TIME_MS 2000 + +#define PERCENT(a, b) (int)(((a)*200+1)/(2*(b))) + +#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) + +static UChar input[INPUT_CAPACITY], output[OUTPUT_CAPACITY]; +static char intermediate[INTERMEDIATE_CAPACITY]; + +static int32_t inputLength, encodedLength, outputLength, countInputCodePoints; + +static int32_t utf8Length=0; +static double utf8Time=0.; + +static const char *const +utfNames[]={ + "UTF-8", /* UTF-8 should always be first to serve as percentage reference */ + "SCSU", "BOCU-1" /*, "CESU-8" *//*, "UTF-16BE", "UTF-16LE"*//*, "GB18030"*/ +}; + +/* functions */ + +typedef void +RoundtripFn(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode); + +static void +roundtrip(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) { + const UChar *pIn, *pInLimit; + UChar *pOut, *pOutLimit; + char *pInter, *pInterLimit, *p; + UBool flush; + + ucnv_reset(cnv); + + pIn=input; + pInLimit=input+inputLength; + + pOut=output; + pOutLimit=output+OUTPUT_CAPACITY; + + pInterLimit=intermediate+intermediateCapacity; + + encodedLength=outputLength=0; + flush=FALSE; + + while(pIn0); + _time=timeGetTime()-_time; + + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error in roundtrip conversion (%s): %s\n", encName, u_errorName(errorCode)); + return 0x7fffffff; + } + + if(0!=u_memcmp(input, output, inputLength)) { + fprintf(stderr, "error: roundtrip failed, input[]!=output[]\n"); + return 0x7fffffff; + } + + return _time; +} + +static void +perEncAndCapacity(UConverter *cnv, const char *encName, int32_t intermediateCapacity) { + double rtTime; + unsigned long _time; + int32_t n; + + /*printf("test performance for %s with intermediate capacity %d\n", encName, intermediateCapacity);*/ + + /* warm up caches and estimate loop time */ + n=10; + for(;;) { + _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n); + if(_time<500 && _time0) { + /* convert the block */ + p=intermediate; + limit=p+length; + + ucnv_toUnicode(cnv, + &pOut, pOutLimit, + &p, limit, + NULL, FALSE, + &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode)); + ucnv_close(cnv); + return FALSE; + } + + /* read the next block */ + length=readBlock(in); + if(length<0) { + ucnv_close(cnv); + return FALSE; + } + } + + /* flush the converter */ + ucnv_toUnicode(cnv, + &pOut, pOutLimit, + &p, p, + NULL, TRUE, + &errorCode); + ucnv_close(cnv); + + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode)); + return FALSE; + } + + inputLength=(int32_t)(pOut-input); + countInputCodePoints=u_countChar32(input, inputLength); + if(inputLength<=0) { + fprintf(stderr, "warning: input is empty\n"); + return FALSE; + } + + return TRUE; +} + +static void +showUsage(const char *myName) { + fprintf(stderr, + "Usage:\n" + "%s [-e encoding-name] filename | '-'\n" + " encoding-name must be the name of an encoding supported by ICU\n" + " the filename of the input file with text to be used\n" + " can be a dash (-) for standard input\n", + myName); +} + +/* + * Read file using some encoding, convert to 1M UTF-16 input buffer. + * For each UTF to be tested: + * n times: + * convert from UTF-16 input buffer to UTF, 4kB buffer + * convert from 4kB buffer to 1M UTF-16 output buffer + * adjust n so that time elapsed is 10s (#define) + * ->divide 10s by time, increase n by that factor, run 2nd time + * n times: + * empty function + * subtract out loop/function overhead + * display #code points - #UTF bytes - time per roundtrip + * + * * do the same again with an intermediate buffer size of 20 instead of 4kB + * + * Test following UTFs: + * UTF-16BE, UTF-16LE, UTF-8, SCSU, BOCU-1, CESU-8 + * + * Command-line arguments: + * - encoding (default UTF-8, detect BOM) + * - filename (allow "-") + */ +extern int +main(int argc, const char *argv[]) { + FILE *in; + const char *myName, *encName, *filename, *basename; + + myName=argv[0]; + if(argc<2) { + showUsage(myName); + return 1; + } + + /* get encoding name argument */ + if(argv[1][0]=='-' && argv[1][1]=='e') { + encName=argv[1]+2; + --argc; + ++argv; + if(*encName==0) { + if(argc<2) { + showUsage(myName); + return 1; + } + encName=argv[1]; + --argc; + ++argv; + } + } else { + encName=NULL; + } + + /* get filename argument */ + if(argc<2) { + showUsage(myName); + return 1; + } + filename=argv[1]; + if(filename[0]=='-' && filename[1]==0) { + filename="(standard input)"; + in=stdin; + /* set stdin to binary mode */ + _setmode(_fileno(stdin), _O_BINARY); + } else { + in=fopen(filename, "rb"); + if(in==NULL) { + fprintf(stderr, "error opening \"%s\"\n", filename); + showUsage(myName); + return 2; + } + } + + /* read input */ + basename=strrchr(filename, U_FILE_SEP_CHAR); + if(basename!=NULL) { + ++basename; + } else { + basename=filename; + } + printf("# testing converter performance with file \"%s\"\n", basename); + if(!readInput(in, encName)) { + fprintf(stderr, "error reading \"%s\" (encoding %s)\n", filename, encName); + showUsage(myName); + return 2; + } + if(in!=stdin) { + fclose(in); + } + + /* test performance */ + testPerformance(); + return 0; +}