git.saurik.com Git - apple/icu.git/blobdiff - icuSources/test/utfperf/utfperf.c
ICU-6.2.21.tar.gz
[apple/icu.git] / icuSources / test / utfperf / utfperf.c
diff --git a/icuSources/test/utfperf/utfperf.c b/icuSources/test/utfperf/utfperf.c
deleted file mode 100644 (file)
index dfe7e3f..0000000
--- a/icuSources/test/utfperf/utfperf.c
+++ /dev/null
@@ -1,450 +0,0 @@
-/*  
-**********************************************************************
-*   Copyright (C) 2002, International Business Machines
-*   Corporation and others.  All Rights Reserved.
-**********************************************************************
-*   file name:  utfperf.c
-*   encoding:   US-ASCII
-*   tab size:   8 (not used)
-*   indentation:4
-*
-*   created on: 2002apr17
-*   created by: Markus W. Scherer
-*
-*   Performance test program for Unicode converters
-*   (converters that support all Unicode code points).
-*   Takes a UTF-8 file as input.
-*/
-
-#include <stdio.h>
-#include <string.h>
-
-#include <fcntl.h>     /* for _O_BINARY */
-#include <io.h>                /* for _setmode() */
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#   include <windows.h>
-#else
-#   include <sys/time.h>
-    static unsigned long
-    timeGetTime() {
-        struct timeval t;
-
-        gettimeofday(&t, 0);
-        return t.tv_sec*1000+t.tv_usec/1000;
-    };
-#endif
-
-#include "unicode/utypes.h"
-#include "unicode/ucnv.h"
-#include "unicode/ustring.h"
-
-/* definitions and text buffers */
-
-#define INPUT_CAPACITY (1024*1024)
-#define INTERMEDIATE_CAPACITY 4096
-#define INTERMEDIATE_SMALL_CAPACITY 20
-#define OUTPUT_CAPACITY INPUT_CAPACITY
-
-#define TARGET_MEASURE_TIME_MS 2000
-
-#define PERCENT(a, b) (int)(((a)*200+1)/(2*(b)))
-
-#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
-
-static UChar input[INPUT_CAPACITY], output[OUTPUT_CAPACITY];
-static char intermediate[INTERMEDIATE_CAPACITY];
-
-static int32_t inputLength, encodedLength, outputLength, countInputCodePoints;
-
-static int32_t utf8Length=0;
-static double utf8Time=0.;
-
-static const char *const
-utfNames[]={
-    "UTF-8", /* UTF-8 should always be first to serve as percentage reference */
-    "SCSU", "BOCU-1" /*, "CESU-8" *//*, "UTF-16BE", "UTF-16LE"*//*, "GB18030"*/
-};
-
-/* functions */
-
-typedef void
-RoundtripFn(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode);
-
-static void
-roundtrip(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) {
-    const UChar *pIn, *pInLimit;
-    UChar *pOut, *pOutLimit;
-    char *pInter, *pInterLimit, *p;
-    UBool flush;
-
-    ucnv_reset(cnv);
-
-    pIn=input;
-    pInLimit=input+inputLength;
-
-    pOut=output;
-    pOutLimit=output+OUTPUT_CAPACITY;
-
-    pInterLimit=intermediate+intermediateCapacity;
-
-    encodedLength=outputLength=0;
-    flush=FALSE;
-
-    while(pIn<pInLimit || !flush) {
-        /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
-        pInter=intermediate;
-        flush=(UBool)(pIn==pInLimit);
-        ucnv_fromUnicode(cnv,
-                         &pInter, pInterLimit,
-                         &pIn, pInLimit,
-                         NULL, flush,
-                         pErrorCode);
-        encodedLength+=(int32_t)(pInter-intermediate);
-
-        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-            /* in case flush was TRUE make sure that we convert once more to really flush */
-            flush=FALSE;
-            *pErrorCode=U_ZERO_ERROR;
-        } else if(U_FAILURE(*pErrorCode)) {
-            return;
-        }
-
-        /* convert the block [intermediate..pInter[ back to UTF-16 */
-        p=intermediate;
-        ucnv_toUnicode(cnv,
-                       &pOut, pOutLimit,
-                       &p, pInter,
-                       NULL, flush,
-                       pErrorCode);
-        if(U_FAILURE(*pErrorCode)) {
-            return;
-        }
-        /* intermediate must have been consumed (p==pInter) because of the converter semantics */
-    }
-
-    outputLength=pOut-output;
-    if(inputLength!=outputLength) {
-        fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
-        *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
-    }
-}
-
-static void
-noop(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) {
-    /* do nothing */
-}
-
-static unsigned long
-measureRoundtrips(RoundtripFn *fn, UConverter *cnv, const char *encName, int32_t intermediateCapacity, int32_t n) {
-    unsigned long _time;
-    UErrorCode errorCode;
-
-    _time=timeGetTime();
-    errorCode=U_ZERO_ERROR;
-    do {
-        fn(cnv, intermediateCapacity, &errorCode);
-    } while(U_SUCCESS(errorCode) && --n>0);
-    _time=timeGetTime()-_time;
-
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "error in roundtrip conversion (%s): %s\n", encName, u_errorName(errorCode));
-        return 0x7fffffff;
-    }
-
-    if(0!=u_memcmp(input, output, inputLength)) {
-        fprintf(stderr, "error: roundtrip failed, input[]!=output[]\n");
-        return 0x7fffffff;
-    }
-
-    return _time;
-}
-
-static void
-perEncAndCapacity(UConverter *cnv, const char *encName, int32_t intermediateCapacity) {
-    double rtTime;
-    unsigned long _time;
-    int32_t n;
-
-    /*printf("test performance for %s with intermediate capacity %d\n", encName, intermediateCapacity);*/
-
-    /* warm up caches and estimate loop time */
-    n=10;
-    for(;;) {
-        _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n);
-        if(_time<500 && _time<TARGET_MEASURE_TIME_MS/10) {
-            n*=10;
-        } else {
-            break;
-        }
-    }
-
-    if(_time<TARGET_MEASURE_TIME_MS) {
-        n=(n*TARGET_MEASURE_TIME_MS)/_time+1;
-    }
-
-    /* run actual measurement with a target test time of 10s */
-    _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n);
-
-    /* subtract same number of loops over no-operation function */
-    _time-=measureRoundtrips(noop, cnv, encName, intermediateCapacity, n);
-
-    rtTime=((double)_time*1000.)/(double)n;
-
-    /* report */
-    printf("* performance report for                %8s:\n", encName);
-    printf("  intermediate buffer capacity          %8d B\n", intermediateCapacity);
-    if(intermediateCapacity==INTERMEDIATE_CAPACITY && utf8Length!=0) {
-        printf("  number of encoding bytes              %8d B  (%3d%% of UTF-8)\n", encodedLength, PERCENT(encodedLength, utf8Length));
-        printf("  roundtrip conversion time             %8g &#956;s (%3d%% of UTF-8)\n", rtTime, PERCENT(rtTime, utf8Time));
-    } else {
-        printf("  number of encoding bytes              %8d B\n", encodedLength);
-        printf("  roundtrip conversion time             %8g &#956;s\n", rtTime);
-    }
-    printf("  average bytes/code point              %8g B/cp\n", (double)encodedLength/countInputCodePoints);
-    puts("");
-
-    /* set UTF-8 values */
-    if(intermediateCapacity==INTERMEDIATE_CAPACITY && 0==strcmp(encName, "UTF-8")) {
-        utf8Length=encodedLength;
-        utf8Time=rtTime;
-    }
-}
-
-static void
-perEnc(UConverter *cnv, const char *encName) {
-    /*printf("test performance for %s\n", encName);*/
-    perEncAndCapacity(cnv, encName, INTERMEDIATE_CAPACITY);
-    perEncAndCapacity(cnv, encName, INTERMEDIATE_SMALL_CAPACITY);
-}
-
-static void
-testPerformance() {
-    UConverter *cnv;
-    UErrorCode errorCode;
-    int32_t i;
-
-    printf("number of code points                   %8d cp\n", countInputCodePoints);
-    printf("platform endianness:                    %8s-endian\n", U_IS_BIG_ENDIAN ? "big" : "little");
-    puts("");
-    for(i=0; i<ARRAY_LENGTH(utfNames); ++i) {
-        errorCode=U_ZERO_ERROR;
-        cnv=ucnv_open(utfNames[i], &errorCode);
-        if(U_SUCCESS(errorCode)) {
-            perEnc(cnv, utfNames[i]);
-            ucnv_close(cnv);
-        } else {
-            fprintf(stderr, "error opening converter for \"%s\" - %s\n", utfNames[i], u_errorName(errorCode));
-        }
-    }
-}
-
-/* read a complete block from the input file */
-static int32_t
-readBlock(FILE *in) {
-    int length, blockLength;
-
-    blockLength=0;
-    while(blockLength<INTERMEDIATE_CAPACITY && !feof(in)) {
-        length=fread(intermediate, 1, INTERMEDIATE_CAPACITY-blockLength, in);
-        if(length<0 || ferror(in)) {
-            return -1;
-        }
-        blockLength+=length;
-    }
-
-    return (int32_t)blockLength;
-}
-
-static UBool
-readInput(FILE *in, const char *encName) {
-    UConverter *cnv;
-    UChar *pOut, *pOutLimit;
-    const char *p, *limit;
-    int32_t length;
-    UErrorCode errorCode;
-
-    pOut=input;
-    pOutLimit=input+INPUT_CAPACITY;
-
-    errorCode=U_ZERO_ERROR;
-
-    /* read the first block and open the converter */
-    length=readBlock(in);
-    if(length<0) {
-        return FALSE;
-    }
-
-    if(encName==NULL) {
-        int32_t signatureLength;
-        encName=ucnv_detectUnicodeSignature(intermediate, length,
-                                            &signatureLength,
-                                            &errorCode);
-        if(U_FAILURE(errorCode) || encName==NULL) {
-            /* default to UTF-8 */
-            printf("no Unicode signature - using UTF-8\n");
-            encName="UTF-8";
-            errorCode=U_ZERO_ERROR;
-        } else {
-            printf("detected signature for %s (removing %d bytes)\n", encName, signatureLength);
-            /* remove signature byte sequence */
-            memmove(intermediate, intermediate+signatureLength, length-=signatureLength);
-        }
-    }
-
-    cnv=ucnv_open(encName, &errorCode);
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "error: unable to ucnv_open(\"%s\") - %s\n", encName, u_errorName(errorCode));
-        return FALSE;
-    }
-
-    while(length>0) {
-        /* convert the block */
-        p=intermediate;
-        limit=p+length;
-
-        ucnv_toUnicode(cnv,
-                       &pOut, pOutLimit,
-                       &p, limit,
-                       NULL, FALSE,
-                       &errorCode);
-        if(U_FAILURE(errorCode)) {
-            fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode));
-            ucnv_close(cnv);
-            return FALSE;
-        }
-
-        /* read the next block */
-        length=readBlock(in);
-        if(length<0) {
-            ucnv_close(cnv);
-            return FALSE;
-        }
-    }
-
-    /* flush the converter */
-    ucnv_toUnicode(cnv,
-                   &pOut, pOutLimit,
-                   &p, p,
-                   NULL, TRUE,
-                   &errorCode);
-    ucnv_close(cnv);
-
-    if(U_FAILURE(errorCode)) {
-        fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode));
-        return FALSE;
-    }
-
-    inputLength=(int32_t)(pOut-input);
-    countInputCodePoints=u_countChar32(input, inputLength);
-    if(inputLength<=0) {
-        fprintf(stderr, "warning: input is empty\n");
-        return FALSE;
-    }
-
-    return TRUE;
-}
-
-static void
-showUsage(const char *myName) {
-    fprintf(stderr,
-            "Usage:\n"
-            "%s [-e encoding-name] filename | '-'\n"
-            "    encoding-name must be the name of an encoding supported by ICU\n"
-            "    the filename of the input file with text to be used\n"
-            "      can be a dash (-) for standard input\n",
-            myName);
-}
-
-/*
- * Read file using some encoding, convert to 1M UTF-16 input buffer.
- * For each UTF to be tested:
- *   n times:
- *     convert from UTF-16 input buffer to UTF, 4kB buffer
- *     convert from 4kB buffer to 1M UTF-16 output buffer
- *   adjust n so that time elapsed is 10s (#define)
- *     ->divide 10s by time, increase n by that factor, run 2nd time
- *   n times:
- *     empty function
- *   subtract out loop/function overhead
- *   display #code points - #UTF bytes - time per roundtrip
- *
- *   * do the same again with an intermediate buffer size of 20 instead of 4kB
- *
- * Test following UTFs:
- * UTF-16BE, UTF-16LE, UTF-8, SCSU, BOCU-1, CESU-8
- *
- * Command-line arguments:
- * - encoding (default UTF-8, detect BOM)
- * - filename (allow "-")
- */
-extern int
-main(int argc, const char *argv[]) {
-    FILE *in;
-    const char *myName, *encName, *filename, *basename;
-
-    myName=argv[0];
-    if(argc<2) {
-        showUsage(myName);
-        return 1;
-    }
-
-    /* get encoding name argument */
-    if(argv[1][0]=='-' && argv[1][1]=='e') {
-        encName=argv[1]+2;
-        --argc;
-        ++argv;
-        if(*encName==0) {
-            if(argc<2) {
-                showUsage(myName);
-                return 1;
-            }
-            encName=argv[1];
-            --argc;
-            ++argv;
-        }
-    } else {
-        encName=NULL;
-    }
-
-    /* get filename argument */
-    if(argc<2) {
-        showUsage(myName);
-        return 1;
-    }
-    filename=argv[1];
-    if(filename[0]=='-' && filename[1]==0) {
-        filename="(standard input)";
-        in=stdin;
-        /* set stdin to binary mode */
-        _setmode(_fileno(stdin), _O_BINARY);
-    } else {
-        in=fopen(filename, "rb");
-        if(in==NULL) {
-            fprintf(stderr, "error opening \"%s\"\n", filename);
-            showUsage(myName);
-            return 2;
-        }
-    }
-
-    /* read input */
-    basename=strrchr(filename, U_FILE_SEP_CHAR);
-    if(basename!=NULL) {
-        ++basename;
-    } else {
-        basename=filename;
-    }
-    printf("# testing converter performance with file \"%s\"\n", basename);
-    if(!readInput(in, encName)) {
-        fprintf(stderr, "error reading \"%s\" (encoding %s)\n", filename, encName);
-        showUsage(myName);
-        return 2;
-    }
-    if(in!=stdin) {
-        fclose(in);
-    }
-
-    /* test performance */
-    testPerformance();
-    return 0;
-}