From 9d88c94317aeac5dd26c1dbe8c2112dbe855d2b5 Mon Sep 17 00:00:00 2001 From: Apple Date: Tue, 2 Oct 2007 00:44:14 +0000 Subject: [PATCH] ICU-6.2.22.tar.gz --- icuSources/allinone/all/all.dsp | 93 ++ icuSources/layout/unicode/loengine.h | 358 +++++ icuSources/samples/xml2txt/readme.txt | 3 + icuSources/test/collperf/Makefile.in | 89 ++ icuSources/test/collperf/collperf.cpp | 1749 +++++++++++++++++++++ icuSources/test/collperf/collperf.dsp | 160 ++ icuSources/test/collperf/readme.html | 84 + icuSources/test/perf/all/all.dsp | 63 + icuSources/test/unalignedtest/Makefile.in | 83 + icuSources/test/unalignedtest/readme | 27 + icuSources/test/unalignedtest/unaligned.c | 304 ++++ icuSources/test/usetperf/bitset.cpp | 63 + icuSources/test/usetperf/bitset.h | 38 + icuSources/test/usetperf/timer.h | 62 + icuSources/test/usetperf/usetperf.cpp | 122 ++ icuSources/test/usetperf/usetperf.dsp | 164 ++ icuSources/test/utfperf/utfperf.c | 450 ++++++ icuSources/test/utfperf/utfperf.dsp | 158 ++ icuSources/tools/makeconv/misc/canonucm.c | 29 + icuSources/tools/makeconv/misc/rptp2ucm.c | 31 + icuSources/tools/makeconv/misc/ucmmerge.c | 26 + icuSources/tools/makeconv/misc/ucmstrip.c | 28 + makefile | 10 +- 23 files changed, 4190 insertions(+), 4 deletions(-) create mode 100644 icuSources/allinone/all/all.dsp create mode 100644 icuSources/layout/unicode/loengine.h create mode 100644 icuSources/samples/xml2txt/readme.txt create mode 100644 icuSources/test/collperf/Makefile.in create mode 100644 icuSources/test/collperf/collperf.cpp create mode 100644 icuSources/test/collperf/collperf.dsp create mode 100644 icuSources/test/collperf/readme.html create mode 100644 icuSources/test/perf/all/all.dsp create mode 100644 icuSources/test/unalignedtest/Makefile.in create mode 100644 icuSources/test/unalignedtest/readme create mode 100644 icuSources/test/unalignedtest/unaligned.c create mode 100644 icuSources/test/usetperf/bitset.cpp create mode 100644 icuSources/test/usetperf/bitset.h create mode 
100644 icuSources/test/usetperf/timer.h create mode 100644 icuSources/test/usetperf/usetperf.cpp create mode 100644 icuSources/test/usetperf/usetperf.dsp create mode 100644 icuSources/test/utfperf/utfperf.c create mode 100644 icuSources/test/utfperf/utfperf.dsp create mode 100644 icuSources/tools/makeconv/misc/canonucm.c create mode 100644 icuSources/tools/makeconv/misc/rptp2ucm.c create mode 100644 icuSources/tools/makeconv/misc/ucmmerge.c create mode 100644 icuSources/tools/makeconv/misc/ucmstrip.c diff --git a/icuSources/allinone/all/all.dsp b/icuSources/allinone/all/all.dsp new file mode 100644 index 00000000..5336d612 --- /dev/null +++ b/icuSources/allinone/all/all.dsp @@ -0,0 +1,93 @@ +# Microsoft Developer Studio Project File - Name="all" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Generic Project" 0x010a + +CFG=ALL - WIN32 DEBUG +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "all.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "all.mak" CFG="ALL - WIN32 DEBUG" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "all - Win32 Release" (based on "Win32 (x86) Generic Project") +!MESSAGE "all - Win32 Debug" (based on "Win32 (x86) Generic Project") +!MESSAGE "all - Win64 Release" (based on "Win32 (x86) Generic Project") +!MESSAGE "all - Win64 Debug" (based on "Win32 (x86) Generic Project") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +MTL=midl.exe + +!IF "$(CFG)" == "all - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Target_Dir "" + +!ELSEIF "$(CFG)" == "all - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Target_Dir "" + +!ELSEIF "$(CFG)" == "all - Win64 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Target_Dir "" + +!ELSEIF "$(CFG)" == "all - Win64 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Target_Dir "" + +!ENDIF + +# Begin Target + +# Name "all - Win32 Release" +# Name "all - Win32 Debug" +# Name "all 
- Win64 Release" +# Name "all - Win64 Debug" +# End Target +# End Project diff --git a/icuSources/layout/unicode/loengine.h b/icuSources/layout/unicode/loengine.h new file mode 100644 index 00000000..76612cd0 --- /dev/null +++ b/icuSources/layout/unicode/loengine.h @@ -0,0 +1,358 @@ +/* + * + * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved + * + */ + +#ifndef __LOENGINE_H +#define __LOENGINE_H + +#include "unicode/utypes.h" +#include "unicode/uobject.h" +#include "unicode/uscript.h" +#include "unicode/unistr.h" + +#include "layout/LETypes.h" +#include "layout/LayoutEngine.h" + +U_NAMESPACE_BEGIN + +/** + * This is a wrapper class designed to allow ICU clients to + * use LayoutEngine in a way that is consistent with the rest + * of ICU. + * + * (LayoutEngine was developed seperately from ICU and + * the same source is used in non-ICU environments, so it cannot + * be changed to match ICU coding conventions). + * + * This class is designed for clients who wish to use LayoutEngine + * to layout complex text. If you need to subclass LayoutEngine, + * you'll need to use the LayoutEngine interfaces directly. + * + * Basically, it creates an instance of LayoutEngine, stashes + * it in fLayoutEngine, and uses it to implement the layout + * functionality. + * + * Use the createInstance method to create an ICULayoutEngine. Use + * delete to destroy it. The layoutChars method computes the glyphs + * and positions, and saves them in the ICULayoutEngine object. + * Use getGlyphs, getPositions and getCharIndices to retreive this + * data. + * + * You'll also need an implementation of LEFontInstance for your platform. + * + * @see LayoutEngine.h + * @see LEFontInstance.h + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ +#ifndef U_HIDE_OBSOLETE_API +class U_LAYOUT_API ICULayoutEngine : public UObject { +private: + /** + * This holds the instance of LayoutEngine that does all + * the work. 
+ */ + LayoutEngine *fLayoutEngine; + + /** + * This no argument constructor is private so that clients + * can't envoke it. Clients should use createInstance. + * + * @see createInstance + */ + ICULayoutEngine(); + + /** + * The main constructor. It is defined as private to + * stop clients from invoking it. Clients should use + * createInstance. + * + * @param layoutEngine - the LayoutEngine that this instance wraps. + * + * @see createInstance + */ + ICULayoutEngine(LayoutEngine *layoutEngine); + +public: + + /** + * The destructor. At least on Windows it needs to be + * virtual to ensure that it deletes the object from the + * same heap that createInstance will allocate it from. We + * don't know why this is... + * + * @see createInstance + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + virtual ~ICULayoutEngine(); + + /** + * This method computes the glyph, character index and position arrays + * for the input characters. + * + * @param chars - the input character context + * @param startOffset - the starting offset of the characters to process + * @param endOffset - the ending offset of the characters to process + * @param maxOffset - the number of characters in the input context + * @param rightToLeft - TRUE if the characers are in a right to left directional run + * @param x - the initial X position + * @param y - the initial Y position + * @param success - output parameter set to an error code if the operation fails + * + * @return the number of glyphs in the glyph array + * + * Note; the glyph, character index and position array can be accessed + * using the getter method below. + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. 
+ */ + int32_t layoutChars(const UChar chars[], + int32_t startOffset, + int32_t endOffset, + int32_t maxOffset, + UBool rightToLeft, + float x, float y, + UErrorCode &success); + + + /** + * This method computes the glyph, character index and position arrays + * for the input characters. + * + * @param str - the input character context + * @param startOffset - the starting offset of the characters to process + * @param endOffset - the ending offset of the characters to process + * @param rightToLeft - TRUE if the characers are in a right to left directional run + * @param x - the initial X position + * @param y - the initial Y position + * @param success - output parameter set to an error code if the operation fails + * + * @return the number of glyphs in the glyph array + * + * Note; the glyph, character index and position array can be accessed + * using the getter method below. + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + int32_t layoutString(const UnicodeString &str, + int32_t startOffset, + int32_t endOffset, + UBool rightToLeft, + float x, float y, + UErrorCode &success); + + /** + * This method returns the number of glyphs in the glyph array. Note + * that the number of glyphs will be greater than or equal to the number + * of characters used to create the LayoutEngine. + * + * @return the number of glyphs in the glyph array + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + int32_t countGlyphs() const; + + /** + * This method copies the glyph array into a caller supplied array. + * The caller must ensure that the array is large enough to hold all + * the glyphs. + * + * @param glyphs - the destiniation glyph array + * @param success - output parameter set to an error code if the operation fails + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. 
+ */ + void getGlyphs(uint32_t glyphs[], UErrorCode &success); + + /** + * This method copies the character index array into a caller supplied array. + * The caller must ensure that the array is large enough to hold a character + * index for each glyph. + * + * @param charIndices - the destiniation character index array + * @param success - output parameter set to an error code if the operation fails + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + void getCharIndices(int32_t charIndices[], UErrorCode &success); + + /** + * This method copies the character index array into a caller supplied array. + * The caller must ensure that the array is large enough to hold a character + * index for each glyph. + * + * @param charIndices - the destiniation character index array + * @param indexBase - an offset which will be added to each index + * @param success - output parameter set to an error code if the operation fails + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + void getCharIndices(int32_t charIndices[], int32_t indexBase, UErrorCode &success); + + /** + * This method copies the position array into a caller supplied array. + * The caller must ensure that the array is large enough to hold an + * X and Y position for each glyph, plus an extra X and Y for the + * advance of the last glyph. + * + * @param positions - the destiniation position array + * @param success - output parameter set to an error code if the operation fails + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + void getGlyphPositions(float positions[], UErrorCode &success); + + /** + * This method returns the X and Y position of the glyph at the + * given index. 
+ * + * Input parameters: + * @param glyphIndex - the index of the glyph + * + * Output parameters: + * @param x - the glyph's X position + * @param y - the glyph's Y position + * @param success - output parameter set to an error code if the operation fails + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + void getGlyphPosition(int32_t glyphIndex, float &x, float &y, UErrorCode &success); + + /** + * This method returns an ICULayoutEngine capable of laying out text + * in the given font, script and langauge. + * + * @param fontInstance - the font of the text + * @param scriptCode - the script of the text + * @param locale - used to determine the language of the text + * @param success - output parameter set to an error code if the operation fails + * + * @return an ICULayoutEngine which can layout text in the given font. + * + * NOTE: currently, locale is ignored... + * + * @see LEFontInstance + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + static ICULayoutEngine *createInstance(const LEFontInstance *fontInstance, + UScriptCode scriptCode, Locale &locale, + UErrorCode &success); + + /** + * ICU "poor man's RTTI", returns a UClassID for the actual class. + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + virtual UClassID getDynamicClassID() const; + + /** + * ICU "poor man's RTTI", returns a UClassID for this class. + * + * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release. + */ + static UClassID getStaticClassID(); +}; + +inline ICULayoutEngine::ICULayoutEngine() +{ + // nothing at all... 
+} + +inline ICULayoutEngine::ICULayoutEngine(LayoutEngine *layoutEngine) + : fLayoutEngine(layoutEngine) +{ + // nothing else to do +} + +inline ICULayoutEngine::~ICULayoutEngine() +{ + delete fLayoutEngine; + fLayoutEngine = 0; +} + +inline int32_t ICULayoutEngine::layoutChars(const UChar chars[], + int32_t startOffset, + int32_t endOffset, + int32_t maxOffset, + UBool rightToLeft, + float x, float y, + UErrorCode &success) +{ + // NOTE: call reset() so that clients can safely reuse + fLayoutEngine->reset(); + return fLayoutEngine->layoutChars(chars, + startOffset, + endOffset - startOffset, + maxOffset, + rightToLeft, + x, y, + (LEErrorCode &) success); +} + +inline int32_t ICULayoutEngine::layoutString(const UnicodeString &str, + int32_t startOffset, + int32_t endOffset, + UBool rightToLeft, + float x, float y, + UErrorCode &success) +{ + // NOTE: call reset() so that clients can safely reuse + fLayoutEngine->reset(); + return fLayoutEngine->layoutChars(str.getBuffer(), + startOffset, + endOffset - startOffset, + str.length(), + rightToLeft, + x, y, + (LEErrorCode &) success); +} + +inline int32_t ICULayoutEngine::countGlyphs() const +{ + return fLayoutEngine->getGlyphCount(); +} + +inline void ICULayoutEngine::getGlyphs(uint32_t glyphs[], UErrorCode &success) +{ + fLayoutEngine->getGlyphs(glyphs, (LEErrorCode &) success); +} + +inline void ICULayoutEngine::getCharIndices(int32_t charIndices[], UErrorCode &success) +{ + fLayoutEngine->getCharIndices(charIndices, (LEErrorCode &) success); +} + +inline void ICULayoutEngine::getCharIndices(int32_t charIndices[], int32_t indexBase, UErrorCode &success) +{ + fLayoutEngine->getCharIndices(charIndices, indexBase, (LEErrorCode &) success); +} + +inline void ICULayoutEngine::getGlyphPositions(float positions[], UErrorCode &success) +{ + fLayoutEngine->getGlyphPositions(positions, (LEErrorCode &) success); +} + +inline void ICULayoutEngine::getGlyphPosition(int32_t glyphIndex, float &x, float &y, UErrorCode &success) +{ 
+ fLayoutEngine->getGlyphPosition(glyphIndex, x, y, (LEErrorCode &) success); +} + +inline ICULayoutEngine *ICULayoutEngine::createInstance(const LEFontInstance *fontInstance, + UScriptCode scriptCode, + Locale &locale, UErrorCode &success) +{ + LayoutEngine *engine = LayoutEngine::layoutEngineFactory(fontInstance, + (le_int32) scriptCode, + 0, + (LEErrorCode &) success); + + return new ICULayoutEngine(engine); +} +#endif // U_HIDE_OBSOLETE_API + +U_NAMESPACE_END +#endif diff --git a/icuSources/samples/xml2txt/readme.txt b/icuSources/samples/xml2txt/readme.txt new file mode 100644 index 00000000..30158ad2 --- /dev/null +++ b/icuSources/samples/xml2txt/readme.txt @@ -0,0 +1,3 @@ +Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved. + +The xml2txt sample has been deprecated. It was a demonstration of converting ICU4C XML resource bundles into .txt resource bundles. Since it was written, the ICU4C XML resource bundle format has been depreceated, and ICU now uses XLIFF (XML Localization Interchange File Format) instead. \ No newline at end of file diff --git a/icuSources/test/collperf/Makefile.in b/icuSources/test/collperf/Makefile.in new file mode 100644 index 00000000..bca32116 --- /dev/null +++ b/icuSources/test/collperf/Makefile.in @@ -0,0 +1,89 @@ +## Makefile.in for ICU - test/collperf +## Copyright (c) 2001, International Business Machines Corporation and +## others. All Rights Reserved. + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = ../.. 
+ +include $(top_builddir)/icudefs.mk + +## Platform-specific setup +include @platform_make_fragment@ + +## Build directory information +subdir = test/collperf + +## Extra files to remove for 'make clean' +CLEANFILES = *~ $(DEPS) + +## Target information +TARGET = collperf + +DEFS = @DEFS@ +CPPFLAGS = @CPPFLAGS@ -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/i18n +CFLAGS = @CFLAGS@ +CXXFLAGS = @CXXFLAGS@ +ENABLE_RPATH = @ENABLE_RPATH@ +ifeq ($(ENABLE_RPATH),YES) +RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir) +endif +LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS) +LIBS = $(LIBICUI18N) $(LIBICUUC) @LIBS@ @LIB_M@ + +OBJECTS = collperf.o + +DEPS = $(OBJECTS:.o=.d) + +## List of phony targets +.PHONY : all all-local install install-local clean clean-local \ +distclean distclean-local dist dist-local check check-local + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-local +install: install-local +clean: clean-local +distclean : distclean-local +dist: dist-local +check: all check-local + +all-local: $(TARGET) + +install-local: + +dist-local: + +clean-local: + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) + $(RMV) $(OBJECTS) $(TARGET) + +distclean-local: clean-local + $(RMV) Makefile + +check-local: all-local + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(TARGET) : $(OBJECTS) + $(LINK.cc) -o $@ $^ $(LIBS) + +invoke: + ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION) + +ifeq (,$(MAKECMDGOALS)) +-include $(DEPS) +else +ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),) +ifneq ($(patsubst %install,,$(MAKECMDGOALS)),) +-include $(DEPS) +endif +endif +endif + diff --git a/icuSources/test/collperf/collperf.cpp b/icuSources/test/collperf/collperf.cpp new file mode 100644 index 00000000..bd916c5c --- /dev/null +++ b/icuSources/test/collperf/collperf.cpp @@ -0,0 +1,1749 @@ 
+/******************************************************************** + * COPYRIGHT: + * Copyright (C) 2001 IBM, Inc. All Rights Reserved. + * + ********************************************************************/ +/******************************************************************************** +* +* File CALLCOLL.C +* +* Modification History: +* Name Description +* Andy Heninger First Version +* +********************************************************************************* +*/ + +// +// This program tests string collation and sort key generation performance. +// Three APIs can be teste: ICU C , Unix strcoll, strxfrm and Windows LCMapString +// A file of names is required as input, one per line. It must be in utf-8 or utf-16 format, +// and include a byte order mark. Either LE or BE format is OK. +// + +const char gUsageString[] = + "usage: collperf options...\n" + "-help Display this message.\n" + "-file file_name utf-16 format file of names.\n" + "-locale name ICU locale to use. Default is en_US\n" + "-rules file_name Collation rules file (overrides locale)\n" + "-langid 0x1234 Windows Language ID number. Default to value for -locale option\n" + " see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n" + "-win Run test using Windows native services. (ICU is default)\n" + "-unix Run test using Unix strxfrm, strcoll services.\n" + "-uselen Use API with string lengths. Default is null-terminated strings\n" + "-usekeys Run tests using sortkeys rather than strcoll\n" + "-strcmp Run tests using u_strcmp rather than strcoll\n" + "-strcmpCPO Run tests using u_strcmpCodePointOrder rather than strcoll\n" + "-loop nnnn Loopcount for test. Adjust for reasonable total running time.\n" + "-iloop n Inner Loop Count. Default = 1. Number of calls to function\n" + " under test at each call point. For measuring test overhead.\n" + "-terse Terse numbers-only output. 
Intended for use by scripts.\n" + "-french French accent ordering\n" + "-frenchoff No French accent ordering (for use with French locales.)\n" + "-norm Normalizing mode on\n" + "-shifted Shifted mode\n" + "-lower Lower case first\n" + "-upper Upper case first\n" + "-case Enable separate case level\n" + "-level n Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n" + "-keyhist Produce a table sort key size vs. string length\n" + "-binsearch Binary Search timing test\n" + "-keygen Sort Key Generation timing test\n" + "-qsort Quicksort timing test\n" + "-iter Iteration Performance Test\n" + "-dump Display strings, sort keys and CEs.\n" + ; + + + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef WIN32 +#include +#else +// +// Stubs for Windows API functions when building on UNIXes. +// +typedef int DWORD; +inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; +#include +unsigned long timeGetTime() { + struct timeval t; + gettimeofday(&t, 0); + unsigned long val = t.tv_sec * 1000; // Let it overflow. Who cares. + val += t.tv_usec / 1000; + return val; +}; +inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;}; +const int LCMAP_SORTKEY = 0; +#define MAKELCID(a,b) 0 +const int SORT_DEFAULT = 0; +#endif + + + +// +// Command line option variables +// These global variables are set according to the options specified +// on the command line by the user. +char * opt_fName = 0; +char * opt_locale = "en_US"; +int opt_langid = 0; // Defaults to value corresponding to opt_locale. +char * opt_rules = 0; +UBool opt_help = FALSE; +int opt_loopCount = 1; +int opt_iLoopCount = 1; +UBool opt_terse = FALSE; +UBool opt_qsort = FALSE; +UBool opt_binsearch = FALSE; +UBool opt_icu = TRUE; +UBool opt_win = FALSE; // Run with Windows native functions. 
+UBool opt_unix = FALSE; // Run with UNIX strcoll, strxfrm functions. +UBool opt_uselen = FALSE; +UBool opt_usekeys = FALSE; +UBool opt_strcmp = FALSE; +UBool opt_strcmpCPO = FALSE; +UBool opt_norm = FALSE; +UBool opt_keygen = FALSE; +UBool opt_french = FALSE; +UBool opt_frenchoff = FALSE; +UBool opt_shifted = FALSE; +UBool opt_lower = FALSE; +UBool opt_upper = FALSE; +UBool opt_case = FALSE; +int opt_level = 0; +UBool opt_keyhist = FALSE; +UBool opt_itertest = FALSE; +UBool opt_dump = FALSE; + + + +// +// Definitions for the command line options +// +struct OptSpec { + const char *name; + enum {FLAG, NUM, STRING} type; + void *pVar; +}; + +OptSpec opts[] = { + {"-file", OptSpec::STRING, &opt_fName}, + {"-locale", OptSpec::STRING, &opt_locale}, + {"-langid", OptSpec::NUM, &opt_langid}, + {"-rules", OptSpec::STRING, &opt_rules}, + {"-qsort", OptSpec::FLAG, &opt_qsort}, + {"-binsearch", OptSpec::FLAG, &opt_binsearch}, + {"-iter", OptSpec::FLAG, &opt_itertest}, + {"-win", OptSpec::FLAG, &opt_win}, + {"-unix", OptSpec::FLAG, &opt_unix}, + {"-uselen", OptSpec::FLAG, &opt_uselen}, + {"-usekeys", OptSpec::FLAG, &opt_usekeys}, + {"-strcmp", OptSpec::FLAG, &opt_strcmp}, + {"-strcmpCPO", OptSpec::FLAG, &opt_strcmpCPO}, + {"-norm", OptSpec::FLAG, &opt_norm}, + {"-french", OptSpec::FLAG, &opt_french}, + {"-frenchoff", OptSpec::FLAG, &opt_frenchoff}, + {"-shifted", OptSpec::FLAG, &opt_shifted}, + {"-lower", OptSpec::FLAG, &opt_lower}, + {"-upper", OptSpec::FLAG, &opt_upper}, + {"-case", OptSpec::FLAG, &opt_case}, + {"-level", OptSpec::NUM, &opt_level}, + {"-keyhist", OptSpec::FLAG, &opt_keyhist}, + {"-keygen", OptSpec::FLAG, &opt_keygen}, + {"-loop", OptSpec::NUM, &opt_loopCount}, + {"-iloop", OptSpec::NUM, &opt_iLoopCount}, + {"-terse", OptSpec::FLAG, &opt_terse}, + {"-dump", OptSpec::FLAG, &opt_dump}, + {"-help", OptSpec::FLAG, &opt_help}, + {"-?", OptSpec::FLAG, &opt_help}, + {0, OptSpec::FLAG, 0} +}; + + 
+//--------------------------------------------------------------------------- +// +// Global variables pointing to and describing the test file +// +//--------------------------------------------------------------------------- + +// +// struct Line +// +// Each line from the source file (containing a name, presumably) gets +// one of these structs. +// +struct Line { + UChar *name; + int len; + char *winSortKey; + char *icuSortKey; + char *unixSortKey; + char *unixName; +}; + + + +Line *gFileLines; // Ptr to array of Line structs, one per line in the file. +int gNumFileLines; +UCollator *gCol; +DWORD gWinLCID; + +Line **gSortedLines; +Line **gRandomLines; +int gCount; + + + +//--------------------------------------------------------------------------- +// +// ProcessOptions() Function to read the command line options. +// +//--------------------------------------------------------------------------- +UBool ProcessOptions(int argc, const char **argv, OptSpec opts[]) +{ + int i; + int argNum; + const char *pArgName; + OptSpec *pOpt; + + for (argNum=1; argNumname != 0; pOpt++) { + if (strcmp(pOpt->name, pArgName) == 0) { + switch (pOpt->type) { + case OptSpec::FLAG: + *(UBool *)(pOpt->pVar) = TRUE; + break; + case OptSpec::STRING: + argNum ++; + if (argNum >= argc) { + fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); + return FALSE; + } + *(const char **)(pOpt->pVar) = argv[argNum]; + break; + case OptSpec::NUM: + argNum ++; + if (argNum >= argc) { + fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name); + return FALSE; + } + char *endp; + i = strtol(argv[argNum], &endp, 0); + if (endp == argv[argNum]) { + fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name); + return FALSE; + } + *(int *)(pOpt->pVar) = i; + } + break; + } + } + if (pOpt->name == 0) + { + fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); + return FALSE; + } + } +return TRUE; +} + 
+//--------------------------------------------------------------------------------------- +// +// Comparison functions for use by qsort. +// +// Six flavors, ICU or Windows, SortKey or String Compare, Strings with length +// or null terminated. +// +//--------------------------------------------------------------------------------------- +int ICUstrcmpK(const void *a, const void *b) { + gCount++; + int t = strcmp((*(Line **)a)->icuSortKey, (*(Line **)b)->icuSortKey); + return t; +} + + +int ICUstrcmpL(const void *a, const void *b) { + gCount++; + UCollationResult t; + t = ucol_strcoll(gCol, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len); + if (t == UCOL_LESS) return -1; + if (t == UCOL_GREATER) return +1; + return 0; +} + + +int ICUstrcmp(const void *a, const void *b) { + gCount++; + UCollationResult t; + t = ucol_strcoll(gCol, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1); + if (t == UCOL_LESS) return -1; + if (t == UCOL_GREATER) return +1; + return 0; +} + + +int Winstrcmp(const void *a, const void *b) { + gCount++; + int t; + t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, -1, (*(Line **)b)->name, -1); + return t-2; +} + + +int UNIXstrcmp(const void *a, const void *b) { + gCount++; + int t; + t = strcoll((*(Line **)a)->unixName, (*(Line **)b)->unixName); + return t; +} + + +int WinstrcmpL(const void *a, const void *b) { + gCount++; + int t; + t = CompareStringW(gWinLCID, 0, (*(Line **)a)->name, (*(Line **)a)->len, (*(Line **)b)->name, (*(Line **)b)->len); + return t-2; +} + + +int WinstrcmpK(const void *a, const void *b) { + gCount++; + int t = strcmp((*(Line **)a)->winSortKey, (*(Line **)b)->winSortKey); + return t; +} + + +//--------------------------------------------------------------------------------------- +// +// Function for sorting the names (lines) into a random order. +// Order is based on a hash of the ICU Sort key for the lines +// The randomized order is used as input for the sorting timing tests. 
+// +//--------------------------------------------------------------------------------------- +int ICURandomCmp(const void *a, const void *b) { + char *ask = (*(Line **)a)->icuSortKey; + char *bsk = (*(Line **)b)->icuSortKey; + int aVal = 0; + int bVal = 0; + int retVal; + while (*ask != 0) { + aVal += aVal*37 + *ask++; + } + while (*bsk != 0) { + bVal += bVal*37 + *bsk++; + } + retVal = -1; + if (aVal == bVal) { + retVal = 0; + } + else if (aVal > bVal) { + retVal = 1; + } + return retVal; +} + +//--------------------------------------------------------------------------------------- +// +// doKeyGen() Key Generation Timing Test +// +//--------------------------------------------------------------------------------------- +void doKeyGen() +{ + int line; + int loops; + int iLoop; + int t; + int len=-1; + + // Adjust loop count to compensate for file size. Should be order n + double dLoopCount = double(opt_loopCount) * (1000. / double(gNumFileLines)); + int adj_loopCount = int(dLoopCount); + if (adj_loopCount < 1) adj_loopCount = 1; + + + unsigned long startTime = timeGetTime(); + + if (opt_win) { + for (loops=0; loopsname, (gSortedLines[guess])->name); + } + gCount++; + if (r== 0) + break; + if (r < 0) + hi = guess; + else + lo = guess; + } + } + } + elapsedTime = timeGetTime() - startTime; + break; + } + + + if (opt_icu) + { + unsigned long startTime = timeGetTime(); + UCollationResult r; + for (loops=0; loopslen; + } + int hi = gNumFileLines-1; + int lo = 0; + int guess = -1; + for (;;) { + int newGuess = (hi + lo) / 2; + if (newGuess == guess) + break; + guess = newGuess; + int ri; + if (opt_usekeys) { + for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { + ri = strcmp((gSortedLines[line])->icuSortKey, (gSortedLines[guess])->icuSortKey); + } + gCount++; + r=UCOL_GREATER; if(ri<0) {r=UCOL_LESS;} else if (ri==0) {r=UCOL_EQUAL;} + } + else + { + if (opt_uselen) { + guessLen = (gSortedLines[guess])->len; + } + for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { + r = 
ucol_strcoll(gCol, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen); + } + gCount++; + } + if (r== UCOL_EQUAL) + break; + if (r == UCOL_LESS) + hi = guess; + else + lo = guess; + } + } + } + elapsedTime = timeGetTime() - startTime; + break; + } + + if (opt_win) + { + unsigned long startTime = timeGetTime(); + int r; + for (loops=0; loopslen; + } + int hi = gNumFileLines-1; + int lo = 0; + int guess = -1; + for (;;) { + int newGuess = (hi + lo) / 2; + if (newGuess == guess) + break; + guess = newGuess; + if (opt_usekeys) { + for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { + r = strcmp((gSortedLines[line])->winSortKey, (gSortedLines[guess])->winSortKey); + } + gCount++; + r+=2; + } + else + { + if (opt_uselen) { + guessLen = (gSortedLines[guess])->len; + } + for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { + r = CompareStringW(gWinLCID, 0, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen); + } + if (r == 0) { + if (opt_terse == FALSE) { + fprintf(stderr, "Error returned from Windows CompareStringW.\n"); + } + exit(-1); + } + gCount++; + } + if (r== 2) // strings == + break; + if (r == 1) // line < guess + hi = guess; + else // line > guess + lo = guess; + } + } + } + elapsedTime = timeGetTime() - startTime; + break; + } + + if (opt_unix) + { + unsigned long startTime = timeGetTime(); + int r; + for (loops=0; loopsunixSortKey, (gSortedLines[guess])->unixSortKey); + } + gCount++; + } + else + { + for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) { + r = strcoll((gSortedLines[line])->unixName, (gSortedLines[guess])->unixName); + } + errno = 0; + if (errno != 0) { + fprintf(stderr, "Error %d returned from strcoll.\n", errno); + exit(-1); + } + gCount++; + } + if (r == 0) // strings == + break; + if (r < 0) // line < guess + hi = guess; + else // line > guess + lo = guess; + } + } + } + elapsedTime = timeGetTime() - startTime; + break; + } + break; + } + + int ns = (int)(float(1000000) * (float)elapsedTime / 
(float)gCount); + if (opt_terse == FALSE) { + printf("binary search: total # of string compares = %d\n", gCount); + printf("binary search: compares per loop = %d\n", gCount / loops); + printf("binary search: time per compare = %d ns\n", ns); + } else { + printf("%d, ", ns); + } + +} + + + + +//--------------------------------------------------------------------------------------- +// +// doQSort() The quick sort timing test. Uses the C library qsort function. +// +//--------------------------------------------------------------------------------------- +void doQSort() { + int i; + Line **sortBuf = new Line *[gNumFileLines]; + + // Adjust loop count to compensate for file size. QSort should be n log(n) + double dLoopCount = double(opt_loopCount) * 3000. / (log10(gNumFileLines) * double(gNumFileLines)); + if (opt_usekeys) dLoopCount *= 5; + int adj_loopCount = int(dLoopCount); + if (adj_loopCount < 1) adj_loopCount = 1; + + + gCount = 0; + unsigned long startTime = timeGetTime(); + if (opt_win && opt_usekeys) { + for (i=0; i maxLen) maxLen = gFileLines[i].len; + } + + // Allocate arrays to hold the histogram data + int *accumulatedLen = new int[maxLen+1]; + int *numKeysOfSize = new int[maxLen+1]; + for (i=0; i<=maxLen; i++) { + accumulatedLen[i] = 0; + numKeysOfSize[i] = 0; + } + + // Fill the arrays... 
+ for (i=0; i 0) { + printf("%d, %f, %f\n", i, (float)accumulatedLen[i] / (float)numKeysOfSize[i], + (float)accumulatedLen[i] / (float)(numKeysOfSize[i] * i)); + } + } +} + +//--------------------------------------------------------------------------------------- +// +// doForwardIterTest(UBool) Forward iteration test +// argument null-terminated string used +// +//--------------------------------------------------------------------------------------- +void doForwardIterTest(UBool haslen) { + int count = 0; + + UErrorCode error = U_ZERO_ERROR; + printf("\n\nPerforming forward iteration performance test with "); + + if (haslen) { + printf("non-null terminated data -----------\n"); + } + else { + printf("null terminated data -----------\n"); + } + printf("performance test on strings from file -----------\n"); + + UChar dummytext[] = {0, 0}; + UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error); + ucol_setText(iter, dummytext, 1, &error); + + gCount = 0; + unsigned long startTime = timeGetTime(); + while (count < opt_loopCount) { + int linecount = 0; + while (linecount < gNumFileLines) { + UChar *str = gFileLines[linecount].name; + int strlen = haslen?gFileLines[linecount].len:-1; + ucol_setText(iter, str, strlen, &error); + while (ucol_next(iter, &error) != UCOL_NULLORDER) { + gCount++; + } + + linecount ++; + } + count ++; + } + unsigned long elapsedTime = timeGetTime() - startTime; + printf("elapsedTime %d\n", elapsedTime); + + // empty loop recalculation + count = 0; + startTime = timeGetTime(); + while (count < opt_loopCount) { + int linecount = 0; + while (linecount < gNumFileLines) { + UChar *str = gFileLines[linecount].name; + int strlen = haslen?gFileLines[linecount].len:-1; + ucol_setText(iter, str, strlen, &error); + linecount ++; + } + count ++; + } + elapsedTime -= (timeGetTime() - startTime); + printf("elapsedTime %d\n", elapsedTime); + + ucol_closeElements(iter); + + int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); + 
printf("Total number of strings compared %d in %d loops\n", gNumFileLines, + opt_loopCount); + printf("Average time per ucol_next() nano seconds %d\n", ns); + + printf("performance test on skipped-5 concatenated strings from file -----------\n"); + + UChar *str; + int strlen = 0; + // appending all the strings + int linecount = 0; + while (linecount < gNumFileLines) { + strlen += haslen?gFileLines[linecount].len: + u_strlen(gFileLines[linecount].name); + linecount ++; + } + str = (UChar *)malloc(sizeof(UChar) * strlen); + int strindex = 0; + linecount = 0; + while (strindex < strlen) { + int len = 0; + len += haslen?gFileLines[linecount].len: + u_strlen(gFileLines[linecount].name); + memcpy(str + strindex, gFileLines[linecount].name, + sizeof(UChar) * len); + strindex += len; + linecount ++; + } + + printf("Total size of strings %d\n", strlen); + + gCount = 0; + count = 0; + + if (!haslen) { + strlen = -1; + } + iter = ucol_openElements(gCol, str, strlen, &error); + if (!haslen) { + strlen = u_strlen(str); + } + strlen -= 5; // any left over characters are not iterated, + // this is to ensure the backwards and forwards iterators + // gets the same position + startTime = timeGetTime(); + while (count < opt_loopCount) { + int count5 = 5; + strindex = 0; + ucol_setOffset(iter, strindex, &error); + while (TRUE) { + if (ucol_next(iter, &error) == UCOL_NULLORDER) { + break; + } + gCount++; + count5 --; + if (count5 == 0) { + strindex += 10; + if (strindex > strlen) { + break; + } + ucol_setOffset(iter, strindex, &error); + count5 = 5; + } + } + count ++; + } + + elapsedTime = timeGetTime() - startTime; + printf("elapsedTime %d\n", elapsedTime); + + // empty loop recalculation + int tempgCount = 0; + count = 0; + startTime = timeGetTime(); + while (count < opt_loopCount) { + int count5 = 5; + strindex = 0; + ucol_setOffset(iter, strindex, &error); + while (TRUE) { + tempgCount ++; + count5 --; + if (count5 == 0) { + strindex += 10; + if (strindex > strlen) { + break; + } 
+ ucol_setOffset(iter, strindex, &error); + count5 = 5; + } + } + count ++; + } + elapsedTime -= (timeGetTime() - startTime); + printf("elapsedTime %d\n", elapsedTime); + + ucol_closeElements(iter); + + printf("gCount %d\n", gCount); + ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); + printf("Average time per ucol_next() nano seconds %d\n", ns); +} + +//--------------------------------------------------------------------------------------- +// +// doBackwardIterTest(UBool) Backwards iteration test +// argument null-terminated string used +// +//--------------------------------------------------------------------------------------- +void doBackwardIterTest(UBool haslen) { + int count = 0; + UErrorCode error = U_ZERO_ERROR; + printf("\n\nPerforming backward iteration performance test with "); + + if (haslen) { + printf("non-null terminated data -----------\n"); + } + else { + printf("null terminated data -----------\n"); + } + + printf("performance test on strings from file -----------\n"); + + UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error); + UChar dummytext[] = {0, 0}; + ucol_setText(iter, dummytext, 1, &error); + + gCount = 0; + unsigned long startTime = timeGetTime(); + while (count < opt_loopCount) { + int linecount = 0; + while (linecount < gNumFileLines) { + UChar *str = gFileLines[linecount].name; + int strlen = haslen?gFileLines[linecount].len:-1; + ucol_setText(iter, str, strlen, &error); + while (ucol_previous(iter, &error) != UCOL_NULLORDER) { + gCount ++; + } + + linecount ++; + } + count ++; + } + unsigned long elapsedTime = timeGetTime() - startTime; + + printf("elapsedTime %d\n", elapsedTime); + + // empty loop recalculation + count = 0; + startTime = timeGetTime(); + while (count < opt_loopCount) { + int linecount = 0; + while (linecount < gNumFileLines) { + UChar *str = gFileLines[linecount].name; + int strlen = haslen?gFileLines[linecount].len:-1; + ucol_setText(iter, str, strlen, &error); + linecount ++; + } + 
count ++; + } + elapsedTime -= (timeGetTime() - startTime); + + printf("elapsedTime %d\n", elapsedTime); + ucol_closeElements(iter); + + int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); + printf("Total number of strings compared %d in %d loops\n", gNumFileLines, + opt_loopCount); + printf("Average time per ucol_previous() nano seconds %d\n", ns); + + printf("performance test on skipped-5 concatenated strings from file -----------\n"); + + UChar *str; + int strlen = 0; + // appending all the strings + int linecount = 0; + while (linecount < gNumFileLines) { + strlen += haslen?gFileLines[linecount].len: + u_strlen(gFileLines[linecount].name); + linecount ++; + } + str = (UChar *)malloc(sizeof(UChar) * strlen); + int strindex = 0; + linecount = 0; + while (strindex < strlen) { + int len = 0; + len += haslen?gFileLines[linecount].len: + u_strlen(gFileLines[linecount].name); + memcpy(str + strindex, gFileLines[linecount].name, + sizeof(UChar) * len); + strindex += len; + linecount ++; + } + + printf("Total size of strings %d\n", strlen); + + gCount = 0; + count = 0; + + if (!haslen) { + strlen = -1; + } + + iter = ucol_openElements(gCol, str, strlen, &error); + if (!haslen) { + strlen = u_strlen(str); + } + + startTime = timeGetTime(); + while (count < opt_loopCount) { + int count5 = 5; + strindex = 5; + ucol_setOffset(iter, strindex, &error); + while (TRUE) { + if (ucol_previous(iter, &error) == UCOL_NULLORDER) { + break; + } + gCount ++; + count5 --; + if (count5 == 0) { + strindex += 10; + if (strindex > strlen) { + break; + } + ucol_setOffset(iter, strindex, &error); + count5 = 5; + } + } + count ++; + } + + elapsedTime = timeGetTime() - startTime; + printf("elapsedTime %d\n", elapsedTime); + + // empty loop recalculation + count = 0; + int tempgCount = 0; + startTime = timeGetTime(); + while (count < opt_loopCount) { + int count5 = 5; + strindex = 5; + ucol_setOffset(iter, strindex, &error); + while (TRUE) { + tempgCount ++; + count5 --; + if 
(count5 == 0) { + strindex += 10; + if (strindex > strlen) { + break; + } + ucol_setOffset(iter, strindex, &error); + count5 = 5; + } + } + count ++; + } + elapsedTime -= (timeGetTime() - startTime); + printf("elapsedTime %d\n", elapsedTime); + ucol_closeElements(iter); + + printf("gCount %d\n", gCount); + ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount); + printf("Average time per ucol_previous() nano seconds %d\n", ns); +} + +//--------------------------------------------------------------------------------------- +// +// doIterTest() Iteration test +// +//--------------------------------------------------------------------------------------- +void doIterTest() { + doForwardIterTest(opt_uselen); + doBackwardIterTest(opt_uselen); +} + + +//---------------------------------------------------------------------------------------- +// +// UnixConvert -- Convert the lines of the file to the encoding for UNIX +// Since it appears that Unicode support is going in the general +// direction of the use of UTF-8 locales, that is the approach +// that is used here. +// +//---------------------------------------------------------------------------------------- +void UnixConvert() { + int line; + + UConverter *cvrtr; // An ICU code page converter. + UErrorCode status = U_ZERO_ERROR; + + + cvrtr = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now. + if (U_FAILURE(status)) { + fprintf(stderr, "ICU Converter open failed.: %d\n", &status); + exit(-1); + } + + for (line=0; line < gNumFileLines; line++) { + int sizeNeeded = ucnv_fromUChars(cvrtr, + 0, // ptr to target buffer. + 0, // length of target buffer. 
+ gFileLines[line].name, + -1, // source is null terminated + &status); + if (status != U_BUFFER_OVERFLOW_ERROR && status != U_ZERO_ERROR) { + fprintf(stderr, "Conversion from Unicode, something is wrong.\n"); + exit(-1); + } + status = U_ZERO_ERROR; + gFileLines[line].unixName = new char[sizeNeeded+1]; + sizeNeeded = ucnv_fromUChars(cvrtr, + gFileLines[line].unixName, // ptr to target buffer. + sizeNeeded+1, // length of target buffer. + gFileLines[line].name, + -1, // source is null terminated + &status); + if (U_FAILURE(status)) { + fprintf(stderr, "ICU Conversion Failed.: %d\n", status); + exit(-1); + } + gFileLines[line].unixName[sizeNeeded] = 0; + }; + ucnv_close(cvrtr); +} + + +//---------------------------------------------------------------------------------------- +// +// class UCharFile Class to hide all the gorp to read a file in +// and produce a stream of UChars. +// +//---------------------------------------------------------------------------------------- +class UCharFile { +public: + UCharFile(const char *fileName); + ~UCharFile(); + UChar get(); + UBool eof() {return fEof;}; + UBool error() {return fError;}; + +private: + UCharFile (const UCharFile &other) {}; // No copy constructor. + UCharFile & operator = (const UCharFile &other) {return *this;}; // No assignment op + + FILE *fFile; + const char *fName; + UBool fEof; + UBool fError; + UChar fPending2ndSurrogate; + + enum {UTF16LE, UTF16BE, UTF8} fEncoding; +}; + +UCharFile::UCharFile(const char * fileName) { + fEof = FALSE; + fError = FALSE; + fName = fileName; + fFile = fopen(fName, "rb"); + fPending2ndSurrogate = 0; + if (fFile == NULL) { + fprintf(stderr, "Can not open file \"%s\"\n", opt_fName); + fError = TRUE; + return; + } + // + // Look for the byte order mark at the start of the file. 
+ // + int BOMC1, BOMC2, BOMC3; + BOMC1 = fgetc(fFile); + BOMC2 = fgetc(fFile); + + if (BOMC1 == 0xff && BOMC2 == 0xfe) { + fEncoding = UTF16LE; } + else if (BOMC1 == 0xfe && BOMC2 == 0xff) { + fEncoding = UTF16BE; } + else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) { + fEncoding = UTF8; } + else + { + fprintf(stderr, "collperf: file \"%s\" encoding must be UTF-8 or UTF-16, and " + "must include a BOM.\n", fileName); + fError = true; + return; + } +} + + +UCharFile::~UCharFile() { + fclose(fFile); +} + + + +UChar UCharFile::get() { + UChar c; + switch (fEncoding) { + case UTF16LE: + { + int cL, cH; + cL = fgetc(fFile); + cH = fgetc(fFile); + c = cL | (cH << 8); + if (cH == EOF) { + c = 0; + fEof = TRUE; + } + break; + } + case UTF16BE: + { + int cL, cH; + cH = fgetc(fFile); + cL = fgetc(fFile); + c = cL | (cH << 8); + if (cL == EOF) { + c = 0; + fEof = TRUE; + } + break; + } + case UTF8: + { + if (fPending2ndSurrogate != 0) { + c = fPending2ndSurrogate; + fPending2ndSurrogate = 0; + break; + } + + int ch = fgetc(fFile); // Note: c and ch are separate cause eof test doesn't work on UChar type. + if (ch == EOF) { + c = 0; + fEof = TRUE; + break; + } + + if (ch <= 0x7f) { + // It's ascii. No further utf-8 conversion. + c = ch; + break; + } + + // Figure out the lenght of the char and read the rest of the bytes + // into a temp array. + int nBytes; + if (ch >= 0xF0) {nBytes=4;} + else if (ch >= 0xE0) {nBytes=3;} + else if (ch >= 0xC0) {nBytes=2;} + else { + fprintf(stderr, "utf-8 encoded file contains corrupt data.\n"); + fError = TRUE; + return 0; + } + + unsigned char bytes[10]; + bytes[0] = (unsigned char)ch; + int i; + for (i=1; i= 0xc0) { + fprintf(stderr, "utf-8 encoded file contains corrupt data.\n"); + fError = TRUE; + return 0; + } + } + + // Convert the bytes from the temp array to a Unicode char. 
+ i = 0; + uint32_t cp; + UTF8_NEXT_CHAR_UNSAFE(bytes, i, cp); + c = (UChar)cp; + + if (cp >= 0x10000) { + // The code point needs to be broken up into a utf-16 surrogate pair. + // Process first half this time through the main loop, and + // remember the other half for the next time through. + UChar utf16Buf[3]; + i = 0; + UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp); + fPending2ndSurrogate = utf16Buf[1]; + c = utf16Buf[0]; + } + break; + }; + } + return c; +} + +//---------------------------------------------------------------------------------------- +// +// openRulesCollator - Command line specified a rules file. Read it in +// and open a collator with it. +// +//---------------------------------------------------------------------------------------- +UCollator *openRulesCollator() { + UCharFile f(opt_rules); + if (f.error()) { + return 0; + } + + int bufLen = 10000; + UChar *buf = (UChar *)malloc(bufLen * sizeof(UChar)); + int i = 0; + + for(;;) { + buf[i] = f.get(); + if (f.eof()) { + break; + } + if (f.error()) { + return 0; + } + i++; + if (i >= bufLen) { + bufLen += 10000; + buf = (UChar *)realloc(buf, bufLen); + } + } + buf[i] = 0; + + UErrorCode status = U_ZERO_ERROR; + UCollator *coll = ucol_openRules(buf, u_strlen(buf), UCOL_OFF, + UCOL_DEFAULT_STRENGTH, NULL, &status); + if (U_FAILURE(status)) { + fprintf(stderr, "ICU ucol_openRules() open failed.: %d\n", status); + return 0; + } + free(buf); + return coll; +} + + + + + +//---------------------------------------------------------------------------------------- +// +// Main -- process command line, read in and pre-process the test file, +// call other functions to do the actual tests. +// +//---------------------------------------------------------------------------------------- +int main(int argc, const char** argv) { + if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) { + printf(gUsageString); + exit (1); + } + + // Make sure that we've only got one API selected. 
+ if (opt_unix || opt_win) opt_icu = FALSE; + if (opt_unix) opt_win = FALSE; + + // + // Set up an ICU collator + // + UErrorCode status = U_ZERO_ERROR; + + if (opt_rules != 0) { + gCol = openRulesCollator(); + if (gCol == 0) {return -1;} + } + else { + gCol = ucol_open(opt_locale, &status); + if (U_FAILURE(status)) { + fprintf(stderr, "Collator creation failed.: %d\n", status); + return -1; + } + } + if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) { + fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale); + } + if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) { + fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", opt_locale); + } + + if (opt_norm) { + ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); + } + if (opt_french && opt_frenchoff) { + fprintf(stderr, "collperf: Error, specified both -french and -frenchoff options."); + exit(-1); + } + if (opt_french) { + ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_ON, &status); + } + if (opt_frenchoff) { + ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status); + } + if (opt_lower) { + ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status); + } + if (opt_upper) { + ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status); + } + if (opt_case) { + ucol_setAttribute(gCol, UCOL_CASE_LEVEL, UCOL_ON, &status); + } + if (opt_shifted) { + ucol_setAttribute(gCol, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status); + } + if (opt_level != 0) { + switch (opt_level) { + case 1: + ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_PRIMARY, &status); + break; + case 2: + ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_SECONDARY, &status); + break; + case 3: + ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_TERTIARY, &status); + break; + case 4: + ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_QUATERNARY, &status); + break; + case 5: + ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_IDENTICAL, &status); + break; + default: + fprintf(stderr, "-level param 
must be between 1 and 5\n"); + exit(-1); + } + } + + if (U_FAILURE(status)) { + fprintf(stderr, "Collator attribute setting failed.: %d\n", status); + return -1; + } + + + // + // Set up a Windows LCID + // + if (opt_langid != 0) { + gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT); + } + else { + gWinLCID = uloc_getLCID(opt_locale); + } + + + // + // Set the UNIX locale + // + if (opt_unix) { + if (setlocale(LC_ALL, opt_locale) == 0) { + fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale); + exit(-1); + } + } + + // Read in the input file. + // File assumed to be utf-16. + // Lines go onto heap buffers. Global index array to line starts is created. + // Lines themselves are null terminated. + // + + UCharFile f(opt_fName); + if (f.error()) { + exit(-1); + } + + const int MAXLINES = 40000; + gFileLines = new Line[MAXLINES]; + UChar buf[1024]; + int column = 0; + + // Read the file, split into lines, and save in memory. + // Loop runs once per utf-16 value from the input file, + // (The number of bytes read from file per loop iteration depends on external encoding.) + for (;;) { + + UChar c = f.get(); + if (f.error()){ + exit(-1); + } + + + // We now have a good UTF-16 value in c. + + // Watch for CR, LF, EOF; these finish off a line. + if (c == 0xd) { + continue; + } + + if (f.eof() || c == 0x0a || c==0x2028) { // Unipad inserts 2028 line separators! + buf[column++] = 0; + if (column > 1) { + gFileLines[gNumFileLines].name = new UChar[column]; + gFileLines[gNumFileLines].len = column-1; + memcpy(gFileLines[gNumFileLines].name, buf, column * sizeof(UChar)); + gNumFileLines++; + column = 0; + if (gNumFileLines >= MAXLINES) { + fprintf(stderr, "File too big. 
Max number of lines is %d\n", MAXLINES); + exit(-1); + } + + } + if (c == 0xa || c == 0x2028) + continue; + else + break; // EOF + } + buf[column++] = c; + if (column >= 1023) + { + static UBool warnFlag = TRUE; + if (warnFlag) { + fprintf(stderr, "Warning - file line longer than 1023 chars truncated.\n"); + warnFlag = FALSE; + } + column--; + } + } + + if (opt_terse == FALSE) { + printf("file \"%s\", %d lines.\n", opt_fName, gNumFileLines); + } + + + // Convert the lines to the UNIX encoding. + if (opt_unix) { + UnixConvert(); + } + + // + // Pre-compute ICU sort keys for the lines of the file. + // + int line; + int t; + + for (line=0; line sizeof(buf)) { + t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)gFileLines[line].icuSortKey , t); + } + else + { + memcpy(gFileLines[line].icuSortKey, buf, t); + } + } + + + + // + // Pre-compute Windows sort keys for the lines of the file. + // + for (line=0; line sizeof(buf)) { + t = LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, (unsigned short *)(gFileLines[line].winSortKey), t); + } + else + { + memcpy(gFileLines[line].winSortKey, buf, t); + } + } + + // + // Pre-compute UNIX sort keys for the lines of the file. + // + if (opt_unix) { + for (line=0; line sizeof(buf)) { + t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, sizeof(buf)); + } + else + { + memcpy(gFileLines[line].unixSortKey, buf, t); + } + } + } + + + // + // Dump file lines, CEs, Sort Keys if requested. 
+ // + if (opt_dump) { + int i; + for (line=0; line 0x7e) { + printf("\\u%.4x", c); + } + else { + printf("%c", c); + } + } + printf("\n"); + + printf(" CEs: "); + UCollationElements *CEiter = ucol_openElements(gCol, gFileLines[line].name, -1, &status); + int32_t ce; + i = 0; + for (;;) { + ce = ucol_next(CEiter, &status); + if (ce == UCOL_NULLORDER) { + break; + } + printf(" %.8x", ce); + if (++i > 8) { + printf("\n "); + i = 0; + } + } + printf("\n"); + ucol_closeElements(CEiter); + + + printf(" ICU Sort Key: "); + for (i=0; ; i++) { + unsigned char c = gFileLines[line].icuSortKey[i]; + printf("%02x ", c); + if (c == 0) { + break; + } + if (i > 0 && i % 20 == 0) { + printf("\n "); + } + } + printf("\n"); + } + } + + + // + // Pre-sort the lines. + // + int i; + gSortedLines = new Line *[gNumFileLines]; + for (i=0; i +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=collperf - Win32 Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "collperf.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. 
For example: +!MESSAGE +!MESSAGE NMAKE /f "collperf.mak" CFG="collperf - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "collperf - Win32 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "collperf - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "collperf - Win64 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "collperf - Win64 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "collperf - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /Ob2 /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 icuuc.lib icuin.lib ctestfw.lib icutu.lib kernel32.lib user32.lib advapi32.lib shell32.lib winmm.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib\\" + 
+!ELSEIF "$(CFG)" == "collperf - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /GZ /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 icuucd.lib icuind.lib icutud.lib winmm.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib\\" + +!ELSEIF "$(CFG)" == "collperf - Win64 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD CPP /nologo /MD /W3 /GX /Zi /O2 /Op /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN64" /D 
"NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c +# SUBTRACT CPP /YX +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64 +# ADD LINK32 icuuc.lib icuin.lib ctestfw.lib icutu.lib kernel32.lib user32.lib advapi32.lib shell32.lib winmm.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib\\" /machine:IA64 + +!ELSEIF "$(CFG)" == "collperf - Win64 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /Op /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GZ /Wp64 /Zm600 /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib 
oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64 +# ADD LINK32 icuucd.lib icuind.lib icutud.lib winmm.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib\\" /machine:IA64 + +!ENDIF + +# Begin Target + +# Name "collperf - Win32 Release" +# Name "collperf - Win32 Debug" +# Name "collperf - Win64 Release" +# Name "collperf - Win64 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\collperf.cpp +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/icuSources/test/collperf/readme.html b/icuSources/test/collperf/readme.html new file mode 100644 index 00000000..40940d10 --- /dev/null +++ b/icuSources/test/collperf/readme.html @@ -0,0 +1,84 @@ + + + + + + + + collperf + + + + +

collperf

+

collperf is a test program for comparing collation performance and key lengths of ICU, Windows native collation +and Unix/POSIX collation. It operates on a file of lines (names, for example), and performs one of three tests:

+ +
    +
  1. Sort Key generation. Report on key lengths and key generation times. +
  2. Binary search. Report the average time required to look up each of the names (file lines) from the file in + a sorted list of all of the names. +
  3. Quick Sort. Report the time required to sort the file in memory, using the C library qsort function. The file + order is randomized prior to the sort. +
+ +

+

Usage Summary

+ +
+

+ + + + +
+

collperf -help +

+
Usage: strperf options...
+-help                  Display this message.
+-file file_name        utf-16 format file of names
+-locale name           ICU locale to use. Default is en_US
+-langid 0x1234         Windows Language ID number. Default 0x409 (en_US)
+                       see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm
+-win                   Run test using Windows native services. (ICU is default)
+-unix                  Run test using Unix strxfrm, strcoll services.
+-uselen                Use API with string lengths. Default is null-terminated strings
+-usekeys               Run tests using sortkeys rather than strcoll
+-loop nnnn             Loopcount for test. Adjust for reasonable total running time.
+-terse                 Terse numbers-only output. Intended for use by scripts.
+-french                French accent ordering
+-norm                  Normalizing mode on
+-shifted               Shifted mode
+-lower                 Lower case first
+-upper                 Upper case first
+-case                  Enable separate case level
+-level n               Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical
+-binsearch             Binary Search timing test
+-keygen                Sort Key Generation timing test
+-qsort                 Quicksort timing test
+
+
+

+
+ +

Example

+ +
+

+ + + + +
C:\>collperf -loop 200 -file latin.txt -keygen -shifted -level 4
+ file "latin.txt", 7604 lines.
+ Sort Key Generation: total # of keys = 197704
+ Sort Key Generation: time per key = 4253 ns
+ Key Length / character = 1.730054
+ +

+ +

+ + + + \ No newline at end of file diff --git a/icuSources/test/perf/all/all.dsp b/icuSources/test/perf/all/all.dsp new file mode 100644 index 00000000..83d50f67 --- /dev/null +++ b/icuSources/test/perf/all/all.dsp @@ -0,0 +1,63 @@ +# Microsoft Developer Studio Project File - Name="all" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Generic Project" 0x010a + +CFG=all - Win32 Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "all.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "all.mak" CFG="all - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "all - Win32 Release" (based on "Win32 (x86) Generic Project") +!MESSAGE "all - Win32 Debug" (based on "Win32 (x86) Generic Project") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +MTL=midl.exe + +!IF "$(CFG)" == "all - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Target_Dir "" + +!ELSEIF "$(CFG)" == "all - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Target_Dir "" + +!ENDIF + +# Begin Target + +# Name "all - Win32 Release" +# Name "all - Win32 Debug" +# End Target +# End Project diff --git 
a/icuSources/test/unalignedtest/Makefile.in b/icuSources/test/unalignedtest/Makefile.in new file mode 100644 index 00000000..d4fa022e --- /dev/null +++ b/icuSources/test/unalignedtest/Makefile.in @@ -0,0 +1,83 @@ +## Makefile.in for ICU - test/unalignedtest +## Copyright (c) 2001, International Business Machines Corporation and +## others. All Rights Reserved. + +## Source directory information +srcdir = @srcdir@ +top_srcdir = @top_srcdir@ + +top_builddir = ../.. + +include $(top_builddir)/icudefs.mk + +## Build directory information +subdir = test/unalignedtest + +## Extra files to remove for 'make clean' +CLEANFILES = *~ $(DEPS) + +## Target information +TARGET = unalignedtest + +DEFS += -I$(top_builddir)/common -I$(top_srcdir)/common +LIBS = $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M) + +OBJECTS = unaligned.o + +DEPS = $(OBJECTS:.o=.d) + +## List of phony targets +.PHONY : all all-local install install-local clean clean-local \ +distclean distclean-local dist dist-local check check-local + +## Clear suffix list +.SUFFIXES : + +## List of standard targets +all: all-local +install: install-local +clean: clean-local +distclean : distclean-local +dist: dist-local +check: all check-local + +all-local: $(TARGET) + +install-local: + +dist-local: + +clean-local: + test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES) + $(RMV) $(OBJECTS) $(TARGET) + +distclean-local: clean-local + $(RMV) Makefile + +check-local: all-local check-cintltst check-intltest + +check-cintltst: + - $(INVOKE) ./$(TARGET) $(top_srcdir)/test/cintltst/cintltst -a + +check-intltest: + - $(INVOKE) ./$(TARGET) $(top_srcdir)/test/intltest/intltest -a + +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + cd $(top_builddir) \ + && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status + +$(TARGET) : $(OBJECTS) + $(LINK.cc) -o $@ $^ + +invoke: + ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION) + +ifeq (,$(MAKECMDGOALS)) +-include $(DEPS) +else +ifneq ($(patsubst 
%clean,,$(MAKECMDGOALS)),) +ifneq ($(patsubst %install,,$(MAKECMDGOALS)),) +-include $(DEPS) +endif +endif +endif diff --git a/icuSources/test/unalignedtest/readme b/icuSources/test/unalignedtest/readme new file mode 100644 index 00000000..acf10a1c --- /dev/null +++ b/icuSources/test/unalignedtest/readme @@ -0,0 +1,27 @@ +Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved. +Unalignedtest +============= + +This is a test to find if ICU is 64 bit clean. This test runs cintltst and intltest through gdb and produces SIGBUS fault +whenever the kernel encounters a unaligned trap. + +Build and Usage +================== +To build this test: + +i) Build and test ICU + +ii) cd to /source and run the following command to build the Makefile +CONFIG_FILES=./test/unalignedtest/Makefile CONFIG_HEADERS= ./config.status + +iii) cd to /source/test/unalignedtest and run 'make' to build the executable + +iv) For testing cintltst run 'make check-cintltst' + +v) For testing intltest run 'make check-intltest' + +vi) To find out if any of the tools are performing unaligned traps: + a) cd to /source/data + b) run 'make clean' + c) run 'env LEAK_CHECKER="/source/test/unalignedtest/unalignedtest -b" make' + diff --git a/icuSources/test/unalignedtest/unaligned.c b/icuSources/test/unalignedtest/unaligned.c new file mode 100644 index 00000000..69137a30 --- /dev/null +++ b/icuSources/test/unalignedtest/unaligned.c @@ -0,0 +1,304 @@ +/* + + This program is a wrapper to assist in debugging analigned traps on the Alpha + + architectures. 
+ + + + COPYRIGHT AND PERMISSION NOTICE + + + + Copyright (c) 2002 Sean Hunter + + + + Permission is hereby granted, free of charge, to any person obtaining a + + copy of this software and associated documentation files (the + + "Software"), to deal in the Software without restriction, including + + without limitation the rights to use, copy, modify, merge, publish, + + distribute, and/or sell copies of the Software, and to permit persons + + to whom the Software is furnished to do so, provided that the above + + copyright notice(s) and this permission notice appear in all copies of + + the Software and that both the above copyright notice(s) and this + + permission notice appear in supporting documentation. + + + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT + + OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + + HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL + + INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING + + FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, + + NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION + + WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + + + Except as contained in this notice, the name of a copyright holder + + shall not be used in advertising or otherwise to promote the sale, use + + or other dealings in this Software without prior written authorization + + of the copyright holder. + + + + -------------------------------------------------------------------------------- + + All trademarks and registered trademarks mentioned herein are the property + + of their respective owners. 
+ + + +*/ + +#include + +#include + + + +#include + +#include + + + +#define TMP_PATH_MAX 1024 + + + + + +static int + +setsysinfo(unsigned long op, void *buffer, unsigned long size, + + int *start, void *arg, unsigned long flag) + +{ + + syscall(__NR_osf_setsysinfo, op, buffer, size, start, arg, flag); + +} + + + + + +void + +trap_unaligned(void) + +{ + + unsigned int buf[2]; + + buf[0] = SSIN_UACPROC; + + buf[1] = UAC_SIGBUS | UAC_NOPRINT; + + setsysinfo(SSI_NVPAIRS, buf, 1, 0, 0, 0); + +} + + + + + +static void + +usage(void) + +{ + + fprintf(stderr, + + "usage: unaligned [-b] [command-args...]\n\n" + + " This program is designed to assist debugging of\n" + + " unaligned traps by running the program in gdb\n" + + " and causing it to get SIGBUS when it encounters\n" + + " an unaligned trap.\n\n" + + " It is free software written by Sean Hunter \n" + + " based on code by Richard Henderson and Andrew Morgan.\n\n" + + ); + + + + exit(1); + +} + + + + + +int + +main(int argc, char **argv) + +{ + + const char my_debugger[] = "/usr/bin/gdb"; + + + + char *temp_str; + + char *curr; + + int size = 0; + + int curr_arg; + + int isBatchMode = 0; + + + + /* check that we have at least 1 argument */ + + if (argc < 2) { + + usage(); + + } + + if( strcmp("-b" , argv[1]) == 0 ){ + + isBatchMode = 1; + + curr_arg = 2; + + }else{ + + curr_arg = 1; + + } + + + + trap_unaligned(); + + + + if (argc > 2) { + + /* We're going to use bash process redirection to create a "file" for gdb to read + + * containing the arguments we need */ + + size = 2048; + + for(; curr_arg < argc; curr_arg++) { + + size += strlen(argv[curr_arg]); + + } + + temp_str = (char *) malloc(sizeof(char) * size); + + if (!temp_str) { + + fprintf(stderr, "Unable to malloc memory for string use: %s\n", strerror(errno)); + + exit(255); + + } + + if(isBatchMode==1){ + + sprintf(temp_str, "%s -batch %s -x <( echo file %s; echo set args", my_debugger, argv[2], argv[2]); + + }else{ + + sprintf(temp_str, "%s %s -x <( echo 
file %s; echo set args", my_debugger, argv[1], argv[1]); + + } + + curr = temp_str + strlen(temp_str); + + for(curr_arg = 2; curr_arg < argc; curr_arg++) { + + sprintf(curr, " %s", argv[curr_arg]); + + curr = temp_str + strlen(temp_str); + + } + +#ifndef NOAUTORUN + + curr = temp_str + strlen(temp_str); + + sprintf(curr, "; echo run"); + +#endif + + curr = temp_str + strlen(temp_str); + + sprintf(curr, ")"); + + + + execlp("/bin/bash", "/bin/bash", "-c", temp_str, NULL); + + + + } + + else { + + execlp(my_debugger, my_debugger, argv[1], NULL); + + } + + + + /* if we fall through to here, our exec failed -- announce the fact */ + + fprintf(stderr, "Unable to execute command: %s\n", strerror(errno)); + + + + usage(); + + + +} + + + +/* use gcc unaligned.c -o unaliged to compile. Add -DNOAUTORUN if you + +don't want gdb to automatically run the program */ + + + diff --git a/icuSources/test/usetperf/bitset.cpp b/icuSources/test/usetperf/bitset.cpp new file mode 100644 index 00000000..d1356c0e --- /dev/null +++ b/icuSources/test/usetperf/bitset.cpp @@ -0,0 +1,63 @@ +/* +********************************************************************** +* Copyright (c) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* 2002-09-20 aliu Created. +*/ + +#include "unicode/utypes.h" +#include "cmemory.h" +#include "bitset.h" + +// TODO: have a separate capacity, so the len can just be set to +// zero in the clearAll() method, and growth can be smarter. + +const int32_t SLOP = 8; + +const int32_t BYTES_PER_WORD = sizeof(int32_t); + +BitSet::BitSet() { + len = SLOP; + data = (int32_t*) uprv_malloc(len * BYTES_PER_WORD); + clearAll(); +} + +BitSet::~BitSet() { + uprv_free(data); +} + +UBool BitSet::get(int32_t bitIndex) const { + uint32_t longIndex = bitIndex >> 5; + int32_t bitInLong = bitIndex & 0x1F; + return (longIndex < len) ? 
(((data[longIndex] >> bitInLong) & 1) != 0) + : FALSE; +} + +void BitSet::set(int32_t bitIndex) { + uint32_t longIndex = bitIndex >> 5; + int32_t bitInLong = bitIndex & 0x1F; + if (longIndex >= len) { + ensureCapacity(longIndex+1); + } + data[longIndex] |= (1 << bitInLong); +} + +void BitSet::clearAll() { + for (uint32_t i=0; i + +class Timer { + LARGE_INTEGER tstart, tend; +public: + Timer() {} + inline void start() { + QueryPerformanceCounter(&tstart); + } + inline double stop() { + QueryPerformanceCounter(&tend); + LARGE_INTEGER freq; + int result = QueryPerformanceFrequency(&freq); + return ((double)(tend.QuadPart - tstart.QuadPart))/((double)freq.QuadPart); + } +}; + +//---------------------------------------------------------------------- +// UNIX + +#else + +#include + +class Timer { + struct timeval tstart, tend; + struct timezone tz; +public: + Timer() {} + inline void start() { + gettimeofday(&tstart, &tz); + } + inline double stop() { + gettimeofday(&tend, &tz); + double t1, t2; + t1 = (double)tstart.tv_sec + (double)tstart.tv_usec*1e-6; + t2 = (double)tend.tv_sec + (double)tend.tv_usec*1e-6; + return t2-t1; + } +}; + +#endif +#endif diff --git a/icuSources/test/usetperf/usetperf.cpp b/icuSources/test/usetperf/usetperf.cpp new file mode 100644 index 00000000..cdff2adb --- /dev/null +++ b/icuSources/test/usetperf/usetperf.cpp @@ -0,0 +1,122 @@ +/* +********************************************************************** +* Copyright (c) 2002-2004, International Business Machines +* Corporation and others. All Rights Reserved. +********************************************************************** +* 2002-09-20 aliu Created. 
+*/ + +#include + +#include "unicode/utypes.h" +#include "unicode/uniset.h" +#include "unicode/uchar.h" +#include "unicode/usetiter.h" +#include "bitset.h" +#include "timer.h" + +#define LENGTH(a) (sizeof(a)/sizeof(a[0])) + +int main(int argc, const char *argv[]) { + + Timer timer; + BitSet bs; + UnicodeSet us; + int32_t i, j, n, temp; + UChar32 cp; + double t; + + int32_t PROPS[] = { + // category iterations for add, contains, iterator + U_TITLECASE_LETTER, 100, 100, 20000000, + U_UNASSIGNED, 30, 100, 20000000, + }; + + for (j=0; j %f ms/loop\n", t, t*1e3/n); + + // contains() + n = PROPS[j+2]; + printf("Testing contains() x %d...", n); + temp = 0; + timer.start(); + for (i=0; i %f ms/loop\n", t, t*1e3/n); + + // iterator + n = PROPS[j+3]; + printf("Testing iterator x %d...", n); + temp = 0; + timer.start(); + for (i=0; i %f ns/loop\n", t, t*1e9/n); + } + + char* PAT[] = { + "['A-Za-z\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u01FB\\u0200-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u212A-\\u212B]", + + 
"['.0-9A-Za-z~\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u0301\\u0303-\\u0304\\u0306-\\u0307\\u0310\\u0314-\\u0315\\u0323\\u0325\\u0331\\u0341\\u0344\\u0385-\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u0390\\u03AC-\\u03B0\\u03CC-\\u03CE\\u03D3\\u0403\\u040C\\u040E\\u0419\\u0439\\u0453\\u045C\\u045E\\u04C1-\\u04C2\\u04D0-\\u04D1\\u04D6-\\u04D7\\u04E2-\\u04E3\\u04EE-\\u04EF\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u1F01\\u1F03-\\u1F05\\u1F07\\u1F09\\u1F0B-\\u1F0D\\u1F0F\\u1F11\\u1F13-\\u1F15\\u1F19\\u1F1B-\\u1F1D\\u1F21\\u1F23-\\u1F25\\u1F27\\u1F29\\u1F2B-\\u1F2D\\u1F2F\\u1F31\\u1F33-\\u1F35\\u1F37\\u1F39\\u1F3B-\\u1F3D\\u1F3F\\u1F41\\u1F43-\\u1F45\\u1F49\\u1F4B-\\u1F4D\\u1F51\\u1F53-\\u1F55\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F\\u1F61\\u1F63-\\u1F65\\u1F67\\u1F69\\u1F6B-\\u1F6D\\u1F6F\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79\\u1F7B\\u1F7D\\u1F81\\u1F83-\\u1F85\\u1F87\\u1F89\\u1F8B-\\u1F8D\\u1F8F\\u1F91\\u1F93-\\u1F95\\u1F97\\u1F99\\u1F9B-\\u1F9D\\u1F9F\\u1FA1\\u1FA3-\\u1FA5\\u1FA7\\u1FA9\\u1FAB-\\u1FAD\\u1FAF-\\u1FB1\\u1FB4\\u1FB8-\\u1FB9\\u1FBB\\u1FC4\\u1FC9\\u1FCB\\u1FCE\\u1FD0-\\u1FD1\\u1FD3\\u1FD8-\\u1FD9\\u1FDB\\u1FDE\\u1FE0-\\u1FE1\\u1FE3\\u1FE5\\u1FE8-\\u1FE9\\u1FEB-\\u1FEC\\u1FEE\\u1FF4\\u1FF9\\u1FFB\\u212A-\\u212B\\uE04D\\uE064]", + + "[\\u0901-\\u0903\\u0905-\\u0939\\u093C-\\u094D\\u0950-\\u0954\\u0958-\\u096F]", + }; + + UErrorCode ec = U_ZERO_ERROR; + + n = 2000; + + for (j=0; j %f us/loop\n", t, t*1e6/n); + } + + return 0; +} diff --git a/icuSources/test/usetperf/usetperf.dsp b/icuSources/test/usetperf/usetperf.dsp new file mode 100644 index 00000000..3d012285 --- /dev/null +++ b/icuSources/test/usetperf/usetperf.dsp @@ -0,0 
+1,164 @@ +# Microsoft Developer Studio Project File - Name="usetperf" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=usetperf - Win32 Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "usetperf.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "usetperf.mak" CFG="usetperf - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "usetperf - Win32 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "usetperf - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "usetperf - Win64 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "usetperf - Win64 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "usetperf - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib 
comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib" + +!ELSEIF "$(CFG)" == "usetperf - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib" + +!ELSEIF "$(CFG)" == "usetperf - Win64 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE 
Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD CPP /nologo /MD /W3 /GX /Zi /O2 /Op /I "..\..\..\include" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64 +# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64 + +!ELSEIF "$(CFG)" == "usetperf - Win64 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /Op /I "..\..\..\include" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /Wp64 /Zm600 
/c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64 +# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64 + +!ENDIF + +# Begin Target + +# Name "usetperf - Win32 Release" +# Name "usetperf - Win32 Debug" +# Name "usetperf - Win64 Release" +# Name "usetperf - Win64 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\bitset.cpp +# ADD CPP /I "..\..\common" +# End Source File +# Begin Source File + +SOURCE=.\usetperf.cpp +# ADD CPP /I "..\..\common" +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/icuSources/test/utfperf/utfperf.c b/icuSources/test/utfperf/utfperf.c new file mode 100644 index 00000000..dfe7e3f8 --- /dev/null +++ b/icuSources/test/utfperf/utfperf.c @@ -0,0 +1,450 @@ +/* +********************************************************************** +* Copyright (C) 2002, International Business Machines +* Corporation and others. All Rights Reserved. 
+********************************************************************** +* file name: utfperf.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2002apr17 +* created by: Markus W. Scherer +* +* Performance test program for Unicode converters +* (converters that support all Unicode code points). +* Takes a UTF-8 file as input. +*/ + +#include +#include + +#include /* for _O_BINARY */ +#include /* for _setmode() */ + +#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) +# include +#else +# include + static unsigned long + timeGetTime() { + struct timeval t; + + gettimeofday(&t, 0); + return t.tv_sec*1000+t.tv_usec/1000; + }; +#endif + +#include "unicode/utypes.h" +#include "unicode/ucnv.h" +#include "unicode/ustring.h" + +/* definitions and text buffers */ + +#define INPUT_CAPACITY (1024*1024) +#define INTERMEDIATE_CAPACITY 4096 +#define INTERMEDIATE_SMALL_CAPACITY 20 +#define OUTPUT_CAPACITY INPUT_CAPACITY + +#define TARGET_MEASURE_TIME_MS 2000 + +#define PERCENT(a, b) (int)(((a)*200+1)/(2*(b))) + +#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0])) + +static UChar input[INPUT_CAPACITY], output[OUTPUT_CAPACITY]; +static char intermediate[INTERMEDIATE_CAPACITY]; + +static int32_t inputLength, encodedLength, outputLength, countInputCodePoints; + +static int32_t utf8Length=0; +static double utf8Time=0.; + +static const char *const +utfNames[]={ + "UTF-8", /* UTF-8 should always be first to serve as percentage reference */ + "SCSU", "BOCU-1" /*, "CESU-8" *//*, "UTF-16BE", "UTF-16LE"*//*, "GB18030"*/ +}; + +/* functions */ + +typedef void +RoundtripFn(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode); + +static void +roundtrip(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) { + const UChar *pIn, *pInLimit; + UChar *pOut, *pOutLimit; + char *pInter, *pInterLimit, *p; + UBool flush; + + ucnv_reset(cnv); + + pIn=input; + pInLimit=input+inputLength; + + pOut=output; + 
pOutLimit=output+OUTPUT_CAPACITY; + + pInterLimit=intermediate+intermediateCapacity; + + encodedLength=outputLength=0; + flush=FALSE; + + while(pIn0); + _time=timeGetTime()-_time; + + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error in roundtrip conversion (%s): %s\n", encName, u_errorName(errorCode)); + return 0x7fffffff; + } + + if(0!=u_memcmp(input, output, inputLength)) { + fprintf(stderr, "error: roundtrip failed, input[]!=output[]\n"); + return 0x7fffffff; + } + + return _time; +} + +static void +perEncAndCapacity(UConverter *cnv, const char *encName, int32_t intermediateCapacity) { + double rtTime; + unsigned long _time; + int32_t n; + + /*printf("test performance for %s with intermediate capacity %d\n", encName, intermediateCapacity);*/ + + /* warm up caches and estimate loop time */ + n=10; + for(;;) { + _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n); + if(_time<500 && _time0) { + /* convert the block */ + p=intermediate; + limit=p+length; + + ucnv_toUnicode(cnv, + &pOut, pOutLimit, + &p, limit, + NULL, FALSE, + &errorCode); + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode)); + ucnv_close(cnv); + return FALSE; + } + + /* read the next block */ + length=readBlock(in); + if(length<0) { + ucnv_close(cnv); + return FALSE; + } + } + + /* flush the converter */ + ucnv_toUnicode(cnv, + &pOut, pOutLimit, + &p, p, + NULL, TRUE, + &errorCode); + ucnv_close(cnv); + + if(U_FAILURE(errorCode)) { + fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode)); + return FALSE; + } + + inputLength=(int32_t)(pOut-input); + countInputCodePoints=u_countChar32(input, inputLength); + if(inputLength<=0) { + fprintf(stderr, "warning: input is empty\n"); + return FALSE; + } + + return TRUE; +} + +static void +showUsage(const char *myName) { + fprintf(stderr, + "Usage:\n" + "%s [-e encoding-name] filename | '-'\n" + " encoding-name must be the name of an encoding 
supported by ICU\n" + " the filename of the input file with text to be used\n" + " can be a dash (-) for standard input\n", + myName); +} + +/* + * Read file using some encoding, convert to 1M UTF-16 input buffer. + * For each UTF to be tested: + * n times: + * convert from UTF-16 input buffer to UTF, 4kB buffer + * convert from 4kB buffer to 1M UTF-16 output buffer + * adjust n so that time elapsed is 10s (#define) + * ->divide 10s by time, increase n by that factor, run 2nd time + * n times: + * empty function + * subtract out loop/function overhead + * display #code points - #UTF bytes - time per roundtrip + * + * * do the same again with an intermediate buffer size of 20 instead of 4kB + * + * Test following UTFs: + * UTF-16BE, UTF-16LE, UTF-8, SCSU, BOCU-1, CESU-8 + * + * Command-line arguments: + * - encoding (default UTF-8, detect BOM) + * - filename (allow "-") + */ +extern int +main(int argc, const char *argv[]) { + FILE *in; + const char *myName, *encName, *filename, *basename; + + myName=argv[0]; + if(argc<2) { + showUsage(myName); + return 1; + } + + /* get encoding name argument */ + if(argv[1][0]=='-' && argv[1][1]=='e') { + encName=argv[1]+2; + --argc; + ++argv; + if(*encName==0) { + if(argc<2) { + showUsage(myName); + return 1; + } + encName=argv[1]; + --argc; + ++argv; + } + } else { + encName=NULL; + } + + /* get filename argument */ + if(argc<2) { + showUsage(myName); + return 1; + } + filename=argv[1]; + if(filename[0]=='-' && filename[1]==0) { + filename="(standard input)"; + in=stdin; + /* set stdin to binary mode */ + _setmode(_fileno(stdin), _O_BINARY); + } else { + in=fopen(filename, "rb"); + if(in==NULL) { + fprintf(stderr, "error opening \"%s\"\n", filename); + showUsage(myName); + return 2; + } + } + + /* read input */ + basename=strrchr(filename, U_FILE_SEP_CHAR); + if(basename!=NULL) { + ++basename; + } else { + basename=filename; + } + printf("# testing converter performance with file \"%s\"\n", basename); + if(!readInput(in, 
encName)) { + fprintf(stderr, "error reading \"%s\" (encoding %s)\n", filename, encName); + showUsage(myName); + return 2; + } + if(in!=stdin) { + fclose(in); + } + + /* test performance */ + testPerformance(); + return 0; +} diff --git a/icuSources/test/utfperf/utfperf.dsp b/icuSources/test/utfperf/utfperf.dsp new file mode 100644 index 00000000..83512224 --- /dev/null +++ b/icuSources/test/utfperf/utfperf.dsp @@ -0,0 +1,158 @@ +# Microsoft Developer Studio Project File - Name="utfperf" - Package Owner=<4> +# Microsoft Developer Studio Generated Build File, Format Version 6.00 +# ** DO NOT EDIT ** + +# TARGTYPE "Win32 (x86) Console Application" 0x0103 + +CFG=utfperf - Win32 Debug +!MESSAGE This is not a valid makefile. To build this project using NMAKE, +!MESSAGE use the Export Makefile command and run +!MESSAGE +!MESSAGE NMAKE /f "utfperf.mak". +!MESSAGE +!MESSAGE You can specify a configuration when running NMAKE +!MESSAGE by defining the macro CFG on the command line. For example: +!MESSAGE +!MESSAGE NMAKE /f "utfperf.mak" CFG="utfperf - Win32 Debug" +!MESSAGE +!MESSAGE Possible choices for configuration are: +!MESSAGE +!MESSAGE "utfperf - Win32 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "utfperf - Win32 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE "utfperf - Win64 Release" (based on "Win32 (x86) Console Application") +!MESSAGE "utfperf - Win64 Debug" (based on "Win32 (x86) Console Application") +!MESSAGE + +# Begin Project +# PROP AllowPerConfigDependencies 0 +# PROP Scc_ProjName "" +# PROP Scc_LocalPath "" +CPP=cl.exe +RSC=rc.exe + +!IF "$(CFG)" == "utfperf - Win32 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP 
/nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 +# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib" + +!ELSEIF "$(CFG)" == "utfperf - Win32 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo 
/subsystem:console /debug /machine:I386 /pdbtype:sept +# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib" + +!ELSEIF "$(CFG)" == "utfperf - Win64 Release" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 0 +# PROP BASE Output_Dir "Release" +# PROP BASE Intermediate_Dir "Release" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 0 +# PROP Output_Dir "Release" +# PROP Intermediate_Dir "Release" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c +# ADD CPP /nologo /MD /W3 /GX /Zi /O2 /Op /I "..\..\..\include" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c +# ADD BASE RSC /l 0x409 /d "NDEBUG" +# ADD RSC /l 0x409 /d "NDEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64 +# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64 + +!ELSEIF "$(CFG)" == "utfperf - Win64 Debug" + +# PROP BASE Use_MFC 0 +# PROP BASE Use_Debug_Libraries 1 +# PROP BASE Output_Dir "Debug" +# PROP BASE Intermediate_Dir "Debug" +# PROP BASE Target_Dir "" +# PROP Use_MFC 0 +# PROP Use_Debug_Libraries 1 +# PROP 
Output_Dir "Debug" +# PROP Intermediate_Dir "Debug" +# PROP Ignore_Export_Lib 0 +# PROP Target_Dir "" +MTL=midl.exe +# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c +# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /Op /I "..\..\..\include" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /Wp64 /Zm600 /c +# ADD BASE RSC /l 0x409 /d "_DEBUG" +# ADD RSC /l 0x409 /d "_DEBUG" +BSC32=bscmake.exe +# ADD BASE BSC32 /nologo +# ADD BSC32 /nologo +LINK32=link.exe +# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64 +# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64 + +!ENDIF + +# Begin Target + +# Name "utfperf - Win32 Release" +# Name "utfperf - Win32 Debug" +# Name "utfperf - Win64 Release" +# Name "utfperf - Win64 Debug" +# Begin Group "Source Files" + +# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat" +# Begin Source File + +SOURCE=.\utfperf.c +# End Source File +# End Group +# Begin Group "Header Files" + +# PROP Default_Filter "h;hpp;hxx;hm;inl" +# End Group +# Begin Group "Resource Files" + +# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe" +# End Group +# End Target +# End Project diff --git a/icuSources/tools/makeconv/misc/canonucm.c b/icuSources/tools/makeconv/misc/canonucm.c new file mode 100644 index 00000000..a37f8ddf --- /dev/null +++ 
b/icuSources/tools/makeconv/misc/canonucm.c @@ -0,0 +1,29 @@ +/* +******************************************************************************* +* +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: canonucm.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000nov08 +* created by: Markus W. Scherer +* +* This tool reads a .ucm file and canonicalizes it: In the CHARMAP section, +* - sort by Unicode code points +* - print all code points in uppercase hexadecimal +* - print all Unicode code points with 4, 5, or 6 digits as needed +* - remove the comments +* - remove unnecessary spaces +* +* To compile, just call a C compiler/linker with this source file. +* On Windows: cl canonucm.c +*/ + +#error File moved to charset/source/ucmtools/ on 2002-nov-06 + +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/icuSources/tools/makeconv/misc/rptp2ucm.c b/icuSources/tools/makeconv/misc/rptp2ucm.c new file mode 100644 index 00000000..7ec83209 --- /dev/null +++ b/icuSources/tools/makeconv/misc/rptp2ucm.c @@ -0,0 +1,31 @@ +/* +******************************************************************************* +* +* Copyright (C) 2000-2001, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: rptp2ucm.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2001feb16 +* created by: Markus W. Scherer +* +* This tool reads two CDRA conversion table files (RPMAP & TPMAP or RXMAP and TXMAP) and +* generates a canonicalized ICU .ucm file from them. 
+* If the RPMAP/RXMAP file does not contain a comment line with the substitution character, +* then this tool also attempts to read the header of the corresponding UPMAP/UXMAP file +* to extract subchar and subchar1. +* +* R*MAP: Unicode->codepage +* T*MAP: codepage->Unicode +* +* To compile, just call a C compiler/linker with this source file. +* On Windows: cl rptp2ucm.c +*/ + +#error File moved to charset/source/ucmtools/ on 2002-nov-06 + +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/icuSources/tools/makeconv/misc/ucmmerge.c b/icuSources/tools/makeconv/misc/ucmmerge.c new file mode 100644 index 00000000..a8035bba --- /dev/null +++ b/icuSources/tools/makeconv/misc/ucmmerge.c @@ -0,0 +1,26 @@ +/* +******************************************************************************* +* +* Copyright (C) 2000, International Business Machines +* Corporation and others. All Rights Reserved. +* +******************************************************************************* +* file name: ucmmerge.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000nov09 +* created by: Markus W. Scherer +* +* This tool reads two .ucm files and merges them. +* Merging the files allows to update the ICU data while keeping ICU-specific +* changes like "MBCS"->"EBCDIC_STATEFUL" or adding <icu:state>. +* +* To compile, just call a C compiler/linker with this source file. +* On Windows: cl ucmmerge.c +*/ + +#error File moved to charset/source/ucmtools/ on 2002-nov-06 + +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/icuSources/tools/makeconv/misc/ucmstrip.c b/icuSources/tools/makeconv/misc/ucmstrip.c new file mode 100644 index 00000000..751af780 --- /dev/null +++ b/icuSources/tools/makeconv/misc/ucmstrip.c @@ -0,0 +1,28 @@ +/* +******************************************************************************* +* +* Copyright (C) 2000, International Business Machines +* Corporation and others. 
All Rights Reserved. +* +******************************************************************************* +* file name: ucmstrip.c +* encoding: US-ASCII +* tab size: 8 (not used) +* indentation:4 +* +* created on: 2000nov09 +* created by: Markus W. Scherer +* +* This tool reads a .ucm file, expects there to be a line in the header with +* "File created on..." and removes the lines before and including that. +* Then it removes lines with <icu:state> and <uconv_class> and <code_set_name>. +* This helps comparing .ucm files with different copyright statements and +* different state specifications. +* +* To compile, just call a C compiler/linker with this source file. +* On Windows: cl ucmstrip.c +*/ + +#error File moved to charset/source/ucmtools/ on 2002-nov-06 + +/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */ diff --git a/makefile b/makefile index a4d01949..d85cd0fc 100644 --- a/makefile +++ b/makefile @@ -139,19 +139,19 @@ LIBOVERRIDES=LIBICUDT="-L$(OBJROOT) -l$(LIB_NAME)" \ ENV= APPLE_INTERNAL_DIR="$(APPLE_INTERNAL_DIR)" \ CFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -g -Os -fno-exceptions" \ - CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -g -Os -fno-exceptions -fno-rtti" \ + CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -g -Os -fno-exceptions -fno-rtti -fvisibility-inlines-hidden" \ RC_ARCHS="$(RC_ARCHS)" \ DYLD_LIBRARY_PATH="$(DSTROOT)/usr/local/lib" ENV_CONFIGURE= APPLE_INTERNAL_DIR="$(APPLE_INTERNAL_DIR)" \ CFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" -g -Os -fno-exceptions" \ - CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" -g -Os -fno-exceptions -fno-rtti" \ + CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" -g -Os -fno-exceptions -fno-rtti -fvisibility-inlines-hidden" \ RC_ARCHS="$(RC_ARCHS)" \ DYLD_LIBRARY_PATH="$(DSTROOT)/usr/local/lib" ENV_DEBUG = APPLE_INTERNAL_DIR="$(APPLE_INTERNAL_DIR)" \ CFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -O0 -g 
-fno-exceptions" \ - CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -O0 -g -fno-exceptions -fno-rtti" \ + CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -O0 -g -fno-exceptions -fno-rtti -fvisibility-inlines-hidden" \ RC_ARCHS="$(RC_ARCHS)" \ DYLD_LIBRARY_PATH="$(DSTROOT)/usr/local/lib" @@ -178,8 +178,10 @@ endif icu debug : $(OBJROOT)/Makefile (cd $(OBJROOT); \ $(MAKE) $($(ENV_$@)); \ + tmpfile=`mktemp -t weakexternal` || exit 1; \ + nm -m $(COMMON_OBJ) $(I18N_OBJ) $(STUB_DATA_OBJ) | fgrep "weak external" | fgrep -v "undefined" | sed -e 's/.*weak external //' | uniq | cat >$$tmpfile; \ $($(ENV_$@)) $(CXX) -current_version $(ICU_VERS).$(ICU_SUBVERS) -compatibility_version 1 -dynamiclib -dynamic \ - $(RC_ARCHS:%=-arch %) $(CXXFLAGS) $(LDFLAGS) -single_module $(SECTORDER_FLAGS) \ + $(RC_ARCHS:%=-arch %) $(CXXFLAGS) $(LDFLAGS) -single_module $(SECTORDER_FLAGS) -unexported_symbols_list $$tmpfile \ -install_name $(libdir)$(INSTALLED_DYLIB) -o ./$(INSTALLED_DYLIB) $(COMMON_OBJ) $(I18N_OBJ) $(STUB_DATA_OBJ); \ if test -f ./$(ICU_DATA_DIR)/$(B_DATA_FILE); then \ ln -fs ./$(ICU_DATA_DIR)/$(B_DATA_FILE); \ -- 2.45.2