]> git.saurik.com Git - apple/icu.git/commitdiff
ICU-6.2.16.tar.gz mac-os-x-10410x86 v6.2.16
authorApple <opensource@apple.com>
Thu, 19 Apr 2007 23:41:38 +0000 (23:41 +0000)
committerApple <opensource@apple.com>
Thu, 19 Apr 2007 23:41:38 +0000 (23:41 +0000)
23 files changed:
icuSources/allinone/all/all.dsp [new file with mode: 0644]
icuSources/layout/unicode/loengine.h [new file with mode: 0644]
icuSources/samples/xml2txt/readme.txt [new file with mode: 0644]
icuSources/test/collperf/Makefile.in [new file with mode: 0644]
icuSources/test/collperf/collperf.cpp [new file with mode: 0644]
icuSources/test/collperf/collperf.dsp [new file with mode: 0644]
icuSources/test/collperf/readme.html [new file with mode: 0644]
icuSources/test/perf/all/all.dsp [new file with mode: 0644]
icuSources/test/unalignedtest/Makefile.in [new file with mode: 0644]
icuSources/test/unalignedtest/readme [new file with mode: 0644]
icuSources/test/unalignedtest/unaligned.c [new file with mode: 0644]
icuSources/test/usetperf/bitset.cpp [new file with mode: 0644]
icuSources/test/usetperf/bitset.h [new file with mode: 0644]
icuSources/test/usetperf/timer.h [new file with mode: 0644]
icuSources/test/usetperf/usetperf.cpp [new file with mode: 0644]
icuSources/test/usetperf/usetperf.dsp [new file with mode: 0644]
icuSources/test/utfperf/utfperf.c [new file with mode: 0644]
icuSources/test/utfperf/utfperf.dsp [new file with mode: 0644]
icuSources/tools/makeconv/misc/canonucm.c [new file with mode: 0644]
icuSources/tools/makeconv/misc/rptp2ucm.c [new file with mode: 0644]
icuSources/tools/makeconv/misc/ucmmerge.c [new file with mode: 0644]
icuSources/tools/makeconv/misc/ucmstrip.c [new file with mode: 0644]
makefile

diff --git a/icuSources/allinone/all/all.dsp b/icuSources/allinone/all/all.dsp
new file mode 100644 (file)
index 0000000..5336d61
--- /dev/null
@@ -0,0 +1,93 @@
+# Microsoft Developer Studio Project File - Name="all" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Generic Project" 0x010a
+
+CFG=ALL - WIN32 DEBUG
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "all.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "all.mak" CFG="ALL - WIN32 DEBUG"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "all - Win32 Release" (based on "Win32 (x86) Generic Project")
+!MESSAGE "all - Win32 Debug" (based on "Win32 (x86) Generic Project")
+!MESSAGE "all - Win64 Release" (based on "Win32 (x86) Generic Project")
+!MESSAGE "all - Win64 Debug" (based on "Win32 (x86) Generic Project")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+MTL=midl.exe
+
+!IF  "$(CFG)" == "all - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+
+!ELSEIF  "$(CFG)" == "all - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Target_Dir ""
+
+!ELSEIF  "$(CFG)" == "all - Win64 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+
+!ELSEIF  "$(CFG)" == "all - Win64 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Target_Dir ""
+
+!ENDIF 
+
+# Begin Target
+
+# Name "all - Win32 Release"
+# Name "all - Win32 Debug"
+# Name "all - Win64 Release"
+# Name "all - Win64 Debug"
+# End Target
+# End Project
diff --git a/icuSources/layout/unicode/loengine.h b/icuSources/layout/unicode/loengine.h
new file mode 100644 (file)
index 0000000..76612cd
--- /dev/null
@@ -0,0 +1,358 @@
+/*
+ *
+ * (C) Copyright IBM Corp. 1998-2004 - All Rights Reserved
+ *
+ */
+
+#ifndef __LOENGINE_H
+#define __LOENGINE_H
+
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "unicode/uscript.h"
+#include "unicode/unistr.h"
+
+#include "layout/LETypes.h"
+#include "layout/LayoutEngine.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * This is a wrapper class designed to allow ICU clients to
+ * use LayoutEngine in a way that is consistent with the rest
+ * of ICU.
+ *
+ * (LayoutEngine was developed separately from ICU and
+ * the same source is used in non-ICU environments, so it cannot
+ * be changed to match ICU coding conventions).
+ *
+ * This class is designed for clients who wish to use LayoutEngine
+ * to layout complex text. If you need to subclass LayoutEngine,
+ * you'll need to use the LayoutEngine interfaces directly.
+ *
+ * Basically, it creates an instance of LayoutEngine, stashes
+ * it in fLayoutEngine, and uses it to implement the layout
+ * functionality.
+ *
+ * Use the createInstance method to create an ICULayoutEngine. Use
+ * delete to destroy it. The layoutChars method computes the glyphs
+ * and positions, and saves them in the ICULayoutEngine object.
+ * Use getGlyphs, getGlyphPositions and getCharIndices to retrieve this
+ * data.
+ *
+ * You'll also need an implementation of LEFontInstance for your platform.
+ *
+ * @see LayoutEngine.h
+ * @see LEFontInstance.h
+ *
+ * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+ */
+#ifndef U_HIDE_OBSOLETE_API
+class U_LAYOUT_API ICULayoutEngine : public UObject {
+private:
+    /**
+     * This holds the instance of LayoutEngine that does all
+     * the work.
+     */
+    LayoutEngine *fLayoutEngine;
+
+    /**
+     * This no argument constructor is private so that clients
+     * can't envoke it. Clients should use createInstance.
+     *
+     * @see createInstance
+     */
+    ICULayoutEngine();
+
+    /**
+     * The main constructor. It is defined as private to
+     * stop clients from invoking it. Clients should use
+     * createInstance.
+     *
+     * @param layoutEngine - the LayoutEngine that this instance wraps.
+     *
+     * @see createInstance
+     */
+    ICULayoutEngine(LayoutEngine *layoutEngine);
+
+public:
+
+    /**
+     * The destructor. At least on Windows it needs to be
+     * virtual to ensure that it deletes the object from the
+     * same heap that createInstance will allocate it from. We
+     * don't know why this is...
+     *
+     * @see createInstance
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    virtual ~ICULayoutEngine();
+
+    /**
+     * This method computes the glyph, character index and position arrays
+     * for the input characters.
+     *
+     * @param chars - the input character context
+     * @param startOffset - the starting offset of the characters to process
+     * @param endOffset - the ending offset of the characters to process
+     * @param maxOffset - the number of characters in the input context
+     * @param rightToLeft - TRUE if the characers are in a right to left directional run
+     * @param x - the initial X position
+     * @param y - the initial Y position
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @return the number of glyphs in the glyph array
+     *
+     * Note; the glyph, character index and position array can be accessed
+     * using the getter method below.
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    int32_t layoutChars(const UChar chars[],
+                        int32_t startOffset,
+                        int32_t endOffset,
+                        int32_t maxOffset,
+                        UBool rightToLeft,
+                        float x, float y,
+                        UErrorCode &success);
+
+
+    /**
+     * This method computes the glyph, character index and position arrays
+     * for the input characters.
+     *
+     * @param str - the input character context
+     * @param startOffset - the starting offset of the characters to process
+     * @param endOffset - the ending offset of the characters to process
+     * @param rightToLeft - TRUE if the characers are in a right to left directional run
+     * @param x - the initial X position
+     * @param y - the initial Y position
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @return the number of glyphs in the glyph array
+     *
+     * Note; the glyph, character index and position array can be accessed
+     * using the getter method below.
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    int32_t layoutString(const UnicodeString &str,
+                         int32_t startOffset,
+                         int32_t endOffset,
+                         UBool rightToLeft,
+                         float x, float y,
+                         UErrorCode &success);
+
+    /**
+     * This method returns the number of glyphs in the glyph array. Note
+     * that the number of glyphs will be greater than or equal to the number
+     * of characters used to create the LayoutEngine.
+     *
+     * @return the number of glyphs in the glyph array
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    int32_t countGlyphs() const;
+
+    /**
+     * This method copies the glyph array into a caller supplied array.
+     * The caller must ensure that the array is large enough to hold all
+     * the glyphs.
+     *
+     * @param glyphs - the destiniation glyph array
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    void getGlyphs(uint32_t glyphs[], UErrorCode &success);
+
+    /**
+     * This method copies the character index array into a caller supplied array.
+     * The caller must ensure that the array is large enough to hold a character
+     * index for each glyph.
+     *
+     * @param charIndices - the destiniation character index array
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    void getCharIndices(int32_t charIndices[], UErrorCode &success);
+
+    /**
+     * This method copies the character index array into a caller supplied array.
+     * The caller must ensure that the array is large enough to hold a character
+     * index for each glyph.
+     *
+     * @param charIndices - the destiniation character index array
+     * @param indexBase - an offset which will be added to each index
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    void getCharIndices(int32_t charIndices[], int32_t indexBase, UErrorCode &success);
+
+    /**
+     * This method copies the position array into a caller supplied array.
+     * The caller must ensure that the array is large enough to hold an
+     * X and Y position for each glyph, plus an extra X and Y for the
+     * advance of the last glyph.
+     *
+     * @param positions - the destiniation position array
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    void getGlyphPositions(float positions[], UErrorCode &success);
+
+    /**
+     * This method returns the X and Y position of the glyph at the
+     * given index.
+     *
+     * Input parameters:
+     * @param glyphIndex - the index of the glyph
+     *
+     * Output parameters:
+     * @param x - the glyph's X position
+     * @param y - the glyph's Y position
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    void getGlyphPosition(int32_t glyphIndex, float &x, float &y, UErrorCode &success);
+
+    /**
+     * This method returns an ICULayoutEngine capable of laying out text
+     * in the given font, script and langauge.
+     *
+     * @param fontInstance - the font of the text
+     * @param scriptCode - the script of the text
+     * @param locale - used to determine the language of the text
+     * @param success - output parameter set to an error code if the operation fails
+     *
+     * @return an ICULayoutEngine which can layout text in the given font.
+     *
+     * NOTE: currently, locale is ignored...
+     *
+     * @see LEFontInstance
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    static ICULayoutEngine *createInstance(const LEFontInstance *fontInstance,
+                                           UScriptCode scriptCode, Locale &locale,
+                                           UErrorCode &success);
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for the actual class.
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    virtual UClassID getDynamicClassID() const;
+
+    /**
+     * ICU "poor man's RTTI", returns a UClassID for this class.
+     *
+     * @obsolete ICU 3.0. Use LayoutEngine.h instead since this API will be removed in that release.
+     */
+    static UClassID getStaticClassID();
+};
+
+// Default constructor: creates a wrapper with no engine attached.
+inline ICULayoutEngine::ICULayoutEngine()
+    : fLayoutEngine(0)
+{
+    // fLayoutEngine must start out null: the destructor unconditionally
+    // deletes it, and deleting an uninitialized pointer is undefined.
+}
+
+// Wrapping constructor: remembers the engine that the destructor will delete.
+inline ICULayoutEngine::ICULayoutEngine(LayoutEngine *layoutEngine)
+{
+    fLayoutEngine = layoutEngine;
+}
+
+// Destructor: deletes the wrapped engine and clears the member.
+inline ICULayoutEngine::~ICULayoutEngine()
+{
+    LayoutEngine *owned = fLayoutEngine;
+
+    fLayoutEngine = 0;
+    delete owned;
+}
+
+inline int32_t ICULayoutEngine::layoutChars(const UChar chars[],
+                                            int32_t startOffset,
+                                            int32_t endOffset,
+                                            int32_t maxOffset,
+                                            UBool rightToLeft,
+                                            float x, float y,
+                                            UErrorCode &success)
+{
+    // NOTE: call reset() so that clients can safely reuse
+    fLayoutEngine->reset();
+    return fLayoutEngine->layoutChars(chars,
+                                      startOffset,
+                                      endOffset - startOffset,
+                                      maxOffset,
+                                      rightToLeft,
+                                      x, y,
+                                      (LEErrorCode &) success);
+}
+
+inline int32_t ICULayoutEngine::layoutString(const UnicodeString &str,
+                                            int32_t startOffset,
+                                            int32_t endOffset,
+                                            UBool rightToLeft,
+                                            float x, float y,
+                                            UErrorCode &success)
+{
+    // NOTE: call reset() so that clients can safely reuse
+    fLayoutEngine->reset();
+    return fLayoutEngine->layoutChars(str.getBuffer(),
+                                      startOffset,
+                                      endOffset - startOffset,
+                                      str.length(),
+                                      rightToLeft,
+                                      x, y,
+                                      (LEErrorCode &) success);
+}
+
+// Returns the glyph count reported by the wrapped engine.
+inline int32_t ICULayoutEngine::countGlyphs() const
+{
+    const int32_t glyphCount = fLayoutEngine->getGlyphCount();
+
+    return glyphCount;
+}
+
+// Copies the glyph array into the caller's buffer via the wrapped engine.
+inline void ICULayoutEngine::getGlyphs(uint32_t glyphs[], UErrorCode &success)
+{
+    // The engine reports errors through LEErrorCode; view the caller's
+    // UErrorCode through that type, as the rest of this wrapper does.
+    LEErrorCode &status = (LEErrorCode &) success;
+
+    fLayoutEngine->getGlyphs(glyphs, status);
+}
+
+// Copies the per-glyph character indices into the caller's buffer.
+inline void ICULayoutEngine::getCharIndices(int32_t charIndices[], UErrorCode &success)
+{
+    LEErrorCode &status = (LEErrorCode &) success;
+
+    fLayoutEngine->getCharIndices(charIndices, status);
+}
+
+// Copies the per-glyph character indices, passing indexBase through to the engine.
+inline void ICULayoutEngine::getCharIndices(int32_t charIndices[], int32_t indexBase, UErrorCode &success)
+{
+    LEErrorCode &status = (LEErrorCode &) success;
+
+    fLayoutEngine->getCharIndices(charIndices, indexBase, status);
+}
+
+// Copies the glyph position array into the caller's buffer.
+inline void ICULayoutEngine::getGlyphPositions(float positions[], UErrorCode &success)
+{
+    LEErrorCode &status = (LEErrorCode &) success;
+
+    fLayoutEngine->getGlyphPositions(positions, status);
+}
+
+// Fetches the X and Y position of a single glyph from the wrapped engine.
+inline void ICULayoutEngine::getGlyphPosition(int32_t glyphIndex, float &x, float &y, UErrorCode &success)
+{
+    LEErrorCode &status = (LEErrorCode &) success;
+
+    fLayoutEngine->getGlyphPosition(glyphIndex, x, y, status);
+}
+
+inline ICULayoutEngine *ICULayoutEngine::createInstance(const LEFontInstance *fontInstance,
+                                                        UScriptCode scriptCode,
+                                                        Locale &locale, UErrorCode &success)
+{
+    LayoutEngine *engine = LayoutEngine::layoutEngineFactory(fontInstance,
+                                                             (le_int32) scriptCode,
+                                                             0,
+                                                             (LEErrorCode &) success);
+
+    return new ICULayoutEngine(engine);
+}
+#endif // U_HIDE_OBSOLETE_API
+
+U_NAMESPACE_END
+#endif
diff --git a/icuSources/samples/xml2txt/readme.txt b/icuSources/samples/xml2txt/readme.txt
new file mode 100644 (file)
index 0000000..30158ad
--- /dev/null
@@ -0,0 +1,3 @@
+Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
+
+The xml2txt sample has been deprecated. It was a demonstration of converting ICU4C XML resource bundles into .txt resource bundles. Since it was written, the ICU4C XML resource bundle format has been deprecated, and ICU now uses XLIFF (XML Localization Interchange File Format) instead.
\ No newline at end of file
diff --git a/icuSources/test/collperf/Makefile.in b/icuSources/test/collperf/Makefile.in
new file mode 100644 (file)
index 0000000..bca3211
--- /dev/null
@@ -0,0 +1,89 @@
+## Makefile.in for ICU - test/collperf
+## Copyright (c) 2001, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Platform-specific setup
+include @platform_make_fragment@
+
+## Build directory information
+subdir = test/collperf
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = collperf
+
+DEFS = @DEFS@
+CPPFLAGS = @CPPFLAGS@ -I$(top_builddir)/common -I$(top_srcdir)/common -I$(top_srcdir)/i18n
+CFLAGS = @CFLAGS@
+CXXFLAGS = @CXXFLAGS@
+ENABLE_RPATH = @ENABLE_RPATH@
+ifeq ($(ENABLE_RPATH),YES)
+RPATHLDFLAGS = $(LD_RPATH)$(LD_RPATH_PRE)$(libdir)
+endif
+LDFLAGS = @LDFLAGS@ $(RPATHLDFLAGS)
+LIBS = $(LIBICUI18N) $(LIBICUUC) @LIBS@ @LIB_M@
+
+OBJECTS = collperf.o
+
+## One dependency file per object; pulled in by the -include lines at the end.
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+## (invoke is a command, not a file, so it is declared phony as well)
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local invoke
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+## NOTE: every recipe line below must begin with a hard TAB.
+clean-local:
+	test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+	$(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+	$(RMV) Makefile
+
+check-local: all-local
+
+## Regenerate this Makefile through config.status when Makefile.in changes.
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) \
+	 && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+	$(LINK.cc) -o $@ $^ $(LIBS)
+
+## Run $(INVOCATION) via $(INVOKE); ICU_DATA defaults to the build tree's data dir.
+invoke:
+	ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+## Include the dependency files unless only clean/install style goals were requested.
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+ifneq ($(patsubst %install,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+endif
+
diff --git a/icuSources/test/collperf/collperf.cpp b/icuSources/test/collperf/collperf.cpp
new file mode 100644 (file)
index 0000000..bd916c5
--- /dev/null
@@ -0,0 +1,1749 @@
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (C) 2001 IBM, Inc.   All Rights Reserved.
+ *
+ ********************************************************************/
+/********************************************************************************
+*
+* File collperf.cpp
+*
+* Modification History:
+*        Name                     Description
+*     Andy Heninger             First Version
+*
+*********************************************************************************
+*/
+
+//
+//  This program tests string collation and sort key generation performance.
+//      Three APIs can be tested: ICU C, Unix strcoll/strxfrm, and Windows LCMapString.
+//      A file of names is required as input, one per line.  It must be in utf-8 or utf-16 format,
+//      and include a byte order mark.  Either LE or BE format is OK.
+//
+
+const char gUsageString[] =
+ "usage:  collperf options...\n"
+    "-help                      Display this message.\n"
+    "-file file_name            utf-16 format file of names.\n"
+    "-locale name               ICU locale to use.  Default is en_US\n"
+    "-rules file_name           Collation rules file (overrides locale)\n"
+    "-langid 0x1234             Windows Language ID number.  Default to value for -locale option\n"
+    "                              see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm\n"
+    "-win                       Run test using Windows native services.  (ICU is default)\n"
+    "-unix                      Run test using Unix strxfrm, strcoll services.\n"
+    "-uselen                    Use API with string lengths.  Default is null-terminated strings\n"
+    "-usekeys                   Run tests using sortkeys rather than strcoll\n"
+    "-strcmp                    Run tests using u_strcmp rather than strcoll\n"
+    "-strcmpCPO                 Run tests using u_strcmpCodePointOrder rather than strcoll\n"
+    "-loop nnnn                 Loopcount for test.  Adjust for reasonable total running time.\n"
+    "-iloop n                   Inner Loop Count.  Default = 1.  Number of calls to function\n"
+    "                               under test at each call point.  For measuring test overhead.\n"
+    "-terse                     Terse numbers-only output.  Intended for use by scripts.\n"
+    "-french                    French accent ordering\n"
+    "-frenchoff                 No French accent ordering (for use with French locales.)\n"
+    "-norm                      Normalizing mode on\n"
+    "-shifted                   Shifted mode\n"
+    "-lower                     Lower case first\n"
+    "-upper                     Upper case first\n"
+    "-case                      Enable separate case level\n"
+    "-level n                   Sort level, 1 to 5, for Primary, Secndary, Tertiary, Quaternary, Identical\n"
+    "-keyhist                   Produce a table sort key size vs. string length\n"
+    "-binsearch                 Binary Search timing test\n"
+    "-keygen                    Sort Key Generation timing test\n"
+    "-qsort                     Quicksort timing test\n"
+    "-iter                      Iteration Performance Test\n"
+    "-dump                      Display strings, sort keys and CEs.\n"
+    ;
+
+
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include <locale.h>
+#include <errno.h>
+
+#include <unicode/utypes.h>
+#include <unicode/ucol.h>
+#include <unicode/ucoleitr.h>
+#include <unicode/uloc.h>
+#include <unicode/ustring.h>
+#include <unicode/ures.h>
+#include <unicode/uchar.h>
+#include <unicode/ucnv.h>
+#include <unicode/utf8.h>
+
+#ifdef WIN32
+#include <windows.h>
+#else
+//
+//  Stubs for Windows API functions when building on UNIXes.
+//
+typedef int DWORD;
+inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+#include <sys/time.h>
+unsigned long timeGetTime() {
+    struct timeval t;
+    gettimeofday(&t, 0);
+    unsigned long val = t.tv_sec * 1000;  // Let it overflow.  Who cares.
+    val += t.tv_usec / 1000;
+    return val;
+};
+inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+const int LCMAP_SORTKEY = 0;
+#define MAKELCID(a,b) 0
+const int SORT_DEFAULT = 0;
+#endif
+
+
+
+//
+//  Command line option variables
+//     These global variables are set according to the options specified
+//     on the command line by the user.
+char * opt_fName      = 0;         // -file: name of the input file of names.
+char * opt_locale     = "en_US";   // -locale: ICU locale to use.
+int    opt_langid     = 0;         // Defaults to value corresponding to opt_locale.
+char * opt_rules      = 0;         // -rules: collation rules file (overrides locale).
+UBool  opt_help       = FALSE;     // -help or -?
+int    opt_loopCount  = 1;         // -loop: loop count for the test.
+int    opt_iLoopCount = 1;         // -iloop: inner loop count.
+UBool  opt_terse      = FALSE;     // -terse: numbers-only output.
+UBool  opt_qsort      = FALSE;     // -qsort: quicksort timing test.
+UBool  opt_binsearch  = FALSE;     // -binsearch: binary search timing test.
+UBool  opt_icu        = TRUE;      // Not bound to any entry in the opts[] table.
+UBool  opt_win        = FALSE;      // Run with Windows native functions.
+UBool  opt_unix       = FALSE;      // Run with UNIX strcoll, strxfrm functions.
+UBool  opt_uselen     = FALSE;     // -uselen: use API with string lengths.
+UBool  opt_usekeys    = FALSE;     // -usekeys: use sort keys rather than strcoll.
+UBool  opt_strcmp     = FALSE;     // -strcmp: use u_strcmp rather than strcoll.
+UBool  opt_strcmpCPO  = FALSE;     // -strcmpCPO: use u_strcmpCodePointOrder.
+UBool  opt_norm       = FALSE;     // -norm: normalizing mode on.
+UBool  opt_keygen     = FALSE;     // -keygen: sort key generation timing test.
+UBool  opt_french     = FALSE;     // -french: French accent ordering.
+UBool  opt_frenchoff  = FALSE;     // -frenchoff: no French accent ordering.
+UBool  opt_shifted    = FALSE;     // -shifted: shifted mode.
+UBool  opt_lower      = FALSE;     // -lower: lower case first.
+UBool  opt_upper      = FALSE;     // -upper: upper case first.
+UBool  opt_case       = FALSE;     // -case: enable separate case level.
+int    opt_level      = 0;         // -level: sort level, 1 to 5; 0 when not given.
+UBool  opt_keyhist    = FALSE;     // -keyhist: sort key size vs. string length table.
+UBool  opt_itertest   = FALSE;     // -iter: iteration performance test.
+UBool  opt_dump       = FALSE;     // -dump: display strings, sort keys and CEs.
+
+
+
+//
+//   Definitions for the command line options
+//
+// Describes one command line option: its spelling, the kind of value it
+// takes, and the variable that receives the value.
+struct OptSpec {
+    const char *name;                    // Option text as typed, e.g. "-file"; 0 ends the table.
+    enum {FLAG, NUM, STRING} type;       // FLAG takes no value; NUM and STRING use the next argument.
+    void *pVar;                          // Destination, written as UBool (FLAG), int (NUM) or char pointer (STRING).
+};
+
+// Table of recognized options.  Each entry binds an option name to one of
+// the opt_ globals; the type given here must match the variable's type
+// because values are stored through the untyped pVar pointer.
+OptSpec opts[] = {
+    {"-file",        OptSpec::STRING, &opt_fName},
+    {"-locale",      OptSpec::STRING, &opt_locale},
+    {"-langid",      OptSpec::NUM,    &opt_langid},
+    {"-rules",       OptSpec::STRING, &opt_rules},
+    {"-qsort",       OptSpec::FLAG,   &opt_qsort},
+    {"-binsearch",   OptSpec::FLAG,   &opt_binsearch},
+    {"-iter",        OptSpec::FLAG,   &opt_itertest},
+    {"-win",         OptSpec::FLAG,   &opt_win},
+    {"-unix",        OptSpec::FLAG,   &opt_unix},
+    {"-uselen",      OptSpec::FLAG,   &opt_uselen},
+    {"-usekeys",     OptSpec::FLAG,   &opt_usekeys},
+    {"-strcmp",      OptSpec::FLAG,   &opt_strcmp},
+    {"-strcmpCPO",   OptSpec::FLAG,   &opt_strcmpCPO},
+    {"-norm",        OptSpec::FLAG,   &opt_norm},
+    {"-french",      OptSpec::FLAG,   &opt_french},
+    {"-frenchoff",   OptSpec::FLAG,   &opt_frenchoff},
+    {"-shifted",     OptSpec::FLAG,   &opt_shifted},
+    {"-lower",       OptSpec::FLAG,   &opt_lower},
+    {"-upper",       OptSpec::FLAG,   &opt_upper},
+    {"-case",        OptSpec::FLAG,   &opt_case},
+    {"-level",       OptSpec::NUM,    &opt_level},
+    {"-keyhist",     OptSpec::FLAG,   &opt_keyhist},
+    {"-keygen",      OptSpec::FLAG,   &opt_keygen},
+    {"-loop",        OptSpec::NUM,    &opt_loopCount},
+    {"-iloop",       OptSpec::NUM,    &opt_iLoopCount},
+    {"-terse",       OptSpec::FLAG,   &opt_terse},
+    {"-dump",        OptSpec::FLAG,   &opt_dump},
+    {"-help",        OptSpec::FLAG,   &opt_help},
+    {"-?",           OptSpec::FLAG,   &opt_help},
+    {0, OptSpec::FLAG, 0}                           // Terminator: name == 0 ends the scan.
+};
+
+
+//---------------------------------------------------------------------------
+//
+//  Global variables pointing to and describing the test file
+//
+//---------------------------------------------------------------------------
+
+//
+//   struct Line
+//
+//      Each line from the source file (containing a name, presumably) gets
+//      one of these structs.
+//
+struct  Line {
+    UChar     *name;          // The line's text; also passed to the string APIs as null-terminated.
+    int        len;           // Length of name, used by the "with length" comparison functions.
+    char      *winSortKey;    // Sort key compared with strcmp() by WinstrcmpK.
+    char      *icuSortKey;    // Sort key compared with strcmp() by ICUstrcmpK.
+    char      *unixSortKey;   // Presumably the strxfrm() result for unixName -- TODO confirm.
+    char      *unixName;      // char form of the name, compared with strcoll() by UNIXstrcmp.
+};
+
+
+
+Line          *gFileLines;           // Ptr to array of Line structs, one per line in the file.
+int            gNumFileLines;        // Presumably the number of entries in gFileLines -- TODO confirm.
+UCollator     *gCol;                 // Collator passed to ucol_strcoll() by the ICU comparison functions.
+DWORD          gWinLCID;             // Locale ID passed to CompareStringW() by the Windows comparison functions.
+
+Line          **gSortedLines;        // Presumably the lines in sorted order -- TODO confirm.
+Line          **gRandomLines;        // Presumably the lines in randomized order -- TODO confirm.
+int            gCount;               // Incremented on every call to a qsort comparison function.
+
+
+
+//---------------------------------------------------------------------------
+//
+//  ProcessOptions()    Function to read the command line options.
+//
+//---------------------------------------------------------------------------
+UBool ProcessOptions(int argc, const char **argv, OptSpec opts[])
+{
+    int         i;
+    int         argNum;
+    const char  *pArgName;
+    OptSpec    *pOpt;
+
+    for (argNum=1; argNum<argc; argNum++) {
+        pArgName = argv[argNum];
+        for (pOpt = opts;  pOpt->name != 0; pOpt++) {
+            if (strcmp(pOpt->name, pArgName) == 0) {
+                switch (pOpt->type) {
+                case OptSpec::FLAG:
+                    *(UBool *)(pOpt->pVar) = TRUE;
+                    break;
+                case OptSpec::STRING:
+                    argNum ++;
+                    if (argNum >= argc) {
+                        fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
+                        return FALSE;
+                    }
+                    *(const char **)(pOpt->pVar)  = argv[argNum];
+                    break;
+                case OptSpec::NUM:
+                    argNum ++;
+                    if (argNum >= argc) {
+                        fprintf(stderr, "value expected for \"%s\" option.\n", pOpt->name);
+                        return FALSE;
+                    }
+                    char *endp;
+                    i = strtol(argv[argNum], &endp, 0);
+                    if (endp == argv[argNum]) {
+                        fprintf(stderr, "integer value expected for \"%s\" option.\n", pOpt->name);
+                        return FALSE;
+                    }
+                    *(int *)(pOpt->pVar) = i;
+                }
+                break;
+            }
+        }
+        if (pOpt->name == 0)
+        {
+            fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
+            return FALSE;
+        }
+    }
+return TRUE;
+}
+
+//---------------------------------------------------------------------------------------
+//
+//   Comparison functions for use by qsort.
+//
+//       Seven flavors: ICU or Windows, each using sort keys, strings with length,
+//           or null-terminated strings; plus UNIX strcoll on null-terminated strings.
+//
+//---------------------------------------------------------------------------------------
+// qsort comparator: orders two lines by strcmp() of their ICU sort keys.
+// a and b point at array elements, which are themselves Line pointers.
+int ICUstrcmpK(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+
+    gCount++;
+    return strcmp(lhs->icuSortKey, rhs->icuSortKey);
+}
+
+
+// qsort comparator: ucol_strcoll() on the two names with explicit lengths.
+// a and b point at array elements, which are themselves Line pointers.
+int ICUstrcmpL(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+
+    gCount++;
+    // Map the collation result onto the negative/zero/positive qsort convention.
+    switch (ucol_strcoll(gCol, lhs->name, lhs->len, rhs->name, rhs->len)) {
+    case UCOL_LESS:
+        return -1;
+    case UCOL_GREATER:
+        return +1;
+    default:
+        return 0;
+    }
+}
+
+
+// qsort comparator over an array of Line pointers: compares the two names with
+// ucol_strcoll(), passing -1 for both lengths, and maps the ICU result onto
+// the -1 / 0 / +1 convention.  Counts the compare in gCount.
+int ICUstrcmp(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++;
+    switch (ucol_strcoll(gCol, lhs->name, -1, rhs->name, -1)) {
+    case UCOL_LESS:
+        return -1;
+    case UCOL_GREATER:
+        return +1;
+    default:
+        return 0;
+    }
+}
+
+
+// qsort comparator over an array of Line pointers: compares the two names with
+// the Windows CompareStringW() API, passing -1 for both lengths.  The API result
+// is shifted down by 2 so that "equal" (2) becomes 0.  Counts the compare in gCount.
+int Winstrcmp(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++;
+    return CompareStringW(gWinLCID, 0, lhs->name, -1, rhs->name, -1) - 2;
+}
+
+
+// qsort comparator over an array of Line pointers: compares the converted
+// (char *) names with the C library strcoll().  Counts the compare in gCount.
+int UNIXstrcmp(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++;
+    return strcoll(lhs->unixName, rhs->unixName);
+}
+
+
+// qsort comparator over an array of Line pointers: compares the two names with
+// the Windows CompareStringW() API, passing each string's stored length.  The API
+// result is shifted down by 2 so that "equal" (2) becomes 0.  Counts the compare
+// in gCount.
+int WinstrcmpL(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++;
+    return CompareStringW(gWinLCID, 0, lhs->name, lhs->len, rhs->name, rhs->len) - 2;
+}
+
+
+// qsort comparator over an array of Line pointers: orders two entries by a
+// byte-wise strcmp() of their Windows sort keys.  Counts the compare in gCount.
+int WinstrcmpK(const void *a, const void *b) {
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++;
+    return strcmp(lhs->winSortKey, rhs->winSortKey);
+}
+
+
+//---------------------------------------------------------------------------------------
+//
+//   Function for sorting the names (lines) into a random order.
+//      Order is based on a hash of the  ICU Sort key for the lines
+//      The randomized order is used as input for the sorting timing tests.
+//
+//---------------------------------------------------------------------------------------
+int ICURandomCmp(const void *a, const void *b) {
+    char  *ask = (*(Line **)a)->icuSortKey;
+    char  *bsk = (*(Line **)b)->icuSortKey;
+    int   aVal = 0;
+    int   bVal = 0;
+    int   retVal;
+    while (*ask != 0) {
+        aVal += aVal*37 + *ask++;
+    }
+    while (*bsk != 0) {
+        bVal += bVal*37 + *bsk++;
+    }
+    retVal = -1;
+    if (aVal == bVal) {
+        retVal = 0;
+    }
+    else if (aVal > bVal) {
+        retVal = 1;
+    }
+    return retVal;
+}
+
+//---------------------------------------------------------------------------------------
+//
+//   doKeyGen()     Key Generation Timing Test
+//
+//---------------------------------------------------------------------------------------
+void doKeyGen()
+{
+    int  line;
+    int  loops;
+    int  iLoop;
+    int  t;
+    int  len=-1;
+
+    // Adjust loop count to compensate for file size.   Should be order n
+    double dLoopCount = double(opt_loopCount) * (1000. /  double(gNumFileLines));
+    int adj_loopCount = int(dLoopCount);
+    if (adj_loopCount < 1) adj_loopCount = 1;
+
+
+    unsigned long startTime = timeGetTime();
+
+    if (opt_win) {
+        for (loops=0; loops<adj_loopCount; loops++) {
+            for (line=0; line < gNumFileLines; line++) {
+                if (opt_uselen) {
+                    len = gFileLines[line].len;
+                }
+                for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                    t=LCMapStringW(gWinLCID, LCMAP_SORTKEY,
+                        gFileLines[line].name, len,
+                        (unsigned short *)gFileLines[line].winSortKey, 5000);    // TODO  something with length.
+                }
+            }
+        }
+    }
+    else if (opt_icu)
+    {
+        for (loops=0; loops<adj_loopCount; loops++) {
+            for (line=0; line < gNumFileLines; line++) {
+                if (opt_uselen) {
+                    len = gFileLines[line].len;
+                }
+                for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                    t = ucol_getSortKey(gCol, gFileLines[line].name, len, (unsigned char *)gFileLines[line].icuSortKey, 5000);
+                }
+            }
+        }
+    }
+    else if (opt_unix)
+    {
+        for (loops=0; loops<adj_loopCount; loops++) {
+            for (line=0; line < gNumFileLines; line++) {
+                for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                t = strxfrm(gFileLines[line].unixSortKey, gFileLines[line].unixName, 5000);
+                }
+            }
+        }
+    }
+
+    unsigned long elapsedTime = timeGetTime() - startTime;
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)(adj_loopCount*gNumFileLines));
+
+    if (opt_terse == FALSE) {
+        printf("Sort Key Generation:  total # of keys = %d\n", loops*gNumFileLines);
+        printf("Sort Key Generation:  time per key = %d ns\n", ns);
+    }
+    else {
+        printf("%d,  ", ns);
+    }
+
+    int   totalKeyLen = 0;
+    int   totalChars  = 0;
+    for (line=0; line<gNumFileLines; line++) {
+        totalChars += u_strlen(gFileLines[line].name);
+        if (opt_win) {
+            totalKeyLen += strlen(gFileLines[line].winSortKey);
+        }
+        else if (opt_icu) {
+            totalKeyLen += strlen(gFileLines[line].icuSortKey);
+        }
+        else if (opt_unix) {
+            totalKeyLen += strlen(gFileLines[line].unixSortKey);
+        }
+
+    }
+    if (opt_terse == FALSE) {
+        printf("Key Length / character = %f\n", (float)totalKeyLen / (float)totalChars);
+    } else {
+        printf("%f, ", (float)totalKeyLen / (float)totalChars);
+    }
+}
+
+
+
+//---------------------------------------------------------------------------------------
+//
+//    doBinarySearch()    Binary Search timing test.  Each name from the list
+//                        is looked up in the full sorted list of names.
+//
+//---------------------------------------------------------------------------------------
+void doBinarySearch()
+{
+
+    gCount = 0;
+    int  line;
+    int  loops;
+    int  iLoop;
+    unsigned long elapsedTime;
+
+    // Adjust loop count to compensate for file size.   Should be order n (lookups) * log n  (compares/lookup)
+    // Accurate timings do not depend on this being perfect.  The correction is just to try to
+    //   get total running times of about the right order, so the that user doesn't need to
+    //   manually adjust the loop count for every different file size.
+    double dLoopCount = double(opt_loopCount) * 3000. / (log10(gNumFileLines) * double(gNumFileLines));
+    if (opt_usekeys) dLoopCount *= 5;
+    int adj_loopCount = int(dLoopCount);
+    if (adj_loopCount < 1) adj_loopCount = 1;
+
+
+    for (;;) {  // not really a loop, just allows "break" to work, to simplify
+                //   inadvertantly running more than one test through here.
+        if (opt_strcmp || opt_strcmpCPO) 
+        {
+            unsigned long startTime = timeGetTime();
+            typedef int32_t (U_EXPORT2 *PF)(const UChar *, const UChar *);
+            PF pf = u_strcmp;
+            if (opt_strcmpCPO) {pf = u_strcmpCodePointOrder;}
+            if (opt_strcmp && opt_win) {pf = (PF)wcscmp;}   // Damn the difference between int32_t and int
+                                                            //   which forces the use of a cast here.
+            
+            int r;
+            for (loops=0; loops<adj_loopCount; loops++) {
+                
+                for (line=0; line < gNumFileLines; line++) {
+                    int hi      = gNumFileLines-1;
+                    int lo      = 0;
+                    int  guess = -1;
+                    for (;;) {
+                        int newGuess = (hi + lo) / 2;
+                        if (newGuess == guess)
+                            break;
+                        guess = newGuess;
+                        for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                            r = (*pf)((gSortedLines[line])->name, (gSortedLines[guess])->name);
+                        }
+                        gCount++;
+                        if (r== 0)
+                            break;
+                        if (r < 0)
+                            hi = guess;
+                        else
+                            lo   = guess;
+                    }
+                }
+            }
+            elapsedTime = timeGetTime() - startTime;
+            break;
+        }
+        
+        
+        if (opt_icu)
+        {
+            unsigned long startTime = timeGetTime();
+            UCollationResult  r;
+            for (loops=0; loops<adj_loopCount; loops++) {
+                
+                for (line=0; line < gNumFileLines; line++) {
+                    int lineLen  = -1;
+                    int guessLen = -1;
+                    if (opt_uselen) {
+                        lineLen = (gSortedLines[line])->len;
+                    }
+                    int hi      = gNumFileLines-1;
+                    int lo      = 0;
+                    int  guess = -1;
+                    for (;;) {
+                        int newGuess = (hi + lo) / 2;
+                        if (newGuess == guess)
+                            break;
+                        guess = newGuess;
+                        int ri;
+                        if (opt_usekeys) {
+                            for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                                ri = strcmp((gSortedLines[line])->icuSortKey, (gSortedLines[guess])->icuSortKey);
+                            }
+                            gCount++;
+                            r=UCOL_GREATER; if(ri<0) {r=UCOL_LESS;} else if (ri==0) {r=UCOL_EQUAL;}
+                        }
+                        else
+                        {
+                            if (opt_uselen) {
+                                guessLen = (gSortedLines[guess])->len;
+                            }
+                            for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                                r = ucol_strcoll(gCol, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen);
+                            }
+                            gCount++;
+                        }
+                        if (r== UCOL_EQUAL)
+                            break;
+                        if (r == UCOL_LESS)
+                            hi = guess;
+                        else
+                            lo   = guess;
+                    }
+                }
+            }
+            elapsedTime = timeGetTime() - startTime;
+            break;
+        }
+        
+        if (opt_win)
+        {
+            unsigned long startTime = timeGetTime();
+            int r;
+            for (loops=0; loops<adj_loopCount; loops++) {
+                
+                for (line=0; line < gNumFileLines; line++) {
+                    int lineLen  = -1;
+                    int guessLen = -1;
+                    if (opt_uselen) {
+                        lineLen = (gSortedLines[line])->len;
+                    }
+                    int hi   = gNumFileLines-1;
+                    int lo   = 0;
+                    int  guess = -1;
+                    for (;;) {
+                        int newGuess = (hi + lo) / 2;
+                        if (newGuess == guess)
+                            break;
+                        guess = newGuess;
+                        if (opt_usekeys) {
+                            for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                                r = strcmp((gSortedLines[line])->winSortKey, (gSortedLines[guess])->winSortKey);
+                            }
+                            gCount++;
+                            r+=2;
+                        }
+                        else
+                        {
+                            if (opt_uselen) {
+                                guessLen = (gSortedLines[guess])->len;
+                            }
+                            for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                                r = CompareStringW(gWinLCID, 0, (gSortedLines[line])->name, lineLen, (gSortedLines[guess])->name, guessLen);
+                            }
+                            if (r == 0) {
+                                if (opt_terse == FALSE) {
+                                    fprintf(stderr, "Error returned from Windows CompareStringW.\n");
+                                }
+                                exit(-1);
+                            }
+                            gCount++;
+                        }
+                        if (r== 2)   //  strings ==
+                            break;
+                        if (r == 1)  //  line < guess
+                            hi = guess;
+                        else         //  line > guess
+                            lo   = guess;
+                    }
+                }
+            }
+            elapsedTime = timeGetTime() - startTime;
+            break;
+        }
+        
+        if (opt_unix)
+        {
+            unsigned long startTime = timeGetTime();
+            int r;
+            for (loops=0; loops<adj_loopCount; loops++) {
+                
+                for (line=0; line < gNumFileLines; line++) {
+                    int hi   = gNumFileLines-1;
+                    int lo   = 0;
+                    int  guess = -1;
+                    for (;;) {
+                        int newGuess = (hi + lo) / 2;
+                        if (newGuess == guess)
+                            break;
+                        guess = newGuess;
+                        if (opt_usekeys) {
+                            for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                                 r = strcmp((gSortedLines[line])->unixSortKey, (gSortedLines[guess])->unixSortKey);
+                            }
+                            gCount++;
+                        }
+                        else
+                        {
+                            for (iLoop=0; iLoop < opt_iLoopCount; iLoop++) {
+                                r = strcoll((gSortedLines[line])->unixName, (gSortedLines[guess])->unixName);
+                            }
+                            errno = 0;
+                            if (errno != 0) {
+                                fprintf(stderr, "Error %d returned from strcoll.\n", errno);
+                                exit(-1);
+                            }
+                            gCount++;
+                        }
+                        if (r == 0)   //  strings ==
+                            break;
+                        if (r < 0)  //  line < guess
+                            hi = guess;
+                        else         //  line > guess
+                            lo   = guess;
+                    }
+                }
+            }
+            elapsedTime = timeGetTime() - startTime;
+            break;
+        }
+        break;
+    }
+
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    if (opt_terse == FALSE) {
+        printf("binary search:  total # of string compares = %d\n", gCount);
+        printf("binary search:  compares per loop = %d\n", gCount / loops);
+        printf("binary search:  time per compare = %d ns\n", ns);
+    } else {
+        printf("%d, ", ns);
+    }
+
+}
+
+
+
+
+//---------------------------------------------------------------------------------------
+//
+//   doQSort()    The quick sort timing test.  Uses the C library qsort function.
+//
+//---------------------------------------------------------------------------------------
+void doQSort() {
+    int i;
+    Line **sortBuf = new Line *[gNumFileLines];
+
+    // Adjust loop count to compensate for file size.   QSort should be n log(n)
+    double dLoopCount = double(opt_loopCount) * 3000. / (log10(gNumFileLines) * double(gNumFileLines));
+    if (opt_usekeys) dLoopCount *= 5;
+    int adj_loopCount = int(dLoopCount);
+    if (adj_loopCount < 1) adj_loopCount = 1;
+
+
+    gCount = 0;
+    unsigned long startTime = timeGetTime();
+    if (opt_win && opt_usekeys) {
+        for (i=0; i<opt_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpK);
+        }
+    }
+
+    else if (opt_win && opt_uselen) {
+        for (i=0; i<adj_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), WinstrcmpL);
+        }
+    }
+
+
+    else if (opt_win && !opt_uselen) {
+        for (i=0; i<adj_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), Winstrcmp);
+        }
+    }
+
+    else if (opt_icu && opt_usekeys) {
+        for (i=0; i<adj_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpK);
+        }
+    }
+
+    else if (opt_icu && opt_uselen) {
+        for (i=0; i<adj_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmpL);
+        }
+    }
+
+
+    else if (opt_icu && !opt_uselen) {
+        for (i=0; i<adj_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), ICUstrcmp);
+        }
+    }
+
+    else if (opt_unix && !opt_usekeys) {
+        for (i=0; i<adj_loopCount; i++) {
+            memcpy(sortBuf, gRandomLines, gNumFileLines * sizeof(Line *));
+            qsort(sortBuf, gNumFileLines, sizeof(Line *), UNIXstrcmp);
+        }
+    }
+
+    unsigned long elapsedTime = timeGetTime() - startTime;
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    if (opt_terse == FALSE) {
+        printf("qsort:  total # of string compares = %d\n", gCount);
+        printf("qsort:  time per compare = %d ns\n", ns);
+    } else {
+        printf("%d, ", ns);
+    }
+};
+
+
+
+//---------------------------------------------------------------------------------------
+//
+//    doKeyHist()       Output a table of data for
+//                        average sort key size vs. string length.
+//
+//---------------------------------------------------------------------------------------
+void doKeyHist() {
+    int     i;
+    int     maxLen = 0;
+
+    // Find the maximum string length
+    for (i=0; i<gNumFileLines; i++) {
+        if (gFileLines[i].len > maxLen) maxLen = gFileLines[i].len;
+    }
+
+    // Allocate arrays to hold the histogram data
+    int *accumulatedLen  = new int[maxLen+1];
+    int *numKeysOfSize   = new int[maxLen+1];
+    for (i=0; i<=maxLen; i++) {
+        accumulatedLen[i] = 0;
+        numKeysOfSize[i] = 0;
+    }
+
+    // Fill the arrays...
+    for (i=0; i<gNumFileLines; i++) {
+        int len = gFileLines[i].len;
+        accumulatedLen[len] += strlen(gFileLines[i].icuSortKey);
+        numKeysOfSize[len] += 1;
+    }
+
+    // And write out averages
+    printf("String Length,  Avg Key Length,  Avg Key Len per char\n");
+    for (i=1; i<=maxLen; i++) {
+        if (numKeysOfSize[i] > 0) {
+            printf("%d, %f, %f\n", i, (float)accumulatedLen[i] / (float)numKeysOfSize[i],
+                (float)accumulatedLen[i] / (float)(numKeysOfSize[i] * i));
+        }
+    }
+}
+
+//---------------------------------------------------------------------------------------
+//
+//    doForwardIterTest(UBool)       Forward iteration test
+//                                   argument: TRUE to pass explicit string lengths,
+//                                   FALSE to treat the strings as null-terminated
+//
+//---------------------------------------------------------------------------------------
+void doForwardIterTest(UBool haslen) {
+    int count = 0;
+    
+    UErrorCode error = U_ZERO_ERROR;
+    printf("\n\nPerforming forward iteration performance test with ");
+
+    if (haslen) {
+        printf("non-null terminated data -----------\n");
+    }
+    else {
+        printf("null terminated data -----------\n");
+    }
+    printf("performance test on strings from file -----------\n");
+
+    UChar dummytext[] = {0, 0};
+    UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error);
+    ucol_setText(iter, dummytext, 1, &error);
+    
+    gCount = 0;
+    unsigned long startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *str = gFileLines[linecount].name;
+            int strlen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, str, strlen, &error);
+            while (ucol_next(iter, &error) != UCOL_NULLORDER) {
+                gCount++;
+            }
+
+            linecount ++;
+        }
+        count ++;
+    }
+    unsigned long elapsedTime = timeGetTime() - startTime;
+    printf("elapsedTime %d\n", elapsedTime);
+
+    // empty loop recalculation
+    count = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *str = gFileLines[linecount].name;
+            int strlen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, str, strlen, &error);
+            linecount ++;
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %d\n", elapsedTime);
+
+    ucol_closeElements(iter);
+
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Total number of strings compared %d in %d loops\n", gNumFileLines,
+                                                                opt_loopCount);
+    printf("Average time per ucol_next() nano seconds %d\n", ns);
+
+    printf("performance test on skipped-5 concatenated strings from file -----------\n");
+
+    UChar *str;
+    int    strlen = 0;
+    // appending all the strings
+    int linecount = 0;
+    while (linecount < gNumFileLines) {
+        strlen += haslen?gFileLines[linecount].len:
+                                      u_strlen(gFileLines[linecount].name);
+        linecount ++;
+    }
+    str = (UChar *)malloc(sizeof(UChar) * strlen);
+    int strindex = 0;
+    linecount = 0;
+    while (strindex < strlen) {
+        int len = 0;
+        len += haslen?gFileLines[linecount].len:
+                                      u_strlen(gFileLines[linecount].name);
+        memcpy(str + strindex, gFileLines[linecount].name, 
+               sizeof(UChar) * len);
+        strindex += len;
+        linecount ++;
+    }
+
+    printf("Total size of strings %d\n", strlen);
+
+    gCount = 0;
+    count  = 0;
+
+    if (!haslen) {
+        strlen = -1;
+    }
+    iter = ucol_openElements(gCol, str, strlen, &error);
+    if (!haslen) {
+        strlen = u_strlen(str);
+    }
+    strlen -= 5; // any left over characters are not iterated,
+                 // this is to ensure the backwards and forwards iterators
+                 // gets the same position
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 0;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            if (ucol_next(iter, &error) == UCOL_NULLORDER) {
+                break;
+            }
+            gCount++;
+            count5 --;
+            if (count5 == 0) {
+                strindex += 10;
+                if (strindex > strlen) {
+                    break;
+                }
+                ucol_setOffset(iter, strindex, &error);
+                count5 = 5;
+            }
+        }
+        count ++;
+    }
+
+    elapsedTime = timeGetTime() - startTime;
+    printf("elapsedTime %d\n", elapsedTime);
+    
+    // empty loop recalculation
+    int tempgCount = 0;
+    count = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 0;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            tempgCount ++;
+            count5 --;
+            if (count5 == 0) {
+                strindex += 10;
+                if (strindex > strlen) {
+                    break;
+                }
+                ucol_setOffset(iter, strindex, &error);
+                count5 = 5;
+            }
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %d\n", elapsedTime);
+
+    ucol_closeElements(iter);
+
+    printf("gCount %d\n", gCount);
+    ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Average time per ucol_next() nano seconds %d\n", ns);
+}
+
+//---------------------------------------------------------------------------------------
+//
+//    doBackwardIterTest(UBool)      Backwards iteration test
+//                                   argument: TRUE to pass explicit string lengths,
+//                                   FALSE to treat the strings as null-terminated
+//
+//---------------------------------------------------------------------------------------
+void doBackwardIterTest(UBool haslen) {
+    int count = 0;
+    UErrorCode error = U_ZERO_ERROR;
+    printf("\n\nPerforming backward iteration performance test with ");
+
+    if (haslen) {
+        printf("non-null terminated data -----------\n");
+    }
+    else {
+        printf("null terminated data -----------\n");
+    }
+    
+    printf("performance test on strings from file -----------\n");
+
+    UCollationElements *iter = ucol_openElements(gCol, NULL, 0, &error);
+    UChar dummytext[] = {0, 0};
+    ucol_setText(iter, dummytext, 1, &error);
+
+    gCount = 0;
+    unsigned long startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *str = gFileLines[linecount].name;
+            int strlen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, str, strlen, &error);
+            while (ucol_previous(iter, &error) != UCOL_NULLORDER) {
+                gCount ++;
+            }
+
+            linecount ++;
+        }
+        count ++;
+    }
+    unsigned long elapsedTime = timeGetTime() - startTime;
+
+    printf("elapsedTime %d\n", elapsedTime);
+
+    // empty loop recalculation
+    count = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int linecount = 0;
+        while (linecount < gNumFileLines) {
+            UChar *str = gFileLines[linecount].name;
+            int strlen = haslen?gFileLines[linecount].len:-1;
+            ucol_setText(iter, str, strlen, &error);
+            linecount ++;
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+
+    printf("elapsedTime %d\n", elapsedTime);
+    ucol_closeElements(iter);
+
+    int ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Total number of strings compared %d in %d loops\n", gNumFileLines,
+                                                                opt_loopCount);
+    printf("Average time per ucol_previous() nano seconds %d\n", ns);
+
+    printf("performance test on skipped-5 concatenated strings from file -----------\n");
+
+    UChar *str;
+    int    strlen = 0;
+    // appending all the strings
+    int linecount = 0;
+    while (linecount < gNumFileLines) {
+        strlen += haslen?gFileLines[linecount].len:
+                                      u_strlen(gFileLines[linecount].name);
+        linecount ++;
+    }
+    str = (UChar *)malloc(sizeof(UChar) * strlen);
+    int strindex = 0;
+    linecount = 0;
+    while (strindex < strlen) {
+        int len = 0;
+        len += haslen?gFileLines[linecount].len:
+                                      u_strlen(gFileLines[linecount].name);
+        memcpy(str + strindex, gFileLines[linecount].name, 
+               sizeof(UChar) * len);
+        strindex += len;
+        linecount ++;
+    }
+
+    printf("Total size of strings %d\n", strlen);
+
+    gCount = 0;
+    count  = 0;
+
+    if (!haslen) {
+        strlen = -1;
+    }
+
+    iter = ucol_openElements(gCol, str, strlen, &error);
+    if (!haslen) {
+        strlen = u_strlen(str);
+    }
+
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 5;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+            if (ucol_previous(iter, &error) == UCOL_NULLORDER) {
+                break;
+            }
+             gCount ++;
+             count5 --;
+             if (count5 == 0) {
+                 strindex += 10;
+                 if (strindex > strlen) {
+                    break;
+                 }
+                 ucol_setOffset(iter, strindex, &error);
+                 count5 = 5;
+             }
+        }
+        count ++;
+    }
+
+    elapsedTime = timeGetTime() - startTime;
+    printf("elapsedTime %d\n", elapsedTime);
+    
+    // empty loop recalculation
+    count = 0;
+    int tempgCount = 0;
+    startTime = timeGetTime();
+    while (count < opt_loopCount) {
+        int count5 = 5;
+        strindex = 5;
+        ucol_setOffset(iter, strindex, &error);
+        while (TRUE) {
+             tempgCount ++;
+             count5 --;
+             if (count5 == 0) {
+                 strindex += 10;
+                 if (strindex > strlen) {
+                    break;
+                 }
+                 ucol_setOffset(iter, strindex, &error);
+                 count5 = 5;
+             }
+        }
+        count ++;
+    }
+    elapsedTime -= (timeGetTime() - startTime);
+    printf("elapsedTime %d\n", elapsedTime);
+    ucol_closeElements(iter);
+
+    printf("gCount %d\n", gCount);
+    ns = (int)(float(1000000) * (float)elapsedTime / (float)gCount);
+    printf("Average time per ucol_previous() nano seconds %d\n", ns);
+}
+
+//---------------------------------------------------------------------------------------
+//
+//    doIterTest()       Iteration test
+//
+//---------------------------------------------------------------------------------------
+void doIterTest() {
+    // Run the forward and then the backward collation-element iteration
+    // benchmarks, both with the string-length mode chosen by opt_uselen.
+    const UBool useExplicitLengths = opt_uselen;
+    doForwardIterTest(useExplicitLengths);
+    doBackwardIterTest(useExplicitLengths);
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+//   UnixConvert   -- Convert the lines of the file to the encoding for UNIX
+//                    Since it appears that Unicode support is going in the general
+//                    direction of the use of UTF-8 locales, that is the approach
+//                    that is used here.
+//
+//                    For each of the gNumFileLines entries in gFileLines, the UTF-16
+//                    text in .name is converted to UTF-8 and stored, NUL terminated,
+//                    in a newly allocated .unixName buffer.
+//                    Any ICU failure is reported on stderr and ends the program.
+//
+//----------------------------------------------------------------------------------------
+void  UnixConvert() {
+    int    line;
+
+    UConverter   *cvrtr;    // An ICU code page converter.
+    UErrorCode    status = U_ZERO_ERROR;
+
+
+    cvrtr = ucnv_open("utf-8", &status);    // we are just doing UTF-8 locales for now.
+    if (U_FAILURE(status)) {
+        // Print the error code itself, not the address of the status variable.
+        fprintf(stderr, "ICU Converter open failed.: %d\n", status);
+        exit(-1);
+    }
+
+    for (line=0; line < gNumFileLines; line++) {
+        // First pass:  preflight with an empty target to learn the required size.
+        int sizeNeeded = ucnv_fromUChars(cvrtr,
+                                         0,            // ptr to target buffer.
+                                         0,            // length of target buffer.
+                                         gFileLines[line].name,
+                                         -1,           //  source is null terminated
+                                         &status);
+        // A buffer overflow is the expected outcome of preflighting, and ICU warnings
+        //   are not failures; only a genuine error stops the run.
+        if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
+            fprintf(stderr, "Conversion from Unicode, something is wrong.\n");
+            exit(-1);
+        }
+        status = U_ZERO_ERROR;
+
+        // Second pass:  convert for real into a buffer with room for the terminator.
+        gFileLines[line].unixName = new char[sizeNeeded+1];
+        sizeNeeded = ucnv_fromUChars(cvrtr,
+                                         gFileLines[line].unixName, // ptr to target buffer.
+                                         sizeNeeded+1, // length of target buffer.
+                                         gFileLines[line].name,
+                                         -1,           //  source is null terminated
+                                         &status);
+        if (U_FAILURE(status)) {
+            fprintf(stderr, "ICU Conversion Failed.: %d\n", status);
+            exit(-1);
+        }
+        gFileLines[line].unixName[sizeNeeded] = 0;
+    }
+    ucnv_close(cvrtr);
+}
+
+
+//----------------------------------------------------------------------------------------
+//
+//  class UCharFile   Wraps a file opened for reading and hands its contents back
+//                    one UChar at a time, hiding the details of the file's
+//                    external encoding (UTF-16LE, UTF-16BE or UTF-8).
+//
+//----------------------------------------------------------------------------------------
+class UCharFile {
+public:
+    UCharFile(const char *fileName);
+    ~UCharFile();
+
+    UChar   get();
+    UBool   eof()   { return fEof; }
+    UBool   error() { return fError; }
+
+private:
+    // Copying is not supported:  both operations are private so that code outside
+    //   the class cannot use them.
+    UCharFile (const UCharFile &other) {}
+    UCharFile & operator = (const UCharFile &other) { return *this; }
+
+    FILE         *fFile;                   // Stream opened by the constructor.
+    const char   *fName;                   // The file name pointer given to the constructor.
+    UBool        fEof;                     // Set by get() once end of file has been reached.
+    UBool        fError;                   // Set when opening or decoding the file fails.
+    UChar        fPending2ndSurrogate;     // Second half of a surrogate pair that get() holds
+                                           //   back for its next call;  0 when there is none.
+
+    enum {UTF16LE, UTF16BE, UTF8} fEncoding;   // Chosen by the constructor from the BOM.
+};
+
+//
+//  Constructor.  Opens fileName for binary reading and picks the encoding from the
+//                byte order mark at the start of the file.  On any failure a message
+//                is written to stderr and the error flag is set;  callers are expected
+//                to test error() before calling get().
+//
+UCharFile::UCharFile(const char * fileName) {
+    fEof                 = FALSE;
+    fError               = FALSE;
+    fName                = fileName;
+    fFile                = fopen(fName, "rb");
+    fPending2ndSurrogate = 0;
+    fEncoding            = UTF8;    // Placeholder only, so the member is never left
+                                    //   indeterminate on the failure paths below.
+    if (fFile == NULL) {
+        // Report the file that was actually requested; this class is also used for
+        //   the rules file, not only for the file named by opt_fName.
+        fprintf(stderr, "Can not open file \"%s\"\n", fileName);
+        fError = TRUE;
+        return;
+    }
+    //
+    //  Look for the byte order mark at the start of the file.
+    //
+    int BOMC1, BOMC2, BOMC3;
+    BOMC1 = fgetc(fFile);
+    BOMC2 = fgetc(fFile);
+
+    if (BOMC1 == 0xff && BOMC2 == 0xfe) {
+        fEncoding = UTF16LE; }
+    else if (BOMC1 == 0xfe && BOMC2 == 0xff) {
+        fEncoding = UTF16BE; }
+    else if (BOMC1 == 0xEF && BOMC2 == 0xBB && (BOMC3 = fgetc(fFile)) == 0xBF ) {
+        // The third BOM byte is only read once the first two match the UTF-8 BOM.
+        fEncoding = UTF8; }
+    else
+    {
+        fprintf(stderr, "collperf:  file \"%s\" encoding must be UTF-8 or UTF-16, and "
+            "must include a BOM.\n", fileName);
+        fError = TRUE;
+        return;
+    }
+}
+
+
+//
+//  Destructor.  Closes the underlying stream, if the constructor managed to open one.
+//
+UCharFile::~UCharFile() {
+    // fFile is NULL when fopen() failed in the constructor, and fclose(NULL) is not
+    //   permitted.  That path is reachable:  openRulesCollator() returns, and so
+    //   destroys its UCharFile, right after a failed open.
+    if (fFile != NULL) {
+        fclose(fFile);
+    }
+}
+
+
+
+//
+//  get()   Return the next UTF-16 code unit from the file.
+//            - At end of file, returns 0 and sets the flag reported by eof().
+//            - On malformed UTF-8 input, prints a message to stderr, sets the flag
+//              reported by error(), and returns 0.
+//            - A UTF-8 encoded code point of 0x10000 or above is delivered as a
+//              surrogate pair, one half per call.
+//
+UChar UCharFile::get() {
+    UChar   c = 0;      // Defined result even if no case below matches.
+    switch (fEncoding) {
+    case UTF16LE:
+        {
+            int  cL, cH;
+            cL = fgetc(fFile);
+            cH = fgetc(fFile);
+            c  = cL  | (cH << 8);
+            if (cH == EOF) {
+                // The second byte read is the one tested, so a lone trailing byte
+                //   is treated as end of file too.
+                c   = 0;
+                fEof = TRUE;
+            }
+            break;
+        }
+    case UTF16BE:
+        {
+            int  cL, cH;
+            cH = fgetc(fFile);
+            cL = fgetc(fFile);
+            c  = cL  | (cH << 8);
+            if (cL == EOF) {
+                c   = 0;
+                fEof = TRUE;
+            }
+            break;
+        }
+    case UTF8:
+        {
+            if (fPending2ndSurrogate != 0) {
+                // Second half of a surrogate pair left over from the previous call.
+                c = fPending2ndSurrogate;
+                fPending2ndSurrogate = 0;
+                break;
+            }
+            
+            int ch = fgetc(fFile);   // Note:  c and ch are separate cause eof test doesn't work on UChar type.
+            if (ch == EOF) {
+                c = 0;
+                fEof = TRUE;
+                break;
+            }
+            
+            if (ch <= 0x7f) {
+                // It's ascii.  No further utf-8 conversion.
+                c = ch;
+                break;
+            }
+            
+            // Figure out the length of the char and read the rest of the bytes
+            //   into a temp array.
+            int nBytes;
+            if      (ch >= 0xF8) {nBytes=0;}    // 0xF8..0xFF never begin a UTF-8 sequence.
+            else if (ch >= 0xF0) {nBytes=4;}
+            else if (ch >= 0xE0) {nBytes=3;}
+            else if (ch >= 0xC0) {nBytes=2;}
+            else                 {nBytes=0;}    // 0x80..0xBF:  continuation byte with no lead byte.
+            if (nBytes == 0) {
+                fprintf(stderr, "utf-8 encoded file contains corrupt data.\n");
+                fError = TRUE;
+                return 0;
+            }
+            
+            unsigned char  bytes[10];
+            bytes[0] = (unsigned char)ch;
+            int i;
+            for (i=1; i<nBytes; i++) {
+                // An EOF in mid-sequence is stored as 0xff here, and so is rejected
+                //   by the range test along with any other non-continuation byte.
+                bytes[i] = fgetc(fFile);
+                if (bytes[i] < 0x80 || bytes[i] >= 0xc0) {
+                    fprintf(stderr, "utf-8 encoded file contains corrupt data.\n");
+                    fError = TRUE;
+                    return 0;
+                }
+            }
+            
+            // Convert the bytes from the temp array to a Unicode char.
+            i = 0;
+            uint32_t  cp;
+            UTF8_NEXT_CHAR_UNSAFE(bytes, i, cp);
+            c = (UChar)cp;
+            
+            if (cp >= 0x10000) {
+                // The code point needs to be broken up into a utf-16 surrogate pair.
+                //  Process first half this time through the main loop, and
+                //   remember the other half for the next time through.
+                UChar utf16Buf[3];
+                i = 0;
+                UTF16_APPEND_CHAR_UNSAFE(utf16Buf, i, cp);
+                fPending2ndSurrogate = utf16Buf[1];
+                c = utf16Buf[0];
+            }
+            break;
+        }
+    }
+    return c;
+}
+
+//----------------------------------------------------------------------------------------
+//
+//   openRulesCollator  - Command line specified a rules file.  Read it in
+//                        and open a collator with it.
+//
+//                        Returns the new collator, or 0 if the file named by opt_rules
+//                        could not be read, memory ran out, or ucol_openRules() failed.
+//                        The temporary rules buffer is released on every path.
+//
+//----------------------------------------------------------------------------------------
+UCollator *openRulesCollator() {
+    UCharFile f(opt_rules);
+    if (f.error()) {
+        return 0;
+    }
+
+    int  bufLen = 10000;      // Buffer capacity, counted in UChars (not bytes).
+    UChar *buf = (UChar *)malloc(bufLen * sizeof(UChar));
+    if (buf == NULL) {
+        fprintf(stderr, "collperf:  out of memory reading the rules file.\n");
+        return 0;
+    }
+    int i = 0;
+
+    for(;;) {
+        buf[i] = f.get();
+        if (f.eof()) {
+            break;
+        }
+        if (f.error()) {
+            free(buf);
+            return 0;
+        }
+        i++;
+        if (i >= bufLen) {
+            // Grow by another 10000 UChars.  realloc() takes a size in bytes, so the
+            //   element count has to be scaled by sizeof(UChar).
+            bufLen += 10000;
+            UChar *grown = (UChar *)realloc(buf, bufLen * sizeof(UChar));
+            if (grown == NULL) {
+                fprintf(stderr, "collperf:  out of memory reading the rules file.\n");
+                free(buf);
+                return 0;
+            }
+            buf = grown;
+        }
+    }
+    buf[i] = 0;     // buf[i] is always in bounds:  the buffer is grown as soon as i reaches bufLen.
+
+    UErrorCode    status = U_ZERO_ERROR;
+    UCollator *coll = ucol_openRules(buf, u_strlen(buf), UCOL_OFF,
+                                         UCOL_DEFAULT_STRENGTH, NULL, &status);
+    free(buf);      // No longer needed, whether or not the open succeeded.
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "ICU ucol_openRules() open failed.: %d\n", status);
+        return 0;
+    }
+    return coll;
+}
+
+
+
+
+
+//----------------------------------------------------------------------------------------
+//
+//    Main   --  process command line, read in and pre-process the test file,
+//                 call other functions to do the actual tests.
+//
+//               Returns 0 after running the selected tests.  Option, collator, locale
+//               and file errors are reported on stderr and end the program with a
+//               non-zero status.
+//
+//----------------------------------------------------------------------------------------
+int main(int argc, const char** argv) {
+    if (ProcessOptions(argc, argv, opts) != TRUE || opt_help || opt_fName == 0) {
+        printf(gUsageString);
+        exit (1);
+    }
+
+    // Make sure that we've only got one API selected.
+    if (opt_unix || opt_win) opt_icu = FALSE;
+    if (opt_unix) opt_win = FALSE;
+
+    //
+    //  Set up an ICU collator
+    //
+    UErrorCode          status = U_ZERO_ERROR;
+
+    if (opt_rules != 0) {
+        gCol = openRulesCollator();
+        if (gCol == 0) {return -1;}
+    }
+    else {
+        gCol = ucol_open(opt_locale, &status);
+        if (U_FAILURE(status)) {
+            fprintf(stderr, "Collator creation failed.: %d\n", status);
+            return -1;
+        }
+    }
+    if (status==U_USING_DEFAULT_WARNING && opt_terse==FALSE) {
+        fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", opt_locale);
+    }
+    if (status==U_USING_FALLBACK_WARNING && opt_terse==FALSE) {
+        // The message names the status code that was actually tested.
+        fprintf(stderr, "Warning, U_USING_FALLBACK_WARNING for %s\n", opt_locale);
+    }
+
+    if (opt_norm) {
+        ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+    }
+    if (opt_french && opt_frenchoff) {
+        fprintf(stderr, "collperf:  Error, specified both -french and -frenchoff options.");
+        exit(-1);
+    }
+    if (opt_french) {
+        ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
+    }
+    if (opt_frenchoff) {
+        ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
+    }
+    if (opt_lower) {
+        ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, &status);
+    }
+    if (opt_upper) {
+        ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, &status);
+    }
+    if (opt_case) {
+        ucol_setAttribute(gCol, UCOL_CASE_LEVEL, UCOL_ON, &status);
+    }
+    if (opt_shifted) {
+        ucol_setAttribute(gCol, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
+    }
+    if (opt_level != 0) {
+        switch (opt_level) {
+        case 1:
+            ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_PRIMARY, &status);
+            break;
+        case 2:
+            ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_SECONDARY, &status);
+            break;
+        case 3:
+            ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_TERTIARY, &status);
+            break;
+        case 4:
+            ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
+            break;
+        case 5:
+            ucol_setAttribute(gCol, UCOL_STRENGTH, UCOL_IDENTICAL, &status);
+            break;
+        default:
+            fprintf(stderr, "-level param must be between 1 and 5\n");
+            exit(-1);
+        }
+    }
+
+    // A single check covers all of the ucol_setAttribute() calls above, which share status.
+    if (U_FAILURE(status)) {
+        fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
+        return -1;
+    }
+
+
+    //
+    //  Set up a Windows LCID
+    //
+    if (opt_langid != 0) {
+        gWinLCID = MAKELCID(opt_langid, SORT_DEFAULT);
+    }
+    else {
+        gWinLCID = uloc_getLCID(opt_locale);
+    }
+
+
+    //
+    //  Set the UNIX locale
+    //
+    if (opt_unix) {
+        if (setlocale(LC_ALL, opt_locale) == 0) {
+            fprintf(stderr, "setlocale(LC_ALL, %s) failed.\n", opt_locale);
+            exit(-1);
+        }
+    }
+
+    // Read in  the input file.
+    //   File must be UTF-8 or UTF-16 and start with a BOM (checked by UCharFile).
+    //   Lines go onto heap buffers.  Global index array to line starts is created.
+    //   Lines themselves are null terminated.
+    //
+
+    UCharFile f(opt_fName);
+    if (f.error()) {
+        exit(-1);
+    }
+
+    const int MAXLINES = 40000;
+    gFileLines = new Line[MAXLINES];
+    UChar buf[1024];
+    int   column = 0;
+
+    //  Read the file, split into lines, and save in memory.
+    //  Loop runs once per utf-16 value from the input file,
+    //    (The number of bytes read from file per loop iteration depends on external encoding.)
+    for (;;) {
+
+        UChar c = f.get();
+        if (f.error()){
+            exit(-1);
+        }
+
+
+        // We now have a good UTF-16 value in c.
+
+        // Watch for CR, LF, EOF; these finish off a line.
+        if (c == 0xd) {
+            continue;
+        }
+
+        if (f.eof() || c == 0x0a || c==0x2028) {  // Unipad inserts 2028 line separators!
+            buf[column++] = 0;
+            if (column > 1) {
+                // Non-empty line:  copy it, terminator included, to its own heap buffer.
+                gFileLines[gNumFileLines].name  = new UChar[column];
+                gFileLines[gNumFileLines].len   = column-1;
+                memcpy(gFileLines[gNumFileLines].name, buf, column * sizeof(UChar));
+                gNumFileLines++;
+                if (gNumFileLines >= MAXLINES) {
+                    fprintf(stderr, "File too big.  Max number of lines is %d\n", MAXLINES);
+                    exit(-1);
+                }
+
+            }
+            // Restart the line buffer for empty lines as well.  Otherwise the terminator
+            //   just stored in buf[0] would become the first character of the next line.
+            column = 0;
+            if (c == 0xa || c == 0x2028)
+                continue;
+            else
+                break;  // EOF
+        }
+        buf[column++] = c;
+        if (column >= 1023)
+        {
+            static UBool warnFlag = TRUE;
+            if (warnFlag) {
+                fprintf(stderr, "Warning - file line longer than 1023 chars truncated.\n");
+                warnFlag = FALSE;
+            }
+            // Back up one slot, so the rest of an over-long line keeps overwriting the
+            //   same position and room remains for the terminator.
+            column--;
+        }
+    }
+
+    if (opt_terse == FALSE) {
+        printf("file \"%s\", %d lines.\n", opt_fName, gNumFileLines);
+    }
+
+
+    // Convert the lines to the UNIX encoding.
+    if (opt_unix) {
+        UnixConvert();
+    }
+
+    //
+    //  Pre-compute ICU sort keys for the lines of the file.
+    //    buf is reused as scratch space;  a key that does not fit is generated again
+    //    directly into its own heap buffer.
+    //
+    int line;
+    int t;
+
+    for (line=0; line<gNumFileLines; line++) {
+         t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)buf, sizeof(buf));
+         gFileLines[line].icuSortKey  = new char[t];
+
+         if (t > sizeof(buf)) {
+             t = ucol_getSortKey(gCol, gFileLines[line].name, -1, (unsigned char *)gFileLines[line].icuSortKey , t);
+         }
+         else
+         {
+             memcpy(gFileLines[line].icuSortKey, buf, t);
+         }
+    }
+
+
+
+    //
+    //  Pre-compute Windows sort keys for the lines of the file.
+    //
+    for (line=0; line<gNumFileLines; line++) {
+         t=LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, buf, sizeof(buf));
+         gFileLines[line].winSortKey  = new char[t];
+         if (t > sizeof(buf)) {
+             t = LCMapStringW(gWinLCID, LCMAP_SORTKEY, gFileLines[line].name, -1, (unsigned short *)(gFileLines[line].winSortKey), t);
+         }
+         else
+         {
+             memcpy(gFileLines[line].winSortKey, buf, t);
+         }
+    }
+
+    //
+    //  Pre-compute UNIX sort keys for the lines of the file.
+    //    strxfrm() returns the key length NOT counting the terminating NUL, and the
+    //    destination contents are unusable when that length is >= the size passed in.
+    //    So the key buffer needs t+1 bytes, and a key that did not fit in buf is
+    //    generated again with the full t+1 byte limit.
+    //
+    if (opt_unix) {
+        for (line=0; line<gNumFileLines; line++) {
+            t=strxfrm((char *)buf,  gFileLines[line].unixName,  sizeof(buf));
+            gFileLines[line].unixSortKey  = new char[t+1];
+            if ((size_t)t >= sizeof(buf)) {
+                t = strxfrm(gFileLines[line].unixSortKey,  gFileLines[line].unixName,  t+1);
+            }
+            else
+            {
+                memcpy(gFileLines[line].unixSortKey, buf, t+1);   // +1 copies the NUL terminator too.
+            }
+        }
+    }
+
+
+    //
+    //  Dump file lines, CEs, Sort Keys if requested.
+    //
+    if (opt_dump) {
+        int  i;
+        for (line=0; line<gNumFileLines; line++) {
+            // The line itself, with anything outside printable ASCII shown as \uXXXX.
+            for (i=0;;i++) {
+                UChar  c = gFileLines[line].name[i];
+                if (c == 0)
+                    break;
+                if (c < 0x20 || c > 0x7e) {
+                    printf("\\u%.4x", c);
+                }
+                else {
+                    printf("%c", c);
+                }
+            }
+            printf("\n");
+
+            printf("   CEs: ");
+            UCollationElements *CEiter = ucol_openElements(gCol, gFileLines[line].name, -1, &status);
+            int32_t ce;
+            i = 0;
+            for (;;) {
+                ce = ucol_next(CEiter, &status);
+                if (ce == UCOL_NULLORDER) {
+                    break;
+                }
+                printf(" %.8x", ce);
+                if (++i > 8) {
+                    printf("\n        ");
+                    i = 0;
+                }
+            }
+            printf("\n");
+            ucol_closeElements(CEiter);
+
+
+            printf("   ICU Sort Key: ");
+            for (i=0; ; i++) {
+                unsigned char c = gFileLines[line].icuSortKey[i];
+                printf("%02x ", c);
+                if (c == 0) {
+                    break;
+                }
+                if (i > 0 && i % 20 == 0) {
+                    printf("\n                 ");
+                }
+           }
+            printf("\n");
+        }
+    }
+
+
+    //
+    //  Pre-sort the lines.
+    //
+    int i;
+    gSortedLines = new Line *[gNumFileLines];
+    for (i=0; i<gNumFileLines; i++) {
+        gSortedLines[i] = &gFileLines[i];
+    }
+
+    if (opt_win) {
+        qsort(gSortedLines, gNumFileLines, sizeof(Line *), Winstrcmp);
+    }
+    else if (opt_unix) {
+        qsort(gSortedLines, gNumFileLines, sizeof(Line *), UNIXstrcmp);
+    }
+    else   /* ICU */
+    {
+        qsort(gSortedLines, gNumFileLines, sizeof(Line *), ICUstrcmp);
+    }
+
+
+    //
+    //  Make up a randomized order, will be used for sorting tests.
+    //
+    gRandomLines = new Line *[gNumFileLines];
+    for (i=0; i<gNumFileLines; i++) {
+        gRandomLines[i] = &gFileLines[i];
+    }
+    qsort(gRandomLines, gNumFileLines, sizeof(Line *), ICURandomCmp);
+
+
+
+
+    //
+    //  We've got the file read into memory.  Go do something with it.
+    //
+
+    if (opt_qsort)     doQSort();
+    if (opt_binsearch) doBinarySearch();
+    if (opt_keygen)    doKeyGen();
+    if (opt_keyhist)   doKeyHist();
+    if (opt_itertest)  doIterTest();
+
+    return 0;
+
+}
diff --git a/icuSources/test/collperf/collperf.dsp b/icuSources/test/collperf/collperf.dsp
new file mode 100644 (file)
index 0000000..4fc8ded
--- /dev/null
@@ -0,0 +1,160 @@
+# Microsoft Developer Studio Project File - Name="collperf" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=collperf - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "collperf.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "collperf.mak" CFG="collperf - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "collperf - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "collperf - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE "collperf - Win64 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "collperf - Win64 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "collperf - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /Ob2 /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# SUBTRACT CPP /YX
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 icuuc.lib icuin.lib ctestfw.lib icutu.lib kernel32.lib user32.lib advapi32.lib shell32.lib winmm.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib\\"
+
+!ELSEIF  "$(CFG)" == "collperf - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FR /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 icuucd.lib icuind.lib icutud.lib winmm.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib\\"
+
+!ELSEIF  "$(CFG)" == "collperf - Win64 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /Zi /O2 /Op /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c
+# SUBTRACT CPP /YX
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
+# ADD LINK32 icuuc.lib icuin.lib ctestfw.lib icutu.lib kernel32.lib user32.lib advapi32.lib shell32.lib winmm.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib\\" /machine:IA64
+
+!ELSEIF  "$(CFG)" == "collperf - Win64 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /Op /I "..\..\..\include" /I "..\..\tools\ctestfw" /I "..\..\common" /I "..\..\i18n" /I "..\..\tools\toolutil" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FR /FD /GZ /Wp64 /Zm600 /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
+# ADD LINK32 icuucd.lib icuind.lib icutud.lib winmm.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib\\" /machine:IA64
+
+!ENDIF 
+
+# Begin Target
+
+# Name "collperf - Win32 Release"
+# Name "collperf - Win32 Debug"
+# Name "collperf - Win64 Release"
+# Name "collperf - Win64 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\collperf.cpp
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/icuSources/test/collperf/readme.html b/icuSources/test/collperf/readme.html
new file mode 100644 (file)
index 0000000..40940d1
--- /dev/null
@@ -0,0 +1,84 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2//EN">
+<HTML>
+
+<HEAD>
+       <META HTTP-EQUIV="Content-Type" CONTENT="text/html;CHARSET=iso-8859-1">
+        <meta name="Copyright" content="Copyright (c) 2002, International Business Machines Corporation and others. All Rights Reserved.">
+       <META NAME="GENERATOR" Content="Visual Page 2.0 for Windows">
+       <TITLE>collperf</TITLE>
+</HEAD>
+
+<BODY BGCOLOR="white">
+
+<H2>collperf</H2>
+<P>collperf is a test program for comparing collation performance and key lengths of ICU, Windows native collation
+and Unix/POSIX collation. It operates on a file of lines (names, for example), and performs one of three tests:</P>
+
+<OL>
+       <LI>Sort Key generation. Report on key lengths and key generation times.
+       <LI>Binary search. Report the average time required to look up each of the names (file lines) from the file in
+       a sorted list of all of the names.
+       <LI>Quick Sort. Report the time required to sort the file in memory, using the C library qsort function. The file
+       order is randomized prior to the sort.
+</OL>
+
+<P>
+<P><B>Usage Summary</B></P>
+
+<BLOCKQUOTE>
+       <P>
+       <TABLE BORDER="0" WIDTH="100%" BGCOLOR="#FFFBF0">
+               <TR>
+                       <TD WIDTH="100%">
+                               <P><TT>collperf -help</TT>
+                               <BLOCKQUOTE>
+                                       <PRE><TT>Usage: strperf options...
+-help                  Display this message.
+-file file_name        utf-16 format file of names
+-locale name           ICU locale to use. Default is en_US
+-langid 0x1234         Windows Language ID number. Default 0x409 (en_US)
+                       see http://msdn.microsoft.com/library/psdk/winbase/nls_8xo3.htm
+-win                   Run test using Windows native services. (ICU is default)
+-unix                  Run test using Unix strxfrm, strcoll services.
+-uselen                Use API with string lengths. Default is null-terminated strings
+-usekeys               Run tests using sortkeys rather than strcoll
+-loop nnnn             Loopcount for test. Adjust for reasonable total running time.
+-terse                 Terse numbers-only output. Intended for use by scripts.
+-french                French accent ordering
+-norm                  Normalizing mode on
+-shifted               Shifted mode
+-lower                 Lower case first
+-upper                 Upper case first
+-case                  Enable separate case level
+-level n               Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical
+-binsearch             Binary Search timing test
+-keygen                Sort Key Generation timing test
+-qsort                 Quicksort timing test</TT></PRE>
+                               </BLOCKQUOTE>
+                       </TD>
+               </TR>
+       </TABLE>
+</P>
+</BLOCKQUOTE>
+
+<P><B>Example</B></P>
+
+<BLOCKQUOTE>
+       <P>
+       <TABLE BORDER="0" WIDTH="100%" BGCOLOR="#FFFBF0">
+               <TR>
+                       <TD WIDTH="100%"><TT>C:\&gt;collperf -loop 200 -file latin.txt -keygen -shifted -level 4<BR>
+                               file &quot;latin.txt&quot;, 7604 lines.<BR>
+                               Sort Key Generation: total # of keys = 197704<BR>
+                               Sort Key Generation: time per key = 4253 ns<BR>
+                               Key Length / character = 1.730054</TT></TD>
+               </TR>
+       </TABLE>
+
+</BLOCKQUOTE>
+
+<P>
+
+</BODY>
+
+</HTML>
\ No newline at end of file
diff --git a/icuSources/test/perf/all/all.dsp b/icuSources/test/perf/all/all.dsp
new file mode 100644 (file)
index 0000000..83d50f6
--- /dev/null
@@ -0,0 +1,63 @@
+# Microsoft Developer Studio Project File - Name="all" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Generic Project" 0x010a
+
+CFG=all - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "all.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "all.mak" CFG="all - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "all - Win32 Release" (based on "Win32 (x86) Generic Project")
+!MESSAGE "all - Win32 Debug" (based on "Win32 (x86) Generic Project")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+MTL=midl.exe
+
+!IF  "$(CFG)" == "all - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Target_Dir ""
+
+!ELSEIF  "$(CFG)" == "all - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Target_Dir ""
+
+!ENDIF 
+
+# Begin Target
+
+# Name "all - Win32 Release"
+# Name "all - Win32 Debug"
+# End Target
+# End Project
diff --git a/icuSources/test/unalignedtest/Makefile.in b/icuSources/test/unalignedtest/Makefile.in
new file mode 100644 (file)
index 0000000..d4fa022
--- /dev/null
@@ -0,0 +1,83 @@
+## Makefile.in for ICU - test/unalignedtest
+## Copyright (c) 2001, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = test/unalignedtest
+
+## Extra files to remove for 'make clean'
+## ($(DEPS) is defined further down; this is a recursive '=' assignment, so it is
+## only expanded when CLEANFILES is used.)
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = unalignedtest
+
+DEFS += -I$(top_builddir)/common -I$(top_srcdir)/common
+LIBS = $(LIBICUUC) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = unaligned.o
+
+DEPS = $(OBJECTS:.o=.d)
+
+## List of phony targets
+## check-cintltst, check-intltest and invoke are commands, not files, so they are
+## listed here as well; a file with one of those names must not suppress them.
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local \
+check-cintltst check-intltest invoke
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+clean-local:
+       test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+       $(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+       $(RMV) Makefile
+
+check-local: all-local check-cintltst check-intltest
+
+## The leading '-' on the two recipes below makes make carry on when the wrapped
+## test run exits with a failure status.
+check-cintltst:
+       - $(INVOKE) ./$(TARGET) $(top_srcdir)/test/cintltst/cintltst -a
+
+check-intltest:
+       - $(INVOKE) ./$(TARGET) $(top_srcdir)/test/intltest/intltest -a
+
+Makefile: $(srcdir)/Makefile.in  $(top_builddir)/config.status
+       cd $(top_builddir) \
+        && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+       $(LINK.cc) -o $@ $^ 
+
+invoke:
+       ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+## Pull in the generated dependency files, except when every goal named on the
+## command line is a *clean or *install goal.
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+ifneq ($(patsubst %install,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
+endif
diff --git a/icuSources/test/unalignedtest/readme b/icuSources/test/unalignedtest/readme
new file mode 100644 (file)
index 0000000..acf10a1
--- /dev/null
@@ -0,0 +1,27 @@
+Copyright (c) 2002-2003, International Business Machines Corporation and others. All Rights Reserved.
+Unalignedtest
+=============
+
+This test checks whether ICU is 64-bit clean. It runs cintltst and intltest under gdb and raises a SIGBUS fault
+whenever the kernel encounters an unaligned access trap.
+
+Build and Usage
+==================
+To build this test:
+
+i)   Build and test ICU
+
+ii)  cd to <icu>/source and run the following command to build the Makefile
+CONFIG_FILES=./test/unalignedtest/Makefile CONFIG_HEADERS= ./config.status
+
+iii) cd to <icu>/source/test/unalignedtest and run 'make' to build the executable
+
+iv) For testing cintltst run 'make check-cintltst'
+
+v) For testing intltest run 'make check-intltest'
+
+vi) To find out whether any of the tools trigger unaligned traps:
+       a) cd to <icu>/source/data
+       b) run 'make clean'
+       c) run 'env LEAK_CHECKER="<icu>/source/test/unalignedtest/unalignedtest -b" make' 
+
diff --git a/icuSources/test/unalignedtest/unaligned.c b/icuSources/test/unalignedtest/unaligned.c
new file mode 100644 (file)
index 0000000..69137a3
--- /dev/null
@@ -0,0 +1,304 @@
+/*
+
+    This program is a wrapper to assist in debugging unaligned traps on the Alpha
+
+    architectures.
+
+
+
+    COPYRIGHT AND PERMISSION NOTICE
+
+
+
+    Copyright (c) 2002 Sean Hunter
+
+
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+
+    copy of this software and associated documentation files (the
+
+    "Software"), to deal in the Software without restriction, including
+
+    without limitation the rights to use, copy, modify, merge, publish,
+
+    distribute, and/or sell copies of the Software, and to permit persons
+
+    to whom the Software is furnished to do so, provided that the above
+
+    copyright notice(s) and this permission notice appear in all copies of
+
+    the Software and that both the above copyright notice(s) and this
+
+    permission notice appear in supporting documentation.
+
+
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+
+    OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+
+    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
+
+    OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+
+    HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
+
+    INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
+
+    FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+
+    NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+
+    WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+
+    Except as contained in this notice, the name of a copyright holder
+
+    shall not be used in advertising or otherwise to promote the sale, use
+
+    or other dealings in this Software without prior written authorization
+
+    of the copyright holder.
+
+
+
+    --------------------------------------------------------------------------------
+
+    All trademarks and registered trademarks mentioned herein are the property
+
+    of their respective owners.  
+
+
+
+*/
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <asm/sysinfo.h>
+#include <asm/unistd.h>
+
+
+
+#define TMP_PATH_MAX 1024
+
+
+
+
+
+/*
+ * Thin wrapper that issues the __NR_osf_setsysinfo system call with the
+ * given arguments, passed through unchanged.
+ *
+ * Returns the value reported by syscall(), narrowed to int.  The original
+ * fell off the end of a non-void function, so any caller reading the result
+ * would have seen an indeterminate value.
+ */
+static int
+setsysinfo(unsigned long op, void *buffer, unsigned long size,
+           int *start, void *arg, unsigned long flag)
+{
+    return (int) syscall(__NR_osf_setsysinfo, op, buffer, size, start, arg, flag);
+}
+
+
+
+
+
+/*
+ * Submit one name/value pair to setsysinfo(SSI_NVPAIRS, ...): the name is
+ * SSIN_UACPROC and the value is UAC_SIGBUS | UAC_NOPRINT.
+ * Per the usage text, the intent is that the process receives SIGBUS when it
+ * hits an unaligned trap.  The result of the call is not examined.
+ */
+void
+trap_unaligned(void)
+{
+    unsigned int nvpair[2] = { SSIN_UACPROC, UAC_SIGBUS | UAC_NOPRINT };
+
+    setsysinfo(SSI_NVPAIRS, nvpair, 1, 0, 0, 0);
+}
+
+
+
+
+
+/*
+ * Write the command-line synopsis to stderr and terminate the process with
+ * exit status 1.  Does not return.
+ */
+static void
+usage(void)
+{
+    static const char help_text[] =
+        "usage: unaligned [-b] <command-path> [command-args...]\n\n"
+        "  This program is designed to assist debugging of\n"
+        "  unaligned traps by running the program in gdb\n"
+        "  and causing it to get SIGBUS when it encounters\n"
+        "  an unaligned trap.\n\n"
+        "  It is free software written by Sean Hunter <sean@uncarved.co.uk>\n"
+        "  based on code by Richard Henderson and Andrew Morgan.\n\n";
+
+    fputs(help_text, stderr);
+    exit(1);
+}
+
+
+
+
+
+int 
+
+main(int argc, char **argv)
+
+{
+
+       const char my_debugger[] = "/usr/bin/gdb";
+
+
+
+       char *temp_str;
+
+       char *curr;
+
+       int size = 0;
+
+       int curr_arg;
+
+       int isBatchMode = 0;
+
+
+
+       /* check that we have at least 1 argument */
+
+       if (argc < 2) {
+
+               usage();
+
+       }
+
+       if( strcmp("-b" , argv[1]) == 0 ){
+
+           isBatchMode = 1;
+
+           curr_arg = 2;
+
+        }else{
+
+           curr_arg = 1;
+
+       }        
+
+
+
+       trap_unaligned();
+
+
+
+       if (argc > 2) {
+
+               /* We're going to use bash process redirection to create a "file" for gdb to read
+
+                * containing the arguments we need */
+
+               size = 2048;
+
+               for(; curr_arg < argc; curr_arg++) {
+
+                       size += strlen(argv[curr_arg]);
+
+               }
+
+               temp_str = (char *) malloc(sizeof(char) * size);
+
+               if (!temp_str) {
+
+                   fprintf(stderr, "Unable to malloc memory for string use: %s\n", strerror(errno));
+
+                   exit(255);
+
+               }
+
+               if(isBatchMode==1){
+
+                       sprintf(temp_str, "%s -batch %s -x <( echo file %s; echo set args", my_debugger, argv[2], argv[2]);
+
+               }else{
+
+                       sprintf(temp_str, "%s %s -x <( echo file %s; echo set args", my_debugger, argv[1], argv[1]);
+
+               }
+
+               curr = temp_str + strlen(temp_str);
+
+               for(curr_arg = 2; curr_arg < argc; curr_arg++) {
+
+                       sprintf(curr, " %s", argv[curr_arg]);
+
+                       curr = temp_str + strlen(temp_str);
+
+               }
+
+#ifndef NOAUTORUN
+
+               curr = temp_str + strlen(temp_str);
+
+               sprintf(curr, "; echo run");
+
+#endif 
+
+               curr = temp_str + strlen(temp_str);
+
+               sprintf(curr, ")");
+
+
+
+               execlp("/bin/bash", "/bin/bash", "-c", temp_str, NULL);
+
+
+
+       }
+
+       else {
+
+               execlp(my_debugger, my_debugger, argv[1], NULL);
+
+       }       
+
+
+
+       /* if we fall through to here, our exec failed -- announce the fact */
+
+       fprintf(stderr, "Unable to execute command: %s\n", strerror(errno));
+
+
+
+       usage();
+
+
+
+}
+
+
+
+/* use gcc unaligned.c -o unaligned to compile.  Add -DNOAUTORUN if you
+
+don't want gdb to automatically run the program */
+
+
+
diff --git a/icuSources/test/usetperf/bitset.cpp b/icuSources/test/usetperf/bitset.cpp
new file mode 100644 (file)
index 0000000..d1356c0
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* 2002-09-20 aliu Created.
+*/
+
+#include "unicode/utypes.h"
+#include "cmemory.h"
+#include "bitset.h"
+
+// TODO: have a separate capacity, so the len can just be set to
+// zero in the clearAll() method, and growth can be smarter.
+
+const int32_t SLOP = 8;
+
+const int32_t BYTES_PER_WORD = sizeof(int32_t);
+
+// Builds an empty set: allocates SLOP 32-bit words and zeroes them.
+BitSet::BitSet()
+    : len(SLOP),
+      data((int32_t*) uprv_malloc(SLOP * BYTES_PER_WORD)) {
+    clearAll();
+}
+
+// Releases the word buffer (obtained from uprv_malloc) owned by this set.
+BitSet::~BitSet() {
+    uprv_free(data);
+}
+
+// Reports whether bit number bitIndex is set.
+// Indices whose word lies outside the allocated buffer read as FALSE.
+UBool BitSet::get(int32_t bitIndex) const {
+    const uint32_t word = bitIndex >> 5;      // 32 bits per word
+    if (word >= len) {
+        return FALSE;
+    }
+    const int32_t bit = bitIndex & 0x1F;      // position inside the word
+    return ((data[word] >> bit) & 1) != 0;
+}
+
+// Sets bit number bitIndex, growing the word buffer when the bit lies beyond
+// the current length.
+//
+// Negative indices are ignored.  Previously a negative index became a huge
+// unsigned word index and ensureCapacity() doubled its length until it
+// wrapped to 0 and then looped forever.  get() already reports FALSE for
+// such indices.
+void BitSet::set(int32_t bitIndex) {
+    if (bitIndex < 0) {
+        return;
+    }
+    uint32_t longIndex = (uint32_t) bitIndex >> 5;
+    int32_t bitInLong = bitIndex & 0x1F;
+    if (longIndex >= len) {
+        ensureCapacity(longIndex+1);
+    }
+    // shift an unsigned 1 so that bit 31 does not overflow a signed int
+    data[longIndex] |= (int32_t) ((uint32_t) 1 << bitInLong);
+}
+
+// Clears every bit; the buffer keeps its current length.
+void BitSet::clearAll() {
+    int32_t* p = data;
+    int32_t* const limit = data + len;
+    while (p < limit) {
+        *p++ = 0;
+    }
+}
+
+void BitSet::ensureCapacity(uint32_t minLen) {
+    uint32_t newLen = len;
+    while (newLen < minLen) newLen <<= 1; // grow exponentially
+    int32_t* newData = (int32_t*) uprv_malloc(newLen * BYTES_PER_WORD);
+    uprv_memcpy(newData, data, len * BYTES_PER_WORD);
+    uprv_free(data);
+    data = newData;
+    int32_t* p = data + len;
+    int32_t* limit = data + newLen;
+    while (p < limit) *p++ = 0;
+    len = newLen;
+}
+
+//eof
diff --git a/icuSources/test/usetperf/bitset.h b/icuSources/test/usetperf/bitset.h
new file mode 100644 (file)
index 0000000..8b7122d
--- /dev/null
@@ -0,0 +1,38 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* 2002-09-20 aliu Created.
+*/
+#ifndef __BITSET_H__
+#define __BITSET_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * A simple, limited clone of the java.util.BitSet.
+ *
+ * Bits are stored in a heap-allocated array of 32-bit words that grows on
+ * demand when set() is called with an index beyond the current length.
+ * The object owns that array, so it is not copyable.
+ */
+class BitSet {
+
+    uint32_t len;    // number of 32-bit words in data
+    int32_t* data;   // word buffer owned by this object
+
+    // Grows data so that it holds at least minLen words.
+    void ensureCapacity(uint32_t minLen);
+
+    // Copying is disabled (declared, intentionally never defined): an
+    // implicit copy would share data and free it twice on destruction.
+    BitSet(const BitSet& other);
+    BitSet& operator=(const BitSet& other);
+
+public:
+
+    BitSet();
+    ~BitSet();
+
+    // Returns whether the bit at bitIndex is set.
+    UBool get(int32_t bitIndex) const;
+
+    // Sets the bit at bitIndex.
+    void set(int32_t bitIndex);
+
+    // Non-java
+    // Clears every bit.
+    void clearAll();
+
+    // TODO add other methods as needed.
+};
+
+#endif
diff --git a/icuSources/test/usetperf/timer.h b/icuSources/test/usetperf/timer.h
new file mode 100644 (file)
index 0000000..4aa5824
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* 2002-09-20 aliu Created.
+*/
+#ifndef __PERFTIMER_H__
+#define __PERFTIMER_H__
+
+#include "unicode/utypes.h"
+
+// Derived from Ram's perftime.h
+
+//----------------------------------------------------------------------
+// Win32
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+
+#include <windows.h>
+
+// Stopwatch built on the Win32 performance counter.
+// start() records the counter; stop() records it again and returns the
+// elapsed time in seconds (counter difference divided by the frequency).
+class Timer {
+    LARGE_INTEGER tstart, tend;
+public:
+    Timer() {}
+    inline void start() {
+        QueryPerformanceCounter(&tstart);
+    }
+    inline double stop() {
+        QueryPerformanceCounter(&tend);
+        LARGE_INTEGER freq;
+        // the BOOL result used to be stored in a local that was never read
+        QueryPerformanceFrequency(&freq);
+        return ((double)(tend.QuadPart - tstart.QuadPart))/((double)freq.QuadPart);
+    }
+};
+    
+//----------------------------------------------------------------------
+// UNIX
+
+#else
+
+#include <sys/time.h> 
+
+class Timer {
+    struct timeval tstart, tend;
+    struct timezone tz;
+public:
+    Timer() {}
+    inline void start() {
+        gettimeofday(&tstart, &tz);
+    }
+    inline double stop() {
+        gettimeofday(&tend, &tz);
+        double t1, t2;
+        t1 = (double)tstart.tv_sec + (double)tstart.tv_usec*1e-6;
+        t2 = (double)tend.tv_sec + (double)tend.tv_usec*1e-6;
+        return t2-t1;
+    }
+};
+
+#endif
+#endif
diff --git a/icuSources/test/usetperf/usetperf.cpp b/icuSources/test/usetperf/usetperf.cpp
new file mode 100644 (file)
index 0000000..cdff2ad
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+**********************************************************************
+* Copyright (c) 2002-2004, International Business Machines
+* Corporation and others.  All Rights Reserved.
+**********************************************************************
+* 2002-09-20 aliu Created.
+*/
+
+#include <stdio.h>
+
+#include "unicode/utypes.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/usetiter.h"
+#include "bitset.h"
+#include "timer.h"
+
+#define LENGTH(a) (sizeof(a)/sizeof(a[0]))
+
+int main(int argc, const char *argv[]) {
+
+    Timer timer;
+    BitSet bs;
+    UnicodeSet us;
+    int32_t i, j, n, temp;
+    UChar32 cp;
+    double t;
+
+    int32_t PROPS[] = {
+        // category         iterations for add, contains, iterator
+        U_TITLECASE_LETTER, 100, 100, 20000000,
+        U_UNASSIGNED,       30, 100, 20000000,
+    };
+
+    for (j=0; j<LENGTH(PROPS); j+=4) {
+        UCharCategory prop = (UCharCategory) PROPS[j];
+
+        printf("\nGetting characters for character category %d\n", prop);
+        bs.clearAll();
+        int32_t total = 0;
+        for (cp=0; cp<0x110000; ++cp) {
+            if (u_charType(cp) == prop) {
+                bs.set((int32_t) cp);
+                ++total;
+            }
+        }
+        printf("Total characters: %d\n", total);
+        
+        // add()
+        n = PROPS[j+1];
+        printf("Testing add() x %d...", n);
+        timer.start();
+        for (i=0; i<n; ++i) {
+            us.clear();
+            for (cp=0; cp<0x110000; ++cp) {
+                if (bs.get((int32_t) cp)) {
+                    us.add(cp);
+                }
+            }
+        }
+        t = timer.stop();
+        printf("result: %f sec => %f ms/loop\n", t, t*1e3/n);
+
+        // contains()
+        n = PROPS[j+2];
+        printf("Testing contains() x %d...", n);
+        temp = 0;
+        timer.start();
+        for (i=0; i<n; ++i) {
+            us.clear();
+            for (cp=0; cp<0x110000; ++cp) {
+                if (us.contains(cp)) {
+                    temp += cp;
+                }
+            }
+        }
+        t = timer.stop();
+        printf("result: %f sec => %f ms/loop\n", t, t*1e3/n);
+        
+        // iterator
+        n = PROPS[j+3];
+        printf("Testing iterator x %d...", n);
+        temp = 0;
+        timer.start();
+        for (i=0; i<n; ++i) {
+            UnicodeSetIterator uit(us);
+            while (uit.next()) {
+                temp += uit.getCodepoint();
+            }
+        }
+        t = timer.stop();
+        printf("result: %f sec => %f ns/loop\n", t, t*1e9/n);
+    }
+
+    char* PAT[] = {
+        "['A-Za-z\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E1\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u01FB\\u0200-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u212A-\\u212B]",
+
+        "['.0-9A-Za-z~\\u00C0-\\u00C5\\u00C7-\\u00CF\\u00D1-\\u00D6\\u00D9-\\u00DD\\u00E0-\\u00E5\\u00E7-\\u00EF\\u00F1-\\u00F6\\u00F9-\\u00FD\\u00FF-\\u010F\\u0112-\\u0125\\u0128-\\u0130\\u0134-\\u0137\\u0139-\\u013E\\u0143-\\u0148\\u014C-\\u0151\\u0154-\\u0165\\u0168-\\u017E\\u01A0-\\u01A1\\u01AF-\\u01B0\\u01CD-\\u01DC\\u01DE-\\u01E3\\u01E6-\\u01ED\\u01F0\\u01F4-\\u01F5\\u01F8-\\u021B\\u021E-\\u021F\\u0226-\\u0233\\u0301\\u0303-\\u0304\\u0306-\\u0307\\u0310\\u0314-\\u0315\\u0323\\u0325\\u0331\\u0341\\u0344\\u0385-\\u0386\\u0388-\\u038A\\u038C\\u038E-\\u0390\\u03AC-\\u03B0\\u03CC-\\u03CE\\u03D3\\u0403\\u040C\\u040E\\u0419\\u0439\\u0453\\u045C\\u045E\\u04C1-\\u04C2\\u04D0-\\u04D1\\u04D6-\\u04D7\\u04E2-\\u04E3\\u04EE-\\u04EF\\u1E00-\\u1E99\\u1EA0-\\u1EF9\\u1F01\\u1F03-\\u1F05\\u1F07\\u1F09\\u1F0B-\\u1F0D\\u1F0F\\u1F11\\u1F13-\\u1F15\\u1F19\\u1F1B-\\u1F1D\\u1F21\\u1F23-\\u1F25\\u1F27\\u1F29\\u1F2B-\\u1F2D\\u1F2F\\u1F31\\u1F33-\\u1F35\\u1F37\\u1F39\\u1F3B-\\u1F3D\\u1F3F\\u1F41\\u1F43-\\u1F45\\u1F49\\u1F4B-\\u1F4D\\u1F51\\u1F53-\\u1F55\\u1F57\\u1F59\\u1F5B\\u1F5D\\u1F5F\\u1F61\\u1F63-\\u1F65\\u1F67\\u1F69\\u1F6B-\\u1F6D\\u1F6F\\u1F71\\u1F73\\u1F75\\u1F77\\u1F79\\u1F7B\\u1F7D\\u1F81\\u1F83-\\u1F85\\u1F87\\u1F89\\u1F8B-\\u1F8D\\u1F8F\\u1F91\\u1F93-\\u1F95\\u1F97\\u1F99\\u1F9B-\\u1F9D\\u1F9F\\u1FA1\\u1FA3-\\u1FA5\\u1FA7\\u1FA9\\u1FAB-\\u1FAD\\u1FAF-\\u1FB1\\u1FB4\\u1FB8-\\u1FB9\\u1FBB\\u1FC4\\u1FC9\\u1FCB\\u1FCE\\u1FD0-\\u1FD1\\u1FD3\\u1FD8-\\u1FD9\\u1FDB\\u1FDE\\u1FE0-\\u1FE1\\u1FE3\\u1FE5\\u1FE8-\\u1FE9\\u1FEB-\\u1FEC\\u1FEE\\u1FF4\\u1FF9\\u1FFB\\u212A-\\u212B\\uE04D\\uE064]",
+
+        "[\\u0901-\\u0903\\u0905-\\u0939\\u093C-\\u094D\\u0950-\\u0954\\u0958-\\u096F]",
+    };
+
+    UErrorCode ec = U_ZERO_ERROR;
+
+    n = 2000;
+
+    for (j=0; j<LENGTH(PAT); ++j) {
+
+        printf("\nApplying pattern %s x %d...", PAT[j], n);
+        UnicodeSet set;
+        UnicodeString pat(PAT[j], "");
+
+        timer.start(); 
+        for (i=0; i<n; i++) {
+            set.applyPattern(pat, ec);
+        }
+        t = timer.stop();
+        printf("result: %f sec => %f us/loop\n", t, t*1e6/n);
+    }        
+
+    return 0;
+}
diff --git a/icuSources/test/usetperf/usetperf.dsp b/icuSources/test/usetperf/usetperf.dsp
new file mode 100644 (file)
index 0000000..3d01228
--- /dev/null
@@ -0,0 +1,164 @@
+# Microsoft Developer Studio Project File - Name="usetperf" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=usetperf - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "usetperf.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "usetperf.mak" CFG="usetperf - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "usetperf - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "usetperf - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE "usetperf - Win64 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "usetperf - Win64 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "usetperf - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
+
+!ELSEIF  "$(CFG)" == "usetperf - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
+
+!ELSEIF  "$(CFG)" == "usetperf - Win64 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /Zi /O2 /Op /I "..\..\..\include" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
+# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64
+
+!ELSEIF  "$(CFG)" == "usetperf - Win64 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /Op /I "..\..\..\include" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /Wp64 /Zm600 /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
+# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64
+
+!ENDIF 
+
+# Begin Target
+
+# Name "usetperf - Win32 Release"
+# Name "usetperf - Win32 Debug"
+# Name "usetperf - Win64 Release"
+# Name "usetperf - Win64 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\bitset.cpp
+# ADD CPP /I "..\..\common"
+# End Source File
+# Begin Source File
+
+SOURCE=.\usetperf.cpp
+# ADD CPP /I "..\..\common"
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/icuSources/test/utfperf/utfperf.c b/icuSources/test/utfperf/utfperf.c
new file mode 100644 (file)
index 0000000..dfe7e3f
--- /dev/null
@@ -0,0 +1,450 @@
+/*  
+**********************************************************************
+*   Copyright (C) 2002, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+**********************************************************************
+*   file name:  utfperf.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2002apr17
+*   created by: Markus W. Scherer
+*
+*   Performance test program for Unicode converters
+*   (converters that support all Unicode code points).
+*   Takes a UTF-8 file as input.
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+#include <fcntl.h>     /* for _O_BINARY */
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#   include <io.h>      /* for _setmode(); this header exists only on Windows */
+#   include <windows.h>
+#else
+#   include <sys/time.h>
+    /*
+     * Stand-in for the Win32 timeGetTime(): returns the current time of day
+     * in milliseconds.  Callers only use differences between two readings,
+     * so the unsigned arithmetic may wrap harmlessly.
+     */
+    static unsigned long
+    timeGetTime(void) {
+        struct timeval t;
+
+        gettimeofday(&t, 0);
+        return (unsigned long)t.tv_sec*1000+(unsigned long)t.tv_usec/1000;
+    }
+#endif
+
+#include "unicode/utypes.h"
+#include "unicode/ucnv.h"
+#include "unicode/ustring.h"
+
+/* definitions and text buffers */
+
+#define INPUT_CAPACITY (1024*1024)
+#define INTERMEDIATE_CAPACITY 4096
+#define INTERMEDIATE_SMALL_CAPACITY 20
+#define OUTPUT_CAPACITY INPUT_CAPACITY
+
+#define TARGET_MEASURE_TIME_MS 2000
+
+#define PERCENT(a, b) (int)(((a)*200+1)/(2*(b)))
+
+#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
+
+static UChar input[INPUT_CAPACITY], output[OUTPUT_CAPACITY];
+static char intermediate[INTERMEDIATE_CAPACITY];
+
+static int32_t inputLength, encodedLength, outputLength, countInputCodePoints;
+
+static int32_t utf8Length=0;
+static double utf8Time=0.;
+
+static const char *const
+utfNames[]={
+    "UTF-8", /* UTF-8 should always be first to serve as percentage reference */
+    "SCSU", "BOCU-1" /*, "CESU-8" *//*, "UTF-16BE", "UTF-16LE"*//*, "GB18030"*/
+};
+
+/* functions */
+
+typedef void
+RoundtripFn(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode);
+
+/*
+ * Convert input[0..inputLength[ to the converter's encoding and back.
+ *
+ * The text is encoded block by block into intermediate[] (using only the
+ * first intermediateCapacity bytes) and each block is immediately decoded
+ * into output[].  On return encodedLength holds the total number of encoded
+ * bytes; outputLength holds the number of UChars written to output[] when
+ * the loop runs to completion.
+ *
+ * cnv                  - converter used for both directions; reset on entry
+ * intermediateCapacity - number of bytes of intermediate[] to use per block
+ * pErrorCode           - ICU error code; set to a failure code if a
+ *                        conversion fails, or to U_INTERNAL_PROGRAM_ERROR if
+ *                        the output length differs from the input length
+ */
+static void
+roundtrip(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) {
+    const UChar *pIn, *pInLimit;
+    UChar *pOut, *pOutLimit;
+    char *pInter, *pInterLimit, *p;
+    UBool flush;
+
+    ucnv_reset(cnv);
+
+    pIn=input;
+    pInLimit=input+inputLength;
+
+    pOut=output;
+    pOutLimit=output+OUTPUT_CAPACITY;
+
+    pInterLimit=intermediate+intermediateCapacity;
+
+    encodedLength=outputLength=0;
+    flush=FALSE;
+
+    /* keep going until all input is consumed and one pass ran with flush set */
+    while(pIn<pInLimit || !flush) {
+        /* convert a block of [pIn..pInLimit[ to the encoding in intermediate[] */
+        pInter=intermediate;
+        flush=(UBool)(pIn==pInLimit);
+        ucnv_fromUnicode(cnv,
+                         &pInter, pInterLimit,
+                         &pIn, pInLimit,
+                         NULL, flush,
+                         pErrorCode);
+        encodedLength+=(int32_t)(pInter-intermediate);
+
+        /* a full intermediate buffer is expected here, not an error */
+        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
+            /* in case flush was TRUE make sure that we convert once more to really flush */
+            flush=FALSE;
+            *pErrorCode=U_ZERO_ERROR;
+        } else if(U_FAILURE(*pErrorCode)) {
+            return;
+        }
+
+        /* convert the block [intermediate..pInter[ back to UTF-16 */
+        p=intermediate;
+        ucnv_toUnicode(cnv,
+                       &pOut, pOutLimit,
+                       &p, pInter,
+                       NULL, flush,
+                       pErrorCode);
+        if(U_FAILURE(*pErrorCode)) {
+            return;
+        }
+        /* intermediate must have been consumed (p==pInter) because of the converter semantics */
+    }
+
+    outputLength=pOut-output;
+    if(inputLength!=outputLength) {
+        fprintf(stderr, "error: roundtrip failed, inputLength %d!=outputLength %d\n", inputLength, outputLength);
+        *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
+    }
+}
+
+/*
+ * Empty function with the RoundtripFn signature.
+ * Timing it measures the cost of the measurement loop itself.
+ */
+static void
+noop(UConverter *cnv, int32_t intermediateCapacity, UErrorCode *pErrorCode) {
+    /* intentionally empty; the casts only mark the parameters as unused */
+    (void)cnv;
+    (void)intermediateCapacity;
+    (void)pErrorCode;
+}
+
+/*
+ * Call fn up to n times (at least once) and return the elapsed time in
+ * milliseconds as reported by timeGetTime().  The loop stops early when fn
+ * reports a failure.
+ *
+ * Returns 0x7fffffff, after printing a message to stderr, if fn failed or if
+ * output[] does not match input[] over inputLength UChars afterwards.
+ */
+static unsigned long
+measureRoundtrips(RoundtripFn *fn, UConverter *cnv, const char *encName, int32_t intermediateCapacity, int32_t n) {
+    UErrorCode status=U_ZERO_ERROR;
+    unsigned long startTime, elapsed;
+
+    startTime=timeGetTime();
+    for(;;) {
+        fn(cnv, intermediateCapacity, &status);
+        if(U_FAILURE(status) || --n<=0) {
+            break;
+        }
+    }
+    elapsed=timeGetTime()-startTime;
+
+    if(U_FAILURE(status)) {
+        fprintf(stderr, "error in roundtrip conversion (%s): %s\n", encName, u_errorName(status));
+        return 0x7fffffff;
+    }
+
+    if(u_memcmp(input, output, inputLength)!=0) {
+        fprintf(stderr, "error: roundtrip failed, input[]!=output[]\n");
+        return 0x7fffffff;
+    }
+
+    return elapsed;
+}
+
+/*
+ * Measure and report roundtrip performance for one converter and one
+ * intermediate buffer capacity.
+ *
+ * A warm-up phase multiplies the loop count by 10 until one measurement
+ * takes at least TARGET_MEASURE_TIME_MS/10 ms; the count is then scaled so
+ * that the real measurement lasts about TARGET_MEASURE_TIME_MS.  The time of
+ * the same number of calls to noop() is subtracted as loop overhead.
+ *
+ * When called for "UTF-8" with INTERMEDIATE_CAPACITY, the results are stored
+ * in utf8Length/utf8Time and serve as the percentage reference for later
+ * calls.
+ *
+ * Nothing is reported if a measurement fails; measureRoundtrips() has
+ * already printed the reason in that case.
+ */
+static void
+perEncAndCapacity(UConverter *cnv, const char *encName, int32_t intermediateCapacity) {
+    double rtTime;
+    unsigned long _time, noopTime;
+    int32_t n;
+
+    /* warm up caches and estimate loop time */
+    n=10;
+    for(;;) {
+        _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n);
+        if(_time<500 && _time<TARGET_MEASURE_TIME_MS/10) {
+            n*=10;
+        } else {
+            break;
+        }
+    }
+
+    /* 0x7fffffff is the error value of measureRoundtrips() */
+    if(_time==0x7fffffff) {
+        return;
+    }
+
+    if(_time<TARGET_MEASURE_TIME_MS) {
+        /* scale in floating point: n*TARGET_MEASURE_TIME_MS can exceed int32_t */
+        n=(int32_t)(((double)n*TARGET_MEASURE_TIME_MS)/(double)_time)+1;
+    }
+
+    /* run actual measurement with a target test time of TARGET_MEASURE_TIME_MS */
+    _time=measureRoundtrips(roundtrip, cnv, encName, intermediateCapacity, n);
+    if(_time==0x7fffffff) {
+        return;
+    }
+
+    /* subtract same number of loops over no-operation function */
+    noopTime=measureRoundtrips(noop, cnv, encName, intermediateCapacity, n);
+    /* the times are unsigned: never let the difference wrap around */
+    _time= noopTime<_time ? _time-noopTime : 0;
+
+    /* milliseconds for n loops -> microseconds per loop */
+    rtTime=((double)_time*1000.)/(double)n;
+
+    /* report ("us" = microseconds; keeps the output plain US-ASCII) */
+    printf("* performance report for                %8s:\n", encName);
+    printf("  intermediate buffer capacity          %8d B\n", intermediateCapacity);
+    if(intermediateCapacity==INTERMEDIATE_CAPACITY && utf8Length!=0) {
+        printf("  number of encoding bytes              %8d B  (%3d%% of UTF-8)\n", encodedLength, PERCENT(encodedLength, utf8Length));
+        printf("  roundtrip conversion time             %8g us (%3d%% of UTF-8)\n", rtTime, PERCENT(rtTime, utf8Time));
+    } else {
+        printf("  number of encoding bytes              %8d B\n", encodedLength);
+        printf("  roundtrip conversion time             %8g us\n", rtTime);
+    }
+    printf("  average bytes/code point              %8g B/cp\n", (double)encodedLength/countInputCodePoints);
+    puts("");
+
+    /* set UTF-8 values */
+    if(intermediateCapacity==INTERMEDIATE_CAPACITY && 0==strcmp(encName, "UTF-8")) {
+        utf8Length=encodedLength;
+        utf8Time=rtTime;
+    }
+}
+
+/*
+ * Run the performance report for one converter, first with the regular and
+ * then with the small intermediate buffer capacity.
+ */
+static void
+perEnc(UConverter *cnv, const char *encName) {
+    static const int32_t capacities[]={
+        INTERMEDIATE_CAPACITY,
+        INTERMEDIATE_SMALL_CAPACITY
+    };
+    int32_t k;
+
+    for(k=0; k<(int32_t)ARRAY_LENGTH(capacities); ++k) {
+        perEncAndCapacity(cnv, encName, capacities[k]);
+    }
+}
+
+/*
+ * Print the general header lines, then open each converter named in
+ * utfNames[] in turn and report its performance.  A converter that cannot be
+ * opened is reported on stderr and skipped.
+ */
+static void
+testPerformance() {
+    int32_t idx;
+
+    printf("number of code points                   %8d cp\n", countInputCodePoints);
+    printf("platform endianness:                    %8s-endian\n", U_IS_BIG_ENDIAN ? "big" : "little");
+    puts("");
+
+    for(idx=0; idx<ARRAY_LENGTH(utfNames); ++idx) {
+        const char *name=utfNames[idx];
+        UErrorCode status=U_ZERO_ERROR;
+        UConverter *converter=ucnv_open(name, &status);
+
+        if(U_FAILURE(status)) {
+            fprintf(stderr, "error opening converter for \"%s\" - %s\n", name, u_errorName(status));
+            continue;
+        }
+        perEnc(converter, name);
+        ucnv_close(converter);
+    }
+}
+
+/*
+ * Read a complete block from the input file into the intermediate buffer.
+ *
+ * Keeps calling fread() until INTERMEDIATE_CAPACITY bytes have been
+ * collected or the end of the file is reached. Each read is appended
+ * after the bytes already collected, so a short read is not overwritten
+ * by the following one.
+ *
+ * Returns the number of bytes now in the intermediate buffer
+ * (0 at end of file), or -1 if the stream reports a read error.
+ */
+static int32_t
+readBlock(FILE *in) {
+    size_t length;
+    int32_t blockLength;
+
+    blockLength=0;
+    while(blockLength<INTERMEDIATE_CAPACITY && !feof(in)) {
+        length=fread(intermediate+blockLength, 1, (size_t)(INTERMEDIATE_CAPACITY-blockLength), in);
+        if(ferror(in)) {
+            return -1;
+        }
+        if(length==0) {
+            /* nothing was delivered: stop instead of spinning on the stream */
+            break;
+        }
+        blockLength+=(int32_t)length;
+    }
+
+    return blockLength;
+}
+
+/*
+ * Read the whole input stream block by block, convert it to UTF-16 and
+ * store the result in the global input buffer (at most INPUT_CAPACITY UChars).
+ *
+ * in       stream to read from
+ * encName  name of the input encoding; if NULL, the first block is checked
+ *          for a Unicode signature and UTF-8 is used when none is found
+ *
+ * On success sets inputLength and countInputCodePoints and returns TRUE.
+ * Returns FALSE if reading fails, the converter cannot be opened,
+ * the conversion reports an error, or the converted input is empty.
+ */
+static UBool
+readInput(FILE *in, const char *encName) {
+    UConverter *cnv;
+    UChar *pOut, *pOutLimit;
+    const char *p, *limit;
+    int32_t length;
+    UErrorCode errorCode;
+
+    pOut=input;
+    pOutLimit=input+INPUT_CAPACITY;
+
+    /*
+     * p is also passed to the final flush call below; give it a defined
+     * value in case the conversion loop never runs (empty input, or input
+     * that consists only of a signature)
+     */
+    p=intermediate;
+
+    errorCode=U_ZERO_ERROR;
+
+    /* read the first block and open the converter */
+    length=readBlock(in);
+    if(length<0) {
+        return FALSE;
+    }
+
+    if(encName==NULL) {
+        int32_t signatureLength;
+        encName=ucnv_detectUnicodeSignature(intermediate, length,
+                                            &signatureLength,
+                                            &errorCode);
+        if(U_FAILURE(errorCode) || encName==NULL) {
+            /* default to UTF-8 */
+            printf("no Unicode signature - using UTF-8\n");
+            encName="UTF-8";
+            errorCode=U_ZERO_ERROR;
+        } else {
+            printf("detected signature for %s (removing %d bytes)\n", encName, signatureLength);
+            /* remove signature byte sequence */
+            length-=signatureLength;
+            memmove(intermediate, intermediate+signatureLength, length);
+        }
+    }
+
+    cnv=ucnv_open(encName, &errorCode);
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "error: unable to ucnv_open(\"%s\") - %s\n", encName, u_errorName(errorCode));
+        return FALSE;
+    }
+
+    while(length>0) {
+        /* convert the block */
+        p=intermediate;
+        limit=p+length;
+
+        ucnv_toUnicode(cnv,
+                       &pOut, pOutLimit,
+                       &p, limit,
+                       NULL, FALSE,
+                       &errorCode);
+        if(U_FAILURE(errorCode)) {
+            fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode));
+            ucnv_close(cnv);
+            return FALSE;
+        }
+
+        /* read the next block */
+        length=readBlock(in);
+        if(length<0) {
+            ucnv_close(cnv);
+            return FALSE;
+        }
+    }
+
+    /* flush the converter: empty source range, flush flag set */
+    ucnv_toUnicode(cnv,
+                   &pOut, pOutLimit,
+                   &p, p,
+                   NULL, TRUE,
+                   &errorCode);
+    ucnv_close(cnv);
+
+    if(U_FAILURE(errorCode)) {
+        fprintf(stderr, "error converting input to UTF-16: %s\n", u_errorName(errorCode));
+        return FALSE;
+    }
+
+    inputLength=(int32_t)(pOut-input);
+    countInputCodePoints=u_countChar32(input, inputLength);
+    if(inputLength<=0) {
+        fprintf(stderr, "warning: input is empty\n");
+        return FALSE;
+    }
+
+    return TRUE;
+}
+
+/* Write the command-line synopsis to stderr; myName is the program name to show. */
+static void
+showUsage(const char *myName) {
+    static const char usageFormat[]=
+        "Usage:\n"
+        "%s [-e encoding-name] filename | '-'\n"
+        "    encoding-name must be the name of an encoding supported by ICU\n"
+        "    the filename of the input file with text to be used\n"
+        "      can be a dash (-) for standard input\n";
+
+    fprintf(stderr, usageFormat, myName);
+}
+
+/*
+ * Read file using some encoding, convert to 1M UTF-16 input buffer.
+ * For each UTF to be tested:
+ *   n times:
+ *     convert from UTF-16 input buffer to UTF, 4kB buffer
+ *     convert from 4kB buffer to 1M UTF-16 output buffer
+ *   adjust n so that time elapsed is 10s (#define)
+ *     ->divide 10s by time, increase n by that factor, run 2nd time
+ *   n times:
+ *     empty function
+ *   subtract out loop/function overhead
+ *   display #code points - #UTF bytes - time per roundtrip
+ *
+ *   * do the same again with an intermediate buffer size of 20 instead of 4kB
+ *
+ * Test following UTFs:
+ * UTF-16BE, UTF-16LE, UTF-8, SCSU, BOCU-1, CESU-8
+ *
+ * Command-line arguments:
+ * - encoding (default UTF-8, detect BOM)
+ * - filename (allow "-")
+ *
+ * Exit codes:
+ * - 0 after the performance test has run
+ * - 1 if the command line is incomplete
+ * - 2 if the input file cannot be opened or read
+ */
+extern int
+main(int argc, const char *argv[]) {
+    FILE *in;
+    const char *myName, *encName, *filename, *basename;
+
+    myName=argv[0];
+    if(argc<2) {
+        showUsage(myName);
+        return 1;
+    }
+
+    /* get encoding name argument: either "-eNAME" or "-e NAME" */
+    if(argv[1][0]=='-' && argv[1][1]=='e') {
+        encName=argv[1]+2;
+        --argc;
+        ++argv;
+        if(*encName==0) {
+            /* nothing attached to -e: the name is the next argument */
+            if(argc<2) {
+                showUsage(myName);
+                return 1;
+            }
+            encName=argv[1];
+            --argc;
+            ++argv;
+        }
+    } else {
+        /* no -e option: readInput() will look for a Unicode signature */
+        encName=NULL;
+    }
+
+    /* get filename argument */
+    if(argc<2) {
+        showUsage(myName);
+        return 1;
+    }
+    filename=argv[1];
+    if(filename[0]=='-' && filename[1]==0) {
+        filename="(standard input)";
+        in=stdin;
+        /* set stdin to binary mode */
+        /* NOTE(review): _setmode/_fileno/_O_BINARY look like Microsoft CRT names - confirm the includes/platform guards */
+        _setmode(_fileno(stdin), _O_BINARY);
+    } else {
+        in=fopen(filename, "rb");
+        if(in==NULL) {
+            fprintf(stderr, "error opening \"%s\"\n", filename);
+            showUsage(myName);
+            return 2;
+        }
+    }
+
+    /* read input */
+    basename=strrchr(filename, U_FILE_SEP_CHAR);
+    if(basename!=NULL) {
+        ++basename;
+    } else {
+        basename=filename;
+    }
+    printf("# testing converter performance with file \"%s\"\n", basename);
+    if(!readInput(in, encName)) {
+        /* encName is still NULL when no -e option was given; never pass NULL for %s */
+        fprintf(stderr, "error reading \"%s\" (encoding %s)\n", filename, encName!=NULL ? encName : "(auto-detected)");
+        if(in!=stdin) {
+            fclose(in);
+        }
+        showUsage(myName);
+        return 2;
+    }
+    if(in!=stdin) {
+        fclose(in);
+    }
+
+    /* test performance */
+    testPerformance();
+    return 0;
+}
diff --git a/icuSources/test/utfperf/utfperf.dsp b/icuSources/test/utfperf/utfperf.dsp
new file mode 100644 (file)
index 0000000..8351222
--- /dev/null
@@ -0,0 +1,158 @@
+# Microsoft Developer Studio Project File - Name="utfperf" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=utfperf - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE 
+!MESSAGE NMAKE /f "utfperf.mak".
+!MESSAGE 
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE 
+!MESSAGE NMAKE /f "utfperf.mak" CFG="utfperf - Win32 Debug"
+!MESSAGE 
+!MESSAGE Possible choices for configuration are:
+!MESSAGE 
+!MESSAGE "utfperf - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "utfperf - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE "utfperf - Win64 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "utfperf - Win64 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE 
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF  "$(CFG)" == "utfperf - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /G6 /MD /W3 /GX /O2 /I "..\..\..\include" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"..\..\..\lib"
+
+!ELSEIF  "$(CFG)" == "utfperf - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD CPP /nologo /G6 /MDd /W3 /Gm /GX /ZI /Od /I "..\..\..\include" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"..\..\..\lib"
+
+!ELSEIF  "$(CFG)" == "utfperf - Win64 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /FD /c
+# ADD CPP /nologo /MD /W3 /GX /Zi /O2 /Op /I "..\..\..\include" /D "WIN64" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /Wp64 /Zm600 /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /machine:IA64
+# ADD LINK32 icuuc.lib kernel32.lib user32.lib gdi32.lib winmm.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:IX86 /libpath:"..\..\..\lib" /machine:IA64
+
+!ELSEIF  "$(CFG)" == "utfperf - Win64 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /FD /GZ /c
+# ADD CPP /nologo /MDd /W3 /Gm /GX /Zi /Od /Op /I "..\..\..\include" /D "WIN64" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /D "_IA64_" /D "WIN32" /D "_AFX_NO_DAO_SUPPORT" /FD /GZ /Wp64 /Zm600 /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:IX86 /pdbtype:sept /machine:IA64
+# ADD LINK32 icuucd.lib winmm.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /incremental:no /debug /machine:IX86 /pdbtype:sept /libpath:"..\..\..\lib" /machine:IA64
+
+!ENDIF 
+
+# Begin Target
+
+# Name "utfperf - Win32 Release"
+# Name "utfperf - Win32 Debug"
+# Name "utfperf - Win64 Release"
+# Name "utfperf - Win64 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\utfperf.c
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/icuSources/tools/makeconv/misc/canonucm.c b/icuSources/tools/makeconv/misc/canonucm.c
new file mode 100644 (file)
index 0000000..a37f8dd
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  canonucm.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000nov08
+*   created by: Markus W. Scherer
+*
+*   This tool reads a .ucm file and canonicalizes it: In the CHARMAP section,
+*   - sort by Unicode code points
+*   - print all code points in uppercase hexadecimal
+*   - print all Unicode code points with 4, 5, or 6 digits as needed
+*   - remove the comments
+*   - remove unnecessary spaces
+*
+*   To compile, just call a C compiler/linker with this source file.
+*   On Windows: cl canonucm.c
+*/
+
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+
+/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */
diff --git a/icuSources/tools/makeconv/misc/rptp2ucm.c b/icuSources/tools/makeconv/misc/rptp2ucm.c
new file mode 100644 (file)
index 0000000..7ec8320
--- /dev/null
@@ -0,0 +1,31 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000-2001, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  rptp2ucm.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2001feb16
+*   created by: Markus W. Scherer
+*
+*   This tool reads two CDRA conversion table files (RPMAP & TPMAP or RXMAP and TXMAP) and
+*   generates a canonicalized ICU .ucm file from them.
+*   If the RPMAP/RXMAP file does not contain a comment line with the substitution character,
+*   then this tool also attempts to read the header of the corresponding UPMAP/UXMAP file
+*   to extract subchar and subchar1.
+*
+*   R*MAP: Unicode->codepage
+*   T*MAP: codepage->Unicode
+*
+*   To compile, just call a C compiler/linker with this source file.
+*   On Windows: cl rptp2ucm.c
+*/
+
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+
+/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */
diff --git a/icuSources/tools/makeconv/misc/ucmmerge.c b/icuSources/tools/makeconv/misc/ucmmerge.c
new file mode 100644 (file)
index 0000000..a8035bb
--- /dev/null
@@ -0,0 +1,26 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucmmerge.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000nov09
+*   created by: Markus W. Scherer
+*
+*   This tool reads two .ucm files and merges them.
+*   Merging the files makes it possible to update the ICU data while keeping
+*   ICU-specific changes like "MBCS"->"EBCDIC_STATEFUL" or adding <icu:state>.
+*
+*   To compile, just call a C compiler/linker with this source file.
+*   On Windows: cl ucmmerge.c
+*/
+
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+
+/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */
diff --git a/icuSources/tools/makeconv/misc/ucmstrip.c b/icuSources/tools/makeconv/misc/ucmstrip.c
new file mode 100644 (file)
index 0000000..751af78
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+*******************************************************************************
+*
+*   Copyright (C) 2000, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*
+*******************************************************************************
+*   file name:  ucmstrip.c
+*   encoding:   US-ASCII
+*   tab size:   8 (not used)
+*   indentation:4
+*
+*   created on: 2000nov09
+*   created by: Markus W. Scherer
+*
+*   This tool reads a .ucm file, expects there to be a line in the header with
+*   "File created on..." and removes the lines before and including that.
+*   Then it removes lines with <icu:state> and <uconv_class> and <code_set_name>.
+*   This helps comparing .ucm files with different copyright statements and
+*   different state specifications.
+*
+*   To compile, just call a C compiler/linker with this source file.
+*   On Windows: cl ucmstrip.c
+*/
+
+#error File moved to charset/source/ucmtools/ on 2002-nov-06
+
+/* see http://oss.software.ibm.com/cvs/icu/charset/source/ucmtools/ */
index a4d01949589206d5378fb6b5cb12955101478514..d85cd0fc7f14881bb9c82795362c1cfe0bedb491 100644 (file)
--- a/makefile
+++ b/makefile
@@ -139,19 +139,19 @@ LIBOVERRIDES=LIBICUDT="-L$(OBJROOT) -l$(LIB_NAME)" \
 
 ENV=   APPLE_INTERNAL_DIR="$(APPLE_INTERNAL_DIR)" \
        CFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -g -Os -fno-exceptions" \
-       CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -g -Os -fno-exceptions -fno-rtti" \
+       CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -g -Os -fno-exceptions -fno-rtti -fvisibility-inlines-hidden" \
        RC_ARCHS="$(RC_ARCHS)" \
        DYLD_LIBRARY_PATH="$(DSTROOT)/usr/local/lib"
        
 ENV_CONFIGURE= APPLE_INTERNAL_DIR="$(APPLE_INTERNAL_DIR)" \
        CFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" -g -Os -fno-exceptions" \
-       CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" -g -Os -fno-exceptions -fno-rtti" \
+       CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" -g -Os -fno-exceptions -fno-rtti -fvisibility-inlines-hidden" \
        RC_ARCHS="$(RC_ARCHS)" \
        DYLD_LIBRARY_PATH="$(DSTROOT)/usr/local/lib"
 
 ENV_DEBUG = APPLE_INTERNAL_DIR="$(APPLE_INTERNAL_DIR)" \
        CFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -O0 -g -fno-exceptions" \
-       CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -O0 -g -fno-exceptions -fno-rtti" \
+       CXXFLAGS="-DICU_DATA_DIR=\"\\\"/usr/share/icu/\\\"\" $(RC_ARCHS:%=-arch %) -O0 -g -fno-exceptions -fno-rtti -fvisibility-inlines-hidden" \
        RC_ARCHS="$(RC_ARCHS)" \
        DYLD_LIBRARY_PATH="$(DSTROOT)/usr/local/lib"
 
@@ -178,8 +178,10 @@ endif
 icu debug : $(OBJROOT)/Makefile
        (cd $(OBJROOT); \
                $(MAKE) $($(ENV_$@)); \
+               tmpfile=`mktemp -t weakexternal` || exit 1; \
+               nm -m $(COMMON_OBJ) $(I18N_OBJ) $(STUB_DATA_OBJ) | fgrep "weak external" | fgrep -v "undefined" | sed -e 's/.*weak external //' | uniq | cat >$$tmpfile; \
                $($(ENV_$@)) $(CXX) -current_version $(ICU_VERS).$(ICU_SUBVERS) -compatibility_version 1 -dynamiclib -dynamic \
-                       $(RC_ARCHS:%=-arch %) $(CXXFLAGS) $(LDFLAGS) -single_module $(SECTORDER_FLAGS) \
+                       $(RC_ARCHS:%=-arch %) $(CXXFLAGS) $(LDFLAGS) -single_module $(SECTORDER_FLAGS) -unexported_symbols_list $$tmpfile \
                        -install_name $(libdir)$(INSTALLED_DYLIB) -o ./$(INSTALLED_DYLIB) $(COMMON_OBJ) $(I18N_OBJ) $(STUB_DATA_OBJ); \
                if test -f ./$(ICU_DATA_DIR)/$(B_DATA_FILE); then \
                        ln -fs ./$(ICU_DATA_DIR)/$(B_DATA_FILE); \