ICU-531.48.tar.gz

[apple/icu.git] / icuSources / tools / gensprep / gensprep.c
diff --git a/icuSources/tools/gensprep/gensprep.c b/icuSources/tools/gensprep/gensprep.c

index e8f9608ffc88fc7b7b8f8cba0d46063f2624aedb..94ba08d89b9394637584e360bdb0260d76c00e1e 100644 (file)
--- a/icuSources/tools/gensprep/gensprep.c
+++ b/icuSources/tools/gensprep/gensprep.c
@@ -1,7 +1,7 @@
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2003-2004, International Business Machines
+*   Copyright (C) 2003-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
@@ -31,6 +31,7 @@
  #include "uparse.h"
  #include "sprpimpl.h"
  
+#include "unicode/uclean.h"
  #include "unicode/udata.h"
  #include "unicode/utypes.h"
  #include "unicode/putil.h"
@@ -40,14 +41,15 @@ U_CDECL_BEGIN
  #include "gensprep.h"
  U_CDECL_END
  
-#ifdef WIN32
-#   pragma warning(disable: 4100)
-#endif
-
  UBool beVerbose=FALSE, haveCopyright=TRUE;
  
  #define NORM_CORRECTIONS_FILE_NAME "NormalizationCorrections.txt"
  
+#define NORMALIZE_DIRECTIVE "normalize"
+#define NORMALIZE_DIRECTIVE_LEN 9
+#define CHECK_BIDI_DIRECTIVE "check-bidi"
+#define CHECK_BIDI_DIRECTIVE_LEN 10
+
  /* prototypes --------------------------------------------------------------- */
  
  static void
@@ -67,9 +69,9 @@ static UOption options[]={
      UOPTION_DESTDIR,
      UOPTION_SOURCEDIR,
      UOPTION_ICUDATADIR,
-    UOPTION_PACKAGE_NAME,
      UOPTION_BUNDLE_NAME,
      { "normalization", NULL, NULL, NULL, 'n', UOPT_REQUIRES_ARG, 0 },
+    { "norm-correction", NULL, NULL, NULL, 'm', UOPT_REQUIRES_ARG, 0 },
      { "check-bidi", NULL, NULL, NULL,  'k', UOPT_NO_ARG, 0},
      { "unicode", NULL, NULL, NULL, 'u', UOPT_REQUIRES_ARG, 0 },
  };
@@ -82,9 +84,9 @@ enum{
      DESTDIR,
      SOURCEDIR,
      ICUDATADIR,
-    PACKAGE_NAME,
      BUNDLE_NAME,
      NORMALIZE,
+    NORM_CORRECTION_DIR,
      CHECK_BIDI,
      UNICODE_VERSION
  };
@@ -114,10 +116,15 @@ static int printHelp(int argc, char* argv[]){
          "\t                         followed by path, defaults to %s\n",
          u_getDataDirectory());
      fprintf(stderr,
-        "\t-p or --package-name     prepend the output data file name with the package name specified\n"
          "\t-n or --normalize        turn on the option for normalization and include mappings\n"
          "\t                         from NormalizationCorrections.txt from the given path,\n"
-        "\t                         e.g: /test/icu/source/data/unidata\n"
+        "\t                         e.g: /test/icu/source/data/unidata\n");
+    fprintf(stderr,
+        "\t-m or --norm-correction  use NormalizationCorrections.txt from the given path\n"
+        "\t                         when the input file contains a normalization directive.\n"
+        "\t                         unlike -n/--normalize, this option does not force the\n"
+        "\t                         normalization.\n");
+    fprintf(stderr,
          "\t-k or --check-bidi       turn on the option for checking for BiDi in the profile\n"
          "\t-u or --unicode          version of Unicode to be used with this profile followed by the version\n"
          );
@@ -131,7 +138,7 @@ main(int argc, char* argv[]) {
      char* filename = NULL;
  #endif
      const char *srcDir=NULL, *destDir=NULL, *icuUniDataDir=NULL;
-    const char *packageName=NULL, *bundleName=NULL, *inputFileName = NULL;
+    const char *bundleName=NULL, *inputFileName = NULL;
      char *basename=NULL;
      int32_t sprepOptions = 0;
  
@@ -144,7 +151,6 @@ main(int argc, char* argv[]) {
      options[SOURCEDIR].value="";
      options[UNICODE_VERSION].value="0"; /* don't assume the unicode version */
      options[BUNDLE_NAME].value = DATA_NAME;
-    options[PACKAGE_NAME].value = NULL;
      options[NORMALIZE].value = "";
  
      argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
@@ -165,9 +171,12 @@ main(int argc, char* argv[]) {
      haveCopyright=options[COPYRIGHT].doesOccur;
      srcDir=options[SOURCEDIR].value;
      destDir=options[DESTDIR].value;
-    packageName = options[PACKAGE_NAME].value;
      bundleName = options[BUNDLE_NAME].value;
-    icuUniDataDir = options[NORMALIZE].value;
+    if(options[NORMALIZE].doesOccur) {
+        icuUniDataDir = options[NORMALIZE].value;
+    } else {
+        icuUniDataDir = options[NORM_CORRECTION_DIR].value;
+    }
  
      if(argc<2) {
          /* print the help message */
@@ -187,7 +196,7 @@ main(int argc, char* argv[]) {
          "gensprep writes dummy " U_ICUDATA_NAME "_" DATA_NAME "." DATA_TYPE
          " because UCONFIG_NO_IDNA is set, \n"
          "see icu/source/common/unicode/uconfig.h\n");
-    generateData(destDir, packageName, bundleName);
+    generateData(destDir, bundleName);
  
  #else
  
@@ -219,7 +228,7 @@ main(int argc, char* argv[]) {
          return errorCode;
      }
      
-    if(options[NORMALIZE].doesOccur){
+    if(options[NORMALIZE].doesOccur){ /* this option might be set by @normalize;; in the source file */
          /* set up directory for NormalizationCorrections.txt */
          uprv_strcpy(filename,icuUniDataDir);
          basename=filename+uprv_strlen(filename);
@@ -238,7 +247,7 @@ main(int argc, char* argv[]) {
          sprepOptions |= _SPREP_NORMALIZATION_ON;
      }
      
-    if(options[CHECK_BIDI].doesOccur){
+    if(options[CHECK_BIDI].doesOccur){ /* this option might be set by @check-bidi;; in the source file */
          sprepOptions |= _SPREP_CHECK_BIDI_ON;
      }
  
@@ -247,13 +256,15 @@ main(int argc, char* argv[]) {
      /* process parsed data */
      if(U_SUCCESS(errorCode)) {
          /* write the data file */
-       generateData(destDir, packageName, bundleName);
+        generateData(destDir, bundleName);
  
-       cleanUpData();
+        cleanUpData();
      }
  
      uprv_free(filename);
  
+    u_cleanup();
+
  #endif
  
      return errorCode;
@@ -336,13 +347,34 @@ strprepProfileLineFn(void *context,
      const char* typeName;
      uint32_t rangeStart=0,rangeEnd =0;
      const char* filename = (const char*) context;
- 
+    const char *s;
+
+    s = u_skipWhitespace(fields[0][0]);
+    if (*s == '@') {
+        /* special directive */
+        s++;
+        length = fields[0][1] - s;
+        if (length >= NORMALIZE_DIRECTIVE_LEN
+            && uprv_strncmp(s, NORMALIZE_DIRECTIVE, NORMALIZE_DIRECTIVE_LEN) == 0) {
+            options[NORMALIZE].doesOccur = TRUE;
+            return;
+        }
+        else if (length >= CHECK_BIDI_DIRECTIVE_LEN
+            && uprv_strncmp(s, CHECK_BIDI_DIRECTIVE, CHECK_BIDI_DIRECTIVE_LEN) == 0) {
+            options[CHECK_BIDI].doesOccur = TRUE;
+            return;
+        }
+        else {
+            fprintf(stderr, "gensprep error parsing a directive %s.", fields[0][0]);
+        }
+    }
+
      typeName = fields[2][0];
      map = fields[1][0];
  
      if(uprv_strstr(typeName, usprepTypeNames[USPREP_UNASSIGNED])!=NULL){
  
-        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
+        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
          if(U_FAILURE(*pErrorCode)){
              fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
              return;
@@ -353,7 +385,7 @@ strprepProfileLineFn(void *context,
  
      }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_PROHIBITED])!=NULL){
  
-        u_parseCodePointRange(fields[0][0], &rangeStart,&rangeEnd, pErrorCode);
+        u_parseCodePointRange(s, &rangeStart,&rangeEnd, pErrorCode);
          if(U_FAILURE(*pErrorCode)){
              fprintf(stderr, "Could not parse code point range. Error: %s\n",u_errorName(*pErrorCode));
              return;
@@ -365,8 +397,8 @@ strprepProfileLineFn(void *context,
      }else if(uprv_strstr(typeName, usprepTypeNames[USPREP_MAP])!=NULL){
  
          /* get the character code, field 0 */
-        code=(uint32_t)uprv_strtoul(fields[0][0], &end, 16);
-        if(end<=fields[0][0] || end!=fields[0][1]) {
+        code=(uint32_t)uprv_strtoul(s, &end, 16);
+        if(end<=s || end!=fields[0][1]) {
              fprintf(stderr, "gensprep: syntax error in field 0 at %s\n", fields[0][0]);
              *pErrorCode=U_PARSE_ERROR;
              exit(U_PARSE_ERROR);