git.saurik.com Git - apple/icu.git/blobdiff - icuSources/tools/genrb/genrb.c
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / tools / genrb / genrb.c
index f6929bb7e1a6d088e30e8d43d6079b371edc1375..7c5248092b23b4f2cb998117ef13889163485898 100644 (file)
@@ -1,7 +1,7 @@
 /*
 *******************************************************************************
 *
-*   Copyright (C) 1998-2004, International Business Machines
+*   Copyright (C) 1998-2012, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 #include "genrb.h"
 #include "unicode/uclean.h"
 
+#include "ucmndata.h"  /* TODO: for reading the pool bundle */
+
 /* Protos */
-static void  processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir, const char *packageName, UErrorCode *status);
+void  processFile(const char *filename, const char* cp, const char *inputDir, const char *outputDir,
+    const char *packageName, UBool omitBinaryCollation, UErrorCode *status);
 static char *make_res_filename(const char *filename, const char *outputDir,
                                const char *packageName, UErrorCode *status);
 
@@ -47,14 +50,17 @@ enum
     ICUDATADIR,
     WRITE_JAVA,
     COPYRIGHT,
-    PACKAGE_NAME,
+    JAVA_PACKAGE,
     BUNDLE_NAME,
     WRITE_XLIFF,
-    TOUCHFILE,
     STRICT,
     NO_BINARY_COLLATION,
-    /*added by Jing*/
-    LANGUAGE
+    LANGUAGE,
+    NO_COLLATION_RULES,
+    FORMAT_VERSION,
+    WRITE_POOL_BUNDLE,
+    USE_POOL_BUNDLE,
+    INCLUDE_UNIHAN_COLL
 };
 
 UOption options[]={
@@ -69,22 +75,37 @@ UOption options[]={
                       UOPTION_ICUDATADIR,
                       UOPTION_WRITE_JAVA,
                       UOPTION_COPYRIGHT,
-                      UOPTION_PACKAGE_NAME,
+                      UOPTION_DEF("java-package", '\x01', UOPT_REQUIRES_ARG),
                       UOPTION_BUNDLE_NAME,
-                      UOPTION_DEF( "write-xliff", 'x', UOPT_OPTIONAL_ARG),
-                      UOPTION_DEF( "touchfile", 't', UOPT_NO_ARG),
-                      UOPTION_DEF( "strict",    'k', UOPT_NO_ARG), /* 14 */
-                      UOPTION_DEF( "noBinaryCollation", 'C', UOPT_NO_ARG),/* 15 */
-                      /*added by Jing*/
-                      UOPTION_DEF( "language",  'l', UOPT_REQUIRES_ARG)
+                      UOPTION_DEF("write-xliff", 'x', UOPT_OPTIONAL_ARG),
+                      UOPTION_DEF("strict",    'k', UOPT_NO_ARG), /* 14 */
+                      UOPTION_DEF("noBinaryCollation", 'C', UOPT_NO_ARG),/* 15 */
+                      UOPTION_DEF("language",  'l', UOPT_REQUIRES_ARG), /* 16 */
+                      UOPTION_DEF("omitCollationRules", 'R', UOPT_NO_ARG),/* 17 */
+                      UOPTION_DEF("formatVersion", '\x01', UOPT_REQUIRES_ARG),/* 18 */
+                      UOPTION_DEF("writePoolBundle", '\x01', UOPT_NO_ARG),/* 19 */
+                      UOPTION_DEF("usePoolBundle", '\x01', UOPT_OPTIONAL_ARG),/* 20 */
+                      UOPTION_DEF("includeUnihanColl", '\x01', UOPT_NO_ARG),/* 21 */ /* temporary, don't display in usage info */
                   };
 
 static     UBool       write_java = FALSE;
 static     UBool       write_xliff = FALSE;
-static     UBool       touchfile = FALSE;
 static     const char* outputEnc ="";
-static     const char* gPackageName=NULL;
-static     const char* bundleName=NULL;
+static     struct SRBRoot *newPoolBundle = NULL;
+           UBool       gIncludeUnihanColl = FALSE;
+
+/* In-memory view of the pool bundle (pool.res) loaded for --usePoolBundle. TODO: separate header file for ResFile? */
+typedef struct ResFile {
+  uint8_t *fBytes;          /* malloc'ed buffer with the whole file, swapped in place; freed at the end of main() */
+  const int32_t *fIndexes;  /* indexes[] array, one int32_t past the end of the data header */
+  const char *fKeys;        /* start of the key bytes, right after the indexes[] */
+  int32_t fKeysLength;      /* byte length of the key area: indexes[URES_INDEX_KEYS_TOP]*4 minus the keys' start offset */
+  int32_t fKeysCount;       /* number of NUL bytes found in the key area (one per NUL-terminated key) */
+  int32_t fChecksum;        /* copy of indexes[URES_INDEX_POOL_CHECKSUM] */
+} ResFile;
+
+static ResFile poolBundle = { NULL };
+
 /*added by Jing*/
 static     const char* language = NULL;
 static     const char* xliffOutputFileName = NULL;
@@ -98,9 +119,12 @@ main(int argc,
     const char *inputDir  = NULL;
     const char *encoding  = "";
     int         i;
+    UBool illegalArg = FALSE;
 
     U_MAIN_INIT_ARGS(argc, argv);
 
+    options[JAVA_PACKAGE].value = "com.ibm.icu.impl.data";
+    options[BUNDLE_NAME].value = "LocaleElements";
     argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);
 
     /* error handling, printing usage message */
@@ -109,6 +133,24 @@ main(int argc,
     } else if(argc<2) {
         argc = -1;
     }
+    if(options[WRITE_POOL_BUNDLE].doesOccur && options[USE_POOL_BUNDLE].doesOccur) {
+        fprintf(stderr, "%s: cannot combine --writePoolBundle and --usePoolBundle\n", argv[0]);
+        argc = -1;
+    }
+    if(options[FORMAT_VERSION].doesOccur) {
+        const char *s = options[FORMAT_VERSION].value;
+        if(uprv_strlen(s) != 1 || (s[0] != '1' && s[0] != '2')) {
+            fprintf(stderr, "%s: unsupported --formatVersion %s\n", argv[0], s);
+            argc = -1;
+        } else if(s[0] == '1' &&
+                  (options[WRITE_POOL_BUNDLE].doesOccur || options[USE_POOL_BUNDLE].doesOccur)
+        ) {
+            fprintf(stderr, "%s: cannot combine --formatVersion 1 with --writePoolBundle or --usePoolBundle\n", argv[0]);
+            argc = -1;
+        } else {
+            setFormatVersion(s[0] - '0');
+        }
+    }
 
     if(options[VERSION].doesOccur) {
         fprintf(stderr,
@@ -118,15 +160,26 @@ main(int argc,
         return U_ZERO_ERROR;
     }
 
-    if(argc<0 || options[HELP1].doesOccur || options[HELP2].doesOccur) {
+    if(argc<0) {
+        illegalArg = TRUE;
+    } else if((options[JAVA_PACKAGE].doesOccur || options[BUNDLE_NAME].doesOccur) &&
+              !options[WRITE_JAVA].doesOccur) {
+        fprintf(stderr,
+                "%s error: command line argument --java-package or --bundle-name "
+                "without --write-java\n",
+                argv[0]);
+        illegalArg = TRUE;
+    }
+
+    if(illegalArg || options[HELP1].doesOccur || options[HELP2].doesOccur) {
         /*
-         * Broken into chucks because the C89 standard says the minimum
+         * Broken into chunks because the C89 standard says the minimum
          * required supported string length is 509 bytes.
          */
         fprintf(stderr,
                 "Usage: %s [OPTIONS] [FILES]\n"
                 "\tReads the list of resource bundle source files and creates\n"
-                "\tbinary version of reosurce bundles (.res files)\n",
+                "\tbinary version of resource bundles (.res files)\n",
                 argv[0]);
         fprintf(stderr,
                 "Options:\n"
@@ -145,19 +198,33 @@ main(int argc,
         fprintf(stderr,
                 "\t-j or --write-java       write a Java ListResourceBundle for ICU4J, followed by optional encoding\n"
                 "\t                         defaults to ASCII and \\uXXXX format.\n"
-                "\t-p or --package-name     For ICU4J: package name for writing the ListResourceBundle for ICU4J,\n"
-                "\t                         defaults to com.ibm.icu.impl.data\n"
-                "\t                         For ICU4C: Package name for the .res files on output. Specfiying\n"
-                "\t                         'ICUDATA' defaults to the current ICU4C data name.\n");
+                "\t      --java-package     For --write-java: package name for writing the ListResourceBundle,\n"
+                "\t                         defaults to com.ibm.icu.impl.data\n");
         fprintf(stderr,
-                "\t-b or --bundle-name      bundle name for writing the ListResourceBundle for ICU4J,\n"
+                "\t-b or --bundle-name      For --write-java: root resource bundle name for writing the ListResourceBundle,\n"
                 "\t                         defaults to LocaleElements\n"
-                "\t-x or --write-xliff      write a XLIFF file for the resource bundle. Followed by an optional output file name.\n"
+                "\t-x or --write-xliff      write an XLIFF file for the resource bundle. Followed by\n"
+                "\t                         an optional output file name.\n"
                 "\t-k or --strict           use pedantic parsing of syntax\n"
                 /*added by Jing*/
-                "\t-l or --language         For XLIFF: language code compliant with ISO 639.\n");
+                "\t-l or --language         for XLIFF: language code compliant with BCP 47.\n");
+        fprintf(stderr,
+                "\t-C or --noBinaryCollation  do not generate binary collation image;\n"
+                "\t                           makes .res file smaller but collator instantiation much slower;\n"
+                "\t                           maintains ability to get tailoring rules\n"
+                "\t-R or --omitCollationRules do not include collation (tailoring) rules;\n"
+                "\t                           makes .res file smaller and maintains collator instantiation speed\n"
+                "\t                           but tailoring rules will not be available (they are rarely used)\n");
+        fprintf(stderr,
+                "\t      --formatVersion      write a .res file compatible with the requested formatVersion (single digit);\n"
+                "\t                           for example, --formatVersion 1\n");
+        fprintf(stderr,
+                "\t      --writePoolBundle    write a pool.res file with all of the keys of all input bundles\n"
+                "\t      --usePoolBundle [path-to-pool.res]  point to keys from the pool.res keys pool bundle if they are available there;\n"
+                "\t                           makes .res files smaller but dependent on the pool bundle\n"
+                "\t                           (--writePoolBundle and --usePoolBundle cannot be combined)\n");
 
-        return argc < 0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
+        return illegalArg ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
     }
 
     if(options[VERBOSE].doesOccur) {
@@ -181,26 +248,6 @@ main(int argc,
     if(options[DESTDIR].doesOccur) {
         outputDir = options[DESTDIR].value;
     }
-    if(options[PACKAGE_NAME].doesOccur) {
-        gPackageName = options[PACKAGE_NAME].value;
-        if(!strcmp(gPackageName, "ICUDATA"))
-        {
-            gPackageName = U_ICUDATA_NAME;
-        }
-        if(gPackageName[0] == 0)
-        {
-            gPackageName = NULL;
-        }
-    }
-
-    if(options[TOUCHFILE].doesOccur) {
-        if(gPackageName == NULL) {
-            fprintf(stderr, "%s: Don't use touchfile (-t) option with no package.\n",
-                    argv[0]);
-            return -1;
-        }
-        touchfile = TRUE;
-    }
 
     if(options[ENCODING].doesOccur) {
         encoding = options[ENCODING].value;
@@ -226,10 +273,6 @@ main(int argc,
         outputEnc = options[WRITE_JAVA].value;
     }
 
-    if(options[BUNDLE_NAME].doesOccur) {
-        bundleName = options[BUNDLE_NAME].value;
-    }
-
     if(options[WRITE_XLIFF].doesOccur) {
         write_xliff = TRUE;
         if(options[WRITE_XLIFF].value != NULL){
@@ -237,17 +280,129 @@ main(int argc,
         }
     }
 
-    if(options[NO_BINARY_COLLATION].doesOccur) {
-      initParser(FALSE);
-    } else {
-      initParser(TRUE);
-    }
+    initParser(options[NO_COLLATION_RULES].doesOccur);
 
     /*added by Jing*/
     if(options[LANGUAGE].doesOccur) {
         language = options[LANGUAGE].value;
     }
 
+    if(options[WRITE_POOL_BUNDLE].doesOccur) {
+        newPoolBundle = bundle_open(NULL, TRUE, &status);
+        if(U_FAILURE(status)) {
+            fprintf(stderr, "unable to create an empty bundle for the pool keys: %s\n", u_errorName(status));
+            return status;
+        } else {
+            const char *poolResName = "pool.res";
+            char *nameWithoutSuffix = uprv_malloc(uprv_strlen(poolResName) + 1);
+            if (nameWithoutSuffix == NULL) {
+                fprintf(stderr, "out of memory error\n");
+                return U_MEMORY_ALLOCATION_ERROR;
+            }
+            uprv_strcpy(nameWithoutSuffix, poolResName);
+            *uprv_strrchr(nameWithoutSuffix, '.') = 0;
+            newPoolBundle->fLocale = nameWithoutSuffix;
+        }
+    }
+
+    if(options[USE_POOL_BUNDLE].doesOccur) {
+        const char *poolResName = "pool.res";
+        FileStream *poolFile;
+        int32_t poolFileSize;
+        int32_t indexLength;
+        /*
+         * TODO: Consolidate inputDir/filename handling from main() and processFile()
+         * into a common function, and use it here as well.
+         * Try to create toolutil functions for dealing with dir/filenames and
+         * loading ICU data files without udata_open().
+         * Share code with icupkg?
+         * Also, make_res_filename() seems to be unused. Review and remove.
+         */
+        if (options[USE_POOL_BUNDLE].value!=NULL) {
+            uprv_strcpy(theCurrentFileName, options[USE_POOL_BUNDLE].value);
+            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
+        } else if (inputDir) {
+            uprv_strcpy(theCurrentFileName, inputDir);
+            uprv_strcat(theCurrentFileName, U_FILE_SEP_STRING);
+        } else {
+            *theCurrentFileName = 0;
+        }
+        uprv_strcat(theCurrentFileName, poolResName);
+        poolFile = T_FileStream_open(theCurrentFileName, "rb");
+        if (poolFile == NULL) {
+            fprintf(stderr, "unable to open pool bundle file %s\n", theCurrentFileName);
+            return 1;
+        }
+        poolFileSize = T_FileStream_size(poolFile);
+        if (poolFileSize < 32) {
+            fprintf(stderr, "the pool bundle file %s is too small\n", theCurrentFileName);
+            return 1;
+        }
+        poolBundle.fBytes = (uint8_t *)uprv_malloc((poolFileSize + 15) & ~15);
+        if (poolFileSize > 0 && poolBundle.fBytes == NULL) {
+            fprintf(stderr, "unable to allocate memory for the pool bundle file %s\n", theCurrentFileName);
+            return U_MEMORY_ALLOCATION_ERROR;
+        } else {
+            UDataSwapper *ds;
+            const DataHeader *header;
+            int32_t bytesRead = T_FileStream_read(poolFile, poolBundle.fBytes, poolFileSize);
+            int32_t keysBottom;
+            if (bytesRead != poolFileSize) {
+                fprintf(stderr, "unable to read the pool bundle file %s\n", theCurrentFileName);
+                return 1;
+            }
+            /*
+             * Swap the pool bundle so that a single checked-in file can be used.
+             * The swapper functions also test that the data looks like
+             * a well-formed .res file.
+             */
+            ds = udata_openSwapperForInputData(poolBundle.fBytes, bytesRead,
+                                               U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &status);
+            if (U_FAILURE(status)) {
+                fprintf(stderr, "udata_openSwapperForInputData(pool bundle %s) failed: %s\n",
+                        theCurrentFileName, u_errorName(status));
+                return status;
+            }
+            ures_swap(ds, poolBundle.fBytes, bytesRead, poolBundle.fBytes, &status);
+            udata_closeSwapper(ds);
+            if (U_FAILURE(status)) {
+                fprintf(stderr, "ures_swap(pool bundle %s) failed: %s\n",
+                        theCurrentFileName, u_errorName(status));
+                return status;
+            }
+            header = (const DataHeader *)poolBundle.fBytes;
+            if (header->info.formatVersion[0]!=2) {
+                fprintf(stderr, "invalid format of pool bundle file %s\n", theCurrentFileName);
+                return U_INVALID_FORMAT_ERROR;
+            }
+            poolBundle.fKeys = (const char *)header + header->dataHeader.headerSize;
+            poolBundle.fIndexes = (const int32_t *)poolBundle.fKeys + 1;
+            indexLength = poolBundle.fIndexes[URES_INDEX_LENGTH] & 0xff;
+            if (indexLength <= URES_INDEX_POOL_CHECKSUM) {
+                fprintf(stderr, "insufficient indexes[] in pool bundle file %s\n", theCurrentFileName);
+                return U_INVALID_FORMAT_ERROR;
+            }
+            keysBottom = (1 + indexLength) * 4;
+            poolBundle.fKeys += keysBottom;
+            poolBundle.fKeysLength = (poolBundle.fIndexes[URES_INDEX_KEYS_TOP] * 4) - keysBottom;
+            poolBundle.fChecksum = poolBundle.fIndexes[URES_INDEX_POOL_CHECKSUM];
+        }
+        for (i = 0; i < poolBundle.fKeysLength; ++i) {
+            if (poolBundle.fKeys[i] == 0) {
+                ++poolBundle.fKeysCount;
+            }
+        }
+        T_FileStream_close(poolFile);
+        setUsePoolBundle(TRUE);
+    }
+
+    if(options[INCLUDE_UNIHAN_COLL].doesOccur) {
+        gIncludeUnihanColl = TRUE;
+    }
+
+    if((argc-1)!=1) {
+        printf("genrb number of files: %d\n", argc - 1);
+    }
     /* generate the binary files */
     for(i = 1; i < argc; ++i) {
         status = U_ZERO_ERROR;
@@ -264,15 +419,37 @@ main(int argc,
         if (isVerbose()) {
             printf("Processing file \"%s\"\n", theCurrentFileName);
         }
-        processFile(arg, encoding, inputDir, outputDir, gPackageName, &status);
+        processFile(arg, encoding, inputDir, outputDir, NULL,
+                    options[NO_BINARY_COLLATION].doesOccur,
+                    &status);
+    }
+
+    uprv_free(poolBundle.fBytes);
+
+    if(options[WRITE_POOL_BUNDLE].doesOccur) {
+        char outputFileName[256];
+        bundle_write(newPoolBundle, outputDir, NULL, outputFileName, sizeof(outputFileName), &status);
+        bundle_close(newPoolBundle, &status);
+        if(U_FAILURE(status)) {
+            fprintf(stderr, "unable to write the pool bundle: %s\n", u_errorName(status));
+        }
+    }
+
+    u_cleanup();
+
+    /* Don't return warnings as a failure */
+    if (U_SUCCESS(status)) {
+        return 0;
     }
 
     return status;
 }
 
 /* Process a file */
-static void
-processFile(const char *filename, const char *cp, const char *inputDir, const char *outputDir, const char *packageName, UErrorCode *status) {
+void
+processFile(
+    const char *filename, const char *cp, const char *inputDir, const char *outputDir, const char *packageName,
+    UBool omitBinaryCollation, UErrorCode *status) {
     /*FileStream     *in           = NULL;*/
     struct SRBRoot *data         = NULL;
     UCHARBUF       *ucbuf        = NULL;
@@ -285,6 +462,7 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
     int32_t dirlen  = 0;
     int32_t filelen = 0;
 
+
     if (status==NULL || U_FAILURE(*status)) {
         return;
     }
@@ -294,6 +472,7 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
     }else{
         filelen = (int32_t)uprv_strlen(filename);
     }
+
     if(inputDir == NULL) {
         const char *filenameBegin = uprv_strrchr(filename, U_FILE_SEP_CHAR);
         openFileName = (char *) uprv_malloc(dirlen + filelen + 2);
@@ -364,7 +543,6 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
     uprv_strcat(openFileName, filename);
 
     ucbuf = ucbuf_open(openFileName, &cp,getShowWarning(),TRUE, status);
-
     if(*status == U_FILE_ACCESS_ERROR) {
 
         fprintf(stderr, "couldn't open file %s\n", openFileName == NULL ? filename : openFileName);
@@ -379,52 +557,47 @@ processFile(const char *filename, const char *cp, const char *inputDir, const ch
         printf("autodetected encoding %s\n", cp);
     }
     /* Parse the data into an SRBRoot */
-    data = parse(ucbuf, inputDir, status);
+    data = parse(ucbuf, inputDir, outputDir, !omitBinaryCollation, status);
 
     if (data == NULL || U_FAILURE(*status)) {
         fprintf(stderr, "couldn't parse the file %s. Error:%s\n", filename,u_errorName(*status));
         goto finish;
     }
-
-    /* Determine the target rb filename */
-    rbname = make_res_filename(filename, outputDir, packageName, status);
-    if(touchfile == TRUE) {
-        FileStream *q;
-        char msg[1024];
-        char *tfname = NULL;
-
-        tfname = make_res_filename(filename, outputDir, NULL, status);
-
-        if(U_FAILURE(*status))
-        {
-            fprintf(stderr, "Error writing touchfile for \"%s\"\n", filename);
-            *status = U_FILE_ACCESS_ERROR;
-        } else {
-            uprv_strcat(tfname, ".res");
-            sprintf(msg, "This empty file tells nmake that %s in package %s has been updated.\n",
-                filename, packageName);
-
-            q = T_FileStream_open(tfname, "w");
-            if(q == NULL)
-            {
-                fprintf(stderr, "Error writing touchfile \"%s\"\n", tfname);
-                *status = U_FILE_ACCESS_ERROR;
-            }
-            else
-            {
-                T_FileStream_write(q, msg, (int32_t)uprv_strlen(msg));
-                T_FileStream_close(q);
+    if(options[WRITE_POOL_BUNDLE].doesOccur) {
+        int32_t newKeysLength;
+        const char *newKeys, *newKeysLimit;
+        bundle_compactKeys(data, status);
+        newKeys = bundle_getKeyBytes(data, &newKeysLength);
+        bundle_addKeyBytes(newPoolBundle, newKeys, newKeysLength, status);
+        if(U_FAILURE(*status)) {
+            fprintf(stderr, "bundle_compactKeys(%s) or bundle_getKeyBytes() failed: %s\n",
+                    filename, u_errorName(*status));
+            goto finish;
+        }
+        /* count the number of just-added key strings */
+        for(newKeysLimit = newKeys + newKeysLength; newKeys < newKeysLimit; ++newKeys) {
+            if(*newKeys == 0) {
+                ++newPoolBundle->fKeysCount;
             }
-            uprv_free(tfname);
         }
+    }
 
+    if(options[USE_POOL_BUNDLE].doesOccur) {
+        data->fPoolBundleKeys = poolBundle.fKeys;
+        data->fPoolBundleKeysLength = poolBundle.fKeysLength;
+        data->fPoolBundleKeysCount = poolBundle.fKeysCount;
+        data->fPoolChecksum = poolBundle.fChecksum;
     }
+
+    /* Determine the target rb filename */
+    rbname = make_res_filename(filename, outputDir, packageName, status);
     if(U_FAILURE(*status)) {
         fprintf(stderr, "couldn't make the res fileName for  bundle %s. Error:%s\n", filename,u_errorName(*status));
         goto finish;
     }
     if(write_java== TRUE){
-        bundle_write_java(data,outputDir,outputEnc, outputFileName, sizeof(outputFileName),packageName,bundleName,status);
+        bundle_write_java(data,outputDir,outputEnc, outputFileName, sizeof(outputFileName),
+                          options[JAVA_PACKAGE].value, options[BUNDLE_NAME].value, status);
     }else if(write_xliff ==TRUE){
         bundle_write_xml(data,outputDir,outputEnc, filename, outputFileName, sizeof(outputFileName),language, xliffOutputFileName,status);
     }else{
@@ -467,6 +640,7 @@ make_res_filename(const char *filename,
 
     int32_t pkgLen = 0; /* length of package prefix */
 
+
     if (U_FAILURE(*status)) {
         return 0;
     }