icuSources/extra/uconv/uconv.cpp

   1 /*****************************************************************************
   2 *
   3 *   Copyright (C) 1999-2011, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 *
   6 ******************************************************************************/
   7
   8 /*
   9  * uconv(1): an iconv(1)-like converter using ICU.
  10  *
  11  * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se>
  12  * contributed in 1999.
  13  *
  14  * Conversion to the C conversion API and many improvements by
  15  * Yves Arrouye <yves@realnames.com>, current maintainer.
  16  *
  17  * Markus Scherer maintainer from 2003.
  18  * See source code repository history for changes.
  19  */
  20
  21 #include <unicode/utypes.h>
  22 #include <unicode/putil.h>
  23 #include <unicode/ucnv.h>
  24 #include <unicode/uenum.h>
  25 #include <unicode/unistr.h>
  26 #include <unicode/translit.h>
  27 #include <unicode/uset.h>
  28 #include <unicode/uclean.h>
  29
  30 #include <stdio.h>
  31 #include <errno.h>
  32 #include <string.h>
  33 #include <stdlib.h>
  34
  35 #include "cmemory.h"
  36 #include "cstring.h"
  37 #include "ustrfmt.h"
  38
  39 #include "unicode/uwmsg.h"
  40
  41 U_NAMESPACE_USE
  42
  43 #if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
  44 #include <io.h>
  45 #include <fcntl.h>
  46 #if U_PLATFORM_USES_ONLY_WIN32_API
  47 #define USE_FILENO_BINARY_MODE 1
  48 /* Windows likes to rename Unix-like functions */
  49 #ifndef fileno
  50 #define fileno _fileno
  51 #endif
  52 #ifndef setmode
  53 #define setmode _setmode
  54 #endif
  55 #ifndef O_BINARY
  56 #define O_BINARY _O_BINARY
  57 #endif
  58 #endif
  59 #endif
  60
  61 #ifdef UCONVMSG_LINK
  62 /* below from the README */
  63 #include "unicode/utypes.h"
  64 #include "unicode/udata.h"
  65 U_CFUNC char uconvmsg_dat[];
  66 #endif
  67
  68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  69
  70 #define DEFAULT_BUFSZ   4096
  71 #define UCONVMSG "uconvmsg"
  72
  73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
  74
  75 /*
  76  * Initialize the message bundle so that message strings can be fetched
  77  * by u_wmsg().
  78  *
  79  */
  80
  81 static void initMsg(const char *pname) {
  82     static int ps = 0;
  83
  84     if (!ps) {
  85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
  86         UErrorCode err = U_ZERO_ERROR;
  87
  88         ps = 1;
  89
  90         /* Set up our static data - if any */
  91 #ifdef UCONVMSG_LINK
  92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
  93         if (U_FAILURE(err)) {
  94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
  95                   pname, u_errorName(err));
  96           err = U_ZERO_ERROR; /* It may still fail */
  97         }
  98 #endif
  99
 100         /* Get messages. */
 101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
 102         if (U_FAILURE(err)) {
 103             fprintf(stderr,
 104                     "%s: warning: couldn't open bundle %s: %s\n",
 105                     pname, UCONVMSG, u_errorName(err));
 106 #ifdef UCONVMSG_LINK
 107             fprintf(stderr,
 108                     "%s: setAppData was called, internal data %s failed to load\n",
 109                         pname, UCONVMSG);
 110 #endif
 111
 112             err = U_ZERO_ERROR;
 113             /* that was try #1, try again with a path */
 114             uprv_strcpy(dataPath, u_getDataDirectory());
 115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
 116             uprv_strcat(dataPath, UCONVMSG);
 117
 118             gBundle = u_wmsg_setPath(dataPath, &err);
 119             if (U_FAILURE(err)) {
 120                 fprintf(stderr,
 121                     "%s: warning: still couldn't open bundle %s: %s\n",
 122                     pname, dataPath, u_errorName(err));
 123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
 124             }
 125         }
 126     }
 127 }
 128
 129 /* Mapping of callback names to the callbacks passed to the converter
 130    API. */
 131
 132 static struct callback_ent {
 133     const char *name;
 134     UConverterFromUCallback fromu;
 135     const void *fromuctxt;
 136     UConverterToUCallback tou;
 137     const void *touctxt;
 138 } transcode_callbacks[] = {
 139     { "substitute",
 140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
 141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
 142     { "skip",
 143       UCNV_FROM_U_CALLBACK_SKIP, 0,
 144       UCNV_TO_U_CALLBACK_SKIP, 0 },
 145     { "stop",
 146       UCNV_FROM_U_CALLBACK_STOP, 0,
 147       UCNV_TO_U_CALLBACK_STOP, 0 },
 148     { "escape",
 149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
 150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
 151     { "escape-icu",
 152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
 153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
 154     { "escape-java",
 155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
 156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
 157     { "escape-c",
 158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
 159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
 160     { "escape-xml",
 161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 163     { "escape-xml-hex",
 164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 166     { "escape-xml-dec",
 167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
 168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
 169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
 170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
 171 };
 172
 173 /* Return a pointer to a callback record given its name. */
 174
 175 static const struct callback_ent *findCallback(const char *name) {
 176     int i, count =
 177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
 178
 179     /* We'll do a linear search, there aren't many of them and bsearch()
 180        may not be that portable. */
 181
 182     for (i = 0; i < count; ++i) {
 183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
 184             return &transcode_callbacks[i];
 185         }
 186     }
 187
 188     return 0;
 189 }
 190
 191 /* Print converter information. If lookfor is set, only that converter will
 192    be printed, otherwise all converters will be printed. If canon is non
 193    zero, tags and aliases for each converter are printed too, in the format
 194    expected for convrters.txt(5). */
 195
 196 static int printConverters(const char *pname, const char *lookfor,
 197     UBool canon)
 198 {
 199     UErrorCode err = U_ZERO_ERROR;
 200     int32_t num;
 201     uint16_t num_stds;
 202     const char **stds;
 203
 204     /* If there is a specified name, just handle that now. */
 205
 206     if (lookfor) {
 207         if (!canon) {
 208             printf("%s\n", lookfor);
 209             return 0;
 210         } else {
 211         /*  Because we are printing a canonical name, we need the
 212             true converter name. We've done that already except for
 213             the default name (because we want to print the exact
 214             name one would get when calling ucnv_getDefaultName()
 215             in non-canon mode). But since we do not know at this
 216             point if we have the default name or something else, we
 217             need to normalize again to the canonical converter
 218             name. */
 219
 220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
 221             if (U_SUCCESS(err)) {
 222                 lookfor = truename;
 223             } else {
 224                 err = U_ZERO_ERROR;
 225             }
 226         }
 227     }
 228
 229     /* Print converter names. We come here for one of two reasons: we
 230        are printing all the names (lookfor was null), or we have a
 231        single converter to print but in canon mode, hence we need to
 232        get to it in order to print everything. */
 233
 234     num = ucnv_countAvailable();
 235     if (num <= 0) {
 236         initMsg(pname);
 237         u_wmsg(stderr, "cantGetNames");
 238         return -1;
 239     }
 240     if (lookfor) {
 241         num = 1;                /* We know where we want to be. */
 242     }
 243
 244     num_stds = ucnv_countStandards();
 245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
 246     if (!stds) {
 247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
 248         return -1;
 249     } else {
 250         uint16_t s;
 251
 252         if (canon) {
 253             printf("{ ");
 254         }
 255         for (s = 0; s < num_stds; ++s) {
 256             stds[s] = ucnv_getStandard(s, &err);
 257             if (canon) {
 258                 printf("%s ", stds[s]);
 259             }
 260             if (U_FAILURE(err)) {
 261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
 262                 goto error_cleanup;
 263             }
 264         }
 265         if (canon) {
 266             puts("}");
 267         }
 268     }
 269
 270     for (int32_t i = 0; i < num; i++) {
 271         const char *name;
 272         uint16_t num_aliases;
 273
 274         /* Set the name either to what we are looking for, or
 275         to the current converter name. */
 276
 277         if (lookfor) {
 278             name = lookfor;
 279         } else {
 280             name = ucnv_getAvailableName(i);
 281         }
 282
 283         /* Get all the aliases associated to the name. */
 284
 285         err = U_ZERO_ERROR;
 286         num_aliases = ucnv_countAliases(name, &err);
 287         if (U_FAILURE(err)) {
 288             printf("%s", name);
 289
 290             UnicodeString str(name, "");
 291             putchar('\t');
 292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 293                 u_wmsg_errorName(err));
 294             goto error_cleanup;
 295         } else {
 296             uint16_t a, s, t;
 297
 298             /* Write all the aliases and their tags. */
 299
 300             for (a = 0; a < num_aliases; ++a) {
 301                 const char *alias = ucnv_getAlias(name, a, &err);
 302
 303                 if (U_FAILURE(err)) {
 304                     UnicodeString str(name, "");
 305                     putchar('\t');
 306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 307                         u_wmsg_errorName(err));
 308                     goto error_cleanup;
 309                 }
 310
 311                 /* Print the current alias so that it looks right. */
 312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
 313                                  alias,
 314                                  (canon ? "" : " "));
 315
 316                 /* Look (slowly, linear searching) for a tag. */
 317
 318                 if (canon) {
 319                     /* -1 to skip the last standard */
 320                     for (s = t = 0; s < num_stds-1; ++s) {
 321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
 322                         if (U_SUCCESS(err)) {
 323                             /* List the standard tags */
 324                             const char *standardName;
 325                             UBool isFirst = TRUE;
 326                             UErrorCode enumError = U_ZERO_ERROR;
 327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
 328                                 /* See if this alias is supported by this standard. */
 329                                 if (!strcmp(standardName, alias)) {
 330                                     if (!t) {
 331                                         printf(" {");
 332                                         t = 1;
 333                                     }
 334                                     /* Print a * after the default standard name */
 335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
 336                                 }
 337                                 isFirst = FALSE;
 338                             }
 339                         }
 340                     }
 341                     if (t) {
 342                         printf(" }");
 343                     }
 344                 }
 345                 /* Terminate this entry. */
 346                 if (canon) {
 347                     puts("");
 348                 }
 349
 350                 /* Move on. */
 351             }
 352             /* Terminate this entry. */
 353             if (!canon) {
 354                 puts("");
 355             }
 356         }
 357     }
 358
 359     /* Free temporary data. */
 360
 361     uprv_free(stds);
 362
 363     /* Success. */
 364
 365     return 0;
 366 error_cleanup:
 367     uprv_free(stds);
 368     return -1;
 369 }
 370
 371 /* Print all available transliterators. If canon is non zero, print
 372    one transliterator per line. */
 373
 374 static int printTransliterators(UBool canon)
 375 {
 376 #if UCONFIG_NO_TRANSLITERATION
 377     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
 378     return 1;
 379 #else
 380     UErrorCode status = U_ZERO_ERROR;
 381     UEnumeration *ids = utrans_openIDs(&status);
 382     int32_t i, numtrans = uenum_count(ids, &status);
 383
 384     char sepchar = canon ? '\n' : ' ';
 385
 386     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
 387         int32_t len;
 388         const char *nextTrans = uenum_next(ids, &len, &status);
 389
 390         printf("%s", nextTrans);
 391         if (i < numtrans - 1) {
 392             putchar(sepchar);
 393         }
 394     }
 395
 396     uenum_close(ids);
 397
 398     /* Add a terminating newline if needed. */
 399
 400     if (sepchar != '\n') {
 401         putchar('\n');
 402     }
 403
 404     /* Success. */
 405
 406     return 0;
 407 #endif
 408 }
 409
 410 enum {
 411     uSP = 0x20,         // space
 412     uCR = 0xd,          // carriage return
 413     uLF = 0xa,          // line feed
 414     uNL = 0x85,         // newline
 415     uLS = 0x2028,       // line separator
 416     uPS = 0x2029,       // paragraph separator
 417     uSig = 0xfeff       // signature/BOM character
 418 };
 419
 420 static inline int32_t
 421 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
 422     // find one of
 423     // CR, LF, CRLF, NL, LS, PS
 424     // for paragraph ends (see UAX #13/Unicode 4)
 425     // and include it in the chunk
 426     // all of these characters are on the BMP
 427     // do not include FF or VT in case they are part of a paragraph
 428     // (important for bidi contexts)
 429     static const UChar paraEnds[] = {
 430         0xd, 0xa, 0x85, 0x2028, 0x2029
 431     };
 432     enum {
 433         iCR, iLF, iNL, iLS, iPS, iCount
 434     };
 435
 436     // first, see if there is a CRLF split between prev and s
 437     if (prev.endsWith(paraEnds + iCR, 1)) {
 438         if (s.startsWith(paraEnds + iLF, 1)) {
 439             return 1; // split CRLF, include the LF
 440         } else if (!s.isEmpty()) {
 441             return 0; // complete the last chunk
 442         } else {
 443             return -1; // wait for actual further contents to arrive
 444         }
 445     }
 446
 447     const UChar *u = s.getBuffer(), *limit = u + s.length();
 448     UChar c;
 449
 450     while (u < limit) {
 451         c = *u++;
 452         if (
 453             ((c < uSP) && (c == uCR || c == uLF)) ||
 454             (c == uNL) ||
 455             ((c & uLS) == uLS)
 456         ) {
 457             if (c == uCR) {
 458                 // check for CRLF
 459                 if (u == limit) {
 460                     return -1; // LF may be in the next chunk
 461                 } else if (*u == uLF) {
 462                     ++u; // include the LF in this chunk
 463                 }
 464             }
 465             return (int32_t)(u - s.getBuffer());
 466         }
 467     }
 468
 469     return -1; // continue collecting the chunk
 470 }
 471
 472 enum {
 473     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
 474     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
 475     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
 476 };
 477
 478 static inline UChar
 479 nibbleToHex(uint8_t n) {
 480     n &= 0xf;
 481     return
 482         n <= 9 ?
 483             (UChar)(0x30 + n) :
 484             (UChar)((0x61 - 10) + n);
 485 }
 486
 487 // check the converter's Unicode signature properties;
 488 // the fromUnicode side of the converter must be in its initial state
 489 // and will be reset again if it was used
 490 static int32_t
 491 cnvSigType(UConverter *cnv) {
 492     UErrorCode err;
 493     int32_t result;
 494
 495     // test if the output charset can convert U+FEFF
 496     USet *set = uset_open(1, 0);
 497     err = U_ZERO_ERROR;
 498     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
 499     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
 500         result = CNV_WITH_FEFF;
 501     } else {
 502         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
 503     }
 504     uset_close(set);
 505
 506     if (result == CNV_WITH_FEFF) {
 507         // test if the output charset emits a signature anyway
 508         const UChar a[1] = { 0x61 }; // "a"
 509         const UChar *in;
 510
 511         char buffer[20];
 512         char *out;
 513
 514         in = a;
 515         out = buffer;
 516         err = U_ZERO_ERROR;
 517         ucnv_fromUnicode(cnv,
 518             &out, buffer + sizeof(buffer),
 519             &in, a + 1,
 520             NULL, TRUE, &err);
 521         ucnv_resetFromUnicode(cnv);
 522
 523         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
 524             U_SUCCESS(err)
 525         ) {
 526             result = CNV_ADDS_FEFF;
 527         }
 528     }
 529
 530     return result;
 531 }
 532
 533 class ConvertFile {
 534 public:
 535     ConvertFile() :
 536         buf(NULL), outbuf(NULL), fromoffsets(NULL),
 537         bufsz(0), signature(0) {}
 538
 539     void
 540     setBufferSize(size_t bufferSize) {
 541         bufsz = bufferSize;
 542
 543         buf = new char[2 * bufsz];
 544         outbuf = buf + bufsz;
 545
 546         // +1 for an added U+FEFF in the intermediate Unicode buffer
 547         fromoffsets = new int32_t[bufsz + 1];
 548     }
 549
 550     ~ConvertFile() {
 551         delete [] buf;
 552         delete [] fromoffsets;
 553     }
 554
 555     UBool convertFile(const char *pname,
 556                       const char *fromcpage,
 557                       UConverterToUCallback toucallback,
 558                       const void *touctxt,
 559                       const char *tocpage,
 560                       UConverterFromUCallback fromucallback,
 561                       const void *fromuctxt,
 562                       UBool fallback,
 563                       const char *translit,
 564                       const char *infilestr,
 565                       FILE * outfile, int verbose);
 566 private:
 567     friend int main(int argc, char **argv);
 568
 569     char *buf, *outbuf;
 570     int32_t *fromoffsets;
 571
 572     size_t bufsz;
 573     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
 574 };
 575
 576 // Convert a file from one encoding to another
 577 UBool
 578 ConvertFile::convertFile(const char *pname,
 579                          const char *fromcpage,
 580                          UConverterToUCallback toucallback,
 581                          const void *touctxt,
 582                          const char *tocpage,
 583                          UConverterFromUCallback fromucallback,
 584                          const void *fromuctxt,
 585                          UBool fallback,
 586                          const char *translit,
 587                          const char *infilestr,
 588                          FILE * outfile, int verbose)
 589 {
 590     FILE *infile;
 591     UBool ret = TRUE;
 592     UConverter *convfrom = 0;
 593     UConverter *convto = 0;
 594     UErrorCode err = U_ZERO_ERROR;
 595     UBool flush;
 596     const char *cbufp, *prevbufp;
 597     char *bufp;
 598
 599     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
 600
 601     const UChar *unibuf, *unibufbp;
 602     UChar *unibufp;
 603
 604     size_t rd, wr;
 605
 606 #if !UCONFIG_NO_TRANSLITERATION
 607     Transliterator *t = 0;      // Transliterator acting on Unicode data.
 608     UnicodeString chunk;        // One chunk of the text being collected for transformation.
 609 #endif
 610     UnicodeString u;            // String to do the transliteration.
 611     int32_t ulen;
 612
 613     // use conversion offsets for error messages
 614     // unless a transliterator is used -
 615     // a text transformation will reorder characters in unpredictable ways
 616     UBool useOffsets = TRUE;
 617
 618     // Open the correct input file or connect to stdin for reading input
 619
 620     if (infilestr != 0 && strcmp(infilestr, "-")) {
 621         infile = fopen(infilestr, "rb");
 622         if (infile == 0) {
 623             UnicodeString str1(infilestr, "");
 624             str1.append((UChar32) 0);
 625             UnicodeString str2(strerror(errno), "");
 626             str2.append((UChar32) 0);
 627             initMsg(pname);
 628             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
 629             return FALSE;
 630         }
 631     } else {
 632         infilestr = "-";
 633         infile = stdin;
 634 #ifdef USE_FILENO_BINARY_MODE
 635         if (setmode(fileno(stdin), O_BINARY) == -1) {
 636             initMsg(pname);
 637             u_wmsg(stderr, "cantSetInBinMode");
 638             return FALSE;
 639         }
 640 #endif
 641     }
 642
 643     if (verbose) {
 644         fprintf(stderr, "%s:\n", infilestr);
 645     }
 646
 647 #if !UCONFIG_NO_TRANSLITERATION
 648     // Create transliterator as needed.
 649
 650     if (translit != NULL && *translit) {
 651         UParseError parse;
 652         UnicodeString str(translit), pestr;
 653
 654         /* Create from rules or by ID as needed. */
 655
 656         parse.line = -1;
 657
 658         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
 659             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
 660         } else {
 661             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
 662         }
 663
 664         if (U_FAILURE(err)) {
 665             str.append((UChar32) 0);
 666             initMsg(pname);
 667
 668             if (parse.line >= 0) {
 669                 UChar linebuf[20], offsetbuf[20];
 670                 uprv_itou(linebuf, 20, parse.line, 10, 0);
 671                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
 672                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
 673                     u_wmsg_errorName(err), linebuf, offsetbuf);
 674             } else {
 675                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
 676                     u_wmsg_errorName(err));
 677             }
 678
 679             if (t) {
 680                 delete t;
 681                 t = 0;
 682             }
 683             goto error_exit;
 684         }
 685
 686         useOffsets = FALSE;
 687     }
 688 #endif
 689
 690     // Create codepage converter. If the codepage or its aliases weren't
 691     // available, it returns NULL and a failure code. We also set the
 692     // callbacks, and return errors in the same way.
 693
 694     convfrom = ucnv_open(fromcpage, &err);
 695     if (U_FAILURE(err)) {
 696         UnicodeString str(fromcpage, "");
 697         initMsg(pname);
 698         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
 699             u_wmsg_errorName(err));
 700         goto error_exit;
 701     }
 702     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
 703     if (U_FAILURE(err)) {
 704         initMsg(pname);
 705         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 706         goto error_exit;
 707     }
 708
 709     convto = ucnv_open(tocpage, &err);
 710     if (U_FAILURE(err)) {
 711         UnicodeString str(tocpage, "");
 712         initMsg(pname);
 713         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
 714             u_wmsg_errorName(err));
 715         goto error_exit;
 716     }
 717     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
 718     if (U_FAILURE(err)) {
 719         initMsg(pname);
 720         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 721         goto error_exit;
 722     }
 723     ucnv_setFallback(convto, fallback);
 724
 725     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
 726     int8_t sig;
 727
 728     // OK, we can convert now.
 729     sig = signature;
 730     rd = 0;
 731
 732     do {
 733         willexit = FALSE;
 734
 735         // input file offset at the beginning of the next buffer
 736         infoffset += rd;
 737
 738         rd = fread(buf, 1, bufsz, infile);
 739         if (ferror(infile) != 0) {
 740             UnicodeString str(strerror(errno));
 741             initMsg(pname);
 742             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
 743             goto error_exit;
 744         }
 745
 746         // Convert the read buffer into the new encoding via Unicode.
 747         // After the call 'unibufp' will be placed behind the last
 748         // character that was converted in the 'unibuf'.
 749         // Also the 'cbufp' is positioned behind the last converted
 750         // character.
 751         // At the last conversion in the file, flush should be set to
 752         // true so that we get all characters converted.
 753         //
 754         // The converter must be flushed at the end of conversion so
 755         // that characters on hold also will be written.
 756
 757         cbufp = buf;
 758         flush = (UBool)(rd != bufsz);
 759
 760         // convert until the input is consumed
 761         do {
 762             // remember the start of the current byte-to-Unicode conversion
 763             prevbufp = cbufp;
 764
 765             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
 766
 767             // Use bufsz instead of u.getCapacity() for the targetLimit
 768             // so that we don't overflow fromoffsets[].
 769             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
 770                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
 771
 772             ulen = (int32_t)(unibufp - unibuf);
 773             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
 774
 775             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
 776             // converting all of the input bytes.
 777             // It works like this because ucnv_toUnicode() returns only under the
 778             // following conditions:
 779             // - an error occurred during conversion (an error code is set)
 780             // - the target buffer is filled (the error code indicates an overflow)
 781             // - the source is consumed
 782             // That is, if the error code does not indicate a failure,
 783             // not even an overflow, then the source must be consumed entirely.
 784             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
 785
 786             if (err == U_BUFFER_OVERFLOW_ERROR) {
 787                 err = U_ZERO_ERROR;
 788             } else if (U_FAILURE(err)) {
 789                 char pos[32], errorBytes[32];
 790                 int8_t i, length, errorLength;
 791
 792                 UErrorCode localError = U_ZERO_ERROR;
 793                 errorLength = (int8_t)sizeof(errorBytes);
 794                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
 795                 if (U_FAILURE(localError) || errorLength == 0) {
 796                     errorLength = 1;
 797                 }
 798
 799                 // print the input file offset of the start of the error bytes:
 800                 // input file offset of the current byte buffer +
 801                 // length of the just consumed bytes -
 802                 // length of the error bytes
 803                 length =
 804                     (int8_t)sprintf(pos, "%d",
 805                         (int)(infoffset + (cbufp - buf) - errorLength));
 806
 807                 // output the bytes that caused the error
 808                 UnicodeString str;
 809                 for (i = 0; i < errorLength; ++i) {
 810                     if (i > 0) {
 811                         str.append((UChar)uSP);
 812                     }
 813                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
 814                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
 815                 }
 816
 817                 initMsg(pname);
 818                 u_wmsg(stderr, "problemCvtToU",
 819                         UnicodeString(pos, length, "").getTerminatedBuffer(),
 820                         str.getTerminatedBuffer(),
 821                         u_wmsg_errorName(err));
 822
 823                 willexit = TRUE;
 824                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
 825             }
 826
 827             // Replaced a check for whether the input was consumed by
 828             // looping until it is; message key "premEndInput" now obsolete.
 829
 830             if (ulen == 0) {
 831                 continue;
 832             }
 833
 834             // remove a U+FEFF Unicode signature character if requested
 835             if (sig < 0) {
 836                 if (u.charAt(0) == uSig) {
 837                     u.remove(0, 1);
 838
 839                     // account for the removed UChar and offset
 840                     --ulen;
 841
 842                     if (useOffsets) {
 843                         // remove an offset from fromoffsets[] as well
 844                         // to keep the array parallel with the UChars
 845                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
 846                     }
 847
 848                 }
 849                 sig = 0;
 850             }
 851
 852 #if !UCONFIG_NO_TRANSLITERATION
 853             // Transliterate/transform if needed.
 854
 855             // For transformation, we use chunking code -
 856             // collect Unicode input until, for example, an end-of-line,
 857             // then transform and output-convert that and continue collecting.
 858             // This makes the transformation result independent of the buffer size
 859             // while avoiding the slower keyboard mode.
 860             // The end-of-chunk characters are completely included in the
 861             // transformed string in case they are to be transformed themselves.
 862             if (t != NULL) {
 863                 UnicodeString out;
 864                 int32_t chunkLimit;
 865
 866                 do {
 867                     chunkLimit = getChunkLimit(chunk, u);
 868                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
 869                         // use all of the rest at the end of the text
 870                         chunkLimit = u.length();
 871                     }
 872                     if (chunkLimit >= 0) {
 873                         // complete the chunk and transform it
 874                         chunk.append(u, 0, chunkLimit);
 875                         u.remove(0, chunkLimit);
 876                         t->transliterate(chunk);
 877
 878                         // append the transformation result to the result and empty the chunk
 879                         out.append(chunk);
 880                         chunk.remove();
 881                     } else {
 882                         // continue collecting the chunk
 883                         chunk.append(u);
 884                         break;
 885                     }
 886                 } while (!u.isEmpty());
 887
 888                 u = out;
 889                 ulen = u.length();
 890             }
 891 #endif
 892
 893             // add a U+FEFF Unicode signature character if requested
 894             // and possible/necessary
 895             if (sig > 0) {
 896                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
 897                     u.insert(0, (UChar)uSig);
 898
 899                     if (useOffsets) {
 900                         // insert a pseudo-offset into fromoffsets[] as well
 901                         // to keep the array parallel with the UChars
 902                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
 903                         fromoffsets[0] = -1;
 904                     }
 905
 906                     // account for the additional UChar and offset
 907                     ++ulen;
 908                 }
 909                 sig = 0;
 910             }
 911
 912             // Convert the Unicode buffer into the destination codepage
 913             // Again 'bufp' will be placed behind the last converted character
 914             // And 'unibufp' will be placed behind the last converted unicode character
 915             // At the last conversion flush should be set to true to ensure that
 916             // all characters left get converted
 917
 918             unibuf = unibufbp = u.getBuffer();
 919
 920             do {
 921                 bufp = outbuf;
 922
 923                 // Use fromSawEndOfBytes in addition to the flush flag -
 924                 // it indicates whether the intermediate Unicode string
 925                 // contains the very last UChars for the very last input bytes.
 926                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
 927                                  &unibufbp,
 928                                  unibuf + ulen,
 929                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
 930
 931                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
 932                 // converting all of the intermediate UChars.
 933                 // See comment for fromSawEndOfBytes.
 934                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
 935
 936                 if (err == U_BUFFER_OVERFLOW_ERROR) {
 937                     err = U_ZERO_ERROR;
 938                 } else if (U_FAILURE(err)) {
 939                     UChar errorUChars[4];
 940                     const char *errtag;
 941                     char pos[32];
 942                     UChar32 c;
 943                     int8_t i, length, errorLength;
 944
 945                     UErrorCode localError = U_ZERO_ERROR;
 946                     errorLength = (int8_t)LENGTHOF(errorUChars);
 947                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
 948                     if (U_FAILURE(localError) || errorLength == 0) {
 949                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
 950                         errorLength = 1;
 951                     }
 952
 953                     int32_t ferroffset;
 954
 955                     if (useOffsets) {
 956                         // Unicode buffer offset of the start of the error UChars
 957                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
 958                         if (ferroffset < 0) {
 959                             // approximation - the character started in the previous Unicode buffer
 960                             ferroffset = 0;
 961                         }
 962
 963                         // get the corresponding byte offset out of fromoffsets[]
 964                         // go back if the offset is not known for some of the UChars
 965                         int32_t fromoffset;
 966                         do {
 967                             fromoffset = fromoffsets[ferroffset];
 968                         } while (fromoffset < 0 && --ferroffset >= 0);
 969
 970                         // total input file offset =
 971                         // input file offset of the current byte buffer +
 972                         // byte buffer offset of where the current Unicode buffer is converted from +
 973                         // fromoffsets[Unicode offset]
 974                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
 975                         errtag = "problemCvtFromU";
 976                     } else {
 977                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
 978                         // be different from what the offsets refer to.
 979
 980                         // output file offset
 981                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
 982                         errtag = "problemCvtFromUOut";
 983                     }
 984
 985                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
 986
 987                     // output the code points that caused the error
 988                     UnicodeString str;
 989                     for (i = 0; i < errorLength;) {
 990                         if (i > 0) {
 991                             str.append((UChar)uSP);
 992                         }
 993                         U16_NEXT(errorUChars, i, errorLength, c);
 994                         if (c >= 0x100000) {
 995                             str.append(nibbleToHex((uint8_t)(c >> 20)));
 996                         }
 997                         if (c >= 0x10000) {
 998                             str.append(nibbleToHex((uint8_t)(c >> 16)));
 999                         }
1000                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1001                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1002                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1003                         str.append(nibbleToHex((uint8_t)c));
1004                     }
1005
1006                     initMsg(pname);
1007                     u_wmsg(stderr, errtag,
1008                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1009                             str.getTerminatedBuffer(),
1010                            u_wmsg_errorName(err));
1011                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1012
1013                     willexit = TRUE;
1014                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1015                 }
1016
1017                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1018                 // looping until they are; message key "premEnd" now obsolete.
1019
1020                 // Finally, write the converted buffer to the output file
1021                 size_t outlen = (size_t) (bufp - outbuf);
1022                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1023                 if (wr != outlen) {
1024                     UnicodeString str(strerror(errno));
1025                     initMsg(pname);
1026                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1027                     willexit = TRUE;
1028                 }
1029
1030                 if (willexit) {
1031                     goto error_exit;
1032                 }
1033             } while (!toSawEndOfUnicode);
1034         } while (!fromSawEndOfBytes);
1035     } while (!flush);           // Stop when we have flushed the
1036                                 // converters (this means that it's
1037                                 // the end of output)
1038
1039     goto normal_exit;
1040
1041 error_exit:
1042     ret = FALSE;
1043
1044 normal_exit:
1045     // Cleanup.
1046
1047     ucnv_close(convfrom);
1048     ucnv_close(convto);
1049
1050 #if !UCONFIG_NO_TRANSLITERATION
1051     delete t;
1052 #endif
1053
1054     if (infile != stdin) {
1055         fclose(infile);
1056     }
1057
1058     return ret;
1059 }
1060
1061 static void usage(const char *pname, int ecode) {
1062     const UChar *msg;
1063     int32_t msgLen;
1064     UErrorCode err = U_ZERO_ERROR;
1065     FILE *fp = ecode ? stderr : stdout;
1066     int res;
1067
1068     initMsg(pname);
1069     msg =
1070         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1071                             &msgLen, &err);
1072     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1073     UnicodeString mname(msg, msgLen + 1);
1074
1075     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1076     if (!ecode) {
1077         if (!res) {
1078             fputc('\n', fp);
1079         }
1080         if (!u_wmsg(fp, "help")) {
1081             /* Now dump callbacks and finish. */
1082
1083             int i, count =
1084                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1085             for (i = 0; i < count; ++i) {
1086                 fprintf(fp, " %s", transcode_callbacks[i].name);
1087             }
1088             fputc('\n', fp);
1089         }
1090     }
1091
1092     exit(ecode);
1093 }
1094
1095 extern int
1096 main(int argc, char **argv)
1097 {
1098     FILE *outfile;
1099     int ret = 0;
1100
1101     size_t bufsz = DEFAULT_BUFSZ;
1102
1103     const char *fromcpage = 0;
1104     const char *tocpage = 0;
1105     const char *translit = 0;
1106     const char *outfilestr = 0;
1107     UBool fallback = FALSE;
1108
1109     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1110     const void *fromuctxt = 0;
1111     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1112     const void *touctxt = 0;
1113
1114     char **iter, **remainArgv, **remainArgvLimit;
1115     char **end = argv + argc;
1116
1117     const char *pname;
1118
1119     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1120     const char *printName = 0;
1121
1122     UBool verbose = FALSE;
1123     UErrorCode status = U_ZERO_ERROR;
1124
1125     ConvertFile cf;
1126
1127     /* Initialize ICU */
1128     u_init(&status);
1129     if (U_FAILURE(status)) {
1130         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1131             argv[0], u_errorName(status));
1132         exit(1);
1133     }
1134
1135     // Get and prettify pname.
1136     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1137 #if U_PLATFORM_USES_ONLY_WIN32_API
1138     if (!pname) {
1139         pname = uprv_strrchr(*argv, '/');
1140     }
1141 #endif
1142     if (!pname) {
1143         pname = *argv;
1144     } else {
1145         ++pname;
1146     }
1147
1148     // First, get the arguments from command-line
1149     // to know the codepages to convert between
1150
1151     remainArgv = remainArgvLimit = argv + 1;
1152     for (iter = argv + 1; iter != end; iter++) {
1153         // Check for from charset
1154         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1155             iter++;
1156             if (iter != end)
1157                 fromcpage = *iter;
1158             else
1159                 usage(pname, 1);
1160         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1161             iter++;
1162             if (iter != end)
1163                 tocpage = *iter;
1164             else
1165                 usage(pname, 1);
1166         } else if (strcmp("-x", *iter) == 0) {
1167             iter++;
1168             if (iter != end)
1169                 translit = *iter;
1170             else
1171                 usage(pname, 1);
1172         } else if (!strcmp("--fallback", *iter)) {
1173             fallback = TRUE;
1174         } else if (!strcmp("--no-fallback", *iter)) {
1175             fallback = FALSE;
1176         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1177             iter++;
1178             if (iter != end) {
1179                 bufsz = atoi(*iter);
1180                 if ((int) bufsz <= 0) {
1181                     initMsg(pname);
1182                     UnicodeString str(*iter);
1183                     initMsg(pname);
1184                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1185                     return 3;
1186                 }
1187             } else {
1188                 usage(pname, 1);
1189             }
1190         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1191             if (printTranslits) {
1192                 usage(pname, 1);
1193             }
1194             printConvs = TRUE;
1195         } else if (strcmp("--default-code", *iter) == 0) {
1196             if (printTranslits) {
1197                 usage(pname, 1);
1198             }
1199             printName = ucnv_getDefaultName();
1200         } else if (strcmp("--list-code", *iter) == 0) {
1201             if (printTranslits) {
1202                 usage(pname, 1);
1203             }
1204
1205             iter++;
1206             if (iter != end) {
1207                 UErrorCode e = U_ZERO_ERROR;
1208                 printName = ucnv_getAlias(*iter, 0, &e);
1209                 if (U_FAILURE(e) || !printName) {
1210                     UnicodeString str(*iter);
1211                     initMsg(pname);
1212                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1213                     return 2;
1214                 }
1215             } else
1216                 usage(pname, 1);
1217         } else if (strcmp("--canon", *iter) == 0) {
1218             printCanon = TRUE;
1219         } else if (strcmp("-L", *iter) == 0
1220             || !strcmp("--list-transliterators", *iter)) {
1221             if (printConvs) {
1222                 usage(pname, 1);
1223             }
1224             printTranslits = TRUE;
1225         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1226             || !strcmp("--help", *iter)) {
1227             usage(pname, 0);
1228         } else if (!strcmp("-c", *iter)) {
1229             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1230         } else if (!strcmp("--to-callback", *iter)) {
1231             iter++;
1232             if (iter != end) {
1233                 const struct callback_ent *cbe = findCallback(*iter);
1234                 if (cbe) {
1235                     fromucallback = cbe->fromu;
1236                     fromuctxt = cbe->fromuctxt;
1237                 } else {
1238                     UnicodeString str(*iter);
1239                     initMsg(pname);
1240                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1241                     return 4;
1242                 }
1243             } else {
1244                 usage(pname, 1);
1245             }
1246         } else if (!strcmp("--from-callback", *iter)) {
1247             iter++;
1248             if (iter != end) {
1249                 const struct callback_ent *cbe = findCallback(*iter);
1250                 if (cbe) {
1251                     toucallback = cbe->tou;
1252                     touctxt = cbe->touctxt;
1253                 } else {
1254                     UnicodeString str(*iter);
1255                     initMsg(pname);
1256                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1257                     return 4;
1258                 }
1259             } else {
1260                 usage(pname, 1);
1261             }
1262         } else if (!strcmp("-i", *iter)) {
1263             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1264         } else if (!strcmp("--callback", *iter)) {
1265             iter++;
1266             if (iter != end) {
1267                 const struct callback_ent *cbe = findCallback(*iter);
1268                 if (cbe) {
1269                     fromucallback = cbe->fromu;
1270                     fromuctxt = cbe->fromuctxt;
1271                     toucallback = cbe->tou;
1272                     touctxt = cbe->touctxt;
1273                 } else {
1274                     UnicodeString str(*iter);
1275                     initMsg(pname);
1276                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1277                     return 4;
1278                 }
1279             } else {
1280                 usage(pname, 1);
1281             }
1282         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1283             verbose = FALSE;
1284         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1285             verbose = TRUE;
1286         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1287             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1288             return 0;
1289         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1290             ++iter;
1291             if (iter != end && !outfilestr) {
1292                 outfilestr = *iter;
1293             } else {
1294                 usage(pname, 1);
1295             }
1296         } else if (0 == strcmp("--add-signature", *iter)) {
1297             cf.signature = 1;
1298         } else if (0 == strcmp("--remove-signature", *iter)) {
1299             cf.signature = -1;
1300         } else if (**iter == '-' && (*iter)[1]) {
1301             usage(pname, 1);
1302         } else {
1303             // move a non-option up in argv[]
1304             *remainArgvLimit++ = *iter;
1305         }
1306     }
1307
1308     if (printConvs || printName) {
1309         return printConverters(pname, printName, printCanon) ? 2 : 0;
1310     } else if (printTranslits) {
1311         return printTransliterators(printCanon) ? 3 : 0;
1312     }
1313
1314     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1315         fromcpage = ucnv_getDefaultName();
1316     }
1317     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1318         tocpage = ucnv_getDefaultName();
1319     }
1320
1321     // Open the correct output file or connect to stdout for reading input
1322     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1323         outfile = fopen(outfilestr, "wb");
1324         if (outfile == 0) {
1325             UnicodeString str1(outfilestr, "");
1326             UnicodeString str2(strerror(errno), "");
1327             initMsg(pname);
1328             u_wmsg(stderr, "cantCreateOutputF",
1329                 str1.getBuffer(), str2.getBuffer());
1330             return 1;
1331         }
1332     } else {
1333         outfilestr = "-";
1334         outfile = stdout;
1335 #ifdef USE_FILENO_BINARY_MODE
1336         if (setmode(fileno(outfile), O_BINARY) == -1) {
1337             u_wmsg(stderr, "cantSetOutBinMode");
1338             exit(-1);
1339         }
1340 #endif
1341     }
1342
1343     /* Loop again on the arguments to find all the input files, and
1344     convert them. */
1345
1346     cf.setBufferSize(bufsz);
1347
1348     if(remainArgv < remainArgvLimit) {
1349         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1350             if (!cf.convertFile(
1351                     pname, fromcpage, toucallback, touctxt, tocpage,
1352                     fromucallback, fromuctxt, fallback, translit, *iter,
1353                     outfile, verbose)
1354             ) {
1355                 goto error_exit;
1356             }
1357         }
1358     } else {
1359         if (!cf.convertFile(
1360                 pname, fromcpage, toucallback, touctxt, tocpage,
1361                 fromucallback, fromuctxt, fallback, translit, 0,
1362                 outfile, verbose)
1363         ) {
1364             goto error_exit;
1365         }
1366     }
1367
1368     goto normal_exit;
1369 error_exit:
1370 #if !UCONFIG_NO_LEGACY_CONVERSION
1371     ret = 1;
1372 #else
1373     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
1374 #endif
1375 normal_exit:
1376
1377     if (outfile != stdout) {
1378         fclose(outfile);
1379     }
1380
1381     return ret;
1382 }
1383
1384
1385 /*
1386  * Hey, Emacs, please set the following:
1387  *
1388  * Local Variables:
1389  * indent-tabs-mode: nil
1390  * End:
1391  *
1392  */