icuSources/extra/uconv/uconv.cpp

   1 /*****************************************************************************
   2 *
   3 *   Copyright (C) 1999-2004, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 *
   6 ******************************************************************************/
   7
   8 /*
   9  * uconv(1): an iconv(1)-like converter using ICU.
  10  *
  11  * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se>
  12  * contributed in 1999.
  13  *
  14  * Conversion to the C conversion API and many improvements by
  15  * Yves Arrouye <yves@realnames.com>, current maintainer.
  16  *
  17  * Markus Scherer maintainer from 2003.
  18  * See source code repository history for changes.
  19  */
  20
  21 #include <unicode/utypes.h>
  22 #include <unicode/putil.h>
  23 #include <unicode/ucnv.h>
  24 #include <unicode/uenum.h>
  25 #include <unicode/unistr.h>
  26 #include <unicode/translit.h>
  27 #include <unicode/uset.h>
  28 #include <unicode/uclean.h>
  29
  30 #include <stdio.h>
  31 #include <errno.h>
  32 #include <string.h>
  33 #include <stdlib.h>
  34
  35 #include "cmemory.h"
  36 #include "cstring.h"
  37 #include "ustrfmt.h"
  38
  39 #include "unicode/uwmsg.h"
  40
  41 #if (defined(WIN32) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
  42 #include <io.h>
  43 #include <fcntl.h>
  44 #define USE_FILENO_BINARY_MODE 1
  45 #endif
  46
  47 #ifdef UCONVMSG_LINK
  48 /* below from the README */
  49 #include "unicode/utypes.h"
  50 #include "unicode/udata.h"
  51 U_CFUNC char uconvmsg_dat[];
  52 #endif
  53
  54 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  55
  56 #define DEFAULT_BUFSZ   4096
  57 #define UCONVMSG "uconvmsg"
  58
  59 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
  60
  61 /*
  62  * Initialize the message bundle so that message strings can be fetched
  63  * by u_wmsg().
  64  *
  65  */
  66
  67 static void initMsg(const char *pname) {
  68     static int ps = 0;
  69
  70     if (!ps) {
  71         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
  72         UErrorCode err = U_ZERO_ERROR;
  73
  74         ps = 1;
  75
  76         /* Set up our static data - if any */
  77 #ifdef UCONVMSG_LINK
  78         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
  79         if (U_FAILURE(err)) {
  80           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
  81                   pname, u_errorName(err));
  82           err = U_ZERO_ERROR; /* It may still fail */
  83         }
  84 #endif
  85
  86         /* Get messages. */
  87         gBundle = u_wmsg_setPath(UCONVMSG, &err);
  88         if (U_FAILURE(err)) {
  89             fprintf(stderr,
  90                     "%s: warning: couldn't open bundle %s: %s\n",
  91                     pname, UCONVMSG, u_errorName(err));
  92 #ifdef UCONVMSG_LINK
  93             fprintf(stderr,
  94                     "%s: setAppData was called, internal data %s failed to load\n",
  95                         pname, UCONVMSG);
  96 #endif
  97
  98             err = U_ZERO_ERROR;
  99             /* that was try #1, try again with a path */
 100             uprv_strcpy(dataPath, u_getDataDirectory());
 101             uprv_strcat(dataPath, U_FILE_SEP_STRING);
 102             uprv_strcat(dataPath, UCONVMSG);
 103
 104             gBundle = u_wmsg_setPath(dataPath, &err);
 105             if (U_FAILURE(err)) {
 106                 fprintf(stderr,
 107                     "%s: warning: still couldn't open bundle %s: %s\n",
 108                     pname, dataPath, u_errorName(err));
 109                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
 110             }
 111         }
 112     }
 113 }
 114
 115 /* Mapping of callback names to the callbacks passed to the converter
 116    API. */
 117
 118 static struct callback_ent {
 119     const char *name;
 120     UConverterFromUCallback fromu;
 121     const void *fromuctxt;
 122     UConverterToUCallback tou;
 123     const void *touctxt;
 124 } transcode_callbacks[] = {
 125     { "substitute",
 126       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
 127       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
 128     { "skip",
 129       UCNV_FROM_U_CALLBACK_SKIP, 0,
 130       UCNV_TO_U_CALLBACK_SKIP, 0 },
 131     { "stop",
 132       UCNV_FROM_U_CALLBACK_STOP, 0,
 133       UCNV_TO_U_CALLBACK_STOP, 0 },
 134     { "escape",
 135       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
 136       UCNV_TO_U_CALLBACK_ESCAPE, 0},
 137     { "escape-icu",
 138       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
 139       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
 140     { "escape-java",
 141       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
 142       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
 143     { "escape-c",
 144       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
 145       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
 146     { "escape-xml",
 147       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 148       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 149     { "escape-xml-hex",
 150       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 151       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 152     { "escape-xml-dec",
 153       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
 154       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
 155     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
 156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
 157 };
 158
 159 /* Return a pointer to a callback record given its name. */
 160
 161 static const struct callback_ent *findCallback(const char *name) {
 162     int i, count =
 163         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
 164
 165     /* We'll do a linear search, there aren't many of them and bsearch()
 166        may not be that portable. */
 167
 168     for (i = 0; i < count; ++i) {
 169         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
 170             return &transcode_callbacks[i];
 171         }
 172     }
 173
 174     return 0;
 175 }
 176
 177 /* Print converter information. If lookfor is set, only that converter will
 178    be printed, otherwise all converters will be printed. If canon is non
 179    zero, tags and aliases for each converter are printed too, in the format
 180    expected for convrters.txt(5). */
 181
 182 static int printConverters(const char *pname, const char *lookfor,
 183     UBool canon)
 184 {
 185     UErrorCode err = U_ZERO_ERROR;
 186     int32_t num;
 187     uint16_t num_stds;
 188     const char **stds;
 189
 190     /* If there is a specified name, just handle that now. */
 191
 192     if (lookfor) {
 193         if (!canon) {
 194             printf("%s\n", lookfor);
 195             return 0;
 196         } else {
 197         /*  Because we are printing a canonical name, we need the
 198             true converter name. We've done that already except for
 199             the default name (because we want to print the exact
 200             name one would get when calling ucnv_getDefaultName()
 201             in non-canon mode). But since we do not know at this
 202             point if we have the default name or something else, we
 203             need to normalize again to the canonical converter
 204             name. */
 205
 206             const char *truename = ucnv_getAlias(lookfor, 0, &err);
 207             if (U_SUCCESS(err)) {
 208                 lookfor = truename;
 209             } else {
 210                 err = U_ZERO_ERROR;
 211             }
 212         }
 213     }
 214
 215     /* Print converter names. We come here for one of two reasons: we
 216        are printing all the names (lookfor was null), or we have a
 217        single converter to print but in canon mode, hence we need to
 218        get to it in order to print everything. */
 219
 220     num = ucnv_countAvailable();
 221     if (num <= 0) {
 222         initMsg(pname);
 223         u_wmsg(stderr, "cantGetNames");
 224         return -1;
 225     }
 226     if (lookfor) {
 227         num = 1;                /* We know where we want to be. */
 228     }
 229
 230     num_stds = ucnv_countStandards();
 231     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
 232     if (!stds) {
 233         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
 234         return -1;
 235     } else {
 236         uint16_t s;
 237
 238         if (canon) {
 239             printf("{ ");
 240         }
 241         for (s = 0; s < num_stds; ++s) {
 242             stds[s] = ucnv_getStandard(s, &err);
 243             if (canon) {
 244                 printf("%s ", stds[s]);
 245             }
 246             if (U_FAILURE(err)) {
 247                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
 248                 return -1;
 249             }
 250         }
 251         if (canon) {
 252             puts("}");
 253         }
 254     }
 255
 256     for (int32_t i = 0; i < num; i++) {
 257         const char *name;
 258         uint16_t num_aliases;
 259
 260         /* Set the name either to what we are looking for, or
 261         to the current converter name. */
 262
 263         if (lookfor) {
 264             name = lookfor;
 265         } else {
 266             name = ucnv_getAvailableName(i);
 267         }
 268
 269         /* Get all the aliases associated to the name. */
 270
 271         err = U_ZERO_ERROR;
 272         num_aliases = ucnv_countAliases(name, &err);
 273         if (U_FAILURE(err)) {
 274             printf("%s", name);
 275
 276             UnicodeString str(name, "");
 277             putchar('\t');
 278             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 279                 u_wmsg_errorName(err));
 280             return -1;
 281         } else {
 282             uint16_t a, s, t;
 283
 284             /* Write all the aliases and their tags. */
 285
 286             for (a = 0; a < num_aliases; ++a) {
 287                 const char *alias = ucnv_getAlias(name, a, &err);
 288
 289                 if (U_FAILURE(err)) {
 290                     UnicodeString str(name, "");
 291                     putchar('\t');
 292                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 293                         u_wmsg_errorName(err));
 294                     return -1;
 295                 }
 296
 297                 /* Print the current alias so that it looks right. */
 298                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
 299                                  alias,
 300                                  (canon ? "" : " "));
 301
 302                 /* Look (slowly, linear searching) for a tag. */
 303
 304                 if (canon) {
 305                     /* -1 to skip the last standard */
 306                     for (s = t = 0; s < num_stds-1; ++s) {
 307                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
 308                         if (U_SUCCESS(err)) {
 309                             /* List the standard tags */
 310                             const char *standardName;
 311                             UBool isFirst = TRUE;
 312                             UErrorCode enumError = U_ZERO_ERROR;
 313                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
 314                                 /* See if this alias is supported by this standard. */
 315                                 if (!strcmp(standardName, alias)) {
 316                                     if (!t) {
 317                                         printf(" {");
 318                                         t = 1;
 319                                     }
 320                                     /* Print a * after the default standard name */
 321                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
 322                                 }
 323                                 isFirst = FALSE;
 324                             }
 325                         }
 326                     }
 327                     if (t) {
 328                         printf(" }");
 329                     }
 330                 }
 331                 /* Terminate this entry. */
 332                 if (canon) {
 333                     puts("");
 334                 }
 335
 336                 /* Move on. */
 337             }
 338             /* Terminate this entry. */
 339             if (!canon) {
 340                 puts("");
 341             }
 342         }
 343     }
 344
 345     /* Free temporary data. */
 346
 347     uprv_free(stds);
 348
 349     /* Success. */
 350
 351     return 0;
 352 }
 353
 354 /* Print all available transliterators. If canon is non zero, print
 355    one transliterator per line. */
 356
 357 static int printTransliterators(UBool canon)
 358 {
 359 #if UCONFIG_NO_TRANSLITERATION
 360     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
 361     return 1;
 362 #else
 363     int32_t numtrans = utrans_countAvailableIDs(), i;
 364     int buflen = 512;
 365     char *buf = (char *) uprv_malloc(buflen);
 366     char staticbuf[512];
 367
 368     char sepchar = canon ? '\n' : ' ';
 369
 370     if (!buf) {
 371         buf = staticbuf;
 372         buflen = sizeof(staticbuf);
 373     }
 374
 375     for (i = 0; i < numtrans; ++i) {
 376         int32_t len = utrans_getAvailableID(i, buf, buflen);
 377         if (len >= buflen - 1) {
 378             if (buf != staticbuf) {
 379                 buflen <<= 1;
 380                 if (buflen < len) {
 381                     buflen = len + 64;
 382                 }
 383                 buf = (char *) uprv_realloc(buf, buflen);
 384                 if (!buf) {
 385                     buf = staticbuf;
 386                     buflen = sizeof(staticbuf);
 387                 }
 388             }
 389             utrans_getAvailableID(i, buf, buflen);
 390             if (len >= buflen) {
 391                 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
 392             }
 393         }
 394
 395         printf("%s", buf);
 396         if (i < numtrans - 1) {
 397             putchar(sepchar);
 398         }
 399     }
 400
 401     /* Add a terminating newline if needed. */
 402
 403     if (sepchar != '\n') {
 404         putchar('\n');
 405     }
 406
 407     /* Free temporary data. */
 408
 409     if (buf != staticbuf) {
 410         uprv_free(buf);
 411     }
 412
 413     /* Success. */
 414
 415     return 0;
 416 #endif
 417 }
 418
 419 enum {
 420     uSP = 0x20,         // space
 421     uCR = 0xd,          // carriage return
 422     uLF = 0xa,          // line feed
 423     uNL = 0x85,         // newline
 424     uLS = 0x2028,       // line separator
 425     uPS = 0x2029,       // paragraph separator
 426     uSig = 0xfeff       // signature/BOM character
 427 };
 428
 429 static inline int32_t
 430 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
 431     // find one of
 432     // CR, LF, CRLF, NL, LS, PS
 433     // for paragraph ends (see UAX #13/Unicode 4)
 434     // and include it in the chunk
 435     // all of these characters are on the BMP
 436     // do not include FF or VT in case they are part of a paragraph
 437     // (important for bidi contexts)
 438     static const UChar paraEnds[] = {
 439         0xd, 0xa, 0x85, 0x2028, 0x2029
 440     };
 441     enum {
 442         iCR, iLF, iNL, iLS, iPS, iCount
 443     };
 444
 445     // first, see if there is a CRLF split between prev and s
 446     if (prev.endsWith(paraEnds + iCR, 1)) {
 447         if (s.startsWith(paraEnds + iLF, 1)) {
 448             return 1; // split CRLF, include the LF
 449         } else if (!s.isEmpty()) {
 450             return 0; // complete the last chunk
 451         } else {
 452             return -1; // wait for actual further contents to arrive
 453         }
 454     }
 455
 456     const UChar *u = s.getBuffer(), *limit = u + s.length();
 457     UChar c;
 458
 459     while (u < limit) {
 460         c = *u++;
 461         if (
 462             ((c < uSP) && (c == uCR || c == uLF)) ||
 463             (c == uNL) ||
 464             ((c & uLS) == uLS)
 465         ) {
 466             if (c == uCR) {
 467                 // check for CRLF
 468                 if (u == limit) {
 469                     return -1; // LF may be in the next chunk
 470                 } else if (*u == uLF) {
 471                     ++u; // include the LF in this chunk
 472                 }
 473             }
 474             return (int32_t)(u - s.getBuffer());
 475         }
 476     }
 477
 478     return -1; // continue collecting the chunk
 479 }
 480
 481 enum {
 482     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
 483     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
 484     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
 485 };
 486
 487 static inline UChar
 488 nibbleToHex(uint8_t n) {
 489     n &= 0xf;
 490     return
 491         n <= 9 ?
 492             (UChar)(0x30 + n) :
 493             (UChar)((0x61 - 10) + n);
 494 }
 495
 496 // check the converter's Unicode signature properties;
 497 // the fromUnicode side of the converter must be in its initial state
 498 // and will be reset again if it was used
 499 static int32_t
 500 cnvSigType(UConverter *cnv) {
 501     UErrorCode err;
 502     int32_t result;
 503
 504     // test if the output charset can convert U+FEFF
 505     USet *set = uset_open(1, 0);
 506     err = U_ZERO_ERROR;
 507     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
 508     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
 509         result = CNV_WITH_FEFF;
 510     } else {
 511         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
 512     }
 513     uset_close(set);
 514
 515     if (result == CNV_WITH_FEFF) {
 516         // test if the output charset emits a signature anyway
 517         const UChar a[1] = { 0x61 }; // "a"
 518         const UChar *in;
 519
 520         char buffer[20];
 521         char *out;
 522
 523         in = a;
 524         out = buffer;
 525         err = U_ZERO_ERROR;
 526         ucnv_fromUnicode(cnv,
 527             &out, buffer + sizeof(buffer),
 528             &in, a + 1,
 529             NULL, TRUE, &err);
 530         ucnv_resetFromUnicode(cnv);
 531
 532         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
 533             U_SUCCESS(err)
 534         ) {
 535             result = CNV_ADDS_FEFF;
 536         }
 537     }
 538
 539     return result;
 540 }
 541
 542 class ConvertFile {
 543 public:
 544     ConvertFile() :
 545         buf(NULL), outbuf(NULL), fromoffsets(NULL),
 546         bufsz(0), signature(0) {}
 547
 548     void
 549     setBufferSize(size_t bufferSize) {
 550         bufsz = bufferSize;
 551
 552         buf = new char[2 * bufsz];
 553         outbuf = buf + bufsz;
 554
 555         // +1 for an added U+FEFF in the intermediate Unicode buffer
 556         fromoffsets = new int32_t[bufsz + 1];
 557     }
 558
 559     ~ConvertFile() {
 560         delete [] buf;
 561         delete [] fromoffsets;
 562     }
 563
 564     UBool convertFile(const char *pname,
 565                       const char *fromcpage,
 566                       UConverterToUCallback toucallback,
 567                       const void *touctxt,
 568                       const char *tocpage,
 569                       UConverterFromUCallback fromucallback,
 570                       const void *fromuctxt,
 571                       UBool fallback,
 572                       const char *translit,
 573                       const char *infilestr,
 574                       FILE * outfile, int verbose);
 575 private:
 576     friend int main(int argc, char **argv);
 577
 578     char *buf, *outbuf;
 579     int32_t *fromoffsets;
 580
 581     size_t bufsz;
 582     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
 583 };
 584
 585 // Convert a file from one encoding to another
 586 UBool
 587 ConvertFile::convertFile(const char *pname,
 588                          const char *fromcpage,
 589                          UConverterToUCallback toucallback,
 590                          const void *touctxt,
 591                          const char *tocpage,
 592                          UConverterFromUCallback fromucallback,
 593                          const void *fromuctxt,
 594                          UBool fallback,
 595                          const char *translit,
 596                          const char *infilestr,
 597                          FILE * outfile, int verbose)
 598 {
 599     FILE *infile;
 600     UBool ret = TRUE;
 601     UConverter *convfrom = 0;
 602     UConverter *convto = 0;
 603     UErrorCode err = U_ZERO_ERROR;
 604     UBool flush;
 605     const char *cbufp, *prevbufp;
 606     char *bufp;
 607
 608     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
 609
 610     const UChar *unibuf, *unibufbp;
 611     UChar *unibufp;
 612
 613     size_t rd, wr;
 614
 615 #if !UCONFIG_NO_TRANSLITERATION
 616     Transliterator *t = 0;      // Transliterator acting on Unicode data.
 617     UnicodeString chunk;        // One chunk of the text being collected for transformation.
 618 #endif
 619     UnicodeString u;            // String to do the transliteration.
 620     int32_t ulen;
 621
 622     // use conversion offsets for error messages
 623     // unless a transliterator is used -
 624     // a text transformation will reorder characters in unpredictable ways
 625     UBool useOffsets = TRUE;
 626
 627     // Open the correct input file or connect to stdin for reading input
 628
 629     if (infilestr != 0 && strcmp(infilestr, "-")) {
 630         infile = fopen(infilestr, "rb");
 631         if (infile == 0) {
 632             UnicodeString str1(infilestr, "");
 633             str1.append((UChar32) 0);
 634             UnicodeString str2(strerror(errno), "");
 635             str2.append((UChar32) 0);
 636             initMsg(pname);
 637             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
 638             return FALSE;
 639         }
 640     } else {
 641         infilestr = "-";
 642         infile = stdin;
 643 #ifdef USE_FILENO_BINARY_MODE
 644         if (setmode(fileno(stdin), O_BINARY) == -1) {
 645             initMsg(pname);
 646             u_wmsg(stderr, "cantSetInBinMode");
 647             return FALSE;
 648         }
 649 #endif
 650     }
 651
 652     if (verbose) {
 653         fprintf(stderr, "%s:\n", infilestr);
 654     }
 655
 656 #if !UCONFIG_NO_TRANSLITERATION
 657     // Create transliterator as needed.
 658
 659     if (translit != NULL && *translit) {
 660         UParseError parse;
 661         UnicodeString str(translit), pestr;
 662
 663         /* Create from rules or by ID as needed. */
 664
 665         parse.line = -1;
 666
 667         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
 668             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
 669         } else {
 670             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
 671         }
 672
 673         if (U_FAILURE(err)) {
 674             str.append((UChar32) 0);
 675             initMsg(pname);
 676
 677             if (parse.line >= 0) {
 678                 UChar linebuf[20], offsetbuf[20];
 679                 uprv_itou(linebuf, 20, parse.line, 10, 0);
 680                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
 681                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
 682                     u_wmsg_errorName(err), linebuf, offsetbuf);
 683             } else {
 684                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
 685                     u_wmsg_errorName(err));
 686             }
 687
 688             if (t) {
 689                 delete t;
 690                 t = 0;
 691             }
 692             goto error_exit;
 693         }
 694
 695         useOffsets = FALSE;
 696     }
 697 #endif
 698
 699     // Create codepage converter. If the codepage or its aliases weren't
 700     // available, it returns NULL and a failure code. We also set the
 701     // callbacks, and return errors in the same way.
 702
 703     convfrom = ucnv_open(fromcpage, &err);
 704     if (U_FAILURE(err)) {
 705         UnicodeString str(fromcpage, "");
 706         initMsg(pname);
 707         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
 708             u_wmsg_errorName(err));
 709         goto error_exit;
 710     }
 711     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
 712     if (U_FAILURE(err)) {
 713         initMsg(pname);
 714         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 715         goto error_exit;
 716     }
 717
 718     convto = ucnv_open(tocpage, &err);
 719     if (U_FAILURE(err)) {
 720         UnicodeString str(tocpage, "");
 721         initMsg(pname);
 722         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
 723             u_wmsg_errorName(err));
 724         goto error_exit;
 725     }
 726     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
 727     if (U_FAILURE(err)) {
 728         initMsg(pname);
 729         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 730         goto error_exit;
 731     }
 732     ucnv_setFallback(convto, fallback);
 733
 734     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
 735     int8_t sig;
 736
 737     // OK, we can convert now.
 738     sig = signature;
 739     rd = 0;
 740
 741     do {
 742         willexit = FALSE;
 743
 744         // input file offset at the beginning of the next buffer
 745         infoffset += rd;
 746
 747         rd = fread(buf, 1, bufsz, infile);
 748         if (ferror(infile) != 0) {
 749             UnicodeString str(strerror(errno));
 750             initMsg(pname);
 751             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
 752             goto error_exit;
 753         }
 754
 755         // Convert the read buffer into the new encoding via Unicode.
 756         // After the call 'unibufp' will be placed behind the last
 757         // character that was converted in the 'unibuf'.
 758         // Also the 'cbufp' is positioned behind the last converted
 759         // character.
 760         // At the last conversion in the file, flush should be set to
 761         // true so that we get all characters converted.
 762         //
 763         // The converter must be flushed at the end of conversion so
 764         // that characters on hold also will be written.
 765
 766         cbufp = buf;
 767         flush = (UBool)(rd != bufsz);
 768
 769         // convert until the input is consumed
 770         do {
 771             // remember the start of the current byte-to-Unicode conversion
 772             prevbufp = cbufp;
 773
 774             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
 775
 776             // Use bufsz instead of u.getCapacity() for the targetLimit
 777             // so that we don't overflow fromoffsets[].
 778             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
 779                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
 780
 781             ulen = (int32_t)(unibufp - unibuf);
 782             u.releaseBuffer(ulen);
 783
 784             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
 785             // converting all of the input bytes.
 786             // It works like this because ucnv_toUnicode() returns only under the
 787             // following conditions:
 788             // - an error occurred during conversion (an error code is set)
 789             // - the target buffer is filled (the error code indicates an overflow)
 790             // - the source is consumed
 791             // That is, if the error code does not indicate a failure,
 792             // not even an overflow, then the source must be consumed entirely.
 793             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
 794
 795             if (err == U_BUFFER_OVERFLOW_ERROR) {
 796                 err = U_ZERO_ERROR;
 797             } else if (U_FAILURE(err)) {
 798                 char pos[32], errorBytes[32];
 799                 int8_t i, length, errorLength;
 800
 801                 UErrorCode localError = U_ZERO_ERROR;
 802                 errorLength = (int8_t)sizeof(errorBytes);
 803                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
 804                 if (U_FAILURE(localError) || errorLength == 0) {
 805                     errorLength = 1;
 806                 }
 807
 808                 // print the input file offset of the start of the error bytes:
 809                 // input file offset of the current byte buffer +
 810                 // length of the just consumed bytes -
 811                 // length of the error bytes
 812                 length =
 813                     (int8_t)sprintf(pos, "%d",
 814                         (int)(infoffset + (cbufp - buf) - errorLength));
 815
 816                 // output the bytes that caused the error
 817                 UnicodeString str;
 818                 for (i = 0; i < errorLength; ++i) {
 819                     if (i > 0) {
 820                         str.append((UChar)uSP);
 821                     }
 822                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
 823                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
 824                 }
 825
 826                 initMsg(pname);
 827                 u_wmsg(stderr, "problemCvtToU",
 828                         UnicodeString(pos, length, "").getTerminatedBuffer(),
 829                         str.getTerminatedBuffer(),
 830                         u_wmsg_errorName(err));
 831
 832                 willexit = TRUE;
 833                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
 834             }
 835
 836             // Replaced a check for whether the input was consumed by
 837             // looping until it is; message key "premEndInput" now obsolete.
 838
 839             if (ulen == 0) {
 840                 continue;
 841             }
 842
 843             // remove a U+FEFF Unicode signature character if requested
 844             if (sig < 0) {
 845                 if (u.charAt(0) == uSig) {
 846                     u.remove(0, 1);
 847
 848                     // account for the removed UChar and offset
 849                     --ulen;
 850
 851                     if (useOffsets) {
 852                         // remove an offset from fromoffsets[] as well
 853                         // to keep the array parallel with the UChars
 854                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
 855                     }
 856
 857                 }
 858                 sig = 0;
 859             }
 860
 861 #if !UCONFIG_NO_TRANSLITERATION
 862             // Transliterate/transform if needed.
 863
 864             // For transformation, we use chunking code -
 865             // collect Unicode input until, for example, an end-of-line,
 866             // then transform and output-convert that and continue collecting.
 867             // This makes the transformation result independent of the buffer size
 868             // while avoiding the slower keyboard mode.
 869             // The end-of-chunk characters are completely included in the
 870             // transformed string in case they are to be transformed themselves.
 871             if (t != NULL) {
 872                 UnicodeString out;
 873                 int32_t chunkLimit;
 874
 875                 do {
 876                     chunkLimit = getChunkLimit(chunk, u);
 877                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
 878                         // use all of the rest at the end of the text
 879                         chunkLimit = u.length();
 880                     }
 881                     if (chunkLimit >= 0) {
 882                         // complete the chunk and transform it
 883                         chunk.append(u, 0, chunkLimit);
 884                         u.remove(0, chunkLimit);
 885                         t->transliterate(chunk);
 886
 887                         // append the transformation result to the result and empty the chunk
 888                         out.append(chunk);
 889                         chunk.remove();
 890                     } else {
 891                         // continue collecting the chunk
 892                         chunk.append(u);
 893                         break;
 894                     }
 895                 } while (!u.isEmpty());
 896
 897                 u = out;
 898                 ulen = u.length();
 899             }
 900 #endif
 901
 902             // add a U+FEFF Unicode signature character if requested
 903             // and possible/necessary
 904             if (sig > 0) {
 905                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
 906                     u.insert(0, (UChar)uSig);
 907
 908                     if (useOffsets) {
 909                         // insert a pseudo-offset into fromoffsets[] as well
 910                         // to keep the array parallel with the UChars
 911                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
 912                         fromoffsets[0] = -1;
 913                     }
 914
 915                     // account for the additional UChar and offset
 916                     ++ulen;
 917                 }
 918                 sig = 0;
 919             }
 920
 921             // Convert the Unicode buffer into the destination codepage
 922             // Again 'bufp' will be placed behind the last converted character
 923             // And 'unibufp' will be placed behind the last converted unicode character
 924             // At the last conversion flush should be set to true to ensure that
 925             // all characters left get converted
 926
 927             unibuf = unibufbp = u.getBuffer();
 928
 929             do {
 930                 bufp = outbuf;
 931
 932                 // Use fromSawEndOfBytes in addition to the flush flag -
 933                 // it indicates whether the intermediate Unicode string
 934                 // contains the very last UChars for the very last input bytes.
 935                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
 936                                  &unibufbp,
 937                                  unibuf + ulen,
 938                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
 939
 940                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
 941                 // converting all of the intermediate UChars.
 942                 // See comment for fromSawEndOfBytes.
 943                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
 944
 945                 if (err == U_BUFFER_OVERFLOW_ERROR) {
 946                     err = U_ZERO_ERROR;
 947                 } else if (U_FAILURE(err)) {
 948                     UChar errorUChars[4];
 949                     const char *errtag;
 950                     char pos[32];
 951                     UChar32 c;
 952                     int8_t i, length, errorLength;
 953
 954                     UErrorCode localError = U_ZERO_ERROR;
 955                     errorLength = (int8_t)LENGTHOF(errorUChars);
 956                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
 957                     if (U_FAILURE(localError) || errorLength == 0) {
 958                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
 959                         errorLength = 1;
 960                     }
 961
 962                     int32_t ferroffset;
 963
 964                     if (useOffsets) {
 965                         // Unicode buffer offset of the start of the error UChars
 966                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
 967                         if (ferroffset < 0) {
 968                             // approximation - the character started in the previous Unicode buffer
 969                             ferroffset = 0;
 970                         }
 971
 972                         // get the corresponding byte offset out of fromoffsets[]
 973                         // go back if the offset is not known for some of the UChars
 974                         int32_t fromoffset;
 975                         do {
 976                             fromoffset = fromoffsets[ferroffset];
 977                         } while (fromoffset < 0 && --ferroffset >= 0);
 978
 979                         // total input file offset =
 980                         // input file offset of the current byte buffer +
 981                         // byte buffer offset of where the current Unicode buffer is converted from +
 982                         // fromoffsets[Unicode offset]
 983                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
 984                         errtag = "problemCvtFromU";
 985                     } else {
 986                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
 987                         // be different from what the offsets refer to.
 988
 989                         // output file offset
 990                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
 991                         errtag = "problemCvtFromUOut";
 992                     }
 993
 994                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
 995
 996                     // output the code points that caused the error
 997                     UnicodeString str;
 998                     for (i = 0; i < errorLength;) {
 999                         if (i > 0) {
1000                             str.append((UChar)uSP);
1001                         }
1002                         U16_NEXT(errorUChars, i, errorLength, c);
1003                         if (c >= 0x100000) {
1004                             str.append(nibbleToHex((uint8_t)(c >> 20)));
1005                         }
1006                         if (c >= 0x10000) {
1007                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1008                         }
1009                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1010                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1011                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1012                         str.append(nibbleToHex((uint8_t)c));
1013                     }
1014
1015                     initMsg(pname);
1016                     u_wmsg(stderr, errtag,
1017                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1018                             str.getTerminatedBuffer(),
1019                            u_wmsg_errorName(err));
1020                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1021
1022                     willexit = TRUE;
1023                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1024                 }
1025
1026                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1027                 // looping until they are; message key "premEnd" now obsolete.
1028
1029                 // Finally, write the converted buffer to the output file
1030                 size_t outlen = (size_t) (bufp - outbuf);
1031                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1032                 if (wr != outlen) {
1033                     UnicodeString str(strerror(errno));
1034                     initMsg(pname);
1035                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1036                     willexit = TRUE;
1037                 }
1038
1039                 if (willexit) {
1040                     goto error_exit;
1041                 }
1042             } while (!toSawEndOfUnicode);
1043         } while (!fromSawEndOfBytes);
1044     } while (!flush);           // Stop when we have flushed the
1045                                 // converters (this means that it's
1046                                 // the end of output)
1047
1048     goto normal_exit;
1049
1050 error_exit:
1051     ret = FALSE;
1052
1053 normal_exit:
1054     // Cleanup.
1055
1056     ucnv_close(convfrom);
1057     ucnv_close(convto);
1058
1059 #if !UCONFIG_NO_TRANSLITERATION
1060     delete t;
1061 #endif
1062
1063     if (infile != stdin) {
1064         fclose(infile);
1065     }
1066
1067     return ret;
1068 }
1069
1070 static void usage(const char *pname, int ecode) {
1071     const UChar *msg;
1072     int32_t msgLen;
1073     UErrorCode err = U_ZERO_ERROR;
1074     FILE *fp = ecode ? stderr : stdout;
1075     int res;
1076
1077     initMsg(pname);
1078     msg =
1079         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1080                             &msgLen, &err);
1081     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1082     UnicodeString mname(msg, msgLen + 1);
1083
1084     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1085     if (!ecode) {
1086         if (!res) {
1087             fputc('\n', fp);
1088         }
1089         if (!u_wmsg(fp, "help")) {
1090             /* Now dump callbacks and finish. */
1091
1092             int i, count =
1093                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1094             for (i = 0; i < count; ++i) {
1095                 fprintf(fp, " %s", transcode_callbacks[i].name);
1096             }
1097             fputc('\n', fp);
1098         }
1099     }
1100
1101     exit(ecode);
1102 }
1103
1104 extern int
1105 main(int argc, char **argv)
1106 {
1107     FILE *outfile;
1108     int ret = 0;
1109
1110     size_t bufsz = DEFAULT_BUFSZ;
1111
1112     const char *fromcpage = 0;
1113     const char *tocpage = 0;
1114     const char *translit = 0;
1115     const char *outfilestr = 0;
1116     UBool fallback = FALSE;
1117
1118     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1119     const void *fromuctxt = 0;
1120     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1121     const void *touctxt = 0;
1122
1123     char **iter, **remainArgv, **remainArgvLimit;
1124     char **end = argv + argc;
1125
1126     const char *pname;
1127
1128     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1129     const char *printName = 0;
1130
1131     UBool verbose = FALSE;
1132     UErrorCode status = U_ZERO_ERROR;
1133
1134     ConvertFile cf;
1135
1136     /* Initialize ICU */
1137     u_init(&status);
1138     if (U_FAILURE(status)) {
1139         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1140             argv[0], u_errorName(status));
1141         exit(1);
1142     }
1143
1144     // Get and prettify pname.
1145     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1146 #ifdef WIN32
1147     if (!pname) {
1148         pname = uprv_strrchr(*argv, '/');
1149     }
1150 #endif
1151     if (!pname) {
1152         pname = *argv;
1153     } else {
1154         ++pname;
1155     }
1156
1157     // First, get the arguments from command-line
1158     // to know the codepages to convert between
1159
1160     remainArgv = remainArgvLimit = argv + 1;
1161     for (iter = argv + 1; iter != end; iter++) {
1162         // Check for from charset
1163         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1164             iter++;
1165             if (iter != end)
1166                 fromcpage = *iter;
1167             else
1168                 usage(pname, 1);
1169         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1170             iter++;
1171             if (iter != end)
1172                 tocpage = *iter;
1173             else
1174                 usage(pname, 1);
1175         } else if (strcmp("-x", *iter) == 0) {
1176             iter++;
1177             if (iter != end)
1178                 translit = *iter;
1179             else
1180                 usage(pname, 1);
1181         } else if (!strcmp("--fallback", *iter)) {
1182             fallback = TRUE;
1183         } else if (!strcmp("--no-fallback", *iter)) {
1184             fallback = FALSE;
1185         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1186             iter++;
1187             if (iter != end) {
1188                 bufsz = atoi(*iter);
1189                 if ((int) bufsz <= 0) {
1190                     initMsg(pname);
1191                     UnicodeString str(*iter);
1192                     initMsg(pname);
1193                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1194                     return 3;
1195                 }
1196             } else {
1197                 usage(pname, 1);
1198             }
1199         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1200             if (printTranslits) {
1201                 usage(pname, 1);
1202             }
1203             printConvs = TRUE;
1204         } else if (strcmp("--default-code", *iter) == 0) {
1205             if (printTranslits) {
1206                 usage(pname, 1);
1207             }
1208             printName = ucnv_getDefaultName();
1209         } else if (strcmp("--list-code", *iter) == 0) {
1210             if (printTranslits) {
1211                 usage(pname, 1);
1212             }
1213
1214             iter++;
1215             if (iter != end) {
1216                 UErrorCode e = U_ZERO_ERROR;
1217                 printName = ucnv_getAlias(*iter, 0, &e);
1218                 if (U_FAILURE(e) || !printName) {
1219                     UnicodeString str(*iter);
1220                     initMsg(pname);
1221                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1222                     return 2;
1223                 }
1224             } else
1225                 usage(pname, 1);
1226         } else if (strcmp("--canon", *iter) == 0) {
1227             printCanon = TRUE;
1228         } else if (strcmp("-L", *iter) == 0
1229             || !strcmp("--list-transliterators", *iter)) {
1230             if (printConvs) {
1231                 usage(pname, 1);
1232             }
1233             printTranslits = TRUE;
1234         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1235             || !strcmp("--help", *iter)) {
1236             usage(pname, 0);
1237         } else if (!strcmp("-c", *iter)) {
1238             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1239         } else if (!strcmp("--to-callback", *iter)) {
1240             iter++;
1241             if (iter != end) {
1242                 const struct callback_ent *cbe = findCallback(*iter);
1243                 if (cbe) {
1244                     fromucallback = cbe->fromu;
1245                     fromuctxt = cbe->fromuctxt;
1246                 } else {
1247                     UnicodeString str(*iter);
1248                     initMsg(pname);
1249                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1250                     return 4;
1251                 }
1252             } else {
1253                 usage(pname, 1);
1254             }
1255         } else if (!strcmp("--from-callback", *iter)) {
1256             iter++;
1257             if (iter != end) {
1258                 const struct callback_ent *cbe = findCallback(*iter);
1259                 if (cbe) {
1260                     toucallback = cbe->tou;
1261                     touctxt = cbe->touctxt;
1262                 } else {
1263                     UnicodeString str(*iter);
1264                     initMsg(pname);
1265                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1266                     return 4;
1267                 }
1268             } else {
1269                 usage(pname, 1);
1270             }
1271         } else if (!strcmp("-i", *iter)) {
1272             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1273         } else if (!strcmp("--callback", *iter)) {
1274             iter++;
1275             if (iter != end) {
1276                 const struct callback_ent *cbe = findCallback(*iter);
1277                 if (cbe) {
1278                     fromucallback = cbe->fromu;
1279                     fromuctxt = cbe->fromuctxt;
1280                     toucallback = cbe->tou;
1281                     touctxt = cbe->touctxt;
1282                 } else {
1283                     UnicodeString str(*iter);
1284                     initMsg(pname);
1285                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1286                     return 4;
1287                 }
1288             } else {
1289                 usage(pname, 1);
1290             }
1291         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1292             verbose = FALSE;
1293         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1294             verbose = TRUE;
1295         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1296             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1297             return 0;
1298         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1299             ++iter;
1300             if (iter != end && !outfilestr) {
1301                 outfilestr = *iter;
1302             } else {
1303                 usage(pname, 1);
1304             }
1305         } else if (0 == strcmp("--add-signature", *iter)) {
1306             cf.signature = 1;
1307         } else if (0 == strcmp("--remove-signature", *iter)) {
1308             cf.signature = -1;
1309         } else if (**iter == '-' && (*iter)[1]) {
1310             usage(pname, 1);
1311         } else {
1312             // move a non-option up in argv[]
1313             *remainArgvLimit++ = *iter;
1314         }
1315     }
1316
1317     if (printConvs || printName) {
1318         return printConverters(pname, printName, printCanon) ? 2 : 0;
1319     } else if (printTranslits) {
1320         return printTransliterators(printCanon) ? 3 : 0;
1321     }
1322
1323     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1324         fromcpage = ucnv_getDefaultName();
1325     }
1326     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1327         tocpage = ucnv_getDefaultName();
1328     }
1329
1330     // Open the correct output file or connect to stdout for reading input
1331     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1332         outfile = fopen(outfilestr, "wb");
1333         if (outfile == 0) {
1334             UnicodeString str1(outfilestr, "");
1335             UnicodeString str2(strerror(errno), "");
1336             initMsg(pname);
1337             u_wmsg(stderr, "cantCreateOutputF",
1338                 str1.getBuffer(), str2.getBuffer());
1339             return 1;
1340         }
1341     } else {
1342         outfilestr = "-";
1343         outfile = stdout;
1344 #ifdef USE_FILENO_BINARY_MODE
1345         if (setmode(fileno(outfile), O_BINARY) == -1) {
1346             u_wmsg(stderr, "cantSetOutBinMode");
1347             exit(-1);
1348         }
1349 #endif
1350     }
1351
1352     /* Loop again on the arguments to find all the input files, and
1353     convert them. */
1354
1355     cf.setBufferSize(bufsz);
1356
1357     if(remainArgv < remainArgvLimit) {
1358         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1359             if (!cf.convertFile(
1360                     pname, fromcpage, toucallback, touctxt, tocpage,
1361                     fromucallback, fromuctxt, fallback, translit, *iter,
1362                     outfile, verbose)
1363             ) {
1364                 goto error_exit;
1365             }
1366         }
1367     } else {
1368         if (!cf.convertFile(
1369                 pname, fromcpage, toucallback, touctxt, tocpage,
1370                 fromucallback, fromuctxt, fallback, translit, 0,
1371                 outfile, verbose)
1372         ) {
1373             goto error_exit;
1374         }
1375     }
1376
1377     goto normal_exit;
1378 error_exit:
1379     ret = 1;
1380 normal_exit:
1381
1382     if (outfile != stdout) {
1383         fclose(outfile);
1384     }
1385
1386     return ret;
1387 }
1388
1389
1390 /*
1391  * Hey, Emacs, please set the following:
1392  *
1393  * Local Variables:
1394  * indent-tabs-mode: nil
1395  * End:
1396  *
1397  */