icuSources/extra/uconv/uconv.cpp

   1 /*****************************************************************************
   2 *
   3 *   Copyright (C) 1999-2016, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 *
   6 ******************************************************************************/
   7
   8 /*
   9  * uconv(1): an iconv(1)-like converter using ICU.
  10  *
  11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
  12  * contributed in 1999.
  13  *
  14  * Conversion to the C conversion API and many improvements by
  15  * Yves Arrouye <yves@realnames.com>, current maintainer.
  16  *
  17  * Markus Scherer maintainer from 2003.
  18  * See source code repository history for changes.
  19  */
  20
  21 #include <unicode/utypes.h>
  22 #include <unicode/putil.h>
  23 #include <unicode/ucnv.h>
  24 #include <unicode/uenum.h>
  25 #include <unicode/unistr.h>
  26 #include <unicode/translit.h>
  27 #include <unicode/uset.h>
  28 #include <unicode/uclean.h>
  29 #include <unicode/utf16.h>
  30
  31 #include <stdio.h>
  32 #include <errno.h>
  33 #include <string.h>
  34 #include <stdlib.h>
  35
  36 #include "cmemory.h"
  37 #include "cstring.h"
  38 #include "ustrfmt.h"
  39
  40 #include "unicode/uwmsg.h"
  41
  42 U_NAMESPACE_USE
  43
  44 #if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
  45 #include <io.h>
  46 #include <fcntl.h>
  47 #if U_PLATFORM_USES_ONLY_WIN32_API
  48 #define USE_FILENO_BINARY_MODE 1
  49 /* Windows likes to rename Unix-like functions */
  50 #ifndef fileno
  51 #define fileno _fileno
  52 #endif
  53 #ifndef setmode
  54 #define setmode _setmode
  55 #endif
  56 #ifndef O_BINARY
  57 #define O_BINARY _O_BINARY
  58 #endif
  59 #endif
  60 #endif
  61
  62 #ifdef UCONVMSG_LINK
  63 /* below from the README */
  64 #include "unicode/utypes.h"
  65 #include "unicode/udata.h"
  66 U_CFUNC char uconvmsg_dat[];
  67 #endif
  68
  69 #define DEFAULT_BUFSZ   4096
  70 #define UCONVMSG "uconvmsg"
  71
  72 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
  73
  74 /*
  75  * Initialize the message bundle so that message strings can be fetched
  76  * by u_wmsg().
  77  *
  78  */
  79
  80 static void initMsg(const char *pname) {
  81     static int ps = 0;
  82
  83     if (!ps) {
  84         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
  85         UErrorCode err = U_ZERO_ERROR;
  86
  87         ps = 1;
  88
  89         /* Set up our static data - if any */
  90 #if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */
  91         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
  92         if (U_FAILURE(err)) {
  93           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
  94                   pname, u_errorName(err));
  95           err = U_ZERO_ERROR; /* It may still fail */
  96         }
  97 #endif
  98
  99         /* Get messages. */
 100         gBundle = u_wmsg_setPath(UCONVMSG, &err);
 101         if (U_FAILURE(err)) {
 102             fprintf(stderr,
 103                     "%s: warning: couldn't open bundle %s: %s\n",
 104                     pname, UCONVMSG, u_errorName(err));
 105 #ifdef UCONVMSG_LINK
 106             fprintf(stderr,
 107                     "%s: setAppData was called, internal data %s failed to load\n",
 108                         pname, UCONVMSG);
 109 #endif
 110
 111             err = U_ZERO_ERROR;
 112             /* that was try #1, try again with a path */
 113             uprv_strcpy(dataPath, u_getDataDirectory());
 114             uprv_strcat(dataPath, U_FILE_SEP_STRING);
 115             uprv_strcat(dataPath, UCONVMSG);
 116
 117             gBundle = u_wmsg_setPath(dataPath, &err);
 118             if (U_FAILURE(err)) {
 119                 fprintf(stderr,
 120                     "%s: warning: still couldn't open bundle %s: %s\n",
 121                     pname, dataPath, u_errorName(err));
 122                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
 123             }
 124         }
 125     }
 126 }
 127
 128 /* Mapping of callback names to the callbacks passed to the converter
 129    API. */
 130
 131 static struct callback_ent {
 132     const char *name;
 133     UConverterFromUCallback fromu;
 134     const void *fromuctxt;
 135     UConverterToUCallback tou;
 136     const void *touctxt;
 137 } transcode_callbacks[] = {
 138     { "substitute",
 139       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
 140       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
 141     { "skip",
 142       UCNV_FROM_U_CALLBACK_SKIP, 0,
 143       UCNV_TO_U_CALLBACK_SKIP, 0 },
 144     { "stop",
 145       UCNV_FROM_U_CALLBACK_STOP, 0,
 146       UCNV_TO_U_CALLBACK_STOP, 0 },
 147     { "escape",
 148       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
 149       UCNV_TO_U_CALLBACK_ESCAPE, 0},
 150     { "escape-icu",
 151       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
 152       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
 153     { "escape-java",
 154       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
 155       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
 156     { "escape-c",
 157       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
 158       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
 159     { "escape-xml",
 160       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 161       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 162     { "escape-xml-hex",
 163       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 164       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 165     { "escape-xml-dec",
 166       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
 167       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
 168     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
 169       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
 170 };
 171
 172 /* Return a pointer to a callback record given its name. */
 173
 174 static const struct callback_ent *findCallback(const char *name) {
 175     int i, count =
 176         UPRV_LENGTHOF(transcode_callbacks);
 177
 178     /* We'll do a linear search, there aren't many of them and bsearch()
 179        may not be that portable. */
 180
 181     for (i = 0; i < count; ++i) {
 182         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
 183             return &transcode_callbacks[i];
 184         }
 185     }
 186
 187     return 0;
 188 }
 189
 190 /* Print converter information. If lookfor is set, only that converter will
 191    be printed, otherwise all converters will be printed. If canon is non
 192    zero, tags and aliases for each converter are printed too, in the format
 193    expected for convrters.txt(5). */
 194
 195 static int printConverters(const char *pname, const char *lookfor,
 196     UBool canon)
 197 {
 198     UErrorCode err = U_ZERO_ERROR;
 199     int32_t num;
 200     uint16_t num_stds;
 201     const char **stds;
 202
 203     /* If there is a specified name, just handle that now. */
 204
 205     if (lookfor) {
 206         if (!canon) {
 207             printf("%s\n", lookfor);
 208             return 0;
 209         } else {
 210         /*  Because we are printing a canonical name, we need the
 211             true converter name. We've done that already except for
 212             the default name (because we want to print the exact
 213             name one would get when calling ucnv_getDefaultName()
 214             in non-canon mode). But since we do not know at this
 215             point if we have the default name or something else, we
 216             need to normalize again to the canonical converter
 217             name. */
 218
 219             const char *truename = ucnv_getAlias(lookfor, 0, &err);
 220             if (U_SUCCESS(err)) {
 221                 lookfor = truename;
 222             } else {
 223                 err = U_ZERO_ERROR;
 224             }
 225         }
 226     }
 227
 228     /* Print converter names. We come here for one of two reasons: we
 229        are printing all the names (lookfor was null), or we have a
 230        single converter to print but in canon mode, hence we need to
 231        get to it in order to print everything. */
 232
 233     num = ucnv_countAvailable();
 234     if (num <= 0) {
 235         initMsg(pname);
 236         u_wmsg(stderr, "cantGetNames");
 237         return -1;
 238     }
 239     if (lookfor) {
 240         num = 1;                /* We know where we want to be. */
 241     }
 242
 243     num_stds = ucnv_countStandards();
 244     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
 245     if (!stds) {
 246         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
 247         return -1;
 248     } else {
 249         uint16_t s;
 250
 251         if (canon) {
 252             printf("{ ");
 253         }
 254         for (s = 0; s < num_stds; ++s) {
 255             stds[s] = ucnv_getStandard(s, &err);
 256             if (canon) {
 257                 printf("%s ", stds[s]);
 258             }
 259             if (U_FAILURE(err)) {
 260                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
 261                 goto error_cleanup;
 262             }
 263         }
 264         if (canon) {
 265             puts("}");
 266         }
 267     }
 268
 269     for (int32_t i = 0; i < num; i++) {
 270         const char *name;
 271         uint16_t num_aliases;
 272
 273         /* Set the name either to what we are looking for, or
 274         to the current converter name. */
 275
 276         if (lookfor) {
 277             name = lookfor;
 278         } else {
 279             name = ucnv_getAvailableName(i);
 280         }
 281
 282         /* Get all the aliases associated to the name. */
 283
 284         err = U_ZERO_ERROR;
 285         num_aliases = ucnv_countAliases(name, &err);
 286         if (U_FAILURE(err)) {
 287             printf("%s", name);
 288
 289             UnicodeString str(name, "");
 290             putchar('\t');
 291             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 292                 u_wmsg_errorName(err));
 293             goto error_cleanup;
 294         } else {
 295             uint16_t a, s, t;
 296
 297             /* Write all the aliases and their tags. */
 298
 299             for (a = 0; a < num_aliases; ++a) {
 300                 const char *alias = ucnv_getAlias(name, a, &err);
 301
 302                 if (U_FAILURE(err)) {
 303                     UnicodeString str(name, "");
 304                     putchar('\t');
 305                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 306                         u_wmsg_errorName(err));
 307                     goto error_cleanup;
 308                 }
 309
 310                 /* Print the current alias so that it looks right. */
 311                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
 312                                  alias,
 313                                  (canon ? "" : " "));
 314
 315                 /* Look (slowly, linear searching) for a tag. */
 316
 317                 if (canon) {
 318                     /* -1 to skip the last standard */
 319                     for (s = t = 0; s < num_stds-1; ++s) {
 320                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
 321                         if (U_SUCCESS(err)) {
 322                             /* List the standard tags */
 323                             const char *standardName;
 324                             UBool isFirst = TRUE;
 325                             UErrorCode enumError = U_ZERO_ERROR;
 326                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
 327                                 /* See if this alias is supported by this standard. */
 328                                 if (!strcmp(standardName, alias)) {
 329                                     if (!t) {
 330                                         printf(" {");
 331                                         t = 1;
 332                                     }
 333                                     /* Print a * after the default standard name */
 334                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
 335                                 }
 336                                 isFirst = FALSE;
 337                             }
 338                         }
 339                     }
 340                     if (t) {
 341                         printf(" }");
 342                     }
 343                 }
 344                 /* Terminate this entry. */
 345                 if (canon) {
 346                     puts("");
 347                 }
 348
 349                 /* Move on. */
 350             }
 351             /* Terminate this entry. */
 352             if (!canon) {
 353                 puts("");
 354             }
 355         }
 356     }
 357
 358     /* Free temporary data. */
 359
 360     uprv_free(stds);
 361
 362     /* Success. */
 363
 364     return 0;
 365 error_cleanup:
 366     uprv_free(stds);
 367     return -1;
 368 }
 369
 370 /* Print all available transliterators. If canon is non zero, print
 371    one transliterator per line. */
 372
 373 static int printTransliterators(UBool canon)
 374 {
 375 #if UCONFIG_NO_TRANSLITERATION
 376     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
 377     return 1;
 378 #else
 379     UErrorCode status = U_ZERO_ERROR;
 380     UEnumeration *ids = utrans_openIDs(&status);
 381     int32_t i, numtrans = uenum_count(ids, &status);
 382
 383     char sepchar = canon ? '\n' : ' ';
 384
 385     for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
 386         int32_t len;
 387         const char *nextTrans = uenum_next(ids, &len, &status);
 388
 389         printf("%s", nextTrans);
 390         if (i < numtrans - 1) {
 391             putchar(sepchar);
 392         }
 393     }
 394
 395     uenum_close(ids);
 396
 397     /* Add a terminating newline if needed. */
 398
 399     if (sepchar != '\n') {
 400         putchar('\n');
 401     }
 402
 403     /* Success. */
 404
 405     return 0;
 406 #endif
 407 }
 408
 409 enum {
 410     uSP = 0x20,         // space
 411     uCR = 0xd,          // carriage return
 412     uLF = 0xa,          // line feed
 413     uNL = 0x85,         // newline
 414     uLS = 0x2028,       // line separator
 415     uPS = 0x2029,       // paragraph separator
 416     uSig = 0xfeff       // signature/BOM character
 417 };
 418
 419 static inline int32_t
 420 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
 421     // find one of
 422     // CR, LF, CRLF, NL, LS, PS
 423     // for paragraph ends (see UAX #13/Unicode 4)
 424     // and include it in the chunk
 425     // all of these characters are on the BMP
 426     // do not include FF or VT in case they are part of a paragraph
 427     // (important for bidi contexts)
 428     static const UChar paraEnds[] = {
 429         0xd, 0xa, 0x85, 0x2028, 0x2029
 430     };
 431     enum {
 432         iCR, iLF, iNL, iLS, iPS, iCount
 433     };
 434
 435     // first, see if there is a CRLF split between prev and s
 436     if (prev.endsWith(paraEnds + iCR, 1)) {
 437         if (s.startsWith(paraEnds + iLF, 1)) {
 438             return 1; // split CRLF, include the LF
 439         } else if (!s.isEmpty()) {
 440             return 0; // complete the last chunk
 441         } else {
 442             return -1; // wait for actual further contents to arrive
 443         }
 444     }
 445
 446     const UChar *u = s.getBuffer(), *limit = u + s.length();
 447     UChar c;
 448
 449     while (u < limit) {
 450         c = *u++;
 451         if (
 452             ((c < uSP) && (c == uCR || c == uLF)) ||
 453             (c == uNL) ||
 454             ((c & uLS) == uLS)
 455         ) {
 456             if (c == uCR) {
 457                 // check for CRLF
 458                 if (u == limit) {
 459                     return -1; // LF may be in the next chunk
 460                 } else if (*u == uLF) {
 461                     ++u; // include the LF in this chunk
 462                 }
 463             }
 464             return (int32_t)(u - s.getBuffer());
 465         }
 466     }
 467
 468     return -1; // continue collecting the chunk
 469 }
 470
 471 enum {
 472     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
 473     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
 474     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
 475 };
 476
 477 static inline UChar
 478 nibbleToHex(uint8_t n) {
 479     n &= 0xf;
 480     return
 481         n <= 9 ?
 482             (UChar)(0x30 + n) :
 483             (UChar)((0x61 - 10) + n);
 484 }
 485
 486 // check the converter's Unicode signature properties;
 487 // the fromUnicode side of the converter must be in its initial state
 488 // and will be reset again if it was used
 489 static int32_t
 490 cnvSigType(UConverter *cnv) {
 491     UErrorCode err;
 492     int32_t result;
 493
 494     // test if the output charset can convert U+FEFF
 495     USet *set = uset_open(1, 0);
 496     err = U_ZERO_ERROR;
 497     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
 498     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
 499         result = CNV_WITH_FEFF;
 500     } else {
 501         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
 502     }
 503     uset_close(set);
 504
 505     if (result == CNV_WITH_FEFF) {
 506         // test if the output charset emits a signature anyway
 507         const UChar a[1] = { 0x61 }; // "a"
 508         const UChar *in;
 509
 510         char buffer[20];
 511         char *out;
 512
 513         in = a;
 514         out = buffer;
 515         err = U_ZERO_ERROR;
 516         ucnv_fromUnicode(cnv,
 517             &out, buffer + sizeof(buffer),
 518             &in, a + 1,
 519             NULL, TRUE, &err);
 520         ucnv_resetFromUnicode(cnv);
 521
 522         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
 523             U_SUCCESS(err)
 524         ) {
 525             result = CNV_ADDS_FEFF;
 526         }
 527     }
 528
 529     return result;
 530 }
 531
 532 class ConvertFile {
 533 public:
 534     ConvertFile() :
 535         buf(NULL), outbuf(NULL), fromoffsets(NULL),
 536         bufsz(0), signature(0) {}
 537
 538     void
 539     setBufferSize(size_t bufferSize) {
 540         bufsz = bufferSize;
 541
 542         buf = new char[2 * bufsz];
 543         outbuf = buf + bufsz;
 544
 545         // +1 for an added U+FEFF in the intermediate Unicode buffer
 546         fromoffsets = new int32_t[bufsz + 1];
 547     }
 548
 549     ~ConvertFile() {
 550         delete [] buf;
 551         delete [] fromoffsets;
 552     }
 553
 554     UBool convertFile(const char *pname,
 555                       const char *fromcpage,
 556                       UConverterToUCallback toucallback,
 557                       const void *touctxt,
 558                       const char *tocpage,
 559                       UConverterFromUCallback fromucallback,
 560                       const void *fromuctxt,
 561                       UBool fallback,
 562                       const char *translit,
 563                       const char *infilestr,
 564                       FILE * outfile, int verbose);
 565 private:
 566     friend int main(int argc, char **argv);
 567
 568     char *buf, *outbuf;
 569     int32_t *fromoffsets;
 570
 571     size_t bufsz;
 572     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
 573 };
 574
 575 // Convert a file from one encoding to another
 576 UBool
 577 ConvertFile::convertFile(const char *pname,
 578                          const char *fromcpage,
 579                          UConverterToUCallback toucallback,
 580                          const void *touctxt,
 581                          const char *tocpage,
 582                          UConverterFromUCallback fromucallback,
 583                          const void *fromuctxt,
 584                          UBool fallback,
 585                          const char *translit,
 586                          const char *infilestr,
 587                          FILE * outfile, int verbose)
 588 {
 589     FILE *infile;
 590     UBool ret = TRUE;
 591     UConverter *convfrom = 0;
 592     UConverter *convto = 0;
 593     UErrorCode err = U_ZERO_ERROR;
 594     UBool flush;
 595     UBool closeFile = FALSE;
 596     const char *cbufp, *prevbufp;
 597     char *bufp;
 598
 599     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
 600
 601     const UChar *unibuf, *unibufbp;
 602     UChar *unibufp;
 603
 604     size_t rd, wr;
 605
 606 #if !UCONFIG_NO_TRANSLITERATION
 607     Transliterator *t = 0;      // Transliterator acting on Unicode data.
 608     UnicodeString chunk;        // One chunk of the text being collected for transformation.
 609 #endif
 610     UnicodeString u;            // String to do the transliteration.
 611     int32_t ulen;
 612
 613     // use conversion offsets for error messages
 614     // unless a transliterator is used -
 615     // a text transformation will reorder characters in unpredictable ways
 616     UBool useOffsets = TRUE;
 617
 618     // Open the correct input file or connect to stdin for reading input
 619
 620     if (infilestr != 0 && strcmp(infilestr, "-")) {
 621         infile = fopen(infilestr, "rb");
 622         if (infile == 0) {
 623             UnicodeString str1(infilestr, "");
 624             str1.append((UChar32) 0);
 625             UnicodeString str2(strerror(errno), "");
 626             str2.append((UChar32) 0);
 627             initMsg(pname);
 628             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
 629             return FALSE;
 630         }
 631         closeFile = TRUE;
 632     } else {
 633         infilestr = "-";
 634         infile = stdin;
 635 #ifdef USE_FILENO_BINARY_MODE
 636         if (setmode(fileno(stdin), O_BINARY) == -1) {
 637             initMsg(pname);
 638             u_wmsg(stderr, "cantSetInBinMode");
 639             return FALSE;
 640         }
 641 #endif
 642     }
 643
 644     if (verbose) {
 645         fprintf(stderr, "%s:\n", infilestr);
 646     }
 647
 648 #if !UCONFIG_NO_TRANSLITERATION
 649     // Create transliterator as needed.
 650
 651     if (translit != NULL && *translit) {
 652         UParseError parse;
 653         UnicodeString str(translit), pestr;
 654
 655         /* Create from rules or by ID as needed. */
 656
 657         parse.line = -1;
 658
 659         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
 660             t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err);
 661         } else {
 662             t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err);
 663         }
 664
 665         if (U_FAILURE(err)) {
 666             str.append((UChar32) 0);
 667             initMsg(pname);
 668
 669             if (parse.line >= 0) {
 670                 UChar linebuf[20], offsetbuf[20];
 671                 uprv_itou(linebuf, 20, parse.line, 10, 0);
 672                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
 673                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
 674                     u_wmsg_errorName(err), linebuf, offsetbuf);
 675             } else {
 676                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
 677                     u_wmsg_errorName(err));
 678             }
 679
 680             if (t) {
 681                 delete t;
 682                 t = 0;
 683             }
 684             goto error_exit;
 685         }
 686
 687         useOffsets = FALSE;
 688     }
 689 #endif
 690
 691     // Create codepage converter. If the codepage or its aliases weren't
 692     // available, it returns NULL and a failure code. We also set the
 693     // callbacks, and return errors in the same way.
 694
 695     convfrom = ucnv_open(fromcpage, &err);
 696     if (U_FAILURE(err)) {
 697         UnicodeString str(fromcpage, "");
 698         initMsg(pname);
 699         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
 700             u_wmsg_errorName(err));
 701         goto error_exit;
 702     }
 703     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
 704     if (U_FAILURE(err)) {
 705         initMsg(pname);
 706         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 707         goto error_exit;
 708     }
 709
 710     convto = ucnv_open(tocpage, &err);
 711     if (U_FAILURE(err)) {
 712         UnicodeString str(tocpage, "");
 713         initMsg(pname);
 714         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
 715             u_wmsg_errorName(err));
 716         goto error_exit;
 717     }
 718     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
 719     if (U_FAILURE(err)) {
 720         initMsg(pname);
 721         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 722         goto error_exit;
 723     }
 724     ucnv_setFallback(convto, fallback);
 725
 726     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
 727     int8_t sig;
 728
 729     // OK, we can convert now.
 730     sig = signature;
 731     rd = 0;
 732
 733     do {
 734         willexit = FALSE;
 735
 736         // input file offset at the beginning of the next buffer
 737         infoffset += rd;
 738
 739         rd = fread(buf, 1, bufsz, infile);
 740         if (ferror(infile) != 0) {
 741             UnicodeString str(strerror(errno));
 742             initMsg(pname);
 743             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
 744             goto error_exit;
 745         }
 746
 747         // Convert the read buffer into the new encoding via Unicode.
 748         // After the call 'unibufp' will be placed behind the last
 749         // character that was converted in the 'unibuf'.
 750         // Also the 'cbufp' is positioned behind the last converted
 751         // character.
 752         // At the last conversion in the file, flush should be set to
 753         // true so that we get all characters converted.
 754         //
 755         // The converter must be flushed at the end of conversion so
 756         // that characters on hold also will be written.
 757
 758         cbufp = buf;
 759         flush = (UBool)(rd != bufsz);
 760
 761         // convert until the input is consumed
 762         do {
 763             // remember the start of the current byte-to-Unicode conversion
 764             prevbufp = cbufp;
 765
 766             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
 767
 768             // Use bufsz instead of u.getCapacity() for the targetLimit
 769             // so that we don't overflow fromoffsets[].
 770             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
 771                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
 772
 773             ulen = (int32_t)(unibufp - unibuf);
 774             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
 775
 776             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
 777             // converting all of the input bytes.
 778             // It works like this because ucnv_toUnicode() returns only under the
 779             // following conditions:
 780             // - an error occurred during conversion (an error code is set)
 781             // - the target buffer is filled (the error code indicates an overflow)
 782             // - the source is consumed
 783             // That is, if the error code does not indicate a failure,
 784             // not even an overflow, then the source must be consumed entirely.
 785             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
 786
 787             if (err == U_BUFFER_OVERFLOW_ERROR) {
 788                 err = U_ZERO_ERROR;
 789             } else if (U_FAILURE(err)) {
 790                 char pos[32], errorBytes[32];
 791                 int8_t i, length, errorLength;
 792
 793                 UErrorCode localError = U_ZERO_ERROR;
 794                 errorLength = (int8_t)sizeof(errorBytes);
 795                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
 796                 if (U_FAILURE(localError) || errorLength == 0) {
 797                     errorLength = 1;
 798                 }
 799
 800                 // print the input file offset of the start of the error bytes:
 801                 // input file offset of the current byte buffer +
 802                 // length of the just consumed bytes -
 803                 // length of the error bytes
 804                 length =
 805                     (int8_t)sprintf(pos, "%d",
 806                         (int)(infoffset + (cbufp - buf) - errorLength));
 807
 808                 // output the bytes that caused the error
 809                 UnicodeString str;
 810                 for (i = 0; i < errorLength; ++i) {
 811                     if (i > 0) {
 812                         str.append((UChar)uSP);
 813                     }
 814                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
 815                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
 816                 }
 817
 818                 initMsg(pname);
 819                 u_wmsg(stderr, "problemCvtToU",
 820                         UnicodeString(pos, length, "").getTerminatedBuffer(),
 821                         str.getTerminatedBuffer(),
 822                         u_wmsg_errorName(err));
 823
 824                 willexit = TRUE;
 825                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
 826             }
 827
 828             // Replaced a check for whether the input was consumed by
 829             // looping until it is; message key "premEndInput" now obsolete.
 830
 831             if (ulen == 0) {
 832                 continue;
 833             }
 834
 835             // remove a U+FEFF Unicode signature character if requested
 836             if (sig < 0) {
 837                 if (u.charAt(0) == uSig) {
 838                     u.remove(0, 1);
 839
 840                     // account for the removed UChar and offset
 841                     --ulen;
 842
 843                     if (useOffsets) {
 844                         // remove an offset from fromoffsets[] as well
 845                         // to keep the array parallel with the UChars
 846                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
 847                     }
 848
 849                 }
 850                 sig = 0;
 851             }
 852
 853 #if !UCONFIG_NO_TRANSLITERATION
 854             // Transliterate/transform if needed.
 855
 856             // For transformation, we use chunking code -
 857             // collect Unicode input until, for example, an end-of-line,
 858             // then transform and output-convert that and continue collecting.
 859             // This makes the transformation result independent of the buffer size
 860             // while avoiding the slower keyboard mode.
 861             // The end-of-chunk characters are completely included in the
 862             // transformed string in case they are to be transformed themselves.
 863             if (t != NULL) {
 864                 UnicodeString out;
 865                 int32_t chunkLimit;
 866
 867                 do {
 868                     chunkLimit = getChunkLimit(chunk, u);
 869                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
 870                         // use all of the rest at the end of the text
 871                         chunkLimit = u.length();
 872                     }
 873                     if (chunkLimit >= 0) {
 874                         // complete the chunk and transform it
 875                         chunk.append(u, 0, chunkLimit);
 876                         u.remove(0, chunkLimit);
 877                         t->transliterate(chunk);
 878
 879                         // append the transformation result to the result and empty the chunk
 880                         out.append(chunk);
 881                         chunk.remove();
 882                     } else {
 883                         // continue collecting the chunk
 884                         chunk.append(u);
 885                         break;
 886                     }
 887                 } while (!u.isEmpty());
 888
 889                 u = out;
 890                 ulen = u.length();
 891             }
 892 #endif
 893
 894             // add a U+FEFF Unicode signature character if requested
 895             // and possible/necessary
 896             if (sig > 0) {
 897                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
 898                     u.insert(0, (UChar)uSig);
 899
 900                     if (useOffsets) {
 901                         // insert a pseudo-offset into fromoffsets[] as well
 902                         // to keep the array parallel with the UChars
 903                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
 904                         fromoffsets[0] = -1;
 905                     }
 906
 907                     // account for the additional UChar and offset
 908                     ++ulen;
 909                 }
 910                 sig = 0;
 911             }
 912
 913             // Convert the Unicode buffer into the destination codepage
 914             // Again 'bufp' will be placed behind the last converted character
 915             // And 'unibufp' will be placed behind the last converted unicode character
 916             // At the last conversion flush should be set to true to ensure that
 917             // all characters left get converted
 918
 919             unibuf = unibufbp = u.getBuffer();
 920
 921             do {
 922                 bufp = outbuf;
 923
 924                 // Use fromSawEndOfBytes in addition to the flush flag -
 925                 // it indicates whether the intermediate Unicode string
 926                 // contains the very last UChars for the very last input bytes.
 927                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
 928                                  &unibufbp,
 929                                  unibuf + ulen,
 930                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
 931
 932                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
 933                 // converting all of the intermediate UChars.
 934                 // See comment for fromSawEndOfBytes.
 935                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
 936
 937                 if (err == U_BUFFER_OVERFLOW_ERROR) {
 938                     err = U_ZERO_ERROR;
 939                 } else if (U_FAILURE(err)) {
 940                     UChar errorUChars[4];
 941                     const char *errtag;
 942                     char pos[32];
 943                     UChar32 c;
 944                     int8_t i, length, errorLength;
 945
 946                     UErrorCode localError = U_ZERO_ERROR;
 947                     errorLength = UPRV_LENGTHOF(errorUChars);
 948                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
 949                     if (U_FAILURE(localError) || errorLength == 0) {
 950                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
 951                         errorLength = 1;
 952                     }
 953
 954                     int32_t ferroffset;
 955
 956                     if (useOffsets) {
 957                         // Unicode buffer offset of the start of the error UChars
 958                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
 959                         if (ferroffset < 0) {
 960                             // approximation - the character started in the previous Unicode buffer
 961                             ferroffset = 0;
 962                         }
 963
 964                         // get the corresponding byte offset out of fromoffsets[]
 965                         // go back if the offset is not known for some of the UChars
 966                         int32_t fromoffset;
 967                         do {
 968                             fromoffset = fromoffsets[ferroffset];
 969                         } while (fromoffset < 0 && --ferroffset >= 0);
 970
 971                         // total input file offset =
 972                         // input file offset of the current byte buffer +
 973                         // byte buffer offset of where the current Unicode buffer is converted from +
 974                         // fromoffsets[Unicode offset]
 975                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
 976                         errtag = "problemCvtFromU";
 977                     } else {
 978                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
 979                         // be different from what the offsets refer to.
 980
 981                         // output file offset
 982                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
 983                         errtag = "problemCvtFromUOut";
 984                     }
 985
 986                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
 987
 988                     // output the code points that caused the error
 989                     UnicodeString str;
 990                     for (i = 0; i < errorLength;) {
 991                         if (i > 0) {
 992                             str.append((UChar)uSP);
 993                         }
 994                         U16_NEXT(errorUChars, i, errorLength, c);
 995                         if (c >= 0x100000) {
 996                             str.append(nibbleToHex((uint8_t)(c >> 20)));
 997                         }
 998                         if (c >= 0x10000) {
 999                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1000                         }
1001                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1002                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1003                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1004                         str.append(nibbleToHex((uint8_t)c));
1005                     }
1006
1007                     initMsg(pname);
1008                     u_wmsg(stderr, errtag,
1009                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1010                             str.getTerminatedBuffer(),
1011                            u_wmsg_errorName(err));
1012                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1013
1014                     willexit = TRUE;
1015                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1016                 }
1017
1018                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1019                 // looping until they are; message key "premEnd" now obsolete.
1020
1021                 // Finally, write the converted buffer to the output file
1022                 size_t outlen = (size_t) (bufp - outbuf);
1023                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1024                 if (wr != outlen) {
1025                     UnicodeString str(strerror(errno));
1026                     initMsg(pname);
1027                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1028                     willexit = TRUE;
1029                 }
1030
1031                 if (willexit) {
1032                     goto error_exit;
1033                 }
1034             } while (!toSawEndOfUnicode);
1035         } while (!fromSawEndOfBytes);
1036     } while (!flush);           // Stop when we have flushed the
1037                                 // converters (this means that it's
1038                                 // the end of output)
1039
1040     goto normal_exit;
1041
1042 error_exit:
1043     ret = FALSE;
1044
1045 normal_exit:
1046     // Cleanup.
1047
1048     ucnv_close(convfrom);
1049     ucnv_close(convto);
1050
1051 #if !UCONFIG_NO_TRANSLITERATION
1052     delete t;
1053 #endif
1054
1055     if (closeFile) {
1056         fclose(infile);
1057     }
1058
1059     return ret;
1060 }
1061
1062 static void usage(const char *pname, int ecode) {
1063     const UChar *msg;
1064     int32_t msgLen;
1065     UErrorCode err = U_ZERO_ERROR;
1066     FILE *fp = ecode ? stderr : stdout;
1067     int res;
1068
1069     initMsg(pname);
1070     msg =
1071         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1072                             &msgLen, &err);
1073     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1074     UnicodeString mname(msg, msgLen + 1);
1075
1076     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1077     if (!ecode) {
1078         if (!res) {
1079             fputc('\n', fp);
1080         }
1081         if (!u_wmsg(fp, "help")) {
1082             /* Now dump callbacks and finish. */
1083
1084             int i, count =
1085                 UPRV_LENGTHOF(transcode_callbacks);
1086             for (i = 0; i < count; ++i) {
1087                 fprintf(fp, " %s", transcode_callbacks[i].name);
1088             }
1089             fputc('\n', fp);
1090         }
1091     }
1092
1093     exit(ecode);
1094 }
1095
1096 extern int
1097 main(int argc, char **argv)
1098 {
1099     FILE *outfile;
1100     int ret = 0;
1101
1102     size_t bufsz = DEFAULT_BUFSZ;
1103
1104     const char *fromcpage = 0;
1105     const char *tocpage = 0;
1106     const char *translit = 0;
1107     const char *outfilestr = 0;
1108     UBool fallback = FALSE;
1109
1110     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1111     const void *fromuctxt = 0;
1112     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1113     const void *touctxt = 0;
1114
1115     char **iter, **remainArgv, **remainArgvLimit;
1116     char **end = argv + argc;
1117
1118     const char *pname;
1119
1120     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1121     const char *printName = 0;
1122
1123     UBool verbose = FALSE;
1124     UErrorCode status = U_ZERO_ERROR;
1125
1126     ConvertFile cf;
1127
1128     /* Initialize ICU */
1129     u_init(&status);
1130     if (U_FAILURE(status)) {
1131         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1132             argv[0], u_errorName(status));
1133         exit(1);
1134     }
1135
1136     // Get and prettify pname.
1137     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1138 #if U_PLATFORM_USES_ONLY_WIN32_API
1139     if (!pname) {
1140         pname = uprv_strrchr(*argv, '/');
1141     }
1142 #endif
1143     if (!pname) {
1144         pname = *argv;
1145     } else {
1146         ++pname;
1147     }
1148
1149     // First, get the arguments from command-line
1150     // to know the codepages to convert between
1151
1152     remainArgv = remainArgvLimit = argv + 1;
1153     for (iter = argv + 1; iter != end; iter++) {
1154         // Check for from charset
1155         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1156             iter++;
1157             if (iter != end)
1158                 fromcpage = *iter;
1159             else
1160                 usage(pname, 1);
1161         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1162             iter++;
1163             if (iter != end)
1164                 tocpage = *iter;
1165             else
1166                 usage(pname, 1);
1167         } else if (strcmp("-x", *iter) == 0) {
1168             iter++;
1169             if (iter != end)
1170                 translit = *iter;
1171             else
1172                 usage(pname, 1);
1173         } else if (!strcmp("--fallback", *iter)) {
1174             fallback = TRUE;
1175         } else if (!strcmp("--no-fallback", *iter)) {
1176             fallback = FALSE;
1177         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1178             iter++;
1179             if (iter != end) {
1180                 bufsz = atoi(*iter);
1181                 if ((int) bufsz <= 0) {
1182                     initMsg(pname);
1183                     UnicodeString str(*iter);
1184                     initMsg(pname);
1185                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1186                     return 3;
1187                 }
1188             } else {
1189                 usage(pname, 1);
1190             }
1191         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1192             if (printTranslits) {
1193                 usage(pname, 1);
1194             }
1195             printConvs = TRUE;
1196         } else if (strcmp("--default-code", *iter) == 0) {
1197             if (printTranslits) {
1198                 usage(pname, 1);
1199             }
1200             printName = ucnv_getDefaultName();
1201         } else if (strcmp("--list-code", *iter) == 0) {
1202             if (printTranslits) {
1203                 usage(pname, 1);
1204             }
1205
1206             iter++;
1207             if (iter != end) {
1208                 UErrorCode e = U_ZERO_ERROR;
1209                 printName = ucnv_getAlias(*iter, 0, &e);
1210                 if (U_FAILURE(e) || !printName) {
1211                     UnicodeString str(*iter);
1212                     initMsg(pname);
1213                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1214                     return 2;
1215                 }
1216             } else
1217                 usage(pname, 1);
1218         } else if (strcmp("--canon", *iter) == 0) {
1219             printCanon = TRUE;
1220         } else if (strcmp("-L", *iter) == 0
1221             || !strcmp("--list-transliterators", *iter)) {
1222             if (printConvs) {
1223                 usage(pname, 1);
1224             }
1225             printTranslits = TRUE;
1226         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1227             || !strcmp("--help", *iter)) {
1228             usage(pname, 0);
1229         } else if (!strcmp("-c", *iter)) {
1230             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1231         } else if (!strcmp("--to-callback", *iter)) {
1232             iter++;
1233             if (iter != end) {
1234                 const struct callback_ent *cbe = findCallback(*iter);
1235                 if (cbe) {
1236                     fromucallback = cbe->fromu;
1237                     fromuctxt = cbe->fromuctxt;
1238                 } else {
1239                     UnicodeString str(*iter);
1240                     initMsg(pname);
1241                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1242                     return 4;
1243                 }
1244             } else {
1245                 usage(pname, 1);
1246             }
1247         } else if (!strcmp("--from-callback", *iter)) {
1248             iter++;
1249             if (iter != end) {
1250                 const struct callback_ent *cbe = findCallback(*iter);
1251                 if (cbe) {
1252                     toucallback = cbe->tou;
1253                     touctxt = cbe->touctxt;
1254                 } else {
1255                     UnicodeString str(*iter);
1256                     initMsg(pname);
1257                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1258                     return 4;
1259                 }
1260             } else {
1261                 usage(pname, 1);
1262             }
1263         } else if (!strcmp("-i", *iter)) {
1264             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1265         } else if (!strcmp("--callback", *iter)) {
1266             iter++;
1267             if (iter != end) {
1268                 const struct callback_ent *cbe = findCallback(*iter);
1269                 if (cbe) {
1270                     fromucallback = cbe->fromu;
1271                     fromuctxt = cbe->fromuctxt;
1272                     toucallback = cbe->tou;
1273                     touctxt = cbe->touctxt;
1274                 } else {
1275                     UnicodeString str(*iter);
1276                     initMsg(pname);
1277                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1278                     return 4;
1279                 }
1280             } else {
1281                 usage(pname, 1);
1282             }
1283         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1284             verbose = FALSE;
1285         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1286             verbose = TRUE;
1287         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1288             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1289             return 0;
1290         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1291             ++iter;
1292             if (iter != end && !outfilestr) {
1293                 outfilestr = *iter;
1294             } else {
1295                 usage(pname, 1);
1296             }
1297         } else if (0 == strcmp("--add-signature", *iter)) {
1298             cf.signature = 1;
1299         } else if (0 == strcmp("--remove-signature", *iter)) {
1300             cf.signature = -1;
1301         } else if (**iter == '-' && (*iter)[1]) {
1302             usage(pname, 1);
1303         } else {
1304             // move a non-option up in argv[]
1305             *remainArgvLimit++ = *iter;
1306         }
1307     }
1308
1309     if (printConvs || printName) {
1310         return printConverters(pname, printName, printCanon) ? 2 : 0;
1311     } else if (printTranslits) {
1312         return printTransliterators(printCanon) ? 3 : 0;
1313     }
1314
1315     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1316         fromcpage = ucnv_getDefaultName();
1317     }
1318     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1319         tocpage = ucnv_getDefaultName();
1320     }
1321
1322     // Open the correct output file or connect to stdout for reading input
1323     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1324         outfile = fopen(outfilestr, "wb");
1325         if (outfile == 0) {
1326             UnicodeString str1(outfilestr, "");
1327             UnicodeString str2(strerror(errno), "");
1328             initMsg(pname);
1329             u_wmsg(stderr, "cantCreateOutputF",
1330                 str1.getBuffer(), str2.getBuffer());
1331             return 1;
1332         }
1333     } else {
1334         outfilestr = "-";
1335         outfile = stdout;
1336 #ifdef USE_FILENO_BINARY_MODE
1337         if (setmode(fileno(outfile), O_BINARY) == -1) {
1338             u_wmsg(stderr, "cantSetOutBinMode");
1339             exit(-1);
1340         }
1341 #endif
1342     }
1343
1344     /* Loop again on the arguments to find all the input files, and
1345     convert them. */
1346
1347     cf.setBufferSize(bufsz);
1348
1349     if(remainArgv < remainArgvLimit) {
1350         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1351             if (!cf.convertFile(
1352                     pname, fromcpage, toucallback, touctxt, tocpage,
1353                     fromucallback, fromuctxt, fallback, translit, *iter,
1354                     outfile, verbose)
1355             ) {
1356                 goto error_exit;
1357             }
1358         }
1359     } else {
1360         if (!cf.convertFile(
1361                 pname, fromcpage, toucallback, touctxt, tocpage,
1362                 fromucallback, fromuctxt, fallback, translit, 0,
1363                 outfile, verbose)
1364         ) {
1365             goto error_exit;
1366         }
1367     }
1368
1369     goto normal_exit;
1370 error_exit:
1371 #if !UCONFIG_NO_LEGACY_CONVERSION
1372     ret = 1;
1373 #else
1374     fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
1375 #endif
1376 normal_exit:
1377
1378     if (outfile != stdout) {
1379         fclose(outfile);
1380     }
1381
1382     u_cleanup();
1383
1384     return ret;
1385 }
1386
1387
1388 /*
1389  * Hey, Emacs, please set the following:
1390  *
1391  * Local Variables:
1392  * indent-tabs-mode: nil
1393  * End:
1394  *
1395  */