icuSources/extra/uconv/uconv.cpp

   1 /*****************************************************************************
   2 *
   3 *   Copyright (C) 1999-2008, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 *
   6 ******************************************************************************/
   7
   8 /*
   9  * uconv(1): an iconv(1)-like converter using ICU.
  10  *
  11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
  12  * contributed in 1999.
  13  *
  14  * Conversion to the C conversion API and many improvements by
  15  * Yves Arrouye <yves@realnames.com>, current maintainer.
  16  *
  17  * Markus Scherer maintainer from 2003.
  18  * See source code repository history for changes.
  19  */
  20
  21 #include <unicode/utypes.h>
  22 #include <unicode/putil.h>
  23 #include <unicode/ucnv.h>
  24 #include <unicode/uenum.h>
  25 #include <unicode/unistr.h>
  26 #include <unicode/translit.h>
  27 #include <unicode/uset.h>
  28 #include <unicode/uclean.h>
  29
  30 #include <stdio.h>
  31 #include <errno.h>
  32 #include <string.h>
  33 #include <stdlib.h>
  34
  35 #include "cmemory.h"
  36 #include "cstring.h"
  37 #include "ustrfmt.h"
  38
  39 #include "unicode/uwmsg.h"
  40
  41 U_NAMESPACE_USE
  42
  43 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
  44 #include <io.h>
  45 #include <fcntl.h>
  46 #if defined(U_WINDOWS)
  47 #define USE_FILENO_BINARY_MODE 1
  48 /* Windows likes to rename Unix-like functions */
  49 #ifndef fileno
  50 #define fileno _fileno
  51 #endif
  52 #ifndef setmode
  53 #define setmode _setmode
  54 #endif
  55 #ifndef O_BINARY
  56 #define O_BINARY _O_BINARY
  57 #endif
  58 #endif
  59 #endif
  60
  61 #ifdef UCONVMSG_LINK
  62 /* below from the README */
  63 #include "unicode/utypes.h"
  64 #include "unicode/udata.h"
  65 U_CFUNC char uconvmsg_dat[];
  66 #endif
  67
  68 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
  69
  70 #define DEFAULT_BUFSZ   4096
  71 #define UCONVMSG "uconvmsg"
  72
  73 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
  74
  75 /*
  76  * Initialize the message bundle so that message strings can be fetched
  77  * by u_wmsg().
  78  *
  79  */
  80
  81 static void initMsg(const char *pname) {
  82     static int ps = 0;
  83
  84     if (!ps) {
  85         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
  86         UErrorCode err = U_ZERO_ERROR;
  87
  88         ps = 1;
  89
  90         /* Set up our static data - if any */
  91 #ifdef UCONVMSG_LINK
  92         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
  93         if (U_FAILURE(err)) {
  94           fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
  95                   pname, u_errorName(err));
  96           err = U_ZERO_ERROR; /* It may still fail */
  97         }
  98 #endif
  99
 100         /* Get messages. */
 101         gBundle = u_wmsg_setPath(UCONVMSG, &err);
 102         if (U_FAILURE(err)) {
 103             fprintf(stderr,
 104                     "%s: warning: couldn't open bundle %s: %s\n",
 105                     pname, UCONVMSG, u_errorName(err));
 106 #ifdef UCONVMSG_LINK
 107             fprintf(stderr,
 108                     "%s: setAppData was called, internal data %s failed to load\n",
 109                         pname, UCONVMSG);
 110 #endif
 111
 112             err = U_ZERO_ERROR;
 113             /* that was try #1, try again with a path */
 114             uprv_strcpy(dataPath, u_getDataDirectory());
 115             uprv_strcat(dataPath, U_FILE_SEP_STRING);
 116             uprv_strcat(dataPath, UCONVMSG);
 117
 118             gBundle = u_wmsg_setPath(dataPath, &err);
 119             if (U_FAILURE(err)) {
 120                 fprintf(stderr,
 121                     "%s: warning: still couldn't open bundle %s: %s\n",
 122                     pname, dataPath, u_errorName(err));
 123                 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
 124             }
 125         }
 126     }
 127 }
 128
 129 /* Mapping of callback names to the callbacks passed to the converter
 130    API. */
 131
 132 static struct callback_ent {
 133     const char *name;
 134     UConverterFromUCallback fromu;
 135     const void *fromuctxt;
 136     UConverterToUCallback tou;
 137     const void *touctxt;
 138 } transcode_callbacks[] = {
 139     { "substitute",
 140       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
 141       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
 142     { "skip",
 143       UCNV_FROM_U_CALLBACK_SKIP, 0,
 144       UCNV_TO_U_CALLBACK_SKIP, 0 },
 145     { "stop",
 146       UCNV_FROM_U_CALLBACK_STOP, 0,
 147       UCNV_TO_U_CALLBACK_STOP, 0 },
 148     { "escape",
 149       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
 150       UCNV_TO_U_CALLBACK_ESCAPE, 0},
 151     { "escape-icu",
 152       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
 153       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
 154     { "escape-java",
 155       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
 156       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
 157     { "escape-c",
 158       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
 159       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
 160     { "escape-xml",
 161       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 162       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 163     { "escape-xml-hex",
 164       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 165       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 166     { "escape-xml-dec",
 167       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
 168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
 169     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
 170       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
 171 };
 172
 173 /* Return a pointer to a callback record given its name. */
 174
 175 static const struct callback_ent *findCallback(const char *name) {
 176     int i, count =
 177         sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
 178
 179     /* We'll do a linear search, there aren't many of them and bsearch()
 180        may not be that portable. */
 181
 182     for (i = 0; i < count; ++i) {
 183         if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
 184             return &transcode_callbacks[i];
 185         }
 186     }
 187
 188     return 0;
 189 }
 190
 191 /* Print converter information. If lookfor is set, only that converter will
 192    be printed, otherwise all converters will be printed. If canon is non
 193    zero, tags and aliases for each converter are printed too, in the format
 194    expected for convrters.txt(5). */
 195
 196 static int printConverters(const char *pname, const char *lookfor,
 197     UBool canon)
 198 {
 199     UErrorCode err = U_ZERO_ERROR;
 200     int32_t num;
 201     uint16_t num_stds;
 202     const char **stds;
 203
 204     /* If there is a specified name, just handle that now. */
 205
 206     if (lookfor) {
 207         if (!canon) {
 208             printf("%s\n", lookfor);
 209             return 0;
 210         } else {
 211         /*  Because we are printing a canonical name, we need the
 212             true converter name. We've done that already except for
 213             the default name (because we want to print the exact
 214             name one would get when calling ucnv_getDefaultName()
 215             in non-canon mode). But since we do not know at this
 216             point if we have the default name or something else, we
 217             need to normalize again to the canonical converter
 218             name. */
 219
 220             const char *truename = ucnv_getAlias(lookfor, 0, &err);
 221             if (U_SUCCESS(err)) {
 222                 lookfor = truename;
 223             } else {
 224                 err = U_ZERO_ERROR;
 225             }
 226         }
 227     }
 228
 229     /* Print converter names. We come here for one of two reasons: we
 230        are printing all the names (lookfor was null), or we have a
 231        single converter to print but in canon mode, hence we need to
 232        get to it in order to print everything. */
 233
 234     num = ucnv_countAvailable();
 235     if (num <= 0) {
 236         initMsg(pname);
 237         u_wmsg(stderr, "cantGetNames");
 238         return -1;
 239     }
 240     if (lookfor) {
 241         num = 1;                /* We know where we want to be. */
 242     }
 243
 244     num_stds = ucnv_countStandards();
 245     stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
 246     if (!stds) {
 247         u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
 248         return -1;
 249     } else {
 250         uint16_t s;
 251
 252         if (canon) {
 253             printf("{ ");
 254         }
 255         for (s = 0; s < num_stds; ++s) {
 256             stds[s] = ucnv_getStandard(s, &err);
 257             if (canon) {
 258                 printf("%s ", stds[s]);
 259             }
 260             if (U_FAILURE(err)) {
 261                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
 262                 goto error_cleanup;
 263             }
 264         }
 265         if (canon) {
 266             puts("}");
 267         }
 268     }
 269
 270     for (int32_t i = 0; i < num; i++) {
 271         const char *name;
 272         uint16_t num_aliases;
 273
 274         /* Set the name either to what we are looking for, or
 275         to the current converter name. */
 276
 277         if (lookfor) {
 278             name = lookfor;
 279         } else {
 280             name = ucnv_getAvailableName(i);
 281         }
 282
 283         /* Get all the aliases associated to the name. */
 284
 285         err = U_ZERO_ERROR;
 286         num_aliases = ucnv_countAliases(name, &err);
 287         if (U_FAILURE(err)) {
 288             printf("%s", name);
 289
 290             UnicodeString str(name, "");
 291             putchar('\t');
 292             u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 293                 u_wmsg_errorName(err));
 294             goto error_cleanup;
 295         } else {
 296             uint16_t a, s, t;
 297
 298             /* Write all the aliases and their tags. */
 299
 300             for (a = 0; a < num_aliases; ++a) {
 301                 const char *alias = ucnv_getAlias(name, a, &err);
 302
 303                 if (U_FAILURE(err)) {
 304                     UnicodeString str(name, "");
 305                     putchar('\t');
 306                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
 307                         u_wmsg_errorName(err));
 308                     goto error_cleanup;
 309                 }
 310
 311                 /* Print the current alias so that it looks right. */
 312                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
 313                                  alias,
 314                                  (canon ? "" : " "));
 315
 316                 /* Look (slowly, linear searching) for a tag. */
 317
 318                 if (canon) {
 319                     /* -1 to skip the last standard */
 320                     for (s = t = 0; s < num_stds-1; ++s) {
 321                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
 322                         if (U_SUCCESS(err)) {
 323                             /* List the standard tags */
 324                             const char *standardName;
 325                             UBool isFirst = TRUE;
 326                             UErrorCode enumError = U_ZERO_ERROR;
 327                             while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
 328                                 /* See if this alias is supported by this standard. */
 329                                 if (!strcmp(standardName, alias)) {
 330                                     if (!t) {
 331                                         printf(" {");
 332                                         t = 1;
 333                                     }
 334                                     /* Print a * after the default standard name */
 335                                     printf(" %s%s", stds[s], (isFirst ? "*" : ""));
 336                                 }
 337                                 isFirst = FALSE;
 338                             }
 339                         }
 340                     }
 341                     if (t) {
 342                         printf(" }");
 343                     }
 344                 }
 345                 /* Terminate this entry. */
 346                 if (canon) {
 347                     puts("");
 348                 }
 349
 350                 /* Move on. */
 351             }
 352             /* Terminate this entry. */
 353             if (!canon) {
 354                 puts("");
 355             }
 356         }
 357     }
 358
 359     /* Free temporary data. */
 360
 361     uprv_free(stds);
 362
 363     /* Success. */
 364
 365     return 0;
 366 error_cleanup:
 367     uprv_free(stds);
 368     return -1;
 369 }
 370
 371 /* Print all available transliterators. If canon is non zero, print
 372    one transliterator per line. */
 373
 374 static int printTransliterators(UBool canon)
 375 {
 376 #if UCONFIG_NO_TRANSLITERATION
 377     printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
 378     return 1;
 379 #else
 380     int32_t numtrans = utrans_countAvailableIDs(), i;
 381     int buflen = 512;
 382     char *buf = (char *) uprv_malloc(buflen);
 383     char staticbuf[512];
 384
 385     char sepchar = canon ? '\n' : ' ';
 386
 387     if (!buf) {
 388         buf = staticbuf;
 389         buflen = sizeof(staticbuf);
 390     }
 391
 392     for (i = 0; i < numtrans; ++i) {
 393         int32_t len = utrans_getAvailableID(i, buf, buflen);
 394         if (len >= buflen - 1) {
 395             if (buf != staticbuf) {
 396                 buflen <<= 1;
 397                 if (buflen < len) {
 398                     buflen = len + 64;
 399                 }
 400                 buf = (char *) uprv_realloc(buf, buflen);
 401                 if (!buf) {
 402                     buf = staticbuf;
 403                     buflen = sizeof(staticbuf);
 404                 }
 405             }
 406             utrans_getAvailableID(i, buf, buflen);
 407             if (len >= buflen) {
 408                 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
 409             }
 410         }
 411
 412         printf("%s", buf);
 413         if (i < numtrans - 1) {
 414             putchar(sepchar);
 415         }
 416     }
 417
 418     /* Add a terminating newline if needed. */
 419
 420     if (sepchar != '\n') {
 421         putchar('\n');
 422     }
 423
 424     /* Free temporary data. */
 425
 426     if (buf != staticbuf) {
 427         uprv_free(buf);
 428     }
 429
 430     /* Success. */
 431
 432     return 0;
 433 #endif
 434 }
 435
 436 enum {
 437     uSP = 0x20,         // space
 438     uCR = 0xd,          // carriage return
 439     uLF = 0xa,          // line feed
 440     uNL = 0x85,         // newline
 441     uLS = 0x2028,       // line separator
 442     uPS = 0x2029,       // paragraph separator
 443     uSig = 0xfeff       // signature/BOM character
 444 };
 445
 446 static inline int32_t
 447 getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
 448     // find one of
 449     // CR, LF, CRLF, NL, LS, PS
 450     // for paragraph ends (see UAX #13/Unicode 4)
 451     // and include it in the chunk
 452     // all of these characters are on the BMP
 453     // do not include FF or VT in case they are part of a paragraph
 454     // (important for bidi contexts)
 455     static const UChar paraEnds[] = {
 456         0xd, 0xa, 0x85, 0x2028, 0x2029
 457     };
 458     enum {
 459         iCR, iLF, iNL, iLS, iPS, iCount
 460     };
 461
 462     // first, see if there is a CRLF split between prev and s
 463     if (prev.endsWith(paraEnds + iCR, 1)) {
 464         if (s.startsWith(paraEnds + iLF, 1)) {
 465             return 1; // split CRLF, include the LF
 466         } else if (!s.isEmpty()) {
 467             return 0; // complete the last chunk
 468         } else {
 469             return -1; // wait for actual further contents to arrive
 470         }
 471     }
 472
 473     const UChar *u = s.getBuffer(), *limit = u + s.length();
 474     UChar c;
 475
 476     while (u < limit) {
 477         c = *u++;
 478         if (
 479             ((c < uSP) && (c == uCR || c == uLF)) ||
 480             (c == uNL) ||
 481             ((c & uLS) == uLS)
 482         ) {
 483             if (c == uCR) {
 484                 // check for CRLF
 485                 if (u == limit) {
 486                     return -1; // LF may be in the next chunk
 487                 } else if (*u == uLF) {
 488                     ++u; // include the LF in this chunk
 489                 }
 490             }
 491             return (int32_t)(u - s.getBuffer());
 492         }
 493     }
 494
 495     return -1; // continue collecting the chunk
 496 }
 497
 498 enum {
 499     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
 500     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
 501     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
 502 };
 503
 504 static inline UChar
 505 nibbleToHex(uint8_t n) {
 506     n &= 0xf;
 507     return
 508         n <= 9 ?
 509             (UChar)(0x30 + n) :
 510             (UChar)((0x61 - 10) + n);
 511 }
 512
 513 // check the converter's Unicode signature properties;
 514 // the fromUnicode side of the converter must be in its initial state
 515 // and will be reset again if it was used
 516 static int32_t
 517 cnvSigType(UConverter *cnv) {
 518     UErrorCode err;
 519     int32_t result;
 520
 521     // test if the output charset can convert U+FEFF
 522     USet *set = uset_open(1, 0);
 523     err = U_ZERO_ERROR;
 524     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
 525     if (U_SUCCESS(err) && uset_contains(set, uSig)) {
 526         result = CNV_WITH_FEFF;
 527     } else {
 528         result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
 529     }
 530     uset_close(set);
 531
 532     if (result == CNV_WITH_FEFF) {
 533         // test if the output charset emits a signature anyway
 534         const UChar a[1] = { 0x61 }; // "a"
 535         const UChar *in;
 536
 537         char buffer[20];
 538         char *out;
 539
 540         in = a;
 541         out = buffer;
 542         err = U_ZERO_ERROR;
 543         ucnv_fromUnicode(cnv,
 544             &out, buffer + sizeof(buffer),
 545             &in, a + 1,
 546             NULL, TRUE, &err);
 547         ucnv_resetFromUnicode(cnv);
 548
 549         if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
 550             U_SUCCESS(err)
 551         ) {
 552             result = CNV_ADDS_FEFF;
 553         }
 554     }
 555
 556     return result;
 557 }
 558
 559 class ConvertFile {
 560 public:
 561     ConvertFile() :
 562         buf(NULL), outbuf(NULL), fromoffsets(NULL),
 563         bufsz(0), signature(0) {}
 564
 565     void
 566     setBufferSize(size_t bufferSize) {
 567         bufsz = bufferSize;
 568
 569         buf = new char[2 * bufsz];
 570         outbuf = buf + bufsz;
 571
 572         // +1 for an added U+FEFF in the intermediate Unicode buffer
 573         fromoffsets = new int32_t[bufsz + 1];
 574     }
 575
 576     ~ConvertFile() {
 577         delete [] buf;
 578         delete [] fromoffsets;
 579     }
 580
 581     UBool convertFile(const char *pname,
 582                       const char *fromcpage,
 583                       UConverterToUCallback toucallback,
 584                       const void *touctxt,
 585                       const char *tocpage,
 586                       UConverterFromUCallback fromucallback,
 587                       const void *fromuctxt,
 588                       UBool fallback,
 589                       const char *translit,
 590                       const char *infilestr,
 591                       FILE * outfile, int verbose);
 592 private:
 593     friend int main(int argc, char **argv);
 594
 595     char *buf, *outbuf;
 596     int32_t *fromoffsets;
 597
 598     size_t bufsz;
 599     int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
 600 };
 601
 602 // Convert a file from one encoding to another
 603 UBool
 604 ConvertFile::convertFile(const char *pname,
 605                          const char *fromcpage,
 606                          UConverterToUCallback toucallback,
 607                          const void *touctxt,
 608                          const char *tocpage,
 609                          UConverterFromUCallback fromucallback,
 610                          const void *fromuctxt,
 611                          UBool fallback,
 612                          const char *translit,
 613                          const char *infilestr,
 614                          FILE * outfile, int verbose)
 615 {
 616     FILE *infile;
 617     UBool ret = TRUE;
 618     UConverter *convfrom = 0;
 619     UConverter *convto = 0;
 620     UErrorCode err = U_ZERO_ERROR;
 621     UBool flush;
 622     const char *cbufp, *prevbufp;
 623     char *bufp;
 624
 625     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
 626
 627     const UChar *unibuf, *unibufbp;
 628     UChar *unibufp;
 629
 630     size_t rd, wr;
 631
 632 #if !UCONFIG_NO_TRANSLITERATION
 633     Transliterator *t = 0;      // Transliterator acting on Unicode data.
 634     UnicodeString chunk;        // One chunk of the text being collected for transformation.
 635 #endif
 636     UnicodeString u;            // String to do the transliteration.
 637     int32_t ulen;
 638
 639     // use conversion offsets for error messages
 640     // unless a transliterator is used -
 641     // a text transformation will reorder characters in unpredictable ways
 642     UBool useOffsets = TRUE;
 643
 644     // Open the correct input file or connect to stdin for reading input
 645
 646     if (infilestr != 0 && strcmp(infilestr, "-")) {
 647         infile = fopen(infilestr, "rb");
 648         if (infile == 0) {
 649             UnicodeString str1(infilestr, "");
 650             str1.append((UChar32) 0);
 651             UnicodeString str2(strerror(errno), "");
 652             str2.append((UChar32) 0);
 653             initMsg(pname);
 654             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
 655             return FALSE;
 656         }
 657     } else {
 658         infilestr = "-";
 659         infile = stdin;
 660 #ifdef USE_FILENO_BINARY_MODE
 661         if (setmode(fileno(stdin), O_BINARY) == -1) {
 662             initMsg(pname);
 663             u_wmsg(stderr, "cantSetInBinMode");
 664             return FALSE;
 665         }
 666 #endif
 667     }
 668
 669     if (verbose) {
 670         fprintf(stderr, "%s:\n", infilestr);
 671     }
 672
 673 #if !UCONFIG_NO_TRANSLITERATION
 674     // Create transliterator as needed.
 675
 676     if (translit != NULL && *translit) {
 677         UParseError parse;
 678         UnicodeString str(translit), pestr;
 679
 680         /* Create from rules or by ID as needed. */
 681
 682         parse.line = -1;
 683
 684         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
 685             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
 686         } else {
 687             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
 688         }
 689
 690         if (U_FAILURE(err)) {
 691             str.append((UChar32) 0);
 692             initMsg(pname);
 693
 694             if (parse.line >= 0) {
 695                 UChar linebuf[20], offsetbuf[20];
 696                 uprv_itou(linebuf, 20, parse.line, 10, 0);
 697                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
 698                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
 699                     u_wmsg_errorName(err), linebuf, offsetbuf);
 700             } else {
 701                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
 702                     u_wmsg_errorName(err));
 703             }
 704
 705             if (t) {
 706                 delete t;
 707                 t = 0;
 708             }
 709             goto error_exit;
 710         }
 711
 712         useOffsets = FALSE;
 713     }
 714 #endif
 715
 716     // Create codepage converter. If the codepage or its aliases weren't
 717     // available, it returns NULL and a failure code. We also set the
 718     // callbacks, and return errors in the same way.
 719
 720     convfrom = ucnv_open(fromcpage, &err);
 721     if (U_FAILURE(err)) {
 722         UnicodeString str(fromcpage, "");
 723         initMsg(pname);
 724         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
 725             u_wmsg_errorName(err));
 726         goto error_exit;
 727     }
 728     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
 729     if (U_FAILURE(err)) {
 730         initMsg(pname);
 731         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 732         goto error_exit;
 733     }
 734
 735     convto = ucnv_open(tocpage, &err);
 736     if (U_FAILURE(err)) {
 737         UnicodeString str(tocpage, "");
 738         initMsg(pname);
 739         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
 740             u_wmsg_errorName(err));
 741         goto error_exit;
 742     }
 743     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
 744     if (U_FAILURE(err)) {
 745         initMsg(pname);
 746         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 747         goto error_exit;
 748     }
 749     ucnv_setFallback(convto, fallback);
 750
 751     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
 752     int8_t sig;
 753
 754     // OK, we can convert now.
 755     sig = signature;
 756     rd = 0;
 757
 758     do {
 759         willexit = FALSE;
 760
 761         // input file offset at the beginning of the next buffer
 762         infoffset += rd;
 763
 764         rd = fread(buf, 1, bufsz, infile);
 765         if (ferror(infile) != 0) {
 766             UnicodeString str(strerror(errno));
 767             initMsg(pname);
 768             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
 769             goto error_exit;
 770         }
 771
 772         // Convert the read buffer into the new encoding via Unicode.
 773         // After the call 'unibufp' will be placed behind the last
 774         // character that was converted in the 'unibuf'.
 775         // Also the 'cbufp' is positioned behind the last converted
 776         // character.
 777         // At the last conversion in the file, flush should be set to
 778         // true so that we get all characters converted.
 779         //
 780         // The converter must be flushed at the end of conversion so
 781         // that characters on hold also will be written.
 782
 783         cbufp = buf;
 784         flush = (UBool)(rd != bufsz);
 785
 786         // convert until the input is consumed
 787         do {
 788             // remember the start of the current byte-to-Unicode conversion
 789             prevbufp = cbufp;
 790
 791             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
 792
 793             // Use bufsz instead of u.getCapacity() for the targetLimit
 794             // so that we don't overflow fromoffsets[].
 795             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
 796                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
 797
 798             ulen = (int32_t)(unibufp - unibuf);
 799             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
 800
 801             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
 802             // converting all of the input bytes.
 803             // It works like this because ucnv_toUnicode() returns only under the
 804             // following conditions:
 805             // - an error occurred during conversion (an error code is set)
 806             // - the target buffer is filled (the error code indicates an overflow)
 807             // - the source is consumed
 808             // That is, if the error code does not indicate a failure,
 809             // not even an overflow, then the source must be consumed entirely.
 810             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
 811
 812             if (err == U_BUFFER_OVERFLOW_ERROR) {
 813                 err = U_ZERO_ERROR;
 814             } else if (U_FAILURE(err)) {
 815                 char pos[32], errorBytes[32];
 816                 int8_t i, length, errorLength;
 817
 818                 UErrorCode localError = U_ZERO_ERROR;
 819                 errorLength = (int8_t)sizeof(errorBytes);
 820                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
 821                 if (U_FAILURE(localError) || errorLength == 0) {
 822                     errorLength = 1;
 823                 }
 824
 825                 // print the input file offset of the start of the error bytes:
 826                 // input file offset of the current byte buffer +
 827                 // length of the just consumed bytes -
 828                 // length of the error bytes
 829                 length =
 830                     (int8_t)sprintf(pos, "%d",
 831                         (int)(infoffset + (cbufp - buf) - errorLength));
 832
 833                 // output the bytes that caused the error
 834                 UnicodeString str;
 835                 for (i = 0; i < errorLength; ++i) {
 836                     if (i > 0) {
 837                         str.append((UChar)uSP);
 838                     }
 839                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
 840                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
 841                 }
 842
 843                 initMsg(pname);
 844                 u_wmsg(stderr, "problemCvtToU",
 845                         UnicodeString(pos, length, "").getTerminatedBuffer(),
 846                         str.getTerminatedBuffer(),
 847                         u_wmsg_errorName(err));
 848
 849                 willexit = TRUE;
 850                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
 851             }
 852
 853             // Replaced a check for whether the input was consumed by
 854             // looping until it is; message key "premEndInput" now obsolete.
 855
 856             if (ulen == 0) {
 857                 continue;
 858             }
 859
 860             // remove a U+FEFF Unicode signature character if requested
 861             if (sig < 0) {
 862                 if (u.charAt(0) == uSig) {
 863                     u.remove(0, 1);
 864
 865                     // account for the removed UChar and offset
 866                     --ulen;
 867
 868                     if (useOffsets) {
 869                         // remove an offset from fromoffsets[] as well
 870                         // to keep the array parallel with the UChars
 871                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
 872                     }
 873
 874                 }
 875                 sig = 0;
 876             }
 877
 878 #if !UCONFIG_NO_TRANSLITERATION
 879             // Transliterate/transform if needed.
 880
 881             // For transformation, we use chunking code -
 882             // collect Unicode input until, for example, an end-of-line,
 883             // then transform and output-convert that and continue collecting.
 884             // This makes the transformation result independent of the buffer size
 885             // while avoiding the slower keyboard mode.
 886             // The end-of-chunk characters are completely included in the
 887             // transformed string in case they are to be transformed themselves.
 888             if (t != NULL) {
 889                 UnicodeString out;
 890                 int32_t chunkLimit;
 891
 892                 do {
 893                     chunkLimit = getChunkLimit(chunk, u);
 894                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
 895                         // use all of the rest at the end of the text
 896                         chunkLimit = u.length();
 897                     }
 898                     if (chunkLimit >= 0) {
 899                         // complete the chunk and transform it
 900                         chunk.append(u, 0, chunkLimit);
 901                         u.remove(0, chunkLimit);
 902                         t->transliterate(chunk);
 903
 904                         // append the transformation result to the result and empty the chunk
 905                         out.append(chunk);
 906                         chunk.remove();
 907                     } else {
 908                         // continue collecting the chunk
 909                         chunk.append(u);
 910                         break;
 911                     }
 912                 } while (!u.isEmpty());
 913
 914                 u = out;
 915                 ulen = u.length();
 916             }
 917 #endif
 918
 919             // add a U+FEFF Unicode signature character if requested
 920             // and possible/necessary
 921             if (sig > 0) {
 922                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
 923                     u.insert(0, (UChar)uSig);
 924
 925                     if (useOffsets) {
 926                         // insert a pseudo-offset into fromoffsets[] as well
 927                         // to keep the array parallel with the UChars
 928                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
 929                         fromoffsets[0] = -1;
 930                     }
 931
 932                     // account for the additional UChar and offset
 933                     ++ulen;
 934                 }
 935                 sig = 0;
 936             }
 937
 938             // Convert the Unicode buffer into the destination codepage
 939             // Again 'bufp' will be placed behind the last converted character
 940             // And 'unibufp' will be placed behind the last converted unicode character
 941             // At the last conversion flush should be set to true to ensure that
 942             // all characters left get converted
 943
 944             unibuf = unibufbp = u.getBuffer();
 945
 946             do {
 947                 bufp = outbuf;
 948
 949                 // Use fromSawEndOfBytes in addition to the flush flag -
 950                 // it indicates whether the intermediate Unicode string
 951                 // contains the very last UChars for the very last input bytes.
 952                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
 953                                  &unibufbp,
 954                                  unibuf + ulen,
 955                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
 956
 957                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
 958                 // converting all of the intermediate UChars.
 959                 // See comment for fromSawEndOfBytes.
 960                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
 961
 962                 if (err == U_BUFFER_OVERFLOW_ERROR) {
 963                     err = U_ZERO_ERROR;
 964                 } else if (U_FAILURE(err)) {
 965                     UChar errorUChars[4];
 966                     const char *errtag;
 967                     char pos[32];
 968                     UChar32 c;
 969                     int8_t i, length, errorLength;
 970
 971                     UErrorCode localError = U_ZERO_ERROR;
 972                     errorLength = (int8_t)LENGTHOF(errorUChars);
 973                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
 974                     if (U_FAILURE(localError) || errorLength == 0) {
 975                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
 976                         errorLength = 1;
 977                     }
 978
 979                     int32_t ferroffset;
 980
 981                     if (useOffsets) {
 982                         // Unicode buffer offset of the start of the error UChars
 983                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
 984                         if (ferroffset < 0) {
 985                             // approximation - the character started in the previous Unicode buffer
 986                             ferroffset = 0;
 987                         }
 988
 989                         // get the corresponding byte offset out of fromoffsets[]
 990                         // go back if the offset is not known for some of the UChars
 991                         int32_t fromoffset;
 992                         do {
 993                             fromoffset = fromoffsets[ferroffset];
 994                         } while (fromoffset < 0 && --ferroffset >= 0);
 995
 996                         // total input file offset =
 997                         // input file offset of the current byte buffer +
 998                         // byte buffer offset of where the current Unicode buffer is converted from +
 999                         // fromoffsets[Unicode offset]
1000                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
1001                         errtag = "problemCvtFromU";
1002                     } else {
1003                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
1004                         // be different from what the offsets refer to.
1005
1006                         // output file offset
1007                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
1008                         errtag = "problemCvtFromUOut";
1009                     }
1010
1011                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
1012
1013                     // output the code points that caused the error
1014                     UnicodeString str;
1015                     for (i = 0; i < errorLength;) {
1016                         if (i > 0) {
1017                             str.append((UChar)uSP);
1018                         }
1019                         U16_NEXT(errorUChars, i, errorLength, c);
1020                         if (c >= 0x100000) {
1021                             str.append(nibbleToHex((uint8_t)(c >> 20)));
1022                         }
1023                         if (c >= 0x10000) {
1024                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1025                         }
1026                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1027                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1028                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1029                         str.append(nibbleToHex((uint8_t)c));
1030                     }
1031
1032                     initMsg(pname);
1033                     u_wmsg(stderr, errtag,
1034                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1035                             str.getTerminatedBuffer(),
1036                            u_wmsg_errorName(err));
1037                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1038
1039                     willexit = TRUE;
1040                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1041                 }
1042
1043                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1044                 // looping until they are; message key "premEnd" now obsolete.
1045
1046                 // Finally, write the converted buffer to the output file
1047                 size_t outlen = (size_t) (bufp - outbuf);
1048                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1049                 if (wr != outlen) {
1050                     UnicodeString str(strerror(errno));
1051                     initMsg(pname);
1052                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1053                     willexit = TRUE;
1054                 }
1055
1056                 if (willexit) {
1057                     goto error_exit;
1058                 }
1059             } while (!toSawEndOfUnicode);
1060         } while (!fromSawEndOfBytes);
1061     } while (!flush);           // Stop when we have flushed the
1062                                 // converters (this means that it's
1063                                 // the end of output)
1064
1065     goto normal_exit;
1066
1067 error_exit:
1068     ret = FALSE;
1069
1070 normal_exit:
1071     // Cleanup.
1072
1073     ucnv_close(convfrom);
1074     ucnv_close(convto);
1075
1076 #if !UCONFIG_NO_TRANSLITERATION
1077     delete t;
1078 #endif
1079
1080     if (infile != stdin) {
1081         fclose(infile);
1082     }
1083
1084     return ret;
1085 }
1086
1087 static void usage(const char *pname, int ecode) {
1088     const UChar *msg;
1089     int32_t msgLen;
1090     UErrorCode err = U_ZERO_ERROR;
1091     FILE *fp = ecode ? stderr : stdout;
1092     int res;
1093
1094     initMsg(pname);
1095     msg =
1096         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1097                             &msgLen, &err);
1098     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1099     UnicodeString mname(msg, msgLen + 1);
1100
1101     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1102     if (!ecode) {
1103         if (!res) {
1104             fputc('\n', fp);
1105         }
1106         if (!u_wmsg(fp, "help")) {
1107             /* Now dump callbacks and finish. */
1108
1109             int i, count =
1110                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1111             for (i = 0; i < count; ++i) {
1112                 fprintf(fp, " %s", transcode_callbacks[i].name);
1113             }
1114             fputc('\n', fp);
1115         }
1116     }
1117
1118     exit(ecode);
1119 }
1120
1121 extern int
1122 main(int argc, char **argv)
1123 {
1124     FILE *outfile;
1125     int ret = 0;
1126
1127     size_t bufsz = DEFAULT_BUFSZ;
1128
1129     const char *fromcpage = 0;
1130     const char *tocpage = 0;
1131     const char *translit = 0;
1132     const char *outfilestr = 0;
1133     UBool fallback = FALSE;
1134
1135     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1136     const void *fromuctxt = 0;
1137     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1138     const void *touctxt = 0;
1139
1140     char **iter, **remainArgv, **remainArgvLimit;
1141     char **end = argv + argc;
1142
1143     const char *pname;
1144
1145     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1146     const char *printName = 0;
1147
1148     UBool verbose = FALSE;
1149     UErrorCode status = U_ZERO_ERROR;
1150
1151     ConvertFile cf;
1152
1153     /* Initialize ICU */
1154     u_init(&status);
1155     if (U_FAILURE(status)) {
1156         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1157             argv[0], u_errorName(status));
1158         exit(1);
1159     }
1160
1161     // Get and prettify pname.
1162     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1163 #ifdef U_WINDOWS
1164     if (!pname) {
1165         pname = uprv_strrchr(*argv, '/');
1166     }
1167 #endif
1168     if (!pname) {
1169         pname = *argv;
1170     } else {
1171         ++pname;
1172     }
1173
1174     // First, get the arguments from command-line
1175     // to know the codepages to convert between
1176
1177     remainArgv = remainArgvLimit = argv + 1;
1178     for (iter = argv + 1; iter != end; iter++) {
1179         // Check for from charset
1180         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1181             iter++;
1182             if (iter != end)
1183                 fromcpage = *iter;
1184             else
1185                 usage(pname, 1);
1186         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1187             iter++;
1188             if (iter != end)
1189                 tocpage = *iter;
1190             else
1191                 usage(pname, 1);
1192         } else if (strcmp("-x", *iter) == 0) {
1193             iter++;
1194             if (iter != end)
1195                 translit = *iter;
1196             else
1197                 usage(pname, 1);
1198         } else if (!strcmp("--fallback", *iter)) {
1199             fallback = TRUE;
1200         } else if (!strcmp("--no-fallback", *iter)) {
1201             fallback = FALSE;
1202         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1203             iter++;
1204             if (iter != end) {
1205                 bufsz = atoi(*iter);
1206                 if ((int) bufsz <= 0) {
1207                     initMsg(pname);
1208                     UnicodeString str(*iter);
1209                     initMsg(pname);
1210                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1211                     return 3;
1212                 }
1213             } else {
1214                 usage(pname, 1);
1215             }
1216         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1217             if (printTranslits) {
1218                 usage(pname, 1);
1219             }
1220             printConvs = TRUE;
1221         } else if (strcmp("--default-code", *iter) == 0) {
1222             if (printTranslits) {
1223                 usage(pname, 1);
1224             }
1225             printName = ucnv_getDefaultName();
1226         } else if (strcmp("--list-code", *iter) == 0) {
1227             if (printTranslits) {
1228                 usage(pname, 1);
1229             }
1230
1231             iter++;
1232             if (iter != end) {
1233                 UErrorCode e = U_ZERO_ERROR;
1234                 printName = ucnv_getAlias(*iter, 0, &e);
1235                 if (U_FAILURE(e) || !printName) {
1236                     UnicodeString str(*iter);
1237                     initMsg(pname);
1238                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1239                     return 2;
1240                 }
1241             } else
1242                 usage(pname, 1);
1243         } else if (strcmp("--canon", *iter) == 0) {
1244             printCanon = TRUE;
1245         } else if (strcmp("-L", *iter) == 0
1246             || !strcmp("--list-transliterators", *iter)) {
1247             if (printConvs) {
1248                 usage(pname, 1);
1249             }
1250             printTranslits = TRUE;
1251         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1252             || !strcmp("--help", *iter)) {
1253             usage(pname, 0);
1254         } else if (!strcmp("-c", *iter)) {
1255             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1256         } else if (!strcmp("--to-callback", *iter)) {
1257             iter++;
1258             if (iter != end) {
1259                 const struct callback_ent *cbe = findCallback(*iter);
1260                 if (cbe) {
1261                     fromucallback = cbe->fromu;
1262                     fromuctxt = cbe->fromuctxt;
1263                 } else {
1264                     UnicodeString str(*iter);
1265                     initMsg(pname);
1266                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1267                     return 4;
1268                 }
1269             } else {
1270                 usage(pname, 1);
1271             }
1272         } else if (!strcmp("--from-callback", *iter)) {
1273             iter++;
1274             if (iter != end) {
1275                 const struct callback_ent *cbe = findCallback(*iter);
1276                 if (cbe) {
1277                     toucallback = cbe->tou;
1278                     touctxt = cbe->touctxt;
1279                 } else {
1280                     UnicodeString str(*iter);
1281                     initMsg(pname);
1282                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1283                     return 4;
1284                 }
1285             } else {
1286                 usage(pname, 1);
1287             }
1288         } else if (!strcmp("-i", *iter)) {
1289             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1290         } else if (!strcmp("--callback", *iter)) {
1291             iter++;
1292             if (iter != end) {
1293                 const struct callback_ent *cbe = findCallback(*iter);
1294                 if (cbe) {
1295                     fromucallback = cbe->fromu;
1296                     fromuctxt = cbe->fromuctxt;
1297                     toucallback = cbe->tou;
1298                     touctxt = cbe->touctxt;
1299                 } else {
1300                     UnicodeString str(*iter);
1301                     initMsg(pname);
1302                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1303                     return 4;
1304                 }
1305             } else {
1306                 usage(pname, 1);
1307             }
1308         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1309             verbose = FALSE;
1310         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1311             verbose = TRUE;
1312         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1313             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1314             return 0;
1315         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1316             ++iter;
1317             if (iter != end && !outfilestr) {
1318                 outfilestr = *iter;
1319             } else {
1320                 usage(pname, 1);
1321             }
1322         } else if (0 == strcmp("--add-signature", *iter)) {
1323             cf.signature = 1;
1324         } else if (0 == strcmp("--remove-signature", *iter)) {
1325             cf.signature = -1;
1326         } else if (**iter == '-' && (*iter)[1]) {
1327             usage(pname, 1);
1328         } else {
1329             // move a non-option up in argv[]
1330             *remainArgvLimit++ = *iter;
1331         }
1332     }
1333
1334     if (printConvs || printName) {
1335         return printConverters(pname, printName, printCanon) ? 2 : 0;
1336     } else if (printTranslits) {
1337         return printTransliterators(printCanon) ? 3 : 0;
1338     }
1339
1340     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1341         fromcpage = ucnv_getDefaultName();
1342     }
1343     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1344         tocpage = ucnv_getDefaultName();
1345     }
1346
1347     // Open the correct output file or connect to stdout for reading input
1348     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1349         outfile = fopen(outfilestr, "wb");
1350         if (outfile == 0) {
1351             UnicodeString str1(outfilestr, "");
1352             UnicodeString str2(strerror(errno), "");
1353             initMsg(pname);
1354             u_wmsg(stderr, "cantCreateOutputF",
1355                 str1.getBuffer(), str2.getBuffer());
1356             return 1;
1357         }
1358     } else {
1359         outfilestr = "-";
1360         outfile = stdout;
1361 #ifdef USE_FILENO_BINARY_MODE
1362         if (setmode(fileno(outfile), O_BINARY) == -1) {
1363             u_wmsg(stderr, "cantSetOutBinMode");
1364             exit(-1);
1365         }
1366 #endif
1367     }
1368
1369     /* Loop again on the arguments to find all the input files, and
1370     convert them. */
1371
1372     cf.setBufferSize(bufsz);
1373
1374     if(remainArgv < remainArgvLimit) {
1375         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1376             if (!cf.convertFile(
1377                     pname, fromcpage, toucallback, touctxt, tocpage,
1378                     fromucallback, fromuctxt, fallback, translit, *iter,
1379                     outfile, verbose)
1380             ) {
1381                 goto error_exit;
1382             }
1383         }
1384     } else {
1385         if (!cf.convertFile(
1386                 pname, fromcpage, toucallback, touctxt, tocpage,
1387                 fromucallback, fromuctxt, fallback, translit, 0,
1388                 outfile, verbose)
1389         ) {
1390             goto error_exit;
1391         }
1392     }
1393
1394     goto normal_exit;
1395 error_exit:
1396     ret = 1;
1397 normal_exit:
1398
1399     if (outfile != stdout) {
1400         fclose(outfile);
1401     }
1402
1403     return ret;
1404 }
1405
1406
1407 /*
1408  * Hey, Emacs, please set the following:
1409  *
1410  * Local Variables:
1411  * indent-tabs-mode: nil
1412  * End:
1413  *
1414  */