icuSources/extra/uconv/uconv.cpp

   1 /*****************************************************************************
   2 *
   3 *   Copyright (C) 1999-2006, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 *
   6 ******************************************************************************/
   7
   8 /*
   9  * uconv(1): an iconv(1)-like converter using ICU.
  10  *
  11  * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
  12  * contributed in 1999.
  13  *
  14  * Conversion to the C conversion API and many improvements by
  15  * Yves Arrouye <yves@realnames.com>, current maintainer.
  16  *
  17  * Markus Scherer maintainer from 2003.
  18  * See source code repository history for changes.
  19  */
  20
  21 #include <unicode/utypes.h>
  22 #include <unicode/putil.h>
  23 #include <unicode/ucnv.h>
  24 #include <unicode/uenum.h>
  25 #include <unicode/unistr.h>
  26 #include <unicode/translit.h>
  27 #include <unicode/uset.h>
  28 #include <unicode/uclean.h>
  29
  30 #include <stdio.h>
  31 #include <errno.h>
  32 #include <string.h>
  33 #include <stdlib.h>
  34
  35 #include "cmemory.h"
  36 #include "cstring.h"
  37 #include "ustrfmt.h"
  38
  39 #include "unicode/uwmsg.h"
  40
  41 #if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
  42 #include <io.h>
  43 #include <fcntl.h>
  44 #if defined(U_WINDOWS)
  45 #define USE_FILENO_BINARY_MODE 1
  46 /* Windows likes to rename Unix-like functions */
  47 #ifndef fileno
  48 #define fileno _fileno
  49 #endif
  50 #ifndef setmode
  51 #define setmode _setmode
  52 #endif
  53 #ifndef O_BINARY
  54 #define O_BINARY _O_BINARY
  55 #endif
  56 #endif
  57 #endif
  58
  59 #ifdef UCONVMSG_LINK
  60 /* below from the README */
  61 #include "unicode/utypes.h"
  62 #include "unicode/udata.h"
  63 U_CFUNC char uconvmsg_dat[];
  64 #endif
  65
  66 #define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
     /* LENGTHOF yields the element count of a true array (not a pointer),
        cast to int32_t. */
  67
  68 #define DEFAULT_BUFSZ   4096
     /* NOTE(review): DEFAULT_BUFSZ is not referenced in this part of the file;
        presumably the default conversion buffer size -- confirm against main(). */
  69 #define UCONVMSG "uconvmsg"
     /* UCONVMSG names the message bundle that initMsg() opens through
        u_wmsg_setPath(). */
  70
  71 static UResourceBundle *gBundle = 0;    /* Bundle containing messages. */
  72
  73 /*
      * Initialize the message bundle so that message strings can be fetched
      * by u_wmsg().
      *
      * pname is the program name; it is used only as the prefix of the
      * warnings written to stderr.  The bundle is opened at most once: the
      * first call makes the attempt and every later call returns immediately,
      * whether or not that attempt succeeded.
      *
      * The bundle is first opened under the bare name UCONVMSG.  If that
      * fails, a second attempt uses <ICU data directory><separator>UCONVMSG.
      * Every failure is reported as a warning only.
      */

     static void initMsg(const char *pname) {
         static int ps = 0;          /* nonzero once initialization was attempted */
         char dataPath[2048];        /* XXX Sloppy: should be PATH_MAX. */
         const char *dataDir;
         UErrorCode err = U_ZERO_ERROR;

         if (ps) {
             return;
         }
         ps = 1;

         /* Set up our static data - if any */
     #ifdef UCONVMSG_LINK
         udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
         if (U_FAILURE(err)) {
             fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
                     pname, u_errorName(err));
             err = U_ZERO_ERROR; /* It may still fail */
         }
     #endif

         /* Get messages.  Try #1: the bare bundle name. */
         gBundle = u_wmsg_setPath(UCONVMSG, &err);
         if (U_SUCCESS(err)) {
             return;
         }

         fprintf(stderr,
                 "%s: warning: couldn't open bundle %s: %s\n",
                 pname, UCONVMSG, u_errorName(err));
     #ifdef UCONVMSG_LINK
         fprintf(stderr,
                 "%s: setAppData was called, internal data %s failed to load\n",
                 pname, UCONVMSG);
     #endif

         /* Try #2: an explicit path below the ICU data directory.  The data
            directory can come from the environment, so make sure the complete
            path (plus its terminating NUL) fits before copying anything. */
         dataDir = u_getDataDirectory();
         if (dataDir == NULL) {
             dataDir = "";
         }
         if (uprv_strlen(dataDir) + uprv_strlen(U_FILE_SEP_STRING) +
                 uprv_strlen(UCONVMSG) >= sizeof(dataPath)) {
             fprintf(stderr,
                     "%s: warning: data directory path is too long to locate bundle %s\n",
                     pname, UCONVMSG);
             fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
             return;
         }
         uprv_strcpy(dataPath, dataDir);
         uprv_strcat(dataPath, U_FILE_SEP_STRING);
         uprv_strcat(dataPath, UCONVMSG);

         err = U_ZERO_ERROR;
         gBundle = u_wmsg_setPath(dataPath, &err);
         if (U_FAILURE(err)) {
             fprintf(stderr,
                     "%s: warning: still couldn't open bundle %s: %s\n",
                     pname, dataPath, u_errorName(err));
             fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
         }
     }
 126
 127 /* Mapping of callback names to the callbacks passed to the converter
 128    API. */
 129
     /* Each entry pairs a name, which findCallback() compares against using
        uprv_stricmp(), with a from-Unicode callback and context value and a
        to-Unicode callback and context value.  "escape-xml" and
        "escape-xml-hex" carry identical callbacks and contexts; "substitute",
        "skip", "stop" and "escape" use a null (0) context. */
 130 static struct callback_ent {
 131     const char *name;                 /* name matched by findCallback() */
 132     UConverterFromUCallback fromu;    /* from-Unicode callback */
 133     const void *fromuctxt;            /* context value paired with fromu */
 134     UConverterToUCallback tou;        /* to-Unicode callback */
 135     const void *touctxt;              /* context value paired with tou */
 136 } transcode_callbacks[] = {
 137     { "substitute",
 138       UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
 139       UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
 140     { "skip",
 141       UCNV_FROM_U_CALLBACK_SKIP, 0,
 142       UCNV_TO_U_CALLBACK_SKIP, 0 },
 143     { "stop",
 144       UCNV_FROM_U_CALLBACK_STOP, 0,
 145       UCNV_TO_U_CALLBACK_STOP, 0 },
 146     { "escape",
 147       UCNV_FROM_U_CALLBACK_ESCAPE, 0,
 148       UCNV_TO_U_CALLBACK_ESCAPE, 0},
 149     { "escape-icu",
 150       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
 151       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
 152     { "escape-java",
 153       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
 154       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
 155     { "escape-c",
 156       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
 157       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
     /* "escape-xml" is a second name for the hexadecimal XML escape below. */
 158     { "escape-xml",
 159       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 160       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 161     { "escape-xml-hex",
 162       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
 163       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
 164     { "escape-xml-dec",
 165       UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
 166       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
 167     { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
 168       UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
 169 };
 170
 171 /* Return a pointer to a callback record given its name. */
 172
 173 static const struct callback_ent *findCallback(const char *name) {
         /* Walk transcode_callbacks[] from front to back and compare each
            entry's name with the requested one using uprv_stricmp().
            Returns the address of the first matching entry, or 0 when no
            entry matches.  The table is short, so a plain scan is enough. */
         const struct callback_ent *entry = transcode_callbacks;
         const struct callback_ent *const tableEnd =
             transcode_callbacks +
             sizeof(transcode_callbacks) / sizeof(transcode_callbacks[0]);

         while (entry != tableEnd) {
             if (uprv_stricmp(name, entry->name) == 0) {
                 return entry;
             }
             ++entry;
         }

         return 0;
     }
 188
 189 /* Print converter information. If lookfor is set, only that converter will
        be printed, otherwise all converters will be printed. If canon is non
        zero, tags and aliases for each converter are printed too, in the format
        expected for convrtrs.txt(5).

        Without canon, a given lookfor is simply echoed on a line of its own;
        when all converters are listed without canon, the aliases of each
        converter are written on one line, separated by spaces.

        pname is the program name handed to initMsg() before an error message
        is printed.  Returns 0 on success and -1 after an error. */

     static int printConverters(const char *pname, const char *lookfor,
         UBool canon)
     {
         UErrorCode err = U_ZERO_ERROR;
         int32_t num;
         uint16_t num_stds, s;
         const char **stds;
         int result = 0;

         /* If there is a specified name, just handle that now. */

         if (lookfor) {
             if (!canon) {
                 printf("%s\n", lookfor);
                 return 0;
             } else {
             /*  Because we are printing a canonical name, we need the
                 true converter name. We've done that already except for
                 the default name (because we want to print the exact
                 name one would get when calling ucnv_getDefaultName()
                 in non-canon mode). But since we do not know at this
                 point if we have the default name or something else, we
                 need to normalize again to the canonical converter
                 name. */

                 const char *truename = ucnv_getAlias(lookfor, 0, &err);
                 if (U_SUCCESS(err)) {
                     lookfor = truename;
                 } else {
                     err = U_ZERO_ERROR;
                 }
             }
         }

         /* Print converter names. We come here for one of two reasons: we
            are printing all the names (lookfor was null), or we have a
            single converter to print but in canon mode, hence we need to
            get to it in order to print everything. */

         num = ucnv_countAvailable();
         if (num <= 0) {
             initMsg(pname);
             u_wmsg(stderr, "cantGetNames");
             return -1;
         }
         if (lookfor) {
             num = 1;                /* We know where we want to be. */
         }

         /* Fetch every standard name before printing any of them, so that a
            name is never printed before its error code has been checked. */

         num_stds = ucnv_countStandards();
         stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
         if (!stds) {
             initMsg(pname);
             u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
             return -1;
         }
         for (s = 0; s < num_stds; ++s) {
             stds[s] = ucnv_getStandard(s, &err);
             if (U_FAILURE(err)) {
                 initMsg(pname);
                 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
                 uprv_free(stds);
                 return -1;
             }
         }
         if (canon) {
             printf("{ ");
             for (s = 0; s < num_stds; ++s) {
                 printf("%s ", stds[s]);
             }
             puts("}");
         }

         for (int32_t i = 0; i < num && result == 0; i++) {
             const char *name;
             uint16_t num_aliases, a;

             /* Set the name either to what we are looking for, or
                to the current converter name. */

             name = lookfor ? lookfor : ucnv_getAvailableName(i);

             /* Get all the aliases associated to the name. */

             err = U_ZERO_ERROR;
             num_aliases = ucnv_countAliases(name, &err);
             if (U_FAILURE(err)) {
                 printf("%s", name);

                 UnicodeString str(name, "");
                 putchar('\t');
                 initMsg(pname);
                 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
                     u_wmsg_errorName(err));
                 result = -1;
                 break;
             }

             /* Write all the aliases and their tags. */

             for (a = 0; a < num_aliases; ++a) {
                 const char *alias = ucnv_getAlias(name, a, &err);

                 if (U_FAILURE(err)) {
                     UnicodeString str(name, "");
                     putchar('\t');
                     initMsg(pname);
                     u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
                         u_wmsg_errorName(err));
                     result = -1;
                     break;
                 }

                 /* Print the current alias so that it looks right. */
                 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
                                  alias,
                                  (canon ? "" : " "));

                 /* Look (slowly, linear searching) for a tag. */

                 if (canon) {
                     UBool tagged = FALSE;   /* TRUE once " {" has been printed */

                     /* -1 to skip the last standard */
                     for (s = 0; s < num_stds-1; ++s) {
                         /* Use a private error code: a standard that cannot be
                            enumerated is skipped and must not make the next
                            ucnv_getAlias() call fail. */
                         UErrorCode openError = U_ZERO_ERROR;
                         UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &openError);
                         if (U_FAILURE(openError)) {
                             continue;
                         }

                         /* List the standard tags */
                         const char *standardName;
                         UBool isFirst = TRUE;
                         UErrorCode enumError = U_ZERO_ERROR;
                         while ((standardName = uenum_next(nameEnum, NULL, &enumError)) != NULL) {
                             /* See if this alias is supported by this standard. */
                             if (!strcmp(standardName, alias)) {
                                 if (!tagged) {
                                     printf(" {");
                                     tagged = TRUE;
                                 }
                                 /* Print a * after the default standard name */
                                 printf(" %s%s", stds[s], (isFirst ? "*" : ""));
                             }
                             isFirst = FALSE;
                         }
                         uenum_close(nameEnum);
                     }
                     if (tagged) {
                         printf(" }");
                     }

                     /* Terminate this entry. */
                     puts("");
                 }

                 /* Move on. */
             }

             /* Terminate this entry. */
             if (result == 0 && !canon) {
                 puts("");
             }
         }

         /* Free temporary data on success and failure alike. */

         uprv_free(stds);

         return result;
     }
 365
 366 /* Print all available transliterators. If canon is non zero, print
        one transliterator per line; otherwise print them on a single line
        separated by spaces.  Returns 0 on success, or 1 (after printing a
        notice) when UCONFIG_NO_TRANSLITERATION is set. */

     static int printTransliterators(UBool canon)
     {
     #if UCONFIG_NO_TRANSLITERATION
         printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
         return 1;
     #else
         int32_t numtrans = utrans_countAvailableIDs(), i;
         int buflen = 512;
         char *buf = (char *) uprv_malloc(buflen);
         char staticbuf[512];

         char sepchar = canon ? '\n' : ' ';

         if (!buf) {
             /* No heap memory: fall back to the fixed-size stack buffer. */
             buf = staticbuf;
             buflen = sizeof(staticbuf);
         }

         for (i = 0; i < numtrans; ++i) {
             int32_t len = utrans_getAvailableID(i, buf, buflen);
             if (len >= buflen - 1) {
                 if (buf != staticbuf) {
                     /* Grow the heap buffer.  Keep the old pointer until the
                        reallocation is known to have worked, so that it can
                        be released instead of leaked on failure. */
                     int newlen = buflen << 1;
                     if (newlen < len) {
                         newlen = len + 64;
                     }
                     char *grown = (char *) uprv_realloc(buf, newlen);
                     if (grown) {
                         buf = grown;
                         buflen = newlen;
                     } else {
                         uprv_free(buf);
                         buf = staticbuf;
                         buflen = sizeof(staticbuf);
                     }
                 }
                 utrans_getAvailableID(i, buf, buflen);
                 if (len >= buflen) {
                     uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
                 }
             }

             printf("%s", buf);
             if (i < numtrans - 1) {
                 putchar(sepchar);
             }
         }

         /* Add a terminating newline if needed. */

         if (sepchar != '\n') {
             putchar('\n');
         }

         /* Free temporary data. */

         if (buf != staticbuf) {
             uprv_free(buf);
         }

         /* Success. */

         return 0;
     #endif
     }
 430
 431 enum {
         // Unicode code points that the chunking, error-message and
         // signature-handling code in this file compares against or appends.
 432     uSP = 0x20,         // space
 433     uCR = 0xd,          // carriage return
 434     uLF = 0xa,          // line feed
 435     uNL = 0x85,         // newline
 436     uLS = 0x2028,       // line separator
 437     uPS = 0x2029,       // paragraph separator
 438     uSig = 0xfeff       // signature/BOM character
 439 };
 440
 441 static inline int32_t
     getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
         // Decide where the chunk that is currently being collected ends.
         //
         // prev is the text collected for the chunk so far, s is the text
         // that follows it.  The result is the number of UChars at the start
         // of s that still belong to the chunk (0 if prev alone already
         // completes it), or -1 if no chunk end was found and the caller
         // should continue collecting.
         //
         // A chunk ends after one of
         // CR, LF, CRLF, NL, LS, PS
         // for paragraph ends (see UAX #13/Unicode 4),
         // and the terminator is included in the chunk.
         // All of these characters are on the BMP.
         // Do not include FF or VT in case they are part of a paragraph
         // (important for bidi contexts).
         static const UChar cr = uCR, lf = uLF;

         // first, see if there is a CRLF split between prev and s
         if (prev.endsWith(&cr, 1)) {
             if (s.startsWith(&lf, 1)) {
                 return 1; // split CRLF, include the LF
             } else if (!s.isEmpty()) {
                 return 0; // complete the last chunk
             } else {
                 return -1; // wait for actual further contents to arrive
             }
         }

         const UChar *start = s.getBuffer();
         const UChar *limit = start + s.length();

         for (const UChar *u = start; u < limit;) {
             const UChar c = *u++;

             // LS and PS are tested by equality: a bit-mask test against
             // 0x2028 would also match unrelated code units that merely have
             // those bits set (for example U+202A or U+3028).
             if (c == uCR || c == uLF || c == uNL || c == uLS || c == uPS) {
                 if (c == uCR) {
                     // check for CRLF
                     if (u == limit) {
                         return -1; // LF may be in the next chunk
                     } else if (*u == uLF) {
                         ++u; // include the LF in this chunk
                     }
                 }
                 return (int32_t)(u - start);
             }
         }

         return -1; // continue collecting the chunk
     }
 492
 493 enum {
         // Values returned by cnvSigType(): how a converter deals with the
         // U+FEFF Unicode signature character.
 494     CNV_NO_FEFF,    // cannot convert the U+FEFF Unicode signature character (BOM)
 495     CNV_WITH_FEFF,  // can convert the U+FEFF signature character
 496     CNV_ADDS_FEFF   // automatically adds/detects the U+FEFF signature character
 497 };
 498
 499 static inline UChar
     nibbleToHex(uint8_t n) {
         // Map the low four bits of n to the matching lowercase hexadecimal
         // digit, expressed as a code unit (0x30..0x39 for 0-9, 0x61..0x66
         // for a-f).  The upper four bits of n are ignored.
         static const UChar hexDigits[16] = {
             0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
             0x38, 0x39, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66
         };

         return hexDigits[n & 0xf];
     }
 507
 508 // check the converter's Unicode signature properties;
 509 // the fromUnicode side of the converter must be in its initial state
 510 // and will be reset again if it was used
 511 static int32_t
     cnvSigType(UConverter *cnv) {
         // Classify how cnv treats U+FEFF and return one of CNV_NO_FEFF,
         // CNV_WITH_FEFF or CNV_ADDS_FEFF.  The from-Unicode side of cnv is
         // used for a trial conversion and reset again afterwards.
         UErrorCode status = U_ZERO_ERROR;

         // Step 1: is U+FEFF in the converter's roundtrip set at all?
         USet *roundtrip = uset_open(1, 0);
         ucnv_getUnicodeSet(cnv, roundtrip, UCNV_ROUNDTRIP_SET, &status);
         const UBool convertsSig =
             (UBool)(U_SUCCESS(status) && uset_contains(roundtrip, uSig));
         uset_close(roundtrip);

         if (!convertsSig) {
             // an error occurred or U+FEFF cannot be converted
             return CNV_NO_FEFF;
         }

         // Step 2: convert a lone "a" and see whether the output bytes begin
         // with a Unicode signature, i.e. whether the charset emits one anyway.
         const UChar sample[1] = { 0x61 }; // "a"
         const UChar *src = sample;

         char bytes[20];
         char *dst = bytes;

         status = U_ZERO_ERROR;
         ucnv_fromUnicode(cnv,
             &dst, bytes + sizeof(bytes),
             &src, sample + 1,
             NULL, TRUE, &status);
         ucnv_resetFromUnicode(cnv);

         const char *detected =
             ucnv_detectUnicodeSignature(bytes, (int32_t)(dst - bytes), NULL, &status);
         if (detected != NULL && U_SUCCESS(status)) {
             return CNV_ADDS_FEFF;
         }

         return CNV_WITH_FEFF;
     }
 553
 554 class ConvertFile {
         // Holds the working buffers and the signature option used by
         // convertFile().  buf and fromoffsets are owned by the object and
         // released by the destructor; outbuf points into the second half
         // of buf and is never freed on its own.
     public:
         ConvertFile() :
             buf(NULL), outbuf(NULL), fromoffsets(NULL),
             bufsz(0), signature(0) {}

         // Allocate the working buffers for a buffer size of bufferSize:
         // buf gets 2 * bufferSize chars (outbuf is its second half) and
         // fromoffsets gets bufferSize + 1 entries.
         void
         setBufferSize(size_t bufferSize) {
             // Release the buffers of an earlier call first, so that calling
             // this function more than once does not leak them.
             delete [] buf;
             delete [] fromoffsets;
             buf = outbuf = NULL;
             fromoffsets = NULL;
             bufsz = 0;

             buf = new char[2 * bufferSize];
             outbuf = buf + bufferSize;

             // +1 for an added U+FEFF in the intermediate Unicode buffer
             fromoffsets = new int32_t[bufferSize + 1];

             // Record the size only once both buffers exist.
             bufsz = bufferSize;
         }

         ~ConvertFile() {
             delete [] buf;
             delete [] fromoffsets;
         }

         // Convert the input named by infilestr ("-" or a null pointer means
         // stdin) from fromcpage to tocpage and write the result to outfile.
         // The callbacks and their contexts are installed on the two
         // converters; translit optionally names a transliterator ID or
         // gives transliterator rules.
         // NOTE(review): returns FALSE when the input cannot be opened; the
         // remaining return paths are defined with the function body.
         UBool convertFile(const char *pname,
                           const char *fromcpage,
                           UConverterToUCallback toucallback,
                           const void *touctxt,
                           const char *tocpage,
                           UConverterFromUCallback fromucallback,
                           const void *fromuctxt,
                           UBool fallback,
                           const char *translit,
                           const char *infilestr,
                           FILE * outfile, int verbose);
     private:
         friend int main(int argc, char **argv);

         char *buf, *outbuf;     // input byte buffer and, in the same allocation, the output half
         int32_t *fromoffsets;   // offsets array handed to ucnv_toUnicode() when offsets are in use

         size_t bufsz;           // size passed to setBufferSize(); 0 before that
         int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
     };
 596
 597 // Convert a file from one encoding to another
 598 UBool
 599 ConvertFile::convertFile(const char *pname,
 600                          const char *fromcpage,
 601                          UConverterToUCallback toucallback,
 602                          const void *touctxt,
 603                          const char *tocpage,
 604                          UConverterFromUCallback fromucallback,
 605                          const void *fromuctxt,
 606                          UBool fallback,
 607                          const char *translit,
 608                          const char *infilestr,
 609                          FILE * outfile, int verbose)
 610 {
 611     FILE *infile;
 612     UBool ret = TRUE;
 613     UConverter *convfrom = 0;
 614     UConverter *convto = 0;
 615     UErrorCode err = U_ZERO_ERROR;
 616     UBool flush;
 617     const char *cbufp, *prevbufp;
 618     char *bufp;
 619
 620     uint32_t infoffset = 0, outfoffset = 0;   /* Where we are in the file, for error reporting. */
 621
 622     const UChar *unibuf, *unibufbp;
 623     UChar *unibufp;
 624
 625     size_t rd, wr;
 626
 627 #if !UCONFIG_NO_TRANSLITERATION
 628     Transliterator *t = 0;      // Transliterator acting on Unicode data.
 629     UnicodeString chunk;        // One chunk of the text being collected for transformation.
 630 #endif
 631     UnicodeString u;            // String to do the transliteration.
 632     int32_t ulen;
 633
 634     // use conversion offsets for error messages
 635     // unless a transliterator is used -
 636     // a text transformation will reorder characters in unpredictable ways
 637     UBool useOffsets = TRUE;
 638
 639     // Open the correct input file or connect to stdin for reading input
 640
 641     if (infilestr != 0 && strcmp(infilestr, "-")) {
 642         infile = fopen(infilestr, "rb");
 643         if (infile == 0) {
 644             UnicodeString str1(infilestr, "");
 645             str1.append((UChar32) 0);
 646             UnicodeString str2(strerror(errno), "");
 647             str2.append((UChar32) 0);
 648             initMsg(pname);
 649             u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
 650             return FALSE;
 651         }
 652     } else {
 653         infilestr = "-";
 654         infile = stdin;
 655 #ifdef USE_FILENO_BINARY_MODE
 656         if (setmode(fileno(stdin), O_BINARY) == -1) {
 657             initMsg(pname);
 658             u_wmsg(stderr, "cantSetInBinMode");
 659             return FALSE;
 660         }
 661 #endif
 662     }
 663
 664     if (verbose) {
 665         fprintf(stderr, "%s:\n", infilestr);
 666     }
 667
 668 #if !UCONFIG_NO_TRANSLITERATION
 669     // Create transliterator as needed.
 670
 671     if (translit != NULL && *translit) {
 672         UParseError parse;
 673         UnicodeString str(translit), pestr;
 674
 675         /* Create from rules or by ID as needed. */
 676
 677         parse.line = -1;
 678
 679         if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
 680             t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
 681         } else {
 682             t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
 683         }
 684
 685         if (U_FAILURE(err)) {
 686             str.append((UChar32) 0);
 687             initMsg(pname);
 688
 689             if (parse.line >= 0) {
 690                 UChar linebuf[20], offsetbuf[20];
 691                 uprv_itou(linebuf, 20, parse.line, 10, 0);
 692                 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
 693                 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
 694                     u_wmsg_errorName(err), linebuf, offsetbuf);
 695             } else {
 696                 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
 697                     u_wmsg_errorName(err));
 698             }
 699
 700             if (t) {
 701                 delete t;
 702                 t = 0;
 703             }
 704             goto error_exit;
 705         }
 706
 707         useOffsets = FALSE;
 708     }
 709 #endif
 710
 711     // Create codepage converter. If the codepage or its aliases weren't
 712     // available, it returns NULL and a failure code. We also set the
 713     // callbacks, and return errors in the same way.
 714
 715     convfrom = ucnv_open(fromcpage, &err);
 716     if (U_FAILURE(err)) {
 717         UnicodeString str(fromcpage, "");
 718         initMsg(pname);
 719         u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
 720             u_wmsg_errorName(err));
 721         goto error_exit;
 722     }
 723     ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
 724     if (U_FAILURE(err)) {
 725         initMsg(pname);
 726         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 727         goto error_exit;
 728     }
 729
 730     convto = ucnv_open(tocpage, &err);
 731     if (U_FAILURE(err)) {
 732         UnicodeString str(tocpage, "");
 733         initMsg(pname);
 734         u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
 735             u_wmsg_errorName(err));
 736         goto error_exit;
 737     }
 738     ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
 739     if (U_FAILURE(err)) {
 740         initMsg(pname);
 741         u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
 742         goto error_exit;
 743     }
 744     ucnv_setFallback(convto, fallback);
 745
 746     UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
 747     int8_t sig;
 748
 749     // OK, we can convert now.
 750     sig = signature;
 751     rd = 0;
 752
 753     do {
 754         willexit = FALSE;
 755
 756         // input file offset at the beginning of the next buffer
 757         infoffset += rd;
 758
 759         rd = fread(buf, 1, bufsz, infile);
 760         if (ferror(infile) != 0) {
 761             UnicodeString str(strerror(errno));
 762             initMsg(pname);
 763             u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
 764             goto error_exit;
 765         }
 766
 767         // Convert the read buffer into the new encoding via Unicode.
 768         // After the call 'unibufp' will be placed behind the last
 769         // character that was converted in the 'unibuf'.
 770         // Also the 'cbufp' is positioned behind the last converted
 771         // character.
 772         // At the last conversion in the file, flush should be set to
 773         // true so that we get all characters converted.
 774         //
 775         // The converter must be flushed at the end of conversion so
 776         // that characters on hold also will be written.
 777
 778         cbufp = buf;
 779         flush = (UBool)(rd != bufsz);
 780
 781         // convert until the input is consumed
 782         do {
 783             // remember the start of the current byte-to-Unicode conversion
 784             prevbufp = cbufp;
 785
 786             unibuf = unibufp = u.getBuffer((int32_t)bufsz);
 787
 788             // Use bufsz instead of u.getCapacity() for the targetLimit
 789             // so that we don't overflow fromoffsets[].
 790             ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
 791                 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
 792
 793             ulen = (int32_t)(unibufp - unibuf);
 794             u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
 795
 796             // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
 797             // converting all of the input bytes.
 798             // It works like this because ucnv_toUnicode() returns only under the
 799             // following conditions:
 800             // - an error occurred during conversion (an error code is set)
 801             // - the target buffer is filled (the error code indicates an overflow)
 802             // - the source is consumed
 803             // That is, if the error code does not indicate a failure,
 804             // not even an overflow, then the source must be consumed entirely.
 805             fromSawEndOfBytes = (UBool)U_SUCCESS(err);
 806
 807             if (err == U_BUFFER_OVERFLOW_ERROR) {
 808                 err = U_ZERO_ERROR;
 809             } else if (U_FAILURE(err)) {
 810                 char pos[32], errorBytes[32];
 811                 int8_t i, length, errorLength;
 812
 813                 UErrorCode localError = U_ZERO_ERROR;
 814                 errorLength = (int8_t)sizeof(errorBytes);
 815                 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
 816                 if (U_FAILURE(localError) || errorLength == 0) {
 817                     errorLength = 1;
 818                 }
 819
 820                 // print the input file offset of the start of the error bytes:
 821                 // input file offset of the current byte buffer +
 822                 // length of the just consumed bytes -
 823                 // length of the error bytes
 824                 length =
 825                     (int8_t)sprintf(pos, "%d",
 826                         (int)(infoffset + (cbufp - buf) - errorLength));
 827
 828                 // output the bytes that caused the error
 829                 UnicodeString str;
 830                 for (i = 0; i < errorLength; ++i) {
 831                     if (i > 0) {
 832                         str.append((UChar)uSP);
 833                     }
 834                     str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
 835                     str.append(nibbleToHex((uint8_t)errorBytes[i]));
 836                 }
 837
 838                 initMsg(pname);
 839                 u_wmsg(stderr, "problemCvtToU",
 840                         UnicodeString(pos, length, "").getTerminatedBuffer(),
 841                         str.getTerminatedBuffer(),
 842                         u_wmsg_errorName(err));
 843
 844                 willexit = TRUE;
 845                 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
 846             }
 847
 848             // Replaced a check for whether the input was consumed by
 849             // looping until it is; message key "premEndInput" now obsolete.
 850
 851             if (ulen == 0) {
 852                 continue;
 853             }
 854
 855             // remove a U+FEFF Unicode signature character if requested
 856             if (sig < 0) {
 857                 if (u.charAt(0) == uSig) {
 858                     u.remove(0, 1);
 859
 860                     // account for the removed UChar and offset
 861                     --ulen;
 862
 863                     if (useOffsets) {
 864                         // remove an offset from fromoffsets[] as well
 865                         // to keep the array parallel with the UChars
 866                         memmove(fromoffsets, fromoffsets + 1, ulen * 4);
 867                     }
 868
 869                 }
 870                 sig = 0;
 871             }
 872
 873 #if !UCONFIG_NO_TRANSLITERATION
 874             // Transliterate/transform if needed.
 875
 876             // For transformation, we use chunking code -
 877             // collect Unicode input until, for example, an end-of-line,
 878             // then transform and output-convert that and continue collecting.
 879             // This makes the transformation result independent of the buffer size
 880             // while avoiding the slower keyboard mode.
 881             // The end-of-chunk characters are completely included in the
 882             // transformed string in case they are to be transformed themselves.
 883             if (t != NULL) {
 884                 UnicodeString out;
 885                 int32_t chunkLimit;
 886
 887                 do {
 888                     chunkLimit = getChunkLimit(chunk, u);
 889                     if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
 890                         // use all of the rest at the end of the text
 891                         chunkLimit = u.length();
 892                     }
 893                     if (chunkLimit >= 0) {
 894                         // complete the chunk and transform it
 895                         chunk.append(u, 0, chunkLimit);
 896                         u.remove(0, chunkLimit);
 897                         t->transliterate(chunk);
 898
 899                         // append the transformation result to the result and empty the chunk
 900                         out.append(chunk);
 901                         chunk.remove();
 902                     } else {
 903                         // continue collecting the chunk
 904                         chunk.append(u);
 905                         break;
 906                     }
 907                 } while (!u.isEmpty());
 908
 909                 u = out;
 910                 ulen = u.length();
 911             }
 912 #endif
 913
 914             // add a U+FEFF Unicode signature character if requested
 915             // and possible/necessary
 916             if (sig > 0) {
 917                 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
 918                     u.insert(0, (UChar)uSig);
 919
 920                     if (useOffsets) {
 921                         // insert a pseudo-offset into fromoffsets[] as well
 922                         // to keep the array parallel with the UChars
 923                         memmove(fromoffsets + 1, fromoffsets, ulen * 4);
 924                         fromoffsets[0] = -1;
 925                     }
 926
 927                     // account for the additional UChar and offset
 928                     ++ulen;
 929                 }
 930                 sig = 0;
 931             }
 932
 933             // Convert the Unicode buffer into the destination codepage
 934             // Again 'bufp' will be placed behind the last converted character
 935             // And 'unibufp' will be placed behind the last converted unicode character
 936             // At the last conversion flush should be set to true to ensure that
 937             // all characters left get converted
 938
 939             unibuf = unibufbp = u.getBuffer();
 940
 941             do {
 942                 bufp = outbuf;
 943
 944                 // Use fromSawEndOfBytes in addition to the flush flag -
 945                 // it indicates whether the intermediate Unicode string
 946                 // contains the very last UChars for the very last input bytes.
 947                 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
 948                                  &unibufbp,
 949                                  unibuf + ulen,
 950                                  NULL, (UBool)(flush && fromSawEndOfBytes), &err);
 951
 952                 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
 953                 // converting all of the intermediate UChars.
 954                 // See comment for fromSawEndOfBytes.
 955                 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
 956
 957                 if (err == U_BUFFER_OVERFLOW_ERROR) {
 958                     err = U_ZERO_ERROR;
 959                 } else if (U_FAILURE(err)) {
 960                     UChar errorUChars[4];
 961                     const char *errtag;
 962                     char pos[32];
 963                     UChar32 c;
 964                     int8_t i, length, errorLength;
 965
 966                     UErrorCode localError = U_ZERO_ERROR;
 967                     errorLength = (int8_t)LENGTHOF(errorUChars);
 968                     ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
 969                     if (U_FAILURE(localError) || errorLength == 0) {
 970                         // need at least 1 so that we don't access beyond the length of fromoffsets[]
 971                         errorLength = 1;
 972                     }
 973
 974                     int32_t ferroffset;
 975
 976                     if (useOffsets) {
 977                         // Unicode buffer offset of the start of the error UChars
 978                         ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
 979                         if (ferroffset < 0) {
 980                             // approximation - the character started in the previous Unicode buffer
 981                             ferroffset = 0;
 982                         }
 983
 984                         // get the corresponding byte offset out of fromoffsets[]
 985                         // go back if the offset is not known for some of the UChars
 986                         int32_t fromoffset;
 987                         do {
 988                             fromoffset = fromoffsets[ferroffset];
 989                         } while (fromoffset < 0 && --ferroffset >= 0);
 990
 991                         // total input file offset =
 992                         // input file offset of the current byte buffer +
 993                         // byte buffer offset of where the current Unicode buffer is converted from +
 994                         // fromoffsets[Unicode offset]
 995                         ferroffset = infoffset + (prevbufp - buf) + fromoffset;
 996                         errtag = "problemCvtFromU";
 997                     } else {
 998                         // Do not use fromoffsets if (t != NULL) because the Unicode text may
 999                         // be different from what the offsets refer to.
1000
1001                         // output file offset
1002                         ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
1003                         errtag = "problemCvtFromUOut";
1004                     }
1005
1006                     length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
1007
1008                     // output the code points that caused the error
1009                     UnicodeString str;
1010                     for (i = 0; i < errorLength;) {
1011                         if (i > 0) {
1012                             str.append((UChar)uSP);
1013                         }
1014                         U16_NEXT(errorUChars, i, errorLength, c);
1015                         if (c >= 0x100000) {
1016                             str.append(nibbleToHex((uint8_t)(c >> 20)));
1017                         }
1018                         if (c >= 0x10000) {
1019                             str.append(nibbleToHex((uint8_t)(c >> 16)));
1020                         }
1021                         str.append(nibbleToHex((uint8_t)(c >> 12)));
1022                         str.append(nibbleToHex((uint8_t)(c >> 8)));
1023                         str.append(nibbleToHex((uint8_t)(c >> 4)));
1024                         str.append(nibbleToHex((uint8_t)c));
1025                     }
1026
1027                     initMsg(pname);
1028                     u_wmsg(stderr, errtag,
1029                             UnicodeString(pos, length, "").getTerminatedBuffer(),
1030                             str.getTerminatedBuffer(),
1031                            u_wmsg_errorName(err));
1032                     u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1033
1034                     willexit = TRUE;
1035                     err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1036                 }
1037
1038                 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1039                 // looping until they are; message key "premEnd" now obsolete.
1040
1041                 // Finally, write the converted buffer to the output file
1042                 size_t outlen = (size_t) (bufp - outbuf);
1043                 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1044                 if (wr != outlen) {
1045                     UnicodeString str(strerror(errno));
1046                     initMsg(pname);
1047                     u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1048                     willexit = TRUE;
1049                 }
1050
1051                 if (willexit) {
1052                     goto error_exit;
1053                 }
1054             } while (!toSawEndOfUnicode);
1055         } while (!fromSawEndOfBytes);
1056     } while (!flush);           // Stop when we have flushed the
1057                                 // converters (this means that it's
1058                                 // the end of output)
1059
1060     goto normal_exit;
1061
1062 error_exit:
1063     ret = FALSE;
1064
1065 normal_exit:
1066     // Cleanup.
1067
1068     ucnv_close(convfrom);
1069     ucnv_close(convto);
1070
1071 #if !UCONFIG_NO_TRANSLITERATION
1072     delete t;
1073 #endif
1074
1075     if (infile != stdin) {
1076         fclose(infile);
1077     }
1078
1079     return ret;
1080 }
1081
1082 static void usage(const char *pname, int ecode) {
1083     const UChar *msg;
1084     int32_t msgLen;
1085     UErrorCode err = U_ZERO_ERROR;
1086     FILE *fp = ecode ? stderr : stdout;
1087     int res;
1088
1089     initMsg(pname);
1090     msg =
1091         ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1092                             &msgLen, &err);
1093     UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1094     UnicodeString mname(msg, msgLen + 1);
1095
1096     res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1097     if (!ecode) {
1098         if (!res) {
1099             fputc('\n', fp);
1100         }
1101         if (!u_wmsg(fp, "help")) {
1102             /* Now dump callbacks and finish. */
1103
1104             int i, count =
1105                 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1106             for (i = 0; i < count; ++i) {
1107                 fprintf(fp, " %s", transcode_callbacks[i].name);
1108             }
1109             fputc('\n', fp);
1110         }
1111     }
1112
1113     exit(ecode);
1114 }
1115
1116 extern int
1117 main(int argc, char **argv)
1118 {
1119     FILE *outfile;
1120     int ret = 0;
1121
1122     size_t bufsz = DEFAULT_BUFSZ;
1123
1124     const char *fromcpage = 0;
1125     const char *tocpage = 0;
1126     const char *translit = 0;
1127     const char *outfilestr = 0;
1128     UBool fallback = FALSE;
1129
1130     UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1131     const void *fromuctxt = 0;
1132     UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1133     const void *touctxt = 0;
1134
1135     char **iter, **remainArgv, **remainArgvLimit;
1136     char **end = argv + argc;
1137
1138     const char *pname;
1139
1140     UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
1141     const char *printName = 0;
1142
1143     UBool verbose = FALSE;
1144     UErrorCode status = U_ZERO_ERROR;
1145
1146     ConvertFile cf;
1147
1148     /* Initialize ICU */
1149     u_init(&status);
1150     if (U_FAILURE(status)) {
1151         fprintf(stderr, "%s: can not initialize ICU.  status = %s\n",
1152             argv[0], u_errorName(status));
1153         exit(1);
1154     }
1155
1156     // Get and prettify pname.
1157     pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
1158 #ifdef U_WINDOWS
1159     if (!pname) {
1160         pname = uprv_strrchr(*argv, '/');
1161     }
1162 #endif
1163     if (!pname) {
1164         pname = *argv;
1165     } else {
1166         ++pname;
1167     }
1168
1169     // First, get the arguments from command-line
1170     // to know the codepages to convert between
1171
1172     remainArgv = remainArgvLimit = argv + 1;
1173     for (iter = argv + 1; iter != end; iter++) {
1174         // Check for from charset
1175         if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1176             iter++;
1177             if (iter != end)
1178                 fromcpage = *iter;
1179             else
1180                 usage(pname, 1);
1181         } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1182             iter++;
1183             if (iter != end)
1184                 tocpage = *iter;
1185             else
1186                 usage(pname, 1);
1187         } else if (strcmp("-x", *iter) == 0) {
1188             iter++;
1189             if (iter != end)
1190                 translit = *iter;
1191             else
1192                 usage(pname, 1);
1193         } else if (!strcmp("--fallback", *iter)) {
1194             fallback = TRUE;
1195         } else if (!strcmp("--no-fallback", *iter)) {
1196             fallback = FALSE;
1197         } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1198             iter++;
1199             if (iter != end) {
1200                 bufsz = atoi(*iter);
1201                 if ((int) bufsz <= 0) {
1202                     initMsg(pname);
1203                     UnicodeString str(*iter);
1204                     initMsg(pname);
1205                     u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
1206                     return 3;
1207                 }
1208             } else {
1209                 usage(pname, 1);
1210             }
1211         } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1212             if (printTranslits) {
1213                 usage(pname, 1);
1214             }
1215             printConvs = TRUE;
1216         } else if (strcmp("--default-code", *iter) == 0) {
1217             if (printTranslits) {
1218                 usage(pname, 1);
1219             }
1220             printName = ucnv_getDefaultName();
1221         } else if (strcmp("--list-code", *iter) == 0) {
1222             if (printTranslits) {
1223                 usage(pname, 1);
1224             }
1225
1226             iter++;
1227             if (iter != end) {
1228                 UErrorCode e = U_ZERO_ERROR;
1229                 printName = ucnv_getAlias(*iter, 0, &e);
1230                 if (U_FAILURE(e) || !printName) {
1231                     UnicodeString str(*iter);
1232                     initMsg(pname);
1233                     u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
1234                     return 2;
1235                 }
1236             } else
1237                 usage(pname, 1);
1238         } else if (strcmp("--canon", *iter) == 0) {
1239             printCanon = TRUE;
1240         } else if (strcmp("-L", *iter) == 0
1241             || !strcmp("--list-transliterators", *iter)) {
1242             if (printConvs) {
1243                 usage(pname, 1);
1244             }
1245             printTranslits = TRUE;
1246         } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1247             || !strcmp("--help", *iter)) {
1248             usage(pname, 0);
1249         } else if (!strcmp("-c", *iter)) {
1250             fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1251         } else if (!strcmp("--to-callback", *iter)) {
1252             iter++;
1253             if (iter != end) {
1254                 const struct callback_ent *cbe = findCallback(*iter);
1255                 if (cbe) {
1256                     fromucallback = cbe->fromu;
1257                     fromuctxt = cbe->fromuctxt;
1258                 } else {
1259                     UnicodeString str(*iter);
1260                     initMsg(pname);
1261                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1262                     return 4;
1263                 }
1264             } else {
1265                 usage(pname, 1);
1266             }
1267         } else if (!strcmp("--from-callback", *iter)) {
1268             iter++;
1269             if (iter != end) {
1270                 const struct callback_ent *cbe = findCallback(*iter);
1271                 if (cbe) {
1272                     toucallback = cbe->tou;
1273                     touctxt = cbe->touctxt;
1274                 } else {
1275                     UnicodeString str(*iter);
1276                     initMsg(pname);
1277                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1278                     return 4;
1279                 }
1280             } else {
1281                 usage(pname, 1);
1282             }
1283         } else if (!strcmp("-i", *iter)) {
1284             toucallback = UCNV_TO_U_CALLBACK_SKIP;
1285         } else if (!strcmp("--callback", *iter)) {
1286             iter++;
1287             if (iter != end) {
1288                 const struct callback_ent *cbe = findCallback(*iter);
1289                 if (cbe) {
1290                     fromucallback = cbe->fromu;
1291                     fromuctxt = cbe->fromuctxt;
1292                     toucallback = cbe->tou;
1293                     touctxt = cbe->touctxt;
1294                 } else {
1295                     UnicodeString str(*iter);
1296                     initMsg(pname);
1297                     u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
1298                     return 4;
1299                 }
1300             } else {
1301                 usage(pname, 1);
1302             }
1303         } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
1304             verbose = FALSE;
1305         } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
1306             verbose = TRUE;
1307         } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
1308             printf("%s v2.1  ICU " U_ICU_VERSION "\n", pname);
1309             return 0;
1310         } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1311             ++iter;
1312             if (iter != end && !outfilestr) {
1313                 outfilestr = *iter;
1314             } else {
1315                 usage(pname, 1);
1316             }
1317         } else if (0 == strcmp("--add-signature", *iter)) {
1318             cf.signature = 1;
1319         } else if (0 == strcmp("--remove-signature", *iter)) {
1320             cf.signature = -1;
1321         } else if (**iter == '-' && (*iter)[1]) {
1322             usage(pname, 1);
1323         } else {
1324             // move a non-option up in argv[]
1325             *remainArgvLimit++ = *iter;
1326         }
1327     }
1328
1329     if (printConvs || printName) {
1330         return printConverters(pname, printName, printCanon) ? 2 : 0;
1331     } else if (printTranslits) {
1332         return printTransliterators(printCanon) ? 3 : 0;
1333     }
1334
1335     if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1336         fromcpage = ucnv_getDefaultName();
1337     }
1338     if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1339         tocpage = ucnv_getDefaultName();
1340     }
1341
1342     // Open the correct output file or connect to stdout for reading input
1343     if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1344         outfile = fopen(outfilestr, "wb");
1345         if (outfile == 0) {
1346             UnicodeString str1(outfilestr, "");
1347             UnicodeString str2(strerror(errno), "");
1348             initMsg(pname);
1349             u_wmsg(stderr, "cantCreateOutputF",
1350                 str1.getBuffer(), str2.getBuffer());
1351             return 1;
1352         }
1353     } else {
1354         outfilestr = "-";
1355         outfile = stdout;
1356 #ifdef USE_FILENO_BINARY_MODE
1357         if (setmode(fileno(outfile), O_BINARY) == -1) {
1358             u_wmsg(stderr, "cantSetOutBinMode");
1359             exit(-1);
1360         }
1361 #endif
1362     }
1363
1364     /* Loop again on the arguments to find all the input files, and
1365     convert them. */
1366
1367     cf.setBufferSize(bufsz);
1368
1369     if(remainArgv < remainArgvLimit) {
1370         for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1371             if (!cf.convertFile(
1372                     pname, fromcpage, toucallback, touctxt, tocpage,
1373                     fromucallback, fromuctxt, fallback, translit, *iter,
1374                     outfile, verbose)
1375             ) {
1376                 goto error_exit;
1377             }
1378         }
1379     } else {
1380         if (!cf.convertFile(
1381                 pname, fromcpage, toucallback, touctxt, tocpage,
1382                 fromucallback, fromuctxt, fallback, translit, 0,
1383                 outfile, verbose)
1384         ) {
1385             goto error_exit;
1386         }
1387     }
1388
1389     goto normal_exit;
1390 error_exit:
1391     ret = 1;
1392 normal_exit:
1393
1394     if (outfile != stdout) {
1395         fclose(outfile);
1396     }
1397
1398     return ret;
1399 }
1400
1401
1402 /*
1403  * Hey, Emacs, please set the following:
1404  *
1405  * Local Variables:
1406  * indent-tabs-mode: nil
1407  * End:
1408  *
1409  */