]> git.saurik.com Git - apple/icu.git/blame - icuSources/extra/uconv/uconv.cpp
ICU-59180.0.1.tar.gz
[apple/icu.git] / icuSources / extra / uconv / uconv.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*****************************************************************************
4*
2ca993e8 5* Copyright (C) 1999-2016, International Business Machines
b75a7d8f
A
6* Corporation and others. All Rights Reserved.
7*
8******************************************************************************/
9
10/*
11 * uconv(1): an iconv(1)-like converter using ICU.
12 *
73c04bcf 13 * Original code by Jonas Utterstr&#x00F6;m <jonas.utterstrom@vittran.norrnod.se>
b75a7d8f
A
14 * contributed in 1999.
15 *
16 * Conversion to the C conversion API and many improvements by
17 * Yves Arrouye <yves@realnames.com>, current maintainer.
18 *
374ca955
A
19 * Markus Scherer maintainer from 2003.
20 * See source code repository history for changes.
b75a7d8f
A
21 */
22
23#include <unicode/utypes.h>
374ca955 24#include <unicode/putil.h>
b75a7d8f 25#include <unicode/ucnv.h>
374ca955 26#include <unicode/uenum.h>
b75a7d8f
A
27#include <unicode/unistr.h>
28#include <unicode/translit.h>
374ca955
A
29#include <unicode/uset.h>
30#include <unicode/uclean.h>
51004dcb 31#include <unicode/utf16.h>
b75a7d8f
A
32
33#include <stdio.h>
34#include <errno.h>
35#include <string.h>
36#include <stdlib.h>
37
38#include "cmemory.h"
39#include "cstring.h"
40#include "ustrfmt.h"
41
42#include "unicode/uwmsg.h"
43
46f4442e
A
44U_NAMESPACE_USE
45
4388f060 46#if U_PLATFORM_USES_ONLY_WIN32_API && !defined(__STRICT_ANSI__)
b75a7d8f
A
47#include <io.h>
48#include <fcntl.h>
4388f060 49#if U_PLATFORM_USES_ONLY_WIN32_API
374ca955 50#define USE_FILENO_BINARY_MODE 1
73c04bcf
A
51/* Windows likes to rename Unix-like functions */
52#ifndef fileno
53#define fileno _fileno
54#endif
55#ifndef setmode
56#define setmode _setmode
57#endif
58#ifndef O_BINARY
59#define O_BINARY _O_BINARY
60#endif
61#endif
b75a7d8f
A
62#endif
63
64#ifdef UCONVMSG_LINK
65/* below from the README */
66#include "unicode/utypes.h"
67#include "unicode/udata.h"
68U_CFUNC char uconvmsg_dat[];
69#endif
70
71#define DEFAULT_BUFSZ 4096
72#define UCONVMSG "uconvmsg"
73
74static UResourceBundle *gBundle = 0; /* Bundle containing messages. */
75
76/*
77 * Initialize the message bundle so that message strings can be fetched
78 * by u_wmsg().
79 *
80 */
81
82static void initMsg(const char *pname) {
83 static int ps = 0;
84
85 if (!ps) {
86 char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. */
87 UErrorCode err = U_ZERO_ERROR;
88
89 ps = 1;
90
91 /* Set up our static data - if any */
51004dcb 92#if defined(UCONVMSG_LINK) && U_PLATFORM != U_PF_OS390 /* On z/OS, this is failing. */
b75a7d8f
A
93 udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
94 if (U_FAILURE(err)) {
95 fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
96 pname, u_errorName(err));
97 err = U_ZERO_ERROR; /* It may still fail */
98 }
99#endif
100
101 /* Get messages. */
102 gBundle = u_wmsg_setPath(UCONVMSG, &err);
103 if (U_FAILURE(err)) {
104 fprintf(stderr,
105 "%s: warning: couldn't open bundle %s: %s\n",
106 pname, UCONVMSG, u_errorName(err));
107#ifdef UCONVMSG_LINK
108 fprintf(stderr,
109 "%s: setAppData was called, internal data %s failed to load\n",
110 pname, UCONVMSG);
111#endif
112
113 err = U_ZERO_ERROR;
114 /* that was try #1, try again with a path */
115 uprv_strcpy(dataPath, u_getDataDirectory());
116 uprv_strcat(dataPath, U_FILE_SEP_STRING);
117 uprv_strcat(dataPath, UCONVMSG);
118
119 gBundle = u_wmsg_setPath(dataPath, &err);
120 if (U_FAILURE(err)) {
121 fprintf(stderr,
122 "%s: warning: still couldn't open bundle %s: %s\n",
123 pname, dataPath, u_errorName(err));
124 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
125 }
126 }
127 }
128}
129
130/* Mapping of callback names to the callbacks passed to the converter
131 API. */
132
133static struct callback_ent {
134 const char *name;
135 UConverterFromUCallback fromu;
136 const void *fromuctxt;
137 UConverterToUCallback tou;
138 const void *touctxt;
139} transcode_callbacks[] = {
140 { "substitute",
141 UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
142 UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
143 { "skip",
144 UCNV_FROM_U_CALLBACK_SKIP, 0,
145 UCNV_TO_U_CALLBACK_SKIP, 0 },
146 { "stop",
147 UCNV_FROM_U_CALLBACK_STOP, 0,
148 UCNV_TO_U_CALLBACK_STOP, 0 },
149 { "escape",
150 UCNV_FROM_U_CALLBACK_ESCAPE, 0,
151 UCNV_TO_U_CALLBACK_ESCAPE, 0},
152 { "escape-icu",
153 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
154 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
155 { "escape-java",
156 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
157 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
158 { "escape-c",
159 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
160 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
161 { "escape-xml",
162 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
163 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
164 { "escape-xml-hex",
165 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
166 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
167 { "escape-xml-dec",
168 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
169 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
170 { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
171 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
172};
173
174/* Return a pointer to a callback record given its name. */
175
176static const struct callback_ent *findCallback(const char *name) {
177 int i, count =
2ca993e8 178 UPRV_LENGTHOF(transcode_callbacks);
b75a7d8f
A
179
180 /* We'll do a linear search, there aren't many of them and bsearch()
181 may not be that portable. */
182
183 for (i = 0; i < count; ++i) {
184 if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
185 return &transcode_callbacks[i];
186 }
187 }
188
189 return 0;
190}
191
192/* Print converter information. If lookfor is set, only that converter will
193 be printed, otherwise all converters will be printed. If canon is non
194 zero, tags and aliases for each converter are printed too, in the format
195 expected for convrters.txt(5). */
196
197static int printConverters(const char *pname, const char *lookfor,
374ca955 198 UBool canon)
b75a7d8f
A
199{
200 UErrorCode err = U_ZERO_ERROR;
201 int32_t num;
202 uint16_t num_stds;
203 const char **stds;
204
205 /* If there is a specified name, just handle that now. */
206
207 if (lookfor) {
208 if (!canon) {
209 printf("%s\n", lookfor);
210 return 0;
211 } else {
212 /* Because we are printing a canonical name, we need the
213 true converter name. We've done that already except for
214 the default name (because we want to print the exact
215 name one would get when calling ucnv_getDefaultName()
216 in non-canon mode). But since we do not know at this
217 point if we have the default name or something else, we
218 need to normalize again to the canonical converter
219 name. */
220
221 const char *truename = ucnv_getAlias(lookfor, 0, &err);
222 if (U_SUCCESS(err)) {
223 lookfor = truename;
224 } else {
225 err = U_ZERO_ERROR;
226 }
227 }
228 }
229
230 /* Print converter names. We come here for one of two reasons: we
231 are printing all the names (lookfor was null), or we have a
232 single converter to print but in canon mode, hence we need to
233 get to it in order to print everything. */
234
235 num = ucnv_countAvailable();
236 if (num <= 0) {
237 initMsg(pname);
238 u_wmsg(stderr, "cantGetNames");
239 return -1;
240 }
241 if (lookfor) {
242 num = 1; /* We know where we want to be. */
243 }
244
245 num_stds = ucnv_countStandards();
246 stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
247 if (!stds) {
248 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
249 return -1;
250 } else {
251 uint16_t s;
252
374ca955
A
253 if (canon) {
254 printf("{ ");
255 }
b75a7d8f
A
256 for (s = 0; s < num_stds; ++s) {
257 stds[s] = ucnv_getStandard(s, &err);
374ca955
A
258 if (canon) {
259 printf("%s ", stds[s]);
260 }
b75a7d8f
A
261 if (U_FAILURE(err)) {
262 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
46f4442e 263 goto error_cleanup;
b75a7d8f
A
264 }
265 }
374ca955
A
266 if (canon) {
267 puts("}");
268 }
b75a7d8f
A
269 }
270
271 for (int32_t i = 0; i < num; i++) {
272 const char *name;
273 uint16_t num_aliases;
274
275 /* Set the name either to what we are looking for, or
276 to the current converter name. */
277
278 if (lookfor) {
279 name = lookfor;
280 } else {
281 name = ucnv_getAvailableName(i);
282 }
283
284 /* Get all the aliases associated to the name. */
285
286 err = U_ZERO_ERROR;
287 num_aliases = ucnv_countAliases(name, &err);
288 if (U_FAILURE(err)) {
289 printf("%s", name);
290
374ca955 291 UnicodeString str(name, "");
b75a7d8f 292 putchar('\t');
374ca955 293 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
b75a7d8f 294 u_wmsg_errorName(err));
46f4442e 295 goto error_cleanup;
b75a7d8f
A
296 } else {
297 uint16_t a, s, t;
298
299 /* Write all the aliases and their tags. */
300
301 for (a = 0; a < num_aliases; ++a) {
302 const char *alias = ucnv_getAlias(name, a, &err);
303
304 if (U_FAILURE(err)) {
374ca955 305 UnicodeString str(name, "");
b75a7d8f 306 putchar('\t');
374ca955 307 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
b75a7d8f 308 u_wmsg_errorName(err));
46f4442e 309 goto error_cleanup;
b75a7d8f
A
310 }
311
374ca955
A
312 /* Print the current alias so that it looks right. */
313 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
314 alias,
315 (canon ? "" : " "));
b75a7d8f
A
316
317 /* Look (slowly, linear searching) for a tag. */
318
319 if (canon) {
374ca955
A
320 /* -1 to skip the last standard */
321 for (s = t = 0; s < num_stds-1; ++s) {
322 UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
323 if (U_SUCCESS(err)) {
324 /* List the standard tags */
325 const char *standardName;
326 UBool isFirst = TRUE;
327 UErrorCode enumError = U_ZERO_ERROR;
328 while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
329 /* See if this alias is supported by this standard. */
330 if (!strcmp(standardName, alias)) {
331 if (!t) {
332 printf(" {");
333 t = 1;
334 }
335 /* Print a * after the default standard name */
336 printf(" %s%s", stds[s], (isFirst ? "*" : ""));
b75a7d8f 337 }
374ca955 338 isFirst = FALSE;
b75a7d8f
A
339 }
340 }
341 }
342 if (t) {
343 printf(" }");
344 }
345 }
374ca955
A
346 /* Terminate this entry. */
347 if (canon) {
348 puts("");
349 }
b75a7d8f
A
350
351 /* Move on. */
b75a7d8f 352 }
374ca955
A
353 /* Terminate this entry. */
354 if (!canon) {
355 puts("");
356 }
b75a7d8f
A
357 }
358 }
359
360 /* Free temporary data. */
361
362 uprv_free(stds);
363
364 /* Success. */
365
366 return 0;
46f4442e
A
367error_cleanup:
368 uprv_free(stds);
369 return -1;
b75a7d8f
A
370}
371
372/* Print all available transliterators. If canon is non zero, print
373 one transliterator per line. */
374
374ca955 375static int printTransliterators(UBool canon)
b75a7d8f
A
376{
377#if UCONFIG_NO_TRANSLITERATION
378 printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
379 return 1;
380#else
729e4ab9
A
381 UErrorCode status = U_ZERO_ERROR;
382 UEnumeration *ids = utrans_openIDs(&status);
383 int32_t i, numtrans = uenum_count(ids, &status);
b75a7d8f
A
384
385 char sepchar = canon ? '\n' : ' ';
386
729e4ab9
A
387 for (i = 0; U_SUCCESS(status)&& (i < numtrans); ++i) {
388 int32_t len;
389 const char *nextTrans = uenum_next(ids, &len, &status);
b75a7d8f 390
729e4ab9 391 printf("%s", nextTrans);
b75a7d8f
A
392 if (i < numtrans - 1) {
393 putchar(sepchar);
394 }
395 }
396
729e4ab9
A
397 uenum_close(ids);
398
b75a7d8f
A
399 /* Add a terminating newline if needed. */
400
401 if (sepchar != '\n') {
402 putchar('\n');
403 }
404
b75a7d8f
A
405 /* Success. */
406
407 return 0;
408#endif
409}
410
374ca955
A
// UTF-16 code unit values that the conversion code tests for by name.
enum {
    uSP  = 0x0020,  // space
    uCR  = 0x000D,  // carriage return
    uLF  = 0x000A,  // line feed
    uNL  = 0x0085,  // newline
    uLS  = 0x2028,  // line separator
    uPS  = 0x2029,  // paragraph separator
    uSig = 0xFEFF   // signature/BOM character
};
b75a7d8f 420
374ca955
A
421static inline int32_t
422getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
423 // find one of
424 // CR, LF, CRLF, NL, LS, PS
425 // for paragraph ends (see UAX #13/Unicode 4)
426 // and include it in the chunk
427 // all of these characters are on the BMP
428 // do not include FF or VT in case they are part of a paragraph
429 // (important for bidi contexts)
430 static const UChar paraEnds[] = {
431 0xd, 0xa, 0x85, 0x2028, 0x2029
432 };
433 enum {
434 iCR, iLF, iNL, iLS, iPS, iCount
435 };
436
437 // first, see if there is a CRLF split between prev and s
438 if (prev.endsWith(paraEnds + iCR, 1)) {
439 if (s.startsWith(paraEnds + iLF, 1)) {
440 return 1; // split CRLF, include the LF
441 } else if (!s.isEmpty()) {
442 return 0; // complete the last chunk
443 } else {
444 return -1; // wait for actual further contents to arrive
b75a7d8f
A
445 }
446 }
447
374ca955
A
448 const UChar *u = s.getBuffer(), *limit = u + s.length();
449 UChar c;
450
451 while (u < limit) {
452 c = *u++;
453 if (
454 ((c < uSP) && (c == uCR || c == uLF)) ||
455 (c == uNL) ||
456 ((c & uLS) == uLS)
457 ) {
458 if (c == uCR) {
459 // check for CRLF
460 if (u == limit) {
461 return -1; // LF may be in the next chunk
462 } else if (*u == uLF) {
463 ++u; // include the LF in this chunk
464 }
465 }
466 return (int32_t)(u - s.getBuffer());
467 }
468 }
469
470 return -1; // continue collecting the chunk
471}
472
// How an output converter relates to the U+FEFF Unicode signature (BOM).
enum {
    CNV_NO_FEFF   = 0,  // cannot convert the U+FEFF Unicode signature character (BOM)
    CNV_WITH_FEFF = 1,  // can convert the U+FEFF signature character
    CNV_ADDS_FEFF = 2   // automatically adds/detects the U+FEFF signature character
};
478
479static inline UChar
480nibbleToHex(uint8_t n) {
481 n &= 0xf;
482 return
483 n <= 9 ?
484 (UChar)(0x30 + n) :
485 (UChar)((0x61 - 10) + n);
486}
487
488// check the converter's Unicode signature properties;
489// the fromUnicode side of the converter must be in its initial state
490// and will be reset again if it was used
491static int32_t
492cnvSigType(UConverter *cnv) {
493 UErrorCode err;
494 int32_t result;
495
496 // test if the output charset can convert U+FEFF
497 USet *set = uset_open(1, 0);
498 err = U_ZERO_ERROR;
499 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
500 if (U_SUCCESS(err) && uset_contains(set, uSig)) {
501 result = CNV_WITH_FEFF;
502 } else {
503 result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
504 }
505 uset_close(set);
506
507 if (result == CNV_WITH_FEFF) {
508 // test if the output charset emits a signature anyway
509 const UChar a[1] = { 0x61 }; // "a"
510 const UChar *in;
511
512 char buffer[20];
513 char *out;
514
515 in = a;
516 out = buffer;
517 err = U_ZERO_ERROR;
518 ucnv_fromUnicode(cnv,
519 &out, buffer + sizeof(buffer),
520 &in, a + 1,
521 NULL, TRUE, &err);
522 ucnv_resetFromUnicode(cnv);
523
524 if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
525 U_SUCCESS(err)
526 ) {
527 result = CNV_ADDS_FEFF;
528 }
529 }
530
531 return result;
b75a7d8f
A
532}
533
374ca955
A
534class ConvertFile {
535public:
536 ConvertFile() :
537 buf(NULL), outbuf(NULL), fromoffsets(NULL),
538 bufsz(0), signature(0) {}
539
540 void
541 setBufferSize(size_t bufferSize) {
542 bufsz = bufferSize;
543
544 buf = new char[2 * bufsz];
545 outbuf = buf + bufsz;
546
547 // +1 for an added U+FEFF in the intermediate Unicode buffer
548 fromoffsets = new int32_t[bufsz + 1];
549 }
550
551 ~ConvertFile() {
552 delete [] buf;
553 delete [] fromoffsets;
554 }
555
556 UBool convertFile(const char *pname,
557 const char *fromcpage,
558 UConverterToUCallback toucallback,
559 const void *touctxt,
560 const char *tocpage,
561 UConverterFromUCallback fromucallback,
562 const void *fromuctxt,
563 UBool fallback,
564 const char *translit,
565 const char *infilestr,
566 FILE * outfile, int verbose);
567private:
568 friend int main(int argc, char **argv);
569
570 char *buf, *outbuf;
571 int32_t *fromoffsets;
572
573 size_t bufsz;
574 int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
575};
576
b75a7d8f 577// Convert a file from one encoding to another
374ca955
A
578UBool
579ConvertFile::convertFile(const char *pname,
b75a7d8f
A
580 const char *fromcpage,
581 UConverterToUCallback toucallback,
582 const void *touctxt,
583 const char *tocpage,
584 UConverterFromUCallback fromucallback,
585 const void *fromuctxt,
374ca955 586 UBool fallback,
b75a7d8f
A
587 const char *translit,
588 const char *infilestr,
589 FILE * outfile, int verbose)
590{
591 FILE *infile;
592 UBool ret = TRUE;
593 UConverter *convfrom = 0;
594 UConverter *convto = 0;
595 UErrorCode err = U_ZERO_ERROR;
596 UBool flush;
51004dcb 597 UBool closeFile = FALSE;
374ca955 598 const char *cbufp, *prevbufp;
b75a7d8f 599 char *bufp;
b75a7d8f
A
600
601 uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */
602
374ca955 603 const UChar *unibuf, *unibufbp;
b75a7d8f 604 UChar *unibufp;
b75a7d8f 605
374ca955 606 size_t rd, wr;
b75a7d8f
A
607
608#if !UCONFIG_NO_TRANSLITERATION
609 Transliterator *t = 0; // Transliterator acting on Unicode data.
374ca955 610 UnicodeString chunk; // One chunk of the text being collected for transformation.
b75a7d8f
A
611#endif
612 UnicodeString u; // String to do the transliteration.
374ca955
A
613 int32_t ulen;
614
615 // use conversion offsets for error messages
616 // unless a transliterator is used -
617 // a text transformation will reorder characters in unpredictable ways
618 UBool useOffsets = TRUE;
b75a7d8f
A
619
620 // Open the correct input file or connect to stdin for reading input
621
622 if (infilestr != 0 && strcmp(infilestr, "-")) {
623 infile = fopen(infilestr, "rb");
624 if (infile == 0) {
625 UnicodeString str1(infilestr, "");
626 str1.append((UChar32) 0);
627 UnicodeString str2(strerror(errno), "");
628 str2.append((UChar32) 0);
629 initMsg(pname);
630 u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
631 return FALSE;
632 }
51004dcb 633 closeFile = TRUE;
b75a7d8f
A
634 } else {
635 infilestr = "-";
636 infile = stdin;
374ca955 637#ifdef USE_FILENO_BINARY_MODE
b75a7d8f
A
638 if (setmode(fileno(stdin), O_BINARY) == -1) {
639 initMsg(pname);
640 u_wmsg(stderr, "cantSetInBinMode");
641 return FALSE;
642 }
643#endif
644 }
645
646 if (verbose) {
647 fprintf(stderr, "%s:\n", infilestr);
648 }
649
650#if !UCONFIG_NO_TRANSLITERATION
651 // Create transliterator as needed.
652
653 if (translit != NULL && *translit) {
654 UParseError parse;
655 UnicodeString str(translit), pestr;
656
657 /* Create from rules or by ID as needed. */
658
659 parse.line = -1;
660
661 if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
57a6839d 662 t = Transliterator::createFromRules(UNICODE_STRING_SIMPLE("Uconv"), str, UTRANS_FORWARD, parse, err);
b75a7d8f 663 } else {
57a6839d 664 t = Transliterator::createInstance(UnicodeString(translit, -1, US_INV), UTRANS_FORWARD, err);
b75a7d8f
A
665 }
666
667 if (U_FAILURE(err)) {
668 str.append((UChar32) 0);
669 initMsg(pname);
670
671 if (parse.line >= 0) {
672 UChar linebuf[20], offsetbuf[20];
673 uprv_itou(linebuf, 20, parse.line, 10, 0);
674 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
374ca955 675 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
b75a7d8f
A
676 u_wmsg_errorName(err), linebuf, offsetbuf);
677 } else {
374ca955 678 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
b75a7d8f
A
679 u_wmsg_errorName(err));
680 }
681
682 if (t) {
683 delete t;
684 t = 0;
685 }
686 goto error_exit;
687 }
374ca955
A
688
689 useOffsets = FALSE;
b75a7d8f
A
690 }
691#endif
692
693 // Create codepage converter. If the codepage or its aliases weren't
694 // available, it returns NULL and a failure code. We also set the
695 // callbacks, and return errors in the same way.
696
697 convfrom = ucnv_open(fromcpage, &err);
698 if (U_FAILURE(err)) {
374ca955 699 UnicodeString str(fromcpage, "");
b75a7d8f 700 initMsg(pname);
374ca955 701 u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
b75a7d8f
A
702 u_wmsg_errorName(err));
703 goto error_exit;
704 }
705 ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
706 if (U_FAILURE(err)) {
707 initMsg(pname);
708 u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
709 goto error_exit;
710 }
711
712 convto = ucnv_open(tocpage, &err);
713 if (U_FAILURE(err)) {
374ca955 714 UnicodeString str(tocpage, "");
b75a7d8f 715 initMsg(pname);
374ca955 716 u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
b75a7d8f
A
717 u_wmsg_errorName(err));
718 goto error_exit;
719 }
720 ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
721 if (U_FAILURE(err)) {
722 initMsg(pname);
723 u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
724 goto error_exit;
725 }
726 ucnv_setFallback(convto, fallback);
727
374ca955
A
728 UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
729 int8_t sig;
b75a7d8f
A
730
731 // OK, we can convert now.
374ca955
A
732 sig = signature;
733 rd = 0;
b75a7d8f
A
734
735 do {
374ca955
A
736 willexit = FALSE;
737
738 // input file offset at the beginning of the next buffer
739 infoffset += rd;
b75a7d8f
A
740
741 rd = fread(buf, 1, bufsz, infile);
742 if (ferror(infile) != 0) {
743 UnicodeString str(strerror(errno));
b75a7d8f 744 initMsg(pname);
374ca955 745 u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
b75a7d8f
A
746 goto error_exit;
747 }
748
374ca955
A
749 // Convert the read buffer into the new encoding via Unicode.
750 // After the call 'unibufp' will be placed behind the last
b75a7d8f 751 // character that was converted in the 'unibuf'.
374ca955 752 // Also the 'cbufp' is positioned behind the last converted
b75a7d8f
A
753 // character.
754 // At the last conversion in the file, flush should be set to
374ca955 755 // true so that we get all characters converted.
b75a7d8f
A
756 //
757 // The converter must be flushed at the end of conversion so
758 // that characters on hold also will be written.
759
b75a7d8f 760 cbufp = buf;
374ca955 761 flush = (UBool)(rd != bufsz);
b75a7d8f 762
374ca955
A
763 // convert until the input is consumed
764 do {
765 // remember the start of the current byte-to-Unicode conversion
766 prevbufp = cbufp;
767
768 unibuf = unibufp = u.getBuffer((int32_t)bufsz);
769
770 // Use bufsz instead of u.getCapacity() for the targetLimit
771 // so that we don't overflow fromoffsets[].
772 ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
773 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
774
775 ulen = (int32_t)(unibufp - unibuf);
73c04bcf 776 u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
374ca955
A
777
778 // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
779 // converting all of the input bytes.
780 // It works like this because ucnv_toUnicode() returns only under the
781 // following conditions:
782 // - an error occurred during conversion (an error code is set)
783 // - the target buffer is filled (the error code indicates an overflow)
784 // - the source is consumed
785 // That is, if the error code does not indicate a failure,
786 // not even an overflow, then the source must be consumed entirely.
787 fromSawEndOfBytes = (UBool)U_SUCCESS(err);
788
789 if (err == U_BUFFER_OVERFLOW_ERROR) {
790 err = U_ZERO_ERROR;
791 } else if (U_FAILURE(err)) {
792 char pos[32], errorBytes[32];
793 int8_t i, length, errorLength;
794
795 UErrorCode localError = U_ZERO_ERROR;
796 errorLength = (int8_t)sizeof(errorBytes);
797 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
798 if (U_FAILURE(localError) || errorLength == 0) {
799 errorLength = 1;
800 }
b75a7d8f 801
374ca955
A
802 // print the input file offset of the start of the error bytes:
803 // input file offset of the current byte buffer +
804 // length of the just consumed bytes -
805 // length of the error bytes
806 length =
807 (int8_t)sprintf(pos, "%d",
808 (int)(infoffset + (cbufp - buf) - errorLength));
809
810 // output the bytes that caused the error
811 UnicodeString str;
812 for (i = 0; i < errorLength; ++i) {
813 if (i > 0) {
814 str.append((UChar)uSP);
815 }
816 str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
817 str.append(nibbleToHex((uint8_t)errorBytes[i]));
818 }
b75a7d8f 819
374ca955
A
820 initMsg(pname);
821 u_wmsg(stderr, "problemCvtToU",
822 UnicodeString(pos, length, "").getTerminatedBuffer(),
823 str.getTerminatedBuffer(),
824 u_wmsg_errorName(err));
b75a7d8f 825
374ca955
A
826 willexit = TRUE;
827 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
828 }
b75a7d8f 829
374ca955
A
830 // Replaced a check for whether the input was consumed by
831 // looping until it is; message key "premEndInput" now obsolete.
b75a7d8f 832
374ca955
A
833 if (ulen == 0) {
834 continue;
835 }
b75a7d8f 836
374ca955
A
837 // remove a U+FEFF Unicode signature character if requested
838 if (sig < 0) {
839 if (u.charAt(0) == uSig) {
840 u.remove(0, 1);
b75a7d8f 841
374ca955
A
842 // account for the removed UChar and offset
843 --ulen;
b75a7d8f 844
374ca955
A
845 if (useOffsets) {
846 // remove an offset from fromoffsets[] as well
847 // to keep the array parallel with the UChars
848 memmove(fromoffsets, fromoffsets + 1, ulen * 4);
849 }
b75a7d8f 850
374ca955
A
851 }
852 sig = 0;
853 }
b75a7d8f 854
374ca955
A
855#if !UCONFIG_NO_TRANSLITERATION
856 // Transliterate/transform if needed.
857
858 // For transformation, we use chunking code -
859 // collect Unicode input until, for example, an end-of-line,
860 // then transform and output-convert that and continue collecting.
861 // This makes the transformation result independent of the buffer size
862 // while avoiding the slower keyboard mode.
863 // The end-of-chunk characters are completely included in the
864 // transformed string in case they are to be transformed themselves.
865 if (t != NULL) {
866 UnicodeString out;
867 int32_t chunkLimit;
868
869 do {
870 chunkLimit = getChunkLimit(chunk, u);
871 if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
872 // use all of the rest at the end of the text
873 chunkLimit = u.length();
874 }
875 if (chunkLimit >= 0) {
876 // complete the chunk and transform it
877 chunk.append(u, 0, chunkLimit);
878 u.remove(0, chunkLimit);
879 t->transliterate(chunk);
880
881 // append the transformation result to the result and empty the chunk
882 out.append(chunk);
883 chunk.remove();
884 } else {
885 // continue collecting the chunk
886 chunk.append(u);
887 break;
888 }
889 } while (!u.isEmpty());
b75a7d8f 890
374ca955
A
891 u = out;
892 ulen = u.length();
893 }
894#endif
b75a7d8f 895
374ca955
A
896 // add a U+FEFF Unicode signature character if requested
897 // and possible/necessary
898 if (sig > 0) {
899 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
900 u.insert(0, (UChar)uSig);
901
902 if (useOffsets) {
903 // insert a pseudo-offset into fromoffsets[] as well
904 // to keep the array parallel with the UChars
905 memmove(fromoffsets + 1, fromoffsets, ulen * 4);
906 fromoffsets[0] = -1;
907 }
b75a7d8f 908
374ca955
A
909 // account for the additional UChar and offset
910 ++ulen;
b75a7d8f 911 }
374ca955 912 sig = 0;
b75a7d8f
A
913 }
914
374ca955
A
915 // Convert the Unicode buffer into the destination codepage
916 // Again 'bufp' will be placed behind the last converted character
917 // And 'unibufp' will be placed behind the last converted unicode character
918 // At the last conversion flush should be set to true to ensure that
919 // all characters left get converted
920
921 unibuf = unibufbp = u.getBuffer();
922
923 do {
924 bufp = outbuf;
925
926 // Use fromSawEndOfBytes in addition to the flush flag -
927 // it indicates whether the intermediate Unicode string
928 // contains the very last UChars for the very last input bytes.
929 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
930 &unibufbp,
931 unibuf + ulen,
932 NULL, (UBool)(flush && fromSawEndOfBytes), &err);
933
934 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
935 // converting all of the intermediate UChars.
936 // See comment for fromSawEndOfBytes.
937 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
938
939 if (err == U_BUFFER_OVERFLOW_ERROR) {
940 err = U_ZERO_ERROR;
941 } else if (U_FAILURE(err)) {
942 UChar errorUChars[4];
943 const char *errtag;
944 char pos[32];
945 UChar32 c;
946 int8_t i, length, errorLength;
947
948 UErrorCode localError = U_ZERO_ERROR;
2ca993e8 949 errorLength = UPRV_LENGTHOF(errorUChars);
374ca955
A
950 ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
951 if (U_FAILURE(localError) || errorLength == 0) {
952 // need at least 1 so that we don't access beyond the length of fromoffsets[]
953 errorLength = 1;
954 }
b75a7d8f 955
374ca955 956 int32_t ferroffset;
b75a7d8f 957
374ca955
A
958 if (useOffsets) {
959 // Unicode buffer offset of the start of the error UChars
960 ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
961 if (ferroffset < 0) {
962 // approximation - the character started in the previous Unicode buffer
963 ferroffset = 0;
964 }
b75a7d8f 965
374ca955
A
966 // get the corresponding byte offset out of fromoffsets[]
967 // go back if the offset is not known for some of the UChars
968 int32_t fromoffset;
969 do {
970 fromoffset = fromoffsets[ferroffset];
971 } while (fromoffset < 0 && --ferroffset >= 0);
972
973 // total input file offset =
974 // input file offset of the current byte buffer +
975 // byte buffer offset of where the current Unicode buffer is converted from +
976 // fromoffsets[Unicode offset]
977 ferroffset = infoffset + (prevbufp - buf) + fromoffset;
978 errtag = "problemCvtFromU";
979 } else {
980 // Do not use fromoffsets if (t != NULL) because the Unicode text may
981 // be different from what the offsets refer to.
982
983 // output file offset
984 ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
985 errtag = "problemCvtFromUOut";
986 }
b75a7d8f 987
374ca955
A
988 length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
989
990 // output the code points that caused the error
991 UnicodeString str;
992 for (i = 0; i < errorLength;) {
993 if (i > 0) {
994 str.append((UChar)uSP);
995 }
996 U16_NEXT(errorUChars, i, errorLength, c);
997 if (c >= 0x100000) {
998 str.append(nibbleToHex((uint8_t)(c >> 20)));
999 }
1000 if (c >= 0x10000) {
1001 str.append(nibbleToHex((uint8_t)(c >> 16)));
1002 }
1003 str.append(nibbleToHex((uint8_t)(c >> 12)));
1004 str.append(nibbleToHex((uint8_t)(c >> 8)));
1005 str.append(nibbleToHex((uint8_t)(c >> 4)));
1006 str.append(nibbleToHex((uint8_t)c));
1007 }
1008
1009 initMsg(pname);
1010 u_wmsg(stderr, errtag,
1011 UnicodeString(pos, length, "").getTerminatedBuffer(),
1012 str.getTerminatedBuffer(),
1013 u_wmsg_errorName(err));
1014 u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1015
1016 willexit = TRUE;
1017 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1018 }
1019
1020 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1021 // looping until they are; message key "premEnd" now obsolete.
1022
1023 // Finally, write the converted buffer to the output file
1024 size_t outlen = (size_t) (bufp - outbuf);
1025 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1026 if (wr != outlen) {
1027 UnicodeString str(strerror(errno));
1028 initMsg(pname);
1029 u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1030 willexit = TRUE;
1031 }
1032
1033 if (willexit) {
1034 goto error_exit;
1035 }
1036 } while (!toSawEndOfUnicode);
1037 } while (!fromSawEndOfBytes);
b75a7d8f
A
1038 } while (!flush); // Stop when we have flushed the
1039 // converters (this means that it's
1040 // the end of output)
1041
1042 goto normal_exit;
1043
1044error_exit:
1045 ret = FALSE;
1046
1047normal_exit:
1048 // Cleanup.
1049
374ca955
A
1050 ucnv_close(convfrom);
1051 ucnv_close(convto);
b75a7d8f
A
1052
1053#if !UCONFIG_NO_TRANSLITERATION
374ca955 1054 delete t;
b75a7d8f
A
1055#endif
1056
51004dcb 1057 if (closeFile) {
b75a7d8f
A
1058 fclose(infile);
1059 }
1060
1061 return ret;
1062}
1063
1064static void usage(const char *pname, int ecode) {
1065 const UChar *msg;
1066 int32_t msgLen;
1067 UErrorCode err = U_ZERO_ERROR;
1068 FILE *fp = ecode ? stderr : stdout;
1069 int res;
1070
1071 initMsg(pname);
1072 msg =
1073 ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1074 &msgLen, &err);
1075 UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1076 UnicodeString mname(msg, msgLen + 1);
1077
1078 res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1079 if (!ecode) {
1080 if (!res) {
1081 fputc('\n', fp);
1082 }
1083 if (!u_wmsg(fp, "help")) {
1084 /* Now dump callbacks and finish. */
1085
1086 int i, count =
2ca993e8 1087 UPRV_LENGTHOF(transcode_callbacks);
b75a7d8f
A
1088 for (i = 0; i < count; ++i) {
1089 fprintf(fp, " %s", transcode_callbacks[i].name);
1090 }
1091 fputc('\n', fp);
1092 }
1093 }
1094
1095 exit(ecode);
1096}
1097
374ca955
A
1098extern int
1099main(int argc, char **argv)
b75a7d8f
A
1100{
1101 FILE *outfile;
1102 int ret = 0;
b75a7d8f
A
1103
1104 size_t bufsz = DEFAULT_BUFSZ;
1105
1106 const char *fromcpage = 0;
1107 const char *tocpage = 0;
1108 const char *translit = 0;
1109 const char *outfilestr = 0;
374ca955 1110 UBool fallback = FALSE;
b75a7d8f
A
1111
1112 UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1113 const void *fromuctxt = 0;
1114 UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1115 const void *touctxt = 0;
1116
374ca955 1117 char **iter, **remainArgv, **remainArgvLimit;
b75a7d8f
A
1118 char **end = argv + argc;
1119
1120 const char *pname;
1121
374ca955 1122 UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
b75a7d8f 1123 const char *printName = 0;
b75a7d8f 1124
374ca955
A
1125 UBool verbose = FALSE;
1126 UErrorCode status = U_ZERO_ERROR;
1127
1128 ConvertFile cf;
1129
1130 /* Initialize ICU */
1131 u_init(&status);
1132 if (U_FAILURE(status)) {
1133 fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
1134 argv[0], u_errorName(status));
1135 exit(1);
1136 }
b75a7d8f
A
1137
1138 // Get and prettify pname.
1139 pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
4388f060 1140#if U_PLATFORM_USES_ONLY_WIN32_API
b75a7d8f
A
1141 if (!pname) {
1142 pname = uprv_strrchr(*argv, '/');
1143 }
1144#endif
1145 if (!pname) {
1146 pname = *argv;
1147 } else {
1148 ++pname;
1149 }
1150
1151 // First, get the arguments from command-line
1152 // to know the codepages to convert between
1153
374ca955 1154 remainArgv = remainArgvLimit = argv + 1;
b75a7d8f
A
1155 for (iter = argv + 1; iter != end; iter++) {
1156 // Check for from charset
1157 if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1158 iter++;
1159 if (iter != end)
1160 fromcpage = *iter;
1161 else
1162 usage(pname, 1);
1163 } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1164 iter++;
1165 if (iter != end)
1166 tocpage = *iter;
1167 else
1168 usage(pname, 1);
1169 } else if (strcmp("-x", *iter) == 0) {
1170 iter++;
1171 if (iter != end)
1172 translit = *iter;
1173 else
1174 usage(pname, 1);
1175 } else if (!strcmp("--fallback", *iter)) {
374ca955 1176 fallback = TRUE;
b75a7d8f 1177 } else if (!strcmp("--no-fallback", *iter)) {
374ca955 1178 fallback = FALSE;
b75a7d8f
A
1179 } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1180 iter++;
1181 if (iter != end) {
1182 bufsz = atoi(*iter);
1183 if ((int) bufsz <= 0) {
1184 initMsg(pname);
1185 UnicodeString str(*iter);
1186 initMsg(pname);
374ca955 1187 u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
b75a7d8f
A
1188 return 3;
1189 }
1190 } else {
1191 usage(pname, 1);
1192 }
1193 } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1194 if (printTranslits) {
1195 usage(pname, 1);
1196 }
374ca955 1197 printConvs = TRUE;
b75a7d8f
A
1198 } else if (strcmp("--default-code", *iter) == 0) {
1199 if (printTranslits) {
1200 usage(pname, 1);
1201 }
1202 printName = ucnv_getDefaultName();
1203 } else if (strcmp("--list-code", *iter) == 0) {
1204 if (printTranslits) {
1205 usage(pname, 1);
1206 }
1207
1208 iter++;
1209 if (iter != end) {
1210 UErrorCode e = U_ZERO_ERROR;
1211 printName = ucnv_getAlias(*iter, 0, &e);
1212 if (U_FAILURE(e) || !printName) {
1213 UnicodeString str(*iter);
1214 initMsg(pname);
374ca955 1215 u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
b75a7d8f
A
1216 return 2;
1217 }
1218 } else
1219 usage(pname, 1);
1220 } else if (strcmp("--canon", *iter) == 0) {
374ca955 1221 printCanon = TRUE;
b75a7d8f
A
1222 } else if (strcmp("-L", *iter) == 0
1223 || !strcmp("--list-transliterators", *iter)) {
1224 if (printConvs) {
1225 usage(pname, 1);
1226 }
374ca955 1227 printTranslits = TRUE;
b75a7d8f
A
1228 } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1229 || !strcmp("--help", *iter)) {
1230 usage(pname, 0);
1231 } else if (!strcmp("-c", *iter)) {
1232 fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1233 } else if (!strcmp("--to-callback", *iter)) {
1234 iter++;
1235 if (iter != end) {
1236 const struct callback_ent *cbe = findCallback(*iter);
1237 if (cbe) {
1238 fromucallback = cbe->fromu;
1239 fromuctxt = cbe->fromuctxt;
1240 } else {
1241 UnicodeString str(*iter);
1242 initMsg(pname);
374ca955 1243 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
b75a7d8f
A
1244 return 4;
1245 }
1246 } else {
1247 usage(pname, 1);
1248 }
1249 } else if (!strcmp("--from-callback", *iter)) {
1250 iter++;
1251 if (iter != end) {
1252 const struct callback_ent *cbe = findCallback(*iter);
1253 if (cbe) {
1254 toucallback = cbe->tou;
1255 touctxt = cbe->touctxt;
1256 } else {
1257 UnicodeString str(*iter);
1258 initMsg(pname);
374ca955 1259 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
b75a7d8f
A
1260 return 4;
1261 }
1262 } else {
1263 usage(pname, 1);
1264 }
1265 } else if (!strcmp("-i", *iter)) {
1266 toucallback = UCNV_TO_U_CALLBACK_SKIP;
1267 } else if (!strcmp("--callback", *iter)) {
1268 iter++;
1269 if (iter != end) {
1270 const struct callback_ent *cbe = findCallback(*iter);
1271 if (cbe) {
1272 fromucallback = cbe->fromu;
1273 fromuctxt = cbe->fromuctxt;
1274 toucallback = cbe->tou;
1275 touctxt = cbe->touctxt;
1276 } else {
1277 UnicodeString str(*iter);
1278 initMsg(pname);
374ca955 1279 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
b75a7d8f
A
1280 return 4;
1281 }
1282 } else {
1283 usage(pname, 1);
1284 }
1285 } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
374ca955 1286 verbose = FALSE;
b75a7d8f 1287 } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
374ca955 1288 verbose = TRUE;
b75a7d8f 1289 } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
374ca955 1290 printf("%s v2.1 ICU " U_ICU_VERSION "\n", pname);
b75a7d8f
A
1291 return 0;
1292 } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1293 ++iter;
1294 if (iter != end && !outfilestr) {
1295 outfilestr = *iter;
1296 } else {
1297 usage(pname, 1);
1298 }
374ca955
A
1299 } else if (0 == strcmp("--add-signature", *iter)) {
1300 cf.signature = 1;
1301 } else if (0 == strcmp("--remove-signature", *iter)) {
1302 cf.signature = -1;
b75a7d8f
A
1303 } else if (**iter == '-' && (*iter)[1]) {
1304 usage(pname, 1);
374ca955
A
1305 } else {
1306 // move a non-option up in argv[]
1307 *remainArgvLimit++ = *iter;
b75a7d8f
A
1308 }
1309 }
1310
1311 if (printConvs || printName) {
1312 return printConverters(pname, printName, printCanon) ? 2 : 0;
1313 } else if (printTranslits) {
1314 return printTransliterators(printCanon) ? 3 : 0;
1315 }
1316
1317 if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1318 fromcpage = ucnv_getDefaultName();
1319 }
1320 if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1321 tocpage = ucnv_getDefaultName();
1322 }
1323
1324 // Open the correct output file or connect to stdout for reading input
1325 if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1326 outfile = fopen(outfilestr, "wb");
1327 if (outfile == 0) {
1328 UnicodeString str1(outfilestr, "");
1329 UnicodeString str2(strerror(errno), "");
1330 initMsg(pname);
1331 u_wmsg(stderr, "cantCreateOutputF",
1332 str1.getBuffer(), str2.getBuffer());
1333 return 1;
1334 }
1335 } else {
1336 outfilestr = "-";
1337 outfile = stdout;
374ca955 1338#ifdef USE_FILENO_BINARY_MODE
b75a7d8f
A
1339 if (setmode(fileno(outfile), O_BINARY) == -1) {
1340 u_wmsg(stderr, "cantSetOutBinMode");
1341 exit(-1);
1342 }
1343#endif
1344 }
1345
1346 /* Loop again on the arguments to find all the input files, and
374ca955 1347 convert them. */
b75a7d8f 1348
374ca955
A
1349 cf.setBufferSize(bufsz);
1350
1351 if(remainArgv < remainArgvLimit) {
1352 for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1353 if (!cf.convertFile(
1354 pname, fromcpage, toucallback, touctxt, tocpage,
1355 fromucallback, fromuctxt, fallback, translit, *iter,
1356 outfile, verbose)
1357 ) {
b75a7d8f
A
1358 goto error_exit;
1359 }
1360 }
374ca955
A
1361 } else {
1362 if (!cf.convertFile(
1363 pname, fromcpage, toucallback, touctxt, tocpage,
1364 fromucallback, fromuctxt, fallback, translit, 0,
1365 outfile, verbose)
1366 ) {
b75a7d8f
A
1367 goto error_exit;
1368 }
1369 }
1370
1371 goto normal_exit;
1372error_exit:
729e4ab9 1373#if !UCONFIG_NO_LEGACY_CONVERSION
b75a7d8f 1374 ret = 1;
729e4ab9
A
1375#else
1376 fprintf(stderr, "uconv error: UCONFIG_NO_LEGACY_CONVERSION is on. See uconfig.h\n");
1377#endif
b75a7d8f
A
1378normal_exit:
1379
374ca955 1380 if (outfile != stdout) {
b75a7d8f 1381 fclose(outfile);
374ca955 1382 }
b75a7d8f 1383
51004dcb
A
1384 u_cleanup();
1385
b75a7d8f
A
1386 return ret;
1387}
1388
1389
1390/*
1391 * Hey, Emacs, please set the following:
1392 *
1393 * Local Variables:
1394 * indent-tabs-mode: nil
1395 * End:
1396 *
1397 */