]> git.saurik.com Git - apple/icu.git/blame - icuSources/extra/uconv/uconv.cpp
ICU-8.11.tar.gz
[apple/icu.git] / icuSources / extra / uconv / uconv.cpp
CommitLineData
b75a7d8f
A
1/*****************************************************************************
2*
73c04bcf 3* Copyright (C) 1999-2006, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5*
6******************************************************************************/
7
8/*
9 * uconv(1): an iconv(1)-like converter using ICU.
10 *
73c04bcf 11 * Original code by Jonas Utterström <jonas.utterstrom@vittran.norrnod.se>
b75a7d8f
A
12 * contributed in 1999.
13 *
14 * Conversion to the C conversion API and many improvements by
15 * Yves Arrouye <yves@realnames.com>, current maintainer.
16 *
374ca955
A
17 * Markus Scherer maintainer from 2003.
18 * See source code repository history for changes.
b75a7d8f
A
19 */
20
21#include <unicode/utypes.h>
374ca955 22#include <unicode/putil.h>
b75a7d8f 23#include <unicode/ucnv.h>
374ca955 24#include <unicode/uenum.h>
b75a7d8f
A
25#include <unicode/unistr.h>
26#include <unicode/translit.h>
374ca955
A
27#include <unicode/uset.h>
28#include <unicode/uclean.h>
b75a7d8f
A
29
30#include <stdio.h>
31#include <errno.h>
32#include <string.h>
33#include <stdlib.h>
34
35#include "cmemory.h"
36#include "cstring.h"
37#include "ustrfmt.h"
38
39#include "unicode/uwmsg.h"
40
73c04bcf 41#if (defined(U_WINDOWS) || defined(U_CYGWIN)) && !defined(__STRICT_ANSI__)
b75a7d8f
A
42#include <io.h>
43#include <fcntl.h>
73c04bcf 44#if defined(U_WINDOWS)
374ca955 45#define USE_FILENO_BINARY_MODE 1
73c04bcf
A
46/* Windows likes to rename Unix-like functions */
47#ifndef fileno
48#define fileno _fileno
49#endif
50#ifndef setmode
51#define setmode _setmode
52#endif
53#ifndef O_BINARY
54#define O_BINARY _O_BINARY
55#endif
56#endif
b75a7d8f
A
57#endif
58
59#ifdef UCONVMSG_LINK
60/* below from the README */
61#include "unicode/utypes.h"
62#include "unicode/udata.h"
63U_CFUNC char uconvmsg_dat[];
64#endif
65
374ca955
A
66#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
67
b75a7d8f
A
68#define DEFAULT_BUFSZ 4096
69#define UCONVMSG "uconvmsg"
70
71static UResourceBundle *gBundle = 0; /* Bundle containing messages. */
72
73/*
74 * Initialize the message bundle so that message strings can be fetched
75 * by u_wmsg().
76 *
77 */
78
79static void initMsg(const char *pname) {
80 static int ps = 0;
81
82 if (!ps) {
83 char dataPath[2048]; /* XXX Sloppy: should be PATH_MAX. */
84 UErrorCode err = U_ZERO_ERROR;
85
86 ps = 1;
87
88 /* Set up our static data - if any */
89#ifdef UCONVMSG_LINK
90 udata_setAppData(UCONVMSG, (const void*) uconvmsg_dat, &err);
91 if (U_FAILURE(err)) {
92 fprintf(stderr, "%s: warning, problem installing our static resource bundle data uconvmsg: %s - trying anyways.\n",
93 pname, u_errorName(err));
94 err = U_ZERO_ERROR; /* It may still fail */
95 }
96#endif
97
98 /* Get messages. */
99 gBundle = u_wmsg_setPath(UCONVMSG, &err);
100 if (U_FAILURE(err)) {
101 fprintf(stderr,
102 "%s: warning: couldn't open bundle %s: %s\n",
103 pname, UCONVMSG, u_errorName(err));
104#ifdef UCONVMSG_LINK
105 fprintf(stderr,
106 "%s: setAppData was called, internal data %s failed to load\n",
107 pname, UCONVMSG);
108#endif
109
110 err = U_ZERO_ERROR;
111 /* that was try #1, try again with a path */
112 uprv_strcpy(dataPath, u_getDataDirectory());
113 uprv_strcat(dataPath, U_FILE_SEP_STRING);
114 uprv_strcat(dataPath, UCONVMSG);
115
116 gBundle = u_wmsg_setPath(dataPath, &err);
117 if (U_FAILURE(err)) {
118 fprintf(stderr,
119 "%s: warning: still couldn't open bundle %s: %s\n",
120 pname, dataPath, u_errorName(err));
121 fprintf(stderr, "%s: warning: messages will not be displayed\n", pname);
122 }
123 }
124 }
125}
126
127/* Mapping of callback names to the callbacks passed to the converter
128 API. */
129
130static struct callback_ent {
131 const char *name;
132 UConverterFromUCallback fromu;
133 const void *fromuctxt;
134 UConverterToUCallback tou;
135 const void *touctxt;
136} transcode_callbacks[] = {
137 { "substitute",
138 UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0,
139 UCNV_TO_U_CALLBACK_SUBSTITUTE, 0 },
140 { "skip",
141 UCNV_FROM_U_CALLBACK_SKIP, 0,
142 UCNV_TO_U_CALLBACK_SKIP, 0 },
143 { "stop",
144 UCNV_FROM_U_CALLBACK_STOP, 0,
145 UCNV_TO_U_CALLBACK_STOP, 0 },
146 { "escape",
147 UCNV_FROM_U_CALLBACK_ESCAPE, 0,
148 UCNV_TO_U_CALLBACK_ESCAPE, 0},
149 { "escape-icu",
150 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU,
151 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_ICU },
152 { "escape-java",
153 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA,
154 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_JAVA },
155 { "escape-c",
156 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C,
157 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C },
158 { "escape-xml",
159 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
160 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
161 { "escape-xml-hex",
162 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX,
163 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_HEX },
164 { "escape-xml-dec",
165 UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
166 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC },
167 { "escape-unicode", UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE,
168 UCNV_TO_U_CALLBACK_ESCAPE, UCNV_ESCAPE_UNICODE }
169};
170
171/* Return a pointer to a callback record given its name. */
172
173static const struct callback_ent *findCallback(const char *name) {
174 int i, count =
175 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
176
177 /* We'll do a linear search, there aren't many of them and bsearch()
178 may not be that portable. */
179
180 for (i = 0; i < count; ++i) {
181 if (!uprv_stricmp(name, transcode_callbacks[i].name)) {
182 return &transcode_callbacks[i];
183 }
184 }
185
186 return 0;
187}
188
189/* Print converter information. If lookfor is set, only that converter will
190 be printed, otherwise all converters will be printed. If canon is non
191 zero, tags and aliases for each converter are printed too, in the format
192 expected for convrters.txt(5). */
193
194static int printConverters(const char *pname, const char *lookfor,
374ca955 195 UBool canon)
b75a7d8f
A
196{
197 UErrorCode err = U_ZERO_ERROR;
198 int32_t num;
199 uint16_t num_stds;
200 const char **stds;
201
202 /* If there is a specified name, just handle that now. */
203
204 if (lookfor) {
205 if (!canon) {
206 printf("%s\n", lookfor);
207 return 0;
208 } else {
209 /* Because we are printing a canonical name, we need the
210 true converter name. We've done that already except for
211 the default name (because we want to print the exact
212 name one would get when calling ucnv_getDefaultName()
213 in non-canon mode). But since we do not know at this
214 point if we have the default name or something else, we
215 need to normalize again to the canonical converter
216 name. */
217
218 const char *truename = ucnv_getAlias(lookfor, 0, &err);
219 if (U_SUCCESS(err)) {
220 lookfor = truename;
221 } else {
222 err = U_ZERO_ERROR;
223 }
224 }
225 }
226
227 /* Print converter names. We come here for one of two reasons: we
228 are printing all the names (lookfor was null), or we have a
229 single converter to print but in canon mode, hence we need to
230 get to it in order to print everything. */
231
232 num = ucnv_countAvailable();
233 if (num <= 0) {
234 initMsg(pname);
235 u_wmsg(stderr, "cantGetNames");
236 return -1;
237 }
238 if (lookfor) {
239 num = 1; /* We know where we want to be. */
240 }
241
242 num_stds = ucnv_countStandards();
243 stds = (const char **) uprv_malloc(num_stds * sizeof(*stds));
244 if (!stds) {
245 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(U_MEMORY_ALLOCATION_ERROR));
246 return -1;
247 } else {
248 uint16_t s;
249
374ca955
A
250 if (canon) {
251 printf("{ ");
252 }
b75a7d8f
A
253 for (s = 0; s < num_stds; ++s) {
254 stds[s] = ucnv_getStandard(s, &err);
374ca955
A
255 if (canon) {
256 printf("%s ", stds[s]);
257 }
b75a7d8f
A
258 if (U_FAILURE(err)) {
259 u_wmsg(stderr, "cantGetTag", u_wmsg_errorName(err));
260 return -1;
261 }
262 }
374ca955
A
263 if (canon) {
264 puts("}");
265 }
b75a7d8f
A
266 }
267
268 for (int32_t i = 0; i < num; i++) {
269 const char *name;
270 uint16_t num_aliases;
271
272 /* Set the name either to what we are looking for, or
273 to the current converter name. */
274
275 if (lookfor) {
276 name = lookfor;
277 } else {
278 name = ucnv_getAvailableName(i);
279 }
280
281 /* Get all the aliases associated to the name. */
282
283 err = U_ZERO_ERROR;
284 num_aliases = ucnv_countAliases(name, &err);
285 if (U_FAILURE(err)) {
286 printf("%s", name);
287
374ca955 288 UnicodeString str(name, "");
b75a7d8f 289 putchar('\t');
374ca955 290 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
b75a7d8f
A
291 u_wmsg_errorName(err));
292 return -1;
293 } else {
294 uint16_t a, s, t;
295
296 /* Write all the aliases and their tags. */
297
298 for (a = 0; a < num_aliases; ++a) {
299 const char *alias = ucnv_getAlias(name, a, &err);
300
301 if (U_FAILURE(err)) {
374ca955 302 UnicodeString str(name, "");
b75a7d8f 303 putchar('\t');
374ca955 304 u_wmsg(stderr, "cantGetAliases", str.getTerminatedBuffer(),
b75a7d8f
A
305 u_wmsg_errorName(err));
306 return -1;
307 }
308
374ca955
A
309 /* Print the current alias so that it looks right. */
310 printf("%s%s%s", (canon ? (a == 0? "" : "\t" ) : "") ,
311 alias,
312 (canon ? "" : " "));
b75a7d8f
A
313
314 /* Look (slowly, linear searching) for a tag. */
315
316 if (canon) {
374ca955
A
317 /* -1 to skip the last standard */
318 for (s = t = 0; s < num_stds-1; ++s) {
319 UEnumeration *nameEnum = ucnv_openStandardNames(name, stds[s], &err);
320 if (U_SUCCESS(err)) {
321 /* List the standard tags */
322 const char *standardName;
323 UBool isFirst = TRUE;
324 UErrorCode enumError = U_ZERO_ERROR;
325 while ((standardName = uenum_next(nameEnum, NULL, &enumError))) {
326 /* See if this alias is supported by this standard. */
327 if (!strcmp(standardName, alias)) {
328 if (!t) {
329 printf(" {");
330 t = 1;
331 }
332 /* Print a * after the default standard name */
333 printf(" %s%s", stds[s], (isFirst ? "*" : ""));
b75a7d8f 334 }
374ca955 335 isFirst = FALSE;
b75a7d8f
A
336 }
337 }
338 }
339 if (t) {
340 printf(" }");
341 }
342 }
374ca955
A
343 /* Terminate this entry. */
344 if (canon) {
345 puts("");
346 }
b75a7d8f
A
347
348 /* Move on. */
b75a7d8f 349 }
374ca955
A
350 /* Terminate this entry. */
351 if (!canon) {
352 puts("");
353 }
b75a7d8f
A
354 }
355 }
356
357 /* Free temporary data. */
358
359 uprv_free(stds);
360
361 /* Success. */
362
363 return 0;
364}
365
366/* Print all available transliterators. If canon is non zero, print
367 one transliterator per line. */
368
374ca955 369static int printTransliterators(UBool canon)
b75a7d8f
A
370{
371#if UCONFIG_NO_TRANSLITERATION
372 printf("no transliterators available because of UCONFIG_NO_TRANSLITERATION, see uconfig.h\n");
373 return 1;
374#else
375 int32_t numtrans = utrans_countAvailableIDs(), i;
376 int buflen = 512;
377 char *buf = (char *) uprv_malloc(buflen);
378 char staticbuf[512];
379
380 char sepchar = canon ? '\n' : ' ';
381
382 if (!buf) {
383 buf = staticbuf;
384 buflen = sizeof(staticbuf);
385 }
386
387 for (i = 0; i < numtrans; ++i) {
388 int32_t len = utrans_getAvailableID(i, buf, buflen);
389 if (len >= buflen - 1) {
390 if (buf != staticbuf) {
391 buflen <<= 1;
392 if (buflen < len) {
393 buflen = len + 64;
394 }
395 buf = (char *) uprv_realloc(buf, buflen);
396 if (!buf) {
397 buf = staticbuf;
398 buflen = sizeof(staticbuf);
399 }
400 }
401 utrans_getAvailableID(i, buf, buflen);
402 if (len >= buflen) {
403 uprv_strcpy(buf + buflen - 4, "..."); /* Truncate the name. */
404 }
405 }
406
407 printf("%s", buf);
408 if (i < numtrans - 1) {
409 putchar(sepchar);
410 }
411 }
412
413 /* Add a terminating newline if needed. */
414
415 if (sepchar != '\n') {
416 putchar('\n');
417 }
418
419 /* Free temporary data. */
420
421 if (buf != staticbuf) {
422 uprv_free(buf);
423 }
424
425 /* Success. */
426
427 return 0;
428#endif
429}
430
374ca955
A
// Code points that get special treatment in this file.
enum {
    uSP  = 0x0020,  // space
    uCR  = 0x000D,  // carriage return
    uLF  = 0x000A,  // line feed
    uNL  = 0x0085,  // newline
    uLS  = 0x2028,  // line separator
    uPS  = 0x2029,  // paragraph separator
    uSig = 0xFEFF   // signature/BOM character
};
b75a7d8f 440
374ca955
A
441static inline int32_t
442getChunkLimit(const UnicodeString &prev, const UnicodeString &s) {
443 // find one of
444 // CR, LF, CRLF, NL, LS, PS
445 // for paragraph ends (see UAX #13/Unicode 4)
446 // and include it in the chunk
447 // all of these characters are on the BMP
448 // do not include FF or VT in case they are part of a paragraph
449 // (important for bidi contexts)
450 static const UChar paraEnds[] = {
451 0xd, 0xa, 0x85, 0x2028, 0x2029
452 };
453 enum {
454 iCR, iLF, iNL, iLS, iPS, iCount
455 };
456
457 // first, see if there is a CRLF split between prev and s
458 if (prev.endsWith(paraEnds + iCR, 1)) {
459 if (s.startsWith(paraEnds + iLF, 1)) {
460 return 1; // split CRLF, include the LF
461 } else if (!s.isEmpty()) {
462 return 0; // complete the last chunk
463 } else {
464 return -1; // wait for actual further contents to arrive
b75a7d8f
A
465 }
466 }
467
374ca955
A
468 const UChar *u = s.getBuffer(), *limit = u + s.length();
469 UChar c;
470
471 while (u < limit) {
472 c = *u++;
473 if (
474 ((c < uSP) && (c == uCR || c == uLF)) ||
475 (c == uNL) ||
476 ((c & uLS) == uLS)
477 ) {
478 if (c == uCR) {
479 // check for CRLF
480 if (u == limit) {
481 return -1; // LF may be in the next chunk
482 } else if (*u == uLF) {
483 ++u; // include the LF in this chunk
484 }
485 }
486 return (int32_t)(u - s.getBuffer());
487 }
488 }
489
490 return -1; // continue collecting the chunk
491}
492
// How a converter deals with the U+FEFF Unicode signature character (BOM);
// these are the values returned by cnvSigType().
enum {
    CNV_NO_FEFF   = 0,  // cannot convert the U+FEFF Unicode signature character (BOM)
    CNV_WITH_FEFF = 1,  // can convert the U+FEFF signature character
    CNV_ADDS_FEFF = 2   // automatically adds/detects the U+FEFF signature character
};
498
499static inline UChar
500nibbleToHex(uint8_t n) {
501 n &= 0xf;
502 return
503 n <= 9 ?
504 (UChar)(0x30 + n) :
505 (UChar)((0x61 - 10) + n);
506}
507
508// check the converter's Unicode signature properties;
509// the fromUnicode side of the converter must be in its initial state
510// and will be reset again if it was used
511static int32_t
512cnvSigType(UConverter *cnv) {
513 UErrorCode err;
514 int32_t result;
515
516 // test if the output charset can convert U+FEFF
517 USet *set = uset_open(1, 0);
518 err = U_ZERO_ERROR;
519 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &err);
520 if (U_SUCCESS(err) && uset_contains(set, uSig)) {
521 result = CNV_WITH_FEFF;
522 } else {
523 result = CNV_NO_FEFF; // an error occurred or U+FEFF cannot be converted
524 }
525 uset_close(set);
526
527 if (result == CNV_WITH_FEFF) {
528 // test if the output charset emits a signature anyway
529 const UChar a[1] = { 0x61 }; // "a"
530 const UChar *in;
531
532 char buffer[20];
533 char *out;
534
535 in = a;
536 out = buffer;
537 err = U_ZERO_ERROR;
538 ucnv_fromUnicode(cnv,
539 &out, buffer + sizeof(buffer),
540 &in, a + 1,
541 NULL, TRUE, &err);
542 ucnv_resetFromUnicode(cnv);
543
544 if (NULL != ucnv_detectUnicodeSignature(buffer, (int32_t)(out - buffer), NULL, &err) &&
545 U_SUCCESS(err)
546 ) {
547 result = CNV_ADDS_FEFF;
548 }
549 }
550
551 return result;
b75a7d8f
A
552}
553
374ca955
A
554class ConvertFile {
555public:
556 ConvertFile() :
557 buf(NULL), outbuf(NULL), fromoffsets(NULL),
558 bufsz(0), signature(0) {}
559
560 void
561 setBufferSize(size_t bufferSize) {
562 bufsz = bufferSize;
563
564 buf = new char[2 * bufsz];
565 outbuf = buf + bufsz;
566
567 // +1 for an added U+FEFF in the intermediate Unicode buffer
568 fromoffsets = new int32_t[bufsz + 1];
569 }
570
571 ~ConvertFile() {
572 delete [] buf;
573 delete [] fromoffsets;
574 }
575
576 UBool convertFile(const char *pname,
577 const char *fromcpage,
578 UConverterToUCallback toucallback,
579 const void *touctxt,
580 const char *tocpage,
581 UConverterFromUCallback fromucallback,
582 const void *fromuctxt,
583 UBool fallback,
584 const char *translit,
585 const char *infilestr,
586 FILE * outfile, int verbose);
587private:
588 friend int main(int argc, char **argv);
589
590 char *buf, *outbuf;
591 int32_t *fromoffsets;
592
593 size_t bufsz;
594 int8_t signature; // add (1) or remove (-1) a U+FEFF Unicode signature character
595};
596
b75a7d8f 597// Convert a file from one encoding to another
374ca955
A
598UBool
599ConvertFile::convertFile(const char *pname,
b75a7d8f
A
600 const char *fromcpage,
601 UConverterToUCallback toucallback,
602 const void *touctxt,
603 const char *tocpage,
604 UConverterFromUCallback fromucallback,
605 const void *fromuctxt,
374ca955 606 UBool fallback,
b75a7d8f
A
607 const char *translit,
608 const char *infilestr,
609 FILE * outfile, int verbose)
610{
611 FILE *infile;
612 UBool ret = TRUE;
613 UConverter *convfrom = 0;
614 UConverter *convto = 0;
615 UErrorCode err = U_ZERO_ERROR;
616 UBool flush;
374ca955 617 const char *cbufp, *prevbufp;
b75a7d8f 618 char *bufp;
b75a7d8f
A
619
620 uint32_t infoffset = 0, outfoffset = 0; /* Where we are in the file, for error reporting. */
621
374ca955 622 const UChar *unibuf, *unibufbp;
b75a7d8f 623 UChar *unibufp;
b75a7d8f 624
374ca955 625 size_t rd, wr;
b75a7d8f
A
626
627#if !UCONFIG_NO_TRANSLITERATION
628 Transliterator *t = 0; // Transliterator acting on Unicode data.
374ca955 629 UnicodeString chunk; // One chunk of the text being collected for transformation.
b75a7d8f
A
630#endif
631 UnicodeString u; // String to do the transliteration.
374ca955
A
632 int32_t ulen;
633
634 // use conversion offsets for error messages
635 // unless a transliterator is used -
636 // a text transformation will reorder characters in unpredictable ways
637 UBool useOffsets = TRUE;
b75a7d8f
A
638
639 // Open the correct input file or connect to stdin for reading input
640
641 if (infilestr != 0 && strcmp(infilestr, "-")) {
642 infile = fopen(infilestr, "rb");
643 if (infile == 0) {
644 UnicodeString str1(infilestr, "");
645 str1.append((UChar32) 0);
646 UnicodeString str2(strerror(errno), "");
647 str2.append((UChar32) 0);
648 initMsg(pname);
649 u_wmsg(stderr, "cantOpenInputF", str1.getBuffer(), str2.getBuffer());
650 return FALSE;
651 }
652 } else {
653 infilestr = "-";
654 infile = stdin;
374ca955 655#ifdef USE_FILENO_BINARY_MODE
b75a7d8f
A
656 if (setmode(fileno(stdin), O_BINARY) == -1) {
657 initMsg(pname);
658 u_wmsg(stderr, "cantSetInBinMode");
659 return FALSE;
660 }
661#endif
662 }
663
664 if (verbose) {
665 fprintf(stderr, "%s:\n", infilestr);
666 }
667
668#if !UCONFIG_NO_TRANSLITERATION
669 // Create transliterator as needed.
670
671 if (translit != NULL && *translit) {
672 UParseError parse;
673 UnicodeString str(translit), pestr;
674
675 /* Create from rules or by ID as needed. */
676
677 parse.line = -1;
678
679 if (uprv_strchr(translit, ':') || uprv_strchr(translit, '>') || uprv_strchr(translit, '<') || uprv_strchr(translit, '>')) {
680 t = Transliterator::createFromRules("Uconv", str, UTRANS_FORWARD, parse, err);
681 } else {
682 t = Transliterator::createInstance(translit, UTRANS_FORWARD, err);
683 }
684
685 if (U_FAILURE(err)) {
686 str.append((UChar32) 0);
687 initMsg(pname);
688
689 if (parse.line >= 0) {
690 UChar linebuf[20], offsetbuf[20];
691 uprv_itou(linebuf, 20, parse.line, 10, 0);
692 uprv_itou(offsetbuf, 20, parse.offset, 10, 0);
374ca955 693 u_wmsg(stderr, "cantCreateTranslitParseErr", str.getTerminatedBuffer(),
b75a7d8f
A
694 u_wmsg_errorName(err), linebuf, offsetbuf);
695 } else {
374ca955 696 u_wmsg(stderr, "cantCreateTranslit", str.getTerminatedBuffer(),
b75a7d8f
A
697 u_wmsg_errorName(err));
698 }
699
700 if (t) {
701 delete t;
702 t = 0;
703 }
704 goto error_exit;
705 }
374ca955
A
706
707 useOffsets = FALSE;
b75a7d8f
A
708 }
709#endif
710
711 // Create codepage converter. If the codepage or its aliases weren't
712 // available, it returns NULL and a failure code. We also set the
713 // callbacks, and return errors in the same way.
714
715 convfrom = ucnv_open(fromcpage, &err);
716 if (U_FAILURE(err)) {
374ca955 717 UnicodeString str(fromcpage, "");
b75a7d8f 718 initMsg(pname);
374ca955 719 u_wmsg(stderr, "cantOpenFromCodeset", str.getTerminatedBuffer(),
b75a7d8f
A
720 u_wmsg_errorName(err));
721 goto error_exit;
722 }
723 ucnv_setToUCallBack(convfrom, toucallback, touctxt, 0, 0, &err);
724 if (U_FAILURE(err)) {
725 initMsg(pname);
726 u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
727 goto error_exit;
728 }
729
730 convto = ucnv_open(tocpage, &err);
731 if (U_FAILURE(err)) {
374ca955 732 UnicodeString str(tocpage, "");
b75a7d8f 733 initMsg(pname);
374ca955 734 u_wmsg(stderr, "cantOpenToCodeset", str.getTerminatedBuffer(),
b75a7d8f
A
735 u_wmsg_errorName(err));
736 goto error_exit;
737 }
738 ucnv_setFromUCallBack(convto, fromucallback, fromuctxt, 0, 0, &err);
739 if (U_FAILURE(err)) {
740 initMsg(pname);
741 u_wmsg(stderr, "cantSetCallback", u_wmsg_errorName(err));
742 goto error_exit;
743 }
744 ucnv_setFallback(convto, fallback);
745
374ca955
A
746 UBool willexit, fromSawEndOfBytes, toSawEndOfUnicode;
747 int8_t sig;
b75a7d8f
A
748
749 // OK, we can convert now.
374ca955
A
750 sig = signature;
751 rd = 0;
b75a7d8f
A
752
753 do {
374ca955
A
754 willexit = FALSE;
755
756 // input file offset at the beginning of the next buffer
757 infoffset += rd;
b75a7d8f
A
758
759 rd = fread(buf, 1, bufsz, infile);
760 if (ferror(infile) != 0) {
761 UnicodeString str(strerror(errno));
b75a7d8f 762 initMsg(pname);
374ca955 763 u_wmsg(stderr, "cantRead", str.getTerminatedBuffer());
b75a7d8f
A
764 goto error_exit;
765 }
766
374ca955
A
767 // Convert the read buffer into the new encoding via Unicode.
768 // After the call 'unibufp' will be placed behind the last
b75a7d8f 769 // character that was converted in the 'unibuf'.
374ca955 770 // Also the 'cbufp' is positioned behind the last converted
b75a7d8f
A
771 // character.
772 // At the last conversion in the file, flush should be set to
374ca955 773 // true so that we get all characters converted.
b75a7d8f
A
774 //
775 // The converter must be flushed at the end of conversion so
776 // that characters on hold also will be written.
777
b75a7d8f 778 cbufp = buf;
374ca955 779 flush = (UBool)(rd != bufsz);
b75a7d8f 780
374ca955
A
781 // convert until the input is consumed
782 do {
783 // remember the start of the current byte-to-Unicode conversion
784 prevbufp = cbufp;
785
786 unibuf = unibufp = u.getBuffer((int32_t)bufsz);
787
788 // Use bufsz instead of u.getCapacity() for the targetLimit
789 // so that we don't overflow fromoffsets[].
790 ucnv_toUnicode(convfrom, &unibufp, unibuf + bufsz, &cbufp,
791 buf + rd, useOffsets ? fromoffsets : NULL, flush, &err);
792
793 ulen = (int32_t)(unibufp - unibuf);
73c04bcf 794 u.releaseBuffer(U_SUCCESS(err) ? ulen : 0);
374ca955
A
795
796 // fromSawEndOfBytes indicates that ucnv_toUnicode() is done
797 // converting all of the input bytes.
798 // It works like this because ucnv_toUnicode() returns only under the
799 // following conditions:
800 // - an error occurred during conversion (an error code is set)
801 // - the target buffer is filled (the error code indicates an overflow)
802 // - the source is consumed
803 // That is, if the error code does not indicate a failure,
804 // not even an overflow, then the source must be consumed entirely.
805 fromSawEndOfBytes = (UBool)U_SUCCESS(err);
806
807 if (err == U_BUFFER_OVERFLOW_ERROR) {
808 err = U_ZERO_ERROR;
809 } else if (U_FAILURE(err)) {
810 char pos[32], errorBytes[32];
811 int8_t i, length, errorLength;
812
813 UErrorCode localError = U_ZERO_ERROR;
814 errorLength = (int8_t)sizeof(errorBytes);
815 ucnv_getInvalidChars(convfrom, errorBytes, &errorLength, &localError);
816 if (U_FAILURE(localError) || errorLength == 0) {
817 errorLength = 1;
818 }
b75a7d8f 819
374ca955
A
820 // print the input file offset of the start of the error bytes:
821 // input file offset of the current byte buffer +
822 // length of the just consumed bytes -
823 // length of the error bytes
824 length =
825 (int8_t)sprintf(pos, "%d",
826 (int)(infoffset + (cbufp - buf) - errorLength));
827
828 // output the bytes that caused the error
829 UnicodeString str;
830 for (i = 0; i < errorLength; ++i) {
831 if (i > 0) {
832 str.append((UChar)uSP);
833 }
834 str.append(nibbleToHex((uint8_t)errorBytes[i] >> 4));
835 str.append(nibbleToHex((uint8_t)errorBytes[i]));
836 }
b75a7d8f 837
374ca955
A
838 initMsg(pname);
839 u_wmsg(stderr, "problemCvtToU",
840 UnicodeString(pos, length, "").getTerminatedBuffer(),
841 str.getTerminatedBuffer(),
842 u_wmsg_errorName(err));
b75a7d8f 843
374ca955
A
844 willexit = TRUE;
845 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
846 }
b75a7d8f 847
374ca955
A
848 // Replaced a check for whether the input was consumed by
849 // looping until it is; message key "premEndInput" now obsolete.
b75a7d8f 850
374ca955
A
851 if (ulen == 0) {
852 continue;
853 }
b75a7d8f 854
374ca955
A
855 // remove a U+FEFF Unicode signature character if requested
856 if (sig < 0) {
857 if (u.charAt(0) == uSig) {
858 u.remove(0, 1);
b75a7d8f 859
374ca955
A
860 // account for the removed UChar and offset
861 --ulen;
b75a7d8f 862
374ca955
A
863 if (useOffsets) {
864 // remove an offset from fromoffsets[] as well
865 // to keep the array parallel with the UChars
866 memmove(fromoffsets, fromoffsets + 1, ulen * 4);
867 }
b75a7d8f 868
374ca955
A
869 }
870 sig = 0;
871 }
b75a7d8f 872
374ca955
A
873#if !UCONFIG_NO_TRANSLITERATION
874 // Transliterate/transform if needed.
875
876 // For transformation, we use chunking code -
877 // collect Unicode input until, for example, an end-of-line,
878 // then transform and output-convert that and continue collecting.
879 // This makes the transformation result independent of the buffer size
880 // while avoiding the slower keyboard mode.
881 // The end-of-chunk characters are completely included in the
882 // transformed string in case they are to be transformed themselves.
883 if (t != NULL) {
884 UnicodeString out;
885 int32_t chunkLimit;
886
887 do {
888 chunkLimit = getChunkLimit(chunk, u);
889 if (chunkLimit < 0 && flush && fromSawEndOfBytes) {
890 // use all of the rest at the end of the text
891 chunkLimit = u.length();
892 }
893 if (chunkLimit >= 0) {
894 // complete the chunk and transform it
895 chunk.append(u, 0, chunkLimit);
896 u.remove(0, chunkLimit);
897 t->transliterate(chunk);
898
899 // append the transformation result to the result and empty the chunk
900 out.append(chunk);
901 chunk.remove();
902 } else {
903 // continue collecting the chunk
904 chunk.append(u);
905 break;
906 }
907 } while (!u.isEmpty());
b75a7d8f 908
374ca955
A
909 u = out;
910 ulen = u.length();
911 }
912#endif
b75a7d8f 913
374ca955
A
914 // add a U+FEFF Unicode signature character if requested
915 // and possible/necessary
916 if (sig > 0) {
917 if (u.charAt(0) != uSig && cnvSigType(convto) == CNV_WITH_FEFF) {
918 u.insert(0, (UChar)uSig);
919
920 if (useOffsets) {
921 // insert a pseudo-offset into fromoffsets[] as well
922 // to keep the array parallel with the UChars
923 memmove(fromoffsets + 1, fromoffsets, ulen * 4);
924 fromoffsets[0] = -1;
925 }
b75a7d8f 926
374ca955
A
927 // account for the additional UChar and offset
928 ++ulen;
b75a7d8f 929 }
374ca955 930 sig = 0;
b75a7d8f
A
931 }
932
374ca955
A
933 // Convert the Unicode buffer into the destination codepage
934 // Again 'bufp' will be placed behind the last converted character
935 // And 'unibufp' will be placed behind the last converted unicode character
936 // At the last conversion flush should be set to true to ensure that
937 // all characters left get converted
938
939 unibuf = unibufbp = u.getBuffer();
940
941 do {
942 bufp = outbuf;
943
944 // Use fromSawEndOfBytes in addition to the flush flag -
945 // it indicates whether the intermediate Unicode string
946 // contains the very last UChars for the very last input bytes.
947 ucnv_fromUnicode(convto, &bufp, outbuf + bufsz,
948 &unibufbp,
949 unibuf + ulen,
950 NULL, (UBool)(flush && fromSawEndOfBytes), &err);
951
952 // toSawEndOfUnicode indicates that ucnv_fromUnicode() is done
953 // converting all of the intermediate UChars.
954 // See comment for fromSawEndOfBytes.
955 toSawEndOfUnicode = (UBool)U_SUCCESS(err);
956
957 if (err == U_BUFFER_OVERFLOW_ERROR) {
958 err = U_ZERO_ERROR;
959 } else if (U_FAILURE(err)) {
960 UChar errorUChars[4];
961 const char *errtag;
962 char pos[32];
963 UChar32 c;
964 int8_t i, length, errorLength;
965
966 UErrorCode localError = U_ZERO_ERROR;
967 errorLength = (int8_t)LENGTHOF(errorUChars);
968 ucnv_getInvalidUChars(convto, errorUChars, &errorLength, &localError);
969 if (U_FAILURE(localError) || errorLength == 0) {
970 // need at least 1 so that we don't access beyond the length of fromoffsets[]
971 errorLength = 1;
972 }
b75a7d8f 973
374ca955 974 int32_t ferroffset;
b75a7d8f 975
374ca955
A
976 if (useOffsets) {
977 // Unicode buffer offset of the start of the error UChars
978 ferroffset = (int32_t)((unibufbp - unibuf) - errorLength);
979 if (ferroffset < 0) {
980 // approximation - the character started in the previous Unicode buffer
981 ferroffset = 0;
982 }
b75a7d8f 983
374ca955
A
984 // get the corresponding byte offset out of fromoffsets[]
985 // go back if the offset is not known for some of the UChars
986 int32_t fromoffset;
987 do {
988 fromoffset = fromoffsets[ferroffset];
989 } while (fromoffset < 0 && --ferroffset >= 0);
990
991 // total input file offset =
992 // input file offset of the current byte buffer +
993 // byte buffer offset of where the current Unicode buffer is converted from +
994 // fromoffsets[Unicode offset]
995 ferroffset = infoffset + (prevbufp - buf) + fromoffset;
996 errtag = "problemCvtFromU";
997 } else {
998 // Do not use fromoffsets if (t != NULL) because the Unicode text may
999 // be different from what the offsets refer to.
1000
1001 // output file offset
1002 ferroffset = (int32_t)(outfoffset + (bufp - outbuf));
1003 errtag = "problemCvtFromUOut";
1004 }
b75a7d8f 1005
374ca955
A
1006 length = (int8_t)sprintf(pos, "%u", (int)ferroffset);
1007
1008 // output the code points that caused the error
1009 UnicodeString str;
1010 for (i = 0; i < errorLength;) {
1011 if (i > 0) {
1012 str.append((UChar)uSP);
1013 }
1014 U16_NEXT(errorUChars, i, errorLength, c);
1015 if (c >= 0x100000) {
1016 str.append(nibbleToHex((uint8_t)(c >> 20)));
1017 }
1018 if (c >= 0x10000) {
1019 str.append(nibbleToHex((uint8_t)(c >> 16)));
1020 }
1021 str.append(nibbleToHex((uint8_t)(c >> 12)));
1022 str.append(nibbleToHex((uint8_t)(c >> 8)));
1023 str.append(nibbleToHex((uint8_t)(c >> 4)));
1024 str.append(nibbleToHex((uint8_t)c));
1025 }
1026
1027 initMsg(pname);
1028 u_wmsg(stderr, errtag,
1029 UnicodeString(pos, length, "").getTerminatedBuffer(),
1030 str.getTerminatedBuffer(),
1031 u_wmsg_errorName(err));
1032 u_wmsg(stderr, "errorUnicode", str.getTerminatedBuffer());
1033
1034 willexit = TRUE;
1035 err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
1036 }
1037
1038 // Replaced a check for whether the intermediate Unicode characters were all consumed by
1039 // looping until they are; message key "premEnd" now obsolete.
1040
1041 // Finally, write the converted buffer to the output file
1042 size_t outlen = (size_t) (bufp - outbuf);
1043 outfoffset += (int32_t)(wr = fwrite(outbuf, 1, outlen, outfile));
1044 if (wr != outlen) {
1045 UnicodeString str(strerror(errno));
1046 initMsg(pname);
1047 u_wmsg(stderr, "cantWrite", str.getTerminatedBuffer());
1048 willexit = TRUE;
1049 }
1050
1051 if (willexit) {
1052 goto error_exit;
1053 }
1054 } while (!toSawEndOfUnicode);
1055 } while (!fromSawEndOfBytes);
b75a7d8f
A
1056 } while (!flush); // Stop when we have flushed the
1057 // converters (this means that it's
1058 // the end of output)
1059
1060 goto normal_exit;
1061
1062error_exit:
1063 ret = FALSE;
1064
1065normal_exit:
1066 // Cleanup.
1067
374ca955
A
1068 ucnv_close(convfrom);
1069 ucnv_close(convto);
b75a7d8f
A
1070
1071#if !UCONFIG_NO_TRANSLITERATION
374ca955 1072 delete t;
b75a7d8f
A
1073#endif
1074
b75a7d8f
A
1075 if (infile != stdin) {
1076 fclose(infile);
1077 }
1078
1079 return ret;
1080}
1081
1082static void usage(const char *pname, int ecode) {
1083 const UChar *msg;
1084 int32_t msgLen;
1085 UErrorCode err = U_ZERO_ERROR;
1086 FILE *fp = ecode ? stderr : stdout;
1087 int res;
1088
1089 initMsg(pname);
1090 msg =
1091 ures_getStringByKey(gBundle, ecode ? "lcUsageWord" : "ucUsageWord",
1092 &msgLen, &err);
1093 UnicodeString upname(pname, (int32_t)(uprv_strlen(pname) + 1));
1094 UnicodeString mname(msg, msgLen + 1);
1095
1096 res = u_wmsg(fp, "usage", mname.getBuffer(), upname.getBuffer());
1097 if (!ecode) {
1098 if (!res) {
1099 fputc('\n', fp);
1100 }
1101 if (!u_wmsg(fp, "help")) {
1102 /* Now dump callbacks and finish. */
1103
1104 int i, count =
1105 sizeof(transcode_callbacks) / sizeof(*transcode_callbacks);
1106 for (i = 0; i < count; ++i) {
1107 fprintf(fp, " %s", transcode_callbacks[i].name);
1108 }
1109 fputc('\n', fp);
1110 }
1111 }
1112
1113 exit(ecode);
1114}
1115
374ca955
A
1116extern int
1117main(int argc, char **argv)
b75a7d8f
A
1118{
1119 FILE *outfile;
1120 int ret = 0;
b75a7d8f
A
1121
1122 size_t bufsz = DEFAULT_BUFSZ;
1123
1124 const char *fromcpage = 0;
1125 const char *tocpage = 0;
1126 const char *translit = 0;
1127 const char *outfilestr = 0;
374ca955 1128 UBool fallback = FALSE;
b75a7d8f
A
1129
1130 UConverterFromUCallback fromucallback = UCNV_FROM_U_CALLBACK_STOP;
1131 const void *fromuctxt = 0;
1132 UConverterToUCallback toucallback = UCNV_TO_U_CALLBACK_STOP;
1133 const void *touctxt = 0;
1134
374ca955 1135 char **iter, **remainArgv, **remainArgvLimit;
b75a7d8f
A
1136 char **end = argv + argc;
1137
1138 const char *pname;
1139
374ca955 1140 UBool printConvs = FALSE, printCanon = FALSE, printTranslits = FALSE;
b75a7d8f 1141 const char *printName = 0;
b75a7d8f 1142
374ca955
A
1143 UBool verbose = FALSE;
1144 UErrorCode status = U_ZERO_ERROR;
1145
1146 ConvertFile cf;
1147
1148 /* Initialize ICU */
1149 u_init(&status);
1150 if (U_FAILURE(status)) {
1151 fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
1152 argv[0], u_errorName(status));
1153 exit(1);
1154 }
b75a7d8f
A
1155
1156 // Get and prettify pname.
1157 pname = uprv_strrchr(*argv, U_FILE_SEP_CHAR);
73c04bcf 1158#ifdef U_WINDOWS
b75a7d8f
A
1159 if (!pname) {
1160 pname = uprv_strrchr(*argv, '/');
1161 }
1162#endif
1163 if (!pname) {
1164 pname = *argv;
1165 } else {
1166 ++pname;
1167 }
1168
1169 // First, get the arguments from command-line
1170 // to know the codepages to convert between
1171
374ca955 1172 remainArgv = remainArgvLimit = argv + 1;
b75a7d8f
A
1173 for (iter = argv + 1; iter != end; iter++) {
1174 // Check for from charset
1175 if (strcmp("-f", *iter) == 0 || !strcmp("--from-code", *iter)) {
1176 iter++;
1177 if (iter != end)
1178 fromcpage = *iter;
1179 else
1180 usage(pname, 1);
1181 } else if (strcmp("-t", *iter) == 0 || !strcmp("--to-code", *iter)) {
1182 iter++;
1183 if (iter != end)
1184 tocpage = *iter;
1185 else
1186 usage(pname, 1);
1187 } else if (strcmp("-x", *iter) == 0) {
1188 iter++;
1189 if (iter != end)
1190 translit = *iter;
1191 else
1192 usage(pname, 1);
1193 } else if (!strcmp("--fallback", *iter)) {
374ca955 1194 fallback = TRUE;
b75a7d8f 1195 } else if (!strcmp("--no-fallback", *iter)) {
374ca955 1196 fallback = FALSE;
b75a7d8f
A
1197 } else if (strcmp("-b", *iter) == 0 || !strcmp("--block-size", *iter)) {
1198 iter++;
1199 if (iter != end) {
1200 bufsz = atoi(*iter);
1201 if ((int) bufsz <= 0) {
1202 initMsg(pname);
1203 UnicodeString str(*iter);
1204 initMsg(pname);
374ca955 1205 u_wmsg(stderr, "badBlockSize", str.getTerminatedBuffer());
b75a7d8f
A
1206 return 3;
1207 }
1208 } else {
1209 usage(pname, 1);
1210 }
1211 } else if (strcmp("-l", *iter) == 0 || !strcmp("--list", *iter)) {
1212 if (printTranslits) {
1213 usage(pname, 1);
1214 }
374ca955 1215 printConvs = TRUE;
b75a7d8f
A
1216 } else if (strcmp("--default-code", *iter) == 0) {
1217 if (printTranslits) {
1218 usage(pname, 1);
1219 }
1220 printName = ucnv_getDefaultName();
1221 } else if (strcmp("--list-code", *iter) == 0) {
1222 if (printTranslits) {
1223 usage(pname, 1);
1224 }
1225
1226 iter++;
1227 if (iter != end) {
1228 UErrorCode e = U_ZERO_ERROR;
1229 printName = ucnv_getAlias(*iter, 0, &e);
1230 if (U_FAILURE(e) || !printName) {
1231 UnicodeString str(*iter);
1232 initMsg(pname);
374ca955 1233 u_wmsg(stderr, "noSuchCodeset", str.getTerminatedBuffer());
b75a7d8f
A
1234 return 2;
1235 }
1236 } else
1237 usage(pname, 1);
1238 } else if (strcmp("--canon", *iter) == 0) {
374ca955 1239 printCanon = TRUE;
b75a7d8f
A
1240 } else if (strcmp("-L", *iter) == 0
1241 || !strcmp("--list-transliterators", *iter)) {
1242 if (printConvs) {
1243 usage(pname, 1);
1244 }
374ca955 1245 printTranslits = TRUE;
b75a7d8f
A
1246 } else if (strcmp("-h", *iter) == 0 || !strcmp("-?", *iter)
1247 || !strcmp("--help", *iter)) {
1248 usage(pname, 0);
1249 } else if (!strcmp("-c", *iter)) {
1250 fromucallback = UCNV_FROM_U_CALLBACK_SKIP;
1251 } else if (!strcmp("--to-callback", *iter)) {
1252 iter++;
1253 if (iter != end) {
1254 const struct callback_ent *cbe = findCallback(*iter);
1255 if (cbe) {
1256 fromucallback = cbe->fromu;
1257 fromuctxt = cbe->fromuctxt;
1258 } else {
1259 UnicodeString str(*iter);
1260 initMsg(pname);
374ca955 1261 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
b75a7d8f
A
1262 return 4;
1263 }
1264 } else {
1265 usage(pname, 1);
1266 }
1267 } else if (!strcmp("--from-callback", *iter)) {
1268 iter++;
1269 if (iter != end) {
1270 const struct callback_ent *cbe = findCallback(*iter);
1271 if (cbe) {
1272 toucallback = cbe->tou;
1273 touctxt = cbe->touctxt;
1274 } else {
1275 UnicodeString str(*iter);
1276 initMsg(pname);
374ca955 1277 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
b75a7d8f
A
1278 return 4;
1279 }
1280 } else {
1281 usage(pname, 1);
1282 }
1283 } else if (!strcmp("-i", *iter)) {
1284 toucallback = UCNV_TO_U_CALLBACK_SKIP;
1285 } else if (!strcmp("--callback", *iter)) {
1286 iter++;
1287 if (iter != end) {
1288 const struct callback_ent *cbe = findCallback(*iter);
1289 if (cbe) {
1290 fromucallback = cbe->fromu;
1291 fromuctxt = cbe->fromuctxt;
1292 toucallback = cbe->tou;
1293 touctxt = cbe->touctxt;
1294 } else {
1295 UnicodeString str(*iter);
1296 initMsg(pname);
374ca955 1297 u_wmsg(stderr, "unknownCallback", str.getTerminatedBuffer());
b75a7d8f
A
1298 return 4;
1299 }
1300 } else {
1301 usage(pname, 1);
1302 }
1303 } else if (!strcmp("-s", *iter) || !strcmp("--silent", *iter)) {
374ca955 1304 verbose = FALSE;
b75a7d8f 1305 } else if (!strcmp("-v", *iter) || !strcmp("--verbose", *iter)) {
374ca955 1306 verbose = TRUE;
b75a7d8f 1307 } else if (!strcmp("-V", *iter) || !strcmp("--version", *iter)) {
374ca955 1308 printf("%s v2.1 ICU " U_ICU_VERSION "\n", pname);
b75a7d8f
A
1309 return 0;
1310 } else if (!strcmp("-o", *iter) || !strcmp("--output", *iter)) {
1311 ++iter;
1312 if (iter != end && !outfilestr) {
1313 outfilestr = *iter;
1314 } else {
1315 usage(pname, 1);
1316 }
374ca955
A
1317 } else if (0 == strcmp("--add-signature", *iter)) {
1318 cf.signature = 1;
1319 } else if (0 == strcmp("--remove-signature", *iter)) {
1320 cf.signature = -1;
b75a7d8f
A
1321 } else if (**iter == '-' && (*iter)[1]) {
1322 usage(pname, 1);
374ca955
A
1323 } else {
1324 // move a non-option up in argv[]
1325 *remainArgvLimit++ = *iter;
b75a7d8f
A
1326 }
1327 }
1328
1329 if (printConvs || printName) {
1330 return printConverters(pname, printName, printCanon) ? 2 : 0;
1331 } else if (printTranslits) {
1332 return printTransliterators(printCanon) ? 3 : 0;
1333 }
1334
1335 if (!fromcpage || !uprv_strcmp(fromcpage, "-")) {
1336 fromcpage = ucnv_getDefaultName();
1337 }
1338 if (!tocpage || !uprv_strcmp(tocpage, "-")) {
1339 tocpage = ucnv_getDefaultName();
1340 }
1341
1342 // Open the correct output file or connect to stdout for reading input
1343 if (outfilestr != 0 && strcmp(outfilestr, "-")) {
1344 outfile = fopen(outfilestr, "wb");
1345 if (outfile == 0) {
1346 UnicodeString str1(outfilestr, "");
1347 UnicodeString str2(strerror(errno), "");
1348 initMsg(pname);
1349 u_wmsg(stderr, "cantCreateOutputF",
1350 str1.getBuffer(), str2.getBuffer());
1351 return 1;
1352 }
1353 } else {
1354 outfilestr = "-";
1355 outfile = stdout;
374ca955 1356#ifdef USE_FILENO_BINARY_MODE
b75a7d8f
A
1357 if (setmode(fileno(outfile), O_BINARY) == -1) {
1358 u_wmsg(stderr, "cantSetOutBinMode");
1359 exit(-1);
1360 }
1361#endif
1362 }
1363
1364 /* Loop again on the arguments to find all the input files, and
374ca955 1365 convert them. */
b75a7d8f 1366
374ca955
A
1367 cf.setBufferSize(bufsz);
1368
1369 if(remainArgv < remainArgvLimit) {
1370 for (iter = remainArgv; iter != remainArgvLimit; iter++) {
1371 if (!cf.convertFile(
1372 pname, fromcpage, toucallback, touctxt, tocpage,
1373 fromucallback, fromuctxt, fallback, translit, *iter,
1374 outfile, verbose)
1375 ) {
b75a7d8f
A
1376 goto error_exit;
1377 }
1378 }
374ca955
A
1379 } else {
1380 if (!cf.convertFile(
1381 pname, fromcpage, toucallback, touctxt, tocpage,
1382 fromucallback, fromuctxt, fallback, translit, 0,
1383 outfile, verbose)
1384 ) {
b75a7d8f
A
1385 goto error_exit;
1386 }
1387 }
1388
1389 goto normal_exit;
1390error_exit:
1391 ret = 1;
1392normal_exit:
1393
374ca955 1394 if (outfile != stdout) {
b75a7d8f 1395 fclose(outfile);
374ca955 1396 }
b75a7d8f
A
1397
1398 return ret;
1399}
1400
1401
1402/*
1403 * Hey, Emacs, please set the following:
1404 *
1405 * Local Variables:
1406 * indent-tabs-mode: nil
1407 * End:
1408 *
1409 */