]> git.saurik.com Git - apple/icu.git/blame - icuSources/io/ustdio.c
ICU-6.2.16.tar.gz
[apple/icu.git] / icuSources / io / ustdio.c
CommitLineData
b75a7d8f
A
1/*
2******************************************************************************
3*
374ca955 4* Copyright (C) 1998-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7******************************************************************************
8*
9* File ustdio.c
10*
11* Modification History:
12*
13* Date Name Description
14* 11/18/98 stephen Creation.
15* 03/12/99 stephen Modified for new C API.
16* 07/19/99 stephen Fixed read() and gets()
17******************************************************************************
18*/
19
20#include "unicode/ustdio.h"
21#include "unicode/putil.h"
22#include "cmemory.h"
374ca955 23#include "cstring.h"
b75a7d8f
A
24#include "ufile.h"
25#include "ufmt_cmn.h"
26#include "unicode/ucnv.h"
27#include "unicode/ustring.h"
28
29#include <string.h>
30
b75a7d8f 31#define DELIM_LF 0x000A
374ca955
A
32#define DELIM_VT 0x000B
33#define DELIM_FF 0x000C
34#define DELIM_CR 0x000D
35#define DELIM_NEL 0x0085
36#define DELIM_LS 0x2028
37#define DELIM_PS 0x2029
b75a7d8f
A
38
39/* Leave this copyright notice here! */
40static const char copyright[] = U_COPYRIGHT_STRING;
41
374ca955 42/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
b75a7d8f
A
43#ifdef WIN32
44static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
45static const uint32_t DELIMITERS_LEN = 2;
374ca955 46/* TODO: Default newline writing should be detected based upon the converter being used. */
b75a7d8f
A
47#else
48static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
49static const uint32_t DELIMITERS_LEN = 1;
50#endif
51
374ca955
A
52#define IS_FIRST_STRING_DELIMITER(c1) \
53 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
54 || (c1) == DELIM_NEL \
55 || (c1) == DELIM_LS \
56 || (c1) == DELIM_PS)
57#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
58#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
59 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
60
b75a7d8f
A
61
62#if !UCONFIG_NO_TRANSLITERATION
63
64U_CAPI UTransliterator* U_EXPORT2
65u_fsettransliterator(UFILE *file, UFileDirection direction,
66 UTransliterator *adopt, UErrorCode *status)
67{
68 UTransliterator *old = NULL;
69
374ca955 70 if(U_FAILURE(*status))
b75a7d8f
A
71 {
72 return adopt;
73 }
74
75 if(!file)
76 {
77 *status = U_ILLEGAL_ARGUMENT_ERROR;
78 return adopt;
79 }
80
81 if(direction & U_READ)
82 {
83 /** TODO: implement */
84 *status = U_UNSUPPORTED_ERROR;
85 return adopt;
86 }
87
88 if(adopt == NULL) /* they are clearing it */
89 {
90 if(file->fTranslit != NULL)
91 {
92 /* TODO: Check side */
93 old = file->fTranslit->translit;
94 uprv_free(file->fTranslit->buffer);
95 file->fTranslit->buffer=NULL;
96 uprv_free(file->fTranslit);
97 file->fTranslit=NULL;
98 }
99 }
100 else
101 {
102 if(file->fTranslit == NULL)
103 {
104 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
105 if(!file->fTranslit)
106 {
107 *status = U_MEMORY_ALLOCATION_ERROR;
108 return adopt;
109 }
110 file->fTranslit->capacity = 0;
111 file->fTranslit->length = 0;
112 file->fTranslit->pos = 0;
113 file->fTranslit->buffer = NULL;
114 }
115 else
116 {
117 old = file->fTranslit->translit;
118 ufile_flush_translit(file);
119 }
120
121 file->fTranslit->translit = adopt;
122 }
123
124 return old;
125}
126
127static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
128{
129 int32_t newlen;
130 int32_t junkCount = 0;
131 int32_t textLength;
132 int32_t textLimit;
133 UTransPosition pos;
134 UErrorCode status = U_ZERO_ERROR;
135
136 if(count == NULL)
137 {
138 count = &junkCount;
139 }
140
141 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
142 {
143 /* fast path */
144 return src;
145 }
146
147 /* First: slide over everything */
148 if(f->fTranslit->length > f->fTranslit->pos)
149 {
150 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
151 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
152 }
153 f->fTranslit->length -= f->fTranslit->pos; /* always */
154 f->fTranslit->pos = 0;
155
156 /* Calculate new buffer size needed */
157 newlen = (*count + f->fTranslit->length) * 4;
158
159 if(newlen > f->fTranslit->capacity)
160 {
161 if(f->fTranslit->buffer == NULL)
162 {
163 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
164 }
165 else
166 {
167 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
168 }
169 f->fTranslit->capacity = newlen;
170 }
171
172 /* Now, copy any data over */
173 u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
174 src,
175 *count);
176 f->fTranslit->length += *count;
177
178 /* Now, translit in place as much as we can */
179 if(flush == FALSE)
180 {
181 textLength = f->fTranslit->length;
182 pos.contextStart = 0;
183 pos.contextLimit = textLength;
184 pos.start = 0;
185 pos.limit = textLength;
186
187 utrans_transIncrementalUChars(f->fTranslit->translit,
188 f->fTranslit->buffer, /* because we shifted */
189 &textLength,
190 f->fTranslit->capacity,
191 &pos,
192 &status);
193
b75a7d8f
A
194 /* now: start/limit point to the transliterated text */
195 /* Transliterated is [buffer..pos.start) */
196 *count = pos.start;
197 f->fTranslit->pos = pos.start;
198 f->fTranslit->length = pos.limit;
199
200 return f->fTranslit->buffer;
201 }
202 else
203 {
204 textLength = f->fTranslit->length;
205 textLimit = f->fTranslit->length;
206
207 utrans_transUChars(f->fTranslit->translit,
208 f->fTranslit->buffer,
209 &textLength,
210 f->fTranslit->capacity,
211 0,
212 &textLimit,
213 &status);
214
b75a7d8f
A
215 /* out: converted len */
216 *count = textLimit;
217
218 /* Set pointers to 0 */
219 f->fTranslit->pos = 0;
220 f->fTranslit->length = 0;
221
222 return f->fTranslit->buffer;
223 }
224}
225
226#endif
227
228void
229ufile_flush_translit(UFILE *f)
230{
231#if !UCONFIG_NO_TRANSLITERATION
232 if((!f)||(!f->fTranslit))
233 return;
234#endif
235
236 u_file_write_flush(NULL, 0, f, TRUE);
237}
238
239
240void
241ufile_close_translit(UFILE *f)
242{
243#if !UCONFIG_NO_TRANSLITERATION
244 if((!f)||(!f->fTranslit))
245 return;
246#endif
247
248 ufile_flush_translit(f);
249
250#if !UCONFIG_NO_TRANSLITERATION
251 if(f->fTranslit->translit)
252 utrans_close(f->fTranslit->translit);
253
254 if(f->fTranslit->buffer)
255 {
256 uprv_free(f->fTranslit->buffer);
257 }
258
259 uprv_free(f->fTranslit);
260 f->fTranslit = NULL;
261#endif
262}
263
264
265/* Input/output */
266
267U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
268u_fputs(const UChar *s,
269 UFILE *f)
270{
271 int32_t count = u_file_write(s, u_strlen(s), f);
272 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
273 return count;
274}
275
374ca955
A
276U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
277u_fputc(UChar32 uc,
b75a7d8f
A
278 UFILE *f)
279{
374ca955
A
280 UChar buf[2];
281 int32_t idx = 0;
282 UBool isError = FALSE;
283
284 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
285 if (isError) {
286 return EOF;
287 }
288 return u_file_write(buf, idx, f) == idx ? uc : EOF;
b75a7d8f
A
289}
290
291
292U_CAPI int32_t U_EXPORT2
293u_file_write_flush( const UChar *chars,
294 int32_t count,
295 UFILE *f,
296 UBool flush)
297{
298 /* Set up conversion parameters */
374ca955
A
299 UErrorCode status = U_ZERO_ERROR;
300 const UChar *mySource = chars;
301 const UChar *sourceAlias = chars;
302 const UChar *mySourceEnd;
303 char charBuffer[UFILE_CHARBUFFER_SIZE];
304 char *myTarget = charBuffer;
305 int32_t written = 0;
306 int32_t numConverted = 0;
307
308 if (!f->fFile) {
309 int32_t charsLeft = f->str.fLimit - f->str.fPos;
310 if (flush && charsLeft > count) {
311 count++;
312 }
313 written = ufmt_min(count, charsLeft);
314 u_strncpy(f->str.fPos, chars, written);
315 f->str.fPos += written;
316 return written;
317 }
318
319 if (count < 0) {
320 count = u_strlen(chars);
321 }
322 mySourceEnd = chars + count;
b75a7d8f
A
323
324#if !UCONFIG_NO_TRANSLITERATION
325 if((f->fTranslit) && (f->fTranslit->translit))
326 {
327 /* Do the transliteration */
328 mySource = u_file_translit(f, chars, &count, flush);
329 sourceAlias = mySource;
330 mySourceEnd = mySource + count;
331 }
332#endif
333
334 /* Perform the conversion in a loop */
335 do {
336 status = U_ZERO_ERROR;
337 sourceAlias = mySource;
338 if(f->fConverter != NULL) { /* We have a valid converter */
339 ucnv_fromUnicode(f->fConverter,
340 &myTarget,
374ca955 341 charBuffer + UFILE_CHARBUFFER_SIZE,
b75a7d8f
A
342 &mySource,
343 mySourceEnd,
344 NULL,
345 flush,
346 &status);
347 } else { /*weiv: do the invariant conversion */
348 u_UCharsToChars(mySource, myTarget, count);
349 myTarget += count;
350 }
374ca955 351 numConverted = (int32_t)(myTarget - charBuffer);
b75a7d8f
A
352
353 if (numConverted > 0) {
354 /* write the converted bytes */
374ca955 355 fwrite(charBuffer,
b75a7d8f
A
356 sizeof(char),
357 numConverted,
358 f->fFile);
359
360 written += numConverted;
361 }
374ca955 362 myTarget = charBuffer;
b75a7d8f
A
363 }
364 while(status == U_BUFFER_OVERFLOW_ERROR);
365
366 /* return # of chars written */
367 return written;
368}
369
370U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
371u_file_write( const UChar *chars,
372 int32_t count,
373 UFILE *f)
374{
375 return u_file_write_flush(chars,count,f,FALSE);
376}
377
378
379/* private function used for buffering input */
380void
381ufile_fill_uchar_buffer(UFILE *f)
382{
374ca955
A
383 UErrorCode status;
384 const char *mySource;
385 const char *mySourceEnd;
386 UChar *myTarget;
387 int32_t bufferSize;
388 int32_t maxCPBytes;
389 int32_t bytesRead;
390 int32_t availLength;
391 int32_t dataSize;
392 char charBuffer[UFILE_CHARBUFFER_SIZE];
393 u_localized_string *str;
394
395 if (f->fFile == NULL) {
396 /* There is nothing to do. It's a string. */
397 return;
398 }
b75a7d8f 399
374ca955
A
400 str = &f->str;
401 dataSize = (int32_t)(str->fLimit - str->fPos);
402 if (f->fFileno == 0 && dataSize > 0) {
403 /* Don't read from stdin too many times. There is still some data. */
404 return;
405 }
b75a7d8f
A
406
407 /* shift the buffer if it isn't empty */
b75a7d8f 408 if(dataSize != 0) {
374ca955 409 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
b75a7d8f
A
410 }
411
412
413 /* record how much buffer space is available */
414 availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
415
416 /* Determine the # of codepage bytes needed to fill our UChar buffer */
417 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
418 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
419
420 /* Read in the data to convert */
374ca955
A
421 if (f->fFileno == 0) {
422 /* Special case. Read from stdin one line at a time. */
423 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
424 bytesRead = (retStr ? uprv_strlen(charBuffer) : 0);
425 }
426 else {
427 /* A normal file */
428 bytesRead = (int32_t)fread(charBuffer,
429 sizeof(char),
430 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
431 f->fFile);
432 }
b75a7d8f
A
433
434 /* Set up conversion parameters */
435 status = U_ZERO_ERROR;
374ca955
A
436 mySource = charBuffer;
437 mySourceEnd = charBuffer + bytesRead;
b75a7d8f
A
438 myTarget = f->fUCBuffer + dataSize;
439 bufferSize = UFILE_UCHARBUFFER_SIZE;
440
441 if(f->fConverter != NULL) { /* We have a valid converter */
442 /* Perform the conversion */
443 ucnv_toUnicode(f->fConverter,
444 &myTarget,
445 f->fUCBuffer + bufferSize,
446 &mySource,
447 mySourceEnd,
448 NULL,
449 (UBool)(feof(f->fFile) != 0),
450 &status);
451
452 } else { /*weiv: do the invariant conversion */
453 u_charsToUChars(mySource, myTarget, bytesRead);
454 myTarget += bytesRead;
455 }
456
457 /* update the pointers into our array */
374ca955
A
458 str->fPos = str->fBuffer;
459 str->fLimit = myTarget;
b75a7d8f
A
460}
461
462U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
374ca955
A
463u_fgets(UChar *s,
464 int32_t n,
465 UFILE *f)
b75a7d8f
A
466{
467 int32_t dataSize;
468 int32_t count;
469 UChar *alias;
374ca955 470 const UChar *limit;
b75a7d8f 471 UChar *sItr;
374ca955
A
472 UChar currDelim = 0;
473 u_localized_string *str;
b75a7d8f
A
474
475 if (n <= 0) {
476 /* Caller screwed up. We need to write the null terminatior. */
477 return NULL;
478 }
479
480 /* fill the buffer if needed */
374ca955
A
481 str = &f->str;
482 if (str->fPos >= str->fLimit) {
b75a7d8f
A
483 ufile_fill_uchar_buffer(f);
484 }
485
486 /* subtract 1 from n to compensate for the terminator */
487 --n;
488
489 /* determine the amount of data in the buffer */
374ca955 490 dataSize = (int32_t)(str->fLimit - str->fPos);
b75a7d8f
A
491
492 /* if 0 characters were left, return 0 */
493 if (dataSize == 0)
494 return NULL;
495
496 /* otherwise, iteratively fill the buffer and copy */
497 count = 0;
498 sItr = s;
374ca955 499 currDelim = 0;
b75a7d8f 500 while (dataSize > 0 && count < n) {
374ca955 501 alias = str->fPos;
b75a7d8f
A
502
503 /* Find how much to copy */
504 if (dataSize < n) {
374ca955 505 limit = str->fLimit;
b75a7d8f
A
506 }
507 else {
508 limit = alias + n;
509 }
510
374ca955
A
511 if (!currDelim) {
512 /* Copy UChars until we find the first occurrence of a delimiter character */
513 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
514 count++;
515 *(sItr++) = *(alias++);
516 }
517 /* Preserve the newline */
518 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
519 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
520 currDelim = *alias;
521 }
522 count++;
523 *(sItr++) = *(alias++);
524 }
b75a7d8f 525 }
374ca955
A
526 /* If we have a CRLF combination, preserve that too. */
527 if (alias < limit) {
528 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
529 count++;
530 *(sItr++) = *(alias++);
531 }
532 currDelim = 1; /* This isn't a newline, but it's used to say
533 that we should break later. We've checked all
534 possible newline combinations even across buffer
535 boundaries. */
b75a7d8f
A
536 }
537
538 /* update the current buffer position */
374ca955 539 str->fPos = alias;
b75a7d8f
A
540
541 /* if we found a delimiter */
374ca955 542 if (currDelim == 1) {
b75a7d8f
A
543 /* break out */
544 break;
545 }
546
547 /* refill the buffer */
548 ufile_fill_uchar_buffer(f);
549
550 /* determine the amount of data in the buffer */
374ca955 551 dataSize = (int32_t)(str->fLimit - str->fPos);
b75a7d8f
A
552 }
553
554 /* add the terminator and return s */
555 *sItr = 0x0000;
556 return s;
557}
558
374ca955
A
559U_CFUNC UBool U_EXPORT2
560ufile_getch(UFILE *f, UChar *ch)
b75a7d8f 561{
374ca955
A
562 UBool isValidChar = FALSE;
563
564 *ch = U_EOF;
b75a7d8f 565 /* if we have an available character in the buffer, return it */
374ca955
A
566 if(f->str.fPos < f->str.fLimit){
567 *ch = *(f->str.fPos)++;
568 isValidChar = TRUE;
b75a7d8f 569 }
374ca955
A
570 else if (f) {
571 /* otherwise, fill the buffer and return the next character */
572 if(f->str.fPos >= f->str.fLimit) {
573 ufile_fill_uchar_buffer(f);
574 }
575 if(f->str.fPos < f->str.fLimit) {
576 *ch = *(f->str.fPos)++;
577 isValidChar = TRUE;
578 }
579 }
580 return isValidChar;
b75a7d8f
A
581}
582
374ca955
A
583U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
584u_fgetc(UFILE *f)
585{
586 UChar ch;
587 ufile_getch(f, &ch);
588 return ch;
b75a7d8f
A
589}
590
374ca955
A
591U_CFUNC UBool U_EXPORT2
592ufile_getch32(UFILE *f, UChar32 *c32)
b75a7d8f 593{
374ca955
A
594 UBool isValidChar = FALSE;
595 u_localized_string *str;
596
597 *c32 = U_EOF;
b75a7d8f
A
598
599 /* Fill the buffer if it is empty */
374ca955
A
600 str = &f->str;
601 if (f && str->fPos + 1 >= str->fLimit) {
b75a7d8f
A
602 ufile_fill_uchar_buffer(f);
603 }
604
605 /* Get the next character in the buffer */
374ca955
A
606 if (str->fPos < str->fLimit) {
607 *c32 = *(str->fPos)++;
608 if (U_IS_LEAD(*c32)) {
609 if (str->fPos < str->fLimit) {
610 UChar c16 = *(str->fPos)++;
611 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
612 isValidChar = TRUE;
613 }
614 else {
615 *c32 = U_EOF;
616 }
617 }
618 else {
619 isValidChar = TRUE;
620 }
b75a7d8f
A
621 }
622
374ca955
A
623 return isValidChar;
624}
b75a7d8f 625
374ca955
A
626U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
627u_fgetcx(UFILE *f)
628{
629 UChar32 ch;
630 ufile_getch32(f, &ch);
631 return ch;
b75a7d8f
A
632}
633
374ca955
A
634U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
635u_fungetc(UChar32 ch,
b75a7d8f
A
636 UFILE *f)
637{
374ca955
A
638 u_localized_string *str;
639
640 str = &f->str;
641
b75a7d8f 642 /* if we're at the beginning of the buffer, sorry! */
374ca955
A
643 if (str->fPos == str->fBuffer
644 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
645 {
646 ch = U_EOF;
647 }
b75a7d8f 648 else {
374ca955
A
649 /* otherwise, put the character back */
650 /* Remember, read them back on in the reverse order. */
651 if (U_IS_LEAD(ch)) {
652 if (*--(str->fPos) != U16_TRAIL(ch)
653 || *--(str->fPos) != U16_LEAD(ch))
654 {
655 ch = U_EOF;
656 }
657 }
658 else if (*--(str->fPos) != ch) {
659 ch = U_EOF;
660 }
b75a7d8f 661 }
374ca955 662 return ch;
b75a7d8f
A
663}
664
665U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
666u_file_read( UChar *chars,
667 int32_t count,
668 UFILE *f)
669{
670 int32_t dataSize;
374ca955
A
671 int32_t read = 0;
672 u_localized_string *str = &f->str;
b75a7d8f 673
b75a7d8f
A
674 do {
675
676 /* determine the amount of data in the buffer */
374ca955
A
677 dataSize = (int32_t)(str->fLimit - str->fPos);
678 if (dataSize <= 0) {
679 /* fill the buffer */
680 ufile_fill_uchar_buffer(f);
681 dataSize = (int32_t)(str->fLimit - str->fPos);
682 }
683
684 /* Make sure that we don't read too much */
685 if (dataSize > (count - read)) {
686 dataSize = count - read;
687 }
b75a7d8f
A
688
689 /* copy the current data in the buffer */
374ca955 690 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
b75a7d8f
A
691
692 /* update number of items read */
693 read += dataSize;
694
695 /* update the current buffer position */
374ca955
A
696 str->fPos += dataSize;
697 }
698 while (dataSize != 0 && read < count);
b75a7d8f
A
699
700 return read;
701}