]> git.saurik.com Git - apple/icu.git/blob - icuSources/io/ustdio.c
ICU-8.11.4.tar.gz
[apple/icu.git] / icuSources / io / ustdio.c
1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1998-2006, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File ustdio.c
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 11/18/98 stephen Creation.
15 * 03/12/99 stephen Modified for new C API.
16 * 07/19/99 stephen Fixed read() and gets()
17 ******************************************************************************
18 */
19
20 #include "unicode/ustdio.h"
21 #include "unicode/putil.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "ufile.h"
25 #include "ufmt_cmn.h"
26 #include "unicode/ucnv.h"
27 #include "unicode/ustring.h"
28
29 #include <string.h>
30
31 #define DELIM_LF 0x000A
32 #define DELIM_VT 0x000B
33 #define DELIM_FF 0x000C
34 #define DELIM_CR 0x000D
35 #define DELIM_NEL 0x0085
36 #define DELIM_LS 0x2028
37 #define DELIM_PS 0x2029
38
39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
40 #ifdef U_WINDOWS
41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
42 static const uint32_t DELIMITERS_LEN = 2;
43 /* TODO: Default newline writing should be detected based upon the converter being used. */
44 #else
45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
46 static const uint32_t DELIMITERS_LEN = 1;
47 #endif
48
49 #define IS_FIRST_STRING_DELIMITER(c1) \
50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
51 || (c1) == DELIM_NEL \
52 || (c1) == DELIM_LS \
53 || (c1) == DELIM_PS)
54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
57
58
59 #if !UCONFIG_NO_TRANSLITERATION
60
61 U_CAPI UTransliterator* U_EXPORT2
62 u_fsettransliterator(UFILE *file, UFileDirection direction,
63 UTransliterator *adopt, UErrorCode *status)
64 {
65 UTransliterator *old = NULL;
66
67 if(U_FAILURE(*status))
68 {
69 return adopt;
70 }
71
72 if(!file)
73 {
74 *status = U_ILLEGAL_ARGUMENT_ERROR;
75 return adopt;
76 }
77
78 if(direction & U_READ)
79 {
80 /** TODO: implement */
81 *status = U_UNSUPPORTED_ERROR;
82 return adopt;
83 }
84
85 if(adopt == NULL) /* they are clearing it */
86 {
87 if(file->fTranslit != NULL)
88 {
89 /* TODO: Check side */
90 old = file->fTranslit->translit;
91 uprv_free(file->fTranslit->buffer);
92 file->fTranslit->buffer=NULL;
93 uprv_free(file->fTranslit);
94 file->fTranslit=NULL;
95 }
96 }
97 else
98 {
99 if(file->fTranslit == NULL)
100 {
101 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
102 if(!file->fTranslit)
103 {
104 *status = U_MEMORY_ALLOCATION_ERROR;
105 return adopt;
106 }
107 file->fTranslit->capacity = 0;
108 file->fTranslit->length = 0;
109 file->fTranslit->pos = 0;
110 file->fTranslit->buffer = NULL;
111 }
112 else
113 {
114 old = file->fTranslit->translit;
115 ufile_flush_translit(file);
116 }
117
118 file->fTranslit->translit = adopt;
119 }
120
121 return old;
122 }
123
124 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
125 {
126 int32_t newlen;
127 int32_t junkCount = 0;
128 int32_t textLength;
129 int32_t textLimit;
130 UTransPosition pos;
131 UErrorCode status = U_ZERO_ERROR;
132
133 if(count == NULL)
134 {
135 count = &junkCount;
136 }
137
138 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
139 {
140 /* fast path */
141 return src;
142 }
143
144 /* First: slide over everything */
145 if(f->fTranslit->length > f->fTranslit->pos)
146 {
147 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
148 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
149 }
150 f->fTranslit->length -= f->fTranslit->pos; /* always */
151 f->fTranslit->pos = 0;
152
153 /* Calculate new buffer size needed */
154 newlen = (*count + f->fTranslit->length) * 4;
155
156 if(newlen > f->fTranslit->capacity)
157 {
158 if(f->fTranslit->buffer == NULL)
159 {
160 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
161 }
162 else
163 {
164 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
165 }
166 f->fTranslit->capacity = newlen;
167 }
168
169 /* Now, copy any data over */
170 u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
171 src,
172 *count);
173 f->fTranslit->length += *count;
174
175 /* Now, translit in place as much as we can */
176 if(flush == FALSE)
177 {
178 textLength = f->fTranslit->length;
179 pos.contextStart = 0;
180 pos.contextLimit = textLength;
181 pos.start = 0;
182 pos.limit = textLength;
183
184 utrans_transIncrementalUChars(f->fTranslit->translit,
185 f->fTranslit->buffer, /* because we shifted */
186 &textLength,
187 f->fTranslit->capacity,
188 &pos,
189 &status);
190
191 /* now: start/limit point to the transliterated text */
192 /* Transliterated is [buffer..pos.start) */
193 *count = pos.start;
194 f->fTranslit->pos = pos.start;
195 f->fTranslit->length = pos.limit;
196
197 return f->fTranslit->buffer;
198 }
199 else
200 {
201 textLength = f->fTranslit->length;
202 textLimit = f->fTranslit->length;
203
204 utrans_transUChars(f->fTranslit->translit,
205 f->fTranslit->buffer,
206 &textLength,
207 f->fTranslit->capacity,
208 0,
209 &textLimit,
210 &status);
211
212 /* out: converted len */
213 *count = textLimit;
214
215 /* Set pointers to 0 */
216 f->fTranslit->pos = 0;
217 f->fTranslit->length = 0;
218
219 return f->fTranslit->buffer;
220 }
221 }
222
223 #endif
224
225 void
226 ufile_flush_translit(UFILE *f)
227 {
228 #if !UCONFIG_NO_TRANSLITERATION
229 if((!f)||(!f->fTranslit))
230 return;
231 #endif
232
233 u_file_write_flush(NULL, 0, f, FALSE, TRUE);
234 }
235
236
237 void
238 ufile_close_translit(UFILE *f)
239 {
240 #if !UCONFIG_NO_TRANSLITERATION
241 if((!f)||(!f->fTranslit))
242 return;
243 #endif
244
245 ufile_flush_translit(f);
246
247 #if !UCONFIG_NO_TRANSLITERATION
248 if(f->fTranslit->translit)
249 utrans_close(f->fTranslit->translit);
250
251 if(f->fTranslit->buffer)
252 {
253 uprv_free(f->fTranslit->buffer);
254 }
255
256 uprv_free(f->fTranslit);
257 f->fTranslit = NULL;
258 #endif
259 }
260
261
262 /* Input/output */
263
264 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
265 u_fputs(const UChar *s,
266 UFILE *f)
267 {
268 int32_t count = u_file_write(s, u_strlen(s), f);
269 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
270 return count;
271 }
272
273 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
274 u_fputc(UChar32 uc,
275 UFILE *f)
276 {
277 UChar buf[2];
278 int32_t idx = 0;
279 UBool isError = FALSE;
280
281 U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
282 if (isError) {
283 return U_EOF;
284 }
285 return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
286 }
287
288
289 U_CAPI int32_t U_EXPORT2
290 u_file_write_flush(const UChar *chars,
291 int32_t count,
292 UFILE *f,
293 UBool flushIO,
294 UBool flushTranslit)
295 {
296 /* Set up conversion parameters */
297 UErrorCode status = U_ZERO_ERROR;
298 const UChar *mySource = chars;
299 const UChar *mySourceEnd;
300 char charBuffer[UFILE_CHARBUFFER_SIZE];
301 char *myTarget = charBuffer;
302 int32_t written = 0;
303 int32_t numConverted = 0;
304
305 if (count < 0) {
306 count = u_strlen(chars);
307 }
308
309 #if !UCONFIG_NO_TRANSLITERATION
310 if((f->fTranslit) && (f->fTranslit->translit))
311 {
312 /* Do the transliteration */
313 mySource = u_file_translit(f, chars, &count, flushTranslit);
314 }
315 #endif
316
317 /* Write to a string. */
318 if (!f->fFile) {
319 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
320 if (flushIO && charsLeft > count) {
321 count++;
322 }
323 written = ufmt_min(count, charsLeft);
324 u_strncpy(f->str.fPos, mySource, written);
325 f->str.fPos += written;
326 return written;
327 }
328
329 mySourceEnd = mySource + count;
330
331 /* Perform the conversion in a loop */
332 do {
333 status = U_ZERO_ERROR;
334 if(f->fConverter != NULL) { /* We have a valid converter */
335 ucnv_fromUnicode(f->fConverter,
336 &myTarget,
337 charBuffer + UFILE_CHARBUFFER_SIZE,
338 &mySource,
339 mySourceEnd,
340 NULL,
341 flushIO,
342 &status);
343 } else { /*weiv: do the invariant conversion */
344 u_UCharsToChars(mySource, myTarget, count);
345 myTarget += count;
346 }
347 numConverted = (int32_t)(myTarget - charBuffer);
348
349 if (numConverted > 0) {
350 /* write the converted bytes */
351 fwrite(charBuffer,
352 sizeof(char),
353 numConverted,
354 f->fFile);
355
356 written += numConverted;
357 }
358 myTarget = charBuffer;
359 }
360 while(status == U_BUFFER_OVERFLOW_ERROR);
361
362 /* return # of chars written */
363 return written;
364 }
365
366 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
367 u_file_write( const UChar *chars,
368 int32_t count,
369 UFILE *f)
370 {
371 return u_file_write_flush(chars,count,f,FALSE,FALSE);
372 }
373
374
375 /* private function used for buffering input */
376 void
377 ufile_fill_uchar_buffer(UFILE *f)
378 {
379 UErrorCode status;
380 const char *mySource;
381 const char *mySourceEnd;
382 UChar *myTarget;
383 int32_t bufferSize;
384 int32_t maxCPBytes;
385 int32_t bytesRead;
386 int32_t availLength;
387 int32_t dataSize;
388 char charBuffer[UFILE_CHARBUFFER_SIZE];
389 u_localized_string *str;
390
391 if (f->fFile == NULL) {
392 /* There is nothing to do. It's a string. */
393 return;
394 }
395
396 str = &f->str;
397 dataSize = (int32_t)(str->fLimit - str->fPos);
398 if (f->fFileno == 0 && dataSize > 0) {
399 /* Don't read from stdin too many times. There is still some data. */
400 return;
401 }
402
403 /* shift the buffer if it isn't empty */
404 if(dataSize != 0) {
405 uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
406 }
407
408
409 /* record how much buffer space is available */
410 availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
411
412 /* Determine the # of codepage bytes needed to fill our UChar buffer */
413 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
414 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
415
416 /* Read in the data to convert */
417 if (f->fFileno == 0) {
418 /* Special case. Read from stdin one line at a time. */
419 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
420 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
421 }
422 else {
423 /* A normal file */
424 bytesRead = (int32_t)fread(charBuffer,
425 sizeof(char),
426 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
427 f->fFile);
428 }
429
430 /* Set up conversion parameters */
431 status = U_ZERO_ERROR;
432 mySource = charBuffer;
433 mySourceEnd = charBuffer + bytesRead;
434 myTarget = f->fUCBuffer + dataSize;
435 bufferSize = UFILE_UCHARBUFFER_SIZE;
436
437 if(f->fConverter != NULL) { /* We have a valid converter */
438 /* Perform the conversion */
439 ucnv_toUnicode(f->fConverter,
440 &myTarget,
441 f->fUCBuffer + bufferSize,
442 &mySource,
443 mySourceEnd,
444 NULL,
445 (UBool)(feof(f->fFile) != 0),
446 &status);
447
448 } else { /*weiv: do the invariant conversion */
449 u_charsToUChars(mySource, myTarget, bytesRead);
450 myTarget += bytesRead;
451 }
452
453 /* update the pointers into our array */
454 str->fPos = str->fBuffer;
455 str->fLimit = myTarget;
456 }
457
458 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
459 u_fgets(UChar *s,
460 int32_t n,
461 UFILE *f)
462 {
463 int32_t dataSize;
464 int32_t count;
465 UChar *alias;
466 const UChar *limit;
467 UChar *sItr;
468 UChar currDelim = 0;
469 u_localized_string *str;
470
471 if (n <= 0) {
472 /* Caller screwed up. We need to write the null terminatior. */
473 return NULL;
474 }
475
476 /* fill the buffer if needed */
477 str = &f->str;
478 if (str->fPos >= str->fLimit) {
479 ufile_fill_uchar_buffer(f);
480 }
481
482 /* subtract 1 from n to compensate for the terminator */
483 --n;
484
485 /* determine the amount of data in the buffer */
486 dataSize = (int32_t)(str->fLimit - str->fPos);
487
488 /* if 0 characters were left, return 0 */
489 if (dataSize == 0)
490 return NULL;
491
492 /* otherwise, iteratively fill the buffer and copy */
493 count = 0;
494 sItr = s;
495 currDelim = 0;
496 while (dataSize > 0 && count < n) {
497 alias = str->fPos;
498
499 /* Find how much to copy */
500 if (dataSize < (n - count)) {
501 limit = str->fLimit;
502 }
503 else {
504 limit = alias + (n - count);
505 }
506
507 if (!currDelim) {
508 /* Copy UChars until we find the first occurrence of a delimiter character */
509 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
510 count++;
511 *(sItr++) = *(alias++);
512 }
513 /* Preserve the newline */
514 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
515 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
516 currDelim = *alias;
517 }
518 else {
519 currDelim = 1; /* This isn't a newline, but it's used to say
520 that we should break later. We've checked all
521 possible newline combinations even across buffer
522 boundaries. */
523 }
524 count++;
525 *(sItr++) = *(alias++);
526 }
527 }
528 /* If we have a CRLF combination, preserve that too. */
529 if (alias < limit) {
530 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
531 count++;
532 *(sItr++) = *(alias++);
533 }
534 currDelim = 1; /* This isn't a newline, but it's used to say
535 that we should break later. We've checked all
536 possible newline combinations even across buffer
537 boundaries. */
538 }
539
540 /* update the current buffer position */
541 str->fPos = alias;
542
543 /* if we found a delimiter */
544 if (currDelim == 1) {
545 /* break out */
546 break;
547 }
548
549 /* refill the buffer */
550 ufile_fill_uchar_buffer(f);
551
552 /* determine the amount of data in the buffer */
553 dataSize = (int32_t)(str->fLimit - str->fPos);
554 }
555
556 /* add the terminator and return s */
557 *sItr = 0x0000;
558 return s;
559 }
560
561 U_CFUNC UBool U_EXPORT2
562 ufile_getch(UFILE *f, UChar *ch)
563 {
564 UBool isValidChar = FALSE;
565
566 *ch = U_EOF;
567 /* if we have an available character in the buffer, return it */
568 if(f->str.fPos < f->str.fLimit){
569 *ch = *(f->str.fPos)++;
570 isValidChar = TRUE;
571 }
572 else if (f) {
573 /* otherwise, fill the buffer and return the next character */
574 if(f->str.fPos >= f->str.fLimit) {
575 ufile_fill_uchar_buffer(f);
576 }
577 if(f->str.fPos < f->str.fLimit) {
578 *ch = *(f->str.fPos)++;
579 isValidChar = TRUE;
580 }
581 }
582 return isValidChar;
583 }
584
585 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
586 u_fgetc(UFILE *f)
587 {
588 UChar ch;
589 ufile_getch(f, &ch);
590 return ch;
591 }
592
593 U_CFUNC UBool U_EXPORT2
594 ufile_getch32(UFILE *f, UChar32 *c32)
595 {
596 UBool isValidChar = FALSE;
597 u_localized_string *str;
598
599 *c32 = U_EOF;
600
601 /* Fill the buffer if it is empty */
602 str = &f->str;
603 if (f && str->fPos + 1 >= str->fLimit) {
604 ufile_fill_uchar_buffer(f);
605 }
606
607 /* Get the next character in the buffer */
608 if (str->fPos < str->fLimit) {
609 *c32 = *(str->fPos)++;
610 if (U_IS_LEAD(*c32)) {
611 if (str->fPos < str->fLimit) {
612 UChar c16 = *(str->fPos)++;
613 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
614 isValidChar = TRUE;
615 }
616 else {
617 *c32 = U_EOF;
618 }
619 }
620 else {
621 isValidChar = TRUE;
622 }
623 }
624
625 return isValidChar;
626 }
627
628 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
629 u_fgetcx(UFILE *f)
630 {
631 UChar32 ch;
632 ufile_getch32(f, &ch);
633 return ch;
634 }
635
636 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
637 u_fungetc(UChar32 ch,
638 UFILE *f)
639 {
640 u_localized_string *str;
641
642 str = &f->str;
643
644 /* if we're at the beginning of the buffer, sorry! */
645 if (str->fPos == str->fBuffer
646 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
647 {
648 ch = U_EOF;
649 }
650 else {
651 /* otherwise, put the character back */
652 /* Remember, read them back on in the reverse order. */
653 if (U_IS_LEAD(ch)) {
654 if (*--(str->fPos) != U16_TRAIL(ch)
655 || *--(str->fPos) != U16_LEAD(ch))
656 {
657 ch = U_EOF;
658 }
659 }
660 else if (*--(str->fPos) != ch) {
661 ch = U_EOF;
662 }
663 }
664 return ch;
665 }
666
667 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
668 u_file_read( UChar *chars,
669 int32_t count,
670 UFILE *f)
671 {
672 int32_t dataSize;
673 int32_t read = 0;
674 u_localized_string *str = &f->str;
675
676 do {
677
678 /* determine the amount of data in the buffer */
679 dataSize = (int32_t)(str->fLimit - str->fPos);
680 if (dataSize <= 0) {
681 /* fill the buffer */
682 ufile_fill_uchar_buffer(f);
683 dataSize = (int32_t)(str->fLimit - str->fPos);
684 }
685
686 /* Make sure that we don't read too much */
687 if (dataSize > (count - read)) {
688 dataSize = count - read;
689 }
690
691 /* copy the current data in the buffer */
692 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
693
694 /* update number of items read */
695 read += dataSize;
696
697 /* update the current buffer position */
698 str->fPos += dataSize;
699 }
700 while (dataSize != 0 && read < count);
701
702 return read;
703 }