]> git.saurik.com Git - apple/icu.git/blob - icuSources/io/ustdio.cpp
ICU-62141.0.1.tar.gz
[apple/icu.git] / icuSources / io / ustdio.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 * File ustdio.cpp
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 11/18/98 stephen Creation.
17 * 03/12/99 stephen Modified for new C API.
18 * 07/19/99 stephen Fixed read() and gets()
19 ******************************************************************************
20 */
21
22 #include "unicode/ustdio.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #include "unicode/putil.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "ufile.h"
30 #include "ufmt_cmn.h"
31 #include "unicode/ucnv.h"
32 #include "unicode/ustring.h"
33
34 #include <string.h>
35
36 #define DELIM_LF 0x000A
37 #define DELIM_VT 0x000B
38 #define DELIM_FF 0x000C
39 #define DELIM_CR 0x000D
40 #define DELIM_NEL 0x0085
41 #define DELIM_LS 0x2028
42 #define DELIM_PS 0x2029
43
44 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
45 #if U_PLATFORM_USES_ONLY_WIN32_API
46 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
47 static const uint32_t DELIMITERS_LEN = 2;
48 /* TODO: Default newline writing should be detected based upon the converter being used. */
49 #else
50 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
51 static const uint32_t DELIMITERS_LEN = 1;
52 #endif
53
54 #define IS_FIRST_STRING_DELIMITER(c1) \
55 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
56 || (c1) == DELIM_NEL \
57 || (c1) == DELIM_LS \
58 || (c1) == DELIM_PS)
59 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
60 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
61 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
62
63
64 #if !UCONFIG_NO_TRANSLITERATION
65
66 U_CAPI UTransliterator* U_EXPORT2
67 u_fsettransliterator(UFILE *file, UFileDirection direction,
68 UTransliterator *adopt, UErrorCode *status)
69 {
70 UTransliterator *old = NULL;
71
72 if(U_FAILURE(*status))
73 {
74 return adopt;
75 }
76
77 if(!file)
78 {
79 *status = U_ILLEGAL_ARGUMENT_ERROR;
80 return adopt;
81 }
82
83 if(direction & U_READ)
84 {
85 /** TODO: implement */
86 *status = U_UNSUPPORTED_ERROR;
87 return adopt;
88 }
89
90 if(adopt == NULL) /* they are clearing it */
91 {
92 if(file->fTranslit != NULL)
93 {
94 /* TODO: Check side */
95 old = file->fTranslit->translit;
96 uprv_free(file->fTranslit->buffer);
97 file->fTranslit->buffer=NULL;
98 uprv_free(file->fTranslit);
99 file->fTranslit=NULL;
100 }
101 }
102 else
103 {
104 if(file->fTranslit == NULL)
105 {
106 file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
107 if(!file->fTranslit)
108 {
109 *status = U_MEMORY_ALLOCATION_ERROR;
110 return adopt;
111 }
112 file->fTranslit->capacity = 0;
113 file->fTranslit->length = 0;
114 file->fTranslit->pos = 0;
115 file->fTranslit->buffer = NULL;
116 }
117 else
118 {
119 old = file->fTranslit->translit;
120 ufile_flush_translit(file);
121 }
122
123 file->fTranslit->translit = adopt;
124 }
125
126 return old;
127 }
128
129 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
130 {
131 int32_t newlen;
132 int32_t junkCount = 0;
133 int32_t textLength;
134 int32_t textLimit;
135 UTransPosition pos;
136 UErrorCode status = U_ZERO_ERROR;
137
138 if(count == NULL)
139 {
140 count = &junkCount;
141 }
142
143 if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
144 {
145 /* fast path */
146 return src;
147 }
148
149 /* First: slide over everything */
150 if(f->fTranslit->length > f->fTranslit->pos)
151 {
152 memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
153 (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
154 }
155 f->fTranslit->length -= f->fTranslit->pos; /* always */
156 f->fTranslit->pos = 0;
157
158 /* Calculate new buffer size needed */
159 newlen = (*count + f->fTranslit->length) * 4;
160
161 if(newlen > f->fTranslit->capacity)
162 {
163 if(f->fTranslit->buffer == NULL)
164 {
165 f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
166 }
167 else
168 {
169 f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
170 }
171 /* Check for malloc/realloc failure. */
172 if (f->fTranslit->buffer == NULL) {
173 return NULL;
174 }
175 f->fTranslit->capacity = newlen;
176 }
177
178 /* Now, copy any data over */
179 u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
180 src,
181 *count);
182 f->fTranslit->length += *count;
183
184 /* Now, translit in place as much as we can */
185 if(flush == FALSE)
186 {
187 textLength = f->fTranslit->length;
188 pos.contextStart = 0;
189 pos.contextLimit = textLength;
190 pos.start = 0;
191 pos.limit = textLength;
192
193 utrans_transIncrementalUChars(f->fTranslit->translit,
194 f->fTranslit->buffer, /* because we shifted */
195 &textLength,
196 f->fTranslit->capacity,
197 &pos,
198 &status);
199
200 /* now: start/limit point to the transliterated text */
201 /* Transliterated is [buffer..pos.start) */
202 *count = pos.start;
203 f->fTranslit->pos = pos.start;
204 f->fTranslit->length = pos.limit;
205
206 return f->fTranslit->buffer;
207 }
208 else
209 {
210 textLength = f->fTranslit->length;
211 textLimit = f->fTranslit->length;
212
213 utrans_transUChars(f->fTranslit->translit,
214 f->fTranslit->buffer,
215 &textLength,
216 f->fTranslit->capacity,
217 0,
218 &textLimit,
219 &status);
220
221 /* out: converted len */
222 *count = textLimit;
223
224 /* Set pointers to 0 */
225 f->fTranslit->pos = 0;
226 f->fTranslit->length = 0;
227
228 return f->fTranslit->buffer;
229 }
230 }
231
232 #endif
233
234 void
235 ufile_flush_translit(UFILE *f)
236 {
237 #if !UCONFIG_NO_TRANSLITERATION
238 if((!f)||(!f->fTranslit))
239 return;
240 #endif
241
242 u_file_write_flush(NULL, 0, f, FALSE, TRUE);
243 }
244
245
246 void
247 ufile_flush_io(UFILE *f)
248 {
249 if((!f) || (!f->fFile)) {
250 return; /* skip if no file */
251 }
252
253 u_file_write_flush(NULL, 0, f, TRUE, FALSE);
254 }
255
256
257 void
258 ufile_close_translit(UFILE *f)
259 {
260 #if !UCONFIG_NO_TRANSLITERATION
261 if((!f)||(!f->fTranslit))
262 return;
263 #endif
264
265 ufile_flush_translit(f);
266
267 #if !UCONFIG_NO_TRANSLITERATION
268 if(f->fTranslit->translit)
269 utrans_close(f->fTranslit->translit);
270
271 if(f->fTranslit->buffer)
272 {
273 uprv_free(f->fTranslit->buffer);
274 }
275
276 uprv_free(f->fTranslit);
277 f->fTranslit = NULL;
278 #endif
279 }
280
281
282 /* Input/output */
283
284 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
285 u_fputs(const UChar *s,
286 UFILE *f)
287 {
288 int32_t count = u_file_write(s, u_strlen(s), f);
289 count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
290 return count;
291 }
292
293 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
294 u_fputc(UChar32 uc,
295 UFILE *f)
296 {
297 UChar buf[2];
298 int32_t idx = 0;
299 UBool isError = FALSE;
300
301 U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError);
302 if (isError) {
303 return U_EOF;
304 }
305 return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
306 }
307
308
309 U_CFUNC int32_t U_EXPORT2
310 u_file_write_flush(const UChar *chars,
311 int32_t count,
312 UFILE *f,
313 UBool flushIO,
314 UBool flushTranslit)
315 {
316 /* Set up conversion parameters */
317 UErrorCode status = U_ZERO_ERROR;
318 const UChar *mySource = chars;
319 const UChar *mySourceBegin;
320 const UChar *mySourceEnd;
321 char charBuffer[UFILE_CHARBUFFER_SIZE];
322 char *myTarget = charBuffer;
323 int32_t written = 0;
324 int32_t numConverted = 0;
325
326 if (count < 0) {
327 count = u_strlen(chars);
328 }
329
330 #if !UCONFIG_NO_TRANSLITERATION
331 if((f->fTranslit) && (f->fTranslit->translit))
332 {
333 /* Do the transliteration */
334 mySource = u_file_translit(f, chars, &count, flushTranslit);
335 }
336 #endif
337
338 /* Write to a string. */
339 if (!f->fFile) {
340 int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
341 if (flushIO && charsLeft > count) {
342 count++;
343 }
344 written = ufmt_min(count, charsLeft);
345 u_strncpy(f->str.fPos, mySource, written);
346 f->str.fPos += written;
347 return written;
348 }
349
350 mySourceEnd = mySource + count;
351
352 /* Perform the conversion in a loop */
353 do {
354 mySourceBegin = mySource; /* beginning location for this loop */
355 status = U_ZERO_ERROR;
356 if(f->fConverter != NULL) { /* We have a valid converter */
357 ucnv_fromUnicode(f->fConverter,
358 &myTarget,
359 charBuffer + UFILE_CHARBUFFER_SIZE,
360 &mySource,
361 mySourceEnd,
362 NULL,
363 flushIO,
364 &status);
365 } else { /*weiv: do the invariant conversion */
366 int32_t convertChars = (int32_t) (mySourceEnd - mySource);
367 if (convertChars > UFILE_CHARBUFFER_SIZE) {
368 convertChars = UFILE_CHARBUFFER_SIZE;
369 status = U_BUFFER_OVERFLOW_ERROR;
370 }
371 u_UCharsToChars(mySource, myTarget, convertChars);
372 mySource += convertChars;
373 myTarget += convertChars;
374 }
375 numConverted = (int32_t)(myTarget - charBuffer);
376
377 if (numConverted > 0) {
378 /* write the converted bytes */
379 fwrite(charBuffer,
380 sizeof(char),
381 numConverted,
382 f->fFile);
383
384 written += (int32_t) (mySource - mySourceBegin);
385 }
386 myTarget = charBuffer;
387 }
388 while(status == U_BUFFER_OVERFLOW_ERROR);
389
390 /* return # of chars written */
391 return written;
392 }
393
394 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
395 u_file_write( const UChar *chars,
396 int32_t count,
397 UFILE *f)
398 {
399 return u_file_write_flush(chars,count,f,FALSE,FALSE);
400 }
401
402
403 /* private function used for buffering input */
404 void
405 ufile_fill_uchar_buffer(UFILE *f)
406 {
407 UErrorCode status;
408 const char *mySource;
409 const char *mySourceEnd;
410 UChar *myTarget;
411 int32_t bufferSize;
412 int32_t maxCPBytes;
413 int32_t bytesRead;
414 int32_t availLength;
415 int32_t dataSize;
416 char charBuffer[UFILE_CHARBUFFER_SIZE];
417 u_localized_string *str;
418
419 if (f->fFile == NULL) {
420 /* There is nothing to do. It's a string. */
421 return;
422 }
423
424 str = &f->str;
425 dataSize = (int32_t)(str->fLimit - str->fPos);
426 if (f->fFileno == 0 && dataSize > 0) {
427 /* Don't read from stdin too many times. There is still some data. */
428 return;
429 }
430
431 /* shift the buffer if it isn't empty */
432 if(dataSize != 0) {
433 u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */
434 }
435
436
437 /* record how much buffer space is available */
438 availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
439
440 /* Determine the # of codepage bytes needed to fill our UChar buffer */
441 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
442 maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
443
444 /* Read in the data to convert */
445 if (f->fFileno == 0) {
446 /* Special case. Read from stdin one line at a time. */
447 char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
448 bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
449 }
450 else {
451 /* A normal file */
452 bytesRead = (int32_t)fread(charBuffer,
453 sizeof(char),
454 ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
455 f->fFile);
456 }
457
458 /* Set up conversion parameters */
459 status = U_ZERO_ERROR;
460 mySource = charBuffer;
461 mySourceEnd = charBuffer + bytesRead;
462 myTarget = f->fUCBuffer + dataSize;
463 bufferSize = UFILE_UCHARBUFFER_SIZE;
464
465 if(f->fConverter != NULL) { /* We have a valid converter */
466 /* Perform the conversion */
467 ucnv_toUnicode(f->fConverter,
468 &myTarget,
469 f->fUCBuffer + bufferSize,
470 &mySource,
471 mySourceEnd,
472 NULL,
473 (UBool)(feof(f->fFile) != 0),
474 &status);
475
476 } else { /*weiv: do the invariant conversion */
477 u_charsToUChars(mySource, myTarget, bytesRead);
478 myTarget += bytesRead;
479 }
480
481 /* update the pointers into our array */
482 str->fPos = str->fBuffer;
483 str->fLimit = myTarget;
484 }
485
486 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
487 u_fgets(UChar *s,
488 int32_t n,
489 UFILE *f)
490 {
491 int32_t dataSize;
492 int32_t count;
493 UChar *alias;
494 const UChar *limit;
495 UChar *sItr;
496 UChar currDelim = 0;
497 u_localized_string *str;
498
499 if (n <= 0) {
500 /* Caller screwed up. We need to write the null terminatior. */
501 return NULL;
502 }
503
504 /* fill the buffer if needed */
505 str = &f->str;
506 if (str->fPos >= str->fLimit) {
507 ufile_fill_uchar_buffer(f);
508 }
509
510 /* subtract 1 from n to compensate for the terminator */
511 --n;
512
513 /* determine the amount of data in the buffer */
514 dataSize = (int32_t)(str->fLimit - str->fPos);
515
516 /* if 0 characters were left, return 0 */
517 if (dataSize == 0)
518 return NULL;
519
520 /* otherwise, iteratively fill the buffer and copy */
521 count = 0;
522 sItr = s;
523 currDelim = 0;
524 while (dataSize > 0 && count < n) {
525 alias = str->fPos;
526
527 /* Find how much to copy */
528 if (dataSize < (n - count)) {
529 limit = str->fLimit;
530 }
531 else {
532 limit = alias + (n - count);
533 }
534
535 if (!currDelim) {
536 /* Copy UChars until we find the first occurrence of a delimiter character */
537 while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
538 count++;
539 *(sItr++) = *(alias++);
540 }
541 /* Preserve the newline */
542 if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
543 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
544 currDelim = *alias;
545 }
546 else {
547 currDelim = 1; /* This isn't a newline, but it's used to say
548 that we should break later. We've checked all
549 possible newline combinations even across buffer
550 boundaries. */
551 }
552 count++;
553 *(sItr++) = *(alias++);
554 }
555 }
556 /* If we have a CRLF combination, preserve that too. */
557 if (alias < limit) {
558 if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
559 count++;
560 *(sItr++) = *(alias++);
561 }
562 currDelim = 1; /* This isn't a newline, but it's used to say
563 that we should break later. We've checked all
564 possible newline combinations even across buffer
565 boundaries. */
566 }
567
568 /* update the current buffer position */
569 str->fPos = alias;
570
571 /* if we found a delimiter */
572 if (currDelim == 1) {
573 /* break out */
574 break;
575 }
576
577 /* refill the buffer */
578 ufile_fill_uchar_buffer(f);
579
580 /* determine the amount of data in the buffer */
581 dataSize = (int32_t)(str->fLimit - str->fPos);
582 }
583
584 /* add the terminator and return s */
585 *sItr = 0x0000;
586 return s;
587 }
588
589 U_CFUNC UBool U_EXPORT2
590 ufile_getch(UFILE *f, UChar *ch)
591 {
592 UBool isValidChar = FALSE;
593
594 *ch = U_EOF;
595 /* if we have an available character in the buffer, return it */
596 if(f->str.fPos < f->str.fLimit){
597 *ch = *(f->str.fPos)++;
598 isValidChar = TRUE;
599 }
600 else {
601 /* otherwise, fill the buffer and return the next character */
602 if(f->str.fPos >= f->str.fLimit) {
603 ufile_fill_uchar_buffer(f);
604 }
605 if(f->str.fPos < f->str.fLimit) {
606 *ch = *(f->str.fPos)++;
607 isValidChar = TRUE;
608 }
609 }
610 return isValidChar;
611 }
612
613 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
614 u_fgetc(UFILE *f)
615 {
616 UChar ch;
617 ufile_getch(f, &ch);
618 return ch;
619 }
620
621 U_CFUNC UBool U_EXPORT2
622 ufile_getch32(UFILE *f, UChar32 *c32)
623 {
624 UBool isValidChar = FALSE;
625 u_localized_string *str;
626
627 *c32 = U_EOF;
628
629 /* Fill the buffer if it is empty */
630 str = &f->str;
631 if (f && str->fPos + 1 >= str->fLimit) {
632 ufile_fill_uchar_buffer(f);
633 }
634
635 /* Get the next character in the buffer */
636 if (str->fPos < str->fLimit) {
637 *c32 = *(str->fPos)++;
638 if (U_IS_LEAD(*c32)) {
639 if (str->fPos < str->fLimit) {
640 UChar c16 = *(str->fPos)++;
641 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
642 isValidChar = TRUE;
643 }
644 else {
645 *c32 = U_EOF;
646 }
647 }
648 else {
649 isValidChar = TRUE;
650 }
651 }
652
653 return isValidChar;
654 }
655
656 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
657 u_fgetcx(UFILE *f)
658 {
659 UChar32 ch;
660 ufile_getch32(f, &ch);
661 return ch;
662 }
663
664 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
665 u_fungetc(UChar32 ch,
666 UFILE *f)
667 {
668 u_localized_string *str;
669
670 str = &f->str;
671
672 /* if we're at the beginning of the buffer, sorry! */
673 if (str->fPos == str->fBuffer
674 || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
675 {
676 ch = U_EOF;
677 }
678 else {
679 /* otherwise, put the character back */
680 /* Remember, read them back on in the reverse order. */
681 if (U_IS_LEAD(ch)) {
682 if (*--(str->fPos) != U16_TRAIL(ch)
683 || *--(str->fPos) != U16_LEAD(ch))
684 {
685 ch = U_EOF;
686 }
687 }
688 else if (*--(str->fPos) != ch) {
689 ch = U_EOF;
690 }
691 }
692 return ch;
693 }
694
695 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
696 u_file_read( UChar *chars,
697 int32_t count,
698 UFILE *f)
699 {
700 int32_t dataSize;
701 int32_t read = 0;
702 u_localized_string *str = &f->str;
703
704 do {
705
706 /* determine the amount of data in the buffer */
707 dataSize = (int32_t)(str->fLimit - str->fPos);
708 if (dataSize <= 0) {
709 /* fill the buffer */
710 ufile_fill_uchar_buffer(f);
711 dataSize = (int32_t)(str->fLimit - str->fPos);
712 }
713
714 /* Make sure that we don't read too much */
715 if (dataSize > (count - read)) {
716 dataSize = count - read;
717 }
718
719 /* copy the current data in the buffer */
720 memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
721
722 /* update number of items read */
723 read += dataSize;
724
725 /* update the current buffer position */
726 str->fPos += dataSize;
727 }
728 while (dataSize != 0 && read < count);
729
730 return read;
731 }
732 #endif