1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ******************************************************************************
6 * Copyright (C) 1998-2016, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 ******************************************************************************
13 * Modification History:
15 * Date Name Description
16 * 11/18/98 stephen Creation.
17 * 03/12/99 stephen Modified for new C API.
18 * 07/19/99 stephen Fixed read() and gets()
19 ******************************************************************************
22 #include "unicode/ustdio.h"
24 #if !UCONFIG_NO_CONVERSION
26 #include "unicode/putil.h"
31 #include "unicode/ucnv.h"
32 #include "unicode/ustring.h"
36 #define DELIM_LF 0x000A
37 #define DELIM_VT 0x000B
38 #define DELIM_FF 0x000C
39 #define DELIM_CR 0x000D
40 #define DELIM_NEL 0x0085
41 #define DELIM_LS 0x2028
42 #define DELIM_PS 0x2029
44 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
45 #if U_PLATFORM_USES_ONLY_WIN32_API
/* Line terminator appended by u_fputs after the caller's string, together
 * with its length in UChars (the trailing 0x0000 is not counted).
 * This first definition follows the U_PLATFORM_USES_ONLY_WIN32_API test
 * and uses CR LF. */
46 static const UChar DELIMITERS
[] = { DELIM_CR
, DELIM_LF
, 0x0000 };
47 static const uint32_t DELIMITERS_LEN
= 2;
48 /* TODO: Default newline writing should be detected based upon the converter being used. */
/* Alternative definition: a single LF.
 * NOTE(review): the #else / #endif that separate the two alternatives are
 * not visible in this extract - confirm against the complete file. */
50 static const UChar DELIMITERS
[] = { DELIM_LF
, 0x0000 };
51 static const uint32_t DELIMITERS_LEN
= 1;
/* Newline classification helpers used while scanning input for line ends.
 *
 * IS_FIRST_STRING_DELIMITER(c1): true when c1 lies in DELIM_LF..DELIM_CR
 *   (LF, VT, FF, CR) or equals DELIM_NEL.
 *   NOTE(review): the remaining continuation lines of this macro are not
 *   visible in this extract (presumably the DELIM_LS / DELIM_PS tests and
 *   the closing parenthesis) - confirm against the complete file.
 * CAN_HAVE_COMBINED_STRING_DELIMITER(c1): true only for CR, the one
 *   delimiter that may be followed by a second delimiter code unit.
 * IS_COMBINED_STRING_DELIMITER(c1, c2): true exactly for the pair CR, LF.
 *
 * All three evaluate their arguments more than once or inside a larger
 * expression, so arguments should be free of side effects. */
54 #define IS_FIRST_STRING_DELIMITER(c1) \
55 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
56 || (c1) == DELIM_NEL \
59 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
60 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
61 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
64 #if !UCONFIG_NO_TRANSLITERATION
/* Attaches a transliterator to a UFILE, or clears the current one.
 *
 * file      - the UFILE whose fTranslit state is modified.
 * direction - U_READ is rejected with U_UNSUPPORTED_ERROR.
 * adopt     - transliterator to install; NULL clears the current one.
 * status    - in/out error code; nothing is done if it already indicates
 *             failure. Set to U_ILLEGAL_ARGUMENT_ERROR,
 *             U_UNSUPPORTED_ERROR or U_MEMORY_ALLOCATION_ERROR on the
 *             error paths visible below.
 *
 * NOTE(review): braces, else branches, the null test on `file` and all
 * return statements are missing from this extract. The function presumably
 * returns `old` (the previously installed transliterator) on success and
 * `adopt` on error - confirm against the complete file. */
66 U_CAPI UTransliterator
* U_EXPORT2
67 u_fsettransliterator(UFILE
*file
, UFileDirection direction
,
68 UTransliterator
*adopt
, UErrorCode
*status
)
/* Previously installed transliterator, if any. */
70 UTransliterator
*old
= NULL
;
72 if(U_FAILURE(*status
))
79 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
/* Transliteration on the read side is not implemented. */
83 if(direction
& U_READ
)
85 /** TODO: implement */
86 *status
= U_UNSUPPORTED_ERROR
;
90 if(adopt
== NULL
) /* they are clearing it */
92 if(file
->fTranslit
!= NULL
)
94 /* TODO: Check side */
/* Remember the old transliterator, then release the staging buffer and
 * the UFILETranslitBuffer itself. The transliterator object is not
 * closed here. */
95 old
= file
->fTranslit
->translit
;
96 uprv_free(file
->fTranslit
->buffer
);
97 file
->fTranslit
->buffer
=NULL
;
98 uprv_free(file
->fTranslit
);
/* Installing: create the per-file transliteration state on first use,
 * starting with an empty, unallocated staging buffer. */
104 if(file
->fTranslit
== NULL
)
106 file
->fTranslit
= (UFILETranslitBuffer
*) uprv_malloc(sizeof(UFILETranslitBuffer
));
109 *status
= U_MEMORY_ALLOCATION_ERROR
;
112 file
->fTranslit
->capacity
= 0;
113 file
->fTranslit
->length
= 0;
114 file
->fTranslit
->pos
= 0;
115 file
->fTranslit
->buffer
= NULL
;
/* State already exists: keep the old transliterator for the caller and
 * flush text still pending under it before it is replaced. */
119 old
= file
->fTranslit
->translit
;
120 ufile_flush_translit(file
);
123 file
->fTranslit
->translit
= adopt
;
/* Runs outgoing text through the file's transliterator using the staging
 * buffer in f->fTranslit.
 *
 * f     - the UFILE; if it, its fTranslit, or the transliterator is NULL the
 *         test below short-circuits (NOTE(review): the body of that branch
 *         is not visible; presumably `src` is returned unchanged).
 * src   - new UChars to append to the staging buffer.
 * count - in: number of UChars at src. NOTE(review): the statements that
 *         write the output count back through this pointer are not visible.
 * flush - selects between the incremental path and the final path; the
 *         test itself is not visible in this extract.
 *
 * Returns f->fTranslit->buffer on both visible return paths.
 * NOTE(review): several declarations (newlen, textLength, textLimit, pos),
 * braces and call arguments are missing from this extract. */
129 static const UChar
* u_file_translit(UFILE
*f
, const UChar
*src
, int32_t *count
, UBool flush
)
132 int32_t junkCount
= 0;
136 UErrorCode status
= U_ZERO_ERROR
;
143 if ((!f
)||(!f
->fTranslit
)||(!f
->fTranslit
->translit
))
149 /* First: slide over everything */
/* Text in [pos, length) has not been handed out yet; move it to the front
 * of the buffer. memmove is used because the ranges can overlap. */
150 if(f
->fTranslit
->length
> f
->fTranslit
->pos
)
152 memmove(f
->fTranslit
->buffer
, f
->fTranslit
->buffer
+ f
->fTranslit
->pos
,
153 (f
->fTranslit
->length
- f
->fTranslit
->pos
)*sizeof(UChar
));
155 f
->fTranslit
->length
-= f
->fTranslit
->pos
; /* always */
156 f
->fTranslit
->pos
= 0;
158 /* Calculate new buffer size needed */
/* Four times the pending plus incoming UChar count, leaving room for the
 * transliterated text to be longer than its input. */
159 newlen
= (*count
+ f
->fTranslit
->length
) * 4;
161 if(newlen
> f
->fTranslit
->capacity
)
163 if(f
->fTranslit
->buffer
== NULL
)
165 f
->fTranslit
->buffer
= (UChar
*)uprv_malloc(newlen
* sizeof(UChar
));
/* NOTE(review): the realloc result overwrites the only pointer to the old
 * buffer, so if it fails the old allocation can no longer be freed and
 * length/capacity still describe it - consider a temporary. */
169 f
->fTranslit
->buffer
= (UChar
*)uprv_realloc(f
->fTranslit
->buffer
, newlen
* sizeof(UChar
));
171 /* Check for malloc/realloc failure. */
172 if (f
->fTranslit
->buffer
== NULL
) {
175 f
->fTranslit
->capacity
= newlen
;
178 /* Now, copy any data over */
/* Append the new input after the pending text. */
179 u_strncpy(f
->fTranslit
->buffer
+ f
->fTranslit
->length
,
182 f
->fTranslit
->length
+= *count
;
184 /* Now, translit in place as much as we can */
/* Incremental path: the whole staged text is both context and range. */
187 textLength
= f
->fTranslit
->length
;
188 pos
.contextStart
= 0;
189 pos
.contextLimit
= textLength
;
191 pos
.limit
= textLength
;
193 utrans_transIncrementalUChars(f
->fTranslit
->translit
,
194 f
->fTranslit
->buffer
, /* because we shifted */
196 f
->fTranslit
->capacity
,
200 /* now: start/limit point to the transliterated text */
201 /* Transliterated is [buffer..pos.start) */
/* Record how far the output is final (pos) and where staged text ends
 * (length); the remainder stays buffered for the next call. */
203 f
->fTranslit
->pos
= pos
.start
;
204 f
->fTranslit
->length
= pos
.limit
;
206 return f
->fTranslit
->buffer
;
/* Final (flush) path: transliterate everything that is staged. */
210 textLength
= f
->fTranslit
->length
;
211 textLimit
= f
->fTranslit
->length
;
213 utrans_transUChars(f
->fTranslit
->translit
,
214 f
->fTranslit
->buffer
,
216 f
->fTranslit
->capacity
,
221 /* out: converted len */
224 /* Set pointers to 0 */
225 f
->fTranslit
->pos
= 0;
226 f
->fTranslit
->length
= 0;
228 return f
->fTranslit
->buffer
;
/* Pushes any text still held by the transliteration stage through to the
 * output by calling u_file_write_flush with no new input, the fourth
 * argument FALSE and the fifth TRUE (flushIO and flushTranslit
 * respectively, judging by the names used inside u_file_write_flush).
 * The test on f / f->fTranslit guards the call when transliteration is
 * compiled in.
 * NOTE(review): the return type, braces, the guarded early return and the
 * closing #endif are not visible in this extract. */
235 ufile_flush_translit(UFILE
*f
)
237 #if !UCONFIG_NO_TRANSLITERATION
238 if((!f
)||(!f
->fTranslit
))
242 u_file_write_flush(NULL
, 0, f
, FALSE
, TRUE
);
/* Flushes the I/O stage of a file-backed UFILE: returns immediately when f
 * or f->fFile is NULL, otherwise calls u_file_write_flush with no new
 * input, the fourth argument TRUE and the fifth FALSE.
 * NOTE(review): the return type and braces are not visible in this
 * extract. */
247 ufile_flush_io(UFILE
*f
)
249 if((!f
) || (!f
->fFile
)) {
250 return; /* skip if no file */
253 u_file_write_flush(NULL
, 0, f
, TRUE
, FALSE
);
/* Tears down the transliteration state of a UFILE: flushes pending text,
 * closes the transliterator if one is set, frees the staging buffer if one
 * was allocated, then frees the UFILETranslitBuffer itself.
 * NOTE(review): the return type, braces, the early return taken when f or
 * f->fTranslit is NULL, the #endif lines and any reset of f->fTranslit to
 * NULL are not visible in this extract - confirm against the full file. */
258 ufile_close_translit(UFILE
*f
)
260 #if !UCONFIG_NO_TRANSLITERATION
261 if((!f
)||(!f
->fTranslit
))
/* Emit whatever is still staged before the transliterator goes away. */
265 ufile_flush_translit(f
);
267 #if !UCONFIG_NO_TRANSLITERATION
268 if(f
->fTranslit
->translit
)
269 utrans_close(f
->fTranslit
->translit
);
271 if(f
->fTranslit
->buffer
)
273 uprv_free(f
->fTranslit
->buffer
);
276 uprv_free(f
->fTranslit
);
/* Writes the NUL-terminated string s to f, followed by the platform line
 * delimiter (DELIMITERS, DELIMITERS_LEN UChars). `count` accumulates the
 * values returned by the two u_file_write calls.
 * NOTE(review): the second parameter (the UFILE, used below as `f`), the
 * braces and the return statement are not visible in this extract;
 * presumably `count` is returned. */
284 U_CAPI
int32_t U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
285 u_fputs(const UChar
*s
,
288 int32_t count
= u_file_write(s
, u_strlen(s
), f
);
289 count
+= u_file_write(DELIMITERS
, DELIMITERS_LEN
, f
);
/* Writes one code point: encodes `uc` as UTF-16 into `buf` with
 * U16_APPEND, writes the resulting `idx` code units, and returns `uc` if
 * exactly that many were written, U_EOF otherwise.
 * NOTE(review): the function name and parameter list, the declarations of
 * buf and idx, and whatever is done with `isError` after U16_APPEND are
 * not visible in this extract. */
293 U_CAPI UChar32 U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
299 UBool isError
= FALSE
;
301 U16_APPEND(buf
, idx
, UPRV_LENGTHOF(buf
), uc
, isError
);
305 return u_file_write(buf
, idx
, f
) == idx
? uc
: U_EOF
;
/* Core write routine behind u_file_write and the flush helpers.
 *
 * Visible behaviour:
 *  - when a transliterator is installed, the input first goes through
 *    u_file_translit, which may change both the source pointer and count;
 *  - one path copies into the in-memory string (f->str), clamped to the
 *    space left before f->str.fLimit;
 *  - the other path converts to bytes in a loop, one charBuffer of
 *    UFILE_CHARBUFFER_SIZE bytes per pass, using f->fConverter when it is
 *    set and the invariant-character conversion otherwise.
 *
 * NOTE(review): the parameters after `chars` (used below as count, f,
 * flushIO, flushTranslit), the declaration of `written`, the test that
 * selects the string path, the byte write itself, all returns and most
 * braces are not visible in this extract. */
309 U_CFUNC
int32_t U_EXPORT2
310 u_file_write_flush(const UChar
*chars
,
316 /* Set up conversion parameters */
317 UErrorCode status
= U_ZERO_ERROR
;
318 const UChar
*mySource
= chars
;
319 const UChar
*mySourceBegin
;
320 const UChar
*mySourceEnd
;
321 char charBuffer
[UFILE_CHARBUFFER_SIZE
];
322 char *myTarget
= charBuffer
;
324 int32_t numConverted
= 0;
/* NOTE(review): the condition guarding this u_strlen is not visible;
 * presumably a negative count means "chars is NUL-terminated". */
327 count
= u_strlen(chars
);
330 #if !UCONFIG_NO_TRANSLITERATION
331 if((f
->fTranslit
) && (f
->fTranslit
->translit
))
333 /* Do the transliteration */
334 mySource
= u_file_translit(f
, chars
, &count
, flushTranslit
);
338 /* Write to a string. */
/* UChars remaining between the string's write position and its limit. */
340 int32_t charsLeft
= (int32_t)(f
->str
.fLimit
- f
->str
.fPos
);
341 if (flushIO
&& charsLeft
> count
) {
/* Never copy more than fits in the target string. */
344 written
= ufmt_min(count
, charsLeft
);
345 u_strncpy(f
->str
.fPos
, mySource
, written
);
346 f
->str
.fPos
+= written
;
350 mySourceEnd
= mySource
+ count
;
352 /* Perform the conversion in a loop */
354 mySourceBegin
= mySource
; /* beginning location for this loop */
355 status
= U_ZERO_ERROR
;
356 if(f
->fConverter
!= NULL
) { /* We have a valid converter */
357 ucnv_fromUnicode(f
->fConverter
,
359 charBuffer
+ UFILE_CHARBUFFER_SIZE
,
365 } else { /*weiv: do the invariant conversion */
366 int32_t convertChars
= (int32_t) (mySourceEnd
- mySource
);
/* Without a converter one UChar becomes one byte, so at most
 * UFILE_CHARBUFFER_SIZE UChars fit per pass; the overflow status makes
 * the loop condition below run another pass for the rest. */
367 if (convertChars
> UFILE_CHARBUFFER_SIZE
) {
368 convertChars
= UFILE_CHARBUFFER_SIZE
;
369 status
= U_BUFFER_OVERFLOW_ERROR
;
371 u_UCharsToChars(mySource
, myTarget
, convertChars
);
372 mySource
+= convertChars
;
373 myTarget
+= convertChars
;
/* Bytes produced in this pass. */
375 numConverted
= (int32_t)(myTarget
- charBuffer
);
377 if (numConverted
> 0) {
378 /* write the converted bytes */
/* `written` counts source UChars consumed in this pass, not bytes. */
384 written
+= (int32_t) (mySource
- mySourceBegin
);
/* Reuse charBuffer from its start on the next pass. */
386 myTarget
= charBuffer
;
/* Repeat while the last pass filled the byte buffer. */
388 while(status
== U_BUFFER_OVERFLOW_ERROR
);
390 /* return # of chars written */
/* Public write entry point: forwards to u_file_write_flush with both
 * trailing flag arguments FALSE and returns its result.
 * NOTE(review): the count and UFILE parameters (used below as `count` and
 * `f`) and the braces are not visible in this extract. */
394 U_CAPI
int32_t U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
395 u_file_write( const UChar
*chars
,
399 return u_file_write_flush(chars
,count
,f
,FALSE
,FALSE
);
403 /* private function used for buffering input */
/* Refills the UChar read buffer of a file-backed UFILE.
 *
 * Visible behaviour: does nothing for string-backed UFILEs (fFile NULL)
 * and, when fFileno is 0, nothing while unread data remains. Otherwise
 * unread UChars are moved to the start of f->fUCBuffer, bytes are read
 * into a local charBuffer (fgets when fFileno is 0, fread otherwise),
 * converted to UChars behind the kept data, and fPos / fLimit are reset
 * to delimit the result.
 *
 * NOTE(review): the return type, the declarations of status, myTarget,
 * bufferSize, maxCPBytes, bytesRead, availLength and dataSize, the
 * assignment of `str`, the early returns and most braces are not visible
 * in this extract. */
405 ufile_fill_uchar_buffer(UFILE
*f
)
408 const char *mySource
;
409 const char *mySourceEnd
;
416 char charBuffer
[UFILE_CHARBUFFER_SIZE
];
417 u_localized_string
*str
;
419 if (f
->fFile
== NULL
) {
420 /* There is nothing to do. It's a string. */
/* Unread UChars currently in the buffer. */
425 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
426 if (f
->fFileno
== 0 && dataSize
> 0) {
427 /* Don't read from stdin too many times. There is still some data. */
431 /* shift the buffer if it isn't empty */
433 u_memmove(f
->fUCBuffer
, str
->fPos
, dataSize
); /* not accessing beyond memory */
437 /* record how much buffer space is available */
438 availLength
= UFILE_UCHARBUFFER_SIZE
- dataSize
;
440 /* Determine the # of codepage bytes needed to fill our UChar buffer */
441 /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
/* With a converter the byte budget is availLength divided by twice the
 * converter's minimum character size; without one it is availLength. */
442 maxCPBytes
= availLength
/ (f
->fConverter
!=NULL
?(2*ucnv_getMinCharSize(f
->fConverter
)):1);
444 /* Read in the data to convert */
445 if (f
->fFileno
== 0) {
446 /* Special case. Read from stdin one line at a time. */
/* NOTE(review): the byte count is taken with uprv_strlen on the buffer;
 * assuming that behaves like strlen, a line containing an embedded NUL
 * byte would be measured short - confirm whether that matters here. */
447 char *retStr
= fgets(charBuffer
, ufmt_min(maxCPBytes
, UFILE_CHARBUFFER_SIZE
), f
->fFile
);
448 bytesRead
= (int32_t)(retStr
? uprv_strlen(charBuffer
) : 0);
452 bytesRead
= (int32_t)fread(charBuffer
,
454 ufmt_min(maxCPBytes
, UFILE_CHARBUFFER_SIZE
),
458 /* Set up conversion parameters */
459 status
= U_ZERO_ERROR
;
460 mySource
= charBuffer
;
461 mySourceEnd
= charBuffer
+ bytesRead
;
/* Converted text is appended after the UChars that were kept. */
462 myTarget
= f
->fUCBuffer
+ dataSize
;
463 bufferSize
= UFILE_UCHARBUFFER_SIZE
;
465 if(f
->fConverter
!= NULL
) { /* We have a valid converter */
466 /* Perform the conversion */
467 ucnv_toUnicode(f
->fConverter
,
469 f
->fUCBuffer
+ bufferSize
,
/* The flag passed here is TRUE once the stream has reached end of file. */
473 (UBool
)(feof(f
->fFile
) != 0),
476 } else { /*weiv: do the invariant conversion */
477 u_charsToUChars(mySource
, myTarget
, bytesRead
);
478 myTarget
+= bytesRead
;
481 /* update the pointers into our array */
482 str
->fPos
= str
->fBuffer
;
483 str
->fLimit
= myTarget
;
/* Line reader: copies UChars from the file's read buffer into the caller's
 * buffer until a line delimiter has been copied (a CR LF pair is kept
 * together), the caller's limit `n` is reached, or no more data is
 * available, refilling the read buffer as needed.
 * NOTE(review): the function name and parameter list, the declarations of
 * dataSize, count, alias, limit, sItr and currDelim, the assignment of
 * `str`, every return statement and most braces are not visible in this
 * extract. */
486 U_CAPI UChar
* U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
497 u_localized_string
*str
;
500 /* Caller passed an unusable size. We still need to write the null terminator. */
504 /* fill the buffer if needed */
506 if (str
->fPos
>= str
->fLimit
) {
507 ufile_fill_uchar_buffer(f
);
510 /* subtract 1 from n to compensate for the terminator */
513 /* determine the amount of data in the buffer */
514 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
516 /* if 0 characters were left, return 0 */
520 /* otherwise, iteratively fill the buffer and copy */
524 while (dataSize
> 0 && count
< n
) {
527 /* Find how much to copy */
/* `limit` bounds this pass: either all buffered data (branch body not
 * visible) or just enough to reach the caller's limit. */
528 if (dataSize
< (n
- count
)) {
532 limit
= alias
+ (n
- count
);
536 /* Copy UChars until we find the first occurrence of a delimiter character */
537 while (alias
< limit
&& !IS_FIRST_STRING_DELIMITER(*alias
)) {
539 *(sItr
++) = *(alias
++);
541 /* Preserve the newline */
542 if (alias
< limit
&& IS_FIRST_STRING_DELIMITER(*alias
)) {
543 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias
)) {
547 currDelim
= 1; /* This isn't a newline, but it's used to say
548 that we should break later. We've checked all
549 possible newline combinations even across buffer
553 *(sItr
++) = *(alias
++);
556 /* If we have a CRLF combination, preserve that too. */
558 if (currDelim
&& IS_COMBINED_STRING_DELIMITER(currDelim
, *alias
)) {
560 *(sItr
++) = *(alias
++);
562 currDelim
= 1; /* This isn't a newline, but it's used to say
563 that we should break later. We've checked all
564 possible newline combinations even across buffer
568 /* update the current buffer position */
571 /* if we found a delimiter */
572 if (currDelim
== 1) {
577 /* refill the buffer */
578 ufile_fill_uchar_buffer(f
);
580 /* determine the amount of data in the buffer */
581 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
584 /* add the terminator and return s */
/* Reads the next UTF-16 code unit from f into *ch.
 * Takes it straight from the read buffer when one is available; otherwise
 * refills the buffer once and tries again.
 * NOTE(review): the assignments to isValidChar, the return statement,
 * else branches and closing braces are not visible in this extract;
 * presumably the function returns isValidChar, TRUE when *ch was set. */
589 U_CFUNC UBool U_EXPORT2
590 ufile_getch(UFILE
*f
, UChar
*ch
)
592 UBool isValidChar
= FALSE
;
595 /* if we have an available character in the buffer, return it */
596 if(f
->str
.fPos
< f
->str
.fLimit
){
597 *ch
= *(f
->str
.fPos
)++;
601 /* otherwise, fill the buffer and return the next character */
602 if(f
->str
.fPos
>= f
->str
.fLimit
) {
603 ufile_fill_uchar_buffer(f
);
/* The refill may have produced nothing (end of input), so test again. */
605 if(f
->str
.fPos
< f
->str
.fLimit
) {
606 *ch
= *(f
->str
.fPos
)++;
613 U_CAPI UChar U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
/* Reads the next code point from f into *c32, combining a lead surrogate
 * with the following code unit via U16_GET_SUPPLEMENTARY when one is
 * available in the buffer.
 * NOTE(review): the assignment of `str`, the initial value given to *c32,
 * the handling of a lead surrogate with nothing after it, the updates to
 * isValidChar, the return statement and closing braces are not visible in
 * this extract. */
621 U_CFUNC UBool U_EXPORT2
622 ufile_getch32(UFILE
*f
, UChar32
*c32
)
624 UBool isValidChar
= FALSE
;
625 u_localized_string
*str
;
629 /* Fill the buffer if it is empty */
/* Refills when fewer than two code units remain, presumably so that a
 * surrogate pair can be read in one go.
 * NOTE(review): `str` is dereferenced unconditionally right below, so the
 * `f` test here does not protect against a NULL f - confirm whether NULL
 * is a supported argument and, if so, check it before `str` is used. */
631 if (f
&& str
->fPos
+ 1 >= str
->fLimit
) {
632 ufile_fill_uchar_buffer(f
);
635 /* Get the next character in the buffer */
636 if (str
->fPos
< str
->fLimit
) {
637 *c32
= *(str
->fPos
)++;
638 if (U_IS_LEAD(*c32
)) {
639 if (str
->fPos
< str
->fLimit
) {
/* NOTE(review): the following unit is consumed and combined without a
 * visible check that it is a trail surrogate - confirm. */
640 UChar c16
= *(str
->fPos
)++;
641 *c32
= U16_GET_SUPPLEMENTARY(*c32
, c16
);
/* Public code-point reader: delegates to ufile_getch32, which stores the
 * code point it reads in `ch`.
 * NOTE(review): the function name, parameter list, the declaration of
 * `ch`, the braces and the return statement are not visible in this
 * extract; presumably `ch` is returned. */
656 U_CAPI UChar32 U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
660 ufile_getch32(f
, &ch
);
/* Pushes a code point back onto the read buffer by stepping fPos backwards.
 * Nothing is stored: the step back is accepted only when the code units
 * already in the buffer equal the ones being pushed back.
 * NOTE(review): the UFILE parameter, the assignment of `str`, the bodies
 * of the failure branches (presumably `ch = U_EOF`), the test that selects
 * the two-unit path and the return statement are not visible in this
 * extract. */
664 U_CAPI UChar32 U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
665 u_fungetc(UChar32 ch
,
668 u_localized_string
*str
;
672 /* if we're at the beginning of the buffer, sorry! */
/* NOTE(review): U_IS_LEAD(ch) is true only when ch is itself a lead
 * surrogate code unit, while U16_LEAD / U16_TRAIL below split a
 * supplementary code point. If the intent is "ch needs two UTF-16 units",
 * a supplementary-range test may be what is meant - confirm. */
673 if (str
->fPos
== str
->fBuffer
674 || (U_IS_LEAD(ch
) && (str
->fPos
- 1) == str
->fBuffer
))
679 /* otherwise, put the character back */
680 /* Remember, read them back on in the reverse order. */
/* Two-unit case: step back over the trail unit first, then the lead.
 * If the first comparison fails, fPos has still moved back by one. */
682 if (*--(str
->fPos
) != U16_TRAIL(ch
)
683 || *--(str
->fPos
) != U16_LEAD(ch
))
/* Single-unit case: the unit before fPos must equal ch. */
688 else if (*--(str
->fPos
) != ch
) {
/* Reads up to `count` UChars from f into `chars`.
 * Each pass copies whatever the read buffer holds (clamped to what is
 * still wanted) and advances the buffer position; the loop ends when a
 * pass copies nothing or `count` UChars have been read.
 * NOTE(review): the count and UFILE parameters, the declarations of
 * dataSize and read, the `do {`, the condition guarding the refill, the
 * `read += dataSize` update and the return statement are not visible in
 * this extract; presumably `read` is returned. */
695 U_CAPI
int32_t U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
696 u_file_read( UChar
*chars
,
702 u_localized_string
*str
= &f
->str
;
706 /* determine the amount of data in the buffer */
707 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
709 /* fill the buffer */
710 ufile_fill_uchar_buffer(f
);
711 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
714 /* Make sure that we don't read too much */
715 if (dataSize
> (count
- read
)) {
716 dataSize
= count
- read
;
719 /* copy the current data in the buffer */
720 memcpy(chars
+ read
, str
->fPos
, dataSize
* sizeof(UChar
));
722 /* update number of items read */
725 /* update the current buffer position */
726 str
->fPos
+= dataSize
;
/* dataSize == 0 means even a refill produced no data (end of input). */
728 while (dataSize
!= 0 && read
< count
);