2 ******************************************************************************
4 * Copyright (C) 1998-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 11/18/98 stephen Creation.
15 * 03/12/99 stephen Modified for new C API.
16 * 07/19/99 stephen Fixed read() and gets()
17 ******************************************************************************
20 #include "unicode/ustdio.h"
21 #include "unicode/putil.h"
26 #include "unicode/ucnv.h"
27 #include "unicode/ustring.h"
31 #define DELIM_LF 0x000A
32 #define DELIM_VT 0x000B
33 #define DELIM_FF 0x000C
34 #define DELIM_CR 0x000D
35 #define DELIM_NEL 0x0085
36 #define DELIM_LS 0x2028
37 #define DELIM_PS 0x2029
/*
 * Line delimiter sequence and its length in UChars (u_fputs in this file
 * writes DELIMITERS / DELIMITERS_LEN after the string).
 * Two alternative definitions appear below: CR followed by LF (length 2)
 * and LF alone (length 1).  Each array also holds a trailing 0x0000 that
 * is not counted in DELIMITERS_LEN.
 * NOTE(review): both definitions use the same names, so they are presumably
 * selected by a platform preprocessor conditional that is not visible in
 * this view -- confirm.
 */
39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
41 static const UChar DELIMITERS
[] = { DELIM_CR
, DELIM_LF
, 0x0000 };
42 static const uint32_t DELIMITERS_LEN
= 2;
43 /* TODO: Default newline writing should be detected based upon the converter being used. */
45 static const UChar DELIMITERS
[] = { DELIM_LF
, 0x0000 };
46 static const uint32_t DELIMITERS_LEN
= 1;
/*
 * Helper macros for recognizing line delimiters while reading.
 *
 * IS_FIRST_STRING_DELIMITER(c1): true when c1 lies in the range
 *   DELIM_LF..DELIM_CR (0x000A..0x000D, i.e. LF, VT, FF, CR) or equals
 *   DELIM_NEL.  NOTE(review): the tail of this macro is not visible here;
 *   it may test further delimiters (DELIM_LS / DELIM_PS are defined above)
 *   -- confirm.
 * CAN_HAVE_COMBINED_STRING_DELIMITER(c1): true only for DELIM_CR, the one
 *   delimiter that can start a two-unit sequence.
 * IS_COMBINED_STRING_DELIMITER(c1, c2): true for the pair CR, LF.
 *
 * All three evaluate their arguments more than once or inside larger
 * expressions, so arguments should be free of side effects.
 */
49 #define IS_FIRST_STRING_DELIMITER(c1) \
50 (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
51 || (c1) == DELIM_NEL \
54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
56 (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
59 #if !UCONFIG_NO_TRANSLITERATION
/*
 * Installs, replaces, or clears the transliterator attached to a UFILE.
 *
 * file      - the UFILE whose fTranslit state is modified.
 * direction - bit set of UFileDirection flags; if U_READ is set, *status
 *             is set to U_UNSUPPORTED_ERROR (read-side transliteration is
 *             marked TODO).
 * adopt     - the transliterator to store in file->fTranslit->translit, or
 *             NULL to clear the current one.
 * status    - in/out error code; checked with U_FAILURE on entry.  May be
 *             set to U_ILLEGAL_ARGUMENT_ERROR, U_UNSUPPORTED_ERROR or
 *             U_MEMORY_ALLOCATION_ERROR.
 *
 * The previous transliterator is captured in `old`.
 * NOTE(review): the return statements and several guarding conditions are
 * not visible in this view; presumably `old` is what is returned so the
 * caller can close it -- confirm.
 */
61 U_CAPI UTransliterator
* U_EXPORT2
62 u_fsettransliterator(UFILE
*file
, UFileDirection direction
,
63 UTransliterator
*adopt
, UErrorCode
*status
)
65 UTransliterator
*old
= NULL
;
67 if(U_FAILURE(*status
))
/* NOTE(review): the condition guarding this assignment is not visible here. */
74 *status
= U_ILLEGAL_ARGUMENT_ERROR
;
78 if(direction
& U_READ
)
80 /** TODO: implement */
81 *status
= U_UNSUPPORTED_ERROR
;
85 if(adopt
== NULL
) /* they are clearing it */
87 if(file
->fTranslit
!= NULL
)
89 /* TODO: Check side */
/* Remember the old transliterator, then release the buffer and the state struct. */
90 old
= file
->fTranslit
->translit
;
91 uprv_free(file
->fTranslit
->buffer
);
92 file
->fTranslit
->buffer
=NULL
;
93 uprv_free(file
->fTranslit
);
/* Installing: create the state struct on first use, with an empty buffer. */
99 if(file
->fTranslit
== NULL
)
101 file
->fTranslit
= (UFILETranslitBuffer
*) uprv_malloc(sizeof(UFILETranslitBuffer
));
104 *status
= U_MEMORY_ALLOCATION_ERROR
;
107 file
->fTranslit
->capacity
= 0;
108 file
->fTranslit
->length
= 0;
109 file
->fTranslit
->pos
= 0;
110 file
->fTranslit
->buffer
= NULL
;
/* State already existed: keep the old transliterator and flush pending text through it. */
114 old
= file
->fTranslit
->translit
;
115 ufile_flush_translit(file
);
118 file
->fTranslit
->translit
= adopt
;
/*
 * Runs outgoing text through the file's transliterator, using the growable
 * scratch buffer kept in f->fTranslit.
 *
 * f     - the UFILE; the guard below tests f, f->fTranslit and
 *         f->fTranslit->translit for NULL.
 * src   - the new UChars to append to the scratch buffer.
 * count - in/out; on input the number of UChars in src (it is added to the
 *         buffered length).  NOTE(review): the lines that write the output
 *         count are not visible here -- confirm.
 * flush - NOTE(review): presumably chooses between the incremental path
 *         (utrans_transIncrementalUChars) and the final path
 *         (utrans_transUChars); the test itself is not visible -- confirm.
 *
 * Steps visible in this view:
 *   1. Slide the unconsumed region [pos, length) to the start of the buffer.
 *   2. Ensure capacity for (*count + length) * 4 UChars, via uprv_malloc on
 *      first use or uprv_realloc afterwards.
 *   3. Append the new text with u_strncpy and add *count to length.
 *   4. Incremental path: transliterate in place, then record pos.start in
 *      fTranslit->pos and pos.limit in fTranslit->length.
 *      Final path: transliterate with utrans_transUChars, then reset
 *      fTranslit->pos and fTranslit->length to 0.
 * Both paths return f->fTranslit->buffer.
 */
124 static const UChar
* u_file_translit(UFILE
*f
, const UChar
*src
, int32_t *count
, UBool flush
)
127 int32_t junkCount
= 0;
131 UErrorCode status
= U_ZERO_ERROR
;
138 if ((!f
)||(!f
->fTranslit
)||(!f
->fTranslit
->translit
))
144 /* First: slide over everything */
145 if(f
->fTranslit
->length
> f
->fTranslit
->pos
)
147 memmove(f
->fTranslit
->buffer
, f
->fTranslit
->buffer
+ f
->fTranslit
->pos
,
148 (f
->fTranslit
->length
- f
->fTranslit
->pos
)*sizeof(UChar
));
150 f
->fTranslit
->length
-= f
->fTranslit
->pos
; /* always */
151 f
->fTranslit
->pos
= 0;
153 /* Calculate new buffer size needed */
154 newlen
= (*count
+ f
->fTranslit
->length
) * 4;
156 if(newlen
> f
->fTranslit
->capacity
)
158 if(f
->fTranslit
->buffer
== NULL
)
160 f
->fTranslit
->buffer
= (UChar
*)uprv_malloc(newlen
* sizeof(UChar
));
/* NOTE(review): the uprv_realloc result is stored straight into buffer, so the
   previous pointer is no longer held in this field if the call returns NULL. */
164 f
->fTranslit
->buffer
= (UChar
*)uprv_realloc(f
->fTranslit
->buffer
, newlen
* sizeof(UChar
));
166 /* Check for malloc/realloc failure. */
167 if (f
->fTranslit
->buffer
== NULL
) {
170 f
->fTranslit
->capacity
= newlen
;
173 /* Now, copy any data over */
174 u_strncpy(f
->fTranslit
->buffer
+ f
->fTranslit
->length
,
177 f
->fTranslit
->length
+= *count
;
179 /* Now, translit in place as much as we can */
182 textLength
= f
->fTranslit
->length
;
183 pos
.contextStart
= 0;
184 pos
.contextLimit
= textLength
;
186 pos
.limit
= textLength
;
188 utrans_transIncrementalUChars(f
->fTranslit
->translit
,
189 f
->fTranslit
->buffer
, /* because we shifted */
191 f
->fTranslit
->capacity
,
195 /* now: start/limit point to the transliterated text */
196 /* Transliterated is [buffer..pos.start) */
198 f
->fTranslit
->pos
= pos
.start
;
199 f
->fTranslit
->length
= pos
.limit
;
201 return f
->fTranslit
->buffer
;
/* Final (non-incremental) path: transliterate everything that is buffered. */
205 textLength
= f
->fTranslit
->length
;
206 textLimit
= f
->fTranslit
->length
;
208 utrans_transUChars(f
->fTranslit
->translit
,
209 f
->fTranslit
->buffer
,
211 f
->fTranslit
->capacity
,
216 /* out: converted len */
219 /* Set pointers to 0 */
220 f
->fTranslit
->pos
= 0;
221 f
->fTranslit
->length
= 0;
223 return f
->fTranslit
->buffer
;
/*
 * Pushes any text still held by the file's transliteration buffer out to
 * the file.  It does so by calling u_file_write_flush with no new input
 * (NULL, 0) and the two trailing flag arguments FALSE, TRUE.
 *
 * f - the UFILE to flush.  The guard tests f and f->fTranslit for NULL.
 *     NOTE(review): the guarded statement is not visible here; presumably
 *     an early return -- confirm.
 *
 * The body is only compiled when UCONFIG_NO_TRANSLITERATION is not set.
 */
230 ufile_flush_translit(UFILE
*f
)
232 #if !UCONFIG_NO_TRANSLITERATION
233 if((!f
)||(!f
->fTranslit
))
237 u_file_write_flush(NULL
, 0, f
, FALSE
, TRUE
);
/*
 * Tears down the transliteration state attached to a UFILE.
 *
 * f - the UFILE being closed.  The guard tests f and f->fTranslit for NULL.
 *     NOTE(review): the guarded statement is not visible here; presumably
 *     an early return -- confirm.
 *
 * Order of operations: flush pending text with ufile_flush_translit, close
 * the transliterator with utrans_close if one is set, free the scratch
 * buffer if one was allocated, then free the fTranslit struct itself.
 * NOTE(review): no line resetting f->fTranslit to NULL is visible in this
 * view -- confirm whether that happens in a missing line.
 */
242 ufile_close_translit(UFILE
*f
)
244 #if !UCONFIG_NO_TRANSLITERATION
245 if((!f
)||(!f
->fTranslit
))
249 ufile_flush_translit(f
);
251 #if !UCONFIG_NO_TRANSLITERATION
252 if(f
->fTranslit
->translit
)
253 utrans_close(f
->fTranslit
->translit
);
255 if(f
->fTranslit
->buffer
)
257 uprv_free(f
->fTranslit
->buffer
);
260 uprv_free(f
->fTranslit
);
/*
 * Writes a NUL-terminated UChar string followed by the line delimiter.
 *
 * s - the string to write; its length is taken with u_strlen.
 * f - the destination UFILE (used in the calls below; its declaration in
 *     the parameter list is not visible in this view).
 *
 * `count` accumulates the value returned by u_file_write for the string and
 * then for DELIMITERS (DELIMITERS_LEN units).
 * NOTE(review): the return statement is not visible; presumably `count` is
 * returned -- confirm.
 */
268 U_CAPI
int32_t U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
269 u_fputs(const UChar
*s
,
272 int32_t count
= u_file_write(s
, u_strlen(s
), f
);
273 count
+= u_file_write(DELIMITERS
, DELIMITERS_LEN
, f
);
277 U_CAPI UChar32 U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
283 UBool isError
= FALSE
;
285 U16_APPEND(buf
, idx
, sizeof(buf
)/sizeof(*buf
), uc
, isError
);
289 return u_file_write(buf
, idx
, f
) == idx
? uc
: U_EOF
;
/*
 * Core write routine: optionally transliterates the input, then either
 * copies it into an in-memory string target or converts it to bytes in
 * chunks through a stack buffer of UFILE_CHARBUFFER_SIZE chars.
 *
 * chars - the UChars to write.
 * Other parameters are used below but their declarations are not visible in
 * this view: `count` (number of UChars; recomputed with u_strlen(chars)
 * under a condition that is not visible), `f` (the UFILE), `flushIO` and
 * `flushTranslit` (flags).
 *
 * Visible flow:
 *   - If f->fTranslit and its translit are set, the text is first passed
 *     through u_file_translit, which may change both mySource and count.
 *   - String target: copies ufmt_min(count, charsLeft) UChars to
 *     f->str.fPos and advances fPos by that amount.
 *   - Otherwise a do/while loop converts with ucnv_fromUnicode when
 *     f->fConverter is set, or with u_UCharsToChars (invariant conversion)
 *     when it is NULL; counts the bytes produced; adds them to `written`;
 *     and repeats while status is U_BUFFER_OVERFLOW_ERROR.
 * NOTE(review): the return statement is not visible; per the trailing
 * comment the function returns the number of chars written -- confirm.
 */
293 U_CFUNC
int32_t U_EXPORT2
294 u_file_write_flush(const UChar
*chars
,
300 /* Set up conversion parameters */
301 UErrorCode status
= U_ZERO_ERROR
;
302 const UChar
*mySource
= chars
;
303 const UChar
*mySourceEnd
;
304 char charBuffer
[UFILE_CHARBUFFER_SIZE
];
305 char *myTarget
= charBuffer
;
307 int32_t numConverted
= 0;
310 count
= u_strlen(chars
);
313 #if !UCONFIG_NO_TRANSLITERATION
314 if((f
->fTranslit
) && (f
->fTranslit
->translit
))
316 /* Do the transliteration */
317 mySource
= u_file_translit(f
, chars
, &count
, flushTranslit
);
321 /* Write to a string. */
323 int32_t charsLeft
= (int32_t)(f
->str
.fLimit
- f
->str
.fPos
);
324 if (flushIO
&& charsLeft
> count
) {
327 written
= ufmt_min(count
, charsLeft
);
328 u_strncpy(f
->str
.fPos
, mySource
, written
);
329 f
->str
.fPos
+= written
;
333 mySourceEnd
= mySource
+ count
;
335 /* Perform the conversion in a loop */
337 status
= U_ZERO_ERROR
;
338 if(f
->fConverter
!= NULL
) { /* We have a valid converter */
/* The target limit passed here is the end of the stack buffer. */
339 ucnv_fromUnicode(f
->fConverter
,
341 charBuffer
+ UFILE_CHARBUFFER_SIZE
,
/* NOTE(review): no bound check of count against UFILE_CHARBUFFER_SIZE is
   visible on this invariant path -- confirm one exists in the missing lines. */
347 } else { /*weiv: do the invariant conversion */
348 u_UCharsToChars(mySource
, myTarget
, count
);
351 numConverted
= (int32_t)(myTarget
- charBuffer
);
353 if (numConverted
> 0) {
354 /* write the converted bytes */
360 written
+= numConverted
;
362 myTarget
= charBuffer
;
364 while(status
== U_BUFFER_OVERFLOW_ERROR
);
366 /* return # of chars written */
/*
 * Public write entry point: forwards to u_file_write_flush with both
 * trailing flag arguments set to FALSE and returns its result.
 *
 * chars - the UChars to write.
 * `count` and `f` are forwarded unchanged; their declarations in the
 * parameter list are not visible in this view.
 */
370 U_CAPI
int32_t U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
371 u_file_write( const UChar
*chars
,
375 return u_file_write_flush(chars
,count
,f
,FALSE
,FALSE
);
379 /* private function used for buffering input */
/*
 * Refills the UFILE's UChar read buffer from the underlying FILE.
 *
 * f - the UFILE to refill.  When f->fFile is NULL the UFILE is a string and
 *     there is nothing to read.
 *
 * Visible flow:
 *   - Compute dataSize, the number of unread UChars (fLimit - fPos).  For
 *     stdin (f->fFileno == 0) with unread data, no further read is done.
 *   - Move the unread UChars to the start of f->fUCBuffer.
 *   - availLength is the free space left; maxCPBytes is that space divided
 *     by twice the converter's minimum char size, or by 1 when there is no
 *     converter.
 *   - Read bytes into a stack buffer: one line with fgets for stdin,
 *     otherwise with fread; the request is capped by
 *     ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE).
 *   - Convert the bytes after the retained data with ucnv_toUnicode (the
 *     flush flag is feof(f->fFile) != 0) or, without a converter, with
 *     u_charsToUChars.
 *   - Reset fPos to the buffer start and fLimit to the end of the converted
 *     data.
 * NOTE(review): the assignment of `str` is not visible here; presumably it
 * points at f->str -- confirm.
 */
381 ufile_fill_uchar_buffer(UFILE
*f
)
384 const char *mySource
;
385 const char *mySourceEnd
;
392 char charBuffer
[UFILE_CHARBUFFER_SIZE
];
393 u_localized_string
*str
;
395 if (f
->fFile
== NULL
) {
396 /* There is nothing to do. It's a string. */
401 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
402 if (f
->fFileno
== 0 && dataSize
> 0) {
403 /* Don't read from stdin too many times. There is still some data. */
407 /* shift the buffer if it isn't empty */
409 uprv_memmove(f
->fUCBuffer
, str
->fPos
, dataSize
* sizeof(UChar
));
413 /* record how much buffer space is available */
414 availLength
= UFILE_UCHARBUFFER_SIZE
- dataSize
;
416 /* Determine the # of codepage bytes needed to fill our UChar buffer */
417 /* weiv: if converter is NULL, we use the invariant converter (charwidth = 1) */
418 maxCPBytes
= availLength
/ (f
->fConverter
!=NULL
?(2*ucnv_getMinCharSize(f
->fConverter
)):1);
420 /* Read in the data to convert */
421 if (f
->fFileno
== 0) {
422 /* Special case. Read from stdin one line at a time. */
423 char *retStr
= fgets(charBuffer
, ufmt_min(maxCPBytes
, UFILE_CHARBUFFER_SIZE
), f
->fFile
);
/* fgets returns NULL on EOF or error; treat that as zero bytes read. */
424 bytesRead
= (int32_t)(retStr
? uprv_strlen(charBuffer
) : 0);
428 bytesRead
= (int32_t)fread(charBuffer
,
430 ufmt_min(maxCPBytes
, UFILE_CHARBUFFER_SIZE
),
434 /* Set up conversion parameters */
435 status
= U_ZERO_ERROR
;
436 mySource
= charBuffer
;
437 mySourceEnd
= charBuffer
+ bytesRead
;
/* New UChars are written after the retained, still-unread data. */
438 myTarget
= f
->fUCBuffer
+ dataSize
;
439 bufferSize
= UFILE_UCHARBUFFER_SIZE
;
441 if(f
->fConverter
!= NULL
) { /* We have a valid converter */
442 /* Perform the conversion */
443 ucnv_toUnicode(f
->fConverter
,
445 f
->fUCBuffer
+ bufferSize
,
449 (UBool
)(feof(f
->fFile
) != 0),
452 } else { /*weiv: do the invariant conversion */
453 u_charsToUChars(mySource
, myTarget
, bytesRead
);
454 myTarget
+= bytesRead
;
457 /* update the pointers into our array */
458 str
->fPos
= str
->fBuffer
;
459 str
->fLimit
= myTarget
;
462 U_CAPI UChar
* U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
473 u_localized_string
*str
;
476 /* Caller screwed up. We need to write the null terminator. */
480 /* fill the buffer if needed */
482 if (str
->fPos
>= str
->fLimit
) {
483 ufile_fill_uchar_buffer(f
);
486 /* subtract 1 from n to compensate for the terminator */
489 /* determine the amount of data in the buffer */
490 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
492 /* if 0 characters were left, return 0 */
496 /* otherwise, iteratively fill the buffer and copy */
500 while (dataSize
> 0 && count
< n
) {
503 /* Find how much to copy */
504 if (dataSize
< (n
- count
)) {
508 limit
= alias
+ (n
- count
);
512 /* Copy UChars until we find the first occurrence of a delimiter character */
513 while (alias
< limit
&& !IS_FIRST_STRING_DELIMITER(*alias
)) {
515 *(sItr
++) = *(alias
++);
517 /* Preserve the newline */
518 if (alias
< limit
&& IS_FIRST_STRING_DELIMITER(*alias
)) {
519 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias
)) {
523 currDelim
= 1; /* This isn't a newline, but it's used to say
524 that we should break later. We've checked all
525 possible newline combinations even across buffer
529 *(sItr
++) = *(alias
++);
532 /* If we have a CRLF combination, preserve that too. */
534 if (currDelim
&& IS_COMBINED_STRING_DELIMITER(currDelim
, *alias
)) {
536 *(sItr
++) = *(alias
++);
538 currDelim
= 1; /* This isn't a newline, but it's used to say
539 that we should break later. We've checked all
540 possible newline combinations even across buffer
544 /* update the current buffer position */
547 /* if we found a delimiter */
548 if (currDelim
== 1) {
553 /* refill the buffer */
554 ufile_fill_uchar_buffer(f
);
556 /* determine the amount of data in the buffer */
557 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
560 /* add the terminator and return s */
/*
 * Reads one UChar (a single UTF-16 code unit) from the UFILE.
 *
 * f  - the source UFILE; its buffered data lies in [f->str.fPos,
 *      f->str.fLimit).
 * ch - receives the code unit that was read.
 *
 * If a unit is already buffered it is stored in *ch and fPos is advanced.
 * Otherwise the buffer is refilled with ufile_fill_uchar_buffer and the
 * read is attempted once more.
 * NOTE(review): the lines that set isValidChar and return it are not
 * visible here; presumably the function returns TRUE only when *ch was
 * written -- confirm.
 */
565 U_CFUNC UBool U_EXPORT2
566 ufile_getch(UFILE
*f
, UChar
*ch
)
568 UBool isValidChar
= FALSE
;
571 /* if we have an available character in the buffer, return it */
572 if(f
->str
.fPos
< f
->str
.fLimit
){
573 *ch
= *(f
->str
.fPos
)++;
577 /* otherwise, fill the buffer and return the next character */
578 if(f
->str
.fPos
>= f
->str
.fLimit
) {
579 ufile_fill_uchar_buffer(f
);
581 if(f
->str
.fPos
< f
->str
.fLimit
) {
582 *ch
= *(f
->str
.fPos
)++;
589 U_CAPI UChar U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
/*
 * Reads one code point from the UFILE, combining a surrogate pair when the
 * next code unit is a lead surrogate and a second unit is available.
 *
 * f   - the source UFILE.
 * c32 - receives the code point.  It is first assigned the raw code unit,
 *       then replaced by U16_GET_SUPPLEMENTARY(lead, next unit) when the
 *       unit is a lead surrogate.
 *
 * The buffer is refilled when fewer than two code units remain
 * (fPos + 1 >= fLimit), so a pair is less likely to be split across a
 * refill.
 * NOTE(review): the assignment of `str`, the handling of a lead surrogate
 * with no following unit, and the return of isValidChar are not visible in
 * this view -- confirm.  The refill test also does not check that the unit
 * following a lead surrogate is a trail surrogate.
 */
597 U_CFUNC UBool U_EXPORT2
598 ufile_getch32(UFILE
*f
, UChar32
*c32
)
600 UBool isValidChar
= FALSE
;
601 u_localized_string
*str
;
605 /* Fill the buffer if it is empty */
607 if (f
&& str
->fPos
+ 1 >= str
->fLimit
) {
608 ufile_fill_uchar_buffer(f
);
611 /* Get the next character in the buffer */
612 if (str
->fPos
< str
->fLimit
) {
613 *c32
= *(str
->fPos
)++;
614 if (U_IS_LEAD(*c32
)) {
615 if (str
->fPos
< str
->fLimit
) {
616 UChar c16
= *(str
->fPos
)++;
617 *c32
= U16_GET_SUPPLEMENTARY(*c32
, c16
);
632 U_CAPI UChar32 U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
636 ufile_getch32(f
, &ch
);
/*
 * Pushes a code point back onto the UFILE's read buffer by moving fPos
 * backwards.
 *
 * ch - the code point to un-read.
 * The UFILE parameter and the assignment of `str` are not visible in this
 * view.
 *
 * The push-back is refused when fPos is at the start of the buffer, or when
 * U_IS_LEAD(ch) holds and only one unit precedes fPos.
 * The units stepped over are compared with what is being pushed back:
 * U16_TRAIL(ch) then U16_LEAD(ch) in one branch, or ch itself in the other.
 * A mismatch is treated as a failure.
 * NOTE(review): the failure handling, the condition choosing between the
 * two branches, and the return statement are not visible; presumably ch is
 * set to U_EOF on failure and then returned -- confirm.
 */
640 U_CAPI UChar32 U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
641 u_fungetc(UChar32 ch
,
644 u_localized_string
*str
;
648 /* if we're at the beginning of the buffer, sorry! */
649 if (str
->fPos
== str
->fBuffer
650 || (U_IS_LEAD(ch
) && (str
->fPos
- 1) == str
->fBuffer
))
655 /* otherwise, put the character back */
656 /* Remember, read them back on in the reverse order. */
/* Each *-- both steps fPos back one unit and checks the unit found there. */
658 if (*--(str
->fPos
) != U16_TRAIL(ch
)
659 || *--(str
->fPos
) != U16_LEAD(ch
))
664 else if (*--(str
->fPos
) != ch
) {
/*
 * Reads up to `count` UChars from the UFILE into `chars`.
 *
 * chars - destination array.
 * `count` (maximum number of UChars) and `f` (the source UFILE) are used
 * below, but their declarations are not visible in this view.
 *
 * Each pass of the do/while loop refills the buffer with
 * ufile_fill_uchar_buffer, clamps dataSize to the space still wanted
 * (count - read), copies that many UChars to chars + read with memcpy, and
 * advances str->fPos.  The loop ends when a refill yields no data or `read`
 * reaches `count`.
 * NOTE(review): the statement that increases `read` and the return
 * statement are not visible; presumably `read` grows by dataSize on each
 * pass and is the value returned -- confirm.
 */
671 U_CAPI
int32_t U_EXPORT2
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
672 u_file_read( UChar
*chars
,
678 u_localized_string
*str
= &f
->str
;
682 /* determine the amount of data in the buffer */
683 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
685 /* fill the buffer */
686 ufile_fill_uchar_buffer(f
);
687 dataSize
= (int32_t)(str
->fLimit
- str
->fPos
);
690 /* Make sure that we don't read too much */
691 if (dataSize
> (count
- read
)) {
692 dataSize
= count
- read
;
695 /* copy the current data in the buffer */
696 memcpy(chars
+ read
, str
->fPos
, dataSize
* sizeof(UChar
));
698 /* update number of items read */
701 /* update the current buffer position */
702 str
->fPos
+= dataSize
;
704 while (dataSize
!= 0 && read
< count
);