2 ****************************************************************************** 
   4 *   Copyright (C) 1998-2006, International Business Machines 
   5 *   Corporation and others.  All Rights Reserved. 
   7 ****************************************************************************** 
  11 * Modification History: 
  13 *   Date        Name        Description 
  14 *   11/18/98    stephen     Creation. 
  15 *   03/12/99    stephen     Modified for new C API. 
  16 *   07/19/99    stephen     Fixed read() and gets() 
  17 ****************************************************************************** 
  20 #include "unicode/ustdio.h" 
  21 #include "unicode/putil.h" 
  26 #include "unicode/ucnv.h" 
  27 #include "unicode/ustring.h" 
  31 #define DELIM_LF 0x000A 
  32 #define DELIM_VT 0x000B 
  33 #define DELIM_FF 0x000C 
  34 #define DELIM_CR 0x000D 
  35 #define DELIM_NEL 0x0085 
  36 #define DELIM_LS 0x2028 
  37 #define DELIM_PS 0x2029 
  39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */ 
  41 static const UChar DELIMITERS 
[] = { DELIM_CR
, DELIM_LF
, 0x0000 }; 
  42 static const uint32_t DELIMITERS_LEN 
= 2; 
  43 /* TODO: Default newline writing should be detected based upon the converter being used. */ 
  45 static const UChar DELIMITERS 
[] = { DELIM_LF
, 0x0000 }; 
  46 static const uint32_t DELIMITERS_LEN 
= 1; 
  49 #define IS_FIRST_STRING_DELIMITER(c1) \ 
  50  (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \ 
  51         || (c1) == DELIM_NEL \ 
  54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR) 
  55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \ 
  56  (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF) 
  59 #if !UCONFIG_NO_TRANSLITERATION 
  61 U_CAPI UTransliterator
* U_EXPORT2
 
  62 u_fsettransliterator(UFILE 
*file
, UFileDirection direction
, 
  63                      UTransliterator 
*adopt
, UErrorCode 
*status
) 
  65     UTransliterator 
*old 
= NULL
; 
  67     if(U_FAILURE(*status
)) 
  74         *status 
= U_ILLEGAL_ARGUMENT_ERROR
; 
  78     if(direction 
& U_READ
) 
  80         /** TODO: implement */ 
  81         *status 
= U_UNSUPPORTED_ERROR
; 
  85     if(adopt 
== NULL
) /* they are clearing it */ 
  87         if(file
->fTranslit 
!= NULL
) 
  89             /* TODO: Check side */ 
  90             old 
= file
->fTranslit
->translit
; 
  91             uprv_free(file
->fTranslit
->buffer
); 
  92             file
->fTranslit
->buffer
=NULL
; 
  93             uprv_free(file
->fTranslit
); 
  99         if(file
->fTranslit 
== NULL
) 
 101             file
->fTranslit 
= (UFILETranslitBuffer
*) uprv_malloc(sizeof(UFILETranslitBuffer
)); 
 104                 *status 
= U_MEMORY_ALLOCATION_ERROR
; 
 107             file
->fTranslit
->capacity 
= 0; 
 108             file
->fTranslit
->length 
= 0; 
 109             file
->fTranslit
->pos 
= 0; 
 110             file
->fTranslit
->buffer 
= NULL
; 
 114             old 
= file
->fTranslit
->translit
; 
 115             ufile_flush_translit(file
); 
 118         file
->fTranslit
->translit 
= adopt
; 
 124 static const UChar 
* u_file_translit(UFILE 
*f
, const UChar 
*src
, int32_t *count
, UBool flush
) 
 127     int32_t junkCount 
= 0; 
 131     UErrorCode status 
= U_ZERO_ERROR
; 
 138     if ((!f
)||(!f
->fTranslit
)||(!f
->fTranslit
->translit
)) 
 144     /* First: slide over everything */ 
 145     if(f
->fTranslit
->length 
> f
->fTranslit
->pos
) 
 147         memmove(f
->fTranslit
->buffer
, f
->fTranslit
->buffer 
+ f
->fTranslit
->pos
, 
 148             (f
->fTranslit
->length 
- f
->fTranslit
->pos
)*sizeof(UChar
)); 
 150     f
->fTranslit
->length 
-= f
->fTranslit
->pos
; /* always */ 
 151     f
->fTranslit
->pos 
= 0; 
 153     /* Calculate new buffer size needed */ 
 154     newlen 
= (*count 
+ f
->fTranslit
->length
) * 4; 
 156     if(newlen 
> f
->fTranslit
->capacity
) 
 158         if(f
->fTranslit
->buffer 
== NULL
) 
 160             f
->fTranslit
->buffer 
= (UChar
*)uprv_malloc(newlen 
* sizeof(UChar
)); 
 164             f
->fTranslit
->buffer 
= (UChar
*)uprv_realloc(f
->fTranslit
->buffer
, newlen 
* sizeof(UChar
)); 
 166         f
->fTranslit
->capacity 
= newlen
; 
 169     /* Now, copy any data over */ 
 170     u_strncpy(f
->fTranslit
->buffer 
+ f
->fTranslit
->length
, 
 173     f
->fTranslit
->length 
+= *count
; 
 175     /* Now, translit in place as much as we can  */ 
 178         textLength 
= f
->fTranslit
->length
; 
 179         pos
.contextStart 
= 0; 
 180         pos
.contextLimit 
= textLength
; 
 182         pos
.limit        
= textLength
; 
 184         utrans_transIncrementalUChars(f
->fTranslit
->translit
, 
 185             f
->fTranslit
->buffer
, /* because we shifted */ 
 187             f
->fTranslit
->capacity
, 
 191         /* now: start/limit point to the transliterated text */ 
 192         /* Transliterated is [buffer..pos.start) */ 
 194         f
->fTranslit
->pos 
= pos
.start
; 
 195         f
->fTranslit
->length 
= pos
.limit
; 
 197         return f
->fTranslit
->buffer
; 
 201         textLength 
= f
->fTranslit
->length
; 
 202         textLimit 
= f
->fTranslit
->length
; 
 204         utrans_transUChars(f
->fTranslit
->translit
, 
 205             f
->fTranslit
->buffer
, 
 207             f
->fTranslit
->capacity
, 
 212         /* out: converted len */ 
 215         /* Set pointers to 0 */ 
 216         f
->fTranslit
->pos 
= 0; 
 217         f
->fTranslit
->length 
= 0; 
 219         return f
->fTranslit
->buffer
; 
 226 ufile_flush_translit(UFILE 
*f
) 
 228 #if !UCONFIG_NO_TRANSLITERATION 
 229     if((!f
)||(!f
->fTranslit
)) 
 233     u_file_write_flush(NULL
, 0, f
, FALSE
, TRUE
); 
 238 ufile_close_translit(UFILE 
*f
) 
 240 #if !UCONFIG_NO_TRANSLITERATION 
 241     if((!f
)||(!f
->fTranslit
)) 
 245     ufile_flush_translit(f
); 
 247 #if !UCONFIG_NO_TRANSLITERATION 
 248     if(f
->fTranslit
->translit
) 
 249         utrans_close(f
->fTranslit
->translit
); 
 251     if(f
->fTranslit
->buffer
) 
 253         uprv_free(f
->fTranslit
->buffer
); 
 256     uprv_free(f
->fTranslit
); 
 264 U_CAPI 
int32_t U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 265 u_fputs(const UChar    
*s
, 
 268     int32_t count 
= u_file_write(s
, u_strlen(s
), f
); 
 269     count 
+= u_file_write(DELIMITERS
, DELIMITERS_LEN
, f
); 
 273 U_CAPI UChar32 U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 279     UBool isError 
= FALSE
; 
 281     U16_APPEND(buf
, idx
, sizeof(buf
)/sizeof(*buf
), uc
, isError
); 
 285     return u_file_write(buf
, idx
, f
) == idx 
? uc 
: U_EOF
; 
 289 U_CAPI 
int32_t U_EXPORT2
 
 290 u_file_write_flush(const UChar 
*chars
, 
 296     /* Set up conversion parameters */ 
 297     UErrorCode  status       
= U_ZERO_ERROR
; 
 298     const UChar 
*mySource    
= chars
; 
 299     const UChar 
*mySourceEnd
; 
 300     char        charBuffer
[UFILE_CHARBUFFER_SIZE
]; 
 301     char        *myTarget   
= charBuffer
; 
 303     int32_t     numConverted 
= 0; 
 306         count 
= u_strlen(chars
); 
 309 #if !UCONFIG_NO_TRANSLITERATION 
 310     if((f
->fTranslit
) && (f
->fTranslit
->translit
)) 
 312         /* Do the transliteration */ 
 313         mySource 
= u_file_translit(f
, chars
, &count
, flushTranslit
); 
 317     /* Write to a string. */ 
 319         int32_t charsLeft 
= (int32_t)(f
->str
.fLimit 
- f
->str
.fPos
); 
 320         if (flushIO 
&& charsLeft 
> count
) { 
 323         written 
= ufmt_min(count
, charsLeft
); 
 324         u_strncpy(f
->str
.fPos
, mySource
, written
); 
 325         f
->str
.fPos 
+= written
; 
 329     mySourceEnd 
= mySource 
+ count
; 
 331     /* Perform the conversion in a loop */ 
 333         status     
= U_ZERO_ERROR
; 
 334         if(f
->fConverter 
!= NULL
) { /* We have a valid converter */ 
 335             ucnv_fromUnicode(f
->fConverter
, 
 337                 charBuffer 
+ UFILE_CHARBUFFER_SIZE
, 
 343         } else { /*weiv: do the invariant conversion */ 
 344             u_UCharsToChars(mySource
, myTarget
, count
); 
 347         numConverted 
= (int32_t)(myTarget 
- charBuffer
); 
 349         if (numConverted 
> 0) { 
 350             /* write the converted bytes */ 
 356             written     
+= numConverted
; 
 358         myTarget     
= charBuffer
; 
 360     while(status 
== U_BUFFER_OVERFLOW_ERROR
); 
 362     /* return # of chars written */ 
 366 U_CAPI 
int32_t U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 367 u_file_write(    const UChar     
*chars
, 
 371     return u_file_write_flush(chars
,count
,f
,FALSE
,FALSE
); 
 375 /* private function used for buffering input */ 
 377 ufile_fill_uchar_buffer(UFILE 
*f
) 
 380     const char  *mySource
; 
 381     const char  *mySourceEnd
; 
 388     char        charBuffer
[UFILE_CHARBUFFER_SIZE
]; 
 389     u_localized_string 
*str
; 
 391     if (f
->fFile 
== NULL
) { 
 392         /* There is nothing to do. It's a string. */ 
 397     dataSize 
= (int32_t)(str
->fLimit 
- str
->fPos
); 
 398     if (f
->fFileno 
== 0 && dataSize 
> 0) { 
 399         /* Don't read from stdin too many times. There is still some data. */ 
 403     /* shift the buffer if it isn't empty */ 
 405         uprv_memmove(f
->fUCBuffer
, str
->fPos
, dataSize 
* sizeof(UChar
)); 
 409     /* record how much buffer space is available */ 
 410     availLength 
= UFILE_UCHARBUFFER_SIZE 
- dataSize
; 
 412     /* Determine the # of codepage bytes needed to fill our UChar buffer */ 
 413     /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/ 
 414     maxCPBytes 
= availLength 
/ (f
->fConverter
!=NULL
?(2*ucnv_getMinCharSize(f
->fConverter
)):1); 
 416     /* Read in the data to convert */ 
 417     if (f
->fFileno 
== 0) { 
 418         /* Special case. Read from stdin one line at a time. */ 
 419         char *retStr 
= fgets(charBuffer
, ufmt_min(maxCPBytes
, UFILE_CHARBUFFER_SIZE
), f
->fFile
); 
 420         bytesRead 
= (int32_t)(retStr 
? uprv_strlen(charBuffer
) : 0); 
 424         bytesRead 
= (int32_t)fread(charBuffer
, 
 426             ufmt_min(maxCPBytes
, UFILE_CHARBUFFER_SIZE
), 
 430     /* Set up conversion parameters */ 
 431     status      
= U_ZERO_ERROR
; 
 432     mySource    
= charBuffer
; 
 433     mySourceEnd 
= charBuffer 
+ bytesRead
; 
 434     myTarget    
= f
->fUCBuffer 
+ dataSize
; 
 435     bufferSize  
= UFILE_UCHARBUFFER_SIZE
; 
 437     if(f
->fConverter 
!= NULL
) { /* We have a valid converter */ 
 438         /* Perform the conversion */ 
 439         ucnv_toUnicode(f
->fConverter
, 
 441             f
->fUCBuffer 
+ bufferSize
, 
 445             (UBool
)(feof(f
->fFile
) != 0), 
 448     } else { /*weiv: do the invariant conversion */ 
 449         u_charsToUChars(mySource
, myTarget
, bytesRead
); 
 450         myTarget 
+= bytesRead
; 
 453     /* update the pointers into our array */ 
 454     str
->fPos    
= str
->fBuffer
; 
 455     str
->fLimit  
= myTarget
; 
 458 U_CAPI UChar
* U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 469     u_localized_string 
*str
; 
 472         /* Caller screwed up. We need to write the null terminatior. */ 
 476     /* fill the buffer if needed */ 
 478     if (str
->fPos 
>= str
->fLimit
) { 
 479         ufile_fill_uchar_buffer(f
); 
 482     /* subtract 1 from n to compensate for the terminator */ 
 485     /* determine the amount of data in the buffer */ 
 486     dataSize 
= (int32_t)(str
->fLimit 
- str
->fPos
); 
 488     /* if 0 characters were left, return 0 */ 
 492     /* otherwise, iteratively fill the buffer and copy */ 
 496     while (dataSize 
> 0 && count 
< n
) { 
 499         /* Find how much to copy */ 
 500         if (dataSize 
< (n 
- count
)) { 
 504             limit 
= alias 
+ (n 
- count
); 
 508             /* Copy UChars until we find the first occurrence of a delimiter character */ 
 509             while (alias 
< limit 
&& !IS_FIRST_STRING_DELIMITER(*alias
)) { 
 511                 *(sItr
++) = *(alias
++); 
 513             /* Preserve the newline */ 
 514             if (alias 
< limit 
&& IS_FIRST_STRING_DELIMITER(*alias
)) { 
 515                 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias
)) { 
 519                     currDelim 
= 1;  /* This isn't a newline, but it's used to say 
 520                                     that we should break later. We've checked all 
 521                                     possible newline combinations even across buffer 
 525                 *(sItr
++) = *(alias
++); 
 528         /* If we have a CRLF combination, preserve that too. */ 
 530             if (currDelim 
&& IS_COMBINED_STRING_DELIMITER(currDelim
, *alias
)) { 
 532                 *(sItr
++) = *(alias
++); 
 534             currDelim 
= 1;  /* This isn't a newline, but it's used to say 
 535                             that we should break later. We've checked all 
 536                             possible newline combinations even across buffer 
 540         /* update the current buffer position */ 
 543         /* if we found a delimiter */ 
 544         if (currDelim 
== 1) { 
 549         /* refill the buffer */ 
 550         ufile_fill_uchar_buffer(f
); 
 552         /* determine the amount of data in the buffer */ 
 553         dataSize 
= (int32_t)(str
->fLimit 
- str
->fPos
); 
 556     /* add the terminator and return s */ 
 561 U_CFUNC UBool U_EXPORT2
 
 562 ufile_getch(UFILE 
*f
, UChar 
*ch
) 
 564     UBool isValidChar 
= FALSE
; 
 567     /* if we have an available character in the buffer, return it */ 
 568     if(f
->str
.fPos 
< f
->str
.fLimit
){ 
 569         *ch 
= *(f
->str
.fPos
)++; 
 573         /* otherwise, fill the buffer and return the next character */ 
 574         if(f
->str
.fPos 
>= f
->str
.fLimit
) { 
 575             ufile_fill_uchar_buffer(f
); 
 577         if(f
->str
.fPos 
< f
->str
.fLimit
) { 
 578             *ch 
= *(f
->str
.fPos
)++; 
 585 U_CAPI UChar U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 593 U_CFUNC UBool U_EXPORT2
 
 594 ufile_getch32(UFILE 
*f
, UChar32 
*c32
) 
 596     UBool isValidChar 
= FALSE
; 
 597     u_localized_string 
*str
; 
 601     /* Fill the buffer if it is empty */ 
 603     if (f 
&& str
->fPos 
+ 1 >= str
->fLimit
) { 
 604         ufile_fill_uchar_buffer(f
); 
 607     /* Get the next character in the buffer */ 
 608     if (str
->fPos 
< str
->fLimit
) { 
 609         *c32 
= *(str
->fPos
)++; 
 610         if (U_IS_LEAD(*c32
)) { 
 611             if (str
->fPos 
< str
->fLimit
) { 
 612                 UChar c16 
= *(str
->fPos
)++; 
 613                 *c32 
= U16_GET_SUPPLEMENTARY(*c32
, c16
); 
 628 U_CAPI UChar32 U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 632     ufile_getch32(f
, &ch
); 
 636 U_CAPI UChar32 U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 637 u_fungetc(UChar32        ch
, 
 640     u_localized_string 
*str
; 
 644     /* if we're at the beginning of the buffer, sorry! */ 
 645     if (str
->fPos 
== str
->fBuffer
 
 646         || (U_IS_LEAD(ch
) && (str
->fPos 
- 1) == str
->fBuffer
)) 
 651         /* otherwise, put the character back */ 
 652         /* Remember, read them back on in the reverse order. */ 
 654             if (*--(str
->fPos
) != U16_TRAIL(ch
) 
 655                 || *--(str
->fPos
) != U16_LEAD(ch
)) 
 660         else if (*--(str
->fPos
) != ch
) { 
 667 U_CAPI 
int32_t U_EXPORT2 
/* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */ 
 668 u_file_read(    UChar        
*chars
, 
 674     u_localized_string 
*str 
= &f
->str
; 
 678         /* determine the amount of data in the buffer */ 
 679         dataSize 
= (int32_t)(str
->fLimit 
- str
->fPos
); 
 681             /* fill the buffer */ 
 682             ufile_fill_uchar_buffer(f
); 
 683             dataSize 
= (int32_t)(str
->fLimit 
- str
->fPos
); 
 686         /* Make sure that we don't read too much */ 
 687         if (dataSize 
> (count 
- read
)) { 
 688             dataSize 
= count 
- read
; 
 691         /* copy the current data in the buffer */ 
 692         memcpy(chars 
+ read
, str
->fPos
, dataSize 
* sizeof(UChar
)); 
 694         /* update number of items read */ 
 697         /* update the current buffer position */ 
 698         str
->fPos 
+= dataSize
; 
 700     while (dataSize 
!= 0 && read 
< count
);