icuSources/io/ustdio.c

   1 /*
   2  ******************************************************************************
   3  *
   4  *   Copyright (C) 1998-2008, International Business Machines
   5  *   Corporation and others.  All Rights Reserved.
   6  *
   7  ******************************************************************************
   8  *
   9  * File ustdio.c
  10  *
  11  * Modification History:
  12  *
  13  *   Date        Name        Description
  14  *   11/18/98    stephen     Creation.
  15  *   03/12/99    stephen     Modified for new C API.
  16  *   07/19/99    stephen     Fixed read() and gets()
  17  ******************************************************************************
  18  */
  19
  20 #include "unicode/ustdio.h"
  21 #include "unicode/putil.h"
  22 #include "cmemory.h"
  23 #include "cstring.h"
  24 #include "ufile.h"
  25 #include "ufmt_cmn.h"
  26 #include "unicode/ucnv.h"
  27 #include "unicode/ustring.h"
  28
  29 #include <string.h>
  30
  31 #define DELIM_LF 0x000A
  32 #define DELIM_VT 0x000B
  33 #define DELIM_FF 0x000C
  34 #define DELIM_CR 0x000D
  35 #define DELIM_NEL 0x0085
  36 #define DELIM_LS 0x2028
  37 #define DELIM_PS 0x2029
  38
  39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
  40 #ifdef U_WINDOWS
  41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
  42 static const uint32_t DELIMITERS_LEN = 2;
  43 /* TODO: Default newline writing should be detected based upon the converter being used. */
  44 #else
  45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
  46 static const uint32_t DELIMITERS_LEN = 1;
  47 #endif
  48
  49 #define IS_FIRST_STRING_DELIMITER(c1) \
  50  (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
  51         || (c1) == DELIM_NEL \
  52         || (c1) == DELIM_LS \
  53         || (c1) == DELIM_PS)
  54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
  55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
  56  (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
  57
  58
  59 #if !UCONFIG_NO_TRANSLITERATION
  60
  61 U_CAPI UTransliterator* U_EXPORT2
  62 u_fsettransliterator(UFILE *file, UFileDirection direction,
  63                      UTransliterator *adopt, UErrorCode *status)
  64 {
  65     UTransliterator *old = NULL;
  66
  67     if(U_FAILURE(*status))
  68     {
  69         return adopt;
  70     }
  71
  72     if(!file)
  73     {
  74         *status = U_ILLEGAL_ARGUMENT_ERROR;
  75         return adopt;
  76     }
  77
  78     if(direction & U_READ)
  79     {
  80         /** TODO: implement */
  81         *status = U_UNSUPPORTED_ERROR;
  82         return adopt;
  83     }
  84
  85     if(adopt == NULL) /* they are clearing it */
  86     {
  87         if(file->fTranslit != NULL)
  88         {
  89             /* TODO: Check side */
  90             old = file->fTranslit->translit;
  91             uprv_free(file->fTranslit->buffer);
  92             file->fTranslit->buffer=NULL;
  93             uprv_free(file->fTranslit);
  94             file->fTranslit=NULL;
  95         }
  96     }
  97     else
  98     {
  99         if(file->fTranslit == NULL)
 100         {
 101             file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
 102             if(!file->fTranslit)
 103             {
 104                 *status = U_MEMORY_ALLOCATION_ERROR;
 105                 return adopt;
 106             }
 107             file->fTranslit->capacity = 0;
 108             file->fTranslit->length = 0;
 109             file->fTranslit->pos = 0;
 110             file->fTranslit->buffer = NULL;
 111         }
 112         else
 113         {
 114             old = file->fTranslit->translit;
 115             ufile_flush_translit(file);
 116         }
 117
 118         file->fTranslit->translit = adopt;
 119     }
 120
 121     return old;
 122 }
 123
 124 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
 125 {
 126     int32_t newlen;
 127     int32_t junkCount = 0;
 128     int32_t textLength;
 129     int32_t textLimit;
 130     UTransPosition pos;
 131     UErrorCode status = U_ZERO_ERROR;
 132
 133     if(count == NULL)
 134     {
 135         count = &junkCount;
 136     }
 137
 138     if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
 139     {
 140         /* fast path */
 141         return src;
 142     }
 143
 144     /* First: slide over everything */
 145     if(f->fTranslit->length > f->fTranslit->pos)
 146     {
 147         memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
 148             (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
 149     }
 150     f->fTranslit->length -= f->fTranslit->pos; /* always */
 151     f->fTranslit->pos = 0;
 152
 153     /* Calculate new buffer size needed */
 154     newlen = (*count + f->fTranslit->length) * 4;
 155
 156     if(newlen > f->fTranslit->capacity)
 157     {
 158         if(f->fTranslit->buffer == NULL)
 159         {
 160             f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
 161         }
 162         else
 163         {
 164             f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
 165         }
 166         /* Check for malloc/realloc failure. */
 167         if (f->fTranslit->buffer == NULL) {
 168                 return NULL;
 169         }
 170         f->fTranslit->capacity = newlen;
 171     }
 172
 173     /* Now, copy any data over */
 174     u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
 175         src,
 176         *count);
 177     f->fTranslit->length += *count;
 178
 179     /* Now, translit in place as much as we can  */
 180     if(flush == FALSE)
 181     {
 182         textLength = f->fTranslit->length;
 183         pos.contextStart = 0;
 184         pos.contextLimit = textLength;
 185         pos.start        = 0;
 186         pos.limit        = textLength;
 187
 188         utrans_transIncrementalUChars(f->fTranslit->translit,
 189             f->fTranslit->buffer, /* because we shifted */
 190             &textLength,
 191             f->fTranslit->capacity,
 192             &pos,
 193             &status);
 194
 195         /* now: start/limit point to the transliterated text */
 196         /* Transliterated is [buffer..pos.start) */
 197         *count            = pos.start;
 198         f->fTranslit->pos = pos.start;
 199         f->fTranslit->length = pos.limit;
 200
 201         return f->fTranslit->buffer;
 202     }
 203     else
 204     {
 205         textLength = f->fTranslit->length;
 206         textLimit = f->fTranslit->length;
 207
 208         utrans_transUChars(f->fTranslit->translit,
 209             f->fTranslit->buffer,
 210             &textLength,
 211             f->fTranslit->capacity,
 212             0,
 213             &textLimit,
 214             &status);
 215
 216         /* out: converted len */
 217         *count = textLimit;
 218
 219         /* Set pointers to 0 */
 220         f->fTranslit->pos = 0;
 221         f->fTranslit->length = 0;
 222
 223         return f->fTranslit->buffer;
 224     }
 225 }
 226
 227 #endif
 228
 229 void
 230 ufile_flush_translit(UFILE *f)
 231 {
 232 #if !UCONFIG_NO_TRANSLITERATION
 233     if((!f)||(!f->fTranslit))
 234         return;
 235 #endif
 236
 237     u_file_write_flush(NULL, 0, f, FALSE, TRUE);
 238 }
 239
 240
 241 void
 242 ufile_close_translit(UFILE *f)
 243 {
 244 #if !UCONFIG_NO_TRANSLITERATION
 245     if((!f)||(!f->fTranslit))
 246         return;
 247 #endif
 248
 249     ufile_flush_translit(f);
 250
 251 #if !UCONFIG_NO_TRANSLITERATION
 252     if(f->fTranslit->translit)
 253         utrans_close(f->fTranslit->translit);
 254
 255     if(f->fTranslit->buffer)
 256     {
 257         uprv_free(f->fTranslit->buffer);
 258     }
 259
 260     uprv_free(f->fTranslit);
 261     f->fTranslit = NULL;
 262 #endif
 263 }
 264
 265
 266 /* Input/output */
 267
 268 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 269 u_fputs(const UChar    *s,
 270         UFILE        *f)
 271 {
 272     int32_t count = u_file_write(s, u_strlen(s), f);
 273     count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
 274     return count;
 275 }
 276
 277 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 278 u_fputc(UChar32      uc,
 279         UFILE        *f)
 280 {
 281     UChar buf[2];
 282     int32_t idx = 0;
 283     UBool isError = FALSE;
 284
 285     U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
 286     if (isError) {
 287         return U_EOF;
 288     }
 289     return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
 290 }
 291
 292
 293 U_CFUNC int32_t U_EXPORT2
 294 u_file_write_flush(const UChar *chars,
 295                    int32_t     count,
 296                    UFILE       *f,
 297                    UBool       flushIO,
 298                    UBool       flushTranslit)
 299 {
 300     /* Set up conversion parameters */
 301     UErrorCode  status       = U_ZERO_ERROR;
 302     const UChar *mySource    = chars;
 303     const UChar *mySourceEnd;
 304     char        charBuffer[UFILE_CHARBUFFER_SIZE];
 305     char        *myTarget   = charBuffer;
 306     int32_t     written      = 0;
 307     int32_t     numConverted = 0;
 308
 309     if (count < 0) {
 310         count = u_strlen(chars);
 311     }
 312
 313 #if !UCONFIG_NO_TRANSLITERATION
 314     if((f->fTranslit) && (f->fTranslit->translit))
 315     {
 316         /* Do the transliteration */
 317         mySource = u_file_translit(f, chars, &count, flushTranslit);
 318     }
 319 #endif
 320
 321     /* Write to a string. */
 322     if (!f->fFile) {
 323         int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
 324         if (flushIO && charsLeft > count) {
 325             count++;
 326         }
 327         written = ufmt_min(count, charsLeft);
 328         u_strncpy(f->str.fPos, mySource, written);
 329         f->str.fPos += written;
 330         return written;
 331     }
 332
 333     mySourceEnd = mySource + count;
 334
 335     /* Perform the conversion in a loop */
 336     do {
 337         status     = U_ZERO_ERROR;
 338         if(f->fConverter != NULL) { /* We have a valid converter */
 339             ucnv_fromUnicode(f->fConverter,
 340                 &myTarget,
 341                 charBuffer + UFILE_CHARBUFFER_SIZE,
 342                 &mySource,
 343                 mySourceEnd,
 344                 NULL,
 345                 flushIO,
 346                 &status);
 347         } else { /*weiv: do the invariant conversion */
 348             u_UCharsToChars(mySource, myTarget, count);
 349             myTarget += count;
 350         }
 351         numConverted = (int32_t)(myTarget - charBuffer);
 352
 353         if (numConverted > 0) {
 354             /* write the converted bytes */
 355             fwrite(charBuffer,
 356                 sizeof(char),
 357                 numConverted,
 358                 f->fFile);
 359
 360             written     += numConverted;
 361         }
 362         myTarget     = charBuffer;
 363     }
 364     while(status == U_BUFFER_OVERFLOW_ERROR);
 365
 366     /* return # of chars written */
 367     return written;
 368 }
 369
 370 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 371 u_file_write(    const UChar     *chars,
 372              int32_t        count,
 373              UFILE         *f)
 374 {
 375     return u_file_write_flush(chars,count,f,FALSE,FALSE);
 376 }
 377
 378
 379 /* private function used for buffering input */
 380 void
 381 ufile_fill_uchar_buffer(UFILE *f)
 382 {
 383     UErrorCode  status;
 384     const char  *mySource;
 385     const char  *mySourceEnd;
 386     UChar       *myTarget;
 387     int32_t     bufferSize;
 388     int32_t     maxCPBytes;
 389     int32_t     bytesRead;
 390     int32_t     availLength;
 391     int32_t     dataSize;
 392     char        charBuffer[UFILE_CHARBUFFER_SIZE];
 393     u_localized_string *str;
 394
 395     if (f->fFile == NULL) {
 396         /* There is nothing to do. It's a string. */
 397         return;
 398     }
 399
 400     str = &f->str;
 401     dataSize = (int32_t)(str->fLimit - str->fPos);
 402     if (f->fFileno == 0 && dataSize > 0) {
 403         /* Don't read from stdin too many times. There is still some data. */
 404         return;
 405     }
 406
 407     /* shift the buffer if it isn't empty */
 408     if(dataSize != 0) {
 409         uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
 410     }
 411
 412
 413     /* record how much buffer space is available */
 414     availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
 415
 416     /* Determine the # of codepage bytes needed to fill our UChar buffer */
 417     /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
 418     maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
 419
 420     /* Read in the data to convert */
 421     if (f->fFileno == 0) {
 422         /* Special case. Read from stdin one line at a time. */
 423         char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
 424         bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
 425     }
 426     else {
 427         /* A normal file */
 428         bytesRead = (int32_t)fread(charBuffer,
 429             sizeof(char),
 430             ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
 431             f->fFile);
 432     }
 433
 434     /* Set up conversion parameters */
 435     status      = U_ZERO_ERROR;
 436     mySource    = charBuffer;
 437     mySourceEnd = charBuffer + bytesRead;
 438     myTarget    = f->fUCBuffer + dataSize;
 439     bufferSize  = UFILE_UCHARBUFFER_SIZE;
 440
 441     if(f->fConverter != NULL) { /* We have a valid converter */
 442         /* Perform the conversion */
 443         ucnv_toUnicode(f->fConverter,
 444             &myTarget,
 445             f->fUCBuffer + bufferSize,
 446             &mySource,
 447             mySourceEnd,
 448             NULL,
 449             (UBool)(feof(f->fFile) != 0),
 450             &status);
 451
 452     } else { /*weiv: do the invariant conversion */
 453         u_charsToUChars(mySource, myTarget, bytesRead);
 454         myTarget += bytesRead;
 455     }
 456
 457     /* update the pointers into our array */
 458     str->fPos    = str->fBuffer;
 459     str->fLimit  = myTarget;
 460 }
 461
 462 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 463 u_fgets(UChar        *s,
 464         int32_t       n,
 465         UFILE        *f)
 466 {
 467     int32_t dataSize;
 468     int32_t count;
 469     UChar *alias;
 470     const UChar *limit;
 471     UChar *sItr;
 472     UChar currDelim = 0;
 473     u_localized_string *str;
 474
 475     if (n <= 0) {
 476         /* Caller screwed up. We need to write the null terminatior. */
 477         return NULL;
 478     }
 479
 480     /* fill the buffer if needed */
 481     str = &f->str;
 482     if (str->fPos >= str->fLimit) {
 483         ufile_fill_uchar_buffer(f);
 484     }
 485
 486     /* subtract 1 from n to compensate for the terminator */
 487     --n;
 488
 489     /* determine the amount of data in the buffer */
 490     dataSize = (int32_t)(str->fLimit - str->fPos);
 491
 492     /* if 0 characters were left, return 0 */
 493     if (dataSize == 0)
 494         return NULL;
 495
 496     /* otherwise, iteratively fill the buffer and copy */
 497     count = 0;
 498     sItr = s;
 499     currDelim = 0;
 500     while (dataSize > 0 && count < n) {
 501         alias = str->fPos;
 502
 503         /* Find how much to copy */
 504         if (dataSize < (n - count)) {
 505             limit = str->fLimit;
 506         }
 507         else {
 508             limit = alias + (n - count);
 509         }
 510
 511         if (!currDelim) {
 512             /* Copy UChars until we find the first occurrence of a delimiter character */
 513             while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
 514                 count++;
 515                 *(sItr++) = *(alias++);
 516             }
 517             /* Preserve the newline */
 518             if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
 519                 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
 520                     currDelim = *alias;
 521                 }
 522                 else {
 523                     currDelim = 1;  /* This isn't a newline, but it's used to say
 524                                     that we should break later. We've checked all
 525                                     possible newline combinations even across buffer
 526                                     boundaries. */
 527                 }
 528                 count++;
 529                 *(sItr++) = *(alias++);
 530             }
 531         }
 532         /* If we have a CRLF combination, preserve that too. */
 533         if (alias < limit) {
 534             if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
 535                 count++;
 536                 *(sItr++) = *(alias++);
 537             }
 538             currDelim = 1;  /* This isn't a newline, but it's used to say
 539                             that we should break later. We've checked all
 540                             possible newline combinations even across buffer
 541                             boundaries. */
 542         }
 543
 544         /* update the current buffer position */
 545         str->fPos = alias;
 546
 547         /* if we found a delimiter */
 548         if (currDelim == 1) {
 549             /* break out */
 550             break;
 551         }
 552
 553         /* refill the buffer */
 554         ufile_fill_uchar_buffer(f);
 555
 556         /* determine the amount of data in the buffer */
 557         dataSize = (int32_t)(str->fLimit - str->fPos);
 558     }
 559
 560     /* add the terminator and return s */
 561     *sItr = 0x0000;
 562     return s;
 563 }
 564
 565 U_CFUNC UBool U_EXPORT2
 566 ufile_getch(UFILE *f, UChar *ch)
 567 {
 568     UBool isValidChar = FALSE;
 569
 570     *ch = U_EOF;
 571     /* if we have an available character in the buffer, return it */
 572     if(f->str.fPos < f->str.fLimit){
 573         *ch = *(f->str.fPos)++;
 574         isValidChar = TRUE;
 575     }
 576     else {
 577         /* otherwise, fill the buffer and return the next character */
 578         if(f->str.fPos >= f->str.fLimit) {
 579             ufile_fill_uchar_buffer(f);
 580         }
 581         if(f->str.fPos < f->str.fLimit) {
 582             *ch = *(f->str.fPos)++;
 583             isValidChar = TRUE;
 584         }
 585     }
 586     return isValidChar;
 587 }
 588
 589 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 590 u_fgetc(UFILE        *f)
 591 {
 592     UChar ch;
 593     ufile_getch(f, &ch);
 594     return ch;
 595 }
 596
 597 U_CFUNC UBool U_EXPORT2
 598 ufile_getch32(UFILE *f, UChar32 *c32)
 599 {
 600     UBool isValidChar = FALSE;
 601     u_localized_string *str;
 602
 603     *c32 = U_EOF;
 604
 605     /* Fill the buffer if it is empty */
 606     str = &f->str;
 607     if (f && str->fPos + 1 >= str->fLimit) {
 608         ufile_fill_uchar_buffer(f);
 609     }
 610
 611     /* Get the next character in the buffer */
 612     if (str->fPos < str->fLimit) {
 613         *c32 = *(str->fPos)++;
 614         if (U_IS_LEAD(*c32)) {
 615             if (str->fPos < str->fLimit) {
 616                 UChar c16 = *(str->fPos)++;
 617                 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
 618                 isValidChar = TRUE;
 619             }
 620             else {
 621                 *c32 = U_EOF;
 622             }
 623         }
 624         else {
 625             isValidChar = TRUE;
 626         }
 627     }
 628
 629     return isValidChar;
 630 }
 631
 632 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 633 u_fgetcx(UFILE        *f)
 634 {
 635     UChar32 ch;
 636     ufile_getch32(f, &ch);
 637     return ch;
 638 }
 639
 640 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 641 u_fungetc(UChar32        ch,
 642     UFILE        *f)
 643 {
 644     u_localized_string *str;
 645
 646     str = &f->str;
 647
 648     /* if we're at the beginning of the buffer, sorry! */
 649     if (str->fPos == str->fBuffer
 650         || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
 651     {
 652         ch = U_EOF;
 653     }
 654     else {
 655         /* otherwise, put the character back */
 656         /* Remember, read them back on in the reverse order. */
 657         if (U_IS_LEAD(ch)) {
 658             if (*--(str->fPos) != U16_TRAIL(ch)
 659                 || *--(str->fPos) != U16_LEAD(ch))
 660             {
 661                 ch = U_EOF;
 662             }
 663         }
 664         else if (*--(str->fPos) != ch) {
 665             ch = U_EOF;
 666         }
 667     }
 668     return ch;
 669 }
 670
 671 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 672 u_file_read(    UChar        *chars,
 673     int32_t        count,
 674     UFILE         *f)
 675 {
 676     int32_t dataSize;
 677     int32_t read = 0;
 678     u_localized_string *str = &f->str;
 679
 680     do {
 681
 682         /* determine the amount of data in the buffer */
 683         dataSize = (int32_t)(str->fLimit - str->fPos);
 684         if (dataSize <= 0) {
 685             /* fill the buffer */
 686             ufile_fill_uchar_buffer(f);
 687             dataSize = (int32_t)(str->fLimit - str->fPos);
 688         }
 689
 690         /* Make sure that we don't read too much */
 691         if (dataSize > (count - read)) {
 692             dataSize = count - read;
 693         }
 694
 695         /* copy the current data in the buffer */
 696         memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
 697
 698         /* update number of items read */
 699         read += dataSize;
 700
 701         /* update the current buffer position */
 702         str->fPos += dataSize;
 703     }
 704     while (dataSize != 0 && read < count);
 705
 706     return read;
 707 }