icuSources/io/ustdio.c

   1 /*
   2 ******************************************************************************
   3 *
   4 *   Copyright (C) 1998-2006, International Business Machines
   5 *   Corporation and others.  All Rights Reserved.
   6 *
   7 ******************************************************************************
   8 *
   9 * File ustdio.c
  10 *
  11 * Modification History:
  12 *
  13 *   Date        Name        Description
  14 *   11/18/98    stephen     Creation.
  15 *   03/12/99    stephen     Modified for new C API.
  16 *   07/19/99    stephen     Fixed read() and gets()
  17 ******************************************************************************
  18 */
  19
  20 #include "unicode/ustdio.h"
  21 #include "unicode/putil.h"
  22 #include "cmemory.h"
  23 #include "cstring.h"
  24 #include "ufile.h"
  25 #include "ufmt_cmn.h"
  26 #include "unicode/ucnv.h"
  27 #include "unicode/ustring.h"
  28
  29 #include <string.h>
  30
  31 #define DELIM_LF 0x000A
  32 #define DELIM_VT 0x000B
  33 #define DELIM_FF 0x000C
  34 #define DELIM_CR 0x000D
  35 #define DELIM_NEL 0x0085
  36 #define DELIM_LS 0x2028
  37 #define DELIM_PS 0x2029
  38
  39 /* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
  40 #ifdef U_WINDOWS
  41 static const UChar DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
  42 static const uint32_t DELIMITERS_LEN = 2;
  43 /* TODO: Default newline writing should be detected based upon the converter being used. */
  44 #else
  45 static const UChar DELIMITERS [] = { DELIM_LF, 0x0000 };
  46 static const uint32_t DELIMITERS_LEN = 1;
  47 #endif
  48
  49 #define IS_FIRST_STRING_DELIMITER(c1) \
  50  (UBool)((DELIM_LF <= (c1) && (c1) <= DELIM_CR) \
  51         || (c1) == DELIM_NEL \
  52         || (c1) == DELIM_LS \
  53         || (c1) == DELIM_PS)
  54 #define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)
  55 #define IS_COMBINED_STRING_DELIMITER(c1, c2) \
  56  (UBool)((c1) == DELIM_CR && (c2) == DELIM_LF)
  57
  58
  59 #if !UCONFIG_NO_TRANSLITERATION
  60
  61 U_CAPI UTransliterator* U_EXPORT2
  62 u_fsettransliterator(UFILE *file, UFileDirection direction,
  63                      UTransliterator *adopt, UErrorCode *status)
  64 {
  65     UTransliterator *old = NULL;
  66
  67     if(U_FAILURE(*status))
  68     {
  69         return adopt;
  70     }
  71
  72     if(!file)
  73     {
  74         *status = U_ILLEGAL_ARGUMENT_ERROR;
  75         return adopt;
  76     }
  77
  78     if(direction & U_READ)
  79     {
  80         /** TODO: implement */
  81         *status = U_UNSUPPORTED_ERROR;
  82         return adopt;
  83     }
  84
  85     if(adopt == NULL) /* they are clearing it */
  86     {
  87         if(file->fTranslit != NULL)
  88         {
  89             /* TODO: Check side */
  90             old = file->fTranslit->translit;
  91             uprv_free(file->fTranslit->buffer);
  92             file->fTranslit->buffer=NULL;
  93             uprv_free(file->fTranslit);
  94             file->fTranslit=NULL;
  95         }
  96     }
  97     else
  98     {
  99         if(file->fTranslit == NULL)
 100         {
 101             file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
 102             if(!file->fTranslit)
 103             {
 104                 *status = U_MEMORY_ALLOCATION_ERROR;
 105                 return adopt;
 106             }
 107             file->fTranslit->capacity = 0;
 108             file->fTranslit->length = 0;
 109             file->fTranslit->pos = 0;
 110             file->fTranslit->buffer = NULL;
 111         }
 112         else
 113         {
 114             old = file->fTranslit->translit;
 115             ufile_flush_translit(file);
 116         }
 117
 118         file->fTranslit->translit = adopt;
 119     }
 120
 121     return old;
 122 }
 123
 124 static const UChar * u_file_translit(UFILE *f, const UChar *src, int32_t *count, UBool flush)
 125 {
 126     int32_t newlen;
 127     int32_t junkCount = 0;
 128     int32_t textLength;
 129     int32_t textLimit;
 130     UTransPosition pos;
 131     UErrorCode status = U_ZERO_ERROR;
 132
 133     if(count == NULL)
 134     {
 135         count = &junkCount;
 136     }
 137
 138     if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
 139     {
 140         /* fast path */
 141         return src;
 142     }
 143
 144     /* First: slide over everything */
 145     if(f->fTranslit->length > f->fTranslit->pos)
 146     {
 147         memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
 148             (f->fTranslit->length - f->fTranslit->pos)*sizeof(UChar));
 149     }
 150     f->fTranslit->length -= f->fTranslit->pos; /* always */
 151     f->fTranslit->pos = 0;
 152
 153     /* Calculate new buffer size needed */
 154     newlen = (*count + f->fTranslit->length) * 4;
 155
 156     if(newlen > f->fTranslit->capacity)
 157     {
 158         if(f->fTranslit->buffer == NULL)
 159         {
 160             f->fTranslit->buffer = (UChar*)uprv_malloc(newlen * sizeof(UChar));
 161         }
 162         else
 163         {
 164             f->fTranslit->buffer = (UChar*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(UChar));
 165         }
 166         f->fTranslit->capacity = newlen;
 167     }
 168
 169     /* Now, copy any data over */
 170     u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
 171         src,
 172         *count);
 173     f->fTranslit->length += *count;
 174
 175     /* Now, translit in place as much as we can  */
 176     if(flush == FALSE)
 177     {
 178         textLength = f->fTranslit->length;
 179         pos.contextStart = 0;
 180         pos.contextLimit = textLength;
 181         pos.start        = 0;
 182         pos.limit        = textLength;
 183
 184         utrans_transIncrementalUChars(f->fTranslit->translit,
 185             f->fTranslit->buffer, /* because we shifted */
 186             &textLength,
 187             f->fTranslit->capacity,
 188             &pos,
 189             &status);
 190
 191         /* now: start/limit point to the transliterated text */
 192         /* Transliterated is [buffer..pos.start) */
 193         *count            = pos.start;
 194         f->fTranslit->pos = pos.start;
 195         f->fTranslit->length = pos.limit;
 196
 197         return f->fTranslit->buffer;
 198     }
 199     else
 200     {
 201         textLength = f->fTranslit->length;
 202         textLimit = f->fTranslit->length;
 203
 204         utrans_transUChars(f->fTranslit->translit,
 205             f->fTranslit->buffer,
 206             &textLength,
 207             f->fTranslit->capacity,
 208             0,
 209             &textLimit,
 210             &status);
 211
 212         /* out: converted len */
 213         *count = textLimit;
 214
 215         /* Set pointers to 0 */
 216         f->fTranslit->pos = 0;
 217         f->fTranslit->length = 0;
 218
 219         return f->fTranslit->buffer;
 220     }
 221 }
 222
 223 #endif
 224
 225 void
 226 ufile_flush_translit(UFILE *f)
 227 {
 228 #if !UCONFIG_NO_TRANSLITERATION
 229     if((!f)||(!f->fTranslit))
 230         return;
 231 #endif
 232
 233     u_file_write_flush(NULL, 0, f, FALSE, TRUE);
 234 }
 235
 236
 237 void
 238 ufile_close_translit(UFILE *f)
 239 {
 240 #if !UCONFIG_NO_TRANSLITERATION
 241     if((!f)||(!f->fTranslit))
 242         return;
 243 #endif
 244
 245     ufile_flush_translit(f);
 246
 247 #if !UCONFIG_NO_TRANSLITERATION
 248     if(f->fTranslit->translit)
 249         utrans_close(f->fTranslit->translit);
 250
 251     if(f->fTranslit->buffer)
 252     {
 253         uprv_free(f->fTranslit->buffer);
 254     }
 255
 256     uprv_free(f->fTranslit);
 257     f->fTranslit = NULL;
 258 #endif
 259 }
 260
 261
 262 /* Input/output */
 263
 264 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 265 u_fputs(const UChar    *s,
 266         UFILE        *f)
 267 {
 268     int32_t count = u_file_write(s, u_strlen(s), f);
 269     count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
 270     return count;
 271 }
 272
 273 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 274 u_fputc(UChar32      uc,
 275         UFILE        *f)
 276 {
 277     UChar buf[2];
 278     int32_t idx = 0;
 279     UBool isError = FALSE;
 280
 281     U16_APPEND(buf, idx, sizeof(buf)/sizeof(*buf), uc, isError);
 282     if (isError) {
 283         return U_EOF;
 284     }
 285     return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
 286 }
 287
 288
 289 U_CAPI int32_t U_EXPORT2
 290 u_file_write_flush(const UChar *chars,
 291                    int32_t     count,
 292                    UFILE       *f,
 293                    UBool       flushIO,
 294                    UBool       flushTranslit)
 295 {
 296     /* Set up conversion parameters */
 297     UErrorCode  status       = U_ZERO_ERROR;
 298     const UChar *mySource    = chars;
 299     const UChar *mySourceEnd;
 300     char        charBuffer[UFILE_CHARBUFFER_SIZE];
 301     char        *myTarget   = charBuffer;
 302     int32_t     written      = 0;
 303     int32_t     numConverted = 0;
 304
 305     if (count < 0) {
 306         count = u_strlen(chars);
 307     }
 308
 309 #if !UCONFIG_NO_TRANSLITERATION
 310     if((f->fTranslit) && (f->fTranslit->translit))
 311     {
 312         /* Do the transliteration */
 313         mySource = u_file_translit(f, chars, &count, flushTranslit);
 314     }
 315 #endif
 316
 317     /* Write to a string. */
 318     if (!f->fFile) {
 319         int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
 320         if (flushIO && charsLeft > count) {
 321             count++;
 322         }
 323         written = ufmt_min(count, charsLeft);
 324         u_strncpy(f->str.fPos, mySource, written);
 325         f->str.fPos += written;
 326         return written;
 327     }
 328
 329     mySourceEnd = mySource + count;
 330
 331     /* Perform the conversion in a loop */
 332     do {
 333         status     = U_ZERO_ERROR;
 334         if(f->fConverter != NULL) { /* We have a valid converter */
 335             ucnv_fromUnicode(f->fConverter,
 336                 &myTarget,
 337                 charBuffer + UFILE_CHARBUFFER_SIZE,
 338                 &mySource,
 339                 mySourceEnd,
 340                 NULL,
 341                 flushIO,
 342                 &status);
 343         } else { /*weiv: do the invariant conversion */
 344             u_UCharsToChars(mySource, myTarget, count);
 345             myTarget += count;
 346         }
 347         numConverted = (int32_t)(myTarget - charBuffer);
 348
 349         if (numConverted > 0) {
 350             /* write the converted bytes */
 351             fwrite(charBuffer,
 352                 sizeof(char),
 353                 numConverted,
 354                 f->fFile);
 355
 356             written     += numConverted;
 357         }
 358         myTarget     = charBuffer;
 359     }
 360     while(status == U_BUFFER_OVERFLOW_ERROR);
 361
 362     /* return # of chars written */
 363     return written;
 364 }
 365
 366 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 367 u_file_write(    const UChar     *chars,
 368              int32_t        count,
 369              UFILE         *f)
 370 {
 371     return u_file_write_flush(chars,count,f,FALSE,FALSE);
 372 }
 373
 374
 375 /* private function used for buffering input */
 376 void
 377 ufile_fill_uchar_buffer(UFILE *f)
 378 {
 379     UErrorCode  status;
 380     const char  *mySource;
 381     const char  *mySourceEnd;
 382     UChar       *myTarget;
 383     int32_t     bufferSize;
 384     int32_t     maxCPBytes;
 385     int32_t     bytesRead;
 386     int32_t     availLength;
 387     int32_t     dataSize;
 388     char        charBuffer[UFILE_CHARBUFFER_SIZE];
 389     u_localized_string *str;
 390
 391     if (f->fFile == NULL) {
 392         /* There is nothing to do. It's a string. */
 393         return;
 394     }
 395
 396     str = &f->str;
 397     dataSize = (int32_t)(str->fLimit - str->fPos);
 398     if (f->fFileno == 0 && dataSize > 0) {
 399         /* Don't read from stdin too many times. There is still some data. */
 400         return;
 401     }
 402
 403     /* shift the buffer if it isn't empty */
 404     if(dataSize != 0) {
 405         uprv_memmove(f->fUCBuffer, str->fPos, dataSize * sizeof(UChar));
 406     }
 407
 408
 409     /* record how much buffer space is available */
 410     availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
 411
 412     /* Determine the # of codepage bytes needed to fill our UChar buffer */
 413     /* weiv: if converter is NULL, we use invariant converter with charwidth = 1)*/
 414     maxCPBytes = availLength / (f->fConverter!=NULL?(2*ucnv_getMinCharSize(f->fConverter)):1);
 415
 416     /* Read in the data to convert */
 417     if (f->fFileno == 0) {
 418         /* Special case. Read from stdin one line at a time. */
 419         char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
 420         bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
 421     }
 422     else {
 423         /* A normal file */
 424         bytesRead = (int32_t)fread(charBuffer,
 425             sizeof(char),
 426             ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
 427             f->fFile);
 428     }
 429
 430     /* Set up conversion parameters */
 431     status      = U_ZERO_ERROR;
 432     mySource    = charBuffer;
 433     mySourceEnd = charBuffer + bytesRead;
 434     myTarget    = f->fUCBuffer + dataSize;
 435     bufferSize  = UFILE_UCHARBUFFER_SIZE;
 436
 437     if(f->fConverter != NULL) { /* We have a valid converter */
 438         /* Perform the conversion */
 439         ucnv_toUnicode(f->fConverter,
 440             &myTarget,
 441             f->fUCBuffer + bufferSize,
 442             &mySource,
 443             mySourceEnd,
 444             NULL,
 445             (UBool)(feof(f->fFile) != 0),
 446             &status);
 447
 448     } else { /*weiv: do the invariant conversion */
 449         u_charsToUChars(mySource, myTarget, bytesRead);
 450         myTarget += bytesRead;
 451     }
 452
 453     /* update the pointers into our array */
 454     str->fPos    = str->fBuffer;
 455     str->fLimit  = myTarget;
 456 }
 457
 458 U_CAPI UChar* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 459 u_fgets(UChar        *s,
 460         int32_t       n,
 461         UFILE        *f)
 462 {
 463     int32_t dataSize;
 464     int32_t count;
 465     UChar *alias;
 466     const UChar *limit;
 467     UChar *sItr;
 468     UChar currDelim = 0;
 469     u_localized_string *str;
 470
 471     if (n <= 0) {
 472         /* Caller screwed up. We need to write the null terminatior. */
 473         return NULL;
 474     }
 475
 476     /* fill the buffer if needed */
 477     str = &f->str;
 478     if (str->fPos >= str->fLimit) {
 479         ufile_fill_uchar_buffer(f);
 480     }
 481
 482     /* subtract 1 from n to compensate for the terminator */
 483     --n;
 484
 485     /* determine the amount of data in the buffer */
 486     dataSize = (int32_t)(str->fLimit - str->fPos);
 487
 488     /* if 0 characters were left, return 0 */
 489     if (dataSize == 0)
 490         return NULL;
 491
 492     /* otherwise, iteratively fill the buffer and copy */
 493     count = 0;
 494     sItr = s;
 495     currDelim = 0;
 496     while (dataSize > 0 && count < n) {
 497         alias = str->fPos;
 498
 499         /* Find how much to copy */
 500         if (dataSize < (n - count)) {
 501             limit = str->fLimit;
 502         }
 503         else {
 504             limit = alias + (n - count);
 505         }
 506
 507         if (!currDelim) {
 508             /* Copy UChars until we find the first occurrence of a delimiter character */
 509             while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
 510                 count++;
 511                 *(sItr++) = *(alias++);
 512             }
 513             /* Preserve the newline */
 514             if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
 515                 if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
 516                     currDelim = *alias;
 517                 }
 518                 else {
 519                     currDelim = 1;  /* This isn't a newline, but it's used to say
 520                                     that we should break later. We've checked all
 521                                     possible newline combinations even across buffer
 522                                     boundaries. */
 523                 }
 524                 count++;
 525                 *(sItr++) = *(alias++);
 526             }
 527         }
 528         /* If we have a CRLF combination, preserve that too. */
 529         if (alias < limit) {
 530             if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
 531                 count++;
 532                 *(sItr++) = *(alias++);
 533             }
 534             currDelim = 1;  /* This isn't a newline, but it's used to say
 535                             that we should break later. We've checked all
 536                             possible newline combinations even across buffer
 537                             boundaries. */
 538         }
 539
 540         /* update the current buffer position */
 541         str->fPos = alias;
 542
 543         /* if we found a delimiter */
 544         if (currDelim == 1) {
 545             /* break out */
 546             break;
 547         }
 548
 549         /* refill the buffer */
 550         ufile_fill_uchar_buffer(f);
 551
 552         /* determine the amount of data in the buffer */
 553         dataSize = (int32_t)(str->fLimit - str->fPos);
 554     }
 555
 556     /* add the terminator and return s */
 557     *sItr = 0x0000;
 558     return s;
 559 }
 560
 561 U_CFUNC UBool U_EXPORT2
 562 ufile_getch(UFILE *f, UChar *ch)
 563 {
 564     UBool isValidChar = FALSE;
 565
 566     *ch = U_EOF;
 567     /* if we have an available character in the buffer, return it */
 568     if(f->str.fPos < f->str.fLimit){
 569         *ch = *(f->str.fPos)++;
 570         isValidChar = TRUE;
 571     }
 572     else if (f) {
 573         /* otherwise, fill the buffer and return the next character */
 574         if(f->str.fPos >= f->str.fLimit) {
 575             ufile_fill_uchar_buffer(f);
 576         }
 577         if(f->str.fPos < f->str.fLimit) {
 578             *ch = *(f->str.fPos)++;
 579             isValidChar = TRUE;
 580         }
 581     }
 582     return isValidChar;
 583 }
 584
 585 U_CAPI UChar U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 586 u_fgetc(UFILE        *f)
 587 {
 588     UChar ch;
 589     ufile_getch(f, &ch);
 590     return ch;
 591 }
 592
 593 U_CFUNC UBool U_EXPORT2
 594 ufile_getch32(UFILE *f, UChar32 *c32)
 595 {
 596     UBool isValidChar = FALSE;
 597     u_localized_string *str;
 598
 599     *c32 = U_EOF;
 600
 601     /* Fill the buffer if it is empty */
 602     str = &f->str;
 603     if (f && str->fPos + 1 >= str->fLimit) {
 604         ufile_fill_uchar_buffer(f);
 605     }
 606
 607     /* Get the next character in the buffer */
 608     if (str->fPos < str->fLimit) {
 609         *c32 = *(str->fPos)++;
 610         if (U_IS_LEAD(*c32)) {
 611             if (str->fPos < str->fLimit) {
 612                 UChar c16 = *(str->fPos)++;
 613                 *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
 614                 isValidChar = TRUE;
 615             }
 616             else {
 617                 *c32 = U_EOF;
 618             }
 619         }
 620         else {
 621             isValidChar = TRUE;
 622         }
 623     }
 624
 625     return isValidChar;
 626 }
 627
 628 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 629 u_fgetcx(UFILE        *f)
 630 {
 631     UChar32 ch;
 632     ufile_getch32(f, &ch);
 633     return ch;
 634 }
 635
 636 U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 637 u_fungetc(UChar32        ch,
 638     UFILE        *f)
 639 {
 640     u_localized_string *str;
 641
 642     str = &f->str;
 643
 644     /* if we're at the beginning of the buffer, sorry! */
 645     if (str->fPos == str->fBuffer
 646         || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
 647     {
 648         ch = U_EOF;
 649     }
 650     else {
 651         /* otherwise, put the character back */
 652         /* Remember, read them back on in the reverse order. */
 653         if (U_IS_LEAD(ch)) {
 654             if (*--(str->fPos) != U16_TRAIL(ch)
 655                 || *--(str->fPos) != U16_LEAD(ch))
 656             {
 657                 ch = U_EOF;
 658             }
 659         }
 660         else if (*--(str->fPos) != ch) {
 661             ch = U_EOF;
 662         }
 663     }
 664     return ch;
 665 }
 666
 667 U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
 668 u_file_read(    UChar        *chars,
 669     int32_t        count,
 670     UFILE         *f)
 671 {
 672     int32_t dataSize;
 673     int32_t read = 0;
 674     u_localized_string *str = &f->str;
 675
 676     do {
 677
 678         /* determine the amount of data in the buffer */
 679         dataSize = (int32_t)(str->fLimit - str->fPos);
 680         if (dataSize <= 0) {
 681             /* fill the buffer */
 682             ufile_fill_uchar_buffer(f);
 683             dataSize = (int32_t)(str->fLimit - str->fPos);
 684         }
 685
 686         /* Make sure that we don't read too much */
 687         if (dataSize > (count - read)) {
 688             dataSize = count - read;
 689         }
 690
 691         /* copy the current data in the buffer */
 692         memcpy(chars + read, str->fPos, dataSize * sizeof(UChar));
 693
 694         /* update number of items read */
 695         read += dataSize;
 696
 697         /* update the current buffer position */
 698         str->fPos += dataSize;
 699     }
 700     while (dataSize != 0 && read < count);
 701
 702     return read;
 703 }