git.saurik.com Git - apple/icu.git/blobdiff - icuSources/io/ustream.cpp
ICU-57132.0.1.tar.gz
[apple/icu.git] / icuSources / io / ustream.cpp
index 0426c8043749a90650dbe778c31fa9979fa1cd03..25c54eb55532f91023bb920bc4d8d7f4cbcb7083 100644 (file)
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (C) 2001-2006, International Business Machines
+*   Copyright (C) 2001-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *  FILE NAME : ustream.cpp
 ******************************************************************************
 */
 
-
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
 #include "unicode/uobject.h"
 #include "unicode/ustream.h"
 #include "unicode/ucnv.h"
 #include "unicode/uchar.h"
+#include "unicode/utf16.h"
 #include "ustr_cnv.h"
+#include "cmemory.h"
 #include <string.h>
 
 // console IO
 
-#if U_IOSTREAM_SOURCE >= 198506
-
 #if U_IOSTREAM_SOURCE >= 199711
+
 #define STD_NAMESPACE std::
-#else
-#define STD_NAMESPACE
-#endif
 
 #define STD_OSTREAM STD_NAMESPACE ostream
 #define STD_ISTREAM STD_NAMESPACE istream
@@ -49,15 +49,16 @@ operator<<(STD_OSTREAM& stream, const UnicodeString& str)
         if(U_SUCCESS(errorCode)) {
             const UChar *us = str.getBuffer();
             const UChar *uLimit = us + str.length();
-            char *s, *sLimit = buffer + sizeof(buffer);
+            char *s, *sLimit = buffer + (sizeof(buffer) - 1);
             do {
                 errorCode = U_ZERO_ERROR;
                 s = buffer;
                 ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, FALSE, &errorCode);
+                *s = 0;
 
                 // write this chunk
                 if(s > buffer) {
-                    stream.write(buffer, (int32_t)(s - buffer));
+                    stream << buffer;
                 }
             } while(errorCode == U_BUFFER_OVERFLOW_ERROR);
             u_releaseDefaultConverter(converter);
@@ -71,6 +72,11 @@ operator<<(STD_OSTREAM& stream, const UnicodeString& str)
 U_IO_API STD_ISTREAM & U_EXPORT2
 operator>>(STD_ISTREAM& stream, UnicodeString& str)
 {
+    // This is like ICU status checking.
+    if (stream.fail()) {
+        return stream;
+    }
+
     /* ipfx should eat whitespace when ios::skipws is set */
     UChar uBuffer[16];
     char buffer[16];
@@ -78,28 +84,41 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
     UConverter *converter;
     UErrorCode errorCode = U_ZERO_ERROR;
 
-    str.truncate(0);
     // use the default converter to convert chunks of text
     converter = u_getDefaultConverter(&errorCode);
     if(U_SUCCESS(errorCode)) {
         UChar *us = uBuffer;
-        const UChar *uLimit = uBuffer + sizeof(uBuffer)/sizeof(*uBuffer);
+        const UChar *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer);
         const char *s, *sLimit;
         char ch;
         UChar ch32;
-        UBool intialWhitespace = TRUE;
+        UBool initialWhitespace = TRUE;
+        UBool continueReading = TRUE;
 
         /* We need to consume one byte at a time to see what is considered whitespace. */
-        while (!stream.eof()) {
+        while (continueReading) {
             ch = stream.get();
-            sLimit = &ch + 1;
-            errorCode = U_ZERO_ERROR;
+            if (stream.eof()) {
+                // The EOF is only set after the get() of an unavailable byte.
+                if (!initialWhitespace) {
+                    stream.clear(stream.eofbit);
+                }
+                continueReading = FALSE;
+            }
+            sLimit = &ch + (int)continueReading;
             us = uBuffer;
             s = &ch;
-            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, FALSE, &errorCode);
+            errorCode = U_ZERO_ERROR;
+            /*
+            Since we aren't guaranteed to see the state before this call,
+            this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
+            We flush on the last byte to ensure that we output truncated multibyte characters.
+            */
+            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
             if(U_FAILURE(errorCode)) {
-                /* Something really bad happened */
-                return stream;
+                /* Something really bad happened. setstate() isn't always an available API */
+                stream.clear(stream.failbit);
+                goto STOP_READING;
             }
             /* Was the character consumed? */
             if (us != uBuffer) {
@@ -109,7 +128,7 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
                 while (uBuffIdx < uBuffSize) {
                     U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
                     if (u_isWhitespace(ch32)) {
-                        if (!intialWhitespace) {
+                        if (!initialWhitespace) {
                             buffer[idx++] = ch;
                             while (idx > 0) {
                                 stream.putback(buffer[--idx]);
@@ -119,8 +138,17 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
-                        /* else skip intialWhitespace */
+                        /* else skip initialWhitespace */
                     }
                     else {
+                        if (initialWhitespace) {
+                            /*
+                            When initialWhitespace is TRUE, we haven't appended any
+                            character yet.  This is where we truncate the string,
+                            to avoid modifying the string before we know if we can
+                            actually read from the stream.
+                            */
+                            str.truncate(0);
+                            initialWhitespace = FALSE;
+                        }
                         str.append(ch32);
-                        intialWhitespace = FALSE;
                     }
                 }
                 idx = 0;
@@ -140,4 +168,4 @@ STOP_READING:
 U_NAMESPACE_END
 
 #endif
-
+#endif