]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/io/ustream.cpp
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / io / ustream.cpp
index 2a547ad47e44453925b1b920e2567e5752ddd9a3..948521a7fef3782951952464af71dcb253bf6d0c 100644 (file)
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
 /*
 **********************************************************************
-*   Copyright (C) 2001-2004, International Business Machines
+*   Copyright (C) 2001-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 *  FILE NAME : ustream.cpp
 ******************************************************************************
 */
 
-
 #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
 #include "unicode/uobject.h"
 #include "unicode/ustream.h"
 #include "unicode/ucnv.h"
 #include "unicode/uchar.h"
+#include "unicode/utf16.h"
 #include "ustr_cnv.h"
+#include "cmemory.h"
 #include <string.h>
 
 // console IO
 
-#if U_IOSTREAM_SOURCE >= 198506
-
-#if U_IOSTREAM_SOURCE >= 199711
 #define STD_NAMESPACE std::
-#else
-#define STD_NAMESPACE
-#endif
 
 #define STD_OSTREAM STD_NAMESPACE ostream
 #define STD_ISTREAM STD_NAMESPACE istream
@@ -49,15 +49,16 @@ operator<<(STD_OSTREAM& stream, const UnicodeString& str)
         if(U_SUCCESS(errorCode)) {
             const UChar *us = str.getBuffer();
             const UChar *uLimit = us + str.length();
-            char *s, *sLimit = buffer + sizeof(buffer);
+            char *s, *sLimit = buffer + (sizeof(buffer) - 1);
             do {
                 errorCode = U_ZERO_ERROR;
                 s = buffer;
                 ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, FALSE, &errorCode);
+                *s = 0;
 
                 // write this chunk
                 if(s > buffer) {
-                    stream.write(buffer, s - buffer);
+                    stream << buffer;
                 }
             } while(errorCode == U_BUFFER_OVERFLOW_ERROR);
             u_releaseDefaultConverter(converter);
@@ -71,6 +72,11 @@ operator<<(STD_OSTREAM& stream, const UnicodeString& str)
 U_IO_API STD_ISTREAM & U_EXPORT2
 operator>>(STD_ISTREAM& stream, UnicodeString& str)
 {
+    // Like ICU's error-code convention: do nothing if the stream has already failed.
+    if (stream.fail()) {
+        return stream;
+    }
+
     /* ipfx should eat whitespace when ios::skipws is set */
     UChar uBuffer[16];
     char buffer[16];
@@ -78,45 +84,72 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
     UConverter *converter;
     UErrorCode errorCode = U_ZERO_ERROR;
 
-    str.truncate(0);
     // use the default converter to convert chunks of text
     converter = u_getDefaultConverter(&errorCode);
     if(U_SUCCESS(errorCode)) {
         UChar *us = uBuffer;
-        const UChar *uLimit = uBuffer + sizeof(uBuffer)/sizeof(*uBuffer);
+        const UChar *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer);
         const char *s, *sLimit;
         char ch;
         UChar ch32;
-        UBool intialWhitespace = TRUE;
+        UBool initialWhitespace = TRUE;
+        UBool continueReading = TRUE;
 
         /* We need to consume one byte at a time to see what is considered whitespace. */
-        while (!stream.eof()) {
+        while (continueReading) {
             ch = stream.get();
-            sLimit = &ch + 1;
-            errorCode = U_ZERO_ERROR;
+            if (stream.eof()) {
+                // The EOF is only set after the get() of an unavailable byte.
+                if (!initialWhitespace) {
+                    stream.clear(stream.eofbit);
+                }
+                continueReading = FALSE;
+            }
+            sLimit = &ch + (int)continueReading;
             us = uBuffer;
             s = &ch;
-            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, FALSE, &errorCode);
+            errorCode = U_ZERO_ERROR;
+            /*
+            Since we aren't guaranteed to see the converter state before this call,
+            this code won't work with stateful encodings like ISO-2022 or stateful EBCDIC.
+            We set the flush flag once the stream is exhausted (no byte left to convert) so that truncated multibyte characters are still output.
+            */
+            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
             if(U_FAILURE(errorCode)) {
-                /* Something really bad happened */
-                return stream;
+                /* Something really bad happened. setstate() isn't always an available API */
+                stream.clear(stream.failbit);
+                goto STOP_READING;
             }
             /* Was the character consumed? */
             if (us != uBuffer) {
-                U16_GET(uBuffer, 0, 0, us-uBuffer, ch32);
-                if (u_isWhitespace(ch32)) {
-                    if (!intialWhitespace) {
-                        buffer[idx++] = ch;
-                        while (idx > 0) {
-                            stream.putback(buffer[--idx]);
+                /* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
+                int32_t uBuffSize = us-uBuffer;
+                int32_t uBuffIdx = 0;
+                while (uBuffIdx < uBuffSize) {
+                    U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
+                    if (u_isWhitespace(ch32)) {
+                        if (!initialWhitespace) {
+                            buffer[idx++] = ch;
+                            while (idx > 0) {
+                                stream.putback(buffer[--idx]);
+                            }
+                            goto STOP_READING;
                         }
-                        break;
+                        /* else skip initialWhitespace */
+                    }
+                    else {
+                        if (initialWhitespace) {
+                            /*
+                            When initialWhitespace is TRUE, we haven't appended any
+                            character yet.  This is where we truncate the string,
+                            to avoid modifying the string before we know if we can
+                            actually read from the stream.
+                            */
+                            str.truncate(0);
+                            initialWhitespace = FALSE;
+                        }
+                        str.append(ch32);
                     }
-                    /* else skip intialWhitespace */
-                }
-                else {
-                    str.append(ch32);
-                    intialWhitespace = FALSE;
                 }
                 idx = 0;
             }
@@ -124,6 +157,7 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
                 buffer[idx++] = ch;
             }
         }
+STOP_READING:
         u_releaseDefaultConverter(converter);
     }
 
@@ -134,4 +168,3 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
 U_NAMESPACE_END
 
 #endif
-