ICU-62107.0.1.tar.gz

[apple/icu.git] / icuSources / io / ustream.cpp
diff --git a/icuSources/io/ustream.cpp b/icuSources/io/ustream.cpp

index 2a547ad47e44453925b1b920e2567e5752ddd9a3..948521a7fef3782951952464af71dcb253bf6d0c 100644 (file)
--- a/icuSources/io/ustream.cpp
+++ b/icuSources/io/ustream.cpp
@@ -1,6 +1,8 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
  **********************************************************************
-*   Copyright (C) 2001-2004, International Business Machines
+*   Copyright (C) 2001-2016, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  **********************************************************************
  *  FILE NAME : ustream.cpp
@@ -12,24 +14,22 @@
  ******************************************************************************
  */
  
-
  #include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
  #include "unicode/uobject.h"
  #include "unicode/ustream.h"
  #include "unicode/ucnv.h"
  #include "unicode/uchar.h"
+#include "unicode/utf16.h"
  #include "ustr_cnv.h"
+#include "cmemory.h"
  #include <string.h>
  
  // console IO
  
-#if U_IOSTREAM_SOURCE >= 198506
-
-#if U_IOSTREAM_SOURCE >= 199711
  #define STD_NAMESPACE std::
-#else
-#define STD_NAMESPACE
-#endif
  
  #define STD_OSTREAM STD_NAMESPACE ostream
  #define STD_ISTREAM STD_NAMESPACE istream
@@ -49,15 +49,16 @@ operator<<(STD_OSTREAM& stream, const UnicodeString& str)
          if(U_SUCCESS(errorCode)) {
              const UChar *us = str.getBuffer();
              const UChar *uLimit = us + str.length();
-            char *s, *sLimit = buffer + sizeof(buffer);
+            char *s, *sLimit = buffer + (sizeof(buffer) - 1);
              do {
                  errorCode = U_ZERO_ERROR;
                  s = buffer;
                  ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, FALSE, &errorCode);
+                *s = 0;
  
                  // write this chunk
                  if(s > buffer) {
-                    stream.write(buffer, s - buffer);
+                    stream << buffer;
                  }
              } while(errorCode == U_BUFFER_OVERFLOW_ERROR);
              u_releaseDefaultConverter(converter);
@@ -71,6 +72,11 @@ operator<<(STD_OSTREAM& stream, const UnicodeString& str)
  U_IO_API STD_ISTREAM & U_EXPORT2
  operator>>(STD_ISTREAM& stream, UnicodeString& str)
  {
+    // Like ICU's UErrorCode convention: if the stream has already failed, do nothing.
+    if (stream.fail()) {
+        return stream;
+    }
+
      /* ipfx should eat whitespace when ios::skipws is set */
      UChar uBuffer[16];
      char buffer[16];
@@ -78,45 +84,72 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
      UConverter *converter;
      UErrorCode errorCode = U_ZERO_ERROR;
  
-    str.truncate(0);
      // use the default converter to convert chunks of text
      converter = u_getDefaultConverter(&errorCode);
      if(U_SUCCESS(errorCode)) {
          UChar *us = uBuffer;
-        const UChar *uLimit = uBuffer + sizeof(uBuffer)/sizeof(*uBuffer);
+        const UChar *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer);
          const char *s, *sLimit;
          char ch;
          UChar ch32;
-        UBool intialWhitespace = TRUE;
+        UBool initialWhitespace = TRUE;
+        UBool continueReading = TRUE;
  
          /* We need to consume one byte at a time to see what is considered whitespace. */
-        while (!stream.eof()) {
+        while (continueReading) {
              ch = stream.get();
-            sLimit = &ch + 1;
-            errorCode = U_ZERO_ERROR;
+            if (stream.eof()) {
+                // The EOF is only set after the get() of an unavailable byte.
+                if (!initialWhitespace) {
+                    stream.clear(stream.eofbit);
+                }
+                continueReading = FALSE;
+            }
+            sLimit = &ch + (int)continueReading;
              us = uBuffer;
              s = &ch;
-            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, FALSE, &errorCode);
+            errorCode = U_ZERO_ERROR;
+            /*
+            Since we aren't guaranteed to see the state before this call,
+            this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
+            We flush on the last byte to ensure that we output truncated multibyte characters.
+            */
+            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
              if(U_FAILURE(errorCode)) {
-                /* Something really bad happened */
-                return stream;
+                /* Something really bad happened. setstate() isn't always an available API */
+                stream.clear(stream.failbit);
+                goto STOP_READING;
              }
              /* Was the character consumed? */
              if (us != uBuffer) {
-                U16_GET(uBuffer, 0, 0, us-uBuffer, ch32);
-                if (u_isWhitespace(ch32)) {
-                    if (!intialWhitespace) {
-                        buffer[idx++] = ch;
-                        while (idx > 0) {
-                            stream.putback(buffer[--idx]);
+                /* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
+                int32_t uBuffSize = us-uBuffer;
+                int32_t uBuffIdx = 0;
+                while (uBuffIdx < uBuffSize) {
+                    U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
+                    if (u_isWhitespace(ch32)) {
+                        if (!initialWhitespace) {
+                            buffer[idx++] = ch;
+                            while (idx > 0) {
+                                stream.putback(buffer[--idx]);
+                            }
+                            goto STOP_READING;
                          }
-                        break;
+                        /* else skip initialWhitespace */
+                    }
+                    else {
+                        if (initialWhitespace) {
+                            /*
+                            When initialWhitespace is TRUE, we haven't appended any
+                            character yet.  This is where we truncate the string,
+                            to avoid modifying the string before we know if we can
+                            actually read from the stream.
+                            */
+                            str.truncate(0);
+                            initialWhitespace = FALSE;
+                        }
+                        str.append(ch32);
                      }
-                    /* else skip intialWhitespace */
-                }
-                else {
-                    str.append(ch32);
-                    intialWhitespace = FALSE;
                  }
                  idx = 0;
              }
@@ -124,6 +157,7 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
                  buffer[idx++] = ch;
              }
          }
+STOP_READING:
          u_releaseDefaultConverter(converter);
      }
  
@@ -134,4 +168,3 @@ operator>>(STD_ISTREAM& stream, UnicodeString& str)
  U_NAMESPACE_END
  
  #endif
-