[apple/icu.git] / icuSources / io / ustream.cpp

// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
*   Copyright (C) 2001-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*  FILE NAME : ustream.cpp
*
*   Modification History:
*
*   Date        Name        Description
*   06/25/2001  grhoten     Move iostream from unistr.h to here
******************************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_CONVERSION

#include "unicode/uobject.h"
#include "unicode/ustream.h"
#include "unicode/ucnv.h"
#include "unicode/uchar.h"
#include "unicode/utf16.h"
#include "ustr_cnv.h"
#include "cmemory.h"
#include <string.h>

// console IO

// Namespace qualifier prepended to the standard stream class names below.
#define STD_NAMESPACE std::

// Shorthand for the standard output and input stream classes used by the
// operator<< and operator>> overloads in this file.
#define STD_OSTREAM STD_NAMESPACE ostream
#define STD_ISTREAM STD_NAMESPACE istream

U_NAMESPACE_BEGIN

/*
 * Writes a UnicodeString to a byte-oriented output stream.
 *
 * The text is converted with the default converter in chunks that fit a
 * fixed-size local buffer. Each chunk is NUL-terminated and inserted as a
 * C string, so a converted chunk that contains a 0 byte is written only up
 * to that byte. An empty string, or a failure to obtain the default
 * converter, leaves the stream untouched. The stream is not flushed.
 *
 * Returns the stream that was passed in.
 */
U_IO_API STD_OSTREAM & U_EXPORT2
operator<<(STD_OSTREAM& stream, const UnicodeString& str)
{
    // Nothing to convert for an empty string.
    if(str.length() <= 0) {
        return stream;
    }

    UErrorCode status = U_ZERO_ERROR;
    UConverter *cnv = u_getDefaultConverter(&status);
    if(U_FAILURE(status)) {
        return stream;
    }

    char chunk[200];
    // Keep the last byte free for the terminating NUL written after each pass.
    char *const chunkLimit = chunk + (sizeof(chunk) - 1);
    const UChar *src = str.getBuffer();
    const UChar *const srcLimit = src + str.length();

    for(;;) {
        char *dest = chunk;
        status = U_ZERO_ERROR;
        ucnv_fromUnicode(cnv, &dest, chunkLimit, &src, srcLimit, 0, FALSE, &status);
        *dest = 0;

        // Emit whatever this pass produced.
        if(dest != chunk) {
            stream << chunk;
        }

        // Only a full output buffer means there is more text left to convert.
        if(status != U_BUFFER_OVERFLOW_ERROR) {
            break;
        }
    }
    u_releaseDefaultConverter(cnv);

    return stream;
}

/*
 * Reads one whitespace-delimited token from a byte-oriented input stream
 * into a UnicodeString.
 *
 * Bytes are fetched one at a time and fed to the default converter. Leading
 * characters for which u_isWhitespace() is true are skipped; the following
 * characters are appended to str until the next whitespace character, whose
 * bytes are put back into the stream. str is emptied only once the first
 * non-whitespace character has been decoded, so it is left unmodified when
 * nothing could be read.
 *
 * Stream state:
 *  - If the stream is already in a failed state, nothing is read.
 *  - When EOF is hit after at least one character was stored, the state is
 *    reset to eofbit alone.
 *  - When the converter reports a failure, the state is set to failbit.
 *
 * Returns the stream that was passed in. The stream is not flushed.
 */
U_IO_API STD_ISTREAM & U_EXPORT2
operator>>(STD_ISTREAM& stream, UnicodeString& str)
{
    // This is like ICU status checking.
    if (stream.fail()) {
        return stream;
    }

    /* ipfx should eat whitespace when ios::skipws is set */
    UChar uBuffer[16];
    /* Bytes consumed by the converter for the character currently being decoded. */
    char buffer[16];
    int32_t idx = 0;
    UConverter *converter;
    UErrorCode errorCode = U_ZERO_ERROR;

    // use the default converter to convert chunks of text
    converter = u_getDefaultConverter(&errorCode);
    if(U_SUCCESS(errorCode)) {
        UChar *us = uBuffer;
        const UChar *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer);
        const char *s, *sLimit;
        char ch;
        /*
        Must be a full code point: U16_NEXT combines a surrogate pair into a
        supplementary code point, which does not fit into a 16-bit UChar.
        */
        UChar32 ch32;
        UBool initialWhitespace = TRUE;
        UBool continueReading = TRUE;

        /* We need to consume one byte at a time to see what is considered whitespace. */
        while (continueReading) {
            ch = stream.get();
            if (stream.eof()) {
                // The EOF is only set after the get() of an unavailable byte.
                if (!initialWhitespace) {
                    stream.clear(stream.eofbit);
                }
                continueReading = FALSE;
            }
            /* At EOF there is no input byte; pass an empty range and just flush. */
            sLimit = &ch + static_cast<int>(continueReading);
            us = uBuffer;
            s = &ch;
            errorCode = U_ZERO_ERROR;
            /*
            Since we aren't guaranteed to see the state before this call,
            this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
            We flush on the last byte to ensure that we output truncated multibyte characters.
            */
            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
            if(U_FAILURE(errorCode)) {
                /* Something really bad happened. setstate() isn't always an available API */
                stream.clear(stream.failbit);
                goto STOP_READING;
            }
            /* Was the character consumed? */
            if (us != uBuffer) {
                /* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
                int32_t uBuffSize = static_cast<int32_t>(us-uBuffer);
                int32_t uBuffIdx = 0;
                while (uBuffIdx < uBuffSize) {
                    U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
                    if (u_isWhitespace(ch32)) {
                        if (!initialWhitespace) {
                            /*
                            End of the token: return the bytes of the
                            terminating whitespace to the stream, last byte first.
                            The guard below always leaves room for this store.
                            */
                            buffer[idx++] = ch;
                            while (idx > 0) {
                                stream.putback(buffer[--idx]);
                            }
                            goto STOP_READING;
                        }
                        /* else skip initialWhitespace */
                    }
                    else {
                        if (initialWhitespace) {
                            /*
                            When initialWhitespace is TRUE, we haven't appended any
                            character yet.  This is where we truncate the string,
                            to avoid modifying the string before we know if we can
                            actually read from the stream.
                            */
                            str.truncate(0);
                            initialWhitespace = FALSE;
                        }
                        /* UChar32 overload: appends a surrogate pair when needed. */
                        str.append(ch32);
                    }
                }
                idx = 0;
            }
            else {
                /*
                No output yet: remember the byte so it can be put back later.
                Never write past the end of buffer; one slot stays reserved for
                the final byte stored right before the putback loop. Bytes
                beyond that capacity are not remembered.
                */
                if (idx < UPRV_LENGTHOF(buffer) - 1) {
                    buffer[idx++] = ch;
                }
            }
        }
STOP_READING:
        u_releaseDefaultConverter(converter);
    }

/*    stream.flush();*/
    return stream;
}

U_NAMESPACE_END

#endif
Commit	Line	Data
f3c0d7a5 A	1	// © 2016 and later: Unicode, Inc. and others.
f3c0d7a5 A	2	// License & terms of use: http://www.unicode.org/copyright.html
374ca955 A	3	/*
374ca955 A	4	**********************************************************************
2ca993e8	5	* Copyright (C) 2001-2016, International Business Machines
374ca955 A	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* FILE NAME : ustream.cpp
	9	*
	10	* Modification History:
	11	*
	12	* Date Name Description
	13	* 06/25/2001 grhoten Move iostream from unistr.h to here
	14	******************************************************************************
	15	*/
	16
374ca955	17	#include "unicode/utypes.h"
b331163b A	18
	19	#if !UCONFIG_NO_CONVERSION
	20
374ca955 A	21	#include "unicode/uobject.h"
	22	#include "unicode/ustream.h"
	23	#include "unicode/ucnv.h"
	24	#include "unicode/uchar.h"
4388f060	25	#include "unicode/utf16.h"
374ca955	26	#include "ustr_cnv.h"
729e4ab9	27	#include "cmemory.h"
374ca955 A	28	#include <string.h>
	29
	30	// console IO
	31
374ca955	32	#define STD_NAMESPACE std::
374ca955 A	33
	34	#define STD_OSTREAM STD_NAMESPACE ostream
	35	#define STD_ISTREAM STD_NAMESPACE istream
	36
	37	U_NAMESPACE_BEGIN
	38
	39	U_IO_API STD_OSTREAM & U_EXPORT2
	40	operator<<(STD_OSTREAM& stream, const UnicodeString& str)
	41	{
	42	if(str.length() > 0) {
	43	char buffer[200];
	44	UConverter *converter;
	45	UErrorCode errorCode = U_ZERO_ERROR;
	46
	47	// use the default converter to convert chunks of text
	48	converter = u_getDefaultConverter(&errorCode);
	49	if(U_SUCCESS(errorCode)) {
	50	const UChar *us = str.getBuffer();
	51	const UChar *uLimit = us + str.length();
729e4ab9	52	char s, sLimit = buffer + (sizeof(buffer) - 1);
374ca955 A	53	do {
	54	errorCode = U_ZERO_ERROR;
	55	s = buffer;
	56	ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, FALSE, &errorCode);
729e4ab9	57	*s = 0;
374ca955 A	58
	59	// write this chunk
	60	if(s > buffer) {
729e4ab9	61	stream << buffer;
374ca955 A	62	}
	63	} while(errorCode == U_BUFFER_OVERFLOW_ERROR);
	64	u_releaseDefaultConverter(converter);
	65	}
	66	}
	67
	68	/* stream.flush();*/
	69	return stream;
	70	}
	71
	72	U_IO_API STD_ISTREAM & U_EXPORT2
	73	operator>>(STD_ISTREAM& stream, UnicodeString& str)
	74	{
46f4442e A	75	// This is like ICU status checking.
	76	if (stream.fail()) {
	77	return stream;
	78	}
	79
374ca955 A	80	/* ipfx should eat whitespace when ios::skipws is set */
	81	UChar uBuffer[16];
	82	char buffer[16];
	83	int32_t idx = 0;
	84	UConverter *converter;
	85	UErrorCode errorCode = U_ZERO_ERROR;
	86
374ca955 A	87	// use the default converter to convert chunks of text
	88	converter = u_getDefaultConverter(&errorCode);
	89	if(U_SUCCESS(errorCode)) {
	90	UChar *us = uBuffer;
2ca993e8	91	const UChar *uLimit = uBuffer + UPRV_LENGTHOF(uBuffer);
374ca955 A	92	const char s, sLimit;
	93	char ch;
	94	UChar ch32;
46f4442e A	95	UBool initialWhitespace = TRUE;
46f4442e A	96	UBool continueReading = TRUE;
374ca955 A	97
374ca955 A	98	/* We need to consume one byte at a time to see what is considered whitespace. */
46f4442e	99	while (continueReading) {
374ca955	100	ch = stream.get();
46f4442e A	101	if (stream.eof()) {
	102	// The EOF is only set after the get() of an unavailable byte.
	103	if (!initialWhitespace) {
	104	stream.clear(stream.eofbit);
	105	}
	106	continueReading = FALSE;
	107	}
	108	sLimit = &ch + (int)continueReading;
374ca955 A	109	us = uBuffer;
374ca955 A	110	s = &ch;
46f4442e A	111	errorCode = U_ZERO_ERROR;
	112	/*
	113	Since we aren't guaranteed to see the state before this call,
	114	this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
	115	We flush on the last byte to ensure that we output truncated multibyte characters.
	116	*/
	117	ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
374ca955	118	if(U_FAILURE(errorCode)) {
46f4442e A	119	/* Something really bad happened. setstate() isn't always an available API */
	120	stream.clear(stream.failbit);
	121	goto STOP_READING;
374ca955 A	122	}
	123	/* Was the character consumed? */
	124	if (us != uBuffer) {
73c04bcf	125	/* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
3d1f044b	126	int32_t uBuffSize = static_cast<int32_t>(us-uBuffer);
73c04bcf A	127	int32_t uBuffIdx = 0;
	128	while (uBuffIdx < uBuffSize) {
	129	U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
	130	if (u_isWhitespace(ch32)) {
46f4442e	131	if (!initialWhitespace) {
73c04bcf A	132	buffer[idx++] = ch;
	133	while (idx > 0) {
	134	stream.putback(buffer[--idx]);
	135	}
	136	goto STOP_READING;
374ca955	137	}
73c04bcf A	138	/* else skip intialWhitespace */
	139	}
	140	else {
46f4442e A	141	if (initialWhitespace) {
	142	/*
	143	When initialWhitespace is TRUE, we haven't appended any
	144	character yet. This is where we truncate the string,
	145	to avoid modifying the string before we know if we can
	146	actually read from the stream.
	147	*/
	148	str.truncate(0);
	149	initialWhitespace = FALSE;
	150	}
73c04bcf	151	str.append(ch32);
374ca955	152	}
374ca955 A	153	}
	154	idx = 0;
	155	}
	156	else {
	157	buffer[idx++] = ch;
	158	}
	159	}
73c04bcf	160	STOP_READING:
374ca955 A	161	u_releaseDefaultConverter(converter);
	162	}
	163
	164	/* stream.flush();*/
	165	return stream;
	166	}
	167
	168	U_NAMESPACE_END
	169
	170	#endif