[apple/icu.git] / icuSources / io / ustream.cpp

/*
**********************************************************************
*   Copyright (C) 2001-2011, International Business Machines
*   Corporation and others.  All Rights Reserved.
**********************************************************************
*  FILE NAME : ustream.cpp
*
*   Modification History:
*
*   Date        Name        Description
*   06/25/2001  grhoten     Move iostream from unistr.h to here
******************************************************************************
*/

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/ustream.h"
#include "unicode/ucnv.h"
#include "unicode/uchar.h"
#include "unicode/utf16.h"
#include "ustr_cnv.h"
#include "cmemory.h"
#include <string.h>

// console IO

#if U_IOSTREAM_SOURCE >= 199711

#define STD_NAMESPACE std::

#define STD_OSTREAM STD_NAMESPACE ostream
#define STD_ISTREAM STD_NAMESPACE istream

U_NAMESPACE_BEGIN

/*
 * Writes a UnicodeString to a byte stream using the default converter.
 *
 * The text is converted in chunks of at most sizeof(chunk) - 1 bytes; each
 * chunk is NUL-terminated and inserted into the stream as a C string, so a
 * chunk is emitted only up to the first NUL byte it contains.
 * Nothing is written for an empty string or when the default converter
 * cannot be obtained. The stream is not flushed.
 */
U_IO_API STD_OSTREAM & U_EXPORT2
operator<<(STD_OSTREAM& stream, const UnicodeString& str)
{
    // Nothing to convert.
    if(str.length() <= 0) {
        return stream;
    }

    // Borrow the shared default converter; give up quietly if unavailable.
    UErrorCode status = U_ZERO_ERROR;
    UConverter *cnv = u_getDefaultConverter(&status);
    if(U_FAILURE(status)) {
        return stream;
    }

    char chunk[200];
    // Keep the last byte free for the terminating NUL.
    char *const chunkLimit = chunk + (sizeof(chunk) - 1);
    const UChar *src = str.getBuffer();
    const UChar *const srcLimit = src + str.length();

    for(;;) {
        char *dest = chunk;
        status = U_ZERO_ERROR;
        ucnv_fromUnicode(cnv, &dest, chunkLimit, &src, srcLimit, 0, FALSE, &status);
        *dest = 0;

        // Emit whatever this round produced.
        if(dest > chunk) {
            stream << chunk;
        }

        // Only a full chunk means there is more text left to convert.
        if(status != U_BUFFER_OVERFLOW_ERROR) {
            break;
        }
    }

    u_releaseDefaultConverter(cnv);
    return stream;
}

/*
 * Extracts one whitespace-delimited token from a byte stream into a
 * UnicodeString, decoding the bytes with the default converter.
 *
 * Bytes are read one at a time. Leading whitespace (per u_isWhitespace) is
 * skipped; code points are then appended to str until the next whitespace,
 * whose bytes are put back on the stream, or until end of input. str is
 * truncated only once the first non-whitespace code point has been decoded,
 * so it is left unmodified when nothing can be read.
 *
 * Stream state on return:
 *  - already failed on entry: returned untouched;
 *  - default converter unavailable: returned untouched;
 *  - EOF after at least one code point was stored: only eofbit is set;
 *  - EOF before any non-whitespace: the state left by get() is kept;
 *  - conversion error, or more undecoded bytes pending than the put-back
 *    buffer can hold: only failbit is set.
 *
 * @param stream the input byte stream
 * @param str    receives the token
 * @return stream
 */
U_IO_API STD_ISTREAM & U_EXPORT2
operator>>(STD_ISTREAM& stream, UnicodeString& str)
{
    // This is like ICU status checking.
    if (stream.fail()) {
        return stream;
    }

    /* ipfx should eat whitespace when ios::skipws is set */
    UChar uBuffer[16];
    /* Bytes the converter has consumed without producing output yet; kept for putback. */
    char buffer[16];
    const int32_t bufferCapacity = (int32_t)sizeof(buffer);
    int32_t idx = 0;
    UConverter *converter;
    UErrorCode errorCode = U_ZERO_ERROR;

    // use the default converter to convert chunks of text
    converter = u_getDefaultConverter(&errorCode);
    if(U_SUCCESS(errorCode)) {
        UChar *us = uBuffer;
        const UChar *uLimit = uBuffer + sizeof(uBuffer)/sizeof(*uBuffer);
        const char *s, *sLimit;
        char ch;
        /* Must be UChar32: U16_NEXT can assemble a supplementary code point (> 0xFFFF). */
        UChar32 ch32;
        UBool initialWhitespace = TRUE;
        UBool continueReading = TRUE;

        /* We need to consume one byte at a time to see what is considered whitespace. */
        while (continueReading) {
            ch = stream.get();
            if (stream.eof()) {
                // The EOF is only set after the get() of an unavailable byte.
                if (!initialWhitespace) {
                    // A token was read: replace the state with eofbit alone,
                    // dropping the failbit raised by the failed get().
                    stream.clear(stream.eofbit);
                }
                continueReading = FALSE;
            }
            /* At EOF there is no byte to convert: the input range is empty and we only flush. */
            sLimit = &ch + (int)continueReading;
            us = uBuffer;
            s = &ch;
            errorCode = U_ZERO_ERROR;
            /*
            Since we aren't guaranteed to see the state before this call,
            this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
            We flush on the last byte to ensure that we output truncated multibyte characters.
            */
            ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
            if(U_FAILURE(errorCode)) {
                /* Something really bad happened. setstate() isn't always an available API */
                stream.clear(stream.failbit);
                goto STOP_READING;
            }
            /* Was the character consumed? */
            if (us != uBuffer) {
                /* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
                int32_t uBuffSize = us-uBuffer;
                int32_t uBuffIdx = 0;
                while (uBuffIdx < uBuffSize) {
                    U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
                    if (u_isWhitespace(ch32)) {
                        if (!initialWhitespace) {
                            /*
                            End of token: return the delimiter's bytes to the
                            stream, last byte first. The current byte is put
                            back directly so it never needs a slot in buffer.
                            */
                            stream.putback(ch);
                            while (idx > 0) {
                                stream.putback(buffer[--idx]);
                            }
                            goto STOP_READING;
                        }
                        /* else skip initialWhitespace */
                    }
                    else {
                        if (initialWhitespace) {
                            /*
                            When initialWhitespace is TRUE, we haven't appended any
                            character yet.  This is where we truncate the string,
                            to avoid modifying the string before we know if we can
                            actually read from the stream.
                            */
                            str.truncate(0);
                            initialWhitespace = FALSE;
                        }
                        str.append(ch32);
                    }
                }
                idx = 0;
            }
            else if (continueReading) {
                /* The byte was swallowed without output; remember it for a possible putback. */
                if (idx >= bufferCapacity) {
                    /*
                    More pending bytes than we could ever put back. Stop
                    instead of writing past the end of buffer.
                    */
                    stream.clear(stream.failbit);
                    goto STOP_READING;
                }
                buffer[idx++] = ch;
            }
        }
STOP_READING:
        u_releaseDefaultConverter(converter);
    }

/*    stream.flush();*/
    return stream;
}

U_NAMESPACE_END

#endif
Commit	Line	Data
374ca955 A	1	/*
374ca955 A	2	**********************************************************************
4388f060	3	* Copyright (C) 2001-2011, International Business Machines
374ca955 A	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* FILE NAME : ustream.cpp
	7	*
	8	* Modification History:
	9	*
	10	* Date Name Description
	11	* 06/25/2001 grhoten Move iostream from unistr.h to here
	12	******************************************************************************
	13	*/
	14
374ca955 A	15	#include "unicode/utypes.h"
	16	#include "unicode/uobject.h"
	17	#include "unicode/ustream.h"
	18	#include "unicode/ucnv.h"
	19	#include "unicode/uchar.h"
4388f060	20	#include "unicode/utf16.h"
374ca955	21	#include "ustr_cnv.h"
729e4ab9	22	#include "cmemory.h"
374ca955 A	23	#include <string.h>
	24
	25	// console IO
	26
374ca955	27	#if U_IOSTREAM_SOURCE >= 199711
4388f060	28
374ca955	29	#define STD_NAMESPACE std::
374ca955 A	30
	31	#define STD_OSTREAM STD_NAMESPACE ostream
	32	#define STD_ISTREAM STD_NAMESPACE istream
	33
	34	U_NAMESPACE_BEGIN
	35
	36	U_IO_API STD_OSTREAM & U_EXPORT2
	37	operator<<(STD_OSTREAM& stream, const UnicodeString& str)
	38	{
	39	if(str.length() > 0) {
	40	char buffer[200];
	41	UConverter *converter;
	42	UErrorCode errorCode = U_ZERO_ERROR;
	43
	44	// use the default converter to convert chunks of text
	45	converter = u_getDefaultConverter(&errorCode);
	46	if(U_SUCCESS(errorCode)) {
	47	const UChar *us = str.getBuffer();
	48	const UChar *uLimit = us + str.length();
729e4ab9	49	char *s, *sLimit = buffer + (sizeof(buffer) - 1);
374ca955 A	50	do {
	51	errorCode = U_ZERO_ERROR;
	52	s = buffer;
	53	ucnv_fromUnicode(converter, &s, sLimit, &us, uLimit, 0, FALSE, &errorCode);
729e4ab9	54	*s = 0;
374ca955 A	55
	56	// write this chunk
	57	if(s > buffer) {
729e4ab9	58	stream << buffer;
374ca955 A	59	}
	60	} while(errorCode == U_BUFFER_OVERFLOW_ERROR);
	61	u_releaseDefaultConverter(converter);
	62	}
	63	}
	64
	65	/* stream.flush();*/
	66	return stream;
	67	}
	68
	69	U_IO_API STD_ISTREAM & U_EXPORT2
	70	operator>>(STD_ISTREAM& stream, UnicodeString& str)
	71	{
46f4442e A	72	// This is like ICU status checking.
	73	if (stream.fail()) {
	74	return stream;
	75	}
	76
374ca955 A	77	/* ipfx should eat whitespace when ios::skipws is set */
	78	UChar uBuffer[16];
	79	char buffer[16];
	80	int32_t idx = 0;
	81	UConverter *converter;
	82	UErrorCode errorCode = U_ZERO_ERROR;
	83
374ca955 A	84	// use the default converter to convert chunks of text
	85	converter = u_getDefaultConverter(&errorCode);
	86	if(U_SUCCESS(errorCode)) {
	87	UChar *us = uBuffer;
	88	const UChar *uLimit = uBuffer + sizeof(uBuffer)/sizeof(*uBuffer);
	89	const char *s, *sLimit;
	90	char ch;
	91	UChar ch32;
46f4442e A	92	UBool initialWhitespace = TRUE;
46f4442e A	93	UBool continueReading = TRUE;
374ca955 A	94
374ca955 A	95	/* We need to consume one byte at a time to see what is considered whitespace. */
46f4442e	96	while (continueReading) {
374ca955	97	ch = stream.get();
46f4442e A	98	if (stream.eof()) {
	99	// The EOF is only set after the get() of an unavailable byte.
	100	if (!initialWhitespace) {
	101	stream.clear(stream.eofbit);
	102	}
	103	continueReading = FALSE;
	104	}
	105	sLimit = &ch + (int)continueReading;
374ca955 A	106	us = uBuffer;
374ca955 A	107	s = &ch;
46f4442e A	108	errorCode = U_ZERO_ERROR;
	109	/*
	110	Since we aren't guaranteed to see the state before this call,
	111	this code won't work on stateful encodings like ISO-2022 or an EBCDIC stateful encoding.
	112	We flush on the last byte to ensure that we output truncated multibyte characters.
	113	*/
	114	ucnv_toUnicode(converter, &us, uLimit, &s, sLimit, 0, !continueReading, &errorCode);
374ca955	115	if(U_FAILURE(errorCode)) {
46f4442e A	116	/* Something really bad happened. setstate() isn't always an available API */
	117	stream.clear(stream.failbit);
	118	goto STOP_READING;
374ca955 A	119	}
	120	/* Was the character consumed? */
	121	if (us != uBuffer) {
73c04bcf A	122	/* Reminder: ibm-1390 & JISX0213 can output 2 Unicode code points */
	123	int32_t uBuffSize = us-uBuffer;
	124	int32_t uBuffIdx = 0;
	125	while (uBuffIdx < uBuffSize) {
	126	U16_NEXT(uBuffer, uBuffIdx, uBuffSize, ch32);
	127	if (u_isWhitespace(ch32)) {
46f4442e	128	if (!initialWhitespace) {
73c04bcf A	129	buffer[idx++] = ch;
	130	while (idx > 0) {
	131	stream.putback(buffer[--idx]);
	132	}
	133	goto STOP_READING;
374ca955	134	}
73c04bcf A	135	/* else skip intialWhitespace */
	136	}
	137	else {
46f4442e A	138	if (initialWhitespace) {
	139	/*
	140	When initialWhitespace is TRUE, we haven't appended any
	141	character yet. This is where we truncate the string,
	142	to avoid modifying the string before we know if we can
	143	actually read from the stream.
	144	*/
	145	str.truncate(0);
	146	initialWhitespace = FALSE;
	147	}
73c04bcf	148	str.append(ch32);
374ca955	149	}
374ca955 A	150	}
	151	idx = 0;
	152	}
	153	else {
	154	buffer[idx++] = ch;
	155	}
	156	}
73c04bcf	157	STOP_READING:
374ca955 A	158	u_releaseDefaultConverter(converter);
	159	}
	160
	161	/* stream.flush();*/
	162	return stream;
	163	}
	164
	165	U_NAMESPACE_END
	166
	167	#endif