[apple/javascriptcore.git] / runtime / UString.cpp

/*
 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
 *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
 *  Copyright (C) 2009 Google Inc. All rights reserved.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Library General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Library General Public License for more details.
 *
 *  You should have received a copy of the GNU Library General Public License
 *  along with this library; see the file COPYING.LIB.  If not, write to
 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 *  Boston, MA 02110-1301, USA.
 *
 */

#include "config.h"
#include "UString.h"

#include "JSGlobalObjectFunctions.h"
#include "Heap.h"
#include "Identifier.h"
#include "Operations.h"
#include <ctype.h>
#include <limits.h>
#include <limits>
#include <stdio.h>
#include <stdlib.h>
#include <wtf/ASCIICType.h>
#include <wtf/Assertions.h>
#include <wtf/MathExtras.h>
#include <wtf/StringExtras.h>
#include <wtf/Vector.h>
#include <wtf/dtoa.h>
#include <wtf/unicode/UTF8.h>

#if HAVE(STRINGS_H)
#include <strings.h>
#endif

using namespace WTF;
using namespace WTF::Unicode;
using namespace std;

namespace JSC {

// Build-time guard: compilation fails if UString ever stops being exactly one pointer wide.
COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);

// Builds a string from 'length' UTF-16 code units starting at 'characters'.
// When 'characters' is null no StringImpl is created and m_impl stays null.
UString::UString(const UChar* characters, unsigned length)
{
    if (characters)
        m_impl = StringImpl::create(characters, length);
}

// Construct a string with UTF-16 data, from a null-terminated source.
// A null 'characters' pointer produces a string with no StringImpl.
UString::UString(const UChar* characters)
{
    if (!characters)
        return;

    // Count code units up to (not including) the terminating zero. The counter is
    // unsigned to match the length parameter of StringImpl::create() and the other
    // length handling in this file, and so that it can never overflow as a signed int.
    unsigned length = 0;
    while (characters[length] != UChar(0))
        ++length;

    m_impl = StringImpl::create(characters, length);
}

// Builds a string from 'length' latin1 characters starting at 'characters'.
// When 'characters' is null no StringImpl is created and m_impl stays null.
UString::UString(const LChar* characters, unsigned length)
{
    if (characters)
        m_impl = StringImpl::create(characters, length);
}

// Same as the LChar overload, for callers holding plain 'char' data: the bytes are
// reinterpreted as latin1 characters. A null pointer leaves m_impl null.
UString::UString(const char* characters, unsigned length)
{
    if (!characters)
        return;

    const LChar* latin1Characters = reinterpret_cast<const LChar*>(characters);
    m_impl = StringImpl::create(latin1Characters, length);
}

// Builds a string from null-terminated latin1 data; the length is left for
// StringImpl::create() to work out. A null pointer leaves m_impl null.
UString::UString(const LChar* characters)
{
    if (characters)
        m_impl = StringImpl::create(characters);
}

// Null-terminated 'char' flavour of the latin1 constructor: the bytes are
// reinterpreted as LChar before being handed to StringImpl::create().
// A null pointer leaves m_impl null.
UString::UString(const char* characters)
{
    if (!characters)
        return;

    const LChar* latin1Characters = reinterpret_cast<const LChar*>(characters);
    m_impl = StringImpl::create(latin1Characters);
}

// Formats a signed int in decimal, with a leading '-' for negative values.
UString UString::number(int i)
{
    // Three characters per byte is enough for the digits, plus one for the sign.
    // The buffer is filled backwards, from its end towards its start.
    LChar buf[1 + sizeof(i) * 3];
    LChar* end = buf + WTF_ARRAY_LENGTH(buf);
    LChar* p = end;

    // Take the magnitude in unsigned arithmetic: "0u - x" is well defined for every
    // input, including INT_MIN, whose magnitude cannot be represented as an int.
    const bool negative = i < 0;
    unsigned magnitude = negative ? 0u - static_cast<unsigned>(i) : static_cast<unsigned>(i);

    // do/while so that zero still produces the single digit '0'.
    do {
        *--p = static_cast<LChar>((magnitude % 10) + '0');
        magnitude /= 10;
    } while (magnitude);

    if (negative)
        *--p = '-';

    return UString(p, static_cast<unsigned>(end - p));
}

// Formats a signed long long in decimal, with a leading '-' for negative values.
UString UString::number(long long i)
{
    // Three characters per byte is enough for the digits, plus one for the sign.
    // The buffer is filled backwards, from its end towards its start.
    LChar buf[1 + sizeof(i) * 3];
    LChar* end = buf + WTF_ARRAY_LENGTH(buf);
    LChar* p = end;

    // Take the magnitude in unsigned arithmetic: "0 - x" on an unsigned type is well
    // defined for every input, including the minimum value, whose magnitude cannot be
    // represented as a long long. This also avoids platform-specific printf formats.
    const bool negative = i < 0;
    unsigned long long magnitude = static_cast<unsigned long long>(i);
    if (negative)
        magnitude = 0ULL - magnitude;

    // do/while so that zero still produces the single digit '0'.
    do {
        *--p = static_cast<LChar>((magnitude % 10) + '0');
        magnitude /= 10;
    } while (magnitude);

    if (negative)
        *--p = '-';

    return UString(p, static_cast<unsigned>(end - p));
}

// Formats an unsigned value in decimal.
UString UString::number(unsigned u)
{
    // Digits are produced least-significant first, so write them from the back
    // of the scratch array towards the front.
    LChar digits[sizeof(u) * 3];
    LChar* const digitsEnd = digits + WTF_ARRAY_LENGTH(digits);
    LChar* cursor = digitsEnd;

    // The body runs at least once, which yields "0" for a zero input.
    do {
        *--cursor = static_cast<LChar>('0' + (u % 10));
        u /= 10;
    } while (u);

    return UString(cursor, static_cast<unsigned>(digitsEnd - cursor));
}

// Formats a signed long in decimal, with a leading '-' for negative values.
UString UString::number(long l)
{
    // Three characters per byte is enough for the digits, plus one for the sign.
    // The buffer is filled backwards, from its end towards its start.
    LChar buf[1 + sizeof(l) * 3];
    LChar* end = buf + WTF_ARRAY_LENGTH(buf);
    LChar* p = end;

    // Take the magnitude in unsigned arithmetic: "0 - x" on an unsigned type is well
    // defined for every input, including LONG_MIN, whose magnitude cannot be
    // represented as a long.
    const bool negative = l < 0;
    unsigned long magnitude = static_cast<unsigned long>(l);
    if (negative)
        magnitude = 0UL - magnitude;

    // do/while so that zero still produces the single digit '0'.
    do {
        *--p = static_cast<LChar>((magnitude % 10) + '0');
        magnitude /= 10;
    } while (magnitude);

    if (negative)
        *--p = '-';

    // Explicit cast, as in the other number() overloads: the pointer difference is a
    // ptrdiff_t but the constructor takes an unsigned length.
    return UString(p, static_cast<unsigned>(end - p));
}

// Formats a double by handing it, together with a local NumberToStringBuffer,
// to numberToString() and wrapping whatever that returns in a UString.
UString UString::number(double d)
{
    NumberToStringBuffer buffer;
    return UString(numberToString(d, buffer));
}

// Returns the sub-range [offset, offset + length) of this string, created through
// StringImpl::create(m_impl, ...). Both arguments are clamped to the string's bounds;
// a request that covers the whole string simply returns a copy of *this.
UString UString::substringSharingImpl(unsigned offset, unsigned length) const
{
    // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).

    const unsigned totalLength = this->length();
    const unsigned start = min(offset, totalLength);
    const unsigned count = min(length, totalLength - start);

    const bool coversWholeString = !start && count == totalLength;
    if (coversWholeString)
        return *this;

    return UString(StringImpl::create(m_impl, start, count));
}

// Compares a UString with a null-terminated C string.
// A UString for which isEmpty() is true compares equal to a null pointer and to an
// empty C string (""); every other case is decided by equal() on the StringImpl.
bool operator==(const UString& s1, const char *s2)
{
    // Checking only "!s2" here would make an empty UString compare unequal to "".
    if (s1.isEmpty())
        return !s2 || !*s2;

    return equal(s1.impl(), s2);
}

// This method assumes that all simple checks have been performed by
// the inlined operator==() in the header file.
bool equalSlowCase(const UString& s1, const UString& s2)
{
    StringImpl* rep1 = s1.impl();
    StringImpl* rep2 = s2.impl();
    unsigned size1 = rep1->length();

    // At this point we know 
    //   (a) that the strings are the same length and
    //   (b) that they are greater than zero length.
    bool s1Is8Bit = rep1->is8Bit();
    bool s2Is8Bit = rep2->is8Bit();
    
    if (s1Is8Bit) {
        const LChar* d1 = rep1->characters8();
        if (s2Is8Bit) {
            const LChar* d2 = rep2->characters8();
            
            if (d1 == d2) // Check to see if the data pointers are the same.
                return true;
            
            // Do quick checks for sizes 1 and 2.
            switch (size1) {
            case 1:
                return d1[0] == d2[0];
            case 2:
                return (d1[0] == d2[0]) & (d1[1] == d2[1]);
            default:
                return (!memcmp(d1, d2, size1 * sizeof(LChar)));
            }
        }
        
        const UChar* d2 = rep2->characters16();
        
        for (unsigned i = 0; i < size1; i++) {
            if (d1[i] != d2[i])
                return false;
        }
        return true;
    }
    
    if (s2Is8Bit) {
        const UChar* d1 = rep1->characters16();
        const LChar* d2 = rep2->characters8();
        
        for (unsigned i = 0; i < size1; i++) {
            if (d1[i] != d2[i])
                return false;
        }
        return true;
        
    }
    
    const UChar* d1 = rep1->characters16();
    const UChar* d2 = rep2->characters16();
    
    if (d1 == d2) // Check to see if the data pointers are the same.
        return true;
    
    // Do quick checks for sizes 1 and 2.
    switch (size1) {
    case 1:
        return d1[0] == d2[0];
    case 2:
        return (d1[0] == d2[0]) & (d1[1] == d2[1]);
    default:
        return (!memcmp(d1, d2, size1 * sizeof(UChar)));
    }
}

// Orders two strings by comparing code units position by position; the first differing
// position decides. If one string is a prefix of the other, the shorter one is smaller.
bool operator<(const UString& s1, const UString& s2)
{
    const unsigned length1 = s1.length();
    const unsigned length2 = s2.length();
    const unsigned commonLength = length1 < length2 ? length1 : length2;

    // When both strings are 8-bit, compare the 8-bit data directly.
    if (s1.is8Bit() && s2.is8Bit()) {
        const LChar* left = s1.characters8();
        const LChar* right = s2.characters8();

        for (unsigned i = 0; i < commonLength; ++i) {
            if (left[i] != right[i])
                return left[i] < right[i];
        }
        return length1 < length2;
    }

    // Otherwise compare through the UChar view returned by characters().
    const UChar* left = s1.characters();
    const UChar* right = s2.characters();

    for (unsigned i = 0; i < commonLength; ++i) {
        if (left[i] != right[i])
            return left[i] < right[i];
    }
    return length1 < length2;
}

// s1 > s2 holds exactly when s2 < s1: at the first differing position s1's code unit
// is the larger one, or, when one string is a prefix of the other, s1 is the longer.
// Delegating to operator< keeps the two orderings in a single implementation and
// lets this operator use the same 8-bit comparison path.
bool operator>(const UString& s1, const UString& s2)
{
    return s2 < s1;
}

// Copies 'length' code units into a new CString, keeping NUL and the printable ASCII
// characters (0x20..0x7e) and replacing everything else with '?'. One template serves
// both the 8-bit and the 16-bit representation so the two cannot disagree.
template<typename CharType>
static CString asciiFromCharacters(const CharType* characters, unsigned length)
{
    char* characterBuffer;
    CString result = CString::newUninitialized(length, characterBuffer);

    for (unsigned i = 0; i < length; ++i) {
        CharType ch = characters[i];
        // 0x7f (DEL) is a control character and is replaced like the ones below 0x20.
        characterBuffer[i] = ch && (ch < 0x20 || ch >= 0x7f) ? '?' : ch;
    }

    return result;
}

// Returns a CString of the same length as this string in which NUL and printable
// ASCII characters are preserved and every other character is converted to '?'.
// The result does not depend on whether the string is stored as 8-bit or 16-bit data.
CString UString::ascii() const
{
    unsigned length = this->length();

    if (this->is8Bit())
        return asciiFromCharacters(this->characters8(), length);

    return asciiFromCharacters(this->characters16(), length);
}

// Latin1 (ISO) rendering of the string: code units 0..255 are copied unchanged,
// anything above 0xff is replaced with '?'. The result has the same length as the string.
CString UString::latin1() const
{
    const unsigned count = this->length();
    const UChar* source = this->characters();

    char* destination;
    CString converted = CString::newUninitialized(count, destination);

    for (unsigned index = 0; index < count; ++index) {
        const UChar unit = source[index];
        if (unit > 0xff)
            destination[index] = '?';
        else
            destination[index] = static_cast<char>(unit);
    }

    return converted;
}

// Writes 'ch' as a three-byte UTF-8 sequence at 'buffer' and advances 'buffer' past it.
// The caller is responsible for making sure three bytes of room are available.
static inline void putUTF8Triple(char*& buffer, UChar ch)
{
    ASSERT(ch >= 0x0800);

    const int topFourBits = (ch >> 12) & 0x0F;
    const int middleSixBits = (ch >> 6) & 0x3F;
    const int lowSixBits = ch & 0x3F;

    buffer[0] = static_cast<char>(topFourBits | 0xE0);
    buffer[1] = static_cast<char>(middleSixBits | 0x80);
    buffer[2] = static_cast<char>(lowSixBits | 0x80);
    buffer += 3;
}

// Converts the string to UTF-8.
//  - An empty string yields an empty (zero-length) CString.
//  - A default-constructed CString is returned when the string is too long for the
//    worst-case buffer size to fit in an unsigned, when the conversion reports
//    sourceIllegal, or when 'strict' is set and the conversion reports sourceExhausted.
//  - In non-strict mode a trailing unpaired high surrogate is encoded as three bytes.
CString UString::utf8(bool strict) const
{
    unsigned length = this->length();

    if (!length)
        return CString("", 0);

    // Allocate a buffer big enough to hold all the characters
    // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
    // Optimization ideas, if we find this function is hot:
    //  * We could speculatively create a CStringBuffer to contain 'length' 
    //    characters, and resize if necessary (i.e. if the buffer contains
    //    non-ascii characters). (Alternatively, scan the buffer first for
    //    ascii characters, so we know this will be sufficient).
    //  * We could allocate a CStringBuffer with an appropriate size to
    //    have a good chance of being able to write the string into the
    //    buffer without reallocing (say, 1.5 x length).
    if (length > numeric_limits<unsigned>::max() / 3)
        return CString();

    Vector<char, 1024> bufferVector(length * 3);
    char* buffer = bufferVector.data();
    // Fixed end of the output buffer; 'buffer' itself is advanced by the converters.
    char* bufferEnd = buffer + bufferVector.size();

    if (is8Bit()) {
        const LChar* characters = this->characters8();

        ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, bufferEnd);
        ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
    } else {
        const UChar* characters = this->characters16();
        // Fixed end of the input; 'characters' is advanced by the converter.
        const UChar* charactersEnd = characters + length;

        ConversionResult result = convertUTF16ToUTF8(&characters, charactersEnd, &buffer, bufferEnd, strict);
        ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion

        // Only produced from strict conversion.
        if (result == sourceIllegal)
            return CString();

        // Check for an unconverted high surrogate.
        if (result == sourceExhausted) {
            if (strict)
                return CString();
            // This should be one unpaired high surrogate. Treat it the same
            // way as an unpaired high surrogate would have been handled in
            // the middle of a string with non-strict conversion - which is
            // to say, simply encode it to UTF-8.
            ASSERT((characters + 1) == charactersEnd);
            ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
            // There should be room left, since one UChar hasn't been converted.
            // Measure against the real end of the buffer, not the current write position.
            ASSERT((buffer + 3) <= bufferEnd);
            putUTF8Triple(buffer, *characters);
        }
    }

    return CString(bufferVector.data(), buffer - bufferVector.data());
}

} // namespace JSC
Commit	Line	Data
9dae56ea A	1	/*
9dae56ea A	2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
ba379fdc	3	* Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
9dae56ea	4	* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
ba379fdc	5	* Copyright (C) 2009 Google Inc. All rights reserved.
9dae56ea A	6	*
	7	* This library is free software; you can redistribute it and/or
	8	* modify it under the terms of the GNU Library General Public
	9	* License as published by the Free Software Foundation; either
	10	* version 2 of the License, or (at your option) any later version.
	11	*
	12	* This library is distributed in the hope that it will be useful,
	13	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	14	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	15	* Library General Public License for more details.
	16	*
	17	* You should have received a copy of the GNU Library General Public License
	18	* along with this library; see the file COPYING.LIB. If not, write to
	19	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	20	* Boston, MA 02110-1301, USA.
	21	*
	22	*/
	23
	24	#include "config.h"
	25	#include "UString.h"
	26
	27	#include "JSGlobalObjectFunctions.h"
14957cd0	28	#include "Heap.h"
9dae56ea A	29	#include "Identifier.h"
	30	#include "Operations.h"
	31	#include <ctype.h>
9dae56ea	32	#include <limits.h>
f9bf01c6	33	#include <limits>
9dae56ea A	34	#include <stdio.h>
	35	#include <stdlib.h>
	36	#include <wtf/ASCIICType.h>
	37	#include <wtf/Assertions.h>
	38	#include <wtf/MathExtras.h>
f9bf01c6	39	#include <wtf/StringExtras.h>
9dae56ea	40	#include <wtf/Vector.h>
6fe7ccc8	41	#include <wtf/dtoa.h>
9dae56ea A	42	#include <wtf/unicode/UTF8.h>
9dae56ea A	43
9dae56ea A	44	#if HAVE(STRINGS_H)
	45	#include <strings.h>
	46	#endif
	47
	48	using namespace WTF;
	49	using namespace WTF::Unicode;
	50	using namespace std;
	51
9dae56ea	52	namespace JSC {
4e4e5a6f	53
14957cd0	54	COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
9dae56ea	55
14957cd0 A	56	// Construct a string with UTF-16 data.
	57	UString::UString(const UChar* characters, unsigned length)
	58	: m_impl(characters ? StringImpl::create(characters, length) : 0)
9dae56ea	59	{
9dae56ea A	60	}
9dae56ea A	61
14957cd0 A	62	// Construct a string with UTF-16 data, from a null-terminated source.
14957cd0 A	63	UString::UString(const UChar* characters)
9dae56ea	64	{
14957cd0 A	65	if (!characters)
	66	return;
	67
	68	int length = 0;
	69	while (characters[length] != UChar(0))
	70	++length;
	71
	72	m_impl = StringImpl::create(characters, length);
9dae56ea A	73	}
9dae56ea A	74
14957cd0	75	// Construct a string with latin1 data.
6fe7ccc8	76	UString::UString(const LChar* characters, unsigned length)
14957cd0	77	: m_impl(characters ? StringImpl::create(characters, length) : 0)
9dae56ea	78	{
9dae56ea A	79	}
9dae56ea A	80
6fe7ccc8 A	81	UString::UString(const char* characters, unsigned length)
	82	: m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters), length) : 0)
	83	{
	84	}
	85
14957cd0	86	// Construct a string with latin1 data, from a null-terminated source.
6fe7ccc8	87	UString::UString(const LChar* characters)
14957cd0	88	: m_impl(characters ? StringImpl::create(characters) : 0)
9dae56ea	89	{
9dae56ea A	90	}
9dae56ea A	91
6fe7ccc8 A	92	UString::UString(const char* characters)
	93	: m_impl(characters ? StringImpl::create(reinterpret_cast<const LChar*>(characters)) : 0)
	94	{
	95	}
	96
14957cd0	97	UString UString::number(int i)
9dae56ea	98	{
6fe7ccc8 A	99	LChar buf[1 + sizeof(i) * 3];
	100	LChar* end = buf + WTF_ARRAY_LENGTH(buf);
	101	LChar* p = end;
4e4e5a6f	102
9dae56ea A	103	if (i == 0)
	104	*--p = '0';
	105	else if (i == INT_MIN) {
	106	char minBuf[1 + sizeof(i) * 3];
fb8617cd	107	snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
f9bf01c6	108	return UString(minBuf);
9dae56ea A	109	} else {
	110	bool negative = false;
	111	if (i < 0) {
	112	negative = true;
	113	i = -i;
	114	}
	115	while (i) {
	116	*--p = static_cast<unsigned short>((i % 10) + '0');
	117	i /= 10;
	118	}
	119	if (negative)
	120	*--p = '-';
	121	}
	122
4e4e5a6f	123	return UString(p, static_cast<unsigned>(end - p));
9dae56ea A	124	}
9dae56ea A	125
14957cd0	126	UString UString::number(long long i)
9dae56ea	127	{
6fe7ccc8 A	128	LChar buf[1 + sizeof(i) * 3];
	129	LChar* end = buf + WTF_ARRAY_LENGTH(buf);
	130	LChar* p = end;
f9bf01c6	131
9dae56ea A	132	if (i == 0)
9dae56ea A	133	*--p = '0';
f9bf01c6	134	else if (i == std::numeric_limits<long long>::min()) {
9dae56ea	135	char minBuf[1 + sizeof(i) * 3];
14957cd0 A	136	#if OS(WINDOWS)
	137	snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
	138	#else
f9bf01c6	139	snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
14957cd0	140	#endif
9dae56ea A	141	return UString(minBuf);
	142	} else {
	143	bool negative = false;
	144	if (i < 0) {
	145	negative = true;
	146	i = -i;
	147	}
	148	while (i) {
	149	*--p = static_cast<unsigned short>((i % 10) + '0');
	150	i /= 10;
	151	}
	152	if (negative)
	153	*--p = '-';
	154	}
	155
4e4e5a6f	156	return UString(p, static_cast<unsigned>(end - p));
9dae56ea A	157	}
9dae56ea A	158
14957cd0	159	UString UString::number(unsigned u)
9dae56ea	160	{
6fe7ccc8 A	161	LChar buf[sizeof(u) * 3];
	162	LChar* end = buf + WTF_ARRAY_LENGTH(buf);
	163	LChar* p = end;
4e4e5a6f	164
9dae56ea A	165	if (u == 0)
	166	*--p = '0';
	167	else {
	168	while (u) {
	169	*--p = static_cast<unsigned short>((u % 10) + '0');
	170	u /= 10;
	171	}
	172	}
4e4e5a6f A	173
4e4e5a6f A	174	return UString(p, static_cast<unsigned>(end - p));
9dae56ea A	175	}
9dae56ea A	176
14957cd0	177	UString UString::number(long l)
9dae56ea	178	{
6fe7ccc8 A	179	LChar buf[1 + sizeof(l) * 3];
	180	LChar* end = buf + WTF_ARRAY_LENGTH(buf);
	181	LChar* p = end;
9dae56ea A	182
	183	if (l == 0)
	184	*--p = '0';
	185	else if (l == LONG_MIN) {
	186	char minBuf[1 + sizeof(l) * 3];
fb8617cd	187	snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
9dae56ea A	188	return UString(minBuf);
	189	} else {
	190	bool negative = false;
	191	if (l < 0) {
	192	negative = true;
	193	l = -l;
	194	}
	195	while (l) {
	196	*--p = static_cast<unsigned short>((l % 10) + '0');
	197	l /= 10;
	198	}
	199	if (negative)
	200	*--p = '-';
	201	}
	202
4e4e5a6f	203	return UString(p, end - p);
9dae56ea A	204	}
9dae56ea A	205
14957cd0	206	UString UString::number(double d)
9dae56ea	207	{
14957cd0	208	NumberToStringBuffer buffer;
6fe7ccc8	209	return UString(numberToString(d, buffer));
9dae56ea A	210	}
9dae56ea A	211
14957cd0	212	UString UString::substringSharingImpl(unsigned offset, unsigned length) const
9dae56ea	213	{
14957cd0	214	// FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
9dae56ea	215
14957cd0 A	216	unsigned stringLength = this->length();
	217	offset = min(offset, stringLength);
	218	length = min(length, stringLength - offset);
9dae56ea	219
14957cd0	220	if (!offset && length == stringLength)
9dae56ea	221	return *this;
14957cd0	222	return UString(StringImpl::create(m_impl, offset, length));
9dae56ea A	223	}
9dae56ea A	224
9dae56ea A	225	bool operator==(const UString& s1, const char *s2)
9dae56ea A	226	{
6fe7ccc8 A	227	if (s1.isEmpty())
6fe7ccc8 A	228	return !s2;
9dae56ea	229
6fe7ccc8 A	230	return equal(s1.impl(), s2);
	231	}
	232
	233	// This method assumes that all simple checks have been performed by
	234	// the inlined operator==() in the header file.
	235	bool equalSlowCase(const UString& s1, const UString& s2)
	236	{
	237	StringImpl* rep1 = s1.impl();
	238	StringImpl* rep2 = s2.impl();
	239	unsigned size1 = rep1->length();
	240
	241	// At this point we know
	242	// (a) that the strings are the same length and
	243	// (b) that they are greater than zero length.
	244	bool s1Is8Bit = rep1->is8Bit();
	245	bool s2Is8Bit = rep2->is8Bit();
	246
	247	if (s1Is8Bit) {
	248	const LChar* d1 = rep1->characters8();
	249	if (s2Is8Bit) {
	250	const LChar* d2 = rep2->characters8();
	251
	252	if (d1 == d2) // Check to see if the data pointers are the same.
	253	return true;
	254
	255	// Do quick checks for sizes 1 and 2.
	256	switch (size1) {
	257	case 1:
	258	return d1[0] == d2[0];
	259	case 2:
	260	return (d1[0] == d2[0]) & (d1[1] == d2[1]);
	261	default:
	262	return (!memcmp(d1, d2, size1 * sizeof(LChar)));
	263	}
	264	}
	265
	266	const UChar* d2 = rep2->characters16();
	267
	268	for (unsigned i = 0; i < size1; i++) {
	269	if (d1[i] != d2[i])
	270	return false;
	271	}
	272	return true;
	273	}
	274
	275	if (s2Is8Bit) {
	276	const UChar* d1 = rep1->characters16();
	277	const LChar* d2 = rep2->characters8();
	278
	279	for (unsigned i = 0; i < size1; i++) {
	280	if (d1[i] != d2[i])
	281	return false;
	282	}
	283	return true;
	284
	285	}
	286
	287	const UChar* d1 = rep1->characters16();
	288	const UChar* d2 = rep2->characters16();
	289
	290	if (d1 == d2) // Check to see if the data pointers are the same.
	291	return true;
	292
	293	// Do quick checks for sizes 1 and 2.
294	switch (size1) {
295	case 1:
296	return d1[0] == d2[0];
297	case 2:
298	return (d1[0] == d2[0]) & (d1[1] == d2[1]);
299	default:
300	return (!memcmp(d1, d2, size1 * sizeof(UChar)));
301	}
9dae56ea A	302	}
	303
	304	bool operator<(const UString& s1, const UString& s2)
	305	{
14957cd0 A	306	const unsigned l1 = s1.length();
14957cd0 A	307	const unsigned l2 = s2.length();
4e4e5a6f	308	const unsigned lmin = l1 < l2 ? l1 : l2;
6fe7ccc8 A	309	if (s1.is8Bit() && s2.is8Bit()) {
	310	const LChar* c1 = s1.characters8();
	311	const LChar* c2 = s2.characters8();
	312	unsigned length = 0;
	313	while (length < lmin && c1 == c2) {
	314	c1++;
	315	c2++;
	316	length++;
	317	}
	318	if (length < lmin)
	319	return (c1[0] < c2[0]);
	320
	321	return (l1 < l2);
	322	}
14957cd0 A	323	const UChar* c1 = s1.characters();
14957cd0 A	324	const UChar* c2 = s2.characters();
6fe7ccc8 A	325	unsigned length = 0;
6fe7ccc8 A	326	while (length < lmin && c1 == c2) {
9dae56ea A	327	c1++;
9dae56ea A	328	c2++;
6fe7ccc8	329	length++;
9dae56ea	330	}
6fe7ccc8	331	if (length < lmin)
9dae56ea A	332	return (c1[0] < c2[0]);
	333
	334	return (l1 < l2);
	335	}
	336
	337	bool operator>(const UString& s1, const UString& s2)
	338	{
14957cd0 A	339	const unsigned l1 = s1.length();
14957cd0 A	340	const unsigned l2 = s2.length();
4e4e5a6f	341	const unsigned lmin = l1 < l2 ? l1 : l2;
14957cd0 A	342	const UChar* c1 = s1.characters();
14957cd0 A	343	const UChar* c2 = s2.characters();
4e4e5a6f	344	unsigned l = 0;
9dae56ea A	345	while (l < lmin && c1 == c2) {
	346	c1++;
	347	c2++;
	348	l++;
	349	}
	350	if (l < lmin)
	351	return (c1[0] > c2[0]);
	352
	353	return (l1 > l2);
	354	}
	355
14957cd0	356	CString UString::ascii() const
9dae56ea	357	{
14957cd0 A	358	// Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
	359	// preserved, characters outside of this range are converted to '?'.
	360
	361	unsigned length = this->length();
6fe7ccc8 A	362
	363	if (this->is8Bit()) {
	364	const LChar* characters = this->characters8();
	365
	366	char* characterBuffer;
	367	CString result = CString::newUninitialized(length, characterBuffer);
	368
	369	for (unsigned i = 0; i < length; ++i) {
	370	LChar ch = characters[i];
	371	characterBuffer[i] = ch && (ch < 0x20 \|\| ch > 0x7f) ? '?' : ch;
	372	}
	373
	374	return result;
	375	}
	376
	377	const UChar* characters = this->characters16();
14957cd0 A	378
	379	char* characterBuffer;
	380	CString result = CString::newUninitialized(length, characterBuffer);
	381
	382	for (unsigned i = 0; i < length; ++i) {
	383	UChar ch = characters[i];
	384	characterBuffer[i] = ch && (ch < 0x20 \|\| ch >= 0x7f) ? '?' : ch;
9dae56ea A	385	}
9dae56ea A	386
14957cd0 A	387	return result;
14957cd0 A	388	}
9dae56ea	389
14957cd0 A	390	CString UString::latin1() const
	391	{
	392	// Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
	393	// preserved, characters outside of this range are converted to '?'.
	394
	395	unsigned length = this->length();
	396	const UChar* characters = this->characters();
9dae56ea	397
14957cd0 A	398	char* characterBuffer;
	399	CString result = CString::newUninitialized(length, characterBuffer);
	400
	401	for (unsigned i = 0; i < length; ++i) {
	402	UChar ch = characters[i];
	403	characterBuffer[i] = ch > 0xff ? '?' : ch;
	404	}
	405
	406	return result;
9dae56ea A	407	}
9dae56ea A	408
14957cd0 A	409	// Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
14957cd0 A	410	static inline void putUTF8Triple(char*& buffer, UChar ch)
9dae56ea	411	{
14957cd0 A	412	ASSERT(ch >= 0x0800);
	413	*buffer++ = static_cast<char>(((ch >> 12) & 0x0F) \| 0xE0);
	414	*buffer++ = static_cast<char>(((ch >> 6) & 0x3F) \| 0x80);
	415	*buffer++ = static_cast<char>((ch & 0x3F) \| 0x80);
	416	}
	417
	418	CString UString::utf8(bool strict) const
	419	{
	420	unsigned length = this->length();
6fe7ccc8 A	421
	422	if (!length)
	423	return CString("", 0);
14957cd0 A	424
	425	// Allocate a buffer big enough to hold all the characters
	426	// (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
	427	// Optimization ideas, if we find this function is hot:
	428	// * We could speculatively create a CStringBuffer to contain 'length'
	429	// characters, and resize if necessary (i.e. if the buffer contains
	430	// non-ascii characters). (Alternatively, scan the buffer first for
	431	// ascii characters, so we know this will be sufficient).
	432	// * We could allocate a CStringBuffer with an appropriate size to
	433	// have a good chance of being able to write the string into the
	434	// buffer without reallocing (say, 1.5 x length).
b80e6193	435	if (length > numeric_limits<unsigned>::max() / 3)
9dae56ea	436	return CString();
14957cd0	437
6fe7ccc8	438	Vector<char, 1024> bufferVector(length * 3);
14957cd0	439	char* buffer = bufferVector.data();
14957cd0	440
6fe7ccc8 A	441	if (is8Bit()) {
6fe7ccc8 A	442	const LChar* characters = this->characters8();
14957cd0	443
6fe7ccc8 A	444	ConversionResult result = convertLatin1ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size());
	445	ASSERT_UNUSED(result, result != targetExhausted); // (length * 3) should be sufficient for any conversion
	446	} else {
	447	const UChar* characters = this->characters16();
	448
	449	ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
	450	ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
	451
	452	// Only produced from strict conversion.
	453	if (result == sourceIllegal)
14957cd0	454	return CString();
6fe7ccc8 A	455
	456	// Check for an unconverted high surrogate.
	457	if (result == sourceExhausted) {
	458	if (strict)
	459	return CString();
	460	// This should be one unpaired high surrogate. Treat it the same
	461	// was as an unpaired high surrogate would have been handled in
	462	// the middle of a string with non-strict conversion - which is
	463	// to say, simply encode it to UTF-8.
	464	ASSERT((characters + 1) == (this->characters() + length));
	465	ASSERT((characters >= 0xD800) && (characters <= 0xDBFF));
	466	// There should be room left, since one UChar hasn't been converted.
	467	ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
	468	putUTF8Triple(buffer, *characters);
	469	}
14957cd0	470	}
9dae56ea	471
14957cd0	472	return CString(bufferVector.data(), buffer - bufferVector.data());
9dae56ea A	473	}
9dae56ea A	474
9dae56ea	475	} // namespace JSC