src/common/unichar.cpp

/////////////////////////////////////////////////////////////////////////////
// Name:        src/common/unichar.cpp
// Purpose:     wxUniChar and wxUniCharRef classes
// Author:      Vaclav Slavik
// Created:     2007-03-19
// RCS-ID:      $Id$
// Copyright:   (c) 2007 REA Elektronik GmbH
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

// ===========================================================================
// headers
// ===========================================================================

// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#ifdef __BORLANDC__
    #pragma hdrstop
#endif

#ifndef WX_PRECOMP
    #include "wx/strconv.h"  // wxConvLibc
    #include "wx/log.h"
#endif

#include "wx/unichar.h"
#include "wx/string.h"

// ===========================================================================
// implementation
// ===========================================================================

// ---------------------------------------------------------------------------
// wxUniChar
// ---------------------------------------------------------------------------

// Converts a single 8-bit character to the corresponding wide character
// value.
//
// Bytes below 0x80 are returned unchanged. Any other byte is converted with
// wxConvLibc, except in wxUSE_UTF8_LOCALE_ONLY builds where such a byte can
// never be a complete character on its own. When the byte cannot be
// converted, an assertion failure is raised and '?' is returned instead.
/* static */
wxUniChar::value_type wxUniChar::From8bit(char c)
{
    // all supported charsets have the first 128 characters same as ASCII:
    if ( (unsigned char)c < 0x80 )
        return c;

#if wxUSE_UTF8_LOCALE_ONLY
    wxFAIL_MSG( "invalid UTF-8 character" );
    return wxT('?'); // FIXME-UTF8: what to use as failure character?
#else
    // hand the converter an explicitly NUL-terminated source and include the
    // terminator in its length: the expected result of 2 (the character plus
    // the trailing NUL) then doesn't depend on the converter counting a
    // terminator which wasn't part of the input
    char cbuf[2];
    cbuf[0] = c;
    cbuf[1] = '\0';

    wchar_t wbuf[2];
    if ( wxConvLibc.ToWChar(wbuf, 2, cbuf, 2) != 2 )
    {
        wxFAIL_MSG( "invalid multibyte character" );
        return wxT('?'); // FIXME-UTF8: what to use as failure character?
    }
    return wbuf[0];
#endif
}

// Converts a wide character value to a single 8-bit character.
//
// Values below 0x80 are returned unchanged. Any other value is converted with
// wxConvLibc, except in wxUSE_UTF8_LOCALE_ONLY builds where such a value can
// never be represented by a single byte. When the value has no single-byte
// representation, an assertion failure is raised and '?' is returned instead.
/* static */
char wxUniChar::To8bit(wxUniChar::value_type c)
{
    // all supported charsets have the first 128 characters same as ASCII:
    if ( c < 0x80 )
        return c;

#if wxUSE_UTF8_LOCALE_ONLY
    wxFAIL_MSG( "character cannot be converted to single UTF-8 byte" );
    return '?'; // FIXME-UTF8: what to use as failure character?
#else
    wchar_t wbuf[2];
    wbuf[0] = c;
    wbuf[1] = L'\0';

    // wchar_t may be narrower than value_type, in which case the assignment
    // above truncates the value; converting the truncated value would
    // silently yield an unrelated character, so treat it as a failure:
    if ( (wxUniChar::value_type)wbuf[0] != c )
    {
        wxFAIL_MSG( "character cannot be converted to single byte" );
        return '?'; // FIXME-UTF8: what to use as failure character?
    }

    // the source is explicitly NUL-terminated and the terminator is included
    // in its length, so the expected result of 2 (one byte plus the trailing
    // NUL) doesn't depend on the converter counting a terminator which wasn't
    // part of the input
    char cbuf[2];
    if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 )
    {
        wxFAIL_MSG( "character cannot be converted to single byte" );
        return '?'; // FIXME-UTF8: what to use as failure character?
    }
    return cbuf[0];
#endif
}


// ---------------------------------------------------------------------------
// wxUniCharRef
// ---------------------------------------------------------------------------

#if wxUSE_UNICODE_UTF8
// Returns the character this reference currently designates, obtained by
// passing the stored position m_pos to wxStringOperations::DecodeChar().
wxUniChar wxUniCharRef::UniChar() const
{
    const wxUniChar decoded = wxStringOperations::DecodeChar(m_pos);
    return decoded;
}

// Assigns the character c to the string position this reference designates.
//
// The new character is encoded with wxStringOperations::EncodeChar() and
// written over the character currently stored at m_pos. If the old and new
// encodings have the same length the units are overwritten in place;
// otherwise the owning string is modified with replace() and every iterator
// registered in the string's m_iterators list is re-pointed into the modified
// string.
//
// Returns a reference to this object.
wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
{
    wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
    // both lengths are determined from the first unit of the respective
    // sequence: the one currently stored at m_pos and the freshly encoded one
    size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
    size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);

    if ( lenNew == lenOld )
    {
        // this is the simpler case: if the new value's UTF-8 code has the
        // same length, we can just replace it:

        // work on a copy of the position so that m_pos itself is not moved
        iterator pos(m_pos);
        for ( size_t i = 0; i < lenNew; ++i, ++pos )
            *pos = utf[i];
    }
    else
    {
        // the worse case is when the new value has either longer or shorter
        // code -- in that case, we have to use wxStringImpl::replace() and
        // this invalidates all iterators, so we have to update them too:

        // m_node.m_str is cast to a non-const wxString as the string is
        // modified below
        wxString& str = *wx_const_cast(wxString*, m_node.m_str);
        wxStringImpl& strimpl = str.m_impl;

        // change in encoded length (negative if the new code is shorter) and
        // the offset of the character being replaced from the string start
        int iterDiff = lenNew - lenOld;
        size_t posIdx = m_pos - strimpl.begin();

        // compute positions of outstanding iterators for this string after the
        // replacement is done (there is only a small number of iterators at
        // any time, so we use an array on the stack to avoid unneeded
        // allocation):
        static const size_t STATIC_SIZE = 32;
        size_t indexes_a[STATIC_SIZE];
        size_t *indexes = indexes_a;
        size_t iterNum = 0;
        wxStringIteratorNode *it;
        for ( it = str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
        {
            // each node is expected to refer to either a mutable or a const
            // iterator
            wxASSERT( it->m_iter || it->m_citer );

            // the stack array is full: switch to a heap array for this and
            // all remaining nodes (this branch is taken at most once as
            // iterNum only increases)
            if ( iterNum == STATIC_SIZE )
            {
                wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") );

                // total starts at the number of nodes seen so far plus one and
                // the loop then counts the current node and all following
                // ones, so the array gets one slot more than there are nodes
                size_t total = iterNum + 1;
                for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
                    total++;
                indexes = new size_t[total];
                // carry over the indexes already computed in the stack array
                memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
            }

            // offset of this iterator from the start of the string, as it is
            // before the replacement
            size_t idx = it->m_iter
                         ? (*it->m_iter - strimpl.begin())
                         : (*it->m_citer - strimpl.begin());

            // only iterators positioned after the replaced character move;
            // those at or before posIdx keep their offset
            if ( idx > posIdx )
                idx += iterDiff;

            indexes[iterNum] = idx;
        }

        // update the string:
        strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);

        // finally, set the iterators to valid values again (note that this
        // updates m_pos as well):
        size_t i;
        for ( i = 0, it = str.m_iterators.ptr; it; it = it->m_next, ++i )
        {
            // the list is walked in the same order as above, so the i-th node
            // corresponds to indexes[i]
            wxASSERT( i < iterNum );
            wxASSERT( it->m_iter || it->m_citer );

            if ( it->m_iter )
                *it->m_iter = strimpl.begin() + indexes[i];
            else // it->m_citer
                *it->m_citer = strimpl.begin() + indexes[i];
        }

        // free the heap array if the stack one turned out to be too small
        if ( indexes != indexes_a )
            delete[] indexes;
    }

    return *this;
}
#endif // wxUSE_UNICODE_UTF8
Commit	Line	Data
	1	/////////////////////////////////////////////////////////////////////////////
	2	// Name: src/common/unichar.cpp
	3	// Purpose: wxUniChar and wxUniCharRef classes
	4	// Author: Vaclav Slavik
	5	// Created: 2007-03-19
	6	// RCS-ID: $Id$
	7	// Copyright: (c) 2007 REA Elektronik GmbH
	8	// Licence: wxWindows licence
	9	///////////////////////////////////////////////////////////////////////////////
	10
	11	// ===========================================================================
	12	// headers
	13	// ===========================================================================
	14
	15	// For compilers that support precompilation, includes "wx.h".
	16	#include "wx/wxprec.h"
	17
	18	#ifdef __BORLANDC__
	19	#pragma hdrstop
	20	#endif
	21
	22	#ifndef WX_PRECOMP
	23	#include "wx/strconv.h" // wxConvLibc
	24	#include "wx/log.h"
	25	#endif
	26
	27	#include "wx/unichar.h"
	28	#include "wx/string.h"
	29
	30	// ===========================================================================
	31	// implementation
	32	// ===========================================================================
	33
	34	// ---------------------------------------------------------------------------
	35	// wxUniChar
	36	// ---------------------------------------------------------------------------
	37
	38	/* static */
	39	wxUniChar::value_type wxUniChar::From8bit(char c)
	40	{
	41	// all supported charsets have the first 128 characters same as ASCII:
	42	if ( (unsigned char)c < 0x80 )
	43	return c;
	44
	45	#if wxUSE_UTF8_LOCALE_ONLY
	46	wxFAIL_MSG( "invalid UTF-8 character" );
	47	return wxT('?'); // FIXME-UTF8: what to use as failure character?
	48	#else
	49	wchar_t buf[2];
	50	if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 )
	51	{
	52	wxFAIL_MSG( "invalid multibyte character" );
	53	return wxT('?'); // FIXME-UTF8: what to use as failure character?
	54	}
	55	return buf[0];
	56	#endif
	57	}
	58
	59	/* static */
	60	char wxUniChar::To8bit(wxUniChar::value_type c)
	61	{
	62	// all supported charsets have the first 128 characters same as ASCII:
	63	if ( c < 0x80 )
	64	return c;
	65
	66	#if wxUSE_UTF8_LOCALE_ONLY
	67	wxFAIL_MSG( "character cannot be converted to single UTF-8 byte" );
	68	return '?'; // FIXME-UTF8: what to use as failure character?
	69	#else
	70	wchar_t in = c;
	71	char buf[2];
	72	if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 )
	73	{
	74	wxFAIL_MSG( "character cannot be converted to single byte" );
	75	return '?'; // FIXME-UTF8: what to use as failure character?
	76	}
	77	return buf[0];
	78	#endif
	79	}
	80
	81
	82	// ---------------------------------------------------------------------------
	83	// wxUniCharRef
	84	// ---------------------------------------------------------------------------
	85
	86	#if wxUSE_UNICODE_UTF8
	87	wxUniChar wxUniCharRef::UniChar() const
	88	{
	89	return wxStringOperations::DecodeChar(m_pos);
	90	}
	91
	92	wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
	93	{
	94	wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
	95	size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
	96	size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
	97
	98	if ( lenNew == lenOld )
	99	{
	100	// this is the simpler case: if the new value's UTF-8 code has the
	101	// same length, we can just replace it:
	102
	103	iterator pos(m_pos);
	104	for ( size_t i = 0; i < lenNew; ++i, ++pos )
	105	*pos = utf[i];
	106	}
	107	else
	108	{
	109	// the worse case is when the new value has either longer or shorter
	110	// code -- in that case, we have to use wxStringImpl::replace() and
	111	// this invalidates all iterators, so we have to update them too:
	112
	113	wxString& str = *wx_const_cast(wxString*, m_node.m_str);
	114	wxStringImpl& strimpl = str.m_impl;
	115
	116	int iterDiff = lenNew - lenOld;
	117	size_t posIdx = m_pos - strimpl.begin();
	118
	119	// compute positions of outstanding iterators for this string after the
	120	// replacement is done (there is only a small number of iterators at
	121	// any time, so we use an array on the stack to avoid unneeded
	122	// allocation):
	123	static const size_t STATIC_SIZE = 32;
	124	size_t indexes_a[STATIC_SIZE];
	125	size_t *indexes = indexes_a;
	126	size_t iterNum = 0;
	127	wxStringIteratorNode *it;
	128	for ( it = str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
	129	{
	130	wxASSERT( it->m_iter || it->m_citer );
	131
	132	if ( iterNum == STATIC_SIZE )
	133	{
	134	wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") );
	135
	136	size_t total = iterNum + 1;
	137	for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
	138	total++;
	139	indexes = new size_t[total];
	140	memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
	141	}
	142
	143	size_t idx = it->m_iter
	144	? (*it->m_iter - strimpl.begin())
	145	: (*it->m_citer - strimpl.begin());
	146
	147	if ( idx > posIdx )
	148	idx += iterDiff;
	149
	150	indexes[iterNum] = idx;
	151	}
	152
	153	// update the string:
	154	strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
	155
	156	// finally, set the iterators to valid values again (note that this
	157	// updates m_pos as well):
	158	size_t i;
	159	for ( i = 0, it = str.m_iterators.ptr; it; it = it->m_next, ++i )
	160	{
	161	wxASSERT( i < iterNum );
	162	wxASSERT( it->m_iter || it->m_citer );
	163
	164	if ( it->m_iter )
	165	*it->m_iter = strimpl.begin() + indexes[i];
	166	else // it->m_citer
	167	*it->m_citer = strimpl.begin() + indexes[i];
	168	}
	169
	170	if ( indexes != indexes_a )
	171	delete[] indexes;
	172	}
	173
	174	return *this;
	175	}
	176	#endif // wxUSE_UNICODE_UTF8