[wxWidgets.git] / src / common / unichar.cpp

/////////////////////////////////////////////////////////////////////////////
// Name:        src/common/unichar.cpp
// Purpose:     wxUniChar and wxUniCharRef classes
// Author:      Vaclav Slavik
// Created:     2007-03-19
// Copyright:   (c) 2007 REA Elektronik GmbH
// Licence:     wxWindows licence
///////////////////////////////////////////////////////////////////////////////

// ===========================================================================
// headers
// ===========================================================================

// For compilers that support precompilation, includes "wx.h".
#include "wx/wxprec.h"

#ifdef __BORLANDC__
    #pragma hdrstop
#endif

#ifndef WX_PRECOMP
    #include "wx/strconv.h"  // wxConvLibc
    #include "wx/log.h"
#endif

#include "wx/unichar.h"
#include "wx/string.h"

// ===========================================================================
// implementation
// ===========================================================================

// ---------------------------------------------------------------------------
// wxUniChar
// ---------------------------------------------------------------------------

/* static */
// Converts the single narrow character c to a wide character value using
// wxConvLibc.
//
// In builds where wxUSE_UTF8_LOCALE_ONLY is set this always asserts and
// returns '?'. Otherwise '?' is returned (after asserting) only if the
// conversion does not yield exactly the character plus its terminating NUL.
wxUniChar::value_type wxUniChar::FromHi8bit(char c)
{
#if wxUSE_UTF8_LOCALE_ONLY
    wxUnusedVar(c);
    wxFAIL_MSG( "invalid UTF-8 character" );

    return wxT('?'); // FIXME-UTF8: what to use as failure character?
#else
    // one character followed by the terminating NUL; both are handed to the
    // converter, hence the expected result of 2 below
    const char narrow[2] = { c, '\0' };
    wchar_t wide[2];

    const size_t converted = wxConvLibc.ToWChar(wide, 2, narrow, 2);
    if ( converted == 2 )
        return wide[0];

    wxFAIL_MSG( "invalid multibyte character" );
    return wxT('?'); // FIXME-UTF8: what to use as failure character?
#endif
}

/* static */
// Returns the single-byte representation of v as computed by GetAsHi8bit().
// If GetAsHi8bit() reports failure, asserts and returns '?' instead.
char wxUniChar::ToHi8bit(wxUniChar::value_type v)
{
    char narrow;
    if ( GetAsHi8bit(v, &narrow) )
        return narrow;

    wxFAIL_MSG( "character cannot be converted to single byte" );
    return '?'; // FIXME-UTF8: what to use as failure character?
}

/* static */
// Tries to convert the character value v to a single narrow character using
// wxConvLibc.
//
// Returns true and stores the result in *c if the conversion produced
// exactly one byte plus the terminating NUL; returns false and leaves *c
// untouched otherwise.
bool wxUniChar::GetAsHi8bit(value_type v, char *c)
{
    // the character followed by the terminating NUL; both are passed to the
    // converter, so a one-byte result is reported as 2
    const wchar_t wide[2] = { static_cast<wchar_t>(v), L'\0' };
    char narrow[2];

    const bool ok = wxConvLibc.FromWChar(narrow, 2, wide, 2) == 2;
    if ( ok )
        *c = narrow[0];

    return ok;
}

// ---------------------------------------------------------------------------
// wxUniCharRef
// ---------------------------------------------------------------------------

#if wxUSE_UNICODE_UTF8
// Returns the character at the referenced position, decoded with
// wxStringOperations::DecodeChar().
wxUniChar wxUniCharRef::UniChar() const
{
    const wxUniChar decoded = wxStringOperations::DecodeChar(m_pos);
    return decoded;
}

// Replaces the character this reference points to (at m_pos inside m_str)
// with c and returns *this.
//
// The new character is encoded with wxStringOperations::EncodeChar(). If its
// encoded length equals that of the character currently at m_pos, the bytes
// are overwritten in place. Otherwise the underlying wxStringImpl is modified
// with replace() and every iterator registered in m_str.m_iterators is
// re-pointed at its adjusted position afterwards.
wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
{
    wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
    // both lengths are derived from the first byte of the respective
    // encoding: the one currently stored at m_pos and the one just produced
    size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
    size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);

    if ( lenNew == lenOld )
    {
        // this is the simpler case: if the new value's UTF-8 code has the
        // same length, we can just replace it:

        iterator pos(m_pos);
        for ( size_t i = 0; i < lenNew; ++i, ++pos )
            *pos = utf[i];
    }
    else // length of character encoding in UTF-8 changed
    {
        // the worse case is when the new value has either longer or shorter
        // code -- in that case, we have to use wxStringImpl::replace() and
        // this invalidates all iterators, so we have to update them too:

        wxStringImpl& strimpl = m_str.m_impl;

        // signed change in length; the subtraction is done in size_t and
        // wraps when the new encoding is shorter, the conversion to int is
        // relied upon to give the (negative) difference back
        int iterDiff = lenNew - lenOld;
        // offset of the character being replaced from the string start
        size_t posIdx = m_pos - strimpl.begin();

        // compute positions of outstanding iterators for this string after the
        // replacement is done (there is only a small number of iterators at
        // any time, so we use an array on the stack to avoid unneeded
        // allocation):
        static const size_t STATIC_SIZE = 32;
        size_t indexes_a[STATIC_SIZE];
        size_t *indexes = indexes_a;
        size_t iterNum = 0;
        wxStringIteratorNode *it;
        for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
        {
            wxASSERT( it->m_iter || it->m_citer );

            // the stack array is full: switch to a heap buffer big enough for
            // all iterators, this happens at most once as iterNum only grows
            if ( iterNum == STATIC_SIZE )
            {
                wxLogTrace( wxT("utf8"), wxT("unexpectedly many iterators") );

                // total starts at iterNum + 1 and the loop below counts the
                // current node too, so the buffer ends up one element larger
                // than the number of iterators
                size_t total = iterNum + 1;
                for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
                    total++;
                indexes = new size_t[total];
                // keep the indexes already computed for the first
                // STATIC_SIZE iterators
                memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
            }

            // current offset of this iterator, whichever kind the node holds
            size_t idx = it->m_iter
                         ? (*it->m_iter - strimpl.begin())
                         : (*it->m_citer - strimpl.begin());

            // only iterators strictly after the replaced character move;
            // those at or before it keep their offset (for a negative
            // iterDiff the addition relies on unsigned wrap-around)
            if ( idx > posIdx )
                idx += iterDiff;

            indexes[iterNum] = idx;
        }

        // update the string:
        // NOTE(review): if replace() can throw, a heap-allocated indexes
        // buffer would not be freed -- confirm whether this matters here
        strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);

#if wxUSE_STRING_POS_CACHE
        m_str.InvalidateCache();
#endif // wxUSE_STRING_POS_CACHE

        // finally, set the iterators to valid values again (note that this
        // updates m_pos as well):
        size_t i;
        for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i )
        {
            wxASSERT( i < iterNum );
            wxASSERT( it->m_iter || it->m_citer );

            if ( it->m_iter )
                *it->m_iter = strimpl.begin() + indexes[i];
            else // it->m_citer
                *it->m_citer = strimpl.begin() + indexes[i];
        }

        // free the buffer only if we had to fall back to the heap above
        if ( indexes != indexes_a )
            delete[] indexes;
    }

    return *this;
}
#endif // wxUSE_UNICODE_UTF8
Commit	Line	Data
dd0ef332 VS	1	/////////////////////////////////////////////////////////////////////////////
	2	// Name: src/common/unichar.cpp
	3	// Purpose: wxUniChar and wxUniCharRef classes
	4	// Author: Vaclav Slavik
	5	// Created: 2007-03-19
dd0ef332 VS	6	// Copyright: (c) 2007 REA Elektronik GmbH
	7	// Licence: wxWindows licence
	8	///////////////////////////////////////////////////////////////////////////////
	9
	10	// ===========================================================================
	11	// headers
	12	// ===========================================================================
	13
	14	// For compilers that support precompilation, includes "wx.h".
	15	#include "wx/wxprec.h"
	16
	17	#ifdef __BORLANDC__
	18	#pragma hdrstop
	19	#endif
	20
0cb6a6e4 VZ	21	#ifndef WX_PRECOMP
0cb6a6e4 VZ	22	#include "wx/strconv.h" // wxConvLibc
98d2df74	23	#include "wx/log.h"
0cb6a6e4 VZ	24	#endif
0cb6a6e4 VZ	25
dd0ef332	26	#include "wx/unichar.h"
155c2f6c	27	#include "wx/string.h"
81727065	28
dd0ef332 VS	29	// ===========================================================================
	30	// implementation
	31	// ===========================================================================
	32
81727065 VS	33	// ---------------------------------------------------------------------------
	34	// wxUniChar
	35	// ---------------------------------------------------------------------------
	36
dd0ef332	37	/* static */
ce65118e	38	wxUniChar::value_type wxUniChar::FromHi8bit(char c)
dd0ef332	39	{
111d9948	40	#if wxUSE_UTF8_LOCALE_ONLY
68fb51cd	41	wxFAIL_MSG( "invalid UTF-8 character" );
eea9eca5 VZ	42	wxUnusedVar(c);
eea9eca5 VZ	43
111d9948 VS	44	return wxT('?'); // FIXME-UTF8: what to use as failure character?
111d9948 VS	45	#else
7a8e90dd VZ	46	char cbuf[2];
	47	cbuf[0] = c;
	48	cbuf[1] = '\0';
	49	wchar_t wbuf[2];
	50	if ( wxConvLibc.ToWChar(wbuf, 2, cbuf, 2) != 2 )
68fb51cd VS	51	{
68fb51cd VS	52	wxFAIL_MSG( "invalid multibyte character" );
dd0ef332	53	return wxT('?'); // FIXME-UTF8: what to use as failure character?
68fb51cd	54	}
7a8e90dd	55	return wbuf[0];
111d9948	56	#endif
dd0ef332 VS	57	}
	58
	59	/* static */
874dbd3a	60	char wxUniChar::ToHi8bit(wxUniChar::value_type v)
dd0ef332	61	{
874dbd3a VZ	62	char c;
	63	if ( !GetAsHi8bit(v, &c) )
	64	{
	65	wxFAIL_MSG( "character cannot be converted to single byte" );
	66	c = '?'; // FIXME-UTF8: what to use as failure character?
	67	}
eea9eca5	68
874dbd3a VZ	69	return c;
	70	}
	71
	72	/* static */
	73	bool wxUniChar::GetAsHi8bit(value_type v, char *c)
	74	{
7a8e90dd	75	wchar_t wbuf[2];
874dbd3a	76	wbuf[0] = v;
7a8e90dd VZ	77	wbuf[1] = L'\0';
	78	char cbuf[2];
	79	if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 )
874dbd3a	80	return false;
81727065	81
874dbd3a VZ	82	*c = cbuf[0];
	83	return true;
	84	}
81727065 VS	85
	86	// ---------------------------------------------------------------------------
	87	// wxUniCharRef
	88	// ---------------------------------------------------------------------------
	89
	90	#if wxUSE_UNICODE_UTF8
467175ab VS	91	wxUniChar wxUniCharRef::UniChar() const
	92	{
	93	return wxStringOperations::DecodeChar(m_pos);
	94	}
	95
81727065 VS	96	wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c)
81727065 VS	97	{
467175ab VS	98	wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c));
	99	size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos);
	100	size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]);
81727065 VS	101
	102	if ( lenNew == lenOld )
	103	{
b0c4d5d7 VS	104	// this is the simpler case: if the new value's UTF-8 code has the
	105	// same length, we can just replace it:
	106
81727065 VS	107	iterator pos(m_pos);
	108	for ( size_t i = 0; i < lenNew; ++i, ++pos )
	109	*pos = utf[i];
	110	}
68482dc5	111	else // length of character encoding in UTF-8 changed
81727065	112	{
b0c4d5d7 VS	113	// the worse case is when the new value has either longer or shorter
	114	// code -- in that case, we have to use wxStringImpl::replace() and
	115	// this invalidates all iterators, so we have to update them too:
	116
6bd4f281	117	wxStringImpl& strimpl = m_str.m_impl;
b0c4d5d7 VS	118
	119	int iterDiff = lenNew - lenOld;
	120	size_t posIdx = m_pos - strimpl.begin();
	121
	122	// compute positions of outstanding iterators for this string after the
	123	// replacement is done (there is only a small number of iterators at
	124	// any time, so we use an array on the stack to avoid unneeded
	125	// allocation):
	126	static const size_t STATIC_SIZE = 32;
	127	size_t indexes_a[STATIC_SIZE];
	128	size_t *indexes = indexes_a;
	129	size_t iterNum = 0;
	130	wxStringIteratorNode *it;
6bd4f281	131	for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum )
b0c4d5d7 VS	132	{
	133	wxASSERT( it->m_iter \|\| it->m_citer );
	134
	135	if ( iterNum == STATIC_SIZE )
	136	{
9a83f860	137	wxLogTrace( wxT("utf8"), wxT("unexpectedly many iterators") );
b0c4d5d7 VS	138
	139	size_t total = iterNum + 1;
	140	for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next )
	141	total++;
	142	indexes = new size_t[total];
	143	memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE);
	144	}
	145
	146	size_t idx = it->m_iter
	147	? (*it->m_iter - strimpl.begin())
	148	: (*it->m_citer - strimpl.begin());
	149
	150	if ( idx > posIdx )
	151	idx += iterDiff;
	152
	153	indexes[iterNum] = idx;
	154	}
	155
	156	// update the string:
	157	strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew);
	158
68482dc5 VZ	159	#if wxUSE_STRING_POS_CACHE
	160	m_str.InvalidateCache();
	161	#endif // wxUSE_STRING_POS_CACHE
	162
b0c4d5d7 VS	163	// finally, set the iterators to valid values again (note that this
	164	// updates m_pos as well):
	165	size_t i;
6bd4f281	166	for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i )
b0c4d5d7 VS	167	{
	168	wxASSERT( i < iterNum );
	169	wxASSERT( it->m_iter \|\| it->m_citer );
	170
	171	if ( it->m_iter )
	172	*it->m_iter = strimpl.begin() + indexes[i];
	173	else // it->m_citer
	174	*it->m_citer = strimpl.begin() + indexes[i];
	175	}
	176
	177	if ( indexes != indexes_a )
	178	delete[] indexes;
81727065 VS	179	}
	180
	181	return *this;
	182	}
	183	#endif // wxUSE_UNICODE_UTF8