X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/817270659e986de1b243586d8eb6ad3a76c87480..e733c4ce1e24cf7e4b0b0d8362fc59aaa7a7641c:/src/common/unichar.cpp diff --git a/src/common/unichar.cpp b/src/common/unichar.cpp index 0fec3a7792..de95fcd001 100644 --- a/src/common/unichar.cpp +++ b/src/common/unichar.cpp @@ -3,7 +3,6 @@ // Purpose: wxUniChar and wxUniCharRef classes // Author: Vaclav Slavik // Created: 2007-03-19 -// RCS-ID: $Id$ // Copyright: (c) 2007 REA Elektronik GmbH // Licence: wxWindows licence /////////////////////////////////////////////////////////////////////////////// @@ -21,11 +20,10 @@ #ifndef WX_PRECOMP #include "wx/strconv.h" // wxConvLibc + #include "wx/log.h" #endif #include "wx/unichar.h" - -// FIXME-UTF8: remove once UTF-8 functions moved outside #include "wx/string.h" // =========================================================================== @@ -37,58 +35,147 @@ // --------------------------------------------------------------------------- /* static */ -wxUniChar::value_type wxUniChar::From8bit(char c) +wxUniChar::value_type wxUniChar::FromHi8bit(char c) { - // all supported charsets have the first 128 characters same as ASCII: - if ( (unsigned char)c < 0x80 ) - return c; - - wchar_t buf[2]; - if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 ) +#if wxUSE_UTF8_LOCALE_ONLY + wxFAIL_MSG( "invalid UTF-8 character" ); + wxUnusedVar(c); + + return wxT('?'); // FIXME-UTF8: what to use as failure character? +#else + char cbuf[2]; + cbuf[0] = c; + cbuf[1] = '\0'; + wchar_t wbuf[2]; + if ( wxConvLibc.ToWChar(wbuf, 2, cbuf, 2) != 2 ) + { + wxFAIL_MSG( "invalid multibyte character" ); return wxT('?'); // FIXME-UTF8: what to use as failure character? - return buf[0]; + } + return wbuf[0]; +#endif } /* static */ -char wxUniChar::To8bit(wxUniChar::value_type c) +char wxUniChar::ToHi8bit(wxUniChar::value_type v) { - // all supported charsets have the first 128 characters same as ASCII: - if ( c < 0x80 ) - return c; - - wchar_t in = c; - char buf[2]; - if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 ) - return '?'; // FIXME-UTF8: what to use as failure character? - return buf[0]; + char c; + if ( !GetAsHi8bit(v, &c) ) + { + wxFAIL_MSG( "character cannot be converted to single byte" ); + c = '?'; // FIXME-UTF8: what to use as failure character? + } + + return c; } +/* static */ +bool wxUniChar::GetAsHi8bit(value_type v, char *c) +{ + wchar_t wbuf[2]; + wbuf[0] = v; + wbuf[1] = L'\0'; + char cbuf[2]; + if ( wxConvLibc.FromWChar(cbuf, 2, wbuf, 2) != 2 ) + return false; + + *c = cbuf[0]; + return true; +} // --------------------------------------------------------------------------- // wxUniCharRef // --------------------------------------------------------------------------- #if wxUSE_UNICODE_UTF8 +wxUniChar wxUniCharRef::UniChar() const +{ + return wxStringOperations::DecodeChar(m_pos); +} + wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c) { - wxString::Utf8CharBuffer utf(wxString::EncodeChar(c)); - size_t lenOld = wxString::GetUtf8CharLength(*m_pos); - size_t lenNew = wxString::GetUtf8CharLength(utf[0]); + wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c)); + size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos); + size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]); if ( lenNew == lenOld ) { + // this is the simpler case: if the new value's UTF-8 code has the + // same length, we can just replace it: + iterator pos(m_pos); for ( size_t i = 0; i < lenNew; ++i, ++pos ) *pos = utf[i]; } - else + else // length of character encoding in UTF-8 changed { - size_t idx = m_pos - m_str.begin(); - - m_str.replace(m_pos, m_pos + lenOld, utf, lenNew); - - // this is needed to keep m_pos valid: - m_pos = m_str.begin() + idx; + // the worse case is when the new value has either longer or shorter + // code -- in that case, we have to use wxStringImpl::replace() and + // this invalidates all iterators, so we have to update them too: + + wxStringImpl& strimpl = m_str.m_impl; + + int iterDiff = lenNew - lenOld; + size_t posIdx = m_pos - strimpl.begin(); + + // compute positions of outstanding iterators for this string after the + // replacement is done (there is only a small number of iterators at + // any time, so we use an array on the stack to avoid unneeded + // allocation): + static const size_t STATIC_SIZE = 32; + size_t indexes_a[STATIC_SIZE]; + size_t *indexes = indexes_a; + size_t iterNum = 0; + wxStringIteratorNode *it; + for ( it = m_str.m_iterators.ptr; it; it = it->m_next, ++iterNum ) + { + wxASSERT( it->m_iter || it->m_citer ); + + if ( iterNum == STATIC_SIZE ) + { + wxLogTrace( wxT("utf8"), wxT("unexpectedly many iterators") ); + + size_t total = iterNum + 1; + for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next ) + total++; + indexes = new size_t[total]; + memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE); + } + + size_t idx = it->m_iter + ? (*it->m_iter - strimpl.begin()) + : (*it->m_citer - strimpl.begin()); + + if ( idx > posIdx ) + idx += iterDiff; + + indexes[iterNum] = idx; + } + + // update the string: + strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew); + +#if wxUSE_STRING_POS_CACHE + m_str.InvalidateCache(); +#endif // wxUSE_STRING_POS_CACHE + + // finally, set the iterators to valid values again (note that this + // updates m_pos as well): + size_t i; + for ( i = 0, it = m_str.m_iterators.ptr; it; it = it->m_next, ++i ) + { + wxASSERT( i < iterNum ); + wxASSERT( it->m_iter || it->m_citer ); + + if ( it->m_iter ) + *it->m_iter = strimpl.begin() + indexes[i]; + else // it->m_citer + *it->m_citer = strimpl.begin() + indexes[i]; + } + + if ( indexes != indexes_a ) + delete[] indexes; } return *this;