X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/817270659e986de1b243586d8eb6ad3a76c87480..2e297951cd2eca56fac39fc21d7ebecc4cb51e52:/src/common/unichar.cpp diff --git a/src/common/unichar.cpp b/src/common/unichar.cpp index 0fec3a7792..653e33f14e 100644 --- a/src/common/unichar.cpp +++ b/src/common/unichar.cpp @@ -21,11 +21,10 @@ #ifndef WX_PRECOMP #include "wx/strconv.h" // wxConvLibc + #include "wx/log.h" #endif #include "wx/unichar.h" - -// FIXME-UTF8: remove once UTF-8 functions moved outside #include "wx/string.h" // =========================================================================== @@ -37,30 +36,38 @@ // --------------------------------------------------------------------------- /* static */ -wxUniChar::value_type wxUniChar::From8bit(char c) +wxUniChar::value_type wxUniChar::FromHi8bit(char c) { - // all supported charsets have the first 128 characters same as ASCII: - if ( (unsigned char)c < 0x80 ) - return c; - +#if wxUSE_UTF8_LOCALE_ONLY + wxFAIL_MSG( "invalid UTF-8 character" ); + return wxT('?'); // FIXME-UTF8: what to use as failure character? +#else wchar_t buf[2]; if ( wxConvLibc.ToWChar(buf, 2, &c, 1) != 2 ) + { + wxFAIL_MSG( "invalid multibyte character" ); return wxT('?'); // FIXME-UTF8: what to use as failure character? + } return buf[0]; +#endif } /* static */ -char wxUniChar::To8bit(wxUniChar::value_type c) +char wxUniChar::ToHi8bit(wxUniChar::value_type c) { - // all supported charsets have the first 128 characters same as ASCII: - if ( c < 0x80 ) - return c; - +#if wxUSE_UTF8_LOCALE_ONLY + wxFAIL_MSG( "character cannot be converted to single UTF-8 byte" ); + return '?'; // FIXME-UTF8: what to use as failure character? +#else wchar_t in = c; char buf[2]; if ( wxConvLibc.FromWChar(buf, 2, &in, 1) != 2 ) + { + wxFAIL_MSG( "character cannot be converted to single byte" ); return '?'; // FIXME-UTF8: what to use as failure character? + } return buf[0]; +#endif } @@ -69,26 +76,91 @@ char wxUniChar::To8bit(wxUniChar::value_type c) // --------------------------------------------------------------------------- #if wxUSE_UNICODE_UTF8 +wxUniChar wxUniCharRef::UniChar() const +{ + return wxStringOperations::DecodeChar(m_pos); +} + wxUniCharRef& wxUniCharRef::operator=(const wxUniChar& c) { - wxString::Utf8CharBuffer utf(wxString::EncodeChar(c)); - size_t lenOld = wxString::GetUtf8CharLength(*m_pos); - size_t lenNew = wxString::GetUtf8CharLength(utf[0]); + wxStringOperations::Utf8CharBuffer utf(wxStringOperations::EncodeChar(c)); + size_t lenOld = wxStringOperations::GetUtf8CharLength(*m_pos); + size_t lenNew = wxStringOperations::GetUtf8CharLength(utf[0]); if ( lenNew == lenOld ) { + // this is the simpler case: if the new value's UTF-8 code has the + // same length, we can just replace it: + iterator pos(m_pos); for ( size_t i = 0; i < lenNew; ++i, ++pos ) *pos = utf[i]; } else { - size_t idx = m_pos - m_str.begin(); - - m_str.replace(m_pos, m_pos + lenOld, utf, lenNew); - - // this is needed to keep m_pos valid: - m_pos = m_str.begin() + idx; + // the worse case is when the new value has either longer or shorter + // code -- in that case, we have to use wxStringImpl::replace() and + // this invalidates all iterators, so we have to update them too: + + wxString& str = *wx_const_cast(wxString*, m_node.m_str); + wxStringImpl& strimpl = str.m_impl; + + int iterDiff = lenNew - lenOld; + size_t posIdx = m_pos - strimpl.begin(); + + // compute positions of outstanding iterators for this string after the + // replacement is done (there is only a small number of iterators at + // any time, so we use an array on the stack to avoid unneeded + // allocation): + static const size_t STATIC_SIZE = 32; + size_t indexes_a[STATIC_SIZE]; + size_t *indexes = indexes_a; + size_t iterNum = 0; + wxStringIteratorNode *it; + for ( it = str.m_iterators.ptr; it; it = it->m_next, ++iterNum ) + { + wxASSERT( it->m_iter || it->m_citer ); + + if ( iterNum == STATIC_SIZE ) + { + wxLogTrace( _T("utf8"), _T("unexpectedly many iterators") ); + + size_t total = iterNum + 1; + for ( wxStringIteratorNode *it2 = it; it2; it2 = it2->m_next ) + total++; + indexes = new size_t[total]; + memcpy(indexes, indexes_a, sizeof(size_t) * STATIC_SIZE); + } + + size_t idx = it->m_iter + ? (*it->m_iter - strimpl.begin()) + : (*it->m_citer - strimpl.begin()); + + if ( idx > posIdx ) + idx += iterDiff; + + indexes[iterNum] = idx; + } + + // update the string: + strimpl.replace(m_pos, m_pos + lenOld, utf, lenNew); + + // finally, set the iterators to valid values again (note that this + // updates m_pos as well): + size_t i; + for ( i = 0, it = str.m_iterators.ptr; it; it = it->m_next, ++i ) + { + wxASSERT( i < iterNum ); + wxASSERT( it->m_iter || it->m_citer ); + + if ( it->m_iter ) + *it->m_iter = strimpl.begin() + indexes[i]; + else // it->m_citer + *it->m_citer = strimpl.begin() + indexes[i]; + } + + if ( indexes != indexes_a ) + delete[] indexes; } return *this;