From 8244507f687f59a6493948cc4482ee19f82e66c5 Mon Sep 17 00:00:00 2001 From: Vadim Zeitlin Date: Fri, 5 Nov 2010 21:40:09 +0000 Subject: [PATCH] Ensure that strings returned by wxMBConv_cf are in NFC form. Normalize all Unicode strings used internally even though the Darwin kernel gives them to us in decomposed (NFD) form. Closes #11730. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@66033 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/common/strconv.cpp | 5 +++-- src/osx/core/strconv_cf.cpp | 8 ++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/common/strconv.cpp b/src/common/strconv.cpp index fb90c2b03b..d442253f35 100644 --- a/src/common/strconv.cpp +++ b/src/common/strconv.cpp @@ -3443,8 +3443,9 @@ WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvCurrent = wxGet_wxConvLibcPtr(); WXDLLIMPEXP_DATA_BASE(wxMBConv *) wxConvUI = wxGet_wxConvLocalPtr(); #ifdef __DARWIN__ -// The xnu kernel always communicates file paths in decomposed UTF-8. -// WARNING: Are we sure that CFString's conversion will cause decomposition? +// It is important to use this conversion object under Darwin as it ensures +// that Unicode strings are (re)composed correctly even though xnu kernel uses +// decomposed form internally (at least for the file names). static wxMBConv_cf wxConvMacUTF8DObj(wxFONTENCODING_UTF8); #endif diff --git a/src/osx/core/strconv_cf.cpp b/src/osx/core/strconv_cf.cpp index c77825c23a..5f37413386 100644 --- a/src/osx/core/strconv_cf.cpp +++ b/src/osx/core/strconv_cf.cpp @@ -90,6 +90,14 @@ WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding) if ( theString == NULL ) return wxCONV_FAILED; + // Ensure that the string is in canonical composed form (NFC): this is + // important because Darwin uses decomposed form (NFD) for e.g. file + // names but we want to use NFC internally. 
+ wxCFRef<CFMutableStringRef> + cfMutableString(CFStringCreateMutableCopy(NULL, 0, theString)); + CFStringNormalize(cfMutableString, kCFStringNormalizationFormC); + theString = cfMutableString; + /* NOTE: The string content includes the NULL element if the source string did * That means we have to do nothing special because the destination will have * the NULL element iff the source did and the NULL element will be included -- 2.45.2