]> git.saurik.com Git - wxWidgets.git/blob - src/osx/core/strconv_cf.cpp
Ensure that strings returned by wxMBConv_cf are in NFC form.
[wxWidgets.git] / src / osx / core / strconv_cf.cpp
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: src/osx/core/strconv_cf.cpp
3 // Purpose: Unicode conversion classes
4 // Author: David Elliott
5 // Modified by:
6 // Created: 2007-07-06
7 // RCS-ID: $Id$
8 // Copyright: (c) 2007 David Elliott
9 // Licence: wxWindows licence
10 /////////////////////////////////////////////////////////////////////////////
11
12 // For compilers that support precompilation, includes "wx.h".
13 #include "wx/wxprec.h"
14
15 #ifndef WX_PRECOMP
16 #include "wx/string.h"
17 #endif
18
19 #include "wx/strconv.h"
20 #include "wx/fontmap.h"
21
22 #ifdef __DARWIN__
23
24 #include "wx/osx/core/private/strconv_cf.h"
25 #include "wx/osx/core/cfref.h"
26
27
28 // ============================================================================
29 // CoreFoundation conversion classes
30 // ============================================================================
31
32 /* Provide factory functions for unit tests. Not in any header. Do not
33 * assume ABI compatibility even within a given wxWidgets release.
34 */
35
36 #if wxUSE_FONTMAP
37 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf( const char* name)
38 {
39 wxMBConv_cf *result = new wxMBConv_cf(name);
40 if(!result->IsOk())
41 {
42 delete result;
43 return NULL;
44 }
45 else
46 return result;
47 }
48 #endif // wxUSE_FONTMAP
49
50 WXDLLIMPEXP_BASE wxMBConv* new_wxMBConv_cf(wxFontEncoding encoding)
51 {
52 wxMBConv_cf *result = new wxMBConv_cf(encoding);
53 if(!result->IsOk())
54 {
55 delete result;
56 return NULL;
57 }
58 else
59 return result;
60 }
61
62 // Provide a constant for the wchat_t encoding used by the host platform.
63 #ifdef WORDS_BIGENDIAN
64 static const CFStringEncoding wxCFStringEncodingWcharT = kCFStringEncodingUTF32BE;
65 #else
66 static const CFStringEncoding wxCFStringEncodingWcharT = kCFStringEncodingUTF32LE;
67 #endif
68
69 size_t wxMBConv_cf::ToWChar(wchar_t * dst, size_t dstSize, const char * src, size_t srcSize) const
70 {
71 wxCHECK(src, wxCONV_FAILED);
72
73 /* NOTE: This is wrong if the source encoding has an element size
74 * other than char (e.g. it's kCFStringEncodingUnicode)
75 * If the user specifies it, it's presumably right though.
76 * Right now we don't support UTF-16 in anyway since wx can do a better job.
77 */
78 if(srcSize == wxNO_LEN)
79 srcSize = strlen(src) + 1;
80
81 // First create the temporary CFString
82 wxCFRef<CFStringRef> theString( CFStringCreateWithBytes (
83 NULL, //the allocator
84 (const UInt8*)src,
85 srcSize,
86 m_encoding,
87 false //no BOM/external representation
88 ));
89
90 if ( theString == NULL )
91 return wxCONV_FAILED;
92
93 // Ensure that the string is in canonical composed form (NFC): this is
94 // important because Darwin uses decomposed form (NFD) for e.g. file
95 // names but we want to use NFC internally.
96 wxCFRef<CFMutableStringRef>
97 cfMutableString(CFStringCreateMutableCopy(NULL, 0, theString));
98 CFStringNormalize(cfMutableString, kCFStringNormalizationFormC);
99 theString = cfMutableString;
100
101 /* NOTE: The string content includes the NULL element if the source string did
102 * That means we have to do nothing special because the destination will have
103 * the NULL element iff the source did and the NULL element will be included
104 * in the count iff it was included in the source count.
105 */
106
107
108 /* If we're compiling against Tiger headers we can support direct conversion
109 * to UTF32. If we are then run against a pre-Tiger system, the encoding
110 * won't be available so we'll defer to the string->UTF-16->UTF-32 conversion.
111 */
112 if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
113 {
114 CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
115 CFIndex usedBufLen;
116
117 CFIndex charsConverted = CFStringGetBytes(
118 theString,
119 fullStringRange,
120 wxCFStringEncodingWcharT,
121 0,
122 false,
123 // if dstSize is 0 then pass NULL to get required length in usedBufLen
124 dstSize != 0?(UInt8*)dst:NULL,
125 dstSize * sizeof(wchar_t),
126 &usedBufLen);
127
128 if(charsConverted < CFStringGetLength(theString))
129 return wxCONV_FAILED;
130
131 /* usedBufLen is the number of bytes written, so we divide by
132 * sizeof(wchar_t) to get the number of elements written.
133 */
134 wxASSERT( (usedBufLen % sizeof(wchar_t)) == 0 );
135
136 // CFStringGetBytes does exactly the right thing when buffer
137 // pointer is NULL and returns the number of bytes required
138 return usedBufLen / sizeof(wchar_t);
139 }
140 else
141 {
142 // NOTE: Includes NULL iff source did
143 /* NOTE: This is an approximation. The eventual UTF-32 will
144 * possibly have less elements but certainly not more.
145 */
146 size_t returnSize = CFStringGetLength(theString);
147
148 if (dstSize == 0 || dst == NULL)
149 {
150 return returnSize;
151 }
152
153 // Convert the entire string.. too hard to figure out how many UTF-16 we'd need
154 // for an undersized UTF-32 destination buffer.
155 CFRange fullStringRange = CFRangeMake(0, CFStringGetLength(theString));
156 UniChar *szUniCharBuffer = new UniChar[fullStringRange.length];
157
158 CFStringGetCharacters(theString, fullStringRange, szUniCharBuffer);
159
160 wxMBConvUTF16 converter;
161 returnSize = converter.ToWChar( dst, dstSize, (const char*)szUniCharBuffer, fullStringRange.length );
162 delete [] szUniCharBuffer;
163
164 return returnSize;
165 }
166 // NOTREACHED
167 }
168
169 size_t wxMBConv_cf::FromWChar(char *dst, size_t dstSize, const wchar_t *src, size_t srcSize) const
170 {
171 wxCHECK(src, wxCONV_FAILED);
172
173 if(srcSize == wxNO_LEN)
174 srcSize = wxStrlen(src) + 1;
175
176 // Temporary CFString
177 wxCFRef<CFStringRef> theString;
178
179 /* If we're compiling against Tiger headers we can support direct conversion
180 * from UTF32. If we are then run against a pre-Tiger system, the encoding
181 * won't be available so we'll defer to the UTF-32->UTF-16->string conversion.
182 */
183 if(CFStringIsEncodingAvailable(wxCFStringEncodingWcharT))
184 {
185 theString = wxCFRef<CFStringRef>(CFStringCreateWithBytes(
186 kCFAllocatorDefault,
187 (UInt8*)src,
188 srcSize * sizeof(wchar_t),
189 wxCFStringEncodingWcharT,
190 false));
191 }
192 else
193 {
194 wxMBConvUTF16 converter;
195 size_t cbUniBuffer = converter.FromWChar( NULL, 0, src, srcSize );
196 wxASSERT(cbUniBuffer % sizeof(UniChar));
197
198 // Will be free'd by kCFAllocatorMalloc when CFString is released
199 UniChar *tmpUniBuffer = (UniChar*)malloc(cbUniBuffer);
200
201 cbUniBuffer = converter.FromWChar( (char*) tmpUniBuffer, cbUniBuffer, src, srcSize );
202 wxASSERT(cbUniBuffer % sizeof(UniChar));
203
204 theString = wxCFRef<CFStringRef>(CFStringCreateWithCharactersNoCopy(
205 kCFAllocatorDefault,
206 tmpUniBuffer,
207 cbUniBuffer / sizeof(UniChar),
208 kCFAllocatorMalloc
209 ));
210
211 }
212
213 wxCHECK(theString != NULL, wxCONV_FAILED);
214
215 CFIndex usedBufLen;
216
217 CFIndex charsConverted = CFStringGetBytes(
218 theString,
219 CFRangeMake(0, CFStringGetLength(theString)),
220 m_encoding,
221 0, // FAIL on unconvertible characters
222 false, // not an external representation
223 (UInt8*)dst,
224 dstSize,
225 &usedBufLen
226 );
227
228 // when dst is non-NULL, we check usedBufLen against dstSize as
229 // CFStringGetBytes sometimes treats dst as being NULL when dstSize==0
230 if( (charsConverted < CFStringGetLength(theString)) ||
231 (dst && (size_t) usedBufLen > dstSize) )
232 return wxCONV_FAILED;
233
234 return usedBufLen;
235 }
236
237 #endif // __DARWIN__
238
239