]> git.saurik.com Git - wxWidgets.git/blob - src/common/strvararg.cpp
normalize printf/scanf format strings correctly on all platforms, while accounting...
[wxWidgets.git] / src / common / strvararg.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
5 // Created: 2007-02-19
6 // RCS-ID: $Id$
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ============================================================================
12 // declarations
13 // ============================================================================
14
15 // ----------------------------------------------------------------------------
16 // headers
17 // ----------------------------------------------------------------------------
18
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #include "wx/strvararg.h"
27 #include "wx/string.h"
28
29 // ============================================================================
30 // implementation
31 // ============================================================================
32
33 // ----------------------------------------------------------------------------
34 // wxArgNormalizer<>
35 // ----------------------------------------------------------------------------
36
37 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
38 {
39 return m_value.wx_str();
40 }
41
42 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
43 {
44 return m_value.AsInternal();
45 }
46
47 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
48 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s)
49 : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str())
50 {
51 }
52
53 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(const wxCStrData& s)
54 : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf())
55 {
56 }
57 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
58
59 // ----------------------------------------------------------------------------
60 // wxArgNormalizedString
61 // ----------------------------------------------------------------------------
62
63 wxString wxArgNormalizedString::GetString() const
64 {
65 if ( !IsValid() )
66 return wxEmptyString;
67
68 #if wxUSE_UTF8_LOCALE_ONLY
69 return wxString(wx_reinterpret_cast(const char*, m_ptr));
70 #else
71 #if wxUSE_UNICODE_UTF8
72 if ( wxLocaleIsUtf8 )
73 return wxString(wx_reinterpret_cast(const char*, m_ptr));
74 else
75 #endif
76 return wxString(wx_reinterpret_cast(const wxChar*, m_ptr));
77 #endif // !wxUSE_UTF8_LOCALE_ONLY
78 }
79
80 wxArgNormalizedString::operator wxString() const
81 {
82 return GetString();
83 }
84
85 // ----------------------------------------------------------------------------
86 // wxFormatConverter: class doing the "%s" and "%c" normalization
87 // ----------------------------------------------------------------------------
88
89 /*
90 There are four problems with wxPrintf() etc. format strings:
91
92 1) The printf vararg macros convert all forms of strings into
93 wxStringCharType* representation. This may make the format string
94 incorrect: for example, if %ls was used together with a wchar_t*
95 variadic argument, this would no longer work, because the templates
96 would change wchar_t* argument to wxStringCharType* and %ls would now
97 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
98 form is used.
99
100 2) To complicate matters further, the meaning of %s and %c is different
101 under Windows and on Unix. The Windows/MS convention is as follows:
102
103 In ANSI mode:
104
105 format specifier results in
106 -----------------------------------
107 %s, %hs, %hS char*
108 %ls, %S, %lS wchar_t*
109
110 In Unicode mode:
111
112 format specifier results in
113 -----------------------------------
114 %hs, %S, %hS char*
115 %s, %ls, %lS wchar_t*
116
117 (While on POSIX systems we have %C identical to %lc and %c always means
118 char (in any mode) while %lc always means wchar_t.)
119
120 In other words, we should _only_ use %s on Windows and %ls on Unix for
121 wxUSE_UNICODE_WCHAR build.
122
123 3) To make things even worse, we need two forms in UTF-8 build: one for
124 passing strings to ANSI functions under UTF-8 locales (this one should
125 use %s) and one for widechar functions used under non-UTF-8 locales
126 (this one should use %ls).
127
128 And, of course, the same should be done for %c as well.
129
130 4) Finally, in UTF-8 build when calling ANSI printf() function, we need to
131 translate %c to %s, because not every Unicode character can be
132 represented by a char.
133
134
135 wxScanf() family of functions is simpler, because we don't normalize their
136 variadic arguments and we only have to handle 2) above and only for widechar
137 versions.
138 */
139
140 template<typename T>
141 class wxFormatConverterBase
142 {
143 public:
144 typedef T CharType;
145
146 wxFormatConverterBase()
147 {
148 m_fmtOrig = NULL;
149 m_fmtLast = NULL;
150 m_nCopied = 0;
151 }
152
153 wxCharTypeBuffer<CharType> Convert(const CharType *format)
154 {
155 // this is reset to NULL if we modify the format string
156 m_fmtOrig = format;
157
158 while ( *format )
159 {
160 if ( CopyFmtChar(*format++) == _T('%') )
161 {
162 // skip any flags
163 while ( IsFlagChar(*format) )
164 CopyFmtChar(*format++);
165
166 // and possible width
167 if ( *format == _T('*') )
168 CopyFmtChar(*format++);
169 else
170 SkipDigits(&format);
171
172 // precision?
173 if ( *format == _T('.') )
174 {
175 CopyFmtChar(*format++);
176 if ( *format == _T('*') )
177 CopyFmtChar(*format++);
178 else
179 SkipDigits(&format);
180 }
181
182 // next we can have a size modifier
183 SizeModifier size;
184
185 switch ( *format )
186 {
187 case 'h':
188 size = Size_Short;
189 format++;
190 break;
191
192 case 'l':
193 // "ll" has a different meaning!
194 if ( format[1] != 'l' )
195 {
196 size = Size_Long;
197 format++;
198 break;
199 }
200 //else: fall through
201
202 default:
203 size = Size_Default;
204 }
205
206 CharType outConv = *format;
207 SizeModifier outSize = size;
208
209 // and finally we should have the type
210 switch ( *format )
211 {
212 case _T('S'):
213 case _T('s'):
214 // all strings were converted into the same form by
215 // wxArgNormalizer<T>, this form depends on the context
216 // in which the value is used (scanf/printf/wprintf):
217 HandleString(*format, size, outConv, outSize);
218 break;
219
220 case _T('C'):
221 case _T('c'):
222 HandleChar(*format, size, outConv, outSize);
223 break;
224
225 default:
226 // nothing special to do
227 break;
228 }
229
230 if ( outConv == *format && outSize == size ) // no change
231 {
232 if ( size != Size_Default )
233 CopyFmtChar(*(format - 1));
234 CopyFmtChar(*format);
235 }
236 else // something changed
237 {
238 switch ( outSize )
239 {
240 case Size_Long:
241 InsertFmtChar(_T('l'));
242 break;
243
244 case Size_Short:
245 InsertFmtChar(_T('h'));
246 break;
247
248 case Size_Default:
249 // nothing to do
250 break;
251 }
252 InsertFmtChar(outConv);
253 }
254
255 format++;
256 }
257 }
258
259 // notice that we only translated the string if m_fmtOrig == NULL (as
260 // set by CopyAllBefore()), otherwise we should simply use the original
261 // format
262 if ( m_fmtOrig )
263 {
264 return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
265 }
266 else
267 {
268 // NULL-terminate converted format string:
269 *m_fmtLast = 0;
270 return m_fmt;
271 }
272 }
273
274 virtual ~wxFormatConverterBase() {}
275
276 protected:
277 enum SizeModifier
278 {
279 Size_Default,
280 Size_Short,
281 Size_Long
282 };
283
284 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
285 // respectively), 'size' is the preceding size modifier; the new values of
286 // conversion and size specifiers must be written to outConv and outSize
287 virtual void HandleString(CharType conv, SizeModifier size,
288 CharType& outConv, SizeModifier& outSize) = 0;
289
290 // ditto for %C or %c
291 virtual void HandleChar(CharType conv, SizeModifier size,
292 CharType& outConv, SizeModifier& outSize) = 0;
293
294 private:
295 // copy another character to the translated format: this function does the
296 // copy if we are translating but doesn't do anything at all if we don't,
297 // so we don't create the translated format string at all unless we really
298 // need to (i.e. InsertFmtChar() is called)
299 CharType CopyFmtChar(CharType ch)
300 {
301 if ( !m_fmtOrig )
302 {
303 // we're translating, do copy
304 *(m_fmtLast++) = ch;
305 }
306 else
307 {
308 // simply increase the count which should be copied by
309 // CopyAllBefore() later if needed
310 m_nCopied++;
311 }
312
313 return ch;
314 }
315
316 // insert an extra character
317 void InsertFmtChar(CharType ch)
318 {
319 if ( m_fmtOrig )
320 {
321 // so far we haven't translated anything yet
322 CopyAllBefore();
323 }
324
325 *(m_fmtLast++) = ch;
326 }
327
328 void CopyAllBefore()
329 {
330 wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
331
332 // the modified format string is guaranteed to be no longer than
333 // 3/2 of the original (worst case: the entire format string consists
334 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
335 // allocate the buffer now and not worry about running out of space if
336 // we over-allocate a bit:
337 size_t fmtLen = wxStrlen(m_fmtOrig);
338 // worst case is of even length, so there's no rounding error in *3/2:
339 m_fmt.extend(fmtLen * 3 / 2);
340
341 if ( m_nCopied > 0 )
342 wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
343 m_fmtLast = m_fmt.data() + m_nCopied;
344
345 // we won't need it any longer and resetting it also indicates that we
346 // modified the format
347 m_fmtOrig = NULL;
348 }
349
350 static bool IsFlagChar(CharType ch)
351 {
352 return ch == _T('-') || ch == _T('+') ||
353 ch == _T('0') || ch == _T(' ') || ch == _T('#');
354 }
355
356 void SkipDigits(const CharType **ptpc)
357 {
358 while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
359 CopyFmtChar(*(*ptpc)++);
360 }
361
362 // the translated format
363 wxCharTypeBuffer<CharType> m_fmt;
364 CharType *m_fmtLast;
365
366 // the original format
367 const CharType *m_fmtOrig;
368
369 // the number of characters already copied (i.e. already parsed, but left
370 // unmodified)
371 size_t m_nCopied;
372 };
373
374
375
376 #ifdef __WINDOWS
377
378 // on Windows, we should use %s and %c regardless of the build:
379 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
380 {
381 virtual void HandleString(CharType WXUNUSED(conv),
382 SizeModifier WXUNUSED(size),
383 CharType& outConv, SizeModifier& outSize)
384 {
385 outConv = 's';
386 outSize = Size_Default;
387 }
388
389 virtual void HandleChar(CharType WXUNUSED(conv),
390 SizeModifier WXUNUSED(size),
391 CharType& outConv, SizeModifier& outSize)
392 {
393 outConv = 'c';
394 outSize = Size_Default;
395 }
396 };
397
398 #else // !__WINDOWS__
399
400 // on Unix, it's %s for ANSI functions and %ls for widechar:
401
402 #if !wxUSE_UTF8_LOCALE_ONLY
403 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
404 {
405 virtual void HandleString(CharType WXUNUSED(conv),
406 SizeModifier WXUNUSED(size),
407 CharType& outConv, SizeModifier& outSize)
408 {
409 outConv = 's';
410 outSize = Size_Long;
411 }
412
413 virtual void HandleChar(CharType WXUNUSED(conv),
414 SizeModifier WXUNUSED(size),
415 CharType& outConv, SizeModifier& outSize)
416 {
417 outConv = 'c';
418 outSize = Size_Long;
419 }
420 };
421 #endif // !wxUSE_UTF8_LOCALE_ONLY
422
423 #if wxUSE_UNICODE_UTF8
424 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
425 {
426 virtual void HandleString(CharType WXUNUSED(conv),
427 SizeModifier WXUNUSED(size),
428 CharType& outConv, SizeModifier& outSize)
429 {
430 outConv = 's';
431 outSize = Size_Default;
432 }
433
434 virtual void HandleChar(CharType WXUNUSED(conv),
435 SizeModifier WXUNUSED(size),
436 CharType& outConv, SizeModifier& outSize)
437 {
438 // added complication: %c should be translated to %s in UTF-8 build
439 outConv = 's';
440 outSize = Size_Default;
441 }
442 };
443 #endif // wxUSE_UNICODE_UTF8
444
445 #endif // __WINDOWS__/!__WINDOWS__
446
447 #if !wxUSE_UNICODE // FIXME-UTF8: remove
448 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
449 {
450 virtual void HandleString(CharType WXUNUSED(conv),
451 SizeModifier WXUNUSED(size),
452 CharType& outConv, SizeModifier& outSize)
453 {
454 outConv = 's';
455 outSize = Size_Default;
456 }
457
458 virtual void HandleChar(CharType WXUNUSED(conv),
459 SizeModifier WXUNUSED(size),
460 CharType& outConv, SizeModifier& outSize)
461 {
462 outConv = 'c';
463 outSize = Size_Default;
464 }
465 };
466 #endif // ANSI
467
468 #ifndef __WINDOWS__
469 /*
470
471 wxScanf() format translation is different, we need to translate %s to %ls
472 and %c to %lc on Unix (but not Windows and for widechar functions only!).
473
474 So to use native functions in order to get our semantics we must do the
475 following translations in Unicode mode:
476
477 wxWidgets specifier POSIX specifier
478 ----------------------------------------
479
480 %hc, %C, %hC %c
481 %c %lc
482
483 */
484 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
485 {
486 virtual void HandleString(CharType conv, SizeModifier size,
487 CharType& outConv, SizeModifier& outSize)
488 {
489 outConv = 's';
490 outSize = GetOutSize(conv == 'S', size);
491 }
492
493 virtual void HandleChar(CharType conv, SizeModifier size,
494 CharType& outConv, SizeModifier& outSize)
495 {
496 outConv = 'c';
497 outSize = GetOutSize(conv == 'C', size);
498 }
499
500 SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
501 {
502 // %S and %hS -> %s and %lS -> %ls
503 if ( convIsUpper )
504 {
505 if ( size == Size_Long )
506 return Size_Long;
507 else
508 return Size_Default;
509 }
510 else // %s or %c
511 {
512 if ( size == Size_Default )
513 return Size_Long;
514 else
515 return size;
516 }
517 }
518 };
519
520 const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
521 {
522 return wxScanfFormatConverterWchar().Convert(format);
523 }
524 #endif // !__WINDOWS__
525
526
527 // ----------------------------------------------------------------------------
528 // wxFormatString
529 // ----------------------------------------------------------------------------
530
531 #if !wxUSE_UNICODE_WCHAR
532 const char* wxFormatString::InputAsChar()
533 {
534 if ( m_char )
535 return m_char.data();
536
537 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
538 // is only called under UTF-8 locales, so we should return UTF-8 string,
539 // which is, again, what wx_str() returns:
540 if ( m_str )
541 return m_str->wx_str();
542
543 // ditto wxCStrData:
544 if ( m_cstr )
545 return m_cstr->AsInternal();
546
547 // the last case is that wide string was passed in: in that case, we need
548 // to convert it:
549 wxASSERT( m_wchar );
550
551 m_char = wxConvLibc.cWC2MB(m_wchar.data());
552
553 return m_char.data();
554 }
555
556 const char* wxFormatString::AsChar()
557 {
558 if ( !m_convertedChar )
559 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
560 m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
561 #else
562 m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
563 #endif
564
565 return m_convertedChar.data();
566 }
567 #endif // !wxUSE_UNICODE_WCHAR
568
569 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
570 const wchar_t* wxFormatString::InputAsWChar()
571 {
572 if ( m_wchar )
573 return m_wchar.data();
574
575 #if wxUSE_UNICODE_WCHAR
576 if ( m_str )
577 return m_str->wc_str();
578 if ( m_cstr )
579 return m_cstr->AsInternal();
580 #else // wxUSE_UNICODE_UTF8
581 if ( m_str )
582 {
583 m_wchar = m_str->wc_str();
584 return m_wchar.data();
585 }
586 if ( m_cstr )
587 {
588 m_wchar = m_cstr->AsWCharBuf();
589 return m_wchar.data();
590 }
591 #endif // wxUSE_UNICODE_WCHAR/UTF8
592
593 // the last case is that narrow string was passed in: in that case, we need
594 // to convert it:
595 wxASSERT( m_char );
596
597 m_wchar = wxConvLibc.cMB2WC(m_char.data());
598
599 return m_wchar.data();
600 }
601
602 const wchar_t* wxFormatString::AsWChar()
603 {
604 if ( !m_convertedWChar )
605 m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
606
607 return m_convertedWChar.data();
608 }
609 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY