correct the test for Windows platform (this also fixes unit test failures in FormatCo...
[wxWidgets.git] / src / common / strvararg.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
5 // Created: 2007-02-19
6 // RCS-ID: $Id$
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ============================================================================
12 // declarations
13 // ============================================================================
14
15 // ----------------------------------------------------------------------------
16 // headers
17 // ----------------------------------------------------------------------------
18
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #include "wx/strvararg.h"
27 #include "wx/string.h"
28 #include "wx/crt.h"
29 #include "wx/private/wxprintf.h"
30
31 // ============================================================================
32 // implementation
33 // ============================================================================
34
35 // ----------------------------------------------------------------------------
36 // wxArgNormalizer<>
37 // ----------------------------------------------------------------------------
38
39 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
40 {
41 return m_value.wx_str();
42 }
43
44 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
45 {
46 return m_value.AsInternal();
47 }
48
49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
50 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
51 const wxString& s,
52 const wxFormatString *fmt, unsigned index)
53 : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
54 {
55 }
56
57 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
58 const wxCStrData& s,
59 const wxFormatString *fmt, unsigned index)
60 : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
61 {
62 }
63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
64
65 // ----------------------------------------------------------------------------
66 // wxArgNormalizedString
67 // ----------------------------------------------------------------------------
68
69 wxString wxArgNormalizedString::GetString() const
70 {
71 if ( !IsValid() )
72 return wxEmptyString;
73
74 #if wxUSE_UTF8_LOCALE_ONLY
75 return wxString(reinterpret_cast<const char*>(m_ptr));
76 #else
77 #if wxUSE_UNICODE_UTF8
78 if ( wxLocaleIsUtf8 )
79 return wxString(reinterpret_cast<const char*>(m_ptr));
80 else
81 #endif
82 return wxString(reinterpret_cast<const wxChar*>(m_ptr));
83 #endif // !wxUSE_UTF8_LOCALE_ONLY
84 }
85
86 wxArgNormalizedString::operator wxString() const
87 {
88 return GetString();
89 }
90
91 // ----------------------------------------------------------------------------
92 // wxFormatConverter: class doing the "%s" and "%c" normalization
93 // ----------------------------------------------------------------------------
94
95 /*
96 There are three problems with wxPrintf() etc. format strings:
97
98 1) The printf vararg macros convert all forms of strings into
99 wxStringCharType* representation. This may make the format string
100 incorrect: for example, if %ls was used together with a wchar_t*
101 variadic argument, this would no longer work, because the templates
102 would change wchar_t* argument to wxStringCharType* and %ls would now
103 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
104 form is used.
105
106 2) To complicate matters further, the meaning of %s and %c is different
107 under Windows and on Unix. The Windows/MS convention is as follows:
108
109 In ANSI mode:
110
111 format specifier results in
112 -----------------------------------
113 %s, %hs, %hS char*
114 %ls, %S, %lS wchar_t*
115
116 In Unicode mode:
117
118 format specifier results in
119 -----------------------------------
120 %hs, %S, %hS char*
121 %s, %ls, %lS wchar_t*
122
123 (While on POSIX systems we have %C identical to %lc and %c always means
124 char (in any mode) while %lc always means wchar_t.)
125
126 In other words, we should _only_ use %s on Windows and %ls on Unix for
127 wxUSE_UNICODE_WCHAR build.
128
129 3) To make things even worse, we need two forms in UTF-8 build: one for
130 passing strings to ANSI functions under UTF-8 locales (this one should
131 use %s) and one for widechar functions used under non-UTF-8 locales
132 (this one should use %ls).
133
134 And, of course, the same should be done for %c as well.
135
136
137 wxScanf() family of functions is simpler, because we don't normalize their
138 variadic arguments and we only have to handle 2) above and only for widechar
139 versions.
140 */
141
142 template<typename T>
143 class wxFormatConverterBase
144 {
145 public:
146 typedef T CharType;
147
148 wxFormatConverterBase()
149 {
150 m_fmtOrig = NULL;
151 m_fmtLast = NULL;
152 m_nCopied = 0;
153 }
154
155 wxCharTypeBuffer<CharType> Convert(const CharType *format)
156 {
157 // this is reset to NULL if we modify the format string
158 m_fmtOrig = format;
159
160 while ( *format )
161 {
162 if ( CopyFmtChar(*format++) == _T('%') )
163 {
164 // skip any flags
165 while ( IsFlagChar(*format) )
166 CopyFmtChar(*format++);
167
168 // and possible width
169 if ( *format == _T('*') )
170 CopyFmtChar(*format++);
171 else
172 SkipDigits(&format);
173
174 // precision?
175 if ( *format == _T('.') )
176 {
177 CopyFmtChar(*format++);
178 if ( *format == _T('*') )
179 CopyFmtChar(*format++);
180 else
181 SkipDigits(&format);
182 }
183
184 // next we can have a size modifier
185 SizeModifier size;
186
187 switch ( *format )
188 {
189 case 'h':
190 size = Size_Short;
191 format++;
192 break;
193
194 case 'l':
195 // "ll" has a different meaning!
196 if ( format[1] != 'l' )
197 {
198 size = Size_Long;
199 format++;
200 break;
201 }
202 //else: fall through
203
204 default:
205 size = Size_Default;
206 }
207
208 CharType outConv = *format;
209 SizeModifier outSize = size;
210
211 // and finally we should have the type
212 switch ( *format )
213 {
214 case _T('S'):
215 case _T('s'):
216 // all strings were converted into the same form by
217 // wxArgNormalizer<T>, this form depends on the context
218 // in which the value is used (scanf/printf/wprintf):
219 HandleString(*format, size, outConv, outSize);
220 break;
221
222 case _T('C'):
223 case _T('c'):
224 HandleChar(*format, size, outConv, outSize);
225 break;
226
227 default:
228 // nothing special to do
229 break;
230 }
231
232 if ( outConv == *format && outSize == size ) // no change
233 {
234 if ( size != Size_Default )
235 CopyFmtChar(*(format - 1));
236 CopyFmtChar(*format);
237 }
238 else // something changed
239 {
240 switch ( outSize )
241 {
242 case Size_Long:
243 InsertFmtChar(_T('l'));
244 break;
245
246 case Size_Short:
247 InsertFmtChar(_T('h'));
248 break;
249
250 case Size_Default:
251 // nothing to do
252 break;
253 }
254 InsertFmtChar(outConv);
255 }
256
257 format++;
258 }
259 }
260
261 // notice that we only translated the string if m_fmtOrig == NULL (as
262 // set by CopyAllBefore()), otherwise we should simply use the original
263 // format
264 if ( m_fmtOrig )
265 {
266 return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
267 }
268 else
269 {
270 // NULL-terminate converted format string:
271 *m_fmtLast = 0;
272 return m_fmt;
273 }
274 }
275
276 virtual ~wxFormatConverterBase() {}
277
278 protected:
279 enum SizeModifier
280 {
281 Size_Default,
282 Size_Short,
283 Size_Long
284 };
285
286 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
287 // respectively), 'size' is the preceding size modifier; the new values of
288 // conversion and size specifiers must be written to outConv and outSize
289 virtual void HandleString(CharType conv, SizeModifier size,
290 CharType& outConv, SizeModifier& outSize) = 0;
291
292 // ditto for %C or %c
293 virtual void HandleChar(CharType conv, SizeModifier size,
294 CharType& outConv, SizeModifier& outSize) = 0;
295
296 private:
297 // copy another character to the translated format: this function does the
298 // copy if we are translating but doesn't do anything at all if we don't,
299 // so we don't create the translated format string at all unless we really
300 // need to (i.e. InsertFmtChar() is called)
301 CharType CopyFmtChar(CharType ch)
302 {
303 if ( !m_fmtOrig )
304 {
305 // we're translating, do copy
306 *(m_fmtLast++) = ch;
307 }
308 else
309 {
310 // simply increase the count which should be copied by
311 // CopyAllBefore() later if needed
312 m_nCopied++;
313 }
314
315 return ch;
316 }
317
318 // insert an extra character
319 void InsertFmtChar(CharType ch)
320 {
321 if ( m_fmtOrig )
322 {
323 // so far we haven't translated anything yet
324 CopyAllBefore();
325 }
326
327 *(m_fmtLast++) = ch;
328 }
329
330 void CopyAllBefore()
331 {
332 wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
333
334 // the modified format string is guaranteed to be no longer than
335 // 3/2 of the original (worst case: the entire format string consists
336 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
337 // allocate the buffer now and not worry about running out of space if
338 // we over-allocate a bit:
339 size_t fmtLen = wxStrlen(m_fmtOrig);
340 // worst case is of even length, so there's no rounding error in *3/2:
341 m_fmt.extend(fmtLen * 3 / 2);
342
343 if ( m_nCopied > 0 )
344 wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
345 m_fmtLast = m_fmt.data() + m_nCopied;
346
347 // we won't need it any longer and resetting it also indicates that we
348 // modified the format
349 m_fmtOrig = NULL;
350 }
351
352 static bool IsFlagChar(CharType ch)
353 {
354 return ch == _T('-') || ch == _T('+') ||
355 ch == _T('0') || ch == _T(' ') || ch == _T('#');
356 }
357
358 void SkipDigits(const CharType **ptpc)
359 {
360 while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
361 CopyFmtChar(*(*ptpc)++);
362 }
363
364 // the translated format
365 wxCharTypeBuffer<CharType> m_fmt;
366 CharType *m_fmtLast;
367
368 // the original format
369 const CharType *m_fmtOrig;
370
371 // the number of characters already copied (i.e. already parsed, but left
372 // unmodified)
373 size_t m_nCopied;
374 };
375
376 #ifdef __WINDOWS__
377
378 // on Windows, we should use %s and %c regardless of the build:
379 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
380 {
381 virtual void HandleString(CharType WXUNUSED(conv),
382 SizeModifier WXUNUSED(size),
383 CharType& outConv, SizeModifier& outSize)
384 {
385 outConv = 's';
386 outSize = Size_Default;
387 }
388
389 virtual void HandleChar(CharType WXUNUSED(conv),
390 SizeModifier WXUNUSED(size),
391 CharType& outConv, SizeModifier& outSize)
392 {
393 outConv = 'c';
394 outSize = Size_Default;
395 }
396 };
397
398 #else // !__WINDOWS__
399
400 // on Unix, it's %s for ANSI functions and %ls for widechar:
401
402 #if !wxUSE_UTF8_LOCALE_ONLY
403 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
404 {
405 virtual void HandleString(CharType WXUNUSED(conv),
406 SizeModifier WXUNUSED(size),
407 CharType& outConv, SizeModifier& outSize)
408 {
409 outConv = 's';
410 outSize = Size_Long;
411 }
412
413 virtual void HandleChar(CharType WXUNUSED(conv),
414 SizeModifier WXUNUSED(size),
415 CharType& outConv, SizeModifier& outSize)
416 {
417 outConv = 'c';
418 outSize = Size_Long;
419 }
420 };
421 #endif // !wxUSE_UTF8_LOCALE_ONLY
422
423 #if wxUSE_UNICODE_UTF8
424 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
425 {
426 virtual void HandleString(CharType WXUNUSED(conv),
427 SizeModifier WXUNUSED(size),
428 CharType& outConv, SizeModifier& outSize)
429 {
430 outConv = 's';
431 outSize = Size_Default;
432 }
433
434 virtual void HandleChar(CharType WXUNUSED(conv),
435 SizeModifier WXUNUSED(size),
436 CharType& outConv, SizeModifier& outSize)
437 {
438 // chars are represented using wchar_t in both builds, so this is
439 // the same as above
440 outConv = 'c';
441 outSize = Size_Long;
442 }
443 };
444 #endif // wxUSE_UNICODE_UTF8
445
446 #endif // __WINDOWS__/!__WINDOWS__
447
448 #if !wxUSE_UNICODE // FIXME-UTF8: remove
449 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
450 {
451 virtual void HandleString(CharType WXUNUSED(conv),
452 SizeModifier WXUNUSED(size),
453 CharType& outConv, SizeModifier& outSize)
454 {
455 outConv = 's';
456 outSize = Size_Default;
457 }
458
459 virtual void HandleChar(CharType WXUNUSED(conv),
460 SizeModifier WXUNUSED(size),
461 CharType& outConv, SizeModifier& outSize)
462 {
463 outConv = 'c';
464 outSize = Size_Default;
465 }
466 };
467 #endif // ANSI
468
469 #ifndef __WINDOWS__
470 /*
471
472 wxScanf() format translation is different, we need to translate %s to %ls
473 and %c to %lc on Unix (but not Windows and for widechar functions only!).
474
475 So to use native functions in order to get our semantics we must do the
476 following translations in Unicode mode:
477
478 wxWidgets specifier POSIX specifier
479 ----------------------------------------
480
481 %hc, %C, %hC %c
482 %c %lc
483
484 */
485 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
486 {
487 virtual void HandleString(CharType conv, SizeModifier size,
488 CharType& outConv, SizeModifier& outSize)
489 {
490 outConv = 's';
491 outSize = GetOutSize(conv == 'S', size);
492 }
493
494 virtual void HandleChar(CharType conv, SizeModifier size,
495 CharType& outConv, SizeModifier& outSize)
496 {
497 outConv = 'c';
498 outSize = GetOutSize(conv == 'C', size);
499 }
500
501 SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
502 {
503 // %S and %hS -> %s and %lS -> %ls
504 if ( convIsUpper )
505 {
506 if ( size == Size_Long )
507 return Size_Long;
508 else
509 return Size_Default;
510 }
511 else // %s or %c
512 {
513 if ( size == Size_Default )
514 return Size_Long;
515 else
516 return size;
517 }
518 }
519 };
520
521 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
522 {
523 return wxScanfFormatConverterWchar().Convert(format);
524 }
525 #endif // !__WINDOWS__
526
527
528 // ----------------------------------------------------------------------------
529 // wxFormatString
530 // ----------------------------------------------------------------------------
531
532 #if !wxUSE_UNICODE_WCHAR
533 const char* wxFormatString::InputAsChar()
534 {
535 if ( m_char )
536 return m_char.data();
537
538 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
539 // is only called under UTF-8 locales, so we should return UTF-8 string,
540 // which is, again, what wx_str() returns:
541 if ( m_str )
542 return m_str->wx_str();
543
544 // ditto wxCStrData:
545 if ( m_cstr )
546 return m_cstr->AsInternal();
547
548 // the last case is that wide string was passed in: in that case, we need
549 // to convert it:
550 wxASSERT( m_wchar );
551
552 m_char = wxConvLibc.cWC2MB(m_wchar.data());
553
554 return m_char.data();
555 }
556
557 const char* wxFormatString::AsChar()
558 {
559 if ( !m_convertedChar )
560 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
561 m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
562 #else
563 m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
564 #endif
565
566 return m_convertedChar.data();
567 }
568 #endif // !wxUSE_UNICODE_WCHAR
569
570 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
571 const wchar_t* wxFormatString::InputAsWChar()
572 {
573 if ( m_wchar )
574 return m_wchar.data();
575
576 #if wxUSE_UNICODE_WCHAR
577 if ( m_str )
578 return m_str->wc_str();
579 if ( m_cstr )
580 return m_cstr->AsInternal();
581 #else // wxUSE_UNICODE_UTF8
582 if ( m_str )
583 {
584 m_wchar = m_str->wc_str();
585 return m_wchar.data();
586 }
587 if ( m_cstr )
588 {
589 m_wchar = m_cstr->AsWCharBuf();
590 return m_wchar.data();
591 }
592 #endif // wxUSE_UNICODE_WCHAR/UTF8
593
594 // the last case is that narrow string was passed in: in that case, we need
595 // to convert it:
596 wxASSERT( m_char );
597
598 m_wchar = wxConvLibc.cMB2WC(m_char.data());
599
600 return m_wchar.data();
601 }
602
603 const wchar_t* wxFormatString::AsWChar()
604 {
605 if ( !m_convertedWChar )
606 m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
607
608 return m_convertedWChar.data();
609 }
610 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
611
612 wxString wxFormatString::InputAsString() const
613 {
614 if ( m_str )
615 return *m_str;
616 if ( m_cstr )
617 return m_cstr->AsString();
618 if ( m_wchar )
619 return wxString(m_wchar);
620 if ( m_char )
621 return wxString(m_char);
622
623 wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
624 return wxString();
625 }
626
627 // ----------------------------------------------------------------------------
628 // wxFormatString::GetArgumentType()
629 // ----------------------------------------------------------------------------
630
631 namespace
632 {
633
634 template<typename CharType>
635 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
636 unsigned n)
637 {
638 wxCHECK_MSG( format, wxFormatString::Arg_Other,
639 "empty format string not allowed here" );
640
641 wxPrintfConvSpecParser<CharType> parser(format);
642
643 wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Other,
644 "requested argument not found - invalid format string?" );
645
646 switch ( parser.pspec[n-1]->m_type )
647 {
648 case wxPAT_CHAR:
649 case wxPAT_WCHAR:
650 return wxFormatString::Arg_Char;
651
652 default:
653 return wxFormatString::Arg_Other;
654 }
655 }
656
657 } // anonymous namespace
658
659 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
660 {
661 if ( m_char )
662 return DoGetArgumentType(m_char.data(), n);
663 else if ( m_wchar )
664 return DoGetArgumentType(m_wchar.data(), n);
665 else if ( m_str )
666 return DoGetArgumentType(m_str->wx_str(), n);
667 else if ( m_cstr )
668 return DoGetArgumentType(m_cstr->AsInternal(), n);
669
670 wxFAIL_MSG( "unreachable code" );
671 return Arg_Other;
672 }