No changes, just fix a typo in a comment in docview event handling code.
[wxWidgets.git] / src / common / strvararg.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
5 // Created: 2007-02-19
6 // Copyright: (c) 2007 REA Elektronik GmbH
7 // Licence: wxWindows licence
8 ///////////////////////////////////////////////////////////////////////////////
9
10 // ============================================================================
11 // declarations
12 // ============================================================================
13
14 // ----------------------------------------------------------------------------
15 // headers
16 // ----------------------------------------------------------------------------
17
18 // for compilers that support precompilation, includes "wx.h".
19 #include "wx/wxprec.h"
20
21 #ifdef __BORLANDC__
22 #pragma hdrstop
23 #endif
24
25 #include "wx/strvararg.h"
26 #include "wx/string.h"
27 #include "wx/crt.h"
28 #include "wx/private/wxprintf.h"
29
30 // ============================================================================
31 // implementation
32 // ============================================================================
33
34 // ----------------------------------------------------------------------------
35 // wxArgNormalizer<>
36 // ----------------------------------------------------------------------------
37
38 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
39 {
40 return m_value.wx_str();
41 }
42
43 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
44 {
45 return m_value.AsInternal();
46 }
47
48 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
49 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
50 const wxString& s,
51 const wxFormatString *fmt, unsigned index)
52 : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
53 {
54 }
55
56 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
57 const wxCStrData& s,
58 const wxFormatString *fmt, unsigned index)
59 : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
60 {
61 }
62 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
63
64 // ----------------------------------------------------------------------------
65 // wxArgNormalizedString
66 // ----------------------------------------------------------------------------
67
68 wxString wxArgNormalizedString::GetString() const
69 {
70 if ( !IsValid() )
71 return wxEmptyString;
72
73 #if wxUSE_UTF8_LOCALE_ONLY
74 return wxString(reinterpret_cast<const char*>(m_ptr));
75 #else
76 #if wxUSE_UNICODE_UTF8
77 if ( wxLocaleIsUtf8 )
78 return wxString(reinterpret_cast<const char*>(m_ptr));
79 else
80 #endif
81 return wxString(reinterpret_cast<const wxChar*>(m_ptr));
82 #endif // !wxUSE_UTF8_LOCALE_ONLY
83 }
84
85 wxArgNormalizedString::operator wxString() const
86 {
87 return GetString();
88 }
89
90 // ----------------------------------------------------------------------------
91 // wxFormatConverter: class doing the "%s" and "%c" normalization
92 // ----------------------------------------------------------------------------
93
94 /*
95 There are four problems with wxPrintf() etc. format strings:
96
97 1) The printf vararg macros convert all forms of strings into
98 wxStringCharType* representation. This may make the format string
99 incorrect: for example, if %ls was used together with a wchar_t*
100 variadic argument, this would no longer work, because the templates
101 would change wchar_t* argument to wxStringCharType* and %ls would now
102 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
103 form is used.
104
105 2) To complicate matters further, the meaning of %s and %c is different
106 under Windows and on Unix. The Windows/MS convention is as follows:
107
108 In ANSI mode:
109
110 format specifier results in
111 -----------------------------------
112 %s, %hs, %hS char*
113 %ls, %S, %lS wchar_t*
114
115 In Unicode mode:
116
117 format specifier results in
118 -----------------------------------
119 %hs, %S, %hS char*
120 %s, %ls, %lS wchar_t*
121
122 (While on POSIX systems we have %C identical to %lc and %c always means
123 char (in any mode) while %lc always means wchar_t.)
124
125 In other words, we should _only_ use %s on Windows and %ls on Unix for
126 wxUSE_UNICODE_WCHAR build.
127
128 3) To make things even worse, we need two forms in UTF-8 build: one for
129 passing strings to ANSI functions under UTF-8 locales (this one should
130 use %s) and one for widechar functions used under non-UTF-8 locales
131 (this one should use %ls).
132
133 And, of course, the same should be done for %c as well.
134
135
136 wxScanf() family of functions is simpler, because we don't normalize their
137 variadic arguments and we only have to handle 2) above and only for widechar
138 versions.
139 */
140
141 template<typename T>
142 class wxFormatConverterBase
143 {
144 public:
145 typedef T CharType;
146
147 wxFormatConverterBase()
148 {
149 m_fmtOrig = NULL;
150 m_fmtLast = NULL;
151 m_nCopied = 0;
152 }
153
154 wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
155 {
156 // this is reset to NULL if we modify the format string
157 m_fmtOrig = format;
158
159 while ( *format )
160 {
161 if ( CopyFmtChar(*format++) == wxT('%') )
162 {
163 #if wxUSE_PRINTF_POS_PARAMS
164 if ( *format >= '0' && *format <= '9' )
165 {
166 SkipDigits(&format);
167 if ( *format == '$' )
168 {
169 // It was a positional argument specification.
170 CopyFmtChar(*format++);
171 }
172 //else: it was a width specification, nothing else to do.
173 }
174 #endif // wxUSE_PRINTF_POS_PARAMS
175
176 // skip any flags
177 while ( IsFlagChar(*format) )
178 CopyFmtChar(*format++);
179
180 // and possible width
181 if ( *format == wxT('*') )
182 CopyFmtChar(*format++);
183 else
184 SkipDigits(&format);
185
186 // precision?
187 if ( *format == wxT('.') )
188 {
189 CopyFmtChar(*format++);
190 if ( *format == wxT('*') )
191 CopyFmtChar(*format++);
192 else
193 SkipDigits(&format);
194 }
195
196 // next we can have a size modifier
197 SizeModifier size;
198
199 switch ( *format )
200 {
201 case 'h':
202 size = Size_Short;
203 format++;
204 break;
205
206 case 'l':
207 // "ll" has a different meaning!
208 if ( format[1] != 'l' )
209 {
210 size = Size_Long;
211 format++;
212 break;
213 }
214 //else: fall through
215
216 default:
217 size = Size_Default;
218 }
219
220 CharType outConv = *format;
221 SizeModifier outSize = size;
222
223 // and finally we should have the type
224 switch ( *format )
225 {
226 case wxT('S'):
227 case wxT('s'):
228 // all strings were converted into the same form by
229 // wxArgNormalizer<T>, this form depends on the context
230 // in which the value is used (scanf/printf/wprintf):
231 HandleString(*format, size, outConv, outSize);
232 break;
233
234 case wxT('C'):
235 case wxT('c'):
236 HandleChar(*format, size, outConv, outSize);
237 break;
238
239 default:
240 // nothing special to do
241 break;
242 }
243
244 if ( outConv == *format && outSize == size ) // no change
245 {
246 if ( size != Size_Default )
247 CopyFmtChar(*(format - 1));
248 CopyFmtChar(*format);
249 }
250 else // something changed
251 {
252 switch ( outSize )
253 {
254 case Size_Long:
255 InsertFmtChar(wxT('l'));
256 break;
257
258 case Size_Short:
259 InsertFmtChar(wxT('h'));
260 break;
261
262 case Size_Default:
263 // nothing to do
264 break;
265 }
266 InsertFmtChar(outConv);
267 }
268
269 format++;
270 }
271 }
272
273 // notice that we only translated the string if m_fmtOrig == NULL (as
274 // set by CopyAllBefore()), otherwise we should simply use the original
275 // format
276 if ( m_fmtOrig )
277 {
278 return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
279 }
280 else
281 {
282 // shrink converted format string to actual size (instead of
283 // over-sized allocation from CopyAllBefore()) and NUL-terminate
284 // it:
285 m_fmt.shrink(m_fmtLast - m_fmt.data());
286 return m_fmt;
287 }
288 }
289
290 virtual ~wxFormatConverterBase() {}
291
292 protected:
293 enum SizeModifier
294 {
295 Size_Default,
296 Size_Short,
297 Size_Long
298 };
299
300 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
301 // respectively), 'size' is the preceding size modifier; the new values of
302 // conversion and size specifiers must be written to outConv and outSize
303 virtual void HandleString(CharType conv, SizeModifier size,
304 CharType& outConv, SizeModifier& outSize) = 0;
305
306 // ditto for %C or %c
307 virtual void HandleChar(CharType conv, SizeModifier size,
308 CharType& outConv, SizeModifier& outSize) = 0;
309
310 private:
311 // copy another character to the translated format: this function does the
312 // copy if we are translating but doesn't do anything at all if we don't,
313 // so we don't create the translated format string at all unless we really
314 // need to (i.e. InsertFmtChar() is called)
315 CharType CopyFmtChar(CharType ch)
316 {
317 if ( !m_fmtOrig )
318 {
319 // we're translating, do copy
320 *(m_fmtLast++) = ch;
321 }
322 else
323 {
324 // simply increase the count which should be copied by
325 // CopyAllBefore() later if needed
326 m_nCopied++;
327 }
328
329 return ch;
330 }
331
332 // insert an extra character
333 void InsertFmtChar(CharType ch)
334 {
335 if ( m_fmtOrig )
336 {
337 // so far we haven't translated anything yet
338 CopyAllBefore();
339 }
340
341 *(m_fmtLast++) = ch;
342 }
343
344 void CopyAllBefore()
345 {
346 wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
347
348 // the modified format string is guaranteed to be no longer than
349 // 3/2 of the original (worst case: the entire format string consists
350 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
351 // allocate the buffer now and not worry about running out of space if
352 // we over-allocate a bit:
353 size_t fmtLen = wxStrlen(m_fmtOrig);
354 // worst case is of even length, so there's no rounding error in *3/2:
355 m_fmt.extend(fmtLen * 3 / 2);
356
357 if ( m_nCopied > 0 )
358 wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
359 m_fmtLast = m_fmt.data() + m_nCopied;
360
361 // we won't need it any longer and resetting it also indicates that we
362 // modified the format
363 m_fmtOrig = NULL;
364 }
365
366 static bool IsFlagChar(CharType ch)
367 {
368 return ch == wxT('-') || ch == wxT('+') ||
369 ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
370 }
371
372 void SkipDigits(const CharType **ptpc)
373 {
374 while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
375 CopyFmtChar(*(*ptpc)++);
376 }
377
378 // the translated format
379 wxCharTypeBuffer<CharType> m_fmt;
380 CharType *m_fmtLast;
381
382 // the original format
383 const CharType *m_fmtOrig;
384
385 // the number of characters already copied (i.e. already parsed, but left
386 // unmodified)
387 size_t m_nCopied;
388 };
389
390 #if defined(__WINDOWS__) && !defined(__CYGWIN__)
391
392 // on Windows, we should use %s and %c regardless of the build:
393 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
394 {
395 virtual void HandleString(CharType WXUNUSED(conv),
396 SizeModifier WXUNUSED(size),
397 CharType& outConv, SizeModifier& outSize)
398 {
399 outConv = 's';
400 outSize = Size_Default;
401 }
402
403 virtual void HandleChar(CharType WXUNUSED(conv),
404 SizeModifier WXUNUSED(size),
405 CharType& outConv, SizeModifier& outSize)
406 {
407 outConv = 'c';
408 outSize = Size_Default;
409 }
410 };
411
412 #else // !__WINDOWS__
413
414 // on Unix, it's %s for ANSI functions and %ls for widechar:
415
416 #if !wxUSE_UTF8_LOCALE_ONLY
417 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
418 {
419 virtual void HandleString(CharType WXUNUSED(conv),
420 SizeModifier WXUNUSED(size),
421 CharType& outConv, SizeModifier& outSize)
422 {
423 outConv = 's';
424 outSize = Size_Long;
425 }
426
427 virtual void HandleChar(CharType WXUNUSED(conv),
428 SizeModifier WXUNUSED(size),
429 CharType& outConv, SizeModifier& outSize)
430 {
431 outConv = 'c';
432 outSize = Size_Long;
433 }
434 };
435 #endif // !wxUSE_UTF8_LOCALE_ONLY
436
437 #endif // __WINDOWS__/!__WINDOWS__
438
439 #if wxUSE_UNICODE_UTF8
440 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
441 {
442 virtual void HandleString(CharType WXUNUSED(conv),
443 SizeModifier WXUNUSED(size),
444 CharType& outConv, SizeModifier& outSize)
445 {
446 outConv = 's';
447 outSize = Size_Default;
448 }
449
450 virtual void HandleChar(CharType WXUNUSED(conv),
451 SizeModifier WXUNUSED(size),
452 CharType& outConv, SizeModifier& outSize)
453 {
454 // chars are represented using wchar_t in both builds, so this is
455 // the same as above
456 outConv = 'c';
457 outSize = Size_Long;
458 }
459 };
460 #endif // wxUSE_UNICODE_UTF8
461
462 #if !wxUSE_UNICODE // FIXME-UTF8: remove
463 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
464 {
465 virtual void HandleString(CharType WXUNUSED(conv),
466 SizeModifier WXUNUSED(size),
467 CharType& outConv, SizeModifier& outSize)
468 {
469 outConv = 's';
470 outSize = Size_Default;
471 }
472
473 virtual void HandleChar(CharType WXUNUSED(conv),
474 SizeModifier WXUNUSED(size),
475 CharType& outConv, SizeModifier& outSize)
476 {
477 outConv = 'c';
478 outSize = Size_Default;
479 }
480 };
481 #endif // ANSI
482
483 #ifndef __WINDOWS__
484 /*
485
486 wxScanf() format translation is different, we need to translate %s to %ls
487 and %c to %lc on Unix (but not Windows and for widechar functions only!).
488
489 So to use native functions in order to get our semantics we must do the
490 following translations in Unicode mode:
491
492 wxWidgets specifier POSIX specifier
493 ----------------------------------------
494
495 %hc, %C, %hC %c
496 %c %lc
497
498 */
499 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
500 {
501 virtual void HandleString(CharType conv, SizeModifier size,
502 CharType& outConv, SizeModifier& outSize)
503 {
504 outConv = 's';
505 outSize = GetOutSize(conv == 'S', size);
506 }
507
508 virtual void HandleChar(CharType conv, SizeModifier size,
509 CharType& outConv, SizeModifier& outSize)
510 {
511 outConv = 'c';
512 outSize = GetOutSize(conv == 'C', size);
513 }
514
515 SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
516 {
517 // %S and %hS -> %s and %lS -> %ls
518 if ( convIsUpper )
519 {
520 if ( size == Size_Long )
521 return Size_Long;
522 else
523 return Size_Default;
524 }
525 else // %s or %c
526 {
527 if ( size == Size_Default )
528 return Size_Long;
529 else
530 return size;
531 }
532 }
533 };
534
535 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
536 {
537 return wxScanfFormatConverterWchar().Convert(format);
538 }
539 #endif // !__WINDOWS__
540
541
542 // ----------------------------------------------------------------------------
543 // wxFormatString
544 // ----------------------------------------------------------------------------
545
546 #if !wxUSE_UNICODE_WCHAR
547 const char* wxFormatString::InputAsChar()
548 {
549 if ( m_char )
550 return m_char.data();
551
552 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
553 // is only called under UTF-8 locales, so we should return UTF-8 string,
554 // which is, again, what wx_str() returns:
555 if ( m_str )
556 return m_str->wx_str();
557
558 // ditto wxCStrData:
559 if ( m_cstr )
560 return m_cstr->AsInternal();
561
562 // the last case is that wide string was passed in: in that case, we need
563 // to convert it:
564 wxASSERT( m_wchar );
565
566 m_char = wxConvLibc.cWC2MB(m_wchar.data());
567
568 return m_char.data();
569 }
570
571 const char* wxFormatString::AsChar()
572 {
573 if ( !m_convertedChar )
574 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
575 m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
576 #else
577 m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
578 #endif
579
580 return m_convertedChar.data();
581 }
582 #endif // !wxUSE_UNICODE_WCHAR
583
584 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
585 const wchar_t* wxFormatString::InputAsWChar()
586 {
587 if ( m_wchar )
588 return m_wchar.data();
589
590 #if wxUSE_UNICODE_WCHAR
591 if ( m_str )
592 return m_str->wc_str();
593 if ( m_cstr )
594 return m_cstr->AsInternal();
595 #else // wxUSE_UNICODE_UTF8
596 if ( m_str )
597 {
598 m_wchar = m_str->wc_str();
599 return m_wchar.data();
600 }
601 if ( m_cstr )
602 {
603 m_wchar = m_cstr->AsWCharBuf();
604 return m_wchar.data();
605 }
606 #endif // wxUSE_UNICODE_WCHAR/UTF8
607
608 // the last case is that narrow string was passed in: in that case, we need
609 // to convert it:
610 wxASSERT( m_char );
611
612 m_wchar = wxConvLibc.cMB2WC(m_char.data());
613
614 return m_wchar.data();
615 }
616
617 const wchar_t* wxFormatString::AsWChar()
618 {
619 if ( !m_convertedWChar )
620 m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
621
622 return m_convertedWChar.data();
623 }
624 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
625
626 wxString wxFormatString::InputAsString() const
627 {
628 if ( m_str )
629 return *m_str;
630 if ( m_cstr )
631 return m_cstr->AsString();
632 if ( m_wchar )
633 return wxString(m_wchar);
634 if ( m_char )
635 return wxString(m_char);
636
637 wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
638 return wxString();
639 }
640
641 // ----------------------------------------------------------------------------
642 // wxFormatString::GetArgumentType()
643 // ----------------------------------------------------------------------------
644
645 namespace
646 {
647
648 template<typename CharType>
649 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
650 unsigned n)
651 {
652 wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
653 "empty format string not allowed here" );
654
655 wxPrintfConvSpecParser<CharType> parser(format);
656
657 wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
658 "more arguments than format string specifiers?" );
659
660 wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
661 "requested argument not found - invalid format string?" );
662
663 switch ( parser.pspec[n-1]->m_type )
664 {
665 case wxPAT_CHAR:
666 case wxPAT_WCHAR:
667 return wxFormatString::Arg_Char;
668
669 case wxPAT_PCHAR:
670 case wxPAT_PWCHAR:
671 return wxFormatString::Arg_String;
672
673 case wxPAT_INT:
674 return wxFormatString::Arg_Int;
675 case wxPAT_LONGINT:
676 return wxFormatString::Arg_LongInt;
677 #ifdef wxLongLong_t
678 case wxPAT_LONGLONGINT:
679 return wxFormatString::Arg_LongLongInt;
680 #endif
681 case wxPAT_SIZET:
682 return wxFormatString::Arg_Size_t;
683
684 case wxPAT_DOUBLE:
685 return wxFormatString::Arg_Double;
686 case wxPAT_LONGDOUBLE:
687 return wxFormatString::Arg_LongDouble;
688
689 case wxPAT_POINTER:
690 return wxFormatString::Arg_Pointer;
691
692 case wxPAT_NINT:
693 return wxFormatString::Arg_IntPtr;
694 case wxPAT_NSHORTINT:
695 return wxFormatString::Arg_ShortIntPtr;
696 case wxPAT_NLONGINT:
697 return wxFormatString::Arg_LongIntPtr;
698
699 case wxPAT_STAR:
700 // "*" requires argument of type int
701 return wxFormatString::Arg_Int;
702
703 case wxPAT_INVALID:
704 // (handled after the switch statement)
705 break;
706 }
707
708 // silence warning
709 wxFAIL_MSG( "unexpected argument type" );
710 return wxFormatString::Arg_Unknown;
711 }
712
713 } // anonymous namespace
714
715 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
716 {
717 if ( m_char )
718 return DoGetArgumentType(m_char.data(), n);
719 else if ( m_wchar )
720 return DoGetArgumentType(m_wchar.data(), n);
721 else if ( m_str )
722 return DoGetArgumentType(m_str->wx_str(), n);
723 else if ( m_cstr )
724 return DoGetArgumentType(m_cstr->AsInternal(), n);
725
726 wxFAIL_MSG( "unreachable code" );
727 return Arg_Unknown;
728 }