]> git.saurik.com Git - wxWidgets.git/blob - src/common/strvararg.cpp
Fixed wxRichTextCtrl caret test case
[wxWidgets.git] / src / common / strvararg.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
5 // Created: 2007-02-19
6 // RCS-ID: $Id$
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ============================================================================
12 // declarations
13 // ============================================================================
14
15 // ----------------------------------------------------------------------------
16 // headers
17 // ----------------------------------------------------------------------------
18
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #include "wx/strvararg.h"
27 #include "wx/string.h"
28 #include "wx/crt.h"
29 #include "wx/private/wxprintf.h"
30
31 // ============================================================================
32 // implementation
33 // ============================================================================
34
35 // ----------------------------------------------------------------------------
36 // wxArgNormalizer<>
37 // ----------------------------------------------------------------------------
38
39 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
40 {
41 return m_value.wx_str();
42 }
43
44 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
45 {
46 return m_value.AsInternal();
47 }
48
49 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
50 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
51 const wxString& s,
52 const wxFormatString *fmt, unsigned index)
53 : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
54 {
55 }
56
57 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
58 const wxCStrData& s,
59 const wxFormatString *fmt, unsigned index)
60 : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
61 {
62 }
63 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
64
65 // ----------------------------------------------------------------------------
66 // wxArgNormalizedString
67 // ----------------------------------------------------------------------------
68
69 wxString wxArgNormalizedString::GetString() const
70 {
71 if ( !IsValid() )
72 return wxEmptyString;
73
74 #if wxUSE_UTF8_LOCALE_ONLY
75 return wxString(reinterpret_cast<const char*>(m_ptr));
76 #else
77 #if wxUSE_UNICODE_UTF8
78 if ( wxLocaleIsUtf8 )
79 return wxString(reinterpret_cast<const char*>(m_ptr));
80 else
81 #endif
82 return wxString(reinterpret_cast<const wxChar*>(m_ptr));
83 #endif // !wxUSE_UTF8_LOCALE_ONLY
84 }
85
86 wxArgNormalizedString::operator wxString() const
87 {
88 return GetString();
89 }
90
91 // ----------------------------------------------------------------------------
92 // wxFormatConverter: class doing the "%s" and "%c" normalization
93 // ----------------------------------------------------------------------------
94
95 /*
96 There are three problems with wxPrintf() etc. format strings:
97
98 1) The printf vararg macros convert all forms of strings into
99 wxStringCharType* representation. This may make the format string
100 incorrect: for example, if %ls was used together with a wchar_t*
101 variadic argument, this would no longer work, because the templates
102 would change wchar_t* argument to wxStringCharType* and %ls would now
103 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
104 form is used.
105
106 2) To complicate matters further, the meaning of %s and %c is different
107 under Windows and on Unix. The Windows/MS convention is as follows:
108
109 In ANSI mode:
110
111 format specifier results in
112 -----------------------------------
113 %s, %hs, %hS char*
114 %ls, %S, %lS wchar_t*
115
116 In Unicode mode:
117
118 format specifier results in
119 -----------------------------------
120 %hs, %S, %hS char*
121 %s, %ls, %lS wchar_t*
122
123 (While on POSIX systems we have %C identical to %lc and %c always means
124 char (in any mode) while %lc always means wchar_t.)
125
126 In other words, we should _only_ use %s on Windows and %ls on Unix for
127 wxUSE_UNICODE_WCHAR build.
128
129 3) To make things even worse, we need two forms in UTF-8 build: one for
130 passing strings to ANSI functions under UTF-8 locales (this one should
131 use %s) and one for widechar functions used under non-UTF-8 locales
132 (this one should use %ls).
133
134 And, of course, the same should be done for %c as well.
135
136
137 wxScanf() family of functions is simpler, because we don't normalize their
138 variadic arguments and we only have to handle 2) above and only for widechar
139 versions.
140 */
141
142 template<typename T>
143 class wxFormatConverterBase
144 {
145 public:
146 typedef T CharType;
147
148 wxFormatConverterBase()
149 {
150 m_fmtOrig = NULL;
151 m_fmtLast = NULL;
152 m_nCopied = 0;
153 }
154
155 wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
156 {
157 // this is reset to NULL if we modify the format string
158 m_fmtOrig = format;
159
160 while ( *format )
161 {
162 if ( CopyFmtChar(*format++) == wxT('%') )
163 {
164 #if wxUSE_PRINTF_POS_PARAMS
165 if ( *format >= '0' && *format <= '9' )
166 {
167 SkipDigits(&format);
168 if ( *format == '$' )
169 {
170 // It was a positional argument specification.
171 CopyFmtChar(*format++);
172 }
173 //else: it was a width specification, nothing else to do.
174 }
175 #endif // wxUSE_PRINTF_POS_PARAMS
176
177 // skip any flags
178 while ( IsFlagChar(*format) )
179 CopyFmtChar(*format++);
180
181 // and possible width
182 if ( *format == wxT('*') )
183 CopyFmtChar(*format++);
184 else
185 SkipDigits(&format);
186
187 // precision?
188 if ( *format == wxT('.') )
189 {
190 CopyFmtChar(*format++);
191 if ( *format == wxT('*') )
192 CopyFmtChar(*format++);
193 else
194 SkipDigits(&format);
195 }
196
197 // next we can have a size modifier
198 SizeModifier size;
199
200 switch ( *format )
201 {
202 case 'h':
203 size = Size_Short;
204 format++;
205 break;
206
207 case 'l':
208 // "ll" has a different meaning!
209 if ( format[1] != 'l' )
210 {
211 size = Size_Long;
212 format++;
213 break;
214 }
215 //else: fall through
216
217 default:
218 size = Size_Default;
219 }
220
221 CharType outConv = *format;
222 SizeModifier outSize = size;
223
224 // and finally we should have the type
225 switch ( *format )
226 {
227 case wxT('S'):
228 case wxT('s'):
229 // all strings were converted into the same form by
230 // wxArgNormalizer<T>, this form depends on the context
231 // in which the value is used (scanf/printf/wprintf):
232 HandleString(*format, size, outConv, outSize);
233 break;
234
235 case wxT('C'):
236 case wxT('c'):
237 HandleChar(*format, size, outConv, outSize);
238 break;
239
240 default:
241 // nothing special to do
242 break;
243 }
244
245 if ( outConv == *format && outSize == size ) // no change
246 {
247 if ( size != Size_Default )
248 CopyFmtChar(*(format - 1));
249 CopyFmtChar(*format);
250 }
251 else // something changed
252 {
253 switch ( outSize )
254 {
255 case Size_Long:
256 InsertFmtChar(wxT('l'));
257 break;
258
259 case Size_Short:
260 InsertFmtChar(wxT('h'));
261 break;
262
263 case Size_Default:
264 // nothing to do
265 break;
266 }
267 InsertFmtChar(outConv);
268 }
269
270 format++;
271 }
272 }
273
274 // notice that we only translated the string if m_fmtOrig == NULL (as
275 // set by CopyAllBefore()), otherwise we should simply use the original
276 // format
277 if ( m_fmtOrig )
278 {
279 return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
280 }
281 else
282 {
283 // shrink converted format string to actual size (instead of
284 // over-sized allocation from CopyAllBefore()) and NUL-terminate
285 // it:
286 m_fmt.shrink(m_fmtLast - m_fmt.data());
287 return m_fmt;
288 }
289 }
290
291 virtual ~wxFormatConverterBase() {}
292
293 protected:
294 enum SizeModifier
295 {
296 Size_Default,
297 Size_Short,
298 Size_Long
299 };
300
301 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
302 // respectively), 'size' is the preceding size modifier; the new values of
303 // conversion and size specifiers must be written to outConv and outSize
304 virtual void HandleString(CharType conv, SizeModifier size,
305 CharType& outConv, SizeModifier& outSize) = 0;
306
307 // ditto for %C or %c
308 virtual void HandleChar(CharType conv, SizeModifier size,
309 CharType& outConv, SizeModifier& outSize) = 0;
310
311 private:
312 // copy another character to the translated format: this function does the
313 // copy if we are translating but doesn't do anything at all if we don't,
314 // so we don't create the translated format string at all unless we really
315 // need to (i.e. InsertFmtChar() is called)
316 CharType CopyFmtChar(CharType ch)
317 {
318 if ( !m_fmtOrig )
319 {
320 // we're translating, do copy
321 *(m_fmtLast++) = ch;
322 }
323 else
324 {
325 // simply increase the count which should be copied by
326 // CopyAllBefore() later if needed
327 m_nCopied++;
328 }
329
330 return ch;
331 }
332
333 // insert an extra character
334 void InsertFmtChar(CharType ch)
335 {
336 if ( m_fmtOrig )
337 {
338 // so far we haven't translated anything yet
339 CopyAllBefore();
340 }
341
342 *(m_fmtLast++) = ch;
343 }
344
345 void CopyAllBefore()
346 {
347 wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
348
349 // the modified format string is guaranteed to be no longer than
350 // 3/2 of the original (worst case: the entire format string consists
351 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
352 // allocate the buffer now and not worry about running out of space if
353 // we over-allocate a bit:
354 size_t fmtLen = wxStrlen(m_fmtOrig);
355 // worst case is of even length, so there's no rounding error in *3/2:
356 m_fmt.extend(fmtLen * 3 / 2);
357
358 if ( m_nCopied > 0 )
359 wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
360 m_fmtLast = m_fmt.data() + m_nCopied;
361
362 // we won't need it any longer and resetting it also indicates that we
363 // modified the format
364 m_fmtOrig = NULL;
365 }
366
367 static bool IsFlagChar(CharType ch)
368 {
369 return ch == wxT('-') || ch == wxT('+') ||
370 ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
371 }
372
373 void SkipDigits(const CharType **ptpc)
374 {
375 while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
376 CopyFmtChar(*(*ptpc)++);
377 }
378
379 // the translated format
380 wxCharTypeBuffer<CharType> m_fmt;
381 CharType *m_fmtLast;
382
383 // the original format
384 const CharType *m_fmtOrig;
385
386 // the number of characters already copied (i.e. already parsed, but left
387 // unmodified)
388 size_t m_nCopied;
389 };
390
391 #if defined(__WINDOWS__) && !defined(__CYGWIN__)
392
393 // on Windows, we should use %s and %c regardless of the build:
394 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
395 {
396 virtual void HandleString(CharType WXUNUSED(conv),
397 SizeModifier WXUNUSED(size),
398 CharType& outConv, SizeModifier& outSize)
399 {
400 outConv = 's';
401 outSize = Size_Default;
402 }
403
404 virtual void HandleChar(CharType WXUNUSED(conv),
405 SizeModifier WXUNUSED(size),
406 CharType& outConv, SizeModifier& outSize)
407 {
408 outConv = 'c';
409 outSize = Size_Default;
410 }
411 };
412
413 #else // !__WINDOWS__
414
415 // on Unix, it's %s for ANSI functions and %ls for widechar:
416
417 #if !wxUSE_UTF8_LOCALE_ONLY
418 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
419 {
420 virtual void HandleString(CharType WXUNUSED(conv),
421 SizeModifier WXUNUSED(size),
422 CharType& outConv, SizeModifier& outSize)
423 {
424 outConv = 's';
425 outSize = Size_Long;
426 }
427
428 virtual void HandleChar(CharType WXUNUSED(conv),
429 SizeModifier WXUNUSED(size),
430 CharType& outConv, SizeModifier& outSize)
431 {
432 outConv = 'c';
433 outSize = Size_Long;
434 }
435 };
436 #endif // !wxUSE_UTF8_LOCALE_ONLY
437
438 #endif // __WINDOWS__/!__WINDOWS__
439
440 #if wxUSE_UNICODE_UTF8
441 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
442 {
443 virtual void HandleString(CharType WXUNUSED(conv),
444 SizeModifier WXUNUSED(size),
445 CharType& outConv, SizeModifier& outSize)
446 {
447 outConv = 's';
448 outSize = Size_Default;
449 }
450
451 virtual void HandleChar(CharType WXUNUSED(conv),
452 SizeModifier WXUNUSED(size),
453 CharType& outConv, SizeModifier& outSize)
454 {
455 // chars are represented using wchar_t in both builds, so this is
456 // the same as above
457 outConv = 'c';
458 outSize = Size_Long;
459 }
460 };
461 #endif // wxUSE_UNICODE_UTF8
462
463 #if !wxUSE_UNICODE // FIXME-UTF8: remove
464 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
465 {
466 virtual void HandleString(CharType WXUNUSED(conv),
467 SizeModifier WXUNUSED(size),
468 CharType& outConv, SizeModifier& outSize)
469 {
470 outConv = 's';
471 outSize = Size_Default;
472 }
473
474 virtual void HandleChar(CharType WXUNUSED(conv),
475 SizeModifier WXUNUSED(size),
476 CharType& outConv, SizeModifier& outSize)
477 {
478 outConv = 'c';
479 outSize = Size_Default;
480 }
481 };
482 #endif // ANSI
483
484 #ifndef __WINDOWS__
485 /*
486
487 wxScanf() format translation is different, we need to translate %s to %ls
488 and %c to %lc on Unix (but not Windows and for widechar functions only!).
489
490 So to use native functions in order to get our semantics we must do the
491 following translations in Unicode mode:
492
493 wxWidgets specifier POSIX specifier
494 ----------------------------------------
495
496 %hc, %C, %hC %c
497 %c %lc
498
499 */
500 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
501 {
502 virtual void HandleString(CharType conv, SizeModifier size,
503 CharType& outConv, SizeModifier& outSize)
504 {
505 outConv = 's';
506 outSize = GetOutSize(conv == 'S', size);
507 }
508
509 virtual void HandleChar(CharType conv, SizeModifier size,
510 CharType& outConv, SizeModifier& outSize)
511 {
512 outConv = 'c';
513 outSize = GetOutSize(conv == 'C', size);
514 }
515
516 SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
517 {
518 // %S and %hS -> %s and %lS -> %ls
519 if ( convIsUpper )
520 {
521 if ( size == Size_Long )
522 return Size_Long;
523 else
524 return Size_Default;
525 }
526 else // %s or %c
527 {
528 if ( size == Size_Default )
529 return Size_Long;
530 else
531 return size;
532 }
533 }
534 };
535
536 const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
537 {
538 return wxScanfFormatConverterWchar().Convert(format);
539 }
540 #endif // !__WINDOWS__
541
542
543 // ----------------------------------------------------------------------------
544 // wxFormatString
545 // ----------------------------------------------------------------------------
546
547 #if !wxUSE_UNICODE_WCHAR
548 const char* wxFormatString::InputAsChar()
549 {
550 if ( m_char )
551 return m_char.data();
552
553 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
554 // is only called under UTF-8 locales, so we should return UTF-8 string,
555 // which is, again, what wx_str() returns:
556 if ( m_str )
557 return m_str->wx_str();
558
559 // ditto wxCStrData:
560 if ( m_cstr )
561 return m_cstr->AsInternal();
562
563 // the last case is that wide string was passed in: in that case, we need
564 // to convert it:
565 wxASSERT( m_wchar );
566
567 m_char = wxConvLibc.cWC2MB(m_wchar.data());
568
569 return m_char.data();
570 }
571
572 const char* wxFormatString::AsChar()
573 {
574 if ( !m_convertedChar )
575 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
576 m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
577 #else
578 m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
579 #endif
580
581 return m_convertedChar.data();
582 }
583 #endif // !wxUSE_UNICODE_WCHAR
584
585 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
586 const wchar_t* wxFormatString::InputAsWChar()
587 {
588 if ( m_wchar )
589 return m_wchar.data();
590
591 #if wxUSE_UNICODE_WCHAR
592 if ( m_str )
593 return m_str->wc_str();
594 if ( m_cstr )
595 return m_cstr->AsInternal();
596 #else // wxUSE_UNICODE_UTF8
597 if ( m_str )
598 {
599 m_wchar = m_str->wc_str();
600 return m_wchar.data();
601 }
602 if ( m_cstr )
603 {
604 m_wchar = m_cstr->AsWCharBuf();
605 return m_wchar.data();
606 }
607 #endif // wxUSE_UNICODE_WCHAR/UTF8
608
609 // the last case is that narrow string was passed in: in that case, we need
610 // to convert it:
611 wxASSERT( m_char );
612
613 m_wchar = wxConvLibc.cMB2WC(m_char.data());
614
615 return m_wchar.data();
616 }
617
618 const wchar_t* wxFormatString::AsWChar()
619 {
620 if ( !m_convertedWChar )
621 m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
622
623 return m_convertedWChar.data();
624 }
625 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
626
627 wxString wxFormatString::InputAsString() const
628 {
629 if ( m_str )
630 return *m_str;
631 if ( m_cstr )
632 return m_cstr->AsString();
633 if ( m_wchar )
634 return wxString(m_wchar);
635 if ( m_char )
636 return wxString(m_char);
637
638 wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
639 return wxString();
640 }
641
642 // ----------------------------------------------------------------------------
643 // wxFormatString::GetArgumentType()
644 // ----------------------------------------------------------------------------
645
646 namespace
647 {
648
649 template<typename CharType>
650 wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
651 unsigned n)
652 {
653 wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
654 "empty format string not allowed here" );
655
656 wxPrintfConvSpecParser<CharType> parser(format);
657
658 wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
659 "more arguments than format string specifiers?" );
660
661 wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
662 "requested argument not found - invalid format string?" );
663
664 switch ( parser.pspec[n-1]->m_type )
665 {
666 case wxPAT_CHAR:
667 case wxPAT_WCHAR:
668 return wxFormatString::Arg_Char;
669
670 case wxPAT_PCHAR:
671 case wxPAT_PWCHAR:
672 return wxFormatString::Arg_String;
673
674 case wxPAT_INT:
675 return wxFormatString::Arg_Int;
676 case wxPAT_LONGINT:
677 return wxFormatString::Arg_LongInt;
678 #ifdef wxLongLong_t
679 case wxPAT_LONGLONGINT:
680 return wxFormatString::Arg_LongLongInt;
681 #endif
682 case wxPAT_SIZET:
683 return wxFormatString::Arg_Size_t;
684
685 case wxPAT_DOUBLE:
686 return wxFormatString::Arg_Double;
687 case wxPAT_LONGDOUBLE:
688 return wxFormatString::Arg_LongDouble;
689
690 case wxPAT_POINTER:
691 return wxFormatString::Arg_Pointer;
692
693 case wxPAT_NINT:
694 return wxFormatString::Arg_IntPtr;
695 case wxPAT_NSHORTINT:
696 return wxFormatString::Arg_ShortIntPtr;
697 case wxPAT_NLONGINT:
698 return wxFormatString::Arg_LongIntPtr;
699
700 case wxPAT_STAR:
701 // "*" requires argument of type int
702 return wxFormatString::Arg_Int;
703
704 case wxPAT_INVALID:
705 // (handled after the switch statement)
706 break;
707 }
708
709 // silence warning
710 wxFAIL_MSG( "unexpected argument type" );
711 return wxFormatString::Arg_Unknown;
712 }
713
714 } // anonymous namespace
715
716 wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
717 {
718 if ( m_char )
719 return DoGetArgumentType(m_char.data(), n);
720 else if ( m_wchar )
721 return DoGetArgumentType(m_wchar.data(), n);
722 else if ( m_str )
723 return DoGetArgumentType(m_str->wx_str(), n);
724 else if ( m_cstr )
725 return DoGetArgumentType(m_cstr->AsInternal(), n);
726
727 wxFAIL_MSG( "unreachable code" );
728 return Arg_Unknown;
729 }