]> git.saurik.com Git - wxWidgets.git/blame - src/common/strvararg.cpp
Always link with expat in monolithic build.
[wxWidgets.git] / src / common / strvararg.cpp
CommitLineData
c9f78968
VS
1///////////////////////////////////////////////////////////////////////////////
2// Name: src/common/strvararg.cpp
3// Purpose: macros for implementing type-safe vararg passing of strings
4// Author: Vaclav Slavik
5// Created: 2007-02-19
6// RCS-ID: $Id$
7// Copyright: (c) 2007 REA Elektronik GmbH
8// Licence: wxWindows licence
9///////////////////////////////////////////////////////////////////////////////
10
11// ============================================================================
12// declarations
13// ============================================================================
14
15// ----------------------------------------------------------------------------
16// headers
17// ----------------------------------------------------------------------------
18
19// for compilers that support precompilation, includes "wx.h".
20#include "wx/wxprec.h"
21
22#ifdef __BORLANDC__
23 #pragma hdrstop
24#endif
25
26#include "wx/strvararg.h"
c9f78968 27#include "wx/string.h"
f6e38901 28#include "wx/crt.h"
47346406 29#include "wx/private/wxprintf.h"
c9f78968
VS
30
31// ============================================================================
32// implementation
33// ============================================================================
34
1528e0b8
VS
35// ----------------------------------------------------------------------------
36// wxArgNormalizer<>
37// ----------------------------------------------------------------------------
c9f78968 38
2523e9b7 39const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
c9f78968 40{
8f93a29f 41 return m_value.wx_str();
c9f78968 42}
c9f78968 43
2523e9b7 44const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
c9f78968 45{
2523e9b7 46 return m_value.AsInternal();
c9f78968
VS
47}
48
111d9948 49#if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
47346406
VS
50wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(
51 const wxString& s,
52 const wxFormatString *fmt, unsigned index)
53 : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str(), fmt, index)
c9f78968 54{
c9f78968
VS
55}
56
47346406
VS
57wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(
58 const wxCStrData& s,
59 const wxFormatString *fmt, unsigned index)
60 : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf(), fmt, index)
c9f78968 61{
81727065 62}
111d9948 63#endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
81727065 64
1528e0b8
VS
65// ----------------------------------------------------------------------------
66// wxArgNormalizedString
67// ----------------------------------------------------------------------------
68
2523e9b7 69wxString wxArgNormalizedString::GetString() const
359bd4d1 70{
2523e9b7
VS
71 if ( !IsValid() )
72 return wxEmptyString;
73
74#if wxUSE_UTF8_LOCALE_ONLY
5c33522f 75 return wxString(reinterpret_cast<const char*>(m_ptr));
2523e9b7
VS
76#else
77 #if wxUSE_UNICODE_UTF8
78 if ( wxLocaleIsUtf8 )
5c33522f 79 return wxString(reinterpret_cast<const char*>(m_ptr));
2523e9b7
VS
80 else
81 #endif
5c33522f 82 return wxString(reinterpret_cast<const wxChar*>(m_ptr));
2523e9b7 83#endif // !wxUSE_UTF8_LOCALE_ONLY
359bd4d1
VS
84}
85
2523e9b7 86wxArgNormalizedString::operator wxString() const
359bd4d1 87{
2523e9b7 88 return GetString();
359bd4d1 89}
1528e0b8 90
50e27899
VS
91// ----------------------------------------------------------------------------
92// wxFormatConverter: class doing the "%s" and "%c" normalization
93// ----------------------------------------------------------------------------
94
95/*
96 There are three problems with wxPrintf() etc. format strings:
97
98 1) The printf vararg macros convert all forms of strings into
99 wxStringCharType* representation. This may make the format string
100 incorrect: for example, if %ls was used together with a wchar_t*
101 variadic argument, this would no longer work, because the templates
102 would change wchar_t* argument to wxStringCharType* and %ls would now
103 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
104 form is used.
105
106 2) To complicate matters further, the meaning of %s and %c is different
107 under Windows and on Unix. The Windows/MS convention is as follows:
108
109 In ANSI mode:
110
111 format specifier results in
112 -----------------------------------
113 %s, %hs, %hS char*
114 %ls, %S, %lS wchar_t*
115
116 In Unicode mode:
117
118 format specifier results in
119 -----------------------------------
120 %hs, %S, %hS char*
121 %s, %ls, %lS wchar_t*
122
123 (While on POSIX systems we have %C identical to %lc and %c always means
124 char (in any mode) while %lc always means wchar_t.)
125
126 In other words, we should _only_ use %s on Windows and %ls on Unix for
127 wxUSE_UNICODE_WCHAR build.
128
129 3) To make things even worse, we need two forms in UTF-8 build: one for
130 passing strings to ANSI functions under UTF-8 locales (this one should
131 use %s) and one for widechar functions used under non-UTF-8 locales
132 (this one should use %ls).
133
134 And, of course, the same should be done for %c as well.
135
50e27899
VS
136
137 wxScanf() family of functions is simpler, because we don't normalize their
138 variadic arguments and we only have to handle 2) above and only for widechar
139 versions.
140*/
141
142template<typename T>
143class wxFormatConverterBase
144{
145public:
146 typedef T CharType;
147
148 wxFormatConverterBase()
149 {
150 m_fmtOrig = NULL;
151 m_fmtLast = NULL;
152 m_nCopied = 0;
153 }
154
a6bb7a28 155 wxScopedCharTypeBuffer<CharType> Convert(const CharType *format)
50e27899
VS
156 {
157 // this is reset to NULL if we modify the format string
158 m_fmtOrig = format;
159
160 while ( *format )
161 {
9a83f860 162 if ( CopyFmtChar(*format++) == wxT('%') )
50e27899
VS
163 {
164 // skip any flags
165 while ( IsFlagChar(*format) )
166 CopyFmtChar(*format++);
167
168 // and possible width
9a83f860 169 if ( *format == wxT('*') )
50e27899
VS
170 CopyFmtChar(*format++);
171 else
172 SkipDigits(&format);
173
174 // precision?
9a83f860 175 if ( *format == wxT('.') )
50e27899
VS
176 {
177 CopyFmtChar(*format++);
9a83f860 178 if ( *format == wxT('*') )
50e27899
VS
179 CopyFmtChar(*format++);
180 else
181 SkipDigits(&format);
182 }
183
184 // next we can have a size modifier
185 SizeModifier size;
186
187 switch ( *format )
188 {
189 case 'h':
190 size = Size_Short;
191 format++;
192 break;
193
194 case 'l':
195 // "ll" has a different meaning!
196 if ( format[1] != 'l' )
197 {
198 size = Size_Long;
199 format++;
200 break;
201 }
202 //else: fall through
203
204 default:
205 size = Size_Default;
206 }
207
208 CharType outConv = *format;
209 SizeModifier outSize = size;
210
211 // and finally we should have the type
212 switch ( *format )
213 {
9a83f860
VZ
214 case wxT('S'):
215 case wxT('s'):
50e27899
VS
216 // all strings were converted into the same form by
217 // wxArgNormalizer<T>, this form depends on the context
218 // in which the value is used (scanf/printf/wprintf):
219 HandleString(*format, size, outConv, outSize);
220 break;
221
9a83f860
VZ
222 case wxT('C'):
223 case wxT('c'):
50e27899
VS
224 HandleChar(*format, size, outConv, outSize);
225 break;
226
227 default:
228 // nothing special to do
229 break;
230 }
231
232 if ( outConv == *format && outSize == size ) // no change
233 {
234 if ( size != Size_Default )
235 CopyFmtChar(*(format - 1));
236 CopyFmtChar(*format);
237 }
238 else // something changed
239 {
240 switch ( outSize )
241 {
242 case Size_Long:
9a83f860 243 InsertFmtChar(wxT('l'));
50e27899
VS
244 break;
245
246 case Size_Short:
9a83f860 247 InsertFmtChar(wxT('h'));
50e27899
VS
248 break;
249
250 case Size_Default:
251 // nothing to do
252 break;
253 }
254 InsertFmtChar(outConv);
255 }
256
257 format++;
258 }
259 }
260
261 // notice that we only translated the string if m_fmtOrig == NULL (as
262 // set by CopyAllBefore()), otherwise we should simply use the original
263 // format
264 if ( m_fmtOrig )
265 {
a6bb7a28 266 return wxScopedCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
50e27899
VS
267 }
268 else
269 {
a6bb7a28
VS
270 // shrink converted format string to actual size (instead of
271 // over-sized allocation from CopyAllBefore()) and NUL-terminate
272 // it:
273 m_fmt.shrink(m_fmtLast - m_fmt.data());
50e27899
VS
274 return m_fmt;
275 }
276 }
277
278 virtual ~wxFormatConverterBase() {}
279
280protected:
281 enum SizeModifier
282 {
283 Size_Default,
284 Size_Short,
285 Size_Long
286 };
287
288 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
289 // respectively), 'size' is the preceding size modifier; the new values of
290 // conversion and size specifiers must be written to outConv and outSize
291 virtual void HandleString(CharType conv, SizeModifier size,
292 CharType& outConv, SizeModifier& outSize) = 0;
293
294 // ditto for %C or %c
295 virtual void HandleChar(CharType conv, SizeModifier size,
296 CharType& outConv, SizeModifier& outSize) = 0;
297
298private:
299 // copy another character to the translated format: this function does the
300 // copy if we are translating but doesn't do anything at all if we don't,
301 // so we don't create the translated format string at all unless we really
302 // need to (i.e. InsertFmtChar() is called)
303 CharType CopyFmtChar(CharType ch)
304 {
305 if ( !m_fmtOrig )
306 {
307 // we're translating, do copy
308 *(m_fmtLast++) = ch;
309 }
310 else
311 {
312 // simply increase the count which should be copied by
313 // CopyAllBefore() later if needed
314 m_nCopied++;
315 }
316
317 return ch;
318 }
319
320 // insert an extra character
321 void InsertFmtChar(CharType ch)
322 {
323 if ( m_fmtOrig )
324 {
325 // so far we haven't translated anything yet
326 CopyAllBefore();
327 }
328
329 *(m_fmtLast++) = ch;
330 }
331
332 void CopyAllBefore()
333 {
334 wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
335
336 // the modified format string is guaranteed to be no longer than
337 // 3/2 of the original (worst case: the entire format string consists
338 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
339 // allocate the buffer now and not worry about running out of space if
340 // we over-allocate a bit:
341 size_t fmtLen = wxStrlen(m_fmtOrig);
342 // worst case is of even length, so there's no rounding error in *3/2:
343 m_fmt.extend(fmtLen * 3 / 2);
344
345 if ( m_nCopied > 0 )
346 wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
347 m_fmtLast = m_fmt.data() + m_nCopied;
348
349 // we won't need it any longer and resetting it also indicates that we
350 // modified the format
351 m_fmtOrig = NULL;
352 }
353
354 static bool IsFlagChar(CharType ch)
355 {
9a83f860
VZ
356 return ch == wxT('-') || ch == wxT('+') ||
357 ch == wxT('0') || ch == wxT(' ') || ch == wxT('#');
50e27899
VS
358 }
359
360 void SkipDigits(const CharType **ptpc)
361 {
9a83f860 362 while ( **ptpc >= wxT('0') && **ptpc <= wxT('9') )
50e27899
VS
363 CopyFmtChar(*(*ptpc)++);
364 }
365
366 // the translated format
367 wxCharTypeBuffer<CharType> m_fmt;
368 CharType *m_fmtLast;
369
370 // the original format
371 const CharType *m_fmtOrig;
372
373 // the number of characters already copied (i.e. already parsed, but left
374 // unmodified)
375 size_t m_nCopied;
376};
377
715e4f7e 378#if defined(__WINDOWS__) && !defined(__CYGWIN__)
50e27899
VS
379
380// on Windows, we should use %s and %c regardless of the build:
381class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
382{
383 virtual void HandleString(CharType WXUNUSED(conv),
384 SizeModifier WXUNUSED(size),
385 CharType& outConv, SizeModifier& outSize)
386 {
387 outConv = 's';
388 outSize = Size_Default;
389 }
390
391 virtual void HandleChar(CharType WXUNUSED(conv),
392 SizeModifier WXUNUSED(size),
393 CharType& outConv, SizeModifier& outSize)
394 {
395 outConv = 'c';
396 outSize = Size_Default;
397 }
398};
399
400#else // !__WINDOWS__
401
402// on Unix, it's %s for ANSI functions and %ls for widechar:
403
404#if !wxUSE_UTF8_LOCALE_ONLY
405class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
406{
407 virtual void HandleString(CharType WXUNUSED(conv),
408 SizeModifier WXUNUSED(size),
409 CharType& outConv, SizeModifier& outSize)
410 {
411 outConv = 's';
412 outSize = Size_Long;
413 }
414
415 virtual void HandleChar(CharType WXUNUSED(conv),
416 SizeModifier WXUNUSED(size),
417 CharType& outConv, SizeModifier& outSize)
418 {
419 outConv = 'c';
420 outSize = Size_Long;
421 }
422};
423#endif // !wxUSE_UTF8_LOCALE_ONLY
424
425#if wxUSE_UNICODE_UTF8
426class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
427{
428 virtual void HandleString(CharType WXUNUSED(conv),
429 SizeModifier WXUNUSED(size),
430 CharType& outConv, SizeModifier& outSize)
431 {
432 outConv = 's';
433 outSize = Size_Default;
434 }
435
436 virtual void HandleChar(CharType WXUNUSED(conv),
437 SizeModifier WXUNUSED(size),
438 CharType& outConv, SizeModifier& outSize)
439 {
47346406
VS
440 // chars are represented using wchar_t in both builds, so this is
441 // the same as above
442 outConv = 'c';
443 outSize = Size_Long;
50e27899
VS
444 }
445};
446#endif // wxUSE_UNICODE_UTF8
447
448#endif // __WINDOWS__/!__WINDOWS__
449
450#if !wxUSE_UNICODE // FIXME-UTF8: remove
451class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
452{
453 virtual void HandleString(CharType WXUNUSED(conv),
454 SizeModifier WXUNUSED(size),
455 CharType& outConv, SizeModifier& outSize)
456 {
457 outConv = 's';
458 outSize = Size_Default;
459 }
460
461 virtual void HandleChar(CharType WXUNUSED(conv),
462 SizeModifier WXUNUSED(size),
463 CharType& outConv, SizeModifier& outSize)
464 {
465 outConv = 'c';
466 outSize = Size_Default;
467 }
468};
469#endif // ANSI
470
471#ifndef __WINDOWS__
472/*
473
474 wxScanf() format translation is different, we need to translate %s to %ls
475 and %c to %lc on Unix (but not Windows and for widechar functions only!).
476
477 So to use native functions in order to get our semantics we must do the
478 following translations in Unicode mode:
479
480 wxWidgets specifier POSIX specifier
481 ----------------------------------------
482
483 %hc, %C, %hC %c
484 %c %lc
485
486 */
487class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
488{
489 virtual void HandleString(CharType conv, SizeModifier size,
490 CharType& outConv, SizeModifier& outSize)
491 {
492 outConv = 's';
493 outSize = GetOutSize(conv == 'S', size);
494 }
495
496 virtual void HandleChar(CharType conv, SizeModifier size,
497 CharType& outConv, SizeModifier& outSize)
498 {
499 outConv = 'c';
500 outSize = GetOutSize(conv == 'C', size);
501 }
502
503 SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
504 {
505 // %S and %hS -> %s and %lS -> %ls
506 if ( convIsUpper )
507 {
508 if ( size == Size_Long )
509 return Size_Long;
510 else
511 return Size_Default;
512 }
513 else // %s or %c
514 {
515 if ( size == Size_Default )
516 return Size_Long;
517 else
518 return size;
519 }
520 }
521};
522
de4983f3 523const wxScopedWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
50e27899
VS
524{
525 return wxScanfFormatConverterWchar().Convert(format);
526}
527#endif // !__WINDOWS__
528
529
1528e0b8
VS
530// ----------------------------------------------------------------------------
531// wxFormatString
532// ----------------------------------------------------------------------------
533
534#if !wxUSE_UNICODE_WCHAR
50e27899 535const char* wxFormatString::InputAsChar()
1528e0b8
VS
536{
537 if ( m_char )
538 return m_char.data();
539
540 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
541 // is only called under UTF-8 locales, so we should return UTF-8 string,
542 // which is, again, what wx_str() returns:
543 if ( m_str )
544 return m_str->wx_str();
545
546 // ditto wxCStrData:
547 if ( m_cstr )
548 return m_cstr->AsInternal();
549
550 // the last case is that wide string was passed in: in that case, we need
551 // to convert it:
552 wxASSERT( m_wchar );
553
554 m_char = wxConvLibc.cWC2MB(m_wchar.data());
555
556 return m_char.data();
557}
50e27899
VS
558
559const char* wxFormatString::AsChar()
560{
561 if ( !m_convertedChar )
562#if !wxUSE_UNICODE // FIXME-UTF8: remove this
563 m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
564#else
565 m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
566#endif
567
568 return m_convertedChar.data();
569}
1528e0b8
VS
570#endif // !wxUSE_UNICODE_WCHAR
571
572#if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
50e27899 573const wchar_t* wxFormatString::InputAsWChar()
1528e0b8
VS
574{
575 if ( m_wchar )
576 return m_wchar.data();
577
578#if wxUSE_UNICODE_WCHAR
579 if ( m_str )
580 return m_str->wc_str();
581 if ( m_cstr )
582 return m_cstr->AsInternal();
583#else // wxUSE_UNICODE_UTF8
584 if ( m_str )
585 {
586 m_wchar = m_str->wc_str();
587 return m_wchar.data();
588 }
589 if ( m_cstr )
590 {
591 m_wchar = m_cstr->AsWCharBuf();
592 return m_wchar.data();
593 }
594#endif // wxUSE_UNICODE_WCHAR/UTF8
595
596 // the last case is that narrow string was passed in: in that case, we need
597 // to convert it:
598 wxASSERT( m_char );
599
600 m_wchar = wxConvLibc.cMB2WC(m_char.data());
601
602 return m_wchar.data();
603}
50e27899
VS
604
605const wchar_t* wxFormatString::AsWChar()
606{
607 if ( !m_convertedWChar )
608 m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
609
610 return m_convertedWChar.data();
611}
1528e0b8 612#endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
47346406 613
e68a8744
VS
614wxString wxFormatString::InputAsString() const
615{
616 if ( m_str )
617 return *m_str;
618 if ( m_cstr )
619 return m_cstr->AsString();
620 if ( m_wchar )
621 return wxString(m_wchar);
622 if ( m_char )
623 return wxString(m_char);
624
625 wxFAIL_MSG( "invalid wxFormatString - not initialized?" );
626 return wxString();
627}
628
47346406
VS
629// ----------------------------------------------------------------------------
630// wxFormatString::GetArgumentType()
631// ----------------------------------------------------------------------------
632
633namespace
634{
635
636template<typename CharType>
637wxFormatString::ArgumentType DoGetArgumentType(const CharType *format,
638 unsigned n)
639{
0f895b0c 640 wxCHECK_MSG( format, wxFormatString::Arg_Unknown,
47346406
VS
641 "empty format string not allowed here" );
642
643 wxPrintfConvSpecParser<CharType> parser(format);
644
bb2ce935
VS
645 wxCHECK_MSG( n <= parser.nargs, wxFormatString::Arg_Unknown,
646 "more arguments than format string specifiers?" );
647
0f895b0c 648 wxCHECK_MSG( parser.pspec[n-1] != NULL, wxFormatString::Arg_Unknown,
47346406
VS
649 "requested argument not found - invalid format string?" );
650
651 switch ( parser.pspec[n-1]->m_type )
652 {
653 case wxPAT_CHAR:
654 case wxPAT_WCHAR:
655 return wxFormatString::Arg_Char;
656
0f895b0c
VS
657 case wxPAT_PCHAR:
658 case wxPAT_PWCHAR:
659 return wxFormatString::Arg_String;
660
661 case wxPAT_INT:
662 return wxFormatString::Arg_Int;
663 case wxPAT_LONGINT:
664 return wxFormatString::Arg_LongInt;
665#ifdef wxLongLong_t
666 case wxPAT_LONGLONGINT:
667 return wxFormatString::Arg_LongLongInt;
668#endif
669 case wxPAT_SIZET:
670 return wxFormatString::Arg_Size_t;
671
672 case wxPAT_DOUBLE:
673 return wxFormatString::Arg_Double;
674 case wxPAT_LONGDOUBLE:
675 return wxFormatString::Arg_LongDouble;
676
677 case wxPAT_POINTER:
678 return wxFormatString::Arg_Pointer;
679
680 case wxPAT_NINT:
681 return wxFormatString::Arg_IntPtr;
682 case wxPAT_NSHORTINT:
683 return wxFormatString::Arg_ShortIntPtr;
684 case wxPAT_NLONGINT:
685 return wxFormatString::Arg_LongIntPtr;
686
687 case wxPAT_STAR:
688 // "*" requires argument of type int
689 return wxFormatString::Arg_Int;
690
691 case wxPAT_INVALID:
692 // (handled after the switch statement)
693 break;
47346406 694 }
0f895b0c
VS
695
696 // silence warning
697 wxFAIL_MSG( "unexpected argument type" );
698 return wxFormatString::Arg_Unknown;
47346406
VS
699}
700
701} // anonymous namespace
702
703wxFormatString::ArgumentType wxFormatString::GetArgumentType(unsigned n) const
704{
705 if ( m_char )
706 return DoGetArgumentType(m_char.data(), n);
707 else if ( m_wchar )
708 return DoGetArgumentType(m_wchar.data(), n);
709 else if ( m_str )
710 return DoGetArgumentType(m_str->wx_str(), n);
711 else if ( m_cstr )
712 return DoGetArgumentType(m_cstr->AsInternal(), n);
713
714 wxFAIL_MSG( "unreachable code" );
0f895b0c 715 return Arg_Unknown;
47346406 716}