PCH-less compilation fix
[wxWidgets.git] / src / common / strvararg.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/strvararg.cpp
3 // Purpose: macros for implementing type-safe vararg passing of strings
4 // Author: Vaclav Slavik
5 // Created: 2007-02-19
6 // RCS-ID: $Id$
7 // Copyright: (c) 2007 REA Elektronik GmbH
8 // Licence: wxWindows licence
9 ///////////////////////////////////////////////////////////////////////////////
10
11 // ============================================================================
12 // declarations
13 // ============================================================================
14
15 // ----------------------------------------------------------------------------
16 // headers
17 // ----------------------------------------------------------------------------
18
19 // for compilers that support precompilation, includes "wx.h".
20 #include "wx/wxprec.h"
21
22 #ifdef __BORLANDC__
23 #pragma hdrstop
24 #endif
25
26 #include "wx/strvararg.h"
27 #include "wx/string.h"
28 #include "wx/crt.h"
29
30 // ============================================================================
31 // implementation
32 // ============================================================================
33
34 // ----------------------------------------------------------------------------
35 // wxArgNormalizer<>
36 // ----------------------------------------------------------------------------
37
38 const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const
39 {
40 return m_value.wx_str();
41 }
42
43 const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const
44 {
45 return m_value.AsInternal();
46 }
47
48 #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
49 wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s)
50 : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str())
51 {
52 }
53
54 wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(const wxCStrData& s)
55 : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf())
56 {
57 }
58 #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY
59
60 // ----------------------------------------------------------------------------
61 // wxArgNormalizedString
62 // ----------------------------------------------------------------------------
63
64 wxString wxArgNormalizedString::GetString() const
65 {
66 if ( !IsValid() )
67 return wxEmptyString;
68
69 #if wxUSE_UTF8_LOCALE_ONLY
70 return wxString(wx_reinterpret_cast(const char*, m_ptr));
71 #else
72 #if wxUSE_UNICODE_UTF8
73 if ( wxLocaleIsUtf8 )
74 return wxString(wx_reinterpret_cast(const char*, m_ptr));
75 else
76 #endif
77 return wxString(wx_reinterpret_cast(const wxChar*, m_ptr));
78 #endif // !wxUSE_UTF8_LOCALE_ONLY
79 }
80
81 wxArgNormalizedString::operator wxString() const
82 {
83 return GetString();
84 }
85
86 // ----------------------------------------------------------------------------
87 // wxFormatConverter: class doing the "%s" and "%c" normalization
88 // ----------------------------------------------------------------------------
89
90 /*
91 There are four problems with wxPrintf() etc. format strings:
92
93 1) The printf vararg macros convert all forms of strings into
94 wxStringCharType* representation. This may make the format string
95 incorrect: for example, if %ls was used together with a wchar_t*
96 variadic argument, this would no longer work, because the templates
97 would change wchar_t* argument to wxStringCharType* and %ls would now
98 be incorrect in e.g. UTF-8 build. We need to make sure only one specifier
99 form is used.
100
101 2) To complicate matters further, the meaning of %s and %c is different
102 under Windows and on Unix. The Windows/MS convention is as follows:
103
104 In ANSI mode:
105
106 format specifier results in
107 -----------------------------------
108 %s, %hs, %hS char*
109 %ls, %S, %lS wchar_t*
110
111 In Unicode mode:
112
113 format specifier results in
114 -----------------------------------
115 %hs, %S, %hS char*
116 %s, %ls, %lS wchar_t*
117
118 (While on POSIX systems we have %C identical to %lc and %c always means
119 char (in any mode) while %lc always means wchar_t.)
120
121 In other words, we should _only_ use %s on Windows and %ls on Unix for
122 wxUSE_UNICODE_WCHAR build.
123
124 3) To make things even worse, we need two forms in UTF-8 build: one for
125 passing strings to ANSI functions under UTF-8 locales (this one should
126 use %s) and one for widechar functions used under non-UTF-8 locales
127 (this one should use %ls).
128
129 And, of course, the same should be done for %c as well.
130
131 4) Finally, in UTF-8 build when calling ANSI printf() function, we need to
132 translate %c to %s, because not every Unicode character can be
133 represented by a char.
134
135
136 wxScanf() family of functions is simpler, because we don't normalize their
137 variadic arguments and we only have to handle 2) above and only for widechar
138 versions.
139 */
140
141 template<typename T>
142 class wxFormatConverterBase
143 {
144 public:
145 typedef T CharType;
146
147 wxFormatConverterBase()
148 {
149 m_fmtOrig = NULL;
150 m_fmtLast = NULL;
151 m_nCopied = 0;
152 }
153
154 wxCharTypeBuffer<CharType> Convert(const CharType *format)
155 {
156 // this is reset to NULL if we modify the format string
157 m_fmtOrig = format;
158
159 while ( *format )
160 {
161 if ( CopyFmtChar(*format++) == _T('%') )
162 {
163 // skip any flags
164 while ( IsFlagChar(*format) )
165 CopyFmtChar(*format++);
166
167 // and possible width
168 if ( *format == _T('*') )
169 CopyFmtChar(*format++);
170 else
171 SkipDigits(&format);
172
173 // precision?
174 if ( *format == _T('.') )
175 {
176 CopyFmtChar(*format++);
177 if ( *format == _T('*') )
178 CopyFmtChar(*format++);
179 else
180 SkipDigits(&format);
181 }
182
183 // next we can have a size modifier
184 SizeModifier size;
185
186 switch ( *format )
187 {
188 case 'h':
189 size = Size_Short;
190 format++;
191 break;
192
193 case 'l':
194 // "ll" has a different meaning!
195 if ( format[1] != 'l' )
196 {
197 size = Size_Long;
198 format++;
199 break;
200 }
201 //else: fall through
202
203 default:
204 size = Size_Default;
205 }
206
207 CharType outConv = *format;
208 SizeModifier outSize = size;
209
210 // and finally we should have the type
211 switch ( *format )
212 {
213 case _T('S'):
214 case _T('s'):
215 // all strings were converted into the same form by
216 // wxArgNormalizer<T>, this form depends on the context
217 // in which the value is used (scanf/printf/wprintf):
218 HandleString(*format, size, outConv, outSize);
219 break;
220
221 case _T('C'):
222 case _T('c'):
223 HandleChar(*format, size, outConv, outSize);
224 break;
225
226 default:
227 // nothing special to do
228 break;
229 }
230
231 if ( outConv == *format && outSize == size ) // no change
232 {
233 if ( size != Size_Default )
234 CopyFmtChar(*(format - 1));
235 CopyFmtChar(*format);
236 }
237 else // something changed
238 {
239 switch ( outSize )
240 {
241 case Size_Long:
242 InsertFmtChar(_T('l'));
243 break;
244
245 case Size_Short:
246 InsertFmtChar(_T('h'));
247 break;
248
249 case Size_Default:
250 // nothing to do
251 break;
252 }
253 InsertFmtChar(outConv);
254 }
255
256 format++;
257 }
258 }
259
260 // notice that we only translated the string if m_fmtOrig == NULL (as
261 // set by CopyAllBefore()), otherwise we should simply use the original
262 // format
263 if ( m_fmtOrig )
264 {
265 return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig);
266 }
267 else
268 {
269 // NULL-terminate converted format string:
270 *m_fmtLast = 0;
271 return m_fmt;
272 }
273 }
274
275 virtual ~wxFormatConverterBase() {}
276
277 protected:
278 enum SizeModifier
279 {
280 Size_Default,
281 Size_Short,
282 Size_Long
283 };
284
285 // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's'
286 // respectively), 'size' is the preceding size modifier; the new values of
287 // conversion and size specifiers must be written to outConv and outSize
288 virtual void HandleString(CharType conv, SizeModifier size,
289 CharType& outConv, SizeModifier& outSize) = 0;
290
291 // ditto for %C or %c
292 virtual void HandleChar(CharType conv, SizeModifier size,
293 CharType& outConv, SizeModifier& outSize) = 0;
294
295 private:
296 // copy another character to the translated format: this function does the
297 // copy if we are translating but doesn't do anything at all if we don't,
298 // so we don't create the translated format string at all unless we really
299 // need to (i.e. InsertFmtChar() is called)
300 CharType CopyFmtChar(CharType ch)
301 {
302 if ( !m_fmtOrig )
303 {
304 // we're translating, do copy
305 *(m_fmtLast++) = ch;
306 }
307 else
308 {
309 // simply increase the count which should be copied by
310 // CopyAllBefore() later if needed
311 m_nCopied++;
312 }
313
314 return ch;
315 }
316
317 // insert an extra character
318 void InsertFmtChar(CharType ch)
319 {
320 if ( m_fmtOrig )
321 {
322 // so far we haven't translated anything yet
323 CopyAllBefore();
324 }
325
326 *(m_fmtLast++) = ch;
327 }
328
329 void CopyAllBefore()
330 {
331 wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" );
332
333 // the modified format string is guaranteed to be no longer than
334 // 3/2 of the original (worst case: the entire format string consists
335 // of "%s" repeated and is expanded to "%ls" on Unix), so we can
336 // allocate the buffer now and not worry about running out of space if
337 // we over-allocate a bit:
338 size_t fmtLen = wxStrlen(m_fmtOrig);
339 // worst case is of even length, so there's no rounding error in *3/2:
340 m_fmt.extend(fmtLen * 3 / 2);
341
342 if ( m_nCopied > 0 )
343 wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied);
344 m_fmtLast = m_fmt.data() + m_nCopied;
345
346 // we won't need it any longer and resetting it also indicates that we
347 // modified the format
348 m_fmtOrig = NULL;
349 }
350
351 static bool IsFlagChar(CharType ch)
352 {
353 return ch == _T('-') || ch == _T('+') ||
354 ch == _T('0') || ch == _T(' ') || ch == _T('#');
355 }
356
357 void SkipDigits(const CharType **ptpc)
358 {
359 while ( **ptpc >= _T('0') && **ptpc <= _T('9') )
360 CopyFmtChar(*(*ptpc)++);
361 }
362
363 // the translated format
364 wxCharTypeBuffer<CharType> m_fmt;
365 CharType *m_fmtLast;
366
367 // the original format
368 const CharType *m_fmtOrig;
369
370 // the number of characters already copied (i.e. already parsed, but left
371 // unmodified)
372 size_t m_nCopied;
373 };
374
375
376
377 #ifdef __WINDOWS
378
379 // on Windows, we should use %s and %c regardless of the build:
380 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
381 {
382 virtual void HandleString(CharType WXUNUSED(conv),
383 SizeModifier WXUNUSED(size),
384 CharType& outConv, SizeModifier& outSize)
385 {
386 outConv = 's';
387 outSize = Size_Default;
388 }
389
390 virtual void HandleChar(CharType WXUNUSED(conv),
391 SizeModifier WXUNUSED(size),
392 CharType& outConv, SizeModifier& outSize)
393 {
394 outConv = 'c';
395 outSize = Size_Default;
396 }
397 };
398
399 #else // !__WINDOWS__
400
401 // on Unix, it's %s for ANSI functions and %ls for widechar:
402
403 #if !wxUSE_UTF8_LOCALE_ONLY
404 class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
405 {
406 virtual void HandleString(CharType WXUNUSED(conv),
407 SizeModifier WXUNUSED(size),
408 CharType& outConv, SizeModifier& outSize)
409 {
410 outConv = 's';
411 outSize = Size_Long;
412 }
413
414 virtual void HandleChar(CharType WXUNUSED(conv),
415 SizeModifier WXUNUSED(size),
416 CharType& outConv, SizeModifier& outSize)
417 {
418 outConv = 'c';
419 outSize = Size_Long;
420 }
421 };
422 #endif // !wxUSE_UTF8_LOCALE_ONLY
423
424 #if wxUSE_UNICODE_UTF8
425 class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char>
426 {
427 virtual void HandleString(CharType WXUNUSED(conv),
428 SizeModifier WXUNUSED(size),
429 CharType& outConv, SizeModifier& outSize)
430 {
431 outConv = 's';
432 outSize = Size_Default;
433 }
434
435 virtual void HandleChar(CharType WXUNUSED(conv),
436 SizeModifier WXUNUSED(size),
437 CharType& outConv, SizeModifier& outSize)
438 {
439 // added complication: %c should be translated to %s in UTF-8 build
440 outConv = 's';
441 outSize = Size_Default;
442 }
443 };
444 #endif // wxUSE_UNICODE_UTF8
445
446 #endif // __WINDOWS__/!__WINDOWS__
447
448 #if !wxUSE_UNICODE // FIXME-UTF8: remove
449 class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char>
450 {
451 virtual void HandleString(CharType WXUNUSED(conv),
452 SizeModifier WXUNUSED(size),
453 CharType& outConv, SizeModifier& outSize)
454 {
455 outConv = 's';
456 outSize = Size_Default;
457 }
458
459 virtual void HandleChar(CharType WXUNUSED(conv),
460 SizeModifier WXUNUSED(size),
461 CharType& outConv, SizeModifier& outSize)
462 {
463 outConv = 'c';
464 outSize = Size_Default;
465 }
466 };
467 #endif // ANSI
468
469 #ifndef __WINDOWS__
470 /*
471
472 wxScanf() format translation is different, we need to translate %s to %ls
473 and %c to %lc on Unix (but not Windows and for widechar functions only!).
474
475 So to use native functions in order to get our semantics we must do the
476 following translations in Unicode mode:
477
478 wxWidgets specifier POSIX specifier
479 ----------------------------------------
480
481 %hc, %C, %hC %c
482 %c %lc
483
484 */
485 class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t>
486 {
487 virtual void HandleString(CharType conv, SizeModifier size,
488 CharType& outConv, SizeModifier& outSize)
489 {
490 outConv = 's';
491 outSize = GetOutSize(conv == 'S', size);
492 }
493
494 virtual void HandleChar(CharType conv, SizeModifier size,
495 CharType& outConv, SizeModifier& outSize)
496 {
497 outConv = 'c';
498 outSize = GetOutSize(conv == 'C', size);
499 }
500
501 SizeModifier GetOutSize(bool convIsUpper, SizeModifier size)
502 {
503 // %S and %hS -> %s and %lS -> %ls
504 if ( convIsUpper )
505 {
506 if ( size == Size_Long )
507 return Size_Long;
508 else
509 return Size_Default;
510 }
511 else // %s or %c
512 {
513 if ( size == Size_Default )
514 return Size_Long;
515 else
516 return size;
517 }
518 }
519 };
520
521 const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format)
522 {
523 return wxScanfFormatConverterWchar().Convert(format);
524 }
525 #endif // !__WINDOWS__
526
527
528 // ----------------------------------------------------------------------------
529 // wxFormatString
530 // ----------------------------------------------------------------------------
531
532 #if !wxUSE_UNICODE_WCHAR
533 const char* wxFormatString::InputAsChar()
534 {
535 if ( m_char )
536 return m_char.data();
537
538 // in ANSI build, wx_str() returns char*, in UTF-8 build, this function
539 // is only called under UTF-8 locales, so we should return UTF-8 string,
540 // which is, again, what wx_str() returns:
541 if ( m_str )
542 return m_str->wx_str();
543
544 // ditto wxCStrData:
545 if ( m_cstr )
546 return m_cstr->AsInternal();
547
548 // the last case is that wide string was passed in: in that case, we need
549 // to convert it:
550 wxASSERT( m_wchar );
551
552 m_char = wxConvLibc.cWC2MB(m_wchar.data());
553
554 return m_char.data();
555 }
556
557 const char* wxFormatString::AsChar()
558 {
559 if ( !m_convertedChar )
560 #if !wxUSE_UNICODE // FIXME-UTF8: remove this
561 m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar());
562 #else
563 m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar());
564 #endif
565
566 return m_convertedChar.data();
567 }
568 #endif // !wxUSE_UNICODE_WCHAR
569
570 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
571 const wchar_t* wxFormatString::InputAsWChar()
572 {
573 if ( m_wchar )
574 return m_wchar.data();
575
576 #if wxUSE_UNICODE_WCHAR
577 if ( m_str )
578 return m_str->wc_str();
579 if ( m_cstr )
580 return m_cstr->AsInternal();
581 #else // wxUSE_UNICODE_UTF8
582 if ( m_str )
583 {
584 m_wchar = m_str->wc_str();
585 return m_wchar.data();
586 }
587 if ( m_cstr )
588 {
589 m_wchar = m_cstr->AsWCharBuf();
590 return m_wchar.data();
591 }
592 #endif // wxUSE_UNICODE_WCHAR/UTF8
593
594 // the last case is that narrow string was passed in: in that case, we need
595 // to convert it:
596 wxASSERT( m_char );
597
598 m_wchar = wxConvLibc.cMB2WC(m_char.data());
599
600 return m_wchar.data();
601 }
602
603 const wchar_t* wxFormatString::AsWChar()
604 {
605 if ( !m_convertedWChar )
606 m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar());
607
608 return m_convertedWChar.data();
609 }
610 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY