]>
Commit | Line | Data |
---|---|---|
1 | /////////////////////////////////////////////////////////////////////////////// | |
2 | // Name: src/common/strvararg.cpp | |
3 | // Purpose: macros for implementing type-safe vararg passing of strings | |
4 | // Author: Vaclav Slavik | |
5 | // Created: 2007-02-19 | |
6 | // RCS-ID: $Id$ | |
7 | // Copyright: (c) 2007 REA Elektronik GmbH | |
8 | // Licence: wxWindows licence | |
9 | /////////////////////////////////////////////////////////////////////////////// | |
10 | ||
11 | // ============================================================================ | |
12 | // declarations | |
13 | // ============================================================================ | |
14 | ||
15 | // ---------------------------------------------------------------------------- | |
16 | // headers | |
17 | // ---------------------------------------------------------------------------- | |
18 | ||
19 | // for compilers that support precompilation, includes "wx.h". | |
20 | #include "wx/wxprec.h" | |
21 | ||
22 | #ifdef __BORLANDC__ | |
23 | #pragma hdrstop | |
24 | #endif | |
25 | ||
26 | #include "wx/strvararg.h" | |
27 | #include "wx/string.h" | |
28 | #include "wx/crt.h" | |
29 | ||
30 | // ============================================================================ | |
31 | // implementation | |
32 | // ============================================================================ | |
33 | ||
34 | // ---------------------------------------------------------------------------- | |
35 | // wxArgNormalizer<> | |
36 | // ---------------------------------------------------------------------------- | |
37 | ||
38 | const wxStringCharType *wxArgNormalizerNative<const wxString&>::get() const | |
39 | { | |
40 | return m_value.wx_str(); | |
41 | } | |
42 | ||
43 | const wxStringCharType *wxArgNormalizerNative<const wxCStrData&>::get() const | |
44 | { | |
45 | return m_value.AsInternal(); | |
46 | } | |
47 | ||
48 | #if wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY | |
49 | wxArgNormalizerWchar<const wxString&>::wxArgNormalizerWchar(const wxString& s) | |
50 | : wxArgNormalizerWithBuffer<wchar_t>(s.wc_str()) | |
51 | { | |
52 | } | |
53 | ||
54 | wxArgNormalizerWchar<const wxCStrData&>::wxArgNormalizerWchar(const wxCStrData& s) | |
55 | : wxArgNormalizerWithBuffer<wchar_t>(s.AsWCharBuf()) | |
56 | { | |
57 | } | |
58 | #endif // wxUSE_UNICODE_UTF8 && !wxUSE_UTF8_LOCALE_ONLY | |
59 | ||
60 | // ---------------------------------------------------------------------------- | |
61 | // wxArgNormalizedString | |
62 | // ---------------------------------------------------------------------------- | |
63 | ||
64 | wxString wxArgNormalizedString::GetString() const | |
65 | { | |
66 | if ( !IsValid() ) | |
67 | return wxEmptyString; | |
68 | ||
69 | #if wxUSE_UTF8_LOCALE_ONLY | |
70 | return wxString(wx_reinterpret_cast(const char*, m_ptr)); | |
71 | #else | |
72 | #if wxUSE_UNICODE_UTF8 | |
73 | if ( wxLocaleIsUtf8 ) | |
74 | return wxString(wx_reinterpret_cast(const char*, m_ptr)); | |
75 | else | |
76 | #endif | |
77 | return wxString(wx_reinterpret_cast(const wxChar*, m_ptr)); | |
78 | #endif // !wxUSE_UTF8_LOCALE_ONLY | |
79 | } | |
80 | ||
81 | wxArgNormalizedString::operator wxString() const | |
82 | { | |
83 | return GetString(); | |
84 | } | |
85 | ||
86 | // ---------------------------------------------------------------------------- | |
87 | // wxFormatConverter: class doing the "%s" and "%c" normalization | |
88 | // ---------------------------------------------------------------------------- | |
89 | ||
90 | /* | |
91 | There are four problems with wxPrintf() etc. format strings: | |
92 | ||
93 | 1) The printf vararg macros convert all forms of strings into | |
94 | wxStringCharType* representation. This may make the format string | |
95 | incorrect: for example, if %ls was used together with a wchar_t* | |
96 | variadic argument, this would no longer work, because the templates | |
97 | would change wchar_t* argument to wxStringCharType* and %ls would now | |
98 | be incorrect in e.g. UTF-8 build. We need to make sure only one specifier | |
99 | form is used. | |
100 | ||
101 | 2) To complicate matters further, the meaning of %s and %c is different | |
102 | under Windows and on Unix. The Windows/MS convention is as follows: | |
103 | ||
104 | In ANSI mode: | |
105 | ||
106 | format specifier results in | |
107 | ----------------------------------- | |
108 | %s, %hs, %hS char* | |
109 | %ls, %S, %lS wchar_t* | |
110 | ||
111 | In Unicode mode: | |
112 | ||
113 | format specifier results in | |
114 | ----------------------------------- | |
115 | %hs, %S, %hS char* | |
116 | %s, %ls, %lS wchar_t* | |
117 | ||
118 | (While on POSIX systems we have %C identical to %lc and %c always means | |
119 | char (in any mode) while %lc always means wchar_t.) | |
120 | ||
121 | In other words, we should _only_ use %s on Windows and %ls on Unix for | |
122 | wxUSE_UNICODE_WCHAR build. | |
123 | ||
124 | 3) To make things even worse, we need two forms in UTF-8 build: one for | |
125 | passing strings to ANSI functions under UTF-8 locales (this one should | |
126 | use %s) and one for widechar functions used under non-UTF-8 locales | |
127 | (this one should use %ls). | |
128 | ||
129 | And, of course, the same should be done for %c as well. | |
130 | ||
131 | 4) Finally, in UTF-8 build when calling ANSI printf() function, we need to | |
132 | translate %c to %s, because not every Unicode character can be | |
133 | represented by a char. | |
134 | ||
135 | ||
136 | wxScanf() family of functions is simpler, because we don't normalize their | |
137 | variadic arguments and we only have to handle 2) above and only for widechar | |
138 | versions. | |
139 | */ | |
140 | ||
141 | template<typename T> | |
142 | class wxFormatConverterBase | |
143 | { | |
144 | public: | |
145 | typedef T CharType; | |
146 | ||
147 | wxFormatConverterBase() | |
148 | { | |
149 | m_fmtOrig = NULL; | |
150 | m_fmtLast = NULL; | |
151 | m_nCopied = 0; | |
152 | } | |
153 | ||
154 | wxCharTypeBuffer<CharType> Convert(const CharType *format) | |
155 | { | |
156 | // this is reset to NULL if we modify the format string | |
157 | m_fmtOrig = format; | |
158 | ||
159 | while ( *format ) | |
160 | { | |
161 | if ( CopyFmtChar(*format++) == _T('%') ) | |
162 | { | |
163 | // skip any flags | |
164 | while ( IsFlagChar(*format) ) | |
165 | CopyFmtChar(*format++); | |
166 | ||
167 | // and possible width | |
168 | if ( *format == _T('*') ) | |
169 | CopyFmtChar(*format++); | |
170 | else | |
171 | SkipDigits(&format); | |
172 | ||
173 | // precision? | |
174 | if ( *format == _T('.') ) | |
175 | { | |
176 | CopyFmtChar(*format++); | |
177 | if ( *format == _T('*') ) | |
178 | CopyFmtChar(*format++); | |
179 | else | |
180 | SkipDigits(&format); | |
181 | } | |
182 | ||
183 | // next we can have a size modifier | |
184 | SizeModifier size; | |
185 | ||
186 | switch ( *format ) | |
187 | { | |
188 | case 'h': | |
189 | size = Size_Short; | |
190 | format++; | |
191 | break; | |
192 | ||
193 | case 'l': | |
194 | // "ll" has a different meaning! | |
195 | if ( format[1] != 'l' ) | |
196 | { | |
197 | size = Size_Long; | |
198 | format++; | |
199 | break; | |
200 | } | |
201 | //else: fall through | |
202 | ||
203 | default: | |
204 | size = Size_Default; | |
205 | } | |
206 | ||
207 | CharType outConv = *format; | |
208 | SizeModifier outSize = size; | |
209 | ||
210 | // and finally we should have the type | |
211 | switch ( *format ) | |
212 | { | |
213 | case _T('S'): | |
214 | case _T('s'): | |
215 | // all strings were converted into the same form by | |
216 | // wxArgNormalizer<T>, this form depends on the context | |
217 | // in which the value is used (scanf/printf/wprintf): | |
218 | HandleString(*format, size, outConv, outSize); | |
219 | break; | |
220 | ||
221 | case _T('C'): | |
222 | case _T('c'): | |
223 | HandleChar(*format, size, outConv, outSize); | |
224 | break; | |
225 | ||
226 | default: | |
227 | // nothing special to do | |
228 | break; | |
229 | } | |
230 | ||
231 | if ( outConv == *format && outSize == size ) // no change | |
232 | { | |
233 | if ( size != Size_Default ) | |
234 | CopyFmtChar(*(format - 1)); | |
235 | CopyFmtChar(*format); | |
236 | } | |
237 | else // something changed | |
238 | { | |
239 | switch ( outSize ) | |
240 | { | |
241 | case Size_Long: | |
242 | InsertFmtChar(_T('l')); | |
243 | break; | |
244 | ||
245 | case Size_Short: | |
246 | InsertFmtChar(_T('h')); | |
247 | break; | |
248 | ||
249 | case Size_Default: | |
250 | // nothing to do | |
251 | break; | |
252 | } | |
253 | InsertFmtChar(outConv); | |
254 | } | |
255 | ||
256 | format++; | |
257 | } | |
258 | } | |
259 | ||
260 | // notice that we only translated the string if m_fmtOrig == NULL (as | |
261 | // set by CopyAllBefore()), otherwise we should simply use the original | |
262 | // format | |
263 | if ( m_fmtOrig ) | |
264 | { | |
265 | return wxCharTypeBuffer<CharType>::CreateNonOwned(m_fmtOrig); | |
266 | } | |
267 | else | |
268 | { | |
269 | // NULL-terminate converted format string: | |
270 | *m_fmtLast = 0; | |
271 | return m_fmt; | |
272 | } | |
273 | } | |
274 | ||
275 | virtual ~wxFormatConverterBase() {} | |
276 | ||
277 | protected: | |
278 | enum SizeModifier | |
279 | { | |
280 | Size_Default, | |
281 | Size_Short, | |
282 | Size_Long | |
283 | }; | |
284 | ||
285 | // called to handle %S or %s; 'conv' is conversion specifier ('S' or 's' | |
286 | // respectively), 'size' is the preceding size modifier; the new values of | |
287 | // conversion and size specifiers must be written to outConv and outSize | |
288 | virtual void HandleString(CharType conv, SizeModifier size, | |
289 | CharType& outConv, SizeModifier& outSize) = 0; | |
290 | ||
291 | // ditto for %C or %c | |
292 | virtual void HandleChar(CharType conv, SizeModifier size, | |
293 | CharType& outConv, SizeModifier& outSize) = 0; | |
294 | ||
295 | private: | |
296 | // copy another character to the translated format: this function does the | |
297 | // copy if we are translating but doesn't do anything at all if we don't, | |
298 | // so we don't create the translated format string at all unless we really | |
299 | // need to (i.e. InsertFmtChar() is called) | |
300 | CharType CopyFmtChar(CharType ch) | |
301 | { | |
302 | if ( !m_fmtOrig ) | |
303 | { | |
304 | // we're translating, do copy | |
305 | *(m_fmtLast++) = ch; | |
306 | } | |
307 | else | |
308 | { | |
309 | // simply increase the count which should be copied by | |
310 | // CopyAllBefore() later if needed | |
311 | m_nCopied++; | |
312 | } | |
313 | ||
314 | return ch; | |
315 | } | |
316 | ||
317 | // insert an extra character | |
318 | void InsertFmtChar(CharType ch) | |
319 | { | |
320 | if ( m_fmtOrig ) | |
321 | { | |
322 | // so far we haven't translated anything yet | |
323 | CopyAllBefore(); | |
324 | } | |
325 | ||
326 | *(m_fmtLast++) = ch; | |
327 | } | |
328 | ||
329 | void CopyAllBefore() | |
330 | { | |
331 | wxASSERT_MSG( m_fmtOrig && m_fmt.data() == NULL, "logic error" ); | |
332 | ||
333 | // the modified format string is guaranteed to be no longer than | |
334 | // 3/2 of the original (worst case: the entire format string consists | |
335 | // of "%s" repeated and is expanded to "%ls" on Unix), so we can | |
336 | // allocate the buffer now and not worry about running out of space if | |
337 | // we over-allocate a bit: | |
338 | size_t fmtLen = wxStrlen(m_fmtOrig); | |
339 | // worst case is of even length, so there's no rounding error in *3/2: | |
340 | m_fmt.extend(fmtLen * 3 / 2); | |
341 | ||
342 | if ( m_nCopied > 0 ) | |
343 | wxStrncpy(m_fmt.data(), m_fmtOrig, m_nCopied); | |
344 | m_fmtLast = m_fmt.data() + m_nCopied; | |
345 | ||
346 | // we won't need it any longer and resetting it also indicates that we | |
347 | // modified the format | |
348 | m_fmtOrig = NULL; | |
349 | } | |
350 | ||
351 | static bool IsFlagChar(CharType ch) | |
352 | { | |
353 | return ch == _T('-') || ch == _T('+') || | |
354 | ch == _T('0') || ch == _T(' ') || ch == _T('#'); | |
355 | } | |
356 | ||
357 | void SkipDigits(const CharType **ptpc) | |
358 | { | |
359 | while ( **ptpc >= _T('0') && **ptpc <= _T('9') ) | |
360 | CopyFmtChar(*(*ptpc)++); | |
361 | } | |
362 | ||
363 | // the translated format | |
364 | wxCharTypeBuffer<CharType> m_fmt; | |
365 | CharType *m_fmtLast; | |
366 | ||
367 | // the original format | |
368 | const CharType *m_fmtOrig; | |
369 | ||
370 | // the number of characters already copied (i.e. already parsed, but left | |
371 | // unmodified) | |
372 | size_t m_nCopied; | |
373 | }; | |
374 | ||
375 | ||
376 | ||
377 | #ifdef __WINDOWS | |
378 | ||
379 | // on Windows, we should use %s and %c regardless of the build: | |
380 | class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t> | |
381 | { | |
382 | virtual void HandleString(CharType WXUNUSED(conv), | |
383 | SizeModifier WXUNUSED(size), | |
384 | CharType& outConv, SizeModifier& outSize) | |
385 | { | |
386 | outConv = 's'; | |
387 | outSize = Size_Default; | |
388 | } | |
389 | ||
390 | virtual void HandleChar(CharType WXUNUSED(conv), | |
391 | SizeModifier WXUNUSED(size), | |
392 | CharType& outConv, SizeModifier& outSize) | |
393 | { | |
394 | outConv = 'c'; | |
395 | outSize = Size_Default; | |
396 | } | |
397 | }; | |
398 | ||
399 | #else // !__WINDOWS__ | |
400 | ||
401 | // on Unix, it's %s for ANSI functions and %ls for widechar: | |
402 | ||
403 | #if !wxUSE_UTF8_LOCALE_ONLY | |
404 | class wxPrintfFormatConverterWchar : public wxFormatConverterBase<wchar_t> | |
405 | { | |
406 | virtual void HandleString(CharType WXUNUSED(conv), | |
407 | SizeModifier WXUNUSED(size), | |
408 | CharType& outConv, SizeModifier& outSize) | |
409 | { | |
410 | outConv = 's'; | |
411 | outSize = Size_Long; | |
412 | } | |
413 | ||
414 | virtual void HandleChar(CharType WXUNUSED(conv), | |
415 | SizeModifier WXUNUSED(size), | |
416 | CharType& outConv, SizeModifier& outSize) | |
417 | { | |
418 | outConv = 'c'; | |
419 | outSize = Size_Long; | |
420 | } | |
421 | }; | |
422 | #endif // !wxUSE_UTF8_LOCALE_ONLY | |
423 | ||
424 | #if wxUSE_UNICODE_UTF8 | |
425 | class wxPrintfFormatConverterUtf8 : public wxFormatConverterBase<char> | |
426 | { | |
427 | virtual void HandleString(CharType WXUNUSED(conv), | |
428 | SizeModifier WXUNUSED(size), | |
429 | CharType& outConv, SizeModifier& outSize) | |
430 | { | |
431 | outConv = 's'; | |
432 | outSize = Size_Default; | |
433 | } | |
434 | ||
435 | virtual void HandleChar(CharType WXUNUSED(conv), | |
436 | SizeModifier WXUNUSED(size), | |
437 | CharType& outConv, SizeModifier& outSize) | |
438 | { | |
439 | // added complication: %c should be translated to %s in UTF-8 build | |
440 | outConv = 's'; | |
441 | outSize = Size_Default; | |
442 | } | |
443 | }; | |
444 | #endif // wxUSE_UNICODE_UTF8 | |
445 | ||
446 | #endif // __WINDOWS__/!__WINDOWS__ | |
447 | ||
448 | #if !wxUSE_UNICODE // FIXME-UTF8: remove | |
449 | class wxPrintfFormatConverterANSI : public wxFormatConverterBase<char> | |
450 | { | |
451 | virtual void HandleString(CharType WXUNUSED(conv), | |
452 | SizeModifier WXUNUSED(size), | |
453 | CharType& outConv, SizeModifier& outSize) | |
454 | { | |
455 | outConv = 's'; | |
456 | outSize = Size_Default; | |
457 | } | |
458 | ||
459 | virtual void HandleChar(CharType WXUNUSED(conv), | |
460 | SizeModifier WXUNUSED(size), | |
461 | CharType& outConv, SizeModifier& outSize) | |
462 | { | |
463 | outConv = 'c'; | |
464 | outSize = Size_Default; | |
465 | } | |
466 | }; | |
467 | #endif // ANSI | |
468 | ||
469 | #ifndef __WINDOWS__ | |
470 | /* | |
471 | ||
472 | wxScanf() format translation is different, we need to translate %s to %ls | |
473 | and %c to %lc on Unix (but not Windows and for widechar functions only!). | |
474 | ||
475 | So to use native functions in order to get our semantics we must do the | |
476 | following translations in Unicode mode: | |
477 | ||
478 | wxWidgets specifier POSIX specifier | |
479 | ---------------------------------------- | |
480 | ||
481 | %hc, %C, %hC %c | |
482 | %c %lc | |
483 | ||
484 | */ | |
485 | class wxScanfFormatConverterWchar : public wxFormatConverterBase<wchar_t> | |
486 | { | |
487 | virtual void HandleString(CharType conv, SizeModifier size, | |
488 | CharType& outConv, SizeModifier& outSize) | |
489 | { | |
490 | outConv = 's'; | |
491 | outSize = GetOutSize(conv == 'S', size); | |
492 | } | |
493 | ||
494 | virtual void HandleChar(CharType conv, SizeModifier size, | |
495 | CharType& outConv, SizeModifier& outSize) | |
496 | { | |
497 | outConv = 'c'; | |
498 | outSize = GetOutSize(conv == 'C', size); | |
499 | } | |
500 | ||
501 | SizeModifier GetOutSize(bool convIsUpper, SizeModifier size) | |
502 | { | |
503 | // %S and %hS -> %s and %lS -> %ls | |
504 | if ( convIsUpper ) | |
505 | { | |
506 | if ( size == Size_Long ) | |
507 | return Size_Long; | |
508 | else | |
509 | return Size_Default; | |
510 | } | |
511 | else // %s or %c | |
512 | { | |
513 | if ( size == Size_Default ) | |
514 | return Size_Long; | |
515 | else | |
516 | return size; | |
517 | } | |
518 | } | |
519 | }; | |
520 | ||
521 | const wxWCharBuffer wxScanfConvertFormatW(const wchar_t *format) | |
522 | { | |
523 | return wxScanfFormatConverterWchar().Convert(format); | |
524 | } | |
525 | #endif // !__WINDOWS__ | |
526 | ||
527 | ||
528 | // ---------------------------------------------------------------------------- | |
529 | // wxFormatString | |
530 | // ---------------------------------------------------------------------------- | |
531 | ||
532 | #if !wxUSE_UNICODE_WCHAR | |
533 | const char* wxFormatString::InputAsChar() | |
534 | { | |
535 | if ( m_char ) | |
536 | return m_char.data(); | |
537 | ||
538 | // in ANSI build, wx_str() returns char*, in UTF-8 build, this function | |
539 | // is only called under UTF-8 locales, so we should return UTF-8 string, | |
540 | // which is, again, what wx_str() returns: | |
541 | if ( m_str ) | |
542 | return m_str->wx_str(); | |
543 | ||
544 | // ditto wxCStrData: | |
545 | if ( m_cstr ) | |
546 | return m_cstr->AsInternal(); | |
547 | ||
548 | // the last case is that wide string was passed in: in that case, we need | |
549 | // to convert it: | |
550 | wxASSERT( m_wchar ); | |
551 | ||
552 | m_char = wxConvLibc.cWC2MB(m_wchar.data()); | |
553 | ||
554 | return m_char.data(); | |
555 | } | |
556 | ||
557 | const char* wxFormatString::AsChar() | |
558 | { | |
559 | if ( !m_convertedChar ) | |
560 | #if !wxUSE_UNICODE // FIXME-UTF8: remove this | |
561 | m_convertedChar = wxPrintfFormatConverterANSI().Convert(InputAsChar()); | |
562 | #else | |
563 | m_convertedChar = wxPrintfFormatConverterUtf8().Convert(InputAsChar()); | |
564 | #endif | |
565 | ||
566 | return m_convertedChar.data(); | |
567 | } | |
568 | #endif // !wxUSE_UNICODE_WCHAR | |
569 | ||
570 | #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY | |
571 | const wchar_t* wxFormatString::InputAsWChar() | |
572 | { | |
573 | if ( m_wchar ) | |
574 | return m_wchar.data(); | |
575 | ||
576 | #if wxUSE_UNICODE_WCHAR | |
577 | if ( m_str ) | |
578 | return m_str->wc_str(); | |
579 | if ( m_cstr ) | |
580 | return m_cstr->AsInternal(); | |
581 | #else // wxUSE_UNICODE_UTF8 | |
582 | if ( m_str ) | |
583 | { | |
584 | m_wchar = m_str->wc_str(); | |
585 | return m_wchar.data(); | |
586 | } | |
587 | if ( m_cstr ) | |
588 | { | |
589 | m_wchar = m_cstr->AsWCharBuf(); | |
590 | return m_wchar.data(); | |
591 | } | |
592 | #endif // wxUSE_UNICODE_WCHAR/UTF8 | |
593 | ||
594 | // the last case is that narrow string was passed in: in that case, we need | |
595 | // to convert it: | |
596 | wxASSERT( m_char ); | |
597 | ||
598 | m_wchar = wxConvLibc.cMB2WC(m_char.data()); | |
599 | ||
600 | return m_wchar.data(); | |
601 | } | |
602 | ||
603 | const wchar_t* wxFormatString::AsWChar() | |
604 | { | |
605 | if ( !m_convertedWChar ) | |
606 | m_convertedWChar = wxPrintfFormatConverterWchar().Convert(InputAsWChar()); | |
607 | ||
608 | return m_convertedWChar.data(); | |
609 | } | |
610 | #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY |