include/wx/string.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        wx/string.h
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin
   5 // Modified by:
   6 // Created:     29/01/98
   7 // RCS-ID:      $Id$
   8 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   9 // Licence:     wxWindows licence
  10 ///////////////////////////////////////////////////////////////////////////////
  11
  12 /*
  13     Efficient string class [more or less] compatible with MFC CString,
  14     wxWidgets version 1 wxString and std::string and some handy functions
  15     missing from string.h.
  16 */
  17
  18 #ifndef _WX_WXSTRING_H__
  19 #define _WX_WXSTRING_H__
  20
  21 // ----------------------------------------------------------------------------
  22 // headers
  23 // ----------------------------------------------------------------------------
  24
  25 #include "wx/defs.h"        // everybody should include this
  26
  27 #if defined(__WXMAC__) || defined(__VISAGECPP__)
  28     #include <ctype.h>
  29 #endif
  30
  31 #if defined(__VISAGECPP__) && __IBMCPP__ >= 400
  32    // problem in VACPP V4 with including stdlib.h multiple times
  33    // strconv includes it anyway
  34 #  include <stdio.h>
  35 #  include <string.h>
  36 #  include <stdarg.h>
  37 #  include <limits.h>
  38 #else
  39 #  include <string.h>
  40 #  include <stdio.h>
  41 #  include <stdarg.h>
  42 #  include <limits.h>
  43 #  include <stdlib.h>
  44 #endif
  45
  46 #include "wx/wxcrtbase.h"   // for wxChar, wxStrlen() etc.
  47 #include "wx/strvararg.h"
  48 #include "wx/buffer.h"      // for wxCharBuffer
  49 #include "wx/strconv.h"     // for wxConvertXXX() macros and wxMBConv classes
  50 #include "wx/stringimpl.h"
  51 #include "wx/stringops.h"
  52 #include "wx/unichar.h"
  53
  54 // by default we cache the mapping of the positions in UTF-8 string to the byte
  55 // offset as this results in noticeable performance improvements for loops over
  56 // strings using indices; comment out this line to disable this
  57 //
  58 // notice that this optimization is well worth using even in debug builds as it
  59 // changes asymptotic complexity of algorithms using indices to iterate over
  60 // wxString back to expected linear from quadratic
  61 //
  62 // also notice that wxTLS_TYPE() (__declspec(thread) in this case) is unsafe to
  63 // use in DLL build under pre-Vista Windows so we disable this code for now, if
  64 // anybody really needs to use UTF-8 build under Windows with this optimization
  65 // it would have to be re-tested and probably corrected
  66 // CS: under OSX release builds the string destructor/cache cleanup sometimes
  67 // crashes, disable until we find the true reason or a better workaround
  68 #if wxUSE_UNICODE_UTF8 && !defined(__WINDOWS__) && !defined(__WXOSX__)
  69     #define wxUSE_STRING_POS_CACHE 1
  70 #else
  71     #define wxUSE_STRING_POS_CACHE 0
  72 #endif
  73
  74 #if wxUSE_STRING_POS_CACHE
  75     #include "wx/tls.h"
  76
  77     // change this 0 to 1 to enable additional (very expensive) asserts
  78     // verifying that string caching logic works as expected
  79     #if 0
  80         #define wxSTRING_CACHE_ASSERT(cond) wxASSERT(cond)
  81     #else
  82         #define wxSTRING_CACHE_ASSERT(cond)
  83     #endif
  84 #endif // wxUSE_STRING_POS_CACHE
  85
  86 class WXDLLIMPEXP_FWD_BASE wxString;
  87
  88 // unless this symbol is predefined to disable the compatibility functions, do
  89 // use them
  90 #ifndef WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
  91     #define WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER 1
  92 #endif
  93
  94 namespace wxPrivate
  95 {
  96     template <typename T> struct wxStringAsBufHelper;
  97 }
  98
  99 // ---------------------------------------------------------------------------
 100 // macros
 101 // ---------------------------------------------------------------------------
 102
 103 // casts [unfortunately!] needed to call some broken functions which require
 104 // "char *" instead of "const char *"
// WXSTRINGCAST and wxCSTRINGCAST are two spellings of the same pair of casts:
// the operand is first converted to "const wxChar *" and then has its
// constness cast away
#define   WXSTRINGCAST (wxChar *)(const wxChar *)
#define   wxCSTRINGCAST (wxChar *)(const wxChar *)
// the same pair of casts for narrow (char) strings
#define   wxMBSTRINGCAST (char *)(const char *)
// and for wide (wchar_t) strings
#define   wxWCSTRINGCAST (wchar_t *)(const wchar_t *)
 109
 110 // ----------------------------------------------------------------------------
 111 // constants
 112 // ----------------------------------------------------------------------------
 113
 114 #if WXWIN_COMPATIBILITY_2_6
 115
 116 // deprecated in favour of wxString::npos, don't use in new code
 117 //
 118 // maximum possible length for a string means "take all string" everywhere
 119 #define wxSTRING_MAXLEN wxString::npos
 120
 121 #endif // WXWIN_COMPATIBILITY_2_6
 122
 123 // ---------------------------------------------------------------------------
 124 // global functions complementing standard C string library replacements for
 125 // strlen() and portable strcasecmp()
 126 //---------------------------------------------------------------------------
 127
 128 #if WXWIN_COMPATIBILITY_2_8
 129 // Use wxXXX() functions from wxcrt.h instead! These functions are for
 130 // backwards compatibility only.
 131
 132 // checks whether the passed in pointer is NULL and if the string is empty
 133 wxDEPRECATED( inline bool IsEmpty(const char *p) );
 134 inline bool IsEmpty(const char *p) { return (!p || !*p); }
 135
 136 // safe version of strlen() (returns 0 if passed NULL pointer)
 137 wxDEPRECATED( inline size_t Strlen(const char *psz) );
 138 inline size_t Strlen(const char *psz)
 139   { return psz ? strlen(psz) : 0; }
 140
 141 // portable strcasecmp/_stricmp
 142 wxDEPRECATED( inline int Stricmp(const char *psz1, const char *psz2) );
 143 inline int Stricmp(const char *psz1, const char *psz2)
 144     { return wxCRT_StricmpA(psz1, psz2); }
 145
 146 #endif // WXWIN_COMPATIBILITY_2_8
 147
 148 // ----------------------------------------------------------------------------
 149 // wxCStrData
 150 // ----------------------------------------------------------------------------
 151
 152 // Lightweight object returned by wxString::c_str() and implicitly convertible
 153 // to either const char* or const wchar_t*.
 154 class wxCStrData
 155 {
 156 private:
 157     // Ctors; for internal use by wxString and wxCStrData only
 158     wxCStrData(const wxString *str, size_t offset = 0, bool owned = false)
 159         : m_str(str), m_offset(offset), m_owned(owned) {}
 160
 161 public:
 162     // Ctor constructs the object from char literal; they are needed to make
 163     // operator?: compile and they intentionally take char*, not const char*
 164     inline wxCStrData(char *buf);
 165     inline wxCStrData(wchar_t *buf);
 166     inline wxCStrData(const wxCStrData& data);
 167
 168     inline ~wxCStrData();
 169
 170     // AsWChar() and AsChar() can't be defined here as they use wxString and so
 171     // must come after it and because of this won't be inlined when called from
 172     // wxString methods (without a lot of work to extract these wxString methods
 173     // from inside the class itself). But we still define them being inline
 174     // below to let compiler inline them from elsewhere. And because of this we
 175     // must declare them as inline here because otherwise some compilers give
 176     // warnings about them, e.g. mingw32 3.4.5 warns about "<symbol> defined
 177     // locally after being referenced with dllimport linkage" while IRIX
 178     // mipsPro 7.4 warns about "function declared inline after being called".
 179     inline const wchar_t* AsWChar() const;
 180     operator const wchar_t*() const { return AsWChar(); }
 181
 182     inline const char* AsChar() const;
 183     const unsigned char* AsUnsignedChar() const
 184         { return (const unsigned char *) AsChar(); }
 185     operator const char*() const { return AsChar(); }
 186     operator const unsigned char*() const { return AsUnsignedChar(); }
 187
 188     operator const void*() const { return AsChar(); }
 189
 190     // returns buffers that are valid as long as the associated wxString exists
 191     const wxScopedCharBuffer AsCharBuf() const
 192     {
 193         return wxScopedCharBuffer::CreateNonOwned(AsChar());
 194     }
 195
 196     const wxScopedWCharBuffer AsWCharBuf() const
 197     {
 198         return wxScopedWCharBuffer::CreateNonOwned(AsWChar());
 199     }
 200
 201     inline wxString AsString() const;
 202
 203     // returns the value as C string in internal representation (equivalent
 204     // to AsString().wx_str(), but more efficient)
 205     const wxStringCharType *AsInternal() const;
 206
 207     // allow expressions like "c_str()[0]":
 208     inline wxUniChar operator[](size_t n) const;
 209     wxUniChar operator[](int n) const { return operator[](size_t(n)); }
 210     wxUniChar operator[](long n) const { return operator[](size_t(n)); }
 211 #ifndef wxSIZE_T_IS_UINT
 212     wxUniChar operator[](unsigned int n) const { return operator[](size_t(n)); }
 213 #endif // size_t != unsigned int
 214
 215     // These operators are needed to emulate the pointer semantics of c_str():
 216     // expressions like "wxChar *p = str.c_str() + 1;" should continue to work
 217     // (we need both versions to resolve ambiguities). Note that this means
 218     // the 'n' value is interpreted as addition to char*/wchar_t* pointer, it
 219     // is *not* number of Unicode characters in wxString.
 220     wxCStrData operator+(int n) const
 221         { return wxCStrData(m_str, m_offset + n, m_owned); }
 222     wxCStrData operator+(long n) const
 223         { return wxCStrData(m_str, m_offset + n, m_owned); }
 224     wxCStrData operator+(size_t n) const
 225         { return wxCStrData(m_str, m_offset + n, m_owned); }
 226
 227     // and these for "str.c_str() + (p2 - p1)" (it also works for any integer
 228     // expression but it must be ptrdiff_t and not e.g. int to work in this
 229     // example):
 230     wxCStrData operator-(ptrdiff_t n) const
 231     {
 232         wxASSERT_MSG( n <= (ptrdiff_t)m_offset,
 233                       wxT("attempt to construct address before the beginning of the string") );
 234         return wxCStrData(m_str, m_offset - n, m_owned);
 235     }
 236
 237     // this operator is needed to make expressions like "*c_str()" or
 238     // "*(c_str() + 2)" work
 239     inline wxUniChar operator*() const;
 240
 241 private:
 242     // the wxString this object was returned for
 243     const wxString *m_str;
 244     // Offset into c_str() return value. Note that this is *not* offset in
 245     // m_str in Unicode characters. Instead, it is index into the
 246     // char*/wchar_t* buffer returned by c_str(). It's interpretation depends
 247     // on how is the wxCStrData instance used: if it is eventually cast to
 248     // const char*, m_offset will be in bytes form string's start; if it is
 249     // cast to const wchar_t*, it will be in wchar_t values.
 250     size_t m_offset;
 251     // should m_str be deleted, i.e. is it owned by us?
 252     bool m_owned;
 253
 254     friend class WXDLLIMPEXP_FWD_BASE wxString;
 255 };
 256
 257 // ----------------------------------------------------------------------------
 258 // wxStringPrintfMixin
 259 // ---------------------------------------------------------------------------
 260
 261 // NB: VC6 has a bug that causes linker errors if you have template methods
 262 //     in a class using __declspec(dllimport). The solution is to split such
 263 //     class into two classes, one that contains the template methods and does
 264 //     *not* use WXDLLIMPEXP_BASE and another class that contains the rest
 265 //     (with DLL linkage).
 266 //
 267 //     We only do this for VC6 here, because the code is less efficient
 268 //     (Printf() has to use dynamic_cast<>) and because OpenWatcom compiler
 269 //     cannot compile this code.
 270
 271 #if defined(__VISUALC__) && __VISUALC__ < 1300
 272     #define wxNEEDS_WXSTRING_PRINTF_MIXIN
 273 #endif
 274
 275 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
// this class contains implementation of wxString's vararg methods, it's
// exported from wxBase DLL
class WXDLLIMPEXP_BASE wxStringPrintfMixinBase
{
protected:
    // the ctor is protected: objects of this class can only be created as
    // part of a derived class object
    wxStringPrintfMixinBase() {}

#if !wxUSE_UTF8_LOCALE_ONLY
    // variants taking the format string as "const wxChar *"; they are not
    // declared at all if wxUSE_UTF8_LOCALE_ONLY is set
    int DoPrintfWchar(const wxChar *format, ...);
    static wxString DoFormatWchar(const wxChar *format, ...);
#endif
#if wxUSE_UNICODE_UTF8
    // variants taking the format string as "const char *", only declared in
    // wxUSE_UNICODE_UTF8 build
    int DoPrintfUtf8(const char *format, ...);
    static wxString DoFormatUtf8(const char *format, ...);
#endif
};
 292
// this class contains template wrappers for wxString's vararg methods, it's
// intentionally *not* exported from the DLL in order to fix the VC6 bug
// described above
class wxStringPrintfMixin : public wxStringPrintfMixinBase
{
private:
    // to further complicate things, we can't return wxString from
    // wxStringPrintfMixin::Format() because wxString is not yet declared at
    // this point; the solution is to use this fake type trait template - this
    // way the compiler won't know the return type until Format() is used
    // (this doesn't compile with Watcom, but VC6 compiles it just fine):
    template<typename T> struct StringReturnType
    {
        typedef wxString type;
    };

public:
    // these are duplicated wxString methods, they're also declared below
    // if !wxNEEDS_WXSTRING_PRINTF_MIXIN:

    // static wxString Format(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_1;
    WX_DEFINE_VARARG_FUNC_SANS_N0(static typename StringReturnType<T1>::type,
                                  Format, 1, (const wxFormatString&),
                                  DoFormatWchar, DoFormatUtf8)
    // We have to implement the version without template arguments manually
    // because of the StringReturnType<> hack, although WX_DEFINE_VARARG_FUNC
    // normally does it itself. It has to be a template so that we can use
    // the hack, even though there's no real template parameter. We can't move
    // it to wxString, because it would shadow these versions of Format() then.
    template<typename T>
    inline static typename StringReturnType<T>::type
    Format(const T& fmt)
    {
        // NB: this doesn't compile if T is not (some form of) a string;
        //     this makes Format's prototype equivalent to
        //     Format(const wxFormatString& fmt)
        return DoFormatWchar(wxFormatString(fmt));
    }

    // int Printf(const wxString& format, ...);
    WX_DEFINE_VARARG_FUNC(int, Printf, 1, (const wxFormatString&),
                          DoPrintfWchar, DoPrintfUtf8)
    // int sprintf(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_2;
    //
    // notice that it is defined in terms of exactly the same implementation
    // functions as Printf() above
    WX_DEFINE_VARARG_FUNC(int, sprintf, 1, (const wxFormatString&),
                          DoPrintfWchar, DoPrintfUtf8)

protected:
    // protected ctor: this class is only meant to be used as a base class
    wxStringPrintfMixin() : wxStringPrintfMixinBase() {}
};
 342 #endif // wxNEEDS_WXSTRING_PRINTF_MIXIN
 343
 344
 345 // ----------------------------------------------------------------------------
 346 // wxString: string class trying to be compatible with std::string, MFC
 347 //           CString and wxWindows 1.x wxString all at once
 348 // ---------------------------------------------------------------------------
 349
 350 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
 351     // "non dll-interface class 'wxStringPrintfMixin' used as base interface
 352     // for dll-interface class 'wxString'" -- this is OK in our case
 353     #pragma warning (push)
 354     #pragma warning (disable:4275)
 355 #endif
 356
 357 #if wxUSE_UNICODE_UTF8
 358 // see the comment near wxString::iterator for why we need this
 359 class WXDLLIMPEXP_BASE wxStringIteratorNode
 360 {
 361 public:
 362     wxStringIteratorNode()
 363         : m_str(NULL), m_citer(NULL), m_iter(NULL), m_prev(NULL), m_next(NULL) {}
 364     wxStringIteratorNode(const wxString *str,
 365                           wxStringImpl::const_iterator *citer)
 366         { DoSet(str, citer, NULL); }
 367     wxStringIteratorNode(const wxString *str, wxStringImpl::iterator *iter)
 368         { DoSet(str, NULL, iter); }
 369     ~wxStringIteratorNode()
 370         { clear(); }
 371
 372     inline void set(const wxString *str, wxStringImpl::const_iterator *citer)
 373         { clear(); DoSet(str, citer, NULL); }
 374     inline void set(const wxString *str, wxStringImpl::iterator *iter)
 375         { clear(); DoSet(str, NULL, iter); }
 376
 377     const wxString *m_str;
 378     wxStringImpl::const_iterator *m_citer;
 379     wxStringImpl::iterator *m_iter;
 380     wxStringIteratorNode *m_prev, *m_next;
 381
 382 private:
 383     inline void clear();
 384     inline void DoSet(const wxString *str,
 385                       wxStringImpl::const_iterator *citer,
 386                       wxStringImpl::iterator *iter);
 387
 388     // the node belongs to a particular iterator instance, it's not copied
 389     // when a copy of the iterator is made
 390     wxDECLARE_NO_COPY_CLASS(wxStringIteratorNode);
 391 };
 392 #endif // wxUSE_UNICODE_UTF8
 393
 394 class WXDLLIMPEXP_BASE wxString
 395 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
 396                                 : public wxStringPrintfMixin
 397 #endif
 398 {
 399   // NB: special care was taken in arranging the member functions in such order
 400   //     that all inline functions can be effectively inlined, verify that all
 401   //     performance critical functions are still inlined if you change order!
 402 public:
 403   // an 'invalid' value for string index, moved to this place due to a CW bug
 404   static const size_t npos;
 405
 406 private:
  // if we hadn't made these operators private, it would be possible to
  // compile "wxString s; s = 17;" without any warnings as 17 is implicitly
  // converted to char in C and we do have operator=(char)
  //
  // NB: we don't need other versions (short/long and unsigned) as an attempt
  //     to assign another numeric type to wxString will now result in an
  //     ambiguity between operator=(char) and operator=(int)
  wxString& operator=(int);

  // these methods are not implemented - there is _no_ conversion from int to
  // string, you're doing something wrong if the compiler wants to call it!
  //
  // try `s << i' or `s.Printf("%d", i)' instead
  wxString(int);
 421
 422
 423   // buffer for holding temporary substring when using any of the methods
 424   // that take (char*,size_t) or (wchar_t*,size_t) arguments:
 425   template<typename T>
 426   struct SubstrBufFromType
 427   {
 428       T data;
 429       size_t len;
 430
 431       SubstrBufFromType(const T& data_, size_t len_)
 432           : data(data_), len(len_)
 433       {
 434           wxASSERT_MSG( len != npos, "must have real length" );
 435       }
 436   };
 437
#if wxUSE_UNICODE_UTF8
  // even char* -> char* needs conversion, from locale charset to UTF-8
  typedef SubstrBufFromType<wxScopedCharBuffer>    SubstrBufFromWC;
  typedef SubstrBufFromType<wxScopedCharBuffer>    SubstrBufFromMB;
#elif wxUSE_UNICODE_WCHAR
  // wide strings are kept as a plain pointer here, only narrow ones use a
  // (wide char) buffer object
  typedef SubstrBufFromType<const wchar_t*>        SubstrBufFromWC;
  typedef SubstrBufFromType<wxScopedWCharBuffer>   SubstrBufFromMB;
#else
  // the reverse: narrow strings are kept as a plain pointer and wide ones use
  // a (narrow char) buffer object
  typedef SubstrBufFromType<const char*>           SubstrBufFromMB;
  typedef SubstrBufFromType<wxScopedCharBuffer>    SubstrBufFromWC;
#endif
 449
 450
 451   // Functions implementing primitive operations on string data; wxString
 452   // methods and iterators are implemented in terms of it. The differences
 453   // between UTF-8 and wchar_t* representations of the string are mostly
 454   // contained here.
 455
  // ConvertStr() is only declared here. Which overloads exist depends on the
  // build: both of them in UTF-8 build, only the one taking "const char *"
  // in wchar_t build and only the one taking "const wchar_t *" otherwise.
#if wxUSE_UNICODE_UTF8
  static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
                                    const wxMBConv& conv);
  static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength,
                                    const wxMBConv& conv);
#elif wxUSE_UNICODE_WCHAR
  static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
                                    const wxMBConv& conv);
#else
  static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength,
                                    const wxMBConv& conv);
#endif
 468
 469 #if !wxUSE_UNICODE_UTF8 // wxUSE_UNICODE_WCHAR or !wxUSE_UNICODE
 470   // returns C string encoded as the implementation expects:
 471   #if wxUSE_UNICODE
 472   static const wchar_t* ImplStr(const wchar_t* str)
 473     { return str ? str : wxT(""); }
 474   static const SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
 475     { return SubstrBufFromWC(str, (str && n == npos) ? wxWcslen(str) : n); }
 476   static wxScopedWCharBuffer ImplStr(const char* str,
 477                                      const wxMBConv& conv = wxConvLibc)
 478     { return ConvertStr(str, npos, conv).data; }
 479   static SubstrBufFromMB ImplStr(const char* str, size_t n,
 480                                  const wxMBConv& conv = wxConvLibc)
 481     { return ConvertStr(str, n, conv); }
 482   #else
 483   static const char* ImplStr(const char* str,
 484                              const wxMBConv& WXUNUSED(conv) = wxConvLibc)
 485     { return str ? str : ""; }
 486   static const SubstrBufFromMB ImplStr(const char* str, size_t n,
 487                                        const wxMBConv& WXUNUSED(conv) = wxConvLibc)
 488     { return SubstrBufFromMB(str, (str && n == npos) ? wxStrlen(str) : n); }
 489   static wxScopedCharBuffer ImplStr(const wchar_t* str)
 490     { return ConvertStr(str, npos, wxConvLibc).data; }
 491   static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
 492     { return ConvertStr(str, n, wxConvLibc); }
 493   #endif
 494
 495   // translates position index in wxString to/from index in underlying
 496   // wxStringImpl:
 497   static size_t PosToImpl(size_t pos) { return pos; }
 498   static void PosLenToImpl(size_t pos, size_t len,
 499                            size_t *implPos, size_t *implLen)
 500     { *implPos = pos; *implLen = len; }
 501   static size_t LenToImpl(size_t len) { return len; }
 502   static size_t PosFromImpl(size_t pos) { return pos; }
 503
 504   // we don't want to define these as empty inline functions as it could
 505   // result in noticeable (and quite unnecessary in non-UTF-8 build) slowdown
 506   // in debug build where the inline functions are not effectively inlined
  // all four macros expand to nothing in this branch
  #define wxSTRING_INVALIDATE_CACHE()
  #define wxSTRING_INVALIDATE_CACHED_LENGTH()
  #define wxSTRING_UPDATE_CACHED_LENGTH(n)
  #define wxSTRING_SET_CACHED_LENGTH(n)
 511
 512 #else // wxUSE_UNICODE_UTF8
 513
 514   static wxScopedCharBuffer ImplStr(const char* str,
 515                                     const wxMBConv& conv = wxConvLibc)
 516     { return ConvertStr(str, npos, conv).data; }
 517   static SubstrBufFromMB ImplStr(const char* str, size_t n,
 518                                  const wxMBConv& conv = wxConvLibc)
 519     { return ConvertStr(str, n, conv); }
 520
 521   static wxScopedCharBuffer ImplStr(const wchar_t* str)
 522     { return ConvertStr(str, npos, wxMBConvUTF8()).data; }
 523   static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
 524     { return ConvertStr(str, n, wxMBConvUTF8()); }
 525
 526 #if wxUSE_STRING_POS_CACHE
 527   // this is an extremely simple cache used by PosToImpl(): each cache element
 528   // contains the string it applies to and the index corresponding to the last
 529   // used position in this wxString in its m_impl string
 530   //
 531   // NB: notice that this struct (and nested Element one) must be a POD or we
 532   //     wouldn't be able to use a thread-local variable of this type, in
 533   //     particular it should have no ctor -- we rely on statics being
 534   //     initialized to 0 instead
 535   struct Cache
 536   {
 537       enum { SIZE = 8 };
 538
 539       struct Element
 540       {
 541           const wxString *str;  // the string to which this element applies
 542           size_t pos,           // the cached index in this string
 543                  impl,          // the corresponding position in its m_impl
 544                  len;           // cached length or npos if unknown
 545
 546           // reset cached index to 0
 547           void ResetPos() { pos = impl = 0; }
 548
 549           // reset position and length
 550           void Reset() { ResetPos(); len = npos; }
 551       };
 552
 553       // cache the indices mapping for the last few string used
 554       Element cached[SIZE];
 555
 556       // the last used index
 557       unsigned lastUsed;
 558   };
 559
 560 #ifndef wxHAS_COMPILER_TLS
 561   // we must use an accessor function and not a static variable when the TLS
 562   // variables support is implemented in the library (and not by the compiler)
 563   // because the global s_cache variable could be not yet initialized when a
 564   // ctor of another global object is executed and if that ctor uses any
 565   // wxString methods, bad things happen
 566   //
 567   // however notice that this approach does not work when compiler TLS is used,
 568   // at least not with g++ 4.1.2 under amd64 as it apparently compiles code
 569   // using this accessor incorrectly when optimizations are enabled (-O2 is
  // enough) -- luckily we don't need it in that case either, as static
  // __thread variables are initialized to 0 anyhow and so we can use the
  // variable directly
  WXEXPORT static Cache& GetCache()
  {
      // being a function-local static, this variable is initialized the
      // first time GetCache() is called and so can't be used before being
      // initialized, unlike a global one
      static wxTLS_TYPE(Cache) s_cache;

      return wxTLS_VALUE(s_cache);
  }
 579
 580   // this helper struct is used to ensure that GetCache() is called during
 581   // static initialization time, i.e. before any threads creation, as otherwise
 582   // the static s_cache construction inside GetCache() wouldn't be MT-safe
 583   friend struct wxStrCacheInitializer;
 584 #else // wxHAS_COMPILER_TLS
 585   static wxTLS_TYPE(Cache) ms_cache;
 586   static Cache& GetCache() { return wxTLS_VALUE(ms_cache); }
 587 #endif // !wxHAS_COMPILER_TLS/wxHAS_COMPILER_TLS
 588
 589   static Cache::Element *GetCacheBegin() { return GetCache().cached; }
 590   static Cache::Element *GetCacheEnd() { return GetCacheBegin() + Cache::SIZE; }
 591   static unsigned& LastUsedCacheElement() { return GetCache().lastUsed; }
 592
 593   // this is used in debug builds only to provide a convenient function,
 594   // callable from a debugger, to show the cache contents
 595   friend struct wxStrCacheDumper;
 596
 597   // uncomment this to have access to some profiling statistics on program
 598   // termination
 599   //#define wxPROFILE_STRING_CACHE
 600
 601 #ifdef wxPROFILE_STRING_CACHE
 602   static struct PosToImplCacheStats
 603   {
 604       unsigned postot,  // total non-trivial calls to PosToImpl
 605                poshits, // cache hits from PosToImpl()
 606                mishits, // cached position beyond the needed one
 607                sumpos,  // sum of all positions, used to compute the
 608                         // average position after dividing by postot
 609                sumofs,  // sum of all offsets after using the cache, used to
 610                         // compute the average after dividing by hits
 611                lentot,  // number of total calls to length()
 612                lenhits; // number of cache hits in length()
 613   } ms_cacheStats;
 614
 615   friend struct wxStrCacheStatsDumper;
 616
 617   #define wxCACHE_PROFILE_FIELD_INC(field) ms_cacheStats.field++
 618   #define wxCACHE_PROFILE_FIELD_ADD(field, val) ms_cacheStats.field += (val)
 619 #else // !wxPROFILE_STRING_CACHE
 620   #define wxCACHE_PROFILE_FIELD_INC(field)
 621   #define wxCACHE_PROFILE_FIELD_ADD(field, val)
 622 #endif // wxPROFILE_STRING_CACHE/!wxPROFILE_STRING_CACHE
 623
 624   // note: it could seem that the functions below shouldn't be inline because
 625   // they are big, contain loops and so the compiler shouldn't be able to
 626   // inline them anyhow, however moving them into string.cpp does decrease the
 627   // code performance by ~5%, at least when using g++ 4.1 so do keep them here
 628   // unless tests show that it's not advantageous any more
 629
 630   // return the pointer to the cache element for this string or NULL if not
 631   // cached
 632   Cache::Element *FindCacheElement() const
 633   {
 634       // profiling seems to show a small but consistent gain if we use this
 635       // simple loop instead of starting from the last used element (there are
 636       // a lot of misses in this function...)
 637       Cache::Element * const cacheBegin = GetCacheBegin();
 638 #ifndef wxHAS_COMPILER_TLS
 639       // during destruction tls calls may return NULL, in this case return NULL
 640       // immediately without accessing anything else
 641       if ( cacheBegin == NULL )
 642         return NULL;
 643 #endif
 644       Cache::Element * const cacheEnd = GetCacheEnd();
 645       for ( Cache::Element *c = cacheBegin; c != cacheEnd; c++ )
 646       {
 647           if ( c->str == this )
 648               return c;
 649       }
 650
 651       return NULL;
 652   }
 653
 654   // unlike FindCacheElement(), this one always returns a valid pointer to the
 655   // cache element for this string, it may have valid last cached position and
 656   // its corresponding index in the byte string or not
 657   Cache::Element *GetCacheElement() const
 658   {
 659       Cache::Element * const cacheBegin = GetCacheBegin();
 660       Cache::Element * const cacheEnd = GetCacheEnd();
 661       Cache::Element * const cacheStart = cacheBegin + LastUsedCacheElement();
 662
 663       // check the last used first, this does no (measurable) harm for a miss
 664       // but does help for simple loops addressing the same string all the time
 665       if ( cacheStart->str == this )
 666           return cacheStart;
 667
 668       // notice that we're going to check cacheStart again inside this call but
 669       // profiling shows that it's still faster to use a simple loop like
 670       // inside FindCacheElement() than manually looping with wrapping starting
 671       // from the cache entry after the start one
 672       Cache::Element *c = FindCacheElement();
 673       if ( !c )
 674       {
 675           // claim the next cache entry for this string
 676           c = cacheStart;
 677           if ( ++c == cacheEnd )
 678               c = cacheBegin;
 679
 680           c->str = this;
 681           c->Reset();
 682
 683           // and remember the last used element
 684           LastUsedCacheElement() = c - cacheBegin;
 685       }
 686
 687       return c;
 688   }
 689
 690   size_t DoPosToImpl(size_t pos) const
 691   {
 692       wxCACHE_PROFILE_FIELD_INC(postot);
 693
 694       // NB: although the case of pos == 1 (and offset from cached position
 695       //     equal to 1) are common, nothing is gained by writing special code
 696       //     for handling them, the compiler (at least g++ 4.1 used) seems to
 697       //     optimize the code well enough on its own
 698
 699       wxCACHE_PROFILE_FIELD_ADD(sumpos, pos);
 700
 701       Cache::Element * const cache = GetCacheElement();
 702
 703       // cached position can't be 0 so if it is, it means that this entry was
 704       // used for length caching only so far, i.e. it doesn't count as a hit
 705       // from our point of view
 706       if ( cache->pos )
 707       {
 708           wxCACHE_PROFILE_FIELD_INC(poshits);
 709       }
 710
 711       if ( pos == cache->pos )
 712           return cache->impl;
 713
 714       // this seems to happen only rarely so just reset the cache in this case
 715       // instead of complicating code even further by seeking backwards in this
 716       // case
 717       if ( cache->pos > pos )
 718       {
 719           wxCACHE_PROFILE_FIELD_INC(mishits);
 720
 721           cache->ResetPos();
 722       }
 723
 724       wxCACHE_PROFILE_FIELD_ADD(sumofs, pos - cache->pos);
 725
 726
 727       wxStringImpl::const_iterator i(m_impl.begin() + cache->impl);
 728       for ( size_t n = cache->pos; n < pos; n++ )
 729           wxStringOperations::IncIter(i);
 730
 731       cache->pos = pos;
 732       cache->impl = i - m_impl.begin();
 733
 734       wxSTRING_CACHE_ASSERT(
 735           (int)cache->impl == (begin() + pos).impl() - m_impl.begin() );
 736
 737       return cache->impl;
 738   }
 739
 740   void InvalidateCache()
 741   {
 742       Cache::Element * const cache = FindCacheElement();
 743       if ( cache )
 744           cache->Reset();
 745   }
 746
 747   void InvalidateCachedLength()
 748   {
 749       Cache::Element * const cache = FindCacheElement();
 750       if ( cache )
 751           cache->len = npos;
 752   }
 753
 754   void SetCachedLength(size_t len)
 755   {
 756       // we optimistically cache the length here even if the string wasn't
 757       // present in the cache before, this seems to do no harm and the
 758       // potential for avoiding length recomputation for long strings looks
 759       // interesting
 760       GetCacheElement()->len = len;
 761   }
 762
 763   void UpdateCachedLength(ptrdiff_t delta)
 764   {
 765       Cache::Element * const cache = FindCacheElement();
 766       if ( cache && cache->len != npos )
 767       {
 768           wxSTRING_CACHE_ASSERT( (ptrdiff_t)cache->len + delta >= 0 );
 769
 770           cache->len += delta;
 771       }
 772   }
 773
       // Shorthands for the cache maintenance methods defined above: with the
       // position cache enabled each macro expands to a call of the
       // corresponding member function.
 774   #define wxSTRING_INVALIDATE_CACHE() InvalidateCache()
 775   #define wxSTRING_INVALIDATE_CACHED_LENGTH() InvalidateCachedLength()
 776   #define wxSTRING_UPDATE_CACHED_LENGTH(n) UpdateCachedLength(n)
 777   #define wxSTRING_SET_CACHED_LENGTH(n) SetCachedLength(n)
 778 #else // !wxUSE_STRING_POS_CACHE
 779   size_t DoPosToImpl(size_t pos) const
 780   {
 781       return (begin() + pos).impl() - m_impl.begin();
 782   }
 783
       // Without the position cache there is nothing to maintain, so all the
       // cache maintenance macros expand to nothing.
 784   #define wxSTRING_INVALIDATE_CACHE()
 785   #define wxSTRING_INVALIDATE_CACHED_LENGTH()
 786   #define wxSTRING_UPDATE_CACHED_LENGTH(n)
 787   #define wxSTRING_SET_CACHED_LENGTH(n)
 788 #endif // wxUSE_STRING_POS_CACHE/!wxUSE_STRING_POS_CACHE
 789
 790   size_t PosToImpl(size_t pos) const
 791   {
 792       return pos == 0 || pos == npos ? pos : DoPosToImpl(pos);
 793   }
 794
       // Convert the range given by the character position pos and the character
       // count len into the equivalent position and length in m_impl, which are
       // returned in *implPos and *implLen. Defined out of line.
       // NOTE(review): the handling of npos and out of range values is not
       // visible here -- check the implementation before relying on it.
 795   void PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const;
 796
 797   size_t LenToImpl(size_t len) const
 798   {
 799       size_t pos, len2;
 800       PosLenToImpl(0, len, &pos, &len2);
 801       return len2;
 802   }
 803
 804   size_t PosFromImpl(size_t pos) const
 805   {
 806       if ( pos == 0 || pos == npos )
 807           return pos;
 808       else
 809           return const_iterator(this, m_impl.begin() + pos) - begin();
 810   }
 811 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
 812
 813 public:
 814   // standard types
       //
       // NB: the elements are exposed as wxUniChar, so const_reference is a
       //     value type and not a C++ reference, and writable access goes
       //     through wxUniCharRef objects instead of plain references.
 815   typedef wxUniChar value_type;
 816   typedef wxUniChar char_type;
 817   typedef wxUniCharRef reference;
 818   typedef wxChar* pointer;
 819   typedef const wxChar* const_pointer;
 820
 821   typedef size_t size_type;
 822   typedef wxUniChar const_reference;
 823
       // WX_STR_ITERATOR_TAG is the standard iterator category used for the
       // wxString iterators and WX_DEFINE_ITERATOR_CATEGORY(cat) expands to the
       // iterator_category typedef, or to nothing if wxUSE_STD_STRING is off.
 824 #if wxUSE_STD_STRING
 825   #if wxUSE_UNICODE_UTF8
 826     // random access is not O(1), as required by Random Access Iterator
 827     #define WX_STR_ITERATOR_TAG std::bidirectional_iterator_tag
 828   #else
 829     #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag
 830   #endif
 831   #define WX_DEFINE_ITERATOR_CATEGORY(cat) typedef cat iterator_category;
 832 #else
 833   // not defining iterator_category at all in this case is better than defining
 834   // it as some dummy type -- at least it results in more intelligible error
 835   // messages
 836   #define WX_DEFINE_ITERATOR_CATEGORY(cat)
 837 #endif
 838
       // Defines the members shared by iterator and const_iterator: the standard
       // iterator typedefs, operator[], increment, decrement, += and -= and the
       // iterator difference (all implemented using wxStringOperations), the
       // comparison operators (which compare the underlying iterators directly)
       // and the m_cur member holding the underlying wxStringImpl iterator.
       //
       // NB: the expansion ends in a private section with the declaration of
       //     m_cur without the terminating semicolon, so the macro must be
       //     followed by ';' and by an access specifier where it is used.
 839   #define WX_STR_ITERATOR_IMPL(iterator_name, pointer_type, reference_type) \
 840       private:                                                              \
 841           typedef wxStringImpl::iterator_name underlying_iterator;          \
 842       public:                                                               \
 843           WX_DEFINE_ITERATOR_CATEGORY(WX_STR_ITERATOR_TAG)                  \
 844           typedef wxUniChar value_type;                                     \
 845           typedef ptrdiff_t difference_type;                                \
 846           typedef reference_type reference;                                 \
 847           typedef pointer_type pointer;                                     \
 848                                                                             \
 849           reference operator[](size_t n) const { return *(*this + n); }     \
 850                                                                             \
 851           iterator_name& operator++()                                       \
 852             { wxStringOperations::IncIter(m_cur); return *this; }           \
 853           iterator_name& operator--()                                       \
 854             { wxStringOperations::DecIter(m_cur); return *this; }           \
 855           iterator_name operator++(int)                                     \
 856           {                                                                 \
 857               iterator_name tmp = *this;                                    \
 858               wxStringOperations::IncIter(m_cur);                           \
 859               return tmp;                                                   \
 860           }                                                                 \
 861           iterator_name operator--(int)                                     \
 862           {                                                                 \
 863               iterator_name tmp = *this;                                    \
 864               wxStringOperations::DecIter(m_cur);                           \
 865               return tmp;                                                   \
 866           }                                                                 \
 867                                                                             \
 868           iterator_name& operator+=(ptrdiff_t n)                            \
 869           {                                                                 \
 870               m_cur = wxStringOperations::AddToIter(m_cur, n);              \
 871               return *this;                                                 \
 872           }                                                                 \
 873           iterator_name& operator-=(ptrdiff_t n)                            \
 874           {                                                                 \
 875               m_cur = wxStringOperations::AddToIter(m_cur, -n);             \
 876               return *this;                                                 \
 877           }                                                                 \
 878                                                                             \
 879           difference_type operator-(const iterator_name& i) const           \
 880             { return wxStringOperations::DiffIters(m_cur, i.m_cur); }       \
 881                                                                             \
 882           bool operator==(const iterator_name& i) const                     \
 883             { return m_cur == i.m_cur; }                                    \
 884           bool operator!=(const iterator_name& i) const                     \
 885             { return m_cur != i.m_cur; }                                    \
 886                                                                             \
 887           bool operator<(const iterator_name& i) const                      \
 888             { return m_cur < i.m_cur; }                                     \
 889           bool operator>(const iterator_name& i) const                      \
 890             { return m_cur > i.m_cur; }                                     \
 891           bool operator<=(const iterator_name& i) const                     \
 892             { return m_cur <= i.m_cur; }                                    \
 893           bool operator>=(const iterator_name& i) const                     \
 894             { return m_cur >= i.m_cur; }                                    \
 895                                                                             \
 896       private:                                                              \
 897           /* for internal wxString use only: */                             \
 898           underlying_iterator impl() const { return m_cur; }                \
 899                                                                             \
 900           friend class wxString;                                            \
 901           friend class wxCStrData;                                          \
 902                                                                             \
 903       private:                                                              \
 904           underlying_iterator m_cur
 905
       // forward declaration needed because iterator declares comparison
       // operators taking const_iterator before the latter is defined
 906   class WXDLLIMPEXP_FWD_BASE const_iterator;
 907
 908 #if wxUSE_UNICODE_UTF8
 909   // NB: In UTF-8 build, (non-const) iterator needs to keep reference
 910   //     to the underlying wxStringImpl, because UTF-8 is variable-length
 911   //     encoding and changing the value pointed to by an iterator (using
 912   //     its operator*) requires calling wxStringImpl::replace() if the old
 913   //     and new values differ in their encoding's length.
 914   //
 915   //     Furthermore, the replace() call may invalidate all iterators for the
 916   //     string, so we have to keep track of outstanding iterators and update
 917   //     them if replace() happens.
 918   //
 919   //     This is implemented by maintaining linked list of iterators for every
 920   //     string and traversing it in wxUniCharRef::operator=(). Head of the
 921   //     list is stored in wxString. (FIXME-UTF8)
 922
 923   class WXDLLIMPEXP_BASE iterator
 924   {
 925       WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef);
 926
 927   public:
 928       iterator() {}
 929       iterator(const iterator& i)
 930           : m_cur(i.m_cur), m_node(i.str(), &m_cur) {}
 931       iterator& operator=(const iterator& i)
 932       {
 933           if (&i != this)
 934           {
 935               m_cur = i.m_cur;
 936               m_node.set(i.str(), &m_cur);
 937           }
 938           return *this;
 939       }
 940
 941       reference operator*()
 942         { return wxUniCharRef::CreateForString(*str(), m_cur); }
 943
 944       iterator operator+(ptrdiff_t n) const
 945         { return iterator(str(), wxStringOperations::AddToIter(m_cur, n)); }
 946       iterator operator-(ptrdiff_t n) const
 947         { return iterator(str(), wxStringOperations::AddToIter(m_cur, -n)); }
 948
 949       // Normal iterators need to be comparable with the const_iterators so
 950       // declare the comparison operators and implement them below after the
 951       // full const_iterator declaration.
 952       bool operator==(const const_iterator& i) const;
 953       bool operator!=(const const_iterator& i) const;
 954       bool operator<(const const_iterator& i) const;
 955       bool operator>(const const_iterator& i) const;
 956       bool operator<=(const const_iterator& i) const;
 957       bool operator>=(const const_iterator& i) const;
 958
 959   private:
 960       iterator(wxString *wxstr, underlying_iterator ptr)
 961           : m_cur(ptr), m_node(wxstr, &m_cur) {}
 962
 963       wxString* str() const { return const_cast<wxString*>(m_node.m_str); }
 964
 965       wxStringIteratorNode m_node;
 966
 967       friend class const_iterator;
 968   };
 969
 970   class WXDLLIMPEXP_BASE const_iterator
 971   {
 972       // NB: reference_type is intentionally value, not reference, the character
 973       //     may be encoded differently in wxString data:
 974       WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar);
 975
 976   public:
 977       const_iterator() {}
 978       const_iterator(const const_iterator& i)
 979           : m_cur(i.m_cur), m_node(i.str(), &m_cur) {}
 980       const_iterator(const iterator& i)
 981           : m_cur(i.m_cur), m_node(i.str(), &m_cur) {}
 982
 983       const_iterator& operator=(const const_iterator& i)
 984       {
 985           if (&i != this)
 986           {
 987               m_cur = i.m_cur;
 988               m_node.set(i.str(), &m_cur);
 989           }
 990           return *this;
 991       }
 992       const_iterator& operator=(const iterator& i)
 993         { m_cur = i.m_cur; m_node.set(i.str(), &m_cur); return *this; }
 994
 995       reference operator*() const
 996         { return wxStringOperations::DecodeChar(m_cur); }
 997
 998       const_iterator operator+(ptrdiff_t n) const
 999         { return const_iterator(str(), wxStringOperations::AddToIter(m_cur, n)); }
1000       const_iterator operator-(ptrdiff_t n) const
1001         { return const_iterator(str(), wxStringOperations::AddToIter(m_cur, -n)); }
1002
1003       // Notice that comparison operators taking non-const iterator are not
1004       // needed here because of the implicit conversion from non-const iterator
1005       // to const ones ensure that the versions for const_iterator declared
1006       // inside WX_STR_ITERATOR_IMPL can be used.
1007
1008   private:
1009       // for internal wxString use only:
1010       const_iterator(const wxString *wxstr, underlying_iterator ptr)
1011           : m_cur(ptr), m_node(wxstr, &m_cur) {}
1012
1013       const wxString* str() const { return m_node.m_str; }
1014
1015       wxStringIteratorNode m_node;
1016   };
1017
1018   size_t IterToImplPos(wxString::iterator i) const
1019     { return wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); }
1020
1021   iterator GetIterForNthChar(size_t n)
1022     { return iterator(this, m_impl.begin() + PosToImpl(n)); }
1023   const_iterator GetIterForNthChar(size_t n) const
1024     { return const_iterator(this, m_impl.begin() + PosToImpl(n)); }
1025 #else // !wxUSE_UNICODE_UTF8
1026
1027   class WXDLLIMPEXP_BASE iterator
1028   {
1029       WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef);
1030
1031   public:
1032       iterator() {}
1033       iterator(const iterator& i) : m_cur(i.m_cur) {}
1034
1035       reference operator*()
1036         { return wxUniCharRef::CreateForString(m_cur); }
1037
1038       iterator operator+(ptrdiff_t n) const
1039         { return iterator(wxStringOperations::AddToIter(m_cur, n)); }
1040       iterator operator-(ptrdiff_t n) const
1041         { return iterator(wxStringOperations::AddToIter(m_cur, -n)); }
1042
1043       // As in UTF-8 case above, define comparison operators taking
1044       // const_iterator too.
1045       bool operator==(const const_iterator& i) const;
1046       bool operator!=(const const_iterator& i) const;
1047       bool operator<(const const_iterator& i) const;
1048       bool operator>(const const_iterator& i) const;
1049       bool operator<=(const const_iterator& i) const;
1050       bool operator>=(const const_iterator& i) const;
1051
1052   private:
1053       // for internal wxString use only:
1054       iterator(underlying_iterator ptr) : m_cur(ptr) {}
1055       iterator(wxString *WXUNUSED(str), underlying_iterator ptr) : m_cur(ptr) {}
1056
1057       friend class const_iterator;
1058   };
1059
1060   class WXDLLIMPEXP_BASE const_iterator
1061   {
1062       // NB: reference_type is intentionally value, not reference, the character
1063       //     may be encoded differently in wxString data:
1064       WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar);
1065
1066   public:
1067       const_iterator() {}
1068       const_iterator(const const_iterator& i) : m_cur(i.m_cur) {}
1069       const_iterator(const iterator& i) : m_cur(i.m_cur) {}
1070
1071       reference operator*() const
1072         { return wxStringOperations::DecodeChar(m_cur); }
1073
1074       const_iterator operator+(ptrdiff_t n) const
1075         { return const_iterator(wxStringOperations::AddToIter(m_cur, n)); }
1076       const_iterator operator-(ptrdiff_t n) const
1077         { return const_iterator(wxStringOperations::AddToIter(m_cur, -n)); }
1078
1079       // As in UTF-8 case above, we don't need comparison operators taking
1080       // iterator because we have an implicit conversion from iterator to
1081       // const_iterator so the operators declared by WX_STR_ITERATOR_IMPL will
1082       // be used.
1083
1084   private:
1085       // for internal wxString use only:
1086       const_iterator(underlying_iterator ptr) : m_cur(ptr) {}
1087       const_iterator(const wxString *WXUNUSED(str), underlying_iterator ptr)
1088           : m_cur(ptr) {}
1089   };
1090
1091   iterator GetIterForNthChar(size_t n) { return begin() + n; }
1092   const_iterator GetIterForNthChar(size_t n) const { return begin() + n; }
1093 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
1094
1095   #undef WX_STR_ITERATOR_TAG
1096   #undef WX_STR_ITERATOR_IMPL
1097
1098   // This method is mostly used by wxWidgets itself and return the offset of
1099   // the given iterator in bytes relative to the start of the buffer
1100   // representing the current string contents in the current locale encoding.
1101   //
1102   // It is inefficient as it involves converting part of the string to this
1103   // encoding (and also unsafe as it simply returns 0 if the conversion fails)
1104   // and so should be avoided if possible, wx itself only uses it to implement
1105   // backwards-compatible API.
1106   ptrdiff_t IterOffsetInMBStr(const const_iterator& i) const
1107   {
1108       const wxString str(begin(), i);
1109
1110       // This is logically equivalent to strlen(str.mb_str()) but avoids
1111       // actually converting the string to multibyte and just computes the
1112       // length that it would have after conversion.
1113       size_t ofs = wxConvLibc.FromWChar(NULL, 0, str.wc_str(), str.length());
1114       return ofs == wxCONV_FAILED ? 0 : static_cast<ptrdiff_t>(ofs);
1115   }
1116
       // give the nested iterator classes access to the non-public members of
       // wxString
 1117   friend class iterator;
 1118   friend class const_iterator;
1119
1120   template <typename T>
1121   class reverse_iterator_impl
1122   {
1123   public:
1124       typedef T iterator_type;
1125
1126       WX_DEFINE_ITERATOR_CATEGORY(typename T::iterator_category)
1127       typedef typename T::value_type value_type;
1128       typedef typename T::difference_type difference_type;
1129       typedef typename T::reference reference;
1130       typedef typename T::pointer *pointer;
1131
1132       reverse_iterator_impl() {}
1133       reverse_iterator_impl(iterator_type i) : m_cur(i) {}
1134       reverse_iterator_impl(const reverse_iterator_impl& ri)
1135           : m_cur(ri.m_cur) {}
1136
1137       iterator_type base() const { return m_cur; }
1138
1139       reference operator*() const { return *(m_cur-1); }
1140       reference operator[](size_t n) const { return *(*this + n); }
1141
1142       reverse_iterator_impl& operator++()
1143         { --m_cur; return *this; }
1144       reverse_iterator_impl operator++(int)
1145         { reverse_iterator_impl tmp = *this; --m_cur; return tmp; }
1146       reverse_iterator_impl& operator--()
1147         { ++m_cur; return *this; }
1148       reverse_iterator_impl operator--(int)
1149         { reverse_iterator_impl tmp = *this; ++m_cur; return tmp; }
1150
1151       // NB: explicit <T> in the functions below is to keep BCC 5.5 happy
1152       reverse_iterator_impl operator+(ptrdiff_t n) const
1153         { return reverse_iterator_impl<T>(m_cur - n); }
1154       reverse_iterator_impl operator-(ptrdiff_t n) const
1155         { return reverse_iterator_impl<T>(m_cur + n); }
1156       reverse_iterator_impl operator+=(ptrdiff_t n)
1157         { m_cur -= n; return *this; }
1158       reverse_iterator_impl operator-=(ptrdiff_t n)
1159         { m_cur += n; return *this; }
1160
1161       unsigned operator-(const reverse_iterator_impl& i) const
1162         { return i.m_cur - m_cur; }
1163
1164       bool operator==(const reverse_iterator_impl& ri) const
1165         { return m_cur == ri.m_cur; }
1166       bool operator!=(const reverse_iterator_impl& ri) const
1167         { return !(*this == ri); }
1168
1169       bool operator<(const reverse_iterator_impl& i) const
1170         { return m_cur > i.m_cur; }
1171       bool operator>(const reverse_iterator_impl& i) const
1172         { return m_cur < i.m_cur; }
1173       bool operator<=(const reverse_iterator_impl& i) const
1174         { return m_cur >= i.m_cur; }
1175       bool operator>=(const reverse_iterator_impl& i) const
1176         { return m_cur <= i.m_cur; }
1177
1178   private:
1179       iterator_type m_cur;
1180   };
1181
1182   typedef reverse_iterator_impl<iterator> reverse_iterator;
1183   typedef reverse_iterator_impl<const_iterator> const_reverse_iterator;
1184
1185 private:
1186   // used to transform an expression built using c_str() (and hence of type
1187   // wxCStrData) to an iterator into the string
1188   static const_iterator CreateConstIterator(const wxCStrData& data)
1189   {
1190       return const_iterator(data.m_str,
1191                             (data.m_str->begin() + data.m_offset).impl());
1192   }
1193
1194   // in UTF-8 STL build, creation from std::string requires conversion under
1195   // non-UTF8 locales, so we can't have and use wxString(wxStringImpl) ctor;
1196   // instead we define dummy type that lets us have wxString ctor for creation
1197   // from wxStringImpl that couldn't be used by user code (in all other builds,
1198   // "standard" ctors can be used):
1199 #if wxUSE_UNICODE_UTF8 && wxUSE_STL_BASED_WXSTRING
1200   struct CtorFromStringImplTag {};
1201
1202   wxString(CtorFromStringImplTag* WXUNUSED(dummy), const wxStringImpl& src)
1203       : m_impl(src) {}
1204
1205   static wxString FromImpl(const wxStringImpl& src)
1206       { return wxString((CtorFromStringImplTag*)NULL, src); }
1207 #else
1208   #if !wxUSE_STL_BASED_WXSTRING
1209   wxString(const wxStringImpl& src) : m_impl(src) { }
1210   // else: already defined as wxString(wxStdString) below
1211   #endif
1212   static wxString FromImpl(const wxStringImpl& src) { return wxString(src); }
1213 #endif
1214
1215 public:
1216   // constructors and destructor
1217     // ctor for an empty string
1218   wxString() {}
1219
1220     // copy ctor
1221   wxString(const wxString& stringSrc) : m_impl(stringSrc.m_impl) { }
1222
1223     // string containing nRepeat copies of ch
1224   wxString(wxUniChar ch, size_t nRepeat = 1 )
1225     { assign(nRepeat, ch); }
1226   wxString(size_t nRepeat, wxUniChar ch)
1227     { assign(nRepeat, ch); }
1228   wxString(wxUniCharRef ch, size_t nRepeat = 1)
1229     { assign(nRepeat, ch); }
1230   wxString(size_t nRepeat, wxUniCharRef ch)
1231     { assign(nRepeat, ch); }
1232   wxString(char ch, size_t nRepeat = 1)
1233     { assign(nRepeat, ch); }
1234   wxString(size_t nRepeat, char ch)
1235     { assign(nRepeat, ch); }
1236   wxString(wchar_t ch, size_t nRepeat = 1)
1237     { assign(nRepeat, ch); }
1238   wxString(size_t nRepeat, wchar_t ch)
1239     { assign(nRepeat, ch); }
1240
1241     // ctors from char* strings:
1242   wxString(const char *psz)
1243     : m_impl(ImplStr(psz)) {}
1244   wxString(const char *psz, const wxMBConv& conv)
1245     : m_impl(ImplStr(psz, conv)) {}
1246   wxString(const char *psz, size_t nLength)
1247     { assign(psz, nLength); }
1248   wxString(const char *psz, const wxMBConv& conv, size_t nLength)
1249   {
1250     SubstrBufFromMB str(ImplStr(psz, nLength, conv));
1251     m_impl.assign(str.data, str.len);
1252   }
1253
1254     // and unsigned char*:
1255   wxString(const unsigned char *psz)
1256     : m_impl(ImplStr((const char*)psz)) {}
1257   wxString(const unsigned char *psz, const wxMBConv& conv)
1258     : m_impl(ImplStr((const char*)psz, conv)) {}
1259   wxString(const unsigned char *psz, size_t nLength)
1260     { assign((const char*)psz, nLength); }
1261   wxString(const unsigned char *psz, const wxMBConv& conv, size_t nLength)
1262   {
1263     SubstrBufFromMB str(ImplStr((const char*)psz, nLength, conv));
1264     m_impl.assign(str.data, str.len);
1265   }
1266
1267     // ctors from wchar_t* strings:
1268   wxString(const wchar_t *pwz)
1269     : m_impl(ImplStr(pwz)) {}
1270   wxString(const wchar_t *pwz, const wxMBConv& WXUNUSED(conv))
1271     : m_impl(ImplStr(pwz)) {}
1272   wxString(const wchar_t *pwz, size_t nLength)
1273     { assign(pwz, nLength); }
1274   wxString(const wchar_t *pwz, const wxMBConv& WXUNUSED(conv), size_t nLength)
1275     { assign(pwz, nLength); }
1276
1277   wxString(const wxScopedCharBuffer& buf)
1278     { assign(buf.data(), buf.length()); }
1279   wxString(const wxScopedWCharBuffer& buf)
1280     { assign(buf.data(), buf.length()); }
1281
1282     // NB: this version uses m_impl.c_str() to force making a copy of the
1283     //     string, so that "wxString(str.c_str())" idiom for passing strings
1284     //     between threads works
1285   wxString(const wxCStrData& cstr)
1286       : m_impl(cstr.AsString().m_impl.c_str()) { }
1287
1288     // as we provide both ctors with this signature for both char and unsigned
1289     // char string, we need to provide one for wxCStrData to resolve ambiguity
1290   wxString(const wxCStrData& cstr, size_t nLength)
1291       : m_impl(cstr.AsString().Mid(0, nLength).m_impl) {}
1292
1293     // and because wxString is convertible to wxCStrData and const wxChar *
1294     // we also need to provide this one
1295   wxString(const wxString& str, size_t nLength)
1296     { assign(str, nLength); }
1297
1298
1299 #if wxUSE_STRING_POS_CACHE
1300   ~wxString()
1301   {
1302       // we need to invalidate our cache entry as another string could be
1303       // recreated at the same address (unlikely, but still possible, with the
1304       // heap-allocated strings but perfectly common with stack-allocated ones)
1305       InvalidateCache();
1306   }
1307 #endif // wxUSE_STRING_POS_CACHE
1308
1309   // even if we're not built with wxUSE_STD_STRING_CONV_IN_WXSTRING == 1 it is
1310   // very convenient to allow implicit conversions from std::string to wxString
1311   // and vice verse as this allows to use the same strings in non-GUI and GUI
1312   // code, however we don't want to unconditionally add this ctor as it would
1313   // make wx lib dependent on libstdc++ on some Linux versions which is bad, so
1314   // instead we ask the client code to define this wxUSE_STD_STRING symbol if
1315   // they need it
1316 #if wxUSE_STD_STRING
1317   #if wxUSE_UNICODE_WCHAR
1318     wxString(const wxStdWideString& str) : m_impl(str) {}
1319   #else // UTF-8 or ANSI
1320     wxString(const wxStdWideString& str)
1321         { assign(str.c_str(), str.length()); }
1322   #endif
1323
1324   #if !wxUSE_UNICODE // ANSI build
1325     // FIXME-UTF8: do this in UTF8 build #if wxUSE_UTF8_LOCALE_ONLY, too
1326     wxString(const std::string& str) : m_impl(str) {}
1327   #else // Unicode
1328     wxString(const std::string& str)
1329         { assign(str.c_str(), str.length()); }
1330   #endif
1331 #endif // wxUSE_STD_STRING
1332
1333   // Also always provide explicit conversions to std::[w]string in any case,
1334   // see below for the implicit ones.
1335 #if wxUSE_STD_STRING
1336   // We can avoid a copy if we already use this string type internally,
1337   // otherwise we create a copy on the fly:
1338   #if wxUSE_UNICODE_WCHAR && wxUSE_STL_BASED_WXSTRING
1339     #define wxStringToStdWstringRetType const wxStdWideString&
1340     const wxStdWideString& ToStdWstring() const { return m_impl; }
1341   #else
1342     // wxStringImpl is either not std::string or needs conversion
1343     #define wxStringToStdWstringRetType wxStdWideString
1344     wxStdWideString ToStdWstring() const
1345     {
1346 #if wxUSE_UNICODE_WCHAR
1347         wxScopedWCharBuffer buf =
1348             wxScopedWCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length());
1349 #else // !wxUSE_UNICODE_WCHAR
1350         wxScopedWCharBuffer buf(wc_str());
1351 #endif
1352
1353         return wxStdWideString(buf.data(), buf.length());
1354     }
1355   #endif
1356
1357   #if (!wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY) && wxUSE_STL_BASED_WXSTRING
1358     // wxStringImpl is std::string in the encoding we want
1359     #define wxStringToStdStringRetType const std::string&
1360     const std::string& ToStdString() const { return m_impl; }
1361   #else
1362     // wxStringImpl is either not std::string or needs conversion
1363     #define wxStringToStdStringRetType std::string
1364     std::string ToStdString() const
1365     {
1366         wxScopedCharBuffer buf(mb_str());
1367         return std::string(buf.data(), buf.length());
1368     }
1369   #endif
1370
#if wxUSE_STD_STRING_CONV_IN_WXSTRING
    // The implicit conversions to std::[w]string conflict with the implicit
    // conversions to "const char/wchar_t *" kept for backwards
    // compatibility, so they are not provided by default but only if
    // explicitly requested.
  operator wxStringToStdStringRetType() const
  {
      return this->ToStdString();
  }
  operator wxStringToStdWstringRetType() const
  {
      return this->ToStdWstring();
  }
#endif // wxUSE_STD_STRING_CONV_IN_WXSTRING

  // the helper macros holding the return types are not needed any more
#undef wxStringToStdStringRetType
#undef wxStringToStdWstringRetType
1382
1383 #endif // wxUSE_STD_STRING
1384
1385   wxString Clone() const
1386   {
1387       // make a deep copy of the string, i.e. the returned string will have
1388       // ref count = 1 with refcounted implementation
1389       return wxString::FromImpl(wxStringImpl(m_impl.c_str(), m_impl.length()));
1390   }
1391
1392   // first valid index position
1393   const_iterator begin() const { return const_iterator(this, m_impl.begin()); }
1394   iterator begin() { return iterator(this, m_impl.begin()); }
1395   // position one after the last valid one
1396   const_iterator end() const { return const_iterator(this, m_impl.end()); }
1397   iterator end() { return iterator(this, m_impl.end()); }
1398
1399   // first element of the reversed string
1400   const_reverse_iterator rbegin() const
1401     { return const_reverse_iterator(end()); }
1402   reverse_iterator rbegin()
1403     { return reverse_iterator(end()); }
1404   // one beyond the end of the reversed string
1405   const_reverse_iterator rend() const
1406     { return const_reverse_iterator(begin()); }
1407   reverse_iterator rend()
1408     { return reverse_iterator(begin()); }
1409
1410   // std::string methods:
1411 #if wxUSE_UNICODE_UTF8
1412   size_t length() const
1413   {
1414 #if wxUSE_STRING_POS_CACHE
1415       wxCACHE_PROFILE_FIELD_INC(lentot);
1416
1417       Cache::Element * const cache = GetCacheElement();
1418
1419       if ( cache->len == npos )
1420       {
1421           // it's probably not worth trying to be clever and using cache->pos
1422           // here as it's probably 0 anyhow -- you usually call length() before
1423           // starting to index the string
1424           cache->len = end() - begin();
1425       }
1426       else
1427       {
1428           wxCACHE_PROFILE_FIELD_INC(lenhits);
1429
1430           wxSTRING_CACHE_ASSERT( (int)cache->len == end() - begin() );
1431       }
1432
1433       return cache->len;
1434 #else // !wxUSE_STRING_POS_CACHE
1435       return end() - begin();
1436 #endif // wxUSE_STRING_POS_CACHE/!wxUSE_STRING_POS_CACHE
1437   }
1438 #else
1439   size_t length() const { return m_impl.length(); }
1440 #endif
1441
1442   size_type size() const { return length(); }
1443   size_type max_size() const { return npos; }
1444
1445   bool empty() const { return m_impl.empty(); }
1446
1447   // NB: these methods don't have a well-defined meaning in UTF-8 case
1448   size_type capacity() const { return m_impl.capacity(); }
1449   void reserve(size_t sz) { m_impl.reserve(sz); }
1450
1451   void resize(size_t nSize, wxUniChar ch = wxT('\0'))
1452   {
1453     const size_t len = length();
1454     if ( nSize == len)
1455         return;
1456
1457 #if wxUSE_UNICODE_UTF8
1458     if ( nSize < len )
1459     {
1460         wxSTRING_INVALIDATE_CACHE();
1461
1462         // we can't use wxStringImpl::resize() for truncating the string as it
1463         // counts in bytes, not characters
1464         erase(nSize);
1465         return;
1466     }
1467
1468     // we also can't use (presumably more efficient) resize() if we have to
1469     // append characters taking more than one byte
1470     if ( !ch.IsAscii() )
1471     {
1472         append(nSize - len, ch);
1473     }
1474     else // can use (presumably faster) resize() version
1475 #endif // wxUSE_UNICODE_UTF8
1476     {
1477         wxSTRING_INVALIDATE_CACHED_LENGTH();
1478
1479         m_impl.resize(nSize, (wxStringCharType)ch);
1480     }
1481   }
1482
1483   wxString substr(size_t nStart = 0, size_t nLen = npos) const
1484   {
1485     size_t pos, len;
1486     PosLenToImpl(nStart, nLen, &pos, &len);
1487     return FromImpl(m_impl.substr(pos, len));
1488   }
1489
1490   // generic attributes & operations
1491     // as standard strlen()
1492   size_t Len() const { return length(); }
1493     // string contains any characters?
1494   bool IsEmpty() const { return empty(); }
1495     // empty string is "false", so !str will return true
1496   bool operator!() const { return empty(); }
1497     // truncate the string to given length
1498   wxString& Truncate(size_t uiLen);
1499     // empty string contents
1500   void Empty() { clear(); }
1501     // empty the string and free memory
1502   void Clear() { clear(); }
1503
// contents test
//
// These predicates are only declared here, their definitions are not
// inline, so the descriptions below just restate their intended meaning.

// Is an ascii value (presumably: all characters of the string are ASCII)
bool IsAscii() const;
// Is a number
bool IsNumber() const;
// Is a word
bool IsWord() const;
1511
1512   // data access (all indexes are 0 based)
1513     // read access
1514     wxUniChar at(size_t n) const
1515       { return wxStringOperations::DecodeChar(m_impl.begin() + PosToImpl(n)); }
1516     wxUniChar GetChar(size_t n) const
1517       { return at(n); }
1518     // read/write access
1519     wxUniCharRef at(size_t n)
1520       { return *GetIterForNthChar(n); }
1521     wxUniCharRef GetWritableChar(size_t n)
1522       { return at(n); }
1523     // write access
1524     void SetChar(size_t n, wxUniChar ch)
1525       { at(n) = ch; }
1526
1527     // get last character
1528     wxUniChar Last() const
1529     {
1530       wxASSERT_MSG( !empty(), wxT("wxString: index out of bounds") );
1531       return *rbegin();
1532     }
1533
1534     // get writable last character
1535     wxUniCharRef Last()
1536     {
1537       wxASSERT_MSG( !empty(), wxT("wxString: index out of bounds") );
1538       return *rbegin();
1539     }
1540
/*
   Note that we must define all of the overloads below to avoid
   ambiguity when using str[0].
 */
1545     wxUniChar operator[](int n) const
1546       { return at(n); }
1547     wxUniChar operator[](long n) const
1548       { return at(n); }
1549     wxUniChar operator[](size_t n) const
1550       { return at(n); }
1551 #ifndef wxSIZE_T_IS_UINT
1552     wxUniChar operator[](unsigned int n) const
1553       { return at(n); }
1554 #endif // size_t != unsigned int
1555
// operator versions of GetWritableChar()
1557     wxUniCharRef operator[](int n)
1558       { return at(n); }
1559     wxUniCharRef operator[](long n)
1560       { return at(n); }
1561     wxUniCharRef operator[](size_t n)
1562       { return at(n); }
1563 #ifndef wxSIZE_T_IS_UINT
1564     wxUniCharRef operator[](unsigned int n)
1565       { return at(n); }
1566 #endif // size_t != unsigned int
1567
1568
1569     /*
1570         Overview of wxString conversions, implicit and explicit:
1571
1572         - wxString has a std::[w]string-like c_str() method, however it does
1573           not return a C-style string directly but instead returns wxCStrData
1574           helper object which is convertible to either "char *" narrow string
1575           or "wchar_t *" wide string. Usually the correct conversion will be
1576           applied by the compiler automatically but if this doesn't happen you
1577           need to explicitly choose one using wxCStrData::AsChar() or AsWChar()
1578           methods or another wxString conversion function.
1579
1580         - One of the places where the conversion does *NOT* happen correctly is
1581           when c_str() is passed to a vararg function such as printf() so you
1582           must *NOT* use c_str() with them. Either use wxPrintf() (all wx
1583           functions do handle c_str() correctly, even if they appear to be
1584           vararg (but they're not, really)) or add an explicit AsChar() or, if
1585           compatibility with previous wxWidgets versions is important, add a
1586           cast to "const char *".
1587
1588         - In non-STL mode only, wxString is also implicitly convertible to
1589           wxCStrData. The same warning as above applies.
1590
1591         - c_str() is polymorphic as it can be converted to either narrow or
1592           wide string. If you explicitly need one or the other, choose to use
1593           mb_str() (for narrow) or wc_str() (for wide) instead. Notice that
1594           these functions can return either the pointer to string directly (if
1595           this is what the string uses internally) or a temporary buffer
1596           containing the string and convertible to it. Again, conversion will
1597           usually be done automatically by the compiler but beware of the
1598           vararg functions: you need an explicit cast when using them.
1599
1600         - There are also non-const versions of mb_str() and wc_str() called
1601           char_str() and wchar_str(). They are only meant to be used with
1602           non-const-correct functions and they always return buffers.
1603
1604         - Finally wx_str() returns whatever string representation is used by
1605           wxString internally. It may be either a narrow or wide string
1606           depending on wxWidgets build mode but it will always be a raw pointer
1607           (and not a buffer).
1608      */
1609
1610     // explicit conversion to wxCStrData
1611     wxCStrData c_str() const { return wxCStrData(this); }
1612     wxCStrData data() const { return c_str(); }
1613
1614     // implicit conversion to wxCStrData
1615     operator wxCStrData() const { return c_str(); }
1616
1617     // the first two operators conflict with operators for conversion to
1618     // std::string and they must be disabled if those conversions are enabled;
1619     // the next one only makes sense if conversions to char* are also defined
1620     // and not defining it in STL build also helps us to get more clear error
1621     // messages for the code which relies on implicit conversion to char* in
1622     // STL build
1623 #if !wxUSE_STD_STRING_CONV_IN_WXSTRING
1624     operator const char*() const { return c_str(); }
1625     operator const wchar_t*() const { return c_str(); }
1626
1627     // implicit conversion to untyped pointer for compatibility with previous
1628     // wxWidgets versions: this is the same as conversion to const char * so it
1629     // may fail!
1630     operator const void*() const { return c_str(); }
1631 #endif // !wxUSE_STD_STRING_CONV_IN_WXSTRING
1632
1633     // identical to c_str(), for MFC compatibility
1634     const wxCStrData GetData() const { return c_str(); }
1635
1636     // explicit conversion to C string in internal representation (char*,
1637     // wchar_t*, UTF-8-encoded char*, depending on the build):
1638     const wxStringCharType *wx_str() const { return m_impl.c_str(); }
1639
1640     // conversion to *non-const* multibyte or widestring buffer; modifying
1641     // returned buffer won't affect the string, these methods are only useful
1642     // for passing values to const-incorrect functions
1643     wxWritableCharBuffer char_str(const wxMBConv& conv = wxConvLibc) const
1644         { return mb_str(conv); }
1645     wxWritableWCharBuffer wchar_str() const { return wc_str(); }
1646
1647     // conversion to the buffer of the given type T (= char or wchar_t) and
1648     // also optionally return the buffer length
1649     //
1650     // this is mostly/only useful for the template functions
1651     //
1652     // FIXME-VC6: the second argument only exists for VC6 which doesn't support
1653     //            explicit template function selection, do not use it unless
1654     //            you must support VC6!
1655     template <typename T>
1656     wxCharTypeBuffer<T> tchar_str(size_t *len = NULL,
1657                                   T * WXUNUSED(dummy) = NULL) const
1658     {
1659 #if wxUSE_UNICODE
1660         // we need a helper dispatcher depending on type
1661         return wxPrivate::wxStringAsBufHelper<T>::Get(*this, len);
1662 #else // ANSI
1663         // T can only be char in ANSI build
1664         if ( len )
1665             *len = length();
1666
1667         return wxCharTypeBuffer<T>::CreateNonOwned(wx_str(), length());
1668 #endif // Unicode build kind
1669     }
1670
1671     // conversion to/from plain (i.e. 7 bit) ASCII: this is useful for
1672     // converting numbers or strings which are certain not to contain special
1673     // chars (typically system functions, X atoms, environment variables etc.)
1674     //
1675     // the behaviour of these functions with the strings containing anything
1676     // else than 7 bit ASCII characters is undefined, use at your own risk.
1677 #if wxUSE_UNICODE
1678     static wxString FromAscii(const char *ascii, size_t len);
1679     static wxString FromAscii(const char *ascii);
1680     static wxString FromAscii(char ascii);
1681     const wxScopedCharBuffer ToAscii() const;
1682 #else // ANSI
1683     static wxString FromAscii(const char *ascii) { return wxString( ascii ); }
1684     static wxString FromAscii(const char *ascii, size_t len)
1685         { return wxString( ascii, len ); }
1686     static wxString FromAscii(char ascii) { return wxString( ascii ); }
1687     const char *ToAscii() const { return c_str(); }
1688 #endif // Unicode/!Unicode
1689
1690     // also provide unsigned char overloads as signed/unsigned doesn't matter
1691     // for 7 bit ASCII characters
1692     static wxString FromAscii(const unsigned char *ascii)
1693         { return FromAscii((const char *)ascii); }
1694     static wxString FromAscii(const unsigned char *ascii, size_t len)
1695         { return FromAscii((const char *)ascii, len); }
1696
1697     // conversion to/from UTF-8:
1698 #if wxUSE_UNICODE_UTF8
1699     static wxString FromUTF8Unchecked(const char *utf8)
1700     {
1701       if ( !utf8 )
1702           return wxEmptyString;
1703
1704       wxASSERT( wxStringOperations::IsValidUtf8String(utf8) );
1705       return FromImpl(wxStringImpl(utf8));
1706     }
1707     static wxString FromUTF8Unchecked(const char *utf8, size_t len)
1708     {
1709       if ( !utf8 )
1710           return wxEmptyString;
1711       if ( len == npos )
1712           return FromUTF8Unchecked(utf8);
1713
1714       wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) );
1715       return FromImpl(wxStringImpl(utf8, len));
1716     }
1717
1718     static wxString FromUTF8(const char *utf8)
1719     {
1720         if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8) )
1721             return "";
1722
1723         return FromImpl(wxStringImpl(utf8));
1724     }
1725     static wxString FromUTF8(const char *utf8, size_t len)
1726     {
1727         if ( len == npos )
1728             return FromUTF8(utf8);
1729
1730         if ( !utf8 || !wxStringOperations::IsValidUtf8String(utf8, len) )
1731             return "";
1732
1733         return FromImpl(wxStringImpl(utf8, len));
1734     }
1735
1736     const wxScopedCharBuffer utf8_str() const
1737         { return wxCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); }
1738
1739     // this function exists in UTF-8 build only and returns the length of the
1740     // internal UTF-8 representation
1741     size_t utf8_length() const { return m_impl.length(); }
1742 #elif wxUSE_UNICODE_WCHAR
1743     static wxString FromUTF8(const char *utf8, size_t len = npos)
1744       { return wxString(utf8, wxMBConvUTF8(), len); }
1745     static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
1746     {
1747         const wxString s(utf8, wxMBConvUTF8(), len);
1748         wxASSERT_MSG( !utf8 || !*utf8 || !s.empty(),
1749                       "string must be valid UTF-8" );
1750         return s;
1751     }
1752     const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
1753 #else // ANSI
1754     static wxString FromUTF8(const char *utf8)
1755       { return wxString(wxMBConvUTF8().cMB2WC(utf8)); }
1756     static wxString FromUTF8(const char *utf8, size_t len)
1757     {
1758         size_t wlen;
1759         wxScopedWCharBuffer buf(wxMBConvUTF8().cMB2WC(utf8, len == npos ? wxNO_LEN : len, &wlen));
1760         return wxString(buf.data(), wlen);
1761     }
1762     static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
1763     {
1764         size_t wlen;
1765         wxScopedWCharBuffer buf
1766                             (
1767                               wxMBConvUTF8().cMB2WC
1768                                              (
1769                                                utf8,
1770                                                len == npos ? wxNO_LEN : len,
1771                                                &wlen
1772                                              )
1773                             );
1774         wxASSERT_MSG( !utf8 || !*utf8 || wlen,
1775                       "string must be valid UTF-8" );
1776
1777         return wxString(buf.data(), wlen);
1778     }
1779     const wxScopedCharBuffer utf8_str() const
1780       { return wxMBConvUTF8().cWC2MB(wc_str()); }
1781 #endif
1782
1783     const wxScopedCharBuffer ToUTF8() const { return utf8_str(); }
1784
1785     // functions for storing binary data in wxString:
1786 #if wxUSE_UNICODE
1787     static wxString From8BitData(const char *data, size_t len)
1788       { return wxString(data, wxConvISO8859_1, len); }
1789     // version for NUL-terminated data:
1790     static wxString From8BitData(const char *data)
1791       { return wxString(data, wxConvISO8859_1); }
1792     const wxScopedCharBuffer To8BitData() const
1793         { return mb_str(wxConvISO8859_1); }
1794 #else // ANSI
1795     static wxString From8BitData(const char *data, size_t len)
1796       { return wxString(data, len); }
1797     // version for NUL-terminated data:
1798     static wxString From8BitData(const char *data)
1799       { return wxString(data); }
1800     const wxScopedCharBuffer To8BitData() const
1801         { return wxScopedCharBuffer::CreateNonOwned(wx_str(), length()); }
1802 #endif // Unicode/ANSI
1803
// conversions with (possible) format conversions: have to return a
// buffer with temporary data
//
// the functions defined (in either Unicode or ANSI mode) are mb_str() to
// return an ANSI (multibyte) string, wc_str() to return a wide string and
// fn_str() to return a string which should be used with the OS APIs
// accepting the file names. The return value is always the same, but the
// type differs because a function may either return pointer to the buffer
// directly or have to use intermediate buffer for translation.
1813
1814 #if wxUSE_UNICODE
1815
1816     // this is an optimization: even though using mb_str(wxConvLibc) does the
1817     // same thing (i.e. returns pointer to internal representation as locale is
1818     // always an UTF-8 one) in wxUSE_UTF8_LOCALE_ONLY case, we can avoid the
1819     // extra checks and the temporary buffer construction by providing a
1820     // separate mb_str() overload
1821 #if wxUSE_UTF8_LOCALE_ONLY
1822     const char* mb_str() const { return wx_str(); }
1823     const wxScopedCharBuffer mb_str(const wxMBConv& conv) const
1824     {
1825         return AsCharBuf(conv);
1826     }
1827 #else // !wxUSE_UTF8_LOCALE_ONLY
1828     const wxScopedCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const
1829     {
1830         return AsCharBuf(conv);
1831     }
1832 #endif // wxUSE_UTF8_LOCALE_ONLY/!wxUSE_UTF8_LOCALE_ONLY
1833
1834     const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); }
1835
1836 #if wxUSE_UNICODE_WCHAR
1837     const wchar_t* wc_str() const { return wx_str(); }
1838 #elif wxUSE_UNICODE_UTF8
1839     const wxScopedWCharBuffer wc_str() const
1840         { return AsWCharBuf(wxMBConvStrictUTF8()); }
1841 #endif
1842     // for compatibility with !wxUSE_UNICODE version
1843     const wxWX2WCbuf wc_str(const wxMBConv& WXUNUSED(conv)) const
1844       { return wc_str(); }
1845
1846 #if wxMBFILES
1847     const wxScopedCharBuffer fn_str() const { return mb_str(wxConvFile); }
1848 #else // !wxMBFILES
1849     const wxWX2WCbuf fn_str() const { return wc_str(); }
1850 #endif // wxMBFILES/!wxMBFILES
1851
1852 #else // ANSI
1853     const char* mb_str() const { return wx_str(); }
1854
1855     // for compatibility with wxUSE_UNICODE version
1856     const char* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); }
1857
1858     const wxWX2MBbuf mbc_str() const { return mb_str(); }
1859
1860     const wxScopedWCharBuffer wc_str(const wxMBConv& conv = wxConvLibc) const
1861         { return AsWCharBuf(conv); }
1862
1863     const wxScopedCharBuffer fn_str() const
1864         { return wxConvFile.cWC2WX( wc_str( wxConvLibc ) ); }
1865 #endif // Unicode/ANSI
1866
1867 #if wxUSE_UNICODE_UTF8
1868     const wxScopedWCharBuffer t_str() const { return wc_str(); }
1869 #elif wxUSE_UNICODE_WCHAR
1870     const wchar_t* t_str() const { return wx_str(); }
1871 #else
1872     const char* t_str() const { return wx_str(); }
1873 #endif
1874
1875
1876   // overloaded assignment
1877     // from another wxString
1878   wxString& operator=(const wxString& stringSrc)
1879   {
1880     if ( this != &stringSrc )
1881     {
1882         wxSTRING_INVALIDATE_CACHE();
1883
1884         m_impl = stringSrc.m_impl;
1885     }
1886
1887     return *this;
1888   }
1889
1890   wxString& operator=(const wxCStrData& cstr)
1891     { return *this = cstr.AsString(); }
1892     // from a character
1893   wxString& operator=(wxUniChar ch)
1894   {
1895     wxSTRING_INVALIDATE_CACHE();
1896
1897 #if wxUSE_UNICODE_UTF8
1898     if ( !ch.IsAscii() )
1899         m_impl = wxStringOperations::EncodeChar(ch);
1900     else
1901 #endif // wxUSE_UNICODE_UTF8
1902         m_impl = (wxStringCharType)ch;
1903     return *this;
1904   }
1905
1906   wxString& operator=(wxUniCharRef ch)
1907     { return operator=((wxUniChar)ch); }
1908   wxString& operator=(char ch)
1909     { return operator=(wxUniChar(ch)); }
1910   wxString& operator=(unsigned char ch)
1911     { return operator=(wxUniChar(ch)); }
1912   wxString& operator=(wchar_t ch)
1913     { return operator=(wxUniChar(ch)); }
1914     // from a C string - STL probably will crash on NULL,
1915     // so we need to compensate in that case
1916 #if wxUSE_STL_BASED_WXSTRING
1917   wxString& operator=(const char *psz)
1918   {
1919       wxSTRING_INVALIDATE_CACHE();
1920
1921       if ( psz )
1922           m_impl = ImplStr(psz);
1923       else
1924           clear();
1925
1926       return *this;
1927   }
1928
1929   wxString& operator=(const wchar_t *pwz)
1930   {
1931       wxSTRING_INVALIDATE_CACHE();
1932
1933       if ( pwz )
1934           m_impl = ImplStr(pwz);
1935       else
1936           clear();
1937
1938       return *this;
1939   }
1940 #else // !wxUSE_STL_BASED_WXSTRING
1941   wxString& operator=(const char *psz)
1942   {
1943       wxSTRING_INVALIDATE_CACHE();
1944
1945       m_impl = ImplStr(psz);
1946
1947       return *this;
1948   }
1949
1950   wxString& operator=(const wchar_t *pwz)
1951   {
1952       wxSTRING_INVALIDATE_CACHE();
1953
1954       m_impl = ImplStr(pwz);
1955
1956       return *this;
1957   }
1958 #endif // wxUSE_STL_BASED_WXSTRING/!wxUSE_STL_BASED_WXSTRING
1959
1960   wxString& operator=(const unsigned char *psz)
1961     { return operator=((const char*)psz); }
1962
1963     // from wxScopedWCharBuffer
1964   wxString& operator=(const wxScopedWCharBuffer& s)
1965     { return assign(s); }
1966     // from wxScopedCharBuffer
1967   wxString& operator=(const wxScopedCharBuffer& s)
1968     { return assign(s); }
1969
1970   // string concatenation
1971     // in place concatenation
/*
    Concatenate and return the result. Note that the left to right
    associativity of << allows writing things like "str << str1 << str2
    << ..." (unlike with +=)
 */
1977       // string += string
1978   wxString& operator<<(const wxString& s)
1979   {
1980 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
1981     wxASSERT_MSG( s.IsValid(),
1982                   wxT("did you forget to call UngetWriteBuf()?") );
1983 #endif
1984
1985     append(s);
1986     return *this;
1987   }
1988       // string += C string
1989   wxString& operator<<(const char *psz)
1990     { append(psz); return *this; }
1991   wxString& operator<<(const wchar_t *pwz)
1992     { append(pwz); return *this; }
1993   wxString& operator<<(const wxCStrData& psz)
1994     { append(psz.AsString()); return *this; }
1995       // string += char
1996   wxString& operator<<(wxUniChar ch) { append(1, ch); return *this; }
1997   wxString& operator<<(wxUniCharRef ch) { append(1, ch); return *this; }
1998   wxString& operator<<(char ch) { append(1, ch); return *this; }
1999   wxString& operator<<(unsigned char ch) { append(1, ch); return *this; }
2000   wxString& operator<<(wchar_t ch) { append(1, ch); return *this; }
2001
2002       // string += buffer (i.e. from wxGetString)
2003   wxString& operator<<(const wxScopedWCharBuffer& s)
2004     { return append(s); }
2005   wxString& operator<<(const wxScopedCharBuffer& s)
2006     { return append(s); }
2007
2008     // string += C string
2009   wxString& Append(const wxString& s)
2010     {
2011         // test for empty() to share the string if possible
2012         if ( empty() )
2013             *this = s;
2014         else
2015             append(s);
2016         return *this;
2017     }
2018   wxString& Append(const char* psz)
2019     { append(psz); return *this; }
2020   wxString& Append(const wchar_t* pwz)
2021     { append(pwz); return *this; }
2022   wxString& Append(const wxCStrData& psz)
2023     { append(psz); return *this; }
2024   wxString& Append(const wxScopedCharBuffer& psz)
2025     { append(psz); return *this; }
2026   wxString& Append(const wxScopedWCharBuffer& psz)
2027     { append(psz); return *this; }
2028   wxString& Append(const char* psz, size_t nLen)
2029     { append(psz, nLen); return *this; }
2030   wxString& Append(const wchar_t* pwz, size_t nLen)
2031     { append(pwz, nLen); return *this; }
2032   wxString& Append(const wxCStrData& psz, size_t nLen)
2033     { append(psz, nLen); return *this; }
2034   wxString& Append(const wxScopedCharBuffer& psz, size_t nLen)
2035     { append(psz, nLen); return *this; }
2036   wxString& Append(const wxScopedWCharBuffer& psz, size_t nLen)
2037     { append(psz, nLen); return *this; }
2038     // append count copies of given character
2039   wxString& Append(wxUniChar ch, size_t count = 1u)
2040     { append(count, ch); return *this; }
2041   wxString& Append(wxUniCharRef ch, size_t count = 1u)
2042     { append(count, ch); return *this; }
2043   wxString& Append(char ch, size_t count = 1u)
2044     { append(count, ch); return *this; }
2045   wxString& Append(unsigned char ch, size_t count = 1u)
2046     { append(count, ch); return *this; }
2047   wxString& Append(wchar_t ch, size_t count = 1u)
2048     { append(count, ch); return *this; }
2049
2050     // prepend a string, return the string itself
2051   wxString& Prepend(const wxString& str)
2052     { *this = str + *this; return *this; }
2053
// non-destructive concatenation
//
// All the operators below are only declared as friends of the class here,
// they are defined elsewhere. Each of them returns a new string and takes
// its string argument(s) by const reference.

// two strings
friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string1,
                                           const wxString& string2);
// string with a single char
friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string, wxUniChar ch);
// char with a string
friend wxString WXDLLIMPEXP_BASE operator+(wxUniChar ch, const wxString& string);
// string with C string (either narrow or wide)
friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string,
                                           const char *psz);
friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string,
                                           const wchar_t *pwz);
// C string (either narrow or wide) with string
friend wxString WXDLLIMPEXP_BASE operator+(const char *psz,
                                           const wxString& string);
friend wxString WXDLLIMPEXP_BASE operator+(const wchar_t *pwz,
                                           const wxString& string);
2072
2073   // stream-like functions
2074       // insert an int into string
2075   wxString& operator<<(int i)
2076     { return (*this) << Format(wxT("%d"), i); }
2077       // insert an unsigned int into string
2078   wxString& operator<<(unsigned int ui)
2079     { return (*this) << Format(wxT("%u"), ui); }
2080       // insert a long into string
2081   wxString& operator<<(long l)
2082     { return (*this) << Format(wxT("%ld"), l); }
2083       // insert an unsigned long into string
2084   wxString& operator<<(unsigned long ul)
2085     { return (*this) << Format(wxT("%lu"), ul); }
2086 #ifdef wxHAS_LONG_LONG_T_DIFFERENT_FROM_LONG
2087       // insert a long long if they exist and aren't longs
2088   wxString& operator<<(wxLongLong_t ll)
2089     {
2090       return (*this) << Format("%" wxLongLongFmtSpec "d", ll);
2091     }
2092       // insert an unsigned long long
2093   wxString& operator<<(wxULongLong_t ull)
2094     {
2095       return (*this) << Format("%" wxLongLongFmtSpec "u" , ull);
2096     }
2097 #endif // wxHAS_LONG_LONG_T_DIFFERENT_FROM_LONG
2098       // insert a float into string
2099   wxString& operator<<(float f)
2100     { return (*this) << Format(wxT("%f"), f); }
2101       // insert a double into string
2102   wxString& operator<<(double d)
2103     { return (*this) << Format(wxT("%g"), d); }
2104
2105   // string comparison
2106     // case-sensitive comparison (returns a value < 0, = 0 or > 0)
2107   int Cmp(const char *psz) const
2108     { return compare(psz); }
2109   int Cmp(const wchar_t *pwz) const
2110     { return compare(pwz); }
2111   int Cmp(const wxString& s) const
2112     { return compare(s); }
2113   int Cmp(const wxCStrData& s) const
2114     { return compare(s); }
2115   int Cmp(const wxScopedCharBuffer& s) const
2116     { return compare(s); }
2117   int Cmp(const wxScopedWCharBuffer& s) const
2118     { return compare(s); }
2119     // same as Cmp() but not case-sensitive
2120   int CmpNoCase(const wxString& s) const;
2121
2122     // test for the string equality, either considering case or not
2123     // (if compareWithCase then the case matters)
2124   bool IsSameAs(const wxString& str, bool compareWithCase = true) const
2125   {
2126 #if !wxUSE_UNICODE_UTF8
2127       // in UTF-8 build, length() is O(n) and doing this would be _slower_
2128       if ( length() != str.length() )
2129           return false;
2130 #endif
2131       return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0;
2132   }
2133   bool IsSameAs(const char *str, bool compareWithCase = true) const
2134     { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; }
2135   bool IsSameAs(const wchar_t *str, bool compareWithCase = true) const
2136     { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; }
2137
2138   bool IsSameAs(const wxCStrData& str, bool compareWithCase = true) const
2139     { return IsSameAs(str.AsString(), compareWithCase); }
2140   bool IsSameAs(const wxScopedCharBuffer& str, bool compareWithCase = true) const
2141     { return IsSameAs(str.data(), compareWithCase); }
2142   bool IsSameAs(const wxScopedWCharBuffer& str, bool compareWithCase = true) const
2143     { return IsSameAs(str.data(), compareWithCase); }
2144     // comparison with a single character: returns true if equal
2145   bool IsSameAs(wxUniChar c, bool compareWithCase = true) const;
2146   // FIXME-UTF8: remove these overloads
2147   bool IsSameAs(wxUniCharRef c, bool compareWithCase = true) const
2148     { return IsSameAs(wxUniChar(c), compareWithCase); }
2149   bool IsSameAs(char c, bool compareWithCase = true) const
2150     { return IsSameAs(wxUniChar(c), compareWithCase); }
2151   bool IsSameAs(unsigned char c, bool compareWithCase = true) const
2152     { return IsSameAs(wxUniChar(c), compareWithCase); }
2153   bool IsSameAs(wchar_t c, bool compareWithCase = true) const
2154     { return IsSameAs(wxUniChar(c), compareWithCase); }
2155   bool IsSameAs(int c, bool compareWithCase = true) const
2156     { return IsSameAs(wxUniChar(c), compareWithCase); }
2157
2158   // simple sub-string extraction
2159       // return substring starting at nFirst of length nCount (or till the end
2160       // if nCount = default value)
2161   wxString Mid(size_t nFirst, size_t nCount = npos) const;
2162
2163       // operator version of Mid()
2164   wxString  operator()(size_t start, size_t len) const
2165     { return Mid(start, len); }
2166
// NB: all the functions below are only declared here, their definitions
//     are elsewhere, so the comments describe their documented contract

// check if the string starts with the given prefix and return the rest
// of the string in the provided pointer if it is not NULL; otherwise
// return false
bool StartsWith(const wxString& prefix, wxString *rest = NULL) const;
// check if the string ends with the given suffix and return the
// beginning of the string before the suffix in the provided pointer if
// it is not NULL; otherwise return false
bool EndsWith(const wxString& suffix, wxString *rest = NULL) const;

// get first nCount characters
wxString Left(size_t nCount) const;
// get last nCount characters
wxString Right(size_t nCount) const;
// get all characters before the first occurrence of ch
// (returns the whole string if ch not found) and also put everything
// following the first occurrence of ch into rest if it's non-NULL
wxString BeforeFirst(wxUniChar ch, wxString *rest = NULL) const;
// get all characters before the last occurrence of ch
// (returns empty string if ch not found) and also put everything
// following the last occurrence of ch into rest if it's non-NULL
wxString BeforeLast(wxUniChar ch, wxString *rest = NULL) const;
// get all characters after the first occurrence of ch
// (returns empty string if ch not found)
wxString AfterFirst(wxUniChar ch) const;
// get all characters after the last occurrence of ch
// (returns the whole string if ch not found)
wxString AfterLast(wxUniChar ch) const;
2194
2195     // for compatibility only, use more explicitly named functions above
2196   wxString Before(wxUniChar ch) const { return BeforeLast(ch); }
2197   wxString After(wxUniChar ch) const { return AfterFirst(ch); }
2198
2199   // case conversion
2200       // convert to upper case in place, return the string itself
2201   wxString& MakeUpper();
2202       // convert to upper case, return the copy of the string
2203   wxString Upper() const { return wxString(*this).MakeUpper(); }
2204       // convert to lower case in place, return the string itself
2205   wxString& MakeLower();
2206       // convert to lower case, return the copy of the string
2207   wxString Lower() const { return wxString(*this).MakeLower(); }
2208       // convert the first character to the upper case and the rest to the
2209       // lower one, return the modified string itself
2210   wxString& MakeCapitalized();
2211       // convert the first character to the upper case and the rest to the
2212       // lower one, return the copy of the string
2213   wxString Capitalize() const { return wxString(*this).MakeCapitalized(); }
2214
  // trimming/padding whitespace (either side) and truncating
      // remove spaces from the right side (default) or, if bFromRight is
      // false, from the left side of the string
  wxString& Trim(bool bFromRight = true);
      // add nCount copies of chPad at the end (default) or, if bFromRight is
      // false, in the beginning of the string
  wxString& Pad(size_t nCount, wxUniChar chPad = wxT(' '), bool bFromRight = true);
2220
2221   // searching and replacing
2222       // searching (return starting index, or -1 if not found)
2223   int Find(wxUniChar ch, bool bFromEnd = false) const;   // like strchr/strrchr
2224   int Find(wxUniCharRef ch, bool bFromEnd = false) const
2225     { return Find(wxUniChar(ch), bFromEnd); }
2226   int Find(char ch, bool bFromEnd = false) const
2227     { return Find(wxUniChar(ch), bFromEnd); }
2228   int Find(unsigned char ch, bool bFromEnd = false) const
2229     { return Find(wxUniChar(ch), bFromEnd); }
2230   int Find(wchar_t ch, bool bFromEnd = false) const
2231     { return Find(wxUniChar(ch), bFromEnd); }
2232       // searching (return starting index, or -1 if not found)
2233   int Find(const wxString& sub) const               // like strstr
2234   {
2235     size_type idx = find(sub);
2236     return (idx == npos) ? wxNOT_FOUND : (int)idx;
2237   }
2238   int Find(const char *sub) const               // like strstr
2239   {
2240     size_type idx = find(sub);
2241     return (idx == npos) ? wxNOT_FOUND : (int)idx;
2242   }
2243   int Find(const wchar_t *sub) const               // like strstr
2244   {
2245     size_type idx = find(sub);
2246     return (idx == npos) ? wxNOT_FOUND : (int)idx;
2247   }
2248
2249   int Find(const wxCStrData& sub) const
2250     { return Find(sub.AsString()); }
2251   int Find(const wxScopedCharBuffer& sub) const
2252     { return Find(sub.data()); }
2253   int Find(const wxScopedWCharBuffer& sub) const
2254     { return Find(sub.data()); }
2255
      // replace the first (or all, if bReplaceAll is true, which is the
      // default) occurrences of the substring strOld with strNew, returns the
      // number of replacements made
  size_t Replace(const wxString& strOld,
                 const wxString& strNew,
                 bool bReplaceAll = true);

    // check if the string contents matches a mask containing '*' and '?'
  bool Matches(const wxString& mask) const;
2264
  // conversion to numbers: all functions return true only if the whole
  // string is a number and put the value of this number into the pointer
  // provided, the base is the numeric base in which the conversion should be
  // done and must be comprised between 2 and 36 or be 0 in which case the
  // standard C rules apply (leading '0' => octal, "0x" => hex)
      // convert to a signed integer
  bool ToLong(long *val, int base = 10) const;
      // convert to an unsigned integer
  bool ToULong(unsigned long *val, int base = 10) const;
      // the two 64 bit conversions below are only declared if the
      // wxLongLong_t type is defined
      // convert to wxLongLong
#if defined(wxLongLong_t)
  bool ToLongLong(wxLongLong_t *val, int base = 10) const;
      // convert to wxULongLong
  bool ToULongLong(wxULongLong_t *val, int base = 10) const;
#endif // wxLongLong_t
      // convert to a double
  bool ToDouble(double *val) const;

  // conversions to numbers using C locale
      // convert to a signed integer
  bool ToCLong(long *val, int base = 10) const;
      // convert to an unsigned integer
  bool ToCULong(unsigned long *val, int base = 10) const;
      // convert to a double
  bool ToCDouble(double *val) const;

  // create a string representing the given floating point number with the
  // default (like %g) or fixed (if precision >=0) precision
    // using the current locale
  static wxString FromDouble(double val, int precision = -1);
    // using the C locale
  static wxString FromCDouble(double val, int precision = -1);
2297
#ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
  // formatted output
    // as sprintf(), returns the number of characters written or < 0 on error
    // (take 'this' into account in attribute parameter count)
    //
    // the function is not declared directly but generated by the macro below,
    // the commented out line shows the signature it is meant to provide
  // int Printf(const wxString& format, ...);
  WX_DEFINE_VARARG_FUNC(int, Printf, 1, (const wxFormatString&),
                        DoPrintfWchar, DoPrintfUtf8)
#ifdef __WATCOMC__
  // workaround for http://bugzilla.openwatcom.org/show_bug.cgi?id=351:
  // one extra overload per supported type of the format argument, each of
  // them converting it to wxFormatString
  WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const wxString&),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const wxCStrData&),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const char*),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const wchar_t*),
                              (wxFormatString(f1)));
#endif
#endif // !wxNEEDS_WXSTRING_PRINTF_MIXIN
    // as vprintf(), returns the number of characters written or < 0 on error;
    // unlike Printf() this one is declared even if
    // wxNEEDS_WXSTRING_PRINTF_MIXIN is defined
  int PrintfV(const wxString& format, va_list argptr);
2319
#ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
    // returns the string containing the result of Printf() to it
    //
    // as with Printf(), the function is generated by the macro below and the
    // commented out line only shows the intended signature
  // static wxString Format(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_1;
  WX_DEFINE_VARARG_FUNC(static wxString, Format, 1, (const wxFormatString&),
                        DoFormatWchar, DoFormatUtf8)
#ifdef __WATCOMC__
  // workaround for http://bugzilla.openwatcom.org/show_bug.cgi?id=351:
  // one extra overload per supported type of the format argument, each of
  // them converting it to wxFormatString
  WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const wxString&),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const wxCStrData&),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const char*),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const wchar_t*),
                              (wxFormatString(f1)));
#endif
#endif // !wxNEEDS_WXSTRING_PRINTF_MIXIN
    // the same as above, but takes a va_list; it is declared even if
    // wxNEEDS_WXSTRING_PRINTF_MIXIN is defined
  static wxString FormatV(const wxString& format, va_list argptr);
2339
2340   // raw access to string memory
2341     // ensure that string has space for at least nLen characters
2342     // only works if the data of this string is not shared
2343   bool Alloc(size_t nLen) { reserve(nLen); return capacity() >= nLen; }
2344     // minimize the string's memory
2345     // only works if the data of this string is not shared
2346   bool Shrink();
#if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
    // These are deprecated, use wxStringBuffer or wxStringBufferLength instead
    //
    // get writable buffer of at least nLen bytes. UngetWriteBuf() *must* be
    // called a.s.a.p. to put string back in a reasonable state!
  wxDEPRECATED( wxStringCharType *GetWriteBuf(size_t nLen) );
    // call one of these immediately after GetWriteBuf() has been used
  wxDEPRECATED( void UngetWriteBuf() );
  wxDEPRECATED( void UngetWriteBuf(size_t nLen) );
#endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
2357
2358   // wxWidgets version 1 compatibility functions
2359
2360   // use Mid()
2361   wxString SubString(size_t from, size_t to) const
2362       { return Mid(from, (to - from + 1)); }
    // values for second parameter of CompareTo function: exact selects the
    // comparison using Cmp() and ignoreCase the one using CmpNoCase()
  enum caseCompare {exact, ignoreCase};
    // values for first parameter of Strip function; both is the bitwise
    // combination of leading and trailing
  enum stripType {leading = 0x1, trailing = 0x2, both = 0x3};
2367
#ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
  // use Printf()
  // (take 'this' into account in attribute parameter count)
  //
  // this function is generated with the same DoPrintfWchar/DoPrintfUtf8
  // implementation functions as Printf() itself
  // int sprintf(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_2;
  WX_DEFINE_VARARG_FUNC(int, sprintf, 1, (const wxFormatString&),
                        DoPrintfWchar, DoPrintfUtf8)
#ifdef __WATCOMC__
  // workaround for http://bugzilla.openwatcom.org/show_bug.cgi?id=351:
  // one extra overload per supported type of the format argument, each of
  // them converting it to wxFormatString
  WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const wxString&),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const wxCStrData&),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const char*),
                              (wxFormatString(f1)));
  WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const wchar_t*),
                              (wxFormatString(f1)));
#endif
#endif // !wxNEEDS_WXSTRING_PRINTF_MIXIN
2386
2387     // use Cmp()
2388   int CompareTo(const wxChar* psz, caseCompare cmp = exact) const
2389     { return cmp == exact ? Cmp(psz) : CmpNoCase(psz); }
2390
2391     // use length()
2392   size_t Length() const { return length(); }
2393     // Count the number of characters
2394   int Freq(wxUniChar ch) const;
2395     // use MakeLower
2396   void LowerCase() { MakeLower(); }
2397     // use MakeUpper
2398   void UpperCase() { MakeUpper(); }
2399     // use Trim except that it doesn't change this string
2400   wxString Strip(stripType w = trailing) const;
2401
2402     // use Find (more general variants not yet supported)
2403   size_t Index(const wxChar* psz) const { return Find(psz); }
2404   size_t Index(wxUniChar ch)         const { return Find(ch);  }
2405     // use Truncate
2406   wxString& Remove(size_t pos) { return Truncate(pos); }
2407   wxString& RemoveLast(size_t n = 1) { return Truncate(length() - n); }
2408
2409   wxString& Remove(size_t nStart, size_t nLen)
2410       { return (wxString&)erase( nStart, nLen ); }
2411
2412     // use Find()
2413   int First( wxUniChar ch ) const { return Find(ch); }
2414   int First( wxUniCharRef ch ) const { return Find(ch); }
2415   int First( char ch ) const { return Find(ch); }
2416   int First( unsigned char ch ) const { return Find(ch); }
2417   int First( wchar_t ch ) const { return Find(ch); }
2418   int First( const wxString& str ) const { return Find(str); }
2419   int Last( wxUniChar ch ) const { return Find(ch, true); }
2420   bool Contains(const wxString& str) const { return Find(str) != wxNOT_FOUND; }
2421
2422     // use empty()
2423   bool IsNull() const { return empty(); }
2424
2425   // std::string compatibility functions
2426
2427     // take nLen chars starting at nPos
2428   wxString(const wxString& str, size_t nPos, size_t nLen)
2429       { assign(str, nPos, nLen); }
2430     // take all characters from first to last
2431   wxString(const_iterator first, const_iterator last)
2432       : m_impl(first.impl(), last.impl()) { }
#if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
    // compatibility constructors for the existing code which passes a pair
    // of raw character pointers where iterators are expected
  wxString(const char *first, const char *last)
  {
      SubstrBufFromMB buf(ImplStr(first, last - first));
      m_impl.assign(buf.data, buf.len);
  }
  wxString(const wchar_t *first, const wchar_t *last)
  {
      SubstrBufFromWC buf(ImplStr(first, last - first));
      m_impl.assign(buf.data, buf.len);
  }
    // this overload allows code adding offsets to the result of c_str() to
    // compile; both arguments must refer to the same string
  wxString(const wxCStrData& first, const wxCStrData& last)
      : m_impl(CreateConstIterator(first).impl(),
               CreateConstIterator(last).impl())
  {
      wxASSERT_MSG( first.m_str == last.m_str,
                    wxT("pointers must be into the same string") );
  }
#endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2455
2456   // lib.string.modifiers
2457     // append elements str[pos], ..., str[pos+n]
2458   wxString& append(const wxString& str, size_t pos, size_t n)
2459   {
2460       wxSTRING_UPDATE_CACHED_LENGTH(n);
2461
2462       size_t from, len;
2463       str.PosLenToImpl(pos, n, &from, &len);
2464       m_impl.append(str.m_impl, from, len);
2465       return *this;
2466   }
2467     // append a string
2468   wxString& append(const wxString& str)
2469   {
2470       wxSTRING_UPDATE_CACHED_LENGTH(str.length());
2471
2472       m_impl.append(str.m_impl);
2473       return *this;
2474   }
2475
2476     // append first n (or all if n == npos) characters of sz
2477   wxString& append(const char *sz)
2478   {
2479       wxSTRING_INVALIDATE_CACHED_LENGTH();
2480
2481       m_impl.append(ImplStr(sz));
2482       return *this;
2483   }
2484
2485   wxString& append(const wchar_t *sz)
2486   {
2487       wxSTRING_INVALIDATE_CACHED_LENGTH();
2488
2489       m_impl.append(ImplStr(sz));
2490       return *this;
2491   }
2492
2493   wxString& append(const char *sz, size_t n)
2494   {
2495       wxSTRING_INVALIDATE_CACHED_LENGTH();
2496
2497       SubstrBufFromMB str(ImplStr(sz, n));
2498       m_impl.append(str.data, str.len);
2499       return *this;
2500   }
2501   wxString& append(const wchar_t *sz, size_t n)
2502   {
2503       wxSTRING_UPDATE_CACHED_LENGTH(n);
2504
2505       SubstrBufFromWC str(ImplStr(sz, n));
2506       m_impl.append(str.data, str.len);
2507       return *this;
2508   }
2509
2510   wxString& append(const wxCStrData& str)
2511     { return append(str.AsString()); }
2512   wxString& append(const wxScopedCharBuffer& str)
2513     { return append(str.data(), str.length()); }
2514   wxString& append(const wxScopedWCharBuffer& str)
2515     { return append(str.data(), str.length()); }
2516   wxString& append(const wxCStrData& str, size_t n)
2517     { return append(str.AsString(), 0, n); }
2518   wxString& append(const wxScopedCharBuffer& str, size_t n)
2519     { return append(str.data(), n); }
2520   wxString& append(const wxScopedWCharBuffer& str, size_t n)
2521     { return append(str.data(), n); }
2522
2523     // append n copies of ch
2524   wxString& append(size_t n, wxUniChar ch)
2525   {
2526 #if wxUSE_UNICODE_UTF8
2527       if ( !ch.IsAscii() )
2528       {
2529           wxSTRING_INVALIDATE_CACHED_LENGTH();
2530
2531           m_impl.append(wxStringOperations::EncodeNChars(n, ch));
2532       }
2533       else // ASCII
2534 #endif
2535       {
2536           wxSTRING_UPDATE_CACHED_LENGTH(n);
2537
2538           m_impl.append(n, (wxStringCharType)ch);
2539       }
2540
2541       return *this;
2542   }
2543
2544   wxString& append(size_t n, wxUniCharRef ch)
2545     { return append(n, wxUniChar(ch)); }
2546   wxString& append(size_t n, char ch)
2547     { return append(n, wxUniChar(ch)); }
2548   wxString& append(size_t n, unsigned char ch)
2549     { return append(n, wxUniChar(ch)); }
2550   wxString& append(size_t n, wchar_t ch)
2551     { return append(n, wxUniChar(ch)); }
2552
2553     // append from first to last
2554   wxString& append(const_iterator first, const_iterator last)
2555   {
2556       wxSTRING_INVALIDATE_CACHED_LENGTH();
2557
2558       m_impl.append(first.impl(), last.impl());
2559       return *this;
2560   }
2561 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2562   wxString& append(const char *first, const char *last)
2563     { return append(first, last - first); }
2564   wxString& append(const wchar_t *first, const wchar_t *last)
2565     { return append(first, last - first); }
2566   wxString& append(const wxCStrData& first, const wxCStrData& last)
2567     { return append(CreateConstIterator(first), CreateConstIterator(last)); }
2568 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2569
2570     // same as `this_string = str'
2571   wxString& assign(const wxString& str)
2572   {
2573       wxSTRING_SET_CACHED_LENGTH(str.length());
2574
2575       m_impl = str.m_impl;
2576
2577       return *this;
2578   }
2579
2580     // This is a non-standard-compliant overload taking the first "len"
2581     // characters of the source string.
2582   wxString& assign(const wxString& str, size_t len)
2583   {
2584 #if wxUSE_STRING_POS_CACHE
2585       // It is legal to pass len > str.length() to wxStringImpl::assign() but
2586       // by restricting it here we save some work for that function so it's not
2587       // really less efficient and, at the same time, ensure that we don't
2588       // cache invalid length.
2589       const size_t lenSrc = str.length();
2590       if ( len > lenSrc )
2591           len = lenSrc;
2592
2593       wxSTRING_SET_CACHED_LENGTH(len);
2594 #endif // wxUSE_STRING_POS_CACHE
2595
2596       m_impl.assign(str.m_impl, 0, str.LenToImpl(len));
2597
2598       return *this;
2599   }
2600
2601     // same as ` = str[pos..pos + n]
2602   wxString& assign(const wxString& str, size_t pos, size_t n)
2603   {
2604       size_t from, len;
2605       str.PosLenToImpl(pos, n, &from, &len);
2606       m_impl.assign(str.m_impl, from, len);
2607
2608       // it's important to call this after PosLenToImpl() above in case str is
2609       // the same string as this one
2610       wxSTRING_SET_CACHED_LENGTH(n);
2611
2612       return *this;
2613   }
2614
2615     // same as `= first n (or all if n == npos) characters of sz'
2616   wxString& assign(const char *sz)
2617   {
2618       wxSTRING_INVALIDATE_CACHE();
2619
2620       m_impl.assign(ImplStr(sz));
2621
2622       return *this;
2623   }
2624
2625   wxString& assign(const wchar_t *sz)
2626   {
2627       wxSTRING_INVALIDATE_CACHE();
2628
2629       m_impl.assign(ImplStr(sz));
2630
2631       return *this;
2632   }
2633
2634   wxString& assign(const char *sz, size_t n)
2635   {
2636       wxSTRING_INVALIDATE_CACHE();
2637
2638       SubstrBufFromMB str(ImplStr(sz, n));
2639       m_impl.assign(str.data, str.len);
2640
2641       return *this;
2642   }
2643
2644   wxString& assign(const wchar_t *sz, size_t n)
2645   {
2646       wxSTRING_SET_CACHED_LENGTH(n);
2647
2648       SubstrBufFromWC str(ImplStr(sz, n));
2649       m_impl.assign(str.data, str.len);
2650
2651       return *this;
2652   }
2653
2654   wxString& assign(const wxCStrData& str)
2655     { return assign(str.AsString()); }
2656   wxString& assign(const wxScopedCharBuffer& str)
2657     { return assign(str.data(), str.length()); }
2658   wxString& assign(const wxScopedWCharBuffer& str)
2659     { return assign(str.data(), str.length()); }
2660   wxString& assign(const wxCStrData& str, size_t len)
2661     { return assign(str.AsString(), len); }
2662   wxString& assign(const wxScopedCharBuffer& str, size_t len)
2663     { return assign(str.data(), len); }
2664   wxString& assign(const wxScopedWCharBuffer& str, size_t len)
2665     { return assign(str.data(), len); }
2666
2667     // same as `= n copies of ch'
2668   wxString& assign(size_t n, wxUniChar ch)
2669   {
2670       wxSTRING_SET_CACHED_LENGTH(n);
2671
2672 #if wxUSE_UNICODE_UTF8
2673       if ( !ch.IsAscii() )
2674           m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
2675       else
2676 #endif
2677           m_impl.assign(n, (wxStringCharType)ch);
2678
2679       return *this;
2680   }
2681
2682   wxString& assign(size_t n, wxUniCharRef ch)
2683     { return assign(n, wxUniChar(ch)); }
2684   wxString& assign(size_t n, char ch)
2685     { return assign(n, wxUniChar(ch)); }
2686   wxString& assign(size_t n, unsigned char ch)
2687     { return assign(n, wxUniChar(ch)); }
2688   wxString& assign(size_t n, wchar_t ch)
2689     { return assign(n, wxUniChar(ch)); }
2690
2691     // assign from first to last
2692   wxString& assign(const_iterator first, const_iterator last)
2693   {
2694       wxSTRING_INVALIDATE_CACHE();
2695
2696       m_impl.assign(first.impl(), last.impl());
2697
2698       return *this;
2699   }
2700 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2701   wxString& assign(const char *first, const char *last)
2702     { return assign(first, last - first); }
2703   wxString& assign(const wchar_t *first, const wchar_t *last)
2704     { return assign(first, last - first); }
2705   wxString& assign(const wxCStrData& first, const wxCStrData& last)
2706     { return assign(CreateConstIterator(first), CreateConstIterator(last)); }
2707 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2708
2709     // string comparison
2710   int compare(const wxString& str) const;
2711   int compare(const char* sz) const;
2712   int compare(const wchar_t* sz) const;
2713   int compare(const wxCStrData& str) const
2714     { return compare(str.AsString()); }
2715   int compare(const wxScopedCharBuffer& str) const
2716     { return compare(str.data()); }
2717   int compare(const wxScopedWCharBuffer& str) const
2718     { return compare(str.data()); }
2719     // comparison with a substring
2720   int compare(size_t nStart, size_t nLen, const wxString& str) const;
2721     // comparison of 2 substrings
2722   int compare(size_t nStart, size_t nLen,
2723               const wxString& str, size_t nStart2, size_t nLen2) const;
2724     // substring comparison with first nCount characters of sz
2725   int compare(size_t nStart, size_t nLen,
2726               const char* sz, size_t nCount = npos) const;
2727   int compare(size_t nStart, size_t nLen,
2728               const wchar_t* sz, size_t nCount = npos) const;
2729
2730     // insert another string
2731   wxString& insert(size_t nPos, const wxString& str)
2732     { insert(GetIterForNthChar(nPos), str.begin(), str.end()); return *this; }
2733     // insert n chars of str starting at nStart (in str)
2734   wxString& insert(size_t nPos, const wxString& str, size_t nStart, size_t n)
2735   {
2736       wxSTRING_UPDATE_CACHED_LENGTH(n);
2737
2738       size_t from, len;
2739       str.PosLenToImpl(nStart, n, &from, &len);
2740       m_impl.insert(PosToImpl(nPos), str.m_impl, from, len);
2741
2742       return *this;
2743   }
2744
2745     // insert first n (or all if n == npos) characters of sz
2746   wxString& insert(size_t nPos, const char *sz)
2747   {
2748       wxSTRING_INVALIDATE_CACHE();
2749
2750       m_impl.insert(PosToImpl(nPos), ImplStr(sz));
2751
2752       return *this;
2753   }
2754
2755   wxString& insert(size_t nPos, const wchar_t *sz)
2756   {
2757       wxSTRING_INVALIDATE_CACHE();
2758
2759       m_impl.insert(PosToImpl(nPos), ImplStr(sz)); return *this;
2760   }
2761
2762   wxString& insert(size_t nPos, const char *sz, size_t n)
2763   {
2764       wxSTRING_UPDATE_CACHED_LENGTH(n);
2765
2766       SubstrBufFromMB str(ImplStr(sz, n));
2767       m_impl.insert(PosToImpl(nPos), str.data, str.len);
2768
2769       return *this;
2770   }
2771
2772   wxString& insert(size_t nPos, const wchar_t *sz, size_t n)
2773   {
2774       wxSTRING_UPDATE_CACHED_LENGTH(n);
2775
2776       SubstrBufFromWC str(ImplStr(sz, n));
2777       m_impl.insert(PosToImpl(nPos), str.data, str.len);
2778
2779       return *this;
2780   }
2781
2782     // insert n copies of ch
2783   wxString& insert(size_t nPos, size_t n, wxUniChar ch)
2784   {
2785       wxSTRING_UPDATE_CACHED_LENGTH(n);
2786
2787 #if wxUSE_UNICODE_UTF8
2788       if ( !ch.IsAscii() )
2789           m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
2790       else
2791 #endif
2792           m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
2793       return *this;
2794   }
2795
2796   iterator insert(iterator it, wxUniChar ch)
2797   {
2798       wxSTRING_UPDATE_CACHED_LENGTH(1);
2799
2800 #if wxUSE_UNICODE_UTF8
2801       if ( !ch.IsAscii() )
2802       {
2803           size_t pos = IterToImplPos(it);
2804           m_impl.insert(pos, wxStringOperations::EncodeChar(ch));
2805           return iterator(this, m_impl.begin() + pos);
2806       }
2807       else
2808 #endif
2809           return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
2810   }
2811
2812   void insert(iterator it, const_iterator first, const_iterator last)
2813   {
2814       wxSTRING_INVALIDATE_CACHE();
2815
2816       m_impl.insert(it.impl(), first.impl(), last.impl());
2817   }
2818
2819 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2820   void insert(iterator it, const char *first, const char *last)
2821     { insert(it - begin(), first, last - first); }
2822   void insert(iterator it, const wchar_t *first, const wchar_t *last)
2823     { insert(it - begin(), first, last - first); }
2824   void insert(iterator it, const wxCStrData& first, const wxCStrData& last)
2825     { insert(it, CreateConstIterator(first), CreateConstIterator(last)); }
2826 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2827
2828   void insert(iterator it, size_type n, wxUniChar ch)
2829   {
2830       wxSTRING_UPDATE_CACHED_LENGTH(n);
2831
2832 #if wxUSE_UNICODE_UTF8
2833       if ( !ch.IsAscii() )
2834           m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
2835       else
2836 #endif
2837           m_impl.insert(it.impl(), n, (wxStringCharType)ch);
2838   }
2839
2840     // delete characters from nStart to nStart + nLen
2841   wxString& erase(size_type pos = 0, size_type n = npos)
2842   {
2843       wxSTRING_INVALIDATE_CACHE();
2844
2845       size_t from, len;
2846       PosLenToImpl(pos, n, &from, &len);
2847       m_impl.erase(from, len);
2848
2849       return *this;
2850   }
2851
2852     // delete characters from first up to last
2853   iterator erase(iterator first, iterator last)
2854   {
2855       wxSTRING_INVALIDATE_CACHE();
2856
2857       return iterator(this, m_impl.erase(first.impl(), last.impl()));
2858   }
2859
2860   iterator erase(iterator first)
2861   {
2862       wxSTRING_UPDATE_CACHED_LENGTH(-1);
2863
2864       return iterator(this, m_impl.erase(first.impl()));
2865   }
2866
2867 #ifdef wxSTRING_BASE_HASNT_CLEAR
2868   void clear() { erase(); }
2869 #else
2870   void clear()
2871   {
2872       wxSTRING_SET_CACHED_LENGTH(0);
2873
2874       m_impl.clear();
2875   }
2876 #endif
2877
2878     // replaces the substring of length nLen starting at nStart
2879   wxString& replace(size_t nStart, size_t nLen, const char* sz)
2880   {
2881       wxSTRING_INVALIDATE_CACHE();
2882
2883       size_t from, len;
2884       PosLenToImpl(nStart, nLen, &from, &len);
2885       m_impl.replace(from, len, ImplStr(sz));
2886
2887       return *this;
2888   }
2889
2890   wxString& replace(size_t nStart, size_t nLen, const wchar_t* sz)
2891   {
2892       wxSTRING_INVALIDATE_CACHE();
2893
2894       size_t from, len;
2895       PosLenToImpl(nStart, nLen, &from, &len);
2896       m_impl.replace(from, len, ImplStr(sz));
2897
2898       return *this;
2899   }
2900
2901     // replaces the substring of length nLen starting at nStart
2902   wxString& replace(size_t nStart, size_t nLen, const wxString& str)
2903   {
2904       wxSTRING_INVALIDATE_CACHE();
2905
2906       size_t from, len;
2907       PosLenToImpl(nStart, nLen, &from, &len);
2908       m_impl.replace(from, len, str.m_impl);
2909
2910       return *this;
2911   }
2912
2913     // replaces the substring with nCount copies of ch
2914   wxString& replace(size_t nStart, size_t nLen, size_t nCount, wxUniChar ch)
2915   {
2916       wxSTRING_INVALIDATE_CACHE();
2917
2918       size_t from, len;
2919       PosLenToImpl(nStart, nLen, &from, &len);
2920 #if wxUSE_UNICODE_UTF8
2921       if ( !ch.IsAscii() )
2922           m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
2923       else
2924 #endif
2925           m_impl.replace(from, len, nCount, (wxStringCharType)ch);
2926
2927       return *this;
2928   }
2929
2930     // replaces a substring with another substring
2931   wxString& replace(size_t nStart, size_t nLen,
2932                     const wxString& str, size_t nStart2, size_t nLen2)
2933   {
2934       wxSTRING_INVALIDATE_CACHE();
2935
2936       size_t from, len;
2937       PosLenToImpl(nStart, nLen, &from, &len);
2938
2939       size_t from2, len2;
2940       str.PosLenToImpl(nStart2, nLen2, &from2, &len2);
2941
2942       m_impl.replace(from, len, str.m_impl, from2, len2);
2943
2944       return *this;
2945   }
2946
2947      // replaces the substring with first nCount chars of sz
2948   wxString& replace(size_t nStart, size_t nLen,
2949                     const char* sz, size_t nCount)
2950   {
2951       wxSTRING_INVALIDATE_CACHE();
2952
2953       size_t from, len;
2954       PosLenToImpl(nStart, nLen, &from, &len);
2955
2956       SubstrBufFromMB str(ImplStr(sz, nCount));
2957
2958       m_impl.replace(from, len, str.data, str.len);
2959
2960       return *this;
2961   }
2962
2963   wxString& replace(size_t nStart, size_t nLen,
2964                     const wchar_t* sz, size_t nCount)
2965   {
2966       wxSTRING_INVALIDATE_CACHE();
2967
2968       size_t from, len;
2969       PosLenToImpl(nStart, nLen, &from, &len);
2970
2971       SubstrBufFromWC str(ImplStr(sz, nCount));
2972
2973       m_impl.replace(from, len, str.data, str.len);
2974
2975       return *this;
2976   }
2977
2978   wxString& replace(size_t nStart, size_t nLen,
2979                     const wxString& s, size_t nCount)
2980   {
2981       wxSTRING_INVALIDATE_CACHE();
2982
2983       size_t from, len;
2984       PosLenToImpl(nStart, nLen, &from, &len);
2985       m_impl.replace(from, len, s.m_impl.c_str(), s.LenToImpl(nCount));
2986
2987       return *this;
2988   }
2989
2990   wxString& replace(iterator first, iterator last, const char* s)
2991   {
2992       wxSTRING_INVALIDATE_CACHE();
2993
2994       m_impl.replace(first.impl(), last.impl(), ImplStr(s));
2995
2996       return *this;
2997   }
2998
2999   wxString& replace(iterator first, iterator last, const wchar_t* s)
3000   {
3001       wxSTRING_INVALIDATE_CACHE();
3002
3003       m_impl.replace(first.impl(), last.impl(), ImplStr(s));
3004
3005       return *this;
3006   }
3007
3008   wxString& replace(iterator first, iterator last, const char* s, size_type n)
3009   {
3010       wxSTRING_INVALIDATE_CACHE();
3011
3012       SubstrBufFromMB str(ImplStr(s, n));
3013       m_impl.replace(first.impl(), last.impl(), str.data, str.len);
3014
3015       return *this;
3016   }
3017
3018   wxString& replace(iterator first, iterator last, const wchar_t* s, size_type n)
3019   {
3020       wxSTRING_INVALIDATE_CACHE();
3021
3022       SubstrBufFromWC str(ImplStr(s, n));
3023       m_impl.replace(first.impl(), last.impl(), str.data, str.len);
3024
3025       return *this;
3026   }
3027
3028   wxString& replace(iterator first, iterator last, const wxString& s)
3029   {
3030       wxSTRING_INVALIDATE_CACHE();
3031
3032       m_impl.replace(first.impl(), last.impl(), s.m_impl);
3033
3034       return *this;
3035   }
3036
3037   wxString& replace(iterator first, iterator last, size_type n, wxUniChar ch)
3038   {
3039       wxSTRING_INVALIDATE_CACHE();
3040
3041 #if wxUSE_UNICODE_UTF8
3042       if ( !ch.IsAscii() )
3043           m_impl.replace(first.impl(), last.impl(),
3044                   wxStringOperations::EncodeNChars(n, ch));
3045       else
3046 #endif
3047           m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
3048
3049       return *this;
3050   }
3051
3052   wxString& replace(iterator first, iterator last,
3053                     const_iterator first1, const_iterator last1)
3054   {
3055       wxSTRING_INVALIDATE_CACHE();
3056
3057       m_impl.replace(first.impl(), last.impl(), first1.impl(), last1.impl());
3058
3059       return *this;
3060   }
3061
3062   wxString& replace(iterator first, iterator last,
3063                     const char *first1, const char *last1)
3064     { replace(first, last, first1, last1 - first1); return *this; }
3065   wxString& replace(iterator first, iterator last,
3066                     const wchar_t *first1, const wchar_t *last1)
3067     { replace(first, last, first1, last1 - first1); return *this; }
3068
3069   // swap two strings
3070   void swap(wxString& str)
3071   {
3072 #if wxUSE_STRING_POS_CACHE
3073       // we modify not only this string but also the other one directly so we
3074       // need to invalidate cache for both of them (we could also try to
3075       // exchange their cache entries but it seems unlikely to be worth it)
3076       InvalidateCache();
3077       str.InvalidateCache();
3078 #endif // wxUSE_STRING_POS_CACHE
3079
3080       m_impl.swap(str.m_impl);
3081   }
3082
3083     // find a substring
3084   size_t find(const wxString& str, size_t nStart = 0) const
3085     { return PosFromImpl(m_impl.find(str.m_impl, PosToImpl(nStart))); }
3086
3087     // find first n characters of sz
3088   size_t find(const char* sz, size_t nStart = 0, size_t n = npos) const
3089   {
3090       SubstrBufFromMB str(ImplStr(sz, n));
3091       return PosFromImpl(m_impl.find(str.data, PosToImpl(nStart), str.len));
3092   }
3093   size_t find(const wchar_t* sz, size_t nStart = 0, size_t n = npos) const
3094   {
3095       SubstrBufFromWC str(ImplStr(sz, n));
3096       return PosFromImpl(m_impl.find(str.data, PosToImpl(nStart), str.len));
3097   }
3098   size_t find(const wxScopedCharBuffer& s, size_t nStart = 0, size_t n = npos) const
3099     { return find(s.data(), nStart, n); }
3100   size_t find(const wxScopedWCharBuffer& s, size_t nStart = 0, size_t n = npos) const
3101     { return find(s.data(), nStart, n); }
3102   size_t find(const wxCStrData& s, size_t nStart = 0, size_t n = npos) const
3103     { return find(s.AsWChar(), nStart, n); }
3104
3105     // find the first occurrence of character ch after nStart
3106   size_t find(wxUniChar ch, size_t nStart = 0) const
3107   {
3108 #if wxUSE_UNICODE_UTF8
3109     if ( !ch.IsAscii() )
3110         return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
3111                                        PosToImpl(nStart)));
3112     else
3113 #endif
3114         return PosFromImpl(m_impl.find((wxStringCharType)ch,
3115                                        PosToImpl(nStart)));
3116
3117   }
3118   size_t find(wxUniCharRef ch, size_t nStart = 0) const
3119     {  return find(wxUniChar(ch), nStart); }
3120   size_t find(char ch, size_t nStart = 0) const
3121     {  return find(wxUniChar(ch), nStart); }
3122   size_t find(unsigned char ch, size_t nStart = 0) const
3123     {  return find(wxUniChar(ch), nStart); }
3124   size_t find(wchar_t ch, size_t nStart = 0) const
3125     {  return find(wxUniChar(ch), nStart); }
3126
3127     // rfind() family is exactly like find() but works right to left
3128
3129     // as find, but from the end
3130   size_t rfind(const wxString& str, size_t nStart = npos) const
3131     { return PosFromImpl(m_impl.rfind(str.m_impl, PosToImpl(nStart))); }
3132
3133     // as find, but from the end
3134   size_t rfind(const char* sz, size_t nStart = npos, size_t n = npos) const
3135   {
3136       SubstrBufFromMB str(ImplStr(sz, n));
3137       return PosFromImpl(m_impl.rfind(str.data, PosToImpl(nStart), str.len));
3138   }
3139   size_t rfind(const wchar_t* sz, size_t nStart = npos, size_t n = npos) const
3140   {
3141       SubstrBufFromWC str(ImplStr(sz, n));
3142       return PosFromImpl(m_impl.rfind(str.data, PosToImpl(nStart), str.len));
3143   }
3144   size_t rfind(const wxScopedCharBuffer& s, size_t nStart = npos, size_t n = npos) const
3145     { return rfind(s.data(), nStart, n); }
3146   size_t rfind(const wxScopedWCharBuffer& s, size_t nStart = npos, size_t n = npos) const
3147     { return rfind(s.data(), nStart, n); }
3148   size_t rfind(const wxCStrData& s, size_t nStart = npos, size_t n = npos) const
3149     { return rfind(s.AsWChar(), nStart, n); }
3150     // as find, but from the end
3151   size_t rfind(wxUniChar ch, size_t nStart = npos) const
3152   {
3153 #if wxUSE_UNICODE_UTF8
3154     if ( !ch.IsAscii() )
3155         return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
3156                                         PosToImpl(nStart)));
3157     else
3158 #endif
3159         return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
3160                                         PosToImpl(nStart)));
3161   }
3162   size_t rfind(wxUniCharRef ch, size_t nStart = npos) const
3163     {  return rfind(wxUniChar(ch), nStart); }
3164   size_t rfind(char ch, size_t nStart = npos) const
3165     {  return rfind(wxUniChar(ch), nStart); }
3166   size_t rfind(unsigned char ch, size_t nStart = npos) const
3167     {  return rfind(wxUniChar(ch), nStart); }
3168   size_t rfind(wchar_t ch, size_t nStart = npos) const
3169     {  return rfind(wxUniChar(ch), nStart); }
3170
3171   // find first/last occurrence of any character (not) in the set:
3172 #if wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
3173   // FIXME-UTF8: this is not entirely correct, because it doesn't work if
3174   //             sizeof(wchar_t)==2 and surrogates are present in the string;
3175   //             should we care? Probably not.
3176   size_t find_first_of(const wxString& str, size_t nStart = 0) const
3177     { return m_impl.find_first_of(str.m_impl, nStart); }
3178   size_t find_first_of(const char* sz, size_t nStart = 0) const
3179     { return m_impl.find_first_of(ImplStr(sz), nStart); }
3180   size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const
3181     { return m_impl.find_first_of(ImplStr(sz), nStart); }
3182   size_t find_first_of(const char* sz, size_t nStart, size_t n) const
3183     { return m_impl.find_first_of(ImplStr(sz), nStart, n); }
3184   size_t find_first_of(const wchar_t* sz, size_t nStart, size_t n) const
3185     { return m_impl.find_first_of(ImplStr(sz), nStart, n); }
3186   size_t find_first_of(wxUniChar c, size_t nStart = 0) const
3187     { return m_impl.find_first_of((wxChar)c, nStart); }
3188
3189   size_t find_last_of(const wxString& str, size_t nStart = npos) const
3190     { return m_impl.find_last_of(str.m_impl, nStart); }
3191   size_t find_last_of(const char* sz, size_t nStart = npos) const
3192     { return m_impl.find_last_of(ImplStr(sz), nStart); }
3193   size_t find_last_of(const wchar_t* sz, size_t nStart = npos) const
3194     { return m_impl.find_last_of(ImplStr(sz), nStart); }
3195   size_t find_last_of(const char* sz, size_t nStart, size_t n) const
3196     { return m_impl.find_last_of(ImplStr(sz), nStart, n); }
3197   size_t find_last_of(const wchar_t* sz, size_t nStart, size_t n) const
3198     { return m_impl.find_last_of(ImplStr(sz), nStart, n); }
3199   size_t find_last_of(wxUniChar c, size_t nStart = npos) const
3200     { return m_impl.find_last_of((wxChar)c, nStart); }
3201
3202   size_t find_first_not_of(const wxString& str, size_t nStart = 0) const
3203     { return m_impl.find_first_not_of(str.m_impl, nStart); }
3204   size_t find_first_not_of(const char* sz, size_t nStart = 0) const
3205     { return m_impl.find_first_not_of(ImplStr(sz), nStart); }
3206   size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const
3207     { return m_impl.find_first_not_of(ImplStr(sz), nStart); }
3208   size_t find_first_not_of(const char* sz, size_t nStart, size_t n) const
3209     { return m_impl.find_first_not_of(ImplStr(sz), nStart, n); }
3210   size_t find_first_not_of(const wchar_t* sz, size_t nStart, size_t n) const
3211     { return m_impl.find_first_not_of(ImplStr(sz), nStart, n); }
3212   size_t find_first_not_of(wxUniChar c, size_t nStart = 0) const
3213     { return m_impl.find_first_not_of((wxChar)c, nStart); }
3214
3215   size_t find_last_not_of(const wxString& str, size_t nStart = npos) const
3216     { return m_impl.find_last_not_of(str.m_impl, nStart); }
3217   size_t find_last_not_of(const char* sz, size_t nStart = npos) const
3218     { return m_impl.find_last_not_of(ImplStr(sz), nStart); }
3219   size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const
3220     { return m_impl.find_last_not_of(ImplStr(sz), nStart); }
3221   size_t find_last_not_of(const char* sz, size_t nStart, size_t n) const
3222     { return m_impl.find_last_not_of(ImplStr(sz), nStart, n); }
3223   size_t find_last_not_of(const wchar_t* sz, size_t nStart, size_t n) const
3224     { return m_impl.find_last_not_of(ImplStr(sz), nStart, n); }
3225   size_t find_last_not_of(wxUniChar c, size_t nStart = npos) const
3226     { return m_impl.find_last_not_of((wxChar)c, nStart); }
3227 #else
3228   // we can't use std::string implementation in UTF-8 build, because the
3229   // character sets would be interpreted wrongly:
3230
3231     // as strpbrk() but starts at nStart, returns npos if not found
3232   size_t find_first_of(const wxString& str, size_t nStart = 0) const
3233 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3234     { return find_first_of(str.wc_str(), nStart); }
3235 #else
3236     { return find_first_of(str.mb_str(), nStart); }
3237 #endif
3238     // same as above
3239   size_t find_first_of(const char* sz, size_t nStart = 0) const;
3240   size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const;
3241   size_t find_first_of(const char* sz, size_t nStart, size_t n) const;
3242   size_t find_first_of(const wchar_t* sz, size_t nStart, size_t n) const;
3243     // same as find(char, size_t)
3244   size_t find_first_of(wxUniChar c, size_t nStart = 0) const
3245     { return find(c, nStart); }
3246     // find the last (starting from nStart) char from str in this string
3247   size_t find_last_of (const wxString& str, size_t nStart = npos) const
3248 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3249     { return find_last_of(str.wc_str(), nStart); }
3250 #else
3251     { return find_last_of(str.mb_str(), nStart); }
3252 #endif
3253     // same as above
3254   size_t find_last_of (const char* sz, size_t nStart = npos) const;
3255   size_t find_last_of (const wchar_t* sz, size_t nStart = npos) const;
3256   size_t find_last_of(const char* sz, size_t nStart, size_t n) const;
3257   size_t find_last_of(const wchar_t* sz, size_t nStart, size_t n) const;
3258     // same as above
3259   size_t find_last_of(wxUniChar c, size_t nStart = npos) const
3260     { return rfind(c, nStart); }
3261
3262     // find first/last occurrence of any character not in the set
3263
3264     // as strspn() (starting from nStart), returns npos on failure
3265   size_t find_first_not_of(const wxString& str, size_t nStart = 0) const
3266 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3267     { return find_first_not_of(str.wc_str(), nStart); }
3268 #else
3269     { return find_first_not_of(str.mb_str(), nStart); }
3270 #endif
3271     // same as above
3272   size_t find_first_not_of(const char* sz, size_t nStart = 0) const;
3273   size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const;
3274   size_t find_first_not_of(const char* sz, size_t nStart, size_t n) const;
3275   size_t find_first_not_of(const wchar_t* sz, size_t nStart, size_t n) const;
3276     // same as above
3277   size_t find_first_not_of(wxUniChar ch, size_t nStart = 0) const;
    // find the last character not in the set, searching backwards from nStart
3279   size_t find_last_not_of(const wxString& str, size_t nStart = npos) const
3280 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3281     { return find_last_not_of(str.wc_str(), nStart); }
3282 #else
3283     { return find_last_not_of(str.mb_str(), nStart); }
3284 #endif
3285     // same as above
3286   size_t find_last_not_of(const char* sz, size_t nStart = npos) const;
3287   size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const;
3288   size_t find_last_not_of(const char* sz, size_t nStart, size_t n) const;
3289   size_t find_last_not_of(const wchar_t* sz, size_t nStart, size_t n) const;
3290     // same as above
3291   size_t find_last_not_of(wxUniChar ch, size_t nStart = npos) const;
3292 #endif // wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 or not
3293
3294   // provide char/wchar_t/wxUniCharRef overloads for char-finding functions
3295   // above to resolve ambiguities:
3296   size_t find_first_of(wxUniCharRef ch, size_t nStart = 0) const
3297     {  return find_first_of(wxUniChar(ch), nStart); }
3298   size_t find_first_of(char ch, size_t nStart = 0) const
3299     {  return find_first_of(wxUniChar(ch), nStart); }
3300   size_t find_first_of(unsigned char ch, size_t nStart = 0) const
3301     {  return find_first_of(wxUniChar(ch), nStart); }
3302   size_t find_first_of(wchar_t ch, size_t nStart = 0) const
3303     {  return find_first_of(wxUniChar(ch), nStart); }
3304   size_t find_last_of(wxUniCharRef ch, size_t nStart = npos) const
3305     {  return find_last_of(wxUniChar(ch), nStart); }
3306   size_t find_last_of(char ch, size_t nStart = npos) const
3307     {  return find_last_of(wxUniChar(ch), nStart); }
3308   size_t find_last_of(unsigned char ch, size_t nStart = npos) const
3309     {  return find_last_of(wxUniChar(ch), nStart); }
3310   size_t find_last_of(wchar_t ch, size_t nStart = npos) const
3311     {  return find_last_of(wxUniChar(ch), nStart); }
3312   size_t find_first_not_of(wxUniCharRef ch, size_t nStart = 0) const
3313     {  return find_first_not_of(wxUniChar(ch), nStart); }
3314   size_t find_first_not_of(char ch, size_t nStart = 0) const
3315     {  return find_first_not_of(wxUniChar(ch), nStart); }
3316   size_t find_first_not_of(unsigned char ch, size_t nStart = 0) const
3317     {  return find_first_not_of(wxUniChar(ch), nStart); }
3318   size_t find_first_not_of(wchar_t ch, size_t nStart = 0) const
3319     {  return find_first_not_of(wxUniChar(ch), nStart); }
3320   size_t find_last_not_of(wxUniCharRef ch, size_t nStart = npos) const
3321     {  return find_last_not_of(wxUniChar(ch), nStart); }
3322   size_t find_last_not_of(char ch, size_t nStart = npos) const
3323     {  return find_last_not_of(wxUniChar(ch), nStart); }
3324   size_t find_last_not_of(unsigned char ch, size_t nStart = npos) const
3325     {  return find_last_not_of(wxUniChar(ch), nStart); }
3326   size_t find_last_not_of(wchar_t ch, size_t nStart = npos) const
3327     {  return find_last_not_of(wxUniChar(ch), nStart); }
3328
3329   // and additional overloads for the versions taking strings:
3330   size_t find_first_of(const wxCStrData& sz, size_t nStart = 0) const
3331     { return find_first_of(sz.AsString(), nStart); }
3332   size_t find_first_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3333     { return find_first_of(sz.data(), nStart); }
3334   size_t find_first_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3335     { return find_first_of(sz.data(), nStart); }
3336   size_t find_first_of(const wxCStrData& sz, size_t nStart, size_t n) const
3337     { return find_first_of(sz.AsWChar(), nStart, n); }
3338   size_t find_first_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3339     { return find_first_of(sz.data(), nStart, n); }
3340   size_t find_first_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3341     { return find_first_of(sz.data(), nStart, n); }
3342
3343   size_t find_last_of(const wxCStrData& sz, size_t nStart = 0) const
3344     { return find_last_of(sz.AsString(), nStart); }
3345   size_t find_last_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3346     { return find_last_of(sz.data(), nStart); }
3347   size_t find_last_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3348     { return find_last_of(sz.data(), nStart); }
3349   size_t find_last_of(const wxCStrData& sz, size_t nStart, size_t n) const
3350     { return find_last_of(sz.AsWChar(), nStart, n); }
3351   size_t find_last_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3352     { return find_last_of(sz.data(), nStart, n); }
3353   size_t find_last_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3354     { return find_last_of(sz.data(), nStart, n); }
3355
3356   size_t find_first_not_of(const wxCStrData& sz, size_t nStart = 0) const
3357     { return find_first_not_of(sz.AsString(), nStart); }
3358   size_t find_first_not_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3359     { return find_first_not_of(sz.data(), nStart); }
3360   size_t find_first_not_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3361     { return find_first_not_of(sz.data(), nStart); }
3362   size_t find_first_not_of(const wxCStrData& sz, size_t nStart, size_t n) const
3363     { return find_first_not_of(sz.AsWChar(), nStart, n); }
3364   size_t find_first_not_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3365     { return find_first_not_of(sz.data(), nStart, n); }
3366   size_t find_first_not_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3367     { return find_first_not_of(sz.data(), nStart, n); }
3368
3369   size_t find_last_not_of(const wxCStrData& sz, size_t nStart = 0) const
3370     { return find_last_not_of(sz.AsString(), nStart); }
3371   size_t find_last_not_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3372     { return find_last_not_of(sz.data(), nStart); }
3373   size_t find_last_not_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3374     { return find_last_not_of(sz.data(), nStart); }
3375   size_t find_last_not_of(const wxCStrData& sz, size_t nStart, size_t n) const
3376     { return find_last_not_of(sz.AsWChar(), nStart, n); }
3377   size_t find_last_not_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3378     { return find_last_not_of(sz.data(), nStart, n); }
3379   size_t find_last_not_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3380     { return find_last_not_of(sz.data(), nStart, n); }
3381
3382       // string += string
3383   wxString& operator+=(const wxString& s)
3384   {
3385       wxSTRING_INVALIDATE_CACHED_LENGTH();
3386
3387       m_impl += s.m_impl;
3388       return *this;
3389   }
3390       // string += C string
3391   wxString& operator+=(const char *psz)
3392   {
3393       wxSTRING_INVALIDATE_CACHED_LENGTH();
3394
3395       m_impl += ImplStr(psz);
3396       return *this;
3397   }
3398   wxString& operator+=(const wchar_t *pwz)
3399   {
3400       wxSTRING_INVALIDATE_CACHED_LENGTH();
3401
3402       m_impl += ImplStr(pwz);
3403       return *this;
3404   }
3405   wxString& operator+=(const wxCStrData& s)
3406   {
3407       wxSTRING_INVALIDATE_CACHED_LENGTH();
3408
3409       m_impl += s.AsString().m_impl;
3410       return *this;
3411   }
3412   wxString& operator+=(const wxScopedCharBuffer& s)
3413     { return append(s); }
3414   wxString& operator+=(const wxScopedWCharBuffer& s)
3415     { return append(s); }
3416       // string += char
3417   wxString& operator+=(wxUniChar ch)
3418   {
3419       wxSTRING_UPDATE_CACHED_LENGTH(1);
3420
3421 #if wxUSE_UNICODE_UTF8
3422       if ( !ch.IsAscii() )
3423           m_impl += wxStringOperations::EncodeChar(ch);
3424       else
3425 #endif
3426           m_impl += (wxStringCharType)ch;
3427       return *this;
3428   }
3429   wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); }
3430   wxString& operator+=(int ch) { return *this += wxUniChar(ch); }
3431   wxString& operator+=(char ch) { return *this += wxUniChar(ch); }
3432   wxString& operator+=(unsigned char ch) { return *this += wxUniChar(ch); }
3433   wxString& operator+=(wchar_t ch) { return *this += wxUniChar(ch); }
3434
3435 private:
3436 #if !wxUSE_STL_BASED_WXSTRING
3437   // helpers for wxStringBuffer and wxStringBufferLength
3438   wxStringCharType *DoGetWriteBuf(size_t nLen)
3439   {
3440       return m_impl.DoGetWriteBuf(nLen);
3441   }
3442
3443   void DoUngetWriteBuf()
3444   {
3445       wxSTRING_INVALIDATE_CACHE();
3446
3447       m_impl.DoUngetWriteBuf();
3448   }
3449
3450   void DoUngetWriteBuf(size_t nLen)
3451   {
3452       wxSTRING_INVALIDATE_CACHE();
3453
3454       m_impl.DoUngetWriteBuf(nLen);
3455   }
3456 #endif // !wxUSE_STL_BASED_WXSTRING
3457
3458 #ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
3459   #if !wxUSE_UTF8_LOCALE_ONLY
3460   int DoPrintfWchar(const wxChar *format, ...);
3461   static wxString DoFormatWchar(const wxChar *format, ...);
3462   #endif
3463   #if wxUSE_UNICODE_UTF8
3464   int DoPrintfUtf8(const char *format, ...);
3465   static wxString DoFormatUtf8(const char *format, ...);
3466   #endif
3467 #endif
3468
3469 #if !wxUSE_STL_BASED_WXSTRING
3470   // check string's data validity
3471   bool IsValid() const { return m_impl.GetStringData()->IsValid(); }
3472 #endif
3473
3474 private:
3475   wxStringImpl m_impl;
3476
3477   // buffers for compatibility conversion from (char*)c_str() and
3478   // (wchar_t*)c_str(): the pointers returned by these functions should remain
3479   // valid until the string itself is modified for compatibility with the
3480   // existing code and consistency with std::string::c_str() so returning a
3481   // temporary buffer won't do and we need to cache the conversion results
3482
3483   // TODO-UTF8: benchmark various approaches to keeping compatibility buffers
3484   template<typename T>
3485   struct ConvertedBuffer
3486   {
3487       // notice that there is no need to initialize m_len here as it's unused
3488       // as long as m_str is NULL
3489       ConvertedBuffer() : m_str(NULL) {}
3490       ~ConvertedBuffer()
3491           { free(m_str); }
3492
3493       bool Extend(size_t len)
3494       {
3495           // add extra 1 for the trailing NUL
3496           void * const str = realloc(m_str, sizeof(T)*(len + 1));
3497           if ( !str )
3498               return false;
3499
3500           m_str = static_cast<T *>(str);
3501           m_len = len;
3502
3503           return true;
3504       }
3505
3506       const wxScopedCharTypeBuffer<T> AsScopedBuffer() const
3507       {
3508           return wxScopedCharTypeBuffer<T>::CreateNonOwned(m_str, m_len);
3509       }
3510
3511       T *m_str;     // pointer to the string data
3512       size_t m_len; // length, not size, i.e. in chars and without last NUL
3513   };
3514
3515
3516 #if wxUSE_UNICODE
3517   // common mb_str() and wxCStrData::AsChar() helper: performs the conversion
3518   // and returns either m_convertedToChar.m_str (in which case its m_len is
3519   // also updated) or NULL if it failed
3520   //
3521   // there is an important exception: in wxUSE_UNICODE_UTF8 build if conv is a
3522   // UTF-8 one, we return m_impl.c_str() directly, without doing any conversion
3523   // as optimization and so the caller needs to check for this before using
3524   // m_convertedToChar
3525   //
3526   // NB: AsChar() returns char* in any build, unlike mb_str()
3527   const char *AsChar(const wxMBConv& conv) const;
3528
3529   // mb_str() implementation helper
3530   wxScopedCharBuffer AsCharBuf(const wxMBConv& conv) const
3531   {
3532 #if wxUSE_UNICODE_UTF8
3533       // avoid conversion if we can
3534       if ( conv.IsUTF8() )
3535       {
3536           return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(),
3537                   m_impl.length());
3538       }
3539 #endif // wxUSE_UNICODE_UTF8
3540
3541       // call this solely in order to fill in m_convertedToChar as AsChar()
3542       // updates it as a side effect: this is a bit ugly but it's a completely
3543       // internal function so the users of this class shouldn't care or know
3544       // about it and doing it like this, i.e. having a separate AsChar(),
3545       // allows us to avoid the creation and destruction of a temporary buffer
3546       // when using wxCStrData without duplicating any code
3547       if ( !AsChar(conv) )
3548       {
3549           // although it would be probably more correct to return NULL buffer
3550           // from here if the conversion fails, a lot of existing code doesn't
3551           // expect mb_str() (or wc_str()) to ever return NULL so return an
3552           // empty string otherwise to avoid crashes in it
3553           //
3554           // also, some existing code does check for the conversion success and
3555           // so asserting here would be bad too -- even if it does mean that
3556           // silently losing data is possible for badly written code
3557           return wxScopedCharBuffer::CreateNonOwned("", 0);
3558       }
3559
3560       return m_convertedToChar.AsScopedBuffer();
3561   }
3562
3563   ConvertedBuffer<char> m_convertedToChar;
#endif // wxUSE_UNICODE
3565
3566 #if !wxUSE_UNICODE_WCHAR
3567   // common wc_str() and wxCStrData::AsWChar() helper for both UTF-8 and ANSI
3568   // builds: converts the string contents into m_convertedToWChar and returns
3569   // NULL if the conversion failed (this can only happen in ANSI build)
3570   //
3571   // NB: AsWChar() returns wchar_t* in any build, unlike wc_str()
3572   const wchar_t *AsWChar(const wxMBConv& conv) const;
3573
3574   // wc_str() implementation helper
3575   wxScopedWCharBuffer AsWCharBuf(const wxMBConv& conv) const
3576   {
3577       if ( !AsWChar(conv) )
3578           return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
3579
3580       return m_convertedToWChar.AsScopedBuffer();
3581   }
3582
3583   ConvertedBuffer<wchar_t> m_convertedToWChar;
3584 #endif // !wxUSE_UNICODE_WCHAR
3585
3586 #if wxUSE_UNICODE_UTF8
3587   // FIXME-UTF8: (try to) move this elsewhere (TLS) or solve differently
  //             assigning to the character pointed to by wxString::iterator may
3589   //             change the underlying wxStringImpl iterator, so we have to
3590   //             keep track of all iterators and update them as necessary:
3591   struct wxStringIteratorNodeHead
3592   {
3593       wxStringIteratorNodeHead() : ptr(NULL) {}
3594       wxStringIteratorNode *ptr;
3595
3596       // copying is disallowed as it would result in more than one pointer into
3597       // the same linked list
3598       wxDECLARE_NO_COPY_CLASS(wxStringIteratorNodeHead);
3599   };
3600
3601   wxStringIteratorNodeHead m_iterators;
3602
3603   friend class WXDLLIMPEXP_FWD_BASE wxStringIteratorNode;
3604   friend class WXDLLIMPEXP_FWD_BASE wxUniCharRef;
3605 #endif // wxUSE_UNICODE_UTF8
3606
3607   friend class WXDLLIMPEXP_FWD_BASE wxCStrData;
3608   friend class wxStringInternalBuffer;
3609   friend class wxStringInternalBufferLength;
3610 };
3611
3612 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
3613     #pragma warning (pop)
3614 #endif
3615
3616 // string iterator operators that satisfy STL Random Access Iterator
3617 // requirements:
3618 inline wxString::iterator operator+(ptrdiff_t n, wxString::iterator i)
3619   { return i + n; }
3620 inline wxString::const_iterator operator+(ptrdiff_t n, wxString::const_iterator i)
3621   { return i + n; }
3622 inline wxString::reverse_iterator operator+(ptrdiff_t n, wxString::reverse_iterator i)
3623   { return i + n; }
3624 inline wxString::const_reverse_iterator operator+(ptrdiff_t n, wxString::const_reverse_iterator i)
3625   { return i + n; }
3626
3627 // notice that even though for many compilers the friend declarations above are
3628 // enough, from the point of view of C++ standard we must have the declarations
3629 // here as friend ones are not injected in the enclosing namespace and without
3630 // them the code fails to compile with conforming compilers such as xlC or g++4
3631 wxString WXDLLIMPEXP_BASE operator+(const wxString& string1, const wxString& string2);
3632 wxString WXDLLIMPEXP_BASE operator+(const wxString& string, const char *psz);
3633 wxString WXDLLIMPEXP_BASE operator+(const wxString& string, const wchar_t *pwz);
3634 wxString WXDLLIMPEXP_BASE operator+(const char *psz, const wxString& string);
3635 wxString WXDLLIMPEXP_BASE operator+(const wchar_t *pwz, const wxString& string);
3636
3637 wxString WXDLLIMPEXP_BASE operator+(const wxString& string, wxUniChar ch);
3638 wxString WXDLLIMPEXP_BASE operator+(wxUniChar ch, const wxString& string);
3639
3640 inline wxString operator+(const wxString& string, wxUniCharRef ch)
3641     { return string + (wxUniChar)ch; }
3642 inline wxString operator+(const wxString& string, char ch)
3643     { return string + wxUniChar(ch); }
3644 inline wxString operator+(const wxString& string, wchar_t ch)
3645     { return string + wxUniChar(ch); }
3646 inline wxString operator+(wxUniCharRef ch, const wxString& string)
3647     { return (wxUniChar)ch + string; }
3648 inline wxString operator+(char ch, const wxString& string)
3649     { return wxUniChar(ch) + string; }
3650 inline wxString operator+(wchar_t ch, const wxString& string)
3651     { return wxUniChar(ch) + string; }
3652
3653
3654 #define wxGetEmptyString() wxString()
3655
3656 // ----------------------------------------------------------------------------
3657 // helper functions which couldn't be defined inline
3658 // ----------------------------------------------------------------------------
3659
3660 namespace wxPrivate
3661 {
3662
3663 #if wxUSE_UNICODE_WCHAR
3664
3665 template <>
3666 struct wxStringAsBufHelper<char>
3667 {
3668     static wxScopedCharBuffer Get(const wxString& s, size_t *len)
3669     {
3670         wxScopedCharBuffer buf(s.mb_str());
3671         if ( len )
3672             *len = buf ? strlen(buf) : 0;
3673         return buf;
3674     }
3675 };
3676
3677 template <>
3678 struct wxStringAsBufHelper<wchar_t>
3679 {
3680     static wxScopedWCharBuffer Get(const wxString& s, size_t *len)
3681     {
3682         const size_t length = s.length();
3683         if ( len )
3684             *len = length;
3685         return wxScopedWCharBuffer::CreateNonOwned(s.wx_str(), length);
3686     }
3687 };
3688
3689 #elif wxUSE_UNICODE_UTF8
3690
3691 template <>
3692 struct wxStringAsBufHelper<char>
3693 {
3694     static wxScopedCharBuffer Get(const wxString& s, size_t *len)
3695     {
3696         const size_t length = s.utf8_length();
3697         if ( len )
3698             *len = length;
3699         return wxScopedCharBuffer::CreateNonOwned(s.wx_str(), length);
3700     }
3701 };
3702
3703 template <>
3704 struct wxStringAsBufHelper<wchar_t>
3705 {
3706     static wxScopedWCharBuffer Get(const wxString& s, size_t *len)
3707     {
3708         wxScopedWCharBuffer wbuf(s.wc_str());
3709         if ( len )
3710             *len = wxWcslen(wbuf);
3711         return wbuf;
3712     }
3713 };
3714
3715 #endif // Unicode build kind
3716
3717 } // namespace wxPrivate
3718
3719 // ----------------------------------------------------------------------------
3720 // wxStringBuffer: a tiny class allowing to get a writable pointer into string
3721 // ----------------------------------------------------------------------------
3722
3723 #if !wxUSE_STL_BASED_WXSTRING
3724 // string buffer for direct access to string data in their native
3725 // representation:
3726 class wxStringInternalBuffer
3727 {
3728 public:
3729     typedef wxStringCharType CharType;
3730
3731     wxStringInternalBuffer(wxString& str, size_t lenWanted = 1024)
3732         : m_str(str), m_buf(NULL)
3733         { m_buf = m_str.DoGetWriteBuf(lenWanted); }
3734
3735     ~wxStringInternalBuffer() { m_str.DoUngetWriteBuf(); }
3736
3737     operator wxStringCharType*() const { return m_buf; }
3738
3739 private:
3740     wxString&         m_str;
3741     wxStringCharType *m_buf;
3742
3743     wxDECLARE_NO_COPY_CLASS(wxStringInternalBuffer);
3744 };
3745
3746 class wxStringInternalBufferLength
3747 {
3748 public:
3749     typedef wxStringCharType CharType;
3750
3751     wxStringInternalBufferLength(wxString& str, size_t lenWanted = 1024)
3752         : m_str(str), m_buf(NULL), m_len(0), m_lenSet(false)
3753     {
3754         m_buf = m_str.DoGetWriteBuf(lenWanted);
3755         wxASSERT(m_buf != NULL);
3756     }
3757
3758     ~wxStringInternalBufferLength()
3759     {
3760         wxASSERT(m_lenSet);
3761         m_str.DoUngetWriteBuf(m_len);
3762     }
3763
3764     operator wxStringCharType*() const { return m_buf; }
3765     void SetLength(size_t length) { m_len = length; m_lenSet = true; }
3766
3767 private:
3768     wxString&         m_str;
3769     wxStringCharType *m_buf;
3770     size_t            m_len;
3771     bool              m_lenSet;
3772
3773     wxDECLARE_NO_COPY_CLASS(wxStringInternalBufferLength);
3774 };
3775
3776 #endif // !wxUSE_STL_BASED_WXSTRING
3777
3778 template<typename T>
3779 class wxStringTypeBufferBase
3780 {
3781 public:
3782     typedef T CharType;
3783
3784     wxStringTypeBufferBase(wxString& str, size_t lenWanted = 1024)
3785         : m_str(str), m_buf(lenWanted)
3786     {
3787         // for compatibility with old wxStringBuffer which provided direct
3788         // access to wxString internal buffer, initialize ourselves with the
3789         // string initial contents
3790
3791         // FIXME-VC6: remove the ugly (CharType *)NULL and use normal
3792         //            tchar_str<CharType>
3793         size_t len;
3794         const wxCharTypeBuffer<CharType> buf(str.tchar_str(&len, (CharType *)NULL));
3795         if ( buf )
3796         {
3797             if ( len > lenWanted )
3798             {
3799                 // in this case there is not enough space for terminating NUL,
3800                 // ensure that we still put it there
3801                 m_buf.data()[lenWanted] = 0;
3802                 len = lenWanted - 1;
3803             }
3804
3805             memcpy(m_buf.data(), buf, (len + 1)*sizeof(CharType));
3806         }
3807         //else: conversion failed, this can happen when trying to get Unicode
3808         //      string contents into a char string
3809     }
3810
3811     operator CharType*() { return m_buf.data(); }
3812
3813 protected:
3814     wxString& m_str;
3815     wxCharTypeBuffer<CharType> m_buf;
3816 };
3817
3818 template<typename T>
3819 class wxStringTypeBufferLengthBase : public wxStringTypeBufferBase<T>
3820 {
3821 public:
3822     wxStringTypeBufferLengthBase(wxString& str, size_t lenWanted = 1024)
3823         : wxStringTypeBufferBase<T>(str, lenWanted),
3824           m_len(0),
3825           m_lenSet(false)
3826         { }
3827
3828     ~wxStringTypeBufferLengthBase()
3829     {
3830         wxASSERT_MSG( this->m_lenSet, "forgot to call SetLength()" );
3831     }
3832
3833     void SetLength(size_t length) { m_len = length; m_lenSet = true; }
3834
3835 protected:
3836     size_t m_len;
3837     bool m_lenSet;
3838 };
3839
3840 template<typename T>
3841 class wxStringTypeBuffer : public wxStringTypeBufferBase<T>
3842 {
3843 public:
3844     wxStringTypeBuffer(wxString& str, size_t lenWanted = 1024)
3845         : wxStringTypeBufferBase<T>(str, lenWanted)
3846         { }
3847
3848     ~wxStringTypeBuffer()
3849     {
3850         this->m_str.assign(this->m_buf.data());
3851     }
3852
3853     wxDECLARE_NO_COPY_CLASS(wxStringTypeBuffer);
3854 };
3855
3856 template<typename T>
3857 class wxStringTypeBufferLength : public wxStringTypeBufferLengthBase<T>
3858 {
3859 public:
3860     wxStringTypeBufferLength(wxString& str, size_t lenWanted = 1024)
3861         : wxStringTypeBufferLengthBase<T>(str, lenWanted)
3862         { }
3863
3864     ~wxStringTypeBufferLength()
3865     {
3866         this->m_str.assign(this->m_buf.data(), this->m_len);
3867     }
3868
3869     wxDECLARE_NO_COPY_CLASS(wxStringTypeBufferLength);
3870 };
3871
3872 #if wxUSE_STL_BASED_WXSTRING
3873
3874 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferBase<wxStringCharType> )
3875
3876 class wxStringInternalBuffer : public wxStringTypeBufferBase<wxStringCharType>
3877 {
3878 public:
3879     wxStringInternalBuffer(wxString& str, size_t lenWanted = 1024)
3880         : wxStringTypeBufferBase<wxStringCharType>(str, lenWanted) {}
3881     ~wxStringInternalBuffer()
3882         { m_str.m_impl.assign(m_buf.data()); }
3883
3884     wxDECLARE_NO_COPY_CLASS(wxStringInternalBuffer);
3885 };
3886
3887 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE(
3888     wxStringTypeBufferLengthBase<wxStringCharType> )
3889
3890 class wxStringInternalBufferLength
3891     : public wxStringTypeBufferLengthBase<wxStringCharType>
3892 {
3893 public:
3894     wxStringInternalBufferLength(wxString& str, size_t lenWanted = 1024)
3895         : wxStringTypeBufferLengthBase<wxStringCharType>(str, lenWanted) {}
3896
3897     ~wxStringInternalBufferLength()
3898     {
3899         m_str.m_impl.assign(m_buf.data(), m_len);
3900     }
3901
3902     wxDECLARE_NO_COPY_CLASS(wxStringInternalBufferLength);
3903 };
3904
3905 #endif // wxUSE_STL_BASED_WXSTRING
3906
3907
3908 #if wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
3909 typedef wxStringTypeBuffer<wxChar>        wxStringBuffer;
3910 typedef wxStringTypeBufferLength<wxChar>  wxStringBufferLength;
3911 #else // if !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
3912 typedef wxStringInternalBuffer                wxStringBuffer;
3913 typedef wxStringInternalBufferLength          wxStringBufferLength;
3914 #endif // !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
3915
3916 #if wxUSE_UNICODE_UTF8
3917 typedef wxStringInternalBuffer                wxUTF8StringBuffer;
3918 typedef wxStringInternalBufferLength          wxUTF8StringBufferLength;
3919 #elif wxUSE_UNICODE_WCHAR
3920
3921 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferBase<char> )
3922
3923 // Note about inlined dtors in the classes below: this is done not for
3924 // performance reasons but just to avoid linking errors in the MSVC DLL build
3925 // under Windows: if a class has non-inline methods it must be declared as
3926 // being DLL-exported but, due to an extremely interesting feature of MSVC 7
3927 // and later, any template class which is used as a base of a DLL-exported
3928 // class is implicitly made DLL-exported too, as explained at the bottom of
3929 // http://msdn.microsoft.com/en-us/library/twa2aw10.aspx (just to confirm: yes,
3930 // _inheriting_ from a class can change whether it is being exported from DLL)
3931 //
3932 // But this results in link errors because the base template class is not DLL-
3933 // exported, whether it is declared with WXDLLIMPEXP_BASE or not, because it
3934 // does have only inline functions. So the simplest fix is to just make all the
3935 // functions of these classes inline too.
3936
3937 class wxUTF8StringBuffer : public wxStringTypeBufferBase<char>
3938 {
3939 public:
3940     wxUTF8StringBuffer(wxString& str, size_t lenWanted = 1024)
3941         : wxStringTypeBufferBase<char>(str, lenWanted) {}
3942     ~wxUTF8StringBuffer()
3943     {
3944         wxMBConvStrictUTF8 conv;
3945         size_t wlen = conv.ToWChar(NULL, 0, m_buf);
3946         wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
3947
3948         wxStringInternalBuffer wbuf(m_str, wlen);
3949         conv.ToWChar(wbuf, wlen, m_buf);
3950     }
3951
3952     wxDECLARE_NO_COPY_CLASS(wxUTF8StringBuffer);
3953 };
3954
3955 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferLengthBase<char> )
3956
3957 class wxUTF8StringBufferLength : public wxStringTypeBufferLengthBase<char>
3958 {
3959 public:
3960     wxUTF8StringBufferLength(wxString& str, size_t lenWanted = 1024)
3961         : wxStringTypeBufferLengthBase<char>(str, lenWanted) {}
3962     ~wxUTF8StringBufferLength()
3963     {
3964         wxCHECK_RET(m_lenSet, "length not set");
3965
3966         wxMBConvStrictUTF8 conv;
3967         size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
3968         wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
3969
3970         wxStringInternalBufferLength wbuf(m_str, wlen);
3971         conv.ToWChar(wbuf, wlen, m_buf, m_len);
3972         wbuf.SetLength(wlen);
3973     }
3974
3975     wxDECLARE_NO_COPY_CLASS(wxUTF8StringBufferLength);
3976 };
3977 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
3978
3979
3980 // ---------------------------------------------------------------------------
3981 // wxString comparison functions: operator versions are always case sensitive
3982 // ---------------------------------------------------------------------------
3983
3984 #define wxCMP_WXCHAR_STRING(p, s, op) 0 op s.Cmp(p)
3985
3986 wxDEFINE_ALL_COMPARISONS(const wxChar *, const wxString&, wxCMP_WXCHAR_STRING)
3987
3988 #undef wxCMP_WXCHAR_STRING
3989
3990 inline bool operator==(const wxString& s1, const wxString& s2)
3991     { return s1.IsSameAs(s2); }
3992 inline bool operator!=(const wxString& s1, const wxString& s2)
3993     { return !s1.IsSameAs(s2); }
3994 inline bool operator< (const wxString& s1, const wxString& s2)
3995     { return s1.Cmp(s2) < 0; }
3996 inline bool operator> (const wxString& s1, const wxString& s2)
3997     { return s1.Cmp(s2) >  0; }
3998 inline bool operator<=(const wxString& s1, const wxString& s2)
3999     { return s1.Cmp(s2) <= 0; }
4000 inline bool operator>=(const wxString& s1, const wxString& s2)
4001     { return s1.Cmp(s2) >= 0; }
4002
4003 inline bool operator==(const wxString& s1, const wxCStrData& s2)
4004     { return s1 == s2.AsString(); }
4005 inline bool operator==(const wxCStrData& s1, const wxString& s2)
4006     { return s1.AsString() == s2; }
4007 inline bool operator!=(const wxString& s1, const wxCStrData& s2)
4008     { return s1 != s2.AsString(); }
4009 inline bool operator!=(const wxCStrData& s1, const wxString& s2)
4010     { return s1.AsString() != s2; }
4011
4012 inline bool operator==(const wxString& s1, const wxScopedWCharBuffer& s2)
4013     { return (s1.Cmp((const wchar_t *)s2) == 0); }
4014 inline bool operator==(const wxScopedWCharBuffer& s1, const wxString& s2)
4015     { return (s2.Cmp((const wchar_t *)s1) == 0); }
4016 inline bool operator!=(const wxString& s1, const wxScopedWCharBuffer& s2)
4017     { return (s1.Cmp((const wchar_t *)s2) != 0); }
4018 inline bool operator!=(const wxScopedWCharBuffer& s1, const wxString& s2)
4019     { return (s2.Cmp((const wchar_t *)s1) != 0); }
4020
4021 inline bool operator==(const wxString& s1, const wxScopedCharBuffer& s2)
4022     { return (s1.Cmp((const char *)s2) == 0); }
4023 inline bool operator==(const wxScopedCharBuffer& s1, const wxString& s2)
4024     { return (s2.Cmp((const char *)s1) == 0); }
4025 inline bool operator!=(const wxString& s1, const wxScopedCharBuffer& s2)
4026     { return (s1.Cmp((const char *)s2) != 0); }
4027 inline bool operator!=(const wxScopedCharBuffer& s1, const wxString& s2)
4028     { return (s2.Cmp((const char *)s1) != 0); }
4029
4030 inline wxString operator+(const wxString& string, const wxScopedWCharBuffer& buf)
4031     { return string + (const wchar_t *)buf; }
4032 inline wxString operator+(const wxScopedWCharBuffer& buf, const wxString& string)
4033     { return (const wchar_t *)buf + string; }
4034
4035 inline wxString operator+(const wxString& string, const wxScopedCharBuffer& buf)
4036     { return string + (const char *)buf; }
4037 inline wxString operator+(const wxScopedCharBuffer& buf, const wxString& string)
4038     { return (const char *)buf + string; }
4039
4040 // comparison with char
4041 inline bool operator==(const wxUniChar& c, const wxString& s) { return s.IsSameAs(c); }
4042 inline bool operator==(const wxUniCharRef& c, const wxString& s) { return s.IsSameAs(c); }
4043 inline bool operator==(char c, const wxString& s) { return s.IsSameAs(c); }
4044 inline bool operator==(wchar_t c, const wxString& s) { return s.IsSameAs(c); }
4045 inline bool operator==(int c, const wxString& s) { return s.IsSameAs(c); }
4046 inline bool operator==(const wxString& s, const wxUniChar& c) { return s.IsSameAs(c); }
4047 inline bool operator==(const wxString& s, const wxUniCharRef& c) { return s.IsSameAs(c); }
4048 inline bool operator==(const wxString& s, char c) { return s.IsSameAs(c); }
4049 inline bool operator==(const wxString& s, wchar_t c) { return s.IsSameAs(c); }
4050 inline bool operator!=(const wxUniChar& c, const wxString& s) { return !s.IsSameAs(c); }
4051 inline bool operator!=(const wxUniCharRef& c, const wxString& s) { return !s.IsSameAs(c); }
4052 inline bool operator!=(char c, const wxString& s) { return !s.IsSameAs(c); }
4053 inline bool operator!=(wchar_t c, const wxString& s) { return !s.IsSameAs(c); }
4054 inline bool operator!=(int c, const wxString& s) { return !s.IsSameAs(c); }
4055 inline bool operator!=(const wxString& s, const wxUniChar& c) { return !s.IsSameAs(c); }
4056 inline bool operator!=(const wxString& s, const wxUniCharRef& c) { return !s.IsSameAs(c); }
4057 inline bool operator!=(const wxString& s, char c) { return !s.IsSameAs(c); }
4058 inline bool operator!=(const wxString& s, wchar_t c) { return !s.IsSameAs(c); }
4059
4060
4061 // wxString iterators comparisons
4062 inline bool wxString::iterator::operator==(const const_iterator& i) const
4063     { return i == *this; }
4064 inline bool wxString::iterator::operator!=(const const_iterator& i) const
4065     { return i != *this; }
4066 inline bool wxString::iterator::operator<(const const_iterator& i) const
4067     { return i > *this; }
4068 inline bool wxString::iterator::operator>(const const_iterator& i) const
4069     { return i < *this; }
4070 inline bool wxString::iterator::operator<=(const const_iterator& i) const
4071     { return i >= *this; }
4072 inline bool wxString::iterator::operator>=(const const_iterator& i) const
4073     { return i <= *this; }
4074
// in Unicode build also provide the comparisons with narrow C strings: the
// C string is converted to wxString and the two strings are then compared
#if wxUSE_UNICODE

#define wxCMP_CHAR_STRING(psz, str, op) wxString(psz) op str

wxDEFINE_ALL_COMPARISONS(const char *, const wxString&, wxCMP_CHAR_STRING)

#undef wxCMP_CHAR_STRING

#endif // wxUSE_UNICODE
4085
4086 // we also need to provide the operators for comparison with wxCStrData to
4087 // resolve ambiguity between operator(const wxChar *,const wxString &) and
4088 // operator(const wxChar *, const wxChar *) for "p == s.c_str()"
4089 //
4090 // notice that these are (shallow) pointer comparisons, not (deep) string ones
4091 #define wxCMP_CHAR_CSTRDATA(p, s, op) p op s.AsChar()
4092 #define wxCMP_WCHAR_CSTRDATA(p, s, op) p op s.AsWChar()
4093
4094 wxDEFINE_ALL_COMPARISONS(const wchar_t *, const wxCStrData&, wxCMP_WCHAR_CSTRDATA)
4095 wxDEFINE_ALL_COMPARISONS(const char *, const wxCStrData&, wxCMP_CHAR_CSTRDATA)
4096
4097 #undef wxCMP_CHAR_CSTRDATA
4098 #undef wxCMP_WCHAR_CSTRDATA
4099
4100 // ---------------------------------------------------------------------------
4101 // Implementation only from here until the end of file
4102 // ---------------------------------------------------------------------------
4103
#if wxUSE_STD_IOSTREAM

#include "wx/iosfwrap.h"

// output to narrow standard streams
WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream& os,
                                           const wxString& str);
WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream& os,
                                           const wxCStrData& cstr);
WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream& os,
                                           const wxScopedCharBuffer& buf);
#ifndef __BORLANDC__
WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream& os,
                                           const wxScopedWCharBuffer& buf);
#endif

#if wxUSE_UNICODE && defined(HAVE_WOSTREAM)

// output to wide standard streams
WXDLLIMPEXP_BASE wxSTD wostream& operator<<(wxSTD wostream& wos,
                                            const wxString& str);
WXDLLIMPEXP_BASE wxSTD wostream& operator<<(wxSTD wostream& wos,
                                            const wxCStrData& cstr);
WXDLLIMPEXP_BASE wxSTD wostream& operator<<(wxSTD wostream& wos,
                                            const wxScopedWCharBuffer& buf);

#endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)

#endif  // wxUSE_STD_IOSTREAM
4124
4125 // ---------------------------------------------------------------------------
4126 // wxCStrData implementation
4127 // ---------------------------------------------------------------------------
4128
4129 inline wxCStrData::wxCStrData(char *buf)
4130     : m_str(new wxString(buf)), m_offset(0), m_owned(true) {}
4131 inline wxCStrData::wxCStrData(wchar_t *buf)
4132     : m_str(new wxString(buf)), m_offset(0), m_owned(true) {}
4133
4134 inline wxCStrData::wxCStrData(const wxCStrData& data)
4135     : m_str(data.m_owned ? new wxString(*data.m_str) : data.m_str),
4136       m_offset(data.m_offset),
4137       m_owned(data.m_owned)
4138 {
4139 }
4140
4141 inline wxCStrData::~wxCStrData()
4142 {
4143     if ( m_owned )
4144         delete const_cast<wxString*>(m_str); // cast to silence warnings
4145 }
4146
4147 // AsChar() and AsWChar() implementations simply forward to wxString methods
4148
4149 inline const wchar_t* wxCStrData::AsWChar() const
4150 {
4151     const wchar_t * const p =
4152 #if wxUSE_UNICODE_WCHAR
4153         m_str->wc_str();
4154 #elif wxUSE_UNICODE_UTF8
4155         m_str->AsWChar(wxMBConvStrictUTF8());
4156 #else
4157         m_str->AsWChar(wxConvLibc);
4158 #endif
4159
4160     // in Unicode build the string always has a valid Unicode representation
4161     // and even if a conversion is needed (as in UTF8 case) it can't fail
4162     //
4163     // but in ANSI build the string contents might be not convertible to
4164     // Unicode using the current locale encoding so we do need to check for
4165     // errors
4166 #if !wxUSE_UNICODE
4167     if ( !p )
4168     {
4169         // if conversion fails, return empty string and not NULL to avoid
4170         // crashes in code written with either wxWidgets 2 wxString or
4171         // std::string behaviour in mind: neither of them ever returns NULL
4172         // from its c_str() and so we shouldn't neither
4173         //
4174         // notice that the same is done in AsChar() below and
4175         // wxString::wc_str() and mb_str() for the same reasons
4176         return L"";
4177     }
4178 #endif // !wxUSE_UNICODE
4179
4180     return p + m_offset;
4181 }
4182
4183 inline const char* wxCStrData::AsChar() const
4184 {
4185 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
4186     const char * const p = m_str->AsChar(wxConvLibc);
4187     if ( !p )
4188         return "";
4189 #else // !wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY
4190     const char * const p = m_str->mb_str();
4191 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
4192
4193     return p + m_offset;
4194 }
4195
4196 inline wxString wxCStrData::AsString() const
4197 {
4198     if ( m_offset == 0 )
4199         return *m_str;
4200     else
4201         return m_str->Mid(m_offset);
4202 }
4203
4204 inline const wxStringCharType *wxCStrData::AsInternal() const
4205 {
4206 #if wxUSE_UNICODE_UTF8
4207     return wxStringOperations::AddToIter(m_str->wx_str(), m_offset);
4208 #else
4209     return m_str->wx_str() + m_offset;
4210 #endif
4211 }
4212
4213 inline wxUniChar wxCStrData::operator*() const
4214 {
4215     if ( m_str->empty() )
4216         return wxUniChar(wxT('\0'));
4217     else
4218         return (*m_str)[m_offset];
4219 }
4220
4221 inline wxUniChar wxCStrData::operator[](size_t n) const
4222 {
4223     // NB: we intentionally use operator[] and not at() here because the former
4224     //     works for the terminating NUL while the latter does not
4225     return (*m_str)[m_offset + n];
4226 }
4227
4228 // ----------------------------------------------------------------------------
4229 // more wxCStrData operators
4230 // ----------------------------------------------------------------------------
4231
4232 // we need to define those to allow "size_t pos = p - s.c_str()" where p is
4233 // some pointer into the string
4234 inline size_t operator-(const char *p, const wxCStrData& cs)
4235 {
4236     return p - cs.AsChar();
4237 }
4238
4239 inline size_t operator-(const wchar_t *p, const wxCStrData& cs)
4240 {
4241     return p - cs.AsWChar();
4242 }
4243
4244 // ----------------------------------------------------------------------------
4245 // implementation of wx[W]CharBuffer inline methods using wxCStrData
4246 // ----------------------------------------------------------------------------
4247
4248 // FIXME-UTF8: move this to buffer.h
4249 inline wxCharBuffer::wxCharBuffer(const wxCStrData& cstr)
4250                     : wxCharTypeBufferBase(cstr.AsCharBuf())
4251 {
4252 }
4253
4254 inline wxWCharBuffer::wxWCharBuffer(const wxCStrData& cstr)
4255                     : wxCharTypeBufferBase(cstr.AsWCharBuf())
4256 {
4257 }
4258
4259 #if wxUSE_UNICODE_UTF8
4260 // ----------------------------------------------------------------------------
4261 // implementation of wxStringIteratorNode inline methods
4262 // ----------------------------------------------------------------------------
4263
4264 void wxStringIteratorNode::DoSet(const wxString *str,
4265                                  wxStringImpl::const_iterator *citer,
4266                                  wxStringImpl::iterator *iter)
4267 {
4268     m_prev = NULL;
4269     m_iter = iter;
4270     m_citer = citer;
4271     m_str = str;
4272     if ( str )
4273     {
4274         m_next = str->m_iterators.ptr;
4275         const_cast<wxString*>(m_str)->m_iterators.ptr = this;
4276         if ( m_next )
4277             m_next->m_prev = this;
4278     }
4279     else
4280     {
4281         m_next = NULL;
4282     }
4283 }
4284
4285 void wxStringIteratorNode::clear()
4286 {
4287     if ( m_next )
4288         m_next->m_prev = m_prev;
4289     if ( m_prev )
4290         m_prev->m_next = m_next;
4291     else if ( m_str ) // first in the list
4292         const_cast<wxString*>(m_str)->m_iterators.ptr = m_next;
4293
4294     m_next = m_prev = NULL;
4295     m_citer = NULL;
4296     m_iter = NULL;
4297     m_str = NULL;
4298 }
4299 #endif // wxUSE_UNICODE_UTF8
4300
4301 #if WXWIN_COMPATIBILITY_2_8
    // a lot of code out there doesn't explicitly include wx/crt.h, but uses
    // CRT wrappers that are now declared in wx/wxcrt.h and wx/wxcrtvararg.h,
    // so let's include this header now that wxString is defined and it's safe
    // to do it:
4306     #include "wx/crt.h"
4307 #endif
4308
4309 // ----------------------------------------------------------------------------
4310 // Checks on wxString characters
4311 // ----------------------------------------------------------------------------
4312
4313 template<bool (T)(const wxUniChar& c)>
4314     inline bool wxStringCheck(const wxString& val)
4315     {
4316         for ( wxString::const_iterator i = val.begin();
4317               i != val.end();
4318               ++i )
4319             if (T(*i) == 0)
4320                 return false;
4321         return true;
4322     }
4323
#endif  // _WX_WXSTRING_H__