include/wx/string.h

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        wx/string.h
   3 // Purpose:     wxString class
   4 // Author:      Vadim Zeitlin
   5 // Modified by:
   6 // Created:     29/01/98
   7 // Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
   8 // Licence:     wxWindows licence
   9 ///////////////////////////////////////////////////////////////////////////////
  10
  11 /*
  12     Efficient string class [more or less] compatible with MFC CString,
  13     wxWidgets version 1 wxString and std::string and some handy functions
  14     missing from string.h.
  15 */
  16
  17 #ifndef _WX_WXSTRING_H__
  18 #define _WX_WXSTRING_H__
  19
  20 // ----------------------------------------------------------------------------
  21 // headers
  22 // ----------------------------------------------------------------------------
  23
  24 #include "wx/defs.h"        // everybody should include this
  25
  26 #if defined(__WXMAC__) || defined(__VISAGECPP__)
  27     #include <ctype.h>
  28 #endif
  29
  30 #if defined(__VISAGECPP__) && __IBMCPP__ >= 400
  31    // problem in VACPP V4 with including stdlib.h multiple times
  32    // strconv includes it anyway
  33 #  include <stdio.h>
  34 #  include <string.h>
  35 #  include <stdarg.h>
  36 #  include <limits.h>
  37 #else
  38 #  include <string.h>
  39 #  include <stdio.h>
  40 #  include <stdarg.h>
  41 #  include <limits.h>
  42 #  include <stdlib.h>
  43 #endif
  44
  45 #include "wx/wxcrtbase.h"   // for wxChar, wxStrlen() etc.
  46 #include "wx/strvararg.h"
  47 #include "wx/buffer.h"      // for wxCharBuffer
  48 #include "wx/strconv.h"     // for wxConvertXXX() macros and wxMBConv classes
  49 #include "wx/stringimpl.h"
  50 #include "wx/stringops.h"
  51 #include "wx/unichar.h"
  52
  53 // by default we cache the mapping of the positions in UTF-8 string to the byte
  54 // offset as this results in noticeable performance improvements for loops over
  55 // strings using indices; comment out this line to disable this
  56 //
  57 // notice that this optimization is well worth using even in debug builds as it
  58 // changes asymptotic complexity of algorithms using indices to iterate over
  59 // wxString back to expected linear from quadratic
  60 //
  61 // also notice that wxTLS_TYPE() (__declspec(thread) in this case) is unsafe to
  62 // use in DLL build under pre-Vista Windows so we disable this code for now, if
  63 // anybody really needs to use UTF-8 build under Windows with this optimization
  64 // it would have to be re-tested and probably corrected
  65 // CS: under OSX release builds the string destructor/cache cleanup sometimes
  66 // crashes, disable until we find the true reason or a better workaround
  67 #if wxUSE_UNICODE_UTF8 && !defined(__WINDOWS__) && !defined(__WXOSX__)
  68     #define wxUSE_STRING_POS_CACHE 1
  69 #else
  70     #define wxUSE_STRING_POS_CACHE 0
  71 #endif
  72
  73 #if wxUSE_STRING_POS_CACHE
  74     #include "wx/tls.h"
  75
  76     // change this 0 to 1 to enable additional (very expensive) asserts
  77     // verifying that string caching logic works as expected
  78     #if 0
  79         #define wxSTRING_CACHE_ASSERT(cond) wxASSERT(cond)
  80     #else
  81         #define wxSTRING_CACHE_ASSERT(cond)
  82     #endif
  83 #endif // wxUSE_STRING_POS_CACHE
  84
  85 class WXDLLIMPEXP_FWD_BASE wxString;
  86
  87 // unless this symbol is predefined to disable the compatibility functions, do
  88 // use them
  89 #ifndef WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
  90     #define WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER 1
  91 #endif
  92
  93 namespace wxPrivate
  94 {
  95     template <typename T> struct wxStringAsBufHelper;
  96 }
  97
  98 // ---------------------------------------------------------------------------
  99 // macros
 100 // ---------------------------------------------------------------------------
 101
 102 // casts [unfortunately!] needed to call some broken functions which require
 103 // "char *" instead of "const char *"
 104 #define   WXSTRINGCAST (wxChar *)(const wxChar *)
 105 #define   wxCSTRINGCAST (wxChar *)(const wxChar *)
 106 #define   wxMBSTRINGCAST (char *)(const char *)
 107 #define   wxWCSTRINGCAST (wchar_t *)(const wchar_t *)
 108
 109 // ----------------------------------------------------------------------------
 110 // constants
 111 // ----------------------------------------------------------------------------
 112
 113 #if WXWIN_COMPATIBILITY_2_6
 114
 115 // deprecated in favour of wxString::npos, don't use in new code
 116 //
 117 // maximum possible length for a string means "take all string" everywhere
 118 #define wxSTRING_MAXLEN wxString::npos
 119
 120 #endif // WXWIN_COMPATIBILITY_2_6
 121
 122 // ---------------------------------------------------------------------------
 123 // global functions complementing standard C string library replacements for
 124 // strlen() and portable strcasecmp()
 125 //---------------------------------------------------------------------------
 126
 127 #if WXWIN_COMPATIBILITY_2_8
 128 // Use wxXXX() functions from wxcrt.h instead! These functions are for
 129 // backwards compatibility only.
 130
 131 // checks whether the passed in pointer is NULL and if the string is empty
 132 wxDEPRECATED( inline bool IsEmpty(const char *p) );
 133 inline bool IsEmpty(const char *p) { return (!p || !*p); }
 134
 135 // safe version of strlen() (returns 0 if passed NULL pointer)
 136 wxDEPRECATED( inline size_t Strlen(const char *psz) );
 137 inline size_t Strlen(const char *psz)
 138   { return psz ? strlen(psz) : 0; }
 139
 140 // portable strcasecmp/_stricmp
 141 wxDEPRECATED( inline int Stricmp(const char *psz1, const char *psz2) );
 142 inline int Stricmp(const char *psz1, const char *psz2)
 143     { return wxCRT_StricmpA(psz1, psz2); }
 144
 145 #endif // WXWIN_COMPATIBILITY_2_8
 146
 147 // ----------------------------------------------------------------------------
 148 // wxCStrData
 149 // ----------------------------------------------------------------------------
 150
 151 // Lightweight object returned by wxString::c_str() and implicitly convertible
 152 // to either const char* or const wchar_t*.
 153 class wxCStrData
 154 {
 155 private:
 156     // Ctors; for internal use by wxString and wxCStrData only
 157     wxCStrData(const wxString *str, size_t offset = 0, bool owned = false)
 158         : m_str(str), m_offset(offset), m_owned(owned) {}
 159
 160 public:
 161     // Ctor constructs the object from char literal; they are needed to make
 162     // operator?: compile and they intentionally take char*, not const char*
 163     inline wxCStrData(char *buf);
 164     inline wxCStrData(wchar_t *buf);
 165     inline wxCStrData(const wxCStrData& data);
 166
 167     inline ~wxCStrData();
 168
 169     // AsWChar() and AsChar() can't be defined here as they use wxString and so
 170     // must come after it and because of this won't be inlined when called from
 171     // wxString methods (without a lot of work to extract these wxString methods
 172     // from inside the class itself). But we still define them being inline
 173     // below to let compiler inline them from elsewhere. And because of this we
 174     // must declare them as inline here because otherwise some compilers give
 175     // warnings about them, e.g. mingw32 3.4.5 warns about "<symbol> defined
 176     // locally after being referenced with dllimport linkage" while IRIX
 177     // mipsPro 7.4 warns about "function declared inline after being called".
 178     inline const wchar_t* AsWChar() const;
 179     operator const wchar_t*() const { return AsWChar(); }
 180
 181     inline const char* AsChar() const;
 182     const unsigned char* AsUnsignedChar() const
 183         { return (const unsigned char *) AsChar(); }
 184     operator const char*() const { return AsChar(); }
 185     operator const unsigned char*() const { return AsUnsignedChar(); }
 186
 187     operator const void*() const { return AsChar(); }
 188
 189     // returns buffers that are valid as long as the associated wxString exists
 190     const wxScopedCharBuffer AsCharBuf() const
 191     {
 192         return wxScopedCharBuffer::CreateNonOwned(AsChar());
 193     }
 194
 195     const wxScopedWCharBuffer AsWCharBuf() const
 196     {
 197         return wxScopedWCharBuffer::CreateNonOwned(AsWChar());
 198     }
 199
 200     inline wxString AsString() const;
 201
 202     // returns the value as C string in internal representation (equivalent
 203     // to AsString().wx_str(), but more efficient)
 204     const wxStringCharType *AsInternal() const;
 205
 206     // allow expressions like "c_str()[0]":
 207     inline wxUniChar operator[](size_t n) const;
 208     wxUniChar operator[](int n) const { return operator[](size_t(n)); }
 209     wxUniChar operator[](long n) const { return operator[](size_t(n)); }
 210 #ifndef wxSIZE_T_IS_UINT
 211     wxUniChar operator[](unsigned int n) const { return operator[](size_t(n)); }
 212 #endif // size_t != unsigned int
 213
 214     // These operators are needed to emulate the pointer semantics of c_str():
 215     // expressions like "wxChar *p = str.c_str() + 1;" should continue to work
 216     // (we need both versions to resolve ambiguities). Note that this means
 217     // the 'n' value is interpreted as addition to char*/wchar_t* pointer, it
 218     // is *not* number of Unicode characters in wxString.
 219     wxCStrData operator+(int n) const
 220         { return wxCStrData(m_str, m_offset + n, m_owned); }
 221     wxCStrData operator+(long n) const
 222         { return wxCStrData(m_str, m_offset + n, m_owned); }
 223     wxCStrData operator+(size_t n) const
 224         { return wxCStrData(m_str, m_offset + n, m_owned); }
 225
 226     // and these for "str.c_str() + (p2 - p1)" (it also works for any integer
 227     // expression but it must be ptrdiff_t and not e.g. int to work in this
 228     // example):
 229     wxCStrData operator-(ptrdiff_t n) const
 230     {
 231         wxASSERT_MSG( n <= (ptrdiff_t)m_offset,
 232                       wxT("attempt to construct address before the beginning of the string") );
 233         return wxCStrData(m_str, m_offset - n, m_owned);
 234     }
 235
 236     // this operator is needed to make expressions like "*c_str()" or
 237     // "*(c_str() + 2)" work
 238     inline wxUniChar operator*() const;
 239
 240 private:
 241     // the wxString this object was returned for
 242     const wxString *m_str;
 243     // Offset into c_str() return value. Note that this is *not* offset in
 244     // m_str in Unicode characters. Instead, it is index into the
 245     // char*/wchar_t* buffer returned by c_str(). It's interpretation depends
 246     // on how is the wxCStrData instance used: if it is eventually cast to
 247     // const char*, m_offset will be in bytes form string's start; if it is
 248     // cast to const wchar_t*, it will be in wchar_t values.
 249     size_t m_offset;
 250     // should m_str be deleted, i.e. is it owned by us?
 251     bool m_owned;
 252
 253     friend class WXDLLIMPEXP_FWD_BASE wxString;
 254 };
 255
 256 // ----------------------------------------------------------------------------
 257 // wxStringPrintfMixin
 258 // ---------------------------------------------------------------------------
 259
 260 // NB: VC6 has a bug that causes linker errors if you have template methods
 261 //     in a class using __declspec(dllimport). The solution is to split such
 262 //     class into two classes, one that contains the template methods and does
 263 //     *not* use WXDLLIMPEXP_BASE and another class that contains the rest
 264 //     (with DLL linkage).
 265 //
 266 //     We only do this for VC6 here, because the code is less efficient
 267 //     (Printf() has to use dynamic_cast<>) and because OpenWatcom compiler
 268 //     cannot compile this code.
 269
 270 #if defined(__VISUALC__) && __VISUALC__ < 1300
 271     #define wxNEEDS_WXSTRING_PRINTF_MIXIN
 272 #endif
 273
 274 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
 275 // this class contains implementation of wxString's vararg methods, it's
 276 // exported from wxBase DLL
 277 class WXDLLIMPEXP_BASE wxStringPrintfMixinBase
 278 {
 279 protected:
 280     wxStringPrintfMixinBase() {}
 281
 282 #if !wxUSE_UTF8_LOCALE_ONLY
 283     int DoPrintfWchar(const wxChar *format, ...);
 284     static wxString DoFormatWchar(const wxChar *format, ...);
 285 #endif
 286 #if wxUSE_UNICODE_UTF8
 287     int DoPrintfUtf8(const char *format, ...);
 288     static wxString DoFormatUtf8(const char *format, ...);
 289 #endif
 290 };
 291
 292 // this class contains template wrappers for wxString's vararg methods, it's
 293 // intentionally *not* exported from the DLL in order to fix the VC6 bug
 294 // described above
 295 class wxStringPrintfMixin : public wxStringPrintfMixinBase
 296 {
 297 private:
 298     // to further complicate things, we can't return wxString from
 299     // wxStringPrintfMixin::Format() because wxString is not yet declared at
 300     // this point; the solution is to use this fake type trait template - this
 301     // way the compiler won't know the return type until Format() is used
 302     // (this doesn't compile with Watcom, but VC6 compiles it just fine):
 303     template<typename T> struct StringReturnType
 304     {
 305         typedef wxString type;
 306     };
 307
 308 public:
 309     // these are duplicated wxString methods, they're also declared below
 310     // if !wxNEEDS_WXSTRING_PRINTF_MIXIN:
 311
 312     // static wxString Format(const wString& format, ...) WX_ATTRIBUTE_PRINTF_1;
 313     WX_DEFINE_VARARG_FUNC_SANS_N0(static typename StringReturnType<T1>::type,
 314                                   Format, 1, (const wxFormatString&),
 315                                   DoFormatWchar, DoFormatUtf8)
 316     // We have to implement the version without template arguments manually
 317     // because of the StringReturnType<> hack, although WX_DEFINE_VARARG_FUNC
 318     // normally does it itself. It has to be a template so that we can use
 319     // the hack, even though there's no real template parameter. We can't move
 320     // it to wxStrig, because it would shadow these versions of Format() then.
 321     template<typename T>
 322     inline static typename StringReturnType<T>::type
 323     Format(const T& fmt)
 324     {
 325         // NB: this doesn't compile if T is not (some form of) a string;
 326         //     this makes Format's prototype equivalent to
 327         //     Format(const wxFormatString& fmt)
 328         return DoFormatWchar(wxFormatString(fmt));
 329     }
 330
 331     // int Printf(const wxString& format, ...);
 332     WX_DEFINE_VARARG_FUNC(int, Printf, 1, (const wxFormatString&),
 333                           DoPrintfWchar, DoPrintfUtf8)
 334     // int sprintf(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_2;
 335     WX_DEFINE_VARARG_FUNC(int, sprintf, 1, (const wxFormatString&),
 336                           DoPrintfWchar, DoPrintfUtf8)
 337
 338 protected:
 339     wxStringPrintfMixin() : wxStringPrintfMixinBase() {}
 340 };
 341 #endif // wxNEEDS_WXSTRING_PRINTF_MIXIN
 342
 343
 344 // ----------------------------------------------------------------------------
 345 // wxString: string class trying to be compatible with std::string, MFC
 346 //           CString and wxWindows 1.x wxString all at once
 347 // ---------------------------------------------------------------------------
 348
 349 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
 350     // "non dll-interface class 'wxStringPrintfMixin' used as base interface
 351     // for dll-interface class 'wxString'" -- this is OK in our case
 352     #pragma warning (push)
 353     #pragma warning (disable:4275)
 354 #endif
 355
 356 #if wxUSE_UNICODE_UTF8
 357 // see the comment near wxString::iterator for why we need this
 358 class WXDLLIMPEXP_BASE wxStringIteratorNode
 359 {
 360 public:
 361     wxStringIteratorNode()
 362         : m_str(NULL), m_citer(NULL), m_iter(NULL), m_prev(NULL), m_next(NULL) {}
 363     wxStringIteratorNode(const wxString *str,
 364                           wxStringImpl::const_iterator *citer)
 365         { DoSet(str, citer, NULL); }
 366     wxStringIteratorNode(const wxString *str, wxStringImpl::iterator *iter)
 367         { DoSet(str, NULL, iter); }
 368     ~wxStringIteratorNode()
 369         { clear(); }
 370
 371     inline void set(const wxString *str, wxStringImpl::const_iterator *citer)
 372         { clear(); DoSet(str, citer, NULL); }
 373     inline void set(const wxString *str, wxStringImpl::iterator *iter)
 374         { clear(); DoSet(str, NULL, iter); }
 375
 376     const wxString *m_str;
 377     wxStringImpl::const_iterator *m_citer;
 378     wxStringImpl::iterator *m_iter;
 379     wxStringIteratorNode *m_prev, *m_next;
 380
 381 private:
 382     inline void clear();
 383     inline void DoSet(const wxString *str,
 384                       wxStringImpl::const_iterator *citer,
 385                       wxStringImpl::iterator *iter);
 386
 387     // the node belongs to a particular iterator instance, it's not copied
 388     // when a copy of the iterator is made
 389     wxDECLARE_NO_COPY_CLASS(wxStringIteratorNode);
 390 };
 391 #endif // wxUSE_UNICODE_UTF8
 392
 393 class WXDLLIMPEXP_BASE wxString
 394 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
 395                                 : public wxStringPrintfMixin
 396 #endif
 397 {
 398   // NB: special care was taken in arranging the member functions in such order
 399   //     that all inline functions can be effectively inlined, verify that all
 400   //     performance critical functions are still inlined if you change order!
 401 public:
 402   // an 'invalid' value for string index, moved to this place due to a CW bug
 403   static const size_t npos;
 404
 405 private:
 406   // if we hadn't made these operators private, it would be possible to
 407   // compile "wxString s; s = 17;" without any warnings as 17 is implicitly
 408   // converted to char in C and we do have operator=(char)
 409   //
 410   // NB: we don't need other versions (short/long and unsigned) as attempt
 411   //     to assign another numeric type to wxString will now result in
 412   //     ambiguity between operator=(char) and operator=(int)
 413   wxString& operator=(int);
 414
 415   // these methods are not implemented - there is _no_ conversion from int to
 416   // string, you're doing something wrong if the compiler wants to call it!
 417   //
 418   // try `s << i' or `s.Printf("%d", i)' instead
 419   wxString(int);
 420
 421
 422   // buffer for holding temporary substring when using any of the methods
 423   // that take (char*,size_t) or (wchar_t*,size_t) arguments:
 424   template<typename T>
 425   struct SubstrBufFromType
 426   {
 427       T data;
 428       size_t len;
 429
 430       SubstrBufFromType(const T& data_, size_t len_)
 431           : data(data_), len(len_)
 432       {
 433           wxASSERT_MSG( len != npos, "must have real length" );
 434       }
 435   };
 436
 437 #if wxUSE_UNICODE_UTF8
 438   // even char* -> char* needs conversion, from locale charset to UTF-8
 439   typedef SubstrBufFromType<wxScopedCharBuffer>    SubstrBufFromWC;
 440   typedef SubstrBufFromType<wxScopedCharBuffer>    SubstrBufFromMB;
 441 #elif wxUSE_UNICODE_WCHAR
 442   typedef SubstrBufFromType<const wchar_t*>        SubstrBufFromWC;
 443   typedef SubstrBufFromType<wxScopedWCharBuffer>   SubstrBufFromMB;
 444 #else
 445   typedef SubstrBufFromType<const char*>           SubstrBufFromMB;
 446   typedef SubstrBufFromType<wxScopedCharBuffer>    SubstrBufFromWC;
 447 #endif
 448
 449
 450   // Functions implementing primitive operations on string data; wxString
 451   // methods and iterators are implemented in terms of it. The differences
 452   // between UTF-8 and wchar_t* representations of the string are mostly
 453   // contained here.
 454
 455 #if wxUSE_UNICODE_UTF8
 456   static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
 457                                     const wxMBConv& conv);
 458   static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength,
 459                                     const wxMBConv& conv);
 460 #elif wxUSE_UNICODE_WCHAR
 461   static SubstrBufFromMB ConvertStr(const char *psz, size_t nLength,
 462                                     const wxMBConv& conv);
 463 #else
 464   static SubstrBufFromWC ConvertStr(const wchar_t *pwz, size_t nLength,
 465                                     const wxMBConv& conv);
 466 #endif
 467
 468 #if !wxUSE_UNICODE_UTF8 // wxUSE_UNICODE_WCHAR or !wxUSE_UNICODE
 469   // returns C string encoded as the implementation expects:
 470   #if wxUSE_UNICODE
 471   static const wchar_t* ImplStr(const wchar_t* str)
 472     { return str ? str : wxT(""); }
 473   static const SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
 474     { return SubstrBufFromWC(str, (str && n == npos) ? wxWcslen(str) : n); }
 475   static wxScopedWCharBuffer ImplStr(const char* str,
 476                                      const wxMBConv& conv = wxConvLibc)
 477     { return ConvertStr(str, npos, conv).data; }
 478   static SubstrBufFromMB ImplStr(const char* str, size_t n,
 479                                  const wxMBConv& conv = wxConvLibc)
 480     { return ConvertStr(str, n, conv); }
 481   #else
 482   static const char* ImplStr(const char* str,
 483                              const wxMBConv& WXUNUSED(conv) = wxConvLibc)
 484     { return str ? str : ""; }
 485   static const SubstrBufFromMB ImplStr(const char* str, size_t n,
 486                                        const wxMBConv& WXUNUSED(conv) = wxConvLibc)
 487     { return SubstrBufFromMB(str, (str && n == npos) ? wxStrlen(str) : n); }
 488   static wxScopedCharBuffer ImplStr(const wchar_t* str)
 489     { return ConvertStr(str, npos, wxConvLibc).data; }
 490   static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
 491     { return ConvertStr(str, n, wxConvLibc); }
 492   #endif
 493
 494   // translates position index in wxString to/from index in underlying
 495   // wxStringImpl:
 496   static size_t PosToImpl(size_t pos) { return pos; }
 497   static void PosLenToImpl(size_t pos, size_t len,
 498                            size_t *implPos, size_t *implLen)
 499     { *implPos = pos; *implLen = len; }
 500   static size_t LenToImpl(size_t len) { return len; }
 501   static size_t PosFromImpl(size_t pos) { return pos; }
 502
 503   // we don't want to define these as empty inline functions as it could
 504   // result in noticeable (and quite unnecessary in non-UTF-8 build) slowdown
 505   // in debug build where the inline functions are not effectively inlined
 506   #define wxSTRING_INVALIDATE_CACHE()
 507   #define wxSTRING_INVALIDATE_CACHED_LENGTH()
 508   #define wxSTRING_UPDATE_CACHED_LENGTH(n)
 509   #define wxSTRING_SET_CACHED_LENGTH(n)
 510
 511 #else // wxUSE_UNICODE_UTF8
 512
 513   static wxScopedCharBuffer ImplStr(const char* str,
 514                                     const wxMBConv& conv = wxConvLibc)
 515     { return ConvertStr(str, npos, conv).data; }
 516   static SubstrBufFromMB ImplStr(const char* str, size_t n,
 517                                  const wxMBConv& conv = wxConvLibc)
 518     { return ConvertStr(str, n, conv); }
 519
 520   static wxScopedCharBuffer ImplStr(const wchar_t* str)
 521     { return ConvertStr(str, npos, wxMBConvUTF8()).data; }
 522   static SubstrBufFromWC ImplStr(const wchar_t* str, size_t n)
 523     { return ConvertStr(str, n, wxMBConvUTF8()); }
 524
 525 #if wxUSE_STRING_POS_CACHE
 526   // this is an extremely simple cache used by PosToImpl(): each cache element
 527   // contains the string it applies to and the index corresponding to the last
 528   // used position in this wxString in its m_impl string
 529   //
 530   // NB: notice that this struct (and nested Element one) must be a POD or we
 531   //     wouldn't be able to use a thread-local variable of this type, in
 532   //     particular it should have no ctor -- we rely on statics being
 533   //     initialized to 0 instead
 534   struct Cache
 535   {
 536       enum { SIZE = 8 };
 537
 538       struct Element
 539       {
 540           const wxString *str;  // the string to which this element applies
 541           size_t pos,           // the cached index in this string
 542                  impl,          // the corresponding position in its m_impl
 543                  len;           // cached length or npos if unknown
 544
 545           // reset cached index to 0
 546           void ResetPos() { pos = impl = 0; }
 547
 548           // reset position and length
 549           void Reset() { ResetPos(); len = npos; }
 550       };
 551
 552       // cache the indices mapping for the last few string used
 553       Element cached[SIZE];
 554
 555       // the last used index
 556       unsigned lastUsed;
 557   };
 558
 559 #ifndef wxHAS_COMPILER_TLS
 560   // we must use an accessor function and not a static variable when the TLS
 561   // variables support is implemented in the library (and not by the compiler)
 562   // because the global s_cache variable could be not yet initialized when a
 563   // ctor of another global object is executed and if that ctor uses any
 564   // wxString methods, bad things happen
 565   //
 566   // however notice that this approach does not work when compiler TLS is used,
 567   // at least not with g++ 4.1.2 under amd64 as it apparently compiles code
 568   // using this accessor incorrectly when optimizations are enabled (-O2 is
  569   // enough) -- luckily we don't need it then either, as static __thread
  570   // variables are initialized to 0 anyhow and so we can use the variable
  571   // directly
 572   WXEXPORT static Cache& GetCache()
 573   {
 574       static wxTLS_TYPE(Cache) s_cache;
 575
 576       return wxTLS_VALUE(s_cache);
 577   }
 578
 579   // this helper struct is used to ensure that GetCache() is called during
 580   // static initialization time, i.e. before any threads creation, as otherwise
 581   // the static s_cache construction inside GetCache() wouldn't be MT-safe
 582   friend struct wxStrCacheInitializer;
 583 #else // wxHAS_COMPILER_TLS
 584   static wxTLS_TYPE(Cache) ms_cache;
 585   static Cache& GetCache() { return wxTLS_VALUE(ms_cache); }
 586 #endif // !wxHAS_COMPILER_TLS/wxHAS_COMPILER_TLS
 587
 588   static Cache::Element *GetCacheBegin() { return GetCache().cached; }
 589   static Cache::Element *GetCacheEnd() { return GetCacheBegin() + Cache::SIZE; }
 590   static unsigned& LastUsedCacheElement() { return GetCache().lastUsed; }
 591
 592   // this is used in debug builds only to provide a convenient function,
 593   // callable from a debugger, to show the cache contents
 594   friend struct wxStrCacheDumper;
 595
 596   // uncomment this to have access to some profiling statistics on program
 597   // termination
 598   //#define wxPROFILE_STRING_CACHE
 599
 600 #ifdef wxPROFILE_STRING_CACHE
 601   static struct PosToImplCacheStats
 602   {
 603       unsigned postot,  // total non-trivial calls to PosToImpl
 604                poshits, // cache hits from PosToImpl()
 605                mishits, // cached position beyond the needed one
 606                sumpos,  // sum of all positions, used to compute the
 607                         // average position after dividing by postot
 608                sumofs,  // sum of all offsets after using the cache, used to
 609                         // compute the average after dividing by hits
 610                lentot,  // number of total calls to length()
 611                lenhits; // number of cache hits in length()
 612   } ms_cacheStats;
 613
 614   friend struct wxStrCacheStatsDumper;
 615
 616   #define wxCACHE_PROFILE_FIELD_INC(field) ms_cacheStats.field++
 617   #define wxCACHE_PROFILE_FIELD_ADD(field, val) ms_cacheStats.field += (val)
 618 #else // !wxPROFILE_STRING_CACHE
 619   #define wxCACHE_PROFILE_FIELD_INC(field)
 620   #define wxCACHE_PROFILE_FIELD_ADD(field, val)
 621 #endif // wxPROFILE_STRING_CACHE/!wxPROFILE_STRING_CACHE
 622
 623   // note: it could seem that the functions below shouldn't be inline because
 624   // they are big, contain loops and so the compiler shouldn't be able to
 625   // inline them anyhow, however moving them into string.cpp does decrease the
 626   // code performance by ~5%, at least when using g++ 4.1 so do keep them here
 627   // unless tests show that it's not advantageous any more
 628
 629   // return the pointer to the cache element for this string or NULL if not
 630   // cached
 631   Cache::Element *FindCacheElement() const
 632   {
 633       // profiling seems to show a small but consistent gain if we use this
 634       // simple loop instead of starting from the last used element (there are
 635       // a lot of misses in this function...)
 636       Cache::Element * const cacheBegin = GetCacheBegin();
 637 #ifndef wxHAS_COMPILER_TLS
 638       // during destruction tls calls may return NULL, in this case return NULL
 639       // immediately without accessing anything else
 640       if ( cacheBegin == NULL )
 641         return NULL;
 642 #endif
 643       Cache::Element * const cacheEnd = GetCacheEnd();
 644       for ( Cache::Element *c = cacheBegin; c != cacheEnd; c++ )
 645       {
 646           if ( c->str == this )
 647               return c;
 648       }
 649
 650       return NULL;
 651   }
 652
 653   // unlike FindCacheElement(), this one always returns a valid pointer to the
 654   // cache element for this string, it may have valid last cached position and
 655   // its corresponding index in the byte string or not
 656   Cache::Element *GetCacheElement() const
 657   {
 658       Cache::Element * const cacheBegin = GetCacheBegin();
 659       Cache::Element * const cacheEnd = GetCacheEnd();
 660       Cache::Element * const cacheStart = cacheBegin + LastUsedCacheElement();
 661
 662       // check the last used first, this does no (measurable) harm for a miss
 663       // but does help for simple loops addressing the same string all the time
 664       if ( cacheStart->str == this )
 665           return cacheStart;
 666
 667       // notice that we're going to check cacheStart again inside this call but
 668       // profiling shows that it's still faster to use a simple loop like
 669       // inside FindCacheElement() than manually looping with wrapping starting
 670       // from the cache entry after the start one
 671       Cache::Element *c = FindCacheElement();
 672       if ( !c )
 673       {
 674           // claim the next cache entry for this string
 675           c = cacheStart;
 676           if ( ++c == cacheEnd )
 677               c = cacheBegin;
 678
 679           c->str = this;
 680           c->Reset();
 681
 682           // and remember the last used element
 683           LastUsedCacheElement() = c - cacheBegin;
 684       }
 685
 686       return c;
 687   }
 688
 689   size_t DoPosToImpl(size_t pos) const
 690   {
 691       wxCACHE_PROFILE_FIELD_INC(postot);
 692
 693       // NB: although the case of pos == 1 (and offset from cached position
 694       //     equal to 1) are common, nothing is gained by writing special code
 695       //     for handling them, the compiler (at least g++ 4.1 used) seems to
 696       //     optimize the code well enough on its own
 697
 698       wxCACHE_PROFILE_FIELD_ADD(sumpos, pos);
 699
 700       Cache::Element * const cache = GetCacheElement();
 701
 702       // cached position can't be 0 so if it is, it means that this entry was
 703       // used for length caching only so far, i.e. it doesn't count as a hit
 704       // from our point of view
 705       if ( cache->pos )
 706       {
 707           wxCACHE_PROFILE_FIELD_INC(poshits);
 708       }
 709
 710       if ( pos == cache->pos )
 711           return cache->impl;
 712
 713       // this seems to happen only rarely so just reset the cache in this case
 714       // instead of complicating code even further by seeking backwards in this
 715       // case
 716       if ( cache->pos > pos )
 717       {
 718           wxCACHE_PROFILE_FIELD_INC(mishits);
 719
 720           cache->ResetPos();
 721       }
 722
 723       wxCACHE_PROFILE_FIELD_ADD(sumofs, pos - cache->pos);
 724
 725
 726       wxStringImpl::const_iterator i(m_impl.begin() + cache->impl);
 727       for ( size_t n = cache->pos; n < pos; n++ )
 728           wxStringOperations::IncIter(i);
 729
 730       cache->pos = pos;
 731       cache->impl = i - m_impl.begin();
 732
 733       wxSTRING_CACHE_ASSERT(
 734           (int)cache->impl == (begin() + pos).impl() - m_impl.begin() );
 735
 736       return cache->impl;
 737   }
 738
 739   void InvalidateCache()
 740   {
 741       Cache::Element * const cache = FindCacheElement();
 742       if ( cache )
 743           cache->Reset();
 744   }
 745
 746   void InvalidateCachedLength()
 747   {
 748       Cache::Element * const cache = FindCacheElement();
 749       if ( cache )
 750           cache->len = npos;
 751   }
 752
 753   void SetCachedLength(size_t len)
 754   {
 755       // we optimistically cache the length here even if the string wasn't
 756       // present in the cache before, this seems to do no harm and the
 757       // potential for avoiding length recomputation for long strings looks
 758       // interesting
 759       GetCacheElement()->len = len;
 760   }
 761
 762   void UpdateCachedLength(ptrdiff_t delta)
 763   {
 764       Cache::Element * const cache = FindCacheElement();
 765       if ( cache && cache->len != npos )
 766       {
 767           wxSTRING_CACHE_ASSERT( (ptrdiff_t)cache->len + delta >= 0 );
 768
 769           cache->len += delta;
 770       }
 771   }
 772
  // Cache maintenance macros for the build with the position cache enabled:
  // each one simply forwards to the member function of the corresponding
  // name defined above. The same macros are defined as empty in the
  // !wxUSE_STRING_POS_CACHE branch below.
  #define wxSTRING_INVALIDATE_CACHE() InvalidateCache()
  #define wxSTRING_INVALIDATE_CACHED_LENGTH() InvalidateCachedLength()
  #define wxSTRING_UPDATE_CACHED_LENGTH(n) UpdateCachedLength(n)
  #define wxSTRING_SET_CACHED_LENGTH(n) SetCachedLength(n)
 777 #else // !wxUSE_STRING_POS_CACHE
 778   size_t DoPosToImpl(size_t pos) const
 779   {
 780       return (begin() + pos).impl() - m_impl.begin();
 781   }
 782
  // There is no cache to maintain in this build, so all the cache
  // maintenance macros expand to nothing.
  #define wxSTRING_INVALIDATE_CACHE()
  #define wxSTRING_INVALIDATE_CACHED_LENGTH()
  #define wxSTRING_UPDATE_CACHED_LENGTH(n)
  #define wxSTRING_SET_CACHED_LENGTH(n)
 787 #endif // wxUSE_STRING_POS_CACHE/!wxUSE_STRING_POS_CACHE
 788
 789   size_t PosToImpl(size_t pos) const
 790   {
 791       return pos == 0 || pos == npos ? pos : DoPosToImpl(pos);
 792   }
 793
  // Convert the (pos, len) pair to the corresponding pair for m_impl,
  // returned in *implPos and *implLen: the callers in this file pass the
  // results directly to wxStringImpl methods. Only declared here, the
  // definition is not in this part of the file.
  void PosLenToImpl(size_t pos, size_t len, size_t *implPos, size_t *implLen) const;
 795
 796   size_t LenToImpl(size_t len) const
 797   {
 798       size_t pos, len2;
 799       PosLenToImpl(0, len, &pos, &len2);
 800       return len2;
 801   }
 802
 803   size_t PosFromImpl(size_t pos) const
 804   {
 805       if ( pos == 0 || pos == npos )
 806           return pos;
 807       else
 808           return const_iterator(this, m_impl.begin() + pos) - begin();
 809   }
 810 #endif // !wxUSE_UNICODE_UTF8/wxUSE_UNICODE_UTF8
 811
 812 public:
  // standard types
  //
  // These are the usual container-style member typedefs. Note that the
  // character types are wxUniChar, not wxChar, while the pointer types are
  // plain wxChar pointers.
  typedef wxUniChar value_type;
  typedef wxUniChar char_type;
  // wxUniCharRef is a class and not a real C++ reference (see how it is
  // created in iterator::operator*())
  typedef wxUniCharRef reference;
  typedef wxChar* pointer;
  typedef const wxChar* const_pointer;

  typedef size_t size_type;
  // const "reference" is a wxUniChar returned by value
  typedef wxUniChar const_reference;
 822
// Helper macros for defining the iterator classes below:
//
//  - WX_STR_ITERATOR_TAG is the standard iterator category tag to use for
//    the string iterators, it is only defined if wxUSE_STD_STRING is on
//  - WX_DEFINE_ITERATOR_CATEGORY(cat) defines iterator_category typedef as
//    cat if wxUSE_STD_STRING is on and expands to nothing otherwise
#if wxUSE_STD_STRING
  #if wxUSE_UNICODE_UTF8
    // random access is not O(1) in this build, although this is required of
    // a Random Access Iterator, so only claim to be bidirectional
    #define WX_STR_ITERATOR_TAG std::bidirectional_iterator_tag
  #else
    #define WX_STR_ITERATOR_TAG std::random_access_iterator_tag
  #endif
  #define WX_DEFINE_ITERATOR_CATEGORY(cat) typedef cat iterator_category;
#else
  // not defining iterator_category at all in this case is better than defining
  // it as some dummy type -- at least it results in more intelligible error
  // messages
  #define WX_DEFINE_ITERATOR_CATEGORY(cat)
#endif
 837
  // Defines the members common to the iterator and const_iterator classes
  // below, in both UTF-8 and non-UTF-8 builds:
  //
  //  - the standard iterator typedefs (value_type, difference_type, reference
  //    and pointer, plus iterator_category if WX_DEFINE_ITERATOR_CATEGORY()
  //    is not empty)
  //  - operator[], pre- and post-increment and decrement, += and -=
  //  - the difference between, and the comparisons of, two iterators of the
  //    same type
  //  - the private impl() accessor and the m_cur member holding the
  //    underlying wxStringImpl iterator
  //
  // Parameters:
  //  - iterator_name: the name of the class being defined, which must also be
  //    the name of the corresponding wxStringImpl iterator type
  //  - pointer_type, reference_type: the types to use for the pointer and
  //    reference typedefs
  //
  // operator*() and operator+/-(ptrdiff_t) are not defined here, each class
  // using this macro provides them itself.
  //
  // Notice that the expansion ends with a member declaration without the
  // terminating semicolon, so the macro invocation must be followed by one,
  // and that it leaves the access level at "private".
  #define WX_STR_ITERATOR_IMPL(iterator_name, pointer_type, reference_type) \
      private:                                                              \
          typedef wxStringImpl::iterator_name underlying_iterator;          \
      public:                                                               \
          WX_DEFINE_ITERATOR_CATEGORY(WX_STR_ITERATOR_TAG)                  \
          typedef wxUniChar value_type;                                     \
          typedef ptrdiff_t difference_type;                                \
          typedef reference_type reference;                                 \
          typedef pointer_type pointer;                                     \
                                                                            \
          reference operator[](size_t n) const { return *(*this + n); }     \
                                                                            \
          iterator_name& operator++()                                       \
            { wxStringOperations::IncIter(m_cur); return *this; }           \
          iterator_name& operator--()                                       \
            { wxStringOperations::DecIter(m_cur); return *this; }           \
          iterator_name operator++(int)                                     \
          {                                                                 \
              iterator_name tmp = *this;                                    \
              wxStringOperations::IncIter(m_cur);                           \
              return tmp;                                                   \
          }                                                                 \
          iterator_name operator--(int)                                     \
          {                                                                 \
              iterator_name tmp = *this;                                    \
              wxStringOperations::DecIter(m_cur);                           \
              return tmp;                                                   \
          }                                                                 \
                                                                            \
          iterator_name& operator+=(ptrdiff_t n)                            \
          {                                                                 \
              m_cur = wxStringOperations::AddToIter(m_cur, n);              \
              return *this;                                                 \
          }                                                                 \
          iterator_name& operator-=(ptrdiff_t n)                            \
          {                                                                 \
              m_cur = wxStringOperations::AddToIter(m_cur, -n);             \
              return *this;                                                 \
          }                                                                 \
                                                                            \
          difference_type operator-(const iterator_name& i) const           \
            { return wxStringOperations::DiffIters(m_cur, i.m_cur); }       \
                                                                            \
          bool operator==(const iterator_name& i) const                     \
            { return m_cur == i.m_cur; }                                    \
          bool operator!=(const iterator_name& i) const                     \
            { return m_cur != i.m_cur; }                                    \
                                                                            \
          bool operator<(const iterator_name& i) const                      \
            { return m_cur < i.m_cur; }                                     \
          bool operator>(const iterator_name& i) const                      \
            { return m_cur > i.m_cur; }                                     \
          bool operator<=(const iterator_name& i) const                     \
            { return m_cur <= i.m_cur; }                                    \
          bool operator>=(const iterator_name& i) const                     \
            { return m_cur >= i.m_cur; }                                    \
                                                                            \
      private:                                                              \
          /* for internal wxString use only: */                             \
          underlying_iterator impl() const { return m_cur; }                \
                                                                            \
          friend class wxString;                                            \
          friend class wxCStrData;                                          \
                                                                            \
      private:                                                              \
          underlying_iterator m_cur
 904
  // forward declaration: the iterator class declares comparison operators
  // taking const_iterator before the latter class is defined
  class WXDLLIMPEXP_FWD_BASE const_iterator;
 906
 907 #if wxUSE_UNICODE_UTF8
  // NB: In UTF-8 build, (non-const) iterator needs to keep a reference
  //     to the underlying wxStringImpl, because UTF-8 is a variable-length
  //     encoding and changing the value pointed to by an iterator (using
  //     its operator*) requires calling wxStringImpl::replace() if the old
  //     and new values differ in their encoding's length.
  //
  //     Furthermore, the replace() call may invalidate all iterators for the
  //     string, so we have to keep track of outstanding iterators and update
  //     them if replace() happens.
  //
  //     This is implemented by maintaining a linked list of iterators for
  //     every string and traversing it in wxUniCharRef::operator=(). The head
  //     of the list is stored in wxString. (FIXME-UTF8)
 921
 922   class WXDLLIMPEXP_BASE iterator
 923   {
 924       WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef);
 925
 926   public:
 927       iterator() {}
 928       iterator(const iterator& i)
 929           : m_cur(i.m_cur), m_node(i.str(), &m_cur) {}
 930       iterator& operator=(const iterator& i)
 931       {
 932           if (&i != this)
 933           {
 934               m_cur = i.m_cur;
 935               m_node.set(i.str(), &m_cur);
 936           }
 937           return *this;
 938       }
 939
 940       reference operator*()
 941         { return wxUniCharRef::CreateForString(*str(), m_cur); }
 942
 943       iterator operator+(ptrdiff_t n) const
 944         { return iterator(str(), wxStringOperations::AddToIter(m_cur, n)); }
 945       iterator operator-(ptrdiff_t n) const
 946         { return iterator(str(), wxStringOperations::AddToIter(m_cur, -n)); }
 947
 948       // Normal iterators need to be comparable with the const_iterators so
 949       // declare the comparison operators and implement them below after the
 950       // full const_iterator declaration.
 951       bool operator==(const const_iterator& i) const;
 952       bool operator!=(const const_iterator& i) const;
 953       bool operator<(const const_iterator& i) const;
 954       bool operator>(const const_iterator& i) const;
 955       bool operator<=(const const_iterator& i) const;
 956       bool operator>=(const const_iterator& i) const;
 957
 958   private:
 959       iterator(wxString *wxstr, underlying_iterator ptr)
 960           : m_cur(ptr), m_node(wxstr, &m_cur) {}
 961
 962       wxString* str() const { return const_cast<wxString*>(m_node.m_str); }
 963
 964       wxStringIteratorNode m_node;
 965
 966       friend class const_iterator;
 967   };
 968
 969   class WXDLLIMPEXP_BASE const_iterator
 970   {
 971       // NB: reference_type is intentionally value, not reference, the character
 972       //     may be encoded differently in wxString data:
 973       WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar);
 974
 975   public:
 976       const_iterator() {}
 977       const_iterator(const const_iterator& i)
 978           : m_cur(i.m_cur), m_node(i.str(), &m_cur) {}
 979       const_iterator(const iterator& i)
 980           : m_cur(i.m_cur), m_node(i.str(), &m_cur) {}
 981
 982       const_iterator& operator=(const const_iterator& i)
 983       {
 984           if (&i != this)
 985           {
 986               m_cur = i.m_cur;
 987               m_node.set(i.str(), &m_cur);
 988           }
 989           return *this;
 990       }
 991       const_iterator& operator=(const iterator& i)
 992         { m_cur = i.m_cur; m_node.set(i.str(), &m_cur); return *this; }
 993
 994       reference operator*() const
 995         { return wxStringOperations::DecodeChar(m_cur); }
 996
 997       const_iterator operator+(ptrdiff_t n) const
 998         { return const_iterator(str(), wxStringOperations::AddToIter(m_cur, n)); }
 999       const_iterator operator-(ptrdiff_t n) const
1000         { return const_iterator(str(), wxStringOperations::AddToIter(m_cur, -n)); }
1001
1002       // Notice that comparison operators taking non-const iterator are not
1003       // needed here because of the implicit conversion from non-const iterator
1004       // to const ones ensure that the versions for const_iterator declared
1005       // inside WX_STR_ITERATOR_IMPL can be used.
1006
1007   private:
1008       // for internal wxString use only:
1009       const_iterator(const wxString *wxstr, underlying_iterator ptr)
1010           : m_cur(ptr), m_node(wxstr, &m_cur) {}
1011
1012       const wxString* str() const { return m_node.m_str; }
1013
1014       wxStringIteratorNode m_node;
1015   };
1016
1017   size_t IterToImplPos(wxString::iterator i) const
1018     { return wxStringImpl::const_iterator(i.impl()) - m_impl.begin(); }
1019
1020   iterator GetIterForNthChar(size_t n)
1021     { return iterator(this, m_impl.begin() + PosToImpl(n)); }
1022   const_iterator GetIterForNthChar(size_t n) const
1023     { return const_iterator(this, m_impl.begin() + PosToImpl(n)); }
1024 #else // !wxUSE_UNICODE_UTF8
1025
1026   class WXDLLIMPEXP_BASE iterator
1027   {
1028       WX_STR_ITERATOR_IMPL(iterator, wxChar*, wxUniCharRef);
1029
1030   public:
1031       iterator() {}
1032       iterator(const iterator& i) : m_cur(i.m_cur) {}
1033
1034       reference operator*()
1035         { return wxUniCharRef::CreateForString(m_cur); }
1036
1037       iterator operator+(ptrdiff_t n) const
1038         { return iterator(wxStringOperations::AddToIter(m_cur, n)); }
1039       iterator operator-(ptrdiff_t n) const
1040         { return iterator(wxStringOperations::AddToIter(m_cur, -n)); }
1041
1042       // As in UTF-8 case above, define comparison operators taking
1043       // const_iterator too.
1044       bool operator==(const const_iterator& i) const;
1045       bool operator!=(const const_iterator& i) const;
1046       bool operator<(const const_iterator& i) const;
1047       bool operator>(const const_iterator& i) const;
1048       bool operator<=(const const_iterator& i) const;
1049       bool operator>=(const const_iterator& i) const;
1050
1051   private:
1052       // for internal wxString use only:
1053       iterator(underlying_iterator ptr) : m_cur(ptr) {}
1054       iterator(wxString *WXUNUSED(str), underlying_iterator ptr) : m_cur(ptr) {}
1055
1056       friend class const_iterator;
1057   };
1058
1059   class WXDLLIMPEXP_BASE const_iterator
1060   {
1061       // NB: reference_type is intentionally value, not reference, the character
1062       //     may be encoded differently in wxString data:
1063       WX_STR_ITERATOR_IMPL(const_iterator, const wxChar*, wxUniChar);
1064
1065   public:
1066       const_iterator() {}
1067       const_iterator(const const_iterator& i) : m_cur(i.m_cur) {}
1068       const_iterator(const iterator& i) : m_cur(i.m_cur) {}
1069
1070       reference operator*() const
1071         { return wxStringOperations::DecodeChar(m_cur); }
1072
1073       const_iterator operator+(ptrdiff_t n) const
1074         { return const_iterator(wxStringOperations::AddToIter(m_cur, n)); }
1075       const_iterator operator-(ptrdiff_t n) const
1076         { return const_iterator(wxStringOperations::AddToIter(m_cur, -n)); }
1077
1078       // As in UTF-8 case above, we don't need comparison operators taking
1079       // iterator because we have an implicit conversion from iterator to
1080       // const_iterator so the operators declared by WX_STR_ITERATOR_IMPL will
1081       // be used.
1082
1083   private:
1084       // for internal wxString use only:
1085       const_iterator(underlying_iterator ptr) : m_cur(ptr) {}
1086       const_iterator(const wxString *WXUNUSED(str), underlying_iterator ptr)
1087           : m_cur(ptr) {}
1088   };
1089
1090   iterator GetIterForNthChar(size_t n) { return begin() + n; }
1091   const_iterator GetIterForNthChar(size_t n) const { return begin() + n; }
1092 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
1093
1094   #undef WX_STR_ITERATOR_TAG
1095   #undef WX_STR_ITERATOR_IMPL
1096
1097   // This method is mostly used by wxWidgets itself and return the offset of
1098   // the given iterator in bytes relative to the start of the buffer
1099   // representing the current string contents in the current locale encoding.
1100   //
1101   // It is inefficient as it involves converting part of the string to this
1102   // encoding (and also unsafe as it simply returns 0 if the conversion fails)
1103   // and so should be avoided if possible, wx itself only uses it to implement
1104   // backwards-compatible API.
1105   ptrdiff_t IterOffsetInMBStr(const const_iterator& i) const
1106   {
1107       const wxString str(begin(), i);
1108
1109       // This is logically equivalent to strlen(str.mb_str()) but avoids
1110       // actually converting the string to multibyte and just computes the
1111       // length that it would have after conversion.
1112       size_t ofs = wxConvLibc.FromWChar(NULL, 0, str.wc_str(), str.length());
1113       return ofs == wxCONV_FAILED ? 0 : static_cast<ptrdiff_t>(ofs);
1114   }
1115
  // both iterator classes are friends of wxString
  friend class iterator;
  friend class const_iterator;
1118
1119   template <typename T>
1120   class reverse_iterator_impl
1121   {
1122   public:
1123       typedef T iterator_type;
1124
1125       WX_DEFINE_ITERATOR_CATEGORY(typename T::iterator_category)
1126       typedef typename T::value_type value_type;
1127       typedef typename T::difference_type difference_type;
1128       typedef typename T::reference reference;
1129       typedef typename T::pointer *pointer;
1130
1131       reverse_iterator_impl() {}
1132       reverse_iterator_impl(iterator_type i) : m_cur(i) {}
1133       reverse_iterator_impl(const reverse_iterator_impl& ri)
1134           : m_cur(ri.m_cur) {}
1135
1136       iterator_type base() const { return m_cur; }
1137
1138       reference operator*() const { return *(m_cur-1); }
1139       reference operator[](size_t n) const { return *(*this + n); }
1140
1141       reverse_iterator_impl& operator++()
1142         { --m_cur; return *this; }
1143       reverse_iterator_impl operator++(int)
1144         { reverse_iterator_impl tmp = *this; --m_cur; return tmp; }
1145       reverse_iterator_impl& operator--()
1146         { ++m_cur; return *this; }
1147       reverse_iterator_impl operator--(int)
1148         { reverse_iterator_impl tmp = *this; ++m_cur; return tmp; }
1149
1150       // NB: explicit <T> in the functions below is to keep BCC 5.5 happy
1151       reverse_iterator_impl operator+(ptrdiff_t n) const
1152         { return reverse_iterator_impl<T>(m_cur - n); }
1153       reverse_iterator_impl operator-(ptrdiff_t n) const
1154         { return reverse_iterator_impl<T>(m_cur + n); }
1155       reverse_iterator_impl operator+=(ptrdiff_t n)
1156         { m_cur -= n; return *this; }
1157       reverse_iterator_impl operator-=(ptrdiff_t n)
1158         { m_cur += n; return *this; }
1159
1160       unsigned operator-(const reverse_iterator_impl& i) const
1161         { return i.m_cur - m_cur; }
1162
1163       bool operator==(const reverse_iterator_impl& ri) const
1164         { return m_cur == ri.m_cur; }
1165       bool operator!=(const reverse_iterator_impl& ri) const
1166         { return !(*this == ri); }
1167
1168       bool operator<(const reverse_iterator_impl& i) const
1169         { return m_cur > i.m_cur; }
1170       bool operator>(const reverse_iterator_impl& i) const
1171         { return m_cur < i.m_cur; }
1172       bool operator<=(const reverse_iterator_impl& i) const
1173         { return m_cur >= i.m_cur; }
1174       bool operator>=(const reverse_iterator_impl& i) const
1175         { return m_cur <= i.m_cur; }
1176
1177   private:
1178       iterator_type m_cur;
1179   };
1180
  // reverse iterator types are the instantiations of the adaptor template
  // above for the two forward iterator classes
  typedef reverse_iterator_impl<iterator> reverse_iterator;
  typedef reverse_iterator_impl<const_iterator> const_reverse_iterator;
1183
1184 private:
1185   // used to transform an expression built using c_str() (and hence of type
1186   // wxCStrData) to an iterator into the string
1187   static const_iterator CreateConstIterator(const wxCStrData& data)
1188   {
1189       return const_iterator(data.m_str,
1190                             (data.m_str->begin() + data.m_offset).impl());
1191   }
1192
  // in UTF-8 STL build, creation from std::string requires conversion under
  // non-UTF8 locales, so we can't have and use wxString(wxStringImpl) ctor;
  // instead we define dummy type that lets us have wxString ctor for creation
  // from wxStringImpl that couldn't be used by user code (in all other builds,
  // "standard" ctors can be used):
  //
  // In all builds, FromImpl() creates a wxString whose m_impl is a copy of
  // the given implementation string.
#if wxUSE_UNICODE_UTF8 && wxUSE_STL_BASED_WXSTRING
  // the dummy type used only for selecting the ctor below
  struct CtorFromStringImplTag {};

  // the first argument is not used, src is copied to m_impl as is
  wxString(CtorFromStringImplTag* WXUNUSED(dummy), const wxStringImpl& src)
      : m_impl(src) {}

  static wxString FromImpl(const wxStringImpl& src)
      { return wxString((CtorFromStringImplTag*)NULL, src); }
#else
  #if !wxUSE_STL_BASED_WXSTRING
  wxString(const wxStringImpl& src) : m_impl(src) { }
  // else: already defined as wxString(wxStdString) below
  #endif
  static wxString FromImpl(const wxStringImpl& src) { return wxString(src); }
#endif
1213
1214 public:
1215   // constructors and destructor
1216     // ctor for an empty string
1217   wxString() {}
1218
1219     // copy ctor
1220   wxString(const wxString& stringSrc) : m_impl(stringSrc.m_impl) { }
1221
1222     // string containing nRepeat copies of ch
1223   wxString(wxUniChar ch, size_t nRepeat = 1 )
1224     { assign(nRepeat, ch); }
1225   wxString(size_t nRepeat, wxUniChar ch)
1226     { assign(nRepeat, ch); }
1227   wxString(wxUniCharRef ch, size_t nRepeat = 1)
1228     { assign(nRepeat, ch); }
1229   wxString(size_t nRepeat, wxUniCharRef ch)
1230     { assign(nRepeat, ch); }
1231   wxString(char ch, size_t nRepeat = 1)
1232     { assign(nRepeat, ch); }
1233   wxString(size_t nRepeat, char ch)
1234     { assign(nRepeat, ch); }
1235   wxString(wchar_t ch, size_t nRepeat = 1)
1236     { assign(nRepeat, ch); }
1237   wxString(size_t nRepeat, wchar_t ch)
1238     { assign(nRepeat, ch); }
1239
1240     // ctors from char* strings:
1241   wxString(const char *psz)
1242     : m_impl(ImplStr(psz)) {}
1243   wxString(const char *psz, const wxMBConv& conv)
1244     : m_impl(ImplStr(psz, conv)) {}
1245   wxString(const char *psz, size_t nLength)
1246     { assign(psz, nLength); }
1247   wxString(const char *psz, const wxMBConv& conv, size_t nLength)
1248   {
1249     SubstrBufFromMB str(ImplStr(psz, nLength, conv));
1250     m_impl.assign(str.data, str.len);
1251   }
1252
1253     // and unsigned char*:
1254   wxString(const unsigned char *psz)
1255     : m_impl(ImplStr((const char*)psz)) {}
1256   wxString(const unsigned char *psz, const wxMBConv& conv)
1257     : m_impl(ImplStr((const char*)psz, conv)) {}
1258   wxString(const unsigned char *psz, size_t nLength)
1259     { assign((const char*)psz, nLength); }
1260   wxString(const unsigned char *psz, const wxMBConv& conv, size_t nLength)
1261   {
1262     SubstrBufFromMB str(ImplStr((const char*)psz, nLength, conv));
1263     m_impl.assign(str.data, str.len);
1264   }
1265
1266     // ctors from wchar_t* strings:
1267   wxString(const wchar_t *pwz)
1268     : m_impl(ImplStr(pwz)) {}
1269   wxString(const wchar_t *pwz, const wxMBConv& WXUNUSED(conv))
1270     : m_impl(ImplStr(pwz)) {}
1271   wxString(const wchar_t *pwz, size_t nLength)
1272     { assign(pwz, nLength); }
1273   wxString(const wchar_t *pwz, const wxMBConv& WXUNUSED(conv), size_t nLength)
1274     { assign(pwz, nLength); }
1275
1276   wxString(const wxScopedCharBuffer& buf)
1277     { assign(buf.data(), buf.length()); }
1278   wxString(const wxScopedWCharBuffer& buf)
1279     { assign(buf.data(), buf.length()); }
1280
1281     // NB: this version uses m_impl.c_str() to force making a copy of the
1282     //     string, so that "wxString(str.c_str())" idiom for passing strings
1283     //     between threads works
1284   wxString(const wxCStrData& cstr)
1285       : m_impl(cstr.AsString().m_impl.c_str()) { }
1286
1287     // as we provide both ctors with this signature for both char and unsigned
1288     // char string, we need to provide one for wxCStrData to resolve ambiguity
1289   wxString(const wxCStrData& cstr, size_t nLength)
1290       : m_impl(cstr.AsString().Mid(0, nLength).m_impl) {}
1291
1292     // and because wxString is convertible to wxCStrData and const wxChar *
1293     // we also need to provide this one
1294   wxString(const wxString& str, size_t nLength)
1295     { assign(str, nLength); }
1296
1297
#if wxUSE_STRING_POS_CACHE
  // dtor is only needed when the position cache is used
  ~wxString()
  {
      // our cache entry must not survive us because another string could be
      // created at the same address later: this is unlikely, although still
      // possible, for the heap-allocated strings but perfectly common for
      // the stack-allocated ones
      this->InvalidateCache();
  }
#endif // wxUSE_STRING_POS_CACHE
1307
  // even if we're not built with wxUSE_STD_STRING_CONV_IN_WXSTRING == 1 it is
  // very convenient to allow implicit conversions from std::string to wxString
  // and vice versa as this allows using the same strings in non-GUI and GUI
  // code, however we don't want to unconditionally add this ctor as it would
  // make wx lib dependent on libstdc++ on some Linux versions which is bad, so
  // instead we ask the client code to define this wxUSE_STD_STRING symbol if
  // they need it
#if wxUSE_STD_STRING
  // ctor from the standard wide string
  #if wxUSE_UNICODE_WCHAR
    // m_impl can be initialized directly from it in this build
    wxString(const wxStdWideString& str)
        : m_impl(str)
    {
    }
  #else // UTF-8 or ANSI
    // otherwise go through assign() taking the wide data and its length
    wxString(const wxStdWideString& str)
    {
        assign(str.c_str(), str.length());
    }
  #endif

  // ctor from the standard narrow string
  #if !wxUSE_UNICODE // ANSI build
    // FIXME-UTF8: do this in UTF8 build #if wxUSE_UTF8_LOCALE_ONLY, too
    wxString(const std::string& str)
        : m_impl(str)
    {
    }
  #else // Unicode
    wxString(const std::string& str)
    {
        assign(str.c_str(), str.length());
    }
  #endif
#endif // wxUSE_STD_STRING
1331
  // Also always provide explicit conversions to std::[w]string in any case,
  // see below for the implicit ones.
  //
  // The wxStringToStd[W]stringRetType macros are defined to the return types
  // of ToStd[W]string() only to be reused in the declarations of the implicit
  // conversion operators, they are undefined again at the end of this block.
#if wxUSE_STD_STRING
  // We can avoid a copy if we already use this string type internally,
  // otherwise we create a copy on the fly:
  #if wxUSE_UNICODE_WCHAR && wxUSE_STL_BASED_WXSTRING
    #define wxStringToStdWstringRetType const wxStdWideString&
    // returns a reference to m_impl itself, no copy is made
    const wxStdWideString& ToStdWstring() const { return m_impl; }
  #else
    // wxStringImpl is either not std::string or needs conversion
    #define wxStringToStdWstringRetType wxStdWideString
    wxStdWideString ToStdWstring() const
    {
#if wxUSE_UNICODE_WCHAR
        // the data is already wide, just wrap it in a non-owning buffer
        wxScopedWCharBuffer buf =
            wxScopedWCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length());
#else // !wxUSE_UNICODE_WCHAR
        // get the wide data from wc_str()
        wxScopedWCharBuffer buf(wc_str());
#endif

        // use the explicit length rather than relying on NUL-termination
        return wxStdWideString(buf.data(), buf.length());
    }
  #endif

  #if (!wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY) && wxUSE_STL_BASED_WXSTRING
    // wxStringImpl is std::string in the encoding we want
    #define wxStringToStdStringRetType const std::string&
    // returns a reference to m_impl itself, no copy is made
    const std::string& ToStdString() const { return m_impl; }
  #else
    // wxStringImpl is either not std::string or needs conversion
    #define wxStringToStdStringRetType std::string
    std::string ToStdString() const
    {
        // get the narrow data from mb_str() and copy it using its explicit
        // length
        wxScopedCharBuffer buf(mb_str());
        return std::string(buf.data(), buf.length());
    }
  #endif

#if wxUSE_STD_STRING_CONV_IN_WXSTRING
    // Implicit conversions to std::[w]string are not provided by default as
    // they conflict with the implicit conversions to "const char/wchar_t *"
    // which we use for backwards compatibility but do provide them if
    // explicitly requested.
  operator wxStringToStdStringRetType() const { return ToStdString(); }
  operator wxStringToStdWstringRetType() const { return ToStdWstring(); }
#endif // wxUSE_STD_STRING_CONV_IN_WXSTRING

#undef wxStringToStdStringRetType
#undef wxStringToStdWstringRetType

#endif // wxUSE_STD_STRING
1383
1384   wxString Clone() const
1385   {
1386       // make a deep copy of the string, i.e. the returned string will have
1387       // ref count = 1 with refcounted implementation
1388       return wxString::FromImpl(wxStringImpl(m_impl.c_str(), m_impl.length()));
1389   }
1390
1391   // first valid index position
1392   const_iterator begin() const { return const_iterator(this, m_impl.begin()); }
1393   iterator begin() { return iterator(this, m_impl.begin()); }
1394   // position one after the last valid one
1395   const_iterator end() const { return const_iterator(this, m_impl.end()); }
1396   iterator end() { return iterator(this, m_impl.end()); }
1397
1398   // first element of the reversed string
1399   const_reverse_iterator rbegin() const
1400     { return const_reverse_iterator(end()); }
1401   reverse_iterator rbegin()
1402     { return reverse_iterator(end()); }
1403   // one beyond the end of the reversed string
1404   const_reverse_iterator rend() const
1405     { return const_reverse_iterator(begin()); }
1406   reverse_iterator rend()
1407     { return reverse_iterator(begin()); }
1408
1409   // std::string methods:
1410 #if wxUSE_UNICODE_UTF8
1411   size_t length() const
1412   {
1413 #if wxUSE_STRING_POS_CACHE
1414       wxCACHE_PROFILE_FIELD_INC(lentot);
1415
1416       Cache::Element * const elem = GetCacheElement();
1417
1418       if ( elem->len != npos )
1419       {
1420           // the length is already cached, just check that it's still right
1421           wxCACHE_PROFILE_FIELD_INC(lenhits);
1422
1423           wxSTRING_CACHE_ASSERT( (int)elem->len == end() - begin() );
1424       }
1425       else // not cached yet
1426       {
1427           // don't try to be clever and use elem->pos here: it's probably 0
1428           // anyhow as length() is usually called before indexing the string
1429           elem->len = end() - begin();
1430       }
1431
1432       return elem->len;
1433 #else // !wxUSE_STRING_POS_CACHE
1434       return end() - begin();
1435 #endif // wxUSE_STRING_POS_CACHE/!wxUSE_STRING_POS_CACHE
1436   }
1437 #else // !wxUSE_UNICODE_UTF8
1438   size_t length() const { return m_impl.length(); }
1439 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
1440
1441   size_type size() const { return length(); } // synonym for length()
1442   size_type max_size() const { return npos; }
1443
1444   bool empty() const { return m_impl.empty(); }
1445
1446   // NB: these are forwarded to m_impl and have no well-defined meaning in UTF-8 case
1447   size_type capacity() const { return m_impl.capacity(); }
1448   void reserve(size_t sz) { m_impl.reserve(sz); }
1449
1450   void resize(size_t nSize, wxUniChar ch = wxT('\0'))
1451   {
1452     const size_t len = length();
1453     if ( nSize == len )
1454         return;
1455
1456 #if wxUSE_UNICODE_UTF8
1457     if ( nSize < len )
1458     {
1459         wxSTRING_INVALIDATE_CACHE();
1460         // we can't use wxStringImpl::resize() for truncating the string as
1461         // it counts in bytes, not characters
1462         erase(nSize);
1463         return;
1464     }
1465
1466     // characters taking more than one byte can't be passed to it neither
1467     if ( !ch.IsAscii() )
1468     {
1469         append(nSize - len, ch);
1470         return;
1471     }
1472     // each ASCII character takes one byte, so grow the byte length by the
1473     // number of characters to add: nSize itself is in characters, not bytes
1474     wxSTRING_INVALIDATE_CACHED_LENGTH();
1475     m_impl.resize(m_impl.length() + (nSize - len), (wxStringCharType)ch);
1476 #else // !wxUSE_UNICODE_UTF8: length() is m_impl.length() here
1477     wxSTRING_INVALIDATE_CACHED_LENGTH();
1478     m_impl.resize(nSize, (wxStringCharType)ch);
1479 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
1480   }
1481
1482   wxString substr(size_t nStart = 0, size_t nLen = npos) const
1483   {
1484     size_t implStart, implCount; // the range as understood by m_impl
1485     PosLenToImpl(nStart, nLen, &implStart, &implCount);
1486     return FromImpl(m_impl.substr(implStart, implCount));
1487   }
1488
1489   // generic attributes & operations
1490     // as standard strlen(): same as length()
1491   size_t Len() const { return length(); }
1492     // true if the string contains no characters, same as empty()
1493   bool IsEmpty() const { return empty(); }
1494     // empty string is "false", so !str will return true
1495   bool operator!() const { return empty(); }
1496     // truncate the string to given length (not defined inline)
1497   wxString& Truncate(size_t uiLen);
1498     // empty string contents, same as clear()
1499   void Empty() { clear(); }
1500     // also the same as clear(), i.e. equivalent to Empty() here
1501   void Clear() { clear(); }
1502
1503   // contents test (none of these functions is defined inline)
1504     // Is an ascii value
1505   bool IsAscii() const;
1506     // Is a number
1507   bool IsNumber() const;
1508     // Is a word
1509   bool IsWord() const;
1510
1511   // data access (all indexes are 0 based)
1512     // read access: decode the character at the position n maps to in m_impl
1513     wxUniChar at(size_t n) const
1514       { return wxStringOperations::DecodeChar(m_impl.begin() + PosToImpl(n)); }
1515     wxUniChar GetChar(size_t n) const
1516       { return at(n); }
1517     // read/write access via a wxUniCharRef to the n-th character
1518     wxUniCharRef at(size_t n)
1519       { return *GetIterForNthChar(n); }
1520     wxUniCharRef GetWritableChar(size_t n)
1521       { return at(n); }
1522     // write access, same as "at(n) = ch"
1523     void SetChar(size_t n, wxUniChar ch)
1524       { at(n) = ch; }
1525
1526     // get last character, asserts that the string is not empty
1527     wxUniChar Last() const
1528     {
1529       wxASSERT_MSG( !empty(), wxT("wxString: index out of bounds") );
1530       return *rbegin();
1531     }
1532
1533     // get writable last character, asserts that the string is not empty
1534     wxUniCharRef Last()
1535     {
1536       wxASSERT_MSG( !empty(), wxT("wxString: index out of bounds") );
1537       return *rbegin();
1538     }
1539
1540     /*
1541        Note that we must define all of the overloads below to avoid
1542        ambiguity when using str[0]. They all simply forward to at().
1543      */
1544     wxUniChar operator[](int n) const
1545       { return at(n); }
1546     wxUniChar operator[](long n) const
1547       { return at(n); }
1548     wxUniChar operator[](size_t n) const
1549       { return at(n); }
1550 #ifndef wxSIZE_T_IS_UINT
1551     wxUniChar operator[](unsigned int n) const
1552       { return at(n); }
1553 #endif // size_t != unsigned int
1554
1555     // operator versions of GetWritableChar()
1556     wxUniCharRef operator[](int n)
1557       { return at(n); }
1558     wxUniCharRef operator[](long n)
1559       { return at(n); }
1560     wxUniCharRef operator[](size_t n)
1561       { return at(n); }
1562 #ifndef wxSIZE_T_IS_UINT
1563     wxUniCharRef operator[](unsigned int n)
1564       { return at(n); }
1565 #endif // size_t != unsigned int
1566
1567
1568     /*
1569         Overview of wxString conversions, implicit and explicit:
1570
1571         - wxString has a std::[w]string-like c_str() method, however it does
1572           not return a C-style string directly but instead returns wxCStrData
1573           helper object which is convertible to either "char *" narrow string
1574           or "wchar_t *" wide string. Usually the correct conversion will be
1575           applied by the compiler automatically but if this doesn't happen you
1576           need to explicitly choose one using wxCStrData::AsChar() or AsWChar()
1577           methods or another wxString conversion function.
1578
1579         - One of the places where the conversion does *NOT* happen correctly is
1580           when c_str() is passed to a vararg function such as printf() so you
1581           must *NOT* use c_str() with them. Either use wxPrintf() (all wx
1582           functions do handle c_str() correctly, even if they appear to be
1583           vararg (but they're not, really)) or add an explicit AsChar() or, if
1584           compatibility with previous wxWidgets versions is important, add a
1585           cast to "const char *".
1586
1587         - In non-STL mode only, wxString is also implicitly convertible to
1588           wxCStrData. The same warning as above applies.
1589
1590         - c_str() is polymorphic as it can be converted to either narrow or
1591           wide string. If you explicitly need one or the other, choose to use
1592           mb_str() (for narrow) or wc_str() (for wide) instead. Notice that
1593           these functions can return either the pointer to string directly (if
1594           this is what the string uses internally) or a temporary buffer
1595           containing the string and convertible to it. Again, conversion will
1596           usually be done automatically by the compiler but beware of the
1597           vararg functions: you need an explicit cast when using them.
1598
1599         - There are also non-const versions of mb_str() and wc_str() called
1600           char_str() and wchar_str(). They are only meant to be used with
1601           non-const-correct functions and they always return buffers.
1602
1603         - Finally wx_str() returns whatever string representation is used by
1604           wxString internally. It may be either a narrow or wide string
1605           depending on wxWidgets build mode but it will always be a raw pointer
1606           (and not a buffer).
1607      */
1608
1609     // explicit conversion to wxCStrData (see the overview comment above)
1610     wxCStrData c_str() const { return wxCStrData(this); }
1611     wxCStrData data() const { return c_str(); } // same as c_str()
1612
1613     // implicit conversion to wxCStrData, same as calling c_str()
1614     operator wxCStrData() const { return c_str(); }
1615
1616     // the first two operators conflict with operators for conversion to
1617     // std::string and they must be disabled if those conversions are enabled;
1618     // the next one only makes sense if conversions to char* are also defined
1619     // and not defining it in STL build also helps us to get clearer error
1620     // messages for the code which relies on implicit conversion to char* in
1621     // STL build
1622 #if !wxUSE_STD_STRING_CONV_IN_WXSTRING
1623     operator const char*() const { return c_str(); }
1624     operator const wchar_t*() const { return c_str(); }
1625
1626     // implicit conversion to untyped pointer for compatibility with previous
1627     // wxWidgets versions: this is the same as conversion to const char * so it
1628     // may fail!
1629     operator const void*() const { return c_str(); }
1630 #endif // !wxUSE_STD_STRING_CONV_IN_WXSTRING
1631
1632     // identical to c_str(), for MFC compatibility
1633     const wxCStrData GetData() const { return c_str(); }
1634
1635     // explicit conversion to C string in internal representation (char*,
1636     // wchar_t*, UTF-8-encoded char*, depending on the build): m_impl.c_str()
1637     const wxStringCharType *wx_str() const { return m_impl.c_str(); }
1638
1639     // conversion to *non-const* multibyte or widestring buffer built from the
1640     // result of mb_str() or wc_str(); modifying returned buffer won't affect
1641     // the string, they are only useful with const-incorrect functions
1642     wxWritableCharBuffer char_str(const wxMBConv& conv = wxConvLibc) const
1643         { return mb_str(conv); }
1644     wxWritableWCharBuffer wchar_str() const { return wc_str(); }
1645
1646     // return the string contents as a buffer of characters of type T (char
1647     // or wchar_t), optionally storing the buffer length in *len
1648     //
1649     // this is mostly/only useful for the template functions
1650     //
1651     // FIXME-VC6: the unused second argument only exists for VC6 which
1652     //            doesn't support explicit template function selection, do
1653     //            not use it unless you must support VC6!
1654     template <typename T>
1655     wxCharTypeBuffer<T> tchar_str(size_t *len = NULL,
1656                                   T * WXUNUSED(dummy) = NULL) const
1657     {
1658 #if wxUSE_UNICODE
1659         // the conversion depends on T, let the helper selected by it do it
1660         return wxPrivate::wxStringAsBufHelper<T>::Get(*this, len);
1661 #else // ANSI
1662         // T can only be char in ANSI build: return a non-owned buffer
1663         const size_t count = length();
1664         if ( len )
1665             *len = count;
1666         return wxCharTypeBuffer<T>::CreateNonOwned(wx_str(), count);
1667 #endif // Unicode build kind
1668     }
1669
1670     // conversion to/from plain (i.e. 7 bit) ASCII: this is useful for
1671     // converting numbers or strings which are certain not to contain special
1672     // chars (typically system functions, X atoms, environment variables etc.)
1673     //
1674     // the behaviour of these functions with the strings containing anything
1675     // else than 7 bit ASCII characters is undefined, use at your own risk.
1676 #if wxUSE_UNICODE // only declared here in this case
1677     static wxString FromAscii(const char *ascii, size_t len);
1678     static wxString FromAscii(const char *ascii);
1679     static wxString FromAscii(char ascii);
1680     const wxScopedCharBuffer ToAscii() const;
1681 #else // ANSI: the string is constructed from the characters directly
1682     static wxString FromAscii(const char *ascii) { return wxString( ascii ); }
1683     static wxString FromAscii(const char *ascii, size_t len)
1684         { return wxString( ascii, len ); }
1685     static wxString FromAscii(char ascii) { return wxString( ascii ); }
1686     const char *ToAscii() const { return c_str(); }
1687 #endif // Unicode/!Unicode
1688
1689     // also provide unsigned char overloads, which just cast to const char *,
1690     // as signed/unsigned doesn't matter for 7 bit ASCII characters
1691     static wxString FromAscii(const unsigned char *ascii)
1692         { return FromAscii((const char *)ascii); }
1693     static wxString FromAscii(const unsigned char *ascii, size_t len)
1694         { return FromAscii((const char *)ascii, len); }
1695
1696     // conversion to/from UTF-8:
1697 #if wxUSE_UNICODE_UTF8
1698     static wxString FromUTF8Unchecked(const char *utf8)
1699     {
1700       // NULL is accepted and results in an empty string
1701       if ( utf8 == NULL )
1702           return wxEmptyString;
1703       wxASSERT( wxStringOperations::IsValidUtf8String(utf8) );
1704       return FromImpl(wxStringImpl(utf8));
1705     }
1706     static wxString FromUTF8Unchecked(const char *utf8, size_t len)
1707     {
1708       // npos means NUL-terminated data, which the overload above handles
1709       // (including the NULL pointer case)
1710       if ( len == npos )
1711           return FromUTF8Unchecked(utf8);
1712       if ( utf8 == NULL ) return wxEmptyString;
1713       wxASSERT( wxStringOperations::IsValidUtf8String(utf8, len) );
1714       return FromImpl(wxStringImpl(utf8, len));
1715     }
1716
1717     static wxString FromUTF8(const char *utf8)
1718     {
1719         // the data is only used if it is non-NULL and passes the validation
1720         if ( utf8 && wxStringOperations::IsValidUtf8String(utf8) )
1721             return FromImpl(wxStringImpl(utf8));
1722         return "";
1723     }
1724     static wxString FromUTF8(const char *utf8, size_t len)
1725     {
1726         if ( len == npos )
1727             return FromUTF8(utf8); // NUL-terminated case
1728
1729         if ( utf8 && wxStringOperations::IsValidUtf8String(utf8, len) )
1730             return FromImpl(wxStringImpl(utf8, len));
1731
1732         return "";
1733     }
1734
1735     const wxScopedCharBuffer utf8_str() const // non-owned buffer over m_impl data
1736         { return wxCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); }
1737
1738     // this function exists in UTF-8 build only and returns the length of the
1739     // internal UTF-8 representation, i.e. of m_impl and not of the string
1740     size_t utf8_length() const { return m_impl.length(); }
1741 #elif wxUSE_UNICODE_WCHAR
1742     static wxString FromUTF8(const char *utf8, size_t len = npos)
1743       { return wxString(utf8, wxMBConvUTF8(), len); } // convert using wxMBConvUTF8
1744     static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
1745     {   // same conversion, but assert that non-empty input didn't give nothing
1746         const wxString s(utf8, wxMBConvUTF8(), len);
1747         wxASSERT_MSG( !utf8 || !len || !*utf8 || !s.empty(), // len == 0 is ok
1748                       "string must be valid UTF-8" );
1749         return s;
1750     }
1751     const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); }
1752 #else // ANSI
1753     // NB: all conversions here go through a temporary wide character buffer
1754     static wxString FromUTF8(const char *utf8)
1755       { return wxString(wxMBConvUTF8().cMB2WC(utf8)); }
1756     static wxString FromUTF8(const char *utf8, size_t len)
1757     {
1758         // npos is translated to wxNO_LEN for the conversion function
1759         size_t wlen = 0;
1760         wxScopedWCharBuffer
1761             buf(wxMBConvUTF8().cMB2WC(utf8, len == npos ? wxNO_LEN : len, &wlen));
1762         return wxString(buf.data(), wlen);
1763     }
1764     static wxString FromUTF8Unchecked(const char *utf8, size_t len = npos)
1765     {
1766         size_t wlen = 0;
1767         wxScopedWCharBuffer
1768             buf(wxMBConvUTF8().cMB2WC(utf8, len == npos ? wxNO_LEN : len, &wlen));
1769
1770         // converting non-empty input must not yield nothing; an explicit
1771         // length of 0 is valid empty input, so don't assert for it and
1772         // don't read *utf8 in this case
1773         wxASSERT_MSG( !utf8 || !len || !*utf8 || wlen,
1774                       "string must be valid UTF-8" );
1775
1776         return wxString(buf.data(), wlen);
1777     }
1778     const wxScopedCharBuffer utf8_str() const
1779       { return wxMBConvUTF8().cWC2MB(wc_str()); }
1780 #endif
1781
1782     const wxScopedCharBuffer ToUTF8() const { return utf8_str(); } // same as utf8_str()
1783
1784     // functions for storing binary data in wxString:
1785 #if wxUSE_UNICODE // the data is converted using wxConvISO8859_1
1786     static wxString From8BitData(const char *data, size_t len)
1787       { return wxString(data, wxConvISO8859_1, len); }
1788     // version for NUL-terminated data:
1789     static wxString From8BitData(const char *data)
1790       { return wxString(data, wxConvISO8859_1); }
1791     const wxScopedCharBuffer To8BitData() const
1792         { return mb_str(wxConvISO8859_1); }
1793 #else // ANSI: the data is used as is, without any conversion object
1794     static wxString From8BitData(const char *data, size_t len)
1795       { return wxString(data, len); }
1796     // version for NUL-terminated data:
1797     static wxString From8BitData(const char *data)
1798       { return wxString(data); }
1799     const wxScopedCharBuffer To8BitData() const
1800         { return wxScopedCharBuffer::CreateNonOwned(wx_str(), length()); }
1801 #endif // Unicode/ANSI
1802
1803     // conversions with (possible) format conversions: have to return a
1804     // buffer with temporary data
1805     //
1806     // the functions defined (in either Unicode or ANSI mode) are mb_str() to
1807     // return an ANSI (multibyte) string, wc_str() to return a wide string and
1808     // fn_str() to return a string which should be used with the OS APIs
1809     // accepting the file names. The return value is always the same, but the
1810     // type differs because a function may either return pointer to the buffer
1811     // directly or have to use intermediate buffer for translation.
1812
1813 #if wxUSE_UNICODE
1814
1815     // this is an optimization: even though using mb_str(wxConvLibc) does the
1816     // same thing (i.e. returns pointer to internal representation as locale is
1817     // always an UTF-8 one) in wxUSE_UTF8_LOCALE_ONLY case, we can avoid the
1818     // extra checks and the temporary buffer construction by providing a
1819     // separate mb_str() overload
1820 #if wxUSE_UTF8_LOCALE_ONLY
1821     const char* mb_str() const { return wx_str(); }
1822     const wxScopedCharBuffer mb_str(const wxMBConv& conv) const
1823     {
1824         return AsCharBuf(conv);
1825     }
1826 #else // !wxUSE_UTF8_LOCALE_ONLY
1827     const wxScopedCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const
1828     {
1829         return AsCharBuf(conv);
1830     }
1831 #endif // wxUSE_UTF8_LOCALE_ONLY/!wxUSE_UTF8_LOCALE_ONLY
1832
1833     const wxWX2MBbuf mbc_str() const { return mb_str(*wxConvCurrent); } // mb_str() using wxConvCurrent
1834
1835 #if wxUSE_UNICODE_WCHAR
1836     const wchar_t* wc_str() const { return wx_str(); } // internal data, no conversion
1837 #elif wxUSE_UNICODE_UTF8
1838     const wxScopedWCharBuffer wc_str() const
1839         { return AsWCharBuf(wxMBConvStrictUTF8()); }
1840 #endif
1841     // for compatibility with !wxUSE_UNICODE version, the argument is ignored
1842     const wxWX2WCbuf wc_str(const wxMBConv& WXUNUSED(conv)) const
1843       { return wc_str(); }
1844
1845 #if wxMBFILES
1846     const wxScopedCharBuffer fn_str() const { return mb_str(wxConvFile); }
1847 #else // !wxMBFILES
1848     const wxWX2WCbuf fn_str() const { return wc_str(); }
1849 #endif // wxMBFILES/!wxMBFILES
1850
1851 #else // ANSI
1852     const char* mb_str() const { return wx_str(); } // internal data, no conversion
1853
1854     // for compatibility with wxUSE_UNICODE version, the argument is ignored
1855     const char* mb_str(const wxMBConv& WXUNUSED(conv)) const { return wx_str(); }
1856
1857     const wxWX2MBbuf mbc_str() const { return mb_str(); }
1858
1859     const wxScopedWCharBuffer wc_str(const wxMBConv& conv = wxConvLibc) const
1860         { return AsWCharBuf(conv); }
1861
1862     const wxScopedCharBuffer fn_str() const
1863         { return wxConvFile.cWC2WX( wc_str( wxConvLibc ) ); }
1864 #endif // Unicode/ANSI
1865
1866 #if wxUSE_UNICODE_UTF8 // wide string obtained from wc_str()
1867     const wxScopedWCharBuffer t_str() const { return wc_str(); }
1868 #elif wxUSE_UNICODE_WCHAR // internal wide string
1869     const wchar_t* t_str() const { return wx_str(); }
1870 #else // ANSI: internal narrow string
1871     const char* t_str() const { return wx_str(); }
1872 #endif
1873
1874
1875   // overloaded assignment
1876     // from another wxString
1877   wxString& operator=(const wxString& stringSrc)
1878   {
1879     // self-assignment changes nothing, so the cache is left alone too
1880     if ( this == &stringSrc )
1881         return *this;
1882
1883     // the cached data refers to the old contents
1884     wxSTRING_INVALIDATE_CACHE();
1885     m_impl = stringSrc.m_impl;
1886     return *this;
1887   }
1888
1889   wxString& operator=(const wxCStrData& cstr) // assigns cstr.AsString()
1890     { return *this = cstr.AsString(); }
1891   wxString& operator=(wxUniChar ch) // from a character
1892   {
1893     wxSTRING_INVALIDATE_CACHE();
1894 #if wxUSE_UNICODE_UTF8
1895     if ( ch.IsAscii() )
1896         m_impl = (wxStringCharType)ch;
1897     else // the character must be encoded first
1898         m_impl = wxStringOperations::EncodeChar(ch);
1899 #else // !wxUSE_UNICODE_UTF8
1900     m_impl = (wxStringCharType)ch;
1901 #endif // wxUSE_UNICODE_UTF8/!wxUSE_UNICODE_UTF8
1902     return *this;
1903   }
1904
1905   wxString& operator=(wxUniCharRef ch) // all of these convert to wxUniChar
1906     { return operator=((wxUniChar)ch); }
1907   wxString& operator=(char ch)
1908     { return operator=(wxUniChar(ch)); }
1909   wxString& operator=(unsigned char ch)
1910     { return operator=(wxUniChar(ch)); }
1911   wxString& operator=(wchar_t ch)
1912     { return operator=(wxUniChar(ch)); }
1913     // from a C string: NULL pointer must be checked for explicitly in STL
1914     // build as STL probably will crash on it
1915 #if wxUSE_STL_BASED_WXSTRING
1916   wxString& operator=(const char *psz)
1917   {
1918       wxSTRING_INVALIDATE_CACHE();
1919
1920       if ( !psz )
1921           clear(); // NULL is treated as empty string
1922       else
1923           m_impl = ImplStr(psz);
1924
1925       return *this;
1926   }
1927
1928   wxString& operator=(const wchar_t *pwz)
1929   {
1930       wxSTRING_INVALIDATE_CACHE();
1931
1932       if ( !pwz )
1933           clear(); // same NULL handling as for narrow strings
1934       else
1935           m_impl = ImplStr(pwz);
1936
1937       return *this;
1938   }
1939 #else // !wxUSE_STL_BASED_WXSTRING
1940   wxString& operator=(const char *psz)
1941   {
1942       // no NULL check here, the pointer is passed on as is
1943       wxSTRING_INVALIDATE_CACHE();
1944       m_impl = ImplStr(psz);
1945
1946       return *this;
1947   }
1948
1949   wxString& operator=(const wchar_t *pwz)
1950   {
1951       // as above, but for a wide string
1952       wxSTRING_INVALIDATE_CACHE();
1953       m_impl = ImplStr(pwz);
1954
1955       return *this;
1956   }
1957 #endif // wxUSE_STL_BASED_WXSTRING/!wxUSE_STL_BASED_WXSTRING
1958
1959   wxString& operator=(const unsigned char *psz) // same as for const char *
1960     { return operator=((const char*)psz); }
1961
1962     // from wxScopedWCharBuffer, same as assign(s)
1963   wxString& operator=(const wxScopedWCharBuffer& s)
1964     { return assign(s); }
1965     // from wxScopedCharBuffer, same as assign(s)
1966   wxString& operator=(const wxScopedCharBuffer& s)
1967     { return assign(s); }
1968
1969   // string concatenation
1970     // in place concatenation
1971     /*
1972         Concatenate and return the result. Note that the left to right
1973         associativity of << allows writing things like "str << str1 << str2
1974         << ..." (unlike with +=)
1975      */
1976       // string += string
1977   wxString& operator<<(const wxString& s)
1978   {
1979 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
1980     wxASSERT_MSG( s.IsValid(),
1981                   wxT("did you forget to call UngetWriteBuf()?") );
1982 #endif
1983
1984     append(s);
1985     return *this;
1986   }
1987       // string += C string (wxCStrData is appended as a wxString)
1988   wxString& operator<<(const char *psz)
1989     { append(psz); return *this; }
1990   wxString& operator<<(const wchar_t *pwz)
1991     { append(pwz); return *this; }
1992   wxString& operator<<(const wxCStrData& psz)
1993     { append(psz.AsString()); return *this; }
1994       // string += char: append a single copy of the character
1995   wxString& operator<<(wxUniChar ch) { append(1, ch); return *this; }
1996   wxString& operator<<(wxUniCharRef ch) { append(1, ch); return *this; }
1997   wxString& operator<<(char ch) { append(1, ch); return *this; }
1998   wxString& operator<<(unsigned char ch) { append(1, ch); return *this; }
1999   wxString& operator<<(wchar_t ch) { append(1, ch); return *this; }
2000
2001       // string += buffer (i.e. from wxGetString)
2002   wxString& operator<<(const wxScopedWCharBuffer& s)
2003     { return append(s); }
2004   wxString& operator<<(const wxScopedCharBuffer& s)
2005     { return append(s); }
2006
2007     // string += string or C string
2008   wxString& Append(const wxString& s)
2009     {
2010         // an empty string is assigned to, to share the string if possible
2011         if ( !empty() )
2012             append(s);
2013         else
2014             *this = s;
2015         return *this;
2016     }
2017   wxString& Append(const char* psz) // these overloads just forward to append()
2018     { append(psz); return *this; }
2019   wxString& Append(const wchar_t* pwz)
2020     { append(pwz); return *this; }
2021   wxString& Append(const wxCStrData& psz)
2022     { append(psz); return *this; }
2023   wxString& Append(const wxScopedCharBuffer& psz)
2024     { append(psz); return *this; }
2025   wxString& Append(const wxScopedWCharBuffer& psz)
2026     { append(psz); return *this; }
2027   wxString& Append(const char* psz, size_t nLen) // nLen is passed on to append()
2028     { append(psz, nLen); return *this; }
2029   wxString& Append(const wchar_t* pwz, size_t nLen)
2030     { append(pwz, nLen); return *this; }
2031   wxString& Append(const wxCStrData& psz, size_t nLen)
2032     { append(psz, nLen); return *this; }
2033   wxString& Append(const wxScopedCharBuffer& psz, size_t nLen)
2034     { append(psz, nLen); return *this; }
2035   wxString& Append(const wxScopedWCharBuffer& psz, size_t nLen)
2036     { append(psz, nLen); return *this; }
2037     // append count copies of given character (append() takes count first)
2038   wxString& Append(wxUniChar ch, size_t count = 1u)
2039     { append(count, ch); return *this; }
2040   wxString& Append(wxUniCharRef ch, size_t count = 1u)
2041     { append(count, ch); return *this; }
2042   wxString& Append(char ch, size_t count = 1u)
2043     { append(count, ch); return *this; }
2044   wxString& Append(unsigned char ch, size_t count = 1u)
2045     { append(count, ch); return *this; }
2046   wxString& Append(wchar_t ch, size_t count = 1u)
2047     { append(count, ch); return *this; }
2048
2049     // prepend a string by assigning "str + *this", return the string itself
2050   wxString& Prepend(const wxString& str)
2051     { *this = str + *this; return *this; }
2052
2053     // non-destructive concatenation (friend functions, only declared here)
2054       // two strings
2055   friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string1,
2056                                              const wxString& string2);
2057       // string with a single char
2058   friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string, wxUniChar ch);
2059       // char with a string
2060   friend wxString WXDLLIMPEXP_BASE operator+(wxUniChar ch, const wxString& string);
2061       // string with C string (narrow or wide)
2062   friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string,
2063                                              const char *psz);
2064   friend wxString WXDLLIMPEXP_BASE operator+(const wxString& string,
2065                                              const wchar_t *pwz);
2066       // C string (narrow or wide) with string
2067   friend wxString WXDLLIMPEXP_BASE operator+(const char *psz,
2068                                              const wxString& string);
2069   friend wxString WXDLLIMPEXP_BASE operator+(const wchar_t *pwz,
2070                                              const wxString& string);
2071
2072   // stream-like functions: append the number formatted using Format()
2073       // append an int to the string
2074   wxString& operator<<(int i)
2075     { return (*this) << Format(wxT("%d"), i); }
2076       // append an unsigned int to the string
2077   wxString& operator<<(unsigned int ui)
2078     { return (*this) << Format(wxT("%u"), ui); }
2079       // append a long to the string
2080   wxString& operator<<(long l)
2081     { return (*this) << Format(wxT("%ld"), l); }
2082       // append an unsigned long to the string
2083   wxString& operator<<(unsigned long ul)
2084     { return (*this) << Format(wxT("%lu"), ul); }
2085 #ifdef wxHAS_LONG_LONG_T_DIFFERENT_FROM_LONG
2086       // append a long long if they exist and aren't longs
2087   wxString& operator<<(wxLongLong_t ll)
2088     {
2089       return (*this) << Format("%" wxLongLongFmtSpec "d", ll);
2090     }
2091       // append an unsigned long long
2092   wxString& operator<<(wxULongLong_t ull)
2093     {
2094       return (*this) << Format("%" wxLongLongFmtSpec "u" , ull);
2095     }
2096 #endif // wxHAS_LONG_LONG_T_DIFFERENT_FROM_LONG
2097       // append a float to the string, using "%f" format
2098   wxString& operator<<(float f)
2099     { return (*this) << Format(wxT("%f"), f); }
2100       // append a double to the string, using "%g" format unlike for float
2101   wxString& operator<<(double d)
2102     { return (*this) << Format(wxT("%g"), d); }
2103
2104   // string comparison
2105     // case-sensitive comparison (returns a value < 0, = 0 or > 0), via compare()
2106   int Cmp(const char *psz) const
2107     { return compare(psz); }
2108   int Cmp(const wchar_t *pwz) const
2109     { return compare(pwz); }
2110   int Cmp(const wxString& s) const
2111     { return compare(s); }
2112   int Cmp(const wxCStrData& s) const
2113     { return compare(s); }
2114   int Cmp(const wxScopedCharBuffer& s) const
2115     { return compare(s); }
2116   int Cmp(const wxScopedWCharBuffer& s) const
2117     { return compare(s); }
2118     // same as Cmp() but not case-sensitive (not defined inline)
2119   int CmpNoCase(const wxString& s) const;
2120
2121     // test for the string equality: case matters only if compareWithCase
2122   bool IsSameAs(const wxString& str, bool compareWithCase = true) const
2123   {
2124 #if !wxUSE_UNICODE_UTF8
2125       // skipped in UTF-8 build where length() is O(n), making this _slower_
2126       if ( length() != str.length() )
2127           return false;
2128 #endif
2129       const int rc = compareWithCase ? Cmp(str) : CmpNoCase(str);
2130       return rc == 0;
2131   }
2132   bool IsSameAs(const char *str, bool compareWithCase = true) const // no length check here
2133     { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; }
2134   bool IsSameAs(const wchar_t *str, bool compareWithCase = true) const
2135     { return (compareWithCase ? Cmp(str) : CmpNoCase(str)) == 0; }
2136
2137   bool IsSameAs(const wxCStrData& str, bool compareWithCase = true) const // compared as wxString
2138     { return IsSameAs(str.AsString(), compareWithCase); }
2139   bool IsSameAs(const wxScopedCharBuffer& str, bool compareWithCase = true) const // compared as C string
2140     { return IsSameAs(str.data(), compareWithCase); }
2141   bool IsSameAs(const wxScopedWCharBuffer& str, bool compareWithCase = true) const
2142     { return IsSameAs(str.data(), compareWithCase); }
2143     // comparison with a single character: returns true if equal
2144   bool IsSameAs(wxUniChar c, bool compareWithCase = true) const;
2145   // FIXME-UTF8: remove these overloads (they all convert c to wxUniChar)
2146   bool IsSameAs(wxUniCharRef c, bool compareWithCase = true) const
2147     { return IsSameAs(wxUniChar(c), compareWithCase); }
2148   bool IsSameAs(char c, bool compareWithCase = true) const
2149     { return IsSameAs(wxUniChar(c), compareWithCase); }
2150   bool IsSameAs(unsigned char c, bool compareWithCase = true) const
2151     { return IsSameAs(wxUniChar(c), compareWithCase); }
2152   bool IsSameAs(wchar_t c, bool compareWithCase = true) const
2153     { return IsSameAs(wxUniChar(c), compareWithCase); }
2154   bool IsSameAs(int c, bool compareWithCase = true) const
2155     { return IsSameAs(wxUniChar(c), compareWithCase); }
2156
2157   // simple sub-string extraction
2158       // return substring starting at nFirst of length nCount (or till the end
2159       // if nCount = default value)
2160   wxString Mid(size_t nFirst, size_t nCount = npos) const;
2161
2162       // operator version of Mid()
2163   wxString  operator()(size_t start, size_t len) const
2164     { return Mid(start, len); }
2165
2166       // check if the string starts with the given prefix and return the rest
2167       // of the string in the provided pointer if it is not NULL; otherwise
2168       // return false
2169   bool StartsWith(const wxString& prefix, wxString *rest = NULL) const;
2170       // check if the string ends with the given suffix and return the
2171       // beginning of the string before the suffix in the provided pointer if
2172       // it is not NULL; otherwise return false
2173   bool EndsWith(const wxString& suffix, wxString *rest = NULL) const;
2174
2175       // get the first nCount characters of the string
2176   wxString Left(size_t nCount) const;
2177       // get the last nCount characters of the string
2178   wxString Right(size_t nCount) const;
2179       // get all characters before the first occurrence of ch
2180       // (returns the whole string if ch is not found) and also put everything
2181       // following the first occurrence of ch into rest if it is non-NULL
2182   wxString BeforeFirst(wxUniChar ch, wxString *rest = NULL) const;
2183       // get all characters before the last occurrence of ch
2184       // (returns an empty string if ch is not found) and also put everything
2185       // following the last occurrence of ch into rest if it is non-NULL
2186   wxString BeforeLast(wxUniChar ch, wxString *rest = NULL) const;
2187       // get all characters after the first occurrence of ch
2188       // (returns an empty string if ch is not found)
2189   wxString AfterFirst(wxUniChar ch) const;
2190       // get all characters after the last occurrence of ch
2191       // (returns the whole string if ch is not found)
2192   wxString AfterLast(wxUniChar ch) const;
2193
2194     // for compatibility only, prefer the more explicitly named functions
         // above: Before() is BeforeLast() and After() is AfterFirst()
2195   wxString Before(wxUniChar ch) const { return this->BeforeLast(ch); }
2196   wxString After(wxUniChar ch) const { return this->AfterFirst(ch); }
2197
2198   // case conversion
2199       // convert to upper case in place, return the string itself
2200   wxString& MakeUpper();
2201       // return an upper-cased copy, this string itself is not modified
2202   wxString Upper() const { wxString copy(*this); copy.MakeUpper(); return copy; }
2203       // convert to lower case in place, return the string itself
2204   wxString& MakeLower();
2205       // return a lower-cased copy, this string itself is not modified
2206   wxString Lower() const { wxString copy(*this); copy.MakeLower(); return copy; }
2207       // convert the first character to the upper case and the rest to the
2208       // lower one, return the modified string itself
2209   wxString& MakeCapitalized();
2210       // return a copy with the first character in upper case and the rest
2211       // in lower case, this string itself is not modified
2212   wxString Capitalize() const { wxString copy(*this); copy.MakeCapitalized(); return copy; }
2213
2214   // trimming/padding whitespace (either side) and truncating
2215       // remove spaces from the right side (default) or from the left side
           // (if bFromRight is false)
2216   wxString& Trim(bool bFromRight = true);
2217       // add nCount copies of chPad at the end (default) or in the beginning
           // (if bFromRight is false)
2218   wxString& Pad(size_t nCount, wxUniChar chPad = wxT(' '), bool bFromRight = true);
2219
2220   // searching and replacing
2221       // search for a character (return its index, or -1 if not found)
2222   int Find(wxUniChar ch, bool bFromEnd = false) const;   // like strchr/strrchr
           // the overloads below convert their argument to wxUniChar and
           // forward to the function above
2223   int Find(wxUniCharRef ch, bool bFromEnd = false) const
2224     { return this->Find(wxUniChar(ch), bFromEnd); }
2225   int Find(char ch, bool bFromEnd = false) const
2226     { return this->Find(wxUniChar(ch), bFromEnd); }
2227   int Find(unsigned char ch, bool bFromEnd = false) const
2228     { return this->Find(wxUniChar(ch), bFromEnd); }
2229   int Find(wchar_t ch, bool bFromEnd = false) const
2230     { return this->Find(wxUniChar(ch), bFromEnd); }
2231       // search for a substring using find() (return the starting index, or
           // wxNOT_FOUND if find() returns npos)
2232   int Find(const wxString& sub) const               // like strstr
2233   {
2234     const size_type pos = find(sub);
2235     if ( pos == npos )
           return wxNOT_FOUND;

         return (int)pos;
2236   }
2237   int Find(const char *sub) const               // like strstr
2238   {
2239     const size_type pos = find(sub);
2240     if ( pos == npos )
           return wxNOT_FOUND;

         return (int)pos;
2241   }
2242   int Find(const wchar_t *sub) const               // like strstr
2243   {
2244     const size_type pos = find(sub);
2245     if ( pos == npos )
           return wxNOT_FOUND;

         return (int)pos;
2246   }
2247
2248   int Find(const wxCStrData& sub) const
         // search for the string returned by sub.AsString()
2249     { return this->Find(sub.AsString()); }
2250   int Find(const wxScopedCharBuffer& sub) const
         // search for the buffer contents using the pointer overload
2251     { return this->Find(sub.data()); }
2252   int Find(const wxScopedWCharBuffer& sub) const
         // search for the buffer contents using the pointer overload
2253     { return this->Find(sub.data()); }
2254
2255       // replace the first (or all, if bReplaceAll is true, which is the
2256       // default) occurrences of the substring strOld with strNew, returns
           // the number of replacements made
2257   size_t Replace(const wxString& strOld,
2258                  const wxString& strNew,
2259                  bool bReplaceAll = true);
2260
2261     // check if the string contents matches a mask containing '*' and '?'
         // NOTE(review): presumably '*' and '?' are shell-style wildcards --
         // confirm against the implementation
2262   bool Matches(const wxString& mask) const;
2263
2264   // conversion to numbers: all functions return true only if the whole
2265   // string is a number and put the value of this number into the pointer
2266   // provided, the base is the numeric base in which the conversion should be
2267   // done and must be comprised between 2 and 36 or be 0 in which case the
2268   // standard C rules apply (leading '0' => octal, "0x" => hex)
2269       // convert to a signed integer (long)
2270   bool ToLong(long *val, int base = 10) const;
2271       // convert to an unsigned integer (unsigned long)
2272   bool ToULong(unsigned long *val, int base = 10) const;
2273       // convert to wxLongLong_t (only declared if this type is defined)
2274 #if defined(wxLongLong_t)
2275   bool ToLongLong(wxLongLong_t *val, int base = 10) const;
2276       // convert to wxULongLong_t
2277   bool ToULongLong(wxULongLong_t *val, int base = 10) const;
2278 #endif // wxLongLong_t
2279       // convert to a double (there is no base parameter for this one)
2280   bool ToDouble(double *val) const;
2281
2282   // conversions to numbers using C locale
2283       // convert to a signed integer (long)
2284   bool ToCLong(long *val, int base = 10) const;
2285       // convert to an unsigned integer (unsigned long)
2286   bool ToCULong(unsigned long *val, int base = 10) const;
2287       // convert to a double
2288   bool ToCDouble(double *val) const;
2289
2290   // create a string representing the given floating point number with the
2291   // default (like %g) or fixed (if precision >= 0) precision; the default
       // precision value of -1 selects the %g-like format
2292     // in the current locale
2293   static wxString FromDouble(double val, int precision = -1);
2294     // in C locale
2295   static wxString FromCDouble(double val, int precision = -1);
2296
2297 #ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
2298   // formatted input/output
2299     // as sprintf(), returns the number of characters written or < 0 on error
2300     // (take 'this' into account in attribute parameter count)
         //
         // the commented out declaration below only shows the intended
         // signature, the function itself is declared by the macro following it
2301   // int Printf(const wxString& format, ...);
2302   WX_DEFINE_VARARG_FUNC(int, Printf, 1, (const wxFormatString&),
2303                         DoPrintfWchar, DoPrintfUtf8)
2304 #ifdef __WATCOMC__
2305   // workaround for http://bugzilla.openwatcom.org/show_bug.cgi?id=351
2306   WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const wxString&),
2307                               (wxFormatString(f1)));
2308   WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const wxCStrData&),
2309                               (wxFormatString(f1)));
2310   WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const char*),
2311                               (wxFormatString(f1)));
2312   WX_VARARG_WATCOM_WORKAROUND(int, Printf, 1, (const wchar_t*),
2313                               (wxFormatString(f1)));
2314 #endif // __WATCOMC__
2315 #endif // !wxNEEDS_WXSTRING_PRINTF_MIXIN
2316     // as vprintf(), returns the number of characters written or < 0 on error;
         // unlike Printf() this one is declared unconditionally
2317   int PrintfV(const wxString& format, va_list argptr);
2318
2319 #ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
2320     // static version of Printf(): returns the formatted string itself
         //
         // the commented out declaration below only shows the intended
         // signature, the function itself is declared by the macro following it
2321   // static wxString Format(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_1;
2322   WX_DEFINE_VARARG_FUNC(static wxString, Format, 1, (const wxFormatString&),
2323                         DoFormatWchar, DoFormatUtf8)
2324 #ifdef __WATCOMC__
2325   // workaround for http://bugzilla.openwatcom.org/show_bug.cgi?id=351
2326   WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const wxString&),
2327                               (wxFormatString(f1)));
2328   WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const wxCStrData&),
2329                               (wxFormatString(f1)));
2330   WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const char*),
2331                               (wxFormatString(f1)));
2332   WX_VARARG_WATCOM_WORKAROUND(static wxString, Format, 1, (const wchar_t*),
2333                               (wxFormatString(f1)));
2334 #endif // __WATCOMC__
2335 #endif // !wxNEEDS_WXSTRING_PRINTF_MIXIN
2336     // the same as above, but takes a va_list; unlike Format() this one is
         // declared unconditionally
2337   static wxString FormatV(const wxString& format, va_list argptr);
2338
2339   // raw access to string memory
2340     // reserve space for at least nLen characters and return true if the
2341     // capacity is sufficient afterwards; only works if the data of this
         // string is not shared
2342   bool Alloc(size_t nLen) { this->reserve(nLen); return !(this->capacity() < nLen); }
2343     // minimize the string's memory
2344     // only works if the data of this string is not shared
2345   bool Shrink();
2346 #if WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
2347     // These are deprecated, use wxStringBuffer or wxStringBufferLength instead
2348     //
2349     // get writable buffer of at least nLen bytes. UngetWriteBuf() *must* be
2350     // called a.s.a.p. to put string back in a reasonable state!
2351   wxDEPRECATED( wxStringCharType *GetWriteBuf(size_t nLen) );
2352     // call one of these immediately after GetWriteBuf() has been used
2353   wxDEPRECATED( void UngetWriteBuf() );
2354   wxDEPRECATED( void UngetWriteBuf(size_t nLen) );
2355 #endif // WXWIN_COMPATIBILITY_2_8 && !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
2356
2357   // wxWidgets version 1 compatibility functions
2358
2359   // use Mid() instead: returns the characters from position from up to and
       // including position to, i.e. to - from + 1 characters starting at from
2360   wxString SubString(size_t from, size_t to) const
2361       { const size_t count = to - from + 1; return this->Mid(from, count); }
2362     // values for the second parameter of CompareTo(): exact selects Cmp()
         // and any other value CmpNoCase()
2363   enum caseCompare {exact, ignoreCase};
2364     // values for the first parameter of Strip(); both is the bitwise
         // combination of leading and trailing
2365   enum stripType {leading = 0x1, trailing = 0x2, both = 0x3};
2366
2367 #ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
2368   // use Printf() instead: this is declared with the same implementation
       // functions (DoPrintfWchar/DoPrintfUtf8) as Printf() itself
2369   // (take 'this' into account in attribute parameter count)
2370   // int sprintf(const wxString& format, ...) WX_ATTRIBUTE_PRINTF_2;
2371   WX_DEFINE_VARARG_FUNC(int, sprintf, 1, (const wxFormatString&),
2372                         DoPrintfWchar, DoPrintfUtf8)
2373 #ifdef __WATCOMC__
2374   // workaround for http://bugzilla.openwatcom.org/show_bug.cgi?id=351
2375   WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const wxString&),
2376                               (wxFormatString(f1)));
2377   WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const wxCStrData&),
2378                               (wxFormatString(f1)));
2379   WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const char*),
2380                               (wxFormatString(f1)));
2381   WX_VARARG_WATCOM_WORKAROUND(int, sprintf, 1, (const wchar_t*),
2382                               (wxFormatString(f1)));
2383 #endif // __WATCOMC__
2384 #endif // !wxNEEDS_WXSTRING_PRINTF_MIXIN
2385
2386     // use Cmp() or CmpNoCase() instead: cmp selects which one is called
2387   int CompareTo(const wxChar* psz, caseCompare cmp = exact) const
2388     { if ( cmp != exact ) return this->CmpNoCase(psz); return this->Cmp(psz); }
2389
2390     // use length() instead
2391   size_t Length() const { return this->length(); }
2392     // count the number of occurrences of the character ch
         // NOTE(review): meaning inferred from the name and signature, the
         // implementation is not in this header -- confirm
2393   int Freq(wxUniChar ch) const;
2394     // use MakeLower() instead (its return value is discarded here)
2395   void LowerCase() { this->MakeLower(); }
2396     // use MakeUpper() instead (its return value is discarded here)
2397   void UpperCase() { this->MakeUpper(); }
2398     // use Trim() instead, except that this one doesn't change this string
2399   wxString Strip(stripType w = trailing) const;
2400
2401     // use Find() instead (more general variants not yet supported); the int
         // returned by Find() is converted to size_t here
2402   size_t Index(const wxChar* psz) const { return this->Find(psz); }
2403   size_t Index(wxUniChar ch)         const { return this->Find(ch);  }
2404     // use Truncate() instead
2405   wxString& Remove(size_t pos) { return this->Truncate(pos); }
2406   wxString& RemoveLast(size_t n = 1) { const size_t lenNew = length() - n; return this->Truncate(lenNew); }
2407
         // use erase() instead
2408   wxString& Remove(size_t nStart, size_t nLen)
2409       { return this->erase( nStart, nLen ); }
2410
2411     // use Find() instead: First() searches from the beginning of the string
         // and Last() from its end
2412   int First( wxUniChar ch ) const { return this->Find(ch); }
2413   int First( wxUniCharRef ch ) const { return this->Find(ch); }
2414   int First( char ch ) const { return this->Find(ch); }
2415   int First( unsigned char ch ) const { return this->Find(ch); }
2416   int First( wchar_t ch ) const { return this->Find(ch); }
2417   int First( const wxString& str ) const { return this->Find(str); }
2418   int Last( wxUniChar ch ) const { return this->Find(ch, true); }
2419   bool Contains(const wxString& str) const { return !(this->Find(str) == wxNOT_FOUND); }
2420
2421     // use empty() instead
2422   bool IsNull() const { return this->empty(); }
2423
2424   // std::string compatibility functions
2425
2426     // take nLen chars of str starting at nPos (implemented using the
         // assign() overload with the same parameters)
2427   wxString(const wxString& str, size_t nPos, size_t nLen)
2428       { assign(str, nPos, nLen); }
2429     // take all characters from first to last: the implementation string is
         // initialized directly from the underlying iterators
2430   wxString(const_iterator first, const_iterator last)
2431       : m_impl(first.impl(), last.impl()) { }
2432 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2433     // the 2 overloads below are for compatibility with the existing code using
2434     // pointers instead of iterators
2435   wxString(const char *first, const char *last)
2436   {
           // convert the last - first chars starting at first and assign the
           // result of the conversion to the implementation string
2437       SubstrBufFromMB str(ImplStr(first, last - first));
2438       m_impl.assign(str.data, str.len);
2439   }
2440   wxString(const wchar_t *first, const wchar_t *last)
2441   {
           // same as above for wide characters
2442       SubstrBufFromWC str(ImplStr(first, last - first));
2443       m_impl.assign(str.data, str.len);
2444   }
2445     // and this one is needed to compile code adding offsets to c_str() result
2446   wxString(const wxCStrData& first, const wxCStrData& last)
2447       : m_impl(CreateConstIterator(first).impl(),
2448                CreateConstIterator(last).impl())
2449   {
           // both arguments must refer to the same string, this is only
           // checked by the assert below
2450       wxASSERT_MSG( first.m_str == last.m_str,
2451                     wxT("pointers must be into the same string") );
2452   }
2453 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2454
2455   // lib.string.modifiers
2456     // append the (at most) n characters str[pos], ..., str[pos+n-1]; n may be
         // greater than the number of characters available in str after pos
         // (for example, npos) as it is passed to PosLenToImpl() unchanged
2457   wxString& append(const wxString& str, size_t pos, size_t n)
2458   {
     #if wxUSE_STRING_POS_CACHE
           // The cached length must grow by the number of characters really
           // taken from str and not by n itself, which may be npos or otherwise
           // exceed what is available after pos, so restrict it here in the
           // same way as assign(str, len) does to avoid caching invalid length.
           const size_t lenSrc = str.length();
           const size_t lenAvail = pos < lenSrc ? lenSrc - pos : 0;

2459       wxSTRING_UPDATE_CACHED_LENGTH(n < lenAvail ? n : lenAvail);
     #endif // wxUSE_STRING_POS_CACHE
2460
           // convert the character position and count to those used by the
           // implementation string before appending
2461       size_t from, len;
2462       str.PosLenToImpl(pos, n, &from, &len);
2463       m_impl.append(str.m_impl, from, len);
2464       return *this;
2465   }
2466     // append the whole string str
2467   wxString& append(const wxString& str)
2468   {
           // the length grows by exactly the number of characters in str
2469       wxSTRING_UPDATE_CACHED_LENGTH(str.length());
2470
2471       m_impl.append(str.m_impl);
2472       return *this;
2473   }
2474
2475     // append all characters of the NUL-terminated string sz (see the
         // overloads taking n below for appending only a part of it)
2476   wxString& append(const char *sz)
2477   {
           // the number of characters appended is not known here, so the
           // cached length is invalidated instead of being updated
2478       wxSTRING_INVALIDATE_CACHED_LENGTH();
2479
2480       m_impl.append(ImplStr(sz));
2481       return *this;
2482   }
2483
2484   wxString& append(const wchar_t *sz)
2485   {
           // as above: invalidate the cached length instead of updating it
2486       wxSTRING_INVALIDATE_CACHED_LENGTH();
2487
2488       m_impl.append(ImplStr(sz));
2489       return *this;
2490   }
2491
2492   wxString& append(const char *sz, size_t n)
2493   {
           // append the first n chars of sz: what is appended is the converted
           // buffer str below, whose length is str.len and not n, so the cached
           // length is invalidated instead of being incremented by n
2494       wxSTRING_INVALIDATE_CACHED_LENGTH();
2495
2496       SubstrBufFromMB str(ImplStr(sz, n));
2497       m_impl.append(str.data, str.len);
2498       return *this;
2499   }
2500   wxString& append(const wchar_t *sz, size_t n)
2501   {
           // append the first n wide characters of sz, the cached length is
           // incremented by n
           // NOTE(review): this assumes n is the real number of characters and
           // not npos -- confirm that callers never pass npos here
2502       wxSTRING_UPDATE_CACHED_LENGTH(n);
2503
2504       SubstrBufFromWC str(ImplStr(sz, n));
2505       m_impl.append(str.data, str.len);
2506       return *this;
2507   }
2508
2509   wxString& append(const wxCStrData& str)
         // append the string returned by str.AsString()
2510     { return this->append(str.AsString()); }
2511   wxString& append(const wxScopedCharBuffer& str)
         // append the whole buffer: its data pointer and length are forwarded
2512     { return this->append(str.data(), str.length()); }
2513   wxString& append(const wxScopedWCharBuffer& str)
2514     { return this->append(str.data(), str.length()); }
2515   wxString& append(const wxCStrData& str, size_t n)
         // append the first n characters of the string returned by AsString()
2516     { return this->append(str.AsString(), 0, n); }
2517   wxString& append(const wxScopedCharBuffer& str, size_t n)
         // append the first n elements of the buffer
2518     { return this->append(str.data(), n); }
2519   wxString& append(const wxScopedWCharBuffer& str, size_t n)
2520     { return this->append(str.data(), n); }
2521
2522     // append n copies of ch
2523   wxString& append(size_t n, wxUniChar ch)
2524   {
2525 #if wxUSE_UNICODE_UTF8
           // a non-ASCII character can't be stored as a single
           // wxStringCharType in this build, append its encoded form instead
2526       if ( !ch.IsAscii() )
2527       {
2528           wxSTRING_INVALIDATE_CACHED_LENGTH();
2529
2530           m_impl.append(wxStringOperations::EncodeNChars(n, ch));
2531       }
2532       else // ASCII
2533 #endif
2534       {
               // exactly n characters are added in this case
2535           wxSTRING_UPDATE_CACHED_LENGTH(n);
2536
2537           m_impl.append(n, (wxStringCharType)ch);
2538       }
2539
2540       return *this;
2541   }
2542
2543   wxString& append(size_t n, wxUniCharRef ch)
         // this and the overloads below convert ch to wxUniChar and forward to
         // append(size_t, wxUniChar)
2544     { return this->append(n, wxUniChar(ch)); }
2545   wxString& append(size_t n, char ch)
2546     { return this->append(n, wxUniChar(ch)); }
2547   wxString& append(size_t n, unsigned char ch)
2548     { return this->append(n, wxUniChar(ch)); }
2549   wxString& append(size_t n, wchar_t ch)
2550     { return this->append(n, wxUniChar(ch)); }
2551
2552     // append the characters in the range from first to last
2553   wxString& append(const_iterator first, const_iterator last)
2554   {
           // the number of characters in the range is not computed here, the
           // cached length is simply invalidated
2555       wxSTRING_INVALIDATE_CACHED_LENGTH();
2556
2557       m_impl.append(first.impl(), last.impl());
2558       return *this;
2559   }
2560 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
       // overloads accepting pointers (or c_str() results) instead of iterators
2561   wxString& append(const char *first, const char *last)
2562     { return this->append(first, last - first); }
2563   wxString& append(const wchar_t *first, const wchar_t *last)
2564     { return this->append(first, last - first); }
2565   wxString& append(const wxCStrData& first, const wxCStrData& last)
2566     { return this->append(CreateConstIterator(first), CreateConstIterator(last)); }
2567 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2568
2569     // same as `this_string = str'
2570   wxString& assign(const wxString& str)
2571   {
           // the new length is the length of str: it is passed to the macro
           // before m_impl is modified
2572       wxSTRING_SET_CACHED_LENGTH(str.length());
2573
2574       m_impl = str.m_impl;
2575
2576       return *this;
2577   }
2578
2579     // This is a non-standard-compliant overload taking the first "len"
2580     // characters of the source string (or all of them if len is greater than
         // its length when the position cache is used).
2581   wxString& assign(const wxString& str, size_t len)
2582   {
2583 #if wxUSE_STRING_POS_CACHE
2584       // It is legal to pass len > str.length() to wxStringImpl::assign() but
2585       // by restricting it here we save some work for that function so it's not
2586       // really less efficient and, at the same time, ensure that we don't
2587       // cache invalid length.
2588       const size_t lenSrc = str.length();
2589       if ( len > lenSrc )
2590           len = lenSrc;
2591
2592       wxSTRING_SET_CACHED_LENGTH(len);
2593 #endif // wxUSE_STRING_POS_CACHE
2594
           // the character count is converted by LenToImpl() before being
           // passed to the implementation string
2595       m_impl.assign(str.m_impl, 0, str.LenToImpl(len));
2596
2597       return *this;
2598   }
2599
2600     // same as `= str[pos..pos + n - 1]'; n may be greater than the number of
         // characters available in str after pos (for example, npos) as it is
         // passed to PosLenToImpl() unchanged
2601   wxString& assign(const wxString& str, size_t pos, size_t n)
2602   {
2603       size_t from, len;
2604       str.PosLenToImpl(pos, n, &from, &len);
     #if wxUSE_STRING_POS_CACHE
           // Compute the number of characters really assigned: n itself can't
           // be cached as it may be npos or otherwise exceed what is available
           // in str after pos (see the similar check in assign(str, len)). This
           // must be done before modifying m_impl in case str is the same
           // string as this one.
           const size_t lenSrc = str.length();
           const size_t lenAvail = pos < lenSrc ? lenSrc - pos : 0;
           const size_t lenNew = n < lenAvail ? n : lenAvail;
     #endif // wxUSE_STRING_POS_CACHE

2605       m_impl.assign(str.m_impl, from, len);
2606
2607       // it's important to call this after PosLenToImpl() above in case str is
2608       // the same string as this one
     #if wxUSE_STRING_POS_CACHE
2609       wxSTRING_SET_CACHED_LENGTH(lenNew);
     #endif // wxUSE_STRING_POS_CACHE
2610
2611       return *this;
2612   }
2613
2614     // same as `= sz' for a NUL-terminated string sz (see the overloads
         // taking n below for assigning only a part of it)
2615   wxString& assign(const char *sz)
2616   {
           // the contents is entirely replaced by a string of unknown length
2617       wxSTRING_INVALIDATE_CACHE();
2618
2619       m_impl.assign(ImplStr(sz));
2620
2621       return *this;
2622   }
2623
2624   wxString& assign(const wchar_t *sz)
2625   {
           // as above for a wide character string
2626       wxSTRING_INVALIDATE_CACHE();
2627
2628       m_impl.assign(ImplStr(sz));
2629
2630       return *this;
2631   }
2632
2633   wxString& assign(const char *sz, size_t n)
2634   {
           // assign the first n chars of sz: what is assigned is the converted
           // buffer str below, whose length is str.len and not n, so the cache
           // is invalidated instead of caching n as the new length
2635       wxSTRING_INVALIDATE_CACHE();
2636
2637       SubstrBufFromMB str(ImplStr(sz, n));
2638       m_impl.assign(str.data, str.len);
2639
2640       return *this;
2641   }
2642
2643   wxString& assign(const wchar_t *sz, size_t n)
2644   {
           // assign the first n wide characters of sz, n is cached as the new
           // length
           // NOTE(review): this assumes n is the real number of characters and
           // not npos -- confirm that callers never pass npos here
2645       wxSTRING_SET_CACHED_LENGTH(n);
2646
2647       SubstrBufFromWC str(ImplStr(sz, n));
2648       m_impl.assign(str.data, str.len);
2649
2650       return *this;
2651   }
2652
2653   wxString& assign(const wxCStrData& str)
         // assign the string returned by str.AsString()
2654     { return this->assign(str.AsString()); }
2655   wxString& assign(const wxScopedCharBuffer& str)
         // assign the whole buffer: its data pointer and length are forwarded
2656     { return this->assign(str.data(), str.length()); }
2657   wxString& assign(const wxScopedWCharBuffer& str)
2658     { return this->assign(str.data(), str.length()); }
2659   wxString& assign(const wxCStrData& str, size_t len)
         // assign the first len characters of the string returned by AsString()
2660     { return this->assign(str.AsString(), len); }
2661   wxString& assign(const wxScopedCharBuffer& str, size_t len)
         // assign the first len elements of the buffer
2662     { return this->assign(str.data(), len); }
2663   wxString& assign(const wxScopedWCharBuffer& str, size_t len)
2664     { return this->assign(str.data(), len); }
2665
2666     // same as `= n copies of ch'
2667   wxString& assign(size_t n, wxUniChar ch)
2668   {
           // the new length is n characters whichever branch is taken below
2669       wxSTRING_SET_CACHED_LENGTH(n);
2670
2671 #if wxUSE_UNICODE_UTF8
           // a non-ASCII character can't be stored as a single
           // wxStringCharType in this build, assign its encoded form instead
2672       if ( !ch.IsAscii() )
2673           m_impl.assign(wxStringOperations::EncodeNChars(n, ch));
2674       else
2675 #endif
2676           m_impl.assign(n, (wxStringCharType)ch);
2677
2678       return *this;
2679   }
2680
2681   wxString& assign(size_t n, wxUniCharRef ch)
         // this and the overloads below convert ch to wxUniChar and forward to
         // assign(size_t, wxUniChar)
2682     { return this->assign(n, wxUniChar(ch)); }
2683   wxString& assign(size_t n, char ch)
2684     { return this->assign(n, wxUniChar(ch)); }
2685   wxString& assign(size_t n, unsigned char ch)
2686     { return this->assign(n, wxUniChar(ch)); }
2687   wxString& assign(size_t n, wchar_t ch)
2688     { return this->assign(n, wxUniChar(ch)); }
2689
2690     // assign the characters in the range from first to last
2691   wxString& assign(const_iterator first, const_iterator last)
2692   {
           // the number of characters in the range is not computed here, the
           // cache is simply invalidated
2693       wxSTRING_INVALIDATE_CACHE();
2694
2695       m_impl.assign(first.impl(), last.impl());
2696
2697       return *this;
2698   }
2699 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
       // overloads accepting pointers (or c_str() results) instead of iterators
2700   wxString& assign(const char *first, const char *last)
2701     { return this->assign(first, last - first); }
2702   wxString& assign(const wchar_t *first, const wchar_t *last)
2703     { return this->assign(first, last - first); }
2704   wxString& assign(const wxCStrData& first, const wxCStrData& last)
2705     { return this->assign(CreateConstIterator(first), CreateConstIterator(last)); }
2706 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2707
2708     // comparison of the whole string with another one
2709   int compare(const wxString& str) const;
2710   int compare(const char* sz) const;
2711   int compare(const wchar_t* sz) const;
         // the 3 overloads below forward to the ones above
2712   int compare(const wxCStrData& str) const
2713     { return this->compare(str.AsString()); }
2714   int compare(const wxScopedCharBuffer& str) const
2715     { return this->compare(str.data()); }
2716   int compare(const wxScopedWCharBuffer& str) const
2717     { return this->compare(str.data()); }
2718     // comparison of the substring of length nLen starting at nStart with str
2719   int compare(size_t nStart, size_t nLen, const wxString& str) const;
2720     // comparison of 2 substrings
2721   int compare(size_t nStart, size_t nLen,
2722               const wxString& str, size_t nStart2, size_t nLen2) const;
2723     // substring comparison with first nCount characters of sz (or all of
         // them if nCount is left at its default value of npos)
2724   int compare(size_t nStart, size_t nLen,
2725               const char* sz, size_t nCount = npos) const;
2726   int compare(size_t nStart, size_t nLen,
2727               const wchar_t* sz, size_t nCount = npos) const;
2728
2729     // insert the whole string str before the character at position nPos
         // (implemented using the iterator range overload of insert())
2730   wxString& insert(size_t nPos, const wxString& str)
2731     { this->insert(this->GetIterForNthChar(nPos), str.begin(), str.end()); return *this; }
2732     // insert (at most) n chars of str starting at nStart (in str) at the
         // position nPos of this string; n may be greater than the number of
         // characters available in str after nStart (for example, npos) as it
         // is passed to PosLenToImpl() unchanged
2733   wxString& insert(size_t nPos, const wxString& str, size_t nStart, size_t n)
2734   {
     #if wxUSE_STRING_POS_CACHE
           // The cached length must grow by the number of characters really
           // taken from str and not by n itself, which may be npos or otherwise
           // exceed what is available after nStart, so restrict it here in the
           // same way as assign(str, len) does to avoid caching invalid length.
           const size_t lenSrc = str.length();
           const size_t lenAvail = nStart < lenSrc ? lenSrc - nStart : 0;

2735       wxSTRING_UPDATE_CACHED_LENGTH(n < lenAvail ? n : lenAvail);
     #endif // wxUSE_STRING_POS_CACHE
2736
           // convert the positions and the count to those used by the
           // implementation string before inserting
2737       size_t from, len;
2738       str.PosLenToImpl(nStart, n, &from, &len);
2739       m_impl.insert(PosToImpl(nPos), str.m_impl, from, len);
2740
2741       return *this;
2742   }
2743
2744     // insert all characters of the NUL-terminated string sz at the position
         // nPos (see the overloads taking n below for inserting a part of it)
2745   wxString& insert(size_t nPos, const char *sz)
2746   {
           // the number of characters inserted is not known here, so the cache
           // is invalidated instead of being updated
2747       wxSTRING_INVALIDATE_CACHE();
2748
2749       m_impl.insert(PosToImpl(nPos), ImplStr(sz));
2750
2751       return *this;
2752   }
2753
2754   wxString& insert(size_t nPos, const wchar_t *sz)
2755   {
           // as above for a wide character string
2756       wxSTRING_INVALIDATE_CACHE();
2757
2758       m_impl.insert(PosToImpl(nPos), ImplStr(sz)); return *this;
2759   }
2760
2761   wxString& insert(size_t nPos, const char *sz, size_t n)
2762   {
           // Insert the first n chars of sz at the position nPos. What is
           // inserted is the converted buffer str below, whose length is
           // str.len and not n, so the cached length can't be simply
           // incremented by n: invalidate the cache instead, as the other
           // overloads taking const char * (append(), assign() and
           // insert(nPos, sz)) do.
2763       wxSTRING_INVALIDATE_CACHE();
2764
2765       SubstrBufFromMB str(ImplStr(sz, n));
2766       m_impl.insert(PosToImpl(nPos), str.data, str.len);
2767
2768       return *this;
2769   }
2770
2771   wxString& insert(size_t nPos, const wchar_t *sz, size_t n)
2772   {
           // insert the first n wide characters of sz at the position nPos,
           // the cached length is incremented by n
           // NOTE(review): this assumes n is the real number of characters and
           // not npos -- confirm that callers never pass npos here
2773       wxSTRING_UPDATE_CACHED_LENGTH(n);
2774
2775       SubstrBufFromWC str(ImplStr(sz, n));
2776       m_impl.insert(PosToImpl(nPos), str.data, str.len);
2777
2778       return *this;
2779   }
2780
2781     // insert n copies of ch at the position nPos
2782   wxString& insert(size_t nPos, size_t n, wxUniChar ch)
2783   {
           // exactly n characters are added whichever branch is taken below
2784       wxSTRING_UPDATE_CACHED_LENGTH(n);
2785
2786 #if wxUSE_UNICODE_UTF8
           // a non-ASCII character can't be stored as a single
           // wxStringCharType in this build, insert its encoded form instead
2787       if ( !ch.IsAscii() )
2788           m_impl.insert(PosToImpl(nPos), wxStringOperations::EncodeNChars(n, ch));
2789       else
2790 #endif
2791           m_impl.insert(PosToImpl(nPos), n, (wxStringCharType)ch);
2792       return *this;
2793   }
2794
2795   iterator insert(iterator it, wxUniChar ch)
2796   {
           // insert the single character ch at the position of the iterator
           // it and return an iterator to the position of the insertion
2797       wxSTRING_UPDATE_CACHED_LENGTH(1);
2798
2799 #if wxUSE_UNICODE_UTF8
2800       if ( !ch.IsAscii() )
2801       {
               // the encoded character is inserted by position and the
               // iterator to return is rebuilt from this position afterwards
2802           size_t pos = IterToImplPos(it);
2803           m_impl.insert(pos, wxStringOperations::EncodeChar(ch));
2804           return iterator(this, m_impl.begin() + pos);
2805       }
2806       else
2807 #endif
2808           return iterator(this, m_impl.insert(it.impl(), (wxStringCharType)ch));
2809   }
2810
2811   void insert(iterator it, const_iterator first, const_iterator last)
2812   {
           // insert the characters of the range from first to last at the
           // position of it; the number of characters in the range is not
           // computed here, the cache is simply invalidated
2813       wxSTRING_INVALIDATE_CACHE();
2814
2815       m_impl.insert(it.impl(), first.impl(), last.impl());
2816   }
2817
2818 #if WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
       // overloads accepting pointers (or c_str() results) instead of
       // iterators for the range to insert: the pointer versions forward to
       // the position-based insert() using the offset of it from begin()
2819   void insert(iterator it, const char *first, const char *last)
2820     { this->insert(it - begin(), first, last - first); }
2821   void insert(iterator it, const wchar_t *first, const wchar_t *last)
2822     { this->insert(it - begin(), first, last - first); }
2823   void insert(iterator it, const wxCStrData& first, const wxCStrData& last)
2824     { this->insert(it, CreateConstIterator(first), CreateConstIterator(last)); }
2825 #endif // WXWIN_COMPATIBILITY_STRING_PTR_AS_ITER
2826
2827   void insert(iterator it, size_type n, wxUniChar ch)
2828   {
           // insert n copies of ch at the position of the iterator it
2829       wxSTRING_UPDATE_CACHED_LENGTH(n);
2830
2831 #if wxUSE_UNICODE_UTF8
           // a non-ASCII character can't be stored as a single
           // wxStringCharType in this build, insert its encoded form instead
2832       if ( !ch.IsAscii() )
2833           m_impl.insert(IterToImplPos(it), wxStringOperations::EncodeNChars(n, ch));
2834       else
2835 #endif
2836           m_impl.insert(it.impl(), n, (wxStringCharType)ch);
2837   }
2838
2839     // delete n characters starting at pos; by default (pos = 0, n = npos)
         // PosLenToImpl() is called with 0 and npos
2840   wxString& erase(size_type pos = 0, size_type n = npos)
2841   {
2842       wxSTRING_INVALIDATE_CACHE();
2843
           // convert the character position and count to those used by the
           // implementation string
2844       size_t from, len;
2845       PosLenToImpl(pos, n, &from, &len);
2846       m_impl.erase(from, len);
2847
2848       return *this;
2849   }
2850
2851     // delete characters from first up to last, the returned iterator is
         // built from the one returned by the implementation string's erase()
2852   iterator erase(iterator first, iterator last)
2853   {
2854       wxSTRING_INVALIDATE_CACHE();
2855
2856       return iterator(this, m_impl.erase(first.impl(), last.impl()));
2857   }
2858
         // delete the single character at the position of first
2859   iterator erase(iterator first)
2860   {
           // exactly one character is removed
2861       wxSTRING_UPDATE_CACHED_LENGTH(-1);
2862
2863       return iterator(this, m_impl.erase(first.impl()));
2864   }
2865
2866 #ifdef wxSTRING_BASE_HASNT_CLEAR
       // make the string empty: clear() of the implementation string can't be
       // used in this case, so erase() with its default arguments is called
2867   void clear() { erase(); }
2868 #else
       // make the string empty
2869   void clear()
2870   {
           // the length of the string is 0 after clearing it
2871       wxSTRING_SET_CACHED_LENGTH(0);
2872
2873       m_impl.clear();
2874   }
2875 #endif
2876
2877     // replaces the substring of length nLen starting at nStart with the
         // NUL-terminated string sz
2878   wxString& replace(size_t nStart, size_t nLen, const char* sz)
2879   {
2880       wxSTRING_INVALIDATE_CACHE();
2881
           // convert the character position and count to those used by the
           // implementation string
2882       size_t from, len;
2883       PosLenToImpl(nStart, nLen, &from, &len);
2884       m_impl.replace(from, len, ImplStr(sz));
2885
2886       return *this;
2887   }
2888
2889   wxString& replace(size_t nStart, size_t nLen, const wchar_t* sz)
2890   {
           // as above for a wide character string
2891       wxSTRING_INVALIDATE_CACHE();
2892
2893       size_t from, len;
2894       PosLenToImpl(nStart, nLen, &from, &len);
2895       m_impl.replace(from, len, ImplStr(sz));
2896
2897       return *this;
2898   }
2899
2900     // replaces the substring of length nLen starting at nStart with the
         // whole string str
2901   wxString& replace(size_t nStart, size_t nLen, const wxString& str)
2902   {
2903       wxSTRING_INVALIDATE_CACHE();
2904
           // convert the character position and count to those used by the
           // implementation string
2905       size_t from, len;
2906       PosLenToImpl(nStart, nLen, &from, &len);
2907       m_impl.replace(from, len, str.m_impl);
2908
2909       return *this;
2910   }
2911
2912     // replaces the substring of length nLen starting at nStart with nCount
         // copies of ch
2913   wxString& replace(size_t nStart, size_t nLen, size_t nCount, wxUniChar ch)
2914   {
2915       wxSTRING_INVALIDATE_CACHE();
2916
2917       size_t from, len;
2918       PosLenToImpl(nStart, nLen, &from, &len);
2919 #if wxUSE_UNICODE_UTF8
           // a non-ASCII character can't be stored as a single
           // wxStringCharType in this build, use its encoded form instead
2920       if ( !ch.IsAscii() )
2921           m_impl.replace(from, len, wxStringOperations::EncodeNChars(nCount, ch));
2922       else
2923 #endif
2924           m_impl.replace(from, len, nCount, (wxStringCharType)ch);
2925
2926       return *this;
2927   }
2928
2929     // replaces the substring of length nLen starting at nStart with the
         // substring of str of length nLen2 starting at nStart2
2930   wxString& replace(size_t nStart, size_t nLen,
2931                     const wxString& str, size_t nStart2, size_t nLen2)
2932   {
2933       wxSTRING_INVALIDATE_CACHE();
2934
           // position and length of the substring to replace in this string
2935       size_t from, len;
2936       PosLenToImpl(nStart, nLen, &from, &len);
2937
           // position and length of the replacement in str
2938       size_t from2, len2;
2939       str.PosLenToImpl(nStart2, nLen2, &from2, &len2);
2940
2941       m_impl.replace(from, len, str.m_impl, from2, len2);
2942
2943       return *this;
2944   }
2945
2946      // replaces the substring of length nLen starting at nStart with the
          // first nCount chars of sz
2947   wxString& replace(size_t nStart, size_t nLen,
2948                     const char* sz, size_t nCount)
2949   {
2950       wxSTRING_INVALIDATE_CACHE();
2951
2952       size_t from, len;
2953       PosLenToImpl(nStart, nLen, &from, &len);
2954
           // the replacement is the converted buffer, of length str.len
2955       SubstrBufFromMB str(ImplStr(sz, nCount));
2956
2957       m_impl.replace(from, len, str.data, str.len);
2958
2959       return *this;
2960   }
2961
2962   wxString& replace(size_t nStart, size_t nLen,
2963                     const wchar_t* sz, size_t nCount)
2964   {
           // as above for a wide character string
2965       wxSTRING_INVALIDATE_CACHE();
2966
2967       size_t from, len;
2968       PosLenToImpl(nStart, nLen, &from, &len);
2969
2970       SubstrBufFromWC str(ImplStr(sz, nCount));
2971
2972       m_impl.replace(from, len, str.data, str.len);
2973
2974       return *this;
2975   }
2976
2977   wxString& replace(size_t nStart, size_t nLen,
2978                     const wxString& s, size_t nCount)
2979   {
           // replaces the substring of length nLen starting at nStart with the
           // first nCount characters of s
2980       wxSTRING_INVALIDATE_CACHE();
2981
2982       size_t from, len;
2983       PosLenToImpl(nStart, nLen, &from, &len);
           // NOTE(review): nCount is not checked against the length of s here,
           // presumably it must not exceed it as a raw pointer and a length are
           // passed to the implementation string -- confirm
2984       m_impl.replace(from, len, s.m_impl.c_str(), s.LenToImpl(nCount));
2985
2986       return *this;
2987   }
2988
2989   wxString& replace(iterator first, iterator last, const char* s)
2990   {
           // replaces the characters in the range from first to last with the
           // NUL-terminated string s
2991       wxSTRING_INVALIDATE_CACHE();
2992
2993       m_impl.replace(first.impl(), last.impl(), ImplStr(s));
2994
2995       return *this;
2996   }
2997
2998   wxString& replace(iterator first, iterator last, const wchar_t* s)
2999   {
           // as above for a wide character string
3000       wxSTRING_INVALIDATE_CACHE();
3001
3002       m_impl.replace(first.impl(), last.impl(), ImplStr(s));
3003
3004       return *this;
3005   }
3006
3007   wxString& replace(iterator first, iterator last, const char* s, size_type n)
3008   {
           // replaces the characters in the range from first to last with the
           // first n chars of s (after converting them, see str below)
3009       wxSTRING_INVALIDATE_CACHE();
3010
3011       SubstrBufFromMB str(ImplStr(s, n));
3012       m_impl.replace(first.impl(), last.impl(), str.data, str.len);
3013
3014       return *this;
3015   }
3016
3017   wxString& replace(iterator first, iterator last, const wchar_t* s, size_type n)
3018   {
           // as above for a wide character string
3019       wxSTRING_INVALIDATE_CACHE();
3020
3021       SubstrBufFromWC str(ImplStr(s, n));
3022       m_impl.replace(first.impl(), last.impl(), str.data, str.len);
3023
3024       return *this;
3025   }
3026
3027   wxString& replace(iterator first, iterator last, const wxString& s)
3028   {
           // replaces the characters in the range from first to last with the
           // whole string s
3029       wxSTRING_INVALIDATE_CACHE();
3030
3031       m_impl.replace(first.impl(), last.impl(), s.m_impl);
3032
3033       return *this;
3034   }
3035
3036   wxString& replace(iterator first, iterator last, size_type n, wxUniChar ch)
3037   {
           // replaces the characters in the range from first to last with n
           // copies of ch
3038       wxSTRING_INVALIDATE_CACHE();
3039
3040 #if wxUSE_UNICODE_UTF8
           // a non-ASCII character can't be stored as a single
           // wxStringCharType in this build, use its encoded form instead
3041       if ( !ch.IsAscii() )
3042           m_impl.replace(first.impl(), last.impl(),
3043                   wxStringOperations::EncodeNChars(n, ch));
3044       else
3045 #endif
3046           m_impl.replace(first.impl(), last.impl(), n, (wxStringCharType)ch);
3047
3048       return *this;
3049   }
3050
3051   wxString& replace(iterator first, iterator last,
3052                     const_iterator first1, const_iterator last1)
3053   {
3054       wxSTRING_INVALIDATE_CACHE();
3055
3056       m_impl.replace(first.impl(), last.impl(), first1.impl(), last1.impl());
3057
3058       return *this;
3059   }
3060
3061   wxString& replace(iterator first, iterator last,
3062                     const char *first1, const char *last1)
3063     { replace(first, last, first1, last1 - first1); return *this; }
3064   wxString& replace(iterator first, iterator last,
3065                     const wchar_t *first1, const wchar_t *last1)
3066     { replace(first, last, first1, last1 - first1); return *this; }
3067
3068   // swap two strings
3069   void swap(wxString& str)
3070   {
3071 #if wxUSE_STRING_POS_CACHE
3072       // we modify not only this string but also the other one directly so we
3073       // need to invalidate cache for both of them (we could also try to
3074       // exchange their cache entries but it seems unlikely to be worth it)
3075       InvalidateCache();
3076       str.InvalidateCache();
3077 #endif // wxUSE_STRING_POS_CACHE
3078
3079       m_impl.swap(str.m_impl);
3080   }
3081
3082     // find a substring
3083   size_t find(const wxString& str, size_t nStart = 0) const
3084     { return PosFromImpl(m_impl.find(str.m_impl, PosToImpl(nStart))); }
3085
3086     // find first n characters of sz
3087   size_t find(const char* sz, size_t nStart = 0, size_t n = npos) const
3088   {
3089       SubstrBufFromMB str(ImplStr(sz, n));
3090       return PosFromImpl(m_impl.find(str.data, PosToImpl(nStart), str.len));
3091   }
3092   size_t find(const wchar_t* sz, size_t nStart = 0, size_t n = npos) const
3093   {
3094       SubstrBufFromWC str(ImplStr(sz, n));
3095       return PosFromImpl(m_impl.find(str.data, PosToImpl(nStart), str.len));
3096   }
3097   size_t find(const wxScopedCharBuffer& s, size_t nStart = 0, size_t n = npos) const
3098     { return find(s.data(), nStart, n); }
3099   size_t find(const wxScopedWCharBuffer& s, size_t nStart = 0, size_t n = npos) const
3100     { return find(s.data(), nStart, n); }
3101   size_t find(const wxCStrData& s, size_t nStart = 0, size_t n = npos) const
3102     { return find(s.AsWChar(), nStart, n); }
3103
3104     // find the first occurrence of character ch after nStart
3105   size_t find(wxUniChar ch, size_t nStart = 0) const
3106   {
3107 #if wxUSE_UNICODE_UTF8
3108     if ( !ch.IsAscii() )
3109         return PosFromImpl(m_impl.find(wxStringOperations::EncodeChar(ch),
3110                                        PosToImpl(nStart)));
3111     else
3112 #endif
3113         return PosFromImpl(m_impl.find((wxStringCharType)ch,
3114                                        PosToImpl(nStart)));
3115
3116   }
3117   size_t find(wxUniCharRef ch, size_t nStart = 0) const
3118     {  return find(wxUniChar(ch), nStart); }
3119   size_t find(char ch, size_t nStart = 0) const
3120     {  return find(wxUniChar(ch), nStart); }
3121   size_t find(unsigned char ch, size_t nStart = 0) const
3122     {  return find(wxUniChar(ch), nStart); }
3123   size_t find(wchar_t ch, size_t nStart = 0) const
3124     {  return find(wxUniChar(ch), nStart); }
3125
3126     // rfind() family is exactly like find() but works right to left
3127
3128     // as find, but from the end
3129   size_t rfind(const wxString& str, size_t nStart = npos) const
3130     { return PosFromImpl(m_impl.rfind(str.m_impl, PosToImpl(nStart))); }
3131
3132     // as find, but from the end
3133   size_t rfind(const char* sz, size_t nStart = npos, size_t n = npos) const
3134   {
3135       SubstrBufFromMB str(ImplStr(sz, n));
3136       return PosFromImpl(m_impl.rfind(str.data, PosToImpl(nStart), str.len));
3137   }
3138   size_t rfind(const wchar_t* sz, size_t nStart = npos, size_t n = npos) const
3139   {
3140       SubstrBufFromWC str(ImplStr(sz, n));
3141       return PosFromImpl(m_impl.rfind(str.data, PosToImpl(nStart), str.len));
3142   }
3143   size_t rfind(const wxScopedCharBuffer& s, size_t nStart = npos, size_t n = npos) const
3144     { return rfind(s.data(), nStart, n); }
3145   size_t rfind(const wxScopedWCharBuffer& s, size_t nStart = npos, size_t n = npos) const
3146     { return rfind(s.data(), nStart, n); }
3147   size_t rfind(const wxCStrData& s, size_t nStart = npos, size_t n = npos) const
3148     { return rfind(s.AsWChar(), nStart, n); }
3149     // as find, but from the end
3150   size_t rfind(wxUniChar ch, size_t nStart = npos) const
3151   {
3152 #if wxUSE_UNICODE_UTF8
3153     if ( !ch.IsAscii() )
3154         return PosFromImpl(m_impl.rfind(wxStringOperations::EncodeChar(ch),
3155                                         PosToImpl(nStart)));
3156     else
3157 #endif
3158         return PosFromImpl(m_impl.rfind((wxStringCharType)ch,
3159                                         PosToImpl(nStart)));
3160   }
3161   size_t rfind(wxUniCharRef ch, size_t nStart = npos) const
3162     {  return rfind(wxUniChar(ch), nStart); }
3163   size_t rfind(char ch, size_t nStart = npos) const
3164     {  return rfind(wxUniChar(ch), nStart); }
3165   size_t rfind(unsigned char ch, size_t nStart = npos) const
3166     {  return rfind(wxUniChar(ch), nStart); }
3167   size_t rfind(wchar_t ch, size_t nStart = npos) const
3168     {  return rfind(wxUniChar(ch), nStart); }
3169
3170   // find first/last occurrence of any character (not) in the set:
3171 #if wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
3172   // FIXME-UTF8: this is not entirely correct, because it doesn't work if
3173   //             sizeof(wchar_t)==2 and surrogates are present in the string;
3174   //             should we care? Probably not.
3175   size_t find_first_of(const wxString& str, size_t nStart = 0) const
3176     { return m_impl.find_first_of(str.m_impl, nStart); }
3177   size_t find_first_of(const char* sz, size_t nStart = 0) const
3178     { return m_impl.find_first_of(ImplStr(sz), nStart); }
3179   size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const
3180     { return m_impl.find_first_of(ImplStr(sz), nStart); }
3181   size_t find_first_of(const char* sz, size_t nStart, size_t n) const
3182     { return m_impl.find_first_of(ImplStr(sz), nStart, n); }
3183   size_t find_first_of(const wchar_t* sz, size_t nStart, size_t n) const
3184     { return m_impl.find_first_of(ImplStr(sz), nStart, n); }
3185   size_t find_first_of(wxUniChar c, size_t nStart = 0) const
3186     { return m_impl.find_first_of((wxChar)c, nStart); }
3187
3188   size_t find_last_of(const wxString& str, size_t nStart = npos) const
3189     { return m_impl.find_last_of(str.m_impl, nStart); }
3190   size_t find_last_of(const char* sz, size_t nStart = npos) const
3191     { return m_impl.find_last_of(ImplStr(sz), nStart); }
3192   size_t find_last_of(const wchar_t* sz, size_t nStart = npos) const
3193     { return m_impl.find_last_of(ImplStr(sz), nStart); }
3194   size_t find_last_of(const char* sz, size_t nStart, size_t n) const
3195     { return m_impl.find_last_of(ImplStr(sz), nStart, n); }
3196   size_t find_last_of(const wchar_t* sz, size_t nStart, size_t n) const
3197     { return m_impl.find_last_of(ImplStr(sz), nStart, n); }
3198   size_t find_last_of(wxUniChar c, size_t nStart = npos) const
3199     { return m_impl.find_last_of((wxChar)c, nStart); }
3200
3201   size_t find_first_not_of(const wxString& str, size_t nStart = 0) const
3202     { return m_impl.find_first_not_of(str.m_impl, nStart); }
3203   size_t find_first_not_of(const char* sz, size_t nStart = 0) const
3204     { return m_impl.find_first_not_of(ImplStr(sz), nStart); }
3205   size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const
3206     { return m_impl.find_first_not_of(ImplStr(sz), nStart); }
3207   size_t find_first_not_of(const char* sz, size_t nStart, size_t n) const
3208     { return m_impl.find_first_not_of(ImplStr(sz), nStart, n); }
3209   size_t find_first_not_of(const wchar_t* sz, size_t nStart, size_t n) const
3210     { return m_impl.find_first_not_of(ImplStr(sz), nStart, n); }
3211   size_t find_first_not_of(wxUniChar c, size_t nStart = 0) const
3212     { return m_impl.find_first_not_of((wxChar)c, nStart); }
3213
3214   size_t find_last_not_of(const wxString& str, size_t nStart = npos) const
3215     { return m_impl.find_last_not_of(str.m_impl, nStart); }
3216   size_t find_last_not_of(const char* sz, size_t nStart = npos) const
3217     { return m_impl.find_last_not_of(ImplStr(sz), nStart); }
3218   size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const
3219     { return m_impl.find_last_not_of(ImplStr(sz), nStart); }
3220   size_t find_last_not_of(const char* sz, size_t nStart, size_t n) const
3221     { return m_impl.find_last_not_of(ImplStr(sz), nStart, n); }
3222   size_t find_last_not_of(const wchar_t* sz, size_t nStart, size_t n) const
3223     { return m_impl.find_last_not_of(ImplStr(sz), nStart, n); }
3224   size_t find_last_not_of(wxUniChar c, size_t nStart = npos) const
3225     { return m_impl.find_last_not_of((wxChar)c, nStart); }
3226 #else
3227   // we can't use std::string implementation in UTF-8 build, because the
3228   // character sets would be interpreted wrongly:
3229
3230     // as strpbrk() but starts at nStart, returns npos if not found
3231   size_t find_first_of(const wxString& str, size_t nStart = 0) const
3232 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3233     { return find_first_of(str.wc_str(), nStart); }
3234 #else
3235     { return find_first_of(str.mb_str(), nStart); }
3236 #endif
3237     // same as above
3238   size_t find_first_of(const char* sz, size_t nStart = 0) const;
3239   size_t find_first_of(const wchar_t* sz, size_t nStart = 0) const;
3240   size_t find_first_of(const char* sz, size_t nStart, size_t n) const;
3241   size_t find_first_of(const wchar_t* sz, size_t nStart, size_t n) const;
3242     // same as find(char, size_t)
3243   size_t find_first_of(wxUniChar c, size_t nStart = 0) const
3244     { return find(c, nStart); }
3245     // find the last (starting from nStart) char from str in this string
3246   size_t find_last_of (const wxString& str, size_t nStart = npos) const
3247 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3248     { return find_last_of(str.wc_str(), nStart); }
3249 #else
3250     { return find_last_of(str.mb_str(), nStart); }
3251 #endif
3252     // same as above
3253   size_t find_last_of (const char* sz, size_t nStart = npos) const;
3254   size_t find_last_of (const wchar_t* sz, size_t nStart = npos) const;
3255   size_t find_last_of(const char* sz, size_t nStart, size_t n) const;
3256   size_t find_last_of(const wchar_t* sz, size_t nStart, size_t n) const;
3257     // same as above
3258   size_t find_last_of(wxUniChar c, size_t nStart = npos) const
3259     { return rfind(c, nStart); }
3260
3261     // find first/last occurrence of any character not in the set
3262
3263     // as strspn() (starting from nStart), returns npos on failure
3264   size_t find_first_not_of(const wxString& str, size_t nStart = 0) const
3265 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3266     { return find_first_not_of(str.wc_str(), nStart); }
3267 #else
3268     { return find_first_not_of(str.mb_str(), nStart); }
3269 #endif
3270     // same as above
3271   size_t find_first_not_of(const char* sz, size_t nStart = 0) const;
3272   size_t find_first_not_of(const wchar_t* sz, size_t nStart = 0) const;
3273   size_t find_first_not_of(const char* sz, size_t nStart, size_t n) const;
3274   size_t find_first_not_of(const wchar_t* sz, size_t nStart, size_t n) const;
3275     // same as above
3276   size_t find_first_not_of(wxUniChar ch, size_t nStart = 0) const;
3277     //  as strcspn()
3278   size_t find_last_not_of(const wxString& str, size_t nStart = npos) const
3279 #if wxUSE_UNICODE // FIXME-UTF8: temporary
3280     { return find_last_not_of(str.wc_str(), nStart); }
3281 #else
3282     { return find_last_not_of(str.mb_str(), nStart); }
3283 #endif
3284     // same as above
3285   size_t find_last_not_of(const char* sz, size_t nStart = npos) const;
3286   size_t find_last_not_of(const wchar_t* sz, size_t nStart = npos) const;
3287   size_t find_last_not_of(const char* sz, size_t nStart, size_t n) const;
3288   size_t find_last_not_of(const wchar_t* sz, size_t nStart, size_t n) const;
3289     // same as above
3290   size_t find_last_not_of(wxUniChar ch, size_t nStart = npos) const;
3291 #endif // wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8 or not
3292
3293   // provide char/wchar_t/wxUniCharRef overloads for char-finding functions
3294   // above to resolve ambiguities:
3295   size_t find_first_of(wxUniCharRef ch, size_t nStart = 0) const
3296     {  return find_first_of(wxUniChar(ch), nStart); }
3297   size_t find_first_of(char ch, size_t nStart = 0) const
3298     {  return find_first_of(wxUniChar(ch), nStart); }
3299   size_t find_first_of(unsigned char ch, size_t nStart = 0) const
3300     {  return find_first_of(wxUniChar(ch), nStart); }
3301   size_t find_first_of(wchar_t ch, size_t nStart = 0) const
3302     {  return find_first_of(wxUniChar(ch), nStart); }
3303   size_t find_last_of(wxUniCharRef ch, size_t nStart = npos) const
3304     {  return find_last_of(wxUniChar(ch), nStart); }
3305   size_t find_last_of(char ch, size_t nStart = npos) const
3306     {  return find_last_of(wxUniChar(ch), nStart); }
3307   size_t find_last_of(unsigned char ch, size_t nStart = npos) const
3308     {  return find_last_of(wxUniChar(ch), nStart); }
3309   size_t find_last_of(wchar_t ch, size_t nStart = npos) const
3310     {  return find_last_of(wxUniChar(ch), nStart); }
3311   size_t find_first_not_of(wxUniCharRef ch, size_t nStart = 0) const
3312     {  return find_first_not_of(wxUniChar(ch), nStart); }
3313   size_t find_first_not_of(char ch, size_t nStart = 0) const
3314     {  return find_first_not_of(wxUniChar(ch), nStart); }
3315   size_t find_first_not_of(unsigned char ch, size_t nStart = 0) const
3316     {  return find_first_not_of(wxUniChar(ch), nStart); }
3317   size_t find_first_not_of(wchar_t ch, size_t nStart = 0) const
3318     {  return find_first_not_of(wxUniChar(ch), nStart); }
3319   size_t find_last_not_of(wxUniCharRef ch, size_t nStart = npos) const
3320     {  return find_last_not_of(wxUniChar(ch), nStart); }
3321   size_t find_last_not_of(char ch, size_t nStart = npos) const
3322     {  return find_last_not_of(wxUniChar(ch), nStart); }
3323   size_t find_last_not_of(unsigned char ch, size_t nStart = npos) const
3324     {  return find_last_not_of(wxUniChar(ch), nStart); }
3325   size_t find_last_not_of(wchar_t ch, size_t nStart = npos) const
3326     {  return find_last_not_of(wxUniChar(ch), nStart); }
3327
3328   // and additional overloads for the versions taking strings:
3329   size_t find_first_of(const wxCStrData& sz, size_t nStart = 0) const
3330     { return find_first_of(sz.AsString(), nStart); }
3331   size_t find_first_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3332     { return find_first_of(sz.data(), nStart); }
3333   size_t find_first_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3334     { return find_first_of(sz.data(), nStart); }
3335   size_t find_first_of(const wxCStrData& sz, size_t nStart, size_t n) const
3336     { return find_first_of(sz.AsWChar(), nStart, n); }
3337   size_t find_first_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3338     { return find_first_of(sz.data(), nStart, n); }
3339   size_t find_first_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3340     { return find_first_of(sz.data(), nStart, n); }
3341
3342   size_t find_last_of(const wxCStrData& sz, size_t nStart = 0) const
3343     { return find_last_of(sz.AsString(), nStart); }
3344   size_t find_last_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3345     { return find_last_of(sz.data(), nStart); }
3346   size_t find_last_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3347     { return find_last_of(sz.data(), nStart); }
3348   size_t find_last_of(const wxCStrData& sz, size_t nStart, size_t n) const
3349     { return find_last_of(sz.AsWChar(), nStart, n); }
3350   size_t find_last_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3351     { return find_last_of(sz.data(), nStart, n); }
3352   size_t find_last_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3353     { return find_last_of(sz.data(), nStart, n); }
3354
3355   size_t find_first_not_of(const wxCStrData& sz, size_t nStart = 0) const
3356     { return find_first_not_of(sz.AsString(), nStart); }
3357   size_t find_first_not_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3358     { return find_first_not_of(sz.data(), nStart); }
3359   size_t find_first_not_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3360     { return find_first_not_of(sz.data(), nStart); }
3361   size_t find_first_not_of(const wxCStrData& sz, size_t nStart, size_t n) const
3362     { return find_first_not_of(sz.AsWChar(), nStart, n); }
3363   size_t find_first_not_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3364     { return find_first_not_of(sz.data(), nStart, n); }
3365   size_t find_first_not_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3366     { return find_first_not_of(sz.data(), nStart, n); }
3367
3368   size_t find_last_not_of(const wxCStrData& sz, size_t nStart = 0) const
3369     { return find_last_not_of(sz.AsString(), nStart); }
3370   size_t find_last_not_of(const wxScopedCharBuffer& sz, size_t nStart = 0) const
3371     { return find_last_not_of(sz.data(), nStart); }
3372   size_t find_last_not_of(const wxScopedWCharBuffer& sz, size_t nStart = 0) const
3373     { return find_last_not_of(sz.data(), nStart); }
3374   size_t find_last_not_of(const wxCStrData& sz, size_t nStart, size_t n) const
3375     { return find_last_not_of(sz.AsWChar(), nStart, n); }
3376   size_t find_last_not_of(const wxScopedCharBuffer& sz, size_t nStart, size_t n) const
3377     { return find_last_not_of(sz.data(), nStart, n); }
3378   size_t find_last_not_of(const wxScopedWCharBuffer& sz, size_t nStart, size_t n) const
3379     { return find_last_not_of(sz.data(), nStart, n); }
3380
3381       // string += string
3382   wxString& operator+=(const wxString& s)
3383   {
3384       wxSTRING_INVALIDATE_CACHED_LENGTH();
3385
3386       m_impl += s.m_impl;
3387       return *this;
3388   }
3389       // string += C string
3390   wxString& operator+=(const char *psz)
3391   {
3392       wxSTRING_INVALIDATE_CACHED_LENGTH();
3393
3394       m_impl += ImplStr(psz);
3395       return *this;
3396   }
3397   wxString& operator+=(const wchar_t *pwz)
3398   {
3399       wxSTRING_INVALIDATE_CACHED_LENGTH();
3400
3401       m_impl += ImplStr(pwz);
3402       return *this;
3403   }
3404   wxString& operator+=(const wxCStrData& s)
3405   {
3406       wxSTRING_INVALIDATE_CACHED_LENGTH();
3407
3408       m_impl += s.AsString().m_impl;
3409       return *this;
3410   }
3411   wxString& operator+=(const wxScopedCharBuffer& s)
3412     { return append(s); }
3413   wxString& operator+=(const wxScopedWCharBuffer& s)
3414     { return append(s); }
3415       // string += char
3416   wxString& operator+=(wxUniChar ch)
3417   {
3418       wxSTRING_UPDATE_CACHED_LENGTH(1);
3419
3420 #if wxUSE_UNICODE_UTF8
3421       if ( !ch.IsAscii() )
3422           m_impl += wxStringOperations::EncodeChar(ch);
3423       else
3424 #endif
3425           m_impl += (wxStringCharType)ch;
3426       return *this;
3427   }
3428   wxString& operator+=(wxUniCharRef ch) { return *this += wxUniChar(ch); }
3429   wxString& operator+=(int ch) { return *this += wxUniChar(ch); }
3430   wxString& operator+=(char ch) { return *this += wxUniChar(ch); }
3431   wxString& operator+=(unsigned char ch) { return *this += wxUniChar(ch); }
3432   wxString& operator+=(wchar_t ch) { return *this += wxUniChar(ch); }
3433
3434 private:
3435 #if !wxUSE_STL_BASED_WXSTRING
3436   // helpers for wxStringBuffer and wxStringBufferLength
3437   wxStringCharType *DoGetWriteBuf(size_t nLen)
3438   {
3439       return m_impl.DoGetWriteBuf(nLen);
3440   }
3441
3442   void DoUngetWriteBuf()
3443   {
3444       wxSTRING_INVALIDATE_CACHE();
3445
3446       m_impl.DoUngetWriteBuf();
3447   }
3448
3449   void DoUngetWriteBuf(size_t nLen)
3450   {
3451       wxSTRING_INVALIDATE_CACHE();
3452
3453       m_impl.DoUngetWriteBuf(nLen);
3454   }
3455 #endif // !wxUSE_STL_BASED_WXSTRING
3456
3457 #ifndef wxNEEDS_WXSTRING_PRINTF_MIXIN
3458   #if !wxUSE_UTF8_LOCALE_ONLY
3459   int DoPrintfWchar(const wxChar *format, ...);
3460   static wxString DoFormatWchar(const wxChar *format, ...);
3461   #endif
3462   #if wxUSE_UNICODE_UTF8
3463   int DoPrintfUtf8(const char *format, ...);
3464   static wxString DoFormatUtf8(const char *format, ...);
3465   #endif
3466 #endif
3467
3468 #if !wxUSE_STL_BASED_WXSTRING
3469   // check string's data validity
3470   bool IsValid() const { return m_impl.GetStringData()->IsValid(); }
3471 #endif
3472
3473 private:
3474   wxStringImpl m_impl;
3475
3476   // buffers for compatibility conversion from (char*)c_str() and
3477   // (wchar_t*)c_str(): the pointers returned by these functions should remain
3478   // valid until the string itself is modified for compatibility with the
3479   // existing code and consistency with std::string::c_str() so returning a
3480   // temporary buffer won't do and we need to cache the conversion results
3481
3482   // TODO-UTF8: benchmark various approaches to keeping compatibility buffers
3483   template<typename T>
3484   struct ConvertedBuffer
3485   {
3486       // notice that there is no need to initialize m_len here as it's unused
3487       // as long as m_str is NULL
3488       ConvertedBuffer() : m_str(NULL) {}
3489       ~ConvertedBuffer()
3490           { free(m_str); }
3491
3492       bool Extend(size_t len)
3493       {
3494           // add extra 1 for the trailing NUL
3495           void * const str = realloc(m_str, sizeof(T)*(len + 1));
3496           if ( !str )
3497               return false;
3498
3499           m_str = static_cast<T *>(str);
3500           m_len = len;
3501
3502           return true;
3503       }
3504
3505       const wxScopedCharTypeBuffer<T> AsScopedBuffer() const
3506       {
3507           return wxScopedCharTypeBuffer<T>::CreateNonOwned(m_str, m_len);
3508       }
3509
3510       T *m_str;     // pointer to the string data
3511       size_t m_len; // length, not size, i.e. in chars and without last NUL
3512   };
3513
3514
3515 #if wxUSE_UNICODE
3516   // common mb_str() and wxCStrData::AsChar() helper: performs the conversion
3517   // and returns either m_convertedToChar.m_str (in which case its m_len is
3518   // also updated) or NULL if it failed
3519   //
3520   // there is an important exception: in wxUSE_UNICODE_UTF8 build if conv is a
3521   // UTF-8 one, we return m_impl.c_str() directly, without doing any conversion
3522   // as optimization and so the caller needs to check for this before using
3523   // m_convertedToChar
3524   //
3525   // NB: AsChar() returns char* in any build, unlike mb_str()
3526   const char *AsChar(const wxMBConv& conv) const;
3527
3528   // mb_str() implementation helper
3529   wxScopedCharBuffer AsCharBuf(const wxMBConv& conv) const
3530   {
3531 #if wxUSE_UNICODE_UTF8
3532       // avoid conversion if we can
3533       if ( conv.IsUTF8() )
3534       {
3535           return wxScopedCharBuffer::CreateNonOwned(m_impl.c_str(),
3536                   m_impl.length());
3537       }
3538 #endif // wxUSE_UNICODE_UTF8
3539
3540       // call this solely in order to fill in m_convertedToChar as AsChar()
3541       // updates it as a side effect: this is a bit ugly but it's a completely
3542       // internal function so the users of this class shouldn't care or know
3543       // about it and doing it like this, i.e. having a separate AsChar(),
3544       // allows us to avoid the creation and destruction of a temporary buffer
3545       // when using wxCStrData without duplicating any code
3546       if ( !AsChar(conv) )
3547       {
3548           // although it would be probably more correct to return NULL buffer
3549           // from here if the conversion fails, a lot of existing code doesn't
3550           // expect mb_str() (or wc_str()) to ever return NULL so return an
3551           // empty string otherwise to avoid crashes in it
3552           //
3553           // also, some existing code does check for the conversion success and
3554           // so asserting here would be bad too -- even if it does mean that
3555           // silently losing data is possible for badly written code
3556           return wxScopedCharBuffer::CreateNonOwned("", 0);
3557       }
3558
3559       return m_convertedToChar.AsScopedBuffer();
3560   }
3561
3562   ConvertedBuffer<char> m_convertedToChar;
#endif // wxUSE_UNICODE
3564
3565 #if !wxUSE_UNICODE_WCHAR
3566   // common wc_str() and wxCStrData::AsWChar() helper for both UTF-8 and ANSI
3567   // builds: converts the string contents into m_convertedToWChar and returns
3568   // NULL if the conversion failed (this can only happen in ANSI build)
3569   //
3570   // NB: AsWChar() returns wchar_t* in any build, unlike wc_str()
3571   const wchar_t *AsWChar(const wxMBConv& conv) const;
3572
3573   // wc_str() implementation helper
3574   wxScopedWCharBuffer AsWCharBuf(const wxMBConv& conv) const
3575   {
3576       if ( !AsWChar(conv) )
3577           return wxScopedWCharBuffer::CreateNonOwned(L"", 0);
3578
3579       return m_convertedToWChar.AsScopedBuffer();
3580   }
3581
3582   ConvertedBuffer<wchar_t> m_convertedToWChar;
3583 #endif // !wxUSE_UNICODE_WCHAR
3584
3585 #if wxUSE_UNICODE_UTF8
// FIXME-UTF8: (try to) move this elsewhere (TLS) or solve differently;
//             assigning to the character pointed to by a wxString::iterator
//             may change the underlying wxStringImpl iterator, so we have to
//             keep track of all iterators and update them as necessary:
3590   struct wxStringIteratorNodeHead
3591   {
3592       wxStringIteratorNodeHead() : ptr(NULL) {}
3593       wxStringIteratorNode *ptr;
3594
3595       // copying is disallowed as it would result in more than one pointer into
3596       // the same linked list
3597       wxDECLARE_NO_COPY_CLASS(wxStringIteratorNodeHead);
3598   };
3599
3600   wxStringIteratorNodeHead m_iterators;
3601
3602   friend class WXDLLIMPEXP_FWD_BASE wxStringIteratorNode;
3603   friend class WXDLLIMPEXP_FWD_BASE wxUniCharRef;
3604 #endif // wxUSE_UNICODE_UTF8
3605
3606   friend class WXDLLIMPEXP_FWD_BASE wxCStrData;
3607   friend class wxStringInternalBuffer;
3608   friend class wxStringInternalBufferLength;
3609 };
3610
3611 #ifdef wxNEEDS_WXSTRING_PRINTF_MIXIN
3612     #pragma warning (pop)
3613 #endif
3614
3615 // string iterator operators that satisfy STL Random Access Iterator
3616 // requirements:
3617 inline wxString::iterator operator+(ptrdiff_t n, wxString::iterator i)
3618   { return i + n; }
3619 inline wxString::const_iterator operator+(ptrdiff_t n, wxString::const_iterator i)
3620   { return i + n; }
3621 inline wxString::reverse_iterator operator+(ptrdiff_t n, wxString::reverse_iterator i)
3622   { return i + n; }
3623 inline wxString::const_reverse_iterator operator+(ptrdiff_t n, wxString::const_reverse_iterator i)
3624   { return i + n; }
3625
3626 // notice that even though for many compilers the friend declarations above are
3627 // enough, from the point of view of C++ standard we must have the declarations
3628 // here as friend ones are not injected in the enclosing namespace and without
3629 // them the code fails to compile with conforming compilers such as xlC or g++4
3630 wxString WXDLLIMPEXP_BASE operator+(const wxString& string1, const wxString& string2);
3631 wxString WXDLLIMPEXP_BASE operator+(const wxString& string, const char *psz);
3632 wxString WXDLLIMPEXP_BASE operator+(const wxString& string, const wchar_t *pwz);
3633 wxString WXDLLIMPEXP_BASE operator+(const char *psz, const wxString& string);
3634 wxString WXDLLIMPEXP_BASE operator+(const wchar_t *pwz, const wxString& string);
3635
3636 wxString WXDLLIMPEXP_BASE operator+(const wxString& string, wxUniChar ch);
3637 wxString WXDLLIMPEXP_BASE operator+(wxUniChar ch, const wxString& string);
3638
3639 inline wxString operator+(const wxString& string, wxUniCharRef ch)
3640     { return string + (wxUniChar)ch; }
3641 inline wxString operator+(const wxString& string, char ch)
3642     { return string + wxUniChar(ch); }
3643 inline wxString operator+(const wxString& string, wchar_t ch)
3644     { return string + wxUniChar(ch); }
3645 inline wxString operator+(wxUniCharRef ch, const wxString& string)
3646     { return (wxUniChar)ch + string; }
3647 inline wxString operator+(char ch, const wxString& string)
3648     { return wxUniChar(ch) + string; }
3649 inline wxString operator+(wchar_t ch, const wxString& string)
3650     { return wxUniChar(ch) + string; }
3651
3652
3653 #define wxGetEmptyString() wxString()
3654
3655 // ----------------------------------------------------------------------------
3656 // helper functions which couldn't be defined inline
3657 // ----------------------------------------------------------------------------
3658
namespace wxPrivate
{

// Specializations of wxStringAsBufHelper: Get() returns the string contents
// as a buffer of the requested character type and, if len is non-NULL, also
// stores the length of the returned data in *len.

#if wxUSE_UNICODE_WCHAR

// narrow characters require a conversion in this build
template <>
struct wxStringAsBufHelper<char>
{
    static wxScopedCharBuffer Get(const wxString& s, size_t *len)
    {
        wxScopedCharBuffer converted(s.mb_str());
        if ( len )
        {
            // an invalid buffer is reported as having zero length
            if ( converted )
                *len = strlen(converted);
            else
                *len = 0;
        }

        return converted;
    }
};

// wide characters are the native representation here: the string data is
// wrapped without being copied
template <>
struct wxStringAsBufHelper<wchar_t>
{
    static wxScopedWCharBuffer Get(const wxString& s, size_t *len)
    {
        const size_t numChars = s.length();
        if ( len )
            *len = numChars;

        return wxScopedWCharBuffer::CreateNonOwned(s.wx_str(), numChars);
    }
};

#elif wxUSE_UNICODE_UTF8

// narrow (UTF-8) characters are the native representation here: the string
// data is wrapped without being copied
template <>
struct wxStringAsBufHelper<char>
{
    static wxScopedCharBuffer Get(const wxString& s, size_t *len)
    {
        const size_t numBytes = s.utf8_length();
        if ( len )
            *len = numBytes;

        return wxScopedCharBuffer::CreateNonOwned(s.wx_str(), numBytes);
    }
};

// wide characters require a conversion in this build
template <>
struct wxStringAsBufHelper<wchar_t>
{
    static wxScopedWCharBuffer Get(const wxString& s, size_t *len)
    {
        wxScopedWCharBuffer converted(s.wc_str());
        if ( len )
            *len = wxWcslen(converted);

        return converted;
    }
};

#endif // Unicode build kind

} // namespace wxPrivate
3717
3718 // ----------------------------------------------------------------------------
3719 // wxStringBuffer: a tiny class allowing to get a writable pointer into string
3720 // ----------------------------------------------------------------------------
3721
3722 #if !wxUSE_STL_BASED_WXSTRING
3723 // string buffer for direct access to string data in their native
3724 // representation:
3725 class wxStringInternalBuffer
3726 {
3727 public:
3728     typedef wxStringCharType CharType;
3729
3730     wxStringInternalBuffer(wxString& str, size_t lenWanted = 1024)
3731         : m_str(str), m_buf(NULL)
3732         { m_buf = m_str.DoGetWriteBuf(lenWanted); }
3733
3734     ~wxStringInternalBuffer() { m_str.DoUngetWriteBuf(); }
3735
3736     operator wxStringCharType*() const { return m_buf; }
3737
3738 private:
3739     wxString&         m_str;
3740     wxStringCharType *m_buf;
3741
3742     wxDECLARE_NO_COPY_CLASS(wxStringInternalBuffer);
3743 };
3744
3745 class wxStringInternalBufferLength
3746 {
3747 public:
3748     typedef wxStringCharType CharType;
3749
3750     wxStringInternalBufferLength(wxString& str, size_t lenWanted = 1024)
3751         : m_str(str), m_buf(NULL), m_len(0), m_lenSet(false)
3752     {
3753         m_buf = m_str.DoGetWriteBuf(lenWanted);
3754         wxASSERT(m_buf != NULL);
3755     }
3756
3757     ~wxStringInternalBufferLength()
3758     {
3759         wxASSERT(m_lenSet);
3760         m_str.DoUngetWriteBuf(m_len);
3761     }
3762
3763     operator wxStringCharType*() const { return m_buf; }
3764     void SetLength(size_t length) { m_len = length; m_lenSet = true; }
3765
3766 private:
3767     wxString&         m_str;
3768     wxStringCharType *m_buf;
3769     size_t            m_len;
3770     bool              m_lenSet;
3771
3772     wxDECLARE_NO_COPY_CLASS(wxStringInternalBufferLength);
3773 };
3774
3775 #endif // !wxUSE_STL_BASED_WXSTRING
3776
3777 template<typename T>
3778 class wxStringTypeBufferBase
3779 {
3780 public:
3781     typedef T CharType;
3782
3783     wxStringTypeBufferBase(wxString& str, size_t lenWanted = 1024)
3784         : m_str(str), m_buf(lenWanted)
3785     {
3786         // for compatibility with old wxStringBuffer which provided direct
3787         // access to wxString internal buffer, initialize ourselves with the
3788         // string initial contents
3789
3790         // FIXME-VC6: remove the ugly (CharType *)NULL and use normal
3791         //            tchar_str<CharType>
3792         size_t len;
3793         const wxCharTypeBuffer<CharType> buf(str.tchar_str(&len, (CharType *)NULL));
3794         if ( buf )
3795         {
3796             if ( len > lenWanted )
3797             {
3798                 // in this case there is not enough space for terminating NUL,
3799                 // ensure that we still put it there
3800                 m_buf.data()[lenWanted] = 0;
3801                 len = lenWanted - 1;
3802             }
3803
3804             memcpy(m_buf.data(), buf, (len + 1)*sizeof(CharType));
3805         }
3806         //else: conversion failed, this can happen when trying to get Unicode
3807         //      string contents into a char string
3808     }
3809
3810     operator CharType*() { return m_buf.data(); }
3811
3812 protected:
3813     wxString& m_str;
3814     wxCharTypeBuffer<CharType> m_buf;
3815 };
3816
3817 template<typename T>
3818 class wxStringTypeBufferLengthBase : public wxStringTypeBufferBase<T>
3819 {
3820 public:
3821     wxStringTypeBufferLengthBase(wxString& str, size_t lenWanted = 1024)
3822         : wxStringTypeBufferBase<T>(str, lenWanted),
3823           m_len(0),
3824           m_lenSet(false)
3825         { }
3826
3827     ~wxStringTypeBufferLengthBase()
3828     {
3829         wxASSERT_MSG( this->m_lenSet, "forgot to call SetLength()" );
3830     }
3831
3832     void SetLength(size_t length) { m_len = length; m_lenSet = true; }
3833
3834 protected:
3835     size_t m_len;
3836     bool m_lenSet;
3837 };
3838
3839 template<typename T>
3840 class wxStringTypeBuffer : public wxStringTypeBufferBase<T>
3841 {
3842 public:
3843     wxStringTypeBuffer(wxString& str, size_t lenWanted = 1024)
3844         : wxStringTypeBufferBase<T>(str, lenWanted)
3845         { }
3846
3847     ~wxStringTypeBuffer()
3848     {
3849         this->m_str.assign(this->m_buf.data());
3850     }
3851
3852     wxDECLARE_NO_COPY_CLASS(wxStringTypeBuffer);
3853 };
3854
3855 template<typename T>
3856 class wxStringTypeBufferLength : public wxStringTypeBufferLengthBase<T>
3857 {
3858 public:
3859     wxStringTypeBufferLength(wxString& str, size_t lenWanted = 1024)
3860         : wxStringTypeBufferLengthBase<T>(str, lenWanted)
3861         { }
3862
3863     ~wxStringTypeBufferLength()
3864     {
3865         this->m_str.assign(this->m_buf.data(), this->m_len);
3866     }
3867
3868     wxDECLARE_NO_COPY_CLASS(wxStringTypeBufferLength);
3869 };
3870
3871 #if wxUSE_STL_BASED_WXSTRING
3872
3873 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferBase<wxStringCharType> )
3874
3875 class wxStringInternalBuffer : public wxStringTypeBufferBase<wxStringCharType>
3876 {
3877 public:
3878     wxStringInternalBuffer(wxString& str, size_t lenWanted = 1024)
3879         : wxStringTypeBufferBase<wxStringCharType>(str, lenWanted) {}
3880     ~wxStringInternalBuffer()
3881         { m_str.m_impl.assign(m_buf.data()); }
3882
3883     wxDECLARE_NO_COPY_CLASS(wxStringInternalBuffer);
3884 };
3885
3886 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE(
3887     wxStringTypeBufferLengthBase<wxStringCharType> )
3888
3889 class wxStringInternalBufferLength
3890     : public wxStringTypeBufferLengthBase<wxStringCharType>
3891 {
3892 public:
3893     wxStringInternalBufferLength(wxString& str, size_t lenWanted = 1024)
3894         : wxStringTypeBufferLengthBase<wxStringCharType>(str, lenWanted) {}
3895
3896     ~wxStringInternalBufferLength()
3897     {
3898         m_str.m_impl.assign(m_buf.data(), m_len);
3899     }
3900
3901     wxDECLARE_NO_COPY_CLASS(wxStringInternalBufferLength);
3902 };
3903
3904 #endif // wxUSE_STL_BASED_WXSTRING
3905
3906
3907 #if wxUSE_STL_BASED_WXSTRING || wxUSE_UNICODE_UTF8
3908 typedef wxStringTypeBuffer<wxChar>        wxStringBuffer;
3909 typedef wxStringTypeBufferLength<wxChar>  wxStringBufferLength;
3910 #else // if !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
3911 typedef wxStringInternalBuffer                wxStringBuffer;
3912 typedef wxStringInternalBufferLength          wxStringBufferLength;
3913 #endif // !wxUSE_STL_BASED_WXSTRING && !wxUSE_UNICODE_UTF8
3914
3915 #if wxUSE_UNICODE_UTF8
3916 typedef wxStringInternalBuffer                wxUTF8StringBuffer;
3917 typedef wxStringInternalBufferLength          wxUTF8StringBufferLength;
3918 #elif wxUSE_UNICODE_WCHAR
3919
3920 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferBase<char> )
3921
3922 // Note about inlined dtors in the classes below: this is done not for
3923 // performance reasons but just to avoid linking errors in the MSVC DLL build
3924 // under Windows: if a class has non-inline methods it must be declared as
3925 // being DLL-exported but, due to an extremely interesting feature of MSVC 7
3926 // and later, any template class which is used as a base of a DLL-exported
3927 // class is implicitly made DLL-exported too, as explained at the bottom of
3928 // http://msdn.microsoft.com/en-us/library/twa2aw10.aspx (just to confirm: yes,
3929 // _inheriting_ from a class can change whether it is being exported from DLL)
3930 //
3931 // But this results in link errors because the base template class is not DLL-
3932 // exported, whether it is declared with WXDLLIMPEXP_BASE or not, because it
3933 // does have only inline functions. So the simplest fix is to just make all the
3934 // functions of these classes inline too.
3935
3936 class wxUTF8StringBuffer : public wxStringTypeBufferBase<char>
3937 {
3938 public:
3939     wxUTF8StringBuffer(wxString& str, size_t lenWanted = 1024)
3940         : wxStringTypeBufferBase<char>(str, lenWanted) {}
3941     ~wxUTF8StringBuffer()
3942     {
3943         wxMBConvStrictUTF8 conv;
3944         size_t wlen = conv.ToWChar(NULL, 0, m_buf);
3945         wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
3946
3947         wxStringInternalBuffer wbuf(m_str, wlen);
3948         conv.ToWChar(wbuf, wlen, m_buf);
3949     }
3950
3951     wxDECLARE_NO_COPY_CLASS(wxUTF8StringBuffer);
3952 };
3953
3954 WXDLLIMPEXP_TEMPLATE_INSTANCE_BASE( wxStringTypeBufferLengthBase<char> )
3955
3956 class wxUTF8StringBufferLength : public wxStringTypeBufferLengthBase<char>
3957 {
3958 public:
3959     wxUTF8StringBufferLength(wxString& str, size_t lenWanted = 1024)
3960         : wxStringTypeBufferLengthBase<char>(str, lenWanted) {}
3961     ~wxUTF8StringBufferLength()
3962     {
3963         wxCHECK_RET(m_lenSet, "length not set");
3964
3965         wxMBConvStrictUTF8 conv;
3966         size_t wlen = conv.ToWChar(NULL, 0, m_buf, m_len);
3967         wxCHECK_RET( wlen != wxCONV_FAILED, "invalid UTF-8 data in string buffer?" );
3968
3969         wxStringInternalBufferLength wbuf(m_str, wlen);
3970         conv.ToWChar(wbuf, wlen, m_buf, m_len);
3971         wbuf.SetLength(wlen);
3972     }
3973
3974     wxDECLARE_NO_COPY_CLASS(wxUTF8StringBufferLength);
3975 };
3976 #endif // wxUSE_UNICODE_UTF8/wxUSE_UNICODE_WCHAR
3977
3978
3979 // ---------------------------------------------------------------------------
3980 // wxString comparison functions: operator versions are always case sensitive
3981 // ---------------------------------------------------------------------------
3982
3983 #define wxCMP_WXCHAR_STRING(p, s, op) 0 op s.Cmp(p)
3984
3985 wxDEFINE_ALL_COMPARISONS(const wxChar *, const wxString&, wxCMP_WXCHAR_STRING)
3986
3987 #undef wxCMP_WXCHAR_STRING
3988
3989 inline bool operator==(const wxString& s1, const wxString& s2)
3990     { return s1.IsSameAs(s2); }
3991 inline bool operator!=(const wxString& s1, const wxString& s2)
3992     { return !s1.IsSameAs(s2); }
3993 inline bool operator< (const wxString& s1, const wxString& s2)
3994     { return s1.Cmp(s2) < 0; }
3995 inline bool operator> (const wxString& s1, const wxString& s2)
3996     { return s1.Cmp(s2) >  0; }
3997 inline bool operator<=(const wxString& s1, const wxString& s2)
3998     { return s1.Cmp(s2) <= 0; }
3999 inline bool operator>=(const wxString& s1, const wxString& s2)
4000     { return s1.Cmp(s2) >= 0; }
4001
4002 inline bool operator==(const wxString& s1, const wxCStrData& s2)
4003     { return s1 == s2.AsString(); }
4004 inline bool operator==(const wxCStrData& s1, const wxString& s2)
4005     { return s1.AsString() == s2; }
4006 inline bool operator!=(const wxString& s1, const wxCStrData& s2)
4007     { return s1 != s2.AsString(); }
4008 inline bool operator!=(const wxCStrData& s1, const wxString& s2)
4009     { return s1.AsString() != s2; }
4010
4011 inline bool operator==(const wxString& s1, const wxScopedWCharBuffer& s2)
4012     { return (s1.Cmp((const wchar_t *)s2) == 0); }
4013 inline bool operator==(const wxScopedWCharBuffer& s1, const wxString& s2)
4014     { return (s2.Cmp((const wchar_t *)s1) == 0); }
4015 inline bool operator!=(const wxString& s1, const wxScopedWCharBuffer& s2)
4016     { return (s1.Cmp((const wchar_t *)s2) != 0); }
4017 inline bool operator!=(const wxScopedWCharBuffer& s1, const wxString& s2)
4018     { return (s2.Cmp((const wchar_t *)s1) != 0); }
4019
4020 inline bool operator==(const wxString& s1, const wxScopedCharBuffer& s2)
4021     { return (s1.Cmp((const char *)s2) == 0); }
4022 inline bool operator==(const wxScopedCharBuffer& s1, const wxString& s2)
4023     { return (s2.Cmp((const char *)s1) == 0); }
4024 inline bool operator!=(const wxString& s1, const wxScopedCharBuffer& s2)
4025     { return (s1.Cmp((const char *)s2) != 0); }
4026 inline bool operator!=(const wxScopedCharBuffer& s1, const wxString& s2)
4027     { return (s2.Cmp((const char *)s1) != 0); }
4028
4029 inline wxString operator+(const wxString& string, const wxScopedWCharBuffer& buf)
4030     { return string + (const wchar_t *)buf; }
4031 inline wxString operator+(const wxScopedWCharBuffer& buf, const wxString& string)
4032     { return (const wchar_t *)buf + string; }
4033
4034 inline wxString operator+(const wxString& string, const wxScopedCharBuffer& buf)
4035     { return string + (const char *)buf; }
4036 inline wxString operator+(const wxScopedCharBuffer& buf, const wxString& string)
4037     { return (const char *)buf + string; }
4038
4039 // comparison with char
4040 inline bool operator==(const wxUniChar& c, const wxString& s) { return s.IsSameAs(c); }
4041 inline bool operator==(const wxUniCharRef& c, const wxString& s) { return s.IsSameAs(c); }
4042 inline bool operator==(char c, const wxString& s) { return s.IsSameAs(c); }
4043 inline bool operator==(wchar_t c, const wxString& s) { return s.IsSameAs(c); }
4044 inline bool operator==(int c, const wxString& s) { return s.IsSameAs(c); }
4045 inline bool operator==(const wxString& s, const wxUniChar& c) { return s.IsSameAs(c); }
4046 inline bool operator==(const wxString& s, const wxUniCharRef& c) { return s.IsSameAs(c); }
4047 inline bool operator==(const wxString& s, char c) { return s.IsSameAs(c); }
4048 inline bool operator==(const wxString& s, wchar_t c) { return s.IsSameAs(c); }
4049 inline bool operator!=(const wxUniChar& c, const wxString& s) { return !s.IsSameAs(c); }
4050 inline bool operator!=(const wxUniCharRef& c, const wxString& s) { return !s.IsSameAs(c); }
4051 inline bool operator!=(char c, const wxString& s) { return !s.IsSameAs(c); }
4052 inline bool operator!=(wchar_t c, const wxString& s) { return !s.IsSameAs(c); }
4053 inline bool operator!=(int c, const wxString& s) { return !s.IsSameAs(c); }
4054 inline bool operator!=(const wxString& s, const wxUniChar& c) { return !s.IsSameAs(c); }
4055 inline bool operator!=(const wxString& s, const wxUniCharRef& c) { return !s.IsSameAs(c); }
4056 inline bool operator!=(const wxString& s, char c) { return !s.IsSameAs(c); }
4057 inline bool operator!=(const wxString& s, wchar_t c) { return !s.IsSameAs(c); }
4058
4059
4060 // wxString iterators comparisons
4061 inline bool wxString::iterator::operator==(const const_iterator& i) const
4062     { return i == *this; }
4063 inline bool wxString::iterator::operator!=(const const_iterator& i) const
4064     { return i != *this; }
4065 inline bool wxString::iterator::operator<(const const_iterator& i) const
4066     { return i > *this; }
4067 inline bool wxString::iterator::operator>(const const_iterator& i) const
4068     { return i < *this; }
4069 inline bool wxString::iterator::operator<=(const const_iterator& i) const
4070     { return i >= *this; }
4071 inline bool wxString::iterator::operator>=(const const_iterator& i) const
4072     { return i <= *this; }
4073
4074 // comparison with C string in Unicode build
4075 #if wxUSE_UNICODE
4076
4077 #define wxCMP_CHAR_STRING(p, s, op) wxString(p) op s
4078
4079 wxDEFINE_ALL_COMPARISONS(const char *, const wxString&, wxCMP_CHAR_STRING)
4080
4081 #undef wxCMP_CHAR_STRING
4082
4083 #endif // wxUSE_UNICODE
4084
4085 // we also need to provide the operators for comparison with wxCStrData to
4086 // resolve ambiguity between operator(const wxChar *,const wxString &) and
4087 // operator(const wxChar *, const wxChar *) for "p == s.c_str()"
4088 //
4089 // notice that these are (shallow) pointer comparisons, not (deep) string ones
4090 #define wxCMP_CHAR_CSTRDATA(p, s, op) p op s.AsChar()
4091 #define wxCMP_WCHAR_CSTRDATA(p, s, op) p op s.AsWChar()
4092
4093 wxDEFINE_ALL_COMPARISONS(const wchar_t *, const wxCStrData&, wxCMP_WCHAR_CSTRDATA)
4094 wxDEFINE_ALL_COMPARISONS(const char *, const wxCStrData&, wxCMP_CHAR_CSTRDATA)
4095
4096 #undef wxCMP_CHAR_CSTRDATA
4097 #undef wxCMP_WCHAR_CSTRDATA
4098
4099 // ---------------------------------------------------------------------------
4100 // Implementation only from here until the end of file
4101 // ---------------------------------------------------------------------------
4102
4103 #if wxUSE_STD_IOSTREAM
4104
4105 #include "wx/iosfwrap.h"
4106
4107 WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream&, const wxString&);
4108 WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream&, const wxCStrData&);
4109 WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream&, const wxScopedCharBuffer&);
4110 #ifndef __BORLANDC__
4111 WXDLLIMPEXP_BASE wxSTD ostream& operator<<(wxSTD ostream&, const wxScopedWCharBuffer&);
4112 #endif
4113
4114 #if wxUSE_UNICODE && defined(HAVE_WOSTREAM)
4115
4116 WXDLLIMPEXP_BASE wxSTD wostream& operator<<(wxSTD wostream&, const wxString&);
4117 WXDLLIMPEXP_BASE wxSTD wostream& operator<<(wxSTD wostream&, const wxCStrData&);
4118 WXDLLIMPEXP_BASE wxSTD wostream& operator<<(wxSTD wostream&, const wxScopedWCharBuffer&);
4119
4120 #endif  // wxUSE_UNICODE && defined(HAVE_WOSTREAM)
4121
4122 #endif  // wxUSE_STD_IOSTREAM
4123
4124 // ---------------------------------------------------------------------------
4125 // wxCStrData implementation
4126 // ---------------------------------------------------------------------------
4127
4128 inline wxCStrData::wxCStrData(char *buf)
4129     : m_str(new wxString(buf)), m_offset(0), m_owned(true) {}
4130 inline wxCStrData::wxCStrData(wchar_t *buf)
4131     : m_str(new wxString(buf)), m_offset(0), m_owned(true) {}
4132
4133 inline wxCStrData::wxCStrData(const wxCStrData& data)
4134     : m_str(data.m_owned ? new wxString(*data.m_str) : data.m_str),
4135       m_offset(data.m_offset),
4136       m_owned(data.m_owned)
4137 {
4138 }
4139
4140 inline wxCStrData::~wxCStrData()
4141 {
4142     if ( m_owned )
4143         delete const_cast<wxString*>(m_str); // cast to silence warnings
4144 }
4145
4146 // AsChar() and AsWChar() implementations simply forward to wxString methods
4147
4148 inline const wchar_t* wxCStrData::AsWChar() const
4149 {
4150     const wchar_t * const p =
4151 #if wxUSE_UNICODE_WCHAR
4152         m_str->wc_str();
4153 #elif wxUSE_UNICODE_UTF8
4154         m_str->AsWChar(wxMBConvStrictUTF8());
4155 #else
4156         m_str->AsWChar(wxConvLibc);
4157 #endif
4158
4159     // in Unicode build the string always has a valid Unicode representation
4160     // and even if a conversion is needed (as in UTF8 case) it can't fail
4161     //
4162     // but in ANSI build the string contents might be not convertible to
4163     // Unicode using the current locale encoding so we do need to check for
4164     // errors
4165 #if !wxUSE_UNICODE
4166     if ( !p )
4167     {
4168         // if conversion fails, return empty string and not NULL to avoid
4169         // crashes in code written with either wxWidgets 2 wxString or
4170         // std::string behaviour in mind: neither of them ever returns NULL
4171         // from its c_str() and so we shouldn't neither
4172         //
4173         // notice that the same is done in AsChar() below and
4174         // wxString::wc_str() and mb_str() for the same reasons
4175         return L"";
4176     }
4177 #endif // !wxUSE_UNICODE
4178
4179     return p + m_offset;
4180 }
4181
4182 inline const char* wxCStrData::AsChar() const
4183 {
4184 #if wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
4185     const char * const p = m_str->AsChar(wxConvLibc);
4186     if ( !p )
4187         return "";
4188 #else // !wxUSE_UNICODE || wxUSE_UTF8_LOCALE_ONLY
4189     const char * const p = m_str->mb_str();
4190 #endif // wxUSE_UNICODE && !wxUSE_UTF8_LOCALE_ONLY
4191
4192     return p + m_offset;
4193 }
4194
4195 inline wxString wxCStrData::AsString() const
4196 {
4197     if ( m_offset == 0 )
4198         return *m_str;
4199     else
4200         return m_str->Mid(m_offset);
4201 }
4202
4203 inline const wxStringCharType *wxCStrData::AsInternal() const
4204 {
4205 #if wxUSE_UNICODE_UTF8
4206     return wxStringOperations::AddToIter(m_str->wx_str(), m_offset);
4207 #else
4208     return m_str->wx_str() + m_offset;
4209 #endif
4210 }
4211
4212 inline wxUniChar wxCStrData::operator*() const
4213 {
4214     if ( m_str->empty() )
4215         return wxUniChar(wxT('\0'));
4216     else
4217         return (*m_str)[m_offset];
4218 }
4219
4220 inline wxUniChar wxCStrData::operator[](size_t n) const
4221 {
4222     // NB: we intentionally use operator[] and not at() here because the former
4223     //     works for the terminating NUL while the latter does not
4224     return (*m_str)[m_offset + n];
4225 }
4226
4227 // ----------------------------------------------------------------------------
4228 // more wxCStrData operators
4229 // ----------------------------------------------------------------------------
4230
4231 // we need to define those to allow "size_t pos = p - s.c_str()" where p is
4232 // some pointer into the string
4233 inline size_t operator-(const char *p, const wxCStrData& cs)
4234 {
4235     return p - cs.AsChar();
4236 }
4237
4238 inline size_t operator-(const wchar_t *p, const wxCStrData& cs)
4239 {
4240     return p - cs.AsWChar();
4241 }
4242
4243 // ----------------------------------------------------------------------------
4244 // implementation of wx[W]CharBuffer inline methods using wxCStrData
4245 // ----------------------------------------------------------------------------
4246
4247 // FIXME-UTF8: move this to buffer.h
4248 inline wxCharBuffer::wxCharBuffer(const wxCStrData& cstr)
4249                     : wxCharTypeBufferBase(cstr.AsCharBuf())
4250 {
4251 }
4252
4253 inline wxWCharBuffer::wxWCharBuffer(const wxCStrData& cstr)
4254                     : wxCharTypeBufferBase(cstr.AsWCharBuf())
4255 {
4256 }
4257
4258 #if wxUSE_UNICODE_UTF8
4259 // ----------------------------------------------------------------------------
4260 // implementation of wxStringIteratorNode inline methods
4261 // ----------------------------------------------------------------------------
4262
4263 void wxStringIteratorNode::DoSet(const wxString *str,
4264                                  wxStringImpl::const_iterator *citer,
4265                                  wxStringImpl::iterator *iter)
4266 {
4267     m_prev = NULL;
4268     m_iter = iter;
4269     m_citer = citer;
4270     m_str = str;
4271     if ( str )
4272     {
4273         m_next = str->m_iterators.ptr;
4274         const_cast<wxString*>(m_str)->m_iterators.ptr = this;
4275         if ( m_next )
4276             m_next->m_prev = this;
4277     }
4278     else
4279     {
4280         m_next = NULL;
4281     }
4282 }
4283
4284 void wxStringIteratorNode::clear()
4285 {
4286     if ( m_next )
4287         m_next->m_prev = m_prev;
4288     if ( m_prev )
4289         m_prev->m_next = m_next;
4290     else if ( m_str ) // first in the list
4291         const_cast<wxString*>(m_str)->m_iterators.ptr = m_next;
4292
4293     m_next = m_prev = NULL;
4294     m_citer = NULL;
4295     m_iter = NULL;
4296     m_str = NULL;
4297 }
4298 #endif // wxUSE_UNICODE_UTF8
4299
4300 #if WXWIN_COMPATIBILITY_2_8
4301     // lot of code out there doesn't explicitly include wx/crt.h, but uses
4302     // CRT wrappers that are now declared in wx/wxcrt.h and wx/wxcrtvararg.h,
4303     // so let's include this header now that wxString is defined and it's safe
4304     // to do it:
4305     #include "wx/crt.h"
4306 #endif
4307
4308 // ----------------------------------------------------------------------------
4309 // Checks on wxString characters
4310 // ----------------------------------------------------------------------------
4311
4312 template<bool (T)(const wxUniChar& c)>
4313     inline bool wxStringCheck(const wxString& val)
4314     {
4315         for ( wxString::const_iterator i = val.begin();
4316               i != val.end();
4317               ++i )
4318             if (T(*i) == 0)
4319                 return false;
4320         return true;
4321     }
4322
#endif  // _WX_WXSTRING_H__