Initial review of various [q-r] by Utensil Candel.

[wxWidgets.git] / interface / string.h
diff --git a/interface/string.h b/interface/string.h

index 4441581d90fec3ff91b9d64f8d9bb2d6bd5bf227..038722cac1449fbab5297ef72155112cfa4abe04 100644 (file)
--- a/interface/string.h
+++ b/interface/string.h
@@ -72,60 +72,256 @@ public:
      @class wxString
      @wxheader{string.h}
  
-    wxString is a class representing a character string. Please see the
-    @ref overview_wxstringoverview "wxString overview" for more information about
-    it.
-
-    As explained there, wxString implements most of the methods of the std::string
-    class.
-    These standard functions are not documented in this manual, please see the
-    STL documentation).
-    The behaviour of all these functions is identical to the behaviour described
-    there.
-
-    You may notice that wxString sometimes has many functions which do the same
-    thing like, for example, wxString::Length,
-    wxString::Len and @c length() which all return the string
-    length. In all cases of such duplication the @c std::string-compatible
-    method (@c length() in this case, always the lowercase version) should be
+    wxString is a class representing a character string. It uses 
+    reference counting and copy-on-write internally and is not
+    thread-safe. Please see the 
+    @ref overview_string "wxString overview" and the 
+    @ref overview_unicode "Unicode overview" for more information
+    about it.
+    
+    Since wxWidgets 3.0 wxString internally uses UCS-2 (basically a 2-byte
+    wchar_t per character) under Windows and UTF-8 under Unix, Linux and
+    OS X to store its content. Much work has been done to make
+    existing code using ANSI string literals work as before.
+
+    wxString implements most of the methods of the
+    std::string class. These standard functions are not documented in
+    this manual, please see the STL documentation. The behaviour of
+    all these functions is identical to the behaviour described there.
+
+    You may notice that wxString sometimes has many functions which do
+    the same thing like, for example, wxString::Length, wxString::Len and @c length()
+    which all return the string length. In all cases of such duplication the @c std::string
+    compatible method (@c length() in this case, always the lowercase version) should be
      used as it will ensure smoother transition to @c std::string when wxWidgets
      starts using it instead of wxString.
  
+        Anything may be concatenated (appended to) with a string. However, you can't
+        append something to a C string (including literal constants), so to do this it
+        should be converted to a wxString first.
+        
+        @li @ref operatorout() "operator <<"
+        @li operator+=()
+        @li operator+()
+        @li Append()
+        @li Prepend()
+
+        A string may be constructed either from a C string, (some number of copies of)
+        a single character or a wide (UNICODE) string. For all constructors (except the
+        default which creates an empty string) there is also a corresponding assignment
+        operator.
+        
+        @li wxString()
+        @li operator=()
+        @li ~wxString
+
+        The MakeXXX() variants modify the string in place, while the other functions
+        return a new string which contains the original text converted to the upper or
+        lower case and leave the original string unchanged.
+        
+        @li MakeUpper()
+        @li Upper()
+        @li MakeLower()
+        @li Lower()
+
+
+        Many functions in this section take a character index in the string. As with C
+        strings and/or arrays, the indices start from 0, so the first character of a
+        string is string[0]. Attempt to access a character beyond the end of the
+        string (which may be even 0 if the string is empty) will provoke an assert
+        failure in @ref overview_debugging "debug build", but no checks are
+        done in release builds.
+        This section also contains both implicit and explicit conversions to C style
+        strings. Although implicit conversion is quite convenient, it is advised to use
+        explicit c_str() method for the sake of clarity. 
+        
+        @li GetChar()
+        @li GetWritableChar()
+        @li SetChar()
+        @li Last()
+        @li operator[]
+        @li c_str()
+        @li mb_str()
+        @li wc_str()
+        @li fn_str()
+        @li operator const char*()
+
+        The default comparison function Cmp() is case-sensitive and
+        so is the default version of IsSameAs(). For case
+        insensitive comparisons you should use CmpNoCase() or
+        give a second parameter to IsSameAs(). This last function may be more
+        convenient if only equality of the strings matters because it returns a boolean
+        @true value if the strings are the same and not 0 (which is usually @false
+        in C) as Cmp() does.
+        Matches() is a poor man's regular expression matcher: it only understands 
+        '*' and '?' metacharacters in the sense of DOS command line interpreter.
+        StartsWith() is helpful when parsing a line of text which should start
+        with some predefined prefix and is more efficient than doing direct string
+        comparison as you would also have to precalculate the length of the prefix then.
+        
+        @li Cmp()
+        @li CmpNoCase()
+        @li IsSameAs()
+        @li Matches()
+        @li StartsWith()
+        @li EndsWith()
+
+        The string provides functions for conversion to signed and unsigned integer and
+        floating point numbers. All of these functions take a pointer to the variable to
+        put the numeric value in and return @true if the @b entire string could be
+        converted to a number.
+        
+        @li ToLong()
+        @li ToLongLong()
+        @li ToULong()
+        @li ToULongLong()
+        @li ToDouble()
+
+        These are "advanced" functions and they will be needed quite rarely.
+        Alloc() and Shrink() are only interesting for optimization purposes.
+        wxStringBuffer and wxStringBufferLength classes may be very useful
+        when working with some external API which requires the caller to provide
+        a writable buffer.
+        
+        @li Alloc()
+        @li Shrink()
+        @li wxStringBuffer
+        @li wxStringBufferLength
+
+        Misc. other string functions.
+        
+        @li Trim()
+        @li Truncate()
+        @li Pad()
+
+        These functions return the string length and check whether the string
+        is empty or empty it.
+        
+        @li Len()
+        @li IsEmpty()
+        @li operator!()
+        @li Empty()
+        @li Clear()
+
+
+        These functions allow you to extract a substring from this string. None of them
+        modifies the original string; each returns a new string containing the extracted
+        substring.
+        
+        @li Mid()
+        @li operator()()
+        @li Left()
+        @li Right()
+        @li BeforeFirst()
+        @li BeforeLast()
+        @li AfterFirst()
+        @li AfterLast()
+        @li StartsWith()
+        @li EndsWith()
+
+        These functions replace the standard @e strchr() and @e strstr()
+        functions.
+        
+        @li Find()
+        @li Replace()
+
+        Both formatted versions (Printf()) and stream-like insertion operators
+        exist (for basic types only). Additionally, the Format() function allows
+        you to simply append a formatted value to a string:
+
+        @li Format()
+        @li FormatV()
+        @li Printf()
+        @li PrintfV()
+        @li operator>>()
+
+        These functions are deprecated, please consider using new wxWidgets 2.0
+        functions instead of them (or, even better, std::string compatible variants).
+        
+        CompareTo(), Contains(), First(), Freq(), Index(), IsAscii(), IsNull(),
+        IsNumber(), IsWord(), Last(), Length(), LowerCase(), Remove(), Strip(),
+        SubString(), UpperCase()
+
      @library{wxbase}
      @category{data}
  
      @stdobjects
      ::Objects:, ::wxEmptyString,
  
-    @see @ref overview_wxstringoverview "wxString overview", @ref overview_unicode
+    @see @ref overview_string "wxString overview", @ref overview_unicode
      "Unicode overview"
  */
  class wxString
  {
  public:
-    //@{
      /**
-        Initializes the string from first @a nLength characters of C string.
-        The default value of @c wxSTRING_MAXLEN means take all the string.
-        In Unicode build, @e conv's
-        wxMBConv::MB2WC method is called to
-        convert @a psz to wide string (the default converter uses current locale's
-        charset). It is ignored in ANSI build.
-
-        @see @ref overview_mbconvclasses "wxMBConv classes", @ref mbstr()
-             mb_str, @ref wcstr() wc_str
+       Default constructor
      */
      wxString();
-    wxString(const wxString& x);
-    wxString(wxChar ch, size_t n = 1);
-    wxString(const wxChar* psz, size_t nLength = wxSTRING_MAXLEN);
-    wxString(const unsigned char* psz,
-             size_t nLength = wxSTRING_MAXLEN);
-    wxString(const wchar_t* psz, const wxMBConv& conv,
-             size_t nLength = wxSTRING_MAXLEN);
-    wxString(const char* psz, const wxMBConv& conv = wxConvLibc,
-             size_t nLength = wxSTRING_MAXLEN);
-    //@}
+    
+    /**
+       Creates a string from another string. Just increases the ref 
+       count by 1.
+    */
+    wxString(const wxString& stringSrc);
+    
+
+    /**
+       Constructs a string from the string literal @c psz using
+       the current locale encoding to convert it to Unicode.
+    */
+    wxString(const char *psz);
+
+    /**
+       Constructs a string from the string literal @c psz using
+       @c conv to convert it to Unicode.
+    */
+    wxString(const char *psz, const wxMBConv& conv);
+
+    /**
+       Constructs a string from the first @a nLength characters of the string literal @c psz using
+       the current locale encoding to convert it to Unicode.
+    */
+    wxString(const char *psz, size_t nLength);
+
+    /**
+       Constructs a string from the first @a nLength characters of the string literal @c psz using
+       @c conv to convert it to Unicode.
+    */
+    wxString(const char *psz, const wxMBConv& conv, size_t nLength);
+
+    /**
+       Constructs a string from the string literal @c pwz.
+    */
+    wxString(const wchar_t *pwz);
+
+    /**
+       Constructs a string from the first @a nLength characters of the string literal @c pwz.
+    */
+    wxString(const wchar_t *pwz, size_t nLength);
+
+    /**
+       Constructs a string from @c buf using
+       the current locale encoding to convert it to Unicode.
+    */
+    wxString(const wxCharBuffer& buf);
+    
+    /**
+       Constructs a string from @c buf.
+    */
+    wxString(const wxWCharBuffer& buf);
+
+    /**
+       Constructs a string from @a str using
+       the current locale encoding to convert it to Unicode.
+    */
+    wxString(const std::string& str);
+    
+    /**
+       Constructs a string from @a str.
+    */
+    wxString(const std::wstring& str);
+    
  
      /**
          String destructor. Note that this is not virtual, so wxString must not be
@@ -146,18 +342,43 @@ public:
      wxString AfterLast(wxChar ch) const;
  
      /**
-        Preallocate enough space for wxString to store @a nLen characters. This function
-        may be used to increase speed when the string is constructed by repeated
-        concatenation as in
+        Preallocate enough space for wxString to store @a nLen characters.
+
+        Please note that this method does the same thing as the standard
+        reserve() one and shouldn't be used in new code.
+
+        This function may be used to increase speed when the string is
+        constructed by repeated concatenation as in
  
-        because it will avoid the need to reallocate string memory many times (in case
-        of long strings). Note that it does not set the maximal length of a string - it
-        will still expand if more than @a nLen characters are stored in it. Also, it
-        does not truncate the existing string (use
-        Truncate() for this) even if its current length is
-        greater than @e nLen
+        @code
+            // delete all vowels from the string
+            wxString DeleteAllVowels(const wxString& original)
+            {
+                wxString result;
+
+                size_t len = original.length();
+
+                result.Alloc(len);
+
+                for ( size_t n = 0; n < len; n++ )
+                {
+                    if ( strchr("aeuio", tolower(original[n])) == NULL )
+                        result += original[n];
+                }
+
+                return result;
+            }
+        @endcode
+
+        because it will avoid the need to reallocate string memory many times
+        (in case of long strings). Note that it does not set the maximal length
+        of a string -- it will still expand if more than @a nLen characters are
+        stored in it. Also, it does not truncate the existing string (use
+        Truncate() for this) even if its current length is greater than @a nLen.
+
+        @return @true if memory was successfully allocated, @false otherwise.
      */
-    void Alloc(size_t nLen);
+    bool Alloc(size_t nLen);
  
      //@{
      /**
@@ -181,60 +402,26 @@ public:
      */
      wxString BeforeLast(wxChar ch) const;
  
-    /**
-        The MakeXXX() variants modify the string in place, while the other functions
-        return a new string which contains the original text converted to the upper or
-        lower case and leave the original string unchanged.
-        MakeUpper()
-
-        Upper()
  
-        MakeLower()
-
-        Lower()
+    /**
+        Empties the string and frees memory occupied by it.
+        See also: Empty()
      */
-
+    void Clear();
  
      /**
-        Many functions in this section take a character index in the string. As with C
-        strings and/or arrays, the indices start from 0, so the first character of a
-        string is string[0]. Attempt to access a character beyond the end of the
-        string (which may be even 0 if the string is empty) will provoke an assert
-        failure in @ref overview_debuggingoverview "debug build", but no checks are
-        done in
-        release builds.
-        This section also contains both implicit and explicit conversions to C style
-        strings. Although implicit conversion is quite convenient, it is advised to use
-        explicit @ref cstr() c_str method for the sake of clarity. Also
-        see overview() for the cases where it is necessary to
-        use it.
-        GetChar()
-
-        GetWritableChar()
-
-        SetChar()
-
-        Last()
+        Returns a deep copy of the string.
  
-        @ref operatorbracket() "operator []"
+        That is, the returned string is guaranteed to not share data with this
+        string when using reference-counted wxString implementation.
  
-        @ref cstr() c_str
+        This method is primarily useful for passing strings between threads
+        (because wxString is not thread-safe). Unlike creating a copy using
+        @c wxString(c_str()), Clone() handles embedded NULs correctly.
  
-        @ref mbstr() mb_str
-
-        @ref wcstr() wc_str
-
-        @ref fnstr() fn_str
-
-        @ref operatorconstcharpt() "operator const char*"
-    */
-
-
-    /**
-        Empties the string and frees memory occupied by it.
-        See also: Empty()
-    */
-    void Clear();
+        @since 2.9.0
+     */
+    wxString Clone() const;
  
      //@{
      /**
@@ -268,34 +455,6 @@ public:
      */
      int CompareTo(const wxChar* psz, caseCompare cmp = exact) const;
  
-    /**
-        The default comparison function Cmp() is case-sensitive and
-        so is the default version of IsSameAs(). For case
-        insensitive comparisons you should use CmpNoCase() or
-        give a second parameter to IsSameAs. This last function is may be more
-        convenient if only equality of the strings matters because it returns a boolean
-        @true value if the strings are the same and not 0 (which is usually @false in
-        C)
-        as @c Cmp() does.
-        Matches() is a poor man's regular expression matcher:
-        it only understands '*' and '?' metacharacters in the sense of DOS command line
-        interpreter.
-        StartsWith() is helpful when parsing a line of
-        text which should start with some predefined prefix and is more efficient than
-        doing direct string comparison as you would also have to precalculate the
-        length of the prefix then.
-        Cmp()
-
-        CmpNoCase()
-
-        IsSameAs()
-
-        Matches()
-
-        StartsWith()
-
-        EndsWith()
-    */
  
  
      //@{
@@ -316,34 +475,6 @@ public:
      bool operator =(const wxString& x, const wxChar* t);
      //@}
  
-    /**
-        Anything may be concatenated (appended to) with a string. However, you can't
-        append something to a C string (including literal constants), so to do this it
-        should be converted to a wxString first.
-        @ref operatorout() "operator "
-
-        @ref plusequal() "operator +="
-
-        @ref operatorplus() "operator +"
-
-        Append()
-
-        Prepend()
-    */
-
-
-    /**
-        A string may be constructed either from a C string, (some number of copies of)
-        a single character or a wide (UNICODE) string. For all constructors (except the
-        default which creates an empty string) there is also a corresponding assignment
-        operator.
-        @ref construct() wxString
-
-        @ref operatorassign() "operator ="
-
-        @ref destruct() ~wxString
-    */
-
  
      /**
          Returns @true if target appears anywhere in wxString; else @false.
@@ -352,22 +483,6 @@ public:
      */
      bool Contains(const wxString& str) const;
  
-    /**
-        The string provides functions for conversion to signed and unsigned integer and
-        floating point numbers. All three functions take a pointer to the variable to
-        put the numeric value in and return @true if the @b entire string could be
-        converted to a number.
-        ToLong()
-
-        ToLongLong()
-
-        ToULong()
-
-        ToULongLong()
-
-        ToDouble()
-    */
-
  
      /**
          Makes the string empty, but doesn't free memory occupied by the string.
@@ -434,7 +549,7 @@ public:
          without @a len parameter takes NUL-terminated data.
          This is a convenience method useful when storing binary data in wxString.
  
-        @wxsince{2.8.4}
+        @since 2.8.4
  
          @see wxString::To8BitData
      */
@@ -605,44 +720,16 @@ public:
      */
      bool Matches(const wxString& mask) const;
  
-    /**
-        These are "advanced" functions and they will be needed quite rarely.
-        Alloc() and Shrink() are only
-        interesting for optimization purposes.
-        wxStringBuffer
-        and wxStringBufferLength classes may be very
-        useful when working with some external API which requires the caller to provide
-        a writable buffer.
-        Alloc()
-
-        Shrink()
-
-        wxStringBuffer
-
-        wxStringBufferLength
-    */
-
-
      /**
          Returns a substring starting at @e first, with length @e count, or the rest of
          the string if @a count is the default value.
      */
      wxString Mid(size_t first, size_t count = wxSTRING_MAXLEN) const;
  
-    /**
-        Other string functions.
-        Trim()
-
-        Truncate()
-
-        Pad()
-    */
-
  
      /**
-        Adds @a count copies of @a pad to the beginning, or to the end of the string
-        (the default).
-        Removes spaces from the left or from the right (default).
+        Adds @a count copies of @a pad to the beginning, or to the end of the
+        string (the default).  Removes spaces from the left or from the right (default).
      */
      wxString Pad(size_t count, wxChar pad = ' ',
                   bool fromRight = true);
@@ -658,7 +745,7 @@ public:
          Note that if @c wxUSE_PRINTF_POS_PARAMS is set to 1, then this function supports
          Unix98-style positional parameters:
  
-        @b NB: This function will use a safe version of @e vsprintf() (usually called
+        @note This function will use a safe version of @e vsprintf() (usually called
          @e vsnprintf()) whenever available to always allocate the buffer of correct
          size. Unfortunately, this function is not available on all platforms and the
          dangerous @e vsprintf() will be used then which may lead to buffer overflows.
@@ -700,15 +787,6 @@ public:
      */
      wxString Right(size_t count) const;
  
-    /**
-        These functions replace the standard @e strchr() and @e strstr()
-        functions.
-        Find()
-
-        Replace()
-    */
-
-
      /**
          Sets the character at position @e n.
      */
@@ -729,21 +807,6 @@ public:
      */
      bool StartsWith(const wxString& prefix, wxString rest = NULL) const;
  
-    /**
-        These functions return the string length and check whether the string is empty
-        or empty it.
-        Len()
-
-        IsEmpty()
-
-        @ref operatornot() operator!
-
-        Empty()
-
-        Clear()
-    */
-
-
      /**
          Strip characters at the front and/or end. The same as Trim except that it
          doesn't change this string.
@@ -760,39 +823,13 @@ public:
      */
      wxString SubString(size_t from, size_t to) const;
  
-    /**
-        These functions allow to extract substring from this string. All of them don't
-        modify the original string and return a new string containing the extracted
-        substring.
-        Mid()
-
-        @ref operatorparenth() operator
-
-        Left()
-
-        Right()
-
-        BeforeFirst()
-
-        BeforeLast()
-
-        AfterFirst()
-
-        AfterLast()
-
-        StartsWith()
-
-        EndsWith()
-    */
-
-
      //@{
      /**
          Converts the string to an 8-bit string in ISO-8859-1 encoding in the form of
          a wxCharBuffer (Unicode builds only).
          This is a convenience method useful when storing binary data in wxString.
  
-        @wxsince{2.8.4}
+        @since 2.8.4
  
          @see wxString::From8BitData
      */
@@ -925,34 +962,11 @@ public:
      void UpperCase();
  
      /**
-        Both formatted versions (wxString::Printf) and stream-like
-        insertion operators exist (for basic types only). Additionally, the
-        Format() function allows to use simply append
-        formatted value to a string:
-
-        Format()
-
-        FormatV()
-
-        Printf()
-
-        PrintfV()
-
-        @ref operatorout() "operator "
-    */
-
-
-    /**
-        Returns a pointer to the string data (@c const char* in ANSI build,
-        @c const wchar_t* in Unicode build).
+        Returns a pointer to the string data (@c const char* when using UTF-8
+        internally, @c const wchar_t* when using UCS-2 internally).
          Note that the returned value is not convertible to @c char* or
-        @c wchar_t*, use @ref charstr() char_str or
-        @ref wcharstr() wchar_string if you need to pass string value
-        to a function expecting non-const pointer.
-
-        @see @ref mbstr() mb_str, @ref wcstr() wc_str, @ref
-             fnstr() fn_str, @ref charstr() char_str, @ref
-             wcharstr() wchar_string
+        @c wchar_t*, use char_str() or wchar_str() if you need to pass
+        string value to a function expecting non-const pointer.
      */
      const wxChar* c_str() const;
  
@@ -960,24 +974,17 @@ public:
          Returns an object with string data that is implicitly convertible to
          @c char* pointer. Note that any change to the returned buffer is lost and so
          this function is only usable for passing strings to legacy libraries that
-        don't have const-correct API. Use wxStringBuffer if
-        you want to modify the string.
+        don't have const-correct API. Use wxStringBuffer if you want to modify
+        the string.
  
-        @see @ref mbstr() mb_str, @ref wcstr() wc_str, @ref
-             fnstr() fn_str, @ref cstr() c_str, @ref
-             wcharstr() wchar_str
+        @see c_str()
      */
      wxWritableCharBuffer char_str(const wxMBConv& conv = wxConvLibc) const;
  
      //@{
      /**
          Returns string representation suitable for passing to OS' functions for
-        file handling. In ANSI build, this is same as @ref cstr() c_str.
-        In Unicode build, returned value can be either wide character string
-        or C string in charset matching the @c wxConvFileName object, depending on
-        the OS.
-
-        @see wxMBConv, @ref wcstr() wc_str, @ref wcstr() mb_str
+        file handling. 
      */
      const wchar_t* fn_str() const;
      const const char* fn_str() const;
@@ -992,8 +999,7 @@ public:
          as @ref cstr() c_str.
          The macro wxWX2MBbuf is defined as the correct return type (without const).
  
-        @see wxMBConv, @ref cstr() c_str, @ref wcstr() wc_str, @ref
-             fnstr() fn_str, @ref charstr() char_str
+        @see wxMBConv, c_str(), wc_str(), fn_str(), char_str()
      */
      const char* mb_str(const wxMBConv& conv = wxConvLibc) const;
      const const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;
@@ -1008,8 +1014,7 @@ public:
      /**
          These functions work as C++ stream insertion operators: they insert the given
          value into the string. Precision or format cannot be set using them, you can
-        use
-        Printf() for this.
+        use Printf() for this.
      */
      wxString operator(const wxString& str);
      wxString operator(const wxChar* psz);
@@ -1098,13 +1103,11 @@ public:
      //@{
      /**
          Returns wide character representation of the string.
-        In ANSI build, converts using @e conv's wxMBConv::cMB2WC
-        method and returns wxWCharBuffer. In Unicode build, this function is same
-        as @ref cstr() c_str.
-        The macro wxWX2WCbuf is defined as the correct return type (without const).
+        In Unicode build, this function is same as c_str().
+        The macro wxWX2WCbuf is defined as the correct return
+        type (without const).
  
-        @see wxMBConv, @ref cstr() c_str, @ref wcstr() mb_str, @ref
-             fnstr() fn_str, @ref wcharstr() wchar_str
+        @see wxMBConv, c_str(), mb_str(), fn_str(), wchar_str()
      */
      const wchar_t* wc_str(const wxMBConv& conv) const;
      const const wxWCharBuffer wc_str(const wxMBConv& conv) const;
@@ -1117,47 +1120,10 @@ public:
          passing strings to legacy libraries that don't have const-correct API. Use
          wxStringBuffer if you want to modify the string.
  
-        @see @ref mbstr() mb_str, @ref wcstr() wc_str, @ref
-             fnstr() fn_str, @ref cstr() c_str, @ref
-             charstr() char_str
+        @see mb_str(), wc_str(), fn_str(), c_str(), char_str()
      */
      wxWritableWCharBuffer wchar_str() const;
  
-    /**
-        These functions are deprecated, please consider using new wxWidgets 2.0
-        functions instead of them (or, even better, std::string compatible variants).
-        CompareTo()
-
-        Contains()
-
-        First()
-
-        Freq()
-
-        Index()
-
-        IsAscii()
-
-        IsNull()
-
-        IsNumber()
-
-        IsWord()
-
-        Last()
-
-        Length()
-
-        LowerCase()
-
-        Remove()
-
-        Strip()
-
-        SubString()
-
-        UpperCase()
-    */
  };