Doc and comment cleanup, fixes, tweaks

[wxWidgets.git] / interface / wx / string.h
diff --git a/interface/wx/string.h b/interface/wx/string.h

index 5daa5d3493237747dfbf63f8fcb7ca046e470acd..dd0c7d7d2e3408623c10948cf65ed5ac6355f926 100644 (file)
--- a/interface/wx/string.h
+++ b/interface/wx/string.h
@@ -9,7 +9,7 @@
  /**
      @class wxStringBuffer
  
-    This tiny class allows to conveniently access the wxString
+    This tiny class allows you to conveniently access the wxString
      internal buffer as a writable pointer without any risk of forgetting to restore
      the string to the usable state later.
  
@@ -26,20 +26,15 @@
          }
      @endcode
  
-    Note that the exact usage of this depends on whether on not wxUSE_STL is
-    enabled.  If
-    wxUSE_STL is enabled, wxStringBuffer creates a separate empty character buffer,
-    and
-    if wxUSE_STL is disabled, it uses GetWriteBuf() from wxString, keeping the same
-    buffer
-    wxString uses intact.  In other words, relying on wxStringBuffer containing the
-    old
-    wxString data is probably not a good idea if you want to build your program in
-    both
-    with and without wxUSE_STL.
+    Note that the exact usage of this depends on whether or not wxUSE_STL is
+    enabled. If wxUSE_STL is enabled, wxStringBuffer creates a separate empty
+    character buffer, and if wxUSE_STL is disabled, it uses GetWriteBuf() from
+    wxString, keeping the same buffer wxString uses intact. In other words,
+    relying on wxStringBuffer containing the old wxString data is not a good
+    idea if you want to build your program both with and without wxUSE_STL.
  
      @library{wxbase}
-    @category{FIXME}
+    @category{data}
  */
  class wxStringBuffer
  {
@@ -70,24 +65,41 @@ public:
  /**
      @class wxString
  
+    The wxString class has been completely rewritten for wxWidgets 3.0
+    and this change was actually the main reason for calling that
+    version wxWidgets 3.0.
+
      wxString is a class representing a Unicode character string.
      wxString uses @c std::string internally to store its content
      unless this is not supported by the compiler or disabled
-    specifically when building wxWidgets. Therefore wxString
-    inherits many features from @c std::string's. Most
-    implementations of @std::string are thread-safe and don't
-    use reference counting. By default, wxString uses @c std::string
-    internally even if wxUSE_STL is not defined.
-
-    Since wxWidgets 3.0 wxString internally uses UCS-2 (basically 2-byte per
-    character wchar_t) under Windows and UTF-8 under Unix, Linux and
-    OS X to store its content. Much work has been done to make existing
-    code using ANSI string literals work as before. If you need to have a
-    wxString that uses wchar_t on Unix and Linux, too, you can specify
-    this on the command line with the @c configure @c --disable-utf8 switch.
-
-    As a consequence of this change, iterating over a wxString by index
-    can become inefficient in UTF8 mode and iterators should be used instead:
+    specifically when building wxWidgets and it therefore inherits
+    many features from @c std::string. Most implementations of
+    @c std::string are thread-safe and don't use reference counting.
+    By default, wxString uses @c std::string internally even if
+    wxUSE_STL is not defined.
+
+    wxString now internally uses UTF-16 under Windows and UTF-8 under
+    Unix, Linux and OS X to store its content. Note that when iterating
+    over a UTF-16 string under Windows, the user code has to take care
+    of surrogate pair handling whereas Windows itself has built-in
+    support for surrogate pairs in UTF-16, such as for drawing strings on screen.
+    
+    Much work has been done to make existing code using ANSI string literals
+    work as before. If you nonetheless need to have a wxString that uses wchar_t
+    on Unix and Linux, too, you can specify this on the command line with the
+    @c configure @c --disable-utf8 switch or you can consider using wxUString
+    or std::wstring instead.
+
+    Accessing a UTF-8 string by index can be very inefficient because
+    a single character is represented by a variable number of bytes so that
+    the entire string has to be parsed in order to find the character.
+    Since iterating over a string by index is a common programming technique and
+    was also possible and encouraged by wxString using the access operator[](),
+    wxString implements caching of the last used index so that iterating over
+    a string is a linear operation even in UTF-8 mode.
+    
+    It is nonetheless recommended to use iterators (instead of index based
+    access) like this:
  
      @code
      wxString s = "hello";
@@ -116,14 +128,16 @@ public:
      described there.
  
      You may notice that wxString sometimes has several functions which do
-    the same thing like, for example, Length(), Len() and length() which
+    the same thing like Length(), Len() and length() which
      all return the string length. In all cases of such duplication the
      @c std::string compatible method should be used.
  
-        Anything may be concatenated (appended to) with a string. However, you can't
-        append something to a C string (including literal constants), so to do this it
-        should be converted to a wxString first.
+    Anything may be concatenated (appended to) with a string. However, you can't
+    append something to a C string (including literal constants), so to do this it
+    should be converted to a wxString first.
  
+        @li insert()
+        @li append()
          @li operator<<()
          @li operator+=()
          @li operator+()
@@ -131,13 +145,14 @@ public:
          @li Prepend()
  
          A string may be constructed either from a C string, (some number of copies of)
-        a single character or a wide (UNICODE) string. For all constructors (except the
+        a single character or a wide (Unicode) string. For all constructors (except the
          default which creates an empty string) there is also a corresponding assignment
          operator.
  
          @li wxString()
          @li operator=()
          @li ~wxString()
+        @li assign()
  
          The MakeXXX() variants modify the string in place, while the other functions
          return a new string which contains the original text converted to the upper or
@@ -147,40 +162,44 @@ public:
          @li Upper()
          @li MakeLower()
          @li Lower()
+        @li MakeCapitalized()
+        @li Capitalize()
  
-        Many functions in this section take a character index in the string. As with C
-        strings and/or arrays, the indices start from 0, so the first character of a
-        string is string[0]. Attempt to access a character beyond the end of the
-        string (which may be even 0 if the string is empty) will provoke an assert
+        Many functions below take a character index in the string. As with C
+        strings and arrays, the indices start from 0, so the first character of a
+        string is string[0]. An attempt to access a character beyond the end of the
+        string (which may even be 0 if the string is empty) will provoke an assert
          failure in @ref overview_debugging "debug build", but no checks are
          done in release builds.
          This section also contains both implicit and explicit conversions to C style
-        strings. Although implicit conversion is quite convenient, it is advised to use
-        explicit c_str() method for the sake of clarity.
+        strings. Although implicit conversion is quite convenient, you are advised
+        to use wc_str() for the sake of clarity.
  
          @li GetChar()
          @li GetWritableChar()
          @li SetChar()
          @li Last()
          @li operator[]()
+        @li wc_str()
+        @li utf8_str()
          @li c_str()
+        @li wx_str()
          @li mb_str()
-        @li wc_str()
          @li fn_str()
  
-        The default comparison function Cmp() is case-sensitive and
-        so is the default version of IsSameAs(). For case
-        insensitive comparisons you should use CmpNoCase() or
-        give a second parameter to IsSameAs. This last function is may be more
+        The default comparison function Cmp() is case-sensitive and so is the default
+        version of IsSameAs(). For case insensitive comparisons you should use CmpNoCase()
+        or give a second parameter to IsSameAs(). This last function may be more
          convenient if only equality of the strings matters because it returns a boolean
          @true value if the strings are the same and not 0 (which is usually @false
-        in C)as Cmp() does.
+        in C) as Cmp() does.
          Matches() is a poor man's regular expression matcher: it only understands
          '*' and '?' metacharacters in the sense of DOS command line interpreter.
          StartsWith() is helpful when parsing a line of text which should start
          with some predefined prefix and is more efficient than doing direct string
-        comparison as you would also have to precalculate the length of the prefix then.
+        comparison as you would also have to precalculate the length of the prefix.
  
+        @li compare()
          @li Cmp()
          @li CmpNoCase()
          @li IsSameAs()
@@ -189,7 +208,7 @@ public:
          @li EndsWith()
  
          The string provides functions for conversion to signed and unsigned integer and
-        floating point numbers. All three functions take a pointer to the variable to
+        floating point numbers. All functions take a pointer to the variable to
          put the numeric value in and return @true if the @b entire string could be
          converted to a number.
  
@@ -199,37 +218,41 @@ public:
          @li ToULongLong()
          @li ToDouble()
  
-        These are "advanced" functions and they will be needed quite rarely.
+        The following are "advanced" functions and they will be needed rarely.
          Alloc() and Shrink() are only interesting for optimization purposes.
          wxStringBuffer and wxStringBufferLength classes may be very useful
          when working with some external API which requires the caller to provide
          a writable buffer.
  
+        @li reserve()
+        @li resize()
          @li Alloc()
          @li Shrink()
          @li wxStringBuffer
          @li wxStringBufferLength
  
-        Misc. other string functions.
+        Miscellaneous other string functions.
  
          @li Trim()
          @li Truncate()
          @li Pad()
  
          These functions return the string length and check whether the string
-        is empty or empty it.
+        is empty or they empty it.
  
+        @li length()
+        @li size()
          @li Len()
          @li IsEmpty()
          @li operator!()
          @li Empty()
          @li Clear()
  
-
-        These functions allow to extract substring from this string. All of them don't
-        modify the original string and return a new string containing the extracted
+        These functions allow you to extract a substring from the string. The
+        original string is not modified and the function returns the extracted
          substring.
  
+        @li substr()
          @li Mid()
          @li operator()()
          @li Left()
@@ -244,12 +267,15 @@ public:
          These functions replace the standard @e strchr() and @e strstr()
          functions.
  
+        @li find()
+        @li rfind()
+        @li replace()
          @li Find()
          @li Replace()
  
          Both formatted versions (Printf/() and stream-like insertion operators
          exist (for basic types only). Additionally, the Format() function allows
-        to use simply append formatted value to a string:
+        you to simply append a formatted value to a string:
  
          @li Format()
          @li FormatV()
@@ -257,8 +283,8 @@ public:
          @li PrintfV()
          @li operator>>()
  
-        These functions are deprecated, please consider using new wxWidgets 2.0
-        functions instead of them (or, even better, std::string compatible variants).
+        The following functions are deprecated. Please consider using new wxWidgets 2.0
+        functions instead (or, even better, @c std::string compatible variants).
  
          Contains(), First(), Freq(), IsAscii(), IsNull(),
          IsNumber(), IsWord(), Last(), Length(), LowerCase(), Remove(), Strip(),
@@ -268,10 +294,10 @@ public:
      @category{data}
  
      @stdobjects
-    ::Objects:, ::wxEmptyString,
+    ::Objects, ::wxEmptyString,
  
      @see @ref overview_string "wxString overview", @ref overview_unicode
-    "Unicode overview"
+    "Unicode overview", wxUString
  */
  class wxString
  {
@@ -433,7 +459,7 @@ public:
      /**
         Appends the string literal @e psz with max length @e nLen.
      */
-    wxString& Append(const char* psz, size_t nLen);
+    wxString& Append(const char* psz, size_t nLen);
  
      /**
         Appends the wide string literal @e psz with max length @e nLen.
@@ -443,7 +469,7 @@ public:
      /**
         Appends the string @e s.
      */
-    wxString &Append(const wxString &s);
+    wxString& Append(const wxString& s);
  
      /**
         Appends the character @e ch @e count times.
@@ -463,6 +489,16 @@ public:
      wxString BeforeLast(wxUniChar ch) const;
  
  
+    /**
+        Returns a copy of the string with the first character in
+        upper case and the subsequent ones in lower case.
+
+        @since 2.9.0
+
+        @see MakeCapitalized()
+     */
+    wxString Capitalize() const;
+
      /**
          Empties the string and frees memory occupied by it.
          See also: Empty()
@@ -626,14 +662,39 @@ public:
      //@{
      /**
          Converts C string encoded in UTF-8 to wxString.
-        Note that this method assumes that @a s is a valid UTF-8 sequence and
-        doesn't do any validation in release builds, it's validity is only checked in
-        debug builds.
+
+        If @a s is not a valid UTF-8 string, an empty string is returned.
+
+        Notice that when using UTF-8 wxWidgets build there is a more efficient
+        alternative to this function called FromUTF8Unchecked() which, unlike
+        this one, doesn't check that the input string is valid.
+
+        @since 2.8.4
      */
      static wxString FromUTF8(const char* s);
      static wxString FromUTF8(const char* s, size_t len);
      //@}
  
+    //@{
+    /**
+        Converts C string encoded in UTF-8 to wxString without checking its
+        validity.
+
+        This method assumes that @a s is a valid UTF-8 sequence and doesn't do
+        any validation (although an assert failure is triggered in debug builds
+        if the string is invalid). Only use it if you are absolutely sure that
+        @a s is a correct UTF-8 string (e.g. because it comes from another
+        library using UTF-8) and if the performance matters, otherwise use
+        slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8
+        string to this function will result in creating a corrupted wxString
+        and all the subsequent operations on it will be undefined.
+
+        @since 2.8.9
+    */
+    static wxString FromUTF8Unchecked(const char* s);
+    static wxString FromUTF8Unchecked(const char* s, size_t len);
+    //@}
+
      /**
          Returns the character at position @a n (read-only).
      */
@@ -734,6 +795,8 @@ public:
  
      /**
          Returns this string converted to the lower case.
+
+        @see MakeLower()
      */
      wxString Lower() const;
  
@@ -745,12 +808,28 @@ public:
      void LowerCase();
  
      /**
-        Converts all characters to lower case and returns the result.
+        Converts the first character of the string to the upper case and all
+        the subsequent ones to the lower case and returns the result.
+
+        @since 2.9.0
+
+        @see Capitalize()
+    */
+    wxString& MakeCapitalized();
+
+    /**
+        Converts all characters to lower case and returns the reference to the
+        modified string.
+
+        @see Lower()
      */
      wxString& MakeLower();
  
      /**
-        Converts all characters to upper case and returns the result.
+        Converts all characters to upper case and returns the reference to the
+        modified string.
+
+        @see Upper()
      */
      wxString& MakeUpper();
  
@@ -884,7 +963,7 @@ public:
          Converts the string to an ASCII, 7-bit string in the form of
          a wxCharBuffer (Unicode builds only) or a C string (ANSI builds).
          Note that this conversion only works if the string contains only ASCII
-        characters. The @ref mbstr() mb_str method provides more
+        characters. The @ref mb_str() "mb_str" method provides more
          powerful means of converting wxString to C string.
      */
      const char* ToAscii() const;
@@ -958,7 +1037,7 @@ public:
          Same as utf8_str().
      */
      const char* ToUTF8() const;
-    const wxCharBuffer ToUF8() const;
+    const wxCharBuffer ToUTF8() const;
      //@}
  
      /**
@@ -993,6 +1072,8 @@ public:
  
      /**
          Returns this string converted to upper case.
+
+        @see MakeUpper()
      */
      wxString Upper() const;
  
@@ -1004,12 +1085,19 @@ public:
      void UpperCase();
  
      /**
-        Returns a pointer to the string data (@c const char* when using UTF-8
-        internally, @c const wchar_t* when using UCS-2 internally).
+        Returns a lightweight intermediate class which is in turn implicitly
+        convertible to both @c const @c char* and to @c const @c wchar_t*.
+        Given this ambiguity it is mostly better to use wc_str(), mb_str() or
+        utf8_str() instead.
+
+        Please see the @ref overview_unicode "Unicode overview" for more
+        information about it.
  
          Note that the returned value is not convertible to @c char* or
          @c wchar_t*, use char_str() or wchar_str() if you need to pass
          string value to a function expecting non-const pointer.
+
+        @see wc_str(), utf8_str(), c_str(), mb_str(), fn_str()
      */
      const wxCStrData c_str() const;
  
@@ -1057,37 +1145,43 @@ public:
      const wxCharBuffer fn_str() const;
      //@}
  
-    //@{
      /**
-        Returns multibyte (C string) representation of the string.
-        In Unicode build, converts using @e conv's wxMBConv::cWC2MB
-        method and returns wxCharBuffer. In ANSI build, this function
-        is same as c_str().
-        The macro wxWX2MBbuf is defined as the correct return type (without const).
+        Returns the multibyte (C string) representation of the string as a
+        wxCharBuffer, converted using @e conv's wxMBConv::cWC2MB method.
  
-        @see wxMBConv, c_str(), wc_str(), fn_str(), char_str()
+        @see wc_str(), utf8_str(), c_str(), wxMBConv
      */
-    const char* mb_str(const wxMBConv& conv = wxConvLibc) const;
      const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;
-    //@}
  
      /**
          Extraction from a stream.
      */
-    friend istream operator(istream& is, wxString& str);
-
-    //@{
-    /**
-        These functions work as C++ stream insertion operators: they insert the given
-        value into the string. Precision or format cannot be set using them, you can
-        use Printf() for this.
-    */
-    wxString operator(const wxString& str);
-    wxString operator(wxUniChar ch);
-    wxString operator(int i);
-    wxString operator(float f);
-    wxString operator(double d);
-    //@}
+    friend istream& operator>>(istream& is, wxString& str);
+
+    /**
+        These functions work as C++ stream insertion operators. They insert the
+        given value into the string and return a reference to it. Precision and
+        format cannot be set using them; use Printf() instead.
+    */
+    wxString& operator<<(const wxString& s);
+    wxString& operator<<(const char* psz);
+    wxString& operator<<(const wchar_t* pwz);
+    wxString& operator<<(const wxCStrData& psz);
+    wxString& operator<<(wxUniChar ch);
+    wxString& operator<<(wxUniCharRef ch);
+    wxString& operator<<(char ch);
+    wxString& operator<<(unsigned char ch);
+    wxString& operator<<(wchar_t ch);
+    wxString& operator<<(const wxCharBuffer& s);
+    wxString& operator<<(const wxWCharBuffer& s);
+    wxString& operator<<(int i);
+    wxString& operator<<(unsigned int ui);
+    wxString& operator<<(long l);
+    wxString& operator<<(unsigned long ul);
+    wxString& operator<<(wxLongLong_t ll);
+    wxString& operator<<(wxULongLong_t ul);
+    wxString& operator<<(float f);
+    wxString& operator<<(double d);
  
      /**
          Same as Mid (substring extraction).
@@ -1114,7 +1208,7 @@ public:
      //@{
      /**
          Assignment: the effect of each operation is the same as for the corresponding
-        constructor (see @ref construct() "wxString constructors").
+        constructor (see @ref wxString() "wxString constructors").
      */
      wxString operator =(const wxString& str);
      wxString operator =(wxUniChar c);
@@ -1142,6 +1236,8 @@ public:
          Converts the strings contents to UTF-8 and returns it either as a
          temporary wxCharBuffer object or as a pointer to the internal
          string contents in UTF-8 build.
+
+        @see wc_str(), c_str(), mb_str()
      */
      const char* utf8_str() const;
      const wxCharBuffer utf8_str() const;
@@ -1150,13 +1246,14 @@ public:
      //@{
      /**
          Converts the strings contents to the wide character represention
-        and returns it as a temporary wxWCharBuffer object or returns a
-        pointer to the internal string contents in wide character mode.
+        and returns it as a temporary wxWCharBuffer object (Unix and OS X)
+        or returns a pointer to the internal string contents in wide character
+        mode (Windows).
  
          The macro wxWX2WCbuf is defined as the correct return
          type (without const).
  
-        @see wxMBConv, c_str(), mb_str(), fn_str(), wchar_str()
+        @see utf8_str(), c_str(), mb_str(), fn_str(), wchar_str()
      */
      const wchar_t* wc_str() const;
      const wxWCharBuffer wc_str() const;
@@ -1173,6 +1270,13 @@ public:
      */
      wxWritableWCharBuffer wchar_str() const;
  
+    /**
+       Explicit conversion to C string in the internal representation (either
+       wchar_t* or UTF-8-encoded char*, depending on the build).
+    */
+    const wxStringCharType *wx_str() const;
+
+
      /**
          @name Iterator interface
  
@@ -1303,7 +1407,7 @@ wxString wxEmptyString;
  /**
      @class wxStringBufferLength
  
-    This tiny class allows to conveniently access the wxString
+    This tiny class allows you to conveniently access the wxString
      internal buffer as a writable pointer without any risk of forgetting to restore
      the string to the usable state later, and allows the user to set the internal
      length of the string.
@@ -1324,22 +1428,17 @@ wxString wxEmptyString;
          }
      @endcode
  
-    Note that the exact usage of this depends on whether on not wxUSE_STL is
-    enabled.  If
-    wxUSE_STL is enabled, wxStringBuffer creates a separate empty character buffer,
-    and
-    if wxUSE_STL is disabled, it uses GetWriteBuf() from wxString, keeping the same
-    buffer
-    wxString uses intact.  In other words, relying on wxStringBuffer containing the
-    old
-    wxString data is probably not a good idea if you want to build your program in
-    both
-    with and without wxUSE_STL.
+    Note that the exact usage of this depends on whether or not wxUSE_STL is
+    enabled. If wxUSE_STL is enabled, wxStringBuffer creates a separate empty
+    character buffer, and if wxUSE_STL is disabled, it uses GetWriteBuf() from
+    wxString, keeping the same buffer wxString uses intact. In other words,
+    relying on wxStringBuffer containing the old wxString data is not a good
+    idea if you want to build your program both with and without wxUSE_STL.
  
      Note that SetLength @c must be called before wxStringBufferLength destructs.
  
      @library{wxbase}
-    @category{FIXME}
+    @category{data}
  */
  class wxStringBufferLength
  {