X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/bcc8c9033a0694a5fb69619c55555edc6c4902c8..e25cd7750c571191ce69438de5f7c0293d1b5270:/interface/wx/string.h

diff --git a/interface/wx/string.h b/interface/wx/string.h
index dd452dab8b..69044521b3 100644
--- a/interface/wx/string.h
+++ b/interface/wx/string.h
@@ -27,10 +27,10 @@
     @endcode
 
     Note that the exact usage of this depends on whether or not wxUSE_STL is
-    enabled. If wxUSE_STL is enabled, wxStringBuffer creates a separate empty 
+    enabled. If wxUSE_STL is enabled, wxStringBuffer creates a separate empty
     character buffer, and if wxUSE_STL is disabled, it uses GetWriteBuf() from
-    wxString, keeping the same buffer wxString uses intact. In other words, 
-    relying on wxStringBuffer containing the old wxString data is not a good 
+    wxString, keeping the same buffer wxString uses intact. In other words,
+    relying on wxStringBuffer containing the old wxString data is not a good
     idea if you want to build your program both with and without wxUSE_STL.
 
     @library{wxbase}
@@ -65,23 +65,41 @@ public:
 /**
     @class wxString
 
+    The wxString class has been completely rewritten for wxWidgets 3.0
+    and this change was actually the main reason for calling that
+    version wxWidgets 3.0.
+
     wxString is a class representing a Unicode character string.
     wxString uses @c std::string internally to store its content
     unless this is not supported by the compiler or disabled
-    specifically when building wxWidgets. Therefore wxString
-    inherits many features from @c std::string. Most
-    implementations of @c std::string are thread-safe and don't
-    use reference counting. By default, wxString uses @c std::string
-    internally even if wxUSE_STL is not defined.
-
-    wxWidgets 3.0 wxString internally uses UCS-2 (basically 2-byte per
-    character wchar_t) under Windows and UTF-8 under Unix, Linux and
-    OS X to store its content. Much work has been done to make existing
-    code using ANSI string literals work as before. If you need to have a
-    wxString that uses wchar_t on Unix and Linux, too, you can specify
-    this on the command line with the @c configure @c --disable-utf8 switch.
-    As a consequence of this change, iterating over a wxString by index
-    can become inefficient in UTF8 mode and iterators should be used instead:
+    specifically when building wxWidgets and it therefore inherits
+    many features from @c std::string. Most implementations of
+    @c std::string are thread-safe and don't use reference counting.
+    By default, wxString uses @c std::string internally even if
+    wxUSE_STL is not defined.
+
+    wxString now internally uses UTF-16 under Windows and UTF-8 under
+    Unix, Linux and OS X to store its content. Note that when iterating
+    over a UTF-16 string under Windows, the user code has to take care
+    of surrogate pair handling whereas Windows itself has built-in
+    support for surrogate pairs in UTF-16, such as for drawing strings on screen.
+    
+    Much work has been done to make existing code using ANSI string literals
+    work as before. If you nonetheless need to have a wxString that uses wchar_t
+    on Unix and Linux, too, you can specify this on the command line with the
+    @c configure @c --disable-utf8 switch or you can consider using wxUString
+    or std::wstring instead.
+
+    Accessing a UTF-8 string by index can be very inefficient because
+    a single character is represented by a variable number of bytes so that
+    the entire string has to be parsed in order to find the character.
+    Since iterating over a string by index is a common programming technique and
+    was also possible and encouraged by wxString using the access operator[](),
+    wxString implements caching of the last used index so that iterating over
+    a string is a linear operation even in UTF-8 mode.
+    
+    It is nonetheless recommended to use iterators (instead of index based
+    access) like this:
 
     @code
     wxString s = "hello";
@@ -118,6 +136,8 @@ public:
     append something to a C string (including literal constants), so to do this it
     should be converted to a wxString first.
 
+        @li insert()
+        @li append()
         @li operator<<()
         @li operator+=()
         @li operator+()
@@ -132,6 +152,7 @@ public:
         @li wxString()
         @li operator=()
         @li ~wxString()
+        @li assign()
 
         The MakeXXX() variants modify the string in place, while the other functions
         return a new string which contains the original text converted to the upper or
@@ -141,6 +162,8 @@ public:
         @li Upper()
         @li MakeLower()
         @li Lower()
+        @li MakeCapitalized()
+        @li Capitalize()
 
         Many functions below take a character index in the string. As with C
         strings and arrays, the indices start from 0, so the first character of a
@@ -149,8 +172,8 @@ public:
         failure in @ref overview_debugging "debug build", but no checks are
         done in release builds.
         This section also contains both implicit and explicit conversions to C style
-        strings. Although implicit conversion is quite convenient, you are advised 
-        to use explicit wc_str() method for the sake of clarity.
+        strings. Although implicit conversion is quite convenient, you are advised
+        to use wc_str() for the sake of clarity.
 
         @li GetChar()
         @li GetWritableChar()
@@ -164,10 +187,9 @@ public:
         @li mb_str()
         @li fn_str()
 
-        The default comparison function Cmp() is case-sensitive and
-        so is the default version of IsSameAs(). For case
-        insensitive comparisons you should use CmpNoCase() or
-        give a second parameter to IsSameAs. This last function is may be more
+        The default comparison function Cmp() is case-sensitive and so is the default
+        version of IsSameAs(). For case insensitive comparisons you should use CmpNoCase()
+        or give a second parameter to IsSameAs(). This last function may be more
         convenient if only equality of the strings matters because it returns a boolean
         @true value if the strings are the same and not 0 (which is usually @false
         in C) as Cmp() does.
@@ -177,6 +199,7 @@ public:
         with some predefined prefix and is more efficient than doing direct string
         comparison as you would also have to precalculate the length of the prefix.
 
+        @li compare()
         @li Cmp()
         @li CmpNoCase()
         @li IsSameAs()
@@ -201,6 +224,8 @@ public:
         when working with some external API which requires the caller to provide
         a writable buffer.
 
+        @li reserve()
+        @li resize()
         @li Alloc()
         @li Shrink()
         @li wxStringBuffer
@@ -215,16 +240,19 @@ public:
         These functions return the string length and check whether the string
         is empty or they empty it.
 
+        @li length()
+        @li size()
         @li Len()
         @li IsEmpty()
         @li operator!()
         @li Empty()
         @li Clear()
 
-        These functions allow you to extract a substring from the string. The 
+        These functions allow you to extract a substring from the string. The
         original string is not modified and the function returns the extracted
         substring.
 
+        @li substr()
         @li Mid()
         @li operator()()
         @li Left()
@@ -239,6 +267,9 @@ public:
         These functions replace the standard @e strchr() and @e strstr()
         functions.
 
+        @li find()
+        @li rfind()
+        @li replace()
         @li Find()
         @li Replace()
 
@@ -266,7 +297,7 @@ public:
     ::Objects, ::wxEmptyString,
 
     @see @ref overview_string "wxString overview", @ref overview_unicode
-    "Unicode overview"
+    "Unicode overview", wxUString
 */
 class wxString
 {
@@ -458,6 +489,16 @@ public:
     wxString BeforeLast(wxUniChar ch) const;
 
 
+    /**
+        Returns a copy of the string with the first character in upper case
+        and the subsequent ones in lower case.
+
+        @since 2.9.0
+
+        @see MakeCapitalized()
+     */
+    wxString Capitalize() const;
+
     /**
         Empties the string and frees memory occupied by it.
         See also: Empty()
@@ -621,14 +662,39 @@ public:
     //@{
     /**
         Converts C string encoded in UTF-8 to wxString.
-        Note that this method assumes that @a s is a valid UTF-8 sequence and
-        doesn't do any validation in release builds, it's validity is only checked in
-        debug builds.
+
+        If @a s is not a valid UTF-8 string, an empty string is returned.
+
+        Notice that when using UTF-8 wxWidgets build there is a more efficient
+        alternative to this function called FromUTF8Unchecked() which, unlike
+        this one, doesn't check that the input string is valid.
+
+        @since 2.8.4
     */
     static wxString FromUTF8(const char* s);
     static wxString FromUTF8(const char* s, size_t len);
     //@}
 
+    //@{
+    /**
+        Converts C string encoded in UTF-8 to wxString without checking its
+        validity.
+
+        This method assumes that @a s is a valid UTF-8 sequence and doesn't do
+        any validation (although an assert failure is triggered in debug builds
+        if the string is invalid). Only use it if you are absolutely sure that
+        @a s is a correct UTF-8 string (e.g. because it comes from another
+        library using UTF-8) and if the performance matters, otherwise use
+        the slower (in UTF-8 build) but safer FromUTF8(). Passing a bad UTF-8
+        string to this function will result in creating a corrupted wxString
+        and all the subsequent operations on it will be undefined.
+
+        @since 2.8.9
+    */
+    static wxString FromUTF8Unchecked(const char* s);
+    static wxString FromUTF8Unchecked(const char* s, size_t len);
+    //@}
+
     /**
         Returns the character at position @a n (read-only).
     */
@@ -729,6 +795,8 @@ public:
 
     /**
         Returns this string converted to the lower case.
+
+        @see MakeLower()
     */
     wxString Lower() const;
 
@@ -740,12 +808,28 @@ public:
     void LowerCase();
 
     /**
-        Converts all characters to lower case and returns the result.
+        Converts the first character of the string to upper case and all
+        the subsequent ones to lower case and returns the result.
+
+        @since 2.9.0
+
+        @see Capitalize()
+    */
+    wxString& MakeCapitalized();
+
+    /**
+        Converts all characters to lower case and returns the reference to the
+        modified string.
+
+        @see Lower()
     */
     wxString& MakeLower();
 
     /**
-        Converts all characters to upper case and returns the result.
+        Converts all characters to upper case and returns the reference to the
+        modified string.
+
+        @see Upper()
     */
     wxString& MakeUpper();
 
@@ -953,7 +1037,7 @@ public:
         Same as utf8_str().
     */
     const char* ToUTF8() const;
-    const wxCharBuffer ToUF8() const;
+    const wxCharBuffer ToUTF8() const;
     //@}
 
     /**
@@ -988,6 +1072,8 @@ public:
 
     /**
         Returns this string converted to upper case.
+
+        @see MakeUpper()
     */
     wxString Upper() const;
 
@@ -1003,14 +1089,14 @@ public:
         convertible to both @c const @c char* and to @c const @c wchar_t*.
         Given this ambiguity it is mostly better to use wc_str(), mb_str() or
         utf8_str() instead.
-        
+
         Please see the @ref overview_unicode "Unicode overview" for more
         information about it.
-        
+
         Note that the returned value is not convertible to @c char* or
         @c wchar_t*, use char_str() or wchar_str() if you need to pass
         string value to a function expecting non-const pointer.
-        
+
         @see wc_str(), utf8_str(), c_str(), mb_str(), fn_str()
     */
     const wxCStrData c_str() const;
@@ -1061,8 +1147,8 @@ public:
 
     /**
         Returns the multibyte (C string) representation of the string
-        using @e conv's wxMBConv::cWC2MB method and returns wxCharBuffer. 
-        
+        using @e conv's wxMBConv::cWC2MB method and returns wxCharBuffer.
+
         @see wc_str(), utf8_str(), c_str(), wxMBConv
     */
     const wxCharBuffer mb_str(const wxMBConv& conv = wxConvLibc) const;
@@ -1150,7 +1236,7 @@ public:
         Converts the strings contents to UTF-8 and returns it either as a
         temporary wxCharBuffer object or as a pointer to the internal
         string contents in UTF-8 build.
-        
+
         @see wc_str(), c_str(), mb_str()
     */
     const char* utf8_str() const;
@@ -1160,14 +1246,14 @@ public:
     //@{
     /**
         Converts the strings contents to the wide character represention
-        and returns it as a temporary wxWCharBuffer object (Unix and OS X) 
+        and returns it as a temporary wxWCharBuffer object (Unix and OS X)
         or returns a pointer to the internal string contents in wide character
         mode (Windows).
 
         The macro wxWX2WCbuf is defined as the correct return
         type (without const).
 
-        @see uf8_str(), c_str(), mb_str(), fn_str(), wchar_str()
+        @see utf8_str(), c_str(), mb_str(), fn_str(), wchar_str()
     */
     const wchar_t* wc_str() const;
     const wxWCharBuffer wc_str() const;
@@ -1184,7 +1270,7 @@ public:
     */
     wxWritableWCharBuffer wchar_str() const;
 
-    /** 
+    /**
        Explicit conversion to C string in the internal representation (either
        wchar_t* or UTF-8-encoded char*, depending on the build).
     */
@@ -1343,10 +1429,10 @@ wxString wxEmptyString;
     @endcode
 
     Note that the exact usage of this depends on whether or not wxUSE_STL is
-    enabled. If wxUSE_STL is enabled, wxStringBuffer creates a separate empty 
+    enabled. If wxUSE_STL is enabled, wxStringBuffer creates a separate empty
     character buffer, and if wxUSE_STL is disabled, it uses GetWriteBuf() from
-    wxString, keeping the same buffer wxString uses intact. In other words, 
-    relying on wxStringBuffer containing the old wxString data is not a good 
+    wxString, keeping the same buffer wxString uses intact. In other words,
+    relying on wxStringBuffer containing the old wxString data is not a good
     idea if you want to build your program both with and without wxUSE_STL.
 
     Note that SetLength @c must be called before wxStringBufferLength destructs.