X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/a660d684eda27638bca0384b2058911a31c8e845..12a44087e448071f9f108208a0a88e908d543d44:/docs/latex/wx/tstring.tex diff --git a/docs/latex/wx/tstring.tex b/docs/latex/wx/tstring.tex index b9afab0b9f..41b70d34bf 100644 --- a/docs/latex/wx/tstring.tex +++ b/docs/latex/wx/tstring.tex @@ -1,1079 +1,6 @@ \section{wxString overview}\label{wxstringoverview} -Class: \helpref{wxString}{wxstring} +Classes: \helpref{wxString}{wxstring} -Strings are used very frequently in most programs. There is no direct support in -the C++ language for strings. A string class can be useful in many -situations: it not only makes the code shorter and easier to read, it also -provides more security, because we don't have to deal with pointer acrobatics. - -wxString is available in two versions: a cut-down wxWindows, -copyright-free version, and a much more powerful GNU-derived version. The default is the -GNU-derived, fully-featured version, ported and revised by Stefan Hammes. - -For backward compatibility most of the member functions of the original -wxWindows wxString class have been included, except some `dangerous' -functions. - -wxString can be compiled under MSW, UNIX and VMS (see below). The -function names have been capitalized to be consistent with the wxWindows -naming scheme. - -The reasons for not using the GNU string class directly are: - -\begin{itemize}\itemsep=0pt -\item It is not available on all systems (generally speaking, it is available only on some -UNIX systems). -\item We can make changes and extensions to the string class as needed and are not -forced to use `only' the functionality of the GNU string class. -\end{itemize} - -The GNU code comes with certain copyright restrictions. If you can't -live with these, you will need to use the cut-down wxString class -instead, by editing wx\_setup.h and appropriate wxWindows makefiles. - -\subsection{Copyright of the original GNU code portion} - -Copyright (C) 1988, 1991, 1992 Free Software Foundation, Inc. -written by Doug Lea (dl@rocky.oswego.edu) - -This file is part of the GNU C++ Library. This library is free -software; you can redistribute it and/or modify it under the terms of -the GNU Library General Public License as published by the Free -Software Foundation; either version 2 of the License, or (at your -option) any later version. This library is distributed in the hope -that it will be useful, but WITHOUT ANY WARRANTY; without even the -implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR -PURPOSE. See the GNU Library General Public License for more details. -You should have received a copy of the GNU Library General Public -License along with this library; if not, write to the Free Software -Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - -\subsection{Features/Additions/Modifications} - -The wxString class offers many string handling functions and a support for -regular expressions. This gives powerful, easy-to-use pattern-matching functionality. -See below for a discussion of the GNU features of wxString. See also -the header file `wxstrgnu.h' which shows all member functions. - -As stated above, there are extensions to the wxString class. -This includes the including of the `old' wxString class member functions. -Below is a list of the additional member functions: - -\begin{itemize}\itemsep=0pt -\item Access to the internal representation. Should be used with care: -\begin{verbatim} - char* GetData() const; -\end{verbatim} -\item To make a copy of 'this' (only for compatibility): -\begin{verbatim} - wxString Copy() const; -\end{verbatim} -\item For case sensitive and case insensitive comparisons: -\begin{verbatim} - enum caseCompare {exact, ignoreCase}; - int CompareTo(const char* cs, caseCompare cmp = exact) const; - int CompareTo(const wxString& st, caseCompare cmp = exact) const; -\end{verbatim} - -\item For case sensitive and case insensitive containment check: -\begin{verbatim} - Bool Contains(const char* pat, caseCompare cmp = exact) const; - Bool Contains(const wxString& pat, caseCompare cmp = exact) const; -\end{verbatim} - -\item For case sensitive and case insensitive index calculation: -\begin{verbatim} - int Index(const char* pat, int i=0, caseCompare cmp = exact) const; - int Index(const wxString& s, int i=0, caseCompare cmp = exact) const; -\end{verbatim} - -\item For element access in addition to the [] operator: -\begin{verbatim} - char& operator()(int); // Indexing with bounds checking -\end{verbatim} - -\item To put something in front of a string: -\begin{verbatim} - wxString& Prepend(const char*); // Prepend a character string - wxString& Prepend(const wxString& s); - wxString& Prepend(char c, int rep=1); // Prepend c rep times -\end{verbatim} - -\item For concatenation: -\begin{verbatim} - wxString& Append(const char* cs); - wxString& Append(const wxString& s); - wxString& Append(char c, int rep=1); // Append c rep times -\end{verbatim} - -\item To get the first and last occurrence of a char or string: -\begin{verbatim} - int First(char c) const; - int First(const char* cs) const; - int First(const wxString& cs) const; - int Last(char c) const; - int Last(const char* cs) const; - int Last(const wxString& cs) const; -\end{verbatim} - -\item To insert something into a string -\begin{verbatim} - wxString& Insert(int pos, const char*); - wxString& Insert(int pos, const wxString&); -\end{verbatim} - -\item To remove data (in addition to the 'Del' functions): -\begin{verbatim} - wxString& Remove(int pos); // Remove pos to end of string - wxString& Remove(int pos, int n); // Remove n chars starting at pos - wxString& RemoveLast(void); // It removes the last char of a string -\end{verbatim} - -\item To replace data: -\begin{verbatim} - wxString& Replace(int pos, int n, const char*); - wxString& Replace(int pos, int n, const wxString&); -\end{verbatim} - -\item Alternative names for compatibility: -\begin{verbatim} - void LowerCase(); // Change self to lower-case - void UpperCase(); // Change self to upper-case -\end{verbatim} - -\item Edward Zimmermann's additions: -\begin{verbatim} - wxString SubString(int from, int to); -\end{verbatim} - -\item Formatted assignment: -\begin{verbatim} - void sprintf(const char *fmt, ...); -\end{verbatim} - -We do not use the 'sprintf' constructor of the old wxString class anymore, -because with that constructor, every initialisation with a string would -go through sprintf and this is not desirable, because sprintf interprets -some characters. With the above function we can write: - -\begin{verbatim} - wxString msg; msg.sprintf("Processing item %d\n",count); -\end{verbatim} - -\item Strip chars at the front and/or end. -This can be useful for trimming strings: -\begin{verbatim} - enum StripType {leading = 0x1, trailing = 0x2, both = 0x3}; - wxSubString Strip(StripType s=trailing, char c=' '); -\end{verbatim} - -\item Line input: -Besides the stream I/O functions this function can be used for non-standard -formatted I/O with arbitrary line terminators. -\begin{verbatim} - friend int Readline(FILE *f, wxString& x, - char terminator = '\\n', - int discard_terminator = 1); -\end{verbatim} - -\item The GNU wxString class lacks some classification functions: -\begin{verbatim} - int IsAscii() const; - int IsWord() const; - int IsNumber() const; - int IsNull() const; - int IsDefined() const; -\end{verbatim} - -\item The meaning of nil has been changed. A wxString x is only nil, if it -has been declared `wxString x'. In all other cases it is NOT nil. This -seems to me more logical than to let a `wxString x=""' be nil as it -was in the original GNU code. - -\item {\bf IMPORTANT:} -the following is a very, very, very ugly macro, but it makes things more -transparent in cases, where a library function requires a -(char*) argument. This is especially the case in wxWindows, -where most char-arguments are (char*) and not (const char*). -this macro should only be used in such cases and NOT to -modify the internal data. The standard type conversion function -of wxString returns a '(const char*)'. -The conventional way would be 'function((char*)string.Chars())'. -With the macro this can be achieved by 'function(wxCHARARG(string))'. -Whis makes it clearer that the usage should be confined -to arguments. See below for examples. - -\begin{verbatim} -#define wxCHARARG(s) ((char*)(s).Chars()) -\end{verbatim} - -\end{itemize} - -\subsection{Function calls} - -When using wxString objects as parameters to other functions you should -note the following: - -\begin{verbatim} -void f1(const char *s){} -void f2(char *s){} - -main(){ - wxString aString; - f1(aString); // ok - f2(aString); // error - f2(wxCHARARG(aString)); // ok - printf("%s",aString); // NO compilation error, but a runtime error. - printf("%s",aString.Chars()); // ok - printf("%s",wxCHARARG(aString)); // ok -} -\end{verbatim} - -\subsection{Header files} - -For DOS and UNIX we use a stub-headerfile {\tt include/base/wxstring.h}\rtfsp -which includes the two headerfiles in the {\tt contrib/wxstring} directory, -namely {\tt contrib/wxstring/wxstrgnu.h} and {\tt contrib/wxstring/wxregex.h}. -If there is a headerfile {\tt contrib/wxstring/wxstring.h}, please -delete it. It will cause problems in the VMS compilation. - -For VMS we have to do an addition due to the not very intelligent inclusion mechanism -of the VMS C++ compiler: -In the VMS-Makefile, the include-file search path is augmented with the -{\tt contrib/wxstring} directory, so that the correct headerfiles -can be included. - -So you have only to specify - -\begin{verbatim} -#define USE_GNU_WXSTRING 1 -\end{verbatim} - -in {\tt include/base/wx\_setup.h} to use the wxString class. - -\subsection{Test program} - -Stefan Hammes has included a test program {\tt test.cc} in the contrib/wxstring directory for many features -of wxString and wxRegex. It also tests Stefan's extensions. -When running the compiled program, there should -be NO assert-errors if everything is OK. When compiling the test -program, you can ignore warnings about unused variables. They -occur because Stefan has used a special method of initializing all -variables to the same start values before each test. - -\subsection{Compilers} - -wxString and wxRegex have been compiled successfully with the following -compilers (it should work on nearly every C++ compiler): - -\begin{itemize}\itemsep=0pt -\item PC MS-Visual C++ 1.0, 1.5 -\item UNIX gcc v2.6.3 -\item UNIX Sun SunPro compiler under Solaris 2.x -\item VMS DEC C++ compiler (on VAX and AXP) -\end{itemize} - -Warnings about type conversion or assignments can be ignored. - -\subsection{GNU Documentation} - -Below is the original GNU wxString and wxRegex -documentation. It describes most functions of the classes. -The function names have been capitalized to be consistent with -the wxWindows naming scheme. The examples are integrated into the test program. - -Copyright (C) 1988, 1991, 1992 Free Software Foundation, Inc. - -Permission is granted to make and distribute verbatim copies of this -manual provided the copyright notice and this permission notice are -preserved on all copies. - -Permission is granted to copy and distribute modified versions of -this manual under the conditions for verbatim copying, provided also -that the section entitled "GNU Library General Public License" is -included exactly as in the original, and provided that the entire -resulting derived work is distributed under the terms of a permission -notice identical to this one. - -Permission is granted to copy and distribute translations of this -manual into another language, under the above conditions for modified -versions, except that the section entitled "GNU Library General Public -License" and this permission notice may be included in translations -approved by the Free Software Foundation instead of in the original -English. - -\subsubsection{The wxString class} - -The `wxString' class is designed to extend GNU C++ to support string -processing capabilities similar to those in languages like Awk. The -class provides facilities that ought to be convenient and efficient -enough to be useful replacements for `char*' based processing via the C -string library (i.e., `strcpy, strcmp,' etc.) in many applications. -Many details about wxString representations are described in the -Representation section. - -A separate `wxSubString' class supports substring extraction and -modification operations. This is implemented in a way that user -programs never directly construct or represent substrings, which are -only used indirectly via wxString operations. - -Another separate class, `wxRegex' is also used indirectly via wxString -operations in support of regular expression searching, matching, and the -like. The wxRegex class is based entirely on the GNU Emacs regex -functions. See \helpref{Regular Expressions}{regularexpressions} -for a full explanation of regular expression syntax. (For -implementation details, see the internal documentation in files -{\tt wxregex.h} and {\tt wxregex.cc}). - -\subsubsection{Constructor examples} - -Strings are initialized and assigned as in the following examples: - -{\tt wxString x;} -Set x to the nil string. This is different from the original GNU code -which sets a strings also to nil when it is assign 0 or "". - -{\tt wxString x = "Hello"; wxString y("Hello");} -Set x and y to a copy of the string "Hello". - -{\tt wxString x = 'A'; wxString y('A');} -Set x and y to the string value "A". - -{\tt wxString u = x; wxString v(x);} -Set u and v to the same string as wxString x - -{\tt wxString u = x.At(1,4); wxString v(x.At(1,4));} -Set u and v to the length 4 substring of x starting at position 1 -(counting indexes from 0). - -{\tt wxString x("abc", 2);} -Sets x to "ab", i.e., the first 2 characters of "abc". - -There are no directly accessible forms for declaring wxSubString -variables. - -The declaration \verb$wxRegex r("[a-zA-Z_][a-zA-Z0-9_]*");$ creates -compiled regular expression suitable for use in wxString operations -described below. (In this case, one that matches any C++ identifier). -The first argument may also be a wxString. Be careful in distinguishing -the role of backslashes in quoted GNU C++ `char*' constants versus those -in Regexes. For example, a wxRegex that matches either one or more tabs -or all strings beginning with "ba" and ending with any number of -occurrences of "na" could be declared as - -\begin{verbatim} - wxRegex r = "\\(\t+\\)\\|\\(ba\\(na\\)*\\)" -\end{verbatim} - -Note that only one backslash is needed -to signify the tab, but two are needed for the parenthesization and -virgule, since the GNU C++ lexical analyzer decodes and strips -backslashes before they are seen by wxRegex. - -There are three additional optional arguments to the wxRegex -constructor that are less commonly useful: - -{\tt fast (default 0)} -`fast' may be set to true (1) if the wxRegex should be -"fast-compiled". This causes an additional compilation step that -is generally worthwhile if the wxRegex will be used many times. - -{\tt bufsize (default max(40, length of the string))} -This is an estimate of the size of the internal compiled -expression. Set it to a larger value if you know that the -expression will require a lot of space. If you do not know, do not -worry: realloc is used if necessary. - -{\tt transtable (default none == 0)} -The address of a byte translation table (a char[256]) that -translates each character before matching. - -As a convenience, several Regexes are predefined and usable in any -program. Here are their declarations from {\tt wxString.h}. -\begin{verbatim} - extern wxRegex RXwhite; // = "[ \n\t]+" - extern wxRegex RXint; // = "-?[0-9]+" - extern wxRegex RXdouble; // = "-?\\(\\([0-9]+\\.[0-9]*\\)\\| - // \\([0-9]+\\)\\| - // \\(\\.[0-9]+\\)\\) - // \\([eE][---+]?[0-9]+\\)?" - extern wxRegex RXalpha; // = "[A-Za-z]+" - extern wxRegex RXlowercase; // = "[a-z]+" - extern wxRegex RXuppercase; // = "[A-Z]+" - extern wxRegex RXalphanum; // = "[0-9A-Za-z]+" - extern wxRegex RXidentifier; // = "[A-Za-z_][A-Za-z0-9_]*" -\end{verbatim} - -\subsubsection{Examples} - -Most {\tt wxString} class capabilities are best shown via example. The -examples below use the following declarations. - -\begin{verbatim} - wxString x = "Hello"; - wxString y = "world"; - wxString n = "123"; - wxString z; - char *s = ","; - wxString lft, mid, rgt; - wxRegex r = "e[a-z]*o"; - wxRegex r2("/[a-z]*/"); - char c; - int i, pos, len; - double f; - wxString words[10]; - words[0] = "a"; - words[1] = "b"; - words[2] = "c"; -\end{verbatim} - -\subsubsection{Comparing, Searching and Matching examples} - -The usual lexicographic relational operators (`==, !=, <, <=, >, >=') -are defined. A functional form `compare(wxString, wxString)' is also -provided, as is `fcompare(wxString, wxString)', which compares Strings -without regard for upper vs. lower case. - -All other matching and searching operations are based on some form -of the (non-public) `match' and `search' functions. `match' and -`search' differ in that `match' attempts to match only at the given -starting position, while `search' starts at the position, and then -proceeds left or right looking for a match. As seen in the following -examples, the second optional `startpos' argument to functions using -`match' and `search' specifies the starting position of the search: If -non-negative, it results in a left-to-right search starting at position -`startpos', and if negative, a right-to-left search starting at -position `x.Length() + startpos'. In all cases, the index returned is -that of the beginning of the match, or -1 if there is no match. - -Three wxString functions serve as front ends to `search' and `match'. -`index' performs a search, returning the index, `matches' performs a -match, returning nonzero (actually, the length of the match) on success, -and `contains' is a boolean function performing either a search or -match, depending on whether an index argument is provided: - -{\tt x.Index("lo")} -Returns the zero-based index of the leftmost occurrence of -substring "lo" (3, in this case). The argument may be a wxString, -wxSubString, char, char*, or wxRegex. - -{\tt x.Index("l", 2)} -Returns the index of the first of the leftmost occurrence of "l" -found starting the search at position x[2], or 2 in this case. - -{\tt x.Index("l", -1)} -Returns the index of the rightmost occurrence of "l", or 3 here. - -{\tt x.Index("l", -3)} -Returns the index of the rightmost occurrence of "l" found by -starting the search at the 3rd to the last position of x, -returning 2 in this case. - -{\tt pos = r.Search("leo", 3, len, 0)} -Returns the index of r in the {\tt char*} string of length 3, starting -at position 0, also placing the length of the match in reference -parameter len. - -{\tt x.Contains("He")} -Returns nonzero if the wxString x contains the substring "He". The -argument may be a wxString, wxSubString, char, char*, or wxRegex. - -{\tt x.Contains("el", 1)} -Returns nonzero if x contains the substring "el" at position 1. -As in this example, the second argument to `contains', if present, -means to match the substring only at that position, and not to -search elsewhere in the string. - -{\tt x.Contains(RXwhite);} -Returns nonzero if x contains any whitespace (space, tab, or -newline). Recall that `RXwhite' is a global whitespace wxRegex. - -{\tt x.Matches("lo", 3)} -Returns nonzero if x starting at position 3 exactly matches "lo", -with no trailing characters (as it does in this example). - -{\tt x.Matches(r)} -Returns nonzero if wxString x as a whole matches wxRegex r. - -{\tt int f = x.Freq("l")} -Returns the number of distinct, nonoverlapping matches to the -argument (2 in this case). - -\subsubsection{Substring extraction examples} - -Substrings may be extracted via the `at', `before', `through', -`from', and `after' functions. These behave as either lvalues or -rvalues. - -{\tt z = x.At(2, 3)} -Sets wxString z to be equal to the length 3 substring of wxString x -starting at zero-based position 2, setting z to "llo" in this -case. A nil wxString is returned if the arguments don't make sense. - -{\tt x.At(2, 2) = "r"} -Sets what was in positions 2 to 3 of x to "r", setting x to "Hero" -in this case. As indicated here, wxSubString assignments may be of -different lengths. - -{\tt x.At("He") = "je";} -x("He") is the substring of x that matches the first occurrence of -it's argument. The substitution sets x to "jello". If "He" did not -occur, the substring would be nil, and the assignment would have -no effect. - -{\tt x.At("l", -1) = "i";} -Replaces the rightmost occurrence of "l" with "i", setting x to -"Helio". - -{\tt z = x.At(r)} -Sets wxString z to the first match in x of wxRegex r, or "ello" in this -case. A nil wxString is returned if there is no match. - -{\tt z = x.Before("o")} -Sets z to the part of x to the left of the first occurrence of -"o", or "Hell" in this case. The argument may also be a wxString, -wxSubString, or wxRegex. (If there is no match, z is set to "".) - -{\tt x.Before("ll") = "Bri";} -Sets the part of x to the left of "ll" to "Bri", setting x to -"Brillo". - -{\tt z = x.Before(2)} -Sets z to the part of x to the left of x[2], or "He" in this case. - -{\tt z = x.After("Hel")} -Sets z to the part of x to the right of "Hel", or "lo" in this -case. - -{\tt z = x.Through("el")} -Sets z to the part of x up and including "el", or "Hel" in this -case. - -{\tt z = x.From("el")} -Sets z to the part of x from "el" to the end, or "ello" in this -case. - -{\tt x.After("Hel") = "p";} -Sets x to "Help"; - -{\tt z = x.After(3)} -Sets z to the part of x to the right of x[3] or "o" in this case. - -{\tt z = " ab c"; z = z.After(RXwhite)} -Sets z to the part of its old string to the right of the first -group of whitespace, setting z to "ab c"; Use GSub(below) to strip -out multiple occurrences of whitespace or any pattern. - -{\tt x[0] = 'J';} -Sets the first element of x to 'J'. x[i] returns a reference to -the ith element of x, or triggers an error if i is out of range. - -{\tt CommonPrefix(x, "Help")} -Returns the wxString containing the common prefix of the two Strings -or "Hel" in this case. - -{\tt CommonSuffix(x, "to")} -Returns the wxString containing the common suffix of the two Strings -or "o" in this case. - -\subsubsection{Concatenation examples} - -{\tt z = x + s + ' ' + y.At("w") + y.After("w") + ".";} -Sets z to "Hello, world." - -{\tt x += y;} -Sets x to "Helloworld". - -{\tt Cat(x, y, z)} -A faster way to say z = x + y. - -{\tt Cat(z, y, x, x)} -Double concatenation; A faster way to say x = z + y + x. - -{\tt y.Prepend(x);} -A faster way to say y = x + y. - -{\tt z = Replicate(x, 3);} -Sets z to "HelloHelloHello". - -{\tt z = Join(words, 3, "/")} -Sets z to the concatenation of the first 3 Strings in wxString array -words, each separated by "/", setting z to "a/b/c" in this case. -The last argument may be "" or 0, indicating no separation. - -\subsubsection{Other manipulation examples} - -{\tt z = "this string has five words"; i = Split(z, words, 10, RXwhite);} -Sets up to 10 elements of wxString array words to the parts of z -separated by whitespace, and returns the number of parts actually -encountered (5 in this case). Here, words[0] = "this", words[1] = -"string", etc. The last argument may be any of the usual. If -there is no match, all of z ends up in words[0]. The words array -is *not* dynamically created by split. - -{\tt int nmatches x.GSub("l","ll")} -Substitutes all original occurrences of "l" with "ll", setting x -to "Hellllo". The first argument may be any of the usual, -including wxRegex. If the second argument is "" or 0, all -occurrences are deleted. gsub returns the number of matches that -were replaced. - -{\tt z = x + y; z.Del("loworl");} -Deletes the leftmost occurrence of "loworl" in z, setting z to -"Held". - -{\tt z = Reverse(x)} -Sets z to the reverse of x, or "olleH". - -{\tt z = Upcase(x)} -Sets z to x, with all letters set to uppercase, setting z to -"HELLO". - -{\tt z = Downcase(x)} -Sets z to x, with all letters set to lowercase, setting z to -"hello" - -{\tt z = Capitalize(x)} -Sets z to x, with the first letter of each word set to uppercase, -and all others to lowercase, setting z to "Hello" - -{\tt x.Reverse(), x.Upcase(), x.Downcase(), x.Capitalize()} -in-place, self-modifying versions of the above. - -\subsubsection{Reading, Writing and Conversion examples} - -{\tt cout << x} -Writes out x. - -{\tt cout << x.At(2, 3)} -Writes out the substring "llo". - -{\tt cin >> x} -Reads a whitespace-bounded string into x. - -{\tt x.Length()} -Returns the length of wxString x (5, in this case). - -{\tt s = (const char*)x} -Can be used to extract the `char*' char array. This coercion is -useful for sending a wxString as an argument to any function -expecting a `const char*' argument (like `atoi', and -`File::open'). This operator must be used with care, since the -conversion returns a pointer to `wxString' internals without copying -the characters: The resulting `(char*)' is only valid until the -next wxString operation, and you must not modify it. (The -conversion is defined to return a const value so that GNU C++ will -produce warning and/or error messages if changes are attempted.) - -\subsection{Regular Expressions}\label{regularexpressions} - -The following are extracts from GNU documentation. - -\subsubsection{Regular Expression Overview} - -Regular expression matching allows you to test whether a string fits -into a specific syntactic shape. You can also search a string for a -substring that fits a pattern. - -A regular expression describes a set of strings. The simplest case -is one that describes a particular string; for example, the string -`foo' when regarded as a regular expression matches `foo' and nothing -else. Nontrivial regular expressions use certain special constructs -so that they can match more than one string. For example, the -regular expression `foo$\backslash$|bar' matches either the string `foo' or the -string `bar'; the regular expression `c[ad]*r' matches any of the -strings `cr', `car', `cdr', `caar', `cadddar' and all other such -strings with any number of `a''s and `d''s. - -The first step in matching a regular expression is to compile it. -You must supply the pattern string and also a pattern buffer to hold -the compiled result. That result contains the pattern in an internal -format that is easier to use in matching. - -Having compiled a pattern, you can match it against strings. You can -match the compiled pattern any number of times against different -strings. - -\subsubsection{Syntax of Regular Expressions} - -Regular expressions have a syntax in which a few characters are -special constructs and the rest are "ordinary". An ordinary -character is a simple regular expression which matches that character -and nothing else. The special characters are `\verb+\$+', `\verb+^+', `.', `*', -`+', `?', `[', `]' and `$\backslash$'. Any other character appearing in a -regular expression is ordinary, unless a `$\backslash$' precedes it. - -For example, `f' is not a special character, so it is ordinary, and -therefore `f' is a regular expression that matches the string `f' and -no other string. (It does *not* match the string `ff'.) Likewise, -`o' is a regular expression that matches only `o'. - -Any two regular expressions A and B can be concatenated. The result -is a regular expression which matches a string if A matches some -amount of the beginning of that string and B matches the rest of the -string. - -As a simple example, we can concatenate the regular expressions `f' -and `o' to get the regular expression `fo', which matches only the -string `fo'. Still trivial. - -Note: for Unix compatibility, special characters are treated as -ordinary ones if they are in contexts where their special meanings -make no sense. For example, `*foo' treats `*' as ordinary since -there is no preceding expression on which the `*' can act. It is -poor practice to depend on this behavior; better to quote the special -character anyway, regardless of where is appears. - -The following are the characters and character sequences which have -special meaning within regular expressions. Any character not -mentioned here is not special; it stands for exactly itself for the -purposes of searching and matching. - -\begin{itemize} -\itemsep=0pt - -\item \rtfsp -{\tt .} is a special character that matches anything except a newline. -Using concatenation, we can make regular expressions like {\tt a.b} -which matches any three-character string which begins with {\tt a} -and ends with {\tt b}. - -\item \rtfsp -{\tt *} is not a construct by itself; it is a suffix, which means the -preceding regular expression is to be repeated as many times as -possible. In {\tt fo*}, the {\tt *} applies to the {\tt o}, so {\tt fo*} -matches {\tt f} followed by any number of {\tt o}'s. - -The case of zero {\tt o}'s is allowed: {\tt fo*} does match {\tt f}. - -{\tt *} always applies to the *smallest* possible preceding -expression. Thus, {\tt fo*} has a repeating {\tt o}, not a repeating -{\tt fo}. - -The matcher processes a {\tt *} construct by matching, immediately, -as many repetitions as can be found. Then it continues with the -rest of the pattern. If that fails, backtracking occurs, -discarding some of the matches of the {\tt *}'d construct in case -that makes it possible to match the rest of the pattern. For -example, matching {\tt c$[$ad$]$*ar} against the string {\tt caddaar}, the -{\tt $[$ad$]$*} first matches {\tt addaa}, but this does not allow the next -{\tt a} in the pattern to match. So the last of the matches of -{\tt $[$ad$]$} is undone and the following {\tt a} is tried again. Now it -succeeds. - -\item \rtfsp -{\tt +} is like {\tt *} except that at least one match for the preceding -pattern is required for {\tt +}. Thus, {\tt c$[$ad$]$+r} does not match -{\tt cr} but does match anything else that {\tt c$[$ad$]$*r} would match. - -\item \rtfsp -{\tt ?} is like {\tt *} except that it allows either zero or one match -for the preceding pattern. Thus, {\tt c$[$ad$]$?r} matches {\tt cr} or -{\tt car} or {\tt cdr}, and nothing else. - -\item \rtfsp -{\tt $[$} begins a "character set", which is terminated by a {\tt $]$}. In -the simplest case, the characters between the two form the set. -Thus, {\tt $[$ad$]$} matches either {\tt a} or {\tt d}, and {\tt $[$ad$]$*} matches any -string of {\tt a}'s and {\tt d}'s (including the empty string), from -which it follows that {\tt c$[$ad$]$*r} matches {\tt car}, etc. - -Character ranges can also be included in a character set, by -writing two characters with a {\tt -} between them. Thus, {\tt $[$a-z$]$} -matches any lower-case letter. Ranges may be intermixed freely -with individual characters, as in {\tt $[$a-z\$\%.$]$}, which matches any -lower case letter or {\tt \$}, {\tt \%} or period. - -Note that the usual special characters are not special any more -inside a character set. A completely different set of special -characters exists inside character sets: {\tt $]$}, {\tt -} and \verb$^$. - -To include a {\tt $]$} in a character set, you must make it the first -character. For example, {\tt $[$$]$a$]$} matches {\tt $]$} or {\tt a}. To include -a {\tt -}, you must use it in a context where it cannot possibly -indicate a range: that is, as the first character, or -immediately after a range. - -\item \rtfsp -\verb$[^$ begins a "complement character set", which matches any -character except the ones specified. Thus, \verb$[^a-z0-9A-Z]$ -matches all characters {\it except} letters and digits. - -\item \rtfsp -\verb$^$ is not special in a character set unless it is the first -character. The character following the \verb$^$ is treated as if it -were first (it may be a {\tt -} or a {\tt $]$}). - -\verb$^$ is a special character that matches the empty string -- but only -if at the beginning of a line in the text being matched. -Otherwise it fails to match anything. Thus, \verb$^foo$ matches a -{\tt foo} which occurs at the beginning of a line. - -\item \rtfsp -{\tt \$} -is similar to \verb$^$ but matches only at the end of a line. Thus, -{\tt xx*\$} matches a string of one or more {\tt x}'s at the end of a line. - -\item \rtfsp -{\tt $\backslash$} -has two functions: it quotes the above special characters -(including {\tt $\backslash$}), and it introduces additional special constructs. - -Because {\tt $\backslash$} quotes special characters, {\tt $\backslash$\$} is a regular -expression which matches only {\tt \$}, and {\tt $\backslash$$[$} is a regular -expression which matches only {\tt $[$}, and so on. - -For the most part, {\tt $\backslash$} followed by any character matches only -that character. However, there are several exceptions: -characters which, when preceded by {\tt $\backslash$}, are special constructs. -Such characters are always ordinary when encountered on their own. - -No new special characters will ever be defined. All extensions -to the regular expression syntax are made by defining new -two-character constructs that begin with {\tt $\backslash$}. - -\item \rtfsp -{\tt $\backslash$|} -specifies an alternative. Two regular expressions A and B with -{\tt $\backslash$|} in between form an expression that matches anything that -either A or B will match. - -Thus, {\tt foo$\backslash$|bar} matches either {\tt foo} or {\tt bar} but no other -string. - -{\tt $\backslash$|} applies to the largest possible surrounding expressions. -Only a surrounding {\tt $\backslash$( ... $\backslash$)} grouping can limit the grouping -power of {\tt $\backslash$|}. - -Full backtracking capability exists when multiple {\tt $\backslash$|}'s are used. - -\item \rtfsp -{\tt $\backslash$( ... $\backslash$)} -is a grouping construct that serves three purposes: -\begin{enumerate} -\item To enclose a set of {\tt $\backslash$|} alternatives for other operations. -Thus, {\tt $\backslash$(foo$\backslash$|bar$\backslash$)x} matches either {\tt foox} or {\tt barx}. -\item To enclose a complicated expression for the postfix {\tt *} to -operate on. Thus, {\tt ba$\backslash$(na$\backslash$)*} matches {\tt bananana}, etc., -with any (zero or more) number of {\tt na}'s. -\item To mark a matched substring for future reference. -\end{enumerate} - -This last application is not a consequence of the idea of a -parenthetical grouping; it is a separate feature which happens -to be assigned as a second meaning to the same {\tt $\backslash$( ... $\backslash$)} -construct because there is no conflict in practice between the -two meanings. Here is an explanation of this feature: - -\item \rtfsp -{\tt $\backslash$DIGIT} -After the end of a {\tt $\backslash$( ... $\backslash$)} construct, the matcher remembers -the beginning and end of the text matched by that construct. -Then, later on in the regular expression, you can use {\tt $\backslash$} -followed by DIGIT to mean "match the same text matched the -DIGIT'th time by the {\tt $\backslash$( ... $\backslash$)} construct." The {\tt $\backslash$( ... $\backslash$)} -constructs are numbered in order of commencement in the regexp. - -The strings matching the first nine {\tt $\backslash$( ... $\backslash$)} constructs -appearing in a regular expression are assigned numbers 1 through -9 in order of their beginnings. {\tt $\backslash$1} through {\tt $\backslash$9} may be used -to refer to the text matched by the corresponding {\tt $\backslash$( ... $\backslash$)} -construct. - -For example, {\tt $\backslash$(.*$\backslash$)$\backslash$1} matches any string that is composed of -two identical halves. The {\tt $\backslash$(.*$\backslash$)} matches the first half, -which may be anything, but the {\tt $\backslash$1} that follows must match the -same exact text. - -\item \rtfsp -{\tt $\backslash$b} -matches the empty string, but only if it is at the beginning or -end of a word. Thus, {\tt $\backslash$bfoo$\backslash$b} matches any occurrence of {\tt foo} -as a separate word. {\tt $\backslash$bball$\backslash$(s$\backslash$|$\backslash$)$\backslash$b} matches {\tt ball} or {\tt balls} -as a separate word. - -\item \rtfsp -{\tt $\backslash$B} -matches the empty string, provided it is *not* at the beginning -or end of a word. - -\item \rtfsp -{\tt $\backslash$<} -matches the empty string, but only if it is at the beginning of -a word. - -\item \rtfsp -{\tt $\backslash$>} -matches the empty string, but only if it is at the end of a word. - -\item \rtfsp -{\tt $\backslash$w} -matches any word-constituent character. - -\item \rtfsp -{\tt $\backslash$W} -matches any character that is not a word-constituent. - -\end{itemize} - - - - - -\section{wxString member functions}\label{wxstringcategories} - -\overview{Overview}{wxstringoverview} - -This section describes categories of \helpref{wxString}{wxstring} class -member functions. - -TODO: describe each one briefly here. - -{\large {\bf Assigment}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::operator $=$}{wxstringoperatorassign}\\ -\end{itemize} - -{\large {\bf Classification}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::IsAscii}{wxstringIsAscii} -\item \helpref{wxString::IsWord}{wxstringIsWord} -\item \helpref{wxString::IsNumber}{wxstringIsNumber} -\item \helpref{wxString::IsNull}{wxstringIsNull} -\item \helpref{wxString::IsDefined}{wxstringIsDefined} -\end{itemize} - -{\large {\bf Comparisons (case sensitive and insensitive)}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::CompareTo}{wxstringCompareTo} -\item \helpref{Compare}{wxstringCompare} -\item \helpref{FCompare}{wxstringFCompare} -\item \helpref{Comparisons}{wxstringComparison} -\end{itemize} - -{\large {\bf Composition and Concatenation}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::operator $+=$}{wxstringPlusEqual} -\item \helpref{wxString::Append}{wxstringAppend} -\item \helpref{wxString::Prepend}{wxstringPrepend} -\item \helpref{wxString::Cat}{wxstringCat} -\item \helpref{operator $+$}{wxstringoperatorplus} -\end{itemize} - -{\large {\bf Constructors/Destructors}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::wxString}{wxstringconstruct} -\item \helpref{wxString::~wxString}{wxstringdestruct} -\end{itemize} - -{\large {\bf Conversions}} - -\begin{itemize} -\item \helpref{wxString::operator const char *}{wxstringoperatorconstcharpt} -\item \helpref{wxString::Chars}{wxstringChars} -\item \helpref{wxString::GetData}{wxstringGetData} -\end{itemize} - -{\large {\bf Deletion/Insertion}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::Del}{wxstringDel} -\item \helpref{wxString::Remove}{wxstringRemove} -\item \helpref{wxString::Insert}{wxstringInsert} -\item \helpref{Split}{wxstringSplit} -\item \helpref{Join}{wxstringJoin} -\end{itemize} - -{\large {\bf Duplication}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::Copy}{wxstringCopy} -\item \helpref{wxString::Replicate}{wxstringReplicate} -\end{itemize} - -{\large {\bf Element access}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::operator[]}{wxstringoperatorbracket} -\item \helpref{wxString::operator()}{wxstringoperatorparenth} -\item \helpref{wxString::Elem}{wxstringElem} -\item \helpref{wxString::Firstchar}{wxstringFirstchar} -\item \helpref{wxString::Lastchar}{wxstringLastchar} -\end{itemize} - -{\large {\bf Extraction of Substrings}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::At}{wxstringAt} -\item \helpref{wxString::Before}{wxstringBefore} -\item \helpref{wxString::Through}{wxstringThrough} -\item \helpref{wxString::From}{wxstringFrom} -\item \helpref{wxString::After}{wxstringAfter} -\item \helpref{wxString::SubString}{wxstringSubString} -\end{itemize} - -{\large {\bf Input/Output}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::sprintf}{wxstringsprintf} -\item \helpref{wxString::operator \cinsert}{wxstringoperatorout} -\item \helpref{wxString::operator \cextract}{wxstringoperatorin} -\item \helpref{wxString::Readline}{wxstringReadline} -\end{itemize} - -{\large {\bf Searching/Matching}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::Index}{wxstringIndex} -\item \helpref{wxString::Contains}{wxstringContains} -\item \helpref{wxString::Matches}{wxstringMatches} -\item \helpref{wxString::Freq}{wxstringFreq} -\item \helpref{wxString::First}{wxstringFirst} -\item \helpref{wxString::Last}{wxstringLast} -\end{itemize} - -{\large {\bf Substitution}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::GSub}{wxstringGSub} -\item \helpref{wxString::Replace}{wxstringReplace} -\end{itemize} - -{\large {\bf Status}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::Length}{wxstringLength} -\item \helpref{wxString::Empty}{wxstringEmpty} -\item \helpref{wxString::Allocation}{wxstringAllocation} -\item \helpref{wxString::IsNull}{wxstringIsNull} -\end{itemize} - -{\large {\bf Transformations}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::Reverse}{wxstringReverse} -\item \helpref{wxString::Upcase}{wxstringUpcase} -\item \helpref{wxString::UpperCase}{wxstringUpperCase} -\item \helpref{wxString::DownCase}{wxstringDownCase} -\item \helpref{wxString::LowerCase}{wxstringLowerCase} -\item \helpref{wxString::Capitalize}{wxstringCapitalize} -\end{itemize} - -{\large {\bf Utilities}} - -\begin{itemize}\itemsep=0pt -\item \helpref{wxString::Strip}{wxstringStrip} -\item \helpref{wxString::Error}{wxstringError} -\item \helpref{wxString::OK}{wxstringOK} -\item \helpref{wxString::Alloc}{wxstringAlloc} -\item \helpref{wxCHARARG}{wxstringwxCHARARG} -\item \helpref{CommonPrefix}{wxstringCommonPrefix} -\item \helpref{CommonSuffix}{wxstringCommonSuffix} -\end{itemize} +TODO.