]> git.saurik.com Git - wxWidgets.git/blame - docs/latex/wx/tstring.tex
GTK
[wxWidgets.git] / docs / latex / wx / tstring.tex
CommitLineData
a660d684
KB
1\section{wxString overview}\label{wxstringoverview}
2
3Class: \helpref{wxString}{wxstring}
4
5Strings are used very frequently in most programs. There is no direct support in
6the C++ language for strings. A string class can be useful in many
7situations: it not only makes the code shorter and easier to read, it also
8provides more security, because we don't have to deal with pointer acrobatics.
9
10wxString is available in two versions: a cut-down wxWindows,
11copyright-free version, and a much more powerful GNU-derived version. The default is the
12GNU-derived, fully-featured version, ported and revised by Stefan Hammes.
13
14For backward compatibility most of the member functions of the original
15wxWindows wxString class have been included, except some `dangerous'
16functions.
17
18wxString can be compiled under MSW, UNIX and VMS (see below). The
19function names have been capitalized to be consistent with the wxWindows
20naming scheme.
21
22The reasons for not using the GNU string class directly are:
23
24\begin{itemize}\itemsep=0pt
25\item It is not available on all systems (generally speaking, it is available only on some
26UNIX systems).
27\item We can make changes and extensions to the string class as needed and are not
28forced to use `only' the functionality of the GNU string class.
29\end{itemize}
30
31The GNU code comes with certain copyright restrictions. If you can't
32live with these, you will need to use the cut-down wxString class
33instead, by editing wx\_setup.h and appropriate wxWindows makefiles.
34
35\subsection{Copyright of the original GNU code portion}
36
37Copyright (C) 1988, 1991, 1992 Free Software Foundation, Inc.
38written by Doug Lea (dl@rocky.oswego.edu)
39
40This file is part of the GNU C++ Library. This library is free
41software; you can redistribute it and/or modify it under the terms of
42the GNU Library General Public License as published by the Free
43Software Foundation; either version 2 of the License, or (at your
44option) any later version. This library is distributed in the hope
45that it will be useful, but WITHOUT ANY WARRANTY; without even the
46implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
47PURPOSE. See the GNU Library General Public License for more details.
48You should have received a copy of the GNU Library General Public
49License along with this library; if not, write to the Free Software
50Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
51
52\subsection{Features/Additions/Modifications}
53
54The wxString class offers many string handling functions and support for
55regular expressions. This gives powerful, easy-to-use pattern-matching functionality.
56See below for a discussion of the GNU features of wxString. See also
57the header file `wxstrgnu.h' which shows all member functions.
58
59As stated above, there are extensions to the wxString class.
60These include the member functions of the `old' wxString class.
61Below is a list of the additional member functions:
62
63\begin{itemize}\itemsep=0pt
64\item Access to the internal representation. Should be used with care:
65\begin{verbatim}
66 char* GetData() const;
67\end{verbatim}
68\item To make a copy of 'this' (only for compatibility):
69\begin{verbatim}
70 wxString Copy() const;
71\end{verbatim}
72\item For case sensitive and case insensitive comparisons:
73\begin{verbatim}
74 enum caseCompare {exact, ignoreCase};
75 int CompareTo(const char* cs, caseCompare cmp = exact) const;
76 int CompareTo(const wxString& st, caseCompare cmp = exact) const;
77\end{verbatim}
78
79\item For case sensitive and case insensitive containment check:
80\begin{verbatim}
81 Bool Contains(const char* pat, caseCompare cmp = exact) const;
82 Bool Contains(const wxString& pat, caseCompare cmp = exact) const;
83\end{verbatim}
84
85\item For case sensitive and case insensitive index calculation:
86\begin{verbatim}
87 int Index(const char* pat, int i=0, caseCompare cmp = exact) const;
88 int Index(const wxString& s, int i=0, caseCompare cmp = exact) const;
89\end{verbatim}
90
91\item For element access in addition to the [] operator:
92\begin{verbatim}
93 char& operator()(int); // Indexing with bounds checking
94\end{verbatim}
95
96\item To put something in front of a string:
97\begin{verbatim}
98 wxString& Prepend(const char*); // Prepend a character string
99 wxString& Prepend(const wxString& s);
100 wxString& Prepend(char c, int rep=1); // Prepend c rep times
101\end{verbatim}
102
103\item For concatenation:
104\begin{verbatim}
105 wxString& Append(const char* cs);
106 wxString& Append(const wxString& s);
107 wxString& Append(char c, int rep=1); // Append c rep times
108\end{verbatim}
109
110\item To get the first and last occurrence of a char or string:
111\begin{verbatim}
112 int First(char c) const;
113 int First(const char* cs) const;
114 int First(const wxString& cs) const;
115 int Last(char c) const;
116 int Last(const char* cs) const;
117 int Last(const wxString& cs) const;
118\end{verbatim}
119
120\item To insert something into a string
121\begin{verbatim}
122 wxString& Insert(int pos, const char*);
123 wxString& Insert(int pos, const wxString&);
124\end{verbatim}
125
126\item To remove data (in addition to the 'Del' functions):
127\begin{verbatim}
128 wxString& Remove(int pos); // Remove pos to end of string
129 wxString& Remove(int pos, int n); // Remove n chars starting at pos
130 wxString& RemoveLast(void); // It removes the last char of a string
131\end{verbatim}
132
133\item To replace data:
134\begin{verbatim}
135 wxString& Replace(int pos, int n, const char*);
136 wxString& Replace(int pos, int n, const wxString&);
137\end{verbatim}
138
139\item Alternative names for compatibility:
140\begin{verbatim}
141 void LowerCase(); // Change self to lower-case
142 void UpperCase(); // Change self to upper-case
143\end{verbatim}
144
145\item Edward Zimmermann's additions:
146\begin{verbatim}
147 wxString SubString(int from, int to);
148\end{verbatim}
149
150\item Formatted assignment:
151\begin{verbatim}
152 void sprintf(const char *fmt, ...);
153\end{verbatim}
154
155We do not use the 'sprintf' constructor of the old wxString class anymore,
156because with that constructor, every initialisation with a string would
157go through sprintf and this is not desirable, because sprintf interprets
158some characters. With the above function we can write:
159
160\begin{verbatim}
161 wxString msg; msg.sprintf("Processing item %d\n",count);
162\end{verbatim}
163
164\item Strip chars at the front and/or end.
165This can be useful for trimming strings:
166\begin{verbatim}
167 enum StripType {leading = 0x1, trailing = 0x2, both = 0x3};
168 wxSubString Strip(StripType s=trailing, char c=' ');
169\end{verbatim}
170
171\item Line input:
172Besides the stream I/O functions this function can be used for non-standard
173formatted I/O with arbitrary line terminators.
174\begin{verbatim}
175 friend int Readline(FILE *f, wxString& x,
176 char terminator = '\n',
177 int discard_terminator = 1);
178\end{verbatim}
179
180\item The GNU wxString class lacks some classification functions:
181\begin{verbatim}
182 int IsAscii() const;
183 int IsWord() const;
184 int IsNumber() const;
185 int IsNull() const;
186 int IsDefined() const;
187\end{verbatim}
188
189\item The meaning of nil has been changed. A wxString x is only nil, if it
190has been declared `wxString x'. In all other cases it is NOT nil. This
191seems to me more logical than to let a `wxString x=""' be nil as it
192was in the original GNU code.
193
194\item {\bf IMPORTANT:}
195the following is a very, very, very ugly macro, but it makes things more
196transparent in cases, where a library function requires a
197(char*) argument. This is especially the case in wxWindows,
198where most char-arguments are (char*) and not (const char*).
199This macro should only be used in such cases and NOT to
200modify the internal data. The standard type conversion function
201of wxString returns a '(const char*)'.
202The conventional way would be 'function((char*)string.Chars())'.
203With the macro this can be achieved by 'function(wxCHARARG(string))'.
204This makes it clearer that the usage should be confined
205to arguments. See below for examples.
206
207\begin{verbatim}
208#define wxCHARARG(s) ((char*)(s).Chars())
209\end{verbatim}
210
211\end{itemize}
212
213\subsection{Function calls}
214
215When using wxString objects as parameters to other functions you should
216note the following:
217
218\begin{verbatim}
219void f1(const char *s){}
220void f2(char *s){}
221
222main(){
223 wxString aString;
224 f1(aString); // ok
225 f2(aString); // error
226 f2(wxCHARARG(aString)); // ok
227 printf("%s",aString); // NO compilation error, but a runtime error.
228 printf("%s",aString.Chars()); // ok
229 printf("%s",wxCHARARG(aString)); // ok
230}
231\end{verbatim}
232
233\subsection{Header files}
234
235For DOS and UNIX we use a stub-headerfile {\tt include/base/wxstring.h}\rtfsp
236which includes the two headerfiles in the {\tt contrib/wxstring} directory,
237namely {\tt contrib/wxstring/wxstrgnu.h} and {\tt contrib/wxstring/wxregex.h}.
238If there is a headerfile {\tt contrib/wxstring/wxstring.h}, please
239delete it. It will cause problems in the VMS compilation.
240
241For VMS an additional step is needed because of the limited include mechanism
242of the VMS C++ compiler:
243In the VMS-Makefile, the include-file search path is augmented with the
244{\tt contrib/wxstring} directory, so that the correct headerfiles
245can be included.
246
247So you have only to specify
248
249\begin{verbatim}
250#define USE_GNU_WXSTRING 1
251\end{verbatim}
252
253in {\tt include/base/wx\_setup.h} to use the wxString class.
254
255\subsection{Test program}
256
257Stefan Hammes has included a test program {\tt test.cc} in the contrib/wxstring directory for many features
258of wxString and wxRegex. It also tests Stefan's extensions.
259When running the compiled program, there should
260be NO assert-errors if everything is OK. When compiling the test
261program, you can ignore warnings about unused variables. They
262occur because Stefan has used a special method of initializing all
263variables to the same start values before each test.
264
265\subsection{Compilers}
266
267wxString and wxRegex have been compiled successfully with the following
268compilers (it should work on nearly every C++ compiler):
269
270\begin{itemize}\itemsep=0pt
271\item PC MS-Visual C++ 1.0, 1.5
272\item UNIX gcc v2.6.3
273\item UNIX Sun SunPro compiler under Solaris 2.x
274\item VMS DEC C++ compiler (on VAX and AXP)
275\end{itemize}
276
277Warnings about type conversion or assignments can be ignored.
278
279\subsection{GNU Documentation}
280
281Below is the original GNU wxString and wxRegex
282documentation. It describes most functions of the classes.
283The function names have been capitalized to be consistent with
284the wxWindows naming scheme. The examples are integrated into the test program.
285
286Copyright (C) 1988, 1991, 1992 Free Software Foundation, Inc.
287
288Permission is granted to make and distribute verbatim copies of this
289manual provided the copyright notice and this permission notice are
290preserved on all copies.
291
292Permission is granted to copy and distribute modified versions of
293this manual under the conditions for verbatim copying, provided also
294that the section entitled "GNU Library General Public License" is
295included exactly as in the original, and provided that the entire
296resulting derived work is distributed under the terms of a permission
297notice identical to this one.
298
299Permission is granted to copy and distribute translations of this
300manual into another language, under the above conditions for modified
301versions, except that the section entitled "GNU Library General Public
302License" and this permission notice may be included in translations
303approved by the Free Software Foundation instead of in the original
304English.
305
306\subsubsection{The wxString class}
307
308The `wxString' class is designed to extend GNU C++ to support string
309processing capabilities similar to those in languages like Awk. The
310class provides facilities that ought to be convenient and efficient
311enough to be useful replacements for `char*' based processing via the C
312string library (i.e., `strcpy, strcmp,' etc.) in many applications.
313Many details about wxString representations are described in the
314Representation section.
315
316A separate `wxSubString' class supports substring extraction and
317modification operations. This is implemented in a way that user
318programs never directly construct or represent substrings, which are
319only used indirectly via wxString operations.
320
321Another separate class, `wxRegex' is also used indirectly via wxString
322operations in support of regular expression searching, matching, and the
323like. The wxRegex class is based entirely on the GNU Emacs regex
324functions. See \helpref{Regular Expressions}{regularexpressions}
325for a full explanation of regular expression syntax. (For
326implementation details, see the internal documentation in files
327{\tt wxregex.h} and {\tt wxregex.cc}).
328
329\subsubsection{Constructor examples}
330
331Strings are initialized and assigned as in the following examples:
332
333{\tt wxString x;}
334Set x to the nil string. This is different from the original GNU code,
335which also sets a string to nil when it is assigned 0 or "".
336
337{\tt wxString x = "Hello"; wxString y("Hello");}
338Set x and y to a copy of the string "Hello".
339
340{\tt wxString x = 'A'; wxString y('A');}
341Set x and y to the string value "A".
342
343{\tt wxString u = x; wxString v(x);}
344Set u and v to the same string as wxString x.
345
346{\tt wxString u = x.At(1,4); wxString v(x.At(1,4));}
347Set u and v to the length 4 substring of x starting at position 1
348(counting indexes from 0).
349
350{\tt wxString x("abc", 2);}
351Sets x to "ab", i.e., the first 2 characters of "abc".
352
353There are no directly accessible forms for declaring wxSubString
354variables.
355
356The declaration \verb$wxRegex r("[a-zA-Z_][a-zA-Z0-9_]*");$ creates
357a compiled regular expression suitable for use in wxString operations
358described below. (In this case, one that matches any C++ identifier).
359The first argument may also be a wxString. Be careful in distinguishing
360the role of backslashes in quoted GNU C++ `char*' constants versus those
361in Regexes. For example, a wxRegex that matches either one or more tabs
362or all strings beginning with "ba" and ending with any number of
363occurrences of "na" could be declared as
364
365\begin{verbatim}
366 wxRegex r = "\\(\t+\\)\\|\\(ba\\(na\\)*\\)"
367\end{verbatim}
368
369Note that only one backslash is needed
370to signify the tab, but two are needed for the parenthesization and
371virgule, since the GNU C++ lexical analyzer decodes and strips
372backslashes before they are seen by wxRegex.
373
374There are three additional optional arguments to the wxRegex
375constructor that are less commonly useful:
376
377{\tt fast (default 0)}
378`fast' may be set to true (1) if the wxRegex should be
379"fast-compiled". This causes an additional compilation step that
380is generally worthwhile if the wxRegex will be used many times.
381
382{\tt bufsize (default max(40, length of the string))}
383This is an estimate of the size of the internal compiled
384expression. Set it to a larger value if you know that the
385expression will require a lot of space. If you do not know, do not
386worry: realloc is used if necessary.
387
388{\tt transtable (default none == 0)}
389The address of a byte translation table (a char[256]) that
390translates each character before matching.
391
392As a convenience, several Regexes are predefined and usable in any
393program. Here are their declarations from {\tt wxString.h}.
394\begin{verbatim}
395 extern wxRegex RXwhite; // = "[ \n\t]+"
396 extern wxRegex RXint; // = "-?[0-9]+"
397 extern wxRegex RXdouble; // = "-?\\(\\([0-9]+\\.[0-9]*\\)\\|
398 // \\([0-9]+\\)\\|
399 // \\(\\.[0-9]+\\)\\)
400 // \\([eE][---+]?[0-9]+\\)?"
401 extern wxRegex RXalpha; // = "[A-Za-z]+"
402 extern wxRegex RXlowercase; // = "[a-z]+"
403 extern wxRegex RXuppercase; // = "[A-Z]+"
404 extern wxRegex RXalphanum; // = "[0-9A-Za-z]+"
405 extern wxRegex RXidentifier; // = "[A-Za-z_][A-Za-z0-9_]*"
406\end{verbatim}
407
408\subsubsection{Examples}
409
410Most {\tt wxString} class capabilities are best shown via example. The
411examples below use the following declarations.
412
413\begin{verbatim}
414 wxString x = "Hello";
415 wxString y = "world";
416 wxString n = "123";
417 wxString z;
418 char *s = ",";
419 wxString lft, mid, rgt;
420 wxRegex r = "e[a-z]*o";
421 wxRegex r2("/[a-z]*/");
422 char c;
423 int i, pos, len;
424 double f;
425 wxString words[10];
426 words[0] = "a";
427 words[1] = "b";
428 words[2] = "c";
429\end{verbatim}
430
431\subsubsection{Comparing, Searching and Matching examples}
432
433The usual lexicographic relational operators (`==, !=, <, <=, >, >=')
434are defined. A functional form `compare(wxString, wxString)' is also
435provided, as is `fcompare(wxString, wxString)', which compares Strings
436without regard for upper vs. lower case.
437
438All other matching and searching operations are based on some form
439of the (non-public) `match' and `search' functions. `match' and
440`search' differ in that `match' attempts to match only at the given
441starting position, while `search' starts at the position, and then
442proceeds left or right looking for a match. As seen in the following
443examples, the second optional `startpos' argument to functions using
444`match' and `search' specifies the starting position of the search: If
445non-negative, it results in a left-to-right search starting at position
446`startpos', and if negative, a right-to-left search starting at
447position `x.Length() + startpos'. In all cases, the index returned is
448that of the beginning of the match, or -1 if there is no match.
449
450Three wxString functions serve as front ends to `search' and `match'.
451`index' performs a search, returning the index, `matches' performs a
452match, returning nonzero (actually, the length of the match) on success,
453and `contains' is a boolean function performing either a search or
454match, depending on whether an index argument is provided:
455
456{\tt x.Index("lo")}
457Returns the zero-based index of the leftmost occurrence of
458substring "lo" (3, in this case). The argument may be a wxString,
459wxSubString, char, char*, or wxRegex.
460
461{\tt x.Index("l", 2)}
462Returns the index of the leftmost occurrence of "l"
463found when starting the search at position x[2], or 2 in this case.
464
465{\tt x.Index("l", -1)}
466Returns the index of the rightmost occurrence of "l", or 3 here.
467
468{\tt x.Index("l", -3)}
469Returns the index of the rightmost occurrence of "l" found by
470starting the search at the 3rd to the last position of x,
471returning 2 in this case.
472
473{\tt pos = r.Search("leo", 3, len, 0)}
474Returns the index of r in the {\tt char*} string of length 3, starting
475at position 0, also placing the length of the match in reference
476parameter len.
477
478{\tt x.Contains("He")}
479Returns nonzero if the wxString x contains the substring "He". The
480argument may be a wxString, wxSubString, char, char*, or wxRegex.
481
482{\tt x.Contains("el", 1)}
483Returns nonzero if x contains the substring "el" at position 1.
484As in this example, the second argument to `contains', if present,
485means to match the substring only at that position, and not to
486search elsewhere in the string.
487
488{\tt x.Contains(RXwhite);}
489Returns nonzero if x contains any whitespace (space, tab, or
490newline). Recall that `RXwhite' is a global whitespace wxRegex.
491
492{\tt x.Matches("lo", 3)}
493Returns nonzero if x starting at position 3 exactly matches "lo",
494with no trailing characters (as it does in this example).
495
496{\tt x.Matches(r)}
497Returns nonzero if wxString x as a whole matches wxRegex r.
498
499{\tt int f = x.Freq("l")}
500Returns the number of distinct, nonoverlapping matches to the
501argument (2 in this case).
502
503\subsubsection{Substring extraction examples}
504
505Substrings may be extracted via the `at', `before', `through',
506`from', and `after' functions. These behave as either lvalues or
507rvalues.
508
509{\tt z = x.At(2, 3)}
510Sets wxString z to be equal to the length 3 substring of wxString x
511starting at zero-based position 2, setting z to "llo" in this
512case. A nil wxString is returned if the arguments don't make sense.
513
514{\tt x.At(2, 2) = "r"}
515Sets what was in positions 2 to 3 of x to "r", setting x to "Hero"
516in this case. As indicated here, wxSubString assignments may be of
517different lengths.
518
519{\tt x.At("He") = "je";}
520x.At("He") is the substring of x that matches the first occurrence of
521its argument. The substitution sets x to "jello". If "He" did not
522occur, the substring would be nil, and the assignment would have
523no effect.
524
525{\tt x.At("l", -1) = "i";}
526Replaces the rightmost occurrence of "l" with "i", setting x to
527"Helio".
528
529{\tt z = x.At(r)}
530Sets wxString z to the first match in x of wxRegex r, or "ello" in this
531case. A nil wxString is returned if there is no match.
532
533{\tt z = x.Before("o")}
534Sets z to the part of x to the left of the first occurrence of
535"o", or "Hell" in this case. The argument may also be a wxString,
536wxSubString, or wxRegex. (If there is no match, z is set to "".)
537
538{\tt x.Before("ll") = "Bri";}
539Sets the part of x to the left of "ll" to "Bri", setting x to
540"Brillo".
541
542{\tt z = x.Before(2)}
543Sets z to the part of x to the left of x[2], or "He" in this case.
544
545{\tt z = x.After("Hel")}
546Sets z to the part of x to the right of "Hel", or "lo" in this
547case.
548
549{\tt z = x.Through("el")}
550Sets z to the part of x up to and including "el", or "Hel" in this
551case.
552
553{\tt z = x.From("el")}
554Sets z to the part of x from "el" to the end, or "ello" in this
555case.
556
557{\tt x.After("Hel") = "p";}
558Sets x to "Help".
559
560{\tt z = x.After(3)}
561Sets z to the part of x to the right of x[3] or "o" in this case.
562
563{\tt z = " ab c"; z = z.After(RXwhite)}
564Sets z to the part of its old string to the right of the first
565group of whitespace, setting z to "ab c". Use GSub (below) to strip
566out multiple occurrences of whitespace or any pattern.
567
568{\tt x[0] = 'J';}
569Sets the first element of x to 'J'. x[i] returns a reference to
570the ith element of x, or triggers an error if i is out of range.
571
572{\tt CommonPrefix(x, "Help")}
573Returns the wxString containing the common prefix of the two Strings
574or "Hel" in this case.
575
576{\tt CommonSuffix(x, "to")}
577Returns the wxString containing the common suffix of the two Strings
578or "o" in this case.
579
580\subsubsection{Concatenation examples}
581
582{\tt z = x + s + ' ' + y.At("w") + y.After("w") + ".";}
583Sets z to "Hello, world."
584
585{\tt x += y;}
586Sets x to "Helloworld".
587
588{\tt Cat(x, y, z)}
589A faster way to say z = x + y.
590
591{\tt Cat(z, y, x, x)}
592Double concatenation; A faster way to say x = z + y + x.
593
594{\tt y.Prepend(x);}
595A faster way to say y = x + y.
596
597{\tt z = Replicate(x, 3);}
598Sets z to "HelloHelloHello".
599
600{\tt z = Join(words, 3, "/")}
601Sets z to the concatenation of the first 3 Strings in wxString array
602words, each separated by "/", setting z to "a/b/c" in this case.
603The last argument may be "" or 0, indicating no separation.
604
605\subsubsection{Other manipulation examples}
606
607{\tt z = "this string has five words"; i = Split(z, words, 10, RXwhite);}
608Sets up to 10 elements of wxString array words to the parts of z
609separated by whitespace, and returns the number of parts actually
610encountered (5 in this case). Here, words[0] = "this", words[1] =
611"string", etc. The last argument may be any of the usual. If
612there is no match, all of z ends up in words[0]. The words array
613is *not* dynamically created by split.
614
615{\tt int nmatches = x.GSub("l","ll")}
616Substitutes all original occurrences of "l" with "ll", setting x
617to "Hellllo". The first argument may be any of the usual,
618including wxRegex. If the second argument is "" or 0, all
619occurrences are deleted. gsub returns the number of matches that
620were replaced.
621
622{\tt z = x + y; z.Del("loworl");}
623Deletes the leftmost occurrence of "loworl" in z, setting z to
624"Held".
625
626{\tt z = Reverse(x)}
627Sets z to the reverse of x, or "olleH".
628
629{\tt z = Upcase(x)}
630Sets z to x, with all letters set to uppercase, setting z to
631"HELLO".
632
633{\tt z = Downcase(x)}
634Sets z to x, with all letters set to lowercase, setting z to
635"hello".
636
637{\tt z = Capitalize(x)}
638Sets z to x, with the first letter of each word set to uppercase,
639and all others to lowercase, setting z to "Hello".
640
641{\tt x.Reverse(), x.Upcase(), x.Downcase(), x.Capitalize()}
642In-place, self-modifying versions of the above.
643
644\subsubsection{Reading, Writing and Conversion examples}
645
646{\tt cout << x}
647Writes out x.
648
649{\tt cout << x.At(2, 3)}
650Writes out the substring "llo".
651
652{\tt cin >> x}
653Reads a whitespace-bounded string into x.
654
655{\tt x.Length()}
656Returns the length of wxString x (5, in this case).
657
658{\tt s = (const char*)x}
659Can be used to extract the `char*' char array. This coercion is
660useful for sending a wxString as an argument to any function
661expecting a `const char*' argument (like `atoi', and
662`File::open'). This operator must be used with care, since the
663conversion returns a pointer to `wxString' internals without copying
664the characters: The resulting `(char*)' is only valid until the
665next wxString operation, and you must not modify it. (The
666conversion is defined to return a const value so that GNU C++ will
667produce warning and/or error messages if changes are attempted.)
668
669\subsection{Regular Expressions}\label{regularexpressions}
670
671The following are extracts from GNU documentation.
672
673\subsubsection{Regular Expression Overview}
674
675Regular expression matching allows you to test whether a string fits
676into a specific syntactic shape. You can also search a string for a
677substring that fits a pattern.
678
679A regular expression describes a set of strings. The simplest case
680is one that describes a particular string; for example, the string
681`foo' when regarded as a regular expression matches `foo' and nothing
682else. Nontrivial regular expressions use certain special constructs
683so that they can match more than one string. For example, the
684regular expression `foo$\backslash$|bar' matches either the string `foo' or the
685string `bar'; the regular expression `c[ad]*r' matches any of the
686strings `cr', `car', `cdr', `caar', `cadddar' and all other such
687strings with any number of `a''s and `d''s.
688
689The first step in matching a regular expression is to compile it.
690You must supply the pattern string and also a pattern buffer to hold
691the compiled result. That result contains the pattern in an internal
692format that is easier to use in matching.
693
694Having compiled a pattern, you can match it against strings. You can
695match the compiled pattern any number of times against different
696strings.
697
698\subsubsection{Syntax of Regular Expressions}
699
700Regular expressions have a syntax in which a few characters are
701special constructs and the rest are "ordinary". An ordinary
702character is a simple regular expression which matches that character
703and nothing else. The special characters are `{\tt \$}', `\verb+^+', `.', `*',
704`+', `?', `[', `]' and `$\backslash$'. Any other character appearing in a
705regular expression is ordinary, unless a `$\backslash$' precedes it.
706
707For example, `f' is not a special character, so it is ordinary, and
708therefore `f' is a regular expression that matches the string `f' and
709no other string. (It does *not* match the string `ff'.) Likewise,
710`o' is a regular expression that matches only `o'.
711
712Any two regular expressions A and B can be concatenated. The result
713is a regular expression which matches a string if A matches some
714amount of the beginning of that string and B matches the rest of the
715string.
716
717As a simple example, we can concatenate the regular expressions `f'
718and `o' to get the regular expression `fo', which matches only the
719string `fo'. Still trivial.
720
721Note: for Unix compatibility, special characters are treated as
722ordinary ones if they are in contexts where their special meanings
723make no sense. For example, `*foo' treats `*' as ordinary since
724there is no preceding expression on which the `*' can act. It is
725poor practice to depend on this behavior; better to quote the special
726character anyway, regardless of where it appears.
727
728The following are the characters and character sequences which have
729special meaning within regular expressions. Any character not
730mentioned here is not special; it stands for exactly itself for the
731purposes of searching and matching.
732
733\begin{itemize}
734\itemsep=0pt
735
736\item \rtfsp
737{\tt .} is a special character that matches anything except a newline.
738Using concatenation, we can make regular expressions like {\tt a.b}
739which matches any three-character string which begins with {\tt a}
740and ends with {\tt b}.
741
742\item \rtfsp
743{\tt *} is not a construct by itself; it is a suffix, which means the
744preceding regular expression is to be repeated as many times as
745possible. In {\tt fo*}, the {\tt *} applies to the {\tt o}, so {\tt fo*}
746matches {\tt f} followed by any number of {\tt o}'s.
747
748The case of zero {\tt o}'s is allowed: {\tt fo*} does match {\tt f}.
749
750{\tt *} always applies to the {\it smallest} possible preceding
751expression. Thus, {\tt fo*} has a repeating {\tt o}, not a repeating
752{\tt fo}.
753
754The matcher processes a {\tt *} construct by matching, immediately,
755as many repetitions as can be found. Then it continues with the
756rest of the pattern. If that fails, backtracking occurs,
757discarding some of the matches of the {\tt *}'d construct in case
758that makes it possible to match the rest of the pattern. For
759example, matching {\tt c$[$ad$]$*ar} against the string {\tt caddaar}, the
760{\tt $[$ad$]$*} first matches {\tt addaa}, but this does not allow the next
761{\tt a} in the pattern to match. So the last of the matches of
762{\tt $[$ad$]$} is undone and the following {\tt a} is tried again. Now it
763succeeds.
764
765\item \rtfsp
766{\tt +} is like {\tt *} except that at least one match for the preceding
767pattern is required for {\tt +}. Thus, {\tt c$[$ad$]$+r} does not match
768{\tt cr} but does match anything else that {\tt c$[$ad$]$*r} would match.
769
770\item \rtfsp
771{\tt ?} is like {\tt *} except that it allows either zero or one match
772for the preceding pattern. Thus, {\tt c$[$ad$]$?r} matches {\tt cr} or
773{\tt car} or {\tt cdr}, and nothing else.
774
775\item \rtfsp
776{\tt $[$} begins a "character set", which is terminated by a {\tt $]$}. In
777the simplest case, the characters between the two form the set.
778Thus, {\tt $[$ad$]$} matches either {\tt a} or {\tt d}, and {\tt $[$ad$]$*} matches any
779string of {\tt a}'s and {\tt d}'s (including the empty string), from
780which it follows that {\tt c$[$ad$]$*r} matches {\tt car}, etc.
781
782Character ranges can also be included in a character set, by
783writing two characters with a {\tt -} between them. Thus, {\tt $[$a-z$]$}
784matches any lower-case letter. Ranges may be intermixed freely
785with individual characters, as in {\tt $[$a-z\$\%.$]$}, which matches any
786lower-case letter or {\tt \$}, {\tt \%} or period.
787
788Note that the usual special characters are not special any more
789inside a character set. A completely different set of special
790characters exists inside character sets: {\tt $]$}, {\tt -} and \verb$^$.
791
792To include a {\tt $]$} in a character set, you must make it the first
793character. For example, {\tt $[$$]$a$]$} matches {\tt $]$} or {\tt a}. To include
794a {\tt -}, you must use it in a context where it cannot possibly
795indicate a range: that is, as the first character, or
796immediately after a range.
797
798\item \rtfsp
799\verb$[^$ begins a ``complement character set'', which matches any
800character except the ones specified. Thus, \verb$[^a-z0-9A-Z]$
801matches all characters {\it except} letters and digits.
802
803\verb$^$ is not special in a character set unless it is the first
804character. The character following the \verb$^$ is treated as if it
805were first (it may be a {\tt -} or a {\tt $]$}).
806
807\item \rtfsp
808\verb$^$ is a special character that matches the empty string -- but only
809if at the beginning of a line in the text being matched.
810Otherwise it fails to match anything. Thus, \verb$^foo$ matches a
811{\tt foo} which occurs at the beginning of a line.
812
813\item \rtfsp
814{\tt \$}
815is similar to \verb$^$ but matches only at the end of a line. Thus,
816{\tt xx*\$} matches a string of one or more {\tt x}'s at the end of a line.
817
818\item \rtfsp
819{\tt $\backslash$}
820has two functions: it quotes the above special characters
821(including {\tt $\backslash$}), and it introduces additional special constructs.
822
823Because {\tt $\backslash$} quotes special characters, {\tt $\backslash$\$} is a regular
824expression which matches only {\tt \$}, and {\tt $\backslash$$[$} is a regular
825expression which matches only {\tt $[$}, and so on.
826
827For the most part, {\tt $\backslash$} followed by any character matches only
828that character. However, there are several exceptions:
829characters which, when preceded by {\tt $\backslash$}, are special constructs.
830Such characters are always ordinary when encountered on their own.
831
832No new special characters will ever be defined. All extensions
833to the regular expression syntax are made by defining new
834two-character constructs that begin with {\tt $\backslash$}.
835
836\item \rtfsp
837{\tt $\backslash$|}
838specifies an alternative. Two regular expressions A and B with
839{\tt $\backslash$|} in between form an expression that matches anything that
840either A or B will match.
841
842Thus, {\tt foo$\backslash$|bar} matches either {\tt foo} or {\tt bar} but no other
843string.
844
845{\tt $\backslash$|} applies to the largest possible surrounding expressions.
846Only a surrounding {\tt $\backslash$( ... $\backslash$)} grouping can limit the grouping
847power of {\tt $\backslash$|}.
848
849Full backtracking capability exists when multiple {\tt $\backslash$|}'s are used.
850
851\item \rtfsp
852{\tt $\backslash$( ... $\backslash$)}
853is a grouping construct that serves three purposes:
854\begin{enumerate}
855\item To enclose a set of {\tt $\backslash$|} alternatives for other operations.
856Thus, {\tt $\backslash$(foo$\backslash$|bar$\backslash$)x} matches either {\tt foox} or {\tt barx}.
857\item To enclose a complicated expression for the postfix {\tt *} to
858operate on. Thus, {\tt ba$\backslash$(na$\backslash$)*} matches {\tt bananana}, etc.,
859with any (zero or more) number of {\tt na}'s.
860\item To mark a matched substring for future reference.
861\end{enumerate}
862
863This last application is not a consequence of the idea of a
864parenthetical grouping; it is a separate feature which happens
865to be assigned as a second meaning to the same {\tt $\backslash$( ... $\backslash$)}
866construct because there is no conflict in practice between the
867two meanings. Here is an explanation of this feature:
868
869\item \rtfsp
870{\tt $\backslash$DIGIT}
871After the end of a {\tt $\backslash$( ... $\backslash$)} construct, the matcher remembers
872the beginning and end of the text matched by that construct.
873Then, later on in the regular expression, you can use {\tt $\backslash$}
874followed by DIGIT to mean ``match the same text matched the
875DIGIT'th time by the {\tt $\backslash$( ... $\backslash$)} construct.'' The {\tt $\backslash$( ... $\backslash$)}
876constructs are numbered in order of commencement in the regexp.
877
878The strings matching the first nine {\tt $\backslash$( ... $\backslash$)} constructs
879appearing in a regular expression are assigned numbers 1 through
8809 in order of their beginnings. {\tt $\backslash$1} through {\tt $\backslash$9} may be used
881to refer to the text matched by the corresponding {\tt $\backslash$( ... $\backslash$)}
882construct.
883
884For example, {\tt $\backslash$(.*$\backslash$)$\backslash$1} matches any string that is composed of
885two identical halves. The {\tt $\backslash$(.*$\backslash$)} matches the first half,
886which may be anything, but the {\tt $\backslash$1} that follows must match the
887same exact text.
888
889\item \rtfsp
890{\tt $\backslash$b}
891matches the empty string, but only if it is at the beginning or
892end of a word. Thus, {\tt $\backslash$bfoo$\backslash$b} matches any occurrence of {\tt foo}
893as a separate word. {\tt $\backslash$bball$\backslash$(s$\backslash$|$\backslash$)$\backslash$b} matches {\tt ball} or {\tt balls}
894as a separate word.
895
896\item \rtfsp
897{\tt $\backslash$B}
898matches the empty string, provided it is {\it not} at the beginning
899or end of a word.
900
901\item \rtfsp
902{\tt $\backslash$<}
903matches the empty string, but only if it is at the beginning of
904a word.
905
906\item \rtfsp
907{\tt $\backslash$>}
908matches the empty string, but only if it is at the end of a word.
909
910\item \rtfsp
911{\tt $\backslash$w}
912matches any word-constituent character.
913
914\item \rtfsp
915{\tt $\backslash$W}
916matches any character that is not a word-constituent.
917
918\end{itemize}
919
920
921
922
923
924\section{wxString member functions}\label{wxstringcategories}
925
926\overview{Overview}{wxstringoverview}
927
928This section describes categories of \helpref{wxString}{wxstring} class
929member functions.
930
931TODO: describe each one briefly here.
932
933{\large {\bf Assignment}}
934
935\begin{itemize}\itemsep=0pt
936\item \helpref{wxString::operator $=$}{wxstringoperatorassign}
937\end{itemize}
938
939{\large {\bf Classification}}
940
941\begin{itemize}\itemsep=0pt
942\item \helpref{wxString::IsAscii}{wxstringIsAscii}
943\item \helpref{wxString::IsWord}{wxstringIsWord}
944\item \helpref{wxString::IsNumber}{wxstringIsNumber}
945\item \helpref{wxString::IsNull}{wxstringIsNull}
946\item \helpref{wxString::IsDefined}{wxstringIsDefined}
947\end{itemize}
948
949{\large {\bf Comparisons (case sensitive and insensitive)}}
950
951\begin{itemize}\itemsep=0pt
952\item \helpref{wxString::CompareTo}{wxstringCompareTo}
953\item \helpref{Compare}{wxstringCompare}
954\item \helpref{FCompare}{wxstringFCompare}
955\item \helpref{Comparisons}{wxstringComparison}
956\end{itemize}
957
958{\large {\bf Composition and Concatenation}}
959
960\begin{itemize}\itemsep=0pt
961\item \helpref{wxString::operator $+=$}{wxstringPlusEqual}
962\item \helpref{wxString::Append}{wxstringAppend}
963\item \helpref{wxString::Prepend}{wxstringPrepend}
964\item \helpref{wxString::Cat}{wxstringCat}
965\item \helpref{operator $+$}{wxstringoperatorplus}
966\end{itemize}
967
968{\large {\bf Constructors/Destructors}}
969
970\begin{itemize}\itemsep=0pt
971\item \helpref{wxString::wxString}{wxstringconstruct}
972\item \helpref{wxString::\destruct{wxString}}{wxstringdestruct}
973\end{itemize}
974
975{\large {\bf Conversions}}
976
977\begin{itemize}\itemsep=0pt
978\item \helpref{wxString::operator const char *}{wxstringoperatorconstcharpt}
979\item \helpref{wxString::Chars}{wxstringChars}
980\item \helpref{wxString::GetData}{wxstringGetData}
981\end{itemize}
982
983{\large {\bf Deletion/Insertion}}
984
985\begin{itemize}\itemsep=0pt
986\item \helpref{wxString::Del}{wxstringDel}
987\item \helpref{wxString::Remove}{wxstringRemove}
988\item \helpref{wxString::Insert}{wxstringInsert}
989\item \helpref{Split}{wxstringSplit}
990\item \helpref{Join}{wxstringJoin}
991\end{itemize}
992
993{\large {\bf Duplication}}
994
995\begin{itemize}\itemsep=0pt
996\item \helpref{wxString::Copy}{wxstringCopy}
997\item \helpref{wxString::Replicate}{wxstringReplicate}
998\end{itemize}
999
1000{\large {\bf Element access}}
1001
1002\begin{itemize}\itemsep=0pt
1003\item \helpref{wxString::operator[]}{wxstringoperatorbracket}
1004\item \helpref{wxString::operator()}{wxstringoperatorparenth}
1005\item \helpref{wxString::Elem}{wxstringElem}
1006\item \helpref{wxString::Firstchar}{wxstringFirstchar}
1007\item \helpref{wxString::Lastchar}{wxstringLastchar}
1008\end{itemize}
1009
1010{\large {\bf Extraction of Substrings}}
1011
1012\begin{itemize}\itemsep=0pt
1013\item \helpref{wxString::At}{wxstringAt}
1014\item \helpref{wxString::Before}{wxstringBefore}
1015\item \helpref{wxString::Through}{wxstringThrough}
1016\item \helpref{wxString::From}{wxstringFrom}
1017\item \helpref{wxString::After}{wxstringAfter}
1018\item \helpref{wxString::SubString}{wxstringSubString}
1019\end{itemize}
1020
1021{\large {\bf Input/Output}}
1022
1023\begin{itemize}\itemsep=0pt
1024\item \helpref{wxString::sprintf}{wxstringsprintf}
1025\item \helpref{wxString::operator \cinsert}{wxstringoperatorout}
1026\item \helpref{wxString::operator \cextract}{wxstringoperatorin}
1027\item \helpref{wxString::Readline}{wxstringReadline}
1028\end{itemize}
1029
1030{\large {\bf Searching/Matching}}
1031
1032\begin{itemize}\itemsep=0pt
1033\item \helpref{wxString::Index}{wxstringIndex}
1034\item \helpref{wxString::Contains}{wxstringContains}
1035\item \helpref{wxString::Matches}{wxstringMatches}
1036\item \helpref{wxString::Freq}{wxstringFreq}
1037\item \helpref{wxString::First}{wxstringFirst}
1038\item \helpref{wxString::Last}{wxstringLast}
1039\end{itemize}
1040
1041{\large {\bf Substitution}}
1042
1043\begin{itemize}\itemsep=0pt
1044\item \helpref{wxString::GSub}{wxstringGSub}
1045\item \helpref{wxString::Replace}{wxstringReplace}
1046\end{itemize}
1047
1048{\large {\bf Status}}
1049
1050\begin{itemize}\itemsep=0pt
1051\item \helpref{wxString::Length}{wxstringLength}
1052\item \helpref{wxString::Empty}{wxstringEmpty}
1053\item \helpref{wxString::Allocation}{wxstringAllocation}
1054\item \helpref{wxString::IsNull}{wxstringIsNull}
1055\end{itemize}
1056
1057{\large {\bf Transformations}}
1058
1059\begin{itemize}\itemsep=0pt
1060\item \helpref{wxString::Reverse}{wxstringReverse}
1061\item \helpref{wxString::Upcase}{wxstringUpcase}
1062\item \helpref{wxString::UpperCase}{wxstringUpperCase}
1063\item \helpref{wxString::DownCase}{wxstringDownCase}
1064\item \helpref{wxString::LowerCase}{wxstringLowerCase}
1065\item \helpref{wxString::Capitalize}{wxstringCapitalize}
1066\end{itemize}
1067
1068{\large {\bf Utilities}}
1069
1070\begin{itemize}\itemsep=0pt
1071\item \helpref{wxString::Strip}{wxstringStrip}
1072\item \helpref{wxString::Error}{wxstringError}
1073\item \helpref{wxString::OK}{wxstringOK}
1074\item \helpref{wxString::Alloc}{wxstringAlloc}
1075\item \helpref{wxCHARARG}{wxstringwxCHARARG}
1076\item \helpref{CommonPrefix}{wxstringCommonPrefix}
1077\item \helpref{CommonSuffix}{wxstringCommonSuffix}
1078\end{itemize}
1079