]> git.saurik.com Git - wxWidgets.git/blob - docs/latex/wx/tstring.tex
Corrected some problems I introduced, added tabevent.tex.
[wxWidgets.git] / docs / latex / wx / tstring.tex
1 \section{wxString overview}\label{wxstringoverview}
2
3 Class: \helpref{wxString}{wxstring}
4
5 Strings are used very frequently in most programs. There is no direct support in
6 the C++ language for strings. A string class can be useful in many
7 situations: it not only makes the code shorter and easier to read, it also
8 provides more security, because we don't have to deal with pointer acrobatics.
9
10 wxString is available in two versions: a cut-down wxWindows,
11 copyright-free version, and a much more powerful GNU-derived version. The default is the
12 GNU-derived, fully-featured version, ported and revised by Stefan Hammes.
13
14 For backward compatibility most of the member functions of the original
15 wxWindows wxString class have been included, except some `dangerous'
16 functions.
17
18 wxString can be compiled under MSW, UNIX and VMS (see below). The
19 function names have been capitalized to be consistent with the wxWindows
20 naming scheme.
21
22 The reasons for not using the GNU string class directly are:
23
24 \begin{itemize}\itemsep=0pt
25 \item It is not available on all systems (generally speaking, it is available only on some
26 UNIX systems).
27 \item We can make changes and extensions to the string class as needed and are not
28 forced to use `only' the functionality of the GNU string class.
29 \end{itemize}
30
31 The GNU code comes with certain copyright restrictions. If you can't
32 live with these, you will need to use the cut-down wxString class
33 instead, by editing wx\_setup.h and appropriate wxWindows makefiles.
34
35 \subsection{Copyright of the original GNU code portion}
36
37 Copyright (C) 1988, 1991, 1992 Free Software Foundation, Inc.
38 written by Doug Lea (dl@rocky.oswego.edu)
39
40 This file is part of the GNU C++ Library. This library is free
41 software; you can redistribute it and/or modify it under the terms of
42 the GNU Library General Public License as published by the Free
43 Software Foundation; either version 2 of the License, or (at your
44 option) any later version. This library is distributed in the hope
45 that it will be useful, but WITHOUT ANY WARRANTY; without even the
46 implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
47 PURPOSE. See the GNU Library General Public License for more details.
48 You should have received a copy of the GNU Library General Public
49 License along with this library; if not, write to the Free Software
50 Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
51
52 \subsection{Features/Additions/Modifications}
53
54 The wxString class offers many string handling functions and support for
55 regular expressions. This gives powerful, easy-to-use pattern-matching functionality.
56 See below for a discussion of the GNU features of wxString. See also
57 the header file `wxstrgnu.h' which shows all member functions.
58
59 As stated above, there are extensions to the wxString class.
60 These include the member functions of the `old' wxString class.
61 Below is a list of the additional member functions:
62
63 \begin{itemize}\itemsep=0pt
64 \item Access to the internal representation. Should be used with care:
65 \begin{verbatim}
66 char* GetData() const;
67 \end{verbatim}
68 \item To make a copy of 'this' (only for compatibility):
69 \begin{verbatim}
70 wxString Copy() const;
71 \end{verbatim}
72 \item For case sensitive and case insensitive comparisons:
73 \begin{verbatim}
74 enum caseCompare {exact, ignoreCase};
75 int CompareTo(const char* cs, caseCompare cmp = exact) const;
76 int CompareTo(const wxString& st, caseCompare cmp = exact) const;
77 \end{verbatim}
78
79 \item For case sensitive and case insensitive containment check:
80 \begin{verbatim}
81 Bool Contains(const char* pat, caseCompare cmp = exact) const;
82 Bool Contains(const wxString& pat, caseCompare cmp = exact) const;
83 \end{verbatim}
84
85 \item For case sensitive and case insensitive index calculation:
86 \begin{verbatim}
87 int Index(const char* pat, int i=0, caseCompare cmp = exact) const;
88 int Index(const wxString& s, int i=0, caseCompare cmp = exact) const;
89 \end{verbatim}
90
91 \item For element access in addition to the [] operator:
92 \begin{verbatim}
93 char& operator()(int); // Indexing with bounds checking
94 \end{verbatim}
95
96 \item To put something in front of a string:
97 \begin{verbatim}
98 wxString& Prepend(const char*); // Prepend a character string
99 wxString& Prepend(const wxString& s);
100 wxString& Prepend(char c, int rep=1); // Prepend c rep times
101 \end{verbatim}
102
103 \item For concatenation:
104 \begin{verbatim}
105 wxString& Append(const char* cs);
106 wxString& Append(const wxString& s);
107 wxString& Append(char c, int rep=1); // Append c rep times
108 \end{verbatim}
109
110 \item To get the first and last occurrence of a char or string:
111 \begin{verbatim}
112 int First(char c) const;
113 int First(const char* cs) const;
114 int First(const wxString& cs) const;
115 int Last(char c) const;
116 int Last(const char* cs) const;
117 int Last(const wxString& cs) const;
118 \end{verbatim}
119
120 \item To insert something into a string:
121 \begin{verbatim}
122 wxString& Insert(int pos, const char*);
123 wxString& Insert(int pos, const wxString&);
124 \end{verbatim}
125
126 \item To remove data (in addition to the 'Del' functions):
127 \begin{verbatim}
128 wxString& Remove(int pos); // Remove pos to end of string
129 wxString& Remove(int pos, int n); // Remove n chars starting at pos
130 wxString& RemoveLast(void); // It removes the last char of a string
131 \end{verbatim}
132
133 \item To replace data:
134 \begin{verbatim}
135 wxString& Replace(int pos, int n, const char*);
136 wxString& Replace(int pos, int n, const wxString&);
137 \end{verbatim}
138
139 \item Alternative names for compatibility:
140 \begin{verbatim}
141 void LowerCase(); // Change self to lower-case
142 void UpperCase(); // Change self to upper-case
143 \end{verbatim}
144
145 \item Edward Zimmermann's additions:
146 \begin{verbatim}
147 wxString SubString(int from, int to);
148 \end{verbatim}
149
150 \item Formatted assignment:
151 \begin{verbatim}
152 void sprintf(const char *fmt, ...);
153 \end{verbatim}
154
155 We do not use the 'sprintf' constructor of the old wxString class anymore,
156 because with that constructor, every initialisation with a string would
157 go through sprintf and this is not desirable, because sprintf interprets
158 some characters. With the above function we can write:
159
160 \begin{verbatim}
161 wxString msg; msg.sprintf("Processing item %d\n",count);
162 \end{verbatim}
163
164 \item Strip chars at the front and/or end.
165 This can be useful for trimming strings:
166 \begin{verbatim}
167 enum StripType {leading = 0x1, trailing = 0x2, both = 0x3};
168 wxSubString Strip(StripType s=trailing, char c=' ');
169 \end{verbatim}
170
171 \item Line input:
172 Besides the stream I/O functions this function can be used for non-standard
173 formatted I/O with arbitrary line terminators.
174 \begin{verbatim}
175 friend int Readline(FILE *f, wxString& x,
176 char terminator = '\n',
177 int discard_terminator = 1);
178 \end{verbatim}
179
180 \item The GNU wxString class lacks some classification functions:
181 \begin{verbatim}
182 int IsAscii() const;
183 int IsWord() const;
184 int IsNumber() const;
185 int IsNull() const;
186 int IsDefined() const;
187 \end{verbatim}
188
189 \item The meaning of nil has been changed. A wxString x is only nil, if it
190 has been declared `wxString x'. In all other cases it is NOT nil. This
191 seems to me more logical than to let a `wxString x=""' be nil as it
192 was in the original GNU code.
193
194 \item {\bf IMPORTANT:}
195 The following is a very, very, very ugly macro, but it makes things more
196 transparent in cases where a library function requires a
197 (char*) argument. This is especially the case in wxWindows,
198 where most char-arguments are (char*) and not (const char*).
199 This macro should only be used in such cases and NOT to
200 modify the internal data. The standard type conversion function
201 of wxString returns a '(const char*)'.
202 The conventional way would be 'function((char*)string.Chars())'.
203 With the macro this can be achieved by 'function(wxCHARARG(string))'.
204 This makes it clearer that the usage should be confined
205 to arguments. See below for examples.
206
207 \begin{verbatim}
208 #define wxCHARARG(s) ((char*)(s).Chars())
209 \end{verbatim}
210
211 \end{itemize}
212
213 \subsection{Function calls}
214
215 When using wxString objects as parameters to other functions you should
216 note the following:
217
218 \begin{verbatim}
219 void f1(const char *s){}
220 void f2(char *s){}
221
222 main(){
223 wxString aString;
224 f1(aString); // ok
225 f2(aString); // error
226 f2(wxCHARARG(aString)); // ok
227 printf("%s",aString); // NO compilation error, but a runtime error.
228 printf("%s",aString.Chars()); // ok
229 printf("%s",wxCHARARG(aString)); // ok
230 }
231 \end{verbatim}
232
233 \subsection{Header files}
234
235 For DOS and UNIX we use a stub-headerfile {\tt include/base/wxstring.h}\rtfsp
236 which includes the two headerfiles in the {\tt contrib/wxstring} directory,
237 namely {\tt contrib/wxstring/wxstrgnu.h} and {\tt contrib/wxstring/wxregex.h}.
238 If there is a headerfile {\tt contrib/wxstring/wxstring.h}, please
239 delete it. It will cause problems in the VMS compilation.
240
241 For VMS an additional step is needed because of the not very intelligent inclusion mechanism
242 of the VMS C++ compiler:
243 In the VMS-Makefile, the include-file search path is augmented with the
244 {\tt contrib/wxstring} directory, so that the correct headerfiles
245 can be included.
246
247 So you only have to specify
248
249 \begin{verbatim}
250 #define USE_GNU_WXSTRING 1
251 \end{verbatim}
252
253 in {\tt include/base/wx\_setup.h} to use the wxString class.
254
255 \subsection{Test program}
256
257 Stefan Hammes has included a test program {\tt test.cc} in the contrib/wxstring directory for many features
258 of wxString and wxRegex. It also tests Stefan's extensions.
259 When running the compiled program, there should
260 be NO assert-errors if everything is OK. When compiling the test
261 program, you can ignore warnings about unused variables. They
262 occur because Stefan has used a special method of initializing all
263 variables to the same start values before each test.
264
265 \subsection{Compilers}
266
267 wxString and wxRegex have been compiled successfully with the following
268 compilers (it should work on nearly every C++ compiler):
269
270 \begin{itemize}\itemsep=0pt
271 \item PC MS-Visual C++ 1.0, 1.5
272 \item UNIX gcc v2.6.3
273 \item UNIX Sun SunPro compiler under Solaris 2.x
274 \item VMS DEC C++ compiler (on VAX and AXP)
275 \end{itemize}
276
277 Warnings about type conversion or assignments can be ignored.
278
279 \subsection{GNU Documentation}
280
281 Below is the original GNU wxString and wxRegex
282 documentation. It describes most functions of the classes.
283 The function names have been capitalized to be consistent with
284 the wxWindows naming scheme. The examples are integrated into the test program.
285
286 Copyright (C) 1988, 1991, 1992 Free Software Foundation, Inc.
287
288 Permission is granted to make and distribute verbatim copies of this
289 manual provided the copyright notice and this permission notice are
290 preserved on all copies.
291
292 Permission is granted to copy and distribute modified versions of
293 this manual under the conditions for verbatim copying, provided also
294 that the section entitled "GNU Library General Public License" is
295 included exactly as in the original, and provided that the entire
296 resulting derived work is distributed under the terms of a permission
297 notice identical to this one.
298
299 Permission is granted to copy and distribute translations of this
300 manual into another language, under the above conditions for modified
301 versions, except that the section entitled "GNU Library General Public
302 License" and this permission notice may be included in translations
303 approved by the Free Software Foundation instead of in the original
304 English.
305
306 \subsubsection{The wxString class}
307
308 The `wxString' class is designed to extend GNU C++ to support string
309 processing capabilities similar to those in languages like Awk. The
310 class provides facilities that ought to be convenient and efficient
311 enough to be useful replacements for `char*' based processing via the C
312 string library (i.e., `strcpy, strcmp,' etc.) in many applications.
313 Many details about wxString representations are described in the
314 Representation section.
315
316 A separate `wxSubString' class supports substring extraction and
317 modification operations. This is implemented in a way that user
318 programs never directly construct or represent substrings, which are
319 only used indirectly via wxString operations.
320
321 Another separate class, `wxRegex' is also used indirectly via wxString
322 operations in support of regular expression searching, matching, and the
323 like. The wxRegex class is based entirely on the GNU Emacs regex
324 functions. See \helpref{Regular Expressions}{regularexpressions}
325 for a full explanation of regular expression syntax. (For
326 implementation details, see the internal documentation in files
327 {\tt wxregex.h} and {\tt wxregex.cc}).
328
329 \subsubsection{Constructor examples}
330
331 Strings are initialized and assigned as in the following examples:
332
333 {\tt wxString x;}
334 Set x to the nil string. This is different from the original GNU code,
335 which also sets a string to nil when it is assigned 0 or "".
336
337 {\tt wxString x = "Hello"; wxString y("Hello");}
338 Set x and y to a copy of the string "Hello".
339
340 {\tt wxString x = 'A'; wxString y('A');}
341 Set x and y to the string value "A".
342
343 {\tt wxString u = x; wxString v(x);}
344 Set u and v to the same string as wxString x.
345
346 {\tt wxString u = x.At(1,4); wxString v(x.At(1,4));}
347 Set u and v to the length 4 substring of x starting at position 1
348 (counting indexes from 0).
349
350 {\tt wxString x("abc", 2);}
351 Sets x to "ab", i.e., the first 2 characters of "abc".
352
353 There are no directly accessible forms for declaring wxSubString
354 variables.
355
356 The declaration \verb$wxRegex r("[a-zA-Z_][a-zA-Z0-9_]*");$ creates a
357 compiled regular expression suitable for use in wxString operations
358 described below. (In this case, one that matches any C++ identifier).
359 The first argument may also be a wxString. Be careful in distinguishing
360 the role of backslashes in quoted GNU C++ `char*' constants versus those
361 in Regexes. For example, a wxRegex that matches either one or more tabs
362 or all strings beginning with "ba" and ending with any number of
363 occurrences of "na" could be declared as
364
365 \begin{verbatim}
366 wxRegex r = "\\(\t+\\)\\|\\(ba\\(na\\)*\\)"
367 \end{verbatim}
368
369 Note that only one backslash is needed
370 to signify the tab, but two are needed for the parenthesization and
371 virgule, since the GNU C++ lexical analyzer decodes and strips
372 backslashes before they are seen by wxRegex.
373
374 There are three additional optional arguments to the wxRegex
375 constructor that are less commonly useful:
376
377 {\tt fast (default 0)}
378 `fast' may be set to true (1) if the wxRegex should be
379 "fast-compiled". This causes an additional compilation step that
380 is generally worthwhile if the wxRegex will be used many times.
381
382 {\tt bufsize (default max(40, length of the string))}
383 This is an estimate of the size of the internal compiled
384 expression. Set it to a larger value if you know that the
385 expression will require a lot of space. If you do not know, do not
386 worry: realloc is used if necessary.
387
388 {\tt transtable (default none == 0)}
389 The address of a byte translation table (a char[256]) that
390 translates each character before matching.
391
392 As a convenience, several Regexes are predefined and usable in any
393 program. Here are their declarations from {\tt wxString.h}.
394 \begin{verbatim}
395 extern wxRegex RXwhite; // = "[ \n\t]+"
396 extern wxRegex RXint; // = "-?[0-9]+"
397 extern wxRegex RXdouble; // = "-?\\(\\([0-9]+\\.[0-9]*\\)\\|
398 // \\([0-9]+\\)\\|
399 // \\(\\.[0-9]+\\)\\)
400 // \\([eE][---+]?[0-9]+\\)?"
401 extern wxRegex RXalpha; // = "[A-Za-z]+"
402 extern wxRegex RXlowercase; // = "[a-z]+"
403 extern wxRegex RXuppercase; // = "[A-Z]+"
404 extern wxRegex RXalphanum; // = "[0-9A-Za-z]+"
405 extern wxRegex RXidentifier; // = "[A-Za-z_][A-Za-z0-9_]*"
406 \end{verbatim}
407
408 \subsubsection{Examples}
409
410 Most {\tt wxString} class capabilities are best shown via example. The
411 examples below use the following declarations.
412
413 \begin{verbatim}
414 wxString x = "Hello";
415 wxString y = "world";
416 wxString n = "123";
417 wxString z;
418 char *s = ",";
419 wxString lft, mid, rgt;
420 wxRegex r = "e[a-z]*o";
421 wxRegex r2("/[a-z]*/");
422 char c;
423 int i, pos, len;
424 double f;
425 wxString words[10];
426 words[0] = "a";
427 words[1] = "b";
428 words[2] = "c";
429 \end{verbatim}
430
431 \subsubsection{Comparing, Searching and Matching examples}
432
433 The usual lexicographic relational operators (`==, !=, <, <=, >, >=')
434 are defined. A functional form `compare(wxString, wxString)' is also
435 provided, as is `fcompare(wxString, wxString)', which compares Strings
436 without regard for upper vs. lower case.
437
438 All other matching and searching operations are based on some form
439 of the (non-public) `match' and `search' functions. `match' and
440 `search' differ in that `match' attempts to match only at the given
441 starting position, while `search' starts at the position, and then
442 proceeds left or right looking for a match. As seen in the following
443 examples, the second optional `startpos' argument to functions using
444 `match' and `search' specifies the starting position of the search: If
445 non-negative, it results in a left-to-right search starting at position
446 `startpos', and if negative, a right-to-left search starting at
447 position `x.Length() + startpos'. In all cases, the index returned is
448 that of the beginning of the match, or -1 if there is no match.
449
450 Three wxString functions serve as front ends to `search' and `match'.
451 `index' performs a search, returning the index, `matches' performs a
452 match, returning nonzero (actually, the length of the match) on success,
453 and `contains' is a boolean function performing either a search or
454 match, depending on whether an index argument is provided:
455
456 {\tt x.Index("lo")}
457 Returns the zero-based index of the leftmost occurrence of
458 substring "lo" (3, in this case). The argument may be a wxString,
459 wxSubString, char, char*, or wxRegex.
460
461 {\tt x.Index("l", 2)}
462 Returns the index of the leftmost occurrence of "l"
463 found starting the search at position x[2], or 2 in this case.
464
465 {\tt x.Index("l", -1)}
466 Returns the index of the rightmost occurrence of "l", or 3 here.
467
468 {\tt x.Index("l", -3)}
469 Returns the index of the rightmost occurrence of "l" found by
470 starting the search at the 3rd to the last position of x,
471 returning 2 in this case.
472
473 {\tt pos = r.Search("leo", 3, len, 0)}
474 Returns the index of r in the {\tt char*} string of length 3, starting
475 at position 0, also placing the length of the match in reference
476 parameter len.
477
478 {\tt x.Contains("He")}
479 Returns nonzero if the wxString x contains the substring "He". The
480 argument may be a wxString, wxSubString, char, char*, or wxRegex.
481
482 {\tt x.Contains("el", 1)}
483 Returns nonzero if x contains the substring "el" at position 1.
484 As in this example, the second argument to `contains', if present,
485 means to match the substring only at that position, and not to
486 search elsewhere in the string.
487
488 {\tt x.Contains(RXwhite);}
489 Returns nonzero if x contains any whitespace (space, tab, or
490 newline). Recall that `RXwhite' is a global whitespace wxRegex.
491
492 {\tt x.Matches("lo", 3)}
493 Returns nonzero if x starting at position 3 exactly matches "lo",
494 with no trailing characters (as it does in this example).
495
496 {\tt x.Matches(r)}
497 Returns nonzero if wxString x as a whole matches wxRegex r.
498
499 {\tt int f = x.Freq("l")}
500 Returns the number of distinct, nonoverlapping matches to the
501 argument (2 in this case).
502
503 \subsubsection{Substring extraction examples}
504
505 Substrings may be extracted via the `at', `before', `through',
506 `from', and `after' functions. These behave as either lvalues or
507 rvalues.
508
509 {\tt z = x.At(2, 3)}
510 Sets wxString z to be equal to the length 3 substring of wxString x
511 starting at zero-based position 2, setting z to "llo" in this
512 case. A nil wxString is returned if the arguments don't make sense.
513
514 {\tt x.At(2, 2) = "r"}
515 Sets what was in positions 2 to 3 of x to "r", setting x to "Hero"
516 in this case. As indicated here, wxSubString assignments may be of
517 different lengths.
518
519 {\tt x.At("He") = "je";}
520 x.At("He") is the substring of x that matches the first occurrence of
521 its argument. The substitution sets x to "jello". If "He" did not
522 occur, the substring would be nil, and the assignment would have
523 no effect.
524
525 {\tt x.At("l", -1) = "i";}
526 Replaces the rightmost occurrence of "l" with "i", setting x to
527 "Helio".
528
529 {\tt z = x.At(r)}
530 Sets wxString z to the first match in x of wxRegex r, or "ello" in this
531 case. A nil wxString is returned if there is no match.
532
533 {\tt z = x.Before("o")}
534 Sets z to the part of x to the left of the first occurrence of
535 "o", or "Hell" in this case. The argument may also be a wxString,
536 wxSubString, or wxRegex. (If there is no match, z is set to "".)
537
538 {\tt x.Before("ll") = "Bri";}
539 Sets the part of x to the left of "ll" to "Bri", setting x to
540 "Brillo".
541
542 {\tt z = x.Before(2)}
543 Sets z to the part of x to the left of x[2], or "He" in this case.
544
545 {\tt z = x.After("Hel")}
546 Sets z to the part of x to the right of "Hel", or "lo" in this
547 case.
548
549 {\tt z = x.Through("el")}
550 Sets z to the part of x up to and including "el", or "Hel" in this
551 case.
552
553 {\tt z = x.From("el")}
554 Sets z to the part of x from "el" to the end, or "ello" in this
555 case.
556
557 {\tt x.After("Hel") = "p";}
558 Sets x to "Help".
559
560 {\tt z = x.After(3)}
561 Sets z to the part of x to the right of x[3] or "o" in this case.
562
563 {\tt z = " ab c"; z = z.After(RXwhite)}
564 Sets z to the part of its old string to the right of the first
565 group of whitespace, setting z to "ab c". Use GSub (below) to strip
566 out multiple occurrences of whitespace or any pattern.
567
568 {\tt x[0] = 'J';}
569 Sets the first element of x to 'J'. x[i] returns a reference to
570 the ith element of x, or triggers an error if i is out of range.
571
572 {\tt CommonPrefix(x, "Help")}
573 Returns the wxString containing the common prefix of the two Strings
574 or "Hel" in this case.
575
576 {\tt CommonSuffix(x, "to")}
577 Returns the wxString containing the common suffix of the two Strings
578 or "o" in this case.
579
580 \subsubsection{Concatenation examples}
581
582 {\tt z = x + s + ' ' + y.At("w") + y.After("w") + ".";}
583 Sets z to "Hello, world."
584
585 {\tt x += y;}
586 Sets x to "Helloworld".
587
588 {\tt Cat(x, y, z)}
589 A faster way to say z = x + y.
590
591 {\tt Cat(z, y, x, x)}
592 Double concatenation; A faster way to say x = z + y + x.
593
594 {\tt y.Prepend(x);}
595 A faster way to say y = x + y.
596
597 {\tt z = Replicate(x, 3);}
598 Sets z to "HelloHelloHello".
599
600 {\tt z = Join(words, 3, "/")}
601 Sets z to the concatenation of the first 3 Strings in wxString array
602 words, each separated by "/", setting z to "a/b/c" in this case.
603 The last argument may be "" or 0, indicating no separation.
604
605 \subsubsection{Other manipulation examples}
606
607 {\tt z = "this string has five words"; i = Split(z, words, 10, RXwhite);}
608 Sets up to 10 elements of wxString array words to the parts of z
609 separated by whitespace, and returns the number of parts actually
610 encountered (5 in this case). Here, words[0] = "this", words[1] =
611 "string", etc. The last argument may be any of the usual. If
612 there is no match, all of z ends up in words[0]. The words array
613 is *not* dynamically created by split.
614
615 {\tt int nmatches = x.GSub("l","ll")}
616 Substitutes all original occurrences of "l" with "ll", setting x
617 to "Hellllo". The first argument may be any of the usual,
618 including wxRegex. If the second argument is "" or 0, all
619 occurrences are deleted. gsub returns the number of matches that
620 were replaced.
621
622 {\tt z = x + y; z.Del("loworl");}
623 Deletes the leftmost occurrence of "loworl" in z, setting z to
624 "Held".
625
626 {\tt z = Reverse(x)}
627 Sets z to the reverse of x, or "olleH".
628
629 {\tt z = Upcase(x)}
630 Sets z to x, with all letters set to uppercase, setting z to
631 "HELLO".
632
633 {\tt z = Downcase(x)}
634 Sets z to x, with all letters set to lowercase, setting z to
635 "hello".
636
637 {\tt z = Capitalize(x)}
638 Sets z to x, with the first letter of each word set to uppercase,
639 and all others to lowercase, setting z to "Hello".
640
641 {\tt x.Reverse(), x.Upcase(), x.Downcase(), x.Capitalize()}
642 In-place, self-modifying versions of the above.
643
644 \subsubsection{Reading, Writing and Conversion examples}
645
646 {\tt cout << x}
647 Writes out x.
648
649 {\tt cout << x.At(2, 3)}
650 Writes out the substring "llo".
651
652 {\tt cin >> x}
653 Reads a whitespace-bounded string into x.
654
655 {\tt x.Length()}
656 Returns the length of wxString x (5, in this case).
657
658 {\tt s = (const char*)x}
659 Can be used to extract the `char*' char array. This coercion is
660 useful for sending a wxString as an argument to any function
661 expecting a `const char*' argument (like `atoi', and
662 `File::open'). This operator must be used with care, since the
663 conversion returns a pointer to `wxString' internals without copying
664 the characters: The resulting `(char*)' is only valid until the
665 next wxString operation, and you must not modify it. (The
666 conversion is defined to return a const value so that GNU C++ will
667 produce warning and/or error messages if changes are attempted.)
668
669 \subsection{Regular Expressions}\label{regularexpressions}
670
671 The following are extracts from GNU documentation.
672
673 \subsubsection{Regular Expression Overview}
674
675 Regular expression matching allows you to test whether a string fits
676 into a specific syntactic shape. You can also search a string for a
677 substring that fits a pattern.
678
679 A regular expression describes a set of strings. The simplest case
680 is one that describes a particular string; for example, the string
681 `foo' when regarded as a regular expression matches `foo' and nothing
682 else. Nontrivial regular expressions use certain special constructs
683 so that they can match more than one string. For example, the
684 regular expression `foo$\backslash$|bar' matches either the string `foo' or the
685 string `bar'; the regular expression `c[ad]*r' matches any of the
686 strings `cr', `car', `cdr', `caar', `cadddar' and all other such
687 strings with any number of `a''s and `d''s.
688
689 The first step in matching a regular expression is to compile it.
690 You must supply the pattern string and also a pattern buffer to hold
691 the compiled result. That result contains the pattern in an internal
692 format that is easier to use in matching.
693
694 Having compiled a pattern, you can match it against strings. You can
695 match the compiled pattern any number of times against different
696 strings.
697
698 \subsubsection{Syntax of Regular Expressions}
699
700 Regular expressions have a syntax in which a few characters are
701 special constructs and the rest are "ordinary". An ordinary
702 character is a simple regular expression which matches that character
703 and nothing else. The special characters are `\verb+$+', `\verb+^+', `.', `*',
704 `+', `?', `[', `]' and `$\backslash$'. Any other character appearing in a
705 regular expression is ordinary, unless a `$\backslash$' precedes it.
706
707 For example, `f' is not a special character, so it is ordinary, and
708 therefore `f' is a regular expression that matches the string `f' and
709 no other string. (It does *not* match the string `ff'.) Likewise,
710 `o' is a regular expression that matches only `o'.
711
712 Any two regular expressions A and B can be concatenated. The result
713 is a regular expression which matches a string if A matches some
714 amount of the beginning of that string and B matches the rest of the
715 string.
716
717 As a simple example, we can concatenate the regular expressions `f'
718 and `o' to get the regular expression `fo', which matches only the
719 string `fo'. Still trivial.
720
721 Note: for Unix compatibility, special characters are treated as
722 ordinary ones if they are in contexts where their special meanings
723 make no sense. For example, `*foo' treats `*' as ordinary since
724 there is no preceding expression on which the `*' can act. It is
725 poor practice to depend on this behavior; better to quote the special
726 character anyway, regardless of where it appears.
727
728 The following are the characters and character sequences which have
729 special meaning within regular expressions. Any character not
730 mentioned here is not special; it stands for exactly itself for the
731 purposes of searching and matching.
732
733 \begin{itemize}
734 \itemsep=0pt
735
736 \item \rtfsp
737 {\tt .} is a special character that matches anything except a newline.
738 Using concatenation, we can make regular expressions like {\tt a.b}
739 which matches any three-character string which begins with {\tt a}
740 and ends with {\tt b}.
741
742 \item \rtfsp
743 {\tt *} is not a construct by itself; it is a suffix, which means the
744 preceding regular expression is to be repeated as many times as
745 possible. In {\tt fo*}, the {\tt *} applies to the {\tt o}, so {\tt fo*}
746 matches {\tt f} followed by any number of {\tt o}'s.
747
748 The case of zero {\tt o}'s is allowed: {\tt fo*} does match {\tt f}.
749
750 {\tt *} always applies to the *smallest* possible preceding
751 expression. Thus, {\tt fo*} has a repeating {\tt o}, not a repeating
752 {\tt fo}.
753
754 The matcher processes a {\tt *} construct by matching, immediately,
755 as many repetitions as can be found. Then it continues with the
756 rest of the pattern. If that fails, backtracking occurs,
757 discarding some of the matches of the {\tt *}'d construct in case
758 that makes it possible to match the rest of the pattern. For
759 example, matching {\tt c$[$ad$]$*ar} against the string {\tt caddaar}, the
760 {\tt $[$ad$]$*} first matches {\tt addaa}, but this does not allow the next
761 {\tt a} in the pattern to match. So the last of the matches of
762 {\tt $[$ad$]$} is undone and the following {\tt a} is tried again. Now it
763 succeeds.
764
765 \item \rtfsp
766 {\tt +} is like {\tt *} except that at least one match for the preceding
767 pattern is required for {\tt +}. Thus, {\tt c$[$ad$]$+r} does not match
768 {\tt cr} but does match anything else that {\tt c$[$ad$]$*r} would match.
769
770 \item \rtfsp
771 {\tt ?} is like {\tt *} except that it allows either zero or one match
772 for the preceding pattern. Thus, {\tt c$[$ad$]$?r} matches {\tt cr} or
773 {\tt car} or {\tt cdr}, and nothing else.
774
775 \item \rtfsp
776 {\tt $[$} begins a ``character set'', which is terminated by a {\tt $]$}. In
777 the simplest case, the characters between the two form the set.
778 Thus, {\tt $[$ad$]$} matches either {\tt a} or {\tt d}, and {\tt $[$ad$]$*} matches any
779 string of {\tt a}'s and {\tt d}'s (including the empty string), from
780 which it follows that {\tt c$[$ad$]$*r} matches {\tt car}, etc.
781
782 Character ranges can also be included in a character set, by
783 writing two characters with a {\tt -} between them. Thus, {\tt $[$a-z$]$}
784 matches any lower-case letter. Ranges may be intermixed freely
785 with individual characters, as in {\tt $[$a-z\$\%.$]$}, which matches any
786 lower case letter or {\tt \$}, {\tt \%} or period.
787
788 Note that the usual special characters are not special any more
789 inside a character set. A completely different set of special
790 characters exists inside character sets: {\tt $]$}, {\tt -} and \verb$^$.
791
792 To include a {\tt $]$} in a character set, you must make it the first
793 character. For example, {\tt $[$$]$a$]$} matches {\tt $]$} or {\tt a}. To include
794 a {\tt -}, you must use it in a context where it cannot possibly
795 indicate a range: that is, as the first character, or
796 immediately after a range.
797
798 \item \rtfsp
799 \verb$[^$ begins a ``complement character set'', which matches any
800 character except the ones specified. Thus, \verb$[^a-z0-9A-Z]$
801 matches all characters {\it except} letters and digits.
802
803 \verb$^$ is not special in a character set unless it is the first
804 character. The character following the \verb$^$ is treated as if it
805 were first (it may be a {\tt -} or a {\tt $]$}).
806
807 \item \rtfsp
808 \verb$^$ is a special character that matches the empty string---but only
809 if at the beginning of a line in the text being matched.
810 Otherwise it fails to match anything. Thus, \verb$^foo$ matches a
811 {\tt foo} which occurs at the beginning of a line.
812
813 \item \rtfsp
814 {\tt \$}
815 is similar to \verb$^$ but matches only at the end of a line. Thus,
816 {\tt xx*\$} matches a string of one or more {\tt x}'s at the end of a line.
817
818 \item \rtfsp
819 {\tt $\backslash$}
820 has two functions: it quotes the above special characters
821 (including {\tt $\backslash$}), and it introduces additional special constructs.
822
823 Because {\tt $\backslash$} quotes special characters, {\tt $\backslash$\$} is a regular
824 expression which matches only {\tt \$}, and {\tt $\backslash$$[$} is a regular
825 expression which matches only {\tt $[$}, and so on.
826
827 For the most part, {\tt $\backslash$} followed by any character matches only
828 that character. However, there are several exceptions:
829 characters which, when preceded by {\tt $\backslash$}, are special constructs.
830 Such characters are always ordinary when encountered on their own.
831
832 No new special characters will ever be defined. All extensions
833 to the regular expression syntax are made by defining new
834 two-character constructs that begin with {\tt $\backslash$}.
835
836 \item \rtfsp
837 {\tt $\backslash$|}
838 specifies an alternative. Two regular expressions A and B with
839 {\tt $\backslash$|} in between form an expression that matches anything that
840 either A or B will match.
841
842 Thus, {\tt foo$\backslash$|bar} matches either {\tt foo} or {\tt bar} but no other
843 string.
844
845 {\tt $\backslash$|} applies to the largest possible surrounding expressions.
846 Only a surrounding {\tt $\backslash$( ... $\backslash$)} grouping can limit the grouping
847 power of {\tt $\backslash$|}.
848
849 Full backtracking capability exists when multiple {\tt $\backslash$|}'s are used.
850
851 \item \rtfsp
852 {\tt $\backslash$( ... $\backslash$)}
853 is a grouping construct that serves three purposes:
854 \begin{enumerate}
855 \item To enclose a set of {\tt $\backslash$|} alternatives for other operations.
856 Thus, {\tt $\backslash$(foo$\backslash$|bar$\backslash$)x} matches either {\tt foox} or {\tt barx}.
857 \item To enclose a complicated expression for the postfix {\tt *} to
858 operate on. Thus, {\tt ba$\backslash$(na$\backslash$)*} matches {\tt bananana}, etc.,
859 with any (zero or more) number of {\tt na}'s.
860 \item To mark a matched substring for future reference.
861 \end{enumerate}
862
863 This last application is not a consequence of the idea of a
864 parenthetical grouping; it is a separate feature which happens
865 to be assigned as a second meaning to the same {\tt $\backslash$( ... $\backslash$)}
866 construct because there is no conflict in practice between the
867 two meanings. Here is an explanation of this feature:
868
869 \item \rtfsp
870 {\tt $\backslash$DIGIT}
871 After the end of a {\tt $\backslash$( ... $\backslash$)} construct, the matcher remembers
872 the beginning and end of the text matched by that construct.
873 Then, later on in the regular expression, you can use {\tt $\backslash$}
874 followed by DIGIT to mean ``match the same text matched the
875 DIGIT'th time by the {\tt $\backslash$( ... $\backslash$)} construct.'' The {\tt $\backslash$( ... $\backslash$)}
876 constructs are numbered in order of commencement in the regexp.
877
878 The strings matching the first nine {\tt $\backslash$( ... $\backslash$)} constructs
879 appearing in a regular expression are assigned numbers 1 through
880 9 in order of their beginnings. {\tt $\backslash$1} through {\tt $\backslash$9} may be used
881 to refer to the text matched by the corresponding {\tt $\backslash$( ... $\backslash$)}
882 construct.
883
884 For example, {\tt $\backslash$(.*$\backslash$)$\backslash$1} matches any string that is composed of
885 two identical halves. The {\tt $\backslash$(.*$\backslash$)} matches the first half,
886 which may be anything, but the {\tt $\backslash$1} that follows must match the
887 same exact text.
888
889 \item \rtfsp
890 {\tt $\backslash$b}
891 matches the empty string, but only if it is at the beginning or
892 end of a word. Thus, {\tt $\backslash$bfoo$\backslash$b} matches any occurrence of {\tt foo}
893 as a separate word. {\tt $\backslash$bball$\backslash$(s$\backslash$|$\backslash$)$\backslash$b} matches {\tt ball} or {\tt balls}
894 as a separate word.
895
896 \item \rtfsp
897 {\tt $\backslash$B}
898 matches the empty string, provided it is {\it not} at the beginning
899 or end of a word.
900
901 \item \rtfsp
902 {\tt $\backslash$<}
903 matches the empty string, but only if it is at the beginning of
904 a word.
905
906 \item \rtfsp
907 {\tt $\backslash$>}
908 matches the empty string, but only if it is at the end of a word.
909
910 \item \rtfsp
911 {\tt $\backslash$w}
912 matches any word-constituent character.
913
914 \item \rtfsp
915 {\tt $\backslash$W}
916 matches any character that is not a word-constituent.
917
918 \end{itemize}
919
920
921
922
923
924 \section{wxString member functions}\label{wxstringcategories}
925
926 \overview{Overview}{wxstringoverview}
927
928 This section describes categories of \helpref{wxString}{wxstring} class
929 member functions.
930
931 TODO: describe each one briefly here.
932
933 {\large {\bf Assignment}}
934
935 \begin{itemize}\itemsep=0pt
936 \item \helpref{wxString::operator $=$}{wxstringoperatorassign}
937 \end{itemize}
938
939 {\large {\bf Classification}}
940
941 \begin{itemize}\itemsep=0pt
942 \item \helpref{wxString::IsAscii}{wxstringIsAscii}
943 \item \helpref{wxString::IsWord}{wxstringIsWord}
944 \item \helpref{wxString::IsNumber}{wxstringIsNumber}
945 \item \helpref{wxString::IsNull}{wxstringIsNull}
946 \item \helpref{wxString::IsDefined}{wxstringIsDefined}
947 \end{itemize}
948
949 {\large {\bf Comparisons (case sensitive and insensitive)}}
950
951 \begin{itemize}\itemsep=0pt
952 \item \helpref{wxString::CompareTo}{wxstringCompareTo}
953 \item \helpref{Compare}{wxstringCompare}
954 \item \helpref{FCompare}{wxstringFCompare}
955 \item \helpref{Comparisons}{wxstringComparison}
956 \end{itemize}
957
958 {\large {\bf Composition and Concatenation}}
959
960 \begin{itemize}\itemsep=0pt
961 \item \helpref{wxString::operator $+=$}{wxstringPlusEqual}
962 \item \helpref{wxString::Append}{wxstringAppend}
963 \item \helpref{wxString::Prepend}{wxstringPrepend}
964 \item \helpref{wxString::Cat}{wxstringCat}
965 \item \helpref{operator $+$}{wxstringoperatorplus}
966 \end{itemize}
967
968 {\large {\bf Constructors/Destructors}}
969
970 \begin{itemize}\itemsep=0pt
971 \item \helpref{wxString::wxString}{wxstringconstruct}
972 \item \helpref{wxString::$\sim$wxString}{wxstringdestruct}
973 \end{itemize}
974
975 {\large {\bf Conversions}}
976
977 \begin{itemize}\itemsep=0pt
978 \item \helpref{wxString::operator const char *}{wxstringoperatorconstcharpt}
979 \item \helpref{wxString::Chars}{wxstringChars}
980 \item \helpref{wxString::GetData}{wxstringGetData}
981 \end{itemize}
982
983 {\large {\bf Deletion/Insertion}}
984
985 \begin{itemize}\itemsep=0pt
986 \item \helpref{wxString::Del}{wxstringDel}
987 \item \helpref{wxString::Remove}{wxstringRemove}
988 \item \helpref{wxString::Insert}{wxstringInsert}
989 \item \helpref{Split}{wxstringSplit}
990 \item \helpref{Join}{wxstringJoin}
991 \end{itemize}
992
993 {\large {\bf Duplication}}
994
995 \begin{itemize}\itemsep=0pt
996 \item \helpref{wxString::Copy}{wxstringCopy}
997 \item \helpref{wxString::Replicate}{wxstringReplicate}
998 \end{itemize}
999
1000 {\large {\bf Element access}}
1001
1002 \begin{itemize}\itemsep=0pt
1003 \item \helpref{wxString::operator[]}{wxstringoperatorbracket}
1004 \item \helpref{wxString::operator()}{wxstringoperatorparenth}
1005 \item \helpref{wxString::Elem}{wxstringElem}
1006 \item \helpref{wxString::Firstchar}{wxstringFirstchar}
1007 \item \helpref{wxString::Lastchar}{wxstringLastchar}
1008 \end{itemize}
1009
1010 {\large {\bf Extraction of Substrings}}
1011
1012 \begin{itemize}\itemsep=0pt
1013 \item \helpref{wxString::At}{wxstringAt}
1014 \item \helpref{wxString::Before}{wxstringBefore}
1015 \item \helpref{wxString::Through}{wxstringThrough}
1016 \item \helpref{wxString::From}{wxstringFrom}
1017 \item \helpref{wxString::After}{wxstringAfter}
1018 \item \helpref{wxString::SubString}{wxstringSubString}
1019 \end{itemize}
1020
1021 {\large {\bf Input/Output}}
1022
1023 \begin{itemize}\itemsep=0pt
1024 \item \helpref{wxString::sprintf}{wxstringsprintf}
1025 \item \helpref{wxString::operator \cinsert}{wxstringoperatorout}
1026 \item \helpref{wxString::operator \cextract}{wxstringoperatorin}
1027 \item \helpref{wxString::Readline}{wxstringReadline}
1028 \end{itemize}
1029
1030 {\large {\bf Searching/Matching}}
1031
1032 \begin{itemize}\itemsep=0pt
1033 \item \helpref{wxString::Index}{wxstringIndex}
1034 \item \helpref{wxString::Contains}{wxstringContains}
1035 \item \helpref{wxString::Matches}{wxstringMatches}
1036 \item \helpref{wxString::Freq}{wxstringFreq}
1037 \item \helpref{wxString::First}{wxstringFirst}
1038 \item \helpref{wxString::Last}{wxstringLast}
1039 \end{itemize}
1040
1041 {\large {\bf Substitution}}
1042
1043 \begin{itemize}\itemsep=0pt
1044 \item \helpref{wxString::GSub}{wxstringGSub}
1045 \item \helpref{wxString::Replace}{wxstringReplace}
1046 \end{itemize}
1047
1048 {\large {\bf Status}}
1049
1050 \begin{itemize}\itemsep=0pt
1051 \item \helpref{wxString::Length}{wxstringLength}
1052 \item \helpref{wxString::Empty}{wxstringEmpty}
1053 \item \helpref{wxString::Allocation}{wxstringAllocation}
1054 \item \helpref{wxString::IsNull}{wxstringIsNull}
1055 \end{itemize}
1056
1057 {\large {\bf Transformations}}
1058
1059 \begin{itemize}\itemsep=0pt
1060 \item \helpref{wxString::Reverse}{wxstringReverse}
1061 \item \helpref{wxString::Upcase}{wxstringUpcase}
1062 \item \helpref{wxString::UpperCase}{wxstringUpperCase}
1063 \item \helpref{wxString::DownCase}{wxstringDownCase}
1064 \item \helpref{wxString::LowerCase}{wxstringLowerCase}
1065 \item \helpref{wxString::Capitalize}{wxstringCapitalize}
1066 \end{itemize}
1067
1068 {\large {\bf Utilities}}
1069
1070 \begin{itemize}\itemsep=0pt
1071 \item \helpref{wxString::Strip}{wxstringStrip}
1072 \item \helpref{wxString::Error}{wxstringError}
1073 \item \helpref{wxString::OK}{wxstringOK}
1074 \item \helpref{wxString::Alloc}{wxstringAlloc}
1075 \item \helpref{wxCHARARG}{wxstringwxCHARARG}
1076 \item \helpref{CommonPrefix}{wxstringCommonPrefix}
1077 \item \helpref{CommonSuffix}{wxstringCommonSuffix}
1078 \end{itemize}
1079