]>
Commit | Line | Data |
---|---|---|
11ec1f16 VZ |
1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
2 | %% Name: regex.tex | |
3 | %% Purpose: wxRegEx documentation | |
4 | %% Author: Vadim Zeitlin | |
5 | %% Modified by: | |
6 | %% Created: 14.07.01 | |
7 | %% RCS-ID: $Id$ | |
8 | %% Copyright: (c) 2001 Vadim Zeitlin | |
8795498c | 9 | %% License: wxWindows license |
11ec1f16 VZ |
10 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
11 | ||
12 | \section{\class{wxRegEx}}\label{wxregex} | |
13 | ||
0aa7fa9a VS |
14 | wxRegEx represents a regular expression. This class provides support |
15 | for regular expression matching and also replacement. | |
11ec1f16 | 16 | |
0aa7fa9a VS |
17 | It is built on top of either the system library (if it has support |
18 | for POSIX regular expressions - which is the case for most modern | |
19 | Unices) or uses the builtin Henry Spencer library. Henry Spencer | |
20 | would appreciate being given credit in the documentation of software | |
21 | which uses his library, but that is not a requirement. | |
22 | ||
23 | Regular expressions, as defined by POSIX, come in two flavours: {\it extended} | |
24 | and {\it basic}. The builtin library also adds a third flavour | |
25 | of expression, \helpref{advanced}{wxresyn}, which is not available | |
26 | when using the system library. | |
27 | ||
28 | Unicode is fully supported only when using the builtin library. | |
29 | When using the system library in Unicode mode, the expressions and data | |
30 | are translated to the default 8-bit encoding before being passed to | |
31 | the library. | |
32 | ||
33 | On platforms where a system library is available, the default is to use | |
34 | the builtin library for Unicode builds, and the system library otherwise. | |
35 | It is possible to use the other if preferred by selecting it when building | |
fc2171bd | 36 | wxWidgets. |
11ec1f16 VZ |
37 | |
38 | \wxheading{Derived from} | |
39 | ||
40 | No base class | |
41 | ||
42 | \wxheading{Data structures} | |
43 | ||
44 | Flags for regex compilation to be used with \helpref{Compile()}{wxregexcompile}: | |
5ef298b3 | 45 | |
11ec1f16 VZ |
46 | \begin{verbatim} |
47 | enum | |
48 | { | |
0aa7fa9a | 49 | // use extended regex syntax |
a43e748a | 50 | wxRE_EXTENDED = 0, |
0aa7fa9a VS |
51 | |
52 | // use advanced RE syntax (built-in regex only) | |
53 | #ifdef wxHAS_REGEX_ADVANCED | |
54 | wxRE_ADVANCED = 1, | |
55 | #endif | |
11ec1f16 VZ |
56 | |
57 | // use basic RE syntax | |
a43e748a | 58 | wxRE_BASIC = 2, |
11ec1f16 VZ |
59 | |
60 | // ignore case in match | |
a43e748a | 61 | wxRE_ICASE = 4, |
11ec1f16 VZ |
62 | |
63 | // only check match, don't set back references | |
a43e748a | 64 | wxRE_NOSUB = 8, |
11ec1f16 VZ |
65 | |
66 | // if not set, treat '\n' as an ordinary character, otherwise it is | |
67 | // special: it is not matched by '.' and '^' and '$' always match | |
a43e748a GT |
68 | // after/before it regardless of the setting of wxRE_NOT[BE]OL |
69 | wxRE_NEWLINE = 16, | |
11ec1f16 VZ |
70 | |
71 | // default flags | |
a43e748a | 72 | wxRE_DEFAULT = wxRE_EXTENDED |
11ec1f16 VZ |
73 | } |
74 | \end{verbatim} | |
75 | ||
76 | Flags for regex matching to be used with \helpref{Matches()}{wxregexmatches}. | |
77 | ||
78 | These flags are mainly useful when doing several matches in a long string | |
7af3ca16 | 79 | to prevent erroneous matches for {\tt '\textasciicircum'} and {\tt '\$'}: |
5ef298b3 | 80 | |
11ec1f16 VZ |
81 | \begin{verbatim} |
82 | enum | |
83 | { | |
84 | // '^' doesn't match at the start of line | |
a43e748a | 85 | wxRE_NOTBOL = 32, |
11ec1f16 VZ |
86 | |
87 | // '$' doesn't match at the end of line | |
a43e748a | 88 | wxRE_NOTEOL = 64 |
11ec1f16 VZ |
89 | } |
90 | \end{verbatim} | |
91 | ||
5ef298b3 VZ |
92 | \wxheading{Examples} |
93 | ||
94 | A bad example of processing some text containing email addresses (the example | |
95 | is bad because real email addresses can have a more complicated form than | |
96 | {\tt user@host.net}): | |
97 | ||
98 | \begin{verbatim} | |
99 | wxString text; | |
100 | ... | |
54d50c6e | 101 | wxRegEx reEmail = wxT("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"); |
5ef298b3 VZ |
102 | if ( reEmail.Matches(text) ) |
103 | { | |
104 | wxString email = reEmail.GetMatch(text); | |
105 | wxString username = reEmail.GetMatch(text, 1); | |
54d50c6e | 106 | if ( reEmail.GetMatch(text, 3) == wxT("com") ) // .com TLD? |
5ef298b3 VZ |
107 | { |
108 | ... | |
109 | } | |
110 | } | |
111 | ||
112 | // or we could do this to hide the email address | |
54d50c6e | 113 | size_t count = reEmail.ReplaceAll(&text, wxT("HIDDEN@\\2\\3")); |
5ef298b3 VZ |
114 | printf("text now contains %u hidden addresses", count); |
115 | \end{verbatim} | |
116 | ||
0e10e38d VZ |
117 | \wxheading{Include files} |
118 | ||
119 | <wx/regex.h> | |
120 | ||
a7af285d VZ |
121 | \wxheading{Library} |
122 | ||
123 | \helpref{wxBase}{librarieslist} | |
124 | ||
11ec1f16 VZ |
125 | \latexignore{\rtfignore{\wxheading{Members}}} |
126 | ||
127 | \membersection{wxRegEx::wxRegEx}\label{wxregexwxregex} | |
128 | ||
129 | \func{}{wxRegEx}{\void} | |
130 | ||
131 | Default ctor: use \helpref{Compile()}{wxregexcompile} later. | |
132 | ||
11ec1f16 VZ |
133 | \func{}{wxRegEx}{\param{const wxString\& }{expr}, \param{int }{flags = wxRE\_DEFAULT}} |
134 | ||
135 | Create and compile the regular expression, use | |
136 | \helpref{IsValid}{wxregexisvalid} to test for compilation errors. | |
137 | ||
138 | \membersection{wxRegEx::\destruct{wxRegEx}}\label{wxregexdtor} | |
139 | ||
140 | \func{}{\destruct{wxRegEx}}{\void} | |
141 | ||
142 | The destructor is not virtual, so do not derive from this class. | |
143 | ||
144 | \membersection{wxRegEx::Compile}\label{wxregexcompile} | |
145 | ||
146 | \func{bool}{Compile}{\param{const wxString\& }{pattern}, \param{int }{flags = wxRE\_DEFAULT}} | |
147 | ||
cc81d32f | 148 | Compile the string into a regular expression; return {\tt true} if ok or {\tt false} |
11ec1f16 VZ |
149 | if the string has a syntax error. | |
150 | ||
151 | \membersection{wxRegEx::IsValid}\label{wxregexisvalid} | |
152 | ||
153 | \constfunc{bool}{IsValid}{\void} | |
154 | ||
cc81d32f | 155 | Return {\tt true} if this is a valid compiled regular expression, {\tt false} |
11ec1f16 VZ |
156 | otherwise. |
157 | ||
158 | \membersection{wxRegEx::GetMatch}\label{wxregexgetmatch} | |
159 | ||
160 | \constfunc{bool}{GetMatch}{\param{size\_t* }{start}, \param{size\_t* }{len}, \param{size\_t }{index = 0}} | |
161 | ||
162 | Get the start index and the length of the match of the expression | |
163 | (if {\it index} is $0$) or a bracketed subexpression ({\it index} different | |
164 | from $0$). | |
165 | ||
166 | May only be called after a successful call to \helpref{Matches()}{wxregexmatches} | |
167 | and only if {\tt wxRE\_NOSUB} was {\bf not} used in | |
168 | \helpref{Compile()}{wxregexcompile}. | |
169 | ||
43e8916f | 170 | Returns {\tt false} if no match or if an error occurred. |
11ec1f16 VZ |
171 | |
172 | \constfunc{wxString}{GetMatch}{\param{const wxString\& }{text}, \param{size\_t }{index = 0}} | |
173 | ||
174 | Returns the part of the string corresponding to the match, where {\it index} is | |
175 | interpreted as above. An empty string is returned if the match failed. | |
176 | ||
177 | May only be called after a successful call to \helpref{Matches()}{wxregexmatches} | |
178 | and only if {\tt wxRE\_NOSUB} was {\bf not} used in | |
179 | \helpref{Compile()}{wxregexcompile}. | |
180 | ||
86b79b93 VS |
181 | \membersection{wxRegEx::GetMatchCount}\label{wxregexgetmatchcount} |
182 | ||
183 | \constfunc{size\_t}{GetMatchCount}{\void} | |
184 | ||
185 | Returns the size of the array of matches, i.e. the number of bracketed | |
186 | subexpressions plus one for the expression itself, or $0$ on error. | |
187 | ||
188 | May only be called after a successful call to \helpref{Compile()}{wxregexcompile} | |
189 | and only if {\tt wxRE\_NOSUB} was {\bf not} used. | |
190 | ||
11ec1f16 VZ |
191 | \membersection{wxRegEx::Matches}\label{wxregexmatches} |
192 | ||
193 | \constfunc{bool}{Matches}{\param{const wxChar* }{text}, \param{int }{flags = 0}} | |
194 | ||
c9eee7f0 MW |
195 | \constfunc{bool}{Matches}{\param{const wxChar* }{text}, \param{int }{flags}, \param{size\_t }{len}} |
196 | ||
197 | \constfunc{bool}{Matches}{\param{const wxString\& }{text}, \param{int }{flags = 0}} | |
198 | ||
11ec1f16 | 199 | Matches the precompiled regular expression against the string {\it text}, |
cc81d32f | 200 | returns {\tt true} if it matches and {\tt false} otherwise. |
11ec1f16 | 201 | |
c9eee7f0 MW |
202 | {\it Flags} may be a combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}. |
203 | ||
ab0f0edd MW |
204 | Some regex libraries assume that the text given is null terminated, while |
205 | others require the length be given as a separate parameter. Therefore for | |
206 | maximum portability assume that {\it text} cannot contain embedded nulls. | |
207 | ||
208 | When the {\it Matches(const wxChar *text, int flags = 0)} form is used, | |
209 | a {\it wxStrlen()} will be done internally if the regex library requires the | |
210 | length. When using {\it Matches()} in a loop | |
211 | the {\it Matches(text, flags, len)} form can be used instead, making it | |
212 | possible to avoid a {\it wxStrlen()} inside the loop. | |
11ec1f16 VZ |
213 | |
214 | May only be called after a successful call to \helpref{Compile()}{wxregexcompile}. | |
215 | ||
216 | \membersection{wxRegEx::Replace}\label{wxregexreplace} | |
217 | ||
218 | \constfunc{int}{Replace}{\param{wxString* }{text}, \param{const wxString\& }{replacement}, \param{size\_t }{maxMatches = 0}} | |
219 | ||
220 | Replaces matches of the current regular expression in the string pointed to by | |
221 | {\it text} with the text in {\it replacement} and returns the number of matches | |
222 | replaced (may be $0$ if none are found) or $-1$ on error. | |
223 | ||
6465d401 | 224 | The replacement text may contain back references {\tt $\backslash$number} which will be |
11ec1f16 | 225 | replaced with the value of the corresponding subexpression in the |
6465d401 | 226 | pattern match. {\tt $\backslash$0} corresponds to the entire match and {\tt \&} is a |
11ec1f16 VZ |
227 | synonym for it. Backslash may be used to quote itself or the {\tt \&} character. | |
228 | ||
229 | {\it maxMatches} may be used to limit the number of replacements made, setting | |
2edb0bde | 230 | it to $1$, for example, will only replace the first occurrence (if any) of the |
11ec1f16 VZ |
231 | pattern in the text, while the default value of $0$ means replace all. | |
232 | ||
233 | \membersection{wxRegEx::ReplaceAll}\label{wxregexreplaceall} | |
234 | ||
235 | \constfunc{int}{ReplaceAll}{\param{wxString* }{text}, \param{const wxString\& }{replacement}} | |
236 | ||
2edb0bde | 237 | Replace all occurrences: this is actually a synonym for |
11ec1f16 VZ |
238 | \helpref{Replace()}{wxregexreplace}. |
239 | ||
240 | \wxheading{See also} | |
241 | ||
242 | \helpref{ReplaceFirst}{wxregexreplacefirst} | |
243 | ||
244 | \membersection{wxRegEx::ReplaceFirst}\label{wxregexreplacefirst} | |
245 | ||
246 | \constfunc{int}{ReplaceFirst}{\param{wxString* }{text}, \param{const wxString\& }{replacement}} | |
247 | ||
2edb0bde | 248 | Replace the first occurrence. |
11ec1f16 VZ |
249 | |
250 | \wxheading{See also} | |
251 | ||
252 | \helpref{Replace}{wxregexreplace} | |
253 |