Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: regex.h | |
e54c96f1 | 3 | // Purpose: interface of wxRegEx |
23324ae1 FM |
4 | // Author: wxWidgets team |
5 | // RCS-ID: $Id$ | |
526954c5 | 6 | // Licence: wxWindows licence |
23324ae1 FM |
7 | ///////////////////////////////////////////////////////////////////////////// |
8 | ||
3ed3a1c8 | 9 | /** |
bbc5b7f8 BP |
10 | @anchor wxRE_FLAGS |
11 | ||
9602ce3d | 12 | Flags for regex compilation to be used with wxRegEx::Compile(). |
3ed3a1c8 BP |
13 | */ |
14 | enum | |
15 | { | |
16 | /** Use extended regex syntax (value 0, i.e. what wxRE_DEFAULT selects). */ | |
17 | wxRE_EXTENDED = 0, | |
18 | ||
19 | /** Use advanced RE syntax (built-in regex library only). */ | |
20 | wxRE_ADVANCED = 1, | |
21 | ||
22 | /** Use basic RE syntax. */ | |
23 | wxRE_BASIC = 2, | |
24 | ||
25 | /** Ignore case when matching. */ | |
26 | wxRE_ICASE = 4, | |
27 | ||
28 | /** Only check whether the expression matches, don't set back references. */ | |
29 | wxRE_NOSUB = 8, | |
30 | ||
31 | /** | |
32 | If not set, treat '\n' as an ordinary character; otherwise it is | |
33 | special: it is not matched by '.', and '^' and '$' always match | |
34 | after/before it regardless of wxRE_NOTBOL and wxRE_NOTEOL. | |
35 | */ | |
36 | wxRE_NEWLINE = 16, | |
37 | ||
38 | /** Default flags: the same as wxRE_EXTENDED. */ | |
39 | wxRE_DEFAULT = wxRE_EXTENDED | |
40 | }; | |
41 | ||
42 | /** | |
bbc5b7f8 BP |
43 | @anchor wxRE_NOT_FLAGS |
44 | ||
9602ce3d | 45 | Flags for regex matching to be used with wxRegEx::Matches(). |
3ed3a1c8 | 46 | These flags are mainly useful when doing several matches in a long string |
9602ce3d | 47 | to prevent erroneous matches for '^' and '$': |
3ed3a1c8 BP |
48 | */ |
49 | enum | |
50 | { | |
51 | /** '^' doesn't match at the start of the line. */ | |
52 | wxRE_NOTBOL = 32, | |
53 | ||
54 | /** '$' doesn't match at the end of the line. */ | |
55 | wxRE_NOTEOL = 64 | |
56 | }; | |
57 | ||
23324ae1 FM |
58 | /** |
59 | @class wxRegEx | |
7c913512 | 60 | |
23324ae1 FM |
61 | wxRegEx represents a regular expression. This class provides support |
62 | for regular expressions matching and also replacement. | |
7c913512 | 63 | |
23324ae1 FM |
64 | It is built on top of either the system library (if it has support |
65 | for POSIX regular expressions - which is the case for most modern | |
66 | Unices) or uses Henry Spencer's built-in library. Henry Spencer | |
67 | would appreciate being given credit in the documentation of software | |
68 | which uses his library, but that is not a requirement. | |
7c913512 | 69 | |
23324ae1 FM |
70 | Regular expressions, as defined by POSIX, come in two flavours: @e extended |
71 | and @e basic. The builtin library also adds a third flavour | |
3ed3a1c8 | 72 | of expression @ref overview_resyntax "advanced", which is not available |
23324ae1 | 73 | when using the system library. |
7c913512 | 74 | |
23324ae1 FM |
75 | Unicode is fully supported only when using the builtin library. |
76 | When using the system library in Unicode mode, the expressions and data | |
77 | are translated to the default 8-bit encoding before being passed to | |
78 | the library. | |
7c913512 | 79 | |
23324ae1 FM |
80 | On platforms where a system library is available, the default is to use |
81 | the builtin library for Unicode builds, and the system library otherwise. | |
82 | It is possible to use the other if preferred by selecting it when building | |
83 | wxWidgets. | |
7c913512 | 84 | |
23324ae1 FM |
85 | @library{wxbase} |
86 | @category{data} | |
7c913512 | 87 | |
3ed3a1c8 BP |
88 | Examples: |
89 | ||
90 | A bad example of processing some text containing email addresses (the example | |
91 | is bad because real email addresses can have a more complicated form than | |
92 | @c user@host.net): | |
93 | ||
94 | @code | |
95 | wxString text; | |
96 | ... | |
f8ebb70d | 97 | wxRegEx reEmail("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"); |
3ed3a1c8 BP |
98 | if ( reEmail.Matches(text) ) |
99 | { | |
100 | wxString email = reEmail.GetMatch(text); | |
101 | wxString username = reEmail.GetMatch(text, 1); | |
f8ebb70d | 102 | if ( reEmail.GetMatch(text, 3) == "com" ) // .com TLD? |
3ed3a1c8 BP |
103 | { |
104 | ... | |
105 | } | |
106 | } | |
107 | ||
108 | // or we could do this to hide the email address | |
f8ebb70d | 109 | int count = reEmail.ReplaceAll(&text, "HIDDEN@\\2\\3"); |
3ed3a1c8 BP |
110 | printf("text now contains %d hidden addresses", count); |
111 | @endcode | |
23324ae1 | 112 | */ |
7c913512 | 113 | class wxRegEx
23324ae1 FM |
114 | {
115 | public: | |
3ed3a1c8 BP |
116 |
117 | /** | |
118 | Default constructor: call Compile() later to set the expression. | |
119 | */ | |
120 | wxRegEx(); | |
121 | ||
23324ae1 | 122 | /**
7c913512 | 123 | Create and compile the regular expression; use |
23324ae1 | 124 | IsValid() to test for compilation errors. |
3ed3a1c8 | 125 |
bbc5b7f8 | 126 | For the possible values of @a flags, see @ref wxRE_FLAGS. |
23324ae1 | 127 | */ |
7c913512 | 128 | wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT); |
3ed3a1c8 | 129 |
23324ae1 FM |
130 |
131 | /** | |
3ed3a1c8 | 132 | Destructor. It is not virtual, so don't derive from this class. |
23324ae1 FM |
133 | */ | |
134 | ~wxRegEx(); | |
135 | ||
136 | /** | |
7c913512 | 137 | Compile the string into a regular expression; returns @true if ok or |
23324ae1 | 138 | @false if the string has a syntax error. |
3ed3a1c8 | 139 |
bbc5b7f8 | 140 | For the possible values of @a flags, see @ref wxRE_FLAGS. |
23324ae1 FM |
141 | */ | |
142 | bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT); | |
143 | ||
23324ae1 | 144 | /**
3ed3a1c8 BP |
145 | Get the start index and the length of the match of the expression |
146 | (if @a index is 0) or a bracketed subexpression (@a index different from 0). | |
147 | ||
148 | May only be called after a successful call to Matches() and only if | |
149 | @c wxRE_NOSUB was @b not used in Compile(). | |
150 | ||
151 | Returns @false if there is no match or if an error occurred. | |
152 | ||
23324ae1 | 153 | */ |
328f5751 | 154 | bool GetMatch(size_t* start, size_t* len, size_t index = 0) const; |
3ed3a1c8 BP |
155 |
156 | /** | |
157 | Returns the part of @a text corresponding to the match, where @a index is 0 | |
158 | for the whole expression or the number of a bracketed subexpression. | |
159 | An empty string is returned if the match failed. | |
160 | May only be called after a successful call to Matches() and only if | |
161 | @c wxRE_NOSUB was @b not used in Compile(). | |
162 | */ | |
163 | wxString GetMatch(const wxString& text, size_t index = 0) const; | |
23324ae1 FM |
164 |
165 | /** | |
0824e369 | 166 | Returns the size of the array of matches, i.e.\ the number of bracketed |
23324ae1 | 167 | subexpressions plus one for the expression itself, or 0 on error. |
3ed3a1c8 | 168 |
23324ae1 FM |
169 | May only be called after a successful call to Compile() |
170 | and only if @c wxRE_NOSUB was @b not used. | |
171 | */ | |
328f5751 | 172 | size_t GetMatchCount() const; |
23324ae1 FM |
173 |
174 | /** | |
7c913512 | 175 | Returns @true if this is a valid compiled regular expression, @false |
23324ae1 FM |
176 | otherwise. |
177 | */ | |
328f5751 | 178 | bool IsValid() const; |
23324ae1 FM |
179 |
180 | //@{ | |
181 | /** | |
3ed3a1c8 | 182 | Matches the precompiled regular expression against the string @a text; |
23324ae1 | 183 | returns @true if it matches and @false otherwise. |
3ed3a1c8 | 184 |
bbc5b7f8 BP |
185 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see |
186 | @ref wxRE_NOT_FLAGS. | |
3ed3a1c8 | 187 |
23324ae1 FM |
188 | Some regex libraries assume that the text given is null terminated, while |
189 | others require the length to be given as a separate parameter. Therefore, for | |
4cc4bfaf | 190 | maximum portability, assume that @a text cannot contain embedded nulls. |
3ed3a1c8 BP |
191 |
192 | When the <b>Matches(const wxChar *text, int flags = 0)</b> form is used, | |
193 | a wxStrlen() will be done internally if the regex library requires the | |
194 | length. When using Matches() in a loop the <b>Matches(text, flags, len)</b> | |
195 | form can be used instead, making it possible to avoid a wxStrlen() inside | |
196 | the loop. | |
197 | ||
23324ae1 FM |
198 | May only be called after a successful call to Compile(). |
199 | */ | |
328f5751 | 200 | bool Matches(const wxChar* text, int flags = 0) const; |
11e3af6e | 201 | bool Matches(const wxChar* text, int flags, size_t len) const; |
23324ae1 FM |
202 | //@} |
203 | ||
3ed3a1c8 BP |
204 | /** |
205 | Matches the precompiled regular expression against the string @a text; | |
206 | returns @true if it matches and @false otherwise. | |
207 | ||
bbc5b7f8 BP |
208 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see |
209 | @ref wxRE_NOT_FLAGS. | |
3ed3a1c8 BP |
210 |
211 | May only be called after a successful call to Compile(). | |
212 | */ | |
11e3af6e | 213 | bool Matches(const wxString& text, int flags = 0) const; |
3ed3a1c8 | 214 |
23324ae1 FM |
215 | /** |
216 | Replaces matches of the current regular expression in the string pointed | |
3ed3a1c8 | 217 | to by @a text with the text in @a replacement and returns the number of |
23324ae1 | 218 | matches replaced (may be 0 if none found) or -1 on error. |
3ed3a1c8 BP |
219 |
220 | The replacement text may contain back references @c \\number which will be | |
23324ae1 | 221 | replaced with the value of the corresponding subexpression in the |
3ed3a1c8 BP |
222 | pattern match. @c \\0 corresponds to the entire match and @c \& is a |
223 | synonym for it. Backslash may be used to quote itself or the @c \& character. | |
224 | ||
4cc4bfaf | 225 | @a maxMatches may be used to limit the number of replacements made: setting |
23324ae1 FM |
226 | it to 1, for example, will only replace the first occurrence (if any) of the |
227 | pattern in the text, while the default value of 0 means replace all. | |
228 | */ | |
229 | int Replace(wxString* text, const wxString& replacement, | |
328f5751 | 230 | size_t maxMatches = 0) const; |
23324ae1 FM |
231 |
232 | /** | |
7c913512 | 233 | Replace all occurrences: this is actually a synonym for |
23324ae1 | 234 | Replace(). |
3c4f71cc | 235 |
4cc4bfaf | 236 | @see ReplaceFirst() |
23324ae1 | 237 | */ |
328f5751 | 238 | int ReplaceAll(wxString* text, const wxString& replacement) const; |
23324ae1 FM |
239 |
240 | /** | |
241 | Replace the first occurrence only. | |
242 | */ | |
328f5751 | 243 | int ReplaceFirst(wxString* text, const wxString& replacement) const; |
23324ae1 | 244 | }; |
e54c96f1 | 245 |