]>
Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: regex.h | |
e54c96f1 | 3 | // Purpose: interface of wxRegEx |
23324ae1 | 4 | // Author: wxWidgets team |
526954c5 | 5 | // Licence: wxWindows licence |
23324ae1 FM |
6 | ///////////////////////////////////////////////////////////////////////////// |
7 | ||
3ed3a1c8 | 8 | /** |
bbc5b7f8 BP |
9 | @anchor wxRE_FLAGS |
10 | ||
9602ce3d | 11 | Flags for regex compilation to be used with wxRegEx::Compile() or the wxRegEx constructor taking an expression. |
3ed3a1c8 BP |
12 | */ |
13 | enum | |
14 | { | |
15 | /** Use extended regex syntax (this is what wxRE_DEFAULT selects). */ | |
16 | wxRE_EXTENDED = 0, | |
17 | ||
18 | /** Use advanced RE syntax (built-in regex only). */ | |
19 | wxRE_ADVANCED = 1, | |
20 | ||
21 | /** Use basic RE syntax. */ | |
22 | wxRE_BASIC = 2, | |
23 | ||
24 | /** Ignore case in match. */ | |
25 | wxRE_ICASE = 4, | |
26 | ||
27 | /** Only check match, don't set back references. */ | |
28 | wxRE_NOSUB = 8, | |
29 | ||
30 | /** | |
31 | If not set, treat '\n' as an ordinary character, otherwise it is | |
32 | special: it is not matched by '.' and '^' and '$' always match | |
33 | after/before it regardless of the setting of wxRE_NOTBOL and wxRE_NOTEOL. | |
34 | */ | |
35 | wxRE_NEWLINE = 16, | |
36 | ||
37 | /** Default flags: the same as wxRE_EXTENDED. */ | |
38 | wxRE_DEFAULT = wxRE_EXTENDED | |
39 | }; | |
40 | ||
41 | /** | |
bbc5b7f8 BP |
42 | @anchor wxRE_NOT_FLAGS |
43 | ||
9602ce3d | 44 | Flags for regex matching to be used with wxRegEx::Matches(). |
3ed3a1c8 | 45 | These flags are mainly useful when doing several matches in a long string |
9602ce3d | 46 | to prevent erroneous matches for '^' and '$'. |
3ed3a1c8 BP |
47 | */ |
48 | enum | |
49 | { | |
50 | /** '^' doesn't match at the start of the line. */ | |
51 | wxRE_NOTBOL = 32, | |
52 | ||
53 | /** '$' doesn't match at the end of the line. */ | |
54 | wxRE_NOTEOL = 64 | |
55 | }; | |
56 | ||
23324ae1 FM |
57 | /** |
58 | @class wxRegEx | |
7c913512 | 59 | |
23324ae1 FM |
60 | wxRegEx represents a regular expression. This class provides support |
61 | for regular expressions matching and also replacement. | |
7c913512 | 62 | |
23324ae1 FM |
63 | It is built on top of either the system library (if it has support |
64 | for POSIX regular expressions - which is the case on most modern | |
65 | Unices) or uses the built-in Henry Spencer's library. Henry Spencer | |
66 | would appreciate being given credit in the documentation of software | |
67 | which uses his library, but that is not a requirement. | |
7c913512 | 68 | |
23324ae1 FM |
69 | Regular expressions, as defined by POSIX, come in two flavours: @e extended |
70 | and @e basic. The builtin library also adds a third flavour | |
3ed3a1c8 | 71 | of expression @ref overview_resyntax "advanced", which is not available |
23324ae1 | 72 | when using the system library. |
7c913512 | 73 | |
23324ae1 FM |
74 | Unicode is fully supported only when using the builtin library. |
75 | When using the system library in Unicode mode, the expressions and data | |
76 | are translated to the default 8-bit encoding before being passed to | |
77 | the library. | |
7c913512 | 78 | |
23324ae1 FM |
79 | On platforms where a system library is available, the default is to use |
80 | the builtin library for Unicode builds, and the system library otherwise. | |
81 | It is possible to use the other if preferred by selecting it when building | |
82 | wxWidgets. | |
7c913512 | 83 | |
23324ae1 FM |
84 | @library{wxbase} |
85 | @category{data} | |
7c913512 | 86 | |
3ed3a1c8 BP |
87 | Examples: |
88 | ||
89 | A bad example of processing some text containing email addresses (the example | |
90 | is bad because real email addresses can have a more complicated form than | |
91 | @c user@host.net): | |
92 | ||
93 | @code | |
94 | wxString text; | |
95 | ... | |
f8ebb70d | 96 | wxRegEx reEmail = "([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"; |
3ed3a1c8 BP |
97 | if ( reEmail.Matches(text) ) |
98 | { | |
99 | wxString email = reEmail.GetMatch(text); | |
100 | wxString username = reEmail.GetMatch(text, 1); | |
f8ebb70d | 101 | if ( reEmail.GetMatch(text, 3) == "com" ) // .com TLD? |
3ed3a1c8 BP |
102 | { |
103 | ... | |
104 | } | |
105 | } | |
106 | ||
107 | // or we could do this to hide the email address | |
f8ebb70d | 108 | int count = reEmail.ReplaceAll(&text, "HIDDEN@\\2\\3"); |
3ed3a1c8 BP |
109 | printf("text now contains %d hidden addresses", count); |
110 | @endcode | |
23324ae1 | 111 | */ |
7c913512 | 112 | class wxRegEx |
23324ae1 FM |
113 | { |
114 | public: | |
3ed3a1c8 BP |
115 | |
116 | /** | |
117 | Default constructor: no expression is compiled yet, use Compile() later. | |
118 | */ | |
119 | wxRegEx(); | |
120 | ||
23324ae1 | 121 | /** |
7c913512 | 122 | Create and compile the regular expression @a expr; use |
23324ae1 | 123 | IsValid() to test for compilation errors. |
3ed3a1c8 | 124 | 
bbc5b7f8 | 125 | For the possible values of @a flags, please see @ref wxRE_FLAGS. |
23324ae1 | 126 | */ |
7c913512 | 127 | wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT); |
3ed3a1c8 | 128 | |
23324ae1 FM |
129 | |
130 | /** | |
3ed3a1c8 | 131 | Destructor. It is not virtual, so do not derive from this class. |
23324ae1 FM |
132 | */ |
133 | ~wxRegEx(); | |
134 | ||
135 | /** | |
7c913512 | 136 | Compile the string @a pattern into a regular expression, return @true if ok or @false |
23324ae1 | 137 | if the string has a syntax error. |
3ed3a1c8 | 138 | 
bbc5b7f8 | 139 | For the possible values of @a flags, please see @ref wxRE_FLAGS. |
23324ae1 FM |
140 | */ |
141 | bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT); | |
142 | ||
23324ae1 | 143 | /** |
3ed3a1c8 BP |
144 | Get the start index (in @a start) and the length (in @a len) of the match of the expression |
145 | (if @a index is 0) or of a bracketed subexpression (@a index different from 0). | |
146 | ||
147 | May only be called after a successful call to Matches() and only if @c wxRE_NOSUB | |
148 | was @b not used in Compile(). | |
149 | ||
150 | Returns @false if there is no match or if an error occurred. | |
151 | ||
23324ae1 | 152 | */ |
328f5751 | 153 | bool GetMatch(size_t* start, size_t* len, size_t index = 0) const; |
3ed3a1c8 BP |
154 | |
155 | /** | |
156 | Returns the part of the string @a text corresponding to the match, where @a index is 0 for the whole | |
157 | expression or non-zero for a bracketed subexpression. An empty string is returned if the match failed. | |
158 | ||
159 | May only be called after a successful call to Matches() and only if @c wxRE_NOSUB | |
160 | was @b not used in Compile(). | |
161 | */ | |
162 | wxString GetMatch(const wxString& text, size_t index = 0) const; | |
23324ae1 FM |
163 | |
164 | /** | |
0824e369 | 165 | Returns the size of the array of matches, i.e.\ the number of bracketed |
23324ae1 | 166 | subexpressions plus one for the expression itself, or 0 on error. |
3ed3a1c8 | 167 | 
23324ae1 FM |
168 | May only be called after a successful call to Compile() |
169 | and only if @c wxRE_NOSUB was @b not used. | |
170 | */ | |
328f5751 | 171 | size_t GetMatchCount() const; |
23324ae1 FM |
172 | |
173 | /** | |
7c913512 | 174 | Returns @true if this is a valid compiled regular expression, @false |
23324ae1 FM |
175 | otherwise; use it to test for compilation errors. |
176 | */ | |
328f5751 | 177 | bool IsValid() const; |
23324ae1 FM |
178 | |
179 | //@{ | |
180 | /** | |
3ed3a1c8 | 181 | Matches the precompiled regular expression against the string @a text, |
23324ae1 | 182 | returns @true if it matches and @false otherwise. |
3ed3a1c8 | 183 | 
bbc5b7f8 BP |
184 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see |
185 | @ref wxRE_NOT_FLAGS. | |
3ed3a1c8 | 186 | 
23324ae1 FM |
187 | Some regex libraries assume that the text given is null terminated, while |
188 | others require the length to be given as a separate parameter. Therefore for | |
4cc4bfaf | 189 | maximum portability assume that @a text cannot contain embedded nulls. |
3ed3a1c8 BP |
190 | 
191 | When the <b>Matches(const wxChar* text, int flags = 0)</b> form is used, | |
192 | a wxStrlen() will be done internally if the regex library requires the | |
193 | length. When using Matches() in a loop the <b>Matches(text, flags, len)</b> | |
194 | form, which takes the length in @a len, can be used instead, making it possible to avoid a wxStrlen() inside | |
195 | the loop. | |
196 | ||
23324ae1 FM |
197 | May only be called after a successful call to Compile(). |
198 | */ | |
328f5751 | 199 | bool Matches(const wxChar* text, int flags = 0) const; |
11e3af6e | 200 | bool Matches(const wxChar* text, int flags, size_t len) const; |
23324ae1 FM |
201 | //@} |
202 | ||
3ed3a1c8 BP |
203 | /** |
204 | Matches the precompiled regular expression against the string @a text, | |
205 | returns @true if it matches and @false otherwise. | |
206 | ||
bbc5b7f8 BP |
207 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see |
208 | @ref wxRE_NOT_FLAGS. | |
3ed3a1c8 BP |
209 | 
210 | May only be called after a successful call to Compile(). | |
211 | */ | |
11e3af6e | 212 | bool Matches(const wxString& text, int flags = 0) const; |
3ed3a1c8 | 213 | |
23324ae1 FM |
214 | /** |
215 | Replaces the matches of the current regular expression in the string pointed to by | |
3ed3a1c8 | 216 | @a text with the text in @a replacement and returns the number of matches |
23324ae1 | 217 | replaced (may be 0 if none found) or -1 on error. |
3ed3a1c8 BP |
218 | 
219 | The replacement text may contain back references @c \\number which will be | |
23324ae1 | 220 | replaced with the value of the corresponding subexpression in the |
3ed3a1c8 BP |
221 | pattern match. @c \\0 corresponds to the entire match and @c \& is a |
222 | synonym for it. Backslash may be used to quote itself or @c \& character. | |
223 | ||
4cc4bfaf | 224 | @a maxMatches may be used to limit the number of replacements made, setting |
23324ae1 FM |
225 | it to 1, for example, will only replace the first occurrence (if any) of the |
226 | pattern in the text while the default value of 0 means replace all. | |
227 | */ | |
228 | int Replace(wxString* text, const wxString& replacement, | |
328f5751 | 229 | size_t maxMatches = 0) const; |
23324ae1 FM |
230 | |
231 | /** | |
7c913512 | 232 | Replace all occurrences: this is actually a synonym for |
23324ae1 | 233 | Replace() called with its default @c maxMatches value of 0. |
3c4f71cc | 234 | 
4cc4bfaf | 235 | @see ReplaceFirst() |
23324ae1 | 236 | */ |
328f5751 | 237 | int ReplaceAll(wxString* text, const wxString& replacement) const; |
23324ae1 FM |
238 | |
239 | /** | |
240 | Replace the first occurrence only (if any); see also Replace() and ReplaceAll(). | |
241 | */ | |
328f5751 | 242 | int ReplaceFirst(wxString* text, const wxString& replacement) const; |
23324ae1 | 243 | }; |
e54c96f1 | 244 |