]>
Commit | Line | Data |
---|---|---|
23324ae1 FM |
1 | ///////////////////////////////////////////////////////////////////////////// |
2 | // Name: regex.h | |
e54c96f1 | 3 | // Purpose: interface of wxRegEx |
23324ae1 FM |
4 | // Author: wxWidgets team |
5 | // RCS-ID: $Id$ | |
6 | // Licence: wxWindows license | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
3ed3a1c8 | 9 | /** |
bbc5b7f8 BP |
10 | @anchor wxRE_FLAGS |
11 | ||
9602ce3d | 12 | Flags for regex compilation to be used with wxRegEx::Compile(). |
3ed3a1c8 BP |
13 | */ |
14 | enum | |
15 | { | |
16 | /** Use extended regex syntax (this is the default, see wxRE_DEFAULT). */ | |
17 | wxRE_EXTENDED = 0, | |
18 | ||
19 | /** Use advanced RE syntax (built-in regex only). */ | |
20 | wxRE_ADVANCED = 1, | |
21 | ||
22 | /** Use basic RE syntax. */ | |
23 | wxRE_BASIC = 2, | |
24 | ||
25 | /** Ignore case in match. */ | |
26 | wxRE_ICASE = 4, | |
27 | ||
28 | /** Only check match, don't set back references (GetMatch() may not be used then). */ | |
29 | wxRE_NOSUB = 8, | |
30 | ||
31 | /** | |
32 | If not set, treat '\n' as an ordinary character, otherwise it is | |
33 | special: it is not matched by '.', and '^' and '$' always match | |
34 | after/before it regardless of the setting of wxRE_NOTBOL and wxRE_NOTEOL. | |
35 | */ | |
36 | wxRE_NEWLINE = 16, | |
37 | ||
38 | /** Default flags: the same value as wxRE_EXTENDED. */ | |
39 | wxRE_DEFAULT = wxRE_EXTENDED | |
40 | }; | |
41 | ||
42 | /** | |
bbc5b7f8 BP |
43 | @anchor wxRE_NOT_FLAGS |
44 | ||
9602ce3d | 45 | Flags for regex matching to be used with wxRegEx::Matches(). |
3ed3a1c8 | 46 | These flags are mainly useful when doing several matches in a long string |
9602ce3d | 47 | to prevent erroneous matches for '^' and '$': |
3ed3a1c8 BP |
48 | */ |
49 | enum | |
50 | { | |
51 | /** '^' doesn't match at the start of the line. */ | |
52 | wxRE_NOTBOL = 32, | |
53 | ||
54 | /** '$' doesn't match at the end of the line. */ | |
55 | wxRE_NOTEOL = 64 | |
56 | }; | |
57 | ||
23324ae1 FM |
58 | /** |
59 | @class wxRegEx | |
60 | @wxheader{regex.h} | |
7c913512 | 61 | |
23324ae1 FM |
62 | wxRegEx represents a regular expression. This class provides support |
63 | for regular expression matching and also replacement. | |
7c913512 | 64 | |
23324ae1 FM |
65 | It is built on top of either the system library (if it has support |
66 | for POSIX regular expressions - which is the case for most modern | |
67 | Unices) or uses the built in Henry Spencer's library. Henry Spencer | |
68 | would appreciate being given credit in the documentation of software | |
69 | which uses his library, but that is not a requirement. | |
7c913512 | 70 | |
23324ae1 FM |
71 | Regular expressions, as defined by POSIX, come in two flavours: @e extended |
72 | and @e basic. The builtin library also adds a third flavour | |
3ed3a1c8 | 73 | of expression @ref overview_resyntax "advanced", which is not available |
23324ae1 | 74 | when using the system library. |
7c913512 | 75 | |
23324ae1 FM |
76 | Unicode is fully supported only when using the builtin library. |
77 | When using the system library in Unicode mode, the expressions and data | |
78 | are translated to the default 8-bit encoding before being passed to | |
79 | the library. | |
7c913512 | 80 | |
23324ae1 FM |
81 | On platforms where a system library is available, the default is to use |
82 | the builtin library for Unicode builds, and the system library otherwise. | |
83 | It is possible to use the other if preferred by selecting it when building | |
84 | wxWidgets. | |
7c913512 | 85 | |
23324ae1 FM |
86 | @library{wxbase} |
87 | @category{data} | |
7c913512 | 88 | |
3ed3a1c8 BP |
89 | Examples: |
90 | ||
91 | A bad example of processing some text containing email addresses (the example | |
92 | is bad because the real email addresses can have more complicated form than | |
93 | @c user@host.net): | |
94 | ||
95 | @code | |
96 | wxString text; | |
97 | ... | |
98 | wxRegEx reEmail = wxT("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"); | |
99 | if ( reEmail.Matches(text) ) | |
100 | { | |
101 | wxString email = reEmail.GetMatch(text); | |
102 | wxString username = reEmail.GetMatch(text, 1); | |
103 | if ( reEmail.GetMatch(text, 3) == wxT("com") ) // .com TLD? | |
104 | { | |
105 | ... | |
106 | } | |
107 | } | |
108 | ||
109 | // or we could do this to hide the email address | |
110 | int count = reEmail.ReplaceAll(&text, wxT("HIDDEN@\\2\\3")); | |
111 | printf("text now contains %d hidden addresses", count); | |
112 | @endcode | |
23324ae1 | 113 | */ |
7c913512 | 114 | class wxRegEx
23324ae1 FM |
115 | {
116 | public: | |
3ed3a1c8 BP |
117 |
118 | /** | |
119 | Default constructor: use Compile() later. | |
120 | */ | |
121 | wxRegEx(); | |
122 | ||
23324ae1 | 123 | /**
7c913512 | 124 | Create and compile the regular expression @a expr, use
23324ae1 | 125 | IsValid() to test for compilation errors.
3ed3a1c8 | 126 |
bbc5b7f8 | 127 | As for the @a flags, please see @ref wxRE_FLAGS.
23324ae1 | 128 | */
7c913512 | 129 | wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT);
3ed3a1c8 | 130 |
23324ae1 FM |
131 |
132 | /** | |
3ed3a1c8 | 133 | Destructor. It's not virtual, don't derive from this class.
23324ae1 FM |
134 | */
135 | ~wxRegEx(); | |
136 | ||
137 | /** | |
7c913512 | 138 | Compile the string into a regular expression, return @true if ok or @false
23324ae1 | 139 | if the string has a syntax error.
3ed3a1c8 | 140 |
bbc5b7f8 | 141 | As for the @a flags, please see @ref wxRE_FLAGS.
23324ae1 FM |
142 | */
143 | bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT); | |
144 | ||
23324ae1 | 145 | /**
3ed3a1c8 BP |
146 | Get the start index and the length of the match of the expression
147 | (if @a index is 0) or a bracketed subexpression (@a index different from 0). | |
148 | ||
149 | May only be called after a successful call to Matches() and only if @c wxRE_NOSUB | |
150 | was @b not used in Compile(). | |
151 | ||
152 | Returns @false if no match or if an error occurred. | |
153 | ||
23324ae1 | 154 | */
328f5751 | 155 | bool GetMatch(size_t* start, size_t* len, size_t index = 0) const;
3ed3a1c8 BP |
156 |
157 | /** | |
158 | Returns the part of @a text corresponding to the match, where @a index is interpreted | |
159 | as in the other GetMatch() overload. An empty string is returned if the match failed. | |
160 | ||
161 | May only be called after a successful call to Matches() and only if @c wxRE_NOSUB | |
162 | was @b not used in Compile(). | |
163 | */ | |
164 | wxString GetMatch(const wxString& text, size_t index = 0) const; | |
23324ae1 FM |
165 |
166 | /** | |
167 | Returns the size of the array of matches, i.e. the number of bracketed | |
168 | subexpressions plus one for the expression itself, or 0 on error. | |
3ed3a1c8 | 169 |
23324ae1 FM |
170 | May only be called after a successful call to Compile() |
171 | and only if @c wxRE_NOSUB was @b not used. | |
172 | */ | |
328f5751 | 173 | size_t GetMatchCount() const;
23324ae1 FM |
174 |
175 | /** | |
7c913512 | 176 | Return @true if this is a valid compiled regular expression, @false
23324ae1 FM |
177 | otherwise.
178 | */ | |
328f5751 | 179 | bool IsValid() const;
23324ae1 FM |
180 |
181 | //@{ | |
182 | /** | |
3ed3a1c8 | 183 | Matches the precompiled regular expression against the string @a text,
23324ae1 | 184 | returns @true if matches and @false otherwise.
3ed3a1c8 | 185 |
bbc5b7f8 BP |
186 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see
187 | @ref wxRE_NOT_FLAGS. | |
3ed3a1c8 | 188 |
23324ae1 FM |
189 | Some regex libraries assume that the text given is null terminated, while
190 | others require the length be given as a separate parameter. Therefore for | |
4cc4bfaf | 191 | maximum portability assume that @a text cannot contain embedded nulls.
3ed3a1c8 BP |
192 |
193 | When the <b>Matches(const wxChar *text, int flags = 0)</b> form is used, | |
194 | a wxStrlen() will be done internally if the regex library requires the | |
195 | length. When using Matches() in a loop the <b>Matches(text, flags, len)</b> | |
196 | form can be used instead, making it possible to avoid a wxStrlen() inside | |
197 | the loop. | |
198 | ||
23324ae1 FM |
199 | May only be called after a successful call to Compile().
200 | */ | |
328f5751 FM |
201 | bool Matches(const wxChar* text, int flags = 0) const;
202 | bool Matches(const wxChar* text, int flags, size_t len) const; | |
23324ae1 FM |
203 | //@}
204 | ||
3ed3a1c8 BP |
205 | /**
206 | Matches the precompiled regular expression against the string @a text, | |
207 | returns @true if matches and @false otherwise. | |
208 | ||
bbc5b7f8 BP |
209 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see
210 | @ref wxRE_NOT_FLAGS. | |
3ed3a1c8 BP |
211 |
212 | May only be called after a successful call to Compile(). | |
213 | */ | |
214 | bool Matches(const wxString& text, int flags = 0) const; | |
215 | ||
23324ae1 FM |
216 | /**
217 | Replaces matches of the current regular expression in the string pointed to by | |
3ed3a1c8 | 218 | @a text with the text in @a replacement and returns the number of matches
23324ae1 | 219 | replaced (may be 0 if none found) or -1 on error.
3ed3a1c8 BP |
220 |
221 | The replacement text may contain back references @c \\number which will be | |
23324ae1 | 222 | replaced with the value of the corresponding subexpression in the
3ed3a1c8 BP |
223 | pattern match. @c \\0 corresponds to the entire match and @c \& is a
224 | synonym for it. Backslash may be used to quote itself or @c \& character. | |
225 | ||
4cc4bfaf | 226 | @a maxMatches may be used to limit the number of replacements made, setting
23324ae1 FM |
227 | it to 1, for example, will only replace the first occurrence (if any) of the
228 | pattern in the text while the default value of 0 means replace all. | |
229 | */ | |
230 | int Replace(wxString* text, const wxString& replacement, | |
328f5751 | 231 | size_t maxMatches = 0) const;
23324ae1 FM |
232 |
233 | /** | |
7c913512 | 234 | Replace all occurrences: this is equivalent to calling Replace() with the
23324ae1 | 235 | default @a maxMatches value of 0.
3c4f71cc | 236 |
4cc4bfaf | 237 | @see ReplaceFirst()
23324ae1 | 238 | */
328f5751 | 239 | int ReplaceAll(wxString* text, const wxString& replacement) const;
23324ae1 FM |
240 |
241 | /** | |
242 | Replace the first occurrence. | |
243 | */ | |
244 | int ReplaceFirst(wxString* text, const wxString& replacement) const; | |
23324ae1 | 245 | };
e54c96f1 | 246 |