]>
Commit | Line | Data |
---|---|---|
1 | ///////////////////////////////////////////////////////////////////////////// | |
2 | // Name: regex.h | |
3 | // Purpose: interface of wxRegEx | |
4 | // Author: wxWidgets team | |
5 | // RCS-ID: $Id$ | |
6 | // Licence: wxWindows licence | |
7 | ///////////////////////////////////////////////////////////////////////////// | |
8 | ||
9 | /** | |
10 | @anchor wxRE_FLAGS | |
11 | ||
12 | Flags for regex compilation to be used with wxRegEx::Compile(). | |
13 | */ | |
14 | enum | |
15 | { | |
16 | /** Use extended regex syntax. */ | |
17 | wxRE_EXTENDED = 0, | |
18 | ||
19 | /** Use advanced RE syntax (built-in regex only). */ | |
20 | wxRE_ADVANCED = 1, | |
21 | ||
22 | /** Use basic RE syntax. */ | |
23 | wxRE_BASIC = 2, | |
24 | ||
25 | /** Ignore case in match. */ | |
26 | wxRE_ICASE = 4, | |
27 | ||
28 | /** Only check match, don't set back references. */ | |
29 | wxRE_NOSUB = 8, | |
30 | ||
31 | /** | |
32 | If not set, treat '\n' as an ordinary character, otherwise it is | |
33 | special: it is not matched by '.' and '^' and '$' always match | |
34 | after/before it regardless of the setting of wxRE_NOT[BE]OL. | |
35 | */ | |
36 | wxRE_NEWLINE = 16, | |
37 | ||
38 | /** Default flags.*/ | |
39 | wxRE_DEFAULT = wxRE_EXTENDED | |
40 | }; | |
41 | ||
42 | /** | |
43 | @anchor wxRE_NOT_FLAGS | |
44 | ||
45 | Flags for regex matching to be used with wxRegEx::Matches(). | |
46 | These flags are mainly useful when doing several matches in a long string | |
47 | to prevent erroneous matches for '^' and '$': | |
48 | */ | |
49 | enum | |
50 | { | |
51 | /** '^' doesn't match at the start of line. */ | |
52 | wxRE_NOTBOL = 32, | |
53 | ||
54 | /** '$' doesn't match at the end of line. */ | |
55 | wxRE_NOTEOL = 64 | |
56 | }; | |
57 | ||
58 | /** | |
59 | @class wxRegEx | |
60 | ||
61 | wxRegEx represents a regular expression. This class provides support | |
62 | for regular expressions matching and also replacement. | |
63 | ||
64 | It is built on top of either the system library (if it has support | |
65 | for POSIX regular expressions - which is the case for most modern | |
66 | Unices) or uses Henry Spencer's built-in library. Henry Spencer | |
67 | would appreciate being given credit in the documentation of software | |
68 | which uses his library, but that is not a requirement. | |
69 | ||
70 | Regular expressions, as defined by POSIX, come in two flavours: @e extended | |
71 | and @e basic. The builtin library also adds a third flavour | |
72 | of expression @ref overview_resyntax "advanced", which is not available | |
73 | when using the system library. | |
74 | ||
75 | Unicode is fully supported only when using the builtin library. | |
76 | When using the system library in Unicode mode, the expressions and data | |
77 | are translated to the default 8-bit encoding before being passed to | |
78 | the library. | |
79 | ||
80 | On platforms where a system library is available, the default is to use | |
81 | the builtin library for Unicode builds, and the system library otherwise. | |
82 | It is possible to use the other one, if preferred, by selecting it when | |
83 | building wxWidgets. | |
84 | ||
85 | @library{wxbase} | |
86 | @category{data} | |
87 | ||
88 | Examples: | |
89 | ||
90 | A bad example of processing some text containing email addresses (the example | |
91 | is bad because the real email addresses can have more complicated form than | |
92 | @c user@host.net): | |
93 | ||
94 | @code | |
95 | wxString text; | |
96 | ... | |
97 | wxRegEx reEmail("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"); | |
98 | if ( reEmail.Matches(text) ) | |
99 | { | |
100 | wxString email = reEmail.GetMatch(text); | |
101 | wxString username = reEmail.GetMatch(text, 1); | |
102 | if ( reEmail.GetMatch(text, 3) == "com" ) // .com TLD? | |
103 | { | |
104 | ... | |
105 | } | |
106 | } | |
107 | ||
108 | // or we could do this to hide the email address | |
109 | int count = reEmail.ReplaceAll(&text, "HIDDEN@\\2\\3"); | |
110 | printf("text now contains %d hidden addresses", count); | |
111 | @endcode | |
112 | */ | |
113 | class wxRegEx | |
114 | { | |
115 | public: | |
116 | ||
117 | /** | |
118 | Default constructor: use Compile() later. | |
119 | */ | |
120 | wxRegEx(); | |
121 | ||
122 | /** | |
123 | Create and compile the regular expression, use | |
124 | IsValid() to test for compilation errors. | |
125 | ||
126 | As for the flags, please see @ref wxRE_FLAGS. | |
127 | */ | |
128 | wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT); | |
129 | ||
130 | ||
131 | /** | |
132 | Destructor. It's not virtual, don't derive from this class. | |
133 | */ | |
134 | ~wxRegEx(); | |
135 | ||
136 | /** | |
137 | Compile the string into regular expression, return @true if ok or @false | |
138 | if string has a syntax error. | |
139 | ||
140 | As for the flags, please see @ref wxRE_FLAGS. | |
141 | */ | |
142 | bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT); | |
143 | ||
144 | /** | |
145 | Get the start index and the length of the match of the expression | |
146 | (if @a index is 0) or a bracketed subexpression (@a index different from 0). | |
147 | ||
148 | May only be called after successful call to Matches() and only if @c wxRE_NOSUB | |
149 | was @b not used in Compile(). | |
150 | ||
151 | Returns @false if no match or if an error occurred. | |
152 | ||
153 | */ | |
154 | bool GetMatch(size_t* start, size_t* len, size_t index = 0) const; | |
155 | ||
156 | /** | |
157 | Returns the part of string corresponding to the match where index is interpreted | |
158 | as above. Empty string is returned if match failed. | |
159 | ||
160 | May only be called after successful call to Matches() and only if @c wxRE_NOSUB | |
161 | was @b not used in Compile(). | |
162 | */ | |
163 | wxString GetMatch(const wxString& text, size_t index = 0) const; | |
164 | ||
165 | /** | |
166 | Returns the size of the array of matches, i.e. the number of bracketed | |
167 | subexpressions plus one for the expression itself, or 0 on error. | |
168 | ||
169 | May only be called after successful call to Compile() | |
170 | and only if @c wxRE_NOSUB was @b not used. | |
171 | */ | |
172 | size_t GetMatchCount() const; | |
173 | ||
174 | /** | |
175 | Return @true if this is a valid compiled regular expression, @false | |
176 | otherwise. | |
177 | */ | |
178 | bool IsValid() const; | |
179 | ||
180 | //@{ | |
181 | /** | |
182 | Matches the precompiled regular expression against the string @a text, | |
183 | returns @true if matches and @false otherwise. | |
184 | ||
185 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see | |
186 | @ref wxRE_NOT_FLAGS. | |
187 | ||
188 | Some regex libraries assume that the text given is null terminated, while | |
189 | others require the length be given as a separate parameter. Therefore for | |
190 | maximum portability assume that @a text cannot contain embedded nulls. | |
191 | ||
192 | When the <b>Matches(const wxChar *text, int flags = 0)</b> form is used, | |
193 | a wxStrlen() will be done internally if the regex library requires the | |
194 | length. When using Matches() in a loop the <b>Matches(text, flags, len)</b> | |
195 | form can be used instead, making it possible to avoid a wxStrlen() inside | |
196 | the loop. | |
197 | ||
198 | May only be called after successful call to Compile(). | |
199 | */ | |
200 | bool Matches(const wxChar* text, int flags = 0) const; | |
201 | bool Matches(const wxChar* text, int flags, size_t len) const; | |
202 | //@} | |
203 | ||
204 | /** | |
205 | Matches the precompiled regular expression against the string @a text, | |
206 | returns @true if matches and @false otherwise. | |
207 | ||
208 | @a flags may be a combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL, see | |
209 | @ref wxRE_NOT_FLAGS. | |
210 | ||
211 | May only be called after successful call to Compile(). | |
212 | */ | |
213 | bool Matches(const wxString& text, int flags = 0) const; | |
214 | ||
215 | /** | |
216 | Replaces the matches of the current regular expression in the string | |
217 | pointed to by @a text with the text in @a replacement and returns the number | |
218 | of matches replaced (may be 0 if none found) or -1 on error. | |
219 | ||
220 | The replacement text may contain back references @c \\number which will be | |
221 | replaced with the value of the corresponding subexpression in the | |
222 | pattern match. @c \\0 corresponds to the entire match and @c \& is a | |
223 | synonym for it. Backslash may be used to quote itself or @c \& character. | |
224 | ||
225 | @a maxMatches may be used to limit the number of replacements made, setting | |
226 | it to 1, for example, will only replace first occurrence (if any) of the | |
227 | pattern in the text while default value of 0 means replace all. | |
228 | */ | |
229 | int Replace(wxString* text, const wxString& replacement, | |
230 | size_t maxMatches = 0) const; | |
231 | ||
232 | /** | |
233 | Replace all occurrences: this is actually a synonym for | |
234 | Replace(). | |
235 | ||
236 | @see ReplaceFirst() | |
237 | */ | |
238 | int ReplaceAll(wxString* text, const wxString& replacement) const; | |
239 | ||
240 | /** | |
241 | Replace the first occurrence. | |
242 | */ | |
243 | int ReplaceFirst(wxString* text, const wxString& replacement) const; | |
244 | }; | |
245 |