]>
git.saurik.com Git - wxWidgets.git/blob - interface/regex.h
1 /////////////////////////////////////////////////////////////////////////////
3 // Purpose: interface of wxRegEx
4 // Author: wxWidgets team
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
10 Flags for regex compilation to be used with Compile().
14 /** Use extended regex syntax. */
17 /** Use advanced RE syntax (built-in regex only). */
20 /** Use basic RE syntax. */
23 /** Ignore case in match. */
26 /** Only check match, don't set back references. */
30 If not set, treat '\n' as an ordinary character, otherwise it is
31 special: it is not matched by '.' and '^' and '$' always match
32 after/before it regardless of the setting of wxRE_NOT[BE]OL.
37 wxRE_DEFAULT
= wxRE_EXTENDED
41 Flags for regex matching to be used with Matches().
42 These flags are mainly useful when doing several matches in a long string
43 to prevent erroneous matches for '^' and '$':
47 /** '^' doesn't match at the start of line. */
50 /** '$' doesn't match at the end of line. */
58 wxRegEx represents a regular expression. This class provides support
59 for regular expressions matching and also replacement.
61 It is built on top of either the system library (if it has support
62 for POSIX regular expressions - which is the case for most modern
63 Unices) or uses the built-in Henry Spencer's library. Henry Spencer
64 would appreciate being given credit in the documentation of software
65 which uses his library, but that is not a requirement.
67 Regular expressions, as defined by POSIX, come in two flavours: @e extended
68 and @e basic. The builtin library also adds a third flavour
69 of expression @ref overview_resyntax "advanced", which is not available
70 when using the system library.
72 Unicode is fully supported only when using the builtin library.
73 When using the system library in Unicode mode, the expressions and data
74 are translated to the default 8-bit encoding before being passed to
77 On platforms where a system library is available, the default is to use
78 the builtin library for Unicode builds, and the system library otherwise.
79 It is possible to use the other if preferred by selecting it when building
87 A bad example of processing some text containing email addresses (the example
88 is bad because the real email addresses can have more complicated form than
94 wxRegEx reEmail = wxT("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)");
95 if ( reEmail.Matches(text) )
97 wxString text = reEmail.GetMatch(email);
98 wxString username = reEmail.GetMatch(email, 1);
99 if ( reEmail.GetMatch(email, 3) == wxT("com") ) // .com TLD?
105 // or we could do this to hide the email address
106 size_t count = reEmail.ReplaceAll(&text, wxT("HIDDEN@\\2\\3"));
107 printf("text now contains %u hidden addresses", count);
115 Default constructor: use Compile() later.
120 Create and compile the regular expression, use
121 IsValid() to test for compilation errors.
123 @todo Add reference to the flag enum.
// Constructs the object and compiles expr; flags falls back to wxRE_DEFAULT when omitted.
125 wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT);
129 Destructor. It's not virtual, don't derive from this class.
134 Compile the string into regular expression, return @true if ok or @false
135 if string has a syntax error.
137 @todo Add reference to the flag enum.
// Compiles pattern with the given flags (wxRE_DEFAULT when omitted); the bool result reports success.
139 bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT);
142 Get the start index and the length of the match of the expression
143 (if @a index is 0) or a bracketed subexpression (@a index different from 0).
145 May only be called after successful call to Matches() and only if @c wxRE_NOSUB
146 was @b not used in Compile().
148 Returns @false if no match or if an error occurred.
// Reports the match position through start and len; index 0 (the default) selects the whole match.
151 bool GetMatch(size_t* start, size_t* len, size_t index = 0) const;
154 Returns the part of string corresponding to the match where index is interpreted
155 as above. Empty string is returned if match failed.
157 May only be called after successful call to Matches() and only if @c wxRE_NOSUB
158 was @b not used in Compile().
// Returns the matched part of text; index 0 (the default) selects the whole match.
160 wxString GetMatch(const wxString& text, size_t index = 0) const;
163 Returns the size of the array of matches, i.e. the number of bracketed
164 subexpressions plus one for the expression itself, or 0 on error.
166 May only be called after successful call to Compile()
167 and only if @c wxRE_NOSUB was @b not used.
// Returns the number of matches (bracketed subexpressions plus one for the whole expression), or 0 on error.
169 size_t GetMatchCount() const;
172 Return @true if this is a valid compiled regular expression, @false otherwise.
// Reports whether this object holds a valid compiled regular expression.
175 bool IsValid() const;
179 Matches the precompiled regular expression against the string @a text,
180 returns @true if matches and @false otherwise.
182 @e Flags may be combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL.
183 @todo Add reference to the flag enum.
185 Some regex libraries assume that the text given is null terminated, while
186 others require the length be given as a separate parameter. Therefore for
187 maximum portability assume that @a text cannot contain embedded nulls.
189 When the <b>Matches(const wxChar *text, int flags = 0)</b> form is used,
190 a wxStrlen() will be done internally if the regex library requires the
191 length. When using Matches() in a loop the <b>Matches(text, flags, len)</b>
192 form can be used instead, making it possible to avoid a wxStrlen() inside the loop.
195 May only be called after successful call to Compile().
// Matches the compiled expression against a C string; flags defaults to 0.
197 bool Matches(const wxChar* text, int flags = 0) const;
// Overload taking the text length explicitly, so no wxStrlen() call is needed internally.
// Returns plain bool: a top-level const on a by-value return type has no effect and made
// this overload inconsistent with the two-argument const wxChar* form, which returns bool.
198 bool Matches(const wxChar* text, int flags, size_t len) const;
202 Matches the precompiled regular expression against the string @a text,
203 returns @true if matches and @false otherwise.
205 @e Flags may be combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL.
206 @todo Add reference to the flag enum.
208 May only be called after successful call to Compile().
// wxString overload; flags defaults to 0 and may combine wxRE_NOTBOL and wxRE_NOTEOL.
// Returns plain bool: const on a by-value return type is meaningless and did not match
// the two-argument const wxChar* overload, which is declared as returning bool.
210 bool Matches(const wxString& text, int flags = 0) const;
213 Replaces the current regular expression in the string pointed to by
214 @a text, with the text in @a replacement and return number of matches
215 replaced (maybe 0 if none found) or -1 on error.
217 The replacement text may contain back references @c \\number which will be
218 replaced with the value of the corresponding subexpression in the
219 pattern match. @c \\0 corresponds to the entire match and @c \& is a
220 synonym for it. Backslash may be used to quote itself or @c \& character.
222 @a maxMatches may be used to limit the number of replacements made, setting
223 it to 1, for example, will only replace first occurrence (if any) of the
224 pattern in the text while default value of 0 means replace all.
// Replaces matches in *text with replacement; maxMatches of 0 (the default) means replace all.
226 int Replace(wxString* text, const wxString& replacement,
227 size_t maxMatches = 0) const;
230 Replace all occurrences: this is actually a synonym for Replace().
// Replaces every occurrence of the expression in *text with replacement.
235 int ReplaceAll(wxString* text, const wxString& replacement) const;
238 Replace the first occurrence.
// Replaces only the first occurrence of the expression in *text with replacement.
240 int ReplaceFirst(wxString* text, const wxString& replacement) const;