]> git.saurik.com Git - wxWidgets.git/blob - interface/regex.h
mac paths updated
[wxWidgets.git] / interface / regex.h
1 /////////////////////////////////////////////////////////////////////////////
2 // Name: regex.h
3 // Purpose: interface of wxRegEx
4 // Author: wxWidgets team
5 // RCS-ID: $Id$
6 // Licence: wxWindows license
7 /////////////////////////////////////////////////////////////////////////////
8
/**
    Flags for regex compilation to be used with wxRegEx::Compile() (and with
    the wxRegEx constructor taking an expression).

    wxRE_EXTENDED, wxRE_ADVANCED and wxRE_BASIC select the regex syntax; the
    remaining values are independent option bits which can be combined with
    the chosen syntax using the bitwise OR operator.
*/
enum
{
    /** Use extended regex syntax. */
    wxRE_EXTENDED = 0,

    /** Use advanced RE syntax (built-in regex only). */
    wxRE_ADVANCED = 1,

    /** Use basic RE syntax. */
    wxRE_BASIC    = 2,

    /** Ignore case in match. */
    wxRE_ICASE    = 4,

    /** Only check match, don't set back references. */
    wxRE_NOSUB    = 8,

    /**
        If not set, treat '\n' as an ordinary character, otherwise it is
        special: it is not matched by '.' and '^' and '$' always match
        after/before it regardless of the setting of wxRE_NOT[BE]OL.
    */
    wxRE_NEWLINE  = 16,

    /** Default flags: the same as wxRE_EXTENDED. */
    wxRE_DEFAULT  = wxRE_EXTENDED
};
39
/**
    Flags for regex matching to be used with wxRegEx::Matches().

    These flags are mainly useful when doing several matches in a long string
    to prevent erroneous matches for '^' and '$'. They are distinct bits and
    so may be combined with the bitwise OR operator.
*/
enum
{
    /** '^' doesn't match at the start of line. */
    wxRE_NOTBOL = 32,

    /** '$' doesn't match at the end of line. */
    wxRE_NOTEOL = 64
};
53
54 /**
55 @class wxRegEx
56 @wxheader{regex.h}
57
58 wxRegEx represents a regular expression. This class provides support
59 for regular expressions matching and also replacement.
60
61 It is built on top of either the system library (if it has support
62 for POSIX regular expressions - which is the case of the most modern
63 Unices) or uses the built in Henry Spencer's library. Henry Spencer
64 would appreciate being given credit in the documentation of software
65 which uses his library, but that is not a requirement.
66
67 Regular expressions, as defined by POSIX, come in two flavours: @e extended
68 and @e basic. The builtin library also adds a third flavour
69 of expression @ref overview_resyntax "advanced", which is not available
70 when using the system library.
71
72 Unicode is fully supported only when using the builtin library.
73 When using the system library in Unicode mode, the expressions and data
74 are translated to the default 8-bit encoding before being passed to
75 the library.
76
77 On platforms where a system library is available, the default is to use
78 the builtin library for Unicode builds, and the system library otherwise.
79 It is possible to use the other if preferred by selecting it when building
80 the wxWidgets.
81
82 @library{wxbase}
83 @category{data}
84
85 Examples:
86
87 A bad example of processing some text containing email addresses (the example
88 is bad because the real email addresses can have more complicated form than
89 @c user@host.net):
90
91 @code
92 wxString text;
93 ...
94 wxRegEx reEmail = wxT("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)");
95 if ( reEmail.Matches(text) )
96 {
97 wxString text = reEmail.GetMatch(email);
98 wxString username = reEmail.GetMatch(email, 1);
99 if ( reEmail.GetMatch(email, 3) == wxT("com") ) // .com TLD?
100 {
101 ...
102 }
103 }
104
105 // or we could do this to hide the email address
106 size_t count = reEmail.ReplaceAll(text, wxT("HIDDEN@\\2\\3"));
107 printf("text now contains %u hidden addresses", count);
108 @endcode
109 */
110 class wxRegEx
111 {
112 public:
113
114 /**
115 Default constructor: use Compile() later.
116 */
117 wxRegEx();
118
119 /**
120 Create and compile the regular expression, use
121 IsValid() to test for compilation errors.
122
123 @todo Add referece to the flag enum.
124 */
125 wxRegEx(const wxString& expr, int flags = wxRE_DEFAULT);
126
127
128 /**
129 Destructor. It's not virtual, don't derive from this class.
130 */
131 ~wxRegEx();
132
133 /**
134 Compile the string into regular expression, return @true if ok or @false
135 if string has a syntax error.
136
137 @todo Add referece to the flag enum.
138 */
139 bool Compile(const wxString& pattern, int flags = wxRE_DEFAULT);
140
141 /**
142 Get the start index and the length of the match of the expression
143 (if @a index is 0) or a bracketed subexpression (@a index different from 0).
144
145 May only be called after successful call to Matches() and only if @c wxRE_NOSUB
146 was @b not used in Compile().
147
148 Returns @false if no match or if an error occurred.
149
150 */
151 bool GetMatch(size_t* start, size_t* len, size_t index = 0) const;
152
153 /**
154 Returns the part of string corresponding to the match where index is interpreted
155 as above. Empty string is returned if match failed.
156
157 May only be called after successful call to Matches() and only if @c wxRE_NOSUB
158 was @b not used in Compile().
159 */
160 wxString GetMatch(const wxString& text, size_t index = 0) const;
161
162 /**
163 Returns the size of the array of matches, i.e. the number of bracketed
164 subexpressions plus one for the expression itself, or 0 on error.
165
166 May only be called after successful call to Compile().
167 and only if @c wxRE_NOSUB was @b not used.
168 */
169 size_t GetMatchCount() const;
170
171 /**
172 Return @true if this is a valid compiled regular expression, @false
173 otherwise.
174 */
175 bool IsValid() const;
176
177 //@{
178 /**
179 Matches the precompiled regular expression against the string @a text,
180 returns @true if matches and @false otherwise.
181
182 @e Flags may be combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL.
183 @todo Add referece to the flag enum.
184
185 Some regex libraries assume that the text given is null terminated, while
186 others require the length be given as a separate parameter. Therefore for
187 maximum portability assume that @a text cannot contain embedded nulls.
188
189 When the <b>Matches(const wxChar *text, int flags = 0)</b> form is used,
190 a wxStrlen() will be done internally if the regex library requires the
191 length. When using Matches() in a loop the <b>Matches(text, flags, len)</b>
192 form can be used instead, making it possible to avoid a wxStrlen() inside
193 the loop.
194
195 May only be called after successful call to Compile().
196 */
197 bool Matches(const wxChar* text, int flags = 0) const;
198 const bool Matches(const wxChar* text, int flags, size_t len) const;
199 //@}
200
201 /**
202 Matches the precompiled regular expression against the string @a text,
203 returns @true if matches and @false otherwise.
204
205 @e Flags may be combination of @c wxRE_NOTBOL and @c wxRE_NOTEOL.
206 @todo Add referece to the flag enum.
207
208 May only be called after successful call to Compile().
209 */
210 const bool Matches(const wxString& text, int flags = 0) const;
211
212 /**
213 Replaces the current regular expression in the string pointed to by
214 @a text, with the text in @a replacement and return number of matches
215 replaced (maybe 0 if none found) or -1 on error.
216
217 The replacement text may contain back references @c \\number which will be
218 replaced with the value of the corresponding subexpression in the
219 pattern match. @c \\0 corresponds to the entire match and @c \& is a
220 synonym for it. Backslash may be used to quote itself or @c \& character.
221
222 @a maxMatches may be used to limit the number of replacements made, setting
223 it to 1, for example, will only replace first occurrence (if any) of the
224 pattern in the text while default value of 0 means replace all.
225 */
226 int Replace(wxString* text, const wxString& replacement,
227 size_t maxMatches = 0) const;
228
229 /**
230 Replace all occurrences: this is actually a synonym for
231 Replace().
232
233 @see ReplaceFirst()
234 */
235 int ReplaceAll(wxString* text, const wxString& replacement) const;
236
237 /**
238 Replace the first occurrence.
239 */
240 int ReplaceFirst(wxString* text, const wxString& replacement) const;
241 };
242