]> git.saurik.com Git - wxWidgets.git/blob - tests/regex/regex.cpp
3905dcdcc5ce53aa82b6aa924faf427c4c1d3d3e
[wxWidgets.git] / tests / regex / regex.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: tests/regex/regex.cpp
3 // Purpose: Test the built-in regex lib and wxRegEx
4 // Author: Mike Wetherell
5 // RCS-ID: $Id$
6 // Copyright: (c) 2004 Mike Wetherell
7 // Licence: wxWidgets licence
8 ///////////////////////////////////////////////////////////////////////////////
9
10 //
11 // Notes:
12 //
13 // To run just one section, say wx_1, do this:
14 // test regex.wx_1
15 //
16 // To run all the regex tests:
17 // test regex
18 //
19 // Some tests must be skipped since they use features which we do not make
20 // available through wxRegEx. To see the list of tests that have been skipped
21 // turn on verbose logging, e.g.:
22 // test --verbose regex
23 //
24 // The tests here are for the builtin library, tests for wxRegEx in general
25 // should go in another module.
26 //
27 // The tests are generated from Henry Spencer's reg.test, additional tests
28 // can be added in wxreg.test. These test files are then turned into a C++
29 // include file 'regex.inc' (included below) using a script 'regex.pl'.
30 //
31
32 #if defined(__GNUG__) && !defined(__APPLE__)
33 #pragma implementation
34 #pragma interface
35 #endif
36
37 // For compilers that support precompilation, includes "wx/wx.h".
38 #include "wx/wxprec.h"
39
40 #ifdef __BORLANDC__
41 #pragma hdrstop
42 #endif
43
44 // for all others, include the necessary headers
45 #ifndef WX_PRECOMP
46 #include "wx/wx.h"
47 #endif
48
49 #include "wx/regex.h"
50 #include "wx/cppunit.h"
51 #include <iomanip>
52 #include <stdexcept>
53
54 using namespace std;
55 using namespace CppUnit;
56
57 // many of the tests are specific to the builtin regex lib, so only attempt
58 // to do them when using the builtin regex lib.
59 //
60 #ifdef wxHAS_REGEX_ADVANCED
61
62
63 ///////////////////////////////////////////////////////////////////////////////
64 // The test case - an instance represents a single test
65
66 class RegExTestCase : public TestCase
67 {
68 public:
69 // constructor - create a single testcase
70 RegExTestCase(
71 const string& name,
72 const char *mode,
73 const char *id,
74 const char *flags,
75 const char *pattern,
76 const char *data,
77 const vector<const char *>& expected);
78
79 protected:
80 // run this testcase
81 void runTest();
82
83 private:
84 // workers
85 wxString Conv(const char *str);
86 void parseFlags(const wxString& flags);
87 void doTest(int flavor);
88 static size_t matchCount(const wxString& expr, int flags);
89 static wxString quote(const wxString& arg);
90 const wxChar *convError() const { return _T("<cannot convert>"); }
91
92 // assertions - adds some information about the test that failed
93 void fail(const wxString& msg) const;
94 void failIf(bool condition, const wxString& msg) const
95 { if (condition) fail(msg); }
96
97 // mode, id, flags, pattern, test data, expected results...
98 int m_mode;
99 wxString m_id;
100 wxString m_flags;
101 wxString m_pattern;
102 wxString m_data;
103 wxArrayString m_expected;
104
105 // the flag decoded
106 int m_compileFlags;
107 int m_matchFlags;
108 bool m_basic;
109 bool m_extended;
110 bool m_advanced;
111 };
112
113 // constructor - throws Exception on failure
114 //
115 RegExTestCase::RegExTestCase(
116 const string& name,
117 const char *mode,
118 const char *id,
119 const char *flags,
120 const char *pattern,
121 const char *data,
122 const vector<const char *>& expected)
123 :
124 TestCase(name),
125 m_mode(mode[0]),
126 m_id(Conv(id)),
127 m_flags(Conv(flags)),
128 m_pattern(Conv(pattern)),
129 m_data(Conv(data)),
130 m_compileFlags(0),
131 m_matchFlags(0),
132 m_basic(false),
133 m_extended(false),
134 m_advanced(false)
135 {
136 bool badconv = m_pattern == convError() || m_data == convError();
137 vector<const char *>::const_iterator it;
138
139 for (it = expected.begin(); it != expected.end(); ++it) {
140 m_expected.push_back(Conv(*it));
141 badconv = badconv || *m_expected.rbegin() == convError();
142 }
143
144 failIf(badconv, _T("cannot convert to default character encoding"));
145
146 // the flags need further parsing...
147 parseFlags(m_flags);
148
149 #ifndef wxHAS_REGEX_ADVANCED
150 failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
151 #endif
152 }
153
154 // convert a string from UTF8 to the internal encoding
155 //
156 wxString RegExTestCase::Conv(const char *str)
157 {
158 const wxWCharBuffer wstr = wxConvUTF8.cMB2WC(str);
159 const wxWC2WXbuf buf = wxConvCurrent->cWC2WX(wstr);
160
161 if (!buf || wcscmp(wxConvCurrent->cWX2WC(buf), wstr) != 0)
162 return convError();
163 else
164 return buf;
165 }
166
167 // Parse flags
168 //
169 void RegExTestCase::parseFlags(const wxString& flags)
170 {
171 for (const wxChar *p = flags; *p; p++) {
172 switch (*p) {
173 // noop
174 case '-': break;
175
176 // we don't fully support these flags, but they don't stop us
177 // checking for success of failure of the match, so treat as noop
178 case 'A': case 'B': case 'E': case 'H':
179 case 'I': case 'L': case 'M': case 'N':
180 case 'P': case 'Q': case 'R': case 'S':
181 case 'T': case 'U': case '%':
182 break;
183
184 // match options
185 case '^': m_matchFlags |= wxRE_NOTBOL; break;
186 case '$': m_matchFlags |= wxRE_NOTEOL; break;
187 #if wxUSE_UNICODE
188 case '*': break;
189 #endif
190 // compile options
191 case '&': m_advanced = m_basic = true; break;
192 case 'b': m_basic = true; break;
193 case 'e': m_extended = true; break;
194 case 'i': m_compileFlags |= wxRE_ICASE; break;
195 case 'o': m_compileFlags |= wxRE_NOSUB; break;
196 case 'n': m_compileFlags |= wxRE_NEWLINE; break;
197 case 't': if (strchr("ep", m_mode)) break; // else fall through...
198
199 // anything else we must skip the test
200 default:
201 fail(wxString::Format(
202 _T("requires unsupported flag '%c'"), *p));
203 }
204 }
205 }
206
207 // Try test for all flavours of expression specified
208 //
209 void RegExTestCase::runTest()
210 {
211 if (m_basic)
212 doTest(wxRE_BASIC);
213 if (m_extended)
214 doTest(wxRE_EXTENDED);
215 #ifdef wxHAS_REGEX_ADVANCED
216 if (m_advanced || (!m_basic && !m_extended))
217 doTest(wxRE_ADVANCED);
218 #endif
219 }
220
221 // Try the test for a single flavour of expression
222 //
223 void RegExTestCase::doTest(int flavor)
224 {
225 wxRegEx re(m_pattern, m_compileFlags | flavor);
226
227 // 'e' - test that the pattern fails to compile
228 if (m_mode == 'e')
229 {
230 failIf(re.IsValid(), _T("compile suceeded (should fail)"));
231 return;
232 }
233 failIf(!re.IsValid(), _T("compile failed"));
234
235 bool matches = re.Matches(m_data, m_matchFlags);
236
237 // 'f' or 'p' - test that the pattern does not match
238 if (m_mode == 'f' || m_mode == 'p')
239 {
240 failIf(matches, _T("match suceeded (should fail)"));
241 return;
242 }
243
244 // otherwise 'm' or 'i' - test the pattern does match
245 failIf(!matches, _T("match failed"));
246
247 // Check that wxRegEx is going to allocate a large enough array for the
248 // results we are supposed to get
249 failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
250 _T("wxRegEx has not allocated a large enough array for the ")
251 _T("number of results expected"));
252
253 wxString result;
254 size_t start, len;
255
256 for (size_t i = 0; i < m_expected.size(); i++) {
257 failIf(!re.GetMatch(&start, &len, i), wxString::Format(
258 _T("wxRegEx::GetMatch failed for match %d"), i));
259
260 // m - check the match returns the strings given
261 if (m_mode == 'm')
262 if (start < INT_MAX)
263 result = m_data.substr(start, len);
264 else
265 result = _T("");
266
267 // i - check the match returns the offsets given
268 else if (m_mode == 'i')
269 if (start < INT_MAX)
270 result = wxString::Format(_T("%d %d"), start, start + len - 1);
271 else
272 result = _T("-1 -1");
273
274 failIf(result != m_expected[i], wxString::Format(
275 _T("match(%d) == %s, expected == %s"), i,
276 quote(result).c_str(), quote(m_expected[i]).c_str()));
277 }
278 }
279
280 // assertion - adds some information about the test that failed
281 //
282 void RegExTestCase::fail(const wxString& msg) const
283 {
284 wxString str;
285 wxArrayString::const_iterator it;
286
287 str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
288 << quote(m_pattern) << _T(" ") << quote(m_data);
289
290 for (it = m_expected.begin(); it != m_expected.end(); ++it)
291 str << _T(" ") << quote(*it);
292
293 if (str.length() > 77)
294 str = str.substr(0, 74) + _T("...");
295
296 str << _T("\n ") << msg;
297
298 // no lossy convs so using utf8
299 CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
300 }
301
302 // quote a string so that it can be displayed (static)
303 //
304 wxString RegExTestCase::quote(const wxString& arg)
305 {
306 const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\");
307 const wxChar *escapes = _T("abtnvfr\"\\");
308 wxString str;
309
310 for (size_t i = 0; i < arg.length(); i++) {
311 wxUChar ch = arg[i];
312 const wxChar *p = wxStrchr(needEscape, ch);
313
314 if (p)
315 str += wxString::Format(_T("\\%c"), escapes[p - needEscape]);
316 else if (wxIscntrl(ch))
317 str += wxString::Format(_T("\\%03o"), ch);
318 else
319 str += ch;
320 }
321
322 return str.length() == arg.length() && str.find(' ') == wxString::npos ?
323 str : _T("\"") + str + _T("\"");
324 }
325
326 // Count the number of subexpressions (taken from wxRegExImpl::Compile)
327 //
328 size_t RegExTestCase::matchCount(const wxString& expr, int flags)
329 {
330 // there is always one for the whole expression
331 size_t nMatches = 1;
332
333 // and some more for bracketed subexperessions
334 for ( const wxChar *cptr = expr; *cptr; cptr++ )
335 {
336 if ( *cptr == _T('\\') )
337 {
338 // in basic RE syntax groups are inside \(...\)
339 if ( *++cptr == _T('(') && (flags & wxRE_BASIC) )
340 {
341 nMatches++;
342 }
343 }
344 else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
345 {
346 // we know that the previous character is not an unquoted
347 // backslash because it would have been eaten above, so we
348 // have a bar '(' and this indicates a group start for the
349 // extended syntax
350 nMatches++;
351 }
352 }
353
354 return nMatches;
355 }
356
357
358 ///////////////////////////////////////////////////////////////////////////////
359 // Test suite
360 //
361 // In a non-unicode build the regex is affected by the current locale, so
362 // this derived TestSuite is used. It sets the locale in its run() method
363 // for the duration of the regex tests.
364
365 class RegExTestSuite : public TestSuite
366 {
367 public:
368 RegExTestSuite(string name);
369 void run(TestResult *result);
370 void add(const char *mode, const char *id, const char *flags,
371 const char *pattern, const char *data, const char *expected, ...);
372 };
373
374 // constructor, sets the locale so that it is set when the tests are added
375 //
376 RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
377 {
378 setlocale(LC_ALL, "");
379 }
380
381 // run the test suite, sets the locale again since it may have been changed
382 // by another test since this suite was crated
383 //
384 void RegExTestSuite::run(TestResult *result)
385 {
386 setlocale(LC_ALL, "");
387 TestSuite::run(result);
388 }
389
390 // Add a testcase to the suite
391 //
392 void RegExTestSuite::add(
393 const char *mode,
394 const char *id,
395 const char *flags,
396 const char *pattern,
397 const char *data,
398 const char *expected, ...)
399 {
400 string name = getName() + "." + id;
401
402 vector<const char *> expected_results;
403 va_list ap;
404
405 for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
406 expected_results.push_back(expected);
407
408 va_end(ap);
409
410 try {
411 addTest(new RegExTestCase(
412 name, mode, id, flags, pattern, data, expected_results));
413 }
414 catch (Exception& e) {
415 wxLogInfo(wxString::Format(_T("skipping: %s\n %s\n"),
416 wxString(name.c_str(), wxConvUTF8).c_str(),
417 wxString(e.what(), wxConvUTF8).c_str()));
418 }
419 }
420
421
422 // Include the generated tests
423 //
424 #include "regex.inc"
425
426
427 #endif // wxHAS_REGEX_ADVANCED