]> git.saurik.com Git - wxWidgets.git/blame - tests/regex/regextest.cpp
Fix caching wrong length in wxString(str, len) ctor in UTF-8 build.
[wxWidgets.git] / tests / regex / regextest.cpp
CommitLineData
e70833fb
VS
1///////////////////////////////////////////////////////////////////////////////
// Name:        tests/regex/regextest.cpp
3// Purpose: Test the built-in regex lib and wxRegEx
4// Author: Mike Wetherell
5// RCS-ID: $Id$
6// Copyright: (c) 2004 Mike Wetherell
526954c5 7// Licence: wxWindows licence
e70833fb
VS
8///////////////////////////////////////////////////////////////////////////////
9
10//
11// Notes:
12//
13// To run just one section, say wx_1, do this:
14// test regex.wx_1
15//
16// To run all the regex tests:
17// test regex
3e5f6c1c 18//
e70833fb
VS
19// Some tests must be skipped since they use features which we do not make
20// available through wxRegEx. To see the list of tests that have been skipped
21// turn on verbose logging, e.g.:
22// test --verbose regex
3e5f6c1c 23//
e70833fb 24// The tests here are for the builtin library, tests for wxRegEx in general
bc10103e 25// should go in wxregex.cpp
e70833fb
VS
26//
// The tests are generated from Henry Spencer's reg.test; additional tests
// can be added in wxreg.test. These test files are then turned into a C++
// include file 'regex.inc' (included below) using a script 'regex.pl'.
3e5f6c1c 30//
e70833fb 31
e70833fb 32// For compilers that support precompilation, includes "wx/wx.h".
8899b155 33#include "testprec.h"
e70833fb
VS
34
35#ifdef __BORLANDC__
36 #pragma hdrstop
37#endif
38
6acd08bc
VZ
39#if wxUSE_REGEX
40
e70833fb
VS
41// for all others, include the necessary headers
42#ifndef WX_PRECOMP
43 #include "wx/wx.h"
44#endif
45
56863b16
RN
46
47// many of the tests are specific to the builtin regex lib, so only attempts
48// to do them when using the builtin regex lib.
49//
50#ifdef wxHAS_REGEX_ADVANCED
51
e70833fb 52#include "wx/regex.h"
3e5f6c1c
WS
53#include <string>
54#include <vector>
e70833fb 55
3e5f6c1c
WS
56using CppUnit::Test;
57using CppUnit::TestCase;
58using CppUnit::TestSuite;
59using CppUnit::Exception;
60
0868a030
RN
61using std::string;
62using std::vector;
e70833fb
VS
63
64///////////////////////////////////////////////////////////////////////////////
65// The test case - an instance represents a single test
66
67class RegExTestCase : public TestCase
68{
69public:
70 // constructor - create a single testcase
71 RegExTestCase(
72 const string& name,
73 const char *mode,
74 const char *id,
75 const char *flags,
76 const char *pattern,
77 const char *data,
78 const vector<const char *>& expected);
79
80protected:
81 // run this testcase
82 void runTest();
83
84private:
85 // workers
86 wxString Conv(const char *str);
87 void parseFlags(const wxString& flags);
88 void doTest(int flavor);
e70833fb 89 static wxString quote(const wxString& arg);
9a83f860 90 const wxChar *convError() const { return wxT("<cannot convert>"); }
e70833fb
VS
91
92 // assertions - adds some information about the test that failed
93 void fail(const wxString& msg) const;
94 void failIf(bool condition, const wxString& msg) const
95 { if (condition) fail(msg); }
96
97 // mode, id, flags, pattern, test data, expected results...
98 int m_mode;
99 wxString m_id;
100 wxString m_flags;
101 wxString m_pattern;
102 wxString m_data;
103 wxArrayString m_expected;
104
105 // the flag decoded
106 int m_compileFlags;
107 int m_matchFlags;
108 bool m_basic;
109 bool m_extended;
110 bool m_advanced;
111};
112
113// constructor - throws Exception on failure
114//
115RegExTestCase::RegExTestCase(
116 const string& name,
117 const char *mode,
118 const char *id,
119 const char *flags,
120 const char *pattern,
121 const char *data,
122 const vector<const char *>& expected)
123 :
124 TestCase(name),
125 m_mode(mode[0]),
126 m_id(Conv(id)),
127 m_flags(Conv(flags)),
128 m_pattern(Conv(pattern)),
129 m_data(Conv(data)),
130 m_compileFlags(0),
131 m_matchFlags(0),
132 m_basic(false),
133 m_extended(false),
134 m_advanced(false)
135{
136 bool badconv = m_pattern == convError() || m_data == convError();
8899b155 137 //RN: Removing the std:: here will break MSVC6 compilation
0868a030 138 std::vector<const char *>::const_iterator it;
e70833fb
VS
139
140 for (it = expected.begin(); it != expected.end(); ++it) {
141 m_expected.push_back(Conv(*it));
142 badconv = badconv || *m_expected.rbegin() == convError();
143 }
144
9a83f860 145 failIf(badconv, wxT("cannot convert to default character encoding"));
3e5f6c1c 146
e70833fb
VS
147 // the flags need further parsing...
148 parseFlags(m_flags);
149
150#ifndef wxHAS_REGEX_ADVANCED
9a83f860 151 failIf(!m_basic && !m_extended, wxT("advanced regexs not available"));
e70833fb
VS
152#endif
153}
154
30261041
RN
155int wxWcscmp(const wchar_t* s1, const wchar_t* s2)
156{
157 size_t nLen1 = wxWcslen(s1);
158 size_t nLen2 = wxWcslen(s2);
3e5f6c1c 159
30261041
RN
160 if (nLen1 != nLen2)
161 return nLen1 - nLen2;
3e5f6c1c 162
3d68e52c 163 return memcmp(s1, s2, nLen1*sizeof(wchar_t));
30261041
RN
164}
165
e70833fb
VS
166// convert a string from UTF8 to the internal encoding
167//
168wxString RegExTestCase::Conv(const char *str)
169{
170 const wxWCharBuffer wstr = wxConvUTF8.cMB2WC(str);
171 const wxWC2WXbuf buf = wxConvCurrent->cWC2WX(wstr);
172
30261041 173 if (!buf || wxWcscmp(wxConvCurrent->cWX2WC(buf), wstr) != 0)
e70833fb 174 return convError();
895cae46
VZ
175
176 return buf;
e70833fb
VS
177}
178
179// Parse flags
180//
181void RegExTestCase::parseFlags(const wxString& flags)
182{
86501081
VS
183 for ( wxString::const_iterator p = flags.begin(); p != flags.end(); ++p )
184 {
185 switch ( (*p).GetValue() ) {
e70833fb
VS
186 // noop
187 case '-': break;
188
189 // we don't fully support these flags, but they don't stop us
190 // checking for success of failure of the match, so treat as noop
191 case 'A': case 'B': case 'E': case 'H':
192 case 'I': case 'L': case 'M': case 'N':
193 case 'P': case 'Q': case 'R': case 'S':
194 case 'T': case 'U': case '%':
195 break;
196
197 // match options
198 case '^': m_matchFlags |= wxRE_NOTBOL; break;
199 case '$': m_matchFlags |= wxRE_NOTEOL; break;
200#if wxUSE_UNICODE
201 case '*': break;
202#endif
203 // compile options
204 case '&': m_advanced = m_basic = true; break;
205 case 'b': m_basic = true; break;
206 case 'e': m_extended = true; break;
207 case 'i': m_compileFlags |= wxRE_ICASE; break;
208 case 'o': m_compileFlags |= wxRE_NOSUB; break;
209 case 'n': m_compileFlags |= wxRE_NEWLINE; break;
210 case 't': if (strchr("ep", m_mode)) break; // else fall through...
211
212 // anything else we must skip the test
213 default:
214 fail(wxString::Format(
9a83f860 215 wxT("requires unsupported flag '%c'"), *p));
e70833fb
VS
216 }
217 }
218}
219
220// Try test for all flavours of expression specified
221//
222void RegExTestCase::runTest()
223{
224 if (m_basic)
225 doTest(wxRE_BASIC);
226 if (m_extended)
227 doTest(wxRE_EXTENDED);
228#ifdef wxHAS_REGEX_ADVANCED
229 if (m_advanced || (!m_basic && !m_extended))
230 doTest(wxRE_ADVANCED);
231#endif
232}
3e5f6c1c 233
e70833fb
VS
234// Try the test for a single flavour of expression
235//
236void RegExTestCase::doTest(int flavor)
237{
238 wxRegEx re(m_pattern, m_compileFlags | flavor);
239
240 // 'e' - test that the pattern fails to compile
bc10103e 241 if (m_mode == 'e') {
9a83f860 242 failIf(re.IsValid(), wxT("compile succeeded (should fail)"));
238fb020
VS
243 return;
244 }
9a83f860 245 failIf(!re.IsValid(), wxT("compile failed"));
e70833fb
VS
246
247 bool matches = re.Matches(m_data, m_matchFlags);
248
249 // 'f' or 'p' - test that the pattern does not match
bc10103e 250 if (m_mode == 'f' || m_mode == 'p') {
9a83f860 251 failIf(matches, wxT("match succeeded (should fail)"));
238fb020
VS
252 return;
253 }
e70833fb
VS
254
255 // otherwise 'm' or 'i' - test the pattern does match
9a83f860 256 failIf(!matches, wxT("match failed"));
e70833fb 257
bc10103e
VS
258 if (m_compileFlags & wxRE_NOSUB)
259 return;
260
261 // check wxRegEx has correctly counted the number of subexpressions
0519aac9 262 wxString msg;
9a83f860
VZ
263 msg << wxT("GetMatchCount() == ") << re.GetMatchCount()
264 << wxT(", expected ") << m_expected.size();
0519aac9 265 failIf(m_expected.size() != re.GetMatchCount(), msg);
e70833fb
VS
266
267 for (size_t i = 0; i < m_expected.size(); i++) {
0519aac9
MW
268 wxString result;
269 size_t start, len;
270
271 msg.clear();
9a83f860 272 msg << wxT("wxRegEx::GetMatch failed for match ") << i;
0519aac9 273 failIf(!re.GetMatch(&start, &len, i), msg);
e70833fb
VS
274
275 // m - check the match returns the strings given
276 if (m_mode == 'm')
fe26031e 277 {
e70833fb
VS
278 if (start < INT_MAX)
279 result = m_data.substr(start, len);
280 else
9a83f860 281 result = wxT("");
fe26031e 282 }
e70833fb
VS
283
284 // i - check the match returns the offsets given
285 else if (m_mode == 'i')
fe26031e 286 {
0519aac9 287 if (start > INT_MAX)
9a83f860 288 result = wxT("-1 -1");
0519aac9 289 else if (start + len > 0)
9a83f860 290 result << start << wxT(" ") << start + len - 1;
0519aac9 291 else
9a83f860 292 result << start << wxT(" -1");
fe26031e 293 }
e70833fb 294
0519aac9 295 msg.clear();
9a83f860
VZ
296 msg << wxT("match(") << i << wxT(") == ") << quote(result)
297 << wxT(", expected == ") << quote(m_expected[i]);
0519aac9 298 failIf(result != m_expected[i], msg);
e70833fb
VS
299 }
300}
301
302// assertion - adds some information about the test that failed
303//
304void RegExTestCase::fail(const wxString& msg) const
305{
306 wxString str;
307 wxArrayString::const_iterator it;
308
9a83f860
VZ
309 str << (wxChar)m_mode << wxT(" ") << m_id << wxT(" ") << m_flags << wxT(" ")
310 << quote(m_pattern) << wxT(" ") << quote(m_data);
e70833fb
VS
311
312 for (it = m_expected.begin(); it != m_expected.end(); ++it)
9a83f860 313 str << wxT(" ") << quote(*it);
3e5f6c1c 314
e70833fb 315 if (str.length() > 77)
9a83f860 316 str = str.substr(0, 74) + wxT("...");
e70833fb 317
9a83f860 318 str << wxT("\n ") << msg;
e70833fb
VS
319
320 // no lossy convs so using utf8
321 CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
322}
323
324// quote a string so that it can be displayed (static)
325//
326wxString RegExTestCase::quote(const wxString& arg)
327{
9a83f860
VZ
328 const wxChar *needEscape = wxT("\a\b\t\n\v\f\r\"\\");
329 const wxChar *escapes = wxT("abtnvfr\"\\");
e70833fb
VS
330 wxString str;
331
332 for (size_t i = 0; i < arg.length(); i++) {
3d68e52c 333 wxChar ch = (wxChar)arg[i];
e70833fb 334 const wxChar *p = wxStrchr(needEscape, ch);
3e5f6c1c 335
e70833fb 336 if (p)
9a83f860 337 str += wxString::Format(wxT("\\%c"), escapes[p - needEscape]);
e70833fb 338 else if (wxIscntrl(ch))
9a83f860 339 str += wxString::Format(wxT("\\%03o"), ch);
e70833fb 340 else
fa3b08ca 341 str += (wxChar)ch;
e70833fb
VS
342 }
343
344 return str.length() == arg.length() && str.find(' ') == wxString::npos ?
9a83f860 345 str : wxT("\"") + str + wxT("\"");
e70833fb
VS
346}
347
e70833fb
VS
348
349///////////////////////////////////////////////////////////////////////////////
350// Test suite
e70833fb
VS
351
352class RegExTestSuite : public TestSuite
353{
354public:
0d95d20c 355 RegExTestSuite(string name) : TestSuite(name) { }
e70833fb
VS
356 void add(const char *mode, const char *id, const char *flags,
357 const char *pattern, const char *data, const char *expected, ...);
358};
359
e70833fb
VS
360// Add a testcase to the suite
361//
362void RegExTestSuite::add(
363 const char *mode,
364 const char *id,
365 const char *flags,
366 const char *pattern,
367 const char *data,
368 const char *expected, ...)
369{
370 string name = getName() + "." + id;
371
372 vector<const char *> expected_results;
373 va_list ap;
374
375 for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
376 expected_results.push_back(expected);
377
378 va_end(ap);
3e5f6c1c 379
e70833fb
VS
380 try {
381 addTest(new RegExTestCase(
382 name, mode, id, flags, pattern, data, expected_results));
383 }
384 catch (Exception& e) {
9a83f860 385 wxLogInfo(wxString::Format(wxT("skipping: %s\n %s\n"),
e70833fb
VS
386 wxString(name.c_str(), wxConvUTF8).c_str(),
387 wxString(e.what(), wxConvUTF8).c_str()));
388 }
389}
390
391
392// Include the generated tests
393//
394#include "regex.inc"
395
396
397#endif // wxHAS_REGEX_ADVANCED
6acd08bc
VZ
398
399#endif // wxUSE_REGEX