tests/regex/regex.cpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // Name:        tests/regex/regex.cpp
   3 // Purpose:     Test the built-in regex lib and wxRegEx
   4 // Author:      Mike Wetherell
   5 // RCS-ID:      $Id$
   6 // Copyright:   (c) 2004 Mike Wetherell
   7 // Licence:     wxWidgets licence
   8 ///////////////////////////////////////////////////////////////////////////////
   9
  10 //
  11 // Notes:
  12 //
  13 // To run just one section, say wx_1, do this:
  14 //  test regex.wx_1
  15 //
  16 // To run all the regex tests:
  17 //  test regex
  18 //
  19 // Some tests must be skipped since they use features which we do not make
  20 // available through wxRegEx. To see the list of tests that have been skipped
  21 // turn on verbose logging, e.g.:
  22 //  test --verbose regex
  23 //
  24 // The tests here are for the builtin library, tests for wxRegEx in general
  25 // should go in another module.
  26 //
// The tests are generated from Henry Spencer's reg.test; additional tests
// can be added in wxreg.test. These test files are then turned into a C++
// include file 'regex.inc' (included below) using a script 'regex.pl'.
  30 //
  31
  32 #if defined(__GNUG__) && !defined(__APPLE__)
  33     #pragma implementation
  34     #pragma interface
  35 #endif
  36
  37 // For compilers that support precompilation, includes "wx/wx.h".
  38 #include "wx/wxprec.h"
  39
  40 #ifdef __BORLANDC__
  41     #pragma hdrstop
  42 #endif
  43
  44 // for all others, include the necessary headers
  45 #ifndef WX_PRECOMP
  46     #include "wx/wx.h"
  47 #endif
  48
  49 #include "wx/regex.h"
  50 #include "wx/cppunit.h"
  51 #include <iomanip>
  52 #include <stdexcept>
  53
  54 using namespace std;
  55 using namespace CppUnit;
  56
// many of the tests are specific to the builtin regex lib, so only attempt
// to do them when using the builtin regex lib.
  59 //
  60 #ifdef wxHAS_REGEX_ADVANCED
  61
  62
  63 ///////////////////////////////////////////////////////////////////////////////
  64 // The test case - an instance represents a single test
  65
  66 class RegExTestCase : public TestCase
  67 {
  68 public:
  69     // constructor - create a single testcase
  70     RegExTestCase(
  71         const string& name,
  72         const char *mode,
  73         const char *id,
  74         const char *flags,
  75         const char *pattern,
  76         const char *data,
  77         const vector<const char *>& expected);
  78
  79 protected:
  80     // run this testcase
  81     void runTest();
  82
  83 private:
  84     // workers
  85     wxString Conv(const char *str);
  86     void parseFlags(const wxString& flags);
  87     void doTest(int flavor);
  88     static size_t matchCount(const wxString& expr, int flags);
  89     static wxString quote(const wxString& arg);
  90     const wxChar *convError() const { return _T("<cannot convert>"); }
  91
  92     // assertions - adds some information about the test that failed
  93     void fail(const wxString& msg) const;
  94     void failIf(bool condition, const wxString& msg) const
  95         { if (condition) fail(msg); }
  96
  97     // mode, id, flags, pattern, test data, expected results...
  98     int m_mode;
  99     wxString m_id;
 100     wxString m_flags;
 101     wxString m_pattern;
 102     wxString m_data;
 103     wxArrayString m_expected;
 104
 105     // the flag decoded
 106     int m_compileFlags;
 107     int m_matchFlags;
 108     bool m_basic;
 109     bool m_extended;
 110     bool m_advanced;
 111 };
 112
 113 // constructor - throws Exception on failure
 114 //
 115 RegExTestCase::RegExTestCase(
 116     const string& name,
 117     const char *mode,
 118     const char *id,
 119     const char *flags,
 120     const char *pattern,
 121     const char *data,
 122     const vector<const char *>& expected)
 123   :
 124     TestCase(name),
 125     m_mode(mode[0]),
 126     m_id(Conv(id)),
 127     m_flags(Conv(flags)),
 128     m_pattern(Conv(pattern)),
 129     m_data(Conv(data)),
 130     m_compileFlags(0),
 131     m_matchFlags(0),
 132     m_basic(false),
 133     m_extended(false),
 134     m_advanced(false)
 135 {
 136     bool badconv = m_pattern == convError() || m_data == convError();
 137     vector<const char *>::const_iterator it;
 138
 139     for (it = expected.begin(); it != expected.end(); ++it) {
 140         m_expected.push_back(Conv(*it));
 141         badconv = badconv || *m_expected.rbegin() == convError();
 142     }
 143
 144     failIf(badconv, _T("cannot convert to default character encoding"));
 145
 146     // the flags need further parsing...
 147     parseFlags(m_flags);
 148
 149 #ifndef wxHAS_REGEX_ADVANCED
 150     failIf(!m_basic && !m_extended, _T("advanced regexs not available"));
 151 #endif
 152 }
 153
 154 // convert a string from UTF8 to the internal encoding
 155 //
 156 wxString RegExTestCase::Conv(const char *str)
 157 {
 158     const wxWCharBuffer wstr = wxConvUTF8.cMB2WC(str);
 159     const wxWC2WXbuf buf = wxConvCurrent->cWC2WX(wstr);
 160
 161     if (!buf || wcscmp(wxConvCurrent->cWX2WC(buf), wstr) != 0)
 162         return convError();
 163     else
 164         return buf;
 165 }
 166
 167 // Parse flags
 168 //
 169 void RegExTestCase::parseFlags(const wxString& flags)
 170 {
 171     for (const wxChar *p = flags; *p; p++) {
 172         switch (*p) {
 173             // noop
 174             case '-': break;
 175
 176             // we don't fully support these flags, but they don't stop us
 177             // checking for success of failure of the match, so treat as noop
 178             case 'A': case 'B': case 'E': case 'H':
 179             case 'I': case 'L': case 'M': case 'N':
 180             case 'P': case 'Q': case 'R': case 'S':
 181             case 'T': case 'U': case '%':
 182                 break;
 183
 184             // match options
 185             case '^': m_matchFlags |= wxRE_NOTBOL; break;
 186             case '$': m_matchFlags |= wxRE_NOTEOL; break;
 187 #if wxUSE_UNICODE
 188             case '*': break;
 189 #endif
 190             // compile options
 191             case '&': m_advanced = m_basic = true; break;
 192             case 'b': m_basic = true; break;
 193             case 'e': m_extended = true; break;
 194             case 'i': m_compileFlags |= wxRE_ICASE; break;
 195             case 'o': m_compileFlags |= wxRE_NOSUB; break;
 196             case 'n': m_compileFlags |= wxRE_NEWLINE; break;
 197             case 't': if (strchr("ep", m_mode)) break; // else fall through...
 198
 199             // anything else we must skip the test
 200             default:
 201                 fail(wxString::Format(
 202                      _T("requires unsupported flag '%c'"), *p));
 203         }
 204     }
 205 }
 206
 207 // Try test for all flavours of expression specified
 208 //
 209 void RegExTestCase::runTest()
 210 {
 211     if (m_basic)
 212         doTest(wxRE_BASIC);
 213     if (m_extended)
 214         doTest(wxRE_EXTENDED);
 215 #ifdef wxHAS_REGEX_ADVANCED
 216     if (m_advanced || (!m_basic && !m_extended))
 217         doTest(wxRE_ADVANCED);
 218 #endif
 219 }
 220
 221 // Try the test for a single flavour of expression
 222 //
 223 void RegExTestCase::doTest(int flavor)
 224 {
 225     wxRegEx re(m_pattern, m_compileFlags | flavor);
 226
 227     // 'e' - test that the pattern fails to compile
 228     if (m_mode == 'e')
 229     {
 230         failIf(re.IsValid(), _T("compile suceeded (should fail)"));
 231         return;
 232     }
 233     failIf(!re.IsValid(), _T("compile failed"));
 234
 235     bool matches = re.Matches(m_data, m_matchFlags);
 236
 237     // 'f' or 'p' - test that the pattern does not match
 238     if (m_mode == 'f' || m_mode == 'p')
 239     {
 240         failIf(matches, _T("match suceeded (should fail)"));
 241         return;
 242     }
 243
 244     // otherwise 'm' or 'i' - test the pattern does match
 245     failIf(!matches, _T("match failed"));
 246
 247     // Check that wxRegEx is going to allocate a large enough array for the
 248     // results we are supposed to get
 249     failIf(m_expected.size() > matchCount(m_pattern, m_compileFlags | flavor),
 250            _T("wxRegEx has not allocated a large enough array for the ")
 251            _T("number of results expected"));
 252
 253     wxString result;
 254     size_t start, len;
 255
 256     for (size_t i = 0; i < m_expected.size(); i++) {
 257         failIf(!re.GetMatch(&start, &len, i), wxString::Format(
 258                 _T("wxRegEx::GetMatch failed for match %d"), i));
 259
 260         // m - check the match returns the strings given
 261         if (m_mode == 'm')
 262             if (start < INT_MAX)
 263                 result = m_data.substr(start, len);
 264             else
 265                 result = _T("");
 266
 267         // i - check the match returns the offsets given
 268         else if (m_mode == 'i')
 269             if (start < INT_MAX)
 270                 result = wxString::Format(_T("%d %d"), start, start + len - 1);
 271             else
 272                 result = _T("-1 -1");
 273
 274         failIf(result != m_expected[i], wxString::Format(
 275                 _T("match(%d) == %s, expected == %s"), i,
 276                 quote(result).c_str(), quote(m_expected[i]).c_str()));
 277     }
 278 }
 279
 280 // assertion - adds some information about the test that failed
 281 //
 282 void RegExTestCase::fail(const wxString& msg) const
 283 {
 284     wxString str;
 285     wxArrayString::const_iterator it;
 286
 287     str << (wxChar)m_mode << _T(" ") << m_id << _T(" ") << m_flags << _T(" ")
 288         << quote(m_pattern) << _T(" ") << quote(m_data);
 289
 290     for (it = m_expected.begin(); it != m_expected.end(); ++it)
 291         str << _T(" ") << quote(*it);
 292
 293     if (str.length() > 77)
 294         str = str.substr(0, 74) + _T("...");
 295
 296     str << _T("\n ") << msg;
 297
 298     // no lossy convs so using utf8
 299     CPPUNIT_FAIL(string(str.mb_str(wxConvUTF8)));
 300 }
 301
 302 // quote a string so that it can be displayed (static)
 303 //
 304 wxString RegExTestCase::quote(const wxString& arg)
 305 {
 306     const wxChar *needEscape = _T("\a\b\t\n\v\f\r\"\\");
 307     const wxChar *escapes = _T("abtnvfr\"\\");
 308     wxString str;
 309
 310     for (size_t i = 0; i < arg.length(); i++) {
 311         wxUChar ch = arg[i];
 312         const wxChar *p = wxStrchr(needEscape, ch);
 313
 314         if (p)
 315             str += wxString::Format(_T("\\%c"), escapes[p - needEscape]);
 316         else if (wxIscntrl(ch))
 317             str += wxString::Format(_T("\\%03o"), ch);
 318         else
 319             str += ch;
 320     }
 321
 322     return str.length() == arg.length() && str.find(' ') == wxString::npos ?
 323         str : _T("\"") + str + _T("\"");
 324 }
 325
 326 // Count the number of subexpressions (taken from wxRegExImpl::Compile)
 327 //
 328 size_t RegExTestCase::matchCount(const wxString& expr, int flags)
 329 {
 330     // there is always one for the whole expression
 331     size_t nMatches = 1;
 332
 333     // and some more for bracketed subexperessions
 334     for ( const wxChar *cptr = expr; *cptr; cptr++ )
 335     {
 336         if ( *cptr == _T('\\') )
 337         {
 338             // in basic RE syntax groups are inside \(...\)
 339             if ( *++cptr == _T('(') && (flags & wxRE_BASIC) )
 340             {
 341                 nMatches++;
 342             }
 343         }
 344         else if ( *cptr == _T('(') && !(flags & wxRE_BASIC) )
 345         {
 346             // we know that the previous character is not an unquoted
 347             // backslash because it would have been eaten above, so we
 348             // have a bar '(' and this indicates a group start for the
 349             // extended syntax
 350             nMatches++;
 351         }
 352     }
 353
 354     return nMatches;
 355 }
 356
 357
 358 ///////////////////////////////////////////////////////////////////////////////
 359 // Test suite
 360 //
// In a non-unicode build the regex is affected by the current locale, so
// this derived TestSuite is used. It sets the locale in its run() method
// for the duration of the regex tests.
 364
 365 class RegExTestSuite : public TestSuite
 366 {
 367 public:
 368     RegExTestSuite(string name);
 369     void run(TestResult *result);
 370     void add(const char *mode, const char *id, const char *flags,
 371              const char *pattern, const char *data, const char *expected, ...);
 372 };
 373
 374 // constructor, sets the locale so that it is set when the tests are added
 375 //
 376 RegExTestSuite::RegExTestSuite(string name) : TestSuite(name)
 377 {
 378     setlocale(LC_ALL, "");
 379 }
 380
 381 // run the test suite, sets the locale again since it may have been changed
 382 // by another test since this suite was crated
 383 //
 384 void RegExTestSuite::run(TestResult *result)
 385 {
 386     setlocale(LC_ALL, "");
 387     TestSuite::run(result);
 388 }
 389
 390 // Add a testcase to the suite
 391 //
 392 void RegExTestSuite::add(
 393     const char *mode,
 394     const char *id,
 395     const char *flags,
 396     const char *pattern,
 397     const char *data,
 398     const char *expected, ...)
 399 {
 400     string name = getName() + "." + id;
 401
 402     vector<const char *> expected_results;
 403     va_list ap;
 404
 405     for (va_start(ap, expected); expected; expected = va_arg(ap, const char *))
 406         expected_results.push_back(expected);
 407
 408     va_end(ap);
 409
 410     try {
 411         addTest(new RegExTestCase(
 412             name, mode, id, flags, pattern, data, expected_results));
 413     }
 414     catch (Exception& e) {
 415         wxLogInfo(wxString::Format(_T("skipping: %s\n %s\n"),
 416             wxString(name.c_str(), wxConvUTF8).c_str(),
 417             wxString(e.what(), wxConvUTF8).c_str()));
 418     }
 419 }
 420
 421
 422 // Include the generated tests
 423 //
 424 #include "regex.inc"
 425
 426
 427 #endif // wxHAS_REGEX_ADVANCED