]>
git.saurik.com Git - wxWidgets.git/blob - src/common/regex.cpp
   1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/regex.cpp 
   3 // Purpose:     regular expression matching 
   4 // Author:      Karsten Ballüder and Vadim Zeitlin 
   8 // Copyright:   (c) 2000 Karsten Ballüder <ballueder@gmx.net> 
   9 //                  2001 Vadim Zeitlin <vadim@wxwindows.org> 
  10 // Licence:     wxWindows licence 
  11 /////////////////////////////////////////////////////////////////////////////// 
  13 // ============================================================================ 
  15 // ============================================================================ 
  17 // ---------------------------------------------------------------------------- 
  19 // ---------------------------------------------------------------------------- 
  22     #pragma implementation "regex.h" 
  25 // For compilers that support precompilation, includes "wx.h". 
  26 #include "wx/wxprec.h" 
  35     #include "wx/object.h" 
  36     #include "wx/string.h" 
  41 // FreeBSD & Watcom require this, it probably doesn't hurt for others 
  42 #if defined(__UNIX__) || defined(__WATCOMC__) || defined(__DIGITALMARS__) 
  43     #include <sys/types.h> 
  50 // ---------------------------------------------------------------------------- 
  52 // ---------------------------------------------------------------------------- 
  54 // the real implementation of wxRegEx 
  62     // return TRUE if Compile() had been called successfully 
         // (this only reports the m_isCompiled flag)
  63     bool IsValid() const { return m_isCompiled
; } 
  66     bool Compile(const wxString
& expr
, int flags 
= 0); 
  67     bool Matches(const wxChar 
*str
, int flags 
= 0) const; 
  68     bool GetMatch(size_t *start
, size_t *len
, size_t index 
= 0) const; 
  69     int Replace(wxString 
*pattern
, const wxString
& replacement
, 
  70                 size_t maxMatches 
= 0) const; 
  73     // return the string containing the error message for the given err code 
  74     wxString 
GetErrorMsg(int errorcode
) const; 
  84     // free the RE if compiled 
  95     // free the RE if any and reinit the members 
 106     // the subexpressions data 
 107     regmatch_t 
*m_Matches
; 
 110     // TRUE if m_RegEx is valid 
 114 // ============================================================================ 
 116 // ============================================================================ 
 118 // ---------------------------------------------------------------------------- 
 120 // ---------------------------------------------------------------------------- 
 122 wxRegExImpl::wxRegExImpl() 
 127 wxRegExImpl::~wxRegExImpl() 
 // Return the message text for the given regcomp()/regexec() error code.
 //
 // regerror() is called twice: first with a NULL buffer and size 0 to learn
 // how long the message is, then again to fill a buffer of that length.
 // If the reported length is 0 the translated string "unknown error" is
 // used instead.
 132 wxString 
wxRegExImpl::GetErrorMsg(int errorcode
) const 
 136     // first get the string length needed 
 137     int len 
= regerror(errorcode
, &m_RegEx
, NULL
, 0); 
         // Unicode build: fetch the message into a narrow char buffer and
         // convert it to a wxString using wxConvLibc
 143         wxCharBuffer 
buf(len
); 
 145         (void)regerror(errorcode
, &m_RegEx
, (char *)buf
.data(), len
); 
 147         msg 
= wxString(buf
.data(), wxConvLibc
); 
         // non-Unicode build: let regerror() write directly into the
         // string's own buffer
 149         (void)regerror(errorcode
, &m_RegEx
, msg
.GetWriteBuf(len
), len
); 
 152 #endif // Unicode/!Unicode 
 154     else // regerror() returned 0 
 156         msg 
= _("unknown error"); 
 // Compile the regular expression expr with regcomp().
 //
 // flags may combine wxRE_BASIC, wxRE_ICASE, wxRE_NOSUB and wxRE_NEWLINE;
 // anything else triggers an assert. Extended syntax (REG_EXTENDED) is used
 // unless wxRE_BASIC is given. When compilation fails, the regerror() text
 // is logged and m_isCompiled is set to FALSE. Otherwise, unless wxRE_NOSUB
 // was given, the groups in expr are counted so that the matches array can
 // be allocated later.
 162 bool wxRegExImpl::Compile(const wxString
& expr
, int flags
) 
 166     // translate our flags to regcomp() ones 
 167     wxASSERT_MSG( !(flags 
& 
 168                         ~(wxRE_BASIC 
| wxRE_ICASE 
| wxRE_NOSUB 
| wxRE_NEWLINE
)), 
 169                   _T("unrecognized flags in wxRegEx::Compile") ); 
 172     if ( !(flags 
& wxRE_BASIC
) ) 
 173         flagsRE 
|= REG_EXTENDED
; 
 174     if ( flags 
& wxRE_ICASE 
) 
 175         flagsRE 
|= REG_ICASE
; 
 176     if ( flags 
& wxRE_NOSUB 
) 
 177         flagsRE 
|= REG_NOSUB
; 
 178     if ( flags 
& wxRE_NEWLINE 
) 
 179         flagsRE 
|= REG_NEWLINE
; 
         // regcomp() takes a narrow string, hence the mb_str() conversion
 182     int errorcode 
= regcomp(&m_RegEx
, expr
.mb_str(), flagsRE
); 
 185         wxLogError(_("Invalid regular expression '%s': %s"), 
 186                    expr
.c_str(), GetErrorMsg(errorcode
).c_str()); 
 188         m_isCompiled 
= FALSE
; 
 192         // don't allocate the matches array now, but do it later if necessary 
 193         if ( flags 
& wxRE_NOSUB 
) 
 195             // we don't need it at all 
 200             // we will alloc the array later (only if really needed) but count 
 201             // the number of sub-expressions in the regex right now 
 203             // there is always one for the whole expression 
 206             // and some more for bracketed subexpressions 
 207             for ( const wxChar 
*cptr 
= expr
.c_str(); *cptr
; cptr
++ ) 
 209                 if ( *cptr 
== _T('\\') ) 
 211                     // in basic RE syntax groups are inside \(...\) 
 212                     if ( *++cptr 
== _T('(') && (flags 
& wxRE_BASIC
) ) 
 217                 else if ( *cptr 
== _T('(') && !(flags 
& wxRE_BASIC
) ) 
 219                     // we know that the previous character is not an unquoted 
 220                     // backslash because it would have been eaten above, so we 
 221                     // have a bare '(' and this indicates a group start for the 
 // Run the compiled expression against str using regexec().
 //
 // flags may combine wxRE_NOTBOL and wxRE_NOTEOL, which are translated to
 // REG_NOTBOL and REG_NOTEOL; anything else triggers an assert. The
 // expression must have been compiled successfully, otherwise FALSE is
 // returned through wxCHECK_MSG. The matches array is allocated here on
 // first use. A regexec() error is logged together with its regerror() text.
 234 bool wxRegExImpl::Matches(const wxChar 
*str
, int flags
) const 
 236     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 238     // translate our flags to regexec() ones 
 239     wxASSERT_MSG( !(flags 
& ~(wxRE_NOTBOL 
| wxRE_NOTEOL
)), 
 240                   _T("unrecognized flags in wxRegEx::Matches") ); 
 243     if ( flags 
& wxRE_NOTBOL 
) 
 244         flagsRE 
|= REG_NOTBOL
; 
 245     if ( flags 
& wxRE_NOTEOL 
) 
 246         flagsRE 
|= REG_NOTEOL
; 
 248     // allocate matches array if needed 
         // (this method is const, so members are modified through a
         // const-cast pointer to this object)
 249     wxRegExImpl 
*self 
= wxConstCast(this, wxRegExImpl
); 
 250     if ( !m_Matches 
&& m_nMatches 
) 
 252         self
->m_Matches 
= new regmatch_t
[m_nMatches
]; 
         // regexec() works on a narrow string, hence wxConvertWX2MB()
 256     int rc 
= regexec(&self
->m_RegEx
, wxConvertWX2MB(str
), m_nMatches
, m_Matches
, flagsRE
); 
 261             // matched successfully 
 266             wxLogError(_("Failed to match '%s' in regular expression: %s"), 
 267                        str
, GetErrorMsg(rc
).c_str()); 
 // Retrieve the position of match number index from the matches array.
 //
 // *start receives the start offset (rm_so) and *len the length
 // (rm_eo - rm_so) of the match. Returns FALSE through wxCHECK_MSG if the
 // expression is not compiled, if there is no matches array (wxRE_NOSUB),
 // or if index is not smaller than m_nMatches.
 276 bool wxRegExImpl::GetMatch(size_t *start
, size_t *len
, size_t index
) const 
 278     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 279     wxCHECK_MSG( m_Matches
, FALSE
, _T("can't use with wxRE_NOSUB") ); 
 280     wxCHECK_MSG( index 
< m_nMatches
, FALSE
, _T("invalid match index") ); 
 282     const regmatch_t
& match 
= m_Matches
[index
]; 
 285         *start 
= match
.rm_so
; 
 287         *len 
= match
.rm_eo 
- match
.rm_so
; 
 // Replace occurrences of the compiled expression in *text by replacement.
 //
 // maxMatches limits the number of replacements; 0 means no limit.
 // In replacement, a backslash followed by digits is a back reference to
 // the corresponding match and '&' is treated like "\0". Returns -1 through
 // wxCHECK_MSG if text is NULL or the expression is not compiled.
 292 int wxRegExImpl::Replace(wxString 
*text
, 
 293                          const wxString
& replacement
, 
 294                          size_t maxMatches
) const 
 296     wxCHECK_MSG( text
, -1, _T("NULL text in wxRegEx::Replace") ); 
 297     wxCHECK_MSG( IsValid(), -1, _T("must successfully Compile() first") ); 
 299     // the replacement text 
 302     // attempt at optimization: don't iterate over the string if it doesn't 
 303     // contain back references at all 
 304     bool mayHaveBackrefs 
= 
 305         replacement
.find_first_of(_T("\\&")) != wxString::npos
; 
 307     if ( !mayHaveBackrefs 
) 
 309         textNew 
= replacement
; 
 312     // the position where we start looking for the match 
 314     // NB: initial version had a nasty bug because it used a wxChar* instead of 
 315     //     an index but the problem is that replace() in the loop invalidates 
 316     //     all pointers into the string so we have to use indices instead 
 317     size_t matchStart 
= 0; 
 319     // number of replacement made: we won't make more than maxMatches of them 
 320     // (unless maxMatches is 0 which doesn't limit the number of replacements) 
 321     size_t countRepl 
= 0; 
 323     // note that "^" shouldn't match after the first call to Matches() so we 
 324     // use wxRE_NOTBOL to prevent it from happening 
 325     while ( (!maxMatches 
|| countRepl 
< maxMatches
) && 
 326             Matches(text
->c_str() + matchStart
, countRepl 
? wxRE_NOTBOL 
: 0) ) 
 328         // the string possibly contains back references: we need to calculate 
 329         // the replacement text anew after each match 
 330         if ( mayHaveBackrefs 
) 
             // reset the flag: it is set again below only if a back
             // reference is really found in the replacement
 332             mayHaveBackrefs 
= FALSE
; 
 334             textNew
.reserve(replacement
.length()); 
 336             for ( const wxChar 
*p 
= replacement
.c_str(); *p
; p
++ ) 
                 // (size_t)-1 means "no back reference at this position"
 338                 size_t index 
= (size_t)-1; 
 340                 if ( *p 
== _T('\\') ) 
 342                     if ( wxIsdigit(*++p
) ) 
 346                         index 
= (size_t)wxStrtoul(p
, &end
, 10); 
 347                         p 
= end 
- 1; // -1 to compensate for p++ in the loop 
 349                     //else: backslash used as escape character 
 351                 else if ( *p 
== _T('&') ) 
 353                     // treat this as "\0" for compatibility with ed and such 
 357                 // do we have a back reference? 
 358                 if ( index 
!= (size_t)-1 ) 
 362                     if ( !GetMatch(&start
, &len
, index
) ) 
 364                         wxFAIL_MSG( _T("invalid back reference") ); 
                         // match offsets are relative to the position
                         // where the search started, hence + matchStart
 370                         textNew 
+= wxString(text
->c_str() + matchStart 
+ start
, 
 373                         mayHaveBackrefs 
= TRUE
; 
 376                 else // ordinary character 
 384         if ( !GetMatch(&start
, &len
) ) 
 386             // we did have match as Matches() returned true above! 
 387             wxFAIL_MSG( _T("internal logic error in wxRegEx::Replace") ); 
 393         text
->replace(matchStart
, len
, textNew
); 
         // advance by the length of the inserted replacement text
 397         matchStart 
+= textNew
.length(); 
 403 // ---------------------------------------------------------------------------- 
 404 // wxRegEx: all methods are mostly forwarded to wxRegExImpl 
 405 // ---------------------------------------------------------------------------- 
 // Compile expr with the given flags by forwarding to
 // wxRegExImpl::Compile(). A wxRegExImpl object is allocated into m_impl
 // here (presumably only when none exists yet - the guarding condition
 // should be confirmed).
 418 bool wxRegEx::Compile(const wxString
& expr
, int flags
) 
 422         m_impl 
= new wxRegExImpl
; 
 425     if ( !m_impl
->Compile(expr
, flags
) ) 
 427         // error message already given in wxRegExImpl::Compile 
 // Forward to wxRegExImpl::Matches() with the same str and flags.
 // Returns FALSE through wxCHECK_MSG if IsValid() is false.
 437 bool wxRegEx::Matches(const wxChar 
*str
, int flags
) const 
 439     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 441     return m_impl
->Matches(str
, flags
); 
 // Forward to wxRegExImpl::GetMatch() with the same start, len and index.
 // Returns FALSE through wxCHECK_MSG if IsValid() is false.
 444 bool wxRegEx::GetMatch(size_t *start
, size_t *len
, size_t index
) const 
 446     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 448     return m_impl
->GetMatch(start
, len
, index
); 
 // Return the part of text covered by match number index.
 // The position is obtained from the GetMatch(start, len, index) overload;
 // if that call fails, wxEmptyString is returned.
 451 wxString 
wxRegEx::GetMatch(const wxString
& text
, size_t index
) const 
 454     if ( !GetMatch(&start
, &len
, index
) ) 
 455         return wxEmptyString
; 
 457     return text
.Mid(start
, len
); 
 // Forward to wxRegExImpl::Replace() with the same arguments.
 // Returns -1 through wxCHECK_MSG if IsValid() is false.
 460 int wxRegEx::Replace(wxString 
*pattern
, 
 461                      const wxString
& replacement
, 
 462                      size_t maxMatches
) const 
 464     wxCHECK_MSG( IsValid(), -1, _T("must successfully Compile() first") ); 
 466     return m_impl
->Replace(pattern
, replacement
, maxMatches
); 
 469 #endif // wxUSE_REGEX