]>
git.saurik.com Git - wxWidgets.git/blob - src/common/regex.cpp
943fc2c46b4447128ad40dd8b9a5da734b8e0fa0
   1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/regex.cpp 
   3 // Purpose:     regular expression matching 
   4 // Author:      Karsten Ballüder and Vadim Zeitlin 
   8 // Copyright:   (c) 2000 Karsten Ballüder <ballueder@gmx.net> 
   9 //                  2001 Vadim Zeitlin <vadim@wxwindows.org> 
  10 // Licence:     wxWindows licence 
  11 /////////////////////////////////////////////////////////////////////////////// 
  13 // ============================================================================ 
  15 // ============================================================================ 
  17 // ---------------------------------------------------------------------------- 
  19 // ---------------------------------------------------------------------------- 
  22     #pragma implementation "regex.h" 
  25 // For compilers that support precompilation, includes "wx.h". 
  26 #include "wx/wxprec.h" 
  35     #include "wx/object.h" 
  36     #include "wx/string.h" 
  41 // FreeBSD & Watcom require this, it probably doesn't hurt for others 
  42 #if defined(__UNIX__) || defined(__WATCOMC__) || defined(__DIGITALMARS__) 
  43     #include <sys/types.h> 
  50 // ---------------------------------------------------------------------------- 
  52 // ---------------------------------------------------------------------------- 
  54 // the real implementation of wxRegEx 
  62     // return TRUE if Compile() has been called successfully 
  63     bool IsValid() const { return m_isCompiled
; } 
  66     bool Compile(const wxString
& expr
, int flags 
= 0); 
  67     bool Matches(const wxChar 
*str
, int flags 
= 0) const; 
  68     bool GetMatch(size_t *start
, size_t *len
, size_t index 
= 0) const; 
  69     int Replace(wxString 
*pattern
, const wxString
& replacement
, 
  70                 size_t maxMatches 
= 0) const; 
  73     // return the string containing the error message for the given err code 
  74     wxString 
GetErrorMsg(int errorcode
) const; 
  84     // free the RE if compiled 
  95     // free the RE if any and reinit the members 
 106     // the subexpressions data 
 107     regmatch_t 
*m_Matches
; 
 110     // TRUE if m_RegEx is valid 
 114 // ============================================================================ 
 116 // ============================================================================ 
 118 // ---------------------------------------------------------------------------- 
 120 // ---------------------------------------------------------------------------- 
 122 wxRegExImpl::wxRegExImpl() 
 127 wxRegExImpl::~wxRegExImpl() 
 132 wxString 
wxRegExImpl::GetErrorMsg(int errorcode
) const 
 136     // first get the string length needed 
 137     int len 
= regerror(errorcode
, &m_RegEx
, NULL
, 0); 
 143         wxCharBuffer 
buf(len
); 
 145         (void)regerror(errorcode
, &m_RegEx
, (char *)buf
.data(), len
); 
 147         msg 
= wxString(buf
.data(), wxConvLibc
); 
 149         (void)regerror(errorcode
, &m_RegEx
, msg
.GetWriteBuf(len
), len
); 
 152 #endif // Unicode/!Unicode 
 154     else // regerror() returned 0 
 156         msg 
= _("unknown error"); 
 162 bool wxRegExImpl::Compile(const wxString
& expr
, int flags
) 
 166     // translate our flags to regcomp() ones 
 167     wxASSERT_MSG( !(flags 
& 
 168                         ~(wxRE_BASIC 
| wxRE_ICASE 
| wxRE_NOSUB 
| wxRE_NEWLINE
)), 
 169                   _T("unrecognized flags in wxRegEx::Compile") ); 
 172     if ( !(flags 
& wxRE_BASIC
) ) 
 173         flagsRE 
|= REG_EXTENDED
; 
 174     if ( flags 
& wxRE_ICASE 
) 
 175         flagsRE 
|= REG_ICASE
; 
 176     if ( flags 
& wxRE_NOSUB 
) 
 177         flagsRE 
|= REG_NOSUB
; 
 178     if ( flags 
& wxRE_NEWLINE 
) 
 179         flagsRE 
|= REG_NEWLINE
; 
 182     int errorcode 
= regcomp(&m_RegEx
, expr
.mb_str(), flagsRE
); 
 185         wxLogError(_("Invalid regular expression '%s': %s"), 
 186                    expr
.c_str(), GetErrorMsg(errorcode
).c_str()); 
 188         m_isCompiled 
= FALSE
; 
 192         // don't allocate the matches array now, but do it later if necessary 
 193         if ( flags 
& wxRE_NOSUB 
) 
 195             // we don't need it at all 
 200             // we will alloc the array later (only if really needed) but count 
 201             // the number of sub-expressions in the regex right now 
 203             // there is always one for the whole expression 
 206             // and some more for bracketed subexpressions 
 207             const wxChar 
*cptr 
= expr
.c_str(); 
 208             wxChar prev 
= _T('\0'); 
 209             while ( *cptr 
!= _T('\0') ) 
 211                 // is this a subexpr start, i.e. "(" for extended regex or 
 212                 // "\(" for a basic one? 
 213                 if ( *cptr 
== _T('(') && 
 214                      (flags 
& wxRE_BASIC 
? prev 
== _T('\\') 
 215                                          : prev 
!= _T('\\')) ) 
 231 bool wxRegExImpl::Matches(const wxChar 
*str
, int flags
) const 
 233     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 235     // translate our flags to regexec() ones 
 236     wxASSERT_MSG( !(flags 
& ~(wxRE_NOTBOL 
| wxRE_NOTEOL
)), 
 237                   _T("unrecognized flags in wxRegEx::Matches") ); 
 240     if ( flags 
& wxRE_NOTBOL 
) 
 241         flagsRE 
|= REG_NOTBOL
; 
 242     if ( flags 
& wxRE_NOTEOL 
) 
 243         flagsRE 
|= REG_NOTEOL
; 
 245     // allocate matches array if needed 
 246     wxRegExImpl 
*self 
= wxConstCast(this, wxRegExImpl
); 
 247     if ( !m_Matches 
&& m_nMatches 
) 
 249         self
->m_Matches 
= new regmatch_t
[m_nMatches
]; 
 253     int rc 
= regexec(&self
->m_RegEx
, wxConvertWX2MB(str
), m_nMatches
, m_Matches
, flagsRE
); 
 258             // matched successfully 
 263             wxLogError(_("Failed to match '%s' in regular expression: %s"), 
 264                        str
, GetErrorMsg(rc
).c_str()); 
 273 bool wxRegExImpl::GetMatch(size_t *start
, size_t *len
, size_t index
) const 
 275     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 276     wxCHECK_MSG( m_Matches
, FALSE
, _T("can't use with wxRE_NOSUB") ); 
 277     wxCHECK_MSG( index 
< m_nMatches
, FALSE
, _T("invalid match index") ); 
 279     const regmatch_t
& match 
= m_Matches
[index
]; 
 282         *start 
= match
.rm_so
; 
 284         *len 
= match
.rm_eo 
- match
.rm_so
; 
 289 int wxRegExImpl::Replace(wxString 
*text
, 
 290                          const wxString
& replacement
, 
 291                          size_t maxMatches
) const 
 293     wxCHECK_MSG( text
, -1, _T("NULL text in wxRegEx::Replace") ); 
 294     wxCHECK_MSG( IsValid(), -1, _T("must successfully Compile() first") ); 
 296     // the replacement text 
 299     // attempt at optimization: don't iterate over the string if it doesn't 
 300     // contain back references at all 
 301     bool mayHaveBackrefs 
= 
 302         replacement
.find_first_of(_T("\\&")) != wxString::npos
; 
 304     if ( !mayHaveBackrefs 
) 
 306         textNew 
= replacement
; 
 309     // the position where we start looking for the match 
 311     // NB: initial version had a nasty bug because it used a wxChar* instead of 
 312     //     an index but the problem is that replace() in the loop invalidates 
 313     //     all pointers into the string so we have to use indices instead 
 314     size_t matchStart 
= 0; 
 316     // number of replacements made: we won't make more than maxMatches of them 
 317     // (unless maxMatches is 0 which doesn't limit the number of replacements) 
 318     size_t countRepl 
= 0; 
 320     // note that "^" shouldn't match after the first call to Matches() so we 
 321     // use wxRE_NOTBOL to prevent it from happening 
 322     while ( (!maxMatches 
|| countRepl 
< maxMatches
) && 
 323             Matches(text
->c_str() + matchStart
, countRepl 
? wxRE_NOTBOL 
: 0) ) 
 325         // the string possibly contains back references: we need to calculate 
 326         // the replacement text anew after each match 
 327         if ( mayHaveBackrefs 
) 
 329             mayHaveBackrefs 
= FALSE
; 
 331             textNew
.reserve(replacement
.length()); 
 333             for ( const wxChar 
*p 
= replacement
.c_str(); *p
; p
++ ) 
 335                 size_t index 
= (size_t)-1; 
 337                 if ( *p 
== _T('\\') ) 
 339                     if ( wxIsdigit(*++p
) ) 
 343                         index 
= (size_t)wxStrtoul(p
, &end
, 10); 
 344                         p 
= end 
- 1; // -1 to compensate for p++ in the loop 
 346                     //else: backslash used as escape character 
 348                 else if ( *p 
== _T('&') ) 
 350                     // treat this as "\0" for compatibility with ed and such 
 354                 // do we have a back reference? 
 355                 if ( index 
!= (size_t)-1 ) 
 359                     if ( !GetMatch(&start
, &len
, index
) ) 
 361                         wxFAIL_MSG( _T("invalid back reference") ); 
 367                         textNew 
+= wxString(text
->c_str() + matchStart 
+ start
, 
 370                         mayHaveBackrefs 
= TRUE
; 
 373                 else // ordinary character 
 381         if ( !GetMatch(&start
, &len
) ) 
 383             // we did have a match, as Matches() returned true above! 
 384             wxFAIL_MSG( _T("internal logic error in wxRegEx::Replace") ); 
 390         text
->replace(matchStart
, len
, textNew
); 
 394         matchStart 
+= textNew
.length(); 
 400 // ---------------------------------------------------------------------------- 
 401 // wxRegEx: all methods are mostly forwarded to wxRegExImpl 
 402 // ---------------------------------------------------------------------------- 
 415 bool wxRegEx::Compile(const wxString
& expr
, int flags
) 
 419         m_impl 
= new wxRegExImpl
; 
 422     if ( !m_impl
->Compile(expr
, flags
) ) 
 424         // error message already given in wxRegExImpl::Compile 
 434 bool wxRegEx::Matches(const wxChar 
*str
, int flags
) const 
 436     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 438     return m_impl
->Matches(str
, flags
); 
 441 bool wxRegEx::GetMatch(size_t *start
, size_t *len
, size_t index
) const 
 443     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 445     return m_impl
->GetMatch(start
, len
, index
); 
 448 wxString 
wxRegEx::GetMatch(const wxString
& text
, size_t index
) const 
 451     if ( !GetMatch(&start
, &len
, index
) ) 
 452         return wxEmptyString
; 
 454     return text
.Mid(start
, len
); 
 457 int wxRegEx::Replace(wxString 
*pattern
, 
 458                      const wxString
& replacement
, 
 459                      size_t maxMatches
) const 
 461     wxCHECK_MSG( IsValid(), -1, _T("must successfully Compile() first") ); 
 463     return m_impl
->Replace(pattern
, replacement
, maxMatches
); 
 466 #endif // wxUSE_REGEX