]>
git.saurik.com Git - wxWidgets.git/blob - src/common/regex.cpp
   1 /////////////////////////////////////////////////////////////////////////////// 
   2 // Name:        src/common/regex.cpp 
   3 // Purpose:     regular expression matching 
   4 // Author:      Karsten Ballüder and Vadim Zeitlin 
   8 // Copyright:   (c) 2000 Karsten Ballüder <ballueder@gmx.net> 
   9 //                  2001 Vadim Zeitlin <vadim@wxwindows.org> 
  10 // Licence:     wxWindows licence 
  11 /////////////////////////////////////////////////////////////////////////////// 
  13 // ============================================================================ 
  15 // ============================================================================ 
  17 // ---------------------------------------------------------------------------- 
  19 // ---------------------------------------------------------------------------- 
  22     #pragma implementation "regex.h" 
  25 // For compilers that support precompilation, includes "wx.h". 
  26 #include "wx/wxprec.h" 
  35     #include "wx/object.h" 
  36     #include "wx/string.h" 
  41 // FreeBSD & Watcom require this, it probably doesn't hurt for others 
  42 #if defined(__UNIX__) || defined(__WATCOMC__) 
  43     #include <sys/types.h> 
  50 // ---------------------------------------------------------------------------- 
  52 // ---------------------------------------------------------------------------- 
  54 // the real implementation of wxRegEx 
  62     // return TRUE if Compile() had been called successfully 
  63     bool IsValid() const { return m_isCompiled
; } 
         // compile 'expr' according to the wxRE_XXX flags (wxRE_BASIC,
         // wxRE_ICASE, wxRE_NOSUB, wxRE_NEWLINE); a compilation error is
         // reported using wxLogError()
  66     bool Compile(const wxString
& expr
, int flags 
= 0); 
         // run the compiled expression on 'str'; the only flags accepted here
         // are wxRE_NOTBOL and wxRE_NOTEOL
  67     bool Matches(const wxChar 
*str
, int flags 
= 0) const; 
         // retrieve the start offset and the length of the match with the
         // given index from the data filled in by the last Matches() call
  68     bool GetMatch(size_t *start
, size_t *len
, size_t index 
= 0) const; 
         // replace the matches in the string pointed to by 'pattern' (called
         // 'text' in the definition) with 'replacement', making at most
         // maxMatches replacements (0 means no limit); the wxCHECK_MSG
         // precondition checks in the definition use -1 as the failure value
  69     int Replace(wxString 
*pattern
, const wxString
& replacement
, 
  70                 size_t maxMatches 
= 0) const; 
  73     // return the string containing the error message for the given err code 
  74     wxString 
GetErrorMsg(int errorcode
) const; 
  76     // free the RE if compiled 
  90     // the subexpressions data 
  91     regmatch_t 
*m_Matches
; 
  94     // TRUE if m_RegEx is valid 
  98 // ============================================================================ 
 100 // ============================================================================ 
 102 // ---------------------------------------------------------------------------- 
 104 // ---------------------------------------------------------------------------- 
 // Default constructor: the object starts in the "not compiled" state, so
 // IsValid() returns FALSE until an expression is compiled.
 106 wxRegExImpl::wxRegExImpl() 
 108     m_isCompiled 
= FALSE
; 
 113 wxRegExImpl::~wxRegExImpl() 
 // Return the text describing the regcomp()/regexec() error code 'errorcode'.
 //
 // regerror() is called twice: once with a NULL buffer of size 0 to learn the
 // required length, and once more to actually retrieve the message. If the
 // first call returns 0, the translated string "unknown error" is used.
 120 wxString 
wxRegExImpl::GetErrorMsg(int errorcode
) const 
 124     // first get the string length needed 
 125     int len 
= regerror(errorcode
, &m_RegEx
, NULL
, 0); 
             // one of the two build variants (presumably the Unicode one, see
             // the #endif comment below) retrieves the message via a
             // temporary narrow char buffer
 131         wxCharBuffer 
buf(len
); 
 133         (void)regerror(errorcode
, &m_RegEx
, (char *)buf
.data(), len
); 
             // the other variant lets regerror() write directly into the
             // write buffer of the result string
 137         (void)regerror(errorcode
, &m_RegEx
, msg
.GetWriteBuf(len
), len
); 
 140 #endif // Unicode/!Unicode 
 142     else // regerror() returned 0 
 144         msg 
= _("unknown error"); 
 // Compile the regular expression 'expr'.
 //
 // 'flags' is a combination of wxRE_BASIC, wxRE_ICASE, wxRE_NOSUB and
 // wxRE_NEWLINE (anything else triggers an assert); they are translated to
 // the corresponding REG_XXX flags and passed to regcomp(). A compilation
 // error is logged together with the text returned by GetErrorMsg() and
 // m_isCompiled is set to FALSE. Otherwise, unless wxRE_NOSUB was given, the
 // sub-expressions of 'expr' are counted so that the matches array can be
 // allocated later.
 150 bool wxRegExImpl::Compile(const wxString
& expr
, int flags
) 
 154     // translate our flags to regcomp() ones 
 155     wxASSERT_MSG( !(flags 
& 
 156                         ~(wxRE_BASIC 
| wxRE_ICASE 
| wxRE_NOSUB 
| wxRE_NEWLINE
)), 
 157                   _T("unrecognized flags in wxRegEx::Compile") ); 
         // extended syntax is the default: REG_EXTENDED is set unless
         // wxRE_BASIC was explicitly requested
 160     if ( !(flags 
& wxRE_BASIC
) ) 
 161         flagsRE 
|= REG_EXTENDED
; 
 162     if ( flags 
& wxRE_ICASE 
) 
 163         flagsRE 
|= REG_ICASE
; 
 164     if ( flags 
& wxRE_NOSUB 
) 
 165         flagsRE 
|= REG_NOSUB
; 
 166     if ( flags 
& wxRE_NEWLINE 
) 
 167         flagsRE 
|= REG_NEWLINE
; 
 170     int errorcode 
= regcomp(&m_RegEx
, expr
.mb_str(), flagsRE
); 
 173         wxLogError(_("Invalid regular expression '%s': %s"), 
 174                    expr
.c_str(), GetErrorMsg(errorcode
).c_str()); 
 176         m_isCompiled 
= FALSE
; 
 180         // don't allocate the matches array now, but do it later if necessary 
 181         if ( flags 
& wxRE_NOSUB 
) 
 183             // we don't need it at all 
 188             // we will alloc the array later (only if really needed) but count 
 189             // the number of sub-expressions in the regex right now 
 191             // there is always one for the whole expression 
 194             // and some more for bracketed subexpressions 
 195             const wxChar 
*cptr 
= expr
.c_str(); 
 196             wxChar prev 
= _T('\0'); 
 197             while ( *cptr 
!= _T('\0') ) 
 199                 // is this a subexpr start, i.e. "(" for extended regex or 
 200                 // "\(" for a basic one? 
                 // NOTE(review): only the immediately preceding character is
                 // examined here, so a "(" inside a bracket expression or
                 // after an escaped backslash would presumably be
                 // misclassified -- confirm whether this matters
 201                 if ( *cptr 
== _T('(') && 
 202                      (flags 
& wxRE_BASIC 
? prev 
== _T('\\') 
 203                                          : prev 
!= _T('\\')) ) 
 // Run the compiled expression on the string 'str'.
 //
 // Requires a successfully compiled expression. 'flags' may only contain
 // wxRE_NOTBOL and/or wxRE_NOTEOL (anything else triggers an assert), which
 // are translated to REG_NOTBOL/REG_NOTEOL for regexec(). The results for
 // the sub-expressions are stored in m_Matches, which is allocated here on
 // first use. A regexec() failure is logged together with the text returned
 // by GetErrorMsg().
 219 bool wxRegExImpl::Matches(const wxChar 
*str
, int flags
) const 
 221     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 223     // translate our flags to regexec() ones 
 224     wxASSERT_MSG( !(flags 
& ~(wxRE_NOTBOL 
| wxRE_NOTEOL
)), 
 225                   _T("unrecognized flags in wxRegEx::Matches") ); 
 228     if ( flags 
& wxRE_NOTBOL 
) 
 229         flagsRE 
|= REG_NOTBOL
; 
 230     if ( flags 
& wxRE_NOTEOL 
) 
 231         flagsRE 
|= REG_NOTEOL
; 
 233     // allocate matches array if needed 
         // this method is const, so a non-const pointer to this object is
         // needed to create the matches array lazily
 234     wxRegExImpl 
*self 
= wxConstCast(this, wxRegExImpl
); 
 235     if ( !m_Matches 
&& m_nMatches 
) 
 237         self
->m_Matches 
= new regmatch_t
[m_nMatches
]; 
         // the string is converted to multibyte before being passed to
         // regexec()
 241     int rc 
= regexec(&self
->m_RegEx
, wxConvertWX2MB(str
), m_nMatches
, m_Matches
, flagsRE
); 
 246             // matched successfully 
 251             wxLogError(_("Failed to match '%s' in regular expression: %s"), 
 252                        str
, GetErrorMsg(rc
).c_str()); 
 // Retrieve the position of the match with the given index.
 //
 // The offset of the match start is stored in '*start' and the match length
 // (end offset minus start offset) in '*len'. The checks below fail if the
 // expression has not been compiled, if there is no matches array, or if
 // 'index' is not less than m_nMatches.
 //
 // NOTE(review): m_Matches is allocated lazily by Matches(), so it is also
 // NULL when Matches() has not been called yet; the "can't use with
 // wxRE_NOSUB" message is misleading in that case.
 261 bool wxRegExImpl::GetMatch(size_t *start
, size_t *len
, size_t index
) const 
 263     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 264     wxCHECK_MSG( m_Matches
, FALSE
, _T("can't use with wxRE_NOSUB") ); 
 265     wxCHECK_MSG( index 
< m_nMatches
, FALSE
, _T("invalid match index") ); 
 267     const regmatch_t
& match 
= m_Matches
[index
]; 
 270         *start 
= match
.rm_so
; 
 272         *len 
= match
.rm_eo 
- match
.rm_so
; 
 // Replace the matches of the compiled expression in '*text' with
 // 'replacement'.
 //
 // In 'replacement', a backslash followed by digits ("\N") is a back
 // reference to the match with index N and "&" stands for the whole match;
 // a backslash followed by anything else is an escape character. At most
 // 'maxMatches' replacements are made, 0 meaning no limit. The precondition
 // checks use -1 as the failure value.
 277 int wxRegExImpl::Replace(wxString 
*text
, 
 278                          const wxString
& replacement
, 
 279                          size_t maxMatches
) const 
 281     wxCHECK_MSG( text
, -1, _T("NULL text in wxRegEx::Replace") ); 
 282     wxCHECK_MSG( IsValid(), -1, _T("must successfully Compile() first") ); 
 284     // the replacement text 
 287     // attempt at optimization: don't iterate over the string if it doesn't 
 288     // contain back references at all 
 289     bool mayHaveBackrefs 
= 
 290         replacement
.find_first_of(_T("\\&")) != wxString::npos
; 
 292     if ( !mayHaveBackrefs 
) 
 294         textNew 
= replacement
; 
 297     // the position where we start looking for the match 
 299     // NB: initial version had a nasty bug because it used a wxChar* instead of 
 300     //     an index but the problem is that replace() in the loop invalidates 
 301     //     all pointers into the string so we have to use indices instead 
 302     size_t matchStart 
= 0; 
 304     // number of replacements made: we won't make more than maxMatches of them 
 305     // (unless maxMatches is 0 which doesn't limit the number of replacements) 
 306     size_t countRepl 
= 0; 
 308     // note that "^" shouldn't match after the first call to Matches() so we 
 309     // use wxRE_NOTBOL to prevent it from happening 
 310     while ( (!maxMatches 
|| countRepl 
< maxMatches
) && 
 311             Matches(text
->c_str() + matchStart
, countRepl 
? wxRE_NOTBOL 
: 0) ) 
 313         // the string possibly contains back references: we need to calculate 
 314         // the replacement text anew after each match 
 315         if ( mayHaveBackrefs 
) 
                 // the flag is reset here and only set again below if a back
                 // reference is really found, so a replacement string without
                 // any is expanded just once
 317             mayHaveBackrefs 
= FALSE
; 
 319             textNew
.reserve(replacement
.length()); 
 321             for ( const wxChar 
*p 
= replacement
.c_str(); *p
; p
++ ) 
                     // (size_t)-1 means "no back reference at this position"
 323                 size_t index 
= (size_t)-1; 
 325                 if ( *p 
== _T('\\') ) 
                         // NOTE(review): if 'replacement' ends with a
                         // backslash, ++p makes p point to the terminating
                         // NUL and the loop's p++ would then step past it
                         // unless code not visible here prevents this --
                         // TODO confirm
 327                     if ( wxIsdigit(*++p
) ) 
 331                         index 
= (size_t)wxStrtoul(p
, &end
, 10); 
 332                         p 
= end 
- 1; // -1 to compensate for p++ in the loop 
 334                     //else: backslash used as escape character 
 336                 else if ( *p 
== _T('&') ) 
 338                     // treat this as "\0" for compatibility with ed and such 
 342                 // do we have a back reference? 
 343                 if ( index 
!= (size_t)-1 ) 
 347                     if ( !GetMatch(&start
, &len
, index
) ) 
 349                         wxFAIL_MSG( _T("invalid back reference") ); 
                             // the match offsets are relative to the string
                             // passed to Matches(), hence the matchStart
                             // offset
 355                         textNew 
+= wxString(text
->c_str() + matchStart 
+ start
, 
 358                         mayHaveBackrefs 
= TRUE
; 
 361                 else // ordinary character 
 369         if ( !GetMatch(&start
, &len
) ) 
 371             // we did have match as Matches() returned true above! 
 372             wxFAIL_MSG( _T("internal logic error in wxRegEx::Replace") ); 
 378         text
->replace(matchStart
, len
, textNew
); 
             // continue searching after the text which has just been inserted
 382         matchStart 
+= textNew
.length(); 
 388 // ---------------------------------------------------------------------------- 
 389 // wxRegEx: all methods are mostly forwarded to wxRegExImpl 
 390 // ---------------------------------------------------------------------------- 
 // Compile the expression 'expr' with the given flags by forwarding to
 // wxRegExImpl::Compile() on the implementation object stored in m_impl;
 // no error is reported here on failure because wxRegExImpl::Compile() has
 // already done it.
 403 bool wxRegEx::Compile(const wxString
& expr
, int flags
) 
 407         m_impl 
= new wxRegExImpl
; 
 410     if ( !m_impl
->Compile(expr
, flags
) ) 
 412         // error message already given in wxRegExImpl::Compile 
 // Check that the expression was compiled successfully, then forward to
 // wxRegExImpl::Matches() with the same arguments.
 422 bool wxRegEx::Matches(const wxChar 
*str
, int flags
) const 
 424     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 426     return m_impl
->Matches(str
, flags
); 
 // Check that the expression was compiled successfully, then forward to
 // wxRegExImpl::GetMatch() which fills in '*start' and '*len' for the match
 // with the given index.
 429 bool wxRegEx::GetMatch(size_t *start
, size_t *len
, size_t index
) const 
 431     wxCHECK_MSG( IsValid(), FALSE
, _T("must successfully Compile() first") ); 
 433     return m_impl
->GetMatch(start
, len
, index
); 
 // Return the part of 'text' corresponding to the match with the given
 // index, or an empty string if the match position can't be retrieved.
 //
 // 'text' is used only to extract the substring at the position returned by
 // the other GetMatch() overload.
 436 wxString 
wxRegEx::GetMatch(const wxString
& text
, size_t index
) const 
 439     if ( !GetMatch(&start
, &len
, index
) ) 
 440         return wxEmptyString
; 
 442     return text
.Mid(start
, len
); 
 // Check that the expression was compiled successfully (the check uses -1 as
 // the failure value), then forward to wxRegExImpl::Replace() with the same
 // arguments.
 445 int wxRegEx::Replace(wxString 
*pattern
, 
 446                      const wxString
& replacement
, 
 447                      size_t maxMatches
) const 
 449     wxCHECK_MSG( IsValid(), -1, _T("must successfully Compile() first") ); 
 451     return m_impl
->Replace(pattern
, replacement
, maxMatches
); 
 454 #endif // wxUSE_REGEX