]>
git.saurik.com Git - wxWidgets.git/blob - src/common/regex.cpp
1 ///////////////////////////////////////////////////////////////////////////////
2 // Name: src/common/regex.cpp
3 // Purpose: regular expression matching
4 // Author: Karsten Ballüder and Vadim Zeitlin
8 // Copyright: (c) 2000 Karsten Ballüder <ballueder@gmx.net>
9 // 2001 Vadim Zeitlin <vadim@wxwindows.org>
10 // Licence: wxWindows licence
11 ///////////////////////////////////////////////////////////////////////////////
13 // ============================================================================
15 // ============================================================================
17 // ----------------------------------------------------------------------------
19 // ----------------------------------------------------------------------------
21 // For compilers that support precompilation, includes "wx.h".
22 #include "wx/wxprec.h"
31 #include "wx/object.h"
32 #include "wx/string.h"
37 // FreeBSD, Watcom and DMars require this, CW doesn't have nor need it.
38 // Others also don't seem to need it. If you have an error related to
39 // (not) including <sys/types.h> please report details to
40 // wx-dev@lists.wxwindows.org
41 #if defined(__UNIX__) || defined(__WATCOMC__) || defined(__DIGITALMARS__)
42 # include <sys/types.h>
48 // ----------------------------------------------------------------------------
50 // ----------------------------------------------------------------------------
52 // the real implementation of wxRegEx
60 // return true if Compile() had been called successfully
61 bool IsValid() const { return m_isCompiled
; }
// compile expr; flags (0 by default) is a combination of wxRE_XXX values
64 bool Compile(const wxString
& expr
, int flags
= 0);
// match str against the compiled RE; flags may contain wxRE_NOTBOL and
// wxRE_NOTEOL only
65 bool Matches(const wxChar
*str
, int flags
= 0) const;
// retrieve the offset and the length of the sub-match number index (0 by
// default), Matches() must have been called before
66 bool GetMatch(size_t *start
, size_t *len
, size_t index
= 0) const;
67 size_t GetMatchCount() const;
// NOTE(review): the definition of Replace() names the first parameter "text"
// and modifies it in place with text->replace(), so it is the subject string
// and not a pattern -- consider renaming it here for consistency
68 int Replace(wxString
*pattern
, const wxString
& replacement
,
69 size_t maxMatches
= 0) const;
72 // return the string containing the error message for the given err code
73 wxString
GetErrorMsg(int errorcode
, bool badconv
) const;
83 // free the RE if compiled
94 // free the RE if any and reinit the members
105 // the subexpressions data
// (the array is only allocated by Matches(), with m_nMatches elements)
106 regmatch_t
*m_Matches
;
109 // true if m_RegEx is valid
113 // ============================================================================
115 // ============================================================================
117 // ----------------------------------------------------------------------------
119 // ----------------------------------------------------------------------------
121 wxRegExImpl::wxRegExImpl()
126 wxRegExImpl::~wxRegExImpl()
// Return the message describing the given regex error code.
//
// errorcode is passed to regerror() together with m_RegEx to obtain the
// library message, which is then converted using wxConvertMB2WX(). The
// translated "unknown error" string is used when regerror() returned 0.
//
// badconv only matters in Unicode builds not defining __REG_NOFRONT, where
// the "conversion to 8-bit encoding failed" message can be returned instead.
// NOTE(review): the condition guarding that return is not visible in this
// extract, presumably it tests badconv -- confirm.
131 wxString
wxRegExImpl::GetErrorMsg(int errorcode
, bool badconv
) const
133 #if wxUSE_UNICODE && !defined(__REG_NOFRONT)
134 // currently only needed when using system library in Unicode mode
137 return _("conversion to 8-bit encoding failed");
140 // 'use' badconv to avoid a compiler warning
146 // first get the string length needed
// (regerror() is called with a NULL buffer of size 0 here, so only its
// return value is used)
147 int len
= regerror(errorcode
, &m_RegEx
, NULL
, 0);
// ++len: the buffer is one char bigger than the length regerror() returned
150 char* szcmbError
= new char[++len
];
// second call actually fills the buffer, its return value is not needed
152 (void)regerror(errorcode
, &m_RegEx
, szcmbError
, len
);
154 szError
= wxConvertMB2WX(szcmbError
);
155 delete [] szcmbError
;
157 else // regerror() returned 0
159 szError
= _("unknown error");
// Compile the regular expression expr into m_RegEx.
//
// flags is a combination of the syntax flavour bits (FLAVORS below) with
// wxRE_ICASE, wxRE_NOSUB and wxRE_NEWLINE, any other bit triggers the
// "unrecognized flags" assertion. The flags are translated to the
// corresponding REG_XXX values for wx_re_comp() or regcomp().
//
// If compilation fails, the error is logged using wxLogError() and
// m_isCompiled is reset to false. Otherwise the sub-expressions of expr are
// counted (unless wxRE_NOSUB was given) so that Matches() knows how big an
// array it needs.
165 bool wxRegExImpl::Compile(const wxString
& expr
, int flags
)
169 #ifdef WX_NO_REGEX_ADVANCED
170 # define FLAVORS wxRE_BASIC
172 # define FLAVORS (wxRE_ADVANCED | wxRE_BASIC)
// wxRE_ADVANCED and wxRE_BASIC can't be both specified at once
173 wxASSERT_MSG( (flags
& FLAVORS
) != FLAVORS
,
174 _T("incompatible flags in wxRegEx::Compile") );
176 wxASSERT_MSG( !(flags
& ~(FLAVORS
| wxRE_ICASE
| wxRE_NOSUB
| wxRE_NEWLINE
)),
177 _T("unrecognized flags in wxRegEx::Compile") );
179 // translate our flags to regcomp() ones
// (neither REG_ADVANCED nor REG_EXTENDED is set if wxRE_BASIC is given)
181 if ( !(flags
& wxRE_BASIC
) )
182 #ifndef WX_NO_REGEX_ADVANCED
183 if (flags
& wxRE_ADVANCED
)
184 flagsRE
|= REG_ADVANCED
;
187 flagsRE
|= REG_EXTENDED
;
188 if ( flags
& wxRE_ICASE
)
189 flagsRE
|= REG_ICASE
;
190 if ( flags
& wxRE_NOSUB
)
191 flagsRE
|= REG_NOSUB
;
192 if ( flags
& wxRE_NEWLINE
)
193 flagsRE
|= REG_NEWLINE
;
// NOTE(review): errorcode is declared twice below, these are presumably the
// two branches of a preprocessor conditional whose #ifdef/#else lines are
// not visible in this extract -- confirm against the full file
198 int errorcode
= wx_re_comp(&m_RegEx
, expr
, expr
.length(), flagsRE
);
// the expression is converted to a multibyte string first and REG_BADPAT is
// used as the error code if the conversion result is null
200 const wxWX2MBbuf conv
= expr
.mbc_str();
201 int errorcode
= conv
? regcomp(&m_RegEx
, conv
, flagsRE
) : REG_BADPAT
;
206 wxLogError(_("Invalid regular expression '%s': %s"),
207 expr
.c_str(), GetErrorMsg(errorcode
, !conv
).c_str());
209 m_isCompiled
= false;
213 // don't allocate the matches array now, but do it later if necessary
214 if ( flags
& wxRE_NOSUB
)
216 // we don't need it at all
221 // we will alloc the array later (only if really needed) but count
222 // the number of sub-expressions in the regex right now
224 // there is always one for the whole expression
227 // and some more for bracketed subexpressions
228 for ( const wxChar
*cptr
= expr
.c_str(); *cptr
; cptr
++ )
230 if ( *cptr
== _T('\\') )
232 // in basic RE syntax groups are inside \(...\)
// (note that ++cptr also skips the escaped character, whatever it is, so
// that it is never examined by the "else if" branch below)
233 if ( *++cptr
== _T('(') && (flags
& wxRE_BASIC
) )
238 else if ( *cptr
== _T('(') && !(flags
& wxRE_BASIC
) )
240 // we know that the previous character is not an unquoted
241 // backslash because it would have been eaten above, so we
242 // have a bare '(' and this indicates a group start for the
243 // extended syntax. '(?' is used for extensions by perl-
244 // like REs (e.g. advanced), and is not valid for POSIX
245 // extended, so ignore them always.
246 if ( cptr
[1] != _T('?') )
// Match the string str against the compiled regular expression.
//
// Returns false (after an assertion failure) if the RE is not valid. flags
// may only contain wxRE_NOTBOL and wxRE_NOTEOL, which are translated to
// REG_NOTBOL and REG_NOTEOL respectively.
//
// The matches array is allocated here on first use if m_nMatches is not 0.
// A "Failed to match" error can be logged using wxLogError().
// NOTE(review): the code deciding when this error is logged and what is
// returned for each value of rc is not visible in this extract.
258 bool wxRegExImpl::Matches(const wxChar
*str
, int flags
) const
260 wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
262 // translate our flags to regexec() ones
263 wxASSERT_MSG( !(flags
& ~(wxRE_NOTBOL
| wxRE_NOTEOL
)),
264 _T("unrecognized flags in wxRegEx::Matches") );
267 if ( flags
& wxRE_NOTBOL
)
268 flagsRE
|= REG_NOTBOL
;
269 if ( flags
& wxRE_NOTEOL
)
270 flagsRE
|= REG_NOTEOL
;
272 // allocate matches array if needed
// (this method is const, so a non-const pointer to this object is needed to
// modify m_Matches and to pass m_RegEx to the matching function)
273 wxRegExImpl
*self
= wxConstCast(this, wxRegExImpl
);
274 if ( !m_Matches
&& m_nMatches
)
276 self
->m_Matches
= new regmatch_t
[m_nMatches
];
// NOTE(review): rc is declared twice below, these are presumably the two
// branches of a preprocessor conditional whose #ifdef/#else lines are not
// visible in this extract -- confirm against the full file
282 int rc
= wx_re_exec(&self
->m_RegEx
, str
, wxStrlen(str
), NULL
, m_nMatches
, m_Matches
, flagsRE
);
// the string is converted to multibyte first and REG_BADPAT is used as the
// result if the conversion result is null
284 const wxWX2MBbuf conv
= wxConvertWX2MB(str
);
285 int rc
= conv
? regexec(&self
->m_RegEx
, conv
, m_nMatches
, m_Matches
, flagsRE
) : REG_BADPAT
;
291 // matched successfully
296 wxLogError(_("Failed to match '%s' in regular expression: %s"),
297 str
, GetErrorMsg(rc
, !conv
).c_str());
// Retrieve the position of the sub-match number index found by Matches().
//
// *start is set to the rm_so offset of the match and *len to the difference
// between its rm_eo and rm_so offsets.
//
// Returns false (after an assertion failure) if the RE is not valid, was
// compiled with wxRE_NOSUB, Matches() was not called yet or index is not
// less than m_nMatches.
306 bool wxRegExImpl::GetMatch(size_t *start
, size_t *len
, size_t index
) const
308 wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
309 wxCHECK_MSG( m_nMatches
, false, _T("can't use with wxRE_NOSUB") );
310 wxCHECK_MSG( m_Matches
, false, _T("must call Matches() first") );
311 wxCHECK_MSG( index
< m_nMatches
, false, _T("invalid match index") );
313 const regmatch_t
& match
= m_Matches
[index
];
315 // we need the casts because rm_so can be a 64 bit quantity
317 *start
= wx_truncate_cast(size_t, match
.rm_so
);
319 *len
= wx_truncate_cast(size_t, match
.rm_eo
- match
.rm_so
);
// Both checks below return 0 after an assertion failure: the RE must have
// been compiled successfully and without wxRE_NOSUB (m_nMatches is 0 in that
// case). NOTE(review): the statement returning the count in the normal case
// is not visible in this extract.
324 size_t wxRegExImpl::GetMatchCount() const
326 wxCHECK_MSG( IsValid(), 0, _T("must successfully Compile() first") );
327 wxCHECK_MSG( m_nMatches
, 0, _T("can't use with wxRE_NOSUB") );
// Replace the matches of the compiled RE in *text with replacement.
//
// text is modified in place. replacement may contain back references, either
// a backslash followed by digits or '&', which are replaced with the
// corresponding sub-match of the current match. At most maxMatches
// replacements are made unless maxMatches is 0, in which case their number
// is not limited.
//
// Returns wxNOT_FOUND (after an assertion failure) if text is NULL or the RE
// is not valid. NOTE(review): the return statements other than those inside
// the checks are not visible in this extract.
332 int wxRegExImpl::Replace(wxString
*text
,
333 const wxString
& replacement
,
334 size_t maxMatches
) const
336 wxCHECK_MSG( text
, wxNOT_FOUND
, _T("NULL text in wxRegEx::Replace") );
337 wxCHECK_MSG( IsValid(), wxNOT_FOUND
, _T("must successfully Compile() first") );
339 // the replacement text
342 // attempt at optimization: don't iterate over the string if it doesn't
343 // contain back references at all
344 bool mayHaveBackrefs
=
345 replacement
.find_first_of(_T("\\&")) != wxString::npos
;
347 if ( !mayHaveBackrefs
)
349 textNew
= replacement
;
352 // the position where we start looking for the match
354 // NB: initial version had a nasty bug because it used a wxChar* instead of
355 // an index but the problem is that replace() in the loop invalidates
356 // all pointers into the string so we have to use indices instead
357 size_t matchStart
= 0;
359 // number of replacements made: we won't make more than maxMatches of them
360 // (unless maxMatches is 0 which doesn't limit the number of replacements)
361 size_t countRepl
= 0;
363 // note that "^" shouldn't match after the first call to Matches() so we
364 // use wxRE_NOTBOL to prevent it from happening
365 while ( (!maxMatches
|| countRepl
< maxMatches
) &&
366 Matches(text
->c_str() + matchStart
, countRepl
? wxRE_NOTBOL
: 0) )
368 // the string possibly contains back references: we need to calculate
369 // the replacement text anew after each match
370 if ( mayHaveBackrefs
)
// reset the flag, it is set again below only if a back reference is really
// found while scanning the replacement string
372 mayHaveBackrefs
= false;
374 textNew
.reserve(replacement
.length());
376 for ( const wxChar
*p
= replacement
.c_str(); *p
; p
++ )
// (size_t)-1 means that the current character is not a back reference
378 size_t index
= (size_t)-1;
380 if ( *p
== _T('\\') )
382 if ( wxIsdigit(*++p
) )
386 index
= (size_t)wxStrtoul(p
, &end
, 10);
387 p
= end
- 1; // -1 to compensate for p++ in the loop
389 //else: backslash used as escape character
391 else if ( *p
== _T('&') )
393 // treat this as "\0" for compatibility with ed and such
397 // do we have a back reference?
398 if ( index
!= (size_t)-1 )
402 if ( !GetMatch(&start
, &len
, index
) )
404 wxFAIL_MSG( _T("invalid back reference") );
// the offsets returned by GetMatch() are relative to the string passed to
// Matches(), i.e. to the position matchStart in text
410 textNew
+= wxString(text
->c_str() + matchStart
+ start
,
413 mayHaveBackrefs
= true;
416 else // ordinary character
424 if ( !GetMatch(&start
, &len
) )
426 // we did have match as Matches() returned true above!
427 wxFAIL_MSG( _T("internal logic error in wxRegEx::Replace") );
433 text
->replace(matchStart
, len
, textNew
);
// continue searching after the text which has just been inserted
// NOTE(review): this is the only visible advance of matchStart after a
// replacement, confirm that the loop can't spin forever when both the match
// and textNew are empty
437 matchStart
+= textNew
.length();
443 // ----------------------------------------------------------------------------
444 // wxRegEx: all methods are mostly forwarded to wxRegExImpl
445 // ----------------------------------------------------------------------------
// Compile expr with the given flags: a new wxRegExImpl is allocated, stored
// in m_impl and asked to do the compilation.
458 bool wxRegEx::Compile(const wxString
& expr
, int flags
)
462 m_impl
= new wxRegExImpl
;
465 if ( !m_impl
->Compile(expr
, flags
) )
467 // error message already given in wxRegExImpl::Compile
// Forward to wxRegExImpl::Matches() after checking that this object is
// valid, false is returned (after an assertion failure) if it is not.
477 bool wxRegEx::Matches(const wxChar
*str
, int flags
) const
479 wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
481 return m_impl
->Matches(str
, flags
);
// Forward to wxRegExImpl::GetMatch() after checking that this object is
// valid, false is returned (after an assertion failure) if it is not.
484 bool wxRegEx::GetMatch(size_t *start
, size_t *len
, size_t index
) const
486 wxCHECK_MSG( IsValid(), false, _T("must successfully Compile() first") );
488 return m_impl
->GetMatch(start
, len
, index
);
// Return the part of text corresponding to the sub-match number index or
// wxEmptyString if the match position couldn't be retrieved.
//
// text is only used to extract the substring from, using the offset and the
// length returned by the other GetMatch() overload.
491 wxString
wxRegEx::GetMatch(const wxString
& text
, size_t index
) const
494 if ( !GetMatch(&start
, &len
, index
) )
495 return wxEmptyString
;
497 return text
.Mid(start
, len
);
// Forward to wxRegExImpl::GetMatchCount() after checking that this object is
// valid, 0 is returned (after an assertion failure) if it is not.
500 size_t wxRegEx::GetMatchCount() const
502 wxCHECK_MSG( IsValid(), 0, _T("must successfully Compile() first") );
504 return m_impl
->GetMatchCount();
// Forward to wxRegExImpl::Replace() after checking that this object is
// valid, wxNOT_FOUND is returned (after an assertion failure) if it is not.
//
// NOTE(review): the parameter called "pattern" here is passed as the "text"
// argument of wxRegExImpl::Replace(), which modifies it in place.
507 int wxRegEx::Replace(wxString
*pattern
,
508 const wxString
& replacement
,
509 size_t maxMatches
) const
511 wxCHECK_MSG( IsValid(), wxNOT_FOUND
, _T("must successfully Compile() first") );
513 return m_impl
->Replace(pattern
, replacement
, maxMatches
);
516 #endif // wxUSE_REGEX