X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/2d290a92376232ed2469b8723453ea086e23c0d0..c266eff98c5e44012647f54f38a1e29ecabd8759:/docs/latex/wx/regex.tex?ds=sidebyside diff --git a/docs/latex/wx/regex.tex b/docs/latex/wx/regex.tex index 19a98ccf71..59026f0d71 100644 --- a/docs/latex/wx/regex.tex +++ b/docs/latex/wx/regex.tex @@ -11,38 +11,29 @@ \section{\class{wxRegEx}}\label{wxregex} -wxRegEx represents a regular expression. The regular expressions syntax -supported is the POSIX one. Both basic and extended regular expressions are -supported but, unlike POSIX C API, the extended ones are used by default. - -This class provides support for regular expressions matching and also -replacement. It is built on top of either the system library (if it has support -for POSIX regular expressions - which is the case of the most modern Unices) or -uses the built in Henry Spencer's library. In the latter case you need to abide -by the terms of its copyright: - -\begin{verbatim} -Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved. -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. - -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: - -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. - -4. This notice may not be removed or altered. 
-\end{verbatim} +wxRegEx represents a regular expression. This class provides support +for regular expression matching and also replacement. + +It is built on top of either the system library (if it has support +for POSIX regular expressions - which is the case for most modern +Unices) or the built-in Henry Spencer library. Henry Spencer +would appreciate being given credit in the documentation of software +which uses his library, but that is not a requirement. + +Regular expressions, as defined by POSIX, come in two flavours: {\it extended} +and {\it basic}. The builtin library also adds a third flavour +of expression, \helpref{advanced}{wxresyn}, which is not available +when using the system library. + +Unicode is fully supported only when using the builtin library. +When using the system library in Unicode mode, the expressions and data +are translated to the default 8-bit encoding before being passed to +the library. + +On platforms where a system library is available, the default is to use +the builtin library for Unicode builds, and the system library otherwise. +It is possible to use the other one, if preferred, by selecting it when building +wxWidgets. \wxheading{Derived from} @@ -51,11 +42,17 @@ No base class \wxheading{Data structures} Flags for regex compilation to be used with \helpref{Compile()}{wxregexcompile}: + \begin{verbatim} enum { - // use extended regex syntax (default) + // use extended regex syntax wxRE_EXTENDED = 0, + + // use advanced RE syntax (built-in regex only) +#ifdef wxHAS_REGEX_ADVANCED + wxRE_ADVANCED = 1, +#endif // use basic RE syntax wxRE_BASIC = 2, @@ -79,7 +76,8 @@ enum Flags for regex matching to be used with \helpref{Matches()}{wxregexmatches}. 
These flags are mainly useful when doing several matches in a long string -to prevent erroneous matches for \verb|'^'| and \verb|'$'| +to prevent erroneous matches for {\tt '\textasciicircum'} and {\tt '\$'}: + \begin{verbatim} enum { @@ -91,6 +89,39 @@ enum } \end{verbatim} +\wxheading{Examples} + +A bad example of processing some text containing email addresses (the example +is bad because real email addresses can have a more complicated form than +{\tt user@host.net}): + +\begin{verbatim} +wxString text; +... +wxRegEx reEmail = wxT("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"); +if ( reEmail.Matches(text) ) +{ + wxString email = reEmail.GetMatch(text); + wxString username = reEmail.GetMatch(text, 1); + if ( reEmail.GetMatch(text, 3) == wxT("com") ) // .com TLD? + { + ... + } +} + +// or we could do this to hide the email address +int count = reEmail.ReplaceAll(&text, wxT("HIDDEN@\\2\\3")); +printf("text now contains %d hidden addresses", count); +\end{verbatim} + +\wxheading{Include files} + +<wx/regex.h> + +\wxheading{Library} + +\helpref{wxBase}{librarieslist} + \latexignore{\rtfignore{\wxheading{Members}}} \membersection{wxRegEx::wxRegEx}\label{wxregexwxregex} @@ -99,8 +130,6 @@ enum Default ctor: use \helpref{Compile()}{wxregexcompile} later. -\membersection{wxRegEx::wxRegEx}\label{wxregexwxregex} - \func{}{wxRegEx}{\param{const wxString\& }{expr}, \param{int }{flags = wxRE\_DEFAULT}} Create and compile the regular expression, use @@ -116,14 +145,14 @@ dtor not virtual, don't derive from this class \func{bool}{Compile}{\param{const wxString\& }{pattern}, \param{int }{flags = wxRE\_DEFAULT}} -Compile the string into regular expression, return {\tt TRUE} if ok or {\tt FALSE} +Compile the string into a regular expression, return {\tt true} if ok or {\tt false} if string has a syntax error. 
\membersection{wxRegEx::IsValid}\label{wxregexisvalid} \constfunc{bool}{IsValid}{\void} -Return {\tt TRUE} if this is a valid compiled regular expression, {\tt FALSE} +Return {\tt true} if this is a valid compiled regular expression, {\tt false} otherwise. \membersection{wxRegEx::GetMatch}\label{wxregexgetmatch} @@ -138,7 +167,7 @@ May only be called after successful call to \helpref{Matches()}{wxregexmatches} and only if {\tt wxRE\_NOSUB} was {\bf not} used in \helpref{Compile()}{wxregexcompile}. -Returns {\tt FALSE} if no match or if an error occured. +Returns {\tt false} if no match or if an error occurred. \constfunc{wxString}{GetMatch}{\param{const wxString\& }{text}, \param{size\_t }{index = 0}} @@ -149,14 +178,38 @@ May only be called after successful call to \helpref{Matches()}{wxregexmatches} and only if {\tt wxRE\_NOSUB} was {\bf not} used in \helpref{Compile()}{wxregexcompile}. +\membersection{wxRegEx::GetMatchCount}\label{wxregexgetmatchcount} + +\constfunc{size\_t}{GetMatchCount}{\void} + +Returns the size of the array of matches, i.e. the number of bracketed +subexpressions plus one for the expression itself, or $0$ on error. + +May only be called after a successful call to \helpref{Compile()}{wxregexcompile} +and only if {\tt wxRE\_NOSUB} was {\bf not} used. + \membersection{wxRegEx::Matches}\label{wxregexmatches} \constfunc{bool}{Matches}{\param{const wxChar* }{text}, \param{int }{flags = 0}} +\constfunc{bool}{Matches}{\param{const wxChar* }{text}, \param{int }{flags}, \param{size\_t }{len}} + +\constfunc{bool}{Matches}{\param{const wxString\& }{text}, \param{int }{flags = 0}} + Matches the precompiled regular expression against the string {\it text}, -returns {\tt TRUE} if matches and {\tt FALSE} otherwise. +returns {\tt true} if it matches and {\tt false} otherwise. + +{\it Flags} may be a combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}. 
+ +Some regex libraries assume that the text given is null terminated, while +others require the length be given as a separate parameter. Therefore for +maximum portability assume that {\it text} cannot contain embedded nulls. -Flags may be combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}. +When the {\it Matches(const wxChar *text, int flags = 0)} form is used, +a {\it wxStrlen()} will be done internally if the regex library requires the +length. When using {\it Matches()} in a loop +the {\it Matches(text, flags, len)} form can be used instead, making it +possible to avoid a {\it wxStrlen()} inside the loop. May only be called after successful call to \helpref{Compile()}{wxregexcompile}. @@ -168,20 +221,20 @@ Replaces the current regular expression in the string pointed to by {\it text}, with the text in {\it replacement} and return number of matches replaced (maybe $0$ if none found) or $-1$ on error. -The replacement text may contain back references {\tt \\number} which will be +The replacement text may contain back references {\tt $\backslash$number} which will be replaced with the value of the corresponding subexpression in the -pattern match. {\tt \\0} corresponds to the entire match and {\tt \&} is a +pattern match. {\tt $\backslash$0} corresponds to the entire match and {\tt \&} is a synonym for it. Backslash may be used to quote itself or {\tt \&} character. {\it maxMatches} may be used to limit the number of replacements made, setting -it to $1$, for example, will only replace first occurence (if any) of the +it to $1$, for example, will only replace first occurrence (if any) of the pattern in the text while default value of $0$ means replace all. \membersection{wxRegEx::ReplaceAll}\label{wxregexreplaceall} \constfunc{int}{ReplaceAll}{\param{wxString* }{text}, \param{const wxString\& }{replacement}} -Replace all occurences: this is actually a synonym for +Replace all occurrences: this is actually a synonym for \helpref{Replace()}{wxregexreplace}. 
\wxheading{See also} @@ -192,7 +245,7 @@ Replace all occurences: this is actually a synonym for \constfunc{int}{ReplaceFirst}{\param{wxString* }{text}, \param{const wxString\& }{replacement}} -Replace the first occurence. +Replace the first occurrence. \wxheading{See also}