| 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 2 | %% Name: regex.tex |
| 3 | %% Purpose: wxRegEx documentation |
| 4 | %% Author: Vadim Zeitlin |
| 5 | %% Modified by: |
| 6 | %% Created: 14.07.01 |
| 7 | %% RCS-ID: $Id$ |
| 8 | %% Copyright: (c) 2001 Vadim Zeitlin |
| 9 | %% License: wxWidgets license |
| 10 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
| 11 | |
| 12 | \section{\class{wxRegEx}}\label{wxregex} |
| 13 | |
| 14 | wxRegEx represents a regular expression. This class provides support |
| 15 | for regular expression matching and replacement. |
| 16 | |
| 17 | It is either built on top of the system library (if it has support |
| 18 | for POSIX regular expressions, which is the case for most modern |
| 19 | Unices) or uses the builtin Henry Spencer's library. Henry Spencer |
| 20 | would appreciate being given credit in the documentation of software |
| 21 | which uses his library, but that is not a requirement. |
| 22 | |
| 23 | Regular expressions, as defined by POSIX, come in two flavours: {\it extended} |
| 24 | and {\it basic}. The builtin library also adds a third flavour |
| 25 | of expression, \helpref{advanced}{wxresyn}, which is not available |
| 26 | when using the system library. |
| 27 | |
| 28 | Unicode is fully supported only when using the builtin library. |
| 29 | When using the system library in Unicode mode, the expressions and data |
| 30 | are translated to the default 8-bit encoding before being passed to |
| 31 | the library. |
| 32 | |
| 33 | On platforms where a system library is available, the default is to use |
| 34 | the builtin library for Unicode builds, and the system library otherwise. |
| 35 | It is possible to use the other one, if preferred, by selecting it when |
| 36 | building wxWidgets. |
| 37 | |
| 38 | \wxheading{Derived from} |
| 39 | |
| 40 | No base class |
| 41 | |
| 42 | \wxheading{Data structures} |
| 43 | |
| 44 | Flags for regex compilation to be used with \helpref{Compile()}{wxregexcompile}: |
| 45 | |
| 46 | \begin{verbatim} |
| 47 | enum |
| 48 | { |
| 49 | // use extended regex syntax |
| 50 | wxRE_EXTENDED = 0, |
| 51 | |
| 52 | // use advanced RE syntax (built-in regex only) |
| 53 | #ifdef wxHAS_REGEX_ADVANCED |
| 54 | wxRE_ADVANCED = 1, |
| 55 | #endif |
| 56 | |
| 57 | // use basic RE syntax |
| 58 | wxRE_BASIC = 2, |
| 59 | |
| 60 | // ignore case in match |
| 61 | wxRE_ICASE = 4, |
| 62 | |
| 63 | // only check match, don't set back references |
| 64 | wxRE_NOSUB = 8, |
| 65 | |
| 66 | // if not set, treat '\n' as an ordinary character, otherwise it is |
| 67 | // special: it is not matched by '.' and '^' and '$' always match |
| 68 | // after/before it regardless of the setting of wxRE_NOT[BE]OL |
| 69 | wxRE_NEWLINE = 16, |
| 70 | |
| 71 | // default flags |
| 72 | wxRE_DEFAULT = wxRE_EXTENDED |
| 73 | } |
| 74 | \end{verbatim} |
| 75 | |
| 76 | Flags for regex matching to be used with \helpref{Matches()}{wxregexmatches}. |
| 77 | |
| 78 | These flags are mainly useful when doing several matches in a long string |
| 79 | to prevent erroneous matches for {\tt '\textasciicircum'} and {\tt '\$'}: |
| 80 | |
| 81 | \begin{verbatim} |
| 82 | enum |
| 83 | { |
| 84 | // '^' doesn't match at the start of line |
| 85 | wxRE_NOTBOL = 32, |
| 86 | |
| 87 | // '$' doesn't match at the end of line |
| 88 | wxRE_NOTEOL = 64 |
| 89 | } |
| 90 | \end{verbatim} |
| 91 | |
| 92 | \wxheading{Examples} |
| 93 | |
| 94 | A bad example of processing some text containing email addresses (the example |
| 95 | is bad because the real email addresses can have more complicated form than |
| 96 | {\tt user@host.net}): |
| 97 | |
| 98 | \begin{verbatim} |
| 99 | wxString text; |
| 100 | ... |
| 101 | wxRegEx reEmail = wxT("([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)"); |
| 102 | if ( reEmail.Matches(text) ) |
| 103 | { |
| 104 | wxString address = reEmail.GetMatch(text); |
| 105 | wxString username = reEmail.GetMatch(text, 1); |
| 106 | if ( reEmail.GetMatch(text, 3) == wxT("com") ) // .com TLD? |
| 107 | { |
| 108 | ... |
| 109 | } |
| 110 | } |
| 111 | |
| 112 | // or we could do this to hide the email address |
| 113 | int count = reEmail.ReplaceAll(&text, wxT("HIDDEN@\\2\\3")); |
| 114 | printf("text now contains %d hidden addresses", count); |
| 115 | \end{verbatim} |
| 116 | |
| 117 | \latexignore{\rtfignore{\wxheading{Members}}} |
| 118 | |
| 119 | \membersection{wxRegEx::wxRegEx}\label{wxregexwxregex} |
| 120 | |
| 121 | \func{}{wxRegEx}{\void} |
| 122 | |
| 123 | Default ctor: use \helpref{Compile()}{wxregexcompile} later. |
| 124 | |
| 125 | \func{}{wxRegEx}{\param{const wxString\& }{expr}, \param{int }{flags = wxRE\_DEFAULT}} |
| 126 | |
| 127 | Create and compile the regular expression, use |
| 128 | \helpref{IsValid}{wxregexisvalid} to test for compilation errors. |
| 129 | |
| 130 | \membersection{wxRegEx::\destruct{wxRegEx}}\label{wxregexdtor} |
| 131 | |
| 132 | \func{}{\destruct{wxRegEx}}{\void} |
| 133 | |
| 134 | The destructor is not virtual, so don't derive from this class. |
| 135 | |
| 136 | \membersection{wxRegEx::Compile}\label{wxregexcompile} |
| 137 | |
| 138 | \func{bool}{Compile}{\param{const wxString\& }{pattern}, \param{int }{flags = wxRE\_DEFAULT}} |
| 139 | |
| 140 | Compiles the string into a regular expression. Returns {\tt true} if successful |
| 141 | or {\tt false} if the string has a syntax error. |
| 142 | |
| 143 | \membersection{wxRegEx::IsValid}\label{wxregexisvalid} |
| 144 | |
| 145 | \constfunc{bool}{IsValid}{\void} |
| 146 | |
| 147 | Return {\tt true} if this is a valid compiled regular expression, {\tt false} |
| 148 | otherwise. |
| 149 | |
| 150 | \membersection{wxRegEx::GetMatch}\label{wxregexgetmatch} |
| 151 | |
| 152 | \constfunc{bool}{GetMatch}{\param{size\_t* }{start}, \param{size\_t* }{len}, \param{size\_t }{index = 0}} |
| 153 | |
| 154 | Get the start index and the length of the match of the expression |
| 155 | (if {\it index} is $0$) or a bracketed subexpression ({\it index} different |
| 156 | from $0$). |
| 157 | |
| 158 | May only be called after successful call to \helpref{Matches()}{wxregexmatches} |
| 159 | and only if {\tt wxRE\_NOSUB} was {\bf not} used in |
| 160 | \helpref{Compile()}{wxregexcompile}. |
| 161 | |
| 162 | Returns {\tt false} if no match or if an error occurred. |
| 163 | |
| 164 | \constfunc{wxString}{GetMatch}{\param{const wxString\& }{text}, \param{size\_t }{index = 0}} |
| 165 | |
| 166 | Returns the part of string corresponding to the match where {\it index} is |
| 167 | interpreted as above. An empty string is returned if the match failed. |
| 168 | |
| 169 | May only be called after successful call to \helpref{Matches()}{wxregexmatches} |
| 170 | and only if {\tt wxRE\_NOSUB} was {\bf not} used in |
| 171 | \helpref{Compile()}{wxregexcompile}. |
| 172 | |
| 173 | \membersection{wxRegEx::GetMatchCount}\label{wxregexgetmatchcount} |
| 174 | |
| 175 | \constfunc{size\_t}{GetMatchCount}{\void} |
| 176 | |
| 177 | Returns the size of the array of matches, i.e. the number of bracketed |
| 178 | subexpressions plus one for the expression itself, or $0$ on error. |
| 179 | |
| 180 | May only be called after successful call to \helpref{Compile()}{wxregexcompile} |
| 181 | and only if {\tt wxRE\_NOSUB} was {\bf not} used. |
| 182 | |
| 183 | \membersection{wxRegEx::Matches}\label{wxregexmatches} |
| 184 | |
| 185 | \constfunc{bool}{Matches}{\param{const wxChar* }{text}, \param{int }{flags = 0}} |
| 186 | |
| 187 | Matches the precompiled regular expression against the string {\it text}, |
| 188 | returns {\tt true} if matches and {\tt false} otherwise. |
| 189 | |
| 190 | Flags may be a combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}. |
| 191 | |
| 192 | May only be called after successful call to \helpref{Compile()}{wxregexcompile}. |
| 193 | |
| 194 | \membersection{wxRegEx::Replace}\label{wxregexreplace} |
| 195 | |
| 196 | \constfunc{int}{Replace}{\param{wxString* }{text}, \param{const wxString\& }{replacement}, \param{size\_t }{maxMatches = 0}} |
| 197 | |
| 198 | Replaces the matches of the current regular expression in the string pointed to by |
| 199 | {\it text} with the text in {\it replacement} and returns the number of matches |
| 200 | replaced (may be $0$ if none found) or $-1$ on error. |
| 201 | |
| 202 | The replacement text may contain back references {\tt $\backslash$number} which will be |
| 203 | replaced with the value of the corresponding subexpression in the |
| 204 | pattern match. {\tt $\backslash$0} corresponds to the entire match and {\tt \&} is a |
| 205 | synonym for it. Backslash may be used to quote itself or {\tt \&} character. |
| 206 | |
| 207 | {\it maxMatches} may be used to limit the number of replacements made, setting |
| 208 | it to $1$, for example, will only replace the first occurrence (if any) of the |
| 209 | pattern in the text while default value of $0$ means replace all. |
| 210 | |
| 211 | \membersection{wxRegEx::ReplaceAll}\label{wxregexreplaceall} |
| 212 | |
| 213 | \constfunc{int}{ReplaceAll}{\param{wxString* }{text}, \param{const wxString\& }{replacement}} |
| 214 | |
| 215 | Replace all occurrences: this is actually a synonym for |
| 216 | \helpref{Replace()}{wxregexreplace}. |
| 217 | |
| 218 | \wxheading{See also} |
| 219 | |
| 220 | \helpref{ReplaceFirst}{wxregexreplacefirst} |
| 221 | |
| 222 | \membersection{wxRegEx::ReplaceFirst}\label{wxregexreplacefirst} |
| 223 | |
| 224 | \constfunc{int}{ReplaceFirst}{\param{wxString* }{text}, \param{const wxString\& }{replacement}} |
| 225 | |
| 226 | Replace the first occurrence. |
| 227 | |
| 228 | \wxheading{See also} |
| 229 | |
| 230 | \helpref{Replace}{wxregexreplace} |
| 231 | |