]> git.saurik.com Git - apple/libc.git/blame - regex/FreeBSD/regex.3
Libc-1439.100.3.tar.gz
[apple/libc.git] / regex / FreeBSD / regex.3
CommitLineData
5b2abdfb
A
1.\" Copyright (c) 1992, 1993, 1994 Henry Spencer.
2.\" Copyright (c) 1992, 1993, 1994
3.\" The Regents of the University of California. All rights reserved.
4.\"
5.\" This code is derived from software contributed to Berkeley by
6.\" Henry Spencer.
7.\"
8.\" Redistribution and use in source and binary forms, with or without
9.\" modification, are permitted provided that the following conditions
10.\" are met:
11.\" 1. Redistributions of source code must retain the above copyright
12.\" notice, this list of conditions and the following disclaimer.
13.\" 2. Redistributions in binary form must reproduce the above copyright
14.\" notice, this list of conditions and the following disclaimer in the
15.\" documentation and/or other materials provided with the distribution.
5b2abdfb
A
16.\" 4. Neither the name of the University nor the names of its contributors
17.\" may be used to endorse or promote products derived from this software
18.\" without specific prior written permission.
19.\"
20.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30.\" SUCH DAMAGE.
31.\"
32.\" @(#)regex.3 8.4 (Berkeley) 3/20/94
1f2f436a 33.\" $FreeBSD: src/lib/libc/regex/regex.3,v 1.21 2007/01/09 00:28:04 imp Exp $
5b2abdfb 34.\"
ad3c9f2a 35.Dd September 29, 2011
5b2abdfb
A
36.Dt REGEX 3
37.Os
38.Sh NAME
39.Nm regcomp ,
ad3c9f2a 40.Nm regcomp_l ,
5b2abdfb 41.Nm regerror ,
ad3c9f2a
A
42.Nm regexec ,
43.Nm regfree ,
44.Nm regncomp ,
45.Nm regncomp_l ,
46.Nm regnexec ,
47.Nm regwncomp ,
48.Nm regwncomp_l ,
49.Nm regwnexec ,
50.Nm regwcomp ,
51.Nm regwcomp_l ,
52.Nm regwexec
5b2abdfb 53.Nd regular-expression library
5b2abdfb 54.Sh SYNOPSIS
ad3c9f2a
A
55.Sy (Standards-compliant APIs)
56.Pp
5b2abdfb
A
57.In regex.h
58.Ft int
9385eb3d 59.Fo regcomp
ad3c9f2a
A
60.Fa "regex_t *restrict preg"
61.Fa "const char *restrict pattern"
62.Fa "int cflags"
5b2abdfb
A
63.Fc
64.Ft size_t
65.Fo regerror
ad3c9f2a
A
66.Fa "int errcode"
67.Fa "const regex_t *restrict preg"
68.Fa "char *restrict errbuf"
69.Fa "size_t errbuf_size"
70.Fc
71.Ft int
72.Fo regexec
73.Fa "const regex_t *restrict preg"
74.Fa "const char *restrict string"
75.Fa "size_t nmatch"
76.Fa "regmatch_t pmatch[restrict]"
77.Fa "int eflags"
5b2abdfb
A
78.Fc
79.Ft void
ad3c9f2a
A
80.Fo regfree
81.Fa "regex_t *preg"
82.Fc
83.Pp
84.Sy (Non-portable extensions)
85.Ft int
86.Fo regncomp
87.Fa "regex_t *restrict preg"
88.Fa "const char *restrict pattern"
89.Fa "size_t len"
90.Fa "int cflags"
91.Fc
92.Ft int
93.Fo regnexec
94.Fa "const regex_t *restrict preg"
95.Fa "const char *restrict string"
96.Fa "size_t len"
97.Fa "size_t nmatch"
98.Fa "regmatch_t pmatch[restrict]"
99.Fa "int eflags"
100.Fc
101.Ft int
102.Fo regwcomp
103.Fa "regex_t *restrict preg"
104.Fa "const wchar_t *restrict widepat"
105.Fa "int cflags"
106.Fc
107.Ft int
108.Fo regwexec
109.Fa "const regex_t *restrict preg"
110.Fa "const wchar_t *restrict widestr"
111.Fa "size_t nmatch"
112.Fa "regmatch_t pmatch[restrict]"
113.Fa "int eflags"
114.Fc
115.Ft int
116.Fo regwncomp
117.Fa "regex_t *restrict preg"
118.Fa "const wchar_t *restrict widepat"
119.Fa "size_t len"
120.Fa "int cflags"
121.Fc
122.Ft int
123.Fo regwnexec
124.Fa "const regex_t *restrict preg"
125.Fa "const wchar_t *restrict widestr"
126.Fa "size_t len"
127.Fa "size_t nmatch"
128.Fa "regmatch_t pmatch[restrict]"
129.Fa "int eflags"
130.Fc
131.In regex.h
132.In xlocale.h
133.Ft int
134.Fo regcomp_l
135.Fa "regex_t *restrict preg"
136.Fa "const char *restrict pattern"
137.Fa "int cflags"
138.Fa "locale_t restrict"
139.Fc
140.Ft int
141.Fo regncomp_l
142.Fa "regex_t *restrict preg"
143.Fa "const char *restrict pattern"
144.Fa "size_t len"
145.Fa "int cflags"
146.Fa "locale_t restrict"
147.Fc
148.Ft int
149.Fo regwcomp_l
150.Fa "regex_t *restrict preg"
151.Fa "const wchar_t *restrict widepat"
152.Fa "int cflags"
153.Fa "locale_t restrict"
154.Fc
155.Ft int
156.Fo regwncomp_l
157.Fa "regex_t *restrict preg"
158.Fa "const wchar_t *restrict widepat"
159.Fa "size_t len"
160.Fa "int cflags"
161.Fa "locale_t restrict"
162.Fc
5b2abdfb
A
163.Sh DESCRIPTION
164These routines implement
165.St -p1003.2
166regular expressions
167.Pq Do RE Dc Ns s ;
168see
169.Xr re_format 7 .
9385eb3d
A
170The
171.Fn regcomp
172function
ad3c9f2a 173compiles an RE, written as a string, into an internal form.
5b2abdfb 174.Fn regexec
ad3c9f2a 175matches that internal form against a string and reports results.
5b2abdfb 176.Fn regerror
ad3c9f2a 177transforms error codes from either into human-readable messages.
5b2abdfb
A
178.Fn regfree
179frees any dynamically-allocated storage used by the internal form
180of an RE.
181.Pp
182The header
3d9156a7 183.In regex.h
5b2abdfb
A
184declares two structure types,
185.Ft regex_t
186and
187.Ft regmatch_t ,
188the former for compiled internal forms and the latter for match reporting.
189It also declares the four functions,
190a type
191.Ft regoff_t ,
192and a number of constants with names starting with
193.Dq Dv REG_ .
194.Pp
9385eb3d
A
195The
196.Fn regcomp
197function
5b2abdfb
A
198compiles the regular expression contained in the
199.Fa pattern
200string,
201subject to the flags in
202.Fa cflags ,
203and places the results in the
204.Ft regex_t
205structure pointed to by
206.Fa preg .
9385eb3d
A
207The
208.Fa cflags
209argument
5b2abdfb
A
210is the bitwise OR of zero or more of the following flags:
211.Bl -tag -width REG_EXTENDED
212.It Dv REG_EXTENDED
213Compile modern
214.Pq Dq extended
215REs,
216rather than the obsolete
217.Pq Dq basic
218REs that
219are the default.
220.It Dv REG_BASIC
221This is a synonym for 0,
222provided as a counterpart to
223.Dv REG_EXTENDED
224to improve readability.
225.It Dv REG_NOSPEC
226Compile with recognition of all special characters turned off.
227All characters are thus considered ordinary,
228so the
229.Dq RE
230is a literal string.
231This is an extension,
232compatible with but not specified by
233.St -p1003.2 ,
234and should be used with
235caution in software intended to be portable to other systems.
236.Dv REG_EXTENDED
237and
238.Dv REG_NOSPEC
239may not be used
240in the same call to
241.Fn regcomp .
ad3c9f2a
A
242.It Dv REG_LITERAL
243An alias of
244.Dv REG_NOSPEC .
5b2abdfb
A
245.It Dv REG_ICASE
246Compile for matching that ignores upper/lower case distinctions.
247See
248.Xr re_format 7 .
249.It Dv REG_NOSUB
250Compile for matching that need only report success or failure,
251not what was matched.
252.It Dv REG_NEWLINE
253Compile for newline-sensitive matching.
254By default, newline is a completely ordinary character with no special
255meaning in either REs or strings.
256With this flag,
257.Ql [^
258bracket expressions and
259.Ql .\&
260never match newline,
261a
262.Ql ^\&
263anchor matches the null string after any newline in the string
264in addition to its normal function,
265and the
266.Ql $\&
267anchor matches the null string before any newline in the
268string in addition to its normal function.
269.It Dv REG_PEND
ad3c9f2a
A
270(Note that
271.Dv REG_PEND
272is not recognized by any of the wide character or
273.Dq Nm n
274variants.
275Besides, the
276.Dq Nm n
277variants can be used instead of
278.Dv REG_PEND ;
279see EXTENDED APIS below.)
5b2abdfb
A
280The regular expression ends,
281not at the first NUL,
282but just before the character pointed to by the
283.Va re_endp
284member of the structure pointed to by
285.Fa preg .
286The
287.Va re_endp
288member is of type
289.Ft "const char *" .
290This flag permits inclusion of NULs in the RE;
291they are considered ordinary characters.
292This is an extension,
293compatible with but not specified by
294.St -p1003.2 ,
295and should be used with
296caution in software intended to be portable to other systems.
ad3c9f2a
A
297.It Dv REG_ENHANCED
298Recognize enhanced regular expression features; see
299.Xr re_format 7
300for details.
301This is an extension not specified by
302.St -p1003.2 ,
303and should be used with
304caution in software intended to be portable to other systems.
305.It Dv REG_MINIMAL
306Use minimal (non-greedy) repetitions instead of the normal greedy ones; see
307.Xr re_format 7
308for details.
309(This only applies when both
310.Dv REG_ENHANCED
311and
312.Dv REG_EXTENDED
313are also set.)
314This is an extension not specified by
315.St -p1003.2 ,
316and should be used with
317caution in software intended to be portable to other systems.
318.It Dv REG_UNGREEDY
319Alias of
320.Dv REG_MINIMAL .
5b2abdfb
A
321.El
322.Pp
323When successful,
324.Fn regcomp
325returns 0 and fills in the structure pointed to by
326.Fa preg .
327One member of that structure
328(other than
329.Va re_endp )
330is publicized:
331.Va re_nsub ,
332of type
333.Ft size_t ,
334contains the number of parenthesized subexpressions within the RE
335(except that the value of this member is undefined if the
336.Dv REG_NOSUB
337flag was used).
338If
339.Fn regcomp
340fails, it returns a non-zero error code;
341see
342.Sx DIAGNOSTICS .
343.Pp
9385eb3d
A
344The
345.Fn regexec
346function
5b2abdfb
A
347matches the compiled RE pointed to by
348.Fa preg
349against the
350.Fa string ,
351subject to the flags in
352.Fa eflags ,
353and reports results using
354.Fa nmatch ,
355.Fa pmatch ,
356and the returned value.
357The RE must have been compiled by a previous invocation of
358.Fn regcomp .
359The compiled form is not altered during execution of
360.Fn regexec ,
361so a single compiled RE can be used simultaneously by multiple threads.
362.Pp
363By default,
364the NUL-terminated string pointed to by
365.Fa string
366is considered to be the text of an entire line, minus any terminating
367newline.
368The
369.Fa eflags
370argument is the bitwise OR of zero or more of the following flags:
371.Bl -tag -width REG_STARTEND
372.It Dv REG_NOTBOL
373The first character of
374the string
375is not the beginning of a line, so the
376.Ql ^\&
377anchor should not match before it.
378This does not affect the behavior of newlines under
379.Dv REG_NEWLINE .
380.It Dv REG_NOTEOL
381The NUL terminating
382the string
383does not end a line, so the
384.Ql $\&
385anchor should not match before it.
386This does not affect the behavior of newlines under
387.Dv REG_NEWLINE .
388.It Dv REG_STARTEND
389The string is considered to start at
390.Fa string
391+
392.Fa pmatch Ns [0]. Ns Va rm_so
393and to have a terminating NUL located at
394.Fa string
395+
396.Fa pmatch Ns [0]. Ns Va rm_eo
397(there need not actually be a NUL at that location),
398regardless of the value of
399.Fa nmatch .
400See below for the definition of
401.Fa pmatch
402and
403.Fa nmatch .
404This is an extension,
405compatible with but not specified by
406.St -p1003.2 ,
407and should be used with
408caution in software intended to be portable to other systems.
409Note that a non-zero
410.Va rm_so
411does not imply
412.Dv REG_NOTBOL ;
413.Dv REG_STARTEND
414affects only the location of the string,
415not how it is matched.
416.El
417.Pp
418See
419.Xr re_format 7
420for a discussion of what is matched in situations where an RE or a
421portion thereof could match any of several substrings of
422.Fa string .
423.Pp
424Normally,
425.Fn regexec
426returns 0 for success and the non-zero code
427.Dv REG_NOMATCH
428for failure.
429Other non-zero error codes may be returned in exceptional situations;
430see
431.Sx DIAGNOSTICS .
432.Pp
433If
434.Dv REG_NOSUB
435was specified in the compilation of the RE,
436or if
437.Fa nmatch
438is 0,
439.Fn regexec
440ignores the
441.Fa pmatch
442argument (but see below for the case where
443.Dv REG_STARTEND
444is specified).
445Otherwise,
446.Fa pmatch
447points to an array of
448.Fa nmatch
449structures of type
450.Ft regmatch_t .
451Such a structure has at least the members
452.Va rm_so
453and
454.Va rm_eo ,
455both of type
456.Ft regoff_t
457(a signed arithmetic type at least as large as an
458.Ft off_t
459and a
460.Ft ssize_t ) ,
461containing respectively the offset of the first character of a substring
462and the offset of the first character after the end of the substring.
463Offsets are measured from the beginning of the
464.Fa string
465argument given to
466.Fn regexec .
467An empty substring is denoted by equal offsets,
468both indicating the character following the empty substring.
469.Pp
470The 0th member of the
471.Fa pmatch
472array is filled in to indicate what substring of
473.Fa string
474was matched by the entire RE.
475Remaining members report what substring was matched by parenthesized
476subexpressions within the RE;
477member
478.Va i
479reports subexpression
480.Va i ,
481with subexpressions counted (starting at 1) by the order of their opening
482parentheses in the RE, left to right.
483Unused entries in the array (corresponding either to subexpressions that
484did not participate in the match at all, or to subexpressions that do not
485exist in the RE (that is,
486.Va i
487>
488.Fa preg Ns -> Ns Va re_nsub ) )
489have both
490.Va rm_so
491and
492.Va rm_eo
493set to -1.
494If a subexpression participated in the match several times,
495the reported substring is the last one it matched.
496(Note, as an example in particular, that when the RE
497.Ql "(b*)+"
498matches
499.Ql bbb ,
500the parenthesized subexpression matches each of the three
501.So Li b Sc Ns s
502and then
503an infinite number of empty strings following the last
504.Ql b ,
505so the reported substring is one of the empties.)
506.Pp
507If
508.Dv REG_STARTEND
509is specified,
510.Fa pmatch
511must point to at least one
512.Ft regmatch_t
513(even if
514.Fa nmatch
515is 0 or
516.Dv REG_NOSUB
517was specified),
518to hold the input offsets for
519.Dv REG_STARTEND .
520Use for output is still entirely controlled by
521.Fa nmatch ;
522if
523.Fa nmatch
524is 0 or
525.Dv REG_NOSUB
526was specified,
527the value of
528.Fa pmatch Ns [0]
529will not be changed by a successful
530.Fn regexec .
531.Pp
9385eb3d
A
532The
533.Fn regerror
534function
5b2abdfb
A
535maps a non-zero
536.Fa errcode
537from either
538.Fn regcomp
539or
540.Fn regexec
541to a human-readable, printable message.
542If
543.Fa preg
544is
545.No non\- Ns Dv NULL ,
546the error code should have arisen from use of
547the
548.Ft regex_t
549pointed to by
550.Fa preg ,
551and if the error code came from
552.Fn regcomp ,
553it should have been the result from the most recent
554.Fn regcomp
555using that
556.Ft regex_t .
9385eb3d
A
557(The
558.Fn regerror
5b2abdfb
A
559may be able to supply a more detailed message using information
560from the
561.Ft regex_t . )
9385eb3d
A
562The
563.Fn regerror
564function
5b2abdfb
A
565places the NUL-terminated message into the buffer pointed to by
566.Fa errbuf ,
567limiting the length (including the NUL) to at most
568.Fa errbuf_size
569bytes.
1f2f436a 570If the whole message will not fit,
5b2abdfb
A
571as much of it as will fit before the terminating NUL is supplied.
572In any case,
573the returned value is the size of buffer needed to hold the whole
574message (including terminating NUL).
575If
576.Fa errbuf_size
577is 0,
578.Fa errbuf
579is ignored but the return value is still correct.
580.Pp
581If the
582.Fa errcode
583given to
584.Fn regerror
585is first ORed with
586.Dv REG_ITOA ,
587the
588.Dq message
589that results is the printable name of the error code,
590e.g.\&
591.Dq Dv REG_NOMATCH ,
592rather than an explanation thereof.
593If
594.Fa errcode
595is
596.Dv REG_ATOI ,
597then
598.Fa preg
599shall be
600.No non\- Ns Dv NULL
601and the
602.Va re_endp
603member of the structure it points to
604must point to the printable name of an error code;
605in this case, the result in
606.Fa errbuf
607is the decimal digits of
608the numeric value of the error code
609(0 if the name is not recognized).
610.Dv REG_ITOA
611and
612.Dv REG_ATOI
613are intended primarily as debugging facilities;
614they are extensions,
615compatible with but not specified by
616.St -p1003.2 ,
617and should be used with
618caution in software intended to be portable to other systems.
619Be warned also that they are considered experimental and changes are possible.
620.Pp
9385eb3d
A
621The
622.Fn regfree
623function
5b2abdfb
A
624frees any dynamically-allocated storage associated with the compiled RE
625pointed to by
626.Fa preg .
627The remaining
628.Ft regex_t
629is no longer a valid compiled RE
630and the effect of supplying it to
631.Fn regexec
632or
633.Fn regerror
634is undefined.
635.Pp
636None of these functions references global variables except for tables
637of constants;
638all are safe for use from multiple threads if the arguments are safe.
ad3c9f2a
A
639.Sh EXTENDED APIS
640These extended APIs are available in Mac OS X 10.8 and beyond, when the
641deployment target is 10.8 or later.
642It should also be noted that any of the
643.Fn regcomp
644variants may be used to initialize a
645.Ft regex_t
646structure, that can then be passed to any of the
647.Fn regexec
648variants.
649So it is quite legal to compile a wide character RE and use it to match a
650multibyte character string, or vice versa.
651.Pp
652The
653.Fn regncomp
654routine compiles regular expressions like
655.Fn regcomp ,
656but the length of the regular expression string is specified, allowing a string
657that is not NUL terminated and/or contains NUL characters.
658This is a modern replacement for using
659.Fn regcomp
660with the
661.Dv REG_PEND
662option.
663.Pp
664Similarly, the
665.Fn regnexec
666routine is like
667.Fn regexec ,
668but the length of the string to match is specified, allowing a string
669that is not NUL terminated and/or contains NUL characters.
670.Pp
671The
672.Fn regwcomp
673and
674.Fn regwexec
675variants take a wide-character
676.Vt ( wchar_t )
677string for the regular expression and string to match.
678And
679.Fn regwncomp
680and
681.Fn regwnexec
682are variants that allow specifying the wide character string length, and
683so allow wide character strings that are not NUL terminated and/or
684contain NUL characters.
685.Sh INTERACTION WITH THE LOCALE
686When
687.Fn regcomp
688or one of its variants is run, the regular expression is compiled into an
689internal form, which may include specific information about the locale currently
690in effect, such as equivalence classes or multi-character collation symbols.
691So a reference to the current locale is also stored with the internal form,
692so that when
693.Fn regexec
694is run, it can use the same locale (even if the locale is changed in-between
695the calls to
696.Fn regcomp
697and
698.Fn regexec ) .
699.Pp
700To provide more direct control over which locale is used,
701routines with
702.Dq Nm _l
703appended to their names are provided that work just like the variants
704without the
705.Dq Nm _l ,
706except that a locale (via a
707.Vt locale_t
708variable type) is specified directly.
709Note that only variants of
710.Fn regcomp
711have
712.Dq Nm _l
713variants, since the
714.Fn regexec
715variants just use the reference to the locale stored in the internal form.
5b2abdfb 716.Sh IMPLEMENTATION CHOICES
ad3c9f2a
A
717The
718.Nm regex
719implementation in Mac OS X 10.8 and later is based on a heavily modified subset
720of TRE (http://laurikari.net/tre/).
721This provides improved performance, better conformance and additional features.
722However, both API and binary compatibility have been maintained with previous
723releases, so binaries
724built on previous releases should work on 10.8 and later, and binaries built on
72510.8 and later should be able to run on previous releases (as long as none of
726the new variants or new features are used).
727.Pp
5b2abdfb
A
728There are a number of decisions that
729.St -p1003.2
730leaves up to the implementor,
731either by explicitly saying
732.Dq undefined
733or by virtue of them being
734forbidden by the RE grammar.
735This implementation treats them as follows.
736.Pp
737See
738.Xr re_format 7
739for a discussion of the definition of case-independent matching.
740.Pp
741There is no particular limit on the length of REs,
742except insofar as memory is limited.
743Memory usage is approximately linear in RE size, and largely insensitive
744to RE complexity, except for bounded repetitions.
745See
746.Sx BUGS
747for one short RE using them
748that will run almost any system out of memory.
749.Pp
750A backslashed character other than one specifically given a magic meaning
751by
752.St -p1003.2
753(such magic meanings occur only in obsolete
754.Bq Dq basic
755REs)
756is taken as an ordinary character.
757.Pp
758Any unmatched
759.Ql [\&
760is a
761.Dv REG_EBRACK
762error.
763.Pp
764Equivalence classes cannot begin or end bracket-expression ranges.
765The endpoint of one range cannot begin another.
766.Pp
767.Dv RE_DUP_MAX ,
768the limit on repetition counts in bounded repetitions, is 255.
769.Pp
770A repetition operator
771.Ql ( ?\& ,
772.Ql *\& ,
773.Ql +\& ,
774or bounds)
775cannot follow another
ad3c9f2a
A
776repetition operator, except for the use of
777.Ql ?\&
778for minimal repetition (for enhanced extended REs; see
779.Xr re_format 7
780for details).
5b2abdfb
A
781A repetition operator cannot begin an expression or subexpression
782or follow
783.Ql ^\&
784or
785.Ql |\& .
786.Pp
787.Ql |\&
788cannot appear first or last in a (sub)expression or after another
789.Ql |\& ,
3d9156a7 790i.e., an operand of
5b2abdfb
A
791.Ql |\&
792cannot be an empty subexpression.
793An empty parenthesized subexpression,
794.Ql "()" ,
795is legal and matches an
796empty (sub)string.
797An empty string is not a legal RE.
798.Pp
799A
800.Ql {\&
801followed by a digit is considered the beginning of bounds for a
802bounded repetition, which must then follow the syntax for bounds.
803A
804.Ql {\&
805.Em not
806followed by a digit is considered an ordinary character.
807.Pp
808.Ql ^\&
809and
810.Ql $\&
811beginning and ending subexpressions in obsolete
812.Pq Dq basic
813REs are anchors, not ordinary characters.
5b2abdfb
A
814.Sh DIAGNOSTICS
815Non-zero error codes from
816.Fn regcomp
817and
818.Fn regexec
819include the following:
820.Pp
821.Bl -tag -width REG_ECOLLATE -compact
822.It Dv REG_NOMATCH
9385eb3d 823The
5b2abdfb 824.Fn regexec
9385eb3d 825function
5b2abdfb
A
826failed to match
827.It Dv REG_BADPAT
828invalid regular expression
829.It Dv REG_ECOLLATE
830invalid collating element
831.It Dv REG_ECTYPE
832invalid character class
833.It Dv REG_EESCAPE
834.Ql \e
835applied to unescapable character
836.It Dv REG_ESUBREG
837invalid backreference number
838.It Dv REG_EBRACK
839brackets
840.Ql "[ ]"
841not balanced
842.It Dv REG_EPAREN
843parentheses
844.Ql "( )"
845not balanced
846.It Dv REG_EBRACE
847braces
848.Ql "{ }"
849not balanced
850.It Dv REG_BADBR
851invalid repetition count(s) in
852.Ql "{ }"
853.It Dv REG_ERANGE
854invalid character range in
855.Ql "[ ]"
856.It Dv REG_ESPACE
857ran out of memory
858.It Dv REG_BADRPT
859.Ql ?\& ,
860.Ql *\& ,
861or
862.Ql +\&
863operand invalid
864.It Dv REG_EMPTY
865empty (sub)expression
866.It Dv REG_ASSERT
1f2f436a 867cannot happen - you found a bug
5b2abdfb 868.It Dv REG_INVARG
3d9156a7
A
869invalid argument, e.g.\& negative-length string
870.It Dv REG_ILLSEQ
871illegal byte sequence (bad multibyte character)
5b2abdfb 872.El
1f2f436a
A
873.Sh SEE ALSO
874.Xr grep 1 ,
875.Xr re_format 7
876.Pp
877.St -p1003.2 ,
878sections 2.8 (Regular Expression Notation)
879and
880B.5 (C Binding for Regular Expression Matching).
5b2abdfb 881.Sh HISTORY
ad3c9f2a
A
882The
883.Nm regex
884implementation is based on a heavily modified subset of TRE
885(http://laurikari.net/tre/), originally written by Ville Laurikari.
886Previous releases used an implementation originally written by
887.An Henry Spencer ,
888and altered for inclusion in the
5b2abdfb
A
889.Bx 4.4
890distribution.
891.Sh BUGS
ad3c9f2a
A
892The beginning-of-line and end-of-line anchors (
893.Dq ^\&
894and
895.Dq $\& )
896are currently implemented so that repetitions can not be applied to them.
897The standards are unclear about whether this is legal, but other
898.Nm regex
899packages do support this case.
900It is best to avoid this non-portable (and not really very useful) case.
5b2abdfb
A
901.Pp
902The back-reference code is subtle and doubts linger about its correctness
903in complex cases.
904.Pp
9385eb3d
A
905The
906.Fn regexec
ad3c9f2a
A
907variants use one of two internal matching engines.
908The normal one is linear worst-case time in the length of the text being
909searched, and quadratic worst-case time in the length of the used regular
910expression.
911When back-references are used, a slower, backtracking engine is used.
912While all backtracking matching engines suffer from extreme slowness for certain
913pathological cases, the normal engine doesn't suffer from these cases.
914It is advised to avoid back-references whenever possible.
5b2abdfb 915.Pp
9385eb3d
A
916The
917.Fn regcomp
ad3c9f2a 918variants
5b2abdfb
A
919implement bounded repetitions by macro expansion,
920which is costly in time and space if counts are large
921or bounded repetitions are nested.
922An RE like, say,
923.Ql "((((a{1,100}){1,100}){1,100}){1,100}){1,100}"
924will (eventually) run almost any existing machine out of swap space.
925.Pp
5b2abdfb
A
926Due to a mistake in
927.St -p1003.2 ,
928things like
929.Ql "a)b"
930are legal REs because
931.Ql )\&
932is
933a special character only in the presence of a previous unmatched
934.Ql (\& .
1f2f436a 935This cannot be fixed until the spec is fixed.
5b2abdfb
A
936.Pp
937The standard's definition of back references is vague.
938For example, does
939.Ql "a\e(\e(b\e)*\e2\e)*d"
940match
941.Ql "abbbd" ?
942Until the standard is clarified,
943behavior in such cases should not be relied on.