From a2acc3b918df953836f263e48937c8e5a06f13b6 Mon Sep 17 00:00:00 2001 From: Ryan Norton Date: Mon, 29 Jul 2002 10:56:07 +0000 Subject: [PATCH 1/1] Tcl regex lib git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@16311 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775 --- src/regex/COPYRIGHT | 176 ++++++- src/regex/Makefile | 144 +----- src/regex/cclass.h | 20 - src/regex/cname.h | 102 ----- src/regex/debug.c | 242 ---------- src/regex/engine.c | 1019 ----------------------------------------- src/regex/re_main.c | 510 --------------------- src/regex/regc_cvec.c | 189 ++++++++ src/regex/regex.7 | 235 ---------- src/regex/regex2.h | 134 ------ src/regex/split.c | 316 ------------- src/regex/utils.h | 22 - 12 files changed, 371 insertions(+), 2738 deletions(-) delete mode 100644 src/regex/cclass.h delete mode 100644 src/regex/cname.h delete mode 100644 src/regex/debug.c delete mode 100644 src/regex/engine.c delete mode 100644 src/regex/re_main.c create mode 100644 src/regex/regc_cvec.c delete mode 100644 src/regex/regex.7 delete mode 100644 src/regex/regex2.h delete mode 100644 src/regex/split.c delete mode 100644 src/regex/utils.h diff --git a/src/regex/COPYRIGHT b/src/regex/COPYRIGHT index 30c1f7a488..65aaadd6cf 100644 --- a/src/regex/COPYRIGHT +++ b/src/regex/COPYRIGHT @@ -1,20 +1,166 @@ -Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved. -This software is not subject to any license of the American Telephone -and Telegraph Company or of the Regents of the University of California. +This regular expression package was originally developed by Henry Spencer. +It bears the following copyright notice: -Permission is granted to anyone to use this software for any purpose on -any computer system, and to alter it and redistribute it, subject -to the following restrictions: +********************************************************************** -1. The author is not responsible for the consequences of use of this - software, no matter how awful, even if they arise from flaws in it. +Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. Since few users ever read sources, - credits must appear in the documentation. +Development of this software was funded, in part, by Cray Research Inc., +UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics +Corporation, none of whom are responsible for the results. The author +thanks all of them. -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. Since few users - ever read sources, credits must appear in the documentation. +Redistribution and use in source and binary forms -- with or without +modification -- are permitted for any purpose, provided that +redistributions in source form retain this entire copyright notice and +indicate the origin and nature of any modifications. + +I'd appreciate being given credit for this package in the documentation +of software which uses it, but that is not a requirement. + +THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL +HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +********************************************************************** + +PostgreSQL adopted the code out of Tcl 8.4.1. Portions of regc_locale.c +and re_syntax.n were developed by Tcl developers other than Henry; these +files bear the Tcl copyright and license notice: + +********************************************************************** + +This software is copyrighted by the Regents of the University of +California, Sun Microsystems, Inc., Scriptics Corporation, ActiveState +Corporation and other parties. The following terms apply to all files +associated with the software unless explicitly disclaimed in +individual files. + +The authors hereby grant permission to use, copy, modify, distribute, +and license this software and its documentation for any purpose, provided +that existing copyright notices are retained in all copies and that this +notice is included verbatim in any distributions. No written agreement, +license, or royalty fee is required for any of the authorized uses. +Modifications to this software may be copyrighted by their authors +and need not follow the licensing terms described here, provided that +the new terms are clearly indicated on the first page of each file where +they apply. + +IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY +FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES +ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY +DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE +IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE +NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR +MODIFICATIONS. + +GOVERNMENT USE: If you are acquiring this software on behalf of the +U.S. government, the Government shall have only "Restricted Rights" +in the software and related documentation as defined in the Federal +Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you +are acquiring the software on behalf of the Department of Defense, the +software shall be classified as "Commercial Computer Software" and the +Government shall have only "Restricted Rights" as defined in Clause +252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the +authors grant the U.S. Government and others acting in its behalf +permission to use and distribute the software in accordance with the +terms specified in this license. + +********************************************************************** + +Subsequent modifications to the code by the PostgreSQL project follow +the same license terms as the rest of PostgreSQL. +(License follows) +**************************************************************************** +PostgreSQL Database Management System +(formerly known as Postgres, then as Postgres95) + +Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group + +Portions Copyright (c) 1994, The Regents of the University of California + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose, without fee, and without a written agreement +is hereby granted, provided that the above copyright notice and this +paragraph and the following two paragraphs appear in all copies. + +IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR +DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING +LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS +DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + +THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY +AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS +ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO +PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. +**************************************************************************** +And if that's not enough, changes made from wxWindows are put under the +wxWindows license: +**************************************************************************** + wxWindows Library Licence, Version 3 + ==================================== + + Copyright (C) 1998 Julian Smart, Robert Roebling [, ...] + + Everyone is permitted to copy and distribute verbatim copies + of this licence document, but changing it is not allowed. + + WXWINDOWS LIBRARY LICENCE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + This library is free software; you can redistribute it and/or modify it + under the terms of the GNU Library General Public Licence as published by + the Free Software Foundation; either version 2 of the Licence, or (at + your option) any later version. + + This library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library + General Public Licence for more details. + + You should have received a copy of the GNU Library General Public Licence + along with this software, usually in a file named COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, + Boston, MA 02111-1307 USA. + + EXCEPTION NOTICE + + 1. As a special exception, the copyright holders of this library give + permission for additional uses of the text contained in this release of + the library as licenced under the wxWindows Library Licence, applying + either version 3 of the Licence, or (at your option) any later version of + the Licence as published by the copyright holders of version 3 of the + Licence document. + + 2. The exception is that you may use, copy, link, modify and distribute + under the user's own terms, binary object code versions of works based + on the Library. + + 3. If you copy code from files distributed under the terms of the GNU + General Public Licence or the GNU Library General Public Licence into a + copy of this library, as this licence permits, the exception does not + apply to the code that you add in this way. To avoid misleading anyone as + to the status of such modified files, you must delete this exception + notice from such code and/or adjust the licensing conditions notice + accordingly. + + 4. If you write modifications of your own for this library, it is your + choice whether to permit this exception to apply to your modifications. + If you do not wish that, you must delete the exception notice from such + code and/or adjust the licensing conditions notice accordingly. +**************************************************************************** -4. This notice may not be removed or altered. diff --git a/src/regex/Makefile b/src/regex/Makefile index ce20561fa9..cb76d37ea7 100644 --- a/src/regex/Makefile +++ b/src/regex/Makefile @@ -1,130 +1,28 @@ -# You probably want to take -DREDEBUG out of CFLAGS, and put something like -# -O in, *after* testing (-DREDEBUG strengthens testing by enabling a lot of -# internal assertion checking and some debugging facilities). -# Put -Dconst= in for a pre-ANSI compiler. -# Do not take -DPOSIX_MISTAKE out. -# REGCFLAGS isn't important to you (it's for my use in some special contexts). -CFLAGS=-I. -DPOSIX_MISTAKE -DREDEBUG $(REGCFLAGS) +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for backend/regex +# +# IDENTIFICATION +# $Header: /projects/cvsroot/pgsql-server/src/backend/regex/Makefile,v 1.20 2003/02/05 17:41:32 tgl Exp $ +# +#------------------------------------------------------------------------- -# If you have a pre-ANSI compiler, put -o into MKHFLAGS. If you want -# the Berkeley __P macro, put -b in. -MKHFLAGS= +subdir = src/backend/regex +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global -# Flags for linking but not compiling, if any. -LDFLAGS= +OBJS = regcomp.o regerror.o regexec.o regfree.o -# Extra libraries for linking, if any. -LIBS= +all: SUBSYS.o -# Internal stuff, should not need changing. -OBJPRODN=regcomp.o regexec.o regerror.o regfree.o -OBJS=$(OBJPRODN) split.o debug.o re_main.o -H=cclass.h cname.h regex2.h utils.h -REGSRC=regcomp.c regerror.c regexec.c regfree.c -ALLSRC=$(REGSRC) engine.c debug.c re_main.c split.c +SUBSYS.o: $(OBJS) + $(LD) $(LDREL) $(LDOUT) SUBSYS.o $(OBJS) -# Stuff that matters only if you're trying to lint the package. -LINTFLAGS=-I. -Dstatic= -Dconst= -DREDEBUG -LINTC=regcomp.c regexec.c regerror.c regfree.c debug.c re_main.c -JUNKLINT=possible pointer alignment|null effect +# mark inclusion dependencies between .c files explicitly +regcomp.o: regcomp.c regc_lex.c regc_color.c regc_nfa.c regc_cvec.c regc_locale.c -# arrangements to build forward-reference header files -.SUFFIXES: .ih .h -.c.ih: - sh ./mkh $(MKHFLAGS) -p $< >$@ +regexec.o: regexec.c rege_dfa.c -default: r - -lib: purge $(OBJPRODN) - rm -f libregex.a - ar crv libregex.a $(OBJPRODN) - -purge: - rm -f *.o - -# stuff to build regex.h -REGEXH=regex.h -REGEXHSRC=regex2.h $(REGSRC) -$(REGEXH): $(REGEXHSRC) mkh - sh ./mkh $(MKHFLAGS) -i _REGEX_H_ $(REGEXHSRC) >regex.tmp - cmp -s regex.tmp regex.h 2>/dev/null || cp regex.tmp regex.h - rm -f regex.tmp - -# dependencies -$(OBJPRODN) debug.o: utils.h regex.h regex2.h -regcomp.o: cclass.h cname.h regcomp.ih -regexec.o: engine.c engine.ih -regerror.o: regerror.ih -debug.o: debug.ih -re_main.o: re_main.ih - -# tester -re: $(OBJS) - $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS) -o $@ - -# regression test -r: re tests - ./re &1 | egrep -v '$(JUNKLINT)' | tee lint - -fullprint: - ti README WHATSNEW notes todo | list - ti *.h | list - list *.c - list regex.3 regex.7 - -print: - ti README WHATSNEW notes todo | list - ti *.h | list - list reg*.c engine.c - - -mf.tmp: Makefile - sed '/^REGEXH=/s/=.*/=regex.h/' Makefile | sed '/#DEL$$/d' >$@ - -DTRH=cclass.h cname.h regex2.h utils.h -PRE=COPYRIGHT README WHATSNEW -POST=mkh regex.3 regex.7 tests $(DTRH) $(ALLSRC) fake/*.[ch] -FILES=$(PRE) Makefile $(POST) -DTR=$(PRE) Makefile=mf.tmp $(POST) -dtr: $(FILES) mf.tmp - makedtr $(DTR) >$@ - rm mf.tmp - -cio: $(FILES) - cio $(FILES) - -rdf: $(FILES) - rcsdiff -c $(FILES) 2>&1 | p - -# various forms of cleanup -tidy: - rm -f junk* core core.* *.core dtr *.tmp lint - -clean: tidy - rm -f *.o *.s *.ih re libregex.a - -# don't do this one unless you know what you're doing -spotless: clean - rm -f mkh regex.h +clean: + rm -f SUBSYS.o $(OBJS) diff --git a/src/regex/cclass.h b/src/regex/cclass.h deleted file mode 100644 index 2b50a76197..0000000000 --- a/src/regex/cclass.h +++ /dev/null @@ -1,20 +0,0 @@ -/* character-class table */ -static struct cclass { - char *name; - char *chars; - char *multis; -} cclasses[] = { - { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "" }, - { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "" }, - { "blank", " \t", "" }, - { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", "" }, - { "digit", "0123456789", "" }, - { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" }, - { "lower", "abcdefghijklmnopqrstuvwxyz", "" }, - { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "" }, - { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" }, - { "space", "\t\n\v\f\r ", "" }, - { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "" }, - { "xdigit", "0123456789ABCDEFabcdef", "" }, - { NULL, 0, "" } -}; diff --git a/src/regex/cname.h b/src/regex/cname.h deleted file mode 100644 index c1a6dd5656..0000000000 --- a/src/regex/cname.h +++ /dev/null @@ -1,102 +0,0 @@ -/* character-name table */ -static struct cname { - char *name; - char code; -} cnames[] = { - { "NUL", '\0' }, - { "SOH", '\001' }, - { "STX", '\002' }, - { "ETX", '\003' }, - { "EOT", '\004' }, - { "ENQ", '\005' }, - { "ACK", '\006' }, - { "BEL", '\007' }, - { "alert", '\007' }, - { "BS", '\010' }, - { "backspace", '\b' }, - { "HT", '\011' }, - { "tab", '\t' }, - { "LF", '\012' }, - { "newline", '\n' }, - { "VT", '\013' }, - { "vertical-tab", '\v' }, - { "FF", '\014' }, - { "form-feed", '\f' }, - { "CR", '\015' }, - { "carriage-return", '\r' }, - { "SO", '\016' }, - { "SI", '\017' }, - { "DLE", '\020' }, - { "DC1", '\021' }, - { "DC2", '\022' }, - { "DC3", '\023' }, - { "DC4", '\024' }, - { "NAK", '\025' }, - { "SYN", '\026' }, - { "ETB", '\027' }, - { "CAN", '\030' }, - { "EM", '\031' }, - { "SUB", '\032' }, - { "ESC", '\033' }, - { "IS4", '\034' }, - { "FS", '\034' }, - { "IS3", '\035' }, - { "GS", '\035' }, - { "IS2", '\036' }, - { "RS", '\036' }, - { "IS1", '\037' }, - { "US", '\037' }, - { "space", ' ' }, - { "exclamation-mark", '!' }, - { "quotation-mark", '"' }, - { "number-sign", '#' }, - { "dollar-sign", '$' }, - { "percent-sign", '%' }, - { "ampersand", '&' }, - { "apostrophe", '\'' }, - { "left-parenthesis", '(' }, - { "right-parenthesis", ')' }, - { "asterisk", '*' }, - { "plus-sign", '+' }, - { "comma", ',' }, - { "hyphen", '-' }, - { "hyphen-minus", '-' }, - { "period", '.' }, - { "full-stop", '.' }, - { "slash", '/' }, - { "solidus", '/' }, - { "zero", '0' }, - { "one", '1' }, - { "two", '2' }, - { "three", '3' }, - { "four", '4' }, - { "five", '5' }, - { "six", '6' }, - { "seven", '7' }, - { "eight", '8' }, - { "nine", '9' }, - { "colon", ':' }, - { "semicolon", ';' }, - { "less-than-sign", '<' }, - { "equals-sign", '=' }, - { "greater-than-sign", '>' }, - { "question-mark", '?' }, - { "commercial-at", '@' }, - { "left-square-bracket", '[' }, - { "backslash", '\\' }, - { "reverse-solidus", '\\' }, - { "right-square-bracket", ']' }, - { "circumflex", '^' }, - { "circumflex-accent", '^' }, - { "underscore", '_' }, - { "low-line", '_' }, - { "grave-accent", '`' }, - { "left-brace", '{' }, - { "left-curly-bracket", '{' }, - { "vertical-line", '|' }, - { "right-brace", '}' }, - { "right-curly-bracket", '}' }, - { "tilde", '~' }, - { "DEL", '\177' }, - { NULL, 0 }, -}; diff --git a/src/regex/debug.c b/src/regex/debug.c deleted file mode 100644 index bf40bbb3bd..0000000000 --- a/src/regex/debug.c +++ /dev/null @@ -1,242 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include "regex.h" - -#include "utils.h" -#include "regex2.h" -#include "debug.ih" - -/* - - regprint - print a regexp for debugging - == void regprint(regex_t *r, FILE *d); - */ -void -regprint(r, d) -regex_t *r; -FILE *d; -{ - register struct re_guts *g = r->re_g; - register int i; - register int c; - register int last; - int nincat[NC]; - - fprintf(d, "%ld states, %d categories", (long)g->nstates, - g->ncategories); - fprintf(d, ", first %ld last %ld", (long)g->firststate, - (long)g->laststate); - if (g->iflags&USEBOL) - fprintf(d, ", USEBOL"); - if (g->iflags&USEEOL) - fprintf(d, ", USEEOL"); - if (g->iflags&BAD) - fprintf(d, ", BAD"); - if (g->nsub > 0) - fprintf(d, ", nsub=%ld", (long)g->nsub); - if (g->must != NULL) - fprintf(d, ", must(%ld) `%*s'", (long)g->mlen, (int)g->mlen, - g->must); - if (g->backrefs) - fprintf(d, ", backrefs"); - if (g->nplus > 0) - fprintf(d, ", nplus %ld", (long)g->nplus); - fprintf(d, "\n"); - s_print(g, d); - for (i = 0; i < g->ncategories; i++) { - nincat[i] = 0; - for (c = CHAR_MIN; c <= CHAR_MAX; c++) - if (g->categories[c] == i) - nincat[i]++; - } - fprintf(d, "cc0#%d", nincat[0]); - for (i = 1; i < g->ncategories; i++) - if (nincat[i] == 1) { - for (c = CHAR_MIN; c <= CHAR_MAX; c++) - if (g->categories[c] == i) - break; - fprintf(d, ", %d=%s", i, regchar(c)); - } - fprintf(d, "\n"); - for (i = 1; i < g->ncategories; i++) - if (nincat[i] != 1) { - fprintf(d, "cc%d\t", i); - last = -1; - for (c = CHAR_MIN; c <= CHAR_MAX+1; c++) /* +1 does flush */ - if (c <= CHAR_MAX && g->categories[c] == i) { - if (last < 0) { - fprintf(d, "%s", regchar(c)); - last = c; - } - } else { - if (last >= 0) { - if (last != c-1) - fprintf(d, "-%s", - regchar(c-1)); - last = -1; - } - } - fprintf(d, "\n"); - } -} - -/* - - s_print - print the strip for debugging - == static void s_print(register struct re_guts *g, FILE *d); - */ -static void -s_print(g, d) -register struct re_guts *g; -FILE *d; -{ - register sop *s; - register cset *cs; - register int i; - register int done = 0; - register sop opnd; - register int col = 0; - register int last; - register sopno offset = 2; -# define GAP() { if (offset % 5 == 0) { \ - if (col > 40) { \ - fprintf(d, "\n\t"); \ - col = 0; \ - } else { \ - fprintf(d, " "); \ - col++; \ - } \ - } else \ - col++; \ - offset++; \ - } - - if (OP(g->strip[0]) != OEND) - fprintf(d, "missing initial OEND!\n"); - for (s = &g->strip[1]; !done; s++) { - opnd = OPND(*s); - switch (OP(*s)) { - case OEND: - fprintf(d, "\n"); - done = 1; - break; - case OCHAR: - if (strchr("\\|()^$.[+*?{}!<> ", (char)opnd) != NULL) - fprintf(d, "\\%c", (char)opnd); - else - fprintf(d, "%s", regchar((char)opnd)); - break; - case OBOL: - fprintf(d, "^"); - break; - case OEOL: - fprintf(d, "$"); - break; - case OBOW: - fprintf(d, "\\{"); - break; - case OEOW: - fprintf(d, "\\}"); - break; - case OANY: - fprintf(d, "."); - break; - case OANYOF: - fprintf(d, "[(%ld)", (long)opnd); - cs = &g->sets[opnd]; - last = -1; - for (i = 0; i < g->csetsize+1; i++) /* +1 flushes */ - if (CHIN(cs, i) && i < g->csetsize) { - if (last < 0) { - fprintf(d, "%s", regchar(i)); - last = i; - } - } else { - if (last >= 0) { - if (last != i-1) - fprintf(d, "-%s", - regchar(i-1)); - last = -1; - } - } - fprintf(d, "]"); - break; - case OBACK_: - fprintf(d, "(\\<%ld>", (long)opnd); - break; - case O_BACK: - fprintf(d, "<%ld>\\)", (long)opnd); - break; - case OPLUS_: - fprintf(d, "(+"); - if (OP(*(s+opnd)) != O_PLUS) - fprintf(d, "<%ld>", (long)opnd); - break; - case O_PLUS: - if (OP(*(s-opnd)) != OPLUS_) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, "+)"); - break; - case OQUEST_: - fprintf(d, "(?"); - if (OP(*(s+opnd)) != O_QUEST) - fprintf(d, "<%ld>", (long)opnd); - break; - case O_QUEST: - if (OP(*(s-opnd)) != OQUEST_) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, "?)"); - break; - case OLPAREN: - fprintf(d, "((<%ld>", (long)opnd); - break; - case ORPAREN: - fprintf(d, "<%ld>))", (long)opnd); - break; - case OCH_: - fprintf(d, "<"); - if (OP(*(s+opnd)) != OOR2) - fprintf(d, "<%ld>", (long)opnd); - break; - case OOR1: - if (OP(*(s-opnd)) != OOR1 && OP(*(s-opnd)) != OCH_) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, "|"); - break; - case OOR2: - fprintf(d, "|"); - if (OP(*(s+opnd)) != OOR2 && OP(*(s+opnd)) != O_CH) - fprintf(d, "<%ld>", (long)opnd); - break; - case O_CH: - if (OP(*(s-opnd)) != OOR1) - fprintf(d, "<%ld>", (long)opnd); - fprintf(d, ">"); - break; - default: - fprintf(d, "!%d(%d)!", OP(*s), opnd); - break; - } - if (!done) - GAP(); - } -} - -/* - - regchar - make a character printable - == static char *regchar(int ch); - */ -static char * /* -> representation */ -regchar(ch) -int ch; -{ - static char buf[10]; - - if (isprint(ch) || ch == ' ') - sprintf(buf, "%c", ch); - else - sprintf(buf, "\\%o", ch); - return(buf); -} diff --git a/src/regex/engine.c b/src/regex/engine.c deleted file mode 100644 index 0b88dcf1ed..0000000000 --- a/src/regex/engine.c +++ /dev/null @@ -1,1019 +0,0 @@ -/* - * The matching engine and friends. This file is #included by regexec.c - * after suitable #defines of a variety of macros used herein, so that - * different state representations can be used without duplicating masses - * of code. - */ - -#ifdef SNAMES -#define matcher smatcher -#define fast sfast -#define slow sslow -#define dissect sdissect -#define backref sbackref -#define step sstep -#define print sprint -#define at sat -#define match smat -#endif -#ifdef LNAMES -#define matcher lmatcher -#define fast lfast -#define slow lslow -#define dissect ldissect -#define backref lbackref -#define step lstep -#define print lprint -#define at lat -#define match lmat -#endif - -/* another structure passed up and down to avoid zillions of parameters */ -struct match { - struct re_guts *g; - int eflags; - regmatch_t *pmatch; /* [nsub+1] (0 element unused) */ - char *offp; /* offsets work from here */ - char *beginp; /* start of string -- virtual NUL precedes */ - char *endp; /* end of string -- virtual NUL here */ - char *coldp; /* can be no match starting before here */ - char **lastpos; /* [nplus+1] */ - STATEVARS; - states st; /* current states */ - states fresh; /* states for a fresh start */ - states tmp; /* temporary */ - states empty; /* empty set of states */ -}; - -#include "engine.ih" - -#ifdef REDEBUG -#define SP(t, s, c) print(m, t, s, c, stdout) -#define AT(t, p1, p2, s1, s2) at(m, t, p1, p2, s1, s2) -#define NOTE(str) { if (m->eflags®_TRACE) printf("=%s\n", (str)); } -#else -#define SP(t, s, c) /* nothing */ -#define AT(t, p1, p2, s1, s2) /* nothing */ -#define NOTE(s) /* nothing */ -#endif - -/* - - matcher - the actual matching engine - == static int matcher(register struct re_guts *g, char *string, \ - == size_t nmatch, regmatch_t pmatch[], int eflags); - */ -static int /* 0 success, REG_NOMATCH failure */ -matcher(g, string, nmatch, pmatch, eflags) -register struct re_guts *g; -char *string; -size_t nmatch; -regmatch_t pmatch[]; -int eflags; -{ - register char *endp; - register size_t i; - struct match mv; - register struct match *m = &mv; - register char *dp; - const register sopno gf = g->firststate+1; /* +1 for OEND */ - const register sopno gl = g->laststate; - char *start; - char *stop; - - /* simplify the situation where possible */ - if (g->cflags®_NOSUB) - nmatch = 0; - if (eflags®_STARTEND) { - start = string + pmatch[0].rm_so; - stop = string + pmatch[0].rm_eo; - } else { - start = string; - stop = start + strlen(start); - } - if (stop < start) - return(REG_INVARG); - - /* prescreening; this does wonders for this rather slow code */ - if (g->must != NULL) { - for (dp = start; dp < stop; dp++) - if (*dp == g->must[0] && stop - dp >= g->mlen && - memcmp(dp, g->must, (size_t)g->mlen) == 0) - break; - if (dp == stop) /* we didn't find g->must */ - return(REG_NOMATCH); - } - - /* match struct setup */ - m->g = g; - m->eflags = eflags; - m->pmatch = NULL; - m->lastpos = NULL; - m->offp = string; - m->beginp = start; - m->endp = stop; - STATESETUP(m, 4); - SETUP(m->st); - SETUP(m->fresh); - SETUP(m->tmp); - SETUP(m->empty); - CLEAR(m->empty); - - /* this loop does only one repetition except for backrefs */ - for (;;) { - endp = fast(m, start, stop, gf, gl); - if (endp == NULL) { /* a miss */ - STATETEARDOWN(m); - return(REG_NOMATCH); - } - if (nmatch == 0 && !g->backrefs) - break; /* no further info needed */ - - /* where? */ - assert(m->coldp != NULL); - for (;;) { - NOTE("finding start"); - endp = slow(m, m->coldp, stop, gf, gl); - if (endp != NULL) - break; - assert(m->coldp < m->endp); - m->coldp++; - } - if (nmatch == 1 && !g->backrefs) - break; /* no further info needed */ - - /* oh my, he wants the subexpressions... */ - if (m->pmatch == NULL) - m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) * - sizeof(regmatch_t)); - if (m->pmatch == NULL) { - STATETEARDOWN(m); - return(REG_ESPACE); - } - for (i = 1; i <= m->g->nsub; i++) - m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1; - if (!g->backrefs && !(m->eflags®_BACKR)) { - NOTE("dissecting"); - dp = dissect(m, m->coldp, endp, gf, gl); - } else { - if (g->nplus > 0 && m->lastpos == NULL) - m->lastpos = (char **)malloc((g->nplus+1) * - sizeof(char *)); - if (g->nplus > 0 && m->lastpos == NULL) { - free(m->pmatch); - STATETEARDOWN(m); - return(REG_ESPACE); - } - NOTE("backref dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); - } - if (dp != NULL) - break; - - /* uh-oh... we couldn't find a subexpression-level match */ - assert(g->backrefs); /* must be back references doing it */ - assert(g->nplus == 0 || m->lastpos != NULL); - for (;;) { - if (dp != NULL || endp <= m->coldp) - break; /* defeat */ - NOTE("backoff"); - endp = slow(m, m->coldp, endp-1, gf, gl); - if (endp == NULL) - break; /* defeat */ - /* try it on a shorter possibility */ -#ifndef NDEBUG - for (i = 1; i <= m->g->nsub; i++) { - assert(m->pmatch[i].rm_so == -1); - assert(m->pmatch[i].rm_eo == -1); - } -#endif - NOTE("backoff dissect"); - dp = backref(m, m->coldp, endp, gf, gl, (sopno)0); - } - assert(dp == NULL || dp == endp); - if (dp != NULL) /* found a shorter one */ - break; - - /* despite initial appearances, there is no match here */ - NOTE("false alarm"); - start = m->coldp + 1; /* recycle starting later */ - assert(start <= stop); - } - - /* fill in the details if requested */ - if (nmatch > 0) { - pmatch[0].rm_so = m->coldp - m->offp; - pmatch[0].rm_eo = endp - m->offp; - } - if (nmatch > 1) { - assert(m->pmatch != NULL); - for (i = 1; i < nmatch; i++) - if (i <= m->g->nsub) - pmatch[i] = m->pmatch[i]; - else { - pmatch[i].rm_so = -1; - pmatch[i].rm_eo = -1; - } - } - - if (m->pmatch != NULL) - free((char *)m->pmatch); - if (m->lastpos != NULL) - free((char *)m->lastpos); - STATETEARDOWN(m); - return(0); -} - -/* - - dissect - figure out what matched what, no back references - == static char *dissect(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); - */ -static char * /* == stop (success) always */ -dissect(m, start, stop, startst, stopst) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; -{ - register int i; - register sopno ss; /* start sop of current subRE */ - register sopno es; /* end sop of current subRE */ - register char *sp; /* start of string matched by it */ - register char *stp; /* string matched by it cannot pass here */ - register char *rest; /* start of rest of string */ - register char *tail; /* string unmatched by rest of RE */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *sep; /* end of string matched by subsubRE */ - register char *oldssp; /* previous ssp */ - register char *dp; - - AT("diss", start, stop, startst, stopst); - sp = start; - for (ss = startst; ss < stopst; ss = es) { - /* identify end of subRE */ - es = ss; - switch (OP(m->g->strip[es])) { - case OPLUS_: - case OQUEST_: - es += OPND(m->g->strip[es]); - break; - case OCH_: - while (OP(m->g->strip[es]) != O_CH) - es += OPND(m->g->strip[es]); - break; - } - es++; - - /* figure out what it matched */ - switch (OP(m->g->strip[ss])) { - case OEND: - assert(nope); - break; - case OCHAR: - sp++; - break; - case OBOL: - case OEOL: - case OBOW: - case OEOW: - break; - case OANY: - case OANYOF: - sp++; - break; - case OBACK_: - case O_BACK: - assert(nope); - break; - /* cases where length of match is hard to find */ - case OQUEST_: - stp = stop; - for (;;) { - /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); - assert(rest != NULL); /* it did match */ - /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); - if (tail == stop) - break; /* yes! */ - /* no -- try a shorter match for this one */ - stp = rest - 1; - assert(stp >= sp); /* it did work */ - } - ssub = ss + 1; - esub = es - 1; - /* did innards match? */ - if (slow(m, sp, rest, ssub, esub) != NULL) { - dp = dissect(m, sp, rest, ssub, esub); - assert(dp == rest); - } else /* no */ - assert(sp == rest); - sp = rest; - break; - case OPLUS_: - stp = stop; - for (;;) { - /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); - assert(rest != NULL); /* it did match */ - /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); - if (tail == stop) - break; /* yes! */ - /* no -- try a shorter match for this one */ - stp = rest - 1; - assert(stp >= sp); /* it did work */ - } - ssub = ss + 1; - esub = es - 1; - ssp = sp; - oldssp = ssp; - for (;;) { /* find last match of innards */ - sep = slow(m, ssp, rest, ssub, esub); - if (sep == NULL || sep == ssp) - break; /* failed or matched null */ - oldssp = ssp; /* on to next try */ - ssp = sep; - } - if (sep == NULL) { - /* last successful match */ - sep = ssp; - ssp = oldssp; - } - assert(sep == rest); /* must exhaust substring */ - assert(slow(m, ssp, sep, ssub, esub) == rest); - dp = dissect(m, ssp, sep, ssub, esub); - assert(dp == sep); - sp = rest; - break; - case OCH_: - stp = stop; - for (;;) { - /* how long could this one be? */ - rest = slow(m, sp, stp, ss, es); - assert(rest != NULL); /* it did match */ - /* could the rest match the rest? */ - tail = slow(m, rest, stop, es, stopst); - if (tail == stop) - break; /* yes! */ - /* no -- try a shorter match for this one */ - stp = rest - 1; - assert(stp >= sp); /* it did work */ - } - ssub = ss + 1; - esub = ss + OPND(m->g->strip[ss]) - 1; - assert(OP(m->g->strip[esub]) == OOR1); - for (;;) { /* find first matching branch */ - if (slow(m, sp, rest, ssub, esub) == rest) - break; /* it matched all of it */ - /* that one missed, try next one */ - assert(OP(m->g->strip[esub]) == OOR1); - esub++; - assert(OP(m->g->strip[esub]) == OOR2); - ssub = esub + 1; - esub += OPND(m->g->strip[esub]); - if (OP(m->g->strip[esub]) == OOR2) - esub--; - else - assert(OP(m->g->strip[esub]) == O_CH); - } - dp = dissect(m, sp, rest, ssub, esub); - assert(dp == rest); - sp = rest; - break; - case O_PLUS: - case O_QUEST: - case OOR1: - case OOR2: - case O_CH: - assert(nope); - break; - case OLPAREN: - i = OPND(m->g->strip[ss]); - assert(0 < i && i <= m->g->nsub); - m->pmatch[i].rm_so = sp - m->offp; - break; - case ORPAREN: - i = OPND(m->g->strip[ss]); - assert(0 < i && i <= m->g->nsub); - m->pmatch[i].rm_eo = sp - m->offp; - break; - default: /* uh oh */ - assert(nope); - break; - } - } - - assert(sp == stop); - return(sp); -} - -/* - - backref - figure out what matched what, figuring in back references - == static char *backref(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst, sopno lev); - */ -static char * /* == stop (success) or NULL (failure) */ -backref(m, start, stop, startst, stopst, lev) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; -sopno lev; /* PLUS nesting level */ -{ - register int i; - register sopno ss; /* start sop of current subRE */ - register char *sp; /* start of string matched by it */ - register sopno ssub; /* start sop of subsubRE */ - register sopno esub; /* end sop of subsubRE */ - register char *ssp; /* start of string matched by subsubRE */ - register char *dp; - register size_t len; - register int hard; - register sop s; - register regoff_t offsave; - register cset *cs; - - AT("back", start, stop, startst, stopst); - sp = start; - - /* get as far as we can with easy stuff */ - hard = 0; - for (ss = startst; !hard && ss < stopst; ss++) - switch (OP(s = m->g->strip[ss])) { - case OCHAR: - if (sp == stop || *sp++ != (char)OPND(s)) - return(NULL); - break; - case OANY: - if (sp == stop) - return(NULL); - sp++; - break; - case OANYOF: - cs = &m->g->sets[OPND(s)]; - if (sp == stop || !CHIN(cs, *sp++)) - return(NULL); - break; - case OBOL: - if ( (sp == m->beginp && !(m->eflags®_NOTBOL)) || - (sp < m->endp && *(sp-1) == '\n' && - (m->g->cflags®_NEWLINE)) ) - { /* yes */ } - else - return(NULL); - break; - case OEOL: - if ( (sp == m->endp && !(m->eflags®_NOTEOL)) || - (sp < m->endp && *sp == '\n' && - (m->g->cflags®_NEWLINE)) ) - { /* yes */ } - else - return(NULL); - break; - case OBOW: - if (( (sp == m->beginp && !(m->eflags®_NOTBOL)) || - (sp < m->endp && *(sp-1) == '\n' && - (m->g->cflags®_NEWLINE)) || - (sp > m->beginp && - !ISWORD(*(sp-1))) ) && - (sp < m->endp && ISWORD(*sp)) ) - { /* yes */ } - else - return(NULL); - break; - case OEOW: - if (( (sp == m->endp && !(m->eflags®_NOTEOL)) || - (sp < m->endp && *sp == '\n' && - (m->g->cflags®_NEWLINE)) || - (sp < m->endp && !ISWORD(*sp)) ) && - (sp > m->beginp && ISWORD(*(sp-1))) ) - { /* yes */ } - else - return(NULL); - break; - case O_QUEST: - break; - case OOR1: /* matches null but needs to skip */ - ss++; - s = m->g->strip[ss]; - do { - assert(OP(s) == OOR2); - ss += OPND(s); - } while (OP(s = m->g->strip[ss]) != O_CH); - /* note that the ss++ gets us past the O_CH */ - break; - default: /* have to make a choice */ - hard = 1; - break; - } - if (!hard) { /* that was it! */ - if (sp != stop) - return(NULL); - return(sp); - } - ss--; /* adjust for the for's final increment */ - - /* the hard stuff */ - AT("hard", sp, stop, ss, stopst); - s = m->g->strip[ss]; - switch (OP(s)) { - case OBACK_: /* the vilest depths */ - i = OPND(s); - assert(0 < i && i <= m->g->nsub); - if (m->pmatch[i].rm_eo == -1) - return(NULL); - assert(m->pmatch[i].rm_so != -1); - len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so; - assert(stop - m->beginp >= len); - if (sp > stop - len) - return(NULL); /* not enough left to match */ - ssp = m->offp + m->pmatch[i].rm_so; - if (memcmp(sp, ssp, len) != 0) - return(NULL); - while (m->g->strip[ss] != SOP(O_BACK, i)) - ss++; - return(backref(m, sp+len, stop, ss+1, stopst, lev)); - break; - case OQUEST_: /* to null or not */ - dp = backref(m, sp, stop, ss+1, stopst, lev); - if (dp != NULL) - return(dp); /* not */ - return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev)); - break; - case OPLUS_: - assert(m->lastpos != NULL); - assert(lev+1 <= m->g->nplus); - m->lastpos[lev+1] = sp; - return(backref(m, sp, stop, ss+1, stopst, lev+1)); - break; - case O_PLUS: - if (sp == m->lastpos[lev]) /* last pass matched null */ - return(backref(m, sp, stop, ss+1, stopst, lev-1)); - /* try another pass */ - m->lastpos[lev] = sp; - dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev); - if (dp == NULL) - return(backref(m, sp, stop, ss+1, stopst, lev-1)); - else - return(dp); - break; - case OCH_: /* find the right one, if any */ - ssub = ss + 1; - esub = ss + OPND(s) - 1; - assert(OP(m->g->strip[esub]) == OOR1); - for (;;) { /* find first matching branch */ - dp = backref(m, sp, stop, ssub, esub, lev); - if (dp != NULL) - return(dp); - /* that one missed, try next one */ - if (OP(m->g->strip[esub]) == O_CH) - return(NULL); /* there is none */ - esub++; - assert(OP(m->g->strip[esub]) == OOR2); - ssub = esub + 1; - esub += OPND(m->g->strip[esub]); - if (OP(m->g->strip[esub]) == OOR2) - esub--; - else - assert(OP(m->g->strip[esub]) == O_CH); - } - break; - case OLPAREN: /* must undo assignment if rest fails */ - i = OPND(s); - assert(0 < i && i <= m->g->nsub); - offsave = m->pmatch[i].rm_so; - m->pmatch[i].rm_so = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); - if (dp != NULL) - return(dp); - m->pmatch[i].rm_so = offsave; - return(NULL); - break; - case ORPAREN: /* must undo assignment if rest fails */ - i = OPND(s); - assert(0 < i && i <= m->g->nsub); - offsave = m->pmatch[i].rm_eo; - m->pmatch[i].rm_eo = sp - m->offp; - dp = backref(m, sp, stop, ss+1, stopst, lev); - if (dp != NULL) - return(dp); - m->pmatch[i].rm_eo = offsave; - return(NULL); - break; - default: /* uh oh */ - assert(nope); - break; - } - - /* "can't happen" */ - assert(nope); - /* NOTREACHED */ - return((char *)NULL); /* dummy */ -} - -/* - - fast - step through the string at top speed - == static char *fast(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); - */ -static char * /* where tentative match ended, or NULL */ -fast(m, start, stop, startst, stopst) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; -{ - register states st = m->st; - register states fresh = m->fresh; - register states tmp = m->tmp; - register char *p = start; - register int c = (start == m->beginp) ? OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register char *coldp; /* last p after which no match was underway */ - - CLEAR(st); - SET1(st, startst); - st = step(m->g, startst, stopst, st, NOTHING, st); - ASSIGN(fresh, st); - SP("start", st, *p); - coldp = NULL; - for (;;) { - /* next character */ - lastc = c; - c = (p == m->endp) ? OUT : *p; - if (EQ(st, fresh)) - coldp = p; - - /* is there an EOL and/or BOL between lastc and c? */ - flagch = '\0'; - i = 0; - if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || - (lastc == OUT && !(m->eflags®_NOTBOL)) ) { - flagch = BOL; - i = m->g->nbol; - } - if ( (c == '\n' && m->g->cflags®_NEWLINE) || - (c == OUT && !(m->eflags®_NOTEOL)) ) { - flagch = (flagch == BOL) ? BOLEOL : EOL; - i += m->g->neol; - } - if (i != 0) { - for (; i > 0; i--) - st = step(m->g, startst, stopst, st, flagch, st); - SP("boleol", st, c); - } - - /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { - flagch = BOW; - } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { - flagch = EOW; - } - if (flagch == BOW || flagch == EOW) { - st = step(m->g, startst, stopst, st, flagch, st); - SP("boweow", st, c); - } - - /* are we done? */ - if (ISSET(st, stopst) || p == stop) - break; /* NOTE BREAK OUT */ - - /* no, we must deal with this character */ - ASSIGN(tmp, st); - ASSIGN(st, fresh); - assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); - SP("aft", st, c); - assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p++; - } - - assert(coldp != NULL); - m->coldp = coldp; - if (ISSET(st, stopst)) - return(p+1); - else - return(NULL); -} - -/* - - slow - step through the string more deliberately - == static char *slow(register struct match *m, char *start, \ - == char *stop, sopno startst, sopno stopst); - */ -static char * /* where it ended */ -slow(m, start, stop, startst, stopst) -register struct match *m; -char *start; -char *stop; -sopno startst; -sopno stopst; -{ - register states st = m->st; - register states empty = m->empty; - register states tmp = m->tmp; - register char *p = start; - register int c = (start == m->beginp) ? OUT : *(start-1); - register int lastc; /* previous c */ - register int flagch; - register int i; - register char *matchp; /* last p at which a match ended */ - - AT("slow", start, stop, startst, stopst); - CLEAR(st); - SET1(st, startst); - SP("sstart", st, *p); - st = step(m->g, startst, stopst, st, NOTHING, st); - matchp = NULL; - for (;;) { - /* next character */ - lastc = c; - c = (p == m->endp) ? OUT : *p; - - /* is there an EOL and/or BOL between lastc and c? */ - flagch = '\0'; - i = 0; - if ( (lastc == '\n' && m->g->cflags®_NEWLINE) || - (lastc == OUT && !(m->eflags®_NOTBOL)) ) { - flagch = BOL; - i = m->g->nbol; - } - if ( (c == '\n' && m->g->cflags®_NEWLINE) || - (c == OUT && !(m->eflags®_NOTEOL)) ) { - flagch = (flagch == BOL) ? BOLEOL : EOL; - i += m->g->neol; - } - if (i != 0) { - for (; i > 0; i--) - st = step(m->g, startst, stopst, st, flagch, st); - SP("sboleol", st, c); - } - - /* how about a word boundary? */ - if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) && - (c != OUT && ISWORD(c)) ) { - flagch = BOW; - } - if ( (lastc != OUT && ISWORD(lastc)) && - (flagch == EOL || (c != OUT && !ISWORD(c))) ) { - flagch = EOW; - } - if (flagch == BOW || flagch == EOW) { - st = step(m->g, startst, stopst, st, flagch, st); - SP("sboweow", st, c); - } - - /* are we done? */ - if (ISSET(st, stopst)) - matchp = p; - if (EQ(st, empty) || p == stop) - break; /* NOTE BREAK OUT */ - - /* no, we must deal with this character */ - ASSIGN(tmp, st); - ASSIGN(st, empty); - assert(c != OUT); - st = step(m->g, startst, stopst, tmp, c, st); - SP("saft", st, c); - assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st)); - p++; - } - - return(matchp); -} - - -/* - - step - map set of states reachable before char to set reachable after - == static states step(register struct re_guts *g, sopno start, sopno stop, \ - == register states bef, int ch, register states aft); - == #define BOL (OUT+1) - == #define EOL (BOL+1) - == #define BOLEOL (BOL+2) - == #define NOTHING (BOL+3) - == #define BOW (BOL+4) - == #define EOW (BOL+5) - == #define CODEMAX (BOL+5) // highest code used - == #define NONCHAR(c) ((c) > CHAR_MAX) - == #define NNONCHAR (CODEMAX-CHAR_MAX) - */ -static states -step(g, start, stop, bef, ch, aft) -register struct re_guts *g; -sopno start; /* start state within strip */ -sopno stop; /* state after stop state within strip */ -register states bef; /* states reachable before */ -int ch; /* character or NONCHAR code */ -register states aft; /* states already known reachable after */ -{ - register cset *cs; - register sop s; - register sopno pc; - register onestate here; /* note, macros know this name */ - register sopno look; - register long i; - - for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) { - s = g->strip[pc]; - switch (OP(s)) { - case OEND: - assert(pc == stop-1); - break; - case OCHAR: - /* only characters can match */ - assert(!NONCHAR(ch) || ch != (char)OPND(s)); - if (ch == (char)OPND(s)) - FWD(aft, bef, 1); - break; - case OBOL: - if (ch == BOL || ch == BOLEOL) - FWD(aft, bef, 1); - break; - case OEOL: - if (ch == EOL || ch == BOLEOL) - FWD(aft, bef, 1); - break; - case OBOW: - if (ch == BOW) - FWD(aft, bef, 1); - break; - case OEOW: - if (ch == EOW) - FWD(aft, bef, 1); - break; - case OANY: - if (!NONCHAR(ch)) - FWD(aft, bef, 1); - break; - case OANYOF: - cs = &g->sets[OPND(s)]; - if (!NONCHAR(ch) && CHIN(cs, ch)) - FWD(aft, bef, 1); - break; - case OBACK_: /* ignored here */ - case O_BACK: - FWD(aft, aft, 1); - break; - case OPLUS_: /* forward, this is just an empty */ - FWD(aft, aft, 1); - break; - case O_PLUS: /* both forward and back */ - FWD(aft, aft, 1); - i = ISSETBACK(aft, OPND(s)); - BACK(aft, aft, OPND(s)); - if (!i && ISSETBACK(aft, OPND(s))) { - /* oho, must reconsider loop body */ - pc -= OPND(s) + 1; - INIT(here, pc); - } - break; - case OQUEST_: /* two branches, both forward */ - FWD(aft, aft, 1); - FWD(aft, aft, OPND(s)); - break; - case O_QUEST: /* just an empty */ - FWD(aft, aft, 1); - break; - case OLPAREN: /* not significant here */ - case ORPAREN: - FWD(aft, aft, 1); - break; - case OCH_: /* mark the first two branches */ - FWD(aft, aft, 1); - assert(OP(g->strip[pc+OPND(s)]) == OOR2); - FWD(aft, aft, OPND(s)); - break; - case OOR1: /* done a branch, find the O_CH */ - if (ISSTATEIN(aft, here)) { - for (look = 1; - OP(s = g->strip[pc+look]) != O_CH; - look += OPND(s)) - assert(OP(s) == OOR2); - FWD(aft, aft, look); - } - break; - case OOR2: /* propagate OCH_'s marking */ - FWD(aft, aft, 1); - if (OP(g->strip[pc+OPND(s)]) != O_CH) { - assert(OP(g->strip[pc+OPND(s)]) == OOR2); - FWD(aft, aft, OPND(s)); - } - break; - case O_CH: /* just empty */ - FWD(aft, aft, 1); - break; - default: /* ooooops... */ - assert(nope); - break; - } - } - - return(aft); -} - -#ifdef REDEBUG -/* - - print - print a set of states - == #ifdef REDEBUG - == static void print(struct match *m, char *caption, states st, \ - == int ch, FILE *d); - == #endif - */ -static void -print(m, caption, st, ch, d) -struct match *m; -char *caption; -states st; -int ch; -FILE *d; -{ - register struct re_guts *g = m->g; - register int i; - register int first = 1; - - if (!(m->eflags®_TRACE)) - return; - - fprintf(d, "%s", caption); - if (ch != '\0') - fprintf(d, " %s", pchar(ch)); - for (i = 0; i < g->nstates; i++) - if (ISSET(st, i)) { - fprintf(d, "%s%d", (first) ? "\t" : ", ", i); - first = 0; - } - fprintf(d, "\n"); -} - -/* - - at - print current situation - == #ifdef REDEBUG - == static void at(struct match *m, char *title, char *start, char *stop, \ - == sopno startst, sopno stopst); - == #endif - */ -static void -at(m, title, start, stop, startst, stopst) -struct match *m; -char *title; -char *start; -char *stop; -sopno startst; -sopno stopst; -{ - if (!(m->eflags®_TRACE)) - return; - - printf("%s %s-", title, pchar(*start)); - printf("%s ", pchar(*stop)); - printf("%ld-%ld\n", (long)startst, (long)stopst); -} - -#ifndef PCHARDONE -#define PCHARDONE /* never again */ -/* - - pchar - make a character printable - == #ifdef REDEBUG - == static char *pchar(int ch); - == #endif - * - * Is this identical to regchar() over in debug.c? Well, yes. But a - * duplicate here avoids having a debugging-capable regexec.o tied to - * a matching debug.o, and this is convenient. It all disappears in - * the non-debug compilation anyway, so it doesn't matter much. - */ -static char * /* -> representation */ -pchar(ch) -int ch; -{ - static char pbuf[10]; - - if (isprint(ch) || ch == ' ') - sprintf(pbuf, "%c", ch); - else - sprintf(pbuf, "\\%o", ch); - return(pbuf); -} -#endif -#endif - -#undef matcher -#undef fast -#undef slow -#undef dissect -#undef backref -#undef step -#undef print -#undef at -#undef match diff --git a/src/regex/re_main.c b/src/regex/re_main.c deleted file mode 100644 index 0221e7713d..0000000000 --- a/src/regex/re_main.c +++ /dev/null @@ -1,510 +0,0 @@ -#include -#include -#include -#include -#include - -#include "main.ih" - -char *progname; -int debug = 0; -int line = 0; -int status = 0; - -int copts = REG_EXTENDED; -int eopts = 0; -regoff_t startoff = 0; -regoff_t endoff = 0; - - -extern int split(); -extern void regprint(); - -/* - - main - do the simple case, hand off to regress() for regression - */ -main(argc, argv) -int argc; -char *argv[]; -{ - regex_t re; -# define NS 10 - regmatch_t subs[NS]; - char erbuf[100]; - int err; - size_t len; - int c; - int errflg = 0; - register int i; - extern int optind; - extern char *optarg; - - progname = argv[0]; - - while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) - switch (c) { - case 'c': /* compile options */ - copts = options('c', optarg); - break; - case 'e': /* execute options */ - eopts = options('e', optarg); - break; - case 'S': /* start offset */ - startoff = (regoff_t)atoi(optarg); - break; - case 'E': /* end offset */ - endoff = (regoff_t)atoi(optarg); - break; - case 'x': /* Debugging. */ - debug++; - break; - case '?': - default: - errflg++; - break; - } - if (errflg) { - fprintf(stderr, "usage: %s ", progname); - fprintf(stderr, "[-c copt][-C][-d] [re]\n"); - exit(2); - } - - if (optind >= argc) { - regress(stdin); - exit(status); - } - - err = regcomp(&re, argv[optind++], copts); - if (err) { - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "error %s, %d/%d `%s'\n", - eprint(err), len, sizeof(erbuf), erbuf); - exit(status); - } - regprint(&re, stdout); - - if (optind >= argc) { - regfree(&re); - exit(status); - } - - if (eopts®_STARTEND) { - subs[0].rm_so = startoff; - subs[0].rm_eo = strlen(argv[optind]) - endoff; - } - err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); - if (err) { - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "error %s, %d/%d `%s'\n", - eprint(err), len, sizeof(erbuf), erbuf); - exit(status); - } - if (!(copts®_NOSUB)) { - len = (int)(subs[0].rm_eo - subs[0].rm_so); - if (subs[0].rm_so != -1) { - if (len != 0) - printf("match `%.*s'\n", len, - argv[optind] + subs[0].rm_so); - else - printf("match `'@%.1s\n", - argv[optind] + subs[0].rm_so); - } - for (i = 1; i < NS; i++) - if (subs[i].rm_so != -1) - printf("(%d) `%.*s'\n", i, - (int)(subs[i].rm_eo - subs[i].rm_so), - argv[optind] + subs[i].rm_so); - } - exit(status); -} - -/* - - regress - main loop of regression test - == void regress(FILE *in); - */ -void -regress(in) -FILE *in; -{ - char inbuf[1000]; -# define MAXF 10 - char *f[MAXF]; - int nf; - int i; - char erbuf[100]; - size_t ne; - char *badpat = "invalid regular expression"; -# define SHORT 10 - char *bpname = "REG_BADPAT"; - regex_t re; - - while (fgets(inbuf, sizeof(inbuf), in) != NULL) { - line++; - if (inbuf[0] == '#' || inbuf[0] == '\n') - continue; /* NOTE CONTINUE */ - inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ - if (debug) - fprintf(stdout, "%d:\n", line); - nf = split(inbuf, f, MAXF, "\t\t"); - if (nf < 3) { - fprintf(stderr, "bad input, line %d\n", line); - exit(1); - } - for (i = 0; i < nf; i++) - if (strcmp(f[i], "\"\"") == 0) - f[i] = ""; - if (nf <= 3) - f[3] = NULL; - if (nf <= 4) - f[4] = NULL; - try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); - if (opt('&', f[1])) /* try with either type of RE */ - try(f[0], f[1], f[2], f[3], f[4], - options('c', f[1]) &~ REG_EXTENDED); - } - - ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); - if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { - fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", - erbuf, badpat); - status = 1; - } - ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); - if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || - ne != strlen(badpat)+1) { - fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", - erbuf, SHORT-1, badpat); - status = 1; - } - ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); - if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { - fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", - erbuf, bpname); - status = 1; - } - re.re_endp = bpname; - ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); - if (atoi(erbuf) != (int)REG_BADPAT) { - fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", - erbuf, (long)REG_BADPAT); - status = 1; - } else if (ne != strlen(erbuf)+1) { - fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", - erbuf, (long)REG_BADPAT); - status = 1; - } -} - -/* - - try - try it, and report on problems - == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); - */ -void -try(f0, f1, f2, f3, f4, opts) -char *f0; -char *f1; -char *f2; -char *f3; -char *f4; -int opts; /* may not match f1 */ -{ - regex_t re; -# define NSUBS 10 - regmatch_t subs[NSUBS]; -# define NSHOULD 15 - char *should[NSHOULD]; - int nshould; - char erbuf[100]; - int err; - int len; - char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; - register int i; - char *grump; - char f0copy[1000]; - char f2copy[1000]; - - strcpy(f0copy, f0); - re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; - fixstr(f0copy); - err = regcomp(&re, f0copy, opts); - if (err != 0 && (!opt('C', f1) || err != efind(f2))) { - /* unexpected error or wrong error */ - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", - line, type, eprint(err), len, - sizeof(erbuf), erbuf); - status = 1; - } else if (err == 0 && opt('C', f1)) { - /* unexpected success */ - fprintf(stderr, "%d: %s should have given REG_%s\n", - line, type, f2); - status = 1; - err = 1; /* so we won't try regexec */ - } - - if (err != 0) { - regfree(&re); - return; - } - - strcpy(f2copy, f2); - fixstr(f2copy); - - if (options('e', f1)®_STARTEND) { - if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) - fprintf(stderr, "%d: bad STARTEND syntax\n", line); - subs[0].rm_so = strchr(f2, '(') - f2 + 1; - subs[0].rm_eo = strchr(f2, ')') - f2; - } - err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); - - if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { - /* unexpected error or wrong error */ - len = regerror(err, &re, erbuf, sizeof(erbuf)); - fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", - line, type, eprint(err), len, - sizeof(erbuf), erbuf); - status = 1; - } else if (err != 0) { - /* nothing more to check */ - } else if (f3 == NULL) { - /* unexpected success */ - fprintf(stderr, "%d: %s exec should have failed\n", - line, type); - status = 1; - err = 1; /* just on principle */ - } else if (opts®_NOSUB) { - /* nothing more to check */ - } else if ((grump = check(f2, subs[0], f3)) != NULL) { - fprintf(stderr, "%d: %s %s\n", line, type, grump); - status = 1; - err = 1; - } - - if (err != 0 || f4 == NULL) { - regfree(&re); - return; - } - - for (i = 1; i < NSHOULD; i++) - should[i] = NULL; - nshould = split(f4, should+1, NSHOULD-1, ","); - if (nshould == 0) { - nshould = 1; - should[1] = ""; - } - for (i = 1; i < NSUBS; i++) { - grump = check(f2, subs[i], should[i]); - if (grump != NULL) { - fprintf(stderr, "%d: %s $%d %s\n", line, - type, i, grump); - status = 1; - err = 1; - } - } - - regfree(&re); -} - -/* - - options - pick options out of a regression-test string - == int options(int type, char *s); - */ -int -options(type, s) -int type; /* 'c' compile, 'e' exec */ -char *s; -{ - register char *p; - register int o = (type == 'c') ? copts : eopts; - register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; - - for (p = s; *p != '\0'; p++) - if (strchr(legal, *p) != NULL) - switch (*p) { - case 'b': - o &= ~REG_EXTENDED; - break; - case 'i': - o |= REG_ICASE; - break; - case 's': - o |= REG_NOSUB; - break; - case 'n': - o |= REG_NEWLINE; - break; - case 'm': - o &= ~REG_EXTENDED; - o |= REG_NOSPEC; - break; - case 'p': - o |= REG_PEND; - break; - case '^': - o |= REG_NOTBOL; - break; - case '$': - o |= REG_NOTEOL; - break; - case '#': - o |= REG_STARTEND; - break; - case 't': /* trace */ - o |= REG_TRACE; - break; - case 'l': /* force long representation */ - o |= REG_LARGE; - break; - case 'r': /* force backref use */ - o |= REG_BACKR; - break; - } - return(o); -} - -/* - - opt - is a particular option in a regression string? - == int opt(int c, char *s); - */ -int /* predicate */ -opt(c, s) -int c; -char *s; -{ - return(strchr(s, c) != NULL); -} - -/* - - fixstr - transform magic characters in strings - == void fixstr(register char *p); - */ -void -fixstr(p) -register char *p; -{ - if (p == NULL) - return; - - for (; *p != '\0'; p++) - if (*p == 'N') - *p = '\n'; - else if (*p == 'T') - *p = '\t'; - else if (*p == 'S') - *p = ' '; - else if (*p == 'Z') - *p = '\0'; -} - -/* - - check - check a substring match - == char *check(char *str, regmatch_t sub, char *should); - */ -char * /* NULL or complaint */ -check(str, sub, should) -char *str; -regmatch_t sub; -char *should; -{ - register int len; - register int shlen; - register char *p; - static char grump[500]; - register char *at = NULL; - - if (should != NULL && strcmp(should, "-") == 0) - should = NULL; - if (should != NULL && should[0] == '@') { - at = should + 1; - should = ""; - } - - /* check rm_so and rm_eo for consistency */ - if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) || - (sub.rm_so != -1 && sub.rm_eo == -1) || - (sub.rm_so != -1 && sub.rm_so < 0) || - (sub.rm_eo != -1 && sub.rm_eo < 0) ) { - sprintf(grump, "start %ld end %ld", (long)sub.rm_so, - (long)sub.rm_eo); - return(grump); - } - - /* check for no match */ - if (sub.rm_so == -1 && should == NULL) - return(NULL); - if (sub.rm_so == -1) - return("did not match"); - - /* check for in range */ - if (sub.rm_eo > strlen(str)) { - sprintf(grump, "start %ld end %ld, past end of string", - (long)sub.rm_so, (long)sub.rm_eo); - return(grump); - } - - len = (int)(sub.rm_eo - sub.rm_so); - shlen = (int)strlen(should); - p = str + sub.rm_so; - - /* check for not supposed to match */ - if (should == NULL) { - sprintf(grump, "matched `%.*s'", len, p); - return(grump); - } - - /* check for wrong match */ - if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) { - sprintf(grump, "matched `%.*s' instead", len, p); - return(grump); - } - if (shlen > 0) - return(NULL); - - /* check null match in right place */ - if (at == NULL) - return(NULL); - shlen = strlen(at); - if (shlen == 0) - shlen = 1; /* force check for end-of-string */ - if (strncmp(p, at, shlen) != 0) { - sprintf(grump, "matched null at `%.20s'", p); - return(grump); - } - return(NULL); -} - -/* - - eprint - convert error number to name - == static char *eprint(int err); - */ -static char * -eprint(err) -int err; -{ - static char epbuf[100]; - size_t len; - - len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); - assert(len <= sizeof(epbuf)); - return(epbuf); -} - -/* - - efind - convert error name to number - == static int efind(char *name); - */ -static int -efind(name) -char *name; -{ - static char efbuf[100]; - size_t n; - regex_t re; - - sprintf(efbuf, "REG_%s", name); - assert(strlen(efbuf) < sizeof(efbuf)); - re.re_endp = efbuf; - (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); - return(atoi(efbuf)); -} diff --git a/src/regex/regc_cvec.c b/src/regex/regc_cvec.c new file mode 100644 index 0000000000..b6aa8c98f1 --- /dev/null +++ b/src/regex/regc_cvec.c @@ -0,0 +1,189 @@ +/* + * Utility functions for handling cvecs + * This file is #included by regcomp.c. + * + * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. + * + * Development of this software was funded, in part, by Cray Research Inc., + * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics + * Corporation, none of whom are responsible for the results. The author + * thanks all of them. + * + * Redistribution and use in source and binary forms -- with or without + * modification -- are permitted for any purpose, provided that + * redistributions in source form retain this entire copyright notice and + * indicate the origin and nature of any modifications. + * + * I'd appreciate being given credit for this package in the documentation + * of software which uses it, but that is not a requirement. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $Header$ + * + */ + +/* + * newcvec - allocate a new cvec + */ +static struct cvec * +newcvec(int nchrs, /* to hold this many chrs... */ + int nranges, /* ... and this many ranges... */ + int nmcces) /* ... and this many MCCEs */ +{ + size_t n; + size_t nc; + struct cvec *cv; + + nc = (size_t) nchrs + (size_t) nmcces *(MAXMCCE + 1) + (size_t) nranges *2; + + n = sizeof(struct cvec) + (size_t) (nmcces - 1) * sizeof(chr *) + + nc * sizeof(chr); + cv = (struct cvec *) MALLOC(n); + if (cv == NULL) + return NULL; + cv->chrspace = nchrs; + cv->chrs = (chr *) &cv->mcces[nmcces]; /* chrs just after MCCE + * ptrs */ + cv->mccespace = nmcces; + cv->ranges = cv->chrs + nchrs + nmcces * (MAXMCCE + 1); + cv->rangespace = nranges; + return clearcvec(cv); +} + +/* + * clearcvec - clear a possibly-new cvec + * Returns pointer as convenience. + */ +static struct cvec * +clearcvec(struct cvec * cv) +{ + int i; + + assert(cv != NULL); + cv->nchrs = 0; + assert(cv->chrs == (chr *) &cv->mcces[cv->mccespace]); + cv->nmcces = 0; + cv->nmccechrs = 0; + cv->nranges = 0; + for (i = 0; i < cv->mccespace; i++) + cv->mcces[i] = NULL; + + return cv; +} + +/* + * addchr - add a chr to a cvec + */ +static void +addchr(struct cvec * cv, /* character vector */ + chr c) /* character to add */ +{ + assert(cv->nchrs < cv->chrspace - cv->nmccechrs); + cv->chrs[cv->nchrs++] = (chr) c; +} + +/* + * addrange - add a range to a cvec + */ +static void +addrange(struct cvec * cv, /* character vector */ + chr from, /* first character of range */ + chr to) /* last character of range */ +{ + assert(cv->nranges < cv->rangespace); + cv->ranges[cv->nranges * 2] = (chr) from; + cv->ranges[cv->nranges * 2 + 1] = (chr) to; + cv->nranges++; +} + +/* + * addmcce - add an MCCE to a cvec + */ +static void +addmcce(struct cvec * cv, /* character vector */ + chr *startp, /* beginning of text */ + chr *endp) /* just past end of text */ +{ + int len; + int i; + chr *s; + chr *d; + + if (startp == NULL && endp == NULL) + return; + len = endp - startp; + assert(len > 0); + assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); + assert(cv->nmcces < cv->mccespace); + d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; + cv->mcces[cv->nmcces++] = d; + for (s = startp, i = len; i > 0; s++, i--) + *d++ = *s; + *d++ = 0; /* endmarker */ + assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); + cv->nmccechrs += len + 1; +} + +/* + * haschr - does a cvec contain this chr? + */ +static int /* predicate */ +haschr(struct cvec * cv, /* character vector */ + chr c) /* character to test for */ +{ + int i; + chr *p; + + for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) + { + if (*p == c) + return 1; + } + for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) + { + if ((*p <= c) && (c <= *(p + 1))) + return 1; + } + return 0; +} + +/* + * getcvec - get a cvec, remembering it as v->cv + */ +static struct cvec * +getcvec(struct vars * v, /* context */ + int nchrs, /* to hold this many chrs... */ + int nranges, /* ... and this many ranges... */ + int nmcces) /* ... and this many MCCEs */ +{ + if (v->cv != NULL && nchrs <= v->cv->chrspace && + nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) + return clearcvec(v->cv); + + if (v->cv != NULL) + freecvec(v->cv); + v->cv = newcvec(nchrs, nranges, nmcces); + if (v->cv == NULL) + ERR(REG_ESPACE); + + return v->cv; +} + +/* + * freecvec - free a cvec + */ +static void +freecvec(struct cvec * cv) +{ + FREE(cv); +} diff --git a/src/regex/regex.7 b/src/regex/regex.7 deleted file mode 100644 index 0fa180269e..0000000000 --- a/src/regex/regex.7 +++ /dev/null @@ -1,235 +0,0 @@ -.TH REGEX 7 "25 Oct 1995" -.BY "Henry Spencer" -.SH NAME -regex \- POSIX 1003.2 regular expressions -.SH DESCRIPTION -Regular expressions (``RE''s), -as defined in POSIX 1003.2, come in two forms: -modern REs (roughly those of -.IR egrep ; -1003.2 calls these ``extended'' REs) -and obsolete REs (roughly those of -.IR ed ; -1003.2 ``basic'' REs). -Obsolete REs mostly exist for backward compatibility in some old programs; -they will be discussed at the end. -1003.2 leaves some aspects of RE syntax and semantics open; -`\(dg' marks decisions on these aspects that -may not be fully portable to other 1003.2 implementations. -.PP -A (modern) RE is one\(dg or more non-empty\(dg \fIbranches\fR, -separated by `|'. -It matches anything that matches one of the branches. -.PP -A branch is one\(dg or more \fIpieces\fR, concatenated. -It matches a match for the first, followed by a match for the second, etc. -.PP -A piece is an \fIatom\fR possibly followed -by a single\(dg `*', `+', `?', or \fIbound\fR. -An atom followed by `*' matches a sequence of 0 or more matches of the atom. -An atom followed by `+' matches a sequence of 1 or more matches of the atom. -An atom followed by `?' matches a sequence of 0 or 1 matches of the atom. -.PP -A \fIbound\fR is `{' followed by an unsigned decimal integer, -possibly followed by `,' -possibly followed by another unsigned decimal integer, -always followed by `}'. -The integers must lie between 0 and RE_DUP_MAX (255\(dg) inclusive, -and if there are two of them, the first may not exceed the second. -An atom followed by a bound containing one integer \fIi\fR -and no comma matches -a sequence of exactly \fIi\fR matches of the atom. -An atom followed by a bound -containing one integer \fIi\fR and a comma matches -a sequence of \fIi\fR or more matches of the atom. -An atom followed by a bound -containing two integers \fIi\fR and \fIj\fR matches -a sequence of \fIi\fR through \fIj\fR (inclusive) matches of the atom. -.PP -An atom is a regular expression enclosed in `()' (matching a match for the -regular expression), -an empty set of `()' (matching the null string)\(dg, -a \fIbracket expression\fR (see below), `.' -(matching any single character), `^' (matching the null string at the -beginning of a line), `$' (matching the null string at the -end of a line), a `\e' followed by one of the characters -`^.[$()|*+?{\e' -(matching that character taken as an ordinary character), -a `\e' followed by any other character\(dg -(matching that character taken as an ordinary character, -as if the `\e' had not been present\(dg), -or a single character with no other significance (matching that character). -A `{' followed by a character other than a digit is an ordinary -character, not the beginning of a bound\(dg. -It is illegal to end an RE with `\e'. -.PP -A \fIbracket expression\fR is a list of characters enclosed in `[]'. -It normally matches any single character from the list (but see below). -If the list begins with `^', -it matches any single character -(but see below) \fInot\fR from the rest of the list. -If two characters in the list are separated by `\-', this is shorthand -for the full \fIrange\fR of characters between those two (inclusive) in the -collating sequence, -e.g. `[0\-9]' in ASCII matches any decimal digit. -It is illegal\(dg for two ranges to share an -endpoint, e.g. `a\-c\-e'. -Ranges are very collating-sequence-dependent, -and portable programs should avoid relying on them. -.PP -To include a literal `]' in the list, make it the first character -(following a possible `^'). -To include a literal `\-', make it the first or last character, -or the second endpoint of a range. -To use a literal `\-' as the first endpoint of a range, -enclose it in `[.' and `.]' to make it a collating element (see below). -With the exception of these and some combinations using `[' (see next -paragraphs), all other special characters, including `\e', lose their -special significance within a bracket expression. -.PP -Within a bracket expression, a collating element (a character, -a multi-character sequence that collates as if it were a single character, -or a collating-sequence name for either) -enclosed in `[.' and `.]' stands for the -sequence of characters of that collating element. -The sequence is a single element of the bracket expression's list. -A bracket expression containing a multi-character collating element -can thus match more than one character, -e.g. if the collating sequence includes a `ch' collating element, -then the RE `[[.ch.]]*c' matches the first five characters -of `chchcc'. -.PP -Within a bracket expression, a collating element enclosed in `[=' and -`=]' is an equivalence class, standing for the sequences of characters -of all collating elements equivalent to that one, including itself. -(If there are no other equivalent collating elements, -the treatment is as if the enclosing delimiters were `[.' and `.]'.) -For example, if o and \o'o^' are the members of an equivalence class, -then `[[=o=]]', `[[=\o'o^'=]]', and `[o\o'o^']' are all synonymous. -An equivalence class may not\(dg be an endpoint -of a range. -.PP -Within a bracket expression, the name of a \fIcharacter class\fR enclosed -in `[:' and `:]' stands for the list of all characters belonging to that -class. -Standard character class names are: -.PP -.RS -.nf -.ta 3c 6c 9c -alnum digit punct -alpha graph space -blank lower upper -cntrl print xdigit -.fi -.RE -.PP -These stand for the character classes defined in -.IR ctype (3). -A locale may provide others. -A character class may not be used as an endpoint of a range. -.PP -There are two special cases\(dg of bracket expressions: -the bracket expressions `[[:<:]]' and `[[:>:]]' match the null string at -the beginning and end of a word respectively. -A word is defined as a sequence of -word characters -which is neither preceded nor followed by -word characters. -A word character is an -.I alnum -character (as defined by -.IR ctype (3)) -or an underscore. -This is an extension, -compatible with but not specified by POSIX 1003.2, -and should be used with -caution in software intended to be portable to other systems. -.PP -In the event that an RE could match more than one substring of a given -string, -the RE matches the one starting earliest in the string. -If the RE could match more than one substring starting at that point, -it matches the longest. -Subexpressions also match the longest possible substrings, subject to -the constraint that the whole match be as long as possible, -with subexpressions starting earlier in the RE taking priority over -ones starting later. -Note that higher-level subexpressions thus take priority over -their lower-level component subexpressions. -.PP -Match lengths are measured in characters, not collating elements. -A null string is considered longer than no match at all. -For example, -`bb*' matches the three middle characters of `abbbc', -`(wee|week)(knights|nights)' matches all ten characters of `weeknights', -when `(.*).*' is matched against `abc' the parenthesized subexpression -matches all three characters, and -when `(a*)*' is matched against `bc' both the whole RE and the parenthesized -subexpression match the null string. -.PP -If case-independent matching is specified, -the effect is much as if all case distinctions had vanished from the -alphabet. -When an alphabetic that exists in multiple cases appears as an -ordinary character outside a bracket expression, it is effectively -transformed into a bracket expression containing both cases, -e.g. `x' becomes `[xX]'. -When it appears inside a bracket expression, all case counterparts -of it are added to the bracket expression, so that (e.g.) `[x]' -becomes `[xX]' and `[^x]' becomes `[^xX]'. -.PP -No particular limit is imposed on the length of REs\(dg. -Programs intended to be portable should not employ REs longer -than 256 bytes, -as an implementation can refuse to accept such REs and remain -POSIX-compliant. -.PP -Obsolete (``basic'') regular expressions differ in several respects. -`|', `+', and `?' are ordinary characters and there is no equivalent -for their functionality. -The delimiters for bounds are `\e{' and `\e}', -with `{' and `}' by themselves ordinary characters. -The parentheses for nested subexpressions are `\e(' and `\e)', -with `(' and `)' by themselves ordinary characters. -`^' is an ordinary character except at the beginning of the -RE or\(dg the beginning of a parenthesized subexpression, -`$' is an ordinary character except at the end of the -RE or\(dg the end of a parenthesized subexpression, -and `*' is an ordinary character if it appears at the beginning of the -RE or the beginning of a parenthesized subexpression -(after a possible leading `^'). -Finally, there is one new type of atom, a \fIback reference\fR: -`\e' followed by a non-zero decimal digit \fId\fR -matches the same sequence of characters -matched by the \fId\fRth parenthesized subexpression -(numbering subexpressions by the positions of their opening parentheses, -left to right), -so that (e.g.) `\e([bc]\e)\e1' matches `bb' or `cc' but not `bc'. -.SH SEE ALSO -regex(3) -.PP -POSIX 1003.2, section 2.8 (Regular Expression Notation). -.SH HISTORY -Written by Henry Spencer, based on the 1003.2 spec. -.SH BUGS -Having two kinds of REs is a botch. -.PP -The current 1003.2 spec says that `)' is an ordinary character in -the absence of an unmatched `('; -this was an unintentional result of a wording error, -and change is likely. -Avoid relying on it. -.PP -Back references are a dreadful botch, -posing major problems for efficient implementations. -They are also somewhat vaguely defined -(does -`a\e(\e(b\e)*\e2\e)*d' match `abbbd'?). -Avoid using them. -.PP -1003.2's specification of case-independent matching is vague. -The ``one case implies all cases'' definition given above -is current consensus among implementors as to the right interpretation. -.PP -The syntax for word boundaries is incredibly ugly. diff --git a/src/regex/regex2.h b/src/regex/regex2.h deleted file mode 100644 index 58fd8d8a43..0000000000 --- a/src/regex/regex2.h +++ /dev/null @@ -1,134 +0,0 @@ -/* - * First, the stuff that ends up in the outside-world include file - = typedef off_t regoff_t; - = typedef struct { - = int re_magic; - = size_t re_nsub; // number of parenthesized subexpressions - = const char *re_endp; // end pointer for REG_PEND - = struct re_guts *re_g; // none of your business :-) - = } regex_t; - = typedef struct { - = regoff_t rm_so; // start of match - = regoff_t rm_eo; // end of match - = } regmatch_t; - */ -/* - * internals of regex_t - */ -#define MAGIC1 ((('r'^0200)<<8) | 'e') - -/* - * The internal representation is a *strip*, a sequence of - * operators ending with an endmarker. (Some terminology etc. is a - * historical relic of earlier versions which used multiple strips.) - * Certain oddities in the representation are there to permit running - * the machinery backwards; in particular, any deviation from sequential - * flow must be marked at both its source and its destination. Some - * fine points: - * - * - OPLUS_ and O_PLUS are *inside* the loop they create. - * - OQUEST_ and O_QUEST are *outside* the bypass they create. - * - OCH_ and O_CH are *outside* the multi-way branch they create, while - * OOR1 and OOR2 are respectively the end and the beginning of one of - * the branches. Note that there is an implicit OOR2 following OCH_ - * and an implicit OOR1 preceding O_CH. - * - * In state representations, an operator's bit is on to signify a state - * immediately *preceding* "execution" of that operator. - */ -typedef long sop; /* strip operator */ -typedef long sopno; -#define OPRMASK 0x7c000000 -#define OPDMASK 0x03ffffff -#define OPSHIFT (26) -#define OP(n) ((n)&OPRMASK) -#define OPND(n) ((n)&OPDMASK) -#define SOP(op, opnd) ((op)|(opnd)) -/* operators meaning operand */ -/* (back, fwd are offsets) */ -#define OEND (1< uch [csetsize] */ - uch mask; /* bit within array */ - uch hash; /* hash code */ - size_t smultis; - char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ -} cset; -/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ -#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) -#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) -#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) -#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ -#define MCsub(p, cs, cp) mcsub(p, cs, cp) -#define MCin(p, cs, cp) mcin(p, cs, cp) - -/* stuff for character categories */ -typedef unsigned char cat_t; - -/* - * main compiled-expression structure - */ -struct re_guts { - int magic; -# define MAGIC2 ((('R'^0200)<<8)|'E') - sop *strip; /* malloced area for strip */ - int csetsize; /* number of bits in a cset vector */ - int ncsets; /* number of csets in use */ - cset *sets; /* -> cset [ncsets] */ - uch *setbits; /* -> uch[csetsize][ncsets/CHAR_BIT] */ - int cflags; /* copy of regcomp() cflags argument */ - sopno nstates; /* = number of sops */ - sopno firststate; /* the initial OEND (normally 0) */ - sopno laststate; /* the final OEND */ - int iflags; /* internal flags */ -# define USEBOL 01 /* used ^ */ -# define USEEOL 02 /* used $ */ -# define BAD 04 /* something wrong */ - int nbol; /* number of ^ used */ - int neol; /* number of $ used */ - int ncategories; /* how many character categories */ - cat_t *categories; /* ->catspace[-CHAR_MIN] */ - char *must; /* match must contain this string */ - int mlen; /* length of must */ - size_t nsub; /* copy of re_nsub */ - int backrefs; /* does it use back references? */ - sopno nplus; /* how deep does it nest +s? */ - /* catspace must be last */ - cat_t catspace[1]; /* actually [NC] */ -}; - -/* misc utilities */ -#define OUT (CHAR_MAX+1) /* a non-character value */ -#define ISWORD(c) (isalnum(c) || (c) == '_') diff --git a/src/regex/split.c b/src/regex/split.c deleted file mode 100644 index 188bdb775b..0000000000 --- a/src/regex/split.c +++ /dev/null @@ -1,316 +0,0 @@ -#include -#include - -/* - - split - divide a string into fields, like awk split() - = int split(char *string, char *fields[], int nfields, char *sep); - */ -int /* number of fields, including overflow */ -split(string, fields, nfields, sep) -char *string; -char *fields[]; /* list is not NULL-terminated */ -int nfields; /* number of entries available in fields[] */ -char *sep; /* "" white, "c" single char, "ab" [ab]+ */ -{ - register char *p = string; - register char c; /* latest character */ - register char sepc = sep[0]; - register char sepc2; - register int fn; - register char **fp = fields; - register char *sepp; - register int trimtrail; - - /* white space */ - if (sepc == '\0') { - while ((c = *p++) == ' ' || c == '\t') - continue; - p--; - trimtrail = 1; - sep = " \t"; /* note, code below knows this is 2 long */ - sepc = ' '; - } else - trimtrail = 0; - sepc2 = sep[1]; /* now we can safely pick this up */ - - /* catch empties */ - if (*p == '\0') - return(0); - - /* single separator */ - if (sepc2 == '\0') { - fn = nfields; - for (;;) { - *fp++ = p; - fn--; - if (fn == 0) - break; - while ((c = *p++) != sepc) - if (c == '\0') - return(nfields - fn); - *(p-1) = '\0'; - } - /* we have overflowed the fields vector -- just count them */ - fn = nfields; - for (;;) { - while ((c = *p++) != sepc) - if (c == '\0') - return(fn); - fn++; - } - /* not reached */ - } - - /* two separators */ - if (sep[2] == '\0') { - fn = nfields; - for (;;) { - *fp++ = p; - fn--; - while ((c = *p++) != sepc && c != sepc2) - if (c == '\0') { - if (trimtrail && **(fp-1) == '\0') - fn++; - return(nfields - fn); - } - if (fn == 0) - break; - *(p-1) = '\0'; - while ((c = *p++) == sepc || c == sepc2) - continue; - p--; - } - /* we have overflowed the fields vector -- just count them */ - fn = nfields; - while (c != '\0') { - while ((c = *p++) == sepc || c == sepc2) - continue; - p--; - fn++; - while ((c = *p++) != '\0' && c != sepc && c != sepc2) - continue; - } - /* might have to trim trailing white space */ - if (trimtrail) { - p--; - while ((c = *--p) == sepc || c == sepc2) - continue; - p++; - if (*p != '\0') { - if (fn == nfields+1) - *p = '\0'; - fn--; - } - } - return(fn); - } - - /* n separators */ - fn = 0; - for (;;) { - if (fn < nfields) - *fp++ = p; - fn++; - for (;;) { - c = *p++; - if (c == '\0') - return(fn); - sepp = sep; - while ((sepc = *sepp++) != '\0' && sepc != c) - continue; - if (sepc != '\0') /* it was a separator */ - break; - } - if (fn < nfields) - *(p-1) = '\0'; - for (;;) { - c = *p++; - sepp = sep; - while ((sepc = *sepp++) != '\0' && sepc != c) - continue; - if (sepc == '\0') /* it wasn't a separator */ - break; - } - p--; - } - - /* not reached */ -} - -#ifdef TEST_SPLIT - - -/* - * test program - * pgm runs regression - * pgm sep splits stdin lines by sep - * pgm str sep splits str by sep - * pgm str sep n splits str by sep n times - */ -int -main(argc, argv) -int argc; -char *argv[]; -{ - char buf[512]; - register int n; -# define MNF 10 - char *fields[MNF]; - - if (argc > 4) - for (n = atoi(argv[3]); n > 0; n--) { - (void) strcpy(buf, argv[1]); - } - else if (argc > 3) - for (n = atoi(argv[3]); n > 0; n--) { - (void) strcpy(buf, argv[1]); - (void) split(buf, fields, MNF, argv[2]); - } - else if (argc > 2) - dosplit(argv[1], argv[2]); - else if (argc > 1) - while (fgets(buf, sizeof(buf), stdin) != NULL) { - buf[strlen(buf)-1] = '\0'; /* stomp newline */ - dosplit(buf, argv[1]); - } - else - regress(); - - exit(0); -} - -dosplit(string, seps) -char *string; -char *seps; -{ -# define NF 5 - char *fields[NF]; - register int nf; - - nf = split(string, fields, NF, seps); - print(nf, NF, fields); -} - -print(nf, nfp, fields) -int nf; -int nfp; -char *fields[]; -{ - register int fn; - register int bound; - - bound = (nf > nfp) ? nfp : nf; - printf("%d:\t", nf); - for (fn = 0; fn < bound; fn++) - printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n"); -} - -#define RNF 5 /* some table entries know this */ -struct { - char *str; - char *seps; - int nf; - char *fi[RNF]; -} tests[] = { - "", " ", 0, { "" }, - " ", " ", 2, { "", "" }, - "x", " ", 1, { "x" }, - "xy", " ", 1, { "xy" }, - "x y", " ", 2, { "x", "y" }, - "abc def g ", " ", 5, { "abc", "def", "", "g", "" }, - " a bcd", " ", 4, { "", "", "a", "bcd" }, - "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, - " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, - - "", " _", 0, { "" }, - " ", " _", 2, { "", "" }, - "x", " _", 1, { "x" }, - "x y", " _", 2, { "x", "y" }, - "ab _ cd", " _", 2, { "ab", "cd" }, - " a_b c ", " _", 5, { "", "a", "b", "c", "" }, - "a b c_d e f", " _", 6, { "a", "b", "c", "d", "e f" }, - " a b c d ", " _", 6, { "", "a", "b", "c", "d " }, - - "", " _~", 0, { "" }, - " ", " _~", 2, { "", "" }, - "x", " _~", 1, { "x" }, - "x y", " _~", 2, { "x", "y" }, - "ab _~ cd", " _~", 2, { "ab", "cd" }, - " a_b c~", " _~", 5, { "", "a", "b", "c", "" }, - "a b_c d~e f", " _~", 6, { "a", "b", "c", "d", "e f" }, - "~a b c d ", " _~", 6, { "", "a", "b", "c", "d " }, - - "", " _~-", 0, { "" }, - " ", " _~-", 2, { "", "" }, - "x", " _~-", 1, { "x" }, - "x y", " _~-", 2, { "x", "y" }, - "ab _~- cd", " _~-", 2, { "ab", "cd" }, - " a_b c~", " _~-", 5, { "", "a", "b", "c", "" }, - "a b_c-d~e f", " _~-", 6, { "a", "b", "c", "d", "e f" }, - "~a-b c d ", " _~-", 6, { "", "a", "b", "c", "d " }, - - "", " ", 0, { "" }, - " ", " ", 2, { "", "" }, - "x", " ", 1, { "x" }, - "xy", " ", 1, { "xy" }, - "x y", " ", 2, { "x", "y" }, - "abc def g ", " ", 4, { "abc", "def", "g", "" }, - " a bcd", " ", 3, { "", "a", "bcd" }, - "a b c d e f", " ", 6, { "a", "b", "c", "d", "e f" }, - " a b c d ", " ", 6, { "", "a", "b", "c", "d " }, - - "", "", 0, { "" }, - " ", "", 0, { "" }, - "x", "", 1, { "x" }, - "xy", "", 1, { "xy" }, - "x y", "", 2, { "x", "y" }, - "abc def g ", "", 3, { "abc", "def", "g" }, - "\t a bcd", "", 2, { "a", "bcd" }, - " a \tb\t c ", "", 3, { "a", "b", "c" }, - "a b c d e ", "", 5, { "a", "b", "c", "d", "e" }, - "a b\tc d e f", "", 6, { "a", "b", "c", "d", "e f" }, - " a b c d e f ", "", 6, { "a", "b", "c", "d", "e f " }, - - NULL, NULL, 0, { NULL }, -}; - -regress() -{ - char buf[512]; - register int n; - char *fields[RNF+1]; - register int nf; - register int i; - register int printit; - register char *f; - - for (n = 0; tests[n].str != NULL; n++) { - (void) strcpy(buf, tests[n].str); - fields[RNF] = NULL; - nf = split(buf, fields, RNF, tests[n].seps); - printit = 0; - if (nf != tests[n].nf) { - printf("split `%s' by `%s' gave %d fields, not %d\n", - tests[n].str, tests[n].seps, nf, tests[n].nf); - printit = 1; - } else if (fields[RNF] != NULL) { - printf("split() went beyond array end\n"); - printit = 1; - } else { - for (i = 0; i < nf && i < RNF; i++) { - f = fields[i]; - if (f == NULL) - f = "(NULL)"; - if (strcmp(f, tests[n].fi[i]) != 0) { - printf("split `%s' by `%s', field %d is `%s', not `%s'\n", - tests[n].str, tests[n].seps, - i, fields[i], tests[n].fi[i]); - printit = 1; - } - } - } - if (printit) - print(nf, RNF, fields); - } -} -#endif diff --git a/src/regex/utils.h b/src/regex/utils.h deleted file mode 100644 index 1a997ac8fc..0000000000 --- a/src/regex/utils.h +++ /dev/null @@ -1,22 +0,0 @@ -/* utility definitions */ -#ifdef _POSIX2_RE_DUP_MAX -#define DUPMAX _POSIX2_RE_DUP_MAX -#else -#define DUPMAX 255 -#endif -#define INFINITY (DUPMAX + 1) -#define NC (CHAR_MAX - CHAR_MIN + 1) -typedef unsigned char uch; - -/* switch off assertions (if not already off) if no REDEBUG */ -#ifndef REDEBUG -#ifndef NDEBUG -#define NDEBUG /* no assertions please */ -#endif -#endif -#include - -/* for old systems with bcopy() but no memmove() */ -#ifdef USEBCOPY -#define memmove(d, s, c) bcopy(s, d, c) -#endif -- 2.45.2