X-Git-Url: https://git.saurik.com/apple/libc.git/blobdiff_plain/7b00c0c43f52e9d27168e67a26aac19065cdb40c..ad3c9f2af814c84582fdd1649e49ec4f68572c5a:/regex/TRE/lib/tre-parse.c

diff --git a/regex/TRE/lib/tre-parse.c b/regex/TRE/lib/tre-parse.c
new file mode 100644
index 0000000..ad09c26
--- /dev/null
+++ b/regex/TRE/lib/tre-parse.c
@@ -0,0 +1,2334 @@
+/*
+  tre-parse.c - Regexp parser
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  This parser is just a simple recursive descent parser for POSIX.2
+  regexps.  The parser supports both the obsolete default syntax and
+  the "extended" syntax, and some nonstandard extensions.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+#include <stddef.h>
+
+#include "xmalloc.h"
+#include "tre-mem.h"
+#include "tre-ast.h"
+#include "tre-stack.h"
+#include "tre-parse.h"
+
+/* BSD compatibility:
+     Before looking up a collating symbol, check if the name matches in
+     the character names (cnames) array; if so, use the corresponding
+     character.
+
+     Also set ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND, which will preserve
+     the implementation choice that for ERE, a non-numeric character following
+     a left brace that would normally be a bound, causes the left brace to be
+     literal. */
+#define BSD_COMPATIBILITY
+#ifdef BSD_COMPATIBILITY
+#include "cname.h"
+#define ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+#endif /* BSD_COMPATIBILITY */
+
+/* Characters with special meanings in regexp syntax. */
+#define CHAR_PIPE	   L'|'
+#define CHAR_LPAREN	   L'('
+#define CHAR_RPAREN	   L')'
+#define CHAR_LBRACE	   L'{'
+#define CHAR_RBRACE	   L'}'
+#define CHAR_LBRACKET	   L'['
+#define CHAR_RBRACKET	   L']'
+#define CHAR_MINUS	   L'-'
+#define CHAR_STAR	   L'*'
+#define CHAR_QUESTIONMARK  L'?'
+#define CHAR_PLUS	   L'+'
+#define CHAR_PERIOD	   L'.'
+#define CHAR_COLON	   L':'
+#define CHAR_EQUAL	   L'='
+#define CHAR_COMMA	   L','
+#define CHAR_CARET	   L'^'
+#define CHAR_DOLLAR	   L'$'
+#define CHAR_BACKSLASH	   L'\\'
+#define CHAR_HASH	   L'#'
+#define CHAR_TILDE	   L'~'
+
+
+/* Some macros for expanding \w, \s, etc. */
+static const struct tre_macro_struct {
+  const char c;
+  const char *expansion;
+} tre_macros[] =
+  { {'t', "\t"},	   {'n', "\n"},		   {'r', "\r"},
+    {'f', "\f"},	   {'a', "\a"},		   {'e', "\033"},
+    {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
+    {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"},   {'D', "[^[:digit:]]"},
+    { 0, NULL }
+  };
+
+
+/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
+   must have at least `len' items.  Sets buf[0] to zero if the there
+   is no match in `tre_macros'. */
+static void
+tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
+		 tre_char_t *buf, size_t buf_len)
+{
+  int i;
+
+  buf[0] = 0;
+  if (regex >= regex_end)
+    return;
+
+  for (i = 0; tre_macros[i].expansion; i++)
+    {
+      if (tre_macros[i].c == *regex)
+	{
+	  unsigned int j;
+	  DPRINT(("Expanding macro '%c' => '%s'\n",
+		  tre_macros[i].c, tre_macros[i].expansion));
+	  for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
+	    buf[j] = tre_macros[i].expansion[j];
+	  buf[j] = 0;
+	  break;
+	}
+    }
+}
+
+static reg_errcode_t
+tre_new_item(tre_mem_t mem, int type, int val, int *max_i,
+	 tre_bracket_match_list_t **items)
+{
+  reg_errcode_t status = REG_OK;
+  tre_bracket_match_list_t *array = *items;
+  int i = array->num_bracket_matches;
+  /* Allocate more space if necessary. */
+  if (i >= *max_i)
+    {
+      tre_bracket_match_list_t *new_items;
+      DPRINT(("out of tre_bracket_match_list_t array space (%d)\n", i));
+      /* If the array is already 1024 items large, give up -- there's
+	 probably an error in the regexp (e.g. not a '\0' terminated
+	 string and missing ']') */
+      if (*max_i >= 1024)
+	return REG_ESPACE;
+      *max_i *= 2;
+      new_items = xrealloc(array, SIZEOF_BRACKET_MATCH_LIST_N(*max_i));
+      if (new_items == NULL)
+	return REG_ESPACE;
+      *items = array = new_items;
+    }
+  array->bracket_matches[i].type = type;
+  array->bracket_matches[i].value = val;
+  array->num_bracket_matches++;
+  return status;
+}
+
+#ifndef TRE_USE_SYSTEM_WCTYPE
+
+/* isalnum() and the rest may be macros, so wrap them to functions. */
+int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); }
+int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); }
+
+#ifdef tre_isascii
+int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); }
+#else /* !tre_isascii */
+int tre_isascii_func(tre_cint_t c) { return !(c >> 7); }
+#endif /* !tre_isascii */
+
+#ifdef tre_isblank
+int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); }
+#else /* !tre_isblank */
+int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); }
+#endif /* !tre_isblank */
+
+int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
+int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
+int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
+int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
+int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
+int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
+int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
+int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
+int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); }
+
+struct {
+  char *name;
+  int (*func)(tre_cint_t);
+} tre_ctype_map[] = {
+  { "alnum", &tre_isalnum_func },
+  { "alpha", &tre_isalpha_func },
+#ifdef tre_isascii
+  { "ascii", &tre_isascii_func },
+#endif /* tre_isascii */
+#ifdef tre_isblank
+  { "blank", &tre_isblank_func },
+#endif /* tre_isblank */
+  { "cntrl", &tre_iscntrl_func },
+  { "digit", &tre_isdigit_func },
+  { "graph", &tre_isgraph_func },
+  { "lower", &tre_islower_func },
+  { "print", &tre_isprint_func },
+  { "punct", &tre_ispunct_func },
+  { "space", &tre_isspace_func },
+  { "upper", &tre_isupper_func },
+  { "xdigit", &tre_isxdigit_func },
+  { NULL, NULL}
+};
+
+tre_ctype_t tre_ctype(const char *name)
+{
+  int i;
+  for (i = 0; tre_ctype_map[i].name != NULL; i++)
+    {
+      if (strcmp(name, tre_ctype_map[i].name) == 0)
+	return tre_ctype_map[i].func;
+    }
+  return (tre_ctype_t)0;
+}
+#endif /* !TRE_USE_SYSTEM_WCTYPE */
+
+#define REST(re) (int)(ctx->re_end - (re)), (re)
+
+#define START_COLLATING_SYMBOLS		16
+#define MAX_COLLATING_SYMBOL_LEN	4
+
+typedef struct {
+  const tre_char_t *start;
+  int len;
+} tre_collating_symbol;
+
+#include <xlocale.h>
+
+int __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len);
+
+#ifdef BSD_COMPATIBILITY
+static wchar_t
+tre_search_cnames(const wchar_t *name, size_t len)
+{
+  size_t low = 0;
+  size_t high = NCNAMES - 1;
+  size_t cur;
+  int cmp;
+
+  while(low <= high)
+    {
+      cur = (low + high) / 2;
+      cmp = wcsncmp(name, cnames[cur].name, len);
+      if (cmp == 0 && cnames[cur].name[len] == 0) return cnames[cur].code;
+      if (cmp > 0) low = cur + 1;
+      else high = cur - 1;
+    }
+  return (wchar_t)-1;
+}
+#endif /* BSD_COMPATIBILITY */
+
+/* Scan the contents of a bracket expression, and create a
+ * tre_bracket_match_list_t encoding the bracket expression.  If during
+ * the scan, multi-character collating symbols are detected, switch
+ * into a mode to collect those MCCSs into a tre_collating_symbol
+ * list and pass them back.  tre_parse_bracket will use that to
+ * create a new string composed of a union of the bracket expression
+ * without the MCCSs and the MCCSs (e.g., [x[.ch.]] => [x]|ch), and
+ * call tre_parse (recursive) to parse that new string (which will
+ * call tre_parse_bracket and tre_parse_bracket_items again. */
+static reg_errcode_t
+tre_parse_bracket_items(tre_parse_ctx_t *ctx, tre_bracket_match_list_t **items,
+			int *items_size, tre_collating_symbol **result)
+{
+  const tre_char_t *re = ctx->re;
+  const tre_char_t *re_end = ctx->re_end;
+  tre_collating_symbol *col_syms = NULL;
+  tre_collating_symbol *cp = NULL;
+  int n_col_syms = 0;
+  reg_errcode_t status;
+  int max_i = *items_size;
+  int other = 0;  /* contains content other than multi-character collating
+		   * symbols */
+  int range = -1; /* -1 unset, 0 begin range set, +1 end range expected */
+  tre_cint_t min, c;
+  int invert = ((*items)->flags & TRE_BRACKET_MATCH_FLAG_NEGATE);
+  int collect_MCCS = 0;
+  const tre_char_t *start;
+
+  for ( ;re < re_end; re++)
+    {
+      switch (*re)
+	{
+	case CHAR_MINUS:
+	  /* A first hyphen */
+	  if (re == ctx->re)
+	    {
+	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	      min = CHAR_MINUS;
+	      other++;
+	      range = 0;
+	      break;
+	    }
+	  /* The hyphen is the end range */
+	  if (range > 0)
+	    {
+	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	      c = CHAR_MINUS;
+	      goto process_end_range;
+	    }
+	  if (re + 1 >= re_end)
+	    {
+	      status = REG_EBRACK;
+	      goto error;
+	    }
+	  /* The hyphen is at the end */
+	  if (re[1] == CHAR_RBRACKET)
+	    {
+	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	      c = CHAR_MINUS;
+	      goto process_begin_range;
+	    }
+	  /* Two ranges are not allowed to share an endpoint, or begin
+	   * range is illegal. */
+	  if (range < 0)
+	    {
+	      status = REG_ERANGE;
+	      goto error;
+	    }
+	  range = 1; /* Expect end range */
+	  DPRINT(("tre_parse_bracket:   range: '%.*" STRF "'\n", REST(re)));
+	  break;
+
+	case CHAR_LBRACKET:
+	  if (re + 1 >= re_end)
+	    {
+	      status = REG_EBRACK;
+	      goto error;
+	    }
+	  switch (re[1])
+	    {
+	    case CHAR_PERIOD:
+	      {
+		re += 2;
+		start = re;
+		for (;; re++)
+		  {
+		    if (re >= re_end)
+		      {
+			status = REG_ECOLLATE;
+			goto error;
+		      }
+		    if (*re == CHAR_PERIOD)
+		      {
+			if (re + 1 >= re_end)
+			  {
+			    status = REG_ECOLLATE;
+			    goto error;
+			  }
+			/* Found end */
+			if (re[1] == CHAR_RBRACKET)
+			  {
+			    DPRINT(("tre_parse_bracket:   collating "
+				    "symbol: '%.*" STRF "'\n",
+				    REST(start - 2)));
+			    /* Empty name */
+			    if (re == start)
+			      {
+				status = REG_ECOLLATE;
+				goto error;
+			      }
+#ifdef BSD_COMPATIBILITY
+			    /* Check if the name is in cnames; if so, use
+			       the corresponding code */
+			    c = tre_search_cnames(start, re - start);
+			    if (c != (wchar_t)-1)
+			      {
+				re++;
+				goto process_single_character;
+			      }
+#endif /* BSD_COMPATIBILITY */
+			    /* Verify this is a known sequence */
+			    if (__collate_equiv_value(ctx->loc, start,
+							  re - start) <= 0)
+			      {
+				status = REG_ECOLLATE;
+				goto error;
+			      }
+			    /* Process single character collating symbols */
+			    if (re - start == 1)
+			      {
+				c = *start;
+				re++;
+				goto process_single_character;
+			      }
+			    /* Inverted MCCSs are undefined */
+			    if (invert)
+			      {
+				status = REG_ECOLLATE;
+				goto error;
+			      }
+			    /* Can't have MCCSs as an endpoint to a range */
+			    if (range > 0)
+			      {
+				status = REG_ERANGE;
+				goto error;
+			      }
+			    range = -1;
+			    /* Switch into MCCS collection mode (if not
+			     * already there */
+#if TRE_DEBUG
+			    if (!collect_MCCS)
+			      {
+				collect_MCCS = 1;
+				DPRINT(("tre_parse_bracket: Detected MCCS\n"));
+			      }
+#else /* !TRE_DEBUG */
+			    collect_MCCS = 1;
+#endif /* !TRE_DEBUG */
+			    /* Allocate a memory block the first time */
+			    if (!cp)
+			      {
+				if ((col_syms = xmalloc(sizeof(*col_syms) *
+					    (START_COLLATING_SYMBOLS + 2)))
+					    == NULL)
+				  return REG_ESPACE;
+				cp = col_syms + 1;
+				n_col_syms = START_COLLATING_SYMBOLS;
+			      }
+			    /* Enlarge the memory block is more is needed */
+			    if ((cp - col_syms) - 1 >= n_col_syms)
+			      {
+				int i = n_col_syms;
+				tre_collating_symbol *tmp =
+				    xrealloc(col_syms, sizeof(*col_syms) *
+					     ((n_col_syms *= 2) + 2));
+				if (tmp == NULL)
+				  {
+				    xfree(col_syms);
+				    return REG_ESPACE;
+				  }
+				DPRINT(("tre_list_collating_symbols: "
+					"Enlarging col_syms to %d\n",
+					n_col_syms));
+				col_syms = tmp;
+				cp = col_syms + i + 1;
+			      }
+			    cp->start = start;
+			    cp->len = re - start;
+			    cp++;
+			    re++;
+			    break;
+			  }
+		      }
+		  }
+		break;
+	      }
+
+	    case CHAR_EQUAL:
+	    case CHAR_COLON:
+	      {
+		/* Process equivalence and character classes */
+		tre_char_t kind = re[1];
+
+		/* Can't have a class as an endpoint to a range */
+		if (range > 0)
+		  {
+		    status = REG_ERANGE;
+		    goto error;
+		  }
+		if (!collect_MCCS && range == 0)
+		  {
+		    status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
+					  min, &max_i, items);
+		    if (status != REG_OK)
+		      goto error;
+		  }
+		range = -1;
+		re += 2;
+		start = re;
+		for (;; re++)
+		  {
+		    if (re >= re_end)
+		      {
+			status = kind == CHAR_EQUAL ? REG_ECOLLATE : REG_ECTYPE;
+			goto error;
+		      }
+		    if (*re == kind)
+		      {
+			if (re + 1 >= re_end)
+			  {
+			    status = kind == CHAR_EQUAL ? REG_ECOLLATE :
+							  REG_ECTYPE;
+			    goto error;
+			  }
+			/* Found end */
+			if (re[1] == CHAR_RBRACKET)
+			  {
+			    if (re == start)
+			      {
+				/* Empty class name */
+				status = kind == CHAR_EQUAL ? REG_ECOLLATE :
+							      REG_ECTYPE;
+				goto error;
+			      }
+			    /* Process equivalence class */
+			    if (kind == CHAR_EQUAL)
+			      {
+				int equiv;
+
+				DPRINT(("tre_parse_bracket:   equivalence: '%.*"
+					STRF "'\n", REST(start - 2)));
+
+				/* While we find the collation value even for
+				   multi-character collating elements , we
+				   don't (yet) match any collation values
+				   against multi-character sequences.  We'd have
+				   to enumerate those multi-character sequences
+				   and like multi-character collating symbols,
+				   create a union of those sequences with the
+				   rest of the bracket expression.  While
+				   doable, a bracket expression matching
+				   multiple characters, that doesn't explicitly
+				   contain multi-character sequences, might
+				   be unexpected, so we punt for now. */
+				if ((equiv = __collate_equiv_value(ctx->loc,
+					     start, re - start)) <= 0)
+				  {
+				    /* The standard says that if no collating
+				       element if found, we use the collating
+				       symbol itself.  But __collate_equiv_value
+				       doesn't make a distinction between
+				       an element that is in a equvalence
+				       class with others, or is the only member,
+				       so we already know there is no collating
+				       symbol.  (Note that in the case of a
+				       collating element whose collation value
+				       is unique, matching against the
+				       collating element itself, or against
+				       its collation value, is equivalent.) */
+#ifdef BSD_COMPATIBILITY
+				    /* Check if the name is in cnames; if so,
+				       use the corresponding code */
+				    c = tre_search_cnames(start, re - start);
+				    if (c != (wchar_t)-1)
+				      {
+					re++;
+					goto process_single_character;
+				      }
+#endif /* BSD_COMPATIBILITY */
+				    status = REG_ECOLLATE;
+				    goto error;
+				  }
+				if (!collect_MCCS)
+				  {
+				    status = tre_new_item(ctx->mem,
+					     TRE_BRACKET_MATCH_TYPE_EQUIVALENCE,
+					     equiv, &max_i, items);
+				    if (status != REG_OK)
+				      goto error;
+				  }
+			      }
+			    else
+			      {
+				/* Process character class */
+				DPRINT(("tre_parse_bracket:  class: '%.*" STRF
+					"'\n", REST(start - 2)));
+				if (!collect_MCCS)
+				  {
+				    char tmp_str[64];
+				    tre_ctype_t class;
+				    int len = MIN(re - start, 63);
+#ifdef TRE_WCHAR
+				    {
+				      tre_char_t tmp_wcs[64];
+				      wcsncpy(tmp_wcs, start, (size_t)len);
+				      tmp_wcs[len] = L'\0';
+#if defined HAVE_WCSRTOMBS
+				      {
+					mbstate_t state;
+					const tre_char_t *src = tmp_wcs;
+					memset(&state, '\0', sizeof(state));
+					len = wcsrtombs_l(tmp_str, &src,
+						      sizeof(tmp_str), &state,
+						      ctx->loc);
+				      }
+#elif defined HAVE_WCSTOMBS
+				      len = wcstombs(tmp_str, tmp_wcs, 63);
+#endif /* defined HAVE_WCSTOMBS */
+				    }
+#else /* !TRE_WCHAR */
+				    strncpy(tmp_str, (const char*)start, len);
+#endif /* !TRE_WCHAR */
+				    tmp_str[len] = '\0';
+				    DPRINT(("  class name: %s\n", tmp_str));
+				    class = tre_ctype_l(tmp_str, ctx->loc);
+				    if (!class)
+				      {
+					status = REG_ECTYPE;
+					goto error;
+				      }
+				    status = tre_new_item(ctx->mem,
+					     TRE_BRACKET_MATCH_TYPE_CLASS,
+					     class, &max_i, items);
+				    if (status != REG_OK)
+				      goto error;
+				  }
+			      }
+			    re++;
+			    break;
+			  }
+		      }
+		  }
+		other++;
+		break;
+	      }
+
+	    default:
+	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	      c = CHAR_LBRACKET;
+	      goto process_single_character;
+	      break;
+	    }
+	  break;
+
+	case CHAR_RBRACKET:
+	  /* A first right bracket */
+	  if (re == ctx->re)
+	    {
+	      DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	      min = CHAR_RBRACKET;
+	      range = 0;
+	      other++;
+	      break;
+	    }
+	  /* Done */
+	  if (collect_MCCS)
+	    {
+	      DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n",
+		      REST(re)));
+	      if (col_syms)
+		{
+		  /* Mark the character following the right bracket.  Set len
+		   * to whether there are other things besides the
+		   * multi-character collating symbols */
+		  col_syms->start = re + 1;
+		  col_syms->len = other;
+		  /* Mark the end of the list */
+		  cp->start = NULL;
+		}
+	      *result = col_syms;
+	      return REG_OK;
+	    }
+	  /* range > 0 is not possible, since we did a lookahead after the
+	   * hyphen */
+	  if (range == 0)
+	    {
+	      status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
+				    min, &max_i, items);
+	      if (status != REG_OK)
+		goto error;
+	    }
+	  DPRINT(("tre_parse_bracket:	done: '%.*" STRF "'\n", REST(re)));
+	  *items_size = max_i;
+	  ctx->re = re + 1;
+	  return REG_OK;
+
+	default:
+	  DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+	  c = *re;
+process_single_character:
+	  /* Process single character */
+	  if (range > 0)
+	    {
+	      int mine, maxe;
+
+process_end_range:
+	      /* Get collation equivalence values */
+	      mine = __collate_equiv_value(ctx->loc, &min, 1);
+	      maxe = __collate_equiv_value(ctx->loc, &c, 1);
+	      if (maxe < mine)
+		{
+		  status = REG_ERANGE;
+		  goto error;
+		}
+	      if (!collect_MCCS)
+		{
+		  status = tre_new_item(ctx->mem,
+					TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN,
+					mine, &max_i, items);
+		  if (status != REG_OK)
+		    goto error;
+		  status = tre_new_item(ctx->mem,
+					TRE_BRACKET_MATCH_TYPE_RANGE_END,
+					maxe, &max_i, items);
+		  if (status != REG_OK)
+		    goto error;
+		}
+	      range = -1;
+	    }
+	  else
+	    {
+process_begin_range:
+	      if (!collect_MCCS)
+		{
+		  if (range == 0)
+		    {
+		      status = tre_new_item(ctx->mem,
+					    TRE_BRACKET_MATCH_TYPE_CHAR,
+					    min, &max_i, items);
+		      if (status != REG_OK)
+			goto error;
+		    }
+		  min = c;
+		}
+	      range = 0;
+	    }
+	  other++;
+	  break;
+	}
+    }
+  status = REG_EBRACK;
+error:
+  DPRINT(("tre_parse_bracket:	error: '%.*" STRF "', status=%d\n",
+	  REST(re), status));
+  if (col_syms)
+    xfree(col_syms);
+  return status;
+}
+
+#ifdef TRE_DEBUG
+static const char *bracket_match_type_str[] = {
+  "unused",
+  "char",
+  "range begin",
+  "range end",
+  "class",
+  "equivalence value",
+};
+#endif /* TRE_DEBUG */
+
+static reg_errcode_t
+tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  tre_ast_node_t *node;
+  reg_errcode_t status = REG_OK;
+  tre_bracket_match_list_t *items;
+  int max_i = 32;
+  tre_collating_symbol *col_syms = NULL;
+
+  /* Handle special cases [[:<:]] and [[:>:]] */
+  if (ctx->re_end - ctx->re >= 6 && ctx->re[0] == CHAR_LBRACKET
+      && ctx->re[1] == CHAR_COLON && (ctx->re[2] == L'<' || ctx->re[2] == L'>')
+      && ctx->re[3] == CHAR_COLON && ctx->re[4] == CHAR_RBRACKET
+      && ctx->re[5] == CHAR_RBRACKET)
+    {
+      *result = tre_ast_new_literal(ctx->mem, ASSERTION,
+		      (ctx->re[2] == L'<') ? ASSERT_AT_BOW : ASSERT_AT_EOW,
+		      -1);
+      DPRINT(("tre_parse_bracket: special case %s\n", (ctx->re[2] == L'<') ?
+	      "[[:<:]]" : "[[:>:]]"));
+      ctx->re += 6;
+      return *result ? REG_OK : REG_ESPACE;
+    }
+
+  /* Start off with an array of `max_i' elements. */
+  items = xcalloc(1, SIZEOF_BRACKET_MATCH_LIST_N(max_i));
+  if (items == NULL)
+    return REG_ESPACE;
+
+  if (*ctx->re == CHAR_CARET)
+    {
+      DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
+      items->flags |= TRE_BRACKET_MATCH_FLAG_NEGATE;
+      ctx->re++;
+    }
+
+  status = tre_parse_bracket_items(ctx, &items, &max_i, &col_syms);
+
+  if (status != REG_OK)
+    goto parse_bracket_done;
+
+  /* If there are collating symbols, split off the multi-character ones
+   * into a union of the bracket expression (without the collating symbols)
+   * and the multiple-character sequences.  We create an equivalent input
+   * string and run tre_parse() recursively */
+  if (col_syms)
+    {
+      tre_char_t *str, *sp;
+      tre_collating_symbol *cp;
+      tre_parse_ctx_t subctx;
+
+      /* Allocate a new string.  We start with the size of the original
+       * bracket expression (minus 1) and add 2 (for a leading "[" and
+       * a trailing nil; don't need a "^", since it is illegal to have
+       * inverted MCCSs).  Since a multi-character collating symbols
+       * will be converted from "[.xx.]" to "|xx" (n+4 to n+1), we don't
+       * need to worry about the new string getting too long. */
+      xfree(items);
+      str = xmalloc(sizeof(*str) * ((col_syms->start - ctx->re) + 2));
+      if (str == NULL)
+	{
+	  xfree(col_syms);
+	  return REG_ESPACE;
+	}
+      sp = str;
+      if (col_syms->len > 0)
+	{
+	  /* There are other items in the bracket expression besides the
+	   * multi-character collating symbols, so create a new bracket
+	   * expression with only those other itmes. */
+	  const tre_char_t *re;
+	  ptrdiff_t i;
+
+	  *sp++ = '[';
+	  re = ctx->re;
+	  for (cp = col_syms + 1; cp->start; cp++)
+	    {
+	      /* The "- 2" is to account for the "[." */
+	      if ((i = ((cp->start - re) - 2)) > 0)
+		{
+		  memcpy(sp, re, sizeof(*sp) * i);
+		  sp += i;
+		}
+	      /* The "+ 2" is to account for the ".]" */
+	      re = cp->start + cp->len + 2;
+	    }
+	    i = col_syms->start - re; /* Includes the trailing right bracket */
+	    memcpy(sp, re, sizeof(*sp) * i);
+	    sp += i;
+	    *sp++ = '|';
+	}
+      for (cp = col_syms + 1; cp->start; cp++)
+	{
+	  memcpy(sp, cp->start, sizeof(*sp) * cp->len);
+	  sp += cp->len;
+	  if (cp[1].start)
+	    *sp++ = '|';
+	}
+      *sp = 0;
+      DPRINT(("tre_parse_bracket: Reparsing bracket expression with '%ls'\n",
+	      str));
+
+      memcpy(&subctx, ctx, sizeof(subctx));
+      subctx.re = str;
+      subctx.len = sp - str;
+      subctx.nofirstsub = 1;
+      subctx.cflags |= REG_EXTENDED; /* Force extended mode for parsing */
+      status = tre_parse(&subctx);
+      xfree(str);
+      if (status != REG_OK)
+	{
+	  xfree(col_syms);
+	  return status;
+	}
+      ctx->re = col_syms->start;
+      ctx->position = subctx.position;
+      xfree(col_syms);
+      *result = subctx.result;
+      DPRINT(("tre_parse_bracket: Returning to original string\n"));
+      return REG_OK;
+    }
+
+  DPRINT(("tre_parse_bracket: creating bracket expression literal\n"));
+  node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
+  if (node == NULL)
+    {
+      status = REG_ESPACE;
+      goto parse_bracket_done;
+    }
+  else
+    {
+      tre_literal_t *l = node->obj;
+      l->u.bracket_match_list = tre_mem_alloc(ctx->mem,
+					 SIZEOF_BRACKET_MATCH_LIST(items));
+      if (l->u.bracket_match_list == NULL)
+	{
+	  status = REG_ESPACE;
+	  goto parse_bracket_done;
+	}
+      memcpy(l->u.bracket_match_list, items, SIZEOF_BRACKET_MATCH_LIST(items));
+    }
+
+#ifdef TRE_DEBUG
+  {
+    int i;
+    tre_bracket_match_t *b;
+    DPRINT(("tre_parse_bracket: %d bracket match items, flags 0x%x\n",
+	    items->num_bracket_matches, items->flags));
+    for (i = 0, b = items->bracket_matches;
+	 i < items->num_bracket_matches; i++, b++)
+      {
+	DPRINT(("   %d: %s %d\n", i, bracket_match_type_str[b->type],
+		b->value));
+      }
+  }
+#endif /* TRE_DEBUG */
+
+ parse_bracket_done:
+  xfree(items);
+  ctx->position++;
+  *result = node;
+  return status;
+}
+
+
+/* Parses a positive decimal integer.  Returns -1 if the string does not
+   contain a valid number. */
+static int
+tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
+{
+  int num = -1;
+  const tre_char_t *r = *regex;
+  while (r < regex_end && *r >= L'0' && *r <= L'9')
+    {
+      if (num < 0)
+	num = 0;
+      num = num * 10 + *r - L'0';
+      r++;
+    }
+  *regex = r;
+  return num;
+}
+
+
+static reg_errcode_t
+tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  int min, max;
+#ifdef TRE_APPROX
+  int i;
+  int cost_ins, cost_del, cost_subst, cost_max;
+  int limit_ins, limit_del, limit_subst, limit_err;
+  const tre_char_t *start;
+#endif /* TRE_APPROX */
+  const tre_char_t *r = ctx->re;
+  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+#ifdef TRE_APPROX
+  int approx = 0;
+  int costs_set = 0;
+  int counts_set = 0;
+
+  cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
+  limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
+#endif /* TRE_APPROX */
+
+  /* Parse number (minimum repetition count). */
+  min = -1;
+  if (r >= ctx->re_end)
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+    return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_EBRACE;
+#else /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+    return REG_EBRACE;
+#endif /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+  if (*r >= L'0' && *r <= L'9') {
+    DPRINT(("tre_parse:	  min count: '%.*" STRF "'\n", REST(r)));
+    min = tre_parse_int(&r, ctx->re_end);
+  }
+#ifndef TRE_APPROX
+  else
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+      /* For ERE, return REG_NOMATCH to signal that the lbrace should
+         be treated as a literal */
+      return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_BADBR;
+#else /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+      return REG_BADBR;
+#endif /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+#endif /* !TRE_APPROX */
+
+  /* Parse comma and second number (maximum repetition count). */
+  max = min;
+  if (r < ctx->re_end && *r == CHAR_COMMA)
+    {
+      r++;
+      DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
+      max = tre_parse_int(&r, ctx->re_end);
+    }
+
+  /* Check that the repeat counts are sane. */
+  if ((max >= 0 && min > max) || min > RE_DUP_MAX || max > RE_DUP_MAX)
+    return REG_BADBR;
+
+
+#ifdef TRE_APPROX
+  /*
+   '{'
+     optionally followed immediately by a number == minimum repcount
+     optionally followed by , then a number == maximum repcount
+      + then a number == maximum insertion count
+      - then a number == maximum deletion count
+      # then a number == maximum substitution count
+      ~ then a number == maximum number of errors
+      Any of +, -, # or ~ without followed by a number means that
+      the maximum count/number of errors is infinite.
+
+      An equation of the form
+	Xi + Yd + Zs < C
+      can be specified to set costs and the cost limit to a value
+      different from the default value:
+	- X is the cost of an insertion
+	- Y is the cost of a deletion
+	- Z is the cost of a substitution
+	- C is the maximum cost
+
+      If no count limit or cost is set for an operation, the operation
+      is not allowed at all.
+  */
+
+
+  do {
+    int done;
+    start = r;
+
+    /* Parse count limit settings */
+    done = 0;
+    if (!counts_set)
+      while (r + 1 < ctx->re_end && !done)
+	{
+	  switch (*r)
+	    {
+	    case CHAR_PLUS:  /* Insert limit */
+	      DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
+	      r++;
+	      limit_ins = tre_parse_int(&r, ctx->re_end);
+	      if (limit_ins < 0)
+		limit_ins = INT_MAX;
+	      counts_set = 1;
+	      break;
+	    case CHAR_MINUS: /* Delete limit */
+	      DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
+	      r++;
+	      limit_del = tre_parse_int(&r, ctx->re_end);
+	      if (limit_del < 0)
+		limit_del = INT_MAX;
+	      counts_set = 1;
+	      break;
+	    case CHAR_HASH:  /* Substitute limit */
+	      DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
+	      r++;
+	      limit_subst = tre_parse_int(&r, ctx->re_end);
+	      if (limit_subst < 0)
+		limit_subst = INT_MAX;
+	      counts_set = 1;
+	      break;
+	    case CHAR_TILDE: /* Maximum number of changes */
+	      DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
+	      r++;
+	      limit_err = tre_parse_int(&r, ctx->re_end);
+	      if (limit_err < 0)
+		limit_err = INT_MAX;
+	      approx = 1;
+	      break;
+	    case CHAR_COMMA:
+	      r++;
+	      break;
+	    case L' ':
+	      r++;
+	      break;
+	    case L'}':
+	      done = 1;
+	      break;
+	    default:
+	      done = 1;
+	      break;
+	    }
+	}
+
+    /* Parse cost restriction equation. */
+    done = 0;
+    if (!costs_set)
+      while (r + 1 < ctx->re_end && !done)
+	{
+	  switch (*r)
+	    {
+	    case CHAR_PLUS:
+	    case L' ':
+	      r++;
+	      break;
+	    case L'<':
+	      DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
+	      r++;
+	      while (*r == L' ')
+		r++;
+	      cost_max = tre_parse_int(&r, ctx->re_end);
+	      if (cost_max < 0)
+		cost_max = INT_MAX;
+	      else
+		cost_max--;
+	      approx = 1;
+	      break;
+	    case CHAR_COMMA:
+	      r++;
+	      done = 1;
+	      break;
+	    default:
+	      if (*r >= L'0' && *r <= L'9')
+		{
+#ifdef TRE_DEBUG
+		  const tre_char_t *sr = r;
+#endif /* TRE_DEBUG */
+		  int cost = tre_parse_int(&r, ctx->re_end);
+		  /* XXX - make sure r is not past end. */
+		  switch (*r)
+		    {
+		    case L'i':	/* Insert cost */
+		      DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
+			      REST(sr)));
+		      r++;
+		      cost_ins = cost;
+		      costs_set = 1;
+		      break;
+		    case L'd':	/* Delete cost */
+		      DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
+			      REST(sr)));
+		      r++;
+		      cost_del = cost;
+		      costs_set = 1;
+		      break;
+		    case L's':	/* Substitute cost */
+		      DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
+			      REST(sr)));
+		      r++;
+		      cost_subst = cost;
+		      costs_set = 1;
+		      break;
+		    default:
+		      return REG_BADBR;
+		    }
+		}
+	      else
+		{
+		  done = 1;
+		  break;
+		}
+	    }
+	}
+  } while (start != r);
+#endif /* TRE_APPROX */
+
+  /*{*//* Missing }. */
+  if (r >= ctx->re_end)
+    return REG_EBRACE;
+
+  /* Empty contents of {}. */
+  if (r == ctx->re)
+    return REG_BADBR;
+
+  /* Parse the ending '}' or '\}'.*/
+  if (ctx->cflags & REG_EXTENDED)
+    {
+      if (r >= ctx->re_end || *r != CHAR_RBRACE)
+	return REG_BADBR;
+      r++;
+      /* Parse trailing '?' marking minimal repetition. */
+      if (r < ctx->re_end)
+	{
+	  if (*r == CHAR_QUESTIONMARK)
+	    {
+	      /* Process the question mark only in enhanced mode.
+		 Otherwise, the question mark is an error in ERE
+		 or a literal in BRE */
+	      if (ctx->cflags & REG_ENHANCED)
+		{
+		  minimal = !(ctx->cflags & REG_UNGREEDY);
+		  r++;
+		}
+	      else return REG_BADRPT;
+	    }
+	  else if (*r == CHAR_STAR || *r == CHAR_PLUS)
+	    {
+	      /* These are reserved for future extensions. */
+	      return REG_BADRPT;
+	    }
+	}
+    }
+  else
+    {
+      if (r + 1 >= ctx->re_end
+	  || *r != CHAR_BACKSLASH
+	  || *(r + 1) != CHAR_RBRACE)
+	return REG_BADBR;
+      r += 2;
+      if (r < ctx->re_end && *r == CHAR_STAR)
+	{
+	  /* This is reserved for future extensions. */
+	  return REG_BADRPT;
+	}
+    }
+
+  if (minimal)
+    ctx->num_reorder_tags++;
+
+  if (!result) goto parse_bound_exit;
+  /* Create the AST node(s). */
+  /* Originally, if min == 0 && max == 0, we immediately replace the whole
+     iteration with EMPTY.  This unfortunately drops any submatches, and
+     messes up setting the pmatch values (we can get tags of -1, and
+     tag values in the billions).  So we leave it and process this case as
+     usual, and wait until tre_expand_ast() to replace with EMPTY */
+#ifdef TRE_APPROX
+  if (min < 0 && max < 0)
+    /* Only approximate parameters set, no repetitions. */
+    min = max = 1;
+#endif /* TRE_APPROX */
+
+  *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
+  if (!*result)
+    return REG_ESPACE;
+
+#ifdef TRE_APPROX
+  /* If approximate matching parameters are set, add them to the
+     iteration node. */
+  if (approx || costs_set || counts_set)
+    {
+      int *params;
+      tre_iteration_t *iter = (*result)->obj;
+
+      if (costs_set || counts_set)
+	{
+	  if (limit_ins == TRE_PARAM_UNSET)
+	    {
+	      if (cost_ins == TRE_PARAM_UNSET)
+		limit_ins = 0;
+	      else
+		limit_ins = INT_MAX;
+	    }
+
+	  if (limit_del == TRE_PARAM_UNSET)
+	    {
+	      if (cost_del == TRE_PARAM_UNSET)
+		limit_del = 0;
+	      else
+		limit_del = INT_MAX;
+	    }
+
+	  if (limit_subst == TRE_PARAM_UNSET)
+	    {
+	      if (cost_subst == TRE_PARAM_UNSET)
+		limit_subst = 0;
+	      else
+		limit_subst = INT_MAX;
+	    }
+	}
+
+      if (cost_max == TRE_PARAM_UNSET)
+	cost_max = INT_MAX;
+      if (limit_err == TRE_PARAM_UNSET)
+	limit_err = INT_MAX;
+
+      ctx->have_approx = 1;
+      params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
+      if (!params)
+	return REG_ESPACE;
+      for (i = 0; i < TRE_PARAM_LAST; i++)
+	params[i] = TRE_PARAM_UNSET;
+      params[TRE_PARAM_COST_INS] = cost_ins;
+      params[TRE_PARAM_COST_DEL] = cost_del;
+      params[TRE_PARAM_COST_SUBST] = cost_subst;
+      params[TRE_PARAM_COST_MAX] = cost_max;
+      params[TRE_PARAM_MAX_INS] = limit_ins;
+      params[TRE_PARAM_MAX_DEL] = limit_del;
+      params[TRE_PARAM_MAX_SUBST] = limit_subst;
+      params[TRE_PARAM_MAX_ERR] = limit_err;
+      iter->params = params;
+    }
+#endif /* TRE_APPROX */
+
+parse_bound_exit:
+#ifdef TRE_APPROX
+  DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
+	  "limits [%d,%d,%d, total %d]\n",
+	  min, max, cost_ins, cost_del, cost_subst, cost_max,
+	  limit_ins, limit_del, limit_subst, limit_err));
+#else /* !TRE_APPROX */
+  DPRINT(("tre_parse_bound: min %d, max %d\n", min, max));
+#endif /* !TRE_APPROX */
+
+
+  ctx->re = r;
+  return REG_OK;
+}
+
+/* Previously, we had PARSE_RESTORE_CFLAGS restore the cflags, but for
+   non-self-contained options, like (?i), this causes ((?i)fu)bar to be
+   treated more like ((?i)fu(?-i)bar), so the pmatch value is incorrect.
+   Because we now set up tags for even non-capturing parenthesized
+   subexpressions, we always call PARSE_MARK_FOR_SUBMATCH.  So if we
+   pass the unmodified version of cflags to PARSE_MARK_FOR_SUBMATCH and
+   have it restore cflags after the subexpression, we don't need to have
+   a separate PARSE_RESTORE_CFLAGS, and then after processing the
+   non-self-contained option, we can call PARSE_ATOM instead of PARSE_RE.
+   This has the side-benefit of now matching the perl behavior: the RE
+   foo(?i)bar|zap is foo(?i)bar OR (?i)zap instead of TRE previous behavior
+   of foo AND (?i) (bar OR zap). */
+typedef enum {
+  PARSE_RE = 0,
+  PARSE_ATOM,
+  PARSE_MARK_FOR_SUBMATCH,
+  PARSE_BRANCH,
+  PARSE_PIECE,
+  PARSE_CATENATION,
+  PARSE_POST_CATENATION,
+  PARSE_UNION,
+  PARSE_POST_UNION,
+  PARSE_POSTFIX,
+} tre_parse_re_stack_symbol_t;
+
+
+reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx)
+{
+  tre_ast_node_t *result = NULL;
+  tre_parse_re_stack_symbol_t symbol;
+  reg_errcode_t status = REG_OK;
+  tre_stack_t *stack = ctx->stack;
+  int bottom = tre_stack_num_objects(stack);
+  int depth = 0;
+  int temporary_cflags = 0;
+  int bre_branch_begin;
+#ifdef TRE_DEBUG
+  const tre_char_t *tmp_re;
+#endif
+
+  DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d cflags = 0%o\n",
+	  ctx->len, ctx->re, ctx->len, ctx->cflags));
+
+  if (ctx->len <= 0) return REG_EMPTY;
+  if (!ctx->nofirstsub)
+    {
+      STACK_PUSH(stack, int, ctx->cflags);
+      STACK_PUSH(stack, int, ctx->submatch_id);
+      STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
+      ctx->submatch_id++;
+    }
+  STACK_PUSH(stack, int, 0); // bre_branch_begin
+  STACK_PUSH(stack, int, PARSE_RE);
+  ctx->re_start = ctx->re;
+  ctx->re_end = ctx->re + ctx->len;
+
+
+  /* The following is basically just a recursive descent parser.  I use
+     an explicit stack instead of recursive functions mostly because of
+     two reasons: compatibility with systems which have an overflowable
+     call stack, and efficiency (both in lines of code and speed).  */
+  while (tre_stack_num_objects(stack) > bottom)
+    {
+      symbol = tre_stack_pop_int(stack);
+      switch (symbol)
+	{
+	case PARSE_RE:
+	  /* Parse a full regexp.  A regexp is one or more branches,
+	     separated by the union operator `|'. */
+	  bre_branch_begin = tre_stack_pop_int(stack);
+	  if (
+#ifdef REG_LITERAL
+	      !(ctx->cflags & REG_LITERAL) &&
+#endif /* REG_LITERAL */
+	      ctx->cflags & (REG_EXTENDED | REG_ENHANCED))
+	    STACK_PUSHX(stack, int, PARSE_UNION);
+	  STACK_PUSHX(stack, int, bre_branch_begin);
+	  STACK_PUSHX(stack, int, PARSE_BRANCH);
+	  break;
+
+	case PARSE_BRANCH:
+	  /* Parse a branch.  A branch is one or more pieces, concatenated.
+	     A piece is an atom possibly followed by a postfix operator. */
+	  bre_branch_begin = tre_stack_pop_int(stack);
+	  STACK_PUSHX(stack, int, PARSE_CATENATION);
+	  STACK_PUSHX(stack, int, bre_branch_begin);
+	  STACK_PUSHX(stack, int, PARSE_PIECE);
+	  break;
+
+	case PARSE_PIECE:
+	  /* Parse a piece.  A piece is an atom possibly followed by one
+	     or more postfix operators. */
+	  bre_branch_begin = tre_stack_pop_int(stack);
+	  STACK_PUSHX(stack, int, PARSE_POSTFIX);
+	  STACK_PUSHX(stack, int, bre_branch_begin);
+	  STACK_PUSHX(stack, int, PARSE_ATOM);
+	  break;
+
+	case PARSE_CATENATION:
+	  /* If the expression has not ended, parse another piece. */
+	  {
+	    tre_char_t c;
+	    if (ctx->re >= ctx->re_end)
+	      break;
+	    c = *ctx->re;
+#ifdef REG_LITERAL
+	    if (!(ctx->cflags & REG_LITERAL))
+	      {
+#endif /* REG_LITERAL */
+		if ((ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) ||
+		    ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED
+		    && ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH &&
+		    *(ctx->re + 1) == CHAR_PIPE))
+		  break;
+		if ((ctx->cflags & REG_EXTENDED
+		     && c == CHAR_RPAREN && depth > 0)
+		    || (!(ctx->cflags & REG_EXTENDED)
+			&& ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH
+			    && *(ctx->re + 1) == CHAR_RPAREN))
+		  {
+		    if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+		      return REG_EPAREN;
+		    DPRINT(("tre_parse:	  group end: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    depth--;
+		    if (!(ctx->cflags & (REG_EXTENDED | REG_ENHANCED)))
+		      ctx->re += 2;
+		    break;
+		  }
+#ifdef REG_LITERAL
+	      }
+#endif /* REG_LITERAL */
+
+#ifdef REG_LEFT_ASSOC
+	    if (ctx->cflags & REG_LEFT_ASSOC)
+	      {
+		/* Left associative concatenation. */
+		STACK_PUSHX(stack, int, PARSE_CATENATION);
+		STACK_PUSHX(stack, voidptr, result);
+		STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+		STACK_PUSHX(stack, int, 0); // bre_branch_begin
+		STACK_PUSHX(stack, int, PARSE_PIECE);
+	      }
+	    else
+#endif /* REG_LEFT_ASSOC */
+	      {
+		/* Default case, right associative concatenation. */
+		STACK_PUSHX(stack, voidptr, result);
+		STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+		STACK_PUSHX(stack, int, PARSE_CATENATION);
+		STACK_PUSHX(stack, int, 0); // bre_branch_begin
+		STACK_PUSHX(stack, int, PARSE_PIECE);
+	      }
+	    break;
+	  }
+
+	case PARSE_POST_CATENATION:
+	  {
+	    tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+	    tre_ast_node_t *tmp_node;
+	    tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
+	    if (!tmp_node)
+	      return REG_ESPACE;
+	    result = tmp_node;
+	    break;
+	  }
+
+	case PARSE_UNION:
+	  if (ctx->re >= ctx->re_end)
+	    break;
+#ifdef REG_LITERAL
+	  if (ctx->cflags & REG_LITERAL)
+	    break;
+#endif /* REG_LITERAL */
+	  if (!(ctx->cflags & REG_EXTENDED))
+	    {
+	      if (*ctx->re != CHAR_BACKSLASH || ctx->re + 1 >= ctx->re_end)
+		break;
+	      ctx->re++;
+	    }
+	  switch (*ctx->re)
+	    {
+	    case CHAR_PIPE:
+	      DPRINT(("tre_parse:	union: '%.*" STRF "'\n",
+		      REST(ctx->re)));
+	      STACK_PUSHX(stack, int, PARSE_UNION);
+	      STACK_PUSHX(stack, voidptr, (void *)ctx->re);
+	      STACK_PUSHX(stack, voidptr, result);
+	      STACK_PUSHX(stack, int, PARSE_POST_UNION);
+	      /* We need to pass a boolean (eventually) to PARSE_ATOM to
+		 indicate if this is the beginning of a BRE extended branch. */
+	      STACK_PUSHX(stack, int, (ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED); // bre_branch_begin
+	      STACK_PUSHX(stack, int, PARSE_BRANCH);
+	      ctx->re++;
+	      break;
+
+	    case CHAR_RPAREN:
+	      ctx->re++;
+	      break;
+
+	    default:
+	      if (!(ctx->cflags & REG_EXTENDED))
+		ctx->re--;
+	      break;
+	    }
+	  break;
+
+	case PARSE_POST_UNION:
+	  {
+	    tre_ast_node_t *tmp_node;
+	    tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+	    const tre_char_t *pipechar = tre_stack_pop_voidptr(stack);
+	    /* error on empty expression at end of union */
+	    if (pipechar == ctx->re - 1)
+	      {
+		return REG_EMPTY;
+	      }
+	    tmp_node = tre_ast_new_union(ctx->mem, tree, result);
+	    if (!tmp_node)
+	      return REG_ESPACE;
+	    result = tmp_node;
+	    break;
+	  }
+
+	case PARSE_POSTFIX:
+	  /* Parse postfix operators. */
+	  if (ctx->re >= ctx->re_end)
+	    break;
+#ifdef REG_LITERAL
+	  if (ctx->cflags & REG_LITERAL)
+	    break;
+#endif /* REG_LITERAL */
+	  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+	  int rep_min = 0;
+	  int rep_max = -1;
+#ifdef TRE_DEBUG
+	  int lbrace_off;
+#endif
+	  switch (*ctx->re)
+	    {
+	    case CHAR_PLUS:
+	    case CHAR_QUESTIONMARK:
+	      if (!(ctx->cflags & REG_EXTENDED))
+		break;
+		/*FALLTHROUGH*/
+	    case CHAR_STAR:
+	      {
+		tre_ast_node_t *tmp_node;
+#ifdef TRE_DEBUG
+		const char *tstr = "star";
+		tmp_re = ctx->re;
+#endif
+
+	handle_plus_or_question:
+		/* error on iteration of raw assertion (not in subexpression) */
+		if (result->type == LITERAL && result->submatch_id < 0 &&
+		    IS_ASSERTION((tre_literal_t *)result->obj))
+		  {
+		    if (!(ctx->cflags & REG_EXTENDED)) break;
+		    return REG_BADRPT;
+		  }
+		if (*ctx->re == CHAR_PLUS)
+		  {
+		    rep_min = 1;
+#ifdef TRE_DEBUG
+		    tstr = "plus";
+#endif
+		  }
+		if (*ctx->re == CHAR_QUESTIONMARK)
+		  {
+		    rep_max = 1;
+#ifdef TRE_DEBUG
+		    tstr = "questionmark";
+#endif
+		  }
+
+		if (ctx->cflags & REG_EXTENDED)
+		  {
+		    if (ctx->re + 1 < ctx->re_end)
+		      {
+			if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
+			  {
+			    /* Process the question mark only in enhanced mode.
+			       Otherwise, the question mark is an error in ERE */
+			    if (ctx->cflags & REG_ENHANCED)
+			      {
+				minimal = !(ctx->cflags & REG_UNGREEDY);
+				ctx->re++;
+			      }
+			    else return REG_BADRPT;
+			  }
+			else if (*(ctx->re + 1) == CHAR_STAR
+				 || *(ctx->re + 1) == CHAR_PLUS)
+			  {
+			    /* These are reserved for future extensions. */
+			    return REG_BADRPT;
+			  }
+		      }
+		  }
+		else
+		  {
+		    if (ctx->re + 1 < ctx->re_end && *(ctx->re + 1) == CHAR_STAR)
+		      {
+			/* This is reserved for future extensions. */
+			return REG_BADRPT;
+		      }
+		    if (ctx->re + 2 < ctx->re_end)
+		      {
+			if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 1) == CHAR_QUESTIONMARK)
+			  {
+			    /* Process the question mark only in enhanced mode.
+			       Otherwise, the question mark is a literal in BRE */
+			    if (ctx->cflags & REG_ENHANCED)
+			      {
+				minimal = !(ctx->cflags & REG_UNGREEDY);
+				ctx->re += 2;
+			      }
+			  }
+			else if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 2) == CHAR_PLUS)
+			  {
+			    /* This is reserved for future extensions. */
+			    return REG_BADRPT;
+			  }
+		      }
+		  }
+
+		if (minimal)
+		  ctx->num_reorder_tags++;
+
+		DPRINT(("tre_parse: %s %s: '%.*" STRF "'\n",
+			minimal ? "  minimal" : "greedy", tstr, REST(tmp_re)));
+		if (result == NULL)
+		  {
+		    if (ctx->cflags & REG_EXTENDED) return REG_BADRPT;
+		    else goto parse_literal;
+		  }
+		ctx->re++;
+		tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
+					    minimal);
+		if (tmp_node == NULL)
+		  return REG_ESPACE;
+		result = tmp_node;
+
+		/* Set the iterator with a submatch id in the invisible range
+		 * (which will be overridden if a real submatch is needed) */
+		result->submatch_id = ctx->submatch_id_invisible++;
+
+#if 0
+		/* We don't allow multiple postfixes, but this might be needed
+		   to support approximate matching */
+		STACK_PUSHX(stack, int, PARSE_POSTFIX);
+#endif
+	      }
+	      break;
+
+	    case CHAR_BACKSLASH:
+	      /* "\{" is special without REG_EXTENDED */
+	      /* "\+" and "\?" are special with REG_ENHANCED for BRE */
+	      if (!(ctx->cflags & REG_EXTENDED)
+		  && ctx->re + 1 < ctx->re_end)
+		{
+		  switch (*(ctx->re + 1))
+		    {
+		    case CHAR_LBRACE:
+		      ctx->re++;
+#ifdef TRE_DEBUG
+		      lbrace_off = 2;
+#endif
+		      goto parse_brace;
+		    case CHAR_PLUS:
+		    case CHAR_QUESTIONMARK:
+		      if (ctx->cflags & REG_ENHANCED)
+			{
+#ifdef TRE_DEBUG
+			  tmp_re = ctx->re;
+#endif
+			  ctx->re++;
+			  goto handle_plus_or_question;
+			}
+		      break;
+		    }
+		  break;
+		}
+	      else
+		break;
+
+	    case CHAR_LBRACE:
+	      {
+		int raw_assertion;
+
+		/* "{" is literal without REG_EXTENDED */
+		if (!(ctx->cflags & REG_EXTENDED))
+		  break;
+#ifdef TRE_DEBUG
+		lbrace_off = 1;
+#endif
+
+	    parse_brace:
+		/* error on iteration of raw assertion (not in subexpression),
+		   but wait until after parsing bounds */
+		raw_assertion = (result->type == LITERAL
+				 && result->submatch_id < 0
+				 && IS_ASSERTION((tre_literal_t *)result->obj));
+		ctx->re++;
+
+		status = tre_parse_bound(ctx, &result);
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+		/* For ERE, if status is REG_NOMATCH, this mean the lbrace
+		   is to be treated as a literal. */
+		if (status == REG_NOMATCH)
+		  {
+		    ctx->re--;
+		    break;
+		  }
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+		DPRINT(("tre_parse:	bound: '%.*" STRF "'\n",
+			REST(ctx->re - lbrace_off)));
+		if (status != REG_OK)
+		  return status;
+		if (raw_assertion) return REG_BADRPT;
+
+		/* Set the iterator with a submatch id in the invisible range
+		 * (which will be overridden if a real submatch is needed) */
+		if (result->type == ITERATION)
+		  result->submatch_id = ctx->submatch_id_invisible++;
+
+#if 0
+		/* We don't allow multiple postfixes, but this might be needed
+		   to support approximate matching */
+		STACK_PUSHX(stack, int, PARSE_POSTFIX);
+#endif
+		break;
+	      }
+	    }
+	  break;
+
+	case PARSE_ATOM:
+	  {
+	    /* Parse an atom.  An atom is a regular expression enclosed in `()',
+	       an empty set of `()', a bracket expression, `.', `^', `$',
+	       a `\' followed by a character, or a single character. */
+
+	    /* The stack contains a boolean value, whether PARSE_ATOM is
+	       being called just after the start of a group (left paren)
+	       in a BRE */
+	    bre_branch_begin = tre_stack_pop_int(stack);
+
+	    /* End of regexp? (empty string). */
+	    if (ctx->re >= ctx->re_end)
+	      goto parse_literal;
+
+#ifdef REG_LITERAL
+	    if (ctx->cflags & REG_LITERAL)
+	      goto parse_literal;
+#endif /* REG_LITERAL */
+
+	    switch (*ctx->re)
+	      {
+	      case CHAR_LPAREN:  /* parenthesized subexpression */
+
+		/* Handle "(?...)" extensions.  They work in a way similar
+		   to Perls corresponding extensions. */
+		if ((ctx->cflags & (REG_EXTENDED|REG_ENHANCED)) ==
+		    (REG_EXTENDED|REG_ENHANCED)
+		    && *(ctx->re + 1) == CHAR_QUESTIONMARK)
+		  {
+		    int new_cflags = ctx->cflags;
+		    int bit = 1;
+		    int invisible_submatch = 0;
+		    DPRINT(("tre_parse:	extension: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    ctx->re += 2;
+		    while (/*CONSTCOND*/1)
+		      {
+			if (*ctx->re == L'i')
+			  {
+			    DPRINT(("tre_parse:	    icase: '%.*" STRF "'\n",
+				    REST(ctx->re)));
+			    if (bit)
+			      new_cflags |= REG_ICASE;
+			    else
+			      new_cflags &= ~REG_ICASE;
+			    ctx->re++;
+			  }
+			else if (*ctx->re == L'n')
+			  {
+			    DPRINT(("tre_parse:	  newline: '%.*" STRF "'\n",
+				    REST(ctx->re)));
+			    if (bit)
+			      new_cflags |= REG_NEWLINE;
+			    else
+			      new_cflags &= ~REG_NEWLINE;
+			    ctx->re++;
+			  }
+#ifdef REG_LEFT_ASSOC
+			else if (*ctx->re == L'l')
+			  {
+			    DPRINT(("tre_parse: left assoc: '%.*" STRF "'\n",
+				    REST(ctx->re)));
+			    if (bit)
+			      new_cflags |= REG_LEFT_ASSOC;
+			    else
+			      new_cflags &= ~REG_LEFT_ASSOC;
+			    ctx->re++;
+			  }
+#endif /* REG_LEFT_ASSOC */
+#ifdef REG_UNGREEDY
+			else if (*ctx->re == L'U')
+			  {
+			    DPRINT(("tre_parse:    ungreedy: '%.*" STRF "'\n",
+				    REST(ctx->re)));
+			    if (bit)
+			      new_cflags |= REG_UNGREEDY;
+			    else
+			      new_cflags &= ~REG_UNGREEDY;
+			    ctx->re++;
+			  }
+#endif /* REG_UNGREEDY */
+			else if (*ctx->re == CHAR_MINUS)
+			  {
+			    DPRINT(("tre_parse:	 turn off: '%.*" STRF "'\n",
+				    REST(ctx->re)));
+			    ctx->re++;
+			    bit = 0;
+			  }
+			else if (*ctx->re == CHAR_COLON)
+			  {
+			    DPRINT(("tre_parse:	 no group: '%.*" STRF
+				    "', (invisible submatch %d)\n",
+				    REST(ctx->re), ctx->submatch_id_invisible));
+			    ctx->re++;
+			    depth++;
+			    invisible_submatch = 1;
+			    break;
+			  }
+			else if (*ctx->re == CHAR_HASH)
+			  {
+			    DPRINT(("tre_parse:    comment: '%.*" STRF "'\n",
+				    REST(ctx->re)));
+			    /* A comment can contain any character except a
+			       right parenthesis */
+			    while (*ctx->re != CHAR_RPAREN
+				   && ctx->re < ctx->re_end)
+			      ctx->re++;
+			    if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
+			      {
+				ctx->re++;
+				break;
+			      }
+			    else
+			      return REG_BADPAT;
+			  }
+			else if (*ctx->re == CHAR_RPAREN)
+			  {
+			    ctx->re++;
+			    break;
+			  }
+			else
+			  return REG_BADRPT;
+		      }
+
+		    /* Turn on the cflags changes for the rest of the
+		       enclosing group. */
+		    if (invisible_submatch)
+		      {
+			STACK_PUSHX(stack, int, ctx->cflags);
+			STACK_PUSHX(stack, int, ctx->submatch_id_invisible);
+			STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+			ctx->submatch_id_invisible++;
+			STACK_PUSHX(stack, int, 0); // bre_branch_begin
+			STACK_PUSHX(stack, int, PARSE_RE);
+		      }
+		    else {
+			STACK_PUSHX(stack, int, 0); // bre_branch_begin
+			STACK_PUSHX(stack, int, PARSE_ATOM);
+		    }
+		    ctx->cflags = new_cflags;
+		    break;
+		  }
+
+		if (ctx->cflags & REG_EXTENDED)
+		  {
+		parse_bre_lparen:
+		    DPRINT(("tre_parse: group begin: '%.*" STRF
+			    "', submatch %d\n", REST(ctx->re),
+			    ctx->submatch_id));
+		    ctx->re++;
+		    /* First parse a whole RE, then mark the resulting tree
+		       for submatching. */
+		    STACK_PUSHX(stack, int, ctx->cflags);
+		    STACK_PUSHX(stack, int, ctx->submatch_id);
+		    STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+		    /* We need to pass a boolean (eventually) to PARSE_ATOM to
+		       indicate if this is the beginning of a BRE group. */
+		    STACK_PUSHX(stack, int, !(ctx->cflags & REG_EXTENDED));
+		    STACK_PUSHX(stack, int, PARSE_RE);
+		    ctx->submatch_id++;
+		    depth++;
+		  }
+		else
+		  goto parse_literal;
+		break;
+
+	      case CHAR_RPAREN:  /* end of current subexpression */
+		if (ctx->cflags & REG_EXTENDED && depth > 0)
+		  {
+	      parse_bre_rparen_empty:
+		    if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+		      return REG_EPAREN;
+		    DPRINT(("tre_parse:	    empty: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    /* We were expecting an atom, but instead the current
+		       subexpression was closed.  POSIX leaves the meaning of
+		       this to be implementation-defined.  We interpret this as
+		       an empty expression (which matches an empty string).  */
+		    result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+		    if (result == NULL)
+		      return REG_ESPACE;
+		    if (!(ctx->cflags & REG_EXTENDED))
+		      ctx->re--;
+		  }
+		else
+		  goto parse_literal;
+		break;
+
+	      case CHAR_LBRACKET: /* bracket expression */
+		DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n",
+			REST(ctx->re)));
+		ctx->re++;
+		status = tre_parse_bracket(ctx, &result);
+		if (status != REG_OK)
+		  return status;
+		break;
+
+	      case CHAR_BACKSLASH:
+		/* Deal with "\(", "\)" or "\{" for BREs */
+		if (!(ctx->cflags & REG_EXTENDED)
+		    && ctx->re + 1 < ctx->re_end)
+		  {
+		    if (*(ctx->re + 1) == CHAR_LPAREN)
+		      {
+			ctx->re++;
+			goto parse_bre_lparen;
+		      }
+		    else if (*(ctx->re + 1) == CHAR_RPAREN)
+		      {
+			ctx->re++;
+			goto parse_bre_rparen_empty;
+		      }
+		    if (*(ctx->re + 1) == CHAR_LBRACE) goto parse_literal;
+		  }
+
+		if (ctx->re + 1 >= ctx->re_end)
+		  /* Trailing backslash. */
+		  return REG_EESCAPE;
+
+		if (!(ctx->cflags & REG_ENHANCED))
+		  {
+		    DPRINT(("tre_parse:  unenhanced bleep: '%.*" STRF "'\n", REST(ctx->re)));
+		    ctx->re++;
+		    goto unenhanced_backslash;
+		  }
+
+		/* If a macro is used, parse the expanded macro recursively. */
+		{
+		  tre_char_t buf[64];
+		  tre_expand_macro(ctx->re + 1, ctx->re_end,
+				   buf, elementsof(buf));
+		  if (buf[0] != 0)
+		    {
+		      tre_parse_ctx_t subctx;
+		      memcpy(&subctx, ctx, sizeof(subctx));
+		      subctx.re = buf;
+		      subctx.len = tre_strlen(buf);
+		      subctx.nofirstsub = 1;
+		      status = tre_parse(&subctx);
+		      if (status != REG_OK)
+			return status;
+		      ctx->re += 2;
+		      ctx->position = subctx.position;
+		      result = subctx.result;
+		      break;
+		    }
+		}
+
+#ifdef REG_LITERAL
+		if (*(ctx->re + 1) == L'Q')
+		  {
+		    DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    ctx->cflags |= REG_LITERAL;
+		    temporary_cflags |= REG_LITERAL;
+		    ctx->re += 2;
+		    STACK_PUSHX(stack, int, 0);
+		    STACK_PUSHX(stack, int, PARSE_ATOM);
+		    break;
+		  }
+#endif /* REG_LITERAL */
+
+		DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re)));
+		ctx->re++;
+		switch (*ctx->re)
+		  {
+		  case L'b':
+		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
+						 ASSERT_AT_WB, -1);
+		    ctx->re++;
+		    break;
+		  case L'B':
+		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
+						 ASSERT_AT_WB_NEG, -1);
+		    ctx->re++;
+		    break;
+		  case L'<':
+		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
+						 ASSERT_AT_BOW, -1);
+		    ctx->re++;
+		    break;
+		  case L'>':
+		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
+						 ASSERT_AT_EOW, -1);
+		    ctx->re++;
+		    break;
+		  case L'x':
+		    ctx->re++;
+		    if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
+		      {
+			/* 8 bit hex char. */
+			char tmp[3] = {0, 0, 0};
+			long val;
+			DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n",
+				REST(ctx->re - 2)));
+
+			if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
+			    ctx->re < ctx->re_end)
+			  {
+			    tmp[0] = (char)ctx->re[0];
+			    ctx->re++;
+			  }
+			if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
+			    ctx->re < ctx->re_end)
+			  {
+			    tmp[1] = (char)ctx->re[0];
+			    ctx->re++;
+			  }
+			val = strtol(tmp, NULL, 16);
+			result = tre_ast_new_literal(ctx->mem, (int)val,
+						     (int)val, ctx->position);
+			ctx->position++;
+			break;
+		      }
+		    else if (ctx->re < ctx->re_end)
+		      {
+			/* Wide char. */
+			char tmp[32];
+			long val;
+			int i = 0;
+			ctx->re++;
+			while (ctx->re_end - ctx->re >= 0)
+			  {
+			    if (ctx->re[0] == CHAR_RBRACE)
+			      break;
+			    if (tre_isxdigit_l(ctx->re[0], ctx->loc))
+			      {
+				tmp[i] = (char)ctx->re[0];
+				i++;
+				ctx->re++;
+				continue;
+			      }
+			    return REG_EBRACE;
+			  }
+			ctx->re++;
+			tmp[i] = 0;
+			val = strtol(tmp, NULL, 16);
+			result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
+						     ctx->position);
+			ctx->position++;
+			break;
+		      }
+		    /*FALLTHROUGH*/
+
+		  default:
+		  unenhanced_backslash:
+		    if ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) !=
+			REG_EXTENDED &&
+			tre_isdigit_l(*ctx->re, ctx->loc) && *ctx->re != L'0')
+		      {
+			/* Back reference (only in BRE or enhanced). */
+			int val = *ctx->re - L'0';
+			DPRINT(("tre_parse:     backref: '%.*" STRF "'\n",
+				REST(ctx->re - 1)));
+			result = tre_ast_new_literal(ctx->mem, BACKREF, val,
+						     ctx->position);
+			if (result == NULL)
+			  return REG_ESPACE;
+
+			/* Set the backref with a submatch id in the invisible
+			 * range (which will be overridden if a real submatch
+			 * is needed) */
+			result->submatch_id = ctx->submatch_id_invisible++;
+
+			ctx->position++;
+			ctx->num_reorder_tags++;
+			ctx->max_backref = MAX(val, ctx->max_backref);
+			ctx->re++;
+		      }
+		    else
+		      {
+			/* Escaped character. */
+			DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n",
+				REST(ctx->re - 1)));
+			result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+						     ctx->position);
+			ctx->position++;
+			ctx->re++;
+		      }
+		    break;
+		  }
+		if (result == NULL)
+		  return REG_ESPACE;
+		break;
+
+	      case CHAR_PERIOD:	 /* the any-symbol */
+		DPRINT(("tre_parse:	  any: '%.*" STRF "'\n",
+			REST(ctx->re)));
+		if (ctx->cflags & REG_NEWLINE)
+		  {
+		    tre_ast_node_t *tmp1;
+		    tre_ast_node_t *tmp2;
+		    tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
+					       ctx->position);
+		    if (!tmp1)
+		      return REG_ESPACE;
+		    tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
+					       ctx->position + 1);
+		    if (!tmp2)
+		      return REG_ESPACE;
+		    result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+		    if (!result)
+		      return REG_ESPACE;
+		    ctx->position += 2;
+		  }
+		else
+		  {
+		    result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
+						 ctx->position);
+		    if (!result)
+		      return REG_ESPACE;
+		    ctx->position++;
+		  }
+		ctx->re++;
+		break;
+
+	      case CHAR_CARET:	 /* beginning of line assertion */
+		/* '^' has a special meaning everywhere in EREs, at the
+		   beginning of the RE and after \( is BREs.  It is also
+		   special in enhanced BREs at the beginning of each branches
+		   of a union */
+		if (ctx->cflags & REG_EXTENDED
+		    || bre_branch_begin
+		    || ctx->re == ctx->re_start)
+		  {
+		    DPRINT(("tre_parse:	      BOL: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
+						 ASSERT_AT_BOL, -1);
+		    if (result == NULL)
+		      return REG_ESPACE;
+		    ctx->re++;
+		  }
+		else
+		  goto parse_literal;
+		break;
+
+	      case CHAR_DOLLAR:	 /* end of line assertion. */
+		/* '$' is special everywhere in EREs, and in the end of the
+		   string and before \) is BREs. */
+		if (ctx->cflags & REG_EXTENDED
+		    || (ctx->re + 2 < ctx->re_end
+			&& *(ctx->re + 1) == CHAR_BACKSLASH
+			&& *(ctx->re + 2) == CHAR_RPAREN)
+		    || ctx->re + 1 == ctx->re_end)
+		  {
+		    DPRINT(("tre_parse:	      EOL: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    result = tre_ast_new_literal(ctx->mem, ASSERTION,
+						 ASSERT_AT_EOL, -1);
+		    if (result == NULL)
+		      return REG_ESPACE;
+		    ctx->re++;
+		  }
+		else
+		  goto parse_literal;
+		break;
+
+	      default:
+	      parse_literal:
+
+		if (temporary_cflags && ctx->re + 1 < ctx->re_end
+		    && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
+		  {
+		    DPRINT(("tre_parse:	 end tmps: '%.*" STRF "'\n",
+			    REST(ctx->re)));
+		    ctx->cflags &= ~temporary_cflags;
+		    temporary_cflags = 0;
+		    ctx->re += 2;
+		    if (ctx->re < ctx->re_end)
+		      {
+			STACK_PUSHX(stack, int, 0);
+			STACK_PUSHX(stack, int, PARSE_ATOM);
+		      }
+		    else
+		      {
+			result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+			if (!result) return REG_ESPACE;
+		      }
+		    break;
+		  }
+
+
+		/* We are expecting an atom.  If the subexpression (or the whole
+		   regexp ends here, we interpret it as an empty expression
+		   (which matches an empty string), which is an error.
+		   Iterations of an empty expression is also an error. */
+#ifdef REG_LITERAL
+		if (!(ctx->cflags & REG_LITERAL))
+		  {
+#endif /* REG_LITERAL */
+		    /* error on end of string */
+		    if (ctx->re >= ctx->re_end) return depth > 0 ? REG_EPAREN
+						       : REG_EMPTY;
+		    /* error on unions and iterations of empty expressions */
+		    if (ctx->cflags & REG_EXTENDED)
+		      {
+			if (ctx->re < ctx->re_end)
+			  {
+			    if (*ctx->re == CHAR_PIPE) return REG_EMPTY;
+			    if (*ctx->re == CHAR_LBRACE)
+			      {
+				ctx->re++;
+		  empty_parse_bound:
+				/* We need to parse the bound first and return
+				   any error, before returning REG_BADRPT */
+				status = tre_parse_bound(ctx, NULL);
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+				/* For ERE, if REG_NOMATCH is returned, we
+				   treat the lbrace as a literal. */
+				if (status == REG_NOMATCH)
+				  {
+				    ctx->re--;
+				    /* Drop down to literal-handling code */
+				  }
+				else
+				  {
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+				    if (status != REG_OK)
+				      return status;
+				    return REG_BADRPT;
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+				  }
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+			      }
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+			    else
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+			    if (*ctx->re == CHAR_STAR
+				|| *ctx->re == CHAR_PLUS
+				|| *ctx->re == CHAR_QUESTIONMARK)
+			      {
+				return REG_BADRPT;
+			      }
+			  }
+		      }
+		    else if (ctx->re + 1 < ctx->re_end
+			     && *ctx->re == CHAR_BACKSLASH
+			     && *(ctx->re + 1) == CHAR_LBRACE)
+		      {
+			ctx->re += 2;
+			goto empty_parse_bound;
+		      }
+#ifdef REG_LITERAL
+		  }
+#endif /* REG_LITERAL */
+
+		DPRINT(("tre_parse:     literal: '%.*" STRF "'\n",
+			REST(ctx->re)));
+		/* Note that we can't use an tre_isalpha() test here, since there
+		   may be characters which are alphabetic but neither upper or
+		   lower case. */
+		if (ctx->cflags & REG_ICASE
+		    && (tre_isupper_l(*ctx->re, ctx->loc) ||
+		    tre_islower_l(*ctx->re, ctx->loc)))
+		  {
+		    tre_ast_node_t *tmp1;
+		    tre_ast_node_t *tmp2;
+
+		    /* XXX - Can there be more than one opposite-case
+		       counterpoints for some character in some locale?  Or
+		       more than two characters which all should be regarded
+		       the same character if case is ignored?  If yes, there
+		       does not seem to be a portable way to detect it.  I guess
+		       that at least for multi-character collating elements there
+		       could be several opposite-case counterpoints, but they
+		       cannot be supported portably anyway. */
+		    tmp1 = tre_ast_new_literal(ctx->mem,
+					       tre_toupper_l(*ctx->re, ctx->loc),
+					       tre_toupper_l(*ctx->re, ctx->loc),
+					       ctx->position);
+		    if (!tmp1)
+		      return REG_ESPACE;
+		    tmp2 = tre_ast_new_literal(ctx->mem,
+					       tre_tolower_l(*ctx->re, ctx->loc),
+					       tre_tolower_l(*ctx->re, ctx->loc),
+					       ctx->position);
+		    if (!tmp2)
+		      return REG_ESPACE;
+		    result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+		    if (!result)
+		      return REG_ESPACE;
+		  }
+		else
+		  {
+		    result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+						 ctx->position);
+		    if (!result)
+		      return REG_ESPACE;
+		  }
+		ctx->position++;
+		ctx->re++;
+		break;
+	      }
+	    break;
+	  }
+
+	case PARSE_MARK_FOR_SUBMATCH:
+	  {
+	    int submatch_id = tre_stack_pop_int(stack);
+
+	    ctx->cflags = tre_stack_pop_int(stack); /* restore cflags */
+	    if (result->submatch_id >= 0 &&
+		result->submatch_id < SUBMATCH_ID_INVISIBLE_START)
+	      {
+		tre_ast_node_t *n, *tmp_node;
+		if (submatch_id >= SUBMATCH_ID_INVISIBLE_START)
+		  break;
+		n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+		if (n == NULL)
+		  return REG_ESPACE;
+		tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
+		if (tmp_node == NULL)
+		  return REG_ESPACE;
+		tmp_node->num_submatches = result->num_submatches;
+		result = tmp_node;
+	      }
+	    result->submatch_id = submatch_id;
+	    if (submatch_id < SUBMATCH_ID_INVISIBLE_START)
+	      result->num_submatches++;
+	    break;
+	  }
+
+	default:
+	  assert(0);
+	  break;
+	}
+    }
+
+  /* Check for missing closing parentheses. */
+  if (depth > 0)
+    return REG_EPAREN;
+
+  ctx->result = result;
+
+  return REG_OK;
+}
+
+/* EOF */