Libc-825.24.tar.gz

[apple/libc.git] / regex / TRE / lib / tre-parse.c
diff --git a/regex/TRE/lib/tre-parse.c b/regex/TRE/lib/tre-parse.c

new file mode 100644 (file)

index 0000000..ad09c26
--- /dev/null
+++ b/regex/TRE/lib/tre-parse.c
@@ -0,0 +1,2334 @@
+/*
+  tre-parse.c - Regexp parser
+
+  This software is released under a BSD-style license.
+  See the file LICENSE for details and copyright.
+
+*/
+
+/*
+  This parser is just a simple recursive descent parser for POSIX.2
+  regexps.  The parser supports both the obsolete default syntax and
+  the "extended" syntax, and some nonstandard extensions.
+*/
+
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif /* HAVE_CONFIG_H */
+#include <string.h>
+#include <assert.h>
+#include <limits.h>
+#include <stddef.h>
+
+#include "xmalloc.h"
+#include "tre-mem.h"
+#include "tre-ast.h"
+#include "tre-stack.h"
+#include "tre-parse.h"
+
+/* BSD compatibility:
+     Before looking up a collating symbol, check if the name matches in
+     the character names (cnames) array; if so, use the corresponding
+     character.
+
+     Also set ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND, which will preserve
+     the implementation choice that for ERE, a non-numeric character following
+     a left brace that would normally be a bound, causes the left brace to be
+     literal. */
+#define BSD_COMPATIBILITY
+#ifdef BSD_COMPATIBILITY
+#include "cname.h"
+#define ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+#endif /* BSD_COMPATIBILITY */
+
+/* Characters with special meanings in regexp syntax. */
+#define CHAR_PIPE         L'|'
+#define CHAR_LPAREN       L'('
+#define CHAR_RPAREN       L')'
+#define CHAR_LBRACE       L'{'
+#define CHAR_RBRACE       L'}'
+#define CHAR_LBRACKET     L'['
+#define CHAR_RBRACKET     L']'
+#define CHAR_MINUS        L'-'
+#define CHAR_STAR         L'*'
+#define CHAR_QUESTIONMARK  L'?'
+#define CHAR_PLUS         L'+'
+#define CHAR_PERIOD       L'.'
+#define CHAR_COLON        L':'
+#define CHAR_EQUAL        L'='
+#define CHAR_COMMA        L','
+#define CHAR_CARET        L'^'
+#define CHAR_DOLLAR       L'$'
+#define CHAR_BACKSLASH    L'\\'
+#define CHAR_HASH         L'#'
+#define CHAR_TILDE        L'~'
+
+
+/* Some macros for expanding \w, \s, etc. */
+static const struct tre_macro_struct {
+  const char c;
+  const char *expansion;
+} tre_macros[] =
+  { {'t', "\t"},          {'n', "\n"},            {'r', "\r"},
+    {'f', "\f"},          {'a', "\a"},            {'e', "\033"},
+    {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
+    {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"},   {'D', "[^[:digit:]]"},
+    { 0, NULL }
+  };
+
+
+/* Expands a macro delimited by `regex' and `regex_end' to `buf', which
+   must have at least `len' items.  Sets buf[0] to zero if the there
+   is no match in `tre_macros'. */
+static void
+tre_expand_macro(const tre_char_t *regex, const tre_char_t *regex_end,
+                tre_char_t *buf, size_t buf_len)
+{
+  int i;
+
+  buf[0] = 0;
+  if (regex >= regex_end)
+    return;
+
+  for (i = 0; tre_macros[i].expansion; i++)
+    {
+      if (tre_macros[i].c == *regex)
+       {
+         unsigned int j;
+         DPRINT(("Expanding macro '%c' => '%s'\n",
+                 tre_macros[i].c, tre_macros[i].expansion));
+         for (j = 0; tre_macros[i].expansion[j] && j < buf_len; j++)
+           buf[j] = tre_macros[i].expansion[j];
+         buf[j] = 0;
+         break;
+       }
+    }
+}
+
+static reg_errcode_t
+tre_new_item(tre_mem_t mem, int type, int val, int *max_i,
+        tre_bracket_match_list_t **items)
+{
+  reg_errcode_t status = REG_OK;
+  tre_bracket_match_list_t *array = *items;
+  int i = array->num_bracket_matches;
+  /* Allocate more space if necessary. */
+  if (i >= *max_i)
+    {
+      tre_bracket_match_list_t *new_items;
+      DPRINT(("out of tre_bracket_match_list_t array space (%d)\n", i));
+      /* If the array is already 1024 items large, give up -- there's
+        probably an error in the regexp (e.g. not a '\0' terminated
+        string and missing ']') */
+      if (*max_i >= 1024)
+       return REG_ESPACE;
+      *max_i *= 2;
+      new_items = xrealloc(array, SIZEOF_BRACKET_MATCH_LIST_N(*max_i));
+      if (new_items == NULL)
+       return REG_ESPACE;
+      *items = array = new_items;
+    }
+  array->bracket_matches[i].type = type;
+  array->bracket_matches[i].value = val;
+  array->num_bracket_matches++;
+  return status;
+}
+
+#ifndef TRE_USE_SYSTEM_WCTYPE
+
+/* isalnum() and the rest may be macros, so wrap them to functions. */
+int tre_isalnum_func(tre_cint_t c) { return tre_isalnum(c); }
+int tre_isalpha_func(tre_cint_t c) { return tre_isalpha(c); }
+
+#ifdef tre_isascii
+int tre_isascii_func(tre_cint_t c) { return tre_isascii(c); }
+#else /* !tre_isascii */
+int tre_isascii_func(tre_cint_t c) { return !(c >> 7); }
+#endif /* !tre_isascii */
+
+#ifdef tre_isblank
+int tre_isblank_func(tre_cint_t c) { return tre_isblank(c); }
+#else /* !tre_isblank */
+int tre_isblank_func(tre_cint_t c) { return ((c == ' ') || (c == '\t')); }
+#endif /* !tre_isblank */
+
+int tre_iscntrl_func(tre_cint_t c) { return tre_iscntrl(c); }
+int tre_isdigit_func(tre_cint_t c) { return tre_isdigit(c); }
+int tre_isgraph_func(tre_cint_t c) { return tre_isgraph(c); }
+int tre_islower_func(tre_cint_t c) { return tre_islower(c); }
+int tre_isprint_func(tre_cint_t c) { return tre_isprint(c); }
+int tre_ispunct_func(tre_cint_t c) { return tre_ispunct(c); }
+int tre_isspace_func(tre_cint_t c) { return tre_isspace(c); }
+int tre_isupper_func(tre_cint_t c) { return tre_isupper(c); }
+int tre_isxdigit_func(tre_cint_t c) { return tre_isxdigit(c); }
+
+struct {
+  char *name;
+  int (*func)(tre_cint_t);
+} tre_ctype_map[] = {
+  { "alnum", &tre_isalnum_func },
+  { "alpha", &tre_isalpha_func },
+#ifdef tre_isascii
+  { "ascii", &tre_isascii_func },
+#endif /* tre_isascii */
+#ifdef tre_isblank
+  { "blank", &tre_isblank_func },
+#endif /* tre_isblank */
+  { "cntrl", &tre_iscntrl_func },
+  { "digit", &tre_isdigit_func },
+  { "graph", &tre_isgraph_func },
+  { "lower", &tre_islower_func },
+  { "print", &tre_isprint_func },
+  { "punct", &tre_ispunct_func },
+  { "space", &tre_isspace_func },
+  { "upper", &tre_isupper_func },
+  { "xdigit", &tre_isxdigit_func },
+  { NULL, NULL}
+};
+
+tre_ctype_t tre_ctype(const char *name)
+{
+  int i;
+  for (i = 0; tre_ctype_map[i].name != NULL; i++)
+    {
+      if (strcmp(name, tre_ctype_map[i].name) == 0)
+       return tre_ctype_map[i].func;
+    }
+  return (tre_ctype_t)0;
+}
+#endif /* !TRE_USE_SYSTEM_WCTYPE */
+
+#define REST(re) (int)(ctx->re_end - (re)), (re)
+
+#define START_COLLATING_SYMBOLS                16
+#define MAX_COLLATING_SYMBOL_LEN       4
+
+typedef struct {
+  const tre_char_t *start;
+  int len;
+} tre_collating_symbol;
+
+#include <xlocale.h>
+
+int __collate_equiv_value(locale_t loc, const wchar_t *str, size_t len);
+
+#ifdef BSD_COMPATIBILITY
+static wchar_t
+tre_search_cnames(const wchar_t *name, size_t len)
+{
+  size_t low = 0;
+  size_t high = NCNAMES - 1;
+  size_t cur;
+  int cmp;
+
+  while(low <= high)
+    {
+      cur = (low + high) / 2;
+      cmp = wcsncmp(name, cnames[cur].name, len);
+      if (cmp == 0 && cnames[cur].name[len] == 0) return cnames[cur].code;
+      if (cmp > 0) low = cur + 1;
+      else high = cur - 1;
+    }
+  return (wchar_t)-1;
+}
+#endif /* BSD_COMPATIBILITY */
+
+/* Scan the contents of a bracket expression, and create a
+ * tre_bracket_match_list_t encoding the bracket expression.  If during
+ * the scan, multi-character collating symbols are detected, switch
+ * into a mode to collect those MCCSs into a tre_collating_symbol
+ * list and pass them back.  tre_parse_bracket will use that to
+ * create a new string composed of a union of the bracket expression
+ * without the MCCSs and the MCCSs (e.g., [x[.ch.]] => [x]|ch), and
+ * call tre_parse (recursive) to parse that new string (which will
+ * call tre_parse_bracket and tre_parse_bracket_items again. */
+static reg_errcode_t
+tre_parse_bracket_items(tre_parse_ctx_t *ctx, tre_bracket_match_list_t **items,
+                       int *items_size, tre_collating_symbol **result)
+{
+  const tre_char_t *re = ctx->re;
+  const tre_char_t *re_end = ctx->re_end;
+  tre_collating_symbol *col_syms = NULL;
+  tre_collating_symbol *cp = NULL;
+  int n_col_syms = 0;
+  reg_errcode_t status;
+  int max_i = *items_size;
+  int other = 0;  /* contains content other than multi-character collating
+                  * symbols */
+  int range = -1; /* -1 unset, 0 begin range set, +1 end range expected */
+  tre_cint_t min, c;
+  int invert = ((*items)->flags & TRE_BRACKET_MATCH_FLAG_NEGATE);
+  int collect_MCCS = 0;
+  const tre_char_t *start;
+
+  for ( ;re < re_end; re++)
+    {
+      switch (*re)
+       {
+       case CHAR_MINUS:
+         /* A first hyphen */
+         if (re == ctx->re)
+           {
+             DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+             min = CHAR_MINUS;
+             other++;
+             range = 0;
+             break;
+           }
+         /* The hyphen is the end range */
+         if (range > 0)
+           {
+             DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+             c = CHAR_MINUS;
+             goto process_end_range;
+           }
+         if (re + 1 >= re_end)
+           {
+             status = REG_EBRACK;
+             goto error;
+           }
+         /* The hyphen is at the end */
+         if (re[1] == CHAR_RBRACKET)
+           {
+             DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+             c = CHAR_MINUS;
+             goto process_begin_range;
+           }
+         /* Two ranges are not allowed to share an endpoint, or begin
+          * range is illegal. */
+         if (range < 0)
+           {
+             status = REG_ERANGE;
+             goto error;
+           }
+         range = 1; /* Expect end range */
+         DPRINT(("tre_parse_bracket:   range: '%.*" STRF "'\n", REST(re)));
+         break;
+
+       case CHAR_LBRACKET:
+         if (re + 1 >= re_end)
+           {
+             status = REG_EBRACK;
+             goto error;
+           }
+         switch (re[1])
+           {
+           case CHAR_PERIOD:
+             {
+               re += 2;
+               start = re;
+               for (;; re++)
+                 {
+                   if (re >= re_end)
+                     {
+                       status = REG_ECOLLATE;
+                       goto error;
+                     }
+                   if (*re == CHAR_PERIOD)
+                     {
+                       if (re + 1 >= re_end)
+                         {
+                           status = REG_ECOLLATE;
+                           goto error;
+                         }
+                       /* Found end */
+                       if (re[1] == CHAR_RBRACKET)
+                         {
+                           DPRINT(("tre_parse_bracket:   collating "
+                                   "symbol: '%.*" STRF "'\n",
+                                   REST(start - 2)));
+                           /* Empty name */
+                           if (re == start)
+                             {
+                               status = REG_ECOLLATE;
+                               goto error;
+                             }
+#ifdef BSD_COMPATIBILITY
+                           /* Check if the name is in cnames; if so, use
+                              the corresponding code */
+                           c = tre_search_cnames(start, re - start);
+                           if (c != (wchar_t)-1)
+                             {
+                               re++;
+                               goto process_single_character;
+                             }
+#endif /* BSD_COMPATIBILITY */
+                           /* Verify this is a known sequence */
+                           if (__collate_equiv_value(ctx->loc, start,
+                                                         re - start) <= 0)
+                             {
+                               status = REG_ECOLLATE;
+                               goto error;
+                             }
+                           /* Process single character collating symbols */
+                           if (re - start == 1)
+                             {
+                               c = *start;
+                               re++;
+                               goto process_single_character;
+                             }
+                           /* Inverted MCCSs are undefined */
+                           if (invert)
+                             {
+                               status = REG_ECOLLATE;
+                               goto error;
+                             }
+                           /* Can't have MCCSs as an endpoint to a range */
+                           if (range > 0)
+                             {
+                               status = REG_ERANGE;
+                               goto error;
+                             }
+                           range = -1;
+                           /* Switch into MCCS collection mode (if not
+                            * already there */
+#if TRE_DEBUG
+                           if (!collect_MCCS)
+                             {
+                               collect_MCCS = 1;
+                               DPRINT(("tre_parse_bracket: Detected MCCS\n"));
+                             }
+#else /* !TRE_DEBUG */
+                           collect_MCCS = 1;
+#endif /* !TRE_DEBUG */
+                           /* Allocate a memory block the first time */
+                           if (!cp)
+                             {
+                               if ((col_syms = xmalloc(sizeof(*col_syms) *
+                                           (START_COLLATING_SYMBOLS + 2)))
+                                           == NULL)
+                                 return REG_ESPACE;
+                               cp = col_syms + 1;
+                               n_col_syms = START_COLLATING_SYMBOLS;
+                             }
+                           /* Enlarge the memory block is more is needed */
+                           if ((cp - col_syms) - 1 >= n_col_syms)
+                             {
+                               int i = n_col_syms;
+                               tre_collating_symbol *tmp =
+                                   xrealloc(col_syms, sizeof(*col_syms) *
+                                            ((n_col_syms *= 2) + 2));
+                               if (tmp == NULL)
+                                 {
+                                   xfree(col_syms);
+                                   return REG_ESPACE;
+                                 }
+                               DPRINT(("tre_list_collating_symbols: "
+                                       "Enlarging col_syms to %d\n",
+                                       n_col_syms));
+                               col_syms = tmp;
+                               cp = col_syms + i + 1;
+                             }
+                           cp->start = start;
+                           cp->len = re - start;
+                           cp++;
+                           re++;
+                           break;
+                         }
+                     }
+                 }
+               break;
+             }
+
+           case CHAR_EQUAL:
+           case CHAR_COLON:
+             {
+               /* Process equivalence and character classes */
+               tre_char_t kind = re[1];
+
+               /* Can't have a class as an endpoint to a range */
+               if (range > 0)
+                 {
+                   status = REG_ERANGE;
+                   goto error;
+                 }
+               if (!collect_MCCS && range == 0)
+                 {
+                   status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
+                                         min, &max_i, items);
+                   if (status != REG_OK)
+                     goto error;
+                 }
+               range = -1;
+               re += 2;
+               start = re;
+               for (;; re++)
+                 {
+                   if (re >= re_end)
+                     {
+                       status = kind == CHAR_EQUAL ? REG_ECOLLATE : REG_ECTYPE;
+                       goto error;
+                     }
+                   if (*re == kind)
+                     {
+                       if (re + 1 >= re_end)
+                         {
+                           status = kind == CHAR_EQUAL ? REG_ECOLLATE :
+                                                         REG_ECTYPE;
+                           goto error;
+                         }
+                       /* Found end */
+                       if (re[1] == CHAR_RBRACKET)
+                         {
+                           if (re == start)
+                             {
+                               /* Empty class name */
+                               status = kind == CHAR_EQUAL ? REG_ECOLLATE :
+                                                             REG_ECTYPE;
+                               goto error;
+                             }
+                           /* Process equivalence class */
+                           if (kind == CHAR_EQUAL)
+                             {
+                               int equiv;
+
+                               DPRINT(("tre_parse_bracket:   equivalence: '%.*"
+                                       STRF "'\n", REST(start - 2)));
+
+                               /* While we find the collation value even for
+                                  multi-character collating elements , we
+                                  don't (yet) match any collation values
+                                  against multi-character sequences.  We'd have
+                                  to enumerate those multi-character sequences
+                                  and like multi-character collating symbols,
+                                  create a union of those sequences with the
+                                  rest of the bracket expression.  While
+                                  doable, a bracket expression matching
+                                  multiple characters, that doesn't explicitly
+                                  contain multi-character sequences, might
+                                  be unexpected, so we punt for now. */
+                               if ((equiv = __collate_equiv_value(ctx->loc,
+                                            start, re - start)) <= 0)
+                                 {
+                                   /* The standard says that if no collating
+                                      element if found, we use the collating
+                                      symbol itself.  But __collate_equiv_value
+                                      doesn't make a distinction between
+                                      an element that is in a equvalence
+                                      class with others, or is the only member,
+                                      so we already know there is no collating
+                                      symbol.  (Note that in the case of a
+                                      collating element whose collation value
+                                      is unique, matching against the
+                                      collating element itself, or against
+                                      its collation value, is equivalent.) */
+#ifdef BSD_COMPATIBILITY
+                                   /* Check if the name is in cnames; if so,
+                                      use the corresponding code */
+                                   c = tre_search_cnames(start, re - start);
+                                   if (c != (wchar_t)-1)
+                                     {
+                                       re++;
+                                       goto process_single_character;
+                                     }
+#endif /* BSD_COMPATIBILITY */
+                                   status = REG_ECOLLATE;
+                                   goto error;
+                                 }
+                               if (!collect_MCCS)
+                                 {
+                                   status = tre_new_item(ctx->mem,
+                                            TRE_BRACKET_MATCH_TYPE_EQUIVALENCE,
+                                            equiv, &max_i, items);
+                                   if (status != REG_OK)
+                                     goto error;
+                                 }
+                             }
+                           else
+                             {
+                               /* Process character class */
+                               DPRINT(("tre_parse_bracket:  class: '%.*" STRF
+                                       "'\n", REST(start - 2)));
+                               if (!collect_MCCS)
+                                 {
+                                   char tmp_str[64];
+                                   tre_ctype_t class;
+                                   int len = MIN(re - start, 63);
+#ifdef TRE_WCHAR
+                                   {
+                                     tre_char_t tmp_wcs[64];
+                                     wcsncpy(tmp_wcs, start, (size_t)len);
+                                     tmp_wcs[len] = L'\0';
+#if defined HAVE_WCSRTOMBS
+                                     {
+                                       mbstate_t state;
+                                       const tre_char_t *src = tmp_wcs;
+                                       memset(&state, '\0', sizeof(state));
+                                       len = wcsrtombs_l(tmp_str, &src,
+                                                     sizeof(tmp_str), &state,
+                                                     ctx->loc);
+                                     }
+#elif defined HAVE_WCSTOMBS
+                                     len = wcstombs(tmp_str, tmp_wcs, 63);
+#endif /* defined HAVE_WCSTOMBS */
+                                   }
+#else /* !TRE_WCHAR */
+                                   strncpy(tmp_str, (const char*)start, len);
+#endif /* !TRE_WCHAR */
+                                   tmp_str[len] = '\0';
+                                   DPRINT(("  class name: %s\n", tmp_str));
+                                   class = tre_ctype_l(tmp_str, ctx->loc);
+                                   if (!class)
+                                     {
+                                       status = REG_ECTYPE;
+                                       goto error;
+                                     }
+                                   status = tre_new_item(ctx->mem,
+                                            TRE_BRACKET_MATCH_TYPE_CLASS,
+                                            class, &max_i, items);
+                                   if (status != REG_OK)
+                                     goto error;
+                                 }
+                             }
+                           re++;
+                           break;
+                         }
+                     }
+                 }
+               other++;
+               break;
+             }
+
+           default:
+             DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+             c = CHAR_LBRACKET;
+             goto process_single_character;
+             break;
+           }
+         break;
+
+       case CHAR_RBRACKET:
+         /* A first right bracket */
+         if (re == ctx->re)
+           {
+             DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+             min = CHAR_RBRACKET;
+             range = 0;
+             other++;
+             break;
+           }
+         /* Done */
+         if (collect_MCCS)
+           {
+             DPRINT(("tre_parse_bracket:       done: '%.*" STRF "'\n",
+                     REST(re)));
+             if (col_syms)
+               {
+                 /* Mark the character following the right bracket.  Set len
+                  * to whether there are other things besides the
+                  * multi-character collating symbols */
+                 col_syms->start = re + 1;
+                 col_syms->len = other;
+                 /* Mark the end of the list */
+                 cp->start = NULL;
+               }
+             *result = col_syms;
+             return REG_OK;
+           }
+         /* range > 0 is not possible, since we did a lookahead after the
+          * hyphen */
+         if (range == 0)
+           {
+             status = tre_new_item(ctx->mem, TRE_BRACKET_MATCH_TYPE_CHAR,
+                                   min, &max_i, items);
+             if (status != REG_OK)
+               goto error;
+           }
+         DPRINT(("tre_parse_bracket:   done: '%.*" STRF "'\n", REST(re)));
+         *items_size = max_i;
+         ctx->re = re + 1;
+         return REG_OK;
+
+       default:
+         DPRINT(("tre_parse_bracket:   char: '%.*" STRF "'\n", REST(re)));
+         c = *re;
+process_single_character:
+         /* Process single character */
+         if (range > 0)
+           {
+             int mine, maxe;
+
+process_end_range:
+             /* Get collation equivalence values */
+             mine = __collate_equiv_value(ctx->loc, &min, 1);
+             maxe = __collate_equiv_value(ctx->loc, &c, 1);
+             if (maxe < mine)
+               {
+                 status = REG_ERANGE;
+                 goto error;
+               }
+             if (!collect_MCCS)
+               {
+                 status = tre_new_item(ctx->mem,
+                                       TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN,
+                                       mine, &max_i, items);
+                 if (status != REG_OK)
+                   goto error;
+                 status = tre_new_item(ctx->mem,
+                                       TRE_BRACKET_MATCH_TYPE_RANGE_END,
+                                       maxe, &max_i, items);
+                 if (status != REG_OK)
+                   goto error;
+               }
+             range = -1;
+           }
+         else
+           {
+process_begin_range:
+             if (!collect_MCCS)
+               {
+                 if (range == 0)
+                   {
+                     status = tre_new_item(ctx->mem,
+                                           TRE_BRACKET_MATCH_TYPE_CHAR,
+                                           min, &max_i, items);
+                     if (status != REG_OK)
+                       goto error;
+                   }
+                 min = c;
+               }
+             range = 0;
+           }
+         other++;
+         break;
+       }
+    }
+  status = REG_EBRACK;
+error:
+  DPRINT(("tre_parse_bracket:  error: '%.*" STRF "', status=%d\n",
+         REST(re), status));
+  if (col_syms)
+    xfree(col_syms);
+  return status;
+}
+
+#ifdef TRE_DEBUG
+static const char *bracket_match_type_str[] = {
+  "unused",
+  "char",
+  "range begin",
+  "range end",
+  "class",
+  "equivalence value",
+};
+#endif /* TRE_DEBUG */
+
+static reg_errcode_t
+tre_parse_bracket(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  tre_ast_node_t *node;
+  reg_errcode_t status = REG_OK;
+  tre_bracket_match_list_t *items;
+  int max_i = 32;
+  tre_collating_symbol *col_syms = NULL;
+
+  /* Handle special cases [[:<:]] and [[:>:]] */
+  if (ctx->re_end - ctx->re >= 6 && ctx->re[0] == CHAR_LBRACKET
+      && ctx->re[1] == CHAR_COLON && (ctx->re[2] == L'<' || ctx->re[2] == L'>')
+      && ctx->re[3] == CHAR_COLON && ctx->re[4] == CHAR_RBRACKET
+      && ctx->re[5] == CHAR_RBRACKET)
+    {
+      *result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                     (ctx->re[2] == L'<') ? ASSERT_AT_BOW : ASSERT_AT_EOW,
+                     -1);
+      DPRINT(("tre_parse_bracket: special case %s\n", (ctx->re[2] == L'<') ?
+             "[[:<:]]" : "[[:>:]]"));
+      ctx->re += 6;
+      return *result ? REG_OK : REG_ESPACE;
+    }
+
+  /* Start off with an array of `max_i' elements. */
+  items = xcalloc(1, SIZEOF_BRACKET_MATCH_LIST_N(max_i));
+  if (items == NULL)
+    return REG_ESPACE;
+
+  if (*ctx->re == CHAR_CARET)
+    {
+      DPRINT(("tre_parse_bracket: negate: '%.*" STRF "'\n", REST(ctx->re)));
+      items->flags |= TRE_BRACKET_MATCH_FLAG_NEGATE;
+      ctx->re++;
+    }
+
+  status = tre_parse_bracket_items(ctx, &items, &max_i, &col_syms);
+
+  if (status != REG_OK)
+    goto parse_bracket_done;
+
+  /* If there are collating symbols, split off the multi-character ones
+   * into a union of the bracket expression (without the collating symbols)
+   * and the multiple-character sequences.  We create an equivalent input
+   * string and run tre_parse() recursively */
+  if (col_syms)
+    {
+      tre_char_t *str, *sp;
+      tre_collating_symbol *cp;
+      tre_parse_ctx_t subctx;
+
+      /* Allocate a new string.  We start with the size of the original
+       * bracket expression (minus 1) and add 2 (for a leading "[" and
+       * a trailing nil; don't need a "^", since it is illegal to have
+       * inverted MCCSs).  Since a multi-character collating symbols
+       * will be converted from "[.xx.]" to "|xx" (n+4 to n+1), we don't
+       * need to worry about the new string getting too long. */
+      xfree(items);
+      str = xmalloc(sizeof(*str) * ((col_syms->start - ctx->re) + 2));
+      if (str == NULL)
+       {
+         xfree(col_syms);
+         return REG_ESPACE;
+       }
+      sp = str;
+      if (col_syms->len > 0)
+       {
+         /* There are other items in the bracket expression besides the
+          * multi-character collating symbols, so create a new bracket
+          * expression with only those other itmes. */
+         const tre_char_t *re;
+         ptrdiff_t i;
+
+         *sp++ = '[';
+         re = ctx->re;
+         for (cp = col_syms + 1; cp->start; cp++)
+           {
+             /* The "- 2" is to account for the "[." */
+             if ((i = ((cp->start - re) - 2)) > 0)
+               {
+                 memcpy(sp, re, sizeof(*sp) * i);
+                 sp += i;
+               }
+             /* The "+ 2" is to account for the ".]" */
+             re = cp->start + cp->len + 2;
+           }
+           i = col_syms->start - re; /* Includes the trailing right bracket */
+           memcpy(sp, re, sizeof(*sp) * i);
+           sp += i;
+           *sp++ = '|';
+       }
+      for (cp = col_syms + 1; cp->start; cp++)
+       {
+         memcpy(sp, cp->start, sizeof(*sp) * cp->len);
+         sp += cp->len;
+         if (cp[1].start)
+           *sp++ = '|';
+       }
+      *sp = 0;
+      DPRINT(("tre_parse_bracket: Reparsing bracket expression with '%ls'\n",
+             str));
+
+      memcpy(&subctx, ctx, sizeof(subctx));
+      subctx.re = str;
+      subctx.len = sp - str;
+      subctx.nofirstsub = 1;
+      subctx.cflags |= REG_EXTENDED; /* Force extended mode for parsing */
+      status = tre_parse(&subctx);
+      xfree(str);
+      if (status != REG_OK)
+       {
+         xfree(col_syms);
+         return status;
+       }
+      ctx->re = col_syms->start;
+      ctx->position = subctx.position;
+      xfree(col_syms);
+      *result = subctx.result;
+      DPRINT(("tre_parse_bracket: Returning to original string\n"));
+      return REG_OK;
+    }
+
+  DPRINT(("tre_parse_bracket: creating bracket expression literal\n"));
+  node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position);
+  if (node == NULL)
+    {
+      status = REG_ESPACE;
+      goto parse_bracket_done;
+    }
+  else
+    {
+      tre_literal_t *l = node->obj;
+      l->u.bracket_match_list = tre_mem_alloc(ctx->mem,
+                                        SIZEOF_BRACKET_MATCH_LIST(items));
+      if (l->u.bracket_match_list == NULL)
+       {
+         status = REG_ESPACE;
+         goto parse_bracket_done;
+       }
+      memcpy(l->u.bracket_match_list, items, SIZEOF_BRACKET_MATCH_LIST(items));
+    }
+
+#ifdef TRE_DEBUG
+  {
+    int i;
+    tre_bracket_match_t *b;
+    DPRINT(("tre_parse_bracket: %d bracket match items, flags 0x%x\n",
+           items->num_bracket_matches, items->flags));
+    for (i = 0, b = items->bracket_matches;
+        i < items->num_bracket_matches; i++, b++)
+      {
+       DPRINT(("   %d: %s %d\n", i, bracket_match_type_str[b->type],
+               b->value));
+      }
+  }
+#endif /* TRE_DEBUG */
+
+ parse_bracket_done:
+  xfree(items);
+  ctx->position++;
+  *result = node;
+  return status;
+}
+
+
+/* Parses a positive decimal integer.  Returns -1 if the string does not
+   contain a valid number. */
+static int
+tre_parse_int(const tre_char_t **regex, const tre_char_t *regex_end)
+{
+  int num = -1;
+  const tre_char_t *r = *regex;
+  while (r < regex_end && *r >= L'0' && *r <= L'9')
+    {
+      if (num < 0)
+       num = 0;
+      num = num * 10 + *r - L'0';
+      r++;
+    }
+  *regex = r;
+  return num;
+}
+
+
+static reg_errcode_t
+tre_parse_bound(tre_parse_ctx_t *ctx, tre_ast_node_t **result)
+{
+  int min, max;
+#ifdef TRE_APPROX
+  int i;
+  int cost_ins, cost_del, cost_subst, cost_max;
+  int limit_ins, limit_del, limit_subst, limit_err;
+  const tre_char_t *start;
+#endif /* TRE_APPROX */
+  const tre_char_t *r = ctx->re;
+  int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+#ifdef TRE_APPROX
+  int approx = 0;
+  int costs_set = 0;
+  int counts_set = 0;
+
+  cost_ins = cost_del = cost_subst = cost_max = TRE_PARAM_UNSET;
+  limit_ins = limit_del = limit_subst = limit_err = TRE_PARAM_UNSET;
+#endif /* TRE_APPROX */
+
+  /* Parse number (minimum repetition count). */
+  min = -1;
+  if (r >= ctx->re_end)
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+    return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_EBRACE;
+#else /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+    return REG_EBRACE;
+#endif /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+  if (*r >= L'0' && *r <= L'9') {
+    DPRINT(("tre_parse:          min count: '%.*" STRF "'\n", REST(r)));
+    min = tre_parse_int(&r, ctx->re_end);
+  }
+#ifndef TRE_APPROX
+  else
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+      /* For ERE, return REG_NOMATCH to signal that the lbrace should
+         be treated as a literal */
+      return (ctx->cflags & REG_EXTENDED) ? REG_NOMATCH : REG_BADBR;
+#else /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+      return REG_BADBR;
+#endif /* !ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+#endif /* !TRE_APPROX */
+
+  /* Parse comma and second number (maximum repetition count). */
+  max = min;
+  if (r < ctx->re_end && *r == CHAR_COMMA)
+    {
+      r++;
+      DPRINT(("tre_parse:   max count: '%.*" STRF "'\n", REST(r)));
+      max = tre_parse_int(&r, ctx->re_end);
+    }
+
+  /* Check that the repeat counts are sane. */
+  if ((max >= 0 && min > max) || min > RE_DUP_MAX || max > RE_DUP_MAX)
+    return REG_BADBR;
+
+
+#ifdef TRE_APPROX
+  /*
+   '{'
+     optionally followed immediately by a number == minimum repcount
+     optionally followed by , then a number == maximum repcount
+      + then a number == maximum insertion count
+      - then a number == maximum deletion count
+      # then a number == maximum substitution count
+      ~ then a number == maximum number of errors
+      Any of +, -, # or ~ without followed by a number means that
+      the maximum count/number of errors is infinite.
+
+      An equation of the form
+       Xi + Yd + Zs < C
+      can be specified to set costs and the cost limit to a value
+      different from the default value:
+       - X is the cost of an insertion
+       - Y is the cost of a deletion
+       - Z is the cost of a substitution
+       - C is the maximum cost
+
+      If no count limit or cost is set for an operation, the operation
+      is not allowed at all.
+  */
+
+
+  do {
+    int done;
+    start = r;
+
+    /* Parse count limit settings */
+    done = 0;
+    if (!counts_set)
+      while (r + 1 < ctx->re_end && !done)
+       {
+         switch (*r)
+           {
+           case CHAR_PLUS:  /* Insert limit */
+             DPRINT(("tre_parse:   ins limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_ins = tre_parse_int(&r, ctx->re_end);
+             if (limit_ins < 0)
+               limit_ins = INT_MAX;
+             counts_set = 1;
+             break;
+           case CHAR_MINUS: /* Delete limit */
+             DPRINT(("tre_parse:   del limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_del = tre_parse_int(&r, ctx->re_end);
+             if (limit_del < 0)
+               limit_del = INT_MAX;
+             counts_set = 1;
+             break;
+           case CHAR_HASH:  /* Substitute limit */
+             DPRINT(("tre_parse: subst limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_subst = tre_parse_int(&r, ctx->re_end);
+             if (limit_subst < 0)
+               limit_subst = INT_MAX;
+             counts_set = 1;
+             break;
+           case CHAR_TILDE: /* Maximum number of changes */
+             DPRINT(("tre_parse: count limit: '%.*" STRF "'\n", REST(r)));
+             r++;
+             limit_err = tre_parse_int(&r, ctx->re_end);
+             if (limit_err < 0)
+               limit_err = INT_MAX;
+             approx = 1;
+             break;
+           case CHAR_COMMA:
+             r++;
+             break;
+           case L' ':
+             r++;
+             break;
+           case L'}':
+             done = 1;
+             break;
+           default:
+             done = 1;
+             break;
+           }
+       }
+
+    /* Parse cost restriction equation. */
+    done = 0;
+    if (!costs_set)
+      while (r + 1 < ctx->re_end && !done)
+       {
+         switch (*r)
+           {
+           case CHAR_PLUS:
+           case L' ':
+             r++;
+             break;
+           case L'<':
+             DPRINT(("tre_parse:    max cost: '%.*" STRF "'\n", REST(r)));
+             r++;
+             while (*r == L' ')
+               r++;
+             cost_max = tre_parse_int(&r, ctx->re_end);
+             if (cost_max < 0)
+               cost_max = INT_MAX;
+             else
+               cost_max--;
+             approx = 1;
+             break;
+           case CHAR_COMMA:
+             r++;
+             done = 1;
+             break;
+           default:
+             if (*r >= L'0' && *r <= L'9')
+               {
+#ifdef TRE_DEBUG
+                 const tre_char_t *sr = r;
+#endif /* TRE_DEBUG */
+                 int cost = tre_parse_int(&r, ctx->re_end);
+                 /* XXX - make sure r is not past end. */
+                 switch (*r)
+                   {
+                   case L'i':  /* Insert cost */
+                     DPRINT(("tre_parse:    ins cost: '%.*" STRF "'\n",
+                             REST(sr)));
+                     r++;
+                     cost_ins = cost;
+                     costs_set = 1;
+                     break;
+                   case L'd':  /* Delete cost */
+                     DPRINT(("tre_parse:    del cost: '%.*" STRF "'\n",
+                             REST(sr)));
+                     r++;
+                     cost_del = cost;
+                     costs_set = 1;
+                     break;
+                   case L's':  /* Substitute cost */
+                     DPRINT(("tre_parse:  subst cost: '%.*" STRF "'\n",
+                             REST(sr)));
+                     r++;
+                     cost_subst = cost;
+                     costs_set = 1;
+                     break;
+                   default:
+                     return REG_BADBR;
+                   }
+               }
+             else
+               {
+                 done = 1;
+                 break;
+               }
+           }
+       }
+  } while (start != r);
+#endif /* TRE_APPROX */
+
+  /*{*//* Missing }. */
+  if (r >= ctx->re_end)
+    return REG_EBRACE;
+
+  /* Empty contents of {}. */
+  if (r == ctx->re)
+    return REG_BADBR;
+
+  /* Parse the ending '}' or '\}'.*/
+  if (ctx->cflags & REG_EXTENDED)
+    {
+      if (r >= ctx->re_end || *r != CHAR_RBRACE)
+       return REG_BADBR;
+      r++;
+      /* Parse trailing '?' marking minimal repetition. */
+      if (r < ctx->re_end)
+       {
+         if (*r == CHAR_QUESTIONMARK)
+           {
+             /* Process the question mark only in enhanced mode.
+                Otherwise, the question mark is an error in ERE
+                or a literal in BRE */
+             if (ctx->cflags & REG_ENHANCED)
+               {
+                 minimal = !(ctx->cflags & REG_UNGREEDY);
+                 r++;
+               }
+             else return REG_BADRPT;
+           }
+         else if (*r == CHAR_STAR || *r == CHAR_PLUS)
+           {
+             /* These are reserved for future extensions. */
+             return REG_BADRPT;
+           }
+       }
+    }
+  else
+    {
+      if (r + 1 >= ctx->re_end
+         || *r != CHAR_BACKSLASH
+         || *(r + 1) != CHAR_RBRACE)
+       return REG_BADBR;
+      r += 2;
+      if (r < ctx->re_end && *r == CHAR_STAR)
+       {
+         /* This is reserved for future extensions. */
+         return REG_BADRPT;
+       }
+    }
+
+  if (minimal)
+    ctx->num_reorder_tags++;
+
+  if (!result) goto parse_bound_exit;
+  /* Create the AST node(s). */
+  /* Originally, if min == 0 && max == 0, we immediately replace the whole
+     iteration with EMPTY.  This unfortunately drops any submatches, and
+     messes up setting the pmatch values (we can get tags of -1, and
+     tag values in the billions).  So we leave it and process this case as
+     usual, and wait until tre_expand_ast() to replace with EMPTY */
+#ifdef TRE_APPROX
+  if (min < 0 && max < 0)
+    /* Only approximate parameters set, no repetitions. */
+    min = max = 1;
+#endif /* TRE_APPROX */
+
+  *result = tre_ast_new_iter(ctx->mem, *result, min, max, minimal);
+  if (!*result)
+    return REG_ESPACE;
+
+#ifdef TRE_APPROX
+  /* If approximate matching parameters are set, add them to the
+     iteration node. */
+  if (approx || costs_set || counts_set)
+    {
+      int *params;
+      tre_iteration_t *iter = (*result)->obj;
+
+      if (costs_set || counts_set)
+       {
+         if (limit_ins == TRE_PARAM_UNSET)
+           {
+             if (cost_ins == TRE_PARAM_UNSET)
+               limit_ins = 0;
+             else
+               limit_ins = INT_MAX;
+           }
+
+         if (limit_del == TRE_PARAM_UNSET)
+           {
+             if (cost_del == TRE_PARAM_UNSET)
+               limit_del = 0;
+             else
+               limit_del = INT_MAX;
+           }
+
+         if (limit_subst == TRE_PARAM_UNSET)
+           {
+             if (cost_subst == TRE_PARAM_UNSET)
+               limit_subst = 0;
+             else
+               limit_subst = INT_MAX;
+           }
+       }
+
+      if (cost_max == TRE_PARAM_UNSET)
+       cost_max = INT_MAX;
+      if (limit_err == TRE_PARAM_UNSET)
+       limit_err = INT_MAX;
+
+      ctx->have_approx = 1;
+      params = tre_mem_alloc(ctx->mem, sizeof(*params) * TRE_PARAM_LAST);
+      if (!params)
+       return REG_ESPACE;
+      for (i = 0; i < TRE_PARAM_LAST; i++)
+       params[i] = TRE_PARAM_UNSET;
+      params[TRE_PARAM_COST_INS] = cost_ins;
+      params[TRE_PARAM_COST_DEL] = cost_del;
+      params[TRE_PARAM_COST_SUBST] = cost_subst;
+      params[TRE_PARAM_COST_MAX] = cost_max;
+      params[TRE_PARAM_MAX_INS] = limit_ins;
+      params[TRE_PARAM_MAX_DEL] = limit_del;
+      params[TRE_PARAM_MAX_SUBST] = limit_subst;
+      params[TRE_PARAM_MAX_ERR] = limit_err;
+      iter->params = params;
+    }
+#endif /* TRE_APPROX */
+
+parse_bound_exit:
+#ifdef TRE_APPROX
+  DPRINT(("tre_parse_bound: min %d, max %d, costs [%d,%d,%d, total %d], "
+         "limits [%d,%d,%d, total %d]\n",
+         min, max, cost_ins, cost_del, cost_subst, cost_max,
+         limit_ins, limit_del, limit_subst, limit_err));
+#else /* !TRE_APPROX */
+  DPRINT(("tre_parse_bound: min %d, max %d\n", min, max));
+#endif /* !TRE_APPROX */
+
+
+  ctx->re = r;
+  return REG_OK;
+}
+
+/* Previously, we had PARSE_RESTORE_CFLAGS restore the cflags, but for
+   non-self-contained options, like (?i), this causes ((?i)fu)bar to be
+   treated more like ((?i)fu(?-i)bar), so the pmatch value is incorrect.
+   Because we now set up tags for even non-capturing parenthesized
+   subexpressions, we always call PARSE_MARK_FOR_SUBMATCH.  So if we
+   pass the unmodified version of cflags to PARSE_MARK_FOR_SUBMATCH and
+   have it restore cflags after the subexpression, we don't need to have
+   a separate PARSE_RESTORE_CFLAGS, and then after processing the
+   non-self-contained option, we can call PARSE_ATOM instead of PARSE_RE.
+   This has the side-benefit of now matching the perl behavior: the RE
+   foo(?i)bar|zap is foo(?i)bar OR (?i)zap instead of TRE previous behavior
+   of foo AND (?i) (bar OR zap). */
+typedef enum {
+  PARSE_RE = 0,
+  PARSE_ATOM,
+  PARSE_MARK_FOR_SUBMATCH,
+  PARSE_BRANCH,
+  PARSE_PIECE,
+  PARSE_CATENATION,
+  PARSE_POST_CATENATION,
+  PARSE_UNION,
+  PARSE_POST_UNION,
+  PARSE_POSTFIX,
+} tre_parse_re_stack_symbol_t;
+
+
+reg_errcode_t
+tre_parse(tre_parse_ctx_t *ctx)
+{
+  tre_ast_node_t *result = NULL;
+  tre_parse_re_stack_symbol_t symbol;
+  reg_errcode_t status = REG_OK;
+  tre_stack_t *stack = ctx->stack;
+  int bottom = tre_stack_num_objects(stack);
+  int depth = 0;
+  int temporary_cflags = 0;
+  int bre_branch_begin;
+#ifdef TRE_DEBUG
+  const tre_char_t *tmp_re;
+#endif
+
+  DPRINT(("tre_parse: parsing '%.*" STRF "', len = %d cflags = 0%o\n",
+         ctx->len, ctx->re, ctx->len, ctx->cflags));
+
+  if (ctx->len <= 0) return REG_EMPTY;
+  if (!ctx->nofirstsub)
+    {
+      STACK_PUSH(stack, int, ctx->cflags);
+      STACK_PUSH(stack, int, ctx->submatch_id);
+      STACK_PUSH(stack, int, PARSE_MARK_FOR_SUBMATCH);
+      ctx->submatch_id++;
+    }
+  STACK_PUSH(stack, int, 0); // bre_branch_begin
+  STACK_PUSH(stack, int, PARSE_RE);
+  ctx->re_start = ctx->re;
+  ctx->re_end = ctx->re + ctx->len;
+
+
+  /* The following is basically just a recursive descent parser.  I use
+     an explicit stack instead of recursive functions mostly because of
+     two reasons: compatibility with systems which have an overflowable
+     call stack, and efficiency (both in lines of code and speed).  */
+  while (tre_stack_num_objects(stack) > bottom)
+    {
+      symbol = tre_stack_pop_int(stack);
+      switch (symbol)
+       {
+       case PARSE_RE:
+         /* Parse a full regexp.  A regexp is one or more branches,
+            separated by the union operator `|'. */
+         bre_branch_begin = tre_stack_pop_int(stack);
+         if (
+#ifdef REG_LITERAL
+             !(ctx->cflags & REG_LITERAL) &&
+#endif /* REG_LITERAL */
+             ctx->cflags & (REG_EXTENDED | REG_ENHANCED))
+           STACK_PUSHX(stack, int, PARSE_UNION);
+         STACK_PUSHX(stack, int, bre_branch_begin);
+         STACK_PUSHX(stack, int, PARSE_BRANCH);
+         break;
+
+       case PARSE_BRANCH:
+         /* Parse a branch.  A branch is one or more pieces, concatenated.
+            A piece is an atom possibly followed by a postfix operator. */
+         bre_branch_begin = tre_stack_pop_int(stack);
+         STACK_PUSHX(stack, int, PARSE_CATENATION);
+         STACK_PUSHX(stack, int, bre_branch_begin);
+         STACK_PUSHX(stack, int, PARSE_PIECE);
+         break;
+
+       case PARSE_PIECE:
+         /* Parse a piece.  A piece is an atom possibly followed by one
+            or more postfix operators. */
+         bre_branch_begin = tre_stack_pop_int(stack);
+         STACK_PUSHX(stack, int, PARSE_POSTFIX);
+         STACK_PUSHX(stack, int, bre_branch_begin);
+         STACK_PUSHX(stack, int, PARSE_ATOM);
+         break;
+
+       case PARSE_CATENATION:
+         /* If the expression has not ended, parse another piece. */
+         {
+           tre_char_t c;
+           if (ctx->re >= ctx->re_end)
+             break;
+           c = *ctx->re;
+#ifdef REG_LITERAL
+           if (!(ctx->cflags & REG_LITERAL))
+             {
+#endif /* REG_LITERAL */
+               if ((ctx->cflags & REG_EXTENDED && c == CHAR_PIPE) ||
+                   ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED
+                   && ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH &&
+                   *(ctx->re + 1) == CHAR_PIPE))
+                 break;
+               if ((ctx->cflags & REG_EXTENDED
+                    && c == CHAR_RPAREN && depth > 0)
+                   || (!(ctx->cflags & REG_EXTENDED)
+                       && ctx->re + 1 < ctx->re_end && c == CHAR_BACKSLASH
+                           && *(ctx->re + 1) == CHAR_RPAREN))
+                 {
+                   if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+                     return REG_EPAREN;
+                   DPRINT(("tre_parse:   group end: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   depth--;
+                   if (!(ctx->cflags & (REG_EXTENDED | REG_ENHANCED)))
+                     ctx->re += 2;
+                   break;
+                 }
+#ifdef REG_LITERAL
+             }
+#endif /* REG_LITERAL */
+
+#ifdef REG_LEFT_ASSOC
+           if (ctx->cflags & REG_LEFT_ASSOC)
+             {
+               /* Left associative concatenation. */
+               STACK_PUSHX(stack, int, PARSE_CATENATION);
+               STACK_PUSHX(stack, voidptr, result);
+               STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+               STACK_PUSHX(stack, int, 0); // bre_branch_begin
+               STACK_PUSHX(stack, int, PARSE_PIECE);
+             }
+           else
+#endif /* REG_LEFT_ASSOC */
+             {
+               /* Default case, right associative concatenation. */
+               STACK_PUSHX(stack, voidptr, result);
+               STACK_PUSHX(stack, int, PARSE_POST_CATENATION);
+               STACK_PUSHX(stack, int, PARSE_CATENATION);
+               STACK_PUSHX(stack, int, 0); // bre_branch_begin
+               STACK_PUSHX(stack, int, PARSE_PIECE);
+             }
+           break;
+         }
+
+       case PARSE_POST_CATENATION:
+         {
+           tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+           tre_ast_node_t *tmp_node;
+           tmp_node = tre_ast_new_catenation(ctx->mem, tree, result);
+           if (!tmp_node)
+             return REG_ESPACE;
+           result = tmp_node;
+           break;
+         }
+
+       case PARSE_UNION:
+         if (ctx->re >= ctx->re_end)
+           break;
+#ifdef REG_LITERAL
+         if (ctx->cflags & REG_LITERAL)
+           break;
+#endif /* REG_LITERAL */
+         if (!(ctx->cflags & REG_EXTENDED))
+           {
+             if (*ctx->re != CHAR_BACKSLASH || ctx->re + 1 >= ctx->re_end)
+               break;
+             ctx->re++;
+           }
+         switch (*ctx->re)
+           {
+           case CHAR_PIPE:
+             DPRINT(("tre_parse:       union: '%.*" STRF "'\n",
+                     REST(ctx->re)));
+             STACK_PUSHX(stack, int, PARSE_UNION);
+             STACK_PUSHX(stack, voidptr, (void *)ctx->re);
+             STACK_PUSHX(stack, voidptr, result);
+             STACK_PUSHX(stack, int, PARSE_POST_UNION);
+             /* We need to pass a boolean (eventually) to PARSE_ATOM to
+                indicate if this is the beginning of a BRE extended branch. */
+             STACK_PUSHX(stack, int, (ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) == REG_ENHANCED); // bre_branch_begin
+             STACK_PUSHX(stack, int, PARSE_BRANCH);
+             ctx->re++;
+             break;
+
+           case CHAR_RPAREN:
+             ctx->re++;
+             break;
+
+           default:
+             if (!(ctx->cflags & REG_EXTENDED))
+               ctx->re--;
+             break;
+           }
+         break;
+
+       case PARSE_POST_UNION:
+         {
+           tre_ast_node_t *tmp_node;
+           tre_ast_node_t *tree = tre_stack_pop_voidptr(stack);
+           const tre_char_t *pipechar = tre_stack_pop_voidptr(stack);
+           /* error on empty expression at end of union */
+           if (pipechar == ctx->re - 1)
+             {
+               return REG_EMPTY;
+             }
+           tmp_node = tre_ast_new_union(ctx->mem, tree, result);
+           if (!tmp_node)
+             return REG_ESPACE;
+           result = tmp_node;
+           break;
+         }
+
+       case PARSE_POSTFIX:
+         /* Parse postfix operators. */
+         if (ctx->re >= ctx->re_end)
+           break;
+#ifdef REG_LITERAL
+         if (ctx->cflags & REG_LITERAL)
+           break;
+#endif /* REG_LITERAL */
+         int minimal = (ctx->cflags & REG_UNGREEDY) ? 1 : 0;
+         int rep_min = 0;
+         int rep_max = -1;
+#ifdef TRE_DEBUG
+         int lbrace_off;
+#endif
+         switch (*ctx->re)
+           {
+           case CHAR_PLUS:
+           case CHAR_QUESTIONMARK:
+             if (!(ctx->cflags & REG_EXTENDED))
+               break;
+               /*FALLTHROUGH*/
+           case CHAR_STAR:
+             {
+               tre_ast_node_t *tmp_node;
+#ifdef TRE_DEBUG
+               const char *tstr = "star";
+               tmp_re = ctx->re;
+#endif
+
+       handle_plus_or_question:
+               /* error on iteration of raw assertion (not in subexpression) */
+               if (result->type == LITERAL && result->submatch_id < 0 &&
+                   IS_ASSERTION((tre_literal_t *)result->obj))
+                 {
+                   if (!(ctx->cflags & REG_EXTENDED)) break;
+                   return REG_BADRPT;
+                 }
+               if (*ctx->re == CHAR_PLUS)
+                 {
+                   rep_min = 1;
+#ifdef TRE_DEBUG
+                   tstr = "plus";
+#endif
+                 }
+               if (*ctx->re == CHAR_QUESTIONMARK)
+                 {
+                   rep_max = 1;
+#ifdef TRE_DEBUG
+                   tstr = "questionmark";
+#endif
+                 }
+
+               if (ctx->cflags & REG_EXTENDED)
+                 {
+                   if (ctx->re + 1 < ctx->re_end)
+                     {
+                       if (*(ctx->re + 1) == CHAR_QUESTIONMARK)
+                         {
+                           /* Process the question mark only in enhanced mode.
+                              Otherwise, the question mark is an error in ERE */
+                           if (ctx->cflags & REG_ENHANCED)
+                             {
+                               minimal = !(ctx->cflags & REG_UNGREEDY);
+                               ctx->re++;
+                             }
+                           else return REG_BADRPT;
+                         }
+                       else if (*(ctx->re + 1) == CHAR_STAR
+                                || *(ctx->re + 1) == CHAR_PLUS)
+                         {
+                           /* These are reserved for future extensions. */
+                           return REG_BADRPT;
+                         }
+                     }
+                 }
+               else
+                 {
+                   if (ctx->re + 1 < ctx->re_end && *(ctx->re + 1) == CHAR_STAR)
+                     {
+                       /* This is reserved for future extensions. */
+                       return REG_BADRPT;
+                     }
+                   if (ctx->re + 2 < ctx->re_end)
+                     {
+                       if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 1) == CHAR_QUESTIONMARK)
+                         {
+                           /* Process the question mark only in enhanced mode.
+                              Otherwise, the question mark is a literal in BRE */
+                           if (ctx->cflags & REG_ENHANCED)
+                             {
+                               minimal = !(ctx->cflags & REG_UNGREEDY);
+                               ctx->re += 2;
+                             }
+                         }
+                       else if (*(ctx->re + 1) == CHAR_BACKSLASH && *(ctx->re + 2) == CHAR_PLUS)
+                         {
+                           /* This is reserved for future extensions. */
+                           return REG_BADRPT;
+                         }
+                     }
+                 }
+
+               if (minimal)
+                 ctx->num_reorder_tags++;
+
+               DPRINT(("tre_parse: %s %s: '%.*" STRF "'\n",
+                       minimal ? "  minimal" : "greedy", tstr, REST(tmp_re)));
+               if (result == NULL)
+                 {
+                   if (ctx->cflags & REG_EXTENDED) return REG_BADRPT;
+                   else goto parse_literal;
+                 }
+               ctx->re++;
+               tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max,
+                                           minimal);
+               if (tmp_node == NULL)
+                 return REG_ESPACE;
+               result = tmp_node;
+
+               /* Set the iterator with a submatch id in the invisible range
+                * (which will be overridden if a real submatch is needed) */
+               result->submatch_id = ctx->submatch_id_invisible++;
+
+#if 0
+               /* We don't allow multiple postfixes, but this might be needed
+                  to support approximate matching */
+               STACK_PUSHX(stack, int, PARSE_POSTFIX);
+#endif
+             }
+             break;
+
+           case CHAR_BACKSLASH:
+             /* "\{" is special without REG_EXTENDED */
+             /* "\+" and "\?" are special with REG_ENHANCED for BRE */
+             if (!(ctx->cflags & REG_EXTENDED)
+                 && ctx->re + 1 < ctx->re_end)
+               {
+                 switch (*(ctx->re + 1))
+                   {
+                   case CHAR_LBRACE:
+                     ctx->re++;
+#ifdef TRE_DEBUG
+                     lbrace_off = 2;
+#endif
+                     goto parse_brace;
+                   case CHAR_PLUS:
+                   case CHAR_QUESTIONMARK:
+                     if (ctx->cflags & REG_ENHANCED)
+                       {
+#ifdef TRE_DEBUG
+                         tmp_re = ctx->re;
+#endif
+                         ctx->re++;
+                         goto handle_plus_or_question;
+                       }
+                     break;
+                   }
+                 break;
+               }
+             else
+               break;
+
+           case CHAR_LBRACE:
+             {
+               int raw_assertion;
+
+               /* "{" is literal without REG_EXTENDED */
+               if (!(ctx->cflags & REG_EXTENDED))
+                 break;
+#ifdef TRE_DEBUG
+               lbrace_off = 1;
+#endif
+
+           parse_brace:
+               /* error on iteration of raw assertion (not in subexpression),
+                  but wait until after parsing bounds */
+               raw_assertion = (result->type == LITERAL
+                                && result->submatch_id < 0
+                                && IS_ASSERTION((tre_literal_t *)result->obj));
+               ctx->re++;
+
+               status = tre_parse_bound(ctx, &result);
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+               /* For ERE, if status is REG_NOMATCH, this mean the lbrace
+                  is to be treated as a literal. */
+               if (status == REG_NOMATCH)
+                 {
+                   ctx->re--;
+                   break;
+                 }
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+               DPRINT(("tre_parse:     bound: '%.*" STRF "'\n",
+                       REST(ctx->re - lbrace_off)));
+               if (status != REG_OK)
+                 return status;
+               if (raw_assertion) return REG_BADRPT;
+
+               /* Set the iterator with a submatch id in the invisible range
+                * (which will be overridden if a real submatch is needed) */
+               if (result->type == ITERATION)
+                 result->submatch_id = ctx->submatch_id_invisible++;
+
+#if 0
+               /* We don't allow multiple postfixes, but this might be needed
+                  to support approximate matching */
+               STACK_PUSHX(stack, int, PARSE_POSTFIX);
+#endif
+               break;
+             }
+           }
+         break;
+
+       case PARSE_ATOM:
+         {
+           /* Parse an atom.  An atom is a regular expression enclosed in `()',
+              an empty set of `()', a bracket expression, `.', `^', `$',
+              a `\' followed by a character, or a single character. */
+
+           /* The stack contains a boolean value, whether PARSE_ATOM is
+              being called just after the start of a group (left paren)
+              in a BRE */
+           bre_branch_begin = tre_stack_pop_int(stack);
+
+           /* End of regexp? (empty string). */
+           if (ctx->re >= ctx->re_end)
+             goto parse_literal;
+
+#ifdef REG_LITERAL
+           if (ctx->cflags & REG_LITERAL)
+             goto parse_literal;
+#endif /* REG_LITERAL */
+
+           switch (*ctx->re)
+             {
+             case CHAR_LPAREN:  /* parenthesized subexpression */
+
+               /* Handle "(?...)" extensions.  They work in a way similar
+                  to Perls corresponding extensions. */
+               if ((ctx->cflags & (REG_EXTENDED|REG_ENHANCED)) ==
+                   (REG_EXTENDED|REG_ENHANCED)
+                   && *(ctx->re + 1) == CHAR_QUESTIONMARK)
+                 {
+                   int new_cflags = ctx->cflags;
+                   int bit = 1;
+                   int invisible_submatch = 0;
+                   DPRINT(("tre_parse: extension: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   ctx->re += 2;
+                   while (/*CONSTCOND*/1)
+                     {
+                       if (*ctx->re == L'i')
+                         {
+                           DPRINT(("tre_parse:     icase: '%.*" STRF "'\n",
+                                   REST(ctx->re)));
+                           if (bit)
+                             new_cflags |= REG_ICASE;
+                           else
+                             new_cflags &= ~REG_ICASE;
+                           ctx->re++;
+                         }
+                       else if (*ctx->re == L'n')
+                         {
+                           DPRINT(("tre_parse:   newline: '%.*" STRF "'\n",
+                                   REST(ctx->re)));
+                           if (bit)
+                             new_cflags |= REG_NEWLINE;
+                           else
+                             new_cflags &= ~REG_NEWLINE;
+                           ctx->re++;
+                         }
+#ifdef REG_LEFT_ASSOC
+                       else if (*ctx->re == L'l')
+                         {
+                           DPRINT(("tre_parse: left assoc: '%.*" STRF "'\n",
+                                   REST(ctx->re)));
+                           if (bit)
+                             new_cflags |= REG_LEFT_ASSOC;
+                           else
+                             new_cflags &= ~REG_LEFT_ASSOC;
+                           ctx->re++;
+                         }
+#endif /* REG_LEFT_ASSOC */
+#ifdef REG_UNGREEDY
+                       else if (*ctx->re == L'U')
+                         {
+                           DPRINT(("tre_parse:    ungreedy: '%.*" STRF "'\n",
+                                   REST(ctx->re)));
+                           if (bit)
+                             new_cflags |= REG_UNGREEDY;
+                           else
+                             new_cflags &= ~REG_UNGREEDY;
+                           ctx->re++;
+                         }
+#endif /* REG_UNGREEDY */
+                       else if (*ctx->re == CHAR_MINUS)
+                         {
+                           DPRINT(("tre_parse:  turn off: '%.*" STRF "'\n",
+                                   REST(ctx->re)));
+                           ctx->re++;
+                           bit = 0;
+                         }
+                       else if (*ctx->re == CHAR_COLON)
+                         {
+                           DPRINT(("tre_parse:  no group: '%.*" STRF
+                                   "', (invisible submatch %d)\n",
+                                   REST(ctx->re), ctx->submatch_id_invisible));
+                           ctx->re++;
+                           depth++;
+                           invisible_submatch = 1;
+                           break;
+                         }
+                       else if (*ctx->re == CHAR_HASH)
+                         {
+                           DPRINT(("tre_parse:    comment: '%.*" STRF "'\n",
+                                   REST(ctx->re)));
+                           /* A comment can contain any character except a
+                              right parenthesis */
+                           while (*ctx->re != CHAR_RPAREN
+                                  && ctx->re < ctx->re_end)
+                             ctx->re++;
+                           if (*ctx->re == CHAR_RPAREN && ctx->re < ctx->re_end)
+                             {
+                               ctx->re++;
+                               break;
+                             }
+                           else
+                             return REG_BADPAT;
+                         }
+                       else if (*ctx->re == CHAR_RPAREN)
+                         {
+                           ctx->re++;
+                           break;
+                         }
+                       else
+                         return REG_BADRPT;
+                     }
+
+                   /* Turn on the cflags changes for the rest of the
+                      enclosing group. */
+                   if (invisible_submatch)
+                     {
+                       STACK_PUSHX(stack, int, ctx->cflags);
+                       STACK_PUSHX(stack, int, ctx->submatch_id_invisible);
+                       STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+                       ctx->submatch_id_invisible++;
+                       STACK_PUSHX(stack, int, 0); // bre_branch_begin
+                       STACK_PUSHX(stack, int, PARSE_RE);
+                     }
+                   else {
+                       STACK_PUSHX(stack, int, 0); // bre_branch_begin
+                       STACK_PUSHX(stack, int, PARSE_ATOM);
+                   }
+                   ctx->cflags = new_cflags;
+                   break;
+                 }
+
+               if (ctx->cflags & REG_EXTENDED)
+                 {
+               parse_bre_lparen:
+                   DPRINT(("tre_parse: group begin: '%.*" STRF
+                           "', submatch %d\n", REST(ctx->re),
+                           ctx->submatch_id));
+                   ctx->re++;
+                   /* First parse a whole RE, then mark the resulting tree
+                      for submatching. */
+                   STACK_PUSHX(stack, int, ctx->cflags);
+                   STACK_PUSHX(stack, int, ctx->submatch_id);
+                   STACK_PUSHX(stack, int, PARSE_MARK_FOR_SUBMATCH);
+                   /* We need to pass a boolean (eventually) to PARSE_ATOM to
+                      indicate if this is the beginning of a BRE group. */
+                   STACK_PUSHX(stack, int, !(ctx->cflags & REG_EXTENDED));
+                   STACK_PUSHX(stack, int, PARSE_RE);
+                   ctx->submatch_id++;
+                   depth++;
+                 }
+               else
+                 goto parse_literal;
+               break;
+
+             case CHAR_RPAREN:  /* end of current subexpression */
+               if (ctx->cflags & REG_EXTENDED && depth > 0)
+                 {
+             parse_bre_rparen_empty:
+                   if (!(ctx->cflags & REG_EXTENDED) && depth == 0)
+                     return REG_EPAREN;
+                   DPRINT(("tre_parse:     empty: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   /* We were expecting an atom, but instead the current
+                      subexpression was closed.  POSIX leaves the meaning of
+                      this to be implementation-defined.  We interpret this as
+                      an empty expression (which matches an empty string).  */
+                   result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+                   if (result == NULL)
+                     return REG_ESPACE;
+                   if (!(ctx->cflags & REG_EXTENDED))
+                     ctx->re--;
+                 }
+               else
+                 goto parse_literal;
+               break;
+
+             case CHAR_LBRACKET: /* bracket expression */
+               DPRINT(("tre_parse:     bracket: '%.*" STRF "'\n",
+                       REST(ctx->re)));
+               ctx->re++;
+               status = tre_parse_bracket(ctx, &result);
+               if (status != REG_OK)
+                 return status;
+               break;
+
+             case CHAR_BACKSLASH:
+               /* Deal with "\(", "\)" or "\{" for BREs */
+               if (!(ctx->cflags & REG_EXTENDED)
+                   && ctx->re + 1 < ctx->re_end)
+                 {
+                   if (*(ctx->re + 1) == CHAR_LPAREN)
+                     {
+                       ctx->re++;
+                       goto parse_bre_lparen;
+                     }
+                   else if (*(ctx->re + 1) == CHAR_RPAREN)
+                     {
+                       ctx->re++;
+                       goto parse_bre_rparen_empty;
+                     }
+                   if (*(ctx->re + 1) == CHAR_LBRACE) goto parse_literal;
+                 }
+
+               if (ctx->re + 1 >= ctx->re_end)
+                 /* Trailing backslash. */
+                 return REG_EESCAPE;
+
+               if (!(ctx->cflags & REG_ENHANCED))
+                 {
+                   DPRINT(("tre_parse:  unenhanced bleep: '%.*" STRF "'\n", REST(ctx->re)));
+                   ctx->re++;
+                   goto unenhanced_backslash;
+                 }
+
+               /* If a macro is used, parse the expanded macro recursively. */
+               {
+                 tre_char_t buf[64];
+                 tre_expand_macro(ctx->re + 1, ctx->re_end,
+                                  buf, elementsof(buf));
+                 if (buf[0] != 0)
+                   {
+                     tre_parse_ctx_t subctx;
+                     memcpy(&subctx, ctx, sizeof(subctx));
+                     subctx.re = buf;
+                     subctx.len = tre_strlen(buf);
+                     subctx.nofirstsub = 1;
+                     status = tre_parse(&subctx);
+                     if (status != REG_OK)
+                       return status;
+                     ctx->re += 2;
+                     ctx->position = subctx.position;
+                     result = subctx.result;
+                     break;
+                   }
+               }
+
+#ifdef REG_LITERAL
+               if (*(ctx->re + 1) == L'Q')
+                 {
+                   DPRINT(("tre_parse: tmp literal: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   ctx->cflags |= REG_LITERAL;
+                   temporary_cflags |= REG_LITERAL;
+                   ctx->re += 2;
+                   STACK_PUSHX(stack, int, 0);
+                   STACK_PUSHX(stack, int, PARSE_ATOM);
+                   break;
+                 }
+#endif /* REG_LITERAL */
+
+               DPRINT(("tre_parse:  bleep: '%.*" STRF "'\n", REST(ctx->re)));
+               ctx->re++;
+               switch (*ctx->re)
+                 {
+                 case L'b':
+                   result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                                ASSERT_AT_WB, -1);
+                   ctx->re++;
+                   break;
+                 case L'B':
+                   result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                                ASSERT_AT_WB_NEG, -1);
+                   ctx->re++;
+                   break;
+                 case L'<':
+                   result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                                ASSERT_AT_BOW, -1);
+                   ctx->re++;
+                   break;
+                 case L'>':
+                   result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                                ASSERT_AT_EOW, -1);
+                   ctx->re++;
+                   break;
+                 case L'x':
+                   ctx->re++;
+                   if (ctx->re[0] != CHAR_LBRACE && ctx->re < ctx->re_end)
+                     {
+                       /* 8 bit hex char. */
+                       char tmp[3] = {0, 0, 0};
+                       long val;
+                       DPRINT(("tre_parse:  8 bit hex: '%.*" STRF "'\n",
+                               REST(ctx->re - 2)));
+
+                       if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
+                           ctx->re < ctx->re_end)
+                         {
+                           tmp[0] = (char)ctx->re[0];
+                           ctx->re++;
+                         }
+                       if (tre_isxdigit_l(ctx->re[0], ctx->loc) &&
+                           ctx->re < ctx->re_end)
+                         {
+                           tmp[1] = (char)ctx->re[0];
+                           ctx->re++;
+                         }
+                       val = strtol(tmp, NULL, 16);
+                       result = tre_ast_new_literal(ctx->mem, (int)val,
+                                                    (int)val, ctx->position);
+                       ctx->position++;
+                       break;
+                     }
+                   else if (ctx->re < ctx->re_end)
+                     {
+                       /* Wide char. */
+                       char tmp[32];
+                       long val;
+                       int i = 0;
+                       ctx->re++;
+                       while (ctx->re_end - ctx->re >= 0)
+                         {
+                           if (ctx->re[0] == CHAR_RBRACE)
+                             break;
+                           if (tre_isxdigit_l(ctx->re[0], ctx->loc))
+                             {
+                               tmp[i] = (char)ctx->re[0];
+                               i++;
+                               ctx->re++;
+                               continue;
+                             }
+                           return REG_EBRACE;
+                         }
+                       ctx->re++;
+                       tmp[i] = 0;
+                       val = strtol(tmp, NULL, 16);
+                       result = tre_ast_new_literal(ctx->mem, (int)val, (int)val,
+                                                    ctx->position);
+                       ctx->position++;
+                       break;
+                     }
+                   /*FALLTHROUGH*/
+
+                 default:
+                 unenhanced_backslash:
+                   if ((ctx->cflags & (REG_EXTENDED | REG_ENHANCED)) !=
+                       REG_EXTENDED &&
+                       tre_isdigit_l(*ctx->re, ctx->loc) && *ctx->re != L'0')
+                     {
+                       /* Back reference (only in BRE or enhanced). */
+                       int val = *ctx->re - L'0';
+                       DPRINT(("tre_parse:     backref: '%.*" STRF "'\n",
+                               REST(ctx->re - 1)));
+                       result = tre_ast_new_literal(ctx->mem, BACKREF, val,
+                                                    ctx->position);
+                       if (result == NULL)
+                         return REG_ESPACE;
+
+                       /* Set the backref with a submatch id in the invisible
+                        * range (which will be overridden if a real submatch
+                        * is needed) */
+                       result->submatch_id = ctx->submatch_id_invisible++;
+
+                       ctx->position++;
+                       ctx->num_reorder_tags++;
+                       ctx->max_backref = MAX(val, ctx->max_backref);
+                       ctx->re++;
+                     }
+                   else
+                     {
+                       /* Escaped character. */
+                       DPRINT(("tre_parse:     escaped: '%.*" STRF "'\n",
+                               REST(ctx->re - 1)));
+                       result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+                                                    ctx->position);
+                       ctx->position++;
+                       ctx->re++;
+                     }
+                   break;
+                 }
+               if (result == NULL)
+                 return REG_ESPACE;
+               break;
+
+             case CHAR_PERIOD:  /* the any-symbol */
+               DPRINT(("tre_parse:       any: '%.*" STRF "'\n",
+                       REST(ctx->re)));
+               if (ctx->cflags & REG_NEWLINE)
+                 {
+                   tre_ast_node_t *tmp1;
+                   tre_ast_node_t *tmp2;
+                   tmp1 = tre_ast_new_literal(ctx->mem, 0, L'\n' - 1,
+                                              ctx->position);
+                   if (!tmp1)
+                     return REG_ESPACE;
+                   tmp2 = tre_ast_new_literal(ctx->mem, L'\n' + 1, TRE_CHAR_MAX,
+                                              ctx->position + 1);
+                   if (!tmp2)
+                     return REG_ESPACE;
+                   result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+                   if (!result)
+                     return REG_ESPACE;
+                   ctx->position += 2;
+                 }
+               else
+                 {
+                   result = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX,
+                                                ctx->position);
+                   if (!result)
+                     return REG_ESPACE;
+                   ctx->position++;
+                 }
+               ctx->re++;
+               break;
+
+             case CHAR_CARET:   /* beginning of line assertion */
+               /* '^' has a special meaning everywhere in EREs, at the
+                  beginning of the RE and after \( is BREs.  It is also
+                  special in enhanced BREs at the beginning of each branches
+                  of a union */
+               if (ctx->cflags & REG_EXTENDED
+                   || bre_branch_begin
+                   || ctx->re == ctx->re_start)
+                 {
+                   DPRINT(("tre_parse:       BOL: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                                ASSERT_AT_BOL, -1);
+                   if (result == NULL)
+                     return REG_ESPACE;
+                   ctx->re++;
+                 }
+               else
+                 goto parse_literal;
+               break;
+
+             case CHAR_DOLLAR:  /* end of line assertion. */
+               /* '$' is special everywhere in EREs, and in the end of the
+                  string and before \) is BREs. */
+               if (ctx->cflags & REG_EXTENDED
+                   || (ctx->re + 2 < ctx->re_end
+                       && *(ctx->re + 1) == CHAR_BACKSLASH
+                       && *(ctx->re + 2) == CHAR_RPAREN)
+                   || ctx->re + 1 == ctx->re_end)
+                 {
+                   DPRINT(("tre_parse:       EOL: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   result = tre_ast_new_literal(ctx->mem, ASSERTION,
+                                                ASSERT_AT_EOL, -1);
+                   if (result == NULL)
+                     return REG_ESPACE;
+                   ctx->re++;
+                 }
+               else
+                 goto parse_literal;
+               break;
+
+             default:
+             parse_literal:
+
+               if (temporary_cflags && ctx->re + 1 < ctx->re_end
+                   && *ctx->re == CHAR_BACKSLASH && *(ctx->re + 1) == L'E')
+                 {
+                   DPRINT(("tre_parse:  end tmps: '%.*" STRF "'\n",
+                           REST(ctx->re)));
+                   ctx->cflags &= ~temporary_cflags;
+                   temporary_cflags = 0;
+                   ctx->re += 2;
+                   if (ctx->re < ctx->re_end)
+                     {
+                       STACK_PUSHX(stack, int, 0);
+                       STACK_PUSHX(stack, int, PARSE_ATOM);
+                     }
+                   else
+                     {
+                       result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+                       if (!result) return REG_ESPACE;
+                     }
+                   break;
+                 }
+
+
+               /* We are expecting an atom.  If the subexpression (or the whole
+                  regexp ends here, we interpret it as an empty expression
+                  (which matches an empty string), which is an error.
+                  Iterations of an empty expression is also an error. */
+#ifdef REG_LITERAL
+               if (!(ctx->cflags & REG_LITERAL))
+                 {
+#endif /* REG_LITERAL */
+                   /* error on end of string */
+                   if (ctx->re >= ctx->re_end) return depth > 0 ? REG_EPAREN
+                                                      : REG_EMPTY;
+                   /* error on unions and iterations of empty expressions */
+                   if (ctx->cflags & REG_EXTENDED)
+                     {
+                       if (ctx->re < ctx->re_end)
+                         {
+                           if (*ctx->re == CHAR_PIPE) return REG_EMPTY;
+                           if (*ctx->re == CHAR_LBRACE)
+                             {
+                               ctx->re++;
+                 empty_parse_bound:
+                               /* We need to parse the bound first and return
+                                  any error, before returning REG_BADRPT */
+                               status = tre_parse_bound(ctx, NULL);
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+                               /* For ERE, if REG_NOMATCH is returned, we
+                                  treat the lbrace as a literal. */
+                               if (status == REG_NOMATCH)
+                                 {
+                                   ctx->re--;
+                                   /* Drop down to literal-handling code */
+                                 }
+                               else
+                                 {
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+                                   if (status != REG_OK)
+                                     return status;
+                                   return REG_BADRPT;
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+                                 }
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+                             }
+#ifdef ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND
+                           else
+#endif /* ERE_LITERAL_LBRACE_ON_NON_NUMERIC_BOUND */
+                           if (*ctx->re == CHAR_STAR
+                               || *ctx->re == CHAR_PLUS
+                               || *ctx->re == CHAR_QUESTIONMARK)
+                             {
+                               return REG_BADRPT;
+                             }
+                         }
+                     }
+                   else if (ctx->re + 1 < ctx->re_end
+                            && *ctx->re == CHAR_BACKSLASH
+                            && *(ctx->re + 1) == CHAR_LBRACE)
+                     {
+                       ctx->re += 2;
+                       goto empty_parse_bound;
+                     }
+#ifdef REG_LITERAL
+                 }
+#endif /* REG_LITERAL */
+
+               DPRINT(("tre_parse:     literal: '%.*" STRF "'\n",
+                       REST(ctx->re)));
+               /* Note that we can't use an tre_isalpha() test here, since there
+                  may be characters which are alphabetic but neither upper or
+                  lower case. */
+               if (ctx->cflags & REG_ICASE
+                   && (tre_isupper_l(*ctx->re, ctx->loc) ||
+                   tre_islower_l(*ctx->re, ctx->loc)))
+                 {
+                   tre_ast_node_t *tmp1;
+                   tre_ast_node_t *tmp2;
+
+                   /* XXX - Can there be more than one opposite-case
+                      counterpoints for some character in some locale?  Or
+                      more than two characters which all should be regarded
+                      the same character if case is ignored?  If yes, there
+                      does not seem to be a portable way to detect it.  I guess
+                      that at least for multi-character collating elements there
+                      could be several opposite-case counterpoints, but they
+                      cannot be supported portably anyway. */
+                   tmp1 = tre_ast_new_literal(ctx->mem,
+                                              tre_toupper_l(*ctx->re, ctx->loc),
+                                              tre_toupper_l(*ctx->re, ctx->loc),
+                                              ctx->position);
+                   if (!tmp1)
+                     return REG_ESPACE;
+                   tmp2 = tre_ast_new_literal(ctx->mem,
+                                              tre_tolower_l(*ctx->re, ctx->loc),
+                                              tre_tolower_l(*ctx->re, ctx->loc),
+                                              ctx->position);
+                   if (!tmp2)
+                     return REG_ESPACE;
+                   result = tre_ast_new_union(ctx->mem, tmp1, tmp2);
+                   if (!result)
+                     return REG_ESPACE;
+                 }
+               else
+                 {
+                   result = tre_ast_new_literal(ctx->mem, *ctx->re, *ctx->re,
+                                                ctx->position);
+                   if (!result)
+                     return REG_ESPACE;
+                 }
+               ctx->position++;
+               ctx->re++;
+               break;
+             }
+           break;
+         }
+
+       case PARSE_MARK_FOR_SUBMATCH:
+         {
+           int submatch_id = tre_stack_pop_int(stack);
+
+           ctx->cflags = tre_stack_pop_int(stack); /* restore cflags */
+           if (result->submatch_id >= 0 &&
+               result->submatch_id < SUBMATCH_ID_INVISIBLE_START)
+             {
+               tre_ast_node_t *n, *tmp_node;
+               if (submatch_id >= SUBMATCH_ID_INVISIBLE_START)
+                 break;
+               n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+               if (n == NULL)
+                 return REG_ESPACE;
+               tmp_node = tre_ast_new_catenation(ctx->mem, n, result);
+               if (tmp_node == NULL)
+                 return REG_ESPACE;
+               tmp_node->num_submatches = result->num_submatches;
+               result = tmp_node;
+             }
+           result->submatch_id = submatch_id;
+           if (submatch_id < SUBMATCH_ID_INVISIBLE_START)
+             result->num_submatches++;
+           break;
+         }
+
+       default:
+         assert(0);
+         break;
+       }
+    }
+
+  /* Check for missing closing parentheses. */
+  if (depth > 0)
+    return REG_EPAREN;
+
+  ctx->result = result;
+
+  return REG_OK;
+}
+
+/* EOF */