regex/TRE/lib/tre-compile.c

   1 /*
   2   tre-compile.c - TRE regex compiler
   3
   4   This software is released under a BSD-style license.
   5   See the file LICENSE for details and copyright.
   6
   7 */
   8
   9 /*
  10   TODO:
  11    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
  12      function calls.
  13 */
  14
  15
  16 #ifdef HAVE_CONFIG_H
  17 #include <config.h>
  18 #endif /* HAVE_CONFIG_H */
  19 #include <stdio.h>
  20 #include <assert.h>
  21 #include <string.h>
  22 #include <limits.h>
  23
  24 #include "tre-internal.h"
  25 #include "tre-mem.h"
  26 #include "tre-stack.h"
  27 #include "tre-ast.h"
  28 #include "tre-parse.h"
  29 #include "tre-compile.h"
  30 #include "tre.h"
  31 #include "tre-last-matched.h"
  32 #include "xmalloc.h"
  33
  34 /*
  35   The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.
  36 */
  37 #undef bit_ffs
  38 #define bit_ffs(name, nbits, value) { \
  39         register bitstr_t *_name = name; \
  40         register int _byte, _nbits = nbits; \
  41         register int _stopbyte = _bit_byte(_nbits), _value = -1; \
  42         for (_byte = 0; _byte <= _stopbyte; ++_byte) \
  43                 if (_name[_byte]) { \
  44                         _value = _byte << 3; \
  45                         for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
  46                             ++_value, _stopbyte >>= 1); \
  47                         break; \
  48                 } \
  49         *(value) = _value; \
  50 }
  51
  52 /*
  53   Algorithms to setup tags so that submatch addressing can be done.
  54 */
  55
  56
  57 #ifdef TRE_DEBUG
  58 static const char *tag_dir_str[] = {  /* names for debug output, indexed as tag_dir_str[<tag direction value>] */
  59   "minimize",       /* presumably TRE_TAG_MINIMIZE -- confirm against tre_tag_direction_t */
  60   "maximize",       /* presumably TRE_TAG_MAXIMIZE -- confirm */
  61   "left-maximize"   /* presumably TRE_TAG_LEFT_MAXIMIZE -- confirm */
  62 };
  63
  64 static const char _indent[] = "  ";
  65
  66 static void
  67 print_indent(int indent)
  68 {
  69   while (indent-- > 0)
  70     DPRINT((_indent));
  71 }
  72
  73 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
  74                                    int num_tags);
  75 static void
  76 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
  77                             int num_tags)
  78 {
  79   tre_last_matched_pre_t *u = branch->last_matched;
  80   int n_last_matched = 0;
  81
  82   while (u)
  83     {
  84       n_last_matched++;
  85       u = u->next;
  86     }
  87
  88   print_indent(indent);
  89   DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
  90           branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
  91   print_indent(indent);
  92   DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
  93           n_last_matched));
  94   if (branch->n_last_matched != n_last_matched)
  95     DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
  96   if (branch->cmp_tag > 0)
  97     {
  98       int i;
  99       const char *sep = " tags=";
 100       print_indent(indent);
 101       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
 102       for (i = 0; i < num_tags; i++)
 103         if (bit_test(branch->tags, i))
 104           {
 105             DPRINT(("%s%d", sep, i));
 106             sep = ",";
 107           }
 108       DPRINT(("\n"));
 109     }
 110
 111   u = branch->last_matched;
 112   indent++;
 113   while (u)
 114     {
 115       print_last_matched_pre(u, indent, num_tags);
 116       u = u->next;
 117     }
 118 }
 119
 120 static void
 121 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
 122 {
 123   tre_last_matched_branch_pre_t *b = lm->branches;
 124   int n_branches = 0;
 125
 126   while (b)
 127     {
 128       n_branches++;
 129       b = b->next;
 130     }
 131
 132   print_indent(indent);
 133   DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
 134           lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
 135   print_indent(indent);
 136   DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
 137           lm->n_branches, n_branches));
 138   if (lm->n_branches != n_branches)
 139     DPRINT(("*** mismatch between n and branches ***\n"));
 140
 141   b = lm->branches;
 142   indent++;
 143   while (b)
 144     {
 145       print_last_match_branch_pre(b, indent, num_tags);
 146       b = b->next;
 147     }
 148 }
 149
 150 static void print_last_matched(tre_last_matched_t *lm, int indent);
 151 static void
 152 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
 153 {
 154   tre_last_matched_t *u;
 155   int i;
 156
 157   print_indent(indent);
 158   DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
 159   if (branch->cmp_tag > 0)
 160     {
 161       print_indent(indent);
 162       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
 163       if (branch->n_tags > 0)
 164         {
 165           const char *sep = " tags=";
 166           for (i = 0; i < branch->n_tags; i++)
 167             {
 168               DPRINT(("%s%d", sep, branch->tags[i]));
 169               sep = ",";
 170             }
 171         }
 172       DPRINT(("\n"));
 173     }
 174
 175   u = branch->last_matched;
 176   indent++;
 177   for (i = branch->n_last_matched; i > 0; i--, u++)
 178     print_last_matched(u, indent);
 179 }
 180
 181 static void
 182 print_last_matched(tre_last_matched_t *lm, int indent)
 183 {
 184   int i;
 185   tre_last_matched_branch_t *b;
 186
 187   print_indent(indent);
 188   DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
 189           lm->start_tag));
 190
 191   b = lm->branches;
 192   indent++;
 193   for (i = lm->n_branches; i > 0; i--, b++)
 194     print_last_match_branch(b, indent);
 195 }
 196 #endif /* TRE_DEBUG */
 197
 198
 199 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
 200    one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
 201    create an unset tre_last_matched_branch_pre_t).  src may be NULL. */
 202 static reg_errcode_t  /* REG_OK, or REG_ESPACE when a needed new record cannot be allocated */
 203 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
 204                    int tag_id, int num_tags)
 205 {
 206   tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
 207   tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
 208   /* dst already has a record: fold src's record (if any) into it in place */
 209   if (db)
 210     {
 211       if (sb)
 212         {
 213           bitstr_t *l = db->tags;
 214           bitstr_t *r = sb->tags;
 215           int i = bitstr_size(num_tags);  /* number of bitstr_t elements the loop below combines */
 216           /* OR src's tag bits into dst's, one bitstr_t element at a time */
 217           while(i-- > 0)
 218             *l++ |= *r++;
 219           /* db and sb are the info from two parallel sub-trees, so the tags
 220              must be mutually exclusive, and we can just add their numbers */
 221           db->n_tags += sb->n_tags;
 222           db->tot_tags += sb->tot_tags;
 223           if (db->last_matched)
 224             {
 225               if (sb->last_matched)
 226                 {
 227                   tre_last_matched_pre_t *u = db->last_matched;
 228                   /* find the tail of dst's list and append src's list there */
 229                   while(u->next)
 230                     u = u->next;
 231                   u->next = sb->last_matched;
 232                   db->n_last_matched += sb->n_last_matched;
 233                   db->tot_branches += sb->tot_branches;
 234                   db->tot_last_matched += sb->tot_last_matched;
 235                 }
 236             }
 237             else if (sb->last_matched)  /* else of `if (db->last_matched)': dst had no list, so adopt src's list and counters */
 238               {
 239                 db->last_matched = sb->last_matched;
 240                 db->n_last_matched = sb->n_last_matched;
 241                 db->tot_branches = sb->tot_branches;  /* overwrites, rather than adds to, dst's previous count */
 242                 db->tot_last_matched = sb->tot_last_matched;
 243               }
 244         }
 245     }
 246   else
 247     db = sb;  /* dst had no record: reuse src's record itself (still NULL when src has none) */
 248   /* tag_id == 0 adds nothing; any other value makes sure a record exists */
 249   if (tag_id != 0)
 250     {
 251       if (!db)
 252         {
 253           db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
 254                               + bitstr_size(num_tags));  /* struct plus bitstr_size(num_tags) bytes, presumably the storage behind db->tags */
 255           if (db == NULL)
 256             return REG_ESPACE;
 257           db->tot_branches = 1;
 258         }
 259       if (tag_id > 0)
 260         {
 261           /* tag_id is a new tag, and shouldn't exist in db's tags,
 262              so we can always increment n_tags */
 263           bit_set(db->tags, tag_id);
 264           db->n_tags++;
 265           db->tot_tags++;
 266         }
 267     }
 268   dst->last_matched_branch = db;  /* also stores NULL when neither side had a record and tag_id == 0 */
 269   return REG_OK;
 270 }
 271
 272
 273 /* Inserts a catenation node to the root of the tree given in `node'.
 274    As the left child a new tag with number `tag_id' to `node' is added,
 275    and the right child is the old root. */
 276 static reg_errcode_t
 277 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
 278 {
 279   tre_catenation_t *c;
 280
 281   DPRINT(("add_tag_left: tag %d\n", tag_id));
 282
 283   c = tre_mem_alloc(mem, sizeof(*c));
 284   if (c == NULL)
 285     return REG_ESPACE;
 286   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
 287   if (c->left == NULL)
 288     return REG_ESPACE;
 289   c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
 290   if (c->right == NULL)
 291     return REG_ESPACE;
 292
 293   c->right->obj = node->obj;
 294   c->right->type = node->type;
 295   c->right->last_matched_branch = node->last_matched_branch;
 296   c->right->nullable = -1;
 297   c->right->submatch_id = -1;
 298   node->obj = c;
 299   node->type = CATENATION;
 300   node->original = c->right;
 301   return REG_OK;
 302 }
 303
 304 /* Inserts a catenation node to the root of the tree given in `node'.
 305    As the right child a new tag with number `tag_id' to `node' is added,
 306    and the left child is the old root. */
 307 static reg_errcode_t
 308 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
 309 {
 310   tre_catenation_t *c;
 311
 312   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
 313
 314   c = tre_mem_alloc(mem, sizeof(*c));
 315   if (c == NULL)
 316     return REG_ESPACE;
 317   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
 318   if (c->right == NULL)
 319     return REG_ESPACE;
 320   c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
 321   if (c->left == NULL)
 322     return REG_ESPACE;
 323
 324   c->left->obj = node->obj;
 325   c->left->type = node->type;
 326   c->left->last_matched_branch = node->last_matched_branch;
 327   c->left->nullable = -1;
 328   c->left->submatch_id = -1;
 329   node->obj = c;
 330   node->type = CATENATION;
 331   node->original = c->left;
 332   return REG_OK;
 333 }
 334
 335 typedef enum {  /* work-item symbols that tre_add_tags() pushes on and pops off its stack */
 336   ADDTAGS_RECURSE,  /* process a node; a union met here is the top-most of a series */
 337   ADDTAGS_RECURSE_NOT_TOP_UNION,  /* as ADDTAGS_RECURSE, but a union met here is not the top-most of a series */
 338   ADDTAGS_AFTER_ITERATION,  /* queued to run once an iteration's argument has been processed */
 339   ADDTAGS_AFTER_UNION_LEFT,  /* queued to run after a union's left child */
 340   ADDTAGS_AFTER_UNION_RIGHT,  /* queued to run after a union's right child */
 341   ADDTAGS_AFTER_CAT_LEFT,  /* queued to run after a catenation's left child */
 342   ADDTAGS_AFTER_CAT_RIGHT,  /* queued to run after a catenation's right child */
 343   ADDTAGS_SET_SUBMATCH_END,  /* add the end of a submatch to the regset once its node has been processed */
 344   ADDTAGS_UNION_RECURSE,  /* inner walk over consecutive unions: set make_branches, assign left_tag or descend left */
 345   ADDTAGS_UNION_RIGHT_RECURSE,  /* inner walk: assign right_tag or descend into a right-hand union */
 346   ADDTAGS_AFTER_UNION_TOP,  /* queued on the second pass for a top-most union that went through the inner walk */
 347 } tre_addtags_symbol_t;
 348
 349 enum {  /* NOTE(review): presumably step symbols for a routine that copies the last-matched structures -- confirm at the point of use */
 350   COPY_LAST_MATCHED_BRANCH,
 351   COPY_LAST_MATCHED_BRANCH_NEXT,
 352   COPY_LAST_MATCHED,
 353   COPY_LAST_MATCHED_NEXT,
 354 };
 355
 356
 357 #define REGSET_UNSET            ((unsigned)-1)
 358
 359 /* Go through `regset' and set submatch data for submatches that are
 360    using this tag. */
 361 static void
 362 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
 363 {
 364   int i;
 365
 366   for (i = 0; regset[i] != REGSET_UNSET; i++)
 367     {
 368       int id = regset[i] / 2;
 369       int start = !(regset[i] % 2);
 370       if (id >= SUBMATCH_ID_INVISIBLE_START)
 371         continue;
 372       DPRINT(("  Using tag %d for %s offset of "
 373               "submatch %d\n", tag,
 374               start ? "start" : "end", id));
 375       if (start)
 376         tnfa->submatch_data[id].so_tag = tag;
 377       else
 378         tnfa->submatch_data[id].eo_tag = tag;
 379     }
 380   regset[0] = -1;
 381 }
 382
 383
 384 #define REGSET_HAS_STARTS       0x1  /* regset_contains bit: a submatch start has been added to the regset */
 385 #define REGSET_HAS_ENDS         0x2  /* regset_contains bit: a submatch end has been added to the regset */
 386
 387
 388 /* Adds tags to appropriate locations in the parse tree in `tree', so that
 389    subexpressions marked for submatch addressing can be traced. */
 390 static reg_errcode_t
 391 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
 392              tre_tnfa_t *tnfa)
 393 {
 394   reg_errcode_t status = REG_OK;
 395   tre_addtags_symbol_t symbol;
 396   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
 397   int bottom = tre_stack_num_objects(stack);
 398   /* True for first pass (counting number of needed tags) */
 399   int first_pass = (mem == NULL || tnfa == NULL);
 400   unsigned *regset, *orig_regset;
 401   int regset_contains = 0;
 402   int num_tags = 0; /* Total number of tags. */
 403   int num_minimals = 0;  /* Number of special minimal tags. */
 404   int tag = 0;      /* The tag that is to be added next. */
 405   int next_tag = 1; /* Next tag to use after this one. */
 406   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
 407   int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
 408                              * the first is the tag to reorder, the second
 409                              * is the tag after which the first is reordered */
 410   int *rtp;                 /* Pointer used to fill in reorder_tags and
 411                              * tag_order */
 412   int *to_reorder;          /* Transform array converting sequential order to
 413                              * that specified by reorder_tags */
 414   int id;
 415
 416   tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
 417   if (!first_pass)
 418     {
 419       DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
 420       tnfa->end_tag = 0;
 421       tnfa->minimal_tags[0] = -1;
 422     }
 423
 424   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
 425                    + tnfa->num_submatches_invisible + 1) * 2));
 426   if (regset == NULL)
 427     {
 428       status = REG_ESPACE;
 429       goto error_regset;
 430     }
 431   regset[0] = REGSET_UNSET;
 432   orig_regset = regset;
 433
 434   if (!first_pass)
 435     {
 436       /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
 437        * to_reorder in one batch (assuming all are the same type) */
 438       rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
 439                                    ((2 * tnfa->num_reorder_tags + 1) +
 440                                    tnfa->num_tags));
 441       if (reorder_tags == NULL)
 442         {
 443           status = REG_ESPACE;
 444           goto error_reorder_tags;
 445         }
 446       to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
 447     }
 448
 449   STACK_PUSH(stack, voidptr, node);
 450   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
 451
 452   while (tre_stack_num_objects(stack) > bottom)
 453     {
 454       if (status != REG_OK)
 455         break;
 456
 457       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
 458       switch (symbol)
 459         {
 460           int top_union;
 461
 462         case ADDTAGS_SET_SUBMATCH_END:
 463           {
 464             int i;
 465
 466             id = tre_stack_pop_int(stack);
 467             node = tre_stack_pop_voidptr(stack);
 468             /* Add end of this submatch to regset. */
 469             for (i = 0; regset[i] != REGSET_UNSET; i++);
 470             regset[i] = id * 2 + 1;
 471             regset[i + 1] = -1;
 472             regset_contains |= REGSET_HAS_ENDS;
 473
 474             /* Always put a tag after a minimal iterator. */
 475             if (minimal_tag >= 0)
 476               {
 477                 if (first_pass)
 478                   {
 479                     node->num_tags++;
 480                     DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
 481                             node->num_tags));
 482                   }
 483                 else
 484                   {
 485                     int i;
 486                     status = tre_merge_branches(mem, node, NULL, tag,
 487                                                 tnfa->num_tags);
 488                     if (status != REG_OK)
 489                       break;
 490                     status = tre_add_tag_right(mem, node, tag);
 491                     if (status != REG_OK)
 492                       break;
 493                     tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
 494                     DPRINT(("Setting t%d direction to %s\n", tag,
 495                             tag_dir_str[tnfa->tag_directions[tag]]));
 496                     DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
 497                     for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
 498                     tnfa->minimal_tags[i] = tag;
 499                     tnfa->minimal_tags[i + 1] = minimal_tag;
 500                     tnfa->minimal_tags[i + 2] = -1;
 501
 502                     DPRINT(("  Minimal end: t%d reordered to "
 503                             "after t%d\n", tag, minimal_tag));
 504                     /* Append to tag_order, move "tag" after
 505                      * "minimal_tag" */
 506                     *rtp++ = tag;
 507                     *rtp++ = minimal_tag;
 508
 509                     num_minimals++;
 510                     tre_purge_regset(regset, tnfa, tag);
 511                   }
 512
 513                 minimal_tag = -1;
 514                 DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
 515                 regset[0] = REGSET_UNSET;
 516                 regset_contains = 0;
 517                 tag = next_tag;
 518                 num_tags++;
 519                 next_tag++;
 520               }
 521             break;
 522           }
 523
 524         case ADDTAGS_RECURSE_NOT_TOP_UNION:
 525           /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
 526            * indicating that if a union is being processed, it is not the
 527            * top-most of a series */
 528           top_union = 0;
 529           goto do_addtags_recurse;
 530
 531         case ADDTAGS_RECURSE:
 532           /* Setting top_union to 1 means that if a union is being processed,
 533            * it is the top-most of a series, and should recurse through the
 534            * series to set the left_tag and right_tag values */
 535           top_union = 1;
 536
 537 do_addtags_recurse:
 538           node = tre_stack_pop_voidptr(stack);
 539
 540           id = node->submatch_id;
 541           if (id >= 0)
 542             {
 543               int i;
 544
 545
 546               /* Add start of this submatch to regset. */
 547               for (i = 0; regset[i] != REGSET_UNSET; i++);
 548               regset[i] = id * 2;
 549               regset[i + 1] = -1;
 550               regset_contains |= REGSET_HAS_STARTS;
 551
 552               /* Add end of this submatch to regset after processing this
 553                  node. */
 554               STACK_PUSH(stack, voidptr, node);
 555               STACK_PUSHX(stack, int, id);
 556               STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
 557             }
 558
 559           switch (node->type)
 560             {
 561             case LITERAL:
 562               {
 563                 tre_literal_t *lit = node->obj;
 564
 565                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
 566                   {
 567                     DPRINT(("Literal %d-%d\n",
 568                             (int)lit->code_min, (int)lit->code_max));
 569                     if (regset_contains)
 570                       {
 571                         /* Regset is not empty, so add a tag before the
 572                            literal or backref. */
 573                         if (first_pass)
 574                           {
 575                             DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
 576                             node->num_tags = 1;
 577                           }
 578                         else
 579                           {
 580                             status = tre_merge_branches(mem, node, NULL, tag,
 581                                                         tnfa->num_tags);
 582                             if (status != REG_OK)
 583                               break;
 584                             status = tre_add_tag_left(mem, node, tag);
 585                             if (status != REG_OK)
 586                               break;
 587                             if (regset_contains == REGSET_HAS_STARTS)
 588                               tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
 589                             else
 590                               tnfa->tag_directions[tag] = direction;
 591                             DPRINT(("Setting t%d direction to %s\n", tag,
 592                                     tag_dir_str[tnfa->tag_directions[tag]]));
 593                             tre_purge_regset(regset, tnfa, tag);
 594
 595                             if (IS_BACKREF(lit))
 596                               {
 597                                 int b = lit->code_max;
 598                                 int t = tnfa->submatch_data[b].so_tag;
 599                                 /* Fail if the referenced submatch hasn't been
 600                                  * completed yet */
 601                                 if (tnfa->submatch_data[b].eo_tag < 0)
 602                                   {
 603                                     status = REG_ESUBREG;
 604                                     break;
 605                                   }
 606                                 if (t < tag)
 607                                   {
 608                                     DPRINT(("  Backref %d start: "
 609                                             "t%d reordered to before t%d\n",
 610                                             b, tag, t));
 611                                     if(t > 0)
 612                                       t--;
 613                                     /* Append to tag_order, move "tag" after
 614                                      * "t" */
 615                                     *rtp++ = tag;
 616                                     *rtp++ = t;
 617                                   }
 618 #if TRE_DEBUG
 619                                 else
 620                                   DPRINT(("  Backref %d start: "
 621                                           "(t%d already before t%d)\n",
 622                                             b, tag, t));
 623 #endif /* TRE_DEBUG */
 624                               }
 625                           }
 626
 627                         DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
 628                                 tag));
 629                         regset[0] = REGSET_UNSET;
 630                         regset_contains = 0;
 631                         tag = next_tag;
 632                         num_tags++;
 633                         next_tag++;
 634                       }
 635                   }
 636                 else
 637                   {
 638                     assert(!IS_TAG(lit));
 639                   }
 640                 break;
 641               }
 642             case CATENATION:
 643               {
 644                 tre_catenation_t *cat = node->obj;
 645                 tre_ast_node_t *left = cat->left;
 646                 tre_ast_node_t *right = cat->right;
 647                 int reserved_tag = -1;
 648                 DPRINT(("Catenation, next_tag = %d\n", next_tag));
 649
 650
 651                 /* After processing right child. */
 652                 STACK_PUSHX(stack, voidptr, node);
 653                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
 654
 655                 /* Process right child. */
 656                 STACK_PUSHX(stack, voidptr, right);
 657                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 658
 659                 /* After processing left child. */
 660                 STACK_PUSHX(stack, int, next_tag + left->num_tags);
 661                 DPRINT(("  Pushing %d for after left\n",
 662                         next_tag + left->num_tags));
 663                 if (left->num_tags > 0 && right->num_tags > 0)
 664                   {
 665                     /* Reserve the next tag to the right child. */
 666                     DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
 667                             "Reserving next_tag %d to right child\n",
 668                             next_tag));
 669                     reserved_tag = next_tag;
 670                     next_tag++;
 671                   }
 672                 STACK_PUSHX(stack, int, reserved_tag);
 673                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
 674
 675                 /* Process left child. */
 676                 STACK_PUSHX(stack, voidptr, left);
 677                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 678
 679                 }
 680               break;
 681             case ITERATION:
 682               {
 683                 tre_iteration_t *iter = node->obj;
 684                 DPRINT(("Iteration\n"));
 685
 686                 if (first_pass)
 687                   STACK_PUSHX(stack, int, regset_contains != 0);
 688                 STACK_PUSHX(stack, int, tag);
 689                 STACK_PUSHX(stack, voidptr, node);
 690                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
 691
 692                 STACK_PUSHX(stack, voidptr, iter->arg);
 693                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 694
 695                 /* Regset is not empty, so add a tag here (this always happens
 696                    because iterators always get submatch id, even if in the
 697                    invisible range) */
 698                 if (regset_contains)
 699                   {
 700                     if (!first_pass)
 701                       {
 702                         status = tre_merge_branches(mem, node, NULL, tag,
 703                                                     tnfa->num_tags);
 704                         if (status != REG_OK)
 705                           break;
 706                         status = tre_add_tag_left(mem, node, tag);
 707                         if (status != REG_OK)
 708                           break;
 709                         if (regset_contains == REGSET_HAS_STARTS && tag != 0)
 710                           tnfa->tag_directions[tag] = iter->minimal ?
 711                                                       TRE_TAG_MINIMIZE :
 712                                                       TRE_TAG_LEFT_MAXIMIZE;
 713                         else
 714                           tnfa->tag_directions[tag] = direction;
 715                         DPRINT(("Setting t%d direction to %s\n", tag,
 716                                 tag_dir_str[tnfa->tag_directions[tag]]));
 717                         tre_purge_regset(regset, tnfa, tag);
 718                       }
 719
 720                     DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
 721                             tag));
 722                     regset[0] = REGSET_UNSET;
 723                     regset_contains = 0;
 724                     tag = next_tag;
 725                     num_tags++;
 726                     next_tag++;
 727                   }
 728                 direction = TRE_TAG_LEFT_MAXIMIZE;
 729                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
 730               }
 731               break;
 732             case UNION:
 733               {
 734                 tre_union_t *uni;
 735                 tre_ast_node_t *left;
 736                 tre_ast_node_t *right;
 737                 int front_tag = -1;
 738
 739                 DPRINT(("Union\n"));
 740
 741                 if (regset_contains)
 742                   {
 743                     DPRINT(("  UNION num_tags++ tag=%d\n", tag));
 744                     front_tag = tag;
 745                     tag = next_tag;
 746                     num_tags++;
 747                     next_tag++;
 748                   }
 749
 750                 /* For the top union, walk the tree of consecutive unions,
 751                  * setting the left_tag and right_tag values in increasing
 752                  * order (left to right priority) */
 753                 if (top_union &&
 754                     (node->num_submatches -
 755                     (node->submatch_id >= 0 &&
 756                     node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
 757                   {
 758                     tre_ast_node_t *n;
 759                     int last = tre_stack_num_objects(stack);
 760
 761                     STACK_PUSH(stack, voidptr, node);
 762                     STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
 763
 764                     while (tre_stack_num_objects(stack) > last)
 765                       {
 766                         symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
 767                         switch (symbol)
 768                           {
 769                           case ADDTAGS_UNION_RECURSE:
 770                             n = tre_stack_pop_voidptr(stack);
 771                             uni = n->obj;
 772                             left = uni->left;
 773
 774                             /* Since the top union has num_submatches > 0,
 775                              * we set all the consecutive union's
 776                              * make_branches to 1 to force the generation
 777                              * of end tags for each union branch. */
 778                             n->make_branches = 1;
 779
 780                             STACK_PUSH(stack, voidptr, n);
 781                             STACK_PUSH(stack, int,
 782                                        ADDTAGS_UNION_RIGHT_RECURSE);
 783
 784                             if (left->type == UNION)
 785                               {
 786                                 STACK_PUSH(stack, voidptr, left);
 787                                 STACK_PUSH(stack, int,
 788                                            ADDTAGS_UNION_RECURSE);
 789                               }
 790                             else
 791                               {
 792                                 DPRINT(("  ADDTAGS_UNION_RECURSE "
 793                                         "num_tags++ tag=%d\n", tag));
 794                                 uni->left_tag = tag;
 795                                 tag = next_tag;
 796                                 num_tags++;
 797                                 next_tag++;
 798                               }
 799                             break;
 800
 801                           case ADDTAGS_UNION_RIGHT_RECURSE:
 802                             n = tre_stack_pop_voidptr(stack);
 803                             uni = n->obj;
 804                             right = uni->right;
 805
 806                             if (right->type == UNION)
 807                               {
 808                                 STACK_PUSH(stack, voidptr, right);
 809                                 STACK_PUSH(stack, int,
 810                                            ADDTAGS_UNION_RECURSE);
 811                               }
 812                             else
 813                               {
 814                                 DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
 815                                         "num_tags++ tag=%d\n", tag));
 816                                 uni->right_tag = tag;
 817                                 tag = next_tag;
 818                                 num_tags++;
 819                                 next_tag++;
 820                               }
 821
 822                             break;
 823
 824                           default:
 825                             assert(0);
 826                             break;
 827
 828                           } /* end switch(symbol) */
 829                       } /* end while(tre_stack_num_objects(stack) > last */
 830                     if (!first_pass)
 831                       {
 832                         STACK_PUSHX(stack, int, front_tag);
 833                         STACK_PUSHX(stack, voidptr, node);
 834                         STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
 835                       }
 836                   } /* end if (top_union && ...) */
 837
 838                 uni = node->obj;
 839                 left = uni->left;
 840                 right = uni->right;
 841
 842                 /* After processing right child. */
 843                 STACK_PUSHX(stack, voidptr, regset);
 844                 STACK_PUSHX(stack, int, regset_contains != 0);
 845                 STACK_PUSHX(stack, voidptr, node);
 846                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
 847
 848                 /* Process right child. */
 849                 STACK_PUSHX(stack, voidptr, right);
 850                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
 851
 852                 /* After processing left child. */
 853                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
 854
 855                 /* Process left child. */
 856                 STACK_PUSHX(stack, voidptr, left);
 857                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
 858
 859                 /* Regset is not empty, so add a tag here. */
 860                 if (regset_contains)
 861                   {
 862                     if (!first_pass)
 863                       {
 864                         status = tre_merge_branches(mem, node, NULL, front_tag,
 865                                                     tnfa->num_tags);
 866                         if (status != REG_OK)
 867                           break;
 868                         status = tre_add_tag_left(mem, node, front_tag);
 869                         if (status != REG_OK)
 870                           break;
 871                         if (regset_contains == REGSET_HAS_STARTS)
 872                           tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
 873                         else
 874                           tnfa->tag_directions[front_tag] = direction;
 875                         DPRINT(("Setting t%d direction to %s\n", front_tag,
 876                                 tag_dir_str[tnfa->tag_directions[front_tag]]));
 877                         tre_purge_regset(regset, tnfa, front_tag);
 878                       }
 879
 880                     regset[0] = REGSET_UNSET;
 881                     regset_contains = 0;
 882                   }
 883
 884                 break;
 885               }
 886             } /* end switch (node->type) */
 887
 888           break; /* end case: ADDTAGS_RECURSE */
 889
 890         case ADDTAGS_AFTER_ITERATION:
 891           {
 892             tre_iteration_t *iter;
 893             tre_ast_node_t *orig;
 894             int enter_tag;
 895
 896             node = tre_stack_pop_voidptr(stack);
 897             orig = node->original ? node->original : node;
 898             iter = (tre_iteration_t *)orig->obj;
 899             enter_tag = tre_stack_pop_int(stack);
 900             if (iter->minimal)
 901               minimal_tag = enter_tag;
 902
 903             DPRINT(("After iteration\n"));
 904             if (first_pass)
 905               {
 906                 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
 907                 DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
 908                         node->num_tags));
 909               }
 910             else
 911               {
 912                 /* node->last_matched_branch will have the start tag (the tag
 913                    just *before* the iteration).  iter->arg->last_matched_branch
 914                    will have the tag(s) inside the iteration, the ones that
 915                    may need to be reset if the iteration doesn't match.  So
 916                    before we merge iter->arg into node, we need to set up
 917                    a new tre_last_matched_t and tre_last_matched_branch_t,
 918                    using any of the inside tags as cmp_tag (we choose the first
 919                    tag found by bit_ffs).  If there are no inside tags, we
 920                    don't bother creating the extra structures. */
 921                 tre_last_matched_branch_pre_t *b =
 922                                                 iter->arg->last_matched_branch;
 923
 924                 if (b && b->n_tags > 0)
 925                   {
 926                     tre_last_matched_pre_t *u;
 927
 928                     bit_ffs(b->tags, num_tags, &b->cmp_tag);
 929                     DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
 930                             "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
 931
 932                     u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
 933                                        sizeof(tre_last_matched_branch_pre_t)
 934                                        + bitstr_size(tnfa->num_tags));
 935                     if (!u)
 936                       {
 937                         status = REG_ESPACE;
 938                         break;
 939                       }
 940                     u->branches = b;
 941                     u->n_branches = 1;
 942                     u->start_tag = b->cmp_tag;
 943                     u->tot_branches = b->tot_branches;
 944                     u->tot_last_matched = 1 + b->tot_last_matched;
 945                     u->tot_tags = b->tot_tags;
 946
 947                     b = (tre_last_matched_branch_pre_t *)(u + 1);
 948                     b->last_matched = u;
 949                     b->n_last_matched = 1;
 950                     b->tot_branches = 1 + u->tot_branches;
 951                     b->tot_last_matched = u->tot_last_matched;
 952                     b->tot_tags = u->tot_tags;
 953
 954                     iter->arg->last_matched_branch = b;
 955                   }
 956                 status = tre_merge_branches(mem, node, iter->arg, 0,
 957                                             tnfa->num_tags);
 958                 if (status != REG_OK)
 959                   break;
 960
 961                 if (iter->minimal)
 962                   {
 963                     /* Add a union with a left EMPTY literal and the right
 964                        being iter->arg.  This should force the tags inside
 965                        the minimal iteration to prefer being unset */
 966                     if (iter->min == 0 && iter->max <= 1)
 967                       {
 968                         tre_ast_node_t *u, *e;
 969
 970                         e = tre_ast_new_literal(mem, EMPTY, -1, -1);
 971                         if (e == NULL)
 972                           {
 973                             status = REG_ESPACE;
 974                             break;
 975                           }
 976                         u = tre_ast_new_union(mem, e, iter->arg);
 977                         if (u == NULL)
 978                           {
 979                             status = REG_ESPACE;
 980                             break;
 981                           }
 982                         iter->arg = u;
 983                       }
 984
 985                     direction = TRE_TAG_MINIMIZE;
 986                   }
 987                 else
 988                   direction = TRE_TAG_MAXIMIZE;
 989                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
 990               }
 991             break;
 992           }
 993
 994         case ADDTAGS_AFTER_CAT_LEFT:
 995           {
 996             int new_tag = tre_stack_pop_int(stack);
 997             next_tag = tre_stack_pop_int(stack);
 998             DPRINT(("After cat left, tag = %d, next_tag = %d\n",
 999                     tag, next_tag));
1000             if (new_tag >= 0)
1001               {
1002                 DPRINT(("  Setting tag to %d\n", new_tag));
1003                 tag = new_tag;
1004               }
1005             break;
1006           }
1007
1008         case ADDTAGS_AFTER_CAT_RIGHT:
1009           {
1010             tre_catenation_t *cat;
1011
1012             DPRINT(("After cat right\n"));
1013             node = tre_stack_pop_voidptr(stack);
1014             cat = node->obj;
1015             if (first_pass)
1016               {
1017                 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1018                 DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1019                         node->num_tags));
1020               }
1021             else
1022               {
1023                 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1024                                             tnfa->num_tags);
1025                 if (status != REG_OK)
1026                   break;
1027                 status = tre_merge_branches(mem, node, cat->left, 0,
1028                                             tnfa->num_tags);
1029               }
1030             break;
1031           }
1032
1033         case ADDTAGS_AFTER_UNION_LEFT:
1034           DPRINT(("After union left\n"));
1035           /* Lift the bottom of the `regset' array so that when processing
1036              the right operand the items currently in the array are
1037              invisible.  The original bottom was saved at ADDTAGS_UNION and
1038              will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1039           while (*regset != REGSET_UNSET)
1040             regset++;
1041           regset_contains = 0;
1042           break;
1043
1044         case ADDTAGS_AFTER_UNION_RIGHT:
1045           {
1046             int added_tags;
1047             tre_ast_node_t *orig;
1048             tre_union_t *uni;
1049             /* Note: node may not be a UNION, but a CATENATION with a left
1050              * tag.  So that is why we pass the original node->obj on the
1051              * stack, to get the union's true values. */
1052
1053             DPRINT(("After union right\n"));
1054             node = tre_stack_pop_voidptr(stack);
1055             orig = node->original ? node->original : node;
1056             uni = (tre_union_t *)orig->obj;
1057             added_tags = tre_stack_pop_int(stack);
1058             if (first_pass)
1059               {
1060                 node->num_tags = uni->left->num_tags + uni->right->num_tags
1061                                  + added_tags;
1062                 if (uni->left_tag > 0)
1063                   node->num_tags++;
1064                 if (uni->right_tag > 0)
1065                   node->num_tags++;
1066                 DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1067                         node->num_tags));
1068               }
1069             regset = tre_stack_pop_voidptr(stack);
1070
1071             /* Add tags after both children, the left child gets a smaller
1072                tag than the right child.  This guarantees that we prefer
1073                the left child over the right child. */
1074             /* XXX - This is not always necessary (if the children have
1075                tags which must be seen for every match of that child). */
1076             if (!first_pass && node->make_branches)
1077               {
1078                 tre_last_matched_branch_pre_t *lb =
1079                   uni->left->last_matched_branch;
1080                 tre_last_matched_branch_pre_t *rb =
1081                   uni->right->last_matched_branch;
1082                 tre_last_matched_pre_t *lu =
1083                   uni->left->last_matched_in_progress;
1084                 tre_last_matched_pre_t *ru =
1085                   uni->right->last_matched_in_progress;
1086                 tre_last_matched_pre_t *u;
1087                 /* We don't need to call tre_merge_branches because these
1088                  * tags don't participate in submatch ranges, so don't need
1089                  * to be recorded.  But we do set the cmp_tag entry of the
1090                  * tre_last_matched_branch_pre_t, so we might call
1091                  * tre_merge_branches if we need to create an empty
1092                  * tre_last_matched_branch_pre_t. */
1093                 if (uni->left_tag > 0)
1094                   {
1095                     DPRINT(("Setting t%d direction to maximize\n",
1096                             uni->left_tag));
1097                     status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1098                     if (status != REG_OK)
1099                       break;
1100                     tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1101                     if (!lb)
1102                       {
1103                         status = tre_merge_branches(mem, uni->left, NULL, -1,
1104                                                     tnfa->num_tags);
1105                         if (status != REG_OK)
1106                           break;
1107                         lb = uni->left->last_matched_branch;
1108                       }
1109                     lb->cmp_tag = uni->left_tag;
1110                   }
1111                 if (uni->right_tag > 0)
1112                   {
1113                     DPRINT(("Setting t%d direction to maximize\n",
1114                             uni->right_tag));
1115                     status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1116                     if (status != REG_OK)
1117                       break;
1118                     tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1119                     if (!rb)
1120                       {
1121                         status = tre_merge_branches(mem, uni->right, NULL, -1,
1122                                                     tnfa->num_tags);
1123                         if (status != REG_OK)
1124                           break;
1125                         rb = uni->right->last_matched_branch;
1126                       }
1127                     rb->cmp_tag = uni->right_tag;
1128                   }
1129                 /* Now merge the tre_last_matched_branch_pre_t into a
1130                    tre_last_matched_pre_t */
1131                 if (lu == NULL)
1132                   {
1133                     if (ru == NULL)
1134                       {
1135                         /* Create a new tre_last_matched_pre_t */
1136                         u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1137                         if (!u)
1138                           {
1139                             status = REG_ESPACE;
1140                             break;
1141                           }
1142                         u->tot_last_matched = 1;
1143
1144                         if (lb)
1145                           {
1146                             u->branches = lb;
1147                             u->n_branches = 1;
1148                             u->tot_branches += lb->tot_branches;
1149                             u->tot_last_matched += lb->tot_last_matched;
1150                             u->tot_tags += lb->tot_tags;
1151                             if (rb)
1152                               {
1153                                 lb->next = rb;
1154                                 u->n_branches++;
1155                                 u->tot_branches += rb->tot_branches;
1156                                 u->tot_last_matched += rb->tot_last_matched;
1157                                 u->tot_tags += rb->tot_tags;
1158                               }
1159                           }
1160                         else if (rb)
1161                           {
1162                             u->branches = rb;
1163                             u->n_branches = 1;
1164                             u->tot_branches += rb->tot_branches;
1165                             u->tot_last_matched += rb->tot_last_matched;
1166                             u->tot_tags += rb->tot_tags;
1167                           }
1168                       }
1169                     else
1170                       {
1171                         /* Use ru, and add lb */
1172                         u = ru;
1173                         if (lb)
1174                           {
1175                             lb->next = u->branches;
1176                             u->branches = lb;
1177                             u->n_branches++;
1178                             u->tot_branches += lb->tot_branches;
1179                             u->tot_last_matched += lb->tot_last_matched;
1180                             u->tot_tags += lb->tot_tags;
1181                           }
1182                       }
1183                   }
1184                 else if (ru == NULL)
1185                   {
1186                     /* Use lu, and add rb */
1187                     u = lu;
1188                     if (rb)
1189                       {
1190                         rb->next = u->branches;
1191                         u->branches = rb;
1192                         u->n_branches++;
1193                         u->tot_branches += rb->tot_branches;
1194                         u->tot_last_matched += rb->tot_last_matched;
1195                         u->tot_tags += rb->tot_tags;
1196                       }
1197                   }
1198                 else
1199                   {
1200                     /* Merge lu and ru into lu */
1201                     if (lu->branches)
1202                       {
1203                         if (ru->branches)
1204                           {
1205                             tre_last_matched_branch_pre_t *b = lu->branches;
1206                             while (b->next) b = b->next;
1207                             b->next = ru->branches;
1208                             lu->n_branches += ru->n_branches;
1209                           }
1210                       }
1211                     else if (ru->branches)
1212                       {
1213                         lu->branches = ru->branches;
1214                         lu->n_branches = ru->n_branches;
1215                       }
1216                     lu->tot_branches += ru->tot_branches;
1217                     lu->tot_last_matched += ru->tot_last_matched - 1;
1218                     lu->tot_tags += ru->tot_tags;
1219                     u = lu;
1220                   }
1221                 node->last_matched_in_progress = u;
1222               }
1223             direction = TRE_TAG_MAXIMIZE;
1224             break;
1225           }
1226
1227         case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1228           {
1229             tre_last_matched_branch_pre_t *b;
1230             tre_last_matched_pre_t *u;
1231             int start_tag;
1232
1233             DPRINT(("After union top\n"));
1234             node = tre_stack_pop_voidptr(stack);
1235             start_tag = tre_stack_pop_int(stack);
1236             b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1237                                + bitstr_size(tnfa->num_tags));
1238             if (!b)
1239               {
1240                 status = REG_ESPACE;
1241                 break;
1242               }
1243
1244             u = node->last_matched_in_progress;
1245             u->start_tag = start_tag;
1246             b->tot_branches = 1 + u->tot_branches;
1247             b->tot_last_matched = u->tot_last_matched;
1248             b->tot_tags = u->tot_tags;
1249             b->last_matched = u;
1250             b->n_last_matched = 1;
1251             node->last_matched_branch = b;
1252             node->last_matched_in_progress = NULL;
1253             break;
1254           }
1255
1256         default:
1257           assert(0);
1258           break;
1259
1260         } /* end switch(symbol) */
1261     } /* end while(tre_stack_num_objects(stack) > bottom) */
1262
1263   if (status != REG_OK)
1264     {
1265       DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1266       goto error_post_compile;
1267     }
1268
1269   if (!first_pass)
1270     {
1271       int i;
1272       if (num_tags != tnfa->num_tags)
1273         {
1274           DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1275                   tnfa->num_tags));
1276           status = REG_BADPAT;
1277           goto error_post_compile;
1278         }
1279
1280       tre_purge_regset(regset, tnfa, tag);
1281       DPRINT(("Setting t%d to %s\n", num_tags,
1282               tag_dir_str[direction]));
1283       tnfa->tag_directions[num_tags] = direction;
1284
1285       if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1286         {
1287           DPRINT(("Processed %d reorder tags instead of %d\n",
1288                   (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1289           status = REG_BADPAT;
1290           goto error_post_compile;
1291         }
1292     *rtp = -1;
1293 #if TRE_DEBUG
1294       if (reorder_tags[0] >= 0)
1295         {
1296           DPRINT(("reorder_tags:\n"));
1297           for (rtp = reorder_tags; *rtp >= 0;)
1298             {
1299               DPRINT(("%d after ", *rtp++));
1300               DPRINT(("%d\n", *rtp++));
1301             }
1302         }
1303         else
1304           DPRINT(("No reorder_tags\n"));
1305 #endif /* TRE_DEBUG */
1306
1307       /* Initialize to_reorder */
1308       for (i = 0; i < num_tags; i++)
1309         to_reorder[i] = i;
1310       /* Use to_seq_order to convert reorder_tags values, and use those to
1311        * reorder to_reorder */
1312       for (rtp = reorder_tags; *rtp >= 0;)
1313         {
1314           int j, high, low;
1315           int ti = *rtp++;
1316
1317           /* Skip reordering the final tag */
1318           if (ti >= num_tags)
1319             {
1320               DPRINT(("Skipping reorder of %d\n", ti));
1321               rtp++;
1322               continue;
1323             }
1324           /* The number of the tag to reorder */
1325           high = to_reorder[ti];
1326           /* Reorder after this tag */
1327           low = to_reorder[*rtp++];
1328
1329           DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1330           if (low > high)
1331             {
1332               DPRINT(("Tag %d already before %d\n", high, low));
1333               continue;
1334             }
1335           for (j = 0; j < num_tags; j++)
1336             if (to_reorder[j] > low && to_reorder[j] < high)
1337               to_reorder[j]++;
1338           to_reorder[ti] = low + 1;
1339 #ifdef TRE_DEBUG
1340           DPRINT(("to_reorder=("));
1341           for (j = 0; j < num_tags; j++)
1342             {
1343               DPRINT(("%d", to_reorder[j]));
1344               if (j < num_tags - 1)
1345                 DPRINT((","));
1346             }
1347           DPRINT((")\n"));
1348 #endif /* TRE_DEBUG */
1349         }
1350       /* Determine if reordering is really necessary */
1351       {
1352         int need_reorder = 0;
1353         for (i = 0; i < num_tags; i++)
1354           if(to_reorder[i] != i)
1355             {
1356               need_reorder = 1;
1357               break;
1358             }
1359         /* If need_reorder is not set, free reorder_tags, and set to NULL,
1360          * indicating no reordering is needed */
1361         if (!need_reorder)
1362           {
1363             DPRINT(("Don't need to reorder\n"));
1364             xfree(reorder_tags);
1365             reorder_tags = NULL;
1366           }
1367       }
1368     }
1369
1370   if (reorder_tags)
1371     {
1372       int i;
1373       tre_tag_direction_t *new_tag_directions;
1374 #if TRE_DEBUG
1375       DPRINT(("to_reorder:"));
1376       for (i = 0; i < num_tags; i++)
1377         DPRINT((" %d->%d", i, to_reorder[i]));
1378       DPRINT(("\n"));
1379 #endif /* TRE_DEBUG */
1380
1381       DPRINT(("Reordering submatch_data\n"));
1382       for (i = 0; i < (int)tnfa->num_submatches; i++)
1383         {
1384 #if TRE_DEBUG
1385           int so = tnfa->submatch_data[i].so_tag;
1386           int eo = tnfa->submatch_data[i].eo_tag;
1387 #endif /* TRE_DEBUG */
1388           tnfa->submatch_data[i].so_tag =
1389             to_reorder[tnfa->submatch_data[i].so_tag];
1390           tnfa->submatch_data[i].eo_tag =
1391             tnfa->submatch_data[i].eo_tag < num_tags ?
1392             to_reorder[tnfa->submatch_data[i].eo_tag] :
1393             tnfa->submatch_data[i].eo_tag;
1394           DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1395                   tnfa->submatch_data[i].so_tag,
1396                   tnfa->submatch_data[i].eo_tag));
1397         }
1398
1399       DPRINT(("Reordering tag_directions\n"));
1400       /* We only allocate num_tags directions and reorder them.  The
1401        * num_tags-th direction (end tag) is left unchanged. */
1402       new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1403       if (new_tag_directions == NULL)
1404         {
1405           status = REG_ESPACE;
1406           goto error_post_compile;
1407         }
1408       for (i = 0; i < num_tags; i++)
1409         {
1410           new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1411         }
1412 #if TRE_DEBUG
1413       for (i = 0; i < num_tags; i++)
1414         {
1415           DPRINT(("t%d %s->%s\n", i,
1416                   tag_dir_str[tnfa->tag_directions[i]],
1417                   tag_dir_str[new_tag_directions[i]]));
1418         }
1419         DPRINT(("t%d %s->%s\n", num_tags,
1420                 tag_dir_str[tnfa->tag_directions[num_tags]],
1421                 tag_dir_str[tnfa->tag_directions[num_tags]]));
1422 #endif /* TRE_DEBUG */
1423       memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1424       xfree(new_tag_directions);
1425
1426       DPRINT(("Reordering minimal_tags\n"));
1427       for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1428         tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1429                                 to_reorder[tnfa->minimal_tags[i]] :
1430                                 tnfa->minimal_tags[i];
1431
1432       DPRINT(("Reordering AST tags\n"));
1433       STACK_PUSH(stack, voidptr, tree);
1434       while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1435         {
1436           node = tre_stack_pop_voidptr(stack);
1437
1438           switch (node->type)
1439             {
1440             case LITERAL:
1441               {
1442                 tre_literal_t *lit = (tre_literal_t *)node->obj;
1443                 if (IS_TAG(lit))
1444                   lit->code_max = to_reorder[lit->code_max];
1445                 break;
1446               }
1447
1448             case UNION:
1449               {
1450                 tre_union_t *uni = (tre_union_t *)node->obj;
1451                 STACK_PUSHX(stack, voidptr, uni->right);
1452                 STACK_PUSHX(stack, voidptr, uni->left);
1453                 break;
1454               }
1455
1456             case CATENATION:
1457               {
1458                 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1459                 STACK_PUSHX(stack, voidptr, cat->right);
1460                 STACK_PUSHX(stack, voidptr, cat->left);
1461                 break;
1462               }
1463
1464             case ITERATION:
1465               {
1466                 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1467                 STACK_PUSHX(stack, voidptr, iter->arg);
1468                 break;
1469               }
1470
1471             default:
1472               assert(0);
1473               break;
1474             }
1475         }
1476         if (status != REG_OK)
1477           {
1478             DPRINT(("Error while reordering tags\n"));
1479             goto error_post_compile;
1480           }
1481     }
1482
1483
1484   if (!first_pass)
1485     {
1486       if (tree->last_matched_branch)
1487         {
1488           tre_last_matched_branch_t *buf, *b, *bb;
1489           tre_last_matched_branch_pre_t *bp;
1490           tre_last_matched_t *u, *uu;
1491           tre_last_matched_pre_t *up;
1492           int *t;
1493           int i;
1494 #ifdef TRE_DEBUG
1495           tre_last_matched_branch_t *_b;
1496           tre_last_matched_t *_u;
1497           int *_t;
1498
1499           DPRINT(("last_match_branch_pre:\n"));
1500           print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1501 #endif /* TRE_DEBUG */
1502           buf = (tre_last_matched_branch_t *)xcalloc(1,
1503                                      tree->last_matched_branch->tot_branches
1504                                      * sizeof(tre_last_matched_branch_t) +
1505                                      tree->last_matched_branch->tot_last_matched
1506                                      * sizeof(tre_last_matched_t) +
1507                                      tree->last_matched_branch->tot_tags *
1508                                      sizeof(int));
1509           if (!buf)
1510             {
1511               status = REG_ESPACE;
1512               goto error_post_compile;
1513             }
1514
1515           b = buf;
1516           u = (tre_last_matched_t *)(b +
1517               tree->last_matched_branch->tot_branches);
1518           t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1519 #ifdef TRE_DEBUG
1520           _b = b;
1521           _u = u;
1522           _t = t;
1523 #endif /* TRE_DEBUG */
1524           DPRINT(("Copying info_pre to info\n"));
1525           STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1526           STACK_PUSH(stack, int, 1);
1527           STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1528
1529           while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1530             {
1531               switch (tre_stack_pop_int(stack))
1532                 {
1533                 case COPY_LAST_MATCHED_BRANCH:
1534                   i = tre_stack_pop_int(stack);
1535                   /* The tre_last_matched_branch_pre_t * is still on the
1536                      stack */
1537                   STACK_PUSHX(stack, voidptr, b);
1538                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1539                   b += i;
1540                   break;
1541
1542                 case COPY_LAST_MATCHED_BRANCH_NEXT:
1543                   bb = tre_stack_pop_voidptr(stack);
1544                   bp = tre_stack_pop_voidptr(stack);
1545                   bb->n_last_matched = bp->n_last_matched;
1546                   bb->cmp_tag = bp->cmp_tag;
1547                   if (bp->n_tags > 0)
1548                     {
1549                       int n;
1550                       n = bb->n_tags = bp->n_tags;
1551                       bb->tags = t;
1552                       for (i = 0; i < num_tags; i++)
1553                         if (bit_test(bp->tags, i))
1554                           {
1555                             *t++ = i;
1556                             if (--n <= 0)
1557                               break;
1558                           }
1559                     }
1560                   if (bp->next)
1561                     {
1562                       STACK_PUSHX(stack, voidptr, bp->next);
1563                       STACK_PUSHX(stack, voidptr, bb + 1);
1564                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1565                     }
1566                   if (bp->n_last_matched > 0)
1567                     {
1568                       bb->last_matched = u;
1569                       STACK_PUSHX(stack, voidptr, bp->last_matched);
1570                       STACK_PUSHX(stack, int, bp->n_last_matched);
1571                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1572                     }
1573                   break;
1574
1575                 case COPY_LAST_MATCHED:
1576                   i = tre_stack_pop_int(stack);
1577                   /* The tre_last_matched_pre_t * is still on the stack */
1578                   STACK_PUSHX(stack, voidptr, u);
1579                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1580                   u += i;
1581                   break;
1582
1583                 case COPY_LAST_MATCHED_NEXT:
1584                   uu = tre_stack_pop_voidptr(stack);
1585                   up = tre_stack_pop_voidptr(stack);
1586                   uu->n_branches = up->n_branches;
1587                   uu->branches = b;
1588                   uu->start_tag = up->start_tag;
1589                   if (up->next)
1590                     {
1591                       STACK_PUSHX(stack, voidptr, up->next);
1592                       STACK_PUSHX(stack, voidptr, uu + 1);
1593                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1594                     }
1595                   STACK_PUSHX(stack, voidptr, up->branches);
1596                   STACK_PUSHX(stack, int, up->n_branches);
1597                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1598                   break;
1599                 }
1600             }
1601           if (status != REG_OK)
1602             goto error_post_compile;
1603 #ifdef TRE_DEBUG
1604           DPRINT(("last_matched_branch:\n"));
1605           print_last_match_branch(buf, 0);
1606           if (b != _b + tree->last_matched_branch->tot_branches)
1607             DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1608                     b, _b + tree->last_matched_branch->tot_branches));
1609           if (u != _u + tree->last_matched_branch->tot_last_matched)
1610             DPRINT(("u/%p != _u + "
1611                     "tree->last_matched_branch->tot_last_matched/%p\n",
1612                     u, _u + tree->last_matched_branch->tot_last_matched));
1613           if (t != _t + tree->last_matched_branch->tot_tags)
1614             DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1615                     t, _t + tree->last_matched_branch->tot_tags));
1616 #endif /* TRE_DEBUG */
1617           tnfa->last_matched_branch = buf;
1618         }
1619 #ifdef TRE_DEBUG
1620       else
1621         DPRINT(("No last_match_branch_pre\n"));
1622 #endif /* TRE_DEBUG */
1623     }
1624
1625   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
1626           first_pass? "First pass" : "Second pass", num_tags));
1627 #ifdef TRE_DEBUG
1628   tre_ast_print(tree);
1629 #endif /* TRE_DEBUG */
1630   DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1631           num_tags));
1632   assert(tree->num_tags == num_tags);
1633   tnfa->end_tag = num_tags;
1634   tnfa->num_tags = num_tags;
1635   tnfa->num_minimals = num_minimals;
1636 error_post_compile:
1637   xfree(reorder_tags);
1638 error_reorder_tags:
1639   xfree(orig_regset);
1640 error_regset:
1641   return status;
1642 }
1643
1644
1645
1646 /*
1647   AST to TNFA compilation routines.
1648 */
1649
/* Work items handled by the explicit stack loop in tre_copy_ast(). */
typedef enum {
  COPY_RECURSE = 0,        /* Copy the AST node stored below this symbol. */
  COPY_SET_RESULT_PTR = 1  /* Switch the slot that receives the next copy. */
} tre_copyast_symbol_t;
1654
/* Option bits for the `flags' argument of tre_copy_ast(). */
#define COPY_REMOVE_TAGS         0x01
#define COPY_MAXIMIZE_FIRST_TAG  0x02
1658
1659 static reg_errcode_t
1660 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1661              int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1662              tre_ast_node_t **copy, int *max_pos)
1663 {
1664   reg_errcode_t status = REG_OK;
1665   int bottom = tre_stack_num_objects(stack);
1666   int num_copied = 0;
1667   int first_tag = 1;
1668   tre_ast_node_t **result = copy;
1669   tre_copyast_symbol_t symbol;
1670
1671   STACK_PUSH(stack, voidptr, ast);
1672   STACK_PUSH(stack, int, COPY_RECURSE);
1673
1674   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1675     {
1676       tre_ast_node_t *node;
1677       if (status != REG_OK)
1678         break;
1679
1680       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1681       switch (symbol)
1682         {
1683         case COPY_SET_RESULT_PTR:
1684           result = tre_stack_pop_voidptr(stack);
1685           break;
1686         case COPY_RECURSE:
1687           node = tre_stack_pop_voidptr(stack);
1688           switch (node->type)
1689             {
1690             case LITERAL:
1691               {
1692                 tre_literal_t *lit = node->obj;
1693                 int pos = lit->position;
1694                 int min = lit->code_min;
1695                 int max = lit->code_max;
1696                 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1697                                                   lit->u.bracket_match_list :
1698                                                   NULL;
1699                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1700                   {
1701                     /* XXX - e.g. [ab] has only one position but two
1702                        nodes, so we are creating holes in the state space
1703                        here.  Not fatal, just wastes memory. */
1704                     pos += *pos_add;
1705                     num_copied++;
1706                   }
1707                 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1708                   {
1709                     /* Change this tag to empty. */
1710                     min = EMPTY;
1711                     max = pos = -1;
1712                   }
1713                 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1714                          && first_tag)
1715                   {
1716                     /* Maximize the first tag. */
1717                     if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1718                       tag_directions[max] = TRE_TAG_MAXIMIZE;
1719                     first_tag = 0;
1720                   }
1721                 *result = tre_ast_new_literal(mem, min, max, pos);
1722                 if (*result == NULL)
1723                   status = REG_ESPACE;
1724
1725                 if (pos > *max_pos)
1726                   *max_pos = pos;
1727
1728                 if (!IS_SPECIAL(lit))
1729                   ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1730                       = list;
1731                 break;
1732               }
1733             case UNION:
1734               {
1735                 tre_union_t *uni = node->obj;
1736                 tre_union_t *tmp;
1737                 *result = tre_ast_new_union(mem, uni->left, uni->right);
1738                 if (*result == NULL)
1739                   {
1740                     status = REG_ESPACE;
1741                     break;
1742                   }
1743                 tmp = (*result)->obj;
1744                 result = &tmp->left;
1745                 STACK_PUSHX(stack, voidptr, uni->right);
1746                 STACK_PUSHX(stack, int, COPY_RECURSE);
1747                 STACK_PUSHX(stack, voidptr, &tmp->right);
1748                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1749                 STACK_PUSHX(stack, voidptr, uni->left);
1750                 STACK_PUSHX(stack, int, COPY_RECURSE);
1751                 break;
1752               }
1753             case CATENATION:
1754               {
1755                 tre_catenation_t *cat = node->obj;
1756                 tre_catenation_t *tmp;
1757                 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1758                 if (*result == NULL)
1759                   {
1760                     status = REG_ESPACE;
1761                     break;
1762                   }
1763                 tmp = (*result)->obj;
1764                 tmp->left = NULL;
1765                 tmp->right = NULL;
1766                 result = &tmp->left;
1767
1768                 STACK_PUSHX(stack, voidptr, cat->right);
1769                 STACK_PUSHX(stack, int, COPY_RECURSE);
1770                 STACK_PUSHX(stack, voidptr, &tmp->right);
1771                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1772                 STACK_PUSHX(stack, voidptr, cat->left);
1773                 STACK_PUSHX(stack, int, COPY_RECURSE);
1774                 break;
1775               }
1776             case ITERATION:
1777               {
1778                 tre_iteration_t *iter = node->obj;
1779                 STACK_PUSHX(stack, voidptr, iter->arg);
1780                 STACK_PUSHX(stack, int, COPY_RECURSE);
1781                 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1782                                            iter->max, iter->minimal);
1783                 if (*result == NULL)
1784                   {
1785                     status = REG_ESPACE;
1786                     break;
1787                   }
1788                 iter = (*result)->obj;
1789                 result = &iter->arg;
1790                 break;
1791               }
1792             default:
1793               assert(0);
1794               break;
1795             }
1796           break;
1797         }
1798     }
1799   *pos_add += num_copied;
1800   return status;
1801 }
1802
/* Work items handled by the explicit stack loop in tre_expand_ast(). */
typedef enum {
  EXPAND_RECURSE = 0,    /* Visit the node and schedule its children. */
  EXPAND_AFTER_ITER = 1  /* Argument of an iteration has been visited. */
} tre_expand_ast_symbol_t;
1807
1808 /* Expands each iteration node that has a finite nonzero minimum or maximum
1809    iteration count to a catenated sequence of copies of the node. */
1810 static reg_errcode_t
1811 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1812                int *position, tre_tag_direction_t *tag_directions,
1813                int __unused *max_depth)
1814 {
1815   reg_errcode_t status = REG_OK;
1816   int bottom = tre_stack_num_objects(stack);
1817   int pos_add = 0;
1818   int pos_add_total = 0;
1819   int max_pos = 0;
1820 #ifdef TRE_APPROX
1821   /* Current approximate matching parameters. */
1822   int params[TRE_PARAM_LAST];
1823   /* Approximate parameter nesting level. */
1824   int params_depth = 0;
1825 #endif /* TRE_APPROX */
1826   int iter_depth = 0;
1827 #ifdef TRE_APPROX
1828   int i;
1829 #endif /* TRE_APPROX */
1830
1831 #ifdef TRE_APPROX
1832   for (i = 0; i < TRE_PARAM_LAST; i++)
1833     params[i] = TRE_PARAM_DEFAULT;
1834 #endif /* TRE_APPROX */
1835
1836   STACK_PUSHR(stack, voidptr, ast);
1837   STACK_PUSHR(stack, int, EXPAND_RECURSE);
1838   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1839     {
1840       tre_ast_node_t *node;
1841       tre_expand_ast_symbol_t symbol;
1842
1843       if (status != REG_OK)
1844         break;
1845
1846       DPRINT(("pos_add %d\n", pos_add));
1847
1848       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1849       node = tre_stack_pop_voidptr(stack);
1850       switch (symbol)
1851         {
1852         case EXPAND_RECURSE:
1853           switch (node->type)
1854             {
1855             case LITERAL:
1856               {
1857                 tre_literal_t *lit= node->obj;
1858                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1859                   {
1860                     lit->position += pos_add;
1861                     if (lit->position > max_pos)
1862                       max_pos = lit->position;
1863                   }
1864                 break;
1865               }
1866             case UNION:
1867               {
1868                 tre_union_t *uni = node->obj;
1869                 STACK_PUSHX(stack, voidptr, uni->right);
1870                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1871                 STACK_PUSHX(stack, voidptr, uni->left);
1872                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1873                 break;
1874               }
1875             case CATENATION:
1876               {
1877                 tre_catenation_t *cat = node->obj;
1878                 STACK_PUSHX(stack, voidptr, cat->right);
1879                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1880                 STACK_PUSHX(stack, voidptr, cat->left);
1881                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1882                 break;
1883               }
1884             case ITERATION:
1885               {
1886                 tre_iteration_t *iter = node->obj;
1887                 STACK_PUSHX(stack, int, pos_add);
1888                 STACK_PUSHX(stack, voidptr, node);
1889                 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1890                 STACK_PUSHX(stack, voidptr, iter->arg);
1891                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1892                 /* If we are going to expand this node at EXPAND_AFTER_ITER
1893                    then don't increase the `pos' fields of the nodes now, it
1894                    will get done when expanding. */
1895                 if (iter->min > 1 || iter->max > 1)
1896                   pos_add = 0;
1897                 iter_depth++;
1898                 DPRINT(("iter\n"));
1899                 break;
1900               }
1901             default:
1902               assert(0);
1903               break;
1904             }
1905           break;
1906         case EXPAND_AFTER_ITER:
1907           {
1908             tre_iteration_t *iter = node->obj;
1909             int pos_add_last;
1910             pos_add = tre_stack_pop_int(stack);
1911             pos_add_last = pos_add;
1912             /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1913                immediate replace the whole iteration with EMPTY.  This
1914                unfortunately drops any submatches, and messes up setting the
1915                pmatch values (we can get tags of -1, and tag values in the
1916                billions).  So we left it there and replace with EMPTY here. */
1917             if (iter->min == 0 && iter->max == 0)
1918               {
1919                 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1920                 if (empty == NULL)
1921                   return REG_ESPACE;
1922                 node->obj = empty->obj;
1923                 node->type = empty->type;
1924               }
1925             else if (iter->min > 1 || iter->max > 1)
1926               {
1927                 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1928                 int j;
1929                 int pos_add_save = pos_add;
1930
1931                 /* Create a catenated sequence of copies of the node. */
1932                 for (j = 0; j < iter->min; j++)
1933                   {
1934                     tre_ast_node_t *copy;
1935                     /* Remove tags from all but the last copy. */
1936                     int flags = ((j + 1 < iter->min)
1937                                  ? COPY_REMOVE_TAGS
1938                                  : COPY_MAXIMIZE_FIRST_TAG);
1939                     DPRINT(("  pos_add %d\n", pos_add));
1940                     pos_add_save = pos_add;
1941                     status = tre_copy_ast(mem, stack, iter->arg, flags,
1942                                           &pos_add, tag_directions, &copy,
1943                                           &max_pos);
1944                     if (status != REG_OK)
1945                       return status;
1946                     if (seq1 != NULL)
1947                       seq1 = tre_ast_new_catenation(mem, seq1, copy);
1948                     else
1949                       seq1 = copy;
1950                     if (seq1 == NULL)
1951                       return REG_ESPACE;
1952                   }
1953
1954                 if (iter->max == -1)
1955                   {
1956                     /* No upper limit. */
1957                     pos_add_save = pos_add;
1958                     status = tre_copy_ast(mem, stack, iter->arg, 0,
1959                                           &pos_add, NULL, &seq2, &max_pos);
1960                     if (status != REG_OK)
1961                       return status;
1962                     seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1963                     if (seq2 == NULL)
1964                       return REG_ESPACE;
1965                   }
1966                 else
1967                   {
1968                     for (j = iter->min; j < iter->max; j++)
1969                       {
1970                         tre_ast_node_t *tmp, *copy;
1971                         pos_add_save = pos_add;
1972                         status = tre_copy_ast(mem, stack, iter->arg, 0,
1973                                               &pos_add, NULL, &copy, &max_pos);
1974                         if (status != REG_OK)
1975                           return status;
1976                         if (seq2 != NULL)
1977                           seq2 = tre_ast_new_catenation(mem, copy, seq2);
1978                         else
1979                           seq2 = copy;
1980                         if (seq2 == NULL)
1981                           return REG_ESPACE;
1982                         tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1983                         if (tmp == NULL)
1984                           return REG_ESPACE;
1985                         seq2 = tre_ast_new_union(mem, tmp, seq2);
1986                         if (seq2 == NULL)
1987                           return REG_ESPACE;
1988                       }
1989                   }
1990
1991                 pos_add = pos_add_save;
1992                 if (seq1 == NULL)
1993                   seq1 = seq2;
1994                 else if (seq2 != NULL)
1995                   seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1996                 if (seq1 == NULL)
1997                   return REG_ESPACE;
1998                 node->obj = seq1->obj;
1999                 node->type = seq1->type;
2000               }
2001
2002             iter_depth--;
2003             pos_add_total += pos_add - pos_add_last;
2004             if (iter_depth == 0)
2005               pos_add = pos_add_total;
2006
2007 #ifdef TRE_APPROX
2008             /* If approximate parameters are specified, surround the result
2009                with two parameter setting nodes.  The one on the left sets
2010                the specified parameters, and the one on the right restores
2011                the old parameters. */
2012             if (iter->params)
2013               {
2014                 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2015                 int *old_params;
2016
2017                 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2018                 if (!tmp_l)
2019                   return REG_ESPACE;
2020                 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2021                 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2022                 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2023                 if (!tmp_r)
2024                   return REG_ESPACE;
2025                 old_params = tre_mem_alloc(mem, sizeof(*old_params)
2026                                            * TRE_PARAM_LAST);
2027                 if (!old_params)
2028                   return REG_ESPACE;
2029                 for (i = 0; i < TRE_PARAM_LAST; i++)
2030                   old_params[i] = params[i];
2031                 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2032                 old_params[TRE_PARAM_DEPTH] = params_depth;
2033                 /* XXX - this is the only place where ast_new_node is
2034                    needed -- should be moved inside AST module. */
2035                 node_copy = tre_ast_new_node(mem, ITERATION,
2036                                              sizeof(tre_iteration_t));
2037                 if (!node_copy)
2038                   return REG_ESPACE;
2039                 node_copy->obj = node->obj;
2040                 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2041                 if (!tmp_node)
2042                   return REG_ESPACE;
2043                 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2044                 if (!tmp_node)
2045                   return REG_ESPACE;
2046                 /* Replace the contents of `node' with `tmp_node'. */
2047                 memcpy(node, tmp_node, sizeof(*node));
2048                 node->obj = tmp_node->obj;
2049                 node->type = tmp_node->type;
2050                 params_depth++;
2051                 if (params_depth > *max_depth)
2052                   *max_depth = params_depth;
2053               }
2054 #endif /* TRE_APPROX */
2055             break;
2056           }
2057         default:
2058           assert(0);
2059           break;
2060         }
2061     }
2062
2063   *position += pos_add_total;
2064
2065   /* `max_pos' should never be larger than `*position' if the above
2066      code works, but just an extra safeguard let's make sure
2067      `*position' is set large enough so enough memory will be
2068      allocated for the transition table. */
2069   if (max_pos > *position)
2070     *position = max_pos;
2071
2072 #ifdef TRE_DEBUG
2073   DPRINT(("Expanded AST:\n"));
2074   tre_ast_print(ast);
2075   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2076 #endif
2077
2078   return status;
2079 }
2080
2081 static tre_pos_and_tags_t *
2082 tre_set_empty(tre_mem_t mem)
2083 {
2084   tre_pos_and_tags_t *new_set;
2085
2086   new_set = tre_mem_calloc(mem, sizeof(*new_set));
2087   if (new_set == NULL)
2088     return NULL;
2089
2090   new_set[0].position = -1;
2091   new_set[0].code_min = -1;
2092   new_set[0].code_max = -1;
2093
2094   return new_set;
2095 }
2096
2097 static tre_pos_and_tags_t *
2098 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2099             tre_bracket_match_list_t *bracket_match_list, int backref)
2100 {
2101   tre_pos_and_tags_t *new_set;
2102
2103   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2104   if (new_set == NULL)
2105     return NULL;
2106
2107   new_set[0].position = position;
2108   new_set[0].code_min = code_min;
2109   new_set[0].code_max = code_max;
2110   new_set[0].bracket_match_list = bracket_match_list;
2111   new_set[0].backref = backref;
2112   new_set[1].position = -1;
2113   new_set[1].code_min = -1;
2114   new_set[1].code_max = -1;
2115
2116   return new_set;
2117 }
2118
2119 static tre_pos_and_tags_t *
2120 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2121               int *tags, int assertions, int *params)
2122 {
2123   int s1, s2, i, j;
2124   tre_pos_and_tags_t *new_set;
2125   int *new_tags;
2126   int num_tags;
2127
2128   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2129   for (s1 = 0; set1[s1].position >= 0; s1++);
2130   for (s2 = 0; set2[s2].position >= 0; s2++);
2131   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2132   if (!new_set )
2133     return NULL;
2134
2135   for (s1 = 0; set1[s1].position >= 0; s1++)
2136     {
2137       new_set[s1].position = set1[s1].position;
2138       new_set[s1].code_min = set1[s1].code_min;
2139       new_set[s1].code_max = set1[s1].code_max;
2140       new_set[s1].assertions = set1[s1].assertions | assertions;
2141       new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2142       new_set[s1].backref = set1[s1].backref;
2143       if (set1[s1].tags == NULL && tags == NULL)
2144         new_set[s1].tags = NULL;
2145       else
2146         {
2147           for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2148           new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2149                                          * (i + num_tags + 1)));
2150           if (new_tags == NULL)
2151             return NULL;
2152           for (j = 0; j < i; j++)
2153             new_tags[j] = set1[s1].tags[j];
2154           for (i = 0; i < num_tags; i++)
2155             new_tags[j + i] = tags[i];
2156           new_tags[j + i] = -1;
2157           new_set[s1].tags = new_tags;
2158         }
2159       if (set1[s1].params)
2160         new_set[s1].params = set1[s1].params;
2161       if (params)
2162         {
2163           if (!new_set[s1].params)
2164             new_set[s1].params = params;
2165           else
2166             {
2167               new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2168                                                  TRE_PARAM_LAST);
2169               if (!new_set[s1].params)
2170                 return NULL;
2171               for (i = 0; i < TRE_PARAM_LAST; i++)
2172                 if (params[i] != TRE_PARAM_UNSET)
2173                   new_set[s1].params[i] = params[i];
2174             }
2175         }
2176     }
2177
2178   for (s2 = 0; set2[s2].position >= 0; s2++)
2179     {
2180       new_set[s1 + s2].position = set2[s2].position;
2181       new_set[s1 + s2].code_min = set2[s2].code_min;
2182       new_set[s1 + s2].code_max = set2[s2].code_max;
2183       /* XXX - why not | assertions here as well? */
2184       new_set[s1 + s2].assertions = set2[s2].assertions;
2185       new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2186       new_set[s1 + s2].backref = set2[s2].backref;
2187       if (set2[s2].tags == NULL)
2188         new_set[s1 + s2].tags = NULL;
2189       else
2190         {
2191           for (i = 0; set2[s2].tags[i] >= 0; i++);
2192           new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2193           if (new_tags == NULL)
2194             return NULL;
2195           for (j = 0; j < i; j++)
2196             new_tags[j] = set2[s2].tags[j];
2197           new_tags[j] = -1;
2198           new_set[s1 + s2].tags = new_tags;
2199         }
2200       if (set2[s2].params)
2201         new_set[s1 + s2].params = set2[s2].params;
2202       if (params)
2203         {
2204           if (!new_set[s1 + s2].params)
2205             new_set[s1 + s2].params = params;
2206           else
2207             {
2208               new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2209                                                       TRE_PARAM_LAST);
2210               if (!new_set[s1 + s2].params)
2211                 return NULL;
2212               for (i = 0; i < TRE_PARAM_LAST; i++)
2213                 if (params[i] != TRE_PARAM_UNSET)
2214                   new_set[s1 + s2].params[i] = params[i];
2215             }
2216         }
2217     }
2218   new_set[s1 + s2].position = -1;
2219   return new_set;
2220 }
2221
2222 /* Finds the empty path through `node' which is the one that should be
2223    taken according to POSIX.2 rules, and adds the tags on that path to
2224    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2225    set to the number of tags seen on the path. */
2226 static reg_errcode_t
2227 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2228                 int *assertions, int *params, int *num_tags_seen,
2229                 int *params_seen)
2230 {
2231   tre_literal_t *lit;
2232   tre_union_t *uni;
2233   tre_catenation_t *cat;
2234   tre_iteration_t *iter;
2235   int i;
2236   int bottom = tre_stack_num_objects(stack);
2237   reg_errcode_t status = REG_OK;
2238   if (num_tags_seen)
2239     *num_tags_seen = 0;
2240   if (params_seen)
2241     *params_seen = 0;
2242
2243   status = tre_stack_push_voidptr(stack, node);
2244
2245   /* Walk through the tree recursively. */
2246   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2247     {
2248       node = tre_stack_pop_voidptr(stack);
2249
2250       switch (node->type)
2251         {
2252         case LITERAL:
2253           lit = (tre_literal_t *)node->obj;
2254           switch (lit->code_min)
2255             {
2256             case TAG:
2257               if (lit->code_max >= 0)
2258                 {
2259                   if (tags != NULL)
2260                     {
2261                       /* Add the tag to `tags'. */
2262                       for (i = 0; tags[i] >= 0; i++)
2263                         if (tags[i] == lit->code_max)
2264                           break;
2265                       if (tags[i] < 0)
2266                         {
2267                           tags[i] = lit->code_max;
2268                           tags[i + 1] = -1;
2269                         }
2270                     }
2271                   if (num_tags_seen)
2272                     (*num_tags_seen)++;
2273                 }
2274               break;
2275             case ASSERTION:
2276               assert(lit->code_max >= 1
2277                      || lit->code_max <= ASSERT_LAST);
2278               if (assertions != NULL)
2279                 *assertions |= lit->code_max;
2280               break;
2281             case PARAMETER:
2282               if (params != NULL)
2283                 for (i = 0; i < TRE_PARAM_LAST; i++)
2284                   params[i] = lit->u.params[i];
2285               if (params_seen != NULL)
2286                 *params_seen = 1;
2287               break;
2288             case EMPTY:
2289               break;
2290             default:
2291               assert(0);
2292               break;
2293             }
2294           break;
2295
2296         case UNION:
2297           /* Subexpressions starting earlier take priority over ones
2298              starting later, so we prefer the left subexpression over the
2299              right subexpression. */
2300           uni = (tre_union_t *)node->obj;
2301           if (uni->left->nullable)
2302             STACK_PUSHX(stack, voidptr, uni->left)
2303           else if (uni->right->nullable)
2304             STACK_PUSHX(stack, voidptr, uni->right)
2305           else
2306             assert(0);
2307           break;
2308
2309         case CATENATION:
2310           /* The path must go through both children. */
2311           cat = (tre_catenation_t *)node->obj;
2312           assert(cat->left->nullable);
2313           assert(cat->right->nullable);
2314           STACK_PUSHX(stack, voidptr, cat->left);
2315           STACK_PUSHX(stack, voidptr, cat->right);
2316           break;
2317
2318         case ITERATION:
2319           /* A match with an empty string is preferred over no match at
2320              all, so we go through the argument if possible. */
2321           iter = (tre_iteration_t *)node->obj;
2322           if (iter->arg->nullable)
2323             STACK_PUSHX(stack, voidptr, iter->arg);
2324           break;
2325
2326         default:
2327           assert(0);
2328           break;
2329         }
2330     }
2331
2332   return status;
2333 }
2334
2335
/* Work items handled by the explicit stack loop in tre_compute_nfl(). */
typedef enum {
  NFL_RECURSE = 0,
  NFL_POST_UNION = 1,
  NFL_POST_CATENATION = 2,
  NFL_POST_ITERATION = 3
} tre_nfl_stack_symbol_t;
2342
2343
2344 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2345    the nodes of the AST `tree'. */
2346 static reg_errcode_t
2347 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2348 {
2349   int bottom = tre_stack_num_objects(stack);
2350
2351   STACK_PUSHR(stack, voidptr, tree);
2352   STACK_PUSHR(stack, int, NFL_RECURSE);
2353
2354   while (tre_stack_num_objects(stack) > bottom)
2355     {
2356       tre_nfl_stack_symbol_t symbol;
2357       tre_ast_node_t *node;
2358
2359       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2360       node = tre_stack_pop_voidptr(stack);
2361       switch (symbol)
2362         {
2363         case NFL_RECURSE:
2364           switch (node->type)
2365             {
2366             case LITERAL:
2367               {
2368                 tre_literal_t *lit = (tre_literal_t *)node->obj;
2369                 if (IS_BACKREF(lit))
2370                   {
2371                     /* Back references: nullable = false, firstpos = {i},
2372                        lastpos = {i}. */
2373                     node->nullable = 0;
2374                     node->firstpos = tre_set_one(mem, lit->position, 0,
2375                                              TRE_CHAR_MAX, NULL, -1);
2376                     if (!node->firstpos)
2377                       return REG_ESPACE;
2378                     node->lastpos = tre_set_one(mem, lit->position, 0,
2379                                                 TRE_CHAR_MAX, NULL,
2380                                                 (int)lit->code_max);
2381                     if (!node->lastpos)
2382                       return REG_ESPACE;
2383                   }
2384                 else if (lit->code_min < 0)
2385                   {
2386                     /* Tags, empty strings, params, and zero width assertions:
2387                        nullable = true, firstpos = {}, and lastpos = {}. */
2388                     node->nullable = 1;
2389                     node->firstpos = tre_set_empty(mem);
2390                     if (!node->firstpos)
2391                       return REG_ESPACE;
2392                     node->lastpos = tre_set_empty(mem);
2393                     if (!node->lastpos)
2394                       return REG_ESPACE;
2395                   }
2396                 else
2397                   {
2398                     /* Literal at position i: nullable = false, firstpos = {i},
2399                        lastpos = {i}. */
2400                     node->nullable = 0;
2401                     node->firstpos =
2402                       tre_set_one(mem, lit->position, (int)lit->code_min,
2403                                   (int)lit->code_max, NULL, -1);
2404                     if (!node->firstpos)
2405                       return REG_ESPACE;
2406                     node->lastpos = tre_set_one(mem, lit->position,
2407                                                 (int)lit->code_min,
2408                                                 (int)lit->code_max,
2409                                                 lit->u.bracket_match_list,
2410                                                 -1);
2411                     if (!node->lastpos)
2412                       return REG_ESPACE;
2413                   }
2414                 break;
2415               }
2416
2417             case UNION:
2418               /* Compute the attributes for the two subtrees, and after that
2419                  for this node. */
2420               STACK_PUSHR(stack, voidptr, node);
2421               STACK_PUSHR(stack, int, NFL_POST_UNION);
2422               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2423               STACK_PUSHR(stack, int, NFL_RECURSE);
2424               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2425               STACK_PUSHR(stack, int, NFL_RECURSE);
2426               break;
2427
2428             case CATENATION:
2429               /* Compute the attributes for the two subtrees, and after that
2430                  for this node. */
2431               STACK_PUSHR(stack, voidptr, node);
2432               STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2433               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2434               STACK_PUSHR(stack, int, NFL_RECURSE);
2435               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2436               STACK_PUSHR(stack, int, NFL_RECURSE);
2437               break;
2438
2439             case ITERATION:
2440               /* Compute the attributes for the subtree, and after that for
2441                  this node. */
2442               STACK_PUSHR(stack, voidptr, node);
2443               STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2444               STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2445               STACK_PUSHR(stack, int, NFL_RECURSE);
2446               break;
2447             }
2448           break; /* end case: NFL_RECURSE */
2449
2450         case NFL_POST_UNION:
2451           {
2452             tre_union_t *uni = (tre_union_t *)node->obj;
2453             node->nullable = uni->left->nullable || uni->right->nullable;
2454             node->firstpos = tre_set_union(mem, uni->left->firstpos,
2455                                            uni->right->firstpos, NULL, 0, NULL);
2456             if (!node->firstpos)
2457               return REG_ESPACE;
2458             node->lastpos = tre_set_union(mem, uni->left->lastpos,
2459                                           uni->right->lastpos, NULL, 0, NULL);
2460             if (!node->lastpos)
2461               return REG_ESPACE;
2462             break;
2463           }
2464
2465         case NFL_POST_ITERATION:
2466           {
2467             int num_tags, *tags, assertions, params_seen;
2468             int *params;
2469             reg_errcode_t status;
2470             tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2471
2472             /* From Ville Laurikari's original 2001 Master's thesis, the
2473                firstpos(n) and lastpos(n) of an iteration is just the
2474                corresponding values of the iteration's argument.  Unfortunately,
2475                this isn't sufficient for the following BRE:
2476
2477                    \(a*\)*b\(\1\)    matched against    ab
2478
2479                The backreference wants to force the first subexpression to
2480                be the empty string, but there is no transition for this.  So
2481                we need to modify the lastpos(n) of an iteration to be the
2482                equivalent of that of catentation.  Using the same notation as
2483                in the thesis, lastpos(n) is redefined as:
2484
2485                    if nullable(c1) then
2486                        lastpos(c1) U
2487                        addtags(lastpos(c1),
2488                                emptymatch(c1))
2489                    else
2490                        lastpos(c1)
2491
2492                where c1 is the argument node.  firstpos(n) remains the same. */
2493
2494             /* Compute lastpos. */
2495             if (iter->min == 0 || iter->arg->nullable)
2496               {
2497                 node->nullable = 1;
2498                 if (iter->arg->nullable)
2499                   {
2500                     /* The arg matches the empty string.  Make a first pass
2501                        with tre_match_empty() to get the number of tags and
2502                        parameters. */
2503                     status = tre_match_empty(stack, iter->arg,
2504                                              NULL, NULL, NULL, &num_tags,
2505                                              &params_seen);
2506                     if (status != REG_OK)
2507                       return status;
2508                     /* Allocate arrays for the tags and parameters. */
2509                     tags = xmalloc(sizeof(int) * (num_tags + 1));
2510                     if (!tags)
2511                       return REG_ESPACE;
2512                     tags[0] = -1;
2513                     assertions = 0;
2514                     params = NULL;
2515                     if (params_seen)
2516                       {
2517                         params = tre_mem_alloc(mem, sizeof(*params)
2518                                                * TRE_PARAM_LAST);
2519                         if (!params)
2520                           {
2521                             xfree(tags);
2522                             return REG_ESPACE;
2523                           }
2524                       }
2525                     /* Second pass with tre_mach_empty() to get the list of
2526                        tags and parameters. */
2527                     status = tre_match_empty(stack, iter->arg, tags,
2528                                              &assertions, params, NULL, NULL);
2529                     if (status != REG_OK)
2530                       {
2531                         xfree(tags);
2532                         return status;
2533                       }
2534                     node->lastpos =
2535                       tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2536                                     tags, assertions, params);
2537                     xfree(tags);
2538                     if (!node->lastpos)
2539                       return REG_ESPACE;
2540                   }
2541                 else
2542                   node->lastpos = iter->arg->lastpos;
2543               }
2544             else
2545               {
2546                 node->nullable = 0;
2547                 node->lastpos = iter->arg->lastpos;
2548               }
2549             node->firstpos = iter->arg->firstpos;
2550             break;
2551           }
2552
2553         case NFL_POST_CATENATION:
2554           {
2555             int num_tags, *tags, assertions, params_seen;
2556             int *params;
2557             reg_errcode_t status;
2558             tre_catenation_t *cat = node->obj;
2559             node->nullable = cat->left->nullable && cat->right->nullable;
2560
2561             /* Compute firstpos. */
2562             if (cat->left->nullable)
2563               {
2564                 /* The left side matches the empty string.  Make a first pass
2565                    with tre_match_empty() to get the number of tags and
2566                    parameters. */
2567                 status = tre_match_empty(stack, cat->left,
2568                                          NULL, NULL, NULL, &num_tags,
2569                                          &params_seen);
2570                 if (status != REG_OK)
2571                   return status;
2572                 /* Allocate arrays for the tags and parameters. */
2573                 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2574                 if (!tags)
2575                   return REG_ESPACE;
2576                 tags[0] = -1;
2577                 assertions = 0;
2578                 params = NULL;
2579                 if (params_seen)
2580                   {
2581                     params = tre_mem_alloc(mem, sizeof(*params)
2582                                            * TRE_PARAM_LAST);
2583                     if (!params)
2584                       {
2585                         xfree(tags);
2586                         return REG_ESPACE;
2587                       }
2588                   }
2589                 /* Second pass with tre_mach_empty() to get the list of
2590                    tags and parameters. */
2591                 status = tre_match_empty(stack, cat->left, tags,
2592                                          &assertions, params, NULL, NULL);
2593                 if (status != REG_OK)
2594                   {
2595                     xfree(tags);
2596                     return status;
2597                   }
2598                 node->firstpos =
2599                   tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2600                                 tags, assertions, params);
2601                 xfree(tags);
2602                 if (!node->firstpos)
2603                   return REG_ESPACE;
2604               }
2605             else
2606               {
2607                 node->firstpos = cat->left->firstpos;
2608               }
2609
2610             /* Compute lastpos. */
2611             if (cat->right->nullable)
2612               {
2613                 /* The right side matches the empty string.  Make a first pass
2614                    with tre_match_empty() to get the number of tags and
2615                    parameters. */
2616                 status = tre_match_empty(stack, cat->right,
2617                                          NULL, NULL, NULL, &num_tags,
2618                                          &params_seen);
2619                 if (status != REG_OK)
2620                   return status;
2621                 /* Allocate arrays for the tags and parameters. */
2622                 tags = xmalloc(sizeof(int) * (num_tags + 1));
2623                 if (!tags)
2624                   return REG_ESPACE;
2625                 tags[0] = -1;
2626                 assertions = 0;
2627                 params = NULL;
2628                 if (params_seen)
2629                   {
2630                     params = tre_mem_alloc(mem, sizeof(*params)
2631                                            * TRE_PARAM_LAST);
2632                     if (!params)
2633                       {
2634                         xfree(tags);
2635                         return REG_ESPACE;
2636                       }
2637                   }
2638                 /* Second pass with tre_mach_empty() to get the list of
2639                    tags and parameters. */
2640                 status = tre_match_empty(stack, cat->right, tags,
2641                                          &assertions, params, NULL, NULL);
2642                 if (status != REG_OK)
2643                   {
2644                     xfree(tags);
2645                     return status;
2646                   }
2647                 node->lastpos =
2648                   tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2649                                 tags, assertions, params);
2650                 xfree(tags);
2651                 if (!node->lastpos)
2652                   return REG_ESPACE;
2653               }
2654             else
2655               {
2656                 node->lastpos = cat->right->lastpos;
2657               }
2658             break;
2659           }
2660
2661         default:
2662           assert(0);
2663           break;
2664         }
2665     }
2666
2667   return REG_OK;
2668 }
2669
2670
2671 /* Adds a transition from each position in `p1' to each position in `p2'. */
2672 static reg_errcode_t
2673 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2674                tre_tnfa_transition_t *transitions,
2675                int *counts, int *offs)
2676 {
2677   tre_pos_and_tags_t *orig_p2 = p2;
2678   tre_tnfa_transition_t *trans;
2679   int i, j, k, l, dup, prev_p2_pos;
2680
2681   if (transitions != NULL)
2682     while (p1->position >= 0)
2683       {
2684         p2 = orig_p2;
2685         prev_p2_pos = -1;
2686         while (p2->position >= 0)
2687           {
2688             /* Optimization: if this position was already handled, skip it. */
2689             if (p2->position == prev_p2_pos)
2690               {
2691                 p2++;
2692                 continue;
2693               }
2694             prev_p2_pos = p2->position;
2695             /* Set `trans' to point to the next unused transition from
2696                position `p1->position'. */
2697             trans = transitions + offs[p1->position];
2698             while (trans->state != NULL)
2699               {
2700 #if 0
2701                 /* If we find a previous transition from `p1->position' to
2702                    `p2->position', it is overwritten.  This can happen only
2703                    if there are nested loops in the regexp, like in "((a)*)*".
2704                    In POSIX.2 repetition using the outer loop is always
2705                    preferred over using the inner loop.  Therefore the
2706                    transition for the inner loop is useless and can be thrown
2707                    away. */
2708                 /* XXX - The same position is used for all nodes in a bracket
2709                    expression, so this optimization cannot be used (it will
2710                    break bracket expressions) unless I figure out a way to
2711                    detect it here. */
2712                 if (trans->state_id == p2->position)
2713                   {
2714                     DPRINT(("*"));
2715                     break;
2716                   }
2717 #endif
2718                 trans++;
2719               }
2720
2721             if (trans->state == NULL)
2722               (trans + 1)->state = NULL;
2723             /* Use the character ranges, assertions, etc. from `p1' for
2724                the transition from `p1' to `p2'. */
2725             trans->code_min = p1->code_min;
2726             trans->code_max = p1->code_max;
2727             trans->state = transitions + offs[p2->position];
2728             trans->state_id = p2->position;
2729             trans->assertions = p1->assertions | p2->assertions
2730               | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2731             if (p1->backref >= 0)
2732               {
2733                 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2734                 assert(p2->backref < 0);
2735                 trans->u.backref = p1->backref;
2736                 trans->assertions |= ASSERT_BACKREF;
2737               }
2738             if (p1->bracket_match_list != NULL)
2739               {
2740                 trans->u.bracket_match_list =
2741                   xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2742                 if (trans->u.bracket_match_list == NULL)
2743                   return REG_ESPACE;
2744                 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2745                        SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2746               }
2747
2748             /* Find out how many tags this transition has. */
2749             i = 0;
2750             if (p1->tags != NULL)
2751               while(p1->tags[i] >= 0)
2752                 i++;
2753             j = 0;
2754             if (p2->tags != NULL)
2755               while(p2->tags[j] >= 0)
2756                 j++;
2757
2758             /* If we are overwriting a transition, free the old tag array. */
2759             if (trans->tags != NULL)
2760               xfree(trans->tags);
2761             trans->tags = NULL;
2762
2763             /* If there were any tags, allocate an array and fill it. */
2764             if (i + j > 0)
2765               {
2766                 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2767                 if (!trans->tags)
2768                   return REG_ESPACE;
2769                 i = 0;
2770                 if (p1->tags != NULL)
2771                   while(p1->tags[i] >= 0)
2772                     {
2773                       trans->tags[i] = p1->tags[i];
2774                       i++;
2775                     }
2776                 l = i;
2777                 j = 0;
2778                 if (p2->tags != NULL)
2779                   while (p2->tags[j] >= 0)
2780                     {
2781                       /* Don't add duplicates. */
2782                       dup = 0;
2783                       for (k = 0; k < i; k++)
2784                         if (trans->tags[k] == p2->tags[j])
2785                           {
2786                             dup = 1;
2787                             break;
2788                           }
2789                       if (!dup)
2790                         trans->tags[l++] = p2->tags[j];
2791                       j++;
2792                     }
2793                 trans->tags[l] = -1;
2794               }
2795
2796             /* Set the parameter array.  If both `p2' and `p1' have same
2797                parameters, the values in `p2' override those in `p1'. */
2798             if (p1->params || p2->params)
2799               {
2800                 if (!trans->params)
2801                   trans->params = xmalloc(sizeof(*trans->params)
2802                                           * TRE_PARAM_LAST);
2803                 if (!trans->params)
2804                   return REG_ESPACE;
2805                 for (i = 0; i < TRE_PARAM_LAST; i++)
2806                   {
2807                     trans->params[i] = TRE_PARAM_UNSET;
2808                     if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2809                       trans->params[i] = p1->params[i];
2810                     if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2811                       trans->params[i] = p2->params[i];
2812                   }
2813               }
2814             else
2815               {
2816                 if (trans->params)
2817                   xfree(trans->params);
2818                 trans->params = NULL;
2819               }
2820
2821
2822 #ifdef TRE_DEBUG
2823             {
2824               int *tags;
2825
2826               DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
2827                       p1->code_min));
2828               if (p1->code_max != p1->code_min)
2829                 DPRINT(("-%3d", p1->code_max));
2830               tags = trans->tags;
2831               if (tags)
2832                 {
2833                   DPRINT((", tags ["));
2834                   while (*tags >= 0)
2835                     {
2836                       DPRINT(("%d", *tags));
2837                       tags++;
2838                       if (*tags >= 0)
2839                         DPRINT((","));
2840                     }
2841                   DPRINT(("]"));
2842                 }
2843               if (trans->assertions)
2844                 DPRINT((", assert %d", trans->assertions));
2845               if (trans->assertions & ASSERT_BACKREF)
2846                 DPRINT((", backref %d", trans->u.backref));
2847               else if (trans->assertions & ASSERT_BRACKET_MATCH)
2848                 DPRINT((", bracket_match_list %p",
2849                         trans->u.bracket_match_list));
2850               if (trans->params)
2851                 {
2852                   DPRINT((", "));
2853                   tre_print_params(trans->params);
2854                 }
2855               DPRINT(("\n"));
2856             }
2857 #endif /* TRE_DEBUG */
2858             p2++;
2859           }
2860         p1++;
2861       }
2862   else
2863     /* Compute a maximum limit for the number of transitions leaving
2864        from each state. */
2865     while (p1->position >= 0)
2866       {
2867         p2 = orig_p2;
2868         while (p2->position >= 0)
2869           {
2870             counts[p1->position]++;
2871             p2++;
2872           }
2873         p1++;
2874       }
2875   return REG_OK;
2876 }
2877
2878 /* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
2879    labelled with one character range (there are no transitions on empty
2880    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2881    the regexp. */
2882 static reg_errcode_t
2883 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2884                 int *counts, int *offs)
2885 {
2886   tre_union_t *uni;
2887   tre_catenation_t *cat;
2888   tre_iteration_t *iter;
2889   reg_errcode_t errcode = REG_OK;
2890
2891   /* XXX - recurse using a stack!. */
2892   switch (node->type)
2893     {
2894     case LITERAL:
2895       break;
2896     case UNION:
2897       uni = (tre_union_t *)node->obj;
2898       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2899       if (errcode != REG_OK)
2900         return errcode;
2901       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2902       break;
2903
2904     case CATENATION:
2905       cat = (tre_catenation_t *)node->obj;
2906       /* Add a transition from each position in cat->left->lastpos
2907          to each position in cat->right->firstpos. */
2908       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2909                                transitions, counts, offs);
2910       if (errcode != REG_OK)
2911         return errcode;
2912       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2913       if (errcode != REG_OK)
2914         return errcode;
2915       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2916       break;
2917
2918     case ITERATION:
2919       iter = (tre_iteration_t *)node->obj;
2920       assert(iter->max == -1 || iter->max == 1);
2921
2922       if (iter->max == -1)
2923         {
2924           assert(iter->min == 0 || iter->min == 1);
2925           /* Add a transition from each last position in the iterated
2926              expression to each first position. */
2927           errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2928                                    transitions, counts, offs);
2929           if (errcode != REG_OK)
2930             return errcode;
2931         }
2932       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2933       break;
2934     }
2935   return errcode;
2936 }
2937
2938
/* Stores `err' in the variable `errcode' and jumps to the label
   `error_exit'; both must exist in the function that uses the macro.
   Wrapped in do/while(0) so that it behaves as a single statement.  The
   always-true `if' around the goto and the CONSTCOND markers are
   presumably there to keep lint-style checkers quiet -- confirm before
   simplifying. */
#define ERROR_EXIT(err)           \
  do                              \
    {                             \
      errcode = (err);            \
      if (/*CONSTCOND*/1)         \
        goto error_exit;          \
    }                             \
  while (/*CONSTCOND*/0)
2947
2948
2949 int
2950 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2951             locale_t loc)
2952 {
2953   tre_stack_t *stack;
2954   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2955   tre_pos_and_tags_t *p;
2956   int *counts = NULL, *offs = NULL;
2957   int i, add = 0;
2958   tre_tnfa_transition_t *transitions, *initial;
2959   tre_tnfa_t *tnfa = NULL;
2960   tre_submatch_data_t *submatch_data = NULL;
2961   tre_tag_direction_t *tag_directions = NULL;
2962   reg_errcode_t errcode;
2963   tre_mem_t mem;
2964
2965   /* Parse context. */
2966   tre_parse_ctx_t parse_ctx;
2967
2968   /* Allocate a stack used throughout the compilation process for various
2969      purposes. */
2970   stack = tre_stack_new(512, 10240, 128);
2971   if (!stack)
2972     return REG_ESPACE;
2973   /* Allocate a fast memory allocator. */
2974   mem = tre_mem_new();
2975   if (!mem)
2976     {
2977       tre_stack_destroy(stack);
2978       return REG_ESPACE;
2979     }
2980
2981   /* Parse the regexp. */
2982   memset(&parse_ctx, 0, sizeof(parse_ctx));
2983   parse_ctx.mem = mem;
2984   parse_ctx.stack = stack;
2985   parse_ctx.re = regex;
2986   parse_ctx.len = n;
2987   /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2988      are also set */
2989   if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2990     cflags &= ~REG_UNGREEDY;
2991   parse_ctx.cflags = cflags;
2992   parse_ctx.max_backref = -1;
2993   parse_ctx.loc = loc;
2994   parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2995
2996   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
2997   errcode = tre_parse(&parse_ctx);
2998   if (errcode != REG_OK)
2999     ERROR_EXIT(errcode);
3000   preg->re_nsub = parse_ctx.submatch_id - 1;
3001   tree = parse_ctx.result;
3002
3003   /* Back references and approximate matching cannot currently be used
3004      in the same regexp. */
3005   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3006     ERROR_EXIT(REG_BADPAT);
3007
3008 #ifdef TRE_DEBUG
3009   tre_ast_print(tree);
3010 #endif /* TRE_DEBUG */
3011
3012   /* Referring to nonexistent subexpressions is illegal. */
3013   if (parse_ctx.max_backref > (int)preg->re_nsub)
3014     ERROR_EXIT(REG_ESUBREG);
3015
3016   /* Allocate the TNFA struct. */
3017   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3018   if (tnfa == NULL)
3019     ERROR_EXIT(REG_ESPACE);
3020   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3021   tnfa->have_approx = parse_ctx.have_approx;
3022   tnfa->num_submatches = parse_ctx.submatch_id;
3023   tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3024                                    - SUBMATCH_ID_INVISIBLE_START;
3025   tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3026   tnfa->loc = parse_ctx.loc;
3027
3028   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
3029      regexp does not have back references, this can be skipped. */
3030   if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3031     {
3032       DPRINT(("tre_compile: setting up tags\n"));
3033
3034       /* Figure out how many tags we will need. */
3035       errcode = tre_add_tags(NULL, stack, tree, tnfa);
3036       if (errcode != REG_OK)
3037         ERROR_EXIT(errcode);
3038 #ifdef TRE_DEBUG
3039       tre_ast_print(tree);
3040 #endif /* TRE_DEBUG */
3041
3042       if (tnfa->num_tags > 0)
3043         {
3044           tag_directions = xmalloc(sizeof(*tag_directions)
3045                                    * (tnfa->num_tags + 1));
3046           if (tag_directions == NULL)
3047             ERROR_EXIT(REG_ESPACE);
3048           tnfa->tag_directions = tag_directions;
3049           memset(tag_directions, -1,
3050                  sizeof(*tag_directions) * (tnfa->num_tags + 1));
3051         }
3052       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3053                                    sizeof(*tnfa->minimal_tags));
3054       if (tnfa->minimal_tags == NULL)
3055         ERROR_EXIT(REG_ESPACE);
3056
3057       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3058                               sizeof(*submatch_data));
3059       if (submatch_data == NULL)
3060         ERROR_EXIT(REG_ESPACE);
3061       /* Set the eo_tag value to -1 to indicate that that corresponding
3062        * submatch has not be completed yet */
3063       for (i = 0; i < parse_ctx.submatch_id; i++)
3064         {
3065           submatch_data[i].eo_tag = -1;
3066         }
3067       tnfa->submatch_data = submatch_data;
3068
3069       errcode = tre_add_tags(mem, stack, tree, tnfa);
3070       if (errcode != REG_OK)
3071         ERROR_EXIT(errcode);
3072
3073 #ifdef TRE_DEBUG
3074       for (i = 0; i < parse_ctx.submatch_id; i++)
3075         DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3076                 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3077       for (i = 0; i <= tnfa->num_tags; i++)
3078         DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3079 #endif /* TRE_DEBUG */
3080     }
3081
3082   /* Expand iteration nodes. */
3083   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3084                            tag_directions, &tnfa->params_depth);
3085   if (errcode != REG_OK)
3086     ERROR_EXIT(errcode);
3087
3088   /* Add a dummy node for the final state.
3089      XXX - For certain patterns this dummy node can be optimized away,
3090            for example "a*" or "ab*".   Figure out a simple way to detect
3091            this possibility. */
3092   tmp_ast_l = tree;
3093   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3094   if (tmp_ast_r == NULL)
3095     ERROR_EXIT(REG_ESPACE);
3096
3097   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3098   if (tree == NULL)
3099     ERROR_EXIT(REG_ESPACE);
3100
3101 #ifdef TRE_DEBUG
3102   tre_ast_print(tree);
3103   DPRINT(("Number of states: %d\n", parse_ctx.position));
3104   if (submatch_data)
3105     for (i = 0; i < parse_ctx.submatch_id; i++)
3106       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3107               i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3108   if (tag_directions)
3109     for (i = 0; i <= tnfa->num_tags; i++)
3110       DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3111 #endif /* TRE_DEBUG */
3112
3113   errcode = tre_compute_nfl(mem, stack, tree);
3114   if (errcode != REG_OK)
3115     ERROR_EXIT(errcode);
3116
3117   counts = xmalloc(sizeof(int) * parse_ctx.position);
3118   if (counts == NULL)
3119     ERROR_EXIT(REG_ESPACE);
3120
3121   offs = xmalloc(sizeof(int) * parse_ctx.position);
3122   if (offs == NULL)
3123     ERROR_EXIT(REG_ESPACE);
3124
3125   for (i = 0; i < parse_ctx.position; i++)
3126     counts[i] = 0;
3127   tre_ast_to_tnfa(tree, NULL, counts, NULL);
3128
3129   add = 0;
3130   for (i = 0; i < parse_ctx.position; i++)
3131     {
3132       offs[i] = add;
3133       add += counts[i] + 1;
3134       counts[i] = 0;
3135     }
3136   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3137   if (transitions == NULL)
3138     ERROR_EXIT(REG_ESPACE);
3139   tnfa->transitions = transitions;
3140   tnfa->num_transitions = add;
3141
3142   DPRINT(("Converting to TNFA:\n"));
3143   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3144   if (errcode != REG_OK)
3145     ERROR_EXIT(errcode);
3146
3147 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3148   /* If in eight bit mode, compute a table of characters that can be the
3149      first character of a match. */
3150   tnfa->first_char = -1;
3151   if (TRE_MB_CUR_MAX_L(tnfa->loc) == 1 && !tmp_ast_l->nullable)
3152     {
3153       int count = 0;
3154       tre_cint_t k;
3155       DPRINT(("Characters that can start a match:"));
3156       tnfa->firstpos_chars = xcalloc(256, sizeof(char));
3157       if (tnfa->firstpos_chars == NULL)
3158         ERROR_EXIT(REG_ESPACE);
3159       for (p = tree->firstpos; p->position >= 0; p++)
3160         {
3161           tre_tnfa_transition_t *j = transitions + offs[p->position];
3162           while (j->state != NULL)
3163             {
3164               for (k = j->code_min; k <= j->code_max && k < 256; k++)
3165                 {
3166                   DPRINT((" %d", k));
3167                   tnfa->firstpos_chars[k] = 1;
3168                   count++;
3169                 }
3170               j++;
3171             }
3172         }
3173       DPRINT(("\n"));
3174 #define TRE_OPTIMIZE_FIRST_CHAR 1
3175 #if TRE_OPTIMIZE_FIRST_CHAR
3176       if (count == 1)
3177         {
3178           for (k = 0; k < 256; k++)
3179             if (tnfa->firstpos_chars[k])
3180               {
3181                 DPRINT(("first char must be %d\n", k));
3182                 tnfa->first_char = k;
3183                 xfree(tnfa->firstpos_chars);
3184                 tnfa->firstpos_chars = NULL;
3185                 break;
3186               }
3187         }
3188 #endif
3189
3190     }
3191   else
3192     tnfa->firstpos_chars = NULL;
3193 #else /* !USE_FIRSTPOS_CHARS */
3194
3195   /* Set first_char only if there is only one character that can be the
3196      first character of a match */
3197   tnfa->first_char = -1;
3198   if (!tmp_ast_l->nullable)
3199     {
3200       int scanning = 1;
3201       for (p = tree->firstpos; scanning && p->position >= 0; p++)
3202         {
3203           tre_tnfa_transition_t *j = transitions + offs[p->position];
3204           while (j->state != NULL)
3205             {
3206               if (j->code_min <= j->code_max)
3207                 {
3208                   if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3209                     {
3210                       tnfa->first_char = -1;
3211                       scanning = 0;
3212                       break;
3213                     }
3214                   tnfa->first_char = j->code_min;
3215                 }
3216               j++;
3217             }
3218         }
3219 #ifdef TRE_DEBUG
3220       if (tnfa->first_char >= 0)
3221         DPRINT(("first char must be %d\n", tnfa->first_char));
3222 #endif /* TRE_DEBUG */
3223     }
3224 #endif /* !USE_FIRSTPOS_CHARS */
3225
3226   p = tree->firstpos;
3227   i = 0;
3228   while (p->position >= 0)
3229     {
3230       i++;
3231
3232 #ifdef TRE_DEBUG
3233       {
3234         int *tags;
3235         DPRINT(("initial: %d", p->position));
3236         tags = p->tags;
3237         if (tags != NULL)
3238           {
3239             if (*tags >= 0)
3240               DPRINT(("/"));
3241             while (*tags >= 0)
3242               {
3243                 DPRINT(("%d", *tags));
3244                 tags++;
3245                 if (*tags >= 0)
3246                   DPRINT((","));
3247               }
3248           }
3249         DPRINT((", assert %d", p->assertions));
3250         if (p->params)
3251           {
3252             DPRINT((", "));
3253             tre_print_params(p->params);
3254           }
3255         DPRINT(("\n"));
3256       }
3257 #endif /* TRE_DEBUG */
3258
3259       p++;
3260     }
3261
3262   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3263   if (initial == NULL)
3264     ERROR_EXIT(REG_ESPACE);
3265   tnfa->initial = initial;
3266
3267   i = 0;
3268   for (p = tree->firstpos; p->position >= 0; p++)
3269     {
3270       initial[i].state = transitions + offs[p->position];
3271       initial[i].state_id = p->position;
3272       initial[i].tags = NULL;
3273       /* Copy the arrays p->tags, and p->params, they are allocated
3274          from a tre_mem object. */
3275       if (p->tags)
3276         {
3277           int j;
3278           for (j = 0; p->tags[j] >= 0; j++);
3279           initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3280           if (!initial[i].tags)
3281             ERROR_EXIT(REG_ESPACE);
3282           memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3283         }
3284       initial[i].params = NULL;
3285       if (p->params)
3286         {
3287           initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3288           if (!initial[i].params)
3289             ERROR_EXIT(REG_ESPACE);
3290           memcpy(initial[i].params, p->params,
3291                  sizeof(*p->params) * TRE_PARAM_LAST);
3292         }
3293       initial[i].assertions = p->assertions;
3294       i++;
3295     }
3296   initial[i].state = NULL;
3297
3298   tnfa->num_transitions = add;
3299   tnfa->final = transitions + offs[tree->lastpos[0].position];
3300   tnfa->num_states = parse_ctx.position;
3301   tnfa->cflags = cflags;
3302
3303   DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3304           (void *)tnfa->final));
3305
3306   tre_mem_destroy(mem);
3307   tre_stack_destroy(stack);
3308   xfree(counts);
3309   xfree(offs);
3310
3311 #ifdef TRE_USE_SYSTEM_REGEX_H
3312   preg->re_magic = RE_MAGIC;
3313 #endif /* TRE_USE_SYSTEM_REGEX_H */
3314   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3315 #ifdef __LIBC__
3316   /* In Libc, we need to retain the locale.  Outside Libc, we already called
3317      duplocale() which does the retaining. */
3318   XL_RETAIN(tnfa->loc);
3319 #endif /* __LIBC__ */
3320   return REG_OK;
3321
3322  error_exit:
3323   /* Free everything that was allocated and return the error code. */
3324   tre_mem_destroy(mem);
3325   if (stack != NULL)
3326     tre_stack_destroy(stack);
3327   if (counts != NULL)
3328     xfree(counts);
3329   if (offs != NULL)
3330     xfree(offs);
3331
3332   /* Set tnfa into preg, so that calling tre_free() will free the contents
3333      of tnfa.  But in Libc, NULL out the loc field since we never retained
3334      the locale.  Outside Libc, we let tre_free() call freelocale(). */
3335   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3336 #ifdef __LIBC__
3337   if(tnfa) tnfa->loc = NULL;
3338 #endif /* __LIBC__ */
3339
3340   tre_free(preg);
3341   return errcode;
3342 }
3343
3344
3345
3346
3347 void
3348 tre_free(regex_t *preg)
3349 {
3350   tre_tnfa_t *tnfa;
3351   unsigned int i;
3352   tre_tnfa_transition_t *trans;
3353
3354 #ifdef TRE_USE_SYSTEM_REGEX_H
3355   preg->re_magic = 0;
3356 #endif /* TRE_USE_SYSTEM_REGEX_H */
3357   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3358   if (!tnfa)
3359     return;
3360   preg->TRE_REGEX_T_FIELD = NULL;
3361
3362   for (i = 0; i < tnfa->num_transitions; i++)
3363     if (tnfa->transitions[i].state)
3364       {
3365         if (tnfa->transitions[i].tags)
3366           xfree(tnfa->transitions[i].tags);
3367         if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3368           xfree(tnfa->transitions[i].u.bracket_match_list);
3369         if (tnfa->transitions[i].params)
3370           xfree(tnfa->transitions[i].params);
3371       }
3372   if (tnfa->transitions)
3373     xfree(tnfa->transitions);
3374
3375   if (tnfa->initial)
3376     {
3377       for (trans = tnfa->initial; trans->state; trans++)
3378         {
3379           if (trans->tags)
3380             xfree(trans->tags);
3381           if (trans->params)
3382             xfree(trans->params);
3383         }
3384       xfree(tnfa->initial);
3385     }
3386
3387   if (tnfa->submatch_data)
3388     {
3389       xfree(tnfa->submatch_data);
3390     }
3391
3392   if (tnfa->tag_directions)
3393     xfree(tnfa->tag_directions);
3394 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3395   if (tnfa->firstpos_chars)
3396     xfree(tnfa->firstpos_chars);
3397 #endif /* USE_FIRSTPOS_CHARS */
3398   if (tnfa->minimal_tags)
3399     xfree(tnfa->minimal_tags);
3400
3401   if (tnfa->loc)
3402 #ifdef __LIBC__
3403     XL_RELEASE(tnfa->loc);
3404 #else /* !__LIBC__ */
3405     freelocale(tnfa->loc);
3406 #endif /* !__LIBC__ */
3407
3408   if (tnfa->last_matched_branch)
3409     xfree(tnfa->last_matched_branch);
3410
3411   xfree(tnfa);
3412 }
3413
3414 #ifndef __LIBC__
3415 char *
3416 tre_version(void)
3417 {
3418   static char str[256];
3419   char *version;
3420
3421   if (str[0] == 0)
3422     {
3423       (void) tre_config(TRE_CONFIG_VERSION, &version);
3424       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3425     }
3426   return str;
3427 }
3428
3429 int
3430 tre_config(int query, void *result)
3431 {
3432   int *int_result = result;
3433   const char **string_result = result;
3434
3435   switch (query)
3436     {
3437     case TRE_CONFIG_APPROX:
3438 #ifdef TRE_APPROX
3439       *int_result = 1;
3440 #else /* !TRE_APPROX */
3441       *int_result = 0;
3442 #endif /* !TRE_APPROX */
3443       return REG_OK;
3444
3445     case TRE_CONFIG_WCHAR:
3446 #ifdef TRE_WCHAR
3447       *int_result = 1;
3448 #else /* !TRE_WCHAR */
3449       *int_result = 0;
3450 #endif /* !TRE_WCHAR */
3451       return REG_OK;
3452
3453     case TRE_CONFIG_MULTIBYTE:
3454 #ifdef TRE_MULTIBYTE
3455       *int_result = 1;
3456 #else /* !TRE_MULTIBYTE */
3457       *int_result = 0;
3458 #endif /* !TRE_MULTIBYTE */
3459       return REG_OK;
3460
3461     case TRE_CONFIG_SYSTEM_ABI:
3462 #ifdef TRE_CONFIG_SYSTEM_ABI
3463       *int_result = 1;
3464 #else /* !TRE_CONFIG_SYSTEM_ABI */
3465       *int_result = 0;
3466 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3467       return REG_OK;
3468
3469     case TRE_CONFIG_VERSION:
3470       *string_result = TRE_VERSION;
3471       return REG_OK;
3472     }
3473
3474   return REG_NOMATCH;
3475 }
3476 #endif /* !__LIBC__ */
3477
3478
3479 /* EOF */