regex/TRE/lib/tre-compile.c

   1 /*
   2   tre-compile.c - TRE regex compiler
   3
   4   This software is released under a BSD-style license.
   5   See the file LICENSE for details and copyright.
   6
   7 */
   8
   9 /*
  10   TODO:
  11    - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
  12      function calls.
  13 */
  14
  15
  16 #ifdef HAVE_CONFIG_H
  17 #include <config.h>
  18 #endif /* HAVE_CONFIG_H */
  19 #include <stdio.h>
  20 #include <assert.h>
  21 #include <string.h>
  22 #include <limits.h>
  23
  24 #include "tre-internal.h"
  25 #include "tre-mem.h"
  26 #include "tre-stack.h"
  27 #include "tre-ast.h"
  28 #include "tre-parse.h"
  29 #include "tre-compile.h"
  30 #include "tre.h"
  31 #include "tre-last-matched.h"
  32 #include "xmalloc.h"
  33
  34 #pragma clang diagnostic push
  35 #pragma clang diagnostic ignored "-Wunreachable-code"
  36
  37 /*
  38   The bit_ffs() macro in bitstring.h is flawed.  Replace it with a working one.
  39 */
  40 #undef bit_ffs
  41 #define bit_ffs(name, nbits, value) { \
  42         register bitstr_t *_name = name; \
  43         register int _byte, _nbits = nbits; \
  44         register int _stopbyte = _bit_byte(_nbits), _value = -1; \
  45         for (_byte = 0; _byte <= _stopbyte; ++_byte) \
  46                 if (_name[_byte]) { \
  47                         _value = _byte << 3; \
  48                         for (_stopbyte = _name[_byte]; !(_stopbyte&0x1); \
  49                             ++_value, _stopbyte >>= 1); \
  50                         break; \
  51                 } \
  52         *(value) = _value; \
  53 }
  54
  55 /*
  56   Algorithms to setup tags so that submatch addressing can be done.
  57 */
  58
  59
  60 #ifdef TRE_DEBUG
  61 static const char *tag_dir_str[] = {
  62   "minimize",
  63   "maximize",
  64   "left-maximize"
  65 };
  66
  67 static const char _indent[] = "  ";
  68
  69 static void
  70 print_indent(int indent)
  71 {
  72   while (indent-- > 0)
  73     DPRINT((_indent));
  74 }
  75
  76 static void print_last_matched_pre(tre_last_matched_pre_t *lm, int indent,
  77                                    int num_tags);
  78 static void
  79 print_last_match_branch_pre(tre_last_matched_branch_pre_t *branch, int indent,
  80                             int num_tags)
  81 {
  82   tre_last_matched_pre_t *u = branch->last_matched;
  83   int n_last_matched = 0;
  84
  85   while (u)
  86     {
  87       n_last_matched++;
  88       u = u->next;
  89     }
  90
  91   print_indent(indent);
  92   DPRINT(("BRANCH: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
  93           branch->tot_branches, branch->tot_last_matched, branch->tot_tags));
  94   print_indent(indent);
  95   DPRINT(("..n_last_matched=%d last_matched=%d\n", branch->n_last_matched,
  96           n_last_matched));
  97   if (branch->n_last_matched != n_last_matched)
  98     DPRINT(("*** mismatch between n_last_matched and unions ***\n"));
  99   if (branch->cmp_tag > 0)
 100     {
 101       int i;
 102       const char *sep = " tags=";
 103       print_indent(indent);
 104       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
 105       for (i = 0; i < num_tags; i++)
 106         if (bit_test(branch->tags, i))
 107           {
 108             DPRINT(("%s%d", sep, i));
 109             sep = ",";
 110           }
 111       DPRINT(("\n"));
 112     }
 113
 114   u = branch->last_matched;
 115   indent++;
 116   while (u)
 117     {
 118       print_last_matched_pre(u, indent, num_tags);
 119       u = u->next;
 120     }
 121 }
 122
 123 static void
 124 print_last_matched_pre(tre_last_matched_pre_t *lm, int indent, int num_tags)
 125 {
 126   tre_last_matched_branch_pre_t *b = lm->branches;
 127   int n_branches = 0;
 128
 129   while (b)
 130     {
 131       n_branches++;
 132       b = b->next;
 133     }
 134
 135   print_indent(indent);
 136   DPRINT(("LAST_MATCHED: tot_branches=%d tot_last_matched=%d tot_tags=%d\n",
 137           lm->tot_branches, lm->tot_last_matched, lm->tot_tags));
 138   print_indent(indent);
 139   DPRINT(("..start_tag=%d n_branches=%d branches=%d\n", lm->start_tag,
 140           lm->n_branches, n_branches));
 141   if (lm->n_branches != n_branches)
 142     DPRINT(("*** mismatch between n and branches ***\n"));
 143
 144   b = lm->branches;
 145   indent++;
 146   while (b)
 147     {
 148       print_last_match_branch_pre(b, indent, num_tags);
 149       b = b->next;
 150     }
 151 }
 152
 153 static void print_last_matched(tre_last_matched_t *lm, int indent);
 154 static void
 155 print_last_match_branch(tre_last_matched_branch_t *branch, int indent)
 156 {
 157   tre_last_matched_t *u;
 158   int i;
 159
 160   print_indent(indent);
 161   DPRINT(("BRANCH: n_last_matched=%d\n", branch->n_last_matched));
 162   if (branch->cmp_tag > 0)
 163     {
 164       print_indent(indent);
 165       DPRINT(("..cmp_tag=%d n_tags=%d", branch->cmp_tag, branch->n_tags));
 166       if (branch->n_tags > 0)
 167         {
 168           const char *sep = " tags=";
 169           for (i = 0; i < branch->n_tags; i++)
 170             {
 171               DPRINT(("%s%d", sep, branch->tags[i]));
 172               sep = ",";
 173             }
 174         }
 175       DPRINT(("\n"));
 176     }
 177
 178   u = branch->last_matched;
 179   indent++;
 180   for (i = branch->n_last_matched; i > 0; i--, u++)
 181     print_last_matched(u, indent);
 182 }
 183
 184 static void
 185 print_last_matched(tre_last_matched_t *lm, int indent)
 186 {
 187   int i;
 188   tre_last_matched_branch_t *b;
 189
 190   print_indent(indent);
 191   DPRINT(("LAST_MATCHED: n_branches=%d start_tag=%d\n", lm->n_branches,
 192           lm->start_tag));
 193
 194   b = lm->branches;
 195   indent++;
 196   for (i = lm->n_branches; i > 0; i--, b++)
 197     print_last_match_branch(b, indent);
 198 }
 199 #endif /* TRE_DEBUG */
 200
 201
 202 /* Merge the tre_last_matched_branch_pre_t of src into dst, creating a new
 203    one if needed.  If tag_id > 0, add that tag as well (a negative tag_id will
 204    create an unset tre_last_matched_branch_pre_t. */
 205 static reg_errcode_t
 206 tre_merge_branches(tre_mem_t mem, tre_ast_node_t *dst, tre_ast_node_t *src,
 207                    int tag_id, int num_tags)
 208 {
 209   tre_last_matched_branch_pre_t *db = dst->last_matched_branch;
 210   tre_last_matched_branch_pre_t *sb = (src ? src->last_matched_branch : NULL);
 211
 212   if (db)
 213     {
 214       if (sb)
 215         {
 216           bitstr_t *l = db->tags;
 217           bitstr_t *r = sb->tags;
 218           int i = bitstr_size(num_tags);
 219
 220           while(i-- > 0)
 221             *l++ |= *r++;
 222           /* db and sb are the info from two parallel sub-trees, so the tags
 223              must be mutually exclusive, and we can just add their numbers */
 224           db->n_tags += sb->n_tags;
 225           db->tot_tags += sb->tot_tags;
 226           if (db->last_matched)
 227             {
 228               if (sb->last_matched)
 229                 {
 230                   tre_last_matched_pre_t *u = db->last_matched;
 231
 232                   while(u->next)
 233                     u = u->next;
 234                   u->next = sb->last_matched;
 235                   db->n_last_matched += sb->n_last_matched;
 236                   db->tot_branches += sb->tot_branches;
 237                   db->tot_last_matched += sb->tot_last_matched;
 238                 }
 239             }
 240             else if (sb->last_matched)
 241               {
 242                 db->last_matched = sb->last_matched;
 243                 db->n_last_matched = sb->n_last_matched;
 244                 db->tot_branches = sb->tot_branches;
 245                 db->tot_last_matched = sb->tot_last_matched;
 246               }
 247         }
 248     }
 249   else
 250     db = sb;
 251
 252   if (tag_id != 0)
 253     {
 254       if (!db)
 255         {
 256           db = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
 257                               + bitstr_size(num_tags));
 258           if (db == NULL)
 259             return REG_ESPACE;
 260           db->tot_branches = 1;
 261         }
 262       if (tag_id > 0)
 263         {
 264           /* tag_id is a new tag, and shouldn't exist in db's tags,
 265              so we can always increment n_tags */
 266           bit_set(db->tags, tag_id);
 267           db->n_tags++;
 268           db->tot_tags++;
 269         }
 270     }
 271   dst->last_matched_branch = db;
 272   return REG_OK;
 273 }
 274
 275
 276 /* Inserts a catenation node to the root of the tree given in `node'.
 277    As the left child a new tag with number `tag_id' to `node' is added,
 278    and the right child is the old root. */
 279 static reg_errcode_t
 280 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
 281 {
 282   tre_catenation_t *c;
 283
 284   DPRINT(("add_tag_left: tag %d\n", tag_id));
 285
 286   c = tre_mem_alloc(mem, sizeof(*c));
 287   if (c == NULL)
 288     return REG_ESPACE;
 289   c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
 290   if (c->left == NULL)
 291     return REG_ESPACE;
 292   c->right = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
 293   if (c->right == NULL)
 294     return REG_ESPACE;
 295
 296   c->right->obj = node->obj;
 297   c->right->type = node->type;
 298   c->right->last_matched_branch = node->last_matched_branch;
 299   c->right->nullable = -1;
 300   c->right->submatch_id = -1;
 301   node->obj = c;
 302   node->type = CATENATION;
 303   node->original = c->right;
 304   return REG_OK;
 305 }
 306
 307 /* Inserts a catenation node to the root of the tree given in `node'.
 308    As the right child a new tag with number `tag_id' to `node' is added,
 309    and the left child is the old root. */
 310 static reg_errcode_t
 311 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
 312 {
 313   tre_catenation_t *c;
 314
 315   DPRINT(("tre_add_tag_right: tag %d\n", tag_id));
 316
 317   c = tre_mem_alloc(mem, sizeof(*c));
 318   if (c == NULL)
 319     return REG_ESPACE;
 320   c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
 321   if (c->right == NULL)
 322     return REG_ESPACE;
 323   c->left = tre_mem_calloc(mem, sizeof(tre_ast_node_t));
 324   if (c->left == NULL)
 325     return REG_ESPACE;
 326
 327   c->left->obj = node->obj;
 328   c->left->type = node->type;
 329   c->left->last_matched_branch = node->last_matched_branch;
 330   c->left->nullable = -1;
 331   c->left->submatch_id = -1;
 332   node->obj = c;
 333   node->type = CATENATION;
 334   node->original = c->left;
 335   return REG_OK;
 336 }
 337
 338 typedef enum {
 339   ADDTAGS_RECURSE,
 340   ADDTAGS_RECURSE_NOT_TOP_UNION,
 341   ADDTAGS_AFTER_ITERATION,
 342   ADDTAGS_AFTER_UNION_LEFT,
 343   ADDTAGS_AFTER_UNION_RIGHT,
 344   ADDTAGS_AFTER_CAT_LEFT,
 345   ADDTAGS_AFTER_CAT_RIGHT,
 346   ADDTAGS_SET_SUBMATCH_END,
 347   ADDTAGS_UNION_RECURSE,
 348   ADDTAGS_UNION_RIGHT_RECURSE,
 349   ADDTAGS_AFTER_UNION_TOP,
 350 } tre_addtags_symbol_t;
 351
 352 enum {
 353   COPY_LAST_MATCHED_BRANCH,
 354   COPY_LAST_MATCHED_BRANCH_NEXT,
 355   COPY_LAST_MATCHED,
 356   COPY_LAST_MATCHED_NEXT,
 357 };
 358
 359
 360 #define REGSET_UNSET            ((unsigned)-1)
 361
 362 /* Go through `regset' and set submatch data for submatches that are
 363    using this tag. */
 364 static void
 365 tre_purge_regset(unsigned *regset, tre_tnfa_t *tnfa, int tag)
 366 {
 367   int i;
 368
 369   for (i = 0; regset[i] != REGSET_UNSET; i++)
 370     {
 371       int id = regset[i] / 2;
 372       int start = !(regset[i] % 2);
 373       if (id >= SUBMATCH_ID_INVISIBLE_START)
 374         continue;
 375       DPRINT(("  Using tag %d for %s offset of "
 376               "submatch %d\n", tag,
 377               start ? "start" : "end", id));
 378       if (start)
 379         tnfa->submatch_data[id].so_tag = tag;
 380       else
 381         tnfa->submatch_data[id].eo_tag = tag;
 382     }
 383   regset[0] = -1;
 384 }
 385
 386
 387 #define REGSET_HAS_STARTS       0x1
 388 #define REGSET_HAS_ENDS         0x2
 389
 390
 391 /* Adds tags to appropriate locations in the parse tree in `tree', so that
 392    subexpressions marked for submatch addressing can be traced. */
 393 static reg_errcode_t
 394 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
 395              tre_tnfa_t *tnfa)
 396 {
 397   reg_errcode_t status = REG_OK;
 398   tre_addtags_symbol_t symbol;
 399   tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
 400   int bottom = tre_stack_num_objects(stack);
 401   /* True for first pass (counting number of needed tags) */
 402   int first_pass = (mem == NULL || tnfa == NULL);
 403   unsigned *regset, *orig_regset;
 404   int regset_contains = 0;
 405   int num_tags = 0; /* Total number of tags. */
 406   int num_minimals = 0;  /* Number of special minimal tags. */
 407   int tag = 0;      /* The tag that is to be added next. */
 408   int next_tag = 1; /* Next tag to use after this one. */
 409   int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
 410   int *reorder_tags = NULL; /* Tag reorder array: a pair for each reorder,
 411                              * the first is the tag to reorder, the second
 412                              * is the tag after which the first is reordered */
 413   int *rtp;                 /* Pointer used to fill in reorder_tags and
 414                              * tag_order */
 415   int *to_reorder;          /* Transform array converting sequential order to
 416                              * that specified by reorder_tags */
 417   int id;
 418
 419   tre_tag_direction_t direction = TRE_TAG_LEFT_MAXIMIZE;
 420   if (!first_pass)
 421     {
 422       DPRINT(("Initializing direction to %s\n", tag_dir_str[direction]));
 423       tnfa->end_tag = 0;
 424       tnfa->minimal_tags[0] = -1;
 425     }
 426
 427   regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches
 428                    + tnfa->num_submatches_invisible + 1) * 2));
 429   if (regset == NULL)
 430     {
 431       status = REG_ESPACE;
 432       goto error_regset;
 433     }
 434   regset[0] = REGSET_UNSET;
 435   orig_regset = regset;
 436
 437   if (!first_pass)
 438     {
 439       /* Allocate all memory for reorder_tags, tag_order, to_seq_order and
 440        * to_reorder in one batch (assuming all are the same type) */
 441       rtp = reorder_tags = xmalloc(sizeof(*reorder_tags) *
 442                                    ((2 * tnfa->num_reorder_tags + 1) +
 443                                    tnfa->num_tags));
 444       if (reorder_tags == NULL)
 445         {
 446           status = REG_ESPACE;
 447           goto error_reorder_tags;
 448         }
 449       to_reorder = reorder_tags + (2 * tnfa->num_reorder_tags + 1);
 450     }
 451
 452   STACK_PUSH(stack, voidptr, node);
 453   STACK_PUSH(stack, int, ADDTAGS_RECURSE);
 454
 455   while (tre_stack_num_objects(stack) > bottom)
 456     {
 457       if (status != REG_OK)
 458         break;
 459
 460       symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
 461       switch (symbol)
 462         {
 463           int top_union;
 464
 465         case ADDTAGS_SET_SUBMATCH_END:
 466           {
 467             int i;
 468
 469             id = tre_stack_pop_int(stack);
 470             node = tre_stack_pop_voidptr(stack);
 471             /* Add end of this submatch to regset. */
 472             for (i = 0; regset[i] != REGSET_UNSET; i++);
 473             regset[i] = id * 2 + 1;
 474             regset[i + 1] = -1;
 475             regset_contains |= REGSET_HAS_ENDS;
 476
 477             /* Always put a tag after a minimal iterator. */
 478             if (minimal_tag >= 0)
 479               {
 480                 if (first_pass)
 481                   {
 482                     node->num_tags++;
 483                     DPRINT(("  ADDTAGS_SET_SUBMATCH_END: node->num_tags = %d\n",
 484                             node->num_tags));
 485                   }
 486                 else
 487                   {
 488                     int i;
 489                     status = tre_merge_branches(mem, node, NULL, tag,
 490                                                 tnfa->num_tags);
 491                     if (status != REG_OK)
 492                       break;
 493                     status = tre_add_tag_right(mem, node, tag);
 494                     if (status != REG_OK)
 495                       break;
 496                     tnfa->tag_directions[tag] = TRE_TAG_MINIMIZE;
 497                     DPRINT(("Setting t%d direction to %s\n", tag,
 498                             tag_dir_str[tnfa->tag_directions[tag]]));
 499                     DPRINT(("Minimal %d, %d\n", minimal_tag, tag));
 500                     for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
 501                     tnfa->minimal_tags[i] = tag;
 502                     tnfa->minimal_tags[i + 1] = minimal_tag;
 503                     tnfa->minimal_tags[i + 2] = -1;
 504
 505                     DPRINT(("  Minimal end: t%d reordered to "
 506                             "after t%d\n", tag, minimal_tag));
 507                     /* Append to tag_order, move "tag" after
 508                      * "minimal_tag" */
 509                     *rtp++ = tag;
 510                     *rtp++ = minimal_tag;
 511
 512                     num_minimals++;
 513                     tre_purge_regset(regset, tnfa, tag);
 514                   }
 515
 516                 minimal_tag = -1;
 517                 DPRINT(("  ADDTAGS_SET_SUBMATCH_END num_tags++ tag=%d\n", tag));
 518                 regset[0] = REGSET_UNSET;
 519                 regset_contains = 0;
 520                 tag = next_tag;
 521                 num_tags++;
 522                 next_tag++;
 523               }
 524             break;
 525           }
 526
 527         case ADDTAGS_RECURSE_NOT_TOP_UNION:
 528           /* Like ADDTAGS_RECURSE, except that top_union is set to zero,
 529            * indicating that if a union is being processed, it is not the
 530            * top-most of a series */
 531           top_union = 0;
 532           goto do_addtags_recurse;
 533
 534         case ADDTAGS_RECURSE:
 535           /* Setting top_union to 1 means that if a union is begin processed,
 536            * it is the top-most of a series, and should recurse through the
 537            * series to set the left_tag and right_tag values */
 538           top_union = 1;
 539
 540 do_addtags_recurse:
 541           node = tre_stack_pop_voidptr(stack);
 542
 543           id = node->submatch_id;
 544           if (id >= 0)
 545             {
 546               int i;
 547
 548
 549               /* Add start of this submatch to regset. */
 550               for (i = 0; regset[i] != REGSET_UNSET; i++);
 551               regset[i] = id * 2;
 552               regset[i + 1] = -1;
 553               regset_contains |= REGSET_HAS_STARTS;
 554
 555               /* Add end of this submatch to regset after processing this
 556                  node. */
 557               STACK_PUSH(stack, voidptr, node);
 558               STACK_PUSHX(stack, int, id);
 559               STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
 560             }
 561
 562           switch (node->type)
 563             {
 564             case LITERAL:
 565               {
 566                 tre_literal_t *lit = node->obj;
 567
 568                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit) || IS_EMPTY(lit) || IS_ASSERTION(lit))
 569                   {
 570                     DPRINT(("Literal %d-%d\n",
 571                             (int)lit->code_min, (int)lit->code_max));
 572                     if (regset_contains)
 573                       {
 574                         /* Regset is not empty, so add a tag before the
 575                            literal or backref. */
 576                         if (first_pass)
 577                           {
 578                             DPRINT(("  ADDTAGS_RECURSE:LITERAL node->num_tags = 1\n"));
 579                             node->num_tags = 1;
 580                           }
 581                         else
 582                           {
 583                             status = tre_merge_branches(mem, node, NULL, tag,
 584                                                         tnfa->num_tags);
 585                             if (status != REG_OK)
 586                               break;
 587                             status = tre_add_tag_left(mem, node, tag);
 588                             if (status != REG_OK)
 589                               break;
 590                             if (regset_contains == REGSET_HAS_STARTS)
 591                               tnfa->tag_directions[tag] = TRE_TAG_LEFT_MAXIMIZE;
 592                             else
 593                               tnfa->tag_directions[tag] = direction;
 594                             DPRINT(("Setting t%d direction to %s\n", tag,
 595                                     tag_dir_str[tnfa->tag_directions[tag]]));
 596                             tre_purge_regset(regset, tnfa, tag);
 597
 598                             if (IS_BACKREF(lit))
 599                               {
 600                                 int b = lit->code_max;
 601                                 int t = tnfa->submatch_data[b].so_tag;
 602                                 /* Fail if the referenced submatch hasn't been
 603                                  * completed yet */
 604                                 if (tnfa->submatch_data[b].eo_tag < 0)
 605                                   {
 606                                     status = REG_ESUBREG;
 607                                     break;
 608                                   }
 609                                 if (t < tag)
 610                                   {
 611                                     DPRINT(("  Backref %d start: "
 612                                             "t%d reordered to before t%d\n",
 613                                             b, tag, t));
 614                                     if(t > 0)
 615                                       t--;
 616                                     /* Append to tag_order, move "tag" after
 617                                      * "t" */
 618                                     *rtp++ = tag;
 619                                     *rtp++ = t;
 620                                   }
 621 #if TRE_DEBUG
 622                                 else
 623                                   DPRINT(("  Backref %d start: "
 624                                           "(t%d already before t%d)\n",
 625                                             b, tag, t));
 626 #endif /* TRE_DEBUG */
 627                               }
 628                           }
 629
 630                         DPRINT(("  ADDTAGS_RECURSE:LITERAL num_tags++ tag=%d\n",
 631                                 tag));
 632                         regset[0] = REGSET_UNSET;
 633                         regset_contains = 0;
 634                         tag = next_tag;
 635                         num_tags++;
 636                         next_tag++;
 637                       }
 638                   }
 639                 else
 640                   {
 641                     assert(!IS_TAG(lit));
 642                   }
 643                 break;
 644               }
 645             case CATENATION:
 646               {
 647                 tre_catenation_t *cat = node->obj;
 648                 tre_ast_node_t *left = cat->left;
 649                 tre_ast_node_t *right = cat->right;
 650                 int reserved_tag = -1;
 651                 DPRINT(("Catenation, next_tag = %d\n", next_tag));
 652
 653
 654                 /* After processing right child. */
 655                 STACK_PUSHX(stack, voidptr, node);
 656                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
 657
 658                 /* Process right child. */
 659                 STACK_PUSHX(stack, voidptr, right);
 660                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 661
 662                 /* After processing left child. */
 663                 STACK_PUSHX(stack, int, next_tag + left->num_tags);
 664                 DPRINT(("  Pushing %d for after left\n",
 665                         next_tag + left->num_tags));
 666                 if (left->num_tags > 0 && right->num_tags > 0)
 667                   {
 668                     /* Reserve the next tag to the right child. */
 669                     DPRINT(("  ADDTAGS_RECURSE:CATENATION num_tags++ "
 670                             "Reserving next_tag %d to right child\n",
 671                             next_tag));
 672                     reserved_tag = next_tag;
 673                     next_tag++;
 674                   }
 675                 STACK_PUSHX(stack, int, reserved_tag);
 676                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
 677
 678                 /* Process left child. */
 679                 STACK_PUSHX(stack, voidptr, left);
 680                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 681
 682                 }
 683               break;
 684             case ITERATION:
 685               {
 686                 tre_iteration_t *iter = node->obj;
 687                 DPRINT(("Iteration\n"));
 688
 689                 if (first_pass)
 690                   STACK_PUSHX(stack, int, regset_contains != 0);
 691                 STACK_PUSHX(stack, int, tag);
 692                 STACK_PUSHX(stack, voidptr, node);
 693                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
 694
 695                 STACK_PUSHX(stack, voidptr, iter->arg);
 696                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
 697
 698                 /* Regset is not empty, so add a tag here (this always happens
 699                    because iterators always get submatch id, even if in the
 700                    invisible range) */
 701                 if (regset_contains)
 702                   {
 703                     if (!first_pass)
 704                       {
 705                         status = tre_merge_branches(mem, node, NULL, tag,
 706                                                     tnfa->num_tags);
 707                         if (status != REG_OK)
 708                           break;
 709                         status = tre_add_tag_left(mem, node, tag);
 710                         if (status != REG_OK)
 711                           break;
 712                         if (regset_contains == REGSET_HAS_STARTS && tag != 0)
 713                           tnfa->tag_directions[tag] = iter->minimal ?
 714                                                       TRE_TAG_MINIMIZE :
 715                                                       TRE_TAG_LEFT_MAXIMIZE;
 716                         else
 717                           tnfa->tag_directions[tag] = direction;
 718                         DPRINT(("Setting t%d direction to %s\n", tag,
 719                                 tag_dir_str[tnfa->tag_directions[tag]]));
 720                         tre_purge_regset(regset, tnfa, tag);
 721                       }
 722
 723                     DPRINT(("  ADDTAGS_RECURSE:ITERATION num_tags++ tag=%d\n",
 724                             tag));
 725                     regset[0] = REGSET_UNSET;
 726                     regset_contains = 0;
 727                     tag = next_tag;
 728                     num_tags++;
 729                     next_tag++;
 730                   }
 731                 direction = TRE_TAG_LEFT_MAXIMIZE;
 732                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
 733               }
 734               break;
 735             case UNION:
 736               {
 737                 tre_union_t *uni;
 738                 tre_ast_node_t *left;
 739                 tre_ast_node_t *right;
 740                 int front_tag = -1;
 741
 742                 DPRINT(("Union\n"));
 743
 744                 if (regset_contains)
 745                   {
 746                     DPRINT(("  UNION num_tags++ tag=%d\n", tag));
 747                     front_tag = tag;
 748                     tag = next_tag;
 749                     num_tags++;
 750                     next_tag++;
 751                   }
 752
 753                 /* For the top union, walk the tree of consecutive unions,
 754                  * setting the left_tag and right_tag values in increasing
 755                  * order (left to right priority) */
 756                 if (top_union &&
 757                     (node->num_submatches -
 758                     (node->submatch_id >= 0 &&
 759                     node->submatch_id < SUBMATCH_ID_INVISIBLE_START)) > 0)
 760                   {
 761                     tre_ast_node_t *n;
 762                     int last = tre_stack_num_objects(stack);
 763
 764                     STACK_PUSH(stack, voidptr, node);
 765                     STACK_PUSH(stack, int, ADDTAGS_UNION_RECURSE);
 766
 767                     while (tre_stack_num_objects(stack) > last)
 768                       {
 769                         symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
 770                         switch (symbol)
 771                           {
 772                           case ADDTAGS_UNION_RECURSE:
 773                             n = tre_stack_pop_voidptr(stack);
 774                             uni = n->obj;
 775                             left = uni->left;
 776
 777                             /* Since the top union has num_submatches > 0,
 778                              * we set all the consecutive union's
 779                              * make_branches to 1 to force the generation
 780                              * of end tags for each union branch. */
 781                             n->make_branches = 1;
 782
 783                             STACK_PUSH(stack, voidptr, n);
 784                             STACK_PUSH(stack, int,
 785                                        ADDTAGS_UNION_RIGHT_RECURSE);
 786
 787                             if (left->type == UNION)
 788                               {
 789                                 STACK_PUSH(stack, voidptr, left);
 790                                 STACK_PUSH(stack, int,
 791                                            ADDTAGS_UNION_RECURSE);
 792                               }
 793                             else
 794                               {
 795                                 DPRINT(("  ADDTAGS_UNION_RECURSE "
 796                                         "num_tags++ tag=%d\n", tag));
 797                                 uni->left_tag = tag;
 798                                 tag = next_tag;
 799                                 num_tags++;
 800                                 next_tag++;
 801                               }
 802                             break;
 803
 804                           case ADDTAGS_UNION_RIGHT_RECURSE:
 805                             n = tre_stack_pop_voidptr(stack);
 806                             uni = n->obj;
 807                             right = uni->right;
 808
 809                             if (right->type == UNION)
 810                               {
 811                                 STACK_PUSH(stack, voidptr, right);
 812                                 STACK_PUSH(stack, int,
 813                                            ADDTAGS_UNION_RECURSE);
 814                               }
 815                             else
 816                               {
 817                                 DPRINT(("  ADDTAGS_UNION_RIGHT_RECURSE "
 818                                         "num_tags++ tag=%d\n", tag));
 819                                 uni->right_tag = tag;
 820                                 tag = next_tag;
 821                                 num_tags++;
 822                                 next_tag++;
 823                               }
 824
 825                             break;
 826
 827                           default:
 828                             assert(0);
 829                             break;
 830
 831                           } /* end switch(symbol) */
 832                       } /* end while(tre_stack_num_objects(stack) > last */
 833                     if (!first_pass)
 834                       {
 835                         STACK_PUSHX(stack, int, front_tag);
 836                         STACK_PUSHX(stack, voidptr, node);
 837                         STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_TOP);
 838                       }
 839                   } /* end if (top_union && ...) */
 840
 841                 uni = node->obj;
 842                 left = uni->left;
 843                 right = uni->right;
 844
 845                 /* After processing right child. */
 846                 STACK_PUSHX(stack, voidptr, regset);
 847                 STACK_PUSHX(stack, int, regset_contains != 0);
 848                 STACK_PUSHX(stack, voidptr, node);
 849                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
 850
 851                 /* Process right child. */
 852                 STACK_PUSHX(stack, voidptr, right);
 853                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
 854
 855                 /* After processing left child. */
 856                 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
 857
 858                 /* Process left child. */
 859                 STACK_PUSHX(stack, voidptr, left);
 860                 STACK_PUSHX(stack, int, ADDTAGS_RECURSE_NOT_TOP_UNION);
 861
 862                 /* Regset is not empty, so add a tag here. */
 863                 if (regset_contains)
 864                   {
 865                     if (!first_pass)
 866                       {
 867                         status = tre_merge_branches(mem, node, NULL, front_tag,
 868                                                     tnfa->num_tags);
 869                         if (status != REG_OK)
 870                           break;
 871                         status = tre_add_tag_left(mem, node, front_tag);
 872                         if (status != REG_OK)
 873                           break;
 874                         if (regset_contains == REGSET_HAS_STARTS)
 875                           tnfa->tag_directions[front_tag] = TRE_TAG_LEFT_MAXIMIZE;
 876                         else
 877                           tnfa->tag_directions[front_tag] = direction;
 878                         DPRINT(("Setting t%d direction to %s\n", front_tag,
 879                                 tag_dir_str[tnfa->tag_directions[front_tag]]));
 880                         tre_purge_regset(regset, tnfa, front_tag);
 881                       }
 882
 883                     regset[0] = REGSET_UNSET;
 884                     regset_contains = 0;
 885                   }
 886
 887                 break;
 888               }
 889             } /* end switch (node->type) */
 890
 891           break; /* end case: ADDTAGS_RECURSE */
 892
 893         case ADDTAGS_AFTER_ITERATION:
 894           {
 895             tre_iteration_t *iter;
 896             tre_ast_node_t *orig;
 897             int enter_tag;
 898
 899             node = tre_stack_pop_voidptr(stack);
 900             orig = node->original ? node->original : node;
 901             iter = (tre_iteration_t *)orig->obj;
 902             enter_tag = tre_stack_pop_int(stack);
 903             if (iter->minimal)
 904               minimal_tag = enter_tag;
 905
 906             DPRINT(("After iteration\n"));
 907             if (first_pass)
 908               {
 909                 node->num_tags = iter->arg->num_tags + tre_stack_pop_int(stack);
 910                 DPRINT(("  ADDTAGS_AFTER_ITERATION: node->num_tags = %d\n",
 911                         node->num_tags));
 912               }
 913             else
 914               {
 915                 /* node->last_matched_branch will have the start tag (the tag
 916                    just *before* the iteration).  iter->arg->last_matched_branch
 917                    will have the tag(s) inside the iteration, the ones that
 918                    may need to be reset if the iteration doesn't match.  So
 919                    before we merge iter->arg into node, we need to set up
 920                    a new tre_last_matched_t and tre_last_matched_branch_t,
 921                    using any of the inside tags as cmp_tag (we choose the first
 922                    tag found by bit_ffs).  If there are no inside tags, we
 923                    don't bother creating the extra structures. */
 924                 tre_last_matched_branch_pre_t *b =
 925                                                 iter->arg->last_matched_branch;
 926
 927                 if (b && b->n_tags > 0)
 928                   {
 929                     tre_last_matched_pre_t *u;
 930
 931                     bit_ffs(b->tags, num_tags, &b->cmp_tag);
 932                     DPRINT(("  ADDTAGS_AFTER_ITERATION: n_tags=%d "
 933                             "cmp_tag = %d\n", b->n_tags, b->cmp_tag));
 934
 935                     u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t) +
 936                                        sizeof(tre_last_matched_branch_pre_t)
 937                                        + bitstr_size(tnfa->num_tags));
 938                     if (!u)
 939                       {
 940                         status = REG_ESPACE;
 941                         break;
 942                       }
 943                     u->branches = b;
 944                     u->n_branches = 1;
 945                     u->start_tag = b->cmp_tag;
 946                     u->tot_branches = b->tot_branches;
 947                     u->tot_last_matched = 1 + b->tot_last_matched;
 948                     u->tot_tags = b->tot_tags;
 949
 950                     b = (tre_last_matched_branch_pre_t *)(u + 1);
 951                     b->last_matched = u;
 952                     b->n_last_matched = 1;
 953                     b->tot_branches = 1 + u->tot_branches;
 954                     b->tot_last_matched = u->tot_last_matched;
 955                     b->tot_tags = u->tot_tags;
 956
 957                     iter->arg->last_matched_branch = b;
 958                   }
 959                 status = tre_merge_branches(mem, node, iter->arg, 0,
 960                                             tnfa->num_tags);
 961                 if (status != REG_OK)
 962                   break;
 963
 964                 if (iter->minimal)
 965                   {
 966                     /* Add a union with a left EMPTY literal and the right
 967                        being iter->arg.  This should force the tags inside
 968                        the minimal iteration to prefer being unset */
 969                     if (iter->min == 0 && iter->max <= 1)
 970                       {
 971                         tre_ast_node_t *u, *e;
 972
 973                         e = tre_ast_new_literal(mem, EMPTY, -1, -1);
 974                         if (e == NULL)
 975                           {
 976                             status = REG_ESPACE;
 977                             break;
 978                           }
 979                         u = tre_ast_new_union(mem, e, iter->arg);
 980                         if (u == NULL)
 981                           {
 982                             status = REG_ESPACE;
 983                             break;
 984                           }
 985                         iter->arg = u;
 986                       }
 987
 988                     direction = TRE_TAG_MINIMIZE;
 989                   }
 990                 else
 991                   direction = TRE_TAG_MAXIMIZE;
 992                 DPRINT(("  Setting direction to %s\n", tag_dir_str[direction]));
 993               }
 994             break;
 995           }
 996
 997         case ADDTAGS_AFTER_CAT_LEFT:
 998           {
 999             int new_tag = tre_stack_pop_int(stack);
1000             next_tag = tre_stack_pop_int(stack);
1001             DPRINT(("After cat left, tag = %d, next_tag = %d\n",
1002                     tag, next_tag));
1003             if (new_tag >= 0)
1004               {
1005                 DPRINT(("  Setting tag to %d\n", new_tag));
1006                 tag = new_tag;
1007               }
1008             break;
1009           }
1010
1011         case ADDTAGS_AFTER_CAT_RIGHT:
1012           {
1013             tre_catenation_t *cat;
1014
1015             DPRINT(("After cat right\n"));
1016             node = tre_stack_pop_voidptr(stack);
1017             cat = node->obj;
1018             if (first_pass)
1019               {
1020                 node->num_tags = cat->left->num_tags + cat->right->num_tags;
1021                 DPRINT(("  ADDTAGS_AFTER_CAT_RIGHT: node->num_tags = %d\n",
1022                         node->num_tags));
1023               }
1024             else
1025               {
1026                 status = tre_merge_branches(mem, cat->left, cat->right, 0,
1027                                             tnfa->num_tags);
1028                 if (status != REG_OK)
1029                   break;
1030                 status = tre_merge_branches(mem, node, cat->left, 0,
1031                                             tnfa->num_tags);
1032               }
1033             break;
1034           }
1035
1036         case ADDTAGS_AFTER_UNION_LEFT:
1037           DPRINT(("After union left\n"));
1038           /* Lift the bottom of the `regset' array so that when processing
1039              the right operand the items currently in the array are
1040              invisible.  The original bottom was saved at ADDTAGS_UNION and
1041              will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1042           while (*regset != REGSET_UNSET)
1043             regset++;
1044           regset_contains = 0;
1045           break;
1046
1047         case ADDTAGS_AFTER_UNION_RIGHT:
1048           {
1049             int added_tags;
1050             tre_ast_node_t *orig;
1051             tre_union_t *uni;
1052             /* Note: node may not be a UNION, but a CATENATION with a left
1053              * tag.  So that is why we pass the original node->obj on the
1054              * stack, to get the union's true values. */
1055
1056             DPRINT(("After union right\n"));
1057             node = tre_stack_pop_voidptr(stack);
1058             orig = node->original ? node->original : node;
1059             uni = (tre_union_t *)orig->obj;
1060             added_tags = tre_stack_pop_int(stack);
1061             if (first_pass)
1062               {
1063                 node->num_tags = uni->left->num_tags + uni->right->num_tags
1064                                  + added_tags;
1065                 if (uni->left_tag > 0)
1066                   node->num_tags++;
1067                 if (uni->right_tag > 0)
1068                   node->num_tags++;
1069                 DPRINT(("  ADDTAGS_AFTER_UNION_RIGHT: node->num_tags = %d\n",
1070                         node->num_tags));
1071               }
1072             regset = tre_stack_pop_voidptr(stack);
1073
1074             /* Add tags after both children, the left child gets a smaller
1075                tag than the right child.  This guarantees that we prefer
1076                the left child over the right child. */
1077             /* XXX - This is not always necessary (if the children have
1078                tags which must be seen for every match of that child). */
1079             if (!first_pass && node->make_branches)
1080               {
1081                 tre_last_matched_branch_pre_t *lb =
1082                   uni->left->last_matched_branch;
1083                 tre_last_matched_branch_pre_t *rb =
1084                   uni->right->last_matched_branch;
1085                 tre_last_matched_pre_t *lu =
1086                   uni->left->last_matched_in_progress;
1087                 tre_last_matched_pre_t *ru =
1088                   uni->right->last_matched_in_progress;
1089                 tre_last_matched_pre_t *u;
1090                 /* We don't need to call tre_merge_branches because these
1091                  * tags don't participate in submatch ranges, so don't need
1092                  * to be recorded.  But we do set the cmp_tag entry of the
1093                  * tre_last_matched_branch_pre_t, so we might call
1094                  * tre_merge_branches if we need to create an empty
1095                  * tre_last_matched_branch_pre_t. */
1096                 if (uni->left_tag > 0)
1097                   {
1098                     DPRINT(("Setting t%d direction to maximize\n",
1099                             uni->left_tag));
1100                     status = tre_add_tag_right(mem, uni->left, uni->left_tag);
1101                     if (status != REG_OK)
1102                       break;
1103                     tnfa->tag_directions[uni->left_tag] = TRE_TAG_MAXIMIZE;
1104                     if (!lb)
1105                       {
1106                         status = tre_merge_branches(mem, uni->left, NULL, -1,
1107                                                     tnfa->num_tags);
1108                         if (status != REG_OK)
1109                           break;
1110                         lb = uni->left->last_matched_branch;
1111                       }
1112                     lb->cmp_tag = uni->left_tag;
1113                   }
1114                 if (uni->right_tag > 0)
1115                   {
1116                     DPRINT(("Setting t%d direction to maximize\n",
1117                             uni->right_tag));
1118                     status = tre_add_tag_right(mem, uni->right, uni->right_tag);
1119                     if (status != REG_OK)
1120                       break;
1121                     tnfa->tag_directions[uni->right_tag] = TRE_TAG_MAXIMIZE;
1122                     if (!rb)
1123                       {
1124                         status = tre_merge_branches(mem, uni->right, NULL, -1,
1125                                                     tnfa->num_tags);
1126                         if (status != REG_OK)
1127                           break;
1128                         rb = uni->right->last_matched_branch;
1129                       }
1130                     rb->cmp_tag = uni->right_tag;
1131                   }
1132                 /* Now merge the tre_last_matched_branch_pre_t into a
1133                    tre_last_matched_pre_t */
1134                 if (lu == NULL)
1135                   {
1136                     if (ru == NULL)
1137                       {
1138                         /* Create a new tre_last_matched_pre_t */
1139                         u = tre_mem_calloc(mem, sizeof(tre_last_matched_pre_t));
1140                         if (!u)
1141                           {
1142                             status = REG_ESPACE;
1143                             break;
1144                           }
1145                         u->tot_last_matched = 1;
1146
1147                         if (lb)
1148                           {
1149                             u->branches = lb;
1150                             u->n_branches = 1;
1151                             u->tot_branches += lb->tot_branches;
1152                             u->tot_last_matched += lb->tot_last_matched;
1153                             u->tot_tags += lb->tot_tags;
1154                             if (rb)
1155                               {
1156                                 lb->next = rb;
1157                                 u->n_branches++;
1158                                 u->tot_branches += rb->tot_branches;
1159                                 u->tot_last_matched += rb->tot_last_matched;
1160                                 u->tot_tags += rb->tot_tags;
1161                               }
1162                           }
1163                         else if (rb)
1164                           {
1165                             u->branches = rb;
1166                             u->n_branches = 1;
1167                             u->tot_branches += rb->tot_branches;
1168                             u->tot_last_matched += rb->tot_last_matched;
1169                             u->tot_tags += rb->tot_tags;
1170                           }
1171                       }
1172                     else
1173                       {
1174                         /* Use ru, and add lb */
1175                         u = ru;
1176                         if (lb)
1177                           {
1178                             lb->next = u->branches;
1179                             u->branches = lb;
1180                             u->n_branches++;
1181                             u->tot_branches += lb->tot_branches;
1182                             u->tot_last_matched += lb->tot_last_matched;
1183                             u->tot_tags += lb->tot_tags;
1184                           }
1185                       }
1186                   }
1187                 else if (ru == NULL)
1188                   {
1189                     /* Use lu, and add rb */
1190                     u = lu;
1191                     if (rb)
1192                       {
1193                         rb->next = u->branches;
1194                         u->branches = rb;
1195                         u->n_branches++;
1196                         u->tot_branches += rb->tot_branches;
1197                         u->tot_last_matched += rb->tot_last_matched;
1198                         u->tot_tags += rb->tot_tags;
1199                       }
1200                   }
1201                 else
1202                   {
1203                     /* Merge lu and ru into lu */
1204                     if (lu->branches)
1205                       {
1206                         if (ru->branches)
1207                           {
1208                             tre_last_matched_branch_pre_t *b = lu->branches;
1209                             while (b->next) b = b->next;
1210                             b->next = ru->branches;
1211                             lu->n_branches += ru->n_branches;
1212                           }
1213                       }
1214                     else if (ru->branches)
1215                       {
1216                         lu->branches = ru->branches;
1217                         lu->n_branches = ru->n_branches;
1218                       }
1219                     lu->tot_branches += ru->tot_branches;
1220                     lu->tot_last_matched += ru->tot_last_matched - 1;
1221                     lu->tot_tags += ru->tot_tags;
1222                     u = lu;
1223                   }
1224                 node->last_matched_in_progress = u;
1225               }
1226             direction = TRE_TAG_MAXIMIZE;
1227             break;
1228           }
1229
1230         case ADDTAGS_AFTER_UNION_TOP: /* only called when not first_pass */
1231           {
1232             tre_last_matched_branch_pre_t *b;
1233             tre_last_matched_pre_t *u;
1234             int start_tag;
1235
1236             DPRINT(("After union top\n"));
1237             node = tre_stack_pop_voidptr(stack);
1238             start_tag = tre_stack_pop_int(stack);
1239             b = tre_mem_calloc(mem, sizeof(tre_last_matched_branch_pre_t)
1240                                + bitstr_size(tnfa->num_tags));
1241             if (!b)
1242               {
1243                 status = REG_ESPACE;
1244                 break;
1245               }
1246
1247             u = node->last_matched_in_progress;
1248             u->start_tag = start_tag;
1249             b->tot_branches = 1 + u->tot_branches;
1250             b->tot_last_matched = u->tot_last_matched;
1251             b->tot_tags = u->tot_tags;
1252             b->last_matched = u;
1253             b->n_last_matched = 1;
1254             node->last_matched_branch = b;
1255             node->last_matched_in_progress = NULL;
1256             break;
1257           }
1258
1259         default:
1260           assert(0);
1261           break;
1262
1263         } /* end switch(symbol) */
1264     } /* end while(tre_stack_num_objects(stack) > bottom) */
1265
1266   if (status != REG_OK)
1267     {
1268       DPRINT(("Error during %s pass\n", first_pass ? "first" : "second"));
1269       goto error_post_compile;
1270     }
1271
1272   if (!first_pass)
1273     {
1274       int i;
1275       if (num_tags != tnfa->num_tags)
1276         {
1277           DPRINT(("num_tags(%d) != tnfa->num_tags(%d)\n", num_tags,
1278                   tnfa->num_tags));
1279           status = REG_BADPAT;
1280           goto error_post_compile;
1281         }
1282
1283       tre_purge_regset(regset, tnfa, tag);
1284       DPRINT(("Setting t%d to %s\n", num_tags,
1285               tag_dir_str[direction]));
1286       tnfa->tag_directions[num_tags] = direction;
1287
1288       if (rtp > reorder_tags + 2 * tnfa->num_reorder_tags)
1289         {
1290           DPRINT(("Processed %d reorder tags instead of %d\n",
1291                   (int)(rtp - reorder_tags) / 2, tnfa->num_reorder_tags));
1292           status = REG_BADPAT;
1293           goto error_post_compile;
1294         }
1295     *rtp = -1;
1296 #if TRE_DEBUG
1297       if (reorder_tags[0] >= 0)
1298         {
1299           DPRINT(("reorder_tags:\n"));
1300           for (rtp = reorder_tags; *rtp >= 0;)
1301             {
1302               DPRINT(("%d after ", *rtp++));
1303               DPRINT(("%d\n", *rtp++));
1304             }
1305         }
1306         else
1307           DPRINT(("No reorder_tags\n"));
1308 #endif /* TRE_DEBUG */
1309
1310       /* Initialize to_reorder */
1311       for (i = 0; i < num_tags; i++)
1312         to_reorder[i] = i;
1313       /* Use to_seq_order to convert reorder_tags values, and use those to
1314        * reorder to_reorder */
1315       for (rtp = reorder_tags; *rtp >= 0;)
1316         {
1317           int j, high, low;
1318           int ti = *rtp++;
1319
1320           /* Skip reordering the final tag */
1321           if (ti >= num_tags)
1322             {
1323               DPRINT(("Skipping reorder of %d\n", ti));
1324               rtp++;
1325               continue;
1326             }
1327           /* The number of the tag to reorder */
1328           high = to_reorder[ti];
1329           /* Reorder after this tag */
1330           low = to_reorder[*rtp++];
1331
1332           DPRINT(("ti=%d high=%d low=%d\n", ti, high, low));
1333           if (low > high)
1334             {
1335               DPRINT(("Tag %d already before %d\n", high, low));
1336               continue;
1337             }
1338           for (j = 0; j < num_tags; j++)
1339             if (to_reorder[j] > low && to_reorder[j] < high)
1340               to_reorder[j]++;
1341           to_reorder[ti] = low + 1;
1342 #ifdef TRE_DEBUG
1343           DPRINT(("to_reorder=("));
1344           for (j = 0; j < num_tags; j++)
1345             {
1346               DPRINT(("%d", to_reorder[j]));
1347               if (j < num_tags - 1)
1348                 DPRINT((","));
1349             }
1350           DPRINT((")\n"));
1351 #endif /* TRE_DEBUG */
1352         }
1353       /* Determine if reordering in really necessary */
1354       {
1355         int need_reorder = 0;
1356         for (i = 0; i < num_tags; i++)
1357           if(to_reorder[i] != i)
1358             {
1359               need_reorder = 1;
1360               break;
1361             }
1362         /* If need_reorder is not set, free reorder_tags, and set to NULL,
1363          * indicating no reordering is needed */
1364         if (!need_reorder)
1365           {
1366             DPRINT(("Don't need to reorder\n"));
1367             xfree(reorder_tags);
1368             reorder_tags = NULL;
1369           }
1370       }
1371     }
1372
1373   if (reorder_tags)
1374     {
1375       int i;
1376       tre_tag_direction_t *new_tag_directions;
1377 #if TRE_DEBUG
1378       DPRINT(("to_reorder:"));
1379       for (i = 0; i < num_tags; i++)
1380         DPRINT((" %d->%d", i, to_reorder[i]));
1381       DPRINT(("\n"));
1382 #endif /* TRE_DEBUG */
1383
1384       DPRINT(("Reordering submatch_data\n"));
1385       for (i = 0; i < (int)tnfa->num_submatches; i++)
1386         {
1387 #if TRE_DEBUG
1388           int so = tnfa->submatch_data[i].so_tag;
1389           int eo = tnfa->submatch_data[i].eo_tag;
1390 #endif /* TRE_DEBUG */
1391           tnfa->submatch_data[i].so_tag =
1392             to_reorder[tnfa->submatch_data[i].so_tag];
1393           tnfa->submatch_data[i].eo_tag =
1394             tnfa->submatch_data[i].eo_tag < num_tags ?
1395             to_reorder[tnfa->submatch_data[i].eo_tag] :
1396             tnfa->submatch_data[i].eo_tag;
1397           DPRINT(("pmatch[%d]: {%d, %d}->{%d, %d}\n", i, so, eo,
1398                   tnfa->submatch_data[i].so_tag,
1399                   tnfa->submatch_data[i].eo_tag));
1400         }
1401
1402       DPRINT(("Reordering tag_directions\n"));
1403       /* We only allocate num_tags directions and reorder them.  The
1404        * num_tags-th direction (end tag) is left unchanged. */
1405       new_tag_directions = xmalloc(sizeof(*new_tag_directions) * num_tags);
1406       if (new_tag_directions == NULL)
1407         {
1408           status = REG_ESPACE;
1409           goto error_post_compile;
1410         }
1411       for (i = 0; i < num_tags; i++)
1412         {
1413           new_tag_directions[to_reorder[i]] = tnfa->tag_directions[i];
1414         }
1415 #if TRE_DEBUG
1416       for (i = 0; i < num_tags; i++)
1417         {
1418           DPRINT(("t%d %s->%s\n", i,
1419                   tag_dir_str[tnfa->tag_directions[i]],
1420                   tag_dir_str[new_tag_directions[i]]));
1421         }
1422         DPRINT(("t%d %s->%s\n", num_tags,
1423                 tag_dir_str[tnfa->tag_directions[num_tags]],
1424                 tag_dir_str[tnfa->tag_directions[num_tags]]));
1425 #endif /* TRE_DEBUG */
1426       memcpy(tnfa->tag_directions, new_tag_directions, sizeof(*new_tag_directions) * num_tags);
1427       xfree(new_tag_directions);
1428
1429       DPRINT(("Reordering minimal_tags\n"));
1430       for (i = 0; tnfa->minimal_tags[i] >= 0; i++)
1431         tnfa->minimal_tags[i] = tnfa->minimal_tags[i] < num_tags ?
1432                                 to_reorder[tnfa->minimal_tags[i]] :
1433                                 tnfa->minimal_tags[i];
1434
1435       DPRINT(("Reordering AST tags\n"));
1436       STACK_PUSH(stack, voidptr, tree);
1437       while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1438         {
1439           node = tre_stack_pop_voidptr(stack);
1440
1441           switch (node->type)
1442             {
1443             case LITERAL:
1444               {
1445                 tre_literal_t *lit = (tre_literal_t *)node->obj;
1446                 if (IS_TAG(lit))
1447                   lit->code_max = to_reorder[lit->code_max];
1448                 break;
1449               }
1450
1451             case UNION:
1452               {
1453                 tre_union_t *uni = (tre_union_t *)node->obj;
1454                 STACK_PUSHX(stack, voidptr, uni->right);
1455                 STACK_PUSHX(stack, voidptr, uni->left);
1456                 break;
1457               }
1458
1459             case CATENATION:
1460               {
1461                 tre_catenation_t *cat = (tre_catenation_t *)node->obj;
1462                 STACK_PUSHX(stack, voidptr, cat->right);
1463                 STACK_PUSHX(stack, voidptr, cat->left);
1464                 break;
1465               }
1466
1467             case ITERATION:
1468               {
1469                 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
1470                 STACK_PUSHX(stack, voidptr, iter->arg);
1471                 break;
1472               }
1473
1474             default:
1475               assert(0);
1476               break;
1477             }
1478         }
1479         if (status != REG_OK)
1480           {
1481             DPRINT(("Error while reordering tags\n"));
1482             goto error_post_compile;
1483           }
1484     }
1485
1486
1487   if (!first_pass)
1488     {
1489       if (tree->last_matched_branch)
1490         {
1491           tre_last_matched_branch_t *buf, *b, *bb;
1492           tre_last_matched_branch_pre_t *bp;
1493           tre_last_matched_t *u, *uu;
1494           tre_last_matched_pre_t *up;
1495           int *t;
1496           int i;
1497 #ifdef TRE_DEBUG
1498           tre_last_matched_branch_t *_b;
1499           tre_last_matched_t *_u;
1500           int *_t;
1501
1502           DPRINT(("last_match_branch_pre:\n"));
1503           print_last_match_branch_pre(tree->last_matched_branch, 0, num_tags);
1504 #endif /* TRE_DEBUG */
1505           buf = (tre_last_matched_branch_t *)xcalloc(1,
1506                                      tree->last_matched_branch->tot_branches
1507                                      * sizeof(tre_last_matched_branch_t) +
1508                                      tree->last_matched_branch->tot_last_matched
1509                                      * sizeof(tre_last_matched_t) +
1510                                      tree->last_matched_branch->tot_tags *
1511                                      sizeof(int));
1512           if (!buf)
1513             {
1514               status = REG_ESPACE;
1515               goto error_post_compile;
1516             }
1517
1518           b = buf;
1519           u = (tre_last_matched_t *)(b +
1520               tree->last_matched_branch->tot_branches);
1521           t = (int *)(u + tree->last_matched_branch->tot_last_matched);
1522 #ifdef TRE_DEBUG
1523           _b = b;
1524           _u = u;
1525           _t = t;
1526 #endif /* TRE_DEBUG */
1527           DPRINT(("Copying info_pre to info\n"));
1528           STACK_PUSH(stack, voidptr, tree->last_matched_branch);
1529           STACK_PUSH(stack, int, 1);
1530           STACK_PUSH(stack, int, COPY_LAST_MATCHED_BRANCH);
1531
1532           while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1533             {
1534               switch (tre_stack_pop_int(stack))
1535                 {
1536                 case COPY_LAST_MATCHED_BRANCH:
1537                   i = tre_stack_pop_int(stack);
1538                   /* The tre_last_matched_branch_pre_t * is still on the
1539                      stack */
1540                   STACK_PUSHX(stack, voidptr, b);
1541                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1542                   b += i;
1543                   break;
1544
1545                 case COPY_LAST_MATCHED_BRANCH_NEXT:
1546                   bb = tre_stack_pop_voidptr(stack);
1547                   bp = tre_stack_pop_voidptr(stack);
1548                   bb->n_last_matched = bp->n_last_matched;
1549                   bb->cmp_tag = bp->cmp_tag;
1550                   if (bp->n_tags > 0)
1551                     {
1552                       int n;
1553                       n = bb->n_tags = bp->n_tags;
1554                       bb->tags = t;
1555                       for (i = 0; i < num_tags; i++)
1556                         if (bit_test(bp->tags, i))
1557                           {
1558                             *t++ = i;
1559                             if (--n <= 0)
1560                               break;
1561                           }
1562                     }
1563                   if (bp->next)
1564                     {
1565                       STACK_PUSHX(stack, voidptr, bp->next);
1566                       STACK_PUSHX(stack, voidptr, bb + 1);
1567                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH_NEXT);
1568                     }
1569                   if (bp->n_last_matched > 0)
1570                     {
1571                       bb->last_matched = u;
1572                       STACK_PUSHX(stack, voidptr, bp->last_matched);
1573                       STACK_PUSHX(stack, int, bp->n_last_matched);
1574                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED);
1575                     }
1576                   break;
1577
1578                 case COPY_LAST_MATCHED:
1579                   i = tre_stack_pop_int(stack);
1580                   /* The tre_last_matched_pre_t * is still on the stack */
1581                   STACK_PUSHX(stack, voidptr, u);
1582                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1583                   u += i;
1584                   break;
1585
1586                 case COPY_LAST_MATCHED_NEXT:
1587                   uu = tre_stack_pop_voidptr(stack);
1588                   up = tre_stack_pop_voidptr(stack);
1589                   uu->n_branches = up->n_branches;
1590                   uu->branches = b;
1591                   uu->start_tag = up->start_tag;
1592                   if (up->next)
1593                     {
1594                       STACK_PUSHX(stack, voidptr, up->next);
1595                       STACK_PUSHX(stack, voidptr, uu + 1);
1596                       STACK_PUSHX(stack, int, COPY_LAST_MATCHED_NEXT);
1597                     }
1598                   STACK_PUSHX(stack, voidptr, up->branches);
1599                   STACK_PUSHX(stack, int, up->n_branches);
1600                   STACK_PUSHX(stack, int, COPY_LAST_MATCHED_BRANCH);
1601                   break;
1602                 }
1603             }
1604           if (status != REG_OK)
1605             goto error_post_compile;
1606 #ifdef TRE_DEBUG
1607           DPRINT(("last_matched_branch:\n"));
1608           print_last_match_branch(buf, 0);
1609           if (b != _b + tree->last_matched_branch->tot_branches)
1610             DPRINT(("b/%p != _b + tree->last_matched_branch->tot_branches/%p\n",
1611                     b, _b + tree->last_matched_branch->tot_branches));
1612           if (u != _u + tree->last_matched_branch->tot_last_matched)
1613             DPRINT(("u/%p != _u + "
1614                     "tree->last_matched_branch->tot_last_matched/%p\n",
1615                     u, _u + tree->last_matched_branch->tot_last_matched));
1616           if (t != _t + tree->last_matched_branch->tot_tags)
1617             DPRINT(("t/%p != _t + tree->last_matched_branch->tot_tags/%p\n",
1618                     t, _t + tree->last_matched_branch->tot_tags));
1619 #endif /* TRE_DEBUG */
1620           tnfa->last_matched_branch = buf;
1621         }
1622 #ifdef TRE_DEBUG
1623       else
1624         DPRINT(("No last_match_branch_pre\n"));
1625 #endif /* TRE_DEBUG */
1626     }
1627
1628   DPRINT(("tre_add_tags: %s complete.  Number of tags %d.\n",
1629           first_pass? "First pass" : "Second pass", num_tags));
1630 #ifdef TRE_DEBUG
1631   tre_ast_print(tree);
1632 #endif /* TRE_DEBUG */
1633   DPRINT(("tre_add_tags: tree->num_tags=%d num_tags=%d\n", tree->num_tags,
1634           num_tags));
1635   assert(tree->num_tags == num_tags);
1636   tnfa->end_tag = num_tags;
1637   tnfa->num_tags = num_tags;
1638   tnfa->num_minimals = num_minimals;
1639 error_post_compile:
1640   xfree(reorder_tags);
1641 error_reorder_tags:
1642   xfree(orig_regset);
1643 error_regset:
1644   return status;
1645 }
1646
1647
1648
1649 /*
1650   AST to TNFA compilation routines.
1651 */
1652
1653 typedef enum {
1654   COPY_RECURSE,
1655   COPY_SET_RESULT_PTR
1656 } tre_copyast_symbol_t;
1657
1658 /* Flags for tre_copy_ast(). */
1659 #define COPY_REMOVE_TAGS         1
1660 #define COPY_MAXIMIZE_FIRST_TAG  2
1661
1662 static reg_errcode_t
1663 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1664              int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1665              tre_ast_node_t **copy, int *max_pos)
1666 {
1667   reg_errcode_t status = REG_OK;
1668   int bottom = tre_stack_num_objects(stack);
1669   int num_copied = 0;
1670   int first_tag = 1;
1671   tre_ast_node_t **result = copy;
1672   tre_copyast_symbol_t symbol;
1673
1674   STACK_PUSH(stack, voidptr, ast);
1675   STACK_PUSH(stack, int, COPY_RECURSE);
1676
1677   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1678     {
1679       tre_ast_node_t *node;
1680       if (status != REG_OK)
1681         break;
1682
1683       symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1684       switch (symbol)
1685         {
1686         case COPY_SET_RESULT_PTR:
1687           result = tre_stack_pop_voidptr(stack);
1688           break;
1689         case COPY_RECURSE:
1690           node = tre_stack_pop_voidptr(stack);
1691           switch (node->type)
1692             {
1693             case LITERAL:
1694               {
1695                 tre_literal_t *lit = node->obj;
1696                 int pos = lit->position;
1697                 int min = lit->code_min;
1698                 int max = lit->code_max;
1699                 tre_bracket_match_list_t *list = !IS_SPECIAL(lit) ?
1700                                                   lit->u.bracket_match_list :
1701                                                   NULL;
1702                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1703                   {
1704                     /* XXX - e.g. [ab] has only one position but two
1705                        nodes, so we are creating holes in the state space
1706                        here.  Not fatal, just wastes memory. */
1707                     pos += *pos_add;
1708                     num_copied++;
1709                   }
1710                 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1711                   {
1712                     /* Change this tag to empty. */
1713                     min = EMPTY;
1714                     max = pos = -1;
1715                   }
1716                 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1717                          && first_tag)
1718                   {
1719                     /* Maximize the first tag. */
1720                     if (tag_directions[max] == TRE_TAG_LEFT_MAXIMIZE)
1721                       tag_directions[max] = TRE_TAG_MAXIMIZE;
1722                     first_tag = 0;
1723                   }
1724                 *result = tre_ast_new_literal(mem, min, max, pos);
1725                 if (*result == NULL)
1726                   status = REG_ESPACE;
1727
1728                 if (pos > *max_pos)
1729                   *max_pos = pos;
1730
1731                 if (!IS_SPECIAL(lit))
1732                   ((tre_literal_t *)(*result)->obj)->u.bracket_match_list
1733                       = list;
1734                 break;
1735               }
1736             case UNION:
1737               {
1738                 tre_union_t *uni = node->obj;
1739                 tre_union_t *tmp;
1740                 *result = tre_ast_new_union(mem, uni->left, uni->right);
1741                 if (*result == NULL)
1742                   {
1743                     status = REG_ESPACE;
1744                     break;
1745                   }
1746                 tmp = (*result)->obj;
1747                 result = &tmp->left;
1748                 STACK_PUSHX(stack, voidptr, uni->right);
1749                 STACK_PUSHX(stack, int, COPY_RECURSE);
1750                 STACK_PUSHX(stack, voidptr, &tmp->right);
1751                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1752                 STACK_PUSHX(stack, voidptr, uni->left);
1753                 STACK_PUSHX(stack, int, COPY_RECURSE);
1754                 break;
1755               }
1756             case CATENATION:
1757               {
1758                 tre_catenation_t *cat = node->obj;
1759                 tre_catenation_t *tmp;
1760                 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1761                 if (*result == NULL)
1762                   {
1763                     status = REG_ESPACE;
1764                     break;
1765                   }
1766                 tmp = (*result)->obj;
1767                 tmp->left = NULL;
1768                 tmp->right = NULL;
1769                 result = &tmp->left;
1770
1771                 STACK_PUSHX(stack, voidptr, cat->right);
1772                 STACK_PUSHX(stack, int, COPY_RECURSE);
1773                 STACK_PUSHX(stack, voidptr, &tmp->right);
1774                 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1775                 STACK_PUSHX(stack, voidptr, cat->left);
1776                 STACK_PUSHX(stack, int, COPY_RECURSE);
1777                 break;
1778               }
1779             case ITERATION:
1780               {
1781                 tre_iteration_t *iter = node->obj;
1782                 STACK_PUSHX(stack, voidptr, iter->arg);
1783                 STACK_PUSHX(stack, int, COPY_RECURSE);
1784                 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1785                                            iter->max, iter->minimal);
1786                 if (*result == NULL)
1787                   {
1788                     status = REG_ESPACE;
1789                     break;
1790                   }
1791                 iter = (*result)->obj;
1792                 result = &iter->arg;
1793                 break;
1794               }
1795             default:
1796               assert(0);
1797               break;
1798             }
1799           break;
1800         }
1801     }
1802   *pos_add += num_copied;
1803   return status;
1804 }
1805
1806 typedef enum {
1807   EXPAND_RECURSE,
1808   EXPAND_AFTER_ITER
1809 } tre_expand_ast_symbol_t;
1810
1811 /* Expands each iteration node that has a finite nonzero minimum or maximum
1812    iteration count to a catenated sequence of copies of the node. */
1813 static reg_errcode_t
1814 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1815                int *position, tre_tag_direction_t *tag_directions,
1816                int __unused *max_depth)
1817 {
1818   reg_errcode_t status = REG_OK;
1819   int bottom = tre_stack_num_objects(stack);
1820   int pos_add = 0;
1821   int pos_add_total = 0;
1822   int max_pos = 0;
1823 #ifdef TRE_APPROX
1824   /* Current approximate matching parameters. */
1825   int params[TRE_PARAM_LAST];
1826   /* Approximate parameter nesting level. */
1827   int params_depth = 0;
1828 #endif /* TRE_APPROX */
1829   int iter_depth = 0;
1830 #ifdef TRE_APPROX
1831   int i;
1832 #endif /* TRE_APPROX */
1833
1834 #ifdef TRE_APPROX
1835   for (i = 0; i < TRE_PARAM_LAST; i++)
1836     params[i] = TRE_PARAM_DEFAULT;
1837 #endif /* TRE_APPROX */
1838
1839   STACK_PUSHR(stack, voidptr, ast);
1840   STACK_PUSHR(stack, int, EXPAND_RECURSE);
1841   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1842     {
1843       tre_ast_node_t *node;
1844       tre_expand_ast_symbol_t symbol;
1845
1846       if (status != REG_OK)
1847         break;
1848
1849       DPRINT(("pos_add %d\n", pos_add));
1850
1851       symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1852       node = tre_stack_pop_voidptr(stack);
1853       switch (symbol)
1854         {
1855         case EXPAND_RECURSE:
1856           switch (node->type)
1857             {
1858             case LITERAL:
1859               {
1860                 tre_literal_t *lit= node->obj;
1861                 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1862                   {
1863                     lit->position += pos_add;
1864                     if (lit->position > max_pos)
1865                       max_pos = lit->position;
1866                   }
1867                 break;
1868               }
1869             case UNION:
1870               {
1871                 tre_union_t *uni = node->obj;
1872                 STACK_PUSHX(stack, voidptr, uni->right);
1873                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1874                 STACK_PUSHX(stack, voidptr, uni->left);
1875                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1876                 break;
1877               }
1878             case CATENATION:
1879               {
1880                 tre_catenation_t *cat = node->obj;
1881                 STACK_PUSHX(stack, voidptr, cat->right);
1882                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1883                 STACK_PUSHX(stack, voidptr, cat->left);
1884                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1885                 break;
1886               }
1887             case ITERATION:
1888               {
1889                 tre_iteration_t *iter = node->obj;
1890                 STACK_PUSHX(stack, int, pos_add);
1891                 STACK_PUSHX(stack, voidptr, node);
1892                 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1893                 STACK_PUSHX(stack, voidptr, iter->arg);
1894                 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1895                 /* If we are going to expand this node at EXPAND_AFTER_ITER
1896                    then don't increase the `pos' fields of the nodes now, it
1897                    will get done when expanding. */
1898                 if (iter->min > 1 || iter->max > 1)
1899                   pos_add = 0;
1900                 iter_depth++;
1901                 DPRINT(("iter\n"));
1902                 break;
1903               }
1904             default:
1905               assert(0);
1906               break;
1907             }
1908           break;
1909         case EXPAND_AFTER_ITER:
1910           {
1911             tre_iteration_t *iter = node->obj;
1912             int pos_add_last;
1913             pos_add = tre_stack_pop_int(stack);
1914             pos_add_last = pos_add;
1915             /* Originally (in tre_parse_bound), if min == 0 && max == 0, we
1916                immediate replace the whole iteration with EMPTY.  This
1917                unfortunately drops any submatches, and messes up setting the
1918                pmatch values (we can get tags of -1, and tag values in the
1919                billions).  So we left it there and replace with EMPTY here. */
1920             if (iter->min == 0 && iter->max == 0)
1921               {
1922                 tre_ast_node_t *empty = tre_ast_new_literal(mem, EMPTY, -1, -1);
1923                 if (empty == NULL)
1924                   return REG_ESPACE;
1925                 node->obj = empty->obj;
1926                 node->type = empty->type;
1927               }
1928             else if (iter->min > 1 || iter->max > 1)
1929               {
1930                 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1931                 int j;
1932                 int pos_add_save = pos_add;
1933
1934                 /* Create a catenated sequence of copies of the node. */
1935                 for (j = 0; j < iter->min; j++)
1936                   {
1937                     tre_ast_node_t *copy;
1938                     /* Remove tags from all but the last copy. */
1939                     int flags = ((j + 1 < iter->min)
1940                                  ? COPY_REMOVE_TAGS
1941                                  : COPY_MAXIMIZE_FIRST_TAG);
1942                     DPRINT(("  pos_add %d\n", pos_add));
1943                     pos_add_save = pos_add;
1944                     status = tre_copy_ast(mem, stack, iter->arg, flags,
1945                                           &pos_add, tag_directions, &copy,
1946                                           &max_pos);
1947                     if (status != REG_OK)
1948                       return status;
1949                     if (seq1 != NULL)
1950                       seq1 = tre_ast_new_catenation(mem, seq1, copy);
1951                     else
1952                       seq1 = copy;
1953                     if (seq1 == NULL)
1954                       return REG_ESPACE;
1955                   }
1956
1957                 if (iter->max == -1)
1958                   {
1959                     /* No upper limit. */
1960                     pos_add_save = pos_add;
1961                     status = tre_copy_ast(mem, stack, iter->arg, 0,
1962                                           &pos_add, NULL, &seq2, &max_pos);
1963                     if (status != REG_OK)
1964                       return status;
1965                     seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1966                     if (seq2 == NULL)
1967                       return REG_ESPACE;
1968                   }
1969                 else
1970                   {
1971                     for (j = iter->min; j < iter->max; j++)
1972                       {
1973                         tre_ast_node_t *tmp, *copy;
1974                         pos_add_save = pos_add;
1975                         status = tre_copy_ast(mem, stack, iter->arg, 0,
1976                                               &pos_add, NULL, &copy, &max_pos);
1977                         if (status != REG_OK)
1978                           return status;
1979                         if (seq2 != NULL)
1980                           seq2 = tre_ast_new_catenation(mem, copy, seq2);
1981                         else
1982                           seq2 = copy;
1983                         if (seq2 == NULL)
1984                           return REG_ESPACE;
1985                         tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1986                         if (tmp == NULL)
1987                           return REG_ESPACE;
1988                         seq2 = tre_ast_new_union(mem, tmp, seq2);
1989                         if (seq2 == NULL)
1990                           return REG_ESPACE;
1991                       }
1992                   }
1993
1994                 pos_add = pos_add_save;
1995                 if (seq1 == NULL)
1996                   seq1 = seq2;
1997                 else if (seq2 != NULL)
1998                   seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1999                 if (seq1 == NULL)
2000                   return REG_ESPACE;
2001                 node->obj = seq1->obj;
2002                 node->type = seq1->type;
2003               }
2004
2005             iter_depth--;
2006             pos_add_total += pos_add - pos_add_last;
2007             if (iter_depth == 0)
2008               pos_add = pos_add_total;
2009
2010 #ifdef TRE_APPROX
2011             /* If approximate parameters are specified, surround the result
2012                with two parameter setting nodes.  The one on the left sets
2013                the specified parameters, and the one on the right restores
2014                the old parameters. */
2015             if (iter->params)
2016               {
2017                 tre_ast_node_t *tmp_l, *tmp_r, *tmp_node, *node_copy;
2018                 int *old_params;
2019
2020                 tmp_l = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2021                 if (!tmp_l)
2022                   return REG_ESPACE;
2023                 ((tre_literal_t *)tmp_l->obj)->u.params = iter->params;
2024                 iter->params[TRE_PARAM_DEPTH] = params_depth + 1;
2025                 tmp_r = tre_ast_new_literal(mem, PARAMETER, 0, -1);
2026                 if (!tmp_r)
2027                   return REG_ESPACE;
2028                 old_params = tre_mem_alloc(mem, sizeof(*old_params)
2029                                            * TRE_PARAM_LAST);
2030                 if (!old_params)
2031                   return REG_ESPACE;
2032                 for (i = 0; i < TRE_PARAM_LAST; i++)
2033                   old_params[i] = params[i];
2034                 ((tre_literal_t *)tmp_r->obj)->u.params = old_params;
2035                 old_params[TRE_PARAM_DEPTH] = params_depth;
2036                 /* XXX - this is the only place where ast_new_node is
2037                    needed -- should be moved inside AST module. */
2038                 node_copy = tre_ast_new_node(mem, ITERATION,
2039                                              sizeof(tre_iteration_t));
2040                 if (!node_copy)
2041                   return REG_ESPACE;
2042                 node_copy->obj = node->obj;
2043                 tmp_node = tre_ast_new_catenation(mem, tmp_l, node_copy);
2044                 if (!tmp_node)
2045                   return REG_ESPACE;
2046                 tmp_node = tre_ast_new_catenation(mem, tmp_node, tmp_r);
2047                 if (!tmp_node)
2048                   return REG_ESPACE;
2049                 /* Replace the contents of `node' with `tmp_node'. */
2050                 memcpy(node, tmp_node, sizeof(*node));
2051                 node->obj = tmp_node->obj;
2052                 node->type = tmp_node->type;
2053                 params_depth++;
2054                 if (params_depth > *max_depth)
2055                   *max_depth = params_depth;
2056               }
2057 #endif /* TRE_APPROX */
2058             break;
2059           }
2060         default:
2061           assert(0);
2062           break;
2063         }
2064     }
2065
2066   *position += pos_add_total;
2067
2068   /* `max_pos' should never be larger than `*position' if the above
2069      code works, but just an extra safeguard let's make sure
2070      `*position' is set large enough so enough memory will be
2071      allocated for the transition table. */
2072   if (max_pos > *position)
2073     *position = max_pos;
2074
2075 #ifdef TRE_DEBUG
2076   DPRINT(("Expanded AST:\n"));
2077   tre_ast_print(ast);
2078   DPRINT(("*position %d, max_pos %d\n", *position, max_pos));
2079 #endif
2080
2081   return status;
2082 }
2083
2084 static tre_pos_and_tags_t *
2085 tre_set_empty(tre_mem_t mem)
2086 {
2087   tre_pos_and_tags_t *new_set;
2088
2089   new_set = tre_mem_calloc(mem, sizeof(*new_set));
2090   if (new_set == NULL)
2091     return NULL;
2092
2093   new_set[0].position = -1;
2094   new_set[0].code_min = -1;
2095   new_set[0].code_max = -1;
2096
2097   return new_set;
2098 }
2099
2100 static tre_pos_and_tags_t *
2101 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2102             tre_bracket_match_list_t *bracket_match_list, int backref)
2103 {
2104   tre_pos_and_tags_t *new_set;
2105
2106   new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2107   if (new_set == NULL)
2108     return NULL;
2109
2110   new_set[0].position = position;
2111   new_set[0].code_min = code_min;
2112   new_set[0].code_max = code_max;
2113   new_set[0].bracket_match_list = bracket_match_list;
2114   new_set[0].backref = backref;
2115   new_set[1].position = -1;
2116   new_set[1].code_min = -1;
2117   new_set[1].code_max = -1;
2118
2119   return new_set;
2120 }
2121
2122 static tre_pos_and_tags_t *
2123 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2124               int *tags, int assertions, int *params)
2125 {
2126   int s1, s2, i, j;
2127   tre_pos_and_tags_t *new_set;
2128   int *new_tags;
2129   int num_tags;
2130
2131   for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2132   for (s1 = 0; set1[s1].position >= 0; s1++);
2133   for (s2 = 0; set2[s2].position >= 0; s2++);
2134   new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2135   if (!new_set )
2136     return NULL;
2137
2138   for (s1 = 0; set1[s1].position >= 0; s1++)
2139     {
2140       new_set[s1].position = set1[s1].position;
2141       new_set[s1].code_min = set1[s1].code_min;
2142       new_set[s1].code_max = set1[s1].code_max;
2143       new_set[s1].assertions = set1[s1].assertions | assertions;
2144       new_set[s1].bracket_match_list = set1[s1].bracket_match_list;
2145       new_set[s1].backref = set1[s1].backref;
2146       if (set1[s1].tags == NULL && tags == NULL)
2147         new_set[s1].tags = NULL;
2148       else
2149         {
2150           for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2151           new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2152                                          * (i + num_tags + 1)));
2153           if (new_tags == NULL)
2154             return NULL;
2155           for (j = 0; j < i; j++)
2156             new_tags[j] = set1[s1].tags[j];
2157           for (i = 0; i < num_tags; i++)
2158             new_tags[j + i] = tags[i];
2159           new_tags[j + i] = -1;
2160           new_set[s1].tags = new_tags;
2161         }
2162       if (set1[s1].params)
2163         new_set[s1].params = set1[s1].params;
2164       if (params)
2165         {
2166           if (!new_set[s1].params)
2167             new_set[s1].params = params;
2168           else
2169             {
2170               new_set[s1].params = tre_mem_alloc(mem, sizeof(*params) *
2171                                                  TRE_PARAM_LAST);
2172               if (!new_set[s1].params)
2173                 return NULL;
2174               for (i = 0; i < TRE_PARAM_LAST; i++)
2175                 if (params[i] != TRE_PARAM_UNSET)
2176                   new_set[s1].params[i] = params[i];
2177             }
2178         }
2179     }
2180
2181   for (s2 = 0; set2[s2].position >= 0; s2++)
2182     {
2183       new_set[s1 + s2].position = set2[s2].position;
2184       new_set[s1 + s2].code_min = set2[s2].code_min;
2185       new_set[s1 + s2].code_max = set2[s2].code_max;
2186       /* XXX - why not | assertions here as well? */
2187       new_set[s1 + s2].assertions = set2[s2].assertions;
2188       new_set[s1 + s2].bracket_match_list = set2[s2].bracket_match_list;
2189       new_set[s1 + s2].backref = set2[s2].backref;
2190       if (set2[s2].tags == NULL)
2191         new_set[s1 + s2].tags = NULL;
2192       else
2193         {
2194           for (i = 0; set2[s2].tags[i] >= 0; i++);
2195           new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2196           if (new_tags == NULL)
2197             return NULL;
2198           for (j = 0; j < i; j++)
2199             new_tags[j] = set2[s2].tags[j];
2200           new_tags[j] = -1;
2201           new_set[s1 + s2].tags = new_tags;
2202         }
2203       if (set2[s2].params)
2204         new_set[s1 + s2].params = set2[s2].params;
2205       if (params)
2206         {
2207           if (!new_set[s1 + s2].params)
2208             new_set[s1 + s2].params = params;
2209           else
2210             {
2211               new_set[s1 + s2].params = tre_mem_alloc(mem, sizeof(*params) *
2212                                                       TRE_PARAM_LAST);
2213               if (!new_set[s1 + s2].params)
2214                 return NULL;
2215               for (i = 0; i < TRE_PARAM_LAST; i++)
2216                 if (params[i] != TRE_PARAM_UNSET)
2217                   new_set[s1 + s2].params[i] = params[i];
2218             }
2219         }
2220     }
2221   new_set[s1 + s2].position = -1;
2222   return new_set;
2223 }
2224
2225 /* Finds the empty path through `node' which is the one that should be
2226    taken according to POSIX.2 rules, and adds the tags on that path to
2227    `tags'.   `tags' may be NULL.  If `num_tags_seen' is not NULL, it is
2228    set to the number of tags seen on the path. */
2229 static reg_errcode_t
2230 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2231                 int *assertions, int *params, int *num_tags_seen,
2232                 int *params_seen)
2233 {
2234   tre_literal_t *lit;
2235   tre_union_t *uni;
2236   tre_catenation_t *cat;
2237   tre_iteration_t *iter;
2238   int i;
2239   int bottom = tre_stack_num_objects(stack);
2240   reg_errcode_t status = REG_OK;
2241   if (num_tags_seen)
2242     *num_tags_seen = 0;
2243   if (params_seen)
2244     *params_seen = 0;
2245
2246   status = tre_stack_push_voidptr(stack, node);
2247
2248   /* Walk through the tree recursively. */
2249   while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2250     {
2251       node = tre_stack_pop_voidptr(stack);
2252
2253       switch (node->type)
2254         {
2255         case LITERAL:
2256           lit = (tre_literal_t *)node->obj;
2257           switch (lit->code_min)
2258             {
2259             case TAG:
2260               if (lit->code_max >= 0)
2261                 {
2262                   if (tags != NULL)
2263                     {
2264                       /* Add the tag to `tags'. */
2265                       for (i = 0; tags[i] >= 0; i++)
2266                         if (tags[i] == lit->code_max)
2267                           break;
2268                       if (tags[i] < 0)
2269                         {
2270                           tags[i] = lit->code_max;
2271                           tags[i + 1] = -1;
2272                         }
2273                     }
2274                   if (num_tags_seen)
2275                     (*num_tags_seen)++;
2276                 }
2277               break;
2278             case ASSERTION:
2279               assert(lit->code_max >= 1
2280                      || lit->code_max <= ASSERT_LAST);
2281               if (assertions != NULL)
2282                 *assertions |= lit->code_max;
2283               break;
2284             case PARAMETER:
2285               if (params != NULL)
2286                 for (i = 0; i < TRE_PARAM_LAST; i++)
2287                   params[i] = lit->u.params[i];
2288               if (params_seen != NULL)
2289                 *params_seen = 1;
2290               break;
2291             case EMPTY:
2292               break;
2293             default:
2294               assert(0);
2295               break;
2296             }
2297           break;
2298
2299         case UNION:
2300           /* Subexpressions starting earlier take priority over ones
2301              starting later, so we prefer the left subexpression over the
2302              right subexpression. */
2303           uni = (tre_union_t *)node->obj;
2304           if (uni->left->nullable)
2305             STACK_PUSHX(stack, voidptr, uni->left)
2306           else if (uni->right->nullable)
2307             STACK_PUSHX(stack, voidptr, uni->right)
2308           else
2309             assert(0);
2310           break;
2311
2312         case CATENATION:
2313           /* The path must go through both children. */
2314           cat = (tre_catenation_t *)node->obj;
2315           assert(cat->left->nullable);
2316           assert(cat->right->nullable);
2317           STACK_PUSHX(stack, voidptr, cat->left);
2318           STACK_PUSHX(stack, voidptr, cat->right);
2319           break;
2320
2321         case ITERATION:
2322           /* A match with an empty string is preferred over no match at
2323              all, so we go through the argument if possible. */
2324           iter = (tre_iteration_t *)node->obj;
2325           if (iter->arg->nullable)
2326             STACK_PUSHX(stack, voidptr, iter->arg);
2327           break;
2328
2329         default:
2330           assert(0);
2331           break;
2332         }
2333     }
2334
2335   return status;
2336 }
2337
2338
2339 typedef enum {
2340   NFL_RECURSE,
2341   NFL_POST_UNION,
2342   NFL_POST_CATENATION,
2343   NFL_POST_ITERATION
2344 } tre_nfl_stack_symbol_t;
2345
2346
2347 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2348    the nodes of the AST `tree'. */
2349 static reg_errcode_t
2350 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2351 {
2352   int bottom = tre_stack_num_objects(stack);
2353
2354   STACK_PUSHR(stack, voidptr, tree);
2355   STACK_PUSHR(stack, int, NFL_RECURSE);
2356
2357   while (tre_stack_num_objects(stack) > bottom)
2358     {
2359       tre_nfl_stack_symbol_t symbol;
2360       tre_ast_node_t *node;
2361
2362       symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2363       node = tre_stack_pop_voidptr(stack);
2364       switch (symbol)
2365         {
2366         case NFL_RECURSE:
2367           switch (node->type)
2368             {
2369             case LITERAL:
2370               {
2371                 tre_literal_t *lit = (tre_literal_t *)node->obj;
2372                 if (IS_BACKREF(lit))
2373                   {
2374                     /* Back references: nullable = false, firstpos = {i},
2375                        lastpos = {i}. */
2376                     node->nullable = 0;
2377                     node->firstpos = tre_set_one(mem, lit->position, 0,
2378                                              TRE_CHAR_MAX, NULL, -1);
2379                     if (!node->firstpos)
2380                       return REG_ESPACE;
2381                     node->lastpos = tre_set_one(mem, lit->position, 0,
2382                                                 TRE_CHAR_MAX, NULL,
2383                                                 (int)lit->code_max);
2384                     if (!node->lastpos)
2385                       return REG_ESPACE;
2386                   }
2387                 else if (lit->code_min < 0)
2388                   {
2389                     /* Tags, empty strings, params, and zero width assertions:
2390                        nullable = true, firstpos = {}, and lastpos = {}. */
2391                     node->nullable = 1;
2392                     node->firstpos = tre_set_empty(mem);
2393                     if (!node->firstpos)
2394                       return REG_ESPACE;
2395                     node->lastpos = tre_set_empty(mem);
2396                     if (!node->lastpos)
2397                       return REG_ESPACE;
2398                   }
2399                 else
2400                   {
2401                     /* Literal at position i: nullable = false, firstpos = {i},
2402                        lastpos = {i}. */
2403                     node->nullable = 0;
2404                     node->firstpos =
2405                       tre_set_one(mem, lit->position, (int)lit->code_min,
2406                                   (int)lit->code_max, NULL, -1);
2407                     if (!node->firstpos)
2408                       return REG_ESPACE;
2409                     node->lastpos = tre_set_one(mem, lit->position,
2410                                                 (int)lit->code_min,
2411                                                 (int)lit->code_max,
2412                                                 lit->u.bracket_match_list,
2413                                                 -1);
2414                     if (!node->lastpos)
2415                       return REG_ESPACE;
2416                   }
2417                 break;
2418               }
2419
2420             case UNION:
2421               /* Compute the attributes for the two subtrees, and after that
2422                  for this node. */
2423               STACK_PUSHR(stack, voidptr, node);
2424               STACK_PUSHR(stack, int, NFL_POST_UNION);
2425               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2426               STACK_PUSHR(stack, int, NFL_RECURSE);
2427               STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2428               STACK_PUSHR(stack, int, NFL_RECURSE);
2429               break;
2430
2431             case CATENATION:
2432               /* Compute the attributes for the two subtrees, and after that
2433                  for this node. */
2434               STACK_PUSHR(stack, voidptr, node);
2435               STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2436               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2437               STACK_PUSHR(stack, int, NFL_RECURSE);
2438               STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2439               STACK_PUSHR(stack, int, NFL_RECURSE);
2440               break;
2441
2442             case ITERATION:
2443               /* Compute the attributes for the subtree, and after that for
2444                  this node. */
2445               STACK_PUSHR(stack, voidptr, node);
2446               STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2447               STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2448               STACK_PUSHR(stack, int, NFL_RECURSE);
2449               break;
2450             }
2451           break; /* end case: NFL_RECURSE */
2452
2453         case NFL_POST_UNION:
2454           {
2455             tre_union_t *uni = (tre_union_t *)node->obj;
2456             node->nullable = uni->left->nullable || uni->right->nullable;
2457             node->firstpos = tre_set_union(mem, uni->left->firstpos,
2458                                            uni->right->firstpos, NULL, 0, NULL);
2459             if (!node->firstpos)
2460               return REG_ESPACE;
2461             node->lastpos = tre_set_union(mem, uni->left->lastpos,
2462                                           uni->right->lastpos, NULL, 0, NULL);
2463             if (!node->lastpos)
2464               return REG_ESPACE;
2465             break;
2466           }
2467
2468         case NFL_POST_ITERATION:
2469           {
2470             int num_tags, *tags, assertions, params_seen;
2471             int *params;
2472             reg_errcode_t status;
2473             tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2474
2475             /* From Ville Laurikari's original 2001 Master's thesis, the
2476                firstpos(n) and lastpos(n) of an iteration is just the
2477                corresponding values of the iteration's argument.  Unfortunately,
2478                this isn't sufficient for the following BRE:
2479
2480                    \(a*\)*b\(\1\)    matched against    ab
2481
2482                The backreference wants to force the first subexpression to
2483                be the empty string, but there is no transition for this.  So
2484                we need to modify the lastpos(n) of an iteration to be the
2485                equivalent of that of catentation.  Using the same notation as
2486                in the thesis, lastpos(n) is redefined as:
2487
2488                    if nullable(c1) then
2489                        lastpos(c1) U
2490                        addtags(lastpos(c1),
2491                                emptymatch(c1))
2492                    else
2493                        lastpos(c1)
2494
2495                where c1 is the argument node.  firstpos(n) remains the same. */
2496
2497             /* Compute lastpos. */
2498             if (iter->min == 0 || iter->arg->nullable)
2499               {
2500                 node->nullable = 1;
2501                 if (iter->arg->nullable)
2502                   {
2503                     /* The arg matches the empty string.  Make a first pass
2504                        with tre_match_empty() to get the number of tags and
2505                        parameters. */
2506                     status = tre_match_empty(stack, iter->arg,
2507                                              NULL, NULL, NULL, &num_tags,
2508                                              &params_seen);
2509                     if (status != REG_OK)
2510                       return status;
2511                     /* Allocate arrays for the tags and parameters. */
2512                     tags = xmalloc(sizeof(int) * (num_tags + 1));
2513                     if (!tags)
2514                       return REG_ESPACE;
2515                     tags[0] = -1;
2516                     assertions = 0;
2517                     params = NULL;
2518                     if (params_seen)
2519                       {
2520                         params = tre_mem_alloc(mem, sizeof(*params)
2521                                                * TRE_PARAM_LAST);
2522                         if (!params)
2523                           {
2524                             xfree(tags);
2525                             return REG_ESPACE;
2526                           }
2527                       }
2528                     /* Second pass with tre_mach_empty() to get the list of
2529                        tags and parameters. */
2530                     status = tre_match_empty(stack, iter->arg, tags,
2531                                              &assertions, params, NULL, NULL);
2532                     if (status != REG_OK)
2533                       {
2534                         xfree(tags);
2535                         return status;
2536                       }
2537                     node->lastpos =
2538                       tre_set_union(mem, iter->arg->lastpos, iter->arg->lastpos,
2539                                     tags, assertions, params);
2540                     xfree(tags);
2541                     if (!node->lastpos)
2542                       return REG_ESPACE;
2543                   }
2544                 else
2545                   node->lastpos = iter->arg->lastpos;
2546               }
2547             else
2548               {
2549                 node->nullable = 0;
2550                 node->lastpos = iter->arg->lastpos;
2551               }
2552             node->firstpos = iter->arg->firstpos;
2553             break;
2554           }
2555
2556         case NFL_POST_CATENATION:
2557           {
2558             int num_tags, *tags, assertions, params_seen;
2559             int *params;
2560             reg_errcode_t status;
2561             tre_catenation_t *cat = node->obj;
2562             node->nullable = cat->left->nullable && cat->right->nullable;
2563
2564             /* Compute firstpos. */
2565             if (cat->left->nullable)
2566               {
2567                 /* The left side matches the empty string.  Make a first pass
2568                    with tre_match_empty() to get the number of tags and
2569                    parameters. */
2570                 status = tre_match_empty(stack, cat->left,
2571                                          NULL, NULL, NULL, &num_tags,
2572                                          &params_seen);
2573                 if (status != REG_OK)
2574                   return status;
2575                 /* Allocate arrays for the tags and parameters. */
2576                 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2577                 if (!tags)
2578                   return REG_ESPACE;
2579                 tags[0] = -1;
2580                 assertions = 0;
2581                 params = NULL;
2582                 if (params_seen)
2583                   {
2584                     params = tre_mem_alloc(mem, sizeof(*params)
2585                                            * TRE_PARAM_LAST);
2586                     if (!params)
2587                       {
2588                         xfree(tags);
2589                         return REG_ESPACE;
2590                       }
2591                   }
2592                 /* Second pass with tre_mach_empty() to get the list of
2593                    tags and parameters. */
2594                 status = tre_match_empty(stack, cat->left, tags,
2595                                          &assertions, params, NULL, NULL);
2596                 if (status != REG_OK)
2597                   {
2598                     xfree(tags);
2599                     return status;
2600                   }
2601                 node->firstpos =
2602                   tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2603                                 tags, assertions, params);
2604                 xfree(tags);
2605                 if (!node->firstpos)
2606                   return REG_ESPACE;
2607               }
2608             else
2609               {
2610                 node->firstpos = cat->left->firstpos;
2611               }
2612
2613             /* Compute lastpos. */
2614             if (cat->right->nullable)
2615               {
2616                 /* The right side matches the empty string.  Make a first pass
2617                    with tre_match_empty() to get the number of tags and
2618                    parameters. */
2619                 status = tre_match_empty(stack, cat->right,
2620                                          NULL, NULL, NULL, &num_tags,
2621                                          &params_seen);
2622                 if (status != REG_OK)
2623                   return status;
2624                 /* Allocate arrays for the tags and parameters. */
2625                 tags = xmalloc(sizeof(int) * (num_tags + 1));
2626                 if (!tags)
2627                   return REG_ESPACE;
2628                 tags[0] = -1;
2629                 assertions = 0;
2630                 params = NULL;
2631                 if (params_seen)
2632                   {
2633                     params = tre_mem_alloc(mem, sizeof(*params)
2634                                            * TRE_PARAM_LAST);
2635                     if (!params)
2636                       {
2637                         xfree(tags);
2638                         return REG_ESPACE;
2639                       }
2640                   }
2641                 /* Second pass with tre_mach_empty() to get the list of
2642                    tags and parameters. */
2643                 status = tre_match_empty(stack, cat->right, tags,
2644                                          &assertions, params, NULL, NULL);
2645                 if (status != REG_OK)
2646                   {
2647                     xfree(tags);
2648                     return status;
2649                   }
2650                 node->lastpos =
2651                   tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2652                                 tags, assertions, params);
2653                 xfree(tags);
2654                 if (!node->lastpos)
2655                   return REG_ESPACE;
2656               }
2657             else
2658               {
2659                 node->lastpos = cat->right->lastpos;
2660               }
2661             break;
2662           }
2663
2664         default:
2665           assert(0);
2666           break;
2667         }
2668     }
2669
2670   return REG_OK;
2671 }
2672
2673
2674 /* Adds a transition from each position in `p1' to each position in `p2'. */
2675 static reg_errcode_t
2676 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2677                tre_tnfa_transition_t *transitions,
2678                int *counts, int *offs)
2679 {
2680   tre_pos_and_tags_t *orig_p2 = p2;
2681   tre_tnfa_transition_t *trans;
2682   int i, j, k, l, dup, prev_p2_pos;
2683
2684   if (transitions != NULL)
2685     while (p1->position >= 0)
2686       {
2687         p2 = orig_p2;
2688         prev_p2_pos = -1;
2689         while (p2->position >= 0)
2690           {
2691             /* Optimization: if this position was already handled, skip it. */
2692             if (p2->position == prev_p2_pos)
2693               {
2694                 p2++;
2695                 continue;
2696               }
2697             prev_p2_pos = p2->position;
2698             /* Set `trans' to point to the next unused transition from
2699                position `p1->position'. */
2700             trans = transitions + offs[p1->position];
2701             while (trans->state != NULL)
2702               {
2703 #if 0
2704                 /* If we find a previous transition from `p1->position' to
2705                    `p2->position', it is overwritten.  This can happen only
2706                    if there are nested loops in the regexp, like in "((a)*)*".
2707                    In POSIX.2 repetition using the outer loop is always
2708                    preferred over using the inner loop.  Therefore the
2709                    transition for the inner loop is useless and can be thrown
2710                    away. */
2711                 /* XXX - The same position is used for all nodes in a bracket
2712                    expression, so this optimization cannot be used (it will
2713                    break bracket expressions) unless I figure out a way to
2714                    detect it here. */
2715                 if (trans->state_id == p2->position)
2716                   {
2717                     DPRINT(("*"));
2718                     break;
2719                   }
2720 #endif
2721                 trans++;
2722               }
2723
2724             if (trans->state == NULL)
2725               (trans + 1)->state = NULL;
2726             /* Use the character ranges, assertions, etc. from `p1' for
2727                the transition from `p1' to `p2'. */
2728             trans->code_min = p1->code_min;
2729             trans->code_max = p1->code_max;
2730             trans->state = transitions + offs[p2->position];
2731             trans->state_id = p2->position;
2732             trans->assertions = p1->assertions | p2->assertions
2733               | (p1->bracket_match_list != NULL ? ASSERT_BRACKET_MATCH : 0);
2734             if (p1->backref >= 0)
2735               {
2736                 assert((trans->assertions & ASSERT_BRACKET_MATCH) == 0);
2737                 assert(p2->backref < 0);
2738                 trans->u.backref = p1->backref;
2739                 trans->assertions |= ASSERT_BACKREF;
2740               }
2741             if (p1->bracket_match_list != NULL)
2742               {
2743                 trans->u.bracket_match_list =
2744                   xmalloc(SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2745                 if (trans->u.bracket_match_list == NULL)
2746                   return REG_ESPACE;
2747                 memcpy(trans->u.bracket_match_list, p1->bracket_match_list,
2748                        SIZEOF_BRACKET_MATCH_LIST(p1->bracket_match_list));
2749               }
2750
2751             /* Find out how many tags this transition has. */
2752             i = 0;
2753             if (p1->tags != NULL)
2754               while(p1->tags[i] >= 0)
2755                 i++;
2756             j = 0;
2757             if (p2->tags != NULL)
2758               while(p2->tags[j] >= 0)
2759                 j++;
2760
2761             /* If we are overwriting a transition, free the old tag array. */
2762             if (trans->tags != NULL)
2763               xfree(trans->tags);
2764             trans->tags = NULL;
2765
2766             /* If there were any tags, allocate an array and fill it. */
2767             if (i + j > 0)
2768               {
2769                 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2770                 if (!trans->tags)
2771                   return REG_ESPACE;
2772                 i = 0;
2773                 if (p1->tags != NULL)
2774                   while(p1->tags[i] >= 0)
2775                     {
2776                       trans->tags[i] = p1->tags[i];
2777                       i++;
2778                     }
2779                 l = i;
2780                 j = 0;
2781                 if (p2->tags != NULL)
2782                   while (p2->tags[j] >= 0)
2783                     {
2784                       /* Don't add duplicates. */
2785                       dup = 0;
2786                       for (k = 0; k < i; k++)
2787                         if (trans->tags[k] == p2->tags[j])
2788                           {
2789                             dup = 1;
2790                             break;
2791                           }
2792                       if (!dup)
2793                         trans->tags[l++] = p2->tags[j];
2794                       j++;
2795                     }
2796                 trans->tags[l] = -1;
2797               }
2798
2799             /* Set the parameter array.  If both `p2' and `p1' have same
2800                parameters, the values in `p2' override those in `p1'. */
2801             if (p1->params || p2->params)
2802               {
2803                 if (!trans->params)
2804                   trans->params = xmalloc(sizeof(*trans->params)
2805                                           * TRE_PARAM_LAST);
2806                 if (!trans->params)
2807                   return REG_ESPACE;
2808                 for (i = 0; i < TRE_PARAM_LAST; i++)
2809                   {
2810                     trans->params[i] = TRE_PARAM_UNSET;
2811                     if (p1->params && p1->params[i] != TRE_PARAM_UNSET)
2812                       trans->params[i] = p1->params[i];
2813                     if (p2->params && p2->params[i] != TRE_PARAM_UNSET)
2814                       trans->params[i] = p2->params[i];
2815                   }
2816               }
2817             else
2818               {
2819                 if (trans->params)
2820                   xfree(trans->params);
2821                 trans->params = NULL;
2822               }
2823
2824
2825 #ifdef TRE_DEBUG
2826             {
2827               int *tags;
2828
2829               DPRINT(("  %2d -> %2d on %3d", p1->position, p2->position,
2830                       p1->code_min));
2831               if (p1->code_max != p1->code_min)
2832                 DPRINT(("-%3d", p1->code_max));
2833               tags = trans->tags;
2834               if (tags)
2835                 {
2836                   DPRINT((", tags ["));
2837                   while (*tags >= 0)
2838                     {
2839                       DPRINT(("%d", *tags));
2840                       tags++;
2841                       if (*tags >= 0)
2842                         DPRINT((","));
2843                     }
2844                   DPRINT(("]"));
2845                 }
2846               if (trans->assertions)
2847                 DPRINT((", assert %d", trans->assertions));
2848               if (trans->assertions & ASSERT_BACKREF)
2849                 DPRINT((", backref %d", trans->u.backref));
2850               else if (trans->assertions & ASSERT_BRACKET_MATCH)
2851                 DPRINT((", bracket_match_list %p",
2852                         trans->u.bracket_match_list));
2853               if (trans->params)
2854                 {
2855                   DPRINT((", "));
2856                   tre_print_params(trans->params);
2857                 }
2858               DPRINT(("\n"));
2859             }
2860 #endif /* TRE_DEBUG */
2861             p2++;
2862           }
2863         p1++;
2864       }
2865   else
2866     /* Compute a maximum limit for the number of transitions leaving
2867        from each state. */
2868     while (p1->position >= 0)
2869       {
2870         p2 = orig_p2;
2871         while (p2->position >= 0)
2872           {
2873             counts[p1->position]++;
2874             p2++;
2875           }
2876         p1++;
2877       }
2878   return REG_OK;
2879 }
2880
2881 /* Converts the syntax tree to a TNFA.  All the transitions in the TNFA are
2882    labelled with one character range (there are no transitions on empty
2883    strings).  The TNFA takes O(n^2) space in the worst case, `n' is size of
2884    the regexp. */
2885 static reg_errcode_t
2886 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2887                 int *counts, int *offs)
2888 {
2889   tre_union_t *uni;
2890   tre_catenation_t *cat;
2891   tre_iteration_t *iter;
2892   reg_errcode_t errcode = REG_OK;
2893
2894   /* XXX - recurse using a stack!. */
2895   switch (node->type)
2896     {
2897     case LITERAL:
2898       break;
2899     case UNION:
2900       uni = (tre_union_t *)node->obj;
2901       errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2902       if (errcode != REG_OK)
2903         return errcode;
2904       errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2905       break;
2906
2907     case CATENATION:
2908       cat = (tre_catenation_t *)node->obj;
2909       /* Add a transition from each position in cat->left->lastpos
2910          to each position in cat->right->firstpos. */
2911       errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2912                                transitions, counts, offs);
2913       if (errcode != REG_OK)
2914         return errcode;
2915       errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2916       if (errcode != REG_OK)
2917         return errcode;
2918       errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2919       break;
2920
2921     case ITERATION:
2922       iter = (tre_iteration_t *)node->obj;
2923       assert(iter->max == -1 || iter->max == 1);
2924
2925       if (iter->max == -1)
2926         {
2927           assert(iter->min == 0 || iter->min == 1);
2928           /* Add a transition from each last position in the iterated
2929              expression to each first position. */
2930           errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2931                                    transitions, counts, offs);
2932           if (errcode != REG_OK)
2933             return errcode;
2934         }
2935       errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2936       break;
2937     }
2938   return errcode;
2939 }
2940
2941
2942 #define ERROR_EXIT(err)           \
2943   do                              \
2944     {                             \
2945       errcode = err;              \
2946       if (/*CONSTCOND*/1)         \
2947         goto error_exit;          \
2948     }                             \
2949  while (/*CONSTCOND*/0)
2950
2951
2952 int
2953 tre_compile(regex_t *preg, const tre_char_t *regex, size_t n, int cflags,
2954             locale_t loc)
2955 {
2956   tre_stack_t *stack;
2957   tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2958   tre_pos_and_tags_t *p;
2959   int *counts = NULL, *offs = NULL;
2960   int i, add = 0;
2961   tre_tnfa_transition_t *transitions, *initial;
2962   tre_tnfa_t *tnfa = NULL;
2963   tre_submatch_data_t *submatch_data = NULL;
2964   tre_tag_direction_t *tag_directions = NULL;
2965   reg_errcode_t errcode;
2966   tre_mem_t mem;
2967
2968   /* Parse context. */
2969   tre_parse_ctx_t parse_ctx;
2970
2971   /* Allocate a stack used throughout the compilation process for various
2972      purposes. */
2973   stack = tre_stack_new(512, 10240, 128);
2974   if (!stack)
2975     return REG_ESPACE;
2976   /* Allocate a fast memory allocator. */
2977   mem = tre_mem_new();
2978   if (!mem)
2979     {
2980       tre_stack_destroy(stack);
2981       return REG_ESPACE;
2982     }
2983
2984   /* Parse the regexp. */
2985   memset(&parse_ctx, 0, sizeof(parse_ctx));
2986   parse_ctx.mem = mem;
2987   parse_ctx.stack = stack;
2988   parse_ctx.re = regex;
2989   parse_ctx.len = n;
2990   /* Only allow REG_UNGREEDY to be set if both REG_ENHANCED and REG_EXTENDED
2991      are also set */
2992   if ((cflags & (REG_ENHANCED | REG_EXTENDED)) != (REG_ENHANCED | REG_EXTENDED))
2993     cflags &= ~REG_UNGREEDY;
2994   parse_ctx.cflags = cflags;
2995   parse_ctx.max_backref = -1;
2996   parse_ctx.loc = loc;
2997   parse_ctx.submatch_id_invisible = SUBMATCH_ID_INVISIBLE_START;
2998
2999   DPRINT(("tre_compile: parsing '%.*" STRF "'\n", (int)n, regex));
3000   errcode = tre_parse(&parse_ctx);
3001   if (errcode != REG_OK)
3002     ERROR_EXIT(errcode);
3003   preg->re_nsub = parse_ctx.submatch_id - 1;
3004   tree = parse_ctx.result;
3005
3006   /* Back references and approximate matching cannot currently be used
3007      in the same regexp. */
3008   if (parse_ctx.max_backref >= 0 && parse_ctx.have_approx)
3009     ERROR_EXIT(REG_BADPAT);
3010
3011 #ifdef TRE_DEBUG
3012   tre_ast_print(tree);
3013 #endif /* TRE_DEBUG */
3014
3015   /* Referring to nonexistent subexpressions is illegal. */
3016   if (parse_ctx.max_backref > (int)preg->re_nsub)
3017     ERROR_EXIT(REG_ESUBREG);
3018
3019   /* Allocate the TNFA struct. */
3020   tnfa = xcalloc(1, sizeof(tre_tnfa_t));
3021   if (tnfa == NULL)
3022     ERROR_EXIT(REG_ESPACE);
3023   tnfa->have_backrefs = parse_ctx.max_backref >= 0;
3024   tnfa->have_approx = parse_ctx.have_approx;
3025   tnfa->num_submatches = parse_ctx.submatch_id;
3026   tnfa->num_submatches_invisible = parse_ctx.submatch_id_invisible
3027                                    - SUBMATCH_ID_INVISIBLE_START;
3028   tnfa->num_reorder_tags = parse_ctx.num_reorder_tags;
3029   tnfa->loc = parse_ctx.loc;
3030
3031   /* Set up tags for submatch addressing.  If REG_NOSUB is set and the
3032      regexp does not have back references, this can be skipped. */
3033   if (tnfa->num_reorder_tags > 0 || !(cflags & REG_NOSUB))
3034     {
3035       DPRINT(("tre_compile: setting up tags\n"));
3036
3037       /* Figure out how many tags we will need. */
3038       errcode = tre_add_tags(NULL, stack, tree, tnfa);
3039       if (errcode != REG_OK)
3040         ERROR_EXIT(errcode);
3041 #ifdef TRE_DEBUG
3042       tre_ast_print(tree);
3043 #endif /* TRE_DEBUG */
3044
3045       if (tnfa->num_tags > 0)
3046         {
3047           tag_directions = xmalloc(sizeof(*tag_directions)
3048                                    * (tnfa->num_tags + 1));
3049           if (tag_directions == NULL)
3050             ERROR_EXIT(REG_ESPACE);
3051           tnfa->tag_directions = tag_directions;
3052           memset(tag_directions, -1,
3053                  sizeof(*tag_directions) * (tnfa->num_tags + 1));
3054         }
3055       tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 3,
3056                                    sizeof(*tnfa->minimal_tags));
3057       if (tnfa->minimal_tags == NULL)
3058         ERROR_EXIT(REG_ESPACE);
3059
3060       submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
3061                               sizeof(*submatch_data));
3062       if (submatch_data == NULL)
3063         ERROR_EXIT(REG_ESPACE);
3064       /* Set the eo_tag value to -1 to indicate that that corresponding
3065        * submatch has not be completed yet */
3066       for (i = 0; i < parse_ctx.submatch_id; i++)
3067         {
3068           submatch_data[i].eo_tag = -1;
3069         }
3070       tnfa->submatch_data = submatch_data;
3071
3072       errcode = tre_add_tags(mem, stack, tree, tnfa);
3073       if (errcode != REG_OK)
3074         ERROR_EXIT(errcode);
3075
3076 #ifdef TRE_DEBUG
3077       for (i = 0; i < parse_ctx.submatch_id; i++)
3078         DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3079                 i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3080       for (i = 0; i <= tnfa->num_tags; i++)
3081         DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3082 #endif /* TRE_DEBUG */
3083     }
3084
3085   /* Expand iteration nodes. */
3086   errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
3087                            tag_directions, &tnfa->params_depth);
3088   if (errcode != REG_OK)
3089     ERROR_EXIT(errcode);
3090
3091   /* Add a dummy node for the final state.
3092      XXX - For certain patterns this dummy node can be optimized away,
3093            for example "a*" or "ab*".   Figure out a simple way to detect
3094            this possibility. */
3095   tmp_ast_l = tree;
3096   tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
3097   if (tmp_ast_r == NULL)
3098     ERROR_EXIT(REG_ESPACE);
3099
3100   tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
3101   if (tree == NULL)
3102     ERROR_EXIT(REG_ESPACE);
3103
3104 #ifdef TRE_DEBUG
3105   tre_ast_print(tree);
3106   DPRINT(("Number of states: %d\n", parse_ctx.position));
3107   if (submatch_data)
3108     for (i = 0; i < parse_ctx.submatch_id; i++)
3109       DPRINT(("pmatch[%d] = {t%d, t%d}\n",
3110               i, submatch_data[i].so_tag, submatch_data[i].eo_tag));
3111   if (tag_directions)
3112     for (i = 0; i <= tnfa->num_tags; i++)
3113       DPRINT(("t%d is %s\n", i, tag_dir_str[tag_directions[i]]));
3114 #endif /* TRE_DEBUG */
3115
3116   errcode = tre_compute_nfl(mem, stack, tree);
3117   if (errcode != REG_OK)
3118     ERROR_EXIT(errcode);
3119
3120   counts = xmalloc(sizeof(int) * parse_ctx.position);
3121   if (counts == NULL)
3122     ERROR_EXIT(REG_ESPACE);
3123
3124   offs = xmalloc(sizeof(int) * parse_ctx.position);
3125   if (offs == NULL)
3126     ERROR_EXIT(REG_ESPACE);
3127
3128   for (i = 0; i < parse_ctx.position; i++)
3129     counts[i] = 0;
3130   tre_ast_to_tnfa(tree, NULL, counts, NULL);
3131
3132   add = 0;
3133   for (i = 0; i < parse_ctx.position; i++)
3134     {
3135       offs[i] = add;
3136       add += counts[i] + 1;
3137       counts[i] = 0;
3138     }
3139   transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
3140   if (transitions == NULL)
3141     ERROR_EXIT(REG_ESPACE);
3142   tnfa->transitions = transitions;
3143   tnfa->num_transitions = add;
3144
3145   DPRINT(("Converting to TNFA:\n"));
3146   errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
3147   if (errcode != REG_OK)
3148     ERROR_EXIT(errcode);
3149
3150 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3151   /* If in eight bit mode, compute a table of characters that can be the
3152      first character of a match. */
3153   tnfa->first_char = -1;
3154   if (TRE_MB_CUR_MAX_L(tnfa->loc) == 1 && !tmp_ast_l->nullable)
3155     {
3156       int count = 0;
3157       tre_cint_t k;
3158       DPRINT(("Characters that can start a match:"));
3159       tnfa->firstpos_chars = xcalloc(256, sizeof(char));
3160       if (tnfa->firstpos_chars == NULL)
3161         ERROR_EXIT(REG_ESPACE);
3162       for (p = tree->firstpos; p->position >= 0; p++)
3163         {
3164           tre_tnfa_transition_t *j = transitions + offs[p->position];
3165           while (j->state != NULL)
3166             {
3167               for (k = j->code_min; k <= j->code_max && k < 256; k++)
3168                 {
3169                   DPRINT((" %d", k));
3170                   tnfa->firstpos_chars[k] = 1;
3171                   count++;
3172                 }
3173               j++;
3174             }
3175         }
3176       DPRINT(("\n"));
3177 #define TRE_OPTIMIZE_FIRST_CHAR 1
3178 #if TRE_OPTIMIZE_FIRST_CHAR
3179       if (count == 1)
3180         {
3181           for (k = 0; k < 256; k++)
3182             if (tnfa->firstpos_chars[k])
3183               {
3184                 DPRINT(("first char must be %d\n", k));
3185                 tnfa->first_char = k;
3186                 xfree(tnfa->firstpos_chars);
3187                 tnfa->firstpos_chars = NULL;
3188                 break;
3189               }
3190         }
3191 #endif
3192
3193     }
3194   else
3195     tnfa->firstpos_chars = NULL;
3196 #else /* !USE_FIRSTPOS_CHARS */
3197
3198   /* Set first_char only if there is only one character that can be the
3199      first character of a match */
3200   tnfa->first_char = -1;
3201   if (!tmp_ast_l->nullable)
3202     {
3203       int scanning = 1;
3204       for (p = tree->firstpos; scanning && p->position >= 0; p++)
3205         {
3206           tre_tnfa_transition_t *j = transitions + offs[p->position];
3207           while (j->state != NULL)
3208             {
3209               if (j->code_min <= j->code_max)
3210                 {
3211                   if (j->code_max != j->code_min || j->code_min == -1 || tnfa->first_char != -1)
3212                     {
3213                       tnfa->first_char = -1;
3214                       scanning = 0;
3215                       break;
3216                     }
3217                   tnfa->first_char = j->code_min;
3218                 }
3219               j++;
3220             }
3221         }
3222 #ifdef TRE_DEBUG
3223       if (tnfa->first_char >= 0)
3224         DPRINT(("first char must be %d\n", tnfa->first_char));
3225 #endif /* TRE_DEBUG */
3226     }
3227 #endif /* !USE_FIRSTPOS_CHARS */
3228
3229   p = tree->firstpos;
3230   i = 0;
3231   while (p->position >= 0)
3232     {
3233       i++;
3234
3235 #ifdef TRE_DEBUG
3236       {
3237         int *tags;
3238         DPRINT(("initial: %d", p->position));
3239         tags = p->tags;
3240         if (tags != NULL)
3241           {
3242             if (*tags >= 0)
3243               DPRINT(("/"));
3244             while (*tags >= 0)
3245               {
3246                 DPRINT(("%d", *tags));
3247                 tags++;
3248                 if (*tags >= 0)
3249                   DPRINT((","));
3250               }
3251           }
3252         DPRINT((", assert %d", p->assertions));
3253         if (p->params)
3254           {
3255             DPRINT((", "));
3256             tre_print_params(p->params);
3257           }
3258         DPRINT(("\n"));
3259       }
3260 #endif /* TRE_DEBUG */
3261
3262       p++;
3263     }
3264
3265   initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
3266   if (initial == NULL)
3267     ERROR_EXIT(REG_ESPACE);
3268   tnfa->initial = initial;
3269
3270   i = 0;
3271   for (p = tree->firstpos; p->position >= 0; p++)
3272     {
3273       initial[i].state = transitions + offs[p->position];
3274       initial[i].state_id = p->position;
3275       initial[i].tags = NULL;
3276       /* Copy the arrays p->tags, and p->params, they are allocated
3277          from a tre_mem object. */
3278       if (p->tags)
3279         {
3280           int j;
3281           for (j = 0; p->tags[j] >= 0; j++);
3282           initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
3283           if (!initial[i].tags)
3284             ERROR_EXIT(REG_ESPACE);
3285           memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
3286         }
3287       initial[i].params = NULL;
3288       if (p->params)
3289         {
3290           initial[i].params = xmalloc(sizeof(*p->params) * TRE_PARAM_LAST);
3291           if (!initial[i].params)
3292             ERROR_EXIT(REG_ESPACE);
3293           memcpy(initial[i].params, p->params,
3294                  sizeof(*p->params) * TRE_PARAM_LAST);
3295         }
3296       initial[i].assertions = p->assertions;
3297       i++;
3298     }
3299   initial[i].state = NULL;
3300
3301   tnfa->num_transitions = add;
3302   tnfa->final = transitions + offs[tree->lastpos[0].position];
3303   tnfa->num_states = parse_ctx.position;
3304   tnfa->cflags = cflags;
3305
3306   DPRINT(("final state %d (%p)\n", tree->lastpos[0].position,
3307           (void *)tnfa->final));
3308
3309   tre_mem_destroy(mem);
3310   tre_stack_destroy(stack);
3311   xfree(counts);
3312   xfree(offs);
3313
3314 #ifdef TRE_USE_SYSTEM_REGEX_H
3315   preg->re_magic = RE_MAGIC;
3316 #endif /* TRE_USE_SYSTEM_REGEX_H */
3317   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3318 #ifdef __LIBC__
3319   /* In Libc, we need to retain the locale.  Outside Libc, we already called
3320      duplocale() which does the retaining. */
3321   XL_RETAIN(tnfa->loc);
3322 #endif /* __LIBC__ */
3323   return REG_OK;
3324
3325  error_exit:
3326   /* Free everything that was allocated and return the error code. */
3327   tre_mem_destroy(mem);
3328   if (stack != NULL)
3329     tre_stack_destroy(stack);
3330   if (counts != NULL)
3331     xfree(counts);
3332   if (offs != NULL)
3333     xfree(offs);
3334
3335   /* Set tnfa into preg, so that calling tre_free() will free the contents
3336      of tnfa.  But in Libc, NULL out the loc field since we never retained
3337      the locale.  Outside Libc, we let tre_free() call freelocale(). */
3338   preg->TRE_REGEX_T_FIELD = (void *)tnfa;
3339 #ifdef __LIBC__
3340   if(tnfa) tnfa->loc = NULL;
3341 #endif /* __LIBC__ */
3342
3343   tre_free(preg);
3344   return errcode;
3345 }
3346
3347
3348
3349
3350 void
3351 tre_free(regex_t *preg)
3352 {
3353   tre_tnfa_t *tnfa;
3354   unsigned int i;
3355   tre_tnfa_transition_t *trans;
3356
3357 #ifdef TRE_USE_SYSTEM_REGEX_H
3358   preg->re_magic = 0;
3359 #endif /* TRE_USE_SYSTEM_REGEX_H */
3360   tnfa = (void *)preg->TRE_REGEX_T_FIELD;
3361   if (!tnfa)
3362     return;
3363   preg->TRE_REGEX_T_FIELD = NULL;
3364
3365   for (i = 0; i < tnfa->num_transitions; i++)
3366     if (tnfa->transitions[i].state)
3367       {
3368         if (tnfa->transitions[i].tags)
3369           xfree(tnfa->transitions[i].tags);
3370         if (tnfa->transitions[i].assertions & ASSERT_BRACKET_MATCH)
3371           xfree(tnfa->transitions[i].u.bracket_match_list);
3372         if (tnfa->transitions[i].params)
3373           xfree(tnfa->transitions[i].params);
3374       }
3375   if (tnfa->transitions)
3376     xfree(tnfa->transitions);
3377
3378   if (tnfa->initial)
3379     {
3380       for (trans = tnfa->initial; trans->state; trans++)
3381         {
3382           if (trans->tags)
3383             xfree(trans->tags);
3384           if (trans->params)
3385             xfree(trans->params);
3386         }
3387       xfree(tnfa->initial);
3388     }
3389
3390   if (tnfa->submatch_data)
3391     {
3392       xfree(tnfa->submatch_data);
3393     }
3394
3395   if (tnfa->tag_directions)
3396     xfree(tnfa->tag_directions);
3397 #ifdef USE_FIRSTPOS_CHARS /* not defined */
3398   if (tnfa->firstpos_chars)
3399     xfree(tnfa->firstpos_chars);
3400 #endif /* USE_FIRSTPOS_CHARS */
3401   if (tnfa->minimal_tags)
3402     xfree(tnfa->minimal_tags);
3403
3404   if (tnfa->loc)
3405 #ifdef __LIBC__
3406     XL_RELEASE(tnfa->loc);
3407 #else /* !__LIBC__ */
3408     freelocale(tnfa->loc);
3409 #endif /* !__LIBC__ */
3410
3411   if (tnfa->last_matched_branch)
3412     xfree(tnfa->last_matched_branch);
3413
3414   xfree(tnfa);
3415 }
3416
3417 #ifndef __LIBC__
3418 char *
3419 tre_version(void)
3420 {
3421   static char str[256];
3422   char *version;
3423
3424   if (str[0] == 0)
3425     {
3426       (void) tre_config(TRE_CONFIG_VERSION, &version);
3427       (void) snprintf(str, sizeof(str), "TRE %s (BSD)", version);
3428     }
3429   return str;
3430 }
3431
3432 int
3433 tre_config(int query, void *result)
3434 {
3435   int *int_result = result;
3436   const char **string_result = result;
3437
3438   switch (query)
3439     {
3440     case TRE_CONFIG_APPROX:
3441 #ifdef TRE_APPROX
3442       *int_result = 1;
3443 #else /* !TRE_APPROX */
3444       *int_result = 0;
3445 #endif /* !TRE_APPROX */
3446       return REG_OK;
3447
3448     case TRE_CONFIG_WCHAR:
3449 #ifdef TRE_WCHAR
3450       *int_result = 1;
3451 #else /* !TRE_WCHAR */
3452       *int_result = 0;
3453 #endif /* !TRE_WCHAR */
3454       return REG_OK;
3455
3456     case TRE_CONFIG_MULTIBYTE:
3457 #ifdef TRE_MULTIBYTE
3458       *int_result = 1;
3459 #else /* !TRE_MULTIBYTE */
3460       *int_result = 0;
3461 #endif /* !TRE_MULTIBYTE */
3462       return REG_OK;
3463
3464     case TRE_CONFIG_SYSTEM_ABI:
3465 #ifdef TRE_CONFIG_SYSTEM_ABI
3466       *int_result = 1;
3467 #else /* !TRE_CONFIG_SYSTEM_ABI */
3468       *int_result = 0;
3469 #endif /* !TRE_CONFIG_SYSTEM_ABI */
3470       return REG_OK;
3471
3472     case TRE_CONFIG_VERSION:
3473       *string_result = TRE_VERSION;
3474       return REG_OK;
3475     }
3476
3477   return REG_NOMATCH;
3478 }
3479 #endif /* !__LIBC__ */
3480
3481 #pragma clang diagnostic push
3482 /* EOF */