2 tre-match-utils.h - TRE matcher helper definitions
4 This software is released under a BSD-style license.
5 See the file LICENSE for details and copyright.
9 #include "tre-internal.h"
/* Views the `string' variable of the expanding scope as a pointer to a
   const tre_str_source. */
11 #define str_source ((const tre_str_source*)string)
17 /* Wide character and multibyte support.
   GET_NEXT_WCHAR() loads the next input character into `next_c', choosing
   the input representation by `type':
     STR_BYTE - reads one byte through `str_byte' as an unsigned char;
     STR_WIDE - reads one element through `str_wide';
     STR_MBS  - advances `pos' by `pos_add_next' and decodes with
                tre_mbrtowc_l(), testing for the (size_t)-1 / (size_t)-2
                results and for a zero result when `len' is non-negative;
     STR_USER - advances `pos' by `pos_add_next' and calls the
                get_next_char callback of `str_source', storing its return
                value in `str_user_end'.
   The byte and wide branches first test `len >= 0 && pos >= len'.
   Every name used here must be provided by the expanding scope.
   NOTE(review): confirm the end-of-input and decode-error handling against
   the complete macro body. */
20 #define GET_NEXT_WCHAR() \
23 if (type == STR_BYTE) \
26 if (len >= 0 && pos >= len) \
29 next_c = (unsigned char)(*str_byte++); \
31 else if (type == STR_WIDE) \
34 if (len >= 0 && pos >= len) \
37 next_c = *str_wide++; \
39 else if (type == STR_MBS) \
41 pos += pos_add_next; \
42 if (str_byte == NULL) \
59 w = tre_mbrtowc_l(&next_c, str_byte, (size_t)max, &mbstate, \
61 if (w == (size_t)-1 || w == (size_t)-2) \
63 if (w == 0 && len >= 0) \
77 else if (type == STR_USER) \
79 pos += pos_add_next; \
80 str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
81 str_source->context); \
83 } while(/*CONSTCOND*/0)
84 #else /* !TRE_STR_USER */
/* GET_NEXT_WCHAR() variant without a STR_USER branch.  Loads the next input
   character into `next_c' according to `type':
     STR_BYTE - reads one byte through `str_byte' as an unsigned char;
     STR_WIDE - reads one element through `str_wide';
     STR_MBS  - advances `pos' by `pos_add_next' and decodes with
                tre_mbrtowc_l(), testing for the (size_t)-1 / (size_t)-2
                results and for a zero result when `len' is non-negative.
   The byte and wide branches first test `len >= 0 && pos >= len'.
   NOTE(review): confirm the end-of-input and decode-error handling against
   the complete macro body. */
85 #define GET_NEXT_WCHAR() \
88 if (type == STR_BYTE) \
91 if (len >= 0 && pos >= len) \
94 next_c = (unsigned char)(*str_byte++); \
96 else if (type == STR_WIDE) \
99 if (len >= 0 && pos >= len) \
102 next_c = *str_wide++; \
104 else if (type == STR_MBS) \
106 pos += pos_add_next; \
107 if (str_byte == NULL) \
124 w = tre_mbrtowc_l(&next_c, str_byte, (size_t)max, &mbstate, \
126 if (w == (size_t)-1 || w == (size_t)-2) \
128 if (w == 0 && len >= 0) \
142 } while(/*CONSTCOND*/0)
143 #endif /* !TRE_STR_USER */
145 #else /* !TRE_MULTIBYTE */
147 /* Wide character support, no multibyte support.
   GET_NEXT_WCHAR() loads the next input character into `next_c':
     STR_BYTE - reads one byte through `str_byte' as an unsigned char;
     STR_WIDE - reads one element through `str_wide';
     STR_USER - advances `pos' by `pos_add_next' and calls the
                get_next_char callback of `str_source', storing its return
                value in `str_user_end'.
   The byte and wide branches first test `len >= 0 && pos >= len'.
   NOTE(review): confirm the end-of-input handling against the complete
   macro body. */
150 #define GET_NEXT_WCHAR() \
153 if (type == STR_BYTE) \
156 if (len >= 0 && pos >= len) \
159 next_c = (unsigned char)(*str_byte++); \
161 else if (type == STR_WIDE) \
164 if (len >= 0 && pos >= len) \
167 next_c = *str_wide++; \
169 else if (type == STR_USER) \
171 pos += pos_add_next; \
172 str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
173 str_source->context); \
175 } while(/*CONSTCOND*/0)
176 #else /* !TRE_STR_USER */
/* GET_NEXT_WCHAR() variant handling only STR_BYTE and STR_WIDE input.
   After testing `len >= 0 && pos >= len', it reads the next character into
   `next_c' through `str_byte' (as an unsigned char) or `str_wide'.
   NOTE(review): confirm the end-of-input handling against the complete
   macro body. */
177 #define GET_NEXT_WCHAR() \
180 if (type == STR_BYTE) \
183 if (len >= 0 && pos >= len) \
186 next_c = (unsigned char)(*str_byte++); \
188 else if (type == STR_WIDE) \
191 if (len >= 0 && pos >= len) \
194 next_c = *str_wide++; \
196 } while(/*CONSTCOND*/0)
197 #endif /* !TRE_STR_USER */
199 #endif /* !TRE_MULTIBYTE */
201 #else /* !TRE_WCHAR */
203 /* No wide character or multibyte support.
   GET_NEXT_WCHAR() loads the next input character into `next_c':
     STR_BYTE - after testing `len >= 0 && pos >= len', reads one byte
                through `str_byte' as an unsigned char;
     STR_USER - advances `pos' by `pos_add_next' and calls the
                get_next_char callback of `str_source', storing its return
                value in `str_user_end'.
   NOTE(review): confirm the end-of-input handling against the complete
   macro body. */
206 #define GET_NEXT_WCHAR() \
209 if (type == STR_BYTE) \
212 if (len >= 0 && pos >= len) \
215 next_c = (unsigned char)(*str_byte++); \
217 else if (type == STR_USER) \
219 pos += pos_add_next; \
220 str_user_end = str_source->get_next_char(&next_c, &pos_add_next, \
221 str_source->context); \
223 } while(/*CONSTCOND*/0)
224 #else /* !TRE_STR_USER */
/* GET_NEXT_WCHAR() variant handling only STR_BYTE input: after testing
   `len >= 0 && pos >= len', it reads the next byte through `str_byte' into
   `next_c' as an unsigned char.
   NOTE(review): confirm the end-of-input handling against the complete
   macro body. */
225 #define GET_NEXT_WCHAR() \
228 if (type == STR_BYTE) \
231 if (len >= 0 && pos >= len) \
234 next_c = (unsigned char)(*str_byte++); \
236 } while(/*CONSTCOND*/0)
237 #endif /* !TRE_STR_USER */
239 #endif /* !TRE_WCHAR */
243 /* True when `c' is an underscore or is alphanumeric according to
   tre_isalnum_l() in the locale tnfa->loc.  The result is 0 or 1 because it
   comes from the `||' operator.  `c' appears twice in the expansion, so
   avoid arguments with side effects.
   Assumes tre_tnfa_t *tnfa in scope */
244 #define IS_WORD_CHAR(c) ((c) == L'_' || tre_isalnum_l(c, tnfa->loc))
/* Evaluates to nonzero when, for at least one flag set in `assertions', the
   corresponding clause below is true.  Each clause is true when the
   condition named by its flag does NOT hold at the current position:
     ASSERT_AT_BOL    - (pos > 0 or reg_notbol) and (prev_c is not a newline
                        or reg_newline is off);
     ASSERT_AT_EOL    - (next_c is not L'\0' or reg_noteol) and (next_c is
                        not a newline or reg_newline is off);
     ASSERT_AT_BOW    - prev_c is a word character or next_c is not;
     ASSERT_AT_EOW    - prev_c is not a word character or next_c is;
     ASSERT_AT_WB     - pos != 0, next_c != L'\0', and prev_c and next_c
                        agree on IS_WORD_CHAR;
     ASSERT_AT_WB_NEG - pos == 0, or next_c == L'\0', or prev_c and next_c
                        disagree on IS_WORD_CHAR.
   Reads pos, prev_c, next_c, reg_notbol, reg_noteol, reg_newline and (via
   IS_WORD_CHAR) tnfa from the expanding scope.
   NOTE(review): `assertions' is expanded without parentheses, so callers
   should pass a primary expression (a variable or member access); an
   argument such as `a | b' would bind incorrectly with `&'. */
246 #define CHECK_ASSERTIONS(assertions) \
247 (((assertions & ASSERT_AT_BOL) \
248 && (pos > 0 || reg_notbol) \
249 && (prev_c != L'\n' || !reg_newline)) \
250 || ((assertions & ASSERT_AT_EOL) \
251 && (next_c != L'\0' || reg_noteol) \
252 && (next_c != L'\n' || !reg_newline)) \
253 || ((assertions & ASSERT_AT_BOW) \
254 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
255 || ((assertions & ASSERT_AT_EOW) \
256 && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
257 || ((assertions & ASSERT_AT_WB) \
258 && (pos != 0 && next_c != L'\0' \
259 && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c))) \
260 || ((assertions & ASSERT_AT_WB_NEG) \
261 && (pos == 0 || next_c == L'\0' \
262 || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
/* Tests whether transition `trans_i' carries ASSERT_BRACKET_MATCH and, if
   so, whether tre_bracket_match() rejects `prev_c' (taken from the
   expanding scope and cast to tre_cint_t) against the transition's
   bracket_match_list.
   NOTE(review): confirm the remaining tre_bracket_match() arguments and
   any further terms of this expression against the complete macro. */
264 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags) \
265 ((trans_i->assertions & ASSERT_BRACKET_MATCH) \
266 && !tre_bracket_match(trans_i->u.bracket_match_list,(tre_cint_t)prev_c, \
/* Returns the `value' field of the tag entry when its `count' is positive,
   and -1 otherwise.
   NOTE(review): presumably `i' selects the entry within `tags' before the
   return statement; confirm against the full body. */
273 tre_tag_get(const tre_tag_t
*tags
, int i
)
276 return tags
->count
> 0 ? tags
->value
: -1;
/* Updates a tag entry.  The visible test post-increments the entry's
   `count' and branches when the previous count was 0, i.e. on the first
   time the tag is set.
   NOTE(review): presumably `i' selects the entry and `val' / `touch' are
   stored into it; confirm against the full body. */
280 tre_tag_set(tre_tag_t
*tags
, int i
, int val
, int touch
)
283 if (tags
->count
++ == 0)
/* NOTE(review): by its name this clears entry `i' of `tags'; confirm which
   fields are reset against the function body. */
290 tre_tag_reset(tre_tag_t
*tags
, int i
)
/* Returns the `touch' field of entry `i' of `tags'. */
296 tre_tag_touch_get(const tre_tag_t
*tags
, int i
)
298 return tags
[i
].touch
;
/* Writes the first `num_tags' entries of `tags' through DPRINT, advancing
   `tags' together with the index `i'.  Three formats are used:
     "%d:(0,-1)"      - index only;
     "%d:(1,%d)"      - index and the entry's `first' field;
     "%d:(%d,%d,%d)"  - index, `count', `first' and one further field.
   The trailing test `i < num_tags - 1' is true for every entry but the
   last.
   NOTE(review): presumably the format is chosen by the entry's count (0, 1,
   or more) and the trailing test emits a separator; confirm against the
   full body. */
303 tre_print_tags(const tre_tag_t
*tags
, int num_tags
)
306 for (i
= 0; i
< num_tags
; i
++, tags
++)
311 DPRINT(("%d:(0,-1)", i
));
314 DPRINT(("%d:(1,%d)", i
, tags
->first
));
317 DPRINT(("%d:(%d,%d,%d)", i
, tags
->count
, tags
->first
,
321 if (i
< (num_tags
- 1))
/* Writes the first `num_tags' entries of `tags' through DPRINT, appending
   each entry's `touch' field after a slash.  Three formats are used:
     "%d:(0,-1)/%d"      - index and `touch';
     "%d:(1,%d)/%d"      - index, `first' and `touch';
     "%d:(%d,%d,%d)/%d"  - index, `count', `first', `value' and `touch'.
   The trailing test `i < num_tags - 1' is true for every entry but the
   last.
   NOTE(review): presumably the format is chosen by the entry's count (0, 1,
   or more) and the trailing test emits a separator; confirm against the
   full body. */
327 tre_print_tags_all(const tre_tag_t
*tags
, int num_tags
)
330 for (i
= 0; i
< num_tags
; i
++, tags
++)
335 DPRINT(("%d:(0,-1)/%d", i
, tags
->touch
));
338 DPRINT(("%d:(1,%d)/%d", i
, tags
->first
, tags
->touch
));
341 DPRINT(("%d:(%d,%d,%d)/%d", i
, tags
->count
, tags
->first
,
342 tags
->value
, tags
->touch
));
345 if (i
< (num_tags
- 1))
349 #endif /* TRE_DEBUG */
351 /* Return < 0, = 0 or > 0 depending on how the start/end pairs of a minimal
352 * group between t1 and t2 compare (t1 loses if < 0, t1 wins if > 0).
 *
 * Checks, in order:
 *   1. both start tags must be set (count != 0);
 *   2. the start tag values must be equal;
 *   3. for the end tags, a set tag beats an unset one;
 *   4. otherwise the result is t2->value - t1->value, so the smaller
 *      current value wins.
 *
 * NOTE(review): presumably t1/t2 point first at the `start' entries and
 * then at the `end' entries of tags1/tags2, and a failed precondition in
 * step 1 or 2 yields 0; confirm against the full body. */
354 tre_minimal_tag_order(int start
, int end
, const tre_tag_t
*tags1
,
355 const tre_tag_t
*tags2
)
357 const tre_tag_t
*t1
, *t2
;
361 /* We need both start tags to be set */
362 if (t1
->count
== 0 || t2
->count
== 0)
365 /* The start tags must be equal */
366 if (t1
->value
!= t2
->value
)
371 /* For the end tags, we prefer set over unset, because unset means that
372 * the end tag is still growing */
375 /* if t2 is set, t1 loses since it is unset */
379 /* if t2 not set, t1 wins since it is set */
380 else if (t2
->count
== 0)
383 /* least current value wins */
384 return t2
->value
- t1
->value
;
387 /* Return < 0, = 0 or > 0 depending on how the i-th item of t1 and t2 compare
388 * (t1 loses if < 0, t1 wins if > 0).
 *
 * The comparison rule is selected by `dir':
 *   TRE_TAG_MINIMIZE      - the smaller current value wins
 *                           (t2->value - t1->value).
 *   TRE_TAG_MAXIMIZE      - a set tag beats an unset one; then the greater
 *                           initial value (`first') wins; then the tag set
 *                           fewer times (`count') wins; finally the greater
 *                           current value wins (t1->value - t2->value).
 *   TRE_TAG_LEFT_MAXIMIZE - as TRE_TAG_MAXIMIZE, except that the smaller
 *                           initial value wins.
 * When neither tag is set the result lets the caller try the next tag. */
390 tre_tag_order_1(int i
, tre_tag_direction_t dir
, const tre_tag_t
*t1
,
399 case TRE_TAG_MINIMIZE
:
400 /* least current value wins (because tags are initialized to all zeros,
401 * unset wins over set; also, tre_minimal_tag_order() will have already
402 * been run, which checks for being unset) */
403 return t2
->value
- t1
->value
;
405 case TRE_TAG_MAXIMIZE
:
409 /* if neither t1 nor t2 is set, try next tag */
412 /* t2 is set, t1 loses since it is unset */
415 /* if t2 not set, t1 wins since it is set */
416 else if (t2
->count
== 0)
418 /* greatest initial value wins */
419 if ((diff
= t1
->first
- t2
->first
) != 0)
421 /* least number of times the tag was set, wins */
422 if ((diff
= t2
->count
- t1
->count
) != 0)
424 /* if the tags were only set once, they only have initial values */
427 /* greatest current value wins */
428 return t1
->value
- t2
->value
;
430 case TRE_TAG_LEFT_MAXIMIZE
:
434 /* if neither t1 nor t2 is set, try next tag */
437 /* t2 is set, t1 loses since it is unset */
440 /* if t2 not set, t1 wins since it is set */
441 else if (t2
->count
== 0)
443 /* least initial value wins */
444 if ((diff
= t2
->first
- t1
->first
) != 0)
446 /* least number of times the tag was set, wins */
447 if ((diff
= t2
->count
- t1
->count
) != 0)
449 /* if the tags were only set once, they only have initial values */
452 /* greatest current value wins */
453 return t1
->value
- t2
->value
;
456 /* Shouldn't happen: only assert if TRE_DEBUG defined */
464 #define _MORE_DEBUGGING
465 #endif /* TRE_DEBUG */
467 /* Returns 1 if `t1' wins over `t2', 0 otherwise.
   Walks tags 0 .. num_tags-1 and compares each pair with tre_tag_order_1(),
   passing the matching entry of `tag_directions'; the loop acts on the
   first nonzero comparison result.
   When _MORE_DEBUGGING is defined this function is compiled under the name
   _tre_tag_order so that the tracing wrapper tre_tag_order() can call it. */
469 #ifdef _MORE_DEBUGGING
470 _tre_tag_order(int num_tags
, tre_tag_direction_t
*tag_directions
,
471 const tre_tag_t
*t1
, const tre_tag_t
*t2
)
472 #else /* !_MORE_DEBUGGING */
473 tre_tag_order(int num_tags
, tre_tag_direction_t
*tag_directions
,
474 const tre_tag_t
*t1
, const tre_tag_t
*t2
)
475 #endif /* !_MORE_DEBUGGING */
479 for (i
= 0; i
< num_tags
; i
++)
481 if ((ret
= tre_tag_order_1(i
, tag_directions
[i
], t1
, t2
)) != 0)
488 #ifdef _MORE_DEBUGGING
/* Tracing wrapper around _tre_tag_order(): computes the result, then prints
   "tre_tag_order: ", the tags of `t1', " wins " or " doesn't win ", and the
   tags of `t2' through DPRINT / tre_print_tags(). */
490 tre_tag_order(int num_tags
, tre_tag_direction_t
*tag_directions
,
491 const tre_tag_t
*t1
, const tre_tag_t
*t2
)
493 int ret
= _tre_tag_order(num_tags
, tag_directions
, t1
, t2
);
494 DPRINT(("tre_tag_order: "));
495 tre_print_tags(t1
, num_tags
);
496 DPRINT((" %s ", ret
? "wins" : "doesn't win"));
497 tre_print_tags(t2
, num_tags
);
501 #endif /* _MORE_DEBUGGING */
504 #include <xlocale_private.h>
505 #else /* !__LIBC__ */
507 #endif /* !__LIBC__ */
/* Prototype for the collation helper used by tre_bracket_match(), which
   calls it with a single character (len 1) and compares the result against
   range bounds and equivalence-class values.
   NOTE(review): the exact meaning of the returned value is defined by the
   C library's collation code; confirm there. */
509 int __collate_equiv_value(locale_t loc
, const wchar_t *str
, size_t len
);
512 tre_bracket_match(tre_bracket_match_list_t
* __restrict list
, tre_cint_t wc
,
513 const tre_tnfa_t
* __restrict tnfa
)
517 tre_bracket_match_t
*b
;
519 int we
, ue
, le
, got_equiv
= 0;
520 int icase
= ((tnfa
->cflags
& REG_ICASE
) != 0);
522 DPRINT(("tre_bracket_match: %p, %d, %d\n", list
, wc
, icase
));
525 if (tre_islower_l(wc
, tnfa
->loc
))
528 uc
= tre_toupper_l(wc
, tnfa
->loc
);
530 else if (tre_isupper_l(wc
, tnfa
->loc
))
533 lc
= tre_tolower_l(wc
, tnfa
->loc
);
540 for (i
= 0, b
= list
->bracket_matches
; i
< list
->num_bracket_matches
;
545 case TRE_BRACKET_MATCH_TYPE_CHAR
:
547 match
= (b
->value
== uc
|| b
->value
== lc
);
549 match
= (b
->value
== wc
);
551 case TRE_BRACKET_MATCH_TYPE_RANGE_BEGIN
:
553 tre_cint_t start
= b
->value
, end
;
554 if (++i
>= list
->num_bracket_matches
||
555 (++b
)->type
!= TRE_BRACKET_MATCH_TYPE_RANGE_END
)
557 DPRINT(("tre_bracket_match: no following range end\n"));
566 ue
= __collate_equiv_value(tnfa
->loc
, &uc
, 1);
567 le
= __collate_equiv_value(tnfa
->loc
, &lc
, 1);
570 we
= __collate_equiv_value(tnfa
->loc
, &wc
, 1);
574 match
= ((start
<= ue
&& ue
<= end
) ||
575 (start
<= le
&& le
<= end
));
577 match
= (start
<= we
&& we
<= end
);
580 case TRE_BRACKET_MATCH_TYPE_RANGE_END
:
581 DPRINT(("tre_bracket_match: range end without preceeding start\n"));
584 case TRE_BRACKET_MATCH_TYPE_CLASS
:
586 match
= (tre_isctype_l(uc
, b
->value
, tnfa
->loc
) ||
587 tre_isctype_l(lc
, b
->value
, tnfa
->loc
));
589 match
= (tre_isctype_l(wc
, b
->value
, tnfa
->loc
));
591 case TRE_BRACKET_MATCH_TYPE_EQUIVALENCE
:
596 ue
= __collate_equiv_value(tnfa
->loc
, &uc
, 1);
597 le
= __collate_equiv_value(tnfa
->loc
, &lc
, 1);
600 we
= __collate_equiv_value(tnfa
->loc
, &wc
, 1);
604 match
= (b
->value
== ue
|| b
->value
== le
);
606 match
= (b
->value
== we
);
609 DPRINT(("tre_bracket_match: unknown type %d\n", b
->type
));
617 if (list
->flags
& TRE_BRACKET_MATCH_FLAG_NEGATE
)