]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/repattrn.cpp
ICU-491.11.1.tar.gz
[apple/icu.git] / icuSources / i18n / repattrn.cpp
CommitLineData
b75a7d8f 1//
46f4442e 2// file: repattrn.cpp
b75a7d8f
A
3//
4/*
5***************************************************************************
4388f060 6* Copyright (C) 2002-2012 International Business Machines Corporation *
b75a7d8f
A
7* and others. All rights reserved. *
8***************************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "unicode/regex.h"
374ca955 16#include "unicode/uclean.h"
b75a7d8f
A
17#include "uassert.h"
18#include "uvector.h"
19#include "uvectr32.h"
729e4ab9 20#include "uvectr64.h"
b75a7d8f
A
21#include "regexcmp.h"
22#include "regeximp.h"
23#include "regexst.h"
24
25U_NAMESPACE_BEGIN
26
27//--------------------------------------------------------------------------
28//
29// RegexPattern Default Constructor
30//
31//--------------------------------------------------------------------------
32RegexPattern::RegexPattern() {
33 // Init all of this instances data.
34 init();
73c04bcf 35}
b75a7d8f
A
36
37
38//--------------------------------------------------------------------------
39//
40// Copy Constructor Note: This is a rather inefficient implementation,
41// but it probably doesn't matter.
42//
43//--------------------------------------------------------------------------
44RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
46f4442e 45 init();
b75a7d8f
A
46 *this = other;
47}
48
49
50
51//--------------------------------------------------------------------------
52//
729e4ab9 53// Assignment Operator
b75a7d8f
A
54//
55//--------------------------------------------------------------------------
56RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
57 if (this == &other) {
58 // Source and destination are the same. Don't do anything.
59 return *this;
60 }
61
62 // Clean out any previous contents of object being assigned to.
63 zap();
64
65 // Give target object a default initialization
66 init();
67
68 // Copy simple fields
729e4ab9
A
69 if ( other.fPatternString == NULL ) {
70 fPatternString = NULL;
71 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
72 } else {
73 fPatternString = new UnicodeString(*(other.fPatternString));
74 UErrorCode status = U_ZERO_ERROR;
75 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status);
76 if (U_FAILURE(status)) {
77 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
78 return *this;
79 }
80 }
b75a7d8f
A
81 fFlags = other.fFlags;
82 fLiteralText = other.fLiteralText;
83 fDeferredStatus = other.fDeferredStatus;
84 fMinMatchLen = other.fMinMatchLen;
374ca955
A
85 fFrameSize = other.fFrameSize;
86 fDataSize = other.fDataSize;
b75a7d8f 87 fMaxCaptureDigits = other.fMaxCaptureDigits;
46f4442e 88 fStaticSets = other.fStaticSets;
374ca955 89 fStaticSets8 = other.fStaticSets8;
46f4442e 90
b75a7d8f
A
91 fStartType = other.fStartType;
92 fInitialStringIdx = other.fInitialStringIdx;
93 fInitialStringLen = other.fInitialStringLen;
94 *fInitialChars = *other.fInitialChars;
b75a7d8f 95 fInitialChar = other.fInitialChar;
374ca955 96 *fInitialChars8 = *other.fInitialChars8;
729e4ab9 97 fNeedsAltInput = other.fNeedsAltInput;
b75a7d8f
A
98
99 // Copy the pattern. It's just values, nothing deep to copy.
100 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
101 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
102
46f4442e 103 // Copy the Unicode Sets.
b75a7d8f 104 // Could be made more efficient if the sets were reference counted and shared,
46f4442e 105 // but I doubt that pattern copying will be particularly common.
b75a7d8f
A
106 // Note: init() already added an empty element zero to fSets
107 int32_t i;
108 int32_t numSets = other.fSets->size();
109 fSets8 = new Regex8BitSet[numSets];
46f4442e
A
110 if (fSets8 == NULL) {
111 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
112 return *this;
113 }
b75a7d8f
A
114 for (i=1; i<numSets; i++) {
115 if (U_FAILURE(fDeferredStatus)) {
116 return *this;
117 }
118 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
119 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
120 if (newSet == NULL) {
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122 break;
123 }
124 fSets->addElement(newSet, fDeferredStatus);
125 fSets8[i] = other.fSets8[i];
126 }
127
128 return *this;
129}
130
131
132//--------------------------------------------------------------------------
133//
134// init Shared initialization for use by constructors.
135// Bring an uninitialized RegexPattern up to a default state.
136//
137//--------------------------------------------------------------------------
138void RegexPattern::init() {
139 fFlags = 0;
374ca955
A
140 fCompiledPat = 0;
141 fLiteralText.remove();
142 fSets = NULL;
143 fSets8 = NULL;
b75a7d8f
A
144 fDeferredStatus = U_ZERO_ERROR;
145 fMinMatchLen = 0;
b75a7d8f
A
146 fFrameSize = 0;
147 fDataSize = 0;
374ca955 148 fGroupMap = NULL;
46f4442e 149 fMaxCaptureDigits = 1;
374ca955
A
150 fStaticSets = NULL;
151 fStaticSets8 = NULL;
b75a7d8f
A
152 fStartType = START_NO_INFO;
153 fInitialStringIdx = 0;
154 fInitialStringLen = 0;
155 fInitialChars = NULL;
b75a7d8f 156 fInitialChar = 0;
374ca955 157 fInitialChars8 = NULL;
729e4ab9 158 fNeedsAltInput = FALSE;
46f4442e 159
729e4ab9
A
160 fPattern = NULL; // will be set later
161 fPatternString = NULL; // may be set later
162 fCompiledPat = new UVector64(fDeferredStatus);
b75a7d8f
A
163 fGroupMap = new UVector32(fDeferredStatus);
164 fSets = new UVector(fDeferredStatus);
165 fInitialChars = new UnicodeSet;
166 fInitialChars8 = new Regex8BitSet;
167 if (U_FAILURE(fDeferredStatus)) {
168 return;
169 }
170 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
171 fInitialChars == NULL || fInitialChars8 == NULL) {
172 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
173 return;
174 }
175
176 // Slot zero of the vector of sets is reserved. Fill it here.
177 fSets->addElement((int32_t)0, fDeferredStatus);
178}
179
180
181//--------------------------------------------------------------------------
182//
46f4442e 183// zap Delete everything owned by this RegexPattern.
b75a7d8f
A
184//
185//--------------------------------------------------------------------------
186void RegexPattern::zap() {
187 delete fCompiledPat;
188 fCompiledPat = NULL;
189 int i;
190 for (i=1; i<fSets->size(); i++) {
191 UnicodeSet *s;
192 s = (UnicodeSet *)fSets->elementAt(i);
193 if (s != NULL) {
194 delete s;
195 }
196 }
197 delete fSets;
198 fSets = NULL;
374ca955
A
199 delete[] fSets8;
200 fSets8 = NULL;
b75a7d8f
A
201 delete fGroupMap;
202 fGroupMap = NULL;
203 delete fInitialChars;
204 fInitialChars = NULL;
205 delete fInitialChars8;
206 fInitialChars8 = NULL;
729e4ab9
A
207 if (fPattern != NULL) {
208 utext_close(fPattern);
209 fPattern = NULL;
210 }
211 if (fPatternString != NULL) {
212 delete fPatternString;
213 fPatternString = NULL;
214 }
b75a7d8f
A
215}
216
217
218//--------------------------------------------------------------------------
219//
220// Destructor
221//
222//--------------------------------------------------------------------------
223RegexPattern::~RegexPattern() {
224 zap();
73c04bcf 225}
b75a7d8f
A
226
227
228//--------------------------------------------------------------------------
229//
230// Clone
231//
232//--------------------------------------------------------------------------
46f4442e 233RegexPattern *RegexPattern::clone() const {
b75a7d8f
A
234 RegexPattern *copy = new RegexPattern(*this);
235 return copy;
73c04bcf 236}
b75a7d8f
A
237
238
239//--------------------------------------------------------------------------
240//
241// operator == (comparison) Consider to patterns to be == if the
242// pattern strings and the flags are the same.
729e4ab9
A
243// Note that pattern strings with the same
244// characters can still be considered different.
b75a7d8f
A
245//
246//--------------------------------------------------------------------------
247UBool RegexPattern::operator ==(const RegexPattern &other) const {
729e4ab9
A
248 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
249 if (this->fPatternString != NULL && other.fPatternString != NULL) {
250 return *(this->fPatternString) == *(other.fPatternString);
251 } else if (this->fPattern == NULL) {
252 if (other.fPattern == NULL) {
253 return TRUE;
254 }
255 } else if (other.fPattern != NULL) {
256 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
257 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
258 return utext_equals(this->fPattern, other.fPattern);
259 }
260 }
261 return FALSE;
b75a7d8f
A
262}
263
264//---------------------------------------------------------------------
265//
46f4442e 266// compile
b75a7d8f
A
267//
268//---------------------------------------------------------------------
374ca955
A
269RegexPattern * U_EXPORT2
270RegexPattern::compile(const UnicodeString &regex,
271 uint32_t flags,
272 UParseError &pe,
273 UErrorCode &status)
274{
729e4ab9
A
275 if (U_FAILURE(status)) {
276 return NULL;
277 }
278
279 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
280 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
281 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
282
283 if ((flags & ~allFlags) != 0) {
284 status = U_REGEX_INVALID_FLAG;
285 return NULL;
286 }
287
4388f060 288 if ((flags & UREGEX_CANON_EQ) != 0) {
729e4ab9
A
289 status = U_REGEX_UNIMPLEMENTED;
290 return NULL;
291 }
292
293 RegexPattern *This = new RegexPattern;
294 if (This == NULL) {
295 status = U_MEMORY_ALLOCATION_ERROR;
296 return NULL;
297 }
298 if (U_FAILURE(This->fDeferredStatus)) {
299 status = This->fDeferredStatus;
300 delete This;
301 return NULL;
302 }
303 This->fFlags = flags;
304
305 RegexCompile compiler(This, status);
306 compiler.compile(regex, pe, status);
307
308 if (U_FAILURE(status)) {
309 delete This;
310 This = NULL;
311 }
312
313 return This;
314}
315
b75a7d8f 316
729e4ab9
A
317//
318// compile, UText mode
319//
320RegexPattern * U_EXPORT2
321RegexPattern::compile(UText *regex,
322 uint32_t flags,
323 UParseError &pe,
324 UErrorCode &status)
325{
b75a7d8f
A
326 if (U_FAILURE(status)) {
327 return NULL;
328 }
329
330 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
46f4442e 331 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
729e4ab9 332 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
b75a7d8f
A
333
334 if ((flags & ~allFlags) != 0) {
335 status = U_REGEX_INVALID_FLAG;
336 return NULL;
337 }
338
4388f060 339 if ((flags & UREGEX_CANON_EQ) != 0) {
b75a7d8f
A
340 status = U_REGEX_UNIMPLEMENTED;
341 return NULL;
342 }
343
344 RegexPattern *This = new RegexPattern;
345 if (This == NULL) {
346 status = U_MEMORY_ALLOCATION_ERROR;
347 return NULL;
348 }
349 if (U_FAILURE(This->fDeferredStatus)) {
350 status = This->fDeferredStatus;
46f4442e 351 delete This;
b75a7d8f
A
352 return NULL;
353 }
354 This->fFlags = flags;
355
356 RegexCompile compiler(This, status);
357 compiler.compile(regex, pe, status);
46f4442e
A
358
359 if (U_FAILURE(status)) {
360 delete This;
361 This = NULL;
362 }
b75a7d8f
A
363
364 return This;
73c04bcf 365}
46f4442e 366
b75a7d8f
A
367//
368// compile with default flags.
369//
374ca955
A
370RegexPattern * U_EXPORT2
371RegexPattern::compile(const UnicodeString &regex,
372 UParseError &pe,
46f4442e 373 UErrorCode &err)
b75a7d8f 374{
46f4442e 375 return compile(regex, 0, pe, err);
b75a7d8f
A
376}
377
378
729e4ab9
A
379//
380// compile with default flags, UText mode
381//
382RegexPattern * U_EXPORT2
383RegexPattern::compile(UText *regex,
384 UParseError &pe,
385 UErrorCode &err)
386{
387 return compile(regex, 0, pe, err);
388}
389
b75a7d8f
A
390
391//
392// compile with no UParseErr parameter.
393//
374ca955 394RegexPattern * U_EXPORT2
729e4ab9
A
395RegexPattern::compile(const UnicodeString &regex,
396 uint32_t flags,
397 UErrorCode &err)
b75a7d8f
A
398{
399 UParseError pe;
46f4442e 400 return compile(regex, flags, pe, err);
b75a7d8f
A
401}
402
403
729e4ab9
A
404//
405// compile with no UParseErr parameter, UText mode
406//
407RegexPattern * U_EXPORT2
408RegexPattern::compile(UText *regex,
409 uint32_t flags,
410 UErrorCode &err)
411{
412 UParseError pe;
413 return compile(regex, flags, pe, err);
414}
415
b75a7d8f
A
416
417//---------------------------------------------------------------------
418//
419// flags
420//
421//---------------------------------------------------------------------
422uint32_t RegexPattern::flags() const {
423 return fFlags;
424}
425
426
427//---------------------------------------------------------------------
428//
429// matcher(UnicodeString, err)
430//
431//---------------------------------------------------------------------
432RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
433 UErrorCode &status) const {
434 RegexMatcher *retMatcher = matcher(status);
435 if (retMatcher != NULL) {
729e4ab9
A
436 retMatcher->fDeferredStatus = status;
437 retMatcher->reset(input);
438 }
439 return retMatcher;
440}
441
b75a7d8f
A
442
443//---------------------------------------------------------------------
444//
445// matcher(status)
446//
447//---------------------------------------------------------------------
448RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
449 RegexMatcher *retMatcher = NULL;
450
451 if (U_FAILURE(status)) {
452 return NULL;
453 }
454 if (U_FAILURE(fDeferredStatus)) {
455 status = fDeferredStatus;
456 return NULL;
457 }
458
46f4442e 459 retMatcher = new RegexMatcher(this);
b75a7d8f
A
460 if (retMatcher == NULL) {
461 status = U_MEMORY_ALLOCATION_ERROR;
462 return NULL;
463 }
464 return retMatcher;
73c04bcf 465}
b75a7d8f
A
466
467
468
469//---------------------------------------------------------------------
470//
471// matches Convenience function to test for a match, starting
472// with a pattern string and a data string.
473//
474//---------------------------------------------------------------------
374ca955 475UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
b75a7d8f
A
476 const UnicodeString &input,
477 UParseError &pe,
478 UErrorCode &status) {
479
480 if (U_FAILURE(status)) {return FALSE;}
481
482 UBool retVal;
483 RegexPattern *pat = NULL;
484 RegexMatcher *matcher = NULL;
485
486 pat = RegexPattern::compile(regex, 0, pe, status);
487 matcher = pat->matcher(input, status);
488 retVal = matcher->matches(status);
489
490 delete matcher;
491 delete pat;
492 return retVal;
493}
494
495
729e4ab9
A
496//
497// matches, UText mode
498//
499UBool U_EXPORT2 RegexPattern::matches(UText *regex,
500 UText *input,
501 UParseError &pe,
502 UErrorCode &status) {
503
504 if (U_FAILURE(status)) {return FALSE;}
505
4388f060 506 UBool retVal = FALSE;
729e4ab9
A
507 RegexPattern *pat = NULL;
508 RegexMatcher *matcher = NULL;
509
510 pat = RegexPattern::compile(regex, 0, pe, status);
4388f060
A
511 matcher = pat->matcher(status);
512 if (U_SUCCESS(status)) {
513 matcher->reset(input);
514 retVal = matcher->matches(status);
515 }
729e4ab9
A
516
517 delete matcher;
518 delete pat;
519 return retVal;
520}
521
522
523
b75a7d8f
A
524
525
526//---------------------------------------------------------------------
527//
528// pattern
529//
530//---------------------------------------------------------------------
531UnicodeString RegexPattern::pattern() const {
729e4ab9
A
532 if (fPatternString != NULL) {
533 return *fPatternString;
534 } else if (fPattern == NULL) {
535 return UnicodeString();
536 } else {
537 UErrorCode status = U_ZERO_ERROR;
538 int64_t nativeLen = utext_nativeLength(fPattern);
539 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
540 UnicodeString result;
541
542 status = U_ZERO_ERROR;
543 UChar *resultChars = result.getBuffer(len16);
544 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
545 result.releaseBuffer(len16);
546
547 return result;
548 }
b75a7d8f
A
549}
550
551
552
553
729e4ab9
A
554//---------------------------------------------------------------------
555//
556// patternText
557//
558//---------------------------------------------------------------------
559UText *RegexPattern::patternText(UErrorCode &status) const {
560 if (U_FAILURE(status)) {return NULL;}
561 status = U_ZERO_ERROR;
562
563 if (fPattern != NULL) {
564 return fPattern;
565 } else {
566 RegexStaticSets::initGlobals(&status);
567 return RegexStaticSets::gStaticSets->fEmptyText;
568 }
569}
570
571
572
b75a7d8f
A
573//---------------------------------------------------------------------
574//
575// split
576//
577//---------------------------------------------------------------------
578int32_t RegexPattern::split(const UnicodeString &input,
579 UnicodeString dest[],
580 int32_t destCapacity,
729e4ab9
A
581 UErrorCode &status) const
582{
583 if (U_FAILURE(status)) {
584 return 0;
585 };
586
587 RegexMatcher m(this);
588 int32_t r = 0;
589 // Check m's status to make sure all is ok.
590 if (U_SUCCESS(m.fDeferredStatus)) {
591 r = m.split(input, dest, destCapacity, status);
592 }
593 return r;
594}
595
596//
597// split, UText mode
598//
599int32_t RegexPattern::split(UText *input,
600 UText *dest[],
601 int32_t destCapacity,
602 UErrorCode &status) const
b75a7d8f
A
603{
604 if (U_FAILURE(status)) {
605 return 0;
606 };
607
608 RegexMatcher m(this);
46f4442e
A
609 int32_t r = 0;
610 // Check m's status to make sure all is ok.
611 if (U_SUCCESS(m.fDeferredStatus)) {
612 r = m.split(input, dest, destCapacity, status);
613 }
b75a7d8f
A
614 return r;
615}
616
617
618
//---------------------------------------------------------------------
//
//   dump    Output the compiled form of the pattern.
//           Debugging function only.
//
//           dumpOp prints one line for the compiled operation at `index`:
//           the index, the raw operation word, the opcode name, and the
//           operand in a form that depends on the opcode type.
//
//---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
void   RegexPattern::dumpOp(int32_t index) const {
    static const char * const opNames[] = {URX_OPCODE_NAMES};

    int32_t instruction = fCompiledPat->elementAti(index);
    int32_t operand     = URX_VAL(instruction);
    int32_t opType      = URX_TYPE(instruction);

    // An opcode type with no entry in the name table is shown with the
    // name from slot zero.
    int32_t nameIdx = opType;
    if ((uint32_t)nameIdx >= sizeof(opNames)/sizeof(char *)) {
        nameIdx = 0;
    }

    REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, instruction, opNames[nameIdx]));

    switch (opType) {
    // Types with no operand field of interest.
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        break;

    // Types with an integer operand field.
    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        REGEX_DUMP_DEBUG_PRINTF(("%d", operand));
        break;

    // Single character operand; values of 256 and above print as '?'.
    case URX_ONECHAR:
    case URX_ONECHAR_I:
        REGEX_DUMP_DEBUG_PRINTF(("%c", operand<256?operand:'?'));
        break;

    // String operand: `operand` is the start index into fLiteralText and
    // the following operation carries the length.
    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            for (int32_t pos=operand; pos<operand+length; pos++) {
                UChar ch = fLiteralText[pos];
                if (ch < 32 || ch >= 256) {
                    ch = '.';
                }
                REGEX_DUMP_DEBUG_PRINTF(("%c", ch));
            }
        }
        break;

    // Operand is an index into this pattern's own vector of sets.
    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString setPattern;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(operand);
            set->toPattern(setPattern, TRUE);
            for (int32_t k=0; k<setPattern.length(); k++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(k)));
            }
        }
        break;

    // Operand is an index into the static sets, optionally carrying the
    // URX_NEG_SET bit.
    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString setPattern;
            if (operand & URX_NEG_SET) {
                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
                operand &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[operand];
            set->toPattern(setPattern, TRUE);
            for (int32_t k=0; k<setPattern.length(); k++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(k)));
            }
        }
        break;

    default:
        REGEX_DUMP_DEBUG_PRINTF(("??????"));
        break;
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
#endif
b75a7d8f
A
744
745
#if defined(REGEX_DEBUG)
//
//   RegexPatternDump    Debugging function only.
//
//       Prints the original pattern text, the minimum match length, the
//       start-of-match information, and then one line per compiled
//       operation (via dumpOp).
//
//       Pattern characters below 32 or at/above 256 are printed as '.'.
//
U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int      index;
    int      i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
    // A default-constructed pattern has no text at all.
    if (This->fPattern != NULL) {
        UChar32 c = utext_next32From(This->fPattern, 0);
        while (c != U_SENTINEL) {
            // Only values that fit in a single byte are passed to %c.
            if (c<32 || c>=256) {
                c = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", c));

            c = UTEXT_NEXT32(This->fPattern);
        }
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 characters of the initial set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
        for (i=0; i<numSetChars; i++) {
            UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20<setChar && setChar <0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
                             "-------------------------------------------\n"));
    for (index = 0; index<This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
#endif
b75a7d8f
A
808
809
810
374ca955 811UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
b75a7d8f
A
812
813U_NAMESPACE_END
814#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS