]> git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/repattrn.cpp
ICU-57149.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / repattrn.cpp
1 //
2 // file: repattrn.cpp
3 //
4 /*
5 ***************************************************************************
6 * Copyright (C) 2002-2016 International Business Machines Corporation
7 * and others. All rights reserved.
8 ***************************************************************************
9 */
10
11 #include "unicode/utypes.h"
12
13 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15 #include "unicode/regex.h"
16 #include "unicode/uclean.h"
17 #include "cmemory.h"
18 #include "cstr.h"
19 #include "uassert.h"
20 #include "uhash.h"
21 #include "uvector.h"
22 #include "uvectr32.h"
23 #include "uvectr64.h"
24 #include "regexcmp.h"
25 #include "regeximp.h"
26 #include "regexst.h"
27
28 U_NAMESPACE_BEGIN
29
30 //--------------------------------------------------------------------------
31 //
32 // RegexPattern Default Constructor
33 //
34 //--------------------------------------------------------------------------
35 RegexPattern::RegexPattern() {
36 // Init all of this instances data.
37 init();
38 }
39
40
41 //--------------------------------------------------------------------------
42 //
43 // Copy Constructor Note: This is a rather inefficient implementation,
44 // but it probably doesn't matter.
45 //
46 //--------------------------------------------------------------------------
47 RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
48 init();
49 *this = other;
50 }
51
52
53
54 //--------------------------------------------------------------------------
55 //
56 // Assignment Operator
57 //
58 //--------------------------------------------------------------------------
59 RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
60 if (this == &other) {
61 // Source and destination are the same. Don't do anything.
62 return *this;
63 }
64
65 // Clean out any previous contents of object being assigned to.
66 zap();
67
68 // Give target object a default initialization
69 init();
70
71 // Copy simple fields
72 fDeferredStatus = other.fDeferredStatus;
73
74 if (U_FAILURE(fDeferredStatus)) {
75 return *this;
76 }
77
78 if (other.fPatternString == NULL) {
79 fPatternString = NULL;
80 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
81 } else {
82 fPatternString = new UnicodeString(*(other.fPatternString));
83 if (fPatternString == NULL) {
84 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
85 } else {
86 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
87 }
88 }
89 if (U_FAILURE(fDeferredStatus)) {
90 return *this;
91 }
92
93 fFlags = other.fFlags;
94 fLiteralText = other.fLiteralText;
95 fMinMatchLen = other.fMinMatchLen;
96 fFrameSize = other.fFrameSize;
97 fDataSize = other.fDataSize;
98 fStaticSets = other.fStaticSets;
99 fStaticSets8 = other.fStaticSets8;
100
101 fStartType = other.fStartType;
102 fInitialStringIdx = other.fInitialStringIdx;
103 fInitialStringLen = other.fInitialStringLen;
104 *fInitialChars = *other.fInitialChars;
105 fInitialChar = other.fInitialChar;
106 *fInitialChars8 = *other.fInitialChars8;
107 fNeedsAltInput = other.fNeedsAltInput;
108
109 // Copy the pattern. It's just values, nothing deep to copy.
110 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
111 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
112
113 // Copy the Unicode Sets.
114 // Could be made more efficient if the sets were reference counted and shared,
115 // but I doubt that pattern copying will be particularly common.
116 // Note: init() already added an empty element zero to fSets
117 int32_t i;
118 int32_t numSets = other.fSets->size();
119 fSets8 = new Regex8BitSet[numSets];
120 if (fSets8 == NULL) {
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122 return *this;
123 }
124 for (i=1; i<numSets; i++) {
125 if (U_FAILURE(fDeferredStatus)) {
126 return *this;
127 }
128 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
129 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
130 if (newSet == NULL) {
131 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
132 break;
133 }
134 fSets->addElement(newSet, fDeferredStatus);
135 fSets8[i] = other.fSets8[i];
136 }
137
138 // Copy the named capture group hash map.
139 int32_t hashPos = UHASH_FIRST;
140 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
141 if (U_FAILURE(fDeferredStatus)) {
142 break;
143 }
144 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
145 UnicodeString *key = new UnicodeString(*name);
146 int32_t val = hashEl->value.integer;
147 if (key == NULL) {
148 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
149 } else {
150 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
151 }
152 }
153 return *this;
154 }
155
156
157 //--------------------------------------------------------------------------
158 //
159 // init Shared initialization for use by constructors.
160 // Bring an uninitialized RegexPattern up to a default state.
161 //
162 //--------------------------------------------------------------------------
163 void RegexPattern::init() {
164 fFlags = 0;
165 fCompiledPat = 0;
166 fLiteralText.remove();
167 fSets = NULL;
168 fSets8 = NULL;
169 fDeferredStatus = U_ZERO_ERROR;
170 fMinMatchLen = 0;
171 fFrameSize = 0;
172 fDataSize = 0;
173 fGroupMap = NULL;
174 fStaticSets = NULL;
175 fStaticSets8 = NULL;
176 fStartType = START_NO_INFO;
177 fInitialStringIdx = 0;
178 fInitialStringLen = 0;
179 fInitialChars = NULL;
180 fInitialChar = 0;
181 fInitialChars8 = NULL;
182 fNeedsAltInput = FALSE;
183 fNamedCaptureMap = NULL;
184
185 fPattern = NULL; // will be set later
186 fPatternString = NULL; // may be set later
187 fCompiledPat = new UVector64(fDeferredStatus);
188 fGroupMap = new UVector32(fDeferredStatus);
189 fSets = new UVector(fDeferredStatus);
190 fInitialChars = new UnicodeSet;
191 fInitialChars8 = new Regex8BitSet;
192 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function
193 uhash_compareUnicodeString, // Key comparator function
194 uhash_compareLong, // Value comparator function
195 &fDeferredStatus);
196 if (U_FAILURE(fDeferredStatus)) {
197 return;
198 }
199 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
200 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
201 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
202 return;
203 }
204
205 // Slot zero of the vector of sets is reserved. Fill it here.
206 fSets->addElement((int32_t)0, fDeferredStatus);
207
208 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
209 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
210 }
211
212
213 //--------------------------------------------------------------------------
214 //
215 // zap Delete everything owned by this RegexPattern.
216 //
217 //--------------------------------------------------------------------------
218 void RegexPattern::zap() {
219 delete fCompiledPat;
220 fCompiledPat = NULL;
221 int i;
222 for (i=1; i<fSets->size(); i++) {
223 UnicodeSet *s;
224 s = (UnicodeSet *)fSets->elementAt(i);
225 if (s != NULL) {
226 delete s;
227 }
228 }
229 delete fSets;
230 fSets = NULL;
231 delete[] fSets8;
232 fSets8 = NULL;
233 delete fGroupMap;
234 fGroupMap = NULL;
235 delete fInitialChars;
236 fInitialChars = NULL;
237 delete fInitialChars8;
238 fInitialChars8 = NULL;
239 if (fPattern != NULL) {
240 utext_close(fPattern);
241 fPattern = NULL;
242 }
243 if (fPatternString != NULL) {
244 delete fPatternString;
245 fPatternString = NULL;
246 }
247 uhash_close(fNamedCaptureMap);
248 fNamedCaptureMap = NULL;
249 }
250
251
252 //--------------------------------------------------------------------------
253 //
254 // Destructor
255 //
256 //--------------------------------------------------------------------------
257 RegexPattern::~RegexPattern() {
258 zap();
259 }
260
261
262 //--------------------------------------------------------------------------
263 //
264 // Clone
265 //
266 //--------------------------------------------------------------------------
267 RegexPattern *RegexPattern::clone() const {
268 RegexPattern *copy = new RegexPattern(*this);
269 return copy;
270 }
271
272
273 //--------------------------------------------------------------------------
274 //
275 // operator == (comparison) Consider to patterns to be == if the
276 // pattern strings and the flags are the same.
277 // Note that pattern strings with the same
278 // characters can still be considered different.
279 //
280 //--------------------------------------------------------------------------
281 UBool RegexPattern::operator ==(const RegexPattern &other) const {
282 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
283 if (this->fPatternString != NULL && other.fPatternString != NULL) {
284 return *(this->fPatternString) == *(other.fPatternString);
285 } else if (this->fPattern == NULL) {
286 if (other.fPattern == NULL) {
287 return TRUE;
288 }
289 } else if (other.fPattern != NULL) {
290 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
291 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
292 return utext_equals(this->fPattern, other.fPattern);
293 }
294 }
295 return FALSE;
296 }
297
298 //---------------------------------------------------------------------
299 //
300 // compile
301 //
302 //---------------------------------------------------------------------
303 RegexPattern * U_EXPORT2
304 RegexPattern::compile(const UnicodeString &regex,
305 uint32_t flags,
306 UParseError &pe,
307 UErrorCode &status)
308 {
309 if (U_FAILURE(status)) {
310 return NULL;
311 }
312
313 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
314 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
315 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
316
317 if ((flags & ~allFlags) != 0) {
318 status = U_REGEX_INVALID_FLAG;
319 return NULL;
320 }
321
322 if ((flags & UREGEX_CANON_EQ) != 0) {
323 status = U_REGEX_UNIMPLEMENTED;
324 return NULL;
325 }
326
327 RegexPattern *This = new RegexPattern;
328 if (This == NULL) {
329 status = U_MEMORY_ALLOCATION_ERROR;
330 return NULL;
331 }
332 if (U_FAILURE(This->fDeferredStatus)) {
333 status = This->fDeferredStatus;
334 delete This;
335 return NULL;
336 }
337 This->fFlags = flags;
338
339 RegexCompile compiler(This, status);
340 compiler.compile(regex, pe, status);
341
342 if (U_FAILURE(status)) {
343 delete This;
344 This = NULL;
345 }
346
347 return This;
348 }
349
350
351 //
352 // compile, UText mode
353 //
354 RegexPattern * U_EXPORT2
355 RegexPattern::compile(UText *regex,
356 uint32_t flags,
357 UParseError &pe,
358 UErrorCode &status)
359 {
360 if (U_FAILURE(status)) {
361 return NULL;
362 }
363
364 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
365 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
366 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
367
368 if ((flags & ~allFlags) != 0) {
369 status = U_REGEX_INVALID_FLAG;
370 return NULL;
371 }
372
373 if ((flags & UREGEX_CANON_EQ) != 0) {
374 status = U_REGEX_UNIMPLEMENTED;
375 return NULL;
376 }
377
378 RegexPattern *This = new RegexPattern;
379 if (This == NULL) {
380 status = U_MEMORY_ALLOCATION_ERROR;
381 return NULL;
382 }
383 if (U_FAILURE(This->fDeferredStatus)) {
384 status = This->fDeferredStatus;
385 delete This;
386 return NULL;
387 }
388 This->fFlags = flags;
389
390 RegexCompile compiler(This, status);
391 compiler.compile(regex, pe, status);
392
393 if (U_FAILURE(status)) {
394 delete This;
395 This = NULL;
396 }
397
398 return This;
399 }
400
401 //
402 // compile with default flags.
403 //
404 RegexPattern * U_EXPORT2
405 RegexPattern::compile(const UnicodeString &regex,
406 UParseError &pe,
407 UErrorCode &err)
408 {
409 return compile(regex, 0, pe, err);
410 }
411
412
413 //
414 // compile with default flags, UText mode
415 //
416 RegexPattern * U_EXPORT2
417 RegexPattern::compile(UText *regex,
418 UParseError &pe,
419 UErrorCode &err)
420 {
421 return compile(regex, 0, pe, err);
422 }
423
424
425 //
426 // compile with no UParseErr parameter.
427 //
428 RegexPattern * U_EXPORT2
429 RegexPattern::compile(const UnicodeString &regex,
430 uint32_t flags,
431 UErrorCode &err)
432 {
433 UParseError pe;
434 return compile(regex, flags, pe, err);
435 }
436
437
438 //
439 // compile with no UParseErr parameter, UText mode
440 //
441 RegexPattern * U_EXPORT2
442 RegexPattern::compile(UText *regex,
443 uint32_t flags,
444 UErrorCode &err)
445 {
446 UParseError pe;
447 return compile(regex, flags, pe, err);
448 }
449
450
451 //---------------------------------------------------------------------
452 //
453 // flags
454 //
455 //---------------------------------------------------------------------
456 uint32_t RegexPattern::flags() const {
457 return fFlags;
458 }
459
460
461 //---------------------------------------------------------------------
462 //
463 // matcher(UnicodeString, err)
464 //
465 //---------------------------------------------------------------------
466 RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
467 UErrorCode &status) const {
468 RegexMatcher *retMatcher = matcher(status);
469 if (retMatcher != NULL) {
470 retMatcher->fDeferredStatus = status;
471 retMatcher->reset(input);
472 }
473 return retMatcher;
474 }
475
476
477 //---------------------------------------------------------------------
478 //
479 // matcher(status)
480 //
481 //---------------------------------------------------------------------
482 RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
483 RegexMatcher *retMatcher = NULL;
484
485 if (U_FAILURE(status)) {
486 return NULL;
487 }
488 if (U_FAILURE(fDeferredStatus)) {
489 status = fDeferredStatus;
490 return NULL;
491 }
492
493 retMatcher = new RegexMatcher(this);
494 if (retMatcher == NULL) {
495 status = U_MEMORY_ALLOCATION_ERROR;
496 return NULL;
497 }
498 return retMatcher;
499 }
500
501
502
503 //---------------------------------------------------------------------
504 //
505 // matches Convenience function to test for a match, starting
506 // with a pattern string and a data string.
507 //
508 //---------------------------------------------------------------------
509 UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
510 const UnicodeString &input,
511 UParseError &pe,
512 UErrorCode &status) {
513
514 if (U_FAILURE(status)) {return FALSE;}
515
516 UBool retVal;
517 RegexPattern *pat = NULL;
518 RegexMatcher *matcher = NULL;
519
520 pat = RegexPattern::compile(regex, 0, pe, status);
521 matcher = pat->matcher(input, status);
522 retVal = matcher->matches(status);
523
524 delete matcher;
525 delete pat;
526 return retVal;
527 }
528
529
530 //
531 // matches, UText mode
532 //
533 UBool U_EXPORT2 RegexPattern::matches(UText *regex,
534 UText *input,
535 UParseError &pe,
536 UErrorCode &status) {
537
538 if (U_FAILURE(status)) {return FALSE;}
539
540 UBool retVal = FALSE;
541 RegexPattern *pat = NULL;
542 RegexMatcher *matcher = NULL;
543
544 pat = RegexPattern::compile(regex, 0, pe, status);
545 matcher = pat->matcher(status);
546 if (U_SUCCESS(status)) {
547 matcher->reset(input);
548 retVal = matcher->matches(status);
549 }
550
551 delete matcher;
552 delete pat;
553 return retVal;
554 }
555
556
557
558
559
560 //---------------------------------------------------------------------
561 //
562 // pattern
563 //
564 //---------------------------------------------------------------------
565 UnicodeString RegexPattern::pattern() const {
566 if (fPatternString != NULL) {
567 return *fPatternString;
568 } else if (fPattern == NULL) {
569 return UnicodeString();
570 } else {
571 UErrorCode status = U_ZERO_ERROR;
572 int64_t nativeLen = utext_nativeLength(fPattern);
573 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
574 UnicodeString result;
575
576 status = U_ZERO_ERROR;
577 UChar *resultChars = result.getBuffer(len16);
578 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
579 result.releaseBuffer(len16);
580
581 return result;
582 }
583 }
584
585
586
587
588 //---------------------------------------------------------------------
589 //
590 // patternText
591 //
592 //---------------------------------------------------------------------
593 UText *RegexPattern::patternText(UErrorCode &status) const {
594 if (U_FAILURE(status)) {return NULL;}
595 status = U_ZERO_ERROR;
596
597 if (fPattern != NULL) {
598 return fPattern;
599 } else {
600 RegexStaticSets::initGlobals(&status);
601 return RegexStaticSets::gStaticSets->fEmptyText;
602 }
603 }
604
605
606 //--------------------------------------------------------------------------------
607 //
608 // groupNumberFromName()
609 //
610 //--------------------------------------------------------------------------------
611 int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
612 if (U_FAILURE(status)) {
613 return 0;
614 }
615
616 // No need to explicitly check for syntactically valid names.
617 // Invalid ones will never be in the map, and the lookup will fail.
618
619 int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
620 if (number == 0) {
621 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
622 }
623 return number;
624 }
625
626 int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
627 if (U_FAILURE(status)) {
628 return 0;
629 }
630 UnicodeString name(groupName, nameLength, US_INV);
631 return groupNumberFromName(name, status);
632 }
633
634
635 //---------------------------------------------------------------------
636 //
637 // split
638 //
639 //---------------------------------------------------------------------
640 int32_t RegexPattern::split(const UnicodeString &input,
641 UnicodeString dest[],
642 int32_t destCapacity,
643 UErrorCode &status) const
644 {
645 if (U_FAILURE(status)) {
646 return 0;
647 };
648
649 RegexMatcher m(this);
650 int32_t r = 0;
651 // Check m's status to make sure all is ok.
652 if (U_SUCCESS(m.fDeferredStatus)) {
653 r = m.split(input, dest, destCapacity, status);
654 }
655 return r;
656 }
657
658 //
659 // split, UText mode
660 //
661 int32_t RegexPattern::split(UText *input,
662 UText *dest[],
663 int32_t destCapacity,
664 UErrorCode &status) const
665 {
666 if (U_FAILURE(status)) {
667 return 0;
668 };
669
670 RegexMatcher m(this);
671 int32_t r = 0;
672 // Check m's status to make sure all is ok.
673 if (U_SUCCESS(m.fDeferredStatus)) {
674 r = m.split(input, dest, destCapacity, status);
675 }
676 return r;
677 }
678
679
680 //---------------------------------------------------------------------
681 //
682 // dump Output the compiled form of the pattern.
683 // Debugging function only.
684 //
685 //---------------------------------------------------------------------
686 void RegexPattern::dumpOp(int32_t index) const {
687 (void)index; // Suppress warnings in non-debug build.
688 #if defined(REGEX_DEBUG)
689 static const char * const opNames[] = {URX_OPCODE_NAMES};
690 int32_t op = fCompiledPat->elementAti(index);
691 int32_t val = URX_VAL(op);
692 int32_t type = URX_TYPE(op);
693 int32_t pinnedType = type;
694 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
695 pinnedType = 0;
696 }
697
698 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
699 switch (type) {
700 case URX_NOP:
701 case URX_DOTANY:
702 case URX_DOTANY_ALL:
703 case URX_FAIL:
704 case URX_CARET:
705 case URX_DOLLAR:
706 case URX_BACKSLASH_G:
707 case URX_BACKSLASH_X:
708 case URX_END:
709 case URX_DOLLAR_M:
710 case URX_CARET_M:
711 // Types with no operand field of interest.
712 break;
713
714 case URX_RESERVED_OP:
715 case URX_START_CAPTURE:
716 case URX_END_CAPTURE:
717 case URX_STATE_SAVE:
718 case URX_JMP:
719 case URX_JMP_SAV:
720 case URX_JMP_SAV_X:
721 case URX_BACKSLASH_B:
722 case URX_BACKSLASH_BU:
723 case URX_BACKSLASH_D:
724 case URX_BACKSLASH_Z:
725 case URX_STRING_LEN:
726 case URX_CTR_INIT:
727 case URX_CTR_INIT_NG:
728 case URX_CTR_LOOP:
729 case URX_CTR_LOOP_NG:
730 case URX_RELOC_OPRND:
731 case URX_STO_SP:
732 case URX_LD_SP:
733 case URX_BACKREF:
734 case URX_STO_INP_LOC:
735 case URX_JMPX:
736 case URX_LA_START:
737 case URX_LA_END:
738 case URX_BACKREF_I:
739 case URX_LB_START:
740 case URX_LB_CONT:
741 case URX_LB_END:
742 case URX_LBN_CONT:
743 case URX_LBN_END:
744 case URX_LOOP_C:
745 case URX_LOOP_DOT_I:
746 case URX_BACKSLASH_H:
747 case URX_BACKSLASH_R:
748 case URX_BACKSLASH_V:
749 // types with an integer operand field.
750 printf("%d", val);
751 break;
752
753 case URX_ONECHAR:
754 case URX_ONECHAR_I:
755 if (val < 0x20) {
756 printf("%#x", val);
757 } else {
758 printf("'%s'", CStr(UnicodeString(val))());
759 }
760 break;
761
762 case URX_STRING:
763 case URX_STRING_I:
764 {
765 int32_t lengthOp = fCompiledPat->elementAti(index+1);
766 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
767 int32_t length = URX_VAL(lengthOp);
768 UnicodeString str(fLiteralText, val, length);
769 printf("%s", CStr(str)());
770 }
771 break;
772
773 case URX_SETREF:
774 case URX_LOOP_SR_I:
775 {
776 UnicodeString s;
777 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
778 set->toPattern(s, TRUE);
779 printf("%s", CStr(s)());
780 }
781 break;
782
783 case URX_STATIC_SETREF:
784 case URX_STAT_SETREF_N:
785 {
786 UnicodeString s;
787 if (val & URX_NEG_SET) {
788 printf("NOT ");
789 val &= ~URX_NEG_SET;
790 }
791 UnicodeSet *set = fStaticSets[val];
792 set->toPattern(s, TRUE);
793 printf("%s", CStr(s)());
794 }
795 break;
796
797
798 default:
799 printf("??????");
800 break;
801 }
802 printf("\n");
803 #endif
804 }
805
806
807 void RegexPattern::dumpPattern() const {
808 #if defined(REGEX_DEBUG)
809 int index;
810
811 UnicodeString patStr;
812 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
813 patStr.append(c);
814 }
815 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
816 printf(" Min Match Length: %d\n", fMinMatchLen);
817 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
818 if (fStartType == START_STRING) {
819 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
820 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
821 } else if (fStartType == START_SET) {
822 UnicodeString s;
823 fInitialChars->toPattern(s, TRUE);
824 printf(" Match First Chars: %s\n", CStr(s)());
825
826 } else if (fStartType == START_CHAR) {
827 printf(" First char of Match: ");
828 if (fInitialChar > 0x20) {
829 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
830 } else {
831 printf("%#x\n", fInitialChar);
832 }
833 }
834
835 printf("Named Capture Groups:\n");
836 if (uhash_count(fNamedCaptureMap) == 0) {
837 printf(" None\n");
838 } else {
839 int32_t pos = UHASH_FIRST;
840 const UHashElement *el = NULL;
841 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
842 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
843 int32_t number = el->value.integer;
844 printf(" %d\t%s\n", number, CStr(*name)());
845 }
846 }
847
848 printf("\nIndex Binary Type Operand\n" \
849 "-------------------------------------------\n");
850 for (index = 0; index<fCompiledPat->size(); index++) {
851 dumpOp(index);
852 }
853 printf("\n\n");
854 #endif
855 }
856
857
858
859 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
860
861 U_NAMESPACE_END
862 #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS