]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/repattrn.cpp
ICU-461.18.tar.gz
[apple/icu.git] / icuSources / i18n / repattrn.cpp
CommitLineData
b75a7d8f 1//
46f4442e 2// file: repattrn.cpp
b75a7d8f
A
3//
4/*
5***************************************************************************
729e4ab9 6* Copyright (C) 2002-2010 International Business Machines Corporation *
b75a7d8f
A
7* and others. All rights reserved. *
8***************************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "unicode/regex.h"
374ca955 16#include "unicode/uclean.h"
b75a7d8f
A
17#include "uassert.h"
18#include "uvector.h"
19#include "uvectr32.h"
729e4ab9 20#include "uvectr64.h"
b75a7d8f
A
21#include "regexcmp.h"
22#include "regeximp.h"
23#include "regexst.h"
24
25U_NAMESPACE_BEGIN
26
27//--------------------------------------------------------------------------
28//
29// RegexPattern Default Constructor
30//
31//--------------------------------------------------------------------------
32RegexPattern::RegexPattern() {
374ca955
A
33 UErrorCode status = U_ZERO_ERROR;
34 u_init(&status);
729e4ab9 35
b75a7d8f
A
36 // Init all of this instances data.
37 init();
73c04bcf 38}
b75a7d8f
A
39
40
41//--------------------------------------------------------------------------
42//
43// Copy Constructor Note: This is a rather inefficient implementation,
44// but it probably doesn't matter.
45//
46//--------------------------------------------------------------------------
47RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
46f4442e 48 init();
b75a7d8f
A
49 *this = other;
50}
51
52
53
54//--------------------------------------------------------------------------
55//
729e4ab9 56// Assignment Operator
b75a7d8f
A
57//
58//--------------------------------------------------------------------------
59RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
60 if (this == &other) {
61 // Source and destination are the same. Don't do anything.
62 return *this;
63 }
64
65 // Clean out any previous contents of object being assigned to.
66 zap();
67
68 // Give target object a default initialization
69 init();
70
71 // Copy simple fields
729e4ab9
A
72 if ( other.fPatternString == NULL ) {
73 fPatternString = NULL;
74 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
75 } else {
76 fPatternString = new UnicodeString(*(other.fPatternString));
77 UErrorCode status = U_ZERO_ERROR;
78 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &status);
79 if (U_FAILURE(status)) {
80 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
81 return *this;
82 }
83 }
b75a7d8f
A
84 fFlags = other.fFlags;
85 fLiteralText = other.fLiteralText;
86 fDeferredStatus = other.fDeferredStatus;
87 fMinMatchLen = other.fMinMatchLen;
374ca955
A
88 fFrameSize = other.fFrameSize;
89 fDataSize = other.fDataSize;
b75a7d8f 90 fMaxCaptureDigits = other.fMaxCaptureDigits;
46f4442e 91 fStaticSets = other.fStaticSets;
374ca955 92 fStaticSets8 = other.fStaticSets8;
46f4442e 93
b75a7d8f
A
94 fStartType = other.fStartType;
95 fInitialStringIdx = other.fInitialStringIdx;
96 fInitialStringLen = other.fInitialStringLen;
97 *fInitialChars = *other.fInitialChars;
b75a7d8f 98 fInitialChar = other.fInitialChar;
374ca955 99 *fInitialChars8 = *other.fInitialChars8;
729e4ab9 100 fNeedsAltInput = other.fNeedsAltInput;
b75a7d8f
A
101
102 // Copy the pattern. It's just values, nothing deep to copy.
103 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
104 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
105
46f4442e 106 // Copy the Unicode Sets.
b75a7d8f 107 // Could be made more efficient if the sets were reference counted and shared,
46f4442e 108 // but I doubt that pattern copying will be particularly common.
b75a7d8f
A
109 // Note: init() already added an empty element zero to fSets
110 int32_t i;
111 int32_t numSets = other.fSets->size();
112 fSets8 = new Regex8BitSet[numSets];
46f4442e
A
113 if (fSets8 == NULL) {
114 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
115 return *this;
116 }
b75a7d8f
A
117 for (i=1; i<numSets; i++) {
118 if (U_FAILURE(fDeferredStatus)) {
119 return *this;
120 }
121 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
122 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
123 if (newSet == NULL) {
124 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
125 break;
126 }
127 fSets->addElement(newSet, fDeferredStatus);
128 fSets8[i] = other.fSets8[i];
129 }
130
131 return *this;
132}
133
134
135//--------------------------------------------------------------------------
136//
137// init Shared initialization for use by constructors.
138// Bring an uninitialized RegexPattern up to a default state.
139//
140//--------------------------------------------------------------------------
141void RegexPattern::init() {
142 fFlags = 0;
374ca955
A
143 fCompiledPat = 0;
144 fLiteralText.remove();
145 fSets = NULL;
146 fSets8 = NULL;
b75a7d8f
A
147 fDeferredStatus = U_ZERO_ERROR;
148 fMinMatchLen = 0;
b75a7d8f
A
149 fFrameSize = 0;
150 fDataSize = 0;
374ca955 151 fGroupMap = NULL;
46f4442e 152 fMaxCaptureDigits = 1;
374ca955
A
153 fStaticSets = NULL;
154 fStaticSets8 = NULL;
b75a7d8f
A
155 fStartType = START_NO_INFO;
156 fInitialStringIdx = 0;
157 fInitialStringLen = 0;
158 fInitialChars = NULL;
b75a7d8f 159 fInitialChar = 0;
374ca955 160 fInitialChars8 = NULL;
729e4ab9 161 fNeedsAltInput = FALSE;
46f4442e 162
729e4ab9
A
163 fPattern = NULL; // will be set later
164 fPatternString = NULL; // may be set later
165 fCompiledPat = new UVector64(fDeferredStatus);
b75a7d8f
A
166 fGroupMap = new UVector32(fDeferredStatus);
167 fSets = new UVector(fDeferredStatus);
168 fInitialChars = new UnicodeSet;
169 fInitialChars8 = new Regex8BitSet;
170 if (U_FAILURE(fDeferredStatus)) {
171 return;
172 }
173 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
174 fInitialChars == NULL || fInitialChars8 == NULL) {
175 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
176 return;
177 }
178
179 // Slot zero of the vector of sets is reserved. Fill it here.
180 fSets->addElement((int32_t)0, fDeferredStatus);
181}
182
183
184//--------------------------------------------------------------------------
185//
46f4442e 186// zap Delete everything owned by this RegexPattern.
b75a7d8f
A
187//
188//--------------------------------------------------------------------------
189void RegexPattern::zap() {
190 delete fCompiledPat;
191 fCompiledPat = NULL;
192 int i;
193 for (i=1; i<fSets->size(); i++) {
194 UnicodeSet *s;
195 s = (UnicodeSet *)fSets->elementAt(i);
196 if (s != NULL) {
197 delete s;
198 }
199 }
200 delete fSets;
201 fSets = NULL;
374ca955
A
202 delete[] fSets8;
203 fSets8 = NULL;
b75a7d8f
A
204 delete fGroupMap;
205 fGroupMap = NULL;
206 delete fInitialChars;
207 fInitialChars = NULL;
208 delete fInitialChars8;
209 fInitialChars8 = NULL;
729e4ab9
A
210 if (fPattern != NULL) {
211 utext_close(fPattern);
212 fPattern = NULL;
213 }
214 if (fPatternString != NULL) {
215 delete fPatternString;
216 fPatternString = NULL;
217 }
b75a7d8f
A
218}
219
220
221//--------------------------------------------------------------------------
222//
223// Destructor
224//
225//--------------------------------------------------------------------------
226RegexPattern::~RegexPattern() {
227 zap();
73c04bcf 228}
b75a7d8f
A
229
230
231//--------------------------------------------------------------------------
232//
233// Clone
234//
235//--------------------------------------------------------------------------
46f4442e 236RegexPattern *RegexPattern::clone() const {
b75a7d8f
A
237 RegexPattern *copy = new RegexPattern(*this);
238 return copy;
73c04bcf 239}
b75a7d8f
A
240
241
242//--------------------------------------------------------------------------
243//
244// operator == (comparison) Consider two patterns to be == if the
245// pattern strings and the flags are the same.
729e4ab9
A
246// Note that pattern strings with the same
247// characters can still be considered different.
b75a7d8f
A
248//
249//--------------------------------------------------------------------------
250UBool RegexPattern::operator ==(const RegexPattern &other) const {
729e4ab9
A
251 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
252 if (this->fPatternString != NULL && other.fPatternString != NULL) {
253 return *(this->fPatternString) == *(other.fPatternString);
254 } else if (this->fPattern == NULL) {
255 if (other.fPattern == NULL) {
256 return TRUE;
257 }
258 } else if (other.fPattern != NULL) {
259 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
260 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
261 return utext_equals(this->fPattern, other.fPattern);
262 }
263 }
264 return FALSE;
b75a7d8f
A
265}
266
267//---------------------------------------------------------------------
268//
46f4442e 269// compile
b75a7d8f
A
270//
271//---------------------------------------------------------------------
374ca955
A
272RegexPattern * U_EXPORT2
273RegexPattern::compile(const UnicodeString &regex,
274 uint32_t flags,
275 UParseError &pe,
276 UErrorCode &status)
277{
729e4ab9
A
278 if (U_FAILURE(status)) {
279 return NULL;
280 }
281
282 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
283 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
284 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
285
286 if ((flags & ~allFlags) != 0) {
287 status = U_REGEX_INVALID_FLAG;
288 return NULL;
289 }
290
291 if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
292 status = U_REGEX_UNIMPLEMENTED;
293 return NULL;
294 }
295
296 RegexPattern *This = new RegexPattern;
297 if (This == NULL) {
298 status = U_MEMORY_ALLOCATION_ERROR;
299 return NULL;
300 }
301 if (U_FAILURE(This->fDeferredStatus)) {
302 status = This->fDeferredStatus;
303 delete This;
304 return NULL;
305 }
306 This->fFlags = flags;
307
308 RegexCompile compiler(This, status);
309 compiler.compile(regex, pe, status);
310
311 if (U_FAILURE(status)) {
312 delete This;
313 This = NULL;
314 }
315
316 return This;
317}
318
b75a7d8f 319
729e4ab9
A
320//
321// compile, UText mode
322//
323RegexPattern * U_EXPORT2
324RegexPattern::compile(UText *regex,
325 uint32_t flags,
326 UParseError &pe,
327 UErrorCode &status)
328{
b75a7d8f
A
329 if (U_FAILURE(status)) {
330 return NULL;
331 }
332
333 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
46f4442e 334 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
729e4ab9 335 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
b75a7d8f
A
336
337 if ((flags & ~allFlags) != 0) {
338 status = U_REGEX_INVALID_FLAG;
339 return NULL;
340 }
341
729e4ab9 342 if ((flags & (UREGEX_CANON_EQ | UREGEX_LITERAL)) != 0) {
b75a7d8f
A
343 status = U_REGEX_UNIMPLEMENTED;
344 return NULL;
345 }
346
347 RegexPattern *This = new RegexPattern;
348 if (This == NULL) {
349 status = U_MEMORY_ALLOCATION_ERROR;
350 return NULL;
351 }
352 if (U_FAILURE(This->fDeferredStatus)) {
353 status = This->fDeferredStatus;
46f4442e 354 delete This;
b75a7d8f
A
355 return NULL;
356 }
357 This->fFlags = flags;
358
359 RegexCompile compiler(This, status);
360 compiler.compile(regex, pe, status);
46f4442e
A
361
362 if (U_FAILURE(status)) {
363 delete This;
364 This = NULL;
365 }
b75a7d8f
A
366
367 return This;
73c04bcf 368}
46f4442e 369
b75a7d8f
A
370//
371// compile with default flags.
372//
374ca955
A
373RegexPattern * U_EXPORT2
374RegexPattern::compile(const UnicodeString &regex,
375 UParseError &pe,
46f4442e 376 UErrorCode &err)
b75a7d8f 377{
46f4442e 378 return compile(regex, 0, pe, err);
b75a7d8f
A
379}
380
381
729e4ab9
A
382//
383// compile with default flags, UText mode
384//
385RegexPattern * U_EXPORT2
386RegexPattern::compile(UText *regex,
387 UParseError &pe,
388 UErrorCode &err)
389{
390 return compile(regex, 0, pe, err);
391}
392
b75a7d8f
A
393
394//
395// compile with no UParseError parameter.
396//
374ca955 397RegexPattern * U_EXPORT2
729e4ab9
A
398RegexPattern::compile(const UnicodeString &regex,
399 uint32_t flags,
400 UErrorCode &err)
b75a7d8f
A
401{
402 UParseError pe;
46f4442e 403 return compile(regex, flags, pe, err);
b75a7d8f
A
404}
405
406
729e4ab9
A
407//
408// compile with no UParseError parameter, UText mode
409//
410RegexPattern * U_EXPORT2
411RegexPattern::compile(UText *regex,
412 uint32_t flags,
413 UErrorCode &err)
414{
415 UParseError pe;
416 return compile(regex, flags, pe, err);
417}
418
b75a7d8f
A
419
420//---------------------------------------------------------------------
421//
422// flags
423//
424//---------------------------------------------------------------------
425uint32_t RegexPattern::flags() const {
426 return fFlags;
427}
428
429
430//---------------------------------------------------------------------
431//
432// matcher(UnicodeString, err)
433//
434//---------------------------------------------------------------------
435RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
436 UErrorCode &status) const {
437 RegexMatcher *retMatcher = matcher(status);
438 if (retMatcher != NULL) {
729e4ab9
A
439 retMatcher->fDeferredStatus = status;
440 retMatcher->reset(input);
441 }
442 return retMatcher;
443}
444
445//
446// matcher, UText mode
447//
448RegexMatcher *RegexPattern::matcher(UText *input,
449 PatternIsUTextFlag /*flag*/,
450 UErrorCode &status) const {
451 RegexMatcher *retMatcher = matcher(status);
452 if (retMatcher != NULL) {
453 retMatcher->fDeferredStatus = status;
b75a7d8f
A
454 retMatcher->reset(input);
455 }
456 return retMatcher;
73c04bcf 457}
b75a7d8f 458
#if 0
// Compiled out.  Always fails with U_UNSUPPORTED_ERROR unless status already
// holds an error.
RegexMatcher *RegexPattern::matcher(const UChar * /*input*/,
                                    UErrorCode   &status) const
{
    /* This should never get called. The API with UnicodeString should be called instead. */
    if (!U_FAILURE(status)) {
        status = U_UNSUPPORTED_ERROR;
    }
    return NULL;
}
#endif
b75a7d8f
A
470
471//---------------------------------------------------------------------
472//
473// matcher(status)
474//
475//---------------------------------------------------------------------
476RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
477 RegexMatcher *retMatcher = NULL;
478
479 if (U_FAILURE(status)) {
480 return NULL;
481 }
482 if (U_FAILURE(fDeferredStatus)) {
483 status = fDeferredStatus;
484 return NULL;
485 }
486
46f4442e 487 retMatcher = new RegexMatcher(this);
b75a7d8f
A
488 if (retMatcher == NULL) {
489 status = U_MEMORY_ALLOCATION_ERROR;
490 return NULL;
491 }
492 return retMatcher;
73c04bcf 493}
b75a7d8f
A
494
495
496
497//---------------------------------------------------------------------
498//
499// matches Convenience function to test for a match, starting
500// with a pattern string and a data string.
501//
502//---------------------------------------------------------------------
374ca955 503UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
b75a7d8f
A
504 const UnicodeString &input,
505 UParseError &pe,
506 UErrorCode &status) {
507
508 if (U_FAILURE(status)) {return FALSE;}
509
510 UBool retVal;
511 RegexPattern *pat = NULL;
512 RegexMatcher *matcher = NULL;
513
514 pat = RegexPattern::compile(regex, 0, pe, status);
515 matcher = pat->matcher(input, status);
516 retVal = matcher->matches(status);
517
518 delete matcher;
519 delete pat;
520 return retVal;
521}
522
523
729e4ab9
A
524//
525// matches, UText mode
526//
527UBool U_EXPORT2 RegexPattern::matches(UText *regex,
528 UText *input,
529 UParseError &pe,
530 UErrorCode &status) {
531
532 if (U_FAILURE(status)) {return FALSE;}
533
534 UBool retVal;
535 RegexPattern *pat = NULL;
536 RegexMatcher *matcher = NULL;
537
538 pat = RegexPattern::compile(regex, 0, pe, status);
539 matcher = pat->matcher(input, PATTERN_IS_UTEXT, status);
540 retVal = matcher->matches(status);
541
542 delete matcher;
543 delete pat;
544 return retVal;
545}
546
547
548
b75a7d8f
A
549
550
551//---------------------------------------------------------------------
552//
553// pattern
554//
555//---------------------------------------------------------------------
556UnicodeString RegexPattern::pattern() const {
729e4ab9
A
557 if (fPatternString != NULL) {
558 return *fPatternString;
559 } else if (fPattern == NULL) {
560 return UnicodeString();
561 } else {
562 UErrorCode status = U_ZERO_ERROR;
563 int64_t nativeLen = utext_nativeLength(fPattern);
564 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
565 UnicodeString result;
566
567 status = U_ZERO_ERROR;
568 UChar *resultChars = result.getBuffer(len16);
569 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
570 result.releaseBuffer(len16);
571
572 return result;
573 }
b75a7d8f
A
574}
575
576
577
578
729e4ab9
A
579//---------------------------------------------------------------------
580//
581// patternText
582//
583//---------------------------------------------------------------------
584UText *RegexPattern::patternText(UErrorCode &status) const {
585 if (U_FAILURE(status)) {return NULL;}
586 status = U_ZERO_ERROR;
587
588 if (fPattern != NULL) {
589 return fPattern;
590 } else {
591 RegexStaticSets::initGlobals(&status);
592 return RegexStaticSets::gStaticSets->fEmptyText;
593 }
594}
595
596
597
b75a7d8f
A
598//---------------------------------------------------------------------
599//
600// split
601//
602//---------------------------------------------------------------------
603int32_t RegexPattern::split(const UnicodeString &input,
604 UnicodeString dest[],
605 int32_t destCapacity,
729e4ab9
A
606 UErrorCode &status) const
607{
608 if (U_FAILURE(status)) {
609 return 0;
610 };
611
612 RegexMatcher m(this);
613 int32_t r = 0;
614 // Check m's status to make sure all is ok.
615 if (U_SUCCESS(m.fDeferredStatus)) {
616 r = m.split(input, dest, destCapacity, status);
617 }
618 return r;
619}
620
621//
622// split, UText mode
623//
624int32_t RegexPattern::split(UText *input,
625 UText *dest[],
626 int32_t destCapacity,
627 UErrorCode &status) const
b75a7d8f
A
628{
629 if (U_FAILURE(status)) {
630 return 0;
631 };
632
633 RegexMatcher m(this);
46f4442e
A
634 int32_t r = 0;
635 // Check m's status to make sure all is ok.
636 if (U_SUCCESS(m.fDeferredStatus)) {
637 r = m.split(input, dest, destCapacity, status);
638 }
b75a7d8f
A
639 return r;
640}
641
642
643
644//---------------------------------------------------------------------
645//
646// dump Output the compiled form of the pattern.
647// Debugging function only.
648//
649//---------------------------------------------------------------------
#if defined(REGEX_DEBUG)
// dumpOp: print one operation of the compiled pattern (debug builds only).
//   Prints the index, the raw op word and the opcode name, followed by the
//   operand in a form chosen by the opcode type.
void RegexPattern::dumpOp(int32_t index) const {
    static const char * const opNames[] = {URX_OPCODE_NAMES};

    int32_t op      = fCompiledPat->elementAti(index);
    int32_t operand = URX_VAL(op);
    int32_t opType  = URX_TYPE(op);

    // Opcode types with no entry in the name table are shown under name zero.
    int32_t nameIdx = ((uint32_t)opType >= sizeof(opNames)/sizeof(char *)) ? 0 : opType;

    REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, op, opNames[nameIdx]));

    switch (opType) {
    // Types with no operand field of interest.
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        break;

    // Types with an integer operand field.
    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        REGEX_DUMP_DEBUG_PRINTF(("%d", operand));
        break;

    // Single character operand; values of 256 and above are shown as '?'.
    case URX_ONECHAR:
    case URX_ONECHAR_I:
        REGEX_DUMP_DEBUG_PRINTF(("%c", operand<256?operand:'?'));
        break;

    // Literal string: the operand is the start index into fLiteralText and
    // the following op holds the length.
    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            int32_t length = URX_VAL(lengthOp);
            for (int32_t pos = operand; pos < operand + length; pos++) {
                UChar ch = fLiteralText[pos];
                if (ch < 32 || ch >= 256) {
                    ch = '.';
                }
                REGEX_DUMP_DEBUG_PRINTF(("%c", ch));
            }
        }
        break;

    // Reference to one of this pattern's own sets.
    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString setPattern;
            UnicodeSet *set = (UnicodeSet *)fSets->elementAt(operand);
            set->toPattern(setPattern, TRUE);
            for (int32_t pos = 0; pos < setPattern.length(); pos++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(pos)));
            }
        }
        break;

    // Reference to a static set; the URX_NEG_SET bit is printed as "NOT ".
    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString setPattern;
            if (operand & URX_NEG_SET) {
                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
                operand &= ~URX_NEG_SET;
            }
            UnicodeSet *set = fStaticSets[operand];
            set->toPattern(setPattern, TRUE);
            for (int32_t pos = 0; pos < setPattern.length(); pos++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(pos)));
            }
        }
        break;

    default:
        REGEX_DUMP_DEBUG_PRINTF(("??????"));
        break;
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
#endif
b75a7d8f
A
769
770
#if defined(REGEX_DEBUG)
// RegexPatternDump: print a description of a compiled pattern (debug builds only).
//   Prints the original pattern text, the minimum match length, the
//   start-of-match information, and then each compiled operation via dumpOp().
U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int32_t index;
    int32_t i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
    // A pattern that was never compiled has no text to walk.
    if (This->fPattern != NULL) {
        UChar32 c = utext_next32From(This->fPattern, 0);
        while (c != U_SENTINEL) {
            // Only code points 32..255 are passed to %c; the same range dumpOp() uses.
            if (c<32 || c>=256) {
                c = '.';
            }
            REGEX_DUMP_DEBUG_PRINTF(("%c", c));

            c = UTEXT_NEXT32(This->fPattern);
        }
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i])); // TODO: non-printables, surrogates.
        }
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 characters of the set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
        for (i=0; i<numSetChars; i++) {
            UChar32 setChar = This->fInitialChars->charAt(i);
            if (0x20<setChar && setChar <0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", setChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", setChar));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
            REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
        } else {
            REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
        }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
        "-------------------------------------------\n"));
    for (index = 0; index<This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
#endif
b75a7d8f
A
833
834
835
374ca955 836UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
b75a7d8f
A
837
838U_NAMESPACE_END
839#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS