]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/repattrn.cpp
ICU-57149.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / repattrn.cpp
CommitLineData
b75a7d8f 1//
46f4442e 2// file: repattrn.cpp
b75a7d8f
A
3//
4/*
5***************************************************************************
2ca993e8
A
6* Copyright (C) 2002-2016 International Business Machines Corporation
7* and others. All rights reserved.
b75a7d8f
A
8***************************************************************************
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "unicode/regex.h"
374ca955 16#include "unicode/uclean.h"
2ca993e8
A
17#include "cmemory.h"
18#include "cstr.h"
b75a7d8f 19#include "uassert.h"
b331163b 20#include "uhash.h"
b75a7d8f
A
21#include "uvector.h"
22#include "uvectr32.h"
729e4ab9 23#include "uvectr64.h"
b75a7d8f
A
24#include "regexcmp.h"
25#include "regeximp.h"
26#include "regexst.h"
27
28U_NAMESPACE_BEGIN
29
30//--------------------------------------------------------------------------
31//
32// RegexPattern Default Constructor
33//
34//--------------------------------------------------------------------------
35RegexPattern::RegexPattern() {
36 // Init all of this instances data.
37 init();
73c04bcf 38}
b75a7d8f
A
39
40
41//--------------------------------------------------------------------------
42//
43// Copy Constructor Note: This is a rather inefficient implementation,
44// but it probably doesn't matter.
45//
46//--------------------------------------------------------------------------
47RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
46f4442e 48 init();
b75a7d8f
A
49 *this = other;
50}
51
52
53
54//--------------------------------------------------------------------------
55//
729e4ab9 56// Assignment Operator
b75a7d8f
A
57//
58//--------------------------------------------------------------------------
59RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
60 if (this == &other) {
61 // Source and destination are the same. Don't do anything.
62 return *this;
63 }
64
65 // Clean out any previous contents of object being assigned to.
66 zap();
67
68 // Give target object a default initialization
69 init();
70
71 // Copy simple fields
b331163b
A
72 fDeferredStatus = other.fDeferredStatus;
73
74 if (U_FAILURE(fDeferredStatus)) {
75 return *this;
76 }
77
78 if (other.fPatternString == NULL) {
729e4ab9 79 fPatternString = NULL;
b331163b 80 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
729e4ab9
A
81 } else {
82 fPatternString = new UnicodeString(*(other.fPatternString));
b331163b 83 if (fPatternString == NULL) {
729e4ab9 84 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
b331163b
A
85 } else {
86 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
729e4ab9
A
87 }
88 }
b331163b
A
89 if (U_FAILURE(fDeferredStatus)) {
90 return *this;
91 }
92
b75a7d8f
A
93 fFlags = other.fFlags;
94 fLiteralText = other.fLiteralText;
b75a7d8f 95 fMinMatchLen = other.fMinMatchLen;
374ca955
A
96 fFrameSize = other.fFrameSize;
97 fDataSize = other.fDataSize;
46f4442e 98 fStaticSets = other.fStaticSets;
374ca955 99 fStaticSets8 = other.fStaticSets8;
46f4442e 100
b75a7d8f
A
101 fStartType = other.fStartType;
102 fInitialStringIdx = other.fInitialStringIdx;
103 fInitialStringLen = other.fInitialStringLen;
104 *fInitialChars = *other.fInitialChars;
b75a7d8f 105 fInitialChar = other.fInitialChar;
374ca955 106 *fInitialChars8 = *other.fInitialChars8;
729e4ab9 107 fNeedsAltInput = other.fNeedsAltInput;
b75a7d8f
A
108
109 // Copy the pattern. It's just values, nothing deep to copy.
110 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
111 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
112
46f4442e 113 // Copy the Unicode Sets.
b75a7d8f 114 // Could be made more efficient if the sets were reference counted and shared,
46f4442e 115 // but I doubt that pattern copying will be particularly common.
b75a7d8f
A
116 // Note: init() already added an empty element zero to fSets
117 int32_t i;
118 int32_t numSets = other.fSets->size();
119 fSets8 = new Regex8BitSet[numSets];
46f4442e
A
120 if (fSets8 == NULL) {
121 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
122 return *this;
123 }
b75a7d8f
A
124 for (i=1; i<numSets; i++) {
125 if (U_FAILURE(fDeferredStatus)) {
126 return *this;
127 }
128 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
129 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
130 if (newSet == NULL) {
131 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
132 break;
133 }
134 fSets->addElement(newSet, fDeferredStatus);
135 fSets8[i] = other.fSets8[i];
136 }
137
b331163b
A
138 // Copy the named capture group hash map.
139 int32_t hashPos = UHASH_FIRST;
140 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
141 if (U_FAILURE(fDeferredStatus)) {
142 break;
143 }
144 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
145 UnicodeString *key = new UnicodeString(*name);
146 int32_t val = hashEl->value.integer;
147 if (key == NULL) {
148 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
149 } else {
150 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
151 }
152 }
b75a7d8f
A
153 return *this;
154}
155
156
157//--------------------------------------------------------------------------
158//
159// init Shared initialization for use by constructors.
160// Bring an uninitialized RegexPattern up to a default state.
161//
162//--------------------------------------------------------------------------
163void RegexPattern::init() {
164 fFlags = 0;
374ca955
A
165 fCompiledPat = 0;
166 fLiteralText.remove();
167 fSets = NULL;
168 fSets8 = NULL;
b75a7d8f
A
169 fDeferredStatus = U_ZERO_ERROR;
170 fMinMatchLen = 0;
b75a7d8f
A
171 fFrameSize = 0;
172 fDataSize = 0;
374ca955 173 fGroupMap = NULL;
374ca955
A
174 fStaticSets = NULL;
175 fStaticSets8 = NULL;
b75a7d8f
A
176 fStartType = START_NO_INFO;
177 fInitialStringIdx = 0;
178 fInitialStringLen = 0;
179 fInitialChars = NULL;
b75a7d8f 180 fInitialChar = 0;
374ca955 181 fInitialChars8 = NULL;
729e4ab9 182 fNeedsAltInput = FALSE;
b331163b 183 fNamedCaptureMap = NULL;
46f4442e 184
729e4ab9
A
185 fPattern = NULL; // will be set later
186 fPatternString = NULL; // may be set later
187 fCompiledPat = new UVector64(fDeferredStatus);
b75a7d8f
A
188 fGroupMap = new UVector32(fDeferredStatus);
189 fSets = new UVector(fDeferredStatus);
190 fInitialChars = new UnicodeSet;
191 fInitialChars8 = new Regex8BitSet;
b331163b
A
192 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function
193 uhash_compareUnicodeString, // Key comparator function
194 uhash_compareLong, // Value comparator function
195 &fDeferredStatus);
b75a7d8f
A
196 if (U_FAILURE(fDeferredStatus)) {
197 return;
198 }
199 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
b331163b 200 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
b75a7d8f
A
201 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
202 return;
203 }
204
205 // Slot zero of the vector of sets is reserved. Fill it here.
206 fSets->addElement((int32_t)0, fDeferredStatus);
b331163b
A
207
208 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
209 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
b75a7d8f
A
210}
211
212
213//--------------------------------------------------------------------------
214//
46f4442e 215// zap Delete everything owned by this RegexPattern.
b75a7d8f
A
216//
217//--------------------------------------------------------------------------
218void RegexPattern::zap() {
219 delete fCompiledPat;
220 fCompiledPat = NULL;
221 int i;
222 for (i=1; i<fSets->size(); i++) {
223 UnicodeSet *s;
224 s = (UnicodeSet *)fSets->elementAt(i);
225 if (s != NULL) {
226 delete s;
227 }
228 }
229 delete fSets;
230 fSets = NULL;
374ca955
A
231 delete[] fSets8;
232 fSets8 = NULL;
b75a7d8f
A
233 delete fGroupMap;
234 fGroupMap = NULL;
235 delete fInitialChars;
236 fInitialChars = NULL;
237 delete fInitialChars8;
238 fInitialChars8 = NULL;
729e4ab9
A
239 if (fPattern != NULL) {
240 utext_close(fPattern);
241 fPattern = NULL;
242 }
243 if (fPatternString != NULL) {
244 delete fPatternString;
245 fPatternString = NULL;
246 }
b331163b
A
247 uhash_close(fNamedCaptureMap);
248 fNamedCaptureMap = NULL;
b75a7d8f
A
249}
250
251
252//--------------------------------------------------------------------------
253//
254// Destructor
255//
256//--------------------------------------------------------------------------
257RegexPattern::~RegexPattern() {
258 zap();
73c04bcf 259}
b75a7d8f
A
260
261
262//--------------------------------------------------------------------------
263//
264// Clone
265//
266//--------------------------------------------------------------------------
46f4442e 267RegexPattern *RegexPattern::clone() const {
b75a7d8f
A
268 RegexPattern *copy = new RegexPattern(*this);
269 return copy;
73c04bcf 270}
b75a7d8f
A
271
272
273//--------------------------------------------------------------------------
274//
275// operator == (comparison) Consider two patterns to be == if the
276// pattern strings and the flags are the same.
729e4ab9
A
277// Note that pattern strings with the same
278// characters can still be considered different.
b75a7d8f
A
279//
280//--------------------------------------------------------------------------
281UBool RegexPattern::operator ==(const RegexPattern &other) const {
729e4ab9
A
282 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
283 if (this->fPatternString != NULL && other.fPatternString != NULL) {
284 return *(this->fPatternString) == *(other.fPatternString);
285 } else if (this->fPattern == NULL) {
286 if (other.fPattern == NULL) {
287 return TRUE;
288 }
289 } else if (other.fPattern != NULL) {
290 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
291 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
292 return utext_equals(this->fPattern, other.fPattern);
293 }
294 }
295 return FALSE;
b75a7d8f
A
296}
297
298//---------------------------------------------------------------------
299//
46f4442e 300// compile
b75a7d8f
A
301//
302//---------------------------------------------------------------------
374ca955
A
303RegexPattern * U_EXPORT2
304RegexPattern::compile(const UnicodeString &regex,
305 uint32_t flags,
306 UParseError &pe,
307 UErrorCode &status)
308{
729e4ab9
A
309 if (U_FAILURE(status)) {
310 return NULL;
311 }
57a6839d 312
729e4ab9
A
313 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
314 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
315 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
57a6839d 316
729e4ab9
A
317 if ((flags & ~allFlags) != 0) {
318 status = U_REGEX_INVALID_FLAG;
319 return NULL;
320 }
57a6839d 321
4388f060 322 if ((flags & UREGEX_CANON_EQ) != 0) {
729e4ab9
A
323 status = U_REGEX_UNIMPLEMENTED;
324 return NULL;
325 }
57a6839d 326
729e4ab9
A
327 RegexPattern *This = new RegexPattern;
328 if (This == NULL) {
329 status = U_MEMORY_ALLOCATION_ERROR;
330 return NULL;
331 }
332 if (U_FAILURE(This->fDeferredStatus)) {
333 status = This->fDeferredStatus;
334 delete This;
335 return NULL;
336 }
337 This->fFlags = flags;
57a6839d 338
729e4ab9
A
339 RegexCompile compiler(This, status);
340 compiler.compile(regex, pe, status);
57a6839d 341
729e4ab9
A
342 if (U_FAILURE(status)) {
343 delete This;
344 This = NULL;
345 }
57a6839d 346
729e4ab9
A
347 return This;
348}
349
b75a7d8f 350
729e4ab9
A
351//
352// compile, UText mode
353//
354RegexPattern * U_EXPORT2
355RegexPattern::compile(UText *regex,
356 uint32_t flags,
357 UParseError &pe,
358 UErrorCode &status)
359{
b75a7d8f
A
360 if (U_FAILURE(status)) {
361 return NULL;
362 }
363
364 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
46f4442e 365 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
729e4ab9 366 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
b75a7d8f
A
367
368 if ((flags & ~allFlags) != 0) {
369 status = U_REGEX_INVALID_FLAG;
370 return NULL;
371 }
372
4388f060 373 if ((flags & UREGEX_CANON_EQ) != 0) {
b75a7d8f
A
374 status = U_REGEX_UNIMPLEMENTED;
375 return NULL;
376 }
377
378 RegexPattern *This = new RegexPattern;
379 if (This == NULL) {
380 status = U_MEMORY_ALLOCATION_ERROR;
381 return NULL;
382 }
383 if (U_FAILURE(This->fDeferredStatus)) {
384 status = This->fDeferredStatus;
46f4442e 385 delete This;
b75a7d8f
A
386 return NULL;
387 }
388 This->fFlags = flags;
389
390 RegexCompile compiler(This, status);
391 compiler.compile(regex, pe, status);
57a6839d 392
46f4442e
A
393 if (U_FAILURE(status)) {
394 delete This;
395 This = NULL;
396 }
b75a7d8f
A
397
398 return This;
73c04bcf 399}
46f4442e 400
b75a7d8f
A
401//
402// compile with default flags.
403//
374ca955
A
404RegexPattern * U_EXPORT2
405RegexPattern::compile(const UnicodeString &regex,
406 UParseError &pe,
46f4442e 407 UErrorCode &err)
b75a7d8f 408{
46f4442e 409 return compile(regex, 0, pe, err);
b75a7d8f
A
410}
411
412
729e4ab9
A
413//
414// compile with default flags, UText mode
415//
416RegexPattern * U_EXPORT2
417RegexPattern::compile(UText *regex,
418 UParseError &pe,
419 UErrorCode &err)
420{
421 return compile(regex, 0, pe, err);
422}
423
b75a7d8f
A
424
425//
426// compile with no UParseErr parameter.
427//
374ca955 428RegexPattern * U_EXPORT2
729e4ab9
A
429RegexPattern::compile(const UnicodeString &regex,
430 uint32_t flags,
431 UErrorCode &err)
b75a7d8f
A
432{
433 UParseError pe;
46f4442e 434 return compile(regex, flags, pe, err);
b75a7d8f
A
435}
436
437
729e4ab9
A
438//
439// compile with no UParseErr parameter, UText mode
440//
441RegexPattern * U_EXPORT2
442RegexPattern::compile(UText *regex,
443 uint32_t flags,
444 UErrorCode &err)
445{
446 UParseError pe;
447 return compile(regex, flags, pe, err);
448}
449
b75a7d8f
A
450
451//---------------------------------------------------------------------
452//
453// flags
454//
455//---------------------------------------------------------------------
456uint32_t RegexPattern::flags() const {
457 return fFlags;
458}
459
460
461//---------------------------------------------------------------------
462//
463// matcher(UnicodeString, err)
464//
465//---------------------------------------------------------------------
466RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
467 UErrorCode &status) const {
468 RegexMatcher *retMatcher = matcher(status);
469 if (retMatcher != NULL) {
729e4ab9
A
470 retMatcher->fDeferredStatus = status;
471 retMatcher->reset(input);
472 }
473 return retMatcher;
474}
475
b75a7d8f
A
476
477//---------------------------------------------------------------------
478//
479// matcher(status)
480//
481//---------------------------------------------------------------------
482RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
483 RegexMatcher *retMatcher = NULL;
484
485 if (U_FAILURE(status)) {
486 return NULL;
487 }
488 if (U_FAILURE(fDeferredStatus)) {
489 status = fDeferredStatus;
490 return NULL;
491 }
492
46f4442e 493 retMatcher = new RegexMatcher(this);
b75a7d8f
A
494 if (retMatcher == NULL) {
495 status = U_MEMORY_ALLOCATION_ERROR;
496 return NULL;
497 }
498 return retMatcher;
73c04bcf 499}
b75a7d8f
A
500
501
502
503//---------------------------------------------------------------------
504//
505// matches Convenience function to test for a match, starting
506// with a pattern string and a data string.
507//
508//---------------------------------------------------------------------
374ca955 509UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
b75a7d8f
A
510 const UnicodeString &input,
511 UParseError &pe,
512 UErrorCode &status) {
513
514 if (U_FAILURE(status)) {return FALSE;}
515
516 UBool retVal;
517 RegexPattern *pat = NULL;
518 RegexMatcher *matcher = NULL;
519
520 pat = RegexPattern::compile(regex, 0, pe, status);
521 matcher = pat->matcher(input, status);
522 retVal = matcher->matches(status);
523
524 delete matcher;
525 delete pat;
526 return retVal;
527}
528
529
729e4ab9
A
530//
531// matches, UText mode
532//
533UBool U_EXPORT2 RegexPattern::matches(UText *regex,
534 UText *input,
535 UParseError &pe,
536 UErrorCode &status) {
537
538 if (U_FAILURE(status)) {return FALSE;}
539
4388f060 540 UBool retVal = FALSE;
729e4ab9
A
541 RegexPattern *pat = NULL;
542 RegexMatcher *matcher = NULL;
543
544 pat = RegexPattern::compile(regex, 0, pe, status);
4388f060
A
545 matcher = pat->matcher(status);
546 if (U_SUCCESS(status)) {
547 matcher->reset(input);
548 retVal = matcher->matches(status);
549 }
729e4ab9
A
550
551 delete matcher;
552 delete pat;
553 return retVal;
554}
555
556
557
b75a7d8f
A
558
559
560//---------------------------------------------------------------------
561//
562// pattern
563//
564//---------------------------------------------------------------------
565UnicodeString RegexPattern::pattern() const {
729e4ab9
A
566 if (fPatternString != NULL) {
567 return *fPatternString;
568 } else if (fPattern == NULL) {
569 return UnicodeString();
570 } else {
571 UErrorCode status = U_ZERO_ERROR;
572 int64_t nativeLen = utext_nativeLength(fPattern);
573 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
574 UnicodeString result;
57a6839d 575
729e4ab9
A
576 status = U_ZERO_ERROR;
577 UChar *resultChars = result.getBuffer(len16);
578 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
579 result.releaseBuffer(len16);
57a6839d 580
729e4ab9
A
581 return result;
582 }
b75a7d8f
A
583}
584
585
586
587
729e4ab9
A
588//---------------------------------------------------------------------
589//
590// patternText
591//
592//---------------------------------------------------------------------
593UText *RegexPattern::patternText(UErrorCode &status) const {
594 if (U_FAILURE(status)) {return NULL;}
595 status = U_ZERO_ERROR;
596
597 if (fPattern != NULL) {
598 return fPattern;
599 } else {
600 RegexStaticSets::initGlobals(&status);
601 return RegexStaticSets::gStaticSets->fEmptyText;
602 }
603}
604
605
b331163b
A
606//--------------------------------------------------------------------------------
607//
608// groupNumberFromName()
609//
610//--------------------------------------------------------------------------------
611int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
612 if (U_FAILURE(status)) {
613 return 0;
614 }
615
616 // No need to explicitly check for syntactically valid names.
617 // Invalid ones will never be in the map, and the lookup will fail.
618
619 int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
620 if (number == 0) {
621 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
622 }
623 return number;
624}
625
626int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
627 if (U_FAILURE(status)) {
628 return 0;
629 }
630 UnicodeString name(groupName, nameLength, US_INV);
631 return groupNumberFromName(name, status);
632}
633
729e4ab9 634
b75a7d8f
A
635//---------------------------------------------------------------------
636//
637// split
638//
639//---------------------------------------------------------------------
640int32_t RegexPattern::split(const UnicodeString &input,
641 UnicodeString dest[],
642 int32_t destCapacity,
729e4ab9
A
643 UErrorCode &status) const
644{
645 if (U_FAILURE(status)) {
646 return 0;
647 };
648
649 RegexMatcher m(this);
650 int32_t r = 0;
651 // Check m's status to make sure all is ok.
652 if (U_SUCCESS(m.fDeferredStatus)) {
653 r = m.split(input, dest, destCapacity, status);
654 }
655 return r;
656}
657
658//
659// split, UText mode
660//
661int32_t RegexPattern::split(UText *input,
662 UText *dest[],
663 int32_t destCapacity,
664 UErrorCode &status) const
b75a7d8f
A
665{
666 if (U_FAILURE(status)) {
667 return 0;
668 };
669
670 RegexMatcher m(this);
46f4442e
A
671 int32_t r = 0;
672 // Check m's status to make sure all is ok.
673 if (U_SUCCESS(m.fDeferredStatus)) {
674 r = m.split(input, dest, destCapacity, status);
675 }
b75a7d8f
A
676 return r;
677}
678
679
b75a7d8f
A
680//---------------------------------------------------------------------
681//
682// dump Output the compiled form of the pattern.
683// Debugging function only.
684//
685//---------------------------------------------------------------------
374ca955 686void RegexPattern::dumpOp(int32_t index) const {
57a6839d
A
687 (void)index; // Suppress warnings in non-debug build.
688#if defined(REGEX_DEBUG)
b75a7d8f
A
689 static const char * const opNames[] = {URX_OPCODE_NAMES};
690 int32_t op = fCompiledPat->elementAti(index);
691 int32_t val = URX_VAL(op);
692 int32_t type = URX_TYPE(op);
693 int32_t pinnedType = type;
2ca993e8 694 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
b75a7d8f
A
695 pinnedType = 0;
696 }
46f4442e 697
57a6839d 698 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
b75a7d8f
A
699 switch (type) {
700 case URX_NOP:
701 case URX_DOTANY:
702 case URX_DOTANY_ALL:
b75a7d8f
A
703 case URX_FAIL:
704 case URX_CARET:
705 case URX_DOLLAR:
706 case URX_BACKSLASH_G:
707 case URX_BACKSLASH_X:
708 case URX_END:
709 case URX_DOLLAR_M:
710 case URX_CARET_M:
711 // Types with no operand field of interest.
712 break;
46f4442e 713
b75a7d8f
A
714 case URX_RESERVED_OP:
715 case URX_START_CAPTURE:
716 case URX_END_CAPTURE:
717 case URX_STATE_SAVE:
718 case URX_JMP:
719 case URX_JMP_SAV:
720 case URX_JMP_SAV_X:
721 case URX_BACKSLASH_B:
374ca955 722 case URX_BACKSLASH_BU:
b75a7d8f
A
723 case URX_BACKSLASH_D:
724 case URX_BACKSLASH_Z:
725 case URX_STRING_LEN:
726 case URX_CTR_INIT:
727 case URX_CTR_INIT_NG:
728 case URX_CTR_LOOP:
729 case URX_CTR_LOOP_NG:
730 case URX_RELOC_OPRND:
731 case URX_STO_SP:
732 case URX_LD_SP:
733 case URX_BACKREF:
734 case URX_STO_INP_LOC:
735 case URX_JMPX:
736 case URX_LA_START:
737 case URX_LA_END:
738 case URX_BACKREF_I:
739 case URX_LB_START:
740 case URX_LB_CONT:
741 case URX_LB_END:
742 case URX_LBN_CONT:
743 case URX_LBN_END:
744 case URX_LOOP_C:
745 case URX_LOOP_DOT_I:
b331163b
A
746 case URX_BACKSLASH_H:
747 case URX_BACKSLASH_R:
748 case URX_BACKSLASH_V:
b75a7d8f 749 // types with an integer operand field.
57a6839d 750 printf("%d", val);
b75a7d8f 751 break;
46f4442e 752
b75a7d8f
A
753 case URX_ONECHAR:
754 case URX_ONECHAR_I:
2ca993e8
A
755 if (val < 0x20) {
756 printf("%#x", val);
757 } else {
758 printf("'%s'", CStr(UnicodeString(val))());
759 }
b75a7d8f 760 break;
46f4442e 761
b75a7d8f
A
762 case URX_STRING:
763 case URX_STRING_I:
764 {
765 int32_t lengthOp = fCompiledPat->elementAti(index+1);
766 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
767 int32_t length = URX_VAL(lengthOp);
2ca993e8
A
768 UnicodeString str(fLiteralText, val, length);
769 printf("%s", CStr(str)());
b75a7d8f
A
770 }
771 break;
772
773 case URX_SETREF:
774 case URX_LOOP_SR_I:
775 {
776 UnicodeString s;
777 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
778 set->toPattern(s, TRUE);
2ca993e8 779 printf("%s", CStr(s)());
b75a7d8f
A
780 }
781 break;
782
783 case URX_STATIC_SETREF:
784 case URX_STAT_SETREF_N:
785 {
786 UnicodeString s;
787 if (val & URX_NEG_SET) {
57a6839d 788 printf("NOT ");
b75a7d8f
A
789 val &= ~URX_NEG_SET;
790 }
791 UnicodeSet *set = fStaticSets[val];
792 set->toPattern(s, TRUE);
2ca993e8 793 printf("%s", CStr(s)());
b75a7d8f
A
794 }
795 break;
796
46f4442e 797
b75a7d8f 798 default:
57a6839d 799 printf("??????");
b75a7d8f
A
800 break;
801 }
57a6839d 802 printf("\n");
374ca955 803#endif
57a6839d 804}
b75a7d8f
A
805
806
57a6839d 807void RegexPattern::dumpPattern() const {
b75a7d8f
A
808#if defined(REGEX_DEBUG)
809 int index;
57a6839d 810
2ca993e8
A
811 UnicodeString patStr;
812 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
813 patStr.append(c);
57a6839d 814 }
2ca993e8 815 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
57a6839d
A
816 printf(" Min Match Length: %d\n", fMinMatchLen);
817 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
818 if (fStartType == START_STRING) {
2ca993e8
A
819 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
820 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
57a6839d 821 } else if (fStartType == START_SET) {
2ca993e8
A
822 UnicodeString s;
823 fInitialChars->toPattern(s, TRUE);
824 printf(" Match First Chars: %s\n", CStr(s)());
b75a7d8f 825
57a6839d 826 } else if (fStartType == START_CHAR) {
2ca993e8
A
827 printf(" First char of Match: ");
828 if (fInitialChar > 0x20) {
829 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
b75a7d8f 830 } else {
57a6839d 831 printf("%#x\n", fInitialChar);
b75a7d8f
A
832 }
833 }
834
b331163b
A
835 printf("Named Capture Groups:\n");
836 if (uhash_count(fNamedCaptureMap) == 0) {
837 printf(" None\n");
838 } else {
839 int32_t pos = UHASH_FIRST;
840 const UHashElement *el = NULL;
841 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
842 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
b331163b 843 int32_t number = el->value.integer;
2ca993e8 844 printf(" %d\t%s\n", number, CStr(*name)());
b331163b
A
845 }
846 }
847
57a6839d
A
848 printf("\nIndex Binary Type Operand\n" \
849 "-------------------------------------------\n");
850 for (index = 0; index<fCompiledPat->size(); index++) {
851 dumpOp(index);
b75a7d8f 852 }
57a6839d 853 printf("\n\n");
374ca955 854#endif
57a6839d 855}
b75a7d8f
A
856
857
858
374ca955 859UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
b75a7d8f
A
860
861U_NAMESPACE_END
862#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS