]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/repattrn.cpp
ICU-64252.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / repattrn.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3//
46f4442e 4// file: repattrn.cpp
b75a7d8f
A
5//
6/*
7***************************************************************************
2ca993e8
A
8* Copyright (C) 2002-2016 International Business Machines Corporation
9* and others. All rights reserved.
b75a7d8f
A
10***************************************************************************
11*/
12
13#include "unicode/utypes.h"
14
15#if !UCONFIG_NO_REGULAR_EXPRESSIONS
16
17#include "unicode/regex.h"
374ca955 18#include "unicode/uclean.h"
2ca993e8
A
19#include "cmemory.h"
20#include "cstr.h"
b75a7d8f 21#include "uassert.h"
b331163b 22#include "uhash.h"
b75a7d8f
A
23#include "uvector.h"
24#include "uvectr32.h"
729e4ab9 25#include "uvectr64.h"
b75a7d8f
A
26#include "regexcmp.h"
27#include "regeximp.h"
28#include "regexst.h"
29
30U_NAMESPACE_BEGIN
31
32//--------------------------------------------------------------------------
33//
34// RegexPattern Default Constructor
35//
36//--------------------------------------------------------------------------
37RegexPattern::RegexPattern() {
38 // Init all of this instances data.
39 init();
73c04bcf 40}
b75a7d8f
A
41
42
43//--------------------------------------------------------------------------
44//
45// Copy Constructor Note: This is a rather inefficient implementation,
46// but it probably doesn't matter.
47//
48//--------------------------------------------------------------------------
49RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) {
46f4442e 50 init();
b75a7d8f
A
51 *this = other;
52}
53
54
55
56//--------------------------------------------------------------------------
57//
729e4ab9 58// Assignment Operator
b75a7d8f
A
59//
60//--------------------------------------------------------------------------
61RegexPattern &RegexPattern::operator = (const RegexPattern &other) {
62 if (this == &other) {
63 // Source and destination are the same. Don't do anything.
64 return *this;
65 }
66
67 // Clean out any previous contents of object being assigned to.
68 zap();
69
70 // Give target object a default initialization
71 init();
72
73 // Copy simple fields
b331163b
A
74 fDeferredStatus = other.fDeferredStatus;
75
76 if (U_FAILURE(fDeferredStatus)) {
77 return *this;
78 }
79
80 if (other.fPatternString == NULL) {
729e4ab9 81 fPatternString = NULL;
b331163b 82 fPattern = utext_clone(fPattern, other.fPattern, FALSE, TRUE, &fDeferredStatus);
729e4ab9
A
83 } else {
84 fPatternString = new UnicodeString(*(other.fPatternString));
b331163b 85 if (fPatternString == NULL) {
729e4ab9 86 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
b331163b
A
87 } else {
88 fPattern = utext_openConstUnicodeString(NULL, fPatternString, &fDeferredStatus);
729e4ab9
A
89 }
90 }
b331163b
A
91 if (U_FAILURE(fDeferredStatus)) {
92 return *this;
93 }
94
b75a7d8f
A
95 fFlags = other.fFlags;
96 fLiteralText = other.fLiteralText;
b75a7d8f 97 fMinMatchLen = other.fMinMatchLen;
374ca955
A
98 fFrameSize = other.fFrameSize;
99 fDataSize = other.fDataSize;
46f4442e 100 fStaticSets = other.fStaticSets;
374ca955 101 fStaticSets8 = other.fStaticSets8;
46f4442e 102
b75a7d8f
A
103 fStartType = other.fStartType;
104 fInitialStringIdx = other.fInitialStringIdx;
105 fInitialStringLen = other.fInitialStringLen;
106 *fInitialChars = *other.fInitialChars;
b75a7d8f 107 fInitialChar = other.fInitialChar;
374ca955 108 *fInitialChars8 = *other.fInitialChars8;
729e4ab9 109 fNeedsAltInput = other.fNeedsAltInput;
b75a7d8f
A
110
111 // Copy the pattern. It's just values, nothing deep to copy.
112 fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus);
113 fGroupMap->assign(*other.fGroupMap, fDeferredStatus);
114
46f4442e 115 // Copy the Unicode Sets.
b75a7d8f 116 // Could be made more efficient if the sets were reference counted and shared,
46f4442e 117 // but I doubt that pattern copying will be particularly common.
b75a7d8f
A
118 // Note: init() already added an empty element zero to fSets
119 int32_t i;
120 int32_t numSets = other.fSets->size();
121 fSets8 = new Regex8BitSet[numSets];
46f4442e
A
122 if (fSets8 == NULL) {
123 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
124 return *this;
125 }
b75a7d8f
A
126 for (i=1; i<numSets; i++) {
127 if (U_FAILURE(fDeferredStatus)) {
128 return *this;
129 }
130 UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i);
131 UnicodeSet *newSet = new UnicodeSet(*sourceSet);
132 if (newSet == NULL) {
133 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
134 break;
135 }
136 fSets->addElement(newSet, fDeferredStatus);
137 fSets8[i] = other.fSets8[i];
138 }
139
b331163b
A
140 // Copy the named capture group hash map.
141 int32_t hashPos = UHASH_FIRST;
142 while (const UHashElement *hashEl = uhash_nextElement(other.fNamedCaptureMap, &hashPos)) {
143 if (U_FAILURE(fDeferredStatus)) {
144 break;
145 }
146 const UnicodeString *name = (const UnicodeString *)hashEl->key.pointer;
147 UnicodeString *key = new UnicodeString(*name);
148 int32_t val = hashEl->value.integer;
149 if (key == NULL) {
150 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
151 } else {
152 uhash_puti(fNamedCaptureMap, key, val, &fDeferredStatus);
153 }
154 }
b75a7d8f
A
155 return *this;
156}
157
158
159//--------------------------------------------------------------------------
160//
161// init Shared initialization for use by constructors.
162// Bring an uninitialized RegexPattern up to a default state.
163//
164//--------------------------------------------------------------------------
165void RegexPattern::init() {
166 fFlags = 0;
374ca955
A
167 fCompiledPat = 0;
168 fLiteralText.remove();
169 fSets = NULL;
170 fSets8 = NULL;
b75a7d8f
A
171 fDeferredStatus = U_ZERO_ERROR;
172 fMinMatchLen = 0;
b75a7d8f
A
173 fFrameSize = 0;
174 fDataSize = 0;
374ca955 175 fGroupMap = NULL;
374ca955
A
176 fStaticSets = NULL;
177 fStaticSets8 = NULL;
b75a7d8f
A
178 fStartType = START_NO_INFO;
179 fInitialStringIdx = 0;
180 fInitialStringLen = 0;
181 fInitialChars = NULL;
b75a7d8f 182 fInitialChar = 0;
374ca955 183 fInitialChars8 = NULL;
729e4ab9 184 fNeedsAltInput = FALSE;
b331163b 185 fNamedCaptureMap = NULL;
46f4442e 186
729e4ab9
A
187 fPattern = NULL; // will be set later
188 fPatternString = NULL; // may be set later
189 fCompiledPat = new UVector64(fDeferredStatus);
b75a7d8f
A
190 fGroupMap = new UVector32(fDeferredStatus);
191 fSets = new UVector(fDeferredStatus);
192 fInitialChars = new UnicodeSet;
193 fInitialChars8 = new Regex8BitSet;
b331163b
A
194 fNamedCaptureMap = uhash_open(uhash_hashUnicodeString, // Key hash function
195 uhash_compareUnicodeString, // Key comparator function
196 uhash_compareLong, // Value comparator function
197 &fDeferredStatus);
b75a7d8f
A
198 if (U_FAILURE(fDeferredStatus)) {
199 return;
200 }
201 if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL ||
b331163b 202 fInitialChars == NULL || fInitialChars8 == NULL || fNamedCaptureMap == NULL) {
b75a7d8f
A
203 fDeferredStatus = U_MEMORY_ALLOCATION_ERROR;
204 return;
205 }
206
207 // Slot zero of the vector of sets is reserved. Fill it here.
208 fSets->addElement((int32_t)0, fDeferredStatus);
b331163b
A
209
210 // fNamedCaptureMap owns its key strings, type (UnicodeString *)
211 uhash_setKeyDeleter(fNamedCaptureMap, uprv_deleteUObject);
b75a7d8f
A
212}
213
214
215//--------------------------------------------------------------------------
216//
46f4442e 217// zap Delete everything owned by this RegexPattern.
b75a7d8f
A
218//
219//--------------------------------------------------------------------------
220void RegexPattern::zap() {
221 delete fCompiledPat;
222 fCompiledPat = NULL;
223 int i;
224 for (i=1; i<fSets->size(); i++) {
225 UnicodeSet *s;
226 s = (UnicodeSet *)fSets->elementAt(i);
227 if (s != NULL) {
228 delete s;
229 }
230 }
231 delete fSets;
232 fSets = NULL;
374ca955
A
233 delete[] fSets8;
234 fSets8 = NULL;
b75a7d8f
A
235 delete fGroupMap;
236 fGroupMap = NULL;
237 delete fInitialChars;
238 fInitialChars = NULL;
239 delete fInitialChars8;
240 fInitialChars8 = NULL;
729e4ab9
A
241 if (fPattern != NULL) {
242 utext_close(fPattern);
243 fPattern = NULL;
244 }
245 if (fPatternString != NULL) {
246 delete fPatternString;
247 fPatternString = NULL;
248 }
b331163b
A
249 uhash_close(fNamedCaptureMap);
250 fNamedCaptureMap = NULL;
b75a7d8f
A
251}
252
253
254//--------------------------------------------------------------------------
255//
256// Destructor
257//
258//--------------------------------------------------------------------------
259RegexPattern::~RegexPattern() {
260 zap();
73c04bcf 261}
b75a7d8f
A
262
263
264//--------------------------------------------------------------------------
265//
266// Clone
267//
268//--------------------------------------------------------------------------
46f4442e 269RegexPattern *RegexPattern::clone() const {
b75a7d8f
A
270 RegexPattern *copy = new RegexPattern(*this);
271 return copy;
73c04bcf 272}
b75a7d8f
A
273
274
275//--------------------------------------------------------------------------
276//
//    operator ==   (comparison)    Consider two patterns to be == if the
278// pattern strings and the flags are the same.
729e4ab9
A
279// Note that pattern strings with the same
280// characters can still be considered different.
b75a7d8f
A
281//
282//--------------------------------------------------------------------------
283UBool RegexPattern::operator ==(const RegexPattern &other) const {
729e4ab9
A
284 if (this->fFlags == other.fFlags && this->fDeferredStatus == other.fDeferredStatus) {
285 if (this->fPatternString != NULL && other.fPatternString != NULL) {
286 return *(this->fPatternString) == *(other.fPatternString);
287 } else if (this->fPattern == NULL) {
288 if (other.fPattern == NULL) {
289 return TRUE;
290 }
291 } else if (other.fPattern != NULL) {
292 UTEXT_SETNATIVEINDEX(this->fPattern, 0);
293 UTEXT_SETNATIVEINDEX(other.fPattern, 0);
294 return utext_equals(this->fPattern, other.fPattern);
295 }
296 }
297 return FALSE;
b75a7d8f
A
298}
299
300//---------------------------------------------------------------------
301//
46f4442e 302// compile
b75a7d8f
A
303//
304//---------------------------------------------------------------------
374ca955
A
305RegexPattern * U_EXPORT2
306RegexPattern::compile(const UnicodeString &regex,
307 uint32_t flags,
308 UParseError &pe,
309 UErrorCode &status)
310{
729e4ab9
A
311 if (U_FAILURE(status)) {
312 return NULL;
313 }
57a6839d 314
729e4ab9
A
315 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
316 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
317 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
57a6839d 318
729e4ab9
A
319 if ((flags & ~allFlags) != 0) {
320 status = U_REGEX_INVALID_FLAG;
321 return NULL;
322 }
57a6839d 323
4388f060 324 if ((flags & UREGEX_CANON_EQ) != 0) {
729e4ab9
A
325 status = U_REGEX_UNIMPLEMENTED;
326 return NULL;
327 }
57a6839d 328
729e4ab9
A
329 RegexPattern *This = new RegexPattern;
330 if (This == NULL) {
331 status = U_MEMORY_ALLOCATION_ERROR;
332 return NULL;
333 }
334 if (U_FAILURE(This->fDeferredStatus)) {
335 status = This->fDeferredStatus;
336 delete This;
337 return NULL;
338 }
339 This->fFlags = flags;
57a6839d 340
729e4ab9
A
341 RegexCompile compiler(This, status);
342 compiler.compile(regex, pe, status);
57a6839d 343
729e4ab9
A
344 if (U_FAILURE(status)) {
345 delete This;
346 This = NULL;
347 }
57a6839d 348
729e4ab9
A
349 return This;
350}
351
b75a7d8f 352
729e4ab9
A
353//
354// compile, UText mode
355//
356RegexPattern * U_EXPORT2
357RegexPattern::compile(UText *regex,
358 uint32_t flags,
359 UParseError &pe,
360 UErrorCode &status)
361{
b75a7d8f
A
362 if (U_FAILURE(status)) {
363 return NULL;
364 }
365
366 const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS |
46f4442e 367 UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD |
729e4ab9 368 UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES | UREGEX_LITERAL;
b75a7d8f
A
369
370 if ((flags & ~allFlags) != 0) {
371 status = U_REGEX_INVALID_FLAG;
372 return NULL;
373 }
374
4388f060 375 if ((flags & UREGEX_CANON_EQ) != 0) {
b75a7d8f
A
376 status = U_REGEX_UNIMPLEMENTED;
377 return NULL;
378 }
379
380 RegexPattern *This = new RegexPattern;
381 if (This == NULL) {
382 status = U_MEMORY_ALLOCATION_ERROR;
383 return NULL;
384 }
385 if (U_FAILURE(This->fDeferredStatus)) {
386 status = This->fDeferredStatus;
46f4442e 387 delete This;
b75a7d8f
A
388 return NULL;
389 }
390 This->fFlags = flags;
391
392 RegexCompile compiler(This, status);
393 compiler.compile(regex, pe, status);
57a6839d 394
46f4442e
A
395 if (U_FAILURE(status)) {
396 delete This;
397 This = NULL;
398 }
b75a7d8f
A
399
400 return This;
73c04bcf 401}
46f4442e 402
b75a7d8f
A
403//
404// compile with default flags.
405//
374ca955
A
406RegexPattern * U_EXPORT2
407RegexPattern::compile(const UnicodeString &regex,
408 UParseError &pe,
46f4442e 409 UErrorCode &err)
b75a7d8f 410{
46f4442e 411 return compile(regex, 0, pe, err);
b75a7d8f
A
412}
413
414
729e4ab9
A
415//
416// compile with default flags, UText mode
417//
418RegexPattern * U_EXPORT2
419RegexPattern::compile(UText *regex,
420 UParseError &pe,
421 UErrorCode &err)
422{
423 return compile(regex, 0, pe, err);
424}
425
b75a7d8f
A
426
427//
428// compile with no UParseErr parameter.
429//
374ca955 430RegexPattern * U_EXPORT2
729e4ab9
A
431RegexPattern::compile(const UnicodeString &regex,
432 uint32_t flags,
433 UErrorCode &err)
b75a7d8f
A
434{
435 UParseError pe;
46f4442e 436 return compile(regex, flags, pe, err);
b75a7d8f
A
437}
438
439
729e4ab9
A
440//
441// compile with no UParseErr parameter, UText mode
442//
443RegexPattern * U_EXPORT2
444RegexPattern::compile(UText *regex,
445 uint32_t flags,
446 UErrorCode &err)
447{
448 UParseError pe;
449 return compile(regex, flags, pe, err);
450}
451
b75a7d8f
A
452
453//---------------------------------------------------------------------
454//
455// flags
456//
457//---------------------------------------------------------------------
458uint32_t RegexPattern::flags() const {
459 return fFlags;
460}
461
462
463//---------------------------------------------------------------------
464//
465// matcher(UnicodeString, err)
466//
467//---------------------------------------------------------------------
468RegexMatcher *RegexPattern::matcher(const UnicodeString &input,
469 UErrorCode &status) const {
470 RegexMatcher *retMatcher = matcher(status);
471 if (retMatcher != NULL) {
729e4ab9
A
472 retMatcher->fDeferredStatus = status;
473 retMatcher->reset(input);
474 }
475 return retMatcher;
476}
477
b75a7d8f
A
478
479//---------------------------------------------------------------------
480//
481// matcher(status)
482//
483//---------------------------------------------------------------------
484RegexMatcher *RegexPattern::matcher(UErrorCode &status) const {
485 RegexMatcher *retMatcher = NULL;
486
487 if (U_FAILURE(status)) {
488 return NULL;
489 }
490 if (U_FAILURE(fDeferredStatus)) {
491 status = fDeferredStatus;
492 return NULL;
493 }
494
46f4442e 495 retMatcher = new RegexMatcher(this);
b75a7d8f
A
496 if (retMatcher == NULL) {
497 status = U_MEMORY_ALLOCATION_ERROR;
498 return NULL;
499 }
500 return retMatcher;
73c04bcf 501}
b75a7d8f
A
502
503
504
505//---------------------------------------------------------------------
506//
507// matches Convenience function to test for a match, starting
508// with a pattern string and a data string.
509//
510//---------------------------------------------------------------------
374ca955 511UBool U_EXPORT2 RegexPattern::matches(const UnicodeString &regex,
b75a7d8f
A
512 const UnicodeString &input,
513 UParseError &pe,
514 UErrorCode &status) {
515
516 if (U_FAILURE(status)) {return FALSE;}
517
518 UBool retVal;
519 RegexPattern *pat = NULL;
520 RegexMatcher *matcher = NULL;
521
522 pat = RegexPattern::compile(regex, 0, pe, status);
523 matcher = pat->matcher(input, status);
524 retVal = matcher->matches(status);
525
526 delete matcher;
527 delete pat;
528 return retVal;
529}
530
531
729e4ab9
A
532//
533// matches, UText mode
534//
535UBool U_EXPORT2 RegexPattern::matches(UText *regex,
536 UText *input,
537 UParseError &pe,
538 UErrorCode &status) {
539
540 if (U_FAILURE(status)) {return FALSE;}
541
4388f060 542 UBool retVal = FALSE;
729e4ab9
A
543 RegexPattern *pat = NULL;
544 RegexMatcher *matcher = NULL;
545
546 pat = RegexPattern::compile(regex, 0, pe, status);
4388f060
A
547 matcher = pat->matcher(status);
548 if (U_SUCCESS(status)) {
549 matcher->reset(input);
550 retVal = matcher->matches(status);
551 }
729e4ab9
A
552
553 delete matcher;
554 delete pat;
555 return retVal;
556}
557
558
559
b75a7d8f
A
560
561
562//---------------------------------------------------------------------
563//
564// pattern
565//
566//---------------------------------------------------------------------
567UnicodeString RegexPattern::pattern() const {
729e4ab9
A
568 if (fPatternString != NULL) {
569 return *fPatternString;
570 } else if (fPattern == NULL) {
571 return UnicodeString();
572 } else {
573 UErrorCode status = U_ZERO_ERROR;
574 int64_t nativeLen = utext_nativeLength(fPattern);
575 int32_t len16 = utext_extract(fPattern, 0, nativeLen, NULL, 0, &status); // buffer overflow error
576 UnicodeString result;
57a6839d 577
729e4ab9
A
578 status = U_ZERO_ERROR;
579 UChar *resultChars = result.getBuffer(len16);
580 utext_extract(fPattern, 0, nativeLen, resultChars, len16, &status); // unterminated warning
581 result.releaseBuffer(len16);
57a6839d 582
729e4ab9
A
583 return result;
584 }
b75a7d8f
A
585}
586
587
588
589
729e4ab9
A
590//---------------------------------------------------------------------
591//
592// patternText
593//
594//---------------------------------------------------------------------
595UText *RegexPattern::patternText(UErrorCode &status) const {
596 if (U_FAILURE(status)) {return NULL;}
597 status = U_ZERO_ERROR;
598
599 if (fPattern != NULL) {
600 return fPattern;
601 } else {
602 RegexStaticSets::initGlobals(&status);
603 return RegexStaticSets::gStaticSets->fEmptyText;
604 }
605}
606
607
b331163b
A
608//--------------------------------------------------------------------------------
609//
610// groupNumberFromName()
611//
612//--------------------------------------------------------------------------------
613int32_t RegexPattern::groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const {
614 if (U_FAILURE(status)) {
615 return 0;
616 }
617
618 // No need to explicitly check for syntactically valid names.
619 // Invalid ones will never be in the map, and the lookup will fail.
620
621 int32_t number = uhash_geti(fNamedCaptureMap, &groupName);
622 if (number == 0) {
623 status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
624 }
625 return number;
626}
627
628int32_t RegexPattern::groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const {
629 if (U_FAILURE(status)) {
630 return 0;
631 }
632 UnicodeString name(groupName, nameLength, US_INV);
633 return groupNumberFromName(name, status);
634}
635
729e4ab9 636
b75a7d8f
A
637//---------------------------------------------------------------------
638//
639// split
640//
641//---------------------------------------------------------------------
642int32_t RegexPattern::split(const UnicodeString &input,
643 UnicodeString dest[],
644 int32_t destCapacity,
729e4ab9
A
645 UErrorCode &status) const
646{
647 if (U_FAILURE(status)) {
648 return 0;
649 };
650
651 RegexMatcher m(this);
652 int32_t r = 0;
653 // Check m's status to make sure all is ok.
654 if (U_SUCCESS(m.fDeferredStatus)) {
655 r = m.split(input, dest, destCapacity, status);
656 }
657 return r;
658}
659
660//
661// split, UText mode
662//
663int32_t RegexPattern::split(UText *input,
664 UText *dest[],
665 int32_t destCapacity,
666 UErrorCode &status) const
b75a7d8f
A
667{
668 if (U_FAILURE(status)) {
669 return 0;
670 };
671
672 RegexMatcher m(this);
46f4442e
A
673 int32_t r = 0;
674 // Check m's status to make sure all is ok.
675 if (U_SUCCESS(m.fDeferredStatus)) {
676 r = m.split(input, dest, destCapacity, status);
677 }
b75a7d8f
A
678 return r;
679}
680
681
b75a7d8f
A
682//---------------------------------------------------------------------
683//
684// dump Output the compiled form of the pattern.
685// Debugging function only.
686//
687//---------------------------------------------------------------------
374ca955 688void RegexPattern::dumpOp(int32_t index) const {
57a6839d
A
689 (void)index; // Suppress warnings in non-debug build.
690#if defined(REGEX_DEBUG)
b75a7d8f
A
691 static const char * const opNames[] = {URX_OPCODE_NAMES};
692 int32_t op = fCompiledPat->elementAti(index);
693 int32_t val = URX_VAL(op);
694 int32_t type = URX_TYPE(op);
695 int32_t pinnedType = type;
2ca993e8 696 if ((uint32_t)pinnedType >= UPRV_LENGTHOF(opNames)) {
b75a7d8f
A
697 pinnedType = 0;
698 }
46f4442e 699
57a6839d 700 printf("%4d %08x %-15s ", index, op, opNames[pinnedType]);
b75a7d8f
A
701 switch (type) {
702 case URX_NOP:
703 case URX_DOTANY:
704 case URX_DOTANY_ALL:
b75a7d8f
A
705 case URX_FAIL:
706 case URX_CARET:
707 case URX_DOLLAR:
708 case URX_BACKSLASH_G:
709 case URX_BACKSLASH_X:
710 case URX_END:
711 case URX_DOLLAR_M:
712 case URX_CARET_M:
713 // Types with no operand field of interest.
714 break;
46f4442e 715
b75a7d8f
A
716 case URX_RESERVED_OP:
717 case URX_START_CAPTURE:
718 case URX_END_CAPTURE:
719 case URX_STATE_SAVE:
720 case URX_JMP:
721 case URX_JMP_SAV:
722 case URX_JMP_SAV_X:
723 case URX_BACKSLASH_B:
374ca955 724 case URX_BACKSLASH_BU:
b75a7d8f
A
725 case URX_BACKSLASH_D:
726 case URX_BACKSLASH_Z:
727 case URX_STRING_LEN:
728 case URX_CTR_INIT:
729 case URX_CTR_INIT_NG:
730 case URX_CTR_LOOP:
731 case URX_CTR_LOOP_NG:
732 case URX_RELOC_OPRND:
733 case URX_STO_SP:
734 case URX_LD_SP:
735 case URX_BACKREF:
736 case URX_STO_INP_LOC:
737 case URX_JMPX:
738 case URX_LA_START:
739 case URX_LA_END:
740 case URX_BACKREF_I:
741 case URX_LB_START:
742 case URX_LB_CONT:
743 case URX_LB_END:
744 case URX_LBN_CONT:
745 case URX_LBN_END:
746 case URX_LOOP_C:
747 case URX_LOOP_DOT_I:
b331163b
A
748 case URX_BACKSLASH_H:
749 case URX_BACKSLASH_R:
750 case URX_BACKSLASH_V:
b75a7d8f 751 // types with an integer operand field.
57a6839d 752 printf("%d", val);
b75a7d8f 753 break;
46f4442e 754
b75a7d8f
A
755 case URX_ONECHAR:
756 case URX_ONECHAR_I:
2ca993e8
A
757 if (val < 0x20) {
758 printf("%#x", val);
759 } else {
760 printf("'%s'", CStr(UnicodeString(val))());
761 }
b75a7d8f 762 break;
46f4442e 763
b75a7d8f
A
764 case URX_STRING:
765 case URX_STRING_I:
766 {
767 int32_t lengthOp = fCompiledPat->elementAti(index+1);
768 U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
769 int32_t length = URX_VAL(lengthOp);
2ca993e8
A
770 UnicodeString str(fLiteralText, val, length);
771 printf("%s", CStr(str)());
b75a7d8f
A
772 }
773 break;
774
775 case URX_SETREF:
776 case URX_LOOP_SR_I:
777 {
778 UnicodeString s;
779 UnicodeSet *set = (UnicodeSet *)fSets->elementAt(val);
780 set->toPattern(s, TRUE);
2ca993e8 781 printf("%s", CStr(s)());
b75a7d8f
A
782 }
783 break;
784
785 case URX_STATIC_SETREF:
786 case URX_STAT_SETREF_N:
787 {
788 UnicodeString s;
789 if (val & URX_NEG_SET) {
57a6839d 790 printf("NOT ");
b75a7d8f
A
791 val &= ~URX_NEG_SET;
792 }
793 UnicodeSet *set = fStaticSets[val];
794 set->toPattern(s, TRUE);
2ca993e8 795 printf("%s", CStr(s)());
b75a7d8f
A
796 }
797 break;
798
46f4442e 799
b75a7d8f 800 default:
57a6839d 801 printf("??????");
b75a7d8f
A
802 break;
803 }
57a6839d 804 printf("\n");
374ca955 805#endif
57a6839d 806}
b75a7d8f
A
807
808
57a6839d 809void RegexPattern::dumpPattern() const {
b75a7d8f
A
810#if defined(REGEX_DEBUG)
811 int index;
57a6839d 812
2ca993e8
A
813 UnicodeString patStr;
814 for (UChar32 c = utext_next32From(fPattern, 0); c != U_SENTINEL; c = utext_next32(fPattern)) {
815 patStr.append(c);
57a6839d 816 }
2ca993e8 817 printf("Original Pattern: \"%s\"\n", CStr(patStr)());
57a6839d
A
818 printf(" Min Match Length: %d\n", fMinMatchLen);
819 printf(" Match Start Type: %s\n", START_OF_MATCH_STR(fStartType));
820 if (fStartType == START_STRING) {
2ca993e8
A
821 UnicodeString initialString(fLiteralText,fInitialStringIdx, fInitialStringLen);
822 printf(" Initial match string: \"%s\"\n", CStr(initialString)());
57a6839d 823 } else if (fStartType == START_SET) {
2ca993e8
A
824 UnicodeString s;
825 fInitialChars->toPattern(s, TRUE);
826 printf(" Match First Chars: %s\n", CStr(s)());
b75a7d8f 827
57a6839d 828 } else if (fStartType == START_CHAR) {
2ca993e8
A
829 printf(" First char of Match: ");
830 if (fInitialChar > 0x20) {
831 printf("'%s'\n", CStr(UnicodeString(fInitialChar))());
b75a7d8f 832 } else {
57a6839d 833 printf("%#x\n", fInitialChar);
b75a7d8f
A
834 }
835 }
836
b331163b
A
837 printf("Named Capture Groups:\n");
838 if (uhash_count(fNamedCaptureMap) == 0) {
839 printf(" None\n");
840 } else {
841 int32_t pos = UHASH_FIRST;
842 const UHashElement *el = NULL;
843 while ((el = uhash_nextElement(fNamedCaptureMap, &pos))) {
844 const UnicodeString *name = (const UnicodeString *)el->key.pointer;
b331163b 845 int32_t number = el->value.integer;
2ca993e8 846 printf(" %d\t%s\n", number, CStr(*name)());
b331163b
A
847 }
848 }
849
57a6839d
A
850 printf("\nIndex Binary Type Operand\n" \
851 "-------------------------------------------\n");
852 for (index = 0; index<fCompiledPat->size(); index++) {
853 dumpOp(index);
b75a7d8f 854 }
57a6839d 855 printf("\n\n");
374ca955 856#endif
57a6839d 857}
b75a7d8f
A
858
859
860
374ca955 861UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern)
b75a7d8f
A
862
863U_NAMESPACE_END
864#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS