]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | // |
46f4442e | 2 | // file: repattrn.cpp |
b75a7d8f A |
3 | // |
4 | /* | |
5 | *************************************************************************** | |
46f4442e | 6 | * Copyright (C) 2002-2008 International Business Machines Corporation * |
b75a7d8f A |
7 | * and others. All rights reserved. * |
8 | *************************************************************************** | |
9 | */ | |
10 | ||
11 | #include "unicode/utypes.h" | |
12 | ||
13 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS | |
14 | ||
15 | #include "unicode/regex.h" | |
374ca955 | 16 | #include "unicode/uclean.h" |
b75a7d8f A |
17 | #include "uassert.h" |
18 | #include "uvector.h" | |
19 | #include "uvectr32.h" | |
20 | #include "regexcmp.h" | |
21 | #include "regeximp.h" | |
22 | #include "regexst.h" | |
23 | ||
24 | U_NAMESPACE_BEGIN | |
25 | ||
26 | //-------------------------------------------------------------------------- | |
27 | // | |
28 | // RegexPattern Default Constructor | |
29 | // | |
30 | //-------------------------------------------------------------------------- | |
31 | RegexPattern::RegexPattern() { | |
374ca955 A |
32 | UErrorCode status = U_ZERO_ERROR; |
33 | u_init(&status); | |
b75a7d8f A |
34 | // Init all of this instances data. |
35 | init(); | |
36 | ||
37 | // Lazy init of all shared global sets. | |
38 | RegexStaticSets::initGlobals(&fDeferredStatus); | |
73c04bcf | 39 | } |
b75a7d8f A |
40 | |
41 | ||
42 | //-------------------------------------------------------------------------- | |
43 | // | |
44 | // Copy Constructor Note: This is a rather inefficient implementation, | |
45 | // but it probably doesn't matter. | |
46 | // | |
47 | //-------------------------------------------------------------------------- | |
48 | RegexPattern::RegexPattern(const RegexPattern &other) : UObject(other) { | |
46f4442e | 49 | init(); |
b75a7d8f A |
50 | *this = other; |
51 | } | |
52 | ||
53 | ||
54 | ||
55 | //-------------------------------------------------------------------------- | |
56 | // | |
57 | // Assignment Operator | |
58 | // | |
59 | //-------------------------------------------------------------------------- | |
60 | RegexPattern &RegexPattern::operator = (const RegexPattern &other) { | |
61 | if (this == &other) { | |
62 | // Source and destination are the same. Don't do anything. | |
63 | return *this; | |
64 | } | |
65 | ||
66 | // Clean out any previous contents of object being assigned to. | |
67 | zap(); | |
68 | ||
69 | // Give target object a default initialization | |
70 | init(); | |
71 | ||
72 | // Copy simple fields | |
73 | fPattern = other.fPattern; | |
74 | fFlags = other.fFlags; | |
75 | fLiteralText = other.fLiteralText; | |
76 | fDeferredStatus = other.fDeferredStatus; | |
77 | fMinMatchLen = other.fMinMatchLen; | |
374ca955 A |
78 | fFrameSize = other.fFrameSize; |
79 | fDataSize = other.fDataSize; | |
b75a7d8f | 80 | fMaxCaptureDigits = other.fMaxCaptureDigits; |
46f4442e | 81 | fStaticSets = other.fStaticSets; |
374ca955 | 82 | fStaticSets8 = other.fStaticSets8; |
46f4442e | 83 | |
b75a7d8f A |
84 | fStartType = other.fStartType; |
85 | fInitialStringIdx = other.fInitialStringIdx; | |
86 | fInitialStringLen = other.fInitialStringLen; | |
87 | *fInitialChars = *other.fInitialChars; | |
b75a7d8f | 88 | fInitialChar = other.fInitialChar; |
374ca955 | 89 | *fInitialChars8 = *other.fInitialChars8; |
b75a7d8f A |
90 | |
91 | // Copy the pattern. It's just values, nothing deep to copy. | |
92 | fCompiledPat->assign(*other.fCompiledPat, fDeferredStatus); | |
93 | fGroupMap->assign(*other.fGroupMap, fDeferredStatus); | |
94 | ||
46f4442e | 95 | // Copy the Unicode Sets. |
b75a7d8f | 96 | // Could be made more efficient if the sets were reference counted and shared, |
46f4442e | 97 | // but I doubt that pattern copying will be particularly common. |
b75a7d8f A |
98 | // Note: init() already added an empty element zero to fSets |
99 | int32_t i; | |
100 | int32_t numSets = other.fSets->size(); | |
101 | fSets8 = new Regex8BitSet[numSets]; | |
46f4442e A |
102 | if (fSets8 == NULL) { |
103 | fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | |
104 | return *this; | |
105 | } | |
b75a7d8f A |
106 | for (i=1; i<numSets; i++) { |
107 | if (U_FAILURE(fDeferredStatus)) { | |
108 | return *this; | |
109 | } | |
110 | UnicodeSet *sourceSet = (UnicodeSet *)other.fSets->elementAt(i); | |
111 | UnicodeSet *newSet = new UnicodeSet(*sourceSet); | |
112 | if (newSet == NULL) { | |
113 | fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | |
114 | break; | |
115 | } | |
116 | fSets->addElement(newSet, fDeferredStatus); | |
117 | fSets8[i] = other.fSets8[i]; | |
118 | } | |
119 | ||
120 | return *this; | |
121 | } | |
122 | ||
123 | ||
124 | //-------------------------------------------------------------------------- | |
125 | // | |
126 | // init Shared initialization for use by constructors. | |
127 | // Bring an uninitialized RegexPattern up to a default state. | |
128 | // | |
129 | //-------------------------------------------------------------------------- | |
130 | void RegexPattern::init() { | |
374ca955 | 131 | fPattern.remove(); |
b75a7d8f | 132 | fFlags = 0; |
374ca955 A |
133 | fCompiledPat = 0; |
134 | fLiteralText.remove(); | |
135 | fSets = NULL; | |
136 | fSets8 = NULL; | |
b75a7d8f A |
137 | fDeferredStatus = U_ZERO_ERROR; |
138 | fMinMatchLen = 0; | |
b75a7d8f A |
139 | fFrameSize = 0; |
140 | fDataSize = 0; | |
374ca955 | 141 | fGroupMap = NULL; |
46f4442e | 142 | fMaxCaptureDigits = 1; |
374ca955 A |
143 | fStaticSets = NULL; |
144 | fStaticSets8 = NULL; | |
b75a7d8f A |
145 | fStartType = START_NO_INFO; |
146 | fInitialStringIdx = 0; | |
147 | fInitialStringLen = 0; | |
148 | fInitialChars = NULL; | |
b75a7d8f | 149 | fInitialChar = 0; |
374ca955 | 150 | fInitialChars8 = NULL; |
46f4442e | 151 | |
b75a7d8f A |
152 | fCompiledPat = new UVector32(fDeferredStatus); |
153 | fGroupMap = new UVector32(fDeferredStatus); | |
154 | fSets = new UVector(fDeferredStatus); | |
155 | fInitialChars = new UnicodeSet; | |
156 | fInitialChars8 = new Regex8BitSet; | |
157 | if (U_FAILURE(fDeferredStatus)) { | |
158 | return; | |
159 | } | |
160 | if (fCompiledPat == NULL || fGroupMap == NULL || fSets == NULL || | |
161 | fInitialChars == NULL || fInitialChars8 == NULL) { | |
162 | fDeferredStatus = U_MEMORY_ALLOCATION_ERROR; | |
163 | return; | |
164 | } | |
165 | ||
166 | // Slot zero of the vector of sets is reserved. Fill it here. | |
167 | fSets->addElement((int32_t)0, fDeferredStatus); | |
168 | } | |
169 | ||
170 | ||
171 | //-------------------------------------------------------------------------- | |
172 | // | |
46f4442e | 173 | // zap Delete everything owned by this RegexPattern. |
b75a7d8f A |
174 | // |
175 | //-------------------------------------------------------------------------- | |
176 | void RegexPattern::zap() { | |
177 | delete fCompiledPat; | |
178 | fCompiledPat = NULL; | |
179 | int i; | |
180 | for (i=1; i<fSets->size(); i++) { | |
181 | UnicodeSet *s; | |
182 | s = (UnicodeSet *)fSets->elementAt(i); | |
183 | if (s != NULL) { | |
184 | delete s; | |
185 | } | |
186 | } | |
187 | delete fSets; | |
188 | fSets = NULL; | |
374ca955 A |
189 | delete[] fSets8; |
190 | fSets8 = NULL; | |
b75a7d8f A |
191 | delete fGroupMap; |
192 | fGroupMap = NULL; | |
193 | delete fInitialChars; | |
194 | fInitialChars = NULL; | |
195 | delete fInitialChars8; | |
196 | fInitialChars8 = NULL; | |
b75a7d8f A |
197 | } |
198 | ||
199 | ||
200 | //-------------------------------------------------------------------------- | |
201 | // | |
202 | // Destructor | |
203 | // | |
204 | //-------------------------------------------------------------------------- | |
205 | RegexPattern::~RegexPattern() { | |
206 | zap(); | |
73c04bcf | 207 | } |
b75a7d8f A |
208 | |
209 | ||
210 | //-------------------------------------------------------------------------- | |
211 | // | |
212 | // Clone | |
213 | // | |
214 | //-------------------------------------------------------------------------- | |
46f4442e | 215 | RegexPattern *RegexPattern::clone() const { |
b75a7d8f A |
216 | RegexPattern *copy = new RegexPattern(*this); |
217 | return copy; | |
73c04bcf | 218 | } |
b75a7d8f A |
219 | |
220 | ||
221 | //-------------------------------------------------------------------------- | |
222 | // | |
223 | // operator == (comparison) Consider two patterns to be == if the | |
224 | // pattern strings and the flags are the same. | |
225 | // | |
226 | //-------------------------------------------------------------------------- | |
227 | UBool RegexPattern::operator ==(const RegexPattern &other) const { | |
228 | UBool r = this->fFlags == other.fFlags && | |
229 | this->fPattern == other.fPattern && | |
230 | this->fDeferredStatus == other.fDeferredStatus; | |
231 | return r; | |
232 | } | |
233 | ||
234 | //--------------------------------------------------------------------- | |
235 | // | |
46f4442e | 236 | // compile |
b75a7d8f A |
237 | // |
238 | //--------------------------------------------------------------------- | |
374ca955 A |
239 | RegexPattern * U_EXPORT2 |
240 | RegexPattern::compile(const UnicodeString ®ex, | |
241 | uint32_t flags, | |
242 | UParseError &pe, | |
243 | UErrorCode &status) | |
244 | { | |
b75a7d8f A |
245 | |
246 | if (U_FAILURE(status)) { | |
247 | return NULL; | |
248 | } | |
249 | ||
250 | const uint32_t allFlags = UREGEX_CANON_EQ | UREGEX_CASE_INSENSITIVE | UREGEX_COMMENTS | | |
46f4442e A |
251 | UREGEX_DOTALL | UREGEX_MULTILINE | UREGEX_UWORD | |
252 | UREGEX_ERROR_ON_UNKNOWN_ESCAPES | UREGEX_UNIX_LINES; | |
b75a7d8f A |
253 | |
254 | if ((flags & ~allFlags) != 0) { | |
255 | status = U_REGEX_INVALID_FLAG; | |
256 | return NULL; | |
257 | } | |
258 | ||
259 | if ((flags & UREGEX_CANON_EQ) != 0) { | |
260 | status = U_REGEX_UNIMPLEMENTED; | |
261 | return NULL; | |
262 | } | |
263 | ||
264 | RegexPattern *This = new RegexPattern; | |
265 | if (This == NULL) { | |
266 | status = U_MEMORY_ALLOCATION_ERROR; | |
267 | return NULL; | |
268 | } | |
269 | if (U_FAILURE(This->fDeferredStatus)) { | |
270 | status = This->fDeferredStatus; | |
46f4442e | 271 | delete This; |
b75a7d8f A |
272 | return NULL; |
273 | } | |
274 | This->fFlags = flags; | |
275 | ||
276 | RegexCompile compiler(This, status); | |
277 | compiler.compile(regex, pe, status); | |
46f4442e A |
278 | |
279 | if (U_FAILURE(status)) { | |
280 | delete This; | |
281 | This = NULL; | |
282 | } | |
b75a7d8f A |
283 | |
284 | return This; | |
73c04bcf | 285 | } |
46f4442e | 286 | |
b75a7d8f A |
287 | // |
288 | // compile with default flags. | |
289 | // | |
374ca955 A |
290 | RegexPattern * U_EXPORT2 |
291 | RegexPattern::compile(const UnicodeString ®ex, | |
292 | UParseError &pe, | |
46f4442e | 293 | UErrorCode &err) |
b75a7d8f | 294 | { |
46f4442e | 295 | return compile(regex, 0, pe, err); |
b75a7d8f A |
296 | } |
297 | ||
298 | ||
299 | ||
300 | // | |
301 | // compile with no UParseErr parameter. | |
302 | // | |
374ca955 A |
303 | RegexPattern * U_EXPORT2 |
304 | RegexPattern::compile( const UnicodeString ®ex, | |
b75a7d8f | 305 | uint32_t flags, |
46f4442e | 306 | UErrorCode &err) |
b75a7d8f A |
307 | { |
308 | UParseError pe; | |
46f4442e | 309 | return compile(regex, flags, pe, err); |
b75a7d8f A |
310 | } |
311 | ||
312 | ||
313 | ||
314 | //--------------------------------------------------------------------- | |
315 | // | |
316 | // flags | |
317 | // | |
318 | //--------------------------------------------------------------------- | |
319 | uint32_t RegexPattern::flags() const { | |
320 | return fFlags; | |
321 | } | |
322 | ||
323 | ||
324 | //--------------------------------------------------------------------- | |
325 | // | |
326 | // matcher(UnicodeString, err) | |
327 | // | |
328 | //--------------------------------------------------------------------- | |
329 | RegexMatcher *RegexPattern::matcher(const UnicodeString &input, | |
330 | UErrorCode &status) const { | |
331 | RegexMatcher *retMatcher = matcher(status); | |
46f4442e | 332 | retMatcher->fDeferredStatus = status; |
b75a7d8f A |
333 | if (retMatcher != NULL) { |
334 | retMatcher->reset(input); | |
335 | } | |
336 | return retMatcher; | |
73c04bcf | 337 | } |
b75a7d8f | 338 | |
#if 0
//  Compiled out.  Stub for a raw UChar * input: it creates no matcher
//  and only reports U_UNSUPPORTED_ERROR.
RegexMatcher *RegexPattern::matcher(const UChar * /*input*/,
                                    UErrorCode          &status)  const
{
    /* This should never get called. The API with UnicodeString should be called instead. */
    if (!U_FAILURE(status)) {
        // Do not overwrite an error that is already being reported.
        status = U_UNSUPPORTED_ERROR;
    }
    return NULL;
}
#endif
b75a7d8f A |
350 | |
351 | //--------------------------------------------------------------------- | |
352 | // | |
353 | // matcher(status) | |
354 | // | |
355 | //--------------------------------------------------------------------- | |
356 | RegexMatcher *RegexPattern::matcher(UErrorCode &status) const { | |
357 | RegexMatcher *retMatcher = NULL; | |
358 | ||
359 | if (U_FAILURE(status)) { | |
360 | return NULL; | |
361 | } | |
362 | if (U_FAILURE(fDeferredStatus)) { | |
363 | status = fDeferredStatus; | |
364 | return NULL; | |
365 | } | |
366 | ||
46f4442e | 367 | retMatcher = new RegexMatcher(this); |
b75a7d8f A |
368 | if (retMatcher == NULL) { |
369 | status = U_MEMORY_ALLOCATION_ERROR; | |
370 | return NULL; | |
371 | } | |
372 | return retMatcher; | |
73c04bcf | 373 | } |
b75a7d8f A |
374 | |
375 | ||
376 | ||
377 | //--------------------------------------------------------------------- | |
378 | // | |
379 | // matches Convenience function to test for a match, starting | |
380 | // with a pattern string and a data string. | |
381 | // | |
382 | //--------------------------------------------------------------------- | |
374ca955 | 383 | UBool U_EXPORT2 RegexPattern::matches(const UnicodeString ®ex, |
b75a7d8f A |
384 | const UnicodeString &input, |
385 | UParseError &pe, | |
386 | UErrorCode &status) { | |
387 | ||
388 | if (U_FAILURE(status)) {return FALSE;} | |
389 | ||
390 | UBool retVal; | |
391 | RegexPattern *pat = NULL; | |
392 | RegexMatcher *matcher = NULL; | |
393 | ||
394 | pat = RegexPattern::compile(regex, 0, pe, status); | |
395 | matcher = pat->matcher(input, status); | |
396 | retVal = matcher->matches(status); | |
397 | ||
398 | delete matcher; | |
399 | delete pat; | |
400 | return retVal; | |
401 | } | |
402 | ||
403 | ||
404 | ||
405 | ||
406 | //--------------------------------------------------------------------- | |
407 | // | |
408 | // pattern | |
409 | // | |
410 | //--------------------------------------------------------------------- | |
411 | UnicodeString RegexPattern::pattern() const { | |
412 | return fPattern; | |
413 | } | |
414 | ||
415 | ||
416 | ||
417 | ||
418 | //--------------------------------------------------------------------- | |
419 | // | |
420 | // split | |
421 | // | |
422 | //--------------------------------------------------------------------- | |
423 | int32_t RegexPattern::split(const UnicodeString &input, | |
424 | UnicodeString dest[], | |
425 | int32_t destCapacity, | |
426 | UErrorCode &status) const | |
427 | { | |
428 | if (U_FAILURE(status)) { | |
429 | return 0; | |
430 | }; | |
431 | ||
432 | RegexMatcher m(this); | |
46f4442e A |
433 | int32_t r = 0; |
434 | // Check m's status to make sure all is ok. | |
435 | if (U_SUCCESS(m.fDeferredStatus)) { | |
436 | r = m.split(input, dest, destCapacity, status); | |
437 | } | |
b75a7d8f A |
438 | return r; |
439 | } | |
440 | ||
441 | ||
442 | ||
443 | //--------------------------------------------------------------------- | |
444 | // | |
445 | // dump Output the compiled form of the pattern. | |
446 | // Debugging function only. | |
447 | // | |
448 | //--------------------------------------------------------------------- | |
#if defined(REGEX_DEBUG)
//
//  dumpOp   Debugging function only.
//           Print one operation of the compiled pattern: its index, its
//           value in hex and its opcode name, followed by the operand in a
//           form that depends on the opcode type.
//
void   RegexPattern::dumpOp(int32_t index) const {
    static const char * const opNames[] = {URX_OPCODE_NAMES};

    const int32_t instruction = fCompiledPat->elementAti(index);
    const int32_t operand     = URX_VAL(instruction);
    const int32_t opType      = URX_TYPE(instruction);

    // Opcode types outside the name table are printed with the name in slot zero.
    int32_t nameIdx = opType;
    if ((uint32_t)nameIdx >= sizeof(opNames)/sizeof(char *)) {
        nameIdx = 0;
    }

    REGEX_DUMP_DEBUG_PRINTF(("%4d %08x %-15s ", index, instruction, opNames[nameIdx]));

    switch (opType) {
    // Types with no operand field of interest.
    case URX_NOP:
    case URX_DOTANY:
    case URX_DOTANY_ALL:
    case URX_FAIL:
    case URX_CARET:
    case URX_DOLLAR:
    case URX_BACKSLASH_G:
    case URX_BACKSLASH_X:
    case URX_END:
    case URX_DOLLAR_M:
    case URX_CARET_M:
        break;

    // Types with an integer operand field.
    case URX_RESERVED_OP:
    case URX_START_CAPTURE:
    case URX_END_CAPTURE:
    case URX_STATE_SAVE:
    case URX_JMP:
    case URX_JMP_SAV:
    case URX_JMP_SAV_X:
    case URX_BACKSLASH_B:
    case URX_BACKSLASH_BU:
    case URX_BACKSLASH_D:
    case URX_BACKSLASH_Z:
    case URX_STRING_LEN:
    case URX_CTR_INIT:
    case URX_CTR_INIT_NG:
    case URX_CTR_LOOP:
    case URX_CTR_LOOP_NG:
    case URX_RELOC_OPRND:
    case URX_STO_SP:
    case URX_LD_SP:
    case URX_BACKREF:
    case URX_STO_INP_LOC:
    case URX_JMPX:
    case URX_LA_START:
    case URX_LA_END:
    case URX_BACKREF_I:
    case URX_LB_START:
    case URX_LB_CONT:
    case URX_LB_END:
    case URX_LBN_CONT:
    case URX_LBN_END:
    case URX_LOOP_C:
    case URX_LOOP_DOT_I:
        REGEX_DUMP_DEBUG_PRINTF(("%d", operand));
        break;

    // Single character operand; values of 256 and above print as '?'.
    case URX_ONECHAR:
    case URX_ONECHAR_I:
        REGEX_DUMP_DEBUG_PRINTF(("%c", operand<256?operand:'?'));
        break;

    // Operand is a start index into fLiteralText; the length comes from
    // the operation that follows this one.
    case URX_STRING:
    case URX_STRING_I:
        {
            int32_t lengthOp = fCompiledPat->elementAti(index+1);
            U_ASSERT(URX_TYPE(lengthOp) == URX_STRING_LEN);
            const int32_t limit = operand + URX_VAL(lengthOp);
            for (int32_t pos=operand; pos<limit; pos++) {
                UChar ch = fLiteralText[pos];
                if (ch < 32 || ch >= 256) {ch = '.';}
                REGEX_DUMP_DEBUG_PRINTF(("%c", ch));
            }
        }
        break;

    // Operand is an index into this pattern's own vector of sets.
    case URX_SETREF:
    case URX_LOOP_SR_I:
        {
            UnicodeString setPattern;
            UnicodeSet *patSet = (UnicodeSet *)fSets->elementAt(operand);
            patSet->toPattern(setPattern, TRUE);
            for (int32_t pos=0; pos<setPattern.length(); pos++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(pos)));
            }
        }
        break;

    // Operand is an index into fStaticSets, with URX_NEG_SET
    // flagging a negated reference.
    case URX_STATIC_SETREF:
    case URX_STAT_SETREF_N:
        {
            UnicodeString setPattern;
            int32_t setIdx = operand;
            if (setIdx & URX_NEG_SET) {
                REGEX_DUMP_DEBUG_PRINTF(("NOT "));
                setIdx &= ~URX_NEG_SET;
            }
            UnicodeSet *staticSet = fStaticSets[setIdx];
            staticSet->toPattern(setPattern, TRUE);
            for (int32_t pos=0; pos<setPattern.length(); pos++) {
                REGEX_DUMP_DEBUG_PRINTF(("%c", setPattern.charAt(pos)));
            }
        }
        break;

    default:
        REGEX_DUMP_DEBUG_PRINTF(("??????"));
        break;
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
}
#endif
b75a7d8f A |
568 | |
569 | ||
#if defined(REGEX_DEBUG)
//
//  RegexPatternDump   Debugging function only.
//                     Print the original pattern string, the minimum match
//                     length, the match start information, and then every
//                     operation of the compiled pattern via dumpOp().
//
U_CAPI void U_EXPORT2
RegexPatternDump(const RegexPattern *This) {
    int      index;
    int      i;

    REGEX_DUMP_DEBUG_PRINTF(("Original Pattern: "));
    for (i=0; i<This->fPattern.length(); i++) {
        REGEX_DUMP_DEBUG_PRINTF(("%c", This->fPattern.charAt(i)));
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n"));
    REGEX_DUMP_DEBUG_PRINTF((" Min Match Length: %d\n", This->fMinMatchLen));
    REGEX_DUMP_DEBUG_PRINTF((" Match Start Type: %s\n", START_OF_MATCH_STR(This->fStartType)));
    if (This->fStartType == START_STRING) {
        REGEX_DUMP_DEBUG_PRINTF((" Initial match string: \""));
        for (i=This->fInitialStringIdx; i<This->fInitialStringIdx+This->fInitialStringLen; i++) {
            REGEX_DUMP_DEBUG_PRINTF(("%c", This->fLiteralText[i]));   // TODO:  non-printables, surrogates.
        }
        // Close the quote opened above and end the line, so the output that
        // follows does not run on from the string.
        REGEX_DUMP_DEBUG_PRINTF(("\"\n"));

    } else if (This->fStartType == START_SET) {
        // Show at most the first 20 characters of the initial-characters set.
        int32_t numSetChars = This->fInitialChars->size();
        if (numSetChars > 20) {
            numSetChars = 20;
        }
        REGEX_DUMP_DEBUG_PRINTF((" Match First Chars : "));
        for (i=0; i<numSetChars; i++) {
            UChar32 c = This->fInitialChars->charAt(i);
            if (0x20<c && c <0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c ", c));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x ", c));
            }
        }
        if (numSetChars < This->fInitialChars->size()) {
            REGEX_DUMP_DEBUG_PRINTF((" ..."));
        }
        REGEX_DUMP_DEBUG_PRINTF(("\n"));

    } else if (This->fStartType == START_CHAR) {
        REGEX_DUMP_DEBUG_PRINTF((" First char of Match : "));
        if (0x20 < This->fInitialChar && This->fInitialChar<0x7e) {
                REGEX_DUMP_DEBUG_PRINTF(("%c\n", This->fInitialChar));
            } else {
                REGEX_DUMP_DEBUG_PRINTF(("%#x\n", This->fInitialChar));
            }
    }

    REGEX_DUMP_DEBUG_PRINTF(("\nIndex Binary Type Operand\n" \
        "-------------------------------------------\n"));
    for (index = 0; index<This->fCompiledPat->size(); index++) {
        This->dumpOp(index);
    }
    REGEX_DUMP_DEBUG_PRINTF(("\n\n"));
}
#endif
b75a7d8f A |
625 | |
626 | ||
627 | ||
374ca955 | 628 | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RegexPattern) |
b75a7d8f A |
629 | |
630 | U_NAMESPACE_END | |
631 | #endif // !UCONFIG_NO_REGULAR_EXPRESSIONS |