]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/uregex.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / i18n / uregex.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
374ca955
A
3/*
4*******************************************************************************
b331163b 5* Copyright (C) 2004-2015, International Business Machines
374ca955
A
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
57a6839d 8* file name: uregex.cpp
374ca955
A
9*/
10
11#include "unicode/utypes.h"
12
13#if !UCONFIG_NO_REGULAR_EXPRESSIONS
14
15#include "unicode/regex.h"
16#include "unicode/uregex.h"
17#include "unicode/unistr.h"
18#include "unicode/ustring.h"
19#include "unicode/uchar.h"
20#include "unicode/uobject.h"
4388f060 21#include "unicode/utf16.h"
374ca955 22#include "cmemory.h"
b331163b
A
23#include "uassert.h"
24#include "uhash.h"
25#include "umutex.h"
26#include "uvectr32.h"
374ca955 27
729e4ab9
A
28#include "regextxt.h"
29
729e4ab9 30U_NAMESPACE_BEGIN
46f4442e 31
729e4ab9
A
32#define REMAINING_CAPACITY(idx,len) ((((len)-(idx))>0)?((len)-(idx)):0)
33
34struct RegularExpression: public UMemory {
374ca955 35public:
729e4ab9
A
36 RegularExpression();
37 ~RegularExpression();
374ca955
A
38 int32_t fMagic;
39 RegexPattern *fPat;
57a6839d 40 u_atomic_int32_t *fPatRefCount;
374ca955
A
41 UChar *fPatString;
42 int32_t fPatStringLen;
43 RegexMatcher *fMatcher;
44 const UChar *fText; // Text from setText()
45 int32_t fTextLength; // Length provided by user with setText(), which
46 // may be -1.
729e4ab9 47 UBool fOwnsText;
374ca955
A
48};
49
50static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
51
729e4ab9 52RegularExpression::RegularExpression() {
374ca955
A
53 fMagic = REXP_MAGIC;
54 fPat = NULL;
55 fPatRefCount = NULL;
56 fPatString = NULL;
57 fPatStringLen = 0;
58 fMatcher = NULL;
59 fText = NULL;
60 fTextLength = 0;
729e4ab9 61 fOwnsText = FALSE;
374ca955
A
62}
63
729e4ab9 64RegularExpression::~RegularExpression() {
374ca955
A
65 delete fMatcher;
66 fMatcher = NULL;
67 if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
68 delete fPat;
69 uprv_free(fPatString);
57a6839d 70 uprv_free((void *)fPatRefCount);
374ca955 71 }
729e4ab9
A
72 if (fOwnsText && fText!=NULL) {
73 uprv_free((void *)fText);
74 }
374ca955
A
75 fMagic = 0;
76}
77
729e4ab9
A
78U_NAMESPACE_END
79
80U_NAMESPACE_USE
81
374ca955
A
82//----------------------------------------------------------------------------------------
83//
84// validateRE Do boilerplate style checks on API function parameters.
85// Return TRUE if they look OK.
86//----------------------------------------------------------------------------------------
4388f060 87static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
374ca955
A
88 if (U_FAILURE(*status)) {
89 return FALSE;
90 }
91 if (re == NULL || re->fMagic != REXP_MAGIC) {
374ca955
A
92 *status = U_ILLEGAL_ARGUMENT_ERROR;
93 return FALSE;
94 }
729e4ab9
A
95 // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
96 if (requiresText && re->fText == NULL && !re->fOwnsText) {
374ca955
A
97 *status = U_REGEX_INVALID_STATE;
98 return FALSE;
99 }
100 return TRUE;
101}
102
103//----------------------------------------------------------------------------------------
104//
105// uregex_open
106//
107//----------------------------------------------------------------------------------------
108U_CAPI URegularExpression * U_EXPORT2
109uregex_open( const UChar *pattern,
110 int32_t patternLength,
111 uint32_t flags,
112 UParseError *pe,
113 UErrorCode *status) {
114
115 if (U_FAILURE(*status)) {
116 return NULL;
117 }
118 if (pattern == NULL || patternLength < -1 || patternLength == 0) {
119 *status = U_ILLEGAL_ARGUMENT_ERROR;
120 return NULL;
121 }
122 int32_t actualPatLen = patternLength;
123 if (actualPatLen == -1) {
124 actualPatLen = u_strlen(pattern);
125 }
126
57a6839d
A
127 RegularExpression *re = new RegularExpression;
128 u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
374ca955
A
129 UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
130 if (re == NULL || refC == NULL || patBuf == NULL) {
131 *status = U_MEMORY_ALLOCATION_ERROR;
132 delete re;
57a6839d 133 uprv_free((void *)refC);
374ca955
A
134 uprv_free(patBuf);
135 return NULL;
136 }
137 re->fPatRefCount = refC;
138 *re->fPatRefCount = 1;
139
140 //
141 // Make a copy of the pattern string, so we can return it later if asked.
729e4ab9
A
142 // For compiling the pattern, we will use a UText wrapper around
143 // this local copy, to avoid making even more copies.
374ca955
A
144 //
145 re->fPatString = patBuf;
146 re->fPatStringLen = patternLength;
147 u_memcpy(patBuf, pattern, actualPatLen);
148 patBuf[actualPatLen] = 0;
b331163b 149
729e4ab9
A
150 UText patText = UTEXT_INITIALIZER;
151 utext_openUChars(&patText, patBuf, patternLength, status);
374ca955
A
152
153 //
154 // Compile the pattern
155 //
156 if (pe != NULL) {
729e4ab9 157 re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
374ca955 158 } else {
729e4ab9 159 re->fPat = RegexPattern::compile(&patText, flags, *status);
374ca955 160 }
729e4ab9 161 utext_close(&patText);
b331163b 162
374ca955
A
163 if (U_FAILURE(*status)) {
164 goto ErrorExit;
165 }
166
167 //
168 // Create the matcher object
169 //
170 re->fMatcher = re->fPat->matcher(*status);
171 if (U_SUCCESS(*status)) {
729e4ab9 172 return (URegularExpression*)re;
374ca955
A
173 }
174
175ErrorExit:
176 delete re;
177 return NULL;
178
179}
180
729e4ab9
A
181//----------------------------------------------------------------------------------------
182//
183// uregex_openUText
184//
185//----------------------------------------------------------------------------------------
186U_CAPI URegularExpression * U_EXPORT2
187uregex_openUText(UText *pattern,
188 uint32_t flags,
189 UParseError *pe,
190 UErrorCode *status) {
b331163b 191
729e4ab9
A
192 if (U_FAILURE(*status)) {
193 return NULL;
194 }
195 if (pattern == NULL) {
196 *status = U_ILLEGAL_ARGUMENT_ERROR;
197 return NULL;
198 }
b331163b 199
729e4ab9 200 int64_t patternNativeLength = utext_nativeLength(pattern);
b331163b 201
729e4ab9
A
202 if (patternNativeLength == 0) {
203 *status = U_ILLEGAL_ARGUMENT_ERROR;
204 return NULL;
205 }
b331163b 206
729e4ab9 207 RegularExpression *re = new RegularExpression;
b331163b 208
729e4ab9
A
209 UErrorCode lengthStatus = U_ZERO_ERROR;
210 int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
b331163b 211
57a6839d 212 u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
729e4ab9
A
213 UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
214 if (re == NULL || refC == NULL || patBuf == NULL) {
215 *status = U_MEMORY_ALLOCATION_ERROR;
216 delete re;
57a6839d 217 uprv_free((void *)refC);
729e4ab9
A
218 uprv_free(patBuf);
219 return NULL;
220 }
221 re->fPatRefCount = refC;
222 *re->fPatRefCount = 1;
b331163b 223
729e4ab9
A
224 //
225 // Make a copy of the pattern string, so we can return it later if asked.
226 // For compiling the pattern, we will use a read-only UText wrapper
227 // around this local copy, to avoid making even more copies.
228 //
229 re->fPatString = patBuf;
230 re->fPatStringLen = pattern16Length;
231 utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
b331163b 232
729e4ab9
A
233 UText patText = UTEXT_INITIALIZER;
234 utext_openUChars(&patText, patBuf, pattern16Length, status);
b331163b 235
729e4ab9
A
236 //
237 // Compile the pattern
238 //
239 if (pe != NULL) {
240 re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
241 } else {
242 re->fPat = RegexPattern::compile(&patText, flags, *status);
243 }
244 utext_close(&patText);
b331163b 245
729e4ab9
A
246 if (U_FAILURE(*status)) {
247 goto ErrorExit;
248 }
b331163b 249
729e4ab9
A
250 //
251 // Create the matcher object
252 //
253 re->fMatcher = re->fPat->matcher(*status);
254 if (U_SUCCESS(*status)) {
255 return (URegularExpression*)re;
256 }
b331163b 257
729e4ab9
A
258ErrorExit:
259 delete re;
260 return NULL;
b331163b 261
729e4ab9
A
262}
263
374ca955
A
264//----------------------------------------------------------------------------------------
265//
266// uregex_close
267//
268//----------------------------------------------------------------------------------------
269U_CAPI void U_EXPORT2
729e4ab9
A
270uregex_close(URegularExpression *re2) {
271 RegularExpression *re = (RegularExpression*)re2;
374ca955 272 UErrorCode status = U_ZERO_ERROR;
4388f060 273 if (validateRE(re, FALSE, &status) == FALSE) {
374ca955
A
274 return;
275 }
276 delete re;
277}
278
279
280//----------------------------------------------------------------------------------------
281//
282// uregex_clone
283//
284//----------------------------------------------------------------------------------------
b331163b 285U_CAPI URegularExpression * U_EXPORT2
729e4ab9
A
286uregex_clone(const URegularExpression *source2, UErrorCode *status) {
287 RegularExpression *source = (RegularExpression*)source2;
4388f060 288 if (validateRE(source, FALSE, status) == FALSE) {
374ca955
A
289 return NULL;
290 }
291
729e4ab9 292 RegularExpression *clone = new RegularExpression;
374ca955
A
293 if (clone == NULL) {
294 *status = U_MEMORY_ALLOCATION_ERROR;
295 return NULL;
296 }
297
298 clone->fMatcher = source->fPat->matcher(*status);
299 if (U_FAILURE(*status)) {
300 delete clone;
301 return NULL;
302 }
374ca955
A
303
304 clone->fPat = source->fPat;
b331163b 305 clone->fPatRefCount = source->fPatRefCount;
374ca955
A
306 clone->fPatString = source->fPatString;
307 clone->fPatStringLen = source->fPatStringLen;
308 umtx_atomic_inc(source->fPatRefCount);
309 // Note: fText is not cloned.
310
729e4ab9 311 return (URegularExpression*)clone;
73c04bcf 312}
374ca955
A
313
314
315
316
73c04bcf 317//------------------------------------------------------------------------------
374ca955
A
318//
319// uregex_pattern
320//
73c04bcf 321//------------------------------------------------------------------------------
b331163b 322U_CAPI const UChar * U_EXPORT2
729e4ab9
A
323uregex_pattern(const URegularExpression *regexp2,
324 int32_t *patLength,
325 UErrorCode *status) {
326 RegularExpression *regexp = (RegularExpression*)regexp2;
b331163b 327
4388f060 328 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
329 return NULL;
330 }
331 if (patLength != NULL) {
332 *patLength = regexp->fPatStringLen;
333 }
334 return regexp->fPatString;
73c04bcf 335}
374ca955
A
336
337
729e4ab9
A
338//------------------------------------------------------------------------------
339//
340// uregex_patternUText
341//
342//------------------------------------------------------------------------------
343U_CAPI UText * U_EXPORT2
344uregex_patternUText(const URegularExpression *regexp2,
345 UErrorCode *status) {
346 RegularExpression *regexp = (RegularExpression*)regexp2;
347 return regexp->fPat->patternText(*status);
348}
349
350
73c04bcf 351//------------------------------------------------------------------------------
374ca955
A
352//
353// uregex_flags
354//
73c04bcf 355//------------------------------------------------------------------------------
b331163b 356U_CAPI int32_t U_EXPORT2
729e4ab9
A
357uregex_flags(const URegularExpression *regexp2, UErrorCode *status) {
358 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 359 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
360 return 0;
361 }
362 int32_t flags = regexp->fPat->flags();
363 return flags;
73c04bcf 364}
374ca955
A
365
366
73c04bcf 367//------------------------------------------------------------------------------
374ca955
A
368//
369// uregex_setText
370//
73c04bcf 371//------------------------------------------------------------------------------
b331163b 372U_CAPI void U_EXPORT2
729e4ab9 373uregex_setText(URegularExpression *regexp2,
374ca955
A
374 const UChar *text,
375 int32_t textLength,
376 UErrorCode *status) {
729e4ab9 377 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 378 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
379 return;
380 }
381 if (text == NULL || textLength < -1) {
382 *status = U_ILLEGAL_ARGUMENT_ERROR;
383 return;
384 }
b331163b 385
729e4ab9
A
386 if (regexp->fOwnsText && regexp->fText != NULL) {
387 uprv_free((void *)regexp->fText);
388 }
b331163b 389
374ca955
A
390 regexp->fText = text;
391 regexp->fTextLength = textLength;
729e4ab9 392 regexp->fOwnsText = FALSE;
b331163b 393
729e4ab9
A
394 UText input = UTEXT_INITIALIZER;
395 utext_openUChars(&input, text, textLength, status);
396 regexp->fMatcher->reset(&input);
397 utext_close(&input); // reset() made a shallow clone, so we don't need this copy
398}
399
374ca955 400
729e4ab9
A
401//------------------------------------------------------------------------------
402//
403// uregex_setUText
404//
405//------------------------------------------------------------------------------
b331163b 406U_CAPI void U_EXPORT2
729e4ab9
A
407uregex_setUText(URegularExpression *regexp2,
408 UText *text,
409 UErrorCode *status) {
410 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 411 if (validateRE(regexp, FALSE, status) == FALSE) {
729e4ab9
A
412 return;
413 }
414 if (text == NULL) {
415 *status = U_ILLEGAL_ARGUMENT_ERROR;
416 return;
417 }
b331163b 418
729e4ab9
A
419 if (regexp->fOwnsText && regexp->fText != NULL) {
420 uprv_free((void *)regexp->fText);
421 }
b331163b 422
729e4ab9
A
423 regexp->fText = NULL; // only fill it in on request
424 regexp->fTextLength = -1;
425 regexp->fOwnsText = TRUE;
426 regexp->fMatcher->reset(text);
73c04bcf 427}
374ca955
A
428
429
430
73c04bcf 431//------------------------------------------------------------------------------
374ca955
A
432//
433// uregex_getText
434//
73c04bcf 435//------------------------------------------------------------------------------
b331163b 436U_CAPI const UChar * U_EXPORT2
729e4ab9 437uregex_getText(URegularExpression *regexp2,
374ca955
A
438 int32_t *textLength,
439 UErrorCode *status) {
729e4ab9 440 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 441 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
442 return NULL;
443 }
b331163b 444
729e4ab9
A
445 if (regexp->fText == NULL) {
446 // need to fill in the text
447 UText *inputText = regexp->fMatcher->inputText();
448 int64_t inputNativeLength = utext_nativeLength(inputText);
449 if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
450 regexp->fText = inputText->chunkContents;
451 regexp->fTextLength = (int32_t)inputNativeLength;
452 regexp->fOwnsText = FALSE; // because the UText owns it
453 } else {
454 UErrorCode lengthStatus = U_ZERO_ERROR;
455 regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
456 UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
b331163b 457
729e4ab9
A
458 utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
459 regexp->fText = inputChars;
460 regexp->fOwnsText = TRUE; // should already be set but just in case
461 }
462 }
b331163b 463
374ca955
A
464 if (textLength != NULL) {
465 *textLength = regexp->fTextLength;
466 }
467 return regexp->fText;
73c04bcf 468}
374ca955
A
469
470
729e4ab9
A
471//------------------------------------------------------------------------------
472//
473// uregex_getUText
474//
475//------------------------------------------------------------------------------
b331163b 476U_CAPI UText * U_EXPORT2
729e4ab9
A
477uregex_getUText(URegularExpression *regexp2,
478 UText *dest,
479 UErrorCode *status) {
480 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 481 if (validateRE(regexp, FALSE, status) == FALSE) {
729e4ab9
A
482 return dest;
483 }
484 return regexp->fMatcher->getInput(dest, *status);
485}
486
487
4388f060
A
488//------------------------------------------------------------------------------
489//
490// uregex_refreshUText
491//
492//------------------------------------------------------------------------------
b331163b 493U_CAPI void U_EXPORT2
4388f060
A
494uregex_refreshUText(URegularExpression *regexp2,
495 UText *text,
496 UErrorCode *status) {
497 RegularExpression *regexp = (RegularExpression*)regexp2;
498 if (validateRE(regexp, FALSE, status) == FALSE) {
499 return;
500 }
501 regexp->fMatcher->refreshInputText(text, *status);
502}
503
504
73c04bcf 505//------------------------------------------------------------------------------
374ca955
A
506//
507// uregex_matches
508//
73c04bcf 509//------------------------------------------------------------------------------
b331163b 510U_CAPI UBool U_EXPORT2
729e4ab9
A
511uregex_matches(URegularExpression *regexp2,
512 int32_t startIndex,
513 UErrorCode *status) {
514 return uregex_matches64( regexp2, (int64_t)startIndex, status);
515}
516
b331163b 517U_CAPI UBool U_EXPORT2
729e4ab9
A
518uregex_matches64(URegularExpression *regexp2,
519 int64_t startIndex,
520 UErrorCode *status) {
521 RegularExpression *regexp = (RegularExpression*)regexp2;
46f4442e 522 UBool result = FALSE;
4388f060 523 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
524 return result;
525 }
526 if (startIndex == -1) {
527 result = regexp->fMatcher->matches(*status);
528 } else {
529 result = regexp->fMatcher->matches(startIndex, *status);
374ca955 530 }
374ca955 531 return result;
73c04bcf 532}
374ca955
A
533
534
73c04bcf 535//------------------------------------------------------------------------------
374ca955
A
536//
537// uregex_lookingAt
538//
73c04bcf 539//------------------------------------------------------------------------------
b331163b 540U_CAPI UBool U_EXPORT2
729e4ab9 541uregex_lookingAt(URegularExpression *regexp2,
374ca955
A
542 int32_t startIndex,
543 UErrorCode *status) {
729e4ab9
A
544 return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
545}
546
b331163b 547U_CAPI UBool U_EXPORT2
729e4ab9
A
548uregex_lookingAt64(URegularExpression *regexp2,
549 int64_t startIndex,
550 UErrorCode *status) {
551 RegularExpression *regexp = (RegularExpression*)regexp2;
46f4442e 552 UBool result = FALSE;
4388f060 553 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
554 return result;
555 }
556 if (startIndex == -1) {
557 result = regexp->fMatcher->lookingAt(*status);
558 } else {
559 result = regexp->fMatcher->lookingAt(startIndex, *status);
374ca955 560 }
374ca955 561 return result;
73c04bcf 562}
374ca955
A
563
564
565
73c04bcf 566//------------------------------------------------------------------------------
374ca955
A
567//
568// uregex_find
569//
73c04bcf 570//------------------------------------------------------------------------------
b331163b 571U_CAPI UBool U_EXPORT2
729e4ab9 572uregex_find(URegularExpression *regexp2,
b331163b 573 int32_t startIndex,
374ca955 574 UErrorCode *status) {
729e4ab9
A
575 return uregex_find64( regexp2, (int64_t)startIndex, status);
576}
577
b331163b 578U_CAPI UBool U_EXPORT2
729e4ab9 579uregex_find64(URegularExpression *regexp2,
b331163b 580 int64_t startIndex,
729e4ab9
A
581 UErrorCode *status) {
582 RegularExpression *regexp = (RegularExpression*)regexp2;
46f4442e 583 UBool result = FALSE;
4388f060 584 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
585 return result;
586 }
587 if (startIndex == -1) {
588 regexp->fMatcher->resetPreserveRegion();
b331163b 589 result = regexp->fMatcher->find(*status);
46f4442e
A
590 } else {
591 result = regexp->fMatcher->find(startIndex, *status);
374ca955 592 }
374ca955 593 return result;
73c04bcf 594}
374ca955 595
729e4ab9 596
73c04bcf 597//------------------------------------------------------------------------------
374ca955
A
598//
599// uregex_findNext
600//
73c04bcf 601//------------------------------------------------------------------------------
b331163b 602U_CAPI UBool U_EXPORT2
729e4ab9 603uregex_findNext(URegularExpression *regexp2,
374ca955 604 UErrorCode *status) {
729e4ab9 605 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 606 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
607 return FALSE;
608 }
b331163b 609 UBool result = regexp->fMatcher->find(*status);
374ca955 610 return result;
73c04bcf 611}
374ca955 612
73c04bcf 613//------------------------------------------------------------------------------
374ca955
A
614//
615// uregex_groupCount
616//
73c04bcf 617//------------------------------------------------------------------------------
b331163b 618U_CAPI int32_t U_EXPORT2
729e4ab9 619uregex_groupCount(URegularExpression *regexp2,
374ca955 620 UErrorCode *status) {
729e4ab9 621 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 622 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
623 return 0;
624 }
625 int32_t result = regexp->fMatcher->groupCount();
626 return result;
73c04bcf 627}
374ca955
A
628
629
b331163b
A
630//------------------------------------------------------------------------------
631//
632// uregex_groupNumberFromName
633//
634//------------------------------------------------------------------------------
635int32_t
636uregex_groupNumberFromName(URegularExpression *regexp2,
637 const UChar *groupName,
638 int32_t nameLength,
639 UErrorCode *status) {
640 RegularExpression *regexp = (RegularExpression*)regexp2;
641 if (validateRE(regexp, FALSE, status) == FALSE) {
642 return 0;
643 }
644 int32_t result = regexp->fPat->groupNumberFromName(UnicodeString(groupName, nameLength), *status);
645 return result;
646}
647
648int32_t
649uregex_groupNumberFromCName(URegularExpression *regexp2,
650 const char *groupName,
651 int32_t nameLength,
652 UErrorCode *status) {
653 RegularExpression *regexp = (RegularExpression*)regexp2;
654 if (validateRE(regexp, FALSE, status) == FALSE) {
655 return 0;
656 }
657 return regexp->fPat->groupNumberFromName(groupName, nameLength, *status);
658}
659
73c04bcf 660//------------------------------------------------------------------------------
374ca955
A
661//
662// uregex_group
663//
73c04bcf 664//------------------------------------------------------------------------------
b331163b 665U_CAPI int32_t U_EXPORT2
729e4ab9 666uregex_group(URegularExpression *regexp2,
374ca955
A
667 int32_t groupNum,
668 UChar *dest,
669 int32_t destCapacity,
670 UErrorCode *status) {
729e4ab9 671 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 672 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
673 return 0;
674 }
675 if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
676 *status = U_ILLEGAL_ARGUMENT_ERROR;
677 return 0;
678 }
b331163b 679
729e4ab9
A
680 if (destCapacity == 0 || regexp->fText != NULL) {
681 // If preflighting or if we already have the text as UChars,
b331163b
A
682 // this is a little cheaper than extracting from the UText
683
729e4ab9
A
684 //
685 // Pick up the range of characters from the matcher
686 //
687 int32_t startIx = regexp->fMatcher->start(groupNum, *status);
688 int32_t endIx = regexp->fMatcher->end (groupNum, *status);
689 if (U_FAILURE(*status)) {
690 return 0;
691 }
374ca955 692
729e4ab9
A
693 //
694 // Trim length based on buffer capacity
b331163b 695 //
729e4ab9
A
696 int32_t fullLength = endIx - startIx;
697 int32_t copyLength = fullLength;
698 if (copyLength < destCapacity) {
699 dest[copyLength] = 0;
700 } else if (copyLength == destCapacity) {
701 *status = U_STRING_NOT_TERMINATED_WARNING;
702 } else {
703 copyLength = destCapacity;
704 *status = U_BUFFER_OVERFLOW_ERROR;
705 }
b331163b 706
729e4ab9
A
707 //
708 // Copy capture group to user's buffer
709 //
710 if (copyLength > 0) {
711 u_memcpy(dest, &regexp->fText[startIx], copyLength);
712 }
713 return fullLength;
374ca955 714 } else {
b331163b
A
715 int64_t start = regexp->fMatcher->start64(groupNum, *status);
716 int64_t limit = regexp->fMatcher->end64(groupNum, *status);
717 if (U_FAILURE(*status)) {
718 return 0;
57a6839d 719 }
b331163b
A
720 // Note edge cases:
721 // Group didn't match: start == end == -1. UText trims to 0, UText gives zero length result.
722 // Zero Length Match: start == end.
723 int32_t length = utext_extract(regexp->fMatcher->inputText(), start, limit, dest, destCapacity, status);
724 return length;
374ca955 725 }
b331163b 726
729e4ab9 727}
374ca955 728
729e4ab9
A
729
730//------------------------------------------------------------------------------
731//
732// uregex_groupUText
733//
734//------------------------------------------------------------------------------
b331163b 735U_CAPI UText * U_EXPORT2
729e4ab9
A
736uregex_groupUText(URegularExpression *regexp2,
737 int32_t groupNum,
738 UText *dest,
739 int64_t *groupLength,
740 UErrorCode *status) {
741 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 742 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
743 UErrorCode emptyTextStatus = U_ZERO_ERROR;
744 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
374ca955 745 }
729e4ab9
A
746
747 return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
73c04bcf 748}
374ca955 749
73c04bcf 750//------------------------------------------------------------------------------
374ca955
A
751//
752// uregex_start
753//
73c04bcf 754//------------------------------------------------------------------------------
b331163b 755U_CAPI int32_t U_EXPORT2
729e4ab9 756uregex_start(URegularExpression *regexp2,
374ca955
A
757 int32_t groupNum,
758 UErrorCode *status) {
729e4ab9
A
759 return (int32_t)uregex_start64( regexp2, groupNum, status);
760}
761
b331163b 762U_CAPI int64_t U_EXPORT2
729e4ab9
A
763uregex_start64(URegularExpression *regexp2,
764 int32_t groupNum,
765 UErrorCode *status) {
766 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 767 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
768 return 0;
769 }
3d1f044b 770 int64_t result = regexp->fMatcher->start64(groupNum, *status);
374ca955 771 return result;
73c04bcf 772}
374ca955 773
73c04bcf 774//------------------------------------------------------------------------------
374ca955
A
775//
776// uregex_end
777//
73c04bcf 778//------------------------------------------------------------------------------
b331163b 779U_CAPI int32_t U_EXPORT2
729e4ab9 780uregex_end(URegularExpression *regexp2,
374ca955
A
781 int32_t groupNum,
782 UErrorCode *status) {
729e4ab9
A
783 return (int32_t)uregex_end64( regexp2, groupNum, status);
784}
785
b331163b 786U_CAPI int64_t U_EXPORT2
729e4ab9
A
787uregex_end64(URegularExpression *regexp2,
788 int32_t groupNum,
789 UErrorCode *status) {
790 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 791 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
792 return 0;
793 }
3d1f044b 794 int64_t result = regexp->fMatcher->end64(groupNum, *status);
374ca955 795 return result;
73c04bcf 796}
374ca955 797
73c04bcf 798//------------------------------------------------------------------------------
374ca955
A
799//
800// uregex_reset
801//
73c04bcf 802//------------------------------------------------------------------------------
b331163b 803U_CAPI void U_EXPORT2
729e4ab9 804uregex_reset(URegularExpression *regexp2,
374ca955
A
805 int32_t index,
806 UErrorCode *status) {
729e4ab9
A
807 uregex_reset64( regexp2, (int64_t)index, status);
808}
809
b331163b 810U_CAPI void U_EXPORT2
729e4ab9
A
811uregex_reset64(URegularExpression *regexp2,
812 int64_t index,
813 UErrorCode *status) {
814 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 815 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
816 return;
817 }
818 regexp->fMatcher->reset(index, *status);
73c04bcf 819}
374ca955
A
820
821
46f4442e
A
822//------------------------------------------------------------------------------
823//
824// uregex_setRegion
825//
826//------------------------------------------------------------------------------
b331163b 827U_CAPI void U_EXPORT2
729e4ab9 828uregex_setRegion(URegularExpression *regexp2,
46f4442e
A
829 int32_t regionStart,
830 int32_t regionLimit,
831 UErrorCode *status) {
729e4ab9
A
832 uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
833}
834
b331163b 835U_CAPI void U_EXPORT2
729e4ab9
A
836uregex_setRegion64(URegularExpression *regexp2,
837 int64_t regionStart,
838 int64_t regionLimit,
839 UErrorCode *status) {
840 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 841 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
842 return;
843 }
844 regexp->fMatcher->region(regionStart, regionLimit, *status);
845}
846
847
729e4ab9
A
848//------------------------------------------------------------------------------
849//
850// uregex_setRegionAndStart
851//
852//------------------------------------------------------------------------------
b331163b 853U_CAPI void U_EXPORT2
729e4ab9
A
854uregex_setRegionAndStart(URegularExpression *regexp2,
855 int64_t regionStart,
856 int64_t regionLimit,
857 int64_t startIndex,
858 UErrorCode *status) {
859 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 860 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
861 return;
862 }
863 regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
864}
865
46f4442e
A
866//------------------------------------------------------------------------------
867//
868// uregex_regionStart
869//
870//------------------------------------------------------------------------------
b331163b 871U_CAPI int32_t U_EXPORT2
729e4ab9 872uregex_regionStart(const URegularExpression *regexp2,
46f4442e 873 UErrorCode *status) {
729e4ab9
A
874 return (int32_t)uregex_regionStart64(regexp2, status);
875}
876
b331163b 877U_CAPI int64_t U_EXPORT2
729e4ab9
A
878uregex_regionStart64(const URegularExpression *regexp2,
879 UErrorCode *status) {
880 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 881 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
882 return 0;
883 }
884 return regexp->fMatcher->regionStart();
885}
886
887
888//------------------------------------------------------------------------------
889//
890// uregex_regionEnd
891//
892//------------------------------------------------------------------------------
b331163b 893U_CAPI int32_t U_EXPORT2
729e4ab9 894uregex_regionEnd(const URegularExpression *regexp2,
46f4442e 895 UErrorCode *status) {
729e4ab9
A
896 return (int32_t)uregex_regionEnd64(regexp2, status);
897}
898
b331163b 899U_CAPI int64_t U_EXPORT2
729e4ab9
A
900uregex_regionEnd64(const URegularExpression *regexp2,
901 UErrorCode *status) {
902 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 903 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
904 return 0;
905 }
906 return regexp->fMatcher->regionEnd();
907}
908
909
910//------------------------------------------------------------------------------
911//
912// uregex_hasTransparentBounds
913//
914//------------------------------------------------------------------------------
b331163b 915U_CAPI UBool U_EXPORT2
729e4ab9 916uregex_hasTransparentBounds(const URegularExpression *regexp2,
46f4442e 917 UErrorCode *status) {
729e4ab9 918 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 919 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
920 return FALSE;
921 }
922 return regexp->fMatcher->hasTransparentBounds();
923}
924
925
926//------------------------------------------------------------------------------
927//
928// uregex_useTransparentBounds
929//
930//------------------------------------------------------------------------------
b331163b 931U_CAPI void U_EXPORT2
729e4ab9
A
932uregex_useTransparentBounds(URegularExpression *regexp2,
933 UBool b,
934 UErrorCode *status) {
935 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 936 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
937 return;
938 }
939 regexp->fMatcher->useTransparentBounds(b);
940}
941
942
943//------------------------------------------------------------------------------
944//
945// uregex_hasAnchoringBounds
946//
947//------------------------------------------------------------------------------
b331163b 948U_CAPI UBool U_EXPORT2
729e4ab9
A
949uregex_hasAnchoringBounds(const URegularExpression *regexp2,
950 UErrorCode *status) {
951 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 952 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
953 return FALSE;
954 }
955 return regexp->fMatcher->hasAnchoringBounds();
956}
957
958
959//------------------------------------------------------------------------------
960//
961// uregex_useAnchoringBounds
962//
963//------------------------------------------------------------------------------
b331163b 964U_CAPI void U_EXPORT2
729e4ab9
A
965uregex_useAnchoringBounds(URegularExpression *regexp2,
966 UBool b,
967 UErrorCode *status) {
968 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 969 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
970 return;
971 }
972 regexp->fMatcher->useAnchoringBounds(b);
973}
974
975
976//------------------------------------------------------------------------------
977//
978// uregex_hitEnd
979//
980//------------------------------------------------------------------------------
b331163b 981U_CAPI UBool U_EXPORT2
729e4ab9 982uregex_hitEnd(const URegularExpression *regexp2,
46f4442e 983 UErrorCode *status) {
729e4ab9 984 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 985 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
986 return FALSE;
987 }
988 return regexp->fMatcher->hitEnd();
989}
990
991
992//------------------------------------------------------------------------------
993//
994// uregex_requireEnd
995//
996//------------------------------------------------------------------------------
b331163b 997U_CAPI UBool U_EXPORT2
729e4ab9 998uregex_requireEnd(const URegularExpression *regexp2,
46f4442e 999 UErrorCode *status) {
729e4ab9 1000 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1001 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
1002 return FALSE;
1003 }
1004 return regexp->fMatcher->requireEnd();
1005}
1006
1007
1008//------------------------------------------------------------------------------
1009//
1010// uregex_setTimeLimit
1011//
1012//------------------------------------------------------------------------------
b331163b 1013U_CAPI void U_EXPORT2
729e4ab9 1014uregex_setTimeLimit(URegularExpression *regexp2,
46f4442e
A
1015 int32_t limit,
1016 UErrorCode *status) {
729e4ab9 1017 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1018 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1019 regexp->fMatcher->setTimeLimit(limit, *status);
1020 }
1021}
1022
1023
1024
1025//------------------------------------------------------------------------------
1026//
1027// uregex_getTimeLimit
1028//
1029//------------------------------------------------------------------------------
b331163b 1030U_CAPI int32_t U_EXPORT2
729e4ab9 1031uregex_getTimeLimit(const URegularExpression *regexp2,
46f4442e
A
1032 UErrorCode *status) {
1033 int32_t retVal = 0;
729e4ab9 1034 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1035 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1036 retVal = regexp->fMatcher->getTimeLimit();
1037 }
1038 return retVal;
1039}
1040
1041
1042
1043//------------------------------------------------------------------------------
1044//
1045// uregex_setStackLimit
1046//
1047//------------------------------------------------------------------------------
b331163b 1048U_CAPI void U_EXPORT2
729e4ab9
A
1049uregex_setStackLimit(URegularExpression *regexp2,
1050 int32_t limit,
1051 UErrorCode *status) {
1052 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1053 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1054 regexp->fMatcher->setStackLimit(limit, *status);
1055 }
1056}
1057
1058
1059
1060//------------------------------------------------------------------------------
1061//
1062// uregex_getStackLimit
1063//
1064//------------------------------------------------------------------------------
b331163b 1065U_CAPI int32_t U_EXPORT2
729e4ab9
A
1066uregex_getStackLimit(const URegularExpression *regexp2,
1067 UErrorCode *status) {
46f4442e 1068 int32_t retVal = 0;
729e4ab9 1069 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1070 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1071 retVal = regexp->fMatcher->getStackLimit();
1072 }
1073 return retVal;
1074}
1075
1076
1077//------------------------------------------------------------------------------
1078//
1079// uregex_setMatchCallback
1080//
1081//------------------------------------------------------------------------------
1082U_CAPI void U_EXPORT2
729e4ab9 1083uregex_setMatchCallback(URegularExpression *regexp2,
46f4442e
A
1084 URegexMatchCallback *callback,
1085 const void *context,
1086 UErrorCode *status) {
729e4ab9 1087 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1088 if (validateRE(regexp, FALSE, status)) {
729e4ab9 1089 regexp->fMatcher->setMatchCallback(callback, context, *status);
46f4442e
A
1090 }
1091}
1092
1093
1094//------------------------------------------------------------------------------
1095//
1096// uregex_getMatchCallback
1097//
1098//------------------------------------------------------------------------------
b331163b 1099U_CAPI void U_EXPORT2
729e4ab9 1100uregex_getMatchCallback(const URegularExpression *regexp2,
46f4442e
A
1101 URegexMatchCallback **callback,
1102 const void **context,
1103 UErrorCode *status) {
729e4ab9 1104 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1105 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1106 regexp->fMatcher->getMatchCallback(*callback, *context, *status);
1107 }
1108}
1109
1110
729e4ab9
A
1111//------------------------------------------------------------------------------
1112//
//    uregex_setFindProgressCallback
1114//
1115//------------------------------------------------------------------------------
1116U_CAPI void U_EXPORT2
1117uregex_setFindProgressCallback(URegularExpression *regexp2,
1118 URegexFindProgressCallback *callback,
1119 const void *context,
1120 UErrorCode *status) {
1121 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1122 if (validateRE(regexp, FALSE, status)) {
729e4ab9
A
1123 regexp->fMatcher->setFindProgressCallback(callback, context, *status);
1124 }
1125}
1126
1127
1128//------------------------------------------------------------------------------
1129//
//    uregex_getFindProgressCallback
1131//
1132//------------------------------------------------------------------------------
b331163b 1133U_CAPI void U_EXPORT2
729e4ab9
A
1134uregex_getFindProgressCallback(const URegularExpression *regexp2,
1135 URegexFindProgressCallback **callback,
1136 const void **context,
1137 UErrorCode *status) {
1138 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1139 if (validateRE(regexp, FALSE, status)) {
729e4ab9
A
1140 regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
1141 }
1142}
1143
1144
73c04bcf 1145//------------------------------------------------------------------------------
374ca955
A
1146//
1147// uregex_replaceAll
1148//
73c04bcf 1149//------------------------------------------------------------------------------
b331163b 1150U_CAPI int32_t U_EXPORT2
729e4ab9 1151uregex_replaceAll(URegularExpression *regexp2,
73c04bcf 1152 const UChar *replacementText,
374ca955
A
1153 int32_t replacementLength,
1154 UChar *destBuf,
1155 int32_t destCapacity,
1156 UErrorCode *status) {
729e4ab9 1157 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1158 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1159 return 0;
1160 }
1161 if (replacementText == NULL || replacementLength < -1 ||
729e4ab9 1162 (destBuf == NULL && destCapacity > 0) ||
374ca955
A
1163 destCapacity < 0) {
1164 *status = U_ILLEGAL_ARGUMENT_ERROR;
1165 return 0;
1166 }
1167
1168 int32_t len = 0;
729e4ab9
A
1169
1170 uregex_reset(regexp2, 0, status);
1171
1172 // Note: Seperate error code variables for findNext() and appendReplacement()
1173 // are used so that destination buffer overflow errors
1174 // in appendReplacement won't stop findNext() from working.
1175 // appendReplacement() and appendTail() special case incoming buffer
1176 // overflow errors, continuing to return the correct length.
1177 UErrorCode findStatus = *status;
1178 while (uregex_findNext(regexp2, &findStatus)) {
1179 len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
374ca955
A
1180 &destBuf, &destCapacity, status);
1181 }
729e4ab9 1182 len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
b331163b 1183
729e4ab9
A
1184 if (U_FAILURE(findStatus)) {
1185 // If anything went wrong with the findNext(), make that error trump
1186 // whatever may have happened with the append() operations.
1187 // Errors in findNext() are not expected.
1188 *status = findStatus;
1189 }
374ca955
A
1190
1191 return len;
73c04bcf 1192}
374ca955
A
1193
1194
729e4ab9
A
1195//------------------------------------------------------------------------------
1196//
1197// uregex_replaceAllUText
1198//
1199//------------------------------------------------------------------------------
b331163b 1200U_CAPI UText * U_EXPORT2
729e4ab9
A
1201uregex_replaceAllUText(URegularExpression *regexp2,
1202 UText *replacementText,
1203 UText *dest,
1204 UErrorCode *status) {
1205 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1206 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
1207 return 0;
1208 }
1209 if (replacementText == NULL) {
1210 *status = U_ILLEGAL_ARGUMENT_ERROR;
1211 return 0;
1212 }
b331163b 1213
729e4ab9
A
1214 dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
1215 return dest;
1216}
b331163b 1217
729e4ab9 1218
73c04bcf 1219//------------------------------------------------------------------------------
374ca955
A
1220//
1221// uregex_replaceFirst
1222//
73c04bcf 1223//------------------------------------------------------------------------------
b331163b 1224U_CAPI int32_t U_EXPORT2
729e4ab9 1225uregex_replaceFirst(URegularExpression *regexp2,
73c04bcf 1226 const UChar *replacementText,
374ca955
A
1227 int32_t replacementLength,
1228 UChar *destBuf,
1229 int32_t destCapacity,
1230 UErrorCode *status) {
729e4ab9 1231 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1232 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1233 return 0;
1234 }
1235 if (replacementText == NULL || replacementLength < -1 ||
729e4ab9 1236 (destBuf == NULL && destCapacity > 0) ||
374ca955
A
1237 destCapacity < 0) {
1238 *status = U_ILLEGAL_ARGUMENT_ERROR;
1239 return 0;
1240 }
1241
1242 int32_t len = 0;
1243 UBool findSucceeded;
729e4ab9
A
1244 uregex_reset(regexp2, 0, status);
1245 findSucceeded = uregex_find(regexp2, 0, status);
374ca955 1246 if (findSucceeded) {
b331163b 1247 len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
374ca955
A
1248 &destBuf, &destCapacity, status);
1249 }
729e4ab9 1250 len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
374ca955
A
1251
1252 return len;
73c04bcf 1253}
374ca955
A
1254
1255
73c04bcf 1256//------------------------------------------------------------------------------
374ca955 1257//
729e4ab9 1258// uregex_replaceFirstUText
374ca955 1259//
73c04bcf 1260//------------------------------------------------------------------------------
b331163b 1261U_CAPI UText * U_EXPORT2
729e4ab9
A
1262uregex_replaceFirstUText(URegularExpression *regexp2,
1263 UText *replacementText,
1264 UText *dest,
1265 UErrorCode *status) {
1266 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1267 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
1268 return 0;
1269 }
1270 if (replacementText == NULL) {
1271 *status = U_ILLEGAL_ARGUMENT_ERROR;
1272 return 0;
1273 }
b331163b 1274
729e4ab9
A
1275 dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
1276 return dest;
1277}
1278
374ca955 1279
729e4ab9
A
1280//------------------------------------------------------------------------------
1281//
1282// uregex_appendReplacement
1283//
1284//------------------------------------------------------------------------------
374ca955 1285
729e4ab9 1286U_NAMESPACE_BEGIN
374ca955
A
1287//
1288// Dummy class, because these functions need to be friends of class RegexMatcher,
1289// and stand-alone C functions don't work as friends
1290//
374ca955
A
1291class RegexCImpl {
1292 public:
729e4ab9 1293 inline static int32_t appendReplacement(RegularExpression *regexp,
73c04bcf 1294 const UChar *replacementText,
374ca955
A
1295 int32_t replacementLength,
1296 UChar **destBuf,
1297 int32_t *destCapacity,
1298 UErrorCode *status);
1299
729e4ab9
A
1300 inline static int32_t appendTail(RegularExpression *regexp,
1301 UChar **destBuf,
1302 int32_t *destCapacity,
1303 UErrorCode *status);
b331163b 1304
729e4ab9
A
1305 inline static int32_t split(RegularExpression *regexp,
1306 UChar *destBuf,
1307 int32_t destCapacity,
1308 int32_t *requiredCapacity,
1309 UChar *destFields[],
1310 int32_t destFieldsCapacity,
1311 UErrorCode *status);
374ca955 1312};
374ca955 1313
729e4ab9 1314U_NAMESPACE_END
374ca955 1315
374ca955
A
1316
1317
1318static const UChar BACKSLASH = 0x5c;
1319static const UChar DOLLARSIGN = 0x24;
b331163b
A
1320static const UChar LEFTBRACKET = 0x7b;
1321static const UChar RIGHTBRACKET = 0x7d;
374ca955
A
1322
1323//
1324// Move a character to an output buffer, with bounds checking on the index.
1325// Index advances even if capacity is exceeded, for preflight size computations.
1326// This little sequence is used a LOT.
1327//
1328static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
1329 if (*idx < bufCapacity) {
1330 buf[*idx] = c;
1331 }
1332 (*idx)++;
1333}
1334
1335
1336//
1337// appendReplacement, the actual implementation.
1338//
729e4ab9
A
1339int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
1340 const UChar *replacementText,
1341 int32_t replacementLength,
1342 UChar **destBuf,
1343 int32_t *destCapacity,
1344 UErrorCode *status) {
374ca955
A
1345
1346 // If we come in with a buffer overflow error, don't suppress the operation.
1347 // A series of appendReplacements, appendTail need to correctly preflight
1348 // the buffer size when an overflow happens somewhere in the middle.
1349 UBool pendingBufferOverflow = FALSE;
729e4ab9 1350 if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
374ca955
A
1351 pendingBufferOverflow = TRUE;
1352 *status = U_ZERO_ERROR;
1353 }
1354
1355 //
1356 // Validate all paramters
1357 //
4388f060 1358 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1359 return 0;
1360 }
1361 if (replacementText == NULL || replacementLength < -1 ||
b331163b 1362 destCapacity == NULL || destBuf == NULL ||
729e4ab9 1363 (*destBuf == NULL && *destCapacity > 0) ||
374ca955
A
1364 *destCapacity < 0) {
1365 *status = U_ILLEGAL_ARGUMENT_ERROR;
1366 return 0;
1367 }
1368
1369 RegexMatcher *m = regexp->fMatcher;
1370 if (m->fMatch == FALSE) {
1371 *status = U_REGEX_INVALID_STATE;
1372 return 0;
1373 }
1374
1375 UChar *dest = *destBuf;
1376 int32_t capacity = *destCapacity;
1377 int32_t destIdx = 0;
1378 int32_t i;
b331163b 1379
374ca955
A
1380 // If it wasn't supplied by the caller, get the length of the replacement text.
1381 // TODO: slightly smarter logic in the copy loop could watch for the NUL on
1382 // the fly and avoid this step.
1383 if (replacementLength == -1) {
1384 replacementLength = u_strlen(replacementText);
1385 }
1386
1387 // Copy input string from the end of previous match to start of current match
729e4ab9
A
1388 if (regexp->fText != NULL) {
1389 int32_t matchStart;
1390 int32_t lastMatchEnd;
1391 if (UTEXT_USES_U16(m->fInputText)) {
1392 lastMatchEnd = (int32_t)m->fLastMatchEnd;
1393 matchStart = (int32_t)m->fMatchStart;
1394 } else {
1395 // !!!: Would like a better way to do this!
b331163b
A
1396 UErrorCode tempStatus = U_ZERO_ERROR;
1397 lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &tempStatus);
1398 tempStatus = U_ZERO_ERROR;
1399 matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &tempStatus);
729e4ab9
A
1400 }
1401 for (i=lastMatchEnd; i<matchStart; i++) {
1402 appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
b331163b 1403 }
729e4ab9
A
1404 } else {
1405 UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
1406 destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
4388f060
A
1407 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
1408 &possibleOverflowError);
374ca955 1409 }
4388f060 1410 U_ASSERT(destIdx >= 0);
374ca955
A
1411
1412 // scan the replacement text, looking for substitutions ($n) and \escapes.
1413 int32_t replIdx = 0;
b331163b 1414 while (replIdx < replacementLength && U_SUCCESS(*status)) {
374ca955
A
1415 UChar c = replacementText[replIdx];
1416 replIdx++;
1417 if (c != DOLLARSIGN && c != BACKSLASH) {
b331163b 1418 // Common case, no substitution, no escaping,
374ca955
A
1419 // just copy the char to the dest buf.
1420 appendToBuf(c, &destIdx, dest, capacity);
1421 continue;
1422 }
1423
1424 if (c == BACKSLASH) {
1425 // Backslash Escape. Copy the following char out without further checks.
1426 // Note: Surrogate pairs don't need any special handling
1427 // The second half wont be a '$' or a '\', and
1428 // will move to the dest normally on the next
1429 // loop iteration.
1430 if (replIdx >= replacementLength) {
1431 break;
1432 }
1433 c = replacementText[replIdx];
1434
1435 if (c==0x55/*U*/ || c==0x75/*u*/) {
1436 // We have a \udddd or \Udddddddd escape sequence.
b331163b 1437 UChar32 escapedChar =
729e4ab9 1438 u_unescapeAt(uregex_ucstr_unescape_charAt,
b331163b 1439 &replIdx, // Index is updated by unescapeAt
374ca955 1440 replacementLength, // Length of replacement text
73c04bcf 1441 (void *)replacementText);
374ca955
A
1442
1443 if (escapedChar != (UChar32)0xFFFFFFFF) {
1444 if (escapedChar <= 0xffff) {
1445 appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
1446 } else {
1447 appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
1448 appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
1449 }
1450 continue;
1451 }
1452 // Note: if the \u escape was invalid, just fall through and
1453 // treat it as a plain \<anything> escape.
1454 }
1455
1456 // Plain backslash escape. Just put out the escaped character.
1457 appendToBuf(c, &destIdx, dest, capacity);
1458
1459 replIdx++;
1460 continue;
1461 }
1462
b331163b
A
1463 // We've got a $. Pick up the following capture group name or number.
1464 // For numbers, consume only digits that produce a valid capture group for the pattern.
374ca955 1465
374ca955 1466 int32_t groupNum = 0;
b331163b 1467 U_ASSERT(c == DOLLARSIGN);
0f5d89e8
A
1468 UChar32 c32 = -1;
1469 if (replIdx < replacementLength) {
1470 U16_GET(replacementText, 0, replIdx, replacementLength, c32);
1471 }
b331163b
A
1472 if (u_isdigit(c32)) {
1473 int32_t numDigits = 0;
1474 int32_t numCaptureGroups = m->fPattern->fGroupMap->size();
1475 for (;;) {
1476 if (replIdx >= replacementLength) {
1477 break;
1478 }
1479 U16_GET(replacementText, 0, replIdx, replacementLength, c32);
1480 if (u_isdigit(c32) == FALSE) {
1481 break;
1482 }
374ca955 1483
b331163b
A
1484 int32_t digitVal = u_charDigitValue(c32);
1485 if (groupNum * 10 + digitVal <= numCaptureGroups) {
1486 groupNum = groupNum * 10 + digitVal;
1487 U16_FWD_1(replacementText, replIdx, replacementLength);
1488 numDigits++;
1489 } else {
1490 if (numDigits == 0) {
1491 *status = U_INDEX_OUTOFBOUNDS_ERROR;
1492 }
1493 break;
1494 }
1495 }
1496 } else if (c32 == LEFTBRACKET) {
1497 // Scan for Named Capture Group, ${name}.
1498 UnicodeString groupName;
374ca955 1499 U16_FWD_1(replacementText, replIdx, replacementLength);
b331163b
A
1500 while (U_SUCCESS(*status) && c32 != RIGHTBRACKET) {
1501 if (replIdx >= replacementLength) {
1502 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
1503 break;
1504 }
1505 U16_NEXT(replacementText, replIdx, replacementLength, c32);
1506 if ((c32 >= 0x41 && c32 <= 0x5a) || // A..Z
1507 (c32 >= 0x61 && c32 <= 0x7a) || // a..z
1508 (c32 >= 0x31 && c32 <= 0x39)) { // 0..9
1509 groupName.append(c32);
1510 } else if (c32 == RIGHTBRACKET) {
1511 groupNum = uhash_geti(regexp->fPat->fNamedCaptureMap, &groupName);
1512 if (groupNum == 0) {
1513 // Name not defined by pattern.
1514 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
1515 }
1516 } else {
1517 // Character was something other than a name char or a closing '}'
1518 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
1519 }
374ca955 1520 }
b331163b
A
1521 } else {
1522 // $ not followed by {name} or digits.
1523 *status = U_REGEX_INVALID_CAPTURE_GROUP_NAME;
374ca955
A
1524 }
1525
1526
374ca955 1527 // Finally, append the capture group data to the destination.
b331163b
A
1528 if (U_SUCCESS(*status)) {
1529 destIdx += uregex_group((URegularExpression*)regexp, groupNum,
1530 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
1531 if (*status == U_BUFFER_OVERFLOW_ERROR) {
1532 // Ignore buffer overflow when extracting the group. We need to
1533 // continue on to get full size of the untruncated result. We will
1534 // raise our own buffer overflow error at the end.
1535 *status = U_ZERO_ERROR;
1536 }
374ca955
A
1537 }
1538
1539 if (U_FAILURE(*status)) {
b331163b 1540 // bad group number or name.
374ca955
A
1541 break;
1542 }
374ca955
A
1543 }
1544
1545 //
1546 // Nul Terminate the dest buffer if possible.
1547 // Set the appropriate buffer overflow or not terminated error, if needed.
1548 //
1549 if (destIdx < capacity) {
1550 dest[destIdx] = 0;
b331163b
A
1551 } else if (U_SUCCESS(*status)) {
1552 if (destIdx == *destCapacity) {
1553 *status = U_STRING_NOT_TERMINATED_WARNING;
1554 } else {
1555 *status = U_BUFFER_OVERFLOW_ERROR;
1556 }
374ca955 1557 }
b331163b 1558
374ca955
A
1559 //
1560 // Return an updated dest buffer and capacity to the caller.
1561 //
1562 if (destIdx > 0 && *destCapacity > 0) {
1563 if (destIdx < capacity) {
1564 *destBuf += destIdx;
1565 *destCapacity -= destIdx;
1566 } else {
1567 *destBuf += capacity;
1568 *destCapacity = 0;
1569 }
1570 }
1571
1572 // If we came in with a buffer overflow, make sure we go out with one also.
1573 // (A zero length match right at the end of the previous match could
1574 // make this function succeed even though a previous call had overflowed the buf)
1575 if (pendingBufferOverflow && U_SUCCESS(*status)) {
1576 *status = U_BUFFER_OVERFLOW_ERROR;
1577 }
1578
1579 return destIdx;
1580}
1581
1582//
729e4ab9 1583// appendReplacement the actual API function,
374ca955 1584//
b331163b 1585U_CAPI int32_t U_EXPORT2
729e4ab9
A
1586uregex_appendReplacement(URegularExpression *regexp2,
1587 const UChar *replacementText,
1588 int32_t replacementLength,
1589 UChar **destBuf,
1590 int32_t *destCapacity,
1591 UErrorCode *status) {
b331163b 1592
729e4ab9 1593 RegularExpression *regexp = (RegularExpression*)regexp2;
374ca955
A
1594 return RegexCImpl::appendReplacement(
1595 regexp, replacementText, replacementLength,destBuf, destCapacity, status);
1596}
1597
729e4ab9
A
1598//
1599// uregex_appendReplacementUText...can just use the normal C++ method
1600//
b331163b 1601U_CAPI void U_EXPORT2
729e4ab9
A
1602uregex_appendReplacementUText(URegularExpression *regexp2,
1603 UText *replText,
1604 UText *dest,
1605 UErrorCode *status) {
1606 RegularExpression *regexp = (RegularExpression*)regexp2;
1607 regexp->fMatcher->appendReplacement(dest, replText, *status);
1608}
1609
374ca955 1610
73c04bcf 1611//------------------------------------------------------------------------------
374ca955
A
1612//
1613// uregex_appendTail
1614//
73c04bcf 1615//------------------------------------------------------------------------------
729e4ab9
A
1616int32_t RegexCImpl::appendTail(RegularExpression *regexp,
1617 UChar **destBuf,
1618 int32_t *destCapacity,
1619 UErrorCode *status)
46f4442e 1620{
374ca955
A
1621
1622 // If we come in with a buffer overflow error, don't suppress the operation.
1623 // A series of appendReplacements, appendTail need to correctly preflight
1624 // the buffer size when an overflow happens somewhere in the middle.
1625 UBool pendingBufferOverflow = FALSE;
729e4ab9 1626 if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
374ca955
A
1627 pendingBufferOverflow = TRUE;
1628 *status = U_ZERO_ERROR;
1629 }
1630
4388f060 1631 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1632 return 0;
1633 }
b331163b
A
1634
1635 if (destCapacity == NULL || destBuf == NULL ||
729e4ab9
A
1636 (*destBuf == NULL && *destCapacity > 0) ||
1637 *destCapacity < 0)
1638 {
1639 *status = U_ILLEGAL_ARGUMENT_ERROR;
1640 return 0;
374ca955
A
1641 }
1642
729e4ab9
A
1643 RegexMatcher *m = regexp->fMatcher;
1644
374ca955
A
1645 int32_t destIdx = 0;
1646 int32_t destCap = *destCapacity;
1647 UChar *dest = *destBuf;
b331163b 1648
729e4ab9
A
1649 if (regexp->fText != NULL) {
1650 int32_t srcIdx;
1651 int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
1652 if (nativeIdx == -1) {
1653 srcIdx = 0;
1654 } else if (UTEXT_USES_U16(m->fInputText)) {
1655 srcIdx = (int32_t)nativeIdx;
374ca955 1656 } else {
3d1f044b
A
1657 UErrorCode newStatus = U_ZERO_ERROR;
1658 srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &newStatus);
729e4ab9 1659 }
b331163b 1660
729e4ab9 1661 for (;;) {
4388f060
A
1662 U_ASSERT(destIdx >= 0);
1663
729e4ab9 1664 if (srcIdx == regexp->fTextLength) {
374ca955
A
1665 break;
1666 }
729e4ab9
A
1667 UChar c = regexp->fText[srcIdx];
1668 if (c == 0 && regexp->fTextLength == -1) {
1669 regexp->fTextLength = srcIdx;
1670 break;
1671 }
4388f060 1672
729e4ab9
A
1673 if (destIdx < destCap) {
1674 dest[destIdx] = c;
1675 } else {
1676 // We've overflowed the dest buffer.
1677 // If the total input string length is known, we can
1678 // compute the total buffer size needed without scanning through the string.
1679 if (regexp->fTextLength > 0) {
1680 destIdx += (regexp->fTextLength - srcIdx);
1681 break;
1682 }
1683 }
1684 srcIdx++;
1685 destIdx++;
b331163b 1686 }
729e4ab9
A
1687 } else {
1688 int64_t srcIdx;
1689 if (m->fMatch) {
b331163b 1690 // The most recent call to find() succeeded.
729e4ab9
A
1691 srcIdx = m->fMatchEnd;
1692 } else {
1693 // The last call to find() on this matcher failed().
1694 // Look back to the end of the last find() that succeeded for src index.
1695 srcIdx = m->fLastMatchEnd;
1696 if (srcIdx == -1) {
1697 // There has been no successful match with this matcher.
1698 // We want to copy the whole string.
1699 srcIdx = 0;
1700 }
374ca955 1701 }
729e4ab9
A
1702
1703 destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
374ca955
A
1704 }
1705
1706 //
1707 // NUL terminate the output string, if possible, otherwise issue the
1708 // appropriate error or warning.
1709 //
1710 if (destIdx < destCap) {
1711 dest[destIdx] = 0;
1712 } else if (destIdx == destCap) {
1713 *status = U_STRING_NOT_TERMINATED_WARNING;
1714 } else {
1715 *status = U_BUFFER_OVERFLOW_ERROR;
1716 }
1717
1718 //
1719 // Update the user's buffer ptr and capacity vars to reflect the
1720 // amount used.
1721 //
1722 if (destIdx < destCap) {
1723 *destBuf += destIdx;
1724 *destCapacity -= destIdx;
4388f060 1725 } else if (*destBuf != NULL) {
374ca955
A
1726 *destBuf += destCap;
1727 *destCapacity = 0;
1728 }
1729
1730 if (pendingBufferOverflow && U_SUCCESS(*status)) {
1731 *status = U_BUFFER_OVERFLOW_ERROR;
1732 }
1733
1734 return destIdx;
73c04bcf 1735}
374ca955
A
1736
1737
729e4ab9
A
1738//
1739// appendTail the actual API function
1740//
b331163b 1741U_CAPI int32_t U_EXPORT2
729e4ab9 1742uregex_appendTail(URegularExpression *regexp2,
374ca955
A
1743 UChar **destBuf,
1744 int32_t *destCapacity,
1745 UErrorCode *status) {
729e4ab9 1746 RegularExpression *regexp = (RegularExpression*)regexp2;
374ca955
A
1747 return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
1748}
1749
1750
729e4ab9
A
1751//
1752// uregex_appendTailUText...can just use the normal C++ method
1753//
b331163b 1754U_CAPI UText * U_EXPORT2
729e4ab9
A
1755uregex_appendTailUText(URegularExpression *regexp2,
1756 UText *dest,
1757 UErrorCode *status) {
1758 RegularExpression *regexp = (RegularExpression*)regexp2;
1759 return regexp->fMatcher->appendTail(dest, *status);
1760}
1761
1762
73c04bcf 1763//------------------------------------------------------------------------------
374ca955
A
1764//
1765// copyString Internal utility to copy a string to an output buffer,
1766// while managing buffer overflow and preflight size
1767// computation. NUL termination is added to destination,
1768// and the NUL is counted in the output size.
1769//
73c04bcf 1770//------------------------------------------------------------------------------
#if 0
// Currently compiled out.
// Copies srcLen UChars from srcPtr into destBuffer starting at *destIndex.
// Once the buffer is full nothing more is stored, but the index still moves
// forward by the number of characters not copied, so the caller can learn
// the required size. A NUL is stored after the text if it fits, and is
// counted in the updated *destIndex whether or not it fit.
static void copyString(UChar        *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const UChar  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t outPos = *destIndex;
    int32_t srcPos = 0;

    // Copy while there is both input left and room in the destination.
    while (srcPos < srcLen && outPos < destCapacity) {
        destBuffer[outPos++] = srcPtr[srcPos++];
    }

    // Out of room with input remaining: account for the uncopied characters.
    if (srcPos < srcLen) {
        outPos += srcLen - srcPos;
    }

    if (outPos < destCapacity) {
        destBuffer[outPos] = 0;
    }
    *destIndex = outPos + 1;
}
#endif
374ca955 1800
73c04bcf 1801//------------------------------------------------------------------------------
374ca955
A
1802//
1803// uregex_split
1804//
73c04bcf 1805//------------------------------------------------------------------------------
729e4ab9
A
1806int32_t RegexCImpl::split(RegularExpression *regexp,
1807 UChar *destBuf,
1808 int32_t destCapacity,
1809 int32_t *requiredCapacity,
1810 UChar *destFields[],
1811 int32_t destFieldsCapacity,
1812 UErrorCode *status) {
374ca955
A
1813 //
1814 // Reset for the input text
1815 //
1816 regexp->fMatcher->reset();
729e4ab9
A
1817 UText *inputText = regexp->fMatcher->fInputText;
1818 int64_t nextOutputStringStart = 0;
1819 int64_t inputLen = regexp->fMatcher->fInputLength;
374ca955
A
1820 if (inputLen == 0) {
1821 return 0;
1822 }
1823
374ca955
A
1824 //
1825 // Loop through the input text, searching for the delimiter pattern
1826 //
1827 int32_t i; // Index of the field being processed.
1828 int32_t destIdx = 0; // Next available position in destBuf;
1829 int32_t numCaptureGroups = regexp->fMatcher->groupCount();
729e4ab9 1830 UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted
374ca955
A
1831 for (i=0; ; i++) {
1832 if (i>=destFieldsCapacity-1) {
729e4ab9 1833 // There are one or zero output strings left.
374ca955
A
1834 // Fill the last output string with whatever is left from the input, then exit the loop.
1835 // ( i will be == destFieldsCapacity if we filled the output array while processing
1836 // capture groups of the delimiter expression, in which case we will discard the
1837 // last capture group saved in favor of the unprocessed remainder of the
1838 // input string.)
729e4ab9
A
1839 if (inputLen > nextOutputStringStart) {
1840 if (i != destFieldsCapacity-1) {
1841 // No fields are left. Recycle the last one for holding the trailing part of
1842 // the input string.
1843 i = destFieldsCapacity-1;
1844 destIdx = (int32_t)(destFields[i] - destFields[0]);
1845 }
b331163b 1846
729e4ab9
A
1847 destFields[i] = &destBuf[destIdx];
1848 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1849 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
374ca955 1850 }
374ca955
A
1851 break;
1852 }
b331163b 1853
374ca955
A
1854 if (regexp->fMatcher->find()) {
1855 // We found another delimiter. Move everything from where we started looking
1856 // up until the start of the delimiter into the next output string.
374ca955 1857 destFields[i] = &destBuf[destIdx];
b331163b 1858
729e4ab9
A
1859 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
1860 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1861 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1862 tStatus = U_ZERO_ERROR;
1863 } else {
1864 *status = tStatus;
1865 }
1866 nextOutputStringStart = regexp->fMatcher->fMatchEnd;
b331163b 1867
374ca955
A
1868 // If the delimiter pattern has capturing parentheses, the captured
1869 // text goes out into the next n destination strings.
1870 int32_t groupNum;
1871 for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
1872 // If we've run out of output string slots, bail out.
1873 if (i==destFieldsCapacity-1) {
1874 break;
1875 }
1876 i++;
b331163b 1877
374ca955 1878 // Set up to extract the capture group contents into the dest buffer.
374ca955 1879 destFields[i] = &destBuf[destIdx];
729e4ab9 1880 tStatus = U_ZERO_ERROR;
b331163b
A
1881 int32_t t = uregex_group((URegularExpression*)regexp,
1882 groupNum,
1883 destFields[i],
1884 REMAINING_CAPACITY(destIdx, destCapacity),
4388f060 1885 &tStatus);
374ca955
A
1886 destIdx += t + 1; // Record the space used in the output string buffer.
1887 // +1 for the NUL that terminates the string.
729e4ab9
A
1888 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1889 tStatus = U_ZERO_ERROR;
1890 } else {
1891 *status = tStatus;
1892 }
374ca955
A
1893 }
1894
1895 if (nextOutputStringStart == inputLen) {
b331163b 1896 // The delimiter was at the end of the string.
4388f060
A
1897 // Output an empty string, and then we are done.
1898 if (destIdx < destCapacity) {
1899 destBuf[destIdx] = 0;
1900 }
1901 if (i < destFieldsCapacity-1) {
1902 ++i;
1903 }
1904 if (destIdx < destCapacity) {
1905 destFields[i] = destBuf + destIdx;
1906 }
1907 ++destIdx;
374ca955
A
1908 break;
1909 }
1910
1911 }
1912 else
1913 {
1914 // We ran off the end of the input while looking for the next delimiter.
1915 // All the remaining text goes into the current output string.
1916 destFields[i] = &destBuf[destIdx];
729e4ab9
A
1917 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1918 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
374ca955
A
1919 break;
1920 }
1921 }
1922
1923 // Zero out any unused portion of the destFields array
1924 int j;
1925 for (j=i+1; j<destFieldsCapacity; j++) {
1926 destFields[j] = NULL;
1927 }
1928
1929 if (requiredCapacity != NULL) {
1930 *requiredCapacity = destIdx;
1931 }
73c04bcf 1932 if (destIdx > destCapacity) {
374ca955
A
1933 *status = U_BUFFER_OVERFLOW_ERROR;
1934 }
1935 return i+1;
1936}
1937
729e4ab9
A
1938//
1939// uregex_split The actual API function
1940//
b331163b 1941U_CAPI int32_t U_EXPORT2
729e4ab9
A
1942uregex_split(URegularExpression *regexp2,
1943 UChar *destBuf,
1944 int32_t destCapacity,
1945 int32_t *requiredCapacity,
1946 UChar *destFields[],
1947 int32_t destFieldsCapacity,
1948 UErrorCode *status) {
1949 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1950 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
1951 return 0;
1952 }
1953 if ((destBuf == NULL && destCapacity > 0) ||
1954 destCapacity < 0 ||
1955 destFields == NULL ||
1956 destFieldsCapacity < 1 ) {
1957 *status = U_ILLEGAL_ARGUMENT_ERROR;
1958 return 0;
1959 }
b331163b 1960
729e4ab9
A
1961 return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
1962}
b331163b 1963
729e4ab9
A
1964
1965//
1966// uregex_splitUText...can just use the normal C++ method
1967//
b331163b 1968U_CAPI int32_t U_EXPORT2
729e4ab9
A
1969uregex_splitUText(URegularExpression *regexp2,
1970 UText *destFields[],
1971 int32_t destFieldsCapacity,
1972 UErrorCode *status) {
1973 RegularExpression *regexp = (RegularExpression*)regexp2;
1974 return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
1975}
1976
374ca955 1977
374ca955 1978#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
73c04bcf 1979