]> git.saurik.com Git - apple/icu.git/blame - icuSources/i18n/uregex.cpp
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / i18n / uregex.cpp
CommitLineData
374ca955
A
1/*
2*******************************************************************************
57a6839d 3* Copyright (C) 2004-2013, International Business Machines
374ca955
A
4* Corporation and others. All Rights Reserved.
5*******************************************************************************
57a6839d 6* file name: uregex.cpp
374ca955
A
7*/
8
9#include "unicode/utypes.h"
10
11#if !UCONFIG_NO_REGULAR_EXPRESSIONS
12
13#include "unicode/regex.h"
14#include "unicode/uregex.h"
15#include "unicode/unistr.h"
16#include "unicode/ustring.h"
17#include "unicode/uchar.h"
18#include "unicode/uobject.h"
4388f060 19#include "unicode/utf16.h"
374ca955
A
20#include "umutex.h"
21#include "uassert.h"
22#include "cmemory.h"
23
729e4ab9
A
24#include "regextxt.h"
25
26#include <stdio.h>
27
28U_NAMESPACE_BEGIN
46f4442e 29
729e4ab9
A
// Capacity left in a buffer of length len once idx units are used; never negative.
// Note: both arguments are evaluated more than once.
#define REMAINING_CAPACITY(idx,len) ( (((len) - (idx)) > 0) ? ((len) - (idx)) : 0 )
31
32struct RegularExpression: public UMemory {
374ca955 33public:
729e4ab9
A
34 RegularExpression();
35 ~RegularExpression();
374ca955
A
36 int32_t fMagic;
37 RegexPattern *fPat;
57a6839d 38 u_atomic_int32_t *fPatRefCount;
374ca955
A
39 UChar *fPatString;
40 int32_t fPatStringLen;
41 RegexMatcher *fMatcher;
42 const UChar *fText; // Text from setText()
43 int32_t fTextLength; // Length provided by user with setText(), which
44 // may be -1.
729e4ab9 45 UBool fOwnsText;
374ca955
A
46};
47
48static const int32_t REXP_MAGIC = 0x72657870; // "rexp" in ASCII
49
729e4ab9 50RegularExpression::RegularExpression() {
374ca955
A
51 fMagic = REXP_MAGIC;
52 fPat = NULL;
53 fPatRefCount = NULL;
54 fPatString = NULL;
55 fPatStringLen = 0;
56 fMatcher = NULL;
57 fText = NULL;
58 fTextLength = 0;
729e4ab9 59 fOwnsText = FALSE;
374ca955
A
60}
61
729e4ab9 62RegularExpression::~RegularExpression() {
374ca955
A
63 delete fMatcher;
64 fMatcher = NULL;
65 if (fPatRefCount!=NULL && umtx_atomic_dec(fPatRefCount)==0) {
66 delete fPat;
67 uprv_free(fPatString);
57a6839d 68 uprv_free((void *)fPatRefCount);
374ca955 69 }
729e4ab9
A
70 if (fOwnsText && fText!=NULL) {
71 uprv_free((void *)fText);
72 }
374ca955
A
73 fMagic = 0;
74}
75
729e4ab9
A
76U_NAMESPACE_END
77
78U_NAMESPACE_USE
79
374ca955
A
80//----------------------------------------------------------------------------------------
81//
82// validateRE Do boilerplate style checks on API function parameters.
83// Return TRUE if they look OK.
84//----------------------------------------------------------------------------------------
4388f060 85static UBool validateRE(const RegularExpression *re, UBool requiresText, UErrorCode *status) {
374ca955
A
86 if (U_FAILURE(*status)) {
87 return FALSE;
88 }
89 if (re == NULL || re->fMagic != REXP_MAGIC) {
374ca955
A
90 *status = U_ILLEGAL_ARGUMENT_ERROR;
91 return FALSE;
92 }
729e4ab9
A
93 // !!! Not sure how to update this with the new UText backing, which is stored in re->fMatcher anyway
94 if (requiresText && re->fText == NULL && !re->fOwnsText) {
374ca955
A
95 *status = U_REGEX_INVALID_STATE;
96 return FALSE;
97 }
98 return TRUE;
99}
100
101//----------------------------------------------------------------------------------------
102//
103// uregex_open
104//
105//----------------------------------------------------------------------------------------
106U_CAPI URegularExpression * U_EXPORT2
107uregex_open( const UChar *pattern,
108 int32_t patternLength,
109 uint32_t flags,
110 UParseError *pe,
111 UErrorCode *status) {
112
113 if (U_FAILURE(*status)) {
114 return NULL;
115 }
116 if (pattern == NULL || patternLength < -1 || patternLength == 0) {
117 *status = U_ILLEGAL_ARGUMENT_ERROR;
118 return NULL;
119 }
120 int32_t actualPatLen = patternLength;
121 if (actualPatLen == -1) {
122 actualPatLen = u_strlen(pattern);
123 }
124
57a6839d
A
125 RegularExpression *re = new RegularExpression;
126 u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
374ca955
A
127 UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(actualPatLen+1));
128 if (re == NULL || refC == NULL || patBuf == NULL) {
129 *status = U_MEMORY_ALLOCATION_ERROR;
130 delete re;
57a6839d 131 uprv_free((void *)refC);
374ca955
A
132 uprv_free(patBuf);
133 return NULL;
134 }
135 re->fPatRefCount = refC;
136 *re->fPatRefCount = 1;
137
138 //
139 // Make a copy of the pattern string, so we can return it later if asked.
729e4ab9
A
140 // For compiling the pattern, we will use a UText wrapper around
141 // this local copy, to avoid making even more copies.
374ca955
A
142 //
143 re->fPatString = patBuf;
144 re->fPatStringLen = patternLength;
145 u_memcpy(patBuf, pattern, actualPatLen);
146 patBuf[actualPatLen] = 0;
729e4ab9
A
147
148 UText patText = UTEXT_INITIALIZER;
149 utext_openUChars(&patText, patBuf, patternLength, status);
374ca955
A
150
151 //
152 // Compile the pattern
153 //
154 if (pe != NULL) {
729e4ab9 155 re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
374ca955 156 } else {
729e4ab9 157 re->fPat = RegexPattern::compile(&patText, flags, *status);
374ca955 158 }
729e4ab9
A
159 utext_close(&patText);
160
374ca955
A
161 if (U_FAILURE(*status)) {
162 goto ErrorExit;
163 }
164
165 //
166 // Create the matcher object
167 //
168 re->fMatcher = re->fPat->matcher(*status);
169 if (U_SUCCESS(*status)) {
729e4ab9 170 return (URegularExpression*)re;
374ca955
A
171 }
172
173ErrorExit:
174 delete re;
175 return NULL;
176
177}
178
729e4ab9
A
179//----------------------------------------------------------------------------------------
180//
181// uregex_openUText
182//
183//----------------------------------------------------------------------------------------
184U_CAPI URegularExpression * U_EXPORT2
185uregex_openUText(UText *pattern,
186 uint32_t flags,
187 UParseError *pe,
188 UErrorCode *status) {
189
190 if (U_FAILURE(*status)) {
191 return NULL;
192 }
193 if (pattern == NULL) {
194 *status = U_ILLEGAL_ARGUMENT_ERROR;
195 return NULL;
196 }
197
198 int64_t patternNativeLength = utext_nativeLength(pattern);
199
200 if (patternNativeLength == 0) {
201 *status = U_ILLEGAL_ARGUMENT_ERROR;
202 return NULL;
203 }
204
205 RegularExpression *re = new RegularExpression;
206
207 UErrorCode lengthStatus = U_ZERO_ERROR;
208 int32_t pattern16Length = utext_extract(pattern, 0, patternNativeLength, NULL, 0, &lengthStatus);
209
57a6839d 210 u_atomic_int32_t *refC = (u_atomic_int32_t *)uprv_malloc(sizeof(int32_t));
729e4ab9
A
211 UChar *patBuf = (UChar *)uprv_malloc(sizeof(UChar)*(pattern16Length+1));
212 if (re == NULL || refC == NULL || patBuf == NULL) {
213 *status = U_MEMORY_ALLOCATION_ERROR;
214 delete re;
57a6839d 215 uprv_free((void *)refC);
729e4ab9
A
216 uprv_free(patBuf);
217 return NULL;
218 }
219 re->fPatRefCount = refC;
220 *re->fPatRefCount = 1;
221
222 //
223 // Make a copy of the pattern string, so we can return it later if asked.
224 // For compiling the pattern, we will use a read-only UText wrapper
225 // around this local copy, to avoid making even more copies.
226 //
227 re->fPatString = patBuf;
228 re->fPatStringLen = pattern16Length;
229 utext_extract(pattern, 0, patternNativeLength, patBuf, pattern16Length+1, status);
230
231 UText patText = UTEXT_INITIALIZER;
232 utext_openUChars(&patText, patBuf, pattern16Length, status);
233
234 //
235 // Compile the pattern
236 //
237 if (pe != NULL) {
238 re->fPat = RegexPattern::compile(&patText, flags, *pe, *status);
239 } else {
240 re->fPat = RegexPattern::compile(&patText, flags, *status);
241 }
242 utext_close(&patText);
243
244 if (U_FAILURE(*status)) {
245 goto ErrorExit;
246 }
247
248 //
249 // Create the matcher object
250 //
251 re->fMatcher = re->fPat->matcher(*status);
252 if (U_SUCCESS(*status)) {
253 return (URegularExpression*)re;
254 }
255
256ErrorExit:
257 delete re;
258 return NULL;
259
260}
261
374ca955
A
262//----------------------------------------------------------------------------------------
263//
264// uregex_close
265//
266//----------------------------------------------------------------------------------------
267U_CAPI void U_EXPORT2
729e4ab9
A
268uregex_close(URegularExpression *re2) {
269 RegularExpression *re = (RegularExpression*)re2;
374ca955 270 UErrorCode status = U_ZERO_ERROR;
4388f060 271 if (validateRE(re, FALSE, &status) == FALSE) {
374ca955
A
272 return;
273 }
274 delete re;
275}
276
277
278//----------------------------------------------------------------------------------------
279//
280// uregex_clone
281//
282//----------------------------------------------------------------------------------------
283U_CAPI URegularExpression * U_EXPORT2
729e4ab9
A
284uregex_clone(const URegularExpression *source2, UErrorCode *status) {
285 RegularExpression *source = (RegularExpression*)source2;
4388f060 286 if (validateRE(source, FALSE, status) == FALSE) {
374ca955
A
287 return NULL;
288 }
289
729e4ab9 290 RegularExpression *clone = new RegularExpression;
374ca955
A
291 if (clone == NULL) {
292 *status = U_MEMORY_ALLOCATION_ERROR;
293 return NULL;
294 }
295
296 clone->fMatcher = source->fPat->matcher(*status);
297 if (U_FAILURE(*status)) {
298 delete clone;
299 return NULL;
300 }
374ca955
A
301
302 clone->fPat = source->fPat;
303 clone->fPatRefCount = source->fPatRefCount;
304 clone->fPatString = source->fPatString;
305 clone->fPatStringLen = source->fPatStringLen;
306 umtx_atomic_inc(source->fPatRefCount);
307 // Note: fText is not cloned.
308
729e4ab9 309 return (URegularExpression*)clone;
73c04bcf 310}
374ca955
A
311
312
313
314
73c04bcf 315//------------------------------------------------------------------------------
374ca955
A
316//
317// uregex_pattern
318//
73c04bcf 319//------------------------------------------------------------------------------
374ca955 320U_CAPI const UChar * U_EXPORT2
729e4ab9
A
321uregex_pattern(const URegularExpression *regexp2,
322 int32_t *patLength,
323 UErrorCode *status) {
324 RegularExpression *regexp = (RegularExpression*)regexp2;
374ca955 325
4388f060 326 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
327 return NULL;
328 }
329 if (patLength != NULL) {
330 *patLength = regexp->fPatStringLen;
331 }
332 return regexp->fPatString;
73c04bcf 333}
374ca955
A
334
335
729e4ab9
A
336//------------------------------------------------------------------------------
337//
338// uregex_patternUText
339//
340//------------------------------------------------------------------------------
341U_CAPI UText * U_EXPORT2
342uregex_patternUText(const URegularExpression *regexp2,
343 UErrorCode *status) {
344 RegularExpression *regexp = (RegularExpression*)regexp2;
345 return regexp->fPat->patternText(*status);
346}
347
348
73c04bcf 349//------------------------------------------------------------------------------
374ca955
A
350//
351// uregex_flags
352//
73c04bcf 353//------------------------------------------------------------------------------
374ca955 354U_CAPI int32_t U_EXPORT2
729e4ab9
A
355uregex_flags(const URegularExpression *regexp2, UErrorCode *status) {
356 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 357 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
358 return 0;
359 }
360 int32_t flags = regexp->fPat->flags();
361 return flags;
73c04bcf 362}
374ca955
A
363
364
73c04bcf 365//------------------------------------------------------------------------------
374ca955
A
366//
367// uregex_setText
368//
73c04bcf 369//------------------------------------------------------------------------------
374ca955 370U_CAPI void U_EXPORT2
729e4ab9 371uregex_setText(URegularExpression *regexp2,
374ca955
A
372 const UChar *text,
373 int32_t textLength,
374 UErrorCode *status) {
729e4ab9 375 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 376 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
377 return;
378 }
379 if (text == NULL || textLength < -1) {
380 *status = U_ILLEGAL_ARGUMENT_ERROR;
381 return;
382 }
729e4ab9
A
383
384 if (regexp->fOwnsText && regexp->fText != NULL) {
385 uprv_free((void *)regexp->fText);
386 }
387
374ca955
A
388 regexp->fText = text;
389 regexp->fTextLength = textLength;
729e4ab9
A
390 regexp->fOwnsText = FALSE;
391
392 UText input = UTEXT_INITIALIZER;
393 utext_openUChars(&input, text, textLength, status);
394 regexp->fMatcher->reset(&input);
395 utext_close(&input); // reset() made a shallow clone, so we don't need this copy
396}
397
374ca955 398
729e4ab9
A
399//------------------------------------------------------------------------------
400//
401// uregex_setUText
402//
403//------------------------------------------------------------------------------
404U_CAPI void U_EXPORT2
405uregex_setUText(URegularExpression *regexp2,
406 UText *text,
407 UErrorCode *status) {
408 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 409 if (validateRE(regexp, FALSE, status) == FALSE) {
729e4ab9
A
410 return;
411 }
412 if (text == NULL) {
413 *status = U_ILLEGAL_ARGUMENT_ERROR;
414 return;
415 }
416
417 if (regexp->fOwnsText && regexp->fText != NULL) {
418 uprv_free((void *)regexp->fText);
419 }
420
421 regexp->fText = NULL; // only fill it in on request
422 regexp->fTextLength = -1;
423 regexp->fOwnsText = TRUE;
424 regexp->fMatcher->reset(text);
73c04bcf 425}
374ca955
A
426
427
428
73c04bcf 429//------------------------------------------------------------------------------
374ca955
A
430//
431// uregex_getText
432//
73c04bcf 433//------------------------------------------------------------------------------
374ca955 434U_CAPI const UChar * U_EXPORT2
729e4ab9 435uregex_getText(URegularExpression *regexp2,
374ca955
A
436 int32_t *textLength,
437 UErrorCode *status) {
729e4ab9 438 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 439 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
440 return NULL;
441 }
729e4ab9
A
442
443 if (regexp->fText == NULL) {
444 // need to fill in the text
445 UText *inputText = regexp->fMatcher->inputText();
446 int64_t inputNativeLength = utext_nativeLength(inputText);
447 if (UTEXT_FULL_TEXT_IN_CHUNK(inputText, inputNativeLength)) {
448 regexp->fText = inputText->chunkContents;
449 regexp->fTextLength = (int32_t)inputNativeLength;
450 regexp->fOwnsText = FALSE; // because the UText owns it
451 } else {
452 UErrorCode lengthStatus = U_ZERO_ERROR;
453 regexp->fTextLength = utext_extract(inputText, 0, inputNativeLength, NULL, 0, &lengthStatus); // buffer overflow error
454 UChar *inputChars = (UChar *)uprv_malloc(sizeof(UChar)*(regexp->fTextLength+1));
455
456 utext_extract(inputText, 0, inputNativeLength, inputChars, regexp->fTextLength+1, status);
457 regexp->fText = inputChars;
458 regexp->fOwnsText = TRUE; // should already be set but just in case
459 }
460 }
461
374ca955
A
462 if (textLength != NULL) {
463 *textLength = regexp->fTextLength;
464 }
465 return regexp->fText;
73c04bcf 466}
374ca955
A
467
468
729e4ab9
A
469//------------------------------------------------------------------------------
470//
471// uregex_getUText
472//
473//------------------------------------------------------------------------------
474U_CAPI UText * U_EXPORT2
475uregex_getUText(URegularExpression *regexp2,
476 UText *dest,
477 UErrorCode *status) {
478 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 479 if (validateRE(regexp, FALSE, status) == FALSE) {
729e4ab9
A
480 return dest;
481 }
482 return regexp->fMatcher->getInput(dest, *status);
483}
484
485
4388f060
A
486//------------------------------------------------------------------------------
487//
488// uregex_refreshUText
489//
490//------------------------------------------------------------------------------
491U_CAPI void U_EXPORT2
492uregex_refreshUText(URegularExpression *regexp2,
493 UText *text,
494 UErrorCode *status) {
495 RegularExpression *regexp = (RegularExpression*)regexp2;
496 if (validateRE(regexp, FALSE, status) == FALSE) {
497 return;
498 }
499 regexp->fMatcher->refreshInputText(text, *status);
500}
501
502
73c04bcf 503//------------------------------------------------------------------------------
374ca955
A
504//
505// uregex_matches
506//
73c04bcf 507//------------------------------------------------------------------------------
374ca955 508U_CAPI UBool U_EXPORT2
729e4ab9
A
509uregex_matches(URegularExpression *regexp2,
510 int32_t startIndex,
511 UErrorCode *status) {
512 return uregex_matches64( regexp2, (int64_t)startIndex, status);
513}
514
515U_CAPI UBool U_EXPORT2
516uregex_matches64(URegularExpression *regexp2,
517 int64_t startIndex,
518 UErrorCode *status) {
519 RegularExpression *regexp = (RegularExpression*)regexp2;
46f4442e 520 UBool result = FALSE;
4388f060 521 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
522 return result;
523 }
524 if (startIndex == -1) {
525 result = regexp->fMatcher->matches(*status);
526 } else {
527 result = regexp->fMatcher->matches(startIndex, *status);
374ca955 528 }
374ca955 529 return result;
73c04bcf 530}
374ca955
A
531
532
73c04bcf 533//------------------------------------------------------------------------------
374ca955
A
534//
535// uregex_lookingAt
536//
73c04bcf 537//------------------------------------------------------------------------------
374ca955 538U_CAPI UBool U_EXPORT2
729e4ab9 539uregex_lookingAt(URegularExpression *regexp2,
374ca955
A
540 int32_t startIndex,
541 UErrorCode *status) {
729e4ab9
A
542 return uregex_lookingAt64( regexp2, (int64_t)startIndex, status);
543}
544
545U_CAPI UBool U_EXPORT2
546uregex_lookingAt64(URegularExpression *regexp2,
547 int64_t startIndex,
548 UErrorCode *status) {
549 RegularExpression *regexp = (RegularExpression*)regexp2;
46f4442e 550 UBool result = FALSE;
4388f060 551 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
552 return result;
553 }
554 if (startIndex == -1) {
555 result = regexp->fMatcher->lookingAt(*status);
556 } else {
557 result = regexp->fMatcher->lookingAt(startIndex, *status);
374ca955 558 }
374ca955 559 return result;
73c04bcf 560}
374ca955
A
561
562
563
73c04bcf 564//------------------------------------------------------------------------------
374ca955
A
565//
566// uregex_find
567//
73c04bcf 568//------------------------------------------------------------------------------
374ca955 569U_CAPI UBool U_EXPORT2
729e4ab9 570uregex_find(URegularExpression *regexp2,
374ca955
A
571 int32_t startIndex,
572 UErrorCode *status) {
729e4ab9
A
573 return uregex_find64( regexp2, (int64_t)startIndex, status);
574}
575
576U_CAPI UBool U_EXPORT2
577uregex_find64(URegularExpression *regexp2,
578 int64_t startIndex,
579 UErrorCode *status) {
580 RegularExpression *regexp = (RegularExpression*)regexp2;
46f4442e 581 UBool result = FALSE;
4388f060 582 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
583 return result;
584 }
585 if (startIndex == -1) {
586 regexp->fMatcher->resetPreserveRegion();
587 result = regexp->fMatcher->find();
588 } else {
589 result = regexp->fMatcher->find(startIndex, *status);
374ca955 590 }
374ca955 591 return result;
73c04bcf 592}
374ca955 593
729e4ab9 594
73c04bcf 595//------------------------------------------------------------------------------
374ca955
A
596//
597// uregex_findNext
598//
73c04bcf 599//------------------------------------------------------------------------------
374ca955 600U_CAPI UBool U_EXPORT2
729e4ab9 601uregex_findNext(URegularExpression *regexp2,
374ca955 602 UErrorCode *status) {
729e4ab9 603 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 604 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
605 return FALSE;
606 }
607 UBool result = regexp->fMatcher->find();
608 return result;
73c04bcf 609}
374ca955 610
73c04bcf 611//------------------------------------------------------------------------------
374ca955
A
612//
613// uregex_groupCount
614//
73c04bcf 615//------------------------------------------------------------------------------
374ca955 616U_CAPI int32_t U_EXPORT2
729e4ab9 617uregex_groupCount(URegularExpression *regexp2,
374ca955 618 UErrorCode *status) {
729e4ab9 619 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 620 if (validateRE(regexp, FALSE, status) == FALSE) {
374ca955
A
621 return 0;
622 }
623 int32_t result = regexp->fMatcher->groupCount();
624 return result;
73c04bcf 625}
374ca955
A
626
627
73c04bcf 628//------------------------------------------------------------------------------
374ca955
A
629//
630// uregex_group
631//
73c04bcf 632//------------------------------------------------------------------------------
374ca955 633U_CAPI int32_t U_EXPORT2
729e4ab9 634uregex_group(URegularExpression *regexp2,
374ca955
A
635 int32_t groupNum,
636 UChar *dest,
637 int32_t destCapacity,
638 UErrorCode *status) {
729e4ab9 639 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 640 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
641 return 0;
642 }
643 if (destCapacity < 0 || (destCapacity > 0 && dest == NULL)) {
644 *status = U_ILLEGAL_ARGUMENT_ERROR;
645 return 0;
646 }
729e4ab9
A
647
648 if (destCapacity == 0 || regexp->fText != NULL) {
649 // If preflighting or if we already have the text as UChars,
650 // this is a little cheaper than going through uregex_groupUTextDeep()
651
652 //
653 // Pick up the range of characters from the matcher
654 //
655 int32_t startIx = regexp->fMatcher->start(groupNum, *status);
656 int32_t endIx = regexp->fMatcher->end (groupNum, *status);
657 if (U_FAILURE(*status)) {
658 return 0;
659 }
374ca955 660
729e4ab9
A
661 //
662 // Trim length based on buffer capacity
663 //
664 int32_t fullLength = endIx - startIx;
665 int32_t copyLength = fullLength;
666 if (copyLength < destCapacity) {
667 dest[copyLength] = 0;
668 } else if (copyLength == destCapacity) {
669 *status = U_STRING_NOT_TERMINATED_WARNING;
670 } else {
671 copyLength = destCapacity;
672 *status = U_BUFFER_OVERFLOW_ERROR;
673 }
674
675 //
676 // Copy capture group to user's buffer
677 //
678 if (copyLength > 0) {
679 u_memcpy(dest, &regexp->fText[startIx], copyLength);
680 }
681 return fullLength;
374ca955 682 } else {
57a6839d 683 int32_t result = 0;
729e4ab9 684 UText *groupText = uregex_groupUTextDeep(regexp2, groupNum, NULL, status);
57a6839d
A
685 if (U_SUCCESS(*status)) {
686 result = utext_extract(groupText, 0, utext_nativeLength(groupText), dest, destCapacity, status);
687 }
729e4ab9
A
688 utext_close(groupText);
689 return result;
374ca955 690 }
729e4ab9 691}
374ca955 692
729e4ab9
A
693
694//------------------------------------------------------------------------------
695//
696// uregex_groupUText
697//
698//------------------------------------------------------------------------------
699U_CAPI UText * U_EXPORT2
700uregex_groupUText(URegularExpression *regexp2,
701 int32_t groupNum,
702 UText *dest,
703 int64_t *groupLength,
704 UErrorCode *status) {
705 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 706 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
707 UErrorCode emptyTextStatus = U_ZERO_ERROR;
708 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
374ca955 709 }
729e4ab9
A
710
711 return regexp->fMatcher->group(groupNum, dest, *groupLength, *status);
73c04bcf 712}
374ca955 713
729e4ab9
A
714//------------------------------------------------------------------------------
715//
716// uregex_groupUTextDeep
717//
718//------------------------------------------------------------------------------
719U_CAPI UText * U_EXPORT2
720uregex_groupUTextDeep(URegularExpression *regexp2,
721 int32_t groupNum,
722 UText *dest,
723 UErrorCode *status) {
724 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 725 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
726 UErrorCode emptyTextStatus = U_ZERO_ERROR;
727 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
728 }
729
730 if (regexp->fText != NULL) {
731 //
732 // Pick up the range of characters from the matcher
733 // and use our already-extracted characters
734 //
735 int32_t startIx = regexp->fMatcher->start(groupNum, *status);
736 int32_t endIx = regexp->fMatcher->end (groupNum, *status);
737 if (U_FAILURE(*status)) {
738 UErrorCode emptyTextStatus = U_ZERO_ERROR;
739 return (dest ? dest : utext_openUChars(NULL, NULL, 0, &emptyTextStatus));
740 }
741
742 if (dest) {
743 utext_replace(dest, 0, utext_nativeLength(dest), &regexp->fText[startIx], endIx - startIx, status);
744 } else {
745 UText groupText = UTEXT_INITIALIZER;
746 utext_openUChars(&groupText, &regexp->fText[startIx], endIx - startIx, status);
747 dest = utext_clone(NULL, &groupText, TRUE, FALSE, status);
748 utext_close(&groupText);
749 }
750
751 return dest;
752 } else {
753 return regexp->fMatcher->group(groupNum, dest, *status);
754 }
755}
374ca955 756
73c04bcf 757//------------------------------------------------------------------------------
374ca955
A
758//
759// uregex_start
760//
73c04bcf 761//------------------------------------------------------------------------------
374ca955 762U_CAPI int32_t U_EXPORT2
729e4ab9 763uregex_start(URegularExpression *regexp2,
374ca955
A
764 int32_t groupNum,
765 UErrorCode *status) {
729e4ab9
A
766 return (int32_t)uregex_start64( regexp2, groupNum, status);
767}
768
769U_CAPI int64_t U_EXPORT2
770uregex_start64(URegularExpression *regexp2,
771 int32_t groupNum,
772 UErrorCode *status) {
773 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 774 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
775 return 0;
776 }
777 int32_t result = regexp->fMatcher->start(groupNum, *status);
778 return result;
73c04bcf 779}
374ca955 780
73c04bcf 781//------------------------------------------------------------------------------
374ca955
A
782//
783// uregex_end
784//
73c04bcf 785//------------------------------------------------------------------------------
374ca955 786U_CAPI int32_t U_EXPORT2
729e4ab9 787uregex_end(URegularExpression *regexp2,
374ca955
A
788 int32_t groupNum,
789 UErrorCode *status) {
729e4ab9
A
790 return (int32_t)uregex_end64( regexp2, groupNum, status);
791}
792
793U_CAPI int64_t U_EXPORT2
794uregex_end64(URegularExpression *regexp2,
795 int32_t groupNum,
796 UErrorCode *status) {
797 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 798 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
799 return 0;
800 }
801 int32_t result = regexp->fMatcher->end(groupNum, *status);
802 return result;
73c04bcf 803}
374ca955 804
73c04bcf 805//------------------------------------------------------------------------------
374ca955
A
806//
807// uregex_reset
808//
73c04bcf 809//------------------------------------------------------------------------------
374ca955 810U_CAPI void U_EXPORT2
729e4ab9 811uregex_reset(URegularExpression *regexp2,
374ca955
A
812 int32_t index,
813 UErrorCode *status) {
729e4ab9
A
814 uregex_reset64( regexp2, (int64_t)index, status);
815}
816
817U_CAPI void U_EXPORT2
818uregex_reset64(URegularExpression *regexp2,
819 int64_t index,
820 UErrorCode *status) {
821 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 822 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
823 return;
824 }
825 regexp->fMatcher->reset(index, *status);
73c04bcf 826}
374ca955
A
827
828
46f4442e
A
829//------------------------------------------------------------------------------
830//
831// uregex_setRegion
832//
833//------------------------------------------------------------------------------
834U_CAPI void U_EXPORT2
729e4ab9 835uregex_setRegion(URegularExpression *regexp2,
46f4442e
A
836 int32_t regionStart,
837 int32_t regionLimit,
838 UErrorCode *status) {
729e4ab9
A
839 uregex_setRegion64( regexp2, (int64_t)regionStart, (int64_t)regionLimit, status);
840}
841
842U_CAPI void U_EXPORT2
843uregex_setRegion64(URegularExpression *regexp2,
844 int64_t regionStart,
845 int64_t regionLimit,
846 UErrorCode *status) {
847 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 848 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
849 return;
850 }
851 regexp->fMatcher->region(regionStart, regionLimit, *status);
852}
853
854
729e4ab9
A
855//------------------------------------------------------------------------------
856//
857// uregex_setRegionAndStart
858//
859//------------------------------------------------------------------------------
51004dcb 860U_CAPI void U_EXPORT2
729e4ab9
A
861uregex_setRegionAndStart(URegularExpression *regexp2,
862 int64_t regionStart,
863 int64_t regionLimit,
864 int64_t startIndex,
865 UErrorCode *status) {
866 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 867 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
868 return;
869 }
870 regexp->fMatcher->region(regionStart, regionLimit, startIndex, *status);
871}
872
46f4442e
A
873//------------------------------------------------------------------------------
874//
875// uregex_regionStart
876//
877//------------------------------------------------------------------------------
878U_CAPI int32_t U_EXPORT2
729e4ab9 879uregex_regionStart(const URegularExpression *regexp2,
46f4442e 880 UErrorCode *status) {
729e4ab9
A
881 return (int32_t)uregex_regionStart64(regexp2, status);
882}
883
884U_CAPI int64_t U_EXPORT2
885uregex_regionStart64(const URegularExpression *regexp2,
886 UErrorCode *status) {
887 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 888 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
889 return 0;
890 }
891 return regexp->fMatcher->regionStart();
892}
893
894
895//------------------------------------------------------------------------------
896//
897// uregex_regionEnd
898//
899//------------------------------------------------------------------------------
900U_CAPI int32_t U_EXPORT2
729e4ab9 901uregex_regionEnd(const URegularExpression *regexp2,
46f4442e 902 UErrorCode *status) {
729e4ab9
A
903 return (int32_t)uregex_regionEnd64(regexp2, status);
904}
905
906U_CAPI int64_t U_EXPORT2
907uregex_regionEnd64(const URegularExpression *regexp2,
908 UErrorCode *status) {
909 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 910 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
911 return 0;
912 }
913 return regexp->fMatcher->regionEnd();
914}
915
916
917//------------------------------------------------------------------------------
918//
919// uregex_hasTransparentBounds
920//
921//------------------------------------------------------------------------------
922U_CAPI UBool U_EXPORT2
729e4ab9 923uregex_hasTransparentBounds(const URegularExpression *regexp2,
46f4442e 924 UErrorCode *status) {
729e4ab9 925 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 926 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
927 return FALSE;
928 }
929 return regexp->fMatcher->hasTransparentBounds();
930}
931
932
933//------------------------------------------------------------------------------
934//
935// uregex_useTransparentBounds
936//
937//------------------------------------------------------------------------------
938U_CAPI void U_EXPORT2
729e4ab9
A
939uregex_useTransparentBounds(URegularExpression *regexp2,
940 UBool b,
941 UErrorCode *status) {
942 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 943 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
944 return;
945 }
946 regexp->fMatcher->useTransparentBounds(b);
947}
948
949
950//------------------------------------------------------------------------------
951//
952// uregex_hasAnchoringBounds
953//
954//------------------------------------------------------------------------------
955U_CAPI UBool U_EXPORT2
729e4ab9
A
956uregex_hasAnchoringBounds(const URegularExpression *regexp2,
957 UErrorCode *status) {
958 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 959 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
960 return FALSE;
961 }
962 return regexp->fMatcher->hasAnchoringBounds();
963}
964
965
966//------------------------------------------------------------------------------
967//
968// uregex_useAnchoringBounds
969//
970//------------------------------------------------------------------------------
971U_CAPI void U_EXPORT2
729e4ab9
A
972uregex_useAnchoringBounds(URegularExpression *regexp2,
973 UBool b,
974 UErrorCode *status) {
975 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 976 if (validateRE(regexp, FALSE, status) == FALSE) {
46f4442e
A
977 return;
978 }
979 regexp->fMatcher->useAnchoringBounds(b);
980}
981
982
983//------------------------------------------------------------------------------
984//
985// uregex_hitEnd
986//
987//------------------------------------------------------------------------------
988U_CAPI UBool U_EXPORT2
729e4ab9 989uregex_hitEnd(const URegularExpression *regexp2,
46f4442e 990 UErrorCode *status) {
729e4ab9 991 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 992 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
993 return FALSE;
994 }
995 return regexp->fMatcher->hitEnd();
996}
997
998
999//------------------------------------------------------------------------------
1000//
1001// uregex_requireEnd
1002//
1003//------------------------------------------------------------------------------
1004U_CAPI UBool U_EXPORT2
729e4ab9 1005uregex_requireEnd(const URegularExpression *regexp2,
46f4442e 1006 UErrorCode *status) {
729e4ab9 1007 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1008 if (validateRE(regexp, TRUE, status) == FALSE) {
46f4442e
A
1009 return FALSE;
1010 }
1011 return regexp->fMatcher->requireEnd();
1012}
1013
1014
1015//------------------------------------------------------------------------------
1016//
1017// uregex_setTimeLimit
1018//
1019//------------------------------------------------------------------------------
1020U_CAPI void U_EXPORT2
729e4ab9 1021uregex_setTimeLimit(URegularExpression *regexp2,
46f4442e
A
1022 int32_t limit,
1023 UErrorCode *status) {
729e4ab9 1024 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1025 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1026 regexp->fMatcher->setTimeLimit(limit, *status);
1027 }
1028}
1029
1030
1031
1032//------------------------------------------------------------------------------
1033//
1034// uregex_getTimeLimit
1035//
1036//------------------------------------------------------------------------------
1037U_CAPI int32_t U_EXPORT2
729e4ab9 1038uregex_getTimeLimit(const URegularExpression *regexp2,
46f4442e
A
1039 UErrorCode *status) {
1040 int32_t retVal = 0;
729e4ab9 1041 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1042 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1043 retVal = regexp->fMatcher->getTimeLimit();
1044 }
1045 return retVal;
1046}
1047
1048
1049
1050//------------------------------------------------------------------------------
1051//
1052// uregex_setStackLimit
1053//
1054//------------------------------------------------------------------------------
1055U_CAPI void U_EXPORT2
729e4ab9
A
1056uregex_setStackLimit(URegularExpression *regexp2,
1057 int32_t limit,
1058 UErrorCode *status) {
1059 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1060 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1061 regexp->fMatcher->setStackLimit(limit, *status);
1062 }
1063}
1064
1065
1066
1067//------------------------------------------------------------------------------
1068//
1069// uregex_getStackLimit
1070//
1071//------------------------------------------------------------------------------
1072U_CAPI int32_t U_EXPORT2
729e4ab9
A
1073uregex_getStackLimit(const URegularExpression *regexp2,
1074 UErrorCode *status) {
46f4442e 1075 int32_t retVal = 0;
729e4ab9 1076 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1077 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1078 retVal = regexp->fMatcher->getStackLimit();
1079 }
1080 return retVal;
1081}
1082
1083
1084//------------------------------------------------------------------------------
1085//
1086// uregex_setMatchCallback
1087//
1088//------------------------------------------------------------------------------
1089U_CAPI void U_EXPORT2
729e4ab9 1090uregex_setMatchCallback(URegularExpression *regexp2,
46f4442e
A
1091 URegexMatchCallback *callback,
1092 const void *context,
1093 UErrorCode *status) {
729e4ab9 1094 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1095 if (validateRE(regexp, FALSE, status)) {
729e4ab9 1096 regexp->fMatcher->setMatchCallback(callback, context, *status);
46f4442e
A
1097 }
1098}
1099
1100
1101//------------------------------------------------------------------------------
1102//
1103// uregex_getMatchCallback
1104//
1105//------------------------------------------------------------------------------
1106U_CAPI void U_EXPORT2
729e4ab9 1107uregex_getMatchCallback(const URegularExpression *regexp2,
46f4442e
A
1108 URegexMatchCallback **callback,
1109 const void **context,
1110 UErrorCode *status) {
729e4ab9 1111 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1112 if (validateRE(regexp, FALSE, status)) {
46f4442e
A
1113 regexp->fMatcher->getMatchCallback(*callback, *context, *status);
1114 }
1115}
1116
1117
729e4ab9
A
1118//------------------------------------------------------------------------------
1119//
//    uregex_setFindProgressCallback
1121//
1122//------------------------------------------------------------------------------
1123U_CAPI void U_EXPORT2
1124uregex_setFindProgressCallback(URegularExpression *regexp2,
1125 URegexFindProgressCallback *callback,
1126 const void *context,
1127 UErrorCode *status) {
1128 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1129 if (validateRE(regexp, FALSE, status)) {
729e4ab9
A
1130 regexp->fMatcher->setFindProgressCallback(callback, context, *status);
1131 }
1132}
1133
1134
1135//------------------------------------------------------------------------------
1136//
//    uregex_getFindProgressCallback
1138//
1139//------------------------------------------------------------------------------
1140U_CAPI void U_EXPORT2
1141uregex_getFindProgressCallback(const URegularExpression *regexp2,
1142 URegexFindProgressCallback **callback,
1143 const void **context,
1144 UErrorCode *status) {
1145 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1146 if (validateRE(regexp, FALSE, status)) {
729e4ab9
A
1147 regexp->fMatcher->getFindProgressCallback(*callback, *context, *status);
1148 }
1149}
1150
1151
73c04bcf 1152//------------------------------------------------------------------------------
374ca955
A
1153//
1154// uregex_replaceAll
1155//
73c04bcf 1156//------------------------------------------------------------------------------
374ca955 1157U_CAPI int32_t U_EXPORT2
729e4ab9 1158uregex_replaceAll(URegularExpression *regexp2,
73c04bcf 1159 const UChar *replacementText,
374ca955
A
1160 int32_t replacementLength,
1161 UChar *destBuf,
1162 int32_t destCapacity,
1163 UErrorCode *status) {
729e4ab9 1164 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1165 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1166 return 0;
1167 }
1168 if (replacementText == NULL || replacementLength < -1 ||
729e4ab9 1169 (destBuf == NULL && destCapacity > 0) ||
374ca955
A
1170 destCapacity < 0) {
1171 *status = U_ILLEGAL_ARGUMENT_ERROR;
1172 return 0;
1173 }
1174
1175 int32_t len = 0;
729e4ab9
A
1176
1177 uregex_reset(regexp2, 0, status);
1178
1179 // Note: Seperate error code variables for findNext() and appendReplacement()
1180 // are used so that destination buffer overflow errors
1181 // in appendReplacement won't stop findNext() from working.
1182 // appendReplacement() and appendTail() special case incoming buffer
1183 // overflow errors, continuing to return the correct length.
1184 UErrorCode findStatus = *status;
1185 while (uregex_findNext(regexp2, &findStatus)) {
1186 len += uregex_appendReplacement(regexp2, replacementText, replacementLength,
374ca955
A
1187 &destBuf, &destCapacity, status);
1188 }
729e4ab9
A
1189 len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
1190
1191 if (U_FAILURE(findStatus)) {
1192 // If anything went wrong with the findNext(), make that error trump
1193 // whatever may have happened with the append() operations.
1194 // Errors in findNext() are not expected.
1195 *status = findStatus;
1196 }
374ca955
A
1197
1198 return len;
73c04bcf 1199}
374ca955
A
1200
1201
729e4ab9
A
1202//------------------------------------------------------------------------------
1203//
1204// uregex_replaceAllUText
1205//
1206//------------------------------------------------------------------------------
1207U_CAPI UText * U_EXPORT2
1208uregex_replaceAllUText(URegularExpression *regexp2,
1209 UText *replacementText,
1210 UText *dest,
1211 UErrorCode *status) {
1212 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1213 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
1214 return 0;
1215 }
1216 if (replacementText == NULL) {
1217 *status = U_ILLEGAL_ARGUMENT_ERROR;
1218 return 0;
1219 }
1220
1221 dest = regexp->fMatcher->replaceAll(replacementText, dest, *status);
1222 return dest;
1223}
1224
1225
73c04bcf 1226//------------------------------------------------------------------------------
374ca955
A
1227//
1228// uregex_replaceFirst
1229//
73c04bcf 1230//------------------------------------------------------------------------------
374ca955 1231U_CAPI int32_t U_EXPORT2
729e4ab9 1232uregex_replaceFirst(URegularExpression *regexp2,
73c04bcf 1233 const UChar *replacementText,
374ca955
A
1234 int32_t replacementLength,
1235 UChar *destBuf,
1236 int32_t destCapacity,
1237 UErrorCode *status) {
729e4ab9 1238 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1239 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1240 return 0;
1241 }
1242 if (replacementText == NULL || replacementLength < -1 ||
729e4ab9 1243 (destBuf == NULL && destCapacity > 0) ||
374ca955
A
1244 destCapacity < 0) {
1245 *status = U_ILLEGAL_ARGUMENT_ERROR;
1246 return 0;
1247 }
1248
1249 int32_t len = 0;
1250 UBool findSucceeded;
729e4ab9
A
1251 uregex_reset(regexp2, 0, status);
1252 findSucceeded = uregex_find(regexp2, 0, status);
374ca955 1253 if (findSucceeded) {
729e4ab9 1254 len = uregex_appendReplacement(regexp2, replacementText, replacementLength,
374ca955
A
1255 &destBuf, &destCapacity, status);
1256 }
729e4ab9 1257 len += uregex_appendTail(regexp2, &destBuf, &destCapacity, status);
374ca955
A
1258
1259 return len;
73c04bcf 1260}
374ca955
A
1261
1262
73c04bcf 1263//------------------------------------------------------------------------------
374ca955 1264//
729e4ab9 1265// uregex_replaceFirstUText
374ca955 1266//
73c04bcf 1267//------------------------------------------------------------------------------
729e4ab9
A
1268U_CAPI UText * U_EXPORT2
1269uregex_replaceFirstUText(URegularExpression *regexp2,
1270 UText *replacementText,
1271 UText *dest,
1272 UErrorCode *status) {
1273 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1274 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
1275 return 0;
1276 }
1277 if (replacementText == NULL) {
1278 *status = U_ILLEGAL_ARGUMENT_ERROR;
1279 return 0;
1280 }
1281
1282 dest = regexp->fMatcher->replaceFirst(replacementText, dest, *status);
1283 return dest;
1284}
1285
374ca955 1286
729e4ab9
A
1287//------------------------------------------------------------------------------
1288//
1289// uregex_appendReplacement
1290//
1291//------------------------------------------------------------------------------
374ca955 1292
729e4ab9 1293U_NAMESPACE_BEGIN
374ca955
A
1294//
1295// Dummy class, because these functions need to be friends of class RegexMatcher,
1296// and stand-alone C functions don't work as friends
1297//
374ca955
A
1298class RegexCImpl {
1299 public:
729e4ab9 1300 inline static int32_t appendReplacement(RegularExpression *regexp,
73c04bcf 1301 const UChar *replacementText,
374ca955
A
1302 int32_t replacementLength,
1303 UChar **destBuf,
1304 int32_t *destCapacity,
1305 UErrorCode *status);
1306
729e4ab9
A
1307 inline static int32_t appendTail(RegularExpression *regexp,
1308 UChar **destBuf,
1309 int32_t *destCapacity,
1310 UErrorCode *status);
1311
1312 inline static int32_t split(RegularExpression *regexp,
1313 UChar *destBuf,
1314 int32_t destCapacity,
1315 int32_t *requiredCapacity,
1316 UChar *destFields[],
1317 int32_t destFieldsCapacity,
1318 UErrorCode *status);
374ca955 1319};
374ca955 1320
729e4ab9 1321U_NAMESPACE_END
374ca955 1322
374ca955
A
1323
1324
1325static const UChar BACKSLASH = 0x5c;
1326static const UChar DOLLARSIGN = 0x24;
1327
1328//
1329// Move a character to an output buffer, with bounds checking on the index.
1330// Index advances even if capacity is exceeded, for preflight size computations.
1331// This little sequence is used a LOT.
1332//
1333static inline void appendToBuf(UChar c, int32_t *idx, UChar *buf, int32_t bufCapacity) {
1334 if (*idx < bufCapacity) {
1335 buf[*idx] = c;
1336 }
1337 (*idx)++;
1338}
1339
1340
1341//
1342// appendReplacement, the actual implementation.
1343//
729e4ab9
A
1344int32_t RegexCImpl::appendReplacement(RegularExpression *regexp,
1345 const UChar *replacementText,
1346 int32_t replacementLength,
1347 UChar **destBuf,
1348 int32_t *destCapacity,
1349 UErrorCode *status) {
374ca955
A
1350
1351 // If we come in with a buffer overflow error, don't suppress the operation.
1352 // A series of appendReplacements, appendTail need to correctly preflight
1353 // the buffer size when an overflow happens somewhere in the middle.
1354 UBool pendingBufferOverflow = FALSE;
729e4ab9 1355 if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
374ca955
A
1356 pendingBufferOverflow = TRUE;
1357 *status = U_ZERO_ERROR;
1358 }
1359
1360 //
1361 // Validate all paramters
1362 //
4388f060 1363 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1364 return 0;
1365 }
1366 if (replacementText == NULL || replacementLength < -1 ||
1367 destCapacity == NULL || destBuf == NULL ||
729e4ab9 1368 (*destBuf == NULL && *destCapacity > 0) ||
374ca955
A
1369 *destCapacity < 0) {
1370 *status = U_ILLEGAL_ARGUMENT_ERROR;
1371 return 0;
1372 }
1373
1374 RegexMatcher *m = regexp->fMatcher;
1375 if (m->fMatch == FALSE) {
1376 *status = U_REGEX_INVALID_STATE;
1377 return 0;
1378 }
1379
1380 UChar *dest = *destBuf;
1381 int32_t capacity = *destCapacity;
1382 int32_t destIdx = 0;
1383 int32_t i;
1384
1385 // If it wasn't supplied by the caller, get the length of the replacement text.
1386 // TODO: slightly smarter logic in the copy loop could watch for the NUL on
1387 // the fly and avoid this step.
1388 if (replacementLength == -1) {
1389 replacementLength = u_strlen(replacementText);
1390 }
1391
1392 // Copy input string from the end of previous match to start of current match
729e4ab9
A
1393 if (regexp->fText != NULL) {
1394 int32_t matchStart;
1395 int32_t lastMatchEnd;
1396 if (UTEXT_USES_U16(m->fInputText)) {
1397 lastMatchEnd = (int32_t)m->fLastMatchEnd;
1398 matchStart = (int32_t)m->fMatchStart;
1399 } else {
1400 // !!!: Would like a better way to do this!
1401 UErrorCode status = U_ZERO_ERROR;
1402 lastMatchEnd = utext_extract(m->fInputText, 0, m->fLastMatchEnd, NULL, 0, &status);
1403 status = U_ZERO_ERROR;
1404 matchStart = lastMatchEnd + utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart, NULL, 0, &status);
1405 }
1406 for (i=lastMatchEnd; i<matchStart; i++) {
1407 appendToBuf(regexp->fText[i], &destIdx, dest, capacity);
1408 }
1409 } else {
1410 UErrorCode possibleOverflowError = U_ZERO_ERROR; // ignore
1411 destIdx += utext_extract(m->fInputText, m->fLastMatchEnd, m->fMatchStart,
4388f060
A
1412 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity),
1413 &possibleOverflowError);
374ca955 1414 }
4388f060 1415 U_ASSERT(destIdx >= 0);
374ca955
A
1416
1417 // scan the replacement text, looking for substitutions ($n) and \escapes.
1418 int32_t replIdx = 0;
1419 while (replIdx < replacementLength) {
1420 UChar c = replacementText[replIdx];
1421 replIdx++;
1422 if (c != DOLLARSIGN && c != BACKSLASH) {
1423 // Common case, no substitution, no escaping,
1424 // just copy the char to the dest buf.
1425 appendToBuf(c, &destIdx, dest, capacity);
1426 continue;
1427 }
1428
1429 if (c == BACKSLASH) {
1430 // Backslash Escape. Copy the following char out without further checks.
1431 // Note: Surrogate pairs don't need any special handling
1432 // The second half wont be a '$' or a '\', and
1433 // will move to the dest normally on the next
1434 // loop iteration.
1435 if (replIdx >= replacementLength) {
1436 break;
1437 }
1438 c = replacementText[replIdx];
1439
1440 if (c==0x55/*U*/ || c==0x75/*u*/) {
1441 // We have a \udddd or \Udddddddd escape sequence.
1442 UChar32 escapedChar =
729e4ab9 1443 u_unescapeAt(uregex_ucstr_unescape_charAt,
374ca955
A
1444 &replIdx, // Index is updated by unescapeAt
1445 replacementLength, // Length of replacement text
73c04bcf 1446 (void *)replacementText);
374ca955
A
1447
1448 if (escapedChar != (UChar32)0xFFFFFFFF) {
1449 if (escapedChar <= 0xffff) {
1450 appendToBuf((UChar)escapedChar, &destIdx, dest, capacity);
1451 } else {
1452 appendToBuf(U16_LEAD(escapedChar), &destIdx, dest, capacity);
1453 appendToBuf(U16_TRAIL(escapedChar), &destIdx, dest, capacity);
1454 }
1455 continue;
1456 }
1457 // Note: if the \u escape was invalid, just fall through and
1458 // treat it as a plain \<anything> escape.
1459 }
1460
1461 // Plain backslash escape. Just put out the escaped character.
1462 appendToBuf(c, &destIdx, dest, capacity);
1463
1464 replIdx++;
1465 continue;
1466 }
1467
1468
1469
1470 // We've got a $. Pick up a capture group number if one follows.
1471 // Consume at most the number of digits necessary for the largest capture
1472 // number that is valid for this pattern.
1473
1474 int32_t numDigits = 0;
1475 int32_t groupNum = 0;
1476 UChar32 digitC;
1477 for (;;) {
1478 if (replIdx >= replacementLength) {
1479 break;
1480 }
1481 U16_GET(replacementText, 0, replIdx, replacementLength, digitC);
1482 if (u_isdigit(digitC) == FALSE) {
1483 break;
1484 }
1485
1486 U16_FWD_1(replacementText, replIdx, replacementLength);
1487 groupNum=groupNum*10 + u_charDigitValue(digitC);
1488 numDigits++;
1489 if (numDigits >= m->fPattern->fMaxCaptureDigits) {
1490 break;
1491 }
1492 }
1493
1494
1495 if (numDigits == 0) {
1496 // The $ didn't introduce a group number at all.
1497 // Treat it as just part of the substitution text.
1498 appendToBuf(DOLLARSIGN, &destIdx, dest, capacity);
1499 continue;
1500 }
1501
1502 // Finally, append the capture group data to the destination.
4388f060
A
1503 destIdx += uregex_group((URegularExpression*)regexp, groupNum,
1504 dest==NULL?NULL:&dest[destIdx], REMAINING_CAPACITY(destIdx, capacity), status);
374ca955
A
1505 if (*status == U_BUFFER_OVERFLOW_ERROR) {
1506 // Ignore buffer overflow when extracting the group. We need to
1507 // continue on to get full size of the untruncated result. We will
1508 // raise our own buffer overflow error at the end.
1509 *status = U_ZERO_ERROR;
1510 }
1511
1512 if (U_FAILURE(*status)) {
1513 // Can fail if group number is out of range.
1514 break;
1515 }
1516
1517 }
1518
1519 //
1520 // Nul Terminate the dest buffer if possible.
1521 // Set the appropriate buffer overflow or not terminated error, if needed.
1522 //
1523 if (destIdx < capacity) {
1524 dest[destIdx] = 0;
1525 } else if (destIdx == *destCapacity) {
1526 *status = U_STRING_NOT_TERMINATED_WARNING;
1527 } else {
1528 *status = U_BUFFER_OVERFLOW_ERROR;
1529 }
1530
1531 //
1532 // Return an updated dest buffer and capacity to the caller.
1533 //
1534 if (destIdx > 0 && *destCapacity > 0) {
1535 if (destIdx < capacity) {
1536 *destBuf += destIdx;
1537 *destCapacity -= destIdx;
1538 } else {
1539 *destBuf += capacity;
1540 *destCapacity = 0;
1541 }
1542 }
1543
1544 // If we came in with a buffer overflow, make sure we go out with one also.
1545 // (A zero length match right at the end of the previous match could
1546 // make this function succeed even though a previous call had overflowed the buf)
1547 if (pendingBufferOverflow && U_SUCCESS(*status)) {
1548 *status = U_BUFFER_OVERFLOW_ERROR;
1549 }
1550
1551 return destIdx;
1552}
1553
1554//
729e4ab9 1555// appendReplacement the actual API function,
374ca955
A
1556//
1557U_CAPI int32_t U_EXPORT2
729e4ab9
A
1558uregex_appendReplacement(URegularExpression *regexp2,
1559 const UChar *replacementText,
1560 int32_t replacementLength,
1561 UChar **destBuf,
1562 int32_t *destCapacity,
1563 UErrorCode *status) {
1564
1565 RegularExpression *regexp = (RegularExpression*)regexp2;
374ca955
A
1566 return RegexCImpl::appendReplacement(
1567 regexp, replacementText, replacementLength,destBuf, destCapacity, status);
1568}
1569
729e4ab9
A
1570//
1571// uregex_appendReplacementUText...can just use the normal C++ method
1572//
1573U_CAPI void U_EXPORT2
1574uregex_appendReplacementUText(URegularExpression *regexp2,
1575 UText *replText,
1576 UText *dest,
1577 UErrorCode *status) {
1578 RegularExpression *regexp = (RegularExpression*)regexp2;
1579 regexp->fMatcher->appendReplacement(dest, replText, *status);
1580}
1581
374ca955 1582
73c04bcf 1583//------------------------------------------------------------------------------
374ca955
A
1584//
1585// uregex_appendTail
1586//
73c04bcf 1587//------------------------------------------------------------------------------
729e4ab9
A
1588int32_t RegexCImpl::appendTail(RegularExpression *regexp,
1589 UChar **destBuf,
1590 int32_t *destCapacity,
1591 UErrorCode *status)
46f4442e 1592{
374ca955
A
1593
1594 // If we come in with a buffer overflow error, don't suppress the operation.
1595 // A series of appendReplacements, appendTail need to correctly preflight
1596 // the buffer size when an overflow happens somewhere in the middle.
1597 UBool pendingBufferOverflow = FALSE;
729e4ab9 1598 if (*status == U_BUFFER_OVERFLOW_ERROR && destCapacity != NULL && *destCapacity == 0) {
374ca955
A
1599 pendingBufferOverflow = TRUE;
1600 *status = U_ZERO_ERROR;
1601 }
1602
4388f060 1603 if (validateRE(regexp, TRUE, status) == FALSE) {
374ca955
A
1604 return 0;
1605 }
729e4ab9
A
1606
1607 if (destCapacity == NULL || destBuf == NULL ||
1608 (*destBuf == NULL && *destCapacity > 0) ||
1609 *destCapacity < 0)
1610 {
1611 *status = U_ILLEGAL_ARGUMENT_ERROR;
1612 return 0;
374ca955
A
1613 }
1614
729e4ab9
A
1615 RegexMatcher *m = regexp->fMatcher;
1616
374ca955
A
1617 int32_t destIdx = 0;
1618 int32_t destCap = *destCapacity;
1619 UChar *dest = *destBuf;
729e4ab9
A
1620
1621 if (regexp->fText != NULL) {
1622 int32_t srcIdx;
1623 int64_t nativeIdx = (m->fMatch ? m->fMatchEnd : m->fLastMatchEnd);
1624 if (nativeIdx == -1) {
1625 srcIdx = 0;
1626 } else if (UTEXT_USES_U16(m->fInputText)) {
1627 srcIdx = (int32_t)nativeIdx;
374ca955 1628 } else {
729e4ab9
A
1629 UErrorCode status = U_ZERO_ERROR;
1630 srcIdx = utext_extract(m->fInputText, 0, nativeIdx, NULL, 0, &status);
1631 }
1632
1633 for (;;) {
4388f060
A
1634 U_ASSERT(destIdx >= 0);
1635
729e4ab9 1636 if (srcIdx == regexp->fTextLength) {
374ca955
A
1637 break;
1638 }
729e4ab9
A
1639 UChar c = regexp->fText[srcIdx];
1640 if (c == 0 && regexp->fTextLength == -1) {
1641 regexp->fTextLength = srcIdx;
1642 break;
1643 }
4388f060 1644
729e4ab9
A
1645 if (destIdx < destCap) {
1646 dest[destIdx] = c;
1647 } else {
1648 // We've overflowed the dest buffer.
1649 // If the total input string length is known, we can
1650 // compute the total buffer size needed without scanning through the string.
1651 if (regexp->fTextLength > 0) {
1652 destIdx += (regexp->fTextLength - srcIdx);
1653 break;
1654 }
1655 }
1656 srcIdx++;
1657 destIdx++;
1658 }
1659 } else {
1660 int64_t srcIdx;
1661 if (m->fMatch) {
1662 // The most recent call to find() succeeded.
1663 srcIdx = m->fMatchEnd;
1664 } else {
1665 // The last call to find() on this matcher failed().
1666 // Look back to the end of the last find() that succeeded for src index.
1667 srcIdx = m->fLastMatchEnd;
1668 if (srcIdx == -1) {
1669 // There has been no successful match with this matcher.
1670 // We want to copy the whole string.
1671 srcIdx = 0;
1672 }
374ca955 1673 }
729e4ab9
A
1674
1675 destIdx = utext_extract(m->fInputText, srcIdx, m->fInputLength, dest, destCap, status);
374ca955
A
1676 }
1677
1678 //
1679 // NUL terminate the output string, if possible, otherwise issue the
1680 // appropriate error or warning.
1681 //
1682 if (destIdx < destCap) {
1683 dest[destIdx] = 0;
1684 } else if (destIdx == destCap) {
1685 *status = U_STRING_NOT_TERMINATED_WARNING;
1686 } else {
1687 *status = U_BUFFER_OVERFLOW_ERROR;
1688 }
1689
1690 //
1691 // Update the user's buffer ptr and capacity vars to reflect the
1692 // amount used.
1693 //
1694 if (destIdx < destCap) {
1695 *destBuf += destIdx;
1696 *destCapacity -= destIdx;
4388f060 1697 } else if (*destBuf != NULL) {
374ca955
A
1698 *destBuf += destCap;
1699 *destCapacity = 0;
1700 }
1701
1702 if (pendingBufferOverflow && U_SUCCESS(*status)) {
1703 *status = U_BUFFER_OVERFLOW_ERROR;
1704 }
1705
1706 return destIdx;
73c04bcf 1707}
374ca955
A
1708
1709
729e4ab9
A
1710//
1711// appendTail the actual API function
1712//
374ca955 1713U_CAPI int32_t U_EXPORT2
729e4ab9 1714uregex_appendTail(URegularExpression *regexp2,
374ca955
A
1715 UChar **destBuf,
1716 int32_t *destCapacity,
1717 UErrorCode *status) {
729e4ab9 1718 RegularExpression *regexp = (RegularExpression*)regexp2;
374ca955
A
1719 return RegexCImpl::appendTail(regexp, destBuf, destCapacity, status);
1720}
1721
1722
729e4ab9
A
1723//
1724// uregex_appendTailUText...can just use the normal C++ method
1725//
1726U_CAPI UText * U_EXPORT2
1727uregex_appendTailUText(URegularExpression *regexp2,
1728 UText *dest,
1729 UErrorCode *status) {
1730 RegularExpression *regexp = (RegularExpression*)regexp2;
1731 return regexp->fMatcher->appendTail(dest, *status);
1732}
1733
1734
73c04bcf 1735//------------------------------------------------------------------------------
374ca955
A
1736//
1737// copyString Internal utility to copy a string to an output buffer,
1738// while managing buffer overflow and preflight size
1739// computation. NUL termination is added to destination,
1740// and the NUL is counted in the output size.
1741//
73c04bcf 1742//------------------------------------------------------------------------------
#if 0
// Currently compiled out. Copies srcLen UChars from srcPtr into destBuffer
// starting at *destIndex, then accounts for a terminating NUL. *destIndex is
// advanced by the full amount needed even when the buffer is too small.
static void copyString(UChar        *destBuffer,    //  Destination buffer.
                       int32_t       destCapacity,  //  Total capacity of dest buffer
                       int32_t      *destIndex,     //  Index into dest buffer.  Updated on return.
                                                    //    Update not clipped to destCapacity.
                       const UChar  *srcPtr,        //  Pointer to source string
                       int32_t       srcLen)        //  Source string len.
{
    int32_t outPos = *destIndex;

    for (int32_t srcPos = 0; srcPos < srcLen; ++srcPos) {
        if (outPos >= destCapacity) {
            // Out of room: count the rest of the source without copying it.
            outPos += srcLen - srcPos;
            break;
        }
        destBuffer[outPos++] = srcPtr[srcPos];
    }

    // NUL terminate when it fits; the NUL is counted either way.
    if (outPos < destCapacity) {
        destBuffer[outPos] = 0;
    }
    *destIndex = outPos + 1;
}
#endif
374ca955 1772
73c04bcf 1773//------------------------------------------------------------------------------
374ca955
A
1774//
1775// uregex_split
1776//
73c04bcf 1777//------------------------------------------------------------------------------
729e4ab9
A
1778int32_t RegexCImpl::split(RegularExpression *regexp,
1779 UChar *destBuf,
1780 int32_t destCapacity,
1781 int32_t *requiredCapacity,
1782 UChar *destFields[],
1783 int32_t destFieldsCapacity,
1784 UErrorCode *status) {
374ca955
A
1785 //
1786 // Reset for the input text
1787 //
1788 regexp->fMatcher->reset();
729e4ab9
A
1789 UText *inputText = regexp->fMatcher->fInputText;
1790 int64_t nextOutputStringStart = 0;
1791 int64_t inputLen = regexp->fMatcher->fInputLength;
374ca955
A
1792 if (inputLen == 0) {
1793 return 0;
1794 }
1795
374ca955
A
1796 //
1797 // Loop through the input text, searching for the delimiter pattern
1798 //
1799 int32_t i; // Index of the field being processed.
1800 int32_t destIdx = 0; // Next available position in destBuf;
1801 int32_t numCaptureGroups = regexp->fMatcher->groupCount();
729e4ab9 1802 UErrorCode tStatus = U_ZERO_ERROR; // Want to ignore any buffer overflow errors so that the strings are still counted
374ca955
A
1803 for (i=0; ; i++) {
1804 if (i>=destFieldsCapacity-1) {
729e4ab9 1805 // There are one or zero output strings left.
374ca955
A
1806 // Fill the last output string with whatever is left from the input, then exit the loop.
1807 // ( i will be == destFieldsCapacity if we filled the output array while processing
1808 // capture groups of the delimiter expression, in which case we will discard the
1809 // last capture group saved in favor of the unprocessed remainder of the
1810 // input string.)
729e4ab9
A
1811 if (inputLen > nextOutputStringStart) {
1812 if (i != destFieldsCapacity-1) {
1813 // No fields are left. Recycle the last one for holding the trailing part of
1814 // the input string.
1815 i = destFieldsCapacity-1;
1816 destIdx = (int32_t)(destFields[i] - destFields[0]);
1817 }
1818
1819 destFields[i] = &destBuf[destIdx];
1820 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1821 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
374ca955 1822 }
374ca955
A
1823 break;
1824 }
1825
1826 if (regexp->fMatcher->find()) {
1827 // We found another delimiter. Move everything from where we started looking
1828 // up until the start of the delimiter into the next output string.
374ca955 1829 destFields[i] = &destBuf[destIdx];
729e4ab9
A
1830
1831 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, regexp->fMatcher->fMatchStart,
1832 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), &tStatus);
1833 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1834 tStatus = U_ZERO_ERROR;
1835 } else {
1836 *status = tStatus;
1837 }
1838 nextOutputStringStart = regexp->fMatcher->fMatchEnd;
374ca955
A
1839
1840 // If the delimiter pattern has capturing parentheses, the captured
1841 // text goes out into the next n destination strings.
1842 int32_t groupNum;
1843 for (groupNum=1; groupNum<=numCaptureGroups; groupNum++) {
1844 // If we've run out of output string slots, bail out.
1845 if (i==destFieldsCapacity-1) {
1846 break;
1847 }
1848 i++;
1849
1850 // Set up to extract the capture group contents into the dest buffer.
374ca955 1851 destFields[i] = &destBuf[destIdx];
729e4ab9 1852 tStatus = U_ZERO_ERROR;
4388f060
A
1853 int32_t t = uregex_group((URegularExpression*)regexp,
1854 groupNum,
1855 destFields[i],
1856 REMAINING_CAPACITY(destIdx, destCapacity),
1857 &tStatus);
374ca955
A
1858 destIdx += t + 1; // Record the space used in the output string buffer.
1859 // +1 for the NUL that terminates the string.
729e4ab9
A
1860 if (tStatus == U_BUFFER_OVERFLOW_ERROR) {
1861 tStatus = U_ZERO_ERROR;
1862 } else {
1863 *status = tStatus;
1864 }
374ca955
A
1865 }
1866
1867 if (nextOutputStringStart == inputLen) {
4388f060
A
1868 // The delimiter was at the end of the string.
1869 // Output an empty string, and then we are done.
1870 if (destIdx < destCapacity) {
1871 destBuf[destIdx] = 0;
1872 }
1873 if (i < destFieldsCapacity-1) {
1874 ++i;
1875 }
1876 if (destIdx < destCapacity) {
1877 destFields[i] = destBuf + destIdx;
1878 }
1879 ++destIdx;
374ca955
A
1880 break;
1881 }
1882
1883 }
1884 else
1885 {
1886 // We ran off the end of the input while looking for the next delimiter.
1887 // All the remaining text goes into the current output string.
1888 destFields[i] = &destBuf[destIdx];
729e4ab9
A
1889 destIdx += 1 + utext_extract(inputText, nextOutputStringStart, inputLen,
1890 &destBuf[destIdx], REMAINING_CAPACITY(destIdx, destCapacity), status);
374ca955
A
1891 break;
1892 }
1893 }
1894
1895 // Zero out any unused portion of the destFields array
1896 int j;
1897 for (j=i+1; j<destFieldsCapacity; j++) {
1898 destFields[j] = NULL;
1899 }
1900
1901 if (requiredCapacity != NULL) {
1902 *requiredCapacity = destIdx;
1903 }
73c04bcf 1904 if (destIdx > destCapacity) {
374ca955
A
1905 *status = U_BUFFER_OVERFLOW_ERROR;
1906 }
1907 return i+1;
1908}
1909
729e4ab9
A
1910//
1911// uregex_split The actual API function
1912//
1913U_CAPI int32_t U_EXPORT2
1914uregex_split(URegularExpression *regexp2,
1915 UChar *destBuf,
1916 int32_t destCapacity,
1917 int32_t *requiredCapacity,
1918 UChar *destFields[],
1919 int32_t destFieldsCapacity,
1920 UErrorCode *status) {
1921 RegularExpression *regexp = (RegularExpression*)regexp2;
4388f060 1922 if (validateRE(regexp, TRUE, status) == FALSE) {
729e4ab9
A
1923 return 0;
1924 }
1925 if ((destBuf == NULL && destCapacity > 0) ||
1926 destCapacity < 0 ||
1927 destFields == NULL ||
1928 destFieldsCapacity < 1 ) {
1929 *status = U_ILLEGAL_ARGUMENT_ERROR;
1930 return 0;
1931 }
1932
1933 return RegexCImpl::split(regexp, destBuf, destCapacity, requiredCapacity, destFields, destFieldsCapacity, status);
1934}
1935
1936
1937//
1938// uregex_splitUText...can just use the normal C++ method
1939//
1940U_CAPI int32_t U_EXPORT2
1941uregex_splitUText(URegularExpression *regexp2,
1942 UText *destFields[],
1943 int32_t destFieldsCapacity,
1944 UErrorCode *status) {
1945 RegularExpression *regexp = (RegularExpression*)regexp2;
1946 return regexp->fMatcher->split(regexp->fMatcher->inputText(), destFields, destFieldsCapacity, *status);
1947}
1948
374ca955 1949
374ca955 1950#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
73c04bcf 1951