X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/57a6839dcb3bba09e8228b822b290604668416fe..HEAD:/icuSources/i18n/unicode/regex.h?ds=sidebyside diff --git a/icuSources/i18n/unicode/regex.h b/icuSources/i18n/unicode/regex.h index b2ef4ce3..6338eb7c 100644 --- a/icuSources/i18n/unicode/regex.h +++ b/icuSources/i18n/unicode/regex.h @@ -1,10 +1,12 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** -* Copyright (C) 2002-2013, International Business Machines +* Copyright (C) 2002-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * file name: regex.h -* encoding: US-ASCII +* encoding: UTF-8 * indentation:4 * * created on: 2002oct22 @@ -22,28 +24,28 @@ * \file * \brief C++ API: Regular Expressions * - *
The ICU API for processing regular expressions consists of two classes,
- * RegexPattern
and RegexMatcher
.
- * RegexPattern
objects represent a pre-processed, or compiled
+ * The ICU API for processing regular expressions consists of two classes,
+ * `RegexPattern` and `RegexMatcher`.
+ * `RegexPattern` objects represent a pre-processed, or compiled
* regular expression. They are created from a regular expression pattern string,
- * and can be used to create RegexMatcher
objects for the pattern.
Class RegexMatcher
bundles together a regular expression
+ * Class `RegexMatcher` bundles together a regular expression
* pattern and a target string to which the search pattern will be applied.
- * RegexMatcher
includes API for doing plain find or search
+ * `RegexMatcher` includes API for doing plain find or search
* operations, for search and replace operations, and for obtaining detailed
- * information about bounds of a match.
Note that by constructing RegexMatcher
objects directly from regular
+ * Note that by constructing `RegexMatcher` objects directly from regular
* expression pattern strings application code can be simplified and the explicit
- * need for RegexPattern
objects can usually be eliminated.
- *
RegexPattern
represents a compiled regular expression. It includes
+ * Class `RegexPattern` represents a compiled regular expression. It includes
* factory methods for creating a RegexPattern object from the source (string) form
* of a regular expression, methods for creating RegexMatchers that allow the pattern
* to be applied to input text, and a few convenience methods for simple common
* uses of regular expressions.
*
- * Class RegexPattern is not intended to be subclassed.
+ * Class RegexPattern is not intended to be subclassed. * * @stable ICU 2.4 */ -class U_I18N_API RegexPattern: public UObject { +class U_I18N_API RegexPattern U_FINAL : public UObject { public: /** * default constructor. Create a RegexPattern object that refers to no actual * pattern. Not normally needed; RegexPattern objects are usually - * created using the factory methodcompile()
.
+ * created using the factory method `compile()`.
*
* @stable ICU 2.4
*/
@@ -109,7 +113,7 @@ public:
/**
* Comparison operator. Two RegexPattern objects are considered equal if they
- * were constructed from identical source patterns using the same match flag
+ * were constructed from identical source patterns using the same #URegexpFlag
* settings.
* @param that a RegexPattern object to compare with "this".
* @return TRUE if the objects are equivalent.
@@ -119,7 +123,7 @@ public:
/**
* Comparison operator. Two RegexPattern objects are considered equal if they
- * were constructed from identical source patterns using the same match flag
+ * were constructed from identical source patterns using the same #URegexpFlag
* settings.
* @param that a RegexPattern object to compare with "this".
* @return TRUE if the objects are different.
@@ -136,7 +140,7 @@ public:
/**
* Create an exact copy of this RegexPattern object. Since RegexPattern is not
- * intended to be subclasses, clone()
and the copy construction are
+ * intended to be subclassed, clone()
and the copy construction are
* equivalent operations.
* @return the copy of this RegexPattern
* @stable ICU 2.4
@@ -149,16 +153,16 @@ public:
* object. These compile methods, rather than the constructors, are the usual
* way that RegexPattern objects are created.
*
- * Note that RegexPattern objects must not be deleted while RegexMatcher + * Note that RegexPattern objects must not be deleted while RegexMatcher * objects created from the pattern are active. RegexMatchers keep a pointer * back to their pattern, so premature deletion of the pattern is a - * catastrophic error.
+ * catastrophic error. * - *All pattern match mode flags are set to their default values.
+ * All #URegexpFlag pattern match mode flags are set to their default values. * - *Note that it is often more convenient to construct a RegexMatcher directly + * Note that it is often more convenient to construct a RegexMatcher directly * from a pattern string rather than separately compiling the pattern and - * then creating a RegexMatcher object from the pattern.
+ * then creating a RegexMatcher object from the pattern. * * @param regex The regular expression to be compiled. * @param pe Receives the position (line and column nubers) of any error @@ -177,16 +181,16 @@ public: * object. These compile methods, rather than the constructors, are the usual * way that RegexPattern objects are created. * - *Note that RegexPattern objects must not be deleted while RegexMatcher + * Note that RegexPattern objects must not be deleted while RegexMatcher * objects created from the pattern are active. RegexMatchers keep a pointer * back to their pattern, so premature deletion of the pattern is a - * catastrophic error.
+ * catastrophic error. * - *All pattern match mode flags are set to their default values.
+ * All #URegexpFlag pattern match mode flags are set to their default values. * - *Note that it is often more convenient to construct a RegexMatcher directly + * Note that it is often more convenient to construct a RegexMatcher directly * from a pattern string rather than separately compiling the pattern and - * then creating a RegexMatcher object from the pattern.
+ * then creating a RegexMatcher object from the pattern. * * @param regex The regular expression to be compiled. Note, the text referred * to by this UText must not be deleted during the lifetime of the @@ -204,21 +208,21 @@ public: /** * Compiles the regular expression in string form into a RegexPattern - * object using the specified match mode flags. These compile methods, + * object using the specified #URegexpFlag match mode flags. These compile methods, * rather than the constructors, are the usual way that RegexPattern objects * are created. * - *Note that RegexPattern objects must not be deleted while RegexMatcher + * Note that RegexPattern objects must not be deleted while RegexMatcher * objects created from the pattern are active. RegexMatchers keep a pointer * back to their pattern, so premature deletion of the pattern is a - * catastrophic error.
+ * catastrophic error. * - *Note that it is often more convenient to construct a RegexMatcher directly + * Note that it is often more convenient to construct a RegexMatcher directly * from a pattern string instead of than separately compiling the pattern and - * then creating a RegexMatcher object from the pattern.
+ * then creating a RegexMatcher object from the pattern. * * @param regex The regular expression to be compiled. - * @param flags The match mode flags to be used. + * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. * @param pe Receives the position (line and column numbers) of any error * within the regular expression.) * @param status A reference to a UErrorCode to receive any errors. @@ -233,23 +237,23 @@ public: /** * Compiles the regular expression in string form into a RegexPattern - * object using the specified match mode flags. These compile methods, + * object using the specified #URegexpFlag match mode flags. These compile methods, * rather than the constructors, are the usual way that RegexPattern objects * are created. * - *Note that RegexPattern objects must not be deleted while RegexMatcher + * Note that RegexPattern objects must not be deleted while RegexMatcher * objects created from the pattern are active. RegexMatchers keep a pointer * back to their pattern, so premature deletion of the pattern is a - * catastrophic error.
+ * catastrophic error. * - *Note that it is often more convenient to construct a RegexMatcher directly + * Note that it is often more convenient to construct a RegexMatcher directly * from a pattern string instead of than separately compiling the pattern and - * then creating a RegexMatcher object from the pattern.
+ * then creating a RegexMatcher object from the pattern. * * @param regex The regular expression to be compiled. Note, the text referred * to by this UText must not be deleted during the lifetime of the * RegexPattern object or any RegexMatcher object created from it. - * @param flags The match mode flags to be used. + * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. * @param pe Receives the position (line and column numbers) of any error * within the regular expression.) * @param status A reference to a UErrorCode to receive any errors. @@ -264,21 +268,21 @@ public: /** * Compiles the regular expression in string form into a RegexPattern - * object using the specified match mode flags. These compile methods, + * object using the specified #URegexpFlag match mode flags. These compile methods, * rather than the constructors, are the usual way that RegexPattern objects * are created. * - *Note that RegexPattern objects must not be deleted while RegexMatcher + * Note that RegexPattern objects must not be deleted while RegexMatcher * objects created from the pattern are active. RegexMatchers keep a pointer * back to their pattern, so premature deletion of the pattern is a - * catastrophic error.
+ * catastrophic error. * - *Note that it is often more convenient to construct a RegexMatcher directly + * Note that it is often more convenient to construct a RegexMatcher directly * from a pattern string instead of than separately compiling the pattern and - * then creating a RegexMatcher object from the pattern.
+ * then creating a RegexMatcher object from the pattern. * * @param regex The regular expression to be compiled. - * @param flags The match mode flags to be used. + * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. * @param status A reference to a UErrorCode to receive any errors. * @return A regexPattern object for the compiled pattern. * @@ -290,23 +294,23 @@ public: /** * Compiles the regular expression in string form into a RegexPattern - * object using the specified match mode flags. These compile methods, + * object using the specified #URegexpFlag match mode flags. These compile methods, * rather than the constructors, are the usual way that RegexPattern objects * are created. * - *Note that RegexPattern objects must not be deleted while RegexMatcher + * Note that RegexPattern objects must not be deleted while RegexMatcher * objects created from the pattern are active. RegexMatchers keep a pointer * back to their pattern, so premature deletion of the pattern is a - * catastrophic error.
+ * catastrophic error. * - *Note that it is often more convenient to construct a RegexMatcher directly + * Note that it is often more convenient to construct a RegexMatcher directly * from a pattern string instead of than separately compiling the pattern and - * then creating a RegexMatcher object from the pattern.
+ * then creating a RegexMatcher object from the pattern. * * @param regex The regular expression to be compiled. Note, the text referred * to by this UText must not be deleted during the lifetime of the * RegexPattern object or any RegexMatcher object created from it. - * @param flags The match mode flags to be used. + * @param flags The #URegexpFlag match mode flags to be used, e.g. #UREGEX_CASE_INSENSITIVE. * @param status A reference to a UErrorCode to receive any errors. * @return A regexPattern object for the compiled pattern. * @@ -317,8 +321,8 @@ public: UErrorCode &status); /** - * Get the match mode flags that were used when compiling this pattern. - * @return the match mode flags + * Get the #URegexpFlag match mode flags that were used when compiling this pattern. + * @return the #URegexpFlag match mode flags * @stable ICU 2.4 */ virtual uint32_t flags() const; @@ -328,7 +332,7 @@ public: * RegexMatcher can then be used to perform match, find or replace operations * on the input. Note that a RegexPattern object must not be deleted while * RegexMatchers created from it still exist and might possibly be used again. - *+ * * The matcher will retain a reference to the supplied input string, and all regexp * pattern matching operations happen directly on this original string. It is * critical that the string not be altered or deleted before use by the regular @@ -346,17 +350,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. - *
- * To efficiently work with UChar *strings, wrap the data in a UnicodeString
+ *
+ * To efficiently work with char16_t *strings, wrap the data in a UnicodeString
* using one of the aliasing constructors, such as
- * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);
+ * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);`
* or in a UText, using
- * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);
+ * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);`
*
*/
- RegexMatcher *matcher(const UChar *input,
+ RegexMatcher *matcher(const char16_t *input,
UErrorCode &status) const;
public:
@@ -437,6 +441,41 @@ public:
virtual UText *patternText(UErrorCode &status) const;
+ /**
+ * Get the group number corresponding to a named capture group.
+ * The returned number can be used with any function that accesses
+ * capture groups by number.
+ *
+ * The function returns an error status if the specified name does not
+ * appear in the pattern.
+ *
+ * @param groupName The capture group name.
+ * @param status A UErrorCode to receive any errors.
+ *
+ * @stable ICU 55
+ */
+ virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
+
+
+ /**
+ * Get the group number corresponding to a named capture group.
+ * The returned number can be used with any function that accesses
+ * capture groups by number.
+ *
+ * The function returns an error status if the specified name does not
+ * appear in the pattern.
+ *
+ * @param groupName The capture group name,
+ * platform invariant characters only.
+ * @param nameLength The length of the name, or -1 if the name is
+ * nul-terminated.
+ * @param status A UErrorCode to receive any errors.
+ *
+ * @stable ICU 55
+ */
+ virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
+
+
/**
* Split a string into fields. Somewhat like split() from Perl or Java.
* Pattern matches identify delimiters that separate the input
@@ -482,7 +521,7 @@ public:
/**
- * Split a string into fields. Somewhat like split() from Perl or Java.
+ * Split a string into fields. Somewhat like %split() from Perl or Java.
* Pattern matches identify delimiters that separate the input
* into fields. The input data between the delimiters becomes the
* fields themselves.
@@ -501,7 +540,7 @@ public:
* This behavior differs from Java, which ignores capture groups.
*
* For the best performance on split() operations,
- * RegexMatcher::split
is preferable to this function
+ * `RegexMatcher::split()` is preferable to this function
*
* @param input The string to be split into fields. The field delimiters
* match the pattern (in the "this" object)
@@ -573,8 +612,6 @@ private:
UVector32 *fGroupMap; // Map from capture group number to position of
// the group's variables in the matcher stack frame.
- int32_t fMaxCaptureDigits;
-
UnicodeSet **fStaticSets; // Ptr to static (shared) sets for predefined
// regex character classes, e.g. Word.
@@ -589,6 +626,8 @@ private:
Regex8BitSet *fInitialChars8;
UBool fNeedsAltInput;
+ UHashtable *fNamedCaptureMap; // Map from capture group names to numbers.
+
friend class RegexCompile;
friend class RegexMatcher;
friend class RegexCImpl;
@@ -596,8 +635,9 @@ private:
//
// Implementation Methods
//
- void init(); // Common initialization, for use by constructors.
- void zap(); // Common cleanup
+ void init(); // Common initialization, for use by constructors.
+ bool initNamedCaptureMap(); // Lazy init for fNamedCaptureMap.
+ void zap(); // Common cleanup
void dumpOp(int32_t index) const;
@@ -608,7 +648,7 @@ private:
* @internal
*/
void dumpPattern() const;
-#endif
+#endif /* U_HIDE_INTERNAL_API */
};
@@ -622,7 +662,7 @@ private:
*
* @stable ICU 2.4
*/
-class U_I18N_API RegexMatcher: public UObject {
+class U_I18N_API RegexMatcher U_FINAL : public UObject {
public:
/**
@@ -634,8 +674,7 @@ public:
* its matcher() method to create the RegexMatcher objects.
*
* @param regexp The Regular Expression to be compiled.
- * @param flags Regular expression options, such as case insensitive matching.
- * @see UREGEX_CASE_INSENSITIVE
+ * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE.
* @param status Any errors are reported by setting this UErrorCode variable.
* @stable ICU 2.6
*/
@@ -650,8 +689,7 @@ public:
* its matcher() method to create the RegexMatcher objects.
*
* @param regexp The regular expression to be compiled.
- * @param flags Regular expression options, such as case insensitive matching.
- * @see UREGEX_CASE_INSENSITIVE
+ * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE.
* @param status Any errors are reported by setting this UErrorCode variable.
*
* @stable ICU 4.6
@@ -665,7 +703,7 @@ public:
* created for the same expression, it will be more efficient to
* separately create and cache a RegexPattern object, and use
* its matcher() method to create the RegexMatcher objects.
- *
+ * * The matcher will retain a reference to the supplied input string, and all regexp * pattern matching operations happen directly on the original string. It is * critical that the string not be altered or deleted before use by the regular @@ -674,8 +712,7 @@ public: * @param regexp The Regular Expression to be compiled. * @param input The string to match. The matcher retains a reference to the * caller's string; mo copy is made. - * @param flags Regular expression options, such as case insensitive matching. - * @see UREGEX_CASE_INSENSITIVE + * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE. * @param status Any errors are reported by setting this UErrorCode variable. * @stable ICU 2.6 */ @@ -689,7 +726,7 @@ public: * created for the same expression, it will be more efficient to * separately create and cache a RegexPattern object, and use * its matcher() method to create the RegexMatcher objects. - *
+ * * The matcher will make a shallow clone of the supplied input text, and all regexp * pattern matching operations happen on this clone. While read-only operations on * the supplied text are permitted, it is critical that the underlying string not be @@ -697,8 +734,7 @@ public: * * @param regexp The Regular Expression to be compiled. * @param input The string to match. The matcher retains a shallow clone of the text. - * @param flags Regular expression options, such as case insensitive matching. - * @see UREGEX_CASE_INSENSITIVE + * @param flags #URegexpFlag options, such as #UREGEX_CASE_INSENSITIVE. * @param status Any errors are reported by setting this UErrorCode variable. * * @stable ICU 4.6 @@ -709,17 +745,16 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * create a matcher with a (UChar *) string as input rather than + * create a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. - *
- * To efficiently work with UChar *strings, wrap the data in a UnicodeString
+ *
+ * To efficiently work with char16_t *strings, wrap the data in a UnicodeString
* using one of the aliasing constructors, such as
- * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);
+ * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);`
* or in a UText, using
- * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);
- *
+ * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);`
*/
- RegexMatcher(const UnicodeString ®exp, const UChar *input,
+ RegexMatcher(const UnicodeString ®exp, const char16_t *input,
uint32_t flags, UErrorCode &status);
public:
@@ -760,8 +795,8 @@ public:
* always starts at the beginning of the input region;
* unlike that function, it does not require that the entire region be matched.
*
- *
If the match succeeds then more information can be obtained via the start()
,
- * end()
, and group()
functions.
If the match succeeds then more information can be obtained via the start()
,
- * end()
, and group()
functions.
start(), end()
and group()
+ * If a match is found, `start()`, `end()` and `group()`
* will provide more information regarding the match.
- * Note that if the input string is changed by the application, + * Note that if the input string is changed by the application, * use find(startPos, status) instead of find(), because the saved starting - * position may not be valid with the altered input string.
+ * position may not be valid with the altered input string. * @return TRUE if a match is found. * @stable ICU 2.4 */ virtual UBool find(); + /** + * Find the next pattern match in the input string. + * The find begins searching the input at the location following the end of + * the previous match, or at the start of the string if there is no previous match. + * If a match is found, `start()`, `end()` and `group()` + * will provide more information regarding the match. + * + * Note that if the input string is changed by the application, + * use find(startPos, status) instead of find(), because the saved starting + * position may not be valid with the altered input string. + * @param status A reference to a UErrorCode to receive any errors. + * @return TRUE if a match is found. + * @stable ICU 55 + */ + virtual UBool find(UErrorCode &status); + /** * Resets this RegexMatcher and then attempts to find the next substring of the * input string that matches the pattern, starting at the specified index. @@ -829,6 +880,11 @@ public: * Returns a string containing the text captured by the given group * during the previous match operation. Group(0) is the entire match. * + * A zero length string is returned both for capture groups that did not + * participate in the match and for actual zero length matches. + * To distinguish between these two cases use the function start(), + * which returns -1 for non-participating groups. + * * @param groupNum the capture group number * @param status A reference to a UErrorCode to receive any errors. * Possible errors are U_REGEX_INVALID_STATE if no match @@ -839,7 +895,6 @@ public: */ virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const; - /** * Returns the number of capturing groups in this matcher's pattern. * @return the number of capture groups @@ -868,6 +923,11 @@ public: * Returns a shallow clone of the entire live input string with the UText current native index * set to the beginning of the requested group. 
* + * A group length of zero is returned both for capture groups that did not + * participate in the match and for actual zero length matches. + * To distinguish between these two cases use the function start(), + * which returns -1 for non-participating groups. + * * @param groupNum The capture group number. * @param dest The UText into which the input should be cloned, or NULL to create a new UText. * @param group_len A reference to receive the length of the desired capture group @@ -881,24 +941,6 @@ public: */ virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const; - /** - * Returns a string containing the text captured by the given group - * during the previous match operation. Group(0) is the entire match. - * - * @param groupNum the capture group number - * @param dest A mutable UText in which the matching text is placed. - * If NULL, a new UText will be created (which may not be mutable). - * @param status A reference to a UErrorCode to receive any errors. - * Possible errors are U_REGEX_INVALID_STATE if no match - * has been attempted or the last match failed. - * @return A string containing the matched input text. If a pre-allocated UText - * was provided, it will always be used and returned. - * - * @internal ICU 4.4 technology preview - */ - virtual UText *group(int32_t groupNum, UText *dest, UErrorCode &status) const; - - /** * Returns the index in the input string of the start of the text matched * during the previous match operation. @@ -948,7 +990,6 @@ public: */ virtual int64_t start64(int32_t group, UErrorCode &status) const; - /** * Returns the index in the input string of the first character following the * text matched during the previous match operation. @@ -1018,7 +1059,6 @@ public: */ virtual int64_t end64(int32_t group, UErrorCode &status) const; - /** * Resets this matcher. 
The effect is to remove any memory of previous matches, * and to cause subsequent find() operations to begin at the beginning of @@ -1035,10 +1075,10 @@ public: * The effect is to remove any memory of previous matches, * and to cause subsequent find() operations to begin at * the specified (native) position in the input string. - *+ * * The matcher's region is reset to its default, which is the entire * input string. - *
+ * * An alternative to this function is to set a match region * beginning at the desired index. * @@ -1113,17 +1153,17 @@ public: private: /** * Cause a compilation error if an application accidentally attempts to - * reset a matcher with a (UChar *) string as input rather than + * reset a matcher with a (char16_t *) string as input rather than * a UnicodeString. Avoids a dangling reference to a temporary string. - *
- * To efficiently work with UChar *strings, wrap the data in a UnicodeString
+ *
+ * To efficiently work with char16_t *strings, wrap the data in a UnicodeString
* using one of the aliasing constructors, such as
- * UnicodeString(UBool isTerminated, const UChar *text, int32_t textLength);
+ * `UnicodeString(UBool isTerminated, const char16_t *text, int32_t textLength);`
* or in a UText, using
- * utext_openUChars(UText *ut, const UChar *text, int64_t textLength, UErrorCode *status);
+ * `utext_openUChars(UText *ut, const char16_t *text, int64_t textLength, UErrorCode *status);`
*
*/
- RegexMatcher &reset(const UChar *input);
+ RegexMatcher &reset(const char16_t *input);
public:
/**
@@ -1369,15 +1409,15 @@ public:
* the pattern with the replacement string. This is a convenience
* function that provides a complete find-and-replace operation.
*
- *
This function first resets this RegexMatcher. It then scans the input string + * This function first resets this RegexMatcher. It then scans the input string * looking for a match of the pattern. Input that is not part * of the match is appended directly to the result string; the match is replaced * in the result by the replacement string. The replacement string may contain - * references to captured groups.
+ * references to captured groups. * - *The state of the matcher (the position at which a subsequent find() + * The state of the matcher (the position at which a subsequent find() * would begin) after completing a replaceFirst() is not specified. The - * RegexMatcher should be reset before doing additional find() operations.
+ * RegexMatcher should be reset before doing additional find() operations. * * @param replacement a string containing the replacement text. * @param status a reference to a UErrorCode to receive any errors. @@ -1392,15 +1432,15 @@ public: * the pattern with the replacement string. This is a convenience * function that provides a complete find-and-replace operation. * - *This function first resets this RegexMatcher. It then scans the input string + * This function first resets this RegexMatcher. It then scans the input string * looking for a match of the pattern. Input that is not part * of the match is appended directly to the result string; the match is replaced * in the result by the replacement string. The replacement string may contain - * references to captured groups.
+ * references to captured groups. * - *The state of the matcher (the position at which a subsequent find() + * The state of the matcher (the position at which a subsequent find() * would begin) after completing a replaceFirst() is not specified. The - * RegexMatcher should be reset before doing additional find() operations.
+ * RegexMatcher should be reset before doing additional find() operations. * * @param replacement a string containing the replacement text. * @param dest a mutable UText in which the results are placed. @@ -1418,13 +1458,13 @@ public: * Implements a replace operation intended to be used as part of an * incremental find-and-replace. * - *The input string, starting from the end of the previous replacement and ending at + * The input string, starting from the end of the previous replacement and ending at * the start of the current match, is appended to the destination string. Then the * replacement string is appended to the output string, - * including handling any substitutions of captured text.
+ * including handling any substitutions of captured text. * - *For simple, prepackaged, non-incremental find-and-replace - * operations, see replaceFirst() or replaceAll().
+ * For simple, prepackaged, non-incremental find-and-replace + * operations, see replaceFirst() or replaceAll(). * * @param dest A UnicodeString to which the results of the find-and-replace are appended. * @param replacement A UnicodeString that provides the text to be substituted for @@ -1449,13 +1489,13 @@ public: * Implements a replace operation intended to be used as part of an * incremental find-and-replace. * - *The input string, starting from the end of the previous replacement and ending at + * The input string, starting from the end of the previous replacement and ending at * the start of the current match, is appended to the destination string. Then the * replacement string is appended to the output string, - * including handling any substitutions of captured text.
+ * including handling any substitutions of captured text. * - *For simple, prepackaged, non-incremental find-and-replace - * operations, see replaceFirst() or replaceAll().
+ * For simple, prepackaged, non-incremental find-and-replace + * operations, see replaceFirst() or replaceAll(). * * @param dest A mutable UText to which the results of the find-and-replace are appended. * Must not be NULL. @@ -1479,8 +1519,8 @@ public: /** * As the final step in a find-and-replace operation, append the remainder * of the input string, starting at the position following the last appendReplacement(), - * to the destination string.appendTail()
is intended to be invoked after one
- * or more invocations of the RegexMatcher::appendReplacement()
.
+ * to the destination string. `appendTail()` is intended to be invoked after one
+ * or more invocations of the `RegexMatcher::appendReplacement()`.
*
* @param dest A UnicodeString to which the results of the find-and-replace are appended.
* @return the destination string.
@@ -1492,8 +1532,8 @@ public:
/**
* As the final step in a find-and-replace operation, append the remainder
* of the input string, starting at the position following the last appendReplacement(),
- * to the destination string. appendTail()
is intended to be invoked after one
- * or more invocations of the RegexMatcher::appendReplacement()
.
+ * to the destination string. `appendTail()` is intended to be invoked after one
+ * or more invocations of the `RegexMatcher::appendReplacement()`.
*
* @param dest A mutable UText to which the results of the find-and-replace are appended.
* Must not be NULL.
@@ -1506,7 +1546,7 @@ public:
/**
- * Split a string into fields. Somewhat like split() from Perl.
+ * Split a string into fields. Somewhat like %split() from Perl.
* The pattern matches identify delimiters that separate the input
* into fields. The input data between the matches becomes the
* fields themselves.
@@ -1535,7 +1575,7 @@ public:
/**
- * Split a string into fields. Somewhat like split() from Perl.
+ * Split a string into fields. Somewhat like %split() from Perl.
* The pattern matches identify delimiters that separate the input
* into fields. The input data between the matches becomes the
* fields themselves.
@@ -1570,14 +1610,14 @@ public:
* infinite loop.
* When a limit is set a match operation will fail with an error if the
* limit is exceeded.
- * + * * The units of the limit are steps of the match engine. * Correspondence with actual processor time will depend on the speed * of the processor and the details of the specific pattern, but will * typically be on the order of milliseconds. - *
+ * * By default, the matching time is not limited. - *
+ * * * @param limit The limit value, or 0 for no limit. * @param status A reference to a UErrorCode to receive any errors. @@ -1596,16 +1636,16 @@ public: /** * Set the amount of heap storage available for use by the match backtracking stack. * The matcher is also reset, discarding any results from previous matches. - *
+ * * ICU uses a backtracking regular expression engine, with the backtrack stack * maintained on the heap. This function sets the limit to the amount of memory - * that can be used for this purpose. A backtracking stack overflow will + * that can be used for this purpose. A backtracking stack overflow will * result in an error from the match operation that caused it. - *
+ * * A limit is desirable because a malicious or poorly designed pattern can use * excessive memory, potentially crashing the process. A limit is enabled * by default. - *
+ * * @param limit The maximum size, in bytes, of the matching backtrack stack. * A value of zero means no limit. * The limit must be greater or equal to zero. @@ -1744,11 +1784,13 @@ private: REStackFrame *resetStack(); inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status); void IncrementTime(UErrorCode &status); - UBool ReportFindProgress(int64_t matchIndex, UErrorCode &status); + + // Call user find callback function, if set. Return TRUE if operation should be interrupted. + inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status); int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const; - UBool findUsingChunk(); + UBool findUsingChunk(UErrorCode &status); void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status); UBool isChunkWordBoundary(int32_t pos); @@ -1837,4 +1879,7 @@ private: U_NAMESPACE_END #endif // UCONFIG_NO_REGULAR_EXPRESSIONS + +#endif /* U_SHOW_CPLUSPLUS_API */ + #endif