]>
Commit | Line | Data |
---|---|---|
374ca955 A |
1 | /* |
2 | ********************************************************************** | |
2ca993e8 | 3 | * Copyright (C) 2004-2016, International Business Machines |
374ca955 A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
729e4ab9 | 6 | * file name: uregex.h |
374ca955 A |
7 | * encoding: US-ASCII |
8 | * indentation:4 | |
9 | * | |
10 | * created on: 2004mar09 | |
11 | * created by: Andy Heninger | |
12 | * | |
13 | * ICU Regular Expressions, API for C | |
14 | */ | |
15 | ||
16 | /** | |
17 | * \file | |
18 | * \brief C API: Regular Expressions | |
19 | * | |
20 | * <p>This is a C wrapper around the C++ RegexPattern and RegexMatcher classes.</p> | |
21 | */ | |
22 | ||
23 | #ifndef UREGEX_H | |
24 | #define UREGEX_H | |
25 | ||
729e4ab9 | 26 | #include "unicode/utext.h" |
374ca955 A |
27 | #include "unicode/utypes.h" |
28 | ||
29 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS | |
30 | ||
729e4ab9 | 31 | #include "unicode/localpointer.h" |
374ca955 A |
32 | #include "unicode/parseerr.h" |
33 | ||
374ca955 A |
34 | struct URegularExpression; |
35 | /** | |
4388f060 | 36 | * Structure representing a compiled regular expression, plus the results |
374ca955 | 37 | * of a match operation. |
73c04bcf | 38 | * @stable ICU 3.0 |
374ca955 A |
39 | */ |
40 | typedef struct URegularExpression URegularExpression; | |
41 | ||
374ca955 A |
42 | |
43 | /** | |
44 | * Constants for Regular Expression Match Modes. | |
45 | * @stable ICU 2.4 | |
46 | */ | |
47 | typedef enum URegexpFlag{ | |
374ca955 | 48 | |
73c04bcf A |
49 | #ifndef U_HIDE_DRAFT_API |
50 | /** Forces normalization of pattern and strings. | |
51 | Not implemented yet, just a placeholder, hence draft. | |
52 | @draft ICU 2.4 */ | |
53 | UREGEX_CANON_EQ = 128, | |
51004dcb | 54 | #endif /* U_HIDE_DRAFT_API */ |
374ca955 A |
55 | /** Enable case insensitive matching. @stable ICU 2.4 */ |
56 | UREGEX_CASE_INSENSITIVE = 2, | |
57 | ||
58 | /** Allow white space and comments within patterns @stable ICU 2.4 */ | |
59 | UREGEX_COMMENTS = 4, | |
60 | ||
61 | /** If set, '.' matches line terminators, otherwise '.' matching stops at line end. | |
62 | * @stable ICU 2.4 */ | |
63 | UREGEX_DOTALL = 32, | |
46f4442e A |
64 | |
65 | /** If set, treat the entire pattern as a literal string. | |
66 | * Metacharacters or escape sequences in the input sequence will be given | |
4388f060 | 67 | * no special meaning. |
46f4442e | 68 | * |
4388f060 | 69 | * The flag UREGEX_CASE_INSENSITIVE retains its impact |
46f4442e A |
70 | * on matching when used in conjunction with this flag. |
71 | * The other flags become superfluous. | |
4388f060 | 72 | * |
729e4ab9 | 73 | * @stable ICU 4.0 |
46f4442e A |
74 | */ |
75 | UREGEX_LITERAL = 16, | |
374ca955 A |
76 | |
77 | /** Control behavior of "$" and "^" | |
78 | * If set, recognize line terminators within string, | |
79 | * otherwise, match only at start and end of input string. | |
80 | * @stable ICU 2.4 */ | |
81 | UREGEX_MULTILINE = 8, | |
46f4442e A |
82 | |
83 | /** Unix-only line endings. | |
729e4ab9 | 84 | * When this mode is enabled, only \\u000a is recognized as a line ending |
46f4442e | 85 | * in the behavior of ., ^, and $. |
729e4ab9 | 86 | * @stable ICU 4.0 |
46f4442e A |
87 | */ |
88 | UREGEX_UNIX_LINES = 1, | |
374ca955 A |
89 | |
90 | /** Unicode word boundaries. | |
91 | * If set, \b uses the Unicode TR 29 definition of word boundaries. | |
92 | * Warning: Unicode word boundaries are quite different from | |
93 | * traditional regular expression word boundaries. See | |
94 | * http://unicode.org/reports/tr29/#Word_Boundaries | |
73c04bcf | 95 | * @stable ICU 2.8 |
374ca955 | 96 | */ |
46f4442e A |
97 | UREGEX_UWORD = 256, |
98 | ||
99 | /** Error on Unrecognized backslash escapes. | |
100 | * If set, fail with an error on patterns that contain | |
4388f060 | 101 | * backslash-escaped ASCII letters without a known special |
46f4442e A |
102 | * meaning. If this flag is not set, these |
103 | * escaped letters represent themselves. | |
729e4ab9 | 104 | * @stable ICU 4.0 |
46f4442e A |
105 | */ |
106 | UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512 | |
107 | ||
374ca955 A |
108 | } URegexpFlag; |
109 | ||
110 | /** | |
111 | * Open (compile) an ICU regular expression. Compiles the regular expression in | |
112 | * string form into an internal representation using the specified match mode flags. | |
113 | * The resulting regular expression handle can then be used to perform various | |
114 | * matching operations. | |
729e4ab9 | 115 | * |
374ca955 A |
116 | * |
117 | * @param pattern The Regular Expression pattern to be compiled. | |
118 | * @param patternLength The length of the pattern, or -1 if the pattern is | |
4388f060 | 119 | * NUL terminated. |
374ca955 A |
120 | * @param flags Flags that alter the default matching behavior for |
121 | * the regular expression, UREGEX_CASE_INSENSITIVE, for | |
122 | * example. For default behavior, set this parameter to zero. | |
123 | * See <code>enum URegexpFlag</code>. All desired flags | |
124 | * are bitwise-ORed together. | |
4388f060 | 125 | * @param pe Receives the position (line and column numbers) of any syntax |
374ca955 A |
126 | * error within the source regular expression string. If this |
127 | * information is not wanted, pass NULL for this parameter. | |
128 | * @param status Receives errors detected by this function. | |
73c04bcf | 129 | * @stable ICU 3.0 |
374ca955 A |
130 | * |
131 | */ | |
73c04bcf | 132 | U_STABLE URegularExpression * U_EXPORT2 |
374ca955 A |
133 | uregex_open( const UChar *pattern, |
134 | int32_t patternLength, | |
135 | uint32_t flags, | |
136 | UParseError *pe, | |
137 | UErrorCode *status); | |
4388f060 | 138 | |
729e4ab9 A |
139 | /** |
140 | * Open (compile) an ICU regular expression. Compiles the regular expression in | |
141 | * string form into an internal representation using the specified match mode flags. | |
142 | * The resulting regular expression handle can then be used to perform various | |
143 | * matching operations. | |
144 | * <p> | |
145 | * The contents of the pattern UText will be extracted and saved. Ownership of the | |
146 | * UText struct itself remains with the caller. This is to match the behavior of | |
147 | * uregex_open(). | |
148 | * | |
149 | * @param pattern The Regular Expression pattern to be compiled. | |
150 | * @param flags Flags that alter the default matching behavior for | |
151 | * the regular expression, UREGEX_CASE_INSENSITIVE, for | |
152 | * example. For default behavior, set this parameter to zero. | |
153 | * See <code>enum URegexpFlag</code>. All desired flags | |
154 | * are bitwise-ORed together. | |
4388f060 | 155 | * @param pe Receives the position (line and column numbers) of any syntax |
729e4ab9 A |
156 | * error within the source regular expression string. If this |
157 | * information is not wanted, pass NULL for this parameter. | |
158 | * @param status Receives errors detected by this function. | |
159 | * | |
4388f060 | 160 | * @stable ICU 4.6 |
729e4ab9 | 161 | */ |
51004dcb | 162 | U_STABLE URegularExpression * U_EXPORT2 |
729e4ab9 A |
163 | uregex_openUText(UText *pattern, |
164 | uint32_t flags, | |
165 | UParseError *pe, | |
166 | UErrorCode *status); | |
4388f060 | 167 | |
374ca955 A |
168 | /** |
169 | * Open (compile) an ICU regular expression. The resulting regular expression | |
170 | * handle can then be used to perform various matching operations. | |
171 | * <p> | |
172 | * This function is the same as uregex_open, except that the pattern | |
173 | * is supplied as an 8 bit char * string in the default code page. | |
174 | * | |
175 | * @param pattern The Regular Expression pattern to be compiled, | |
4388f060 | 176 | * NUL terminated. |
374ca955 A |
177 | * @param flags Flags that alter the default matching behavior for |
178 | * the regular expression, UREGEX_CASE_INSENSITIVE, for | |
179 | * example. For default behavior, set this parameter to zero. | |
180 | * See <code>enum URegexpFlag</code>. All desired flags | |
181 | * are bitwise-ORed together. | |
4388f060 | 182 | * @param pe Receives the position (line and column numbers) of any syntax |
374ca955 A |
183 | * error within the source regular expression string. If this |
184 | * information is not wanted, pass NULL for this parameter. | |
185 | * @param status Receives errors detected by this function. | |
186 | * @return The URegularExpression object representing the compiled | |
187 | * pattern. | |
188 | * | |
73c04bcf | 189 | * @stable ICU 3.0 |
374ca955 | 190 | */ |
73c04bcf A |
191 | #if !UCONFIG_NO_CONVERSION |
192 | U_STABLE URegularExpression * U_EXPORT2 | |
374ca955 A |
193 | uregex_openC( const char *pattern, |
194 | uint32_t flags, | |
195 | UParseError *pe, | |
196 | UErrorCode *status); | |
73c04bcf | 197 | #endif |
374ca955 A |
198 | |
199 | ||
200 | ||
201 | /** | |
202 | * Close the regular expression, recovering all resources (memory) it | |
203 | * was holding. | |
204 | * | |
205 | * @param regexp The regular expression to be closed. | |
73c04bcf | 206 | * @stable ICU 3.0 |
374ca955 | 207 | */ |
73c04bcf | 208 | U_STABLE void U_EXPORT2 |
374ca955 A |
209 | uregex_close(URegularExpression *regexp); |
210 | ||
729e4ab9 A |
211 | #if U_SHOW_CPLUSPLUS_API |
212 | ||
213 | U_NAMESPACE_BEGIN | |
214 | ||
215 | /** | |
216 | * \class LocalURegularExpressionPointer | |
217 | * "Smart pointer" class, closes a URegularExpression via uregex_close(). | |
218 | * For most methods see the LocalPointerBase base class. | |
219 | * | |
220 | * @see LocalPointerBase | |
221 | * @see LocalPointer | |
222 | * @stable ICU 4.4 | |
223 | */ | |
224 | U_DEFINE_LOCAL_OPEN_POINTER(LocalURegularExpressionPointer, URegularExpression, uregex_close); | |
225 | ||
226 | U_NAMESPACE_END | |
227 | ||
228 | #endif | |
229 | ||
374ca955 A |
230 | /** |
231 | * Make a copy of a compiled regular expression. Cloning a regular | |
232 | * expression is faster than opening a second instance from the source | |
233 | * form of the expression, and requires less memory. | |
234 | * <p> | |
235 | * Note that the current input string and the position of any matched text | |
4388f060 | 236 | * within it are not cloned; only the pattern itself and the |
374ca955 A |
237 | * match mode flags are copied. |
238 | * <p> | |
239 | * Cloning can be particularly useful to threaded applications that perform | |
240 | * multiple match operations in parallel. Each concurrent RE | |
241 | * operation requires its own instance of a URegularExpression. | |
242 | * | |
243 | * @param regexp The compiled regular expression to be cloned. | |
244 | * @param status Receives indication of any errors encountered | |
245 | * @return the cloned copy of the compiled regular expression. | |
73c04bcf | 246 | * @stable ICU 3.0 |
374ca955 | 247 | */ |
73c04bcf | 248 | U_STABLE URegularExpression * U_EXPORT2 |
374ca955 A |
249 | uregex_clone(const URegularExpression *regexp, UErrorCode *status); |
250 | ||
251 | /** | |
729e4ab9 A |
252 | * Returns a pointer to the source form of the pattern for this regular expression. |
253 | * This function will work even if the pattern was originally specified as a UText. | |
374ca955 A |
254 | * |
255 | * @param regexp The compiled regular expression. | |
256 | * @param patLength This output parameter will be set to the length of the | |
257 | * pattern string. A NULL pointer may be used here if the | |
258 | * pattern length is not needed, as would be the case if | |
259 | * the pattern is known in advance to be a NUL terminated | |
260 | * string. | |
261 | * @param status Receives errors detected by this function. | |
262 | * @return a pointer to the pattern string. The storage for the string is | |
263 | * owned by the regular expression object, and must not be | |
264 | * altered or deleted by the application. The returned string | |
265 | * will remain valid until the regular expression is closed. | |
73c04bcf | 266 | * @stable ICU 3.0 |
374ca955 | 267 | */ |
73c04bcf | 268 | U_STABLE const UChar * U_EXPORT2 |
729e4ab9 A |
269 | uregex_pattern(const URegularExpression *regexp, |
270 | int32_t *patLength, | |
271 | UErrorCode *status); | |
272 | ||
273 | /** | |
274 | * Returns the source text of the pattern for this regular expression. | |
275 | * This function will work even if the pattern was originally specified as a UChar string. | |
276 | * | |
277 | * @param regexp The compiled regular expression. | |
278 | * @param status Receives errors detected by this function. | |
279 | * @return the pattern text. The storage for the text is owned by the regular expression | |
280 | * object, and must not be altered or deleted. | |
281 | * | |
4388f060 | 282 | * @stable ICU 4.6 |
729e4ab9 | 283 | */ |
51004dcb | 284 | U_STABLE UText * U_EXPORT2 |
729e4ab9 A |
285 | uregex_patternUText(const URegularExpression *regexp, |
286 | UErrorCode *status); | |
374ca955 | 287 | |
374ca955 A |
288 | /** |
289 | * Get the match mode flags that were specified when compiling this regular expression. | |
290 | * @param status Receives errors detected by this function. | |
291 | * @param regexp The compiled regular expression. | |
292 | * @return The match mode flags | |
293 | * @see URegexpFlag | |
73c04bcf | 294 | * @stable ICU 3.0 |
374ca955 | 295 | */ |
73c04bcf | 296 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
297 | uregex_flags(const URegularExpression *regexp, |
298 | UErrorCode *status); | |
299 | ||
300 | ||
301 | /** | |
302 | * Set the subject text string upon which the regular expression will look for matches. | |
303 | * This function may be called any number of times, allowing the regular | |
304 | * expression pattern to be applied to different strings. | |
305 | * <p> | |
306 | * Regular expression matching operations work directly on the application's | |
307 | * string data. No copy is made. The subject string data must not be | |
308 | * altered after calling this function until after all regular expression | |
309 | * operations involving this string data are completed. | |
310 | * <p> | |
311 | * Zero length strings are permitted. In this case, no subsequent match | |
312 | * operation will dereference the text string pointer. | |
313 | * | |
314 | * @param regexp The compiled regular expression. | |
315 | * @param text The subject text string. | |
316 | * @param textLength The length of the subject text, or -1 if the string | |
317 | * is NUL terminated. | |
318 | * @param status Receives errors detected by this function. | |
73c04bcf | 319 | * @stable ICU 3.0 |
374ca955 | 320 | */ |
73c04bcf | 321 | U_STABLE void U_EXPORT2 |
374ca955 A |
322 | uregex_setText(URegularExpression *regexp, |
323 | const UChar *text, | |
324 | int32_t textLength, | |
325 | UErrorCode *status); | |
326 | ||
729e4ab9 A |
327 | |
328 | /** | |
329 | * Set the subject text string upon which the regular expression will look for matches. | |
330 | * This function may be called any number of times, allowing the regular | |
331 | * expression pattern to be applied to different strings. | |
332 | * <p> | |
333 | * Regular expression matching operations work directly on the application's | |
334 | * string data; only a shallow clone is made. The subject string data must not be | |
335 | * altered after calling this function until after all regular expression | |
336 | * operations involving this string data are completed. | |
337 | * | |
338 | * @param regexp The compiled regular expression. | |
339 | * @param text The subject text string. | |
340 | * @param status Receives errors detected by this function. | |
341 | * | |
4388f060 | 342 | * @stable ICU 4.6 |
729e4ab9 | 343 | */ |
51004dcb | 344 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
345 | uregex_setUText(URegularExpression *regexp, |
346 | UText *text, | |
347 | UErrorCode *status); | |
348 | ||
374ca955 A |
349 | /** |
350 | * Get the subject text that is currently associated with this | |
729e4ab9 A |
351 | * regular expression object. If the input was supplied using uregex_setText(), |
352 | * that pointer will be returned. Otherwise, the characters in the input will | |
353 | * be extracted to a buffer and returned. In either case, ownership remains | |
354 | * with the regular expression object. | |
355 | * | |
356 | * This function will work even if the input was originally specified as a UText. | |
374ca955 A |
357 | * |
358 | * @param regexp The compiled regular expression. | |
359 | * @param textLength The length of the string is returned in this output parameter. | |
360 | * A NULL pointer may be used here if the | |
361 | * text length is not needed, as would be the case if | |
362 | * the text is known in advance to be a NUL terminated | |
363 | * string. | |
364 | * @param status Receives errors detected by this function. | |
729e4ab9 | 365 | * @return Pointer to the subject text string currently associated with |
374ca955 | 366 | * this regular expression. |
73c04bcf | 367 | * @stable ICU 3.0 |
374ca955 | 368 | */ |
73c04bcf | 369 | U_STABLE const UChar * U_EXPORT2 |
374ca955 A |
370 | uregex_getText(URegularExpression *regexp, |
371 | int32_t *textLength, | |
372 | UErrorCode *status); | |
4388f060 | 373 | |
729e4ab9 A |
374 | /** |
375 | * Get the subject text that is currently associated with this | |
376 | * regular expression object. | |
377 | * | |
378 | * This function will work even if the input was originally specified as a UChar string. | |
379 | * | |
380 | * @param regexp The compiled regular expression. | |
381 | * @param dest A mutable UText in which to store the current input. | |
382 | * If NULL, a new UText will be created as an immutable shallow clone | |
383 | * of the actual input string. | |
384 | * @param status Receives errors detected by this function. | |
385 | * @return The subject text currently associated with this regular expression. | |
386 | * If a pre-allocated UText was provided, it will always be used and returned. | |
387 | * | |
4388f060 | 388 | * @stable ICU 4.6 |
729e4ab9 | 389 | */ |
51004dcb | 390 | U_STABLE UText * U_EXPORT2 |
729e4ab9 A |
391 | uregex_getUText(URegularExpression *regexp, |
392 | UText *dest, | |
393 | UErrorCode *status); | |
374ca955 | 394 | |
4388f060 A |
395 | /** |
396 | * Set the subject text string upon which the regular expression is looking for matches | |
397 | * without changing any other aspect of the matching state. | |
398 | * The new and previous text strings must have the same content. | |
399 | * | |
400 | * This function is intended for use in environments where ICU is operating on | |
401 | * strings that may move around in memory. It provides a mechanism for notifying | |
402 | * ICU that the string has been relocated, and providing a new UText to access the | |
403 | * string in its new position. | |
404 | * | |
405 | * Note that the regular expression implementation never copies the underlying text | |
406 | * of a string being matched, but always operates directly on the original text | |
407 | * provided by the user. Refreshing simply drops the references to the old text | |
408 | * and replaces them with references to the new. | |
409 | * | |
410 | * Caution: this function is normally used only by very specialized | |
411 | * system-level code. One example use case is with garbage collection | |
412 | * that moves the text in memory. | |
413 | * | |
414 | * @param regexp The compiled regular expression. | |
415 | * @param text The new (moved) text string. | |
416 | * @param status Receives errors detected by this function. | |
417 | * | |
51004dcb | 418 | * @stable ICU 4.8 |
4388f060 | 419 | */ |
51004dcb | 420 | U_STABLE void U_EXPORT2 |
4388f060 A |
421 | uregex_refreshUText(URegularExpression *regexp, |
422 | UText *text, | |
423 | UErrorCode *status); | |
424 | ||
374ca955 | 425 | /** |
46f4442e A |
426 | * Attempts to match the input string against the pattern. |
427 | * To succeed, the match must extend to the end of the string, | |
428 | * or cover the complete match region. | |
429 | * | |
430 | * If startIndex >= zero the match operation starts at the specified | |
431 | * index and must extend to the end of the input string. Any region | |
432 | * that has been specified is reset. | |
433 | * | |
434 | * If startIndex == -1 the match must cover the input region, or the entire | |
435 | * input string if no region has been set. This directly corresponds to | |
436 | * Matcher.matches() in Java. | |
374ca955 A |
437 | * |
438 | * @param regexp The compiled regular expression. | |
729e4ab9 | 439 | * @param startIndex The input string (native) index at which to begin matching, or -1 |
46f4442e | 440 | * to match the input Region. |
374ca955 A |
441 | * @param status Receives errors detected by this function. |
442 | * @return TRUE if there is a match | |
73c04bcf | 443 | * @stable ICU 3.0 |
374ca955 | 444 | */ |
73c04bcf | 445 | U_STABLE UBool U_EXPORT2 |
374ca955 A |
446 | uregex_matches(URegularExpression *regexp, |
447 | int32_t startIndex, | |
448 | UErrorCode *status); | |
449 | ||
729e4ab9 A |
450 | /** |
451 | * 64bit version of uregex_matches. | |
4388f060 A |
452 | * Attempts to match the input string against the pattern. |
453 | * To succeed, the match must extend to the end of the string, | |
454 | * or cover the complete match region. | |
455 | * | |
456 | * If startIndex >= zero the match operation starts at the specified | |
457 | * index and must extend to the end of the input string. Any region | |
458 | * that has been specified is reset. | |
459 | * | |
460 | * If startIndex == -1 the match must cover the input region, or the entire | |
461 | * input string if no region has been set. This directly corresponds to | |
462 | * Matcher.matches() in Java. | |
463 | * | |
464 | * @param regexp The compiled regular expression. | |
465 | * @param startIndex The input string (native) index at which to begin matching, or -1 | |
466 | * to match the input Region. | |
467 | * @param status Receives errors detected by this function. | |
468 | * @return TRUE if there is a match | |
469 | * @stable ICU 4.6 | |
729e4ab9 | 470 | */ |
51004dcb | 471 | U_STABLE UBool U_EXPORT2 |
729e4ab9 A |
472 | uregex_matches64(URegularExpression *regexp, |
473 | int64_t startIndex, | |
474 | UErrorCode *status); | |
475 | ||
374ca955 A |
476 | /** |
477 | * Attempts to match the input string, starting from the specified index, against the pattern. | |
478 | * The match may be of any length, and is not required to extend to the end | |
479 | * of the input string. Contrast with uregex_matches(). | |
480 | * | |
46f4442e A |
481 | * <p>If startIndex is >= 0 any input region that was set for this |
482 | * URegularExpression is reset before the operation begins. | |
483 | * | |
484 | * <p>If the specified starting index == -1 the match begins at the start of the input | |
485 | * region, or at the start of the full string if no region has been specified. | |
486 | * This corresponds directly with Matcher.lookingAt() in Java. | |
487 | * | |
374ca955 A |
488 | * <p>If the match succeeds then more information can be obtained via the |
489 | * <code>uregex_start()</code>, <code>uregex_end()</code>, | |
2ca993e8 | 490 | * and <code>uregex_group()</code> functions.</p> |
374ca955 A |
491 | * |
492 | * @param regexp The compiled regular expression. | |
729e4ab9 | 493 | * @param startIndex The input string (native) index at which to begin matching, or |
46f4442e | 494 | * -1 to match the Input Region |
374ca955 A |
495 | * @param status A reference to a UErrorCode to receive any errors. |
496 | * @return TRUE if there is a match. | |
73c04bcf | 497 | * @stable ICU 3.0 |
374ca955 | 498 | */ |
73c04bcf | 499 | U_STABLE UBool U_EXPORT2 |
374ca955 A |
500 | uregex_lookingAt(URegularExpression *regexp, |
501 | int32_t startIndex, | |
502 | UErrorCode *status); | |
503 | ||
729e4ab9 A |
504 | /** |
505 | * 64bit version of uregex_lookingAt. | |
4388f060 A |
506 | * Attempts to match the input string, starting from the specified index, against the pattern. |
507 | * The match may be of any length, and is not required to extend to the end | |
508 | * of the input string. Contrast with uregex_matches(). | |
509 | * | |
510 | * <p>If startIndex is >= 0 any input region that was set for this | |
511 | * URegularExpression is reset before the operation begins. | |
512 | * | |
513 | * <p>If the specified starting index == -1 the match begins at the start of the input | |
514 | * region, or at the start of the full string if no region has been specified. | |
515 | * This corresponds directly with Matcher.lookingAt() in Java. | |
516 | * | |
517 | * <p>If the match succeeds then more information can be obtained via the | |
518 | * <code>uregex_start()</code>, <code>uregex_end()</code>, | |
2ca993e8 | 519 | * and <code>uregex_group()</code> functions.</p> |
4388f060 A |
520 | * |
521 | * @param regexp The compiled regular expression. | |
522 | * @param startIndex The input string (native) index at which to begin matching, or | |
523 | * -1 to match the Input Region | |
524 | * @param status A reference to a UErrorCode to receive any errors. | |
525 | * @return TRUE if there is a match. | |
526 | * @stable ICU 4.6 | |
729e4ab9 | 527 | */ |
51004dcb | 528 | U_STABLE UBool U_EXPORT2 |
729e4ab9 A |
529 | uregex_lookingAt64(URegularExpression *regexp, |
530 | int64_t startIndex, | |
531 | UErrorCode *status); | |
532 | ||
374ca955 A |
533 | /** |
534 | * Find the first matching substring of the input string that matches the pattern. | |
46f4442e A |
535 | * If startIndex is >= zero the search for a match begins at the specified index, |
536 | * and any match region is reset. This corresponds directly with | |
537 | * Matcher.find(startIndex) in Java. | |
538 | * | |
539 | * If startIndex == -1 the search begins at the start of the input region, | |
540 | * or at the start of the full string if no region has been specified. | |
541 | * | |
374ca955 A |
542 | * If a match is found, <code>uregex_start(), uregex_end()</code>, and |
543 | * <code>uregex_group()</code> will provide more information regarding the match. | |
544 | * | |
545 | * @param regexp The compiled regular expression. | |
729e4ab9 | 546 | * @param startIndex The position (native) in the input string to begin the search, or |
46f4442e | 547 | * -1 to search within the Input Region. |
374ca955 A |
548 | * @param status A reference to a UErrorCode to receive any errors. |
549 | * @return TRUE if a match is found. | |
73c04bcf | 550 | * @stable ICU 3.0 |
374ca955 | 551 | */ |
73c04bcf | 552 | U_STABLE UBool U_EXPORT2 |
374ca955 A |
553 | uregex_find(URegularExpression *regexp, |
554 | int32_t startIndex, | |
555 | UErrorCode *status); | |
556 | ||
729e4ab9 A |
557 | /** |
558 | * 64bit version of uregex_find. | |
4388f060 A |
559 | * Find the first matching substring of the input string that matches the pattern. |
560 | * If startIndex is >= zero the search for a match begins at the specified index, | |
561 | * and any match region is reset. This corresponds directly with | |
562 | * Matcher.find(startIndex) in Java. | |
563 | * | |
564 | * If startIndex == -1 the search begins at the start of the input region, | |
565 | * or at the start of the full string if no region has been specified. | |
566 | * | |
567 | * If a match is found, <code>uregex_start(), uregex_end()</code>, and | |
568 | * <code>uregex_group()</code> will provide more information regarding the match. | |
569 | * | |
570 | * @param regexp The compiled regular expression. | |
571 | * @param startIndex The position (native) in the input string to begin the search, or | |
572 | * -1 to search within the Input Region. | |
573 | * @param status A reference to a UErrorCode to receive any errors. | |
574 | * @return TRUE if a match is found. | |
575 | * @stable ICU 4.6 | |
729e4ab9 | 576 | */ |
51004dcb | 577 | U_STABLE UBool U_EXPORT2 |
729e4ab9 A |
578 | uregex_find64(URegularExpression *regexp, |
579 | int64_t startIndex, | |
580 | UErrorCode *status); | |
581 | ||
374ca955 | 582 | /** |
46f4442e A |
583 | * Find the next pattern match in the input string. Begin searching |
584 | * the input at the location following the end of the previous match, | |
585 | * or at the start of the string (or region) if there is no | |
586 | * previous match. If a match is found, <code>uregex_start(), uregex_end()</code>, and | |
374ca955 A |
587 | * <code>uregex_group()</code> will provide more information regarding the match. |
588 | * | |
589 | * @param regexp The compiled regular expression. | |
590 | * @param status A reference to a UErrorCode to receive any errors. | |
591 | * @return TRUE if a match is found. | |
592 | * @see uregex_reset | |
73c04bcf | 593 | * @stable ICU 3.0 |
374ca955 | 594 | */ |
73c04bcf | 595 | U_STABLE UBool U_EXPORT2 |
374ca955 A |
596 | uregex_findNext(URegularExpression *regexp, |
597 | UErrorCode *status); | |
598 | ||
599 | /** | |
600 | * Get the number of capturing groups in this regular expression's pattern. | |
601 | * @param regexp The compiled regular expression. | |
602 | * @param status A reference to a UErrorCode to receive any errors. | |
603 | * @return the number of capture groups | |
73c04bcf | 604 | * @stable ICU 3.0 |
374ca955 | 605 | */ |
73c04bcf | 606 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
607 | uregex_groupCount(URegularExpression *regexp, |
608 | UErrorCode *status); | |
609 | ||
b331163b A |
610 | /** |
611 | * Get the group number corresponding to a named capture group. | |
612 | * The returned number can be used with any function that accesses | |
613 | * capture groups by number. | |
614 | * | |
615 | * The function returns an error status if the specified name does not | |
616 | * appear in the pattern. | |
617 | * | |
618 | * @param regexp The compiled regular expression. | |
619 | * @param groupName The capture group name. | |
620 | * @param nameLength The length of the name, or -1 if the name is a | |
621 | * nul-terminated string. | |
622 | * @param status A pointer to a UErrorCode to receive any errors. | |
623 | * | |
2ca993e8 | 624 | * @stable ICU 55 |
b331163b | 625 | */ |
2ca993e8 | 626 | U_STABLE int32_t U_EXPORT2 |
b331163b A |
627 | uregex_groupNumberFromName(URegularExpression *regexp, |
628 | const UChar *groupName, | |
629 | int32_t nameLength, | |
630 | UErrorCode *status); | |
631 | ||
632 | ||
633 | /** | |
634 | * Get the group number corresponding to a named capture group. | |
635 | * The returned number can be used with any function that accesses | |
636 | * capture groups by number. | |
637 | * | |
638 | * The function returns an error status if the specified name does not | |
639 | * appear in the pattern. | |
640 | * | |
641 | * @param regexp The compiled regular expression. | |
642 | * @param groupName The capture group name, | |
643 | * platform invariant characters only. | |
644 | * @param nameLength The length of the name, or -1 if the name is | |
645 | * nul-terminated. | |
646 | * @param status A pointer to a UErrorCode to receive any errors. | |
647 | * | |
2ca993e8 | 648 | * @stable ICU 55 |
b331163b | 649 | */ |
2ca993e8 | 650 | U_STABLE int32_t U_EXPORT2 |
b331163b A |
651 | uregex_groupNumberFromCName(URegularExpression *regexp, |
652 | const char *groupName, | |
653 | int32_t nameLength, | |
654 | UErrorCode *status); | |
b331163b | 655 | |
374ca955 A |
656 | /** Extract the string for the specified matching expression or subexpression. |
657 | * Group #0 is the complete string of matched text. | |
658 | * Group #1 is the text matched by the first set of capturing parentheses. | |
659 | * | |
660 | * @param regexp The compiled regular expression. | |
661 | * @param groupNum The capture group to extract. Group 0 is the complete | |
662 | * match. The value of this parameter must be | |
663 | * less than or equal to the number of capture groups in | |
664 | * the pattern. | |
665 | * @param dest Buffer to receive the matching string data | |
666 | * @param destCapacity Capacity of the dest buffer. | |
667 | * @param status A reference to a UErrorCode to receive any errors. | |
668 | * @return Length of matching data, | |
669 | * or -1 if no applicable match. | |
73c04bcf | 670 | * @stable ICU 3.0 |
374ca955 | 671 | */ |
73c04bcf | 672 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
673 | uregex_group(URegularExpression *regexp, |
674 | int32_t groupNum, | |
675 | UChar *dest, | |
676 | int32_t destCapacity, | |
677 | UErrorCode *status); | |
678 | ||
b331163b A |
679 | /** Returns a shallow immutable clone of the entire input string with the current index set |
680 | * to the beginning of the requested capture group. The capture group length is also | |
729e4ab9 A |
681 | * returned via groupLength. |
682 | * Group #0 is the complete string of matched text. | |
683 | * Group #1 is the text matched by the first set of capturing parentheses. | |
684 | * | |
685 | * @param regexp The compiled regular expression. | |
686 | * @param groupNum The capture group to extract. Group 0 is the complete | |
687 | * match. The value of this parameter must be | |
688 | * less than or equal to the number of capture groups in | |
689 | * the pattern. | |
690 | * @param dest A mutable UText in which to store the current input. | |
691 | * If NULL, a new UText will be created as an immutable shallow clone | |
692 | * of the entire input string. | |
b331163b | 693 | * @param groupLength The group length of the desired capture group. Output parameter. |
729e4ab9 A |
694 | * @param status A reference to a UErrorCode to receive any errors. |
695 | * @return The subject text currently associated with this regular expression. | |
696 | * If a pre-allocated UText was provided, it will always be used and returned. | |
697 | ||
698 | * | |
4388f060 | 699 | * @stable ICU 4.6 |
729e4ab9 | 700 | */ |
51004dcb | 701 | U_STABLE UText * U_EXPORT2 |
729e4ab9 A |
702 | uregex_groupUText(URegularExpression *regexp, |
703 | int32_t groupNum, | |
704 | UText *dest, | |
705 | int64_t *groupLength, | |
706 | UErrorCode *status); | |
707 | ||
374ca955 A |
708 | /** |
709 | * Returns the index in the input string of the start of the text matched by the | |
710 | * specified capture group during the previous match operation. Return -1 if | |
711 | * the capture group was not part of the last match. | |
712 | * Group #0 refers to the complete range of matched text. | |
713 | * Group #1 refers to the text matched by the first set of capturing parentheses. | |
714 | * | |
715 | * @param regexp The compiled regular expression. | |
716 | * @param groupNum The capture group number | |
717 | * @param status A reference to a UErrorCode to receive any errors. | |
729e4ab9 | 718 | * @return the starting (native) position in the input of the text matched |
374ca955 | 719 | * by the specified group. |
73c04bcf | 720 | * @stable ICU 3.0 |
374ca955 | 721 | */ |
73c04bcf | 722 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
723 | uregex_start(URegularExpression *regexp, |
724 | int32_t groupNum, | |
725 | UErrorCode *status); | |
726 | ||
729e4ab9 A |
727 | /** |
728 | * 64bit version of uregex_start. | |
4388f060 A |
729 | * Returns the index in the input string of the start of the text matched by the |
730 | * specified capture group during the previous match operation. Return -1 if | |
731 | * the capture group was not part of the last match. | |
732 | * Group #0 refers to the complete range of matched text. | |
733 | * Group #1 refers to the text matched by the first set of capturing parentheses. | |
734 | * | |
735 | * @param regexp The compiled regular expression. | |
736 | * @param groupNum The capture group number | |
737 | * @param status A reference to a UErrorCode to receive any errors. | |
738 | * @return the starting (native) position in the input of the text matched | |
739 | * by the specified group. | |
740 | * @stable ICU 4.6 | |
729e4ab9 | 741 | */ |
51004dcb | 742 | U_STABLE int64_t U_EXPORT2 |
729e4ab9 A |
743 | uregex_start64(URegularExpression *regexp, |
744 | int32_t groupNum, | |
745 | UErrorCode *status); | |
746 | ||
374ca955 A |
747 | /** |
748 | * Returns the index in the input string of the position following the end | |
749 | * of the text matched by the specified capture group. | |
750 | * Return -1 if the capture group was not part of the last match. | |
751 | * Group #0 refers to the complete range of matched text. | |
752 | * Group #1 refers to the text matched by the first set of capturing parentheses. | |
753 | * | |
754 | * @param regexp The compiled regular expression. | |
755 | * @param groupNum The capture group number | |
756 | * @param status A reference to a UErrorCode to receive any errors. | |
729e4ab9 | 757 | * @return the (native) index of the position following the last matched character. |
73c04bcf | 758 | * @stable ICU 3.0 |
374ca955 | 759 | */ |
73c04bcf | 760 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
761 | uregex_end(URegularExpression *regexp, |
762 | int32_t groupNum, | |
763 | UErrorCode *status); | |
764 | ||
729e4ab9 A |
765 | /** |
766 | * 64bit version of uregex_end. | |
4388f060 A |
767 | * Returns the index in the input string of the position following the end |
768 | * of the text matched by the specified capture group. | |
769 | * Return -1 if the capture group was not part of the last match. | |
770 | * Group #0 refers to the complete range of matched text. | |
771 | * Group #1 refers to the text matched by the first set of capturing parentheses. | |
772 | * | |
773 | * @param regexp The compiled regular expression. | |
774 | * @param groupNum The capture group number | |
775 | * @param status A reference to a UErrorCode to receive any errors. | |
776 | * @return the (native) index of the position following the last matched character. | |
777 | * @stable ICU 4.6 | |
729e4ab9 | 778 | */ |
51004dcb | 779 | U_STABLE int64_t U_EXPORT2 |
729e4ab9 A |
780 | uregex_end64(URegularExpression *regexp, |
781 | int32_t groupNum, | |
782 | UErrorCode *status); | |
783 | ||
374ca955 A |
784 | /** |
785 | * Reset any saved state from the previous match. Has the effect of | |
786 | * causing uregex_findNext to begin at the specified index, and causing | |
787 | * uregex_start(), uregex_end() and uregex_group() to return an error | |
46f4442e A |
788 | * indicating that there is no match information available. Clears any |
789 | * match region that may have been set. | |
374ca955 A |
790 | * |
791 | * @param regexp The compiled regular expression. | |
729e4ab9 | 792 | * @param index The position (native) in the text at which a |
374ca955 A |
793 | * uregex_findNext() should begin searching. |
794 | * @param status A reference to a UErrorCode to receive any errors. | |
73c04bcf | 795 | * @stable ICU 3.0 |
374ca955 | 796 | */ |
73c04bcf | 797 | U_STABLE void U_EXPORT2 |
374ca955 A |
798 | uregex_reset(URegularExpression *regexp, |
799 | int32_t index, | |
800 | UErrorCode *status); | |
4388f060 | 801 | |
729e4ab9 | 802 | /** |
4388f060 A |
803 | * 64bit version of uregex_reset. |
804 | * Reset any saved state from the previous match. Has the effect of | |
805 | * causing uregex_findNext to begin at the specified index, and causing | |
806 | * uregex_start(), uregex_end() and uregex_group() to return an error | |
807 | * indicating that there is no match information available. Clears any | |
808 | * match region that may have been set. | |
809 | * | |
810 | * @param regexp The compiled regular expression. | |
811 | * @param index The position (native) in the text at which a | |
812 | * uregex_findNext() should begin searching. | |
813 | * @param status A reference to a UErrorCode to receive any errors. | |
814 | * @stable ICU 4.6 | |
729e4ab9 | 815 | */ |
51004dcb | 816 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
817 | uregex_reset64(URegularExpression *regexp, |
818 | int64_t index, | |
819 | UErrorCode *status); | |
820 | ||
4388f060 A |
821 | /** |
822 | * Sets the limits of the matching region for this URegularExpression. | |
46f4442e A |
823 | * The region is the part of the input string that will be considered when matching. |
824 | * Invoking this method resets any saved state from the previous match, | |
825 | * then sets the region to start at the index specified by the start parameter | |
826 | * and end at the index specified by the end parameter. | |
827 | * | |
828 | * Depending on the transparency and anchoring being used (see useTransparentBounds | |
829 | * and useAnchoringBounds), certain constructs such as anchors may behave differently | |
830 | * at or around the boundaries of the region. | |
831 | * | |
832 | * The function will fail if start is greater than limit, or if either index | |
833 | * is less than zero or greater than the length of the string being matched. | |
834 | * | |
835 | * @param regexp The compiled regular expression. | |
729e4ab9 A |
836 | * @param regionStart The (native) index to begin searches at. |
837 | * @param regionLimit The (native) index to end searches at (exclusive). | |
46f4442e | 838 | * @param status A pointer to a UErrorCode to receive any errors. |
729e4ab9 | 839 | * @stable ICU 4.0 |
46f4442e | 840 | */ |
729e4ab9 | 841 | U_STABLE void U_EXPORT2 |
46f4442e A |
842 | uregex_setRegion(URegularExpression *regexp, |
843 | int32_t regionStart, | |
844 | int32_t regionLimit, | |
845 | UErrorCode *status); | |
846 | ||
729e4ab9 | 847 | /** |
4388f060 A |
848 | * 64bit version of uregex_setRegion. |
849 | * Sets the limits of the matching region for this URegularExpression. | |
850 | * The region is the part of the input string that will be considered when matching. | |
851 | * Invoking this method resets any saved state from the previous match, | |
852 | * then sets the region to start at the index specified by the start parameter | |
853 | * and end at the index specified by the end parameter. | |
854 | * | |
855 | * Depending on the transparency and anchoring being used (see useTransparentBounds | |
856 | * and useAnchoringBounds), certain constructs such as anchors may behave differently | |
857 | * at or around the boundaries of the region. | |
858 | * | |
859 | * The function will fail if start is greater than limit, or if either index | |
860 | * is less than zero or greater than the length of the string being matched. | |
861 | * | |
862 | * @param regexp The compiled regular expression. | |
863 | * @param regionStart The (native) index to begin searches at. | |
864 | * @param regionLimit The (native) index to end searches at (exclusive). | |
865 | * @param status A pointer to a UErrorCode to receive any errors. | |
866 | * @stable ICU 4.6 | |
729e4ab9 | 867 | */ |
51004dcb | 868 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
869 | uregex_setRegion64(URegularExpression *regexp, |
870 | int64_t regionStart, | |
871 | int64_t regionLimit, | |
872 | UErrorCode *status); | |
873 | ||
874 | /** | |
4388f060 A |
875 | * Set the matching region and the starting index for subsequent matches |
876 | * in a single operation. | |
877 | * This is useful because the usual function for setting the starting | |
878 | * index, uregex_reset(), also resets any region limits. | |
879 | * | |
880 | * @param regexp The compiled regular expression. | |
881 | * @param regionStart The (native) index to begin searches at. | |
882 | * @param regionLimit The (native) index to end searches at (exclusive). | |
883 | * @param startIndex The index in the input text at which the next | |
884 | * match operation should begin. | |
885 | * @param status A pointer to a UErrorCode to receive any errors. | |
886 | * @stable ICU 4.6 | |
729e4ab9 | 887 | */ |
51004dcb | 888 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
889 | uregex_setRegionAndStart(URegularExpression *regexp, |
890 | int64_t regionStart, | |
891 | int64_t regionLimit, | |
892 | int64_t startIndex, | |
893 | UErrorCode *status); | |
894 | ||
46f4442e A |
895 | /** |
896 | * Reports the start index of the matching region. Any matches found are limited to | |
897 | * the region bounded by regionStart (inclusive) and regionEnd (exclusive). | |
898 | * | |
899 | * @param regexp The compiled regular expression. | |
900 | * @param status A pointer to a UErrorCode to receive any errors. | |
729e4ab9 A |
901 | * @return The starting (native) index of this matcher's region. |
902 | * @stable ICU 4.0 | |
46f4442e | 903 | */ |
729e4ab9 | 904 | U_STABLE int32_t U_EXPORT2 |
46f4442e A |
905 | uregex_regionStart(const URegularExpression *regexp, |
906 | UErrorCode *status); | |
907 | ||
729e4ab9 | 908 | /** |
4388f060 A |
909 | * 64bit version of uregex_regionStart. |
910 | * Reports the start index of the matching region. Any matches found are limited to | |
911 | * the region bounded by regionStart (inclusive) and regionEnd (exclusive). | |
912 | * | |
913 | * @param regexp The compiled regular expression. | |
914 | * @param status A pointer to a UErrorCode to receive any errors. | |
915 | * @return The starting (native) index of this matcher's region. | |
916 | * @stable ICU 4.6 | |
729e4ab9 | 917 | */ |
51004dcb | 918 | U_STABLE int64_t U_EXPORT2 |
729e4ab9 A |
919 | uregex_regionStart64(const URegularExpression *regexp, |
920 | UErrorCode *status); | |
46f4442e A |
921 | |
922 | /** | |
923 | * Reports the end index (exclusive) of the matching region for this URegularExpression. | |
924 | * Any matches found are limited to the region bounded by regionStart (inclusive) | |
925 | * and regionEnd (exclusive). | |
926 | * | |
927 | * @param regexp The compiled regular expression. | |
928 | * @param status A pointer to a UErrorCode to receive any errors. | |
729e4ab9 A |
929 | * @return The ending point (native) of this matcher's region. |
930 | * @stable ICU 4.0 | |
46f4442e | 931 | */ |
729e4ab9 | 932 | U_STABLE int32_t U_EXPORT2 |
46f4442e A |
933 | uregex_regionEnd(const URegularExpression *regexp, |
934 | UErrorCode *status); | |
935 | ||
729e4ab9 | 936 | /** |
4388f060 A |
937 | * 64bit version of uregex_regionEnd. |
938 | * Reports the end index (exclusive) of the matching region for this URegularExpression. | |
939 | * Any matches found are limited to the region bounded by regionStart (inclusive) | |
940 | * and regionEnd (exclusive). | |
941 | * | |
942 | * @param regexp The compiled regular expression. | |
943 | * @param status A pointer to a UErrorCode to receive any errors. | |
944 | * @return The ending point (native) of this matcher's region. | |
945 | * @stable ICU 4.6 | |
729e4ab9 | 946 | */ |
51004dcb | 947 | U_STABLE int64_t U_EXPORT2 |
729e4ab9 A |
948 | uregex_regionEnd64(const URegularExpression *regexp, |
949 | UErrorCode *status); | |
950 | ||
46f4442e A |
951 | /** |
952 | * Queries the transparency of region bounds for this URegularExpression. | |
953 | * See useTransparentBounds for a description of transparent and opaque bounds. | |
954 | * By default, matching boundaries are opaque. | |
955 | * | |
956 | * @param regexp The compiled regular expression. | |
957 | * @param status A pointer to a UErrorCode to receive any errors. | |
958 | * @return TRUE if this matcher is using transparent bounds, FALSE if it is not. | |
729e4ab9 | 959 | * @stable ICU 4.0 |
46f4442e | 960 | */ |
729e4ab9 | 961 | U_STABLE UBool U_EXPORT2 |
46f4442e A |
962 | uregex_hasTransparentBounds(const URegularExpression *regexp, |
963 | UErrorCode *status); | |
964 | ||
965 | ||
966 | /** | |
967 | * Sets the transparency of region bounds for this URegularExpression. | |
968 | * Invoking this function with an argument of TRUE will set matches to use transparent bounds. | |
969 | * If the boolean argument is FALSE, then opaque bounds will be used. | |
970 | * | |
971 | * Using transparent bounds, the boundaries of the matching region are transparent | |
972 | * to lookahead, lookbehind, and boundary matching constructs. Those constructs can | |
973 | * see text beyond the boundaries of the region while checking for a match. | |
974 | * | |
975 | * With opaque bounds, no text outside of the matching region is visible to lookahead, | |
976 | * lookbehind, and boundary matching constructs. | |
977 | * | |
978 | * By default, opaque bounds are used. | |
979 | * | |
980 | * @param regexp The compiled regular expression. | |
981 | * @param b TRUE for transparent bounds; FALSE for opaque bounds | |
982 | * @param status A pointer to a UErrorCode to receive any errors. | |
729e4ab9 | 983 | * @stable ICU 4.0 |
46f4442e | 984 | **/ |
729e4ab9 | 985 | U_STABLE void U_EXPORT2 |
46f4442e A |
986 | uregex_useTransparentBounds(URegularExpression *regexp, |
987 | UBool b, | |
988 | UErrorCode *status); | |
989 | ||
990 | ||
991 | /** | |
992 | * Return true if this URegularExpression is using anchoring bounds. | |
993 | * By default, anchoring region bounds are used. | |
994 | * | |
995 | * @param regexp The compiled regular expression. | |
996 | * @param status A pointer to a UErrorCode to receive any errors. | |
997 | * @return TRUE if this matcher is using anchoring bounds. | |
729e4ab9 | 998 | * @stable ICU 4.0 |
46f4442e | 999 | */ |
729e4ab9 | 1000 | U_STABLE UBool U_EXPORT2 |
46f4442e A |
1001 | uregex_hasAnchoringBounds(const URegularExpression *regexp, |
1002 | UErrorCode *status); | |
1003 | ||
1004 | ||
1005 | /** | |
1006 | * Set whether this URegularExpression is using Anchoring Bounds for its region. | |
1007 | * With anchoring bounds, pattern anchors such as ^ and $ will match at the start | |
1008 | * and end of the region. Without Anchoring Bounds, anchors will only match at | |
1009 | * the positions they would in the complete text. | |
1010 | * | |
1011 | * Anchoring Bounds are the default for regions. | |
1012 | * | |
1013 | * @param regexp The compiled regular expression. | |
1014 | * @param b TRUE to enable anchoring bounds; FALSE to disable them. | |
1015 | * @param status A pointer to a UErrorCode to receive any errors. | |
729e4ab9 | 1016 | * @stable ICU 4.0 |
46f4442e | 1017 | */ |
729e4ab9 | 1018 | U_STABLE void U_EXPORT2 |
46f4442e A |
1019 | uregex_useAnchoringBounds(URegularExpression *regexp, |
1020 | UBool b, | |
1021 | UErrorCode *status); | |
1022 | ||
1023 | /** | |
1024 | * Return TRUE if the most recent matching operation touched the | |
1025 | * end of the text being processed. In this case, additional input text could | |
1026 | * change the results of that match. | |
1027 | * | |
1028 | * @param regexp The compiled regular expression. | |
1029 | * @param status A pointer to a UErrorCode to receive any errors. | |
1030 | * @return TRUE if the most recent match hit the end of input | |
729e4ab9 | 1031 | * @stable ICU 4.0 |
46f4442e | 1032 | */ |
729e4ab9 | 1033 | U_STABLE UBool U_EXPORT2 |
46f4442e A |
1034 | uregex_hitEnd(const URegularExpression *regexp, |
1035 | UErrorCode *status); | |
1036 | ||
1037 | /** | |
1038 | * Return TRUE if the most recent match succeeded and additional input could cause | |
1039 | * it to fail. If this function returns false and a match was found, then more input | |
1040 | * might change the match but the match won't be lost. If a match was not found, | |
1041 | * then requireEnd has no meaning. | |
1042 | * | |
1043 | * @param regexp The compiled regular expression. | |
1044 | * @param status A pointer to a UErrorCode to receive any errors. | |
1045 | * @return TRUE if more input could cause the most recent match to no longer match. | |
729e4ab9 | 1046 | * @stable ICU 4.0 |
46f4442e | 1047 | */ |
729e4ab9 | 1048 | U_STABLE UBool U_EXPORT2 |
46f4442e A |
1049 | uregex_requireEnd(const URegularExpression *regexp, |
1050 | UErrorCode *status); | |
1051 | ||
1052 | ||
1053 | ||
1054 | ||
374ca955 A |
1055 | |
1056 | /** | |
1057 | * Replaces every substring of the input that matches the pattern | |
1058 | * with the given replacement string. This is a convenience function that | |
1059 | * provides a complete find-and-replace-all operation. | |
1060 | * | |
1061 | * This method scans the input string looking for matches of the pattern. | |
1062 | * Input that is not part of any match is copied unchanged to the | |
1063 | * destination buffer. Matched regions are replaced in the output | |
1064 | * buffer by the replacement string. The replacement string may contain | |
1065 | * references to capture groups; these take the form of $1, $2, etc. | |
1066 | * | |
1067 | * @param regexp The compiled regular expression. | |
1068 | * @param replacementText A string containing the replacement text. | |
1069 | * @param replacementLength The length of the replacement string, or | |
1070 | * -1 if it is NUL terminated. | |
1071 | * @param destBuf A (UChar *) buffer that will receive the result. | |
4388f060 | 1072 | * @param destCapacity The capacity of the destination buffer. |
374ca955 A |
1073 | * @param status A reference to a UErrorCode to receive any errors. |
1074 | * @return The length of the string resulting from the find | |
1075 | * and replace operation. In the event that the | |
1076 | * destination capacity is inadequate, the return value | |
1077 | * is still the full length of the untruncated string. | |
73c04bcf | 1078 | * @stable ICU 3.0 |
374ca955 | 1079 | */ |
73c04bcf | 1080 | U_STABLE int32_t U_EXPORT2 |
374ca955 | 1081 | uregex_replaceAll(URegularExpression *regexp, |
73c04bcf | 1082 | const UChar *replacementText, |
374ca955 A |
1083 | int32_t replacementLength, |
1084 | UChar *destBuf, | |
1085 | int32_t destCapacity, | |
1086 | UErrorCode *status); | |
1087 | ||
729e4ab9 A |
1088 | /** |
1089 | * Replaces every substring of the input that matches the pattern | |
1090 | * with the given replacement string. This is a convenience function that | |
1091 | * provides a complete find-and-replace-all operation. | |
1092 | * | |
1093 | * This method scans the input string looking for matches of the pattern. | |
1094 | * Input that is not part of any match is copied unchanged to the | |
1095 | * destination buffer. Matched regions are replaced in the output | |
1096 | * buffer by the replacement string. The replacement string may contain | |
1097 | * references to capture groups; these take the form of $1, $2, etc. | |
1098 | * | |
1099 | * @param regexp The compiled regular expression. | |
1100 | * @param replacement A string containing the replacement text. | |
1101 | * @param dest A mutable UText that will receive the result. | |
1102 | * If NULL, a new UText will be created (which may not be mutable). | |
1103 | * @param status A reference to a UErrorCode to receive any errors. | |
1104 | * @return A UText containing the results of the find and replace. | |
1105 | * If a pre-allocated UText was provided, it will always be used and returned. | |
1106 | * | |
4388f060 | 1107 | * @stable ICU 4.6 |
729e4ab9 | 1108 | */ |
51004dcb | 1109 | U_STABLE UText * U_EXPORT2 |
729e4ab9 A |
1110 | uregex_replaceAllUText(URegularExpression *regexp, |
1111 | UText *replacement, | |
1112 | UText *dest, | |
1113 | UErrorCode *status); | |
374ca955 A |
1114 | |
1115 | /** | |
1116 | * Replaces the first substring of the input that matches the pattern | |
1117 | * with the given replacement string. This is a convenience function that | |
1118 | * provides a complete find-and-replace operation. | |
1119 | * | |
1120 | * This method scans the input string looking for a match of the pattern. | |
1121 | * All input that is not part of the match is copied unchanged to the | |
1122 | * destination buffer. The matched region is replaced in the output | |
1123 | * buffer by the replacement string. The replacement string may contain | |
1124 | * references to capture groups; these take the form of $1, $2, etc. | |
1125 | * | |
1126 | * @param regexp The compiled regular expression. | |
1127 | * @param replacementText A string containing the replacement text. | |
1128 | * @param replacementLength The length of the replacement string, or | |
1129 | * -1 if it is NUL terminated. | |
1130 | * @param destBuf A (UChar *) buffer that will receive the result. | |
4388f060 | 1131 | * @param destCapacity The capacity of the destination buffer. |
374ca955 A |
1132 | * @param status A reference to a UErrorCode to receive any errors. | |
1133 | * @return The length of the string resulting from the find | |
1134 | * and replace operation. In the event that the | |
1135 | * destination capacity is inadequate, the return value | |
1136 | * is still the full length of the untruncated string. | |
73c04bcf | 1137 | * @stable ICU 3.0 |
374ca955 | 1138 | */ |
73c04bcf | 1139 | U_STABLE int32_t U_EXPORT2 |
374ca955 | 1140 | uregex_replaceFirst(URegularExpression *regexp, |
73c04bcf | 1141 | const UChar *replacementText, |
374ca955 A |
1142 | int32_t replacementLength, |
1143 | UChar *destBuf, | |
1144 | int32_t destCapacity, | |
1145 | UErrorCode *status); | |
1146 | ||
729e4ab9 A |
1147 | /** |
1148 | * Replaces the first substring of the input that matches the pattern | |
1149 | * with the given replacement string. This is a convenience function that | |
1150 | * provides a complete find-and-replace operation. | |
1151 | * | |
1152 | * This method scans the input string looking for a match of the pattern. | |
1153 | * All input that is not part of the match is copied unchanged to the | |
1154 | * destination buffer. The matched region is replaced in the output | |
1155 | * buffer by the replacement string. The replacement string may contain | |
1156 | * references to capture groups; these take the form of $1, $2, etc. | |
1157 | * | |
1158 | * @param regexp The compiled regular expression. | |
1159 | * @param replacement A string containing the replacement text. | |
1160 | * @param dest A mutable UText that will receive the result. | |
1161 | * If NULL, a new UText will be created (which may not be mutable). | |
1162 | * @param status A reference to a UErrorCode to receive any errors. | |
1163 | * @return A UText containing the results of the find and replace. | |
1164 | * If a pre-allocated UText was provided, it will always be used and returned. | |
1165 | * | |
4388f060 | 1166 | * @stable ICU 4.6 |
729e4ab9 | 1167 | */ |
51004dcb | 1168 | U_STABLE UText * U_EXPORT2 |
729e4ab9 A |
1169 | uregex_replaceFirstUText(URegularExpression *regexp, |
1170 | UText *replacement, | |
1171 | UText *dest, | |
1172 | UErrorCode *status); | |
1173 | ||
374ca955 A |
1174 | /** |
1175 | * Implements a replace operation intended to be used as part of an | |
1176 | * incremental find-and-replace. | |
1177 | * | |
1178 | * <p>The input string, starting from the end of the previous match and ending at | |
1179 | * the start of the current match, is appended to the destination string. Then the | |
1180 | * replacement string is appended to the output string, | |
1181 | * including handling any substitutions of captured text.</p> | |
1182 | * | |
1183 | * <p>A note on preflight computation of buffersize and error handling: | |
1184 | * Calls to uregex_appendReplacement() and uregex_appendTail() are | |
1185 | * designed to be chained, one after another, with the destination | |
1186 | * buffer pointer and buffer capacity updated after each in preparation | |
1187 | * for the next. If the destination buffer is exhausted partway through such a | |
1188 | * sequence, a U_BUFFER_OVERFLOW_ERROR status will be returned. Normal | |
1189 | * ICU conventions are for a function to perform no action if it is | |
1190 | * called with an error status, but for this one case, uregex_appendReplacement() | |
1191 | * will operate normally so that buffer size computations will complete | |
1192 | * correctly.</p> | |
1193 | * | |
1194 | * <p>For simple, prepackaged, non-incremental find-and-replace | |
1195 | * operations, see uregex_replaceFirst() or uregex_replaceAll().</p> | |
1196 | * | |
1197 | * @param regexp The regular expression object. | |
1198 | * @param replacementText The string that will replace the matched portion of the | |
1199 | * input string as it is copied to the destination buffer. | |
1200 | * The replacement text may contain references ($1, for | |
1201 | * example) to capture groups from the match. | |
1202 | * @param replacementLength The length of the replacement text string, | |
1203 | * or -1 if the string is NUL terminated. | |
1204 | * @param destBuf The buffer into which the results of the | |
1205 | * find-and-replace are placed. On return, this pointer | |
1206 | * will be updated to refer to the beginning of the | |
1207 | * unused portion of buffer, leaving it in position for | |
1208 | * a subsequent call to this function. | |
1209 | * @param destCapacity The size of the output buffer. On return, this | |
1210 | * parameter will be updated to reflect the space remaining | |
1211 | * unused in the output buffer. | |
1212 | * @param status A reference to a UErrorCode to receive any errors. | |
1213 | * @return The length of the result string. In the event that | |
1214 | * destCapacity is inadequate, the full length of the | |
1215 | * untruncated output string is returned. | |
1216 | * | |
73c04bcf | 1217 | * @stable ICU 3.0 |
374ca955 A |
1218 | * |
1219 | */ | |
73c04bcf | 1220 | U_STABLE int32_t U_EXPORT2 |
374ca955 | 1221 | uregex_appendReplacement(URegularExpression *regexp, |
729e4ab9 A |
1222 | const UChar *replacementText, |
1223 | int32_t replacementLength, | |
1224 | UChar **destBuf, | |
1225 | int32_t *destCapacity, | |
1226 | UErrorCode *status); | |
1227 | ||
729e4ab9 A |
1228 | /** |
1229 | * Implements a replace operation intended to be used as part of an | |
1230 | * incremental find-and-replace. | |
1231 | * | |
1232 | * <p>The input string, starting from the end of the previous match and ending at | |
1233 | * the start of the current match, is appended to the destination string. Then the | |
1234 | * replacement string is appended to the output string, | |
1235 | * including handling any substitutions of captured text.</p> | |
1236 | * | |
1237 | * <p>For simple, prepackaged, non-incremental find-and-replace | |
1238 | * operations, see uregex_replaceFirst() or uregex_replaceAll().</p> | |
1239 | * | |
1240 | * @param regexp The regular expression object. | |
1241 | * @param replacementText The string that will replace the matched portion of the | |
1242 | * input string as it is copied to the destination buffer. | |
1243 | * The replacement text may contain references ($1, for | |
1244 | * example) to capture groups from the match. | |
1245 | * @param dest A mutable UText that will receive the result. Must not be NULL. | |
1246 | * @param status A reference to a UErrorCode to receive any errors. | |
1247 | * | |
4388f060 | 1248 | * @stable ICU 4.6 |
729e4ab9 | 1249 | */ |
51004dcb | 1250 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
1251 | uregex_appendReplacementUText(URegularExpression *regexp, |
1252 | UText *replacementText, | |
1253 | UText *dest, | |
1254 | UErrorCode *status); | |
374ca955 | 1255 | |
374ca955 A |
1256 | /** |
1257 | * As the final step in a find-and-replace operation, append the remainder | |
1258 | * of the input string, starting at the position following the last match, | |
1259 | * to the destination string. <code>uregex_appendTail()</code> is intended | |
1260 | * to be invoked after one or more invocations of the | |
1261 | * <code>uregex_appendReplacement()</code> function. | |
1262 | * | |
1263 | * @param regexp The regular expression object. This is needed to | |
1264 | * obtain the input string along with the position | |
1265 | * of the last match within it. | |
1266 | * @param destBuf The buffer in which the results of the | |
1267 | * find-and-replace are placed. On return, the pointer | |
1268 | * will be updated to refer to the beginning of the | |
1269 | * unused portion of buffer. | |
1270 | * @param destCapacity The size of the output buffer. On return, this | |
1271 | * value will be updated to reflect the space remaining | |
1272 | * unused in the output buffer. | |
1273 | * @param status A reference to a UErrorCode to receive any errors. | |
1274 | * @return The length of the result string. In the event that | |
1275 | * destCapacity is inadequate, the full length of the | |
1276 | * untruncated output string is returned. | |
1277 | * | |
73c04bcf | 1278 | * @stable ICU 3.0 |
374ca955 | 1279 | */ |
73c04bcf | 1280 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
1281 | uregex_appendTail(URegularExpression *regexp, |
1282 | UChar **destBuf, | |
1283 | int32_t *destCapacity, | |
1284 | UErrorCode *status); | |
1285 | ||
729e4ab9 A |
1286 | /** |
1287 | * As the final step in a find-and-replace operation, append the remainder | |
1288 | * of the input string, starting at the position following the last match, | |
1289 | * to the destination string. <code>uregex_appendTailUText()</code> is intended | |
1290 | * to be invoked after one or more invocations of the | |
1291 | * <code>uregex_appendReplacementUText()</code> function. | |
1292 | * | |
1293 | * @param regexp The regular expression object. This is needed to | |
1294 | * obtain the input string along with the position | |
1295 | * of the last match within it. | |
1296 | * @param dest A mutable UText that will receive the result. Must not be NULL. | |
4388f060 A |
1297 | * |
1298 | * @param status Error code | |
1299 | * | |
729e4ab9 A |
1300 | * @return The destination UText. |
1301 | * | |
4388f060 | 1302 | * @stable ICU 4.6 |
729e4ab9 | 1303 | */ |
51004dcb | 1304 | U_STABLE UText * U_EXPORT2 |
729e4ab9 A |
1305 | uregex_appendTailUText(URegularExpression *regexp, |
1306 | UText *dest, | |
1307 | UErrorCode *status); | |
374ca955 | 1308 | |
374ca955 A |
1309 | /** |
1310 | * Split a string into fields. Somewhat like split() from Perl. | |
1311 | * The pattern matches identify delimiters that separate the input | |
1312 | * into fields. The input data between the matches becomes the | |
1313 | * fields themselves. | |
4388f060 | 1314 | * |
374ca955 | 1315 | * Each of the fields is copied from the input string to the destination |
729e4ab9 | 1316 | * buffer, and NUL terminated. The position of each field within |
374ca955 | 1317 | * the destination buffer is returned in the destFields array. |
73c04bcf | 1318 | * |
4388f060 A |
1319 | * If the delimiter pattern includes capture groups, the captured text will |
1320 | * also appear in the destination array of output strings, interspersed | |
1321 | * with the fields. This is similar to Perl, but differs from Java, | |
1322 | * which ignores the presence of capture groups in the pattern. | |
1323 | * | |
1324 | * Trailing empty fields will always be returned, assuming sufficient | |
1325 | * destination capacity. This differs from the default behavior for Java | |
1326 | * and Perl where trailing empty fields are not returned. | |
1327 | * | |
1328 | * The number of strings produced by the split operation is returned. | |
1329 | * This count includes the strings from capture groups in the delimiter pattern. | |
1330 | * This behavior differs from Java, which ignores capture groups. | |
374ca955 A |
1331 | * |
1332 | * @param regexp The compiled regular expression. | |
1333 | * @param destBuf A (UChar *) buffer to receive the fields that | |
1334 | * are extracted from the input string. These | |
1335 | * field pointers will refer to positions within the | |
1336 | * destination buffer supplied by the caller. Any | |
1337 | * extra positions within the destFields array will be | |
1338 | * set to NULL. | |
1339 | * @param destCapacity The capacity of the destBuf. | |
1340 | * @param requiredCapacity The actual capacity required of the destBuf. | |
1341 | * If destCapacity is too small, requiredCapacity will return | |
1342 | * the total capacity required to hold all of the output, and | |
1343 | * a U_BUFFER_OVERFLOW_ERROR will be returned. | |
1344 | * @param destFields An array to be filled with the position of each | |
1345 | * of the extracted fields within destBuf. | |
1346 | * @param destFieldsCapacity The number of elements in the destFields array. | |
1347 | * If the number of fields found is less than destFieldsCapacity, | |
1348 | * the extra destFields elements are set to zero. | |
1349 | * If destFieldsCapacity is too small, the trailing part of the | |
1350 | * input, including any field delimiters, is treated as if it | |
1351 | * were the last field - it is copied to the destBuf, and | |
1352 | * its position in the destBuf is stored in the last element | |
1353 | * of destFields. This behavior mimics that of Perl. It is not | |
1354 | * an error condition, and no error status is returned when all destField | |
1355 | * positions are used. | |
1356 | * @param status A reference to a UErrorCode to receive any errors. | |
1357 | * @return The number of fields into which the input string was split. | |
73c04bcf | 1358 | * @stable ICU 3.0 |
374ca955 | 1359 | */ |
73c04bcf | 1360 | U_STABLE int32_t U_EXPORT2 |
374ca955 A |
1361 | uregex_split( URegularExpression *regexp, |
1362 | UChar *destBuf, | |
1363 | int32_t destCapacity, | |
1364 | int32_t *requiredCapacity, | |
1365 | UChar *destFields[], | |
1366 | int32_t destFieldsCapacity, | |
1367 | UErrorCode *status); | |
1368 | ||
729e4ab9 A |
1369 | /** |
1370 | * Split a string into fields. Somewhat like split() from Perl. | |
1371 | * The pattern matches identify delimiters that separate the input | |
1372 | * into fields. The input data between the matches becomes the | |
1373 | * fields themselves. | |
1374 | * <p> | |
1375 | * The behavior of this function is not very closely aligned with uregex_split(); | |
1376 | * instead, it is based on (and implemented directly on top of) the C++ split method. | |
1377 | * | |
1378 | * @param regexp The compiled regular expression. | |
1379 | * @param destFields An array of mutable UText structs to receive the results of the split. | |
1380 | * If a field is NULL, a new UText is allocated to contain the results for | |
1381 | * that field. This new UText is not guaranteed to be mutable. | |
1382 | * @param destFieldsCapacity The number of elements in the destination array. | |
1383 | * If the number of fields found is less than destFieldsCapacity, the | |
1384 | * extra strings in the destination array are not altered. | |
1385 | * If the number of destination strings is less than the number | |
1386 | * of fields, the trailing part of the input string, including any | |
1387 | * field delimiters, is placed in the last destination string. | |
1388 | * This behavior mimics that of Perl. It is not an error condition, and no | |
1389 | * error status is returned when all destField positions are used. | |
1390 | * @param status A reference to a UErrorCode to receive any errors. | |
1391 | * @return The number of fields into which the input string was split. | |
1392 | * | |
4388f060 | 1393 | * @stable ICU 4.6 |
729e4ab9 | 1394 | */ |
51004dcb | 1395 | U_STABLE int32_t U_EXPORT2 |
729e4ab9 A |
1396 | uregex_splitUText(URegularExpression *regexp, |
1397 | UText *destFields[], | |
1398 | int32_t destFieldsCapacity, | |
1399 | UErrorCode *status); | |
1400 | ||
46f4442e A |
1401 | /** |
1402 | * Set a processing time limit for match operations with this URegularExpression. | |
1403 | * | |
1404 | * Some patterns, when matching certain strings, can run in exponential time. | |
1405 | * For practical purposes, the match operation may appear to be in an | |
1406 | * infinite loop. | |
1407 | * When a limit is set a match operation will fail with an error if the | |
1408 | * limit is exceeded. | |
1409 | * <p> | |
1410 | * The units of the limit are steps of the match engine. | |
1411 | * Correspondence with actual processor time will depend on the speed | |
1412 | * of the processor and the details of the specific pattern, but will | |
1413 | * typically be on the order of milliseconds. | |
1414 | * <p> | |
1415 | * By default, the matching time is not limited. | |
1416 | * <p> | |
1417 | * | |
1418 | * @param regexp The compiled regular expression. | |
1419 | * @param limit The limit value, or 0 for no limit. | |
1420 | * @param status A reference to a UErrorCode to receive any errors. | |
729e4ab9 | 1421 | * @stable ICU 4.0 |
46f4442e | 1422 | */ |
729e4ab9 | 1423 | U_STABLE void U_EXPORT2 |
46f4442e A |
1424 | uregex_setTimeLimit(URegularExpression *regexp, |
1425 | int32_t limit, | |
1426 | UErrorCode *status); | |
1427 | ||
1428 | /** | |
1429 | * Get the time limit for matches with this URegularExpression. | |
1430 | * A return value of zero indicates that there is no limit. | |
1431 | * | |
1432 | * @param regexp The compiled regular expression. | |
1433 | * @param status A reference to a UErrorCode to receive any errors. | |
1434 | * @return the maximum allowed time for a match, in units of processing steps. | |
729e4ab9 | 1435 | * @stable ICU 4.0 |
46f4442e | 1436 | */ |
729e4ab9 | 1437 | U_STABLE int32_t U_EXPORT2 |
46f4442e A |
1438 | uregex_getTimeLimit(const URegularExpression *regexp, |
1439 | UErrorCode *status); | |
1440 | ||
1441 | /** | |
4388f060 | 1442 | * Set the amount of heap storage available for use by the match backtracking stack. |
46f4442e A |
1443 | * <p> |
1444 | * ICU uses a backtracking regular expression engine, with the backtrack stack | |
1445 | * maintained on the heap. This function sets the limit to the amount of memory | |
1446 | * that can be used for this purpose. A backtracking stack overflow will | |
1447 | * result in an error from the match operation that caused it. | |
1448 | * <p> | |
1449 | * A limit is desirable because a malicious or poorly designed pattern can use | |
1450 | * excessive memory, potentially crashing the process. A limit is enabled | |
1451 | * by default. | |
1452 | * <p> | |
1453 | * @param regexp The compiled regular expression. | |
1454 | * @param limit The maximum size, in bytes, of the matching backtrack stack. | |
57a6839d A |
1455 | * A value of zero means no limit. |
1456 | * The limit must be greater than or equal to zero. | |
46f4442e A |
1457 | * @param status A reference to a UErrorCode to receive any errors. |
1458 | * | |
729e4ab9 | 1459 | * @stable ICU 4.0 |
46f4442e | 1460 | */ |
729e4ab9 | 1461 | U_STABLE void U_EXPORT2 |
46f4442e A |
1462 | uregex_setStackLimit(URegularExpression *regexp, |
1463 | int32_t limit, | |
1464 | UErrorCode *status); | |
1465 | ||
1466 | /** | |
1467 | * Get the size of the heap storage available for use by the backtracking stack. | |
1468 | * | |
1469 | * @return the maximum backtracking stack size, in bytes, or zero if the | |
1470 | * stack size is unlimited. | |
729e4ab9 | 1471 | * @stable ICU 4.0 |
46f4442e | 1472 | */ |
729e4ab9 | 1473 | U_STABLE int32_t U_EXPORT2 |
46f4442e A |
1474 | uregex_getStackLimit(const URegularExpression *regexp, |
1475 | UErrorCode *status); | |
1476 | ||
1477 | ||
1478 | /** | |
1479 | * Function pointer for a regular expression matching callback function. | |
1480 | * When set, a callback function will be called periodically during matching | |
1481 | * operations. If the callback function returns FALSE, the matching | |
1482 | * operation will be terminated early. | |
1483 | * | |
1484 | * Note: the callback function must not call other functions on this | |
1485 | * URegularExpression. | |
1486 | * | |
1487 | * @param context context pointer. The callback function will be invoked | |
1488 | * with the context specified at the time that | |
1489 | * uregex_setMatchCallback() is called. | |
1490 | * @param steps the accumulated processing time, in match steps, | |
1491 | * for this matching operation. | |
1492 | * @return TRUE to continue the matching operation. | |
1493 | * FALSE to terminate the matching operation. | |
729e4ab9 | 1494 | * @stable ICU 4.0 |
46f4442e A |
1495 | */ |
1496 | U_CDECL_BEGIN | |
1497 | typedef UBool U_CALLCONV URegexMatchCallback ( | |
1498 | const void *context, | |
1499 | int32_t steps); | |
1500 | U_CDECL_END | |
1501 | ||
1502 | /** | |
1503 | * Set a callback function for this URegularExpression. | |
1504 | * During matching operations the function will be called periodically, | |
1505 | * giving the application the opportunity to terminate a long-running | |
1506 | * match. | |
1507 | * | |
1508 | * @param regexp The compiled regular expression. | |
1509 | * @param callback A pointer to the user-supplied callback function. | |
1510 | * @param context User context pointer. The value supplied at the | |
1511 | * time the callback function is set will be saved | |
1512 | * and passed to the callback each time that it is called. | |
1513 | * @param status A reference to a UErrorCode to receive any errors. | |
729e4ab9 | 1514 | * @stable ICU 4.0 |
46f4442e | 1515 | */ |
729e4ab9 | 1516 | U_STABLE void U_EXPORT2 |
46f4442e A |
1517 | uregex_setMatchCallback(URegularExpression *regexp, |
1518 | URegexMatchCallback *callback, | |
1519 | const void *context, | |
1520 | UErrorCode *status); | |
1521 | ||
1522 | ||
1523 | /** | |
1524 | * Get the callback function for this URegularExpression. | |
1525 | * | |
1526 | * @param regexp The compiled regular expression. | |
4388f060 | 1527 | * @param callback Out parameter, receives a pointer to the user-supplied |
46f4442e A |
1528 | * callback function. |
1529 | * @param context Out parameter, receives the user context pointer that | |
1530 | * was set when uregex_setMatchCallback() was called. | |
1531 | * @param status A reference to a UErrorCode to receive any errors. | |
729e4ab9 | 1532 | * @stable ICU 4.0 |
46f4442e | 1533 | */ |
729e4ab9 | 1534 | U_STABLE void U_EXPORT2 |
46f4442e A |
1535 | uregex_getMatchCallback(const URegularExpression *regexp, |
1536 | URegexMatchCallback **callback, | |
1537 | const void **context, | |
1538 | UErrorCode *status); | |
1539 | ||
729e4ab9 A |
1540 | /** |
1541 | * Function pointer for a regular expression find callback function. | |
1542 | * | |
1543 | * When set, a callback function will be called during a find operation | |
1544 | * and for operations that depend on find, such as findNext, split and some replace | |
1545 | * operations like replaceFirst. | |
1546 | * The callback will usually be called after each attempt at a match, but this is not a | |
1547 | * guarantee that the callback will be invoked at each character. For finds where the | |
1548 | * match engine is invoked at each character, this may be close to true, but less likely | |
1549 | * for more optimized loops where the pattern is known to only start, and the match | |
1550 | * engine invoked, at certain characters. | |
1551 | * When invoked, this callback will specify the index at which a match operation is about | |
1552 | * to be attempted, giving the application the opportunity to terminate a long-running | |
1553 | * find operation. | |
1554 | * | |
1555 | * If the callback function returns FALSE, the find operation will be terminated early. | |
1556 | * | |
1557 | * Note: the callback function must not call other functions on this | |
1558 | * URegularExpression. | |
1559 | * | |
1560 | * @param context context pointer. The callback function will be invoked | |
1561 | * with the context specified at the time that | |
1562 | * uregex_setFindProgressCallback() is called. | |
1563 | * @param matchIndex the next index at which a match will be attempted for this | |
1564 | * find operation. If this callback interrupts the search, this is the | |
1565 | * index at which a find/findNext operation may be re-initiated. | |
1566 | * @return TRUE to continue the matching operation. | |
1567 | * FALSE to terminate the matching operation. | |
4388f060 | 1568 | * @stable ICU 4.6 |
729e4ab9 A |
1569 | */ |
1570 | U_CDECL_BEGIN | |
1571 | typedef UBool U_CALLCONV URegexFindProgressCallback ( | |
1572 | const void *context, | |
1573 | int64_t matchIndex); | |
1574 | U_CDECL_END | |
1575 | ||
4388f060 | 1576 | |
729e4ab9 A |
1577 | /** |
1578 | * Set the find progress callback function for this URegularExpression. | |
1579 | * | |
1580 | * @param regexp The compiled regular expression. | |
1581 | * @param callback A pointer to the user-supplied callback function. | |
1582 | * @param context User context pointer. The value supplied at the | |
1583 | * time the callback function is set will be saved | |
1584 | * and passed to the callback each time that it is called. | |
1585 | * @param status A reference to a UErrorCode to receive any errors. | |
4388f060 | 1586 | * @stable ICU 4.6 |
729e4ab9 | 1587 | */ |
51004dcb | 1588 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
1589 | uregex_setFindProgressCallback(URegularExpression *regexp, |
1590 | URegexFindProgressCallback *callback, | |
1591 | const void *context, | |
1592 | UErrorCode *status); | |
1593 | ||
729e4ab9 A |
1594 | /** |
1595 | * Get the find progress callback function for this URegularExpression. | |
1596 | * | |
1597 | * @param regexp The compiled regular expression. | |
4388f060 | 1598 | * @param callback Out parameter, receives a pointer to the user-supplied |
729e4ab9 A |
1599 | * callback function. |
1600 | * @param context Out parameter, receives the user context pointer that | |
1601 | * was set when uregex_setFindProgressCallback() was called. | |
1602 | * @param status A reference to a UErrorCode to receive any errors. | |
4388f060 | 1603 | * @stable ICU 4.6 |
729e4ab9 | 1604 | */ |
51004dcb | 1605 | U_STABLE void U_EXPORT2 |
729e4ab9 A |
1606 | uregex_getFindProgressCallback(const URegularExpression *regexp, |
1607 | URegexFindProgressCallback **callback, | |
1608 | const void **context, | |
1609 | UErrorCode *status); | |
46f4442e | 1610 | |
374ca955 A |
1611 | #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */ |
1612 | #endif /* UREGEX_H */ |