]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
729e4ab9 | 4 | * Copyright (C) 2004-2010, International Business Machines |
73c04bcf A |
5 | * Corporation and others. All Rights Reserved. |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: utext.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2004oct06 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #ifndef __UTEXT_H__ | |
18 | #define __UTEXT_H__ | |
19 | ||
20 | /** | |
21 | * \file | |
22 | * \brief C API: Abstract Unicode Text API | |
23 | * | |
24 | * The Text Access API provides a means to allow text that is stored in alternative | |
25 | * formats to work with ICU services. ICU normally operates on text that is | |
26 | * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type | |
27 | * UnicodeString for C++ APIs. | |
28 | * | |
29 | * ICU Text Access allows other formats, such as UTF-8 or non-contiguous | |
30 | * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services. | |
31 | * | |
32 | * There are three general classes of usage for UText: | |
33 | * | |
34 | * Application Level Use. This is the simplest usage - applications would | |
35 | * use one of the utext_open() functions on their input text, and pass | |
36 | * the resulting UText to the desired ICU service. | |
37 | * | |
38 | * Second is usage in ICU Services, such as break iteration, that will need to | |
39 | * operate on input presented to them as a UText. These implementations | |
40 | * will need to use the iteration and related UText functions to gain | |
41 | * access to the actual text. | |
42 | * | |
43 | * The third class of UText users are "text providers." These are the | |
44 | * UText implementations for the various text storage formats. An application | |
45 | * or system with a unique text storage format can implement a set of | |
46 | * UText provider functions for that format, which will then allow | |
47 | * ICU services to operate on that format. | |
48 | * | |
49 | * | |
50 | * <em>Iterating over text</em> | |
51 | * | |
52 | * Here is sample code for a forward iteration over the contents of a UText | |
53 | * | |
54 | * \code | |
55 | * UChar32 c; | |
56 | * UText *ut = whatever(); | |
57 | * | |
58 | * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) { | |
59 | * // do whatever with the codepoint c here. | |
60 | * } | |
61 | * \endcode | |
62 | * | |
63 | * And here is similar code to iterate in the reverse direction, from the end | |
64 | * of the text towards the beginning. | |
65 | * | |
66 | * \code | |
67 | * UChar32 c; | |
68 | * UText *ut = whatever(); | |
69 | * int64_t textLength = utext_nativeLength(ut); | |
70 | * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) { | |
71 | * // do whatever with the codepoint c here. | |
72 | * } | |
73 | * \endcode | |
74 | * | |
75 | * <em>Characters and Indexing</em> | |
76 | * | |
77 | * Indexing into text by UText functions is nearly always in terms of the native | |
78 | * indexing of the underlying text storage. The storage format could be UTF-8 | |
79 | * or UTF-32, for example. When coding to the UText access API, no assumptions | |
80 | * can be made regarding the size of characters, or how far an index | |
81 | * may move when iterating between characters. | |
82 | * | |
83 | * All indices supplied to UText functions are pinned to the length of the | |
84 | * text. An out-of-bounds index is not considered to be an error, but is | |
85 | * adjusted to be in the range 0 <= index <= length of input text. | |
86 | * | |
87 | * | |
88 | * When an index position is returned from a UText function, it will be | |
89 | * a native index to the underlying text. In the case of multi-unit characters, | |
90 | * it will always refer to the first position of the character, | |
91 | * never to the interior. This is essentially the same thing as saying that | |
92 | * a returned index will always point to a boundary between characters. | |
93 | * | |
94 | * When a native index is supplied to a UText function, all indices that | |
95 | * refer to any part of a multi-unit character representation are considered | |
96 | * to be equivalent. In the case of multi-unit characters, an incoming index | |
97 | * will be logically normalized to refer to the start of the character. | |
98 | * | |
99 | * It is possible to test whether a native index is on a code point boundary | |
100 | * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex(). | |
101 | * If the index is returned unchanged, it was on a code point boundary. If | |
102 | * an adjusted index is returned, the original index referred to the | |
103 | * interior of a character. | |
104 | * | |
105 | * <em>Conventions for calling UText functions</em> | |
106 | * | |
107 | * Most UText access functions have as their first parameter a (UText *) pointer, | |
108 | * which specifies the UText to be used. Unless otherwise noted, the | |
109 | * pointer must refer to a valid, open UText. Attempting to | |
110 | * use a closed UText or passing a NULL pointer is a programming error and | |
111 | * will produce undefined results or NULL pointer exceptions. | |
112 | * | |
113 | * The utext_open family of functions can either open an existing (closed) | |
114 | * UText, or heap allocate a new UText. Here is sample code for creating | |
115 | * a stack-allocated UText. | |
116 | * | |
117 | * \code | |
118 | * char *s = whatever(); // A utf-8 string | |
119 | * UErrorCode status = U_ZERO_ERROR; | |
120 | * UText ut = UTEXT_INITIALIZER; | |
121 | * utext_openUTF8(&ut, s, -1, &status); | |
122 | * if (U_FAILURE(status)) { | |
123 | * // error handling | |
124 | * } else { | |
125 | * // work with the UText | |
126 | * } | |
127 | * \endcode | |
128 | * | |
129 | * Any existing UText passed to an open function _must_ have been initialized, | |
130 | * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated | |
131 | * by an open function. Passing NULL will cause the open function to | |
132 | * heap-allocate and fully initialize a new UText. | |
133 | * | |
134 | */ | |
135 | ||
136 | ||
137 | ||
138 | #include "unicode/utypes.h" | |
729e4ab9 A |
139 | #include "unicode/uchar.h" |
140 | #if U_SHOW_CPLUSPLUS_API | |
141 | #include "unicode/localpointer.h" | |
73c04bcf A |
142 | #include "unicode/rep.h" |
143 | #include "unicode/unistr.h" | |
144 | #include "unicode/chariter.h" | |
145 | #endif | |
146 | ||
147 | ||
148 | U_CDECL_BEGIN | |
149 | ||
150 | struct UText; | |
46f4442e | 151 | typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */ |
73c04bcf A |
152 | |
153 | ||
154 | /*************************************************************************************** | |
155 | * | |
156 | * C Functions for creating UText wrappers around various kinds of text strings. | |
157 | * | |
158 | ****************************************************************************************/ | |
159 | ||
160 | ||
161 | /** | |
162 | * Close function for UText instances. | |
163 | * Cleans up, releases any resources being held by an open UText. | |
164 | * <p> | |
165 | * If the UText was originally allocated by one of the utext_open functions, | |
166 | * the storage associated with the utext will also be freed. | |
167 | * If the UText storage originated with the application, as it would with | |
168 | * a local or static instance, the storage will not be deleted. | |
169 | * | |
170 | * An open UText can be reset to refer to new string by using one of the utext_open() | |
171 | * functions without first closing the UText. | |
172 | * | |
173 | * @param ut The UText to be closed. | |
174 | * @return NULL if the UText struct was deleted by the close. If the UText struct | |
175 | * was originally provided by the caller to the open function, it is | |
176 | * returned by this function, and may be safely used again in | |
177 | * a subsequent utext_open. | |
178 | * | |
46f4442e | 179 | * @stable ICU 3.4 |
73c04bcf | 180 | */ |
46f4442e | 181 | U_STABLE UText * U_EXPORT2 |
73c04bcf A |
182 | utext_close(UText *ut); |
183 | ||
729e4ab9 A |
184 | #if U_SHOW_CPLUSPLUS_API |
185 | ||
186 | U_NAMESPACE_BEGIN | |
187 | ||
188 | /** | |
189 | * \class LocalUTextPointer | |
190 | * "Smart pointer" class, closes a UText via utext_close(). | |
191 | * For most methods see the LocalPointerBase base class. | |
192 | * | |
193 | * @see LocalPointerBase | |
194 | * @see LocalPointer | |
195 | * @stable ICU 4.4 | |
196 | */ | |
197 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close); | |
198 | ||
199 | U_NAMESPACE_END | |
200 | ||
201 | #endif | |
73c04bcf A |
202 | |
203 | /** | |
204 | * Open a read-only UText implementation for UTF-8 strings. | |
205 | * | |
206 | * \htmlonly | |
207 | * Any invalid UTF-8 in the input will be handled in this way: | |
208 | * a sequence of bytes that has the form of a truncated, but otherwise valid, | |
209 | * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD. | |
210 | * Any other illegal bytes will each be replaced by a \uFFFD. | |
211 | * \endhtmlonly | |
212 | * | |
213 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
214 | * If non-NULL, must refer to an initialized UText struct, which will then | |
215 | * be reset to reference the specified UTF-8 string. | |
216 | * @param s A UTF-8 string. Must not be NULL. | |
217 | * @param length The length of the UTF-8 string in bytes, or -1 if the string is | |
218 | * zero terminated. | |
219 | * @param status Errors are returned here. | |
220 | * @return A pointer to the UText. If a pre-allocated UText was provided, it | |
221 | * will always be used and returned. | |
46f4442e | 222 | * @stable ICU 3.4 |
73c04bcf | 223 | */ |
46f4442e | 224 | U_STABLE UText * U_EXPORT2 |
73c04bcf A |
225 | utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status); |
226 | ||
227 | ||
228 | /** | |
229 | * Open a read-only UText for UChar * string. | |
230 | * | |
231 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
232 | * If non-NULL, must refer to an initialized UText struct, which will then | |
233 | * be reset to reference the specified UChar string. | |
234 | * @param s A UChar (UTF-16) string | |
235 | * @param length The number of UChars in the input string, or -1 if the string is | |
236 | * zero terminated. | |
237 | * @param status Errors are returned here. | |
238 | * @return A pointer to the UText. If a pre-allocated UText was provided, it | |
239 | * will always be used and returned. | |
46f4442e | 240 | * @stable ICU 3.4 |
73c04bcf | 241 | */ |
46f4442e | 242 | U_STABLE UText * U_EXPORT2 |
73c04bcf A |
243 | utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); |
244 | ||
245 | ||
729e4ab9 | 246 | #if U_SHOW_CPLUSPLUS_API |
73c04bcf A |
247 | /** |
248 | * Open a writable UText for a non-const UnicodeString. | |
249 | * | |
250 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
251 | * If non-NULL, must refer to an initialized UText struct, which will then | |
252 | * be reset to reference the specified input string. | |
253 | * @param s A UnicodeString. | |
254 | * @param status Errors are returned here. | |
255 | * @return Pointer to the UText. If a UText was supplied as input, this | |
256 | * will always be used and returned. | |
46f4442e | 257 | * @stable ICU 3.4 |
73c04bcf | 258 | */ |
46f4442e A |
259 | U_STABLE UText * U_EXPORT2 |
260 | utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status); | |
73c04bcf A |
261 | |
262 | ||
263 | /** | |
264 | * Open a UText for a const UnicodeString. The resulting UText will not be writable. | |
265 | * | |
266 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
267 | * If non-NULL, must refer to an initialized UText struct, which will then | |
268 | * be reset to reference the specified input string. | |
269 | * @param s A const UnicodeString to be wrapped. | |
270 | * @param status Errors are returned here. | |
271 | * @return Pointer to the UText. If a UText was supplied as input, this | |
272 | * will always be used and returned. | |
46f4442e | 273 | * @stable ICU 3.4 |
73c04bcf | 274 | */ |
46f4442e A |
275 | U_STABLE UText * U_EXPORT2 |
276 | utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status); | |
73c04bcf A |
277 | |
278 | ||
279 | /** | |
280 | * Open a writable UText implementation for an ICU Replaceable object. | |
281 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
282 | * If non-NULL, must refer to an already existing UText, which will then | |
283 | * be reset to reference the specified replaceable text. | |
284 | * @param rep A Replaceable text object. | |
285 | * @param status Errors are returned here. | |
286 | * @return Pointer to the UText. If a UText was supplied as input, this | |
287 | * will always be used and returned. | |
288 | * @see Replaceable | |
46f4442e | 289 | * @stable ICU 3.4 |
73c04bcf | 290 | */ |
46f4442e A |
291 | U_STABLE UText * U_EXPORT2 |
292 | utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status); | |
73c04bcf A |
293 | |
294 | /** | |
295 | * Open a UText implementation over an ICU CharacterIterator. | |
296 | * @param ut Pointer to a UText struct. If NULL, a new UText will be created. | |
297 | * If non-NULL, must refer to an already existing UText, which will then | |
298 | * be reset to reference the specified character iterator. | |
299 | * @param ic A Character Iterator. | |
300 | * @param status Errors are returned here. | |
301 | * @return Pointer to the UText. If a UText was supplied as input, this | |
302 | * will always be used and returned. | |
303 | * @see CharacterIterator | |
46f4442e | 304 | * @stable ICU 3.4 |
73c04bcf | 305 | */ |
46f4442e A |
306 | U_STABLE UText * U_EXPORT2 |
307 | utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status); | |
73c04bcf A |
308 | |
309 | #endif | |
310 | ||
311 | ||
312 | /** | |
313 | * Clone a UText. This is much like opening a UText where the source text is itself | |
314 | * another UText. | |
315 | * | |
316 | * A deep clone will copy both the UText data structures and the underlying text. | |
317 | * The original and cloned UText will operate completely independently; modifications | |
318 | * made to the text in one will not affect the other. Text providers are not | |
319 | * required to support deep clones. The user of clone() must check the status return | |
320 | * and be prepared to handle failures. | |
321 | * | |
322 | * The standard UText implementations for UTF8, UChar *, UnicodeString and | |
323 | * Replaceable all support deep cloning. | |
324 | * | |
325 | * The UText returned from a deep clone will be writable, assuming that the text | |
326 | * provider is able to support writing, even if the source UText had been made | |
327 | * non-writable by means of utext_freeze(). | |
328 | * | |
329 | * A shallow clone replicates only the UText data structures; it does not make | |
330 | * a copy of the underlying text. Shallow clones can be used as an efficient way to | |
331 | * have multiple iterators active in a single text string that is not being | |
332 | * modified. | |
333 | * | |
334 | * A shallow clone operation will not fail, barring truly exceptional conditions such | |
335 | * as memory allocation failures. | |
336 | * | |
337 | * Shallow UText clones should be avoided if the UText functions that modify the | |
338 | * text are expected to be used, either on the original or the cloned UText. | |
339 | * Any such modifications can cause unpredictable behavior. Read Only | |
340 | * shallow clones provide some protection against errors of this type by | |
341 | * disabling text modification via the cloned UText. | |
342 | * | |
343 | * A shallow clone made with the readOnly parameter == FALSE will preserve the | |
344 | * utext_isWritable() state of the source object. Note, however, that | |
345 | * write operations must be avoided while more than one UText exists that refer | |
346 | * to the same underlying text. | |
347 | * | |
348 | * A UText and its clone may be safely concurrently accessed by separate threads. | |
349 | * This is true for read access only with shallow clones, and for both read and | |
350 | * write access with deep clones. | |
351 | * It is the responsibility of the Text Provider to ensure that this thread safety | |
352 | * constraint is met. | |
353 | * | |
354 | * @param dest A UText struct to be filled in with the result of the clone operation, | |
355 | * or NULL if the clone function should heap-allocate a new UText struct. | |
356 | * If non-NULL, must refer to an already existing UText, which will then | |
357 | * be reset to become the clone. | |
358 | * @param src The UText to be cloned. | |
359 | * @param deep TRUE to request a deep clone, FALSE for a shallow clone. | |
360 | * @param readOnly TRUE to request that the cloned UText have read only access to the | |
361 | * underlying text. | |
362 | ||
363 | * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR | |
364 | * will be returned if the text provider is unable to clone the | |
365 | * original text. | |
366 | * @return The newly created clone, or NULL if the clone operation failed. | |
46f4442e | 367 | * @stable ICU 3.4 |
73c04bcf | 368 | */ |
46f4442e | 369 | U_STABLE UText * U_EXPORT2 |
73c04bcf A |
370 | utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status); |
371 | ||
372 | ||
373 | /** | |
374 | * Compare two UText objects for equality. | |
375 | * UTexts are equal if they are iterating over the same text, and | |
376 | * have the same iteration position within the text. | |
377 | * If either or both of the parameters are NULL, the comparison is FALSE. | |
378 | * | |
379 | * @param a The first of the two UTexts to compare. | |
380 | * @param b The other UText to be compared. | |
381 | * @return TRUE if the two UTexts are equal. | |
46f4442e | 382 | * @stable ICU 3.6 |
73c04bcf | 383 | */ |
46f4442e | 384 | U_STABLE UBool U_EXPORT2 |
73c04bcf A |
385 | utext_equals(const UText *a, const UText *b); |
386 | ||
387 | ||
388 | /***************************************************************************** | |
389 | * | |
390 | * Functions to work with the text represented by a UText wrapper | |
391 | * | |
392 | *****************************************************************************/ | |
393 | ||
394 | /** | |
395 | * Get the length of the text. Depending on the characteristics | |
396 | * of the underlying text representation, this may be expensive. | |
397 | * @see utext_isLengthExpensive() | |
398 | * | |
399 | * | |
400 | * @param ut the text to be accessed. | |
401 | * @return the length of the text, expressed in native units. | |
402 | * | |
46f4442e | 403 | * @stable ICU 3.4 |
73c04bcf | 404 | */ |
46f4442e | 405 | U_STABLE int64_t U_EXPORT2 |
73c04bcf A |
406 | utext_nativeLength(UText *ut); |
407 | ||
408 | /** | |
409 | * Return TRUE if calculating the length of the text could be expensive. | |
410 | * Finding the length of NUL terminated strings is considered to be expensive. | |
411 | * | |
412 | * Note that the value of this function may change | |
413 | * as the result of other operations on a UText. | |
414 | * Once the length of a string has been discovered, it will no longer | |
415 | * be expensive to report it. | |
416 | * | |
417 | * @param ut the text to be accessed. | |
418 | * @return TRUE if determining the length of the text could be time consuming. | |
46f4442e | 419 | * @stable ICU 3.4 |
73c04bcf | 420 | */ |
46f4442e | 421 | U_STABLE UBool U_EXPORT2 |
73c04bcf A |
422 | utext_isLengthExpensive(const UText *ut); |
423 | ||
424 | /** | |
425 | * Returns the code point at the requested index, | |
426 | * or U_SENTINEL (-1) if it is out of bounds. | |
427 | * | |
428 | * If the specified index points to the interior of a multi-unit | |
429 | * character - one of the trail bytes of a UTF-8 sequence, for example - | |
430 | * the complete code point will be returned. | |
431 | * | |
432 | * The iteration position will be set to the start of the returned code point. | |
433 | * | |
434 | * This function is roughly equivalent to the sequence | |
435 | * utext_setNativeIndex(index); | |
436 | * utext_current32(); | |
437 | * (There is a subtle difference if the index is out of bounds by being less than zero - | |
438 | * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32() | |
439 | * will return the char at zero. utext_char32At(negative index), on the other hand, will | |
440 | * return the U_SENTINEL value of -1.) | |
441 | * | |
442 | * @param ut the text to be accessed | |
443 | * @param nativeIndex the native index of the character to be accessed. If the index points | |
444 | * to other than the first unit of a multi-unit character, it will be adjusted | |
445 | * to the start of the character. | |
446 | * @return the code point at the specified index. | |
46f4442e | 447 | * @stable ICU 3.4 |
73c04bcf | 448 | */ |
46f4442e | 449 | U_STABLE UChar32 U_EXPORT2 |
73c04bcf A |
450 | utext_char32At(UText *ut, int64_t nativeIndex); |
451 | ||
452 | ||
453 | /** | |
454 | * | |
455 | * Get the code point at the current iteration position, | |
456 | * or U_SENTINEL (-1) if the iteration has reached the end of | |
457 | * the input text. | |
458 | * | |
459 | * @param ut the text to be accessed. | |
460 | * @return the Unicode code point at the current iterator position. | |
46f4442e | 461 | * @stable ICU 3.4 |
73c04bcf | 462 | */ |
46f4442e | 463 | U_STABLE UChar32 U_EXPORT2 |
73c04bcf A |
464 | utext_current32(UText *ut); |
465 | ||
466 | ||
467 | /** | |
468 | * Get the code point at the current iteration position of the UText, and | |
469 | * advance the position to the first index following the character. | |
470 | * | |
471 | * If the position is at the end of the text (the index following | |
472 | * the last character, which is also the length of the text), | |
473 | * return U_SENTINEL (-1) and do not advance the index. | |
474 | * | |
475 | * This is a post-increment operation. | |
476 | * | |
477 | * An inline macro version of this function, UTEXT_NEXT32(), | |
478 | * is available for performance critical use. | |
479 | * | |
480 | * @param ut the text to be accessed. | |
481 | * @return the Unicode code point at the iteration position. | |
482 | * @see UTEXT_NEXT32 | |
46f4442e | 483 | * @stable ICU 3.4 |
73c04bcf | 484 | */ |
46f4442e | 485 | U_STABLE UChar32 U_EXPORT2 |
73c04bcf A |
486 | utext_next32(UText *ut); |
487 | ||
488 | ||
489 | /** | |
490 | * Move the iterator position to the character (code point) whose | |
491 | * index precedes the current position, and return that character. | |
492 | * This is a pre-decrement operation. | |
493 | * | |
494 | * If the initial position is at the start of the text (index of 0) | |
495 | * return U_SENTINEL (-1), and leave the position unchanged. | |
496 | * | |
497 | * An inline macro version of this function, UTEXT_PREVIOUS32(), | |
498 | * is available for performance critical use. | |
499 | * | |
500 | * @param ut the text to be accessed. | |
501 | * @return the previous UChar32 code point, or U_SENTINEL (-1) | |
502 | * if the iteration has reached the start of the text. | |
503 | * @see UTEXT_PREVIOUS32 | |
46f4442e | 504 | * @stable ICU 3.4 |
73c04bcf | 505 | */ |
46f4442e | 506 | U_STABLE UChar32 U_EXPORT2 |
73c04bcf A |
507 | utext_previous32(UText *ut); |
508 | ||
509 | ||
510 | /** | |
511 | * Set the iteration index and return the code point at that index. | |
512 | * Leave the iteration index at the start of the following code point. | |
513 | * | |
514 | * This function is the most efficient and convenient way to | |
515 | * begin a forward iteration. The results are identical to those | |
516 | * from the sequence | |
517 | * \code | |
518 | * utext_setNativeIndex(); | |
519 | * utext_next32(); | |
520 | * \endcode | |
521 | * | |
522 | * @param ut the text to be accessed. | |
523 | * @param nativeIndex Iteration index, in the native units of the text provider. | |
524 | * @return Code point which starts at or before index, | |
525 | * or U_SENTINEL (-1) if it is out of bounds. | |
46f4442e | 526 | * @stable ICU 3.4 |
73c04bcf | 527 | */ |
46f4442e | 528 | U_STABLE UChar32 U_EXPORT2 |
73c04bcf A |
529 | utext_next32From(UText *ut, int64_t nativeIndex); |
530 | ||
531 | ||
532 | ||
533 | /** | |
534 | * Set the iteration index, and return the code point preceding the | |
535 | * one specified by the initial index. Leave the iteration position | |
536 | * at the start of the returned code point. | |
537 | * | |
538 | * This function is the most efficient and convenient way to | |
539 | * begin a backwards iteration. | |
540 | * | |
541 | * @param ut the text to be accessed. | |
542 | * @param nativeIndex Iteration index in the native units of the text provider. | |
543 | * @return Code point preceding the one at the initial index, | |
544 | * or U_SENTINEL (-1) if it is out of bounds. | |
545 | * | |
46f4442e | 546 | * @stable ICU 3.4 |
73c04bcf | 547 | */ |
46f4442e | 548 | U_STABLE UChar32 U_EXPORT2 |
73c04bcf A |
549 | utext_previous32From(UText *ut, int64_t nativeIndex); |
550 | ||
551 | /** | |
552 | * Get the current iterator position, which can range from 0 to | |
553 | * the length of the text. | |
554 | * The position is a native index into the input text, in whatever format it | |
555 | * may have (possibly UTF-8 for example), and may not always be the same as | |
556 | * the corresponding UChar (UTF-16) index. | |
557 | * The returned position will always be aligned to a code point boundary. | |
558 | * | |
559 | * @param ut the text to be accessed. | |
560 | * @return the current index position, in the native units of the text provider. | |
46f4442e | 561 | * @stable ICU 3.4 |
73c04bcf | 562 | */ |
46f4442e | 563 | U_STABLE int64_t U_EXPORT2 |
73c04bcf A |
564 | utext_getNativeIndex(const UText *ut); |
565 | ||
566 | /** | |
567 | * Set the current iteration position to the nearest code point | |
568 | * boundary at or preceding the specified index. | |
569 | * The index is in the native units of the original input text. | |
570 | * If the index is out of range, it will be pinned to be within | |
571 | * the range of the input text. | |
572 | * <p> | |
573 | * It will usually be more efficient to begin an iteration | |
574 | * using the functions utext_next32From() or utext_previous32From() | |
575 | * rather than utext_setNativeIndex(). | |
576 | * <p> | |
577 | * Moving the index position to an adjacent character is best done | |
578 | * with utext_next32(), utext_previous32() or utext_moveIndex32(). | |
579 | * Attempting to do direct arithmetic on the index position is | |
580 | * complicated by the fact that the size (in native units) of a | |
581 | * character depends on the underlying representation of the character | |
582 | * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not | |
583 | * easily knowable. | |
584 | * | |
585 | * @param ut the text to be accessed. | |
586 | * @param nativeIndex the native unit index of the new iteration position. | |
46f4442e | 587 | * @stable ICU 3.4 |
73c04bcf | 588 | */ |
46f4442e | 589 | U_STABLE void U_EXPORT2 |
73c04bcf A |
590 | utext_setNativeIndex(UText *ut, int64_t nativeIndex); |
591 | ||
592 | /** | |
593 | * Move the iterator position by delta code points. The number of code points | |
594 | * is a signed number; a negative delta will move the iterator backwards, | |
595 | * towards the start of the text. | |
596 | * <p> | |
597 | * The index is moved by <code>delta</code> code points | |
598 | * forward or backward, but no further backward than to 0 and | |
599 | * no further forward than to utext_nativeLength(). | |
600 | * The resulting index value will be in between 0 and length, inclusive. | |
601 | * | |
602 | * @param ut the text to be accessed. | |
603 | * @param delta the signed number of code points to move the iteration position. | |
604 | * @return TRUE if the position could be moved the requested number of positions while | |
605 | * staying within the range [0 - text length]. | |
46f4442e | 606 | * @stable ICU 3.4 |
73c04bcf | 607 | */ |
46f4442e | 608 | U_STABLE UBool U_EXPORT2 |
73c04bcf A |
609 | utext_moveIndex32(UText *ut, int32_t delta); |
610 | ||
611 | /** | |
612 | * Get the native index of the character preceding the current position. | |
613 | * If the iteration position is already at the start of the text, zero | |
614 | * is returned. | |
615 | * The value returned is the same as that obtained from the following sequence, | |
616 | * but without the side effect of changing the iteration position. | |
617 | * | |
618 | * \code | |
619 | * UText *ut = whatever; | |
620 | * ... | |
621 | * utext_previous32(ut); | |
622 | * utext_getNativeIndex(ut); | |
623 | * \endcode | |
624 | * | |
625 | * This function is most useful during forwards iteration, where it will get the | |
626 | * native index of the character most recently returned from utext_next32(). | |
627 | * | |
628 | * @param ut the text to be accessed | |
629 | * @return the native index of the character preceding the current index position, | |
630 | * or zero if the current position is at the start of the text. | |
46f4442e | 631 | * @stable ICU 3.6 |
73c04bcf | 632 | */ |
46f4442e | 633 | U_STABLE int64_t U_EXPORT2 |
73c04bcf A |
634 | utext_getPreviousNativeIndex(UText *ut); |
635 | ||
636 | ||
637 | /** | |
638 | * | |
639 | * Extract text from a UText into a UChar buffer. The range of text to be extracted | |
640 | * is specified in the native indices of the UText provider. These may not necessarily | |
641 | * be UTF-16 indices. | |
642 | * <p> | |
643 | * The size (number of 16 bit UChars) of the data to be extracted is returned. The | |
644 | * full number of UChars is returned, even when the extracted text is truncated | |
645 | * because the specified buffer size is too small. | |
646 | * <p> | |
647 | * The extracted string will (if you are a user) / must (if you are a text provider) | |
648 | * be NUL-terminated if there is sufficient space in the destination buffer. This | |
649 | * terminating NUL is not included in the returned length. | |
650 | * <p> | |
651 | * The iteration index is left at the position following the last extracted character. | |
652 | * | |
653 | * @param ut the UText from which to extract data. | |
654 | * @param nativeStart the native index of the first character to extract. | |
655 | * If the specified index is out of range, | |
656 | * it will be pinned to be within 0 <= index <= textLength. | |
657 | * @param nativeLimit the native string index of the position following the last | |
658 | * character to extract. If the specified index is out of range, | |
659 | * it will be pinned to be within 0 <= index <= textLength. | |
660 | * nativeLimit must be >= nativeStart. | |
661 | * @param dest the UChar (UTF-16) buffer into which the extracted text is placed | |
662 | * @param destCapacity The size, in UChars, of the destination buffer. May be zero | |
663 | * for precomputing the required size. | |
664 | * @param status receives any error status. | |
665 | * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the | |
666 | * buffer was too small. Returns number of UChars for preflighting. | |
667 | * @return Number of UChars in the data to be extracted. Does not include a trailing NUL. | |
668 | * | |
46f4442e | 669 | * @stable ICU 3.4 |
73c04bcf | 670 | */ |
46f4442e | 671 | U_STABLE int32_t U_EXPORT2 |
73c04bcf A |
672 | utext_extract(UText *ut, |
673 | int64_t nativeStart, int64_t nativeLimit, | |
674 | UChar *dest, int32_t destCapacity, | |
675 | UErrorCode *status); | |
676 | ||
677 | ||
729e4ab9 A |
678 | /** |
679 | * Compare two UTexts (binary order). The comparison begins at each source text's | |
680 | * iteration position. The iteration position of each UText will be left following | |
681 | * the last character compared. | |
682 | * | |
683 | * The comparison is done in code point order; unlike u_strCompare, you | |
684 | * cannot choose to use code unit order. This is because the characters | |
685 | * in a UText are accessed one code point at a time, and may not be from a UTF-16 | |
686 | * context. | |
687 | * | |
688 | * This function works with strings of different explicitly specified lengths | |
689 | * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
690 | * A length argument of -1 signifies that as much of the string should be used as | |
691 | * is necessary to compare with the other string. If both length arguments are -1, | |
692 | * the entire remaining portions of both strings are used. | |
693 | * | |
694 | * @param s1 First source string. | |
695 | * @param length1 Length of first source string in UTF-32 code points. | |
696 | * | |
697 | * @param s2 Second source string. | |
698 | * @param length2 Length of second source string in UTF-32 code points. | |
699 | * | |
700 | * @return <0 or 0 or >0 as usual for string comparisons | |
701 | * | |
702 | * @internal ICU 4.4 technology preview | |
703 | */ | |
704 | U_INTERNAL int32_t U_EXPORT2 | |
705 | utext_compare(UText *s1, int32_t length1, | |
706 | UText *s2, int32_t length2); | |
707 | ||
708 | /** | |
709 | * Compare two UTexts (binary order). The comparison begins at each source text's | |
710 | * iteration position. The iteration position of each UText will be left following | |
711 | * the last character compared. This method differs from utext_compare in that | |
712 | * it accepts native limits rather than lengths for each string. | |
713 | * | |
714 | * The comparison is done in code point order; unlike u_strCompare, you | |
715 | * cannot choose to use code unit order. This is because the characters | |
716 | * in a UText are accessed one code point at a time, and may not be from a UTF-16 | |
717 | * context. | |
718 | * | |
719 | * This function works with strings of different explicitly specified lengths | |
720 | * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
721 | * A limit argument of -1 signifies that as much of the string should be used as | |
722 | * is necessary to compare with the other string. If both limit arguments are -1, | |
723 | * the entire remaining portions of both strings are used. | |
724 | * | |
725 | * @param s1 First source string. | |
726 | * @param limit1 Native index of the last character in the first source string to be considered. | |
727 | * | |
728 | * @param s2 Second source string. | |
729 | * @param limit2 Native index of the last character in the second source string to be considered. | |
730 | * | |
731 | * @return <0 or 0 or >0 as usual for string comparisons | |
732 | * | |
733 | * @internal ICU 4.4 technology preview | |
734 | */ | |
735 | U_INTERNAL int32_t U_EXPORT2 | |
736 | utext_compareNativeLimit(UText *s1, int64_t limit1, | |
737 | UText *s2, int64_t limit2); | |
738 | ||
739 | /** | |
740 | * Compare two UTexts case-insensitively using full case folding. The comparison | |
741 | * begins at each source text's iteration position. The iteration position of each | |
742 | * UText will be left following the last character compared. | |
743 | * | |
744 | * The comparison is done in code point order; this is because the characters | |
745 | * in a UText are accessed one code point at a time, and may not be from a UTF-16 | |
746 | * context. | |
747 | * | |
748 | * This function works with strings of different explicitly specified lengths | |
749 | * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
750 | * A length argument of -1 signifies that as much of the string should be used as | |
751 | * is necessary to compare with the other string. If both length arguments are -1, | |
752 | * the entire remaining portions of both strings are used. | |
753 | * | |
754 | * @param s1 First source string. | |
755 | * @param length1 Length of first source string in UTF-32 code points. | |
756 | * | |
757 | * @param s2 Second source string. | |
758 | * @param length2 Length of second source string in UTF-32 code points. | |
759 | * | |
760 | * @param options A bit set of options: | |
761 | * - U_FOLD_CASE_DEFAULT or 0 is used for default options: | |
762 | * Comparison in code point order with default case folding. | |
763 | * | |
764 | * - U_FOLD_CASE_EXCLUDE_SPECIAL_I | |
765 | * | |
766 | * @param pErrorCode Must be a valid pointer to an error code value, | |
767 | * which must not indicate a failure before the function call. | |
768 | * | |
769 | * @return <0 or 0 or >0 as usual for string comparisons | |
770 | * | |
771 | * @internal ICU 4.4 technology preview | |
772 | */ | |
773 | U_INTERNAL int32_t U_EXPORT2 | |
774 | utext_caseCompare(UText *s1, int32_t length1, | |
775 | UText *s2, int32_t length2, | |
776 | uint32_t options, UErrorCode *pErrorCode); | |
777 | ||
778 | /** | |
779 | * Compare two UTexts case-insensitively using full case folding. The comparison | |
780 | * begins at each source text's iteration position. The iteration position of each | |
781 | * UText will be left following the last character compared. This method differs from | |
782 | * utext_caseCompare in that it accepts native limits rather than lengths for each | |
783 | * string. | |
784 | * | |
785 | * The comparison is done in code point order; this is because the characters | |
786 | * in a UText are accessed one code point at a time, and may not be from a UTF-16 | |
787 | * context. | |
788 | * | |
789 | * This function works with strings of different explicitly specified lengths | |
790 | * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. | |
791 | * A limit argument of -1 signifies that as much of the string should be used as | |
792 | * is necessary to compare with the other string. If both limit arguments are -1, | |
793 | * the entire remaining portions of both strings are used. | |
794 | * | |
795 | * @param s1 First source string. | |
796 | * @param limit1 Native index of the last character in the first source string to be considered. | |
797 | * | |
798 | * @param s2 Second source string. | |
799 | * @param limit2 Native index of the last character in the second source string to be considered. | |
800 | * | |
801 | * @param options A bit set of options: | |
802 | * - U_FOLD_CASE_DEFAULT or 0 is used for default options: | |
803 | * Comparison in code point order with default case folding. | |
804 | * | |
805 | * - U_FOLD_CASE_EXCLUDE_SPECIAL_I | |
806 | * | |
807 | * @param pErrorCode Must be a valid pointer to an error code value, | |
808 | * which must not indicate a failure before the function call. | |
809 | * | |
810 | * @return <0 or 0 or >0 as usual for string comparisons | |
811 | * | |
812 | * @internal ICU 4.4 technology preview | |
813 | */ | |
814 | U_INTERNAL int32_t U_EXPORT2 | |
815 | utext_caseCompareNativeLimit(UText *s1, int64_t limit1, | |
816 | UText *s2, int64_t limit2, | |
817 | uint32_t options, UErrorCode *pErrorCode); | |
818 | ||
819 | ||
73c04bcf A |
820 | /************************************************************************************ |
821 | * | |
822 | * #define inline versions of selected performance-critical text access functions | |
823 | * Caution: do not use auto increment++ or decrement-- expressions | |
824 | * as parameters to these macros. | |
825 | * | |
826 | * For most use, where there is no extreme performance constraint, the | |
827 | * normal, non-inline functions are a better choice. The resulting code | |
828 | * will be smaller, and, if the need ever arises, easier to debug. | |
829 | * | |
830 | * These are implemented as #defines rather than real functions | |
831 | * because there is no fully portable way to do inline functions in plain C. | |
832 | * | |
833 | ************************************************************************************/ | |
834 | ||
729e4ab9 A |
835 | /** |
836 | * inline version of utext_current32(), for performance-critical situations. | |
837 | * | |
838 | * Get the code point at the current iteration position of the UText. | |
839 | * Returns U_SENTINEL (-1) if the position is at the end of the | |
840 | * text. | |
841 | * | |
842 | * @internal ICU 4.4 technology preview | |
843 | */ | |
844 | #define UTEXT_CURRENT32(ut) \ | |
845 | ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ | |
846 | ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) | |
847 | ||
73c04bcf A |
848 | /** |
849 | * inline version of utext_next32(), for performance-critical situations. | |
850 | * | |
851 | * Get the code point at the current iteration position of the UText, and | |
852 | * advance the position to the first index following the character. | |
853 | * This is a post-increment operation. | |
854 | * Returns U_SENTINEL (-1) if the position is at the end of the | |
855 | * text. | |
856 | * | |
46f4442e | 857 | * @stable ICU 3.4 |
73c04bcf A |
858 | */ |
859 | #define UTEXT_NEXT32(ut) \ | |
860 | ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ | |
861 | ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut)) | |
862 | ||
863 | /** | |
864 | * inline version of utext_previous32(), for performance-critical situations. | |
865 | * | |
866 | * Move the iterator position to the character (code point) whose | |
867 | * index precedes the current position, and return that character. | |
868 | * This is a pre-decrement operation. | |
869 | * Returns U_SENTINEL (-1) if the position is at the start of the text. | |
870 | * | |
46f4442e | 871 | * @stable ICU 3.4 |
73c04bcf A |
872 | */ |
873 | #define UTEXT_PREVIOUS32(ut) \ | |
874 | ((ut)->chunkOffset > 0 && \ | |
875 | (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \ | |
876 | (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut)) | |
877 | ||
878 | /** | |
879 | * inline version of utext_getNativeIndex(), for performance-critical situations. | |
880 | * | |
881 | * Get the current iterator position, which can range from 0 to | |
882 | * the length of the text. | |
883 | * The position is a native index into the input text, in whatever format it | |
884 | * may have (possibly UTF-8 for example), and may not always be the same as | |
885 | * the corresponding UChar (UTF-16) index. | |
886 | * The returned position will always be aligned to a code point boundary. | |
887 | * | |
46f4442e | 888 | * @stable ICU 3.6 |
73c04bcf A |
889 | */ |
890 | #define UTEXT_GETNATIVEINDEX(ut) \ | |
891 | ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \ | |
892 | (ut)->chunkNativeStart+(ut)->chunkOffset : \ | |
893 | (ut)->pFuncs->mapOffsetToNative(ut)) | |
894 | ||
895 | /** | |
896 | * inline version of utext_setNativeIndex(), for performance-critical situations. | |
897 | * | |
898 | * Set the current iteration position to the nearest code point | |
899 | * boundary at or preceding the specified index. | |
900 | * The index is in the native units of the original input text. | |
901 | * If the index is out of range, it will be pinned to be within | |
902 | * the range of the input text. | |
903 | * | |
729e4ab9 | 904 | * @stable ICU 3.8 |
73c04bcf A |
905 | */ |
906 | #define UTEXT_SETNATIVEINDEX(ut, ix) \ | |
907 | { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ | |
908 | if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ | |
909 | (ut)->chunkOffset=(int32_t)__offset; \ | |
910 | } else { \ | |
911 | utext_setNativeIndex((ut), (ix)); } } | |
912 | ||
913 | ||
914 | ||
73c04bcf A |
915 | /************************************************************************************ |
916 | * | |
917 | * Functions related to writing or modifying the text. | |
918 | * These will work only with modifiable UTexts. Attempting to | |
919 | * modify a read-only UText will return an error status. | |
920 | * | |
921 | ************************************************************************************/ | |
922 | ||
923 | ||
924 | /** | |
925 | * Return TRUE if the text can be written (modified) with utext_replace() or | |
926 | * utext_copy(). For the text to be writable, the text provider must | |
927 | * be of a type that supports writing and the UText must not be frozen. | |
928 | * | |
929 | * Attempting to modify text when utext_isWritable() is FALSE will fail - | |
930 | * the text will not be modified, and an error will be returned from the function | |
931 | * that attempted the modification. | |
932 | * | |
933 | * @param ut the UText to be tested. | |
934 | * @return TRUE if the text is modifiable. | |
935 | * | |
936 | * @see utext_freeze() | |
937 | * @see utext_replace() | |
938 | * @see utext_copy() | |
46f4442e | 939 | * @stable ICU 3.4 |
73c04bcf A |
940 | * |
941 | */ | |
46f4442e | 942 | U_STABLE UBool U_EXPORT2 |
73c04bcf A |
943 | utext_isWritable(const UText *ut); |
944 | ||
945 | ||
946 | /** | |
947 | * Test whether there is meta data associated with the text. | |
948 | * @see Replaceable::hasMetaData() | |
949 | * | |
950 | * @param ut The UText to be tested | |
951 | * @return TRUE if the underlying text includes meta data. | |
46f4442e | 952 | * @stable ICU 3.4 |
73c04bcf | 953 | */ |
46f4442e | 954 | U_STABLE UBool U_EXPORT2 |
73c04bcf A |
955 | utext_hasMetaData(const UText *ut); |
956 | ||
957 | ||
958 | /** | |
959 | * Replace a range of the original text with a replacement text. | |
960 | * | |
961 | * Leaves the current iteration position at the position following the | |
962 | * newly inserted replacement text. | |
963 | * | |
964 | * This function is only available on UText types that support writing, | |
965 | * that is, ones where utext_isWritable() returns TRUE. | |
966 | * | |
967 | * When using this function, there should be only a single UText opened onto the | |
968 | * underlying native text string. Behavior after a replace operation | |
969 | * on a UText is undefined for any other additional UTexts that refer to the | |
970 | * modified string. | |
971 | * | |
972 | * @param ut the UText representing the text to be operated on. | |
973 | * @param nativeStart the native index of the start of the region to be replaced | |
974 | * @param nativeLimit the native index of the character following the region to be replaced. | |
975 | * @param replacementText pointer to the replacement text | |
976 | * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated. | |
977 | * @param status receives any error status. Possible errors include | |
978 | * U_NO_WRITE_PERMISSION | |
979 | * | |
980 | * @return The signed number of (native) storage units by which | |
981 | * the length of the text expanded or contracted. | |
982 | * | |
46f4442e | 983 | * @stable ICU 3.4 |
73c04bcf | 984 | */ |
46f4442e | 985 | U_STABLE int32_t U_EXPORT2 |
73c04bcf A |
986 | utext_replace(UText *ut, |
987 | int64_t nativeStart, int64_t nativeLimit, | |
988 | const UChar *replacementText, int32_t replacementLength, | |
989 | UErrorCode *status); | |
990 | ||
991 | ||
992 | ||
993 | /** | |
994 | * | |
995 | * Copy or move a substring from one position to another within the text, | |
996 | * while retaining any metadata associated with the text. | |
997 | * This function is used to duplicate or reorder substrings. | |
998 | * The destination index must not overlap the source range. | |
999 | * | |
1000 | * The text to be copied or moved is inserted at destIndex; | |
1001 | * it does not replace or overwrite any existing text. | |
1002 | * | |
1003 | * The iteration position is left following the newly inserted text | |
1004 | * at the destination position. | |
1005 | * | |
1006 | * This function is only available on UText types that support writing, | |
1007 | * that is, ones where utext_isWritable() returns TRUE. | |
1008 | * | |
1009 | * When using this function, there should be only a single UText opened onto the | |
1010 | * underlying native text string. Behavior after a copy operation | |
1011 | * on a UText is undefined in any other additional UTexts that refer to the | |
1012 | * modified string. | |
1013 | * | |
1014 | * @param ut The UText representing the text to be operated on. | |
1015 | * @param nativeStart The native index of the start of the region to be copied or moved | |
1016 | * @param nativeLimit The native index of the character position following the region | |
1017 | * to be copied. | |
1018 | * @param destIndex The native destination index to which the source substring is | |
1019 | * copied or moved. | |
1020 | * @param move If TRUE, then the substring is moved, not copied/duplicated. | |
1021 | * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION | |
1022 | * | |
46f4442e | 1023 | * @stable ICU 3.4 |
73c04bcf | 1024 | */ |
46f4442e | 1025 | U_STABLE void U_EXPORT2 |
73c04bcf A |
1026 | utext_copy(UText *ut, |
1027 | int64_t nativeStart, int64_t nativeLimit, | |
1028 | int64_t destIndex, | |
1029 | UBool move, | |
1030 | UErrorCode *status); | |
1031 | ||
1032 | ||
1033 | /** | |
1034 | * <p> | |
1035 | * Freeze a UText. This prevents any modification to the underlying text itself | |
1036 | * by means of functions operating on this UText. | |
1037 | * </p> | |
1038 | * <p> | |
1039 | * Once frozen, a UText can not be unfrozen. The intent is to ensure | |
1040 | * that the text underlying a frozen UText wrapper cannot be modified via that UText. | |
1041 | * </p> | |
1042 | * <p> | |
1043 | * Caution: freezing a UText will disable changes made via the specific | |
1044 | * frozen UText wrapper only; it will not have any effect on the ability to | |
1045 | * directly modify the text by bypassing the UText. Any such backdoor modifications | |
1046 | * are always an error while UText access is occurring because the underlying | |
1047 | * text can get out of sync with UText's buffering. | |
1048 | * </p> | |
1049 | * | |
1050 | * @param ut The UText to be frozen. | |
1051 | * @see utext_isWritable() | |
46f4442e | 1052 | * @stable ICU 3.6 |
73c04bcf | 1053 | */ |
46f4442e | 1054 | U_STABLE void U_EXPORT2 |
73c04bcf A |
1055 | utext_freeze(UText *ut); |
1056 | ||
1057 | ||
73c04bcf A |
1058 | /** |
1059 | * UText provider properties (bit field indexes). | |
1060 | * | |
1061 | * @see UText | |
46f4442e | 1062 | * @stable ICU 3.4 |
73c04bcf A |
1063 | */ |
1064 | enum { | |
1065 | /** | |
1066 | * It is potentially time consuming for the provider to determine the length of the text. | |
46f4442e | 1067 | * @stable ICU 3.4 |
73c04bcf A |
1068 | */ |
1069 | UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, | |
1070 | /** | |
1071 | * Text chunks remain valid and usable until the text object is modified or | |
1072 | * deleted, not just until the next time the access() function is called | |
1073 | * (which is the default). | |
46f4442e | 1074 | * @stable ICU 3.4 |
73c04bcf A |
1075 | */ |
1076 | UTEXT_PROVIDER_STABLE_CHUNKS = 2, | |
1077 | /** | |
1078 | * The provider supports modifying the text via the replace() and copy() | |
1079 | * functions. | |
1080 | * @see Replaceable | |
46f4442e | 1081 | * @stable ICU 3.4 |
73c04bcf A |
1082 | */ |
1083 | UTEXT_PROVIDER_WRITABLE = 3, | |
1084 | /** | |
1085 | * There is meta data associated with the text. | |
1086 | * @see Replaceable::hasMetaData() | |
46f4442e | 1087 | * @stable ICU 3.4 |
73c04bcf A |
1088 | */ |
1089 | UTEXT_PROVIDER_HAS_META_DATA = 4, | |
1090 | /** | |
1091 | * Text provider owns the text storage. | |
1092 | * Generally occurs as the result of a deep clone of the UText. | |
1093 | * When closing the UText, the associated text must | |
1094 | * also be closed/deleted/freed/ whatever is appropriate. | |
46f4442e | 1095 | * @stable ICU 3.6 |
73c04bcf A |
1096 | */ |
1097 | UTEXT_PROVIDER_OWNS_TEXT = 5 | |
1098 | }; | |
1099 | ||
1100 | /** | |
1101 | * Function type declaration for UText.clone(). | |
1102 | * | |
1103 | * clone a UText. Much like opening a UText where the source text is itself | |
1104 | * another UText. | |
1105 | * | |
1106 | * A deep clone will copy both the UText data structures and the underlying text. | |
1107 | * The original and cloned UText will operate completely independently; modifications | |
1108 | * made to the text in one will not affect the other. Text providers are not | |
1109 | * required to support deep clones. The user of clone() must check the status return | |
1110 | * and be prepared to handle failures. | |
1111 | * | |
1112 | * A shallow clone replicates only the UText data structures; it does not make | |
1113 | * a copy of the underlying text. Shallow clones can be used as an efficient way to | |
1114 | * have multiple iterators active in a single text string that is not being | |
1115 | * modified. | |
1116 | * | |
1117 | * A shallow clone operation must not fail except for truly exceptional conditions such | |
1118 | * as memory allocation failures. | |
1119 | * | |
1120 | * A UText and its clone may be safely concurrently accessed by separate threads. | |
1121 | * This is true for both shallow and deep clones. | |
1122 | * It is the responsibility of the Text Provider to ensure that this thread safety | |
1123 | * constraint is met. | |
1124 | ||
1125 | * | |
1126 | * @param dest A UText struct to be filled in with the result of the clone operation, | |
1127 | * or NULL if the clone function should heap-allocate a new UText struct. | |
1128 | * @param src The UText to be cloned. | |
1129 | * @param deep TRUE to request a deep clone, FALSE for a shallow clone. | |
1130 | * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR | |
1131 | * should be returned if the text provider is unable to clone the | |
1132 | * original text. | |
1133 | * @return The newly created clone, or NULL if the clone operation failed. | |
1134 | * | |
46f4442e | 1135 | * @stable ICU 3.4 |
73c04bcf A |
1136 | */ |
1137 | typedef UText * U_CALLCONV | |
1138 | UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status); | |
1139 | ||
1140 | ||
1141 | /** | |
1142 | * Function type declaration for UText.nativeLength(). | |
1143 | * | |
1144 | * @param ut the UText to get the length of. | |
1145 | * @return the length, in the native units of the original text string. | |
1146 | * @see UText | |
46f4442e | 1147 | * @stable ICU 3.4 |
73c04bcf A |
1148 | */ |
1149 | typedef int64_t U_CALLCONV | |
1150 | UTextNativeLength(UText *ut); | |
1151 | ||
1152 | /** | |
1153 | * Function type declaration for UText.access(). Get the description of the text chunk | |
1154 | * containing the text at a requested native index. The UText's iteration | |
1155 | * position will be left at the requested index. If the index is out | |
1156 | * of bounds, the iteration position will be left at the start or end | |
1157 | * of the string, as appropriate. | |
1158 | * | |
1159 | * Chunks must begin and end on code point boundaries. A single code point | |
1160 | * comprised of multiple storage units must never span a chunk boundary. | |
1161 | * | |
1162 | * | |
1163 | * @param ut the UText being accessed. | |
1164 | * @param nativeIndex Requested index of the text to be accessed. | |
1165 | * @param forward If TRUE, then the returned chunk must contain text | |
1166 | * starting from the index, so that start<=index<limit. | |
1167 | * If FALSE, then the returned chunk must contain text | |
1168 | * before the index, so that start<index<=limit. | |
1169 | * @return True if the requested index could be accessed. The chunk | |
1170 | * will contain the requested text. | |
1171 | * False value if a chunk cannot be accessed | |
1172 | * (the requested index is out of bounds). | |
1173 | * | |
1174 | * @see UText | |
46f4442e | 1175 | * @stable ICU 3.4 |
73c04bcf A |
1176 | */ |
1177 | typedef UBool U_CALLCONV | |
1178 | UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); | |
1179 | ||
1180 | /** | |
1181 | * Function type declaration for UText.extract(). | |
1182 | * | |
1183 | * Extract text from a UText into a UChar buffer. The range of text to be extracted | |
1184 | * is specified in the native indices of the UText provider. These may not necessarily | |
1185 | * be UTF-16 indices. | |
1186 | * <p> | |
1187 | * The size (number of 16 bit UChars) in the data to be extracted is returned. The | |
1188 | * full amount is returned, even when the specified buffer size is smaller. | |
1189 | * <p> | |
1190 | * The extracted string will (if you are a user) / must (if you are a text provider) | |
1191 | * be NUL-terminated if there is sufficient space in the destination buffer. | |
1192 | * | |
1193 | * @param ut the UText from which to extract data. | |
1194 | * @param nativeStart the native index of the first character to extract. | |
1195 | * @param nativeLimit the native string index of the position following the last | |
1196 | * character to extract. | |
1197 | * @param dest the UChar (UTF-16) buffer into which the extracted text is placed | |
1198 | * @param destCapacity The size, in UChars, of the destination buffer. May be zero | |
1199 | * for precomputing the required size. | |
1200 | * @param status receives any error status. | |
1201 | * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for | |
1202 | * preflighting. | |
1203 | * @return Number of UChars in the data. Does not include a trailing NUL. | |
1204 | * | |
46f4442e | 1205 | * @stable ICU 3.4 |
73c04bcf A |
1206 | */ |
1207 | typedef int32_t U_CALLCONV | |
1208 | UTextExtract(UText *ut, | |
1209 | int64_t nativeStart, int64_t nativeLimit, | |
1210 | UChar *dest, int32_t destCapacity, | |
1211 | UErrorCode *status); | |
1212 | ||
1213 | /** | |
1214 | * Function type declaration for UText.replace(). | |
1215 | * | |
1216 | * Replace a range of the original text with a replacement text. | |
1217 | * | |
1218 | * Leaves the current iteration position at the position following the | |
1219 | * newly inserted replacement text. | |
1220 | * | |
1221 | * This function need only be implemented on UText types that support writing. | |
1222 | * | |
1223 | * When using this function, there should be only a single UText opened onto the | |
1224 | * underlying native text string. The function is responsible for updating the | |
1225 | * text chunk within the UText to reflect the updated iteration position, | |
1226 | * taking into account any changes to the underlying string's structure caused | |
1227 | * by the replace operation. | |
1228 | * | |
1229 | * @param ut the UText representing the text to be operated on. | |
1230 | * @param nativeStart the index of the start of the region to be replaced | |
1231 | * @param nativeLimit the index of the character following the region to be replaced. | |
1232 | * @param replacementText pointer to the replacement text | |
1233 | * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated. | |
1234 | * @param status receives any error status. Possible errors include | |
1235 | * U_NO_WRITE_PERMISSION | |
1236 | * | |
1237 | * @return The signed number of (native) storage units by which | |
1238 | * the length of the text expanded or contracted. | |
1239 | * | |
46f4442e | 1240 | * @stable ICU 3.4 |
73c04bcf A |
1241 | */ |
1242 | typedef int32_t U_CALLCONV | |
1243 | UTextReplace(UText *ut, | |
1244 | int64_t nativeStart, int64_t nativeLimit, | |
1245 | const UChar *replacementText, int32_t replacmentLength, | |
1246 | UErrorCode *status); | |
1247 | ||
1248 | /** | |
1249 | * Function type declaration for UText.copy(). | |
1250 | * | |
1251 | * Copy or move a substring from one position to another within the text, | |
1252 | * while retaining any metadata associated with the text. | |
1253 | * This function is used to duplicate or reorder substrings. | |
1254 | * The destination index must not overlap the source range. | |
1255 | * | |
1256 | * The text to be copied or moved is inserted at destIndex; | |
1257 | * it does not replace or overwrite any existing text. | |
1258 | * | |
1259 | * This function need only be implemented for UText types that support writing. | |
1260 | * | |
1261 | * When using this function, there should be only a single UText opened onto the | |
1262 | * underlying native text string. The function is responsible for updating the | |
1263 | * text chunk within the UText to reflect the updated iteration position, | |
1264 | * taking into account any changes to the underlying string's structure caused | |
1265 | * by the copy or move operation. | |
1266 | * | |
1267 | * @param ut The UText representing the text to be operated on. | |
1268 | * @param nativeStart The index of the start of the region to be copied or moved | |
1269 | * @param nativeLimit The index of the character following the region to be copied or moved. | |
1270 | * @param nativeDest The destination index to which the source substring is copied or moved. | |
1271 | * @param move If TRUE, then the substring is moved, not copied/duplicated. | |
1272 | * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION | |
1273 | * | |
46f4442e | 1274 | * @stable ICU 3.4 |
73c04bcf A |
1275 | */ |
1276 | typedef void U_CALLCONV | |
1277 | UTextCopy(UText *ut, | |
1278 | int64_t nativeStart, int64_t nativeLimit, | |
1279 | int64_t nativeDest, | |
1280 | UBool move, | |
1281 | UErrorCode *status); | |
1282 | ||
1283 | /** | |
1284 | * Function type declaration for UText.mapOffsetToNative(). | |
1285 | * Map from the current UChar offset within the current text chunk to | |
1286 | * the corresponding native index in the original source text. | |
1287 | * | |
1288 | * This is required only for text providers that do not use native UTF-16 indexes. | |
1289 | * | |
1290 | * @param ut the UText. | |
1291 | * @return Absolute (native) index corresponding to chunkOffset in the current chunk. | |
1292 | * The returned native index should always be to a code point boundary. | |
1293 | * | |
46f4442e | 1294 | * @stable ICU 3.4 |
73c04bcf A |
1295 | */ |
1296 | typedef int64_t U_CALLCONV | |
1297 | UTextMapOffsetToNative(const UText *ut); | |
1298 | ||
1299 | /** | |
1300 | * Function type declaration for UText.mapIndexToUTF16(). | |
1301 | * Map from a native index to a UChar offset within a text chunk. | |
1302 | * Behavior is undefined if the native index does not fall within the | |
1303 | * current chunk. | |
1304 | * | |
1305 | * This function is required only for text providers that do not use native UTF-16 indexes. | |
1306 | * | |
1307 | * @param ut The UText containing the text chunk. | |
1308 | * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit. | |
1309 | * @return Chunk-relative UTF-16 offset corresponding to the specified native | |
1310 | * index. | |
1311 | * | |
46f4442e | 1312 | * @stable ICU 3.4 |
73c04bcf A |
1313 | */ |
1314 | typedef int32_t U_CALLCONV | |
1315 | UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex); | |
1316 | ||
1317 | ||
1318 | /** | |
1319 | * Function type declaration for UText.utextClose(). | |
1320 | * | |
1321 | * A Text Provider close function is only required for provider types that make | |
1322 | * allocations in their open function (or other functions) that must be | |
1323 | * cleaned when the UText is closed. | |
1324 | * | |
1325 | * The allocation of the UText struct itself and any "extra" storage | |
1326 | * associated with the UText is handled by the common UText implementation | |
1327 | * and does not require provider specific cleanup in a close function. | |
1328 | * | |
1329 | * Most UText provider implementations do not need to implement this function. | |
1330 | * | |
1331 | * @param ut A UText object to be closed. | |
1332 | * | |
46f4442e | 1333 | * @stable ICU 3.4 |
73c04bcf A |
1334 | */ |
1335 | typedef void U_CALLCONV | |
1336 | UTextClose(UText *ut); | |
1337 | ||
1338 | ||
1339 | /** | |
1340 | * (public) Function dispatch table for UText. | |
1341 | * Conceptually very much like a C++ Virtual Function Table. | |
1342 | * This struct defines the organization of the table. | |
1343 | * Each text provider implementation must provide an | |
1344 | * actual table that is initialized with the appropriate functions | |
1345 | * for the type of text being handled. | |
46f4442e | 1346 | * @stable ICU 3.6 |
73c04bcf A |
1347 | */ |
1348 | struct UTextFuncs { | |
1349 | /** | |
1350 | * (public) Function table size, sizeof(UTextFuncs) | |
1351 | * Intended for use should the table grow to accommodate added | |
1352 | * functions in the future, to allow tests for older format | |
1353 | * function tables that do not contain the extensions. | |
1354 | * | |
1355 | * Fields are placed for optimal alignment on | |
1356 | * 32/64/128-bit-pointer machines, by normally grouping together | |
1357 | * 4 32-bit fields, | |
1358 | * 4 pointers, | |
1359 | * 2 64-bit fields | |
1360 | * in sequence. | |
46f4442e | 1361 | * @stable ICU 3.6 |
73c04bcf A |
1362 | */ |
1363 | int32_t tableSize; | |
1364 | ||
1365 | /** | |
1366 | * (private) Alignment padding. | |
1367 | * Do not use, reserved for use by the UText framework only. | |
1368 | * @internal | |
1369 | */ | |
46f4442e | 1370 | int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3; |
73c04bcf A |
1371 | |
1372 | ||
1373 | /** | |
1374 | * (public) Function pointer for UTextClone | |
1375 | * | |
1376 | * @see UTextClone | |
46f4442e | 1377 | * @stable ICU 3.6 |
73c04bcf A |
1378 | */ |
1379 | UTextClone *clone; | |
1380 | ||
1381 | /** | |
1382 | * (public) Function pointer for UTextNativeLength | |
1383 | * May be expensive to compute! | |
1384 | * | |
1385 | * @see UTextNativeLength | |
46f4442e | 1386 | * @stable ICU 3.6 |
73c04bcf A |
1387 | */ |
1388 | UTextNativeLength *nativeLength; | |
1389 | ||
1390 | /** | |
1391 | * (public) Function pointer for UTextAccess. | |
1392 | * | |
1393 | * @see UTextAccess | |
46f4442e | 1394 | * @stable ICU 3.6 |
73c04bcf A |
1395 | */ |
1396 | UTextAccess *access; | |
1397 | ||
1398 | /** | |
1399 | * (public) Function pointer for UTextExtract. | |
1400 | * | |
1401 | * @see UTextExtract | |
46f4442e | 1402 | * @stable ICU 3.6 |
73c04bcf A |
1403 | */ |
1404 | UTextExtract *extract; | |
1405 | ||
1406 | /** | |
1407 | * (public) Function pointer for UTextReplace. | |
1408 | * | |
1409 | * @see UTextReplace | |
46f4442e | 1410 | * @stable ICU 3.6 |
73c04bcf A |
1411 | */ |
1412 | UTextReplace *replace; | |
1413 | ||
1414 | /** | |
1415 | * (public) Function pointer for UTextCopy. | |
1416 | * | |
1417 | * @see UTextCopy | |
46f4442e | 1418 | * @stable ICU 3.6 |
73c04bcf A |
1419 | */ |
1420 | UTextCopy *copy; | |
1421 | ||
1422 | /** | |
1423 | * (public) Function pointer for UTextMapOffsetToNative. | |
1424 | * | |
1425 | * @see UTextMapOffsetToNative | |
46f4442e | 1426 | * @stable ICU 3.6 |
73c04bcf A |
1427 | */ |
1428 | UTextMapOffsetToNative *mapOffsetToNative; | |
1429 | ||
1430 | /** | |
1431 | * (public) Function pointer for UTextMapNativeIndexToUTF16. | |
1432 | * | |
1433 | * @see UTextMapNativeIndexToUTF16 | |
46f4442e | 1434 | * @stable ICU 3.6 |
73c04bcf A |
1435 | */ |
1436 | UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; | |
1437 | ||
1438 | /** | |
1439 | * (public) Function pointer for UTextClose. | |
1440 | * | |
1441 | * @see UTextClose | |
46f4442e | 1442 | * @stable ICU 3.6 |
73c04bcf A |
1443 | */ |
1444 | UTextClose *close; | |
1445 | ||
1446 | /** | |
1447 | * (private) Spare function pointer | |
1448 | * @internal | |
1449 | */ | |
73c04bcf | 1450 | UTextClose *spare1; |
729e4ab9 | 1451 | |
73c04bcf A |
1452 | /** |
1453 | * (private) Spare function pointer | |
1454 | * @internal | |
1455 | */ | |
1456 | UTextClose *spare2; | |
1457 | ||
1458 | /** | |
1459 | * (private) Spare function pointer | |
1460 | * @internal | |
1461 | */ | |
1462 | UTextClose *spare3; | |
1463 | ||
1464 | }; | |
46f4442e A |
1465 | /** |
1466 | * Function dispatch table for UText | |
1467 | * @see UTextFuncs | |
1468 | */ | |
73c04bcf A |
1469 | typedef struct UTextFuncs UTextFuncs; |
1470 | ||
73c04bcf A |
1471 | /** |
1472 | * UText struct. Provides the interface between the generic UText access code | |
1473 | * and the UText provider code that works on specific kinds of | |
1474 | * text (UTF-8, noncontiguous UTF-16, whatever.) | |
1475 | * | |
1476 | * Applications that are using predefined types of text providers | |
1477 | * to pass text data to ICU services will have no need to view the | |
1478 | * internals of the UText structs that they open. | |
1479 | * | |
46f4442e | 1480 | * @stable ICU 3.6 |
73c04bcf A |
1481 | */ |
1482 | struct UText { | |
1483 | /** | |
1484 | * (private) Magic. Used to help detect when UText functions are handed | |
1485 | * invalid or uninitialized UText structs. | |
1486 | * utext_openXYZ() functions take an initialized, | |
1487 | * but not necessarily open, UText struct as an | |
1488 | * optional fill-in parameter. This magic field | |
1489 | * is used to check for that initialization. | |
1490 | * Text provider close functions must NOT clear | |
1491 | * the magic field because that would prevent | |
1492 | * reuse of the UText struct. | |
1493 | * @internal | |
1494 | */ | |
1495 | uint32_t magic; | |
1496 | ||
1497 | ||
1498 | /** | |
1499 | * (private) Flags for managing the allocation and freeing of | |
1500 | * memory associated with this UText. | |
1501 | * @internal | |
1502 | */ | |
1503 | int32_t flags; | |
1504 | ||
1505 | ||
1506 | /** | |
1507 | * Text provider properties. This set of flags is maintained by the | |
1508 | * text provider implementation. | |
46f4442e | 1509 | * @stable ICU 3.4 |
73c04bcf A |
1510 | */ |
1511 | int32_t providerProperties; | |
1512 | ||
1513 | /** | |
1514 | * (public) sizeOfStruct=sizeof(UText) | |
1515 | * Allows possible backward compatible extension. | |
1516 | * | |
46f4442e | 1517 | * @stable ICU 3.4 |
73c04bcf A |
1518 | */ |
1519 | int32_t sizeOfStruct; | |
1520 | ||
1521 | /* ------ 16 byte alignment boundary ----------- */ | |
1522 | ||
1523 | ||
1524 | /** | |
1525 | * (protected) Native index of the first character position following | |
1526 | * the current chunk. | |
46f4442e | 1527 | * @stable ICU 3.6 |
73c04bcf A |
1528 | */ |
1529 | int64_t chunkNativeLimit; | |
1530 | ||
1531 | /** | |
1532 | * (protected) Size in bytes of the extra space (pExtra). | |
46f4442e | 1533 | * @stable ICU 3.4 |
73c04bcf A |
1534 | */ |
1535 | int32_t extraSize; | |
1536 | ||
1537 | /** | |
1538 | * (protected) The highest chunk offset where native indexing and | |
1539 | * chunk (UTF-16) indexing correspond. For UTF-16 sources, value | |
1540 | * will be equal to chunkLength. | |
1541 | * | |
46f4442e | 1542 | * @stable ICU 3.6 |
73c04bcf A |
1543 | */ |
1544 | int32_t nativeIndexingLimit; | |
1545 | ||
1546 | /* ---- 16 byte alignment boundary------ */ | |
1547 | ||
1548 | /** | |
1549 | * (protected) Native index of the first character in the text chunk. | |
46f4442e | 1550 | * @stable ICU 3.6 |
73c04bcf A |
1551 | */ |
1552 | int64_t chunkNativeStart; | |
1553 | ||
1554 | /** | |
1555 | * (protected) Current iteration position within the text chunk (UTF-16 buffer). | |
1556 | * This is the index to the character that will be returned by utext_next32(). | |
46f4442e | 1557 | * @stable ICU 3.6 |
73c04bcf A |
1558 | */ |
1559 | int32_t chunkOffset; | |
1560 | ||
1561 | /** | |
1562 | * (protected) Length of the text chunk (UTF-16 buffer), in UChars. | |
46f4442e | 1563 | * @stable ICU 3.6 |
73c04bcf A |
1564 | */ |
1565 | int32_t chunkLength; | |
1566 | ||
1567 | /* ---- 16 byte alignment boundary-- */ | |
1568 | ||
1569 | ||
1570 | /** | |
1571 | * (protected) pointer to a chunk of text in UTF-16 format. | |
1572 | * May refer either to original storage of the source of the text, or | |
1573 | * if conversion was required, to a buffer owned by the UText. | |
46f4442e | 1574 | * @stable ICU 3.6 |
73c04bcf A |
1575 | */ |
1576 | const UChar *chunkContents; | |
1577 | ||
1578 | /** | |
1579 | * (public) Pointer to Dispatch table for accessing functions for this UText. | |
46f4442e | 1580 | * @stable ICU 3.6 |
73c04bcf | 1581 | */ |
46f4442e A |
1582 | const UTextFuncs *pFuncs; |
1583 | ||
73c04bcf A |
1584 | /** |
1585 | * (protected) Pointer to additional space requested by the | |
1586 | * text provider during the utext_open operation. | |
46f4442e | 1587 | * @stable ICU 3.4 |
73c04bcf A |
1588 | */ |
1589 | void *pExtra; | |
1590 | ||
1591 | /** | |
1592 | * (protected) Pointer to string or text-containing object or similar. | |
1593 | * This is the source of the text that this UText is wrapping, in a format | |
1594 | * that is known to the text provider functions. | |
46f4442e | 1595 | * @stable ICU 3.4 |
73c04bcf A |
1596 | */ |
1597 | const void *context; | |
1598 | ||
1599 | /* --- 16 byte alignment boundary--- */ | |
1600 | ||
1601 | /** | |
1602 | * (protected) Pointer fields available for use by the text provider. | |
1603 | * Not used by UText common code. | |
46f4442e | 1604 | * @stable ICU 3.6 |
73c04bcf A |
1605 | */ |
1606 | const void *p; | |
1607 | /** | |
1608 | * (protected) Pointer fields available for use by the text provider. | |
1609 | * Not used by UText common code. | |
46f4442e | 1610 | * @stable ICU 3.6 |
73c04bcf A |
1611 | */ |
1612 | const void *q; | |
1613 | /** | |
1614 | * (protected) Pointer fields available for use by the text provider. | |
1615 | * Not used by UText common code. | |
46f4442e | 1616 | * @stable ICU 3.6 |
73c04bcf A |
1617 | */ |
1618 | const void *r; | |
1619 | ||
1620 | /** | |
1621 | * Private field reserved for future use by the UText framework | |
1622 | * itself. This is not to be touched by the text providers. | |
1623 | * @internal ICU 3.4 | |
1624 | */ | |
1625 | void *privP; | |
1626 | ||
1627 | ||
1628 | /* --- 16 byte alignment boundary--- */ | |
1629 | ||
1630 | ||
1631 | /** | |
1632 | * (protected) Integer field reserved for use by the text provider. | |
1633 | * Not used by the UText framework, or by the client (user) of the UText. | |
46f4442e | 1634 | * @stable ICU 3.4 |
73c04bcf A |
1635 | */ |
1636 | int64_t a; | |
1637 | ||
1638 | /** | |
1639 | * (protected) Integer field reserved for use by the text provider. | |
1640 | * Not used by the UText framework, or by the client (user) of the UText. | |
46f4442e | 1641 | * @stable ICU 3.4 |
73c04bcf A |
1642 | */ |
1643 | int32_t b; | |
1644 | ||
1645 | /** | |
1646 | * (protected) Integer field reserved for use by the text provider. | |
1647 | * Not used by the UText framework, or by the client (user) of the UText. | |
46f4442e | 1648 | * @stable ICU 3.4 |
73c04bcf A |
1649 | */ |
1650 | int32_t c; | |
1651 | ||
1652 | /* ---- 16 byte alignment boundary---- */ | |
1653 | ||
1654 | ||
1655 | /** | |
1656 | * Private field reserved for future use by the UText framework | |
1657 | * itself. This is not to be touched by the text providers. | |
1658 | * @internal ICU 3.4 | |
1659 | */ | |
1660 | int64_t privA; | |
1661 | /** | |
1662 | * Private field reserved for future use by the UText framework | |
1663 | * itself. This is not to be touched by the text providers. | |
1664 | * @internal ICU 3.4 | |
1665 | */ | |
1666 | int32_t privB; | |
1667 | /** | |
1668 | * Private field reserved for future use by the UText framework | |
1669 | * itself. This is not to be touched by the text providers. | |
1670 | * @internal ICU 3.4 | |
1671 | */ | |
1672 | int32_t privC; | |
1673 | }; | |
1674 | ||
73c04bcf A |
1675 | |
1676 | /** | |
1677 | * Common function for use by Text Provider implementations to allocate and/or initialize | |
1678 | * a new UText struct. To be called in the implementation of utext_open() functions. | |
1679 | * If the supplied UText parameter is null, a new UText struct will be allocated on the heap. | |
1680 | * If the supplied UText is already open, the provider's close function will be called | |
1681 | * so that the struct can be reused by the open that is in progress. | |
1682 | * | |
1683 | * @param ut pointer to a UText struct to be re-used, or null if a new UText | |
1684 | * should be allocated. | |
1685 | * @param extraSpace The amount of additional space to be allocated as part | |
1686 | * of this UText, for use by types of providers that require | |
1687 | * additional storage. | |
1688 | * @param status Errors are returned here. | |
1689 | * @return pointer to the UText, allocated if necessary, with extra space set up if requested. | |
46f4442e | 1690 | * @stable ICU 3.4 |
73c04bcf | 1691 | */ |
46f4442e | 1692 | U_STABLE UText * U_EXPORT2 |
73c04bcf A |
1693 | utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); |
1694 | ||
1695 | /** | |
1696 | * @internal | |
1697 | * Value used to help identify correctly initialized UText structs. | |
1698 | * Note: must be publicly visible so that UTEXT_INITIALIZER can access it. | |
1699 | */ | |
1700 | enum { | |
1701 | UTEXT_MAGIC = 0x345ad82c | |
1702 | }; | |
73c04bcf A |
1703 | |
1704 | /** | |
1705 | * initializer to be used with local (stack) instances of a UText | |
1706 | * struct. UText structs must be initialized before passing | |
1707 | * them to one of the utext_open functions. | |
1708 | * | |
46f4442e | 1709 | * @stable ICU 3.6 |
73c04bcf A |
1710 | */ |
1711 | #define UTEXT_INITIALIZER { \ | |
1712 | UTEXT_MAGIC, /* magic */ \ | |
1713 | 0, /* flags */ \ | |
1714 | 0, /* providerProps */ \ | |
1715 | sizeof(UText), /* sizeOfStruct */ \ | |
1716 | 0, /* chunkNativeLimit */ \ | |
1717 | 0, /* extraSize */ \ | |
1718 | 0, /* nativeIndexingLimit */ \ | |
1719 | 0, /* chunkNativeStart */ \ | |
1720 | 0, /* chunkOffset */ \ | |
1721 | 0, /* chunkLength */ \ | |
1722 | NULL, /* chunkContents */ \ | |
1723 | NULL, /* pFuncs */ \ | |
1724 | NULL, /* pExtra */ \ | |
1725 | NULL, /* context */ \ | |
1726 | NULL, NULL, NULL, /* p, q, r */ \ | |
1727 | NULL, /* privP */ \ | |
1728 | 0, 0, 0, /* a, b, c */ \ | |
1729 | 0, 0, 0 /* privA,B,C, */ \ | |
1730 | } | |
1731 | ||
1732 | ||
73c04bcf A |
1733 | U_CDECL_END |
1734 | ||
1735 | ||
1736 | ||
1737 | #endif |