]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2003, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: unorm_it.h | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2003jan21 | |
14 | * created by: Markus W. Scherer | |
15 | */ | |
16 | ||
17 | #ifndef __UNORM_IT_H__ | |
18 | #define __UNORM_IT_H__ | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | ||
22 | #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION | |
23 | ||
24 | #include "unicode/uiter.h" | |
25 | #include "unicode/unorm.h" | |
26 | ||
27 | /** | |
28 | * Normalizing UCharIterator wrapper. | |
29 | * This internal API basically duplicates the functionality of the C++ Normalizer | |
30 | * but | |
31 | * - it actually implements a character iterator (UCharIterator) | |
32 | * with few restrictions (see unorm_setIter()) | |
33 | * - it supports UCharIterator getState()/setState() | |
34 | * - it uses lower-level APIs and buffers more text and states, | |
35 | * hopefully resulting in higher performance | |
36 | * | |
37 | * Usage example: | |
38 | * \code | |
39 | * function(UCharIterator *srcIter) { | |
40 | * UNormIterator *uni; | |
41 | * UCharIterator *iter; | |
42 | * UErrorCode errorCode; | |
43 | * | |
44 | * errorCode=U_ZERO_ERROR; | |
45 | * uni=unorm_openIter(NULL, 0, &errorCode); | |
46 | * if(U_FAILURE(errorCode)) { | |
47 | * // report error | |
48 | * return; | |
49 | * } | |
50 | * | |
51 | * iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode); | |
52 | * if(U_FAILURE(errorCode)) { | |
53 | * // report error | |
54 | * } else { | |
55 | * // use iter to iterate over the canonically ordered | |
56 | * // version of srcIter's text | |
57 | * uint32_t state; | |
58 | * | |
59 | * ... | |
60 | * | |
61 | * state=uiter_getState(iter); | |
62 | * if(state!=UITER_NO_STATE) { | |
63 | * // use valid state, store it, use iter some more | |
64 | * ... | |
65 | * | |
66 | * // later restore iter to the saved state: | |
67 | * uiter_setState(iter, state, &errorCode); | |
68 | * | |
69 | * ... | |
70 | * } | |
71 | * | |
72 | * ... | |
73 | * } | |
74 | * unorm_closeIter(uni); | |
75 | * } | |
76 | * \endcode | |
77 | * | |
78 | * See also the ICU test suites. | |
79 | * | |
80 | * @internal | |
81 | */ | |
82 | struct UNormIterator; | |
83 | typedef struct UNormIterator UNormIterator; | |
84 | ||
85 | /** | |
86 | * Size of a stack buffer to hold a UNormIterator, see the stackMem parameter | |
87 | * of unorm_openIter(). | |
88 | * | |
89 | * @internal | |
90 | */ | |
91 | #define UNORM_ITER_SIZE 1024 | |
92 | ||
93 | /** | |
94 | * Open a normalizing iterator. Must be closed later. | |
95 | * Use unorm_setIter(). | |
96 | * | |
97 | * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold | |
98 | * the UNormIterator if possible; can be NULL. | |
99 | * @param stackMemSize Number of bytes at stackMem; can be 0, | |
100 | * or should be >= UNORM_ITER_SIZE for a non-NULL stackMem. | |
101 | * @param pErrorCode ICU error code | |
102 | * @return an allocated and pre-initialized UNormIterator | |
103 | * @internal | |
104 | */ | |
105 | U_CAPI UNormIterator * U_EXPORT2 | |
106 | unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode); | |
107 | ||
108 | /** | |
109 | * Close a normalizing iterator. | |
110 | * | |
111 | * @param uni UNormIterator from unorm_openIter() | |
112 | * @internal | |
113 | */ | |
114 | U_CAPI void U_EXPORT2 | |
115 | unorm_closeIter(UNormIterator *uni); | |
116 | ||
117 | /** | |
118 | * Set a UCharIterator and a normalization mode for the normalizing iterator | |
119 | * to wrap. The normalizing iterator will read from the character iterator, | |
120 | * normalize the text, and in turn deliver it with its own wrapper UCharIterator | |
121 | * interface which it returns. | |
122 | * | |
123 | * The source iterator remains at its current position through the unorm_setIter() | |
124 | * call but will be used and moved as soon as the | |
125 | * returned normalizing iterator is. | |
126 | * | |
127 | * The returned interface pointer is valid for as long as the normalizing iterator | |
128 | * is open and until another unorm_setIter() call is made on it. | |
129 | * | |
130 | * The normalizing iterator's UCharIterator interface has the following properties: | |
131 | * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX | |
132 | * - getState() will return UITER_NO_STATE (unknown state) for positions | |
133 | * that are not at normalization boundaries | |
134 | * | |
135 | * @param uni UNormIterator from unorm_openIter() | |
136 | * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator. | |
137 | * Must support getState() and setState(). | |
138 | * @param mode The normalization mode. | |
139 | * @param pErrorCode ICU error code | |
140 | * @return an alias to the normalizing iterator's UCharIterator interface | |
141 | * @internal | |
142 | */ | |
143 | U_CAPI UCharIterator * U_EXPORT2 | |
144 | unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode); | |
145 | ||
146 | #endif /* uconfig.h switches */ | |
147 | ||
148 | #endif |