]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * | |
6 | * Copyright (C) 1998-2016, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ******************************************************************************* | |
10 | * | |
11 | * File ucbuf.h | |
12 | * | |
13 | * Modification History: | |
14 | * | |
15 | * Date Name Description | |
16 | * 05/10/01 Ram Creation. | |
17 | * | |
18 | * This API reads in files and returns UChars | |
19 | ******************************************************************************* | |
20 | */ | |
21 | ||
22 | #include "unicode/localpointer.h" | |
23 | #include "unicode/ucnv.h" | |
24 | #include "filestrm.h" | |
25 | ||
26 | #if !UCONFIG_NO_CONVERSION | |
27 | ||
28 | #ifndef UCBUF_H | |
29 | #define UCBUF_H 1 | |
30 | ||
31 | typedef struct UCHARBUF UCHARBUF; | |
32 | /** | |
33 | * End of file value | |
34 | */ | |
35 | #define U_EOF ((int32_t)0xFFFFFFFF) | |
36 | /** | |
37 | * Error value if a sequence cannot be unescaped | |
38 | */ | |
39 | #define U_ERR ((int32_t)0xFFFFFFFE) | |
40 | ||
41 | typedef struct ULine ULine; | |
42 | ||
43 | struct ULine { /* Pairs a UChar pointer with a 32-bit length. */ | |
44 | UChar *name; /* NOTE(review): presumably the UTF-16 text of the line -- confirm against users of ULine */ | |
45 | int32_t len; /* NOTE(review): presumably the length of name, in UChars -- confirm */ | |
46 | }; | |
47 | ||
48 | /** | |
49 | * Opens the UCHARBUF for the given file name and code page for conversion | |
50 | * @param fileName Name of the file to open. | |
51 | * @param codepage The encoding of the file stream to convert to Unicode. | |
52 | * If *codepage is NULL on input the API will try to autodetect | |
53 | * popular Unicode encodings | |
54 | * @param showWarning Flag to print out warnings to STDOUT | |
55 | * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads | |
56 | * the whole file into memory and converts it. | |
57 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
58 | * indicates a failure on entry, the function will immediately return. | |
59 | * On exit the value will indicate the success of the operation. | |
60 | * @return pointer to the newly opened UCHARBUF | |
61 | */ | |
62 | U_CAPI UCHARBUF* U_EXPORT2 | |
63 | ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); | |
64 | ||
65 | /** | |
66 | * Gets a UTF-16 code unit at the current position from the converted buffer | |
67 | * and increments the current position | |
68 | * @param buf Pointer to UCHARBUF structure | |
69 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
70 | * indicates a failure on entry, the function will immediately return. | |
71 | * On exit the value will indicate the success of the operation. | |
72 | */ | |
73 | U_CAPI int32_t U_EXPORT2 | |
74 | ucbuf_getc(UCHARBUF* buf,UErrorCode* err); | |
75 | ||
76 | /** | |
77 | * Gets a UTF-32 code point at the current position from the converted buffer | |
78 | * and increments the current position | |
79 | * @param buf Pointer to UCHARBUF structure | |
80 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
81 | * indicates a failure on entry, the function will immediately return. | |
82 | * On exit the value will indicate the success of the operation. | |
83 | */ | |
84 | U_CAPI int32_t U_EXPORT2 | |
85 | ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); | |
86 | ||
87 | /** | |
88 | * Gets a UTF-16 code unit at the current position from the converted buffer after | |
89 | * unescaping and increments the current position. If the escape sequence is for UTF-32 | |
90 | * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned | |
91 | * @param buf Pointer to UCHARBUF structure | |
92 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
93 | * indicates a failure on entry, the function will immediately return. | |
94 | * On exit the value will indicate the success of the operation. | |
95 | */ | |
96 | U_CAPI int32_t U_EXPORT2 | |
97 | ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); | |
98 | ||
99 | /** | |
100 | * Gets a pointer to the current position in the internal buffer and length of the line. | |
101 | * It is imperative to make a copy of the returned buffer before performing operations on it. | |
102 | * @param buf Pointer to UCHARBUF structure | |
103 | * @param len Output param to receive the len of the buffer returned till end of the line | |
104 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
105 | * indicates a failure on entry, the function will immediately return. | |
106 | * On exit the value will indicate the success of the operation. | |
107 | * Error: U_TRUNCATED_CHAR_FOUND | |
108 | * @return Pointer to the internal buffer, NULL if EOF | |
109 | */ | |
110 | U_CAPI const UChar* U_EXPORT2 | |
111 | ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); | |
112 | ||
113 | ||
114 | /** | |
115 | * Resets the buffers and the underlying file stream. | |
116 | * @param buf Pointer to UCHARBUF structure | |
117 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
118 | * indicates a failure on entry, the function will immediately return. | |
119 | * On exit the value will indicate the success of the operation. | |
120 | */ | |
121 | U_CAPI void U_EXPORT2 | |
122 | ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); | |
123 | ||
124 | /** | |
125 | * Returns a pointer to the internal converted buffer | |
126 | * @param buf Pointer to UCHARBUF structure | |
127 | * @param len Pointer to int32_t to receive the length of buffer | |
128 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
129 | * indicates a failure on entry, the function will immediately return. | |
130 | * On exit the value will indicate the success of the operation. | |
131 | * @return Pointer to internal UChar buffer | |
132 | */ | |
133 | U_CAPI const UChar* U_EXPORT2 | |
134 | ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); | |
135 | ||
136 | /** | |
137 | * Closes the UCHARBUF structure members and cleans up the malloc'ed memory | |
138 | * @param buf Pointer to UCHARBUF structure | |
139 | */ | |
140 | U_CAPI void U_EXPORT2 | |
141 | ucbuf_close(UCHARBUF* buf); | |
142 | ||
143 | #if U_SHOW_CPLUSPLUS_API | |
144 | ||
145 | U_NAMESPACE_BEGIN | |
146 | ||
147 | /** | |
148 | * \class LocalUCHARBUFPointer | |
149 | * "Smart pointer" class, closes a UCHARBUF via ucbuf_close(). | |
150 | * For most methods see the LocalPointerBase base class. | |
151 | * | |
152 | * @see LocalPointerBase | |
153 | * @see LocalPointer | |
154 | */ | |
155 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close); | |
156 | ||
157 | U_NAMESPACE_END | |
158 | ||
159 | #endif | |
160 | ||
161 | /** | |
162 | * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. | |
163 | */ | |
164 | U_CAPI void U_EXPORT2 | |
165 | ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); | |
166 | ||
167 | ||
168 | /** | |
169 | * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. | |
170 | * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring | |
171 | * the converter to correct state for converting the rest of the stream. So the UConverter parameter | |
172 | * is necessary. | |
173 | * If the charset was autodetected, the caller must close both the input FileStream | |
174 | * and the converter. | |
175 | * | |
176 | * @param fileName The file name to be opened and encoding autodetected | |
177 | * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. | |
178 | * @param cp Output param to receive the detected encoding | |
179 | * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value | |
180 | * indicates a failure on entry, the function will immediately return. | |
181 | * On exit the value will indicate the success of the operation. | |
182 | * @return The input FileStream if its charset was autodetected; NULL otherwise. | |
183 | */ | |
184 | U_CAPI FileStream * U_EXPORT2 | |
185 | ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, | |
186 | int32_t* signatureLength, UErrorCode* status); | |
187 | ||
188 | /** | |
189 | * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. | |
190 | * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring | |
191 | * the converter to correct state for converting the rest of the stream. So the UConverter parameter | |
192 | * is necessary. | |
193 | * If the charset was autodetected, the caller must close the converter. | |
194 | * | |
195 | * @param in The file stream whose encoding is to be detected | |
196 | * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. | |
197 | * @param cp Output param to receive the detected encoding | |
198 | * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value | |
199 | * indicates a failure on entry, the function will immediately return. | |
200 | * On exit the value will indicate the success of the operation. | |
201 | * @return Boolean whether the Unicode charset was autodetected. | |
202 | */ | |
203 | ||
204 | U_CAPI UBool U_EXPORT2 | |
205 | ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); | |
206 | ||
207 | /** | |
208 | * Returns the approximate size in UChars required for converting the file to UChars | |
209 | */ | |
210 | U_CAPI int32_t U_EXPORT2 | |
211 | ucbuf_size(UCHARBUF* buf); | |
212 | ||
213 | U_CAPI const char* U_EXPORT2 | |
214 | ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); | |
215 | ||
216 | #endif | |
217 | #endif | |
218 |