]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/ucbuf.h
ICU-57132.0.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / ucbuf.h
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1998-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 *
9 * File ucbuf.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 05/10/01 Ram Creation.
15 *
16 * This API reads in files and returns UChars
17 *******************************************************************************
18 */
19
20 #include "unicode/localpointer.h"
21 #include "unicode/ucnv.h"
22 #include "filestrm.h"
23
24 #if !UCONFIG_NO_CONVERSION
25
26 #ifndef UCBUF_H
27 #define UCBUF_H 1
28
29 typedef struct UCHARBUF UCHARBUF; /* forward declaration only; the struct body is not defined in this header */
30 /**
31 * End-of-file sentinel value (all 32 bits set).
32 */
33 #define U_EOF 0xFFFFFFFF
34 /**
35 * Error sentinel value used when an escape sequence cannot be unescaped.
36 */
37 #define U_ERR 0xFFFFFFFE
38
39 typedef struct ULine ULine; /* NOTE(review): not referenced by any prototype in this header */
40
41 struct ULine {
42 UChar *name; /* pointer to UChar data; presumably the text this entry describes -- TODO confirm */
43 int32_t len; /* presumably the number of UChars at name -- TODO confirm */
44 };
45
46 /**
47 * Opens a UCHARBUF on the named file, using the given code page for conversion
48 * @param fileName Name of the file to open.
49 * @param codepage The encoding of the file stream to convert to Unicode.
50 * If *codepage is NULL on input the API will try to autodetect
51 * popular Unicode encodings
52 * @param showWarning Flag to print out warnings to STDOUT
53 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads
54 * the whole file into memory and converts it.
55 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
56 * indicates a failure on entry, the function will immediately return.
57 * On exit the value will indicate the success of the operation.
58 * @return pointer to the newly opened UCHARBUF; close it with ucbuf_close()
59 */
60 U_CAPI UCHARBUF* U_EXPORT2
61 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
62
63 /**
64 * Gets a UTF-16 code unit at the current position from the converted buffer
65 * and increments the current position
66 * @param buf Pointer to UCHARBUF structure
67 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
68 * on entry, the function will immediately return. On exit the value will indicate the success of the operation.
69 * @return The code unit as an int32_t. NOTE(review): presumably U_EOF at end of input -- confirm against the implementation.
70 */
71 U_CAPI int32_t U_EXPORT2
72 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
73
74 /**
75 * Gets a UTF-32 code point at the current position from the converted buffer
76 * and increments the current position
77 * @param buf Pointer to UCHARBUF structure
78 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
79 * on entry, the function will immediately return. On exit the value will indicate the success of the operation.
80 * @return The code point as an int32_t. NOTE(review): presumably U_EOF at end of input -- confirm against the implementation.
81 */
82 U_CAPI int32_t U_EXPORT2
83 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
84
85 /**
86 * Gets a UTF-16 code unit at the current position from the converted buffer after
87 * unescaping and increments the current position. If the escape sequence is for a UTF-32
88 * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned
89 * @param buf Pointer to UCHARBUF structure
90 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure
91 * on entry, the function will immediately return. On exit the value will indicate the success of the operation.
92 * @return The unescaped value as an int32_t. NOTE(review): presumably U_ERR for a bad escape and U_EOF at end of input -- confirm.
93 */
94 U_CAPI int32_t U_EXPORT2
95 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
96
97 /**
98 * Gets a pointer to the current position in the internal buffer and length of the line.
99 * It is imperative to make a copy of the returned buffer before performing operations on it.
100 * @param buf Pointer to UCHARBUF structure
101 * @param len Output param to receive the length of the returned buffer up to the end of the line
102 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
103 * indicates a failure on entry, the function will immediately return.
104 * On exit the value will indicate the success of the operation.
105 * Error: U_TRUNCATED_CHAR_FOUND
106 * @return Pointer to the internal buffer, NULL if EOF
107 */
108 U_CAPI const UChar* U_EXPORT2
109 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
110
111
112 /**
113 * Resets the buffers and the underlying file stream. Returns no value; the outcome is reported through err.
114 * @param buf Pointer to UCHARBUF structure
115 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
116 * indicates a failure on entry, the function will immediately return.
117 * On exit the value will indicate the success of the operation.
118 */
119 U_CAPI void U_EXPORT2
120 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
121
122 /**
123 * Returns a pointer to the internal converted buffer
124 * @param buf Pointer to UCHARBUF structure
125 * @param len Pointer to int32_t to receive the length of the buffer
126 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
127 * indicates a failure on entry, the function will immediately return.
128 * On exit the value will indicate the success of the operation.
129 * @return Pointer to internal UChar buffer
130 */
131 U_CAPI const UChar* U_EXPORT2
132 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
133
134 /**
135 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory. Returns no value and takes no UErrorCode.
136 * @param buf Pointer to the UCHARBUF structure to close
137 */
138 U_CAPI void U_EXPORT2
139 ucbuf_close(UCHARBUF* buf);
140
141 #if U_SHOW_CPLUSPLUS_API
142
143 U_NAMESPACE_BEGIN
144
145 /**
146 * \class LocalUCHARBUFPointer
147 * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
148 * For most methods see the LocalPointerBase base class.
149 * Declared only when U_SHOW_CPLUSPLUS_API is non-zero.
150 * @see LocalPointerBase
151 * @see LocalPointer
152 */
153 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
154
155 U_NAMESPACE_END
156
157 #endif /* U_SHOW_CPLUSPLUS_API */
158
159 /**
160 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. Note that buf is the second parameter here, unlike the other ucbuf_* functions in this header. NOTE(review): ungetChar is presumably the value most recently read from buf -- confirm.
161 */
162 U_CAPI void U_EXPORT2
163 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
164
165
166 /**
167 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
168 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
169 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
170 * is necessary.
171 * If the charset was autodetected, the caller must close both the input FileStream
172 * and the converter.
173 *
174 * @param fileName The name of the file to be opened and have its encoding autodetected
175 * @param cp Output param to receive the detected encoding
176 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
177 * @param signatureLength NOTE(review): undocumented in the original; presumably an output param receiving the length of the detected Unicode signature -- confirm.
178 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure on entry,
179 * the function will immediately return. On exit the value will indicate the success of the operation.
180 * @return The input FileStream if its charset was autodetected; NULL otherwise.
181 */
182 U_CAPI FileStream * U_EXPORT2
183 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
184 int32_t* signatureLength, UErrorCode* status);
185
186 /**
187 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
188 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
189 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
190 * is necessary.
191 * If the charset was autodetected, the caller must close the converter.
192 *
193 * @param in The file stream whose encoding is to be detected
194 * @param cp Output param to receive the detected encoding
195 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
196 * @param signatureLength NOTE(review): undocumented in the original; presumably an output param receiving the length of the detected Unicode signature -- confirm.
197 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value indicates a failure on entry,
198 * the function will immediately return. On exit the value will indicate the success of the operation.
199 * @return Boolean whether the Unicode charset was autodetected.
200 */
201
202 U_CAPI UBool U_EXPORT2
203 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
204
205 /**
206 * Returns the approximate size in UChars required for converting the file to UChars. Takes no UErrorCode, so no failure is reported through one.
207 */
208 U_CAPI int32_t U_EXPORT2
209 ucbuf_size(UCHARBUF* buf);
210 /* NOTE(review): undocumented in the original. Judging by the parameter names it presumably combines inputDir and fileName into the caller-supplied target buffer, with len describing that buffer's size and status reporting failure -- confirm against the implementation. */
211 U_CAPI const char* U_EXPORT2
212 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
213
214 #endif /* UCBUF_H */
215 #endif /* !UCONFIG_NO_CONVERSION */
216