]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 1998-2005, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * | |
9 | * File ucbuf.h | |
10 | * | |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 05/10/01 Ram Creation. | |
15 | * | |
16 | * This API reads in files and returns UChars | |
17 | ******************************************************************************* | |
18 | */ | |
19 | ||
20 | #include "unicode/ucnv.h" | |
21 | #include "filestrm.h" | |
22 | ||
23 | #if !UCONFIG_NO_CONVERSION | |
24 | ||
25 | #ifndef UCBUF_H | |
26 | #define UCBUF_H 1 | |
27 | ||
28 | typedef struct UCHARBUF UCHARBUF; | |
29 | /** | |
30 | * End of file value. NOTE(review): where int is 32 bits the literal 0xFFFFFFFF has an unsigned type in C, whereas the ucbuf_getc(), ucbuf_getc32() and ucbuf_getcx32() readers declared in this header return int32_t; presumably callers compare those results against U_EOF, which then relies on the usual arithmetic conversions (TODO confirm against the implementation). | |
31 | */ | |
32 | #define U_EOF 0xFFFFFFFF | |
33 | /** | |
34 | * Error value if a sequence cannot be unescaped. NOTE(review): presumably reported by ucbuf_getcx32(), the only reader in this header documented as unescaping (TODO confirm); where int is 32 bits the literal 0xFFFFFFFE has an unsigned type in C while that function is declared to return int32_t. | |
35 | */ | |
36 | #define U_ERR 0xFFFFFFFE | |
37 | ||
38 | typedef struct ULine ULine; | |
39 | ||
40 | struct ULine { /* NOTE(review): no user of ULine is visible in this header; the name suggests one line of text, confirm against callers */ | |
41 | UChar *name; /* pointer to UChar data; presumably the text of the line (TODO confirm; ownership is not stated here) */ | |
42 | int32_t len; /* presumably the number of UChars available at name (TODO confirm) */ | |
43 | }; | |
44 | ||
45 | /** | |
46 | * Opens the UCHARBUF with the given file stream and code page for conversion | |
47 | * @param fileName Name of the file to open. | |
48 | * @param codepage The encoding of the file stream to convert to Unicode. | |
49 | * If *codepage is NULL on input the API will try to autodetect | |
50 | * popular Unicode encodings | |
51 | * @param showWarning Flag to print out warnings to STDOUT | |
52 | * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads | |
53 | * the whole file into memory and converts it. | |
54 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
55 | * indicates a failure on entry, the function will immediately return. | |
56 | * On exit the value will indicate the success of the operation. | |
57 | * @return pointer to the newly opened UCHARBUF | |
58 | */ | |
59 | U_CAPI UCHARBUF* U_EXPORT2 | |
60 | ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); | |
61 | ||
62 | /** | |
63 | * Gets a UTF-16 code unit at the current position from the converted buffer | |
64 | * and increments the current position | |
65 | * @param buf Pointer to UCHARBUF structure | |
66 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
67 | * indicates a failure on entry, the function will immediately return. | |
68 | * On exit the value will indicate the success of the operation. | |
69 | */ | |
70 | U_CAPI int32_t U_EXPORT2 | |
71 | ucbuf_getc(UCHARBUF* buf,UErrorCode* err); | |
72 | ||
73 | /** | |
74 | * Gets a UTF-32 code point at the current position from the converted buffer | |
75 | * and increments the current position | |
76 | * @param buf Pointer to UCHARBUF structure | |
77 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
78 | * indicates a failure on entry, the function will immediately return. | |
79 | * On exit the value will indicate the success of the operation. | |
80 | */ | |
81 | U_CAPI int32_t U_EXPORT2 | |
82 | ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); | |
83 | ||
84 | /** | |
85 | * Gets a UTF-16 code unit at the current position from the converted buffer after | |
86 | * unescaping and increments the current position. If the escape sequence is for UTF-32 | |
87 | * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned | |
88 | * @param buf Pointer to UCHARBUF structure | |
89 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
90 | * indicates a failure on entry, the function will immediately return. | |
91 | * On exit the value will indicate the success of the operation. | |
92 | */ | |
93 | U_CAPI int32_t U_EXPORT2 | |
94 | ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); | |
95 | ||
96 | /** | |
97 | * Gets a pointer to the current position in the internal buffer and length of the line. | |
98 | * It is imperative to make a copy of the returned buffer before performing operations on it. | |
99 | * @param buf Pointer to UCHARBUF structure | |
100 | * @param len Output param to receive the len of the buffer returned till end of the line | |
101 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
102 | * indicates a failure on entry, the function will immediately return. | |
103 | * On exit the value will indicate the success of the operation. | |
104 | * Error: U_TRUNCATED_CHAR_FOUND | |
105 | * @return Pointer to the internal buffer, NULL if EOF | |
106 | */ | |
107 | U_CAPI const UChar* U_EXPORT2 | |
108 | ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); | |
109 | ||
110 | ||
111 | /** | |
112 | * Resets the buffers and the underlying file stream. | |
113 | * @param buf Pointer to UCHARBUF structure | |
114 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
115 | * indicates a failure on entry, the function will immediately return. | |
116 | * On exit the value will indicate the success of the operation. | |
117 | */ | |
118 | U_CAPI void U_EXPORT2 | |
119 | ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); | |
120 | ||
121 | /** | |
122 | * Returns a pointer to the internal converted buffer | |
123 | * @param buf Pointer to UCHARBUF structure | |
124 | * @param len Pointer to int32_t to receive the length of the buffer | |
125 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
126 | * indicates a failure on entry, the function will immediately return. | |
127 | * On exit the value will indicate the success of the operation. | |
128 | * @return Pointer to internal UChar buffer | |
129 | */ | |
130 | U_CAPI const UChar* U_EXPORT2 | |
131 | ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); | |
132 | ||
133 | /** | |
134 | * Closes the UCHARBUF structure members and cleans up the malloc'ed memory | |
135 | * @param buf Pointer to UCHARBUF structure | |
136 | */ | |
137 | U_CAPI void U_EXPORT2 | |
138 | ucbuf_close(UCHARBUF* buf); | |
139 | ||
140 | /** | |
141 | * Rewinds the buffer by one codepoint | |
142 | */ | |
143 | U_CAPI void U_EXPORT2 | |
144 | ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); | |
145 | ||
146 | ||
147 | /** | |
148 | * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. | |
149 | * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring | |
150 | * the converter to correct state for converting the rest of the stream. So the UConverter parameter | |
151 | * is necessary. | |
152 | * If the charset was autodetected, the caller must close both the input FileStream | |
153 | * and the converter. | |
154 | * | |
155 | * @param fileName The name of the file to be opened and whose encoding is to be autodetected | |
156 | * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. | |
157 | * @param cp Output param to receive the detected encoding | |
158 | * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value | |
159 | * indicates a failure on entry, the function will immediately return. | |
160 | * On exit the value will indicate the success of the operation. | |
161 | * @return The input FileStream if its charset was autodetected; NULL otherwise. | |
162 | */ | |
163 | U_CAPI FileStream * U_EXPORT2 | |
164 | ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, | |
165 | int32_t* signatureLength, UErrorCode* status); | |
166 | ||
167 | /** | |
168 | * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. | |
169 | * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring | |
170 | * the converter to correct state for converting the rest of the stream. So the UConverter parameter | |
171 | * is necessary. | |
172 | * If the charset was autodetected, the caller must close the converter. | |
173 | * | |
174 | * @param in The file stream whose encoding is to be detected | |
175 | * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. | |
176 | * @param cp Output param to receive the detected encoding | |
177 | * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value | |
178 | * indicates a failure on entry, the function will immediately return. | |
179 | * On exit the value will indicate the success of the operation. | |
180 | * @return Boolean whether the Unicode charset was autodetected. | |
181 | */ | |
182 | ||
183 | U_CAPI UBool U_EXPORT2 | |
184 | ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); | |
185 | ||
186 | /** | |
187 | * Returns the approximate size in UChars required for converting the file to UChars | |
188 | */ | |
189 | U_CAPI int32_t U_EXPORT2 | |
190 | ucbuf_size(UCHARBUF* buf); | |
191 | ||
192 | U_CAPI const char* U_EXPORT2 | |
193 | ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); | |
194 | ||
195 | #endif | |
196 | #endif | |
197 |