]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /* |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 1998-2001, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * | |
9 | * File ucbuf.c | |
10 | * | |
11 | * Modification History: | |
12 | * | |
13 | * Date Name Description | |
14 | * 05/10/01 Ram Creation. | |
15 | * | |
16 | * This API reads in files and returns UChars | |
17 | ******************************************************************************* | |
18 | */ | |
19 | ||
20 | #include "unicode/utypes.h" | |
21 | #include "unicode/ucnv.h" | |
22 | #include "filestrm.h" | |
23 | #include "cmemory.h" | |
24 | #include <stdio.h> | |
25 | ||
26 | #ifndef UCBUF_H | |
27 | #define UCBUF_H 1 | |
28 | ||
29 | typedef struct UCHARBUF UCHARBUF; | |
30 | /** | |
31 | * End of file value | |
32 | */ | |
33 | #define U_EOF 0xFFFFFFFF | |
34 | /** | |
35 | * Error value if a sequence cannot be unescaped | |
36 | */ | |
37 | #define U_ERR 0xFFFFFFFE | |
38 | ||
39 | typedef struct ULine ULine; | |
40 | ||
41 | struct ULine { | |
42 | UChar *name; | |
43 | int32_t len; | |
44 | }; | |
45 | ||
46 | /** | |
47 | * Opens the UCHARBUF with the given file stream and code page for conversion | |
48 | * @param fileName Name of the file to open. | |
49 | * @param codepage The encoding of the file stream to convert to Unicode. | |
50 | * If *codepage is NULL on input the API will try to autodetect | |
51 | * popular Unicode encodings | |
52 | * @param showWarning Flag to print out warnings to STDOUT | |
53 | * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads | |
54 | * the whole file into memory and converts it. | |
55 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
56 | * indicates a failure on entry, the function will immediately return. | |
57 | * On exit the value will indicate the success of the operation. | |
58 | * @return pointer to the newly opened UCHARBUF | |
59 | */ | |
60 | U_CAPI UCHARBUF* U_EXPORT2 | |
61 | ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); | |
62 | ||
63 | /** | |
64 | * Gets a UTF-16 code unit at the current position from the converted buffer | |
65 | * and increments the current position | |
66 | * @param buf Pointer to UCHARBUF structure | |
67 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
68 | * indicates a failure on entry, the function will immediately return. | |
69 | * On exit the value will indicate the success of the operation. | |
70 | */ | |
71 | U_CAPI int32_t U_EXPORT2 | |
72 | ucbuf_getc(UCHARBUF* buf,UErrorCode* err); | |
73 | ||
74 | /** | |
75 | * Gets a UTF-32 code point at the current position from the converted buffer | |
76 | * and increments the current position | |
77 | * @param buf Pointer to UCHARBUF structure | |
78 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
79 | * indicates a failure on entry, the function will immediately return. | |
80 | * On exit the value will indicate the success of the operation. | |
81 | */ | |
82 | U_CAPI int32_t U_EXPORT2 | |
83 | ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); | |
84 | ||
85 | /** | |
86 | * Gets a UTF-16 code unit at the current position from the converted buffer after | |
87 | * unescaping and increments the current position. If the escape sequence is for a UTF-32 | |
88 | * code point (\\Uxxxxxxxx) then a UTF-32 code point is returned | |
89 | * @param buf Pointer to UCHARBUF structure | |
90 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
91 | * indicates a failure on entry, the function will immediately return. | |
92 | * On exit the value will indicate the success of the operation. | |
93 | */ | |
94 | U_CAPI int32_t U_EXPORT2 | |
95 | ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); | |
96 | ||
97 | /** | |
98 | * Gets a pointer to the current position in the internal buffer and length of the line. | |
99 | * It is imperative to make a copy of the returned buffer before performing operations on it. | |
100 | * @param buf Pointer to UCHARBUF structure | |
101 | * @param len Output param to receive the length of the returned buffer up to the end of the line | |
102 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
103 | * indicates a failure on entry, the function will immediately return. | |
104 | * On exit the value will indicate the success of the operation. | |
105 | * Error: U_TRUNCATED_CHAR_FOUND | |
374ca955 | 106 | * @return Pointer to the internal buffer, NULL if EOF |
b75a7d8f A |
107 | */ |
108 | U_CAPI const UChar* U_EXPORT2 | |
109 | ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); | |
110 | ||
111 | ||
112 | /** | |
113 | * Resets the buffers and the underlying file stream. | |
114 | * @param buf Pointer to UCHARBUF structure | |
115 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
116 | * indicates a failure on entry, the function will immediately return. | |
117 | * On exit the value will indicate the success of the operation. | |
118 | */ | |
119 | U_CAPI void U_EXPORT2 | |
120 | ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); | |
121 | ||
122 | /** | |
123 | * Returns a pointer to the internal converted buffer | |
124 | * @param buf Pointer to UCHARBUF structure | |
125 | * @param len Pointer to int32_t to receive the length of the buffer | |
126 | * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value | |
127 | * indicates a failure on entry, the function will immediately return. | |
128 | * On exit the value will indicate the success of the operation. | |
129 | * @return Pointer to internal UChar buffer | |
130 | */ | |
131 | U_CAPI const UChar* U_EXPORT2 | |
132 | ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); | |
133 | ||
134 | /** | |
135 | * Closes the UCHARBUF structure members and cleans up the malloc'ed memory | |
136 | * @param buf Pointer to UCHARBUF structure | |
137 | */ | |
138 | U_CAPI void U_EXPORT2 | |
139 | ucbuf_close(UCHARBUF* buf); | |
140 | ||
141 | /** | |
142 | * Rewinds the buffer by one codepoint | |
143 | */ | |
144 | U_CAPI void U_EXPORT2 | |
145 | ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); | |
146 | ||
147 | ||
148 | /** | |
149 | * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. | |
150 | * Some Unicode charsets are stateful and need their byte identifiers to be converted as well, to bring | |
151 | * the converter to the correct state for converting the rest of the stream. So the UConverter parameter | |
152 | * is necessary. | |
153 | * If the charset was autodetected, the caller must close both the input FileStream | |
154 | * and the converter. | |
155 | * | |
156 | * @param fileName The name of the file to be opened and whose encoding is to be autodetected | |
157 | * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. | |
158 | * @param cp Output param to receive the detected encoding | |
159 | * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value | |
160 | * indicates a failure on entry, the function will immediately return. | |
161 | * On exit the value will indicate the success of the operation. | |
162 | * @return The input FileStream if its charset was autodetected; NULL otherwise. | |
163 | */ | |
164 | U_CAPI FileStream * U_EXPORT2 | |
165 | ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, | |
166 | int32_t* signatureLength, UErrorCode* status); | |
167 | ||
168 | /** | |
169 | * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected. | |
170 | * Some Unicode charsets are stateful and need their byte identifiers to be converted as well, to bring | |
171 | * the converter to the correct state for converting the rest of the stream. So the UConverter parameter | |
172 | * is necessary. | |
173 | * If the charset was autodetected, the caller must close the converter. | |
174 | * | |
175 | * @param in The file stream whose encoding is to be detected | |
176 | * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. | |
177 | * @param cp Output param to receive the detected encoding | |
178 | * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value | |
179 | * indicates a failure on entry, the function will immediately return. | |
180 | * On exit the value will indicate the success of the operation. | |
181 | * @return Boolean whether the Unicode charset was autodetected. | |
182 | */ | |
183 | ||
184 | U_CAPI UBool U_EXPORT2 | |
185 | ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); | |
186 | ||
187 | /** | |
188 | * Returns the approximate size in UChars required for converting the file to UChars | |
189 | */ | |
190 | U_CAPI int32_t U_EXPORT2 | |
191 | ucbuf_size(UCHARBUF* buf); | |
192 | ||
193 | U_CAPI const char* U_EXPORT2 | |
194 | ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); | |
195 | ||
196 | #endif |