]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/ucbuf.h
ICU-66108.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / ucbuf.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4*******************************************************************************
5*
2ca993e8 6* Copyright (C) 1998-2016, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10*
2ca993e8 11* File ucbuf.h
b75a7d8f
A
12*
13* Modification History:
14*
15* Date Name Description
16* 05/10/01 Ram Creation.
17*
18* This API reads in files and returns UChars
19*******************************************************************************
20*/
21
2ca993e8 22#include "unicode/localpointer.h"
b75a7d8f
A
23#include "unicode/ucnv.h"
24#include "filestrm.h"
73c04bcf
A
25
26#if !UCONFIG_NO_CONVERSION
b75a7d8f
A
27
28#ifndef UCBUF_H
29#define UCBUF_H 1
30
31typedef struct UCHARBUF UCHARBUF;
32/**
33 * End of file value
34 */
340931cb 35#define U_EOF ((int32_t)0xFFFFFFFF)
b75a7d8f
A
36/**
37 * Error value if a sequence cannot be unescaped
38 */
340931cb 39#define U_ERR ((int32_t)0xFFFFFFFE)
b75a7d8f
A
40
41typedef struct ULine ULine;
42
43struct ULine {
44 UChar *name;
45 int32_t len;
46};
47
48/**
49 * Opens the UCHARBUF with the given file stream and code page for conversion
50 * @param fileName Name of the file to open.
2ca993e8 51 * @param codepage The encoding of the file stream to convert to Unicode.
b75a7d8f
A
52 * If *codepage is NULL on input the API will try to autodetect
53 * popular Unicode encodings
54 * @param showWarning Flag to print out warnings to STDOUT
2ca993e8 55 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads
b75a7d8f
A
56 * the whole file into memory and converts it.
57 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
58 * indicates a failure on entry, the function will immediately return.
59 * On exit the value will indicate the success of the operation.
60 * @return pointer to the newly opened UCHARBUF
61 */
62U_CAPI UCHARBUF* U_EXPORT2
63ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
64
65/**
66 * Gets a UTF-16 code unit at the current position from the converted buffer
67 * and increments the current position
68 * @param buf Pointer to UCHARBUF structure
69 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
70 * indicates a failure on entry, the function will immediately return.
71 * On exit the value will indicate the success of the operation.
72 */
73U_CAPI int32_t U_EXPORT2
74ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
75
76/**
77 * Gets a UTF-32 code point at the current position from the converted buffer
78 * and increments the current position
79 * @param buf Pointer to UCHARBUF structure
80 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
81 * indicates a failure on entry, the function will immediately return.
82 * On exit the value will indicate the success of the operation.
83 */
84U_CAPI int32_t U_EXPORT2
85ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
86
87/**
2ca993e8 88 * Gets a UTF-16 code unit at the current position from the converted buffer after
b75a7d8f
A
89 * unescaping and increments the current position. If the escape sequence is for UTF-32
90 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
91 * @param buf Pointer to UCHARBUF structure
92 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
93 * indicates a failure on entry, the function will immediately return.
94 * On exit the value will indicate the success of the operation.
95 */
96U_CAPI int32_t U_EXPORT2
97ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
98
99/**
100 * Gets a pointer to the current position in the internal buffer and length of the line.
2ca993e8 101 * It is imperative to make a copy of the returned buffer before performing operations on it.
b75a7d8f
A
102 * @param buf Pointer to UCHARBUF structure
103 * @param len Output param to receive the length of the returned buffer up to the end of the line
104 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
105 * indicates a failure on entry, the function will immediately return.
106 * On exit the value will indicate the success of the operation.
107 * Error: U_TRUNCATED_CHAR_FOUND
374ca955 108 * @return Pointer to the internal buffer, NULL if EOF
b75a7d8f
A
109 */
110U_CAPI const UChar* U_EXPORT2
111ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
112
113
114/**
115 * Resets the buffers and the underlying file stream.
116 * @param buf Pointer to UCHARBUF structure
117 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
118 * indicates a failure on entry, the function will immediately return.
119 * On exit the value will indicate the success of the operation.
120 */
121U_CAPI void U_EXPORT2
122ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
123
124/**
125 * Returns a pointer to the internal converted buffer
126 * @param buf Pointer to UCHARBUF structure
127 * @param len Pointer to int32_t to receive the length of the buffer
128 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
129 * indicates a failure on entry, the function will immediately return.
130 * On exit the value will indicate the success of the operation.
131 * @return Pointer to internal UChar buffer
132 */
133U_CAPI const UChar* U_EXPORT2
134ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
135
136/**
137 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
138 * @param buf Pointer to UCHARBUF structure
139 */
140U_CAPI void U_EXPORT2
141ucbuf_close(UCHARBUF* buf);
142
2ca993e8
A
143#if U_SHOW_CPLUSPLUS_API
144
145U_NAMESPACE_BEGIN
146
147/**
148 * \class LocalUCHARBUFPointer
149 * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
150 * For most methods see the LocalPointerBase base class.
151 *
152 * @see LocalPointerBase
153 * @see LocalPointer
154 */
155U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close);
156
157U_NAMESPACE_END
158
159#endif
160
b75a7d8f 161/**
46f4442e 162 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
b75a7d8f
A
163 */
164U_CAPI void U_EXPORT2
165ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
166
167
168/**
2ca993e8 169 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
b75a7d8f
A
170 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
171 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
172 * is necessary.
173 * If the charset was autodetected, the caller must close both the input FileStream
174 * and the converter.
175 *
176 * @param fileName The name of the file to be opened and whose encoding is to be autodetected
177 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
178 * @param cp Output param to receive the detected encoding
179 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
180 * indicates a failure on entry, the function will immediately return.
181 * On exit the value will indicate the success of the operation.
182 * @return The input FileStream if its charset was autodetected; NULL otherwise.
183 */
184U_CAPI FileStream * U_EXPORT2
185ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
186int32_t* signatureLength, UErrorCode* status);
187
188/**
2ca993e8 189 * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
b75a7d8f
A
190 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
191 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
192 * is necessary.
193 * If the charset was autodetected, the caller must close the converter.
194 *
195 * @param in The file stream whose encoding is to be detected
196 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
197 * @param cp Output param to receive the detected encoding
198 * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
199 * indicates a failure on entry, the function will immediately return.
200 * On exit the value will indicate the success of the operation.
201 * @return Boolean whether the Unicode charset was autodetected.
202 */
203
204U_CAPI UBool U_EXPORT2
205ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
206
207/**
208 * Returns the approximate size in UChars required for converting the file to UChars
209 */
210U_CAPI int32_t U_EXPORT2
211ucbuf_size(UCHARBUF* buf);
212
213U_CAPI const char* U_EXPORT2
214ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status);
215
216#endif
73c04bcf
A
217#endif
218