]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/ucbuf.h
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1998-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* Modification History:
*
*   Date        Name        Description
*   05/10/01    Ram         Creation.
*
* This API reads in files and returns UChars
*******************************************************************************
*/
22 #include "unicode/localpointer.h"
23 #include "unicode/ucnv.h"
26 #if !UCONFIG_NO_CONVERSION
/**
 * Forward declaration of the buffer type taken by the ucbuf_* functions.
 * Its members are not defined in the visible part of this header.
 */
typedef struct UCHARBUF UCHARBUF;
/**
 * NOTE(review): presumably the value reported when the end of the input is
 * reached -- the describing comment is not visible here; confirm against ucbuf.c.
 */
#define U_EOF 0xFFFFFFFF
/**
 * Error value reported when a sequence cannot be unescaped.
 */
#define U_ERR 0xFFFFFFFE
/**
 * Forward declaration of ULine.
 * Its members are not visible in this part of the header.
 */
typedef struct ULine ULine;
49 * Opens the UCHARBUF with the given file stream and code page for conversion
50 * @param fileName Name of the file to open.
51 * @param codepage The encoding of the file stream to convert to Unicode.
52 * If *codepoge is NULL on input the API will try to autodetect
53 * popular Unicode encodings
54 * @param showWarning Flag to print out warnings to STDOUT
55 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads
56 * the whole file into memory and converts it.
57 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
58 * indicates a failure on entry, the function will immediately return.
59 * On exit the value will indicate the success of the operation.
60 * @return pointer to the newly opened UCHARBUF
62 U_CAPI UCHARBUF
* U_EXPORT2
63 ucbuf_open(const char* fileName
,const char** codepage
,UBool showWarning
, UBool buffered
, UErrorCode
* err
);
66 * Gets a UTF-16 code unit at the current position from the converted buffer
67 * and increments the current position
68 * @param buf Pointer to UCHARBUF structure
69 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
70 * indicates a failure on entry, the function will immediately return.
71 * On exit the value will indicate the success of the operation.
73 U_CAPI
int32_t U_EXPORT2
74 ucbuf_getc(UCHARBUF
* buf
,UErrorCode
* err
);
77 * Gets a UTF-32 code point at the current position from the converted buffer
78 * and increments the current position
79 * @param buf Pointer to UCHARBUF structure
80 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
81 * indicates a failure on entry, the function will immediately return.
82 * On exit the value will indicate the success of the operation.
84 U_CAPI
int32_t U_EXPORT2
85 ucbuf_getc32(UCHARBUF
* buf
,UErrorCode
* err
);
88 * Gets a UTF-16 code unit at the current position from the converted buffer after
89 * unescaping and increments the current position. If the escape sequence is for UTF-32
90 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
91 * @param buf Pointer to UCHARBUF structure
92 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
93 * indicates a failure on entry, the function will immediately return.
94 * On exit the value will indicate the success of the operation.
96 U_CAPI
int32_t U_EXPORT2
97 ucbuf_getcx32(UCHARBUF
* buf
,UErrorCode
* err
);
100 * Gets a pointer to the current position in the internal buffer and length of the line.
101 * It imperative to make a copy of the returned buffer before performing operations on it.
102 * @param buf Pointer to UCHARBUF structure
103 * @param len Output param to receive the len of the buffer returned till end of the line
104 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
105 * indicates a failure on entry, the function will immediately return.
106 * On exit the value will indicate the success of the operation.
107 * Error: U_TRUNCATED_CHAR_FOUND
108 * @return Pointer to the internal buffer, NULL if EOF
110 U_CAPI
const UChar
* U_EXPORT2
111 ucbuf_readline(UCHARBUF
* buf
,int32_t* len
, UErrorCode
* err
);
115 * Resets the buffers and the underlying file stream.
116 * @param buf Pointer to UCHARBUF structure
117 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
118 * indicates a failure on entry, the function will immediately return.
119 * On exit the value will indicate the success of the operation.
121 U_CAPI
void U_EXPORT2
122 ucbuf_rewind(UCHARBUF
* buf
,UErrorCode
* err
);
125 * Returns a pointer to the internal converted buffer
126 * @param buf Pointer to UCHARBUF structure
127 * @param len Pointer to int32_t to receive the lenth of buffer
128 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
129 * indicates a failure on entry, the function will immediately return.
130 * On exit the value will indicate the success of the operation.
131 * @return Pointer to internal UChar buffer
133 U_CAPI
const UChar
* U_EXPORT2
134 ucbuf_getBuffer(UCHARBUF
* buf
,int32_t* len
,UErrorCode
* err
);
137 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
138 * @param buf Pointer to UCHARBUF structure
140 U_CAPI
void U_EXPORT2
141 ucbuf_close(UCHARBUF
* buf
);
143 #if U_SHOW_CPLUSPLUS_API
148 * \class LocalUCHARBUFPointer
149 * "Smart pointer" class, closes a UCHARBUF via ucbuf_close().
150 * For most methods see the LocalPointerBase base class.
152 * @see LocalPointerBase
155 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer
, UCHARBUF
, ucbuf_close
);
162 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
164 U_CAPI
void U_EXPORT2
165 ucbuf_ungetc(int32_t ungetChar
,UCHARBUF
* buf
);
169 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.
170 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
171 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
173 * If the charset was autodetected, the caller must close both the input FileStream
176 * @param fileName The file name to be opened and encoding autodected
177 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
178 * @param cp Output param to receive the detected encoding
179 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
180 * indicates a failure on entry, the function will immediately return.
181 * On exit the value will indicate the success of the operation.
182 * @return The input FileStream if its charset was autodetected; NULL otherwise.
184 U_CAPI FileStream
* U_EXPORT2
185 ucbuf_autodetect(const char* fileName
, const char** cp
,UConverter
** conv
,
186 int32_t* signatureLength
, UErrorCode
* status
);
189 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.
190 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
191 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
193 * If the charset was autodetected, the caller must close the converter.
195 * @param fileStream The file stream whose encoding is to be detected
196 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
197 * @param cp Output param to receive the detected encoding
198 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
199 * indicates a failure on entry, the function will immediately return.
200 * On exit the value will indicate the success of the operation.
201 * @return Boolean whether the Unicode charset was autodetected.
204 U_CAPI UBool U_EXPORT2
205 ucbuf_autodetect_fs(FileStream
* in
, const char** cp
, UConverter
** conv
, int32_t* signatureLength
, UErrorCode
* status
);
208 * Returns the approximate size in UChars required for converting the file to UChars
210 U_CAPI
int32_t U_EXPORT2
211 ucbuf_size(UCHARBUF
* buf
);
213 U_CAPI
const char* U_EXPORT2
214 ucbuf_resolveFileName(const char* inputDir
, const char* fileName
, char* target
, int32_t* len
, UErrorCode
* status
);