]>
git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/ucbuf.h
/*
*******************************************************************************
*
*   Copyright (C) 1998-2005, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*
* Modification History:
*
*   Date        Name        Description
*   05/10/01    Ram         Creation.
*
* This API reads in files and returns UChars
*******************************************************************************
*/
20 #include "unicode/ucnv.h"
23 #if !UCONFIG_NO_CONVERSION
/**
 * Handle type used by the ucbuf_* functions declared below.
 * Forward declaration only; the structure layout is not visible in this
 * section of the header.
 */
typedef struct UCHARBUF UCHARBUF;
/**
 * End-of-file marker value (going by its name).
 * NOTE(review): confirm against the implementation which ucbuf_* functions
 * return this value.
 */
#define U_EOF 0xFFFFFFFF
/**
 * Error value if a sequence cannot be unescaped
 */
#define U_ERR 0xFFFFFFFE
/**
 * Forward declaration of ULine; its layout is not visible in this section
 * of the header.
 */
typedef struct ULine ULine;
46 * Opens the UCHARBUF with the given file stream and code page for conversion
47 * @param fileName Name of the file to open.
48 * @param codepage The encoding of the file stream to convert to Unicode.
49 * If *codepoge is NULL on input the API will try to autodetect
50 * popular Unicode encodings
51 * @param showWarning Flag to print out warnings to STDOUT
52 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads
53 * the whole file into memory and converts it.
54 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
55 * indicates a failure on entry, the function will immediately return.
56 * On exit the value will indicate the success of the operation.
57 * @return pointer to the newly opened UCHARBUF
59 U_CAPI UCHARBUF
* U_EXPORT2
60 ucbuf_open(const char* fileName
,const char** codepage
,UBool showWarning
, UBool buffered
, UErrorCode
* err
);
63 * Gets a UTF-16 code unit at the current position from the converted buffer
64 * and increments the current position
65 * @param buf Pointer to UCHARBUF structure
66 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
67 * indicates a failure on entry, the function will immediately return.
68 * On exit the value will indicate the success of the operation.
70 U_CAPI
int32_t U_EXPORT2
71 ucbuf_getc(UCHARBUF
* buf
,UErrorCode
* err
);
74 * Gets a UTF-32 code point at the current position from the converted buffer
75 * and increments the current position
76 * @param buf Pointer to UCHARBUF structure
77 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
78 * indicates a failure on entry, the function will immediately return.
79 * On exit the value will indicate the success of the operation.
81 U_CAPI
int32_t U_EXPORT2
82 ucbuf_getc32(UCHARBUF
* buf
,UErrorCode
* err
);
85 * Gets a UTF-16 code unit at the current position from the converted buffer after
86 * unescaping and increments the current position. If the escape sequence is for UTF-32
87 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
88 * @param buf Pointer to UCHARBUF structure
89 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
90 * indicates a failure on entry, the function will immediately return.
91 * On exit the value will indicate the success of the operation.
93 U_CAPI
int32_t U_EXPORT2
94 ucbuf_getcx32(UCHARBUF
* buf
,UErrorCode
* err
);
97 * Gets a pointer to the current position in the internal buffer and length of the line.
98 * It imperative to make a copy of the returned buffere before performing operations on it.
99 * @param buf Pointer to UCHARBUF structure
100 * @param len Output param to receive the len of the buffer returned till end of the line
101 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
102 * indicates a failure on entry, the function will immediately return.
103 * On exit the value will indicate the success of the operation.
104 * Error: U_TRUNCATED_CHAR_FOUND
105 * @return Pointer to the internal buffer, NULL if EOF
107 U_CAPI
const UChar
* U_EXPORT2
108 ucbuf_readline(UCHARBUF
* buf
,int32_t* len
, UErrorCode
* err
);
112 * Resets the buffers and the underlying file stream.
113 * @param buf Pointer to UCHARBUF structure
114 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
115 * indicates a failure on entry, the function will immediately return.
116 * On exit the value will indicate the success of the operation.
118 U_CAPI
void U_EXPORT2
119 ucbuf_rewind(UCHARBUF
* buf
,UErrorCode
* err
);
122 * Returns a pointer to the internal converted buffer
123 * @param buf Pointer to UCHARBUF structure
124 * @param len Pointer to int32_t to receive the lenth of buffer
125 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
126 * indicates a failure on entry, the function will immediately return.
127 * On exit the value will indicate the success of the operation.
128 * @return Pointer to internal UChar buffer
130 U_CAPI
const UChar
* U_EXPORT2
131 ucbuf_getBuffer(UCHARBUF
* buf
,int32_t* len
,UErrorCode
* err
);
134 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
135 * @param buf Pointer to UCHARBUF structure
137 U_CAPI
void U_EXPORT2
138 ucbuf_close(UCHARBUF
* buf
);
141 * Rewinds the buffer by one codepoint
143 U_CAPI
void U_EXPORT2
144 ucbuf_ungetc(int32_t ungetChar
,UCHARBUF
* buf
);
148 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.
149 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
150 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
152 * If the charset was autodetected, the caller must close both the input FileStream
155 * @param fileName The file name to be opened and encoding autodected
156 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
157 * @param cp Output param to receive the detected encoding
158 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
159 * indicates a failure on entry, the function will immediately return.
160 * On exit the value will indicate the success of the operation.
161 * @return The input FileStream if its charset was autodetected; NULL otherwise.
163 U_CAPI FileStream
* U_EXPORT2
164 ucbuf_autodetect(const char* fileName
, const char** cp
,UConverter
** conv
,
165 int32_t* signatureLength
, UErrorCode
* status
);
168 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected.
169 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
170 * the converter to correct state for converting the rest of the stream. So the UConverter parameter
172 * If the charset was autodetected, the caller must close the converter.
174 * @param fileStream The file stream whose encoding is to be detected
175 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise.
176 * @param cp Output param to receive the detected encoding
177 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
178 * indicates a failure on entry, the function will immediately return.
179 * On exit the value will indicate the success of the operation.
180 * @return Boolean whether the Unicode charset was autodetected.
183 U_CAPI UBool U_EXPORT2
184 ucbuf_autodetect_fs(FileStream
* in
, const char** cp
, UConverter
** conv
, int32_t* signatureLength
, UErrorCode
* status
);
187 * Returns the approximate size in UChars required for converting the file to UChars
189 U_CAPI
int32_t U_EXPORT2
190 ucbuf_size(UCHARBUF
* buf
);
192 U_CAPI
const char* U_EXPORT2
193 ucbuf_resolveFileName(const char* inputDir
, const char* fileName
, char* target
, int32_t* len
, UErrorCode
* status
);