]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/uparse.h
ICU-62107.0.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / uparse.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4*******************************************************************************
5*
729e4ab9 6* Copyright (C) 2000-2010, International Business Machines
b75a7d8f
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uparse.h
f3c0d7a5 11* encoding: UTF-8
b75a7d8f
A
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2000apr18
16* created by: Markus W. Scherer
17*
18* This file provides a parser for files that are delimited by one single
19* character like ';' or TAB. Example: the Unicode Character Properties files
20* like UnicodeData.txt are semicolon-delimited.
21*/
22
23#ifndef __UPARSE_H__
24#define __UPARSE_H__
25
26#include "unicode/utypes.h"
27
729e4ab9
A
28/**
29 * Is c an invariant-character whitespace?
 * The test is true exactly when c equals space ' ', TAB '\t', CR '\r' or LF '\n'.
 * This is a function-like macro: c is expanded in up to four comparisons,
 * so do not pass an argument that has side effects.
30 * @param c invariant character
31 */
32#define U_IS_INV_WHITESPACE(c) ((c)==' ' || (c)=='\t' || (c)=='\r' || (c)=='\n')
33
b75a7d8f
A
34U_CDECL_BEGIN
35
36/**
37 * Skip space ' ' and TAB '\t' characters.
 * NOTE(review): the function name suggests general whitespace, but this
 * comment names only space and TAB. Whether CR and LF (the other characters
 * accepted by U_IS_INV_WHITESPACE) are skipped as well cannot be determined
 * from this header -- confirm against the implementation.
38 *
39 * @param s Pointer to characters.
40 * @return Pointer to first character at or after s that is not a space or TAB.
41 */
42U_CAPI const char * U_EXPORT2
43u_skipWhitespace(const char *s);
44
729e4ab9
A
45/**
46 * Trim whitespace (including line endings) from the end of the string.
 * NOTE(review): s is a non-const pointer, so the string is presumably
 * shortened in place -- confirm against the implementation.
47 *
48 * @param s Pointer to the string.
49 * @return Pointer to the new end of the string.
50 */
51U_CAPI char * U_EXPORT2
52u_rtrim(char *s);
53
b75a7d8f
A
54/** Function type for u_parseDelimitedFile().
 * A function of this type is called once for each parsed line.
 *
 * @param context The same context pointer that was passed to u_parseDelimitedFile().
 * @param fields Array of field start (fields[i][0]) and limit (fields[i][1])
 *               pointers, filled by the parser for the current line.
 * @param fieldCount Number of fields; presumably the fieldCount value that was
 *                   given to u_parseDelimitedFile() -- TODO confirm.
 * @param pErrorCode ICU error code; if the function sets it, the parser
 *                   returns with *pErrorCode set to an error code.
 */
55typedef void U_CALLCONV
56UParseLineFn(void *context,
57 char *fields[][2],
58 int32_t fieldCount,
59 UErrorCode *pErrorCode);
60
61/**
62 * Parser for files that are similar to UnicodeData.txt:
63 * This function opens the file and reads it line by line. It skips empty lines
64 * and comment lines that start with a '#'.
65 * All other lines are separated into fields with one delimiter character
66 * (semicolon for Unicode Properties files) between two fields. The last field in
67 * a line does not need to be terminated with a delimiter.
68 *
69 * For each line, after segmenting it, a line function is called.
70 * It gets passed the array of field start and limit pointers that is
71 * passed into this parser and filled by it for each line.
72 * For each field i of the line, the start pointer in fields[i][0]
73 * points to the beginning of the field, while the limit pointer in fields[i][1]
74 * points behind the field, i.e., to the delimiter or the line end.
75 *
76 * The context parameter of the line function is
77 * the same as the one for the parse function.
78 *
79 * The line function may modify the contents of the fields including the
80 * limit characters.
81 *
82 * If the file cannot be opened, or there is a parsing error or the line function
83 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
 *
 * @param filename Name of the file to open and parse.
 * @param delimiter The single character that separates two fields on a line.
 * @param fields Caller-provided array of start/limit pointer pairs that the
 *               parser fills for each line before calling the line function.
 * @param fieldCount Number of fields; presumably the number of entries
 *                   available in fields -- TODO confirm.
 * @param lineFn The line function that is called for each segmented line.
 * @param context Pointer that is passed through unchanged to the line function.
 * @param pErrorCode ICU error code, set on failure as described above.
84 */
85U_CAPI void U_EXPORT2
86u_parseDelimitedFile(const char *filename, char delimiter,
87 char *fields[][2], int32_t fieldCount,
88 UParseLineFn *lineFn, void *context,
89 UErrorCode *pErrorCode);
90
91/**
92 * Parse a string of code points like 0061 0308 0300.
93 * s must end with either ';' or NUL.
94 *
 * @param s Input char * string.
 * @param dest Output buffer for the parsed code points.
 * @param destCapacity Capacity of dest; presumably counted in uint32_t
 *                     elements -- TODO confirm.
 * @param pErrorCode ICU error code.
95 * @return Number of code points.
96 */
97U_CAPI int32_t U_EXPORT2
98u_parseCodePoints(const char *s,
99 uint32_t *dest, int32_t destCapacity,
100 UErrorCode *pErrorCode);
101
102/**
103 * Parse a list of code points like 0061 0308 0300
104 * into a UChar * string.
105 * s must end with either ';' or NUL.
106 *
107 * Set the first code point in *pFirst.
108 *
109 * @param s Input char * string.
110 * @param dest Output string buffer.
111 * @param destCapacity Capacity of dest in numbers of UChars.
112 * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
113 * code point in the string.
114 * @param pErrorCode ICU error code.
115 * @return The length of the string in numbers of UChars.
116 */
117U_CAPI int32_t U_EXPORT2
118u_parseString(const char *s,
119 UChar *dest, int32_t destCapacity,
120 uint32_t *pFirst,
121 UErrorCode *pErrorCode);
122
123/**
124 * Parse a code point range like
125 * 0085 or
126 * 4E00..9FA5.
127 *
128 * s must contain such a range and end with either ';' or NUL.
129 *
 * @param s Input char * string.
 * @param pStart Output; presumably receives the first code point of the
 *               range -- TODO confirm.
 * @param pEnd Output; presumably receives the last code point of the
 *             range -- TODO confirm.
 * @param pErrorCode ICU error code.
130 * @return Length of code point range, end-start+1
131 */
132U_CAPI int32_t U_EXPORT2
133u_parseCodePointRange(const char *s,
134 uint32_t *pStart, uint32_t *pEnd,
135 UErrorCode *pErrorCode);
136
729e4ab9
A
137/**
138 * Same as u_parseCodePointRange() but the range may be terminated by
139 * any character. The position of the terminating character is returned via
140 * the *terminator output parameter.
 *
 * @param terminator Output; *terminator receives the position of the
 *                   character that terminates the range.
141 */
142U_CAPI int32_t U_EXPORT2
143u_parseCodePointRangeAnyTerminator(const char *s,
144 uint32_t *pStart, uint32_t *pEnd,
145 const char **terminator,
146 UErrorCode *pErrorCode);
b75a7d8f 147
b75a7d8f
A
/**
 * NOTE(review): this declaration has no documentation in this header.
 * Judging by the name and signature alone, it presumably parses the text at
 * source (length sLen) into UTF-8 bytes written to dest (capacity
 * destCapacity), returns a length, and reports failures via *status --
 * confirm against the implementation before relying on any of this.
 */
148U_CAPI int32_t U_EXPORT2
149u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
150
151U_CDECL_END
152
153#endif