]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/toolutil/uparse.h
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / uparse.h
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
374ca955 4* Copyright (C) 2000-2004, International Business Machines
b75a7d8f
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8* file name: uparse.h
9* encoding: US-ASCII
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2000apr18
14* created by: Markus W. Scherer
15*
16* This file provides a parser for files that are delimited by one single
17* character like ';' or TAB. Example: the Unicode Character Properties files
18* like UnicodeData.txt are semicolon-delimited.
19*/
20
21#ifndef __UPARSE_H__
22#define __UPARSE_H__
23
24#include "unicode/utypes.h"
25
26U_CDECL_BEGIN
27
28/**
29 * Skip space ' ' and TAB '\t' characters.
30 *
31 * @param s Pointer to characters.
32 * @return Pointer to first character at or after s that is not a space or TAB.
33 */
34U_CAPI const char * U_EXPORT2
35u_skipWhitespace(const char *s);
36
37/** Function type for u_parseDelimitedFile(). */
38typedef void U_CALLCONV
39UParseLineFn(void *context,
40 char *fields[][2],
41 int32_t fieldCount,
42 UErrorCode *pErrorCode);
43
44/**
45 * Parser for files that are similar to UnicodeData.txt:
46 * This function opens the file and reads it line by line. It skips empty lines
47 * and comment lines that start with a '#'.
48 * All other lines are separated into fields with one delimiter character
49 * (semicolon for Unicode Properties files) between two fields. The last field in
50 * a line does not need to be terminated with a delimiter.
51 *
52 * For each line, after segmenting it, a line function is called.
53 * It gets passed the array of field start and limit pointers that is
54 * passed into this parser and filled by it for each line.
55 * For each field i of the line, the start pointer in fields[i][0]
56 * points to the beginning of the field, while the limit pointer in fields[i][1]
57 * points behind the field, i.e., to the delimiter or the line end.
58 *
59 * The context parameter of the line function is
60 * the same as the one for the parse function.
61 *
62 * The line function may modify the contents of the fields including the
63 * limit characters.
64 *
65 * If the file cannot be opened, or there is a parsing error, or the line function
66 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
67 */
68U_CAPI void U_EXPORT2
69u_parseDelimitedFile(const char *filename, char delimiter,
70 char *fields[][2], int32_t fieldCount,
71 UParseLineFn *lineFn, void *context,
72 UErrorCode *pErrorCode);
73
74/**
75 * Parse a string of code points like 0061 0308 0300.
76 * s must end with either ';' or NUL.
77 *
78 * @return Number of code points.
79 */
80U_CAPI int32_t U_EXPORT2
81u_parseCodePoints(const char *s,
82 uint32_t *dest, int32_t destCapacity,
83 UErrorCode *pErrorCode);
84
85/**
86 * Parse a list of code points like 0061 0308 0300
87 * into a UChar * string.
88 * s must end with either ';' or NUL.
89 *
90 * The first code point is stored in *pFirst if pFirst is not NULL.
91 *
92 * @param s Input char * string.
93 * @param dest Output string buffer.
94 * @param destCapacity Capacity of dest in numbers of UChars.
95 * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
96 * code point in the string.
97 * @param pErrorCode ICU error code.
98 * @return The length of the string in numbers of UChars.
99 */
100U_CAPI int32_t U_EXPORT2
101u_parseString(const char *s,
102 UChar *dest, int32_t destCapacity,
103 uint32_t *pFirst,
104 UErrorCode *pErrorCode);
105
106/**
107 * Parse a code point range like
108 * 0085 or
109 * 4E00..9FA5.
110 *
111 * s must contain such a range and end with either ';' or NUL.
112 *
113 * @return Length of code point range, end-start+1
114 */
115U_CAPI int32_t U_EXPORT2
116u_parseCodePointRange(const char *s,
117 uint32_t *pStart, uint32_t *pEnd,
118 UErrorCode *pErrorCode);
119
120
b75a7d8f
A
/**
 * NOTE(review): this function is not documented in this header, and its
 * implementation is not visible here. What the signature shows:
 *
 * @param source       Input char * buffer (read-only).
 * @param sLen         int32_t value paired with source; presumably its
 *                     length -- confirm against the implementation.
 * @param dest         Writable char * buffer; presumably receives the output.
 * @param destCapacity int32_t value paired with dest; presumably its
 *                     capacity -- confirm the unit against the implementation.
 * @param status       Pointer to a UErrorCode, like the pErrorCode parameter
 *                     of the other functions declared in this header.
 * @return An int32_t; presumably an output length -- TODO confirm.
 */
121U_CAPI int32_t U_EXPORT2
122u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
123
124U_CDECL_END
125
126#endif