]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/util.h
ICU-59173.0.1.tar.gz
[apple/icu.git] / icuSources / common / util.h
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f 3/*
46f4442e 4 **********************************************************************
4388f060 5 * Copyright (c) 2001-2011, International Business Machines
46f4442e
A
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/19/2001 aliu Creation.
10 **********************************************************************
11 */
12
b75a7d8f
A
13#ifndef ICU_UTIL_H
14#define ICU_UTIL_H
15
16#include "unicode/utypes.h"
17#include "unicode/uobject.h"
18#include "unicode/unistr.h"
19
20//--------------------------------------------------------------------
21// class ICU_Utility
22// i18n utility functions, scoped into the class ICU_Utility.
23//--------------------------------------------------------------------
24
25U_NAMESPACE_BEGIN
26
// Forward declaration only: within this header UnicodeMatcher is used solely
// as a pointer parameter type (see appendToRule), so no full definition is needed.
27class UnicodeMatcher;
28
// Static-only holder for the i18n helper functions declared below; the
// private constructor at the end of the class prevents instantiation.
29class U_COMMON_API ICU_Utility /* not : public UObject because all methods are static */ {
30 public:
31
32 /**
33 * Append a number to the given UnicodeString in the given radix.
34 * Standard digits '0'-'9' are used and letters 'A'-'Z' for
35 * radices 11 through 36.
36 * @param result the digits of the number are appended here
37 * @param n the number to be converted to digits; may be negative.
38 * If negative, a '-' is prepended to the digits.
39 * @param radix a radix from 2 to 36 inclusive.
40 * @param minDigits the minimum number of digits, not including
41 * any '-', to produce. Values less than 2 have no effect. One
42 * digit is always emitted regardless of this parameter.
43 * @return a reference to result
44 */
45 static UnicodeString& appendNumber(UnicodeString& result, int32_t n,
46 int32_t radix = 10,
47 int32_t minDigits = 1);
48
49 /**
50 * Return true if the character is NOT printable ASCII.
51 *
52 * This method should really be in UnicodeString (or similar). For
53 * now, we implement it here and share it with friend classes.
54 */
55 static UBool isUnprintable(UChar32 c);
56
57 /**
58 * Escape unprintable characters using \uxxxx notation for U+0000 to
59 * U+FFFF and \Uxxxxxxxx for U+10000 and above. If the character is
60 * printable ASCII, then do nothing and return FALSE. Otherwise,
61 * append the escaped notation and return TRUE.
 * @param result string to which the escaped notation is appended
 * @param c the character to examine
 * @return TRUE if an escape was appended, FALSE if c is printable ASCII
62 */
63 static UBool escapeUnprintable(UnicodeString& result, UChar32 c);
64
65 /**
66 * Returns the index of a character, ignoring quoted text.
67 * For example, in the string "abc'hide'h", the 'h' in "hide" will not be
68 * found by a search for 'h'.
69 * @param text text to be searched
70 * @param start the beginning index, inclusive; <code>0 <= start
71 * <= limit</code>.
72 * @param limit the ending index, exclusive; <code>start <= limit
73 * <= text.length()</code>.
74 * @param c character to search for
75 * @return Offset of the first instance of c, or -1 if not found.
76 */
374ca955
A
77//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
78// static int32_t quotedIndexOf(const UnicodeString& text,
79// int32_t start, int32_t limit,
80// UChar c);
b75a7d8f
A
81
82 /**
83 * Skip over a sequence of zero or more white space characters at pos.
 * @param str the string to be scanned
 * @param pos the index in str at which scanning starts; it is only
 * modified when advance is true
84 * @param advance if true, advance pos to the first non-white-space
85 * character at or after pos, or str.length(), if there is none.
86 * Otherwise leave pos unchanged.
87 * @return the index of the first non-white-space character at or
88 * after pos, or str.length(), if there is none.
89 */
90 static int32_t skipWhitespace(const UnicodeString& str, int32_t& pos,
91 UBool advance = FALSE);
92
93 /**
4388f060
A
94 * Skip over Pattern_White_Space in a Replaceable.
95 * Skipping may be done in the forward or
b75a7d8f
A
96 * reverse direction. In either case, the leftmost index will be
97 * inclusive, and the rightmost index will be exclusive. That is,
98 * given a range defined as [start, limit), the call
99 * skipWhitespace(text, start, limit) will advance start past leading
100 * whitespace, whereas the call skipWhitespace(text, limit, start),
101 * will back up limit past trailing whitespace.
102 * @param text the text to be analyzed
103 * @param pos either the start or limit of a range of 'text', to skip
104 * leading or trailing whitespace, respectively
105 * @param stop either the limit or start of a range of 'text', to skip
106 * leading or trailing whitespace, respectively
107 * @return the new start or limit, depending on what was passed in to
108 * 'pos'
109 */
110//?FOR FUTURE USE. DISABLE FOR NOW for coverage reasons.
111//? static int32_t skipWhitespace(const Replaceable& text,
112//? int32_t pos, int32_t stop);
113
114 /**
115 * Parse a single non-whitespace character 'ch', optionally
116 * preceded by whitespace.
117 * @param id the string to be parsed
118 * @param pos INPUT-OUTPUT parameter. On input, pos is the
119 * offset of the first character to be parsed. On output, pos
120 * is the index after the last parsed character. If the parse
121 * fails, pos will be unchanged.
122 * @param ch the non-whitespace character to be parsed.
123 * @return true if 'ch' is seen preceded by zero or more
124 * whitespace characters.
125 */
126 static UBool parseChar(const UnicodeString& id, int32_t& pos, UChar ch);
127
128 /**
129 * Parse a pattern string starting at offset pos. Keywords are
130 * matched case-insensitively. Spaces may be skipped and may be
131 * optional or required. Integer values may be parsed, and if
132 * they are, they will be returned in the given array. If
133 * successful, the offset of the next non-space character is
134 * returned. On failure, -1 is returned.
 * @param rule the text in which 'pattern' is matched
 * @param pos offset in rule at which parsing starts
 * @param limit NOTE(review): presumably the offset in rule at which
 * parsing must stop (exclusive) -- confirm against the implementation
135 * @param pattern must only contain lowercase characters, which
136 * will match their uppercase equivalents as well. A space
137 * character matches one or more required spaces. A '~' character
138 * matches zero or more optional spaces. A '#' character matches
139 * an integer and stores it in parsedInts, which the caller must
140 * ensure has enough capacity.
141 * @param parsedInts array to receive parsed integers. Caller
142 * must ensure that it has room for at least as many elements as
143 * there are '#' signs in 'pattern'.
144 * @return the position after the last character parsed, or -1 if
145 * the parse failed
146 */
147 static int32_t parsePattern(const UnicodeString& rule, int32_t pos, int32_t limit,
148 const UnicodeString& pattern, int32_t* parsedInts);
149
150 /**
151 * Parse a pattern string within the given Replaceable and a parsing
152 * pattern. Characters are matched literally and case-sensitively
153 * except for the following special characters:
154 *
4388f060 155 * ~ zero or more Pattern_White_Space chars
b75a7d8f
A
156 *
157 * If end of pattern is reached with all matches along the way,
158 * pos is advanced to the first unparsed index and returned.
159 * Otherwise -1 is returned.
160 * @param pat pattern that controls parsing
161 * @param text text to be parsed, starting at index
162 * @param index offset to first character to parse
163 * @param limit offset after last character to parse
164 * @return index after last parsed character, or -1 on parse failure.
165 */
166 static int32_t parsePattern(const UnicodeString& pat,
167 const Replaceable& text,
168 int32_t index,
169 int32_t limit);
170
171 /**
172 * Parse an integer at pos, either of the form \d+ or of the form
173 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
174 * or octal format.
 * @param rule the string containing the integer
175 * @param pos INPUT-OUTPUT parameter. On input, the first
176 * character to parse. On output, the character after the last
177 * parsed character.
 * @param limit NOTE(review): presumably the offset in rule at which
 * parsing must stop (exclusive) -- confirm against the implementation
 * @return the parsed integer. NOTE(review): the result when no valid
 * digits are found is not stated in this header; check the
 * implementation before relying on it.
178 */
179 static int32_t parseInteger(const UnicodeString& rule, int32_t& pos, int32_t limit);
180
181 /**
182 * Parse a Unicode identifier from the given string at the given
183 * position. Return the identifier, or an empty string if there
184 * is no identifier.
185 * @param str the string to parse
186 * @param pos INPUT-OUTPUT parameter. On INPUT, pos is the
187 * first character to examine. It must be less than str.length(),
188 * and it must not point to a whitespace character. That is, must
189 * have pos < str.length() and
190 * !UCharacter::isWhitespace(str.char32At(pos)). On
191 * OUTPUT, the position after the last parsed character.
192 * @return the Unicode identifier, or an empty string if there is
193 * no valid identifier at pos.
194 */
195 static UnicodeString parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos);
196
197 /**
198 * Parse an unsigned 31-bit integer at the given offset. Use
199 * UCharacter.digit() to parse individual characters into digits.
200 * @param text the text to be parsed
201 * @param pos INPUT-OUTPUT parameter. On entry, pos is the
202 * offset within text at which to start parsing; it should point
203 * to a valid digit. On exit, pos is the offset after the last
204 * parsed character. If the parse failed, it will be unchanged on
205 * exit. Must be >= 0 on entry.
206 * @param radix the radix in which to parse; must be >= 2 and <=
207 * 36.
208 * @return a non-negative parsed number, or -1 upon parse failure.
209 * Parse fails if there are no digits, that is, if pos does not
210 * point to a valid digit on entry, or if the number to be parsed
211 * does not fit into a 31-bit unsigned integer.
212 */
213 static int32_t parseNumber(const UnicodeString& text,
214 int32_t& pos, int8_t radix);
215
 /**
 * Append a single code point to a rule string.
 * NOTE(review): this overload is undocumented in the header; the
 * descriptions below are inferred from the parameter names and types
 * only and must be confirmed against the implementation.
 * @param rule string being built; passed by non-const reference
 * @param c the code point to append
 * @param isLiteral presumably whether c is to be treated as literal
 * text -- confirm
 * @param escapeUnprintable a flag (it shares its name with the static
 * method above); presumably requests escaping of unprintable
 * characters -- confirm
 * @param quoteBuf scratch buffer passed by non-const reference;
 * presumably holds pending quoted text between calls -- confirm
 */
216 static void appendToRule(UnicodeString& rule,
217 UChar32 c,
218 UBool isLiteral,
219 UBool escapeUnprintable,
220 UnicodeString& quoteBuf);
221
 /**
 * Append a string to a rule string. Takes the same parameters as the
 * UChar32 overload, with the text to append supplied as a
 * UnicodeString instead of a single code point.
 * NOTE(review): undocumented in the header; confirm the exact
 * semantics of isLiteral, escapeUnprintable and quoteBuf against the
 * implementation.
 */
222 static void appendToRule(UnicodeString& rule,
223 const UnicodeString& text,
224 UBool isLiteral,
225 UBool escapeUnprintable,
226 UnicodeString& quoteBuf);
227
 /**
 * Append something derived from a UnicodeMatcher to a rule string.
 * Unlike the other two overloads this one has no isLiteral parameter.
 * NOTE(review): undocumented in the header; presumably appends the
 * matcher's pattern representation, and whether matcher may be null
 * is not stated -- confirm against the implementation.
 */
228 static void appendToRule(UnicodeString& rule,
229 const UnicodeMatcher* matcher,
230 UBool escapeUnprintable,
231 UnicodeString& quoteBuf);
232
233private:
234 // do not instantiate: the constructor is private because the class
 // only groups static methods.
235 ICU_Utility();
236};
237
238U_NAMESPACE_END
239
240#endif
241//eof