]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
374ca955 A |
3 | /* |
4 | ******************************************************************************* | |
5 | * | |
2ca993e8 | 6 | * Copyright (C) 1999-2015, International Business Machines |
374ca955 A |
7 | * Corporation and others. All Rights Reserved. |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: uinvchar.h | |
f3c0d7a5 | 11 | * encoding: UTF-8 |
374ca955 A |
12 | * tab size: 8 (not used) |
13 | * indentation:2 | |
14 | * | |
15 | * created on: 2004sep14 | |
16 | * created by: Markus W. Scherer | |
17 | * | |
18 | * Definitions for handling invariant characters, moved here from putil.c | |
19 | * for better modularization. | |
20 | */ | |
21 | ||
22 | #ifndef __UINVCHAR_H__ | |
23 | #define __UINVCHAR_H__ | |
24 | ||
25 | #include "unicode/utypes.h" | |
2ca993e8 A |
26 | #ifdef __cplusplus |
27 | #include "unicode/unistr.h" | |
28 | #endif | |
374ca955 A |
29 | |
30 | /** | |
31 | * Check if a char string only contains invariant characters. | |
32 | * See utypes.h for details. | |
33 | * | |
34 | * @param s Input string pointer. | |
35 | * @param length Length of the string, can be -1 if NUL-terminated. | |
36 | * @return TRUE if s contains only invariant characters. | |
37 | * | |
38 | * @internal (ICU 2.8) | |
39 | */ | |
40 | U_INTERNAL UBool U_EXPORT2 | |
41 | uprv_isInvariantString(const char *s, int32_t length); | |
42 | ||
43 | /** | |
44 | * Check if a Unicode string only contains invariant characters. | |
45 | * See utypes.h for details. | |
46 | * | |
47 | * @param s Input string pointer. | |
48 | * @param length Length of the string, can be -1 if NUL-terminated. | |
49 | * @return TRUE if s contains only invariant characters. | |
50 | * | |
51 | * @internal (ICU 2.8) | |
52 | */ | |
53 | U_INTERNAL UBool U_EXPORT2 | |
54 | uprv_isInvariantUString(const UChar *s, int32_t length); | |
55 | ||
56 | /** | |
57 | * \def U_UPPER_ORDINAL | |
58 | * Get the ordinal number of an uppercase invariant character ('A' yields 0). The argument is not validated, and the EBCDIC variant expands x more than once, so avoid arguments with side effects. | |
59 | * @internal | |
60 | */ | |
61 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
62 | # define U_UPPER_ORDINAL(x) ((x)-'A') | |
63 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
64 | # define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \ | |
65 | (((x) < 'S') ? ((x)-'J'+9) : \ | |
66 | ((x)-'S'+18))) | |
67 | #else | |
68 | # error Unknown charset family! | |
69 | #endif | |
70 | ||
340931cb A |
71 | #ifdef __cplusplus |
72 | ||
73 | U_NAMESPACE_BEGIN | |
74 | ||
75 | /** | |
76 | * Like U_UPPER_ORDINAL(x) but with validation: on EBCDIC, codes that fall in the gaps between the three letter ranges yield -1. | |
77 | * Returns 0..25 for A..Z else a value outside 0..25, so callers can range-check the result. | |
78 | */ | |
79 | inline int32_t uprv_upperOrdinal(int32_t c) { | |
80 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
81 | return c - 'A'; | |
82 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
83 | // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8). | |
84 | // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout | |
85 | if (c <= 'I') { return c - 'A'; } // A-I --> 0..8 (negative when c is below 'A') | |
86 | if (c < 'J') { return -1; } | |
87 | if (c <= 'R') { return c - 'J' + 9; } // J-R --> 9..17 | |
88 | if (c < 'S') { return -1; } | |
89 | return c - 'S' + 18; // S-Z --> 18..25 (above 25 when c is beyond 'Z') | |
90 | #else | |
91 | # error Unknown charset family! | |
92 | #endif | |
93 | } | |
94 | ||
95 | // Like U_UPPER_ORDINAL(x) but for lowercase and with validation: on EBCDIC, codes in the gaps between the three letter ranges yield -1. | |
96 | // Returns 0..25 for a..z else a value outside 0..25, so callers can range-check the result. | |
97 | inline int32_t uprv_lowerOrdinal(int32_t c) { | |
98 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
99 | return c - 'a'; | |
100 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
101 | // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8). | |
102 | // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout | |
103 | if (c <= 'i') { return c - 'a'; } // a-i --> 0..8 (negative when c is below 'a') | |
104 | if (c < 'j') { return -1; } | |
105 | if (c <= 'r') { return c - 'j' + 9; } // j-r --> 9..17 | |
106 | if (c < 's') { return -1; } | |
107 | return c - 's' + 18; // s-z --> 18..25 (above 25 when c is beyond 'z') | |
108 | #else | |
109 | # error Unknown charset family! | |
110 | #endif | |
111 | } | |
112 | ||
113 | U_NAMESPACE_END | |
114 | ||
115 | #endif | |
116 | ||
117 | /** | |
118 | * Returns true if c == '@' is possible. | |
119 | * The @ sign is a variant character: the @ sign used on one | |
120 | * EBCDIC machine won't be compiled the same way on other EBCDIC-based machines. | |
121 | * @internal | |
122 | */ | |
123 | U_CFUNC UBool | |
124 | uprv_isEbcdicAtSign(char c); | |
125 | ||
126 | /** | |
127 | * \def uprv_isAtSign | |
128 | * Returns true if c == '@' is possible. | |
129 | * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign(). | |
130 | * @internal | |
131 | */ | |
132 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
133 | # define uprv_isAtSign(c) ((c)=='@') | |
134 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
135 | # define uprv_isAtSign(c) uprv_isEbcdicAtSign(c) | |
136 | #else | |
137 | # error Unknown charset family! | |
138 | #endif | |
139 | ||
729e4ab9 A |
140 | /** |
141 | * Compare two EBCDIC invariant-character strings in ASCII order. | |
142 | * @internal | |
143 | */ | |
144 | U_INTERNAL int32_t U_EXPORT2 | |
145 | uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2); | |
146 | ||
147 | /** | |
148 | * \def uprv_compareInvCharsAsAscii | |
149 | * Compare two invariant-character strings in ASCII order: expands to uprv_strcmp(s1, s2) on ASCII platforms and to uprv_compareInvEbcdicAsAscii(s1, s2) on EBCDIC platforms. NOTE(review): uprv_strcmp is not declared in this header; presumably users include its declaring header as well -- confirm. | |
150 | * @internal | |
151 | */ | |
152 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
153 | # define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2) | |
154 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
155 | # define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2) | |
156 | #else | |
157 | # error Unknown charset family! | |
158 | #endif | |
159 | ||
340931cb A |
160 | /** |
161 | * Converts an EBCDIC invariant character to ASCII. | |
162 | * @internal | |
163 | */ | |
164 | U_INTERNAL char U_EXPORT2 | |
165 | uprv_ebcdicToAscii(char c); | |
166 | ||
167 | /** | |
168 | * \def uprv_invCharToAscii | |
169 | * Converts an invariant character to ASCII: the identity (c) on ASCII platforms, a call to uprv_ebcdicToAscii(c) on EBCDIC platforms. | |
170 | * @internal | |
171 | */ | |
172 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
173 | # define uprv_invCharToAscii(c) (c) | |
174 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
175 | # define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c) | |
176 | #else | |
177 | # error Unknown charset family! | |
178 | #endif | |
179 | ||
4388f060 A |
180 | /** |
181 | * Converts an EBCDIC invariant character to lowercase ASCII. | |
182 | * @internal | |
183 | */ | |
184 | U_INTERNAL char U_EXPORT2 | |
185 | uprv_ebcdicToLowercaseAscii(char c); | |
186 | ||
187 | /** | |
188 | * \def uprv_invCharToLowercaseAscii | |
189 | * Converts an invariant character to lowercase ASCII. This is an object-like macro that expands to a function name (uprv_asciitolower on ASCII platforms, uprv_ebcdicToLowercaseAscii on EBCDIC platforms), so it is used with ordinary call syntax. NOTE(review): uprv_asciitolower is not declared in this header; presumably users include its declaring header as well -- confirm. | |
190 | * @internal | |
191 | */ | |
192 | #if U_CHARSET_FAMILY==U_ASCII_FAMILY | |
193 | # define uprv_invCharToLowercaseAscii uprv_asciitolower | |
194 | #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY | |
195 | # define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii | |
196 | #else | |
197 | # error Unknown charset family! | |
198 | #endif | |
199 | ||
729e4ab9 A |
200 | /** |
201 | * Copy EBCDIC to ASCII | |
202 | * @internal | |
203 | * @see uprv_strncpy | |
204 | */ | |
205 | U_INTERNAL uint8_t* U_EXPORT2 | |
206 | uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n); | |
207 | ||
208 | ||
209 | /** | |
210 | * Copy ASCII to EBCDIC | |
211 | * @internal | |
212 | * @see uprv_strncpy | |
213 | */ | |
214 | U_INTERNAL uint8_t* U_EXPORT2 | |
215 | uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n); | |
216 | ||
217 | ||
218 | ||
374ca955 | 219 | #endif |