1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
6 * Copyright (C) 1999-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
9 *******************************************************************************
10 * file name: uinvchar.h
12 * tab size: 8 (not used)
15 * created on: 2004sep14
16 * created by: Markus W. Scherer
18 * Definitions for handling invariant characters, moved here from putil.c
19 * for better modularization.
22 #ifndef __UINVCHAR_H__
23 #define __UINVCHAR_H__
25 #include "unicode/utypes.h"
27 #include "unicode/unistr.h"
31 * Check if a char string only contains invariant characters.
32 * See utypes.h for details.
34 * @param s Input string pointer.
35 * @param length Length of the string, can be -1 if NUL-terminated.
36 * @return TRUE if s contains only invariant characters.
40 U_INTERNAL UBool U_EXPORT2
41 uprv_isInvariantString(const char *s
, int32_t length
);
44 * Check if a Unicode string only contains invariant characters.
45 * See utypes.h for details.
47 * @param s Input string pointer.
48 * @param length Length of the string, can be -1 if NUL-terminated.
49 * @return TRUE if s contains only invariant characters.
53 U_INTERNAL UBool U_EXPORT2
54 uprv_isInvariantUString(const UChar
*s
, int32_t length
);
/**
 * \def U_UPPER_ORDINAL
 * Get the ordinal number of an uppercase invariant character
 * ('A' yields 0). The macro performs no validation of x, and the EBCDIC
 * variant evaluates x more than once, so x should be free of side effects.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define U_UPPER_ORDINAL(x) ((x)-'A')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
/* EBCDIC keeps the letters in three separate runs: A-I, J-R and S-Z. */
#   define U_UPPER_ORDINAL(x) (((x) < 'J') ? ((x)-'A') : \
                              (((x) < 'S') ? ((x)-'J'+9) : \
                               ((x)-'S'+18)))
#else
#   error Unknown charset family!
#endif
/**
 * Like U_UPPER_ORDINAL(x) but with validation.
 * Returns 0..25 for A..Z else a value outside 0..25.
 *
 * Declared static inline so the definition is usable from every translation
 * unit that includes this header; in C a plain inline definition does not
 * provide an external definition to link against.
 *
 * @param c Character code to classify.
 * @return Zero-based position of c within A..Z, or a value outside 0..25
 *         when c is not one of those letters.
 */
static inline int32_t uprv_upperOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    // A-Z are contiguous, so any other input falls outside 0..25.
    return c - 'A';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: A-Z (26 letters) is split into three ranges A-I (9 letters), J-R (9), S-Z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'I') { return c - 'A'; }      // A-I --> 0..8 (negative below 'A')
    if (c < 'J') { return -1; }            // code points between I and J
    if (c <= 'R') { return c - 'J' + 9; }  // J-R --> 9..17
    if (c < 'S') { return -1; }            // code points between R and S
    return c - 'S' + 18;                   // S-Z --> 18..25 (above 25 past 'Z')
#else
#   error Unknown charset family!
#endif
}
// Like U_UPPER_ORDINAL(x) but for lowercase and with validation.
// Returns 0..25 for a..z else a value outside 0..25.
//
// Declared static inline so the definition is usable from every translation
// unit that includes this header; in C a plain inline definition does not
// provide an external definition to link against.
static inline int32_t uprv_lowerOrdinal(int32_t c) {
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
    // a-z are contiguous, so any other input falls outside 0..25.
    return c - 'a';
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    // EBCDIC: a-z (26 letters) is split into three ranges a-i (9 letters), j-r (9), s-z (8).
    // https://en.wikipedia.org/wiki/EBCDIC_037#Codepage_layout
    if (c <= 'i') { return c - 'a'; }      // a-i --> 0..8 (negative below 'a')
    if (c < 'j') { return -1; }            // code points between i and j
    if (c <= 'r') { return c - 'j' + 9; }  // j-r --> 9..17
    if (c < 's') { return -1; }            // code points between r and s
    return c - 's' + 18;                   // s-z --> 18..25 (above 25 past 'z')
#else
#   error Unknown charset family!
#endif
}
/**
 * Returns true if c == '@' is possible.
 * The @ sign is a variant character: the @ sign used on one EBCDIC machine
 * won't be compiled the same way on other EBCDIC based machines.
 *
 * NOTE(review): no return type or linkage specifier is visible in front of
 * this declarator; the line carrying them appears to be missing here.
 * Restore it from the upstream header rather than guessing.
 *
 * @param c Character to test.
 */
uprv_isEbcdicAtSign(char c);
/**
 * \def uprv_isAtSign
 * Returns true if c == '@' is possible.
 * For ASCII, checks for exactly '@'. For EBCDIC, calls uprv_isEbcdicAtSign().
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_isAtSign(c) ((c)=='@')
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_isAtSign(c) uprv_isEbcdicAtSign(c)
#else
#   error Unknown charset family!
#endif
141 * Compare two EBCDIC invariant-character strings in ASCII order.
144 U_INTERNAL
int32_t U_EXPORT2
145 uprv_compareInvEbcdicAsAscii(const char *s1
, const char *s2
);
/**
 * \def uprv_compareInvCharsAsAscii
 * Compare two invariant-character strings in ASCII order.
 * On ASCII platforms this is uprv_strcmp(); on EBCDIC platforms it
 * forwards to uprv_compareInvEbcdicAsAscii().
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_strcmp(s1, s2)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_compareInvCharsAsAscii(s1, s2) uprv_compareInvEbcdicAsAscii(s1, s2)
#else
#   error Unknown charset family!
#endif
161 * Converts an EBCDIC invariant character to ASCII.
164 U_INTERNAL
char U_EXPORT2
165 uprv_ebcdicToAscii(char c
);
/**
 * \def uprv_invCharToAscii
 * Converts an invariant character to ASCII.
 * On ASCII platforms the character is returned unchanged; on EBCDIC
 * platforms it is passed through uprv_ebcdicToAscii().
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_invCharToAscii(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_invCharToAscii(c) uprv_ebcdicToAscii(c)
#else
#   error Unknown charset family!
#endif
181 * Converts an EBCDIC invariant character to lowercase ASCII.
184 U_INTERNAL
char U_EXPORT2
185 uprv_ebcdicToLowercaseAscii(char c
);
/**
 * \def uprv_invCharToLowercaseAscii
 * Converts an invariant character to lowercase ASCII.
 * Resolves to uprv_asciitolower on ASCII platforms and to
 * uprv_ebcdicToLowercaseAscii on EBCDIC platforms.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_asciitolower
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#   define uprv_invCharToLowercaseAscii uprv_ebcdicToLowercaseAscii
#else
#   error Unknown charset family!
#endif
201 * Copy EBCDIC to ASCII
205 U_INTERNAL
uint8_t* U_EXPORT2
206 uprv_aestrncpy(uint8_t *dst
, const uint8_t *src
, int32_t n
);
210 * Copy ASCII to EBCDIC
214 U_INTERNAL
uint8_t* U_EXPORT2
215 uprv_eastrncpy(uint8_t *dst
, const uint8_t *src
, int32_t n
);