[apple/icu.git] / icuSources / common / cstring.c

/*
******************************************************************************
*
*   Copyright (C) 1997-2003, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*
* File CSTRING.C
*
* @author       Helena Shih
*
* Modification History:
*
*   Date        Name        Description
*   6/18/98     hshih       Created
*   09/08/98    stephen     Added include for ctype, for Mac Port
*   11/15/99    helena      Integrated S/390 IEEE changes. 
******************************************************************************
*/


#include <stdlib.h>
#include <stdio.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"

/*
 * We hardcode case conversion for invariant characters to match our expectation
 * and the compiler execution charset.
 * This prevents problems on systems
 * - with non-default casing behavior, like Turkish system locales where
 *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
 * - where there are no lowercase Latin characters at all, or using different
 *   codes (some old EBCDIC codepages)
 *
 * This works because the compiler usually runs on a platform where the execution
 * charset includes all of the invariant characters at their expected
 * code positions, so that the char * string literals in ICU code match
 * the char literals here.
 *
 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
 * and the set of uppercase Latin letters is discontiguous as well.
 */

/*
 * Uppercases a single invariant-character Latin letter.
 * Lowercase letters 'a'..'z' are shifted by the distance between 'A' and 'a'
 * in the compiler's execution charset; any other value is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_toupper(char c) {
    int isLowercaseLetter;

#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* The lowercase letters are tested as three separate runs: a-i, j-r, s-z. */
    isLowercaseLetter =
        (c>='a' && c<='i') ||
        (c>='j' && c<='r') ||
        (c>='s' && c<='z');
#else
    isLowercaseLetter = (c>='a' && c<='z');
#endif

    if(!isLowercaseLetter) {
        return c;
    }
    return (char)(c+('A'-'a'));
}


#if 0
/*
 * Compiled out with #if 0 (kept for reference only) because cstring.h
 * defines uprv_tolower() to be the same as either uprv_asciitolower() or
 * uprv_ebcdictolower() to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 *
 * Behavior if enabled: an uppercase Latin letter 'A'..'Z' is shifted by the
 * distance between 'a' and 'A' in the execution charset; every other value
 * is returned unchanged. The EBCDIC branch tests the letters as three
 * separate runs (A-I, J-R, S-Z).
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
        c=(char)(c+('a'-'A'));
    }
#else
    if('A'<=c && c<='Z') {
        c=(char)(c+('a'-'A'));
    }
#endif
    return c;
}
#endif

/*
 * Lowercases one byte using ASCII code points, independent of the
 * compiler's execution charset: values 0x41..0x5a ('A'..'Z' in ASCII)
 * are moved up by 0x20, everything else is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_asciitolower(char c) {
    const int isAsciiUpper = (c>=0x41 && c<=0x5a);

    return isAsciiUpper ? (char)(c+0x20) : c;
}

/*
 * Lowercases one byte using EBCDIC code points, independent of the
 * compiler's execution charset: values in 0xc1..0xc9, 0xd1..0xd9 and
 * 0xe2..0xe9 (the uppercase Latin letter runs) are moved down by 0x40,
 * everything else is returned unchanged.
 */
U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c) {
    /* Range-check as an unsigned byte: all three runs lie above 0x7f. */
    const uint8_t byteValue = (uint8_t)c;

    if( (byteValue>=0xc1 && byteValue<=0xc9) ||
        (byteValue>=0xd1 && byteValue<=0xd9) ||
        (byteValue>=0xe2 && byteValue<=0xe9)
    ) {
        return (char)(c-0x40);
    }
    return c;
}


/*
 * Lowercases a NUL-terminated string in place with uprv_tolower().
 * A NULL argument is accepted and simply returned.
 * Returns the pointer that was passed in.
 */
U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char* str)
{
    char* cursor;

    if (str == NULL) {
        return str;
    }

    cursor = str;
    for (;;) {
        /* The terminator is run through the conversion and stored back too. */
        *cursor = (char)uprv_tolower(*cursor);
        if (*cursor == 0) {
            break;
        }
        ++cursor;
    }

    return str;
}

/*
 * Uppercases a NUL-terminated string in place with uprv_toupper().
 * A NULL argument is accepted and simply returned.
 * Returns the pointer that was passed in.
 */
U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char* str)
{
    char* cursor;

    if (str == NULL) {
        return str;
    }

    cursor = str;
    for (;;) {
        /* The terminator is run through the conversion and stored back too. */
        *cursor = (char)uprv_toupper(*cursor);
        if (*cursor == 0) {
            break;
        }
        ++cursor;
    }

    return str;
}

/*
 * Takes an int32_t and fills in a char* string with that number "radix"-based.
 *
 * buffer  receives the NUL-terminated result. The caller must provide room
 *         for the longest output of the radix in use: 33 chars for radix 2
 *         (32 digits plus NUL), 12 chars for radix 10 ("-2147483648" plus NUL).
 * v       the value to format. Only radix 10 treats it as signed and writes
 *         a leading '-'; in every other radix the 32-bit pattern is formatted
 *         as an unsigned number.
 * radix   the number base; must be in the range 2..16 (checked by U_ASSERT).
 *
 * Returns the length of the string (not including the NUL).
 */
U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
{
    /* Worst case is radix 2: 32 digits, plus the terminating NUL. */
    char      tbuf[33];
    int32_t   tbx    = (int32_t)sizeof(tbuf)-1;
    int32_t   length = 0;
    uint32_t  uval   = (uint32_t)v;
    uint32_t  base   = (uint32_t)radix;
    uint8_t   digit;

    U_ASSERT(radix>=2 && radix<=16);
    if(v<0 && radix == 10) {
        /*
         * Only in base 10 do we consider numbers to be signed.
         * Negate in unsigned arithmetic: -v would overflow for INT32_MIN.
         */
        uval = (uint32_t)(0 - uval);
        buffer[length++] = '-';
    }

    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % base);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / base;
    } while (uval != 0);

    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}


/*
 * Takes an int64_t and fills in a char* string with that number "radix"-based.
 *
 * buffer  receives the NUL-terminated result. The caller must provide room
 *         for the longest output of the radix in use: 65 chars for radix 2
 *         (64 digits plus NUL), 21 chars for radix 10
 *         ("-9223372036854775808" plus NUL).
 * v       the value to format. Only radix 10 treats it as signed and writes
 *         a leading '-'; in every other radix the 64-bit pattern is formatted
 *         as an unsigned number.
 * radix   the number base; must be in the range 2..16 (checked by U_ASSERT).
 *
 * Returns the length of the string, not including the terminating NUL.
 */
U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
{
    /* Worst case is radix 2: 64 digits, plus the terminating NUL. */
    char      tbuf[65];
    int32_t   tbx    = (int32_t)sizeof(tbuf)-1;
    int32_t   length = 0;
    uint64_t  uval   = (uint64_t)v;
    uint8_t   digit;

    U_ASSERT(radix>=2 && radix<=16);
    if(v<0 && radix == 10) {
        /*
         * Only in base 10 do we consider numbers to be signed.
         * Negate in unsigned arithmetic: -v would overflow for INT64_MIN.
         */
        uval = (uint64_t)(0 - uval);
        buffer[length++] = '-';
    }

    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
    do {
        digit = (uint8_t)(uval % radix);
        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
        uval  = uval / radix;
    } while (uval != 0);

    /* copy converted number into user buffer  */
    uprv_strcpy(buffer+length, tbuf+tbx);
    length += (int32_t)sizeof(tbuf) - tbx - 1;
    return length;
}


/*
 * Parses integerString as a number in the given radix with uprv_strtoul()
 * and returns the result converted to int32_t.
 * The position where parsing stopped is not reported to the caller, and no
 * range or error checking is performed here.
 */
U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char *integerString, int32_t radix)
{
    char *parseEnd;
    int32_t value = (int32_t)uprv_strtoul(integerString, &parseEnd, radix);

    return value;
}
    
/*
 * Case-insensitive comparison of two NUL-terminated strings.
 * Each pair of characters is lowercased with uprv_tolower() and compared as
 * unsigned char values.
 *
 * NULL handling: two NULL pointers compare equal; a NULL pointer sorts
 * before any non-NULL string.
 *
 * Returns 0 if the strings are equal, -1 / 1 if one string (or pointer) ends
 * before the other, otherwise the difference of the first differing
 * lowercased characters.
 */
U_CAPI int U_EXPORT2
T_CString_stricmp(const char *str1, const char *str2) {
    if(str1==NULL || str2==NULL) {
        if(str1==str2) {
            return 0;       /* both NULL */
        }
        return (str1==NULL) ? -1 : 1;
    }

    /* compare non-NULL strings lexically with lowercase */
    for(;; ++str1, ++str2) {
        const unsigned char left=(unsigned char)*str1;
        const unsigned char right=(unsigned char)*str2;
        int diff;

        if(left==0 || right==0) {
            /* at least one string ended: the shorter one sorts first */
            if(left==right) {
                return 0;
            }
            return (left==0) ? -1 : 1;
        }

        /* compare non-zero characters with lowercase */
        diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
        if(diff!=0) {
            return diff;
        }
    }
}

/*
 * Case-insensitive comparison of at most n characters of two NUL-terminated
 * strings. Each pair of characters is lowercased with uprv_tolower() and
 * compared as unsigned char values.
 *
 * NULL handling: two NULL pointers compare equal; a NULL pointer sorts
 * before any non-NULL string.
 *
 * Returns 0 if the first n characters (or the whole strings, if shorter)
 * are equal, -1 / 1 if one string (or pointer) ends before the other,
 * otherwise the difference of the first differing lowercased characters.
 */
U_CAPI int U_EXPORT2
T_CString_strnicmp(const char *str1, const char *str2, uint32_t n) {
    if(str1==NULL || str2==NULL) {
        if(str1==str2) {
            return 0;       /* both NULL */
        }
        return (str1==NULL) ? -1 : 1;
    }

    /* compare non-NULL strings lexically with lowercase */
    for(; n!=0; --n, ++str1, ++str2) {
        const unsigned char left=(unsigned char)*str1;
        const unsigned char right=(unsigned char)*str2;
        int diff;

        if(left==0 || right==0) {
            /* at least one string ended: the shorter one sorts first */
            if(left==right) {
                return 0;
            }
            return (left==0) ? -1 : 1;
        }

        /* compare non-zero characters with lowercase */
        diff=(int)(unsigned char)uprv_tolower(left)-(int)(unsigned char)uprv_tolower(right);
        if(diff!=0) {
            return diff;
        }
    }

    /* n characters compared without finding a difference */
    return 0;
}

/*
 * Returns a copy of the NUL-terminated string src in memory obtained from
 * uprv_malloc(), or NULL if the allocation fails.
 * src must not be NULL: its length is measured with uprv_strlen().
 */
U_CAPI char* U_EXPORT2
uprv_strdup(const char *src) {
    /* size of the string including its terminating NUL */
    const size_t byteCount = uprv_strlen(src) + 1;
    char *copy = (char *) uprv_malloc(byteCount);

    if (copy == NULL) {
        return NULL;
    }

    uprv_memcpy(copy, src, byteCount);
    return copy;
}

/*
 * Duplicates the first n bytes of src into memory obtained from
 * uprv_malloc() and appends a terminating NUL.
 *
 * src  the bytes to copy. For n >= 0 exactly n bytes are copied with
 *      uprv_memcpy(); the copy does not stop at a NUL, so src must have at
 *      least n readable bytes.
 * n    number of bytes to copy; a negative value means "src is
 *      NUL-terminated, copy all of it" and delegates to uprv_strdup().
 *
 * Returns the new string, or NULL if the allocation fails.
 */
U_CAPI char* U_EXPORT2
uprv_strndup(const char *src, int32_t n) {
    char *dup;
    size_t count;

    if(n < 0) {
        return uprv_strdup(src);
    }

    /* Do the +1 in size_t: n+1 in int32_t would overflow for INT32_MAX. */
    count = (size_t)n;
    dup = (char*)uprv_malloc(count+1);
    if (dup) {
        uprv_memcpy(dup, src, count);
        dup[count] = 0;
    }

    return dup;
}
Commit	Line	Data
b75a7d8f A	1	/*
	2	******************************************************************************
	3	*
	4	* Copyright (C) 1997-2003, International Business Machines
	5	* Corporation and others. All Rights Reserved.
	6	*
	7	******************************************************************************
	8	*
	9	* File CSTRING.C
	10	*
	11	* @author Helena Shih
	12	*
	13	* Modification History:
	14	*
	15	* Date Name Description
	16	* 6/18/98 hshih Created
	17	* 09/08/98 stephen Added include for ctype, for Mac Port
	18	* 11/15/99 helena Integrated S/390 IEEE changes.
	19	******************************************************************************
	20	*/
	21
	22
	23
	24	#include <stdlib.h>
374ca955	25	#include <stdio.h>
b75a7d8f A	26	#include "unicode/utypes.h"
	27	#include "cmemory.h"
	28	#include "cstring.h"
374ca955	29	#include "uassert.h"
b75a7d8f A	30
	31	/*
	32	* We hardcode case conversion for invariant characters to match our expectation
	33	* and the compiler execution charset.
	34	* This prevents problems on systems
	35	* - with non-default casing behavior, like Turkish system locales where
	36	* tolower('I') maps to dotless i and toupper('i') maps to dotted I
	37	* - where there are no lowercase Latin characters at all, or using different
	38	* codes (some old EBCDIC codepages)
	39	*
	40	* This works because the compiler usually runs on a platform where the execution
	41	* charset includes all of the invariant characters at their expected
	42	* code positions, so that the char * string literals in ICU code match
	43	* the char literals here.
	44	*
	45	* Note that the set of lowercase Latin letters is discontiguous in EBCDIC
	46	* and the set of uppercase Latin letters is discontiguous as well.
	47	*/
	48
	49	U_CAPI char U_EXPORT2
	50	uprv_toupper(char c) {
	51	#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
	52	if(('a'<=c && c<='i') \|\| ('j'<=c && c<='r') \|\| ('s'<=c && c<='z')) {
	53	c=(char)(c+('A'-'a'));
	54	}
	55	#else
	56	if('a'<=c && c<='z') {
	57	c=(char)(c+('A'-'a'));
	58	}
	59	#endif
	60	return c;
	61	}
	62
374ca955 A	63
	64	#if 0
	65	/*
	66	* Commented out because cstring.h defines uprv_tolower() to be
	67	* the same as either uprv_asciitolower() or uprv_ebcdictolower()
	68	* to reduce the amount of code to cover with tests.
	69	*
	70	* Note that this uprv_tolower() definition is likely to work for most
	71	* charset families, not just ASCII and EBCDIC, because its #else branch
	72	* is written generically.
	73	*/
b75a7d8f A	74	U_CAPI char U_EXPORT2
	75	uprv_tolower(char c) {
	76	#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
	77	if(('A'<=c && c<='I') \|\| ('J'<=c && c<='R') \|\| ('S'<=c && c<='Z')) {
	78	c=(char)(c+('a'-'A'));
	79	}
	80	#else
	81	if('A'<=c && c<='Z') {
	82	c=(char)(c+('a'-'A'));
	83	}
	84	#endif
	85	return c;
	86	}
374ca955 A	87	#endif
	88
	89	U_CAPI char U_EXPORT2
	90	uprv_asciitolower(char c) {
	91	if(0x41<=c && c<=0x5a) {
	92	c=(char)(c+0x20);
	93	}
	94	return c;
	95	}
	96
	97	U_CAPI char U_EXPORT2
	98	uprv_ebcdictolower(char c) {
	99	if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) \|\|
	100	(0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) \|\|
	101	(0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
	102	) {
	103	c=(char)(c-0x40);
	104	}
	105	return c;
	106	}
b75a7d8f A	107
	108
	109	U_CAPI char* U_EXPORT2
	110	T_CString_toLowerCase(char* str)
	111	{
	112	char* origPtr = str;
	113
	114	if (str) {
	115	do
	116	str = (char)uprv_tolower(str);
	117	while (*(str++));
	118	}
	119
	120	return origPtr;
	121	}
	122
	123	U_CAPI char* U_EXPORT2
	124	T_CString_toUpperCase(char* str)
	125	{
	126	char* origPtr = str;
	127
	128	if (str) {
	129	do
	130	str = (char)uprv_toupper(str);
	131	while (*(str++));
	132	}
	133
	134	return origPtr;
	135	}
	136
	137	/*
	138	* Takes a int32_t and fills in a char* string with that number "radix"-based.
	139	* Does not handle negative values (makes an empty string for them).
374ca955 A	140	* Writes at most 12 chars ("-2147483647" plus NUL).
374ca955 A	141	* Returns the length of the string (not including the NUL).
b75a7d8f A	142	*/
b75a7d8f A	143	U_CAPI int32_t U_EXPORT2
374ca955	144	T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
b75a7d8f	145	{
374ca955 A	146	char tbuf[30];
	147	int32_t tbx = sizeof(tbuf);
	148	uint8_t digit;
	149	int32_t length = 0;
	150	uint32_t uval;
	151
	152	U_ASSERT(radix>=2 && radix<=16);
	153	uval = (uint32_t) v;
	154	if(v<0 && radix == 10) {
	155	/* Only in base 10 do we conside numbers to be signed. */
	156	uval = (uint32_t)(-v);
	157	buffer[length++] = '-';
b75a7d8f	158	}
374ca955 A	159
	160	tbx = sizeof(tbuf)-1;
	161	tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
	162	do {
	163	digit = (uint8_t)(uval % radix);
	164	tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
	165	uval = uval / radix;
	166	} while (uval != 0);
	167
	168	/* copy converted number into user buffer */
	169	uprv_strcpy(buffer+length, tbuf+tbx);
	170	length += sizeof(tbuf) - tbx -1;
	171	return length;
	172	}
b75a7d8f A	173
b75a7d8f A	174
b75a7d8f	175
374ca955 A	176	/*
	177	* Takes a int64_t and fills in a char* string with that number "radix"-based.
	178	* Writes at most 21: chars ("-9223372036854775807" plus NUL).
	179	* Returns the length of the string, not including the terminating NULL.
	180	*/
	181	U_CAPI int32_t U_EXPORT2
	182	T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
	183	{
	184	char tbuf[30];
	185	int32_t tbx = sizeof(tbuf);
	186	uint8_t digit;
	187	int32_t length = 0;
	188	uint64_t uval;
	189
	190	U_ASSERT(radix>=2 && radix<=16);
	191	uval = (uint64_t) v;
	192	if(v<0 && radix == 10) {
	193	/* Only in base 10 do we conside numbers to be signed. */
	194	uval = (uint64_t)(-v);
	195	buffer[length++] = '-';
	196	}
	197
	198	tbx = sizeof(tbuf)-1;
	199	tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
	200	do {
	201	digit = (uint8_t)(uval % radix);
	202	tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
	203	uval = uval / radix;
	204	} while (uval != 0);
	205
	206	/* copy converted number into user buffer */
	207	uprv_strcpy(buffer+length, tbuf+tbx);
	208	length += sizeof(tbuf) - tbx -1;
	209	return length;
b75a7d8f A	210	}
	211
	212
	213	U_CAPI int32_t U_EXPORT2
	214	T_CString_stringToInteger(const char *integerString, int32_t radix)
	215	{
	216	char *end;
374ca955	217	return uprv_strtoul(integerString, &end, radix);
b75a7d8f A	218
	219	}
	220
	221	U_CAPI int U_EXPORT2
	222	T_CString_stricmp(const char str1, const char str2) {
	223	if(str1==NULL) {
	224	if(str2==NULL) {
	225	return 0;
	226	} else {
	227	return -1;
	228	}
	229	} else if(str2==NULL) {
	230	return 1;
	231	} else {
	232	/* compare non-NULL strings lexically with lowercase */
	233	int rc;
	234	unsigned char c1, c2;
	235
	236	for(;;) {
	237	c1=(unsigned char)*str1;
	238	c2=(unsigned char)*str2;
	239	if(c1==0) {
	240	if(c2==0) {
	241	return 0;
	242	} else {
	243	return -1;
	244	}
	245	} else if(c2==0) {
	246	return 1;
	247	} else {
	248	/* compare non-zero characters with lowercase */
	249	rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
	250	if(rc!=0) {
	251	return rc;
	252	}
	253	}
	254	++str1;
	255	++str2;
	256	}
	257	}
	258	}
	259
	260	U_CAPI int U_EXPORT2
	261	T_CString_strnicmp(const char str1, const char str2, uint32_t n) {
	262	if(str1==NULL) {
	263	if(str2==NULL) {
	264	return 0;
	265	} else {
	266	return -1;
	267	}
	268	} else if(str2==NULL) {
	269	return 1;
	270	} else {
	271	/* compare non-NULL strings lexically with lowercase */
	272	int rc;
	273	unsigned char c1, c2;
	274
	275	for(; n--;) {
	276	c1=(unsigned char)*str1;
	277	c2=(unsigned char)*str2;
	278	if(c1==0) {
	279	if(c2==0) {
	280	return 0;
	281	} else {
282	return -1;
283	}
284	} else if(c2==0) {
285	return 1;
286	} else {
287	/* compare non-zero characters with lowercase */
288	rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
289	if(rc!=0) {
290	return rc;
291	}
292	}
293	++str1;
294	++str2;
295	}
296	}
297
298	return 0;
299	}
300
301	U_CAPI char* U_EXPORT2
302	uprv_strdup(const char *src) {
374ca955	303	size_t len = uprv_strlen(src) + 1;
b75a7d8f A	304	char dup = (char ) uprv_malloc(len);
	305
	306	if (dup) {
	307	uprv_memcpy(dup, src, len);
	308	}
	309
	310	return dup;
	311	}
374ca955 A	312
	313	U_CAPI char* U_EXPORT2
	314	uprv_strndup(const char *src, int32_t n) {
	315	char *dup;
	316
	317	if(n < 0) {
	318	dup = uprv_strdup(src);
	319	} else {
	320	dup = (char*)uprv_malloc(n+1);
	321	if (dup) {
	322	uprv_memcpy(dup, src, n);
	323	dup[n] = 0;
	324	}
	325	}
	326
	327	return dup;
	328	}