[apple/icu.git] / icuSources / common / uinvchar.c

/*
*******************************************************************************
*
*   Copyright (C) 1999-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  uinvchar.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:2
*
*   created on: 2004sep14
*   created by: Markus W. Scherer
*
*   Functions for handling invariant characters, moved here from putil.c
*   for better modularization.
*/

#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "udataswp.h"
#include "cstring.h"
#include "cmemory.h"
#include "uassert.h"
#include "uinvchar.h"

/* invariant-character handling --------------------------------------------- */

/*
 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
 * appropriately for most EBCDIC codepages.
 *
 * They currently also map most other ASCII graphic characters,
 * appropriately for codepages 37 and 1047.
 * Exceptions: The characters for []^ have different codes in 37 & 1047.
 * Both versions are mapped to ASCII.
 *
 *    ASCII 37 1047
 * [     5B BA   AD
 * ]     5D BB   BD
 * ^     5E B0   5F
 *
 * There are no mappings for variant characters from Unicode to EBCDIC.
 *
 * Currently, C0 control codes are also included in these maps.
 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
 * but there is no mapping for ASCII LF back to EBCDIC.
 *
 *    ASCII EBCDIC S/390-OE
 * LF    0A     25       15
 * NEL   85     15       25
 *
 * The maps below explicitly exclude the variant
 * control and graphical characters that are in ASCII-based
 * codepages at 0x80 and above.
 * "No mapping" is expressed by mapping to a 00 byte.
 *
 * These tables do not establish a converter or a codepage.
 */

/*
 * EBCDIC-to-ASCII map: the index is the EBCDIC byte, the value is the ASCII code.
 * A 00 value at any index other than 0 means "no mapping".
 * Each row holds 16 consecutive indexes; the table ends at index ff.
 * Several ASCII codes are reachable from two EBCDIC bytes:
 *   15 and 25 -> 0a (LF), 5f and b0 -> 5e (^),
 *   ad and ba -> 5b ([),  bb and bd -> 5d (]).
 */
static const uint8_t asciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * ASCII-to-EBCDIC map: the index is the ASCII code, the value is the EBCDIC byte.
 * A 00 value at any index other than 0 means "no mapping".
 * Each row holds 16 consecutive indexes.
 * Unmapped here are LF (0a), the variant graphic characters
 * (21 23 24 40 5b..5e 60 7b..7e) and every index from 80 upwards.
 */
static const uint8_t ebcdicFromAscii[256]={
    0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Same as asciiFromEbcdic[] except maps all letters to lowercase:
 * the index is the EBCDIC byte, the value is the ASCII code, and the
 * EBCDIC uppercase letters (c1..c9, d1..d9, e2..e9) yield 61..7a.
 * All non-letter entries are identical to asciiFromEbcdic[]; in particular
 * index e0 is the backslash (5c), which must not be case-folded.
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};

/*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
 * See utypes.h for more details.
 *
 * The invariant characters are all characters of the ASCII repertoire except
 * for the following (the bit set below additionally excludes 0a, LF):
 * 21  '!' <exclamation mark>
 * 23  '#' <number sign>
 * 24  '$' <dollar sign>
 *
 * 40  '@' <commercial at>
 *
 * 5b  '[' <left bracket>
 * 5c  '\' <backslash>
 * 5d  ']' <right bracket>
 * 5e  '^' <circumflex>
 *
 * 60  '`' <grave accent>
 *
 * 7b  '{' <left brace>
 * 7c  '|' <vertical line>
 * 7d  '}' <right brace>
 * 7e  '~' <tilde>
 */
/*
 * One bit per ASCII code 00..7f: code c is represented by bit (c&0x1f)
 * of word (c>>5), which is how UCHAR_IS_INVARIANT() reads it.
 * A set bit marks the code as invariant.
 */
static const uint32_t invariantChars[4]={
    0xfffffbff, /* 00..1f but not 0a */
    0xffffffe5, /* 20..3f but not 21 23 24 */
    0x87fffffe, /* 40..5f but not 40 5b..5e */
    0x87fffffe  /* 60..7f but not 60 7b..7e */
};

/*
 * Test an ASCII/Unicode code value for being an invariant character.
 * Only for unsigned types or values known to be non-negative: the argument
 * is used as an index into invariantChars[] after checking just the upper bound (<=0x7f).
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)

/*
 * Test signed types for invariant characters: rejects negative values first,
 * then defers to UCHAR_IS_INVARIANT(). The argument is evaluated more than once.
 */
#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))

/*
 * Single-character conversion between the platform charset and ASCII/Unicode.
 * ASCII-family platforms: identity. EBCDIC-family platforms: table lookup,
 * where a 00 result for a non-zero input means "no mapping".
 * The expansions are fully parenthesized so that the macros can be used
 * safely inside larger expressions.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) (asciiFromEbcdic[(u)])
#define UCHAR_TO_CHAR(u) (ebcdicFromAscii[(u)])
#else
#   error U_CHARSET_FAMILY is not valid
#endif


/*
 * Convert length platform-charset chars from cs into UChars written to us.
 * Nothing is done when length<=0.
 */
U_CAPI void U_EXPORT2
u_charsToUChars(const char *cs, UChar *us, int32_t length) {
    /*
     * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
     * For EBCDIC systems, this works for characters with codes from
     * codepages 37 and 1047 or compatible.
     */
    for(; length>0; --length) {
        const uint8_t platformByte=(uint8_t)(*cs++);
        const UChar unit=(UChar)CHAR_TO_UCHAR(platformByte);

        /* a 0 result is only legitimate for a 0 input: only invariant chars converted? */
        U_ASSERT((unit!=0 || platformByte==0));
        *us++=unit;
    }
}

/*
 * Convert length UChars from us into platform-charset chars written to cs.
 * A code unit that is not an invariant character triggers U_ASSERT(FALSE)
 * and is then converted as if it were 0.
 * Nothing is done when length<=0.
 */
U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
    UChar unit;

    for(; length>0; --length) {
        unit=*us++;
        if(!UCHAR_IS_INVARIANT(unit)) {
            U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
            unit=0;
        }
        *cs++=(char)UCHAR_TO_CHAR(unit);
    }
}

/*
 * Check whether a platform-charset string contains only invariant characters.
 * length<0: s is read up to (not including) its terminating NUL.
 * length>=0: exactly length chars are examined; embedded NULs count as invariant.
 * Returns FALSE at the first variant character, TRUE otherwise.
 */
U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char *s, int32_t length) {
    const UBool isNulTerminated=(UBool)(length<0);
    uint8_t ch;

    for(;;) {
        if(!isNulTerminated) {
            if(length==0) {
                break; /* counted string exhausted */
            }
            --length;
        }

        ch=(uint8_t)*s++;
        if(ch==0) {
            if(isNulTerminated) {
                break; /* reached the terminator */
            }
            continue; /* NUL is invariant */
        }
        /* ch!=0 here, so a 0 from the EBCDIC lookup below can only mean "no mapping" */

        /*
         * no assertions here because these functions are legitimately called
         * for strings with variant characters
         */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
        if(!UCHAR_IS_INVARIANT(ch)) {
            return FALSE; /* found a variant char */
        }
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
        ch=CHAR_TO_UCHAR(ch);
        if(ch==0 || !UCHAR_IS_INVARIANT(ch)) {
            return FALSE; /* found a variant char */
        }
#else
#   error U_CHARSET_FAMILY is not valid
#endif
    }
    return TRUE;
}

/*
 * Check whether a UChar string contains only invariant characters.
 * length<0: s is read up to (not including) its terminating NUL.
 * length>=0: exactly length code units are examined.
 * Returns FALSE at the first variant character, TRUE otherwise.
 */
U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar *s, int32_t length) {
    const UBool isNulTerminated=(UBool)(length<0);
    UChar unit;

    for(;;) {
        if(!isNulTerminated) {
            if(length==0) {
                break; /* counted string exhausted */
            }
            --length;
        }

        unit=*s++;
        if(isNulTerminated && unit==0) {
            break; /* reached the terminator */
        }

        /*
         * no assertions here because these functions are legitimately called
         * for strings with variant characters
         */
        if(!UCHAR_IS_INVARIANT(unit)) {
            return FALSE; /* found a variant char */
        }
    }
    return TRUE;
}

/* UDataSwapFn implementations used in udataswp.c ------- */

/*
 * Convert an ASCII string to EBCDIC and verify that all characters are invariant.
 *
 * Returns length on success.
 * Returns 0 without touching anything if pErrorCode is NULL or already a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for ds==NULL, inData==NULL,
 * length<0, or a NULL outData with length>0.
 * On the first variant character: reports it via udata_printError(),
 * sets U_INVALID_CHAR_FOUND and returns 0; bytes before it have already been written.
 */
U_CAPI int32_t U_EXPORT2
uprv_ebcdicFromAscii(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode) {
    const uint8_t *src;
    uint8_t *dest;
    int32_t i;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* check and convert byte by byte; each byte is read before its slot is written */
    src=(const uint8_t *)inData;
    dest=(uint8_t *)outData;
    for(i=0; i<length; ++i) {
        const uint8_t asciiByte=src[i];

        if(!UCHAR_IS_INVARIANT(asciiByte)) {
            udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
                             length, i);
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }
        dest[i]=ebcdicFromAscii[asciiByte];
    }

    return length;
}

/*
 * This function only checks and copies ASCII strings without conversion.
 *
 * Returns length on success.
 * Returns 0 without touching anything if pErrorCode is NULL or already a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for ds==NULL, inData==NULL,
 * length<0, or a NULL outData with length>0.
 * If any byte is not an invariant character: reports it via udata_printError(),
 * sets U_INVALID_CHAR_FOUND and returns 0; nothing is copied in that case
 * because the whole input is verified before the copy.
 */
U_CFUNC int32_t
uprv_copyAscii(const UDataSwapper *ds,
               const void *inData, int32_t length, void *outData,
               UErrorCode *pErrorCode) {
    const uint8_t *s;
    uint8_t c;

    int32_t count;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and checking */
    s=(const uint8_t *)inData;
    count=length;
    while(count>0) {
        c=*s++;
        if(!UCHAR_IS_INVARIANT(c)) {
            /* the message names this function (it used to say "uprv_copyFromAscii()") */
            udata_printError(ds, "uprv_copyAscii() string[%d] contains a variant character in position %d\n",
                             length, length-count);
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }
        --count;
    }

    /* in-place operation needs no copy */
    if(length>0 && inData!=outData) {
        uprv_memcpy(outData, inData, length);
    }

    return length;
}

/*
 * Convert an EBCDIC string to ASCII and verify that all characters are invariant.
 * A 00 input byte is passed through as 00.
 *
 * Returns length on success.
 * Returns 0 without touching anything if pErrorCode is NULL or already a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for ds==NULL, inData==NULL,
 * length<0, or a NULL outData with length>0.
 * On the first byte that has no ASCII mapping or maps to a variant character:
 * reports it via udata_printError(), sets U_INVALID_CHAR_FOUND and returns 0;
 * bytes before it have already been written.
 */
U_CFUNC int32_t
uprv_asciiFromEbcdic(const UDataSwapper *ds,
                     const void *inData, int32_t length, void *outData,
                     UErrorCode *pErrorCode) {
    const uint8_t *src;
    uint8_t *dest;
    int32_t i;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<0 ||  (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* check and convert byte by byte; each byte is read before its slot is written */
    src=(const uint8_t *)inData;
    dest=(uint8_t *)outData;
    for(i=0; i<length; ++i) {
        uint8_t b=src[i];

        if(b!=0) {
            b=asciiFromEbcdic[b];
            /* 0 from the table means "no mapping" because the input was not 0 */
            if(b==0 || !UCHAR_IS_INVARIANT(b)) {
                udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
                                 length, i);
                *pErrorCode=U_INVALID_CHAR_FOUND;
                return 0;
            }
        }
        dest[i]=b;
    }

    return length;
}

/*
 * This function only checks and copies EBCDIC strings without conversion.
 * A 00 input byte is accepted.
 *
 * Returns length on success.
 * Returns 0 without touching anything if pErrorCode is NULL or already a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for ds==NULL, inData==NULL,
 * length<0, or a NULL outData with length>0.
 * If any byte has no ASCII mapping or maps to a variant character:
 * reports it via udata_printError(), sets U_INVALID_CHAR_FOUND and returns 0;
 * nothing is copied in that case because the whole input is verified before the copy.
 */
U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper *ds,
                const void *inData, int32_t length, void *outData,
                UErrorCode *pErrorCode) {
    const uint8_t *s;
    uint8_t c;

    int32_t count;

    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* setup and checking */
    s=(const uint8_t *)inData;
    count=length;
    while(count>0) {
        c=*s++;
        /* c is only used for the check here; the original bytes are copied below */
        if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
            /* both arguments are ints: the first conversion must be %d (it used to be the invalid "%]") */
            udata_printError(ds, "uprv_copyEbcdic() string[%d] contains a variant character in position %d\n",
                             length, length-count);
            *pErrorCode=U_INVALID_CHAR_FOUND;
            return 0;
        }
        --count;
    }

    /* in-place operation needs no copy */
    if(length>0 && inData!=outData) {
        uprv_memcpy(outData, inData, length);
    }

    return length;
}

/*
 * Compare an ASCII char string (outString) with a UChar string (localString).
 * Variant characters compare less than others and unlike each other:
 * a variant char in outString counts as -1, one in localString as -2.
 * A length of -1 means NUL-terminated.
 * Returns 0 for invalid arguments (NULL string or length below -1),
 * otherwise the first non-zero character difference, or the length
 * difference when one string is a prefix of the other.
 * ds is not used in this function.
 */
U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper *ds,
                     const char *outString, int32_t outLength,
                     const UChar *localString, int32_t localLength) {
    int32_t remaining;
    UChar32 outChar, localChar, diff;
    uint8_t b;

    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
        return 0;
    }

    if(outLength<0) {
        outLength=(int32_t)uprv_strlen(outString);
    }
    if(localLength<0) {
        localLength=u_strlen(localString);
    }

    /* walk the common prefix length */
    for(remaining=(outLength<localLength ? outLength : localLength); remaining>0; --remaining) {
        b=(uint8_t)*outString++;
        if(UCHAR_IS_INVARIANT(b)) {
            outChar=b;
        } else {
            outChar=-1;
        }

        localChar=*localString++;
        if(!UCHAR_IS_INVARIANT(localChar)) {
            localChar=-2;
        }

        diff=outChar-localChar;
        if(diff!=0) {
            return diff;
        }
    }

    /* strings start with same prefix, compare lengths */
    return outLength-localLength;
}

/*
 * Compare an EBCDIC char string (outString) with a UChar string (localString).
 * Each outString byte is mapped to ASCII first; a 00 byte stays 0, and a byte
 * that has no mapping or maps to a variant character counts as -1.
 * A variant character in localString counts as -2.
 * A length of -1 means NUL-terminated.
 * Returns 0 for invalid arguments (NULL string or length below -1),
 * otherwise the first non-zero character difference, or the length
 * difference when one string is a prefix of the other.
 * ds is not used in this function.
 */
U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper *ds,
                      const char *outString, int32_t outLength,
                      const UChar *localString, int32_t localLength) {
    int32_t remaining;
    UChar32 outChar, localChar, diff;
    uint8_t b;

    if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
        return 0;
    }

    if(outLength<0) {
        outLength=(int32_t)uprv_strlen(outString);
    }
    if(localLength<0) {
        localLength=u_strlen(localString);
    }

    /* walk the common prefix length */
    for(remaining=(outLength<localLength ? outLength : localLength); remaining>0; --remaining) {
        b=(uint8_t)*outString++;
        if(b==0) {
            outChar=0;
        } else {
            outChar=asciiFromEbcdic[b];
            if(outChar==0 || !UCHAR_IS_INVARIANT(outChar)) {
                outChar=-1; /* unmapped or variant */
            }
        }

        localChar=*localString++;
        if(!UCHAR_IS_INVARIANT(localChar)) {
            localChar=-2;
        }

        diff=outChar-localChar;
        if(diff!=0) {
            return diff;
        }
    }

    /* strings start with same prefix, compare lengths */
    return outLength-localLength;
}

/*
 * Compare two NUL-terminated EBCDIC strings in the order of their ASCII codes.
 * At the first differing byte pair, each non-zero byte is mapped to ASCII;
 * a byte that has no mapping or maps to a variant character is replaced by
 * the negative of its original byte value. The difference of the two
 * resulting values is returned. Returns 0 if the strings are identical.
 */
U_CAPI int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
    for(;; ++s1, ++s2) {
        const uint8_t b1=(uint8_t)*s1;
        const uint8_t b2=(uint8_t)*s2;
        int32_t a1, a2;

        if(b1==b2) {
            if(b1==0) {
                return 0; /* both strings ended together */
            }
            continue;
        }

        a1=b1;
        if(b1!=0) {
            a1=asciiFromEbcdic[b1];
            if(a1==0 || !UCHAR_IS_INVARIANT(a1)) {
                a1=-(int32_t)b1;
            }
        }

        a2=b2;
        if(b2!=0) {
            a2=asciiFromEbcdic[b2];
            if(a2==0 || !UCHAR_IS_INVARIANT(a2)) {
                a2=-(int32_t)b2;
            }
        }

        return a1-a2;
    }
}

/*
 * Map one EBCDIC char to ASCII via lowercaseAsciiFromEbcdic[],
 * which also folds letters to lowercase.
 * The char is reinterpreted as an unsigned byte for the table lookup.
 */
U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c) {
    const uint8_t ebcdicByte=(uint8_t)c;

    return (char)lowercaseAsciiFromEbcdic[ebcdicByte];
}

/*
 * strncpy()-like copy that converts EBCDIC bytes from src to ASCII in dst
 * via asciiFromEbcdic[] (bytes without a mapping become 00).
 * n==-1 means: copy the NUL-terminated src including its terminator.
 * Otherwise at most n bytes are read from src, stopping at a 00 byte,
 * and the remainder of the n output bytes is filled with 00.
 * Nothing is written when n is 0 or any other negative value.
 * Returns dst.
 */
U_INTERNAL uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
{
  uint8_t *orig_dst = dst;

  if(n==-1) { 
    n = (int32_t)uprv_strlen((const char*)src)+1; /* copy NUL */
  }
  /*
   * copy non-null
   * n is tested before *src so that src is never read beyond its first n
   * bytes (it need not be NUL-terminated when it is at least n bytes long)
   */
  while(n>0 && *src) {
    *(dst++) = asciiFromEbcdic[*(src++)];
    n--;
  }
  /* pad */
  while(n>0) {
    *(dst++) = 0;
    n--;
  }
  return orig_dst;
}

/*
 * strncpy()-like copy that converts ASCII bytes from src to EBCDIC in dst
 * via ebcdicFromAscii[]; bytes without a mapping are replaced with the
 * EBCDIC question mark.
 * n==-1 means: copy the NUL-terminated src including its terminator.
 * Otherwise at most n bytes are read from src, stopping at a 00 byte,
 * and the remainder of the n output bytes is filled with 00.
 * Nothing is written when n is 0 or any other negative value.
 * Returns dst.
 */
U_INTERNAL uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
{
  uint8_t *orig_dst = dst;

  if(n==-1) { 
    n = (int32_t)uprv_strlen((const char*)src)+1; /* copy NUL */
  }
  /*
   * copy non-null
   * n is tested before *src so that src is never read beyond its first n
   * bytes (it need not be NUL-terminated when it is at least n bytes long)
   */
  while(n>0 && *src) {
    /* uint8_t rather than char: table values go up to 0xf9 */
    uint8_t ch = ebcdicFromAscii[*(src++)];
    if(ch == 0) {
      ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
    }
    *(dst++) = ch;
    n--;
  }
  /* pad */
  while(n>0) {
    *(dst++) = 0;
    n--;
  }
  return orig_dst;
}
Commit	Line	Data
374ca955 A	1	/*
	2	*******************************************************************************
	3	*
729e4ab9	4	* Copyright (C) 1999-2010, International Business Machines
374ca955 A	5	* Corporation and others. All Rights Reserved.
	6	*
	7	*******************************************************************************
	8	* file name: uinvchar.c
	9	* encoding: US-ASCII
	10	* tab size: 8 (not used)
	11	* indentation:2
	12	*
	13	* created on: 2004sep14
	14	* created by: Markus W. Scherer
	15	*
	16	* Functions for handling invariant characters, moved here from putil.c
	17	* for better modularization.
	18	*/
	19
	20	#include "unicode/utypes.h"
	21	#include "unicode/ustring.h"
	22	#include "udataswp.h"
	23	#include "cstring.h"
	24	#include "cmemory.h"
	25	#include "uassert.h"
	26	#include "uinvchar.h"
	27
	28	/* invariant-character handling --------------------------------------------- */
	29
	30	/*
	31	* These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
	32	* appropriately for most EBCDIC codepages.
	33	*
	34	* They currently also map most other ASCII graphic characters,
	35	* appropriately for codepages 37 and 1047.
	36	* Exceptions: The characters for []^ have different codes in 37 & 1047.
	37	* Both versions are mapped to ASCII.
	38	*
	39	* ASCII 37 1047
	40	* [ 5B BA AD
	41	* ] 5D BB BD
	42	* ^ 5E B0 5F
	43	*
	44	* There are no mappings for variant characters from Unicode to EBCDIC.
	45	*
	46	* Currently, C0 control codes are also included in these maps.
	47	* Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
	48	* EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
	49	* but there is no mapping for ASCII LF back to EBCDIC.
	50	*
	51	* ASCII EBCDIC S/390-OE
	52	* LF 0A 25 15
	53	* NEL 85 15 25
	54	*
	55	* The maps below explicitly exclude the variant
	56	* control and graphical characters that are in ASCII-based
	57	* codepages at 0x80 and above.
	58	* "No mapping" is expressed by mapping to a 00 byte.
	59	*
	60	* These tables do not establish a converter or a codepage.
	61	*/
	62
	63	static const uint8_t asciiFromEbcdic[256]={
	64	0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	65	0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
	66	0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
	67	0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
	68
69	0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
70	0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
71	0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
72	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
73
74	0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
75	0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
76	0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
77	0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
78
79	0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
80	0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
81	0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
82	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
83	};
84
85	static const uint8_t ebcdicFromAscii[256]={
86	0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
87	0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
88	0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
89	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
90
91	0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
92	0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
93	0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
94	0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,
95
96	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
97	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
98	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
99	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
100
101	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
102	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
103	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
104	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
105	};
106
4388f060 A	107	/* Same as asciiFromEbcdic[] except maps all letters to lowercase. */
	108	static const uint8_t lowercaseAsciiFromEbcdic[256]={
	109	0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	110	0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
	111	0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
	112	0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,
	113
	114	0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
	115	0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
	116	0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
	117	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
	118
	119	0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	120	0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	121	0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
	122	0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,
	123
	124	0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	125	0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	126	0x7c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
	127	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	128	};
	129
374ca955 A	130	/*
	131	* Bit sets indicating which characters of the ASCII repertoire
	132	* (by ASCII/Unicode code) are "invariant".
	133	* See utypes.h for more details.
	134	*
	135	* As invariant are considered the characters of the ASCII repertoire except
	136	* for the following:
	137	* 21 '!' <exclamation mark>
	138	* 23 '#' <number sign>
	139	* 24 '$' <dollar sign>
	140	*
	141	* 40 '@' <commercial at>
	142	*
	143	* 5b '[' <left bracket>
	144	* 5c '\' <backslash>
	145	* 5d ']' <right bracket>
	146	* 5e '^' <circumflex>
	147	*
	148	* 60 '`' <grave accent>
	149	*
	150	* 7b '{' <left brace>
	151	* 7c '\|' <vertical line>
	152	* 7d '}' <right brace>
	153	* 7e '~' <tilde>
	154	*/
	155	static const uint32_t invariantChars[4]={
	156	0xfffffbff, /* 00..1f but not 0a */
	157	0xffffffe5, /* 20..3f but not 21 23 24 */
	158	0x87fffffe, /* 40..5f but not 40 5b..5e */
	159	0x87fffffe /* 60..7f but not 60 7b..7e */
	160	};
	161
	162	/*
	163	* test unsigned types (or values known to be non-negative) for invariant characters,
	164	* tests ASCII-family character values
	165	*/
	166	#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && (invariantChars[(c)>>5]&((uint32_t)1<<((c)&0x1f)))!=0)
	167
	168	/* test signed types for invariant characters, adds test for positive values */
	169	#define SCHAR_IS_INVARIANT(c) ((0<=(c)) && UCHAR_IS_INVARIANT(c))
	170
73c04bcf A	171	#if U_CHARSET_FAMILY==U_ASCII_FAMILY
	172	#define CHAR_TO_UCHAR(c) c
	173	#define UCHAR_TO_CHAR(c) c
	174	#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
	175	#define CHAR_TO_UCHAR(u) asciiFromEbcdic[u]
	176	#define UCHAR_TO_CHAR(u) ebcdicFromAscii[u]
	177	#else
	178	# error U_CHARSET_FAMILY is not valid
	179	#endif
	180
	181
374ca955 A	182	U_CAPI void U_EXPORT2
	183	u_charsToUChars(const char cs, UChar us, int32_t length) {
	184	UChar u;
	185	uint8_t c;
374ca955 A	186
	187	/*
	188	* Allow the entire ASCII repertoire to be mapped _to_ Unicode.
	189	* For EBCDIC systems, this works for characters with codes from
	190	* codepages 37 and 1047 or compatible.
	191	*/
374ca955 A	192	while(length>0) {
374ca955 A	193	c=(uint8_t)(*cs++);
73c04bcf A	194	u=(UChar)CHAR_TO_UCHAR(c);
73c04bcf A	195	U_ASSERT((u!=0 \|\| c==0)); /* only invariant chars converted? */
374ca955 A	196	*us++=u;
	197	--length;
	198	}
374ca955 A	199	}
	200
	201	U_CAPI void U_EXPORT2
	202	u_UCharsToChars(const UChar us, char cs, int32_t length) {
	203	UChar u;
374ca955	204
374ca955 A	205	while(length>0) {
	206	u=*us++;
	207	if(!UCHAR_IS_INVARIANT(u)) {
73c04bcf	208	U_ASSERT(FALSE); /* Variant characters were used. These are not portable in ICU. */
374ca955 A	209	u=0;
374ca955 A	210	}
73c04bcf	211	*cs++=(char)UCHAR_TO_CHAR(u);
374ca955 A	212	--length;
374ca955 A	213	}
374ca955 A	214	}
	215
	216	U_CAPI UBool U_EXPORT2
	217	uprv_isInvariantString(const char *s, int32_t length) {
	218	uint8_t c;
	219
	220	for(;;) {
	221	if(length<0) {
	222	/* NUL-terminated */
	223	c=(uint8_t)*s++;
	224	if(c==0) {
	225	break;
	226	}
	227	} else {
	228	/* count length */
	229	if(length==0) {
	230	break;
	231	}
	232	--length;
	233	c=(uint8_t)*s++;
	234	if(c==0) {
	235	continue; /* NUL is invariant */
	236	}
	237	}
	238	/* c!=0 now, one branch below checks c==0 for variant characters */
	239
	240	/*
	241	* no assertions here because these functions are legitimately called
	242	* for strings with variant characters
	243	*/
	244	#if U_CHARSET_FAMILY==U_ASCII_FAMILY
	245	if(!UCHAR_IS_INVARIANT(c)) {
	246	return FALSE; /* found a variant char */
	247	}
	248	#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
73c04bcf	249	c=CHAR_TO_UCHAR(c);
374ca955 A	250	if(c==0 \|\| !UCHAR_IS_INVARIANT(c)) {
	251	return FALSE; /* found a variant char */
	252	}
	253	#else
	254	# error U_CHARSET_FAMILY is not valid
	255	#endif
	256	}
	257	return TRUE;
	258	}
	259
	260	U_CAPI UBool U_EXPORT2
	261	uprv_isInvariantUString(const UChar *s, int32_t length) {
	262	UChar c;
	263
	264	for(;;) {
	265	if(length<0) {
	266	/* NUL-terminated */
	267	c=*s++;
	268	if(c==0) {
	269	break;
	270	}
	271	} else {
	272	/* count length */
	273	if(length==0) {
	274	break;
	275	}
	276	--length;
	277	c=*s++;
	278	}
	279
	280	/*
	281	* no assertions here because these functions are legitimately called
	282	* for strings with variant characters
	283	*/
	284	if(!UCHAR_IS_INVARIANT(c)) {
	285	return FALSE; /* found a variant char */
	286	}
	287	}
	288	return TRUE;
	289	}
	290
	291	/* UDataSwapFn implementations used in udataswp.c ------- */
	292
	293	/* convert ASCII to EBCDIC and verify that all characters are invariant */
73c04bcf	294	U_CAPI int32_t U_EXPORT2
374ca955 A	295	uprv_ebcdicFromAscii(const UDataSwapper *ds,
	296	const void inData, int32_t length, void outData,
	297	UErrorCode *pErrorCode) {
	298	const uint8_t *s;
	299	uint8_t *t;
	300	uint8_t c;
	301
	302	int32_t count;
	303
	304	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	305	return 0;
	306	}
	307	if(ds==NULL \|\| inData==NULL \|\| length<0 \|\| (length>0 && outData==NULL)) {
	308	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	309	return 0;
	310	}
	311
	312	/* setup and swapping */
	313	s=(const uint8_t *)inData;
	314	t=(uint8_t *)outData;
	315	count=length;
	316	while(count>0) {
	317	c=*s++;
	318	if(!UCHAR_IS_INVARIANT(c)) {
	319	udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
	320	length, length-count);
	321	*pErrorCode=U_INVALID_CHAR_FOUND;
	322	return 0;
	323	}
	324	*t++=ebcdicFromAscii[c];
	325	--count;
	326	}
	327
	328	return length;
	329	}
	330
	331	/* this function only checks and copies ASCII strings without conversion */
	332	U_CFUNC int32_t
	333	uprv_copyAscii(const UDataSwapper *ds,
	334	const void inData, int32_t length, void outData,
	335	UErrorCode *pErrorCode) {
	336	const uint8_t *s;
	337	uint8_t c;
	338
	339	int32_t count;
	340
	341	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
	342	return 0;
	343	}
	344	if(ds==NULL \|\| inData==NULL \|\| length<0 \|\| (length>0 && outData==NULL)) {
	345	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
	346	return 0;
	347	}
	348
	349	/* setup and checking */
	350	s=(const uint8_t *)inData;
	351	count=length;
	352	while(count>0) {
	353	c=*s++;
	354	if(!UCHAR_IS_INVARIANT(c)) {
	355	udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
	356	length, length-count);
	357	*pErrorCode=U_INVALID_CHAR_FOUND;
	358	return 0;
359	}
360	--count;
361	}
362
363	if(length>0 && inData!=outData) {
364	uprv_memcpy(outData, inData, length);
365	}
366
367	return length;
368	}
369
370	/* convert EBCDIC to ASCII and verify that all characters are invariant */
371	U_CFUNC int32_t
372	uprv_asciiFromEbcdic(const UDataSwapper *ds,
373	const void inData, int32_t length, void outData,
374	UErrorCode *pErrorCode) {
375	const uint8_t *s;
376	uint8_t *t;
377	uint8_t c;
378
379	int32_t count;
380
381	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
382	return 0;
383	}
384	if(ds==NULL \|\| inData==NULL \|\| length<0 \|\| (length>0 && outData==NULL)) {
385	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
386	return 0;
387	}
388
389	/* setup and swapping */
390	s=(const uint8_t *)inData;
391	t=(uint8_t *)outData;
392	count=length;
393	while(count>0) {
394	c=*s++;
395	if(c!=0 && ((c=asciiFromEbcdic[c])==0 \|\| !UCHAR_IS_INVARIANT(c))) {
396	udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
397	length, length-count);
398	*pErrorCode=U_INVALID_CHAR_FOUND;
399	return 0;
400	}
401	*t++=c;
402	--count;
403	}
404
405	return length;
406	}
407
408	/* this function only checks and copies EBCDIC strings without conversion */
409	U_CFUNC int32_t
410	uprv_copyEbcdic(const UDataSwapper *ds,
411	const void inData, int32_t length, void outData,
412	UErrorCode *pErrorCode) {
413	const uint8_t *s;
414	uint8_t c;
415
416	int32_t count;
417
418	if(pErrorCode==NULL \|\| U_FAILURE(*pErrorCode)) {
419	return 0;
420	}
421	if(ds==NULL \|\| inData==NULL \|\| length<0 \|\| (length>0 && outData==NULL)) {
422	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
423	return 0;
424	}
425
426	/* setup and checking */
427	s=(const uint8_t *)inData;
428	count=length;
429	while(count>0) {
430	c=*s++;
431	if(c!=0 && ((c=asciiFromEbcdic[c])==0 \|\| !UCHAR_IS_INVARIANT(c))) {
432	udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
433	length, length-count);
434	*pErrorCode=U_INVALID_CHAR_FOUND;
435	return 0;
436	}
437	--count;
438	}
439
440	if(length>0 && inData!=outData) {
441	uprv_memcpy(outData, inData, length);
442	}
443
444	return length;
445	}
446
447	/* compare invariant strings; variant characters compare less than others and unlike each other */
448	U_CFUNC int32_t
449	uprv_compareInvAscii(const UDataSwapper *ds,
450	const char *outString, int32_t outLength,
451	const UChar *localString, int32_t localLength) {
452	int32_t minLength;
453	UChar32 c1, c2;
454	uint8_t c;
455
456	if(outString==NULL \|\| outLength<-1 \|\| localString==NULL \|\| localLength<-1) {
457	return 0;
458	}
459
460	if(outLength<0) {
461	outLength=(int32_t)uprv_strlen(outString);
462	}
463	if(localLength<0) {
464	localLength=u_strlen(localString);
465	}
466
467	minLength= outLength<localLength ? outLength : localLength;
468
469	while(minLength>0) {
470	c=(uint8_t)*outString++;
471	if(UCHAR_IS_INVARIANT(c)) {
472	c1=c;
473	} else {
474	c1=-1;
475	}
476
477	c2=*localString++;
478	if(!UCHAR_IS_INVARIANT(c2)) {
729e4ab9	479	c2=-2;
374ca955 A	480	}
	481
	482	if((c1-=c2)!=0) {
	483	return c1;
	484	}
	485
	486	--minLength;
	487	}
	488
	489	/* strings start with same prefix, compare lengths */
	490	return outLength-localLength;
	491	}
	492
	493	U_CFUNC int32_t
	494	uprv_compareInvEbcdic(const UDataSwapper *ds,
	495	const char *outString, int32_t outLength,
	496	const UChar *localString, int32_t localLength) {
	497	int32_t minLength;
	498	UChar32 c1, c2;
	499	uint8_t c;
	500
	501	if(outString==NULL \|\| outLength<-1 \|\| localString==NULL \|\| localLength<-1) {
	502	return 0;
	503	}
	504
	505	if(outLength<0) {
	506	outLength=(int32_t)uprv_strlen(outString);
	507	}
	508	if(localLength<0) {
	509	localLength=u_strlen(localString);
	510	}
	511
	512	minLength= outLength<localLength ? outLength : localLength;
	513
	514	while(minLength>0) {
	515	c=(uint8_t)*outString++;
	516	if(c==0) {
	517	c1=0;
	518	} else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
	519	/* c1 is set */
	520	} else {
	521	c1=-1;
	522	}
	523
	524	c2=*localString++;
	525	if(!UCHAR_IS_INVARIANT(c2)) {
729e4ab9	526	c2=-2;
374ca955 A	527	}
	528
	529	if((c1-=c2)!=0) {
	530	return c1;
	531	}
	532
	533	--minLength;
	534	}
	535
	536	/* strings start with same prefix, compare lengths */
	537	return outLength-localLength;
	538	}
729e4ab9 A	539
	540	U_CAPI int32_t U_EXPORT2
	541	uprv_compareInvEbcdicAsAscii(const char s1, const char s2) {
	542	int32_t c1, c2;
	543
	544	for(;; ++s1, ++s2) {
	545	c1=(uint8_t)*s1;
	546	c2=(uint8_t)*s2;
	547	if(c1!=c2) {
	548	if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 \|\| !UCHAR_IS_INVARIANT(c1))) {
	549	c1=-(int32_t)(uint8_t)*s1;
	550	}
	551	if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 \|\| !UCHAR_IS_INVARIANT(c2))) {
	552	c2=-(int32_t)(uint8_t)*s2;
	553	}
	554	return c1-c2;
	555	} else if(c1==0) {
	556	return 0;
	557	}
	558	}
	559	}
	560
4388f060 A	561	U_CAPI char U_EXPORT2
	562	uprv_ebcdicToLowercaseAscii(char c) {
	563	return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
	564	}
729e4ab9 A	565
	566	U_INTERNAL uint8_t* U_EXPORT2
	567	uprv_aestrncpy(uint8_t dst, const uint8_t src, int32_t n)
	568	{
	569	uint8_t *orig_dst = dst;
	570
	571	if(n==-1) {
	572	n = uprv_strlen((const char)src)+1; / copy NUL */
	573	}
	574	/* copy non-null */
	575	while(*src && n>0) {
	576	(dst++) = asciiFromEbcdic[(src++)];
	577	n--;
	578	}
	579	/* pad */
	580	while(n>0) {
	581	*(dst++) = 0;
	582	n--;
	583	}
	584	return orig_dst;
	585	}
	586
	587	U_INTERNAL uint8_t* U_EXPORT2
	588	uprv_eastrncpy(uint8_t dst, const uint8_t src, int32_t n)
	589	{
	590	uint8_t *orig_dst = dst;
	591
	592	if(n==-1) {
	593	n = uprv_strlen((const char)src)+1; / copy NUL */
	594	}
	595	/* copy non-null */
	596	while(*src && n>0) {
	597	char ch = ebcdicFromAscii[*(src++)];
	598	if(ch == 0) {
	599	ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
	600	}
	601	*(dst++) = ch;
	602	n--;
	603	}
	604	/* pad */
	605	while(n>0) {
	606	*(dst++) = 0;
	607	n--;
	608	}
	609	return orig_dst;
	610	}
	611