X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/3ca4086b22723db16d7e0fc23aa9021b4785096a..c26400bd067ce812a7dd3b090dce0928b8b0e80e:/src/regex/regc_locale.c diff --git a/src/regex/regc_locale.c b/src/regex/regc_locale.c index 695b665b1f..15bcef32d1 100644 --- a/src/regex/regc_locale.c +++ b/src/regex/regc_locale.c @@ -10,112 +10,122 @@ * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * * RCS: @(#) $Id$ + * + * wxWidgets: + * The Scriptics license can be found in the file COPYRIGHT. Modifications + * for wxWidgets are under the wxWidgets licence, see README for details. */ +#ifndef CONST +#define CONST const +#endif + /* ASCII character-name table */ static struct cname { - char *name; + chr *name; char code; } cnames[] = { - {"NUL", '\0'}, - {"SOH", '\001'}, - {"STX", '\002'}, - {"ETX", '\003'}, - {"EOT", '\004'}, - {"ENQ", '\005'}, - {"ACK", '\006'}, - {"BEL", '\007'}, - {"alert", '\007'}, - {"BS", '\010'}, - {"backspace", '\b'}, - {"HT", '\011'}, - {"tab", '\t'}, - {"LF", '\012'}, - {"newline", '\n'}, - {"VT", '\013'}, - {"vertical-tab", '\v'}, - {"FF", '\014'}, - {"form-feed", '\f'}, - {"CR", '\015'}, - {"carriage-return", '\r'}, - {"SO", '\016'}, - {"SI", '\017'}, - {"DLE", '\020'}, - {"DC1", '\021'}, - {"DC2", '\022'}, - {"DC3", '\023'}, - {"DC4", '\024'}, - {"NAK", '\025'}, - {"SYN", '\026'}, - {"ETB", '\027'}, - {"CAN", '\030'}, - {"EM", '\031'}, - {"SUB", '\032'}, - {"ESC", '\033'}, - {"IS4", '\034'}, - {"FS", '\034'}, - {"IS3", '\035'}, - {"GS", '\035'}, - {"IS2", '\036'}, - {"RS", '\036'}, - {"IS1", '\037'}, - {"US", '\037'}, - {"space", ' '}, - {"exclamation-mark",'!'}, - {"quotation-mark", '"'}, - {"number-sign", '#'}, - {"dollar-sign", '$'}, - {"percent-sign", '%'}, - {"ampersand", '&'}, - {"apostrophe", '\''}, - {"left-parenthesis",'('}, - {"right-parenthesis", ')'}, - {"asterisk", '*'}, - {"plus-sign", '+'}, - {"comma", ','}, - {"hyphen", '-'}, - {"hyphen-minus", '-'}, - {"period", '.'}, - {"full-stop", '.'}, - {"slash", '/'}, - 
{"solidus", '/'}, - {"zero", '0'}, - {"one", '1'}, - {"two", '2'}, - {"three", '3'}, - {"four", '4'}, - {"five", '5'}, - {"six", '6'}, - {"seven", '7'}, - {"eight", '8'}, - {"nine", '9'}, - {"colon", ':'}, - {"semicolon", ';'}, - {"less-than-sign", '<'}, - {"equals-sign", '='}, - {"greater-than-sign", '>'}, - {"question-mark", '?'}, - {"commercial-at", '@'}, - {"left-square-bracket", '['}, - {"backslash", '\\'}, - {"reverse-solidus", '\\'}, - {"right-square-bracket", ']'}, - {"circumflex", '^'}, - {"circumflex-accent", '^'}, - {"underscore", '_'}, - {"low-line", '_'}, - {"grave-accent", '`'}, - {"left-brace", '{'}, - {"left-curly-bracket", '{'}, - {"vertical-line", '|'}, - {"right-brace", '}'}, - {"right-curly-bracket", '}'}, - {"tilde", '~'}, - {"DEL", '\177'}, - {NULL, 0} + {wxT("NUL"), '\0'}, + {wxT("SOH"), '\001'}, + {wxT("STX"), '\002'}, + {wxT("ETX"), '\003'}, + {wxT("EOT"), '\004'}, + {wxT("ENQ"), '\005'}, + {wxT("ACK"), '\006'}, + {wxT("BEL"), '\007'}, + {wxT("alert"), '\007'}, + {wxT("BS"), '\010'}, + {wxT("backspace"), '\b'}, + {wxT("HT"), '\011'}, + {wxT("tab"), '\t'}, + {wxT("LF"), '\012'}, + {wxT("newline"), '\n'}, + {wxT("VT"), '\013'}, + {wxT("vertical-tab"), '\v'}, + {wxT("FF"), '\014'}, + {wxT("form-feed"), '\f'}, + {wxT("CR"), '\015'}, + {wxT("carriage-return"), '\r'}, + {wxT("SO"), '\016'}, + {wxT("SI"), '\017'}, + {wxT("DLE"), '\020'}, + {wxT("DC1"), '\021'}, + {wxT("DC2"), '\022'}, + {wxT("DC3"), '\023'}, + {wxT("DC4"), '\024'}, + {wxT("NAK"), '\025'}, + {wxT("SYN"), '\026'}, + {wxT("ETB"), '\027'}, + {wxT("CAN"), '\030'}, + {wxT("EM"), '\031'}, + {wxT("SUB"), '\032'}, + {wxT("ESC"), '\033'}, + {wxT("IS4"), '\034'}, + {wxT("FS"), '\034'}, + {wxT("IS3"), '\035'}, + {wxT("GS"), '\035'}, + {wxT("IS2"), '\036'}, + {wxT("RS"), '\036'}, + {wxT("IS1"), '\037'}, + {wxT("US"), '\037'}, + {wxT("space"), ' '}, + {wxT("exclamation-mark"), '!'}, + {wxT("quotation-mark"), '"'}, + {wxT("number-sign"), '#'}, + {wxT("dollar-sign"), '$'}, + {wxT("percent-sign"), 
'%'}, + {wxT("ampersand"), '&'}, + {wxT("apostrophe"), '\''}, + {wxT("left-parenthesis"), '('}, + {wxT("right-parenthesis"), ')'}, + {wxT("asterisk"), '*'}, + {wxT("plus-sign"), '+'}, + {wxT("comma"), ','}, + {wxT("hyphen"), '-'}, + {wxT("hyphen-minus"), '-'}, + {wxT("period"), '.'}, + {wxT("full-stop"), '.'}, + {wxT("slash"), '/'}, + {wxT("solidus"), '/'}, + {wxT("zero"), '0'}, + {wxT("one"), '1'}, + {wxT("two"), '2'}, + {wxT("three"), '3'}, + {wxT("four"), '4'}, + {wxT("five"), '5'}, + {wxT("six"), '6'}, + {wxT("seven"), '7'}, + {wxT("eight"), '8'}, + {wxT("nine"), '9'}, + {wxT("colon"), ':'}, + {wxT("semicolon"), ';'}, + {wxT("less-than-sign"), '<'}, + {wxT("equals-sign"), '='}, + {wxT("greater-than-sign"), '>'}, + {wxT("question-mark"), '?'}, + {wxT("commercial-at"), '@'}, + {wxT("left-square-bracket"), '['}, + {wxT("backslash"), '\\'}, + {wxT("reverse-solidus"), '\\'}, + {wxT("right-square-bracket"), ']'}, + {wxT("circumflex"), '^'}, + {wxT("circumflex-accent"), '^'}, + {wxT("underscore"), '_'}, + {wxT("low-line"), '_'}, + {wxT("grave-accent"), '`'}, + {wxT("left-brace"), '{'}, + {wxT("left-curly-bracket"), '{'}, + {wxT("vertical-line"), '|'}, + {wxT("right-brace"), '}'}, + {wxT("right-curly-bracket"), '}'}, + {wxT("tilde"), '~'}, + {wxT("DEL"), '\177'}, + {NULL, 0} }; +#if wxUSE_UNICODE + /* Unicode character-class tables */ typedef struct crange { @@ -518,6 +528,120 @@ static chr graphCharTable[] = { * End of auto-generated Unicode character ranges declarations. */ +/* + * Supply implementations for some tcl functions that this module depends on + * to make it self contained + */ + +#include "tclUniData.c" +#define Tcl_UniChar wxChar + +/* + * Compute the uppercase equivalent of the given Unicode character. + * Taken from tcl. 
+ */ + +Tcl_UniChar Tcl_UniCharToUpper(int ch) +{ + int info = GetUniCharInfo(ch); + + if (GetCaseType(info) & 0x04) { + return (Tcl_UniChar) (ch - GetDelta(info)); + } else { + return ch; + } +} + +/* + * Compute the lowercase equivalent of the given Unicode character. + * Taken from tcl. + */ + +Tcl_UniChar Tcl_UniCharToLower(int ch) +{ + int info = GetUniCharInfo(ch); + + if (GetCaseType(info) & 0x02) { + return (Tcl_UniChar) (ch + GetDelta(info)); + } else { + return ch; + } +} + +/* + * Compute the titlecase equivalent of the given Unicode character. + * Taken from tcl. + */ + +Tcl_UniChar Tcl_UniCharToTitle(int ch) +{ + int info = GetUniCharInfo(ch); + int mode = GetCaseType(info); + + if (mode & 0x1) { + /* + * Subtract or add one depending on the original case. + */ + + return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1)); + } else if (mode == 0x4) { + return (Tcl_UniChar) (ch - GetDelta(info)); + } else { + return ch; + } +} + +#else /* wxUSE_UNICODE */ + +#include <locale.h> + +typedef int (*isfunc_t)(int); + +/* ASCII character-class table */ +static struct cclass { + char *name; + char *chars; + int hasch; + isfunc_t isfunc; +} cclasses[] = { + {"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789", 1, isalnum}, + {"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", + 1, isalpha}, + {"blank", " \t", 0, NULL}, + {"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ +\25\26\27\30\31\32\33\34\35\36\37\177", 0, iscntrl}, + {"digit", "0123456789", 0, isdigit}, + {"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + 1, isgraph}, + {"lower", "abcdefghijklmnopqrstuvwxyz", + 1, islower}, + {"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", + 1, isprint}, + {"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + 0, ispunct}, + {"space", "\t\n\v\f\r ", 0, isspace}, + {"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + 0, 
isupper}, + {"xdigit", "0123456789ABCDEFabcdef", + 0, isxdigit}, + {NULL, 0, 0, NULL} +}; + +/* + * Supply implementations for some tcl functions that this module depends on + * to make it self contained + */ + +#define Tcl_UniChar wxChar +Tcl_UniChar Tcl_UniCharToUpper(int ch) { return wxCRT_ToupperNative(ch); } +Tcl_UniChar Tcl_UniCharToLower(int ch) { return wxCRT_TolowerNative(ch); } +Tcl_UniChar Tcl_UniCharToTitle(int ch) { return wxCRT_ToupperNative(ch); } + +#endif /* !wxUSE_UNICODE */ + #define CH NOCELT /* @@ -569,8 +693,6 @@ element(v, startp, endp) { struct cname *cn; size_t len; - Tcl_DString ds; - CONST char *np; /* generic: one-chr names stand for themselves */ assert(startp < endp); @@ -582,14 +704,11 @@ element(v, startp, endp) NOTE(REG_ULOCALE); /* search table */ - Tcl_DStringInit(&ds); - np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); for (cn=cnames; cn->name!=NULL; cn++) { - if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) { + if (wxCRT_StrlenNative(cn->name)==len && wxCRT_StrncmpNative(cn->name, startp, len)==0) { break; /* NOTE BREAK OUT */ } } - Tcl_DStringFree(&ds); if (cn->name != NULL) { return CHR(cn->code); } @@ -708,6 +827,8 @@ eclass(v, c, cases) return cv; } +#if wxUSE_UNICODE + /* - cclass - supply cvec for a character class * Must include case counterparts on request. @@ -722,18 +843,17 @@ cclass(v, startp, endp, cases) { size_t len; struct cvec *cv = NULL; - Tcl_DString ds; - CONST char *np; - char **namePtr; + CONST chr *np; + chr **namePtr; int i, index; /* * The following arrays define the valid character class names. 
*/ - static char *classNames[] = { - "alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph", - "lower", "print", "punct", "space", "upper", "xdigit", NULL + static chr *classNames[] = { + wxT("alnum"), wxT("alpha"), wxT("ascii"), wxT("blank"), wxT("cntrl"), wxT("digit"), wxT("graph"), + wxT("lower"), wxT("print"), wxT("punct"), wxT("space"), wxT("upper"), wxT("xdigit"), NULL }; enum classes { @@ -747,16 +867,15 @@ cclass(v, startp, endp, cases) */ len = endp - startp; - Tcl_DStringInit(&ds); - np = Tcl_UniCharToUtfDString(startp, (int)len, &ds); + np = startp; /* * Remap lower and upper to alpha if the match is case insensitive. */ - if (cases && len == 5 && (strncmp("lower", np, 5) == 0 - || strncmp("upper", np, 5) == 0)) { - np = "alpha"; + if (cases && len == 5 && (wxCRT_StrncmpNative(wxT("lower"), np, 5) == 0 + || wxCRT_StrncmpNative(wxT("upper"), np, 5) == 0)) { + np = wxT("alpha"); } /* @@ -765,12 +884,11 @@ cclass(v, startp, endp, cases) index = -1; for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) { - if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) { + if ((wxCRT_StrlenNative(*namePtr) == len) && (wxCRT_StrncmpNative(*namePtr, np, len) == 0)) { index = i; break; } } - Tcl_DStringInit(&ds); if (index == -1) { ERR(REG_ECTYPE); return NULL; @@ -919,6 +1037,79 @@ cclass(v, startp, endp, cases) return cv; } +#else /* wxUSE_UNICODE */ + +static struct cvec * +cclass(v, startp, endp, cases) +struct vars *v; +chr *startp; /* where the name starts */ +chr *endp; /* just past the end of the name */ +int cases; /* case-independent? 
*/ +{ + size_t len; + char *p; + struct cclass *cc; + struct cvec *cv; + chr *np; + int i; + int count; + char buf[256]; + const char *loc; + + /* find the name */ + len = endp - startp; + np = startp; + if (cases && len == 5 && (wxCRT_StrncmpNative(wxT("lower"), np, 5) == 0 || + wxCRT_StrncmpNative(wxT("upper"), np, 5) == 0)) + np = wxT("alpha"); + for (cc = cclasses; cc->name != NULL; cc++) + if (wxCRT_StrlenNative(cc->name) == len && wxCRT_StrncmpNative(cc->name, np, len) == 0) + break; /* NOTE BREAK OUT */ + if (cc->name == NULL) { + ERR(REG_ECTYPE); + return NULL; + } + + loc = setlocale(LC_CTYPE, NULL); + + if (!cc->isfunc || loc == NULL || strcmp(loc, "C") == 0) + { + /* set up vector */ + cv = getcvec(v, (int)strlen(cc->chars), 0, 0); + if (cv == NULL) { + ERR(REG_ESPACE); + return NULL; + } + + /* fill it in */ + for (p = cc->chars; *p != '\0'; p++) + addchr(cv, (chr)*p); + } + else + { + count = 0; + for (i = 0; i < 256; i++) + if (cc->isfunc(i)) + buf[count++] = i; + + /* set up vector */ + cv = getcvec(v, count, 0, 0); + if (cv == NULL) { + ERR(REG_ESPACE); + return NULL; + } + + /* fill it in */ + for (i = 0; i < count; i++) + addchr(cv, buf[i]); + } + + return cv; +} + +#endif /* !wxUSE_UNICODE */ + + /* - allcases - supply cvec for all case counterparts of a chr (including itself) * This is a shortcut, preferably an efficient one, for simple characters;