Cycript.ios
Cycript.osx
Cycript.lib
+DerivedCoreProperties.txt
+PropList.txt
return -1;
}
+// Writes the UTF-8 encoding of the code point `point` at `local` and advances
+// `local` past the bytes written (one to four of them). A value of 0x110000
+// or above trips _assert and writes nothing.
+static void U(char *&local, unsigned point) {
+    // number of six-bit continuation bytes that follow the lead byte
+    unsigned trail;
+
+    if (point < 0x000080) {
+        // plain ASCII: the code point is its own single byte
+        *local++ = point;
+        return;
+    } else if (point < 0x000800) {
+        *local++ = 0xc0 | (point >> 0x06 & 0x1f);
+        trail = 1;
+    } else if (point < 0x010000) {
+        *local++ = 0xe0 | (point >> 0x0c & 0x0f);
+        trail = 2;
+    } else if (point < 0x110000) {
+        *local++ = 0xf0 | (point >> 0x12 & 0x07);
+        trail = 3;
+    } else {
+        _assert(false);
+        return;
+    }
+
+    // continuation bytes, most significant six bits first
+    while (trail-- != 0)
+        *local++ = 0x80 | (point >> (trail * 6) & 0x3f);
+}
+
+// Decodes one Unicode escape (four hex digits, or any number of hex digits in
+// braces) from `text` and appends its UTF-8 encoding at `local`. On entry `i`
+// indexes the character just before the digits or the opening brace; on
+// return it indexes the last character of the escape (the final hex digit or
+// the closing brace). A braced value of 0x110000 or above trips _assert.
+static void U(char *&local, const char *text, yy_size_t &i) {
+    unsigned point;
+
+    char next(text[++i]);
+    if (next != '{') {
+        // fixed form: exactly four hex digits starting at text[i]
+        point = H(text[i + 0]) << 12 | H(text[i + 1]) << 8 | H(text[i + 2]) << 4 | H(text[i + 3]);
+        i += 3;
+    } else {
+        // braced form: accumulate hex digits until the closing brace
+        point = 0;
+        for (;;) {
+            next = text[++i];
+            if (next == '}')
+                break;
+            point = (point << 4) | H(next);
+            // The digit count is unbounded, so check the range on every step:
+            // otherwise further shifts would wrap `point` around and a huge
+            // value could silently decode as an unrelated character.
+            _assert(point < 0x110000);
+        }
+    }
+
+    U(local, point);
+}
+
#define YY_INPUT(data, value, size) { \
if (yyextra->data_.eof()) \
value = YY_NULL; \
%option full
%option ecs
-%option align
-Escape \\[\\'"bfnrtv]|\\[0-7]|\\[4-7][0-7]|\\[0-3][0-7][0-7]?|\\x[0-9a-fA-F]{2}|\\u[0-9a-fA-F]{4}|\\\n
+U1 [\x00-\x7f]
+U0 [\x80-\xbf]
+U2 [\xc2-\xdf]
+U3 [\xe0-\xef]
+U4 [\xf0-\xf4]
-IdentifierStart [a-zA-Z$_]
-IdentifierPart [a-zA-Z$_0-9]
+HexDigit [0-9a-fA-F]
+LineTerminatorSequence \r?\n|\r|\xe2\x80[\xa8\xa9]
+WhiteSpace [\x09\x0b\x0c\x20]|\xc2\xa0|\xef\xbb\xbf
+UnicodeEscape \\u({HexDigit}{4}|\{{HexDigit}+\})
+
+OctalEscape \\[1-7]|\\[4-7][0-7]|\\[0-3][0-7][0-7]?
+StringEscape \\['"\\bfnrtv]|\\0|{OctalEscape}|\\x{HexDigit}{2}|{UnicodeEscape}
+StringExtra {StringEscape}|\\{LineTerminatorSequence}
+SingleString ([^'\\\n]|{StringExtra})*
+DoubleString ([^"\\\n]|{StringExtra})*
+StringPrefix '{SingleString}|\"{DoubleString}
+
+@include UnicodeIDStart.l
+@include UnicodeIDContinue.l
+
+IdentifierMore [$_]
+
+UnicodeStart {IdentifierMore}|{UnicodeIDStart}
+UnicodePart {IdentifierMore}|\xe2\x80[\x8c\x8d]|{UnicodeIDContinue}
+UnicodeFail {U2}|{U3}|{U3}{U0}|{U4}|{U4}{U0}|{U4}{U0}{U0}
+UnicodeScrap {UnicodePart}*{UnicodeFail}?
+
+IdentifierStart {UnicodeStart}|{UnicodeEscape}
+IdentifierPart {UnicodePart}|{UnicodeEscape}
+IdentifierFail {UnicodeFail}|\\(u({HexDigit}{0,3}|\{{HexDigit}*))?
+IdentifierScrap {IdentifierPart}*{IdentifierFail}?
NonTerminator [^\n]
BackslashSequence \\{NonTerminator}
RegularExpressionFirstChar [^\n*\\/]|{BackslashSequence}
RegularExpressionChar [^\n\\/]|{BackslashSequence}
-RegularExpressionFlags {IdentifierPart}*
+RegularExpressionFlags {UnicodePart}*
RegularExpressionChars {RegularExpressionChar}*
RegularExpressionBody {RegularExpressionFirstChar}{RegularExpressionChars}
%%
<RegExp>\/{RegularExpressionBody}\/{RegularExpressionFlags} L C I(literal, RegEx(Y), tk::RegularExpressionLiteral, hi::Constant);
+<RegExp>\/{RegularExpressionBody}\/{RegularExpressionFlags}{UnicodeFail} L E("invalid flags")
<RegExp>\/{RegularExpressionBody}?\\? L E("unterminated regex")
#![^\n]* L M
"xml" L C I(identifier, Identifier("xml"), tk::XML, hi::Meta);
@end
-{IdentifierStart}{IdentifierPart}* L C I(identifier, Identifier(Y), tk::Identifier_, hi::Identifier);
+{UnicodeStart}{UnicodePart}* L C I(identifier, Identifier(Y), tk::Identifier_, hi::Identifier);
+
+{IdentifierStart}{IdentifierPart}* L C {
+ // Identifier that may contain Unicode escapes: build a decoded copy in which
+ // every escape is replaced by the UTF-8 bytes of its code point.
+ // The buffer holds yyleng characters plus the terminating NUL.
+ char *value(A char[yyleng + 1]);
+ char *local(value);
+
+ for (yy_size_t i(0), e(yyleng); i != e; ++i) {
+ char next(yytext[i]);
+ if (next != '\\')
+ *local++ = next;
+ else
+ // ++i steps from the backslash onto the 'u'; U() decodes the escape and
+ // leaves i on its last character, so the loop's ++i moves past it.
+ U(local, yytext, ++i);
+ }
+
+ *local = '\0';
+ I(identifier, Identifier(value), tk::Identifier_, hi::Identifier);
+}
+
+({IdentifierStart}{IdentifierPart}*)?{IdentifierFail} L E("invalid identifier")
0[0-7]+ L C I(number, Number(strtoull(yytext + 1, NULL, 8)), tk::NumericLiteral, hi::Constant);
0[0-9]+ L C I(number, Number(strtoull(yytext + 1, NULL, 10)), tk::NumericLiteral, hi::Constant);
0[bB][0-1]+ L C I(number, Number(strtoull(yytext + 2, NULL, 2)), tk::NumericLiteral, hi::Constant);
(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?)([eE][+-]?[0-9]+)? L C I(number, Number(strtod(yytext, NULL)), tk::NumericLiteral, hi::Constant);
-(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?)[eE][+-]?{IdentifierPart}* L E("invalid exponent")
-(\.?[0-9]|(0|[1-9][0-9]*)\.){IdentifierPart}* L E("invalid number")
+(\.[0-9]+|(0|[1-9][0-9]*)(\.[0-9]*)?)[eE][+-]?{IdentifierScrap} L E("invalid exponent")
+(\.?[0-9]|(0|[1-9][0-9]*)\.){IdentifierScrap} L E("invalid number")
-\"([^"\\\n]|{Escape})*\"|'([^'\\\n]|{Escape})*' L C {
+'{SingleString}'|\"{DoubleString}\" L C {
char *value(A char[yyleng]);
char *local(value);
char next(yytext[i]);
if (yytext[i] == '\\')
- switch (next = yytext[++i]) {
- case '\n': continue;
+ // XXX: support more line continuation characters
+ if (false) line: {
+ yylloc->end.lines(1);
+ yylloc->end.columns(yyleng - i);
+ } else switch (next = yytext[++i]) {
+ case '\n': goto line;
+
case '\\': next = '\\'; break;
case '\'': next = '\''; break;
case '"': next = '"'; break;
break;
case 'x':
- next = H(yytext[i + 1]) << 4 | H(yytext[i + 2]);
+ U(local, H(yytext[i + 1]) << 4 | H(yytext[i + 2]));
i += 2;
- break;
+ continue;
+
+ case 'u':
+ U(local, yytext, i);
+ continue;
}
*local++ = next;
I(string, String(value, local - value), tk::StringLiteral, hi::Constant);
}
-(\"([^"\\\n]|{Escape})*|'([^'\\\n]|{Escape})*)(\\(x.{0,2}|u.{0,4})?)? L E("invalid escape")
+{StringPrefix}\\(x.{0,2}|u([^{].{0,3}|\{[^}]*)?|{UnicodeFail})? L E("invalid escape")
+{StringPrefix} L E("invalid string")
-\r?\n|\r|\xe2\x80[\xa8\xa9] yylloc->step(); yylloc->end.lines(); N
+{LineTerminatorSequence} yylloc->step(); yylloc->end.lines(); N
-[ \t] L
+{WhiteSpace} L
<<EOF>> if (yyextra->auto_) { yyextra->auto_ = false; F(tk::AutoComplete, hi::Nothing); } L yyterminate();
-@{IdentifierStart}{IdentifierPart}*|\xe2.|. L E("unknown token")
+@({UnicodeStart}{UnicodeScrap}|{UnicodeFail}) L E("invalid keyword")
+
+. L E("invalid character")
%%
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# }}}
+file=$1
+shift
+
filters=("$@")
-while IFS= read -r line; do
- if [[ ${line} = @if* ]]; then
- line=${line#@if }
- for name in "${filters[@]}"; do
- if [[ ${line} = ${name}' '* ]]; then
- echo "${line#${name} }"
- fi
- done
- elif [[ ${line} = @begin* ]]; then
- set ${line}; shift
- filter=
- for name in "${filters[@]}"; do
- for side in "$@"; do
- if [[ ${name} == ${side} ]]; then
- unset filter
+# Writes the filtered contents of the file named by $1 to stdout. Lines
+# starting with "@if", "@begin", "@end" and "@include" are directives; the
+# global `filters` array selects which conditional content is kept. Included
+# paths are resolved relative to the directory of the including file.
+function include() {
+    # Per-call state is local: a nested include must not overwrite the `dir`
+    # this invocation still needs to resolve its remaining @include lines.
+    local file="$1"
+    shift
+
+    # dir = directory part of ${file} with a trailing slash ("./" when the
+    # name contains no slash); it is prefixed to every included path
+    local dir="/${file}"
+    dir=${dir%/*}
+    dir=${dir:-/.}
+    dir=${dir#/}
+    dir=${dir}/
+
+    local line
+    while IFS= read -r line; do
+        if false; then :
+        elif [[ ${line} = @if* ]]; then
+            # "@if NAME text": emit text once for each active filter equal to NAME
+            line=${line#@if }
+            for name in "${filters[@]}"; do
+                if [[ ${line} = ${name}' '* ]]; then
+                    echo "${line#${name} }"
+                fi
+            done
- done
- elif [[ ${line} = @end ]]; then
- unset filter
- elif [[ -z ${filter+@} ]]; then
- echo "${line}"
- fi
-done
+        elif [[ ${line} = @begin* ]]; then
+            # "@begin A B ...": `filter` stays set (suppressing plain lines)
+            # unless one of the listed names is an active filter
+            set ${line}; shift
+            filter=
+            for name in "${filters[@]}"; do
+                for side in "$@"; do
+                    if [[ ${name} == ${side} ]]; then
+                        unset filter
+                    fi
+                done
+            done
+        elif [[ ${line} = @end ]]; then
+            unset filter
+        elif [[ ${line} = @include* ]]; then
+            # "@include PATH": splice in PATH, relative to this file's directory
+            line=${line#@include }
+            include "${dir}${line}"
+        elif [[ -z ${filter+@} ]]; then
+            # ordinary line: copy through while no @begin block is suppressing
+            echo "${line}"
+        fi
+    done <"${file}"
+}
+
+include "${file}"
str << (single ? '\'' : '"');
for (const char *value(data), *end(data + size); value != end; ++value)
- switch (*value) {
+ switch (uint8_t next = *value) {
case '\\': str << "\\\\"; break;
case '\b': str << "\\b"; break;
case '\f': str << "\\f"; break;
else goto simple;
break;
+ case '\0':
+ if (value[1] >= '0' && value[1] <= '9')
+ str << "\\x00";
+ else
+ str << "\\0";
+ break;
+
default:
- // this test is designed to be "awesome", generating neither warnings nor incorrect results
- if (*value < 0x20 || *value >= 0x7f)
- str << "\\x" << std::setbase(16) << std::setw(2) << std::setfill('0') << unsigned(uint8_t(*value));
- else simple:
+ if (next >= 0x20 && next < 0x7f) simple:
str << *value;
+ else {
+ unsigned levels(1);
+ if ((next & 0x80) != 0)
+ while ((next & 0x80 >> ++levels) != 0);
+
+ unsigned point(next & 0xff >> levels);
+ while (--levels != 0)
+ point = point << 6 | uint8_t(*++value) & 0x3f;
+
+ if (point < 0x100)
+ str << "\\x" << std::setbase(16) << std::setw(2) << std::setfill('0') << point;
+ else if (point < 0x10000)
+ str << "\\u" << std::setbase(16) << std::setw(4) << std::setfill('0') << point;
+ else {
+ point -= 0x10000;
+ str << "\\u" << std::setbase(16) << std::setw(4) << std::setfill('0') << (0xd800 | point >> 0x0a);
+ str << "\\u" << std::setbase(16) << std::setw(4) << std::setfill('0') << (0xdc00 | point & 0x3ff);
+ }
+ }
}
str << (single ? '\'' : '"');
CLEANFILES += Cycript.yy
Cycript.yy: Cycript.yy.in
- $(srcdir)/Filter.sh <$< >$@ $(filters)
+ $(srcdir)/Filter.sh $< >$@ $(filters)
CLEANFILES += Cycript.l
-Cycript.l: Cycript.l.in
- $(srcdir)/Filter.sh <$< >$@ $(filters)
+Cycript.l: Cycript.l.in UnicodeIDStart.l UnicodeIDContinue.l
+ $(srcdir)/Filter.sh $< >$@ $(filters)
CLEANFILES += lex.cy.cpp
lex.cy.cpp: Cycript.l
@CY_EXECUTE_TRUE@Bridge.hpp: Bridge.gperf
@CY_EXECUTE_TRUE@ $(GPERF) $< | $(SED) -e 's/defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__/0/' >$@
Cycript.yy: Cycript.yy.in
- $(srcdir)/Filter.sh <$< >$@ $(filters)
-Cycript.l: Cycript.l.in
- $(srcdir)/Filter.sh <$< >$@ $(filters)
+ $(srcdir)/Filter.sh $< >$@ $(filters)
+Cycript.l: Cycript.l.in UnicodeIDStart.l UnicodeIDContinue.l
+ $(srcdir)/Filter.sh $< >$@ $(filters)
lex.cy.cpp: Cycript.l
$(FLEX) -b -t $< | $(SED) -e 's/int yyl;/yy_size_t yyl;/;s/int yyleng_r;/yy_size_t yyleng_r;/;s/yyg =/yyg __attribute__((__unused__)) =/' >$@
grep -F 'No backing up.' lex.backup >/dev/null
--- /dev/null
+UnicodeIDContinue_0 [\x30-\x39\x41-\x5a\x5f\x61-\x7a]|\xc2[\xaa\xba\xb5\xb7]|\xc3[\x80-\x96\x98-\xb6\xb8-\xbf]|\xcb[\x80\x81\xa2\xa3\xa1\x86-\x91\xac\xae\xa0\xa4]|\xcd[\x80-\xb4\xb6\xb7\xba-\xbd\xbf]|\xce[\x86-\x8a\x8c\x8e-\xa1\xa3-\xbf]|\xcf[\x80-\xb5\xb7-\xbf]|\xd2[\x80\x81\x83-\x87\x8a-\xbf]|\xd4[\x80-\xaf\xb1-\xbf]|\xd5[\x80-\x96\x99\xa1-\xbf]|\xd6[\x80-\x87\x91-\xbd\xbf]|\xd7[\x81\x82\x84\x85\x87\x90-\xaa\xb0-\xb2]|\xd8[\x90-\x9a\xa0-\xbf]|\xd9[\x80-\xa9\xae-\xbf]|\xdb[\x80-\x93\x95-\x9c\x9f-\xa8\xaa-\xbc\xbf]|\xdc[\x90-\xbf]|\xdd[\x80-\x8a\x8d-\xbf]|\xde[\x80-\xb1]|\xdf[\x80-\xb5\xba]|[\xc4-\xca\xcc\xd0\xd1\xd3\xda][\x80-\xbf]|\xe0\xa0[\x80-\xad]|\xe0\xa1[\x80-\x9b]|\xe0\xa2[\xa0-\xb4]|\xe0\xa3[\xa3-\xbf]|\xe0\xa5[\x80-\xa3\xa6-\xaf\xb1-\xbf]|\xe0\xa6[\x80-\x83\x85-\x8c\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb6-\xb9\xbc-\xbf]|\xe0\xa7[\x80-\x84\x87\x88\x8b-\x8e\x97\x9c\x9d\x9f-\xa3\xa6-\xb1]|\xe0\xa8[\x81-\x83\x85-\x8a\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb3\xb5\xb6\xb8\xb9\xbc\xbe\xbf]|\xe0\xa9[\x80-\x82\x87\x88\x8b-\x8d\x91\x99-\x9c\x9e\xa6-\xb5]
+UnicodeIDContinue_1 \xe0\xaa[\x81-\x83\x85-\x8d\x8f-\x91\x93-\xa8\xaa-\xb0\xb2\xb3\xb5-\xb9\xbc-\xbf]|\xe0\xab[\x80-\x85\x87-\x89\x8b-\x8d\x90\xa0-\xa3\xa6-\xaf\xb9]|\xe0\xac[\x81-\x83\x85-\x8c\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb3\xb5-\xb9\xbc-\xbf]|\xe0\xad[\x80-\x84\x87\x88\x8b-\x8d\x96\x97\x9c\x9d\x9f-\xa3\xa6-\xaf\xb1]|\xe0\xae[\x82\x83\x85-\x8a\x8e-\x90\x92-\x95\x99\x9a\x9c\x9e\x9f\xa3\xa4\xa8-\xaa\xae-\xb9\xbe\xbf]|\xe0\xaf[\x80-\x82\x86-\x88\x8a-\x8d\x90\x97\xa6-\xaf]|\xe0\xb0[\x80-\x83\x85-\x8c\x8e-\x90\x92-\xa8\xaa-\xb9\xbd-\xbf]|\xe0\xb1[\x80-\x84\x86-\x88\x8a-\x8d\x95\x96\x98-\x9a\xa0-\xa3\xa6-\xaf]|\xe0\xb2[\x81-\x83\x85-\x8c\x8e-\x90\x92-\xa8\xaa-\xb3\xb5-\xb9\xbc-\xbf]|\xe0\xb3[\x80-\x84\x86-\x88\x8a-\x8d\x95\x96\x9e\xa0-\xa3\xa6-\xaf\xb1\xb2]|\xe0\xb4[\x81-\x83\x85-\x8c\x8e-\x90\x92-\xba\xbd-\xbf]|\xe0\xb5[\x80-\x84\x86-\x88\x8a-\x8e\x97\x9f-\xa3\xa6-\xaf\xba-\xbf]|\xe0\xb6[\x82\x83\x85-\x96\x9a-\xb1\xb3-\xbb\xbd]|\xe0\xb7[\x80-\x86\x8a\x8f-\x94\x96\x98-\x9f\xa6-\xaf\xb2\xb3]|\xe0\xb8[\x81-\xba]
+UnicodeIDContinue_2 \xe0\xb9[\x80-\x8e\x90-\x99]|\xe0\xba[\x81\x82\x84\x87\x88\x8a\x8d\x94-\x97\x99-\x9f\xa1-\xa3\xa5\xa7\xaa\xab\xad-\xb9\xbb-\xbd]|\xe0\xbb[\x80-\x84\x86\x88-\x8d\x90-\x99\x9c-\x9f]|\xe0\xbc[\x80\xa0\xa2-\xa6\xa1\xa8\xa9\xa7\xbe\xb5\xb7\x98\x99\xb9\xbf]|\xe0\xbd[\x80-\x87\x89-\xac\xb1-\xbf]|\xe0\xbe[\x80-\x84\x86-\x97\x99-\xbc]|\xe0\xbf[\x86]|\xe0[\xa4][\x80-\xbf]|\xe1\x81[\x80-\x89\x90-\xbf]|\xe1\x82[\x80-\x9d\xa0-\xbf]|\xe1\x83[\x80-\x85\x87\x8d\x90-\xba\xbc-\xbf]|\xe1\x89[\x80-\x88\x8a-\x8d\x90-\x96\x98\x9a-\x9d\xa0-\xbf]|\xe1\x8a[\x80-\x88\x8a-\x8d\x90-\xb0\xb2-\xb5\xb8-\xbe]|\xe1\x8b[\x80\x82-\x85\x88-\x96\x98-\xbf]|\xe1\x8c[\x80-\x90\x92-\x95\x98-\xbf]|\xe1\x8d[\x80-\x9a\x9d-\x9f\xa9-\xb1]|\xe1\x8e[\x80-\x8f\xa0-\xbf]|\xe1\x8f[\x80-\xb5\xb8-\xbd]|\xe1\x90[\x81-\xbf]|\xe1\x99[\x80-\xac\xaf-\xbf]|\xe1\x9a[\x81-\x9a\xa0-\xbf]|\xe1\x9b[\x80-\xaa\xae-\xb8]|\xe1\x9c[\x80-\x8c\x8e-\x94\xa0-\xb4]|\xe1\x9d[\x80-\x93\xa0-\xac\xae-\xb0\xb2\xb3]|\xe1\x9f[\x80-\x93\x97\x9c\x9d\xa0-\xa9]|\xe1\xa0[\x8b-\x8d\x90-\x99\xa0-\xbf]
+UnicodeIDContinue_3 \xe1\xa1[\x80-\xb7]|\xe1\xa2[\x80-\xaa\xb0-\xbf]|\xe1\xa3[\x80-\xb5]|\xe1\xa4[\x80-\x9e\xa0-\xab\xb0-\xbb]|\xe1\xa5[\x86-\xad\xb0-\xb4]|\xe1\xa6[\x80-\xab\xb0-\xbf]|\xe1\xa7[\x80-\x89\x90-\x9a]|\xe1\xa8[\x80-\x9b\xa0-\xbf]|\xe1\xa9[\x80-\x9e\xa0-\xbc\xbf]|\xe1\xaa[\x80-\x89\x90-\x99\xa7\xb0-\xbd]|\xe1\xad[\x80-\x8b\x90-\x99\xab-\xb3]|\xe1\xaf[\x80-\xb3]|\xe1\xb0[\x80-\xb7]|\xe1\xb1[\x80-\x89\x8d-\xbd]|\xe1\xb3[\x90-\x92\x94-\xb6\xb8\xb9]|\xe1\xb7[\x80-\xb5\xbc-\xbf]|\xe1\xbc[\x80-\x95\x98-\x9d\xa0-\xbf]|\xe1\xbd[\x80-\x85\x88-\x8d\x90-\x97\x99\x9b\x9d\x9f-\xbd]|\xe1\xbe[\x80-\xb4\xb6-\xbc\xbe]|\xe1\xbf[\x82-\x84\x86-\x8c\x90-\x93\x96-\x9b\xa0-\xac\xb2-\xb4\xb6-\xbc]|\xe1[\x80\x84-\x88\x91-\x98\x9e\xac\xae\xb4-\xb6\xb8-\xbb][\x80-\xbf]|\xe2\x80[\xbf]|\xe2\x81[\x80\xb1\x94\xbf]|\xe2\x82[\x90-\x9c]|\xe2\x83[\x90-\x9c\xa1\xa5-\xb0]|\xe2\x84[\x82\x87\x8a-\x93\x95\x98-\x9d\xa4\xa6\xa8\xaa-\xb9\xbc-\xbf]|\xe2\x85[\x85-\x89\x8e\xa0-\xbf]|\xe2\x86[\x80-\x88]|\xe2\xb0[\x80-\xae\xb0-\xbf]|\xe2\xb1[\x80-\x9e\xa0-\xbf]
+UnicodeIDContinue_4 \xe2\xb3[\x80-\xa4\xab-\xb3]|\xe2\xb4[\x80-\xa5\xa7\xad\xb0-\xbf]|\xe2\xb5[\x80-\xa7\xaf\xbf]|\xe2\xb6[\x80-\x96\xa0-\xa6\xa8-\xae\xb0-\xb6\xb8-\xbe]|\xe2\xb7[\x80-\x86\x88-\x8e\x90-\x96\x98-\x9e\xa0-\xbf]|\xe2[\xb2][\x80-\xbf]|\xe3\x80[\x85-\x87\xa1-\xaf\xb1-\xb5\xb8-\xbc]|\xe3\x81[\x81-\xbf]|\xe3\x82[\x80-\x96\x99-\x9f\xa1-\xbf]|\xe3\x83[\x80-\xba\xbc-\xbf]|\xe3\x84[\x85-\xad\xb1-\xbf]|\xe3\x86[\x80-\x8e\xa0-\xba]|\xe3\x87[\xb0-\xbf]|\xe3[\x85\x90-\xbf][\x80-\xbf]|\xe4\xb6[\x80-\xb5]|\xe4[\x80-\xb5\xb8-\xbf][\x80-\xbf]|\xe9\xbf[\x80-\x95]|\xe9[\x80-\xbe][\x80-\xbf]|\xea\x92[\x80-\x8c]|\xea\x93[\x90-\xbd]|\xea\x98[\x80-\x8c\x90-\xab]|\xea\x99[\x80-\xaf\xb4-\xbd\xbf]|\xea\x9b[\x80-\xb1]|\xea\x9c[\x97-\x9f\xa2-\xbf]|\xea\x9e[\x80-\x88\x8b-\xad\xb0-\xb7]|\xea\x9f[\xb7-\xbf]|\xea\xa0[\x80-\xa7]|\xea\xa1[\x80-\xb3]|\xea\xa3[\x80-\x84\x90-\x99\xa0-\xb7\xbb\xbd]|\xea\xa4[\x80-\xad\xb0-\xbf]|\xea\xa5[\x80-\x93\xa0-\xbc]|\xea\xa7[\x80\x8f-\x99\xa0-\xbe]|\xea\xa8[\x80-\xb6]|\xea\xa9[\x80-\x8d\x90-\x99\xa0-\xb6\xba-\xbf]
+UnicodeIDContinue_5 \xea\xab[\x80-\x82\x9b-\x9d\xa0-\xaf\xb2-\xb6]|\xea\xac[\x81-\x86\x89-\x8e\x91-\x96\xa0-\xa6\xa8-\xae\xb0-\xbf]|\xea\xad[\x80-\x9a\x9c-\xa5\xb0-\xbf]|\xea\xaf[\x80-\xaa\xac\xad\xb0-\xb9]|\xea[\x80-\x91\x94-\x97\x9a\x9d\xa2\xa6\xaa\xae\xb0-\xbf][\x80-\xbf]|\xed\x9e[\x80-\xa3\xb0-\xbf]|\xed\x9f[\x80-\x86\x8b-\xbb]|\xed[\x80-\x9d][\x80-\xbf]|\xef\xa9[\x80-\xad\xb0-\xbf]|\xef\xab[\x80-\x99]|\xef\xac[\x80-\x86\x93-\x97\x9d-\xa8\xaa-\xb6\xb8-\xbc\xbe]|\xef\xad[\x80\x81\x83\x84\x86-\xbf]|\xef\xae[\x80-\xb1]|\xef\xaf[\x93-\xbf]|\xef\xb4[\x80-\xbd]|\xef\xb5[\x90-\xbf]|\xef\xb6[\x80-\x8f\x92-\xbf]|\xef\xb7[\x80-\x87\xb0-\xbb]|\xef\xb8[\x80-\x8f\xa0-\xaf\xb3\xb4]|\xef\xb9[\x8d-\x8f\xb0-\xb4\xb6-\xbf]|\xef\xbb[\x80-\xbc]|\xef\xbc[\x90-\x99\xa1-\xba\xbf]|\xef\xbd[\x81-\x9a\xa6-\xbf]|\xef\xbe[\x80-\xbe]|\xef\xbf[\x82-\x87\x8a-\x8f\x92-\x97\x9a-\x9c]|\xef[\xa4-\xa8\xaa\xb0-\xb3\xba][\x80-\xbf]|[\xe5-\xe8\xeb\xec][\x80-\xbf][\x80-\xbf]|\xf0\x90\x80[\x80-\x8b\x8d-\xa6\xa8-\xba\xbc\xbd\xbf]|\xf0\x90\x81[\x80-\x8d\x90-\x9d]
+UnicodeIDContinue_6 \xf0\x90\x83[\x80-\xba]|\xf0\x90\x85[\x80-\xb4]|\xf0\x90\x87[\xbd]|\xf0\x90\x8a[\x80-\x9c\xa0-\xbf]|\xf0\x90\x8b[\x80-\x90\xa0]|\xf0\x90\x8c[\x80-\x9f\xb0-\xbf]|\xf0\x90\x8d[\x80-\x8a\x90-\xba]|\xf0\x90\x8e[\x80-\x9d\xa0-\xbf]|\xf0\x90\x8f[\x80-\x83\x88-\x8f\x91-\x95]|\xf0\x90\x92[\x80-\x9d\xa0-\xa9]|\xf0\x90\x94[\x80-\xa7\xb0-\xbf]|\xf0\x90\x95[\x80-\xa3]|\xf0\x90\x9c[\x80-\xb6]|\xf0\x90\x9d[\x80-\x95\xa0-\xa7]|\xf0\x90\xa0[\x80-\x85\x88\x8a-\xb5\xb7\xb8\xbc\xbf]|\xf0\x90\xa1[\x80-\x95\xa0-\xb6]|\xf0\x90\xa2[\x80-\x9e]|\xf0\x90\xa3[\xa0-\xb2\xb4\xb5]|\xf0\x90\xa4[\x80-\x95\xa0-\xb9]|\xf0\x90\xa6[\x80-\xb7\xbe\xbf]|\xf0\x90\xa8[\x80-\x83\x85\x86\x8c-\x93\x95-\x97\x99-\xb3\xb8-\xba\xbf]|\xf0\x90\xa9[\xa0-\xbc]|\xf0\x90\xaa[\x80-\x9c]|\xf0\x90\xab[\x80-\x87\x89-\xa6]|\xf0\x90\xac[\x80-\xb5]|\xf0\x90\xad[\x80-\x95\xa0-\xb2]|\xf0\x90\xae[\x80-\x91]|\xf0\x90\xb1[\x80-\x88]|\xf0\x90\xb2[\x80-\xb2]|\xf0\x90\xb3[\x80-\xb2]|\xf0\x90[\x82\x90\x91\x98-\x9b\xb0][\x80-\xbf]|\xf0\x91\x81[\x80-\x86\xa7-\xaf\xbf\xa6]
+UnicodeIDContinue_7 \xf0\x91\x82[\x80-\xba]|\xf0\x91\x83[\x90-\xa8\xb0-\xb9]|\xf0\x91\x84[\x80-\xb4\xb6-\xbf]|\xf0\x91\x85[\x90-\xb3\xb6]|\xf0\x91\x87[\x80-\x84\x8a-\x8c\x90-\x9a\x9c]|\xf0\x91\x88[\x80-\x91\x93-\xb7]|\xf0\x91\x8a[\x80-\x86\x88\x8a-\x8d\x8f-\x9d\x9f-\xa8\xb0-\xbf]|\xf0\x91\x8b[\x80-\xaa\xb0-\xb9]|\xf0\x91\x8c[\x80-\x83\x85-\x8c\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb3\xb5-\xb9\xbc-\xbf]|\xf0\x91\x8d[\x80-\x84\x87\x88\x8b-\x8d\x90\x97\x9d-\xa3\xa6-\xac\xb0-\xb4]|\xf0\x91\x93[\x80-\x85\x87\x90-\x99]|\xf0\x91\x96[\x80-\xb5\xb8-\xbf]|\xf0\x91\x97[\x80\x98-\x9d]|\xf0\x91\x99[\x80\x84\x90-\x99]|\xf0\x91\x9a[\x80-\xb7]|\xf0\x91\x9b[\x80-\x89]|\xf0\x91\x9c[\x80-\x99\x9d-\xab\xb0-\xb9]|\xf0\x91\xa2[\xa0-\xbf]|\xf0\x91\xa3[\x80-\xa9\xbf]|\xf0\x91\xab[\x80-\xb8]|\xf0\x91[\x80\x86\x92\x98][\x80-\xbf]|\xf0\x92\x8e[\x80-\x99]|\xf0\x92\x91[\x80-\xae]|\xf0\x92\x95[\x80-\x83]|\xf0\x92[\x80-\x8d\x90\x92-\x94][\x80-\xbf]|\xf0\x93\x90[\x80-\xae]|\xf0\x93[\x80-\x8f][\x80-\xbf]|\xf0\x94\x99[\x80-\x86]|\xf0\x94[\x90-\x98][\x80-\xbf]
+UnicodeIDContinue_8 \xf0\x96\xa8[\x80-\xb8]|\xf0\x96\xa9[\x80-\x9e\xa0-\xa9]|\xf0\x96\xab[\x90-\xad\xb0-\xb4]|\xf0\x96\xac[\x80-\xb6]|\xf0\x96\xad[\x80-\x83\x90-\x99\xa3-\xb7\xbd-\xbf]|\xf0\x96\xae[\x80-\x8f]|\xf0\x96\xbd[\x80-\x84\x90-\xbe]|\xf0\x96\xbe[\x8f-\x9f]|\xf0\x96[\xa0-\xa7\xbc][\x80-\xbf]|\xf0\x9b\x80[\x80\x81]|\xf0\x9b\xb2[\x80-\x88\x90-\x99\x9d\x9e]|\xf0\x9b\xb1[\x80-\xaa\xb0-\xbc]|\xf0\x9b[\xb0][\x80-\xbf]|\xf0\x9d\x85[\xa5-\xa9\xad-\xb2\xbb-\xbf]|\xf0\x9d\x86[\x80-\x82\xab\x85-\x8b\xac\xad\xaa]|\xf0\x9d\x89[\x82-\x84]|\xf0\x9d\x91[\x80-\x94\x96-\xbf]|\xf0\x9d\x92[\x80-\x9c\x9e\x9f\xa2\xa5\xa6\xa9-\xac\xae-\xb9\xbb\xbd-\xbf]|\xf0\x9d\x93[\x80-\x83\x85-\xbf]|\xf0\x9d\x94[\x80-\x85\x87-\x8a\x8d-\x94\x96-\x9c\x9e-\xb9\xbb-\xbe]|\xf0\x9d\x95[\x80-\x84\x86\x8a-\x90\x92-\xbf]|\xf0\x9d\x9a[\x80-\xa5\xa8-\xbf]|\xf0\x9d\x9b[\x80\x82-\x9a\x9c-\xba\xbc-\xbf]|\xf0\x9d\x9c[\x80-\x94\x96-\xb4\xb6-\xbf]|\xf0\x9d\x9d[\x80-\x8e\x90-\xae\xb0-\xbf]|\xf0\x9d\x9e[\x80-\x88\x8a-\xa8\xaa-\xbf]|\xf0\x9d\x9f[\x80-\x82\x84-\x8b\x8e-\xbf]
+UnicodeIDContinue {UnicodeIDContinue_0}|{UnicodeIDContinue_1}|{UnicodeIDContinue_2}|{UnicodeIDContinue_3}|{UnicodeIDContinue_4}|{UnicodeIDContinue_5}|{UnicodeIDContinue_6}|{UnicodeIDContinue_7}|{UnicodeIDContinue_8}|\xf0\x9d\xa8[\x80-\xb6\xbb-\xbf]|\xf0\x9d\xa9[\x80-\xac\xb5]|\xf0\x9d\xaa[\xa1-\xa3\x84\xa5-\xaf\xa4\x9b-\x9f]|\xf0\x9d[\x90\x96-\x99][\x80-\xbf]|\xf0\x9e\xa3[\x80-\x84\x90-\x96]|\xf0\x9e\xb8[\x80-\x83\x85-\x9f\xa1\xa2\xa4\xa7\xa9-\xb2\xb4-\xb7\xb9\xbb]|\xf0\x9e\xb9[\x82\x87\x89\x8b\x8d-\x8f\x91\x92\x94\x97\x99\x9b\x9d\x9f\xa1\xa2\xa4\xa7-\xaa\xac-\xb2\xb4-\xb7\xb9-\xbc\xbe]|\xf0\x9e\xba[\x80-\x89\x8b-\x9b\xa1-\xa3\xa5-\xa9\xab-\xbb]|\xf0\x9e[\xa0-\xa2][\x80-\xbf]|\xf0\xaa\x9b[\x80-\x96]|\xf0\xaa[\x80-\x9a\x9c-\xbf][\x80-\xbf]|\xf0\xab\x9c[\x80-\xb4]|\xf0\xab\xa0[\x80-\x9d\xa0-\xbf]|\xf0\xab[\x80-\x9b\x9d-\x9f\xa1-\xbf][\x80-\xbf]|\xf0\xac\xba[\x80-\xa1]|\xf0\xac[\x80-\xb9][\x80-\xbf]|\xf0\xaf\xa8[\x80-\x9d]|\xf0\xaf[\xa0-\xa7][\x80-\xbf]|\xf0[\xa0-\xa9][\x80-\xbf][\x80-\xbf]|\xf3\xa0\x87[\x80-\xaf]|\xf3\xa0[\x84-\x86][\x80-\xbf]
--- /dev/null
+UnicodeIDStart_0 [\x41-\x5a\x61-\x7a]|\xc2[\xaa\xb5\xba]|\xc3[\x80-\x96\x98-\xb6\xb8-\xbf]|\xcb[\x80\x81\xa2\xa3\xa1\x86-\x91\xac\xae\xa0\xa4]|\xcd[\xb0-\xb4\xb6\xb7\xba-\xbd\xbf]|\xce[\x86\x88-\x8a\x8c\x8e-\xa1\xa3-\xbf]|\xcf[\x80-\xb5\xb7-\xbf]|\xd2[\x80\x81\x8a-\xbf]|\xd4[\x80-\xaf\xb1-\xbf]|\xd5[\x80-\x96\x99\xa1-\xbf]|\xd6[\x80-\x87]|\xd7[\x90-\xaa\xb0-\xb2]|\xd8[\xa0-\xbf]|\xd9[\x80-\x8a\xae\xaf\xb1-\xbf]|\xdb[\x80-\x93\x95\xa5\xa6\xae\xaf\xba-\xbc\xbf]|\xdc[\x90\x92-\xaf]|\xdd[\x8d-\xbf]|\xde[\x80-\xa5\xb1]|\xdf[\x8a-\xaa\xb4\xb5\xba]|[\xc4-\xca\xd0\xd1\xd3\xda][\x80-\xbf]|\xe0\xa0[\x80-\x95\x9a\xa4\xa8]|\xe0\xa1[\x80-\x98]|\xe0\xa2[\xa0-\xb4]|\xe0\xa4[\x84-\xb9\xbd]|\xe0\xa5[\x90\x98-\xa1\xb1-\xbf]|\xe0\xa6[\x80\x85-\x8c\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb6-\xb9\xbd]|\xe0\xa7[\xa0\xa1\x8e\xb0\xb1\x9c\x9d\x9f]|\xe0\xa8[\x85-\x8a\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb3\xb5\xb6\xb8\xb9]|\xe0\xa9[\xb2-\xb4\x99-\x9c\x9e]|\xe0\xaa[\x85-\x8d\x8f-\x91\x93-\xa8\xaa-\xb0\xb2\xb3\xb5-\xb9\xbd]|\xe0\xab[\x90\xa0\xb9\xa1]
+UnicodeIDStart_1 \xe0\xac[\x85-\x8c\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb3\xb5-\xb9\xbd]|\xe0\xad[\xa0\xa1\xb1\x9c\x9d\x9f]|\xe0\xae[\x83\x85-\x8a\x8e-\x90\x92-\x95\x99\x9a\x9c\x9e\x9f\xa3\xa4\xa8-\xaa\xae-\xb9]|\xe0\xaf[\x90]|\xe0\xb0[\x85-\x8c\x8e-\x90\x92-\xa8\xaa-\xb9\xbd]|\xe0\xb1[\x98-\x9a\xa0\xa1]|\xe0\xb2[\x85-\x8c\x8e-\x90\x92-\xa8\xaa-\xb3\xb5-\xb9\xbd]|\xe0\xb3[\xa0\xa1\xb2\x9e\xb1]|\xe0\xb4[\x85-\x8c\x8e-\x90\x92-\xba\xbd]|\xe0\xb5[\xa0\xa1\xbf\x8e\xba-\xbe\x9f]|\xe0\xb6[\x85-\x96\x9a-\xb1\xb3-\xbb\xbd]|\xe0\xb7[\x80-\x86]|\xe0\xb8[\x81-\xb0\xb2\xb3]|\xe0\xb9[\x80-\x86]|\xe0\xba[\x81\x82\x84\x87\x88\x8a\x8d\x94-\x97\x99-\x9f\xa1-\xa3\xa5\xa7\xaa\xab\xad-\xb0\xb2\xb3\xbd]|\xe0\xbb[\x80-\x84\x86\x9c-\x9f]|\xe0\xbc[\x80]|\xe0\xbd[\x80-\x87\x89-\xac]|\xe0\xbe[\x88-\x8c]|\xe1\x80[\x80-\xaa\xbf]|\xe1\x81[\x90-\x95\x9a-\x9d\xa1\xa5\xa6\xae-\xb0\xb5-\xbf]|\xe1\x82[\x80\x81\x8e\xa0-\xbf]|\xe1\x83[\x80-\x85\x87\x8d\x90-\xba\xbc-\xbf]|\xe1\x89[\x80-\x88\x8a-\x8d\x90-\x96\x98\x9a-\x9d\xa0-\xbf]|\xe1\x8a[\x80-\x88\x8a-\x8d\x90-\xb0\xb2-\xb5\xb8-\xbe]
+UnicodeIDStart_2 \xe1\x8b[\x80\x82-\x85\x88-\x96\x98-\xbf]|\xe1\x8c[\x80-\x90\x92-\x95\x98-\xbf]|\xe1\x8d[\x80-\x9a]|\xe1\x8e[\x80-\x8f\xa0-\xbf]|\xe1\x8f[\x80-\xb5\xb8-\xbd]|\xe1\x90[\x81-\xbf]|\xe1\x99[\x80-\xac\xaf-\xbf]|\xe1\x9a[\x81-\x9a\xa0-\xbf]|\xe1\x9b[\x80-\xaa\xae-\xb8]|\xe1\x9c[\x80-\x8c\x8e-\x91\xa0-\xb1]|\xe1\x9d[\x80-\x91\xa0-\xac\xae-\xb0]|\xe1\x9e[\x80-\xb3]|\xe1\x9f[\x9c\x97]|\xe1\xa0[\xa0-\xbf]|\xe1\xa1[\x80-\xb7]|\xe1\xa2[\x80-\xa8\xaa\xb0-\xbf]|\xe1\xa3[\x80-\xb5]|\xe1\xa4[\x80-\x9e]|\xe1\xa5[\x90-\xad\xb0-\xb4]|\xe1\xa6[\x80-\xab\xb0-\xbf]|\xe1\xa7[\x80-\x89]|\xe1\xa8[\x80-\x96\xa0-\xbf]|\xe1\xa9[\x80-\x94]|\xe1\xaa[\xa7]|\xe1\xac[\x85-\xb3]|\xe1\xad[\x85-\x8b]|\xe1\xae[\x83-\xa0\xae\xaf\xba-\xbf]|\xe1\xaf[\x80-\xa5]|\xe1\xb0[\x80-\xa3]|\xe1\xb1[\x8d-\x8f\x9a-\xbd]|\xe1\xb3[\xa9-\xac\xae-\xb1\xb5\xb6]|\xe1\xbc[\x80-\x95\x98-\x9d\xa0-\xbf]|\xe1\xbd[\x80-\x85\x88-\x8d\x90-\x97\x99\x9b\x9d\x9f-\xbd]|\xe1\xbe[\x80-\xb4\xb6-\xbc\xbe]|\xe1\xbf[\x82-\x84\x86-\x8c\x90-\x93\x96-\x9b\xa0-\xac\xb2-\xb4\xb6-\xbc]
+UnicodeIDStart_3 \xe1[\x84-\x88\x91-\x98\xb4-\xb6\xb8-\xbb][\x80-\xbf]|\xe2\x81[\xb1\xbf]|\xe2\x82[\x90-\x9c]|\xe2\x84[\x82\x87\x8a-\x93\x95\x98-\x9d\xa4\xa6\xa8\xaa-\xb9\xbc-\xbf]|\xe2\x85[\x85-\x89\x8e\xa0-\xbf]|\xe2\x86[\x80-\x88]|\xe2\xb0[\x80-\xae\xb0-\xbf]|\xe2\xb1[\x80-\x9e\xa0-\xbf]|\xe2\xb3[\x80-\xa4\xab-\xae\xb2\xb3]|\xe2\xb4[\x80-\xa5\xa7\xad\xb0-\xbf]|\xe2\xb5[\x80-\xa7\xaf]|\xe2\xb6[\x80-\x96\xa0-\xa6\xa8-\xae\xb0-\xb6\xb8-\xbe]|\xe2\xb7[\x80-\x86\x88-\x8e\x90-\x96\x98-\x9e]|\xe2[\xb2][\x80-\xbf]|\xe3\x80[\x85-\x87\xa1-\xa9\xb1-\xb5\xb8-\xbc]|\xe3\x81[\x81-\xbf]|\xe3\x82[\x80-\x96\x9b-\x9f\xa1-\xbf]|\xe3\x83[\x80-\xba\xbc-\xbf]|\xe3\x84[\x85-\xad\xb1-\xbf]|\xe3\x86[\x80-\x8e\xa0-\xba]|\xe3\x87[\xb0-\xbf]|\xe3[\x85\x90-\xbf][\x80-\xbf]|\xe4\xb6[\x80-\xb5]|\xe4[\x80-\xb5\xb8-\xbf][\x80-\xbf]|\xe9\xbf[\x80-\x95]|\xe9[\x80-\xbe][\x80-\xbf]|\xea\x92[\x80-\x8c]|\xea\x93[\x90-\xbd]|\xea\x98[\x80-\x8c\x90-\x9f\xaa\xab]|\xea\x99[\x80-\xae\xbf]|\xea\x9a[\x80-\x9d\xa0-\xbf]|\xea\x9b[\x80-\xaf]|\xea\x9c[\x97-\x9f\xa2-\xbf]
+UnicodeIDStart_4 \xea\x9e[\x80-\x88\x8b-\xad\xb0-\xb7]|\xea\x9f[\xb7-\xbf]|\xea\xa0[\x80\x81\x83-\x85\x87-\x8a\x8c-\xa2]|\xea\xa1[\x80-\xb3]|\xea\xa2[\x82-\xb3]|\xea\xa3[\xb2-\xb7\xbb\xbd]|\xea\xa4[\x8a-\xa5\xb0-\xbf]|\xea\xa5[\x80-\x86\xa0-\xbc]|\xea\xa6[\x84-\xb2]|\xea\xa7[\xa0-\xa4\xa6-\xae\x8f\xbb\xba\xaf\xbc-\xbe]|\xea\xa8[\x80-\xa8]|\xea\xa9[\x80-\x82\x84-\x8b\xa0-\xb6\xba\xbe\xbf]|\xea\xaa[\x80-\xaf\xb1\xb5\xb6\xb9-\xbd]|\xea\xab[\x80\xa0\x82\xa3-\xa6\xa1\xa8-\xaa\xa7\xa2\xb2-\xb4\x9b-\x9d]|\xea\xac[\x81-\x86\x89-\x8e\x91-\x96\xa0-\xa6\xa8-\xae\xb0-\xbf]|\xea\xad[\x80-\x9a\x9c-\xa5\xb0-\xbf]|\xea\xaf[\x80-\xa2]|\xea[\x80-\x91\x94-\x97\x9d\xae\xb0-\xbf][\x80-\xbf]|\xed\x9e[\x80-\xa3\xb0-\xbf]|\xed\x9f[\x80-\x86\x8b-\xbb]|\xed[\x80-\x9d][\x80-\xbf]|\xef\xa9[\x80-\xad\xb0-\xbf]|\xef\xab[\x80-\x99]|\xef\xac[\x80-\x86\x93-\x97\x9d\x9f-\xa8\xaa-\xb6\xb8-\xbc\xbe]|\xef\xad[\x80\x81\x83\x84\x86-\xbf]|\xef\xae[\x80-\xb1]|\xef\xaf[\x93-\xbf]|\xef\xb4[\x80-\xbd]|\xef\xb5[\x90-\xbf]|\xef\xb6[\x80-\x8f\x92-\xbf]
+UnicodeIDStart_5 \xef\xb7[\x80-\x87\xb0-\xbb]|\xef\xb9[\xb0-\xb4\xb6-\xbf]|\xef\xbb[\x80-\xbc]|\xef\xbc[\xa1-\xba]|\xef\xbd[\x81-\x9a\xa6-\xbf]|\xef\xbe[\x80-\xbe]|\xef\xbf[\x82-\x87\x8a-\x8f\x92-\x97\x9a-\x9c]|\xef[\xa4-\xa8\xaa\xb0-\xb3\xba][\x80-\xbf]|[\xe5-\xe8\xeb\xec][\x80-\xbf][\x80-\xbf]|\xf0\x90\x80[\x80-\x8b\x8d-\xa6\xa8-\xba\xbc\xbd\xbf]|\xf0\x90\x81[\x80-\x8d\x90-\x9d]|\xf0\x90\x83[\x80-\xba]|\xf0\x90\x85[\x80-\xb4]|\xf0\x90\x8a[\x80-\x9c\xa0-\xbf]|\xf0\x90\x8b[\x80-\x90]|\xf0\x90\x8c[\x80-\x9f\xb0-\xbf]|\xf0\x90\x8d[\x80-\x8a\x90-\xb5]|\xf0\x90\x8e[\x80-\x9d\xa0-\xbf]|\xf0\x90\x8f[\x80-\x83\x88-\x8f\x91-\x95]|\xf0\x90\x92[\x80-\x9d]|\xf0\x90\x94[\x80-\xa7\xb0-\xbf]|\xf0\x90\x95[\x80-\xa3]|\xf0\x90\x9c[\x80-\xb6]|\xf0\x90\x9d[\x80-\x95\xa0-\xa7]|\xf0\x90\xa0[\x80-\x85\x88\x8a-\xb5\xb7\xb8\xbc\xbf]|\xf0\x90\xa1[\x80-\x95\xa0-\xb6]|\xf0\x90\xa2[\x80-\x9e]|\xf0\x90\xa3[\xa0-\xb2\xb4\xb5]|\xf0\x90\xa4[\x80-\x95\xa0-\xb9]|\xf0\x90\xa6[\x80-\xb7\xbe\xbf]|\xf0\x90\xa8[\x80\x90-\x93\x95-\x97\x99-\xb3]
+UnicodeIDStart_6 \xf0\x90\xa9[\xa0-\xbc]|\xf0\x90\xaa[\x80-\x9c]|\xf0\x90\xab[\x80-\x87\x89-\xa4]|\xf0\x90\xac[\x80-\xb5]|\xf0\x90\xad[\x80-\x95\xa0-\xb2]|\xf0\x90\xae[\x80-\x91]|\xf0\x90\xb1[\x80-\x88]|\xf0\x90\xb2[\x80-\xb2]|\xf0\x90\xb3[\x80-\xb2]|\xf0\x90[\x82\x90\x91\x98-\x9b\xb0][\x80-\xbf]|\xf0\x91\x80[\x83-\xb7]|\xf0\x91\x82[\x83-\xaf]|\xf0\x91\x83[\x90-\xa8]|\xf0\x91\x84[\x83-\xa6]|\xf0\x91\x85[\x90-\xb2\xb6]|\xf0\x91\x86[\x83-\xb2]|\xf0\x91\x87[\x81-\x84\x9a\x9c]|\xf0\x91\x88[\x80-\x91\x93-\xab]|\xf0\x91\x8a[\x80-\x86\x88\x8a-\x8d\x8f-\x9d\x9f-\xa8\xb0-\xbf]|\xf0\x91\x8b[\x80-\x9e]|\xf0\x91\x8c[\x85-\x8c\x8f\x90\x93-\xa8\xaa-\xb0\xb2\xb3\xb5-\xb9\xbd]|\xf0\x91\x8d[\xa0\xa1\x90\x9d-\x9f]|\xf0\x91\x92[\x80-\xaf]|\xf0\x91\x93[\x84\x85\x87]|\xf0\x91\x96[\x80-\xae]|\xf0\x91\x97[\x98-\x9b]|\xf0\x91\x98[\x80-\xaf]|\xf0\x91\x99[\x84]|\xf0\x91\x9a[\x80-\xaa]|\xf0\x91\x9c[\x80-\x99]|\xf0\x91\xa2[\xa0-\xbf]|\xf0\x91\xa3[\x80-\x9f\xbf]|\xf0\x91\xab[\x80-\xb8]|\xf0\x92\x8e[\x80-\x99]|\xf0\x92\x91[\x80-\xae]
+UnicodeIDStart_7 \xf0\x92\x95[\x80-\x83]|\xf0\x92[\x80-\x8d\x90\x92-\x94][\x80-\xbf]|\xf0\x93\x90[\x80-\xae]|\xf0\x93[\x80-\x8f][\x80-\xbf]|\xf0\x94\x99[\x80-\x86]|\xf0\x94[\x90-\x98][\x80-\xbf]|\xf0\x96\xa8[\x80-\xb8]|\xf0\x96\xa9[\x80-\x9e]|\xf0\x96\xab[\x90-\xad]|\xf0\x96\xac[\x80-\xaf]|\xf0\x96\xad[\x80-\x83\xa3-\xb7\xbd-\xbf]|\xf0\x96\xae[\x80-\x8f]|\xf0\x96\xbd[\x80-\x84\x90]|\xf0\x96\xbe[\x93-\x9f]|\xf0\x96[\xa0-\xa7\xbc][\x80-\xbf]|\xf0\x9b\x80[\x80\x81]|\xf0\x9b\xb2[\x80-\x88\x90-\x99]|\xf0\x9b\xb1[\x80-\xaa\xb0-\xbc]|\xf0\x9b[\xb0][\x80-\xbf]|\xf0\x9d\x91[\x80-\x94\x96-\xbf]|\xf0\x9d\x92[\x80-\x9c\x9e\x9f\xa2\xa5\xa6\xa9-\xac\xae-\xb9\xbb\xbd-\xbf]|\xf0\x9d\x93[\x80-\x83\x85-\xbf]|\xf0\x9d\x94[\x80-\x85\x87-\x8a\x8d-\x94\x96-\x9c\x9e-\xb9\xbb-\xbe]|\xf0\x9d\x95[\x80-\x84\x86\x8a-\x90\x92-\xbf]|\xf0\x9d\x9a[\x80-\xa5\xa8-\xbf]|\xf0\x9d\x9b[\x80\x82-\x9a\x9c-\xba\xbc-\xbf]|\xf0\x9d\x9c[\x80-\x94\x96-\xb4\xb6-\xbf]|\xf0\x9d\x9d[\x80-\x8e\x90-\xae\xb0-\xbf]|\xf0\x9d\x9e[\x80-\x88\x8a-\xa8\xaa-\xbf]
+UnicodeIDStart {UnicodeIDStart_0}|{UnicodeIDStart_1}|{UnicodeIDStart_2}|{UnicodeIDStart_3}|{UnicodeIDStart_4}|{UnicodeIDStart_5}|{UnicodeIDStart_6}|{UnicodeIDStart_7}|\xf0\x9d\x9f[\x80-\x82\x84-\x8b]|\xf0\x9d[\x90\x96-\x99][\x80-\xbf]|\xf0\x9e\xa3[\x80-\x84]|\xf0\x9e\xb8[\x80-\x83\x85-\x9f\xa1\xa2\xa4\xa7\xa9-\xb2\xb4-\xb7\xb9\xbb]|\xf0\x9e\xb9[\x82\x87\x89\x8b\x8d-\x8f\x91\x92\x94\x97\x99\x9b\x9d\x9f\xa1\xa2\xa4\xa7-\xaa\xac-\xb2\xb4-\xb7\xb9-\xbc\xbe]|\xf0\x9e\xba[\x80-\x89\x8b-\x9b\xa1-\xa3\xa5-\xa9\xab-\xbb]|\xf0\x9e[\xa0-\xa2][\x80-\xbf]|\xf0\xaa\x9b[\x80-\x96]|\xf0\xaa[\x80-\x9a\x9c-\xbf][\x80-\xbf]|\xf0\xab\x9c[\x80-\xb4]|\xf0\xab\xa0[\x80-\x9d\xa0-\xbf]|\xf0\xab[\x80-\x9b\x9d-\x9f\xa1-\xbf][\x80-\xbf]|\xf0\xac\xba[\x80-\xa1]|\xf0\xac[\x80-\xb9][\x80-\xbf]|\xf0\xaf\xa8[\x80-\x9d]|\xf0\xaf[\xa0-\xa7][\x80-\xbf]|\xf0[\xa0-\xa9][\x80-\xbf][\x80-\xbf]
--- /dev/null
+#!/bin/bash
+./apple-make.sh
+grep '^State' build.osx-i386/lex.backup | wc -l
--- /dev/null
+# Cycript - Optimizing JavaScript Compiler/Runtime
+# Copyright (C) 2009-2015 Jay Freeman (saurik)
+
+# GNU Affero General Public License, Version 3 {{{
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# }}}
+
+.DELETE_ON_ERROR:
+
+# Prerequisites shared by both generated tables: the generator scripts...
+unicode := unicode.sh unicode.py
+
+# ...and the Unicode data files they read.
+unicode += DerivedCoreProperties.txt
+unicode += PropList.txt
+
+all: UnicodeIDStart.l UnicodeIDContinue.l
+
+# Download a missing .txt data file from the unicode.org UCD directory.
+%.txt:
+ wget -qc http://www.unicode.org/Public/UCD/latest/ucd/$@
+
+# Definition UnicodeIDStart: ID_Start plus Other_ID_Start code points.
+UnicodeIDStart.l: $(unicode)
+ ./unicode.sh UnicodeIDStart ID_Start DerivedCoreProperties.txt Other_ID_Start PropList.txt >$@
+
+# Definition UnicodeIDContinue: ID_Continue plus Other_ID_Continue code points.
+UnicodeIDContinue.l: $(unicode)
+ ./unicode.sh UnicodeIDContinue ID_Continue DerivedCoreProperties.txt Other_ID_Continue PropList.txt >$@
+
+.PHONY: all
--- /dev/null
+#!/usr/bin/python
+
+# Cycript - Optimizing JavaScript Compiler/Runtime
+# Copyright (C) 2009-2015 Jay Freeman (saurik)
+
+# GNU Affero General Public License, Version 3 {{{
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# }}}
+
+import sys
+
+# trees[n - 1] is a trie, keyed by byte value, holding every input code point
+# whose UTF-8 encoding is n bytes long.
+trees = [dict(), dict(), dict(), dict()]
+
+for line in sys.stdin:
+ # Keep only the leading field: a single hex code point or "first..last".
+ # (assumes UCD-style lines where that field fits in 14 columns -- TODO confirm)
+ line = line[0:14]
+ line = line.rstrip(' \n')
+ line = line.split('..')
+ # a single code point is treated as a range of one
+ if len(line) == 1:
+ line.append(line[0])
+ line = [int(end, 16) for end in line]
+ for point in range(line[0], line[1] + 1):
+ # http://stackoverflow.com/questions/7105874/
+ # build the character from a \U escape, then take its UTF-8 bytes
+ point = "\\U%08x" % point
+ point = point.decode('unicode-escape')
+ point = point.encode('utf-8')
+ point = list(point)
+ # insert the byte sequence into the trie for its encoded length
+ tree = trees[len(point) - 1]
+ for unit in point:
+ unit = ord(unit)
+ tree = tree.setdefault(unit, dict())
+
+items = []
+
+def build(index, tree, units):
+ if index == 0:
+ keys = tree.keys()
+ else:
+ keys = []
+ for unit, tree in tree.iteritems():
+ if build(index - 1, tree, units + [unit]):
+ keys.append(unit)
+
+ if len(keys) == 0:
+ return False
+ if len(keys) == 0xc0 - 0x80:
+ return True
+
+ item = ''
+ for unit in units:
+ item += '\\x%02x' % unit
+ item += '['
+
+ first = -1
+ last = -1
+
+ assert len(keys) != 0
+ for unit in keys + [-1]:
+ if unit != -1:
+ if first == -1:
+ first = unit
+ last = unit
+ continue
+ if unit == last + 1:
+ last = unit
+ continue
+
+ item += '\\x%02x' % first
+ if first != last:
+ if last != first + 1:
+ item += '-'
+ item += '\\x%02x' % last
+
+ first = unit
+ last = unit
+
+ item += ']'
+
+ for i in range(0, index):
+ item += '[\\x80-\\xbf]'
+
+ items.append(item)
+ return False
+
+for index, tree in enumerate(trees):
+ build(index, tree, [])
+
+# Print the collected patterns as definitions named after argv[1]. Items are
+# accumulated into a group; once a group exceeds 1000 characters it is printed
+# as its own numbered definition <name>_<n>. The final <name> definition
+# alternates references to those groups with any leftover items.
+name = sys.argv[1]
+parts = []
+part = []
+length = 0
+index = 0
+for item in items:
+ part += [item]
+ # + 1 accounts for the '|' separator
+ length += len(item) + 1
+ if length > 1000:
+ indexed = name + '_' + str(index)
+ index += 1
+ print indexed, '|'.join(part)
+ parts += ['{' + indexed + '}']
+ part = []
+ length = 0
+# items left in the last, unflushed group go directly into the top-level definition
+parts += part
+print name, '|'.join(parts)
--- /dev/null
+#!/bin/bash
+
+# Cycript - Optimizing JavaScript Compiler/Runtime
+# Copyright (C) 2009-2015 Jay Freeman (saurik)
+
+# GNU Affero General Public License, Version 3 {{{
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+# }}}
+
+# Usage: unicode.sh NAME (PROPERTY DATAFILE)...
+# For each PROPERTY/DATAFILE pair, selects the DATAFILE lines containing
+# "; PROPERTY #" and pipes all of them to ./unicode.py NAME.
+set -e
+
+name=$1
+shift 1
+
+# consume the remaining arguments two at a time
+while [[ $# != 0 ]]; do
+ prop=$1
+ data=$2
+ shift 2
+ # -F: match the property name as a fixed string, not a regex
+ grep -F "; ${prop} #" "${data}"
+done | ./unicode.py "${name}"