+ // Test ticket 6789: implement Java-compatible Unicode, UnicodeBig and UnicodeLittle converters
+ // For details about these encodings see convrtrs.txt.
+ // NOTE(review): the row layout is defined outside this excerpt. The rows read as: converter
+ // name, input bytes, expected text, one input-byte offset per expected UTF-16 code unit,
+ // followed by further option fields -- confirm against the table header.
+ // In the expected text, "\\xNN" stands for the four literal characters \xNN, which is why
+ // each such escape accounts for four entries in the offsets vector.
+ // Standard UTF-16
+ // Expected: without a BOM the bytes are read big-endian; FE FF or FF FE is consumed and
+ // selects the byte order (FF FE turns 00 61 into U+6100).
+ { "UTF-16", :bin{ 00610062 }, "ab", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UTF-16", :bin{ feff0061 }, "a", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UTF-16", :bin{ fffe0061 }, "\u6100", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ // Java "Unicode" requires a BOM
+ // Expected: without one, the first two bytes come out as \x00\x61 escapes (first row).
+ { "+UTF-16,version=1", :bin{ 00610062 }, "\\x00\\x61b", :intvector{ 0,0,0,0,0,0,0,0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "+UTF-16,version=1", :bin{ feff0061 }, "a", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "+UTF-16,version=1", :bin{ fffe0061 }, "\u6100", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ // Standard UTF-16BE
+ // Expected: FE FF and FF FE are not consumed; they appear in the output as U+FEFF and U+FFFE.
+ { "UTF-16BE", :bin{ 00610062 }, "ab", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UTF-16BE", :bin{ feff0061 }, "\ufeffa", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UTF-16BE", :bin{ fffe0061 }, "\ufffea", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ // Java "UnicodeBig" requires a BE BOM or no BOM; it consumes the BE BOM
+ // Expected: a leading FF FE comes out as \xFF\xFE escapes (last row).
+ { "UTF-16BE,version=1",:bin{ 00610062 }, "ab", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UnicodeBig", :bin{ feff0061 }, "a", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UnicodeBig", :bin{ fffe0061 }, "\\xFF\\xFEa", :intvector{ 0,0,0,0,0,0,0,0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ // Standard UTF-16LE
+ // Expected: FF FE and FE FF are not consumed; read little-endian they appear in the
+ // output as U+FEFF and U+FFFE.
+ { "UTF-16LE", :bin{ 61006200 }, "ab", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UTF-16LE", :bin{ fffe6100 }, "\ufeffa", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UTF-16LE", :bin{ feff6100 }, "\ufffea", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ // Java "UnicodeLittle" requires an LE BOM or no BOM; it consumes the LE BOM
+ // Expected: a leading FE FF comes out as \xFE\xFF escapes (last row).
+ { "UTF-16LE,version=1",:bin{ 61006200 }, "ab", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "UnicodeLittle", :bin{ fffe6100 }, "a", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "x-UTF-16LE-BOM", :bin{ feff6100 }, "\\xFE\\xFFa", :intvector{ 0,0,0,0,0,0,0,0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+
+ // Test ticket 7704: implement Java-compatible "UTF-16" converter.
+ // Same as standard UTF-16 but fromUnicode always writes big-endian byte stream.
+ // The expected text and offsets in these three rows match those of the plain "UTF-16"
+ // rows for the same input bytes.
+ { "+UTF-16,version=2", :bin{ 00610062 }, "ab", :intvector{ 0,2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "+UTF-16,version=2", :bin{ feff0061 }, "a", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+ { "+UTF-16,version=2", :bin{ fffe0061 }, "\u6100", :intvector{ 2 }, :int{1}, :int{0}, "", "&C", :bin{""} }
+
+ // Test ticket 5691: consistent illegal sequences
+ // The following test cases are for illegal character byte sequences.
+ //
+ // Unfortunately, we cannot use the Shift-JIS examples from the ticket
+ // comments because our Shift-JIS table is Windows-compatible and
+ // therefore has no illegal single bytes. Same for GBK.
+ // Instead, we use the stricter GB 18030 also for 2-byte examples.
+ // The byte sequences are generally slightly different from the ticket
+ // comment, simply using assigned characters rather than just
+ // theoretically valid sequences.
+ //
+ // In the expected text, "\\xNN" is the literal four-character escape \xNN for an
+ // offending input byte; each escape therefore occupies four entries of the offsets vector.
+ //
+ // First case: 81 followed by ASCII '<' (3c) -- only 81 is escaped and '<' is still
+ // converted at its own offset; 81 ff is escaped as one two-byte unit (eight offsets of 5).
+ {
+ "gb18030",
+ :bin{ 618140813c81ff7a },
+ "a\u4e02\\x81<\\x81\\xFFz",
+ :intvector{ 0,1,3,3,3,3,4,5,5,5,5,5,5,5,5,7 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "EUC-JP",
+ :bin{ 618fb0a98fb03c8f3cb0a97a },
+ "a\u4e28\\x8F\\xB0<\\x8F<\u9022z",
+ :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "gb18030",
+ :bin{ 618130fc318130fc8181303c3e813cfc817afe90a8bc },
+ "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z\ue854\u1e3f",
+ :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17,18,20 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // UTF-8: the first expected character is a supplementary code point, so it takes two
+ // offset entries (0,1,1 covers 'a' plus the surrogate pair).
+ {
+ "UTF-8",
+ :bin{ 61f1808182f180813cf18081fff180ff3cf1ff3c3e7a },
+ "a\U00040042\\xF1\\x80\\x81<\\xF1\\x80\\x81\\xFF\\xF1\\x80\\xFF<\\xF1\\xFF<>z",
+ :intvector{ 0,1,1,5,5,5,5,5,5,5,5,5,5,5,5,8,9,9,9,9,9,9,9,9,9,9,9,9,12,12,12,12,13,13,13,13,13,13,13,13,15,15,15,15,16,17,17,17,17,18,18,18,18,19,20,21 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "ISO-2022-JP",
+ :bin{ 1b24424141af4142affe41431b2842 },
+ "\u758f\\xAF\u758e\\xAF\\xFE\u790e",
+ :intvector{ 3,5,5,5,5,6,8,8,8,8,8,8,8,8,10 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "ibm-25546",
+ :bin{ 411b242943420e4141af4142affe41430f5a },
+ "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
+ :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // Same input bytes and expectations as the ibm-25546 case directly above.
+ {
+ "ISO-2022-KR",
+ :bin{ 411b242943420e4141af4142affe41430f5a },
+ "AB\uc88b\\xAF\uc88c\\xAF\\xFE\uc88dZ",
+ :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "ISO-2022-CN",
+ :bin{ 411b242941420e4141af4142affe41430f5a },
+ "AB\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
+ :intvector{ 0,5,7,9,9,9,9,10,12,12,12,12,12,12,12,12,14,17 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // This case contains no escaped bytes in its expected text.
+ {
+ "ISO-2022-CN-CNS",
+ :bin{ 411b2429470e21702541256f0f },
+ "A\u00a7\u03c4\u02c7",
+ :intvector{ 0,6,8,10 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "HZ",
+ :bin{ 417e7b4141af4142affe41437e7d5a },
+ "A\u4eae\\xAF\u8c05\\xAF\\xFE\u64a9Z",
+ :intvector{ 0,3,5,5,5,5,6,8,8,8,8,8,8,8,8,10,14 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // Test ticket 5691: consistent illegal sequences
+ // The following test cases are for illegal escape/designator/shift sequences.
+ //
+ // ISO-2022-JP and -CN with illegal escape sequences.
+ // Expected in both cases: only the ESC byte (1b) of an illegal sequence is escaped as
+ // \x1B; the bytes that followed it are converted as ordinary input at their own offsets.
+ {
+ "ISO-2022-JP",
+ :bin{ 611b24201b244241411b283f1b28427a },
+ "a\\x1B$ \u758f\\x1B\u2538z",
+ :intvector{ 0,1,1,1,1,2,3,7,9,9,9,9,10,15 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "ISO-2022-CN",
+ :bin{ 611b2429201b2429410e41410f7a },
+ "a\\x1B$) \u4eaez",
+ :intvector{ 0,1,1,1,1,2,3,4,10,13 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // Test ticket 5691: ISO-2022-JP-2 with illegal single-shift SS2 and SS3 sequences.
+ // The first ESC N comes before its designator sequence, the last sequence is ESC+space.
+ // Expected: both illegal sequences yield a \x1B escape for the ESC byte alone, and the
+ // byte after it is converted as ordinary text.
+ // NOTE(review): \xce in the expected text is U+00CE, presumably the result of the
+ // single-shifted 4e that follows the designator 1b 2e 41 -- verify against the converter.
+ {
+ "ISO-2022-JP-2",
+ :bin{ 4e1b4e4e1b2e414e1b4e4e4e1b204e },
+ "N\\x1BNNN\xceN\\x1B N",
+ :intvector{ 0,1,1,1,1,2,3,7,10,11,12,12,12,12,13,14 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ {
+ "ISO-2022-CN-EXT",
+ :bin{ 4e1b4e4e1b242a484e1b4e4e4e4e1b204e },
+ "N\\x1BNNN\u8f0eN\\x1B N",
+ :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // The following case is intentionally commented out; the reason is given in its first line.
+ /*
+ * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
+ {
+ "ISO-2022-CN-EXT",
+ :bin{ 4f1b4f4f1b242b494f1b4f4f4f4f1b204f },
+ "O\\x1BOOO\u492bO\\x1B O",
+ :intvector{ 0,1,1,1,1,2,3,8,11,13,14,14,14,14,15,16 },
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ */
+ // Test ticket 5691: HZ with illegal tilde sequences.
+ // The offsets vector is split over three lines; the trailing SBCS/DBCS notes appear to
+ // mark which HZ mode the corresponding part of the input is in (7e 7b at offset 10 and
+ // 7e 7d at offset 23 are consumed without producing output).
+ {
+ "HZ",
+ :bin{ 417e20427e21437e80447e7b41417e207e41427e7f41437e7d5a },
+ "A\\x7E B\\x7E!C\\x7E\\x80D\u4eae\\x7E\\x20\\x7E\u8c05\\x7E\\x7F\u64a9Z",
+ :intvector{ 0,1,1,1,1,2,3,4,4,4,4,5,6,7,7,7,7,7,7,7,7,9, // SBCS
+ 12,14,14,14,14,14,14,14,14,16,16,16,16,17,19,19,19,19,19,19,19,19,21, // DBCS
+ 25 }, // SBCS
+ :int{1}, :int{0}, "", "&C", :bin{""}
+ }
+ // Test ticket 5691: Example from Peter Edberg.
+ // This case uses "?" in the option field where the other ticket-5691 cases use "&C";
+ // the expected text here contains U+FFFD where those cases contain \xNN escapes.
+ {
+ "ISO-2022-JP",
+ :bin{ 1b244230212f7e742630801b284a621b2458631b2842648061 },
+ "\u4e9c\ufffd\u7199\ufffdb\ufffd$Xcd\ufffda",
+ :intvector{ 3,5,7,9,14,15,16,17,18,22,23,24 },
+ :int{1}, :int{0}, "", "?", :bin{""}
+ }
+ // Test bug 6071 (2:1 Unicode:charset SBCS mapping).
+ // Three input bytes produce four expected code units; the last two ("uv") share offset 2,
+ // i.e. both come from the single byte 08.
+ {
+ "*test1bmp",
+ :bin{ 050008 },
+ "e@uv",
+ :intvector{ 0,1,2,2 },
+ :int{1}, :int{1}, "", "?", :bin{""}
+ }
+ // Test that HZ limits its byte values to lead bytes 21..7d and trail bytes 21..7e.
+ // Out-of-range input is expected as U+FFFD in the output text.
+ {
+ "HZ",
+ :bin{ 7e7b21212120217e217f772100007e217e7e7d207e7e807e0a2b },
+ "\u3000\ufffd\u3013\ufffd\u9ccc\ufffd\ufffd\u3013 ~\ufffd+",
+ :intvector{ 2,4,6,8,10,12,14,15,19,20,22,25 },
+ :int{1}, :int{1}, "", "?", :bin{""}
+ }
+ // Improve coverage of ISO-2022-JP converter with hardcoded JIS X 0201 and
+ // using the Shift-JIS table for JIS X 0208 (ticket #5797).
+ // The input starts with 1b 28 4a and switches with 1b 24 42 at offset 6; input that
+ // cannot be converted is expected as U+FFFD.
+ {
+ "ISO-2022-JP",
+ :bin{ 1b284a7d7e801b2442306c20217f7e21202160217f22202225227f5f211b2842 },
+ "}\u203e\ufffd\u4e00\ufffd\ufffd\ufffd\ufffd\xf7\ufffd\ufffd\u25b2\ufffd\u6f3e",
+ :intvector{ 3,4,5,9,11,12,14,16,17,19,21,23,25,27 },
+ :int{1}, :int{1}, "", "?", :bin{""}
+ }
+ // Improve coverage of ISO-2022-JP converter by simulating erroneous input.
+ // This case uses "0" in the option field; the expected text holds only five characters
+ // and contains neither \xNN escapes nor U+FFFD for the remaining input bytes.
+ {
+ "ISO-2022-JP-2",
+ :bin{ 0f0ed11b2e41461b244141411b4e411b2e4147451b4ed31b2e4641411b4ed2 },
+ "\u0046\u4eae\u00c1\u6865\u4eae",
+ :intvector{ 6, 10, 14, 18, 26 },
+ :int{1}, :int{0}, "", "0", :bin{""}
+ }
+ // Improve coverage of JIS7 converter by simulating incomplete shifted input.
+ // The two input bytes 0e 11 are expected to produce no output at all: the expected
+ // text and the offsets vector are both empty.
+ {
+ "JIS7",
+ :bin{ 0e11 },
+ "",
+ :intvector{},
+ :int{1}, :int{0}, "", "0", :bin{""}
+ }