+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
-* Copyright (C) 2003-2004, International Business Machines
+* Copyright (C) 2003-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: ucnv_ext.h
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
/*
* See icuhtml/design/conversion/conversion_extensions.html
*
- * Conversion extensions serve two purposes:
+ * Conversion extensions serve three purposes:
* 1. They support m:n mappings.
* 2. They support extension-only conversion files that are used together
* with the regular conversion data in base files.
+ * 3. They support mappings with more complicated meta data,
+ * for example "good one-way" mappings (|4).
*
* A base file may contain an extension table (explicitly requested or
* implicitly generated for m:n mappings), but its extension table is not
* return no mapping, but request for <subchar1>;
* }
* if(bit 31 set) {
- * roundtrip;
+ * roundtrip (|0);
+ * } else if(bit 30 set) {
+ * "good one-way" mapping (|4); -- new in ICU4C 51, _MBCSHeader.version 5.4/4.4
* } else {
- * fallback;
+ * normal fallback (|1);
* }
- * // bits 30..29 reserved, 0
+ * // bit 29 reserved, 0
* length=(value>>24)&0x1f; (bits 28..24)
* if(length==1..3) {
* bits 23..0 contain 1..3 bytes, padded with 00s on the left;
UConverterFromUnicodeArgs *pArgs, int32_t srcIndex,
UErrorCode *pErrorCode);
+/*
+ * Add code points and strings to the set according to the extension mappings.
+ *
+ * Parameters (NOTE(review): roles are inferred from the names and types in
+ * this declaration; confirm against the implementation):
+ * sharedData - presumably the converter data whose extension mappings are read
+ * sa - presumably the adder through which code points and strings reach the set
+ * which - selects which Unicode set is requested
+ * filter - restricts which mappings are passed through; see the limitation below
+ * pErrorCode - presumably the usual ICU in/out error code
+ *
+ * Limitation on the UConverterSetFilter:
+ * The filters currently assume that they are used with 1:1 mappings.
+ * They only apply to single input code points, and then they pass through
+ * only mappings with single-charset-code results.
+ * For example, the Shift-JIS filter only works for 2-byte results and tests
+ * that those 2 bytes are in the JIS X 0208 range of Shift-JIS.
+ */
U_CFUNC void
ucnv_extGetUnicodeSet(const UConverterSharedData *sharedData,
- USetAdder *sa,
+ const USetAdder *sa,
UConverterUnicodeSet which,
+ UConverterSetFilter filter,
UErrorCode *pErrorCode);
/* toUnicode helpers -------------------------------------------------------- */
#define UCNV_EXT_FROM_U_LENGTH_SHIFT 24
#define UCNV_EXT_FROM_U_ROUNDTRIP_FLAG ((uint32_t)1<<31)
-#define UCNV_EXT_FROM_U_RESERVED_MASK 0x60000000
+#define UCNV_EXT_FROM_U_GOOD_ONE_WAY_FLAG 0x40000000 /* bit 30: "good one-way" mapping (|4) */
+#define UCNV_EXT_FROM_U_STATUS_MASK 0xc0000000 /* bits 31..30: roundtrip and good-one-way flags together */
+#define UCNV_EXT_FROM_U_RESERVED_MASK 0x20000000 /* bit 29: reserved, 0 */
#define UCNV_EXT_FROM_U_DATA_MASK 0xffffff
/* special value for "no mapping" to <subchar1> (impossible roundtrip to 0 bytes, value 01) */
#define UCNV_EXT_FROM_U_IS_ROUNDTRIP(value) (((value)&UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)!=0)
#define UCNV_EXT_FROM_U_MASK_ROUNDTRIP(value) ((value)&~UCNV_EXT_FROM_U_ROUNDTRIP_FLAG)
-/* use after masking off the roundtrip flag */
+/* get length; masks away all other bits */
#define UCNV_EXT_FROM_U_GET_LENGTH(value) (int32_t)(((value)>>UCNV_EXT_FROM_U_LENGTH_SHIFT)&UCNV_EXT_MAX_BYTES)
/* get bytes or bytes index */