[apple/icu.git] / icuSources / common / ucptrie_impl.h

// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// ucptrie_impl.h (modified from utrie2_impl.h)
// created: 2017dec29 Markus W. Scherer

#ifndef __UCPTRIE_IMPL_H__
#define __UCPTRIE_IMPL_H__

#include "unicode/ucptrie.h"
#ifdef UCPTRIE_DEBUG
#include "unicode/umutablecptrie.h"
#endif

// UCPTrie signature values, in platform endianness and opposite endianness.
// The UCPTrie signature ASCII byte values spell "Tri3".
#define UCPTRIE_SIG     0x54726933
#define UCPTRIE_OE_SIG  0x33697254

/**
 * Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
 *
 * In the serialized form this 16-byte header is followed by the uint16_t index array
 * and then by the data array.
 * Two quantities are 20 bits wide (the data length and the data null block offset):
 * their lower 16 bits each have a field of their own,
 * and their upper 4 bits are packed into the options field.
 * @internal
 */
struct UCPTrieHeader {
    /** "Tri3" in big-endian US-ASCII (0x54726933) */
    uint32_t signature;

    /**
     * Options bit field:
     * Bits 15..12: Data length bits 19..16.
     * Bits 11..8: Data null block offset bits 19..16.
     * Bits 7..6: UCPTrieType
     * Bits 5..3: Reserved (0).
     * Bits 2..0: UCPTrieValueWidth
     */
    uint16_t options;

    /** Total length of the index tables, counted in uint16_t index entries. */
    uint16_t indexLength;

    /**
     * Data length bits 15..0.
     * Bits 19..16 of the data length are stored in options bits 15..12.
     */
    uint16_t dataLength;

    /**
     * Index-3 null block offset, 0x7fff or 0xffff if none.
     * (0x7fff is UCPTRIE_NO_INDEX3_NULL_OFFSET.)
     */
    uint16_t index3NullOffset;

    /**
     * Data null block offset bits 15..0.
     * Bits 19..16 of the offset are stored in options bits 11..8.
     * If there is no data null block, the combined 20-bit offset is normally
     * 0xfffff (UCPTRIE_NO_DATA_NULL_OFFSET), in which case this field holds 0xffff.
     */
    uint16_t dataNullOffset;

    /**
     * First code point of the single-value range ending with U+10ffff,
     * rounded up and then shifted right by UCPTRIE_SHIFT_2.
     * (highStart is recovered as shiftedHighStart<<UCPTRIE_SHIFT_2.)
     */
    uint16_t shiftedHighStart;
};

/**
 * Bit masks for decoding UCPTrieHeader.options,
 * plus the sentinel offsets that stand for "there is no null block".
 * @internal
 */
enum {
    /** options bits 15..12: bits 19..16 of the data length. */
    UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000,

    /** options bits 11..8: bits 19..16 of the data null block offset. */
    UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0x0f00,

    /** options bits 5..3: reserved, documented as 0 in the header. */
    UCPTRIE_OPTIONS_RESERVED_MASK = 0x0038,

    /** options bits 2..0: the UCPTrieValueWidth. */
    UCPTRIE_OPTIONS_VALUE_BITS_MASK = 0x0007,

    /**
     * index3NullOffset value meaning that the trie has no index-3 null block.
     * Bit 15 is left clear in this value because that bit is in use
     * when the index-3 contains 18-bit indexes.
     */
    UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,

    /**
     * Data null block offset (all 20 bits set) meaning that
     * the trie has no data null block.
     */
    UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff
};

// Internal constants: table lengths, shift sizes and masks of the index/data structure.
enum {
    /** Number of entries in the BMP index table. 1024=0x400 */
    UCPTRIE_BMP_INDEX_LENGTH = (0x10000 >> UCPTRIE_FAST_SHIFT),

    /** Fast-indexing limit of a "small" trie: U+1000. */
    UCPTRIE_SMALL_LIMIT = 0x1000,

    /** Number of fast-index entries for the code points below UCPTRIE_SMALL_LIMIT. */
    UCPTRIE_SMALL_INDEX_LENGTH = (UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT),

    /** Shift size for getting the index-3 table offset. */
    UCPTRIE_SHIFT_3 = 4,

    /** Shift size for getting the index-2 table offset. 9=4+5 */
    UCPTRIE_SHIFT_2 = UCPTRIE_SHIFT_3 + 5,

    /** Shift size for getting the index-1 table offset. 14=9+5 */
    UCPTRIE_SHIFT_1 = UCPTRIE_SHIFT_2 + 5,

    /**
     * Distance between the index-2 and index-3 shift sizes,
     * for getting an index-2 offset from an index-3 offset. 5=9-4
     */
    UCPTRIE_SHIFT_2_3 = (UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3),

    /**
     * Distance between the index-1 and index-2 shift sizes,
     * for getting an index-1 offset from an index-2 offset. 5=14-9
     */
    UCPTRIE_SHIFT_1_2 = (UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2),

    /**
     * Number of index-1 entries that cover the BMP. (4)
     * The serialized form leaves out this part of the index-1 table.
     */
    UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = (0x10000 >> UCPTRIE_SHIFT_1),

    /** Number of entries in one index-2 block. 32=0x20 */
    UCPTRIE_INDEX_2_BLOCK_LENGTH = (1 << UCPTRIE_SHIFT_1_2),

    /** Mask that keeps the low bits which select an entry inside an index-2 block. */
    UCPTRIE_INDEX_2_MASK = (UCPTRIE_INDEX_2_BLOCK_LENGTH - 1),

    /** Number of code points covered by one index-2 table entry. 512=0x200 */
    UCPTRIE_CP_PER_INDEX_2_ENTRY = (1 << UCPTRIE_SHIFT_2),

    /** Number of entries in one index-3 block. 32=0x20 */
    UCPTRIE_INDEX_3_BLOCK_LENGTH = (1 << UCPTRIE_SHIFT_2_3),

    /** Mask that keeps the low bits which select an entry inside an index-3 block. */
    UCPTRIE_INDEX_3_MASK = (UCPTRIE_INDEX_3_BLOCK_LENGTH - 1),

    /** Number of entries in one small data block. 16=0x10 */
    UCPTRIE_SMALL_DATA_BLOCK_LENGTH = (1 << UCPTRIE_SHIFT_3),

    /** Mask that keeps the low bits which select an entry inside a small data block. */
    UCPTRIE_SMALL_DATA_MASK = (UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1)
};

/**
 * Function type of the range getter that ucptrie_internalGetRange() receives
 * as its getRange argument.
 * The trie is passed as an untyped (const void *) pointer.
 *
 * NOTE(review): presumably returns the end of the same-value range that begins at start
 * and writes the (optionally filtered) range value to *pValue, like ucptrie_getRange() --
 * confirm against the functions that are passed in.
 */
typedef UChar32
UCPTrieGetRange(const void *trie, UChar32 start,
                UCPMapValueFilter *filter, const void *context, uint32_t *pValue);

/**
 * Internal range-iteration helper, declared here and implemented elsewhere.
 * Takes a UCPTrieGetRange function together with the untyped trie pointer,
 * plus a UCPMapRangeOption and a surrogateValue in addition to
 * the start/filter/context/pValue parameters of the getRange function type.
 *
 * NOTE(review): presumably calls getRange and adjusts the result for surrogate code points
 * according to option and surrogateValue -- confirm against the implementation.
 */
U_CFUNC UChar32
ucptrie_internalGetRange(UCPTrieGetRange *getRange,
                         const void *trie, UChar32 start,
                         UCPMapRangeOption option, uint32_t surrogateValue,
                         UCPMapValueFilter *filter, const void *context, uint32_t *pValue);

#ifdef UCPTRIE_DEBUG
// Debugging helpers; these are declared only when UCPTRIE_DEBUG is defined.

// NOTE(review): presumably prints the lengths of the trie's arrays,
// labeled with the string "which" -- confirm against the implementation.
U_CFUNC void
ucptrie_printLengths(const UCPTrie *trie, const char *which);

// NOTE(review): presumably attaches a name to the mutable trie builder for debug output --
// confirm against the implementation.
U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
#endif

/*
 * Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
 * For overview information see http://site.icu-project.org/design/struct/utrie
 *
 * The binary trie data should be 32-bit-aligned.
 * The overall layout is:
 *
 * UCPTrieHeader header; -- 16 bytes, see struct definition above
 * uint16_t index[header.indexLength];
 * uintXY_t data[header.dataLength];
 *
 * The trie data array is an array of uint16_t, uint32_t, or uint8_t,
 * specified via the UCPTrieValueWidth when building the trie.
 * The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
 * The overall length of the trie data is a multiple of 4 bytes.
 * (Padding is added at the end of the index array and/or near the end of the data array as needed.)
 *
 * The length of the data array (dataLength) is stored as an integer split across two fields
 * of the header struct (high bits in header.options).
 *
 * The trie type can be "fast" or "small" which determines the index structure,
 * specified via the UCPTrieType when building the trie.
 *
 * The type and valueWidth are stored in the header.options.
 * There are reserved type and valueWidth values, and reserved header.options bits.
 * They could be used in future format extensions.
 * Code reading the trie structure must fail with an error when unknown values or options are set.
 *
 * Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
 *
 * Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
 * For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
 * For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
 *
 * All code points in the range highStart..U+10FFFF map to a single highValue
 * which is stored at the second-to-last position of the data array.
 * (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
 * The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
 * (UCPTRIE_SHIFT_2=9)
 *
 * Values for code points fast_limit..highStart-1 are found via four-stage lookup.
 * The data block size is smaller for this range than for the fast range.
 * This together with more index stages with small blocks makes this range
 * more easily compactable.
 *
 * There is also a trie error value stored at the last position of the data array.
 * (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
 * It is intended to be returned for inputs that are not Unicode code points
 * (outside U+0000..U+10FFFF), or in string processing for ill-formed input
 * (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
 *
 * For a "fast" trie:
 *
 * The index array starts with the BMP index table for BMP code point lookup.
 * Its length is 1024=0x400.
 *
 * The supplementary index-1 table follows the BMP index table.
 * Variable length, for code points up to highStart-1.
 * Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
 * (For 0x100000 supplementary code points U+10000..U+10ffff.)
 *
 * After this index-1 table follow the variable-length index-3 and index-2 tables.
 *
 * The supplementary index tables are omitted completely
 * if there is only BMP data (highStart<=U+10000).
 *
 * For a "small" trie:
 *
 * The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
 *
 * The "supplementary" index tables are always stored.
 * The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
 *
 * For both trie types:
 *
 * The last index-2 block may be a partial block, storing indexes only for code points
 * below highStart.
 *
 * Lookup for ASCII code point c:
 *
 * Linear access from the start of the data array.
 *
 * value = data[c];
 *
 * Lookup for fast-range code point c:
 *
 * Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
 * fetch the index array value at that offset,
 * add the lower code point bits, index into the data array.
 *
 * value = data[index[c>>6] + (c&0x3f)];
 *
 * (This works for ASCII as well.)
 *
 * Lookup for small-range code point c below highStart:
 *
 * Split the code point into four bit fields using several sets of shifts & masks
 * to read consecutive values from the index-1, index-2, index-3 and data tables.
 *
 * If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
 * then the data block offsets are stored directly as uint16_t.
 *
 * Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
 * has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
 * an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
 * in the additional word.
 *
 * See ucptrie_internalSmallIndex() for details.
 *
 * (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
 *
 * Compaction:
 *
 * Multiple code point ranges ("blocks") that are aligned on certain boundaries
 * (determined by the shifting/bit fields of code points) and
 * map to the same data values normally share a single subsequence of the data array.
 * Data blocks can also overlap partially.
 * (Depending on the builder code finding duplicate and overlapping blocks.)
 *
 * Iteration over same-value ranges:
 *
 * Range iteration (ucptrie_getRange()) walks the structure from a start code point
 * until some code point is found that maps to a different value;
 * the end of the returned range is just before that.
 *
 * The header.dataNullOffset (split across two header fields, high bits in header.options)
 * is the offset of a widely shared data block filled with one single value.
 * It helps quickly skip over large ranges of data with that value.
 * The builder must ensure that if the start of any data block (fast or small)
 * matches the dataNullOffset, then the whole block must be filled with the null value.
 * Special care must be taken if there is no fast null data block
 * but a small one, which is shorter, and it matches the *start* of some fast data block.
 *
 * Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
 * where all index entries point to the dataNullOffset.
 * If there is no such data or index-3 block, then these offsets are set to
 * values that cannot be reached (data offset out of range/reserved index offset),
 * normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
 */

#endif
Commit	Line	Data
3d1f044b A	1	// © 2017 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3
	4	// ucptrie_impl.h (modified from utrie2_impl.h)
	5	// created: 2017dec29 Markus W. Scherer
	6
	7	#ifndef __UCPTRIE_IMPL_H__
	8	#define __UCPTRIE_IMPL_H__
	9
	10	#include "unicode/ucptrie.h"
	11	#ifdef UCPTRIE_DEBUG
	12	#include "unicode/umutablecptrie.h"
	13	#endif
	14
	15	// UCPTrie signature values, in platform endianness and opposite endianness.
	16	// The UCPTrie signature ASCII byte values spell "Tri3".
	17	#define UCPTRIE_SIG 0x54726933
	18	#define UCPTRIE_OE_SIG 0x33697254
	19
	20	/**
	21	* Header data for the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
	22	* @internal
	23	*/
	24	struct UCPTrieHeader {
	25	/** "Tri3" in big-endian US-ASCII (0x54726933) */
	26	uint32_t signature;
	27
	28	/**
	29	* Options bit field:
	30	* Bits 15..12: Data length bits 19..16.
	31	* Bits 11..8: Data null block offset bits 19..16.
	32	* Bits 7..6: UCPTrieType
	33	* Bits 5..3: Reserved (0).
	34	* Bits 2..0: UCPTrieValueWidth
	35	*/
	36	uint16_t options;
	37
	38	/** Total length of the index tables. */
	39	uint16_t indexLength;
	40
	41	/** Data length bits 15..0. */
	42	uint16_t dataLength;
	43
	44	/** Index-3 null block offset, 0x7fff or 0xffff if none. */
	45	uint16_t index3NullOffset;
	46
	47	/** Data null block offset bits 15..0, 0xfffff if none. */
	48	uint16_t dataNullOffset;
	49
	50	/**
	51	* First code point of the single-value range ending with U+10ffff,
	52	* rounded up and then shifted right by UCPTRIE_SHIFT_2.
	53	*/
	54	uint16_t shiftedHighStart;
	55	};
	56
	57	/**
	58	* Constants for use with UCPTrieHeader.options.
	59	* @internal
	60	*/
	61	enum {
	62	UCPTRIE_OPTIONS_DATA_LENGTH_MASK = 0xf000,
	63	UCPTRIE_OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00,
	64	UCPTRIE_OPTIONS_RESERVED_MASK = 0x38,
65	UCPTRIE_OPTIONS_VALUE_BITS_MASK = 7,
66	/**
67	* Value for index3NullOffset which indicates that there is no index-3 null block.
68	* Bit 15 is unused for this value because this bit is used if the index-3 contains
69	* 18-bit indexes.
70	*/
71	UCPTRIE_NO_INDEX3_NULL_OFFSET = 0x7fff,
72	UCPTRIE_NO_DATA_NULL_OFFSET = 0xfffff
73	};
74
75	// Internal constants.
76	enum {
77	/** The length of the BMP index table. 1024=0x400 */
78	UCPTRIE_BMP_INDEX_LENGTH = 0x10000 >> UCPTRIE_FAST_SHIFT,
79
80	UCPTRIE_SMALL_LIMIT = 0x1000,
81	UCPTRIE_SMALL_INDEX_LENGTH = UCPTRIE_SMALL_LIMIT >> UCPTRIE_FAST_SHIFT,
82
83	/** Shift size for getting the index-3 table offset. */
84	UCPTRIE_SHIFT_3 = 4,
85
86	/** Shift size for getting the index-2 table offset. */
87	UCPTRIE_SHIFT_2 = 5 + UCPTRIE_SHIFT_3,
88
89	/** Shift size for getting the index-1 table offset. */
90	UCPTRIE_SHIFT_1 = 5 + UCPTRIE_SHIFT_2,
91
92	/**
93	* Difference between two shift sizes,
94	* for getting an index-2 offset from an index-3 offset. 5=9-4
95	*/
96	UCPTRIE_SHIFT_2_3 = UCPTRIE_SHIFT_2 - UCPTRIE_SHIFT_3,
97
98	/**
99	* Difference between two shift sizes,
100	* for getting an index-1 offset from an index-2 offset. 5=14-9
101	*/
102	UCPTRIE_SHIFT_1_2 = UCPTRIE_SHIFT_1 - UCPTRIE_SHIFT_2,
103
104	/**
105	* Number of index-1 entries for the BMP. (4)
106	* This part of the index-1 table is omitted from the serialized form.
107	*/
108	UCPTRIE_OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> UCPTRIE_SHIFT_1,
109
110	/** Number of entries in an index-2 block. 32=0x20 */
111	UCPTRIE_INDEX_2_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_1_2,
112
113	/** Mask for getting the lower bits for the in-index-2-block offset. */
114	UCPTRIE_INDEX_2_MASK = UCPTRIE_INDEX_2_BLOCK_LENGTH - 1,
115
116	/** Number of code points per index-2 table entry. 512=0x200 */
117	UCPTRIE_CP_PER_INDEX_2_ENTRY = 1 << UCPTRIE_SHIFT_2,
118
119	/** Number of entries in an index-3 block. 32=0x20 */
120	UCPTRIE_INDEX_3_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_2_3,
121
122	/** Mask for getting the lower bits for the in-index-3-block offset. */
123	UCPTRIE_INDEX_3_MASK = UCPTRIE_INDEX_3_BLOCK_LENGTH - 1,
124
125	/** Number of entries in a small data block. 16=0x10 */
126	UCPTRIE_SMALL_DATA_BLOCK_LENGTH = 1 << UCPTRIE_SHIFT_3,
127
128	/** Mask for getting the lower bits for the in-small-data-block offset. */
129	UCPTRIE_SMALL_DATA_MASK = UCPTRIE_SMALL_DATA_BLOCK_LENGTH - 1
130	};
131
132	typedef UChar32
133	UCPTrieGetRange(const void *trie, UChar32 start,
	134	UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
135
136	U_CFUNC UChar32
137	ucptrie_internalGetRange(UCPTrieGetRange *getRange,
138	const void *trie, UChar32 start,
139	UCPMapRangeOption option, uint32_t surrogateValue,
	140	UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
141
142	#ifdef UCPTRIE_DEBUG
143	U_CFUNC void
	144	ucptrie_printLengths(const UCPTrie *trie, const char *which);
145
	146	U_CFUNC void umutablecptrie_setName(UMutableCPTrie *builder, const char *name);
147	#endif
148
149	/*
150	* Format of the binary, memory-mappable representation of a UCPTrie/CodePointTrie.
151	* For overview information see http://site.icu-project.org/design/struct/utrie
152	*
153	* The binary trie data should be 32-bit-aligned.
154	* The overall layout is:
155	*
156	* UCPTrieHeader header; -- 16 bytes, see struct definition above
157	* uint16_t index[header.indexLength];
158	* uintXY_t data[header.dataLength];
159	*
160	* The trie data array is an array of uint16_t, uint32_t, or uint8_t,
161	* specified via the UCPTrieValueWidth when building the trie.
162	* The data array is 32-bit-aligned for uint32_t, otherwise 16-bit-aligned.
163	* The overall length of the trie data is a multiple of 4 bytes.
164	* (Padding is added at the end of the index array and/or near the end of the data array as needed.)
165	*
166	* The length of the data array (dataLength) is stored as an integer split across two fields
167	* of the header struct (high bits in header.options).
168	*
169	* The trie type can be "fast" or "small" which determines the index structure,
170	* specified via the UCPTrieType when building the trie.
171	*
172	* The type and valueWidth are stored in the header.options.
173	* There are reserved type and valueWidth values, and reserved header.options bits.
174	* They could be used in future format extensions.
175	* Code reading the trie structure must fail with an error when unknown values or options are set.
176	*
	177	* Values for ASCII characters (U+0000..U+007F) can always be found at the start of the data array.
178	*
179	* Values for code points below a type-specific fast-indexing limit are found via two-stage lookup.
180	* For a "fast" trie, the limit is the BMP/supplementary boundary at U+10000.
181	* For a "small" trie, the limit is UCPTRIE_SMALL_MAX+1=U+1000.
182	*
183	* All code points in the range highStart..U+10FFFF map to a single highValue
184	* which is stored at the second-to-last position of the data array.
185	* (See UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET.)
186	* The highStart value is header.shiftedHighStart<<UCPTRIE_SHIFT_2.
187	* (UCPTRIE_SHIFT_2=9)
188	*
189	* Values for code points fast_limit..highStart-1 are found via four-stage lookup.
190	* The data block size is smaller for this range than for the fast range.
191	* This together with more index stages with small blocks makes this range
192	* more easily compactable.
193	*
194	* There is also a trie error value stored at the last position of the data array.
195	* (See UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET.)
196	* It is intended to be returned for inputs that are not Unicode code points
197	* (outside U+0000..U+10FFFF), or in string processing for ill-formed input
198	* (unpaired surrogate in UTF-16, ill-formed UTF-8 subsequence).
199	*
200	* For a "fast" trie:
201	*
202	* The index array starts with the BMP index table for BMP code point lookup.
203	* Its length is 1024=0x400.
204	*
205	* The supplementary index-1 table follows the BMP index table.
206	* Variable length, for code points up to highStart-1.
207	* Maximum length 64=0x40=0x100000>>UCPTRIE_SHIFT_1.
208	* (For 0x100000 supplementary code points U+10000..U+10ffff.)
209	*
210	* After this index-1 table follow the variable-length index-3 and index-2 tables.
211	*
212	* The supplementary index tables are omitted completely
213	* if there is only BMP data (highStart<=U+10000).
214	*
215	* For a "small" trie:
216	*
217	* The index array starts with a fast-index table for lookup of code points U+0000..U+0FFF.
218	*
219	* The "supplementary" index tables are always stored.
220	* The index-1 table starts from U+0000, its maximum length is 68=0x44=0x110000>>UCPTRIE_SHIFT_1.
221	*
222	* For both trie types:
223	*
224	* The last index-2 block may be a partial block, storing indexes only for code points
225	* below highStart.
226	*
227	* Lookup for ASCII code point c:
228	*
229	* Linear access from the start of the data array.
230	*
231	* value = data[c];
232	*
233	* Lookup for fast-range code point c:
234	*
235	* Shift the code point right by UCPTRIE_FAST_SHIFT=6 bits,
236	* fetch the index array value at that offset,
237	* add the lower code point bits, index into the data array.
238	*
239	* value = data[index[c>>6] + (c&0x3f)];
240	*
241	* (This works for ASCII as well.)
242	*
243	* Lookup for small-range code point c below highStart:
244	*
245	* Split the code point into four bit fields using several sets of shifts & masks
246	* to read consecutive values from the index-1, index-2, index-3 and data tables.
247	*
248	* If all of the data block offsets in an index-3 block fit within 16 bits (up to 0xffff),
249	* then the data block offsets are stored directly as uint16_t.
250	*
251	* Otherwise (this is very unusual but possible), the index-2 entry for the index-3 block
252	* has bit 15 (0x8000) set, and each set of 8 index-3 entries is preceded by
253	* an additional uint16_t word. Data block offsets are 18 bits wide, with the top 2 bits stored
254	* in the additional word.
255	*
256	* See ucptrie_internalSmallIndex() for details.
257	*
258	* (In a "small" trie, this works for ASCII and below-fast_limit code points as well.)
259	*
260	* Compaction:
261	*
262	* Multiple code point ranges ("blocks") that are aligned on certain boundaries
263	* (determined by the shifting/bit fields of code points) and
264	* map to the same data values normally share a single subsequence of the data array.
265	* Data blocks can also overlap partially.
266	* (Depending on the builder code finding duplicate and overlapping blocks.)
267	*
268	* Iteration over same-value ranges:
269	*
270	* Range iteration (ucptrie_getRange()) walks the structure from a start code point
271	* until some code point is found that maps to a different value;
272	* the end of the returned range is just before that.
273	*
274	* The header.dataNullOffset (split across two header fields, high bits in header.options)
275	* is the offset of a widely shared data block filled with one single value.
276	* It helps quickly skip over large ranges of data with that value.
277	* The builder must ensure that if the start of any data block (fast or small)
278	* matches the dataNullOffset, then the whole block must be filled with the null value.
279	* Special care must be taken if there is no fast null data block
	280	* but a small one, which is shorter, and it matches the *start* of some fast data block.
281	*
282	* Similarly, the header.index3NullOffset is the index-array offset of an index-3 block
283	* where all index entries point to the dataNullOffset.
284	* If there is no such data or index-3 block, then these offsets are set to
285	* values that cannot be reached (data offset out of range/reserved index offset),
286	* normally UCPTRIE_NO_DATA_NULL_OFFSET or UCPTRIE_NO_INDEX3_NULL_OFFSET respectively.
287	*/
288
289	#endif