]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/bocsu.c
2 *******************************************************************************
3 * Copyright (C) 2001-2003, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
8 * tab size: 8 (not used)
11 * Author: Markus W. Scherer
13 * Modification history:
14 * 05/18/2001 weiv Made into separate module
18 #include "unicode/utypes.h"
20 #if !UCONFIG_NO_COLLATION
25 * encode one difference value -0x10ffff..+0x10ffff in 1..4 bytes,
26 * preserving lexical order
/*
 * u_writeDiff: store the byte encoding of one signed difference value at p.
 *
 * diff  the difference to encode
 * p     destination buffer; the longest visible branches store to
 *       p[1]..p[3], so presumably at least 4 writable bytes are needed
 *       -- TODO confirm against the caller.
 *
 * NOTE(review): this listing is a web-page extraction. The leading numbers
 * are the original file's line numbers, and several original lines are
 * absent (the numbering jumps 40->43, 47->54, 63->66, 71->79). The return
 * type, the lead-byte stores of the longest forms, the pointer advances
 * after indexed stores, the declaration of m and the return statement are
 * therefore not visible here. Callers in this file assign the result back
 * to their output pointer.
 */
29 u_writeDiff(int32_t diff
, uint8_t *p
) {
/* Upper half of the range: everything from SLOPE_REACH_NEG_1 upwards. */
30 if(diff
>=SLOPE_REACH_NEG_1
) {
31 if(diff
<=SLOPE_REACH_POS_1
) {
/* One byte: the difference is added directly to SLOPE_MIDDLE. */
32 *p
++=(uint8_t)(SLOPE_MIDDLE
+diff
);
33 } else if(diff
<=SLOPE_REACH_POS_2
) {
/* Two bytes: the lead byte carries diff/SLOPE_TAIL_COUNT, the trail byte the remainder. */
34 *p
++=(uint8_t)(SLOPE_START_POS_2
+(diff
/SLOPE_TAIL_COUNT
));
35 *p
++=(uint8_t)(SLOPE_MIN
+diff%SLOPE_TAIL_COUNT
);
36 } else if(diff
<=SLOPE_REACH_POS_3
) {
/*
 * Three bytes, filled from the last position backwards: each trail byte is
 * SLOPE_MIN plus the current remainder, diff is divided by SLOPE_TAIL_COUNT
 * in between, and the lead byte is stored last.
 */
37 p
[2]=(uint8_t)(SLOPE_MIN
+diff%SLOPE_TAIL_COUNT
);
38 diff
/=SLOPE_TAIL_COUNT
;
39 p
[1]=(uint8_t)(SLOPE_MIN
+diff%SLOPE_TAIL_COUNT
);
40 *p
=(uint8_t)(SLOPE_START_POS_3
+(diff
/SLOPE_TAIL_COUNT
));
/*
 * Longest positive form: three trail bytes p[3], p[2], p[1] are filled back
 * to front in the same way. The branch header and the lead-byte store are
 * not visible in this listing.
 */
43 p
[3]=(uint8_t)(SLOPE_MIN
+diff%SLOPE_TAIL_COUNT
);
44 diff
/=SLOPE_TAIL_COUNT
;
45 p
[2]=(uint8_t)(SLOPE_MIN
+diff%SLOPE_TAIL_COUNT
);
46 diff
/=SLOPE_TAIL_COUNT
;
47 p
[1]=(uint8_t)(SLOPE_MIN
+diff%SLOPE_TAIL_COUNT
);
/*
 * Lower half of the range. NEGDIVMOD replaces the / and % used above.
 * NOTE(review): NEGDIVMOD is defined elsewhere; from its use here it
 * presumably replaces diff with the quotient and leaves a remainder suitable
 * for adding to SLOPE_MIN in m -- confirm against its definition.
 */
54 if(diff
>=SLOPE_REACH_NEG_2
) {
/* Two bytes: lead byte from the reduced diff, trail byte from m. */
55 NEGDIVMOD(diff
, SLOPE_TAIL_COUNT
, m
);
56 *p
++=(uint8_t)(SLOPE_START_NEG_2
+diff
);
57 *p
++=(uint8_t)(SLOPE_MIN
+m
);
58 } else if(diff
>=SLOPE_REACH_NEG_3
) {
/* Three bytes, trail bytes filled back to front, lead byte stored last. */
59 NEGDIVMOD(diff
, SLOPE_TAIL_COUNT
, m
);
60 p
[2]=(uint8_t)(SLOPE_MIN
+m
);
61 NEGDIVMOD(diff
, SLOPE_TAIL_COUNT
, m
);
62 p
[1]=(uint8_t)(SLOPE_MIN
+m
);
63 *p
=(uint8_t)(SLOPE_START_NEG_3
+diff
);
/*
 * Longest negative form: three trail bytes p[3], p[2], p[1]; the branch
 * header and the lead-byte store are not visible in this listing.
 */
66 NEGDIVMOD(diff
, SLOPE_TAIL_COUNT
, m
);
67 p
[3]=(uint8_t)(SLOPE_MIN
+m
);
68 NEGDIVMOD(diff
, SLOPE_TAIL_COUNT
, m
);
69 p
[2]=(uint8_t)(SLOPE_MIN
+m
);
70 NEGDIVMOD(diff
, SLOPE_TAIL_COUNT
, m
);
71 p
[1]=(uint8_t)(SLOPE_MIN
+m
);
79 /* How many bytes would u_writeDiff() write? */
/*
 * lengthOfDiff: classify diff with the same sequence of range tests that
 * u_writeDiff() uses (SLOPE_REACH_NEG_1, SLOPE_REACH_POS_1..3,
 * SLOPE_REACH_NEG_2..3).
 *
 * diff  the difference value to classify
 *
 * NOTE(review): this listing is a web-page extraction; the leading numbers
 * are the original file's line numbers. The statement inside every branch
 * (original lines 84, 86, 88-92, 94, 96 onwards) and the return type are not
 * visible, so the values returned per branch cannot be confirmed from here.
 * The only visible caller adds the result to a running byte count.
 */
81 lengthOfDiff(int32_t diff
) {
/* Upper half of the range: from SLOPE_REACH_NEG_1 upwards. */
82 if(diff
>=SLOPE_REACH_NEG_1
) {
83 if(diff
<=SLOPE_REACH_POS_1
) {
85 } else if(diff
<=SLOPE_REACH_POS_2
) {
87 } else if(diff
<=SLOPE_REACH_POS_3
) {
/* Lower half of the range: below SLOPE_REACH_NEG_1. */
93 if(diff
>=SLOPE_REACH_NEG_2
) {
95 } else if(diff
>=SLOPE_REACH_NEG_3
) {
104 * Encode the code points of a string as
105 * a sequence of byte-encoded differences (slope detection),
106 * preserving lexical order.
108 * Optimize the difference-taking for runs of Unicode text within small scripts:
111 * Most small scripts are allocated within aligned 128-blocks of Unicode
112 * code points. Lexical order is preserved if "prev" is always moved
113 * into the middle of such a block.
115 * Additionally, "prev" is moved from anywhere in the Unihan
116 * area into the middle of that area.
117 * Note that the identical-level run in a sort key is generated from
118 * NFD text - there are never Hangul characters included.
/*
 * u_writeIdenticalLevelRun: encode the code points of s as a sequence of
 * differences written with u_writeDiff(), and return how far the output
 * pointer advanced.
 *
 * s       input UTF-16 text
 * length  number of UChar units in s
 * p       destination buffer for the encoded bytes
 *
 * NOTE(review): this listing is a web-page extraction; the leading numbers
 * are the original file's line numbers. Original lines 122-129 (return type,
 * local declarations, initial values of prev/i/p0, loop header) and a few
 * later lines are not visible. p0 is presumably the value of p on entry,
 * which would make the result the number of bytes written -- confirm.
 */
121 u_writeIdenticalLevelRun(const UChar
*s
, int32_t length
, uint8_t *p
) {
/*
 * Re-base prev before taking the next difference. Outside 0x4e00..0x9fff,
 * prev is rounded down to a multiple of 0x80 and SLOPE_REACH_NEG_1 is then
 * subtracted (its value is defined elsewhere).
 */
130 if(prev
<0x4e00 || prev
>=0xa000) {
131 prev
=(prev
&~0x7f)-SLOPE_REACH_NEG_1
;
134 * Unihan U+4e00..U+9fa5:
135 * double-bytes down from the upper end
/* Inside 0x4e00..0x9fff, prev is set to one fixed value for the whole range. */
137 prev
=0x9fff-SLOPE_REACH_POS_2
;
/*
 * UTF_NEXT_CHAR is defined elsewhere; presumably it reads the next code
 * point of s into c and advances i -- confirm against its definition.
 */
140 UTF_NEXT_CHAR(s
, i
, length
, c
);
/* Encode the distance from the re-based prev to c and take the updated output pointer. */
141 p
=u_writeDiff(c
-prev
, p
);
/* Distance between the current output pointer and p0. */
144 return (int32_t)(p
-p0
);
/*
 * u_writeIdenticalLevelRunTwoChars: write the encoding of one difference,
 * from a re-based copy of first to second, using u_writeDiff().
 *
 * first   the preceding code point; it is overwritten with its re-based value
 * second  the code point whose difference is encoded
 * p       destination buffer for the encoded bytes
 *
 * NOTE(review): this listing is a web-page extraction; the leading numbers
 * are the original file's line numbers. Original line 149 (presumably the
 * declaration of p0 holding the value of p on entry -- confirm) and the
 * return type are not visible.
 */
148 u_writeIdenticalLevelRunTwoChars(UChar32 first
, UChar32 second
, uint8_t *p
) {
/*
 * Same re-basing as in u_writeIdenticalLevelRun(): outside 0x4e00..0x9fff,
 * round down to a multiple of 0x80 and subtract SLOPE_REACH_NEG_1.
 */
150 if(first
<0x4e00 || first
>=0xa000) {
151 first
=(first
&~0x7f)-SLOPE_REACH_NEG_1
;
154 * Unihan U+4e00..U+9fa5:
155 * double-bytes down from the upper end
/* Inside 0x4e00..0x9fff, use one fixed base for the whole range. */
157 first
=0x9fff-SLOPE_REACH_POS_2
;
/* Encode second relative to the re-based first. */
160 p
=u_writeDiff(second
-first
, p
);
/* Distance between the current output pointer and p0. */
161 return (int32_t)(p
-p0
);
164 /* How many bytes would u_writeIdenticalLevelRun() write? */
/*
 * u_lengthOfIdenticalLevelRun: walk s the same way as
 * u_writeIdenticalLevelRun(), but sum lengthOfDiff() for each difference
 * into runLength instead of writing bytes.
 *
 * s       input UTF-16 text
 * length  number of UChar units in s
 *
 * NOTE(review): this listing is a web-page extraction; the leading numbers
 * are the original file's line numbers. The return type, the declarations
 * and initial values of prev and c, the initialisation of i and runLength,
 * the loop header and the return statement are not visible; presumably
 * runLength is what is returned -- confirm.
 */
166 u_lengthOfIdenticalLevelRun(const UChar
*s
, int32_t length
) {
168 int32_t i
, runLength
;
/*
 * Re-base prev exactly as the writing function does: outside
 * 0x4e00..0x9fff, round down to a multiple of 0x80 and subtract
 * SLOPE_REACH_NEG_1.
 */
174 if(prev
<0x4e00 || prev
>=0xa000) {
175 prev
=(prev
&~0x7f)-SLOPE_REACH_NEG_1
;
178 * Unihan U+4e00..U+9fa5:
179 * double-bytes down from the upper end
/* Inside 0x4e00..0x9fff, use one fixed base for the whole range. */
181 prev
=0x9fff-SLOPE_REACH_POS_2
;
/*
 * UTF_NEXT_CHAR is defined elsewhere; presumably it reads the next code
 * point of s into c and advances i -- confirm against its definition.
 */
184 UTF_NEXT_CHAR(s
, i
, length
, c
);
/* Add the length reported for this difference to the running total. */
185 runLength
+=lengthOfDiff(c
-prev
);
191 #endif /* #if !UCONFIG_NO_COLLATION */