]>
git.saurik.com Git - apple/icu.git/blob - icuSources/i18n/collation.cpp
   2 ******************************************************************************* 
   3 * Copyright (C) 2010-2014, International Business Machines 
   4 * Corporation and others.  All Rights Reserved. 
   5 ******************************************************************************* 
   8 * created on: 2010oct27 
   9 * created by: Markus W. Scherer 
  12 #include "unicode/utypes.h" 
  14 #if !UCONFIG_NO_COLLATION 
  16 #include "collation.h" 
  21 // Some compilers don't care if constants are defined in the .cpp file. 
  22 // MS Visual C++ does not like it, but gcc requires it. clang does not care. 
  24 const uint8_t Collation::LEVEL_SEPARATOR_BYTE
; 
  25 const uint8_t Collation::MERGE_SEPARATOR_BYTE
; 
  26 const uint32_t Collation::ONLY_TERTIARY_MASK
; 
  27 const uint32_t Collation::CASE_AND_TERTIARY_MASK
; 
  31 Collation::incTwoBytePrimaryByOffset(uint32_t basePrimary
, UBool isCompressible
, int32_t offset
) { 
  32     // Extract the second byte, minus the minimum byte value, 
  33     // plus the offset, modulo the number of usable byte values, plus the minimum. 
  34     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 
  37         offset 
+= ((int32_t)(basePrimary 
>> 16) & 0xff) - 4; 
  38         primary 
= (uint32_t)((offset 
% 251) + 4) << 16; 
  41         offset 
+= ((int32_t)(basePrimary 
>> 16) & 0xff) - 2; 
  42         primary 
= (uint32_t)((offset 
% 254) + 2) << 16; 
  45     // First byte, assume no further overflow. 
  46     return primary 
| ((basePrimary 
& 0xff000000) + (uint32_t)(offset 
<< 24)); 
  50 Collation::incThreeBytePrimaryByOffset(uint32_t basePrimary
, UBool isCompressible
, int32_t offset
) { 
  51     // Extract the third byte, minus the minimum byte value, 
  52     // plus the offset, modulo the number of usable byte values, plus the minimum. 
  53     offset 
+= ((int32_t)(basePrimary 
>> 8) & 0xff) - 2; 
  54     uint32_t primary 
= (uint32_t)((offset 
% 254) + 2) << 8; 
  56     // Same with the second byte, 
  57     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 
  59         offset 
+= ((int32_t)(basePrimary 
>> 16) & 0xff) - 4; 
  60         primary 
|= (uint32_t)((offset 
% 251) + 4) << 16; 
  63         offset 
+= ((int32_t)(basePrimary 
>> 16) & 0xff) - 2; 
  64         primary 
|= (uint32_t)((offset 
% 254) + 2) << 16; 
  67     // First byte, assume no further overflow. 
  68     return primary 
| ((basePrimary 
& 0xff000000) + (uint32_t)(offset 
<< 24)); 
  72 Collation::decTwoBytePrimaryByOneStep(uint32_t basePrimary
, UBool isCompressible
, int32_t step
) { 
  73     // Extract the second byte, minus the minimum byte value, 
  74     // minus the step, modulo the number of usable byte values, plus the minimum. 
  75     // Reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 
  76     // Assume no further underflow for the first byte. 
  77     U_ASSERT(0 < step 
&& step 
<= 0x7f); 
  78     int32_t byte2 
= ((int32_t)(basePrimary 
>> 16) & 0xff) - step
; 
  82             basePrimary 
-= 0x1000000; 
  87             basePrimary 
-= 0x1000000; 
  90     return (basePrimary 
& 0xff000000) | ((uint32_t)byte2 
<< 16); 
  94 Collation::decThreeBytePrimaryByOneStep(uint32_t basePrimary
, UBool isCompressible
, int32_t step
) { 
  95     // Extract the third byte, minus the minimum byte value, 
  96     // minus the step, modulo the number of usable byte values, plus the minimum. 
  97     U_ASSERT(0 < step 
&& step 
<= 0x7f); 
  98     int32_t byte3 
= ((int32_t)(basePrimary 
>> 8) & 0xff) - step
; 
 100         return (basePrimary 
& 0xffff0000) | ((uint32_t)byte3 
<< 8); 
 103     // Same with the second byte, 
 104     // but reserve the PRIMARY_COMPRESSION_LOW_BYTE and high byte if necessary. 
 105     int32_t byte2 
= ((int32_t)(basePrimary 
>> 16) & 0xff) - 1; 
 109             basePrimary 
-= 0x1000000; 
 114             basePrimary 
-= 0x1000000; 
 117     // First byte, assume no further underflow. 
 118     return (basePrimary 
& 0xff000000) | ((uint32_t)byte2 
<< 16) | ((uint32_t)byte3 
<< 8); 
 122 Collation::getThreeBytePrimaryForOffsetData(UChar32 c
, int64_t dataCE
) { 
 123     uint32_t p 
= (uint32_t)(dataCE 
>> 32);  // three-byte primary pppppp00 
 124     int32_t lower32 
= (int32_t)dataCE
;  // base code point b & step s: bbbbbbss (bit 7: isCompressible) 
 125     int32_t offset 
= (c 
- (lower32 
>> 8)) * (lower32 
& 0x7f);  // delta * increment 
 126     UBool isCompressible 
= (lower32 
& 0x80) != 0; 
 127     return Collation::incThreeBytePrimaryByOffset(p
, isCompressible
, offset
); 
 131 Collation::unassignedPrimaryFromCodePoint(UChar32 c
) { 
 132     // Create a gap before U+0000. Use c=-1 for [first unassigned]. 
 134     // Fourth byte: 18 values, every 14th byte value (gap of 13). 
 135     uint32_t primary 
= 2 + (c 
% 18) * 14; 
 137     // Third byte: 254 values. 
 138     primary 
|= (2 + (c 
% 254)) << 8; 
 140     // Second byte: 251 values 04..FE excluding the primary compression bytes. 
 141     primary 
|= (4 + (c 
% 251)) << 16; 
 142     // One lead byte covers all code points (c < 0x1182B4 = 1*251*254*18). 
 143     return primary 
| (UNASSIGNED_IMPLICIT_BYTE 
<< 24); 
 148 #endif  // !UCONFIG_NO_COLLATION