]>
git.saurik.com Git - redis.git/blob - lzf_c.c
   2  * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de> 
   4  * Redistribution and use in source and binary forms, with or without modifica- 
   5  * tion, are permitted provided that the following conditions are met: 
   7  *   1.  Redistributions of source code must retain the above copyright notice, 
   8  *       this list of conditions and the following disclaimer. 
  10  *   2.  Redistributions in binary form must reproduce the above copyright 
  11  *       notice, this list of conditions and the following disclaimer in the 
  12  *       documentation and/or other materials provided with the distribution. 
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 
  15  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- 
  16  * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO 
  17  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- 
  18  * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 
  19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
  20  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
  21  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- 
  22  * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 
  23  * OF THE POSSIBILITY OF SUCH DAMAGE. 
  25  * Alternatively, the contents of this file may be used under the terms of 
  26  * the GNU General Public License ("GPL") version 2 or any later version, 
  27  * in which case the provisions of the GPL are applicable instead of 
  28  * the above. If you wish to allow the use of your version of this file 
  29  * only under the terms of the GPL and not to allow others to use your 
  30  * version of this file under the BSD license, indicate your decision 
  31  * by deleting the provisions above and replace them with the notice 
  32  * and other provisions required by the GPL. If you do not delete the 
  33  * provisions above, a recipient may use your version of this file under 
  34  * either the BSD or the GPL. 
  39 #define HSIZE (1 << (HLOG)) 
  42  * don't play with this unless you benchmark! 
  43  * decompression is not dependent on the hash function 
  44  * the hashing function might seem strange, just believe me 
  48 # define FRST(p) (((p[0]) << 8) | p[1]) 
  49 # define NEXT(v,p) (((v) << 8) | p[2]) 
  51 #  define IDX(h) ((( h             >> (3*8 - HLOG)) - h  ) & (HSIZE - 1)) 
  53 #  define IDX(h) ((( h             >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) 
  55 #  define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1)) 
  59  * IDX works because it is very similar to a multiplicative hash, e.g. 
  60  * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1)) 
  61  * the latter is also quite fast on newer CPUs, and compresses similarly. 
  63  * the next one is also quite good, albeit slow ;) 
  64  * (int)(cos(h & 0xffffff) * 1e6) 
  68 /* original lzv-like hash function, much worse and thus slower */ 
  69 # define FRST(p) (p[0] << 5) ^ p[1] 
  70 # define NEXT(v,p) ((v) << 5) ^ p[2] 
  71 # define IDX(h) ((h) & (HSIZE - 1)) 
  74 #define        MAX_LIT        (1 <<  5) 
  75 #define        MAX_OFF        (1 << 13) 
  76 #define        MAX_REF        ((1 << 8) + (1 << 3)) 
  79 # define expect(expr,value)         __builtin_expect ((expr),(value)) 
  80 # define inline                     inline 
  82 # define expect(expr,value)         (expr) 
  83 # define inline                     static 
  86 #define expect_false(expr) expect ((expr) != 0, 0) 
  87 #define expect_true(expr)  expect ((expr) != 0, 1) 
  92  * 000LLLLL <L+1>    ; literal 
  93  * LLLooooo oooooooo ; backref L 
  94  * 111ooooo LLLLLLLL oooooooo ; backref L+7 
  99 lzf_compress (const void *const in_data
, unsigned int in_len
, 
 100               void *out_data
, unsigned int out_len
 
 110   const u8 
*ip 
= (const u8 
*)in_data
; 
 111         u8 
*op 
= (u8 
*)out_data
; 
 112   const u8 
*in_end  
= ip 
+ in_len
; 
 113         u8 
*out_end 
= op 
+ out_len
; 
 116   /* off requires a type wide enough to hold a general pointer difference. 
 117    * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only 
 118    * works for differences within a single object). We also assume that no 
 119    * no bit pattern traps. Since the only platform that is both non-POSIX 
 120    * and fails to support both assumptions is windows 64 bit, we make a 
 121    * special workaround for it. 
 123 #if defined (WIN32) && defined (_M_X64) 
 124   unsigned _int64 off
; /* workaround for missing POSIX compliance */ 
 131   if (!in_len 
|| !out_len
) 
 135   memset (htab
, 0, sizeof (htab
)); 
 137   for (hslot 
= htab
; hslot 
< htab 
+ HSIZE
; hslot
++) 
 142   lit 
= 0; op
++; /* start run */ 
 145   while (ip 
< in_end 
- 2) 
 147       hval 
= NEXT (hval
, ip
); 
 148       hslot 
= htab 
+ IDX (hval
); 
 149       ref 
= *hslot
; *hslot 
= ip
; 
 153           && ref 
< ip 
/* the next test will actually take care of this, but this is faster */ 
 155           && (off 
= ip 
- ref 
- 1) < MAX_OFF
 
 157           && ref 
> (u8 
*)in_data
 
 163           && *(u16 
*)ref 
== *(u16 
*)ip
 
 168           /* match found at *ref++ */ 
 169           unsigned int len 
= 2; 
 170           unsigned int maxlen 
= in_end 
- ip 
- len
; 
 171           maxlen 
= maxlen 
> MAX_REF 
? MAX_REF 
: maxlen
; 
 173           op 
[- lit 
- 1] = lit 
- 1; /* stop run */ 
 174           op 
-= !lit
; /* undo run if length is zero */ 
 176           if (expect_false (op 
+ 3 + 1 >= out_end
)) 
 181               if (expect_true (maxlen 
> 16)) 
 183                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 184                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 185                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 186                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 188                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 189                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 190                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 191                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 193                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 194                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 195                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 196                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 198                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 199                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 200                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 201                   len
++; if (ref 
[len
] != ip 
[len
]) break; 
 206               while (len 
< maxlen 
&& ref
[len
] == ip
[len
]); 
 211           len 
-= 2; /* len is now #octets - 1 */ 
 216               *op
++ = (off 
>> 8) + (len 
<< 5); 
 220               *op
++ = (off 
>> 8) + (  7 << 5); 
 225           lit 
= 0; op
++; /* start run */ 
 229           if (expect_false (ip 
>= in_end 
- 2)) 
 232 #if ULTRA_FAST || VERY_FAST 
 234 # if VERY_FAST && !ULTRA_FAST 
 239           hval 
= NEXT (hval
, ip
); 
 240           htab
[IDX (hval
)] = ip
; 
 243 # if VERY_FAST && !ULTRA_FAST 
 244           hval 
= NEXT (hval
, ip
); 
 245           htab
[IDX (hval
)] = ip
; 
 253               hval 
= NEXT (hval
, ip
); 
 254               htab
[IDX (hval
)] = ip
; 
 262           /* one more literal byte we must copy */ 
 263           if (expect_false (op 
>= out_end
)) 
 266           lit
++; *op
++ = *ip
++; 
 268           if (expect_false (lit 
== MAX_LIT
)) 
 270               op 
[- lit 
- 1] = lit 
- 1; /* stop run */ 
 271               lit 
= 0; op
++; /* start run */ 
 276   if (op 
+ 3 > out_end
) /* at most 3 bytes can be missing here */ 
 281       lit
++; *op
++ = *ip
++; 
 283       if (expect_false (lit 
== MAX_LIT
)) 
 285           op 
[- lit 
- 1] = lit 
- 1; /* stop run */ 
 286           lit 
= 0; op
++; /* start run */ 
 290   op 
[- lit 
- 1] = lit 
- 1; /* end run */ 
 291   op 
-= !lit
; /* undo run if length is zero */ 
 293   return op 
- (u8 
*)out_data
;