]>
Commit | Line | Data |
---|---|---|
39037602 A |
1 | /* |
2 | * Copyright (c) 2016-2016 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
0a7de745 | 5 | * |
39037602 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
39037602 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
39037602 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
39037602 A |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ | |
28 | ||
29 | // LZ4_RAW buffer API | |
30 | // EB May 2015 | |
31 | // Mar 2016 Imported from the Compression project, with minor optimisations and | |
32 | // early abort detection (Derek Kumar) | |
33 | ||
34 | #include "lz4.h" | |
5ba3f43e | 35 | #define memcpy __builtin_memcpy |
39037602 | 36 | |
0a7de745 A |
37 | size_t |
38 | lz4raw_decode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, | |
39 | const uint8_t * __restrict src_buffer, size_t src_size, | |
40 | void * __restrict work __attribute__((unused))) | |
39037602 | 41 | { |
0a7de745 A |
42 | const uint8_t * src = src_buffer; |
43 | uint8_t * dst = dst_buffer; | |
44 | ||
45 | // Go fast if we can, keeping away from the end of buffers | |
39037602 | 46 | #if LZ4_ENABLE_ASSEMBLY_DECODE |
0a7de745 A |
47 | if (dst_size > LZ4_GOFAST_SAFETY_MARGIN && src_size > LZ4_GOFAST_SAFETY_MARGIN) { |
48 | if (lz4_decode_asm(&dst, dst_buffer, dst_buffer + dst_size - LZ4_GOFAST_SAFETY_MARGIN, &src, src_buffer + src_size - LZ4_GOFAST_SAFETY_MARGIN)) { | |
49 | return 0; // FAIL | |
50 | } | |
51 | } | |
39037602 A |
52 | #endif |
53 | //DRKTODO: Can the 'C' "safety" decode be eliminated for 4/16K fixed-sized buffers? | |
39037602 | 54 | |
0a7de745 A |
55 | // Finish safe |
56 | if (lz4_decode(&dst, dst_buffer, dst_buffer + dst_size, &src, src_buffer + src_size)) { | |
57 | return 0; // FAIL | |
58 | } | |
59 | return (size_t)(dst - dst_buffer); // bytes produced | |
39037602 A |
60 | } |
61 | // Debug flags | |
62 | #if LZ4DEBUG | |
63 | #define DEBUG_LZ4_ENCODE_ERRORS (1) | |
64 | #define DEBUG_LZ4_DECODE_ERRORS (1) | |
65 | #endif | |
66 | ||
67 | #if DEBUG_LZ4_ENCODE_ERRORS | |
68 | #endif | |
69 | ||
70 | #if !LZ4_ENABLE_ASSEMBLY_ENCODE | |
71 | ||
72 | #if defined(__x86_64__) || defined(__x86_64h__) | |
73 | # define LZ4_MATCH_SEARCH_INIT_SIZE 32 | |
74 | # define LZ4_MATCH_SEARCH_LOOP_SIZE 32 | |
75 | #else | |
76 | # define LZ4_MATCH_SEARCH_INIT_SIZE 8 | |
77 | # define LZ4_MATCH_SEARCH_LOOP_SIZE 8 | |
78 | #endif | |
79 | ||
80 | // Return hash for 4-byte sequence X | |
0a7de745 A |
81 | static inline uint32_t |
82 | lz4_hash(uint32_t x) | |
83 | { | |
84 | return (x * 2654435761U) >> (32 - LZ4_COMPRESS_HASH_BITS); | |
85 | } | |
39037602 A |
86 | |
87 | // Store 0xfff..fff at *PTR | |
0a7de745 A |
88 | static inline void |
89 | lz4_fill16(uint8_t * ptr) | |
39037602 | 90 | { |
0a7de745 A |
91 | store8(ptr, -1); |
92 | store8(ptr + 8, -1); | |
39037602 A |
93 | } |
94 | ||
95 | // Return number of matching bytes 0..4 at positions A and B. | |
0a7de745 A |
96 | static inline size_t |
97 | lz4_nmatch4(const uint8_t * a, const uint8_t * b) | |
39037602 | 98 | { |
0a7de745 A |
99 | uint32_t x = load4(a) ^ load4(b); |
100 | return (x == 0)?4:(__builtin_ctzl(x) >> 3); | |
39037602 A |
101 | } |
102 | ||
103 | // Return number of matching bytes 0..8 at positions A and B. | |
0a7de745 A |
104 | static inline size_t |
105 | lz4_nmatch8(const uint8_t * a, const uint8_t * b) | |
39037602 | 106 | { |
0a7de745 A |
107 | uint64_t x = load8(a) ^ load8(b); |
108 | return (x == 0)?8:(__builtin_ctzll(x) >> 3); | |
39037602 A |
109 | } |
110 | ||
111 | // Return number of matching bytes 0..16 at positions A and B. | |
0a7de745 A |
112 | static inline size_t |
113 | lz4_nmatch16(const uint8_t * a, const uint8_t * b) | |
39037602 | 114 | { |
0a7de745 A |
115 | size_t n = lz4_nmatch8(a, b); |
116 | return (n == 8)?(8 + lz4_nmatch8(a + 8, b + 8)):n; | |
39037602 A |
117 | } |
118 | ||
119 | // Return number of matching bytes 0..32 at positions A and B. | |
0a7de745 A |
120 | static inline size_t |
121 | lz4_nmatch32(const uint8_t * a, const uint8_t * b) | |
39037602 | 122 | { |
0a7de745 A |
123 | size_t n = lz4_nmatch16(a, b); |
124 | return (n == 16)?(16 + lz4_nmatch16(a + 16, b + 16)):n; | |
39037602 A |
125 | } |
126 | ||
127 | // Return number of matching bytes 0..64 at positions A and B. | |
0a7de745 A |
128 | static inline size_t |
129 | lz4_nmatch64(const uint8_t * a, const uint8_t * b) | |
39037602 | 130 | { |
0a7de745 A |
131 | size_t n = lz4_nmatch32(a, b); |
132 | return (n == 32)?(32 + lz4_nmatch32(a + 32, b + 32)):n; | |
39037602 A |
133 | } |
134 | ||
135 | // Compile-time selection, return number of matching bytes 0..N at positions A and B. | |
0a7de745 A |
136 | static inline size_t |
137 | lz4_nmatch(int N, const uint8_t * a, const uint8_t * b) | |
39037602 | 138 | { |
0a7de745 A |
139 | switch (N) { |
140 | case 4: return lz4_nmatch4(a, b); | |
141 | case 8: return lz4_nmatch8(a, b); | |
142 | case 16: return lz4_nmatch16(a, b); | |
143 | case 32: return lz4_nmatch32(a, b); | |
144 | case 64: return lz4_nmatch64(a, b); | |
145 | } | |
146 | __builtin_trap(); // FAIL | |
39037602 A |
147 | } |
148 | ||
149 | // Store LENGTH in DST using the literal_length/match_length extension scheme: X is the sum of all bytes until we reach a byte < 0xff. | |
150 | // We are allowed to access a constant number of bytes above DST_END. | |
151 | // Return incremented DST pointer on success, and 0 on failure | |
0a7de745 A |
152 | static inline uint8_t * |
153 | lz4_store_length(uint8_t * dst, const uint8_t * const end, uint32_t L) | |
154 | { | |
39037602 | 155 | (void)end; |
0a7de745 A |
156 | while (L >= 17 * 255) { |
157 | lz4_fill16(dst); | |
158 | dst += 16; | |
159 | L -= 16 * 255; | |
160 | } | |
161 | lz4_fill16(dst); | |
162 | //DRKTODO verify these modulos/divisions are optimally handled by clang | |
163 | dst += L / 255; | |
164 | *dst++ = L % 255; | |
165 | return dst; | |
39037602 A |
166 | } |
167 | ||
0a7de745 A |
168 | static inline uint32_t |
169 | clamp(uint32_t x, uint32_t max) | |
170 | __attribute__((overloadable)) | |
171 | { | |
172 | return x > max ? max : x; | |
173 | } | |
39037602 | 174 | |
0a7de745 A |
175 | static inline uint8_t * |
176 | copy_literal(uint8_t *dst, const uint8_t * restrict src, uint32_t L) | |
177 | { | |
178 | uint8_t *end = dst + L; | |
179 | { copy16(dst, src); dst += 16; src += 16; } | |
180 | while (dst < end) { | |
181 | copy32(dst, src); dst += 32; src += 32; | |
182 | } | |
183 | return end; | |
39037602 A |
184 | } |
185 | ||
0a7de745 A |
186 | static uint8_t * |
187 | lz4_emit_match(uint32_t L, uint32_t M, uint32_t D, | |
188 | uint8_t * restrict dst, | |
189 | const uint8_t * const end, | |
190 | const uint8_t * restrict src) | |
191 | { | |
192 | // The LZ4 encoding scheme requires that M is at least 4, because | |
193 | // the actual value stored by the encoding is M - 4. Check this | |
194 | // requirement for debug builds. | |
195 | assert(M >= 4 && "LZ4 encoding requires that M is at least 4"); | |
196 | // Having checked that M >= 4, translate M by four. | |
197 | M -= 4; | |
198 | // Similarly, we must have D < 2**16, because we use only two bytes | |
199 | // to represent the value of D in the encoding. | |
200 | assert(D <= USHRT_MAX && "LZ4 encoding requries that D can be stored in two bytes."); | |
201 | // Construct the command byte by clamping both L and M to 0 ... 15 | |
202 | // and packing them into a single byte, and store it. | |
203 | *dst++ = clamp(L, 15) << 4 | clamp(M, 15); | |
204 | // If L is 15 or greater, we need to encode extra literal length bytes. | |
205 | if (L >= 15) { | |
206 | dst = lz4_store_length(dst, end, L - 15); | |
207 | if (dst == 0 || dst + L >= end) { | |
208 | return NULL; | |
209 | } | |
210 | } | |
211 | // Copy the literal itself from src to dst. | |
212 | dst = copy_literal(dst, src, L); | |
213 | // Store match distance. | |
214 | store2(dst, D); dst += 2; | |
215 | // If M is 15 or greater, we need to encode extra match length bytes. | |
216 | if (M >= 15) { | |
217 | dst = lz4_store_length(dst, end, M - 15); | |
218 | if (dst == 0) { | |
219 | return NULL; | |
220 | } | |
221 | } | |
222 | return dst; | |
39037602 A |
223 | } |
224 | ||
225 | /* #ifndef LZ4_EARLY_ABORT */ | |
226 | /* #define LZ4_EARLY_ABORT (1) */ | |
227 | /* #endif */ | |
228 | ||
229 | #if LZ4_EARLY_ABORT | |
230 | int lz4_do_early_abort = 1; | |
231 | int lz4_early_aborts = 0; | |
232 | #define LZ4_EARLY_ABORT_EVAL (448) | |
233 | #define LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR (20) | |
234 | #endif /* LZ4_EARLY_ABORT */ | |
235 | ||
0a7de745 A |
236 | void |
237 | lz4_encode_2gb(uint8_t ** dst_ptr, | |
238 | size_t dst_size, | |
239 | const uint8_t ** src_ptr, | |
240 | const uint8_t * src_begin, | |
241 | size_t src_size, | |
242 | lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES], | |
243 | int skip_final_literals) | |
39037602 | 244 | { |
0a7de745 A |
245 | uint8_t *dst = *dst_ptr; // current output stream position |
246 | uint8_t *end = dst + dst_size - LZ4_GOFAST_SAFETY_MARGIN; | |
247 | const uint8_t *src = *src_ptr; // current input stream literal to encode | |
248 | const uint8_t *src_end = src + src_size - LZ4_GOFAST_SAFETY_MARGIN; | |
249 | const uint8_t *match_begin = 0; // first byte of matched sequence | |
250 | const uint8_t *match_end = 0; // first byte after matched sequence | |
39037602 | 251 | #if LZ4_EARLY_ABORT |
0a7de745 A |
252 | uint8_t * const dst_begin = dst; |
253 | uint32_t lz4_do_abort_eval = lz4_do_early_abort; | |
39037602 | 254 | #endif |
0a7de745 A |
255 | |
256 | while (dst < end) { | |
257 | ptrdiff_t match_distance = 0; | |
258 | for (match_begin = src; match_begin < src_end; match_begin += 1) { | |
259 | const uint32_t pos = (uint32_t)(match_begin - src_begin); | |
260 | const uint32_t w0 = load4(match_begin); | |
261 | const uint32_t w1 = load4(match_begin + 1); | |
262 | const uint32_t w2 = load4(match_begin + 2); | |
263 | const uint32_t w3 = load4(match_begin + 3); | |
264 | const int i0 = lz4_hash(w0); | |
265 | const int i1 = lz4_hash(w1); | |
266 | const int i2 = lz4_hash(w2); | |
267 | const int i3 = lz4_hash(w3); | |
268 | const uint8_t *c0 = src_begin + hash_table[i0].offset; | |
269 | const uint8_t *c1 = src_begin + hash_table[i1].offset; | |
270 | const uint8_t *c2 = src_begin + hash_table[i2].offset; | |
271 | const uint8_t *c3 = src_begin + hash_table[i3].offset; | |
272 | const uint32_t m0 = hash_table[i0].word; | |
273 | const uint32_t m1 = hash_table[i1].word; | |
274 | const uint32_t m2 = hash_table[i2].word; | |
275 | const uint32_t m3 = hash_table[i3].word; | |
276 | hash_table[i0].offset = pos; | |
277 | hash_table[i0].word = w0; | |
278 | hash_table[i1].offset = pos + 1; | |
279 | hash_table[i1].word = w1; | |
280 | ||
281 | hash_table[i2].offset = pos + 2; | |
282 | hash_table[i2].word = w2; | |
283 | hash_table[i3].offset = pos + 3; | |
284 | hash_table[i3].word = w3; | |
285 | ||
286 | match_distance = (match_begin - c0); | |
287 | if (w0 == m0 && match_distance < 0x10000 && match_distance > 0) { | |
288 | match_end = match_begin + 4; | |
289 | goto EXPAND_FORWARD; | |
290 | } | |
291 | ||
292 | match_begin++; | |
293 | match_distance = (match_begin - c1); | |
294 | if (w1 == m1 && match_distance < 0x10000 && match_distance > 0) { | |
295 | match_end = match_begin + 4; | |
296 | goto EXPAND_FORWARD; | |
297 | } | |
298 | ||
299 | match_begin++; | |
300 | match_distance = (match_begin - c2); | |
301 | if (w2 == m2 && match_distance < 0x10000 && match_distance > 0) { | |
302 | match_end = match_begin + 4; | |
303 | goto EXPAND_FORWARD; | |
304 | } | |
305 | ||
306 | match_begin++; | |
307 | match_distance = (match_begin - c3); | |
308 | if (w3 == m3 && match_distance < 0x10000 && match_distance > 0) { | |
309 | match_end = match_begin + 4; | |
310 | goto EXPAND_FORWARD; | |
311 | } | |
39037602 A |
312 | |
313 | #if LZ4_EARLY_ABORT | |
0a7de745 A |
314 | //DRKTODO: Evaluate unrolling further. 2xunrolling had some modest benefits |
315 | if (lz4_do_abort_eval && ((pos) >= LZ4_EARLY_ABORT_EVAL)) { | |
316 | ptrdiff_t dstd = dst - dst_begin; | |
317 | ||
318 | if (dstd == 0) { | |
319 | lz4_early_aborts++; | |
320 | return; | |
321 | } | |
322 | ||
323 | /* if (dstd >= pos) { */ | |
324 | /* return; */ | |
325 | /* } */ | |
326 | /* ptrdiff_t cbytes = pos - dstd; */ | |
327 | /* if ((cbytes * LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR) > pos) { */ | |
328 | /* return; */ | |
329 | /* } */ | |
330 | lz4_do_abort_eval = 0; | |
331 | } | |
39037602 | 332 | #endif |
0a7de745 A |
333 | } |
334 | ||
335 | if (skip_final_literals) { | |
336 | *src_ptr = src; *dst_ptr = dst; return; | |
337 | } // do not emit the final literal sequence | |
338 | ||
339 | // Emit a trailing literal that covers the remainder of the source buffer, | |
340 | // if we can do so without exceeding the bounds of the destination buffer. | |
341 | size_t src_remaining = src_end + LZ4_GOFAST_SAFETY_MARGIN - src; | |
342 | if (src_remaining < 15) { | |
343 | *dst++ = (uint8_t)(src_remaining << 4); | |
344 | memcpy(dst, src, 16); dst += src_remaining; | |
345 | } else { | |
346 | *dst++ = 0xf0; | |
347 | dst = lz4_store_length(dst, end, (uint32_t)(src_remaining - 15)); | |
348 | if (dst == 0 || dst + src_remaining >= end) { | |
349 | return; | |
350 | } | |
351 | memcpy(dst, src, src_remaining); dst += src_remaining; | |
352 | } | |
353 | *dst_ptr = dst; | |
354 | *src_ptr = src + src_remaining; | |
355 | return; | |
356 | ||
357 | EXPAND_FORWARD: | |
358 | ||
359 | // Expand match forward | |
360 | { | |
361 | const uint8_t * ref_end = match_end - match_distance; | |
362 | while (match_end < src_end) { | |
363 | size_t n = lz4_nmatch(LZ4_MATCH_SEARCH_LOOP_SIZE, ref_end, match_end); | |
364 | if (n < LZ4_MATCH_SEARCH_LOOP_SIZE) { | |
365 | match_end += n; break; | |
366 | } | |
367 | match_end += LZ4_MATCH_SEARCH_LOOP_SIZE; | |
368 | ref_end += LZ4_MATCH_SEARCH_LOOP_SIZE; | |
369 | } | |
370 | } | |
371 | ||
372 | // Expand match backward | |
373 | { | |
374 | // match_begin_min = max(src_begin + match_distance,literal) | |
375 | const uint8_t * match_begin_min = src_begin + match_distance; | |
376 | match_begin_min = (match_begin_min < src)?src:match_begin_min; | |
377 | const uint8_t * ref_begin = match_begin - match_distance; | |
378 | ||
379 | while (match_begin > match_begin_min && ref_begin[-1] == match_begin[-1]) { | |
380 | match_begin -= 1; ref_begin -= 1; | |
381 | } | |
382 | } | |
383 | ||
384 | // Emit match | |
385 | dst = lz4_emit_match((uint32_t)(match_begin - src), (uint32_t)(match_end - match_begin), (uint32_t)match_distance, dst, end, src); | |
386 | if (!dst) { | |
387 | return; | |
388 | } | |
389 | ||
390 | // Update state | |
391 | src = match_end; | |
392 | ||
393 | // Update return values to include the last fully encoded match | |
394 | *dst_ptr = dst; | |
395 | *src_ptr = src; | |
396 | } | |
39037602 A |
397 | } |
398 | ||
399 | #endif | |
400 | ||
0a7de745 A |
401 | size_t |
402 | lz4raw_encode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, | |
403 | const uint8_t * __restrict src_buffer, size_t src_size, | |
404 | lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES]) | |
39037602 | 405 | { |
0a7de745 A |
406 | // Initialize hash table |
407 | const lz4_hash_entry_t HASH_FILL = { .offset = 0x80000000, .word = 0x0 }; | |
408 | ||
409 | const uint8_t * src = src_buffer; | |
410 | uint8_t * dst = dst_buffer; | |
411 | ||
412 | // We need several blocks because our base function is limited to 2GB input | |
413 | const size_t BLOCK_SIZE = 0x7ffff000; | |
414 | while (src_size > 0) { | |
415 | //DRKTODO either implement pattern4 or figure out optimal unroll | |
416 | //DRKTODO: bizarrely, with plain O3 the compiler generates a single | |
417 | //DRKTODO: scalar STP per loop iteration with the stock loop | |
418 | //DRKTODO If hand unrolled, it switches to NEON store pairs | |
419 | // Reset hash table for each block | |
39037602 A |
420 | /* #if __STDC_HOSTED__ */ |
421 | /* memset_pattern8(hash_table, &HASH_FILL, lz4_encode_scratch_size); */ | |
422 | /* #else */ | |
423 | /* for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;i++) hash_table[i] = HASH_FILL; */ | |
424 | /* #endif */ | |
425 | ||
0a7de745 A |
426 | for (int i = 0; i < LZ4_COMPRESS_HASH_ENTRIES;) { |
427 | hash_table[i++] = HASH_FILL; | |
428 | hash_table[i++] = HASH_FILL; | |
429 | hash_table[i++] = HASH_FILL; | |
430 | hash_table[i++] = HASH_FILL; | |
431 | } | |
432 | ||
433 | // Bytes to encode in this block | |
434 | const size_t src_to_encode = src_size > BLOCK_SIZE ? BLOCK_SIZE : src_size; | |
435 | ||
436 | // Run the encoder, only the last block emits final literals. Allows concatenation of encoded payloads. | |
437 | // Blocks are encoded independently, so src_begin is set to each block origin instead of src_buffer | |
438 | uint8_t * dst_start = dst; | |
439 | const uint8_t * src_start = src; | |
440 | lz4_encode_2gb(&dst, dst_size, &src, src, src_to_encode, hash_table, src_to_encode < src_size); | |
441 | ||
442 | // Check progress | |
443 | size_t dst_used = dst - dst_start; | |
444 | size_t src_used = src - src_start; // src_used <= src_to_encode | |
445 | if (src_to_encode == src_size && src_used < src_to_encode) { | |
446 | return 0; // FAIL to encode last block | |
447 | } | |
448 | // Note that there is a potential problem here in case of non compressible data requiring more blocks. | |
449 | // We may end up here with src_used very small, or even 0, and will not be able to make progress during | |
450 | // compression. We FAIL unless the length of literals remaining at the end is small enough. | |
451 | if (src_to_encode < src_size && src_to_encode - src_used >= (1 << 16)) { | |
452 | return 0; // FAIL too many literals | |
453 | } | |
454 | // Update counters (SRC and DST already have been updated) | |
455 | src_size -= src_used; | |
456 | dst_size -= dst_used; | |
457 | } | |
458 | ||
459 | return (size_t)(dst - dst_buffer); // bytes produced | |
39037602 A |
460 | } |
461 | ||
39037602 A |
462 | typedef uint32_t lz4_uint128 __attribute__((ext_vector_type(4))) __attribute__((__aligned__(1))); |
463 | ||
0a7de745 A |
464 | int |
465 | lz4_decode(uint8_t ** dst_ptr, | |
466 | uint8_t * dst_begin, | |
467 | uint8_t * dst_end, | |
468 | const uint8_t ** src_ptr, | |
469 | const uint8_t * src_end) | |
39037602 | 470 | { |
0a7de745 A |
471 | uint8_t * dst = *dst_ptr; |
472 | const uint8_t * src = *src_ptr; | |
473 | ||
474 | // Require dst_end > dst. | |
475 | if (dst_end <= dst) { | |
476 | goto OUT_FULL; | |
477 | } | |
478 | ||
479 | while (src < src_end) { | |
480 | // Keep last good position | |
481 | *src_ptr = src; | |
482 | *dst_ptr = dst; | |
483 | ||
484 | uint8_t cmd = *src++; // 1 byte encoding literal+(match-4) length: LLLLMMMM | |
485 | uint32_t literalLength = (cmd >> 4) & 15; // 0..15 | |
486 | uint32_t matchLength = 4 + (cmd & 15); // 4..19 | |
487 | ||
488 | // extra bytes for literalLength | |
489 | if (__improbable(literalLength == 15)) { | |
490 | uint8_t s; | |
491 | do { | |
39037602 | 492 | #if DEBUG_LZ4_DECODE_ERRORS |
0a7de745 A |
493 | if (__improbable(src >= src_end)) { |
494 | printf("Truncated SRC literal length\n"); | |
495 | } | |
39037602 | 496 | #endif |
0a7de745 A |
497 | if (__improbable(src >= src_end)) { |
498 | goto IN_FAIL; // unexpected end of input (1 byte needed) | |
499 | } | |
500 | s = *src++; | |
501 | literalLength += s; | |
502 | } while (__improbable(s == 255)); | |
503 | } | |
504 | ||
505 | // copy literal | |
39037602 | 506 | #if DEBUG_LZ4_DECODE_ERRORS |
0a7de745 A |
507 | if (__improbable(literalLength > (size_t)(src_end - src))) { |
508 | printf("Truncated SRC literal\n"); | |
509 | } | |
39037602 | 510 | #endif |
0a7de745 A |
511 | if (__improbable(literalLength > (size_t)(src_end - src))) { |
512 | goto IN_FAIL; | |
513 | } | |
514 | if (__improbable(literalLength > (size_t)(dst_end - dst))) { | |
515 | // literal will take us past the end of the destination buffer, | |
516 | // so we can only copy part of it. | |
517 | literalLength = (uint32_t)(dst_end - dst); | |
518 | memcpy(dst, src, literalLength); | |
519 | dst += literalLength; | |
520 | goto OUT_FULL; | |
521 | } | |
522 | memcpy(dst, src, literalLength); | |
523 | src += literalLength; | |
524 | dst += literalLength; | |
525 | ||
526 | if (__improbable(src >= src_end)) { | |
527 | goto OUT_FULL; // valid end of stream | |
528 | } | |
39037602 | 529 | #if DEBUG_LZ4_DECODE_ERRORS |
0a7de745 A |
530 | if (__improbable(2 > (size_t)(src_end - src))) { |
531 | printf("Truncated SRC distance\n"); | |
532 | } | |
39037602 | 533 | #endif |
0a7de745 A |
534 | if (__improbable(2 > (size_t)(src_end - src))) { |
535 | goto IN_FAIL; // unexpected end of input (2 bytes needed) | |
536 | } | |
537 | //DRKTODO: this causes an alignment increase warning (legitimate?) | |
538 | //DRKTODO: cast of char * to uint16_t* | |
39037602 A |
539 | #pragma clang diagnostic push |
540 | #pragma clang diagnostic ignored "-Wcast-align" | |
0a7de745 A |
541 | |
542 | // match distance | |
543 | uint64_t matchDistance = *(const uint16_t *)src; // 0x0000 <= matchDistance <= 0xffff | |
39037602 | 544 | #pragma clang diagnostic pop |
0a7de745 | 545 | src += 2; |
39037602 | 546 | #if DEBUG_LZ4_DECODE_ERRORS |
0a7de745 A |
547 | if (matchDistance == 0) { |
548 | printf("Invalid match distance D = 0\n"); | |
549 | } | |
39037602 | 550 | #endif |
0a7de745 A |
551 | if (__improbable(matchDistance == 0)) { |
552 | goto IN_FAIL; // 0x0000 invalid | |
553 | } | |
554 | uint8_t * ref = dst - matchDistance; | |
39037602 | 555 | #if DEBUG_LZ4_DECODE_ERRORS |
0a7de745 A |
556 | if (__improbable(ref < dst_begin)) { |
557 | printf("Invalid reference D=0x%llx dst_begin=%p dst=%p dst_end=%p\n", matchDistance, dst_begin, dst, dst_end); | |
558 | } | |
39037602 | 559 | #endif |
0a7de745 A |
560 | if (__improbable(ref < dst_begin)) { |
561 | goto OUT_FAIL; // out of range | |
562 | } | |
563 | // extra bytes for matchLength | |
564 | if (__improbable(matchLength == 19)) { | |
565 | uint8_t s; | |
566 | do { | |
39037602 | 567 | #if DEBUG_LZ4_DECODE_ERRORS |
0a7de745 A |
568 | if (__improbable(src >= src_end)) { |
569 | printf("Truncated SRC match length\n"); | |
570 | } | |
39037602 | 571 | #endif |
0a7de745 A |
572 | if (__improbable(src >= src_end)) { |
573 | goto IN_FAIL; // unexpected end of input (1 byte needed) | |
574 | } | |
575 | s = *src++; | |
576 | matchLength += s; | |
577 | } while (__improbable(s == 255)); | |
578 | } | |
579 | ||
580 | // copy match (may overlap) | |
581 | if (__improbable(matchLength > (size_t)(dst_end - dst))) { | |
582 | // match will take us past the end of the destination buffer, | |
583 | // so we can only copy part of it. | |
584 | matchLength = (uint32_t)(dst_end - dst); | |
585 | for (uint32_t i = 0; i < matchLength; ++i) { | |
586 | dst[i] = ref[i]; | |
587 | } | |
588 | dst += matchLength; | |
589 | goto OUT_FULL; | |
590 | } | |
591 | for (uint64_t i = 0; i < matchLength; i++) { | |
592 | dst[i] = ref[i]; | |
593 | } | |
594 | dst += matchLength; | |
595 | } | |
596 | ||
597 | // We reached the end of the input buffer after a full instruction | |
39037602 | 598 | OUT_FULL: |
0a7de745 A |
599 | // Or we reached the end of the output buffer |
600 | *dst_ptr = dst; | |
601 | *src_ptr = src; | |
602 | return 0; | |
603 | ||
604 | // Error conditions | |
39037602 A |
605 | OUT_FAIL: |
606 | IN_FAIL: | |
0a7de745 | 607 | return 1; // FAIL |
39037602 | 608 | } |