/*
 * NOTE(review): removed git-blame viewer residue ("Commit | Line | Data"
 * table header) that leaked into this file during extraction.
 */
1 | /* |
2 | * Copyright (c) 2016-2016 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | // LZ4_RAW buffer API | |
30 | // EB May 2015 | |
31 | // Mar 2016 Imported from the Compression project, with minor optimisations and | |
32 | // early abort detection (Derek Kumar) | |
33 | ||
34 | #include "lz4.h" | |
5ba3f43e | 35 | #define memcpy __builtin_memcpy |
39037602 A |
36 | |
// Decode an LZ4_RAW payload from src_buffer into dst_buffer.
// Returns the number of bytes produced, or 0 on failure.
// The work buffer is unused by this decoder.
size_t lz4raw_decode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size,
    const uint8_t * __restrict src_buffer, size_t src_size,
    void * __restrict work __attribute__((unused)))
{
	const uint8_t *in = src_buffer;
	uint8_t *out = dst_buffer;

#if LZ4_ENABLE_ASSEMBLY_DECODE
	// Fast path: run the assembly decoder while both cursors stay clear of
	// the buffer tails by at least the safety margin.
	if (dst_size > LZ4_GOFAST_SAFETY_MARGIN && src_size > LZ4_GOFAST_SAFETY_MARGIN) {
		if (lz4_decode_asm(&out, dst_buffer,
		    dst_buffer + dst_size - LZ4_GOFAST_SAFETY_MARGIN,
		    &in, src_buffer + src_size - LZ4_GOFAST_SAFETY_MARGIN)) {
			return 0; // FAIL
		}
	}
#endif

	// Finish (or handle short buffers entirely) with the bounds-checked C decoder.
	if (lz4_decode(&out, dst_buffer, dst_buffer + dst_size,
	    &in, src_buffer + src_size)) {
		return 0; // FAIL
	}

	return (size_t)(out - dst_buffer); // bytes produced
}
// Debug flags: when LZ4DEBUG is set, the encode/decode paths print a
// diagnostic message for each error condition they detect.
#if LZ4DEBUG
#define DEBUG_LZ4_ENCODE_ERRORS (1)
#define DEBUG_LZ4_DECODE_ERRORS (1)
#endif

#if DEBUG_LZ4_ENCODE_ERRORS
#endif

// C fallback encoder — compiled only when no assembly encoder is enabled
// for this architecture (this conditional closes after the encoder below).
#if !LZ4_ENABLE_ASSEMBLY_ENCODE

// Step widths (in bytes) used when comparing candidate matches; larger on
// x86_64. LOOP_SIZE is passed to lz4_nmatch() during forward match
// expansion. NOTE(review): INIT_SIZE is not referenced in this section of
// the file — confirm its use elsewhere before removing.
#if defined(__x86_64__) || defined(__x86_64h__)
#  define LZ4_MATCH_SEARCH_INIT_SIZE 32
#  define LZ4_MATCH_SEARCH_LOOP_SIZE 32
#else
#  define LZ4_MATCH_SEARCH_INIT_SIZE 8
#  define LZ4_MATCH_SEARCH_LOOP_SIZE 8
#endif
78 | ||
79 | // Return hash for 4-byte sequence X | |
80 | static inline uint32_t lz4_hash(uint32_t x) { return (x * 2654435761U) >> (32 - LZ4_COMPRESS_HASH_BITS); } | |
81 | ||
// Write 16 bytes of 0xff starting at PTR (two 8-byte stores of -1).
static inline void
lz4_fill16(uint8_t *ptr)
{
	for (int i = 0; i < 16; i += 8) {
		store8(ptr + i, -1);
	}
}
88 | ||
// Count how many of the first 4 bytes at A and B are equal (0..4).
static inline size_t
lz4_nmatch4(const uint8_t *a, const uint8_t *b)
{
	uint32_t diff = load4(a) ^ load4(b);
	if (diff == 0) {
		return 4;
	}
	// First differing bit position, converted to whole matching bytes.
	return (size_t)(__builtin_ctzl(diff) >> 3);
}
95 | ||
// Count how many of the first 8 bytes at A and B are equal (0..8).
static inline size_t
lz4_nmatch8(const uint8_t *a, const uint8_t *b)
{
	uint64_t diff = load8(a) ^ load8(b);
	if (diff == 0) {
		return 8;
	}
	// First differing bit position, converted to whole matching bytes.
	return (size_t)(__builtin_ctzll(diff) >> 3);
}
102 | ||
// Count how many of the first 16 bytes at A and B are equal (0..16),
// built from two 8-byte comparisons.
static inline size_t
lz4_nmatch16(const uint8_t *a, const uint8_t *b)
{
	size_t head = lz4_nmatch8(a, b);
	if (head < 8) {
		return head;
	}
	return 8 + lz4_nmatch8(a + 8, b + 8);
}
109 | ||
// Count how many of the first 32 bytes at A and B are equal (0..32),
// built from two 16-byte comparisons.
static inline size_t
lz4_nmatch32(const uint8_t *a, const uint8_t *b)
{
	size_t head = lz4_nmatch16(a, b);
	if (head < 16) {
		return head;
	}
	return 16 + lz4_nmatch16(a + 16, b + 16);
}
116 | ||
// Count how many of the first 64 bytes at A and B are equal (0..64),
// built from two 32-byte comparisons.
static inline size_t
lz4_nmatch64(const uint8_t *a, const uint8_t *b)
{
	size_t head = lz4_nmatch32(a, b);
	if (head < 32) {
		return head;
	}
	return 32 + lz4_nmatch32(a + 32, b + 32);
}
123 | ||
// Compile-time dispatch: count matching bytes 0..N at A and B, where N is
// one of the supported comparison widths. Any other N is a programming
// error and traps.
static inline size_t
lz4_nmatch(int N, const uint8_t *a, const uint8_t *b)
{
	switch (N) {
	case 4:
		return lz4_nmatch4(a, b);
	case 8:
		return lz4_nmatch8(a, b);
	case 16:
		return lz4_nmatch16(a, b);
	case 32:
		return lz4_nmatch32(a, b);
	case 64:
		return lz4_nmatch64(a, b);
	default:
		__builtin_trap(); // FAIL
	}
}
136 | ||
// Store length L at DST using the LZ4 length-extension scheme: a run of
// 0xff bytes followed by one byte < 0xff, whose sum is L. The caller
// guarantees slack above DST_END, so END is unused here.
// Returns the advanced DST pointer (0/NULL is never returned by this
// implementation; callers still check for it defensively).
static inline uint8_t *
lz4_store_length(uint8_t *dst, const uint8_t *const end, uint32_t L)
{
	(void)end;

	// Emit 16 bytes of 0xff (16*255 length units) per iteration. The loop
	// bound of 17*255 (not 16*255) guarantees that after the loop the
	// remaining L/255 run of 0xff bytes is at most 16, so one final
	// lz4_fill16 covers it.
	for (; L >= 17 * 255; L -= 16 * 255) {
		lz4_fill16(dst);
		dst += 16;
	}
	lz4_fill16(dst);
	dst += L / 255;      // skip over the remaining 0xff bytes
	*dst++ = L % 255;    // terminating byte < 0xff
	return dst;
}
153 | ||
154 | static inline uint32_t clamp(uint32_t x, uint32_t max) __attribute__((overloadable)) { return x > max ? max : x; } | |
155 | ||
// Copy an L-byte literal from src to dst and return dst + L.
// NOTE(review): this deliberately over-copies — the first chunk is an
// unconditional 16-byte copy and subsequent chunks are 32 bytes, so up to
// 31 bytes past dst+L may be written. Callers must guarantee that much
// slack in the destination (presumably LZ4_GOFAST_SAFETY_MARGIN — confirm).
static inline uint8_t *copy_literal(uint8_t *dst, const uint8_t * restrict src, uint32_t L) {
	uint8_t *end = dst + L;
	{ copy16(dst, src); dst += 16; src += 16; }
	while (dst < end) { copy32(dst, src); dst += 32; src += 32; }
	return end;
}
162 | ||
// Emit one LZ4 command: a literal run of L bytes copied from src, followed
// by a match of length M at backward distance D. dst is the output cursor
// and end the output limit used for bounds checks on extended lengths.
// Returns the advanced output pointer, or NULL if the output did not fit.
static uint8_t *lz4_emit_match(uint32_t L, uint32_t M, uint32_t D,
    uint8_t * restrict dst,
    const uint8_t * const end,
    const uint8_t * restrict src) {
	// The LZ4 encoding scheme requires that M is at least 4, because
	// the actual value stored by the encoding is M - 4. Check this
	// requirement for debug builds.
	assert(M >= 4 && "LZ4 encoding requires that M is at least 4");
	// Having checked that M >= 4, translate M by four.
	M -= 4;
	// Similarly, we must have D < 2**16, because we use only two bytes
	// to represent the value of D in the encoding.
	assert(D <= USHRT_MAX && "LZ4 encoding requries that D can be stored in two bytes.");
	// Construct the command byte by clamping both L and M to 0 ... 15
	// and packing them into a single byte, and store it.
	*dst++ = clamp(L, 15) << 4 | clamp(M, 15);
	// If L is 15 or greater, we need to encode extra literal length bytes.
	if (L >= 15) {
		dst = lz4_store_length(dst, end, L - 15);
		// NOTE(review): the dst + L bound is checked only on this path;
		// for L < 15 the safety margin maintained by the caller must cover
		// copy_literal's over-copy — confirm.
		if (dst == 0 || dst + L >= end) return NULL;
	}
	// Copy the literal itself from src to dst.
	dst = copy_literal(dst, src, L);
	// Store match distance (2 little-endian bytes).
	store2(dst, D); dst += 2;
	// If M is 15 or greater, we need to encode extra match length bytes.
	if (M >= 15) {
		dst = lz4_store_length(dst, end, M - 15);
		if (dst == 0) return NULL;
	}
	return dst;
}
195 | ||
/* #ifndef LZ4_EARLY_ABORT */
/* #define LZ4_EARLY_ABORT (1) */
/* #endif */

#if LZ4_EARLY_ABORT
// Runtime switch: when nonzero, the encoder gives up early on data that
// shows no compression after the first LZ4_EARLY_ABORT_EVAL input bytes.
int lz4_do_early_abort = 1;
// Count of encodes abandoned by the early-abort heuristic (statistics).
int lz4_early_aborts = 0;
// Input position (bytes) at which the early-abort heuristic is evaluated.
#define LZ4_EARLY_ABORT_EVAL (448)
// NOTE(review): only referenced from commented-out code below — confirm
// whether this threshold is still meant to be used.
#define LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR (20)
#endif /* LZ4_EARLY_ABORT */
206 | ||
// Encode up to src_size (< 2 GB) bytes from *src_ptr into *dst_ptr.
// hash_table maps the hash of each 4-byte word to the most recent offset
// (relative to src_begin) where that word was seen, along with the word
// itself for verification. On return, *src_ptr/*dst_ptr are advanced past
// the last fully-encoded match (or past the trailing literal). If
// skip_final_literals is nonzero, trailing unmatched bytes are left
// unconsumed so a subsequent block's output can be concatenated.
void lz4_encode_2gb(uint8_t ** dst_ptr,
    size_t dst_size,
    const uint8_t ** src_ptr,
    const uint8_t * src_begin,
    size_t src_size,
    lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES],
    int skip_final_literals)
{
	uint8_t *dst = *dst_ptr; // current output stream position
	uint8_t *end = dst + dst_size - LZ4_GOFAST_SAFETY_MARGIN;
	const uint8_t *src = *src_ptr; // current input stream literal to encode
	const uint8_t *src_end = src + src_size - LZ4_GOFAST_SAFETY_MARGIN;
	const uint8_t *match_begin = 0; // first byte of matched sequence
	const uint8_t *match_end = 0; // first byte after matched sequence
#if LZ4_EARLY_ABORT
	uint8_t * const dst_begin = dst;
	uint32_t lz4_do_abort_eval = lz4_do_early_abort;
#endif

	while (dst < end)
	{
		ptrdiff_t match_distance = 0;
		// Match search: probe four consecutive positions per iteration
		// (4x unrolled). Each probe hashes the 4-byte word at that
		// position, looks up the previous occurrence, and refreshes the
		// hash table entry before testing for a usable match.
		for (match_begin = src; match_begin < src_end; match_begin += 1) {
			const uint32_t pos = (uint32_t)(match_begin - src_begin);
			const uint32_t w0 = load4(match_begin);
			const uint32_t w1 = load4(match_begin + 1);
			const uint32_t w2 = load4(match_begin + 2);
			const uint32_t w3 = load4(match_begin + 3);
			const int i0 = lz4_hash(w0);
			const int i1 = lz4_hash(w1);
			const int i2 = lz4_hash(w2);
			const int i3 = lz4_hash(w3);
			const uint8_t *c0 = src_begin + hash_table[i0].offset;
			const uint8_t *c1 = src_begin + hash_table[i1].offset;
			const uint8_t *c2 = src_begin + hash_table[i2].offset;
			const uint8_t *c3 = src_begin + hash_table[i3].offset;
			const uint32_t m0 = hash_table[i0].word;
			const uint32_t m1 = hash_table[i1].word;
			const uint32_t m2 = hash_table[i2].word;
			const uint32_t m3 = hash_table[i3].word;
			// Refresh all four entries before testing, so later probes in
			// this iteration see the newest positions.
			hash_table[i0].offset = pos;
			hash_table[i0].word = w0;
			hash_table[i1].offset = pos + 1;
			hash_table[i1].word = w1;

			hash_table[i2].offset = pos + 2;
			hash_table[i2].word = w2;
			hash_table[i3].offset = pos + 3;
			hash_table[i3].word = w3;

			// A candidate is usable when the stored word matches (guards
			// against hash collisions) and the distance fits the 2-byte
			// encoding (0 < D < 0x10000).
			match_distance = (match_begin - c0);
			if (w0 == m0 && match_distance < 0x10000 && match_distance > 0) {
				match_end = match_begin + 4;
				goto EXPAND_FORWARD;
			}

			match_begin++;
			match_distance = (match_begin - c1);
			if (w1 == m1 && match_distance < 0x10000 && match_distance > 0) {
				match_end = match_begin + 4;
				goto EXPAND_FORWARD;
			}

			match_begin++;
			match_distance = (match_begin - c2);
			if (w2 == m2 && match_distance < 0x10000 && match_distance > 0) {
				match_end = match_begin + 4;
				goto EXPAND_FORWARD;
			}

			match_begin++;
			match_distance = (match_begin - c3);
			if (w3 == m3 && match_distance < 0x10000 && match_distance > 0) {
				match_end = match_begin + 4;
				goto EXPAND_FORWARD;
			}

#if LZ4_EARLY_ABORT
			//DRKTODO: Evaluate unrolling further. 2xunrolling had some modest benefits
			// Early abort: once LZ4_EARLY_ABORT_EVAL input bytes have been
			// scanned, give up if nothing has been emitted yet; otherwise
			// disable further evaluation for this call.
			if (lz4_do_abort_eval && ((pos) >= LZ4_EARLY_ABORT_EVAL)) {
				ptrdiff_t dstd = dst - dst_begin;

				if (dstd == 0) {
					lz4_early_aborts++;
					return;
				}

				/* if (dstd >= pos) { */
				/*   return; */
				/* } */
				/* ptrdiff_t cbytes = pos - dstd; */
				/* if ((cbytes * LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR) > pos) { */
				/*   return; */
				/* } */
				lz4_do_abort_eval = 0;
			}
#endif
		}

		if (skip_final_literals) { *src_ptr = src; *dst_ptr = dst; return; } // do not emit the final literal sequence

		// Emit a trailing literal that covers the remainder of the source buffer,
		// if we can do so without exceeding the bounds of the destination buffer.
		size_t src_remaining = src_end + LZ4_GOFAST_SAFETY_MARGIN - src;
		if (src_remaining < 15) {
			*dst++ = (uint8_t)(src_remaining << 4);
			// 16-byte copy over-reads/over-writes past src_remaining;
			// covered by the safety margins maintained above.
			memcpy(dst, src, 16); dst += src_remaining;
		} else {
			*dst++ = 0xf0;
			dst = lz4_store_length(dst, end, (uint32_t)(src_remaining - 15));
			if (dst == 0 || dst + src_remaining >= end) return;
			memcpy(dst, src, src_remaining); dst += src_remaining;
		}
		*dst_ptr = dst;
		*src_ptr = src + src_remaining;
		return;

EXPAND_FORWARD:

		// Expand match forward
		{
			const uint8_t * ref_end = match_end - match_distance;
			while (match_end < src_end)
			{
				size_t n = lz4_nmatch(LZ4_MATCH_SEARCH_LOOP_SIZE, ref_end, match_end);
				if (n < LZ4_MATCH_SEARCH_LOOP_SIZE) { match_end += n; break; }
				match_end += LZ4_MATCH_SEARCH_LOOP_SIZE;
				ref_end += LZ4_MATCH_SEARCH_LOOP_SIZE;
			}
		}

		// Expand match backward
		{
			// match_begin_min = max(src_begin + match_distance,literal)
			const uint8_t * match_begin_min = src_begin + match_distance;
			match_begin_min = (match_begin_min < src)?src:match_begin_min;
			const uint8_t * ref_begin = match_begin - match_distance;

			while (match_begin > match_begin_min && ref_begin[-1] == match_begin[-1] ) { match_begin -= 1; ref_begin -= 1; }
		}

		// Emit match
		dst = lz4_emit_match((uint32_t)(match_begin - src), (uint32_t)(match_end - match_begin), (uint32_t)match_distance, dst, end, src);
		if (!dst) return;

		// Update state
		src = match_end;

		// Update return values to include the last fully encoded match
		*dst_ptr = dst;
		*src_ptr = src;
	}
}
360 | ||
361 | #endif | |
362 | ||
// Encode src_buffer into dst_buffer as an LZ4_RAW stream, using the
// caller-supplied hash_table as scratch. Returns the number of output
// bytes produced, or 0 on failure (output full, or no progress possible).
// Input larger than ~2 GB is split into independently-encoded blocks whose
// outputs are concatenated.
size_t lz4raw_encode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size,
    const uint8_t * __restrict src_buffer, size_t src_size,
    lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES])
{
	// Initialize hash table
	const lz4_hash_entry_t HASH_FILL = { .offset = 0x80000000, .word = 0x0 };

	const uint8_t * src = src_buffer;
	uint8_t * dst = dst_buffer;

	// We need several blocks because our base function is limited to 2GB input
	const size_t BLOCK_SIZE = 0x7ffff000;
	while (src_size > 0)
	{
		//DRKTODO either implement pattern4 or figure out optimal unroll
		//DRKTODO: bizarrely, with plain O3 the compiler generates a single
		//DRKTODO: scalar STP per loop iteration with the stock loop
		//DRKTODO If hand unrolled, it switches to NEON store pairs
		// Reset hash table for each block
		/* #if __STDC_HOSTED__ */
		/* memset_pattern8(hash_table, &HASH_FILL, lz4_encode_scratch_size); */
		/* #else */
		/* for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;i++) hash_table[i] = HASH_FILL; */
		/* #endif */

		// 4x-unrolled fill; assumes LZ4_COMPRESS_HASH_ENTRIES is a
		// multiple of 4.
		for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;) {
			hash_table[i++] = HASH_FILL;
			hash_table[i++] = HASH_FILL;
			hash_table[i++] = HASH_FILL;
			hash_table[i++] = HASH_FILL;
		}

		// Bytes to encode in this block
		const size_t src_to_encode = src_size > BLOCK_SIZE ? BLOCK_SIZE : src_size;

		// Run the encoder, only the last block emits final literals. Allows concatenation of encoded payloads.
		// Blocks are encoded independently, so src_begin is set to each block origin instead of src_buffer
		uint8_t * dst_start = dst;
		const uint8_t * src_start = src;
		lz4_encode_2gb(&dst, dst_size, &src, src, src_to_encode, hash_table, src_to_encode < src_size);

		// Check progress
		size_t dst_used = dst - dst_start;
		size_t src_used = src - src_start; // src_used <= src_to_encode
		if (src_to_encode == src_size && src_used < src_to_encode) return 0; // FAIL to encode last block

		// Note that there is a potential problem here in case of non compressible data requiring more blocks.
		// We may end up here with src_used very small, or even 0, and will not be able to make progress during
		// compression. We FAIL unless the length of literals remaining at the end is small enough.
		if (src_to_encode < src_size && src_to_encode - src_used >= (1<<16)) return 0; // FAIL too many literals

		// Update counters (SRC and DST already have been updated)
		src_size -= src_used;
		dst_size -= dst_used;
	}

	return (size_t)(dst - dst_buffer); // bytes produced
}
421 | ||
// Branch-prediction hints for the decoder's hot loop.
#define likely(expr) __builtin_expect((expr) != 0, 1)
#define unlikely(expr) __builtin_expect((expr) != 0, 0)
// 16-byte clang vector type with 1-byte alignment (safe for unaligned
// access). NOTE(review): not referenced in this section of the file —
// presumably used by decode helpers elsewhere; confirm.
typedef uint32_t lz4_uint128 __attribute__((ext_vector_type(4))) __attribute__((__aligned__(1)));
425 | ||
// Bounds-checked C decoder for the LZ4 command stream.
// Decodes from *src_ptr (up to src_end) into *dst_ptr (up to dst_end);
// dst_begin bounds backward match references. On return, *src_ptr and
// *dst_ptr point just past the last fully-decoded data.
// Returns 0 on success — either the input was consumed after a complete
// command, or the output buffer filled (partial final copy allowed) —
// and 1 on malformed or truncated input.
int lz4_decode(uint8_t ** dst_ptr,
    uint8_t * dst_begin,
    uint8_t * dst_end,
    const uint8_t ** src_ptr,
    const uint8_t * src_end)
{
	uint8_t * dst = *dst_ptr;
	const uint8_t * src = *src_ptr;

	// Require dst_end > dst.
	if (dst_end <= dst) goto OUT_FULL;

	while (src < src_end)
	{
		// Keep last good position
		*src_ptr = src;
		*dst_ptr = dst;

		uint8_t cmd = *src++; // 1 byte encoding literal+(match-4) length: LLLLMMMM
		uint32_t literalLength = (cmd >> 4) & 15; // 0..15
		uint32_t matchLength = 4 + (cmd & 15); // 4..19

		// extra bytes for literalLength: 15 in the nibble means the length
		// continues as a run of bytes summed until one is < 255.
		if (unlikely(literalLength == 15))
		{
			uint8_t s;
			do {
#if DEBUG_LZ4_DECODE_ERRORS
				if (unlikely(src >= src_end)) printf("Truncated SRC literal length\n");
#endif
				if (unlikely(src >= src_end)) goto IN_FAIL; // unexpected end of input (1 byte needed)
				s = *src++;
				literalLength += s;
			} while (unlikely(s == 255));
		}

		// copy literal
#if DEBUG_LZ4_DECODE_ERRORS
		if (unlikely(literalLength > (size_t)(src_end - src))) printf("Truncated SRC literal\n");
#endif
		if (unlikely(literalLength > (size_t)(src_end - src))) goto IN_FAIL;
		if (unlikely(literalLength > (size_t)(dst_end - dst))) {
			// literal will take us past the end of the destination buffer,
			// so we can only copy part of it.
			literalLength = (uint32_t)(dst_end - dst);
			memcpy(dst, src, literalLength);
			dst += literalLength;
			goto OUT_FULL;
		}
		memcpy(dst,src,literalLength);
		src += literalLength;
		dst += literalLength;

		if (unlikely(src >= src_end)) goto OUT_FULL; // valid end of stream
#if DEBUG_LZ4_DECODE_ERRORS
		if (unlikely(2 > (size_t)(src_end - src))) printf("Truncated SRC distance\n");
#endif
		if (unlikely(2 > (size_t)(src_end - src))) goto IN_FAIL; // unexpected end of input (2 bytes needed)

		//DRKTODO: this causes an alignment increase warning (legitimate?)
		//DRKTODO: cast of char * to uint16_t*
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wcast-align"

		// match distance
		uint64_t matchDistance = *(const uint16_t *)src; // 0x0000 <= matchDistance <= 0xffff
#pragma clang diagnostic pop
		src += 2;
#if DEBUG_LZ4_DECODE_ERRORS
		if (matchDistance == 0) printf("Invalid match distance D = 0\n");
#endif
		if (unlikely(matchDistance == 0)) goto IN_FAIL; // 0x0000 invalid
		uint8_t * ref = dst - matchDistance;
#if DEBUG_LZ4_DECODE_ERRORS
		if (unlikely(ref < dst_begin)) printf("Invalid reference D=0x%llx dst_begin=%p dst=%p dst_end=%p\n",matchDistance,dst_begin,dst,dst_end);
#endif
		if (unlikely(ref < dst_begin)) goto OUT_FAIL; // out of range

		// extra bytes for matchLength: same continuation scheme as the
		// literal length above.
		if (unlikely(matchLength == 19))
		{
			uint8_t s;
			do {
#if DEBUG_LZ4_DECODE_ERRORS
				if (unlikely(src >= src_end)) printf("Truncated SRC match length\n");
#endif
				if (unlikely(src >= src_end)) goto IN_FAIL; // unexpected end of input (1 byte needed)
				s = *src++;
				matchLength += s;
			} while (unlikely(s == 255));
		}

		// copy match (may overlap)
		if (unlikely(matchLength > (size_t)(dst_end - dst))) {
			// match will take us past the end of the destination buffer,
			// so we can only copy part of it.
			matchLength = (uint32_t)(dst_end - dst);
			for (uint32_t i=0; i<matchLength; ++i) dst[i] = ref[i];
			dst += matchLength;
			goto OUT_FULL;
		}
		// Byte-by-byte copy is required: when matchDistance < matchLength
		// the regions overlap and each byte may depend on one just written.
		for (uint64_t i=0;i<matchLength;i++) dst[i] = ref[i];
		dst += matchLength;
	}

	// We reached the end of the input buffer after a full instruction
OUT_FULL:
	// Or we reached the end of the output buffer
	*dst_ptr = dst;
	*src_ptr = src;
	return 0;

	// Error conditions
OUT_FAIL:
IN_FAIL:
	return 1; // FAIL
}