]>
Commit | Line | Data |
---|---|---|
39037602 A |
1 | /* |
2 | * Copyright (c) 2016-2016 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | // LZ4_RAW buffer API | |
30 | // EB May 2015 | |
31 | // Mar 2016 Imported from the Compression project, with minor optimisations and | |
32 | // early abort detection (Derek Kumar) | |
33 | ||
34 | #include "lz4.h" | |
35 | ||
36 | size_t lz4raw_decode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, | |
37 | const uint8_t * __restrict src_buffer, size_t src_size, | |
38 | void * __restrict work __attribute__((unused))) | |
39 | { | |
40 | const uint8_t * src = src_buffer; | |
41 | uint8_t * dst = dst_buffer; | |
42 | ||
43 | // Go fast if we can, keeping away from the end of buffers | |
44 | #if LZ4_ENABLE_ASSEMBLY_DECODE | |
45 | if (dst_size > LZ4_GOFAST_SAFETY_MARGIN && src_size > LZ4_GOFAST_SAFETY_MARGIN) | |
46 | { | |
47 | if (lz4_decode_asm(&dst, dst_buffer, dst_buffer + dst_size - LZ4_GOFAST_SAFETY_MARGIN, &src, src_buffer + src_size - LZ4_GOFAST_SAFETY_MARGIN)) | |
48 | return 0; // FAIL | |
49 | } | |
50 | #endif | |
51 | //DRKTODO: Can the 'C' "safety" decode be eliminated for 4/16K fixed-sized buffers? | |
52 | ||
53 | // Finish safe | |
54 | if (lz4_decode(&dst, dst_buffer, dst_buffer + dst_size, &src, src_buffer + src_size)) | |
55 | return 0; // FAIL | |
56 | ||
57 | return (size_t)(dst - dst_buffer); // bytes produced | |
58 | } | |
59 | // Debug flags | |
60 | #if LZ4DEBUG | |
61 | #define DEBUG_LZ4_ENCODE_ERRORS (1) | |
62 | #define DEBUG_LZ4_DECODE_ERRORS (1) | |
63 | #endif | |
64 | ||
65 | #if DEBUG_LZ4_ENCODE_ERRORS | |
66 | #endif | |
67 | ||
68 | #if !LZ4_ENABLE_ASSEMBLY_ENCODE | |
69 | ||
70 | #if defined(__x86_64__) || defined(__x86_64h__) | |
71 | # define LZ4_MATCH_SEARCH_INIT_SIZE 32 | |
72 | # define LZ4_MATCH_SEARCH_LOOP_SIZE 32 | |
73 | #else | |
74 | # define LZ4_MATCH_SEARCH_INIT_SIZE 8 | |
75 | # define LZ4_MATCH_SEARCH_LOOP_SIZE 8 | |
76 | #endif | |
77 | ||
78 | // Return hash for 4-byte sequence X | |
79 | static inline uint32_t lz4_hash(uint32_t x) { return (x * 2654435761U) >> (32 - LZ4_COMPRESS_HASH_BITS); } | |
80 | ||
81 | // Store 0xfff..fff at *PTR | |
82 | static inline void lz4_fill16(uint8_t * ptr) | |
83 | { | |
84 | store8(ptr,-1); | |
85 | store8(ptr+8,-1); | |
86 | } | |
87 | ||
88 | // Return number of matching bytes 0..4 at positions A and B. | |
89 | static inline size_t lz4_nmatch4(const uint8_t * a,const uint8_t * b) | |
90 | { | |
91 | uint32_t x = load4(a) ^ load4(b); | |
92 | return (x == 0)?4:(__builtin_ctzl(x) >> 3); | |
93 | } | |
94 | ||
95 | // Return number of matching bytes 0..8 at positions A and B. | |
96 | static inline size_t lz4_nmatch8(const uint8_t * a,const uint8_t * b) | |
97 | { | |
98 | uint64_t x = load8(a) ^ load8(b); | |
99 | return (x == 0)?8:(__builtin_ctzll(x) >> 3); | |
100 | } | |
101 | ||
102 | // Return number of matching bytes 0..16 at positions A and B. | |
103 | static inline size_t lz4_nmatch16(const uint8_t * a,const uint8_t * b) | |
104 | { | |
105 | size_t n = lz4_nmatch8(a,b); | |
106 | return (n == 8)?(8 + lz4_nmatch8(a+8,b+8)):n; | |
107 | } | |
108 | ||
109 | // Return number of matching bytes 0..32 at positions A and B. | |
110 | static inline size_t lz4_nmatch32(const uint8_t * a,const uint8_t * b) | |
111 | { | |
112 | size_t n = lz4_nmatch16(a,b); | |
113 | return (n == 16)?(16 + lz4_nmatch16(a+16,b+16)):n; | |
114 | } | |
115 | ||
116 | // Return number of matching bytes 0..64 at positions A and B. | |
117 | static inline size_t lz4_nmatch64(const uint8_t * a,const uint8_t * b) | |
118 | { | |
119 | size_t n = lz4_nmatch32(a,b); | |
120 | return (n == 32)?(32 + lz4_nmatch32(a+32,b+32)):n; | |
121 | } | |
122 | ||
123 | // Compile-time selection, return number of matching bytes 0..N at positions A and B. | |
124 | static inline size_t lz4_nmatch(int N, const uint8_t * a, const uint8_t * b) | |
125 | { | |
126 | switch (N) { | |
127 | case 4: return lz4_nmatch4(a,b); | |
128 | case 8: return lz4_nmatch8(a,b); | |
129 | case 16: return lz4_nmatch16(a,b); | |
130 | case 32: return lz4_nmatch32(a,b); | |
131 | case 64: return lz4_nmatch64(a,b); | |
132 | } | |
133 | __builtin_trap(); // FAIL | |
134 | } | |
135 | ||
136 | // Store LENGTH in DST using the literal_length/match_length extension scheme: X is the sum of all bytes until we reach a byte < 0xff. | |
137 | // We are allowed to access a constant number of bytes above DST_END. | |
138 | // Return incremented DST pointer on success, and 0 on failure | |
139 | static inline uint8_t *lz4_store_length(uint8_t * dst, const uint8_t * const end, uint32_t L) { | |
140 | (void)end; | |
141 | while (L >= 17*255) { | |
142 | lz4_fill16(dst); | |
143 | dst += 16; | |
144 | L -= 16*255; | |
145 | } | |
146 | lz4_fill16(dst); | |
147 | //DRKTODO verify these modulos/divisions are optimally handled by clang | |
148 | dst += L/255; | |
149 | *dst++ = L%255; | |
150 | return dst; | |
151 | } | |
152 | ||
153 | static inline uint32_t clamp(uint32_t x, uint32_t max) __attribute__((overloadable)) { return x > max ? max : x; } | |
154 | ||
155 | static inline uint8_t *copy_literal(uint8_t *dst, const uint8_t * restrict src, uint32_t L) { | |
156 | uint8_t *end = dst + L; | |
157 | { copy16(dst, src); dst += 16; src += 16; } | |
158 | while (dst < end) { copy32(dst, src); dst += 32; src += 32; } | |
159 | return end; | |
160 | } | |
161 | ||
162 | static uint8_t *lz4_emit_match(uint32_t L, uint32_t M, uint32_t D, | |
163 | uint8_t * restrict dst, | |
164 | const uint8_t * const end, | |
165 | const uint8_t * restrict src) { | |
166 | // The LZ4 encoding scheme requires that M is at least 4, because | |
167 | // the actual value stored by the encoding is M - 4. Check this | |
168 | // requirement for debug builds. | |
169 | assert(M >= 4 && "LZ4 encoding requires that M is at least 4"); | |
170 | // Having checked that M >= 4, translate M by four. | |
171 | M -= 4; | |
172 | // Similarly, we must have D < 2**16, because we use only two bytes | |
173 | // to represent the value of D in the encoding. | |
174 | assert(D <= USHRT_MAX && "LZ4 encoding requries that D can be stored in two bytes."); | |
175 | // Construct the command byte by clamping both L and M to 0 ... 15 | |
176 | // and packing them into a single byte, and store it. | |
177 | *dst++ = clamp(L, 15) << 4 | clamp(M, 15); | |
178 | // If L is 15 or greater, we need to encode extra literal length bytes. | |
179 | if (L >= 15) { | |
180 | dst = lz4_store_length(dst, end, L - 15); | |
181 | if (dst == 0 || dst + L >= end) return NULL; | |
182 | } | |
183 | // Copy the literal itself from src to dst. | |
184 | dst = copy_literal(dst, src, L); | |
185 | // Store match distance. | |
186 | store2(dst, D); dst += 2; | |
187 | // If M is 15 or greater, we need to encode extra match length bytes. | |
188 | if (M >= 15) { | |
189 | dst = lz4_store_length(dst, end, M - 15); | |
190 | if (dst == 0) return NULL; | |
191 | } | |
192 | return dst; | |
193 | } | |
194 | ||
195 | /* #ifndef LZ4_EARLY_ABORT */ | |
196 | /* #define LZ4_EARLY_ABORT (1) */ | |
197 | /* #endif */ | |
198 | ||
199 | #if LZ4_EARLY_ABORT | |
200 | int lz4_do_early_abort = 1; | |
201 | int lz4_early_aborts = 0; | |
202 | #define LZ4_EARLY_ABORT_EVAL (448) | |
203 | #define LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR (20) | |
204 | #endif /* LZ4_EARLY_ABORT */ | |
205 | ||
206 | void lz4_encode_2gb(uint8_t ** dst_ptr, | |
207 | size_t dst_size, | |
208 | const uint8_t ** src_ptr, | |
209 | const uint8_t * src_begin, | |
210 | size_t src_size, | |
211 | lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES], | |
212 | int skip_final_literals) | |
213 | { | |
214 | uint8_t *dst = *dst_ptr; // current output stream position | |
215 | uint8_t *end = dst + dst_size - LZ4_GOFAST_SAFETY_MARGIN; | |
216 | const uint8_t *src = *src_ptr; // current input stream literal to encode | |
217 | const uint8_t *src_end = src + src_size - LZ4_GOFAST_SAFETY_MARGIN; | |
218 | const uint8_t *match_begin = 0; // first byte of matched sequence | |
219 | const uint8_t *match_end = 0; // first byte after matched sequence | |
220 | #if LZ4_EARLY_ABORT | |
221 | uint8_t * const dst_begin = dst; | |
222 | uint32_t lz4_do_abort_eval = lz4_do_early_abort; | |
223 | #endif | |
224 | ||
225 | while (dst < end) | |
226 | { | |
227 | ptrdiff_t match_distance = 0; | |
228 | for (match_begin = src; match_begin < src_end; match_begin += 1) { | |
229 | const uint32_t pos = (uint32_t)(match_begin - src_begin); | |
230 | const uint32_t w0 = load4(match_begin); | |
231 | const uint32_t w1 = load4(match_begin + 1); | |
232 | const uint32_t w2 = load4(match_begin + 2); | |
233 | const uint32_t w3 = load4(match_begin + 3); | |
234 | const int i0 = lz4_hash(w0); | |
235 | const int i1 = lz4_hash(w1); | |
236 | const int i2 = lz4_hash(w2); | |
237 | const int i3 = lz4_hash(w3); | |
238 | const uint8_t *c0 = src_begin + hash_table[i0].offset; | |
239 | const uint8_t *c1 = src_begin + hash_table[i1].offset; | |
240 | const uint8_t *c2 = src_begin + hash_table[i2].offset; | |
241 | const uint8_t *c3 = src_begin + hash_table[i3].offset; | |
242 | const uint32_t m0 = hash_table[i0].word; | |
243 | const uint32_t m1 = hash_table[i1].word; | |
244 | const uint32_t m2 = hash_table[i2].word; | |
245 | const uint32_t m3 = hash_table[i3].word; | |
246 | hash_table[i0].offset = pos; | |
247 | hash_table[i0].word = w0; | |
248 | hash_table[i1].offset = pos + 1; | |
249 | hash_table[i1].word = w1; | |
250 | ||
251 | hash_table[i2].offset = pos + 2; | |
252 | hash_table[i2].word = w2; | |
253 | hash_table[i3].offset = pos + 3; | |
254 | hash_table[i3].word = w3; | |
255 | ||
256 | match_distance = (match_begin - c0); | |
257 | if (w0 == m0 && match_distance < 0x10000 && match_distance > 0) { | |
258 | match_end = match_begin + 4; | |
259 | goto EXPAND_FORWARD; | |
260 | } | |
261 | ||
262 | match_begin++; | |
263 | match_distance = (match_begin - c1); | |
264 | if (w1 == m1 && match_distance < 0x10000 && match_distance > 0) { | |
265 | match_end = match_begin + 4; | |
266 | goto EXPAND_FORWARD; | |
267 | } | |
268 | ||
269 | match_begin++; | |
270 | match_distance = (match_begin - c2); | |
271 | if (w2 == m2 && match_distance < 0x10000 && match_distance > 0) { | |
272 | match_end = match_begin + 4; | |
273 | goto EXPAND_FORWARD; | |
274 | } | |
275 | ||
276 | match_begin++; | |
277 | match_distance = (match_begin - c3); | |
278 | if (w3 == m3 && match_distance < 0x10000 && match_distance > 0) { | |
279 | match_end = match_begin + 4; | |
280 | goto EXPAND_FORWARD; | |
281 | } | |
282 | ||
283 | #if LZ4_EARLY_ABORT | |
284 | //DRKTODO: Evaluate unrolling further. 2xunrolling had some modest benefits | |
285 | if (lz4_do_abort_eval && ((pos) >= LZ4_EARLY_ABORT_EVAL)) { | |
286 | ptrdiff_t dstd = dst - dst_begin; | |
287 | ||
288 | if (dstd == 0) { | |
289 | lz4_early_aborts++; | |
290 | return; | |
291 | } | |
292 | ||
293 | /* if (dstd >= pos) { */ | |
294 | /* return; */ | |
295 | /* } */ | |
296 | /* ptrdiff_t cbytes = pos - dstd; */ | |
297 | /* if ((cbytes * LZ4_EARLY_ABORT_MIN_COMPRESSION_FACTOR) > pos) { */ | |
298 | /* return; */ | |
299 | /* } */ | |
300 | lz4_do_abort_eval = 0; | |
301 | } | |
302 | #endif | |
303 | } | |
304 | ||
305 | if (skip_final_literals) { *src_ptr = src; *dst_ptr = dst; return; } // do not emit the final literal sequence | |
306 | ||
307 | // Emit a trailing literal that covers the remainder of the source buffer, | |
308 | // if we can do so without exceeding the bounds of the destination buffer. | |
309 | size_t src_remaining = src_end + LZ4_GOFAST_SAFETY_MARGIN - src; | |
310 | if (src_remaining < 15) { | |
311 | *dst++ = (uint8_t)(src_remaining << 4); | |
312 | memcpy(dst, src, 16); dst += src_remaining; | |
313 | } else { | |
314 | *dst++ = 0xf0; | |
315 | dst = lz4_store_length(dst, end, (uint32_t)(src_remaining - 15)); | |
316 | if (dst == 0 || dst + src_remaining >= end) return; | |
317 | memcpy(dst, src, src_remaining); dst += src_remaining; | |
318 | } | |
319 | *dst_ptr = dst; | |
320 | *src_ptr = src + src_remaining; | |
321 | return; | |
322 | ||
323 | EXPAND_FORWARD: | |
324 | ||
325 | // Expand match forward | |
326 | { | |
327 | const uint8_t * ref_end = match_end - match_distance; | |
328 | while (match_end < src_end) | |
329 | { | |
330 | size_t n = lz4_nmatch(LZ4_MATCH_SEARCH_LOOP_SIZE, ref_end, match_end); | |
331 | if (n < LZ4_MATCH_SEARCH_LOOP_SIZE) { match_end += n; break; } | |
332 | match_end += LZ4_MATCH_SEARCH_LOOP_SIZE; | |
333 | ref_end += LZ4_MATCH_SEARCH_LOOP_SIZE; | |
334 | } | |
335 | } | |
336 | ||
337 | // Expand match backward | |
338 | { | |
339 | // match_begin_min = max(src_begin + match_distance,literal) | |
340 | const uint8_t * match_begin_min = src_begin + match_distance; | |
341 | match_begin_min = (match_begin_min < src)?src:match_begin_min; | |
342 | const uint8_t * ref_begin = match_begin - match_distance; | |
343 | ||
344 | while (match_begin > match_begin_min && ref_begin[-1] == match_begin[-1] ) { match_begin -= 1; ref_begin -= 1; } | |
345 | } | |
346 | ||
347 | // Emit match | |
348 | dst = lz4_emit_match((uint32_t)(match_begin - src), (uint32_t)(match_end - match_begin), (uint32_t)match_distance, dst, end, src); | |
349 | if (!dst) return; | |
350 | ||
351 | // Update state | |
352 | src = match_end; | |
353 | ||
354 | // Update return values to include the last fully encoded match | |
355 | *dst_ptr = dst; | |
356 | *src_ptr = src; | |
357 | } | |
358 | } | |
359 | ||
360 | #endif | |
361 | ||
362 | size_t lz4raw_encode_buffer(uint8_t * __restrict dst_buffer, size_t dst_size, | |
363 | const uint8_t * __restrict src_buffer, size_t src_size, | |
364 | lz4_hash_entry_t hash_table[LZ4_COMPRESS_HASH_ENTRIES]) | |
365 | { | |
366 | // Initialize hash table | |
367 | const lz4_hash_entry_t HASH_FILL = { .offset = 0x80000000, .word = 0x0 }; | |
368 | ||
369 | const uint8_t * src = src_buffer; | |
370 | uint8_t * dst = dst_buffer; | |
371 | ||
372 | // We need several blocks because our base function is limited to 2GB input | |
373 | const size_t BLOCK_SIZE = 0x7ffff000; | |
374 | while (src_size > 0) | |
375 | { | |
376 | //DRKTODO either implement pattern4 or figure out optimal unroll | |
377 | //DRKTODO: bizarrely, with plain O3 the compiler generates a single | |
378 | //DRKTODO: scalar STP per loop iteration with the stock loop | |
379 | //DRKTODO If hand unrolled, it switches to NEON store pairs | |
380 | // Reset hash table for each block | |
381 | /* #if __STDC_HOSTED__ */ | |
382 | /* memset_pattern8(hash_table, &HASH_FILL, lz4_encode_scratch_size); */ | |
383 | /* #else */ | |
384 | /* for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;i++) hash_table[i] = HASH_FILL; */ | |
385 | /* #endif */ | |
386 | ||
387 | for (int i=0;i<LZ4_COMPRESS_HASH_ENTRIES;) { | |
388 | hash_table[i++] = HASH_FILL; | |
389 | hash_table[i++] = HASH_FILL; | |
390 | hash_table[i++] = HASH_FILL; | |
391 | hash_table[i++] = HASH_FILL; | |
392 | } | |
393 | ||
394 | // Bytes to encode in this block | |
395 | const size_t src_to_encode = src_size > BLOCK_SIZE ? BLOCK_SIZE : src_size; | |
396 | ||
397 | // Run the encoder, only the last block emits final literals. Allows concatenation of encoded payloads. | |
398 | // Blocks are encoded independently, so src_begin is set to each block origin instead of src_buffer | |
399 | uint8_t * dst_start = dst; | |
400 | const uint8_t * src_start = src; | |
401 | lz4_encode_2gb(&dst, dst_size, &src, src, src_to_encode, hash_table, src_to_encode < src_size); | |
402 | ||
403 | // Check progress | |
404 | size_t dst_used = dst - dst_start; | |
405 | size_t src_used = src - src_start; // src_used <= src_to_encode | |
406 | if (src_to_encode == src_size && src_used < src_to_encode) return 0; // FAIL to encode last block | |
407 | ||
408 | // Note that there is a potential problem here in case of non compressible data requiring more blocks. | |
409 | // We may end up here with src_used very small, or even 0, and will not be able to make progress during | |
410 | // compression. We FAIL unless the length of literals remaining at the end is small enough. | |
411 | if (src_to_encode < src_size && src_to_encode - src_used >= (1<<16)) return 0; // FAIL too many literals | |
412 | ||
413 | // Update counters (SRC and DST already have been updated) | |
414 | src_size -= src_used; | |
415 | dst_size -= dst_used; | |
416 | } | |
417 | ||
418 | return (size_t)(dst - dst_buffer); // bytes produced | |
419 | } | |
420 | ||
421 | #define likely(expr) __builtin_expect((expr) != 0, 1) | |
422 | #define unlikely(expr) __builtin_expect((expr) != 0, 0) | |
423 | typedef uint32_t lz4_uint128 __attribute__((ext_vector_type(4))) __attribute__((__aligned__(1))); | |
424 | ||
425 | int lz4_decode(uint8_t ** dst_ptr, | |
426 | uint8_t * dst_begin, | |
427 | uint8_t * dst_end, | |
428 | const uint8_t ** src_ptr, | |
429 | const uint8_t * src_end) | |
430 | { | |
431 | uint8_t * dst = *dst_ptr; | |
432 | const uint8_t * src = *src_ptr; | |
433 | ||
434 | // Require dst_end > dst. | |
435 | if (dst_end <= dst) goto OUT_FULL; | |
436 | ||
437 | while (src < src_end) | |
438 | { | |
439 | // Keep last good position | |
440 | *src_ptr = src; | |
441 | *dst_ptr = dst; | |
442 | ||
443 | uint8_t cmd = *src++; // 1 byte encoding literal+(match-4) length: LLLLMMMM | |
444 | uint32_t literalLength = (cmd >> 4) & 15; // 0..15 | |
445 | uint32_t matchLength = 4 + (cmd & 15); // 4..19 | |
446 | ||
447 | // extra bytes for literalLength | |
448 | if (unlikely(literalLength == 15)) | |
449 | { | |
450 | uint8_t s; | |
451 | do { | |
452 | #if DEBUG_LZ4_DECODE_ERRORS | |
453 | if (unlikely(src >= src_end)) printf("Truncated SRC literal length\n"); | |
454 | #endif | |
455 | if (unlikely(src >= src_end)) goto IN_FAIL; // unexpected end of input (1 byte needed) | |
456 | s = *src++; | |
457 | literalLength += s; | |
458 | } while (unlikely(s == 255)); | |
459 | } | |
460 | ||
461 | // copy literal | |
462 | #if DEBUG_LZ4_DECODE_ERRORS | |
463 | if (unlikely(literalLength > (size_t)(src_end - src))) printf("Truncated SRC literal\n"); | |
464 | #endif | |
465 | if (unlikely(literalLength > (size_t)(src_end - src))) goto IN_FAIL; | |
466 | if (unlikely(literalLength > (size_t)(dst_end - dst))) { | |
467 | // literal will take us past the end of the destination buffer, | |
468 | // so we can only copy part of it. | |
469 | literalLength = (uint32_t)(dst_end - dst); | |
470 | memcpy(dst, src, literalLength); | |
471 | dst += literalLength; | |
472 | goto OUT_FULL; | |
473 | } | |
474 | memcpy(dst,src,literalLength); | |
475 | src += literalLength; | |
476 | dst += literalLength; | |
477 | ||
478 | if (unlikely(src >= src_end)) goto OUT_FULL; // valid end of stream | |
479 | #if DEBUG_LZ4_DECODE_ERRORS | |
480 | if (unlikely(2 > (size_t)(src_end - src))) printf("Truncated SRC distance\n"); | |
481 | #endif | |
482 | if (unlikely(2 > (size_t)(src_end - src))) goto IN_FAIL; // unexpected end of input (2 bytes needed) | |
483 | ||
484 | //DRKTODO: this causes an alignment increase warning (legitimate?) | |
485 | //DRKTODO: cast of char * to uint16_t* | |
486 | #pragma clang diagnostic push | |
487 | #pragma clang diagnostic ignored "-Wcast-align" | |
488 | ||
489 | // match distance | |
490 | uint64_t matchDistance = *(const uint16_t *)src; // 0x0000 <= matchDistance <= 0xffff | |
491 | #pragma clang diagnostic pop | |
492 | src += 2; | |
493 | #if DEBUG_LZ4_DECODE_ERRORS | |
494 | if (matchDistance == 0) printf("Invalid match distance D = 0\n"); | |
495 | #endif | |
496 | if (unlikely(matchDistance == 0)) goto IN_FAIL; // 0x0000 invalid | |
497 | uint8_t * ref = dst - matchDistance; | |
498 | #if DEBUG_LZ4_DECODE_ERRORS | |
499 | if (unlikely(ref < dst_begin)) printf("Invalid reference D=0x%llx dst_begin=%p dst=%p dst_end=%p\n",matchDistance,dst_begin,dst,dst_end); | |
500 | #endif | |
501 | if (unlikely(ref < dst_begin)) goto OUT_FAIL; // out of range | |
502 | ||
503 | // extra bytes for matchLength | |
504 | if (unlikely(matchLength == 19)) | |
505 | { | |
506 | uint8_t s; | |
507 | do { | |
508 | #if DEBUG_LZ4_DECODE_ERRORS | |
509 | if (unlikely(src >= src_end)) printf("Truncated SRC match length\n"); | |
510 | #endif | |
511 | if (unlikely(src >= src_end)) goto IN_FAIL; // unexpected end of input (1 byte needed) | |
512 | s = *src++; | |
513 | matchLength += s; | |
514 | } while (unlikely(s == 255)); | |
515 | } | |
516 | ||
517 | // copy match (may overlap) | |
518 | if (unlikely(matchLength > (size_t)(dst_end - dst))) { | |
519 | // match will take us past the end of the destination buffer, | |
520 | // so we can only copy part of it. | |
521 | matchLength = (uint32_t)(dst_end - dst); | |
522 | for (uint32_t i=0; i<matchLength; ++i) dst[i] = ref[i]; | |
523 | dst += matchLength; | |
524 | goto OUT_FULL; | |
525 | } | |
526 | for (uint64_t i=0;i<matchLength;i++) dst[i] = ref[i]; | |
527 | dst += matchLength; | |
528 | } | |
529 | ||
530 | // We reached the end of the input buffer after a full instruction | |
531 | OUT_FULL: | |
532 | // Or we reached the end of the output buffer | |
533 | *dst_ptr = dst; | |
534 | *src_ptr = src; | |
535 | return 0; | |
536 | ||
537 | // Error conditions | |
538 | OUT_FAIL: | |
539 | IN_FAIL: | |
540 | return 1; // FAIL | |
541 | } |