]>
Commit | Line | Data |
---|---|---|
39037602 A |
1 | /* |
2 | * Copyright (c) 2010-2016 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
0a7de745 | 5 | * |
39037602 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
0a7de745 | 14 | * |
39037602 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
0a7de745 | 17 | * |
39037602 A |
18 | * The Original Code and all software distributed under the License are |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
0a7de745 | 25 | * |
39037602 A |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
27 | */ | |
28 | /* This module implements a hybrid/adaptive compression scheme, using WKdm where | |
29 | * profitable and, currently, an LZ4 variant elsewhere. | |
30 | * (Created 2016, Derek Kumar) | |
31 | */ | |
32 | #include "lz4.h" | |
33 | #include "WKdm_new.h" | |
34 | #include <vm/vm_compressor_algorithms.h> | |
35 | #include <vm/vm_compressor.h> | |
36 | ||
37 | #define MZV_MAGIC (17185) | |
5ba3f43e A |
38 | #if defined(__arm64__) |
39 | #include <arm/proc_reg.h> | |
40 | #endif | |
41 | ||
39037602 A |
/*
 * Alignment, in bytes, requested for the LZ4 and WKdm members of the encode
 * scratch union (compressor_encode_scratch_t).
 */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)

/* Copy-routine aliases; both currently resolve to plain memcpy. */
#define memcpy_T_NT memcpy
#define memcpy_NT_T memcpy
50 | ||
51 | typedef union { | |
52 | uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN))); | |
53 | uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO | |
54 | } compressor_encode_scratch_t; | |
55 | ||
56 | typedef union { | |
57 | uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64))); | |
58 | uint8_t wkdecompscratch[0] __attribute((aligned(64))); | |
59 | } compressor_decode_scratch_t; | |
60 | ||
/*
 * Running state of the hybrid codec selector (see compressor_preselect() and
 * compressor_selector_update()).
 */
typedef struct {
	/* Not referenced in the visible part of this file. */
	uint16_t lz4_selection_run;
	/* Current run of pages on which LZ4 did well; reset on a poor result. */
	uint16_t lz4_run_length;
	/* LZ4 preselections since the counter was last reset. */
	uint16_t lz4_preselects;
	/* Running total of LZ4 preselections. */
	uint32_t lz4_total_preselects;
	/* Pages that skipped LZ4 since the counter was last reset. */
	uint16_t lz4_failure_skips;
	/* Running total of pages that skipped LZ4. */
	uint32_t lz4_total_failure_skips;
	/* Current run of LZ4 failures / unprofitable results. */
	uint16_t lz4_failure_run_length;
	/*
	 * Running total of "unprofitable" LZ4 results.
	 * NOTE(review): 16 bits wide while the other totals are 32 -- confirm.
	 */
	uint16_t lz4_total_unprofitables;
	/* Running total of pages where LZ4 output was not smaller than WKdm's. */
	uint32_t lz4_total_negatives;
	/* Running total of LZ4 encode failures (encoder returned 0). */
	uint32_t lz4_total_failures;
} compressor_state_t;
73 | ||
74 | compressor_tuneables_t vmctune = { | |
75 | .lz4_threshold = 2048, | |
76 | .wkdm_reeval_threshold = 1536, | |
77 | .lz4_max_failure_skips = 0, | |
78 | .lz4_max_failure_run_length = ~0U, | |
79 | .lz4_max_preselects = 0, | |
80 | .lz4_run_preselection_threshold = ~0U, | |
81 | .lz4_run_continue_bytes = 0, | |
82 | .lz4_profitable_bytes = 0, | |
83 | }; | |
84 | ||
85 | compressor_state_t vmcstate = { | |
86 | .lz4_selection_run = 0, | |
87 | .lz4_run_length = 0, | |
88 | .lz4_preselects = 0, | |
89 | .lz4_total_preselects = 0, | |
90 | .lz4_failure_skips = 0, | |
91 | .lz4_total_failure_skips = 0, | |
92 | .lz4_failure_run_length = 0, | |
93 | .lz4_total_unprofitables = 0, | |
94 | .lz4_total_negatives = 0, | |
95 | }; | |
96 | ||
97 | compressor_stats_t compressor_stats; | |
98 | ||
/* Outcome of compressor_preselect() for one page in hybrid mode. */
enum compressor_preselect_t {
	CPRESELLZ4 = 0, /* go straight to LZ4 */
	CSKIPLZ4   = 1, /* run WKdm and do not attempt LZ4 */
	CPRESELWK  = 2, /* run WKdm first; LZ4 may follow */
};
104 | ||
105 | vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC; | |
106 | ||
5ba3f43e A |
107 | boolean_t vm_compressor_force_sw_wkdm = FALSE; |
108 | ||
39037602 A |
109 | boolean_t verbose = FALSE; |
110 | ||
5ba3f43e A |
/*
 * Debug-only statistics hook.
 * VM_COMPRESSOR_STAT_DBG(stmt) evaluates stmt when DEBUG is non-zero and
 * expands to an empty statement otherwise. The macro that is defined here
 * must be the same one the #if below tests: an undefined name in #if
 * silently evaluates to 0 and would disable the hook in every build.
 */
#define VMDBGSTATS (DEBUG)
#if VMDBGSTATS
#define VM_COMPRESSOR_STAT_DBG(x...) \
	do {                         \
		(x);                 \
	} while (0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...) \
	do {                         \
	} while (0)
#endif
122 | ||
/*
 * Statistics hooks for the compression and decompression paths.
 * Each macro evaluates its argument when DEVELOPMENT or DEBUG is non-zero and
 * expands to an empty statement otherwise.
 */
#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
#define VM_COMPRESSOR_STAT(x...) \
	do {                     \
		(x);             \
	} while (0)
//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...) \
	do {                       \
		(x);               \
	} while (0)
#else
#define VM_COMPRESSOR_STAT(x...) \
	do {                     \
	} while (0)
#define VM_DECOMPRESSOR_STAT(x...) \
	do {                       \
	} while (0)
#endif
142 | ||
0a7de745 A |
143 | static inline enum compressor_preselect_t |
144 | compressor_preselect(void) | |
145 | { | |
39037602 A |
146 | if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) { |
147 | vmcstate.lz4_failure_skips = 0; | |
148 | vmcstate.lz4_failure_run_length = 0; | |
149 | } | |
150 | ||
151 | if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) { | |
152 | vmcstate.lz4_failure_skips++; | |
153 | vmcstate.lz4_total_failure_skips++; | |
154 | return CSKIPLZ4; | |
155 | } | |
156 | ||
157 | if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) { | |
158 | vmcstate.lz4_preselects = 0; | |
159 | return CPRESELWK; | |
160 | } | |
161 | ||
162 | if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) { | |
163 | vmcstate.lz4_preselects++; | |
164 | vmcstate.lz4_total_preselects++; | |
165 | return CPRESELLZ4; | |
166 | } | |
167 | return CPRESELWK; | |
168 | } | |
169 | ||
0a7de745 A |
170 | static inline void |
171 | compressor_selector_update(int lz4sz, int didwk, int wksz) | |
172 | { | |
39037602 A |
173 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++); |
174 | ||
175 | if (lz4sz == 0) { | |
0a7de745 | 176 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += PAGE_SIZE); |
39037602 A |
177 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++); |
178 | vmcstate.lz4_failure_run_length++; | |
179 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++); | |
180 | vmcstate.lz4_run_length = 0; | |
181 | } else { | |
182 | vmcstate.lz4_failure_run_length = 0; | |
183 | ||
0a7de745 | 184 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += lz4sz); |
39037602 A |
185 | |
186 | if (lz4sz <= vmctune.wkdm_reeval_threshold) { | |
187 | vmcstate.lz4_run_length = 0; | |
188 | } else { | |
189 | if (!didwk) { | |
190 | vmcstate.lz4_run_length++; | |
191 | } | |
192 | } | |
193 | ||
194 | if (didwk) { | |
195 | if (__probable(wksz > lz4sz)) { | |
196 | uint32_t lz4delta = wksz - lz4sz; | |
0a7de745 | 197 | VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta += lz4delta); |
39037602 A |
198 | if (lz4delta >= vmctune.lz4_run_continue_bytes) { |
199 | vmcstate.lz4_run_length++; | |
200 | } else if (lz4delta <= vmctune.lz4_profitable_bytes) { | |
201 | vmcstate.lz4_failure_run_length++; | |
202 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++); | |
203 | vmcstate.lz4_run_length = 0; | |
204 | } else { | |
205 | vmcstate.lz4_run_length = 0; | |
206 | } | |
207 | } else { | |
0a7de745 | 208 | VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta += (lz4sz - wksz)); |
39037602 A |
209 | vmcstate.lz4_failure_run_length++; |
210 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++); | |
211 | vmcstate.lz4_run_length = 0; | |
212 | } | |
213 | } | |
214 | } | |
215 | } | |
216 | ||
5ba3f43e | 217 | |
0a7de745 A |
/*
 * Sanity-check the first three words of a WKdm-compressed buffer.
 * On DEVELOPMENT/DEBUG builds this panics when the first word is not
 * MZV_MAGIC and any of the three words has bits set in its upper 16 bits.
 * On other builds it does nothing.
 */
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	const uint32_t *hdr = wkbuf;

	if ((hdr[0] != MZV_MAGIC) && ((hdr[0] | hdr[1] | hdr[2]) & 0xFFFF0000)) {
		panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf, hdr[0], hdr[1], hdr[2]);
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}
232 | ||
39037602 A |
233 | //todo fix clang diagnostic |
234 | #pragma clang diagnostic push | |
235 | #pragma clang diagnostic ignored "-Wincompatible-pointer-types" | |
236 | ||
5ba3f43e A |
237 | #if defined(__arm64__) |
238 | #endif | |
239 | ||
0a7de745 A |
240 | static inline void |
241 | WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes) | |
242 | { | |
5ba3f43e A |
243 | #if defined(__arm64__) |
244 | #endif | |
245 | WKdm_hv(src_buf); | |
246 | #if defined(__arm64__) | |
247 | if (PAGE_SIZE == 4096) { | |
248 | WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes); | |
249 | } else { | |
250 | __unused uint64_t wdsstart; | |
251 | ||
252 | VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time()); | |
253 | WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes); | |
254 | ||
255 | VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart); | |
256 | VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++); | |
39037602 | 257 | } |
5ba3f43e | 258 | #else /* !defined arm64 */ |
39037602 | 259 | WKdm_decompress_new(src_buf, dest_buf, scratch, bytes); |
5ba3f43e | 260 | #endif |
39037602 | 261 | } |
5ba3f43e A |
#if DEVELOPMENT || DEBUG
/* Development/debug-only globals; nothing in the visible part of this file uses them. */
int precompy;
int wkswhw;
#endif
39037602 | 265 | |
0a7de745 A |
266 | static inline int |
267 | WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, boolean_t *incomp_copy, unsigned int limit) | |
268 | { | |
5ba3f43e A |
269 | (void)incomp_copy; |
270 | int wkcval; | |
271 | #if defined(__arm64__) | |
272 | if (PAGE_SIZE == 4096) { | |
273 | wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit); | |
274 | } else { | |
275 | __unused uint64_t wcswstart; | |
276 | ||
277 | VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time()); | |
278 | ||
279 | int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit); | |
280 | ||
281 | VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart); | |
282 | VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++); | |
283 | wkcval = wkswsz; | |
284 | } | |
285 | #else | |
286 | wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit); | |
287 | #endif | |
288 | return wkcval; | |
39037602 A |
289 | } |
290 | ||
291 | ||
0a7de745 A |
292 | int |
293 | metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec, void *cscratchin, boolean_t *incomp_copy) | |
294 | { | |
39037602 A |
295 | int sz = -1; |
296 | int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE; | |
297 | int insize = PAGE_SIZE; | |
298 | compressor_encode_scratch_t *cscratch = cscratchin; | |
299 | ||
300 | if (vm_compressor_current_codec == CMODE_WK) { | |
301 | dowk = TRUE; | |
302 | } else if (vm_compressor_current_codec == CMODE_LZ4) { | |
303 | dolz4 = TRUE; | |
304 | } else if (vm_compressor_current_codec == CMODE_HYB) { | |
305 | enum compressor_preselect_t presel = compressor_preselect(); | |
306 | if (presel == CPRESELLZ4) { | |
307 | dolz4 = TRUE; | |
308 | goto lz4compress; | |
309 | } else if (presel == CSKIPLZ4) { | |
310 | dowk = TRUE; | |
311 | skiplz4 = TRUE; | |
312 | } else { | |
313 | assert(presel == CPRESELWK); | |
314 | dowk = TRUE; | |
315 | } | |
316 | } | |
317 | ||
318 | if (dowk) { | |
319 | *codec = CCWK; | |
39037602 | 320 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++); |
5ba3f43e | 321 | sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz); |
39037602 | 322 | |
39037602 | 323 | if (sz == -1) { |
0a7de745 | 324 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += PAGE_SIZE); |
39037602 A |
325 | VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++); |
326 | ||
327 | if (vm_compressor_current_codec == CMODE_HYB) { | |
328 | goto lz4eval; | |
329 | } | |
330 | goto cexit; | |
331 | } else if (sz == 0) { | |
332 | VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++); | |
0a7de745 | 333 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += 4); |
39037602 | 334 | } else { |
0a7de745 | 335 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += sz); |
39037602 A |
336 | } |
337 | } | |
338 | lz4eval: | |
339 | if (vm_compressor_current_codec == CMODE_HYB) { | |
340 | if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) { | |
341 | dolz4 = TRUE; | |
342 | } else { | |
5ba3f43e A |
343 | #if DEVELOPMENT || DEBUG |
344 | int wkc = (sz == -1) ? PAGE_SIZE : sz; | |
345 | #endif | |
39037602 | 346 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++); |
0a7de745 | 347 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive += wkc); |
39037602 A |
348 | goto cexit; |
349 | } | |
350 | } | |
351 | ||
352 | lz4compress: | |
353 | ||
354 | if (dolz4) { | |
355 | if (sz == -1) { | |
356 | sz = PAGE_SIZE; | |
357 | } | |
358 | int wksz = sz; | |
359 | *codec = CCLZ4; | |
360 | ||
361 | sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]); | |
362 | ||
39037602 A |
363 | compressor_selector_update(sz, dowk, wksz); |
364 | if (sz == 0) { | |
365 | sz = -1; | |
366 | goto cexit; | |
367 | } | |
368 | } | |
369 | cexit: | |
370 | return sz; | |
371 | } | |
372 | ||
0a7de745 A |
373 | void |
374 | metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize, uint16_t ccodec, void *compressor_dscratchin) | |
375 | { | |
39037602 A |
376 | int dolz4 = (ccodec == CCLZ4); |
377 | int rval; | |
378 | compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin; | |
0a7de745 | 379 | |
39037602 A |
380 | if (dolz4) { |
381 | rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]); | |
0a7de745 A |
382 | VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions += 1); |
383 | VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes += csize); | |
5ba3f43e A |
384 | #if DEVELOPMENT || DEBUG |
385 | uint32_t *d32 = dest; | |
386 | #endif | |
387 | assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x", | |
0a7de745 | 388 | rval, *d32, *(d32 + 1), *(d32 + 2)); |
39037602 A |
389 | } else { |
390 | assert(ccodec == CCWK); | |
5ba3f43e | 391 | |
39037602 | 392 | WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize); |
5ba3f43e | 393 | |
0a7de745 A |
394 | VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions += 1); |
395 | VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes += csize); | |
39037602 A |
396 | } |
397 | } | |
398 | #pragma clang diagnostic pop | |
399 | ||
0a7de745 A |
400 | uint32_t |
401 | vm_compressor_get_encode_scratch_size(void) | |
402 | { | |
39037602 A |
403 | if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) { |
404 | return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL); | |
405 | } else { | |
406 | return WKdm_SCRATCH_BUF_SIZE_INTERNAL; | |
407 | } | |
408 | } | |
409 | ||
0a7de745 A |
410 | uint32_t |
411 | vm_compressor_get_decode_scratch_size(void) | |
412 | { | |
39037602 A |
413 | if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) { |
414 | return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL); | |
415 | } else { | |
416 | return WKdm_SCRATCH_BUF_SIZE_INTERNAL; | |
417 | } | |
418 | } | |
419 | ||
420 | ||
0a7de745 A |
421 | int |
422 | vm_compressor_algorithm(void) | |
423 | { | |
39037602 A |
424 | return vm_compressor_current_codec; |
425 | } | |
426 | ||
0a7de745 A |
427 | void |
428 | vm_compressor_algorithm_init(void) | |
429 | { | |
39037602 A |
430 | vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC; |
431 | ||
5ba3f43e A |
432 | #if defined(__arm64__) |
433 | new_codec = CMODE_HYB; | |
434 | ||
435 | if (PAGE_SIZE == 16384) { | |
436 | vmctune.lz4_threshold = 12288; | |
437 | } | |
438 | #endif | |
39037602 A |
439 | |
440 | PE_parse_boot_argn("vm_compressor_codec", &new_codec, sizeof(new_codec)); | |
441 | assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) || | |
0a7de745 | 442 | (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)), |
39037602 A |
443 | "Invalid VM compression codec: %u", new_codec); |
444 | ||
0a7de745 | 445 | #if defined(__arm__) || defined(__arm64__) |
5ba3f43e A |
446 | uint32_t tmpc; |
447 | if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc, sizeof(tmpc))) { | |
39037602 | 448 | new_codec = VM_COMPRESSOR_DEFAULT_CODEC; |
5ba3f43e | 449 | } else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc, sizeof(tmpc))) { |
39037602 A |
450 | new_codec = CMODE_HYB; |
451 | } | |
452 | ||
5ba3f43e A |
453 | vm_compressor_current_codec = new_codec; |
454 | #endif /* arm/arm64 */ | |
39037602 | 455 | } |