]>
Commit | Line | Data |
---|---|---|
39037602 A |
1 | /* |
2 | * Copyright (c) 2010-2016 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | /* This module implements a hybrid/adaptive compression scheme, using WKdm where | |
29 | * profitable and, currently, an LZ4 variant elsewhere. | |
30 | * (Created 2016, Derek Kumar) | |
31 | */ | |
32 | #include "lz4.h" | |
33 | #include "WKdm_new.h" | |
34 | #include <vm/vm_compressor_algorithms.h> | |
35 | #include <vm/vm_compressor.h> | |
36 | ||
/*
 * First-word sentinel of a WKdm-compressed buffer: WKdm_hv() skips its
 * header sanity check when the first 32-bit word equals this value.
 */
#define MZV_MAGIC (17185)

#if defined(__arm64__)
#include <arm/proc_reg.h>
#endif

/*
 * Byte alignment requested for the scratch areas declared in
 * compressor_encode_scratch_t.  (These two macros used to be defined
 * twice in a row; one definition is sufficient.)
 */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)

/* Copy helper names that currently resolve to plain memcpy. */
#define memcpy_T_NT memcpy
#define memcpy_NT_T memcpy
50 | ||
51 | typedef union { | |
52 | uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN))); | |
53 | uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO | |
54 | } compressor_encode_scratch_t; | |
55 | ||
56 | typedef union { | |
57 | uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64))); | |
58 | uint8_t wkdecompscratch[0] __attribute((aligned(64))); | |
59 | } compressor_decode_scratch_t; | |
60 | ||
/*
 * Mutable state of the hybrid WKdm/LZ4 selection heuristic.  The short
 * (16-bit) fields are windowed counters that the heuristic resets; the
 * lz4_total_* fields only ever grow and are therefore all 32 bits wide.
 */
typedef struct {
	/* Not referenced by the selection code visible in this file. */
	uint16_t lz4_selection_run;
	/* Run of pages on which LZ4 looked worthwhile; compared against
	 * vmctune.lz4_run_preselection_threshold by compressor_preselect(). */
	uint16_t lz4_run_length;
	/* LZ4 preselections since the counter was last reset. */
	uint16_t lz4_preselects;
	uint32_t lz4_total_preselects;
	/* Pages for which LZ4 was skipped since the counter was last reset. */
	uint16_t lz4_failure_skips;
	uint32_t lz4_total_failure_skips;
	/* Run of LZ4 failures / unprofitable results; cleared whenever LZ4
	 * produces output and when the skip window is exhausted. */
	uint16_t lz4_failure_run_length;
	/* Widened from uint16_t so it cannot wrap after 65535 events and so
	 * it matches the width of the other lz4_total_* counters. */
	uint32_t lz4_total_unprofitables;
	uint32_t lz4_total_negatives;
	uint32_t lz4_total_failures;
} compressor_state_t;
73 | ||
74 | compressor_tuneables_t vmctune = { | |
75 | .lz4_threshold = 2048, | |
76 | .wkdm_reeval_threshold = 1536, | |
77 | .lz4_max_failure_skips = 0, | |
78 | .lz4_max_failure_run_length = ~0U, | |
79 | .lz4_max_preselects = 0, | |
80 | .lz4_run_preselection_threshold = ~0U, | |
81 | .lz4_run_continue_bytes = 0, | |
82 | .lz4_profitable_bytes = 0, | |
83 | }; | |
84 | ||
85 | compressor_state_t vmcstate = { | |
86 | .lz4_selection_run = 0, | |
87 | .lz4_run_length = 0, | |
88 | .lz4_preselects = 0, | |
89 | .lz4_total_preselects = 0, | |
90 | .lz4_failure_skips = 0, | |
91 | .lz4_total_failure_skips = 0, | |
92 | .lz4_failure_run_length = 0, | |
93 | .lz4_total_unprofitables = 0, | |
94 | .lz4_total_negatives = 0, | |
95 | }; | |
96 | ||
97 | compressor_stats_t compressor_stats; | |
98 | ||
99 | enum compressor_preselect_t { | |
100 | CPRESELLZ4 = 0, | |
101 | CSKIPLZ4 = 1, | |
102 | CPRESELWK = 2, | |
103 | }; | |
104 | ||
105 | vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC; | |
106 | ||
5ba3f43e A |
107 | boolean_t vm_compressor_force_sw_wkdm = FALSE; |
108 | ||
39037602 A |
109 | boolean_t verbose = FALSE; |
110 | ||
5ba3f43e A |
/*
 * VM_COMPRESSOR_STAT_DBG(expr): evaluates expr when VMDBGSTATS is non-zero
 * (i.e. in DEBUG builds) and expands to an empty statement otherwise.
 *
 * The switch macro used to be defined as VMDBGSTAT while the #if tested
 * VMDBGSTATS, an undefined name that the preprocessor evaluates as 0, so
 * the debug variant could never be selected.  The names now agree.
 */
#define VMDBGSTATS (DEBUG)
#if VMDBGSTATS
#define VM_COMPRESSOR_STAT_DBG(x...) \
	do { \
		(x); \
	} while (0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...) \
	do { \
	} while (0)
#endif
122 | ||
/*
 * Statistics hooks.  On DEVELOPMENT and DEBUG builds each macro evaluates
 * its argument once; on other builds both expand to an empty statement and
 * the argument is discarded without being evaluated.
 */
#define VMCSTATS (DEVELOPMENT || DEBUG)

#if VMCSTATS

#define VM_COMPRESSOR_STAT(x...) \
	do { \
		(x); \
	} while (0)

//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...) \
	do { \
		(x); \
	} while (0)

#else /* !VMCSTATS */

#define VM_COMPRESSOR_STAT(x...) \
	do { \
	} while (0)

#define VM_DECOMPRESSOR_STAT(x...) \
	do { \
	} while (0)

#endif /* VMCSTATS */
142 | ||
143 | static inline enum compressor_preselect_t compressor_preselect(void) { | |
144 | if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) { | |
145 | vmcstate.lz4_failure_skips = 0; | |
146 | vmcstate.lz4_failure_run_length = 0; | |
147 | } | |
148 | ||
149 | if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) { | |
150 | vmcstate.lz4_failure_skips++; | |
151 | vmcstate.lz4_total_failure_skips++; | |
152 | return CSKIPLZ4; | |
153 | } | |
154 | ||
155 | if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) { | |
156 | vmcstate.lz4_preselects = 0; | |
157 | return CPRESELWK; | |
158 | } | |
159 | ||
160 | if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) { | |
161 | vmcstate.lz4_preselects++; | |
162 | vmcstate.lz4_total_preselects++; | |
163 | return CPRESELLZ4; | |
164 | } | |
165 | return CPRESELWK; | |
166 | } | |
167 | ||
168 | static inline void compressor_selector_update(int lz4sz, int didwk, int wksz) { | |
169 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++); | |
170 | ||
171 | if (lz4sz == 0) { | |
172 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes+=PAGE_SIZE); | |
173 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++); | |
174 | vmcstate.lz4_failure_run_length++; | |
175 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++); | |
176 | vmcstate.lz4_run_length = 0; | |
177 | } else { | |
178 | vmcstate.lz4_failure_run_length = 0; | |
179 | ||
180 | VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes+=lz4sz); | |
181 | ||
182 | if (lz4sz <= vmctune.wkdm_reeval_threshold) { | |
183 | vmcstate.lz4_run_length = 0; | |
184 | } else { | |
185 | if (!didwk) { | |
186 | vmcstate.lz4_run_length++; | |
187 | } | |
188 | } | |
189 | ||
190 | if (didwk) { | |
191 | if (__probable(wksz > lz4sz)) { | |
192 | uint32_t lz4delta = wksz - lz4sz; | |
193 | VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta+=lz4delta); | |
194 | if (lz4delta >= vmctune.lz4_run_continue_bytes) { | |
195 | vmcstate.lz4_run_length++; | |
196 | } else if (lz4delta <= vmctune.lz4_profitable_bytes) { | |
197 | vmcstate.lz4_failure_run_length++; | |
198 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++); | |
199 | vmcstate.lz4_run_length = 0; | |
200 | } else { | |
201 | vmcstate.lz4_run_length = 0; | |
202 | } | |
203 | } else { | |
204 | VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta+=(lz4sz-wksz)); | |
205 | vmcstate.lz4_failure_run_length++; | |
206 | VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++); | |
207 | vmcstate.lz4_run_length = 0; | |
208 | } | |
209 | } | |
210 | } | |
211 | } | |
212 | ||
5ba3f43e A |
213 | |
/*
 * Header sanity check for a WKdm-compressed buffer.
 *
 * On DEVELOPMENT and DEBUG builds this panics when the first word is not
 * MZV_MAGIC and any of the first three words has a bit set in its upper 16
 * bits.  On other builds it does nothing.
 */
static inline void WKdm_hv(uint32_t *wkbuf) {
#if DEVELOPMENT || DEBUG
	if (wkbuf[0] == MZV_MAGIC) {
		return;
	}
	if ((wkbuf[0] | wkbuf[1] | wkbuf[2]) & 0xFFFF0000) {
		panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf, wkbuf[0], wkbuf[1], wkbuf[2]);
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}
226 | ||
39037602 A |
227 | //todo fix clang diagnostic |
228 | #pragma clang diagnostic push | |
229 | #pragma clang diagnostic ignored "-Wincompatible-pointer-types" | |
230 | ||
5ba3f43e A |
231 | #if defined(__arm64__) |
232 | #endif | |
233 | ||
39037602 | 234 | static inline void WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes) { |
5ba3f43e A |
235 | #if defined(__arm64__) |
236 | #endif | |
237 | WKdm_hv(src_buf); | |
238 | #if defined(__arm64__) | |
239 | if (PAGE_SIZE == 4096) { | |
240 | WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes); | |
241 | } else { | |
242 | __unused uint64_t wdsstart; | |
243 | ||
244 | VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time()); | |
245 | WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes); | |
246 | ||
247 | VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart); | |
248 | VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++); | |
39037602 | 249 | } |
5ba3f43e | 250 | #else /* !defined arm64 */ |
39037602 | 251 | WKdm_decompress_new(src_buf, dest_buf, scratch, bytes); |
5ba3f43e | 252 | #endif |
39037602 | 253 | } |
5ba3f43e A |
254 | #if DEVELOPMENT || DEBUG |
255 | int precompy, wkswhw; | |
256 | #endif | |
39037602 | 257 | |
5ba3f43e A |
258 | static inline int WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, boolean_t *incomp_copy, unsigned int limit) { |
259 | (void)incomp_copy; | |
260 | int wkcval; | |
261 | #if defined(__arm64__) | |
262 | if (PAGE_SIZE == 4096) { | |
263 | wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit); | |
264 | } else { | |
265 | __unused uint64_t wcswstart; | |
266 | ||
267 | VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time()); | |
268 | ||
269 | int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit); | |
270 | ||
271 | VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart); | |
272 | VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++); | |
273 | wkcval = wkswsz; | |
274 | } | |
275 | #else | |
276 | wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit); | |
277 | #endif | |
278 | return wkcval; | |
39037602 A |
279 | } |
280 | ||
281 | ||
5ba3f43e | 282 | int metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec, void *cscratchin, boolean_t *incomp_copy) { |
39037602 A |
283 | int sz = -1; |
284 | int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE; | |
285 | int insize = PAGE_SIZE; | |
286 | compressor_encode_scratch_t *cscratch = cscratchin; | |
287 | ||
288 | if (vm_compressor_current_codec == CMODE_WK) { | |
289 | dowk = TRUE; | |
290 | } else if (vm_compressor_current_codec == CMODE_LZ4) { | |
291 | dolz4 = TRUE; | |
292 | } else if (vm_compressor_current_codec == CMODE_HYB) { | |
293 | enum compressor_preselect_t presel = compressor_preselect(); | |
294 | if (presel == CPRESELLZ4) { | |
295 | dolz4 = TRUE; | |
296 | goto lz4compress; | |
297 | } else if (presel == CSKIPLZ4) { | |
298 | dowk = TRUE; | |
299 | skiplz4 = TRUE; | |
300 | } else { | |
301 | assert(presel == CPRESELWK); | |
302 | dowk = TRUE; | |
303 | } | |
304 | } | |
305 | ||
306 | if (dowk) { | |
307 | *codec = CCWK; | |
39037602 | 308 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++); |
5ba3f43e | 309 | sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz); |
39037602 | 310 | |
39037602 A |
311 | if (sz == -1) { |
312 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=PAGE_SIZE); | |
313 | VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++); | |
314 | ||
315 | if (vm_compressor_current_codec == CMODE_HYB) { | |
316 | goto lz4eval; | |
317 | } | |
318 | goto cexit; | |
319 | } else if (sz == 0) { | |
320 | VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++); | |
5ba3f43e | 321 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=4); |
39037602 A |
322 | } else { |
323 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=sz); | |
324 | } | |
325 | } | |
326 | lz4eval: | |
327 | if (vm_compressor_current_codec == CMODE_HYB) { | |
328 | if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) { | |
329 | dolz4 = TRUE; | |
330 | } else { | |
5ba3f43e A |
331 | #if DEVELOPMENT || DEBUG |
332 | int wkc = (sz == -1) ? PAGE_SIZE : sz; | |
333 | #endif | |
39037602 A |
334 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++); |
335 | VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive+=wkc); | |
336 | goto cexit; | |
337 | } | |
338 | } | |
339 | ||
340 | lz4compress: | |
341 | ||
342 | if (dolz4) { | |
343 | if (sz == -1) { | |
344 | sz = PAGE_SIZE; | |
345 | } | |
346 | int wksz = sz; | |
347 | *codec = CCLZ4; | |
348 | ||
349 | sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]); | |
350 | ||
39037602 A |
351 | compressor_selector_update(sz, dowk, wksz); |
352 | if (sz == 0) { | |
353 | sz = -1; | |
354 | goto cexit; | |
355 | } | |
356 | } | |
357 | cexit: | |
358 | return sz; | |
359 | } | |
360 | ||
361 | void metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize, uint16_t ccodec, void *compressor_dscratchin) { | |
362 | int dolz4 = (ccodec == CCLZ4); | |
363 | int rval; | |
364 | compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin; | |
365 | ||
366 | if (dolz4) { | |
367 | rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]); | |
368 | VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions+=1); | |
369 | VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes+=csize); | |
5ba3f43e A |
370 | #if DEVELOPMENT || DEBUG |
371 | uint32_t *d32 = dest; | |
372 | #endif | |
373 | assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x", | |
374 | rval, *d32, *(d32+1), *(d32+2)); | |
39037602 A |
375 | } else { |
376 | assert(ccodec == CCWK); | |
5ba3f43e | 377 | |
39037602 | 378 | WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize); |
5ba3f43e | 379 | |
39037602 A |
380 | VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions+=1); |
381 | VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes+=csize); | |
382 | } | |
383 | } | |
384 | #pragma clang diagnostic pop | |
385 | ||
386 | uint32_t vm_compressor_get_encode_scratch_size(void) { | |
387 | if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) { | |
388 | return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL); | |
389 | } else { | |
390 | return WKdm_SCRATCH_BUF_SIZE_INTERNAL; | |
391 | } | |
392 | } | |
393 | ||
394 | uint32_t vm_compressor_get_decode_scratch_size(void) { | |
395 | if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) { | |
396 | return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL); | |
397 | } else { | |
398 | return WKdm_SCRATCH_BUF_SIZE_INTERNAL; | |
399 | } | |
400 | } | |
401 | ||
402 | ||
403 | int vm_compressor_algorithm(void) { | |
404 | return vm_compressor_current_codec; | |
405 | } | |
406 | ||
407 | void vm_compressor_algorithm_init(void) { | |
408 | vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC; | |
409 | ||
5ba3f43e A |
410 | #if defined(__arm64__) |
411 | new_codec = CMODE_HYB; | |
412 | ||
413 | if (PAGE_SIZE == 16384) { | |
414 | vmctune.lz4_threshold = 12288; | |
415 | } | |
416 | #endif | |
39037602 A |
417 | |
418 | PE_parse_boot_argn("vm_compressor_codec", &new_codec, sizeof(new_codec)); | |
419 | assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) || | |
420 | (new_codec == CMODE_LZ4) || (new_codec = CMODE_HYB)), | |
421 | "Invalid VM compression codec: %u", new_codec); | |
422 | ||
5ba3f43e A |
423 | #if defined(__arm__)||defined(__arm64__) |
424 | uint32_t tmpc; | |
425 | if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc, sizeof(tmpc))) { | |
39037602 | 426 | new_codec = VM_COMPRESSOR_DEFAULT_CODEC; |
5ba3f43e | 427 | } else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc, sizeof(tmpc))) { |
39037602 A |
428 | new_codec = CMODE_HYB; |
429 | } | |
430 | ||
5ba3f43e A |
431 | vm_compressor_current_codec = new_codec; |
432 | #endif /* arm/arm64 */ | |
39037602 | 433 | } |