/*
 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* This module implements a hybrid/adaptive compression scheme, using WKdm where
 * profitable and, currently, an LZ4 variant elsewhere.
 * (Created 2016, Derek Kumar)
 */
#include "lz4.h"
#include "WKdm_new.h"
#include <vm/vm_compressor_algorithms.h>
#include <vm/vm_compressor.h>

#define MZV_MAGIC (17185)
#if defined(__arm64__)
#include <arm/proc_reg.h>
#endif

#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)

#define memcpy_T_NT memcpy
#define memcpy_NT_T memcpy

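/*
 * Scratch areas handed to the codecs. Each is a union so the LZ4 state and
 * the WKdm scratch overlay the same storage; the LZ4 state determines the
 * overall size.
 */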
typedef union {
	uint8_t lz4state[lz4_encode_scratch_size]__attribute((aligned(LZ4_SCRATCH_ALIGN)));
	uint8_t wkscratch[0] __attribute((aligned(WKC_SCRATCH_ALIGN))); // TODO
} compressor_encode_scratch_t;

typedef union {
	uint8_t lz4decodestate[lz4_encode_scratch_size]__attribute((aligned(64)));
	uint8_t wkdecompscratch[0] __attribute((aligned(64)));
} compressor_decode_scratch_t;

typedef struct {
	uint16_t lz4_selection_run;
	uint16_t lz4_run_length;
	uint16_t lz4_preselects;
	uint32_t lz4_total_preselects;
	uint16_t lz4_failure_skips;
	uint32_t lz4_total_failure_skips;
	uint16_t lz4_failure_run_length;
	uint16_t lz4_total_unprofitables;
	uint32_t lz4_total_negatives;
	uint32_t lz4_total_failures;
} compressor_state_t;

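/*
 * vmctune holds the tunable thresholds and vmcstate the rolling run/failure
 * counters that drive codec selection in compressor_preselect() and
 * compressor_selector_update() below.
 */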
compressor_tuneables_t vmctune = {
	.lz4_threshold = 2048,
	.wkdm_reeval_threshold = 1536,
	.lz4_max_failure_skips = 0,
	.lz4_max_failure_run_length = ~0U,
	.lz4_max_preselects = 0,
	.lz4_run_preselection_threshold = ~0U,
	.lz4_run_continue_bytes = 0,
	.lz4_profitable_bytes = 0,
};

compressor_state_t vmcstate = {
	.lz4_selection_run = 0,
	.lz4_run_length = 0,
	.lz4_preselects = 0,
	.lz4_total_preselects = 0,
	.lz4_failure_skips = 0,
	.lz4_total_failure_skips = 0,
	.lz4_failure_run_length = 0,
	.lz4_total_unprofitables = 0,
	.lz4_total_negatives = 0,
};

compressor_stats_t compressor_stats;

enum compressor_preselect_t {
	CPRESELLZ4 = 0,
	CSKIPLZ4 = 1,
	CPRESELWK = 2,
};

vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC;

boolean_t vm_compressor_force_sw_wkdm = FALSE;

boolean_t verbose = FALSE;

#define VMDBGSTAT (DEBUG)
#if VMDBGSTAT
#define VM_COMPRESSOR_STAT_DBG(x...)		\
	do {					\
		(x);				\
	} while (0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)		\
	do {					\
	} while (0)
#endif

#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
#define VM_COMPRESSOR_STAT(x...)		\
	do {					\
		(x);				\
	} while (0)
//TODO: make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...)		\
	do {					\
		(x);				\
	} while (0)
#else
#define VM_COMPRESSOR_STAT(x...)		\
	do {					\
	} while (0)
#define VM_DECOMPRESSOR_STAT(x...)		\
	do {					\
	} while (0)
#endif

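/*
 * Preselection heuristic for CMODE_HYB: based on the recent run/failure
 * history in vmcstate and the thresholds in vmctune, decide whether the next
 * page should go straight to LZ4 (CPRESELLZ4), skip LZ4 for a while after
 * repeated LZ4 failures (CSKIPLZ4), or be tried with WKdm first (CPRESELWK,
 * the default).
 */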
static inline enum compressor_preselect_t
compressor_preselect(void)
{
	if (vmcstate.lz4_failure_skips >= vmctune.lz4_max_failure_skips) {
		vmcstate.lz4_failure_skips = 0;
		vmcstate.lz4_failure_run_length = 0;
	}

	if (vmcstate.lz4_failure_run_length >= vmctune.lz4_max_failure_run_length) {
		vmcstate.lz4_failure_skips++;
		vmcstate.lz4_total_failure_skips++;
		return CSKIPLZ4;
	}

	if (vmcstate.lz4_preselects >= vmctune.lz4_max_preselects) {
		vmcstate.lz4_preselects = 0;
		return CPRESELWK;
	}

	if (vmcstate.lz4_run_length >= vmctune.lz4_run_preselection_threshold) {
		vmcstate.lz4_preselects++;
		vmcstate.lz4_total_preselects++;
		return CPRESELLZ4;
	}
	return CPRESELWK;
}

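/*
 * Update the hybrid-selector state after an LZ4 compression attempt.
 * lz4sz is the LZ4 result (0 on failure), didwk says whether WKdm was also
 * run on this page, and wksz is the WKdm size used for the comparison.
 * Runs of LZ4 wins extend lz4_run_length (which feeds preselection), while
 * failures and unprofitable deltas extend lz4_failure_run_length instead.
 */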
static inline void
compressor_selector_update(int lz4sz, int didwk, int wksz)
{
	VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++);

	if (lz4sz == 0) {
		VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += PAGE_SIZE);
		VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++);
		vmcstate.lz4_failure_run_length++;
		VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++);
		vmcstate.lz4_run_length = 0;
	} else {
		vmcstate.lz4_failure_run_length = 0;

		VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes += lz4sz);

		if (lz4sz <= vmctune.wkdm_reeval_threshold) {
			vmcstate.lz4_run_length = 0;
		} else {
			if (!didwk) {
				vmcstate.lz4_run_length++;
			}
		}

		if (didwk) {
			if (__probable(wksz > lz4sz)) {
				uint32_t lz4delta = wksz - lz4sz;
				VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta += lz4delta);
				if (lz4delta >= vmctune.lz4_run_continue_bytes) {
					vmcstate.lz4_run_length++;
				} else if (lz4delta <= vmctune.lz4_profitable_bytes) {
					vmcstate.lz4_failure_run_length++;
					VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++);
					vmcstate.lz4_run_length = 0;
				} else {
					vmcstate.lz4_run_length = 0;
				}
			} else {
				VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta += (lz4sz - wksz));
				vmcstate.lz4_failure_run_length++;
				VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++);
				vmcstate.lz4_run_length = 0;
			}
		}
	}
}

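/*
 * Sanity-check a WKdm-compressed buffer before decompression (DEVELOPMENT
 * and DEBUG only): unless the first word is the special MZV_MAGIC marker,
 * the three header words must have their upper 16 bits clear; otherwise
 * panic rather than decompress a corrupted buffer.
 */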
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	uint32_t *inw = (uint32_t *) wkbuf;
	if (*inw != MZV_MAGIC) {
		if ((*inw | *(inw + 1) | *(inw + 2)) & 0xFFFF0000) {
			panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf, *inw, *(inw + 1), *(inw + 2));
		}
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}

//TODO: fix clang diagnostic
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wincompatible-pointer-types"

#if defined(__arm64__)
#endif

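/*
 * Thin wrapper around the WKdm decompressor: validate the header via
 * WKdm_hv(), then dispatch to the 4K or 16K page variant on arm64 (with
 * optional timing/count stats) and to WKdm_decompress_new() elsewhere.
 * pop_count is unused on this path.
 */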
static inline bool
WKdmD(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, unsigned int bytes,
    __unused uint32_t *pop_count)
{
#if defined(__arm64__)
#endif
	WKdm_hv(src_buf);
#if defined(__arm64__)
	if (PAGE_SIZE == 4096) {
		WKdm_decompress_4k(src_buf, dest_buf, scratch, bytes);
	} else {
		__unused uint64_t wdsstart;

		VM_COMPRESSOR_STAT_DBG(wdsstart = mach_absolute_time());
		WKdm_decompress_16k(src_buf, dest_buf, scratch, bytes);

		VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_dabstime += mach_absolute_time() - wdsstart);
		VM_COMPRESSOR_STAT(compressor_stats.wks_decompressions++);
	}
#else /* !defined arm64 */
	WKdm_decompress_new(src_buf, dest_buf, scratch, bytes);
#endif
	return true;
}
#if DEVELOPMENT || DEBUG
int precompy, wkswhw;
#endif

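/*
 * WKdm compression wrapper: returns the compressed size in bytes, or -1 when
 * the page does not compress into 'limit' bytes (the caller treats a return
 * of 0 as a single-value page). On arm64 this dispatches to the 4K or 16K
 * variant, timing the 16K path under VMDBGSTAT; elsewhere it calls
 * WKdm_compress_new(). incomp_copy and pop_count are unused on this path.
 */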
static inline int
WKdmC(WK_word* src_buf, WK_word* dest_buf, WK_word* scratch,
    boolean_t *incomp_copy, unsigned int limit, __unused uint32_t *pop_count)
{
	(void)incomp_copy;
	int wkcval;
#if defined(__arm64__)
	if (PAGE_SIZE == 4096) {
		wkcval = WKdm_compress_4k(src_buf, dest_buf, scratch, limit);
	} else {
		__unused uint64_t wcswstart;

		VM_COMPRESSOR_STAT_DBG(wcswstart = mach_absolute_time());

		int wkswsz = WKdm_compress_16k(src_buf, dest_buf, scratch, limit);

		VM_COMPRESSOR_STAT_DBG(compressor_stats.wks_cabstime += mach_absolute_time() - wcswstart);
		VM_COMPRESSOR_STAT(compressor_stats.wks_compressions++);
		wkcval = wkswsz;
	}
#else
	wkcval = WKdm_compress_new(src_buf, dest_buf, scratch, limit);
#endif
	return wkcval;
}

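/*
 * Compress one page from 'in' into 'cdst' (at most outbufsz bytes), choosing
 * the codec according to vm_compressor_current_codec: WKdm only, LZ4 only,
 * or the hybrid mode, which tries WKdm first and falls back to LZ4 when WKdm
 * fails or compresses poorly. The codec actually used is reported through
 * *codec, and *pop_count_p receives C_SLOT_NO_POPCOUNT when no population
 * count was computed. Returns the compressed size, 0 for a single-value
 * page, or -1 on failure.
 */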
int
metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec,
    void *cscratchin, boolean_t *incomp_copy, uint32_t *pop_count_p)
{
	int sz = -1;
	int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE;
	int insize = PAGE_SIZE;
	compressor_encode_scratch_t *cscratch = cscratchin;
	/* Not all paths lead to an inline population count. */
	uint32_t pop_count = C_SLOT_NO_POPCOUNT;

	if (vm_compressor_current_codec == CMODE_WK) {
		dowk = TRUE;
	} else if (vm_compressor_current_codec == CMODE_LZ4) {
		dolz4 = TRUE;
	} else if (vm_compressor_current_codec == CMODE_HYB) {
		enum compressor_preselect_t presel = compressor_preselect();
		if (presel == CPRESELLZ4) {
			dolz4 = TRUE;
			goto lz4compress;
		} else if (presel == CSKIPLZ4) {
			dowk = TRUE;
			skiplz4 = TRUE;
		} else {
			assert(presel == CPRESELWK);
			dowk = TRUE;
		}
	}

	if (dowk) {
		*codec = CCWK;
		VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++);
		sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz, &pop_count);

		if (sz == -1) {
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += PAGE_SIZE);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++);

			if (vm_compressor_current_codec == CMODE_HYB) {
				goto lz4eval;
			}
			goto cexit;
		} else if (sz == 0) {
			VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += 4);
		} else {
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total += sz);
		}
	}
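	/*
	 * Hybrid mode: fall back to LZ4 when WKdm failed or produced at least
	 * lz4_threshold bytes, unless the preselector asked us to skip LZ4
	 * for this page.
	 */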
lz4eval:
	if (vm_compressor_current_codec == CMODE_HYB) {
		if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) {
			dolz4 = TRUE;
		} else {
#if DEVELOPMENT || DEBUG
			int wkc = (sz == -1) ? PAGE_SIZE : sz;
#endif
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++);
			VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive += wkc);
			goto cexit;
		}
	}

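	/*
	 * LZ4 path: encode the page and feed the outcome back into the hybrid
	 * selector. lz4raw_encode_buffer() returning 0 means the output did
	 * not fit in outbufsz; that is reported to the caller as -1.
	 */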
lz4compress:

	if (dolz4) {
		if (sz == -1) {
			sz = PAGE_SIZE;
		}
		int wksz = sz;
		*codec = CCLZ4;

		sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]);

		compressor_selector_update(sz, dowk, wksz);
		if (sz == 0) {
			sz = -1;
			goto cexit;
		}
	}
cexit:
	assert(pop_count_p != NULL);
	*pop_count_p = pop_count;
	return sz;
}

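/*
 * Decompress one compressed slot of 'csize' bytes back into a full page at
 * 'dest', using the codec recorded at compression time (CCLZ4 or CCWK).
 * Returns true on success; an LZ4 decode that does not yield exactly
 * PAGE_SIZE bytes is treated as failure (and, on DEVELOPMENT/DEBUG kernels,
 * asserts with the decoded header words).
 */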
bool
metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize,
    uint16_t ccodec, void *compressor_dscratchin, uint32_t *pop_count_p)
{
	int dolz4 = (ccodec == CCLZ4);
	int rval;
	compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin;
	/* Not all paths lead to an inline population count. */
	uint32_t pop_count = C_SLOT_NO_POPCOUNT;
	bool success;

	if (dolz4) {
		rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]);

		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions += 1);
		VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes += csize);
#if DEVELOPMENT || DEBUG
		uint32_t *d32 = dest;
#endif
		assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
		    rval, *d32, *(d32 + 1), *(d32 + 2));
		success = (rval == PAGE_SIZE);
	} else {
		assert(ccodec == CCWK);

		success = WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize, &pop_count);

		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions += 1);
		VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes += csize);
	}

	assert(pop_count_p != NULL);
	*pop_count_p = pop_count;
	return success;
}
#pragma clang diagnostic pop

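/*
 * Scratch sizes the compressor layer must allocate for metacompressor() and
 * metadecompressor(): large enough for both the LZ4 state and the WKdm
 * scratch whenever a non-default codec may be selected, and just the WKdm
 * scratch otherwise.
 */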
uint32_t
vm_compressor_get_encode_scratch_size(void)
{
	if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
		return MAX(sizeof(compressor_encode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
	} else {
		return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
	}
}

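/* As above, but for the scratch area passed to metadecompressor(). */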
uint32_t
vm_compressor_get_decode_scratch_size(void)
{
	if (vm_compressor_current_codec != VM_COMPRESSOR_DEFAULT_CODEC) {
		return MAX(sizeof(compressor_decode_scratch_t), WKdm_SCRATCH_BUF_SIZE_INTERNAL);
	} else {
		return WKdm_SCRATCH_BUF_SIZE_INTERNAL;
	}
}

int
vm_compressor_algorithm(void)
{
	return vm_compressor_current_codec;
}

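/*
 * Select the boot-time codec. arm64 defaults to the hybrid mode (raising the
 * LZ4 fallback threshold on 16K-page systems); the "vm_compressor_codec"
 * boot-arg and the "-vm_compressor_wk" / "-vm_compressor_hybrid" flags can
 * override the default on ARM platforms.
 */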
void
vm_compressor_algorithm_init(void)
{
	vm_compressor_mode_t new_codec = VM_COMPRESSOR_DEFAULT_CODEC;

#if defined(__arm64__)
	new_codec = CMODE_HYB;

	if (PAGE_SIZE == 16384) {
		vmctune.lz4_threshold = 12288;
	}
#endif

	PE_parse_boot_argn("vm_compressor_codec", &new_codec, sizeof(new_codec));
	assertf(((new_codec == VM_COMPRESSOR_DEFAULT_CODEC) || (new_codec == CMODE_WK) ||
	    (new_codec == CMODE_LZ4) || (new_codec == CMODE_HYB)),
	    "Invalid VM compression codec: %u", new_codec);

#if defined(__arm__) || defined(__arm64__)
	uint32_t tmpc;
	if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc, sizeof(tmpc))) {
		new_codec = VM_COMPRESSOR_DEFAULT_CODEC;
	} else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc, sizeof(tmpc))) {
		new_codec = CMODE_HYB;
	}

	vm_compressor_current_codec = new_codec;
#endif /* arm/arm64 */
}