/*
 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* This module implements a hybrid/adaptive compression scheme, using WKdm where
 * profitable and, currently, an LZ4 variant elsewhere.
 * (Created 2016, Derek Kumar)
 */
34 #include <vm/vm_compressor_algorithms.h>
35 #include <vm/vm_compressor.h>
37 #define MZV_MAGIC (17185)
38 #if defined(__arm64__)
39 #include <arm/proc_reg.h>
42 #define LZ4_SCRATCH_ALIGN (64)
43 #define WKC_SCRATCH_ALIGN (64)
45 #define LZ4_SCRATCH_ALIGN (64)
46 #define WKC_SCRATCH_ALIGN (64)
48 #define memcpy_T_NT memcpy
49 #define memcpy_NT_T memcpy
52 uint8_t lz4state
[lz4_encode_scratch_size
]__attribute((aligned(LZ4_SCRATCH_ALIGN
)));
53 uint8_t wkscratch
[0] __attribute((aligned(WKC_SCRATCH_ALIGN
))); // TODO
54 } compressor_encode_scratch_t
;
57 uint8_t lz4decodestate
[lz4_encode_scratch_size
]__attribute((aligned(64)));
58 uint8_t wkdecompscratch
[0] __attribute((aligned(64)));
59 } compressor_decode_scratch_t
;
/*
 * Mutable state for the hybrid (WKdm/LZ4) codec selection heuristic; updated
 * from compressor_preselect() and compressor_selector_update() below.
 * "run" counters track consecutive outcomes; "total" counters are lifetime.
 */
typedef struct {
	uint16_t lz4_selection_run;       /* consecutive pages routed to LZ4 */
	uint16_t lz4_run_length;          /* current run of profitable LZ4 results */
	uint16_t lz4_preselects;          /* LZ4 preselections since last reset */
	uint32_t lz4_total_preselects;
	uint16_t lz4_failure_skips;       /* pages skipping LZ4 after failures */
	uint32_t lz4_total_failure_skips;
	uint16_t lz4_failure_run_length;  /* current run of LZ4 failures/unprofitables */
	uint16_t lz4_total_unprofitables;
	uint32_t lz4_total_negatives;     /* LZ4 produced more bytes than WKdm */
	uint32_t lz4_total_failures;
} compressor_state_t;
74 compressor_tuneables_t vmctune
= {
75 .lz4_threshold
= 2048,
76 .wkdm_reeval_threshold
= 1536,
77 .lz4_max_failure_skips
= 0,
78 .lz4_max_failure_run_length
= ~0U,
79 .lz4_max_preselects
= 0,
80 .lz4_run_preselection_threshold
= ~0U,
81 .lz4_run_continue_bytes
= 0,
82 .lz4_profitable_bytes
= 0,
85 compressor_state_t vmcstate
= {
86 .lz4_selection_run
= 0,
89 .lz4_total_preselects
= 0,
90 .lz4_failure_skips
= 0,
91 .lz4_total_failure_skips
= 0,
92 .lz4_failure_run_length
= 0,
93 .lz4_total_unprofitables
= 0,
94 .lz4_total_negatives
= 0,
97 compressor_stats_t compressor_stats
;
99 enum compressor_preselect_t
{
105 vm_compressor_mode_t vm_compressor_current_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
107 boolean_t vm_compressor_force_sw_wkdm
= FALSE
;
109 boolean_t verbose
= FALSE
;
/*
 * Statistics hooks: the STAT macros evaluate their argument only on stats
 * builds and compile to nothing otherwise. The duplicated definitions in the
 * extracted source prove an #if/#else structure whose conditional lines were
 * lost; restored here — NOTE(review): verify guard expressions upstream.
 */
#define VMDBGSTAT (DEBUG)
#if VMDBGSTAT
#define VM_COMPRESSOR_STAT_DBG(x...)					\
	do {								\
		(x);							\
	} while (0)
#else
#define VM_COMPRESSOR_STAT_DBG(x...)					\
	do {								\
	} while (0)
#endif

#define VMCSTATS (DEVELOPMENT || DEBUG)
#if VMCSTATS
#define VM_COMPRESSOR_STAT(x...)					\
	do {								\
		(x);							\
	} while (0)
//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...)					\
	do {								\
		(x);							\
	} while (0)
#else
#define VM_COMPRESSOR_STAT(x...)					\
	do {								\
	} while (0)
#define VM_DECOMPRESSOR_STAT(x...)					\
	do {								\
	} while (0)
#endif
143 static inline enum compressor_preselect_t
144 compressor_preselect(void)
146 if (vmcstate
.lz4_failure_skips
>= vmctune
.lz4_max_failure_skips
) {
147 vmcstate
.lz4_failure_skips
= 0;
148 vmcstate
.lz4_failure_run_length
= 0;
151 if (vmcstate
.lz4_failure_run_length
>= vmctune
.lz4_max_failure_run_length
) {
152 vmcstate
.lz4_failure_skips
++;
153 vmcstate
.lz4_total_failure_skips
++;
157 if (vmcstate
.lz4_preselects
>= vmctune
.lz4_max_preselects
) {
158 vmcstate
.lz4_preselects
= 0;
162 if (vmcstate
.lz4_run_length
>= vmctune
.lz4_run_preselection_threshold
) {
163 vmcstate
.lz4_preselects
++;
164 vmcstate
.lz4_total_preselects
++;
171 compressor_selector_update(int lz4sz
, int didwk
, int wksz
)
173 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compressions
++);
176 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compressed_bytes
+= PAGE_SIZE
);
177 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compression_failures
++);
178 vmcstate
.lz4_failure_run_length
++;
179 VM_COMPRESSOR_STAT(vmcstate
.lz4_total_failures
++);
180 vmcstate
.lz4_run_length
= 0;
182 vmcstate
.lz4_failure_run_length
= 0;
184 VM_COMPRESSOR_STAT(compressor_stats
.lz4_compressed_bytes
+= lz4sz
);
186 if (lz4sz
<= vmctune
.wkdm_reeval_threshold
) {
187 vmcstate
.lz4_run_length
= 0;
190 vmcstate
.lz4_run_length
++;
195 if (__probable(wksz
> lz4sz
)) {
196 uint32_t lz4delta
= wksz
- lz4sz
;
197 VM_COMPRESSOR_STAT(compressor_stats
.lz4_wk_compression_delta
+= lz4delta
);
198 if (lz4delta
>= vmctune
.lz4_run_continue_bytes
) {
199 vmcstate
.lz4_run_length
++;
200 } else if (lz4delta
<= vmctune
.lz4_profitable_bytes
) {
201 vmcstate
.lz4_failure_run_length
++;
202 VM_COMPRESSOR_STAT(vmcstate
.lz4_total_unprofitables
++);
203 vmcstate
.lz4_run_length
= 0;
205 vmcstate
.lz4_run_length
= 0;
208 VM_COMPRESSOR_STAT(compressor_stats
.lz4_wk_compression_negative_delta
+= (lz4sz
- wksz
));
209 vmcstate
.lz4_failure_run_length
++;
210 VM_COMPRESSOR_STAT(vmcstate
.lz4_total_negatives
++);
211 vmcstate
.lz4_run_length
= 0;
/*
 * Sanity-check a WKdm header (development/debug builds only): the three
 * header words must either start with the single-value magic or have clear
 * upper halfwords; otherwise panic. No-op on release builds.
 * NOTE(review): opener, braces and the release-build "(void) wkbuf;" were
 * lost in extraction; restored from upstream — verify.
 */
static inline void
WKdm_hv(uint32_t *wkbuf)
{
#if DEVELOPMENT || DEBUG
	uint32_t *inw = (uint32_t *) wkbuf;
	if (*inw != MZV_MAGIC) {
		if ((*inw | *(inw + 1) | *(inw + 2)) & 0xFFFF0000) {
			panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf, *inw, *(inw + 1), *(inw + 2));
		}
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif
}
233 //todo fix clang diagnostic
234 #pragma clang diagnostic push
235 #pragma clang diagnostic ignored "-Wincompatible-pointer-types"
237 #if defined(__arm64__)
241 WKdmD(WK_word
* src_buf
, WK_word
* dest_buf
, WK_word
* scratch
, unsigned int bytes
)
243 #if defined(__arm64__)
246 #if defined(__arm64__)
247 if (PAGE_SIZE
== 4096) {
248 WKdm_decompress_4k(src_buf
, dest_buf
, scratch
, bytes
);
250 __unused
uint64_t wdsstart
;
252 VM_COMPRESSOR_STAT_DBG(wdsstart
= mach_absolute_time());
253 WKdm_decompress_16k(src_buf
, dest_buf
, scratch
, bytes
);
255 VM_COMPRESSOR_STAT_DBG(compressor_stats
.wks_dabstime
+= mach_absolute_time() - wdsstart
);
256 VM_COMPRESSOR_STAT(compressor_stats
.wks_decompressions
++);
258 #else /* !defined arm64 */
259 WKdm_decompress_new(src_buf
, dest_buf
, scratch
, bytes
);
/* Debug-only instrumentation globals (purpose not evident from the visible
 * code — NOTE(review): confirm uses elsewhere in the project). */
#if DEVELOPMENT || DEBUG
int precompy, wkswhw;
#endif
267 WKdmC(WK_word
* src_buf
, WK_word
* dest_buf
, WK_word
* scratch
, boolean_t
*incomp_copy
, unsigned int limit
)
271 #if defined(__arm64__)
272 if (PAGE_SIZE
== 4096) {
273 wkcval
= WKdm_compress_4k(src_buf
, dest_buf
, scratch
, limit
);
275 __unused
uint64_t wcswstart
;
277 VM_COMPRESSOR_STAT_DBG(wcswstart
= mach_absolute_time());
279 int wkswsz
= WKdm_compress_16k(src_buf
, dest_buf
, scratch
, limit
);
281 VM_COMPRESSOR_STAT_DBG(compressor_stats
.wks_cabstime
+= mach_absolute_time() - wcswstart
);
282 VM_COMPRESSOR_STAT(compressor_stats
.wks_compressions
++);
286 wkcval
= WKdm_compress_new(src_buf
, dest_buf
, scratch
, limit
);
293 metacompressor(const uint8_t *in
, uint8_t *cdst
, int32_t outbufsz
, uint16_t *codec
, void *cscratchin
, boolean_t
*incomp_copy
)
296 int dowk
= FALSE
, dolz4
= FALSE
, skiplz4
= FALSE
;
297 int insize
= PAGE_SIZE
;
298 compressor_encode_scratch_t
*cscratch
= cscratchin
;
300 if (vm_compressor_current_codec
== CMODE_WK
) {
302 } else if (vm_compressor_current_codec
== CMODE_LZ4
) {
304 } else if (vm_compressor_current_codec
== CMODE_HYB
) {
305 enum compressor_preselect_t presel
= compressor_preselect();
306 if (presel
== CPRESELLZ4
) {
309 } else if (presel
== CSKIPLZ4
) {
313 assert(presel
== CPRESELWK
);
320 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressions
++);
321 sz
= WKdmC(in
, cdst
, &cscratch
->wkscratch
[0], incomp_copy
, outbufsz
);
324 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_total
+= PAGE_SIZE
);
325 VM_COMPRESSOR_STAT(compressor_stats
.wk_compression_failures
++);
327 if (vm_compressor_current_codec
== CMODE_HYB
) {
331 } else if (sz
== 0) {
332 VM_COMPRESSOR_STAT(compressor_stats
.wk_sv_compressions
++);
333 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_total
+= 4);
335 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_total
+= sz
);
339 if (vm_compressor_current_codec
== CMODE_HYB
) {
340 if (((sz
== -1) || (sz
>= vmctune
.lz4_threshold
)) && (skiplz4
== FALSE
)) {
343 #if DEVELOPMENT || DEBUG
344 int wkc
= (sz
== -1) ? PAGE_SIZE
: sz
;
346 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressions_exclusive
++);
347 VM_COMPRESSOR_STAT(compressor_stats
.wk_compressed_bytes_exclusive
+= wkc
);
361 sz
= (int) lz4raw_encode_buffer(cdst
, outbufsz
, in
, insize
, &cscratch
->lz4state
[0]);
363 compressor_selector_update(sz
, dowk
, wksz
);
374 metadecompressor(const uint8_t *source
, uint8_t *dest
, uint32_t csize
, uint16_t ccodec
, void *compressor_dscratchin
)
376 int dolz4
= (ccodec
== CCLZ4
);
378 compressor_decode_scratch_t
*compressor_dscratch
= compressor_dscratchin
;
381 rval
= (int)lz4raw_decode_buffer(dest
, PAGE_SIZE
, source
, csize
, &compressor_dscratch
->lz4decodestate
[0]);
382 VM_DECOMPRESSOR_STAT(compressor_stats
.lz4_decompressions
+= 1);
383 VM_DECOMPRESSOR_STAT(compressor_stats
.lz4_decompressed_bytes
+= csize
);
384 #if DEVELOPMENT || DEBUG
385 uint32_t *d32
= dest
;
387 assertf(rval
== PAGE_SIZE
, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
388 rval
, *d32
, *(d32
+ 1), *(d32
+ 2));
390 assert(ccodec
== CCWK
);
392 WKdmD(source
, dest
, &compressor_dscratch
->wkdecompscratch
[0], csize
);
394 VM_DECOMPRESSOR_STAT(compressor_stats
.wk_decompressions
+= 1);
395 VM_DECOMPRESSOR_STAT(compressor_stats
.wk_decompressed_bytes
+= csize
);
398 #pragma clang diagnostic pop
401 vm_compressor_get_encode_scratch_size(void)
403 if (vm_compressor_current_codec
!= VM_COMPRESSOR_DEFAULT_CODEC
) {
404 return MAX(sizeof(compressor_encode_scratch_t
), WKdm_SCRATCH_BUF_SIZE_INTERNAL
);
406 return WKdm_SCRATCH_BUF_SIZE_INTERNAL
;
411 vm_compressor_get_decode_scratch_size(void)
413 if (vm_compressor_current_codec
!= VM_COMPRESSOR_DEFAULT_CODEC
) {
414 return MAX(sizeof(compressor_decode_scratch_t
), WKdm_SCRATCH_BUF_SIZE_INTERNAL
);
416 return WKdm_SCRATCH_BUF_SIZE_INTERNAL
;
422 vm_compressor_algorithm(void)
424 return vm_compressor_current_codec
;
428 vm_compressor_algorithm_init(void)
430 vm_compressor_mode_t new_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
432 #if defined(__arm64__)
433 new_codec
= CMODE_HYB
;
435 if (PAGE_SIZE
== 16384) {
436 vmctune
.lz4_threshold
= 12288;
440 PE_parse_boot_argn("vm_compressor_codec", &new_codec
, sizeof(new_codec
));
441 assertf(((new_codec
== VM_COMPRESSOR_DEFAULT_CODEC
) || (new_codec
== CMODE_WK
) ||
442 (new_codec
== CMODE_LZ4
) || (new_codec
== CMODE_HYB
)),
443 "Invalid VM compression codec: %u", new_codec
);
445 #if defined(__arm__) || defined(__arm64__)
447 if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc
, sizeof(tmpc
))) {
448 new_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
449 } else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc
, sizeof(tmpc
))) {
450 new_codec
= CMODE_HYB
;
453 vm_compressor_current_codec
= new_codec
;
454 #endif /* arm/arm64 */