/*
 * Copyright (c) 2010-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/* This module implements a hybrid/adaptive compression scheme, using WKdm where
 * profitable and, currently, an LZ4 variant elsewhere.
 * (Created 2016, Derek Kumar)
 */
#include <vm/vm_compressor_algorithms.h>
#include <vm/vm_compressor.h>

/* Sentinel stored in the first word of a WKdm single-value page; checked by
 * WKdm_hv() on DEVELOPMENT/DEBUG kernels. */
#define MZV_MAGIC (17185)

#if defined(__arm64__)
#include <arm/proc_reg.h>

/* Required alignment (bytes) of the per-codec scratch buffers. */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)
/* NOTE(review): an #else/#endif for the arch conditional above appears to have
 * been lost from this span; the duplicate definitions below were presumably
 * the non-arm64 branch — verify against the original source. */
#define LZ4_SCRATCH_ALIGN (64)
#define WKC_SCRATCH_ALIGN (64)

/* Temporal/non-temporal copy variants; both are plain memcpy here. */
#define memcpy_T_NT memcpy
#define memcpy_NT_T memcpy
52 uint8_t lz4state
[lz4_encode_scratch_size
]__attribute((aligned(LZ4_SCRATCH_ALIGN
)));
53 uint8_t wkscratch
[0] __attribute((aligned(WKC_SCRATCH_ALIGN
))); // TODO
54 } compressor_encode_scratch_t
;
57 uint8_t lz4decodestate
[lz4_encode_scratch_size
]__attribute((aligned(64)));
58 uint8_t wkdecompscratch
[0] __attribute((aligned(64)));
59 } compressor_decode_scratch_t
;
/*
 * Running state for the adaptive WKdm/LZ4 selector (see compressor_preselect()
 * and compressor_selector_update()).  The 16-bit counters are working values
 * that get reset as the policy adapts; the 32-bit "total" counters are
 * monotonic statistics.
 * NOTE(review): the "typedef struct {" opener and the closing
 * "} compressor_state_t;" were missing from this span; reconstructed from the
 * declaration "compressor_state_t vmcstate" below.
 */
typedef struct {
	uint16_t lz4_selection_run;
	uint16_t lz4_run_length;
	uint16_t lz4_preselects;
	uint32_t lz4_total_preselects;
	uint16_t lz4_failure_skips;
	uint32_t lz4_total_failure_skips;
	uint16_t lz4_failure_run_length;
	uint16_t lz4_total_unprofitables;
	uint32_t lz4_total_negatives;
	uint32_t lz4_total_failures;
} compressor_state_t;
74 compressor_tuneables_t vmctune
= {
75 .lz4_threshold
= 2048,
76 .wkdm_reeval_threshold
= 1536,
77 .lz4_max_failure_skips
= 0,
78 .lz4_max_failure_run_length
= ~0U,
79 .lz4_max_preselects
= 0,
80 .lz4_run_preselection_threshold
= ~0U,
81 .lz4_run_continue_bytes
= 0,
82 .lz4_profitable_bytes
= 0,
85 compressor_state_t vmcstate
= {
86 .lz4_selection_run
= 0,
89 .lz4_total_preselects
= 0,
90 .lz4_failure_skips
= 0,
91 .lz4_total_failure_skips
= 0,
92 .lz4_failure_run_length
= 0,
93 .lz4_total_unprofitables
= 0,
94 .lz4_total_negatives
= 0,
/* Global compression/decompression statistics, updated through the
 * VM_COMPRESSOR_STAT / VM_DECOMPRESSOR_STAT macros below. */
compressor_stats_t compressor_stats;
/*
 * Result of compressor_preselect(): try LZ4 first, skip LZ4 entirely for
 * this page, or run WKdm first (the default path).
 * NOTE(review): the enumerator list was missing from this span; the names
 * are reconstructed from their uses in metacompressor().  They are only
 * compared for equality, so exact values are immaterial, but verify against
 * the original source.
 */
enum compressor_preselect_t {
	CPRESELLZ4 = 0x0,
	CSKIPLZ4 = 0x1,
	CPRESELWK = 0x2,
};
/* Codec currently in effect; may be overridden by boot-args in
 * vm_compressor_algorithm_init(). */
vm_compressor_mode_t vm_compressor_current_codec = VM_COMPRESSOR_DEFAULT_CODEC;

/* Force the software WKdm path even where an optimized variant exists. */
boolean_t vm_compressor_force_sw_wkdm = FALSE;

/* Extra logging knob. */
boolean_t verbose = FALSE;
/* Gate for debug-only timing statistics. */
#define VMDBGSTAT (DEBUG)
/* NOTE(review): the #if/#else/#endif structure and the do{...}while(0) bodies
 * of the macros below appear to have been lost from this span; the duplicate
 * definitions were presumably the enabled/disabled branches of those
 * conditionals — verify against the original source. */
#define VM_COMPRESSOR_STAT_DBG(x...) \
#define VM_COMPRESSOR_STAT_DBG(x...) \
#define VMCSTATS (DEVELOPMENT || DEBUG)
#define VM_COMPRESSOR_STAT(x...) \
//TODO make atomic where needed, decompression paths
#define VM_DECOMPRESSOR_STAT(x...) \
#define VM_COMPRESSOR_STAT(x...) \
#define VM_DECOMPRESSOR_STAT(x...) \
143 static inline enum compressor_preselect_t
compressor_preselect(void) {
144 if (vmcstate
.lz4_failure_skips
>= vmctune
.lz4_max_failure_skips
) {
145 vmcstate
.lz4_failure_skips
= 0;
146 vmcstate
.lz4_failure_run_length
= 0;
149 if (vmcstate
.lz4_failure_run_length
>= vmctune
.lz4_max_failure_run_length
) {
150 vmcstate
.lz4_failure_skips
++;
151 vmcstate
.lz4_total_failure_skips
++;
155 if (vmcstate
.lz4_preselects
>= vmctune
.lz4_max_preselects
) {
156 vmcstate
.lz4_preselects
= 0;
160 if (vmcstate
.lz4_run_length
>= vmctune
.lz4_run_preselection_threshold
) {
161 vmcstate
.lz4_preselects
++;
162 vmcstate
.lz4_total_preselects
++;
/*
 * Feed the outcome of an LZ4 compression attempt back into the adaptive
 * selector state (vmcstate) so compressor_preselect() can bias future codec
 * choices.
 *
 * lz4sz: LZ4 result size in bytes (the failure convention of
 *        lz4raw_encode_buffer applies — see the caller).
 * didwk: whether WKdm was also run on this page.
 * wksz:  WKdm result size; meaningful when didwk is set.
 *
 * NOTE(review): several control-flow lines (branch headers and closing
 * braces) are missing from this span.  The visible statements are preserved
 * verbatim and the apparent gaps are marked; do not assume the flat layout
 * below reflects the real nesting.
 */
static inline void compressor_selector_update(int lz4sz, int didwk, int wksz) {
	VM_COMPRESSOR_STAT(compressor_stats.lz4_compressions++);

	/* NOTE(review): gap — an "LZ4 failed" branch header appears to be
	 * missing before the failure accounting below. */
	VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes+=PAGE_SIZE);
	VM_COMPRESSOR_STAT(compressor_stats.lz4_compression_failures++);
	vmcstate.lz4_failure_run_length++;
	VM_COMPRESSOR_STAT(vmcstate.lz4_total_failures++);
	vmcstate.lz4_run_length = 0;

	/* NOTE(review): gap — success-branch header appears to be missing here. */
	vmcstate.lz4_failure_run_length = 0;
	VM_COMPRESSOR_STAT(compressor_stats.lz4_compressed_bytes+=lz4sz);

	/* Small LZ4 output: WKdm is presumably adequate; reset the LZ4 run. */
	if (lz4sz <= vmctune.wkdm_reeval_threshold) {
		vmcstate.lz4_run_length = 0;
	/* NOTE(review): gap — else-branch header appears to be missing here. */
	vmcstate.lz4_run_length++;

	/* When WKdm also ran, compare the two result sizes. */
	if (__probable(wksz > lz4sz)) {
		uint32_t lz4delta = wksz - lz4sz;
		VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_delta+=lz4delta);
		/* LZ4 clearly better: extend the LZ4 run. */
		if (lz4delta >= vmctune.lz4_run_continue_bytes) {
			vmcstate.lz4_run_length++;
		/* LZ4 not profitable enough over WKdm: count as a failure. */
		} else if (lz4delta <= vmctune.lz4_profitable_bytes) {
			vmcstate.lz4_failure_run_length++;
			VM_COMPRESSOR_STAT(vmcstate.lz4_total_unprofitables++);
			vmcstate.lz4_run_length = 0;
		/* NOTE(review): gap — else-branch header appears to be missing here. */
		vmcstate.lz4_run_length = 0;

	/* NOTE(review): gap — the branch for WKdm beating LZ4 (negative delta)
	 * appears to be missing its header here. */
	VM_COMPRESSOR_STAT(compressor_stats.lz4_wk_compression_negative_delta+=(lz4sz-wksz));
	vmcstate.lz4_failure_run_length++;
	VM_COMPRESSOR_STAT(vmcstate.lz4_total_negatives++);
	vmcstate.lz4_run_length = 0;
	/* NOTE(review): gap — the function's closing braces are missing from
	 * this span. */
/*
 * Header validation for a WKdm-compressed buffer, DEVELOPMENT/DEBUG only.
 * A buffer that is not a single-value page (first word == MZV_MAGIC) must
 * have the high 16 bits of its first three header words clear; otherwise
 * the buffer is corrupt and we panic.  No-op on release kernels.
 * NOTE(review): the closing braces and the #else/#endif of this function
 * were missing from this span and have been reconstructed; the release-build
 * "(void) wkbuf;" is presumed — verify against the original source.
 */
static inline void WKdm_hv(uint32_t *wkbuf) {
#if DEVELOPMENT || DEBUG
	uint32_t *inw = (uint32_t *) wkbuf;
	if (*inw != MZV_MAGIC) {
		if ((*inw | *(inw + 1) | *(inw + 2)) & 0xFFFF0000) {
			panic("WKdm(%p): invalid header 0x%x 0x%x 0x%x\n", wkbuf, *inw, *(inw + 1), *(inw + 2));
		}
	}
#else /* DEVELOPMENT || DEBUG */
	(void) wkbuf;
#endif /* DEVELOPMENT || DEBUG */
}
//todo fix clang diagnostic
/* The WKdm entry points take mismatched word-pointer types; suppress the
 * warning until the prototypes are fixed (popped after metadecompressor). */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wincompatible-pointer-types"

/* NOTE(review): the matching #endif for this arch conditional appears to
 * have been lost from this span — verify against the original source. */
#if defined(__arm64__)
234 static inline void WKdmD(WK_word
* src_buf
, WK_word
* dest_buf
, WK_word
* scratch
, unsigned int bytes
) {
235 #if defined(__arm64__)
238 #if defined(__arm64__)
239 if (PAGE_SIZE
== 4096) {
240 WKdm_decompress_4k(src_buf
, dest_buf
, scratch
, bytes
);
242 __unused
uint64_t wdsstart
;
244 VM_COMPRESSOR_STAT_DBG(wdsstart
= mach_absolute_time());
245 WKdm_decompress_16k(src_buf
, dest_buf
, scratch
, bytes
);
247 VM_COMPRESSOR_STAT_DBG(compressor_stats
.wks_dabstime
+= mach_absolute_time() - wdsstart
);
248 VM_COMPRESSOR_STAT(compressor_stats
.wks_decompressions
++);
250 #else /* !defined arm64 */
251 WKdm_decompress_new(src_buf
, dest_buf
, scratch
, bytes
);
#if DEVELOPMENT || DEBUG
/* Debug-only counters.
 * NOTE(review): the matching #endif was missing from this span; restored. */
int precompy, wkswhw;
#endif /* DEVELOPMENT || DEBUG */
258 static inline int WKdmC(WK_word
* src_buf
, WK_word
* dest_buf
, WK_word
* scratch
, boolean_t
*incomp_copy
, unsigned int limit
) {
261 #if defined(__arm64__)
262 if (PAGE_SIZE
== 4096) {
263 wkcval
= WKdm_compress_4k(src_buf
, dest_buf
, scratch
, limit
);
265 __unused
uint64_t wcswstart
;
267 VM_COMPRESSOR_STAT_DBG(wcswstart
= mach_absolute_time());
269 int wkswsz
= WKdm_compress_16k(src_buf
, dest_buf
, scratch
, limit
);
271 VM_COMPRESSOR_STAT_DBG(compressor_stats
.wks_cabstime
+= mach_absolute_time() - wcswstart
);
272 VM_COMPRESSOR_STAT(compressor_stats
.wks_compressions
++);
276 wkcval
= WKdm_compress_new(src_buf
, dest_buf
, scratch
, limit
);
/*
 * Compress one page (in, PAGE_SIZE bytes) into cdst (capacity outbufsz),
 * choosing between WKdm and LZ4 according to vm_compressor_current_codec
 * and, in hybrid mode, the adaptive preselector.  *codec receives the codec
 * actually used; cscratchin supplies the per-thread encode scratch.
 * Returns the compressed size sz with the underlying encoders' failure
 * conventions.
 *
 * NOTE(review): numerous control-flow lines (branch-flag assignments,
 * labels/gotos, closing braces, and the declarations of sz and wksz) are
 * missing from this span.  The visible statements are preserved verbatim
 * and apparent gaps are marked; do not assume the layout below reflects the
 * real nesting.
 */
int metacompressor(const uint8_t *in, uint8_t *cdst, int32_t outbufsz, uint16_t *codec, void *cscratchin, boolean_t *incomp_copy) {
	int dowk = FALSE, dolz4 = FALSE, skiplz4 = FALSE;
	int insize = PAGE_SIZE;
	compressor_encode_scratch_t *cscratch = cscratchin;

	/* Select which codec(s) to run for this page. */
	if (vm_compressor_current_codec == CMODE_WK) {
	/* NOTE(review): gap — dowk assignment presumably lost here. */
	} else if (vm_compressor_current_codec == CMODE_LZ4) {
	/* NOTE(review): gap — dolz4 assignment presumably lost here. */
	} else if (vm_compressor_current_codec == CMODE_HYB) {
		/* Hybrid: consult the adaptive preselector. */
		enum compressor_preselect_t presel = compressor_preselect();
		if (presel == CPRESELLZ4) {
		/* NOTE(review): gap — LZ4-first handling lost here. */
		} else if (presel == CSKIPLZ4) {
		/* NOTE(review): gap — skip handling and default branch lost. */
		assert(presel == CPRESELWK);

	/* WKdm attempt. */
	VM_COMPRESSOR_STAT(compressor_stats.wk_compressions++);
	sz = WKdmC(in, cdst, &cscratch->wkscratch[0], incomp_copy, outbufsz);

	/* NOTE(review): gap — WKdm-failure branch header lost before the
	 * failure accounting below. */
	VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=PAGE_SIZE);
	VM_COMPRESSOR_STAT(compressor_stats.wk_compression_failures++);
	/* In hybrid mode a WKdm failure falls through to the LZ4 attempt. */
	if (vm_compressor_current_codec == CMODE_HYB) {
	} else if (sz == 0) {
		/* sz == 0: single-value page, stored in 4 bytes. */
		VM_COMPRESSOR_STAT(compressor_stats.wk_sv_compressions++);
		VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=4);
	/* NOTE(review): gap — else branch (normal WKdm success) header lost. */
	VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_total+=sz);

	/* Hybrid re-evaluation: a failed or large WKdm result (and no LZ4
	 * skip) warrants an LZ4 attempt. */
	if (vm_compressor_current_codec == CMODE_HYB) {
		if (((sz == -1) || (sz >= vmctune.lz4_threshold)) && (skiplz4 == FALSE)) {
		/* NOTE(review): gap — the "WKdm wins exclusively" branch header
		 * and the matching #endif below appear to be lost. */
#if DEVELOPMENT || DEBUG
		int wkc = (sz == -1) ? PAGE_SIZE : sz;
		VM_COMPRESSOR_STAT(compressor_stats.wk_compressions_exclusive++);
		VM_COMPRESSOR_STAT(compressor_stats.wk_compressed_bytes_exclusive+=wkc);

	/* LZ4 attempt; the outcome is fed back into the adaptive selector. */
	sz = (int) lz4raw_encode_buffer(cdst, outbufsz, in, insize, &cscratch->lz4state[0]);
	compressor_selector_update(sz, dowk, wksz);
	/* NOTE(review): gap — the tail of the function (failure normalization,
	 * exit label, return sz, closing braces) is missing from this span. */
/*
 * Decompress one compressed segment (source, csize bytes) into dest (one
 * page), using the codec recorded at compression time (ccodec: CCLZ4 or
 * CCWK).  compressor_dscratchin supplies the per-thread decode scratch.
 *
 * NOTE(review): branch headers (the if (dolz4) ... else split), the
 * declaration of rval, and several #endif/closing-brace lines are missing
 * from this span.  The visible statements are preserved verbatim and the
 * apparent gaps are marked.
 */
void metadecompressor(const uint8_t *source, uint8_t *dest, uint32_t csize, uint16_t ccodec, void *compressor_dscratchin) {
	int dolz4 = (ccodec == CCLZ4);
	compressor_decode_scratch_t *compressor_dscratch = compressor_dscratchin;

	/* NOTE(review): gap — "if (dolz4) {" and the declaration of rval
	 * appear to be lost here. */
	rval = (int)lz4raw_decode_buffer(dest, PAGE_SIZE, source, csize, &compressor_dscratch->lz4decodestate[0]);
	VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressions+=1);
	VM_DECOMPRESSOR_STAT(compressor_stats.lz4_decompressed_bytes+=csize);
#if DEVELOPMENT || DEBUG
	uint32_t *d32 = dest;
	/* A successful LZ4 decode must yield exactly one page. */
	assertf(rval == PAGE_SIZE, "LZ4 decode: size != pgsize %d, header: 0x%x, 0x%x, 0x%x",
	    rval, *d32, *(d32+1), *(d32+2));
	/* NOTE(review): gap — #endif and the else (WKdm) branch header appear
	 * to be lost here. */
	assert(ccodec == CCWK);
	WKdmD(source, dest, &compressor_dscratch->wkdecompscratch[0], csize);
	VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressions+=1);
	VM_DECOMPRESSOR_STAT(compressor_stats.wk_decompressed_bytes+=csize);
	/* NOTE(review): gap — closing braces of the function are missing. */

#pragma clang diagnostic pop
386 uint32_t vm_compressor_get_encode_scratch_size(void) {
387 if (vm_compressor_current_codec
!= VM_COMPRESSOR_DEFAULT_CODEC
) {
388 return MAX(sizeof(compressor_encode_scratch_t
), WKdm_SCRATCH_BUF_SIZE_INTERNAL
);
390 return WKdm_SCRATCH_BUF_SIZE_INTERNAL
;
394 uint32_t vm_compressor_get_decode_scratch_size(void) {
395 if (vm_compressor_current_codec
!= VM_COMPRESSOR_DEFAULT_CODEC
) {
396 return MAX(sizeof(compressor_decode_scratch_t
), WKdm_SCRATCH_BUF_SIZE_INTERNAL
);
398 return WKdm_SCRATCH_BUF_SIZE_INTERNAL
;
403 int vm_compressor_algorithm(void) {
404 return vm_compressor_current_codec
;
407 void vm_compressor_algorithm_init(void) {
408 vm_compressor_mode_t new_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
410 #if defined(__arm64__)
411 new_codec
= CMODE_HYB
;
413 if (PAGE_SIZE
== 16384) {
414 vmctune
.lz4_threshold
= 12288;
418 PE_parse_boot_argn("vm_compressor_codec", &new_codec
, sizeof(new_codec
));
419 assertf(((new_codec
== VM_COMPRESSOR_DEFAULT_CODEC
) || (new_codec
== CMODE_WK
) ||
420 (new_codec
== CMODE_LZ4
) || (new_codec
== CMODE_HYB
)),
421 "Invalid VM compression codec: %u", new_codec
);
423 #if defined(__arm__)||defined(__arm64__)
425 if (PE_parse_boot_argn("-vm_compressor_wk", &tmpc
, sizeof(tmpc
))) {
426 new_codec
= VM_COMPRESSOR_DEFAULT_CODEC
;
427 } else if (PE_parse_boot_argn("-vm_compressor_hybrid", &tmpc
, sizeof(tmpc
))) {
428 new_codec
= CMODE_HYB
;
431 vm_compressor_current_codec
= new_codec
;
432 #endif /* arm/arm64 */