]> git.saurik.com Git - apple/xnu.git/blame - osfmk/x86_64/WKdmDecompress_new.s
xnu-2782.40.9.tar.gz
[apple/xnu.git] / osfmk / x86_64 / WKdmDecompress_new.s
CommitLineData
39236c6e
A
1/*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/*
30 This file contains x86_64 hand optimized implementation of WKdm memory page decompressor.
31
32 void WKdm_decompress (WK_word* src_buf, WK_word* dest_buf, WK_word* scratch, __unused__ unsigned int words);
33
34 input :
35 src_buf : address of input compressed data buffer
36 dest_buf : address of output decompressed buffer
37 scratch : a 16-byte aligned 4k bytes scratch memory provided by the caller
38 words : this argument is not used in the implementation
39
40 output :
41
42 the input buffer is decompressed and the dest_buf is written with decompressed data.
43
44 Am algorithm description of the WKdm compress and bit stream format can be found in the WKdm Compress x86_64 assembly code WKdmCompress.s
45
46 The bit stream (*src_buf) consists of
47 a. 12 bytes header
48 b. 256 bytes for 1024 packed tags
49 c. (varying number of) words for new words not matched to dictionary word.
50 d. (varying number of) 32-bit words for packed 4-bit dict_indices (for class 1 and 3)
51 e. (varying number of) 32-bit words for packed 10-bit low bits (for class 1)
52
53 where the header (of 3 words) specifies the ending boundaries (in 32-bit words) from the start of the bit stream of c,d,e, respectively.
54
55 The decompressor 1st unpacking the bit stream component b/d/e into temorary buffers. Then it sequentially decodes the decompressed word as follows
56
57 for (i=0;i<1024;i++) {
58 tag = *next_tag++
59 switch (tag) {
60 case 0 : *dest_buf++ = 0; break;
61 case 1 : dict_word = dictionary[*dict_index]; dictionary[*dict_index++] = *dest_buf++ = dict_word&0xfffffc00 | *LowBits++; break;
62 case 2 : x = *new_word++; k = (x>>10)&255; k = hashTable[k]; dictionary[k] = *dest_buf++ = x; break;
63 case 3 : *dest_buf++ = dictionary[*dict_index++]; break;
64 }
65
66 cclee, 11/30/12
67*/
68
69 .text
70
71 .globl _WKdm_decompress_new
72_WKdm_decompress_new:
73
74 // save registers, and allocate stack memory for local variables
75
76 pushq %rbp
77 movq %rsp, %rbp
78 pushq %r12
79 pushq %r13
80 pushq %rbx
81
82 subq $(64+8+16), %rsp
83
84 movq %rsi, %r12 // dest_buf
85 movq %rdx, %r13 // scracht_buf
86
87 // PRELOAD_DICTONARY; dictionary starting address : starting address 0(%rsp)
88#if 1
89 movl $1, 0(%rsp)
90 movl $1, 4(%rsp)
91 movl $1, 8(%rsp)
92 movl $1, 12(%rsp)
93 movl $1, 16(%rsp)
94 movl $1, 20(%rsp)
95 movl $1, 24(%rsp)
96 movl $1, 28(%rsp)
97 movl $1, 32(%rsp)
98 movl $1, 36(%rsp)
99 movl $1, 40(%rsp)
100 movl $1, 44(%rsp)
101 movl $1, 48(%rsp)
102 movl $1, 52(%rsp)
103 movl $1, 56(%rsp)
104 movl $1, 60(%rsp)
105#else
106 mov $0x100000001, %rax
107 mov %rax, (%rsp)
108 mov %rax, 8(%rsp)
109 mov %rax, 16(%rsp)
110 mov %rax, 24(%rsp)
111 mov %rax, 32(%rsp)
112 mov %rax, 40(%rsp)
113 mov %rax, 48(%rsp)
114 mov %rax, 56(%rsp)
115#endif
116
117 // WK_unpack_2bits(TAGS_AREA_START(src_buf), TAGS_AREA_END(src_buf), tempTagsArray);
118
119 leaq 268(%rdi), %r10 // TAGS_AREA_END
120 leaq 12(%rdi), %rax // TAGS_AREA_START
121 movq %r13, %rsi // tempTagsArray
122 cmpq %rax, %r10 // TAGS_AREA_END vs TAGS_AREA_START
123 jbe 1f // if TAGS_AREA_END <= TAGS_AREA_START, skip L_WK_unpack_2bits
124 movq %r13, %rcx // next_word
125 xorl %r8d, %r8d // i = 0
126 mov $(50529027<<32)+50529027, %r9
127L_WK_unpack_2bits:
128 movl 12(%rdi,%r8, 4), %eax
129 movl 12(%rdi,%r8, 4), %edx
130 shrl $2, %eax
131 shlq $32, %rax
132 orq %rdx, %rax
133 movq %rax, %rdx
134 shrq $4, %rax
135 andq %r9, %rdx
136 andq %r9, %rax
137 incq %r8 // i++
138 movq %rdx, (%rcx)
139 movq %rax, 8(%rcx)
140 addq $16, %rcx // next_tags += 16
141 cmpq $64, %r8 // i vs 64
142 jne L_WK_unpack_2bits // repeat loop until i==64
1431:
144
145
146 // WK_unpack_4bits(QPOS_AREA_START(src_buf), QPOS_AREA_END(src_buf), tempQPosArray);
147
148 mov 4(%rdi), %eax // WKdm header qpos end
149 leaq (%rdi,%rax,4), %r9 // QPOS_AREA_END
150 mov 0(%rdi), %eax // WKdm header qpos start
151 leaq (%rdi,%rax,4), %r8 // QPOS_AREA_START
152 leaq 1024(%r13), %rbx // tempQPosArray
153 cmpq %r8, %r9 // QPOS_AREA_END vs QPOS_AREA_START
154 jbe 1f // if QPOS_AREA_END <= QPOS_AREA_START, skip L_WK_unpack_4bits
155 leaq 8(%rbx), %rcx // next_qpos
156
157 mov $(252645135<<32)+252645135, %r11
158L_WK_unpack_4bits:
159 movl (%r8), %eax // w = *next_word
160 movl %eax, %edx // w
161 shlq $28, %rax
162 orq %rdx, %rax
163 addq $4, %r8 // next_word++
164 andq %r11, %rax
165 movq %rax, -8(%rcx)
166 addq $8, %rcx // next_qpos+=8
167 cmpq %r8, %r9 // QPOS_AREA_END vs QPOS_AREA_START
168 ja L_WK_unpack_4bits // repeat loop until QPOS_AREA_END <= QPOS_AREA_START
169
170
1711:
172
173 // WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf), LOW_BITS_AREA_END(src_buf), tempLowBitsArray);
174
175 movl 8(%rdi), %eax // LOW_BITS_AREA_END offset
176 leaq (%rdi,%rax,4), %rdi // LOW_BITS_AREA_END
177 leaq 2048(%r13), %r11 // tempLowBitsArray
178 leaq 4094(%r13), %r13 // final tenbits addr
179 sub %r9, %rdi // LOW_BITS_AREA_START vs LOW_BITS_AREA_END
180 jle 1f // if START>=END, skip L_WK_unpack_3_tenbits
181 movq %r11, %rcx // next_low_bits
182L_WK_unpack_3_tenbits:
183 movl (%r9), %eax // w = *next_word, 0:c:b:a
184 movl $(1023<<10), %edx
185 movl $(1023<<20), %r8d
186 andl %eax, %edx // b << 10
187 andl %eax, %r8d // c << 20
188 andq $1023, %rax
189 shll $6, %edx
190 shlq $12, %r8
191 orl %edx, %eax
192 orq %r8, %rax
193 cmp %r13, %rcx
194 je 2f
195 mov %rax, (%rcx)
196 jmp 3f
1972: mov %ax, (%rcx)
1983:
199 addq $4, %r9 // next_word++
200 addq $6, %rcx // next_low_bits += 3
201 sub $4, %rdi
202 jg L_WK_unpack_3_tenbits // repeat loop if LOW_BITS_AREA_END > next_word
2031:
204
205
206 #define next_qpos %rbx
207 #define hash %r8
208 #define tags_counter %edi
209 #define dest_buf %r12
210 #define next_full_patt %r10
211
212 leaq _hashLookupTable_new(%rip), hash // hash look up table
213 movl $1024, tags_counter // tags_counter
214 jmp L_next
215
216 .align 4,0x90
217L_nonpartital:
218 jl L_ZERO_TAG
219 cmpb $2, -1(%rsi)
220 je L_MISS_TAG
221
222L_EXACT_TAG:
223 movzbl (next_qpos), %eax // qpos = *next_qpos
224 incq next_qpos // next_qpos++
225 decl tags_counter // tags_counter--
226 movl (%rsp,%rax,4), %eax // w = dictionary[qpos]
227 movl %eax, -4(dest_buf) // *dest_buf = w
228 je L_done
229
230L_next:
231 incq %rsi // next_tag++
232 addq $4, dest_buf
233 cmpb $1, -1(%rsi)
234 jne L_nonpartital
235
236L_PARTIAL_TAG:
237 movzbl (next_qpos),%edx // qpos = *next_qpos
238 incq next_qpos // next_qpos++
239 movl (%rsp,%rdx,4), %eax // read dictionary word
240 andl $-1024, %eax // clear lower 10 bits
241 or (%r11), %ax // pad the lower 10-bits from *next_low_bits
242 addq $2, %r11 // next_low_bits++
243 decl tags_counter // tags_counter--
244 movl %eax, (%rsp,%rdx,4) // *dict_location = newly formed word
245 movl %eax, -4(dest_buf) // *dest_buf = newly formed word
246 jg L_next // repeat loop until next_tag==tag_area_end
247
248L_done:
249
250 // release stack memory, restore registers, and return
251
252 addq $(64+8+16), %rsp
253 popq %rbx
254 popq %r13
255 popq %r12
256 leave
257 ret
258
259 .align 4,0x90
260L_MISS_TAG:
261 movl (next_full_patt), %edx // w = *next_full_patt
262 movl (next_full_patt), %eax // w = *next_full_patt
263 shrl $10, %edx // w>>10
264 addq $4, next_full_patt // next_full_patt++
265 movzbl %dl, %edx // 8-bit hash table index
266 movl %eax, -4(dest_buf) // *dest_buf = word
267 movzbl (hash,%rdx),%edx // qpos
268 decl tags_counter // tags_counter--
269 movl %eax, (%rsp,%rdx) // dictionary[qpos] = word
270 jg L_next // repeat the loop
271 jmp L_done
272
273 .align 4,0x90
274L_ZERO_TAG:
275 decl tags_counter // tags_counter--
276 movl $0, -4(dest_buf) // *dest_buf = 0
277 jg L_next // repeat the loop
278 jmp L_done
279
280