]>
Commit | Line | Data |
---|---|---|
3a60a9f5 A |
1 | #include "WKdm.h" |
2 | ||
3 | /*********************************************************************** | |
4 | * THE PACKING ROUTINES | |
5 | */ | |
6 | ||
7 | /* WK_pack_2bits() | |
8 | * Pack some multiple of four words holding two-bit tags (in the low | |
9 | * two bits of each byte) into an integral number of words, i.e., | |
0a7de745 | 10 | * one fourth as many. |
3a60a9f5 A |
11 | * NOTE: Pad the input out with zeroes to a multiple of four words! |
12 | */ | |
13 | static WK_word* | |
14 | WK_pack_2bits(WK_word* source_buf, | |
0a7de745 A |
15 | WK_word* source_end, |
16 | WK_word* dest_buf) | |
17 | { | |
18 | WK_word* src_next = source_buf; | |
19 | WK_word* dest_next = dest_buf; | |
20 | ||
21 | while (src_next < source_end) { | |
22 | WK_word temp = src_next[0]; | |
23 | temp |= (src_next[1] << 2); | |
24 | temp |= (src_next[2] << 4); | |
25 | temp |= (src_next[3] << 6); | |
26 | ||
27 | dest_next[0] = temp; | |
28 | dest_next++; | |
29 | src_next += 4; | |
30 | } | |
3a60a9f5 | 31 | |
0a7de745 | 32 | return dest_next; |
3a60a9f5 A |
33 | } |
34 | ||
35 | /* WK_pack_4bits() | |
36 | * Pack an even number of words holding 4-bit patterns in the low bits | |
37 | * of each byte into half as many words. | |
38 | * note: pad out the input with zeroes to an even number of words! | |
39 | */ | |
40 | ||
41 | static WK_word* | |
42 | WK_pack_4bits(WK_word* source_buf, | |
0a7de745 A |
43 | WK_word* source_end, |
44 | WK_word* dest_buf) | |
45 | { | |
46 | WK_word* src_next = source_buf; | |
47 | WK_word* dest_next = dest_buf; | |
48 | ||
49 | /* this loop should probably be unrolled */ | |
50 | while (src_next < source_end) { | |
51 | WK_word temp = src_next[0]; | |
52 | temp |= (src_next[1] << 4); | |
3a60a9f5 | 53 | |
0a7de745 A |
54 | dest_next[0] = temp; |
55 | dest_next++; | |
56 | src_next += 2; | |
57 | } | |
58 | ||
59 | return dest_next; | |
3a60a9f5 A |
60 | } |
61 | ||
62 | /* pack_3_tenbits() | |
63 | * Pack a sequence of three ten bit items into one word. | |
64 | * note: pad out the input with zeroes to an even number of words! | |
65 | */ | |
66 | static WK_word* | |
67 | WK_pack_3_tenbits(WK_word* source_buf, | |
0a7de745 A |
68 | WK_word* source_end, |
69 | WK_word* dest_buf) | |
70 | { | |
71 | WK_word* src_next = source_buf; | |
72 | WK_word* dest_next = dest_buf; | |
73 | ||
74 | /* this loop should probably be unrolled */ | |
75 | while (src_next < source_end) { | |
76 | WK_word temp = src_next[0]; | |
77 | temp |= (src_next[1] << 10); | |
78 | temp |= (src_next[2] << 20); | |
3a60a9f5 | 79 | |
0a7de745 A |
80 | dest_next[0] = temp; |
81 | dest_next++; | |
82 | src_next += 3; | |
83 | } | |
84 | ||
85 | return dest_next; | |
3a60a9f5 A |
86 | } |
87 | ||
88 | /*************************************************************************** | |
89 | * WKdm_compress()---THE COMPRESSOR | |
90 | */ | |
91 | ||
92 | unsigned int | |
0a7de745 A |
93 | WKdm_compress(WK_word* src_buf, |
94 | WK_word* dest_buf, | |
95 | unsigned int num_input_words) | |
3a60a9f5 | 96 | { |
0a7de745 | 97 | DictionaryElement dictionary[DICTIONARY_SIZE]; |
3a60a9f5 | 98 | |
0a7de745 A |
99 | /* arrays that hold output data in intermediate form during modeling */ |
100 | /* and whose contents are packed into the actual output after modeling */ | |
3a60a9f5 | 101 | |
0a7de745 A |
102 | /* sizes of these arrays should be increased if you want to compress |
103 | * pages larger than 4KB | |
104 | */ | |
105 | WK_word tempTagsArray[300]; /* tags for everything */ | |
106 | WK_word tempQPosArray[300]; /* queue positions for matches */ | |
107 | WK_word tempLowBitsArray[1200]; /* low bits for partial matches */ | |
3a60a9f5 | 108 | |
0a7de745 A |
109 | /* boundary_tmp will be used for keeping track of what's where in |
110 | * the compressed page during packing | |
111 | */ | |
112 | WK_word* boundary_tmp; | |
3a60a9f5 | 113 | |
0a7de745 A |
114 | /* Fill pointers for filling intermediate arrays (of queue positions |
115 | * and low bits) during encoding. | |
116 | * Full words go straight to the destination buffer area reserved | |
117 | * for them. (Right after where the tags go.) | |
118 | */ | |
119 | WK_word* next_full_patt; | |
120 | char* next_tag = (char *) tempTagsArray; | |
121 | char* next_qp = (char *) tempQPosArray; | |
122 | WK_word* next_low_bits = tempLowBitsArray; | |
3a60a9f5 | 123 | |
0a7de745 A |
124 | WK_word* next_input_word = src_buf; |
125 | WK_word* end_of_input = src_buf + num_input_words; | |
3a60a9f5 | 126 | |
0a7de745 | 127 | PRELOAD_DICTIONARY; |
3a60a9f5 | 128 | |
0a7de745 | 129 | next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16); |
3a60a9f5 A |
130 | |
131 | #ifdef WK_DEBUG | |
0a7de745 A |
132 | printf("\nIn WKdm_compress\n"); |
133 | printf("About to actually compress, src_buf is %u\n", src_buf); | |
134 | printf("dictionary is at %u\n", dictionary); | |
135 | printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt); | |
136 | fflush(stdout); | |
3a60a9f5 A |
137 | #endif |
138 | ||
0a7de745 A |
139 | while (next_input_word < end_of_input) { |
140 | WK_word *dict_location; | |
141 | WK_word dict_word; | |
142 | WK_word input_word = *next_input_word; | |
143 | ||
144 | /* compute hash value, which is a byte offset into the dictionary, | |
145 | * and add it to the base address of the dictionary. Cast back and | |
146 | * forth to/from char * so no shifts are needed | |
147 | */ | |
148 | dict_location = | |
149 | (WK_word *) | |
150 | ((void*) (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word))); | |
151 | ||
152 | dict_word = *dict_location; | |
153 | ||
154 | if (input_word == dict_word) { | |
155 | RECORD_EXACT(dict_location - dictionary); | |
156 | } else if (input_word == 0) { | |
157 | RECORD_ZERO; | |
158 | } else { | |
159 | WK_word input_high_bits = HIGH_BITS(input_word); | |
160 | if (input_high_bits == HIGH_BITS(dict_word)) { | |
161 | RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word)); | |
162 | *dict_location = input_word; | |
163 | } else { | |
164 | RECORD_MISS(input_word); | |
165 | *dict_location = input_word; | |
166 | } | |
167 | } | |
168 | next_input_word++; | |
169 | } | |
3a60a9f5 A |
170 | |
171 | #ifdef WK_DEBUG | |
0a7de745 A |
172 | printf("AFTER MODELING in WKdm_compress()\n"); fflush(stdout); |
173 | printf("tempTagsArray holds %u bytes\n", | |
174 | next_tag - (char *) tempTagsArray); | |
175 | printf("tempQPosArray holds %u bytes\n", | |
176 | next_qp - (char *) tempQPosArray); | |
177 | printf("tempLowBitsArray holds %u bytes\n", | |
178 | (char *) next_low_bits - (char *) tempLowBitsArray); | |
179 | ||
180 | printf("next_full_patt is %p\n", | |
181 | next_full_patt); | |
182 | ||
183 | printf(" i.e., there are %u full patterns\n", | |
184 | next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16))); | |
185 | fflush(stdout); | |
186 | ||
187 | { int i; | |
188 | WK_word *arr = (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)); | |
189 | ||
190 | printf(" first 20 full patterns are: \n"); | |
191 | for (i = 0; i < 20; i++) { | |
192 | printf(" %d", arr[i]); | |
193 | } | |
194 | printf("\n");} | |
3a60a9f5 A |
195 | #endif |
196 | ||
0a7de745 A |
197 | /* Record (into the header) where we stopped writing full words, |
198 | * which is where we will pack the queue positions. (Recall | |
199 | * that we wrote the full words directly into the dest buffer | |
200 | * during modeling. | |
201 | */ | |
3a60a9f5 | 202 | |
0a7de745 | 203 | SET_QPOS_AREA_START(dest_buf, next_full_patt); |
3a60a9f5 | 204 | |
0a7de745 A |
205 | /* Pack the tags into the tags area, between the page header |
206 | * and the full words area. We don't pad for the packer | |
207 | * because we assume that the page size is a multiple of 16. | |
208 | */ | |
3a60a9f5 A |
209 | |
210 | #ifdef WK_DEBUG | |
0a7de745 A |
211 | printf("about to pack %u bytes holding tags\n", |
212 | next_tag - (char *) tempTagsArray); | |
3a60a9f5 | 213 | |
0a7de745 A |
214 | { int i; |
215 | char* arr = (char *) tempTagsArray; | |
3a60a9f5 | 216 | |
0a7de745 A |
217 | printf(" first 200 tags are: \n"); |
218 | for (i = 0; i < 200; i++) { | |
219 | printf(" %d", arr[i]); | |
220 | } | |
221 | printf("\n");} | |
3a60a9f5 | 222 | #endif |
3a60a9f5 | 223 | |
0a7de745 A |
224 | boundary_tmp = WK_pack_2bits(tempTagsArray, |
225 | (WK_word *) ((void *) next_tag), | |
226 | dest_buf + HEADER_SIZE_IN_WORDS); | |
3a60a9f5 | 227 | |
0a7de745 A |
228 | #ifdef WK_DEBUG |
229 | printf("packing tags stopped at %u\n", boundary_tmp); | |
230 | #endif | |
3a60a9f5 | 231 | |
0a7de745 A |
232 | /* Pack the queue positions into the area just after |
233 | * the full words. We have to round up the source | |
234 | * region to a multiple of two words. | |
235 | */ | |
236 | ||
237 | { | |
238 | unsigned int num_bytes_to_pack = (unsigned int)(next_qp - (char *) tempQPosArray); | |
239 | unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8); | |
240 | unsigned int num_source_words = num_packed_words * 2; | |
241 | WK_word* endQPosArray = tempQPosArray + num_source_words; | |
242 | ||
243 | /* Pad out the array with zeros to avoid corrupting real packed | |
244 | * values. */ | |
245 | for (; /* next_qp is already set as desired */ | |
246 | next_qp < (char*)endQPosArray; | |
247 | next_qp++) { | |
248 | *next_qp = 0; | |
249 | } | |
3a60a9f5 | 250 | |
0a7de745 A |
251 | #ifdef WK_DEBUG |
252 | printf("about to pack %u (bytes holding) queue posns.\n", | |
253 | num_bytes_to_pack); | |
254 | printf("packing them from %u words into %u words\n", | |
255 | num_source_words, num_packed_words); | |
256 | printf("dest is range %u to %u\n", | |
257 | next_full_patt, next_full_patt + num_packed_words); | |
258 | { int i; | |
259 | char *arr = (char *) tempQPosArray; | |
260 | printf(" first 200 queue positions are: \n"); | |
261 | for (i = 0; i < 200; i++) { | |
262 | printf(" %d", arr[i]); | |
263 | } | |
264 | printf("\n");} | |
265 | #endif | |
3a60a9f5 | 266 | |
0a7de745 A |
267 | boundary_tmp = WK_pack_4bits(tempQPosArray, |
268 | endQPosArray, | |
269 | next_full_patt); | |
270 | #ifdef WK_DEBUG | |
271 | printf("Packing of queue positions stopped at %u\n", boundary_tmp); | |
272 | #endif // WK_DEBUG | |
3a60a9f5 | 273 | |
0a7de745 A |
274 | /* Record (into the header) where we stopped packing queue positions, |
275 | * which is where we will start packing low bits. | |
276 | */ | |
277 | SET_LOW_BITS_AREA_START(dest_buf, boundary_tmp); | |
278 | } | |
279 | ||
280 | /* Pack the low bit patterns into the area just after | |
281 | * the queue positions. We have to round up the source | |
282 | * region to a multiple of three words. | |
283 | */ | |
284 | ||
285 | { | |
286 | unsigned int num_tenbits_to_pack = | |
287 | (unsigned int)(next_low_bits - tempLowBitsArray); | |
288 | unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3); | |
289 | unsigned int num_source_words = num_packed_words * 3; | |
290 | WK_word* endLowBitsArray = tempLowBitsArray + num_source_words; | |
291 | ||
292 | /* Pad out the array with zeros to avoid corrupting real packed | |
293 | * values. */ | |
294 | ||
295 | for (; /* next_low_bits is already set as desired */ | |
296 | next_low_bits < endLowBitsArray; | |
297 | next_low_bits++) { | |
298 | *next_low_bits = 0; | |
299 | } | |
3a60a9f5 A |
300 | |
301 | #ifdef WK_DEBUG | |
0a7de745 A |
302 | printf("about to pack low bits\n"); |
303 | printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack); | |
304 | printf("endLowBitsArray is %u\n", endLowBitsArray); | |
3a60a9f5 | 305 | #endif |
3a60a9f5 | 306 | |
0a7de745 A |
307 | boundary_tmp = WK_pack_3_tenbits(tempLowBitsArray, |
308 | endLowBitsArray, | |
309 | boundary_tmp); | |
3a60a9f5 | 310 | |
0a7de745 A |
311 | SET_LOW_BITS_AREA_END(dest_buf, boundary_tmp); |
312 | } | |
3a60a9f5 | 313 | |
0a7de745 A |
314 | return (unsigned int)((char *) boundary_tmp - (char *) dest_buf); |
315 | } |