]>
Commit | Line | Data |
---|---|---|
3a60a9f5 A |
1 | #include "WKdm.h" |
2 | ||
3 | /*********************************************************************** | |
4 | * THE PACKING ROUTINES | |
5 | */ | |
6 | ||
7 | /* WK_pack_2bits() | |
8 | * Pack some multiple of four words holding two-bit tags (in the low | |
9 | * two bits of each byte) into an integral number of words, i.e., | |
10 | * one fourth as many. | |
11 | * NOTE: Pad the input out with zeroes to a multiple of four words! | |
12 | */ | |
13 | static WK_word* | |
14 | WK_pack_2bits(WK_word* source_buf, | |
15 | WK_word* source_end, | |
16 | WK_word* dest_buf) { | |
17 | ||
18 | register WK_word* src_next = source_buf; | |
19 | WK_word* dest_next = dest_buf; | |
20 | ||
21 | while (src_next < source_end) { | |
22 | register WK_word temp = src_next[0]; | |
23 | temp |= (src_next[1] << 2); | |
24 | temp |= (src_next[2] << 4); | |
25 | temp |= (src_next[3] << 6); | |
26 | ||
27 | dest_next[0] = temp; | |
28 | dest_next++; | |
29 | src_next += 4; | |
30 | } | |
31 | ||
32 | return dest_next; | |
33 | ||
34 | } | |
35 | ||
36 | /* WK_pack_4bits() | |
37 | * Pack an even number of words holding 4-bit patterns in the low bits | |
38 | * of each byte into half as many words. | |
39 | * note: pad out the input with zeroes to an even number of words! | |
40 | */ | |
41 | ||
42 | static WK_word* | |
43 | WK_pack_4bits(WK_word* source_buf, | |
44 | WK_word* source_end, | |
45 | WK_word* dest_buf) { | |
46 | register WK_word* src_next = source_buf; | |
47 | WK_word* dest_next = dest_buf; | |
48 | ||
49 | /* this loop should probably be unrolled */ | |
50 | while (src_next < source_end) { | |
51 | register WK_word temp = src_next[0]; | |
52 | temp |= (src_next[1] << 4); | |
53 | ||
54 | dest_next[0] = temp; | |
55 | dest_next++; | |
56 | src_next += 2; | |
57 | } | |
58 | ||
59 | return dest_next; | |
60 | ||
61 | } | |
62 | ||
63 | /* pack_3_tenbits() | |
64 | * Pack a sequence of three ten bit items into one word. | |
65 | * note: pad out the input with zeroes to an even number of words! | |
66 | */ | |
67 | static WK_word* | |
68 | WK_pack_3_tenbits(WK_word* source_buf, | |
69 | WK_word* source_end, | |
70 | WK_word* dest_buf) { | |
71 | ||
72 | register WK_word* src_next = source_buf; | |
73 | WK_word* dest_next = dest_buf; | |
74 | ||
75 | /* this loop should probably be unrolled */ | |
76 | while (src_next < source_end) { | |
77 | register WK_word temp = src_next[0]; | |
78 | temp |= (src_next[1] << 10); | |
79 | temp |= (src_next[2] << 20); | |
80 | ||
81 | dest_next[0] = temp; | |
82 | dest_next++; | |
83 | src_next += 3; | |
84 | } | |
85 | ||
86 | return dest_next; | |
87 | ||
88 | } | |
89 | ||
90 | /*************************************************************************** | |
91 | * WKdm_compress()---THE COMPRESSOR | |
92 | */ | |
93 | ||
94 | unsigned int | |
95 | WKdm_compress (WK_word* src_buf, | |
96 | WK_word* dest_buf, | |
97 | unsigned int num_input_words) | |
98 | { | |
99 | DictionaryElement dictionary[DICTIONARY_SIZE]; | |
100 | ||
101 | /* arrays that hold output data in intermediate form during modeling */ | |
102 | /* and whose contents are packed into the actual output after modeling */ | |
103 | ||
104 | /* sizes of these arrays should be increased if you want to compress | |
105 | * pages larger than 4KB | |
106 | */ | |
107 | WK_word tempTagsArray[300]; /* tags for everything */ | |
108 | WK_word tempQPosArray[300]; /* queue positions for matches */ | |
109 | WK_word tempLowBitsArray[1200]; /* low bits for partial matches */ | |
110 | ||
111 | /* boundary_tmp will be used for keeping track of what's where in | |
112 | * the compressed page during packing | |
113 | */ | |
114 | WK_word* boundary_tmp; | |
115 | ||
116 | /* Fill pointers for filling intermediate arrays (of queue positions | |
117 | * and low bits) during encoding. | |
118 | * Full words go straight to the destination buffer area reserved | |
119 | * for them. (Right after where the tags go.) | |
120 | */ | |
121 | WK_word* next_full_patt; | |
122 | char* next_tag = (char *) tempTagsArray; | |
123 | char* next_qp = (char *) tempQPosArray; | |
124 | WK_word* next_low_bits = tempLowBitsArray; | |
125 | ||
126 | WK_word* next_input_word = src_buf; | |
127 | WK_word* end_of_input = src_buf + num_input_words; | |
128 | ||
129 | PRELOAD_DICTIONARY; | |
130 | ||
131 | next_full_patt = dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16); | |
132 | ||
133 | #ifdef WK_DEBUG | |
134 | printf("\nIn WKdm_compress\n"); | |
135 | printf("About to actually compress, src_buf is %u\n", src_buf); | |
136 | printf("dictionary is at %u\n", dictionary); | |
137 | printf("dest_buf is %u next_full_patt is %u\n", dest_buf, next_full_patt); | |
138 | fflush(stdout); | |
139 | #endif | |
140 | ||
141 | while (next_input_word < end_of_input) | |
142 | { | |
143 | WK_word *dict_location; | |
144 | WK_word dict_word; | |
145 | WK_word input_word = *next_input_word; | |
146 | ||
147 | /* compute hash value, which is a byte offset into the dictionary, | |
148 | * and add it to the base address of the dictionary. Cast back and | |
149 | * forth to/from char * so no shifts are needed | |
150 | */ | |
151 | dict_location = | |
152 | (WK_word *) | |
153 | (((char*) dictionary) + HASH_TO_DICT_BYTE_OFFSET(input_word)); | |
154 | ||
155 | dict_word = *dict_location; | |
156 | ||
157 | if (input_word == dict_word) | |
158 | { | |
159 | RECORD_EXACT(dict_location - dictionary); | |
160 | } | |
161 | else if (input_word == 0) { | |
162 | RECORD_ZERO; | |
163 | } | |
164 | else | |
165 | { | |
166 | WK_word input_high_bits = HIGH_BITS(input_word); | |
167 | if (input_high_bits == HIGH_BITS(dict_word)) { | |
168 | RECORD_PARTIAL(dict_location - dictionary, LOW_BITS(input_word)); | |
169 | *dict_location = input_word; | |
170 | } | |
171 | else { | |
172 | RECORD_MISS(input_word); | |
173 | *dict_location = input_word; | |
174 | } | |
175 | } | |
176 | next_input_word++; | |
177 | } | |
178 | ||
179 | #ifdef WK_DEBUG | |
180 | printf("AFTER MODELING in WKdm_compress()\n"); fflush(stdout); | |
181 | printf("tempTagsArray holds %u bytes\n", | |
182 | next_tag - (char *) tempTagsArray); | |
183 | printf("tempQPosArray holds %u bytes\n", | |
184 | next_qp - (char *) tempQPosArray); | |
185 | printf("tempLowBitsArray holds %u bytes\n", | |
186 | (char *) next_low_bits - (char *) tempLowBitsArray); | |
187 | ||
188 | printf("next_full_patt is %u\n", | |
189 | (unsigned long) next_full_patt); | |
190 | ||
191 | printf(" i.e., there are %u full patterns\n", | |
192 | next_full_patt - (dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16))); | |
193 | fflush(stdout); | |
194 | ||
195 | { int i; | |
196 | WK_word *arr =(dest_buf + TAGS_AREA_OFFSET + (num_input_words / 16)); | |
197 | ||
198 | printf(" first 20 full patterns are: \n"); | |
199 | for (i = 0; i < 20; i++) { | |
200 | printf(" %d", arr[i]); | |
201 | } | |
202 | printf("\n"); | |
203 | } | |
204 | #endif | |
205 | ||
206 | /* Record (into the header) where we stopped writing full words, | |
207 | * which is where we will pack the queue positions. (Recall | |
208 | * that we wrote the full words directly into the dest buffer | |
209 | * during modeling. | |
210 | */ | |
211 | ||
212 | SET_QPOS_AREA_START(dest_buf,next_full_patt); | |
213 | ||
214 | /* Pack the tags into the tags area, between the page header | |
215 | * and the full words area. We don't pad for the packer | |
216 | * because we assume that the page size is a multiple of 16. | |
217 | */ | |
218 | ||
219 | #ifdef WK_DEBUG | |
220 | printf("about to pack %u bytes holding tags\n", | |
221 | next_tag - (char *) tempTagsArray); | |
222 | ||
223 | { int i; | |
224 | char* arr = (char *) tempTagsArray; | |
225 | ||
226 | printf(" first 200 tags are: \n"); | |
227 | for (i = 0; i < 200; i++) { | |
228 | printf(" %d", arr[i]); | |
229 | } | |
230 | printf("\n"); | |
231 | } | |
232 | #endif | |
233 | ||
234 | boundary_tmp = WK_pack_2bits(tempTagsArray, | |
235 | (WK_word *) next_tag, | |
236 | dest_buf + HEADER_SIZE_IN_WORDS); | |
237 | ||
238 | #ifdef WK_DEBUG | |
239 | printf("packing tags stopped at %u\n", boundary_tmp); | |
240 | #endif | |
241 | ||
242 | /* Pack the queue positions into the area just after | |
243 | * the full words. We have to round up the source | |
244 | * region to a multiple of two words. | |
245 | */ | |
246 | ||
247 | { | |
248 | unsigned int num_bytes_to_pack = next_qp - (char *) tempQPosArray; | |
249 | unsigned int num_packed_words = (num_bytes_to_pack + 7) >> 3; // ceil((double) num_bytes_to_pack / 8); | |
250 | unsigned int num_source_words = num_packed_words * 2; | |
251 | WK_word* endQPosArray = tempQPosArray + num_source_words; | |
252 | ||
253 | /* Pad out the array with zeros to avoid corrupting real packed | |
254 | values. */ | |
255 | for (; /* next_qp is already set as desired */ | |
256 | next_qp < (char*)endQPosArray; | |
257 | next_qp++) { | |
258 | *next_qp = 0; | |
259 | } | |
260 | ||
261 | #ifdef WK_DEBUG | |
262 | printf("about to pack %u (bytes holding) queue posns.\n", | |
263 | num_bytes_to_pack); | |
264 | printf("packing them from %u words into %u words\n", | |
265 | num_source_words, num_packed_words); | |
266 | printf("dest is range %u to %u\n", | |
267 | next_full_patt, next_full_patt + num_packed_words); | |
268 | { int i; | |
269 | char *arr = (char *) tempQPosArray; | |
270 | printf(" first 200 queue positions are: \n"); | |
271 | for (i = 0; i < 200; i++) { | |
272 | printf(" %d", arr[i]); | |
273 | } | |
274 | printf("\n"); | |
275 | } | |
276 | #endif | |
277 | ||
278 | boundary_tmp = WK_pack_4bits(tempQPosArray, | |
279 | endQPosArray, | |
280 | next_full_patt); | |
281 | #ifdef WK_DEBUG | |
282 | printf("Packing of queue positions stopped at %u\n", boundary_tmp); | |
283 | #endif WK_DEBUG | |
284 | ||
285 | /* Record (into the header) where we stopped packing queue positions, | |
286 | * which is where we will start packing low bits. | |
287 | */ | |
288 | SET_LOW_BITS_AREA_START(dest_buf,boundary_tmp); | |
289 | ||
290 | } | |
291 | ||
292 | /* Pack the low bit patterns into the area just after | |
293 | * the queue positions. We have to round up the source | |
294 | * region to a multiple of three words. | |
295 | */ | |
296 | ||
297 | { | |
298 | unsigned int num_tenbits_to_pack = | |
299 | next_low_bits - tempLowBitsArray; | |
300 | unsigned int num_packed_words = (num_tenbits_to_pack + 2) / 3; //ceil((double) num_tenbits_to_pack / 3); | |
301 | unsigned int num_source_words = num_packed_words * 3; | |
302 | WK_word* endLowBitsArray = tempLowBitsArray + num_source_words; | |
303 | ||
304 | /* Pad out the array with zeros to avoid corrupting real packed | |
305 | values. */ | |
306 | ||
307 | for (; /* next_low_bits is already set as desired */ | |
308 | next_low_bits < endLowBitsArray; | |
309 | next_low_bits++) { | |
310 | *next_low_bits = 0; | |
311 | } | |
312 | ||
313 | #ifdef WK_DEBUG | |
314 | printf("about to pack low bits\n"); | |
315 | printf("num_tenbits_to_pack is %u\n", num_tenbits_to_pack); | |
316 | printf("endLowBitsArray is %u\n", endLowBitsArray); | |
317 | #endif | |
318 | ||
319 | boundary_tmp = WK_pack_3_tenbits (tempLowBitsArray, | |
320 | endLowBitsArray, | |
321 | boundary_tmp); | |
322 | ||
323 | SET_LOW_BITS_AREA_END(dest_buf,boundary_tmp); | |
324 | ||
325 | } | |
326 | ||
327 | return ((char *) boundary_tmp - (char *) dest_buf); | |
328 | } |