]>
Commit | Line | Data |
---|---|---|
3a60a9f5 A |
1 | /* direct-mapped partial matching compressor with simple 22/10 split |
2 | * | |
3 | * Compresses buffers using a dictionary based match and partial match | |
4 | * (high bits only or full match) scheme. | |
5 | * | |
6 | * Paul Wilson -- wilson@cs.utexas.edu | |
7 | * Scott F. Kaplan -- sfkaplan@cs.utexas.edu | |
8 | * September 1997 | |
9 | */ | |
10 | ||
11 | /* compressed output format, in memory order | |
12 | * 1. a four-word HEADER containing four one-word values: | |
13 | * i. a one-word code saying what algorithm compressed the data | |
14 | * ii. an integer WORD offset into the page saying | |
15 | * where the queue position area starts | |
16 | * iii. an integer WORD offset into the page saying where | |
17 | * the low-bits area starts | |
18 | * iv. an integer WORD offset into the page saying where the | |
19 | * low-bits area ends | |
20 | * | |
21 | * 2. a 64-word TAGS AREA holding one two-bit tag for each word in | |
22 | * the original (1024-word) page, packed 16 per word | |
23 | * | |
24 | * 3. a variable-sized FULL WORDS AREA (always word aligned and an | |
25 | * integral number of words) holding full-word patterns that | |
26 | * were not in the dictionary when encoded (i.e., dictionary misses) | |
27 | * | |
28 | * 4. a variable-sized QUEUE POSITIONS AREA (always word aligned and | |
29 | * an integral number of words) holding four-bit queue positions, | |
30 | * packed eight per word. | |
31 | * | |
32 | * 5. a variable-sized LOW BITS AREA (always word aligned and an | |
33 | * integral number of words) holding ten-bit low-bit patterns | |
34 | * (from partial matches), packed three per word. | |
35 | */ | |
36 | ||
37 | ||
38 | #ifdef __cplusplus | |
39 | extern "C" { | |
40 | #endif | |
41 | ||
42 | /* ============================================================ */ | |
43 | /* Included files */ | |
44 | ||
45 | //#include <stdio.h> | |
46 | //#include <unistd.h> | |
47 | //#include <math.h> | |
48 | //#include <strings.h> | |
49 | ||
50 | typedef unsigned long WK_word; | |
51 | ||
52 | /* at the moment we have dependencies on the page size. That should | |
53 | * be changed to work for any power-of-two size that's at least 16 | |
54 | * words, or something like that | |
55 | */ | |
56 | ||
57 | #define PAGE_SIZE_IN_WORDS 1024 | |
58 | #define PAGE_SIZE_IN_BYTES 4096 | |
59 | ||
60 | #define DICTIONARY_SIZE 16 | |
61 | ||
62 | /* | |
63 | * macros defining the basic layout of stuff in a page | |
64 | */ | |
65 | #define HEADER_SIZE_IN_WORDS 4 | |
66 | #define TAGS_AREA_OFFSET 4 | |
67 | #define TAGS_AREA_SIZE 64 | |
68 | ||
69 | /* the next few are used during compression to write the header */ | |
70 | #define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \ | |
71 | (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf) | |
72 | #define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \ | |
73 | (compr_dest_buf[2] = lb_start_addr - compr_dest_buf) | |
74 | #define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \ | |
75 | (compr_dest_buf[3] = lb_end_addr - compr_dest_buf) | |
76 | ||
77 | /* the next few are only use during decompression to read the header */ | |
78 | #define TAGS_AREA_START(decomp_src_buf) \ | |
79 | (decomp_src_buf + TAGS_AREA_OFFSET) | |
80 | #define TAGS_AREA_END(decomp_src_buf) \ | |
81 | (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE) | |
82 | #define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf) | |
83 | #define QPOS_AREA_START(decomp_src_buf) \ | |
84 | (decomp_src_buf + decomp_src_buf[1]) | |
85 | #define LOW_BITS_AREA_START(decomp_src_buf) \ | |
86 | (decomp_src_buf + (decomp_src_buf[2])) | |
87 | #define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf) | |
88 | #define LOW_BITS_AREA_END(decomp_src_buf) \ | |
89 | (decomp_src_buf + (decomp_src_buf[3])) | |
90 | ||
91 | /* ============================================================ */ | |
92 | /* Types and structures */ | |
93 | ||
94 | /* A structure to store each element of the dictionary. */ | |
95 | typedef WK_word DictionaryElement; | |
96 | ||
97 | /* ============================================================ */ | |
98 | /* Misc constants */ | |
99 | ||
100 | #define BITS_PER_WORD 32 | |
101 | #define BYTES_PER_WORD 4 | |
102 | #define NUM_LOW_BITS 10 | |
103 | #define LOW_BITS_MASK 0x3FF | |
104 | #define ALL_ONES_MASK 0xFFFFFFFF | |
105 | ||
106 | #define TWO_BITS_PACKING_MASK 0x03030303 | |
107 | #define FOUR_BITS_PACKING_MASK 0x0F0F0F0F | |
108 | #define TEN_LOW_BITS_MASK 0x000003FF | |
109 | #define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00 | |
110 | ||
111 | /* Tag values. NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED. | |
112 | * Check for conditionals doing arithmetic on these things | |
113 | * before changing them | |
114 | */ | |
115 | #define ZERO_TAG 0x0 | |
116 | #define PARTIAL_TAG 0x1 | |
117 | #define MISS_TAG 0x2 | |
118 | #define EXACT_TAG 0x3 | |
119 | ||
120 | #define BITS_PER_BYTE 8 | |
121 | ||
122 | /* ============================================================ */ | |
123 | /* Global macros */ | |
124 | ||
125 | /* Shift out the low bits of a pattern to give the high bits pattern. | |
126 | The stripped patterns are used for initial tests of partial | |
127 | matches. */ | |
128 | #define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS) | |
129 | ||
130 | /* String the high bits of a pattern so the low order bits can | |
131 | be included in an encoding of a partial match. */ | |
132 | #define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK) | |
133 | ||
134 | #if defined DEBUG_WK | |
135 | #define DEBUG_PRINT_1(string) printf (string) | |
136 | #define DEBUG_PRINT_2(string,value) printf(string, value) | |
137 | #else | |
138 | #define DEBUG_PRINT_1(string) | |
139 | #define DEBUG_PRINT_2(string, value) | |
140 | #endif | |
141 | ||
142 | /* Set up the dictionary before performing compression or | |
143 | decompression. Each element is loaded with some value, the | |
144 | high-bits version of that value, and a next pointer. */ | |
145 | #define PRELOAD_DICTIONARY { \ | |
146 | dictionary[0] = 1; \ | |
147 | dictionary[1] = 1; \ | |
148 | dictionary[2] = 1; \ | |
149 | dictionary[3] = 1; \ | |
150 | dictionary[4] = 1; \ | |
151 | dictionary[5] = 1; \ | |
152 | dictionary[6] = 1; \ | |
153 | dictionary[7] = 1; \ | |
154 | dictionary[8] = 1; \ | |
155 | dictionary[9] = 1; \ | |
156 | dictionary[10] = 1; \ | |
157 | dictionary[11] = 1; \ | |
158 | dictionary[12] = 1; \ | |
159 | dictionary[13] = 1; \ | |
160 | dictionary[14] = 1; \ | |
161 | dictionary[15] = 1; \ | |
162 | } | |
163 | ||
164 | /* these are the constants for the hash function lookup table. | |
165 | * Only zero maps to zero. The rest of the tabale is the result | |
166 | * of appending 17 randomizations of the multiples of 4 from | |
167 | * 4 to 56. Generated by a Scheme script in hash.scm. | |
168 | */ | |
169 | #define HASH_LOOKUP_TABLE_CONTENTS { \ | |
170 | 0, 52, 8, 56, 16, 12, 28, 20, 4, 36, 48, 24, 44, 40, 32, 60, \ | |
171 | 8, 12, 28, 20, 4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \ | |
172 | 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44, 4, 8, 40, \ | |
173 | 60, 32, 20, 44, 4, 36, 52, 24, 16, 56, 48, 12, 28, 16, 8, 40, \ | |
174 | 36, 28, 32, 12, 4, 44, 52, 20, 24, 48, 60, 56, 40, 48, 8, 32, \ | |
175 | 28, 36, 4, 44, 20, 56, 60, 24, 52, 16, 12, 12, 4, 48, 20, 8, \ | |
176 | 52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \ | |
177 | 52, 16, 32, 4, 48, 8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \ | |
178 | 48, 8, 4, 60, 24, 56, 44, 12, 8, 36, 24, 28, 16, 60, 20, 56, \ | |
179 | 32, 40, 48, 12, 4, 44, 52, 44, 40, 12, 56, 8, 36, 24, 60, 28, \ | |
180 | 48, 4, 32, 20, 16, 52, 60, 12, 24, 36, 8, 4, 16, 56, 48, 44, \ | |
181 | 40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44, 4, \ | |
182 | 20, 60, 8, 48, 48, 52, 12, 20, 32, 44, 36, 28, 4, 40, 24, 8, \ | |
183 | 56, 60, 16, 36, 32, 8, 40, 4, 52, 24, 44, 20, 12, 28, 48, 56, \ | |
184 | 16, 60, 4, 52, 60, 48, 20, 16, 56, 44, 24, 8, 40, 12, 32, 28, \ | |
185 | 36, 24, 32, 12, 4, 20, 16, 60, 36, 28, 8, 52, 40, 48, 44, 56 \ | |
186 | } | |
187 | ||
188 | #define HASH_TO_DICT_BYTE_OFFSET(pattern) \ | |
189 | (hashLookupTable[((pattern) >> 10) & 0xFF]) | |
190 | ||
191 | extern const char hashLookupTable[]; | |
192 | ||
193 | /* EMIT... macros emit bytes or words into the intermediate arrays | |
194 | */ | |
195 | ||
196 | #define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr = byte_value; fill_ptr++;} | |
197 | #define EMIT_WORD(fill_ptr,word_value) {*fill_ptr = word_value; fill_ptr++;} | |
198 | ||
199 | /* RECORD... macros record the results of modeling in the intermediate | |
200 | * arrays | |
201 | */ | |
202 | ||
203 | #define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); } | |
204 | ||
205 | #define RECORD_EXACT(queue_posn) EMIT_BYTE(next_tag,EXACT_TAG); \ | |
206 | EMIT_BYTE(next_qp,(queue_posn)); | |
207 | ||
208 | #define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \ | |
209 | EMIT_BYTE(next_tag,PARTIAL_TAG); \ | |
210 | EMIT_BYTE(next_qp,(queue_posn)); \ | |
211 | EMIT_WORD(next_low_bits,(low_bits_pattern)) } | |
212 | ||
213 | #define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \ | |
214 | EMIT_WORD(next_full_patt,(word_pattern)); | |
215 | ||
216 | void | |
217 | WKdm_decompress (WK_word* src_buf, | |
218 | WK_word* dest_buf, | |
219 | unsigned int words); | |
220 | unsigned int | |
221 | WKdm_compress (WK_word* src_buf, | |
222 | WK_word* dest_buf, | |
223 | unsigned int num_input_words); | |
224 | ||
225 | #ifdef __cplusplus | |
226 | } /* extern "C" */ | |
227 | #endif |