1 /* direct-mapped partial matching compressor with simple 22/10 split
3 * Compresses buffers using a dictionary based match and partial match
4 * (high bits only or full match) scheme.
6 * Paul Wilson -- wilson@cs.utexas.edu
7 * Scott F. Kaplan -- sfkaplan@cs.utexas.edu
11 /* compressed output format, in memory order
12 * 1. a four-word HEADER containing four one-word values:
13 * i. a one-word code saying what algorithm compressed the data
14 * ii. an integer WORD offset into the page saying
15 * where the queue position area starts
16 * iii. an integer WORD offset into the page saying where
17 * the low-bits area starts
18 * iv. an integer WORD offset into the page saying where the
21 * 2. a 64-word TAGS AREA holding one two-bit tag for each word in
22 * the original (1024-word) page, packed 16 per word
24 * 3. a variable-sized FULL WORDS AREA (always word aligned and an
25 * integral number of words) holding full-word patterns that
26 * were not in the dictionary when encoded (i.e., dictionary misses)
28 * 4. a variable-sized QUEUE POSITIONS AREA (always word aligned and
29 * an integral number of words) holding four-bit queue positions,
30 * packed eight per word.
32 * 5. a variable-sized LOW BITS AREA (always word aligned and an
33 * integral number of words) holding ten-bit low-bit patterns
34 * (from partial matches), packed three per word.
42 /* ============================================================ */
48 //#include <strings.h>
50 typedef unsigned long WK_word
;
52 /* at the moment we have dependencies on the page size. That should
53 * be changed to work for any power-of-two size that's at least 16
54 * words, or something like that
57 #define PAGE_SIZE_IN_WORDS 1024
58 #define PAGE_SIZE_IN_BYTES 4096
60 #define DICTIONARY_SIZE 16
63 * macros defining the basic layout of stuff in a page
65 #define HEADER_SIZE_IN_WORDS 4
66 #define TAGS_AREA_OFFSET 4
67 #define TAGS_AREA_SIZE 64
69 /* the next few are used during compression to write the header */
70 #define SET_QPOS_AREA_START(compr_dest_buf,qpos_start_addr) \
71 (compr_dest_buf[1] = qpos_start_addr - compr_dest_buf)
72 #define SET_LOW_BITS_AREA_START(compr_dest_buf,lb_start_addr) \
73 (compr_dest_buf[2] = lb_start_addr - compr_dest_buf)
74 #define SET_LOW_BITS_AREA_END(compr_dest_buf,lb_end_addr) \
75 (compr_dest_buf[3] = lb_end_addr - compr_dest_buf)
77 /* the next few are only use during decompression to read the header */
78 #define TAGS_AREA_START(decomp_src_buf) \
79 (decomp_src_buf + TAGS_AREA_OFFSET)
80 #define TAGS_AREA_END(decomp_src_buf) \
81 (TAGS_AREA_START(decomp_src_buf) + TAGS_AREA_SIZE)
82 #define FULL_WORD_AREA_START(the_buf) TAGS_AREA_END(the_buf)
83 #define QPOS_AREA_START(decomp_src_buf) \
84 (decomp_src_buf + decomp_src_buf[1])
85 #define LOW_BITS_AREA_START(decomp_src_buf) \
86 (decomp_src_buf + (decomp_src_buf[2]))
87 #define QPOS_AREA_END(the_buf) LOW_BITS_AREA_START(the_buf)
88 #define LOW_BITS_AREA_END(decomp_src_buf) \
89 (decomp_src_buf + (decomp_src_buf[3]))
91 /* ============================================================ */
92 /* Types and structures */
94 /* A structure to store each element of the dictionary. */
95 typedef WK_word DictionaryElement
;
97 /* ============================================================ */
100 #define BITS_PER_WORD 32
101 #define BYTES_PER_WORD 4
102 #define NUM_LOW_BITS 10
103 #define LOW_BITS_MASK 0x3FF
104 #define ALL_ONES_MASK 0xFFFFFFFF
106 #define TWO_BITS_PACKING_MASK 0x03030303
107 #define FOUR_BITS_PACKING_MASK 0x0F0F0F0F
108 #define TEN_LOW_BITS_MASK 0x000003FF
109 #define TWENTY_TWO_HIGH_BITS_MASK 0xFFFFFC00
111 /* Tag values. NOTE THAT CODE MAY DEPEND ON THE NUMBERS USED.
112 * Check for conditionals doing arithmetic on these things
113 * before changing them
116 #define PARTIAL_TAG 0x1
118 #define EXACT_TAG 0x3
120 #define BITS_PER_BYTE 8
122 /* ============================================================ */
125 /* Shift out the low bits of a pattern to give the high bits pattern.
126 The stripped patterns are used for initial tests of partial
128 #define HIGH_BITS(word_pattern) (word_pattern >> NUM_LOW_BITS)
130 /* String the high bits of a pattern so the low order bits can
131 be included in an encoding of a partial match. */
132 #define LOW_BITS(word_pattern) (word_pattern & LOW_BITS_MASK)
135 #define DEBUG_PRINT_1(string) printf (string)
136 #define DEBUG_PRINT_2(string,value) printf(string, value)
138 #define DEBUG_PRINT_1(string)
139 #define DEBUG_PRINT_2(string, value)
142 /* Set up the dictionary before performing compression or
143 decompression. Each element is loaded with some value, the
144 high-bits version of that value, and a next pointer. */
145 #define PRELOAD_DICTIONARY { \
156 dictionary[10] = 1; \
157 dictionary[11] = 1; \
158 dictionary[12] = 1; \
159 dictionary[13] = 1; \
160 dictionary[14] = 1; \
161 dictionary[15] = 1; \
164 /* these are the constants for the hash function lookup table.
165 * Only zero maps to zero. The rest of the tabale is the result
166 * of appending 17 randomizations of the multiples of 4 from
167 * 4 to 56. Generated by a Scheme script in hash.scm.
169 #define HASH_LOOKUP_TABLE_CONTENTS { \
170 0, 52, 8, 56, 16, 12, 28, 20, 4, 36, 48, 24, 44, 40, 32, 60, \
171 8, 12, 28, 20, 4, 60, 16, 36, 24, 48, 44, 32, 52, 56, 40, 12, \
172 8, 48, 16, 52, 60, 28, 56, 32, 20, 24, 36, 40, 44, 4, 8, 40, \
173 60, 32, 20, 44, 4, 36, 52, 24, 16, 56, 48, 12, 28, 16, 8, 40, \
174 36, 28, 32, 12, 4, 44, 52, 20, 24, 48, 60, 56, 40, 48, 8, 32, \
175 28, 36, 4, 44, 20, 56, 60, 24, 52, 16, 12, 12, 4, 48, 20, 8, \
176 52, 16, 60, 24, 36, 44, 28, 56, 40, 32, 36, 20, 24, 60, 40, 44, \
177 52, 16, 32, 4, 48, 8, 28, 56, 12, 28, 32, 40, 52, 36, 16, 20, \
178 48, 8, 4, 60, 24, 56, 44, 12, 8, 36, 24, 28, 16, 60, 20, 56, \
179 32, 40, 48, 12, 4, 44, 52, 44, 40, 12, 56, 8, 36, 24, 60, 28, \
180 48, 4, 32, 20, 16, 52, 60, 12, 24, 36, 8, 4, 16, 56, 48, 44, \
181 40, 52, 32, 20, 28, 32, 12, 36, 28, 24, 56, 40, 16, 52, 44, 4, \
182 20, 60, 8, 48, 48, 52, 12, 20, 32, 44, 36, 28, 4, 40, 24, 8, \
183 56, 60, 16, 36, 32, 8, 40, 4, 52, 24, 44, 20, 12, 28, 48, 56, \
184 16, 60, 4, 52, 60, 48, 20, 16, 56, 44, 24, 8, 40, 12, 32, 28, \
185 36, 24, 32, 12, 4, 20, 16, 60, 36, 28, 8, 52, 40, 48, 44, 56 \
188 #define HASH_TO_DICT_BYTE_OFFSET(pattern) \
189 (hashLookupTable[((pattern) >> 10) & 0xFF])
191 extern const char hashLookupTable
[];
193 /* EMIT... macros emit bytes or words into the intermediate arrays
196 #define EMIT_BYTE(fill_ptr, byte_value) {*fill_ptr = byte_value; fill_ptr++;}
197 #define EMIT_WORD(fill_ptr,word_value) {*fill_ptr = word_value; fill_ptr++;}
199 /* RECORD... macros record the results of modeling in the intermediate
203 #define RECORD_ZERO { EMIT_BYTE(next_tag,ZERO_TAG); }
205 #define RECORD_EXACT(queue_posn) EMIT_BYTE(next_tag,EXACT_TAG); \
206 EMIT_BYTE(next_qp,(queue_posn));
208 #define RECORD_PARTIAL(queue_posn,low_bits_pattern) { \
209 EMIT_BYTE(next_tag,PARTIAL_TAG); \
210 EMIT_BYTE(next_qp,(queue_posn)); \
211 EMIT_WORD(next_low_bits,(low_bits_pattern)) }
213 #define RECORD_MISS(word_pattern) EMIT_BYTE(next_tag,MISS_TAG); \
214 EMIT_WORD(next_full_patt,(word_pattern));
217 WKdm_decompress (WK_word
* src_buf
,
221 WKdm_compress (WK_word
* src_buf
,
223 unsigned int num_input_words
);