[apple/xnu.git] / libkern / kxld / WKdmDecompress.c

#include <sys/cdefs.h>
#include "WKdm.h"

/***************************************************************************
 *          THE UNPACKING ROUTINES should GO HERE
 */

const char hashLookupTable [] = HASH_LOOKUP_TABLE_CONTENTS;

#if 0
#define GET_NEXT_TAG tags[tagsIndex++]
#define GET_NEXT_FULL_PATTERN fullPatterns[fullPatternsIndex++]
#define GET_NEXT_LOW_BITS lowBits[lowBitsIndex++]
#define GET_NEXT_DICTIONARY_INDEX dictionaryIndices[dictionaryIndicesIndex++]
#endif

/*  WK_unpack_2bits takes any number of words containing 16 two-bit values
 *  and unpacks them into four times as many words containg those
 *  two bit values as bytes (with the low two bits of each byte holding
 *  the actual value.
 */
static WK_word*
WK_unpack_2bits(WK_word *input_buf,
                WK_word *input_end,
                WK_word *output_buf) {

  WK_word *input_next = input_buf;
  WK_word *output_next = output_buf;
  WK_word packing_mask = TWO_BITS_PACKING_MASK;

  /* loop to repeatedly grab one input word and unpack it into
   * 4 output words.  This loop could be unrolled a little---it's
   * designed to be easy to do that.
   */   
  while (input_next < input_end) {
    WK_word temp = input_next[0];
    DEBUG_PRINT_2("Unpacked tags word: %.8x\n", temp);
    output_next[0] = temp & packing_mask;
    output_next[1] = (temp >> 2) & packing_mask;
    output_next[2] = (temp >> 4) & packing_mask;
    output_next[3] = (temp >> 6) & packing_mask;
    
    output_next += 4;
    input_next++;
  }

  return output_next;

}

/* unpack four bits consumes any number of words (between input_buf
 * and input_end) holding 8 4-bit values per word, and unpacks them
 * into twice as many words, with each value in a separate byte.
 * (The four-bit values occupy the low halves of the bytes in the
 * result).
 */
static WK_word*
WK_unpack_4bits(WK_word *input_buf,
                WK_word *input_end,
                WK_word *output_buf) {

  WK_word *input_next = input_buf;
  WK_word *output_next = output_buf;
  WK_word packing_mask = FOUR_BITS_PACKING_MASK;
  
  
  /* loop to repeatedly grab one input word and unpack it into
   * 4 output words.  This loop should probably be unrolled
   * a little---it's designed to be easy to do that.
   */   
  while (input_next < input_end) {
    WK_word temp = input_next[0];
    DEBUG_PRINT_2("Unpacked dictionary indices word: %.8x\n", temp);
    output_next[0] = temp & packing_mask;
    output_next[1] = (temp >> 4) & packing_mask;
    
    output_next += 2;
    input_next++;
  }
  
  return output_next;

}

/* unpack_3_tenbits unpacks three 10-bit items from (the low 30 bits of)
 * a 32-bit word
 */
static WK_word*
WK_unpack_3_tenbits(WK_word *input_buf,
                    WK_word *input_end,
                    WK_word *output_buf) {

  WK_word *input_next = input_buf;
  WK_word *output_next = output_buf;
  WK_word packing_mask = LOW_BITS_MASK;
  
  /* loop to fetch words of input, splitting each into three
   * words of output with 10 meaningful low bits.  This loop
   * probably ought to be unrolled and maybe coiled
   */
  while (input_next < input_end) {
    WK_word temp = input_next[0];
    
    output_next[0] = temp & packing_mask;
    output_next[1] = (temp >> 10) & packing_mask;
    output_next[2] = temp >> 20;
    
    input_next++;
    output_next += 3;
  }
  
  return output_next;

}

/*********************************************************************
 * WKdm_decompress --- THE DECOMPRESSOR                                 
 * Expects WORD pointers to the source and destination buffers
 * and a page size in words.  The page size had better be 1024 unless     
 * somebody finds the places that are dependent on the page size and 
 * fixes them
 */

void
WKdm_decompress (WK_word* src_buf,
		 WK_word* dest_buf,
		 __unused unsigned int words)
{

  DictionaryElement dictionary[DICTIONARY_SIZE];

  /* arrays that hold output data in intermediate form during modeling */
  /* and whose contents are packed into the actual output after modeling */

  /* sizes of these arrays should be increased if you want to compress
   * pages larger than 4KB
   */
  WK_word tempTagsArray[300];        /* tags for everything          */
  WK_word tempQPosArray[300];        /* queue positions for matches  */
  WK_word tempLowBitsArray[1200];    /* low bits for partial matches */

  PRELOAD_DICTIONARY;

#ifdef WK_DEBUG
  printf("\nIn DECOMPRESSOR\n");
  printf("tempTagsArray is at %p\n", tempTagsArray);
  printf("tempQPosArray is at %p\n", tempQPosArray);
  printf("tempLowBitsArray is at %p\n", tempLowBitsArray);

  printf(" first four words of source buffer are:\n");
  printf("   %u\n   %u\n   %u\n   %u\n",
         src_buf[0], src_buf[1], src_buf[2], src_buf[3]);
  
  { int i;
    WK_word *arr =(src_buf + TAGS_AREA_OFFSET + (PAGE_SIZE_IN_WORDS / 16));

    printf("  first 20 full patterns are: \n");
    for (i = 0; i < 20; i++) {
      printf(" %d", arr[i]);
    }
    printf("\n");
  }
#endif

  WK_unpack_2bits(TAGS_AREA_START(src_buf),
                  TAGS_AREA_END(src_buf),
                  tempTagsArray);

#ifdef WK_DEBUG
  { int i;
    char* arr = (char *) tempTagsArray;

    printf("  first 200 tags are: \n");
    for (i = 0; i < 200; i++) {
      printf(" %d", arr[i]);
    }
    printf("\n");
  }
#endif

  WK_unpack_4bits(QPOS_AREA_START(src_buf),
                  QPOS_AREA_END(src_buf),
                  tempQPosArray);

#ifdef WK_DEBUG
  { int i;
    char* arr = (char *) tempQPosArray;

    printf("  first 200 queue positions are: \n");
    for (i = 0; i < 200; i++) {
      printf(" %d", arr[i]);
    }
    printf("\n");
  }
#endif

  WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf),
                      LOW_BITS_AREA_END(src_buf),
                      tempLowBitsArray);

#ifdef WK_DEBUG
  printf("AFTER UNPACKING, about to enter main block \n");
#endif

  {
    char *next_tag = (char *) tempTagsArray;
    char *tags_area_end =
       ((char *) tempTagsArray) + PAGE_SIZE_IN_WORDS;
    char *next_q_pos = (char *) tempQPosArray;
    WK_word *next_low_bits = tempLowBitsArray;
    WK_word *next_full_word = FULL_WORD_AREA_START(src_buf);

    WK_word *next_output = dest_buf;

#ifdef WK_DEBUG
    printf("next_output is %u\n", next_output);

    printf("next_tag is %u \n", next_tag);
    printf("tags_area_end is %u\n", tags_area_end);
    printf("next_q_pos is %u\n", next_q_pos);
    printf("next_low_bits is %u\n", next_low_bits);
    printf("next_full_word is %u\n", next_full_word);
#endif 

    /* this loop should probably be unrolled. Maybe we should unpack
     * as 4 bit values, giving two consecutive tags, and switch on
     * that 16 ways to decompress 2 words at a whack
     */
    while (next_tag < tags_area_end) {

       char tag = next_tag[0];

       switch(tag) {

         case ZERO_TAG: {
            *next_output = 0;
            break;
         }
         case EXACT_TAG: {
            WK_word *dict_location = dictionary + *(next_q_pos++);
            /* no need to replace dict. entry if matched exactly */
            *next_output = *dict_location;
            break;
         }
         case PARTIAL_TAG: {
            WK_word *dict_location = dictionary + *(next_q_pos++);
            {
               WK_word temp = *dict_location;

               /* strip out low bits */
               temp = ((temp >> NUM_LOW_BITS) << NUM_LOW_BITS);

               /* add in stored low bits from temp array */
               temp = temp | *(next_low_bits++);

               *dict_location = temp;      /* replace old value in dict. */
               *next_output = temp;    /* and echo it to output */
            }
            break;
         }
         case MISS_TAG: {
            WK_word missed_word = *(next_full_word++);
            WK_word *dict_location = 
              (WK_word *)
              ((void *) (((char *) dictionary) + HASH_TO_DICT_BYTE_OFFSET(missed_word)));
            *dict_location = missed_word;
            *next_output = missed_word;
            break;
         }
       }
       next_tag++;
       next_output++;
    }

#ifdef WK_DEBUG        
    printf("AFTER DECOMPRESSING\n");
    printf("next_output is %p\n", next_output);
    printf("next_tag is %p\n", next_tag);
    printf("next_full_word is %p\n", next_full_word);
    printf("next_q_pos is %p\n", next_q_pos);
#endif
  }
}
Commit	Line	Data
3a60a9f5 A	1	#include <sys/cdefs.h>
	2	#include "WKdm.h"
	3
	4	/***************************************************************************
	5	* THE UNPACKING ROUTINES should GO HERE
	6	*/
	7
	8	const char hashLookupTable [] = HASH_LOOKUP_TABLE_CONTENTS;
	9
	10	#if 0
	11	#define GET_NEXT_TAG tags[tagsIndex++]
	12	#define GET_NEXT_FULL_PATTERN fullPatterns[fullPatternsIndex++]
	13	#define GET_NEXT_LOW_BITS lowBits[lowBitsIndex++]
	14	#define GET_NEXT_DICTIONARY_INDEX dictionaryIndices[dictionaryIndicesIndex++]
	15	#endif
	16
	17	/* WK_unpack_2bits takes any number of words containing 16 two-bit values
	18	* and unpacks them into four times as many words containg those
	19	* two bit values as bytes (with the low two bits of each byte holding
	20	* the actual value.
	21	*/
	22	static WK_word*
	23	WK_unpack_2bits(WK_word *input_buf,
	24	WK_word *input_end,
	25	WK_word *output_buf) {
	26
39037602 A	27	WK_word *input_next = input_buf;
	28	WK_word *output_next = output_buf;
	29	WK_word packing_mask = TWO_BITS_PACKING_MASK;
3a60a9f5 A	30
	31	/* loop to repeatedly grab one input word and unpack it into
	32	* 4 output words. This loop could be unrolled a little---it's
	33	* designed to be easy to do that.
	34	*/
	35	while (input_next < input_end) {
39037602	36	WK_word temp = input_next[0];
3a60a9f5 A	37	DEBUG_PRINT_2("Unpacked tags word: %.8x\n", temp);
	38	output_next[0] = temp & packing_mask;
	39	output_next[1] = (temp >> 2) & packing_mask;
	40	output_next[2] = (temp >> 4) & packing_mask;
	41	output_next[3] = (temp >> 6) & packing_mask;
	42
	43	output_next += 4;
	44	input_next++;
	45	}
	46
	47	return output_next;
	48
	49	}
	50
	51	/* unpack four bits consumes any number of words (between input_buf
	52	* and input_end) holding 8 4-bit values per word, and unpacks them
	53	* into twice as many words, with each value in a separate byte.
	54	* (The four-bit values occupy the low halves of the bytes in the
	55	* result).
	56	*/
	57	static WK_word*
	58	WK_unpack_4bits(WK_word *input_buf,
	59	WK_word *input_end,
	60	WK_word *output_buf) {
	61
39037602 A	62	WK_word *input_next = input_buf;
	63	WK_word *output_next = output_buf;
	64	WK_word packing_mask = FOUR_BITS_PACKING_MASK;
3a60a9f5 A	65
	66
	67	/* loop to repeatedly grab one input word and unpack it into
	68	* 4 output words. This loop should probably be unrolled
	69	* a little---it's designed to be easy to do that.
	70	*/
	71	while (input_next < input_end) {
39037602	72	WK_word temp = input_next[0];
3a60a9f5 A	73	DEBUG_PRINT_2("Unpacked dictionary indices word: %.8x\n", temp);
	74	output_next[0] = temp & packing_mask;
	75	output_next[1] = (temp >> 4) & packing_mask;
	76
	77	output_next += 2;
	78	input_next++;
	79	}
	80
	81	return output_next;
	82
	83	}
	84
	85	/* unpack_3_tenbits unpacks three 10-bit items from (the low 30 bits of)
	86	* a 32-bit word
	87	*/
	88	static WK_word*
	89	WK_unpack_3_tenbits(WK_word *input_buf,
	90	WK_word *input_end,
	91	WK_word *output_buf) {
	92
39037602 A	93	WK_word *input_next = input_buf;
	94	WK_word *output_next = output_buf;
	95	WK_word packing_mask = LOW_BITS_MASK;
3a60a9f5 A	96
	97	/* loop to fetch words of input, splitting each into three
	98	* words of output with 10 meaningful low bits. This loop
	99	* probably ought to be unrolled and maybe coiled
	100	*/
	101	while (input_next < input_end) {
39037602	102	WK_word temp = input_next[0];
3a60a9f5 A	103
	104	output_next[0] = temp & packing_mask;
	105	output_next[1] = (temp >> 10) & packing_mask;
	106	output_next[2] = temp >> 20;
	107
	108	input_next++;
	109	output_next += 3;
	110	}
	111
	112	return output_next;
	113
	114	}
	115
	116	/*********************************************************************
	117	* WKdm_decompress --- THE DECOMPRESSOR
	118	* Expects WORD pointers to the source and destination buffers
	119	* and a page size in words. The page size had better be 1024 unless
	120	* somebody finds the places that are dependent on the page size and
	121	* fixes them
	122	*/
	123
	124	void
	125	WKdm_decompress (WK_word* src_buf,
	126	WK_word* dest_buf,
	127	__unused unsigned int words)
	128	{
	129
	130	DictionaryElement dictionary[DICTIONARY_SIZE];
	131
	132	/* arrays that hold output data in intermediate form during modeling */
	133	/* and whose contents are packed into the actual output after modeling */
	134
	135	/* sizes of these arrays should be increased if you want to compress
	136	* pages larger than 4KB
	137	*/
	138	WK_word tempTagsArray[300]; /* tags for everything */
	139	WK_word tempQPosArray[300]; /* queue positions for matches */
	140	WK_word tempLowBitsArray[1200]; /* low bits for partial matches */
	141
	142	PRELOAD_DICTIONARY;
	143
	144	#ifdef WK_DEBUG
	145	printf("\nIn DECOMPRESSOR\n");
b0d623f7 A	146	printf("tempTagsArray is at %p\n", tempTagsArray);
	147	printf("tempQPosArray is at %p\n", tempQPosArray);
	148	printf("tempLowBitsArray is at %p\n", tempLowBitsArray);
3a60a9f5 A	149
	150	printf(" first four words of source buffer are:\n");
	151	printf(" %u\n %u\n %u\n %u\n",
	152	src_buf[0], src_buf[1], src_buf[2], src_buf[3]);
	153
	154	{ int i;
	155	WK_word *arr =(src_buf + TAGS_AREA_OFFSET + (PAGE_SIZE_IN_WORDS / 16));
	156
	157	printf(" first 20 full patterns are: \n");
	158	for (i = 0; i < 20; i++) {
	159	printf(" %d", arr[i]);
	160	}
	161	printf("\n");
	162	}
	163	#endif
	164
	165	WK_unpack_2bits(TAGS_AREA_START(src_buf),
	166	TAGS_AREA_END(src_buf),
	167	tempTagsArray);
	168
	169	#ifdef WK_DEBUG
	170	{ int i;
	171	char* arr = (char *) tempTagsArray;
	172
	173	printf(" first 200 tags are: \n");
	174	for (i = 0; i < 200; i++) {
	175	printf(" %d", arr[i]);
	176	}
	177	printf("\n");
	178	}
	179	#endif
	180
	181	WK_unpack_4bits(QPOS_AREA_START(src_buf),
	182	QPOS_AREA_END(src_buf),
	183	tempQPosArray);
	184
	185	#ifdef WK_DEBUG
	186	{ int i;
	187	char* arr = (char *) tempQPosArray;
	188
	189	printf(" first 200 queue positions are: \n");
	190	for (i = 0; i < 200; i++) {
	191	printf(" %d", arr[i]);
	192	}
	193	printf("\n");
	194	}
	195	#endif
	196
	197	WK_unpack_3_tenbits(LOW_BITS_AREA_START(src_buf),
	198	LOW_BITS_AREA_END(src_buf),
	199	tempLowBitsArray);
	200
	201	#ifdef WK_DEBUG
	202	printf("AFTER UNPACKING, about to enter main block \n");
	203	#endif
	204
	205	{
39037602	206	char next_tag = (char ) tempTagsArray;
3a60a9f5 A	207	char *tags_area_end =
	208	((char *) tempTagsArray) + PAGE_SIZE_IN_WORDS;
	209	char next_q_pos = (char ) tempQPosArray;
	210	WK_word *next_low_bits = tempLowBitsArray;
	211	WK_word *next_full_word = FULL_WORD_AREA_START(src_buf);
	212
	213	WK_word *next_output = dest_buf;
	214
	215	#ifdef WK_DEBUG
	216	printf("next_output is %u\n", next_output);
	217
	218	printf("next_tag is %u \n", next_tag);
	219	printf("tags_area_end is %u\n", tags_area_end);
	220	printf("next_q_pos is %u\n", next_q_pos);
	221	printf("next_low_bits is %u\n", next_low_bits);
	222	printf("next_full_word is %u\n", next_full_word);
	223	#endif
	224
	225	/* this loop should probably be unrolled. Maybe we should unpack
	226	* as 4 bit values, giving two consecutive tags, and switch on
	227	* that 16 ways to decompress 2 words at a whack
	228	*/
	229	while (next_tag < tags_area_end) {
	230
	231	char tag = next_tag[0];
	232
	233	switch(tag) {
	234
	235	case ZERO_TAG: {
	236	*next_output = 0;
	237	break;
	238	}
	239	case EXACT_TAG: {
	240	WK_word dict_location = dictionary + (next_q_pos++);
	241	/* no need to replace dict. entry if matched exactly */
	242	next_output = dict_location;
	243	break;
	244	}
	245	case PARTIAL_TAG: {
	246	WK_word dict_location = dictionary + (next_q_pos++);
	247	{
	248	WK_word temp = *dict_location;
	249
	250	/* strip out low bits */
	251	temp = ((temp >> NUM_LOW_BITS) << NUM_LOW_BITS);
	252
	253	/* add in stored low bits from temp array */
	254	temp = temp \| *(next_low_bits++);
	255
	256	dict_location = temp; / replace old value in dict. */
	257	next_output = temp; / and echo it to output */
	258	}
	259	break;
	260	}
	261	case MISS_TAG: {
	262	WK_word missed_word = *(next_full_word++);
	263	WK_word *dict_location =
	264	(WK_word *)
316670eb	265	((void ) (((char ) dictionary) + HASH_TO_DICT_BYTE_OFFSET(missed_word)));
3a60a9f5 A	266	*dict_location = missed_word;
	267	*next_output = missed_word;
	268	break;
	269	}
	270	}
	271	next_tag++;
	272	next_output++;
	273	}
	274
	275	#ifdef WK_DEBUG
	276	printf("AFTER DECOMPRESSING\n");
b0d623f7 A	277	printf("next_output is %p\n", next_output);
	278	printf("next_tag is %p\n", next_tag);
	279	printf("next_full_word is %p\n", next_full_word);
	280	printf("next_q_pos is %p\n", next_q_pos);
3a60a9f5 A	281	#endif
	282	}
	283	}