+ cmpq next_input_word, checkpoint // checkpoint time?
+ jne L_scan_loop
+
+CHECKPOINT: // reached by fall-through once next_input_word == checkpoint
+
+ cmpq end_of_input, checkpoint // end of buffer or compression ratio check?
+ jne L_check_compression_ratio // checkpoint != end_of_input: do the early compression-ratio check; otherwise fall through to the end-of-input checks
+
+L_check_zero_page: // end of input: return SV_RETURN right away if the scan recorded neither misses nor hits (presumably an all-zero page -- confirm against the scan loop)
+ // check if any dictionary misses in page
+ cmpq start_next_full_patt, next_full_patt
+ jne L_check_single_value_page // next_full_patt has advanced: at least one miss was recorded
+
+ cmpq start_next_qp, next_qp // check if any partial or exact dictionary matches
+ jne L_check_single_value_page // next_qp has advanced: at least one hit was recorded
+
+ mov SV_RETURN, %rax // Magic return value (same value L_is_single_value_page returns)
+ jmp L_done
+
+L_check_single_value_page: // derive miss / hit / partial-hit counts from how far each output pointer advanced, then test the two single-value patterns
+
+ movq next_full_patt, %rax // get # dictionary misses
+ subq start_next_full_patt, %rax // rax = bytes of full patterns written
+ shrq $2, %rax // divide by 4 -> rax = # misses (stays live into L_check_mostly_zero)
+
+ movq next_qp, %r11 // get # dictionary hits (exact + partial)
+ subq start_next_qp, %r11 // r11 = next_qp - start_next_qp, used directly as the hit count (stays live into L_check_mostly_zero)
+
+ movq next_low_bits, %r13 // get # dictionary partial hits
+ subq start_next_low_bits, %r13 // r13 = bytes of low-bits data written
+ shrq $1, %r13 // divide by 2 -> r13 = # partial hits
+
+ movq tempTagsArray, %r14 // get the address of the first tag
+
+ // Single value page if one of the following is true:
+ // partial == 0 AND hits == 1023 AND miss == 1 AND tag[0] == 2 (i.e. miss)
+ // partial == 1 AND hits == 1024 AND tag[0] == 1 (i.e. partial)
+ //
+ cmpq $0, %r13 // were there 0 partial hits?
+ jne 1f // no: first pattern cannot match, try the second
+
+ cmpq $1023, %r11 // were there 1023 dictionary hits
+ jne 1f
+
+ cmpq $1, %rax // was there exactly 1 dictionary miss?
+ jne 1f
+
+ cmpb $2, 0(%r14) // was the very 1st tag a miss?
+ je L_is_single_value_page // first pattern matched; otherwise fall through and try the second
+
+1:
+ cmpq $1, %r13 // was there 1 partial hit?
+ jne L_check_mostly_zero
+
+ cmpq $1024, %r11 // were there 1024 dictionary hits
+ jne L_check_mostly_zero
+
+ cmpb $1, 0(%r14) // was the very 1st tag a partial?
+ jne L_check_mostly_zero // second pattern failed too; on a match fall through to L_is_single_value_page
+
+L_is_single_value_page: // one of the two single-value patterns matched: return SV_RETURN in rax
+
+ mov SV_RETURN, %rax // Magic return value (same value L_check_zero_page returns)
+ jmp L_done
+
+L_check_mostly_zero: // entered with rax = # misses and r11 = # hits; estimate both encodings and pick the smaller one
+ // how much space will the sparse packer take?
+ addq %r11, %rax // rax += (next_qp - start_next_qp), i.e. rax = misses + hits
+ movq $6, %rdx
+ mulq %rdx // rax *= 6 (i.e. 4 byte word + 2 byte offset); mulq also overwrites rdx with the high half of the product
+ addq $4, %rax // rax += 4 byte for header
+ movq %rax, %r11 // r11 = sparse packer size estimate (kept for the budget check and the return value)
+ // how much space will the default packer take?
+ movq next_low_bits, %rax
+ subq start_next_low_bits, %rax // get bytes consumed by lower-10 bits
+ movq $1365, %rdx
+ mulq %rdx // rdx:rax = bytes * 1365
+ shrq $11, %rax // rax = 2/3*(next_low_bits - start_next_low_bits), approximated as *1365/2048
+ movq next_full_patt, %rdx
+ subq start_next_full_patt, %rdx // get bytes consumed by dictionary misses
+ addq %rdx, %rax // rax += (next_full_patt - start_next_full_patt)
+ movq next_qp, %rdx
+ subq start_next_qp, %rdx
+ shrq $1, %rdx // get bytes consumed by dictionary hits
+ addq %rdx, %rax // rax += (next_qp - start_next_qp)/2
+ addq $(12+256), %rax // rax += bytes taken by the header + tags
+
+ cmpq %r11, %rax // is default packer the better option?
+ jb L_done_search // unsigned: default estimate (rax) < sparse estimate (r11) -> take the default path
+
+ cmpl byte_budget, %r11d // can the sparse packer fit into the given budget?
+ ja L_budgetExhausted // unsigned 32-bit: sparse estimate > byte_budget; otherwise fall through to L_sparse_packer
+
+L_sparse_packer: // emit a 4-byte MZV_MAGIC header, then one 6-byte record (32-bit word + 16-bit byte offset) per non-zero input word
+
+ movl MZV_MAGIC, 0(dest_buf) // header to indicate a sparse packer
+ addq $4, dest_buf // advance past the 4-byte header
+
+ movq $0, %rdx // rdx = byte offset in src of non-0 word
+ movq start_next_input_word, %r8 // r8 = base address the offsets in rdx are relative to
+1:
+ movq 0(%r8, %rdx), %rax // rax = next 8 input bytes (two 32-bit words)
+ testq %rax, %rax // are both words == 0
+ jne 5f // at least one of the two words is non-zero
+3:
+ addq $8, %rdx // 8 more bytes have been processed
+4:
+ cmpq $4096, %rdx // loop until 4096 input bytes have been scanned
+ jne 1b
+ movq %r11, %rax // store the size of the compressed stream (r11 = sparse estimate from L_check_mostly_zero)
+ jmp L_done
+
+5:
+ testl %eax, %eax // is lower word == 0
+ je 6f
+ movl %eax, 0(dest_buf) // store the non-0 word in the dest buffer
+ mov %dx, 4(dest_buf) // store the byte index (low 16 bits of rdx)
+ addq $6, dest_buf // one 6-byte record written
+6:
+ shrq $32, %rax // get the upper word into position
+ testl %eax, %eax // is upper word == 0
+ je 3b // nothing to store; 3b accounts for all 8 bytes
+ addq $4, %rdx // rdx = byte offset of the upper word
+ movl %eax, 0(dest_buf) // store the word in the dest buffer
+ mov %dx, 4(dest_buf) // store the byte index
+ addq $6, dest_buf
+ addq $4, %rdx // second +4: rdx has now advanced by 8 in total
+ jmp 4b // rejoin at the end-of-input test, skipping the addq $8 at 3
+
+L_check_compression_ratio: // checkpoint reached before end of input: estimate the output size so far and abort early if it exceeds CHKPT_SHRUNK_BYTES
+
+ movq end_of_input, checkpoint // checkpoint = end of buffer
+
+ movq next_low_bits, %rax
+ subq start_next_low_bits, %rax // get bytes consumed by lower-10 bits
+ movq $1365, %rdx
+ mulq %rdx // rdx:rax = bytes * 1365
+ shrq $11, %rax // rax = 2/3*(next_low_bits - start_next_low_bits), approximated as *1365/2048
+
+ movq next_full_patt, %rdx
+ subq start_next_full_patt, %rdx // get bytes consumed by dictionary misses
+ addq %rdx, %rax // rax += (next_full_patt - start_next_full_patt)
+
+ movq next_qp, %rdx
+ subq start_next_qp, %rdx
+ shrq $1, %rdx // halve: bytes the dictionary hits are counted as
+ addq %rdx, %rax // rax += (next_qp - start_next_qp)/2
+
+ addq $CHKPT_TAG_BYTES, %rax // rax += bytes taken by the tags
+ cmpq $CHKPT_SHRUNK_BYTES, %rax // compare the estimate with the early-abort threshold
+ ja L_budgetExhausted // estimate exceeds CHKPT_SHRUNK_BYTES (unsigned): give up early
+ jmp L_scan_loop // otherwise resume scanning