[apple/xnu.git] / libkern / zlib / adler32.c

/*
 * Copyright (c) 2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* adler32.c -- compute the Adler-32 checksum of a data stream
 * Copyright (C) 1995-2004 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/* @(#) $Id$ */


#define ZLIB_INTERNAL
#if KERNEL
    #include <libkern/zlib.h>
#else
    #include "zlib.h"
#endif /* KERNEL */

#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6
#include <stdint.h> // For uintptr_t.
    extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len);
#endif

#define BASE 65521UL    /* largest prime smaller than 65536 */
#define NMAX 5552
/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */

#define DO1(buf,i)  {adler += (buf)[i]; sum2 += adler;}
#define DO2(buf,i)  DO1(buf,i); DO1(buf,i+1);
#define DO4(buf,i)  DO2(buf,i); DO2(buf,i+2);
#define DO8(buf,i)  DO4(buf,i); DO4(buf,i+4);
#define DO16(buf)   DO8(buf,0); DO8(buf,8);

/* use NO_DIVIDE if your processor does not do division in hardware */
#ifdef NO_DIVIDE
#  define MOD(a) \
    do { \
        if (a >= (BASE << 16)) a -= (BASE << 16); \
        if (a >= (BASE << 15)) a -= (BASE << 15); \
        if (a >= (BASE << 14)) a -= (BASE << 14); \
        if (a >= (BASE << 13)) a -= (BASE << 13); \
        if (a >= (BASE << 12)) a -= (BASE << 12); \
        if (a >= (BASE << 11)) a -= (BASE << 11); \
        if (a >= (BASE << 10)) a -= (BASE << 10); \
        if (a >= (BASE << 9)) a -= (BASE << 9); \
        if (a >= (BASE << 8)) a -= (BASE << 8); \
        if (a >= (BASE << 7)) a -= (BASE << 7); \
        if (a >= (BASE << 6)) a -= (BASE << 6); \
        if (a >= (BASE << 5)) a -= (BASE << 5); \
        if (a >= (BASE << 4)) a -= (BASE << 4); \
        if (a >= (BASE << 3)) a -= (BASE << 3); \
        if (a >= (BASE << 2)) a -= (BASE << 2); \
        if (a >= (BASE << 1)) a -= (BASE << 1); \
        if (a >= BASE) a -= BASE; \
    } while (0)
#  define MOD4(a) \
    do { \
        if (a >= (BASE << 4)) a -= (BASE << 4); \
        if (a >= (BASE << 3)) a -= (BASE << 3); \
        if (a >= (BASE << 2)) a -= (BASE << 2); \
        if (a >= (BASE << 1)) a -= (BASE << 1); \
        if (a >= BASE) a -= BASE; \
    } while (0)
#else
#  define MOD(a) a %= BASE
#  define MOD4(a) a %= BASE
#endif

/* ========================================================================= */
uLong ZEXPORT adler32(adler, buf, len)
    uLong adler;
    const Bytef *buf;
    uInt len;
{
    unsigned long sum2;
    unsigned n;

    /* split Adler-32 into component sums */
    sum2 = (adler >> 16) & 0xffff;
    adler &= 0xffff;

    /* in case user likes doing a byte at a time, keep it fast */
    if (len == 1) {
        adler += buf[0];
        if (adler >= BASE)
            adler -= BASE;
        sum2 += adler;
        if (sum2 >= BASE)
            sum2 -= BASE;
        return adler | (sum2 << 16);
    }

    /* initial Adler-32 value (deferred check for len == 1 speed) */
    if (buf == Z_NULL)
        return 1L;

    /* in case short lengths are provided, keep it somewhat fast */
    if (len < 16) {
        while (len--) {
            adler += *buf++;
            sum2 += adler;
        }
        if (adler >= BASE)
            adler -= BASE;
        MOD4(sum2);             /* only added so many BASE's */
        return adler | (sum2 << 16);
    }

#if defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6

	if (len>=32000) {	/* use vector code only if len is sufficiently large to compensate registers save/restore */
	/* align buf to 16-byte boundary */
    while (((uintptr_t)buf)&15) { /* not on a 16-byte boundary */
        len--;
        adler += *buf++;
        sum2 += adler;
        if (adler >= BASE) adler -= BASE;
        MOD4(sum2);             /* only added so many BASE's */
    }

    return adler32_vec(adler, sum2, buf, len);      // x86_64 or i386 (up to SSE3) or armv6 or up
	}

#endif	// defined __x86_64__ || defined __i386__ || defined _ARM_ARCH_6

    /* do length NMAX blocks -- requires just one modulo operation */
    while (len >= NMAX) {
        len -= NMAX;
        n = NMAX / 16;          /* NMAX is divisible by 16 */
        do {
            DO16(buf);          /* 16 sums unrolled */
            buf += 16;
        } while (--n);
        MOD(adler);
        MOD(sum2);
    }

    /* do remaining bytes (less than NMAX, still just one modulo) */
    if (len) {                  /* avoid modulos if none remaining */
        while (len >= 16) {
            len -= 16;
            DO16(buf);
            buf += 16;
        }
        while (len--) {
            adler += *buf++;
            sum2 += adler;
        }
        MOD(adler);
        MOD(sum2);
    }

    /* return recombined sums */
    return adler | (sum2 << 16);
}

/* ========================================================================= */
uLong ZEXPORT adler32_combine(adler1, adler2, len2)
    uLong adler1;
    uLong adler2;
    z_off_t len2;
{
    unsigned long sum1;
    unsigned long sum2;
    unsigned rem;

    /* the derivation of this formula is left as an exercise for the reader */
    rem = (unsigned)(len2 % BASE);
    sum1 = adler1 & 0xffff;
    sum2 = rem * sum1;
    MOD(sum2);
    sum1 += (adler2 & 0xffff) + BASE - 1;
    sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
    if (sum1 > BASE) sum1 -= BASE;
    if (sum1 > BASE) sum1 -= BASE;
    if (sum2 > (BASE << 1)) sum2 -= (BASE << 1);
    if (sum2 > BASE) sum2 -= BASE;
    return sum1 | (sum2 << 16);
}
Commit	Line	Data
b0d623f7 A	1	/*
	2	* Copyright (c) 2008 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/* adler32.c -- compute the Adler-32 checksum of a data stream
	29	* Copyright (C) 1995-2004 Mark Adler
	30	* For conditions of distribution and use, see copyright notice in zlib.h
	31	*/
	32
	33	/* @(#) $Id$ */
	34
b7266188	35
b0d623f7 A	36	#define ZLIB_INTERNAL
	37	#if KERNEL
	38	#include <libkern/zlib.h>
	39	#else
	40	#include "zlib.h"
	41	#endif /* KERNEL */
	42
6d2010ae A	43	#if defined __x86_64__ \|\| defined __i386__ \|\| defined _ARM_ARCH_6
	44	#include <stdint.h> // For uintptr_t.
	45	extern uLong adler32_vec(uLong adler, uLong sum2, const Bytef *buf, uInt len);
b7266188 A	46	#endif
b7266188 A	47
b0d623f7 A	48	#define BASE 65521UL /* largest prime smaller than 65536 */
	49	#define NMAX 5552
	50	/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
	51
	52	#define DO1(buf,i) {adler += (buf)[i]; sum2 += adler;}
	53	#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1);
	54	#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2);
	55	#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4);
	56	#define DO16(buf) DO8(buf,0); DO8(buf,8);
	57
	58	/* use NO_DIVIDE if your processor does not do division in hardware */
	59	#ifdef NO_DIVIDE
	60	# define MOD(a) \
	61	do { \
	62	if (a >= (BASE << 16)) a -= (BASE << 16); \
	63	if (a >= (BASE << 15)) a -= (BASE << 15); \
	64	if (a >= (BASE << 14)) a -= (BASE << 14); \
	65	if (a >= (BASE << 13)) a -= (BASE << 13); \
	66	if (a >= (BASE << 12)) a -= (BASE << 12); \
	67	if (a >= (BASE << 11)) a -= (BASE << 11); \
	68	if (a >= (BASE << 10)) a -= (BASE << 10); \
	69	if (a >= (BASE << 9)) a -= (BASE << 9); \
	70	if (a >= (BASE << 8)) a -= (BASE << 8); \
	71	if (a >= (BASE << 7)) a -= (BASE << 7); \
	72	if (a >= (BASE << 6)) a -= (BASE << 6); \
	73	if (a >= (BASE << 5)) a -= (BASE << 5); \
	74	if (a >= (BASE << 4)) a -= (BASE << 4); \
	75	if (a >= (BASE << 3)) a -= (BASE << 3); \
	76	if (a >= (BASE << 2)) a -= (BASE << 2); \
	77	if (a >= (BASE << 1)) a -= (BASE << 1); \
	78	if (a >= BASE) a -= BASE; \
	79	} while (0)
	80	# define MOD4(a) \
	81	do { \
	82	if (a >= (BASE << 4)) a -= (BASE << 4); \
	83	if (a >= (BASE << 3)) a -= (BASE << 3); \
	84	if (a >= (BASE << 2)) a -= (BASE << 2); \
	85	if (a >= (BASE << 1)) a -= (BASE << 1); \
	86	if (a >= BASE) a -= BASE; \
	87	} while (0)
	88	#else
	89	# define MOD(a) a %= BASE
	90	# define MOD4(a) a %= BASE
	91	#endif
	92
	93	/* ========================================================================= */
	94	uLong ZEXPORT adler32(adler, buf, len)
	95	uLong adler;
	96	const Bytef *buf;
	97	uInt len;
	98	{
	99	unsigned long sum2;
	100	unsigned n;
	101
	102	/* split Adler-32 into component sums */
	103	sum2 = (adler >> 16) & 0xffff;
	104	adler &= 0xffff;
	105
	106	/* in case user likes doing a byte at a time, keep it fast */
	107	if (len == 1) {
	108	adler += buf[0];
	109	if (adler >= BASE)
	110	adler -= BASE;
	111	sum2 += adler;
112	if (sum2 >= BASE)
113	sum2 -= BASE;
114	return adler \| (sum2 << 16);
115	}
116
117	/* initial Adler-32 value (deferred check for len == 1 speed) */
118	if (buf == Z_NULL)
119	return 1L;
120
121	/* in case short lengths are provided, keep it somewhat fast */
122	if (len < 16) {
123	while (len--) {
124	adler += *buf++;
125	sum2 += adler;
126	}
127	if (adler >= BASE)
128	adler -= BASE;
129	MOD4(sum2); /* only added so many BASE's */
130	return adler \| (sum2 << 16);
131	}
132
6d2010ae A	133	#if defined __x86_64__ \|\| defined __i386__ \|\| defined _ARM_ARCH_6
	134
	135	if (len>=32000) { /* use vector code only if len is sufficiently large to compensate registers save/restore */
	136	/* align buf to 16-byte boundary */
b7266188 A	137	while (((uintptr_t)buf)&15) { /* not on a 16-byte boundary */
	138	len--;
	139	adler += *buf++;
	140	sum2 += adler;
	141	if (adler >= BASE) adler -= BASE;
	142	MOD4(sum2); /* only added so many BASE's */
	143	}
	144
6d2010ae A	145	return adler32_vec(adler, sum2, buf, len); // x86_64 or i386 (up to SSE3) or armv6 or up
6d2010ae A	146	}
b7266188	147
6d2010ae	148	#endif // defined __x86_64__ \|\| defined __i386__ \|\| defined _ARM_ARCH_6
b7266188	149
b0d623f7 A	150	/* do length NMAX blocks -- requires just one modulo operation */
	151	while (len >= NMAX) {
	152	len -= NMAX;
	153	n = NMAX / 16; /* NMAX is divisible by 16 */
	154	do {
	155	DO16(buf); /* 16 sums unrolled */
	156	buf += 16;
	157	} while (--n);
	158	MOD(adler);
	159	MOD(sum2);
	160	}
	161
	162	/* do remaining bytes (less than NMAX, still just one modulo) */
	163	if (len) { /* avoid modulos if none remaining */
	164	while (len >= 16) {
	165	len -= 16;
	166	DO16(buf);
	167	buf += 16;
	168	}
	169	while (len--) {
	170	adler += *buf++;
	171	sum2 += adler;
	172	}
	173	MOD(adler);
	174	MOD(sum2);
	175	}
	176
	177	/* return recombined sums */
	178	return adler \| (sum2 << 16);
	179	}
	180
	181	/* ========================================================================= */
	182	uLong ZEXPORT adler32_combine(adler1, adler2, len2)
	183	uLong adler1;
	184	uLong adler2;
	185	z_off_t len2;
	186	{
	187	unsigned long sum1;
	188	unsigned long sum2;
	189	unsigned rem;
	190
	191	/* the derivation of this formula is left as an exercise for the reader */
	192	rem = (unsigned)(len2 % BASE);
	193	sum1 = adler1 & 0xffff;
	194	sum2 = rem * sum1;
	195	MOD(sum2);
	196	sum1 += (adler2 & 0xffff) + BASE - 1;
	197	sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
	198	if (sum1 > BASE) sum1 -= BASE;
	199	if (sum1 > BASE) sum1 -= BASE;
	200	if (sum2 > (BASE << 1)) sum2 -= (BASE << 1);
	201	if (sum2 > BASE) sum2 -= BASE;
	202	return sum1 \| (sum2 << 16);
	203	}