]> git.saurik.com Git - apple/xnu.git/blob - bsd/netinet/in_cksum.c
xnu-4570.51.1.tar.gz
[apple/xnu.git] / bsd / netinet / in_cksum.c
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
61 */
62
63 #include <sys/param.h>
64 #include <machine/endian.h>
65 #include <sys/mbuf.h>
66 #include <kern/debug.h>
67 #include <net/dlil.h>
68 #include <netinet/in.h>
69 #define _IP_VHL
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72
73 /*
74 * Checksum routine for Internet Protocol family headers (Portable Version).
75 *
76 * This routine is very heavily used in the network
77 * code and should be modified for each CPU to be as fast as possible.
78 */
/*
 * Fold the 64-bit accumulator "sum" down to a 16-bit 1's complement
 * value: first sum the four 16-bit slices into 32 bits, then the two
 * 16-bit halves of that, and finally fold any remaining carry back in
 * with ADDCARRY().  NOTE: this macro requires locals named exactly
 * "sum", "q_util" and "l_util" to be in scope at the expansion site.
 */
#define REDUCE16 {							  \
	q_util.q = sum;							  \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1];				  \
	ADDCARRY(sum);							  \
}

/* Overlay of a 32-bit value with its two 16-bit halves. */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* Overlay of a 64-bit value with its 16-bit and 32-bit slices. */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};
96
/* Platform-optimized 16-bit 1's complement sum (see os_cpu_in_cksum.c). */
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);

/*
 * Perform 16-bit 1's complement sum on a contiguous span.
 *
 * Thin wrapper around the platform-specific os_cpu_in_cksum() with an
 * initial sum of 0.  The result is NOT complemented; callers that need
 * a final checksum must invert it themselves (cf. in_cksum_hdr_opt()).
 */
uint16_t
b_sum16(const void *buf, int len)
{
	return (os_cpu_in_cksum(buf, len, 0));
}
107
uint16_t inet_cksum_simple(struct mbuf *, int);
/*
 * For the exported _in_cksum symbol in BSDKernel symbol set.
 *
 * Checksums the first "len" bytes of mbuf chain "m" with no pseudo
 * header (nxt == 0) and no starting offset, returning the final
 * (complemented) Internet checksum.
 */
uint16_t
inet_cksum_simple(struct mbuf *m, int len)
{
	return (inet_cksum(m, 0, 0, len));
}
117
118 uint16_t
119 in_addword(uint16_t a, uint16_t b)
120 {
121 uint64_t sum = a + b;
122
123 ADDCARRY(sum);
124 return (sum);
125 }
126
/*
 * Checksum contribution of an IPv4 pseudo header: the 1's complement
 * sum of source address "a", destination address "b" (both in network
 * byte order) and "c", the combined length + protocol word.  The
 * result is folded to 16 bits but not complemented.
 */
uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	uint64_t sum;
	union q_util q_util;	/* scratch required by REDUCE16 */
	union l_util l_util;	/* scratch required by REDUCE16 */

	/* 64-bit accumulation cannot overflow for three 32-bit addends */
	sum = (uint64_t)a + b + c;
	REDUCE16;
	return (sum);
}
138
/*
 * 64-bit variant of in_pseudo(); sums three 64-bit values and folds
 * the result to 16 bits via REDUCE16.  Unlike in_pseudo(), the
 * addition here may wrap at 64 bits — callers are expected to pass
 * already-reduced partial sums.  Result is not complemented.
 */
uint16_t
in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t sum;
	union q_util q_util;	/* scratch required by REDUCE16 */
	union l_util l_util;	/* scratch required by REDUCE16 */

	sum = a + b + c;
	REDUCE16;
	return (sum);
}
150
/*
 * May be used on IP header with options.
 *
 * Sums the entire header — IP_VHL_HL extracts the IHL field, which is
 * in 32-bit words, hence the << 2 to get bytes — and returns the 1's
 * complement of the sum, i.e. the value suitable for ip_sum.
 */
uint16_t
in_cksum_hdr_opt(const struct ip *ip)
{
	return (~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff);
}
159
160 /*
161 * A wrapper around the simple in_cksum_hdr() and the more complicated
162 * inet_cksum(); the former is chosen if the IP header is simple,
163 * contiguous and 32-bit aligned. Also does some stats accounting.
164 */
165 uint16_t
166 ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
167 {
168 struct ip *ip = mtod(m, struct ip *);
169
170 if (out) {
171 ipstat.ips_snd_swcsum++;
172 ipstat.ips_snd_swcsum_bytes += hlen;
173 } else {
174 ipstat.ips_rcv_swcsum++;
175 ipstat.ips_rcv_swcsum_bytes += hlen;
176 }
177
178 if (hlen == sizeof (*ip) &&
179 m->m_len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip))
180 return (in_cksum_hdr(ip));
181
182 return (inet_cksum(m, 0, 0, hlen));
183 }
184
185 uint16_t
186 ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len,
187 int out)
188 {
189 const struct ip *ip = buffer;
190
191 if (out) {
192 ipstat.ips_snd_swcsum++;
193 ipstat.ips_snd_swcsum_bytes += hlen;
194 } else {
195 ipstat.ips_rcv_swcsum++;
196 ipstat.ips_rcv_swcsum_bytes += hlen;
197 }
198
199 if (hlen == sizeof (*ip) &&
200 len >= sizeof (*ip) && IP_HDR_ALIGNED_P(ip))
201 return (in_cksum_hdr(ip));
202
203 return (inet_cksum_buffer(buffer, 0, 0, hlen));
204 }
205
/*
 * m MUST contain at least an IP header, if nxt is specified;
 * nxt is the upper layer protocol number;
 * off is an offset where TCP/UDP/ICMP header starts;
 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
 *
 * Returns the final (complemented) 16-bit Internet checksum, including
 * the IPv4 pseudo header when nxt != 0.
 */
uint16_t
inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
{
	uint32_t sum;

	/* 1's complement sum of the transport segment itself */
	sum = m_sum16(m, off, len);

	/* include pseudo header checksum? */
	if (nxt != 0) {
		struct ip *ip;
		/* aligned scratch copy of the header, used only if needed */
		unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8)));
		uint32_t mlen;

		/*
		 * Sanity check
		 *
		 * Use m_length2() instead of m_length(), as we cannot rely on
		 * the caller setting m_pkthdr.len correctly, if the mbuf is
		 * a M_PKTHDR one.
		 */
		if ((mlen = m_length2(m, NULL)) < sizeof (*ip)) {
			panic("%s: mbuf %p too short (%d) for IPv4 header",
			    __func__, m, mlen);
			/* NOTREACHED */
		}

		/*
		 * In case the IP header is not contiguous, or not 32-bit
		 * aligned, copy it to a local buffer.  Note here that we
		 * expect the data pointer to point to the IP header.
		 */
		if ((sizeof (*ip) > m->m_len) ||
		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
			m_copydata(m, 0, sizeof (*ip), (caddr_t)buf);
			ip = (struct ip *)(void *)buf;
		} else {
			ip = (struct ip *)(void *)(m->m_data);
		}

		/* add pseudo header checksum */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));

		/* fold in carry bits */
		ADDCARRY(sum);
	}

	/* complement and mask to the final 16-bit checksum */
	return (~sum & 0xffff);
}
261
/*
 * buffer MUST contain at least an IP header, if nxt is specified;
 * nxt is the upper layer protocol number;
 * off is an offset where TCP/UDP/ICMP header starts;
 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
 *
 * Flat-buffer counterpart of inet_cksum().  Panics if off >= len.
 * Returns the final (complemented) 16-bit Internet checksum.
 */
uint16_t
inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off,
    uint32_t len)
{
	uint32_t sum;

	if (off >= len)
		panic("%s: off (%d) >= len (%d)", __func__, off, len);

	/* 1's complement sum of "len" bytes starting at buffer + off */
	sum = b_sum16(&((const uint8_t *)buffer)[off], len);

	/* include pseudo header checksum? */
	if (nxt != 0) {
		const struct ip *ip;
		/* aligned scratch copy of the header, used only if needed */
		unsigned char buf[sizeof ((*ip))] __attribute__((aligned(8)));

		/*
		 * In case the IP header is not contiguous, or not 32-bit
		 * aligned, copy it to a local buffer.  Note here that we
		 * expect the data pointer to point to the IP header.
		 */
		if (!IP_HDR_ALIGNED_P(buffer)) {
			memcpy(buf, buffer, sizeof (*ip));
			ip = (const struct ip *)(const void *)buf;
		} else {
			ip = (const struct ip *)buffer;
		}

		/* add pseudo header checksum */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));

		/* fold in carry bits */
		ADDCARRY(sum);
	}

	/* complement and mask to the final 16-bit checksum */
	return (~sum & 0xffff);
}
306
307 #if DEBUG || DEVELOPMENT
308 #include <mach/branch_predicates.h>
309 #include <pexpert/pexpert.h>
310
311 #define CKSUM_ERR kprintf
312
313 /*
314 * The following routines implement the portable, reference implementation
315 * of os_cpu_in_cksum_mbuf(). This is currently used only for validating
316 * the correctness of the platform-specific implementation, at boot time
317 * in dlil_verify_sum16(). It returns the 32-bit accumulator without doing
318 * a 1's complement on it.
319 */
320 #if !defined(__LP64__)
321 /* 32-bit version */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* pre-fold the caller-supplied initial sum to 16 bits + carry */
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/* Skip over the first "off" bytes of the chain. */
	for (;;) {
		if (__improbable(m == NULL)) {
			/* ran past the end of the chain; signal error */
			CKSUM_ERR("%s: out of data\n", __func__);
			return ((uint32_t)-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			/* offset lands within this mbuf; start summing here */
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}

	/* Sum "len" bytes across the remaining mbufs. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return ((uint32_t)-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * If this mbuf's data started at an odd overall offset,
		 * the 16-bit words read here are byte-swapped relative
		 * to the logical stream and must be rotated afterwards.
		 */
		needs_swap = started_on_odd;
		/* main unrolled loop: 32 bytes (16 words) per iteration */
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/*
			 * Drain "partial" into "sum" before it can
			 * overflow 32 bits (top two bits set).
			 */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 24);
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
			/* trailing odd byte goes in the high lane on LE */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		/* rotate if this segment's words were byte-swapped */
		if (needs_swap)
			partial = (partial << 8) + (partial >> 24);
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* final fold of the accumulator to 16 bits (plus carry) */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (final_acc & 0xffff);
}
465
466 #else /* __LP64__ */
467 /* 64-bit version */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* 64-bit accumulator; no pre-fold needed for a 32-bit seed */
	sum = initial_sum;

	/* Skip over the first "off" bytes of the chain. */
	for (;;) {
		if (__improbable(m == NULL)) {
			/* ran past the end of the chain; signal error */
			CKSUM_ERR("%s: out of data\n", __func__);
			return ((uint32_t)-1);
		}
		mlen = m->m_len;
		if (mlen > off) {
			/* offset lands within this mbuf; start summing here */
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0)
			break;
		m = m->m_next;
	}

	/* Sum "len" bytes across the remaining mbufs. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return ((uint32_t)-1);
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0)
			continue;
		if (mlen > len)
			mlen = len;
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * If this mbuf's data started at an odd overall offset,
		 * the words read here are byte-swapped relative to the
		 * logical stream and must be rotated afterwards.
		 */
		needs_swap = started_on_odd;
		/* align to a 4-byte boundary before the 32-bit reads */
		if ((uintptr_t)data & 2) {
			if (mlen < 2)
				goto trailing_bytes;
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		/* main unrolled loop: 64 bytes (16 dwords) per iteration */
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/*
			 * Drain "partial" into "sum" before it can
			 * overflow 64 bits (top two bits set).
			 */
			if (__improbable(partial & (3ULL << 62))) {
				if (needs_swap)
					partial = (partial << 8) +
					    (partial >> 56);
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
			/* trailing odd byte goes in the high lane on LE */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		/* rotate if this segment's words were byte-swapped */
		if (needs_swap)
			partial = (partial << 8) + (partial >> 56);
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/* final fold of all four 16-bit lanes down to 16 bits */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return (final_acc & 0xffff);
}
625 #endif /* __LP64 */
626 #endif /* DEBUG || DEVELOPMENT */