/*
 * apple/xnu (xnu-6153.81.5) — bsd/netinet/in_cksum.c
 */
1 /*
2 * Copyright (c) 2000-2017 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1988, 1992, 1993
30 * The Regents of the University of California. All rights reserved.
31 *
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
34 * are met:
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed by the University of
43 * California, Berkeley and its contributors.
44 * 4. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
61 */
62
63 #include <sys/param.h>
64 #include <machine/endian.h>
65 #include <sys/mbuf.h>
66 #include <kern/debug.h>
67 #include <net/dlil.h>
68 #include <netinet/in.h>
69 #define _IP_VHL
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72
73 /*
74 * Checksum routine for Internet Protocol family headers (Portable Version).
75 *
76 * This routine is very heavily used in the network
77 * code and should be modified for each CPU to be as fast as possible.
78 */
/*
 * Fold a 64-bit 1's complement accumulator down to 16 bits: add the four
 * 16-bit lanes, then the two 16-bit halves of that 32-bit result, then
 * absorb the final carry with ADDCARRY().
 *
 * NOTE: this macro expands in-place and requires locals literally named
 * "sum" (uint64_t), "q_util" (union q_util) and "l_util" (union l_util)
 * to be in scope at the point of use.
 */
#define REDUCE16 { \
	q_util.q = sum; \
	l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
	sum = l_util.s[0] + l_util.s[1]; \
	ADDCARRY(sum); \
}

/* Overlay of a 32-bit value with its two 16-bit halves. */
union l_util {
	uint16_t s[2];
	uint32_t l;
};

/* Overlay of a 64-bit value with its 16-bit and 32-bit pieces. */
union q_util {
	uint16_t s[4];
	uint32_t l[2];
	uint64_t q;
};

/* Platform-optimized 16-bit 1's complement sum (machine-dependent). */
extern uint32_t os_cpu_in_cksum(const void *, uint32_t, uint32_t);
98
99 /*
100 * Perform 16-bit 1's complement sum on a contiguous span.
101 */
102 uint16_t
103 b_sum16(const void *buf, int len)
104 {
105 return os_cpu_in_cksum(buf, len, 0);
106 }
107
108 uint16_t inet_cksum_simple(struct mbuf *, int);
109 /*
110 * For the exported _in_cksum symbol in BSDKernel symbol set.
111 */
112 uint16_t
113 inet_cksum_simple(struct mbuf *m, int len)
114 {
115 return inet_cksum(m, 0, 0, len);
116 }
117
118 uint16_t
119 in_addword(uint16_t a, uint16_t b)
120 {
121 uint64_t sum = a + b;
122
123 ADDCARRY(sum);
124 return sum;
125 }
126
/*
 * Compute the pseudo-header checksum contribution from three 32-bit
 * quantities (typically src addr, dst addr, htonl(len + proto)).
 * Returns the folded 16-bit 1's complement sum, NOT inverted.
 */
uint16_t
in_pseudo(uint32_t a, uint32_t b, uint32_t c)
{
	uint64_t sum;
	union q_util q_util;	/* name required by REDUCE16 */
	union l_util l_util;	/* name required by REDUCE16 */

	/* A 64-bit add of three 32-bit values cannot overflow. */
	sum = (uint64_t)a + b + c;
	REDUCE16;
	return sum;
}
138
/*
 * 64-bit variant of in_pseudo(): fold three 64-bit partial sums into a
 * 16-bit 1's complement sum (not inverted).
 *
 * NOTE(review): the 64-bit add itself is not carry-checked; callers are
 * presumably expected to pass values whose sum fits in 64 bits.
 */
uint16_t
in_pseudo64(uint64_t a, uint64_t b, uint64_t c)
{
	uint64_t sum;
	union q_util q_util;	/* name required by REDUCE16 */
	union l_util l_util;	/* name required by REDUCE16 */

	sum = a + b + c;
	REDUCE16;
	return sum;
}
150
151 /*
152 * May be used on IP header with options.
153 */
154 uint16_t
155 in_cksum_hdr_opt(const struct ip *ip)
156 {
157 return ~b_sum16(ip, (IP_VHL_HL(ip->ip_vhl) << 2)) & 0xffff;
158 }
159
160 /*
161 * A wrapper around the simple in_cksum_hdr() and the more complicated
162 * inet_cksum(); the former is chosen if the IP header is simple,
163 * contiguous and 32-bit aligned. Also does some stats accounting.
164 */
165 uint16_t
166 ip_cksum_hdr_dir(struct mbuf *m, uint32_t hlen, int out)
167 {
168 struct ip *ip = mtod(m, struct ip *);
169
170 if (out) {
171 ipstat.ips_snd_swcsum++;
172 ipstat.ips_snd_swcsum_bytes += hlen;
173 } else {
174 ipstat.ips_rcv_swcsum++;
175 ipstat.ips_rcv_swcsum_bytes += hlen;
176 }
177
178 if (hlen == sizeof(*ip) &&
179 m->m_len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
180 return in_cksum_hdr(ip);
181 }
182
183 return inet_cksum(m, 0, 0, hlen);
184 }
185
186 uint16_t
187 ip_cksum_hdr_dir_buffer(const void *buffer, uint32_t hlen, uint32_t len,
188 int out)
189 {
190 const struct ip *ip = buffer;
191
192 if (out) {
193 ipstat.ips_snd_swcsum++;
194 ipstat.ips_snd_swcsum_bytes += hlen;
195 } else {
196 ipstat.ips_rcv_swcsum++;
197 ipstat.ips_rcv_swcsum_bytes += hlen;
198 }
199
200 if (hlen == sizeof(*ip) &&
201 len >= sizeof(*ip) && IP_HDR_ALIGNED_P(ip)) {
202 return in_cksum_hdr(ip);
203 }
204
205 return inet_cksum_buffer(buffer, 0, 0, hlen);
206 }
207
/*
 * m MUST contain at least an IP header, if nxt is specified;
 * nxt is the upper layer protocol number;
 * off is an offset where TCP/UDP/ICMP header starts;
 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
 *
 * Returns the final (inverted) 16-bit Internet checksum.
 */
uint16_t
inet_cksum(struct mbuf *m, uint32_t nxt, uint32_t off, uint32_t len)
{
	uint32_t sum;

	/* 16-bit 1's complement sum over "len" bytes starting at "off". */
	sum = m_sum16(m, off, len);

	/* include pseudo header checksum? */
	if (nxt != 0) {
		struct ip *ip;
		/* Scratch copy of the IP header, 8-byte aligned. */
		unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));
		uint32_t mlen;

		/*
		 * Sanity check
		 *
		 * Use m_length2() instead of m_length(), as we cannot rely on
		 * the caller setting m_pkthdr.len correctly, if the mbuf is
		 * a M_PKTHDR one.
		 */
		if ((mlen = m_length2(m, NULL)) < sizeof(*ip)) {
			panic("%s: mbuf %p too short (%d) for IPv4 header",
			    __func__, m, mlen);
			/* NOTREACHED */
		}

		/*
		 * In case the IP header is not contiguous, or not 32-bit
		 * aligned, copy it to a local buffer. Note here that we
		 * expect the data pointer to point to the IP header.
		 */
		if ((sizeof(*ip) > m->m_len) ||
		    !IP_HDR_ALIGNED_P(mtod(m, caddr_t))) {
			m_copydata(m, 0, sizeof(*ip), (caddr_t)buf);
			ip = (struct ip *)(void *)buf;
		} else {
			ip = (struct ip *)(void *)(m->m_data);
		}

		/* add pseudo header checksum */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));

		/* fold in carry bits */
		ADDCARRY(sum);
	}

	/* 1's complement the folded sum; mask to 16 bits. */
	return ~sum & 0xffff;
}
263
/*
 * buffer MUST contain at least an IP header, if nxt is specified;
 * nxt is the upper layer protocol number;
 * off is an offset where TCP/UDP/ICMP header starts;
 * len is a total length of a transport segment (e.g. TCP header + TCP payload)
 *
 * Returns the final (inverted) 16-bit Internet checksum.
 */
uint16_t
inet_cksum_buffer(const void *buffer, uint32_t nxt, uint32_t off,
    uint32_t len)
{
	uint32_t sum;

	if (off >= len) {
		panic("%s: off (%d) >= len (%d)", __func__, off, len);
	}

	/*
	 * NOTE(review): this sums "len" bytes starting at "off"; the caller
	 * must ensure off + len does not run past the end of the buffer.
	 */
	sum = b_sum16(&((const uint8_t *)buffer)[off], len);

	/* include pseudo header checksum? */
	if (nxt != 0) {
		const struct ip *ip;
		/* Scratch copy of the IP header, 8-byte aligned. */
		unsigned char buf[sizeof((*ip))] __attribute__((aligned(8)));

		/*
		 * In case the IP header is not contiguous, or not 32-bit
		 * aligned, copy it to a local buffer. Note here that we
		 * expect the data pointer to point to the IP header.
		 */
		if (!IP_HDR_ALIGNED_P(buffer)) {
			memcpy(buf, buffer, sizeof(*ip));
			ip = (const struct ip *)(const void *)buf;
		} else {
			ip = (const struct ip *)buffer;
		}

		/* add pseudo header checksum */
		sum += in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
		    htonl(len + nxt));

		/* fold in carry bits */
		ADDCARRY(sum);
	}

	/* 1's complement the folded sum; mask to 16 bits. */
	return ~sum & 0xffff;
}
309
310 #if DEBUG || DEVELOPMENT
311 #include <pexpert/pexpert.h>
312
313 #define CKSUM_ERR kprintf
314
/*
 * The following routines implement the portable, reference implementation
 * of os_cpu_in_cksum_mbuf(). This is currently used only for validating
 * the correctness of the platform-specific implementation, at boot time
 * in dlil_verify_sum16(). It returns the 32-bit accumulator without doing
 * a 1's complement on it.
 */
#if !defined(__LP64__)
/* 32-bit version */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint32_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* Pre-fold the caller-supplied initial sum to 16 bits plus carry. */
	sum = (initial_sum >> 16) + (initial_sum & 0xffff);

	/* Walk the chain to find the mbuf containing byte offset "off". */
	for (;;) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->m_next;
	}

	/* Accumulate over each mbuf until "len" bytes have been summed. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * A segment that started at an odd overall byte offset has
		 * its partial sum byte-swapped before folding (1's
		 * complement sums are endian-insensitive only in pairs).
		 */
		needs_swap = started_on_odd;
		/* Unrolled main loop: sixteen 16-bit reads per iteration. */
		while (mlen >= 32) {
			__builtin_prefetch(data + 32);
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			partial += *(uint16_t *)(void *)(data + 16);
			partial += *(uint16_t *)(void *)(data + 18);
			partial += *(uint16_t *)(void *)(data + 20);
			partial += *(uint16_t *)(void *)(data + 22);
			partial += *(uint16_t *)(void *)(data + 24);
			partial += *(uint16_t *)(void *)(data + 26);
			partial += *(uint16_t *)(void *)(data + 28);
			partial += *(uint16_t *)(void *)(data + 30);
			data += 32;
			mlen -= 32;
			/* Fold early if "partial" is close to overflowing. */
			if (__improbable(partial & 0xc0000000)) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 24);
				}
				sum += (partial >> 16);
				sum += (partial & 0xffff);
				partial = 0;
			}
		}
		/* Power-of-two tails: 16, 8, 4, 2 and finally 1 byte. */
		if (mlen & 16) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			partial += *(uint16_t *)(void *)(data + 8);
			partial += *(uint16_t *)(void *)(data + 10);
			partial += *(uint16_t *)(void *)(data + 12);
			partial += *(uint16_t *)(void *)(data + 14);
			data += 16;
			mlen -= 16;
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 8) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			partial += *(uint16_t *)(void *)(data + 4);
			partial += *(uint16_t *)(void *)(data + 6);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint16_t *)(void *)data;
			partial += *(uint16_t *)(void *)(data + 2);
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
		if (mlen & 1) {
			/* Odd trailing byte flips the parity for the next mbuf. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 24);
		}
		sum += (partial >> 16) + (partial & 0xffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 16) + (sum & 0xffff);
	}
	/* Final fold of the accumulator to 16 bits plus carry. */
	final_acc = ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
472
473 #else /* __LP64__ */
/* 64-bit version */
uint32_t
in_cksum_mbuf_ref(struct mbuf *m, int len, int off, uint32_t initial_sum)
{
	int mlen;
	uint64_t sum, partial;
	unsigned int final_acc;
	uint8_t *data;
	boolean_t needs_swap, started_on_odd;

	VERIFY(len >= 0);
	VERIFY(off >= 0);

	needs_swap = FALSE;
	started_on_odd = FALSE;
	/* 64-bit accumulator: the initial sum needs no pre-folding. */
	sum = initial_sum;

	/* Walk the chain to find the mbuf containing byte offset "off". */
	for (;;) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		if (mlen > off) {
			mlen -= off;
			data = mtod(m, uint8_t *) + off;
			goto post_initial_offset;
		}
		off -= mlen;
		if (len == 0) {
			break;
		}
		m = m->m_next;
	}

	/* Accumulate over each mbuf until "len" bytes have been summed. */
	for (; len > 0; m = m->m_next) {
		if (__improbable(m == NULL)) {
			CKSUM_ERR("%s: out of data\n", __func__);
			return (uint32_t)-1;
		}
		mlen = m->m_len;
		data = mtod(m, uint8_t *);
post_initial_offset:
		if (mlen == 0) {
			continue;
		}
		if (mlen > len) {
			mlen = len;
		}
		len -= mlen;

		partial = 0;
		if ((uintptr_t)data & 1) {
			/* Align on word boundary */
			started_on_odd = !started_on_odd;
#if BYTE_ORDER == LITTLE_ENDIAN
			partial = *data << 8;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial = *data;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			++data;
			--mlen;
		}
		/*
		 * A segment that started at an odd overall byte offset has
		 * its partial sum byte-swapped before folding.
		 */
		needs_swap = started_on_odd;
		/* Reach 32-bit alignment for the 32-bit loads below. */
		if ((uintptr_t)data & 2) {
			if (mlen < 2) {
				goto trailing_bytes;
			}
			partial += *(uint16_t *)(void *)data;
			data += 2;
			mlen -= 2;
		}
		/* Unrolled main loop: sixteen 32-bit reads per iteration. */
		while (mlen >= 64) {
			__builtin_prefetch(data + 32);
			__builtin_prefetch(data + 64);
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			partial += *(uint32_t *)(void *)(data + 32);
			partial += *(uint32_t *)(void *)(data + 36);
			partial += *(uint32_t *)(void *)(data + 40);
			partial += *(uint32_t *)(void *)(data + 44);
			partial += *(uint32_t *)(void *)(data + 48);
			partial += *(uint32_t *)(void *)(data + 52);
			partial += *(uint32_t *)(void *)(data + 56);
			partial += *(uint32_t *)(void *)(data + 60);
			data += 64;
			mlen -= 64;
			/* Fold early if "partial" is close to overflowing. */
			if (__improbable(partial & (3ULL << 62))) {
				if (needs_swap) {
					partial = (partial << 8) +
					    (partial >> 56);
				}
				sum += (partial >> 32);
				sum += (partial & 0xffffffff);
				partial = 0;
			}
		}
		/*
		 * mlen is not updated below as the remaining tests
		 * are using bit masks, which are not affected.
		 */
		if (mlen & 32) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			partial += *(uint32_t *)(void *)(data + 16);
			partial += *(uint32_t *)(void *)(data + 20);
			partial += *(uint32_t *)(void *)(data + 24);
			partial += *(uint32_t *)(void *)(data + 28);
			data += 32;
		}
		if (mlen & 16) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			partial += *(uint32_t *)(void *)(data + 8);
			partial += *(uint32_t *)(void *)(data + 12);
			data += 16;
		}
		if (mlen & 8) {
			partial += *(uint32_t *)(void *)data;
			partial += *(uint32_t *)(void *)(data + 4);
			data += 8;
		}
		if (mlen & 4) {
			partial += *(uint32_t *)(void *)data;
			data += 4;
		}
		if (mlen & 2) {
			partial += *(uint16_t *)(void *)data;
			data += 2;
		}
trailing_bytes:
		if (mlen & 1) {
			/* Odd trailing byte flips the parity for the next mbuf. */
#if BYTE_ORDER == LITTLE_ENDIAN
			partial += *data;
#else /* BYTE_ORDER != LITTLE_ENDIAN */
			partial += *data << 8;
#endif /* BYTE_ORDER != LITTLE_ENDIAN */
			started_on_odd = !started_on_odd;
		}

		if (needs_swap) {
			partial = (partial << 8) + (partial >> 56);
		}
		sum += (partial >> 32) + (partial & 0xffffffff);
		/*
		 * Reduce sum to allow potential byte swap
		 * in the next iteration without carry.
		 */
		sum = (sum >> 32) + (sum & 0xffffffff);
	}
	/* Fold the four 16-bit lanes of the 64-bit accumulator to 16 bits. */
	final_acc = (sum >> 48) + ((sum >> 32) & 0xffff) +
	    ((sum >> 16) & 0xffff) + (sum & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	final_acc = (final_acc >> 16) + (final_acc & 0xffff);
	return final_acc & 0xffff;
}
#endif /* __LP64__ */
639 #endif /* DEBUG || DEVELOPMENT */