/*
 * Copyright (c) 2011 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <sys/mcache.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>

#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/in_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcpip.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_lro.h>
#include <netinet/lro_ext.h>
#include <kern/locks.h>
unsigned int lrocount = 0;		/* A counter used for debugging only */
unsigned int lro_seq_outoforder = 0;	/* Counter for debugging */
unsigned int lro_seq_mismatch = 0;	/* Counter for debugging */
unsigned int lro_eject_req = 0;		/* Counter for tracking flow ejections */
unsigned int lro_flushes = 0;		/* Counter for tracking number of flushes */
unsigned int lro_single_flushes = 0;
unsigned int lro_double_flushes = 0;
unsigned int lro_good_flushes = 0;
unsigned int coalesc_sz = LRO_MX_COALESCE_PKTS;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_sz, CTLFLAG_RW | CTLFLAG_LOCKED,
		&coalesc_sz, 0, "Max coalescing size");

unsigned int coalesc_time = LRO_MX_TIME_TO_BUFFER;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, lro_time, CTLFLAG_RW | CTLFLAG_LOCKED,
		&coalesc_time, 0, "Max coalescing time");
struct lro_flow lro_flow_list[TCP_LRO_NUM_FLOWS];

char lro_flow_map[TCP_LRO_FLOW_MAP];
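
/*
 * The flow table is effectively a direct-mapped hash table:
 * lro_flow_map[] maps a four-tuple hash (LRO_HASH over addresses and
 * ports) to an index into lro_flow_list[], and each lro_flow entry caches
 * the four-tuple, the next expected sequence number (lr_seq) and the chain
 * of coalesced mbufs (lr_mhead/lr_mtail). A hash slot holds at most one
 * flow, so a second flow hashing to the same slot is treated as a
 * collision in tcp_lro_matching_tuple()/tcp_lro_insert_flow().
 */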
static lck_attr_t *tcp_lro_mtx_attr = NULL;		/* mutex attributes */
static lck_grp_t *tcp_lro_mtx_grp = NULL;		/* mutex group */
static lck_grp_attr_t *tcp_lro_mtx_grp_attr = NULL;	/* mutex group attrs */
decl_lck_mtx_data( ,tcp_lro_lock);	/* Used to synchronize updates */

unsigned int lro_byte_count = 0;

uint64_t lro_deadline = 0;	/* LRO's sense of time - protected by tcp_lro_lock */
uint32_t lro_timer_set = 0;

u_int32_t lro_pkt_count = 0;	/* Number of packets encountered in an LRO period */
thread_call_t tcp_lro_timer;

extern u_int32_t kipf_count;
static void	tcp_lro_timer_proc(void*, void*);
static void	lro_update_stats(struct mbuf*);
static void	lro_update_flush_stats(struct mbuf *);
static void	tcp_lro_flush_flows(void);
static void	tcp_lro_sched_timer(uint64_t);
static void	lro_proto_input(struct mbuf *);

static struct mbuf *lro_tcp_xsum_validate(struct mbuf*, struct ipovly *,
			struct tcphdr*);
static struct mbuf *tcp_lro_process_pkt(struct mbuf*, struct ip*, struct tcphdr*,
			int);
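
/*
 * Packet path, in outline: ip_input() hands inbound TCP segments to
 * tcp_lro(), which validates the headers and calls tcp_lro_process_pkt().
 * Segments that match an active flow are merged by tcp_lro_coalesce();
 * everything else, and every flushed or ejected chain, is delivered to
 * the stack through lro_proto_input(), which re-enters normal IP/TCP
 * input processing via ip_proto_dispatch_in_wrapper().
 */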
void
tcp_lro_init(void)
{
	int i;

	bzero(lro_flow_list, sizeof (struct lro_flow) * TCP_LRO_NUM_FLOWS);
	for (i = 0; i < TCP_LRO_FLOW_MAP; i++) {
		lro_flow_map[i] = TCP_LRO_FLOW_UNINIT;
	}

	/*
	 * allocate lock group attribute, group and attribute for tcp_lro_lock
	 */
	tcp_lro_mtx_grp_attr = lck_grp_attr_alloc_init();
	tcp_lro_mtx_grp = lck_grp_alloc_init("tcplro", tcp_lro_mtx_grp_attr);
	tcp_lro_mtx_attr = lck_attr_alloc_init();
	lck_mtx_init(&tcp_lro_lock, tcp_lro_mtx_grp, tcp_lro_mtx_attr);

	tcp_lro_timer = thread_call_allocate(tcp_lro_timer_proc, NULL);
	if (tcp_lro_timer == NULL) {
		panic_plain("%s: unable to allocate lro timer", __func__);
	}
}
static int
tcp_lro_matching_tuple(struct ip* ip_hdr, struct tcphdr *tcp_hdr, int *hash,
			int *flow_id)
{
	struct lro_flow *flow;
	tcp_seq seqnum;
	unsigned int off = 0;
	int payload_len = 0;

	*hash = LRO_HASH(ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr,
		tcp_hdr->th_sport, tcp_hdr->th_dport, (TCP_LRO_FLOW_MAP - 1));

	*flow_id = lro_flow_map[*hash];
	if (*flow_id == TCP_LRO_FLOW_NOTFOUND) {
		return TCP_LRO_NAN;
	}

	seqnum = tcp_hdr->th_seq;
	off = tcp_hdr->th_off << 2;
	payload_len = ip_hdr->ip_len - off;

	flow = &lro_flow_list[*flow_id];

	if ((flow->lr_faddr.s_addr == ip_hdr->ip_src.s_addr) &&
			(flow->lr_laddr.s_addr == ip_hdr->ip_dst.s_addr) &&
			(flow->lr_fport == tcp_hdr->th_sport) &&
			(flow->lr_lport == tcp_hdr->th_dport)) {
		if (flow->lr_tcphdr == NULL) {
			if (ntohl(seqnum) == flow->lr_seq) {
				return TCP_LRO_COALESCE;
			}
			if (lrodebug) {
				printf("%s: seqnum = %x, lr_seq = %x\n",
					__func__, ntohl(seqnum), flow->lr_seq);
			}
			lro_seq_mismatch++;
			if (SEQ_GT(ntohl(seqnum), flow->lr_seq)) {
				lro_seq_outoforder++;
				/*
				 * Whenever we receive out of order packets it
				 * signals loss and recovery and LRO doesn't
				 * let flows recover quickly. So eject.
				 */
				flow->lr_flags |= LRO_EJECT_REQ;
			}
			return TCP_LRO_NAN;
		}

		if (flow->lr_flags & LRO_EJECT_REQ) {
			if (lrodebug)
				printf("%s: eject. \n", __func__);
			return TCP_LRO_EJECT_FLOW;
		}
		if (SEQ_GT(tcp_hdr->th_ack, flow->lr_tcphdr->th_ack)) {
			if (lrodebug) {
				printf("%s: th_ack = %x flow_ack = %x \n",
					__func__, tcp_hdr->th_ack,
					flow->lr_tcphdr->th_ack);
			}
			return TCP_LRO_EJECT_FLOW;
		}

		if (ntohl(seqnum) == (ntohl(lro_flow_list[*flow_id].lr_tcphdr->th_seq) +
				lro_flow_list[*flow_id].lr_len)) {
			return TCP_LRO_COALESCE;
		} else {
			/* LRO does not handle loss recovery well, eject */
			flow->lr_flags |= LRO_EJECT_REQ;
			return TCP_LRO_EJECT_FLOW;
		}
	}
	if (lrodebug) printf("tcp_lro_matching_tuple: collision \n");
	return TCP_LRO_COLLISION;
}
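
/*
 * tcp_lro_matching_tuple() return values, as used above:
 * TCP_LRO_COALESCE - the segment is in order for this flow and may be
 * appended; TCP_LRO_EJECT_FLOW - the flow must be flushed first;
 * TCP_LRO_NAN - no usable flow state (including the seq-mismatch case);
 * TCP_LRO_COLLISION - the hash slot is owned by a different four-tuple.
 * A flow whose lr_tcphdr is NULL tracks only the expected sequence number;
 * it was set up by tcp_start_coalescing() before any data was buffered.
 */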
static void
tcp_lro_init_flow(int flow_id, struct ip* ip_hdr, struct tcphdr *tcp_hdr,
			int hash, u_int32_t timestamp, int payload_len)
{
	struct lro_flow *flow = NULL;

	flow = &lro_flow_list[flow_id];

	flow->lr_hash_map = hash;
	flow->lr_faddr.s_addr = ip_hdr->ip_src.s_addr;
	flow->lr_laddr.s_addr = ip_hdr->ip_dst.s_addr;
	flow->lr_fport = tcp_hdr->th_sport;
	flow->lr_lport = tcp_hdr->th_dport;
	lro_flow_map[hash] = flow_id;
	flow->lr_timestamp = timestamp;
	flow->lr_seq = ntohl(tcp_hdr->th_seq) + payload_len;
	flow->lr_flags = 0;
	return;
}
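
/*
 * Note that lr_seq is initialized to the sequence number *after* this
 * segment (ntohl(th_seq) + payload_len); it always names the next byte
 * the flow expects, and the in-order test in tcp_lro_matching_tuple()
 * compares incoming th_seq against exactly this value.
 */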
static void
tcp_lro_coalesce(int flow_id, struct mbuf *lro_mb, struct tcphdr *tcphdr,
			int payload_len, int drop_hdrlen, struct tcpopt *topt,
			u_int32_t* tsval, u_int32_t* tsecr, int thflags)
{
	struct lro_flow *flow = NULL;
	struct mbuf *last;
	struct ip *ip = NULL;

	flow = &lro_flow_list[flow_id];
	if (flow->lr_mhead) {
		if (lrodebug)
			printf("%s: lr_mhead %x %d \n", __func__, flow->lr_seq,
				payload_len);
		m_adj(lro_mb, drop_hdrlen);

		last = flow->lr_mtail;
		while (last->m_next != NULL) {
			last = last->m_next;
		}
		last->m_next = lro_mb;

		flow->lr_mtail = lro_mb;

		ip = mtod(flow->lr_mhead, struct ip *);
		ip->ip_len += lro_mb->m_pkthdr.len;
		flow->lr_mhead->m_pkthdr.len += lro_mb->m_pkthdr.len;

		if (flow->lr_len == 0) {
			panic_plain("%s: Inconsistent LRO flow state", __func__);
		}
		flow->lr_len += payload_len;
		flow->lr_seq += payload_len;
		/*
		 * This bit is re-OR'd each time a packet is added to the
		 * large coalesced packet.
		 */
		flow->lr_mhead->m_pkthdr.aux_flags |= MAUXF_SW_LRO_PKT;
		flow->lr_mhead->m_pkthdr.lro_npkts++; /* for tcpstat.tcps_rcvpack */
		if (flow->lr_mhead->m_pkthdr.lro_pktlen <
				lro_mb->m_pkthdr.lro_pktlen) {
			/*
			 * For TCP Inter Arrival Jitter calculation, return max
			 * size encountered while coalescing a stream of pkts.
			 */
			flow->lr_mhead->m_pkthdr.lro_pktlen =
						lro_mb->m_pkthdr.lro_pktlen;
		}
		/* Update the timestamp value */
		if (topt->to_flags & TOF_TS) {
			if ((flow->lr_tsval) &&
				(TSTMP_GT(topt->to_tsval, ntohl(*(flow->lr_tsval))))) {
				*(flow->lr_tsval) = htonl(topt->to_tsval);
			}
			if ((flow->lr_tsecr) &&
				(topt->to_tsecr != 0) &&
				(TSTMP_GT(topt->to_tsecr, ntohl(*(flow->lr_tsecr))))) {
				if (lrodebug >= 2) {
					printf("%s: instantaneous RTT = %d \n", __func__,
						topt->to_tsecr - ntohl(*(flow->lr_tsecr)));
				}
				*(flow->lr_tsecr) = htonl(topt->to_tsecr);
			}
		}
		/* Coalesce the flags */
		if (thflags) {
			flow->lr_tcphdr->th_flags |= thflags;
		}
		/* Update receive window */
		flow->lr_tcphdr->th_win = tcphdr->th_win;
	} else {
		if (lro_mb) {
			flow->lr_mhead = flow->lr_mtail = lro_mb;
			flow->lr_mhead->m_pkthdr.aux_flags |= MAUXF_SW_LRO_PKT;
			flow->lr_tcphdr = tcphdr;
			if ((topt) && (topt->to_flags & TOF_TS)) {
				ASSERT(tsval != NULL);
				ASSERT(tsecr != NULL);
				flow->lr_tsval = tsval;
				flow->lr_tsecr = tsecr;
			}
			flow->lr_len = payload_len;
			flow->lr_timestamp = tcp_now;
			tcp_lro_sched_timer(0);
		}
		flow->lr_seq = ntohl(tcphdr->th_seq) + payload_len;
	}
	if (lro_mb) {
		tcpstat.tcps_coalesced_pack++;
	}
	return;
}
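
/*
 * After coalescing, a flow's mbuf chain looks like one large packet:
 *
 *	lr_mhead -> [IP+TCP hdrs | payload 1] -> [payload 2] -> ... <- lr_mtail
 *
 * Only the first segment keeps its headers; ip_len and m_pkthdr.len on the
 * head are grown to cover each appended payload, lro_npkts counts the
 * merged segments, and the cached header pointers (lr_tcphdr, lr_tsval,
 * lr_tsecr) let later segments update the window, flags and timestamps of
 * the head in place.
 */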
static struct mbuf *
tcp_lro_eject_flow(int flow_id)
{
	struct mbuf *mb = NULL;

	mb = lro_flow_list[flow_id].lr_mhead;
	ASSERT(lro_flow_map[lro_flow_list[flow_id].lr_hash_map] == flow_id);
	lro_flow_map[lro_flow_list[flow_id].lr_hash_map] = TCP_LRO_FLOW_UNINIT;
	bzero(&lro_flow_list[flow_id], sizeof(struct lro_flow));

	return mb;
}
static struct mbuf*
tcp_lro_eject_coalesced_pkt(int flow_id)
{
	struct mbuf *mb = NULL;

	mb = lro_flow_list[flow_id].lr_mhead;
	lro_flow_list[flow_id].lr_mhead =
		lro_flow_list[flow_id].lr_mtail = NULL;
	lro_flow_list[flow_id].lr_tcphdr = NULL;
	return mb;
}
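
/*
 * The two eject paths differ in what survives: tcp_lro_eject_flow()
 * clears the hash slot and zeroes the whole lro_flow entry, while
 * tcp_lro_eject_coalesced_pkt() only detaches the coalesced chain and
 * keeps the four-tuple state so the flow can keep coalescing subsequent
 * segments.
 */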
static struct mbuf*
tcp_lro_insert_flow(struct mbuf *lro_mb, struct ip *ip_hdr,
			struct tcphdr *tcp_hdr, int payload_len,
			int drop_hdrlen, int hash, struct tcpopt *topt,
			u_int32_t *tsval, u_int32_t *tsecr)
{
	int i;
	int slot_available = 0;
	int candidate_flow = 0;
	u_int32_t oldest_timestamp;
	struct mbuf *mb = NULL;
	int collision = 0;

	oldest_timestamp = tcp_now;

	/* handle collision */
	if (lro_flow_map[hash] != TCP_LRO_FLOW_UNINIT) {
		if (lrodebug) {
			collision = 1;
		}
		candidate_flow = lro_flow_map[hash];
		tcpstat.tcps_flowtbl_collision++;
		goto kick_flow;
	}

	for (i = 0; i < TCP_LRO_NUM_FLOWS; i++) {
		if (lro_flow_list[i].lr_mhead == NULL) {
			candidate_flow = i;
			slot_available = 1;
			break;
		}
		if (oldest_timestamp >= lro_flow_list[i].lr_timestamp) {
			candidate_flow = i;
			oldest_timestamp = lro_flow_list[i].lr_timestamp;
		}
	}

	if (!slot_available) {
		tcpstat.tcps_flowtbl_full++;
kick_flow:
		/* kick the oldest flow */
		mb = tcp_lro_eject_flow(candidate_flow);

		if (lrodebug) {
			if (!slot_available) {
				printf("%s: slot unavailable.\n",__func__);
			}
			if (collision) {
				printf("%s: collision.\n",__func__);
			}
		}
	} else {
		candidate_flow = i; /* this is now the flow to be used */
	}

	tcp_lro_init_flow(candidate_flow, ip_hdr, tcp_hdr, hash,
				tcp_now, payload_len);
	tcp_lro_coalesce(candidate_flow, lro_mb, tcp_hdr, payload_len,
				drop_hdrlen, topt, tsval, tsecr, 0);
	return mb;
}
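
/*
 * Eviction policy: a hash collision evicts the slot's current owner
 * outright; otherwise the first free entry is used, and when the table
 * is full the flow with the oldest lr_timestamp is kicked. Any coalesced
 * chain belonging to the evicted flow is returned to the caller.
 */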
static struct mbuf*
tcp_lro_process_pkt(struct mbuf *lro_mb, struct ip *ip_hdr,
				struct tcphdr *tcp_hdr, int drop_hdrlen)
{
	int flow_id = TCP_LRO_FLOW_UNINIT;
	int hash;
	unsigned int off = 0;
	int eject_flow = 0;
	int optlen;
	int retval = 0;
	struct mbuf *mb = NULL;
	int payload_len = 0;
	u_char *optp = NULL;
	int thflags = 0;
	struct tcpopt to;
	int ret_response = TCP_LRO_CONSUMED;
	int coalesced = 0, tcpflags = 0, unknown_tcpopts = 0;
	u_int8_t ecn;

	if (lro_mb->m_len < (int32_t)sizeof (struct tcpiphdr)) {
		if ((lro_mb = m_pullup(lro_mb, sizeof(struct tcpiphdr))) == 0) {
			tcpstat.tcps_rcvshort++;
			if (lrodebug) {
				printf("tcp_lro_process_pkt: mbuf too short.\n");
			}
			return NULL;
		}
	}

	if ((lro_mb = lro_tcp_xsum_validate(lro_mb,
				(struct ipovly*)ip_hdr, tcp_hdr)) == NULL) {
		if (lrodebug) {
			printf("tcp_lro_process_pkt: TCP xsum failed.\n");
		}
		return NULL;
	}

	/* Update stats */
	lro_pkt_count++;

	/* Avoids checksumming in tcp_input */
	lro_mb->m_pkthdr.aux_flags |= MAUXF_SW_LRO_DID_CSUM;

	off = tcp_hdr->th_off << 2;
	optlen = off - sizeof (struct tcphdr);
	payload_len = ip_hdr->ip_len - off;
	optp = (u_char *)(tcp_hdr + 1);
	/*
	 * Do quick retrieval of timestamp options ("options
	 * prediction?").  If timestamp is the only option and it's
	 * formatted as recommended in RFC 1323 appendix A, we
	 * quickly get the values now and not bother calling
	 * tcp_dooptions(), etc.
	 */
	bzero(&to, sizeof (to));
	if ((optlen == TCPOLEN_TSTAMP_APPA ||
			(optlen > TCPOLEN_TSTAMP_APPA &&
			optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
			*(u_int32_t *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
			(tcp_hdr->th_flags & TH_SYN) == 0) {
		to.to_flags |= TOF_TS;
		to.to_tsval = ntohl(*(u_int32_t *)(void *)(optp + 4));
		to.to_tsecr = ntohl(*(u_int32_t *)(void *)(optp + 8));
	} else {
		/*
		 * If TCP timestamps are not in use, or not the first option,
		 * skip LRO path since timestamps are used to avoid LRO
		 * from introducing additional latencies for retransmissions
		 * and other slow-paced transmissions.
		 */
		to.to_flags = to.to_tsecr = 0;
		eject_flow = 1;
	}

	/* list all the conditions that can trigger a flow ejection here */

	thflags = tcp_hdr->th_flags;
	if (thflags & (TH_SYN | TH_URG | TH_ECE | TH_CWR | TH_PUSH | TH_RST | TH_FIN)) {
		eject_flow = tcpflags = 1;
	}

	if (optlen && !((optlen == TCPOLEN_TSTAMP_APPA) &&
			(to.to_flags & TOF_TS))) {
		eject_flow = unknown_tcpopts = 1;
	}

	if (payload_len <= LRO_MIN_COALESC_SZ) { /* zero payload ACK */
		eject_flow = 1;
	}

	/* Can't coalesce ECN marked packets. */
	ecn = ip_hdr->ip_tos & IPTOS_ECN_MASK;
	if (ecn == IPTOS_ECN_CE) {
		/*
		 * ECN needs quick notification
		 */
		if (lrodebug) {
			printf("%s: ECE bits set.\n", __func__);
		}
		eject_flow = 1;
	}

	lck_mtx_lock_spin(&tcp_lro_lock);

	retval = tcp_lro_matching_tuple(ip_hdr, tcp_hdr, &hash, &flow_id);

	switch (retval) {
	case TCP_LRO_NAN:
		lck_mtx_unlock(&tcp_lro_lock);
		ret_response = TCP_LRO_FLOW_NOTFOUND;
		break;

	case TCP_LRO_COALESCE:
		if ((payload_len != 0) && (unknown_tcpopts == 0) &&
			(tcpflags == 0) && (ecn != IPTOS_ECN_CE) && (to.to_flags & TOF_TS)) {
			tcp_lro_coalesce(flow_id, lro_mb, tcp_hdr, payload_len,
				drop_hdrlen, &to,
				(to.to_flags & TOF_TS) ? (u_int32_t *)(void *)(optp + 4) : NULL,
				(to.to_flags & TOF_TS) ? (u_int32_t *)(void *)(optp + 8) : NULL,
				thflags);
			if (lrodebug >= 2) {
				printf("tcp_lro_process_pkt: coalesce len = %d. flow_id = %d payload_len = %d drop_hdrlen = %d optlen = %d lport = %d seqnum = %x.\n",
					lro_flow_list[flow_id].lr_len, flow_id,
					payload_len, drop_hdrlen, optlen,
					ntohs(lro_flow_list[flow_id].lr_lport),
					ntohl(tcp_hdr->th_seq));
			}
			if (lro_flow_list[flow_id].lr_mhead->m_pkthdr.lro_npkts >= coalesc_sz) {
				eject_flow = 1;
			}
			coalesced = 1;
		}
		if (eject_flow) {
			mb = tcp_lro_eject_coalesced_pkt(flow_id);
			lro_flow_list[flow_id].lr_seq = ntohl(tcp_hdr->th_seq) +
				payload_len;
			lck_mtx_unlock(&tcp_lro_lock);
			if (mb) {
				lro_proto_input(mb);
			}
			if (!coalesced) {
				if (lrodebug >= 2) {
					printf("%s: pkt payload_len = %d \n",
						__func__, payload_len);
				}
				lro_proto_input(lro_mb);
			}
		} else {
			lck_mtx_unlock(&tcp_lro_lock);
		}
		break;

	case TCP_LRO_EJECT_FLOW:
		mb = tcp_lro_eject_coalesced_pkt(flow_id);
		lck_mtx_unlock(&tcp_lro_lock);
		if (mb) {
			if (lrodebug)
				printf("tcp_lro_process_pkt eject_flow, len = %d\n",
					mb->m_pkthdr.len);
			lro_proto_input(mb);
		}

		lro_proto_input(lro_mb);
		break;

	case TCP_LRO_COLLISION:
		lck_mtx_unlock(&tcp_lro_lock);
		ret_response = TCP_LRO_FLOW_NOTFOUND;
		break;

	default:
		lck_mtx_unlock(&tcp_lro_lock);
		panic_plain("%s: unrecognized type %d", __func__, retval);
		break;
	}

	if (ret_response == TCP_LRO_FLOW_NOTFOUND) {
		lro_proto_input(lro_mb);
	}
	return NULL;
}
static void
tcp_lro_timer_proc(void *arg1, void *arg2)
{
#pragma unused(arg1, arg2)

	lck_mtx_lock_spin(&tcp_lro_lock);
	lro_timer_set = 0;
	lck_mtx_unlock(&tcp_lro_lock);
	tcp_lro_flush_flows();
}
static void
tcp_lro_flush_flows(void)
{
	int i = 0;
	struct mbuf *mb;
	struct lro_flow *flow;
	int active_flows = 0;
	int outstanding_flows = 0;
	int tcpclock_updated = 0;

	lck_mtx_lock(&tcp_lro_lock);

	while (i < TCP_LRO_NUM_FLOWS) {
		flow = &lro_flow_list[i];
		if (flow->lr_mhead != NULL) {
			active_flows++;
			if (!tcpclock_updated) {
				calculate_tcp_clock();
				tcpclock_updated = 1;
			}
			if (((tcp_now - flow->lr_timestamp) >= coalesc_time) ||
				(flow->lr_mhead->m_pkthdr.lro_npkts >=
				coalesc_sz)) {

				if (lrodebug >= 2)
					printf("tcp_lro_flush_flows: len =%d n_pkts = %d %d %d \n",
						flow->lr_len,
						flow->lr_mhead->m_pkthdr.lro_npkts,
						flow->lr_timestamp, tcp_now);

				mb = tcp_lro_eject_flow(i);

				if (mb) {
					lck_mtx_unlock(&tcp_lro_lock);
					lro_update_flush_stats(mb);
					lro_proto_input(mb);
					lck_mtx_lock(&tcp_lro_lock);
				}
			} else {
				tcp_lro_sched_timer(0);
				outstanding_flows++;
				if (lrodebug >= 2) {
					printf("tcp_lro_flush_flows: did not flush flow of len =%d deadline = %x timestamp = %x \n",
						flow->lr_len, tcp_now, flow->lr_timestamp);
				}
			}
		}
		if (flow->lr_flags & LRO_EJECT_REQ) {
			mb = tcp_lro_eject_flow(i);
			if (mb) {
				lck_mtx_unlock(&tcp_lro_lock);
				lro_proto_input(mb);
				lro_eject_req++;
				lck_mtx_lock(&tcp_lro_lock);
			}
		}
		i++;
	}
	lck_mtx_unlock(&tcp_lro_lock);

	lrocount++;
	if (lrocount == 900) {
		printf("%s: %d %d %d %d oo: %d mismatch: %d ej_req: %d coll: %d \n",
			__func__,
			tcpstat.tcps_coalesced_pack,
			tcpstat.tcps_lro_twopack,
			tcpstat.tcps_lro_multpack,
			tcpstat.tcps_lro_largepack,
			lro_seq_outoforder,
			lro_seq_mismatch,
			lro_eject_req,
			tcpstat.tcps_flowtbl_collision);
		printf("%s: all: %d single: %d double: %d good: %d \n",
			__func__, lro_flushes, lro_single_flushes,
			lro_double_flushes, lro_good_flushes);
		lrocount = 0;
	}

	if ((lrodebug >= 2) && (active_flows > 1)) {
		printf("lro_flush_flows: active_flows = %d \n", active_flows);
	}
}
/*
 * Must be called with tcp_lro_lock held.
 * The hint is non-zero for longer waits. The wait time dictated by coalesc_time
 * takes precedence, so lro_timer_set is not set for the hint case.
 */
static void
tcp_lro_sched_timer(uint64_t hint)
{
	if (lro_timer_set) {
		return;
	}

	if (!hint) {
		/* the intent is to wake up every coalesc_time msecs */
		lro_timer_set = 1;
		clock_interval_to_deadline(coalesc_time,
			(NSEC_PER_SEC / TCP_RETRANSHZ), &lro_deadline);
	} else {
		clock_interval_to_deadline(hint, NSEC_PER_SEC / TCP_RETRANSHZ,
			&lro_deadline);
	}
	thread_call_enter_delayed(tcp_lro_timer, lro_deadline);
}
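
/*
 * The timer is a one-shot thread call: lro_timer_set (protected by
 * tcp_lro_lock) keeps at most one call pending, tcp_lro_timer_proc()
 * clears it before flushing, and the deadline is expressed in TCP clock
 * ticks (1/TCP_RETRANSHZ sec), so coalesc_time is effectively a
 * millisecond bound on how long a coalesced packet may be buffered.
 */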
struct mbuf*
tcp_lro(struct mbuf *m, unsigned int hlen)
{
	struct ip *ip_hdr;
	unsigned int tlen;
	struct tcphdr * tcp_hdr = NULL;
	unsigned int off = 0;

	if (kipf_count != 0)
		return m;

	/*
	 * Experiments on cellular show that the RTT is much higher
	 * than the coalescing time of 5 msecs, causing lro to flush
	 * 80% of the time on a single packet. Increasing
	 * coalescing time for cellular does not show marked
	 * improvement to throughput either. Loopback perf is hurt
	 * by the 5 msec latency and it already sends large packets.
	 */
	if ((m->m_pkthdr.rcvif->if_type == IFT_CELLULAR) ||
		(m->m_pkthdr.rcvif->if_type == IFT_LOOP)) {
		return m;
	}

	ip_hdr = mtod(m, struct ip*);

	/* only TCP is coalesced */
	if (ip_hdr->ip_p != IPPROTO_TCP) {
		return m;
	}

	if (m->m_len < (int32_t) sizeof (struct tcpiphdr)) {
		if (lrodebug) printf("tcp_lro m_pullup \n");
		if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
			tcpstat.tcps_rcvshort++;
			if (lrodebug) {
				printf("ip_lro: rcvshort.\n");
			}
			return NULL;
		}
		ip_hdr = mtod(m, struct ip*); /* data may have moved in m_pullup */
	}

	tcp_hdr = (struct tcphdr *)((caddr_t)ip_hdr + hlen);
	tlen = ip_hdr->ip_len; /* ignore IP header bytes len */
	m->m_pkthdr.lro_pktlen = tlen; /* Used to return max pkt encountered to tcp */
	m->m_pkthdr.lro_npkts = 1; /* Initialize a counter to hold num pkts coalesced */
	off = tcp_hdr->th_off << 2;
	if (off < sizeof (struct tcphdr) || off > tlen) {
		tcpstat.tcps_rcvbadoff++;
		if (lrodebug) {
			printf("ip_lro: TCP off greater than TCP header.\n");
		}
		return m;
	}

	return (tcp_lro_process_pkt(m, ip_hdr, tcp_hdr, hlen + off));
}
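
/*
 * tcp_lro() is the ip_input() entry point: it returns the mbuf unchanged
 * when the packet is not eligible (active ipfilters via kipf_count,
 * cellular or loopback interface, non-TCP protocol, or a bad TCP offset),
 * and returns NULL once tcp_lro_process_pkt() has consumed the packet.
 */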
static void
lro_proto_input(struct mbuf *m)
{
	struct ip* ip_hdr = mtod(m, struct ip*);

	if (lrodebug >= 3) {
		printf("lro_proto_input: ip_len = %d \n",
			ip_hdr->ip_len);
	}
	lro_update_stats(m);
	ip_proto_dispatch_in_wrapper(m, ip_hdr->ip_hl << 2, ip_hdr->ip_p);
}
static struct mbuf *
lro_tcp_xsum_validate(struct mbuf *m, struct ipovly *ipov, struct tcphdr * th)
{
	struct ip* ip = (struct ip*)ipov;
	int tlen = ip->ip_len;
	int len;
	struct ifnet *ifp = ((m->m_flags & M_PKTHDR) && m->m_pkthdr.rcvif != NULL) ?
				m->m_pkthdr.rcvif : NULL;

	/* Expect 32-bit aligned data pointer on strict-align platforms */
	MBUF_STRICT_DATA_ALIGNMENT_CHECK_32(m);

	if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) {
		if (m->m_pkthdr.csum_flags & CSUM_TCP_SUM16) {
			u_short pseudo;
			char b[9];

			bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
			bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
			ipov->ih_len = (u_short)tlen;
#if BYTE_ORDER != BIG_ENDIAN
			HTONS(ipov->ih_len);
#endif
			pseudo = in_cksum(m, sizeof (struct ip));
			bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));

			th->th_sum = in_addword(pseudo, (m->m_pkthdr.csum_data & 0xFFFF));
		} else {
			if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR)
				th->th_sum = m->m_pkthdr.csum_data;
			else
				th->th_sum = in_pseudo(ip->ip_src.s_addr,
					ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data +
					ip->ip_len + IPPROTO_TCP));
		}
		th->th_sum ^= 0xffff;
	} else {
		char b[9];
		/*
		 * Checksum extended TCP header and data.
		 */
		bcopy(ipov->ih_x1, b, sizeof (ipov->ih_x1));
		bzero(ipov->ih_x1, sizeof (ipov->ih_x1));
		ipov->ih_len = (u_short)tlen;
#if BYTE_ORDER != BIG_ENDIAN
		HTONS(ipov->ih_len);
#endif
		len = sizeof (struct ip) + tlen;
		th->th_sum = in_cksum(m, len);
		bcopy(b, ipov->ih_x1, sizeof (ipov->ih_x1));

		tcp_in_cksum_stats(len);
	}
	if (th->th_sum) {
		tcpstat.tcps_rcvbadsum++;
		if (ifp != NULL && ifp->if_tcp_stat != NULL) {
			atomic_add_64(&ifp->if_tcp_stat->badformat, 1);
		}
		if (lrodebug)
			printf("lro_tcp_xsum_validate: bad xsum and drop m = %p.\n", m);
		m_freem(m);
		return NULL;
	}
	/* revert back the order as IP will look into this again. */
#if BYTE_ORDER != BIG_ENDIAN
	NTOHS(ipov->ih_len);
#endif
	return m;
}
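
/*
 * Three checksum cases are handled above: SUM16-style partial sums from
 * the driver (fold in a freshly computed pseudo-header sum), full
 * pseudo-header offload (csum_data already holds the final sum), and no
 * offload at all (software in_cksum() over the IP header plus payload).
 * In each case the segment is valid only if the folded th_sum is zero,
 * and ih_len is byte-swapped back so IP can inspect it again.
 */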
/*
 * When TCP detects a stable, steady flow without out of ordering,
 * with a sufficiently high cwnd, it invokes LRO.
 */
int
tcp_start_coalescing(struct ip *ip_hdr, struct tcphdr *tcp_hdr, int tlen)
{
	int hash;
	int flow_id;
	struct mbuf *eject_mb;
	struct lro_flow *lf;

	hash = LRO_HASH(ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr,
		tcp_hdr->th_sport, tcp_hdr->th_dport,
		(TCP_LRO_FLOW_MAP - 1));

	lck_mtx_lock_spin(&tcp_lro_lock);
	flow_id = lro_flow_map[hash];
	if (flow_id != TCP_LRO_FLOW_NOTFOUND) {
		lf = &lro_flow_list[flow_id];
		if ((lf->lr_faddr.s_addr == ip_hdr->ip_src.s_addr) &&
		    (lf->lr_laddr.s_addr == ip_hdr->ip_dst.s_addr) &&
		    (lf->lr_fport == tcp_hdr->th_sport) &&
		    (lf->lr_lport == tcp_hdr->th_dport)) {
			if ((lf->lr_tcphdr == NULL) &&
				(lf->lr_seq != (tcp_hdr->th_seq + tlen))) {
				lf->lr_seq = tcp_hdr->th_seq + tlen;
			}
			lf->lr_flags &= ~LRO_EJECT_REQ;
		}
		lck_mtx_unlock(&tcp_lro_lock);
		return 0;
	}

	HTONL(tcp_hdr->th_seq);
	HTONL(tcp_hdr->th_ack);
	eject_mb =
		tcp_lro_insert_flow(NULL, ip_hdr, tcp_hdr, tlen, 0, hash,
			NULL, NULL, NULL);

	lck_mtx_unlock(&tcp_lro_lock);

	NTOHL(tcp_hdr->th_seq);
	NTOHL(tcp_hdr->th_ack);
	if (lrodebug >= 3) {
		printf("%s: src = %x dst = %x sport = %d dport = %d seq %x \n",
			__func__, ip_hdr->ip_src.s_addr, ip_hdr->ip_dst.s_addr,
			tcp_hdr->th_sport, tcp_hdr->th_dport, tcp_hdr->th_seq);
	}
	ASSERT(eject_mb == NULL);
	return 0;
}
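
/*
 * The HTONL()/NTOHL() pairs above exist because tcp_input() has already
 * byte-swapped th_seq and th_ack into host order by the time it decides
 * to start coalescing; tcp_lro_insert_flow() expects the header in wire
 * format, so the fields are restored around the call.
 */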
/*
 * When TCP detects loss or idle condition, it stops offloading
 * to LRO.
 */
int
tcp_lro_remove_state(struct in_addr saddr, struct in_addr daddr,
		unsigned short sport, unsigned short dport)
{
	int hash, flow_id;
	struct lro_flow *lf;

	hash = LRO_HASH(daddr.s_addr, saddr.s_addr, dport, sport,
		(TCP_LRO_FLOW_MAP - 1));
	lck_mtx_lock_spin(&tcp_lro_lock);
	flow_id = lro_flow_map[hash];
	if (flow_id == TCP_LRO_FLOW_UNINIT) {
		lck_mtx_unlock(&tcp_lro_lock);
		return 0;
	}
	lf = &lro_flow_list[flow_id];
	if ((lf->lr_faddr.s_addr == daddr.s_addr) &&
	    (lf->lr_laddr.s_addr == saddr.s_addr) &&
	    (lf->lr_fport == dport) &&
	    (lf->lr_lport == sport)) {
		if (lrodebug) {
			printf("%s: %x %x\n", __func__,
				lf->lr_flags, lf->lr_seq);
		}
		lf->lr_flags |= LRO_EJECT_REQ;
	}
	lck_mtx_unlock(&tcp_lro_lock);
	return 0;
}
void
tcp_update_lro_seq(__uint32_t rcv_nxt, struct in_addr saddr, struct in_addr daddr,
		unsigned short sport, unsigned short dport)
{
	int hash, flow_id;
	struct lro_flow *lf;

	hash = LRO_HASH(daddr.s_addr, saddr.s_addr, dport, sport,
		(TCP_LRO_FLOW_MAP - 1));
	lck_mtx_lock_spin(&tcp_lro_lock);
	flow_id = lro_flow_map[hash];
	if (flow_id == TCP_LRO_FLOW_UNINIT) {
		lck_mtx_unlock(&tcp_lro_lock);
		return;
	}
	lf = &lro_flow_list[flow_id];
	if ((lf->lr_faddr.s_addr == daddr.s_addr) &&
	    (lf->lr_laddr.s_addr == saddr.s_addr) &&
	    (lf->lr_fport == dport) &&
	    (lf->lr_lport == sport) &&
	    (lf->lr_tcphdr == NULL)) {
		lf->lr_seq = (tcp_seq)rcv_nxt;
	}
	lck_mtx_unlock(&tcp_lro_lock);
	return;
}
static void
lro_update_stats(struct mbuf *m)
{
	switch(m->m_pkthdr.lro_npkts) {
	case 0: /* fall through */
	case 1:
		break;

	case 2:
		tcpstat.tcps_lro_twopack++;
		break;

	case 3: /* fall through */
	case 4:
		tcpstat.tcps_lro_multpack++;
		break;

	default:
		tcpstat.tcps_lro_largepack++;
		break;
	}
	return;
}
static void
lro_update_flush_stats(struct mbuf *m)
{
	lro_flushes++;
	switch(m->m_pkthdr.lro_npkts) {
	case 1: lro_single_flushes++;
		break;
	case 2: lro_double_flushes++;
		break;
	default: lro_good_flushes++;
		break;
	}
	return;
}