2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 * Copyright (C) 1995-1997 by Darren Reed.
25 * Redistribution and use in source and binary forms are permitted
26 * provided that this notice is preserved and due credit is given
27 * to the original author and the contributors.
30 /* static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-1995 Darren Reed"; */
33 #include "opt_ipfilter.h"
34 #if defined(KERNEL) && !defined(_KERNEL)
37 #define __FreeBSD_version 300000 /* it's a hack, but close enough */
39 #if !defined(_KERNEL) && !defined(KERNEL) && !defined(__KERNEL__)
44 # include <linux/kernel.h>
45 # include <linux/module.h>
48 #include <sys/errno.h>
49 #include <sys/types.h>
50 #include <sys/param.h>
52 #if defined(KERNEL) && (__FreeBSD_version >= 220000)
53 # include <sys/filio.h>
54 # include <sys/fcntl.h>
55 # include <sys/malloc.h>
57 # include <sys/ioctl.h>
62 #include <sys/protosw.h>
64 #include <sys/socket.h>
65 #if defined(_KERNEL) && !defined(linux)
66 # include <sys/systm.h>
68 #if !defined(__SVR4) && !defined(__svr4__)
70 # include <sys/mbuf.h>
73 # include <sys/filio.h>
74 # include <sys/byteorder.h>
75 # include <sys/dditypes.h>
76 # include <sys/stream.h>
77 # include <sys/kmem.h>
84 #include <net/route.h>
85 #include <netinet/in.h>
86 #include <netinet/in_systm.h>
87 #include <netinet/ip.h>
88 #include <netinet/tcp.h>
90 # include <netinet/ip_var.h>
91 # include <netinet/tcp_fsm.h>
93 #include <netinet/udp.h>
94 #include <netinet/ip_icmp.h>
95 #include "netinet/ip_compat.h"
96 #include <netinet/tcpip.h>
97 #include "netinet/ip_fil.h"
98 #include "netinet/ip_nat.h"
99 #include "netinet/ip_frag.h"
100 #include "netinet/ip_proxy.h"
101 #include "netinet/ip_state.h"
103 #define MIN(a,b) (((a)<(b))?(a):(b))
106 #define TCP_CLOSE (TH_FIN|TH_RST)
/*
 * File-scope data shared by the routines in this file: the hash table of
 * tracked entries (IPSTATE_SIZE buckets of ipstate_t pointers), the count
 * of entries and the statistics block.
 */
108 static ipstate_t
*ips_table
[IPSTATE_SIZE
];
/* ips_num is checked against IPSTATE_MAX before a new entry is added. */
109 static int ips_num
= 0;
110 static ips_stat_t ips_stats
;
/* On Solaris/SGI kernel builds the ipf_state mutex is defined elsewhere. */
111 #if (SOLARIS || defined(__sgi)) && defined(_KERNEL)
112 extern kmutex_t ipf_state
;
/* Forward declarations of the file-local helpers. */
115 static int fr_matchsrcdst
__P((ipstate_t
*, struct in_addr
, struct in_addr
,
116 fr_info_t
*, void *, u_short
, u_short
));
117 static int fr_state_flush
__P((int));
118 static ips_stat_t
*fr_statetstats
__P((void));
/*
 * Default timeouts (externally visible, so they can be tuned).
 * NOTE(review): FIVE_DAYS carries an extra factor of 2, presumably because
 * ages are counted in half-second ticks (the expiry routine is described
 * as being called twice per second) -- confirm.
 */
121 #define FIVE_DAYS (2 * 5 * 86400) /* 5 days: half closed session */
123 u_long fr_tcpidletimeout
= FIVE_DAYS
,
124 fr_tcpclosewait
= 60,
129 fr_icmptimeout
= 120;
/*
 * Refresh the two derived fields of the statistics block -- the current
 * entry count and the address of the hash table -- before it is handed out.
 * NOTE(review): the return statement is not visible in this excerpt; the
 * ioctl handler copies sizeof(ips_stat_t) bytes from the returned pointer,
 * so it presumably returns &ips_stats -- confirm.
 */
132 static ips_stat_t
*fr_statetstats()
134 ips_stats
.iss_active
= ips_num
;
135 ips_stats
.iss_table
= ips_table
;
141 * flush state tables. two actions currently defined:
142 * which == 0 : flush all state table entries
143 * which == 1 : flush TCP connections which have started to close but are
144 * stuck for some reason.
/*
 * Walk every bucket of ips_table while holding the ipf_state mutex and
 * flush entries as selected by `which`.  For each flushed entry visible
 * here, the expire counter is bumped and an ISL_FLUSH record is logged.
 * NOTE(review): the unlink/free statements, the use of `delete`/`removed`
 * and the return value are not visible in this excerpt.
 */
146 static int fr_state_flush(which
)
150 register ipstate_t
*is
, **isp
;
151 #if defined(_KERNEL) && !SOLARIS
154 int delete, removed
= 0;
157 MUTEX_ENTER(&ipf_state
);
158 for (i
= 0; i
< IPSTATE_SIZE
; i
++)
/* No increment clause: isp is advanced (or the entry unlinked) in the body. */
159 for (isp
= &ips_table
[i
]; (is
= *isp
); ) {
/*
 * TCP entry where exactly one direction has moved past TCPS_ESTABLISHED
 * while the other has not.
 */
168 if ((is
->is_p
== IPPROTO_TCP
) &&
169 (((is
->is_state
[0] <= TCPS_ESTABLISHED
) &&
170 (is
->is_state
[1] > TCPS_ESTABLISHED
)) ||
171 ((is
->is_state
[1] <= TCPS_ESTABLISHED
) &&
172 (is
->is_state
[0] > TCPS_ESTABLISHED
))))
179 if (is
->is_p
== IPPROTO_TCP
)
182 ips_stats
.iss_expire
++;
184 ipstate_log(is
, ISL_FLUSH
);
192 MUTEX_EXIT(&ipf_state
);
/*
 * ioctl entry point for the state table.  Three operations are visible:
 *   - read an int from `data`; if it is 0 or 1, run fr_state_flush() with
 *     it and copy the result back to `data`;
 *   - copy the statistics block returned by fr_statetstats() out to `data`;
 *   - copy iplused[IPL_LOGSTATE] out to `data`.
 * NOTE(review): the dispatch on `cmd` that selects among these, and the
 * way `error` is set and returned, are not visible in this excerpt.
 */
198 int fr_state_ioctl(data
, cmd
, mode
)
200 #if defined(__NetBSD__) || defined(__OpenBSD__)
207 int arg
, ret
, error
= 0;
/* Flush request: fetch the selector from the caller's buffer. */
212 IRCOPY(data
, (caddr_t
)&arg
, sizeof(arg
));
/* Only selectors 0 and 1 are passed on to fr_state_flush(). */
213 if (arg
== 0 || arg
== 1) {
214 ret
= fr_state_flush(arg
);
215 IWCOPY((caddr_t
)&ret
, data
, sizeof(ret
));
/* Statistics request: the block is refreshed by fr_statetstats() first. */
220 IWCOPY((caddr_t
)fr_statetstats(), data
, sizeof(ips_stat_t
));
/* Report the IPL_LOGSTATE slot of iplused. */
224 IWCOPY((caddr_t
)&iplused
[IPL_LOGSTATE
], (caddr_t
)data
,
225 sizeof(iplused
[IPL_LOGSTATE
]));
236 * Create a new ipstate structure and hang it off the hash table.
/*
 * Build a state entry for the packet described by `ip`/`fin` and link it
 * into ips_table.  The entry is first assembled in a local template
 * (`ips`), with the hash value `hv` accumulated from the same fields as
 * they are copied; the template is then copied into freshly allocated
 * memory and pushed onto the head of its bucket under the ipf_state mutex.
 * NOTE(review): the protocol switch, the reduction of `hv` to a bucket
 * index, the failure returns and the direction tests are not visible in
 * this excerpt.
 */
238 int fr_addstate(ip
, fin
, pass
)
244 register ipstate_t
*is
= &ips
;
/*
 * Rejected up front: packets with any of the low 13 bits of ip_off set, or
 * flagged FI_SHORT by the filter.
 * NOTE(review): 0x1fff is presumably the fragment-offset mask -- confirm.
 */
247 if ((ip
->ip_off
& 0x1fff) || (fin
->fin_fi
.fi_fl
& FI_SHORT
))
/* Table-full check against the configured maximum. */
249 if (ips_num
== IPSTATE_MAX
) {
257 * Copy and calculate...
/* Each assignment both fills the template and feeds the hash. */
259 hv
= (is
->is_p
= ip
->ip_p
);
260 hv
+= (is
->is_src
.s_addr
= ip
->ip_src
.s_addr
);
261 hv
+= (is
->is_dst
.s_addr
= ip
->ip_dst
.s_addr
);
/* ICMP: the protocol payload (fin_dp) is viewed as an ICMP header. */
267 struct icmp
*ic
= (struct icmp
*)fin
->fin_dp
;
269 switch (ic
->icmp_type
)
/*
 * This branch records ICMP_ECHOREPLY as the entry's type and adds the ICMP
 * id and sequence number to the hash.
 */
272 is
->is_icmp
.ics_type
= ICMP_ECHOREPLY
; /* XXX */
273 hv
+= (is
->is_icmp
.ics_id
= ic
->icmp_id
);
274 hv
+= (is
->is_icmp
.ics_seq
= ic
->icmp_seq
);
/* This branch records the packet's own type plus one. */
279 is
->is_icmp
.ics_type
= ic
->icmp_type
+ 1;
284 ips_stats
.iss_icmp
++;
285 is
->is_age
= fr_icmptimeout
;
/* TCP: the protocol payload is viewed as a TCP header. */
290 register tcphdr_t
*tcp
= (tcphdr_t
*)fin
->fin_dp
;
293 * The endian of the ports doesn't matter, but the ack and
294 * sequence numbers do as we do mathematics on them later.
296 hv
+= (is
->is_dport
= tcp
->th_dport
);
297 hv
+= (is
->is_sport
= tcp
->th_sport
);
298 is
->is_seq
= ntohl(tcp
->th_seq
);
299 is
->is_ack
= ntohl(tcp
->th_ack
);
300 is
->is_swin
= ntohs(tcp
->th_win
);
301 is
->is_dwin
= is
->is_swin
; /* start them the same */
304 * If we're creating state for a starting connection, start the
305 * timer on it as we'll never see an error if it fails to
/* SYN without ACK: the ack field of the packet is ignored. */
308 if ((tcp
->th_flags
& (TH_SYN
|TH_ACK
)) == TH_SYN
)
309 is
->is_ack
= 0; /* Trumpet WinSock 'ism */
/*
 * The last argument compares th_sport with is_sport, which was copied from
 * th_sport just above, so it is always true here.
 */
310 fr_tcp_age(&is
->is_age
, is
->is_state
, ip
, fin
,
311 tcp
->th_sport
== is
->is_sport
);
/*
 * UDP: only the port fields are read, through a tcphdr_t pointer.
 * NOTE(review): this relies on the two ports sitting at the same offsets
 * in the UDP and TCP headers -- confirm.
 */
316 register tcphdr_t
*tcp
= (tcphdr_t
*)fin
->fin_dp
;
318 hv
+= (is
->is_dport
= tcp
->th_dport
);
319 hv
+= (is
->is_sport
= tcp
->th_sport
);
321 is
->is_age
= fr_udptimeout
;
/*
 * Allocate the real entry and copy the template into it.
 * NOTE(review): iss_nomem is presumably bumped only when KMALLOC fails --
 * the test is not visible here.
 */
328 KMALLOC(is
, ipstate_t
*, sizeof(*is
));
330 ips_stats
.iss_nomem
++;
333 bcopy((char *)&ips
, (char *)is
, sizeof(*is
));
335 MUTEX_ENTER(&ipf_state
);
/* Byte counter starts at the length of this first packet. */
339 is
->is_bytes
= ip
->ip_len
;
341 * Copy these from the rule itself.
343 is
->is_opt
= fin
->fin_fr
->fr_ip
.fi_optmsk
;
344 is
->is_optmsk
= fin
->fin_fr
->fr_mip
.fi_optmsk
;
345 is
->is_sec
= fin
->fin_fr
->fr_ip
.fi_secmsk
;
346 is
->is_secmsk
= fin
->fin_fr
->fr_mip
.fi_secmsk
;
347 is
->is_auth
= fin
->fin_fr
->fr_ip
.fi_auth
;
348 is
->is_authmsk
= fin
->fin_fr
->fr_mip
.fi_auth
;
/*
 * is_flags packs two values: the rule's flag value in the low bits and the
 * rule's flag mask shifted left by 4.
 */
349 is
->is_flags
= fin
->fin_fr
->fr_ip
.fi_fl
;
350 is
->is_flags
|= fin
->fin_fr
->fr_mip
.fi_fl
<< 4;
/*
 * Push onto the head of the bucket chain.
 * NOTE(review): hv is presumably reduced to a valid bucket index, and the
 * bucket head updated, on lines not visible here.
 */
354 is
->is_next
= ips_table
[hv
];
/*
 * Record the interface the packet was seen on.
 * NOTE(review): the direction test choosing between the out and in
 * assignments is not visible here.
 */
358 is
->is_ifpout
= fin
->fin_ifp
;
360 is
->is_ifpin
= fin
->fin_ifp
;
361 is
->is_ifpout
= NULL
;
/* "Log first" rules: strip both logging bits from the stored pass value. */
363 if (pass
& FR_LOGFIRST
)
364 is
->is_pass
&= ~(FR_LOGFIRST
|FR_LOG
);
367 ipstate_log(is
, ISL_NEW
);
369 MUTEX_EXIT(&ipf_state
);
/*
 * For packets flagged FI_FRAG, also register a fragment entry; the XOR
 * toggles the FR_KEEPSTATE bit(s) in the pass value handed over.
 */
370 if (fin
->fin_fi
.fi_fl
& FI_FRAG
)
371 ipfr_newfrag(ip
, fin
, pass
^ FR_KEEPSTATE
);
377 * check to see if a packet with TCP headers fits within the TCP window.
378 * change timeout depending on whether new packet is a SYN-ACK returning for a
379 * SYN or a RST or FIN which indicate time to close up shop.
/*
 * Decide whether a TCP packet is consistent with the sequence/ack numbers
 * and windows stored in state entry `is`.  When it is, the stored window,
 * hit and byte counters are updated and fr_tcp_age() is run for the
 * packet's direction.
 * NOTE(review): the setup of `swin`/`dwin`, the conversion of the skews to
 * absolute values, the stores of seq/ack and the return values are not
 * visible in this excerpt.
 */
381 int fr_tcpstate(is
, fin
, ip
, tcp
)
382 register ipstate_t
*is
;
387 register int seqskew
, ackskew
;
388 register u_short swin
, dwin
;
389 register tcp_seq seq
, ack
;
393 * Find difference between last checked packet and this packet.
395 seq
= ntohl(tcp
->th_seq
);
396 ack
= ntohl(tcp
->th_ack
);
/* source is non-zero when the packet's source address is the entry's. */
397 source
= (ip
->ip_src
.s_addr
== is
->is_src
.s_addr
);
/* Without TH_ACK, substitute the stored value for this direction. */
399 if (!(tcp
->th_flags
& TH_ACK
)) /* Pretend an ack was sent */
400 ack
= source
? is
->is_ack
: is
->is_seq
;
405 * Must be an outgoing SYN-ACK in reply to a SYN.
/* First form: seq is compared with is_seq and ack with is_ack. */
408 seqskew
= seq
- is
->is_seq
;
409 ackskew
= ack
- is
->is_ack
;
413 * Must be a SYN-ACK in reply to a SYN.
/* Second form: the roles are swapped (seq against is_ack, ack against is_seq). */
416 ackskew
= seq
- is
->is_ack
;
417 seqskew
= ack
- is
->is_seq
;
421 * Make skew values absolute
429 * If the difference in sequence and ack numbers is within the
430 * window size of the connection, store these values and match
441 if ((seqskew
<= dwin
) && (ackskew
<= swin
)) {
/*
 * The advertised window of this packet replaces one of the stored windows.
 * NOTE(review): which one presumably depends on `source` -- the test is
 * not visible here.
 */
445 is
->is_swin
= ntohs(tcp
->th_win
);
449 is
->is_dwin
= ntohs(tcp
->th_win
);
451 ips_stats
.iss_hits
++;
453 is
->is_bytes
+= ip
->ip_len
;
455 * Nearing end of connection, start timeout.
457 fr_tcp_age(&is
->is_age
, is
->is_state
, ip
, fin
, source
);
/*
 * Test whether a packet with addresses `src`/`dst` (and, when `tcp` is
 * non-NULL, ports `sp`/`dp`) belongs to state entry `is`, in either
 * direction, and on the interface recorded for that direction.  A second
 * test compares the packet's option, security, auth and flag summaries
 * with the values stored in the entry.
 * NOTE(review): the assignments of `out`, `ifp` and `ret`, and the return
 * statements, are not visible in this excerpt.
 */
464 static int fr_matchsrcdst(is
, src
, dst
, fin
, tcp
, sp
, dp
)
466 struct in_addr src
, dst
;
471 int ret
= 0, rev
, out
;
/* rev is non-zero when the packet's destination is not the entry's. */
474 rev
= (is
->is_dst
.s_addr
!= dst
.s_addr
);
/*
 * Same orientation as the stored entry.  In the last clause && binds
 * tighter than ||, so the ports are only compared when tcp is non-NULL.
 */
497 if (((out
&& is
->is_ifpout
== ifp
) ||
498 (!out
&& is
->is_ifpin
== ifp
)) &&
499 (is
->is_dst
.s_addr
== dst
.s_addr
) &&
500 (is
->is_src
.s_addr
== src
.s_addr
) &&
501 (!tcp
|| (sp
== is
->is_sport
) &&
502 (dp
== is
->is_dport
))) {
/* Opposite orientation: addresses, ports and interfaces are all swapped. */
506 if (((out
&& is
->is_ifpin
== ifp
) ||
507 (!out
&& is
->is_ifpout
== ifp
)) &&
508 (is
->is_dst
.s_addr
== src
.s_addr
) &&
509 (is
->is_src
.s_addr
== dst
.s_addr
) &&
510 (!tcp
|| (sp
== is
->is_dport
) &&
511 (dp
== is
->is_sport
))) {
517 * Whether or not this should be here, is questionable, but the aim
518 * is to get this out of the main line.
/*
 * Masked comparisons against the values stored in the entry.  is_flags
 * holds the mask in its upper bits (>> 4) and the expected value in its
 * low four bits.
 */
521 if (((fin
->fin_fi
.fi_optmsk
& is
->is_optmsk
) != is
->is_opt
) ||
522 ((fin
->fin_fi
.fi_secmsk
& is
->is_secmsk
) != is
->is_sec
) ||
523 ((fin
->fin_fi
.fi_auth
& is
->is_authmsk
) != is
->is_auth
) ||
524 ((fin
->fin_fi
.fi_fl
& (is
->is_flags
>> 4)) !=
525 (is
->is_flags
& 0xf)))
533 * Check if a packet has a registered state.
/*
 * Look up the packet described by `ip`/`fin` in the state table.  Three
 * per-protocol searches are visible, each walking one bucket under the
 * ipf_state mutex:
 *   - ICMP: match on protocol, ICMP id and sequence, then addresses;
 *   - TCP:  match on protocol, addresses and ports, then fr_tcpstate();
 *   - UDP:  match on protocol, addresses and ports.
 * A miss is counted in iss_miss.
 * NOTE(review): the protocol switch, the addition of ports/ids to `hv`,
 * its reduction to a bucket index and the return values are not visible
 * in this excerpt.
 */
535 int fr_checkstate(ip
, fin
)
539 register struct in_addr dst
, src
;
540 register ipstate_t
*is
, **isp
;
544 u_int hv
, hlen
, pass
;
/* Same early rejection as fr_addstate(): low 13 bits of ip_off, or FI_SHORT. */
546 if ((ip
->ip_off
& 0x1fff) || (fin
->fin_fi
.fi_fl
& FI_SHORT
))
/* tcp points hlen bytes past the start of the IP header; ic aliases it. */
549 hlen
= fin
->fin_hlen
;
550 tcp
= (tcphdr_t
*)((char *)ip
+ hlen
);
551 ic
= (struct icmp
*)tcp
;
/*
 * The hash is a plain sum, so swapping source and destination yields the
 * same value.
 */
552 hv
= (pr
= ip
->ip_p
);
553 hv
+= (src
.s_addr
= ip
->ip_src
.s_addr
);
554 hv
+= (dst
.s_addr
= ip
->ip_dst
.s_addr
);
557 * Search the hash table for matching packet header info.
/* ICMP lookup; ports are not used, so NULL/0/0 are passed to the matcher. */
565 MUTEX_ENTER(&ipf_state
);
566 for (isp
= &ips_table
[hv
]; (is
= *isp
); isp
= &is
->is_next
)
567 if ((is
->is_p
== pr
) &&
568 (ic
->icmp_id
== is
->is_icmp
.ics_id
) &&
569 (ic
->icmp_seq
== is
->is_icmp
.ics_seq
) &&
570 fr_matchsrcdst(is
, src
, dst
, fin
, NULL
, 0, 0)) {
/* The packet's ICMP type is checked against the type stored in the entry. */
571 if (is
->is_icmp
.ics_type
!= ic
->icmp_type
)
/* On a hit the entry's age is reset and the counters updated. */
573 is
->is_age
= fr_icmptimeout
;
575 is
->is_bytes
+= ip
->ip_len
;
576 ips_stats
.iss_hits
++;
578 MUTEX_EXIT(&ipf_state
);
581 MUTEX_EXIT(&ipf_state
);
/* TCP lookup. */
585 register u_short dport
= tcp
->th_dport
, sport
= tcp
->th_sport
;
590 MUTEX_ENTER(&ipf_state
);
591 for (isp
= &ips_table
[hv
]; (is
= *isp
); isp
= &is
->is_next
)
592 if ((is
->is_p
== pr
) &&
593 fr_matchsrcdst(is
, src
, dst
, fin
, tcp
,
/* Address/port match found: let fr_tcpstate() validate the sequence space. */
595 if (fr_tcpstate(is
, fin
, ip
, tcp
)) {
598 MUTEX_EXIT(&ipf_state
);
/*
 * TCP_CLOSE is TH_FIN|TH_RST.
 * NOTE(review): the body of this branch is mostly not visible; it resets
 * isp to the bucket head.
 */
601 if (tcp
->th_flags
& TCP_CLOSE
) {
603 isp
= &ips_table
[hv
];
610 MUTEX_EXIT(&ipf_state
);
/* UDP lookup; the port fields are read through the tcphdr_t pointer. */
615 register u_short dport
= tcp
->th_dport
, sport
= tcp
->th_sport
;
621 * Nothing else to match on but ports. and IP#'s
623 MUTEX_ENTER(&ipf_state
);
624 for (is
= ips_table
[hv
]; is
; is
= is
->is_next
)
625 if ((is
->is_p
== pr
) &&
626 fr_matchsrcdst(is
, src
, dst
, fin
,
627 tcp
, sport
, dport
)) {
628 ips_stats
.iss_hits
++;
630 is
->is_bytes
+= ip
->ip_len
;
/* Each matching packet resets the entry's age to the UDP timeout. */
631 is
->is_age
= fr_udptimeout
;
633 MUTEX_EXIT(&ipf_state
);
636 MUTEX_EXIT(&ipf_state
);
/* No entry matched. */
642 ips_stats
.iss_miss
++;
648 * Free memory in use by all state info. kept.
/*
 * Walk every bucket of ips_table while holding the ipf_state mutex.
 * NOTE(review): the loop body (presumably unlinking and freeing each
 * entry, per the description of this routine) is not visible in this
 * excerpt.
 */
650 void fr_stateunload()
653 register ipstate_t
*is
, **isp
;
655 MUTEX_ENTER(&ipf_state
);
656 for (i
= 0; i
< IPSTATE_SIZE
; i
++)
/* No increment clause: the body is responsible for making progress. */
657 for (isp
= &ips_table
[i
]; (is
= *isp
); ) {
661 MUTEX_EXIT(&ipf_state
);
666 * Slowly expire held state for things like UDP and ICMP. Timeouts are set
667 * in expectation of this being called twice per second.
/*
 * Age every entry in the state table by one tick, under the ipf_state
 * mutex.  An entry whose age reaches zero on this tick is counted in
 * iss_expire and logged with ISL_EXPIRE.
 * NOTE(review): the unlink/free statements and the advance of `isp` for
 * surviving entries are not visible in this excerpt.
 */
669 void fr_timeoutstate()
672 register ipstate_t
*is
, **isp
;
673 #if defined(_KERNEL) && !SOLARIS
678 MUTEX_ENTER(&ipf_state
);
679 for (i
= 0; i
< IPSTATE_SIZE
; i
++)
680 for (isp
= &ips_table
[i
]; (is
= *isp
); )
/*
 * An age that is already zero is left alone (the first operand
 * short-circuits); otherwise it is decremented and the entry expires when
 * the result is zero.
 */
681 if (is
->is_age
&& !--is
->is_age
) {
683 if (is
->is_p
== IPPROTO_TCP
)
686 ips_stats
.iss_expire
++;
688 ipstate_log(is
, ISL_EXPIRE
);
694 MUTEX_EXIT(&ipf_state
);
700 * Original idea from Pradeep Krishnan for use primarily with NAT code.
701 * (pkrishna@netcom.com)
/*
 * Advance the per-direction TCP state in `state[dir]` according to the
 * flags of the packet in `fin`, and write the matching timeout into
 * `*age`.  `state[1 - dir]` is the state of the opposite direction and is
 * consulted but not modified on the visible lines.
 * NOTE(review): several `case` labels, `else` arms, `break`s and the
 * declarations of `ostate`/`dlen` are not visible in this excerpt, so the
 * grouping of the assignments below into branches is partly inferred.
 */
703 void fr_tcp_age(age
, state
, ip
, fin
, dir
)
710 tcphdr_t
*tcp
= (tcphdr_t
*)fin
->fin_dp
;
711 u_char flags
= tcp
->th_flags
;
/* State recorded for the other direction. */
714 ostate
= state
[1 - dir
];
/* Bytes left after removing the IP header length and th_off * 4. */
716 dlen
= ip
->ip_len
- fin
->fin_hlen
- (tcp
->th_off
<< 2);
/*
 * RST: without PUSH and without payload this direction becomes
 * TCPS_CLOSED.  The close-wait age and TCPS_CLOSE_WAIT that follow
 * presumably belong to the else arm -- confirm.
 */
718 if (flags
& TH_RST
) {
719 if (!(tcp
->th_flags
& TH_PUSH
) && !dlen
) {
721 state
[dir
] = TCPS_CLOSED
;
723 *age
= fr_tcpclosewait
;
724 state
[dir
] = TCPS_CLOSE_WAIT
;
/* Age assigned before the per-state handling below. */
729 *age
= fr_tcptimeout
; /* 1 min */
733 case TCPS_FIN_WAIT_2
:
/* Opening flags: TH_OPENING set in full, else a bare SYN. */
735 if ((flags
& TH_OPENING
) == TH_OPENING
)
736 state
[dir
] = TCPS_SYN_RECEIVED
;
737 else if (flags
& TH_SYN
)
738 state
[dir
] = TCPS_SYN_SENT
;
/*
 * ACK without FIN completes the handshake for this direction: the state
 * becomes ESTABLISHED, the active-connection counter is bumped and the
 * idle timeout applies.
 */
740 case TCPS_SYN_RECEIVED
:
741 if ((flags
& (TH_FIN
|TH_ACK
)) == TH_ACK
) {
742 state
[dir
] = TCPS_ESTABLISHED
;
743 current_active_connections
++;
744 *age
= fr_tcpidletimeout
;
/* Same transition, under a case label not visible here. */
748 if ((flags
& (TH_FIN
|TH_ACK
)) == TH_ACK
) {
749 state
[dir
] = TCPS_ESTABLISHED
;
750 current_active_connections
++;
751 *age
= fr_tcpidletimeout
;
/*
 * Established: a FIN moves this direction to CLOSE_WAIT.  The short
 * last-ack age is used when the FIN carries no PUSH and no payload and the
 * other direction is already past ESTABLISHED.
 */
754 case TCPS_ESTABLISHED
:
755 if (flags
& TH_FIN
) {
756 state
[dir
] = TCPS_CLOSE_WAIT
;
757 if (!(flags
& TH_PUSH
) && !dlen
&&
758 ostate
> TCPS_ESTABLISHED
)
759 *age
= fr_tcplastack
;
761 *age
= fr_tcpclosewait
;
763 *age
= fr_tcpidletimeout
;
/* Close-wait: a bare FIN with the peer already closing moves to LAST_ACK. */
765 case TCPS_CLOSE_WAIT
:
766 if ((flags
& TH_FIN
) && !(flags
& TH_PUSH
) && !dlen
&&
767 ostate
> TCPS_ESTABLISHED
) {
768 *age
= fr_tcplastack
;
769 state
[dir
] = TCPS_LAST_ACK
;
771 *age
= fr_tcpclosewait
;
/* Under a case label not visible here: an ACK moves to FIN_WAIT_2. */
774 if (flags
& TH_ACK
) {
775 state
[dir
] = TCPS_FIN_WAIT_2
;
776 if (!(flags
& TH_PUSH
) && !dlen
&&
777 ostate
> TCPS_ESTABLISHED
)
778 *age
= fr_tcplastack
;
780 *age
= fr_tcpclosewait
;
781 state
[dir
] = TCPS_CLOSE_WAIT
;
/*
 * Build a log record (`ipsl`) from state entry `is`, tagged with the event
 * `type`, and hand it to ipllog() on the IPL_LOGSTATE channel.
 * NOTE(review): the declarations of `ipsl`, `items`, `sizes` and `types`,
 * the setup of items[0]/types[0], and any conditional compilation around
 * the body are not visible in this excerpt.
 */
790 void ipstate_log(is
, type
)
/* Fields copied for every protocol. */
799 ipsl
.isl_pkts
= is
->is_pkts
;
800 ipsl
.isl_bytes
= is
->is_bytes
;
801 ipsl
.isl_src
= is
->is_src
;
802 ipsl
.isl_dst
= is
->is_dst
;
803 ipsl
.isl_p
= is
->is_p
;
804 ipsl
.isl_flags
= is
->is_flags
;
805 ipsl
.isl_type
= type
;
/* Protocol-specific part: ports for TCP/UDP, the stored type for ICMP. */
806 if (ipsl
.isl_p
== IPPROTO_TCP
|| ipsl
.isl_p
== IPPROTO_UDP
) {
807 ipsl
.isl_sport
= is
->is_sport
;
808 ipsl
.isl_dport
= is
->is_dport
;
809 } else if (ipsl
.isl_p
== IPPROTO_ICMP
)
810 ipsl
.isl_itype
= is
->is_icmp
.ics_type
;
/* Both filler slots are zeroed (the guarding condition, if any, is not visible). */
812 ipsl
.isl_ps
.isl_filler
[0] = 0;
813 ipsl
.isl_ps
.isl_filler
[1] = 0;
/* A single item of sizeof(ipsl) bytes is logged; the result is discarded. */
816 sizes
[0] = sizeof(ipsl
);
819 (void) ipllog(IPL_LOGSTATE
, 0, items
, sizes
, types
, 1);