]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
5d5c5d0d A |
2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
3 | * | |
8f6c56a5 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
8f6c56a5 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
8ad349bb | 24 | * limitations under the License. |
8f6c56a5 A |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
1c79356b A |
27 | */ |
28 | /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ | |
29 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
30 | /* | |
31 | * Copyright (c) 1982, 1986, 1988, 1990, 1993 | |
32 | * The Regents of the University of California. All rights reserved. | |
33 | * | |
34 | * Redistribution and use in source and binary forms, with or without | |
35 | * modification, are permitted provided that the following conditions | |
36 | * are met: | |
37 | * 1. Redistributions of source code must retain the above copyright | |
38 | * notice, this list of conditions and the following disclaimer. | |
39 | * 2. Redistributions in binary form must reproduce the above copyright | |
40 | * notice, this list of conditions and the following disclaimer in the | |
41 | * documentation and/or other materials provided with the distribution. | |
42 | * 3. All advertising materials mentioning features or use of this software | |
43 | * must display the following acknowledgement: | |
44 | * This product includes software developed by the University of | |
45 | * California, Berkeley and its contributors. | |
46 | * 4. Neither the name of the University nor the names of its contributors | |
47 | * may be used to endorse or promote products derived from this software | |
48 | * without specific prior written permission. | |
49 | * | |
50 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
51 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
52 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
53 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
54 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
55 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
56 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
57 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
58 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
59 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
60 | * SUCH DAMAGE. | |
61 | * | |
9bccf70c A |
62 | * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 |
63 | * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $ | |
1c79356b A |
64 | */ |
65 | ||
66 | #include <sys/param.h> | |
67 | #include <sys/systm.h> | |
55e303ae | 68 | #include <sys/filedesc.h> |
91447636 A |
69 | #include <sys/proc_internal.h> |
70 | #include <sys/kauth.h> | |
71 | #include <sys/file_internal.h> | |
1c79356b A |
72 | #include <sys/fcntl.h> |
73 | #include <sys/malloc.h> | |
74 | #include <sys/mbuf.h> | |
75 | #include <sys/domain.h> | |
76 | #include <sys/kernel.h> | |
55e303ae | 77 | #include <sys/event.h> |
1c79356b A |
78 | #include <sys/poll.h> |
79 | #include <sys/protosw.h> | |
80 | #include <sys/socket.h> | |
81 | #include <sys/socketvar.h> | |
82 | #include <sys/resourcevar.h> | |
83 | #include <sys/signalvar.h> | |
84 | #include <sys/sysctl.h> | |
85 | #include <sys/uio.h> | |
86 | #include <sys/ev.h> | |
87 | #include <sys/kdebug.h> | |
88 | #include <net/route.h> | |
89 | #include <netinet/in.h> | |
90 | #include <netinet/in_pcb.h> | |
91 | #include <kern/zalloc.h> | |
91447636 | 92 | #include <kern/locks.h> |
1c79356b A |
93 | #include <machine/limits.h> |
94 | ||
95 | int so_cache_hw = 0; | |
96 | int so_cache_timeouts = 0; | |
97 | int so_cache_max_freed = 0; | |
98 | int cached_sock_count = 0; | |
99 | struct socket *socket_cache_head = 0; | |
100 | struct socket *socket_cache_tail = 0; | |
101 | u_long so_cache_time = 0; | |
102 | int so_cache_init_done = 0; | |
103 | struct zone *so_cache_zone; | |
104 | extern int get_inpcb_str_size(); | |
105 | extern int get_tcp_str_size(); | |
106 | ||
91447636 A |
107 | static lck_grp_t *so_cache_mtx_grp; |
108 | static lck_attr_t *so_cache_mtx_attr; | |
109 | static lck_grp_attr_t *so_cache_mtx_grp_attr; | |
110 | lck_mtx_t *so_cache_mtx; | |
111 | ||
1c79356b A |
112 | #include <machine/limits.h> |
113 | ||
55e303ae A |
114 | static void filt_sordetach(struct knote *kn); |
115 | static int filt_soread(struct knote *kn, long hint); | |
116 | static void filt_sowdetach(struct knote *kn); | |
117 | static int filt_sowrite(struct knote *kn, long hint); | |
118 | static int filt_solisten(struct knote *kn, long hint); | |
119 | ||
120 | static struct filterops solisten_filtops = | |
121 | { 1, NULL, filt_sordetach, filt_solisten }; | |
122 | static struct filterops soread_filtops = | |
123 | { 1, NULL, filt_sordetach, filt_soread }; | |
124 | static struct filterops sowrite_filtops = | |
125 | { 1, NULL, filt_sowdetach, filt_sowrite }; | |
126 | ||
91447636 | 127 | #define EVEN_MORE_LOCKING_DEBUG 0 |
1c79356b A |
128 | int socket_debug = 0; |
129 | int socket_zone = M_SOCKET; | |
130 | so_gen_t so_gencnt; /* generation count for sockets */ | |
131 | ||
132 | MALLOC_DEFINE(M_SONAME, "soname", "socket name"); | |
133 | MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); | |
134 | ||
135 | #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) | |
136 | #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2) | |
137 | #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) | |
138 | #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3) | |
139 | #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1) | |
140 | #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8)) | |
141 | #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8)) | |
142 | ||
a3d08fcd | 143 | #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES) |
1c79356b | 144 | |
91447636 | 145 | |
1c79356b A |
146 | SYSCTL_DECL(_kern_ipc); |
147 | ||
148 | static int somaxconn = SOMAXCONN; | |
149 | SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, | |
150 | 0, ""); | |
151 | ||
152 | /* Should we get a maximum also ??? */ | |
fa4905b1 | 153 | static int sosendmaxchain = 65536; |
1c79356b | 154 | static int sosendminchain = 16384; |
55e303ae | 155 | static int sorecvmincopy = 16384; |
1c79356b A |
156 | SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain, |
157 | 0, ""); | |
55e303ae A |
158 | SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy, |
159 | 0, ""); | |
1c79356b A |
160 | |
161 | void so_cache_timer(); | |
162 | ||
163 | /* | |
164 | * Socket operation routines. | |
165 | * These routines are called by the routines in | |
166 | * sys_socket.c or from a system process, and | |
167 | * implement the semantics of socket operations by | |
168 | * switching out to the protocol specific routines. | |
169 | */ | |
170 | ||
9bccf70c | 171 | #ifdef __APPLE__ |
91447636 A |
172 | |
173 | vm_size_t so_cache_zone_element_size; | |
174 | ||
175 | static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid); | |
176 | ||
177 | ||
1c79356b A |
178 | void socketinit() |
179 | { | |
180 | vm_size_t str_size; | |
181 | ||
91447636 A |
182 | if (so_cache_init_done) { |
183 | printf("socketinit: already called...\n"); | |
184 | return; | |
185 | } | |
186 | ||
187 | /* | |
188 | * allocate lock group attribute and group for socket cache mutex | |
189 | */ | |
190 | so_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); | |
21362eb3 | 191 | lck_grp_attr_setdefault(so_cache_mtx_grp_attr); |
91447636 A |
192 | |
193 | so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr); | |
194 | ||
195 | /* | |
196 | * allocate the lock attribute for socket cache mutex | |
197 | */ | |
198 | so_cache_mtx_attr = lck_attr_alloc_init(); | |
21362eb3 | 199 | lck_attr_setdefault(so_cache_mtx_attr); |
91447636 | 200 | |
1c79356b A |
201 | so_cache_init_done = 1; |
202 | ||
91447636 A |
203 | so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */ |
204 | ||
205 | if (so_cache_mtx == NULL) | |
206 | return; /* we're hosed... */ | |
207 | ||
1c79356b A |
208 | str_size = (vm_size_t)( sizeof(struct socket) + 4 + |
209 | get_inpcb_str_size() + 4 + | |
210 | get_tcp_str_size()); | |
211 | so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone"); | |
212 | #if TEMPDEBUG | |
91447636 | 213 | printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); |
1c79356b | 214 | #endif |
91447636 A |
215 | timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); |
216 | ||
217 | so_cache_zone_element_size = str_size; | |
218 | ||
219 | sflt_init(); | |
1c79356b A |
220 | |
221 | } | |
222 | ||
223 | void cached_sock_alloc(so, waitok) | |
224 | struct socket **so; | |
225 | int waitok; | |
226 | ||
227 | { | |
228 | caddr_t temp; | |
1c79356b A |
229 | register u_long offset; |
230 | ||
231 | ||
91447636 A |
232 | lck_mtx_lock(so_cache_mtx); |
233 | ||
1c79356b A |
234 | if (cached_sock_count) { |
235 | cached_sock_count--; | |
236 | *so = socket_cache_head; | |
237 | if (*so == 0) | |
238 | panic("cached_sock_alloc: cached sock is null"); | |
239 | ||
240 | socket_cache_head = socket_cache_head->cache_next; | |
241 | if (socket_cache_head) | |
242 | socket_cache_head->cache_prev = 0; | |
243 | else | |
244 | socket_cache_tail = 0; | |
91447636 A |
245 | |
246 | lck_mtx_unlock(so_cache_mtx); | |
1c79356b A |
247 | |
248 | temp = (*so)->so_saved_pcb; | |
249 | bzero((caddr_t)*so, sizeof(struct socket)); | |
250 | #if TEMPDEBUG | |
251 | kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so, | |
252 | cached_sock_count); | |
253 | #endif | |
254 | (*so)->so_saved_pcb = temp; | |
91447636 A |
255 | (*so)->cached_in_sock_layer = 1; |
256 | ||
1c79356b A |
257 | } |
258 | else { | |
259 | #if TEMPDEBUG | |
260 | kprintf("Allocating cached sock %x from memory\n", *so); | |
261 | #endif | |
262 | ||
91447636 A |
263 | lck_mtx_unlock(so_cache_mtx); |
264 | ||
1c79356b A |
265 | if (waitok) |
266 | *so = (struct socket *) zalloc(so_cache_zone); | |
267 | else | |
268 | *so = (struct socket *) zalloc_noblock(so_cache_zone); | |
269 | ||
270 | if (*so == 0) | |
271 | return; | |
272 | ||
273 | bzero((caddr_t)*so, sizeof(struct socket)); | |
274 | ||
275 | /* | |
276 | * Define offsets for extra structures into our single block of | |
277 | * memory. Align extra structures on longword boundaries. | |
278 | */ | |
279 | ||
280 | ||
281 | offset = (u_long) *so; | |
282 | offset += sizeof(struct socket); | |
283 | if (offset & 0x3) { | |
284 | offset += 4; | |
285 | offset &= 0xfffffffc; | |
286 | } | |
287 | (*so)->so_saved_pcb = (caddr_t) offset; | |
288 | offset += get_inpcb_str_size(); | |
289 | if (offset & 0x3) { | |
290 | offset += 4; | |
291 | offset &= 0xfffffffc; | |
292 | } | |
293 | ||
294 | ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset; | |
295 | #if TEMPDEBUG | |
296 | kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so, | |
297 | (*so)->so_saved_pcb, | |
298 | ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb); | |
299 | #endif | |
300 | } | |
301 | ||
302 | (*so)->cached_in_sock_layer = 1; | |
303 | } | |
304 | ||
305 | ||
306 | void cached_sock_free(so) | |
307 | struct socket *so; | |
308 | { | |
1c79356b | 309 | |
91447636 | 310 | lck_mtx_lock(so_cache_mtx); |
1c79356b | 311 | |
1c79356b A |
312 | if (++cached_sock_count > MAX_CACHED_SOCKETS) { |
313 | --cached_sock_count; | |
91447636 | 314 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
315 | #if TEMPDEBUG |
316 | kprintf("Freeing overflowed cached socket %x\n", so); | |
317 | #endif | |
91447636 | 318 | zfree(so_cache_zone, so); |
1c79356b A |
319 | } |
320 | else { | |
321 | #if TEMPDEBUG | |
322 | kprintf("Freeing socket %x into cache\n", so); | |
323 | #endif | |
324 | if (so_cache_hw < cached_sock_count) | |
325 | so_cache_hw = cached_sock_count; | |
326 | ||
327 | so->cache_next = socket_cache_head; | |
328 | so->cache_prev = 0; | |
329 | if (socket_cache_head) | |
330 | socket_cache_head->cache_prev = so; | |
331 | else | |
332 | socket_cache_tail = so; | |
333 | ||
334 | so->cache_timestamp = so_cache_time; | |
335 | socket_cache_head = so; | |
91447636 | 336 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
337 | } |
338 | ||
339 | #if TEMPDEBUG | |
340 | kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count); | |
341 | #endif | |
342 | ||
343 | ||
344 | } | |
345 | ||
346 | ||
347 | void so_cache_timer() | |
348 | { | |
349 | register struct socket *p; | |
1c79356b | 350 | register int n_freed = 0; |
1c79356b | 351 | |
1c79356b | 352 | |
91447636 | 353 | lck_mtx_lock(so_cache_mtx); |
1c79356b | 354 | |
91447636 | 355 | ++so_cache_time; |
1c79356b | 356 | |
91447636 | 357 | while ( (p = socket_cache_tail) ) |
1c79356b A |
358 | { |
359 | if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT) | |
360 | break; | |
361 | ||
362 | so_cache_timeouts++; | |
363 | ||
91447636 | 364 | if ( (socket_cache_tail = p->cache_prev) ) |
1c79356b A |
365 | p->cache_prev->cache_next = 0; |
366 | if (--cached_sock_count == 0) | |
367 | socket_cache_head = 0; | |
368 | ||
1c79356b | 369 | |
91447636 | 370 | zfree(so_cache_zone, p); |
1c79356b | 371 | |
1c79356b A |
372 | if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) |
373 | { | |
374 | so_cache_max_freed++; | |
375 | break; | |
376 | } | |
377 | } | |
91447636 | 378 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
379 | |
380 | timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); | |
381 | ||
1c79356b A |
382 | |
383 | } | |
9bccf70c | 384 | #endif /* __APPLE__ */ |
1c79356b A |
385 | |
386 | /* | |
387 | * Get a socket structure from our zone, and initialize it. | |
388 | * We don't implement `waitok' yet (see comments in uipc_domain.c). | |
389 | * Note that it would probably be better to allocate socket | |
390 | * and PCB at the same time, but I'm not convinced that all | |
391 | * the protocols can be easily modified to do this. | |
392 | */ | |
393 | struct socket * | |
394 | soalloc(waitok, dom, type) | |
395 | int waitok; | |
396 | int dom; | |
397 | int type; | |
398 | { | |
399 | struct socket *so; | |
400 | ||
401 | if ((dom == PF_INET) && (type == SOCK_STREAM)) | |
402 | cached_sock_alloc(&so, waitok); | |
403 | else | |
404 | { | |
91447636 | 405 | MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK); |
1c79356b A |
406 | if (so) |
407 | bzero(so, sizeof *so); | |
408 | } | |
409 | /* XXX race condition for reentrant kernel */ | |
91447636 | 410 | //###LD Atomic add for so_gencnt |
1c79356b A |
411 | if (so) { |
412 | so->so_gencnt = ++so_gencnt; | |
413 | so->so_zone = socket_zone; | |
414 | } | |
415 | ||
416 | return so; | |
417 | } | |
418 | ||
419 | int | |
420 | socreate(dom, aso, type, proto) | |
421 | int dom; | |
422 | struct socket **aso; | |
423 | register int type; | |
424 | int proto; | |
1c79356b A |
425 | { |
426 | struct proc *p = current_proc(); | |
427 | register struct protosw *prp; | |
9bccf70c | 428 | register struct socket *so; |
1c79356b | 429 | register int error = 0; |
55e303ae A |
430 | #if TCPDEBUG |
431 | extern int tcpconsdebug; | |
432 | #endif | |
1c79356b A |
433 | if (proto) |
434 | prp = pffindproto(dom, proto, type); | |
435 | else | |
436 | prp = pffindtype(dom, type); | |
9bccf70c | 437 | |
1c79356b A |
438 | if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) |
439 | return (EPROTONOSUPPORT); | |
9bccf70c A |
440 | #ifndef __APPLE__ |
441 | ||
442 | if (p->p_prison && jail_socket_unixiproute_only && | |
443 | prp->pr_domain->dom_family != PF_LOCAL && | |
444 | prp->pr_domain->dom_family != PF_INET && | |
445 | prp->pr_domain->dom_family != PF_ROUTE) { | |
446 | return (EPROTONOSUPPORT); | |
447 | } | |
448 | ||
449 | #endif | |
1c79356b A |
450 | if (prp->pr_type != type) |
451 | return (EPROTOTYPE); | |
452 | so = soalloc(p != 0, dom, type); | |
453 | if (so == 0) | |
454 | return (ENOBUFS); | |
455 | ||
456 | TAILQ_INIT(&so->so_incomp); | |
457 | TAILQ_INIT(&so->so_comp); | |
458 | so->so_type = type; | |
459 | ||
9bccf70c | 460 | #ifdef __APPLE__ |
1c79356b | 461 | if (p != 0) { |
91447636 A |
462 | so->so_uid = kauth_cred_getuid(kauth_cred_get()); |
463 | if (!suser(kauth_cred_get(),NULL)) | |
1c79356b | 464 | so->so_state = SS_PRIV; |
1c79356b | 465 | } |
9bccf70c | 466 | #else |
91447636 | 467 | so->so_cred = kauth_cred_get_with_ref(); |
9bccf70c | 468 | #endif |
1c79356b | 469 | so->so_proto = prp; |
9bccf70c | 470 | #ifdef __APPLE__ |
1c79356b | 471 | so->so_rcv.sb_flags |= SB_RECV; /* XXX */ |
91447636 | 472 | so->so_rcv.sb_so = so->so_snd.sb_so = so; |
9bccf70c | 473 | #endif |
91447636 A |
474 | |
475 | //### Attachement will create the per pcb lock if necessary and increase refcount | |
37839358 | 476 | so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */ |
91447636 A |
477 | |
478 | error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); | |
1c79356b | 479 | if (error) { |
55e303ae A |
480 | /* |
481 | * Warning: | |
482 | * If so_pcb is not zero, the socket will be leaked, | |
483 | * so protocol attachment handler must be coded carefuly | |
484 | */ | |
1c79356b | 485 | so->so_state |= SS_NOFDREF; |
37839358 A |
486 | so->so_usecount--; |
487 | sofreelastref(so, 1); /* will deallocate the socket */ | |
1c79356b A |
488 | return (error); |
489 | } | |
9bccf70c | 490 | #ifdef __APPLE__ |
1c79356b | 491 | prp->pr_domain->dom_refs++; |
1c79356b | 492 | TAILQ_INIT(&so->so_evlist); |
91447636 A |
493 | |
494 | /* Attach socket filters for this protocol */ | |
495 | sflt_initsock(so); | |
55e303ae A |
496 | #if TCPDEBUG |
497 | if (tcpconsdebug == 2) | |
498 | so->so_options |= SO_DEBUG; | |
499 | #endif | |
9bccf70c | 500 | #endif |
55e303ae | 501 | |
1c79356b A |
502 | *aso = so; |
503 | return (0); | |
504 | } | |
505 | ||
506 | int | |
507 | sobind(so, nam) | |
508 | struct socket *so; | |
509 | struct sockaddr *nam; | |
510 | ||
511 | { | |
512 | struct proc *p = current_proc(); | |
91447636 A |
513 | int error = 0; |
514 | struct socket_filter_entry *filter; | |
515 | int filtered = 0; | |
1c79356b | 516 | |
91447636 A |
517 | socket_lock(so, 1); |
518 | ||
519 | /* Socket filter */ | |
520 | error = 0; | |
521 | for (filter = so->so_filt; filter && (error == 0); | |
522 | filter = filter->sfe_next_onsocket) { | |
523 | if (filter->sfe_filter->sf_filter.sf_bind) { | |
524 | if (filtered == 0) { | |
525 | filtered = 1; | |
526 | sflt_use(so); | |
527 | socket_unlock(so, 0); | |
1c79356b | 528 | } |
91447636 A |
529 | error = filter->sfe_filter->sf_filter.sf_bind( |
530 | filter->sfe_cookie, so, nam); | |
1c79356b A |
531 | } |
532 | } | |
91447636 A |
533 | if (filtered != 0) { |
534 | socket_lock(so, 0); | |
535 | sflt_unuse(so); | |
536 | } | |
537 | /* End socket filter */ | |
538 | ||
539 | if (error == 0) | |
540 | error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); | |
541 | ||
542 | socket_unlock(so, 1); | |
543 | ||
544 | if (error == EJUSTRETURN) | |
545 | error = 0; | |
546 | ||
1c79356b A |
547 | return (error); |
548 | } | |
549 | ||
550 | void | |
551 | sodealloc(so) | |
552 | struct socket *so; | |
553 | { | |
554 | so->so_gencnt = ++so_gencnt; | |
555 | ||
9bccf70c A |
556 | #ifndef __APPLE__ |
557 | if (so->so_rcv.sb_hiwat) | |
558 | (void)chgsbsize(so->so_cred->cr_uidinfo, | |
559 | &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); | |
560 | if (so->so_snd.sb_hiwat) | |
561 | (void)chgsbsize(so->so_cred->cr_uidinfo, | |
562 | &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); | |
563 | #ifdef INET | |
564 | if (so->so_accf != NULL) { | |
565 | if (so->so_accf->so_accept_filter != NULL && | |
566 | so->so_accf->so_accept_filter->accf_destroy != NULL) { | |
567 | so->so_accf->so_accept_filter->accf_destroy(so); | |
568 | } | |
569 | if (so->so_accf->so_accept_filter_str != NULL) | |
570 | FREE(so->so_accf->so_accept_filter_str, M_ACCF); | |
571 | FREE(so->so_accf, M_ACCF); | |
572 | } | |
573 | #endif /* INET */ | |
21362eb3 | 574 | kauth_cred_rele(so->so_cred); |
9bccf70c A |
575 | zfreei(so->so_zone, so); |
576 | #else | |
1c79356b A |
577 | if (so->cached_in_sock_layer == 1) |
578 | cached_sock_free(so); | |
91447636 A |
579 | else { |
580 | if (so->cached_in_sock_layer == -1) | |
581 | panic("sodealloc: double dealloc: so=%x\n", so); | |
582 | so->cached_in_sock_layer = -1; | |
583 | FREE_ZONE(so, sizeof(*so), so->so_zone); | |
584 | } | |
9bccf70c | 585 | #endif /* __APPLE__ */ |
1c79356b A |
586 | } |
587 | ||
588 | int | |
589 | solisten(so, backlog) | |
590 | register struct socket *so; | |
591 | int backlog; | |
592 | ||
593 | { | |
1c79356b | 594 | struct proc *p = current_proc(); |
91447636 | 595 | int error; |
1c79356b | 596 | |
91447636 A |
597 | socket_lock(so, 1); |
598 | ||
599 | { | |
600 | struct socket_filter_entry *filter; | |
601 | int filtered = 0; | |
602 | error = 0; | |
603 | for (filter = so->so_filt; filter && (error == 0); | |
604 | filter = filter->sfe_next_onsocket) { | |
605 | if (filter->sfe_filter->sf_filter.sf_listen) { | |
606 | if (filtered == 0) { | |
607 | filtered = 1; | |
608 | sflt_use(so); | |
609 | socket_unlock(so, 0); | |
610 | } | |
611 | error = filter->sfe_filter->sf_filter.sf_listen( | |
612 | filter->sfe_cookie, so); | |
613 | } | |
614 | } | |
615 | if (filtered != 0) { | |
616 | socket_lock(so, 0); | |
617 | sflt_unuse(so); | |
618 | } | |
619 | } | |
620 | ||
621 | if (error == 0) { | |
622 | error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); | |
623 | } | |
624 | ||
1c79356b | 625 | if (error) { |
91447636 A |
626 | socket_unlock(so, 1); |
627 | if (error == EJUSTRETURN) | |
628 | error = 0; | |
1c79356b A |
629 | return (error); |
630 | } | |
91447636 A |
631 | |
632 | if (TAILQ_EMPTY(&so->so_comp)) | |
1c79356b A |
633 | so->so_options |= SO_ACCEPTCONN; |
634 | if (backlog < 0 || backlog > somaxconn) | |
635 | backlog = somaxconn; | |
636 | so->so_qlimit = backlog; | |
1c79356b | 637 | |
91447636 | 638 | socket_unlock(so, 1); |
1c79356b A |
639 | return (0); |
640 | } | |
641 | ||
1c79356b | 642 | void |
91447636 | 643 | sofreelastref(so, dealloc) |
1c79356b | 644 | register struct socket *so; |
91447636 | 645 | int dealloc; |
9bccf70c A |
646 | { |
647 | int error; | |
1c79356b A |
648 | struct socket *head = so->so_head; |
649 | ||
91447636 | 650 | /*### Assume socket is locked */ |
1c79356b | 651 | |
3a60a9f5 A |
652 | /* Remove any filters - may be called more than once */ |
653 | sflt_termsock(so); | |
654 | ||
91447636 | 655 | if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) { |
9bccf70c | 656 | #ifdef __APPLE__ |
0b4e3aa0 A |
657 | selthreadclear(&so->so_snd.sb_sel); |
658 | selthreadclear(&so->so_rcv.sb_sel); | |
cc9f6e38 A |
659 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
660 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
9bccf70c | 661 | #endif |
1c79356b | 662 | return; |
0b4e3aa0 | 663 | } |
9bccf70c | 664 | if (head != NULL) { |
91447636 | 665 | socket_lock(head, 1); |
9bccf70c A |
666 | if (so->so_state & SS_INCOMP) { |
667 | TAILQ_REMOVE(&head->so_incomp, so, so_list); | |
668 | head->so_incqlen--; | |
669 | } else if (so->so_state & SS_COMP) { | |
670 | /* | |
671 | * We must not decommission a socket that's | |
672 | * on the accept(2) queue. If we do, then | |
673 | * accept(2) may hang after select(2) indicated | |
674 | * that the listening socket was ready. | |
675 | */ | |
676 | #ifdef __APPLE__ | |
677 | selthreadclear(&so->so_snd.sb_sel); | |
678 | selthreadclear(&so->so_rcv.sb_sel); | |
cc9f6e38 A |
679 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
680 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
9bccf70c | 681 | #endif |
91447636 | 682 | socket_unlock(head, 1); |
9bccf70c A |
683 | return; |
684 | } else { | |
685 | panic("sofree: not queued"); | |
686 | } | |
1c79356b | 687 | head->so_qlen--; |
9bccf70c | 688 | so->so_state &= ~SS_INCOMP; |
1c79356b | 689 | so->so_head = NULL; |
91447636 | 690 | socket_unlock(head, 1); |
1c79356b | 691 | } |
9bccf70c | 692 | #ifdef __APPLE__ |
0b4e3aa0 | 693 | selthreadclear(&so->so_snd.sb_sel); |
1c79356b | 694 | sbrelease(&so->so_snd); |
9bccf70c | 695 | #endif |
1c79356b | 696 | sorflush(so); |
91447636 A |
697 | |
698 | /* 3932268: disable upcall */ | |
699 | so->so_rcv.sb_flags &= ~SB_UPCALL; | |
700 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
701 | ||
702 | if (dealloc) | |
703 | sodealloc(so); | |
1c79356b A |
704 | } |
705 | ||
706 | /* | |
707 | * Close a socket on last file table reference removal. | |
708 | * Initiate disconnect if connected. | |
709 | * Free socket when disconnect complete. | |
710 | */ | |
711 | int | |
91447636 | 712 | soclose_locked(so) |
1c79356b A |
713 | register struct socket *so; |
714 | { | |
1c79356b | 715 | int error = 0; |
91447636 A |
716 | lck_mtx_t * mutex_held; |
717 | struct timespec ts; | |
1c79356b | 718 | |
91447636 A |
719 | if (so->so_usecount == 0) { |
720 | panic("soclose: so=%x refcount=0\n", so); | |
1c79356b A |
721 | } |
722 | ||
91447636 A |
723 | sflt_notify(so, sock_evt_closing, NULL); |
724 | ||
725 | if ((so->so_options & SO_ACCEPTCONN)) { | |
726 | struct socket *sp; | |
727 | ||
728 | /* We do not want new connection to be added to the connection queues */ | |
729 | so->so_options &= ~SO_ACCEPTCONN; | |
730 | ||
731 | while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { | |
732 | /* A bit tricky here. We need to keep | |
733 | * a lock if it's a protocol global lock | |
734 | * but we want the head, not the socket locked | |
735 | * in the case of per-socket lock... | |
736 | */ | |
ff6e181a | 737 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 738 | socket_unlock(so, 0); |
ff6e181a A |
739 | socket_lock(sp, 1); |
740 | } | |
91447636 | 741 | (void) soabort(sp); |
ff6e181a | 742 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 743 | socket_unlock(sp, 1); |
ff6e181a A |
744 | socket_lock(so, 0); |
745 | } | |
91447636 A |
746 | } |
747 | ||
748 | while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { | |
91447636 A |
749 | /* Dequeue from so_comp since sofree() won't do it */ |
750 | TAILQ_REMOVE(&so->so_comp, sp, so_list); | |
751 | so->so_qlen--; | |
ff6e181a A |
752 | |
753 | if (so->so_proto->pr_getlock != NULL) { | |
754 | socket_unlock(so, 0); | |
755 | socket_lock(sp, 1); | |
756 | } | |
757 | ||
91447636 A |
758 | sp->so_state &= ~SS_COMP; |
759 | sp->so_head = NULL; | |
760 | ||
91447636 | 761 | (void) soabort(sp); |
ff6e181a | 762 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 763 | socket_unlock(sp, 1); |
ff6e181a A |
764 | socket_lock(so, 0); |
765 | } | |
91447636 A |
766 | } |
767 | } | |
768 | if (so->so_pcb == 0) { | |
769 | /* 3915887: mark the socket as ready for dealloc */ | |
770 | so->so_flags |= SOF_PCBCLEARING; | |
1c79356b | 771 | goto discard; |
91447636 | 772 | } |
1c79356b A |
773 | if (so->so_state & SS_ISCONNECTED) { |
774 | if ((so->so_state & SS_ISDISCONNECTING) == 0) { | |
91447636 | 775 | error = sodisconnectlocked(so); |
1c79356b A |
776 | if (error) |
777 | goto drop; | |
778 | } | |
779 | if (so->so_options & SO_LINGER) { | |
780 | if ((so->so_state & SS_ISDISCONNECTING) && | |
781 | (so->so_state & SS_NBIO)) | |
782 | goto drop; | |
91447636 A |
783 | if (so->so_proto->pr_getlock != NULL) |
784 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
785 | else | |
786 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
1c79356b | 787 | while (so->so_state & SS_ISCONNECTED) { |
91447636 A |
788 | ts.tv_sec = (so->so_linger/100); |
789 | ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10; | |
790 | error = msleep((caddr_t)&so->so_timeo, mutex_held, | |
791 | PSOCK | PCATCH, "soclos", &ts); | |
792 | if (error) { | |
793 | /* It's OK when the time fires, don't report an error */ | |
794 | if (error == EWOULDBLOCK) | |
795 | error = 0; | |
1c79356b | 796 | break; |
91447636 | 797 | } |
1c79356b A |
798 | } |
799 | } | |
800 | } | |
801 | drop: | |
91447636 A |
802 | if (so->so_usecount == 0) |
803 | panic("soclose: usecount is zero so=%x\n", so); | |
804 | if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) { | |
1c79356b A |
805 | int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); |
806 | if (error == 0) | |
807 | error = error2; | |
808 | } | |
91447636 A |
809 | if (so->so_usecount <= 0) |
810 | panic("soclose: usecount is zero so=%x\n", so); | |
1c79356b | 811 | discard: |
e3027f41 | 812 | if (so->so_pcb && so->so_state & SS_NOFDREF) |
1c79356b A |
813 | panic("soclose: NOFDREF"); |
814 | so->so_state |= SS_NOFDREF; | |
9bccf70c | 815 | #ifdef __APPLE__ |
1c79356b A |
816 | so->so_proto->pr_domain->dom_refs--; |
817 | evsofree(so); | |
9bccf70c | 818 | #endif |
91447636 | 819 | so->so_usecount--; |
1c79356b | 820 | sofree(so); |
1c79356b A |
821 | return (error); |
822 | } | |
823 | ||
91447636 A |
824 | int |
825 | soclose(so) | |
826 | register struct socket *so; | |
827 | { | |
828 | int error = 0; | |
829 | socket_lock(so, 1); | |
830 | if (so->so_retaincnt == 0) | |
831 | error = soclose_locked(so); | |
832 | else { /* if the FD is going away, but socket is retained in kernel remove its reference */ | |
833 | so->so_usecount--; | |
834 | if (so->so_usecount < 2) | |
835 | panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount); | |
836 | } | |
837 | socket_unlock(so, 1); | |
838 | return (error); | |
839 | } | |
840 | ||
841 | ||
1c79356b A |
842 | /* |
843 | * Must be called at splnet... | |
844 | */ | |
91447636 | 845 | //#### Should already be locked |
1c79356b A |
846 | int |
847 | soabort(so) | |
848 | struct socket *so; | |
849 | { | |
9bccf70c | 850 | int error; |
1c79356b | 851 | |
91447636 A |
852 | #ifdef MORE_LOCKING_DEBUG |
853 | lck_mtx_t * mutex_held; | |
854 | ||
855 | if (so->so_proto->pr_getlock != NULL) | |
856 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
857 | else | |
858 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
859 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
860 | #endif | |
861 | ||
9bccf70c A |
862 | error = (*so->so_proto->pr_usrreqs->pru_abort)(so); |
863 | if (error) { | |
864 | sofree(so); | |
865 | return error; | |
866 | } | |
867 | return (0); | |
1c79356b A |
868 | } |
869 | ||
870 | int | |
91447636 | 871 | soacceptlock(so, nam, dolock) |
1c79356b A |
872 | register struct socket *so; |
873 | struct sockaddr **nam; | |
91447636 | 874 | int dolock; |
9bccf70c | 875 | { |
1c79356b | 876 | int error; |
91447636 A |
877 | |
878 | if (dolock) socket_lock(so, 1); | |
1c79356b A |
879 | |
880 | if ((so->so_state & SS_NOFDREF) == 0) | |
881 | panic("soaccept: !NOFDREF"); | |
882 | so->so_state &= ~SS_NOFDREF; | |
883 | error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); | |
1c79356b | 884 | |
91447636 | 885 | if (dolock) socket_unlock(so, 1); |
1c79356b A |
886 | return (error); |
887 | } | |
/*
 * Locking wrapper around soacceptlock(): accept a connection,
 * acquiring and dropping the socket lock internally.
 */
int
soaccept(so, nam)
	register struct socket *so;
	struct sockaddr **nam;
{
	return (soacceptlock(so, nam, 1));
}
1c79356b A |
895 | |
896 | int | |
91447636 | 897 | soconnectlock(so, nam, dolock) |
1c79356b A |
898 | register struct socket *so; |
899 | struct sockaddr *nam; | |
91447636 | 900 | int dolock; |
1c79356b A |
901 | |
902 | { | |
903 | int s; | |
904 | int error; | |
905 | struct proc *p = current_proc(); | |
1c79356b | 906 | |
91447636 A |
907 | if (dolock) socket_lock(so, 1); |
908 | ||
909 | if (so->so_options & SO_ACCEPTCONN) { | |
910 | if (dolock) socket_unlock(so, 1); | |
1c79356b | 911 | return (EOPNOTSUPP); |
91447636 | 912 | } |
1c79356b A |
913 | /* |
914 | * If protocol is connection-based, can only connect once. | |
915 | * Otherwise, if connected, try to disconnect first. | |
916 | * This allows user to disconnect by connecting to, e.g., | |
917 | * a null address. | |
918 | */ | |
919 | if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && | |
920 | ((so->so_proto->pr_flags & PR_CONNREQUIRED) || | |
91447636 | 921 | (error = sodisconnectlocked(so)))) |
1c79356b A |
922 | error = EISCONN; |
923 | else { | |
91447636 A |
924 | /* |
925 | * Run connect filter before calling protocol: | |
926 | * - non-blocking connect returns before completion; | |
927 | */ | |
928 | { | |
929 | struct socket_filter_entry *filter; | |
930 | int filtered = 0; | |
931 | error = 0; | |
932 | for (filter = so->so_filt; filter && (error == 0); | |
933 | filter = filter->sfe_next_onsocket) { | |
934 | if (filter->sfe_filter->sf_filter.sf_connect_out) { | |
935 | if (filtered == 0) { | |
936 | filtered = 1; | |
937 | sflt_use(so); | |
938 | socket_unlock(so, 0); | |
939 | } | |
940 | error = filter->sfe_filter->sf_filter.sf_connect_out( | |
941 | filter->sfe_cookie, so, nam); | |
942 | } | |
943 | } | |
944 | if (filtered != 0) { | |
945 | socket_lock(so, 0); | |
946 | sflt_unuse(so); | |
947 | } | |
948 | } | |
949 | if (error) { | |
950 | if (error == EJUSTRETURN) | |
951 | error = 0; | |
952 | if (dolock) socket_unlock(so, 1); | |
953 | return error; | |
954 | } | |
955 | ||
1c79356b | 956 | error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); |
1c79356b | 957 | } |
91447636 | 958 | if (dolock) socket_unlock(so, 1); |
1c79356b A |
959 | return (error); |
960 | } | |
961 | ||
91447636 A |
/*
 * Locking wrapper around soconnectlock(): initiate a connection,
 * acquiring and dropping the socket lock internally.
 */
int
soconnect(so, nam)
	register struct socket *so;
	struct sockaddr *nam;
{
	return (soconnectlock(so, nam, 1));
}
969 | ||
1c79356b A |
970 | int |
971 | soconnect2(so1, so2) | |
972 | register struct socket *so1; | |
973 | struct socket *so2; | |
974 | { | |
1c79356b | 975 | int error; |
21362eb3 | 976 | //####### Assumes so1 is already locked / |
91447636 | 977 | |
21362eb3 | 978 | socket_lock(so2, 1); |
1c79356b A |
979 | |
980 | error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); | |
91447636 | 981 | |
21362eb3 | 982 | socket_unlock(so2, 1); |
1c79356b A |
983 | return (error); |
984 | } | |
985 | ||
91447636 | 986 | |
1c79356b | 987 | int |
91447636 | 988 | sodisconnectlocked(so) |
1c79356b A |
989 | register struct socket *so; |
990 | { | |
1c79356b | 991 | int error; |
1c79356b A |
992 | |
993 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
994 | error = ENOTCONN; | |
995 | goto bad; | |
996 | } | |
997 | if (so->so_state & SS_ISDISCONNECTING) { | |
998 | error = EALREADY; | |
999 | goto bad; | |
1000 | } | |
91447636 | 1001 | |
1c79356b | 1002 | error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); |
91447636 | 1003 | |
9bccf70c | 1004 | if (error == 0) { |
91447636 | 1005 | sflt_notify(so, sock_evt_disconnected, NULL); |
1c79356b A |
1006 | } |
1007 | ||
1008 | bad: | |
1c79356b A |
1009 | return (error); |
1010 | } | |
91447636 A |
/*
 * Locking version of sodisconnectlocked(): take the socket lock,
 * disconnect, release the lock.
 */
int
sodisconnect(so)
	register struct socket *so;
{
	int result;

	socket_lock(so, 1);
	result = sodisconnectlocked(so);
	socket_unlock(so, 1);

	return (result);
}
1c79356b A |
1023 | |
/*
 * Map the caller's MSG_DONTWAIT flag to the wait flag expected by
 * sblock(): non-blocking callers get M_DONTWAIT, all others M_WAIT.
 */
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)
91447636 A |
1025 | |
/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 *
 * Called from sosend() with the socket lock held.  On return 0 the
 * send may proceed; otherwise an errno is returned (EPIPE, ENOTCONN,
 * EDESTADDRREQ, EMSGSIZE, EWOULDBLOCK, a pending so_error, or a
 * failure from sblock()/sbwait()).  *sblocked is set to 1 once this
 * function has taken the send-buffer lock, so the caller knows to
 * sbunlock() on its release path.
 */

static int
sosendcheck(
	struct socket *so,
	struct sockaddr *addr,
	long resid,
	long clen,
	long atomic,
	int flags,
	int *sblocked)
{
	int	error = 0;
	long space;
	int	assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
			so->so_send_filt_thread != 0 &&
			so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		}
		else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				return error;
			}
			*sblocked = 1;
		}
	}

	/* Writes after shutdown(SHUT_WR) / peer reset get EPIPE. */
	if (so->so_state & SS_CANTSENDMORE)
		return EPIPE;

	/* Deliver (and clear) any asynchronously recorded socket error. */
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return error;
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' is allowed on a connection-
		 * based socket if it supports implied connect.
		 * Return ENOTCONN if not connected and no address is
		 * supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			(so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				!(resid == 0 && clen != 0))
				return ENOTCONN;
		} else if (addr == 0 && !(flags&MSG_HOLD))
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
	}
	space = sbspace(&so->so_snd);
	/* Out-of-band data gets a little extra headroom in the buffer. */
	if (flags & MSG_OOB)
		space += 1024;
	/* An atomic send larger than the buffer can never succeed. */
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
		clen > so->so_snd.sb_hiwat)
		return EMSGSIZE;
	if (space < resid + clen &&
		(atomic || space < so->so_snd.sb_lowat || space < clen)) {
		/*
		 * Not enough room: non-blocking sockets (and the
		 * recursive-filter case, which must not sleep holding the
		 * caller's sb lock) bail out immediately.
		 */
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
			return EWOULDBLOCK;
		}
		/*
		 * Drop the send-buffer lock (keeping the socket lock) and
		 * sleep until space appears.
		 *
		 * NOTE(review): *sblocked remains 1 across this
		 * sbunlock/goto restart, so the sb lock is not re-acquired
		 * on the retry path — presumably relying on sosend()'s
		 * release path semantics; verify against sbunlock()'s
		 * contract in uipc_socket2.c.
		 */
		sbunlock(&so->so_snd, 1);
		error = sbwait(&so->so_snd);
		if (error) {
			return error;
		}
		goto restart;
	}

	return 0;
}
1112 | ||
1c79356b A |
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 * point at the mbuf chain being constructed and go from there.
 *
 * Locking: takes the socket lock itself; the lock is dropped and
 * retaken around uiomove() (data copy-in from user space) and around
 * socket-filter callouts, so socket state must be re-validated after
 * those windows.  The send-buffer lock is acquired by sosendcheck()
 * (tracked via `sblocked`) and released on the `release` path.
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;

{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	/* Bytes still to send: from the uio, or the prepackaged chain. */
	if (uio)
		// LP64todo - fix this!
		resid = uio_resid(uio);
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
	    so,
	    resid,
	    so->so_snd.sb_cc,
	    so->so_snd.sb_lowat,
	    so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid. On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;

	do {
		/* Acquire sb lock / verify space; sets sblocked on success. */
		error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
		if (error) {
			goto release;
		}
		mp = &top;
		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);

		do {

			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				/*
				 * chainlength limits how many bytes are chained
				 * per pass; starting it at sosendmaxchain when
				 * sosendminchain <= 0 disables chaining.
				 */
				if (sosendminchain > 0) {
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				/* Drop the lock for the (possibly faulting) copy-in. */
				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf clusters needed to complete this write
					 * the list is further limited to the number that are currently needed to fill the socket
					 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
					 * mbuf lock... if we can't get any clusters, than fall back to trying for mbufs
					 * if we fail early (or miscalcluate the number needed) make sure to release any clusters
					 * we haven't yet consumed.
					 */
					if (freelist == NULL && bytes_to_copy > MCLBYTES) {
						num_needed = bytes_to_copy / NBPG;

						if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
						/* Fall back to cluster size if allocation failed */
					}

					if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
						num_needed = bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
						/* Fall back to a single mbuf if allocation failed */
					}

					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist, M_WAIT, MT_DATA);
						else
							MGET(freelist, M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							/* Retake the lock before the shared cleanup path. */
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && bytes_to_copy < MHLEN)
							MH_ALIGN(freelist, bytes_to_copy);
					}
					/* Detach the first mbuf from the local freelist. */
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					/* Usable bytes in this mbuf, by storage class. */
					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen = MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = min(mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					/* Copy user data into the mbuf (socket unlocked). */
					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					// LP64todo - fix this!
					resid = uio_resid(uio);

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				/* MSG_FLUSH discards any previously held chain. */
				if (so->so_temp && (flags & MSG_FLUSH))
				{
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{
					top = NULL;
					goto release;
				}
				/* MSG_SEND: transmit the accumulated held chain. */
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
			        PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			{
				struct socket_filter_entry *filter;
				int filtered;

				filtered = 0;
				error = 0;
				for (filter = so->so_filt; filter && (error == 0);
				    filter = filter->sfe_next_onsocket) {
					if (filter->sfe_filter->sf_filter.sf_data_out) {
						int so_flags = 0;
						/*
						 * First interested filter: record the
						 * sending thread (lets sosendcheck()
						 * detect recursive entry), pin the
						 * filter chain, and drop the socket
						 * lock for the callout.
						 */
						if (filtered == 0) {
							filtered = 1;
							so->so_send_filt_thread = current_thread();
							sflt_use(so);
							socket_unlock(so, 0);
							/*
							 * NOTE(review): tests sendflags
							 * (PRUS_OOB) rather than flags
							 * (MSG_OOB) — presumably relies
							 * on the two constants having
							 * the same value; confirm.
							 */
							so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
						}
						error = filter->sfe_filter->sf_filter.sf_data_out(
						    filter->sfe_cookie, so, addr, &top, &control, so_flags);
					}
				}

				if (filtered) {
					/*
					 * At this point, we've run at least one filter.
					 * The socket is unlocked as is the socket buffer.
					 */
					socket_lock(so, 0);
					sflt_unuse(so);
					so->so_send_filt_thread = 0;
					if (error) {
						/*
						 * EJUSTRETURN: a filter consumed the
						 * data; clear the pointers so the
						 * cleanup path does not free mbufs
						 * the filter now owns.
						 */
						if (error == EJUSTRETURN) {
							error = 0;
							clen = 0;
							control = 0;
							top = 0;
						}

						goto release;
					}
				}
			}
			/*
			 * End Socket filter processing
			 */

			if (error == EJUSTRETURN) {
				/* A socket filter handled this data */
				error = 0;
			}
			else {
				/* Hand the chain to the protocol; it consumes top/control. */
				error = (*so->so_proto->pr_usrreqs->pru_send)(so,
				    sendflags, top, addr, control, p);
			}
#ifdef __APPLE__
			if (flags & MSG_SEND)
				so->so_temp = NULL;
#endif
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Ownership passed to the protocol: forget our references. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	/* sosendcheck() took the sb lock iff sblocked was set. */
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	/* Free anything not consumed by the protocol or a filter. */
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
	    so,
	    resid,
	    so->so_snd.sb_cc,
	    space,
	    error);

	return (error);
}
1447 | ||
1448 | /* | |
1449 | * Implement receive operations on a socket. | |
1450 | * We depend on the way that records are added to the sockbuf | |
1451 | * by sbappend*. In particular, each record (mbufs linked through m_next) | |
1452 | * must begin with an address if the protocol so specifies, | |
1453 | * followed by an optional mbuf or mbufs containing ancillary data, | |
1454 | * and then zero or more mbufs of data. | |
1455 | * In order to avoid blocking network interrupts for the entire time here, | |
1456 | * we splx() while doing the actual copy to user space. | |
1457 | * Although the sockbuf is locked, new data may still be appended, | |
1458 | * and thus we must maintain consistency of the sockbuf during that time. | |
1459 | * | |
1460 | * The caller may receive the data as a single mbuf chain by supplying | |
1461 | * an mbuf **mp0 for use in returning the chain. The uio is then used | |
1462 | * only for the count in uio_resid. | |
1463 | */ | |
1464 | int | |
1465 | soreceive(so, psa, uio, mp0, controlp, flagsp) | |
1466 | register struct socket *so; | |
1467 | struct sockaddr **psa; | |
1468 | struct uio *uio; | |
1469 | struct mbuf **mp0; | |
1470 | struct mbuf **controlp; | |
1471 | int *flagsp; | |
1472 | { | |
91447636 A |
1473 | register struct mbuf *m, **mp, *ml = NULL; |
1474 | register int flags, len, error, offset; | |
1c79356b A |
1475 | struct protosw *pr = so->so_proto; |
1476 | struct mbuf *nextrecord; | |
1477 | int moff, type = 0; | |
91447636 A |
1478 | // LP64todo - fix this! |
1479 | int orig_resid = uio_resid(uio); | |
55e303ae A |
1480 | volatile struct mbuf *free_list; |
1481 | volatile int delayed_copy_len; | |
1482 | int can_delay; | |
1483 | int need_event; | |
1484 | struct proc *p = current_proc(); | |
1485 | ||
1486 | ||
91447636 | 1487 | // LP64todo - fix this! |
1c79356b A |
1488 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, |
1489 | so, | |
91447636 | 1490 | uio_resid(uio), |
1c79356b A |
1491 | so->so_rcv.sb_cc, |
1492 | so->so_rcv.sb_lowat, | |
1493 | so->so_rcv.sb_hiwat); | |
1494 | ||
91447636 | 1495 | socket_lock(so, 1); |
1c79356b | 1496 | |
91447636 A |
1497 | #ifdef MORE_LOCKING_DEBUG |
1498 | if (so->so_usecount == 1) | |
1499 | panic("soreceive: so=%x no other reference on socket\n", so); | |
1500 | #endif | |
1c79356b A |
1501 | mp = mp0; |
1502 | if (psa) | |
1503 | *psa = 0; | |
1504 | if (controlp) | |
1505 | *controlp = 0; | |
1506 | if (flagsp) | |
1507 | flags = *flagsp &~ MSG_EOR; | |
1508 | else | |
1509 | flags = 0; | |
1510 | /* | |
1511 | * When SO_WANTOOBFLAG is set we try to get out-of-band data | |
1512 | * regardless of the flags argument. Here is the case were | |
1513 | * out-of-band data is not inline. | |
1514 | */ | |
1515 | if ((flags & MSG_OOB) || | |
1516 | ((so->so_options & SO_WANTOOBFLAG) != 0 && | |
1517 | (so->so_options & SO_OOBINLINE) == 0 && | |
1518 | (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) { | |
1519 | m = m_get(M_WAIT, MT_DATA); | |
55e303ae | 1520 | if (m == NULL) { |
91447636 | 1521 | socket_unlock(so, 1); |
55e303ae | 1522 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0); |
9bccf70c | 1523 | return (ENOBUFS); |
55e303ae | 1524 | } |
1c79356b A |
1525 | error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); |
1526 | if (error) | |
1527 | goto bad; | |
91447636 | 1528 | socket_unlock(so, 0); |
1c79356b | 1529 | do { |
91447636 | 1530 | // LP64todo - fix this! |
1c79356b | 1531 | error = uiomove(mtod(m, caddr_t), |
91447636 | 1532 | (int) min(uio_resid(uio), m->m_len), uio); |
1c79356b | 1533 | m = m_free(m); |
91447636 A |
1534 | } while (uio_resid(uio) && error == 0 && m); |
1535 | socket_lock(so, 0); | |
1c79356b A |
1536 | bad: |
1537 | if (m) | |
1538 | m_freem(m); | |
9bccf70c A |
1539 | #ifdef __APPLE__ |
1540 | if ((so->so_options & SO_WANTOOBFLAG) != 0) { | |
1541 | if (error == EWOULDBLOCK || error == EINVAL) { | |
1542 | /* | |
1543 | * Let's try to get normal data: | |
1544 | * EWOULDBLOCK: out-of-band data not receive yet; | |
1545 | * EINVAL: out-of-band data already read. | |
1546 | */ | |
1547 | error = 0; | |
1548 | goto nooob; | |
1549 | } else if (error == 0 && flagsp) | |
1550 | *flagsp |= MSG_OOB; | |
91447636 A |
1551 | } |
1552 | socket_unlock(so, 1); | |
1c79356b | 1553 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
9bccf70c | 1554 | #endif |
1c79356b A |
1555 | return (error); |
1556 | } | |
1557 | nooob: | |
1558 | if (mp) | |
1559 | *mp = (struct mbuf *)0; | |
91447636 | 1560 | if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) |
1c79356b A |
1561 | (*pr->pr_usrreqs->pru_rcvd)(so, 0); |
1562 | ||
55e303ae A |
1563 | |
1564 | free_list = (struct mbuf *)0; | |
1565 | delayed_copy_len = 0; | |
1c79356b | 1566 | restart: |
91447636 A |
1567 | #ifdef MORE_LOCKING_DEBUG |
1568 | if (so->so_usecount <= 1) | |
1569 | printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount); | |
1570 | #endif | |
9bccf70c A |
1571 | error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); |
1572 | if (error) { | |
91447636 | 1573 | socket_unlock(so, 1); |
1c79356b A |
1574 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
1575 | return (error); | |
1576 | } | |
1c79356b A |
1577 | |
1578 | m = so->so_rcv.sb_mb; | |
1579 | /* | |
1580 | * If we have less data than requested, block awaiting more | |
1581 | * (subject to any timeout) if: | |
1582 | * 1. the current count is less than the low water mark, or | |
1583 | * 2. MSG_WAITALL is set, and it is possible to do the entire | |
1584 | * receive operation at once if we block (resid <= hiwat). | |
1585 | * 3. MSG_DONTWAIT is not set | |
1586 | * If MSG_WAITALL is set but resid is larger than the receive buffer, | |
1587 | * we have to do the receive in sections, and thus risk returning | |
1588 | * a short count if a timeout or signal occurs after we start. | |
1589 | */ | |
1590 | if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && | |
91447636 | 1591 | so->so_rcv.sb_cc < uio_resid(uio)) && |
55e303ae | 1592 | (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || |
91447636 | 1593 | ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) && |
1c79356b | 1594 | m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { |
55e303ae | 1595 | |
1c79356b A |
1596 | KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); |
1597 | if (so->so_error) { | |
1598 | if (m) | |
1599 | goto dontblock; | |
1600 | error = so->so_error; | |
1601 | if ((flags & MSG_PEEK) == 0) | |
1602 | so->so_error = 0; | |
1603 | goto release; | |
1604 | } | |
1605 | if (so->so_state & SS_CANTRCVMORE) { | |
1606 | if (m) | |
1607 | goto dontblock; | |
1608 | else | |
1609 | goto release; | |
1610 | } | |
1611 | for (; m; m = m->m_next) | |
1612 | if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { | |
1613 | m = so->so_rcv.sb_mb; | |
1614 | goto dontblock; | |
1615 | } | |
1616 | if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && | |
1617 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) { | |
1618 | error = ENOTCONN; | |
1619 | goto release; | |
1620 | } | |
91447636 | 1621 | if (uio_resid(uio) == 0) |
1c79356b | 1622 | goto release; |
91447636 | 1623 | if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { |
1c79356b A |
1624 | error = EWOULDBLOCK; |
1625 | goto release; | |
1626 | } | |
91447636 A |
1627 | sbunlock(&so->so_rcv, 1); |
1628 | #ifdef EVEN_MORE_LOCKING_DEBUG | |
1c79356b A |
1629 | if (socket_debug) |
1630 | printf("Waiting for socket data\n"); | |
91447636 | 1631 | #endif |
55e303ae | 1632 | |
1c79356b | 1633 | error = sbwait(&so->so_rcv); |
91447636 | 1634 | #ifdef EVEN_MORE_LOCKING_DEBUG |
1c79356b A |
1635 | if (socket_debug) |
1636 | printf("SORECEIVE - sbwait returned %d\n", error); | |
91447636 A |
1637 | #endif |
1638 | if (so->so_usecount < 1) | |
1639 | panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount); | |
9bccf70c | 1640 | if (error) { |
91447636 | 1641 | socket_unlock(so, 1); |
1c79356b A |
1642 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
1643 | return (error); | |
1644 | } | |
1645 | goto restart; | |
1646 | } | |
1647 | dontblock: | |
9bccf70c | 1648 | #ifndef __APPLE__ |
1c79356b A |
1649 | if (uio->uio_procp) |
1650 | uio->uio_procp->p_stats->p_ru.ru_msgrcv++; | |
55e303ae A |
1651 | #else /* __APPLE__ */ |
1652 | /* | |
1653 | * 2207985 | |
1654 | * This should be uio->uio-procp; however, some callers of this | |
1655 | * function use auto variables with stack garbage, and fail to | |
1656 | * fill out the uio structure properly. | |
1657 | */ | |
1658 | if (p) | |
1659 | p->p_stats->p_ru.ru_msgrcv++; | |
1660 | #endif /* __APPLE__ */ | |
1c79356b A |
1661 | nextrecord = m->m_nextpkt; |
1662 | if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { | |
1663 | KASSERT(m->m_type == MT_SONAME, ("receive 1a")); | |
1664 | orig_resid = 0; | |
4a249263 | 1665 | if (psa) { |
1c79356b A |
1666 | *psa = dup_sockaddr(mtod(m, struct sockaddr *), |
1667 | mp0 == 0); | |
4a249263 A |
1668 | if ((*psa == 0) && (flags & MSG_NEEDSA)) { |
1669 | error = EWOULDBLOCK; | |
1670 | goto release; | |
1671 | } | |
1672 | } | |
1c79356b A |
1673 | if (flags & MSG_PEEK) { |
1674 | m = m->m_next; | |
1675 | } else { | |
1676 | sbfree(&so->so_rcv, m); | |
91447636 A |
1677 | if (m->m_next == 0 && so->so_rcv.sb_cc != 0) |
1678 | panic("soreceive: about to create invalid socketbuf"); | |
1c79356b A |
1679 | MFREE(m, so->so_rcv.sb_mb); |
1680 | m = so->so_rcv.sb_mb; | |
1681 | } | |
1682 | } | |
1683 | while (m && m->m_type == MT_CONTROL && error == 0) { | |
1684 | if (flags & MSG_PEEK) { | |
1685 | if (controlp) | |
1686 | *controlp = m_copy(m, 0, m->m_len); | |
1687 | m = m->m_next; | |
1688 | } else { | |
1689 | sbfree(&so->so_rcv, m); | |
1690 | if (controlp) { | |
1691 | if (pr->pr_domain->dom_externalize && | |
1692 | mtod(m, struct cmsghdr *)->cmsg_type == | |
91447636 A |
1693 | SCM_RIGHTS) { |
1694 | socket_unlock(so, 0); /* release socket lock: see 3903171 */ | |
1c79356b | 1695 | error = (*pr->pr_domain->dom_externalize)(m); |
91447636 A |
1696 | socket_lock(so, 0); |
1697 | } | |
1c79356b | 1698 | *controlp = m; |
91447636 A |
1699 | if (m->m_next == 0 && so->so_rcv.sb_cc != 0) |
1700 | panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0"); | |
1c79356b A |
1701 | so->so_rcv.sb_mb = m->m_next; |
1702 | m->m_next = 0; | |
1703 | m = so->so_rcv.sb_mb; | |
1704 | } else { | |
1705 | MFREE(m, so->so_rcv.sb_mb); | |
1706 | m = so->so_rcv.sb_mb; | |
1707 | } | |
1708 | } | |
1709 | if (controlp) { | |
1710 | orig_resid = 0; | |
1711 | controlp = &(*controlp)->m_next; | |
1712 | } | |
1713 | } | |
1714 | if (m) { | |
1715 | if ((flags & MSG_PEEK) == 0) | |
1716 | m->m_nextpkt = nextrecord; | |
1717 | type = m->m_type; | |
1718 | if (type == MT_OOBDATA) | |
1719 | flags |= MSG_OOB; | |
1720 | } | |
1721 | moff = 0; | |
1722 | offset = 0; | |
fa4905b1 | 1723 | |
91447636 | 1724 | if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) |
55e303ae A |
1725 | can_delay = 1; |
1726 | else | |
1727 | can_delay = 0; | |
1728 | ||
1729 | need_event = 0; | |
fa4905b1 | 1730 | |
91447636 | 1731 | while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) { |
1c79356b A |
1732 | if (m->m_type == MT_OOBDATA) { |
1733 | if (type != MT_OOBDATA) | |
1734 | break; | |
1735 | } else if (type == MT_OOBDATA) | |
1736 | break; | |
9bccf70c | 1737 | #ifndef __APPLE__ |
1c79356b A |
1738 | /* |
1739 | * This assertion needs rework. The trouble is Appletalk is uses many | |
1740 | * mbuf types (NOT listed in mbuf.h!) which will trigger this panic. | |
1741 | * For now just remove the assertion... CSM 9/98 | |
1742 | */ | |
1743 | else | |
1744 | KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, | |
1745 | ("receive 3")); | |
9bccf70c A |
1746 | #else |
1747 | /* | |
1748 | * Make sure to allways set MSG_OOB event when getting | |
1749 | * out of band data inline. | |
1750 | */ | |
1c79356b | 1751 | if ((so->so_options & SO_WANTOOBFLAG) != 0 && |
9bccf70c A |
1752 | (so->so_options & SO_OOBINLINE) != 0 && |
1753 | (so->so_state & SS_RCVATMARK) != 0) { | |
1754 | flags |= MSG_OOB; | |
1755 | } | |
1756 | #endif | |
1c79356b | 1757 | so->so_state &= ~SS_RCVATMARK; |
91447636 A |
1758 | // LP64todo - fix this! |
1759 | len = uio_resid(uio) - delayed_copy_len; | |
1c79356b A |
1760 | if (so->so_oobmark && len > so->so_oobmark - offset) |
1761 | len = so->so_oobmark - offset; | |
1762 | if (len > m->m_len - moff) | |
1763 | len = m->m_len - moff; | |
1764 | /* | |
1765 | * If mp is set, just pass back the mbufs. | |
1766 | * Otherwise copy them out via the uio, then free. | |
1767 | * Sockbuf must be consistent here (points to current mbuf, | |
1768 | * it points to next record) when we drop priority; | |
1769 | * we must note any additions to the sockbuf when we | |
1770 | * block interrupts again. | |
1771 | */ | |
1772 | if (mp == 0) { | |
55e303ae A |
1773 | if (can_delay && len == m->m_len) { |
1774 | /* | |
1775 | * only delay the copy if we're consuming the | |
1776 | * mbuf and we're NOT in MSG_PEEK mode | |
1777 | * and we have enough data to make it worthwile | |
1778 | * to drop and retake the funnel... can_delay | |
1779 | * reflects the state of the 2 latter constraints | |
1780 | * moff should always be zero in these cases | |
1781 | */ | |
1782 | delayed_copy_len += len; | |
1783 | } else { | |
55e303ae A |
1784 | |
1785 | if (delayed_copy_len) { | |
91447636 | 1786 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1787 | |
1788 | if (error) { | |
55e303ae A |
1789 | goto release; |
1790 | } | |
1791 | if (m != so->so_rcv.sb_mb) { | |
1792 | /* | |
1793 | * can only get here if MSG_PEEK is not set | |
1794 | * therefore, m should point at the head of the rcv queue... | |
1795 | * if it doesn't, it means something drastically changed | |
1796 | * while we were out from behind the funnel in sodelayed_copy... | |
1797 | * perhaps a RST on the stream... in any event, the stream has | |
1798 | * been interrupted... it's probably best just to return | |
1799 | * whatever data we've moved and let the caller sort it out... | |
1800 | */ | |
1801 | break; | |
1802 | } | |
1803 | } | |
91447636 | 1804 | socket_unlock(so, 0); |
55e303ae | 1805 | error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); |
91447636 | 1806 | socket_lock(so, 0); |
55e303ae | 1807 | |
55e303ae A |
1808 | if (error) |
1809 | goto release; | |
1810 | } | |
1c79356b | 1811 | } else |
91447636 | 1812 | uio_setresid(uio, (uio_resid(uio) - len)); |
55e303ae | 1813 | |
1c79356b A |
1814 | if (len == m->m_len - moff) { |
1815 | if (m->m_flags & M_EOR) | |
1816 | flags |= MSG_EOR; | |
1817 | if (flags & MSG_PEEK) { | |
1818 | m = m->m_next; | |
1819 | moff = 0; | |
1820 | } else { | |
1821 | nextrecord = m->m_nextpkt; | |
1822 | sbfree(&so->so_rcv, m); | |
91447636 | 1823 | m->m_nextpkt = NULL; |
55e303ae | 1824 | |
1c79356b A |
1825 | if (mp) { |
1826 | *mp = m; | |
1827 | mp = &m->m_next; | |
1828 | so->so_rcv.sb_mb = m = m->m_next; | |
1829 | *mp = (struct mbuf *)0; | |
1830 | } else { | |
55e303ae A |
1831 | if (free_list == NULL) |
1832 | free_list = m; | |
1833 | else | |
14353aa8 A |
1834 | ml->m_next = m; |
1835 | ml = m; | |
1836 | so->so_rcv.sb_mb = m = m->m_next; | |
1837 | ml->m_next = 0; | |
1c79356b A |
1838 | } |
1839 | if (m) | |
1840 | m->m_nextpkt = nextrecord; | |
1841 | } | |
1842 | } else { | |
1843 | if (flags & MSG_PEEK) | |
1844 | moff += len; | |
1845 | else { | |
1846 | if (mp) | |
1847 | *mp = m_copym(m, 0, len, M_WAIT); | |
1848 | m->m_data += len; | |
1849 | m->m_len -= len; | |
1850 | so->so_rcv.sb_cc -= len; | |
1851 | } | |
1852 | } | |
1853 | if (so->so_oobmark) { | |
1854 | if ((flags & MSG_PEEK) == 0) { | |
1855 | so->so_oobmark -= len; | |
1856 | if (so->so_oobmark == 0) { | |
1857 | so->so_state |= SS_RCVATMARK; | |
55e303ae A |
1858 | /* |
1859 | * delay posting the actual event until after | |
1860 | * any delayed copy processing has finished | |
1861 | */ | |
1862 | need_event = 1; | |
1c79356b A |
1863 | break; |
1864 | } | |
1865 | } else { | |
1866 | offset += len; | |
1867 | if (offset == so->so_oobmark) | |
1868 | break; | |
1869 | } | |
1870 | } | |
91447636 | 1871 | if (flags & MSG_EOR) |
1c79356b A |
1872 | break; |
1873 | /* | |
55e303ae | 1874 | * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket), |
1c79356b A |
1875 | * we must not quit until "uio->uio_resid == 0" or an error |
1876 | * termination. If a signal/timeout occurs, return | |
1877 | * with a short count but without error. | |
1878 | * Keep sockbuf locked against other readers. | |
1879 | */ | |
91447636 | 1880 | while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 && |
1c79356b A |
1881 | !sosendallatonce(so) && !nextrecord) { |
1882 | if (so->so_error || so->so_state & SS_CANTRCVMORE) | |
55e303ae | 1883 | goto release; |
fa4905b1 | 1884 | |
91447636 | 1885 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD)) |
55e303ae A |
1886 | (*pr->pr_usrreqs->pru_rcvd)(so, flags); |
1887 | if (sbwait(&so->so_rcv)) { | |
1888 | error = 0; | |
1889 | goto release; | |
fa4905b1 | 1890 | } |
55e303ae A |
1891 | /* |
1892 | * have to wait until after we get back from the sbwait to do the copy because | |
1893 | * we will drop the funnel if we have enough data that has been delayed... by dropping | |
1894 | * the funnel we open up a window allowing the netisr thread to process the incoming packets | |
1895 | * and to change the state of this socket... we're issuing the sbwait because | |
1896 | * the socket is empty and we're expecting the netisr thread to wake us up when more | |
1897 | * packets arrive... if we allow that processing to happen and then sbwait, we | |
1898 | * could stall forever with packets sitting in the socket if no further packets | |
1899 | * arrive from the remote side. | |
1900 | * | |
1901 | * we want to copy before we've collected all the data to satisfy this request to | |
1902 | * allow the copy to overlap the incoming packet processing on an MP system | |
1903 | */ | |
1904 | if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) { | |
1905 | ||
91447636 | 1906 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1907 | |
1908 | if (error) | |
1909 | goto release; | |
1c79356b A |
1910 | } |
1911 | m = so->so_rcv.sb_mb; | |
fa4905b1 | 1912 | if (m) { |
1c79356b | 1913 | nextrecord = m->m_nextpkt; |
fa4905b1 | 1914 | } |
1c79356b A |
1915 | } |
1916 | } | |
91447636 A |
1917 | #ifdef MORE_LOCKING_DEBUG |
1918 | if (so->so_usecount <= 1) | |
1919 | panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount); | |
1920 | #endif | |
1c79356b A |
1921 | |
1922 | if (m && pr->pr_flags & PR_ATOMIC) { | |
9bccf70c | 1923 | #ifdef __APPLE__ |
1c79356b A |
1924 | if (so->so_options & SO_DONTTRUNC) |
1925 | flags |= MSG_RCVMORE; | |
9bccf70c A |
1926 | else { |
1927 | #endif | |
1928 | flags |= MSG_TRUNC; | |
1c79356b A |
1929 | if ((flags & MSG_PEEK) == 0) |
1930 | (void) sbdroprecord(&so->so_rcv); | |
9bccf70c | 1931 | #ifdef __APPLE__ |
1c79356b | 1932 | } |
9bccf70c | 1933 | #endif |
1c79356b A |
1934 | } |
1935 | if ((flags & MSG_PEEK) == 0) { | |
1936 | if (m == 0) | |
1937 | so->so_rcv.sb_mb = nextrecord; | |
1938 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) | |
1939 | (*pr->pr_usrreqs->pru_rcvd)(so, flags); | |
1940 | } | |
9bccf70c | 1941 | #ifdef __APPLE__ |
1c79356b A |
1942 | if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) |
1943 | flags |= MSG_HAVEMORE; | |
55e303ae A |
1944 | |
1945 | if (delayed_copy_len) { | |
91447636 | 1946 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1947 | |
1948 | if (error) | |
1949 | goto release; | |
1950 | } | |
1951 | if (free_list) { | |
1952 | m_freem_list((struct mbuf *)free_list); | |
1953 | free_list = (struct mbuf *)0; | |
1954 | } | |
1955 | if (need_event) | |
1956 | postevent(so, 0, EV_OOB); | |
9bccf70c | 1957 | #endif |
91447636 | 1958 | if (orig_resid == uio_resid(uio) && orig_resid && |
1c79356b | 1959 | (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { |
91447636 | 1960 | sbunlock(&so->so_rcv, 1); |
1c79356b A |
1961 | goto restart; |
1962 | } | |
1963 | ||
1964 | if (flagsp) | |
1965 | *flagsp |= flags; | |
1966 | release: | |
91447636 A |
1967 | #ifdef MORE_LOCKING_DEBUG |
1968 | if (so->so_usecount <= 1) | |
1969 | panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount); | |
1970 | #endif | |
55e303ae | 1971 | if (delayed_copy_len) { |
91447636 | 1972 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1973 | } |
1974 | if (free_list) { | |
1975 | m_freem_list((struct mbuf *)free_list); | |
1976 | } | |
91447636 | 1977 | sbunlock(&so->so_rcv, 0); /* will unlock socket */ |
1c79356b | 1978 | |
91447636 | 1979 | // LP64todo - fix this! |
1c79356b A |
1980 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, |
1981 | so, | |
91447636 | 1982 | uio_resid(uio), |
1c79356b A |
1983 | so->so_rcv.sb_cc, |
1984 | 0, | |
1985 | error); | |
1986 | ||
1987 | return (error); | |
1988 | } | |
1989 | ||
55e303ae | 1990 | |
91447636 | 1991 | static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid) |
55e303ae A |
1992 | { |
1993 | int error = 0; | |
55e303ae A |
1994 | struct mbuf *m; |
1995 | ||
1996 | m = *free_list; | |
1997 | ||
91447636 | 1998 | socket_unlock(so, 0); |
55e303ae | 1999 | |
55e303ae A |
2000 | while (m && error == 0) { |
2001 | ||
2002 | error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio); | |
2003 | ||
2004 | m = m->m_next; | |
2005 | } | |
2006 | m_freem_list(*free_list); | |
2007 | ||
2008 | *free_list = (struct mbuf *)NULL; | |
2009 | *resid = 0; | |
2010 | ||
91447636 | 2011 | socket_lock(so, 0); |
55e303ae A |
2012 | |
2013 | return (error); | |
2014 | } | |
2015 | ||
2016 | ||
1c79356b A |
2017 | int |
2018 | soshutdown(so, how) | |
2019 | register struct socket *so; | |
21362eb3 | 2020 | register int how; |
1c79356b A |
2021 | { |
2022 | register struct protosw *pr = so->so_proto; | |
1c79356b A |
2023 | int ret; |
2024 | ||
91447636 A |
2025 | socket_lock(so, 1); |
2026 | ||
2027 | sflt_notify(so, sock_evt_shutdown, &how); | |
1c79356b | 2028 | |
9bccf70c | 2029 | if (how != SHUT_WR) { |
1c79356b A |
2030 | sorflush(so); |
2031 | postevent(so, 0, EV_RCLOSED); | |
2032 | } | |
9bccf70c | 2033 | if (how != SHUT_RD) { |
1c79356b A |
2034 | ret = ((*pr->pr_usrreqs->pru_shutdown)(so)); |
2035 | postevent(so, 0, EV_WCLOSED); | |
2036 | KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); | |
91447636 | 2037 | socket_unlock(so, 1); |
1c79356b A |
2038 | return(ret); |
2039 | } | |
2040 | ||
2041 | KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); | |
91447636 | 2042 | socket_unlock(so, 1); |
1c79356b A |
2043 | return (0); |
2044 | } | |
2045 | ||
2046 | void | |
2047 | sorflush(so) | |
2048 | register struct socket *so; | |
2049 | { | |
2050 | register struct sockbuf *sb = &so->so_rcv; | |
2051 | register struct protosw *pr = so->so_proto; | |
1c79356b | 2052 | struct sockbuf asb; |
1c79356b | 2053 | |
91447636 A |
2054 | #ifdef MORE_LOCKING_DEBUG |
2055 | lck_mtx_t * mutex_held; | |
2056 | ||
2057 | if (so->so_proto->pr_getlock != NULL) | |
2058 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
2059 | else | |
2060 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2061 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2062 | #endif | |
2063 | ||
2064 | sflt_notify(so, sock_evt_flush_read, NULL); | |
1c79356b A |
2065 | |
2066 | sb->sb_flags |= SB_NOINTR; | |
2067 | (void) sblock(sb, M_WAIT); | |
1c79356b | 2068 | socantrcvmore(so); |
91447636 | 2069 | sbunlock(sb, 1); |
9bccf70c | 2070 | #ifdef __APPLE__ |
0b4e3aa0 | 2071 | selthreadclear(&sb->sb_sel); |
9bccf70c | 2072 | #endif |
1c79356b A |
2073 | asb = *sb; |
2074 | bzero((caddr_t)sb, sizeof (*sb)); | |
91447636 | 2075 | sb->sb_so = so; /* reestablish link to socket */ |
9bccf70c A |
2076 | if (asb.sb_flags & SB_KNOTE) { |
2077 | sb->sb_sel.si_note = asb.sb_sel.si_note; | |
2078 | sb->sb_flags = SB_KNOTE; | |
2079 | } | |
1c79356b A |
2080 | if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) |
2081 | (*pr->pr_domain->dom_dispose)(asb.sb_mb); | |
2082 | sbrelease(&asb); | |
2083 | } | |
2084 | ||
2085 | /* | |
2086 | * Perhaps this routine, and sooptcopyout(), below, ought to come in | |
2087 | * an additional variant to handle the case where the option value needs | |
2088 | * to be some kind of integer, but not a specific size. | |
2089 | * In addition to their use here, these functions are also called by the | |
2090 | * protocol-level pr_ctloutput() routines. | |
2091 | */ | |
2092 | int | |
2093 | sooptcopyin(sopt, buf, len, minlen) | |
2094 | struct sockopt *sopt; | |
2095 | void *buf; | |
2096 | size_t len; | |
2097 | size_t minlen; | |
2098 | { | |
2099 | size_t valsize; | |
2100 | ||
2101 | /* | |
2102 | * If the user gives us more than we wanted, we ignore it, | |
2103 | * but if we don't get the minimum length the caller | |
2104 | * wants, we return EINVAL. On success, sopt->sopt_valsize | |
2105 | * is set to however much we actually retrieved. | |
2106 | */ | |
2107 | if ((valsize = sopt->sopt_valsize) < minlen) | |
2108 | return EINVAL; | |
2109 | if (valsize > len) | |
2110 | sopt->sopt_valsize = valsize = len; | |
2111 | ||
2112 | if (sopt->sopt_p != 0) | |
2113 | return (copyin(sopt->sopt_val, buf, valsize)); | |
2114 | ||
91447636 | 2115 | bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize); |
1c79356b A |
2116 | return 0; |
2117 | } | |
2118 | ||
/*
 * Set a socket option.  Attached socket filters get first crack at the
 * request (and may consume it); non-SOL_SOCKET levels are forwarded to the
 * protocol's pr_ctloutput; SOL_SOCKET options are handled here.  Returns 0
 * or an errno value.  The socket lock is taken on entry and released on
 * every exit path.
 */
int
sosetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;
	short val;	/* NOTE(review): appears unused in this routine */

	socket_lock(so, 1);

	/* Normalize the direction field in case the caller left it unset. */
	if (sopt->sopt_dir != SOPT_SET) {
		sopt->sopt_dir = SOPT_SET;
	}

	{
		/*
		 * Run every attached socket filter's sf_setoption callback.
		 * The socket lock is dropped around the callbacks, with
		 * sflt_use/sflt_unuse pinning the filter chain meanwhile.
		 * EJUSTRETURN from a filter means "handled; report success"
		 * and skips the normal processing below.
		 */
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
			filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_setoption) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_setoption(
					filter->sfe_cookie, so, sopt);
			}
		}

		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);

			if (error) {
				if (error == EJUSTRETURN)
					error = 0;
				goto bad;
			}
		}
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		/* Protocol-level option: hand off to the protocol, if any. */
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)
				  (so, sopt);
			socket_unlock(so, 1);
			return (error);
		}
		error = ENOPROTOOPT;
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			error = sooptcopyin(sopt, &l, sizeof l, sizeof l);
			if (error)
				goto bad;

			/* SO_LINGER stores raw l_linger; SO_LINGER_SEC is in seconds, converted to ticks. */
			so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz;
			if (l.l_onoff)
				so->so_options |= SO_LINGER;
			else
				so->so_options &= ~SO_LINGER;
			break;

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			/*
			 * Simple boolean options: the option name doubles as
			 * its bit in so_options.
			 */
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_options |= sopt->sopt_name;
			else
				so->so_options &= ~sopt->sopt_name;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDBUF:
			case SO_RCVBUF:
				/* sbreserve returns 0 when the size cannot be granted. */
				if (sbreserve(sopt->sopt_name == SO_SNDBUF ?
					      &so->so_snd : &so->so_rcv,
					      (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			error = sooptcopyin(sopt, &tv, sizeof tv,
					    sizeof tv);
			if (error)
				goto bad;

			/* Reject out-of-range timevals (EDOM per spec). */
			if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX ||
			    tv.tv_usec < 0 || tv.tv_usec >= 1000000) {
				error = EDOM;
				goto bad;
			}

			switch (sopt->sopt_name) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = tv;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = tv;
				break;
			}
			break;

		case SO_NKE:
		{
			/* Attach a socket filter (NKE) by registered handle. */
			struct so_nke nke;

			error = sooptcopyin(sopt, &nke,
					    sizeof nke, sizeof nke);
			if (error)
				goto bad;

			error = sflt_attach_private(so, NULL, nke.nke_handle, 1);
			break;
		}

		case SO_NOSIGPIPE:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_flags |= SOF_NOSIGPIPE;
			else
				so->so_flags &= ~SOF_NOSIGPIPE;

			break;

		case SO_NOADDRERR:
			error = sooptcopyin(sopt, &optval, sizeof optval,
					    sizeof optval);
			if (error)
				goto bad;
			if (optval)
				so->so_flags |= SOF_NOADDRAVAIL;
			else
				so->so_flags &= ~SOF_NOADDRAVAIL;

			break;

		default:
			error = ENOPROTOOPT;
			break;
		}
		/*
		 * On success, also let the protocol observe socket-level
		 * options (its return value is deliberately ignored).
		 */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				  (so, sopt));
		}
	}
bad:
	socket_unlock(so, 1);
	return (error);
}
2331 | ||
2332 | /* Helper routine for getsockopt */ | |
2333 | int | |
2334 | sooptcopyout(sopt, buf, len) | |
2335 | struct sockopt *sopt; | |
2336 | void *buf; | |
2337 | size_t len; | |
2338 | { | |
2339 | int error; | |
2340 | size_t valsize; | |
2341 | ||
2342 | error = 0; | |
2343 | ||
2344 | /* | |
2345 | * Documented get behavior is that we always return a value, | |
2346 | * possibly truncated to fit in the user's buffer. | |
2347 | * Traditional behavior is that we always tell the user | |
2348 | * precisely how much we copied, rather than something useful | |
2349 | * like the total amount we had available for her. | |
2350 | * Note that this interface is not idempotent; the entire answer must | |
2351 | * generated ahead of time. | |
2352 | */ | |
2353 | valsize = min(len, sopt->sopt_valsize); | |
2354 | sopt->sopt_valsize = valsize; | |
91447636 | 2355 | if (sopt->sopt_val != USER_ADDR_NULL) { |
1c79356b A |
2356 | if (sopt->sopt_p != 0) |
2357 | error = copyout(buf, sopt->sopt_val, valsize); | |
2358 | else | |
91447636 | 2359 | bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize); |
1c79356b A |
2360 | } |
2361 | return error; | |
2362 | } | |
2363 | ||
/*
 * Get a socket option.  Attached socket filters get first crack at the
 * request; non-SOL_SOCKET levels are forwarded to the protocol's
 * pr_ctloutput; SOL_SOCKET options are answered here via sooptcopyout().
 * Returns 0 or an errno value.  The socket lock is taken after the
 * direction fixup and released on every exit path.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	/* Normalize the direction field in case the caller left it unset. */
	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	socket_lock(so, 1);

	{
		/*
		 * Run every attached socket filter's sf_getoption callback.
		 * The socket lock is dropped around the callbacks, with
		 * sflt_use/sflt_unuse pinning the filter chain meanwhile.
		 * EJUSTRETURN from a filter means "handled; report success".
		 */
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
			filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_getoption) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_getoption(
					filter->sfe_cookie, so, sopt);
			}
		}
		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);

			if (error) {
				if (error == EJUSTRETURN)
					error = 0;
				socket_unlock(so, 1);
				return error;
			}
		}
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		/* Protocol-level option: hand off to the protocol, if any. */
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)
				  (so, sopt);
			socket_unlock(so, 1);
			return (error);
		} else {
			socket_unlock(so, 1);
			return (ENOPROTOOPT);
		}
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			l.l_onoff = so->so_options & SO_LINGER;
			/* so_linger is in ticks; SO_LINGER_SEC reports seconds. */
			l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
				so->so_linger / hz;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			/* Boolean options live as bits in so_options. */
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Common exit for all options answered as an int. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

#ifdef __APPLE__
		case SO_NREAD:
		{
			/*
			 * Bytes readable: for atomic (record-oriented)
			 * protocols, sum the MT_DATA mbufs of the first
			 * record; otherwise report the whole buffer count.
			 */
			int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			if (so->so_proto->pr_flags & PR_ATOMIC)
			{
				while (m1) {
					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
			goto integer;
		}
		case SO_NWRITE:
			optval = so->so_snd.sb_cc;
			goto integer;
#endif
		case SO_ERROR:
			/* Reading the pending error clears it (standard behavior). */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			tv = (sopt->sopt_name == SO_SNDTIMEO ?
			      so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		case SO_NOADDRERR:
			optval = (so->so_flags & SOF_NOADDRAVAIL);
			goto integer;

		default:
			error = ENOPROTOOPT;
			break;
		}
		socket_unlock(so, 1);
		return (error);
	}
}
2521 | ||
9bccf70c | 2522 | /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ |
1c79356b | 2523 | int |
9bccf70c | 2524 | soopt_getm(struct sockopt *sopt, struct mbuf **mp) |
1c79356b A |
2525 | { |
2526 | struct mbuf *m, *m_prev; | |
2527 | int sopt_size = sopt->sopt_valsize; | |
2528 | ||
a3d08fcd A |
2529 | if (sopt_size > MAX_SOOPTGETM_SIZE) |
2530 | return EMSGSIZE; | |
2531 | ||
1c79356b A |
2532 | MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); |
2533 | if (m == 0) | |
2534 | return ENOBUFS; | |
2535 | if (sopt_size > MLEN) { | |
2536 | MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); | |
2537 | if ((m->m_flags & M_EXT) == 0) { | |
2538 | m_free(m); | |
2539 | return ENOBUFS; | |
2540 | } | |
2541 | m->m_len = min(MCLBYTES, sopt_size); | |
2542 | } else { | |
2543 | m->m_len = min(MLEN, sopt_size); | |
2544 | } | |
2545 | sopt_size -= m->m_len; | |
2546 | *mp = m; | |
2547 | m_prev = m; | |
2548 | ||
2549 | while (sopt_size) { | |
2550 | MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); | |
2551 | if (m == 0) { | |
2552 | m_freem(*mp); | |
2553 | return ENOBUFS; | |
2554 | } | |
2555 | if (sopt_size > MLEN) { | |
2556 | MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); | |
2557 | if ((m->m_flags & M_EXT) == 0) { | |
2558 | m_freem(*mp); | |
2559 | return ENOBUFS; | |
2560 | } | |
2561 | m->m_len = min(MCLBYTES, sopt_size); | |
2562 | } else { | |
2563 | m->m_len = min(MLEN, sopt_size); | |
2564 | } | |
2565 | sopt_size -= m->m_len; | |
2566 | m_prev->m_next = m; | |
2567 | m_prev = m; | |
2568 | } | |
2569 | return 0; | |
2570 | } | |
2571 | ||
2572 | /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ | |
2573 | int | |
9bccf70c | 2574 | soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) |
1c79356b A |
2575 | { |
2576 | struct mbuf *m0 = m; | |
2577 | ||
91447636 | 2578 | if (sopt->sopt_val == USER_ADDR_NULL) |
1c79356b A |
2579 | return 0; |
2580 | while (m != NULL && sopt->sopt_valsize >= m->m_len) { | |
2581 | if (sopt->sopt_p != NULL) { | |
2582 | int error; | |
2583 | ||
91447636 | 2584 | error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); |
1c79356b A |
2585 | if (error != 0) { |
2586 | m_freem(m0); | |
2587 | return(error); | |
2588 | } | |
2589 | } else | |
91447636 | 2590 | bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len); |
1c79356b | 2591 | sopt->sopt_valsize -= m->m_len; |
91447636 | 2592 | sopt->sopt_val += m->m_len; |
1c79356b A |
2593 | m = m->m_next; |
2594 | } | |
2595 | if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ | |
9bccf70c | 2596 | panic("soopt_mcopyin"); |
1c79356b A |
2597 | return 0; |
2598 | } | |
2599 | ||
2600 | /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ | |
2601 | int | |
9bccf70c | 2602 | soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) |
1c79356b A |
2603 | { |
2604 | struct mbuf *m0 = m; | |
2605 | size_t valsize = 0; | |
2606 | ||
91447636 | 2607 | if (sopt->sopt_val == USER_ADDR_NULL) |
1c79356b A |
2608 | return 0; |
2609 | while (m != NULL && sopt->sopt_valsize >= m->m_len) { | |
2610 | if (sopt->sopt_p != NULL) { | |
2611 | int error; | |
2612 | ||
91447636 | 2613 | error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); |
1c79356b A |
2614 | if (error != 0) { |
2615 | m_freem(m0); | |
2616 | return(error); | |
2617 | } | |
2618 | } else | |
91447636 | 2619 | bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len); |
1c79356b | 2620 | sopt->sopt_valsize -= m->m_len; |
91447636 | 2621 | sopt->sopt_val += m->m_len; |
1c79356b A |
2622 | valsize += m->m_len; |
2623 | m = m->m_next; | |
2624 | } | |
2625 | if (m != NULL) { | |
2626 | /* enough soopt buffer should be given from user-land */ | |
2627 | m_freem(m0); | |
2628 | return(EINVAL); | |
2629 | } | |
2630 | sopt->sopt_valsize = valsize; | |
2631 | return 0; | |
2632 | } | |
2633 | ||
9bccf70c A |
2634 | void |
2635 | sohasoutofband(so) | |
2636 | register struct socket *so; | |
2637 | { | |
2638 | struct proc *p; | |
9bccf70c | 2639 | |
9bccf70c A |
2640 | if (so->so_pgid < 0) |
2641 | gsignal(-so->so_pgid, SIGURG); | |
2642 | else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) | |
2643 | psignal(p, SIGURG); | |
2644 | selwakeup(&so->so_rcv.sb_sel); | |
2645 | } | |
2646 | ||
2647 | int | |
91447636 | 2648 | sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql) |
9bccf70c A |
2649 | { |
2650 | struct proc *p = current_proc(); | |
2651 | int revents = 0; | |
91447636 A |
2652 | |
2653 | socket_lock(so, 1); | |
9bccf70c A |
2654 | |
2655 | if (events & (POLLIN | POLLRDNORM)) | |
2656 | if (soreadable(so)) | |
2657 | revents |= events & (POLLIN | POLLRDNORM); | |
2658 | ||
2659 | if (events & (POLLOUT | POLLWRNORM)) | |
2660 | if (sowriteable(so)) | |
2661 | revents |= events & (POLLOUT | POLLWRNORM); | |
2662 | ||
2663 | if (events & (POLLPRI | POLLRDBAND)) | |
2664 | if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) | |
2665 | revents |= events & (POLLPRI | POLLRDBAND); | |
2666 | ||
2667 | if (revents == 0) { | |
2668 | if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { | |
2669 | /* Darwin sets the flag first, BSD calls selrecord first */ | |
2670 | so->so_rcv.sb_flags |= SB_SEL; | |
2671 | selrecord(p, &so->so_rcv.sb_sel, wql); | |
2672 | } | |
2673 | ||
2674 | if (events & (POLLOUT | POLLWRNORM)) { | |
2675 | /* Darwin sets the flag first, BSD calls selrecord first */ | |
2676 | so->so_snd.sb_flags |= SB_SEL; | |
2677 | selrecord(p, &so->so_snd.sb_sel, wql); | |
2678 | } | |
2679 | } | |
2680 | ||
91447636 | 2681 | socket_unlock(so, 1); |
9bccf70c A |
2682 | return (revents); |
2683 | } | |
55e303ae | 2684 | |
91447636 | 2685 | int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); |
55e303ae A |
2686 | |
2687 | int | |
91447636 | 2688 | soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p) |
55e303ae | 2689 | { |
91447636 | 2690 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2691 | struct sockbuf *sb; |
91447636 | 2692 | socket_lock(so, 1); |
55e303ae A |
2693 | |
2694 | switch (kn->kn_filter) { | |
2695 | case EVFILT_READ: | |
2696 | if (so->so_options & SO_ACCEPTCONN) | |
2697 | kn->kn_fop = &solisten_filtops; | |
2698 | else | |
2699 | kn->kn_fop = &soread_filtops; | |
2700 | sb = &so->so_rcv; | |
2701 | break; | |
2702 | case EVFILT_WRITE: | |
2703 | kn->kn_fop = &sowrite_filtops; | |
2704 | sb = &so->so_snd; | |
2705 | break; | |
2706 | default: | |
91447636 | 2707 | socket_unlock(so, 1); |
55e303ae A |
2708 | return (1); |
2709 | } | |
2710 | ||
55e303ae A |
2711 | if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn)) |
2712 | sb->sb_flags |= SB_KNOTE; | |
91447636 | 2713 | socket_unlock(so, 1); |
55e303ae A |
2714 | return (0); |
2715 | } | |
2716 | ||
2717 | static void | |
2718 | filt_sordetach(struct knote *kn) | |
2719 | { | |
91447636 | 2720 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2721 | |
91447636 A |
2722 | socket_lock(so, 1); |
2723 | if (so->so_rcv.sb_flags & SB_KNOTE) | |
55e303ae A |
2724 | if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) |
2725 | so->so_rcv.sb_flags &= ~SB_KNOTE; | |
91447636 | 2726 | socket_unlock(so, 1); |
55e303ae A |
2727 | } |
2728 | ||
/*ARGSUSED*/
/*
 * kqueue filter routine for EVFILT_READ on a (non-listening) socket.
 *
 * Returns non-zero when the knote should fire.  kn_data is set to the
 * amount of readable data; EV_OOBAND is raised when out-of-band data is
 * pending, and EV_EOF/kn_fflags report a closed receive side.
 *
 * May be invoked with the socket already locked (SO_FILT_HINT_LOCKED in
 * 'hint'); only takes/drops the lock itself when that hint is absent,
 * which is why every return path repeats the conditional unlock.
 */
static int
filt_soread(struct knote *kn, long hint)
{
	struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data;

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_lock(so, 1);

	if (so->so_oobmark) {
		/* OOB mark pending somewhere in the receive buffer */
		if (kn->kn_flags & EV_OOBAND) {
			/* already in OOB mode: report data beyond the mark */
			kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
			return (1);
		}
		/* report distance to the mark and switch to OOB mode */
		kn->kn_data = so->so_oobmark;
		kn->kn_flags |= EV_OOBAND;
	} else {
		kn->kn_data = so->so_rcv.sb_cc;
		if (so->so_state & SS_CANTRCVMORE) {
			/* receive side shut down: always fire with EOF */
			kn->kn_flags |= EV_EOF;
			kn->kn_fflags = so->so_error;
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
			return (1);
		}
	}

	if (so->so_state & SS_RCVATMARK) {
		/* read pointer is exactly at the OOB mark */
		if (kn->kn_flags & EV_OOBAND) {
			if ((hint & SO_FILT_HINT_LOCKED) == 0)
				socket_unlock(so, 1);
			return (1);
		}
		kn->kn_flags |= EV_OOBAND;
	} else if (kn->kn_flags & EV_OOBAND) {
		/* OOB mode but no mark and not at mark: nothing to report */
		kn->kn_data = 0;
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (0);
	}

	if (so->so_error) {	/* temporary udp error */
		if ((hint & SO_FILT_HINT_LOCKED) == 0)
			socket_unlock(so, 1);
		return (1);
	}

	if ((hint & SO_FILT_HINT_LOCKED) == 0)
		socket_unlock(so, 1);

	/*
	 * Fire when in OOB mode, or when the data level reaches the
	 * low-water mark (NOTE_LOWAT overrides the socket's sb_lowat).
	 */
	return( kn->kn_flags & EV_OOBAND ||
		kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ?
		kn->kn_sdata : so->so_rcv.sb_lowat));
}
2785 | ||
2786 | static void | |
2787 | filt_sowdetach(struct knote *kn) | |
2788 | { | |
91447636 A |
2789 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2790 | socket_lock(so, 1); | |
55e303ae | 2791 | |
91447636 | 2792 | if(so->so_snd.sb_flags & SB_KNOTE) |
55e303ae A |
2793 | if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) |
2794 | so->so_snd.sb_flags &= ~SB_KNOTE; | |
91447636 | 2795 | socket_unlock(so, 1); |
55e303ae A |
2796 | } |
2797 | ||
2798 | /*ARGSUSED*/ | |
2799 | static int | |
2800 | filt_sowrite(struct knote *kn, long hint) | |
2801 | { | |
91447636 A |
2802 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2803 | ||
2804 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2805 | socket_lock(so, 1); | |
55e303ae A |
2806 | |
2807 | kn->kn_data = sbspace(&so->so_snd); | |
2808 | if (so->so_state & SS_CANTSENDMORE) { | |
2809 | kn->kn_flags |= EV_EOF; | |
2810 | kn->kn_fflags = so->so_error; | |
91447636 A |
2811 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2812 | socket_unlock(so, 1); | |
55e303ae A |
2813 | return (1); |
2814 | } | |
91447636 A |
2815 | if (so->so_error) { /* temporary udp error */ |
2816 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2817 | socket_unlock(so, 1); | |
55e303ae | 2818 | return (1); |
91447636 | 2819 | } |
55e303ae | 2820 | if (((so->so_state & SS_ISCONNECTED) == 0) && |
91447636 A |
2821 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) { |
2822 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2823 | socket_unlock(so, 1); | |
55e303ae | 2824 | return (0); |
91447636 A |
2825 | } |
2826 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2827 | socket_unlock(so, 1); | |
55e303ae A |
2828 | if (kn->kn_sfflags & NOTE_LOWAT) |
2829 | return (kn->kn_data >= kn->kn_sdata); | |
2830 | return (kn->kn_data >= so->so_snd.sb_lowat); | |
2831 | } | |
2832 | ||
2833 | /*ARGSUSED*/ | |
2834 | static int | |
2835 | filt_solisten(struct knote *kn, long hint) | |
2836 | { | |
91447636 A |
2837 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2838 | int isempty; | |
55e303ae | 2839 | |
91447636 A |
2840 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2841 | socket_lock(so, 1); | |
55e303ae | 2842 | kn->kn_data = so->so_qlen; |
91447636 A |
2843 | isempty = ! TAILQ_EMPTY(&so->so_comp); |
2844 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2845 | socket_unlock(so, 1); | |
2846 | return (isempty); | |
55e303ae A |
2847 | } |
2848 | ||
91447636 A |
2849 | |
2850 | int | |
2851 | socket_lock(so, refcount) | |
2852 | struct socket *so; | |
2853 | int refcount; | |
2854 | { | |
21362eb3 A |
2855 | int error = 0, lr, lr_saved; |
2856 | #ifdef __ppc__ | |
2857 | __asm__ volatile("mflr %0" : "=r" (lr)); | |
2858 | lr_saved = lr; | |
2859 | #endif | |
91447636 A |
2860 | |
2861 | if (so->so_proto->pr_lock) { | |
2862 | error = (*so->so_proto->pr_lock)(so, refcount, lr_saved); | |
2863 | } | |
2864 | else { | |
2865 | #ifdef MORE_LOCKING_DEBUG | |
2866 | lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); | |
2867 | #endif | |
2868 | lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); | |
2869 | if (refcount) | |
2870 | so->so_usecount++; | |
21362eb3 | 2871 | so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */ |
91447636 A |
2872 | } |
2873 | ||
2874 | return(error); | |
2875 | ||
2876 | } | |
2877 | ||
2878 | int | |
2879 | socket_unlock(so, refcount) | |
2880 | struct socket *so; | |
2881 | int refcount; | |
2882 | { | |
21362eb3 | 2883 | int error = 0, lr, lr_saved; |
91447636 A |
2884 | lck_mtx_t * mutex_held; |
2885 | ||
21362eb3 A |
2886 | #ifdef __ppc__ |
2887 | __asm__ volatile("mflr %0" : "=r" (lr)); | |
2888 | lr_saved = lr; | |
2889 | #endif | |
2890 | ||
2891 | ||
91447636 A |
2892 | |
2893 | if (so->so_proto == NULL) | |
2894 | panic("socket_unlock null so_proto so=%x\n", so); | |
2895 | ||
2896 | if (so && so->so_proto->pr_unlock) | |
2897 | error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved); | |
2898 | else { | |
2899 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2900 | #ifdef MORE_LOCKING_DEBUG | |
2901 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2902 | #endif | |
2903 | if (refcount) { | |
2904 | if (so->so_usecount <= 0) | |
2905 | panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount); | |
2906 | so->so_usecount--; | |
2907 | if (so->so_usecount == 0) { | |
2908 | sofreelastref(so, 1); | |
2909 | } | |
21362eb3 A |
2910 | else |
2911 | so->reserved4 = (void*)lr_saved; /* save caller */ | |
91447636 A |
2912 | } |
2913 | lck_mtx_unlock(mutex_held); | |
2914 | } | |
2915 | ||
2916 | return(error); | |
2917 | } | |
2918 | //### Called with socket locked, will unlock socket | |
2919 | void | |
2920 | sofree(so) | |
2921 | struct socket *so; | |
2922 | { | |
2923 | ||
21362eb3 | 2924 | int lr, lr_saved; |
91447636 | 2925 | lck_mtx_t * mutex_held; |
21362eb3 A |
2926 | #ifdef __ppc__ |
2927 | __asm__ volatile("mflr %0" : "=r" (lr)); | |
2928 | lr_saved = lr; | |
2929 | #endif | |
91447636 A |
2930 | if (so->so_proto->pr_getlock != NULL) |
2931 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
2932 | else | |
2933 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2934 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2935 | ||
91447636 A |
2936 | sofreelastref(so, 0); |
2937 | } | |
2938 | ||
/*
 * Take one use-count reference on a socket.
 * socket_lock(so, 1) locks and takes the reference; the paired
 * socket_unlock(so, 0) releases only the lock, keeping the reference.
 */
void
soreference(struct socket *so)
{
	socket_lock(so, 1);	/* lock + take one reference */
	socket_unlock(so, 0);	/* unlock only, keep the reference */
}
2946 | ||
/*
 * Release one use-count reference on a socket.
 * socket_lock(so, 0) only takes the lock; socket_unlock(so, 1) drops
 * the reference (freeing the socket if it was the last one) and
 * releases the lock.
 */
void
sodereference(struct socket *so)
{
	socket_lock(so, 0);	/* lock only */
	socket_unlock(so, 1);	/* unlock + drop one reference */
}