]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
5d5c5d0d A |
2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
3 | * | |
8f6c56a5 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
8f6c56a5 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
8ad349bb | 24 | * limitations under the License. |
8f6c56a5 A |
25 | * |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
1c79356b A |
27 | */ |
28 | /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ | |
29 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
30 | /* | |
31 | * Copyright (c) 1982, 1986, 1988, 1990, 1993 | |
32 | * The Regents of the University of California. All rights reserved. | |
33 | * | |
34 | * Redistribution and use in source and binary forms, with or without | |
35 | * modification, are permitted provided that the following conditions | |
36 | * are met: | |
37 | * 1. Redistributions of source code must retain the above copyright | |
38 | * notice, this list of conditions and the following disclaimer. | |
39 | * 2. Redistributions in binary form must reproduce the above copyright | |
40 | * notice, this list of conditions and the following disclaimer in the | |
41 | * documentation and/or other materials provided with the distribution. | |
42 | * 3. All advertising materials mentioning features or use of this software | |
43 | * must display the following acknowledgement: | |
44 | * This product includes software developed by the University of | |
45 | * California, Berkeley and its contributors. | |
46 | * 4. Neither the name of the University nor the names of its contributors | |
47 | * may be used to endorse or promote products derived from this software | |
48 | * without specific prior written permission. | |
49 | * | |
50 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
51 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
52 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
53 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
54 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
55 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
56 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
57 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
58 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
59 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
60 | * SUCH DAMAGE. | |
61 | * | |
9bccf70c A |
62 | * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 |
63 | * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $ | |
1c79356b A |
64 | */ |
65 | ||
66 | #include <sys/param.h> | |
67 | #include <sys/systm.h> | |
55e303ae | 68 | #include <sys/filedesc.h> |
91447636 A |
69 | #include <sys/proc_internal.h> |
70 | #include <sys/kauth.h> | |
71 | #include <sys/file_internal.h> | |
1c79356b A |
72 | #include <sys/fcntl.h> |
73 | #include <sys/malloc.h> | |
74 | #include <sys/mbuf.h> | |
75 | #include <sys/domain.h> | |
76 | #include <sys/kernel.h> | |
55e303ae | 77 | #include <sys/event.h> |
1c79356b A |
78 | #include <sys/poll.h> |
79 | #include <sys/protosw.h> | |
80 | #include <sys/socket.h> | |
81 | #include <sys/socketvar.h> | |
82 | #include <sys/resourcevar.h> | |
83 | #include <sys/signalvar.h> | |
84 | #include <sys/sysctl.h> | |
85 | #include <sys/uio.h> | |
86 | #include <sys/ev.h> | |
87 | #include <sys/kdebug.h> | |
88 | #include <net/route.h> | |
89 | #include <netinet/in.h> | |
90 | #include <netinet/in_pcb.h> | |
91 | #include <kern/zalloc.h> | |
91447636 | 92 | #include <kern/locks.h> |
1c79356b A |
93 | #include <machine/limits.h> |
94 | ||
95 | int so_cache_hw = 0; | |
96 | int so_cache_timeouts = 0; | |
97 | int so_cache_max_freed = 0; | |
98 | int cached_sock_count = 0; | |
99 | struct socket *socket_cache_head = 0; | |
100 | struct socket *socket_cache_tail = 0; | |
101 | u_long so_cache_time = 0; | |
102 | int so_cache_init_done = 0; | |
103 | struct zone *so_cache_zone; | |
104 | extern int get_inpcb_str_size(); | |
105 | extern int get_tcp_str_size(); | |
106 | ||
91447636 A |
107 | static lck_grp_t *so_cache_mtx_grp; |
108 | static lck_attr_t *so_cache_mtx_attr; | |
109 | static lck_grp_attr_t *so_cache_mtx_grp_attr; | |
110 | lck_mtx_t *so_cache_mtx; | |
111 | ||
1c79356b A |
112 | #include <machine/limits.h> |
113 | ||
55e303ae A |
114 | static void filt_sordetach(struct knote *kn); |
115 | static int filt_soread(struct knote *kn, long hint); | |
116 | static void filt_sowdetach(struct knote *kn); | |
117 | static int filt_sowrite(struct knote *kn, long hint); | |
118 | static int filt_solisten(struct knote *kn, long hint); | |
119 | ||
120 | static struct filterops solisten_filtops = | |
121 | { 1, NULL, filt_sordetach, filt_solisten }; | |
122 | static struct filterops soread_filtops = | |
123 | { 1, NULL, filt_sordetach, filt_soread }; | |
124 | static struct filterops sowrite_filtops = | |
125 | { 1, NULL, filt_sowdetach, filt_sowrite }; | |
126 | ||
91447636 | 127 | #define EVEN_MORE_LOCKING_DEBUG 0 |
1c79356b A |
128 | int socket_debug = 0; |
129 | int socket_zone = M_SOCKET; | |
130 | so_gen_t so_gencnt; /* generation count for sockets */ | |
131 | ||
132 | MALLOC_DEFINE(M_SONAME, "soname", "socket name"); | |
133 | MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); | |
134 | ||
135 | #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) | |
136 | #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2) | |
137 | #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) | |
138 | #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3) | |
139 | #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1) | |
140 | #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8)) | |
141 | #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8)) | |
142 | ||
a3d08fcd | 143 | #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES) |
1c79356b | 144 | |
91447636 | 145 | |
1c79356b A |
146 | SYSCTL_DECL(_kern_ipc); |
147 | ||
148 | static int somaxconn = SOMAXCONN; | |
149 | SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, | |
150 | 0, ""); | |
151 | ||
152 | /* Should we get a maximum also ??? */ | |
fa4905b1 | 153 | static int sosendmaxchain = 65536; |
1c79356b | 154 | static int sosendminchain = 16384; |
55e303ae | 155 | static int sorecvmincopy = 16384; |
1c79356b A |
156 | SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain, |
157 | 0, ""); | |
55e303ae A |
158 | SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy, |
159 | 0, ""); | |
1c79356b A |
160 | |
161 | void so_cache_timer(); | |
162 | ||
163 | /* | |
164 | * Socket operation routines. | |
165 | * These routines are called by the routines in | |
166 | * sys_socket.c or from a system process, and | |
167 | * implement the semantics of socket operations by | |
168 | * switching out to the protocol specific routines. | |
169 | */ | |
170 | ||
9bccf70c | 171 | #ifdef __APPLE__ |
91447636 A |
172 | |
173 | vm_size_t so_cache_zone_element_size; | |
174 | ||
175 | static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid); | |
176 | ||
177 | ||
1c79356b A |
178 | void socketinit() |
179 | { | |
180 | vm_size_t str_size; | |
181 | ||
91447636 A |
182 | if (so_cache_init_done) { |
183 | printf("socketinit: already called...\n"); | |
184 | return; | |
185 | } | |
186 | ||
187 | /* | |
188 | * allocate lock group attribute and group for socket cache mutex | |
189 | */ | |
190 | so_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); | |
91447636 A |
191 | |
192 | so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr); | |
193 | ||
194 | /* | |
195 | * allocate the lock attribute for socket cache mutex | |
196 | */ | |
197 | so_cache_mtx_attr = lck_attr_alloc_init(); | |
91447636 | 198 | |
1c79356b A |
199 | so_cache_init_done = 1; |
200 | ||
91447636 A |
201 | so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */ |
202 | ||
203 | if (so_cache_mtx == NULL) | |
204 | return; /* we're hosed... */ | |
205 | ||
1c79356b A |
206 | str_size = (vm_size_t)( sizeof(struct socket) + 4 + |
207 | get_inpcb_str_size() + 4 + | |
208 | get_tcp_str_size()); | |
209 | so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone"); | |
210 | #if TEMPDEBUG | |
91447636 | 211 | printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); |
1c79356b | 212 | #endif |
91447636 A |
213 | timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); |
214 | ||
215 | so_cache_zone_element_size = str_size; | |
216 | ||
217 | sflt_init(); | |
1c79356b A |
218 | |
219 | } | |
220 | ||
221 | void cached_sock_alloc(so, waitok) | |
222 | struct socket **so; | |
223 | int waitok; | |
224 | ||
225 | { | |
226 | caddr_t temp; | |
1c79356b A |
227 | register u_long offset; |
228 | ||
229 | ||
91447636 A |
230 | lck_mtx_lock(so_cache_mtx); |
231 | ||
1c79356b A |
232 | if (cached_sock_count) { |
233 | cached_sock_count--; | |
234 | *so = socket_cache_head; | |
235 | if (*so == 0) | |
236 | panic("cached_sock_alloc: cached sock is null"); | |
237 | ||
238 | socket_cache_head = socket_cache_head->cache_next; | |
239 | if (socket_cache_head) | |
240 | socket_cache_head->cache_prev = 0; | |
241 | else | |
242 | socket_cache_tail = 0; | |
91447636 A |
243 | |
244 | lck_mtx_unlock(so_cache_mtx); | |
1c79356b A |
245 | |
246 | temp = (*so)->so_saved_pcb; | |
247 | bzero((caddr_t)*so, sizeof(struct socket)); | |
248 | #if TEMPDEBUG | |
249 | kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so, | |
250 | cached_sock_count); | |
251 | #endif | |
252 | (*so)->so_saved_pcb = temp; | |
91447636 A |
253 | (*so)->cached_in_sock_layer = 1; |
254 | ||
1c79356b A |
255 | } |
256 | else { | |
257 | #if TEMPDEBUG | |
258 | kprintf("Allocating cached sock %x from memory\n", *so); | |
259 | #endif | |
260 | ||
91447636 A |
261 | lck_mtx_unlock(so_cache_mtx); |
262 | ||
1c79356b A |
263 | if (waitok) |
264 | *so = (struct socket *) zalloc(so_cache_zone); | |
265 | else | |
266 | *so = (struct socket *) zalloc_noblock(so_cache_zone); | |
267 | ||
268 | if (*so == 0) | |
269 | return; | |
270 | ||
271 | bzero((caddr_t)*so, sizeof(struct socket)); | |
272 | ||
273 | /* | |
274 | * Define offsets for extra structures into our single block of | |
275 | * memory. Align extra structures on longword boundaries. | |
276 | */ | |
277 | ||
278 | ||
279 | offset = (u_long) *so; | |
280 | offset += sizeof(struct socket); | |
281 | if (offset & 0x3) { | |
282 | offset += 4; | |
283 | offset &= 0xfffffffc; | |
284 | } | |
285 | (*so)->so_saved_pcb = (caddr_t) offset; | |
286 | offset += get_inpcb_str_size(); | |
287 | if (offset & 0x3) { | |
288 | offset += 4; | |
289 | offset &= 0xfffffffc; | |
290 | } | |
291 | ||
292 | ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset; | |
293 | #if TEMPDEBUG | |
294 | kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so, | |
295 | (*so)->so_saved_pcb, | |
296 | ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb); | |
297 | #endif | |
298 | } | |
299 | ||
300 | (*so)->cached_in_sock_layer = 1; | |
301 | } | |
302 | ||
303 | ||
304 | void cached_sock_free(so) | |
305 | struct socket *so; | |
306 | { | |
1c79356b | 307 | |
91447636 | 308 | lck_mtx_lock(so_cache_mtx); |
1c79356b | 309 | |
1c79356b A |
310 | if (++cached_sock_count > MAX_CACHED_SOCKETS) { |
311 | --cached_sock_count; | |
91447636 | 312 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
313 | #if TEMPDEBUG |
314 | kprintf("Freeing overflowed cached socket %x\n", so); | |
315 | #endif | |
91447636 | 316 | zfree(so_cache_zone, so); |
1c79356b A |
317 | } |
318 | else { | |
319 | #if TEMPDEBUG | |
320 | kprintf("Freeing socket %x into cache\n", so); | |
321 | #endif | |
322 | if (so_cache_hw < cached_sock_count) | |
323 | so_cache_hw = cached_sock_count; | |
324 | ||
325 | so->cache_next = socket_cache_head; | |
326 | so->cache_prev = 0; | |
327 | if (socket_cache_head) | |
328 | socket_cache_head->cache_prev = so; | |
329 | else | |
330 | socket_cache_tail = so; | |
331 | ||
332 | so->cache_timestamp = so_cache_time; | |
333 | socket_cache_head = so; | |
91447636 | 334 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
335 | } |
336 | ||
337 | #if TEMPDEBUG | |
338 | kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count); | |
339 | #endif | |
340 | ||
341 | ||
342 | } | |
343 | ||
344 | ||
345 | void so_cache_timer() | |
346 | { | |
347 | register struct socket *p; | |
1c79356b | 348 | register int n_freed = 0; |
1c79356b | 349 | |
1c79356b | 350 | |
91447636 | 351 | lck_mtx_lock(so_cache_mtx); |
1c79356b | 352 | |
91447636 | 353 | ++so_cache_time; |
1c79356b | 354 | |
91447636 | 355 | while ( (p = socket_cache_tail) ) |
1c79356b A |
356 | { |
357 | if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT) | |
358 | break; | |
359 | ||
360 | so_cache_timeouts++; | |
361 | ||
91447636 | 362 | if ( (socket_cache_tail = p->cache_prev) ) |
1c79356b A |
363 | p->cache_prev->cache_next = 0; |
364 | if (--cached_sock_count == 0) | |
365 | socket_cache_head = 0; | |
366 | ||
1c79356b | 367 | |
91447636 | 368 | zfree(so_cache_zone, p); |
1c79356b | 369 | |
1c79356b A |
370 | if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) |
371 | { | |
372 | so_cache_max_freed++; | |
373 | break; | |
374 | } | |
375 | } | |
91447636 | 376 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
377 | |
378 | timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); | |
379 | ||
1c79356b A |
380 | |
381 | } | |
9bccf70c | 382 | #endif /* __APPLE__ */ |
1c79356b A |
383 | |
384 | /* | |
385 | * Get a socket structure from our zone, and initialize it. | |
386 | * We don't implement `waitok' yet (see comments in uipc_domain.c). | |
387 | * Note that it would probably be better to allocate socket | |
388 | * and PCB at the same time, but I'm not convinced that all | |
389 | * the protocols can be easily modified to do this. | |
390 | */ | |
391 | struct socket * | |
392 | soalloc(waitok, dom, type) | |
393 | int waitok; | |
394 | int dom; | |
395 | int type; | |
396 | { | |
397 | struct socket *so; | |
398 | ||
399 | if ((dom == PF_INET) && (type == SOCK_STREAM)) | |
400 | cached_sock_alloc(&so, waitok); | |
401 | else | |
402 | { | |
91447636 | 403 | MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK); |
1c79356b A |
404 | if (so) |
405 | bzero(so, sizeof *so); | |
406 | } | |
407 | /* XXX race condition for reentrant kernel */ | |
91447636 | 408 | //###LD Atomic add for so_gencnt |
1c79356b A |
409 | if (so) { |
410 | so->so_gencnt = ++so_gencnt; | |
411 | so->so_zone = socket_zone; | |
412 | } | |
413 | ||
414 | return so; | |
415 | } | |
416 | ||
417 | int | |
418 | socreate(dom, aso, type, proto) | |
419 | int dom; | |
420 | struct socket **aso; | |
421 | register int type; | |
422 | int proto; | |
1c79356b A |
423 | { |
424 | struct proc *p = current_proc(); | |
425 | register struct protosw *prp; | |
9bccf70c | 426 | register struct socket *so; |
1c79356b | 427 | register int error = 0; |
55e303ae A |
428 | #if TCPDEBUG |
429 | extern int tcpconsdebug; | |
430 | #endif | |
1c79356b A |
431 | if (proto) |
432 | prp = pffindproto(dom, proto, type); | |
433 | else | |
434 | prp = pffindtype(dom, type); | |
9bccf70c | 435 | |
1c79356b A |
436 | if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) |
437 | return (EPROTONOSUPPORT); | |
9bccf70c A |
438 | #ifndef __APPLE__ |
439 | ||
440 | if (p->p_prison && jail_socket_unixiproute_only && | |
441 | prp->pr_domain->dom_family != PF_LOCAL && | |
442 | prp->pr_domain->dom_family != PF_INET && | |
443 | prp->pr_domain->dom_family != PF_ROUTE) { | |
444 | return (EPROTONOSUPPORT); | |
445 | } | |
446 | ||
447 | #endif | |
1c79356b A |
448 | if (prp->pr_type != type) |
449 | return (EPROTOTYPE); | |
450 | so = soalloc(p != 0, dom, type); | |
451 | if (so == 0) | |
452 | return (ENOBUFS); | |
453 | ||
454 | TAILQ_INIT(&so->so_incomp); | |
455 | TAILQ_INIT(&so->so_comp); | |
456 | so->so_type = type; | |
457 | ||
9bccf70c | 458 | #ifdef __APPLE__ |
1c79356b | 459 | if (p != 0) { |
91447636 A |
460 | so->so_uid = kauth_cred_getuid(kauth_cred_get()); |
461 | if (!suser(kauth_cred_get(),NULL)) | |
1c79356b | 462 | so->so_state = SS_PRIV; |
1c79356b | 463 | } |
9bccf70c | 464 | #else |
91447636 | 465 | so->so_cred = kauth_cred_get_with_ref(); |
9bccf70c | 466 | #endif |
1c79356b | 467 | so->so_proto = prp; |
9bccf70c | 468 | #ifdef __APPLE__ |
1c79356b | 469 | so->so_rcv.sb_flags |= SB_RECV; /* XXX */ |
91447636 | 470 | so->so_rcv.sb_so = so->so_snd.sb_so = so; |
9bccf70c | 471 | #endif |
4452a7af A |
472 | so->next_lock_lr = 0; |
473 | so->next_unlock_lr = 0; | |
474 | ||
91447636 A |
475 | |
476 | //### Attachement will create the per pcb lock if necessary and increase refcount | |
37839358 | 477 | so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */ |
91447636 A |
478 | |
479 | error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); | |
1c79356b | 480 | if (error) { |
55e303ae A |
481 | /* |
482 | * Warning: | |
483 | * If so_pcb is not zero, the socket will be leaked, | |
484 | * so protocol attachment handler must be coded carefuly | |
485 | */ | |
1c79356b | 486 | so->so_state |= SS_NOFDREF; |
37839358 A |
487 | so->so_usecount--; |
488 | sofreelastref(so, 1); /* will deallocate the socket */ | |
1c79356b A |
489 | return (error); |
490 | } | |
9bccf70c | 491 | #ifdef __APPLE__ |
1c79356b | 492 | prp->pr_domain->dom_refs++; |
1c79356b | 493 | TAILQ_INIT(&so->so_evlist); |
91447636 A |
494 | |
495 | /* Attach socket filters for this protocol */ | |
496 | sflt_initsock(so); | |
55e303ae A |
497 | #if TCPDEBUG |
498 | if (tcpconsdebug == 2) | |
499 | so->so_options |= SO_DEBUG; | |
500 | #endif | |
9bccf70c | 501 | #endif |
55e303ae | 502 | |
1c79356b A |
503 | *aso = so; |
504 | return (0); | |
505 | } | |
506 | ||
507 | int | |
508 | sobind(so, nam) | |
509 | struct socket *so; | |
510 | struct sockaddr *nam; | |
511 | ||
512 | { | |
513 | struct proc *p = current_proc(); | |
91447636 A |
514 | int error = 0; |
515 | struct socket_filter_entry *filter; | |
516 | int filtered = 0; | |
1c79356b | 517 | |
91447636 A |
518 | socket_lock(so, 1); |
519 | ||
520 | /* Socket filter */ | |
521 | error = 0; | |
522 | for (filter = so->so_filt; filter && (error == 0); | |
523 | filter = filter->sfe_next_onsocket) { | |
524 | if (filter->sfe_filter->sf_filter.sf_bind) { | |
525 | if (filtered == 0) { | |
526 | filtered = 1; | |
527 | sflt_use(so); | |
528 | socket_unlock(so, 0); | |
1c79356b | 529 | } |
91447636 A |
530 | error = filter->sfe_filter->sf_filter.sf_bind( |
531 | filter->sfe_cookie, so, nam); | |
1c79356b A |
532 | } |
533 | } | |
91447636 A |
534 | if (filtered != 0) { |
535 | socket_lock(so, 0); | |
536 | sflt_unuse(so); | |
537 | } | |
538 | /* End socket filter */ | |
539 | ||
540 | if (error == 0) | |
541 | error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); | |
542 | ||
543 | socket_unlock(so, 1); | |
544 | ||
545 | if (error == EJUSTRETURN) | |
546 | error = 0; | |
547 | ||
1c79356b A |
548 | return (error); |
549 | } | |
550 | ||
551 | void | |
552 | sodealloc(so) | |
553 | struct socket *so; | |
554 | { | |
555 | so->so_gencnt = ++so_gencnt; | |
556 | ||
9bccf70c A |
557 | #ifndef __APPLE__ |
558 | if (so->so_rcv.sb_hiwat) | |
559 | (void)chgsbsize(so->so_cred->cr_uidinfo, | |
560 | &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); | |
561 | if (so->so_snd.sb_hiwat) | |
562 | (void)chgsbsize(so->so_cred->cr_uidinfo, | |
563 | &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); | |
564 | #ifdef INET | |
565 | if (so->so_accf != NULL) { | |
566 | if (so->so_accf->so_accept_filter != NULL && | |
567 | so->so_accf->so_accept_filter->accf_destroy != NULL) { | |
568 | so->so_accf->so_accept_filter->accf_destroy(so); | |
569 | } | |
570 | if (so->so_accf->so_accept_filter_str != NULL) | |
571 | FREE(so->so_accf->so_accept_filter_str, M_ACCF); | |
572 | FREE(so->so_accf, M_ACCF); | |
573 | } | |
574 | #endif /* INET */ | |
4452a7af | 575 | kauth_cred_unref(&so->so_cred); |
9bccf70c A |
576 | zfreei(so->so_zone, so); |
577 | #else | |
1c79356b A |
578 | if (so->cached_in_sock_layer == 1) |
579 | cached_sock_free(so); | |
91447636 A |
580 | else { |
581 | if (so->cached_in_sock_layer == -1) | |
582 | panic("sodealloc: double dealloc: so=%x\n", so); | |
583 | so->cached_in_sock_layer = -1; | |
584 | FREE_ZONE(so, sizeof(*so), so->so_zone); | |
585 | } | |
9bccf70c | 586 | #endif /* __APPLE__ */ |
1c79356b A |
587 | } |
588 | ||
589 | int | |
590 | solisten(so, backlog) | |
591 | register struct socket *so; | |
592 | int backlog; | |
593 | ||
594 | { | |
1c79356b | 595 | struct proc *p = current_proc(); |
91447636 | 596 | int error; |
1c79356b | 597 | |
91447636 A |
598 | socket_lock(so, 1); |
599 | ||
600 | { | |
601 | struct socket_filter_entry *filter; | |
602 | int filtered = 0; | |
603 | error = 0; | |
604 | for (filter = so->so_filt; filter && (error == 0); | |
605 | filter = filter->sfe_next_onsocket) { | |
606 | if (filter->sfe_filter->sf_filter.sf_listen) { | |
607 | if (filtered == 0) { | |
608 | filtered = 1; | |
609 | sflt_use(so); | |
610 | socket_unlock(so, 0); | |
611 | } | |
612 | error = filter->sfe_filter->sf_filter.sf_listen( | |
613 | filter->sfe_cookie, so); | |
614 | } | |
615 | } | |
616 | if (filtered != 0) { | |
617 | socket_lock(so, 0); | |
618 | sflt_unuse(so); | |
619 | } | |
620 | } | |
621 | ||
622 | if (error == 0) { | |
623 | error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); | |
624 | } | |
625 | ||
1c79356b | 626 | if (error) { |
91447636 A |
627 | socket_unlock(so, 1); |
628 | if (error == EJUSTRETURN) | |
629 | error = 0; | |
1c79356b A |
630 | return (error); |
631 | } | |
91447636 A |
632 | |
633 | if (TAILQ_EMPTY(&so->so_comp)) | |
1c79356b A |
634 | so->so_options |= SO_ACCEPTCONN; |
635 | if (backlog < 0 || backlog > somaxconn) | |
636 | backlog = somaxconn; | |
637 | so->so_qlimit = backlog; | |
1c79356b | 638 | |
91447636 | 639 | socket_unlock(so, 1); |
1c79356b A |
640 | return (0); |
641 | } | |
642 | ||
1c79356b | 643 | void |
91447636 | 644 | sofreelastref(so, dealloc) |
1c79356b | 645 | register struct socket *so; |
91447636 | 646 | int dealloc; |
9bccf70c A |
647 | { |
648 | int error; | |
1c79356b A |
649 | struct socket *head = so->so_head; |
650 | ||
91447636 | 651 | /*### Assume socket is locked */ |
1c79356b | 652 | |
3a60a9f5 A |
653 | /* Remove any filters - may be called more than once */ |
654 | sflt_termsock(so); | |
655 | ||
91447636 | 656 | if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) { |
9bccf70c | 657 | #ifdef __APPLE__ |
0b4e3aa0 A |
658 | selthreadclear(&so->so_snd.sb_sel); |
659 | selthreadclear(&so->so_rcv.sb_sel); | |
cc9f6e38 A |
660 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
661 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
9bccf70c | 662 | #endif |
1c79356b | 663 | return; |
0b4e3aa0 | 664 | } |
9bccf70c | 665 | if (head != NULL) { |
91447636 | 666 | socket_lock(head, 1); |
9bccf70c A |
667 | if (so->so_state & SS_INCOMP) { |
668 | TAILQ_REMOVE(&head->so_incomp, so, so_list); | |
669 | head->so_incqlen--; | |
670 | } else if (so->so_state & SS_COMP) { | |
671 | /* | |
672 | * We must not decommission a socket that's | |
673 | * on the accept(2) queue. If we do, then | |
674 | * accept(2) may hang after select(2) indicated | |
675 | * that the listening socket was ready. | |
676 | */ | |
677 | #ifdef __APPLE__ | |
678 | selthreadclear(&so->so_snd.sb_sel); | |
679 | selthreadclear(&so->so_rcv.sb_sel); | |
cc9f6e38 A |
680 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
681 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
9bccf70c | 682 | #endif |
91447636 | 683 | socket_unlock(head, 1); |
9bccf70c A |
684 | return; |
685 | } else { | |
686 | panic("sofree: not queued"); | |
687 | } | |
1c79356b | 688 | head->so_qlen--; |
9bccf70c | 689 | so->so_state &= ~SS_INCOMP; |
1c79356b | 690 | so->so_head = NULL; |
91447636 | 691 | socket_unlock(head, 1); |
1c79356b | 692 | } |
9bccf70c | 693 | #ifdef __APPLE__ |
0b4e3aa0 | 694 | selthreadclear(&so->so_snd.sb_sel); |
1c79356b | 695 | sbrelease(&so->so_snd); |
9bccf70c | 696 | #endif |
1c79356b | 697 | sorflush(so); |
91447636 A |
698 | |
699 | /* 3932268: disable upcall */ | |
700 | so->so_rcv.sb_flags &= ~SB_UPCALL; | |
701 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
702 | ||
703 | if (dealloc) | |
704 | sodealloc(so); | |
1c79356b A |
705 | } |
706 | ||
707 | /* | |
708 | * Close a socket on last file table reference removal. | |
709 | * Initiate disconnect if connected. | |
710 | * Free socket when disconnect complete. | |
711 | */ | |
712 | int | |
91447636 | 713 | soclose_locked(so) |
1c79356b A |
714 | register struct socket *so; |
715 | { | |
1c79356b | 716 | int error = 0; |
91447636 A |
717 | lck_mtx_t * mutex_held; |
718 | struct timespec ts; | |
1c79356b | 719 | |
91447636 A |
720 | if (so->so_usecount == 0) { |
721 | panic("soclose: so=%x refcount=0\n", so); | |
1c79356b A |
722 | } |
723 | ||
91447636 A |
724 | sflt_notify(so, sock_evt_closing, NULL); |
725 | ||
726 | if ((so->so_options & SO_ACCEPTCONN)) { | |
727 | struct socket *sp; | |
728 | ||
729 | /* We do not want new connection to be added to the connection queues */ | |
730 | so->so_options &= ~SO_ACCEPTCONN; | |
731 | ||
732 | while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { | |
733 | /* A bit tricky here. We need to keep | |
734 | * a lock if it's a protocol global lock | |
735 | * but we want the head, not the socket locked | |
736 | * in the case of per-socket lock... | |
737 | */ | |
ff6e181a | 738 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 739 | socket_unlock(so, 0); |
ff6e181a A |
740 | socket_lock(sp, 1); |
741 | } | |
91447636 | 742 | (void) soabort(sp); |
ff6e181a | 743 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 744 | socket_unlock(sp, 1); |
ff6e181a A |
745 | socket_lock(so, 0); |
746 | } | |
91447636 A |
747 | } |
748 | ||
749 | while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { | |
91447636 A |
750 | /* Dequeue from so_comp since sofree() won't do it */ |
751 | TAILQ_REMOVE(&so->so_comp, sp, so_list); | |
752 | so->so_qlen--; | |
ff6e181a A |
753 | |
754 | if (so->so_proto->pr_getlock != NULL) { | |
755 | socket_unlock(so, 0); | |
756 | socket_lock(sp, 1); | |
757 | } | |
758 | ||
91447636 A |
759 | sp->so_state &= ~SS_COMP; |
760 | sp->so_head = NULL; | |
761 | ||
91447636 | 762 | (void) soabort(sp); |
ff6e181a | 763 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 764 | socket_unlock(sp, 1); |
ff6e181a A |
765 | socket_lock(so, 0); |
766 | } | |
91447636 A |
767 | } |
768 | } | |
769 | if (so->so_pcb == 0) { | |
770 | /* 3915887: mark the socket as ready for dealloc */ | |
771 | so->so_flags |= SOF_PCBCLEARING; | |
1c79356b | 772 | goto discard; |
91447636 | 773 | } |
1c79356b A |
774 | if (so->so_state & SS_ISCONNECTED) { |
775 | if ((so->so_state & SS_ISDISCONNECTING) == 0) { | |
91447636 | 776 | error = sodisconnectlocked(so); |
1c79356b A |
777 | if (error) |
778 | goto drop; | |
779 | } | |
780 | if (so->so_options & SO_LINGER) { | |
781 | if ((so->so_state & SS_ISDISCONNECTING) && | |
782 | (so->so_state & SS_NBIO)) | |
783 | goto drop; | |
91447636 A |
784 | if (so->so_proto->pr_getlock != NULL) |
785 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
786 | else | |
787 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
1c79356b | 788 | while (so->so_state & SS_ISCONNECTED) { |
91447636 A |
789 | ts.tv_sec = (so->so_linger/100); |
790 | ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10; | |
791 | error = msleep((caddr_t)&so->so_timeo, mutex_held, | |
792 | PSOCK | PCATCH, "soclos", &ts); | |
793 | if (error) { | |
794 | /* It's OK when the time fires, don't report an error */ | |
795 | if (error == EWOULDBLOCK) | |
796 | error = 0; | |
1c79356b | 797 | break; |
91447636 | 798 | } |
1c79356b A |
799 | } |
800 | } | |
801 | } | |
802 | drop: | |
91447636 A |
803 | if (so->so_usecount == 0) |
804 | panic("soclose: usecount is zero so=%x\n", so); | |
805 | if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) { | |
1c79356b A |
806 | int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); |
807 | if (error == 0) | |
808 | error = error2; | |
809 | } | |
91447636 A |
810 | if (so->so_usecount <= 0) |
811 | panic("soclose: usecount is zero so=%x\n", so); | |
1c79356b | 812 | discard: |
e3027f41 | 813 | if (so->so_pcb && so->so_state & SS_NOFDREF) |
1c79356b A |
814 | panic("soclose: NOFDREF"); |
815 | so->so_state |= SS_NOFDREF; | |
9bccf70c | 816 | #ifdef __APPLE__ |
1c79356b A |
817 | so->so_proto->pr_domain->dom_refs--; |
818 | evsofree(so); | |
9bccf70c | 819 | #endif |
91447636 | 820 | so->so_usecount--; |
1c79356b | 821 | sofree(so); |
1c79356b A |
822 | return (error); |
823 | } | |
824 | ||
91447636 A |
825 | int |
826 | soclose(so) | |
827 | register struct socket *so; | |
828 | { | |
829 | int error = 0; | |
830 | socket_lock(so, 1); | |
831 | if (so->so_retaincnt == 0) | |
832 | error = soclose_locked(so); | |
833 | else { /* if the FD is going away, but socket is retained in kernel remove its reference */ | |
834 | so->so_usecount--; | |
835 | if (so->so_usecount < 2) | |
836 | panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount); | |
837 | } | |
838 | socket_unlock(so, 1); | |
839 | return (error); | |
840 | } | |
841 | ||
842 | ||
1c79356b A |
843 | /* |
844 | * Must be called at splnet... | |
845 | */ | |
91447636 | 846 | //#### Should already be locked |
1c79356b A |
847 | int |
848 | soabort(so) | |
849 | struct socket *so; | |
850 | { | |
9bccf70c | 851 | int error; |
1c79356b | 852 | |
91447636 A |
853 | #ifdef MORE_LOCKING_DEBUG |
854 | lck_mtx_t * mutex_held; | |
855 | ||
856 | if (so->so_proto->pr_getlock != NULL) | |
857 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
858 | else | |
859 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
860 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
861 | #endif | |
862 | ||
9bccf70c A |
863 | error = (*so->so_proto->pr_usrreqs->pru_abort)(so); |
864 | if (error) { | |
865 | sofree(so); | |
866 | return error; | |
867 | } | |
868 | return (0); | |
1c79356b A |
869 | } |
870 | ||
871 | int | |
91447636 | 872 | soacceptlock(so, nam, dolock) |
1c79356b A |
873 | register struct socket *so; |
874 | struct sockaddr **nam; | |
91447636 | 875 | int dolock; |
9bccf70c | 876 | { |
1c79356b | 877 | int error; |
91447636 A |
878 | |
879 | if (dolock) socket_lock(so, 1); | |
1c79356b A |
880 | |
881 | if ((so->so_state & SS_NOFDREF) == 0) | |
882 | panic("soaccept: !NOFDREF"); | |
883 | so->so_state &= ~SS_NOFDREF; | |
884 | error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); | |
1c79356b | 885 | |
91447636 | 886 | if (dolock) socket_unlock(so, 1); |
1c79356b A |
887 | return (error); |
888 | } | |
91447636 A |
889 | int |
890 | soaccept(so, nam) | |
891 | register struct socket *so; | |
892 | struct sockaddr **nam; | |
893 | { | |
894 | return (soacceptlock(so, nam, 1)); | |
895 | } | |
1c79356b A |
896 | |
897 | int | |
91447636 | 898 | soconnectlock(so, nam, dolock) |
1c79356b A |
899 | register struct socket *so; |
900 | struct sockaddr *nam; | |
91447636 | 901 | int dolock; |
1c79356b A |
902 | |
903 | { | |
904 | int s; | |
905 | int error; | |
906 | struct proc *p = current_proc(); | |
1c79356b | 907 | |
91447636 A |
908 | if (dolock) socket_lock(so, 1); |
909 | ||
910 | if (so->so_options & SO_ACCEPTCONN) { | |
911 | if (dolock) socket_unlock(so, 1); | |
1c79356b | 912 | return (EOPNOTSUPP); |
91447636 | 913 | } |
1c79356b A |
914 | /* |
915 | * If protocol is connection-based, can only connect once. | |
916 | * Otherwise, if connected, try to disconnect first. | |
917 | * This allows user to disconnect by connecting to, e.g., | |
918 | * a null address. | |
919 | */ | |
920 | if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && | |
921 | ((so->so_proto->pr_flags & PR_CONNREQUIRED) || | |
91447636 | 922 | (error = sodisconnectlocked(so)))) |
1c79356b A |
923 | error = EISCONN; |
924 | else { | |
91447636 A |
925 | /* |
926 | * Run connect filter before calling protocol: | |
927 | * - non-blocking connect returns before completion; | |
928 | */ | |
929 | { | |
930 | struct socket_filter_entry *filter; | |
931 | int filtered = 0; | |
932 | error = 0; | |
933 | for (filter = so->so_filt; filter && (error == 0); | |
934 | filter = filter->sfe_next_onsocket) { | |
935 | if (filter->sfe_filter->sf_filter.sf_connect_out) { | |
936 | if (filtered == 0) { | |
937 | filtered = 1; | |
938 | sflt_use(so); | |
939 | socket_unlock(so, 0); | |
940 | } | |
941 | error = filter->sfe_filter->sf_filter.sf_connect_out( | |
942 | filter->sfe_cookie, so, nam); | |
943 | } | |
944 | } | |
945 | if (filtered != 0) { | |
946 | socket_lock(so, 0); | |
947 | sflt_unuse(so); | |
948 | } | |
949 | } | |
950 | if (error) { | |
951 | if (error == EJUSTRETURN) | |
952 | error = 0; | |
953 | if (dolock) socket_unlock(so, 1); | |
954 | return error; | |
955 | } | |
956 | ||
1c79356b | 957 | error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); |
1c79356b | 958 | } |
91447636 | 959 | if (dolock) socket_unlock(so, 1); |
1c79356b A |
960 | return (error); |
961 | } | |
962 | ||
91447636 A |
963 | int |
964 | soconnect(so, nam) | |
965 | register struct socket *so; | |
966 | struct sockaddr *nam; | |
967 | { | |
968 | return (soconnectlock(so, nam, 1)); | |
969 | } | |
970 | ||
1c79356b A |
971 | int |
972 | soconnect2(so1, so2) | |
973 | register struct socket *so1; | |
974 | struct socket *so2; | |
975 | { | |
1c79356b | 976 | int error; |
91447636 | 977 | |
4452a7af A |
978 | socket_lock(so1, 1); |
979 | if (so2->so_proto->pr_lock) | |
980 | socket_lock(so2, 1); | |
1c79356b A |
981 | |
982 | error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); | |
91447636 | 983 | |
4452a7af A |
984 | socket_unlock(so1, 1); |
985 | if (so2->so_proto->pr_lock) | |
986 | socket_unlock(so2, 1); | |
1c79356b A |
987 | return (error); |
988 | } | |
989 | ||
91447636 | 990 | |
1c79356b | 991 | int |
91447636 | 992 | sodisconnectlocked(so) |
1c79356b A |
993 | register struct socket *so; |
994 | { | |
1c79356b | 995 | int error; |
1c79356b A |
996 | |
997 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
998 | error = ENOTCONN; | |
999 | goto bad; | |
1000 | } | |
1001 | if (so->so_state & SS_ISDISCONNECTING) { | |
1002 | error = EALREADY; | |
1003 | goto bad; | |
1004 | } | |
91447636 | 1005 | |
1c79356b | 1006 | error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); |
91447636 | 1007 | |
9bccf70c | 1008 | if (error == 0) { |
91447636 | 1009 | sflt_notify(so, sock_evt_disconnected, NULL); |
1c79356b A |
1010 | } |
1011 | ||
1012 | bad: | |
1c79356b A |
1013 | return (error); |
1014 | } | |
91447636 A |
1015 | //### Locking version |
1016 | int | |
1017 | sodisconnect(so) | |
1018 | register struct socket *so; | |
1019 | { | |
1020 | int error; | |
1021 | ||
1022 | socket_lock(so, 1); | |
1023 | error = sodisconnectlocked(so); | |
1024 | socket_unlock(so, 1); | |
1025 | return(error); | |
1026 | } | |
1c79356b A |
1027 | |
1028 | #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT) | |
91447636 A |
1029 | |
1030 | /* | |
1031 | * sosendcheck will lock the socket buffer if it isn't locked and | |
1032 | * verify that there is space for the data being inserted. | |
1033 | */ | |
1034 | ||
1035 | static int | |
1036 | sosendcheck( | |
1037 | struct socket *so, | |
1038 | struct sockaddr *addr, | |
1039 | long resid, | |
1040 | long clen, | |
1041 | long atomic, | |
1042 | int flags, | |
1043 | int *sblocked) | |
1044 | { | |
1045 | int error = 0; | |
1046 | long space; | |
3a60a9f5 | 1047 | int assumelock = 0; |
91447636 A |
1048 | |
1049 | restart: | |
1050 | if (*sblocked == 0) { | |
3a60a9f5 A |
1051 | if ((so->so_snd.sb_flags & SB_LOCK) != 0 && |
1052 | so->so_send_filt_thread != 0 && | |
1053 | so->so_send_filt_thread == current_thread()) { | |
1054 | /* | |
1055 | * We're being called recursively from a filter, | |
1056 | * allow this to continue. Radar 4150520. | |
1057 | * Don't set sblocked because we don't want | |
1058 | * to perform an unlock later. | |
1059 | */ | |
1060 | assumelock = 1; | |
1061 | } | |
1062 | else { | |
1063 | error = sblock(&so->so_snd, SBLOCKWAIT(flags)); | |
1064 | if (error) { | |
1065 | return error; | |
1066 | } | |
1067 | *sblocked = 1; | |
1068 | } | |
91447636 A |
1069 | } |
1070 | ||
1071 | if (so->so_state & SS_CANTSENDMORE) | |
1072 | return EPIPE; | |
1073 | ||
1074 | if (so->so_error) { | |
1075 | error = so->so_error; | |
1076 | so->so_error = 0; | |
1077 | return error; | |
1078 | } | |
1079 | ||
1080 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
1081 | /* | |
1082 | * `sendto' and `sendmsg' is allowed on a connection- | |
1083 | * based socket if it supports implied connect. | |
1084 | * Return ENOTCONN if not connected and no address is | |
1085 | * supplied. | |
1086 | */ | |
1087 | if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && | |
1088 | (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { | |
1089 | if ((so->so_state & SS_ISCONFIRMING) == 0 && | |
1090 | !(resid == 0 && clen != 0)) | |
1091 | return ENOTCONN; | |
1092 | } else if (addr == 0 && !(flags&MSG_HOLD)) | |
1093 | return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ; | |
1094 | } | |
1095 | space = sbspace(&so->so_snd); | |
1096 | if (flags & MSG_OOB) | |
1097 | space += 1024; | |
1098 | if ((atomic && resid > so->so_snd.sb_hiwat) || | |
1099 | clen > so->so_snd.sb_hiwat) | |
1100 | return EMSGSIZE; | |
1101 | if (space < resid + clen && | |
1102 | (atomic || space < so->so_snd.sb_lowat || space < clen)) { | |
3a60a9f5 | 1103 | if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) { |
91447636 | 1104 | return EWOULDBLOCK; |
3a60a9f5 | 1105 | } |
91447636 A |
1106 | sbunlock(&so->so_snd, 1); |
1107 | error = sbwait(&so->so_snd); | |
1108 | if (error) { | |
1109 | return error; | |
1110 | } | |
1111 | goto restart; | |
1112 | } | |
1113 | ||
1114 | return 0; | |
1115 | } | |
1116 | ||
1c79356b A |
1117 | /* |
1118 | * Send on a socket. | |
1119 | * If send must go all at once and message is larger than | |
1120 | * send buffering, then hard error. | |
1121 | * Lock against other senders. | |
1122 | * If must go all at once and not enough room now, then | |
1123 | * inform user that this would block and do nothing. | |
1124 | * Otherwise, if nonblocking, send as much as possible. | |
1125 | * The data to be sent is described by "uio" if nonzero, | |
1126 | * otherwise by the mbuf chain "top" (which must be null | |
1127 | * if uio is not). Data provided in mbuf chain must be small | |
1128 | * enough to send all at once. | |
1129 | * | |
1130 | * Returns nonzero on error, timeout or signal; callers | |
1131 | * must check for short counts if EINTR/ERESTART are returned. | |
1132 | * Data and control buffers are freed on return. | |
1133 | * Experiment: | |
1134 | * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf | |
1135 | * MSG_SEND: go thru as for MSG_HOLD on current fragment, then | |
1136 | * point at the mbuf chain being constructed and go from there. | |
1137 | */ | |
1138 | int | |
1139 | sosend(so, addr, uio, top, control, flags) | |
1140 | register struct socket *so; | |
1141 | struct sockaddr *addr; | |
1142 | struct uio *uio; | |
1143 | struct mbuf *top; | |
1144 | struct mbuf *control; | |
1145 | int flags; | |
1146 | ||
1147 | { | |
1148 | struct mbuf **mp; | |
fa4905b1 | 1149 | register struct mbuf *m, *freelist = NULL; |
1c79356b | 1150 | register long space, len, resid; |
91447636 | 1151 | int clen = 0, error, dontroute, mlen, sendflags; |
1c79356b | 1152 | int atomic = sosendallatonce(so) || top; |
91447636 | 1153 | int sblocked = 0; |
1c79356b | 1154 | struct proc *p = current_proc(); |
1c79356b A |
1155 | |
1156 | if (uio) | |
91447636 A |
1157 | // LP64todo - fix this! |
1158 | resid = uio_resid(uio); | |
1c79356b A |
1159 | else |
1160 | resid = top->m_pkthdr.len; | |
1161 | ||
1162 | KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START), | |
1163 | so, | |
1164 | resid, | |
1165 | so->so_snd.sb_cc, | |
1166 | so->so_snd.sb_lowat, | |
1167 | so->so_snd.sb_hiwat); | |
1168 | ||
91447636 A |
1169 | socket_lock(so, 1); |
1170 | ||
1c79356b A |
1171 | /* |
1172 | * In theory resid should be unsigned. | |
1173 | * However, space must be signed, as it might be less than 0 | |
1174 | * if we over-committed, and we must use a signed comparison | |
1175 | * of space and resid. On the other hand, a negative resid | |
1176 | * causes us to loop sending 0-length segments to the protocol. | |
1177 | * | |
1178 | * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM | |
1179 | * type sockets since that's an error. | |
1180 | */ | |
91447636 | 1181 | if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) { |
1c79356b | 1182 | error = EINVAL; |
91447636 | 1183 | socket_unlock(so, 1); |
1c79356b A |
1184 | goto out; |
1185 | } | |
1186 | ||
1187 | dontroute = | |
1188 | (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && | |
1189 | (so->so_proto->pr_flags & PR_ATOMIC); | |
1190 | if (p) | |
1191 | p->p_stats->p_ru.ru_msgsnd++; | |
1192 | if (control) | |
1193 | clen = control->m_len; | |
1c79356b | 1194 | |
1c79356b | 1195 | do { |
91447636 A |
1196 | error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked); |
1197 | if (error) { | |
3a60a9f5 | 1198 | goto release; |
1c79356b | 1199 | } |
1c79356b | 1200 | mp = ⊤ |
91447636 | 1201 | space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0); |
fa4905b1 | 1202 | |
1c79356b | 1203 | do { |
fa4905b1 | 1204 | |
91447636 A |
1205 | if (uio == NULL) { |
1206 | /* | |
1207 | * Data is prepackaged in "top". | |
1208 | */ | |
1209 | resid = 0; | |
1c79356b A |
1210 | if (flags & MSG_EOR) |
1211 | top->m_flags |= M_EOR; | |
91447636 A |
1212 | } else { |
1213 | int chainlength; | |
1214 | int bytes_to_copy; | |
1215 | ||
1216 | bytes_to_copy = min(resid, space); | |
1217 | ||
1218 | if (sosendminchain > 0) { | |
1219 | chainlength = 0; | |
1220 | } else | |
1221 | chainlength = sosendmaxchain; | |
1222 | ||
1223 | socket_unlock(so, 0); | |
1224 | ||
1225 | do { | |
1226 | int num_needed; | |
1227 | int hdrs_needed = (top == 0) ? 1 : 0; | |
1228 | ||
1229 | /* | |
1230 | * try to maintain a local cache of mbuf clusters needed to complete this write | |
1231 | * the list is further limited to the number that are currently needed to fill the socket | |
1232 | * this mechanism allows a large number of mbufs/clusters to be grabbed under a single | |
1233 | * mbuf lock... if we can't get any clusters, than fall back to trying for mbufs | |
1234 | * if we fail early (or miscalcluate the number needed) make sure to release any clusters | |
1235 | * we haven't yet consumed. | |
1236 | */ | |
1237 | if (freelist == NULL && bytes_to_copy > MCLBYTES) { | |
1238 | num_needed = bytes_to_copy / NBPG; | |
1239 | ||
1240 | if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE) | |
1241 | num_needed++; | |
1242 | ||
1243 | freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG); | |
1244 | /* Fall back to cluster size if allocation failed */ | |
1245 | } | |
1246 | ||
1247 | if (freelist == NULL && bytes_to_copy > MINCLSIZE) { | |
1248 | num_needed = bytes_to_copy / MCLBYTES; | |
1249 | ||
1250 | if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE) | |
1251 | num_needed++; | |
1252 | ||
1253 | freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES); | |
1254 | /* Fall back to a single mbuf if allocation failed */ | |
1255 | } | |
1256 | ||
1257 | if (freelist == NULL) { | |
1258 | if (top == 0) | |
1259 | MGETHDR(freelist, M_WAIT, MT_DATA); | |
1260 | else | |
1261 | MGET(freelist, M_WAIT, MT_DATA); | |
1262 | ||
1263 | if (freelist == NULL) { | |
1264 | error = ENOBUFS; | |
1265 | socket_lock(so, 0); | |
3a60a9f5 | 1266 | goto release; |
91447636 A |
1267 | } |
1268 | /* | |
1269 | * For datagram protocols, leave room | |
1270 | * for protocol headers in first mbuf. | |
1271 | */ | |
1272 | if (atomic && top == 0 && bytes_to_copy < MHLEN) | |
1273 | MH_ALIGN(freelist, bytes_to_copy); | |
1274 | } | |
1275 | m = freelist; | |
1276 | freelist = m->m_next; | |
1277 | m->m_next = NULL; | |
1278 | ||
1279 | if ((m->m_flags & M_EXT)) | |
1280 | mlen = m->m_ext.ext_size; | |
1281 | else if ((m->m_flags & M_PKTHDR)) | |
1282 | mlen = MHLEN - m_leadingspace(m); | |
1283 | else | |
1284 | mlen = MLEN; | |
1285 | len = min(mlen, bytes_to_copy); | |
1286 | ||
1287 | chainlength += len; | |
1288 | ||
1289 | space -= len; | |
fa4905b1 | 1290 | |
91447636 A |
1291 | error = uiomove(mtod(m, caddr_t), (int)len, uio); |
1292 | ||
1293 | // LP64todo - fix this! | |
1294 | resid = uio_resid(uio); | |
1295 | ||
1296 | m->m_len = len; | |
1297 | *mp = m; | |
1298 | top->m_pkthdr.len += len; | |
1299 | if (error) | |
1300 | break; | |
1301 | mp = &m->m_next; | |
1302 | if (resid <= 0) { | |
1303 | if (flags & MSG_EOR) | |
1304 | top->m_flags |= M_EOR; | |
1305 | break; | |
1306 | } | |
1307 | bytes_to_copy = min(resid, space); | |
1308 | ||
1309 | } while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE)); | |
1310 | ||
1311 | socket_lock(so, 0); | |
1312 | ||
1313 | if (error) | |
1314 | goto release; | |
1315 | } | |
1c79356b A |
1316 | |
1317 | if (flags & (MSG_HOLD|MSG_SEND)) | |
3a60a9f5 A |
1318 | { |
1319 | /* Enqueue for later, go away if HOLD */ | |
1320 | register struct mbuf *mb1; | |
1321 | if (so->so_temp && (flags & MSG_FLUSH)) | |
1322 | { | |
1323 | m_freem(so->so_temp); | |
1324 | so->so_temp = NULL; | |
1325 | } | |
1326 | if (so->so_temp) | |
1327 | so->so_tail->m_next = top; | |
1328 | else | |
1329 | so->so_temp = top; | |
1330 | mb1 = top; | |
1331 | while (mb1->m_next) | |
1332 | mb1 = mb1->m_next; | |
1333 | so->so_tail = mb1; | |
1334 | if (flags & MSG_HOLD) | |
1335 | { | |
1336 | top = NULL; | |
1337 | goto release; | |
1338 | } | |
1339 | top = so->so_temp; | |
1c79356b A |
1340 | } |
1341 | if (dontroute) | |
1342 | so->so_options |= SO_DONTROUTE; | |
1c79356b A |
1343 | /* Compute flags here, for pru_send and NKEs */ |
1344 | sendflags = (flags & MSG_OOB) ? PRUS_OOB : | |
1345 | /* | |
1346 | * If the user set MSG_EOF, the protocol | |
1347 | * understands this flag and nothing left to | |
1348 | * send then use PRU_SEND_EOF instead of PRU_SEND. | |
1349 | */ | |
1350 | ((flags & MSG_EOF) && | |
1351 | (so->so_proto->pr_flags & PR_IMPLOPCL) && | |
1352 | (resid <= 0)) ? | |
1353 | PRUS_EOF : | |
1354 | /* If there is more to send set PRUS_MORETOCOME */ | |
1355 | (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0; | |
91447636 A |
1356 | |
1357 | /* | |
1358 | * Socket filter processing | |
1359 | */ | |
1360 | { | |
1361 | struct socket_filter_entry *filter; | |
1362 | int filtered; | |
1363 | ||
1364 | filtered = 0; | |
1365 | error = 0; | |
1366 | for (filter = so->so_filt; filter && (error == 0); | |
1367 | filter = filter->sfe_next_onsocket) { | |
1368 | if (filter->sfe_filter->sf_filter.sf_data_out) { | |
1369 | int so_flags = 0; | |
1370 | if (filtered == 0) { | |
1371 | filtered = 1; | |
3a60a9f5 | 1372 | so->so_send_filt_thread = current_thread(); |
ff6e181a | 1373 | sflt_use(so); |
91447636 A |
1374 | socket_unlock(so, 0); |
1375 | so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0; | |
1376 | } | |
1377 | error = filter->sfe_filter->sf_filter.sf_data_out( | |
1378 | filter->sfe_cookie, so, addr, &top, &control, so_flags); | |
1379 | } | |
1380 | } | |
1381 | ||
1382 | if (filtered) { | |
1383 | /* | |
1384 | * At this point, we've run at least one filter. | |
1385 | * The socket is unlocked as is the socket buffer. | |
1386 | */ | |
1387 | socket_lock(so, 0); | |
ff6e181a | 1388 | sflt_unuse(so); |
3a60a9f5 | 1389 | so->so_send_filt_thread = 0; |
91447636 | 1390 | if (error) { |
3a60a9f5 A |
1391 | if (error == EJUSTRETURN) { |
1392 | error = 0; | |
1393 | clen = 0; | |
1394 | control = 0; | |
1395 | top = 0; | |
91447636 | 1396 | } |
3a60a9f5 A |
1397 | |
1398 | goto release; | |
1c79356b | 1399 | } |
1c79356b A |
1400 | } |
1401 | } | |
91447636 A |
1402 | /* |
1403 | * End Socket filter processing | |
1404 | */ | |
1405 | ||
1406 | if (error == EJUSTRETURN) { | |
1407 | /* A socket filter handled this data */ | |
1408 | error = 0; | |
1409 | } | |
1410 | else { | |
1411 | error = (*so->so_proto->pr_usrreqs->pru_send)(so, | |
1412 | sendflags, top, addr, control, p); | |
1413 | } | |
9bccf70c | 1414 | #ifdef __APPLE__ |
1c79356b A |
1415 | if (flags & MSG_SEND) |
1416 | so->so_temp = NULL; | |
9bccf70c | 1417 | #endif |
1c79356b A |
1418 | if (dontroute) |
1419 | so->so_options &= ~SO_DONTROUTE; | |
1420 | clen = 0; | |
1421 | control = 0; | |
1422 | top = 0; | |
1423 | mp = ⊤ | |
1424 | if (error) | |
1425 | goto release; | |
1426 | } while (resid && space > 0); | |
1427 | } while (resid); | |
1428 | ||
1429 | release: | |
3a60a9f5 A |
1430 | if (sblocked) |
1431 | sbunlock(&so->so_snd, 0); /* will unlock socket */ | |
1432 | else | |
1433 | socket_unlock(so, 1); | |
1c79356b A |
1434 | out: |
1435 | if (top) | |
1436 | m_freem(top); | |
1437 | if (control) | |
1438 | m_freem(control); | |
fa4905b1 A |
1439 | if (freelist) |
1440 | m_freem_list(freelist); | |
1c79356b A |
1441 | |
1442 | KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END, | |
1443 | so, | |
1444 | resid, | |
1445 | so->so_snd.sb_cc, | |
1446 | space, | |
1447 | error); | |
1448 | ||
1449 | return (error); | |
1450 | } | |
1451 | ||
1452 | /* | |
1453 | * Implement receive operations on a socket. | |
1454 | * We depend on the way that records are added to the sockbuf | |
1455 | * by sbappend*. In particular, each record (mbufs linked through m_next) | |
1456 | * must begin with an address if the protocol so specifies, | |
1457 | * followed by an optional mbuf or mbufs containing ancillary data, | |
1458 | * and then zero or more mbufs of data. | |
1459 | * In order to avoid blocking network interrupts for the entire time here, | |
1460 | * we splx() while doing the actual copy to user space. | |
1461 | * Although the sockbuf is locked, new data may still be appended, | |
1462 | * and thus we must maintain consistency of the sockbuf during that time. | |
1463 | * | |
1464 | * The caller may receive the data as a single mbuf chain by supplying | |
1465 | * an mbuf **mp0 for use in returning the chain. The uio is then used | |
1466 | * only for the count in uio_resid. | |
1467 | */ | |
1468 | int | |
1469 | soreceive(so, psa, uio, mp0, controlp, flagsp) | |
1470 | register struct socket *so; | |
1471 | struct sockaddr **psa; | |
1472 | struct uio *uio; | |
1473 | struct mbuf **mp0; | |
1474 | struct mbuf **controlp; | |
1475 | int *flagsp; | |
1476 | { | |
91447636 A |
1477 | register struct mbuf *m, **mp, *ml = NULL; |
1478 | register int flags, len, error, offset; | |
1c79356b A |
1479 | struct protosw *pr = so->so_proto; |
1480 | struct mbuf *nextrecord; | |
1481 | int moff, type = 0; | |
91447636 A |
1482 | // LP64todo - fix this! |
1483 | int orig_resid = uio_resid(uio); | |
55e303ae A |
1484 | volatile struct mbuf *free_list; |
1485 | volatile int delayed_copy_len; | |
1486 | int can_delay; | |
1487 | int need_event; | |
1488 | struct proc *p = current_proc(); | |
1489 | ||
1490 | ||
91447636 | 1491 | // LP64todo - fix this! |
1c79356b A |
1492 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, |
1493 | so, | |
91447636 | 1494 | uio_resid(uio), |
1c79356b A |
1495 | so->so_rcv.sb_cc, |
1496 | so->so_rcv.sb_lowat, | |
1497 | so->so_rcv.sb_hiwat); | |
1498 | ||
91447636 | 1499 | socket_lock(so, 1); |
1c79356b | 1500 | |
91447636 A |
1501 | #ifdef MORE_LOCKING_DEBUG |
1502 | if (so->so_usecount == 1) | |
1503 | panic("soreceive: so=%x no other reference on socket\n", so); | |
1504 | #endif | |
1c79356b A |
1505 | mp = mp0; |
1506 | if (psa) | |
1507 | *psa = 0; | |
1508 | if (controlp) | |
1509 | *controlp = 0; | |
1510 | if (flagsp) | |
1511 | flags = *flagsp &~ MSG_EOR; | |
1512 | else | |
1513 | flags = 0; | |
1514 | /* | |
1515 | * When SO_WANTOOBFLAG is set we try to get out-of-band data | |
1516 | * regardless of the flags argument. Here is the case were | |
1517 | * out-of-band data is not inline. | |
1518 | */ | |
1519 | if ((flags & MSG_OOB) || | |
1520 | ((so->so_options & SO_WANTOOBFLAG) != 0 && | |
1521 | (so->so_options & SO_OOBINLINE) == 0 && | |
1522 | (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) { | |
1523 | m = m_get(M_WAIT, MT_DATA); | |
55e303ae | 1524 | if (m == NULL) { |
91447636 | 1525 | socket_unlock(so, 1); |
55e303ae | 1526 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0); |
9bccf70c | 1527 | return (ENOBUFS); |
55e303ae | 1528 | } |
1c79356b A |
1529 | error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); |
1530 | if (error) | |
1531 | goto bad; | |
91447636 | 1532 | socket_unlock(so, 0); |
1c79356b | 1533 | do { |
91447636 | 1534 | // LP64todo - fix this! |
1c79356b | 1535 | error = uiomove(mtod(m, caddr_t), |
91447636 | 1536 | (int) min(uio_resid(uio), m->m_len), uio); |
1c79356b | 1537 | m = m_free(m); |
91447636 A |
1538 | } while (uio_resid(uio) && error == 0 && m); |
1539 | socket_lock(so, 0); | |
1c79356b A |
1540 | bad: |
1541 | if (m) | |
1542 | m_freem(m); | |
9bccf70c A |
1543 | #ifdef __APPLE__ |
1544 | if ((so->so_options & SO_WANTOOBFLAG) != 0) { | |
1545 | if (error == EWOULDBLOCK || error == EINVAL) { | |
1546 | /* | |
1547 | * Let's try to get normal data: | |
1548 | * EWOULDBLOCK: out-of-band data not receive yet; | |
1549 | * EINVAL: out-of-band data already read. | |
1550 | */ | |
1551 | error = 0; | |
1552 | goto nooob; | |
1553 | } else if (error == 0 && flagsp) | |
1554 | *flagsp |= MSG_OOB; | |
91447636 A |
1555 | } |
1556 | socket_unlock(so, 1); | |
1c79356b | 1557 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
9bccf70c | 1558 | #endif |
1c79356b A |
1559 | return (error); |
1560 | } | |
1561 | nooob: | |
1562 | if (mp) | |
1563 | *mp = (struct mbuf *)0; | |
91447636 | 1564 | if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) |
1c79356b A |
1565 | (*pr->pr_usrreqs->pru_rcvd)(so, 0); |
1566 | ||
55e303ae A |
1567 | |
1568 | free_list = (struct mbuf *)0; | |
1569 | delayed_copy_len = 0; | |
1c79356b | 1570 | restart: |
91447636 A |
1571 | #ifdef MORE_LOCKING_DEBUG |
1572 | if (so->so_usecount <= 1) | |
1573 | printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount); | |
1574 | #endif | |
9bccf70c A |
1575 | error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); |
1576 | if (error) { | |
91447636 | 1577 | socket_unlock(so, 1); |
1c79356b A |
1578 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
1579 | return (error); | |
1580 | } | |
1c79356b A |
1581 | |
1582 | m = so->so_rcv.sb_mb; | |
1583 | /* | |
1584 | * If we have less data than requested, block awaiting more | |
1585 | * (subject to any timeout) if: | |
1586 | * 1. the current count is less than the low water mark, or | |
1587 | * 2. MSG_WAITALL is set, and it is possible to do the entire | |
1588 | * receive operation at once if we block (resid <= hiwat). | |
1589 | * 3. MSG_DONTWAIT is not set | |
1590 | * If MSG_WAITALL is set but resid is larger than the receive buffer, | |
1591 | * we have to do the receive in sections, and thus risk returning | |
1592 | * a short count if a timeout or signal occurs after we start. | |
1593 | */ | |
1594 | if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && | |
91447636 | 1595 | so->so_rcv.sb_cc < uio_resid(uio)) && |
55e303ae | 1596 | (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || |
91447636 | 1597 | ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) && |
1c79356b | 1598 | m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { |
55e303ae | 1599 | |
1c79356b A |
1600 | KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); |
1601 | if (so->so_error) { | |
1602 | if (m) | |
1603 | goto dontblock; | |
1604 | error = so->so_error; | |
1605 | if ((flags & MSG_PEEK) == 0) | |
1606 | so->so_error = 0; | |
1607 | goto release; | |
1608 | } | |
1609 | if (so->so_state & SS_CANTRCVMORE) { | |
1610 | if (m) | |
1611 | goto dontblock; | |
1612 | else | |
1613 | goto release; | |
1614 | } | |
1615 | for (; m; m = m->m_next) | |
1616 | if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { | |
1617 | m = so->so_rcv.sb_mb; | |
1618 | goto dontblock; | |
1619 | } | |
1620 | if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && | |
1621 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) { | |
1622 | error = ENOTCONN; | |
1623 | goto release; | |
1624 | } | |
91447636 | 1625 | if (uio_resid(uio) == 0) |
1c79356b | 1626 | goto release; |
91447636 | 1627 | if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { |
1c79356b A |
1628 | error = EWOULDBLOCK; |
1629 | goto release; | |
1630 | } | |
91447636 A |
1631 | sbunlock(&so->so_rcv, 1); |
1632 | #ifdef EVEN_MORE_LOCKING_DEBUG | |
1c79356b A |
1633 | if (socket_debug) |
1634 | printf("Waiting for socket data\n"); | |
91447636 | 1635 | #endif |
55e303ae | 1636 | |
1c79356b | 1637 | error = sbwait(&so->so_rcv); |
91447636 | 1638 | #ifdef EVEN_MORE_LOCKING_DEBUG |
1c79356b A |
1639 | if (socket_debug) |
1640 | printf("SORECEIVE - sbwait returned %d\n", error); | |
91447636 A |
1641 | #endif |
1642 | if (so->so_usecount < 1) | |
1643 | panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount); | |
9bccf70c | 1644 | if (error) { |
91447636 | 1645 | socket_unlock(so, 1); |
1c79356b A |
1646 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
1647 | return (error); | |
1648 | } | |
1649 | goto restart; | |
1650 | } | |
1651 | dontblock: | |
9bccf70c | 1652 | #ifndef __APPLE__ |
1c79356b A |
1653 | if (uio->uio_procp) |
1654 | uio->uio_procp->p_stats->p_ru.ru_msgrcv++; | |
55e303ae A |
1655 | #else /* __APPLE__ */ |
1656 | /* | |
1657 | * 2207985 | |
1658 | * This should be uio->uio-procp; however, some callers of this | |
1659 | * function use auto variables with stack garbage, and fail to | |
1660 | * fill out the uio structure properly. | |
1661 | */ | |
1662 | if (p) | |
1663 | p->p_stats->p_ru.ru_msgrcv++; | |
1664 | #endif /* __APPLE__ */ | |
1c79356b A |
1665 | nextrecord = m->m_nextpkt; |
1666 | if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { | |
1667 | KASSERT(m->m_type == MT_SONAME, ("receive 1a")); | |
1668 | orig_resid = 0; | |
4a249263 | 1669 | if (psa) { |
1c79356b A |
1670 | *psa = dup_sockaddr(mtod(m, struct sockaddr *), |
1671 | mp0 == 0); | |
4a249263 A |
1672 | if ((*psa == 0) && (flags & MSG_NEEDSA)) { |
1673 | error = EWOULDBLOCK; | |
1674 | goto release; | |
1675 | } | |
1676 | } | |
1c79356b A |
1677 | if (flags & MSG_PEEK) { |
1678 | m = m->m_next; | |
1679 | } else { | |
1680 | sbfree(&so->so_rcv, m); | |
91447636 A |
1681 | if (m->m_next == 0 && so->so_rcv.sb_cc != 0) |
1682 | panic("soreceive: about to create invalid socketbuf"); | |
1c79356b A |
1683 | MFREE(m, so->so_rcv.sb_mb); |
1684 | m = so->so_rcv.sb_mb; | |
1685 | } | |
1686 | } | |
1687 | while (m && m->m_type == MT_CONTROL && error == 0) { | |
1688 | if (flags & MSG_PEEK) { | |
1689 | if (controlp) | |
1690 | *controlp = m_copy(m, 0, m->m_len); | |
1691 | m = m->m_next; | |
1692 | } else { | |
1693 | sbfree(&so->so_rcv, m); | |
1694 | if (controlp) { | |
1695 | if (pr->pr_domain->dom_externalize && | |
1696 | mtod(m, struct cmsghdr *)->cmsg_type == | |
91447636 A |
1697 | SCM_RIGHTS) { |
1698 | socket_unlock(so, 0); /* release socket lock: see 3903171 */ | |
1c79356b | 1699 | error = (*pr->pr_domain->dom_externalize)(m); |
91447636 A |
1700 | socket_lock(so, 0); |
1701 | } | |
1c79356b | 1702 | *controlp = m; |
91447636 A |
1703 | if (m->m_next == 0 && so->so_rcv.sb_cc != 0) |
1704 | panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0"); | |
1c79356b A |
1705 | so->so_rcv.sb_mb = m->m_next; |
1706 | m->m_next = 0; | |
1707 | m = so->so_rcv.sb_mb; | |
1708 | } else { | |
1709 | MFREE(m, so->so_rcv.sb_mb); | |
1710 | m = so->so_rcv.sb_mb; | |
1711 | } | |
1712 | } | |
1713 | if (controlp) { | |
1714 | orig_resid = 0; | |
1715 | controlp = &(*controlp)->m_next; | |
1716 | } | |
1717 | } | |
1718 | if (m) { | |
1719 | if ((flags & MSG_PEEK) == 0) | |
1720 | m->m_nextpkt = nextrecord; | |
1721 | type = m->m_type; | |
1722 | if (type == MT_OOBDATA) | |
1723 | flags |= MSG_OOB; | |
1724 | } | |
1725 | moff = 0; | |
1726 | offset = 0; | |
fa4905b1 | 1727 | |
91447636 | 1728 | if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) |
55e303ae A |
1729 | can_delay = 1; |
1730 | else | |
1731 | can_delay = 0; | |
1732 | ||
1733 | need_event = 0; | |
fa4905b1 | 1734 | |
91447636 | 1735 | while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) { |
1c79356b A |
1736 | if (m->m_type == MT_OOBDATA) { |
1737 | if (type != MT_OOBDATA) | |
1738 | break; | |
1739 | } else if (type == MT_OOBDATA) | |
1740 | break; | |
9bccf70c | 1741 | #ifndef __APPLE__ |
1c79356b A |
1742 | /* |
1743 | * This assertion needs rework. The trouble is Appletalk is uses many | |
1744 | * mbuf types (NOT listed in mbuf.h!) which will trigger this panic. | |
1745 | * For now just remove the assertion... CSM 9/98 | |
1746 | */ | |
1747 | else | |
1748 | KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, | |
1749 | ("receive 3")); | |
9bccf70c A |
1750 | #else |
1751 | /* | |
1752 | * Make sure to allways set MSG_OOB event when getting | |
1753 | * out of band data inline. | |
1754 | */ | |
1c79356b | 1755 | if ((so->so_options & SO_WANTOOBFLAG) != 0 && |
9bccf70c A |
1756 | (so->so_options & SO_OOBINLINE) != 0 && |
1757 | (so->so_state & SS_RCVATMARK) != 0) { | |
1758 | flags |= MSG_OOB; | |
1759 | } | |
1760 | #endif | |
1c79356b | 1761 | so->so_state &= ~SS_RCVATMARK; |
91447636 A |
1762 | // LP64todo - fix this! |
1763 | len = uio_resid(uio) - delayed_copy_len; | |
1c79356b A |
1764 | if (so->so_oobmark && len > so->so_oobmark - offset) |
1765 | len = so->so_oobmark - offset; | |
1766 | if (len > m->m_len - moff) | |
1767 | len = m->m_len - moff; | |
1768 | /* | |
1769 | * If mp is set, just pass back the mbufs. | |
1770 | * Otherwise copy them out via the uio, then free. | |
1771 | * Sockbuf must be consistent here (points to current mbuf, | |
1772 | * it points to next record) when we drop priority; | |
1773 | * we must note any additions to the sockbuf when we | |
1774 | * block interrupts again. | |
1775 | */ | |
1776 | if (mp == 0) { | |
55e303ae A |
1777 | if (can_delay && len == m->m_len) { |
1778 | /* | |
1779 | * only delay the copy if we're consuming the | |
1780 | * mbuf and we're NOT in MSG_PEEK mode | |
1781 | * and we have enough data to make it worthwile | |
1782 | * to drop and retake the funnel... can_delay | |
1783 | * reflects the state of the 2 latter constraints | |
1784 | * moff should always be zero in these cases | |
1785 | */ | |
1786 | delayed_copy_len += len; | |
1787 | } else { | |
55e303ae A |
1788 | |
1789 | if (delayed_copy_len) { | |
91447636 | 1790 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1791 | |
1792 | if (error) { | |
55e303ae A |
1793 | goto release; |
1794 | } | |
1795 | if (m != so->so_rcv.sb_mb) { | |
1796 | /* | |
1797 | * can only get here if MSG_PEEK is not set | |
1798 | * therefore, m should point at the head of the rcv queue... | |
1799 | * if it doesn't, it means something drastically changed | |
1800 | * while we were out from behind the funnel in sodelayed_copy... | |
1801 | * perhaps a RST on the stream... in any event, the stream has | |
1802 | * been interrupted... it's probably best just to return | |
1803 | * whatever data we've moved and let the caller sort it out... | |
1804 | */ | |
1805 | break; | |
1806 | } | |
1807 | } | |
91447636 | 1808 | socket_unlock(so, 0); |
55e303ae | 1809 | error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); |
91447636 | 1810 | socket_lock(so, 0); |
55e303ae | 1811 | |
55e303ae A |
1812 | if (error) |
1813 | goto release; | |
1814 | } | |
1c79356b | 1815 | } else |
91447636 | 1816 | uio_setresid(uio, (uio_resid(uio) - len)); |
55e303ae | 1817 | |
1c79356b A |
1818 | if (len == m->m_len - moff) { |
1819 | if (m->m_flags & M_EOR) | |
1820 | flags |= MSG_EOR; | |
1821 | if (flags & MSG_PEEK) { | |
1822 | m = m->m_next; | |
1823 | moff = 0; | |
1824 | } else { | |
1825 | nextrecord = m->m_nextpkt; | |
1826 | sbfree(&so->so_rcv, m); | |
91447636 | 1827 | m->m_nextpkt = NULL; |
55e303ae | 1828 | |
1c79356b A |
1829 | if (mp) { |
1830 | *mp = m; | |
1831 | mp = &m->m_next; | |
1832 | so->so_rcv.sb_mb = m = m->m_next; | |
1833 | *mp = (struct mbuf *)0; | |
1834 | } else { | |
55e303ae A |
1835 | if (free_list == NULL) |
1836 | free_list = m; | |
1837 | else | |
14353aa8 A |
1838 | ml->m_next = m; |
1839 | ml = m; | |
1840 | so->so_rcv.sb_mb = m = m->m_next; | |
1841 | ml->m_next = 0; | |
1c79356b A |
1842 | } |
1843 | if (m) | |
1844 | m->m_nextpkt = nextrecord; | |
1845 | } | |
1846 | } else { | |
1847 | if (flags & MSG_PEEK) | |
1848 | moff += len; | |
1849 | else { | |
1850 | if (mp) | |
1851 | *mp = m_copym(m, 0, len, M_WAIT); | |
1852 | m->m_data += len; | |
1853 | m->m_len -= len; | |
1854 | so->so_rcv.sb_cc -= len; | |
1855 | } | |
1856 | } | |
1857 | if (so->so_oobmark) { | |
1858 | if ((flags & MSG_PEEK) == 0) { | |
1859 | so->so_oobmark -= len; | |
1860 | if (so->so_oobmark == 0) { | |
1861 | so->so_state |= SS_RCVATMARK; | |
55e303ae A |
1862 | /* |
1863 | * delay posting the actual event until after | |
1864 | * any delayed copy processing has finished | |
1865 | */ | |
1866 | need_event = 1; | |
1c79356b A |
1867 | break; |
1868 | } | |
1869 | } else { | |
1870 | offset += len; | |
1871 | if (offset == so->so_oobmark) | |
1872 | break; | |
1873 | } | |
1874 | } | |
91447636 | 1875 | if (flags & MSG_EOR) |
1c79356b A |
1876 | break; |
1877 | /* | |
55e303ae | 1878 | * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket), |
1c79356b A |
1879 | * we must not quit until "uio->uio_resid == 0" or an error |
1880 | * termination. If a signal/timeout occurs, return | |
1881 | * with a short count but without error. | |
1882 | * Keep sockbuf locked against other readers. | |
1883 | */ | |
91447636 | 1884 | while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 && |
1c79356b A |
1885 | !sosendallatonce(so) && !nextrecord) { |
1886 | if (so->so_error || so->so_state & SS_CANTRCVMORE) | |
55e303ae | 1887 | goto release; |
fa4905b1 | 1888 | |
91447636 | 1889 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD)) |
55e303ae A |
1890 | (*pr->pr_usrreqs->pru_rcvd)(so, flags); |
1891 | if (sbwait(&so->so_rcv)) { | |
1892 | error = 0; | |
1893 | goto release; | |
fa4905b1 | 1894 | } |
55e303ae A |
1895 | /* |
1896 | * have to wait until after we get back from the sbwait to do the copy because | |
1897 | * we will drop the funnel if we have enough data that has been delayed... by dropping | |
1898 | * the funnel we open up a window allowing the netisr thread to process the incoming packets | |
1899 | * and to change the state of this socket... we're issuing the sbwait because | |
1900 | * the socket is empty and we're expecting the netisr thread to wake us up when more | |
1901 | * packets arrive... if we allow that processing to happen and then sbwait, we | |
1902 | * could stall forever with packets sitting in the socket if no further packets | |
1903 | * arrive from the remote side. | |
1904 | * | |
1905 | * we want to copy before we've collected all the data to satisfy this request to | |
1906 | * allow the copy to overlap the incoming packet processing on an MP system | |
1907 | */ | |
1908 | if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) { | |
1909 | ||
91447636 | 1910 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1911 | |
1912 | if (error) | |
1913 | goto release; | |
1c79356b A |
1914 | } |
1915 | m = so->so_rcv.sb_mb; | |
fa4905b1 | 1916 | if (m) { |
1c79356b | 1917 | nextrecord = m->m_nextpkt; |
fa4905b1 | 1918 | } |
1c79356b A |
1919 | } |
1920 | } | |
91447636 A |
1921 | #ifdef MORE_LOCKING_DEBUG |
1922 | if (so->so_usecount <= 1) | |
1923 | panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount); | |
1924 | #endif | |
1c79356b A |
1925 | |
1926 | if (m && pr->pr_flags & PR_ATOMIC) { | |
9bccf70c | 1927 | #ifdef __APPLE__ |
1c79356b A |
1928 | if (so->so_options & SO_DONTTRUNC) |
1929 | flags |= MSG_RCVMORE; | |
9bccf70c A |
1930 | else { |
1931 | #endif | |
1932 | flags |= MSG_TRUNC; | |
1c79356b A |
1933 | if ((flags & MSG_PEEK) == 0) |
1934 | (void) sbdroprecord(&so->so_rcv); | |
9bccf70c | 1935 | #ifdef __APPLE__ |
1c79356b | 1936 | } |
9bccf70c | 1937 | #endif |
1c79356b A |
1938 | } |
1939 | if ((flags & MSG_PEEK) == 0) { | |
1940 | if (m == 0) | |
1941 | so->so_rcv.sb_mb = nextrecord; | |
1942 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) | |
1943 | (*pr->pr_usrreqs->pru_rcvd)(so, flags); | |
1944 | } | |
9bccf70c | 1945 | #ifdef __APPLE__ |
1c79356b A |
1946 | if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) |
1947 | flags |= MSG_HAVEMORE; | |
55e303ae A |
1948 | |
1949 | if (delayed_copy_len) { | |
91447636 | 1950 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1951 | |
1952 | if (error) | |
1953 | goto release; | |
1954 | } | |
1955 | if (free_list) { | |
1956 | m_freem_list((struct mbuf *)free_list); | |
1957 | free_list = (struct mbuf *)0; | |
1958 | } | |
1959 | if (need_event) | |
1960 | postevent(so, 0, EV_OOB); | |
9bccf70c | 1961 | #endif |
91447636 | 1962 | if (orig_resid == uio_resid(uio) && orig_resid && |
1c79356b | 1963 | (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { |
91447636 | 1964 | sbunlock(&so->so_rcv, 1); |
1c79356b A |
1965 | goto restart; |
1966 | } | |
1967 | ||
1968 | if (flagsp) | |
1969 | *flagsp |= flags; | |
1970 | release: | |
91447636 A |
1971 | #ifdef MORE_LOCKING_DEBUG |
1972 | if (so->so_usecount <= 1) | |
1973 | panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount); | |
1974 | #endif | |
55e303ae | 1975 | if (delayed_copy_len) { |
91447636 | 1976 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1977 | } |
1978 | if (free_list) { | |
1979 | m_freem_list((struct mbuf *)free_list); | |
1980 | } | |
91447636 | 1981 | sbunlock(&so->so_rcv, 0); /* will unlock socket */ |
1c79356b | 1982 | |
91447636 | 1983 | // LP64todo - fix this! |
1c79356b A |
1984 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, |
1985 | so, | |
91447636 | 1986 | uio_resid(uio), |
1c79356b A |
1987 | so->so_rcv.sb_cc, |
1988 | 0, | |
1989 | error); | |
1990 | ||
1991 | return (error); | |
1992 | } | |
1993 | ||
55e303ae | 1994 | |
91447636 | 1995 | static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid) |
55e303ae A |
1996 | { |
1997 | int error = 0; | |
55e303ae A |
1998 | struct mbuf *m; |
1999 | ||
2000 | m = *free_list; | |
2001 | ||
91447636 | 2002 | socket_unlock(so, 0); |
55e303ae | 2003 | |
55e303ae A |
2004 | while (m && error == 0) { |
2005 | ||
2006 | error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio); | |
2007 | ||
2008 | m = m->m_next; | |
2009 | } | |
2010 | m_freem_list(*free_list); | |
2011 | ||
2012 | *free_list = (struct mbuf *)NULL; | |
2013 | *resid = 0; | |
2014 | ||
91447636 | 2015 | socket_lock(so, 0); |
55e303ae A |
2016 | |
2017 | return (error); | |
2018 | } | |
2019 | ||
2020 | ||
1c79356b A |
2021 | int |
2022 | soshutdown(so, how) | |
2023 | register struct socket *so; | |
4452a7af | 2024 | int how; |
1c79356b A |
2025 | { |
2026 | register struct protosw *pr = so->so_proto; | |
1c79356b A |
2027 | int ret; |
2028 | ||
91447636 A |
2029 | socket_lock(so, 1); |
2030 | ||
2031 | sflt_notify(so, sock_evt_shutdown, &how); | |
1c79356b | 2032 | |
9bccf70c | 2033 | if (how != SHUT_WR) { |
1c79356b A |
2034 | sorflush(so); |
2035 | postevent(so, 0, EV_RCLOSED); | |
2036 | } | |
9bccf70c | 2037 | if (how != SHUT_RD) { |
1c79356b A |
2038 | ret = ((*pr->pr_usrreqs->pru_shutdown)(so)); |
2039 | postevent(so, 0, EV_WCLOSED); | |
2040 | KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); | |
91447636 | 2041 | socket_unlock(so, 1); |
1c79356b A |
2042 | return(ret); |
2043 | } | |
2044 | ||
2045 | KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); | |
91447636 | 2046 | socket_unlock(so, 1); |
1c79356b A |
2047 | return (0); |
2048 | } | |
2049 | ||
2050 | void | |
2051 | sorflush(so) | |
2052 | register struct socket *so; | |
2053 | { | |
2054 | register struct sockbuf *sb = &so->so_rcv; | |
2055 | register struct protosw *pr = so->so_proto; | |
1c79356b | 2056 | struct sockbuf asb; |
1c79356b | 2057 | |
91447636 A |
2058 | #ifdef MORE_LOCKING_DEBUG |
2059 | lck_mtx_t * mutex_held; | |
2060 | ||
2061 | if (so->so_proto->pr_getlock != NULL) | |
2062 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
2063 | else | |
2064 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2065 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2066 | #endif | |
2067 | ||
2068 | sflt_notify(so, sock_evt_flush_read, NULL); | |
1c79356b A |
2069 | |
2070 | sb->sb_flags |= SB_NOINTR; | |
2071 | (void) sblock(sb, M_WAIT); | |
1c79356b | 2072 | socantrcvmore(so); |
91447636 | 2073 | sbunlock(sb, 1); |
9bccf70c | 2074 | #ifdef __APPLE__ |
0b4e3aa0 | 2075 | selthreadclear(&sb->sb_sel); |
9bccf70c | 2076 | #endif |
1c79356b A |
2077 | asb = *sb; |
2078 | bzero((caddr_t)sb, sizeof (*sb)); | |
91447636 | 2079 | sb->sb_so = so; /* reestablish link to socket */ |
9bccf70c A |
2080 | if (asb.sb_flags & SB_KNOTE) { |
2081 | sb->sb_sel.si_note = asb.sb_sel.si_note; | |
2082 | sb->sb_flags = SB_KNOTE; | |
2083 | } | |
1c79356b A |
2084 | if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose) |
2085 | (*pr->pr_domain->dom_dispose)(asb.sb_mb); | |
2086 | sbrelease(&asb); | |
2087 | } | |
2088 | ||
2089 | /* | |
2090 | * Perhaps this routine, and sooptcopyout(), below, ought to come in | |
2091 | * an additional variant to handle the case where the option value needs | |
2092 | * to be some kind of integer, but not a specific size. | |
2093 | * In addition to their use here, these functions are also called by the | |
2094 | * protocol-level pr_ctloutput() routines. | |
2095 | */ | |
2096 | int | |
2097 | sooptcopyin(sopt, buf, len, minlen) | |
2098 | struct sockopt *sopt; | |
2099 | void *buf; | |
2100 | size_t len; | |
2101 | size_t minlen; | |
2102 | { | |
2103 | size_t valsize; | |
2104 | ||
2105 | /* | |
2106 | * If the user gives us more than we wanted, we ignore it, | |
2107 | * but if we don't get the minimum length the caller | |
2108 | * wants, we return EINVAL. On success, sopt->sopt_valsize | |
2109 | * is set to however much we actually retrieved. | |
2110 | */ | |
2111 | if ((valsize = sopt->sopt_valsize) < minlen) | |
2112 | return EINVAL; | |
2113 | if (valsize > len) | |
2114 | sopt->sopt_valsize = valsize = len; | |
2115 | ||
2116 | if (sopt->sopt_p != 0) | |
2117 | return (copyin(sopt->sopt_val, buf, valsize)); | |
2118 | ||
91447636 | 2119 | bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize); |
1c79356b A |
2120 | return 0; |
2121 | } | |
2122 | ||
2123 | int | |
2124 | sosetopt(so, sopt) | |
2125 | struct socket *so; | |
2126 | struct sockopt *sopt; | |
2127 | { | |
2128 | int error, optval; | |
2129 | struct linger l; | |
2130 | struct timeval tv; | |
2131 | short val; | |
91447636 A |
2132 | |
2133 | socket_lock(so, 1); | |
1c79356b | 2134 | |
9bccf70c A |
2135 | if (sopt->sopt_dir != SOPT_SET) { |
2136 | sopt->sopt_dir = SOPT_SET; | |
2137 | } | |
2138 | ||
91447636 A |
2139 | { |
2140 | struct socket_filter_entry *filter; | |
2141 | int filtered = 0; | |
2142 | error = 0; | |
2143 | for (filter = so->so_filt; filter && (error == 0); | |
2144 | filter = filter->sfe_next_onsocket) { | |
2145 | if (filter->sfe_filter->sf_filter.sf_setoption) { | |
2146 | if (filtered == 0) { | |
2147 | filtered = 1; | |
2148 | sflt_use(so); | |
2149 | socket_unlock(so, 0); | |
2150 | } | |
2151 | error = filter->sfe_filter->sf_filter.sf_setoption( | |
2152 | filter->sfe_cookie, so, sopt); | |
2153 | } | |
2154 | } | |
2155 | ||
2156 | if (filtered != 0) { | |
2157 | socket_lock(so, 0); | |
2158 | sflt_unuse(so); | |
2159 | ||
2160 | if (error) { | |
2161 | if (error == EJUSTRETURN) | |
2162 | error = 0; | |
2163 | goto bad; | |
2164 | } | |
1c79356b | 2165 | } |
1c79356b A |
2166 | } |
2167 | ||
2168 | error = 0; | |
2169 | if (sopt->sopt_level != SOL_SOCKET) { | |
91447636 A |
2170 | if (so->so_proto && so->so_proto->pr_ctloutput) { |
2171 | error = (*so->so_proto->pr_ctloutput) | |
2172 | (so, sopt); | |
2173 | socket_unlock(so, 1); | |
2174 | return (error); | |
2175 | } | |
1c79356b A |
2176 | error = ENOPROTOOPT; |
2177 | } else { | |
2178 | switch (sopt->sopt_name) { | |
2179 | case SO_LINGER: | |
91447636 | 2180 | case SO_LINGER_SEC: |
1c79356b A |
2181 | error = sooptcopyin(sopt, &l, sizeof l, sizeof l); |
2182 | if (error) | |
2183 | goto bad; | |
2184 | ||
91447636 | 2185 | so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz; |
1c79356b A |
2186 | if (l.l_onoff) |
2187 | so->so_options |= SO_LINGER; | |
2188 | else | |
2189 | so->so_options &= ~SO_LINGER; | |
2190 | break; | |
2191 | ||
2192 | case SO_DEBUG: | |
2193 | case SO_KEEPALIVE: | |
2194 | case SO_DONTROUTE: | |
2195 | case SO_USELOOPBACK: | |
2196 | case SO_BROADCAST: | |
2197 | case SO_REUSEADDR: | |
2198 | case SO_REUSEPORT: | |
2199 | case SO_OOBINLINE: | |
2200 | case SO_TIMESTAMP: | |
9bccf70c | 2201 | #ifdef __APPLE__ |
1c79356b A |
2202 | case SO_DONTTRUNC: |
2203 | case SO_WANTMORE: | |
9bccf70c A |
2204 | case SO_WANTOOBFLAG: |
2205 | #endif | |
1c79356b A |
2206 | error = sooptcopyin(sopt, &optval, sizeof optval, |
2207 | sizeof optval); | |
2208 | if (error) | |
2209 | goto bad; | |
2210 | if (optval) | |
2211 | so->so_options |= sopt->sopt_name; | |
2212 | else | |
2213 | so->so_options &= ~sopt->sopt_name; | |
2214 | break; | |
2215 | ||
2216 | case SO_SNDBUF: | |
2217 | case SO_RCVBUF: | |
2218 | case SO_SNDLOWAT: | |
2219 | case SO_RCVLOWAT: | |
2220 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
2221 | sizeof optval); | |
2222 | if (error) | |
2223 | goto bad; | |
2224 | ||
2225 | /* | |
2226 | * Values < 1 make no sense for any of these | |
2227 | * options, so disallow them. | |
2228 | */ | |
2229 | if (optval < 1) { | |
2230 | error = EINVAL; | |
2231 | goto bad; | |
2232 | } | |
2233 | ||
2234 | switch (sopt->sopt_name) { | |
2235 | case SO_SNDBUF: | |
2236 | case SO_RCVBUF: | |
2237 | if (sbreserve(sopt->sopt_name == SO_SNDBUF ? | |
2238 | &so->so_snd : &so->so_rcv, | |
2239 | (u_long) optval) == 0) { | |
2240 | error = ENOBUFS; | |
2241 | goto bad; | |
2242 | } | |
2243 | break; | |
2244 | ||
2245 | /* | |
2246 | * Make sure the low-water is never greater than | |
2247 | * the high-water. | |
2248 | */ | |
2249 | case SO_SNDLOWAT: | |
2250 | so->so_snd.sb_lowat = | |
2251 | (optval > so->so_snd.sb_hiwat) ? | |
2252 | so->so_snd.sb_hiwat : optval; | |
2253 | break; | |
2254 | case SO_RCVLOWAT: | |
2255 | so->so_rcv.sb_lowat = | |
2256 | (optval > so->so_rcv.sb_hiwat) ? | |
2257 | so->so_rcv.sb_hiwat : optval; | |
2258 | break; | |
2259 | } | |
2260 | break; | |
2261 | ||
2262 | case SO_SNDTIMEO: | |
2263 | case SO_RCVTIMEO: | |
2264 | error = sooptcopyin(sopt, &tv, sizeof tv, | |
2265 | sizeof tv); | |
2266 | if (error) | |
2267 | goto bad; | |
2268 | ||
91447636 | 2269 | if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX || |
9bccf70c A |
2270 | tv.tv_usec < 0 || tv.tv_usec >= 1000000) { |
2271 | error = EDOM; | |
2272 | goto bad; | |
2273 | } | |
91447636 | 2274 | |
1c79356b A |
2275 | switch (sopt->sopt_name) { |
2276 | case SO_SNDTIMEO: | |
91447636 | 2277 | so->so_snd.sb_timeo = tv; |
1c79356b A |
2278 | break; |
2279 | case SO_RCVTIMEO: | |
91447636 | 2280 | so->so_rcv.sb_timeo = tv; |
1c79356b A |
2281 | break; |
2282 | } | |
2283 | break; | |
2284 | ||
2285 | case SO_NKE: | |
9bccf70c A |
2286 | { |
2287 | struct so_nke nke; | |
1c79356b | 2288 | |
9bccf70c A |
2289 | error = sooptcopyin(sopt, &nke, |
2290 | sizeof nke, sizeof nke); | |
1c79356b A |
2291 | if (error) |
2292 | goto bad; | |
2293 | ||
91447636 | 2294 | error = sflt_attach_private(so, NULL, nke.nke_handle, 1); |
1c79356b A |
2295 | break; |
2296 | } | |
2297 | ||
9bccf70c A |
2298 | case SO_NOSIGPIPE: |
2299 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
2300 | sizeof optval); | |
2301 | if (error) | |
2302 | goto bad; | |
2303 | if (optval) | |
2304 | so->so_flags |= SOF_NOSIGPIPE; | |
2305 | else | |
2306 | so->so_flags &= ~SOF_NOSIGPIPE; | |
2307 | ||
2308 | break; | |
2309 | ||
55e303ae A |
2310 | case SO_NOADDRERR: |
2311 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
2312 | sizeof optval); | |
2313 | if (error) | |
2314 | goto bad; | |
2315 | if (optval) | |
2316 | so->so_flags |= SOF_NOADDRAVAIL; | |
2317 | else | |
2318 | so->so_flags &= ~SOF_NOADDRAVAIL; | |
2319 | ||
2320 | break; | |
2321 | ||
1c79356b A |
2322 | default: |
2323 | error = ENOPROTOOPT; | |
2324 | break; | |
2325 | } | |
2326 | if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { | |
2327 | (void) ((*so->so_proto->pr_ctloutput) | |
2328 | (so, sopt)); | |
2329 | } | |
2330 | } | |
2331 | bad: | |
91447636 | 2332 | socket_unlock(so, 1); |
1c79356b A |
2333 | return (error); |
2334 | } | |
2335 | ||
2336 | /* Helper routine for getsockopt */ | |
2337 | int | |
2338 | sooptcopyout(sopt, buf, len) | |
2339 | struct sockopt *sopt; | |
2340 | void *buf; | |
2341 | size_t len; | |
2342 | { | |
2343 | int error; | |
2344 | size_t valsize; | |
2345 | ||
2346 | error = 0; | |
2347 | ||
2348 | /* | |
2349 | * Documented get behavior is that we always return a value, | |
2350 | * possibly truncated to fit in the user's buffer. | |
2351 | * Traditional behavior is that we always tell the user | |
2352 | * precisely how much we copied, rather than something useful | |
2353 | * like the total amount we had available for her. | |
2354 | * Note that this interface is not idempotent; the entire answer must | |
2355 | * generated ahead of time. | |
2356 | */ | |
2357 | valsize = min(len, sopt->sopt_valsize); | |
2358 | sopt->sopt_valsize = valsize; | |
91447636 | 2359 | if (sopt->sopt_val != USER_ADDR_NULL) { |
1c79356b A |
2360 | if (sopt->sopt_p != 0) |
2361 | error = copyout(buf, sopt->sopt_val, valsize); | |
2362 | else | |
91447636 | 2363 | bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize); |
1c79356b A |
2364 | } |
2365 | return error; | |
2366 | } | |
2367 | ||
2368 | int | |
2369 | sogetopt(so, sopt) | |
2370 | struct socket *so; | |
2371 | struct sockopt *sopt; | |
2372 | { | |
2373 | int error, optval; | |
2374 | struct linger l; | |
2375 | struct timeval tv; | |
1c79356b | 2376 | |
9bccf70c A |
2377 | if (sopt->sopt_dir != SOPT_GET) { |
2378 | sopt->sopt_dir = SOPT_GET; | |
2379 | } | |
2380 | ||
91447636 A |
2381 | socket_lock(so, 1); |
2382 | ||
2383 | { | |
2384 | struct socket_filter_entry *filter; | |
2385 | int filtered = 0; | |
2386 | error = 0; | |
2387 | for (filter = so->so_filt; filter && (error == 0); | |
2388 | filter = filter->sfe_next_onsocket) { | |
2389 | if (filter->sfe_filter->sf_filter.sf_getoption) { | |
2390 | if (filtered == 0) { | |
2391 | filtered = 1; | |
2392 | sflt_use(so); | |
2393 | socket_unlock(so, 0); | |
2394 | } | |
2395 | error = filter->sfe_filter->sf_filter.sf_getoption( | |
2396 | filter->sfe_cookie, so, sopt); | |
2397 | } | |
2398 | } | |
2399 | if (filtered != 0) { | |
2400 | socket_lock(so, 0); | |
2401 | sflt_unuse(so); | |
2402 | ||
2403 | if (error) { | |
2404 | if (error == EJUSTRETURN) | |
2405 | error = 0; | |
2406 | socket_unlock(so, 1); | |
2407 | return error; | |
2408 | } | |
1c79356b | 2409 | } |
1c79356b A |
2410 | } |
2411 | ||
2412 | error = 0; | |
2413 | if (sopt->sopt_level != SOL_SOCKET) { | |
2414 | if (so->so_proto && so->so_proto->pr_ctloutput) { | |
91447636 A |
2415 | error = (*so->so_proto->pr_ctloutput) |
2416 | (so, sopt); | |
2417 | socket_unlock(so, 1); | |
2418 | return (error); | |
2419 | } else { | |
2420 | socket_unlock(so, 1); | |
1c79356b | 2421 | return (ENOPROTOOPT); |
91447636 | 2422 | } |
1c79356b A |
2423 | } else { |
2424 | switch (sopt->sopt_name) { | |
2425 | case SO_LINGER: | |
91447636 | 2426 | case SO_LINGER_SEC: |
1c79356b | 2427 | l.l_onoff = so->so_options & SO_LINGER; |
91447636 A |
2428 | l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger : |
2429 | so->so_linger / hz; | |
1c79356b A |
2430 | error = sooptcopyout(sopt, &l, sizeof l); |
2431 | break; | |
2432 | ||
2433 | case SO_USELOOPBACK: | |
2434 | case SO_DONTROUTE: | |
2435 | case SO_DEBUG: | |
2436 | case SO_KEEPALIVE: | |
2437 | case SO_REUSEADDR: | |
2438 | case SO_REUSEPORT: | |
2439 | case SO_BROADCAST: | |
2440 | case SO_OOBINLINE: | |
2441 | case SO_TIMESTAMP: | |
9bccf70c | 2442 | #ifdef __APPLE__ |
1c79356b A |
2443 | case SO_DONTTRUNC: |
2444 | case SO_WANTMORE: | |
9bccf70c A |
2445 | case SO_WANTOOBFLAG: |
2446 | #endif | |
1c79356b A |
2447 | optval = so->so_options & sopt->sopt_name; |
2448 | integer: | |
2449 | error = sooptcopyout(sopt, &optval, sizeof optval); | |
2450 | break; | |
2451 | ||
2452 | case SO_TYPE: | |
2453 | optval = so->so_type; | |
2454 | goto integer; | |
2455 | ||
9bccf70c | 2456 | #ifdef __APPLE__ |
1c79356b | 2457 | case SO_NREAD: |
9bccf70c A |
2458 | { |
2459 | int pkt_total; | |
1c79356b A |
2460 | struct mbuf *m1; |
2461 | ||
2462 | pkt_total = 0; | |
2463 | m1 = so->so_rcv.sb_mb; | |
2464 | if (so->so_proto->pr_flags & PR_ATOMIC) | |
2465 | { | |
9bccf70c A |
2466 | while (m1) { |
2467 | if (m1->m_type == MT_DATA) | |
1c79356b | 2468 | pkt_total += m1->m_len; |
1c79356b A |
2469 | m1 = m1->m_next; |
2470 | } | |
2471 | optval = pkt_total; | |
2472 | } else | |
2473 | optval = so->so_rcv.sb_cc; | |
1c79356b A |
2474 | goto integer; |
2475 | } | |
91447636 A |
2476 | case SO_NWRITE: |
2477 | optval = so->so_snd.sb_cc; | |
2478 | goto integer; | |
9bccf70c | 2479 | #endif |
1c79356b A |
2480 | case SO_ERROR: |
2481 | optval = so->so_error; | |
2482 | so->so_error = 0; | |
2483 | goto integer; | |
2484 | ||
2485 | case SO_SNDBUF: | |
2486 | optval = so->so_snd.sb_hiwat; | |
2487 | goto integer; | |
2488 | ||
2489 | case SO_RCVBUF: | |
2490 | optval = so->so_rcv.sb_hiwat; | |
2491 | goto integer; | |
2492 | ||
2493 | case SO_SNDLOWAT: | |
2494 | optval = so->so_snd.sb_lowat; | |
2495 | goto integer; | |
2496 | ||
2497 | case SO_RCVLOWAT: | |
2498 | optval = so->so_rcv.sb_lowat; | |
2499 | goto integer; | |
2500 | ||
2501 | case SO_SNDTIMEO: | |
2502 | case SO_RCVTIMEO: | |
91447636 | 2503 | tv = (sopt->sopt_name == SO_SNDTIMEO ? |
1c79356b A |
2504 | so->so_snd.sb_timeo : so->so_rcv.sb_timeo); |
2505 | ||
1c79356b A |
2506 | error = sooptcopyout(sopt, &tv, sizeof tv); |
2507 | break; | |
2508 | ||
91447636 A |
2509 | case SO_NOSIGPIPE: |
2510 | optval = (so->so_flags & SOF_NOSIGPIPE); | |
2511 | goto integer; | |
9bccf70c | 2512 | |
55e303ae | 2513 | case SO_NOADDRERR: |
91447636 A |
2514 | optval = (so->so_flags & SOF_NOADDRAVAIL); |
2515 | goto integer; | |
55e303ae | 2516 | |
1c79356b A |
2517 | default: |
2518 | error = ENOPROTOOPT; | |
2519 | break; | |
2520 | } | |
91447636 | 2521 | socket_unlock(so, 1); |
1c79356b A |
2522 | return (error); |
2523 | } | |
2524 | } | |
2525 | ||
9bccf70c | 2526 | /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ |
1c79356b | 2527 | int |
9bccf70c | 2528 | soopt_getm(struct sockopt *sopt, struct mbuf **mp) |
1c79356b A |
2529 | { |
2530 | struct mbuf *m, *m_prev; | |
2531 | int sopt_size = sopt->sopt_valsize; | |
2532 | ||
a3d08fcd A |
2533 | if (sopt_size > MAX_SOOPTGETM_SIZE) |
2534 | return EMSGSIZE; | |
2535 | ||
1c79356b A |
2536 | MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); |
2537 | if (m == 0) | |
2538 | return ENOBUFS; | |
2539 | if (sopt_size > MLEN) { | |
2540 | MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); | |
2541 | if ((m->m_flags & M_EXT) == 0) { | |
2542 | m_free(m); | |
2543 | return ENOBUFS; | |
2544 | } | |
2545 | m->m_len = min(MCLBYTES, sopt_size); | |
2546 | } else { | |
2547 | m->m_len = min(MLEN, sopt_size); | |
2548 | } | |
2549 | sopt_size -= m->m_len; | |
2550 | *mp = m; | |
2551 | m_prev = m; | |
2552 | ||
2553 | while (sopt_size) { | |
2554 | MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); | |
2555 | if (m == 0) { | |
2556 | m_freem(*mp); | |
2557 | return ENOBUFS; | |
2558 | } | |
2559 | if (sopt_size > MLEN) { | |
2560 | MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); | |
2561 | if ((m->m_flags & M_EXT) == 0) { | |
2562 | m_freem(*mp); | |
2563 | return ENOBUFS; | |
2564 | } | |
2565 | m->m_len = min(MCLBYTES, sopt_size); | |
2566 | } else { | |
2567 | m->m_len = min(MLEN, sopt_size); | |
2568 | } | |
2569 | sopt_size -= m->m_len; | |
2570 | m_prev->m_next = m; | |
2571 | m_prev = m; | |
2572 | } | |
2573 | return 0; | |
2574 | } | |
2575 | ||
2576 | /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ | |
2577 | int | |
9bccf70c | 2578 | soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) |
1c79356b A |
2579 | { |
2580 | struct mbuf *m0 = m; | |
2581 | ||
91447636 | 2582 | if (sopt->sopt_val == USER_ADDR_NULL) |
1c79356b A |
2583 | return 0; |
2584 | while (m != NULL && sopt->sopt_valsize >= m->m_len) { | |
2585 | if (sopt->sopt_p != NULL) { | |
2586 | int error; | |
2587 | ||
91447636 | 2588 | error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); |
1c79356b A |
2589 | if (error != 0) { |
2590 | m_freem(m0); | |
2591 | return(error); | |
2592 | } | |
2593 | } else | |
91447636 | 2594 | bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len); |
1c79356b | 2595 | sopt->sopt_valsize -= m->m_len; |
91447636 | 2596 | sopt->sopt_val += m->m_len; |
1c79356b A |
2597 | m = m->m_next; |
2598 | } | |
2599 | if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ | |
9bccf70c | 2600 | panic("soopt_mcopyin"); |
1c79356b A |
2601 | return 0; |
2602 | } | |
2603 | ||
2604 | /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ | |
2605 | int | |
9bccf70c | 2606 | soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) |
1c79356b A |
2607 | { |
2608 | struct mbuf *m0 = m; | |
2609 | size_t valsize = 0; | |
2610 | ||
91447636 | 2611 | if (sopt->sopt_val == USER_ADDR_NULL) |
1c79356b A |
2612 | return 0; |
2613 | while (m != NULL && sopt->sopt_valsize >= m->m_len) { | |
2614 | if (sopt->sopt_p != NULL) { | |
2615 | int error; | |
2616 | ||
91447636 | 2617 | error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); |
1c79356b A |
2618 | if (error != 0) { |
2619 | m_freem(m0); | |
2620 | return(error); | |
2621 | } | |
2622 | } else | |
91447636 | 2623 | bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len); |
1c79356b | 2624 | sopt->sopt_valsize -= m->m_len; |
91447636 | 2625 | sopt->sopt_val += m->m_len; |
1c79356b A |
2626 | valsize += m->m_len; |
2627 | m = m->m_next; | |
2628 | } | |
2629 | if (m != NULL) { | |
2630 | /* enough soopt buffer should be given from user-land */ | |
2631 | m_freem(m0); | |
2632 | return(EINVAL); | |
2633 | } | |
2634 | sopt->sopt_valsize = valsize; | |
2635 | return 0; | |
2636 | } | |
2637 | ||
9bccf70c A |
2638 | void |
2639 | sohasoutofband(so) | |
2640 | register struct socket *so; | |
2641 | { | |
2642 | struct proc *p; | |
9bccf70c | 2643 | |
9bccf70c A |
2644 | if (so->so_pgid < 0) |
2645 | gsignal(-so->so_pgid, SIGURG); | |
2646 | else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) | |
2647 | psignal(p, SIGURG); | |
2648 | selwakeup(&so->so_rcv.sb_sel); | |
2649 | } | |
2650 | ||
2651 | int | |
91447636 | 2652 | sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql) |
9bccf70c A |
2653 | { |
2654 | struct proc *p = current_proc(); | |
2655 | int revents = 0; | |
91447636 A |
2656 | |
2657 | socket_lock(so, 1); | |
9bccf70c A |
2658 | |
2659 | if (events & (POLLIN | POLLRDNORM)) | |
2660 | if (soreadable(so)) | |
2661 | revents |= events & (POLLIN | POLLRDNORM); | |
2662 | ||
2663 | if (events & (POLLOUT | POLLWRNORM)) | |
2664 | if (sowriteable(so)) | |
2665 | revents |= events & (POLLOUT | POLLWRNORM); | |
2666 | ||
2667 | if (events & (POLLPRI | POLLRDBAND)) | |
2668 | if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) | |
2669 | revents |= events & (POLLPRI | POLLRDBAND); | |
2670 | ||
2671 | if (revents == 0) { | |
2672 | if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { | |
2673 | /* Darwin sets the flag first, BSD calls selrecord first */ | |
2674 | so->so_rcv.sb_flags |= SB_SEL; | |
2675 | selrecord(p, &so->so_rcv.sb_sel, wql); | |
2676 | } | |
2677 | ||
2678 | if (events & (POLLOUT | POLLWRNORM)) { | |
2679 | /* Darwin sets the flag first, BSD calls selrecord first */ | |
2680 | so->so_snd.sb_flags |= SB_SEL; | |
2681 | selrecord(p, &so->so_snd.sb_sel, wql); | |
2682 | } | |
2683 | } | |
2684 | ||
91447636 | 2685 | socket_unlock(so, 1); |
9bccf70c A |
2686 | return (revents); |
2687 | } | |
55e303ae | 2688 | |
91447636 | 2689 | int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); |
55e303ae A |
2690 | |
2691 | int | |
91447636 | 2692 | soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p) |
55e303ae | 2693 | { |
91447636 | 2694 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2695 | struct sockbuf *sb; |
91447636 | 2696 | socket_lock(so, 1); |
55e303ae A |
2697 | |
2698 | switch (kn->kn_filter) { | |
2699 | case EVFILT_READ: | |
2700 | if (so->so_options & SO_ACCEPTCONN) | |
2701 | kn->kn_fop = &solisten_filtops; | |
2702 | else | |
2703 | kn->kn_fop = &soread_filtops; | |
2704 | sb = &so->so_rcv; | |
2705 | break; | |
2706 | case EVFILT_WRITE: | |
2707 | kn->kn_fop = &sowrite_filtops; | |
2708 | sb = &so->so_snd; | |
2709 | break; | |
2710 | default: | |
91447636 | 2711 | socket_unlock(so, 1); |
55e303ae A |
2712 | return (1); |
2713 | } | |
2714 | ||
55e303ae A |
2715 | if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn)) |
2716 | sb->sb_flags |= SB_KNOTE; | |
91447636 | 2717 | socket_unlock(so, 1); |
55e303ae A |
2718 | return (0); |
2719 | } | |
2720 | ||
2721 | static void | |
2722 | filt_sordetach(struct knote *kn) | |
2723 | { | |
91447636 | 2724 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2725 | |
91447636 A |
2726 | socket_lock(so, 1); |
2727 | if (so->so_rcv.sb_flags & SB_KNOTE) | |
55e303ae A |
2728 | if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) |
2729 | so->so_rcv.sb_flags &= ~SB_KNOTE; | |
91447636 | 2730 | socket_unlock(so, 1); |
55e303ae A |
2731 | } |
2732 | ||
2733 | /*ARGSUSED*/ | |
2734 | static int | |
2735 | filt_soread(struct knote *kn, long hint) | |
2736 | { | |
91447636 | 2737 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2738 | |
91447636 A |
2739 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2740 | socket_lock(so, 1); | |
2741 | ||
2742 | if (so->so_oobmark) { | |
2743 | if (kn->kn_flags & EV_OOBAND) { | |
2744 | kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark; | |
2745 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2746 | socket_unlock(so, 1); | |
2747 | return (1); | |
2748 | } | |
2749 | kn->kn_data = so->so_oobmark; | |
2750 | kn->kn_flags |= EV_OOBAND; | |
2751 | } else { | |
2752 | kn->kn_data = so->so_rcv.sb_cc; | |
2753 | if (so->so_state & SS_CANTRCVMORE) { | |
2754 | kn->kn_flags |= EV_EOF; | |
2755 | kn->kn_fflags = so->so_error; | |
2756 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2757 | socket_unlock(so, 1); | |
2758 | return (1); | |
2759 | } | |
55e303ae | 2760 | } |
91447636 A |
2761 | |
2762 | if (so->so_state & SS_RCVATMARK) { | |
2763 | if (kn->kn_flags & EV_OOBAND) { | |
2764 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2765 | socket_unlock(so, 1); | |
2766 | return (1); | |
2767 | } | |
2768 | kn->kn_flags |= EV_OOBAND; | |
2769 | } else if (kn->kn_flags & EV_OOBAND) { | |
2770 | kn->kn_data = 0; | |
2771 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2772 | socket_unlock(so, 1); | |
2773 | return (0); | |
2774 | } | |
2775 | ||
2776 | if (so->so_error) { /* temporary udp error */ | |
2777 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2778 | socket_unlock(so, 1); | |
55e303ae | 2779 | return (1); |
91447636 A |
2780 | } |
2781 | ||
2782 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2783 | socket_unlock(so, 1); | |
2784 | ||
2785 | return( kn->kn_flags & EV_OOBAND || | |
2786 | kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? | |
2787 | kn->kn_sdata : so->so_rcv.sb_lowat)); | |
55e303ae A |
2788 | } |
2789 | ||
2790 | static void | |
2791 | filt_sowdetach(struct knote *kn) | |
2792 | { | |
91447636 A |
2793 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2794 | socket_lock(so, 1); | |
55e303ae | 2795 | |
91447636 | 2796 | if(so->so_snd.sb_flags & SB_KNOTE) |
55e303ae A |
2797 | if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) |
2798 | so->so_snd.sb_flags &= ~SB_KNOTE; | |
91447636 | 2799 | socket_unlock(so, 1); |
55e303ae A |
2800 | } |
2801 | ||
2802 | /*ARGSUSED*/ | |
2803 | static int | |
2804 | filt_sowrite(struct knote *kn, long hint) | |
2805 | { | |
91447636 A |
2806 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2807 | ||
2808 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2809 | socket_lock(so, 1); | |
55e303ae A |
2810 | |
2811 | kn->kn_data = sbspace(&so->so_snd); | |
2812 | if (so->so_state & SS_CANTSENDMORE) { | |
2813 | kn->kn_flags |= EV_EOF; | |
2814 | kn->kn_fflags = so->so_error; | |
91447636 A |
2815 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2816 | socket_unlock(so, 1); | |
55e303ae A |
2817 | return (1); |
2818 | } | |
91447636 A |
2819 | if (so->so_error) { /* temporary udp error */ |
2820 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2821 | socket_unlock(so, 1); | |
55e303ae | 2822 | return (1); |
91447636 | 2823 | } |
55e303ae | 2824 | if (((so->so_state & SS_ISCONNECTED) == 0) && |
91447636 A |
2825 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) { |
2826 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2827 | socket_unlock(so, 1); | |
55e303ae | 2828 | return (0); |
91447636 A |
2829 | } |
2830 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2831 | socket_unlock(so, 1); | |
55e303ae A |
2832 | if (kn->kn_sfflags & NOTE_LOWAT) |
2833 | return (kn->kn_data >= kn->kn_sdata); | |
2834 | return (kn->kn_data >= so->so_snd.sb_lowat); | |
2835 | } | |
2836 | ||
2837 | /*ARGSUSED*/ | |
2838 | static int | |
2839 | filt_solisten(struct knote *kn, long hint) | |
2840 | { | |
91447636 A |
2841 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2842 | int isempty; | |
55e303ae | 2843 | |
91447636 A |
2844 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2845 | socket_lock(so, 1); | |
55e303ae | 2846 | kn->kn_data = so->so_qlen; |
91447636 A |
2847 | isempty = ! TAILQ_EMPTY(&so->so_comp); |
2848 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2849 | socket_unlock(so, 1); | |
2850 | return (isempty); | |
55e303ae A |
2851 | } |
2852 | ||
91447636 A |
2853 | |
2854 | int | |
2855 | socket_lock(so, refcount) | |
2856 | struct socket *so; | |
2857 | int refcount; | |
2858 | { | |
4452a7af A |
2859 | int error = 0, lr_saved; |
2860 | ||
2861 | lr_saved = (unsigned int) __builtin_return_address(0); | |
91447636 A |
2862 | |
2863 | if (so->so_proto->pr_lock) { | |
2864 | error = (*so->so_proto->pr_lock)(so, refcount, lr_saved); | |
2865 | } | |
2866 | else { | |
2867 | #ifdef MORE_LOCKING_DEBUG | |
2868 | lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); | |
2869 | #endif | |
2870 | lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); | |
2871 | if (refcount) | |
2872 | so->so_usecount++; | |
4452a7af A |
2873 | so->lock_lr[so->next_lock_lr] = (void *)lr_saved; |
2874 | so->next_lock_lr = (so->next_lock_lr+1) % SO_LCKDBG_MAX; | |
91447636 A |
2875 | } |
2876 | ||
2877 | return(error); | |
2878 | ||
2879 | } | |
2880 | ||
2881 | int | |
2882 | socket_unlock(so, refcount) | |
2883 | struct socket *so; | |
2884 | int refcount; | |
2885 | { | |
4452a7af | 2886 | int error = 0, lr_saved; |
91447636 A |
2887 | lck_mtx_t * mutex_held; |
2888 | ||
4452a7af | 2889 | lr_saved = (unsigned int) __builtin_return_address(0); |
91447636 A |
2890 | |
2891 | if (so->so_proto == NULL) | |
2892 | panic("socket_unlock null so_proto so=%x\n", so); | |
2893 | ||
2894 | if (so && so->so_proto->pr_unlock) | |
2895 | error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved); | |
2896 | else { | |
2897 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2898 | #ifdef MORE_LOCKING_DEBUG | |
2899 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2900 | #endif | |
4452a7af A |
2901 | so->unlock_lr[so->next_unlock_lr] = (void *)lr_saved; |
2902 | so->next_unlock_lr = (so->next_unlock_lr+1) % SO_LCKDBG_MAX; | |
2903 | ||
91447636 A |
2904 | if (refcount) { |
2905 | if (so->so_usecount <= 0) | |
2906 | panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount); | |
2907 | so->so_usecount--; | |
2908 | if (so->so_usecount == 0) { | |
2909 | sofreelastref(so, 1); | |
2910 | } | |
91447636 A |
2911 | } |
2912 | lck_mtx_unlock(mutex_held); | |
2913 | } | |
2914 | ||
2915 | return(error); | |
2916 | } | |
2917 | //### Called with socket locked, will unlock socket | |
2918 | void | |
2919 | sofree(so) | |
2920 | struct socket *so; | |
2921 | { | |
2922 | ||
91447636 | 2923 | lck_mtx_t * mutex_held; |
91447636 A |
2924 | if (so->so_proto->pr_getlock != NULL) |
2925 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
2926 | else | |
2927 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2928 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2929 | ||
91447636 A |
2930 | sofreelastref(so, 0); |
2931 | } | |
2932 | ||
/*
 * Take one reference on a socket: lock with refcount (bumps
 * so_usecount), then unlock without dropping the reference.
 */
void
soreference(struct socket *so)
{
	socket_lock(so, 1);	/* locks & take one reference on socket */
	socket_unlock(so, 0);	/* unlock only */
}
2940 | ||
/*
 * Drop one reference on a socket: lock without taking a reference,
 * then unlock with refcount (drops so_usecount, freeing the socket
 * when it reaches zero).
 */
void
sodereference(struct socket *so)
{
	socket_lock(so, 0);	/* lock only */
	socket_unlock(so, 1);	/* unlock & drop one reference */
}