]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
91447636 | 2 | * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. |
1c79356b A |
3 | * |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
37839358 A |
6 | * The contents of this file constitute Original Code as defined in and |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
1c79356b | 11 | * |
37839358 A |
12 | * This Original Code and all software distributed under the License are |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
1c79356b A |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
37839358 A |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
1c79356b A |
19 | * |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* Copyright (c) 1998, 1999 Apple Computer, Inc. All Rights Reserved */ | |
23 | /* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ | |
24 | /* | |
25 | * Copyright (c) 1982, 1986, 1988, 1990, 1993 | |
26 | * The Regents of the University of California. All rights reserved. | |
27 | * | |
28 | * Redistribution and use in source and binary forms, with or without | |
29 | * modification, are permitted provided that the following conditions | |
30 | * are met: | |
31 | * 1. Redistributions of source code must retain the above copyright | |
32 | * notice, this list of conditions and the following disclaimer. | |
33 | * 2. Redistributions in binary form must reproduce the above copyright | |
34 | * notice, this list of conditions and the following disclaimer in the | |
35 | * documentation and/or other materials provided with the distribution. | |
36 | * 3. All advertising materials mentioning features or use of this software | |
37 | * must display the following acknowledgement: | |
38 | * This product includes software developed by the University of | |
39 | * California, Berkeley and its contributors. | |
40 | * 4. Neither the name of the University nor the names of its contributors | |
41 | * may be used to endorse or promote products derived from this software | |
42 | * without specific prior written permission. | |
43 | * | |
44 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
45 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
46 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
47 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
48 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
49 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
50 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
51 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
52 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
53 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
54 | * SUCH DAMAGE. | |
55 | * | |
9bccf70c A |
56 | * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 |
57 | * $FreeBSD: src/sys/kern/uipc_socket.c,v 1.68.2.16 2001/06/14 20:46:06 ume Exp $ | |
1c79356b A |
58 | */ |
59 | ||
60 | #include <sys/param.h> | |
61 | #include <sys/systm.h> | |
55e303ae | 62 | #include <sys/filedesc.h> |
91447636 A |
63 | #include <sys/proc_internal.h> |
64 | #include <sys/kauth.h> | |
65 | #include <sys/file_internal.h> | |
1c79356b A |
66 | #include <sys/fcntl.h> |
67 | #include <sys/malloc.h> | |
68 | #include <sys/mbuf.h> | |
69 | #include <sys/domain.h> | |
70 | #include <sys/kernel.h> | |
55e303ae | 71 | #include <sys/event.h> |
1c79356b A |
72 | #include <sys/poll.h> |
73 | #include <sys/protosw.h> | |
74 | #include <sys/socket.h> | |
75 | #include <sys/socketvar.h> | |
76 | #include <sys/resourcevar.h> | |
77 | #include <sys/signalvar.h> | |
78 | #include <sys/sysctl.h> | |
79 | #include <sys/uio.h> | |
80 | #include <sys/ev.h> | |
81 | #include <sys/kdebug.h> | |
82 | #include <net/route.h> | |
83 | #include <netinet/in.h> | |
84 | #include <netinet/in_pcb.h> | |
85 | #include <kern/zalloc.h> | |
91447636 | 86 | #include <kern/locks.h> |
1c79356b A |
87 | #include <machine/limits.h> |
88 | ||
89 | int so_cache_hw = 0; | |
90 | int so_cache_timeouts = 0; | |
91 | int so_cache_max_freed = 0; | |
92 | int cached_sock_count = 0; | |
93 | struct socket *socket_cache_head = 0; | |
94 | struct socket *socket_cache_tail = 0; | |
95 | u_long so_cache_time = 0; | |
96 | int so_cache_init_done = 0; | |
97 | struct zone *so_cache_zone; | |
98 | extern int get_inpcb_str_size(); | |
99 | extern int get_tcp_str_size(); | |
100 | ||
91447636 A |
101 | static lck_grp_t *so_cache_mtx_grp; |
102 | static lck_attr_t *so_cache_mtx_attr; | |
103 | static lck_grp_attr_t *so_cache_mtx_grp_attr; | |
104 | lck_mtx_t *so_cache_mtx; | |
105 | ||
1c79356b A |
106 | #include <machine/limits.h> |
107 | ||
55e303ae A |
108 | static void filt_sordetach(struct knote *kn); |
109 | static int filt_soread(struct knote *kn, long hint); | |
110 | static void filt_sowdetach(struct knote *kn); | |
111 | static int filt_sowrite(struct knote *kn, long hint); | |
112 | static int filt_solisten(struct knote *kn, long hint); | |
113 | ||
114 | static struct filterops solisten_filtops = | |
115 | { 1, NULL, filt_sordetach, filt_solisten }; | |
116 | static struct filterops soread_filtops = | |
117 | { 1, NULL, filt_sordetach, filt_soread }; | |
118 | static struct filterops sowrite_filtops = | |
119 | { 1, NULL, filt_sowdetach, filt_sowrite }; | |
120 | ||
91447636 | 121 | #define EVEN_MORE_LOCKING_DEBUG 0 |
1c79356b A |
122 | int socket_debug = 0; |
123 | int socket_zone = M_SOCKET; | |
124 | so_gen_t so_gencnt; /* generation count for sockets */ | |
125 | ||
126 | MALLOC_DEFINE(M_SONAME, "soname", "socket name"); | |
127 | MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); | |
128 | ||
129 | #define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) | |
130 | #define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2) | |
131 | #define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) | |
132 | #define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3) | |
133 | #define DBG_FNC_SOSEND NETDBG_CODE(DBG_NETSOCK, (4 << 8) | 1) | |
134 | #define DBG_FNC_SORECEIVE NETDBG_CODE(DBG_NETSOCK, (8 << 8)) | |
135 | #define DBG_FNC_SOSHUTDOWN NETDBG_CODE(DBG_NETSOCK, (9 << 8)) | |
136 | ||
a3d08fcd | 137 | #define MAX_SOOPTGETM_SIZE (128 * MCLBYTES) |
1c79356b | 138 | |
91447636 | 139 | |
1c79356b A |
140 | SYSCTL_DECL(_kern_ipc); |
141 | ||
142 | static int somaxconn = SOMAXCONN; | |
143 | SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, | |
144 | 0, ""); | |
145 | ||
146 | /* Should we get a maximum also ??? */ | |
fa4905b1 | 147 | static int sosendmaxchain = 65536; |
1c79356b | 148 | static int sosendminchain = 16384; |
55e303ae | 149 | static int sorecvmincopy = 16384; |
1c79356b A |
150 | SYSCTL_INT(_kern_ipc, OID_AUTO, sosendminchain, CTLFLAG_RW, &sosendminchain, |
151 | 0, ""); | |
55e303ae A |
152 | SYSCTL_INT(_kern_ipc, OID_AUTO, sorecvmincopy, CTLFLAG_RW, &sorecvmincopy, |
153 | 0, ""); | |
1c79356b A |
154 | |
155 | void so_cache_timer(); | |
156 | ||
157 | /* | |
158 | * Socket operation routines. | |
159 | * These routines are called by the routines in | |
160 | * sys_socket.c or from a system process, and | |
161 | * implement the semantics of socket operations by | |
162 | * switching out to the protocol specific routines. | |
163 | */ | |
164 | ||
9bccf70c | 165 | #ifdef __APPLE__ |
91447636 A |
166 | |
167 | vm_size_t so_cache_zone_element_size; | |
168 | ||
169 | static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid); | |
170 | ||
171 | ||
1c79356b A |
172 | void socketinit() |
173 | { | |
174 | vm_size_t str_size; | |
175 | ||
91447636 A |
176 | if (so_cache_init_done) { |
177 | printf("socketinit: already called...\n"); | |
178 | return; | |
179 | } | |
180 | ||
181 | /* | |
182 | * allocate lock group attribute and group for socket cache mutex | |
183 | */ | |
184 | so_cache_mtx_grp_attr = lck_grp_attr_alloc_init(); | |
185 | lck_grp_attr_setdefault(so_cache_mtx_grp_attr); | |
186 | ||
187 | so_cache_mtx_grp = lck_grp_alloc_init("so_cache", so_cache_mtx_grp_attr); | |
188 | ||
189 | /* | |
190 | * allocate the lock attribute for socket cache mutex | |
191 | */ | |
192 | so_cache_mtx_attr = lck_attr_alloc_init(); | |
193 | lck_attr_setdefault(so_cache_mtx_attr); | |
194 | ||
1c79356b A |
195 | so_cache_init_done = 1; |
196 | ||
91447636 A |
197 | so_cache_mtx = lck_mtx_alloc_init(so_cache_mtx_grp, so_cache_mtx_attr); /* cached sockets mutex */ |
198 | ||
199 | if (so_cache_mtx == NULL) | |
200 | return; /* we're hosed... */ | |
201 | ||
1c79356b A |
202 | str_size = (vm_size_t)( sizeof(struct socket) + 4 + |
203 | get_inpcb_str_size() + 4 + | |
204 | get_tcp_str_size()); | |
205 | so_cache_zone = zinit (str_size, 120000*str_size, 8192, "socache zone"); | |
206 | #if TEMPDEBUG | |
91447636 | 207 | printf("cached_sock_alloc -- so_cache_zone size is %x\n", str_size); |
1c79356b | 208 | #endif |
91447636 A |
209 | timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); |
210 | ||
211 | so_cache_zone_element_size = str_size; | |
212 | ||
213 | sflt_init(); | |
1c79356b A |
214 | |
215 | } | |
216 | ||
217 | void cached_sock_alloc(so, waitok) | |
218 | struct socket **so; | |
219 | int waitok; | |
220 | ||
221 | { | |
222 | caddr_t temp; | |
1c79356b A |
223 | register u_long offset; |
224 | ||
225 | ||
91447636 A |
226 | lck_mtx_lock(so_cache_mtx); |
227 | ||
1c79356b A |
228 | if (cached_sock_count) { |
229 | cached_sock_count--; | |
230 | *so = socket_cache_head; | |
231 | if (*so == 0) | |
232 | panic("cached_sock_alloc: cached sock is null"); | |
233 | ||
234 | socket_cache_head = socket_cache_head->cache_next; | |
235 | if (socket_cache_head) | |
236 | socket_cache_head->cache_prev = 0; | |
237 | else | |
238 | socket_cache_tail = 0; | |
91447636 A |
239 | |
240 | lck_mtx_unlock(so_cache_mtx); | |
1c79356b A |
241 | |
242 | temp = (*so)->so_saved_pcb; | |
243 | bzero((caddr_t)*so, sizeof(struct socket)); | |
244 | #if TEMPDEBUG | |
245 | kprintf("cached_sock_alloc - retreiving cached sock %x - count == %d\n", *so, | |
246 | cached_sock_count); | |
247 | #endif | |
248 | (*so)->so_saved_pcb = temp; | |
91447636 A |
249 | (*so)->cached_in_sock_layer = 1; |
250 | ||
1c79356b A |
251 | } |
252 | else { | |
253 | #if TEMPDEBUG | |
254 | kprintf("Allocating cached sock %x from memory\n", *so); | |
255 | #endif | |
256 | ||
91447636 A |
257 | lck_mtx_unlock(so_cache_mtx); |
258 | ||
1c79356b A |
259 | if (waitok) |
260 | *so = (struct socket *) zalloc(so_cache_zone); | |
261 | else | |
262 | *so = (struct socket *) zalloc_noblock(so_cache_zone); | |
263 | ||
264 | if (*so == 0) | |
265 | return; | |
266 | ||
267 | bzero((caddr_t)*so, sizeof(struct socket)); | |
268 | ||
269 | /* | |
270 | * Define offsets for extra structures into our single block of | |
271 | * memory. Align extra structures on longword boundaries. | |
272 | */ | |
273 | ||
274 | ||
275 | offset = (u_long) *so; | |
276 | offset += sizeof(struct socket); | |
277 | if (offset & 0x3) { | |
278 | offset += 4; | |
279 | offset &= 0xfffffffc; | |
280 | } | |
281 | (*so)->so_saved_pcb = (caddr_t) offset; | |
282 | offset += get_inpcb_str_size(); | |
283 | if (offset & 0x3) { | |
284 | offset += 4; | |
285 | offset &= 0xfffffffc; | |
286 | } | |
287 | ||
288 | ((struct inpcb *) (*so)->so_saved_pcb)->inp_saved_ppcb = (caddr_t) offset; | |
289 | #if TEMPDEBUG | |
290 | kprintf("Allocating cached socket - %x, pcb=%x tcpcb=%x\n", *so, | |
291 | (*so)->so_saved_pcb, | |
292 | ((struct inpcb *)(*so)->so_saved_pcb)->inp_saved_ppcb); | |
293 | #endif | |
294 | } | |
295 | ||
296 | (*so)->cached_in_sock_layer = 1; | |
297 | } | |
298 | ||
299 | ||
300 | void cached_sock_free(so) | |
301 | struct socket *so; | |
302 | { | |
1c79356b | 303 | |
91447636 | 304 | lck_mtx_lock(so_cache_mtx); |
1c79356b | 305 | |
1c79356b A |
306 | if (++cached_sock_count > MAX_CACHED_SOCKETS) { |
307 | --cached_sock_count; | |
91447636 | 308 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
309 | #if TEMPDEBUG |
310 | kprintf("Freeing overflowed cached socket %x\n", so); | |
311 | #endif | |
91447636 | 312 | zfree(so_cache_zone, so); |
1c79356b A |
313 | } |
314 | else { | |
315 | #if TEMPDEBUG | |
316 | kprintf("Freeing socket %x into cache\n", so); | |
317 | #endif | |
318 | if (so_cache_hw < cached_sock_count) | |
319 | so_cache_hw = cached_sock_count; | |
320 | ||
321 | so->cache_next = socket_cache_head; | |
322 | so->cache_prev = 0; | |
323 | if (socket_cache_head) | |
324 | socket_cache_head->cache_prev = so; | |
325 | else | |
326 | socket_cache_tail = so; | |
327 | ||
328 | so->cache_timestamp = so_cache_time; | |
329 | socket_cache_head = so; | |
91447636 | 330 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
331 | } |
332 | ||
333 | #if TEMPDEBUG | |
334 | kprintf("Freed cached sock %x into cache - count is %d\n", so, cached_sock_count); | |
335 | #endif | |
336 | ||
337 | ||
338 | } | |
339 | ||
340 | ||
341 | void so_cache_timer() | |
342 | { | |
343 | register struct socket *p; | |
1c79356b | 344 | register int n_freed = 0; |
1c79356b | 345 | |
1c79356b | 346 | |
91447636 | 347 | lck_mtx_lock(so_cache_mtx); |
1c79356b | 348 | |
91447636 | 349 | ++so_cache_time; |
1c79356b | 350 | |
91447636 | 351 | while ( (p = socket_cache_tail) ) |
1c79356b A |
352 | { |
353 | if ((so_cache_time - p->cache_timestamp) < SO_CACHE_TIME_LIMIT) | |
354 | break; | |
355 | ||
356 | so_cache_timeouts++; | |
357 | ||
91447636 | 358 | if ( (socket_cache_tail = p->cache_prev) ) |
1c79356b A |
359 | p->cache_prev->cache_next = 0; |
360 | if (--cached_sock_count == 0) | |
361 | socket_cache_head = 0; | |
362 | ||
1c79356b | 363 | |
91447636 | 364 | zfree(so_cache_zone, p); |
1c79356b | 365 | |
1c79356b A |
366 | if (++n_freed >= SO_CACHE_MAX_FREE_BATCH) |
367 | { | |
368 | so_cache_max_freed++; | |
369 | break; | |
370 | } | |
371 | } | |
91447636 | 372 | lck_mtx_unlock(so_cache_mtx); |
1c79356b A |
373 | |
374 | timeout(so_cache_timer, NULL, (SO_CACHE_FLUSH_INTERVAL * hz)); | |
375 | ||
1c79356b A |
376 | |
377 | } | |
9bccf70c | 378 | #endif /* __APPLE__ */ |
1c79356b A |
379 | |
380 | /* | |
381 | * Get a socket structure from our zone, and initialize it. | |
382 | * We don't implement `waitok' yet (see comments in uipc_domain.c). | |
383 | * Note that it would probably be better to allocate socket | |
384 | * and PCB at the same time, but I'm not convinced that all | |
385 | * the protocols can be easily modified to do this. | |
386 | */ | |
387 | struct socket * | |
388 | soalloc(waitok, dom, type) | |
389 | int waitok; | |
390 | int dom; | |
391 | int type; | |
392 | { | |
393 | struct socket *so; | |
394 | ||
395 | if ((dom == PF_INET) && (type == SOCK_STREAM)) | |
396 | cached_sock_alloc(&so, waitok); | |
397 | else | |
398 | { | |
91447636 | 399 | MALLOC_ZONE(so, struct socket *, sizeof(*so), socket_zone, M_WAITOK); |
1c79356b A |
400 | if (so) |
401 | bzero(so, sizeof *so); | |
402 | } | |
403 | /* XXX race condition for reentrant kernel */ | |
91447636 | 404 | //###LD Atomic add for so_gencnt |
1c79356b A |
405 | if (so) { |
406 | so->so_gencnt = ++so_gencnt; | |
407 | so->so_zone = socket_zone; | |
408 | } | |
409 | ||
410 | return so; | |
411 | } | |
412 | ||
413 | int | |
414 | socreate(dom, aso, type, proto) | |
415 | int dom; | |
416 | struct socket **aso; | |
417 | register int type; | |
418 | int proto; | |
1c79356b A |
419 | { |
420 | struct proc *p = current_proc(); | |
421 | register struct protosw *prp; | |
9bccf70c | 422 | register struct socket *so; |
1c79356b | 423 | register int error = 0; |
55e303ae A |
424 | #if TCPDEBUG |
425 | extern int tcpconsdebug; | |
426 | #endif | |
1c79356b A |
427 | if (proto) |
428 | prp = pffindproto(dom, proto, type); | |
429 | else | |
430 | prp = pffindtype(dom, type); | |
9bccf70c | 431 | |
1c79356b A |
432 | if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) |
433 | return (EPROTONOSUPPORT); | |
9bccf70c A |
434 | #ifndef __APPLE__ |
435 | ||
436 | if (p->p_prison && jail_socket_unixiproute_only && | |
437 | prp->pr_domain->dom_family != PF_LOCAL && | |
438 | prp->pr_domain->dom_family != PF_INET && | |
439 | prp->pr_domain->dom_family != PF_ROUTE) { | |
440 | return (EPROTONOSUPPORT); | |
441 | } | |
442 | ||
443 | #endif | |
1c79356b A |
444 | if (prp->pr_type != type) |
445 | return (EPROTOTYPE); | |
446 | so = soalloc(p != 0, dom, type); | |
447 | if (so == 0) | |
448 | return (ENOBUFS); | |
449 | ||
450 | TAILQ_INIT(&so->so_incomp); | |
451 | TAILQ_INIT(&so->so_comp); | |
452 | so->so_type = type; | |
453 | ||
9bccf70c | 454 | #ifdef __APPLE__ |
1c79356b | 455 | if (p != 0) { |
91447636 A |
456 | so->so_uid = kauth_cred_getuid(kauth_cred_get()); |
457 | if (!suser(kauth_cred_get(),NULL)) | |
1c79356b | 458 | so->so_state = SS_PRIV; |
1c79356b | 459 | } |
9bccf70c | 460 | #else |
91447636 | 461 | so->so_cred = kauth_cred_get_with_ref(); |
9bccf70c | 462 | #endif |
1c79356b | 463 | so->so_proto = prp; |
9bccf70c | 464 | #ifdef __APPLE__ |
1c79356b | 465 | so->so_rcv.sb_flags |= SB_RECV; /* XXX */ |
91447636 | 466 | so->so_rcv.sb_so = so->so_snd.sb_so = so; |
9bccf70c | 467 | #endif |
91447636 A |
468 | |
469 | //### Attachement will create the per pcb lock if necessary and increase refcount | |
37839358 | 470 | so->so_usecount++; /* for creation, make sure it's done before socket is inserted in lists */ |
91447636 A |
471 | |
472 | error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); | |
1c79356b | 473 | if (error) { |
55e303ae A |
474 | /* |
475 | * Warning: | |
476 | * If so_pcb is not zero, the socket will be leaked, | |
477 | * so protocol attachment handler must be coded carefuly | |
478 | */ | |
1c79356b | 479 | so->so_state |= SS_NOFDREF; |
37839358 A |
480 | so->so_usecount--; |
481 | sofreelastref(so, 1); /* will deallocate the socket */ | |
1c79356b A |
482 | return (error); |
483 | } | |
9bccf70c | 484 | #ifdef __APPLE__ |
1c79356b | 485 | prp->pr_domain->dom_refs++; |
1c79356b | 486 | TAILQ_INIT(&so->so_evlist); |
91447636 A |
487 | |
488 | /* Attach socket filters for this protocol */ | |
489 | sflt_initsock(so); | |
55e303ae A |
490 | #if TCPDEBUG |
491 | if (tcpconsdebug == 2) | |
492 | so->so_options |= SO_DEBUG; | |
493 | #endif | |
9bccf70c | 494 | #endif |
55e303ae | 495 | |
1c79356b A |
496 | *aso = so; |
497 | return (0); | |
498 | } | |
499 | ||
500 | int | |
501 | sobind(so, nam) | |
502 | struct socket *so; | |
503 | struct sockaddr *nam; | |
504 | ||
505 | { | |
506 | struct proc *p = current_proc(); | |
91447636 A |
507 | int error = 0; |
508 | struct socket_filter_entry *filter; | |
509 | int filtered = 0; | |
1c79356b | 510 | |
91447636 A |
511 | socket_lock(so, 1); |
512 | ||
513 | /* Socket filter */ | |
514 | error = 0; | |
515 | for (filter = so->so_filt; filter && (error == 0); | |
516 | filter = filter->sfe_next_onsocket) { | |
517 | if (filter->sfe_filter->sf_filter.sf_bind) { | |
518 | if (filtered == 0) { | |
519 | filtered = 1; | |
520 | sflt_use(so); | |
521 | socket_unlock(so, 0); | |
1c79356b | 522 | } |
91447636 A |
523 | error = filter->sfe_filter->sf_filter.sf_bind( |
524 | filter->sfe_cookie, so, nam); | |
1c79356b A |
525 | } |
526 | } | |
91447636 A |
527 | if (filtered != 0) { |
528 | socket_lock(so, 0); | |
529 | sflt_unuse(so); | |
530 | } | |
531 | /* End socket filter */ | |
532 | ||
533 | if (error == 0) | |
534 | error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); | |
535 | ||
536 | socket_unlock(so, 1); | |
537 | ||
538 | if (error == EJUSTRETURN) | |
539 | error = 0; | |
540 | ||
1c79356b A |
541 | return (error); |
542 | } | |
543 | ||
544 | void | |
545 | sodealloc(so) | |
546 | struct socket *so; | |
547 | { | |
548 | so->so_gencnt = ++so_gencnt; | |
549 | ||
9bccf70c A |
550 | #ifndef __APPLE__ |
551 | if (so->so_rcv.sb_hiwat) | |
552 | (void)chgsbsize(so->so_cred->cr_uidinfo, | |
553 | &so->so_rcv.sb_hiwat, 0, RLIM_INFINITY); | |
554 | if (so->so_snd.sb_hiwat) | |
555 | (void)chgsbsize(so->so_cred->cr_uidinfo, | |
556 | &so->so_snd.sb_hiwat, 0, RLIM_INFINITY); | |
557 | #ifdef INET | |
558 | if (so->so_accf != NULL) { | |
559 | if (so->so_accf->so_accept_filter != NULL && | |
560 | so->so_accf->so_accept_filter->accf_destroy != NULL) { | |
561 | so->so_accf->so_accept_filter->accf_destroy(so); | |
562 | } | |
563 | if (so->so_accf->so_accept_filter_str != NULL) | |
564 | FREE(so->so_accf->so_accept_filter_str, M_ACCF); | |
565 | FREE(so->so_accf, M_ACCF); | |
566 | } | |
567 | #endif /* INET */ | |
91447636 | 568 | kauth_cred_rele(so->so_cred); |
9bccf70c A |
569 | zfreei(so->so_zone, so); |
570 | #else | |
1c79356b A |
571 | if (so->cached_in_sock_layer == 1) |
572 | cached_sock_free(so); | |
91447636 A |
573 | else { |
574 | if (so->cached_in_sock_layer == -1) | |
575 | panic("sodealloc: double dealloc: so=%x\n", so); | |
576 | so->cached_in_sock_layer = -1; | |
577 | FREE_ZONE(so, sizeof(*so), so->so_zone); | |
578 | } | |
9bccf70c | 579 | #endif /* __APPLE__ */ |
1c79356b A |
580 | } |
581 | ||
582 | int | |
583 | solisten(so, backlog) | |
584 | register struct socket *so; | |
585 | int backlog; | |
586 | ||
587 | { | |
1c79356b | 588 | struct proc *p = current_proc(); |
91447636 | 589 | int error; |
1c79356b | 590 | |
91447636 A |
591 | socket_lock(so, 1); |
592 | ||
593 | { | |
594 | struct socket_filter_entry *filter; | |
595 | int filtered = 0; | |
596 | error = 0; | |
597 | for (filter = so->so_filt; filter && (error == 0); | |
598 | filter = filter->sfe_next_onsocket) { | |
599 | if (filter->sfe_filter->sf_filter.sf_listen) { | |
600 | if (filtered == 0) { | |
601 | filtered = 1; | |
602 | sflt_use(so); | |
603 | socket_unlock(so, 0); | |
604 | } | |
605 | error = filter->sfe_filter->sf_filter.sf_listen( | |
606 | filter->sfe_cookie, so); | |
607 | } | |
608 | } | |
609 | if (filtered != 0) { | |
610 | socket_lock(so, 0); | |
611 | sflt_unuse(so); | |
612 | } | |
613 | } | |
614 | ||
615 | if (error == 0) { | |
616 | error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); | |
617 | } | |
618 | ||
1c79356b | 619 | if (error) { |
91447636 A |
620 | socket_unlock(so, 1); |
621 | if (error == EJUSTRETURN) | |
622 | error = 0; | |
1c79356b A |
623 | return (error); |
624 | } | |
91447636 A |
625 | |
626 | if (TAILQ_EMPTY(&so->so_comp)) | |
1c79356b A |
627 | so->so_options |= SO_ACCEPTCONN; |
628 | if (backlog < 0 || backlog > somaxconn) | |
629 | backlog = somaxconn; | |
630 | so->so_qlimit = backlog; | |
1c79356b | 631 | |
91447636 | 632 | socket_unlock(so, 1); |
1c79356b A |
633 | return (0); |
634 | } | |
635 | ||
1c79356b | 636 | void |
91447636 | 637 | sofreelastref(so, dealloc) |
1c79356b | 638 | register struct socket *so; |
91447636 | 639 | int dealloc; |
9bccf70c A |
640 | { |
641 | int error; | |
1c79356b A |
642 | struct socket *head = so->so_head; |
643 | ||
91447636 | 644 | /*### Assume socket is locked */ |
1c79356b | 645 | |
3a60a9f5 A |
646 | /* Remove any filters - may be called more than once */ |
647 | sflt_termsock(so); | |
648 | ||
91447636 | 649 | if ((!(so->so_flags & SOF_PCBCLEARING)) || ((so->so_state & SS_NOFDREF) == 0)) { |
9bccf70c | 650 | #ifdef __APPLE__ |
0b4e3aa0 A |
651 | selthreadclear(&so->so_snd.sb_sel); |
652 | selthreadclear(&so->so_rcv.sb_sel); | |
cc9f6e38 A |
653 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
654 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
9bccf70c | 655 | #endif |
1c79356b | 656 | return; |
0b4e3aa0 | 657 | } |
9bccf70c | 658 | if (head != NULL) { |
91447636 | 659 | socket_lock(head, 1); |
9bccf70c A |
660 | if (so->so_state & SS_INCOMP) { |
661 | TAILQ_REMOVE(&head->so_incomp, so, so_list); | |
662 | head->so_incqlen--; | |
663 | } else if (so->so_state & SS_COMP) { | |
664 | /* | |
665 | * We must not decommission a socket that's | |
666 | * on the accept(2) queue. If we do, then | |
667 | * accept(2) may hang after select(2) indicated | |
668 | * that the listening socket was ready. | |
669 | */ | |
670 | #ifdef __APPLE__ | |
671 | selthreadclear(&so->so_snd.sb_sel); | |
672 | selthreadclear(&so->so_rcv.sb_sel); | |
cc9f6e38 A |
673 | so->so_rcv.sb_flags &= ~SB_UPCALL; |
674 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
9bccf70c | 675 | #endif |
91447636 | 676 | socket_unlock(head, 1); |
9bccf70c A |
677 | return; |
678 | } else { | |
679 | panic("sofree: not queued"); | |
680 | } | |
1c79356b | 681 | head->so_qlen--; |
9bccf70c | 682 | so->so_state &= ~SS_INCOMP; |
1c79356b | 683 | so->so_head = NULL; |
91447636 | 684 | socket_unlock(head, 1); |
1c79356b | 685 | } |
9bccf70c | 686 | #ifdef __APPLE__ |
0b4e3aa0 | 687 | selthreadclear(&so->so_snd.sb_sel); |
1c79356b | 688 | sbrelease(&so->so_snd); |
9bccf70c | 689 | #endif |
1c79356b | 690 | sorflush(so); |
91447636 A |
691 | |
692 | /* 3932268: disable upcall */ | |
693 | so->so_rcv.sb_flags &= ~SB_UPCALL; | |
694 | so->so_snd.sb_flags &= ~SB_UPCALL; | |
695 | ||
696 | if (dealloc) | |
697 | sodealloc(so); | |
1c79356b A |
698 | } |
699 | ||
700 | /* | |
701 | * Close a socket on last file table reference removal. | |
702 | * Initiate disconnect if connected. | |
703 | * Free socket when disconnect complete. | |
704 | */ | |
705 | int | |
91447636 | 706 | soclose_locked(so) |
1c79356b A |
707 | register struct socket *so; |
708 | { | |
1c79356b | 709 | int error = 0; |
91447636 A |
710 | lck_mtx_t * mutex_held; |
711 | struct timespec ts; | |
1c79356b | 712 | |
91447636 A |
713 | if (so->so_usecount == 0) { |
714 | panic("soclose: so=%x refcount=0\n", so); | |
1c79356b A |
715 | } |
716 | ||
91447636 A |
717 | sflt_notify(so, sock_evt_closing, NULL); |
718 | ||
719 | if ((so->so_options & SO_ACCEPTCONN)) { | |
720 | struct socket *sp; | |
721 | ||
722 | /* We do not want new connection to be added to the connection queues */ | |
723 | so->so_options &= ~SO_ACCEPTCONN; | |
724 | ||
725 | while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { | |
726 | /* A bit tricky here. We need to keep | |
727 | * a lock if it's a protocol global lock | |
728 | * but we want the head, not the socket locked | |
729 | * in the case of per-socket lock... | |
730 | */ | |
ff6e181a | 731 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 732 | socket_unlock(so, 0); |
ff6e181a A |
733 | socket_lock(sp, 1); |
734 | } | |
91447636 | 735 | (void) soabort(sp); |
ff6e181a | 736 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 737 | socket_unlock(sp, 1); |
ff6e181a A |
738 | socket_lock(so, 0); |
739 | } | |
91447636 A |
740 | } |
741 | ||
742 | while ((sp = TAILQ_FIRST(&so->so_comp)) != NULL) { | |
91447636 A |
743 | /* Dequeue from so_comp since sofree() won't do it */ |
744 | TAILQ_REMOVE(&so->so_comp, sp, so_list); | |
745 | so->so_qlen--; | |
ff6e181a A |
746 | |
747 | if (so->so_proto->pr_getlock != NULL) { | |
748 | socket_unlock(so, 0); | |
749 | socket_lock(sp, 1); | |
750 | } | |
751 | ||
91447636 A |
752 | sp->so_state &= ~SS_COMP; |
753 | sp->so_head = NULL; | |
754 | ||
91447636 | 755 | (void) soabort(sp); |
ff6e181a | 756 | if (so->so_proto->pr_getlock != NULL) { |
91447636 | 757 | socket_unlock(sp, 1); |
ff6e181a A |
758 | socket_lock(so, 0); |
759 | } | |
91447636 A |
760 | } |
761 | } | |
762 | if (so->so_pcb == 0) { | |
763 | /* 3915887: mark the socket as ready for dealloc */ | |
764 | so->so_flags |= SOF_PCBCLEARING; | |
1c79356b | 765 | goto discard; |
91447636 | 766 | } |
1c79356b A |
767 | if (so->so_state & SS_ISCONNECTED) { |
768 | if ((so->so_state & SS_ISDISCONNECTING) == 0) { | |
91447636 | 769 | error = sodisconnectlocked(so); |
1c79356b A |
770 | if (error) |
771 | goto drop; | |
772 | } | |
773 | if (so->so_options & SO_LINGER) { | |
774 | if ((so->so_state & SS_ISDISCONNECTING) && | |
775 | (so->so_state & SS_NBIO)) | |
776 | goto drop; | |
91447636 A |
777 | if (so->so_proto->pr_getlock != NULL) |
778 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
779 | else | |
780 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
1c79356b | 781 | while (so->so_state & SS_ISCONNECTED) { |
91447636 A |
782 | ts.tv_sec = (so->so_linger/100); |
783 | ts.tv_nsec = (so->so_linger % 100) * NSEC_PER_USEC * 1000 * 10; | |
784 | error = msleep((caddr_t)&so->so_timeo, mutex_held, | |
785 | PSOCK | PCATCH, "soclos", &ts); | |
786 | if (error) { | |
787 | /* It's OK when the time fires, don't report an error */ | |
788 | if (error == EWOULDBLOCK) | |
789 | error = 0; | |
1c79356b | 790 | break; |
91447636 | 791 | } |
1c79356b A |
792 | } |
793 | } | |
794 | } | |
795 | drop: | |
91447636 A |
796 | if (so->so_usecount == 0) |
797 | panic("soclose: usecount is zero so=%x\n", so); | |
798 | if (so->so_pcb && !(so->so_flags & SOF_PCBCLEARING)) { | |
1c79356b A |
799 | int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); |
800 | if (error == 0) | |
801 | error = error2; | |
802 | } | |
91447636 A |
803 | if (so->so_usecount <= 0) |
804 | panic("soclose: usecount is zero so=%x\n", so); | |
1c79356b | 805 | discard: |
e3027f41 | 806 | if (so->so_pcb && so->so_state & SS_NOFDREF) |
1c79356b A |
807 | panic("soclose: NOFDREF"); |
808 | so->so_state |= SS_NOFDREF; | |
9bccf70c | 809 | #ifdef __APPLE__ |
1c79356b A |
810 | so->so_proto->pr_domain->dom_refs--; |
811 | evsofree(so); | |
9bccf70c | 812 | #endif |
91447636 | 813 | so->so_usecount--; |
1c79356b | 814 | sofree(so); |
1c79356b A |
815 | return (error); |
816 | } | |
817 | ||
91447636 A |
818 | int |
819 | soclose(so) | |
820 | register struct socket *so; | |
821 | { | |
822 | int error = 0; | |
823 | socket_lock(so, 1); | |
824 | if (so->so_retaincnt == 0) | |
825 | error = soclose_locked(so); | |
826 | else { /* if the FD is going away, but socket is retained in kernel remove its reference */ | |
827 | so->so_usecount--; | |
828 | if (so->so_usecount < 2) | |
829 | panic("soclose: retaincnt non null and so=%x usecount=%x\n", so->so_usecount); | |
830 | } | |
831 | socket_unlock(so, 1); | |
832 | return (error); | |
833 | } | |
834 | ||
835 | ||
1c79356b A |
836 | /* |
837 | * Must be called at splnet... | |
838 | */ | |
91447636 | 839 | //#### Should already be locked |
1c79356b A |
840 | int |
841 | soabort(so) | |
842 | struct socket *so; | |
843 | { | |
9bccf70c | 844 | int error; |
1c79356b | 845 | |
91447636 A |
846 | #ifdef MORE_LOCKING_DEBUG |
847 | lck_mtx_t * mutex_held; | |
848 | ||
849 | if (so->so_proto->pr_getlock != NULL) | |
850 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
851 | else | |
852 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
853 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
854 | #endif | |
855 | ||
9bccf70c A |
856 | error = (*so->so_proto->pr_usrreqs->pru_abort)(so); |
857 | if (error) { | |
858 | sofree(so); | |
859 | return error; | |
860 | } | |
861 | return (0); | |
1c79356b A |
862 | } |
863 | ||
864 | int | |
91447636 | 865 | soacceptlock(so, nam, dolock) |
1c79356b A |
866 | register struct socket *so; |
867 | struct sockaddr **nam; | |
91447636 | 868 | int dolock; |
9bccf70c | 869 | { |
1c79356b | 870 | int error; |
91447636 A |
871 | |
872 | if (dolock) socket_lock(so, 1); | |
1c79356b A |
873 | |
874 | if ((so->so_state & SS_NOFDREF) == 0) | |
875 | panic("soaccept: !NOFDREF"); | |
876 | so->so_state &= ~SS_NOFDREF; | |
877 | error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); | |
1c79356b | 878 | |
91447636 | 879 | if (dolock) socket_unlock(so, 1); |
1c79356b A |
880 | return (error); |
881 | } | |
91447636 A |
/*
 * Locking wrapper around soacceptlock(): always takes the socket lock.
 */
int
soaccept(struct socket *so, struct sockaddr **nam)
{
	return (soacceptlock(so, nam, 1));
}
1c79356b A |
889 | |
890 | int | |
91447636 | 891 | soconnectlock(so, nam, dolock) |
1c79356b A |
892 | register struct socket *so; |
893 | struct sockaddr *nam; | |
91447636 | 894 | int dolock; |
1c79356b A |
895 | |
896 | { | |
897 | int s; | |
898 | int error; | |
899 | struct proc *p = current_proc(); | |
1c79356b | 900 | |
91447636 A |
901 | if (dolock) socket_lock(so, 1); |
902 | ||
903 | if (so->so_options & SO_ACCEPTCONN) { | |
904 | if (dolock) socket_unlock(so, 1); | |
1c79356b | 905 | return (EOPNOTSUPP); |
91447636 | 906 | } |
1c79356b A |
907 | /* |
908 | * If protocol is connection-based, can only connect once. | |
909 | * Otherwise, if connected, try to disconnect first. | |
910 | * This allows user to disconnect by connecting to, e.g., | |
911 | * a null address. | |
912 | */ | |
913 | if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && | |
914 | ((so->so_proto->pr_flags & PR_CONNREQUIRED) || | |
91447636 | 915 | (error = sodisconnectlocked(so)))) |
1c79356b A |
916 | error = EISCONN; |
917 | else { | |
91447636 A |
918 | /* |
919 | * Run connect filter before calling protocol: | |
920 | * - non-blocking connect returns before completion; | |
921 | */ | |
922 | { | |
923 | struct socket_filter_entry *filter; | |
924 | int filtered = 0; | |
925 | error = 0; | |
926 | for (filter = so->so_filt; filter && (error == 0); | |
927 | filter = filter->sfe_next_onsocket) { | |
928 | if (filter->sfe_filter->sf_filter.sf_connect_out) { | |
929 | if (filtered == 0) { | |
930 | filtered = 1; | |
931 | sflt_use(so); | |
932 | socket_unlock(so, 0); | |
933 | } | |
934 | error = filter->sfe_filter->sf_filter.sf_connect_out( | |
935 | filter->sfe_cookie, so, nam); | |
936 | } | |
937 | } | |
938 | if (filtered != 0) { | |
939 | socket_lock(so, 0); | |
940 | sflt_unuse(so); | |
941 | } | |
942 | } | |
943 | if (error) { | |
944 | if (error == EJUSTRETURN) | |
945 | error = 0; | |
946 | if (dolock) socket_unlock(so, 1); | |
947 | return error; | |
948 | } | |
949 | ||
1c79356b | 950 | error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); |
1c79356b | 951 | } |
91447636 | 952 | if (dolock) socket_unlock(so, 1); |
1c79356b A |
953 | return (error); |
954 | } | |
955 | ||
91447636 A |
/*
 * Locking wrapper around soconnectlock(): always takes the socket lock.
 */
int
soconnect(struct socket *so, struct sockaddr *nam)
{
	return (soconnectlock(so, nam, 1));
}
963 | ||
1c79356b A |
964 | int |
965 | soconnect2(so1, so2) | |
966 | register struct socket *so1; | |
967 | struct socket *so2; | |
968 | { | |
1c79356b | 969 | int error; |
91447636 A |
970 | //####### Assumes so1 is already locked / |
971 | ||
972 | socket_lock(so2, 1); | |
1c79356b A |
973 | |
974 | error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); | |
91447636 A |
975 | |
976 | socket_unlock(so2, 1); | |
1c79356b A |
977 | return (error); |
978 | } | |
979 | ||
91447636 | 980 | |
1c79356b | 981 | int |
91447636 | 982 | sodisconnectlocked(so) |
1c79356b A |
983 | register struct socket *so; |
984 | { | |
1c79356b | 985 | int error; |
1c79356b A |
986 | |
987 | if ((so->so_state & SS_ISCONNECTED) == 0) { | |
988 | error = ENOTCONN; | |
989 | goto bad; | |
990 | } | |
991 | if (so->so_state & SS_ISDISCONNECTING) { | |
992 | error = EALREADY; | |
993 | goto bad; | |
994 | } | |
91447636 | 995 | |
1c79356b | 996 | error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); |
91447636 | 997 | |
9bccf70c | 998 | if (error == 0) { |
91447636 | 999 | sflt_notify(so, sock_evt_disconnected, NULL); |
1c79356b A |
1000 | } |
1001 | ||
1002 | bad: | |
1c79356b A |
1003 | return (error); |
1004 | } | |
91447636 A |
/*
 * Locking version of sodisconnectlocked().
 */
int
sodisconnect(struct socket *so)
{
	int result;

	socket_lock(so, 1);
	result = sodisconnectlocked(so);
	socket_unlock(so, 1);

	return (result);
}
1c79356b A |
1017 | |
/* Map MSG_DONTWAIT to the non-blocking mbuf-wait flag for sblock(). */
#define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_DONTWAIT : M_WAIT)

/*
 * sosendcheck will lock the socket buffer if it isn't locked and
 * verify that there is space for the data being inserted.
 *
 * On success, *sblocked is set if this routine acquired the send
 * buffer lock (the caller is then responsible for the matching
 * sbunlock).  Called with the socket lock held; may sleep in sblock()
 * and sbwait().  Returns 0, or EPIPE / ENOTCONN / EDESTADDRREQ /
 * EMSGSIZE / EWOULDBLOCK / a pending so_error.
 */

static int
sosendcheck(
	struct socket *so,
	struct sockaddr *addr,
	long resid,
	long clen,
	long atomic,
	int flags,
	int *sblocked)
{
	int	error = 0;
	long space;
	int	assumelock = 0;

restart:
	if (*sblocked == 0) {
		if ((so->so_snd.sb_flags & SB_LOCK) != 0 &&
			so->so_send_filt_thread != 0 &&
			so->so_send_filt_thread == current_thread()) {
			/*
			 * We're being called recursively from a filter,
			 * allow this to continue. Radar 4150520.
			 * Don't set sblocked because we don't want
			 * to perform an unlock later.
			 */
			assumelock = 1;
		}
		else {
			error = sblock(&so->so_snd, SBLOCKWAIT(flags));
			if (error) {
				return error;
			}
			*sblocked = 1;
		}
	}

	/* Send side already shut down. */
	if (so->so_state & SS_CANTSENDMORE)
		return EPIPE;

	/* Deliver (and clear) any asynchronously-posted socket error. */
	if (so->so_error) {
		error = so->so_error;
		so->so_error = 0;
		return error;
	}

	if ((so->so_state & SS_ISCONNECTED) == 0) {
		/*
		 * `sendto' and `sendmsg' is allowed on a connection-
		 * based socket if it supports implied connect.
		 * Return ENOTCONN if not connected and no address is
		 * supplied.
		 */
		if ((so->so_proto->pr_flags & PR_CONNREQUIRED) &&
			(so->so_proto->pr_flags & PR_IMPLOPCL) == 0) {
			if ((so->so_state & SS_ISCONFIRMING) == 0 &&
				!(resid == 0 && clen != 0))
				return ENOTCONN;
		} else if (addr == 0 && !(flags&MSG_HOLD))
			return (so->so_proto->pr_flags & PR_CONNREQUIRED) ? ENOTCONN : EDESTADDRREQ;
	}
	space = sbspace(&so->so_snd);
	/* OOB sends are allowed to overcommit the buffer slightly. */
	if (flags & MSG_OOB)
		space += 1024;
	if ((atomic && resid > so->so_snd.sb_hiwat) ||
		clen > so->so_snd.sb_hiwat)
		return EMSGSIZE;
	if (space < resid + clen &&
		(atomic || space < so->so_snd.sb_lowat || space < clen)) {
		/*
		 * Not enough room.  Non-blocking sockets (or a recursive
		 * filter call, which must not sleep holding SB_LOCK) bail
		 * out; otherwise drop the buffer lock, wait for space and
		 * re-run all of the checks from the top.
		 */
		if ((so->so_state & SS_NBIO) || (flags & MSG_NBIO) || assumelock) {
			return EWOULDBLOCK;
		}
		sbunlock(&so->so_snd, 1);
		error = sbwait(&so->so_snd);
		if (error) {
			return error;
		}
		goto restart;
	}

	return 0;
}
1106 | ||
1c79356b A |
/*
 * Send on a socket.
 * If send must go all at once and message is larger than
 * send buffering, then hard error.
 * Lock against other senders.
 * If must go all at once and not enough room now, then
 * inform user that this would block and do nothing.
 * Otherwise, if nonblocking, send as much as possible.
 * The data to be sent is described by "uio" if nonzero,
 * otherwise by the mbuf chain "top" (which must be null
 * if uio is not). Data provided in mbuf chain must be small
 * enough to send all at once.
 *
 * Returns nonzero on error, timeout or signal; callers
 * must check for short counts if EINTR/ERESTART are returned.
 * Data and control buffers are freed on return.
 * Experiment:
 * MSG_HOLD: go thru most of sosend(), but just enqueue the mbuf
 * MSG_SEND: go thru as for MSG_HOLD on current fragment, then
 *	point at the mbuf chain being constructed and go from there.
 *
 * Locking: takes the socket lock itself; the lock is dropped while
 * allocating/copying mbufs (uiomove) and while socket filters run,
 * and retaken afterwards.  The send-buffer lock is managed through
 * sosendcheck()/sbunlock().
 */
int
sosend(so, addr, uio, top, control, flags)
	register struct socket *so;
	struct sockaddr *addr;
	struct uio *uio;
	struct mbuf *top;
	struct mbuf *control;
	int flags;

{
	struct mbuf **mp;
	register struct mbuf *m, *freelist = NULL;
	register long space, len, resid;
	int clen = 0, error, dontroute, mlen, sendflags;
	int atomic = sosendallatonce(so) || top;
	int sblocked = 0;
	struct proc *p = current_proc();

	if (uio)
		// LP64todo - fix this!
		resid = uio_resid(uio);
	else
		resid = top->m_pkthdr.len;

	KERNEL_DEBUG((DBG_FNC_SOSEND | DBG_FUNC_START),
	     so,
	     resid,
	     so->so_snd.sb_cc,
	     so->so_snd.sb_lowat,
	     so->so_snd.sb_hiwat);

	socket_lock(so, 1);

	/*
	 * In theory resid should be unsigned.
	 * However, space must be signed, as it might be less than 0
	 * if we over-committed, and we must use a signed comparison
	 * of space and resid.  On the other hand, a negative resid
	 * causes us to loop sending 0-length segments to the protocol.
	 *
	 * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM
	 * type sockets since that's an error.
	 */
	if (resid < 0 || (so->so_type == SOCK_STREAM && (flags & MSG_EOR))) {
		error = EINVAL;
		socket_unlock(so, 1);
		goto out;
	}

	dontroute =
	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
	    (so->so_proto->pr_flags & PR_ATOMIC);
	if (p)
		p->p_stats->p_ru.ru_msgsnd++;
	if (control)
		clen = control->m_len;

	/* Outer loop: one pass per record/window until resid is drained. */
	do {
		error = sosendcheck(so, addr, resid, clen, atomic, flags, &sblocked);
		if (error) {
			goto release;
		}
		mp = &top;
		space = sbspace(&so->so_snd) - clen + ((flags & MSG_OOB) ? 1024 : 0);

		/* Inner loop: build an mbuf chain and hand it to the protocol. */
		do {

			if (uio == NULL) {
				/*
				 * Data is prepackaged in "top".
				 */
				resid = 0;
				if (flags & MSG_EOR)
					top->m_flags |= M_EOR;
			} else {
				int chainlength;
				int bytes_to_copy;

				bytes_to_copy = min(resid, space);

				if (sosendminchain > 0) {
					chainlength = 0;
				} else
					chainlength = sosendmaxchain;

				/* Drop the socket lock for allocation + copyin. */
				socket_unlock(so, 0);

				do {
					int num_needed;
					int hdrs_needed = (top == 0) ? 1 : 0;

					/*
					 * try to maintain a local cache of mbuf clusters needed to complete this write
					 * the list is further limited to the number that are currently needed to fill the socket
					 * this mechanism allows a large number of mbufs/clusters to be grabbed under a single
					 * mbuf lock... if we can't get any clusters, than fall back to trying for mbufs
					 * if we fail early (or miscalcluate the number needed) make sure to release any clusters
					 * we haven't yet consumed.
					 */
					if (freelist == NULL && bytes_to_copy > MCLBYTES) {
						num_needed = bytes_to_copy / NBPG;

						if ((bytes_to_copy - (num_needed * NBPG)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, NBPG);
						/* Fall back to cluster size if allocation failed */
					}

					if (freelist == NULL && bytes_to_copy > MINCLSIZE) {
						num_needed = bytes_to_copy / MCLBYTES;

						if ((bytes_to_copy - (num_needed * MCLBYTES)) >= MINCLSIZE)
							num_needed++;

						freelist = m_getpackets_internal(&num_needed, hdrs_needed, M_WAIT, 0, MCLBYTES);
						/* Fall back to a single mbuf if allocation failed */
					}

					if (freelist == NULL) {
						if (top == 0)
							MGETHDR(freelist, M_WAIT, MT_DATA);
						else
							MGET(freelist, M_WAIT, MT_DATA);

						if (freelist == NULL) {
							error = ENOBUFS;
							socket_lock(so, 0);
							goto release;
						}
						/*
						 * For datagram protocols, leave room
						 * for protocol headers in first mbuf.
						 */
						if (atomic && top == 0 && bytes_to_copy < MHLEN)
							MH_ALIGN(freelist, bytes_to_copy);
					}
					/* Pop the next mbuf off the local free list. */
					m = freelist;
					freelist = m->m_next;
					m->m_next = NULL;

					if ((m->m_flags & M_EXT))
						mlen = m->m_ext.ext_size;
					else if ((m->m_flags & M_PKTHDR))
						mlen = MHLEN - m_leadingspace(m);
					else
						mlen = MLEN;
					len = min(mlen, bytes_to_copy);

					chainlength += len;

					space -= len;

					error = uiomove(mtod(m, caddr_t), (int)len, uio);

					// LP64todo - fix this!
					resid = uio_resid(uio);

					m->m_len = len;
					*mp = m;
					top->m_pkthdr.len += len;
					if (error)
						break;
					mp = &m->m_next;
					if (resid <= 0) {
						if (flags & MSG_EOR)
							top->m_flags |= M_EOR;
						break;
					}
					bytes_to_copy = min(resid, space);

				} while (space > 0 && (chainlength < sosendmaxchain || atomic || resid < MINCLSIZE));

				socket_lock(so, 0);

				if (error)
					goto release;
			}

			if (flags & (MSG_HOLD|MSG_SEND))
			{
				/* Enqueue for later, go away if HOLD */
				register struct mbuf *mb1;
				if (so->so_temp && (flags & MSG_FLUSH))
				{
					m_freem(so->so_temp);
					so->so_temp = NULL;
				}
				if (so->so_temp)
					so->so_tail->m_next = top;
				else
					so->so_temp = top;
				mb1 = top;
				while (mb1->m_next)
					mb1 = mb1->m_next;
				so->so_tail = mb1;
				if (flags & MSG_HOLD)
				{
					top = NULL;
					goto release;
				}
				top = so->so_temp;
			}
			if (dontroute)
				so->so_options |= SO_DONTROUTE;
			/* Compute flags here, for pru_send and NKEs */
			sendflags = (flags & MSG_OOB) ? PRUS_OOB :
			    /*
			     * If the user set MSG_EOF, the protocol
			     * understands this flag and nothing left to
			     * send then use PRU_SEND_EOF instead of PRU_SEND.
			     */
			    ((flags & MSG_EOF) &&
			     (so->so_proto->pr_flags & PR_IMPLOPCL) &&
			     (resid <= 0)) ?
				PRUS_EOF :
			    /* If there is more to send set PRUS_MORETOCOME */
			    (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0;

			/*
			 * Socket filter processing
			 */
			{
				struct socket_filter_entry *filter;
				int filtered;

				filtered = 0;
				error = 0;
				for (filter = so->so_filt; filter && (error == 0);
				     filter = filter->sfe_next_onsocket) {
					if (filter->sfe_filter->sf_filter.sf_data_out) {
						int so_flags = 0;
						if (filtered == 0) {
							/*
							 * First matching filter: record the
							 * sending thread (so sosendcheck can
							 * detect recursive entry) and drop the
							 * socket lock; filters run unlocked.
							 */
							filtered = 1;
							so->so_send_filt_thread = current_thread();
							sflt_use(so);
							socket_unlock(so, 0);
							so_flags = (sendflags & MSG_OOB) ? sock_data_filt_flag_oob : 0;
						}
						error = filter->sfe_filter->sf_filter.sf_data_out(
							filter->sfe_cookie, so, addr, &top, &control, so_flags);
					}
				}

				if (filtered) {
					/*
					 * At this point, we've run at least one filter.
					 * The socket is unlocked as is the socket buffer.
					 */
					socket_lock(so, 0);
					sflt_unuse(so);
					so->so_send_filt_thread = 0;
					if (error) {
						/*
						 * EJUSTRETURN: the filter consumed the
						 * data; forget our references so the
						 * cleanup path doesn't free them.
						 */
						if (error == EJUSTRETURN) {
							error = 0;
							clen = 0;
							control = 0;
							top = 0;
						}

						goto release;
					}
				}
			}
			/*
			 * End Socket filter processing
			 */

			if (error == EJUSTRETURN) {
				/* A socket filter handled this data */
				error = 0;
			}
			else {
				error = (*so->so_proto->pr_usrreqs->pru_send)(so,
				    sendflags, top, addr, control, p);
			}
#ifdef __APPLE__
			if (flags & MSG_SEND)
				so->so_temp = NULL;
#endif
			if (dontroute)
				so->so_options &= ~SO_DONTROUTE;
			/* Ownership of top/control passed to the protocol. */
			clen = 0;
			control = 0;
			top = 0;
			mp = &top;
			if (error)
				goto release;
		} while (resid && space > 0);
	} while (resid);

release:
	if (sblocked)
		sbunlock(&so->so_snd, 0);	/* will unlock socket */
	else
		socket_unlock(so, 1);
out:
	if (top)
		m_freem(top);
	if (control)
		m_freem(control);
	if (freelist)
		m_freem_list(freelist);

	KERNEL_DEBUG(DBG_FNC_SOSEND | DBG_FUNC_END,
	     so,
	     resid,
	     so->so_snd.sb_cc,
	     space,
	     error);

	return (error);
}
1441 | ||
1442 | /* | |
1443 | * Implement receive operations on a socket. | |
1444 | * We depend on the way that records are added to the sockbuf | |
1445 | * by sbappend*. In particular, each record (mbufs linked through m_next) | |
1446 | * must begin with an address if the protocol so specifies, | |
1447 | * followed by an optional mbuf or mbufs containing ancillary data, | |
1448 | * and then zero or more mbufs of data. | |
1449 | * In order to avoid blocking network interrupts for the entire time here, | |
1450 | * we splx() while doing the actual copy to user space. | |
1451 | * Although the sockbuf is locked, new data may still be appended, | |
1452 | * and thus we must maintain consistency of the sockbuf during that time. | |
1453 | * | |
1454 | * The caller may receive the data as a single mbuf chain by supplying | |
1455 | * an mbuf **mp0 for use in returning the chain. The uio is then used | |
1456 | * only for the count in uio_resid. | |
1457 | */ | |
1458 | int | |
1459 | soreceive(so, psa, uio, mp0, controlp, flagsp) | |
1460 | register struct socket *so; | |
1461 | struct sockaddr **psa; | |
1462 | struct uio *uio; | |
1463 | struct mbuf **mp0; | |
1464 | struct mbuf **controlp; | |
1465 | int *flagsp; | |
1466 | { | |
91447636 A |
1467 | register struct mbuf *m, **mp, *ml = NULL; |
1468 | register int flags, len, error, offset; | |
1c79356b A |
1469 | struct protosw *pr = so->so_proto; |
1470 | struct mbuf *nextrecord; | |
1471 | int moff, type = 0; | |
91447636 A |
1472 | // LP64todo - fix this! |
1473 | int orig_resid = uio_resid(uio); | |
55e303ae A |
1474 | volatile struct mbuf *free_list; |
1475 | volatile int delayed_copy_len; | |
1476 | int can_delay; | |
1477 | int need_event; | |
1478 | struct proc *p = current_proc(); | |
1479 | ||
1480 | ||
91447636 | 1481 | // LP64todo - fix this! |
1c79356b A |
1482 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_START, |
1483 | so, | |
91447636 | 1484 | uio_resid(uio), |
1c79356b A |
1485 | so->so_rcv.sb_cc, |
1486 | so->so_rcv.sb_lowat, | |
1487 | so->so_rcv.sb_hiwat); | |
1488 | ||
91447636 | 1489 | socket_lock(so, 1); |
1c79356b | 1490 | |
91447636 A |
1491 | #ifdef MORE_LOCKING_DEBUG |
1492 | if (so->so_usecount == 1) | |
1493 | panic("soreceive: so=%x no other reference on socket\n", so); | |
1494 | #endif | |
1c79356b A |
1495 | mp = mp0; |
1496 | if (psa) | |
1497 | *psa = 0; | |
1498 | if (controlp) | |
1499 | *controlp = 0; | |
1500 | if (flagsp) | |
1501 | flags = *flagsp &~ MSG_EOR; | |
1502 | else | |
1503 | flags = 0; | |
1504 | /* | |
1505 | * When SO_WANTOOBFLAG is set we try to get out-of-band data | |
1506 | * regardless of the flags argument. Here is the case were | |
1507 | * out-of-band data is not inline. | |
1508 | */ | |
1509 | if ((flags & MSG_OOB) || | |
1510 | ((so->so_options & SO_WANTOOBFLAG) != 0 && | |
1511 | (so->so_options & SO_OOBINLINE) == 0 && | |
1512 | (so->so_oobmark || (so->so_state & SS_RCVATMARK)))) { | |
1513 | m = m_get(M_WAIT, MT_DATA); | |
55e303ae | 1514 | if (m == NULL) { |
91447636 | 1515 | socket_unlock(so, 1); |
55e303ae | 1516 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, ENOBUFS,0,0,0,0); |
9bccf70c | 1517 | return (ENOBUFS); |
55e303ae | 1518 | } |
1c79356b A |
1519 | error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); |
1520 | if (error) | |
1521 | goto bad; | |
91447636 | 1522 | socket_unlock(so, 0); |
1c79356b | 1523 | do { |
91447636 | 1524 | // LP64todo - fix this! |
1c79356b | 1525 | error = uiomove(mtod(m, caddr_t), |
91447636 | 1526 | (int) min(uio_resid(uio), m->m_len), uio); |
1c79356b | 1527 | m = m_free(m); |
91447636 A |
1528 | } while (uio_resid(uio) && error == 0 && m); |
1529 | socket_lock(so, 0); | |
1c79356b A |
1530 | bad: |
1531 | if (m) | |
1532 | m_freem(m); | |
9bccf70c A |
1533 | #ifdef __APPLE__ |
1534 | if ((so->so_options & SO_WANTOOBFLAG) != 0) { | |
1535 | if (error == EWOULDBLOCK || error == EINVAL) { | |
1536 | /* | |
1537 | * Let's try to get normal data: | |
1538 | * EWOULDBLOCK: out-of-band data not receive yet; | |
1539 | * EINVAL: out-of-band data already read. | |
1540 | */ | |
1541 | error = 0; | |
1542 | goto nooob; | |
1543 | } else if (error == 0 && flagsp) | |
1544 | *flagsp |= MSG_OOB; | |
91447636 A |
1545 | } |
1546 | socket_unlock(so, 1); | |
1c79356b | 1547 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
9bccf70c | 1548 | #endif |
1c79356b A |
1549 | return (error); |
1550 | } | |
1551 | nooob: | |
1552 | if (mp) | |
1553 | *mp = (struct mbuf *)0; | |
91447636 | 1554 | if (so->so_state & SS_ISCONFIRMING && uio_resid(uio)) |
1c79356b A |
1555 | (*pr->pr_usrreqs->pru_rcvd)(so, 0); |
1556 | ||
55e303ae A |
1557 | |
1558 | free_list = (struct mbuf *)0; | |
1559 | delayed_copy_len = 0; | |
1c79356b | 1560 | restart: |
91447636 A |
1561 | #ifdef MORE_LOCKING_DEBUG |
1562 | if (so->so_usecount <= 1) | |
1563 | printf("soreceive: sblock so=%x ref=%d on socket\n", so, so->so_usecount); | |
1564 | #endif | |
9bccf70c A |
1565 | error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); |
1566 | if (error) { | |
91447636 | 1567 | socket_unlock(so, 1); |
1c79356b A |
1568 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
1569 | return (error); | |
1570 | } | |
1c79356b A |
1571 | |
1572 | m = so->so_rcv.sb_mb; | |
1573 | /* | |
1574 | * If we have less data than requested, block awaiting more | |
1575 | * (subject to any timeout) if: | |
1576 | * 1. the current count is less than the low water mark, or | |
1577 | * 2. MSG_WAITALL is set, and it is possible to do the entire | |
1578 | * receive operation at once if we block (resid <= hiwat). | |
1579 | * 3. MSG_DONTWAIT is not set | |
1580 | * If MSG_WAITALL is set but resid is larger than the receive buffer, | |
1581 | * we have to do the receive in sections, and thus risk returning | |
1582 | * a short count if a timeout or signal occurs after we start. | |
1583 | */ | |
1584 | if (m == 0 || (((flags & MSG_DONTWAIT) == 0 && | |
91447636 | 1585 | so->so_rcv.sb_cc < uio_resid(uio)) && |
55e303ae | 1586 | (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || |
91447636 | 1587 | ((flags & MSG_WAITALL) && uio_resid(uio) <= so->so_rcv.sb_hiwat)) && |
1c79356b | 1588 | m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) { |
55e303ae | 1589 | |
1c79356b A |
1590 | KASSERT(m != 0 || !so->so_rcv.sb_cc, ("receive 1")); |
1591 | if (so->so_error) { | |
1592 | if (m) | |
1593 | goto dontblock; | |
1594 | error = so->so_error; | |
1595 | if ((flags & MSG_PEEK) == 0) | |
1596 | so->so_error = 0; | |
1597 | goto release; | |
1598 | } | |
1599 | if (so->so_state & SS_CANTRCVMORE) { | |
1600 | if (m) | |
1601 | goto dontblock; | |
1602 | else | |
1603 | goto release; | |
1604 | } | |
1605 | for (; m; m = m->m_next) | |
1606 | if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) { | |
1607 | m = so->so_rcv.sb_mb; | |
1608 | goto dontblock; | |
1609 | } | |
1610 | if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 && | |
1611 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) { | |
1612 | error = ENOTCONN; | |
1613 | goto release; | |
1614 | } | |
91447636 | 1615 | if (uio_resid(uio) == 0) |
1c79356b | 1616 | goto release; |
91447636 | 1617 | if ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO))) { |
1c79356b A |
1618 | error = EWOULDBLOCK; |
1619 | goto release; | |
1620 | } | |
91447636 A |
1621 | sbunlock(&so->so_rcv, 1); |
1622 | #ifdef EVEN_MORE_LOCKING_DEBUG | |
1c79356b A |
1623 | if (socket_debug) |
1624 | printf("Waiting for socket data\n"); | |
91447636 | 1625 | #endif |
55e303ae | 1626 | |
1c79356b | 1627 | error = sbwait(&so->so_rcv); |
91447636 | 1628 | #ifdef EVEN_MORE_LOCKING_DEBUG |
1c79356b A |
1629 | if (socket_debug) |
1630 | printf("SORECEIVE - sbwait returned %d\n", error); | |
91447636 A |
1631 | #endif |
1632 | if (so->so_usecount < 1) | |
1633 | panic("soreceive: after 2nd sblock so=%x ref=%d on socket\n", so, so->so_usecount); | |
9bccf70c | 1634 | if (error) { |
91447636 | 1635 | socket_unlock(so, 1); |
1c79356b A |
1636 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, error,0,0,0,0); |
1637 | return (error); | |
1638 | } | |
1639 | goto restart; | |
1640 | } | |
1641 | dontblock: | |
9bccf70c | 1642 | #ifndef __APPLE__ |
1c79356b A |
1643 | if (uio->uio_procp) |
1644 | uio->uio_procp->p_stats->p_ru.ru_msgrcv++; | |
55e303ae A |
1645 | #else /* __APPLE__ */ |
1646 | /* | |
1647 | * 2207985 | |
1648 | * This should be uio->uio-procp; however, some callers of this | |
1649 | * function use auto variables with stack garbage, and fail to | |
1650 | * fill out the uio structure properly. | |
1651 | */ | |
1652 | if (p) | |
1653 | p->p_stats->p_ru.ru_msgrcv++; | |
1654 | #endif /* __APPLE__ */ | |
1c79356b A |
1655 | nextrecord = m->m_nextpkt; |
1656 | if ((pr->pr_flags & PR_ADDR) && m->m_type == MT_SONAME) { | |
1657 | KASSERT(m->m_type == MT_SONAME, ("receive 1a")); | |
1658 | orig_resid = 0; | |
4a249263 | 1659 | if (psa) { |
1c79356b A |
1660 | *psa = dup_sockaddr(mtod(m, struct sockaddr *), |
1661 | mp0 == 0); | |
4a249263 A |
1662 | if ((*psa == 0) && (flags & MSG_NEEDSA)) { |
1663 | error = EWOULDBLOCK; | |
1664 | goto release; | |
1665 | } | |
1666 | } | |
1c79356b A |
1667 | if (flags & MSG_PEEK) { |
1668 | m = m->m_next; | |
1669 | } else { | |
1670 | sbfree(&so->so_rcv, m); | |
91447636 A |
1671 | if (m->m_next == 0 && so->so_rcv.sb_cc != 0) |
1672 | panic("soreceive: about to create invalid socketbuf"); | |
1c79356b A |
1673 | MFREE(m, so->so_rcv.sb_mb); |
1674 | m = so->so_rcv.sb_mb; | |
1675 | } | |
1676 | } | |
1677 | while (m && m->m_type == MT_CONTROL && error == 0) { | |
1678 | if (flags & MSG_PEEK) { | |
1679 | if (controlp) | |
1680 | *controlp = m_copy(m, 0, m->m_len); | |
1681 | m = m->m_next; | |
1682 | } else { | |
1683 | sbfree(&so->so_rcv, m); | |
1684 | if (controlp) { | |
1685 | if (pr->pr_domain->dom_externalize && | |
1686 | mtod(m, struct cmsghdr *)->cmsg_type == | |
91447636 A |
1687 | SCM_RIGHTS) { |
1688 | socket_unlock(so, 0); /* release socket lock: see 3903171 */ | |
1c79356b | 1689 | error = (*pr->pr_domain->dom_externalize)(m); |
91447636 A |
1690 | socket_lock(so, 0); |
1691 | } | |
1c79356b | 1692 | *controlp = m; |
91447636 A |
1693 | if (m->m_next == 0 && so->so_rcv.sb_cc != 0) |
1694 | panic("soreceive: so->so_rcv.sb_mb->m_next == 0 && so->so_rcv.sb_cc != 0"); | |
1c79356b A |
1695 | so->so_rcv.sb_mb = m->m_next; |
1696 | m->m_next = 0; | |
1697 | m = so->so_rcv.sb_mb; | |
1698 | } else { | |
1699 | MFREE(m, so->so_rcv.sb_mb); | |
1700 | m = so->so_rcv.sb_mb; | |
1701 | } | |
1702 | } | |
1703 | if (controlp) { | |
1704 | orig_resid = 0; | |
1705 | controlp = &(*controlp)->m_next; | |
1706 | } | |
1707 | } | |
1708 | if (m) { | |
1709 | if ((flags & MSG_PEEK) == 0) | |
1710 | m->m_nextpkt = nextrecord; | |
1711 | type = m->m_type; | |
1712 | if (type == MT_OOBDATA) | |
1713 | flags |= MSG_OOB; | |
1714 | } | |
1715 | moff = 0; | |
1716 | offset = 0; | |
fa4905b1 | 1717 | |
91447636 | 1718 | if (!(flags & MSG_PEEK) && uio_resid(uio) > sorecvmincopy) |
55e303ae A |
1719 | can_delay = 1; |
1720 | else | |
1721 | can_delay = 0; | |
1722 | ||
1723 | need_event = 0; | |
fa4905b1 | 1724 | |
91447636 | 1725 | while (m && (uio_resid(uio) - delayed_copy_len) > 0 && error == 0) { |
1c79356b A |
1726 | if (m->m_type == MT_OOBDATA) { |
1727 | if (type != MT_OOBDATA) | |
1728 | break; | |
1729 | } else if (type == MT_OOBDATA) | |
1730 | break; | |
9bccf70c | 1731 | #ifndef __APPLE__ |
1c79356b A |
1732 | /* |
1733 | * This assertion needs rework. The trouble is Appletalk is uses many | |
1734 | * mbuf types (NOT listed in mbuf.h!) which will trigger this panic. | |
1735 | * For now just remove the assertion... CSM 9/98 | |
1736 | */ | |
1737 | else | |
1738 | KASSERT(m->m_type == MT_DATA || m->m_type == MT_HEADER, | |
1739 | ("receive 3")); | |
9bccf70c A |
1740 | #else |
1741 | /* | |
1742 | * Make sure to allways set MSG_OOB event when getting | |
1743 | * out of band data inline. | |
1744 | */ | |
1c79356b | 1745 | if ((so->so_options & SO_WANTOOBFLAG) != 0 && |
9bccf70c A |
1746 | (so->so_options & SO_OOBINLINE) != 0 && |
1747 | (so->so_state & SS_RCVATMARK) != 0) { | |
1748 | flags |= MSG_OOB; | |
1749 | } | |
1750 | #endif | |
1c79356b | 1751 | so->so_state &= ~SS_RCVATMARK; |
91447636 A |
1752 | // LP64todo - fix this! |
1753 | len = uio_resid(uio) - delayed_copy_len; | |
1c79356b A |
1754 | if (so->so_oobmark && len > so->so_oobmark - offset) |
1755 | len = so->so_oobmark - offset; | |
1756 | if (len > m->m_len - moff) | |
1757 | len = m->m_len - moff; | |
1758 | /* | |
1759 | * If mp is set, just pass back the mbufs. | |
1760 | * Otherwise copy them out via the uio, then free. | |
1761 | * Sockbuf must be consistent here (points to current mbuf, | |
1762 | * it points to next record) when we drop priority; | |
1763 | * we must note any additions to the sockbuf when we | |
1764 | * block interrupts again. | |
1765 | */ | |
1766 | if (mp == 0) { | |
55e303ae A |
1767 | if (can_delay && len == m->m_len) { |
1768 | /* | |
1769 | * only delay the copy if we're consuming the | |
1770 | * mbuf and we're NOT in MSG_PEEK mode | |
1771 | * and we have enough data to make it worthwile | |
1772 | * to drop and retake the funnel... can_delay | |
1773 | * reflects the state of the 2 latter constraints | |
1774 | * moff should always be zero in these cases | |
1775 | */ | |
1776 | delayed_copy_len += len; | |
1777 | } else { | |
55e303ae A |
1778 | |
1779 | if (delayed_copy_len) { | |
91447636 | 1780 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1781 | |
1782 | if (error) { | |
55e303ae A |
1783 | goto release; |
1784 | } | |
1785 | if (m != so->so_rcv.sb_mb) { | |
1786 | /* | |
1787 | * can only get here if MSG_PEEK is not set | |
1788 | * therefore, m should point at the head of the rcv queue... | |
1789 | * if it doesn't, it means something drastically changed | |
1790 | * while we were out from behind the funnel in sodelayed_copy... | |
1791 | * perhaps a RST on the stream... in any event, the stream has | |
1792 | * been interrupted... it's probably best just to return | |
1793 | * whatever data we've moved and let the caller sort it out... | |
1794 | */ | |
1795 | break; | |
1796 | } | |
1797 | } | |
91447636 | 1798 | socket_unlock(so, 0); |
55e303ae | 1799 | error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio); |
91447636 | 1800 | socket_lock(so, 0); |
55e303ae | 1801 | |
55e303ae A |
1802 | if (error) |
1803 | goto release; | |
1804 | } | |
1c79356b | 1805 | } else |
91447636 | 1806 | uio_setresid(uio, (uio_resid(uio) - len)); |
55e303ae | 1807 | |
1c79356b A |
1808 | if (len == m->m_len - moff) { |
1809 | if (m->m_flags & M_EOR) | |
1810 | flags |= MSG_EOR; | |
1811 | if (flags & MSG_PEEK) { | |
1812 | m = m->m_next; | |
1813 | moff = 0; | |
1814 | } else { | |
1815 | nextrecord = m->m_nextpkt; | |
1816 | sbfree(&so->so_rcv, m); | |
91447636 | 1817 | m->m_nextpkt = NULL; |
55e303ae | 1818 | |
1c79356b A |
1819 | if (mp) { |
1820 | *mp = m; | |
1821 | mp = &m->m_next; | |
1822 | so->so_rcv.sb_mb = m = m->m_next; | |
1823 | *mp = (struct mbuf *)0; | |
1824 | } else { | |
55e303ae A |
1825 | if (free_list == NULL) |
1826 | free_list = m; | |
1827 | else | |
14353aa8 A |
1828 | ml->m_next = m; |
1829 | ml = m; | |
1830 | so->so_rcv.sb_mb = m = m->m_next; | |
1831 | ml->m_next = 0; | |
1c79356b A |
1832 | } |
1833 | if (m) | |
1834 | m->m_nextpkt = nextrecord; | |
1835 | } | |
1836 | } else { | |
1837 | if (flags & MSG_PEEK) | |
1838 | moff += len; | |
1839 | else { | |
1840 | if (mp) | |
1841 | *mp = m_copym(m, 0, len, M_WAIT); | |
1842 | m->m_data += len; | |
1843 | m->m_len -= len; | |
1844 | so->so_rcv.sb_cc -= len; | |
1845 | } | |
1846 | } | |
1847 | if (so->so_oobmark) { | |
1848 | if ((flags & MSG_PEEK) == 0) { | |
1849 | so->so_oobmark -= len; | |
1850 | if (so->so_oobmark == 0) { | |
1851 | so->so_state |= SS_RCVATMARK; | |
55e303ae A |
1852 | /* |
1853 | * delay posting the actual event until after | |
1854 | * any delayed copy processing has finished | |
1855 | */ | |
1856 | need_event = 1; | |
1c79356b A |
1857 | break; |
1858 | } | |
1859 | } else { | |
1860 | offset += len; | |
1861 | if (offset == so->so_oobmark) | |
1862 | break; | |
1863 | } | |
1864 | } | |
91447636 | 1865 | if (flags & MSG_EOR) |
1c79356b A |
1866 | break; |
1867 | /* | |
55e303ae | 1868 | * If the MSG_WAITALL or MSG_WAITSTREAM flag is set (for non-atomic socket), |
1c79356b A |
1869 | * we must not quit until "uio->uio_resid == 0" or an error |
1870 | * termination. If a signal/timeout occurs, return | |
1871 | * with a short count but without error. | |
1872 | * Keep sockbuf locked against other readers. | |
1873 | */ | |
91447636 | 1874 | while (flags & (MSG_WAITALL|MSG_WAITSTREAM) && m == 0 && (uio_resid(uio) - delayed_copy_len) > 0 && |
1c79356b A |
1875 | !sosendallatonce(so) && !nextrecord) { |
1876 | if (so->so_error || so->so_state & SS_CANTRCVMORE) | |
55e303ae | 1877 | goto release; |
fa4905b1 | 1878 | |
91447636 | 1879 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb && (((struct inpcb *)so->so_pcb)->inp_state != INPCB_STATE_DEAD)) |
55e303ae A |
1880 | (*pr->pr_usrreqs->pru_rcvd)(so, flags); |
1881 | if (sbwait(&so->so_rcv)) { | |
1882 | error = 0; | |
1883 | goto release; | |
fa4905b1 | 1884 | } |
55e303ae A |
1885 | /* |
1886 | * have to wait until after we get back from the sbwait to do the copy because | |
1887 | * we will drop the funnel if we have enough data that has been delayed... by dropping | |
1888 | * the funnel we open up a window allowing the netisr thread to process the incoming packets | |
1889 | * and to change the state of this socket... we're issuing the sbwait because | |
1890 | * the socket is empty and we're expecting the netisr thread to wake us up when more | |
1891 | * packets arrive... if we allow that processing to happen and then sbwait, we | |
1892 | * could stall forever with packets sitting in the socket if no further packets | |
1893 | * arrive from the remote side. | |
1894 | * | |
1895 | * we want to copy before we've collected all the data to satisfy this request to | |
1896 | * allow the copy to overlap the incoming packet processing on an MP system | |
1897 | */ | |
1898 | if (delayed_copy_len > sorecvmincopy && (delayed_copy_len > (so->so_rcv.sb_hiwat / 2))) { | |
1899 | ||
91447636 | 1900 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1901 | |
1902 | if (error) | |
1903 | goto release; | |
1c79356b A |
1904 | } |
1905 | m = so->so_rcv.sb_mb; | |
fa4905b1 | 1906 | if (m) { |
1c79356b | 1907 | nextrecord = m->m_nextpkt; |
fa4905b1 | 1908 | } |
1c79356b A |
1909 | } |
1910 | } | |
91447636 A |
1911 | #ifdef MORE_LOCKING_DEBUG |
1912 | if (so->so_usecount <= 1) | |
1913 | panic("soreceive: after big while so=%x ref=%d on socket\n", so, so->so_usecount); | |
1914 | #endif | |
1c79356b A |
1915 | |
1916 | if (m && pr->pr_flags & PR_ATOMIC) { | |
9bccf70c | 1917 | #ifdef __APPLE__ |
1c79356b A |
1918 | if (so->so_options & SO_DONTTRUNC) |
1919 | flags |= MSG_RCVMORE; | |
9bccf70c A |
1920 | else { |
1921 | #endif | |
1922 | flags |= MSG_TRUNC; | |
1c79356b A |
1923 | if ((flags & MSG_PEEK) == 0) |
1924 | (void) sbdroprecord(&so->so_rcv); | |
9bccf70c | 1925 | #ifdef __APPLE__ |
1c79356b | 1926 | } |
9bccf70c | 1927 | #endif |
1c79356b A |
1928 | } |
1929 | if ((flags & MSG_PEEK) == 0) { | |
1930 | if (m == 0) | |
1931 | so->so_rcv.sb_mb = nextrecord; | |
1932 | if (pr->pr_flags & PR_WANTRCVD && so->so_pcb) | |
1933 | (*pr->pr_usrreqs->pru_rcvd)(so, flags); | |
1934 | } | |
9bccf70c | 1935 | #ifdef __APPLE__ |
1c79356b A |
1936 | if ((so->so_options & SO_WANTMORE) && so->so_rcv.sb_cc > 0) |
1937 | flags |= MSG_HAVEMORE; | |
55e303ae A |
1938 | |
1939 | if (delayed_copy_len) { | |
91447636 | 1940 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1941 | |
1942 | if (error) | |
1943 | goto release; | |
1944 | } | |
1945 | if (free_list) { | |
1946 | m_freem_list((struct mbuf *)free_list); | |
1947 | free_list = (struct mbuf *)0; | |
1948 | } | |
1949 | if (need_event) | |
1950 | postevent(so, 0, EV_OOB); | |
9bccf70c | 1951 | #endif |
91447636 | 1952 | if (orig_resid == uio_resid(uio) && orig_resid && |
1c79356b | 1953 | (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) { |
91447636 | 1954 | sbunlock(&so->so_rcv, 1); |
1c79356b A |
1955 | goto restart; |
1956 | } | |
1957 | ||
1958 | if (flagsp) | |
1959 | *flagsp |= flags; | |
1960 | release: | |
91447636 A |
1961 | #ifdef MORE_LOCKING_DEBUG |
1962 | if (so->so_usecount <= 1) | |
1963 | panic("soreceive: release so=%x ref=%d on socket\n", so, so->so_usecount); | |
1964 | #endif | |
55e303ae | 1965 | if (delayed_copy_len) { |
91447636 | 1966 | error = sodelayed_copy(so, uio, &free_list, &delayed_copy_len); |
55e303ae A |
1967 | } |
1968 | if (free_list) { | |
1969 | m_freem_list((struct mbuf *)free_list); | |
1970 | } | |
91447636 | 1971 | sbunlock(&so->so_rcv, 0); /* will unlock socket */ |
1c79356b | 1972 | |
91447636 | 1973 | // LP64todo - fix this! |
1c79356b A |
1974 | KERNEL_DEBUG(DBG_FNC_SORECEIVE | DBG_FUNC_END, |
1975 | so, | |
91447636 | 1976 | uio_resid(uio), |
1c79356b A |
1977 | so->so_rcv.sb_cc, |
1978 | 0, | |
1979 | error); | |
1980 | ||
1981 | return (error); | |
1982 | } | |
1983 | ||
55e303ae | 1984 | |
/*
 * Flush the "delayed copy" mbuf chain accumulated by soreceive() out to
 * user space in one pass.
 *
 * so        - socket the data came from (locked on entry, relocked on exit)
 * uio       - destination user I/O descriptor
 * free_list - in/out: head of the mbuf chain to copy; reset to NULL here
 * resid     - in/out: count of delayed bytes; reset to 0 here
 *
 * Returns 0 on success or the uiomove() error.  Note that the chain is
 * freed and the counters are cleared even when uiomove() fails partway,
 * so the caller must not retry the same chain.
 */
static int sodelayed_copy(struct socket *so, struct uio *uio, struct mbuf **free_list, int *resid)
{
	int error = 0;
	struct mbuf *m;

	m = *free_list;

	/*
	 * Drop the socket lock across uiomove(): copying to user space may
	 * fault and block, and we must not hold the lock while that happens.
	 */
	socket_unlock(so, 0);

	/* Copy each mbuf's payload in chain order; stop on the first error. */
	while (m && error == 0) {

		error = uiomove(mtod(m, caddr_t), (int)m->m_len, uio);

		m = m->m_next;
	}
	/* The chain is consumed unconditionally, even on error. */
	m_freem_list(*free_list);

	*free_list = (struct mbuf *)NULL;
	*resid = 0;

	socket_lock(so, 0);

	return (error);
}
2009 | ||
2010 | ||
1c79356b A |
2011 | int |
2012 | soshutdown(so, how) | |
2013 | register struct socket *so; | |
2014 | register int how; | |
2015 | { | |
2016 | register struct protosw *pr = so->so_proto; | |
1c79356b A |
2017 | int ret; |
2018 | ||
91447636 A |
2019 | socket_lock(so, 1); |
2020 | ||
2021 | sflt_notify(so, sock_evt_shutdown, &how); | |
1c79356b | 2022 | |
9bccf70c | 2023 | if (how != SHUT_WR) { |
1c79356b A |
2024 | sorflush(so); |
2025 | postevent(so, 0, EV_RCLOSED); | |
2026 | } | |
9bccf70c | 2027 | if (how != SHUT_RD) { |
1c79356b A |
2028 | ret = ((*pr->pr_usrreqs->pru_shutdown)(so)); |
2029 | postevent(so, 0, EV_WCLOSED); | |
2030 | KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); | |
91447636 | 2031 | socket_unlock(so, 1); |
1c79356b A |
2032 | return(ret); |
2033 | } | |
2034 | ||
2035 | KERNEL_DEBUG(DBG_FNC_SOSHUTDOWN | DBG_FUNC_END, 0,0,0,0,0); | |
91447636 | 2036 | socket_unlock(so, 1); |
1c79356b A |
2037 | return (0); |
2038 | } | |
2039 | ||
/*
 * Flush (discard) all data queued on a socket's receive buffer.
 *
 * Marks the socket as unable to receive more, then snapshots the receive
 * sockbuf into a local copy, zeroes the live one (re-establishing its
 * back-pointer and any kqueue knotes), and finally releases the snapshot's
 * mbufs.  Working on the snapshot lets sbrelease() run after the live
 * sockbuf is already in a clean empty state.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	struct sockbuf asb;

#ifdef MORE_LOCKING_DEBUG
	lck_mtx_t * mutex_held;

	/* Debug build: assert the caller holds the socket's mutex. */
	if (so->so_proto->pr_getlock != NULL)
		mutex_held = (*so->so_proto->pr_getlock)(so, 0);
	else
		mutex_held = so->so_proto->pr_domain->dom_mtx;
	lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED);
#endif

	/* Notify socket filters the read side is being flushed. */
	sflt_notify(so, sock_evt_flush_read, NULL);

	/* SB_NOINTR so the sblock below cannot be interrupted by a signal. */
	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAIT);
	socantrcvmore(so);
	sbunlock(sb, 1);
#ifdef __APPLE__
	/* Wake and clear any threads selecting/polling on this sockbuf. */
	selthreadclear(&sb->sb_sel);
#endif
	/* Snapshot, then reset the live sockbuf to a pristine empty state. */
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	sb->sb_so = so;	/* reestablish link to socket */
	if (asb.sb_flags & SB_KNOTE) {
		/* Preserve registered kqueue knotes across the reset. */
		sb->sb_sel.si_note = asb.sb_sel.si_note;
		sb->sb_flags = SB_KNOTE;
	}
	/* Let the domain dispose of in-flight access rights (e.g. fd passing). */
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}
2078 | ||
2079 | /* | |
2080 | * Perhaps this routine, and sooptcopyout(), below, ought to come in | |
2081 | * an additional variant to handle the case where the option value needs | |
2082 | * to be some kind of integer, but not a specific size. | |
2083 | * In addition to their use here, these functions are also called by the | |
2084 | * protocol-level pr_ctloutput() routines. | |
2085 | */ | |
2086 | int | |
2087 | sooptcopyin(sopt, buf, len, minlen) | |
2088 | struct sockopt *sopt; | |
2089 | void *buf; | |
2090 | size_t len; | |
2091 | size_t minlen; | |
2092 | { | |
2093 | size_t valsize; | |
2094 | ||
2095 | /* | |
2096 | * If the user gives us more than we wanted, we ignore it, | |
2097 | * but if we don't get the minimum length the caller | |
2098 | * wants, we return EINVAL. On success, sopt->sopt_valsize | |
2099 | * is set to however much we actually retrieved. | |
2100 | */ | |
2101 | if ((valsize = sopt->sopt_valsize) < minlen) | |
2102 | return EINVAL; | |
2103 | if (valsize > len) | |
2104 | sopt->sopt_valsize = valsize = len; | |
2105 | ||
2106 | if (sopt->sopt_p != 0) | |
2107 | return (copyin(sopt->sopt_val, buf, valsize)); | |
2108 | ||
91447636 | 2109 | bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), buf, valsize); |
1c79356b A |
2110 | return 0; |
2111 | } | |
2112 | ||
2113 | int | |
2114 | sosetopt(so, sopt) | |
2115 | struct socket *so; | |
2116 | struct sockopt *sopt; | |
2117 | { | |
2118 | int error, optval; | |
2119 | struct linger l; | |
2120 | struct timeval tv; | |
2121 | short val; | |
91447636 A |
2122 | |
2123 | socket_lock(so, 1); | |
1c79356b | 2124 | |
9bccf70c A |
2125 | if (sopt->sopt_dir != SOPT_SET) { |
2126 | sopt->sopt_dir = SOPT_SET; | |
2127 | } | |
2128 | ||
91447636 A |
2129 | { |
2130 | struct socket_filter_entry *filter; | |
2131 | int filtered = 0; | |
2132 | error = 0; | |
2133 | for (filter = so->so_filt; filter && (error == 0); | |
2134 | filter = filter->sfe_next_onsocket) { | |
2135 | if (filter->sfe_filter->sf_filter.sf_setoption) { | |
2136 | if (filtered == 0) { | |
2137 | filtered = 1; | |
2138 | sflt_use(so); | |
2139 | socket_unlock(so, 0); | |
2140 | } | |
2141 | error = filter->sfe_filter->sf_filter.sf_setoption( | |
2142 | filter->sfe_cookie, so, sopt); | |
2143 | } | |
2144 | } | |
2145 | ||
2146 | if (filtered != 0) { | |
2147 | socket_lock(so, 0); | |
2148 | sflt_unuse(so); | |
2149 | ||
2150 | if (error) { | |
2151 | if (error == EJUSTRETURN) | |
2152 | error = 0; | |
2153 | goto bad; | |
2154 | } | |
1c79356b | 2155 | } |
1c79356b A |
2156 | } |
2157 | ||
2158 | error = 0; | |
2159 | if (sopt->sopt_level != SOL_SOCKET) { | |
91447636 A |
2160 | if (so->so_proto && so->so_proto->pr_ctloutput) { |
2161 | error = (*so->so_proto->pr_ctloutput) | |
2162 | (so, sopt); | |
2163 | socket_unlock(so, 1); | |
2164 | return (error); | |
2165 | } | |
1c79356b A |
2166 | error = ENOPROTOOPT; |
2167 | } else { | |
2168 | switch (sopt->sopt_name) { | |
2169 | case SO_LINGER: | |
91447636 | 2170 | case SO_LINGER_SEC: |
1c79356b A |
2171 | error = sooptcopyin(sopt, &l, sizeof l, sizeof l); |
2172 | if (error) | |
2173 | goto bad; | |
2174 | ||
91447636 | 2175 | so->so_linger = (sopt->sopt_name == SO_LINGER) ? l.l_linger : l.l_linger * hz; |
1c79356b A |
2176 | if (l.l_onoff) |
2177 | so->so_options |= SO_LINGER; | |
2178 | else | |
2179 | so->so_options &= ~SO_LINGER; | |
2180 | break; | |
2181 | ||
2182 | case SO_DEBUG: | |
2183 | case SO_KEEPALIVE: | |
2184 | case SO_DONTROUTE: | |
2185 | case SO_USELOOPBACK: | |
2186 | case SO_BROADCAST: | |
2187 | case SO_REUSEADDR: | |
2188 | case SO_REUSEPORT: | |
2189 | case SO_OOBINLINE: | |
2190 | case SO_TIMESTAMP: | |
9bccf70c | 2191 | #ifdef __APPLE__ |
1c79356b A |
2192 | case SO_DONTTRUNC: |
2193 | case SO_WANTMORE: | |
9bccf70c A |
2194 | case SO_WANTOOBFLAG: |
2195 | #endif | |
1c79356b A |
2196 | error = sooptcopyin(sopt, &optval, sizeof optval, |
2197 | sizeof optval); | |
2198 | if (error) | |
2199 | goto bad; | |
2200 | if (optval) | |
2201 | so->so_options |= sopt->sopt_name; | |
2202 | else | |
2203 | so->so_options &= ~sopt->sopt_name; | |
2204 | break; | |
2205 | ||
2206 | case SO_SNDBUF: | |
2207 | case SO_RCVBUF: | |
2208 | case SO_SNDLOWAT: | |
2209 | case SO_RCVLOWAT: | |
2210 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
2211 | sizeof optval); | |
2212 | if (error) | |
2213 | goto bad; | |
2214 | ||
2215 | /* | |
2216 | * Values < 1 make no sense for any of these | |
2217 | * options, so disallow them. | |
2218 | */ | |
2219 | if (optval < 1) { | |
2220 | error = EINVAL; | |
2221 | goto bad; | |
2222 | } | |
2223 | ||
2224 | switch (sopt->sopt_name) { | |
2225 | case SO_SNDBUF: | |
2226 | case SO_RCVBUF: | |
2227 | if (sbreserve(sopt->sopt_name == SO_SNDBUF ? | |
2228 | &so->so_snd : &so->so_rcv, | |
2229 | (u_long) optval) == 0) { | |
2230 | error = ENOBUFS; | |
2231 | goto bad; | |
2232 | } | |
2233 | break; | |
2234 | ||
2235 | /* | |
2236 | * Make sure the low-water is never greater than | |
2237 | * the high-water. | |
2238 | */ | |
2239 | case SO_SNDLOWAT: | |
2240 | so->so_snd.sb_lowat = | |
2241 | (optval > so->so_snd.sb_hiwat) ? | |
2242 | so->so_snd.sb_hiwat : optval; | |
2243 | break; | |
2244 | case SO_RCVLOWAT: | |
2245 | so->so_rcv.sb_lowat = | |
2246 | (optval > so->so_rcv.sb_hiwat) ? | |
2247 | so->so_rcv.sb_hiwat : optval; | |
2248 | break; | |
2249 | } | |
2250 | break; | |
2251 | ||
2252 | case SO_SNDTIMEO: | |
2253 | case SO_RCVTIMEO: | |
2254 | error = sooptcopyin(sopt, &tv, sizeof tv, | |
2255 | sizeof tv); | |
2256 | if (error) | |
2257 | goto bad; | |
2258 | ||
91447636 | 2259 | if (tv.tv_sec < 0 || tv.tv_sec > LONG_MAX || |
9bccf70c A |
2260 | tv.tv_usec < 0 || tv.tv_usec >= 1000000) { |
2261 | error = EDOM; | |
2262 | goto bad; | |
2263 | } | |
91447636 | 2264 | |
1c79356b A |
2265 | switch (sopt->sopt_name) { |
2266 | case SO_SNDTIMEO: | |
91447636 | 2267 | so->so_snd.sb_timeo = tv; |
1c79356b A |
2268 | break; |
2269 | case SO_RCVTIMEO: | |
91447636 | 2270 | so->so_rcv.sb_timeo = tv; |
1c79356b A |
2271 | break; |
2272 | } | |
2273 | break; | |
2274 | ||
2275 | case SO_NKE: | |
9bccf70c A |
2276 | { |
2277 | struct so_nke nke; | |
1c79356b | 2278 | |
9bccf70c A |
2279 | error = sooptcopyin(sopt, &nke, |
2280 | sizeof nke, sizeof nke); | |
1c79356b A |
2281 | if (error) |
2282 | goto bad; | |
2283 | ||
91447636 | 2284 | error = sflt_attach_private(so, NULL, nke.nke_handle, 1); |
1c79356b A |
2285 | break; |
2286 | } | |
2287 | ||
9bccf70c A |
2288 | case SO_NOSIGPIPE: |
2289 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
2290 | sizeof optval); | |
2291 | if (error) | |
2292 | goto bad; | |
2293 | if (optval) | |
2294 | so->so_flags |= SOF_NOSIGPIPE; | |
2295 | else | |
2296 | so->so_flags &= ~SOF_NOSIGPIPE; | |
2297 | ||
2298 | break; | |
2299 | ||
55e303ae A |
2300 | case SO_NOADDRERR: |
2301 | error = sooptcopyin(sopt, &optval, sizeof optval, | |
2302 | sizeof optval); | |
2303 | if (error) | |
2304 | goto bad; | |
2305 | if (optval) | |
2306 | so->so_flags |= SOF_NOADDRAVAIL; | |
2307 | else | |
2308 | so->so_flags &= ~SOF_NOADDRAVAIL; | |
2309 | ||
2310 | break; | |
2311 | ||
1c79356b A |
2312 | default: |
2313 | error = ENOPROTOOPT; | |
2314 | break; | |
2315 | } | |
2316 | if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) { | |
2317 | (void) ((*so->so_proto->pr_ctloutput) | |
2318 | (so, sopt)); | |
2319 | } | |
2320 | } | |
2321 | bad: | |
91447636 | 2322 | socket_unlock(so, 1); |
1c79356b A |
2323 | return (error); |
2324 | } | |
2325 | ||
2326 | /* Helper routine for getsockopt */ | |
2327 | int | |
2328 | sooptcopyout(sopt, buf, len) | |
2329 | struct sockopt *sopt; | |
2330 | void *buf; | |
2331 | size_t len; | |
2332 | { | |
2333 | int error; | |
2334 | size_t valsize; | |
2335 | ||
2336 | error = 0; | |
2337 | ||
2338 | /* | |
2339 | * Documented get behavior is that we always return a value, | |
2340 | * possibly truncated to fit in the user's buffer. | |
2341 | * Traditional behavior is that we always tell the user | |
2342 | * precisely how much we copied, rather than something useful | |
2343 | * like the total amount we had available for her. | |
2344 | * Note that this interface is not idempotent; the entire answer must | |
2345 | * generated ahead of time. | |
2346 | */ | |
2347 | valsize = min(len, sopt->sopt_valsize); | |
2348 | sopt->sopt_valsize = valsize; | |
91447636 | 2349 | if (sopt->sopt_val != USER_ADDR_NULL) { |
1c79356b A |
2350 | if (sopt->sopt_p != 0) |
2351 | error = copyout(buf, sopt->sopt_val, valsize); | |
2352 | else | |
91447636 | 2353 | bcopy(buf, CAST_DOWN(caddr_t, sopt->sopt_val), valsize); |
1c79356b A |
2354 | } |
2355 | return error; | |
2356 | } | |
2357 | ||
/*
 * Get a socket option (getsockopt(2) back end).
 *
 * Socket filters get first crack at the option (EJUSTRETURN means a
 * filter produced the answer).  Non-SOL_SOCKET levels are delegated to
 * the protocol's pr_ctloutput; SOL_SOCKET options are answered inline,
 * most of them funneling through the shared `integer:' copyout path.
 *
 * Returns 0 or an errno value; the socket lock is released on all paths.
 */
int
sogetopt(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	int error, optval;
	struct linger l;
	struct timeval tv;

	/* Normalize direction for callers that did not set it. */
	if (sopt->sopt_dir != SOPT_GET) {
		sopt->sopt_dir = SOPT_GET;
	}

	socket_lock(so, 1);

	{
		/*
		 * Let attached socket filters intercept the query; the lock is
		 * dropped around the callouts with the filter chain pinned.
		 */
		struct socket_filter_entry *filter;
		int filtered = 0;
		error = 0;
		for (filter = so->so_filt; filter && (error == 0);
		     filter = filter->sfe_next_onsocket) {
			if (filter->sfe_filter->sf_filter.sf_getoption) {
				if (filtered == 0) {
					filtered = 1;
					sflt_use(so);
					socket_unlock(so, 0);
				}
				error = filter->sfe_filter->sf_filter.sf_getoption(
					filter->sfe_cookie, so, sopt);
			}
		}
		if (filtered != 0) {
			socket_lock(so, 0);
			sflt_unuse(so);

			if (error) {
				/* EJUSTRETURN: the filter answered the query itself. */
				if (error == EJUSTRETURN)
					error = 0;
				socket_unlock(so, 1);
				return error;
			}
		}
	}

	error = 0;
	if (sopt->sopt_level != SOL_SOCKET) {
		/* Protocol-level option: hand off to the protocol entirely. */
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			error = (*so->so_proto->pr_ctloutput)
				  (so, sopt);
			socket_unlock(so, 1);
			return (error);
		} else {
			socket_unlock(so, 1);
			return (ENOPROTOOPT);
		}
	} else {
		switch (sopt->sopt_name) {
		case SO_LINGER:
		case SO_LINGER_SEC:
			l.l_onoff = so->so_options & SO_LINGER;
			/* so_linger is stored in ticks; SO_LINGER_SEC reports seconds. */
			l.l_linger = (sopt->sopt_name == SO_LINGER) ? so->so_linger :
				so->so_linger / hz;
			error = sooptcopyout(sopt, &l, sizeof l);
			break;

		/* Boolean options stored directly as so_options bits. */
		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
#ifdef __APPLE__
		case SO_DONTTRUNC:
		case SO_WANTMORE:
		case SO_WANTOOBFLAG:
#endif
			optval = so->so_options & sopt->sopt_name;
integer:
			/* Shared exit for every option that returns an int. */
			error = sooptcopyout(sopt, &optval, sizeof optval);
			break;

		case SO_TYPE:
			optval = so->so_type;
			goto integer;

#ifdef __APPLE__
		case SO_NREAD:
		{
			int pkt_total;
			struct mbuf *m1;

			pkt_total = 0;
			m1 = so->so_rcv.sb_mb;
			/* Atomic protocols: sum only the data mbufs of the first record. */
			if (so->so_proto->pr_flags & PR_ATOMIC)
			{
				while (m1) {
					if (m1->m_type == MT_DATA)
						pkt_total += m1->m_len;
					m1 = m1->m_next;
				}
				optval = pkt_total;
			} else
				optval = so->so_rcv.sb_cc;
			goto integer;
		}
		case SO_NWRITE:
			optval = so->so_snd.sb_cc;
			goto integer;
#endif
		case SO_ERROR:
			/* Reading the error clears it (standard BSD semantics). */
			optval = so->so_error;
			so->so_error = 0;
			goto integer;

		case SO_SNDBUF:
			optval = so->so_snd.sb_hiwat;
			goto integer;

		case SO_RCVBUF:
			optval = so->so_rcv.sb_hiwat;
			goto integer;

		case SO_SNDLOWAT:
			optval = so->so_snd.sb_lowat;
			goto integer;

		case SO_RCVLOWAT:
			optval = so->so_rcv.sb_lowat;
			goto integer;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
			tv = (sopt->sopt_name == SO_SNDTIMEO ?
			      so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			error = sooptcopyout(sopt, &tv, sizeof tv);
			break;

		case SO_NOSIGPIPE:
			optval = (so->so_flags & SOF_NOSIGPIPE);
			goto integer;

		case SO_NOADDRERR:
			optval = (so->so_flags & SOF_NOADDRAVAIL);
			goto integer;

		default:
			error = ENOPROTOOPT;
			break;
		}
		socket_unlock(so, 1);
		return (error);
	}
}
2515 | ||
9bccf70c | 2516 | /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */ |
1c79356b | 2517 | int |
9bccf70c | 2518 | soopt_getm(struct sockopt *sopt, struct mbuf **mp) |
1c79356b A |
2519 | { |
2520 | struct mbuf *m, *m_prev; | |
2521 | int sopt_size = sopt->sopt_valsize; | |
2522 | ||
a3d08fcd A |
2523 | if (sopt_size > MAX_SOOPTGETM_SIZE) |
2524 | return EMSGSIZE; | |
2525 | ||
1c79356b A |
2526 | MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); |
2527 | if (m == 0) | |
2528 | return ENOBUFS; | |
2529 | if (sopt_size > MLEN) { | |
2530 | MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); | |
2531 | if ((m->m_flags & M_EXT) == 0) { | |
2532 | m_free(m); | |
2533 | return ENOBUFS; | |
2534 | } | |
2535 | m->m_len = min(MCLBYTES, sopt_size); | |
2536 | } else { | |
2537 | m->m_len = min(MLEN, sopt_size); | |
2538 | } | |
2539 | sopt_size -= m->m_len; | |
2540 | *mp = m; | |
2541 | m_prev = m; | |
2542 | ||
2543 | while (sopt_size) { | |
2544 | MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_DATA); | |
2545 | if (m == 0) { | |
2546 | m_freem(*mp); | |
2547 | return ENOBUFS; | |
2548 | } | |
2549 | if (sopt_size > MLEN) { | |
2550 | MCLGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT); | |
2551 | if ((m->m_flags & M_EXT) == 0) { | |
2552 | m_freem(*mp); | |
2553 | return ENOBUFS; | |
2554 | } | |
2555 | m->m_len = min(MCLBYTES, sopt_size); | |
2556 | } else { | |
2557 | m->m_len = min(MLEN, sopt_size); | |
2558 | } | |
2559 | sopt_size -= m->m_len; | |
2560 | m_prev->m_next = m; | |
2561 | m_prev = m; | |
2562 | } | |
2563 | return 0; | |
2564 | } | |
2565 | ||
2566 | /* XXX; copyin sopt data into mbuf chain for (__FreeBSD__ < 3) routines. */ | |
2567 | int | |
9bccf70c | 2568 | soopt_mcopyin(struct sockopt *sopt, struct mbuf *m) |
1c79356b A |
2569 | { |
2570 | struct mbuf *m0 = m; | |
2571 | ||
91447636 | 2572 | if (sopt->sopt_val == USER_ADDR_NULL) |
1c79356b A |
2573 | return 0; |
2574 | while (m != NULL && sopt->sopt_valsize >= m->m_len) { | |
2575 | if (sopt->sopt_p != NULL) { | |
2576 | int error; | |
2577 | ||
91447636 | 2578 | error = copyin(sopt->sopt_val, mtod(m, char *), m->m_len); |
1c79356b A |
2579 | if (error != 0) { |
2580 | m_freem(m0); | |
2581 | return(error); | |
2582 | } | |
2583 | } else | |
91447636 | 2584 | bcopy(CAST_DOWN(caddr_t, sopt->sopt_val), mtod(m, char *), m->m_len); |
1c79356b | 2585 | sopt->sopt_valsize -= m->m_len; |
91447636 | 2586 | sopt->sopt_val += m->m_len; |
1c79356b A |
2587 | m = m->m_next; |
2588 | } | |
2589 | if (m != NULL) /* should be allocated enoughly at ip6_sooptmcopyin() */ | |
9bccf70c | 2590 | panic("soopt_mcopyin"); |
1c79356b A |
2591 | return 0; |
2592 | } | |
2593 | ||
2594 | /* XXX; copyout mbuf chain data into soopt for (__FreeBSD__ < 3) routines. */ | |
2595 | int | |
9bccf70c | 2596 | soopt_mcopyout(struct sockopt *sopt, struct mbuf *m) |
1c79356b A |
2597 | { |
2598 | struct mbuf *m0 = m; | |
2599 | size_t valsize = 0; | |
2600 | ||
91447636 | 2601 | if (sopt->sopt_val == USER_ADDR_NULL) |
1c79356b A |
2602 | return 0; |
2603 | while (m != NULL && sopt->sopt_valsize >= m->m_len) { | |
2604 | if (sopt->sopt_p != NULL) { | |
2605 | int error; | |
2606 | ||
91447636 | 2607 | error = copyout(mtod(m, char *), sopt->sopt_val, m->m_len); |
1c79356b A |
2608 | if (error != 0) { |
2609 | m_freem(m0); | |
2610 | return(error); | |
2611 | } | |
2612 | } else | |
91447636 | 2613 | bcopy(mtod(m, char *), CAST_DOWN(caddr_t, sopt->sopt_val), m->m_len); |
1c79356b | 2614 | sopt->sopt_valsize -= m->m_len; |
91447636 | 2615 | sopt->sopt_val += m->m_len; |
1c79356b A |
2616 | valsize += m->m_len; |
2617 | m = m->m_next; | |
2618 | } | |
2619 | if (m != NULL) { | |
2620 | /* enough soopt buffer should be given from user-land */ | |
2621 | m_freem(m0); | |
2622 | return(EINVAL); | |
2623 | } | |
2624 | sopt->sopt_valsize = valsize; | |
2625 | return 0; | |
2626 | } | |
2627 | ||
9bccf70c A |
2628 | void |
2629 | sohasoutofband(so) | |
2630 | register struct socket *so; | |
2631 | { | |
2632 | struct proc *p; | |
9bccf70c | 2633 | |
9bccf70c A |
2634 | if (so->so_pgid < 0) |
2635 | gsignal(-so->so_pgid, SIGURG); | |
2636 | else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) | |
2637 | psignal(p, SIGURG); | |
2638 | selwakeup(&so->so_rcv.sb_sel); | |
2639 | } | |
2640 | ||
2641 | int | |
91447636 | 2642 | sopoll(struct socket *so, int events, __unused kauth_cred_t cred, void * wql) |
9bccf70c A |
2643 | { |
2644 | struct proc *p = current_proc(); | |
2645 | int revents = 0; | |
91447636 A |
2646 | |
2647 | socket_lock(so, 1); | |
9bccf70c A |
2648 | |
2649 | if (events & (POLLIN | POLLRDNORM)) | |
2650 | if (soreadable(so)) | |
2651 | revents |= events & (POLLIN | POLLRDNORM); | |
2652 | ||
2653 | if (events & (POLLOUT | POLLWRNORM)) | |
2654 | if (sowriteable(so)) | |
2655 | revents |= events & (POLLOUT | POLLWRNORM); | |
2656 | ||
2657 | if (events & (POLLPRI | POLLRDBAND)) | |
2658 | if (so->so_oobmark || (so->so_state & SS_RCVATMARK)) | |
2659 | revents |= events & (POLLPRI | POLLRDBAND); | |
2660 | ||
2661 | if (revents == 0) { | |
2662 | if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) { | |
2663 | /* Darwin sets the flag first, BSD calls selrecord first */ | |
2664 | so->so_rcv.sb_flags |= SB_SEL; | |
2665 | selrecord(p, &so->so_rcv.sb_sel, wql); | |
2666 | } | |
2667 | ||
2668 | if (events & (POLLOUT | POLLWRNORM)) { | |
2669 | /* Darwin sets the flag first, BSD calls selrecord first */ | |
2670 | so->so_snd.sb_flags |= SB_SEL; | |
2671 | selrecord(p, &so->so_snd.sb_sel, wql); | |
2672 | } | |
2673 | } | |
2674 | ||
91447636 | 2675 | socket_unlock(so, 1); |
9bccf70c A |
2676 | return (revents); |
2677 | } | |
55e303ae | 2678 | |
91447636 | 2679 | int soo_kqfilter(struct fileproc *fp, struct knote *kn, struct proc *p); |
55e303ae A |
2680 | |
2681 | int | |
91447636 | 2682 | soo_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused struct proc *p) |
55e303ae | 2683 | { |
91447636 | 2684 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2685 | struct sockbuf *sb; |
91447636 | 2686 | socket_lock(so, 1); |
55e303ae A |
2687 | |
2688 | switch (kn->kn_filter) { | |
2689 | case EVFILT_READ: | |
2690 | if (so->so_options & SO_ACCEPTCONN) | |
2691 | kn->kn_fop = &solisten_filtops; | |
2692 | else | |
2693 | kn->kn_fop = &soread_filtops; | |
2694 | sb = &so->so_rcv; | |
2695 | break; | |
2696 | case EVFILT_WRITE: | |
2697 | kn->kn_fop = &sowrite_filtops; | |
2698 | sb = &so->so_snd; | |
2699 | break; | |
2700 | default: | |
91447636 | 2701 | socket_unlock(so, 1); |
55e303ae A |
2702 | return (1); |
2703 | } | |
2704 | ||
55e303ae A |
2705 | if (KNOTE_ATTACH(&sb->sb_sel.si_note, kn)) |
2706 | sb->sb_flags |= SB_KNOTE; | |
91447636 | 2707 | socket_unlock(so, 1); |
55e303ae A |
2708 | return (0); |
2709 | } | |
2710 | ||
2711 | static void | |
2712 | filt_sordetach(struct knote *kn) | |
2713 | { | |
91447636 | 2714 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2715 | |
91447636 A |
2716 | socket_lock(so, 1); |
2717 | if (so->so_rcv.sb_flags & SB_KNOTE) | |
55e303ae A |
2718 | if (KNOTE_DETACH(&so->so_rcv.sb_sel.si_note, kn)) |
2719 | so->so_rcv.sb_flags &= ~SB_KNOTE; | |
91447636 | 2720 | socket_unlock(so, 1); |
55e303ae A |
2721 | } |
2722 | ||
2723 | /*ARGSUSED*/ | |
2724 | static int | |
2725 | filt_soread(struct knote *kn, long hint) | |
2726 | { | |
91447636 | 2727 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
55e303ae | 2728 | |
91447636 A |
2729 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2730 | socket_lock(so, 1); | |
2731 | ||
2732 | if (so->so_oobmark) { | |
2733 | if (kn->kn_flags & EV_OOBAND) { | |
2734 | kn->kn_data = so->so_rcv.sb_cc - so->so_oobmark; | |
2735 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2736 | socket_unlock(so, 1); | |
2737 | return (1); | |
2738 | } | |
2739 | kn->kn_data = so->so_oobmark; | |
2740 | kn->kn_flags |= EV_OOBAND; | |
2741 | } else { | |
2742 | kn->kn_data = so->so_rcv.sb_cc; | |
2743 | if (so->so_state & SS_CANTRCVMORE) { | |
2744 | kn->kn_flags |= EV_EOF; | |
2745 | kn->kn_fflags = so->so_error; | |
2746 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2747 | socket_unlock(so, 1); | |
2748 | return (1); | |
2749 | } | |
55e303ae | 2750 | } |
91447636 A |
2751 | |
2752 | if (so->so_state & SS_RCVATMARK) { | |
2753 | if (kn->kn_flags & EV_OOBAND) { | |
2754 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2755 | socket_unlock(so, 1); | |
2756 | return (1); | |
2757 | } | |
2758 | kn->kn_flags |= EV_OOBAND; | |
2759 | } else if (kn->kn_flags & EV_OOBAND) { | |
2760 | kn->kn_data = 0; | |
2761 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2762 | socket_unlock(so, 1); | |
2763 | return (0); | |
2764 | } | |
2765 | ||
2766 | if (so->so_error) { /* temporary udp error */ | |
2767 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2768 | socket_unlock(so, 1); | |
55e303ae | 2769 | return (1); |
91447636 A |
2770 | } |
2771 | ||
2772 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2773 | socket_unlock(so, 1); | |
2774 | ||
2775 | return( kn->kn_flags & EV_OOBAND || | |
2776 | kn->kn_data >= ((kn->kn_sfflags & NOTE_LOWAT) ? | |
2777 | kn->kn_sdata : so->so_rcv.sb_lowat)); | |
55e303ae A |
2778 | } |
2779 | ||
2780 | static void | |
2781 | filt_sowdetach(struct knote *kn) | |
2782 | { | |
91447636 A |
2783 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2784 | socket_lock(so, 1); | |
55e303ae | 2785 | |
91447636 | 2786 | if(so->so_snd.sb_flags & SB_KNOTE) |
55e303ae A |
2787 | if (KNOTE_DETACH(&so->so_snd.sb_sel.si_note, kn)) |
2788 | so->so_snd.sb_flags &= ~SB_KNOTE; | |
91447636 | 2789 | socket_unlock(so, 1); |
55e303ae A |
2790 | } |
2791 | ||
2792 | /*ARGSUSED*/ | |
2793 | static int | |
2794 | filt_sowrite(struct knote *kn, long hint) | |
2795 | { | |
91447636 A |
2796 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2797 | ||
2798 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2799 | socket_lock(so, 1); | |
55e303ae A |
2800 | |
2801 | kn->kn_data = sbspace(&so->so_snd); | |
2802 | if (so->so_state & SS_CANTSENDMORE) { | |
2803 | kn->kn_flags |= EV_EOF; | |
2804 | kn->kn_fflags = so->so_error; | |
91447636 A |
2805 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2806 | socket_unlock(so, 1); | |
55e303ae A |
2807 | return (1); |
2808 | } | |
91447636 A |
2809 | if (so->so_error) { /* temporary udp error */ |
2810 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2811 | socket_unlock(so, 1); | |
55e303ae | 2812 | return (1); |
91447636 | 2813 | } |
55e303ae | 2814 | if (((so->so_state & SS_ISCONNECTED) == 0) && |
91447636 A |
2815 | (so->so_proto->pr_flags & PR_CONNREQUIRED)) { |
2816 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2817 | socket_unlock(so, 1); | |
55e303ae | 2818 | return (0); |
91447636 A |
2819 | } |
2820 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2821 | socket_unlock(so, 1); | |
55e303ae A |
2822 | if (kn->kn_sfflags & NOTE_LOWAT) |
2823 | return (kn->kn_data >= kn->kn_sdata); | |
2824 | return (kn->kn_data >= so->so_snd.sb_lowat); | |
2825 | } | |
2826 | ||
2827 | /*ARGSUSED*/ | |
2828 | static int | |
2829 | filt_solisten(struct knote *kn, long hint) | |
2830 | { | |
91447636 A |
2831 | struct socket *so = (struct socket *)kn->kn_fp->f_fglob->fg_data; |
2832 | int isempty; | |
55e303ae | 2833 | |
91447636 A |
2834 | if ((hint & SO_FILT_HINT_LOCKED) == 0) |
2835 | socket_lock(so, 1); | |
55e303ae | 2836 | kn->kn_data = so->so_qlen; |
91447636 A |
2837 | isempty = ! TAILQ_EMPTY(&so->so_comp); |
2838 | if ((hint & SO_FILT_HINT_LOCKED) == 0) | |
2839 | socket_unlock(so, 1); | |
2840 | return (isempty); | |
55e303ae A |
2841 | } |
2842 | ||
91447636 A |
2843 | |
2844 | int | |
2845 | socket_lock(so, refcount) | |
2846 | struct socket *so; | |
2847 | int refcount; | |
2848 | { | |
2849 | int error = 0, lr, lr_saved; | |
2850 | #ifdef __ppc__ | |
2851 | __asm__ volatile("mflr %0" : "=r" (lr)); | |
2852 | lr_saved = lr; | |
2853 | #endif | |
2854 | ||
2855 | if (so->so_proto->pr_lock) { | |
2856 | error = (*so->so_proto->pr_lock)(so, refcount, lr_saved); | |
2857 | } | |
2858 | else { | |
2859 | #ifdef MORE_LOCKING_DEBUG | |
2860 | lck_mtx_assert(so->so_proto->pr_domain->dom_mtx, LCK_MTX_ASSERT_NOTOWNED); | |
2861 | #endif | |
2862 | lck_mtx_lock(so->so_proto->pr_domain->dom_mtx); | |
2863 | if (refcount) | |
2864 | so->so_usecount++; | |
2865 | so->reserved3 = (void*)lr_saved; /* save caller for refcount going to zero */ | |
2866 | } | |
2867 | ||
2868 | return(error); | |
2869 | ||
2870 | } | |
2871 | ||
2872 | int | |
2873 | socket_unlock(so, refcount) | |
2874 | struct socket *so; | |
2875 | int refcount; | |
2876 | { | |
2877 | int error = 0, lr, lr_saved; | |
2878 | lck_mtx_t * mutex_held; | |
2879 | ||
2880 | #ifdef __ppc__ | |
2881 | __asm__ volatile("mflr %0" : "=r" (lr)); | |
2882 | lr_saved = lr; | |
2883 | #endif | |
2884 | ||
2885 | ||
2886 | ||
2887 | if (so->so_proto == NULL) | |
2888 | panic("socket_unlock null so_proto so=%x\n", so); | |
2889 | ||
2890 | if (so && so->so_proto->pr_unlock) | |
2891 | error = (*so->so_proto->pr_unlock)(so, refcount, lr_saved); | |
2892 | else { | |
2893 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2894 | #ifdef MORE_LOCKING_DEBUG | |
2895 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2896 | #endif | |
2897 | if (refcount) { | |
2898 | if (so->so_usecount <= 0) | |
2899 | panic("socket_unlock: bad refcount so=%x value=%d\n", so, so->so_usecount); | |
2900 | so->so_usecount--; | |
2901 | if (so->so_usecount == 0) { | |
2902 | sofreelastref(so, 1); | |
2903 | } | |
2904 | else | |
2905 | so->reserved4 = (void*)lr_saved; /* save caller */ | |
2906 | } | |
2907 | lck_mtx_unlock(mutex_held); | |
2908 | } | |
2909 | ||
2910 | return(error); | |
2911 | } | |
2912 | //### Called with socket locked, will unlock socket | |
2913 | void | |
2914 | sofree(so) | |
2915 | struct socket *so; | |
2916 | { | |
2917 | ||
2918 | int lr, lr_saved; | |
2919 | lck_mtx_t * mutex_held; | |
2920 | #ifdef __ppc__ | |
2921 | __asm__ volatile("mflr %0" : "=r" (lr)); | |
2922 | lr_saved = lr; | |
2923 | #endif | |
2924 | if (so->so_proto->pr_getlock != NULL) | |
2925 | mutex_held = (*so->so_proto->pr_getlock)(so, 0); | |
2926 | else | |
2927 | mutex_held = so->so_proto->pr_domain->dom_mtx; | |
2928 | lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); | |
2929 | ||
91447636 A |
2930 | sofreelastref(so, 0); |
2931 | } | |
2932 | ||
/*
 * soreference: take one use-count reference on the socket.
 * socket_lock(so, 1) both locks and adds the reference; unlocking
 * with refcount == 0 releases only the lock, keeping the reference.
 */
void
soreference(struct socket *so)
{
	socket_lock(so, 1);	/* lock the socket and take one reference */
	socket_unlock(so, 0);	/* drop only the lock */
}
2940 | ||
/*
 * sodereference: drop one use-count reference on the socket.
 * socket_lock(so, 0) only locks; unlocking with refcount == 1
 * releases the reference (freeing the socket if it was the last).
 */
void
sodereference(struct socket *so)
{
	socket_lock(so, 0);	/* take only the lock */
	socket_unlock(so, 1);	/* unlock and release one reference */
}