git.saurik.com Git - apple/xnu.git/blob

1 /*

3 *

4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@

5 *

6 * This file contains Original Code and/or Modifications of Original Code

7 * as defined in and that are subject to the Apple Public Source License

8 * Version 2.0 (the 'License'). You may not use this file except in

9 * compliance with the License. The rights granted to you under the License

10 * may not be used to create, or enable the creation or redistribution of,

11 * unlawful or unlicensed copies of an Apple operating system, or to

12 * circumvent, violate, or enable the circumvention or violation of, any

13 * terms of an Apple operating system software license agreement.

14 *

15 * Please obtain a copy of the License at

16 * http://www.opensource.apple.com/apsl/ and read it before using this file.

17 *

18 * The Original Code and all software distributed under the License are

19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER

20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,

21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,

22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.

23 * Please see the License for the specific language governing rights and

24 * limitations under the License.

25 *

26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@

27 */

28 /*

31 *

32 * Redistribution and use in source and binary forms, with or without

33 * modification, are permitted provided that the following conditions

34 * are met:

35 * 1. Redistributions of source code must retain the above copyright

36 * notice, this list of conditions and the following disclaimer.

37 * 2. Redistributions in binary form must reproduce the above copyright

38 * notice, this list of conditions and the following disclaimer in the

39 * documentation and/or other materials provided with the distribution.

40 * 3. All advertising materials mentioning features or use of this software

41 * must display the following acknowledgement:

42 * This product includes software developed by the University of

43 * California, Berkeley and its contributors.

44 * 4. Neither the name of the University nor the names of its contributors

45 * may be used to endorse or promote products derived from this software

46 * without specific prior written permission.

47 *

48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND

49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE

52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL

53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS

54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)

55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT

56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY

57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

58 * SUCH DAMAGE.

59 *

60 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94

61 */

62 /*

63 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce

64 * support for mandatory and extensible security protections. This notice

65 * is included in support of clause 2.2 (b) of the Apple Public License,

66 * Version 2.0.

67 */

69 #include <sys/param.h>

70 #include <sys/systm.h>

71 #include <sys/kernel.h>

72 #include <sys/domain.h>

73 #include <sys/fcntl.h>

74 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */

75 #include <sys/file_internal.h>

76 #include <sys/guarded.h>

77 #include <sys/filedesc.h>

78 #include <sys/lock.h>

79 #include <sys/mbuf.h>

80 #include <sys/namei.h>

81 #include <sys/proc_internal.h>

82 #include <sys/kauth.h>

83 #include <sys/protosw.h>

84 #include <sys/socket.h>

85 #include <sys/socketvar.h>

86 #include <sys/stat.h>

87 #include <sys/sysctl.h>

88 #include <sys/un.h>

89 #include <sys/unpcb.h>

90 #include <sys/vnode_internal.h>

91 #include <sys/kdebug.h>

92 #include <sys/mcache.h>

94 #include <kern/zalloc.h>

95 #include <kern/locks.h>

96 #include <kern/task.h>

98 #if CONFIG_MACF

99 #include <security/mac_framework.h>

100 #endif /* CONFIG_MACF */

101

102 #include <mach/vm_param.h>

103

104 /*

105 * Maximum number of FDs that can be passed in an mbuf

106 */

107 #define UIPC_MAX_CMSG_FD 512

108

 /*
  * File-scope state for the AF_UNIX protocol control blocks (struct unpcb).
  *
  * unp_zone            zone that unp_attach() allocates struct unpcb from.
  * unp_gencnt          generation counter; bumped by unp_attach() and
  *                     unp_detach() while unp_list_mtx is held exclusive.
  * unp_count           number of PCBs currently linked on the lists below.
  */
 ZONE_DECLARE(unp_zone, "unpzone", sizeof(struct unpcb), ZC_NONE);

110 static unp_gen_t unp_gencnt;

111 static u_int unp_count;

112

 /*
  * Lock attribute/group shared by every lock declared here and by the
  * per-PCB unp_mtx initialised in unp_attach().  unp_list_mtx is the
  * read/write lock taken exclusive around insertions into and removals
  * from unp_shead/unp_dhead.
  */
 static  LCK_ATTR_DECLARE(unp_mtx_attr, 0, 0);

 static  LCK_GRP_DECLARE(unp_mtx_grp, "unp_list");

 static  LCK_RW_DECLARE_ATTR(unp_list_mtx, &unp_mtx_grp, &unp_mtx_attr);

116

 /*
  * unp_disconnect_lock protects disconnect_in_progress (unp_detach() sleeps
  * on it via msleep()).  unp_connect_lock is held by unp_detach() while it
  * clears the vnode's v_socket, to avoid racing a thread that is connecting
  * to the socket being closed.
  * NOTE(review): no use of uipc_lock is visible in this part of the file.
  */
 static  LCK_MTX_DECLARE_ATTR(unp_disconnect_lock, &unp_mtx_grp, &unp_mtx_attr);

 static  LCK_MTX_DECLARE_ATTR(unp_connect_lock, &unp_mtx_grp, &unp_mtx_attr);

 static  LCK_MTX_DECLARE_ATTR(uipc_lock, &unp_mtx_grp, &unp_mtx_attr);

120

121 static u_int disconnect_in_progress;

122

 /*
  * unp_attach() links SOCK_DGRAM PCBs on unp_dhead and all others on
  * unp_shead.  The remaining variables are presumably garbage-collector
  * state for in-flight descriptors (see unp_gc) -- TODO confirm, their
  * users are outside this part of the file.
  */
123 static struct unp_head unp_shead, unp_dhead;

124 static int unp_defer, unp_gcing, unp_gcwait;

125 static thread_t unp_gcthread = NULL;

 static LIST_HEAD(, fileglob) unp_msghead = LIST_HEAD_INITIALIZER(unp_msghead);

127

128

129 /*

130 * mDNSResponder tracing. When enabled, endpoints connected to

131 * /var/run/mDNSResponder will be traced; during each send on

132 * the traced socket, we log the PID and process name of the

133 * sending process. We also print out a bit of info related

134 * to the data itself; this assumes ipc_msg_hdr in dnssd_ipc.h

135 * of mDNSResponder stays the same.

136 */

137 #define MDNSRESPONDER_PATH "/var/run/mDNSResponder"

138

139 static int unpst_tracemdns; /* enable tracing */

140

141 #define MDNS_IPC_MSG_HDR_VERSION_1 1

142

/*
 * Kernel-side copy of the header that begins an mDNSResponder IPC message
 * (ipc_msg_hdr in mDNSResponder's dnssd_ipc.h).  uipc_send() copies the
 * first sizeof(struct mdns_ipc_msg_hdr) bytes of a traced stream message
 * into one of these and logs the 'op' field.  The struct is packed so that
 * no padding is inserted between fields; the layout must stay in step with
 * the mDNSResponder definition.
 */
143 struct mdns_ipc_msg_hdr {

144 uint32_t version; /* compared against ntohl(MDNS_IPC_MSG_HDR_VERSION_1) */

145 uint32_t datalen;

146 uint32_t ipc_flags;

147 uint32_t op; /* printed after ntohl() by the trace code */

148 union {

149 void *context;

150 uint32_t u32[2];

151 } __attribute__((packed));

152 uint32_t reg_index;

153 } __attribute__((packed));

154

155 /*

156 * Unix communications domain.

157 *

158 * TODO:

159 * SEQPACKET, RDM

160 * rethink name space problems

161 * need a proper out-of-band

162 * lock pushdown

163 */

 static struct   sockaddr sun_noname = { .sa_len = sizeof(sun_noname), .sa_family = AF_LOCAL, .sa_data = { 0 } };

165 static ino_t unp_ino; /* prototype for fake inode numbers */

166

 static int      unp_attach(struct socket *);

 static void     unp_detach(struct unpcb *);

 static int      unp_bind(struct unpcb *, struct sockaddr *, proc_t);

 static int      unp_connect(struct socket *, struct sockaddr *, proc_t);

 static void     unp_disconnect(struct unpcb *);

 static void     unp_shutdown(struct unpcb *);

 static void     unp_drop(struct unpcb *, int);

 __private_extern__ void unp_gc(void);

 static void     unp_scan(struct mbuf *, void (*)(struct fileglob *, void *arg), void *arg);

 static void     unp_mark(struct fileglob *, __unused void *);

 static void     unp_discard(struct fileglob *, void *);

 static int      unp_internalize(struct mbuf *, proc_t);

 static int      unp_listen(struct unpcb *, proc_t);

 static void     unpcb_to_compat(struct unpcb *, struct unpcb_compat *);

 static void     unp_get_locks_in_order(struct socket *so, struct socket *conn_so);

182

183 static void

 unp_get_locks_in_order(struct socket *so, struct socket *conn_so)

185 {

186 if (so < conn_so) {

                 socket_lock(conn_so, 1);

188 } else {

                 struct unpcb *unp = sotounpcb(so);

190 unp->unp_flags |= UNP_DONTDISCONNECT;

191 unp->rw_thrcount++;

                 socket_unlock(so, 0);

193

194 /* Get the locks in the correct order */

                 socket_lock(conn_so, 1);

                 socket_lock(so, 0);

197 unp->rw_thrcount--;

                 if (unp->rw_thrcount == 0) {

199 unp->unp_flags &= ~UNP_DONTDISCONNECT;

200 wakeup(unp);

201 }

202 }

203 }

204

205 static int

 uipc_abort(struct socket *so)

207 {

         struct unpcb *unp = sotounpcb(so);

209

         if (unp == 0) {

211 return EINVAL;

212 }

213 unp_drop(unp, ECONNABORTED);

214 unp_detach(unp);

215 sofree(so);

216 return 0;

217 }

218

219 static int

 uipc_accept(struct socket *so, struct sockaddr **nam)

221 {

         struct unpcb *unp = sotounpcb(so);

223

         if (unp == 0) {

225 return EINVAL;

226 }

227

228 /*

229 * Pass back name of connected socket,

230 * if it was bound and we are still connected

231 * (our peer may have closed already!).

232 */

         if (unp->unp_conn && unp->unp_conn->unp_addr) {

                 *nam = dup_sockaddr((struct sockaddr *)

                     unp->unp_conn->unp_addr, 1);

236 } else {

                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);

238 }

239 return 0;

240 }

241

242 /*

243 * Returns: 0 Success

244 * EISCONN

245 * unp_attach:

246 */

247 static int

 uipc_attach(struct socket *so, __unused int proto, __unused proc_t p)

249 {

         struct unpcb *unp = sotounpcb(so);

251

         if (unp != 0) {

253 return EISCONN;

254 }

255 return unp_attach(so);

256 }

257

258 static int

 uipc_bind(struct socket *so, struct sockaddr *nam, proc_t p)

260 {

         struct unpcb *unp = sotounpcb(so);

262

         if (unp == 0) {

264 return EINVAL;

265 }

266

         return unp_bind(unp, nam, p);

268 }

269

270 /*

271 * Returns: 0 Success

272 * EINVAL

273 * unp_connect:??? [See elsewhere in this file]

274 */

275 static int

 uipc_connect(struct socket *so, struct sockaddr *nam, proc_t p)

277 {

         struct unpcb *unp = sotounpcb(so);

279

         if (unp == 0) {

281 return EINVAL;

282 }

         return unp_connect(so, nam, p);

284 }

285

286 /*

287 * Returns: 0 Success

288 * EINVAL

289 * unp_connect2:EPROTOTYPE Protocol wrong type for socket

290 * unp_connect2:EINVAL Invalid argument

291 */

292 static int

 uipc_connect2(struct socket *so1, struct socket *so2)

294 {

         struct unpcb *unp = sotounpcb(so1);

296

         if (unp == 0) {

298 return EINVAL;

299 }

300

         return unp_connect2(so1, so2);

302 }

303

304 /* control is EOPNOTSUPP */

305

306 static int

 uipc_detach(struct socket *so)

308 {

         struct unpcb *unp = sotounpcb(so);

310

         if (unp == 0) {

312 return EINVAL;

313 }

314

         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);

316 unp_detach(unp);

317 return 0;

318 }

319

320 static int

 uipc_disconnect(struct socket *so)

322 {

         struct unpcb *unp = sotounpcb(so);

324

         if (unp == 0) {

326 return EINVAL;

327 }

328 unp_disconnect(unp);

329 return 0;

330 }

331

332 /*

333 * Returns: 0 Success

334 * EINVAL

335 */

336 static int

 uipc_listen(struct socket *so, __unused proc_t p)

338 {

         struct unpcb *unp = sotounpcb(so);

340

         if (unp == 0 || unp->unp_vnode == 0) {

342 return EINVAL;

343 }

         return unp_listen(unp, p);

345 }

346

347 static int

 uipc_peeraddr(struct socket *so, struct sockaddr **nam)

349 {

         struct unpcb *unp = sotounpcb(so);

351

352 if (unp == NULL) {

353 return EINVAL;

354 }

         if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) {

                 *nam = dup_sockaddr((struct sockaddr *)

                     unp->unp_conn->unp_addr, 1);

358 } else {

                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);

360 }

361 return 0;

362 }

363

364 static int

 uipc_rcvd(struct socket *so, __unused int flags)

366 {

         struct unpcb *unp = sotounpcb(so);

368 struct socket *so2;

369

         if (unp == 0) {

371 return EINVAL;

372 }

373 switch (so->so_type) {

374 case SOCK_DGRAM:

375 panic("uipc_rcvd DGRAM?");

376 /*NOTREACHED*/

377

378 case SOCK_STREAM:

379 #define rcv (&so->so_rcv)

380 #define snd (&so2->so_snd)

                 if (unp->unp_conn == 0) {

382 break;

383 }

384

385 so2 = unp->unp_conn->unp_socket;

386 unp_get_locks_in_order(so, so2);

387 /*

388 * Adjust backpressure on sender

389 * and wakeup any waiting to write.

390 */

                 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;

392 unp->unp_mbcnt = rcv->sb_mbcnt;

                 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;

394 unp->unp_cc = rcv->sb_cc;

                 if (sb_notify(&so2->so_snd)) {

                         sowakeup(so2, &so2->so_snd, so);

397 }

398

                 socket_unlock(so2, 1);

400

401 #undef snd

402 #undef rcv

403 break;

404

405 default:

406 panic("uipc_rcvd unknown socktype");

407 }

408 return 0;

409 }

410

411 /* pru_rcvoob is EOPNOTSUPP */

412

413 /*

414 * Returns: 0 Success

415 * EINVAL

416 * EOPNOTSUPP

417 * EPIPE

418 * ENOTCONN

419 * EISCONN

420 * unp_internalize:EINVAL

421 * unp_internalize:EBADF

422 * unp_connect:EAFNOSUPPORT Address family not supported

423 * unp_connect:EINVAL Invalid argument

424 * unp_connect:ENOTSOCK Not a socket

425 * unp_connect:ECONNREFUSED Connection refused

426 * unp_connect:EISCONN Socket is connected

427 * unp_connect:EPROTOTYPE Protocol wrong type for socket

428 * unp_connect:???

429 * sbappendaddr:ENOBUFS [5th argument, contents modified]

430 * sbappendaddr:??? [whatever a filter author chooses]

431 */

432 static int

 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,

434 struct mbuf *control, proc_t p)

435 {

436 int error = 0;

         struct unpcb *unp = sotounpcb(so);

438 struct socket *so2;

439

         if (unp == 0) {

441 error = EINVAL;

442 goto release;

443 }

444 if (flags & PRUS_OOB) {

445 error = EOPNOTSUPP;

446 goto release;

447 }

448

449 if (control) {

450 /* release lock to avoid deadlock (4436174) */

                 socket_unlock(so, 0);

                 error = unp_internalize(control, p);

                 socket_lock(so, 0);

454 if (error) {

455 goto release;

456 }

457 }

458

459 switch (so->so_type) {

460 case SOCK_DGRAM:

461 {

462 struct sockaddr *from;

463

464 if (nam) {

465 if (unp->unp_conn) {

466 error = EISCONN;

467 break;

468 }

                         error = unp_connect(so, nam, p);

470 if (error) {

471 so->so_state &= ~SS_ISCONNECTING;

472 break;

473 }

474 } else {

                         if (unp->unp_conn == 0) {

476 error = ENOTCONN;

477 break;

478 }

479 }

480

481 so2 = unp->unp_conn->unp_socket;

482 if (so != so2) {

483 unp_get_locks_in_order(so, so2);

484 }

485

486 if (unp->unp_addr) {

                         from = (struct sockaddr *)unp->unp_addr;

488 } else {

489 from = &sun_noname;

490 }

491 /*

492 * sbappendaddr() will fail when the receiver runs out of

493 * space; in contrast to SOCK_STREAM, we will lose messages

494 * for the SOCK_DGRAM case when the receiver's queue overflows.

495 * SB_UNIX on the socket buffer implies that the callee will

496 * not free the control message, if any, because we would need

497 * to call unp_dispose() on it.

498 */

                 if (sbappendaddr(&so2->so_rcv, from, m, control, &error)) {

500 control = NULL;

                         if (sb_notify(&so2->so_rcv)) {

                                 sowakeup(so2, &so2->so_rcv, so);

503 }

                 } else if (control != NULL && error == 0) {

505 /* A socket filter took control; don't touch it */

506 control = NULL;

507 }

508

509 if (so != so2) {

                         socket_unlock(so2, 1);

511 }

512

513 m = NULL;

514 if (nam) {

515 unp_disconnect(unp);

516 }

517 break;

518 }

519

520 case SOCK_STREAM: {

521 int didreceive = 0;

522 #define rcv (&so2->so_rcv)

523 #define snd (&so->so_snd)

524 /* Connect if not connected yet. */

525 /*

526 * Note: A better implementation would complain

527 * if not equal to the peer's address.

528 */

                 if ((so->so_state & SS_ISCONNECTED) == 0) {

530 if (nam) {

                                 error = unp_connect(so, nam, p);

532 if (error) {

533 so->so_state &= ~SS_ISCONNECTING;

534 break; /* XXX */

535 }

536 } else {

537 error = ENOTCONN;

538 break;

539 }

540 }

541

                 if (so->so_state & SS_CANTSENDMORE) {

543 error = EPIPE;

544 break;

545 }

                 if (unp->unp_conn == 0) {

547 panic("uipc_send connected but no connection?");

548 }

549

550 so2 = unp->unp_conn->unp_socket;

551 unp_get_locks_in_order(so, so2);

552

553 /* Check socket state again as we might have unlocked the socket

554 * while trying to get the locks in order

555 */

556

                 if ((so->so_state & SS_CANTSENDMORE)) {

558 error = EPIPE;

                         socket_unlock(so2, 1);

560 break;

561 }

562

                 if (unp->unp_flags & UNP_TRACE_MDNS) {

564 struct mdns_ipc_msg_hdr hdr;

565

                         if (mbuf_copydata(m, 0, sizeof(hdr), &hdr) == 0 &&

                             hdr.version == ntohl(MDNS_IPC_MSG_HDR_VERSION_1)) {

                                 printf("%s[mDNSResponder] pid=%d (%s): op=0x%x\n",

                                     __func__, p->p_pid, p->p_comm, ntohl(hdr.op));

570 }

571 }

572

573 /*

574 * Send to paired receive port, and then reduce send buffer

575 * hiwater marks to maintain backpressure. Wake up readers.

576 * SB_UNIX flag will allow new record to be appended to the

577 * receiver's queue even when it is already full. It is

578 * possible, however, that append might fail. In that case,

579 * we will need to call unp_dispose() on the control message;

580 * the callee will not free it since SB_UNIX is set.

581 */

582 didreceive = control ?

                     sbappendcontrol(rcv, m, control, &error) : sbappend(rcv, m);

584

                 snd->sb_mbmax -= rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;

                 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;

                 if ((int32_t)snd->sb_hiwat >=

                     (int32_t)(rcv->sb_cc - unp->unp_conn->unp_cc)) {

                         snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;

590 } else {

591 snd->sb_hiwat = 0;

592 }

                 unp->unp_conn->unp_cc = rcv->sb_cc;

594 if (didreceive) {

595 control = NULL;

                         if (sb_notify(&so2->so_rcv)) {

                                 sowakeup(so2, &so2->so_rcv, so);

598 }

                 } else if (control != NULL && error == 0) {

600 /* A socket filter took control; don't touch it */

601 control = NULL;

602 }

603

                 socket_unlock(so2, 1);

605 m = NULL;

606 #undef snd

607 #undef rcv

608 }

609 break;

610

611 default:

612 panic("uipc_send unknown socktype");

613 }

614

615 /*

616 * SEND_EOF is equivalent to a SEND followed by

617 * a SHUTDOWN.

618 */

619 if (flags & PRUS_EOF) {

620 socantsendmore(so);

621 unp_shutdown(unp);

622 }

623

         if (control && error != 0) {

                 socket_unlock(so, 0);

626 unp_dispose(control);

                 socket_lock(so, 0);

628 }

629

630 release:

631 if (control) {

632 m_freem(control);

633 }

634 if (m) {

635 m_freem(m);

636 }

637 return error;

638 }

639

640 static int

 uipc_sense(struct socket *so, void *ub, int isstat64)

642 {

         struct unpcb *unp = sotounpcb(so);

644 struct socket *so2;

645 blksize_t blksize;

646

         if (unp == 0) {

648 return EINVAL;

649 }

650

651 blksize = so->so_snd.sb_hiwat;

         if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {

653 so2 = unp->unp_conn->unp_socket;

654 blksize += so2->so_rcv.sb_cc;

655 }

         if (unp->unp_ino == 0) {

657 unp->unp_ino = unp_ino++;

658 }

659

         if (isstat64 != 0) {

661 struct stat64 *sb64;

662

663 sb64 = (struct stat64 *)ub;

664 sb64->st_blksize = blksize;

665 sb64->st_dev = NODEV;

                 sb64->st_ino = (ino64_t)unp->unp_ino;

667 } else {

668 struct stat *sb;

669

670 sb = (struct stat *)ub;

671 sb->st_blksize = blksize;

672 sb->st_dev = NODEV;

                 sb->st_ino = (ino_t)(uintptr_t)unp->unp_ino;

674 }

675

676 return 0;

677 }

678

679 /*

680 * Returns: 0 Success

681 * EINVAL

682 *

683 * Notes: This is not strictly correct, as unp_shutdown() also calls

684 * socantrcvmore(). These should maybe both be conditionalized

685 * on the 'how' argument in soshutdown() as called from the

686 * shutdown() system call.

687 */

688 static int

 uipc_shutdown(struct socket *so)

690 {

         struct unpcb *unp = sotounpcb(so);

692

         if (unp == 0) {

694 return EINVAL;

695 }

696 socantsendmore(so);

697 unp_shutdown(unp);

698 return 0;

699 }

700

701 /*

702 * Returns: 0 Success

703 * EINVAL Invalid argument

704 */

705 static int

 uipc_sockaddr(struct socket *so, struct sockaddr **nam)

707 {

         struct unpcb *unp = sotounpcb(so);

709

710 if (unp == NULL) {

711 return EINVAL;

712 }

         if (unp->unp_addr != NULL) {

                 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);

715 } else {

                 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);

717 }

718 return 0;

719 }

720

721 struct pr_usrreqs uipc_usrreqs = {

722 .pru_abort = uipc_abort,

723 .pru_accept = uipc_accept,

724 .pru_attach = uipc_attach,

725 .pru_bind = uipc_bind,

726 .pru_connect = uipc_connect,

727 .pru_connect2 = uipc_connect2,

728 .pru_detach = uipc_detach,

729 .pru_disconnect = uipc_disconnect,

730 .pru_listen = uipc_listen,

731 .pru_peeraddr = uipc_peeraddr,

732 .pru_rcvd = uipc_rcvd,

733 .pru_send = uipc_send,

734 .pru_sense = uipc_sense,

735 .pru_shutdown = uipc_shutdown,

736 .pru_sockaddr = uipc_sockaddr,

737 .pru_sosend = sosend,

738 .pru_soreceive = soreceive,

739 };

740

741 int

 uipc_ctloutput(struct socket *so, struct sockopt *sopt)

743 {

         struct unpcb *unp = sotounpcb(so);

745 int error = 0;

746 pid_t peerpid;

747 proc_t p;

748 task_t t;

749 struct socket *peerso;

750

751 switch (sopt->sopt_dir) {

752 case SOPT_GET:

753 switch (sopt->sopt_name) {

754 case LOCAL_PEERCRED:

                         if (unp->unp_flags & UNP_HAVEPC) {

                                 error = sooptcopyout(sopt, &unp->unp_peercred,

757 sizeof(unp->unp_peercred));

758 } else {

                                 if (so->so_type == SOCK_STREAM) {

760 error = ENOTCONN;

761 } else {

762 error = EINVAL;

763 }

764 }

765 break;

766 case LOCAL_PEERPID:

767 case LOCAL_PEEREPID:

                         if (unp->unp_conn == NULL) {

769 error = ENOTCONN;

770 break;

771 }

772 peerso = unp->unp_conn->unp_socket;

773 if (peerso == NULL) {

774 panic("peer is connected but has no socket?");

775 }

776 unp_get_locks_in_order(so, peerso);

                         if (sopt->sopt_name == LOCAL_PEEREPID &&

778 peerso->so_flags & SOF_DELEGATED) {

779 peerpid = peerso->e_pid;

780 } else {

781 peerpid = peerso->last_pid;

782 }

                         socket_unlock(peerso, 1);

                         error = sooptcopyout(sopt, &peerpid, sizeof(peerpid));

785 break;

786 case LOCAL_PEERUUID:

787 case LOCAL_PEEREUUID:

                         if (unp->unp_conn == NULL) {

789 error = ENOTCONN;

790 break;

791 }

792 peerso = unp->unp_conn->unp_socket;

793 if (peerso == NULL) {

794 panic("peer is connected but has no socket?");

795 }

796 unp_get_locks_in_order(so, peerso);

                         if (sopt->sopt_name == LOCAL_PEEREUUID &&

798 peerso->so_flags & SOF_DELEGATED) {

                                 error = sooptcopyout(sopt, &peerso->e_uuid,

800 sizeof(peerso->e_uuid));

801 } else {

                                 error = sooptcopyout(sopt, &peerso->last_uuid,

803 sizeof(peerso->last_uuid));

804 }

                         socket_unlock(peerso, 1);

806 break;

807 case LOCAL_PEERTOKEN:

                         if (unp->unp_conn == NULL) {

809 error = ENOTCONN;

810 break;

811 }

812 peerso = unp->unp_conn->unp_socket;

813 if (peerso == NULL) {

814 panic("peer is connected but has no socket?");

815 }

816 unp_get_locks_in_order(so, peerso);

817 peerpid = peerso->last_pid;

818 p = proc_find(peerpid);

819 if (p != PROC_NULL) {

820 t = proc_task(p);

821 if (t != TASK_NULL) {

822 audit_token_t peertoken;

823 mach_msg_type_number_t count = TASK_AUDIT_TOKEN_COUNT;

                                         if (task_info(t, TASK_AUDIT_TOKEN, (task_info_t)&peertoken, &count) == KERN_SUCCESS) {

                                                 error = sooptcopyout(sopt, &peertoken, sizeof(peertoken));

826 } else {

827 error = EINVAL;

828 }

829 } else {

830 error = EINVAL;

831 }

832 proc_rele(p);

833 } else {

834 error = EINVAL;

835 }

                         socket_unlock(peerso, 1);

837 break;

838 default:

839 error = EOPNOTSUPP;

840 break;

841 }

842 break;

843 case SOPT_SET:

844 default:

845 error = EOPNOTSUPP;

846 break;

847 }

848

849 return error;

850 }

851

852 /*

853 * Both send and receive buffers are allocated PIPSIZ bytes of buffering

854 * for stream sockets, although the total for sender and receiver is

855 * actually only PIPSIZ.

856 * Datagram sockets really use the sendspace as the maximum datagram size,

857 * and don't really want to reserve the sendspace. Their recvspace should

858 * be large enough for at least one max-size datagram plus address.

859 */

/*
 * Default socket-buffer reservations used by unp_attach() when a new
 * socket has a zero send or receive high-water mark, and the sysctl
 * declarations that expose them.  PIPSIZ can be overridden by defining it
 * before this point.
 */
860 #ifndef PIPSIZ

861 #define PIPSIZ 8192

862 #endif

/* Stream sockets: passed to soreserve() as send and receive space. */
863 static u_int32_t unpst_sendspace = PIPSIZ;

864 static u_int32_t unpst_recvspace = PIPSIZ;

/* Datagram sockets: passed to soreserve() as send and receive space. */
 static u_int32_t        unpdg_sendspace = 2 * 1024;       /* really max datagram size */

 static u_int32_t        unpdg_recvspace = 4 * 1024;

867

868 static int unp_rights; /* file descriptors in flight */

869 static int unp_disposed; /* discarded file descriptors */

870

/*
 * Stream tunables, declared read-write under the _net_local_stream node
 * (presumably visible as net.local.stream.* -- TODO confirm the node path).
 * tracemdns toggles unpst_tracemdns, the mDNSResponder tracing switch.
 */
871 SYSCTL_DECL(_net_local_stream);

 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW | CTLFLAG_LOCKED,

     &unpst_sendspace, 0, "");

 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,

     &unpst_recvspace, 0, "");

 SYSCTL_INT(_net_local_stream, OID_AUTO, tracemdns, CTLFLAG_RW | CTLFLAG_LOCKED,

     &unpst_tracemdns, 0, "");

/* Datagram tunables; note that "maxdgram" is backed by unpdg_sendspace. */
878 SYSCTL_DECL(_net_local_dgram);

 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW | CTLFLAG_LOCKED,

     &unpdg_sendspace, 0, "");

 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW | CTLFLAG_LOCKED,

     &unpdg_recvspace, 0, "");

/* Read-only (CTLFLAG_RD) view of unp_rights. */
883 SYSCTL_DECL(_net_local);

 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD | CTLFLAG_LOCKED, &unp_rights, 0, "");

885

886 /*

887 * Returns: 0 Success

888 * ENOBUFS

889 * soreserve:ENOBUFS

890 */

891 static int

 unp_attach(struct socket *so)

893 {

894 struct unpcb *unp;

895 int error = 0;

896

         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {

898 switch (so->so_type) {

899 case SOCK_STREAM:

                         error = soreserve(so, unpst_sendspace, unpst_recvspace);

901 break;

902

903 case SOCK_DGRAM:

                         error = soreserve(so, unpdg_sendspace, unpdg_recvspace);

905 break;

906

907 default:

908 panic("unp_attach");

909 }

910 if (error) {

911 return error;

912 }

913 }

         unp = (struct unpcb *)zalloc(unp_zone);

915 if (unp == NULL) {

916 return ENOBUFS;

917 }

         bzero(unp, sizeof(*unp));

919

         lck_mtx_init(&unp->unp_mtx, &unp_mtx_grp, &unp_mtx_attr);

921

922 lck_rw_lock_exclusive(&unp_list_mtx);

923 LIST_INIT(&unp->unp_refs);

924 unp->unp_socket = so;

925 unp->unp_gencnt = ++unp_gencnt;

926 unp_count++;

         LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ?

             &unp_dhead : &unp_shead, unp, unp_link);

929 lck_rw_done(&unp_list_mtx);

930 so->so_pcb = (caddr_t)unp;

931 /*

932 * Mark AF_UNIX socket buffers accordingly so that:

933 *

934 * a. In the SOCK_STREAM case, socket buffer append won't fail due to

935 * the lack of space; this essentially loosens the sbspace() check,

936 * since there is disconnect between sosend() and uipc_send() with

937 * respect to flow control that might result in our dropping the

938 * data in uipc_send(). By setting this, we allow for slightly

939 * more records to be appended to the receiving socket to avoid

940 * losing data (which we can't afford in the SOCK_STREAM case).

941 * Flow control still takes place since we adjust the sender's

942 * hiwat during each send. This doesn't affect the SOCK_DGRAM

943 * case and append would still fail when the queue overflows.

944 *

945 * b. In the presence of control messages containing internalized

946 * file descriptors, the append routines will not free them since

947 * we'd need to undo the work first via unp_dispose().

948 */

949 so->so_rcv.sb_flags |= SB_UNIX;

950 so->so_snd.sb_flags |= SB_UNIX;

951 return 0;

952 }

953

954 static void

 unp_detach(struct unpcb *unp)

956 {

957 int so_locked = 1;

958

959 lck_rw_lock_exclusive(&unp_list_mtx);

960 LIST_REMOVE(unp, unp_link);

961 --unp_count;

962 ++unp_gencnt;

963 lck_rw_done(&unp_list_mtx);

964 if (unp->unp_vnode) {

965 struct vnode *tvp = NULL;

                 socket_unlock(unp->unp_socket, 0);

967

968 /* Holding unp_connect_lock will avoid a race between

969 * a thread closing the listening socket and a thread

970 * connecting to it.

971 */

972 lck_mtx_lock(&unp_connect_lock);

                 socket_lock(unp->unp_socket, 0);

974 if (unp->unp_vnode) {

975 tvp = unp->unp_vnode;

976 unp->unp_vnode->v_socket = NULL;

977 unp->unp_vnode = NULL;

978 }

979 lck_mtx_unlock(&unp_connect_lock);

980 if (tvp != NULL) {

981 vnode_rele(tvp); /* drop the usecount */

982 }

983 }

984 if (unp->unp_conn) {

985 unp_disconnect(unp);

986 }

         while (unp->unp_refs.lh_first) {

988 struct unpcb *unp2 = NULL;

989

990 /* This datagram socket is connected to one or more

991 * sockets. In order to avoid a race condition between removing

992 * this reference and closing the connected socket, we need

993 * to check disconnect_in_progress

994 */

                 if (so_locked == 1) {

                         socket_unlock(unp->unp_socket, 0);

997 so_locked = 0;

998 }

999 lck_mtx_lock(&unp_disconnect_lock);

                 while (disconnect_in_progress != 0) {

                         (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,

1002 PSOCK, "disconnect", NULL);

1003 }

1004 disconnect_in_progress = 1;

1005 lck_mtx_unlock(&unp_disconnect_lock);

1006

1007 /* Now we are sure that any unpcb socket disconnect is not happening */

                 if (unp->unp_refs.lh_first != NULL) {

1009 unp2 = unp->unp_refs.lh_first;

                         socket_lock(unp2->unp_socket, 1);

1011 }

1012

1013 lck_mtx_lock(&unp_disconnect_lock);

1014 disconnect_in_progress = 0;

1015 wakeup(&disconnect_in_progress);

1016 lck_mtx_unlock(&unp_disconnect_lock);

1017

1018 if (unp2 != NULL) {

1019 /* We already locked this socket and have a reference on it */

1020 unp_drop(unp2, ECONNRESET);

                         socket_unlock(unp2->unp_socket, 1);

1022 }

1023 }

1024

         if (so_locked == 0) {

                 socket_lock(unp->unp_socket, 0);

1027 so_locked = 1;

1028 }

1029 soisdisconnected(unp->unp_socket);

1030 /* makes sure we're getting dealloced */

1031 unp->unp_socket->so_flags |= SOF_PCBCLEARING;

1032 }

1033

1034 /*

1035 * Returns: 0 Success

1036 * EAFNOSUPPORT

1037 * EINVAL

1038 * EADDRINUSE

1039 * namei:??? [anything namei can return]

1040 * vnode_authorize:??? [anything vnode_authorize can return]

1041 *

1042 * Notes: p at this point is the current process, as this function is

1043 * only called by sobind().

1044 */

1045 static int

1046 unp_bind(

1047 struct unpcb *unp,

1048 struct sockaddr *nam,

1049 proc_t p)

1050 {

         struct sockaddr_un *soun = (struct sockaddr_un *)nam;

1052 struct vnode *vp, *dvp;

1053 struct vnode_attr va;

1054 vfs_context_t ctx = vfs_context_current();

1055 int error, namelen;

1056 struct nameidata nd;

         struct socket *so = unp->unp_socket;

1058 char buf[SOCK_MAXADDRLEN];

1059

         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {

1061 return EAFNOSUPPORT;

1062 }

1063

1064 /*

1065 * Check if the socket is already bound to an address

1066 */

         if (unp->unp_vnode != NULL) {

1068 return EINVAL;

1069 }

1070 /*

1071 * Check if the socket may have been shut down

1072 */

         if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) ==

1074 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {

1075 return EINVAL;

1076 }

1077

         namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);

         if (namelen <= 0) {

1080 return EINVAL;

1081 }

1082 /*

1083 * Note: sun_path is not a zero terminated "C" string

1084 */

1085 if (namelen >= SOCK_MAXADDRLEN) {

1086 return EINVAL;

1087 }

         bcopy(soun->sun_path, buf, namelen);

1089 buf[namelen] = 0;

1090

         socket_unlock(so, 0);

1092

         NDINIT(&nd, CREATE, OP_MKFIFO, FOLLOW | LOCKPARENT, UIO_SYSSPACE,

1094 CAST_USER_ADDR_T(buf), ctx);

1095 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */

1096 error = namei(&nd);

1097 if (error) {

                 socket_lock(so, 0);

1099 return error;

1100 }

1101 dvp = nd.ni_dvp;

1102 vp = nd.ni_vp;

1103

1104 if (vp != NULL) {

1105 /*

1106 * need to do this before the vnode_put of dvp

1107 * since we may have to release an fs_nodelock

1108 */

1109 nameidone(&nd);

1110

1111 vnode_put(dvp);

1112 vnode_put(vp);

1113

                 socket_lock(so, 0);

1115 return EADDRINUSE;

1116 }

1117

1118 VATTR_INIT(&va);

         VATTR_SET(&va, va_type, VSOCK);

         VATTR_SET(&va, va_mode, (ACCESSPERMS & ~p->p_fd->fd_cmask));

1121

1122 #if CONFIG_MACF

1123 error = mac_vnode_check_create(ctx,

             nd.ni_dvp, &nd.ni_cnd, &va);

1125

         if (error == 0)

1127 #endif /* CONFIG_MACF */

1128 #if CONFIG_MACF_SOCKET_SUBSET

1129 error = mac_vnode_check_uipc_bind(ctx,

             nd.ni_dvp, &nd.ni_cnd, &va);

1131

         if (error == 0)

1133 #endif /* MAC_SOCKET_SUBSET */

1134 /* authorize before creating */

         error = vnode_authorize(dvp, NULL, KAUTH_VNODE_ADD_FILE, ctx);

1136

1137 if (!error) {

1138 /* create the socket */

                 error = vn_create(dvp, &vp, &nd, &va, 0, 0, NULL, ctx);

1140 }

1141

1142 nameidone(&nd);

1143 vnode_put(dvp);

1144

1145 if (error) {

                 socket_lock(so, 0);

1147 return error;

1148 }

1149

         socket_lock(so, 0);

1151

         if (unp->unp_vnode != NULL) {

1153 vnode_put(vp); /* drop the iocount */

1154 return EINVAL;

1155 }

1156

         error = vnode_ref(vp);  /* gain a longterm reference */

1158 if (error) {

1159 vnode_put(vp); /* drop the iocount */

1160 return error;

1161 }

1162

1163 vp->v_socket = unp->unp_socket;

1164 unp->unp_vnode = vp;

         unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);

1166 vnode_put(vp); /* drop the iocount */

1167

1168 return 0;

1169 }

1170

1171

1172 /*

1173 * Returns: 0 Success

1174 * EAFNOSUPPORT Address family not supported

1175 * EINVAL Invalid argument

1176 * ENOTSOCK Not a socket

1177 * ECONNREFUSED Connection refused

1178 * EPROTOTYPE Protocol wrong type for socket

1179 * EISCONN Socket is connected

1180 * unp_connect2:EPROTOTYPE Protocol wrong type for socket

1181 * unp_connect2:EINVAL Invalid argument

1182 * namei:??? [anything namei can return]

1183 * vnode_authorize:???? [anything vnode_authorize can return]

1184 *

1185 * Notes: p at this point is the current process, as this function is

1186 * only called by sosend(), sendfile(), and soconnectlock().

1187 */

1188 static int

 unp_connect(struct socket *so, struct sockaddr *nam, __unused proc_t p)

1190 {

         struct sockaddr_un *soun = (struct sockaddr_un *)nam;

1192 struct vnode *vp;

         struct socket *so2, *so3, *list_so = NULL;

         struct unpcb *unp, *unp2, *unp3;

1195 vfs_context_t ctx = vfs_context_current();

1196 int error, len;

1197 struct nameidata nd;

1198 char buf[SOCK_MAXADDRLEN];

1199

         if (nam->sa_family != 0 && nam->sa_family != AF_UNIX) {

1201 return EAFNOSUPPORT;

1202 }

1203

1204 unp = sotounpcb(so);

1205 so2 = so3 = NULL;

1206

         len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);

         if (len <= 0) {

1209 return EINVAL;

1210 }

1211 /*

1212 * Note: sun_path is not a zero terminated "C" string

1213 */

1214 if (len >= SOCK_MAXADDRLEN) {

1215 return EINVAL;

1216 }

1217

1218 soisconnecting(so);

1219

         bcopy(soun->sun_path, buf, len);

1221 buf[len] = 0;

1222

         socket_unlock(so, 0);

1224

         NDINIT(&nd, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE,

1226 CAST_USER_ADDR_T(buf), ctx);

1227 error = namei(&nd);

1228 if (error) {

                 socket_lock(so, 0);

1230 return error;

1231 }

1232 nameidone(&nd);

1233 vp = nd.ni_vp;

         if (vp->v_type != VSOCK) {

1235 error = ENOTSOCK;

                 socket_lock(so, 0);

1237 goto out;

1238 }

1239

1240 #if CONFIG_MACF_SOCKET_SUBSET

         error = mac_vnode_check_uipc_connect(ctx, vp, so);

1242 if (error) {

                 socket_lock(so, 0);

1244 goto out;

1245 }

1246 #endif /* MAC_SOCKET_SUBSET */

1247

         error = vnode_authorize(vp, NULL, KAUTH_VNODE_WRITE_DATA, ctx);

1249 if (error) {

                 socket_lock(so, 0);

1251 goto out;

1252 }

1253

1254 lck_mtx_lock(&unp_connect_lock);

1255

         if (vp->v_socket == 0) {

1257 lck_mtx_unlock(&unp_connect_lock);

1258 error = ECONNREFUSED;

                 socket_lock(so, 0);

1260 goto out;

1261 }

1262

         socket_lock(vp->v_socket, 1); /* Get a reference on the listening socket */

1264 so2 = vp->v_socket;

1265 lck_mtx_unlock(&unp_connect_lock);

1266

1267

         if (so2->so_pcb == NULL) {

1269 error = ECONNREFUSED;

1270 if (so != so2) {

                         socket_unlock(so2, 1);

                         socket_lock(so, 0);

1273 } else {

1274 /* Release the reference held for the listen socket */

                         VERIFY(so2->so_usecount > 0);

1276 so2->so_usecount--;

1277 }

1278 goto out;

1279 }

1280

1281 if (so < so2) {

                 socket_unlock(so2, 0);

                 socket_lock(so, 0);

                 socket_lock(so2, 0);

         } else if (so > so2) {

                 socket_lock(so, 0);

1287 }

1288 /*

1289 * Check if socket was connected while we were trying to

1290 * get the socket locks in order.

1291 * XXX - probably shouldn't return an error for SOCK_DGRAM

1292 */

         if ((so->so_state & SS_ISCONNECTED) != 0) {

1294 error = EISCONN;

1295 goto decref_out;

1296 }

1297

         if (so->so_type != so2->so_type) {

1299 error = EPROTOTYPE;

1300 goto decref_out;

1301 }

1302

         if (so->so_proto->pr_flags & PR_CONNREQUIRED) {

1304 /* Release the incoming socket but keep a reference */

                 socket_unlock(so, 0);

1306

                 if ((so2->so_options & SO_ACCEPTCONN) == 0 ||

                     (so3 = sonewconn(so2, 0, nam)) == 0) {

1309 error = ECONNREFUSED;

1310 if (so != so2) {

                                 socket_unlock(so2, 1);

                                 socket_lock(so, 0);

1313 } else {

                                 socket_lock(so, 0);

1315 /* Release the reference held for

1316 * listen socket.

1317 */

                                 VERIFY(so2->so_usecount > 0);

1319 so2->so_usecount--;

1320 }

1321 goto out;

1322 }

1323 unp2 = sotounpcb(so2);

1324 unp3 = sotounpcb(so3);

1325 if (unp2->unp_addr) {

1326 unp3->unp_addr = (struct sockaddr_un *)

                             dup_sockaddr((struct sockaddr *)unp2->unp_addr, 1);

1328 }

1329

1330 /*

1331 * unp_peercred management:

1332 *

1333 * The connecter's (client's) credentials are copied

1334 * from its process structure at the time of connect()

1335 * (which is now).

1336 */

                 cru2x(vfs_context_ucred(ctx), &unp3->unp_peercred);

1338 unp3->unp_flags |= UNP_HAVEPC;

1339 /*

1340 * The receiver's (server's) credentials are copied

1341 * from the unp_peercred member of socket on which the

1342 * former called listen(); unp_listen() cached that

1343 * process's credentials at that time so we can use

1344 * them now.

1345 */

                 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,

1347 ("unp_connect: listener without cached peercred"));

1348

1349 /* Here we need to have both so and so2 locks and so2

1350 * is already locked. Lock ordering is required.

1351 */

1352 if (so < so2) {

                         socket_unlock(so2, 0);

                         socket_lock(so, 0);

                         socket_lock(so2, 0);

1356 } else {

                         socket_lock(so, 0);

1358 }

1359

1360 /* Check again if the socket state changed when its lock was released */

                 if ((so->so_state & SS_ISCONNECTED) != 0) {

1362 error = EISCONN;

                         socket_unlock(so2, 1);

                         socket_lock(so3, 0);

                         sofreelastref(so3, 1);

1366 goto out;

1367 }

                 memcpy(&unp->unp_peercred, &unp2->unp_peercred,

1369 sizeof(unp->unp_peercred));

1370 unp->unp_flags |= UNP_HAVEPC;

1371

1372 /* Hold the reference on listening socket until the end */

                 socket_unlock(so2, 0);

1374 list_so = so2;

1375

1376 /* Lock ordering doesn't matter because so3 was just created */

                 socket_lock(so3, 1);

1378 so2 = so3;

1379

1380 /*

1381 * Enable tracing for mDNSResponder endpoints. (The use

1382 * of sizeof instead of strlen below takes the null

1383 * terminating character into account.)

1384 */

1385 if (unpst_tracemdns &&

                     !strncmp(soun->sun_path, MDNSRESPONDER_PATH,

1387 sizeof(MDNSRESPONDER_PATH))) {

1388 unp->unp_flags |= UNP_TRACE_MDNS;

1389 unp2->unp_flags |= UNP_TRACE_MDNS;

1390 }

1391 }

1392

         error = unp_connect2(so, so2);

1394

1395 decref_out:

1396 if (so2 != NULL) {

1397 if (so != so2) {

                         socket_unlock(so2, 1);

1399 } else {

1400 /* Release the extra reference held for the listen socket.

1401 * This is possible only for SOCK_DGRAM sockets. We refuse

1402 * connecting to the same socket for SOCK_STREAM sockets.

1403 */

                         VERIFY(so2->so_usecount > 0);

1405 so2->so_usecount--;

1406 }

1407 }

1408

1409 if (list_so != NULL) {

                 socket_lock(list_so, 0);

                 socket_unlock(list_so, 1);

1412 }

1413

1414 out:

         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);

1416 vnode_put(vp);

1417 return error;

1418 }

1419

1420 /*

1421 * Returns: 0 Success

1422 * EPROTOTYPE Protocol wrong type for socket

1423 * EINVAL Invalid argument

1424 */

1425 int

 unp_connect2(struct socket *so, struct socket *so2)

1427 {

         struct unpcb *unp = sotounpcb(so);

1429 struct unpcb *unp2;

1430

         if (so2->so_type != so->so_type) {

1432 return EPROTOTYPE;

1433 }

1434

1435 unp2 = sotounpcb(so2);

1436

         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);

         LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

1439

1440 /* Verify both sockets are still opened */

         if (unp == 0 || unp2 == 0) {

1442 return EINVAL;

1443 }

1444

1445 unp->unp_conn = unp2;

1446 so2->so_usecount++;

1447

1448 switch (so->so_type) {

1449 case SOCK_DGRAM:

                 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);

1451

1452 if (so != so2) {

1453 /* Avoid lock order reversals due to drop/acquire in soisconnected. */

1454 /* Keep an extra reference on so2 that will be dropped

1455 * soon after getting the locks in order

1456 */

                         socket_unlock(so2, 0);

1458 soisconnected(so);

1459 unp_get_locks_in_order(so, so2);

                         VERIFY(so2->so_usecount > 0);

1461 so2->so_usecount--;

1462 } else {

1463 soisconnected(so);

1464 }

1465

1466 break;

1467

1468 case SOCK_STREAM:

1469 /* This takes care of socketpair */

                 if (!(unp->unp_flags & UNP_HAVEPC) &&

1471 !(unp2->unp_flags & UNP_HAVEPC)) {

                         cru2x(kauth_cred_get(), &unp->unp_peercred);

1473 unp->unp_flags |= UNP_HAVEPC;

1474

                         cru2x(kauth_cred_get(), &unp2->unp_peercred);

1476 unp2->unp_flags |= UNP_HAVEPC;

1477 }

1478 unp2->unp_conn = unp;

1479 so->so_usecount++;

1480

1481 /* Avoid lock order reversals due to drop/acquire in soisconnected. */

                 socket_unlock(so, 0);

1483 soisconnected(so2);

1484

1485 /* Keep an extra reference on so2, that will be dropped soon after

1486 * getting the locks in order again.

1487 */

                 socket_unlock(so2, 0);

1489

                 socket_lock(so, 0);

1491 soisconnected(so);

1492

1493 unp_get_locks_in_order(so, so2);

1494 /* Decrement the extra reference left before */

                 VERIFY(so2->so_usecount > 0);

1496 so2->so_usecount--;

1497 break;

1498

1499 default:

                 panic("unknown socket type %d in unp_connect2", so->so_type);

1501 }

         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);

         LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

1504 return 0;

1505 }

1506

1507 static void

 unp_disconnect(struct unpcb *unp)

1509 {

1510 struct unpcb *unp2 = NULL;

         struct socket *so2 = NULL, *so;

1512 struct socket *waitso;

         int so_locked = 1, strdisconn = 0;

1514

1515 so = unp->unp_socket;

         if (unp->unp_conn == NULL) {

1517 return;

1518 }

1519 lck_mtx_lock(&unp_disconnect_lock);

         while (disconnect_in_progress != 0) {

                 if (so_locked == 1) {

                         socket_unlock(so, 0);

1523 so_locked = 0;

1524 }

                 (void)msleep((caddr_t)&disconnect_in_progress, &unp_disconnect_lock,

1526 PSOCK, "disconnect", NULL);

1527 }

1528 disconnect_in_progress = 1;

1529 lck_mtx_unlock(&unp_disconnect_lock);

1530

         if (so_locked == 0) {

                 socket_lock(so, 0);

1533 so_locked = 1;

1534 }

1535

1536 unp2 = unp->unp_conn;

1537

         if (unp2 == 0 || unp2->unp_socket == NULL) {

1539 goto out;

1540 }

1541 so2 = unp2->unp_socket;

1542

1543 try_again:

1544 if (so == so2) {

                 if (so_locked == 0) {

                         socket_lock(so, 0);

1547 }

1548 waitso = so;

         } else if (so < so2) {

                 if (so_locked == 0) {

                         socket_lock(so, 0);

1552 }

                 socket_lock(so2, 1);

1554 waitso = so2;

1555 } else {

                 if (so_locked == 1) {

                         socket_unlock(so, 0);

1558 }

                 socket_lock(so2, 1);

                 socket_lock(so, 0);

1561 waitso = so;

1562 }

1563 so_locked = 1;

1564

         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);

         LCK_MTX_ASSERT(&unp2->unp_mtx, LCK_MTX_ASSERT_OWNED);

1567

1568 /* Check for the UNP_DONTDISCONNECT flag, if it

1569 * is set, release both sockets and go to sleep

1570 */

1571

         if ((((struct unpcb *)waitso->so_pcb)->unp_flags & UNP_DONTDISCONNECT) != 0) {

1573 if (so != so2) {

                         socket_unlock(so2, 1);

1575 }

1576 so_locked = 0;

1577

                 (void)msleep(waitso->so_pcb, &unp->unp_mtx,

                     PSOCK | PDROP, "unpdisconnect", NULL);

1580 goto try_again;

1581 }

1582

         if (unp->unp_conn == NULL) {

1584 panic("unp_conn became NULL after sleep");

1585 }

1586

1587 unp->unp_conn = NULL;

         VERIFY(so2->so_usecount > 0);

1589 so2->so_usecount--;

1590

         if (unp->unp_flags & UNP_TRACE_MDNS) {

1592 unp->unp_flags &= ~UNP_TRACE_MDNS;

1593 }

1594

         switch (unp->unp_socket->so_type) {

1596 case SOCK_DGRAM:

1597 LIST_REMOVE(unp, unp_reflink);

1598 unp->unp_socket->so_state &= ~SS_ISCONNECTED;

1599 if (so != so2) {

                         socket_unlock(so2, 1);

1601 }

1602 break;

1603

1604 case SOCK_STREAM:

1605 unp2->unp_conn = NULL;

                 VERIFY(so->so_usecount > 0);

1607 so->so_usecount--;

1608

1609 /* Set the socket state correctly but do a wakeup later when

1610 * we release all locks except the socket lock, this will avoid

1611 * a deadlock.

1612 */

                 unp->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);

                 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

1615

                 unp2->unp_socket->so_state &= ~(SS_ISCONNECTING | SS_ISCONNECTED | SS_ISDISCONNECTING);

                 unp->unp_socket->so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE | SS_ISDISCONNECTED);

1618

                 if (unp2->unp_flags & UNP_TRACE_MDNS) {

1620 unp2->unp_flags &= ~UNP_TRACE_MDNS;

1621 }

1622

1623 strdisconn = 1;

1624 break;

1625 default:

                 panic("unknown socket type %d", so->so_type);

1627 }

1628 out:

1629 lck_mtx_lock(&unp_disconnect_lock);

1630 disconnect_in_progress = 0;

1631 wakeup(&disconnect_in_progress);

1632 lck_mtx_unlock(&unp_disconnect_lock);

1633

1634 if (strdisconn) {

                 socket_unlock(so, 0);

1636 soisdisconnected(so2);

                 socket_unlock(so2, 1);

1638

                 socket_lock(so, 0);

1640 soisdisconnected(so);

1641 }

         LCK_MTX_ASSERT(&unp->unp_mtx, LCK_MTX_ASSERT_OWNED);

1643 return;

1644 }

1645

1646 /*

1647 * unpcb_to_compat copies specific bits of a unpcb to a unpcb_compat format.

1648 * The unpcb_compat data structure is passed to user space and must not change.

1649 */

1650 static void

 unpcb_to_compat(struct unpcb *up, struct unpcb_compat *cp)

1652 {

1653 #if defined(__LP64__)

1654 cp->unp_link.le_next = (u_int32_t)

             VM_KERNEL_ADDRPERM(up->unp_link.le_next);

1656 cp->unp_link.le_prev = (u_int32_t)

             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);

1658 #else

         cp->unp_link.le_next = (struct unpcb_compat *)

             VM_KERNEL_ADDRPERM(up->unp_link.le_next);

         cp->unp_link.le_prev = (struct unpcb_compat **)

             VM_KERNEL_ADDRPERM(up->unp_link.le_prev);

1663 #endif

         cp->unp_socket = (_UNPCB_PTR(struct socket *))

1665 VM_KERNEL_ADDRPERM(up->unp_socket);

         cp->unp_vnode = (_UNPCB_PTR(struct vnode *))

1667 VM_KERNEL_ADDRPERM(up->unp_vnode);

1668 cp->unp_ino = up->unp_ino;

         cp->unp_conn = (_UNPCB_PTR(struct unpcb_compat *))

1670 VM_KERNEL_ADDRPERM(up->unp_conn);

         cp->unp_refs = (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_refs.lh_first);

1672 #if defined(__LP64__)

1673 cp->unp_reflink.le_next =

             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);

1675 cp->unp_reflink.le_prev =

             (u_int32_t)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);

1677 #else

1678 cp->unp_reflink.le_next =

             (struct unpcb_compat *)VM_KERNEL_ADDRPERM(up->unp_reflink.le_next);

1680 cp->unp_reflink.le_prev =

             (struct unpcb_compat **)VM_KERNEL_ADDRPERM(up->unp_reflink.le_prev);

1682 #endif

         cp->unp_addr = (_UNPCB_PTR(struct sockaddr_un *))

1684 VM_KERNEL_ADDRPERM(up->unp_addr);

1685 cp->unp_cc = up->unp_cc;

1686 cp->unp_mbcnt = up->unp_mbcnt;

1687 cp->unp_gencnt = up->unp_gencnt;

1688 }

1689

1690 static int

1691 unp_pcblist SYSCTL_HANDLER_ARGS

1692 {

1693 #pragma unused(oidp,arg2)

1694 int error, i, n;

1695 struct unpcb *unp, **unp_list;

1696 unp_gen_t gencnt;

1697 struct xunpgen xug;

1698 struct unp_head *head;

1699

1700 lck_rw_lock_shared(&unp_list_mtx);

         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

1702

1703 /*

1704 * The process of preparing the PCB list is too time-consuming and

1705 * resource-intensive to repeat twice on every request.

1706 */

         if (req->oldptr == USER_ADDR_NULL) {

1708 n = unp_count;

                 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *

1710 sizeof(struct xunpcb);

1711 lck_rw_done(&unp_list_mtx);

1712 return 0;

1713 }

1714

         if (req->newptr != USER_ADDR_NULL) {

1716 lck_rw_done(&unp_list_mtx);

1717 return EPERM;

1718 }

1719

1720 /*

1721 * OK, now we're committed to doing something.

1722 */

1723 gencnt = unp_gencnt;

1724 n = unp_count;

1725

         bzero(&xug, sizeof(xug));

         xug.xug_len = sizeof(xug);

1728 xug.xug_count = n;

1729 xug.xug_gen = gencnt;

1730 xug.xug_sogen = so_gencnt;

         error = SYSCTL_OUT(req, &xug, sizeof(xug));

1732 if (error) {

1733 lck_rw_done(&unp_list_mtx);

1734 return error;

1735 }

1736

1737 /*

1738 * We are done if there is no pcb

1739 */

         if (n == 0) {

1741 lck_rw_done(&unp_list_mtx);

1742 return 0;

1743 }

1744

         size_t unp_list_len = n * sizeof(*unp_list);

         unp_list = kheap_alloc(KHEAP_TEMP, unp_list_len, Z_WAITOK);

         if (unp_list == 0) {

1748 lck_rw_done(&unp_list_mtx);

1749 return ENOMEM;

1750 }

1751

         for (unp = head->lh_first, i = 0; unp && i < n;

1753 unp = unp->unp_link.le_next) {

                 if (unp->unp_gencnt <= gencnt) {

1755 unp_list[i++] = unp;

1756 }

1757 }

1758 n = i; /* in case we lost some during malloc */

1759

1760 error = 0;

         for (i = 0; i < n; i++) {

1762 unp = unp_list[i];

                 if (unp->unp_gencnt <= gencnt) {

1764 struct xunpcb xu;

1765

                         bzero(&xu, sizeof(xu));

                         xu.xu_len = sizeof(xu);

                         xu.xu_unpp = (_UNPCB_PTR(struct unpcb_compat *))

1769 VM_KERNEL_ADDRPERM(unp);

1770 /*

1771 * XXX - need more locking here to protect against

1772 * connect/disconnect races for SMP.

1773 */

1774 if (unp->unp_addr) {

                                 bcopy(unp->unp_addr, &xu.xu_au,

1776 unp->unp_addr->sun_len);

1777 }

                         if (unp->unp_conn && unp->unp_conn->unp_addr) {

                                 bcopy(unp->unp_conn->unp_addr,

1780 &xu.xu_cau,

1781 unp->unp_conn->unp_addr->sun_len);

1782 }

                         unpcb_to_compat(unp, &xu.xu_unp);

                         sotoxsocket(unp->unp_socket, &xu.xu_socket);

                         error = SYSCTL_OUT(req, &xu, sizeof(xu));

1786 }

1787 }

1788 if (!error) {

1789 /*

1790 * Give the user an updated idea of our state.

1791 * If the generation differs from what we told

1792 * her before, she knows that something happened

1793 * while we were processing this request, and it

1794 * might be necessary to retry.

1795 */

                 bzero(&xug, sizeof(xug));

                 xug.xug_len = sizeof(xug);

1798 xug.xug_gen = unp_gencnt;

1799 xug.xug_sogen = so_gencnt;

1800 xug.xug_count = unp_count;

                 error = SYSCTL_OUT(req, &xug, sizeof(xug));

1802 }

         kheap_free(KHEAP_TEMP, unp_list, unp_list_len);

1804 lck_rw_done(&unp_list_mtx);

1805 return error;

1806 }

1807

 /*
  * Read-only "pcblist" sysctl nodes under _net_local_dgram and
  * _net_local_stream.  Both use unp_pcblist as their handler; the socket
  * type is passed as the handler's arg1, which unp_pcblist uses to choose
  * the pcb list it walks.
  */
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist,

1809 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,

     (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",

1811 "List of active local datagram sockets");

 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist,

1813 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,

     (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",

1815 "List of active local stream sockets");

1816

1817 #if XNU_TARGET_OS_OSX

1818

1819 static int

1820 unp_pcblist64 SYSCTL_HANDLER_ARGS

1821 {

1822 #pragma unused(oidp,arg2)

1823 int error, i, n;

1824 struct unpcb *unp, **unp_list;

1825 unp_gen_t gencnt;

1826 struct xunpgen xug;

1827 struct unp_head *head;

1828

1829 lck_rw_lock_shared(&unp_list_mtx);

         head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

1831

1832 /*

1833 * The process of preparing the PCB list is too time-consuming and

1834 * resource-intensive to repeat twice on every request.

1835 */

         if (req->oldptr == USER_ADDR_NULL) {

1837 n = unp_count;

                 req->oldidx = 2 * sizeof(xug) + (n + n / 8) *

                     (sizeof(struct xunpcb64));

1840 lck_rw_done(&unp_list_mtx);

1841 return 0;

1842 }

1843

         if (req->newptr != USER_ADDR_NULL) {

1845 lck_rw_done(&unp_list_mtx);

1846 return EPERM;

1847 }

1848

1849 /*

1850 * OK, now we're committed to doing something.

1851 */

1852 gencnt = unp_gencnt;

1853 n = unp_count;

1854

         bzero(&xug, sizeof(xug));

         xug.xug_len = sizeof(xug);

1857 xug.xug_count = n;

1858 xug.xug_gen = gencnt;

1859 xug.xug_sogen = so_gencnt;

         error = SYSCTL_OUT(req, &xug, sizeof(xug));

1861 if (error) {

1862 lck_rw_done(&unp_list_mtx);

1863 return error;

1864 }

1865

1866 /*

1867 * We are done if there is no pcb

1868 */

         if (n == 0) {

1870 lck_rw_done(&unp_list_mtx);

1871 return 0;

1872 }

1873

         size_t unp_list_size = n * sizeof(*unp_list);

         unp_list = kheap_alloc(KHEAP_TEMP, unp_list_size, Z_WAITOK);

         if (unp_list == 0) {

1877 lck_rw_done(&unp_list_mtx);

1878 return ENOMEM;

1879 }

1880

         for (unp = head->lh_first, i = 0; unp && i < n;

1882 unp = unp->unp_link.le_next) {

                 if (unp->unp_gencnt <= gencnt) {

1884 unp_list[i++] = unp;

1885 }

1886 }

1887 n = i; /* in case we lost some during malloc */

1888

1889 error = 0;

         for (i = 0; i < n; i++) {

1891 unp = unp_list[i];

                 if (unp->unp_gencnt <= gencnt) {

1893 struct xunpcb64 xu;

                         size_t          xu_len = sizeof(struct xunpcb64);

1895

1896 bzero(&xu, xu_len);

1897 xu.xu_len = (u_int32_t)xu_len;

                         xu.xu_unpp = (u_int64_t)VM_KERNEL_ADDRPERM(unp);

1899 xu.xunp_link.le_next = (u_int64_t)

                             VM_KERNEL_ADDRPERM(unp->unp_link.le_next);

1901 xu.xunp_link.le_prev = (u_int64_t)

                             VM_KERNEL_ADDRPERM(unp->unp_link.le_prev);

1903 xu.xunp_socket = (u_int64_t)

1904 VM_KERNEL_ADDRPERM(unp->unp_socket);

1905 xu.xunp_vnode = (u_int64_t)

1906 VM_KERNEL_ADDRPERM(unp->unp_vnode);

1907 xu.xunp_ino = unp->unp_ino;

1908 xu.xunp_conn = (u_int64_t)

1909 VM_KERNEL_ADDRPERM(unp->unp_conn);

1910 xu.xunp_refs = (u_int64_t)

                             VM_KERNEL_ADDRPERM(unp->unp_refs.lh_first);

1912 xu.xunp_reflink.le_next = (u_int64_t)

                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_next);

1914 xu.xunp_reflink.le_prev = (u_int64_t)

                             VM_KERNEL_ADDRPERM(unp->unp_reflink.le_prev);

1916 xu.xunp_cc = unp->unp_cc;

1917 xu.xunp_mbcnt = unp->unp_mbcnt;

1918 xu.xunp_gencnt = unp->unp_gencnt;

1919

1920 if (unp->unp_socket) {

                                 sotoxsocket64(unp->unp_socket, &xu.xu_socket);

1922 }

1923

1924 /*

1925 * XXX - need more locking here to protect against

1926 * connect/disconnect races for SMP.

1927 */

1928 if (unp->unp_addr) {

                                 bcopy(unp->unp_addr, &xu.xu_au,

1930 unp->unp_addr->sun_len);

1931 }

                         if (unp->unp_conn && unp->unp_conn->unp_addr) {

                                 bcopy(unp->unp_conn->unp_addr,

1934 &xu.xu_cau,

1935 unp->unp_conn->unp_addr->sun_len);

1936 }

1937

                         error = SYSCTL_OUT(req, &xu, xu_len);

1939 }

1940 }

1941 if (!error) {

1942 /*

1943 * Give the user an updated idea of our state.

1944 * If the generation differs from what we told

1945 * her before, she knows that something happened

1946 * while we were processing this request, and it

1947 * might be necessary to retry.

1948 */

                 bzero(&xug, sizeof(xug));

                 xug.xug_len = sizeof(xug);

1951 xug.xug_gen = unp_gencnt;

1952 xug.xug_sogen = so_gencnt;

1953 xug.xug_count = unp_count;

                 error = SYSCTL_OUT(req, &xug, sizeof(xug));

1955 }

         kheap_free(KHEAP_TEMP, unp_list, unp_list_size);

1957 lck_rw_done(&unp_list_mtx);

1958 return error;

1959 }

1960

 /*
  * Read-only "pcblist64" sysctl nodes under _net_local_dgram and
  * _net_local_stream.  Both use unp_pcblist64 as their handler; the
  * socket type is passed as the handler's arg1, which unp_pcblist64 uses
  * to choose the pcb list it walks.
  */
 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist64,

1962 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,

     (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist64, "S,xunpcb64",

1964 "List of active local datagram sockets 64 bit");

 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist64,

1966 CTLTYPE_STRUCT | CTLFLAG_RD | CTLFLAG_LOCKED,

     (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist64, "S,xunpcb64",

1968 "List of active local stream sockets 64 bit");

1969

1970 #endif /* XNU_TARGET_OS_OSX */

1971

1972 static void

 unp_shutdown(struct unpcb *unp)

1974 {

         struct socket *so = unp->unp_socket;

1976 struct socket *so2;

         if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn) {

1978 so2 = unp->unp_conn->unp_socket;

1979 unp_get_locks_in_order(so, so2);

1980 socantrcvmore(so2);

                 socket_unlock(so2, 1);

1982 }

1983 }

1984

1985 static void

 unp_drop(struct unpcb *unp, int errno)

1987 {

         struct socket *so = unp->unp_socket;

1989

1990 so->so_error = (u_short)errno;

1991 unp_disconnect(unp);

1992 }

1993

1994 /* always called under uipc_lock */

1995 static void

1996 unp_gc_wait(void)

1997 {

         if (unp_gcthread == current_thread()) {

1999 return;

2000 }

2001

         while (unp_gcing != 0) {

2003 unp_gcwait = 1;

                 msleep(&unp_gcing, &uipc_lock, 0, "unp_gc_wait", NULL);

2005 }

2006 }

2007

2008 /*

2009 * fg_insertuipc_mark

2010 *

2011 * Description: Mark fileglob for insertion onto message queue if needed

2012 * Also takes fileglob reference

2013 *

2014 * Parameters: fg Fileglob pointer to insert

2015 *

2016 * Returns: true, if the fileglob needs to be inserted onto msg queue

2017 *

2018 * Locks: Takes and drops fg_lock, potentially many times

2019 */

2020 static boolean_t

 fg_insertuipc_mark(struct fileglob * fg)

2022 {

2023 boolean_t insert = FALSE;

2024

2025 lck_mtx_lock_spin(&fg->fg_lock);

         while (fg->fg_lflags & FG_RMMSGQ) {

2027 lck_mtx_convert_spin(&fg->fg_lock);

2028

2029 fg->fg_lflags |= FG_WRMMSGQ;

                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_insertuipc", NULL);

2031 }

2032

         os_ref_retain_raw(&fg->fg_count, &f_refgrp);

2034 fg->fg_msgcount++;

         if (fg->fg_msgcount == 1) {

2036 fg->fg_lflags |= FG_INSMSGQ;

2037 insert = TRUE;

2038 }

2039 lck_mtx_unlock(&fg->fg_lock);

2040 return insert;

2041 }

2042

2043 /*

2044 * fg_insertuipc

2045 *

2046 * Description: Insert marked fileglob onto message queue

2047 *

2048 * Parameters: fg Fileglob pointer to insert

2049 *

2050 * Returns: void

2051 *

2052 * Locks: Takes and drops fg_lock & uipc_lock

2053 * DO NOT call this function with proc_fdlock held as unp_gc()

2054 * can potentially try to acquire proc_fdlock, which can result

2055 * in a deadlock if this function is in unp_gc_wait().

2056 */

2057 static void

 fg_insertuipc(struct fileglob * fg)

2059 {

         if (fg->fg_lflags & FG_INSMSGQ) {

2061 lck_mtx_lock_spin(&uipc_lock);

2062 unp_gc_wait();

                 LIST_INSERT_HEAD(&unp_msghead, fg, f_msglist);

2064 lck_mtx_unlock(&uipc_lock);

2065 lck_mtx_lock(&fg->fg_lock);

2066 fg->fg_lflags &= ~FG_INSMSGQ;

                 if (fg->fg_lflags & FG_WINSMSGQ) {

2068 fg->fg_lflags &= ~FG_WINSMSGQ;

2069 wakeup(&fg->fg_lflags);

2070 }

2071 lck_mtx_unlock(&fg->fg_lock);

2072 }

2073 }

2074

2075 /*

2076 * fg_removeuipc_mark

2077 *

2078 * Description: Mark the fileglob for removal from message queue if needed

2079 * Also releases fileglob message queue reference

2080 *

2081 * Parameters: fg Fileglob pointer to remove

2082 *

2083 * Returns: true, if the fileglob needs to be removed from msg queue

2084 *

2085 * Locks: Takes and drops fg_lock, potentially many times

2086 */

2087 static boolean_t

 fg_removeuipc_mark(struct fileglob * fg)

2089 {

2090 boolean_t remove = FALSE;

2091

2092 lck_mtx_lock_spin(&fg->fg_lock);

         while (fg->fg_lflags & FG_INSMSGQ) {

2094 lck_mtx_convert_spin(&fg->fg_lock);

2095

2096 fg->fg_lflags |= FG_WINSMSGQ;

                 msleep(&fg->fg_lflags, &fg->fg_lock, 0, "fg_removeuipc", NULL);

2098 }

2099 fg->fg_msgcount--;

         if (fg->fg_msgcount == 0) {

2101 fg->fg_lflags |= FG_RMMSGQ;

2102 remove = TRUE;

2103 }

2104 lck_mtx_unlock(&fg->fg_lock);

2105 return remove;

2106 }

2107

2108 /*

2109 * fg_removeuipc

2110 *

2111 * Description: Remove marked fileglob from message queue

2112 *

2113 * Parameters: fg Fileglob pointer to remove

2114 *

2115 * Returns: void

2116 *

2117 * Locks: Takes and drops fg_lock & uipc_lock

2118 * DO NOT call this function with proc_fdlock held as unp_gc()

2119 * can potentially try to acquire proc_fdlock, which can result

2120 * in a deadlock if this function is in unp_gc_wait().

2121 */

2122 static void

 fg_removeuipc(struct fileglob * fg)

2124 {

         if (fg->fg_lflags & FG_RMMSGQ) {

2126 lck_mtx_lock_spin(&uipc_lock);

2127 unp_gc_wait();

2128 LIST_REMOVE(fg, f_msglist);

2129 lck_mtx_unlock(&uipc_lock);

2130 lck_mtx_lock(&fg->fg_lock);

2131 fg->fg_lflags &= ~FG_RMMSGQ;

                 if (fg->fg_lflags & FG_WRMMSGQ) {

2133 fg->fg_lflags &= ~FG_WRMMSGQ;

2134 wakeup(&fg->fg_lflags);

2135 }

2136 lck_mtx_unlock(&fg->fg_lock);

2137 }

2138 }

2139

2140 /*

2141 * Returns: 0 Success

2142 * EMSGSIZE The new fd's will not fit

2143 * ENOBUFS Cannot alloc struct fileproc

2144 */

2145 int

 unp_externalize(struct mbuf *rights)

2147 {

2148 proc_t p = current_proc(); /* XXX */

2149 int i;

         struct cmsghdr *cm = mtod(rights, struct cmsghdr *);

         struct fileglob **rp = (struct fileglob **)(cm + 1);

         int *fds = (int *)(cm + 1);

2153 struct fileproc *fp;

2154 struct fileproc **fileproc_l;

         int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);

         int f, error = 0;

2157

2158 fileproc_l = kheap_alloc(KHEAP_TEMP,

             newfds * sizeof(struct fileproc *), Z_WAITOK);

2160 if (fileproc_l == NULL) {

2161 error = ENOMEM;

2162 goto discard;

2163 }

2164

2165 proc_fdlock(p);

2166

2167 /*

2168 * if the new FD's will not fit, then we free them all

2169 */

         if (!fdavail(p, newfds)) {

2171 proc_fdunlock(p);

2172 error = EMSGSIZE;

2173 goto discard;

2174 }

2175 /*

2176 * now change each pointer to an fd in the global table to

2177 * an integer that is the index to the local fd table entry

2178 * that we set up to point to the global one we are transferring.

2179 * XXX (1) this assumes a pointer and int are the same size,

2180 * XXX or the mbuf can hold the expansion

2181 * XXX (2) allocation failures should be non-fatal

2182 */

         for (i = 0; i < newfds; i++) {

                 if (fdalloc(p, 0, &f)) {

2185 panic("unp_externalize:fdalloc");

2186 }

2187 fp = fileproc_alloc_init(NULL);

2188 if (fp == NULL) {

2189 panic("unp_externalize:fileproc_alloc_init");

2190 }

2191 fp->fp_glob = rp[i];

                 if (fg_removeuipc_mark(rp[i])) {

2193 /*

2194 * Take an iocount on the fp for completing the

2195 * removal from the global msg queue

2196 */

2197 os_ref_retain_locked(&fp->fp_iocount);

2198 fileproc_l[i] = fp;

2199 } else {

2200 fileproc_l[i] = NULL;

2201 }

                 procfdtbl_releasefd(p, f, fp);

2203 fds[i] = f;

2204 }

2205 proc_fdunlock(p);

2206

         for (i = 0; i < newfds; i++) {

                 if (fileproc_l[i] != NULL) {

                         VERIFY(fileproc_l[i]->fp_glob != NULL &&

                             (fileproc_l[i]->fp_glob->fg_lflags & FG_RMMSGQ));

                         VERIFY(fds[i] >= 0);

                         fg_removeuipc(fileproc_l[i]->fp_glob);

2213

2214 /* Drop the iocount */

                         fp_drop(p, fds[i], fileproc_l[i], 0);

2216 fileproc_l[i] = NULL;

2217 }

                 if (fds[i] != 0) {

                         (void) OSAddAtomic(-1, &unp_rights);

2220 }

2221 }

2222

2223 discard:

2224 kheap_free(KHEAP_TEMP, fileproc_l,

             newfds * sizeof(struct fileproc *));

2226 if (error) {

                 for (i = 0; i < newfds; i++) {

2228 unp_discard(*rp, p);

2229 *rp++ = NULL;

2230 }

2231 }

2232 return error;

2233 }

2234

2235 void

2236 unp_init(void)

2237 {

         _CASSERT(UIPC_MAX_CMSG_FD >= (MCLBYTES / sizeof(int)));

2239 LIST_INIT(&unp_dhead);

2240 LIST_INIT(&unp_shead);

2241 }

2242

/* Smaller of two values; note that each argument may be evaluated twice. */
#if !defined(MIN)
#define MIN(a, b) ((b) > (a) ? (a) : (b))
#endif

2246

2247 /*

2248 * Returns: 0 Success

2249 * EINVAL

2250 * EBADF

2251 */

2252 static int

 unp_internalize(struct mbuf *control, proc_t p)

2254 {

         struct cmsghdr *cm = mtod(control, struct cmsghdr *);

2256 int *fds;

2257 struct fileglob **rp;

2258 struct fileproc *fp;

2259 int i, error;

2260 int oldfds;

         uint8_t fg_ins[UIPC_MAX_CMSG_FD / 8];

2262

2263 /* 64bit: cmsg_len is 'uint32_t', m_len is 'long' */

         if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||

             (socklen_t)cm->cmsg_len != (socklen_t)control->m_len) {

2266 return EINVAL;

2267 }

         oldfds = (cm->cmsg_len - sizeof(*cm)) / sizeof(int);

         bzero(fg_ins, sizeof(fg_ins));

2270

2271 proc_fdlock(p);

         fds = (int *)(cm + 1);

2273

         for (i = 0; i < oldfds; i++) {

2275 struct fileproc *tmpfp;

                 if ((tmpfp = fp_get_noref_locked(p, fds[i])) == NULL) {

2277 proc_fdunlock(p);

2278 return EBADF;

                 } else if (!fg_sendable(tmpfp->fp_glob)) {

2280 proc_fdunlock(p);

2281 return EINVAL;

                 } else if (fp_isguarded(tmpfp, GUARD_SOCKET_IPC)) {

2283 error = fp_guard_exception(p,

2284 fds[i], tmpfp, kGUARD_EXC_SOCKET_IPC);

2285 proc_fdunlock(p);

2286 return error;

2287 }

2288 }

         rp = (struct fileglob **)(cm + 1);

2290

2291 /* On K64 we need to walk backwards because a fileglob * is twice the size of an fd

2292 * and doing them in-order would result in stomping over unprocessed fd's

2293 */

         for (i = (oldfds - 1); i >= 0; i--) {

                 fp = fp_get_noref_locked(p, fds[i]);

                 if (fg_insertuipc_mark(fp->fp_glob)) {

                         fg_ins[i / 8] |= 0x80 >> (i % 8);

2298 }

2299 rp[i] = fp->fp_glob;

2300 }

2301 proc_fdunlock(p);

2302

         for (i = 0; i < oldfds; i++) {

                 if (fg_ins[i / 8] & (0x80 >> (i % 8))) {

                         VERIFY(rp[i]->fg_lflags & FG_INSMSGQ);

2306 fg_insertuipc(rp[i]);

2307 }

                 (void) OSAddAtomic(1, &unp_rights);

2309 }

2310

2311 return 0;

2312 }

2313

2314 __private_extern__ void

2315 unp_gc(void)

2316 {

2317 struct fileglob *fg, *nextfg;

2318 struct socket *so;

2319 static struct fileglob **extra_ref;

2320 struct fileglob **fpp;

2321 int nunref, i;

2322 int need_gcwakeup = 0;

2323

2324 lck_mtx_lock(&uipc_lock);

2325 if (unp_gcing) {

2326 lck_mtx_unlock(&uipc_lock);

2327 return;

2328 }

2329 unp_gcing = 1;

2330 unp_defer = 0;

2331 unp_gcthread = current_thread();

2332 lck_mtx_unlock(&uipc_lock);

2333 /*

2334 * before going through all this, set all FDs to

2335 * be NOT defered and NOT externally accessible

2336 */

         for (fg = unp_msghead.lh_first; fg != 0; fg = fg->f_msglist.le_next) {

                 os_atomic_andnot(&fg->fg_flag, FMARK | FDEFER, relaxed);

2339 }

2340 do {

                 for (fg = unp_msghead.lh_first; fg != 0;

2342 fg = fg->f_msglist.le_next) {

2343 lck_mtx_lock(&fg->fg_lock);

2344 /*

2345 * If the file is not open, skip it

2346 */

                         if (os_ref_get_count_raw(&fg->fg_count) == 0) {

2348 lck_mtx_unlock(&fg->fg_lock);

2349 continue;

2350 }

2351 /*

2352 * If we already marked it as 'defer' in a

2353 * previous pass, then try process it this time

2354 * and un-mark it

2355 */

                         if (fg->fg_flag & FDEFER) {

                                 os_atomic_andnot(&fg->fg_flag, FDEFER, relaxed);

2358 unp_defer--;

2359 } else {

2360 /*

2361 * if it's not defered, then check if it's

2362 * already marked.. if so skip it

2363 */

                                 if (fg->fg_flag & FMARK) {

2365 lck_mtx_unlock(&fg->fg_lock);

2366 continue;

2367 }

2368 /*

2369 * If all references are from messages

2370 * in transit, then skip it. it's not

2371 * externally accessible.

2372 */

                                 if (os_ref_get_count_raw(&fg->fg_count) ==

2374 fg->fg_msgcount) {

2375 lck_mtx_unlock(&fg->fg_lock);

2376 continue;

2377 }

2378 /*

2379 * If it got this far then it must be

2380 * externally accessible.

2381 */

                                 os_atomic_or(&fg->fg_flag, FMARK, relaxed);

2383 }

2384 /*

2385 * either it was defered, or it is externally

2386 * accessible and not already marked so.

2387 * Now check if it is possibly one of OUR sockets.

2388 */

                         if (FILEGLOB_DTYPE(fg) != DTYPE_SOCKET ||

                             (so = (struct socket *)fg->fg_data) == 0) {

2391 lck_mtx_unlock(&fg->fg_lock);

2392 continue;

2393 }

                         if (so->so_proto->pr_domain != localdomain ||

                             (so->so_proto->pr_flags & PR_RIGHTS) == 0) {

2396 lck_mtx_unlock(&fg->fg_lock);

2397 continue;

2398 }

2399 #ifdef notdef

                         if (so->so_rcv.sb_flags & SB_LOCK) {

2401 /*

2402 * This is problematical; it's not clear

2403 * we need to wait for the sockbuf to be

2404 * unlocked (on a uniprocessor, at least),

2405 * and it's also not clear what to do

2406 * if sbwait returns an error due to receipt

2407 * of a signal. If sbwait does return

2408 * an error, we'll go into an infinite

2409 * loop. Delete all of this for now.

2410 */

                                 (void) sbwait(&so->so_rcv);

2412 goto restart;

2413 }

2414 #endif

2415 /*

2416 * So, Ok, it's one of our sockets and it IS externally

2417 * accessible (or was defered). Now we look

2418 * to see if we hold any file descriptors in its

2419 * message buffers. Follow those links and mark them

2420 * as accessible too.

2421 *

2422 * In case a file is passed onto itself we need to

2423 * release the file lock.

2424 */

2425 lck_mtx_unlock(&fg->fg_lock);

2426

                         unp_scan(so->so_rcv.sb_mb, unp_mark, 0);

2428 }

2429 } while (unp_defer);

2430 /*

2431 * We grab an extra reference to each of the file table entries

2432 * that are not otherwise accessible and then free the rights

2433 * that are stored in messages on them.

2434 *

2435 * The bug in the orginal code is a little tricky, so I'll describe

2436 * what's wrong with it here.

2437 *

2438 * It is incorrect to simply unp_discard each entry for fg_msgcount

2439 * times -- consider the case of sockets A and B that contain

2440 * references to each other. On a last close of some other socket,

2441 * we trigger a gc since the number of outstanding rights (unp_rights)

2442 * is non-zero. If during the sweep phase the gc code un_discards,

2443 * we end up doing a (full) closef on the descriptor. A closef on A

2444 * results in the following chain. Closef calls soo_close, which

2445 * calls soclose. Soclose calls first (through the switch

2446 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply

2447 * returns because the previous instance had set unp_gcing, and

2448 * we return all the way back to soclose, which marks the socket

2449 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush

2450 * to free up the rights that are queued in messages on the socket A,

2451 * i.e., the reference on B. The sorflush calls via the dom_dispose

2452 * switch unp_dispose, which unp_scans with unp_discard. This second

2453 * instance of unp_discard just calls closef on B.

2454 *

2455 * Well, a similar chain occurs on B, resulting in a sorflush on B,

2456 * which results in another closef on A. Unfortunately, A is already

2457 * being closed, and the descriptor has already been marked with

2458 * SS_NOFDREF, and soclose panics at this point.

2459 *

2460 * Here, we first take an extra reference to each inaccessible

2461 * descriptor. Then, we call sorflush ourself, since we know

2462 * it is a Unix domain socket anyhow. After we destroy all the

2463 * rights carried in messages, we do a last closef to get rid

2464 * of our extra reference. This is the last close, and the

2465 * unp_detach etc will shut down the socket.

2466 *

2467 * 91/09/19, bsy@cs.cmu.edu

2468 */

         size_t extra_ref_size = nfiles * sizeof(struct fileglob *);

         extra_ref = kheap_alloc(KHEAP_TEMP, extra_ref_size, Z_WAITOK);

2471 if (extra_ref == NULL) {

2472 goto bail;

2473 }

         for (nunref = 0, fg = unp_msghead.lh_first, fpp = extra_ref; fg != 0;

2475 fg = nextfg) {

2476 lck_mtx_lock(&fg->fg_lock);

2477

2478 nextfg = fg->f_msglist.le_next;

2479 /*

2480 * If it's not open, skip it

2481 */

                 if (os_ref_get_count_raw(&fg->fg_count) == 0) {

2483 lck_mtx_unlock(&fg->fg_lock);

2484 continue;

2485 }

2486 /*

2487 * If all refs are from msgs, and it's not marked accessible

2488 * then it must be referenced from some unreachable cycle

2489 * of (shut-down) FDs, so include it in our

2490 * list of FDs to remove

2491 */

                 if (fg->fg_flag & FMARK) {

2493 lck_mtx_unlock(&fg->fg_lock);

2494 continue;

2495 }

                 if (os_ref_get_count_raw(&fg->fg_count) == fg->fg_msgcount) {

                         os_ref_retain_raw(&fg->fg_count, &f_refgrp);

2498 *fpp++ = fg;

2499 nunref++;

2500 }

2501 lck_mtx_unlock(&fg->fg_lock);

2502 }

2503 /*

2504 * for each FD on our hit list, do the following two things

2505 */

         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {

2507 struct fileglob *tfg;

2508

2509 tfg = *fpp;

2510

                 if (FILEGLOB_DTYPE(tfg) == DTYPE_SOCKET &&

2512 tfg->fg_data != NULL) {

                         so = (struct socket *)(tfg->fg_data);

2514

                         socket_lock(so, 0);

2516

2517 sorflush(so);

2518

                         socket_unlock(so, 0);

2520 }

2521 }

         for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {

2523 fg_drop(PROC_NULL, *fpp);

2524 }

2525

         kheap_free(KHEAP_TEMP, extra_ref, extra_ref_size);

2527

2528 bail:

2529 lck_mtx_lock(&uipc_lock);

2530 unp_gcing = 0;

2531 unp_gcthread = NULL;

2532

         if (unp_gcwait != 0) {

2534 unp_gcwait = 0;

2535 need_gcwakeup = 1;

2536 }

2537 lck_mtx_unlock(&uipc_lock);

2538

         if (need_gcwakeup != 0) {

2540 wakeup(&unp_gcing);

2541 }

2542 }

2543

2544 void

 unp_dispose(struct mbuf *m)

2546 {

2547 if (m) {

                 unp_scan(m, unp_discard, NULL);

2549 }

2550 }

2551

2552 /*

2553 * Returns: 0 Success

2554 */

2555 static int

 unp_listen(struct unpcb *unp, proc_t p)

2557 {

2558 kauth_cred_t safecred = kauth_cred_proc_ref(p);

         cru2x(safecred, &unp->unp_peercred);

2560 kauth_cred_unref(&safecred);

2561 unp->unp_flags |= UNP_HAVEPCCACHED;

2562 return 0;

2563 }

2564

2565 static void

 unp_scan(struct mbuf *m0, void (*op)(struct fileglob *, void *arg), void *arg)

2567 {

2568 struct mbuf *m;

2569 struct fileglob **rp;

2570 struct cmsghdr *cm;

2571 int i;

2572 int qfds;

2573

2574 while (m0) {

                 for (m = m0; m; m = m->m_next) {

                         if (m->m_type == MT_CONTROL &&

                             (size_t)m->m_len >= sizeof(*cm)) {

                                 cm = mtod(m, struct cmsghdr *);

                                 if (cm->cmsg_level != SOL_SOCKET ||

2580 cm->cmsg_type != SCM_RIGHTS) {

2581 continue;

2582 }

                                 qfds = (cm->cmsg_len - sizeof(*cm)) /

2584 sizeof(int);

                                 rp = (struct fileglob **)(cm + 1);

                                 for (i = 0; i < qfds; i++) {

2587 (*op)(*rp++, arg);

2588 }

2589 break; /* XXX, but saves time */

2590 }

2591 }

2592 m0 = m0->m_act;

2593 }

2594 }

2595

2596 static void

 unp_mark(struct fileglob *fg, __unused void *arg)

2598 {

2599 uint32_t oflags, nflags;

2600

         os_atomic_rmw_loop(&fg->fg_flag, oflags, nflags, relaxed, {

2602 if (oflags & FMARK) {

2603 os_atomic_rmw_loop_give_up(return );

2604 }

2605 nflags = oflags | FMARK | FDEFER;

2606 });

2607

2608 unp_defer++;

2609 }

2610

2611 static void

 unp_discard(struct fileglob *fg, void *p)

2613 {

2614 if (p == NULL) {

2615 p = current_proc(); /* XXX */

2616 }

         (void) OSAddAtomic(1, &unp_disposed);

         if (fg_removeuipc_mark(fg)) {

                 VERIFY(fg->fg_lflags & FG_RMMSGQ);

2620 fg_removeuipc(fg);

2621 }

         (void) OSAddAtomic(-1, &unp_rights);

2623

         (void) fg_drop(p, fg);

2625 }

2626

2627 int

 unp_lock(struct socket *so, int refcount, void * lr)

2629 {

2630 void * lr_saved;

         if (lr == 0) {

                 lr_saved = (void *)  __builtin_return_address(0);

2633 } else {

2634 lr_saved = lr;

2635 }

2636

2637 if (so->so_pcb) {

                 lck_mtx_lock(&((struct unpcb *)so->so_pcb)->unp_mtx);

2639 } else {

                 panic("unp_lock: so=%p NO PCB! lr=%p ref=0x%x\n",

2641 so, lr_saved, so->so_usecount);

2642 }

2643

         if (so->so_usecount < 0) {

                 panic("unp_lock: so=%p so_pcb=%p lr=%p ref=0x%x\n",

                     so, so->so_pcb, lr_saved, so->so_usecount);

2647 }

2648

2649 if (refcount) {

                 VERIFY(so->so_usecount > 0);

2651 so->so_usecount++;

2652 }

         so->lock_lr[so->next_lock_lr] = lr_saved;

         so->next_lock_lr = (so->next_lock_lr + 1) % SO_LCKDBG_MAX;

2655 return 0;

2656 }

2657

2658 int

 unp_unlock(struct socket *so, int refcount, void * lr)

2660 {

2661 void * lr_saved;

2662 lck_mtx_t * mutex_held = NULL;

         struct unpcb *unp = sotounpcb(so);

2664

         if (lr == 0) {

                 lr_saved = (void *) __builtin_return_address(0);

2667 } else {

2668 lr_saved = lr;

2669 }

2670

2671 if (refcount) {

2672 so->so_usecount--;

2673 }

2674

         if (so->so_usecount < 0) {

                 panic("unp_unlock: so=%p usecount=%x\n", so, so->so_usecount);

2677 }

         if (so->so_pcb == NULL) {

                 panic("unp_unlock: so=%p NO PCB usecount=%x\n", so, so->so_usecount);

2680 } else {

                 mutex_held = &((struct unpcb *)so->so_pcb)->unp_mtx;

2682 }

2683 LCK_MTX_ASSERT(mutex_held, LCK_MTX_ASSERT_OWNED);

         so->unlock_lr[so->next_unlock_lr] = lr_saved;

         so->next_unlock_lr = (so->next_unlock_lr + 1) % SO_LCKDBG_MAX;

2686

         if (so->so_usecount == 0 && (so->so_flags & SOF_PCBCLEARING)) {

                 sofreelastref(so, 1);

2689

2690 if (unp->unp_addr) {

                         FREE(unp->unp_addr, M_SONAME);

2692 }

2693

2694 lck_mtx_unlock(mutex_held);

2695

                 lck_mtx_destroy(&unp->unp_mtx, &unp_mtx_grp);

2697 zfree(unp_zone, unp);

2698

2699 unp_gc();

2700 } else {

2701 lck_mtx_unlock(mutex_held);

2702 }

2703

2704 return 0;

2705 }

2706

2707 lck_mtx_t *

 unp_getlock(struct socket *so, __unused int flags)

2709 {

         struct unpcb *unp = (struct unpcb *)so->so_pcb;

2711

2712

2713 if (so->so_pcb) {

                 if (so->so_usecount < 0) {

                         panic("unp_getlock: so=%p usecount=%x\n", so, so->so_usecount);

2716 }

2717 return &unp->unp_mtx;

2718 } else {

                 panic("unp_getlock: so=%p NULL so_pcb\n", so);

                 return so->so_proto->pr_domain->dom_mtx;

2721 }

2722 }