X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/cf7d32b81c573a0536dc4da4157f9c26f8d0bed3..db6096698656d32db7df630594bd9617ee54f828:/bsd/kern/kern_proc.c?ds=sidebyside diff --git a/bsd/kern/kern_proc.c b/bsd/kern/kern_proc.c index a3c81e332..6d696b424 100644 --- a/bsd/kern/kern_proc.c +++ b/bsd/kern/kern_proc.c @@ -83,7 +83,6 @@ #include #include #include -#include #include #include #include @@ -97,10 +96,14 @@ #include #include #include +#include #include #include #include #include +#include /* vm_map_switch_protect() */ +#include +#include #if CONFIG_MACF #include @@ -155,7 +158,11 @@ lck_attr_t * lctx_lck_attr; static void lctxinit(void); #endif +int cs_debug; /* declared further down in this file */ + +#if DEBUG #define __PROC_INTERNAL_DEBUG 1 +#endif /* Name to give to core files */ __private_extern__ char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"}; @@ -163,14 +170,12 @@ static void orphanpg(struct pgrp *pg); void proc_name_kdp(task_t t, char * buf, int size); char *proc_name_address(void *p); -static proc_t proc_refinternal_locked(proc_t p); static void pgrp_add(struct pgrp * pgrp, proc_t parent, proc_t child); static void pgrp_remove(proc_t p); static void pgrp_replace(proc_t p, struct pgrp *pgrp); static void pgdelete_dropref(struct pgrp *pgrp); -static proc_t proc_find_zombref(int pid); -static void proc_drop_zombref(proc_t p); extern void pg_rele_dropref(struct pgrp * pgrp); +static int csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaddittoken); struct fixjob_iterargs { struct pgrp * pg; @@ -284,16 +289,22 @@ out: int isinferior(proc_t p, proc_t t) { -int retval = 0; + int retval = 0; + int nchecked = 0; + proc_t start = p; /* if p==t they are not inferior */ if (p == t) return(0); proc_list_lock(); - for (; p != t; p = p->p_pptr) - if (p->p_pid == 0) + for (; p != t; p = p->p_pptr) { + nchecked++; + + /* Detect here if we're in a cycle */ + if ((p->p_pid == 0) || (p->p_pptr == start) || (nchecked >= nprocs)) goto out; + } retval = 1; out: proc_list_unlock(); @@ -334,7 +345,7 @@ proc_findinternal(int pid, int locked) } p = pfind_locked(pid); - if ((p == PROC_NULL) || (p != proc_refinternal_locked(p))) + if ((p == PROC_NULL) || (p != proc_ref_locked(p))) p = PROC_NULL; if (locked == 0) { @@ -344,6 +355,23 @@ proc_findinternal(int pid, int locked) return(p); } +proc_t +proc_findthread(thread_t thread) +{ + proc_t p = PROC_NULL; + struct uthread *uth; + + proc_list_lock(); + uth = get_bsdthread_info(thread); + if (uth && (uth->uu_flag & UT_VFORK)) + p = uth->uu_proc; + else + p = (proc_t)(get_bsdthreadtask_info(thread)); + p = proc_ref_locked(p); + proc_list_unlock(); + return(p); +} + int proc_rele(proc_t p) { @@ -362,15 +390,15 @@ proc_self(void) p = current_proc(); proc_list_lock(); - if (p != proc_refinternal_locked(p)) + if (p != proc_ref_locked(p)) p = PROC_NULL; proc_list_unlock(); return(p); } -static proc_t -proc_refinternal_locked(proc_t p) +proc_t +proc_ref_locked(proc_t p) { proc_t p1 = p; @@ -401,7 +429,7 @@ proc_rele_locked(proc_t p) } -static proc_t +proc_t proc_find_zombref(int pid) { proc_t p1 = PROC_NULL; @@ -429,7 +457,7 @@ proc_find_zombref(int pid) return(p1); } -static void +void proc_drop_zombref(proc_t p) { proc_list_lock(); @@ -548,18 +576,18 @@ proc_childdrainend(proc_t p) } void -proc_checkdeadrefs(proc_t p) +proc_checkdeadrefs(__unused proc_t p) { -//#if __PROC_INTERNAL_DEBUG +#if __PROC_INTERNAL_DEBUG if ((p->p_listflag & P_LIST_INHASH) != 0) - panic("proc being freed and still in hash %x: %x\n", (unsigned int)p, (unsigned int)p->p_listflag); + panic("proc being freed and still in hash %p: %u\n", p, p->p_listflag); if (p->p_childrencnt != 0) - panic("proc being freed and pending children cnt %x:%x\n", (unsigned int)p, (unsigned int)p->p_childrencnt); + panic("proc being freed and pending children cnt %p:%d\n", p, p->p_childrencnt); if (p->p_refcount != 0) - panic("proc being freed and pending refcount %x:%x\n", (unsigned int)p, (unsigned int)p->p_refcount); + panic("proc being freed and pending refcount %p:%d\n", p, p->p_refcount); if (p->p_parentref != 0) - panic("proc being freed and pending parentrefs %x:%x\n", (unsigned int)p, (unsigned int)p->p_parentref); -//#endif + panic("proc being freed and pending parentrefs %p:%d\n", p, p->p_parentref); +#endif } int @@ -597,7 +625,7 @@ proc_parent(proc_t p) proc_list_lock(); loop: pp = p->p_pptr; - parent = proc_refinternal_locked(pp); + parent = proc_ref_locked(pp); if ((parent == PROC_NULL) && (pp != PROC_NULL) && (pp->p_stat != SZOMB) && ((pp->p_listflag & P_LIST_EXITED) != 0) && ((pp->p_listflag & P_LIST_CHILDDRAINED)== 0)){ pp->p_listflag |= P_LIST_CHILDLKWAIT; msleep(&pp->p_childrencnt, proc_list_mlock, 0, "proc_parent", 0); @@ -724,6 +752,12 @@ proc_suser(proc_t p) return(error); } +task_t +proc_task(proc_t proc) +{ + return (task_t)proc->task; +} + /* * Obtain the first thread in a process * @@ -749,24 +783,66 @@ proc_ucred(proc_t p) return(p->p_ucred); } +struct uthread * +current_uthread() +{ + thread_t th = current_thread(); + + return((struct uthread *)get_bsdthread_info(th)); +} + + int proc_is64bit(proc_t p) { return(IS_64BIT_PROCESS(p)); } +int +proc_pidversion(proc_t p) +{ + return(p->p_idversion); +} + +uint64_t +proc_uniqueid(proc_t p) +{ + return(p->p_uniqueid); +} + +uint64_t +proc_selfuniqueid(void) +{ + proc_t p = current_proc(); + return(p->p_uniqueid); +} + +int +proc_getcdhash(proc_t p, unsigned char *cdhash) +{ + return vn_getcdhash(p->p_textvp, p->p_textoff, cdhash); +} + +void +proc_getexecutableuuid(proc_t p, unsigned char *uuidbuf, unsigned long size) +{ + if (size >= sizeof(p->p_uuid)) { + memcpy(uuidbuf, p->p_uuid, sizeof(p->p_uuid)); + } +} + + void bsd_set_dependency_capable(task_t task) { proc_t p = get_bsdtask_info(task); if (p) { - OSBitOrAtomic(P_DEPENDENCY_CAPABLE, (UInt32 *)&p->p_flag); + OSBitOrAtomic(P_DEPENDENCY_CAPABLE, &p->p_flag); } } -/* LP64todo - figure out how to identify 64-bit processes if NULL procp */ int IS_64BIT_PROCESS(proc_t p) { @@ -783,7 +859,7 @@ proc_t pfind_locked(pid_t pid) { proc_t p; -#ifdef DEBUG +#if DEBUG proc_t q; #endif @@ -792,10 +868,10 @@ pfind_locked(pid_t pid) for (p = PIDHASH(pid)->lh_first; p != 0; p = p->p_hash.le_next) { if (p->p_pid == pid) { -#ifdef DEBUG +#if DEBUG for (q = p->p_hash.le_next; q != 0; q = q->p_hash.le_next) { if ((p !=q) && (q->p_pid == pid)) - panic("two procs with same pid %x:%x:%d:%d\n", (unsigned int)p, (unsigned int)q, p->p_pid, q->p_pid); + panic("two procs with same pid %p:%p:%d:%d\n", p, q, p->p_pid, q->p_pid); } #endif return (p); @@ -994,14 +1070,18 @@ enterpgrp(proc_t p, pid_t pgid, int mksess) sess->s_sid = p->p_pid; sess->s_count = 1; sess->s_ttyvp = NULL; - sess->s_ttyp = NULL; + sess->s_ttyp = TTY_NULL; sess->s_flags = 0; sess->s_listflags = 0; sess->s_ttypgrpid = NO_PID; +#if CONFIG_FINE_LOCK_GROUPS + lck_mtx_init(&sess->s_mlock, proc_mlock_grp, proc_lck_attr); +#else lck_mtx_init(&sess->s_mlock, proc_lck_grp, proc_lck_attr); +#endif bcopy(procsp->s_login, sess->s_login, sizeof(sess->s_login)); - OSBitAndAtomic(~((uint32_t)P_CONTROLT), (UInt32 *)&p->p_flag); + OSBitAndAtomic(~((uint32_t)P_CONTROLT), &p->p_flag); proc_list_lock(); LIST_INSERT_HEAD(SESSHASH(sess->s_sid), sess, s_hash); proc_list_unlock(); @@ -1020,7 +1100,11 @@ enterpgrp(proc_t p, pid_t pgid, int mksess) proc_list_unlock(); } pgrp->pg_id = pgid; +#if CONFIG_FINE_LOCK_GROUPS + lck_mtx_init(&pgrp->pg_mlock, proc_mlock_grp, proc_lck_attr); +#else lck_mtx_init(&pgrp->pg_mlock, proc_lck_grp, proc_lck_attr); +#endif LIST_INIT(&pgrp->pg_members); pgrp->pg_membercnt = 0; pgrp->pg_jobc = 0; @@ -1073,8 +1157,7 @@ leavepgrp(proc_t p) static void pgdelete_dropref(struct pgrp *pgrp) { - struct tty * ttyp; - boolean_t fstate; + struct tty *ttyp; int emptypgrp = 1; struct session *sessp; @@ -1104,14 +1187,18 @@ pgdelete_dropref(struct pgrp *pgrp) proc_list_unlock(); - fstate = thread_funnel_set(kernel_flock, TRUE); - - ttyp = pgrp->pg_session->s_ttyp; - if ((ttyp != NULL) && (pgrp->pg_session->s_ttyp->t_pgrp == pgrp)) { - pgrp->pg_session->s_ttyp->t_pgrp = NULL; - pgrp->pg_session->s_ttypgrpid = NO_PID; + ttyp = SESSION_TP(pgrp->pg_session); + if (ttyp != TTY_NULL) { + if (ttyp->t_pgrp == pgrp) { + tty_lock(ttyp); + /* Re-check after acquiring the lock */ + if (ttyp->t_pgrp == pgrp) { + ttyp->t_pgrp = NULL; + pgrp->pg_session->s_ttypgrpid = NO_PID; + } + tty_unlock(ttyp); + } } - (void) thread_funnel_set(kernel_flock, fstate); proc_list_lock(); @@ -1122,23 +1209,33 @@ pgdelete_dropref(struct pgrp *pgrp) if ((sessp->s_listflags & (S_LIST_TERM | S_LIST_DEAD)) != 0) panic("pg_deleteref: terminating already terminated session"); sessp->s_listflags |= S_LIST_TERM; - ttyp = sessp->s_ttyp; + ttyp = SESSION_TP(sessp); LIST_REMOVE(sessp, s_hash); proc_list_unlock(); - fstate = thread_funnel_set(kernel_flock, TRUE); - if (ttyp != NULL && ttyp->t_session == sessp) - ttyp->t_session = NULL; - (void) thread_funnel_set(kernel_flock, fstate); + if (ttyp != TTY_NULL) { + tty_lock(ttyp); + if (ttyp->t_session == sessp) + ttyp->t_session = NULL; + tty_unlock(ttyp); + } proc_list_lock(); sessp->s_listflags |= S_LIST_DEAD; if (sessp->s_count != 0) panic("pg_deleteref: freeing session in use"); proc_list_unlock(); +#if CONFIG_FINE_LOCK_GROUPS + lck_mtx_destroy(&sessp->s_mlock, proc_mlock_grp); +#else lck_mtx_destroy(&sessp->s_mlock, proc_lck_grp); +#endif FREE_ZONE(sessp, sizeof(struct session), M_SESSION); } else proc_list_unlock(); +#if CONFIG_FINE_LOCK_GROUPS + lck_mtx_destroy(&pgrp->pg_mlock, proc_mlock_grp); +#else lck_mtx_destroy(&pgrp->pg_mlock, proc_lck_grp); +#endif FREE_ZONE(pgrp, sizeof(*pgrp), M_PGRP); } @@ -1389,8 +1486,8 @@ proc_core_name(const char *name, uid_t uid, pid_t pid, char *cf_name, goto toolong; return (0); toolong: - log(LOG_ERR, "pid %ld (%s), uid (%lu): corename is too long\n", - (long)pid, name, (u_long)uid); + log(LOG_ERR, "pid %ld (%s), uid (%u): corename is too long\n", + (long)pid, name, (uint32_t)uid); return (1); } @@ -1598,26 +1695,47 @@ out: SYSCTL_NODE(_kern, KERN_LCTX, lctx, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "Login Context"); -SYSCTL_PROC(_kern_lctx, KERN_LCTX_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT, +SYSCTL_PROC(_kern_lctx, KERN_LCTX_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT | CTLFLAG_LOCKED, 0, 0, sysctl_kern_lctx, "S,lctx", "Return entire login context table"); -SYSCTL_NODE(_kern_lctx, KERN_LCTX_LCID, lcid, CTLFLAG_RD, +SYSCTL_NODE(_kern_lctx, KERN_LCTX_LCID, lcid, CTLFLAG_RD | CTLFLAG_LOCKED, sysctl_kern_lctx, "Login Context Table"); -SYSCTL_INT(_kern_lctx, OID_AUTO, last, CTLFLAG_RD, &lastlcid, 0, ""); -SYSCTL_INT(_kern_lctx, OID_AUTO, count, CTLFLAG_RD, &alllctx_cnt, 0, ""); -SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW, &maxlcid, 0, ""); +SYSCTL_INT(_kern_lctx, OID_AUTO, last, CTLFLAG_RD | CTLFLAG_LOCKED, &lastlcid, 0, ""); +SYSCTL_INT(_kern_lctx, OID_AUTO, count, CTLFLAG_RD | CTLFLAG_LOCKED, &alllctx_cnt, 0, ""); +SYSCTL_INT(_kern_lctx, OID_AUTO, max, CTLFLAG_RW | CTLFLAG_LOCKED, &maxlcid, 0, ""); #endif /* LCTX */ /* Code Signing related routines */ int -csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) +csops(__unused proc_t p, struct csops_args *uap, __unused int32_t *retval) +{ + return(csops_internal(uap->pid, uap->ops, uap->useraddr, + uap->usersize, USER_ADDR_NULL)); +} + +int +csops_audittoken(__unused proc_t p, struct csops_audittoken_args *uap, __unused int32_t *retval) { - int ops = uap->ops; - pid_t pid = uap->pid; - user_addr_t uaddr = uap->useraddr; - size_t usize = (size_t)CAST_DOWN(size_t, uap->usersize); + if (uap->uaudittoken == USER_ADDR_NULL) + return(EINVAL); + switch (uap->ops) { + case CS_OPS_PIDPATH: + case CS_OPS_ENTITLEMENTS_BLOB: + break; + default: + return(EINVAL); + }; + + return(csops_internal(uap->pid, uap->ops, uap->useraddr, + uap->usersize, uap->uaudittoken)); +} + +static int +csops_internal(pid_t pid, int ops, user_addr_t uaddr, user_size_t usersize, user_addr_t uaudittoken) +{ + size_t usize = (size_t)CAST_DOWN(size_t, usersize); proc_t pt; uint32_t retflags; int vid, forself; @@ -1626,6 +1744,8 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) off_t toff; char * buf; unsigned char cdhash[SHA1_RESULTLEN]; + audit_token_t token; + unsigned int upid=0, uidversion = 0; forself = error = 0; @@ -1642,15 +1762,37 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) return(EOVERFLOW); if (kauth_cred_issuser(kauth_cred_get()) != TRUE) return(EPERM); - } else if ((forself == 0) && ((ops != CS_OPS_STATUS) && (ops != CS_OPS_CDHASH) && (kauth_cred_issuser(kauth_cred_get()) != TRUE))) { - return(EPERM); + } else { + switch (ops) { + case CS_OPS_STATUS: + case CS_OPS_CDHASH: + case CS_OPS_PIDOFFSET: + case CS_OPS_ENTITLEMENTS_BLOB: + break; /* unrestricted */ + default: + if (forself == 0 && kauth_cred_issuser(kauth_cred_get()) != TRUE) + return(EPERM); + break; + } } pt = proc_find(pid); if (pt == PROC_NULL) return(ESRCH); - + upid = pt->p_pid; + uidversion = pt->p_idversion; + if (uaudittoken != USER_ADDR_NULL) { + + error = copyin(uaudittoken, &token, sizeof(audit_token_t)); + if (error != 0) + goto out; + /* verify the audit token pid/idversion matches with proc */ + if ((token.val[5] != upid) || (token.val[7] != uidversion)) { + error = ESRCH; + goto out; + } + } switch (ops) { @@ -1665,7 +1807,13 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) if ((pt->p_csflags & CS_VALID) == CS_VALID) { /* is currently valid */ pt->p_csflags &= ~CS_VALID; /* set invalid */ if ((pt->p_csflags & CS_KILL) == CS_KILL) { + pt->p_csflags |= CS_KILLED; proc_unlock(pt); + if (cs_debug) { + printf("CODE SIGNING: marked invalid by pid %d: " + "p=%d[%s] honoring CS_KILL, final status 0x%x\n", + proc_selfpid(), pt->p_pid, pt->p_comm, pt->p_csflags); + } psignal(pt, SIGKILL); } else proc_unlock(pt); @@ -1700,12 +1848,16 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) tvp = pt->p_textvp; vid = vnode_vid(tvp); - proc_rele(pt); + if (tvp == NULLVP) { + proc_rele(pt); + return(EINVAL); + } buf = (char *)kalloc(usize); - if (buf == NULL) + if (buf == NULL) { + proc_rele(pt); return(ENOMEM); - + } bzero(buf, usize); error = vnode_getwithvid(tvp, vid); @@ -1719,18 +1871,28 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) } kfree(buf, usize); } + + proc_rele(pt); + + return(error); + + case CS_OPS_PIDOFFSET: + toff = pt->p_textoff; + proc_rele(pt); + error = copyout(&toff, uaddr, sizeof(toff)); return(error); case CS_OPS_CDHASH: - if (usize != SHA1_RESULTLEN) { - proc_rele(pt); - return EINVAL; - } /* pt already holds a reference on its p_textvp */ tvp = pt->p_textvp; toff = pt->p_textoff; + if (tvp == NULLVP || usize != SHA1_RESULTLEN) { + proc_rele(pt); + return EINVAL; + } + error = vn_getcdhash(tvp, toff, cdhash); proc_rele(pt); @@ -1739,7 +1901,47 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval) } return error; - + + case CS_OPS_ENTITLEMENTS_BLOB: { + char fakeheader[8] = { 0 }; + void *start; + size_t length; + + if ((pt->p_csflags & CS_VALID) == 0) { + error = EINVAL; + break; + } + if (usize < sizeof(fakeheader)) { + error = ERANGE; + break; + } + if (0 != (error = cs_entitlements_blob_get(pt, + &start, &length))) + break; + /* if no entitlement, fill in zero header */ + if (NULL == start) { + start = fakeheader; + length = sizeof(fakeheader); + } else if (usize < length) { + /* ... if input too short, copy out length of entitlement */ + uint32_t length32 = htonl((uint32_t)length); + memcpy(&fakeheader[4], &length32, sizeof(length32)); + + error = copyout(fakeheader, uaddr, sizeof(fakeheader)); + if (error == 0) + error = ERANGE; /* input buffer to short, ERANGE signals that */ + break; + } + error = copyout(start, uaddr, length); + break; + } + + case CS_OPS_MARKRESTRICT: + proc_lock(pt); + pt->p_csflags |= CS_RESTRICT; + proc_unlock(pt); + break; + default: error = EINVAL; break; @@ -1749,7 +1951,6 @@ out: return(error); } - int proc_iterate(flags, callout, arg, filterfn, filterarg) int flags; @@ -1902,14 +2103,17 @@ proc_rebootscan(callout, arg, filterfn, filterarg) proc_t p; int lockheld = 0, retval; + proc_shutdown_exitcount = 0; + ps_allprocscan: proc_list_lock(); + lockheld = 1; for (p = allproc.lh_first; (p != 0); p = p->p_list.le_next) { if ( (filterfn == 0 ) || (filterfn(p, filterarg) != 0)) { - p = proc_refinternal_locked(p); + p = proc_ref_locked(p); proc_list_unlock(); lockheld = 0; @@ -2170,7 +2374,7 @@ pgrp_remove(struct proc * p) pg->pg_membercnt--; if (pg->pg_membercnt < 0) - panic("pgprp: -ve membercnt pgprp:%x p:%x\n",(unsigned int)pg, (unsigned int)p); + panic("pgprp: -ve membercnt pgprp:%p p:%p\n",pg, p); LIST_REMOVE(p, p_pglist); if (pg->pg_members.lh_first == 0) { @@ -2217,7 +2421,7 @@ pgrp_replace(struct proc * p, struct pgrp * newpg) pgrp_lock(oldpg); oldpg->pg_membercnt--; if (oldpg->pg_membercnt < 0) - panic("pgprp: -ve membercnt pgprp:%x p:%x\n",(unsigned int)oldpg, (unsigned int)p); + panic("pgprp: -ve membercnt pgprp:%p p:%p\n",oldpg, p); LIST_REMOVE(p, p_pglist); if (oldpg->pg_members.lh_first == 0) { pgrp_unlock(oldpg); @@ -2307,11 +2511,12 @@ proc_pgrp(proc_t p) assert(pgrp != NULL); - if ((pgrp->pg_listflags & (PGRP_FLAG_TERMINATE | PGRP_FLAG_DEAD)) != 0) - panic("proc_pgrp: ref being povided for dead pgrp"); - - if (pgrp != PGRP_NULL) + if (pgrp != PGRP_NULL) { pgrp->pg_refcount++; + if ((pgrp->pg_listflags & (PGRP_FLAG_TERMINATE | PGRP_FLAG_DEAD)) != 0) + panic("proc_pgrp: ref being povided for dead pgrp"); + } + proc_list_unlock(); return(pgrp); @@ -2373,18 +2578,27 @@ session_rele(struct session *sess) if (sess->s_count != 0) panic("session_rele: freeing session in use"); proc_list_unlock(); +#if CONFIG_FINE_LOCK_GROUPS + lck_mtx_destroy(&sess->s_mlock, proc_mlock_grp); +#else lck_mtx_destroy(&sess->s_mlock, proc_lck_grp); +#endif FREE_ZONE(sess, sizeof(struct session), M_SESSION); } else proc_list_unlock(); } -void +int proc_transstart(proc_t p, int locked) { if (locked == 0) proc_lock(p); while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) { + if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT) { + if (locked == 0) + proc_unlock(p); + return EDEADLK; + } p->p_lflag |= P_LTRANSWAIT; msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL); } @@ -2392,37 +2606,61 @@ proc_transstart(proc_t p, int locked) p->p_transholder = current_thread(); if (locked == 0) proc_unlock(p); - + return 0; } +void +proc_transcommit(proc_t p, int locked) +{ + if (locked == 0) + proc_lock(p); + + assert ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT); + assert (p->p_transholder == current_thread()); + p->p_lflag |= P_LTRANSCOMMIT; + + if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) { + p->p_lflag &= ~P_LTRANSWAIT; + wakeup(&p->p_lflag); + } + if (locked == 0) + proc_unlock(p); +} void proc_transend(proc_t p, int locked) { if (locked == 0) proc_lock(p); - p->p_lflag &= ~P_LINTRANSIT; + + p->p_lflag &= ~( P_LINTRANSIT | P_LTRANSCOMMIT); + p->p_transholder = NULL; if ((p->p_lflag & P_LTRANSWAIT) == P_LTRANSWAIT) { p->p_lflag &= ~P_LTRANSWAIT; wakeup(&p->p_lflag); } - p->p_transholder = NULL; if (locked == 0) proc_unlock(p); } -void +int proc_transwait(proc_t p, int locked) { if (locked == 0) proc_lock(p); while ((p->p_lflag & P_LINTRANSIT) == P_LINTRANSIT) { + if ((p->p_lflag & P_LTRANSCOMMIT) == P_LTRANSCOMMIT && current_proc() == p) { + if (locked == 0) + proc_unlock(p); + return EDEADLK; + } p->p_lflag |= P_LTRANSWAIT; msleep(&p->p_lflag, &p->p_mlock, 0, "proc_signstart", NULL); } if (locked == 0) proc_unlock(p); + return 0; } void @@ -2445,18 +2683,63 @@ proc_knote(struct proc * p, long hint) proc_klist_unlock(); } +void +proc_knote_drain(struct proc *p) +{ + struct knote *kn = NULL; + + /* + * Clear the proc's klist to avoid references after the proc is reaped. + */ + proc_klist_lock(); + while ((kn = SLIST_FIRST(&p->p_klist))) { + kn->kn_ptr.p_proc = PROC_NULL; + KNOTE_DETACH(&p->p_klist, kn); + } + proc_klist_unlock(); +} unsigned long cs_procs_killed = 0; unsigned long cs_procs_invalidated = 0; int cs_force_kill = 0; int cs_force_hard = 0; int cs_debug = 0; -SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW, &cs_force_kill, 0, ""); -SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW, &cs_force_hard, 0, ""); -SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW, &cs_debug, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, cs_force_kill, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_kill, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_force_hard, 0, ""); +SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW | CTLFLAG_LOCKED, &cs_debug, 0, ""); + +int +cs_allow_invalid(struct proc *p) +{ +#if MACH_ASSERT + lck_mtx_assert(&p->p_mlock, LCK_MTX_ASSERT_NOTOWNED); +#endif +#if CONFIG_MACF && CONFIG_ENFORCE_SIGNED_CODE + /* There needs to be a MAC policy to implement this hook, or else the + * kill bits will be cleared here every time. If we have + * CONFIG_ENFORCE_SIGNED_CODE, we can assume there is a policy + * implementing the hook. + */ + if( 0 != mac_proc_check_run_cs_invalid(p)) { + if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " + "not allowed: pid %d\n", + p->p_pid); + return 0; + } + if(cs_debug) printf("CODE SIGNING: cs_allow_invalid() " + "allowed: pid %d\n", + p->p_pid); + proc_lock(p); + p->p_csflags &= ~(CS_KILL | CS_HARD | CS_VALID); + proc_unlock(p); + vm_map_switch_protect(get_task_map(p->task), FALSE); +#endif + return (p->p_csflags & (CS_KILL | CS_HARD)) == 0; +} int -cs_invalid_page(void) +cs_invalid_page( + addr64_t vaddr) { struct proc *p; int retval; @@ -2475,51 +2758,289 @@ cs_invalid_page(void) if (cs_force_hard) p->p_csflags |= CS_HARD; - if (p->p_csflags & CS_VALID) { - p->p_csflags &= ~CS_VALID; - + /* CS_KILL triggers us to send a kill signal. Nothing else. */ + if (p->p_csflags & CS_KILL) { + p->p_csflags |= CS_KILLED; proc_unlock(p); - cs_procs_invalidated++; - printf("CODE SIGNING: cs_invalid_page: " - "p=%d[%s] clearing CS_VALID\n", - p->p_pid, p->p_comm); + if (cs_debug) { + printf("CODE SIGNING: cs_invalid_page(0x%llx): " + "p=%d[%s] honoring CS_KILL, final status 0x%x\n", + vaddr, p->p_pid, p->p_comm, p->p_csflags); + } + cs_procs_killed++; + psignal(p, SIGKILL); proc_lock(p); - - - if (p->p_csflags & CS_KILL) { - proc_unlock(p); - if (cs_debug) { - printf("CODE SIGNING: cs_invalid_page: " - "p=%d[%s] honoring CS_KILL\n", - p->p_pid, p->p_comm); - } - cs_procs_killed++; - psignal(p, SIGKILL); - proc_lock(p); + } + + /* CS_HARD means fail the mapping operation so the process stays valid. */ + if (p->p_csflags & CS_HARD) { + proc_unlock(p); + if (cs_debug) { + printf("CODE SIGNING: cs_invalid_page(0x%llx): " + "p=%d[%s] honoring CS_HARD\n", + vaddr, p->p_pid, p->p_comm); } - - if (p->p_csflags & CS_HARD) { + retval = 1; + } else { + if (p->p_csflags & CS_VALID) { + p->p_csflags &= ~CS_VALID; + proc_unlock(p); - if (cs_debug) { - printf("CODE SIGNING: cs_invalid_page: " - "p=%d[%s] honoring CS_HARD\n", - p->p_pid, p->p_comm); - } - retval = 1; + cs_procs_invalidated++; + printf("CODE SIGNING: cs_invalid_page(0x%llx): " + "p=%d[%s] clearing CS_VALID\n", + vaddr, p->p_pid, p->p_comm); } else { proc_unlock(p); - retval = 0; - } - } else { - proc_unlock(p); - if (cs_debug) { - printf("CODE SIGNING: cs_invalid_page: " - "p=%d[%s] ignored...\n", - p->p_pid, p->p_comm); } + retval = 0; } return retval; } +void +proc_setregister(proc_t p) +{ + proc_lock(p); + p->p_lflag |= P_LREGISTER; + proc_unlock(p); +} + +void +proc_resetregister(proc_t p) +{ + proc_lock(p); + p->p_lflag &= ~P_LREGISTER; + proc_unlock(p); +} + +pid_t +proc_pgrpid(proc_t p) +{ + return p->p_pgrpid; +} + +pid_t +proc_selfpgrpid() +{ + return current_proc()->p_pgrpid; +} + + +/* return control and action states */ +int +proc_getpcontrol(int pid, int * pcontrolp) +{ + proc_t p; + + p = proc_find(pid); + if (p == PROC_NULL) + return(ESRCH); + if (pcontrolp != NULL) + *pcontrolp = p->p_pcaction; + + proc_rele(p); + return(0); +} + +int +proc_dopcontrol(proc_t p, void *num_found) +{ + int pcontrol; + + proc_lock(p); + + pcontrol = PROC_CONTROL_STATE(p); + + if (PROC_ACTION_STATE(p) ==0) { + switch(pcontrol) { + case P_PCTHROTTLE: + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: throttling pid %d (%s)\n", p->p_pid, p->p_comm); + (*(int *)num_found)++; + break; + + case P_PCSUSP: + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: suspending pid %d (%s)\n", p->p_pid, p->p_comm); + task_suspend(p->task); + (*(int *)num_found)++; + break; + + case P_PCKILL: + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: killing pid %d (%s)\n", p->p_pid, p->p_comm); + psignal(p, SIGKILL); + (*(int *)num_found)++; + break; + + default: + proc_unlock(p); + } + + } else + proc_unlock(p); + + return(PROC_RETURNED); +} + + +/* + * Resume a throttled or suspended process. This is an internal interface that's only + * used by the user level code that presents the GUI when we run out of swap space and + * hence is restricted to processes with superuser privileges. + */ + +int +proc_resetpcontrol(int pid) +{ + proc_t p; + int pcontrol; + int error; + proc_t self = current_proc(); + + /* if the process has been validated to handle resource control or root is valid one */ + if (((self->p_lflag & P_LVMRSRCOWNER) == 0) && (error = suser(kauth_cred_get(), 0))) + return error; + + p = proc_find(pid); + if (p == PROC_NULL) + return(ESRCH); + + proc_lock(p); + + pcontrol = PROC_CONTROL_STATE(p); + + if(PROC_ACTION_STATE(p) !=0) { + switch(pcontrol) { + case P_PCTHROTTLE: + PROC_RESETACTION_STATE(p); + proc_unlock(p); + printf("low swap: unthrottling pid %d (%s)\n", p->p_pid, p->p_comm); + break; + + case P_PCSUSP: + PROC_RESETACTION_STATE(p); + proc_unlock(p); + printf("low swap: resuming pid %d (%s)\n", p->p_pid, p->p_comm); + task_resume(p->task); + break; + + case P_PCKILL: + /* Huh? */ + PROC_SETACTION_STATE(p); + proc_unlock(p); + printf("low swap: attempt to unkill pid %d (%s) ignored\n", p->p_pid, p->p_comm); + break; + + default: + proc_unlock(p); + } + + } else + proc_unlock(p); + + proc_rele(p); + return(0); +} + + +/* + * Return true if the specified process has an action state specified for it and it isn't + * already in an action state and it's using more physical memory than the specified threshold. + * Note: the memory_threshold argument is specified in bytes and is of type uint64_t. + */ + +static int +proc_pcontrol_filter(proc_t p, void *memory_thresholdp) +{ + + return PROC_CONTROL_STATE(p) && /* if there's an action state specified... */ + (PROC_ACTION_STATE(p) == 0) && /* and we're not in the action state yet... */ + (get_task_resident_size(p->task) > *((uint64_t *)memory_thresholdp)); /* and this proc is over the mem threshold, */ + /* then return true to take action on this proc */ +} + + + +/* + * Deal with the out of swap space condition. This routine gets called when + * we want to swap something out but there's no more space left. Since this + * creates a memory deadlock situtation, we need to take action to free up + * some memory resources in order to prevent the system from hanging completely. + * The action we take is based on what the system processes running at user level + * have specified. Processes are marked in one of four categories: ones that + * can be killed immediately, ones that should be suspended, ones that should + * be throttled, and all the rest which are basically none of the above. Which + * processes are marked as being in which category is a user level policy decision; + * we just take action based on those decisions here. + */ + +#define STARTING_PERCENTAGE 50 /* memory threshold expressed as a percentage */ + /* of physical memory */ + +struct timeval last_no_space_action = {0, 0}; + +void +no_paging_space_action(void) +{ + + uint64_t memory_threshold; + int num_found; + struct timeval now; + + /* + * Throttle how often we come through here. Once every 20 seconds should be plenty. + */ + + microtime(&now); + + if (now.tv_sec <= last_no_space_action.tv_sec + 20) + return; + + last_no_space_action = now; + + /* + * Examine all processes and find those that have been marked to have some action + * taken when swap space runs out. Of those processes, select one or more and + * apply the specified action to them. The idea is to only take action against + * a few processes rather than hitting too many at once. If the low swap condition + * persists, this routine will get called again and we'll take action against more + * processes. + * + * Of the processes that have been marked, we choose which ones to take action + * against according to how much physical memory they're presently using. We + * start with the STARTING_THRESHOLD and any processes using more physical memory + * than the percentage threshold will have action taken against it. If there + * are no processes over the threshold, then the threshold is cut in half and we + * look again for processes using more than this threshold. We continue in + * this fashion until we find at least one process to take action against. This + * iterative approach is less than ideally efficient, however we only get here + * when the system is almost in a memory deadlock and is pretty much just + * thrashing if it's doing anything at all. Therefore, the cpu overhead of + * potentially multiple passes here probably isn't revelant. + */ + + memory_threshold = (sane_size * STARTING_PERCENTAGE) / 100; /* resident threshold in bytes */ + + for (num_found = 0; num_found == 0; memory_threshold = memory_threshold / 2) { + proc_iterate(PROC_ALLPROCLIST, proc_dopcontrol, (void *)&num_found, proc_pcontrol_filter, (void *)&memory_threshold); + + /* + * If we just looked with memory_threshold == 0, then there's no need to iterate any further since + * we won't find any eligible processes at this point. + */ + + if (memory_threshold == 0) { + if (num_found == 0) /* log that we couldn't do anything in this case */ + printf("low swap: unable to find any eligible processes to take action on\n"); + + break; + } + } +}