X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..55e303ae13a4cf49d70f2294092726f2fffb9ef2:/bsd/kern/sysv_sem.c diff --git a/bsd/kern/sysv_sem.c b/bsd/kern/sysv_sem.c index b49976487..8c4c3d47c 100644 --- a/bsd/kern/sysv_sem.c +++ b/bsd/kern/sysv_sem.c @@ -1,21 +1,24 @@ /* - * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -26,92 +29,160 @@ * * This software is provided ``AS IS'' without any warranties of any kind. */ +/* + * John Bellardo modified the implementation for Darwin. 12/2000 + */ #include #include -#include #include #include #include -#include +#include +#include + +#include +#include +#include +#include + +/*#include */ +/*#include */ + +/* Uncomment this line to see the debugging output */ +/* #define SEM_DEBUG */ + +/* Macros to deal with the semaphore subsystem lock. The lock currently uses + * the semlock_holder static variable as a mutex. NULL means no lock, any + * value other than NULL means locked. semlock_holder is used because it was + * present in the code before the Darwin port, and for no other reason. + * When the time comes to relax the funnel requirements of the kernel only + * these macros should need to be changed. A spin lock would work well. + */ +/* Aquire the lock */ +#define SUBSYSTEM_LOCK_AQUIRE(p) { sysv_sem_aquiring_threads++; \ + while (semlock_holder != NULL) \ + (void) tsleep((caddr_t)&semlock_holder, (PZERO - 4), "sysvsem", 0); \ + semlock_holder = p; \ + sysv_sem_aquiring_threads--; } + +/* Release the lock */ +#define SUBSYSTEM_LOCK_RELEASE { semlock_holder = NULL; wakeup((caddr_t)&semlock_holder); } +/* Release the lock and return a value */ +#define UNLOCK_AND_RETURN(ret) { SUBSYSTEM_LOCK_RELEASE; return(ret); } + +#define M_SYSVSEM M_SUBPROC + +#if 0 static void seminit __P((void *)); SYSINIT(sysv_sem, SI_SUB_SYSV_SEM, SI_ORDER_FIRST, seminit, NULL) +#endif 0 -#ifndef _SYS_SYSPROTO_H_ -struct __semctl_args; -int __semctl __P((struct proc *p, struct __semctl_args *uap)); +/* Hard system limits to avoid resource starvation / DOS attacks. + * These are not needed if we can make the semaphore pages swappable. + */ +static struct seminfo limitseminfo = { + SEMMAP, /* # of entries in semaphore map */ + SEMMNI, /* # of semaphore identifiers */ + SEMMNS, /* # of semaphores in system */ + SEMMNU, /* # of undo structures in system */ + SEMMSL, /* max # of semaphores per id */ + SEMOPM, /* max # of operations per semop call */ + SEMUME, /* max # of undo entries per process */ + SEMUSZ, /* size in bytes of undo structure */ + SEMVMX, /* semaphore maximum value */ + SEMAEM /* adjust on exit max value */ +}; + +/* Current system allocations. We use this structure to track how many + * resources we have allocated so far. This way we can set large hard limits + * and not allocate the memory for them up front. + */ +struct seminfo seminfo = { + SEMMAP, /* Unused, # of entries in semaphore map */ + 0, /* # of semaphore identifiers */ + 0, /* # of semaphores in system */ + 0, /* # of undo entries in system */ + SEMMSL, /* max # of semaphores per id */ + SEMOPM, /* max # of operations per semop call */ + SEMUME, /* max # of undo entries per process */ + SEMUSZ, /* size in bytes of undo structure */ + SEMVMX, /* semaphore maximum value */ + SEMAEM /* adjust on exit max value */ +}; + +/* A counter so the module unload code knows when there are no more processes using + * the sysv_sem code */ +static long sysv_sem_sleeping_threads = 0; +static long sysv_sem_aquiring_threads = 0; + +struct semctl_args; +int semctl __P((struct proc *p, struct semctl_args *uap, int *)); struct semget_args; -int semget __P((struct proc *p, struct semget_args *uap)); +int semget __P((struct proc *p, struct semget_args *uap, int *)); struct semop_args; -int semop __P((struct proc *p, struct semop_args *uap)); +int semop __P((struct proc *p, struct semop_args *uap, int *)); struct semconfig_args; -int semconfig __P((struct proc *p, struct semconfig_args *uap)); -#endif +int semconfig __P((struct proc *p, struct semconfig_args *uap, int *)); + static struct sem_undo *semu_alloc __P((struct proc *p)); static int semundo_adjust __P((struct proc *p, struct sem_undo **supptr, int semid, int semnum, int adjval)); static void semundo_clear __P((int semid, int semnum)); +typedef int sy_call_t __P((struct proc *, void *, int *)); + /* XXX casting to (sy_call_t *) is bogus, as usual. */ static sy_call_t *semcalls[] = { - (sy_call_t *)__semctl, (sy_call_t *)semget, + (sy_call_t *)semctl, (sy_call_t *)semget, (sy_call_t *)semop, (sy_call_t *)semconfig }; -static int semtot = 0; -struct semid_ds *sema; /* semaphore id pool */ -struct sem *sem; /* semaphore pool */ -static struct sem_undo *semu_list; /* list of active undo structures */ -int *semu; /* undo structure pool */ +static int semtot = 0; /* # of used semaphores */ +struct semid_ds *sema = NULL; /* semaphore id pool */ +struct sem *sem = NULL; /* semaphore pool */ +static struct sem_undo *semu_list = NULL; /* list of active undo structures */ +struct sem_undo *semu = NULL; /* semaphore undo pool */ static struct proc *semlock_holder = NULL; +/* seminit no longer needed. The data structures are grown dynamically */ void -seminit(dummy) - void *dummy; +seminit() { - register int i; - - if (sema == NULL) - panic("sema is NULL"); - if (semu == NULL) - panic("semu is NULL"); - - for (i = 0; i < seminfo.semmni; i++) { - sema[i].sem_base = 0; - sema[i].sem_perm.mode = 0; - } - for (i = 0; i < seminfo.semmnu; i++) { - register struct sem_undo *suptr = SEMU(i); - suptr->un_proc = NULL; - } - semu_list = NULL; } /* * Entry point for all SEM calls + * + * In Darwin this is no longer the entry point. It will be removed after + * the code has been tested better. */ +struct semsys_args { + u_int which; + int a2; + int a3; + int a4; + int a5; +}; int -semsys(p, uap) +semsys(p, uap, retval) struct proc *p; /* XXX actually varargs. */ - struct semsys_args /* { - u_int which; - int a2; - int a3; - int a4; - int a5; - } */ *uap; + struct semsys_args *uap; + register_t *retval; { - while (semlock_holder != NULL && semlock_holder != p) + /* The individual calls handling the locking now */ + /*while (semlock_holder != NULL && semlock_holder != p) (void) tsleep((caddr_t)&semlock_holder, (PZERO - 4), "semsys", 0); + */ if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0])) return (EINVAL); - return ((*semcalls[uap->which])(p, &uap->a2)); + return ((*semcalls[uap->which])(p, &uap->a2, retval)); } /* @@ -136,20 +207,20 @@ struct semconfig_args { #endif int -semconfig(p, uap) +semconfig(p, uap, retval) struct proc *p; struct semconfig_args *uap; + register_t *retval; { int eval = 0; switch (uap->flag) { case SEM_CONFIG_FREEZE: - semlock_holder = p; + SUBSYSTEM_LOCK_AQUIRE(p); break; case SEM_CONFIG_THAW: - semlock_holder = NULL; - wakeup((caddr_t)&semlock_holder); + SUBSYSTEM_LOCK_RELEASE; break; default: @@ -159,13 +230,215 @@ semconfig(p, uap) break; } - p->p_retval[0] = 0; + *retval = 0; return(eval); } +/* Expand the semu array to the given capacity. If the expansion fails + * return 0, otherwise return 1. + * + * Assumes we already have the subsystem lock. + */ +static int +grow_semu_array(newSize) + int newSize; +{ + register int i, j; + register struct sem_undo *newSemu; + if (newSize <= seminfo.semmnu) + return 0; + if (newSize > limitseminfo.semmnu) /* enforce hard limit */ + { +#ifdef SEM_DEBUG + printf("undo structure hard limit of %d reached, requested %d\n", + limitseminfo.semmnu, newSize); +#endif + return 0; + } + newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC; + newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize; + +#ifdef SEM_DEBUG + printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize); +#endif + MALLOC(newSemu, struct sem_undo*, sizeof(struct sem_undo)*newSize, + M_SYSVSEM, M_WAITOK); + if (NULL == newSemu) + { +#ifdef SEM_DEBUG + printf("allocation failed. no changes made.\n"); +#endif + return 0; + } + + /* Initialize our structure. */ + for (i = 0; i < seminfo.semmnu; i++) + { + newSemu[i] = semu[i]; + for(j = 0; j < SEMUME; j++) /* Is this really needed? */ + newSemu[i].un_ent[j] = semu[i].un_ent[j]; + } + for (i = seminfo.semmnu; i < newSize; i++) + { + newSemu[i].un_proc = NULL; + } + + /* Clean up the old array */ + if (semu) + FREE(semu, M_SYSVSEM); + + semu = newSemu; + seminfo.semmnu = newSize; +#ifdef SEM_DEBUG + printf("expansion successful\n"); +#endif + return 1; +} + +/* + * Expand the sema array to the given capacity. If the expansion fails + * we return 0, otherwise we return 1. + * + * Assumes we already have the subsystem lock. + */ +static int +grow_sema_array(newSize) + int newSize; +{ + register struct semid_ds *newSema; + register int i; + + if (newSize <= seminfo.semmni) + return 0; + if (newSize > limitseminfo.semmni) /* enforce hard limit */ + { +#ifdef SEM_DEBUG + printf("identifier hard limit of %d reached, requested %d\n", + limitseminfo.semmni, newSize); +#endif + return 0; + } + newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC; + newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize; + +#ifdef SEM_DEBUG + printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize); +#endif + MALLOC(newSema, struct semid_ds*, sizeof(struct semid_ds)*newSize, + M_SYSVSEM, M_WAITOK); + if (NULL == newSema) + { +#ifdef SEM_DEBUG + printf("allocation failed. no changes made.\n"); +#endif + return 0; + } + + /* Initialize our new ids, and copy over the old ones */ + for (i = 0; i < seminfo.semmni; i++) + { + newSema[i] = sema[i]; + /* This is a hack. What we really want to be able to + * do is change the value a process is waiting on + * without waking it up, but I don't know how to do + * this with the existing code, so we wake up the + * process and let it do a lot of work to determine the + * semaphore set is really not available yet, and then + * sleep on the correct, reallocated semid_ds pointer. + */ + if (sema[i].sem_perm.mode & SEM_ALLOC) + wakeup((caddr_t)&sema[i]); + } + + for (i = seminfo.semmni; i < newSize; i++) + { + newSema[i].sem_base = 0; + newSema[i].sem_perm.mode = 0; + } + + /* Clean up the old array */ + if (sema) + FREE(sema, M_SYSVSEM); + + sema = newSema; + seminfo.semmni = newSize; +#ifdef SEM_DEBUG + printf("expansion successful\n"); +#endif + return 1; +} + +/* + * Expand the sem array to the given capacity. If the expansion fails + * we return 0 (fail), otherwise we return 1 (success). + * + * Assumes we already hold the subsystem lock. + */ +static int +grow_sem_array(newSize) + int newSize; +{ + register struct sem *newSem = NULL; + register int i; + + if (newSize < semtot) + return 0; + if (newSize > limitseminfo.semmns) /* enforce hard limit */ + { +#ifdef SEM_DEBUG + printf("semaphore hard limit of %d reached, requested %d\n", + limitseminfo.semmns, newSize); +#endif + return 0; + } + newSize = (newSize/SEMMNS_INC + 1) * SEMMNS_INC; + newSize = newSize > limitseminfo.semmns ? limitseminfo.semmns : newSize; + +#ifdef SEM_DEBUG + printf("growing sem array from %d to %d\n", seminfo.semmns, newSize); +#endif + MALLOC(newSem, struct sem*, sizeof(struct sem)*newSize, + M_SYSVSEM, M_WAITOK); + if (NULL == newSem) + { +#ifdef SEM_DEBUG + printf("allocation failed. no changes made.\n"); +#endif + return 0; + } + + /* We have our new memory, now copy the old contents over */ + if (sem) + for(i = 0; i < seminfo.semmns; i++) + newSem[i] = sem[i]; + + /* Update our id structures to point to the new semaphores */ + for(i = 0; i < seminfo.semmni; i++) + if (sema[i].sem_perm.mode & SEM_ALLOC) /* ID in use */ + { + if (newSem > sem) + sema[i].sem_base += newSem - sem; + else + sema[i].sem_base -= sem - newSem; + } + + /* clean up the old array */ + if (sem) + FREE(sem, M_SYSVSEM); + + sem = newSem; + seminfo.semmns = newSize; +#ifdef SEM_DEBUG + printf("expansion complete\n"); +#endif + return 1; +} + /* * Allocate a new sem_undo structure for a process * (returns ptr to structure or NULL if no more room) + * + * Assumes we already hold the subsystem lock. */ static struct sem_undo * @@ -219,9 +492,14 @@ semu_alloc(p) supptr = &(suptr->un_next); } - /* If we didn't free anything then just give-up */ + /* If we didn't free anything. Try expanding + * the semu[] array. If that doesn't work + * then fail. We expand last to get the + * most reuse out of existing resources. + */ if (!did_something) - return(NULL); + if (!grow_semu_array(seminfo.semmnu + 1)) + return(NULL); } else { /* * The second pass failed even though we freed @@ -236,6 +514,8 @@ semu_alloc(p) /* * Adjust a particular entry for a particular proc + * + * Assumes we already hold the subsystem lock. */ static int @@ -295,7 +575,7 @@ semundo_adjust(p, supptr, semid, semnum, adjval) /* Didn't find the right entry - create it */ if (adjval == 0) return(0); - if (suptr->un_cnt != seminfo.semume) { + if (suptr->un_cnt != limitseminfo.semume) { sunptr = &suptr->un_ent[suptr->un_cnt]; suptr->un_cnt++; sunptr->un_adjval = adjval; @@ -305,6 +585,8 @@ semundo_adjust(p, supptr, semid, semnum, adjval) return(0); } +/* Assumes we already hold the subsystem lock. + */ static void semundo_clear(semid, semnum) int semid, semnum; @@ -337,41 +619,50 @@ semundo_clear(semid, semnum) * Note that the user-mode half of this passes a union, not a pointer */ #ifndef _SYS_SYSPROTO_H_ -struct __semctl_args { +struct semctl_args { int semid; int semnum; int cmd; - union semun *arg; + union semun arg; }; #endif int -__semctl(p, uap) +semctl(p, uap, retval) struct proc *p; - register struct __semctl_args *uap; + register struct semctl_args *uap; + register_t *retval; { int semid = uap->semid; int semnum = uap->semnum; int cmd = uap->cmd; - union semun *arg = uap->arg; + union semun arg = uap->arg; union semun real_arg; struct ucred *cred = p->p_ucred; int i, rval, eval; struct semid_ds sbuf; register struct semid_ds *semaptr; + AUDIT_ARG(svipc_cmd, cmd); + AUDIT_ARG(svipc_id, semid); + SUBSYSTEM_LOCK_AQUIRE(p); #ifdef SEM_DEBUG printf("call to semctl(%d, %d, %d, 0x%x)\n", semid, semnum, cmd, arg); #endif semid = IPCID_TO_IX(semid); - if (semid < 0 || semid >= seminfo.semmsl) - return(EINVAL); + if (semid < 0 || semid >= seminfo.semmni) +{ +#ifdef SEM_DEBUG + printf("Invalid semid\n"); +#endif + UNLOCK_AND_RETURN(EINVAL); +} semaptr = &sema[semid]; if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 || semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); eval = 0; rval = 0; @@ -379,7 +670,7 @@ __semctl(p, uap) switch (cmd) { case IPC_RMID: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M))) - return(eval); + UNLOCK_AND_RETURN(eval); semaptr->sem_perm.cuid = cred->cr_uid; semaptr->sem_perm.uid = cred->cr_uid; semtot -= semaptr->sem_nsems; @@ -397,12 +688,12 @@ __semctl(p, uap) case IPC_SET: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M))) - return(eval); - if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - return(eval); - if ((eval = copyin(real_arg.buf, (caddr_t)&sbuf, + UNLOCK_AND_RETURN(eval); + /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) + UNLOCK_AND_RETURN(eval);*/ + if ((eval = copyin(arg.buf, (caddr_t)&sbuf, sizeof(sbuf))) != 0) - return(eval); + UNLOCK_AND_RETURN(eval); semaptr->sem_perm.uid = sbuf.sem_perm.uid; semaptr->sem_perm.gid = sbuf.sem_perm.gid; semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) | @@ -412,45 +703,45 @@ __semctl(p, uap) case IPC_STAT: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - return(eval); - if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - return(eval); - eval = copyout((caddr_t)semaptr, real_arg.buf, + UNLOCK_AND_RETURN(eval); + /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) + UNLOCK_AND_RETURN(eval);*/ + eval = copyout((caddr_t)semaptr, arg.buf, sizeof(struct semid_ds)); break; case GETNCNT: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - return(eval); + UNLOCK_AND_RETURN(eval); if (semnum < 0 || semnum >= semaptr->sem_nsems) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); rval = semaptr->sem_base[semnum].semncnt; break; case GETPID: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - return(eval); + UNLOCK_AND_RETURN(eval); if (semnum < 0 || semnum >= semaptr->sem_nsems) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); rval = semaptr->sem_base[semnum].sempid; break; case GETVAL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - return(eval); + UNLOCK_AND_RETURN(eval); if (semnum < 0 || semnum >= semaptr->sem_nsems) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); rval = semaptr->sem_base[semnum].semval; break; case GETALL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - return(eval); - if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - return(eval); + UNLOCK_AND_RETURN(eval); + /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) + UNLOCK_AND_RETURN(eval);*/ for (i = 0; i < semaptr->sem_nsems; i++) { eval = copyout((caddr_t)&semaptr->sem_base[i].semval, - &real_arg.array[i], sizeof(real_arg.array[0])); + &arg.array[i], sizeof(arg.array[0])); if (eval != 0) break; } @@ -458,33 +749,48 @@ __semctl(p, uap) case GETZCNT: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R))) - return(eval); + UNLOCK_AND_RETURN(eval); if (semnum < 0 || semnum >= semaptr->sem_nsems) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); rval = semaptr->sem_base[semnum].semzcnt; break; case SETVAL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W))) - return(eval); + { +#ifdef SEM_DEBUG + printf("Invalid credentials for write\n"); +#endif + UNLOCK_AND_RETURN(eval); + } if (semnum < 0 || semnum >= semaptr->sem_nsems) - return(EINVAL); - if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - return(eval); - semaptr->sem_base[semnum].semval = real_arg.val; + { +#ifdef SEM_DEBUG + printf("Invalid number out of range for set\n"); +#endif + UNLOCK_AND_RETURN(EINVAL); + } + /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) + { +#ifdef SEM_DEBUG + printf("Error during value copyin\n"); +#endif + UNLOCK_AND_RETURN(eval); + }*/ + semaptr->sem_base[semnum].semval = arg.val; semundo_clear(semid, semnum); wakeup((caddr_t)semaptr); break; case SETALL: if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W))) - return(eval); - if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) - return(eval); + UNLOCK_AND_RETURN(eval); + /*if ((eval = copyin(arg, &real_arg, sizeof(real_arg))) != 0) + UNLOCK_AND_RETURN(eval);*/ for (i = 0; i < semaptr->sem_nsems; i++) { - eval = copyin(&real_arg.array[i], + eval = copyin(&arg.array[i], (caddr_t)&semaptr->sem_base[i].semval, - sizeof(real_arg.array[0])); + sizeof(arg.array[0])); if (eval != 0) break; } @@ -493,12 +799,12 @@ __semctl(p, uap) break; default: - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); } if (eval == 0) - p->p_retval[0] = rval; - return(eval); + *retval = rval; + UNLOCK_AND_RETURN(eval); } #ifndef _SYS_SYSPROTO_H_ @@ -510,9 +816,10 @@ struct semget_args { #endif int -semget(p, uap) +semget(p, uap, retval) struct proc *p; register struct semget_args *uap; + register_t *retval; { int semid, eval; int key = uap->key; @@ -520,10 +827,14 @@ semget(p, uap) int semflg = uap->semflg; struct ucred *cred = p->p_ucred; + SUBSYSTEM_LOCK_AQUIRE(p); #ifdef SEM_DEBUG - printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); + if (key != IPC_PRIVATE) + printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg); + else + printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg); #endif - + if (key != IPC_PRIVATE) { for (semid = 0; semid < seminfo.semmni; semid++) { if ((sema[semid].sem_perm.mode & SEM_ALLOC) && @@ -536,40 +847,46 @@ semget(p, uap) #endif if ((eval = ipcperm(cred, &sema[semid].sem_perm, semflg & 0700))) - return(eval); + UNLOCK_AND_RETURN(eval); if (nsems > 0 && sema[semid].sem_nsems < nsems) { #ifdef SEM_DEBUG printf("too small\n"); #endif - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); } if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) { #ifdef SEM_DEBUG printf("not exclusive\n"); #endif - return(EEXIST); + UNLOCK_AND_RETURN(EEXIST); } goto found; } } #ifdef SEM_DEBUG - printf("need to allocate the semid_ds\n"); + printf("need to allocate an id for the request\n"); #endif if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) { - if (nsems <= 0 || nsems > seminfo.semmsl) { + if (nsems <= 0 || nsems > limitseminfo.semmsl) { #ifdef SEM_DEBUG printf("nsems out of range (0<%d<=%d)\n", nsems, seminfo.semmsl); #endif - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); } if (nsems > seminfo.semmns - semtot) { #ifdef SEM_DEBUG printf("not enough semaphores left (need %d, got %d)\n", nsems, seminfo.semmns - semtot); #endif - return(ENOSPC); + if (!grow_sem_array(semtot + nsems)) + { +#ifdef SEM_DEBUG + printf("failed to grow the sem array\n"); +#endif + UNLOCK_AND_RETURN(ENOSPC); + } } for (semid = 0; semid < seminfo.semmni; semid++) { if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0) @@ -577,9 +894,15 @@ semget(p, uap) } if (semid == seminfo.semmni) { #ifdef SEM_DEBUG - printf("no more semid_ds's available\n"); + printf("no more id's available\n"); #endif - return(ENOSPC); + if (!grow_sema_array(seminfo.semmni + 1)) + { +#ifdef SEM_DEBUG + printf("failed to grow sema array\n"); +#endif + UNLOCK_AND_RETURN(ENOSPC); + } } #ifdef SEM_DEBUG printf("semid %d is available\n", semid); @@ -607,11 +930,16 @@ semget(p, uap) #ifdef SEM_DEBUG printf("didn't find it and wasn't asked to create it\n"); #endif - return(ENOENT); + UNLOCK_AND_RETURN(ENOENT); } found: - p->p_retval[0] = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm); + *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm); + AUDIT_ARG(svipc_id, *retval); +#ifdef SEM_DEBUG + printf("semget is done, returning %d\n", *retval); +#endif + SUBSYSTEM_LOCK_RELEASE; return(0); } @@ -624,9 +952,10 @@ struct semop_args { #endif int -semop(p, uap) +semop(p, uap, retval) struct proc *p; register struct semop_args *uap; + register_t *retval; { int semid = uap->semid; int nsops = uap->nsops; @@ -639,41 +968,43 @@ semop(p, uap) int i, j, eval; int do_wakeup, do_undos; + AUDIT_ARG(svipc_id, uap->semid); + SUBSYSTEM_LOCK_AQUIRE(p); #ifdef SEM_DEBUG printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops); #endif semid = IPCID_TO_IX(semid); /* Convert back to zero origin */ - if (semid < 0 || semid >= seminfo.semmsl) - return(EINVAL); + if (semid < 0 || semid >= seminfo.semmni) + UNLOCK_AND_RETURN(EINVAL); semaptr = &sema[semid]; if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W))) { #ifdef SEM_DEBUG printf("eval = %d from ipaccess\n", eval); #endif - return(eval); + UNLOCK_AND_RETURN(eval); } if (nsops > MAX_SOPS) { #ifdef SEM_DEBUG printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops); #endif - return(E2BIG); + UNLOCK_AND_RETURN(E2BIG); } if ((eval = copyin(uap->sops, &sops, nsops * sizeof(sops[0]))) != 0) { #ifdef SEM_DEBUG - printf("eval = %d from copyin(%08x, %08x, %d)\n", eval, + printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval, uap->sops, &sops, nsops * sizeof(sops[0])); #endif - return(eval); + UNLOCK_AND_RETURN(eval); } /* @@ -694,7 +1025,7 @@ semop(p, uap) sopptr = &sops[i]; if (sopptr->sem_num >= semaptr->sem_nsems) - return(EFBIG); + UNLOCK_AND_RETURN(EFBIG); semptr = &semaptr->sem_base[sopptr->sem_num]; @@ -756,7 +1087,7 @@ semop(p, uap) * NOWAIT flag set then return with EAGAIN. */ if (sopptr->sem_flg & IPC_NOWAIT) - return(EAGAIN); + UNLOCK_AND_RETURN(EAGAIN); if (sopptr->sem_op == 0) semptr->semzcnt++; @@ -766,16 +1097,32 @@ semop(p, uap) #ifdef SEM_DEBUG printf("semop: good night!\n"); #endif + /* Release our lock on the semaphore subsystem so + * another thread can get at the semaphore we are + * waiting for. We will get the lock back after we + * wake up. + */ + SUBSYSTEM_LOCK_RELEASE; + sysv_sem_sleeping_threads++; eval = tsleep((caddr_t)semaptr, (PZERO - 4) | PCATCH, "semwait", 0); + sysv_sem_sleeping_threads--; + #ifdef SEM_DEBUG printf("semop: good morning (eval=%d)!\n", eval); #endif + /* There is no need to get the lock if we are just + * going to return without performing more semaphore + * operations. + */ + if (eval != 0) + return(EINTR); + SUBSYSTEM_LOCK_AQUIRE(p); /* Get it back */ suptr = NULL; /* sem_undo may have been reallocated */ + semaptr = &sema[semid]; /* sema may have been reallocated */ + - if (eval != 0) - return(EINTR); #ifdef SEM_DEBUG printf("semop: good morning!\n"); #endif @@ -788,16 +1135,19 @@ semop(p, uap) /* The man page says to return EIDRM. */ /* Unfortunately, BSD doesn't define that code! */ #ifdef EIDRM - return(EIDRM); + UNLOCK_AND_RETURN(EIDRM); #else - return(EINVAL); + UNLOCK_AND_RETURN(EINVAL); #endif } /* * The semaphore is still alive. Readjust the count of - * waiting processes. + * waiting processes. semptr needs to be recomputed + * because the sem[] may have been reallocated while + * we were sleeping, updating our sem_base pointer. */ + semptr = &semaptr->sem_base[sopptr->sem_num]; if (sopptr->sem_op == 0) semptr->semzcnt--; else @@ -853,7 +1203,7 @@ done: #ifdef SEM_DEBUG printf("eval = %d from semundo_adjust\n", eval); #endif - return(eval); + UNLOCK_AND_RETURN(eval); } /* loop through the sops */ } /* if (do_undos) */ @@ -864,7 +1214,16 @@ done: semptr->sempid = p->p_pid; } - /* Do a wakeup if any semaphore was up'd. */ + /* Do a wakeup if any semaphore was up'd. + * we will release our lock on the semaphore subsystem before + * we wakeup other processes to prevent a little thrashing. + * Note that this is fine because we are done using the + * semaphore structures at this point in time. We only use + * a local variable pointer value, and the retval + * parameter. + * Note 2: Future use of sem_wakeup may reqiure the lock. + */ + SUBSYSTEM_LOCK_RELEASE; if (do_wakeup) { #ifdef SEM_DEBUG printf("semop: doing wakeup\n"); @@ -881,7 +1240,7 @@ done: #ifdef SEM_DEBUG printf("semop: done\n"); #endif - p->p_retval[0] = 0; + *retval = 0; return(0); } @@ -897,17 +1256,16 @@ semexit(p) register struct sem_undo **supptr; int did_something; - /* - * If somebody else is holding the global semaphore facility lock - * then sleep until it is released. + /* If we have not allocated our semaphores yet there can't be + * anything to undo, but we need the lock to prevent + * dynamic memory race conditions. */ - while (semlock_holder != NULL && semlock_holder != p) { -#ifdef SEM_DEBUG - printf("semaphore facility locked - sleeping ...\n"); -#endif - (void) tsleep((caddr_t)&semlock_holder, (PZERO - 4), "semext", 0); + SUBSYSTEM_LOCK_AQUIRE(p); + if (!sem) + { + SUBSYSTEM_LOCK_RELEASE; + return; } - did_something = 0; /* @@ -964,6 +1322,14 @@ semexit(p) } else semaptr->sem_base[semnum].semval += adjval; + /* Maybe we should build a list of semaptr's to wake + * up, finish all access to data structures, release the + * subsystem lock, and wake all the processes. Something + * to think about. It wouldn't buy us anything unless + * wakeup had the potential to block, or the syscall + * funnel state was changed to allow multiple threads + * in the BSD code at once. + */ #ifdef SEM_WAKEUP sem_wakeup((caddr_t)semaptr); #else @@ -986,11 +1352,69 @@ semexit(p) unlock: /* - * If the exiting process is holding the global semaphore facility - * lock then release it. - */ - if (semlock_holder == p) { - semlock_holder = NULL; - wakeup((caddr_t)&semlock_holder); - } + * There is a semaphore leak (i.e. memory leak) in this code. + * We should be deleting the IPC_PRIVATE semaphores when they are + * no longer needed, and we dont. We would have to track which processes + * know about which IPC_PRIVATE semaphores, updating the list after + * every fork. We can't just delete them semaphore when the process + * that created it dies, because that process may well have forked + * some children. So we need to wait until all of it's children have + * died, and so on. Maybe we should tag each IPC_PRIVATE sempahore + * with the creating group ID, count the number of processes left in + * that group, and delete the semaphore when the group is gone. + * Until that code gets implemented we will leak IPC_PRIVATE semaphores. + * There is an upper bound on the size of our semaphore array, so + * leaking the semaphores should not work as a DOS attack. + * + * Please note that the original BSD code this file is based on had the + * same leaky semaphore problem. + */ + + SUBSYSTEM_LOCK_RELEASE; } +/* (struct sysctl_oid *oidp, void *arg1, int arg2, \ + struct sysctl_req *req) */ +static int +sysctl_seminfo SYSCTL_HANDLER_ARGS +{ + int error = 0; + + error = SYSCTL_OUT(req, arg1, sizeof(int)); + if (error || !req->newptr) + return(error); + + SUBSYSTEM_LOCK_AQUIRE(current_proc()); + /* Set the values only if shared memory is not initialised */ + if ((sem == (struct sem *) 0) && + (sema == (struct semid_ds *) 0) && + (semu == (struct semid_ds *) 0) && + (semu_list == (struct sem_undo *) 0)) { + if (error = SYSCTL_IN(req, arg1, sizeof(int))) { + goto out; + } + } else + error = EINVAL; +out: + SUBSYSTEM_LOCK_RELEASE; + return(error); + +} + +/* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */ +extern struct sysctl_oid_list sysctl__kern_sysv_children; +SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNI, semmni, CTLTYPE_INT | CTLFLAG_RW, + &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni"); + +SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNS, semmns, CTLTYPE_INT | CTLFLAG_RW, + &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns"); + +SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNU, semmnu, CTLTYPE_INT | CTLFLAG_RW, + &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu"); + +SYSCTL_PROC(_kern_sysv, KSYSV_SEMMSL, semmsl, CTLTYPE_INT | CTLFLAG_RW, + &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl"); + +SYSCTL_PROC(_kern_sysv, KSYSV_SEMUNE, semume, CTLTYPE_INT | CTLFLAG_RW, + &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume"); + +