git.saurik.com Git - apple/xnu.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (c) 2000-2007 Apple Inc. All rights reserved.
	3	*
	4	* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
	5	*
	6	* This file contains Original Code and/or Modifications of Original Code
	7	* as defined in and that are subject to the Apple Public Source License
	8	* Version 2.0 (the 'License'). You may not use this file except in
	9	* compliance with the License. The rights granted to you under the License
	10	* may not be used to create, or enable the creation or redistribution of,
	11	* unlawful or unlicensed copies of an Apple operating system, or to
	12	* circumvent, violate, or enable the circumvention or violation of, any
	13	* terms of an Apple operating system software license agreement.
	14	*
	15	* Please obtain a copy of the License at
	16	* http://www.opensource.apple.com/apsl/ and read it before using this file.
	17	*
	18	* The Original Code and all software distributed under the License are
	19	* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
	20	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
	21	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
	22	* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
	23	* Please see the License for the specific language governing rights and
	24	* limitations under the License.
	25	*
	26	* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
	27	*/
	28	/*
	29	* Implementation of SVID semaphores
	30	*
	31	* Author: Daniel Boulet
	32	*
	33	* This software is provided ``AS IS'' without any warranties of any kind.
	34	*/
	35	/*
	36	* John Bellardo modified the implementation for Darwin. 12/2000
	37	*/
	38	/*
	39	* NOTICE: This file was modified by McAfee Research in 2004 to introduce
	40	* support for mandatory and extensible security protections. This notice
	41	* is included in support of clause 2.2 (b) of the Apple Public License,
	42	* Version 2.0.
	43	* Copyright (c) 2005-2006 SPARTA, Inc.
	44	*/
	45
	46	#include <sys/param.h>
	47	#include <sys/systm.h>
	48	#include <sys/kernel.h>
	49	#include <sys/proc_internal.h>
	50	#include <sys/kauth.h>
	51	#include <sys/sem_internal.h>
	52	#include <sys/malloc.h>
	53	#include <mach/mach_types.h>
	54
	55	#include <sys/filedesc.h>
	56	#include <sys/file_internal.h>
	57	#include <sys/sysctl.h>
	58	#include <sys/ipcs.h>
	59	#include <sys/sysent.h>
	60	#include <sys/sysproto.h>
	61	#if CONFIG_MACF
	62	#include <security/mac_framework.h>
	63	#endif
	64
	65	#include <security/audit/audit.h>
	66
	67	#if SYSV_SEM
	68
	69
	70	/* Uncomment this line to see the debugging output */
	71	/* #define SEM_DEBUG */
	72
	73	/* Uncomment this line to see MAC debugging output. */
	74	/* #define MAC_DEBUG */
	75	#if CONFIG_MACF_DEBUG
	76	#define MPRINTF(a) printf(a)
	77	#else
	78	#define MPRINTF(a)
	79	#endif
	80
	81	#define M_SYSVSEM M_TEMP
	82
	83
	84	/* Hard system limits to avoid resource starvation / DOS attacks.
	85	* These are not needed if we can make the semaphore pages swappable.
	86	*/
	87	static struct seminfo limitseminfo = {
	88	SEMMAP, /* # of entries in semaphore map */
	89	SEMMNI, /* # of semaphore identifiers */
	90	SEMMNS, /* # of semaphores in system */
	91	SEMMNU, /* # of undo structures in system */
	92	SEMMSL, /* max # of semaphores per id */
	93	SEMOPM, /* max # of operations per semop call */
	94	SEMUME, /* max # of undo entries per process */
	95	SEMUSZ, /* size in bytes of undo structure */
	96	SEMVMX, /* semaphore maximum value */
	97	SEMAEM /* adjust on exit max value */
	98	};
	99
	100	/* Current system allocations. We use this structure to track how many
	101	* resources we have allocated so far. This way we can set large hard limits
	102	* and not allocate the memory for them up front.
	103	*/
	104	struct seminfo seminfo = {
	105	SEMMAP, /* Unused, # of entries in semaphore map */
	106	0, /* # of semaphore identifiers */
	107	0, /* # of semaphores in system */
	108	0, /* # of undo entries in system */
	109	SEMMSL, /* max # of semaphores per id */
	110	SEMOPM, /* max # of operations per semop call */
	111	SEMUME, /* max # of undo entries per process */
	112	SEMUSZ, /* size in bytes of undo structure */
	113	SEMVMX, /* semaphore maximum value */
	114	SEMAEM /* adjust on exit max value */
	115	};
	116
	117
	118	static int semu_alloc(struct proc *p);
	119	static int semundo_adjust(struct proc p, int supidx,
	120	int semid, int semnum, int adjval);
	121	static void semundo_clear(int semid, int semnum);
	122
	123	/* XXX casting to (sy_call_t ) is bogus, as usual. /
	124	static sy_call_t *semcalls[] = {
	125	(sy_call_t )semctl, (sy_call_t )semget,
	126	(sy_call_t *)semop
	127	};
	128
	129	static int semtot = 0; /* # of used semaphores */
	130	struct semid_kernel sema = NULL; / semaphore id pool */
	131	struct sem sem_pool = NULL; / semaphore pool */
	132	static int semu_list_idx = -1; /* active undo structures */
	133	struct sem_undo semu = NULL; / semaphore undo pool */
	134
	135
	136	void sysv_sem_lock_init(void);
	137	static lck_grp_t *sysv_sem_subsys_lck_grp;
	138	static lck_grp_attr_t *sysv_sem_subsys_lck_grp_attr;
	139	static lck_attr_t *sysv_sem_subsys_lck_attr;
	140	static lck_mtx_t sysv_sem_subsys_mutex;
	141
	142	#define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
	143	#define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
	144
	145
	146	__private_extern__ void
	147	sysv_sem_lock_init( void )
	148	{
	149
	150	sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
	151
	152	sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_sem_subsys_lock", sysv_sem_subsys_lck_grp_attr);
	153
	154	sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
	155	lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
	156	}
	157
	158	static __inline__ user_time_t
	159	sysv_semtime(void)
	160	{
	161	struct timeval tv;
	162	microtime(&tv);
	163	return (tv.tv_sec);
	164	}
	165
	166	/*
	167	* XXX conversion of internal user_time_t to external tume_t loses
	168	* XXX precision; not an issue for us now, since we are only ever
	169	* XXX setting 32 bits worth of time into it.
	170	*
	171	* pad field contents are not moved correspondingly; contents will be lost
	172	*
	173	* NOTE: Source and target may NOT overlap! (target is smaller)
	174	*/
	175	static void
	176	semid_ds_kernelto32(struct user_semid_ds in, struct user32_semid_ds out)
	177	{
	178	out->sem_perm = in->sem_perm;
	179	out->sem_base = CAST_DOWN_EXPLICIT(__int32_t,in->sem_base);
	180	out->sem_nsems = in->sem_nsems;
	181	out->sem_otime = in->sem_otime; /* XXX loses precision */
	182	out->sem_ctime = in->sem_ctime; /* XXX loses precision */
	183	}
	184
	185	static void
	186	semid_ds_kernelto64(struct user_semid_ds in, struct user64_semid_ds out)
	187	{
	188	out->sem_perm = in->sem_perm;
	189	out->sem_base = CAST_DOWN_EXPLICIT(__int32_t,in->sem_base);
	190	out->sem_nsems = in->sem_nsems;
	191	out->sem_otime = in->sem_otime; /* XXX loses precision */
	192	out->sem_ctime = in->sem_ctime; /* XXX loses precision */
	193	}
	194
	195	/*
	196	* pad field contents are not moved correspondingly; contents will be lost
	197	*
	198	* NOTE: Source and target may are permitted to overlap! (source is smaller);
	199	* this works because we copy fields in order from the end of the struct to
	200	* the beginning.
	201	*
	202	* XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect
	203	* XXX is the same.
	204	*/
	205	static void
	206	semid_ds_32tokernel(struct user32_semid_ds in, struct user_semid_ds out)
	207	{
	208	out->sem_ctime = in->sem_ctime;
	209	out->sem_otime = in->sem_otime;
	210	out->sem_nsems = in->sem_nsems;
	211	out->sem_base = (void *)(uintptr_t)in->sem_base;
	212	out->sem_perm = in->sem_perm;
	213	}
	214
	215	static void
	216	semid_ds_64tokernel(struct user64_semid_ds in, struct user_semid_ds out)
	217	{
	218	out->sem_ctime = in->sem_ctime;
	219	out->sem_otime = in->sem_otime;
	220	out->sem_nsems = in->sem_nsems;
	221	out->sem_base = (void *)(uintptr_t)in->sem_base;
	222	out->sem_perm = in->sem_perm;
	223	}
	224
	225
	226	/*
	227	* semsys
	228	*
	229	* Entry point for all SEM calls: semctl, semget, semop
	230	*
	231	* Parameters: p Process requesting the call
	232	* uap User argument descriptor (see below)
	233	* retval Return value of the selected sem call
	234	*
	235	* Indirect parameters: uap->which sem call to invoke (index in array of sem calls)
	236	* uap->a2 User argument descriptor
	237	*
	238	* Returns: 0 Success
	239	* !0 Not success
	240	*
	241	* Implicit returns: retval Return value of the selected sem call
	242	*
	243	* DEPRECATED: This interface should not be used to call the other SEM
	244	* functions (semctl, semget, semop). The correct usage is
	245	* to call the other SEM functions directly.
	246	*
	247	*/
	248	int
	249	semsys(struct proc p, struct semsys_args uap, int32_t *retval)
	250	{
	251
	252	/* The individual calls handling the locking now */
	253
	254	if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
	255	return (EINVAL);
	256	return ((*semcalls[uap->which])(p, &uap->a2, retval));
	257	}
	258
	259	/*
	260	* Expand the semu array to the given capacity. If the expansion fails
	261	* return 0, otherwise return 1.
	262	*
	263	* Assumes we already have the subsystem lock.
	264	*/
	265	static int
	266	grow_semu_array(int newSize)
	267	{
	268	int i;
	269	struct sem_undo *newSemu;
	270
	271	if (newSize <= seminfo.semmnu)
	272	return 1;
	273	if (newSize > limitseminfo.semmnu) /* enforce hard limit */
	274	{
	275	#ifdef SEM_DEBUG
	276	printf("undo structure hard limit of %d reached, requested %d\n",
	277	limitseminfo.semmnu, newSize);
	278	#endif
	279	return 0;
	280	}
	281	newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC;
	282	newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize;
	283
	284	#ifdef SEM_DEBUG
	285	printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
	286	#endif
	287	MALLOC(newSemu, struct sem_undo , sizeof (struct sem_undo) newSize,
	288	M_SYSVSEM, M_WAITOK \| M_ZERO);
	289	if (NULL == newSemu)
	290	{
	291	#ifdef SEM_DEBUG
	292	printf("allocation failed. no changes made.\n");
	293	#endif
	294	return 0;
	295	}
	296
	297	/* copy the old data to the new array */
	298	for (i = 0; i < seminfo.semmnu; i++)
	299	{
	300	newSemu[i] = semu[i];
	301	}
	302	/*
	303	* The new elements (from newSemu[i] to newSemu[newSize-1]) have their
	304	* "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
	305	* so they're already marked as "not in use".
	306	*/
	307
	308	/* Clean up the old array */
	309	if (semu)
	310	FREE(semu, M_SYSVSEM);
	311
	312	semu = newSemu;
	313	seminfo.semmnu = newSize;
	314	#ifdef SEM_DEBUG
	315	printf("expansion successful\n");
	316	#endif
	317	return 1;
	318	}
	319
	320	/*
	321	* Expand the sema array to the given capacity. If the expansion fails
	322	* we return 0, otherwise we return 1.
	323	*
	324	* Assumes we already have the subsystem lock.
	325	*/
	326	static int
	327	grow_sema_array(int newSize)
	328	{
	329	struct semid_kernel *newSema;
	330	int i;
	331
	332	if (newSize <= seminfo.semmni)
	333	return 0;
	334	if (newSize > limitseminfo.semmni) /* enforce hard limit */
	335	{
	336	#ifdef SEM_DEBUG
	337	printf("identifier hard limit of %d reached, requested %d\n",
	338	limitseminfo.semmni, newSize);
	339	#endif
	340	return 0;
	341	}
	342	newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC;
	343	newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize;
	344
	345	#ifdef SEM_DEBUG
	346	printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
	347	#endif
	348	MALLOC(newSema, struct semid_kernel *,
	349	sizeof (struct semid_kernel) * newSize,
	350	M_SYSVSEM, M_WAITOK \| M_ZERO);
	351	if (NULL == newSema)
	352	{
	353	#ifdef SEM_DEBUG
	354	printf("allocation failed. no changes made.\n");
	355	#endif
	356	return 0;
	357	}
	358
	359	/* copy over the old ids */
	360	for (i = 0; i < seminfo.semmni; i++)
	361	{
	362	newSema[i] = sema[i];
	363	/* This is a hack. What we really want to be able to
	364	* do is change the value a process is waiting on
	365	* without waking it up, but I don't know how to do
	366	* this with the existing code, so we wake up the
	367	* process and let it do a lot of work to determine the
	368	* semaphore set is really not available yet, and then
	369	* sleep on the correct, reallocated semid_kernel pointer.
	370	*/
	371	if (sema[i].u.sem_perm.mode & SEM_ALLOC)
	372	wakeup((caddr_t)&sema[i]);
	373	}
	374
	375	#if CONFIG_MACF
	376	for (i = seminfo.semmni; i < newSize; i++)
	377	{
	378	mac_sysvsem_label_init(&newSema[i]);
	379	}
	380	#endif
	381
	382	/*
	383	* The new elements (from newSema[i] to newSema[newSize-1]) have their
	384	* "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
	385	* flag to MALLOC() above, so they're already marked as "not in use".
	386	*/
	387
	388	/* Clean up the old array */
	389	if (sema)
	390	FREE(sema, M_SYSVSEM);
	391
	392	sema = newSema;
	393	seminfo.semmni = newSize;
	394	#ifdef SEM_DEBUG
	395	printf("expansion successful\n");
	396	#endif
	397	return 1;
	398	}
	399
	400	/*
	401	* Expand the sem_pool array to the given capacity. If the expansion fails
	402	* we return 0 (fail), otherwise we return 1 (success).
	403	*
	404	* Assumes we already hold the subsystem lock.
	405	*/
	406	static int
	407	grow_sem_pool(int new_pool_size)
	408	{
	409	struct sem *new_sem_pool = NULL;
	410	struct sem *sem_free;
	411	int i;
	412
	413	if (new_pool_size < semtot)
	414	return 0;
	415	/* enforce hard limit */
	416	if (new_pool_size > limitseminfo.semmns) {
	417	#ifdef SEM_DEBUG
	418	printf("semaphore hard limit of %d reached, requested %d\n",
	419	limitseminfo.semmns, new_pool_size);
	420	#endif
	421	return 0;
	422	}
	423
	424	new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC;
	425	new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size;
	426
	427	#ifdef SEM_DEBUG
	428	printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
	429	#endif
	430	MALLOC(new_sem_pool, struct sem , sizeof (struct sem) new_pool_size,
	431	M_SYSVSEM, M_WAITOK \| M_ZERO \| M_NULL);
	432	if (NULL == new_sem_pool) {
	433	#ifdef SEM_DEBUG
	434	printf("allocation failed. no changes made.\n");
	435	#endif
	436	return 0;
	437	}
	438
	439	/* We have our new memory, now copy the old contents over */
	440	if (sem_pool)
	441	for(i = 0; i < seminfo.semmns; i++)
	442	new_sem_pool[i] = sem_pool[i];
	443
	444	/* Update our id structures to point to the new semaphores */
	445	for(i = 0; i < seminfo.semmni; i++) {
	446	if (sema[i].u.sem_perm.mode & SEM_ALLOC) /* ID in use */
	447	sema[i].u.sem_base = new_sem_pool +
	448	(sema[i].u.sem_base - sem_pool);
	449	}
	450
	451	sem_free = sem_pool;
	452	sem_pool = new_sem_pool;
	453
	454	/* clean up the old array */
	455	if (sem_free != NULL)
	456	FREE(sem_free, M_SYSVSEM);
	457
	458	seminfo.semmns = new_pool_size;
	459	#ifdef SEM_DEBUG
	460	printf("expansion complete\n");
	461	#endif
	462	return 1;
	463	}
	464
	465	/*
	466	* Allocate a new sem_undo structure for a process
	467	* (returns ptr to structure or NULL if no more room)
	468	*
	469	* Assumes we already hold the subsystem lock.
	470	*/
	471
	472	static int
	473	semu_alloc(struct proc *p)
	474	{
	475	int i;
	476	struct sem_undo *suptr;
	477	int *supidx;
	478	int attempt;
	479
	480	/*
	481	* Try twice to allocate something.
	482	* (we'll purge any empty structures after the first pass so
	483	* two passes are always enough)
	484	*/
	485
	486	for (attempt = 0; attempt < 2; attempt++) {
	487	/*
	488	* Look for a free structure.
	489	* Fill it in and return it if we find one.
	490	*/
	491
	492	for (i = 0; i < seminfo.semmnu; i++) {
	493	suptr = SEMU(i);
	494	if (suptr->un_proc == NULL) {
	495	suptr->un_next_idx = semu_list_idx;
	496	semu_list_idx = i;
	497	suptr->un_cnt = 0;
	498	suptr->un_ent = NULL;
	499	suptr->un_proc = p;
	500	return i;
	501	}
	502	}
	503
	504	/*
	505	* We didn't find a free one, if this is the first attempt
	506	* then try to free some structures.
	507	*/
	508
	509	if (attempt == 0) {
	510	/* All the structures are in use - try to free some */
	511	int did_something = 0;
	512
	513	supidx = &semu_list_idx;
	514	while (*supidx != -1) {
	515	suptr = SEMU(*supidx);
	516	if (suptr->un_cnt == 0) {
	517	suptr->un_proc = NULL;
	518	*supidx = suptr->un_next_idx;
	519	did_something = 1;
	520	} else
	521	supidx = &(suptr->un_next_idx);
	522	}
	523
	524	/* If we didn't free anything. Try expanding
	525	* the semu[] array. If that doesn't work
	526	* then fail. We expand last to get the
	527	* most reuse out of existing resources.
	528	*/
	529	if (!did_something)
	530	if (!grow_semu_array(seminfo.semmnu + 1))
	531	return -1;
	532	} else {
	533	/*
	534	* The second pass failed even though we freed
	535	* something after the first pass!
	536	* This is IMPOSSIBLE!
	537	*/
	538	panic("semu_alloc - second attempt failed");
	539	}
	540	}
	541	return -1;
	542	}
	543
	544	/*
	545	* Adjust a particular entry for a particular proc
	546	*
	547	* Assumes we already hold the subsystem lock.
	548	*/
	549	static int
	550	semundo_adjust(struct proc p, int supidx, int semid,
	551	int semnum, int adjval)
	552	{
	553	struct sem_undo *suptr;
	554	int suidx;
	555	struct undo sueptr, suepptr, new_sueptr;
	556	int i;
	557
	558	/*
	559	* Look for and remember the sem_undo if the caller doesn't provide it
	560	*/
	561
	562	suidx = *supidx;
	563	if (suidx == -1) {
	564	for (suidx = semu_list_idx; suidx != -1;
	565	suidx = suptr->un_next_idx) {
	566	suptr = SEMU(suidx);
	567	if (suptr->un_proc == p) {
	568	*supidx = suidx;
	569	break;
	570	}
	571	}
	572	if (suidx == -1) {
	573	if (adjval == 0)
	574	return(0);
	575	suidx = semu_alloc(p);
	576	if (suidx == -1)
	577	return(ENOSPC);
	578	*supidx = suidx;
	579	}
	580	}
	581
	582	/*
	583	* Look for the requested entry and adjust it (delete if adjval becomes
	584	* 0).
	585	*/
	586	suptr = SEMU(suidx);
	587	new_sueptr = NULL;
	588	for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
	589	i < suptr->un_cnt;
	590	i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
	591	if (sueptr->une_id != semid \|\| sueptr->une_num != semnum)
	592	continue;
	593	if (adjval == 0)
	594	sueptr->une_adjval = 0;
	595	else
	596	sueptr->une_adjval += adjval;
	597	if (sueptr->une_adjval == 0) {
	598	suptr->un_cnt--;
	599	*suepptr = sueptr->une_next;
	600	FREE(sueptr, M_SYSVSEM);
	601	sueptr = NULL;
	602	}
	603	return 0;
	604	}
	605
	606	/* Didn't find the right entry - create it */
	607	if (adjval == 0) {
	608	/* no adjustment: no need for a new entry */
	609	return 0;
	610	}
	611
	612	if (suptr->un_cnt == limitseminfo.semume) {
	613	/* reached the limit number of semaphore undo entries */
	614	return EINVAL;
	615	}
	616
	617	/* allocate a new semaphore undo entry */
	618	MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
	619	M_SYSVSEM, M_WAITOK);
	620	if (new_sueptr == NULL) {
	621	return ENOMEM;
	622	}
	623
	624	/* fill in the new semaphore undo entry */
	625	new_sueptr->une_next = suptr->un_ent;
	626	suptr->un_ent = new_sueptr;
	627	suptr->un_cnt++;
	628	new_sueptr->une_adjval = adjval;
	629	new_sueptr->une_id = semid;
	630	new_sueptr->une_num = semnum;
	631
	632	return 0;
	633	}
	634
	635	/* Assumes we already hold the subsystem lock.
	636	*/
	637	static void
	638	semundo_clear(int semid, int semnum)
	639	{
	640	struct sem_undo *suptr;
	641	int suidx;
	642
	643	for (suidx = semu_list_idx; suidx != -1; suidx = suptr->un_next_idx) {
	644	struct undo *sueptr;
	645	struct undo **suepptr;
	646	int i = 0;
	647
	648	suptr = SEMU(suidx);
	649	sueptr = suptr->un_ent;
	650	suepptr = &suptr->un_ent;
	651	while (i < suptr->un_cnt) {
	652	if (sueptr->une_id == semid) {
	653	if (semnum == -1 \|\| sueptr->une_num == semnum) {
	654	suptr->un_cnt--;
	655	*suepptr = sueptr->une_next;
	656	FREE(sueptr, M_SYSVSEM);
	657	sueptr = *suepptr;
	658	continue;
	659	}
	660	if (semnum != -1)
	661	break;
	662	}
	663	i++;
	664	suepptr = &sueptr->une_next;
	665	sueptr = sueptr->une_next;
	666	}
	667	}
	668	}
	669
	670	/*
	671	* Note that the user-mode half of this passes a union coerced to a
	672	* user_addr_t. The union contains either an int or a pointer, and
	673	* so we have to coerce it back, variant on whether the calling
	674	* process is 64 bit or not. The coercion works for the 'val' element
	675	* because the alignment is the same in user and kernel space.
	676	*/
	677	int
	678	semctl(struct proc p, struct semctl_args uap, int32_t *retval)
	679	{
	680	int semid = uap->semid;
	681	int semnum = uap->semnum;
	682	int cmd = uap->cmd;
	683	user_semun_t user_arg = (user_semun_t)uap->arg;
	684	kauth_cred_t cred = kauth_cred_get();
	685	int i, rval, eval;
	686	struct user_semid_ds sbuf;
	687	struct semid_kernel *semakptr;
	688
	689
	690	AUDIT_ARG(svipc_cmd, cmd);
	691	AUDIT_ARG(svipc_id, semid);
	692
	693	SYSV_SEM_SUBSYS_LOCK();
	694
	695	#ifdef SEM_DEBUG
	696	printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg);
	697	#endif
	698
	699	semid = IPCID_TO_IX(semid);
	700
	701	if (semid < 0 \|\| semid >= seminfo.semmni) {
	702	#ifdef SEM_DEBUG
	703	printf("Invalid semid\n");
	704	#endif
	705	eval = EINVAL;
	706	goto semctlout;
	707	}
	708
	709	semakptr = &sema[semid];
	710	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 \|\|
	711	semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid)) {
	712	eval = EINVAL;
	713	goto semctlout;
	714	}
	715	#if CONFIG_MACF
	716	eval = mac_sysvsem_check_semctl(cred, semakptr, cmd);
	717	if (eval)
	718	goto semctlout;
	719	#endif
	720
	721	eval = 0;
	722	rval = 0;
	723
	724	switch (cmd) {
	725	case IPC_RMID:
	726	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_M)))
	727	goto semctlout;
	728
	729	semakptr->u.sem_perm.cuid = kauth_cred_getuid(cred);
	730	semakptr->u.sem_perm.uid = kauth_cred_getuid(cred);
	731	semtot -= semakptr->u.sem_nsems;
	732	for (i = semakptr->u.sem_base - sem_pool; i < semtot; i++)
	733	sem_pool[i] = sem_pool[i + semakptr->u.sem_nsems];
	734	for (i = 0; i < seminfo.semmni; i++) {
	735	if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
	736	sema[i].u.sem_base > semakptr->u.sem_base)
	737	sema[i].u.sem_base -= semakptr->u.sem_nsems;
	738	}
	739	semakptr->u.sem_perm.mode = 0;
	740	#if CONFIG_MACF
	741	mac_sysvsem_label_recycle(semakptr);
	742	#endif
	743	semundo_clear(semid, -1);
	744	wakeup((caddr_t)semakptr);
	745	break;
	746
	747	case IPC_SET:
	748	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_M)))
	749	goto semctlout;
	750
	751	if (IS_64BIT_PROCESS(p)) {
	752	struct user64_semid_ds ds64;
	753	eval = copyin(user_arg.buf, &ds64, sizeof(ds64));
	754	semid_ds_64tokernel(&ds64, &sbuf);
	755	} else {
	756	struct user32_semid_ds ds32;
	757	eval = copyin(user_arg.buf, &ds32, sizeof(ds32));
	758	semid_ds_32tokernel(&ds32, &sbuf);
	759	}
	760
	761	if (eval != 0) {
	762	goto semctlout;
	763	}
	764
	765	semakptr->u.sem_perm.uid = sbuf.sem_perm.uid;
	766	semakptr->u.sem_perm.gid = sbuf.sem_perm.gid;
	767	semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode &
	768	~0777) \| (sbuf.sem_perm.mode & 0777);
	769	semakptr->u.sem_ctime = sysv_semtime();
	770	break;
	771
	772	case IPC_STAT:
	773	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
	774	goto semctlout;
	775
	776	if (IS_64BIT_PROCESS(p)) {
	777	struct user64_semid_ds semid_ds64;
	778	bzero(&semid_ds64, sizeof(semid_ds64));
	779	semid_ds_kernelto64(&semakptr->u, &semid_ds64);
	780	eval = copyout(&semid_ds64, user_arg.buf, sizeof(semid_ds64));
	781	} else {
	782	struct user32_semid_ds semid_ds32;
	783	bzero(&semid_ds32, sizeof(semid_ds32));
	784	semid_ds_kernelto32(&semakptr->u, &semid_ds32);
	785	eval = copyout(&semid_ds32, user_arg.buf, sizeof(semid_ds32));
	786	}
	787	break;
	788
	789	case GETNCNT:
	790	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
	791	goto semctlout;
	792	if (semnum < 0 \|\| semnum >= semakptr->u.sem_nsems) {
	793	eval = EINVAL;
	794	goto semctlout;
	795	}
	796	rval = semakptr->u.sem_base[semnum].semncnt;
	797	break;
	798
	799	case GETPID:
	800	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
	801	goto semctlout;
	802	if (semnum < 0 \|\| semnum >= semakptr->u.sem_nsems) {
	803	eval = EINVAL;
	804	goto semctlout;
	805	}
	806	rval = semakptr->u.sem_base[semnum].sempid;
	807	break;
	808
	809	case GETVAL:
	810	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
	811	goto semctlout;
	812	if (semnum < 0 \|\| semnum >= semakptr->u.sem_nsems) {
	813	eval = EINVAL;
	814	goto semctlout;
	815	}
	816	rval = semakptr->u.sem_base[semnum].semval;
	817	break;
	818
	819	case GETALL:
	820	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
	821	goto semctlout;
	822	/* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */
	823	for (i = 0; i < semakptr->u.sem_nsems; i++) {
	824	/* XXX could be done in one go... */
	825	eval = copyout((caddr_t)&semakptr->u.sem_base[i].semval,
	826	user_arg.array + (i * sizeof(unsigned short)),
	827	sizeof(unsigned short));
	828	if (eval != 0)
	829	break;
	830	}
	831	break;
	832
	833	case GETZCNT:
	834	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
	835	goto semctlout;
	836	if (semnum < 0 \|\| semnum >= semakptr->u.sem_nsems) {
	837	eval = EINVAL;
	838	goto semctlout;
	839	}
	840	rval = semakptr->u.sem_base[semnum].semzcnt;
	841	break;
	842
	843	case SETVAL:
	844	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_W)))
	845	{
	846	#ifdef SEM_DEBUG
	847	printf("Invalid credentials for write\n");
	848	#endif
	849	goto semctlout;
	850	}
	851	if (semnum < 0 \|\| semnum >= semakptr->u.sem_nsems)
	852	{
	853	#ifdef SEM_DEBUG
	854	printf("Invalid number out of range for set\n");
	855	#endif
	856	eval = EINVAL;
	857	goto semctlout;
	858	}
	859
	860	/*
	861	* Cast down a pointer instead of using 'val' member directly
	862	* to avoid introducing endieness and a pad field into the
	863	* header file. Ugly, but it works.
	864	*/
	865	u_int newsemval = CAST_DOWN_EXPLICIT(u_int, user_arg.buf);
	866
	867	/*
	868	* The check is being performed as unsigned values to match
	869	* eventual destination
	870	*/
	871	if (newsemval > (u_int)seminfo.semvmx)
	872	{
	873	#ifdef SEM_DEBUG
	874	printf("Out of range sem value for set\n");
	875	#endif
	876	eval = ERANGE;
	877	goto semctlout;
	878	}
	879	semakptr->u.sem_base[semnum].semval = newsemval;
	880	semakptr->u.sem_base[semnum].sempid = p->p_pid;
	881	/* XXX scottl Should there be a MAC call here? */
	882	semundo_clear(semid, semnum);
	883	wakeup((caddr_t)semakptr);
	884	break;
	885
	886	case SETALL:
	887	if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_W)))
	888	goto semctlout;
	889	/* XXXXXXXXXXXX TBD ******/
	890	for (i = 0; i < semakptr->u.sem_nsems; i++) {
	891	/* XXX could be done in one go... */
	892	eval = copyin(user_arg.array + (i * sizeof(unsigned short)),
	893	(caddr_t)&semakptr->u.sem_base[i].semval,
	894	sizeof(unsigned short));
	895	if (eval != 0)
	896	break;
	897	semakptr->u.sem_base[i].sempid = p->p_pid;
	898	}
	899	/* XXX scottl Should there be a MAC call here? */
	900	semundo_clear(semid, -1);
	901	wakeup((caddr_t)semakptr);
	902	break;
	903
	904	default:
	905	eval = EINVAL;
	906	goto semctlout;
	907	}
	908
	909	if (eval == 0)
	910	*retval = rval;
	911	semctlout:
	912	SYSV_SEM_SUBSYS_UNLOCK();
	913	return(eval);
	914	}
	915
	916	int
	917	semget(__unused struct proc p, struct semget_args uap, int32_t *retval)
	918	{
	919	int semid, eval;
	920	int key = uap->key;
	921	int nsems = uap->nsems;
	922	int semflg = uap->semflg;
	923	kauth_cred_t cred = kauth_cred_get();
	924
	925	#ifdef SEM_DEBUG
	926	if (key != IPC_PRIVATE)
	927	printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg);
	928	else
	929	printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg);
	930	#endif
	931
	932
	933	SYSV_SEM_SUBSYS_LOCK();
	934
	935
	936	if (key != IPC_PRIVATE) {
	937	for (semid = 0; semid < seminfo.semmni; semid++) {
	938	if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
	939	sema[semid].u.sem_perm._key == key)
	940	break;
	941	}
	942	if (semid < seminfo.semmni) {
	943	#ifdef SEM_DEBUG
	944	printf("found public key\n");
	945	#endif
	946	if ((eval = ipcperm(cred, &sema[semid].u.sem_perm,
	947	semflg & 0700)))
	948	goto semgetout;
	949	if (nsems < 0 \|\| sema[semid].u.sem_nsems < nsems) {
	950	#ifdef SEM_DEBUG
	951	printf("too small\n");
	952	#endif
	953	eval = EINVAL;
	954	goto semgetout;
	955	}
	956	if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
	957	#ifdef SEM_DEBUG
	958	printf("not exclusive\n");
	959	#endif
	960	eval = EEXIST;
	961	goto semgetout;
	962	}
	963	#if CONFIG_MACF
	964	eval = mac_sysvsem_check_semget(cred, &sema[semid]);
	965	if (eval)
	966	goto semgetout;
	967	#endif
	968	goto found;
	969	}
	970	}
	971
	972	#ifdef SEM_DEBUG
	973	printf("need to allocate an id for the request\n");
	974	#endif
	975	if (key == IPC_PRIVATE \|\| (semflg & IPC_CREAT)) {
	976	if (nsems <= 0 \|\| nsems > limitseminfo.semmsl) {
	977	#ifdef SEM_DEBUG
	978	printf("nsems out of range (0<%d<=%d)\n", nsems,
	979	seminfo.semmsl);
	980	#endif
	981	eval = EINVAL;
	982	goto semgetout;
	983	}
	984	if (nsems > seminfo.semmns - semtot) {
	985	#ifdef SEM_DEBUG
	986	printf("not enough semaphores left (need %d, got %d)\n",
	987	nsems, seminfo.semmns - semtot);
	988	#endif
	989	if (!grow_sem_pool(semtot + nsems)) {
	990	#ifdef SEM_DEBUG
	991	printf("failed to grow the sem array\n");
	992	#endif
	993	eval = ENOSPC;
	994	goto semgetout;
	995	}
	996	}
	997	for (semid = 0; semid < seminfo.semmni; semid++) {
	998	if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0)
	999	break;
	1000	}
	1001	if (semid == seminfo.semmni) {
	1002	#ifdef SEM_DEBUG
	1003	printf("no more id's available\n");
	1004	#endif
	1005	if (!grow_sema_array(seminfo.semmni + 1))
	1006	{
	1007	#ifdef SEM_DEBUG
	1008	printf("failed to grow sema array\n");
	1009	#endif
	1010	eval = ENOSPC;
	1011	goto semgetout;
	1012	}
	1013	}
	1014	#ifdef SEM_DEBUG
	1015	printf("semid %d is available\n", semid);
	1016	#endif
	1017	sema[semid].u.sem_perm._key = key;
	1018	sema[semid].u.sem_perm.cuid = kauth_cred_getuid(cred);
	1019	sema[semid].u.sem_perm.uid = kauth_cred_getuid(cred);
	1020	sema[semid].u.sem_perm.cgid = kauth_cred_getgid(cred);
	1021	sema[semid].u.sem_perm.gid = kauth_cred_getgid(cred);
	1022	sema[semid].u.sem_perm.mode = (semflg & 0777) \| SEM_ALLOC;
	1023	sema[semid].u.sem_perm._seq =
	1024	(sema[semid].u.sem_perm._seq + 1) & 0x7fff;
	1025	sema[semid].u.sem_nsems = nsems;
	1026	sema[semid].u.sem_otime = 0;
	1027	sema[semid].u.sem_ctime = sysv_semtime();
	1028	sema[semid].u.sem_base = &sem_pool[semtot];
	1029	semtot += nsems;
	1030	bzero(sema[semid].u.sem_base,
	1031	sizeof(sema[semid].u.sem_base[0])*nsems);
	1032	#if CONFIG_MACF
	1033	mac_sysvsem_label_associate(cred, &sema[semid]);
	1034	#endif
	1035	#ifdef SEM_DEBUG
	1036	printf("sembase = 0x%x, next = 0x%x\n", sema[semid].u.sem_base,
	1037	&sem_pool[semtot]);
	1038	#endif
	1039	} else {
	1040	#ifdef SEM_DEBUG
	1041	printf("didn't find it and wasn't asked to create it\n");
	1042	#endif
	1043	eval = ENOENT;
	1044	goto semgetout;
	1045	}
	1046
	1047	found:
	1048	*retval = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm);
	1049	AUDIT_ARG(svipc_id, *retval);
	1050	#ifdef SEM_DEBUG
	1051	printf("semget is done, returning %d\n", *retval);
	1052	#endif
	1053	eval = 0;
	1054
	1055	semgetout:
	1056	SYSV_SEM_SUBSYS_UNLOCK();
	1057	return(eval);
	1058	}
	1059
	1060	int
	1061	semop(struct proc p, struct semop_args uap, int32_t *retval)
	1062	{
	1063	int semid = uap->semid;
	1064	int nsops = uap->nsops;
	1065	struct sembuf sops[seminfo.semopm];
	1066	struct semid_kernel *semakptr;
	1067	struct sembuf sopptr = NULL; / protected by 'semptr' */
	1068	struct sem semptr = NULL; / protected by 'if' */
	1069	int supidx = -1;
	1070	int i, j, eval;
	1071	int do_wakeup, do_undos;
	1072
	1073	AUDIT_ARG(svipc_id, uap->semid);
	1074
	1075	SYSV_SEM_SUBSYS_LOCK();
	1076
	1077	#ifdef SEM_DEBUG
	1078	printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops);
	1079	#endif
	1080
	1081	semid = IPCID_TO_IX(semid); /* Convert back to zero origin */
	1082
	1083	if (semid < 0 \|\| semid >= seminfo.semmni) {
	1084	eval = EINVAL;
	1085	goto semopout;
	1086	}
	1087
	1088	semakptr = &sema[semid];
	1089	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
	1090	eval = EINVAL;
	1091	goto semopout;
	1092	}
	1093	if (semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid)) {
	1094	eval = EINVAL;
	1095	goto semopout;
	1096	}
	1097
	1098	if ((eval = ipcperm(kauth_cred_get(), &semakptr->u.sem_perm, IPC_W))) {
	1099	#ifdef SEM_DEBUG
	1100	printf("eval = %d from ipaccess\n", eval);
	1101	#endif
	1102	goto semopout;
	1103	}
	1104
	1105	if (nsops < 0 \|\| nsops > seminfo.semopm) {
	1106	#ifdef SEM_DEBUG
	1107	printf("too many sops (max=%d, nsops=%d)\n",
	1108	seminfo.semopm, nsops);
	1109	#endif
	1110	eval = E2BIG;
	1111	goto semopout;
	1112	}
	1113
	1114	/* OK for LP64, since sizeof(struct sembuf) is currently invariant */
	1115	if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
	1116	#ifdef SEM_DEBUG
	1117	printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
	1118	uap->sops, &sops, nsops * sizeof(struct sembuf));
	1119	#endif
	1120	goto semopout;
	1121	}
	1122
	1123	#if CONFIG_MACF
	1124	/*
	1125	* Initial pass thru sops to see what permissions are needed.
	1126	*/
	1127	j = 0; /* permission needed */
	1128	for (i = 0; i < nsops; i++)
	1129	j \|= (sops[i].sem_op == 0) ? SEM_R : SEM_A;
	1130
	1131	/*
	1132	* The MAC hook checks whether the thread has read (and possibly
	1133	* write) permissions to the semaphore array based on the
	1134	* sopptr->sem_op value.
	1135	*/
	1136	eval = mac_sysvsem_check_semop(kauth_cred_get(), semakptr, j);
	1137	if (eval)
	1138	goto semopout;
	1139	#endif
	1140
	1141	/*
	1142	* Loop trying to satisfy the vector of requests.
	1143	* If we reach a point where we must wait, any requests already
	1144	* performed are rolled back and we go to sleep until some other
	1145	* process wakes us up. At this point, we start all over again.
	1146	*
	1147	* This ensures that from the perspective of other tasks, a set
	1148	* of requests is atomic (never partially satisfied).
	1149	*/
	1150	do_undos = 0;
	1151
	1152	for (;;) {
	1153	do_wakeup = 0;
	1154
	1155	for (i = 0; i < nsops; i++) {
	1156	sopptr = &sops[i];
	1157
	1158	if (sopptr->sem_num >= semakptr->u.sem_nsems) {
	1159	eval = EFBIG;
	1160	goto semopout;
	1161	}
	1162
	1163	semptr = &semakptr->u.sem_base[sopptr->sem_num];
	1164
	1165	#ifdef SEM_DEBUG
	1166	printf("semop: semakptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
	1167	semakptr, semakptr->u.sem_base, semptr,
	1168	sopptr->sem_num, semptr->semval, sopptr->sem_op,
	1169	(sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait");
	1170	#endif
	1171
	1172	if (sopptr->sem_op < 0) {
	1173	if (semptr->semval + sopptr->sem_op < 0) {
	1174	#ifdef SEM_DEBUG
	1175	printf("semop: can't do it now\n");
	1176	#endif
	1177	break;
	1178	} else {
	1179	semptr->semval += sopptr->sem_op;
	1180	if (semptr->semval == 0 &&
	1181	semptr->semzcnt > 0)
	1182	do_wakeup = 1;
	1183	}
	1184	if (sopptr->sem_flg & SEM_UNDO)
	1185	do_undos = 1;
	1186	} else if (sopptr->sem_op == 0) {
	1187	if (semptr->semval > 0) {
	1188	#ifdef SEM_DEBUG
	1189	printf("semop: not zero now\n");
	1190	#endif
	1191	break;
	1192	}
	1193	} else {
	1194	if (semptr->semncnt > 0)
	1195	do_wakeup = 1;
	1196	semptr->semval += sopptr->sem_op;
	1197	if (sopptr->sem_flg & SEM_UNDO)
	1198	do_undos = 1;
	1199	}
	1200	}
	1201
	1202	/*
	1203	* Did we get through the entire vector?
	1204	*/
	1205	if (i >= nsops)
	1206	goto done;
	1207
	1208	/*
	1209	* No ... rollback anything that we've already done
	1210	*/
	1211	#ifdef SEM_DEBUG
	1212	printf("semop: rollback 0 through %d\n", i-1);
	1213	#endif
	1214	for (j = 0; j < i; j++)
	1215	semakptr->u.sem_base[sops[j].sem_num].semval -=
	1216	sops[j].sem_op;
	1217
	1218	/*
	1219	* If the request that we couldn't satisfy has the
	1220	* NOWAIT flag set then return with EAGAIN.
	1221	*/
	1222	if (sopptr->sem_flg & IPC_NOWAIT) {
	1223	eval = EAGAIN;
	1224	goto semopout;
	1225	}
	1226
	1227	if (sopptr->sem_op == 0)
	1228	semptr->semzcnt++;
	1229	else
	1230	semptr->semncnt++;
	1231
	1232	#ifdef SEM_DEBUG
	1233	printf("semop: good night!\n");
	1234	#endif
	1235	/* Release our lock on the semaphore subsystem so
	1236	* another thread can get at the semaphore we are
	1237	* waiting for. We will get the lock back after we
	1238	* wake up.
	1239	*/
	1240	eval = msleep((caddr_t)semakptr, &sysv_sem_subsys_mutex , (PZERO - 4) \| PCATCH,
	1241	"semwait", 0);
	1242
	1243	#ifdef SEM_DEBUG
	1244	printf("semop: good morning (eval=%d)!\n", eval);
	1245	#endif
	1246	if (eval != 0) {
	1247	eval = EINTR;
	1248	}
	1249
	1250	/*
	1251	* IMPORTANT: while we were asleep, the semaphore array might
	1252	* have been reallocated somewhere else (see grow_sema_array()).
	1253	* When we wake up, we have to re-lookup the semaphore
	1254	* structures and re-validate them.
	1255	*/
	1256
	1257	semptr = NULL;
	1258
	1259	/*
	1260	* Make sure that the semaphore still exists
	1261	*
	1262	* XXX POSIX: Third test this 'if' and 'EINTR' precedence may
	1263	* fail testing; if so, we will need to revert this code.
	1264	*/
	1265	semakptr = &sema[semid]; /* sema may have been reallocated */
	1266	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 \|\|
	1267	semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid) \|\|
	1268	sopptr->sem_num >= semakptr->u.sem_nsems) {
	1269	/* The man page says to return EIDRM. */
	1270	/* Unfortunately, BSD doesn't define that code! */
	1271	if (eval == EINTR) {
	1272	/*
	1273	* EINTR takes precedence over the fact that
	1274	* the semaphore disappeared while we were
	1275	* sleeping...
	1276	*/
	1277	} else {
	1278	#ifdef EIDRM
	1279	eval = EIDRM;
	1280	#else
	1281	eval = EINVAL; /* Ancient past */
	1282	#endif
	1283	}
	1284	goto semopout;
	1285	}
	1286
	1287	/*
	1288	* The semaphore is still alive. Readjust the count of
	1289	* waiting processes. semptr needs to be recomputed
	1290	* because the sem[] may have been reallocated while
	1291	* we were sleeping, updating our sem_base pointer.
	1292	*/
	1293	semptr = &semakptr->u.sem_base[sopptr->sem_num];
	1294	if (sopptr->sem_op == 0)
	1295	semptr->semzcnt--;
	1296	else
	1297	semptr->semncnt--;
	1298
	1299	if (eval != 0) { /* EINTR */
	1300	goto semopout;
	1301	}
	1302	}
	1303
	1304	done:
	1305	/*
	1306	* Process any SEM_UNDO requests.
	1307	*/
	1308	if (do_undos) {
	1309	for (i = 0; i < nsops; i++) {
	1310	/*
	1311	* We only need to deal with SEM_UNDO's for non-zero
	1312	* op's.
	1313	*/
	1314	int adjval;
	1315
	1316	if ((sops[i].sem_flg & SEM_UNDO) == 0)
	1317	continue;
	1318	adjval = sops[i].sem_op;
	1319	if (adjval == 0)
	1320	continue;
	1321	eval = semundo_adjust(p, &supidx, semid,
	1322	sops[i].sem_num, -adjval);
	1323	if (eval == 0)
	1324	continue;
	1325
	1326	/*
	1327	* Oh-Oh! We ran out of either sem_undo's or undo's.
	1328	* Rollback the adjustments to this point and then
	1329	* rollback the semaphore ups and down so we can return
	1330	* with an error with all structures restored. We
	1331	* rollback the undo's in the exact reverse order that
	1332	* we applied them. This guarantees that we won't run
	1333	* out of space as we roll things back out.
	1334	*/
	1335	for (j = i - 1; j >= 0; j--) {
	1336	if ((sops[j].sem_flg & SEM_UNDO) == 0)
	1337	continue;
	1338	adjval = sops[j].sem_op;
	1339	if (adjval == 0)
	1340	continue;
	1341	if (semundo_adjust(p, &supidx, semid,
	1342	sops[j].sem_num, adjval) != 0)
	1343	panic("semop - can't undo undos");
	1344	}
	1345
	1346	for (j = 0; j < nsops; j++)
	1347	semakptr->u.sem_base[sops[j].sem_num].semval -=
	1348	sops[j].sem_op;
	1349
	1350	#ifdef SEM_DEBUG
	1351	printf("eval = %d from semundo_adjust\n", eval);
	1352	#endif
	1353	goto semopout;
	1354	} /* loop through the sops */
	1355	} /* if (do_undos) */
	1356
	1357	/* We're definitely done - set the sempid's */
	1358	for (i = 0; i < nsops; i++) {
	1359	sopptr = &sops[i];
	1360	semptr = &semakptr->u.sem_base[sopptr->sem_num];
	1361	semptr->sempid = p->p_pid;
	1362	}
	1363	semakptr->u.sem_otime = sysv_semtime();
	1364
	1365	if (do_wakeup) {
	1366	#ifdef SEM_DEBUG
	1367	printf("semop: doing wakeup\n");
	1368	#ifdef SEM_WAKEUP
	1369	sem_wakeup((caddr_t)semakptr);
	1370	#else
	1371	wakeup((caddr_t)semakptr);
	1372	#endif
	1373	printf("semop: back from wakeup\n");
	1374	#else
	1375	wakeup((caddr_t)semakptr);
	1376	#endif
	1377	}
	1378	#ifdef SEM_DEBUG
	1379	printf("semop: done\n");
	1380	#endif
	1381	*retval = 0;
	1382	eval = 0;
	1383	semopout:
	1384	SYSV_SEM_SUBSYS_UNLOCK();
	1385	return(eval);
	1386	}
	1387
	1388	/*
	1389	* Go through the undo structures for this process and apply the adjustments to
	1390	* semaphores.
	1391	*/
	1392	void
	1393	semexit(struct proc *p)
	1394	{
	1395	struct sem_undo *suptr = NULL;
	1396	int suidx;
	1397	int *supidx;
	1398	int did_something;
	1399
	1400	/* If we have not allocated our semaphores yet there can't be
	1401	* anything to undo, but we need the lock to prevent
	1402	* dynamic memory race conditions.
	1403	*/
	1404	SYSV_SEM_SUBSYS_LOCK();
	1405
	1406	if (!sem_pool)
	1407	{
	1408	SYSV_SEM_SUBSYS_UNLOCK();
	1409	return;
	1410	}
	1411	did_something = 0;
	1412
	1413	/*
	1414	* Go through the chain of undo vectors looking for one
	1415	* associated with this process.
	1416	*/
	1417
	1418	for (supidx = &semu_list_idx; (suidx = *supidx) != -1;
	1419	supidx = &suptr->un_next_idx) {
	1420	suptr = SEMU(suidx);
	1421	if (suptr->un_proc == p)
	1422	break;
	1423	}
	1424
	1425	if (suidx == -1)
	1426	goto unlock;
	1427
	1428	#ifdef SEM_DEBUG
	1429	printf("proc @%08x has undo structure with %d entries\n", p,
	1430	suptr->un_cnt);
	1431	#endif
	1432
	1433	/*
	1434	* If there are any active undo elements then process them.
	1435	*/
	1436	if (suptr->un_cnt > 0) {
	1437	while (suptr->un_ent != NULL) {
	1438	struct undo *sueptr;
	1439	int semid;
	1440	int semnum;
	1441	int adjval;
	1442	struct semid_kernel *semakptr;
	1443
	1444	sueptr = suptr->un_ent;
	1445	semid = sueptr->une_id;
	1446	semnum = sueptr->une_num;
	1447	adjval = sueptr->une_adjval;
	1448
	1449	semakptr = &sema[semid];
	1450	if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0)
	1451	panic("semexit - semid not allocated");
	1452	if (semnum >= semakptr->u.sem_nsems)
	1453	panic("semexit - semnum out of range");
	1454
	1455	#ifdef SEM_DEBUG
	1456	printf("semexit: %08x id=%d num=%d(adj=%d) ; sem=%d\n",
	1457	suptr->un_proc,
	1458	semid,
	1459	semnum,
	1460	adjval,
	1461	semakptr->u.sem_base[semnum].semval);
	1462	#endif
	1463
	1464	if (adjval < 0) {
	1465	if (semakptr->u.sem_base[semnum].semval < -adjval)
	1466	semakptr->u.sem_base[semnum].semval = 0;
	1467	else
	1468	semakptr->u.sem_base[semnum].semval +=
	1469	adjval;
	1470	} else
	1471	semakptr->u.sem_base[semnum].semval += adjval;
	1472
	1473	/* Maybe we should build a list of semakptr's to wake
	1474	* up, finish all access to data structures, release the
	1475	* subsystem lock, and wake all the processes. Something
	1476	* to think about.
	1477	*/
	1478	#ifdef SEM_WAKEUP
	1479	sem_wakeup((caddr_t)semakptr);
	1480	#else
	1481	wakeup((caddr_t)semakptr);
	1482	#endif
	1483	#ifdef SEM_DEBUG
	1484	printf("semexit: back from wakeup\n");
	1485	#endif
	1486	suptr->un_cnt--;
	1487	suptr->un_ent = sueptr->une_next;
	1488	FREE(sueptr, M_SYSVSEM);
	1489	sueptr = NULL;
	1490	}
	1491	}
	1492
	1493	/*
	1494	* Deallocate the undo vector.
	1495	*/
	1496	#ifdef SEM_DEBUG
	1497	printf("removing vector\n");
	1498	#endif
	1499	suptr->un_proc = NULL;
	1500	*supidx = suptr->un_next_idx;
	1501
	1502	unlock:
	1503	/*
	1504	* There is a semaphore leak (i.e. memory leak) in this code.
	1505	* We should be deleting the IPC_PRIVATE semaphores when they are
	1506	* no longer needed, and we dont. We would have to track which processes
	1507	* know about which IPC_PRIVATE semaphores, updating the list after
	1508	* every fork. We can't just delete them semaphore when the process
	1509	* that created it dies, because that process may well have forked
	1510	* some children. So we need to wait until all of it's children have
	1511	* died, and so on. Maybe we should tag each IPC_PRIVATE sempahore
	1512	* with the creating group ID, count the number of processes left in
	1513	* that group, and delete the semaphore when the group is gone.
	1514	* Until that code gets implemented we will leak IPC_PRIVATE semaphores.
	1515	* There is an upper bound on the size of our semaphore array, so
	1516	* leaking the semaphores should not work as a DOS attack.
	1517	*
	1518	* Please note that the original BSD code this file is based on had the
	1519	* same leaky semaphore problem.
	1520	*/
	1521
	1522	SYSV_SEM_SUBSYS_UNLOCK();
	1523	}
	1524
	1525
	1526	/* (struct sysctl_oid oidp, void arg1, int arg2, \
	1527	struct sysctl_req req) /
	1528	static int
	1529	sysctl_seminfo(__unused struct sysctl_oid oidp, void arg1,
	1530	__unused int arg2, struct sysctl_req *req)
	1531	{
	1532	int error = 0;
	1533
	1534	error = SYSCTL_OUT(req, arg1, sizeof(int));
	1535	if (error \|\| req->newptr == USER_ADDR_NULL)
	1536	return(error);
	1537
	1538	SYSV_SEM_SUBSYS_LOCK();
	1539
	1540	/* Set the values only if shared memory is not initialised */
	1541	if ((sem_pool == NULL) &&
	1542	(sema == NULL) &&
	1543	(semu == NULL) &&
	1544	(semu_list_idx == -1)) {
	1545	if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) {
	1546	goto out;
	1547	}
	1548	} else
	1549	error = EINVAL;
	1550	out:
	1551	SYSV_SEM_SUBSYS_UNLOCK();
	1552	return(error);
	1553
	1554	}
	1555
	1556	/* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
	1557	extern struct sysctl_oid_list sysctl__kern_sysv_children;
	1558	SYSCTL_PROC(_kern_sysv, OID_AUTO, semmni, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	1559	&limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
	1560
	1561	SYSCTL_PROC(_kern_sysv, OID_AUTO, semmns, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	1562	&limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
	1563
	1564	SYSCTL_PROC(_kern_sysv, OID_AUTO, semmnu, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	1565	&limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
	1566
	1567	SYSCTL_PROC(_kern_sysv, OID_AUTO, semmsl, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	1568	&limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
	1569
	1570	SYSCTL_PROC(_kern_sysv, OID_AUTO, semume, CTLTYPE_INT \| CTLFLAG_RW \| CTLFLAG_LOCKED,
	1571	&limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
	1572
	1573
	1574	static int
	1575	IPCS_sem_sysctl(__unused struct sysctl_oid oidp, __unused void arg1,
	1576	__unused int arg2, struct sysctl_req *req)
	1577	{
	1578	int error;
	1579	int cursor;
	1580	union {
	1581	struct user32_IPCS_command u32;
	1582	struct user_IPCS_command u64;
	1583	} ipcs;
	1584	struct user32_semid_ds semid_ds32; /* post conversion, 32 bit version */
	1585	struct user64_semid_ds semid_ds64; /* post conversion, 64 bit version */
	1586	void *semid_dsp;
	1587	size_t ipcs_sz;
	1588	size_t semid_ds_sz;
	1589	struct proc *p = current_proc();
	1590
	1591	if (IS_64BIT_PROCESS(p)) {
	1592	ipcs_sz = sizeof(struct user_IPCS_command);
	1593	semid_ds_sz = sizeof(struct user64_semid_ds);
	1594	} else {
	1595	ipcs_sz = sizeof(struct user32_IPCS_command);
	1596	semid_ds_sz = sizeof(struct user32_semid_ds);
	1597	}
	1598
	1599	/* Copy in the command structure */
	1600	if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
	1601	return(error);
	1602	}
	1603
	1604	if (!IS_64BIT_PROCESS(p)) /* convert in place */
	1605	ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data);
	1606
	1607	/* Let us version this interface... */
	1608	if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
	1609	return(EINVAL);
	1610	}
	1611
	1612	SYSV_SEM_SUBSYS_LOCK();
	1613	switch(ipcs.u64.ipcs_op) {
	1614	case IPCS_SEM_CONF: /* Obtain global configuration data */
	1615	if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) {
	1616	error = ERANGE;
	1617	break;
	1618	}
	1619	if (ipcs.u64.ipcs_cursor != 0) { /* fwd. compat. */
	1620	error = EINVAL;
	1621	break;
	1622	}
	1623	error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
	1624	break;
	1625
	1626	case IPCS_SEM_ITER: /* Iterate over existing segments */
	1627	cursor = ipcs.u64.ipcs_cursor;
	1628	if (cursor < 0 \|\| cursor >= seminfo.semmni) {
	1629	error = ERANGE;
	1630	break;
	1631	}
	1632	if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) {
	1633	error = EINVAL;
	1634	break;
	1635	}
	1636	for( ; cursor < seminfo.semmni; cursor++) {
	1637	if (sema[cursor].u.sem_perm.mode & SEM_ALLOC)
	1638	break;
	1639	continue;
	1640	}
	1641	if (cursor == seminfo.semmni) {
	1642	error = ENOENT;
	1643	break;
	1644	}
	1645
	1646	semid_dsp = &sema[cursor].u; /* default: 64 bit */
	1647
	1648	/*
	1649	* If necessary, convert the 64 bit kernel segment
	1650	* descriptor to a 32 bit user one.
	1651	*/
	1652	if (!IS_64BIT_PROCESS(p)) {
	1653	bzero(&semid_ds32, sizeof(semid_ds32));
	1654	semid_ds_kernelto32(semid_dsp, &semid_ds32);
	1655	semid_dsp = &semid_ds32;
	1656	} else {
	1657	bzero(&semid_ds64, sizeof(semid_ds64));
	1658	semid_ds_kernelto64(semid_dsp, &semid_ds64);
	1659	semid_dsp = &semid_ds64;
	1660	}
	1661
	1662	error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
	1663	if (!error) {
	1664	/* update cursor */
	1665	ipcs.u64.ipcs_cursor = cursor + 1;
	1666
	1667	if (!IS_64BIT_PROCESS(p)) /* convert in place */
	1668	ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t,ipcs.u64.ipcs_data);
	1669
	1670	error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
	1671	}
	1672	break;
	1673
	1674	default:
	1675	error = EINVAL;
	1676	break;
	1677	}
	1678	SYSV_SEM_SUBSYS_UNLOCK();
	1679	return(error);
	1680	}
	1681
	1682	SYSCTL_DECL(_kern_sysv_ipcs);
	1683	SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW \| CTLFLAG_ANYBODY \| CTLFLAG_LOCKED,
	1684	0, 0, IPCS_sem_sysctl,
	1685	"S,IPCS_sem_command",
	1686	"ipcs sem command interface");
	1687
	1688	#endif /* SYSV_SEM */