bsd/kern/sysv_sem.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23  * Implementation of SVID semaphores
  24  *
  25  * Author:  Daniel Boulet
  26  *
  27  * This software is provided ``AS IS'' without any warranties of any kind.
  28  */
  29 /*
  30  * John Bellardo modified the implementation for Darwin. 12/2000
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/systm.h>
  35 #include <sys/kernel.h>
  36 #include <sys/proc_internal.h>
  37 #include <sys/kauth.h>
  38 #include <sys/sem_internal.h>
  39 #include <sys/malloc.h>
  40 #include <mach/mach_types.h>
  41
  42 #include <sys/filedesc.h>
  43 #include <sys/file_internal.h>
  44 #include <sys/sysctl.h>
  45 #include <sys/ipcs.h>
  46 #include <sys/sysent.h>
  47 #include <sys/sysproto.h>
  48
  49 #include <bsm/audit_kernel.h>
  50
  51
  52 /* Uncomment this line to see the debugging output */
  53 /* #define SEM_DEBUG */
  54
  55 #define M_SYSVSEM       M_TEMP
  56
  57
  58 /* Hard system limits to avoid resource starvation / DOS attacks.
  59  * These are not needed if we can make the semaphore pages swappable.
  60  */
  61 static struct seminfo limitseminfo = {
  62         SEMMAP,        /* # of entries in semaphore map */
  63         SEMMNI,        /* # of semaphore identifiers */
  64         SEMMNS,        /* # of semaphores in system */
  65         SEMMNU,        /* # of undo structures in system */
  66         SEMMSL,        /* max # of semaphores per id */
  67         SEMOPM,        /* max # of operations per semop call */
  68         SEMUME,        /* max # of undo entries per process */
  69         SEMUSZ,        /* size in bytes of undo structure */
  70         SEMVMX,        /* semaphore maximum value */
  71         SEMAEM         /* adjust on exit max value */
  72 };
  73
  74 /* Current system allocations.  We use this structure to track how many
  75  * resources we have allocated so far.  This way we can set large hard limits
  76  * and not allocate the memory for them up front.
  77  */
  78 struct seminfo seminfo = {
  79         SEMMAP, /* Unused, # of entries in semaphore map */
  80         0,      /* # of semaphore identifiers */
  81         0,      /* # of semaphores in system */
  82         0,      /* # of undo entries in system */
  83         SEMMSL, /* max # of semaphores per id */
  84         SEMOPM, /* max # of operations per semop call */
  85         SEMUME, /* max # of undo entries per process */
  86         SEMUSZ, /* size in bytes of undo structure */
  87         SEMVMX, /* semaphore maximum value */
  88         SEMAEM  /* adjust on exit max value */
  89 };
  90
  91
  92 static struct sem_undo *semu_alloc(struct proc *p);
  93 static int semundo_adjust(struct proc *p, struct sem_undo **supptr,
  94                 int semid, int semnum, int adjval);
  95 static void semundo_clear(int semid, int semnum);
  96
  97 /* XXX casting to (sy_call_t *) is bogus, as usual. */
  98 static sy_call_t *semcalls[] = {
  99         (sy_call_t *)semctl, (sy_call_t *)semget,
 100         (sy_call_t *)semop
 101 };
 102
/*
 * Subsystem-wide tables.  sema, sem_pool and semu start out NULL and are
 * (re)allocated by grow_sema_array(), grow_sem_pool() and grow_semu_array()
 * respectively, so their addresses change whenever one of them grows.
 * semu_list heads the chain of in-use undo structures, linked via un_next.
 */
static int              semtot = 0;             /* # of used semaphores */
struct user_semid_ds    *sema = NULL;           /* semaphore id pool */
struct sem              *sem_pool =  NULL;      /* semaphore pool */
static struct sem_undo  *semu_list = NULL;      /* active undo structures */
struct sem_undo         *semu = NULL;           /* semaphore undo pool */
 108
 109
/*
 * Locking state for the subsystem: one mutex, plus the lock group and
 * attribute objects it is created from in sysv_sem_lock_init().
 */
void sysv_sem_lock_init(void);
static lck_grp_t       *sysv_sem_subsys_lck_grp;
static lck_grp_attr_t  *sysv_sem_subsys_lck_grp_attr;
static lck_attr_t      *sysv_sem_subsys_lck_attr;
static lck_mtx_t        sysv_sem_subsys_mutex;

/* Take / drop the subsystem mutex; the system call entry points use these. */
#define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
#define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
 118
 119
 120 __private_extern__ void
 121 sysv_sem_lock_init( void )
 122 {
 123
 124     sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
 125     lck_grp_attr_setstat(sysv_sem_subsys_lck_grp_attr);
 126
 127     sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_sem_subsys_lck_grp_attr);
 128
 129     sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
 130     lck_attr_setdebug(sysv_sem_subsys_lck_attr);
 131     lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
 132 }
 133
 134 static __inline__ user_time_t
 135 sysv_semtime(void)
 136 {
 137         struct timeval  tv;
 138         microtime(&tv);
 139         return (tv.tv_sec);
 140 }
 141
 142 /*
 143  * XXX conversion of internal user_time_t to external tume_t loses
 144  * XXX precision; not an issue for us now, since we are only ever
 145  * XXX setting 32 bits worth of time into it.
 146  *
 147  * pad field contents are not moved correspondingly; contents will be lost
 148  *
 149  * NOTE: Source and target may *NOT* overlap! (target is smaller)
 150  */
 151 static void
 152 semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out)
 153 {
 154         out->sem_perm = in->sem_perm;
 155         out->sem_base = (__int32_t)in->sem_base;
 156         out->sem_nsems = in->sem_nsems;
 157         out->sem_otime = in->sem_otime;         /* XXX loses precision */
 158         out->sem_ctime = in->sem_ctime;         /* XXX loses precision */
 159 }
 160
/*
 * Widen a semid_ds (the layout copied in from 32 bit processes) into the
 * internal user_semid_ds.
 *
 * pad field contents are not moved correspondingly; contents will be lost
 *
 * NOTE: Source and target are permitted to overlap! (source is smaller);
 * this works because we copy fields in order from the end of the struct to
 * the beginning.  Do NOT reorder the assignments below.
 *
 * XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect
 * XXX is the same.
 * NOTE(review): no CAST_USER_ADDR_T() appears in the body below; the XXX
 * note above may be stale -- confirm.
 */
static void
semid_ds_32to64(struct semid_ds *in, struct user_semid_ds *out)
{
        /* last field first, so an in-place conversion never clobbers unread input */
        out->sem_ctime = in->sem_ctime;
        out->sem_otime = in->sem_otime;
        out->sem_nsems = in->sem_nsems;
        out->sem_base = (void *)in->sem_base;
        out->sem_perm = in->sem_perm;
}
 180
 181
 182 /*
 183  * Entry point for all SEM calls
 184  *
 185  * In Darwin this is no longer the entry point.  It will be removed after
 186  *  the code has been tested better.
 187  */
 188 /* XXX actually varargs. */
 189 int
 190 semsys(struct proc *p, struct semsys_args *uap, register_t *retval)
 191 {
 192
 193         /* The individual calls handling the locking now */
 194
 195         if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
 196                 return (EINVAL);
 197         return ((*semcalls[uap->which])(p, &uap->a2, retval));
 198 }
 199
 200 /*
 201  * Expand the semu array to the given capacity.  If the expansion fails
 202  * return 0, otherwise return 1.
 203  *
 204  * Assumes we already have the subsystem lock.
 205  */
 206 static int
 207 grow_semu_array(int newSize)
 208 {
 209         register int i;
 210         register struct sem_undo *newSemu;
 211
 212         if (newSize <= seminfo.semmnu)
 213                 return 1;
 214         if (newSize > limitseminfo.semmnu) /* enforce hard limit */
 215         {
 216 #ifdef SEM_DEBUG
 217                 printf("undo structure hard limit of %d reached, requested %d\n",
 218                         limitseminfo.semmnu, newSize);
 219 #endif
 220                 return 0;
 221         }
 222         newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC;
 223         newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize;
 224
 225 #ifdef SEM_DEBUG
 226         printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
 227 #endif
 228         MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize,
 229                M_SYSVSEM, M_WAITOK | M_ZERO);
 230         if (NULL == newSemu)
 231         {
 232 #ifdef SEM_DEBUG
 233                 printf("allocation failed.  no changes made.\n");
 234 #endif
 235                 return 0;
 236         }
 237
 238         /* copy the old data to the new array */
 239         for (i = 0; i < seminfo.semmnu; i++)
 240         {
 241                 newSemu[i] = semu[i];
 242         }
 243         /*
 244          * The new elements (from newSemu[i] to newSemu[newSize-1]) have their
 245          * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
 246          * so they're already marked as "not in use".
 247          */
 248
 249         /* Clean up the old array */
 250         if (semu)
 251                 FREE(semu, M_SYSVSEM);
 252
 253         semu = newSemu;
 254         seminfo.semmnu = newSize;
 255 #ifdef SEM_DEBUG
 256         printf("expansion successful\n");
 257 #endif
 258         return 1;
 259 }
 260
 261 /*
 262  * Expand the sema array to the given capacity.  If the expansion fails
 263  * we return 0, otherwise we return 1.
 264  *
 265  * Assumes we already have the subsystem lock.
 266  */
 267 static int
 268 grow_sema_array(int newSize)
 269 {
 270         register struct user_semid_ds *newSema;
 271         register int i;
 272
 273         if (newSize <= seminfo.semmni)
 274                 return 0;
 275         if (newSize > limitseminfo.semmni) /* enforce hard limit */
 276         {
 277 #ifdef SEM_DEBUG
 278                 printf("identifier hard limit of %d reached, requested %d\n",
 279                         limitseminfo.semmni, newSize);
 280 #endif
 281                 return 0;
 282         }
 283         newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC;
 284         newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize;
 285
 286 #ifdef SEM_DEBUG
 287         printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
 288 #endif
 289         MALLOC(newSema, struct user_semid_ds *,
 290                sizeof (struct user_semid_ds) * newSize,
 291                M_SYSVSEM, M_WAITOK | M_ZERO);
 292         if (NULL == newSema)
 293         {
 294 #ifdef SEM_DEBUG
 295                 printf("allocation failed.  no changes made.\n");
 296 #endif
 297                 return 0;
 298         }
 299
 300         /* copy over the old ids */
 301         for (i = 0; i < seminfo.semmni; i++)
 302         {
 303                 newSema[i] = sema[i];
 304                 /* This is a hack.  What we really want to be able to
 305                  * do is change the value a process is waiting on
 306                  * without waking it up, but I don't know how to do
 307                  * this with the existing code, so we wake up the
 308                  * process and let it do a lot of work to determine the
 309                  * semaphore set is really not available yet, and then
 310                  * sleep on the correct, reallocated user_semid_ds pointer.
 311                  */
 312                 if (sema[i].sem_perm.mode & SEM_ALLOC)
 313                         wakeup((caddr_t)&sema[i]);
 314         }
 315         /*
 316          * The new elements (from newSema[i] to newSema[newSize-1]) have their
 317          * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
 318          * flag to MALLOC() above, so they're already marked as "not in use".
 319          */
 320
 321         /* Clean up the old array */
 322         if (sema)
 323                 FREE(sema, M_SYSVSEM);
 324
 325         sema = newSema;
 326         seminfo.semmni = newSize;
 327 #ifdef SEM_DEBUG
 328         printf("expansion successful\n");
 329 #endif
 330         return 1;
 331 }
 332
 333 /*
 334  * Expand the sem_pool array to the given capacity.  If the expansion fails
 335  * we return 0 (fail), otherwise we return 1 (success).
 336  *
 337  * Assumes we already hold the subsystem lock.
 338  */
 339 static int
 340 grow_sem_pool(int new_pool_size)
 341 {
 342         struct sem *new_sem_pool = NULL;
 343         struct sem *sem_free;
 344         int i;
 345
 346         if (new_pool_size < semtot)
 347                 return 0;
 348         /* enforce hard limit */
 349         if (new_pool_size > limitseminfo.semmns) {
 350 #ifdef SEM_DEBUG
 351                 printf("semaphore hard limit of %d reached, requested %d\n",
 352                         limitseminfo.semmns, new_pool_size);
 353 #endif
 354                 return 0;
 355         }
 356
 357         new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC;
 358         new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size;
 359
 360 #ifdef SEM_DEBUG
 361         printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
 362 #endif
 363         MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size,
 364                M_SYSVSEM, M_WAITOK | M_ZERO);
 365         if (NULL == new_sem_pool) {
 366 #ifdef SEM_DEBUG
 367                 printf("allocation failed.  no changes made.\n");
 368 #endif
 369                 return 0;
 370         }
 371
 372         /* We have our new memory, now copy the old contents over */
 373         if (sem_pool)
 374                 for(i = 0; i < seminfo.semmns; i++)
 375                         new_sem_pool[i] = sem_pool[i];
 376
 377         /* Update our id structures to point to the new semaphores */
 378         for(i = 0; i < seminfo.semmni; i++) {
 379                 if (sema[i].sem_perm.mode & SEM_ALLOC)  /* ID in use */
 380                         sema[i].sem_base += (new_sem_pool - sem_pool);
 381         }
 382
 383         sem_free = sem_pool;
 384         sem_pool = new_sem_pool;
 385
 386         /* clean up the old array */
 387         if (sem_free != NULL)
 388                 FREE(sem_free, M_SYSVSEM);
 389
 390         seminfo.semmns = new_pool_size;
 391 #ifdef SEM_DEBUG
 392         printf("expansion complete\n");
 393 #endif
 394         return 1;
 395 }
 396
 397 /*
 398  * Allocate a new sem_undo structure for a process
 399  * (returns ptr to structure or NULL if no more room)
 400  *
 401  * Assumes we already hold the subsystem lock.
 402  */
 403
 404 static struct sem_undo *
 405 semu_alloc(struct proc *p)
 406 {
 407         register int i;
 408         register struct sem_undo *suptr;
 409         register struct sem_undo **supptr;
 410         int attempt;
 411
 412         /*
 413          * Try twice to allocate something.
 414          * (we'll purge any empty structures after the first pass so
 415          * two passes are always enough)
 416          */
 417
 418         for (attempt = 0; attempt < 2; attempt++) {
 419                 /*
 420                  * Look for a free structure.
 421                  * Fill it in and return it if we find one.
 422                  */
 423
 424                 for (i = 0; i < seminfo.semmnu; i++) {
 425                         suptr = SEMU(i);
 426                         if (suptr->un_proc == NULL) {
 427                                 suptr->un_next = semu_list;
 428                                 semu_list = suptr;
 429                                 suptr->un_cnt = 0;
 430                                 suptr->un_ent = NULL;
 431                                 suptr->un_proc = p;
 432                                 return(suptr);
 433                         }
 434                 }
 435
 436                 /*
 437                  * We didn't find a free one, if this is the first attempt
 438                  * then try to free some structures.
 439                  */
 440
 441                 if (attempt == 0) {
 442                         /* All the structures are in use - try to free some */
 443                         int did_something = 0;
 444
 445                         supptr = &semu_list;
 446                         while ((suptr = *supptr) != NULL) {
 447                                 if (suptr->un_cnt == 0)  {
 448                                         suptr->un_proc = NULL;
 449                                         *supptr = suptr->un_next;
 450                                         did_something = 1;
 451                                 } else
 452                                         supptr = &(suptr->un_next);
 453                         }
 454
 455                         /* If we didn't free anything. Try expanding
 456                          * the semu[] array.  If that doesn't work
 457                          * then fail.  We expand last to get the
 458                          * most reuse out of existing resources.
 459                          */
 460                         if (!did_something)
 461                                 if (!grow_semu_array(seminfo.semmnu + 1))
 462                                         return(NULL);
 463                 } else {
 464                         /*
 465                          * The second pass failed even though we freed
 466                          * something after the first pass!
 467                          * This is IMPOSSIBLE!
 468                          */
 469                         panic("semu_alloc - second attempt failed");
 470                 }
 471         }
 472         return (NULL);
 473 }
 474
 475 /*
 476  * Adjust a particular entry for a particular proc
 477  *
 478  * Assumes we already hold the subsystem lock.
 479  */
 480 static int
 481 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid,
 482         int semnum, int adjval)
 483 {
 484         register struct sem_undo *suptr;
 485         register struct undo *sueptr, **suepptr, *new_sueptr;
 486         int i;
 487
 488         /*
 489          * Look for and remember the sem_undo if the caller doesn't provide it
 490          */
 491
 492         suptr = *supptr;
 493         if (suptr == NULL) {
 494                 for (suptr = semu_list; suptr != NULL;
 495                     suptr = suptr->un_next) {
 496                         if (suptr->un_proc == p) {
 497                                 *supptr = suptr;
 498                                 break;
 499                         }
 500                 }
 501                 if (suptr == NULL) {
 502                         if (adjval == 0)
 503                                 return(0);
 504                         suptr = semu_alloc(p);
 505                         if (suptr == NULL)
 506                                 return(ENOSPC);
 507                         *supptr = suptr;
 508                 }
 509         }
 510
 511         /*
 512          * Look for the requested entry and adjust it (delete if adjval becomes
 513          * 0).
 514          */
 515         new_sueptr = NULL;
 516         for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
 517              i < suptr->un_cnt;
 518              i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
 519                 if (sueptr->une_id != semid || sueptr->une_num != semnum)
 520                         continue;
 521                 if (adjval == 0)
 522                         sueptr->une_adjval = 0;
 523                 else
 524                         sueptr->une_adjval += adjval;
 525                 if (sueptr->une_adjval == 0) {
 526                         suptr->un_cnt--;
 527                         *suepptr = sueptr->une_next;
 528                         FREE(sueptr, M_SYSVSEM);
 529                         sueptr = NULL;
 530                 }
 531                 return 0;
 532         }
 533
 534         /* Didn't find the right entry - create it */
 535         if (adjval == 0) {
 536                 /* no adjustment: no need for a new entry */
 537                 return 0;
 538         }
 539
 540         if (suptr->un_cnt == limitseminfo.semume) {
 541                 /* reached the limit number of semaphore undo entries */
 542                 return EINVAL;
 543         }
 544
 545         /* allocate a new semaphore undo entry */
 546         MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
 547                M_SYSVSEM, M_WAITOK);
 548         if (new_sueptr == NULL) {
 549                 return ENOMEM;
 550         }
 551
 552         /* fill in the new semaphore undo entry */
 553         new_sueptr->une_next = suptr->un_ent;
 554         suptr->un_ent = new_sueptr;
 555         suptr->un_cnt++;
 556         new_sueptr->une_adjval = adjval;
 557         new_sueptr->une_id = semid;
 558         new_sueptr->une_num = semnum;
 559
 560         return 0;
 561 }
 562
 563 /* Assumes we already hold the subsystem lock.
 564  */
 565 static void
 566 semundo_clear(int semid, int semnum)
 567 {
 568         struct sem_undo *suptr;
 569
 570         for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
 571                 struct undo *sueptr;
 572                 struct undo **suepptr;
 573                 int i = 0;
 574
 575                 sueptr = suptr->un_ent;
 576                 suepptr = &suptr->un_ent;
 577                 while (i < suptr->un_cnt) {
 578                         if (sueptr->une_id == semid) {
 579                                 if (semnum == -1 || sueptr->une_num == semnum) {
 580                                         suptr->un_cnt--;
 581                                         *suepptr = sueptr->une_next;
 582                                         FREE(sueptr, M_SYSVSEM);
 583                                         sueptr = *suepptr;
 584                                         continue;
 585                                 }
 586                                 if (semnum != -1)
 587                                         break;
 588                         }
 589                         i++;
 590                         suepptr = &sueptr->une_next;
 591                         sueptr = sueptr->une_next;
 592                 }
 593         }
 594 }
 595
 596 /*
 597  * Note that the user-mode half of this passes a union coerced to a
 598  * user_addr_t.  The union contains either an int or a pointer, and
 599  * so we have to coerce it back, variant on whether the calling
 600  * process is 64 bit or not.  The coercion works for the 'val' element
 601  * because the alignment is the same in user and kernel space.
 602  */
 603 int
 604 semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
 605 {
 606         int semid = uap->semid;
 607         int semnum = uap->semnum;
 608         int cmd = uap->cmd;
 609         user_semun_t user_arg = (user_semun_t)uap->arg;
 610         kauth_cred_t cred = kauth_cred_get();
 611         int i, rval, eval;
 612         struct user_semid_ds sbuf;
 613         struct user_semid_ds *semaptr;
 614         struct user_semid_ds uds;
 615
 616
 617         AUDIT_ARG(svipc_cmd, cmd);
 618         AUDIT_ARG(svipc_id, semid);
 619
 620         SYSV_SEM_SUBSYS_LOCK();
 621
 622 #ifdef SEM_DEBUG
 623         printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg);
 624 #endif
 625
 626         semid = IPCID_TO_IX(semid);
 627
 628         if (semid < 0 || semid >= seminfo.semmni) {
 629 #ifdef SEM_DEBUG
 630                 printf("Invalid semid\n");
 631 #endif
 632                 eval = EINVAL;
 633                 goto semctlout;
 634         }
 635
 636         semaptr = &sema[semid];
 637         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
 638             semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
 639                 eval = EINVAL;
 640                 goto semctlout;
 641         }
 642
 643         eval = 0;
 644         rval = 0;
 645
 646         switch (cmd) {
 647         case IPC_RMID:
 648                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
 649                         goto semctlout;
 650
 651                 semaptr->sem_perm.cuid = kauth_cred_getuid(cred);
 652                 semaptr->sem_perm.uid = kauth_cred_getuid(cred);
 653                 semtot -= semaptr->sem_nsems;
 654                 for (i = semaptr->sem_base - sem_pool; i < semtot; i++)
 655                         sem_pool[i] = sem_pool[i + semaptr->sem_nsems];
 656                 for (i = 0; i < seminfo.semmni; i++) {
 657                         if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
 658                             sema[i].sem_base > semaptr->sem_base)
 659                                 sema[i].sem_base -= semaptr->sem_nsems;
 660                 }
 661                 semaptr->sem_perm.mode = 0;
 662                 semundo_clear(semid, -1);
 663                 wakeup((caddr_t)semaptr);
 664                 break;
 665
 666         case IPC_SET:
 667                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
 668                                 goto semctlout;
 669
 670                 if (IS_64BIT_PROCESS(p)) {
 671                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds));
 672                 } else {
 673                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct semid_ds));
 674                         /* convert in place; ugly, but safe */
 675                         semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf);
 676                 }
 677
 678                 if (eval != 0) {
 679                         goto semctlout;
 680                 }
 681
 682                 semaptr->sem_perm.uid = sbuf.sem_perm.uid;
 683                 semaptr->sem_perm.gid = sbuf.sem_perm.gid;
 684                 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
 685                     (sbuf.sem_perm.mode & 0777);
 686                 semaptr->sem_ctime = sysv_semtime();
 687                 break;
 688
 689         case IPC_STAT:
 690                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 691                                 goto semctlout;
 692                 bcopy(semaptr, &uds, sizeof(struct user_semid_ds));
 693                 if (IS_64BIT_PROCESS(p)) {
 694                         eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds));
 695                 } else {
 696                         struct semid_ds semid_ds32;
 697                         semid_ds_64to32(&uds, &semid_ds32);
 698                         eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds));
 699                 }
 700                 break;
 701
 702         case GETNCNT:
 703                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 704                                 goto semctlout;
 705                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 706                         eval = EINVAL;
 707                         goto semctlout;
 708                 }
 709                 rval = semaptr->sem_base[semnum].semncnt;
 710                 break;
 711
 712         case GETPID:
 713                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 714                                 goto semctlout;
 715                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 716                         eval = EINVAL;
 717                         goto semctlout;
 718                 }
 719                 rval = semaptr->sem_base[semnum].sempid;
 720                 break;
 721
 722         case GETVAL:
 723                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 724                                 goto semctlout;
 725                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 726                         eval = EINVAL;
 727                         goto semctlout;
 728                 }
 729                 rval = semaptr->sem_base[semnum].semval;
 730                 break;
 731
 732         case GETALL:
 733                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 734                                 goto semctlout;
 735 /* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */
 736                 for (i = 0; i < semaptr->sem_nsems; i++) {
 737                         /* XXX could be done in one go... */
 738                         eval = copyout((caddr_t)&semaptr->sem_base[i].semval,
 739                             user_arg.array + (i * sizeof(unsigned short)),
 740                             sizeof(unsigned short));
 741                         if (eval != 0)
 742                                 break;
 743                 }
 744                 break;
 745
 746         case GETZCNT:
 747                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 748                                 goto semctlout;
 749                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 750                         eval = EINVAL;
 751                         goto semctlout;
 752                 }
 753                 rval = semaptr->sem_base[semnum].semzcnt;
 754                 break;
 755
 756         case SETVAL:
 757                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
 758                 {
 759 #ifdef SEM_DEBUG
 760                         printf("Invalid credentials for write\n");
 761 #endif
 762                                 goto semctlout;
 763                 }
 764                 if (semnum < 0 || semnum >= semaptr->sem_nsems)
 765                 {
 766 #ifdef SEM_DEBUG
 767                         printf("Invalid number out of range for set\n");
 768 #endif
 769                         eval = EINVAL;
 770                         goto semctlout;
 771                 }
 772                 /*
 773                  * Cast down a pointer instead of using 'val' member directly
 774                  * to avoid introducing endieness and a pad field into the
 775                  * header file.  Ugly, but it works.
 776                  */
 777                 semaptr->sem_base[semnum].semval = CAST_DOWN(int,user_arg.buf);
 778                 semundo_clear(semid, semnum);
 779                 wakeup((caddr_t)semaptr);
 780                 break;
 781
 782         case SETALL:
 783                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
 784                                 goto semctlout;
 785 /*** XXXXXXXXXXXX TBD ********/
 786                 for (i = 0; i < semaptr->sem_nsems; i++) {
 787                         /* XXX could be done in one go... */
 788                         eval = copyin(user_arg.array + (i * sizeof(unsigned short)),
 789                             (caddr_t)&semaptr->sem_base[i].semval,
 790                             sizeof(unsigned short));
 791                         if (eval != 0)
 792                                 break;
 793                 }
 794                 semundo_clear(semid, -1);
 795                 wakeup((caddr_t)semaptr);
 796                 break;
 797
 798         default:
 799                         eval = EINVAL;
 800                         goto semctlout;
 801         }
 802
 803         if (eval == 0)
 804                 *retval = rval;
 805 semctlout:
 806         SYSV_SEM_SUBSYS_UNLOCK();
 807         return(eval);
 808 }
 809
 810 int
 811 semget(__unused struct proc *p, struct semget_args *uap, register_t *retval)
 812 {
 813         int semid, eval;
 814         int key = uap->key;
 815         int nsems = uap->nsems;
 816         int semflg = uap->semflg;
 817         kauth_cred_t cred = kauth_cred_get();
 818
 819 #ifdef SEM_DEBUG
 820         if (key != IPC_PRIVATE)
 821                 printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg);
 822         else
 823                 printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg);
 824 #endif
 825
 826
 827         SYSV_SEM_SUBSYS_LOCK();
 828
 829
 830         if (key != IPC_PRIVATE) {
 831                 for (semid = 0; semid < seminfo.semmni; semid++) {
 832                         if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
 833                             sema[semid].sem_perm.key == key)
 834                                 break;
 835                 }
 836                 if (semid < seminfo.semmni) {
 837 #ifdef SEM_DEBUG
 838                         printf("found public key\n");
 839 #endif
 840                         if ((eval = ipcperm(cred, &sema[semid].sem_perm,
 841                             semflg & 0700)))
 842                                 goto semgetout;
 843                         if (nsems < 0 || sema[semid].sem_nsems < nsems) {
 844 #ifdef SEM_DEBUG
 845                                 printf("too small\n");
 846 #endif
 847                                 eval = EINVAL;
 848                                 goto semgetout;
 849                         }
 850                         if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
 851 #ifdef SEM_DEBUG
 852                                 printf("not exclusive\n");
 853 #endif
 854                                 eval = EEXIST;
 855                                 goto semgetout;
 856                         }
 857                         goto found;
 858                 }
 859         }
 860
 861 #ifdef SEM_DEBUG
 862         printf("need to allocate an id for the request\n");
 863 #endif
 864         if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
 865                 if (nsems <= 0 || nsems > limitseminfo.semmsl) {
 866 #ifdef SEM_DEBUG
 867                         printf("nsems out of range (0<%d<=%d)\n", nsems,
 868                             seminfo.semmsl);
 869 #endif
 870                         eval = EINVAL;
 871                         goto semgetout;
 872                 }
 873                 if (nsems > seminfo.semmns - semtot) {
 874 #ifdef SEM_DEBUG
 875                         printf("not enough semaphores left (need %d, got %d)\n",
 876                             nsems, seminfo.semmns - semtot);
 877 #endif
 878                         if (!grow_sem_pool(semtot + nsems)) {
 879 #ifdef SEM_DEBUG
 880                                 printf("failed to grow the sem array\n");
 881 #endif
 882                                 eval = ENOSPC;
 883                                 goto semgetout;
 884                         }
 885                 }
 886                 for (semid = 0; semid < seminfo.semmni; semid++) {
 887                         if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
 888                                 break;
 889                 }
 890                 if (semid == seminfo.semmni) {
 891 #ifdef SEM_DEBUG
 892                         printf("no more id's available\n");
 893 #endif
 894                         if (!grow_sema_array(seminfo.semmni + 1))
 895                         {
 896 #ifdef SEM_DEBUG
 897                                 printf("failed to grow sema array\n");
 898 #endif
 899                                 eval = ENOSPC;
 900                                 goto semgetout;
 901                         }
 902                 }
 903 #ifdef SEM_DEBUG
 904                 printf("semid %d is available\n", semid);
 905 #endif
 906                 sema[semid].sem_perm.key = key;
 907                 sema[semid].sem_perm.cuid = kauth_cred_getuid(cred);
 908                 sema[semid].sem_perm.uid = kauth_cred_getuid(cred);
 909                 sema[semid].sem_perm.cgid = cred->cr_gid;
 910                 sema[semid].sem_perm.gid = cred->cr_gid;
 911                 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
 912                 sema[semid].sem_perm.seq =
 913                     (sema[semid].sem_perm.seq + 1) & 0x7fff;
 914                 sema[semid].sem_nsems = nsems;
 915                 sema[semid].sem_otime = 0;
 916                 sema[semid].sem_ctime = sysv_semtime();
 917                 sema[semid].sem_base = &sem_pool[semtot];
 918                 semtot += nsems;
 919                 bzero(sema[semid].sem_base,
 920                     sizeof(sema[semid].sem_base[0])*nsems);
 921 #ifdef SEM_DEBUG
 922                 printf("sembase = 0x%x, next = 0x%x\n", sema[semid].sem_base,
 923                     &sem_pool[semtot]);
 924 #endif
 925         } else {
 926 #ifdef SEM_DEBUG
 927                 printf("didn't find it and wasn't asked to create it\n");
 928 #endif
 929                 eval = ENOENT;
 930                 goto semgetout;
 931         }
 932
 933 found:
 934         *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
 935         AUDIT_ARG(svipc_id, *retval);
 936 #ifdef SEM_DEBUG
 937         printf("semget is done, returning %d\n", *retval);
 938 #endif
 939         eval = 0;
 940
 941 semgetout:
 942         SYSV_SEM_SUBSYS_UNLOCK();
 943         return(eval);
 944 }
 945
 946 int
 947 semop(struct proc *p, struct semop_args *uap, register_t *retval)
 948 {
 949         int semid = uap->semid;
 950         int nsops = uap->nsops;
 951         struct sembuf sops[MAX_SOPS];
 952         register struct user_semid_ds *semaptr;
 953         register struct sembuf *sopptr = NULL;  /* protected by 'semptr' */
 954         register struct sem *semptr = NULL;     /* protected by 'if' */
 955         struct sem_undo *suptr = NULL;
 956         int i, j, eval;
 957         int do_wakeup, do_undos;
 958
 959         AUDIT_ARG(svipc_id, uap->semid);
 960
 961         SYSV_SEM_SUBSYS_LOCK();
 962
 963 #ifdef SEM_DEBUG
 964         printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops);
 965 #endif
 966
 967         semid = IPCID_TO_IX(semid);     /* Convert back to zero origin */
 968
 969         if (semid < 0 || semid >= seminfo.semmni) {
 970                 eval = EINVAL;
 971                 goto semopout;
 972         }
 973
 974         semaptr = &sema[semid];
 975         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) {
 976                 eval = EINVAL;
 977                 goto semopout;
 978         }
 979         if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
 980                 eval = EINVAL;
 981                 goto semopout;
 982         }
 983
 984         if ((eval = ipcperm(kauth_cred_get(), &semaptr->sem_perm, IPC_W))) {
 985 #ifdef SEM_DEBUG
 986                 printf("eval = %d from ipaccess\n", eval);
 987 #endif
 988                 goto semopout;
 989         }
 990
 991         if (nsops < 0 || nsops > MAX_SOPS) {
 992 #ifdef SEM_DEBUG
 993                 printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops);
 994 #endif
 995                 eval = E2BIG;
 996                 goto semopout;
 997         }
 998
 999         /*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
1000         if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
1001 #ifdef SEM_DEBUG
1002                 printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
1003                     uap->sops, &sops, nsops * sizeof(struct sembuf));
1004 #endif
1005                 goto semopout;
1006         }
1007
1008         /*
1009          * Loop trying to satisfy the vector of requests.
1010          * If we reach a point where we must wait, any requests already
1011          * performed are rolled back and we go to sleep until some other
1012          * process wakes us up.  At this point, we start all over again.
1013          *
1014          * This ensures that from the perspective of other tasks, a set
1015          * of requests is atomic (never partially satisfied).
1016          */
1017         do_undos = 0;
1018
1019         for (;;) {
1020                 do_wakeup = 0;
1021
1022                 for (i = 0; i < nsops; i++) {
1023                         sopptr = &sops[i];
1024
1025                         if (sopptr->sem_num >= semaptr->sem_nsems) {
1026                                 eval = EFBIG;
1027                                 goto semopout;
1028                         }
1029
1030                         semptr = &semaptr->sem_base[sopptr->sem_num];
1031
1032 #ifdef SEM_DEBUG
1033                         printf("semop:  semaptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
1034                             semaptr, semaptr->sem_base, semptr,
1035                             sopptr->sem_num, semptr->semval, sopptr->sem_op,
1036                             (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait");
1037 #endif
1038
1039                         if (sopptr->sem_op < 0) {
1040                                 if (semptr->semval + sopptr->sem_op < 0) {
1041 #ifdef SEM_DEBUG
1042                                         printf("semop:  can't do it now\n");
1043 #endif
1044                                         break;
1045                                 } else {
1046                                         semptr->semval += sopptr->sem_op;
1047                                         if (semptr->semval == 0 &&
1048                                             semptr->semzcnt > 0)
1049                                                 do_wakeup = 1;
1050                                 }
1051                                 if (sopptr->sem_flg & SEM_UNDO)
1052                                         do_undos = 1;
1053                         } else if (sopptr->sem_op == 0) {
1054                                 if (semptr->semval > 0) {
1055 #ifdef SEM_DEBUG
1056                                         printf("semop:  not zero now\n");
1057 #endif
1058                                         break;
1059                                 }
1060                         } else {
1061                                 if (semptr->semncnt > 0)
1062                                         do_wakeup = 1;
1063                                 semptr->semval += sopptr->sem_op;
1064                                 if (sopptr->sem_flg & SEM_UNDO)
1065                                         do_undos = 1;
1066                         }
1067                 }
1068
1069                 /*
1070                  * Did we get through the entire vector?
1071                  */
1072                 if (i >= nsops)
1073                         goto done;
1074
1075                 /*
1076                  * No ... rollback anything that we've already done
1077                  */
1078 #ifdef SEM_DEBUG
1079                 printf("semop:  rollback 0 through %d\n", i-1);
1080 #endif
1081                 for (j = 0; j < i; j++)
1082                         semaptr->sem_base[sops[j].sem_num].semval -=
1083                             sops[j].sem_op;
1084
1085                 /*
1086                  * If the request that we couldn't satisfy has the
1087                  * NOWAIT flag set then return with EAGAIN.
1088                  */
1089                 if (sopptr->sem_flg & IPC_NOWAIT) {
1090                         eval = EAGAIN;
1091                         goto semopout;
1092                 }
1093
1094                 if (sopptr->sem_op == 0)
1095                         semptr->semzcnt++;
1096                 else
1097                         semptr->semncnt++;
1098
1099 #ifdef SEM_DEBUG
1100                 printf("semop:  good night!\n");
1101 #endif
1102                 /* Release our lock on the semaphore subsystem so
1103                  * another thread can get at the semaphore we are
1104                  * waiting for. We will get the lock back after we
1105                  * wake up.
1106                  */
1107                 eval = msleep((caddr_t)semaptr, &sysv_sem_subsys_mutex , (PZERO - 4) | PCATCH,
1108                     "semwait", 0);
1109
1110 #ifdef SEM_DEBUG
1111                 printf("semop:  good morning (eval=%d)!\n", eval);
1112 #endif
1113                 if (eval != 0) {
1114                         eval = EINTR;
1115                 }
1116
1117                 /*
1118                  * IMPORTANT: while we were asleep, the semaphore array might
1119                  * have been reallocated somewhere else (see grow_sema_array()).
1120                  * When we wake up, we have to re-lookup the semaphore
1121                  * structures and re-validate them.
1122                  */
1123
1124                 suptr = NULL;   /* sem_undo may have been reallocated */
1125                 semaptr = &sema[semid];    /* sema may have been reallocated */
1126
1127                 /*
1128                  * Make sure that the semaphore still exists
1129                  */
1130                 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
1131                     semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid) ||
1132                     sopptr->sem_num >= semaptr->sem_nsems) {
1133                         if (eval == EINTR) {
1134                                 /*
1135                                  * EINTR takes precedence over the fact that
1136                                  * the semaphore disappeared while we were
1137                                  * sleeping...
1138                                  */
1139                         } else {
1140                                 /*
1141                                  * The man page says to return EIDRM.
1142                                  * Unfortunately, BSD doesn't define that code!
1143                                  */
1144 #ifdef EIDRM
1145                                 eval = EIDRM;
1146 #else
1147                                 eval = EINVAL;
1148 #endif
1149                         }
1150                         goto semopout;
1151                 }
1152
1153                 /*
1154                  * The semaphore is still alive.  Readjust the count of
1155                  * waiting processes. semptr needs to be recomputed
1156                  * because the sem[] may have been reallocated while
1157                  * we were sleeping, updating our sem_base pointer.
1158                  */
1159                 semptr = &semaptr->sem_base[sopptr->sem_num];
1160                 if (sopptr->sem_op == 0)
1161                         semptr->semzcnt--;
1162                 else
1163                         semptr->semncnt--;
1164
1165                 if (eval != 0) { /* EINTR */
1166                         goto semopout;
1167                 }
1168         }
1169
1170 done:
1171         /*
1172          * Process any SEM_UNDO requests.
1173          */
1174         if (do_undos) {
1175                 for (i = 0; i < nsops; i++) {
1176                         /*
1177                          * We only need to deal with SEM_UNDO's for non-zero
1178                          * op's.
1179                          */
1180                         int adjval;
1181
1182                         if ((sops[i].sem_flg & SEM_UNDO) == 0)
1183                                 continue;
1184                         adjval = sops[i].sem_op;
1185                         if (adjval == 0)
1186                                 continue;
1187                         eval = semundo_adjust(p, &suptr, semid,
1188                             sops[i].sem_num, -adjval);
1189                         if (eval == 0)
1190                                 continue;
1191
1192                         /*
1193                          * Oh-Oh!  We ran out of either sem_undo's or undo's.
1194                          * Rollback the adjustments to this point and then
1195                          * rollback the semaphore ups and down so we can return
1196                          * with an error with all structures restored.  We
1197                          * rollback the undo's in the exact reverse order that
1198                          * we applied them.  This guarantees that we won't run
1199                          * out of space as we roll things back out.
1200                          */
1201                         for (j = i - 1; j >= 0; j--) {
1202                                 if ((sops[j].sem_flg & SEM_UNDO) == 0)
1203                                         continue;
1204                                 adjval = sops[j].sem_op;
1205                                 if (adjval == 0)
1206                                         continue;
1207                                 if (semundo_adjust(p, &suptr, semid,
1208                                     sops[j].sem_num, adjval) != 0)
1209                                         panic("semop - can't undo undos");
1210                         }
1211
1212                         for (j = 0; j < nsops; j++)
1213                                 semaptr->sem_base[sops[j].sem_num].semval -=
1214                                     sops[j].sem_op;
1215
1216 #ifdef SEM_DEBUG
1217                         printf("eval = %d from semundo_adjust\n", eval);
1218 #endif
1219                         goto semopout;
1220                 } /* loop through the sops */
1221         } /* if (do_undos) */
1222
1223         /* We're definitely done - set the sempid's */
1224         for (i = 0; i < nsops; i++) {
1225                 sopptr = &sops[i];
1226                 semptr = &semaptr->sem_base[sopptr->sem_num];
1227                 semptr->sempid = p->p_pid;
1228         }
1229
1230         if (do_wakeup) {
1231 #ifdef SEM_DEBUG
1232                 printf("semop:  doing wakeup\n");
1233 #ifdef SEM_WAKEUP
1234                 sem_wakeup((caddr_t)semaptr);
1235 #else
1236                 wakeup((caddr_t)semaptr);
1237 #endif
1238                 printf("semop:  back from wakeup\n");
1239 #else
1240                 wakeup((caddr_t)semaptr);
1241 #endif
1242         }
1243 #ifdef SEM_DEBUG
1244         printf("semop:  done\n");
1245 #endif
1246         *retval = 0;
1247         eval = 0;
1248 semopout:
1249         SYSV_SEM_SUBSYS_UNLOCK();
1250         return(eval);
1251 }
1252
1253 /*
1254  * Go through the undo structures for this process and apply the adjustments to
1255  * semaphores.
1256  */
1257 void
1258 semexit(struct proc *p)
1259 {
1260         register struct sem_undo *suptr;
1261         register struct sem_undo **supptr;
1262         int did_something;
1263
1264         /* If we have not allocated our semaphores yet there can't be
1265          * anything to undo, but we need the lock to prevent
1266          * dynamic memory race conditions.
1267          */
1268         SYSV_SEM_SUBSYS_LOCK();
1269
1270         if (!sem_pool)
1271         {
1272                 SYSV_SEM_SUBSYS_UNLOCK();
1273                 return;
1274         }
1275         did_something = 0;
1276
1277         /*
1278          * Go through the chain of undo vectors looking for one
1279          * associated with this process.
1280          */
1281
1282         for (supptr = &semu_list; (suptr = *supptr) != NULL;
1283             supptr = &suptr->un_next) {
1284                 if (suptr->un_proc == p)
1285                         break;
1286         }
1287
1288         if (suptr == NULL)
1289                 goto unlock;
1290
1291 #ifdef SEM_DEBUG
1292         printf("proc @%08x has undo structure with %d entries\n", p,
1293             suptr->un_cnt);
1294 #endif
1295
1296         /*
1297          * If there are any active undo elements then process them.
1298          */
1299         if (suptr->un_cnt > 0) {
1300                 while (suptr->un_ent != NULL) {
1301                         struct undo *sueptr;
1302                         int semid;
1303                         int semnum;
1304                         int adjval;
1305                         struct user_semid_ds *semaptr;
1306
1307                         sueptr = suptr->un_ent;
1308                         semid = sueptr->une_id;
1309                         semnum = sueptr->une_num;
1310                         adjval = sueptr->une_adjval;
1311
1312                         semaptr = &sema[semid];
1313                         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
1314                                 panic("semexit - semid not allocated");
1315                         if (semnum >= semaptr->sem_nsems)
1316                                 panic("semexit - semnum out of range");
1317
1318 #ifdef SEM_DEBUG
1319                         printf("semexit:  %08x id=%d num=%d(adj=%d) ; sem=%d\n",
1320                                suptr->un_proc,
1321                                semid,
1322                                semnum,
1323                                adjval,
1324                                semaptr->sem_base[semnum].semval);
1325 #endif
1326
1327                         if (adjval < 0) {
1328                                 if (semaptr->sem_base[semnum].semval < -adjval)
1329                                         semaptr->sem_base[semnum].semval = 0;
1330                                 else
1331                                         semaptr->sem_base[semnum].semval +=
1332                                             adjval;
1333                         } else
1334                                 semaptr->sem_base[semnum].semval += adjval;
1335
1336                 /* Maybe we should build a list of semaptr's to wake
1337                  * up, finish all access to data structures, release the
1338                  * subsystem lock, and wake all the processes.  Something
1339                  * to think about.  It wouldn't buy us anything unless
1340                  * wakeup had the potential to block, or the syscall
1341                  * funnel state was changed to allow multiple threads
1342                  * in the BSD code at once.
1343                  */
1344 #ifdef SEM_WAKEUP
1345                         sem_wakeup((caddr_t)semaptr);
1346 #else
1347                         wakeup((caddr_t)semaptr);
1348 #endif
1349 #ifdef SEM_DEBUG
1350                         printf("semexit:  back from wakeup\n");
1351 #endif
1352                         suptr->un_cnt--;
1353                         suptr->un_ent = sueptr->une_next;
1354                         FREE(sueptr, M_SYSVSEM);
1355                         sueptr = NULL;
1356                 }
1357         }
1358
1359         /*
1360          * Deallocate the undo vector.
1361          */
1362 #ifdef SEM_DEBUG
1363         printf("removing vector\n");
1364 #endif
1365         suptr->un_proc = NULL;
1366         *supptr = suptr->un_next;
1367
1368 unlock:
1369         /*
1370          * There is a semaphore leak (i.e. memory leak) in this code.
1371          * We should be deleting the IPC_PRIVATE semaphores when they are
1372          * no longer needed, and we dont. We would have to track which processes
1373          * know about which IPC_PRIVATE semaphores, updating the list after
1374          * every fork.  We can't just delete them semaphore when the process
1375          * that created it dies, because that process may well have forked
1376          * some children.  So we need to wait until all of it's children have
1377          * died, and so on.  Maybe we should tag each IPC_PRIVATE sempahore
1378          * with the creating group ID, count the number of processes left in
1379          * that group, and delete the semaphore when the group is gone.
1380          * Until that code gets implemented we will leak IPC_PRIVATE semaphores.
1381          * There is an upper bound on the size of our semaphore array, so
1382          * leaking the semaphores should not work as a DOS attack.
1383          *
1384          * Please note that the original BSD code this file is based on had the
1385          * same leaky semaphore problem.
1386          */
1387
1388         SYSV_SEM_SUBSYS_UNLOCK();
1389 }
1390
1391
1392 /* (struct sysctl_oid *oidp, void *arg1, int arg2, \
1393         struct sysctl_req *req) */
1394 static int
1395 sysctl_seminfo(__unused struct sysctl_oid *oidp, void *arg1,
1396         __unused int arg2, struct sysctl_req *req)
1397 {
1398         int error = 0;
1399
1400         error = SYSCTL_OUT(req, arg1, sizeof(int));
1401         if (error || req->newptr == USER_ADDR_NULL)
1402                 return(error);
1403
1404         SYSV_SEM_SUBSYS_LOCK();
1405
1406         /* Set the values only if shared memory is not initialised */
1407         if ((sem_pool == NULL) &&
1408                 (sema == NULL) &&
1409                 (semu == NULL) &&
1410                 (semu_list == NULL)) {
1411                         if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) {
1412                                 goto out;
1413                         }
1414         } else
1415                 error = EINVAL;
1416 out:
1417         SYSV_SEM_SUBSYS_UNLOCK();
1418         return(error);
1419
1420 }
1421
1422 /* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
1423 extern struct sysctl_oid_list sysctl__kern_sysv_children;
1424 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNI, semmni, CTLTYPE_INT | CTLFLAG_RW,
1425     &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
1426
1427 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNS, semmns, CTLTYPE_INT | CTLFLAG_RW,
1428     &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
1429
1430 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNU, semmnu, CTLTYPE_INT | CTLFLAG_RW,
1431     &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
1432
1433 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMSL, semmsl, CTLTYPE_INT | CTLFLAG_RW,
1434     &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
1435
1436 SYSCTL_PROC(_kern_sysv, KSYSV_SEMUNE, semume, CTLTYPE_INT | CTLFLAG_RW,
1437     &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
1438
1439
1440 static int
1441 IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
1442         __unused int arg2, struct sysctl_req *req)
1443 {
1444         int error;
1445         int cursor;
1446         union {
1447                 struct IPCS_command u32;
1448                 struct user_IPCS_command u64;
1449         } ipcs;
1450         struct semid_ds semid_ds32;     /* post conversion, 32 bit version */
1451         void *semid_dsp;
1452         size_t ipcs_sz = sizeof(struct user_IPCS_command);
1453         size_t semid_ds_sz = sizeof(struct user_semid_ds);
1454         struct proc *p = current_proc();
1455
1456         /* Copy in the command structure */
1457         if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
1458                 return(error);
1459         }
1460
1461         if (!IS_64BIT_PROCESS(p)) {
1462                 ipcs_sz = sizeof(struct IPCS_command);
1463                 semid_ds_sz = sizeof(struct semid_ds);
1464         }
1465
1466         /* Let us version this interface... */
1467         if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
1468                 return(EINVAL);
1469         }
1470
1471         SYSV_SEM_SUBSYS_LOCK();
1472         switch(ipcs.u64.ipcs_op) {
1473         case IPCS_SEM_CONF:     /* Obtain global configuration data */
1474                 if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) {
1475                         error = ERANGE;
1476                         break;
1477                 }
1478                 if (ipcs.u64.ipcs_cursor != 0) {        /* fwd. compat. */
1479                         error = EINVAL;
1480                         break;
1481                 }
1482                 error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1483                 break;
1484
1485         case IPCS_SEM_ITER:     /* Iterate over existing segments */
1486                 cursor = ipcs.u64.ipcs_cursor;
1487                 if (cursor < 0 || cursor >= seminfo.semmni) {
1488                         error = ERANGE;
1489                         break;
1490                 }
1491                 if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) {
1492                         error = EINVAL;
1493                         break;
1494                 }
1495                 for( ; cursor < seminfo.semmni; cursor++) {
1496                         if (sema[cursor].sem_perm.mode & SEM_ALLOC)
1497                                 break;
1498                         continue;
1499                 }
1500                 if (cursor == seminfo.semmni) {
1501                         error = ENOENT;
1502                         break;
1503                 }
1504
1505                 semid_dsp = &sema[cursor];      /* default: 64 bit */
1506
1507                 /*
1508                  * If necessary, convert the 64 bit kernel segment
1509                  * descriptor to a 32 bit user one.
1510                  */
1511                 if (!IS_64BIT_PROCESS(p)) {
1512                         semid_ds_64to32(semid_dsp, &semid_ds32);
1513                         semid_dsp = &semid_ds32;
1514                 }
1515                 error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1516                 if (!error) {
1517                         /* update cursor */
1518                         ipcs.u64.ipcs_cursor = cursor + 1;
1519                         error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
1520                 }
1521                 break;
1522
1523         default:
1524                 error = EINVAL;
1525                 break;
1526         }
1527         SYSV_SEM_SUBSYS_UNLOCK();
1528         return(error);
1529 }
1530
1531 SYSCTL_DECL(_kern_sysv_ipcs);
1532 SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_ANYBODY,
1533         0, 0, IPCS_sem_sysctl,
1534         "S,IPCS_sem_command",
1535         "ipcs sem command interface");