bsd/kern/sysv_sem.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Implementation of SVID semaphores
  30  *
  31  * Author:  Daniel Boulet
  32  *
  33  * This software is provided ``AS IS'' without any warranties of any kind.
  34  */
  35 /*
  36  * John Bellardo modified the implementation for Darwin. 12/2000
  37  */
  38 /*
  39  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  40  * support for mandatory and extensible security protections.  This notice
  41  * is included in support of clause 2.2 (b) of the Apple Public License,
  42  * Version 2.0.
  43  * Copyright (c) 2005-2006 SPARTA, Inc.
  44  */
  45
  46 #include <sys/param.h>
  47 #include <sys/systm.h>
  48 #include <sys/kernel.h>
  49 #include <sys/proc_internal.h>
  50 #include <sys/kauth.h>
  51 #include <sys/sem_internal.h>
  52 #include <sys/malloc.h>
  53 #include <mach/mach_types.h>
  54
  55 #include <sys/filedesc.h>
  56 #include <sys/file_internal.h>
  57 #include <sys/sysctl.h>
  58 #include <sys/ipcs.h>
  59 #include <sys/sysent.h>
  60 #include <sys/sysproto.h>
  61 #if CONFIG_MACF
  62 #include <security/mac_framework.h>
  63 #endif
  64
  65 #include <security/audit/audit.h>
  66
  67 #if SYSV_SEM
  68
  69
  /* Uncomment the next line to enable this file's debugging output. */
  /* #define SEM_DEBUG */

  /* Uncomment the next line to enable MAC debugging output. */
  /* #define      MAC_DEBUG */

  /*
   * MPRINTF() forwards its argument to printf() when CONFIG_MACF_DEBUG is
   * set and expands to nothing otherwise.
   */
  #if !CONFIG_MACF_DEBUG
  #define MPRINTF(a)
  #else
  #define MPRINTF(a)      printf(a)
  #endif

  /* Malloc type tag passed to the MALLOC()/FREE() calls in this file. */
  #define M_SYSVSEM       M_TEMP
  82
  83
  84 /* Hard system limits to avoid resource starvation / DOS attacks.
  85  * These are not needed if we can make the semaphore pages swappable.
  86  */
  87 static struct seminfo limitseminfo = {
  88         SEMMAP,        /* # of entries in semaphore map */
  89         SEMMNI,        /* # of semaphore identifiers */
  90         SEMMNS,        /* # of semaphores in system */
  91         SEMMNU,        /* # of undo structures in system */
  92         SEMMSL,        /* max # of semaphores per id */
  93         SEMOPM,        /* max # of operations per semop call */
  94         SEMUME,        /* max # of undo entries per process */
  95         SEMUSZ,        /* size in bytes of undo structure */
  96         SEMVMX,        /* semaphore maximum value */
  97         SEMAEM         /* adjust on exit max value */
  98 };
  99
 100 /* Current system allocations.  We use this structure to track how many
 101  * resources we have allocated so far.  This way we can set large hard limits
 102  * and not allocate the memory for them up front.
 103  */
 104 struct seminfo seminfo = {
 105         SEMMAP, /* Unused, # of entries in semaphore map */
 106         0,      /* # of semaphore identifiers */
 107         0,      /* # of semaphores in system */
 108         0,      /* # of undo entries in system */
 109         SEMMSL, /* max # of semaphores per id */
 110         SEMOPM, /* max # of operations per semop call */
 111         SEMUME, /* max # of undo entries per process */
 112         SEMUSZ, /* size in bytes of undo structure */
 113         SEMVMX, /* semaphore maximum value */
 114         SEMAEM  /* adjust on exit max value */
 115 };
 116
 117
 118 static int semu_alloc(struct proc *p);
 119 static int semundo_adjust(struct proc *p, int *supidx,
 120                 int semid, int semnum, int adjval);
 121 static void semundo_clear(int semid, int semnum);
 122
 123 /* XXX casting to (sy_call_t *) is bogus, as usual. */
 124 static sy_call_t *semcalls[] = {
 125         (sy_call_t *)semctl, (sy_call_t *)semget,
 126         (sy_call_t *)semop
 127 };
 128
 129 static int              semtot = 0;             /* # of used semaphores */
 130 struct semid_kernel     *sema = NULL;           /* semaphore id pool */
 131 struct sem              *sem_pool =  NULL;      /* semaphore pool */
 132 static int              semu_list_idx = -1;     /* active undo structures */
 133 struct sem_undo         *semu = NULL;           /* semaphore undo pool */
 134
 135
 136 void sysv_sem_lock_init(void);
 137 static lck_grp_t       *sysv_sem_subsys_lck_grp;
 138 static lck_grp_attr_t  *sysv_sem_subsys_lck_grp_attr;
 139 static lck_attr_t      *sysv_sem_subsys_lck_attr;
 140 static lck_mtx_t        sysv_sem_subsys_mutex;
 141
 142 #define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
 143 #define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
 144
 145
 146 __private_extern__ void
 147 sysv_sem_lock_init( void )
 148 {
 149
 150     sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
 151
 152     sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_sem_subsys_lock", sysv_sem_subsys_lck_grp_attr);
 153
 154     sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
 155     lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
 156 }
 157
 158 static __inline__ user_time_t
 159 sysv_semtime(void)
 160 {
 161         struct timeval  tv;
 162         microtime(&tv);
 163         return (tv.tv_sec);
 164 }
 165
 166 /*
 167  * XXX conversion of internal user_time_t to external tume_t loses
 168  * XXX precision; not an issue for us now, since we are only ever
 169  * XXX setting 32 bits worth of time into it.
 170  *
 171  * pad field contents are not moved correspondingly; contents will be lost
 172  *
 173  * NOTE: Source and target may *NOT* overlap! (target is smaller)
 174  */
 175 static void
 176 semid_ds_kernelto32(struct user_semid_ds *in, struct user32_semid_ds *out)
 177 {
 178         out->sem_perm = in->sem_perm;
 179         out->sem_base = CAST_DOWN_EXPLICIT(__int32_t,in->sem_base);
 180         out->sem_nsems = in->sem_nsems;
 181         out->sem_otime = in->sem_otime;         /* XXX loses precision */
 182         out->sem_ctime = in->sem_ctime;         /* XXX loses precision */
 183 }
 184
 185 static void
 186 semid_ds_kernelto64(struct user_semid_ds *in, struct user64_semid_ds *out)
 187 {
 188         out->sem_perm = in->sem_perm;
 189         out->sem_base = CAST_DOWN_EXPLICIT(__int32_t,in->sem_base);
 190         out->sem_nsems = in->sem_nsems;
 191         out->sem_otime = in->sem_otime;         /* XXX loses precision */
 192         out->sem_ctime = in->sem_ctime;         /* XXX loses precision */
 193 }
 194
 195 /*
 196  * pad field contents are not moved correspondingly; contents will be lost
 197  *
 198  * NOTE: Source and target may are permitted to overlap! (source is smaller);
 199  * this works because we copy fields in order from the end of the struct to
 200  * the beginning.
 201  *
 202  * XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect
 203  * XXX is the same.
 204  */
 205 static void
 206 semid_ds_32tokernel(struct user32_semid_ds *in, struct user_semid_ds *out)
 207 {
 208         out->sem_ctime = in->sem_ctime;
 209         out->sem_otime = in->sem_otime;
 210         out->sem_nsems = in->sem_nsems;
 211         out->sem_base = (void *)(uintptr_t)in->sem_base;
 212         out->sem_perm = in->sem_perm;
 213 }
 214
 215 static void
 216 semid_ds_64tokernel(struct user64_semid_ds *in, struct user_semid_ds *out)
 217 {
 218         out->sem_ctime = in->sem_ctime;
 219         out->sem_otime = in->sem_otime;
 220         out->sem_nsems = in->sem_nsems;
 221         out->sem_base = (void *)(uintptr_t)in->sem_base;
 222         out->sem_perm = in->sem_perm;
 223 }
 224
 225
 226 /*
 227  * semsys
 228  *
 229  * Entry point for all SEM calls: semctl, semget, semop
 230  *
 231  * Parameters:  p       Process requesting the call
 232  *              uap     User argument descriptor (see below)
 233  *              retval  Return value of the selected sem call
 234  *
 235  * Indirect parameters: uap->which      sem call to invoke (index in array of sem calls)
 236  *                      uap->a2         User argument descriptor
 237  *
 238  * Returns:     0       Success
 239  *              !0      Not success
 240  *
 241  * Implicit returns: retval     Return value of the selected sem call
 242  *
 243  * DEPRECATED:  This interface should not be used to call the other SEM
 244  *              functions (semctl, semget, semop). The correct usage is
 245  *              to call the other SEM functions directly.
 246  *
 247  */
 248 int
 249 semsys(struct proc *p, struct semsys_args *uap, int32_t *retval)
 250 {
 251
 252         /* The individual calls handling the locking now */
 253
 254         if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
 255                 return (EINVAL);
 256         return ((*semcalls[uap->which])(p, &uap->a2, retval));
 257 }
 258
 259 /*
 260  * Expand the semu array to the given capacity.  If the expansion fails
 261  * return 0, otherwise return 1.
 262  *
 263  * Assumes we already have the subsystem lock.
 264  */
 265 static int
 266 grow_semu_array(int newSize)
 267 {
 268         register int i;
 269         register struct sem_undo *newSemu;
 270
 271         if (newSize <= seminfo.semmnu)
 272                 return 1;
 273         if (newSize > limitseminfo.semmnu) /* enforce hard limit */
 274         {
 275 #ifdef SEM_DEBUG
 276                 printf("undo structure hard limit of %d reached, requested %d\n",
 277                         limitseminfo.semmnu, newSize);
 278 #endif
 279                 return 0;
 280         }
 281         newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC;
 282         newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize;
 283
 284 #ifdef SEM_DEBUG
 285         printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
 286 #endif
 287         MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize,
 288                M_SYSVSEM, M_WAITOK | M_ZERO);
 289         if (NULL == newSemu)
 290         {
 291 #ifdef SEM_DEBUG
 292                 printf("allocation failed.  no changes made.\n");
 293 #endif
 294                 return 0;
 295         }
 296
 297         /* copy the old data to the new array */
 298         for (i = 0; i < seminfo.semmnu; i++)
 299         {
 300                 newSemu[i] = semu[i];
 301         }
 302         /*
 303          * The new elements (from newSemu[i] to newSemu[newSize-1]) have their
 304          * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
 305          * so they're already marked as "not in use".
 306          */
 307
 308         /* Clean up the old array */
 309         if (semu)
 310                 FREE(semu, M_SYSVSEM);
 311
 312         semu = newSemu;
 313         seminfo.semmnu = newSize;
 314 #ifdef SEM_DEBUG
 315         printf("expansion successful\n");
 316 #endif
 317         return 1;
 318 }
 319
 320 /*
 321  * Expand the sema array to the given capacity.  If the expansion fails
 322  * we return 0, otherwise we return 1.
 323  *
 324  * Assumes we already have the subsystem lock.
 325  */
 326 static int
 327 grow_sema_array(int newSize)
 328 {
 329         register struct semid_kernel *newSema;
 330         register int i;
 331
 332         if (newSize <= seminfo.semmni)
 333                 return 0;
 334         if (newSize > limitseminfo.semmni) /* enforce hard limit */
 335         {
 336 #ifdef SEM_DEBUG
 337                 printf("identifier hard limit of %d reached, requested %d\n",
 338                         limitseminfo.semmni, newSize);
 339 #endif
 340                 return 0;
 341         }
 342         newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC;
 343         newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize;
 344
 345 #ifdef SEM_DEBUG
 346         printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
 347 #endif
 348         MALLOC(newSema, struct semid_kernel *,
 349                sizeof (struct semid_kernel) * newSize,
 350                M_SYSVSEM, M_WAITOK | M_ZERO);
 351         if (NULL == newSema)
 352         {
 353 #ifdef SEM_DEBUG
 354                 printf("allocation failed.  no changes made.\n");
 355 #endif
 356                 return 0;
 357         }
 358
 359         /* copy over the old ids */
 360         for (i = 0; i < seminfo.semmni; i++)
 361         {
 362                 newSema[i] = sema[i];
 363                 /* This is a hack.  What we really want to be able to
 364                  * do is change the value a process is waiting on
 365                  * without waking it up, but I don't know how to do
 366                  * this with the existing code, so we wake up the
 367                  * process and let it do a lot of work to determine the
 368                  * semaphore set is really not available yet, and then
 369                  * sleep on the correct, reallocated semid_kernel pointer.
 370                  */
 371                 if (sema[i].u.sem_perm.mode & SEM_ALLOC)
 372                         wakeup((caddr_t)&sema[i]);
 373         }
 374
 375 #if CONFIG_MACF
 376         for (i = seminfo.semmni; i < newSize; i++)
 377         {
 378                 mac_sysvsem_label_init(&newSema[i]);
 379         }
 380 #endif
 381
 382         /*
 383          * The new elements (from newSema[i] to newSema[newSize-1]) have their
 384          * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
 385          * flag to MALLOC() above, so they're already marked as "not in use".
 386          */
 387
 388         /* Clean up the old array */
 389         if (sema)
 390                 FREE(sema, M_SYSVSEM);
 391
 392         sema = newSema;
 393         seminfo.semmni = newSize;
 394 #ifdef SEM_DEBUG
 395         printf("expansion successful\n");
 396 #endif
 397         return 1;
 398 }
 399
 400 /*
 401  * Expand the sem_pool array to the given capacity.  If the expansion fails
 402  * we return 0 (fail), otherwise we return 1 (success).
 403  *
 404  * Assumes we already hold the subsystem lock.
 405  */
 406 static int
 407 grow_sem_pool(int new_pool_size)
 408 {
 409         struct sem *new_sem_pool = NULL;
 410         struct sem *sem_free;
 411         int i;
 412
 413         if (new_pool_size < semtot)
 414                 return 0;
 415         /* enforce hard limit */
 416         if (new_pool_size > limitseminfo.semmns) {
 417 #ifdef SEM_DEBUG
 418                 printf("semaphore hard limit of %d reached, requested %d\n",
 419                         limitseminfo.semmns, new_pool_size);
 420 #endif
 421                 return 0;
 422         }
 423
 424         new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC;
 425         new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size;
 426
 427 #ifdef SEM_DEBUG
 428         printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
 429 #endif
 430         MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size,
 431                M_SYSVSEM, M_WAITOK | M_ZERO);
 432         if (NULL == new_sem_pool) {
 433 #ifdef SEM_DEBUG
 434                 printf("allocation failed.  no changes made.\n");
 435 #endif
 436                 return 0;
 437         }
 438
 439         /* We have our new memory, now copy the old contents over */
 440         if (sem_pool)
 441                 for(i = 0; i < seminfo.semmns; i++)
 442                         new_sem_pool[i] = sem_pool[i];
 443
 444         /* Update our id structures to point to the new semaphores */
 445         for(i = 0; i < seminfo.semmni; i++) {
 446                 if (sema[i].u.sem_perm.mode & SEM_ALLOC)  /* ID in use */
 447                         sema[i].u.sem_base = new_sem_pool +
 448                                 (sema[i].u.sem_base - sem_pool);
 449         }
 450
 451         sem_free = sem_pool;
 452         sem_pool = new_sem_pool;
 453
 454         /* clean up the old array */
 455         if (sem_free != NULL)
 456                 FREE(sem_free, M_SYSVSEM);
 457
 458         seminfo.semmns = new_pool_size;
 459 #ifdef SEM_DEBUG
 460         printf("expansion complete\n");
 461 #endif
 462         return 1;
 463 }
 464
 465 /*
 466  * Allocate a new sem_undo structure for a process
 467  * (returns ptr to structure or NULL if no more room)
 468  *
 469  * Assumes we already hold the subsystem lock.
 470  */
 471
 472 static int
 473 semu_alloc(struct proc *p)
 474 {
 475         register int i;
 476         register struct sem_undo *suptr;
 477         int *supidx;
 478         int attempt;
 479
 480         /*
 481          * Try twice to allocate something.
 482          * (we'll purge any empty structures after the first pass so
 483          * two passes are always enough)
 484          */
 485
 486         for (attempt = 0; attempt < 2; attempt++) {
 487                 /*
 488                  * Look for a free structure.
 489                  * Fill it in and return it if we find one.
 490                  */
 491
 492                 for (i = 0; i < seminfo.semmnu; i++) {
 493                         suptr = SEMU(i);
 494                         if (suptr->un_proc == NULL) {
 495                                 suptr->un_next_idx = semu_list_idx;
 496                                 semu_list_idx = i;
 497                                 suptr->un_cnt = 0;
 498                                 suptr->un_ent = NULL;
 499                                 suptr->un_proc = p;
 500                                 return i;
 501                         }
 502                 }
 503
 504                 /*
 505                  * We didn't find a free one, if this is the first attempt
 506                  * then try to free some structures.
 507                  */
 508
 509                 if (attempt == 0) {
 510                         /* All the structures are in use - try to free some */
 511                         int did_something = 0;
 512
 513                         supidx = &semu_list_idx;
 514                         while (*supidx != -1) {
 515                                 suptr = SEMU(*supidx);
 516                                 if (suptr->un_cnt == 0)  {
 517                                         suptr->un_proc = NULL;
 518                                         *supidx = suptr->un_next_idx;
 519                                         did_something = 1;
 520                                 } else
 521                                         supidx = &(suptr->un_next_idx);
 522                         }
 523
 524                         /* If we didn't free anything. Try expanding
 525                          * the semu[] array.  If that doesn't work
 526                          * then fail.  We expand last to get the
 527                          * most reuse out of existing resources.
 528                          */
 529                         if (!did_something)
 530                                 if (!grow_semu_array(seminfo.semmnu + 1))
 531                                         return -1;
 532                 } else {
 533                         /*
 534                          * The second pass failed even though we freed
 535                          * something after the first pass!
 536                          * This is IMPOSSIBLE!
 537                          */
 538                         panic("semu_alloc - second attempt failed");
 539                 }
 540         }
 541         return -1;
 542 }
 543
 544 /*
 545  * Adjust a particular entry for a particular proc
 546  *
 547  * Assumes we already hold the subsystem lock.
 548  */
 549 static int
 550 semundo_adjust(struct proc *p, int *supidx, int semid,
 551         int semnum, int adjval)
 552 {
 553         register struct sem_undo *suptr;
 554         int suidx;
 555         register struct undo *sueptr, **suepptr, *new_sueptr;
 556         int i;
 557
 558         /*
 559          * Look for and remember the sem_undo if the caller doesn't provide it
 560          */
 561
 562         suidx = *supidx;
 563         if (suidx == -1) {
 564                 for (suidx = semu_list_idx; suidx != -1;
 565                     suidx = suptr->un_next_idx) {
 566                         suptr = SEMU(suidx);
 567                         if (suptr->un_proc == p) {
 568                                 *supidx = suidx;
 569                                 break;
 570                         }
 571                 }
 572                 if (suidx == -1) {
 573                         if (adjval == 0)
 574                                 return(0);
 575                         suidx = semu_alloc(p);
 576                         if (suidx == -1)
 577                                 return(ENOSPC);
 578                         *supidx = suidx;
 579                 }
 580         }
 581
 582         /*
 583          * Look for the requested entry and adjust it (delete if adjval becomes
 584          * 0).
 585          */
 586         suptr = SEMU(suidx);
 587         new_sueptr = NULL;
 588         for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
 589              i < suptr->un_cnt;
 590              i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
 591                 if (sueptr->une_id != semid || sueptr->une_num != semnum)
 592                         continue;
 593                 if (adjval == 0)
 594                         sueptr->une_adjval = 0;
 595                 else
 596                         sueptr->une_adjval += adjval;
 597                 if (sueptr->une_adjval == 0) {
 598                         suptr->un_cnt--;
 599                         *suepptr = sueptr->une_next;
 600                         FREE(sueptr, M_SYSVSEM);
 601                         sueptr = NULL;
 602                 }
 603                 return 0;
 604         }
 605
 606         /* Didn't find the right entry - create it */
 607         if (adjval == 0) {
 608                 /* no adjustment: no need for a new entry */
 609                 return 0;
 610         }
 611
 612         if (suptr->un_cnt == limitseminfo.semume) {
 613                 /* reached the limit number of semaphore undo entries */
 614                 return EINVAL;
 615         }
 616
 617         /* allocate a new semaphore undo entry */
 618         MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
 619                M_SYSVSEM, M_WAITOK);
 620         if (new_sueptr == NULL) {
 621                 return ENOMEM;
 622         }
 623
 624         /* fill in the new semaphore undo entry */
 625         new_sueptr->une_next = suptr->un_ent;
 626         suptr->un_ent = new_sueptr;
 627         suptr->un_cnt++;
 628         new_sueptr->une_adjval = adjval;
 629         new_sueptr->une_id = semid;
 630         new_sueptr->une_num = semnum;
 631
 632         return 0;
 633 }
 634
 635 /* Assumes we already hold the subsystem lock.
 636  */
 637 static void
 638 semundo_clear(int semid, int semnum)
 639 {
 640         struct sem_undo *suptr;
 641         int suidx;
 642
 643         for (suidx = semu_list_idx; suidx != -1; suidx = suptr->un_next_idx) {
 644                 struct undo *sueptr;
 645                 struct undo **suepptr;
 646                 int i = 0;
 647
 648                 suptr = SEMU(suidx);
 649                 sueptr = suptr->un_ent;
 650                 suepptr = &suptr->un_ent;
 651                 while (i < suptr->un_cnt) {
 652                         if (sueptr->une_id == semid) {
 653                                 if (semnum == -1 || sueptr->une_num == semnum) {
 654                                         suptr->un_cnt--;
 655                                         *suepptr = sueptr->une_next;
 656                                         FREE(sueptr, M_SYSVSEM);
 657                                         sueptr = *suepptr;
 658                                         continue;
 659                                 }
 660                                 if (semnum != -1)
 661                                         break;
 662                         }
 663                         i++;
 664                         suepptr = &sueptr->une_next;
 665                         sueptr = sueptr->une_next;
 666                 }
 667         }
 668 }
 669
 670 /*
 671  * Note that the user-mode half of this passes a union coerced to a
 672  * user_addr_t.  The union contains either an int or a pointer, and
 673  * so we have to coerce it back, variant on whether the calling
 674  * process is 64 bit or not.  The coercion works for the 'val' element
 675  * because the alignment is the same in user and kernel space.
 676  */
 677 int
 678 semctl(struct proc *p, struct semctl_args *uap, int32_t *retval)
 679 {
 680         int semid = uap->semid;
 681         int semnum = uap->semnum;
 682         int cmd = uap->cmd;
 683         user_semun_t user_arg = (user_semun_t)uap->arg;
 684         kauth_cred_t cred = kauth_cred_get();
 685         int i, rval, eval;
 686         struct user_semid_ds sbuf;
 687         struct semid_kernel *semakptr;
 688
 689
 690         AUDIT_ARG(svipc_cmd, cmd);
 691         AUDIT_ARG(svipc_id, semid);
 692
 693         SYSV_SEM_SUBSYS_LOCK();
 694
 695 #ifdef SEM_DEBUG
 696         printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg);
 697 #endif
 698
 699         semid = IPCID_TO_IX(semid);
 700
 701         if (semid < 0 || semid >= seminfo.semmni) {
 702 #ifdef SEM_DEBUG
 703                 printf("Invalid semid\n");
 704 #endif
 705                 eval = EINVAL;
 706                 goto semctlout;
 707         }
 708
 709         semakptr = &sema[semid];
 710         if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
 711             semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid)) {
 712                 eval = EINVAL;
 713                 goto semctlout;
 714         }
 715 #if CONFIG_MACF
 716         eval = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 717         if (eval)
 718                 goto semctlout;
 719 #endif
 720
 721         eval = 0;
 722         rval = 0;
 723
 724         switch (cmd) {
 725         case IPC_RMID:
 726                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_M)))
 727                         goto semctlout;
 728
 729                 semakptr->u.sem_perm.cuid = kauth_cred_getuid(cred);
 730                 semakptr->u.sem_perm.uid = kauth_cred_getuid(cred);
 731                 semtot -= semakptr->u.sem_nsems;
 732                 for (i = semakptr->u.sem_base - sem_pool; i < semtot; i++)
 733                         sem_pool[i] = sem_pool[i + semakptr->u.sem_nsems];
 734                 for (i = 0; i < seminfo.semmni; i++) {
 735                         if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
 736                             sema[i].u.sem_base > semakptr->u.sem_base)
 737                                 sema[i].u.sem_base -= semakptr->u.sem_nsems;
 738                 }
 739                 semakptr->u.sem_perm.mode = 0;
 740 #if CONFIG_MACF
 741                 mac_sysvsem_label_recycle(semakptr);
 742 #endif
 743                 semundo_clear(semid, -1);
 744                 wakeup((caddr_t)semakptr);
 745                 break;
 746
 747         case IPC_SET:
 748                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_M)))
 749                                 goto semctlout;
 750
 751                 if (IS_64BIT_PROCESS(p)) {
 752                         struct user64_semid_ds ds64;
 753                         eval = copyin(user_arg.buf, &ds64, sizeof(ds64));
 754                         semid_ds_64tokernel(&ds64, &sbuf);
 755                 } else {
 756                         struct user32_semid_ds ds32;
 757                         eval = copyin(user_arg.buf, &ds32, sizeof(ds32));
 758                         semid_ds_32tokernel(&ds32, &sbuf);
 759                 }
 760
 761                 if (eval != 0) {
 762                         goto semctlout;
 763                 }
 764
 765                 semakptr->u.sem_perm.uid = sbuf.sem_perm.uid;
 766                 semakptr->u.sem_perm.gid = sbuf.sem_perm.gid;
 767                 semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode &
 768                     ~0777) | (sbuf.sem_perm.mode & 0777);
 769                 semakptr->u.sem_ctime = sysv_semtime();
 770                 break;
 771
 772         case IPC_STAT:
 773                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 774                                 goto semctlout;
 775
 776                 if (IS_64BIT_PROCESS(p)) {
 777                         struct user64_semid_ds semid_ds64;
 778                         bzero(&semid_ds64, sizeof(semid_ds64));
 779                         semid_ds_kernelto64(&semakptr->u, &semid_ds64);
 780                         eval = copyout(&semid_ds64, user_arg.buf, sizeof(semid_ds64));
 781                 } else {
 782                         struct user32_semid_ds semid_ds32;
 783                         bzero(&semid_ds32, sizeof(semid_ds32));
 784                         semid_ds_kernelto32(&semakptr->u, &semid_ds32);
 785                         eval = copyout(&semid_ds32, user_arg.buf, sizeof(semid_ds32));
 786                 }
 787                 break;
 788
 789         case GETNCNT:
 790                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 791                                 goto semctlout;
 792                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 793                         eval = EINVAL;
 794                         goto semctlout;
 795                 }
 796                 rval = semakptr->u.sem_base[semnum].semncnt;
 797                 break;
 798
 799         case GETPID:
 800                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 801                                 goto semctlout;
 802                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 803                         eval = EINVAL;
 804                         goto semctlout;
 805                 }
 806                 rval = semakptr->u.sem_base[semnum].sempid;
 807                 break;
 808
 809         case GETVAL:
 810                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 811                                 goto semctlout;
 812                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 813                         eval = EINVAL;
 814                         goto semctlout;
 815                 }
 816                 rval = semakptr->u.sem_base[semnum].semval;
 817                 break;
 818
 819         case GETALL:
 820                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 821                                 goto semctlout;
 822 /* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */
 823                 for (i = 0; i < semakptr->u.sem_nsems; i++) {
 824                         /* XXX could be done in one go... */
 825                         eval = copyout((caddr_t)&semakptr->u.sem_base[i].semval,
 826                             user_arg.array + (i * sizeof(unsigned short)),
 827                             sizeof(unsigned short));
 828                         if (eval != 0)
 829                                 break;
 830                 }
 831                 break;
 832
 833         case GETZCNT:
 834                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 835                                 goto semctlout;
 836                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 837                         eval = EINVAL;
 838                         goto semctlout;
 839                 }
 840                 rval = semakptr->u.sem_base[semnum].semzcnt;
 841                 break;
 842
 843         case SETVAL:
 844                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_W)))
 845                 {
 846 #ifdef SEM_DEBUG
 847                         printf("Invalid credentials for write\n");
 848 #endif
 849                                 goto semctlout;
 850                 }
 851                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems)
 852                 {
 853 #ifdef SEM_DEBUG
 854                         printf("Invalid number out of range for set\n");
 855 #endif
 856                         eval = EINVAL;
 857                         goto semctlout;
 858                 }
 859
 860                 /*
 861                  * Cast down a pointer instead of using 'val' member directly
 862                  * to avoid introducing endieness and a pad field into the
 863                  * header file.  Ugly, but it works.
 864                  */
 865                 u_int newsemval = CAST_DOWN_EXPLICIT(u_int, user_arg.buf);
 866
 867                 /*
 868                  * The check is being performed as unsigned values to match
 869                  * eventual destination
 870                  */
 871                 if (newsemval > (u_int)seminfo.semvmx)
 872                 {
 873 #ifdef SEM_DEBUG
 874                         printf("Out of range sem value for set\n");
 875 #endif
 876                         eval = ERANGE;
 877                         goto semctlout;
 878                 }
 879                 semakptr->u.sem_base[semnum].semval = newsemval;
 880                 semakptr->u.sem_base[semnum].sempid = p->p_pid;
 881                 /* XXX scottl Should there be a MAC call here? */
 882                 semundo_clear(semid, semnum);
 883                 wakeup((caddr_t)semakptr);
 884                 break;
 885
 886         case SETALL:
 887                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_W)))
 888                                 goto semctlout;
 889 /*** XXXXXXXXXXXX TBD ********/
 890                 for (i = 0; i < semakptr->u.sem_nsems; i++) {
 891                         /* XXX could be done in one go... */
 892                         eval = copyin(user_arg.array + (i * sizeof(unsigned short)),
 893                             (caddr_t)&semakptr->u.sem_base[i].semval,
 894                             sizeof(unsigned short));
 895                         if (eval != 0)
 896                                 break;
 897                         semakptr->u.sem_base[i].sempid = p->p_pid;
 898                 }
 899                 /* XXX scottl Should there be a MAC call here? */
 900                 semundo_clear(semid, -1);
 901                 wakeup((caddr_t)semakptr);
 902                 break;
 903
 904         default:
 905                         eval = EINVAL;
 906                         goto semctlout;
 907         }
 908
 909         if (eval == 0)
 910                 *retval = rval;
 911 semctlout:
 912         SYSV_SEM_SUBSYS_UNLOCK();
 913         return(eval);
 914 }
 915
 916 int
 917 semget(__unused struct proc *p, struct semget_args *uap, int32_t *retval)
 918 {
 919         int semid, eval;
 920         int key = uap->key;
 921         int nsems = uap->nsems;
 922         int semflg = uap->semflg;
 923         kauth_cred_t cred = kauth_cred_get();
 924
 925 #ifdef SEM_DEBUG
 926         if (key != IPC_PRIVATE)
 927                 printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg);
 928         else
 929                 printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg);
 930 #endif
 931
 932
 933         SYSV_SEM_SUBSYS_LOCK();
 934
 935
 936         if (key != IPC_PRIVATE) {
 937                 for (semid = 0; semid < seminfo.semmni; semid++) {
 938                         if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
 939                             sema[semid].u.sem_perm._key == key)
 940                                 break;
 941                 }
 942                 if (semid < seminfo.semmni) {
 943 #ifdef SEM_DEBUG
 944                         printf("found public key\n");
 945 #endif
 946                         if ((eval = ipcperm(cred, &sema[semid].u.sem_perm,
 947                             semflg & 0700)))
 948                                 goto semgetout;
 949                         if (nsems < 0 || sema[semid].u.sem_nsems < nsems) {
 950 #ifdef SEM_DEBUG
 951                                 printf("too small\n");
 952 #endif
 953                                 eval = EINVAL;
 954                                 goto semgetout;
 955                         }
 956                         if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
 957 #ifdef SEM_DEBUG
 958                                 printf("not exclusive\n");
 959 #endif
 960                                 eval = EEXIST;
 961                                 goto semgetout;
 962                         }
 963 #if CONFIG_MACF
 964                         eval = mac_sysvsem_check_semget(cred, &sema[semid]);
 965                         if (eval)
 966                                 goto semgetout;
 967 #endif
 968                         goto found;
 969                 }
 970         }
 971
 972 #ifdef SEM_DEBUG
 973         printf("need to allocate an id for the request\n");
 974 #endif
 975         if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
 976                 if (nsems <= 0 || nsems > limitseminfo.semmsl) {
 977 #ifdef SEM_DEBUG
 978                         printf("nsems out of range (0<%d<=%d)\n", nsems,
 979                             seminfo.semmsl);
 980 #endif
 981                         eval = EINVAL;
 982                         goto semgetout;
 983                 }
 984                 if (nsems > seminfo.semmns - semtot) {
 985 #ifdef SEM_DEBUG
 986                         printf("not enough semaphores left (need %d, got %d)\n",
 987                             nsems, seminfo.semmns - semtot);
 988 #endif
 989                         if (!grow_sem_pool(semtot + nsems)) {
 990 #ifdef SEM_DEBUG
 991                                 printf("failed to grow the sem array\n");
 992 #endif
 993                                 eval = ENOSPC;
 994                                 goto semgetout;
 995                         }
 996                 }
 997                 for (semid = 0; semid < seminfo.semmni; semid++) {
 998                         if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0)
 999                                 break;
1000                 }
1001                 if (semid == seminfo.semmni) {
1002 #ifdef SEM_DEBUG
1003                         printf("no more id's available\n");
1004 #endif
1005                         if (!grow_sema_array(seminfo.semmni + 1))
1006                         {
1007 #ifdef SEM_DEBUG
1008                                 printf("failed to grow sema array\n");
1009 #endif
1010                                 eval = ENOSPC;
1011                                 goto semgetout;
1012                         }
1013                 }
1014 #ifdef SEM_DEBUG
1015                 printf("semid %d is available\n", semid);
1016 #endif
1017                 sema[semid].u.sem_perm._key = key;
1018                 sema[semid].u.sem_perm.cuid = kauth_cred_getuid(cred);
1019                 sema[semid].u.sem_perm.uid = kauth_cred_getuid(cred);
1020                 sema[semid].u.sem_perm.cgid = kauth_cred_getgid(cred);
1021                 sema[semid].u.sem_perm.gid = kauth_cred_getgid(cred);
1022                 sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
1023                 sema[semid].u.sem_perm._seq =
1024                     (sema[semid].u.sem_perm._seq + 1) & 0x7fff;
1025                 sema[semid].u.sem_nsems = nsems;
1026                 sema[semid].u.sem_otime = 0;
1027                 sema[semid].u.sem_ctime = sysv_semtime();
1028                 sema[semid].u.sem_base = &sem_pool[semtot];
1029                 semtot += nsems;
1030                 bzero(sema[semid].u.sem_base,
1031                     sizeof(sema[semid].u.sem_base[0])*nsems);
1032 #if CONFIG_MACF
1033                 mac_sysvsem_label_associate(cred, &sema[semid]);
1034 #endif
1035 #ifdef SEM_DEBUG
1036                 printf("sembase = 0x%x, next = 0x%x\n", sema[semid].u.sem_base,
1037                     &sem_pool[semtot]);
1038 #endif
1039         } else {
1040 #ifdef SEM_DEBUG
1041                 printf("didn't find it and wasn't asked to create it\n");
1042 #endif
1043                 eval = ENOENT;
1044                 goto semgetout;
1045         }
1046
1047 found:
1048         *retval = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm);
1049         AUDIT_ARG(svipc_id, *retval);
1050 #ifdef SEM_DEBUG
1051         printf("semget is done, returning %d\n", *retval);
1052 #endif
1053         eval = 0;
1054
1055 semgetout:
1056         SYSV_SEM_SUBSYS_UNLOCK();
1057         return(eval);
1058 }
1059
1060 int
1061 semop(struct proc *p, struct semop_args *uap, int32_t *retval)
1062 {
1063         int semid = uap->semid;
1064         int nsops = uap->nsops;
1065         struct sembuf sops[seminfo.semopm];
1066         register struct semid_kernel *semakptr;
1067         register struct sembuf *sopptr = NULL;  /* protected by 'semptr' */
1068         register struct sem *semptr = NULL;     /* protected by 'if' */
1069         int supidx = -1;
1070         int i, j, eval;
1071         int do_wakeup, do_undos;
1072
1073         AUDIT_ARG(svipc_id, uap->semid);
1074
1075         SYSV_SEM_SUBSYS_LOCK();
1076
1077 #ifdef SEM_DEBUG
1078         printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops);
1079 #endif
1080
1081         semid = IPCID_TO_IX(semid);     /* Convert back to zero origin */
1082
1083         if (semid < 0 || semid >= seminfo.semmni) {
1084                 eval = EINVAL;
1085                 goto semopout;
1086         }
1087
1088         semakptr = &sema[semid];
1089         if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
1090                 eval = EINVAL;
1091                 goto semopout;
1092         }
1093         if (semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid)) {
1094                 eval = EINVAL;
1095                 goto semopout;
1096         }
1097
1098         if ((eval = ipcperm(kauth_cred_get(), &semakptr->u.sem_perm, IPC_W))) {
1099 #ifdef SEM_DEBUG
1100                 printf("eval = %d from ipaccess\n", eval);
1101 #endif
1102                 goto semopout;
1103         }
1104
1105         if (nsops < 0 || nsops > seminfo.semopm) {
1106 #ifdef SEM_DEBUG
1107                 printf("too many sops (max=%d, nsops=%d)\n",
1108                     seminfo.semopm, nsops);
1109 #endif
1110                 eval = E2BIG;
1111                 goto semopout;
1112         }
1113
1114         /*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
1115         if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
1116 #ifdef SEM_DEBUG
1117                 printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
1118                     uap->sops, &sops, nsops * sizeof(struct sembuf));
1119 #endif
1120                 goto semopout;
1121         }
1122
1123 #if CONFIG_MACF
1124         /*
1125          * Initial pass thru sops to see what permissions are needed.
1126          */
1127         j = 0;          /* permission needed */
1128         for (i = 0; i < nsops; i++)
1129                 j |= (sops[i].sem_op == 0) ? SEM_R : SEM_A;
1130
1131         /*
1132          * The MAC hook checks whether the thread has read (and possibly
1133          * write) permissions to the semaphore array based on the
1134          * sopptr->sem_op value.
1135          */
1136         eval = mac_sysvsem_check_semop(kauth_cred_get(), semakptr, j);
1137         if (eval)
1138                 goto semopout;
1139 #endif
1140
1141         /*
1142          * Loop trying to satisfy the vector of requests.
1143          * If we reach a point where we must wait, any requests already
1144          * performed are rolled back and we go to sleep until some other
1145          * process wakes us up.  At this point, we start all over again.
1146          *
1147          * This ensures that from the perspective of other tasks, a set
1148          * of requests is atomic (never partially satisfied).
1149          */
1150         do_undos = 0;
1151
1152         for (;;) {
1153                 do_wakeup = 0;
1154
1155                 for (i = 0; i < nsops; i++) {
1156                         sopptr = &sops[i];
1157
1158                         if (sopptr->sem_num >= semakptr->u.sem_nsems) {
1159                                 eval = EFBIG;
1160                                 goto semopout;
1161                         }
1162
1163                         semptr = &semakptr->u.sem_base[sopptr->sem_num];
1164
1165 #ifdef SEM_DEBUG
1166                         printf("semop:  semakptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
1167                             semakptr, semakptr->u.sem_base, semptr,
1168                             sopptr->sem_num, semptr->semval, sopptr->sem_op,
1169                             (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait");
1170 #endif
1171
1172                         if (sopptr->sem_op < 0) {
1173                                 if (semptr->semval + sopptr->sem_op < 0) {
1174 #ifdef SEM_DEBUG
1175                                         printf("semop:  can't do it now\n");
1176 #endif
1177                                         break;
1178                                 } else {
1179                                         semptr->semval += sopptr->sem_op;
1180                                         if (semptr->semval == 0 &&
1181                                             semptr->semzcnt > 0)
1182                                                 do_wakeup = 1;
1183                                 }
1184                                 if (sopptr->sem_flg & SEM_UNDO)
1185                                         do_undos = 1;
1186                         } else if (sopptr->sem_op == 0) {
1187                                 if (semptr->semval > 0) {
1188 #ifdef SEM_DEBUG
1189                                         printf("semop:  not zero now\n");
1190 #endif
1191                                         break;
1192                                 }
1193                         } else {
1194                                 if (semptr->semncnt > 0)
1195                                         do_wakeup = 1;
1196                                 semptr->semval += sopptr->sem_op;
1197                                 if (sopptr->sem_flg & SEM_UNDO)
1198                                         do_undos = 1;
1199                         }
1200                 }
1201
1202                 /*
1203                  * Did we get through the entire vector?
1204                  */
1205                 if (i >= nsops)
1206                         goto done;
1207
1208                 /*
1209                  * No ... rollback anything that we've already done
1210                  */
1211 #ifdef SEM_DEBUG
1212                 printf("semop:  rollback 0 through %d\n", i-1);
1213 #endif
1214                 for (j = 0; j < i; j++)
1215                         semakptr->u.sem_base[sops[j].sem_num].semval -=
1216                             sops[j].sem_op;
1217
1218                 /*
1219                  * If the request that we couldn't satisfy has the
1220                  * NOWAIT flag set then return with EAGAIN.
1221                  */
1222                 if (sopptr->sem_flg & IPC_NOWAIT) {
1223                         eval = EAGAIN;
1224                         goto semopout;
1225                 }
1226
1227                 if (sopptr->sem_op == 0)
1228                         semptr->semzcnt++;
1229                 else
1230                         semptr->semncnt++;
1231
1232 #ifdef SEM_DEBUG
1233                 printf("semop:  good night!\n");
1234 #endif
1235                 /* Release our lock on the semaphore subsystem so
1236                  * another thread can get at the semaphore we are
1237                  * waiting for. We will get the lock back after we
1238                  * wake up.
1239                  */
1240                 eval = msleep((caddr_t)semakptr, &sysv_sem_subsys_mutex , (PZERO - 4) | PCATCH,
1241                     "semwait", 0);
1242
1243 #ifdef SEM_DEBUG
1244                 printf("semop:  good morning (eval=%d)!\n", eval);
1245 #endif
1246                 if (eval != 0) {
1247                         eval = EINTR;
1248                 }
1249
1250                 /*
1251                  * IMPORTANT: while we were asleep, the semaphore array might
1252                  * have been reallocated somewhere else (see grow_sema_array()).
1253                  * When we wake up, we have to re-lookup the semaphore
1254                  * structures and re-validate them.
1255                  */
1256
1257                 semptr = NULL;
1258
1259                 /*
1260                  * Make sure that the semaphore still exists
1261                  *
1262                  * XXX POSIX: Third test this 'if' and 'EINTR' precedence may
1263                  * fail testing; if so, we will need to revert this code.
1264                  */
1265                 semakptr = &sema[semid];   /* sema may have been reallocated */
1266                 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
1267                     semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid) ||
1268                     sopptr->sem_num >= semakptr->u.sem_nsems) {
1269                         /* The man page says to return EIDRM. */
1270                         /* Unfortunately, BSD doesn't define that code! */
1271                         if (eval == EINTR) {
1272                                 /*
1273                                  * EINTR takes precedence over the fact that
1274                                  * the semaphore disappeared while we were
1275                                  * sleeping...
1276                                  */
1277                         } else {
1278 #ifdef EIDRM
1279                                 eval = EIDRM;
1280 #else
1281                                 eval = EINVAL;          /* Ancient past */
1282 #endif
1283                         }
1284                         goto semopout;
1285                 }
1286
1287                 /*
1288                  * The semaphore is still alive.  Readjust the count of
1289                  * waiting processes. semptr needs to be recomputed
1290                  * because the sem[] may have been reallocated while
1291                  * we were sleeping, updating our sem_base pointer.
1292                  */
1293                 semptr = &semakptr->u.sem_base[sopptr->sem_num];
1294                 if (sopptr->sem_op == 0)
1295                         semptr->semzcnt--;
1296                 else
1297                         semptr->semncnt--;
1298
1299                 if (eval != 0) { /* EINTR */
1300                         goto semopout;
1301                 }
1302         }
1303
1304 done:
1305         /*
1306          * Process any SEM_UNDO requests.
1307          */
1308         if (do_undos) {
1309                 for (i = 0; i < nsops; i++) {
1310                         /*
1311                          * We only need to deal with SEM_UNDO's for non-zero
1312                          * op's.
1313                          */
1314                         int adjval;
1315
1316                         if ((sops[i].sem_flg & SEM_UNDO) == 0)
1317                                 continue;
1318                         adjval = sops[i].sem_op;
1319                         if (adjval == 0)
1320                                 continue;
1321                         eval = semundo_adjust(p, &supidx, semid,
1322                             sops[i].sem_num, -adjval);
1323                         if (eval == 0)
1324                                 continue;
1325
1326                         /*
1327                          * Oh-Oh!  We ran out of either sem_undo's or undo's.
1328                          * Rollback the adjustments to this point and then
1329                          * rollback the semaphore ups and down so we can return
1330                          * with an error with all structures restored.  We
1331                          * rollback the undo's in the exact reverse order that
1332                          * we applied them.  This guarantees that we won't run
1333                          * out of space as we roll things back out.
1334                          */
1335                         for (j = i - 1; j >= 0; j--) {
1336                                 if ((sops[j].sem_flg & SEM_UNDO) == 0)
1337                                         continue;
1338                                 adjval = sops[j].sem_op;
1339                                 if (adjval == 0)
1340                                         continue;
1341                                 if (semundo_adjust(p, &supidx, semid,
1342                                     sops[j].sem_num, adjval) != 0)
1343                                         panic("semop - can't undo undos");
1344                         }
1345
1346                         for (j = 0; j < nsops; j++)
1347                                 semakptr->u.sem_base[sops[j].sem_num].semval -=
1348                                     sops[j].sem_op;
1349
1350 #ifdef SEM_DEBUG
1351                         printf("eval = %d from semundo_adjust\n", eval);
1352 #endif
1353                         goto semopout;
1354                 } /* loop through the sops */
1355         } /* if (do_undos) */
1356
1357         /* We're definitely done - set the sempid's */
1358         for (i = 0; i < nsops; i++) {
1359                 sopptr = &sops[i];
1360                 semptr = &semakptr->u.sem_base[sopptr->sem_num];
1361                 semptr->sempid = p->p_pid;
1362         }
1363         semakptr->u.sem_otime = sysv_semtime();
1364
1365         if (do_wakeup) {
1366 #ifdef SEM_DEBUG
1367                 printf("semop:  doing wakeup\n");
1368 #ifdef SEM_WAKEUP
1369                 sem_wakeup((caddr_t)semakptr);
1370 #else
1371                 wakeup((caddr_t)semakptr);
1372 #endif
1373                 printf("semop:  back from wakeup\n");
1374 #else
1375                 wakeup((caddr_t)semakptr);
1376 #endif
1377         }
1378 #ifdef SEM_DEBUG
1379         printf("semop:  done\n");
1380 #endif
1381         *retval = 0;
1382         eval = 0;
1383 semopout:
1384         SYSV_SEM_SUBSYS_UNLOCK();
1385         return(eval);
1386 }
1387
1388 /*
1389  * Go through the undo structures for this process and apply the adjustments to
1390  * semaphores.
1391  */
1392 void
1393 semexit(struct proc *p)
1394 {
1395         register struct sem_undo *suptr = NULL;
1396         int suidx;
1397         int *supidx;
1398         int did_something;
1399
1400         /* If we have not allocated our semaphores yet there can't be
1401          * anything to undo, but we need the lock to prevent
1402          * dynamic memory race conditions.
1403          */
1404         SYSV_SEM_SUBSYS_LOCK();
1405
1406         if (!sem_pool)
1407         {
1408                 SYSV_SEM_SUBSYS_UNLOCK();
1409                 return;
1410         }
1411         did_something = 0;
1412
1413         /*
1414          * Go through the chain of undo vectors looking for one
1415          * associated with this process.
1416          */
1417
1418         for (supidx = &semu_list_idx; (suidx = *supidx) != -1;
1419             supidx = &suptr->un_next_idx) {
1420                 suptr = SEMU(suidx);
1421                 if (suptr->un_proc == p)
1422                         break;
1423         }
1424
1425         if (suidx == -1)
1426                 goto unlock;
1427
1428 #ifdef SEM_DEBUG
1429         printf("proc @%08x has undo structure with %d entries\n", p,
1430             suptr->un_cnt);
1431 #endif
1432
1433         /*
1434          * If there are any active undo elements then process them.
1435          */
1436         if (suptr->un_cnt > 0) {
1437                 while (suptr->un_ent != NULL) {
1438                         struct undo *sueptr;
1439                         int semid;
1440                         int semnum;
1441                         int adjval;
1442                         struct semid_kernel *semakptr;
1443
1444                         sueptr = suptr->un_ent;
1445                         semid = sueptr->une_id;
1446                         semnum = sueptr->une_num;
1447                         adjval = sueptr->une_adjval;
1448
1449                         semakptr = &sema[semid];
1450                         if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0)
1451                                 panic("semexit - semid not allocated");
1452                         if (semnum >= semakptr->u.sem_nsems)
1453                                 panic("semexit - semnum out of range");
1454
1455 #ifdef SEM_DEBUG
1456                         printf("semexit:  %08x id=%d num=%d(adj=%d) ; sem=%d\n",
1457                                suptr->un_proc,
1458                                semid,
1459                                semnum,
1460                                adjval,
1461                                semakptr->u.sem_base[semnum].semval);
1462 #endif
1463
1464                         if (adjval < 0) {
1465                                 if (semakptr->u.sem_base[semnum].semval < -adjval)
1466                                         semakptr->u.sem_base[semnum].semval = 0;
1467                                 else
1468                                         semakptr->u.sem_base[semnum].semval +=
1469                                             adjval;
1470                         } else
1471                                 semakptr->u.sem_base[semnum].semval += adjval;
1472
1473                 /* Maybe we should build a list of semakptr's to wake
1474                  * up, finish all access to data structures, release the
1475                  * subsystem lock, and wake all the processes.  Something
1476                  * to think about.
1477                  */
1478 #ifdef SEM_WAKEUP
1479                         sem_wakeup((caddr_t)semakptr);
1480 #else
1481                         wakeup((caddr_t)semakptr);
1482 #endif
1483 #ifdef SEM_DEBUG
1484                         printf("semexit:  back from wakeup\n");
1485 #endif
1486                         suptr->un_cnt--;
1487                         suptr->un_ent = sueptr->une_next;
1488                         FREE(sueptr, M_SYSVSEM);
1489                         sueptr = NULL;
1490                 }
1491         }
1492
1493         /*
1494          * Deallocate the undo vector.
1495          */
1496 #ifdef SEM_DEBUG
1497         printf("removing vector\n");
1498 #endif
1499         suptr->un_proc = NULL;
1500         *supidx = suptr->un_next_idx;
1501
1502 unlock:
1503         /*
1504          * There is a semaphore leak (i.e. memory leak) in this code.
1505          * We should be deleting the IPC_PRIVATE semaphores when they are
1506          * no longer needed, and we dont. We would have to track which processes
1507          * know about which IPC_PRIVATE semaphores, updating the list after
1508          * every fork.  We can't just delete them semaphore when the process
1509          * that created it dies, because that process may well have forked
1510          * some children.  So we need to wait until all of it's children have
1511          * died, and so on.  Maybe we should tag each IPC_PRIVATE sempahore
1512          * with the creating group ID, count the number of processes left in
1513          * that group, and delete the semaphore when the group is gone.
1514          * Until that code gets implemented we will leak IPC_PRIVATE semaphores.
1515          * There is an upper bound on the size of our semaphore array, so
1516          * leaking the semaphores should not work as a DOS attack.
1517          *
1518          * Please note that the original BSD code this file is based on had the
1519          * same leaky semaphore problem.
1520          */
1521
1522         SYSV_SEM_SUBSYS_UNLOCK();
1523 }
1524
1525
1526 /* (struct sysctl_oid *oidp, void *arg1, int arg2, \
1527         struct sysctl_req *req) */
1528 static int
1529 sysctl_seminfo(__unused struct sysctl_oid *oidp, void *arg1,
1530         __unused int arg2, struct sysctl_req *req)
1531 {
1532         int error = 0;
1533
1534         error = SYSCTL_OUT(req, arg1, sizeof(int));
1535         if (error || req->newptr == USER_ADDR_NULL)
1536                 return(error);
1537
1538         SYSV_SEM_SUBSYS_LOCK();
1539
1540         /* Set the values only if shared memory is not initialised */
1541         if ((sem_pool == NULL) &&
1542                 (sema == NULL) &&
1543                 (semu == NULL) &&
1544                 (semu_list_idx == -1)) {
1545                         if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) {
1546                                 goto out;
1547                         }
1548         } else
1549                 error = EINVAL;
1550 out:
1551         SYSV_SEM_SUBSYS_UNLOCK();
1552         return(error);
1553
1554 }
1555
1556 /* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
1557 extern struct sysctl_oid_list sysctl__kern_sysv_children;
1558 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmni, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1559     &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
1560
1561 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmns, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1562     &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
1563
1564 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmnu, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1565     &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
1566
1567 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmsl, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1568     &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
1569
1570 SYSCTL_PROC(_kern_sysv, OID_AUTO, semume, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1571     &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
1572
1573
1574 static int
1575 IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
1576         __unused int arg2, struct sysctl_req *req)
1577 {
1578         int error;
1579         int cursor;
1580         union {
1581                 struct user32_IPCS_command u32;
1582                 struct user_IPCS_command u64;
1583         } ipcs;
1584         struct user32_semid_ds semid_ds32;      /* post conversion, 32 bit version */
1585         struct user64_semid_ds semid_ds64;      /* post conversion, 64 bit version */
1586         void *semid_dsp;
1587         size_t ipcs_sz;
1588         size_t semid_ds_sz;
1589         struct proc *p = current_proc();
1590
1591         if (IS_64BIT_PROCESS(p)) {
1592                 ipcs_sz = sizeof(struct user_IPCS_command);
1593                 semid_ds_sz = sizeof(struct user64_semid_ds);
1594         } else {
1595                 ipcs_sz = sizeof(struct user32_IPCS_command);
1596                 semid_ds_sz = sizeof(struct user32_semid_ds);
1597         }
1598
1599         /* Copy in the command structure */
1600         if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
1601                 return(error);
1602         }
1603
1604         if (!IS_64BIT_PROCESS(p)) /* convert in place */
1605                 ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data);
1606
1607         /* Let us version this interface... */
1608         if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
1609                 return(EINVAL);
1610         }
1611
1612         SYSV_SEM_SUBSYS_LOCK();
1613         switch(ipcs.u64.ipcs_op) {
1614         case IPCS_SEM_CONF:     /* Obtain global configuration data */
1615                 if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) {
1616                         error = ERANGE;
1617                         break;
1618                 }
1619                 if (ipcs.u64.ipcs_cursor != 0) {        /* fwd. compat. */
1620                         error = EINVAL;
1621                         break;
1622                 }
1623                 error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1624                 break;
1625
1626         case IPCS_SEM_ITER:     /* Iterate over existing segments */
1627                 cursor = ipcs.u64.ipcs_cursor;
1628                 if (cursor < 0 || cursor >= seminfo.semmni) {
1629                         error = ERANGE;
1630                         break;
1631                 }
1632                 if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) {
1633                         error = EINVAL;
1634                         break;
1635                 }
1636                 for( ; cursor < seminfo.semmni; cursor++) {
1637                         if (sema[cursor].u.sem_perm.mode & SEM_ALLOC)
1638                                 break;
1639                         continue;
1640                 }
1641                 if (cursor == seminfo.semmni) {
1642                         error = ENOENT;
1643                         break;
1644                 }
1645
1646                 semid_dsp = &sema[cursor].u;    /* default: 64 bit */
1647
1648                 /*
1649                  * If necessary, convert the 64 bit kernel segment
1650                  * descriptor to a 32 bit user one.
1651                  */
1652                 if (!IS_64BIT_PROCESS(p)) {
1653                         bzero(&semid_ds32, sizeof(semid_ds32));
1654                         semid_ds_kernelto32(semid_dsp, &semid_ds32);
1655                         semid_dsp = &semid_ds32;
1656                 } else {
1657                         bzero(&semid_ds64, sizeof(semid_ds64));
1658                         semid_ds_kernelto64(semid_dsp, &semid_ds64);
1659                         semid_dsp = &semid_ds64;
1660                 }
1661
1662                 error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1663                 if (!error) {
1664                         /* update cursor */
1665                         ipcs.u64.ipcs_cursor = cursor + 1;
1666
1667                         if (!IS_64BIT_PROCESS(p))       /* convert in place */
1668                                 ipcs.u32.ipcs_data = CAST_DOWN_EXPLICIT(user32_addr_t,ipcs.u64.ipcs_data);
1669
1670                         error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
1671                 }
1672                 break;
1673
1674         default:
1675                 error = EINVAL;
1676                 break;
1677         }
1678         SYSV_SEM_SUBSYS_UNLOCK();
1679         return(error);
1680 }
1681
1682 SYSCTL_DECL(_kern_sysv_ipcs);
1683 SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_LOCKED,
1684         0, 0, IPCS_sem_sysctl,
1685         "S,IPCS_sem_command",
1686         "ipcs sem command interface");
1687
1688 #endif /* SYSV_SEM */