bsd/kern/sysv_sem.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23 /*
  24  * Implementation of SVID semaphores
  25  *
  26  * Author:  Daniel Boulet
  27  *
  28  * This software is provided ``AS IS'' without any warranties of any kind.
  29  */
  30 /*
  31  * John Bellardo modified the implementation for Darwin. 12/2000
  32  */
  33
  34 #include <sys/param.h>
  35 #include <sys/systm.h>
  36 #include <sys/kernel.h>
  37 #include <sys/proc_internal.h>
  38 #include <sys/kauth.h>
  39 #include <sys/sem_internal.h>
  40 #include <sys/malloc.h>
  41 #include <mach/mach_types.h>
  42
  43 #include <sys/filedesc.h>
  44 #include <sys/file_internal.h>
  45 #include <sys/sysctl.h>
  46 #include <sys/ipcs.h>
  47 #include <sys/sysent.h>
  48 #include <sys/sysproto.h>
  49
  50 #include <bsm/audit_kernel.h>
  51
  52
  53 /* Uncomment this line to see the debugging output */
  54 /* #define SEM_DEBUG */
  55
  56 #define M_SYSVSEM       M_TEMP
  57
  58
  59 /* Hard system limits to avoid resource starvation / DOS attacks.
  60  * These are not needed if we can make the semaphore pages swappable.
  61  */
  62 static struct seminfo limitseminfo = {
  63         SEMMAP,        /* # of entries in semaphore map */
  64         SEMMNI,        /* # of semaphore identifiers */
  65         SEMMNS,        /* # of semaphores in system */
  66         SEMMNU,        /* # of undo structures in system */
  67         SEMMSL,        /* max # of semaphores per id */
  68         SEMOPM,        /* max # of operations per semop call */
  69         SEMUME,        /* max # of undo entries per process */
  70         SEMUSZ,        /* size in bytes of undo structure */
  71         SEMVMX,        /* semaphore maximum value */
  72         SEMAEM         /* adjust on exit max value */
  73 };
  74
  75 /* Current system allocations.  We use this structure to track how many
  76  * resources we have allocated so far.  This way we can set large hard limits
  77  * and not allocate the memory for them up front.
  78  */
  79 struct seminfo seminfo = {
  80         SEMMAP, /* Unused, # of entries in semaphore map */
  81         0,      /* # of semaphore identifiers */
  82         0,      /* # of semaphores in system */
  83         0,      /* # of undo entries in system */
  84         SEMMSL, /* max # of semaphores per id */
  85         SEMOPM, /* max # of operations per semop call */
  86         SEMUME, /* max # of undo entries per process */
  87         SEMUSZ, /* size in bytes of undo structure */
  88         SEMVMX, /* semaphore maximum value */
  89         SEMAEM  /* adjust on exit max value */
  90 };
  91
  92
  93 static struct sem_undo *semu_alloc(struct proc *p);
  94 static int semundo_adjust(struct proc *p, struct sem_undo **supptr,
  95                 int semid, int semnum, int adjval);
  96 static void semundo_clear(int semid, int semnum);
  97
  98 /* XXX casting to (sy_call_t *) is bogus, as usual. */
  99 static sy_call_t *semcalls[] = {
 100         (sy_call_t *)semctl, (sy_call_t *)semget,
 101         (sy_call_t *)semop, (sy_call_t *)semconfig
 102 };
 103
 104 static int              semtot = 0;             /* # of used semaphores */
 105 struct user_semid_ds    *sema = NULL;           /* semaphore id pool */
 106 struct sem              *sem_pool =  NULL;      /* semaphore pool */
 107 static struct sem_undo  *semu_list = NULL;      /* active undo structures */
 108 struct sem_undo         *semu = NULL;           /* semaphore undo pool */
 109
 110
 111 void sysv_sem_lock_init(void);
 112 static lck_grp_t       *sysv_sem_subsys_lck_grp;
 113 static lck_grp_attr_t  *sysv_sem_subsys_lck_grp_attr;
 114 static lck_attr_t      *sysv_sem_subsys_lck_attr;
 115 static lck_mtx_t        sysv_sem_subsys_mutex;
 116
 117 #define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
 118 #define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
 119
 120
 121 __private_extern__ void
 122 sysv_sem_lock_init( void )
 123 {
 124
 125     sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
 126     lck_grp_attr_setstat(sysv_sem_subsys_lck_grp_attr);
 127
 128     sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_sem_subsys_lck_grp_attr);
 129
 130     sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
 131     lck_attr_setdebug(sysv_sem_subsys_lck_attr);
 132     lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
 133 }
 134
 135 static __inline__ user_time_t
 136 sysv_semtime(void)
 137 {
 138         struct timeval  tv;
 139         microtime(&tv);
 140         return (tv.tv_sec);
 141 }
 142
 143 /*
 144  * XXX conversion of internal user_time_t to external tume_t loses
 145  * XXX precision; not an issue for us now, since we are only ever
 146  * XXX setting 32 bits worth of time into it.
 147  *
 148  * pad field contents are not moved correspondingly; contents will be lost
 149  *
 150  * NOTE: Source and target may *NOT* overlap! (target is smaller)
 151  */
 152 static void
 153 semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out)
 154 {
 155         out->sem_perm = in->sem_perm;
 156         out->sem_base = (__int32_t)in->sem_base;
 157         out->sem_nsems = in->sem_nsems;
 158         out->sem_otime = in->sem_otime;         /* XXX loses precision */
 159         out->sem_ctime = in->sem_ctime;         /* XXX loses precision */
 160 }
 161
 162 /*
 163  * pad field contents are not moved correspondingly; contents will be lost
 164  *
 165  * NOTE: Source and target may are permitted to overlap! (source is smaller);
 166  * this works because we copy fields in order from the end of the struct to
 167  * the beginning.
 168  *
 169  * XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect
 170  * XXX is the same.
 171  */
 172 static void
 173 semid_ds_32to64(struct semid_ds *in, struct user_semid_ds *out)
 174 {
 175         out->sem_ctime = in->sem_ctime;
 176         out->sem_otime = in->sem_otime;
 177         out->sem_nsems = in->sem_nsems;
 178         out->sem_base = (void *)in->sem_base;
 179         out->sem_perm = in->sem_perm;
 180 }
 181
 182
 183 /*
 184  * Entry point for all SEM calls
 185  *
 186  * In Darwin this is no longer the entry point.  It will be removed after
 187  *  the code has been tested better.
 188  */
 189 /* XXX actually varargs. */
 190 int
 191 semsys(struct proc *p, struct semsys_args *uap, register_t *retval)
 192 {
 193
 194         /* The individual calls handling the locking now */
 195
 196         if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
 197                 return (EINVAL);
 198         return ((*semcalls[uap->which])(p, &uap->a2, retval));
 199 }
 200
 201 /*
 202  * Lock or unlock the entire semaphore facility.
 203  *
 204  * This will probably eventually evolve into a general purpose semaphore
 205  * facility status enquiry mechanism (I don't like the "read /dev/kmem"
 206  * approach currently taken by ipcs and the amount of info that we want
 207  * to be able to extract for ipcs is probably beyond what the capability
 208  * of the getkerninfo facility.
 209  *
 210  * At the time that the current version of semconfig was written, ipcs is
 211  * the only user of the semconfig facility.  It uses it to ensure that the
 212  * semaphore facility data structures remain static while it fishes around
 213  * in /dev/kmem.
 214  */
 215
 216 int
 217 semconfig(__unused struct proc *p, struct semconfig_args *uap, register_t *retval)
 218 {
 219         int eval = 0;
 220
 221         switch (uap->flag) {
 222         case SEM_CONFIG_FREEZE:
 223                 SYSV_SEM_SUBSYS_LOCK();
 224                 break;
 225
 226         case SEM_CONFIG_THAW:
 227                 SYSV_SEM_SUBSYS_UNLOCK();
 228                 break;
 229
 230         default:
 231                 printf("semconfig: unknown flag parameter value (%d) - ignored\n",
 232                     uap->flag);
 233                 eval = EINVAL;
 234                 break;
 235         }
 236
 237         *retval = 0;
 238         return(eval);
 239 }
 240
 241 /*
 242  * Expand the semu array to the given capacity.  If the expansion fails
 243  * return 0, otherwise return 1.
 244  *
 245  * Assumes we already have the subsystem lock.
 246  */
 247 static int
 248 grow_semu_array(int newSize)
 249 {
 250         register int i;
 251         register struct sem_undo *newSemu;
 252
 253         if (newSize <= seminfo.semmnu)
 254                 return 1;
 255         if (newSize > limitseminfo.semmnu) /* enforce hard limit */
 256         {
 257 #ifdef SEM_DEBUG
 258                 printf("undo structure hard limit of %d reached, requested %d\n",
 259                         limitseminfo.semmnu, newSize);
 260 #endif
 261                 return 0;
 262         }
 263         newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC;
 264         newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize;
 265
 266 #ifdef SEM_DEBUG
 267         printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
 268 #endif
 269         MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize,
 270                M_SYSVSEM, M_WAITOK | M_ZERO);
 271         if (NULL == newSemu)
 272         {
 273 #ifdef SEM_DEBUG
 274                 printf("allocation failed.  no changes made.\n");
 275 #endif
 276                 return 0;
 277         }
 278
 279         /* copy the old data to the new array */
 280         for (i = 0; i < seminfo.semmnu; i++)
 281         {
 282                 newSemu[i] = semu[i];
 283         }
 284         /*
 285          * The new elements (from newSemu[i] to newSemu[newSize-1]) have their
 286          * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
 287          * so they're already marked as "not in use".
 288          */
 289
 290         /* Clean up the old array */
 291         if (semu)
 292                 FREE(semu, M_SYSVSEM);
 293
 294         semu = newSemu;
 295         seminfo.semmnu = newSize;
 296 #ifdef SEM_DEBUG
 297         printf("expansion successful\n");
 298 #endif
 299         return 1;
 300 }
 301
 302 /*
 303  * Expand the sema array to the given capacity.  If the expansion fails
 304  * we return 0, otherwise we return 1.
 305  *
 306  * Assumes we already have the subsystem lock.
 307  */
 308 static int
 309 grow_sema_array(int newSize)
 310 {
 311         register struct user_semid_ds *newSema;
 312         register int i;
 313
 314         if (newSize <= seminfo.semmni)
 315                 return 0;
 316         if (newSize > limitseminfo.semmni) /* enforce hard limit */
 317         {
 318 #ifdef SEM_DEBUG
 319                 printf("identifier hard limit of %d reached, requested %d\n",
 320                         limitseminfo.semmni, newSize);
 321 #endif
 322                 return 0;
 323         }
 324         newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC;
 325         newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize;
 326
 327 #ifdef SEM_DEBUG
 328         printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
 329 #endif
 330         MALLOC(newSema, struct user_semid_ds *,
 331                sizeof (struct user_semid_ds) * newSize,
 332                M_SYSVSEM, M_WAITOK | M_ZERO);
 333         if (NULL == newSema)
 334         {
 335 #ifdef SEM_DEBUG
 336                 printf("allocation failed.  no changes made.\n");
 337 #endif
 338                 return 0;
 339         }
 340
 341         /* copy over the old ids */
 342         for (i = 0; i < seminfo.semmni; i++)
 343         {
 344                 newSema[i] = sema[i];
 345                 /* This is a hack.  What we really want to be able to
 346                  * do is change the value a process is waiting on
 347                  * without waking it up, but I don't know how to do
 348                  * this with the existing code, so we wake up the
 349                  * process and let it do a lot of work to determine the
 350                  * semaphore set is really not available yet, and then
 351                  * sleep on the correct, reallocated user_semid_ds pointer.
 352                  */
 353                 if (sema[i].sem_perm.mode & SEM_ALLOC)
 354                         wakeup((caddr_t)&sema[i]);
 355         }
 356         /*
 357          * The new elements (from newSema[i] to newSema[newSize-1]) have their
 358          * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
 359          * flag to MALLOC() above, so they're already marked as "not in use".
 360          */
 361
 362         /* Clean up the old array */
 363         if (sema)
 364                 FREE(sema, M_SYSVSEM);
 365
 366         sema = newSema;
 367         seminfo.semmni = newSize;
 368 #ifdef SEM_DEBUG
 369         printf("expansion successful\n");
 370 #endif
 371         return 1;
 372 }
 373
 374 /*
 375  * Expand the sem_pool array to the given capacity.  If the expansion fails
 376  * we return 0 (fail), otherwise we return 1 (success).
 377  *
 378  * Assumes we already hold the subsystem lock.
 379  */
 380 static int
 381 grow_sem_pool(int new_pool_size)
 382 {
 383         struct sem *new_sem_pool = NULL;
 384         struct sem *sem_free;
 385         int i;
 386
 387         if (new_pool_size < semtot)
 388                 return 0;
 389         /* enforce hard limit */
 390         if (new_pool_size > limitseminfo.semmns) {
 391 #ifdef SEM_DEBUG
 392                 printf("semaphore hard limit of %d reached, requested %d\n",
 393                         limitseminfo.semmns, new_pool_size);
 394 #endif
 395                 return 0;
 396         }
 397
 398         new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC;
 399         new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size;
 400
 401 #ifdef SEM_DEBUG
 402         printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
 403 #endif
 404         MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size,
 405                M_SYSVSEM, M_WAITOK | M_ZERO);
 406         if (NULL == new_sem_pool) {
 407 #ifdef SEM_DEBUG
 408                 printf("allocation failed.  no changes made.\n");
 409 #endif
 410                 return 0;
 411         }
 412
 413         /* We have our new memory, now copy the old contents over */
 414         if (sem_pool)
 415                 for(i = 0; i < seminfo.semmns; i++)
 416                         new_sem_pool[i] = sem_pool[i];
 417
 418         /* Update our id structures to point to the new semaphores */
 419         for(i = 0; i < seminfo.semmni; i++) {
 420                 if (sema[i].sem_perm.mode & SEM_ALLOC)  /* ID in use */
 421                         sema[i].sem_base += (new_sem_pool - sem_pool);
 422         }
 423
 424         sem_free = sem_pool;
 425         sem_pool = new_sem_pool;
 426
 427         /* clean up the old array */
 428         if (sem_free != NULL)
 429                 FREE(sem_free, M_SYSVSEM);
 430
 431         seminfo.semmns = new_pool_size;
 432 #ifdef SEM_DEBUG
 433         printf("expansion complete\n");
 434 #endif
 435         return 1;
 436 }
 437
 438 /*
 439  * Allocate a new sem_undo structure for a process
 440  * (returns ptr to structure or NULL if no more room)
 441  *
 442  * Assumes we already hold the subsystem lock.
 443  */
 444
 445 static struct sem_undo *
 446 semu_alloc(struct proc *p)
 447 {
 448         register int i;
 449         register struct sem_undo *suptr;
 450         register struct sem_undo **supptr;
 451         int attempt;
 452
 453         /*
 454          * Try twice to allocate something.
 455          * (we'll purge any empty structures after the first pass so
 456          * two passes are always enough)
 457          */
 458
 459         for (attempt = 0; attempt < 2; attempt++) {
 460                 /*
 461                  * Look for a free structure.
 462                  * Fill it in and return it if we find one.
 463                  */
 464
 465                 for (i = 0; i < seminfo.semmnu; i++) {
 466                         suptr = SEMU(i);
 467                         if (suptr->un_proc == NULL) {
 468                                 suptr->un_next = semu_list;
 469                                 semu_list = suptr;
 470                                 suptr->un_cnt = 0;
 471                                 suptr->un_ent = NULL;
 472                                 suptr->un_proc = p;
 473                                 return(suptr);
 474                         }
 475                 }
 476
 477                 /*
 478                  * We didn't find a free one, if this is the first attempt
 479                  * then try to free some structures.
 480                  */
 481
 482                 if (attempt == 0) {
 483                         /* All the structures are in use - try to free some */
 484                         int did_something = 0;
 485
 486                         supptr = &semu_list;
 487                         while ((suptr = *supptr) != NULL) {
 488                                 if (suptr->un_cnt == 0)  {
 489                                         suptr->un_proc = NULL;
 490                                         *supptr = suptr->un_next;
 491                                         did_something = 1;
 492                                 } else
 493                                         supptr = &(suptr->un_next);
 494                         }
 495
 496                         /* If we didn't free anything. Try expanding
 497                          * the semu[] array.  If that doesn't work
 498                          * then fail.  We expand last to get the
 499                          * most reuse out of existing resources.
 500                          */
 501                         if (!did_something)
 502                                 if (!grow_semu_array(seminfo.semmnu + 1))
 503                                         return(NULL);
 504                 } else {
 505                         /*
 506                          * The second pass failed even though we freed
 507                          * something after the first pass!
 508                          * This is IMPOSSIBLE!
 509                          */
 510                         panic("semu_alloc - second attempt failed");
 511                 }
 512         }
 513         return (NULL);
 514 }
 515
 516 /*
 517  * Adjust a particular entry for a particular proc
 518  *
 519  * Assumes we already hold the subsystem lock.
 520  */
 521 static int
 522 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid,
 523         int semnum, int adjval)
 524 {
 525         register struct sem_undo *suptr;
 526         register struct undo *sueptr, **suepptr, *new_sueptr;
 527         int i;
 528
 529         /*
 530          * Look for and remember the sem_undo if the caller doesn't provide it
 531          */
 532
 533         suptr = *supptr;
 534         if (suptr == NULL) {
 535                 for (suptr = semu_list; suptr != NULL;
 536                     suptr = suptr->un_next) {
 537                         if (suptr->un_proc == p) {
 538                                 *supptr = suptr;
 539                                 break;
 540                         }
 541                 }
 542                 if (suptr == NULL) {
 543                         if (adjval == 0)
 544                                 return(0);
 545                         suptr = semu_alloc(p);
 546                         if (suptr == NULL)
 547                                 return(ENOSPC);
 548                         *supptr = suptr;
 549                 }
 550         }
 551
 552         /*
 553          * Look for the requested entry and adjust it (delete if adjval becomes
 554          * 0).
 555          */
 556         new_sueptr = NULL;
 557         for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
 558              i < suptr->un_cnt;
 559              i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
 560                 if (sueptr->une_id != semid || sueptr->une_num != semnum)
 561                         continue;
 562                 if (adjval == 0)
 563                         sueptr->une_adjval = 0;
 564                 else
 565                         sueptr->une_adjval += adjval;
 566                 if (sueptr->une_adjval == 0) {
 567                         suptr->un_cnt--;
 568                         *suepptr = sueptr->une_next;
 569                         FREE(sueptr, M_SYSVSEM);
 570                         sueptr = NULL;
 571                 }
 572                 return 0;
 573         }
 574
 575         /* Didn't find the right entry - create it */
 576         if (adjval == 0) {
 577                 /* no adjustment: no need for a new entry */
 578                 return 0;
 579         }
 580
 581         if (suptr->un_cnt == limitseminfo.semume) {
 582                 /* reached the limit number of semaphore undo entries */
 583                 return EINVAL;
 584         }
 585
 586         /* allocate a new semaphore undo entry */
 587         MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
 588                M_SYSVSEM, M_WAITOK);
 589         if (new_sueptr == NULL) {
 590                 return ENOMEM;
 591         }
 592
 593         /* fill in the new semaphore undo entry */
 594         new_sueptr->une_next = suptr->un_ent;
 595         suptr->un_ent = new_sueptr;
 596         suptr->un_cnt++;
 597         new_sueptr->une_adjval = adjval;
 598         new_sueptr->une_id = semid;
 599         new_sueptr->une_num = semnum;
 600
 601         return 0;
 602 }
 603
 604 /* Assumes we already hold the subsystem lock.
 605  */
 606 static void
 607 semundo_clear(int semid, int semnum)
 608 {
 609         struct sem_undo *suptr;
 610
 611         for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
 612                 struct undo *sueptr;
 613                 struct undo **suepptr;
 614                 int i = 0;
 615
 616                 sueptr = suptr->un_ent;
 617                 suepptr = &suptr->un_ent;
 618                 while (i < suptr->un_cnt) {
 619                         if (sueptr->une_id == semid) {
 620                                 if (semnum == -1 || sueptr->une_num == semnum) {
 621                                         suptr->un_cnt--;
 622                                         *suepptr = sueptr->une_next;
 623                                         FREE(sueptr, M_SYSVSEM);
 624                                         sueptr = *suepptr;
 625                                         continue;
 626                                 }
 627                                 if (semnum != -1)
 628                                         break;
 629                         }
 630                         i++;
 631                         suepptr = &sueptr->une_next;
 632                         sueptr = sueptr->une_next;
 633                 }
 634         }
 635 }
 636
 637 /*
 638  * Note that the user-mode half of this passes a union coerced to a
 639  * user_addr_t.  The union contains either an int or a pointer, and
 640  * so we have to coerce it back, variant on whether the calling
 641  * process is 64 bit or not.  The coercion works for the 'val' element
 642  * because the alignment is the same in user and kernel space.
 643  */
 644 int
 645 semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
 646 {
 647         int semid = uap->semid;
 648         int semnum = uap->semnum;
 649         int cmd = uap->cmd;
 650         user_semun_t user_arg = (user_semun_t)uap->arg;
 651         kauth_cred_t cred = kauth_cred_get();
 652         int i, rval, eval;
 653         struct user_semid_ds sbuf;
 654         struct user_semid_ds *semaptr;
 655         struct user_semid_ds uds;
 656
 657
 658         AUDIT_ARG(svipc_cmd, cmd);
 659         AUDIT_ARG(svipc_id, semid);
 660
 661         SYSV_SEM_SUBSYS_LOCK();
 662
 663 #ifdef SEM_DEBUG
 664         printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg);
 665 #endif
 666
 667         semid = IPCID_TO_IX(semid);
 668
 669         if (semid < 0 || semid >= seminfo.semmni) {
 670 #ifdef SEM_DEBUG
 671                 printf("Invalid semid\n");
 672 #endif
 673                 eval = EINVAL;
 674                 goto semctlout;
 675         }
 676
 677         semaptr = &sema[semid];
 678         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
 679             semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
 680                 eval = EINVAL;
 681                 goto semctlout;
 682         }
 683
 684         eval = 0;
 685         rval = 0;
 686
 687         switch (cmd) {
 688         case IPC_RMID:
 689                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
 690                         goto semctlout;
 691
 692                 semaptr->sem_perm.cuid = kauth_cred_getuid(cred);
 693                 semaptr->sem_perm.uid = kauth_cred_getuid(cred);
 694                 semtot -= semaptr->sem_nsems;
 695                 for (i = semaptr->sem_base - sem_pool; i < semtot; i++)
 696                         sem_pool[i] = sem_pool[i + semaptr->sem_nsems];
 697                 for (i = 0; i < seminfo.semmni; i++) {
 698                         if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
 699                             sema[i].sem_base > semaptr->sem_base)
 700                                 sema[i].sem_base -= semaptr->sem_nsems;
 701                 }
 702                 semaptr->sem_perm.mode = 0;
 703                 semundo_clear(semid, -1);
 704                 wakeup((caddr_t)semaptr);
 705                 break;
 706
 707         case IPC_SET:
 708                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
 709                                 goto semctlout;
 710
 711                 if (IS_64BIT_PROCESS(p)) {
 712                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds));
 713                 } else {
 714                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct semid_ds));
 715                         /* convert in place; ugly, but safe */
 716                         semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf);
 717                 }
 718
 719                 if (eval != 0) {
 720                         goto semctlout;
 721                 }
 722
 723                 semaptr->sem_perm.uid = sbuf.sem_perm.uid;
 724                 semaptr->sem_perm.gid = sbuf.sem_perm.gid;
 725                 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
 726                     (sbuf.sem_perm.mode & 0777);
 727                 semaptr->sem_ctime = sysv_semtime();
 728                 break;
 729
 730         case IPC_STAT:
 731                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 732                                 goto semctlout;
 733                 bcopy(semaptr, &uds, sizeof(struct user_semid_ds));
 734                 if (IS_64BIT_PROCESS(p)) {
 735                         eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds));
 736                 } else {
 737                         struct semid_ds semid_ds32;
 738                         semid_ds_64to32(&uds, &semid_ds32);
 739                         eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds));
 740                 }
 741                 break;
 742
 743         case GETNCNT:
 744                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 745                                 goto semctlout;
 746                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 747                         eval = EINVAL;
 748                         goto semctlout;
 749                 }
 750                 rval = semaptr->sem_base[semnum].semncnt;
 751                 break;
 752
 753         case GETPID:
 754                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 755                                 goto semctlout;
 756                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 757                         eval = EINVAL;
 758                         goto semctlout;
 759                 }
 760                 rval = semaptr->sem_base[semnum].sempid;
 761                 break;
 762
 763         case GETVAL:
 764                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 765                                 goto semctlout;
 766                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 767                         eval = EINVAL;
 768                         goto semctlout;
 769                 }
 770                 rval = semaptr->sem_base[semnum].semval;
 771                 break;
 772
 773         case GETALL:
 774                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 775                                 goto semctlout;
 776 /* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */
 777                 for (i = 0; i < semaptr->sem_nsems; i++) {
 778                         /* XXX could be done in one go... */
 779                         eval = copyout((caddr_t)&semaptr->sem_base[i].semval,
 780                             user_arg.array + (i * sizeof(unsigned short)),
 781                             sizeof(unsigned short));
 782                         if (eval != 0)
 783                                 break;
 784                 }
 785                 break;
 786
 787         case GETZCNT:
 788                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 789                                 goto semctlout;
 790                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 791                         eval = EINVAL;
 792                         goto semctlout;
 793                 }
 794                 rval = semaptr->sem_base[semnum].semzcnt;
 795                 break;
 796
 797         case SETVAL:
 798                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
 799                 {
 800 #ifdef SEM_DEBUG
 801                         printf("Invalid credentials for write\n");
 802 #endif
 803                                 goto semctlout;
 804                 }
 805                 if (semnum < 0 || semnum >= semaptr->sem_nsems)
 806                 {
 807 #ifdef SEM_DEBUG
 808                         printf("Invalid number out of range for set\n");
 809 #endif
 810                         eval = EINVAL;
 811                         goto semctlout;
 812                 }
 813                 /*
 814                  * Cast down a pointer instead of using 'val' member directly
 815                  * to avoid introducing endieness and a pad field into the
 816                  * header file.  Ugly, but it works.
 817                  */
 818                 semaptr->sem_base[semnum].semval = CAST_DOWN(int,user_arg.buf);
 819                 semundo_clear(semid, semnum);
 820                 wakeup((caddr_t)semaptr);
 821                 break;
 822
 823         case SETALL:
 824                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
 825                                 goto semctlout;
 826 /*** XXXXXXXXXXXX TBD ********/
 827                 for (i = 0; i < semaptr->sem_nsems; i++) {
 828                         /* XXX could be done in one go... */
 829                         eval = copyin(user_arg.array + (i * sizeof(unsigned short)),
 830                             (caddr_t)&semaptr->sem_base[i].semval,
 831                             sizeof(unsigned short));
 832                         if (eval != 0)
 833                                 break;
 834                 }
 835                 semundo_clear(semid, -1);
 836                 wakeup((caddr_t)semaptr);
 837                 break;
 838
 839         default:
 840                         eval = EINVAL;
 841                         goto semctlout;
 842         }
 843
 844         if (eval == 0)
 845                 *retval = rval;
 846 semctlout:
 847         SYSV_SEM_SUBSYS_UNLOCK();
 848         return(eval);
 849 }
 850
 851 int
 852 semget(__unused struct proc *p, struct semget_args *uap, register_t *retval)
 853 {
 854         int semid, eval;
 855         int key = uap->key;
 856         int nsems = uap->nsems;
 857         int semflg = uap->semflg;
 858         kauth_cred_t cred = kauth_cred_get();
 859
 860 #ifdef SEM_DEBUG
 861         if (key != IPC_PRIVATE)
 862                 printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg);
 863         else
 864                 printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg);
 865 #endif
 866
 867
 868         SYSV_SEM_SUBSYS_LOCK();
 869
 870
 871         if (key != IPC_PRIVATE) {
 872                 for (semid = 0; semid < seminfo.semmni; semid++) {
 873                         if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
 874                             sema[semid].sem_perm.key == key)
 875                                 break;
 876                 }
 877                 if (semid < seminfo.semmni) {
 878 #ifdef SEM_DEBUG
 879                         printf("found public key\n");
 880 #endif
 881                         if ((eval = ipcperm(cred, &sema[semid].sem_perm,
 882                             semflg & 0700)))
 883                                 goto semgetout;
 884                         if (nsems < 0 || sema[semid].sem_nsems < nsems) {
 885 #ifdef SEM_DEBUG
 886                                 printf("too small\n");
 887 #endif
 888                                 eval = EINVAL;
 889                                 goto semgetout;
 890                         }
 891                         if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
 892 #ifdef SEM_DEBUG
 893                                 printf("not exclusive\n");
 894 #endif
 895                                 eval = EEXIST;
 896                                 goto semgetout;
 897                         }
 898                         goto found;
 899                 }
 900         }
 901
 902 #ifdef SEM_DEBUG
 903         printf("need to allocate an id for the request\n");
 904 #endif
 905         if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
 906                 if (nsems <= 0 || nsems > limitseminfo.semmsl) {
 907 #ifdef SEM_DEBUG
 908                         printf("nsems out of range (0<%d<=%d)\n", nsems,
 909                             seminfo.semmsl);
 910 #endif
 911                         eval = EINVAL;
 912                         goto semgetout;
 913                 }
 914                 if (nsems > seminfo.semmns - semtot) {
 915 #ifdef SEM_DEBUG
 916                         printf("not enough semaphores left (need %d, got %d)\n",
 917                             nsems, seminfo.semmns - semtot);
 918 #endif
 919                         if (!grow_sem_pool(semtot + nsems)) {
 920 #ifdef SEM_DEBUG
 921                                 printf("failed to grow the sem array\n");
 922 #endif
 923                                 eval = ENOSPC;
 924                                 goto semgetout;
 925                         }
 926                 }
 927                 for (semid = 0; semid < seminfo.semmni; semid++) {
 928                         if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
 929                                 break;
 930                 }
 931                 if (semid == seminfo.semmni) {
 932 #ifdef SEM_DEBUG
 933                         printf("no more id's available\n");
 934 #endif
 935                         if (!grow_sema_array(seminfo.semmni + 1))
 936                         {
 937 #ifdef SEM_DEBUG
 938                                 printf("failed to grow sema array\n");
 939 #endif
 940                                 eval = ENOSPC;
 941                                 goto semgetout;
 942                         }
 943                 }
 944 #ifdef SEM_DEBUG
 945                 printf("semid %d is available\n", semid);
 946 #endif
 947                 sema[semid].sem_perm.key = key;
 948                 sema[semid].sem_perm.cuid = kauth_cred_getuid(cred);
 949                 sema[semid].sem_perm.uid = kauth_cred_getuid(cred);
 950                 sema[semid].sem_perm.cgid = cred->cr_gid;
 951                 sema[semid].sem_perm.gid = cred->cr_gid;
 952                 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
 953                 sema[semid].sem_perm.seq =
 954                     (sema[semid].sem_perm.seq + 1) & 0x7fff;
 955                 sema[semid].sem_nsems = nsems;
 956                 sema[semid].sem_otime = 0;
 957                 sema[semid].sem_ctime = sysv_semtime();
 958                 sema[semid].sem_base = &sem_pool[semtot];
 959                 semtot += nsems;
 960                 bzero(sema[semid].sem_base,
 961                     sizeof(sema[semid].sem_base[0])*nsems);
 962 #ifdef SEM_DEBUG
 963                 printf("sembase = 0x%x, next = 0x%x\n", sema[semid].sem_base,
 964                     &sem_pool[semtot]);
 965 #endif
 966         } else {
 967 #ifdef SEM_DEBUG
 968                 printf("didn't find it and wasn't asked to create it\n");
 969 #endif
 970                 eval = ENOENT;
 971                 goto semgetout;
 972         }
 973
 974 found:
 975         *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
 976         AUDIT_ARG(svipc_id, *retval);
 977 #ifdef SEM_DEBUG
 978         printf("semget is done, returning %d\n", *retval);
 979 #endif
 980         eval = 0;
 981
 982 semgetout:
 983         SYSV_SEM_SUBSYS_UNLOCK();
 984         return(eval);
 985 }
 986
 987 int
 988 semop(struct proc *p, struct semop_args *uap, register_t *retval)
 989 {
 990         int semid = uap->semid;
 991         int nsops = uap->nsops;
 992         struct sembuf sops[MAX_SOPS];
 993         register struct user_semid_ds *semaptr;
 994         register struct sembuf *sopptr = NULL;  /* protected by 'semptr' */
 995         register struct sem *semptr = NULL;     /* protected by 'if' */
 996         struct sem_undo *suptr = NULL;
 997         int i, j, eval;
 998         int do_wakeup, do_undos;
 999
1000         AUDIT_ARG(svipc_id, uap->semid);
1001
1002         SYSV_SEM_SUBSYS_LOCK();
1003
1004 #ifdef SEM_DEBUG
1005         printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops);
1006 #endif
1007
1008         semid = IPCID_TO_IX(semid);     /* Convert back to zero origin */
1009
1010         if (semid < 0 || semid >= seminfo.semmni) {
1011                 eval = EINVAL;
1012                 goto semopout;
1013         }
1014
1015         semaptr = &sema[semid];
1016         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) {
1017                 eval = EINVAL;
1018                 goto semopout;
1019         }
1020         if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
1021                 eval = EINVAL;
1022                 goto semopout;
1023         }
1024
1025         if ((eval = ipcperm(kauth_cred_get(), &semaptr->sem_perm, IPC_W))) {
1026 #ifdef SEM_DEBUG
1027                 printf("eval = %d from ipaccess\n", eval);
1028 #endif
1029                 goto semopout;
1030         }
1031
1032         if (nsops < 0 || nsops > MAX_SOPS) {
1033 #ifdef SEM_DEBUG
1034                 printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops);
1035 #endif
1036                 eval = E2BIG;
1037                 goto semopout;
1038         }
1039
1040         /*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
1041         if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
1042 #ifdef SEM_DEBUG
1043                 printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
1044                     uap->sops, &sops, nsops * sizeof(struct sembuf));
1045 #endif
1046                 goto semopout;
1047         }
1048
1049         /*
1050          * Loop trying to satisfy the vector of requests.
1051          * If we reach a point where we must wait, any requests already
1052          * performed are rolled back and we go to sleep until some other
1053          * process wakes us up.  At this point, we start all over again.
1054          *
1055          * This ensures that from the perspective of other tasks, a set
1056          * of requests is atomic (never partially satisfied).
1057          */
1058         do_undos = 0;
1059
1060         for (;;) {
1061                 do_wakeup = 0;
1062
1063                 for (i = 0; i < nsops; i++) {
1064                         sopptr = &sops[i];
1065
1066                         if (sopptr->sem_num >= semaptr->sem_nsems) {
1067                                 eval = EFBIG;
1068                                 goto semopout;
1069                         }
1070
1071                         semptr = &semaptr->sem_base[sopptr->sem_num];
1072
1073 #ifdef SEM_DEBUG
1074                         printf("semop:  semaptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
1075                             semaptr, semaptr->sem_base, semptr,
1076                             sopptr->sem_num, semptr->semval, sopptr->sem_op,
1077                             (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait");
1078 #endif
1079
1080                         if (sopptr->sem_op < 0) {
1081                                 if (semptr->semval + sopptr->sem_op < 0) {
1082 #ifdef SEM_DEBUG
1083                                         printf("semop:  can't do it now\n");
1084 #endif
1085                                         break;
1086                                 } else {
1087                                         semptr->semval += sopptr->sem_op;
1088                                         if (semptr->semval == 0 &&
1089                                             semptr->semzcnt > 0)
1090                                                 do_wakeup = 1;
1091                                 }
1092                                 if (sopptr->sem_flg & SEM_UNDO)
1093                                         do_undos = 1;
1094                         } else if (sopptr->sem_op == 0) {
1095                                 if (semptr->semval > 0) {
1096 #ifdef SEM_DEBUG
1097                                         printf("semop:  not zero now\n");
1098 #endif
1099                                         break;
1100                                 }
1101                         } else {
1102                                 if (semptr->semncnt > 0)
1103                                         do_wakeup = 1;
1104                                 semptr->semval += sopptr->sem_op;
1105                                 if (sopptr->sem_flg & SEM_UNDO)
1106                                         do_undos = 1;
1107                         }
1108                 }
1109
1110                 /*
1111                  * Did we get through the entire vector?
1112                  */
1113                 if (i >= nsops)
1114                         goto done;
1115
1116                 /*
1117                  * No ... rollback anything that we've already done
1118                  */
1119 #ifdef SEM_DEBUG
1120                 printf("semop:  rollback 0 through %d\n", i-1);
1121 #endif
1122                 for (j = 0; j < i; j++)
1123                         semaptr->sem_base[sops[j].sem_num].semval -=
1124                             sops[j].sem_op;
1125
1126                 /*
1127                  * If the request that we couldn't satisfy has the
1128                  * NOWAIT flag set then return with EAGAIN.
1129                  */
1130                 if (sopptr->sem_flg & IPC_NOWAIT) {
1131                         eval = EAGAIN;
1132                         goto semopout;
1133                 }
1134
1135                 if (sopptr->sem_op == 0)
1136                         semptr->semzcnt++;
1137                 else
1138                         semptr->semncnt++;
1139
1140 #ifdef SEM_DEBUG
1141                 printf("semop:  good night!\n");
1142 #endif
1143                 /* Release our lock on the semaphore subsystem so
1144                  * another thread can get at the semaphore we are
1145                  * waiting for. We will get the lock back after we
1146                  * wake up.
1147                  */
1148                 eval = msleep((caddr_t)semaptr, &sysv_sem_subsys_mutex , (PZERO - 4) | PCATCH,
1149                     "semwait", 0);
1150
1151 #ifdef SEM_DEBUG
1152                 printf("semop:  good morning (eval=%d)!\n", eval);
1153 #endif
1154                 if (eval != 0) {
1155                         eval = EINTR;
1156                 }
1157
1158                 /*
1159                  * IMPORTANT: while we were asleep, the semaphore array might
1160                  * have been reallocated somewhere else (see grow_sema_array()).
1161                  * When we wake up, we have to re-lookup the semaphore
1162                  * structures and re-validate them.
1163                  */
1164
1165                 suptr = NULL;   /* sem_undo may have been reallocated */
1166                 semaptr = &sema[semid];    /* sema may have been reallocated */
1167
1168                 /*
1169                  * Make sure that the semaphore still exists
1170                  */
1171                 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
1172                     semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid) ||
1173                     sopptr->sem_num >= semaptr->sem_nsems) {
1174                         if (eval == EINTR) {
1175                                 /*
1176                                  * EINTR takes precedence over the fact that
1177                                  * the semaphore disappeared while we were
1178                                  * sleeping...
1179                                  */
1180                         } else {
1181                                 /*
1182                                  * The man page says to return EIDRM.
1183                                  * Unfortunately, BSD doesn't define that code!
1184                                  */
1185 #ifdef EIDRM
1186                                 eval = EIDRM;
1187 #else
1188                                 eval = EINVAL;
1189 #endif
1190                         }
1191                         goto semopout;
1192                 }
1193
1194                 /*
1195                  * The semaphore is still alive.  Readjust the count of
1196                  * waiting processes. semptr needs to be recomputed
1197                  * because the sem[] may have been reallocated while
1198                  * we were sleeping, updating our sem_base pointer.
1199                  */
1200                 semptr = &semaptr->sem_base[sopptr->sem_num];
1201                 if (sopptr->sem_op == 0)
1202                         semptr->semzcnt--;
1203                 else
1204                         semptr->semncnt--;
1205
1206                 if (eval != 0) { /* EINTR */
1207                         goto semopout;
1208                 }
1209         }
1210
1211 done:
1212         /*
1213          * Process any SEM_UNDO requests.
1214          */
1215         if (do_undos) {
1216                 for (i = 0; i < nsops; i++) {
1217                         /*
1218                          * We only need to deal with SEM_UNDO's for non-zero
1219                          * op's.
1220                          */
1221                         int adjval;
1222
1223                         if ((sops[i].sem_flg & SEM_UNDO) == 0)
1224                                 continue;
1225                         adjval = sops[i].sem_op;
1226                         if (adjval == 0)
1227                                 continue;
1228                         eval = semundo_adjust(p, &suptr, semid,
1229                             sops[i].sem_num, -adjval);
1230                         if (eval == 0)
1231                                 continue;
1232
1233                         /*
1234                          * Oh-Oh!  We ran out of either sem_undo's or undo's.
1235                          * Rollback the adjustments to this point and then
1236                          * rollback the semaphore ups and down so we can return
1237                          * with an error with all structures restored.  We
1238                          * rollback the undo's in the exact reverse order that
1239                          * we applied them.  This guarantees that we won't run
1240                          * out of space as we roll things back out.
1241                          */
1242                         for (j = i - 1; j >= 0; j--) {
1243                                 if ((sops[j].sem_flg & SEM_UNDO) == 0)
1244                                         continue;
1245                                 adjval = sops[j].sem_op;
1246                                 if (adjval == 0)
1247                                         continue;
1248                                 if (semundo_adjust(p, &suptr, semid,
1249                                     sops[j].sem_num, adjval) != 0)
1250                                         panic("semop - can't undo undos");
1251                         }
1252
1253                         for (j = 0; j < nsops; j++)
1254                                 semaptr->sem_base[sops[j].sem_num].semval -=
1255                                     sops[j].sem_op;
1256
1257 #ifdef SEM_DEBUG
1258                         printf("eval = %d from semundo_adjust\n", eval);
1259 #endif
1260                         goto semopout;
1261                 } /* loop through the sops */
1262         } /* if (do_undos) */
1263
1264         /* We're definitely done - set the sempid's */
1265         for (i = 0; i < nsops; i++) {
1266                 sopptr = &sops[i];
1267                 semptr = &semaptr->sem_base[sopptr->sem_num];
1268                 semptr->sempid = p->p_pid;
1269         }
1270
1271         if (do_wakeup) {
1272 #ifdef SEM_DEBUG
1273                 printf("semop:  doing wakeup\n");
1274 #ifdef SEM_WAKEUP
1275                 sem_wakeup((caddr_t)semaptr);
1276 #else
1277                 wakeup((caddr_t)semaptr);
1278 #endif
1279                 printf("semop:  back from wakeup\n");
1280 #else
1281                 wakeup((caddr_t)semaptr);
1282 #endif
1283         }
1284 #ifdef SEM_DEBUG
1285         printf("semop:  done\n");
1286 #endif
1287         *retval = 0;
1288         eval = 0;
1289 semopout:
1290         SYSV_SEM_SUBSYS_UNLOCK();
1291         return(eval);
1292 }
1293
1294 /*
1295  * Go through the undo structures for this process and apply the adjustments to
1296  * semaphores.
1297  */
1298 void
1299 semexit(struct proc *p)
1300 {
1301         register struct sem_undo *suptr;
1302         register struct sem_undo **supptr;
1303         int did_something;
1304
1305         /* If we have not allocated our semaphores yet there can't be
1306          * anything to undo, but we need the lock to prevent
1307          * dynamic memory race conditions.
1308          */
1309         SYSV_SEM_SUBSYS_LOCK();
1310
1311         if (!sem_pool)
1312         {
1313                 SYSV_SEM_SUBSYS_UNLOCK();
1314                 return;
1315         }
1316         did_something = 0;
1317
1318         /*
1319          * Go through the chain of undo vectors looking for one
1320          * associated with this process.
1321          */
1322
1323         for (supptr = &semu_list; (suptr = *supptr) != NULL;
1324             supptr = &suptr->un_next) {
1325                 if (suptr->un_proc == p)
1326                         break;
1327         }
1328
1329         if (suptr == NULL)
1330                 goto unlock;
1331
1332 #ifdef SEM_DEBUG
1333         printf("proc @%08x has undo structure with %d entries\n", p,
1334             suptr->un_cnt);
1335 #endif
1336
1337         /*
1338          * If there are any active undo elements then process them.
1339          */
1340         if (suptr->un_cnt > 0) {
1341                 while (suptr->un_ent != NULL) {
1342                         struct undo *sueptr;
1343                         int semid;
1344                         int semnum;
1345                         int adjval;
1346                         struct user_semid_ds *semaptr;
1347
1348                         sueptr = suptr->un_ent;
1349                         semid = sueptr->une_id;
1350                         semnum = sueptr->une_num;
1351                         adjval = sueptr->une_adjval;
1352
1353                         semaptr = &sema[semid];
1354                         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
1355                                 panic("semexit - semid not allocated");
1356                         if (semnum >= semaptr->sem_nsems)
1357                                 panic("semexit - semnum out of range");
1358
1359 #ifdef SEM_DEBUG
1360                         printf("semexit:  %08x id=%d num=%d(adj=%d) ; sem=%d\n",
1361                                suptr->un_proc,
1362                                semid,
1363                                semnum,
1364                                adjval,
1365                                semaptr->sem_base[semnum].semval);
1366 #endif
1367
1368                         if (adjval < 0) {
1369                                 if (semaptr->sem_base[semnum].semval < -adjval)
1370                                         semaptr->sem_base[semnum].semval = 0;
1371                                 else
1372                                         semaptr->sem_base[semnum].semval +=
1373                                             adjval;
1374                         } else
1375                                 semaptr->sem_base[semnum].semval += adjval;
1376
1377                 /* Maybe we should build a list of semaptr's to wake
1378                  * up, finish all access to data structures, release the
1379                  * subsystem lock, and wake all the processes.  Something
1380                  * to think about.  It wouldn't buy us anything unless
1381                  * wakeup had the potential to block, or the syscall
1382                  * funnel state was changed to allow multiple threads
1383                  * in the BSD code at once.
1384                  */
1385 #ifdef SEM_WAKEUP
1386                         sem_wakeup((caddr_t)semaptr);
1387 #else
1388                         wakeup((caddr_t)semaptr);
1389 #endif
1390 #ifdef SEM_DEBUG
1391                         printf("semexit:  back from wakeup\n");
1392 #endif
1393                         suptr->un_cnt--;
1394                         suptr->un_ent = sueptr->une_next;
1395                         FREE(sueptr, M_SYSVSEM);
1396                         sueptr = NULL;
1397                 }
1398         }
1399
1400         /*
1401          * Deallocate the undo vector.
1402          */
1403 #ifdef SEM_DEBUG
1404         printf("removing vector\n");
1405 #endif
1406         suptr->un_proc = NULL;
1407         *supptr = suptr->un_next;
1408
1409 unlock:
1410         /*
1411          * There is a semaphore leak (i.e. memory leak) in this code.
1412          * We should be deleting the IPC_PRIVATE semaphores when they are
1413          * no longer needed, and we dont. We would have to track which processes
1414          * know about which IPC_PRIVATE semaphores, updating the list after
1415          * every fork.  We can't just delete them semaphore when the process
1416          * that created it dies, because that process may well have forked
1417          * some children.  So we need to wait until all of it's children have
1418          * died, and so on.  Maybe we should tag each IPC_PRIVATE sempahore
1419          * with the creating group ID, count the number of processes left in
1420          * that group, and delete the semaphore when the group is gone.
1421          * Until that code gets implemented we will leak IPC_PRIVATE semaphores.
1422          * There is an upper bound on the size of our semaphore array, so
1423          * leaking the semaphores should not work as a DOS attack.
1424          *
1425          * Please note that the original BSD code this file is based on had the
1426          * same leaky semaphore problem.
1427          */
1428
1429         SYSV_SEM_SUBSYS_UNLOCK();
1430 }
1431
1432
1433 /* (struct sysctl_oid *oidp, void *arg1, int arg2, \
1434         struct sysctl_req *req) */
1435 static int
1436 sysctl_seminfo(__unused struct sysctl_oid *oidp, void *arg1,
1437         __unused int arg2, struct sysctl_req *req)
1438 {
1439         int error = 0;
1440
1441         error = SYSCTL_OUT(req, arg1, sizeof(int));
1442         if (error || req->newptr == USER_ADDR_NULL)
1443                 return(error);
1444
1445         SYSV_SEM_SUBSYS_LOCK();
1446
1447         /* Set the values only if shared memory is not initialised */
1448         if ((sem_pool == NULL) &&
1449                 (sema == NULL) &&
1450                 (semu == NULL) &&
1451                 (semu_list == NULL)) {
1452                         if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) {
1453                                 goto out;
1454                         }
1455         } else
1456                 error = EINVAL;
1457 out:
1458         SYSV_SEM_SUBSYS_UNLOCK();
1459         return(error);
1460
1461 }
1462
1463 /* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
1464 extern struct sysctl_oid_list sysctl__kern_sysv_children;
1465 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNI, semmni, CTLTYPE_INT | CTLFLAG_RW,
1466     &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
1467
1468 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNS, semmns, CTLTYPE_INT | CTLFLAG_RW,
1469     &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
1470
1471 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNU, semmnu, CTLTYPE_INT | CTLFLAG_RW,
1472     &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
1473
1474 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMSL, semmsl, CTLTYPE_INT | CTLFLAG_RW,
1475     &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
1476
1477 SYSCTL_PROC(_kern_sysv, KSYSV_SEMUNE, semume, CTLTYPE_INT | CTLFLAG_RW,
1478     &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
1479
1480
1481 static int
1482 IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
1483         __unused int arg2, struct sysctl_req *req)
1484 {
1485         int error;
1486         int cursor;
1487         union {
1488                 struct IPCS_command u32;
1489                 struct user_IPCS_command u64;
1490         } ipcs;
1491         struct semid_ds semid_ds32;     /* post conversion, 32 bit version */
1492         void *semid_dsp;
1493         size_t ipcs_sz = sizeof(struct user_IPCS_command);
1494         size_t semid_ds_sz = sizeof(struct user_semid_ds);
1495         struct proc *p = current_proc();
1496
1497         /* Copy in the command structure */
1498         if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
1499                 return(error);
1500         }
1501
1502         if (!IS_64BIT_PROCESS(p)) {
1503                 ipcs_sz = sizeof(struct IPCS_command);
1504                 semid_ds_sz = sizeof(struct semid_ds);
1505         }
1506
1507         /* Let us version this interface... */
1508         if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
1509                 return(EINVAL);
1510         }
1511
1512         SYSV_SEM_SUBSYS_LOCK();
1513         switch(ipcs.u64.ipcs_op) {
1514         case IPCS_SEM_CONF:     /* Obtain global configuration data */
1515                 if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) {
1516                         error = ERANGE;
1517                         break;
1518                 }
1519                 if (ipcs.u64.ipcs_cursor != 0) {        /* fwd. compat. */
1520                         error = EINVAL;
1521                         break;
1522                 }
1523                 error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1524                 break;
1525
1526         case IPCS_SEM_ITER:     /* Iterate over existing segments */
1527                 cursor = ipcs.u64.ipcs_cursor;
1528                 if (cursor < 0 || cursor >= seminfo.semmni) {
1529                         error = ERANGE;
1530                         break;
1531                 }
1532                 if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) {
1533                         error = EINVAL;
1534                         break;
1535                 }
1536                 for( ; cursor < seminfo.semmni; cursor++) {
1537                         if (sema[cursor].sem_perm.mode & SEM_ALLOC)
1538                                 break;
1539                         continue;
1540                 }
1541                 if (cursor == seminfo.semmni) {
1542                         error = ENOENT;
1543                         break;
1544                 }
1545
1546                 semid_dsp = &sema[cursor];      /* default: 64 bit */
1547
1548                 /*
1549                  * If necessary, convert the 64 bit kernel segment
1550                  * descriptor to a 32 bit user one.
1551                  */
1552                 if (!IS_64BIT_PROCESS(p)) {
1553                         semid_ds_64to32(semid_dsp, &semid_ds32);
1554                         semid_dsp = &semid_ds32;
1555                 }
1556                 error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1557                 if (!error) {
1558                         /* update cursor */
1559                         ipcs.u64.ipcs_cursor = cursor + 1;
1560                         error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
1561                 }
1562                 break;
1563
1564         default:
1565                 error = EINVAL;
1566                 break;
1567         }
1568         SYSV_SEM_SUBSYS_UNLOCK();
1569         return(error);
1570 }
1571
1572 SYSCTL_DECL(_kern_sysv_ipcs);
1573 SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_ANYBODY,
1574         0, 0, IPCS_sem_sysctl,
1575         "S,IPCS_sem_command",
1576         "ipcs sem command interface");