bsd/kern/sysv_sem.c

   1 /*
   2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * Implementation of SVID semaphores
  30  *
  31  * Author:  Daniel Boulet
  32  *
  33  * This software is provided ``AS IS'' without any warranties of any kind.
  34  */
  35 /*
  36  * John Bellardo modified the implementation for Darwin. 12/2000
  37  */
  38 /*
  39  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
  40  * support for mandatory and extensible security protections.  This notice
  41  * is included in support of clause 2.2 (b) of the Apple Public License,
  42  * Version 2.0.
  43  * Copyright (c) 2005-2006 SPARTA, Inc.
  44  */
  45
  46 #include <sys/param.h>
  47 #include <sys/systm.h>
  48 #include <sys/kernel.h>
  49 #include <sys/proc_internal.h>
  50 #include <sys/kauth.h>
  51 #include <sys/sem_internal.h>
  52 #include <sys/malloc.h>
  53 #include <mach/mach_types.h>
  54
  55 #include <sys/filedesc.h>
  56 #include <sys/file_internal.h>
  57 #include <sys/sysctl.h>
  58 #include <sys/ipcs.h>
  59 #include <sys/sysent.h>
  60 #include <sys/sysproto.h>
  61 #if CONFIG_MACF
  62 #include <security/mac_framework.h>
  63 #endif
  64
  65 #include <bsm/audit_kernel.h>
  66
  67 #if SYSV_SEM
  68
  69
  70 /* Uncomment this line to see the debugging output */
  71 /* #define SEM_DEBUG */
  72
  73 /* Uncomment this line to see MAC debugging output. */
  74 /* #define      MAC_DEBUG */
  75 #if CONFIG_MACF_DEBUG
  76 #define MPRINTF(a)      printf(a)
  77 #else
  78 #define MPRINTF(a)
  79 #endif
  80
  81 #define M_SYSVSEM       M_TEMP
  82
  83
  84 /* Hard system limits to avoid resource starvation / DOS attacks.
  85  * These are not needed if we can make the semaphore pages swappable.
  86  */
  87 static struct seminfo limitseminfo = {
  88         SEMMAP,        /* # of entries in semaphore map */
  89         SEMMNI,        /* # of semaphore identifiers */
  90         SEMMNS,        /* # of semaphores in system */
  91         SEMMNU,        /* # of undo structures in system */
  92         SEMMSL,        /* max # of semaphores per id */
  93         SEMOPM,        /* max # of operations per semop call */
  94         SEMUME,        /* max # of undo entries per process */
  95         SEMUSZ,        /* size in bytes of undo structure */
  96         SEMVMX,        /* semaphore maximum value */
  97         SEMAEM         /* adjust on exit max value */
  98 };
  99
 100 /* Current system allocations.  We use this structure to track how many
 101  * resources we have allocated so far.  This way we can set large hard limits
 102  * and not allocate the memory for them up front.
 103  */
 104 struct seminfo seminfo = {
 105         SEMMAP, /* Unused, # of entries in semaphore map */
 106         0,      /* # of semaphore identifiers */
 107         0,      /* # of semaphores in system */
 108         0,      /* # of undo entries in system */
 109         SEMMSL, /* max # of semaphores per id */
 110         SEMOPM, /* max # of operations per semop call */
 111         SEMUME, /* max # of undo entries per process */
 112         SEMUSZ, /* size in bytes of undo structure */
 113         SEMVMX, /* semaphore maximum value */
 114         SEMAEM  /* adjust on exit max value */
 115 };
 116
 117
 118 static int semu_alloc(struct proc *p);
 119 static int semundo_adjust(struct proc *p, int *supidx,
 120                 int semid, int semnum, int adjval);
 121 static void semundo_clear(int semid, int semnum);
 122
 123 /* XXX casting to (sy_call_t *) is bogus, as usual. */
 124 static sy_call_t *semcalls[] = {
 125         (sy_call_t *)semctl, (sy_call_t *)semget,
 126         (sy_call_t *)semop
 127 };
 128
 129 static int              semtot = 0;             /* # of used semaphores */
 130 struct semid_kernel     *sema = NULL;           /* semaphore id pool */
 131 struct sem              *sem_pool =  NULL;      /* semaphore pool */
 132 static int              semu_list_idx = -1;     /* active undo structures */
 133 struct sem_undo         *semu = NULL;           /* semaphore undo pool */
 134
 135
 136 void sysv_sem_lock_init(void);
 137 static lck_grp_t       *sysv_sem_subsys_lck_grp;
 138 static lck_grp_attr_t  *sysv_sem_subsys_lck_grp_attr;
 139 static lck_attr_t      *sysv_sem_subsys_lck_attr;
 140 static lck_mtx_t        sysv_sem_subsys_mutex;
 141
 142 #define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
 143 #define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
 144
 145
 146 __private_extern__ void
 147 sysv_sem_lock_init( void )
 148 {
 149
 150     sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
 151
 152     sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_sem_subsys_lock", sysv_sem_subsys_lck_grp_attr);
 153
 154     sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
 155     lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
 156 }
 157
 158 static __inline__ user_time_t
 159 sysv_semtime(void)
 160 {
 161         struct timeval  tv;
 162         microtime(&tv);
 163         return (tv.tv_sec);
 164 }
 165
 166 /*
 167  * XXX conversion of internal user_time_t to external tume_t loses
 168  * XXX precision; not an issue for us now, since we are only ever
 169  * XXX setting 32 bits worth of time into it.
 170  *
 171  * pad field contents are not moved correspondingly; contents will be lost
 172  *
 173  * NOTE: Source and target may *NOT* overlap! (target is smaller)
 174  */
 175 static void
 176 semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out)
 177 {
 178         out->sem_perm = in->sem_perm;
 179         out->sem_base = (__int32_t)in->sem_base;
 180         out->sem_nsems = in->sem_nsems;
 181         out->sem_otime = in->sem_otime;         /* XXX loses precision */
 182         out->sem_ctime = in->sem_ctime;         /* XXX loses precision */
 183 }
 184
 185 /*
 186  * pad field contents are not moved correspondingly; contents will be lost
 187  *
 188  * NOTE: Source and target may are permitted to overlap! (source is smaller);
 189  * this works because we copy fields in order from the end of the struct to
 190  * the beginning.
 191  *
 192  * XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect
 193  * XXX is the same.
 194  */
 195 static void
 196 semid_ds_32to64(struct semid_ds *in, struct user_semid_ds *out)
 197 {
 198         out->sem_ctime = in->sem_ctime;
 199         out->sem_otime = in->sem_otime;
 200         out->sem_nsems = in->sem_nsems;
 201         out->sem_base = (void *)in->sem_base;
 202         out->sem_perm = in->sem_perm;
 203 }
 204
 205
 206 /*
 207  * Entry point for all SEM calls
 208  *
 209  * In Darwin this is no longer the entry point.  It will be removed after
 210  *  the code has been tested better.
 211  */
 212 /* XXX actually varargs. */
 213 int
 214 semsys(struct proc *p, struct semsys_args *uap, register_t *retval)
 215 {
 216
 217         /* The individual calls handling the locking now */
 218
 219         if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
 220                 return (EINVAL);
 221         return ((*semcalls[uap->which])(p, &uap->a2, retval));
 222 }
 223
 224 /*
 225  * Expand the semu array to the given capacity.  If the expansion fails
 226  * return 0, otherwise return 1.
 227  *
 228  * Assumes we already have the subsystem lock.
 229  */
 230 static int
 231 grow_semu_array(int newSize)
 232 {
 233         register int i;
 234         register struct sem_undo *newSemu;
 235
 236         if (newSize <= seminfo.semmnu)
 237                 return 1;
 238         if (newSize > limitseminfo.semmnu) /* enforce hard limit */
 239         {
 240 #ifdef SEM_DEBUG
 241                 printf("undo structure hard limit of %d reached, requested %d\n",
 242                         limitseminfo.semmnu, newSize);
 243 #endif
 244                 return 0;
 245         }
 246         newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC;
 247         newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize;
 248
 249 #ifdef SEM_DEBUG
 250         printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
 251 #endif
 252         MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize,
 253                M_SYSVSEM, M_WAITOK | M_ZERO);
 254         if (NULL == newSemu)
 255         {
 256 #ifdef SEM_DEBUG
 257                 printf("allocation failed.  no changes made.\n");
 258 #endif
 259                 return 0;
 260         }
 261
 262         /* copy the old data to the new array */
 263         for (i = 0; i < seminfo.semmnu; i++)
 264         {
 265                 newSemu[i] = semu[i];
 266         }
 267         /*
 268          * The new elements (from newSemu[i] to newSemu[newSize-1]) have their
 269          * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
 270          * so they're already marked as "not in use".
 271          */
 272
 273         /* Clean up the old array */
 274         if (semu)
 275                 FREE(semu, M_SYSVSEM);
 276
 277         semu = newSemu;
 278         seminfo.semmnu = newSize;
 279 #ifdef SEM_DEBUG
 280         printf("expansion successful\n");
 281 #endif
 282         return 1;
 283 }
 284
 285 /*
 286  * Expand the sema array to the given capacity.  If the expansion fails
 287  * we return 0, otherwise we return 1.
 288  *
 289  * Assumes we already have the subsystem lock.
 290  */
 291 static int
 292 grow_sema_array(int newSize)
 293 {
 294         register struct semid_kernel *newSema;
 295         register int i;
 296
 297         if (newSize <= seminfo.semmni)
 298                 return 0;
 299         if (newSize > limitseminfo.semmni) /* enforce hard limit */
 300         {
 301 #ifdef SEM_DEBUG
 302                 printf("identifier hard limit of %d reached, requested %d\n",
 303                         limitseminfo.semmni, newSize);
 304 #endif
 305                 return 0;
 306         }
 307         newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC;
 308         newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize;
 309
 310 #ifdef SEM_DEBUG
 311         printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
 312 #endif
 313         MALLOC(newSema, struct semid_kernel *,
 314                sizeof (struct semid_kernel) * newSize,
 315                M_SYSVSEM, M_WAITOK | M_ZERO);
 316         if (NULL == newSema)
 317         {
 318 #ifdef SEM_DEBUG
 319                 printf("allocation failed.  no changes made.\n");
 320 #endif
 321                 return 0;
 322         }
 323
 324         /* copy over the old ids */
 325         for (i = 0; i < seminfo.semmni; i++)
 326         {
 327                 newSema[i] = sema[i];
 328                 /* This is a hack.  What we really want to be able to
 329                  * do is change the value a process is waiting on
 330                  * without waking it up, but I don't know how to do
 331                  * this with the existing code, so we wake up the
 332                  * process and let it do a lot of work to determine the
 333                  * semaphore set is really not available yet, and then
 334                  * sleep on the correct, reallocated semid_kernel pointer.
 335                  */
 336                 if (sema[i].u.sem_perm.mode & SEM_ALLOC)
 337                         wakeup((caddr_t)&sema[i]);
 338         }
 339
 340 #if CONFIG_MACF
 341         for (i = seminfo.semmni; i < newSize; i++)
 342         {
 343                 mac_sysvsem_label_init(&newSema[i]);
 344         }
 345 #endif
 346
 347         /*
 348          * The new elements (from newSema[i] to newSema[newSize-1]) have their
 349          * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
 350          * flag to MALLOC() above, so they're already marked as "not in use".
 351          */
 352
 353         /* Clean up the old array */
 354         if (sema)
 355                 FREE(sema, M_SYSVSEM);
 356
 357         sema = newSema;
 358         seminfo.semmni = newSize;
 359 #ifdef SEM_DEBUG
 360         printf("expansion successful\n");
 361 #endif
 362         return 1;
 363 }
 364
 365 /*
 366  * Expand the sem_pool array to the given capacity.  If the expansion fails
 367  * we return 0 (fail), otherwise we return 1 (success).
 368  *
 369  * Assumes we already hold the subsystem lock.
 370  */
 371 static int
 372 grow_sem_pool(int new_pool_size)
 373 {
 374         struct sem *new_sem_pool = NULL;
 375         struct sem *sem_free;
 376         int i;
 377
 378         if (new_pool_size < semtot)
 379                 return 0;
 380         /* enforce hard limit */
 381         if (new_pool_size > limitseminfo.semmns) {
 382 #ifdef SEM_DEBUG
 383                 printf("semaphore hard limit of %d reached, requested %d\n",
 384                         limitseminfo.semmns, new_pool_size);
 385 #endif
 386                 return 0;
 387         }
 388
 389         new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC;
 390         new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size;
 391
 392 #ifdef SEM_DEBUG
 393         printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
 394 #endif
 395         MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size,
 396                M_SYSVSEM, M_WAITOK | M_ZERO);
 397         if (NULL == new_sem_pool) {
 398 #ifdef SEM_DEBUG
 399                 printf("allocation failed.  no changes made.\n");
 400 #endif
 401                 return 0;
 402         }
 403
 404         /* We have our new memory, now copy the old contents over */
 405         if (sem_pool)
 406                 for(i = 0; i < seminfo.semmns; i++)
 407                         new_sem_pool[i] = sem_pool[i];
 408
 409         /* Update our id structures to point to the new semaphores */
 410         for(i = 0; i < seminfo.semmni; i++) {
 411                 if (sema[i].u.sem_perm.mode & SEM_ALLOC)  /* ID in use */
 412                         sema[i].u.sem_base += (new_sem_pool - sem_pool);
 413         }
 414
 415         sem_free = sem_pool;
 416         sem_pool = new_sem_pool;
 417
 418         /* clean up the old array */
 419         if (sem_free != NULL)
 420                 FREE(sem_free, M_SYSVSEM);
 421
 422         seminfo.semmns = new_pool_size;
 423 #ifdef SEM_DEBUG
 424         printf("expansion complete\n");
 425 #endif
 426         return 1;
 427 }
 428
 429 /*
 430  * Allocate a new sem_undo structure for a process
 431  * (returns ptr to structure or NULL if no more room)
 432  *
 433  * Assumes we already hold the subsystem lock.
 434  */
 435
 436 static int
 437 semu_alloc(struct proc *p)
 438 {
 439         register int i;
 440         register struct sem_undo *suptr;
 441         int *supidx;
 442         int attempt;
 443
 444         /*
 445          * Try twice to allocate something.
 446          * (we'll purge any empty structures after the first pass so
 447          * two passes are always enough)
 448          */
 449
 450         for (attempt = 0; attempt < 2; attempt++) {
 451                 /*
 452                  * Look for a free structure.
 453                  * Fill it in and return it if we find one.
 454                  */
 455
 456                 for (i = 0; i < seminfo.semmnu; i++) {
 457                         suptr = SEMU(i);
 458                         if (suptr->un_proc == NULL) {
 459                                 suptr->un_next_idx = semu_list_idx;
 460                                 semu_list_idx = i;
 461                                 suptr->un_cnt = 0;
 462                                 suptr->un_ent = NULL;
 463                                 suptr->un_proc = p;
 464                                 return i;
 465                         }
 466                 }
 467
 468                 /*
 469                  * We didn't find a free one, if this is the first attempt
 470                  * then try to free some structures.
 471                  */
 472
 473                 if (attempt == 0) {
 474                         /* All the structures are in use - try to free some */
 475                         int did_something = 0;
 476
 477                         supidx = &semu_list_idx;
 478                         while (*supidx != -1) {
 479                                 suptr = SEMU(*supidx);
 480                                 if (suptr->un_cnt == 0)  {
 481                                         suptr->un_proc = NULL;
 482                                         *supidx = suptr->un_next_idx;
 483                                         did_something = 1;
 484                                 } else
 485                                         supidx = &(suptr->un_next_idx);
 486                         }
 487
 488                         /* If we didn't free anything. Try expanding
 489                          * the semu[] array.  If that doesn't work
 490                          * then fail.  We expand last to get the
 491                          * most reuse out of existing resources.
 492                          */
 493                         if (!did_something)
 494                                 if (!grow_semu_array(seminfo.semmnu + 1))
 495                                         return -1;
 496                 } else {
 497                         /*
 498                          * The second pass failed even though we freed
 499                          * something after the first pass!
 500                          * This is IMPOSSIBLE!
 501                          */
 502                         panic("semu_alloc - second attempt failed");
 503                 }
 504         }
 505         return -1;
 506 }
 507
 508 /*
 509  * Adjust a particular entry for a particular proc
 510  *
 511  * Assumes we already hold the subsystem lock.
 512  */
 513 static int
 514 semundo_adjust(struct proc *p, int *supidx, int semid,
 515         int semnum, int adjval)
 516 {
 517         register struct sem_undo *suptr;
 518         int suidx;
 519         register struct undo *sueptr, **suepptr, *new_sueptr;
 520         int i;
 521
 522         /*
 523          * Look for and remember the sem_undo if the caller doesn't provide it
 524          */
 525
 526         suidx = *supidx;
 527         if (suidx == -1) {
 528                 for (suidx = semu_list_idx; suidx != -1;
 529                     suidx = suptr->un_next_idx) {
 530                         suptr = SEMU(suidx);
 531                         if (suptr->un_proc == p) {
 532                                 *supidx = suidx;
 533                                 break;
 534                         }
 535                 }
 536                 if (suidx == -1) {
 537                         if (adjval == 0)
 538                                 return(0);
 539                         suidx = semu_alloc(p);
 540                         if (suidx == -1)
 541                                 return(ENOSPC);
 542                         *supidx = suidx;
 543                 }
 544         }
 545
 546         /*
 547          * Look for the requested entry and adjust it (delete if adjval becomes
 548          * 0).
 549          */
 550         suptr = SEMU(suidx);
 551         new_sueptr = NULL;
 552         for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
 553              i < suptr->un_cnt;
 554              i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
 555                 if (sueptr->une_id != semid || sueptr->une_num != semnum)
 556                         continue;
 557                 if (adjval == 0)
 558                         sueptr->une_adjval = 0;
 559                 else
 560                         sueptr->une_adjval += adjval;
 561                 if (sueptr->une_adjval == 0) {
 562                         suptr->un_cnt--;
 563                         *suepptr = sueptr->une_next;
 564                         FREE(sueptr, M_SYSVSEM);
 565                         sueptr = NULL;
 566                 }
 567                 return 0;
 568         }
 569
 570         /* Didn't find the right entry - create it */
 571         if (adjval == 0) {
 572                 /* no adjustment: no need for a new entry */
 573                 return 0;
 574         }
 575
 576         if (suptr->un_cnt == limitseminfo.semume) {
 577                 /* reached the limit number of semaphore undo entries */
 578                 return EINVAL;
 579         }
 580
 581         /* allocate a new semaphore undo entry */
 582         MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
 583                M_SYSVSEM, M_WAITOK);
 584         if (new_sueptr == NULL) {
 585                 return ENOMEM;
 586         }
 587
 588         /* fill in the new semaphore undo entry */
 589         new_sueptr->une_next = suptr->un_ent;
 590         suptr->un_ent = new_sueptr;
 591         suptr->un_cnt++;
 592         new_sueptr->une_adjval = adjval;
 593         new_sueptr->une_id = semid;
 594         new_sueptr->une_num = semnum;
 595
 596         return 0;
 597 }
 598
 599 /* Assumes we already hold the subsystem lock.
 600  */
 601 static void
 602 semundo_clear(int semid, int semnum)
 603 {
 604         struct sem_undo *suptr;
 605         int suidx;
 606
 607         for (suidx = semu_list_idx; suidx != -1; suidx = suptr->un_next_idx) {
 608                 struct undo *sueptr;
 609                 struct undo **suepptr;
 610                 int i = 0;
 611
 612                 suptr = SEMU(suidx);
 613                 sueptr = suptr->un_ent;
 614                 suepptr = &suptr->un_ent;
 615                 while (i < suptr->un_cnt) {
 616                         if (sueptr->une_id == semid) {
 617                                 if (semnum == -1 || sueptr->une_num == semnum) {
 618                                         suptr->un_cnt--;
 619                                         *suepptr = sueptr->une_next;
 620                                         FREE(sueptr, M_SYSVSEM);
 621                                         sueptr = *suepptr;
 622                                         continue;
 623                                 }
 624                                 if (semnum != -1)
 625                                         break;
 626                         }
 627                         i++;
 628                         suepptr = &sueptr->une_next;
 629                         sueptr = sueptr->une_next;
 630                 }
 631         }
 632 }
 633
 634 /*
 635  * Note that the user-mode half of this passes a union coerced to a
 636  * user_addr_t.  The union contains either an int or a pointer, and
 637  * so we have to coerce it back, variant on whether the calling
 638  * process is 64 bit or not.  The coercion works for the 'val' element
 639  * because the alignment is the same in user and kernel space.
 640  */
 641 int
 642 semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
 643 {
 644         int semid = uap->semid;
 645         int semnum = uap->semnum;
 646         int cmd = uap->cmd;
 647         user_semun_t user_arg = (user_semun_t)uap->arg;
 648         kauth_cred_t cred = kauth_cred_get();
 649         int i, rval, eval;
 650         struct user_semid_ds sbuf;
 651         struct semid_kernel *semakptr;
 652         struct user_semid_ds uds;
 653
 654
 655         AUDIT_ARG(svipc_cmd, cmd);
 656         AUDIT_ARG(svipc_id, semid);
 657
 658         SYSV_SEM_SUBSYS_LOCK();
 659
 660 #ifdef SEM_DEBUG
 661         printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg);
 662 #endif
 663
 664         semid = IPCID_TO_IX(semid);
 665
 666         if (semid < 0 || semid >= seminfo.semmni) {
 667 #ifdef SEM_DEBUG
 668                 printf("Invalid semid\n");
 669 #endif
 670                 eval = EINVAL;
 671                 goto semctlout;
 672         }
 673
 674         semakptr = &sema[semid];
 675         if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
 676             semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid)) {
 677                 eval = EINVAL;
 678                 goto semctlout;
 679         }
 680 #if CONFIG_MACF
 681         eval = mac_sysvsem_check_semctl(cred, semakptr, cmd);
 682         if (eval)
 683                 goto semctlout;
 684 #endif
 685
 686         eval = 0;
 687         rval = 0;
 688
 689         switch (cmd) {
 690         case IPC_RMID:
 691                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_M)))
 692                         goto semctlout;
 693
 694                 semakptr->u.sem_perm.cuid = kauth_cred_getuid(cred);
 695                 semakptr->u.sem_perm.uid = kauth_cred_getuid(cred);
 696                 semtot -= semakptr->u.sem_nsems;
 697                 for (i = semakptr->u.sem_base - sem_pool; i < semtot; i++)
 698                         sem_pool[i] = sem_pool[i + semakptr->u.sem_nsems];
 699                 for (i = 0; i < seminfo.semmni; i++) {
 700                         if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
 701                             sema[i].u.sem_base > semakptr->u.sem_base)
 702                                 sema[i].u.sem_base -= semakptr->u.sem_nsems;
 703                 }
 704                 semakptr->u.sem_perm.mode = 0;
 705 #if CONFIG_MACF
 706                 mac_sysvsem_label_recycle(semakptr);
 707 #endif
 708                 semundo_clear(semid, -1);
 709                 wakeup((caddr_t)semakptr);
 710                 break;
 711
 712         case IPC_SET:
 713                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_M)))
 714                                 goto semctlout;
 715
 716                 if (IS_64BIT_PROCESS(p)) {
 717                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds));
 718                 } else {
 719                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct semid_ds));
 720                         /* convert in place; ugly, but safe */
 721                         semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf);
 722                 }
 723
 724                 if (eval != 0) {
 725                         goto semctlout;
 726                 }
 727
 728                 semakptr->u.sem_perm.uid = sbuf.sem_perm.uid;
 729                 semakptr->u.sem_perm.gid = sbuf.sem_perm.gid;
 730                 semakptr->u.sem_perm.mode = (semakptr->u.sem_perm.mode &
 731                     ~0777) | (sbuf.sem_perm.mode & 0777);
 732                 semakptr->u.sem_ctime = sysv_semtime();
 733                 break;
 734
 735         case IPC_STAT:
 736                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 737                                 goto semctlout;
 738                 bcopy((caddr_t)&semakptr->u, &uds, sizeof(struct user_semid_ds));
 739                 if (IS_64BIT_PROCESS(p)) {
 740                         eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds));
 741                 } else {
 742                         struct semid_ds semid_ds32;
 743                         semid_ds_64to32(&uds, &semid_ds32);
 744                         eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds));
 745                 }
 746                 break;
 747
 748         case GETNCNT:
 749                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 750                                 goto semctlout;
 751                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 752                         eval = EINVAL;
 753                         goto semctlout;
 754                 }
 755                 rval = semakptr->u.sem_base[semnum].semncnt;
 756                 break;
 757
 758         case GETPID:
 759                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 760                                 goto semctlout;
 761                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 762                         eval = EINVAL;
 763                         goto semctlout;
 764                 }
 765                 rval = semakptr->u.sem_base[semnum].sempid;
 766                 break;
 767
 768         case GETVAL:
 769                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 770                                 goto semctlout;
 771                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 772                         eval = EINVAL;
 773                         goto semctlout;
 774                 }
 775                 rval = semakptr->u.sem_base[semnum].semval;
 776                 break;
 777
 778         case GETALL:
 779                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 780                                 goto semctlout;
 781 /* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */
 782                 for (i = 0; i < semakptr->u.sem_nsems; i++) {
 783                         /* XXX could be done in one go... */
 784                         eval = copyout((caddr_t)&semakptr->u.sem_base[i].semval,
 785                             user_arg.array + (i * sizeof(unsigned short)),
 786                             sizeof(unsigned short));
 787                         if (eval != 0)
 788                                 break;
 789                 }
 790                 break;
 791
 792         case GETZCNT:
 793                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_R)))
 794                                 goto semctlout;
 795                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems) {
 796                         eval = EINVAL;
 797                         goto semctlout;
 798                 }
 799                 rval = semakptr->u.sem_base[semnum].semzcnt;
 800                 break;
 801
 802         case SETVAL:
 803                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_W)))
 804                 {
 805 #ifdef SEM_DEBUG
 806                         printf("Invalid credentials for write\n");
 807 #endif
 808                                 goto semctlout;
 809                 }
 810                 if (semnum < 0 || semnum >= semakptr->u.sem_nsems)
 811                 {
 812 #ifdef SEM_DEBUG
 813                         printf("Invalid number out of range for set\n");
 814 #endif
 815                         eval = EINVAL;
 816                         goto semctlout;
 817                 }
 818                 /*
 819                  * Cast down a pointer instead of using 'val' member directly
 820                  * to avoid introducing endieness and a pad field into the
 821                  * header file.  Ugly, but it works.
 822                  */
 823                 semakptr->u.sem_base[semnum].semval = CAST_DOWN(int,user_arg.buf);
 824                 semakptr->u.sem_base[semnum].sempid = p->p_pid;
 825                 /* XXX scottl Should there be a MAC call here? */
 826                 semundo_clear(semid, semnum);
 827                 wakeup((caddr_t)semakptr);
 828                 break;
 829
 830         case SETALL:
 831                 if ((eval = ipcperm(cred, &semakptr->u.sem_perm, IPC_W)))
 832                                 goto semctlout;
 833 /*** XXXXXXXXXXXX TBD ********/
 834                 for (i = 0; i < semakptr->u.sem_nsems; i++) {
 835                         /* XXX could be done in one go... */
 836                         eval = copyin(user_arg.array + (i * sizeof(unsigned short)),
 837                             (caddr_t)&semakptr->u.sem_base[i].semval,
 838                             sizeof(unsigned short));
 839                         if (eval != 0)
 840                                 break;
 841                         semakptr->u.sem_base[i].sempid = p->p_pid;
 842                 }
 843                 /* XXX scottl Should there be a MAC call here? */
 844                 semundo_clear(semid, -1);
 845                 wakeup((caddr_t)semakptr);
 846                 break;
 847
 848         default:
 849                         eval = EINVAL;
 850                         goto semctlout;
 851         }
 852
 853         if (eval == 0)
 854                 *retval = rval;
 855 semctlout:
 856         SYSV_SEM_SUBSYS_UNLOCK();
 857         return(eval);
 858 }
 859
 860 int
 861 semget(__unused struct proc *p, struct semget_args *uap, register_t *retval)
 862 {
 863         int semid, eval;
 864         int key = uap->key;
 865         int nsems = uap->nsems;
 866         int semflg = uap->semflg;
 867         kauth_cred_t cred = kauth_cred_get();
 868
 869 #ifdef SEM_DEBUG
 870         if (key != IPC_PRIVATE)
 871                 printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg);
 872         else
 873                 printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg);
 874 #endif
 875
 876
 877         SYSV_SEM_SUBSYS_LOCK();
 878
 879
 880         if (key != IPC_PRIVATE) {
 881                 for (semid = 0; semid < seminfo.semmni; semid++) {
 882                         if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
 883                             sema[semid].u.sem_perm._key == key)
 884                                 break;
 885                 }
 886                 if (semid < seminfo.semmni) {
 887 #ifdef SEM_DEBUG
 888                         printf("found public key\n");
 889 #endif
 890                         if ((eval = ipcperm(cred, &sema[semid].u.sem_perm,
 891                             semflg & 0700)))
 892                                 goto semgetout;
 893                         if (nsems < 0 || sema[semid].u.sem_nsems < nsems) {
 894 #ifdef SEM_DEBUG
 895                                 printf("too small\n");
 896 #endif
 897                                 eval = EINVAL;
 898                                 goto semgetout;
 899                         }
 900                         if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
 901 #ifdef SEM_DEBUG
 902                                 printf("not exclusive\n");
 903 #endif
 904                                 eval = EEXIST;
 905                                 goto semgetout;
 906                         }
 907 #if CONFIG_MACF
 908                         eval = mac_sysvsem_check_semget(cred, &sema[semid]);
 909                         if (eval)
 910                                 goto semgetout;
 911 #endif
 912                         goto found;
 913                 }
 914         }
 915
 916 #ifdef SEM_DEBUG
 917         printf("need to allocate an id for the request\n");
 918 #endif
 919         if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
 920                 if (nsems <= 0 || nsems > limitseminfo.semmsl) {
 921 #ifdef SEM_DEBUG
 922                         printf("nsems out of range (0<%d<=%d)\n", nsems,
 923                             seminfo.semmsl);
 924 #endif
 925                         eval = EINVAL;
 926                         goto semgetout;
 927                 }
 928                 if (nsems > seminfo.semmns - semtot) {
 929 #ifdef SEM_DEBUG
 930                         printf("not enough semaphores left (need %d, got %d)\n",
 931                             nsems, seminfo.semmns - semtot);
 932 #endif
 933                         if (!grow_sem_pool(semtot + nsems)) {
 934 #ifdef SEM_DEBUG
 935                                 printf("failed to grow the sem array\n");
 936 #endif
 937                                 eval = ENOSPC;
 938                                 goto semgetout;
 939                         }
 940                 }
 941                 for (semid = 0; semid < seminfo.semmni; semid++) {
 942                         if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) == 0)
 943                                 break;
 944                 }
 945                 if (semid == seminfo.semmni) {
 946 #ifdef SEM_DEBUG
 947                         printf("no more id's available\n");
 948 #endif
 949                         if (!grow_sema_array(seminfo.semmni + 1))
 950                         {
 951 #ifdef SEM_DEBUG
 952                                 printf("failed to grow sema array\n");
 953 #endif
 954                                 eval = ENOSPC;
 955                                 goto semgetout;
 956                         }
 957                 }
 958 #ifdef SEM_DEBUG
 959                 printf("semid %d is available\n", semid);
 960 #endif
 961                 sema[semid].u.sem_perm._key = key;
 962                 sema[semid].u.sem_perm.cuid = kauth_cred_getuid(cred);
 963                 sema[semid].u.sem_perm.uid = kauth_cred_getuid(cred);
 964                 sema[semid].u.sem_perm.cgid = cred->cr_gid;
 965                 sema[semid].u.sem_perm.gid = cred->cr_gid;
 966                 sema[semid].u.sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
 967                 sema[semid].u.sem_perm._seq =
 968                     (sema[semid].u.sem_perm._seq + 1) & 0x7fff;
 969                 sema[semid].u.sem_nsems = nsems;
 970                 sema[semid].u.sem_otime = 0;
 971                 sema[semid].u.sem_ctime = sysv_semtime();
 972                 sema[semid].u.sem_base = &sem_pool[semtot];
 973                 semtot += nsems;
 974                 bzero(sema[semid].u.sem_base,
 975                     sizeof(sema[semid].u.sem_base[0])*nsems);
 976 #if CONFIG_MACF
 977                 mac_sysvsem_label_associate(cred, &sema[semid]);
 978 #endif
 979 #ifdef SEM_DEBUG
 980                 printf("sembase = 0x%x, next = 0x%x\n", sema[semid].u.sem_base,
 981                     &sem_pool[semtot]);
 982 #endif
 983         } else {
 984 #ifdef SEM_DEBUG
 985                 printf("didn't find it and wasn't asked to create it\n");
 986 #endif
 987                 eval = ENOENT;
 988                 goto semgetout;
 989         }
 990
 991 found:
 992         *retval = IXSEQ_TO_IPCID(semid, sema[semid].u.sem_perm);
 993         AUDIT_ARG(svipc_id, *retval);
 994 #ifdef SEM_DEBUG
 995         printf("semget is done, returning %d\n", *retval);
 996 #endif
 997         eval = 0;
 998
 999 semgetout:
1000         SYSV_SEM_SUBSYS_UNLOCK();
1001         return(eval);
1002 }
1003
1004 int
1005 semop(struct proc *p, struct semop_args *uap, register_t *retval)
1006 {
1007         int semid = uap->semid;
1008         int nsops = uap->nsops;
1009         struct sembuf sops[MAX_SOPS];
1010         register struct semid_kernel *semakptr;
1011         register struct sembuf *sopptr = NULL;  /* protected by 'semptr' */
1012         register struct sem *semptr = NULL;     /* protected by 'if' */
1013         int supidx = -1;
1014         int i, j, eval;
1015         int do_wakeup, do_undos;
1016
1017         AUDIT_ARG(svipc_id, uap->semid);
1018
1019         SYSV_SEM_SUBSYS_LOCK();
1020
1021 #ifdef SEM_DEBUG
1022         printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops);
1023 #endif
1024
1025         semid = IPCID_TO_IX(semid);     /* Convert back to zero origin */
1026
1027         if (semid < 0 || semid >= seminfo.semmni) {
1028                 eval = EINVAL;
1029                 goto semopout;
1030         }
1031
1032         semakptr = &sema[semid];
1033         if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0) {
1034                 eval = EINVAL;
1035                 goto semopout;
1036         }
1037         if (semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid)) {
1038                 eval = EINVAL;
1039                 goto semopout;
1040         }
1041
1042         if ((eval = ipcperm(kauth_cred_get(), &semakptr->u.sem_perm, IPC_W))) {
1043 #ifdef SEM_DEBUG
1044                 printf("eval = %d from ipaccess\n", eval);
1045 #endif
1046                 goto semopout;
1047         }
1048
1049         if (nsops < 0 || nsops > MAX_SOPS) {
1050 #ifdef SEM_DEBUG
1051                 printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops);
1052 #endif
1053                 eval = E2BIG;
1054                 goto semopout;
1055         }
1056
1057 #if CONFIG_MACF
1058         /*
1059          * Initial pass thru sops to see what permissions are needed.
1060          */
1061         j = 0;          /* permission needed */
1062         for (i = 0; i < nsops; i++)
1063                 j |= (sops[i].sem_op == 0) ? SEM_R : SEM_A;
1064
1065         /*
1066          * The MAC hook checks whether the thread has read (and possibly
1067          * write) permissions to the semaphore array based on the
1068          * sopptr->sem_op value.
1069          */
1070         eval = mac_sysvsem_check_semop(kauth_cred_get(), semakptr, j);
1071         if (eval)
1072                 goto semopout;
1073 #endif
1074
1075         /*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
1076         if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
1077 #ifdef SEM_DEBUG
1078                 printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
1079                     uap->sops, &sops, nsops * sizeof(struct sembuf));
1080 #endif
1081                 goto semopout;
1082         }
1083
1084         /*
1085          * Loop trying to satisfy the vector of requests.
1086          * If we reach a point where we must wait, any requests already
1087          * performed are rolled back and we go to sleep until some other
1088          * process wakes us up.  At this point, we start all over again.
1089          *
1090          * This ensures that from the perspective of other tasks, a set
1091          * of requests is atomic (never partially satisfied).
1092          */
1093         do_undos = 0;
1094
1095         for (;;) {
1096                 do_wakeup = 0;
1097
1098                 for (i = 0; i < nsops; i++) {
1099                         sopptr = &sops[i];
1100
1101                         if (sopptr->sem_num >= semakptr->u.sem_nsems) {
1102                                 eval = EFBIG;
1103                                 goto semopout;
1104                         }
1105
1106                         semptr = &semakptr->u.sem_base[sopptr->sem_num];
1107
1108 #ifdef SEM_DEBUG
1109                         printf("semop:  semakptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
1110                             semakptr, semakptr->u.sem_base, semptr,
1111                             sopptr->sem_num, semptr->semval, sopptr->sem_op,
1112                             (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait");
1113 #endif
1114
1115                         if (sopptr->sem_op < 0) {
1116                                 if (semptr->semval + sopptr->sem_op < 0) {
1117 #ifdef SEM_DEBUG
1118                                         printf("semop:  can't do it now\n");
1119 #endif
1120                                         break;
1121                                 } else {
1122                                         semptr->semval += sopptr->sem_op;
1123                                         if (semptr->semval == 0 &&
1124                                             semptr->semzcnt > 0)
1125                                                 do_wakeup = 1;
1126                                 }
1127                                 if (sopptr->sem_flg & SEM_UNDO)
1128                                         do_undos = 1;
1129                         } else if (sopptr->sem_op == 0) {
1130                                 if (semptr->semval > 0) {
1131 #ifdef SEM_DEBUG
1132                                         printf("semop:  not zero now\n");
1133 #endif
1134                                         break;
1135                                 }
1136                         } else {
1137                                 if (semptr->semncnt > 0)
1138                                         do_wakeup = 1;
1139                                 semptr->semval += sopptr->sem_op;
1140                                 if (sopptr->sem_flg & SEM_UNDO)
1141                                         do_undos = 1;
1142                         }
1143                 }
1144
1145                 /*
1146                  * Did we get through the entire vector?
1147                  */
1148                 if (i >= nsops)
1149                         goto done;
1150
1151                 /*
1152                  * No ... rollback anything that we've already done
1153                  */
1154 #ifdef SEM_DEBUG
1155                 printf("semop:  rollback 0 through %d\n", i-1);
1156 #endif
1157                 for (j = 0; j < i; j++)
1158                         semakptr->u.sem_base[sops[j].sem_num].semval -=
1159                             sops[j].sem_op;
1160
1161                 /*
1162                  * If the request that we couldn't satisfy has the
1163                  * NOWAIT flag set then return with EAGAIN.
1164                  */
1165                 if (sopptr->sem_flg & IPC_NOWAIT) {
1166                         eval = EAGAIN;
1167                         goto semopout;
1168                 }
1169
1170                 if (sopptr->sem_op == 0)
1171                         semptr->semzcnt++;
1172                 else
1173                         semptr->semncnt++;
1174
1175 #ifdef SEM_DEBUG
1176                 printf("semop:  good night!\n");
1177 #endif
1178                 /* Release our lock on the semaphore subsystem so
1179                  * another thread can get at the semaphore we are
1180                  * waiting for. We will get the lock back after we
1181                  * wake up.
1182                  */
1183                 eval = msleep((caddr_t)semakptr, &sysv_sem_subsys_mutex , (PZERO - 4) | PCATCH,
1184                     "semwait", 0);
1185
1186 #ifdef SEM_DEBUG
1187                 printf("semop:  good morning (eval=%d)!\n", eval);
1188 #endif
1189                 if (eval != 0) {
1190                         eval = EINTR;
1191                 }
1192
1193                 /*
1194                  * IMPORTANT: while we were asleep, the semaphore array might
1195                  * have been reallocated somewhere else (see grow_sema_array()).
1196                  * When we wake up, we have to re-lookup the semaphore
1197                  * structures and re-validate them.
1198                  */
1199
1200                 semptr = NULL;
1201
1202                 /*
1203                  * Make sure that the semaphore still exists
1204                  *
1205                  * XXX POSIX: Third test this 'if' and 'EINTR' precedence may
1206                  * fail testing; if so, we will need to revert this code.
1207                  */
1208                 semakptr = &sema[semid];   /* sema may have been reallocated */
1209                 if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
1210                     semakptr->u.sem_perm._seq != IPCID_TO_SEQ(uap->semid) ||
1211                     sopptr->sem_num >= semakptr->u.sem_nsems) {
1212                         /* The man page says to return EIDRM. */
1213                         /* Unfortunately, BSD doesn't define that code! */
1214                         if (eval == EINTR) {
1215                                 /*
1216                                  * EINTR takes precedence over the fact that
1217                                  * the semaphore disappeared while we were
1218                                  * sleeping...
1219                                  */
1220                         } else {
1221 #ifdef EIDRM
1222                                 eval = EIDRM;
1223 #else
1224                                 eval = EINVAL;          /* Ancient past */
1225 #endif
1226                         }
1227                         goto semopout;
1228                 }
1229
1230                 /*
1231                  * The semaphore is still alive.  Readjust the count of
1232                  * waiting processes. semptr needs to be recomputed
1233                  * because the sem[] may have been reallocated while
1234                  * we were sleeping, updating our sem_base pointer.
1235                  */
1236                 semptr = &semakptr->u.sem_base[sopptr->sem_num];
1237                 if (sopptr->sem_op == 0)
1238                         semptr->semzcnt--;
1239                 else
1240                         semptr->semncnt--;
1241
1242                 if (eval != 0) { /* EINTR */
1243                         goto semopout;
1244                 }
1245         }
1246
1247 done:
1248         /*
1249          * Process any SEM_UNDO requests.
1250          */
1251         if (do_undos) {
1252                 for (i = 0; i < nsops; i++) {
1253                         /*
1254                          * We only need to deal with SEM_UNDO's for non-zero
1255                          * op's.
1256                          */
1257                         int adjval;
1258
1259                         if ((sops[i].sem_flg & SEM_UNDO) == 0)
1260                                 continue;
1261                         adjval = sops[i].sem_op;
1262                         if (adjval == 0)
1263                                 continue;
1264                         eval = semundo_adjust(p, &supidx, semid,
1265                             sops[i].sem_num, -adjval);
1266                         if (eval == 0)
1267                                 continue;
1268
1269                         /*
1270                          * Oh-Oh!  We ran out of either sem_undo's or undo's.
1271                          * Rollback the adjustments to this point and then
1272                          * rollback the semaphore ups and down so we can return
1273                          * with an error with all structures restored.  We
1274                          * rollback the undo's in the exact reverse order that
1275                          * we applied them.  This guarantees that we won't run
1276                          * out of space as we roll things back out.
1277                          */
1278                         for (j = i - 1; j >= 0; j--) {
1279                                 if ((sops[j].sem_flg & SEM_UNDO) == 0)
1280                                         continue;
1281                                 adjval = sops[j].sem_op;
1282                                 if (adjval == 0)
1283                                         continue;
1284                                 if (semundo_adjust(p, &supidx, semid,
1285                                     sops[j].sem_num, adjval) != 0)
1286                                         panic("semop - can't undo undos");
1287                         }
1288
1289                         for (j = 0; j < nsops; j++)
1290                                 semakptr->u.sem_base[sops[j].sem_num].semval -=
1291                                     sops[j].sem_op;
1292
1293 #ifdef SEM_DEBUG
1294                         printf("eval = %d from semundo_adjust\n", eval);
1295 #endif
1296                         goto semopout;
1297                 } /* loop through the sops */
1298         } /* if (do_undos) */
1299
1300         /* We're definitely done - set the sempid's */
1301         for (i = 0; i < nsops; i++) {
1302                 sopptr = &sops[i];
1303                 semptr = &semakptr->u.sem_base[sopptr->sem_num];
1304                 semptr->sempid = p->p_pid;
1305         }
1306         semakptr->u.sem_otime = sysv_semtime();
1307
1308         if (do_wakeup) {
1309 #ifdef SEM_DEBUG
1310                 printf("semop:  doing wakeup\n");
1311 #ifdef SEM_WAKEUP
1312                 sem_wakeup((caddr_t)semakptr);
1313 #else
1314                 wakeup((caddr_t)semakptr);
1315 #endif
1316                 printf("semop:  back from wakeup\n");
1317 #else
1318                 wakeup((caddr_t)semakptr);
1319 #endif
1320         }
1321 #ifdef SEM_DEBUG
1322         printf("semop:  done\n");
1323 #endif
1324         *retval = 0;
1325         eval = 0;
1326 semopout:
1327         SYSV_SEM_SUBSYS_UNLOCK();
1328         return(eval);
1329 }
1330
1331 /*
1332  * Go through the undo structures for this process and apply the adjustments to
1333  * semaphores.
1334  */
1335 void
1336 semexit(struct proc *p)
1337 {
1338         register struct sem_undo *suptr = NULL;
1339         int suidx;
1340         int *supidx;
1341         int did_something;
1342
1343         /* If we have not allocated our semaphores yet there can't be
1344          * anything to undo, but we need the lock to prevent
1345          * dynamic memory race conditions.
1346          */
1347         SYSV_SEM_SUBSYS_LOCK();
1348
1349         if (!sem_pool)
1350         {
1351                 SYSV_SEM_SUBSYS_UNLOCK();
1352                 return;
1353         }
1354         did_something = 0;
1355
1356         /*
1357          * Go through the chain of undo vectors looking for one
1358          * associated with this process.
1359          */
1360
1361         for (supidx = &semu_list_idx; (suidx = *supidx) != -1;
1362             supidx = &suptr->un_next_idx) {
1363                 suptr = SEMU(suidx);
1364                 if (suptr->un_proc == p)
1365                         break;
1366         }
1367
1368         if (suidx == -1)
1369                 goto unlock;
1370
1371 #ifdef SEM_DEBUG
1372         printf("proc @%08x has undo structure with %d entries\n", p,
1373             suptr->un_cnt);
1374 #endif
1375
1376         /*
1377          * If there are any active undo elements then process them.
1378          */
1379         if (suptr->un_cnt > 0) {
1380                 while (suptr->un_ent != NULL) {
1381                         struct undo *sueptr;
1382                         int semid;
1383                         int semnum;
1384                         int adjval;
1385                         struct semid_kernel *semakptr;
1386
1387                         sueptr = suptr->un_ent;
1388                         semid = sueptr->une_id;
1389                         semnum = sueptr->une_num;
1390                         adjval = sueptr->une_adjval;
1391
1392                         semakptr = &sema[semid];
1393                         if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0)
1394                                 panic("semexit - semid not allocated");
1395                         if (semnum >= semakptr->u.sem_nsems)
1396                                 panic("semexit - semnum out of range");
1397
1398 #ifdef SEM_DEBUG
1399                         printf("semexit:  %08x id=%d num=%d(adj=%d) ; sem=%d\n",
1400                                suptr->un_proc,
1401                                semid,
1402                                semnum,
1403                                adjval,
1404                                semakptr->u.sem_base[semnum].semval);
1405 #endif
1406
1407                         if (adjval < 0) {
1408                                 if (semakptr->u.sem_base[semnum].semval < -adjval)
1409                                         semakptr->u.sem_base[semnum].semval = 0;
1410                                 else
1411                                         semakptr->u.sem_base[semnum].semval +=
1412                                             adjval;
1413                         } else
1414                                 semakptr->u.sem_base[semnum].semval += adjval;
1415
1416                 /* Maybe we should build a list of semakptr's to wake
1417                  * up, finish all access to data structures, release the
1418                  * subsystem lock, and wake all the processes.  Something
1419                  * to think about.  It wouldn't buy us anything unless
1420                  * wakeup had the potential to block, or the syscall
1421                  * funnel state was changed to allow multiple threads
1422                  * in the BSD code at once.
1423                  */
1424 #ifdef SEM_WAKEUP
1425                         sem_wakeup((caddr_t)semakptr);
1426 #else
1427                         wakeup((caddr_t)semakptr);
1428 #endif
1429 #ifdef SEM_DEBUG
1430                         printf("semexit:  back from wakeup\n");
1431 #endif
1432                         suptr->un_cnt--;
1433                         suptr->un_ent = sueptr->une_next;
1434                         FREE(sueptr, M_SYSVSEM);
1435                         sueptr = NULL;
1436                 }
1437         }
1438
1439         /*
1440          * Deallocate the undo vector.
1441          */
1442 #ifdef SEM_DEBUG
1443         printf("removing vector\n");
1444 #endif
1445         suptr->un_proc = NULL;
1446         *supidx = suptr->un_next_idx;
1447
1448 unlock:
1449         /*
1450          * There is a semaphore leak (i.e. memory leak) in this code.
1451          * We should be deleting the IPC_PRIVATE semaphores when they are
1452          * no longer needed, and we dont. We would have to track which processes
1453          * know about which IPC_PRIVATE semaphores, updating the list after
1454          * every fork.  We can't just delete them semaphore when the process
1455          * that created it dies, because that process may well have forked
1456          * some children.  So we need to wait until all of it's children have
1457          * died, and so on.  Maybe we should tag each IPC_PRIVATE sempahore
1458          * with the creating group ID, count the number of processes left in
1459          * that group, and delete the semaphore when the group is gone.
1460          * Until that code gets implemented we will leak IPC_PRIVATE semaphores.
1461          * There is an upper bound on the size of our semaphore array, so
1462          * leaking the semaphores should not work as a DOS attack.
1463          *
1464          * Please note that the original BSD code this file is based on had the
1465          * same leaky semaphore problem.
1466          */
1467
1468         SYSV_SEM_SUBSYS_UNLOCK();
1469 }
1470
1471
1472 /* (struct sysctl_oid *oidp, void *arg1, int arg2, \
1473         struct sysctl_req *req) */
1474 static int
1475 sysctl_seminfo(__unused struct sysctl_oid *oidp, void *arg1,
1476         __unused int arg2, struct sysctl_req *req)
1477 {
1478         int error = 0;
1479
1480         error = SYSCTL_OUT(req, arg1, sizeof(int));
1481         if (error || req->newptr == USER_ADDR_NULL)
1482                 return(error);
1483
1484         SYSV_SEM_SUBSYS_LOCK();
1485
1486         /* Set the values only if shared memory is not initialised */
1487         if ((sem_pool == NULL) &&
1488                 (sema == NULL) &&
1489                 (semu == NULL) &&
1490                 (semu_list_idx == -1)) {
1491                         if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) {
1492                                 goto out;
1493                         }
1494         } else
1495                 error = EINVAL;
1496 out:
1497         SYSV_SEM_SUBSYS_UNLOCK();
1498         return(error);
1499
1500 }
1501
1502 /* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
1503 extern struct sysctl_oid_list sysctl__kern_sysv_children;
1504 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmni, CTLTYPE_INT | CTLFLAG_RW,
1505     &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
1506
1507 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmns, CTLTYPE_INT | CTLFLAG_RW,
1508     &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
1509
1510 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmnu, CTLTYPE_INT | CTLFLAG_RW,
1511     &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
1512
1513 SYSCTL_PROC(_kern_sysv, OID_AUTO, semmsl, CTLTYPE_INT | CTLFLAG_RW,
1514     &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
1515
1516 SYSCTL_PROC(_kern_sysv, OID_AUTO, semume, CTLTYPE_INT | CTLFLAG_RW,
1517     &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
1518
1519
1520 static int
1521 IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
1522         __unused int arg2, struct sysctl_req *req)
1523 {
1524         int error;
1525         int cursor;
1526         union {
1527                 struct IPCS_command u32;
1528                 struct user_IPCS_command u64;
1529         } ipcs;
1530         struct semid_ds semid_ds32;     /* post conversion, 32 bit version */
1531         void *semid_dsp;
1532         size_t ipcs_sz = sizeof(struct user_IPCS_command);
1533         size_t semid_ds_sz = sizeof(struct user_semid_ds);
1534         struct proc *p = current_proc();
1535
1536         if (!IS_64BIT_PROCESS(p)) {
1537                 ipcs_sz = sizeof(struct IPCS_command);
1538                 semid_ds_sz = sizeof(struct semid_ds);
1539         }
1540
1541         /* Copy in the command structure */
1542         if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
1543                 return(error);
1544         }
1545
1546         if (!IS_64BIT_PROCESS(p)) /* convert in place */
1547                 ipcs.u64.ipcs_data = CAST_USER_ADDR_T(ipcs.u32.ipcs_data);
1548
1549         /* Let us version this interface... */
1550         if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
1551                 return(EINVAL);
1552         }
1553
1554         SYSV_SEM_SUBSYS_LOCK();
1555         switch(ipcs.u64.ipcs_op) {
1556         case IPCS_SEM_CONF:     /* Obtain global configuration data */
1557                 if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) {
1558                         error = ERANGE;
1559                         break;
1560                 }
1561                 if (ipcs.u64.ipcs_cursor != 0) {        /* fwd. compat. */
1562                         error = EINVAL;
1563                         break;
1564                 }
1565                 error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1566                 break;
1567
1568         case IPCS_SEM_ITER:     /* Iterate over existing segments */
1569                 cursor = ipcs.u64.ipcs_cursor;
1570                 if (cursor < 0 || cursor >= seminfo.semmni) {
1571                         error = ERANGE;
1572                         break;
1573                 }
1574                 if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) {
1575                         error = EINVAL;
1576                         break;
1577                 }
1578                 for( ; cursor < seminfo.semmni; cursor++) {
1579                         if (sema[cursor].u.sem_perm.mode & SEM_ALLOC)
1580                                 break;
1581                         continue;
1582                 }
1583                 if (cursor == seminfo.semmni) {
1584                         error = ENOENT;
1585                         break;
1586                 }
1587
1588                 semid_dsp = &sema[cursor].u;    /* default: 64 bit */
1589
1590                 /*
1591                  * If necessary, convert the 64 bit kernel segment
1592                  * descriptor to a 32 bit user one.
1593                  */
1594                 if (!IS_64BIT_PROCESS(p)) {
1595                         semid_ds_64to32(semid_dsp, &semid_ds32);
1596                         semid_dsp = &semid_ds32;
1597                 }
1598                 error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1599                 if (!error) {
1600                         /* update cursor */
1601                         ipcs.u64.ipcs_cursor = cursor + 1;
1602                         error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
1603                 }
1604                 break;
1605
1606         default:
1607                 error = EINVAL;
1608                 break;
1609         }
1610         SYSV_SEM_SUBSYS_UNLOCK();
1611         return(error);
1612 }
1613
1614 SYSCTL_DECL(_kern_sysv_ipcs);
1615 SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_ANYBODY,
1616         0, 0, IPCS_sem_sysctl,
1617         "S,IPCS_sem_command",
1618         "ipcs sem command interface");
1619
1620 #endif /* SYSV_SEM */