bsd/kern/sysv_sem.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23  * Implementation of SVID semaphores
  24  *
  25  * Author:  Daniel Boulet
  26  *
  27  * This software is provided ``AS IS'' without any warranties of any kind.
  28  */
  29 /*
  30  * John Bellardo modified the implementation for Darwin. 12/2000
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/systm.h>
  35 #include <sys/kernel.h>
  36 #include <sys/proc_internal.h>
  37 #include <sys/kauth.h>
  38 #include <sys/sem_internal.h>
  39 #include <sys/malloc.h>
  40 #include <mach/mach_types.h>
  41
  42 #include <sys/filedesc.h>
  43 #include <sys/file_internal.h>
  44 #include <sys/sysctl.h>
  45 #include <sys/ipcs.h>
  46 #include <sys/sysent.h>
  47 #include <sys/sysproto.h>
  48
  49 #include <bsm/audit_kernel.h>
  50
  51
  52 /* Uncomment this line to see the debugging output */
  53 /* #define SEM_DEBUG */
  54
  55 #define M_SYSVSEM       M_TEMP
  56
  57
  58 /* Hard system limits to avoid resource starvation / DOS attacks.
  59  * These are not needed if we can make the semaphore pages swappable.
  60  */
  61 static struct seminfo limitseminfo = {
  62         SEMMAP,        /* # of entries in semaphore map */
  63         SEMMNI,        /* # of semaphore identifiers */
  64         SEMMNS,        /* # of semaphores in system */
  65         SEMMNU,        /* # of undo structures in system */
  66         SEMMSL,        /* max # of semaphores per id */
  67         SEMOPM,        /* max # of operations per semop call */
  68         SEMUME,        /* max # of undo entries per process */
  69         SEMUSZ,        /* size in bytes of undo structure */
  70         SEMVMX,        /* semaphore maximum value */
  71         SEMAEM         /* adjust on exit max value */
  72 };
  73
  74 /* Current system allocations.  We use this structure to track how many
  75  * resources we have allocated so far.  This way we can set large hard limits
  76  * and not allocate the memory for them up front.
  77  */
  78 struct seminfo seminfo = {
  79         SEMMAP, /* Unused, # of entries in semaphore map */
  80         0,      /* # of semaphore identifiers */
  81         0,      /* # of semaphores in system */
  82         0,      /* # of undo entries in system */
  83         SEMMSL, /* max # of semaphores per id */
  84         SEMOPM, /* max # of operations per semop call */
  85         SEMUME, /* max # of undo entries per process */
  86         SEMUSZ, /* size in bytes of undo structure */
  87         SEMVMX, /* semaphore maximum value */
  88         SEMAEM  /* adjust on exit max value */
  89 };
  90
  91
  92 static struct sem_undo *semu_alloc(struct proc *p);
  93 static int semundo_adjust(struct proc *p, struct sem_undo **supptr,
  94                 int semid, int semnum, int adjval);
  95 static void semundo_clear(int semid, int semnum);
  96
  97 /* XXX casting to (sy_call_t *) is bogus, as usual. */
  98 static sy_call_t *semcalls[] = {
  99         (sy_call_t *)semctl, (sy_call_t *)semget,
 100         (sy_call_t *)semop, (sy_call_t *)semconfig
 101 };
 102
 103 static int              semtot = 0;             /* # of used semaphores */
 104 struct user_semid_ds    *sema = NULL;           /* semaphore id pool */
 105 struct sem              *sem_pool =  NULL;      /* semaphore pool */
 106 static struct sem_undo  *semu_list = NULL;      /* active undo structures */
 107 struct sem_undo         *semu = NULL;           /* semaphore undo pool */
 108
 109
 110 void sysv_sem_lock_init(void);
 111 static lck_grp_t       *sysv_sem_subsys_lck_grp;
 112 static lck_grp_attr_t  *sysv_sem_subsys_lck_grp_attr;
 113 static lck_attr_t      *sysv_sem_subsys_lck_attr;
 114 static lck_mtx_t        sysv_sem_subsys_mutex;
 115
 116 #define SYSV_SEM_SUBSYS_LOCK() lck_mtx_lock(&sysv_sem_subsys_mutex)
 117 #define SYSV_SEM_SUBSYS_UNLOCK() lck_mtx_unlock(&sysv_sem_subsys_mutex)
 118
 119
 120 __private_extern__ void
 121 sysv_sem_lock_init( void )
 122 {
 123
 124     sysv_sem_subsys_lck_grp_attr = lck_grp_attr_alloc_init();
 125     lck_grp_attr_setstat(sysv_sem_subsys_lck_grp_attr);
 126
 127     sysv_sem_subsys_lck_grp = lck_grp_alloc_init("sysv_shm_subsys_lock", sysv_sem_subsys_lck_grp_attr);
 128
 129     sysv_sem_subsys_lck_attr = lck_attr_alloc_init();
 130     lck_attr_setdebug(sysv_sem_subsys_lck_attr);
 131     lck_mtx_init(&sysv_sem_subsys_mutex, sysv_sem_subsys_lck_grp, sysv_sem_subsys_lck_attr);
 132 }
 133
 134 static __inline__ user_time_t
 135 sysv_semtime(void)
 136 {
 137         struct timeval  tv;
 138         microtime(&tv);
 139         return (tv.tv_sec);
 140 }
 141
 142 /*
 143  * XXX conversion of internal user_time_t to external tume_t loses
 144  * XXX precision; not an issue for us now, since we are only ever
 145  * XXX setting 32 bits worth of time into it.
 146  *
 147  * pad field contents are not moved correspondingly; contents will be lost
 148  *
 149  * NOTE: Source and target may *NOT* overlap! (target is smaller)
 150  */
 151 static void
 152 semid_ds_64to32(struct user_semid_ds *in, struct semid_ds *out)
 153 {
 154         out->sem_perm = in->sem_perm;
 155         out->sem_base = (__int32_t)in->sem_base;
 156         out->sem_nsems = in->sem_nsems;
 157         out->sem_otime = in->sem_otime;         /* XXX loses precision */
 158         out->sem_ctime = in->sem_ctime;         /* XXX loses precision */
 159 }
 160
 161 /*
 162  * pad field contents are not moved correspondingly; contents will be lost
 163  *
 164  * NOTE: Source and target may are permitted to overlap! (source is smaller);
 165  * this works because we copy fields in order from the end of the struct to
 166  * the beginning.
 167  *
 168  * XXX use CAST_USER_ADDR_T() for lack of a CAST_USER_TIME_T(); net effect
 169  * XXX is the same.
 170  */
 171 static void
 172 semid_ds_32to64(struct semid_ds *in, struct user_semid_ds *out)
 173 {
 174         out->sem_ctime = in->sem_ctime;
 175         out->sem_otime = in->sem_otime;
 176         out->sem_nsems = in->sem_nsems;
 177         out->sem_base = (void *)in->sem_base;
 178         out->sem_perm = in->sem_perm;
 179 }
 180
 181
 182 /*
 183  * Entry point for all SEM calls
 184  *
 185  * In Darwin this is no longer the entry point.  It will be removed after
 186  *  the code has been tested better.
 187  */
 188 /* XXX actually varargs. */
 189 int
 190 semsys(struct proc *p, struct semsys_args *uap, register_t *retval)
 191 {
 192
 193         /* The individual calls handling the locking now */
 194
 195         if (uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
 196                 return (EINVAL);
 197         return ((*semcalls[uap->which])(p, &uap->a2, retval));
 198 }
 199
 200 /*
 201  * Lock or unlock the entire semaphore facility.
 202  *
 203  * This will probably eventually evolve into a general purpose semaphore
 204  * facility status enquiry mechanism (I don't like the "read /dev/kmem"
 205  * approach currently taken by ipcs and the amount of info that we want
 206  * to be able to extract for ipcs is probably beyond what the capability
 207  * of the getkerninfo facility.
 208  *
 209  * At the time that the current version of semconfig was written, ipcs is
 210  * the only user of the semconfig facility.  It uses it to ensure that the
 211  * semaphore facility data structures remain static while it fishes around
 212  * in /dev/kmem.
 213  */
 214
 215 int
 216 semconfig(__unused struct proc *p, struct semconfig_args *uap, register_t *retval)
 217 {
 218         int eval = 0;
 219
 220         switch (uap->flag) {
 221         case SEM_CONFIG_FREEZE:
 222                 SYSV_SEM_SUBSYS_LOCK();
 223                 break;
 224
 225         case SEM_CONFIG_THAW:
 226                 SYSV_SEM_SUBSYS_UNLOCK();
 227                 break;
 228
 229         default:
 230                 printf("semconfig: unknown flag parameter value (%d) - ignored\n",
 231                     uap->flag);
 232                 eval = EINVAL;
 233                 break;
 234         }
 235
 236         *retval = 0;
 237         return(eval);
 238 }
 239
 240 /*
 241  * Expand the semu array to the given capacity.  If the expansion fails
 242  * return 0, otherwise return 1.
 243  *
 244  * Assumes we already have the subsystem lock.
 245  */
 246 static int
 247 grow_semu_array(int newSize)
 248 {
 249         register int i;
 250         register struct sem_undo *newSemu;
 251
 252         if (newSize <= seminfo.semmnu)
 253                 return 1;
 254         if (newSize > limitseminfo.semmnu) /* enforce hard limit */
 255         {
 256 #ifdef SEM_DEBUG
 257                 printf("undo structure hard limit of %d reached, requested %d\n",
 258                         limitseminfo.semmnu, newSize);
 259 #endif
 260                 return 0;
 261         }
 262         newSize = (newSize/SEMMNU_INC + 1) * SEMMNU_INC;
 263         newSize = newSize > limitseminfo.semmnu ? limitseminfo.semmnu : newSize;
 264
 265 #ifdef SEM_DEBUG
 266         printf("growing semu[] from %d to %d\n", seminfo.semmnu, newSize);
 267 #endif
 268         MALLOC(newSemu, struct sem_undo *, sizeof (struct sem_undo) * newSize,
 269                M_SYSVSEM, M_WAITOK | M_ZERO);
 270         if (NULL == newSemu)
 271         {
 272 #ifdef SEM_DEBUG
 273                 printf("allocation failed.  no changes made.\n");
 274 #endif
 275                 return 0;
 276         }
 277
 278         /* copy the old data to the new array */
 279         for (i = 0; i < seminfo.semmnu; i++)
 280         {
 281                 newSemu[i] = semu[i];
 282         }
 283         /*
 284          * The new elements (from newSemu[i] to newSemu[newSize-1]) have their
 285          * "un_proc" set to 0 (i.e. NULL) by the M_ZERO flag to MALLOC() above,
 286          * so they're already marked as "not in use".
 287          */
 288
 289         /* Clean up the old array */
 290         if (semu)
 291                 FREE(semu, M_SYSVSEM);
 292
 293         semu = newSemu;
 294         seminfo.semmnu = newSize;
 295 #ifdef SEM_DEBUG
 296         printf("expansion successful\n");
 297 #endif
 298         return 1;
 299 }
 300
 301 /*
 302  * Expand the sema array to the given capacity.  If the expansion fails
 303  * we return 0, otherwise we return 1.
 304  *
 305  * Assumes we already have the subsystem lock.
 306  */
 307 static int
 308 grow_sema_array(int newSize)
 309 {
 310         register struct user_semid_ds *newSema;
 311         register int i;
 312
 313         if (newSize <= seminfo.semmni)
 314                 return 0;
 315         if (newSize > limitseminfo.semmni) /* enforce hard limit */
 316         {
 317 #ifdef SEM_DEBUG
 318                 printf("identifier hard limit of %d reached, requested %d\n",
 319                         limitseminfo.semmni, newSize);
 320 #endif
 321                 return 0;
 322         }
 323         newSize = (newSize/SEMMNI_INC + 1) * SEMMNI_INC;
 324         newSize = newSize > limitseminfo.semmni ? limitseminfo.semmni : newSize;
 325
 326 #ifdef SEM_DEBUG
 327         printf("growing sema[] from %d to %d\n", seminfo.semmni, newSize);
 328 #endif
 329         MALLOC(newSema, struct user_semid_ds *,
 330                sizeof (struct user_semid_ds) * newSize,
 331                M_SYSVSEM, M_WAITOK | M_ZERO);
 332         if (NULL == newSema)
 333         {
 334 #ifdef SEM_DEBUG
 335                 printf("allocation failed.  no changes made.\n");
 336 #endif
 337                 return 0;
 338         }
 339
 340         /* copy over the old ids */
 341         for (i = 0; i < seminfo.semmni; i++)
 342         {
 343                 newSema[i] = sema[i];
 344                 /* This is a hack.  What we really want to be able to
 345                  * do is change the value a process is waiting on
 346                  * without waking it up, but I don't know how to do
 347                  * this with the existing code, so we wake up the
 348                  * process and let it do a lot of work to determine the
 349                  * semaphore set is really not available yet, and then
 350                  * sleep on the correct, reallocated user_semid_ds pointer.
 351                  */
 352                 if (sema[i].sem_perm.mode & SEM_ALLOC)
 353                         wakeup((caddr_t)&sema[i]);
 354         }
 355         /*
 356          * The new elements (from newSema[i] to newSema[newSize-1]) have their
 357          * "sem_base" and "sem_perm.mode" set to 0 (i.e. NULL) by the M_ZERO
 358          * flag to MALLOC() above, so they're already marked as "not in use".
 359          */
 360
 361         /* Clean up the old array */
 362         if (sema)
 363                 FREE(sema, M_SYSVSEM);
 364
 365         sema = newSema;
 366         seminfo.semmni = newSize;
 367 #ifdef SEM_DEBUG
 368         printf("expansion successful\n");
 369 #endif
 370         return 1;
 371 }
 372
 373 /*
 374  * Expand the sem_pool array to the given capacity.  If the expansion fails
 375  * we return 0 (fail), otherwise we return 1 (success).
 376  *
 377  * Assumes we already hold the subsystem lock.
 378  */
 379 static int
 380 grow_sem_pool(int new_pool_size)
 381 {
 382         struct sem *new_sem_pool = NULL;
 383         struct sem *sem_free;
 384         int i;
 385
 386         if (new_pool_size < semtot)
 387                 return 0;
 388         /* enforce hard limit */
 389         if (new_pool_size > limitseminfo.semmns) {
 390 #ifdef SEM_DEBUG
 391                 printf("semaphore hard limit of %d reached, requested %d\n",
 392                         limitseminfo.semmns, new_pool_size);
 393 #endif
 394                 return 0;
 395         }
 396
 397         new_pool_size = (new_pool_size/SEMMNS_INC + 1) * SEMMNS_INC;
 398         new_pool_size = new_pool_size > limitseminfo.semmns ? limitseminfo.semmns : new_pool_size;
 399
 400 #ifdef SEM_DEBUG
 401         printf("growing sem_pool array from %d to %d\n", seminfo.semmns, new_pool_size);
 402 #endif
 403         MALLOC(new_sem_pool, struct sem *, sizeof (struct sem) * new_pool_size,
 404                M_SYSVSEM, M_WAITOK | M_ZERO);
 405         if (NULL == new_sem_pool) {
 406 #ifdef SEM_DEBUG
 407                 printf("allocation failed.  no changes made.\n");
 408 #endif
 409                 return 0;
 410         }
 411
 412         /* We have our new memory, now copy the old contents over */
 413         if (sem_pool)
 414                 for(i = 0; i < seminfo.semmns; i++)
 415                         new_sem_pool[i] = sem_pool[i];
 416
 417         /* Update our id structures to point to the new semaphores */
 418         for(i = 0; i < seminfo.semmni; i++) {
 419                 if (sema[i].sem_perm.mode & SEM_ALLOC)  /* ID in use */
 420                         sema[i].sem_base += (new_sem_pool - sem_pool);
 421         }
 422
 423         sem_free = sem_pool;
 424         sem_pool = new_sem_pool;
 425
 426         /* clean up the old array */
 427         if (sem_free != NULL)
 428                 FREE(sem_free, M_SYSVSEM);
 429
 430         seminfo.semmns = new_pool_size;
 431 #ifdef SEM_DEBUG
 432         printf("expansion complete\n");
 433 #endif
 434         return 1;
 435 }
 436
 437 /*
 438  * Allocate a new sem_undo structure for a process
 439  * (returns ptr to structure or NULL if no more room)
 440  *
 441  * Assumes we already hold the subsystem lock.
 442  */
 443
 444 static struct sem_undo *
 445 semu_alloc(struct proc *p)
 446 {
 447         register int i;
 448         register struct sem_undo *suptr;
 449         register struct sem_undo **supptr;
 450         int attempt;
 451
 452         /*
 453          * Try twice to allocate something.
 454          * (we'll purge any empty structures after the first pass so
 455          * two passes are always enough)
 456          */
 457
 458         for (attempt = 0; attempt < 2; attempt++) {
 459                 /*
 460                  * Look for a free structure.
 461                  * Fill it in and return it if we find one.
 462                  */
 463
 464                 for (i = 0; i < seminfo.semmnu; i++) {
 465                         suptr = SEMU(i);
 466                         if (suptr->un_proc == NULL) {
 467                                 suptr->un_next = semu_list;
 468                                 semu_list = suptr;
 469                                 suptr->un_cnt = 0;
 470                                 suptr->un_ent = NULL;
 471                                 suptr->un_proc = p;
 472                                 return(suptr);
 473                         }
 474                 }
 475
 476                 /*
 477                  * We didn't find a free one, if this is the first attempt
 478                  * then try to free some structures.
 479                  */
 480
 481                 if (attempt == 0) {
 482                         /* All the structures are in use - try to free some */
 483                         int did_something = 0;
 484
 485                         supptr = &semu_list;
 486                         while ((suptr = *supptr) != NULL) {
 487                                 if (suptr->un_cnt == 0)  {
 488                                         suptr->un_proc = NULL;
 489                                         *supptr = suptr->un_next;
 490                                         did_something = 1;
 491                                 } else
 492                                         supptr = &(suptr->un_next);
 493                         }
 494
 495                         /* If we didn't free anything. Try expanding
 496                          * the semu[] array.  If that doesn't work
 497                          * then fail.  We expand last to get the
 498                          * most reuse out of existing resources.
 499                          */
 500                         if (!did_something)
 501                                 if (!grow_semu_array(seminfo.semmnu + 1))
 502                                         return(NULL);
 503                 } else {
 504                         /*
 505                          * The second pass failed even though we freed
 506                          * something after the first pass!
 507                          * This is IMPOSSIBLE!
 508                          */
 509                         panic("semu_alloc - second attempt failed");
 510                 }
 511         }
 512         return (NULL);
 513 }
 514
 515 /*
 516  * Adjust a particular entry for a particular proc
 517  *
 518  * Assumes we already hold the subsystem lock.
 519  */
 520 static int
 521 semundo_adjust(struct proc *p, struct sem_undo **supptr, int semid,
 522         int semnum, int adjval)
 523 {
 524         register struct sem_undo *suptr;
 525         register struct undo *sueptr, **suepptr, *new_sueptr;
 526         int i;
 527
 528         /*
 529          * Look for and remember the sem_undo if the caller doesn't provide it
 530          */
 531
 532         suptr = *supptr;
 533         if (suptr == NULL) {
 534                 for (suptr = semu_list; suptr != NULL;
 535                     suptr = suptr->un_next) {
 536                         if (suptr->un_proc == p) {
 537                                 *supptr = suptr;
 538                                 break;
 539                         }
 540                 }
 541                 if (suptr == NULL) {
 542                         if (adjval == 0)
 543                                 return(0);
 544                         suptr = semu_alloc(p);
 545                         if (suptr == NULL)
 546                                 return(ENOSPC);
 547                         *supptr = suptr;
 548                 }
 549         }
 550
 551         /*
 552          * Look for the requested entry and adjust it (delete if adjval becomes
 553          * 0).
 554          */
 555         new_sueptr = NULL;
 556         for (i = 0, suepptr = &suptr->un_ent, sueptr = suptr->un_ent;
 557              i < suptr->un_cnt;
 558              i++, suepptr = &sueptr->une_next, sueptr = sueptr->une_next) {
 559                 if (sueptr->une_id != semid || sueptr->une_num != semnum)
 560                         continue;
 561                 if (adjval == 0)
 562                         sueptr->une_adjval = 0;
 563                 else
 564                         sueptr->une_adjval += adjval;
 565                 if (sueptr->une_adjval == 0) {
 566                         suptr->un_cnt--;
 567                         *suepptr = sueptr->une_next;
 568                         FREE(sueptr, M_SYSVSEM);
 569                         sueptr = NULL;
 570                 }
 571                 return 0;
 572         }
 573
 574         /* Didn't find the right entry - create it */
 575         if (adjval == 0) {
 576                 /* no adjustment: no need for a new entry */
 577                 return 0;
 578         }
 579
 580         if (suptr->un_cnt == limitseminfo.semume) {
 581                 /* reached the limit number of semaphore undo entries */
 582                 return EINVAL;
 583         }
 584
 585         /* allocate a new semaphore undo entry */
 586         MALLOC(new_sueptr, struct undo *, sizeof (struct undo),
 587                M_SYSVSEM, M_WAITOK);
 588         if (new_sueptr == NULL) {
 589                 return ENOMEM;
 590         }
 591
 592         /* fill in the new semaphore undo entry */
 593         new_sueptr->une_next = suptr->un_ent;
 594         suptr->un_ent = new_sueptr;
 595         suptr->un_cnt++;
 596         new_sueptr->une_adjval = adjval;
 597         new_sueptr->une_id = semid;
 598         new_sueptr->une_num = semnum;
 599
 600         return 0;
 601 }
 602
 603 /* Assumes we already hold the subsystem lock.
 604  */
 605 static void
 606 semundo_clear(int semid, int semnum)
 607 {
 608         struct sem_undo *suptr;
 609
 610         for (suptr = semu_list; suptr != NULL; suptr = suptr->un_next) {
 611                 struct undo *sueptr;
 612                 struct undo **suepptr;
 613                 int i = 0;
 614
 615                 sueptr = suptr->un_ent;
 616                 suepptr = &suptr->un_ent;
 617                 while (i < suptr->un_cnt) {
 618                         if (sueptr->une_id == semid) {
 619                                 if (semnum == -1 || sueptr->une_num == semnum) {
 620                                         suptr->un_cnt--;
 621                                         *suepptr = sueptr->une_next;
 622                                         FREE(sueptr, M_SYSVSEM);
 623                                         sueptr = *suepptr;
 624                                         continue;
 625                                 }
 626                                 if (semnum != -1)
 627                                         break;
 628                         }
 629                         i++;
 630                         suepptr = &sueptr->une_next;
 631                         sueptr = sueptr->une_next;
 632                 }
 633         }
 634 }
 635
 636 /*
 637  * Note that the user-mode half of this passes a union coerced to a
 638  * user_addr_t.  The union contains either an int or a pointer, and
 639  * so we have to coerce it back, variant on whether the calling
 640  * process is 64 bit or not.  The coercion works for the 'val' element
 641  * because the alignment is the same in user and kernel space.
 642  */
 643 int
 644 semctl(struct proc *p, struct semctl_args *uap, register_t *retval)
 645 {
 646         int semid = uap->semid;
 647         int semnum = uap->semnum;
 648         int cmd = uap->cmd;
 649         user_semun_t user_arg = (user_semun_t)uap->arg;
 650         kauth_cred_t cred = kauth_cred_get();
 651         int i, rval, eval;
 652         struct user_semid_ds sbuf;
 653         struct user_semid_ds *semaptr;
 654         struct user_semid_ds uds;
 655
 656
 657         AUDIT_ARG(svipc_cmd, cmd);
 658         AUDIT_ARG(svipc_id, semid);
 659
 660         SYSV_SEM_SUBSYS_LOCK();
 661
 662 #ifdef SEM_DEBUG
 663         printf("call to semctl(%d, %d, %d, 0x%qx)\n", semid, semnum, cmd, user_arg);
 664 #endif
 665
 666         semid = IPCID_TO_IX(semid);
 667
 668         if (semid < 0 || semid >= seminfo.semmni) {
 669 #ifdef SEM_DEBUG
 670                 printf("Invalid semid\n");
 671 #endif
 672                 eval = EINVAL;
 673                 goto semctlout;
 674         }
 675
 676         semaptr = &sema[semid];
 677         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
 678             semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
 679                 eval = EINVAL;
 680                 goto semctlout;
 681         }
 682
 683         eval = 0;
 684         rval = 0;
 685
 686         switch (cmd) {
 687         case IPC_RMID:
 688                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
 689                         goto semctlout;
 690
 691                 semaptr->sem_perm.cuid = kauth_cred_getuid(cred);
 692                 semaptr->sem_perm.uid = kauth_cred_getuid(cred);
 693                 semtot -= semaptr->sem_nsems;
 694                 for (i = semaptr->sem_base - sem_pool; i < semtot; i++)
 695                         sem_pool[i] = sem_pool[i + semaptr->sem_nsems];
 696                 for (i = 0; i < seminfo.semmni; i++) {
 697                         if ((sema[i].sem_perm.mode & SEM_ALLOC) &&
 698                             sema[i].sem_base > semaptr->sem_base)
 699                                 sema[i].sem_base -= semaptr->sem_nsems;
 700                 }
 701                 semaptr->sem_perm.mode = 0;
 702                 semundo_clear(semid, -1);
 703                 wakeup((caddr_t)semaptr);
 704                 break;
 705
 706         case IPC_SET:
 707                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_M)))
 708                                 goto semctlout;
 709
 710                 if (IS_64BIT_PROCESS(p)) {
 711                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct user_semid_ds));
 712                 } else {
 713                         eval = copyin(user_arg.buf, &sbuf, sizeof(struct semid_ds));
 714                         /* convert in place; ugly, but safe */
 715                         semid_ds_32to64((struct semid_ds *)&sbuf, &sbuf);
 716                 }
 717
 718                 if (eval != 0) {
 719                         goto semctlout;
 720                 }
 721
 722                 semaptr->sem_perm.uid = sbuf.sem_perm.uid;
 723                 semaptr->sem_perm.gid = sbuf.sem_perm.gid;
 724                 semaptr->sem_perm.mode = (semaptr->sem_perm.mode & ~0777) |
 725                     (sbuf.sem_perm.mode & 0777);
 726                 semaptr->sem_ctime = sysv_semtime();
 727                 break;
 728
 729         case IPC_STAT:
 730                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 731                                 goto semctlout;
 732                 bcopy(semaptr, &uds, sizeof(struct user_semid_ds));
 733                 if (IS_64BIT_PROCESS(p)) {
 734                         eval = copyout(&uds, user_arg.buf, sizeof(struct user_semid_ds));
 735                 } else {
 736                         struct semid_ds semid_ds32;
 737                         semid_ds_64to32(&uds, &semid_ds32);
 738                         eval = copyout(&semid_ds32, user_arg.buf, sizeof(struct semid_ds));
 739                 }
 740                 break;
 741
 742         case GETNCNT:
 743                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 744                                 goto semctlout;
 745                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 746                         eval = EINVAL;
 747                         goto semctlout;
 748                 }
 749                 rval = semaptr->sem_base[semnum].semncnt;
 750                 break;
 751
 752         case GETPID:
 753                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 754                                 goto semctlout;
 755                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 756                         eval = EINVAL;
 757                         goto semctlout;
 758                 }
 759                 rval = semaptr->sem_base[semnum].sempid;
 760                 break;
 761
 762         case GETVAL:
 763                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 764                                 goto semctlout;
 765                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 766                         eval = EINVAL;
 767                         goto semctlout;
 768                 }
 769                 rval = semaptr->sem_base[semnum].semval;
 770                 break;
 771
 772         case GETALL:
 773                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 774                                 goto semctlout;
 775 /* XXXXXXXXXXXXXXXX TBD XXXXXXXXXXXXXXXX */
 776                 for (i = 0; i < semaptr->sem_nsems; i++) {
 777                         /* XXX could be done in one go... */
 778                         eval = copyout((caddr_t)&semaptr->sem_base[i].semval,
 779                             user_arg.array + (i * sizeof(unsigned short)),
 780                             sizeof(unsigned short));
 781                         if (eval != 0)
 782                                 break;
 783                 }
 784                 break;
 785
 786         case GETZCNT:
 787                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_R)))
 788                                 goto semctlout;
 789                 if (semnum < 0 || semnum >= semaptr->sem_nsems) {
 790                         eval = EINVAL;
 791                         goto semctlout;
 792                 }
 793                 rval = semaptr->sem_base[semnum].semzcnt;
 794                 break;
 795
 796         case SETVAL:
 797                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
 798                 {
 799 #ifdef SEM_DEBUG
 800                         printf("Invalid credentials for write\n");
 801 #endif
 802                                 goto semctlout;
 803                 }
 804                 if (semnum < 0 || semnum >= semaptr->sem_nsems)
 805                 {
 806 #ifdef SEM_DEBUG
 807                         printf("Invalid number out of range for set\n");
 808 #endif
 809                         eval = EINVAL;
 810                         goto semctlout;
 811                 }
 812                 /*
 813                  * Cast down a pointer instead of using 'val' member directly
 814                  * to avoid introducing endieness and a pad field into the
 815                  * header file.  Ugly, but it works.
 816                  */
 817                 semaptr->sem_base[semnum].semval = CAST_DOWN(int,user_arg.buf);
 818                 semundo_clear(semid, semnum);
 819                 wakeup((caddr_t)semaptr);
 820                 break;
 821
 822         case SETALL:
 823                 if ((eval = ipcperm(cred, &semaptr->sem_perm, IPC_W)))
 824                                 goto semctlout;
 825 /*** XXXXXXXXXXXX TBD ********/
 826                 for (i = 0; i < semaptr->sem_nsems; i++) {
 827                         /* XXX could be done in one go... */
 828                         eval = copyin(user_arg.array + (i * sizeof(unsigned short)),
 829                             (caddr_t)&semaptr->sem_base[i].semval,
 830                             sizeof(unsigned short));
 831                         if (eval != 0)
 832                                 break;
 833                 }
 834                 semundo_clear(semid, -1);
 835                 wakeup((caddr_t)semaptr);
 836                 break;
 837
 838         default:
 839                         eval = EINVAL;
 840                         goto semctlout;
 841         }
 842
 843         if (eval == 0)
 844                 *retval = rval;
 845 semctlout:
 846         SYSV_SEM_SUBSYS_UNLOCK();
 847         return(eval);
 848 }
 849
 850 int
 851 semget(__unused struct proc *p, struct semget_args *uap, register_t *retval)
 852 {
 853         int semid, eval;
 854         int key = uap->key;
 855         int nsems = uap->nsems;
 856         int semflg = uap->semflg;
 857         kauth_cred_t cred = kauth_cred_get();
 858
 859 #ifdef SEM_DEBUG
 860         if (key != IPC_PRIVATE)
 861                 printf("semget(0x%x, %d, 0%o)\n", key, nsems, semflg);
 862         else
 863                 printf("semget(IPC_PRIVATE, %d, 0%o)\n", nsems, semflg);
 864 #endif
 865
 866
 867         SYSV_SEM_SUBSYS_LOCK();
 868
 869
 870         if (key != IPC_PRIVATE) {
 871                 for (semid = 0; semid < seminfo.semmni; semid++) {
 872                         if ((sema[semid].sem_perm.mode & SEM_ALLOC) &&
 873                             sema[semid].sem_perm.key == key)
 874                                 break;
 875                 }
 876                 if (semid < seminfo.semmni) {
 877 #ifdef SEM_DEBUG
 878                         printf("found public key\n");
 879 #endif
 880                         if ((eval = ipcperm(cred, &sema[semid].sem_perm,
 881                             semflg & 0700)))
 882                                 goto semgetout;
 883                         if (nsems < 0 || sema[semid].sem_nsems < nsems) {
 884 #ifdef SEM_DEBUG
 885                                 printf("too small\n");
 886 #endif
 887                                 eval = EINVAL;
 888                                 goto semgetout;
 889                         }
 890                         if ((semflg & IPC_CREAT) && (semflg & IPC_EXCL)) {
 891 #ifdef SEM_DEBUG
 892                                 printf("not exclusive\n");
 893 #endif
 894                                 eval = EEXIST;
 895                                 goto semgetout;
 896                         }
 897                         goto found;
 898                 }
 899         }
 900
 901 #ifdef SEM_DEBUG
 902         printf("need to allocate an id for the request\n");
 903 #endif
 904         if (key == IPC_PRIVATE || (semflg & IPC_CREAT)) {
 905                 if (nsems <= 0 || nsems > limitseminfo.semmsl) {
 906 #ifdef SEM_DEBUG
 907                         printf("nsems out of range (0<%d<=%d)\n", nsems,
 908                             seminfo.semmsl);
 909 #endif
 910                         eval = EINVAL;
 911                         goto semgetout;
 912                 }
 913                 if (nsems > seminfo.semmns - semtot) {
 914 #ifdef SEM_DEBUG
 915                         printf("not enough semaphores left (need %d, got %d)\n",
 916                             nsems, seminfo.semmns - semtot);
 917 #endif
 918                         if (!grow_sem_pool(semtot + nsems)) {
 919 #ifdef SEM_DEBUG
 920                                 printf("failed to grow the sem array\n");
 921 #endif
 922                                 eval = ENOSPC;
 923                                 goto semgetout;
 924                         }
 925                 }
 926                 for (semid = 0; semid < seminfo.semmni; semid++) {
 927                         if ((sema[semid].sem_perm.mode & SEM_ALLOC) == 0)
 928                                 break;
 929                 }
 930                 if (semid == seminfo.semmni) {
 931 #ifdef SEM_DEBUG
 932                         printf("no more id's available\n");
 933 #endif
 934                         if (!grow_sema_array(seminfo.semmni + 1))
 935                         {
 936 #ifdef SEM_DEBUG
 937                                 printf("failed to grow sema array\n");
 938 #endif
 939                                 eval = ENOSPC;
 940                                 goto semgetout;
 941                         }
 942                 }
 943 #ifdef SEM_DEBUG
 944                 printf("semid %d is available\n", semid);
 945 #endif
 946                 sema[semid].sem_perm.key = key;
 947                 sema[semid].sem_perm.cuid = kauth_cred_getuid(cred);
 948                 sema[semid].sem_perm.uid = kauth_cred_getuid(cred);
 949                 sema[semid].sem_perm.cgid = cred->cr_gid;
 950                 sema[semid].sem_perm.gid = cred->cr_gid;
 951                 sema[semid].sem_perm.mode = (semflg & 0777) | SEM_ALLOC;
 952                 sema[semid].sem_perm.seq =
 953                     (sema[semid].sem_perm.seq + 1) & 0x7fff;
 954                 sema[semid].sem_nsems = nsems;
 955                 sema[semid].sem_otime = 0;
 956                 sema[semid].sem_ctime = sysv_semtime();
 957                 sema[semid].sem_base = &sem_pool[semtot];
 958                 semtot += nsems;
 959                 bzero(sema[semid].sem_base,
 960                     sizeof(sema[semid].sem_base[0])*nsems);
 961 #ifdef SEM_DEBUG
 962                 printf("sembase = 0x%x, next = 0x%x\n", sema[semid].sem_base,
 963                     &sem_pool[semtot]);
 964 #endif
 965         } else {
 966 #ifdef SEM_DEBUG
 967                 printf("didn't find it and wasn't asked to create it\n");
 968 #endif
 969                 eval = ENOENT;
 970                 goto semgetout;
 971         }
 972
 973 found:
 974         *retval = IXSEQ_TO_IPCID(semid, sema[semid].sem_perm);
 975         AUDIT_ARG(svipc_id, *retval);
 976 #ifdef SEM_DEBUG
 977         printf("semget is done, returning %d\n", *retval);
 978 #endif
 979         eval = 0;
 980
 981 semgetout:
 982         SYSV_SEM_SUBSYS_UNLOCK();
 983         return(eval);
 984 }
 985
 986 int
 987 semop(struct proc *p, struct semop_args *uap, register_t *retval)
 988 {
 989         int semid = uap->semid;
 990         int nsops = uap->nsops;
 991         struct sembuf sops[MAX_SOPS];
 992         register struct user_semid_ds *semaptr;
 993         register struct sembuf *sopptr = NULL;  /* protected by 'semptr' */
 994         register struct sem *semptr = NULL;     /* protected by 'if' */
 995         struct sem_undo *suptr = NULL;
 996         int i, j, eval;
 997         int do_wakeup, do_undos;
 998
 999         AUDIT_ARG(svipc_id, uap->semid);
1000
1001         SYSV_SEM_SUBSYS_LOCK();
1002
1003 #ifdef SEM_DEBUG
1004         printf("call to semop(%d, 0x%x, %d)\n", semid, sops, nsops);
1005 #endif
1006
1007         semid = IPCID_TO_IX(semid);     /* Convert back to zero origin */
1008
1009         if (semid < 0 || semid >= seminfo.semmni) {
1010                 eval = EINVAL;
1011                 goto semopout;
1012         }
1013
1014         semaptr = &sema[semid];
1015         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0) {
1016                 eval = EINVAL;
1017                 goto semopout;
1018         }
1019         if (semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid)) {
1020                 eval = EINVAL;
1021                 goto semopout;
1022         }
1023
1024         if ((eval = ipcperm(kauth_cred_get(), &semaptr->sem_perm, IPC_W))) {
1025 #ifdef SEM_DEBUG
1026                 printf("eval = %d from ipaccess\n", eval);
1027 #endif
1028                 goto semopout;
1029         }
1030
1031         if (nsops < 0 || nsops > MAX_SOPS) {
1032 #ifdef SEM_DEBUG
1033                 printf("too many sops (max=%d, nsops=%d)\n", MAX_SOPS, nsops);
1034 #endif
1035                 eval = E2BIG;
1036                 goto semopout;
1037         }
1038
1039         /*  OK for LP64, since sizeof(struct sembuf) is currently invariant */
1040         if ((eval = copyin(uap->sops, &sops, nsops * sizeof(struct sembuf))) != 0) {
1041 #ifdef SEM_DEBUG
1042                 printf("eval = %d from copyin(%08x, %08x, %ld)\n", eval,
1043                     uap->sops, &sops, nsops * sizeof(struct sembuf));
1044 #endif
1045                 goto semopout;
1046         }
1047
1048         /*
1049          * Loop trying to satisfy the vector of requests.
1050          * If we reach a point where we must wait, any requests already
1051          * performed are rolled back and we go to sleep until some other
1052          * process wakes us up.  At this point, we start all over again.
1053          *
1054          * This ensures that from the perspective of other tasks, a set
1055          * of requests is atomic (never partially satisfied).
1056          */
1057         do_undos = 0;
1058
1059         for (;;) {
1060                 do_wakeup = 0;
1061
1062                 for (i = 0; i < nsops; i++) {
1063                         sopptr = &sops[i];
1064
1065                         if (sopptr->sem_num >= semaptr->sem_nsems) {
1066                                 eval = EFBIG;
1067                                 goto semopout;
1068                         }
1069
1070                         semptr = &semaptr->sem_base[sopptr->sem_num];
1071
1072 #ifdef SEM_DEBUG
1073                         printf("semop:  semaptr=%x, sem_base=%x, semptr=%x, sem[%d]=%d : op=%d, flag=%s\n",
1074                             semaptr, semaptr->sem_base, semptr,
1075                             sopptr->sem_num, semptr->semval, sopptr->sem_op,
1076                             (sopptr->sem_flg & IPC_NOWAIT) ? "nowait" : "wait");
1077 #endif
1078
1079                         if (sopptr->sem_op < 0) {
1080                                 if (semptr->semval + sopptr->sem_op < 0) {
1081 #ifdef SEM_DEBUG
1082                                         printf("semop:  can't do it now\n");
1083 #endif
1084                                         break;
1085                                 } else {
1086                                         semptr->semval += sopptr->sem_op;
1087                                         if (semptr->semval == 0 &&
1088                                             semptr->semzcnt > 0)
1089                                                 do_wakeup = 1;
1090                                 }
1091                                 if (sopptr->sem_flg & SEM_UNDO)
1092                                         do_undos = 1;
1093                         } else if (sopptr->sem_op == 0) {
1094                                 if (semptr->semval > 0) {
1095 #ifdef SEM_DEBUG
1096                                         printf("semop:  not zero now\n");
1097 #endif
1098                                         break;
1099                                 }
1100                         } else {
1101                                 if (semptr->semncnt > 0)
1102                                         do_wakeup = 1;
1103                                 semptr->semval += sopptr->sem_op;
1104                                 if (sopptr->sem_flg & SEM_UNDO)
1105                                         do_undos = 1;
1106                         }
1107                 }
1108
1109                 /*
1110                  * Did we get through the entire vector?
1111                  */
1112                 if (i >= nsops)
1113                         goto done;
1114
1115                 /*
1116                  * No ... rollback anything that we've already done
1117                  */
1118 #ifdef SEM_DEBUG
1119                 printf("semop:  rollback 0 through %d\n", i-1);
1120 #endif
1121                 for (j = 0; j < i; j++)
1122                         semaptr->sem_base[sops[j].sem_num].semval -=
1123                             sops[j].sem_op;
1124
1125                 /*
1126                  * If the request that we couldn't satisfy has the
1127                  * NOWAIT flag set then return with EAGAIN.
1128                  */
1129                 if (sopptr->sem_flg & IPC_NOWAIT) {
1130                         eval = EAGAIN;
1131                         goto semopout;
1132                 }
1133
1134                 if (sopptr->sem_op == 0)
1135                         semptr->semzcnt++;
1136                 else
1137                         semptr->semncnt++;
1138
1139 #ifdef SEM_DEBUG
1140                 printf("semop:  good night!\n");
1141 #endif
1142                 /* Release our lock on the semaphore subsystem so
1143                  * another thread can get at the semaphore we are
1144                  * waiting for. We will get the lock back after we
1145                  * wake up.
1146                  */
1147                 eval = msleep((caddr_t)semaptr, &sysv_sem_subsys_mutex , (PZERO - 4) | PCATCH,
1148                     "semwait", 0);
1149
1150 #ifdef SEM_DEBUG
1151                 printf("semop:  good morning (eval=%d)!\n", eval);
1152 #endif
1153                 if (eval != 0) {
1154                         eval = EINTR;
1155                 }
1156
1157                 /*
1158                  * IMPORTANT: while we were asleep, the semaphore array might
1159                  * have been reallocated somewhere else (see grow_sema_array()).
1160                  * When we wake up, we have to re-lookup the semaphore
1161                  * structures and re-validate them.
1162                  */
1163
1164                 suptr = NULL;   /* sem_undo may have been reallocated */
1165                 semaptr = &sema[semid];    /* sema may have been reallocated */
1166
1167                 /*
1168                  * Make sure that the semaphore still exists
1169                  */
1170                 if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0 ||
1171                     semaptr->sem_perm.seq != IPCID_TO_SEQ(uap->semid) ||
1172                     sopptr->sem_num >= semaptr->sem_nsems) {
1173                         if (eval == EINTR) {
1174                                 /*
1175                                  * EINTR takes precedence over the fact that
1176                                  * the semaphore disappeared while we were
1177                                  * sleeping...
1178                                  */
1179                         } else {
1180                                 /*
1181                                  * The man page says to return EIDRM.
1182                                  * Unfortunately, BSD doesn't define that code!
1183                                  */
1184 #ifdef EIDRM
1185                                 eval = EIDRM;
1186 #else
1187                                 eval = EINVAL;
1188 #endif
1189                         }
1190                         goto semopout;
1191                 }
1192
1193                 /*
1194                  * The semaphore is still alive.  Readjust the count of
1195                  * waiting processes. semptr needs to be recomputed
1196                  * because the sem[] may have been reallocated while
1197                  * we were sleeping, updating our sem_base pointer.
1198                  */
1199                 semptr = &semaptr->sem_base[sopptr->sem_num];
1200                 if (sopptr->sem_op == 0)
1201                         semptr->semzcnt--;
1202                 else
1203                         semptr->semncnt--;
1204
1205                 if (eval != 0) { /* EINTR */
1206                         goto semopout;
1207                 }
1208         }
1209
1210 done:
1211         /*
1212          * Process any SEM_UNDO requests.
1213          */
1214         if (do_undos) {
1215                 for (i = 0; i < nsops; i++) {
1216                         /*
1217                          * We only need to deal with SEM_UNDO's for non-zero
1218                          * op's.
1219                          */
1220                         int adjval;
1221
1222                         if ((sops[i].sem_flg & SEM_UNDO) == 0)
1223                                 continue;
1224                         adjval = sops[i].sem_op;
1225                         if (adjval == 0)
1226                                 continue;
1227                         eval = semundo_adjust(p, &suptr, semid,
1228                             sops[i].sem_num, -adjval);
1229                         if (eval == 0)
1230                                 continue;
1231
1232                         /*
1233                          * Oh-Oh!  We ran out of either sem_undo's or undo's.
1234                          * Rollback the adjustments to this point and then
1235                          * rollback the semaphore ups and down so we can return
1236                          * with an error with all structures restored.  We
1237                          * rollback the undo's in the exact reverse order that
1238                          * we applied them.  This guarantees that we won't run
1239                          * out of space as we roll things back out.
1240                          */
1241                         for (j = i - 1; j >= 0; j--) {
1242                                 if ((sops[j].sem_flg & SEM_UNDO) == 0)
1243                                         continue;
1244                                 adjval = sops[j].sem_op;
1245                                 if (adjval == 0)
1246                                         continue;
1247                                 if (semundo_adjust(p, &suptr, semid,
1248                                     sops[j].sem_num, adjval) != 0)
1249                                         panic("semop - can't undo undos");
1250                         }
1251
1252                         for (j = 0; j < nsops; j++)
1253                                 semaptr->sem_base[sops[j].sem_num].semval -=
1254                                     sops[j].sem_op;
1255
1256 #ifdef SEM_DEBUG
1257                         printf("eval = %d from semundo_adjust\n", eval);
1258 #endif
1259                         goto semopout;
1260                 } /* loop through the sops */
1261         } /* if (do_undos) */
1262
1263         /* We're definitely done - set the sempid's */
1264         for (i = 0; i < nsops; i++) {
1265                 sopptr = &sops[i];
1266                 semptr = &semaptr->sem_base[sopptr->sem_num];
1267                 semptr->sempid = p->p_pid;
1268         }
1269
1270         if (do_wakeup) {
1271 #ifdef SEM_DEBUG
1272                 printf("semop:  doing wakeup\n");
1273 #ifdef SEM_WAKEUP
1274                 sem_wakeup((caddr_t)semaptr);
1275 #else
1276                 wakeup((caddr_t)semaptr);
1277 #endif
1278                 printf("semop:  back from wakeup\n");
1279 #else
1280                 wakeup((caddr_t)semaptr);
1281 #endif
1282         }
1283 #ifdef SEM_DEBUG
1284         printf("semop:  done\n");
1285 #endif
1286         *retval = 0;
1287         eval = 0;
1288 semopout:
1289         SYSV_SEM_SUBSYS_UNLOCK();
1290         return(eval);
1291 }
1292
1293 /*
1294  * Go through the undo structures for this process and apply the adjustments to
1295  * semaphores.
1296  */
1297 void
1298 semexit(struct proc *p)
1299 {
1300         register struct sem_undo *suptr;
1301         register struct sem_undo **supptr;
1302         int did_something;
1303
1304         /* If we have not allocated our semaphores yet there can't be
1305          * anything to undo, but we need the lock to prevent
1306          * dynamic memory race conditions.
1307          */
1308         SYSV_SEM_SUBSYS_LOCK();
1309
1310         if (!sem_pool)
1311         {
1312                 SYSV_SEM_SUBSYS_UNLOCK();
1313                 return;
1314         }
1315         did_something = 0;
1316
1317         /*
1318          * Go through the chain of undo vectors looking for one
1319          * associated with this process.
1320          */
1321
1322         for (supptr = &semu_list; (suptr = *supptr) != NULL;
1323             supptr = &suptr->un_next) {
1324                 if (suptr->un_proc == p)
1325                         break;
1326         }
1327
1328         if (suptr == NULL)
1329                 goto unlock;
1330
1331 #ifdef SEM_DEBUG
1332         printf("proc @%08x has undo structure with %d entries\n", p,
1333             suptr->un_cnt);
1334 #endif
1335
1336         /*
1337          * If there are any active undo elements then process them.
1338          */
1339         if (suptr->un_cnt > 0) {
1340                 while (suptr->un_ent != NULL) {
1341                         struct undo *sueptr;
1342                         int semid;
1343                         int semnum;
1344                         int adjval;
1345                         struct user_semid_ds *semaptr;
1346
1347                         sueptr = suptr->un_ent;
1348                         semid = sueptr->une_id;
1349                         semnum = sueptr->une_num;
1350                         adjval = sueptr->une_adjval;
1351
1352                         semaptr = &sema[semid];
1353                         if ((semaptr->sem_perm.mode & SEM_ALLOC) == 0)
1354                                 panic("semexit - semid not allocated");
1355                         if (semnum >= semaptr->sem_nsems)
1356                                 panic("semexit - semnum out of range");
1357
1358 #ifdef SEM_DEBUG
1359                         printf("semexit:  %08x id=%d num=%d(adj=%d) ; sem=%d\n",
1360                                suptr->un_proc,
1361                                semid,
1362                                semnum,
1363                                adjval,
1364                                semaptr->sem_base[semnum].semval);
1365 #endif
1366
1367                         if (adjval < 0) {
1368                                 if (semaptr->sem_base[semnum].semval < -adjval)
1369                                         semaptr->sem_base[semnum].semval = 0;
1370                                 else
1371                                         semaptr->sem_base[semnum].semval +=
1372                                             adjval;
1373                         } else
1374                                 semaptr->sem_base[semnum].semval += adjval;
1375
1376                 /* Maybe we should build a list of semaptr's to wake
1377                  * up, finish all access to data structures, release the
1378                  * subsystem lock, and wake all the processes.  Something
1379                  * to think about.  It wouldn't buy us anything unless
1380                  * wakeup had the potential to block, or the syscall
1381                  * funnel state was changed to allow multiple threads
1382                  * in the BSD code at once.
1383                  */
1384 #ifdef SEM_WAKEUP
1385                         sem_wakeup((caddr_t)semaptr);
1386 #else
1387                         wakeup((caddr_t)semaptr);
1388 #endif
1389 #ifdef SEM_DEBUG
1390                         printf("semexit:  back from wakeup\n");
1391 #endif
1392                         suptr->un_cnt--;
1393                         suptr->un_ent = sueptr->une_next;
1394                         FREE(sueptr, M_SYSVSEM);
1395                         sueptr = NULL;
1396                 }
1397         }
1398
1399         /*
1400          * Deallocate the undo vector.
1401          */
1402 #ifdef SEM_DEBUG
1403         printf("removing vector\n");
1404 #endif
1405         suptr->un_proc = NULL;
1406         *supptr = suptr->un_next;
1407
1408 unlock:
1409         /*
1410          * There is a semaphore leak (i.e. memory leak) in this code.
1411          * We should be deleting the IPC_PRIVATE semaphores when they are
1412          * no longer needed, and we dont. We would have to track which processes
1413          * know about which IPC_PRIVATE semaphores, updating the list after
1414          * every fork.  We can't just delete them semaphore when the process
1415          * that created it dies, because that process may well have forked
1416          * some children.  So we need to wait until all of it's children have
1417          * died, and so on.  Maybe we should tag each IPC_PRIVATE sempahore
1418          * with the creating group ID, count the number of processes left in
1419          * that group, and delete the semaphore when the group is gone.
1420          * Until that code gets implemented we will leak IPC_PRIVATE semaphores.
1421          * There is an upper bound on the size of our semaphore array, so
1422          * leaking the semaphores should not work as a DOS attack.
1423          *
1424          * Please note that the original BSD code this file is based on had the
1425          * same leaky semaphore problem.
1426          */
1427
1428         SYSV_SEM_SUBSYS_UNLOCK();
1429 }
1430
1431
1432 /* (struct sysctl_oid *oidp, void *arg1, int arg2, \
1433         struct sysctl_req *req) */
1434 static int
1435 sysctl_seminfo(__unused struct sysctl_oid *oidp, void *arg1,
1436         __unused int arg2, struct sysctl_req *req)
1437 {
1438         int error = 0;
1439
1440         error = SYSCTL_OUT(req, arg1, sizeof(int));
1441         if (error || req->newptr == USER_ADDR_NULL)
1442                 return(error);
1443
1444         SYSV_SEM_SUBSYS_LOCK();
1445
1446         /* Set the values only if shared memory is not initialised */
1447         if ((sem_pool == NULL) &&
1448                 (sema == NULL) &&
1449                 (semu == NULL) &&
1450                 (semu_list == NULL)) {
1451                         if ((error = SYSCTL_IN(req, arg1, sizeof(int)))) {
1452                                 goto out;
1453                         }
1454         } else
1455                 error = EINVAL;
1456 out:
1457         SYSV_SEM_SUBSYS_UNLOCK();
1458         return(error);
1459
1460 }
1461
1462 /* SYSCTL_NODE(_kern, KERN_SYSV, sysv, CTLFLAG_RW, 0, "SYSV"); */
1463 extern struct sysctl_oid_list sysctl__kern_sysv_children;
1464 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNI, semmni, CTLTYPE_INT | CTLFLAG_RW,
1465     &limitseminfo.semmni, 0, &sysctl_seminfo ,"I","semmni");
1466
1467 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNS, semmns, CTLTYPE_INT | CTLFLAG_RW,
1468     &limitseminfo.semmns, 0, &sysctl_seminfo ,"I","semmns");
1469
1470 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMNU, semmnu, CTLTYPE_INT | CTLFLAG_RW,
1471     &limitseminfo.semmnu, 0, &sysctl_seminfo ,"I","semmnu");
1472
1473 SYSCTL_PROC(_kern_sysv, KSYSV_SEMMSL, semmsl, CTLTYPE_INT | CTLFLAG_RW,
1474     &limitseminfo.semmsl, 0, &sysctl_seminfo ,"I","semmsl");
1475
1476 SYSCTL_PROC(_kern_sysv, KSYSV_SEMUNE, semume, CTLTYPE_INT | CTLFLAG_RW,
1477     &limitseminfo.semume, 0, &sysctl_seminfo ,"I","semume");
1478
1479
1480 static int
1481 IPCS_sem_sysctl(__unused struct sysctl_oid *oidp, __unused void *arg1,
1482         __unused int arg2, struct sysctl_req *req)
1483 {
1484         int error;
1485         int cursor;
1486         union {
1487                 struct IPCS_command u32;
1488                 struct user_IPCS_command u64;
1489         } ipcs;
1490         struct semid_ds semid_ds32;     /* post conversion, 32 bit version */
1491         void *semid_dsp;
1492         size_t ipcs_sz = sizeof(struct user_IPCS_command);
1493         size_t semid_ds_sz = sizeof(struct user_semid_ds);
1494         struct proc *p = current_proc();
1495
1496         /* Copy in the command structure */
1497         if ((error = SYSCTL_IN(req, &ipcs, ipcs_sz)) != 0) {
1498                 return(error);
1499         }
1500
1501         if (!IS_64BIT_PROCESS(p)) {
1502                 ipcs_sz = sizeof(struct IPCS_command);
1503                 semid_ds_sz = sizeof(struct semid_ds);
1504         }
1505
1506         /* Let us version this interface... */
1507         if (ipcs.u64.ipcs_magic != IPCS_MAGIC) {
1508                 return(EINVAL);
1509         }
1510
1511         SYSV_SEM_SUBSYS_LOCK();
1512         switch(ipcs.u64.ipcs_op) {
1513         case IPCS_SEM_CONF:     /* Obtain global configuration data */
1514                 if (ipcs.u64.ipcs_datalen != sizeof(struct seminfo)) {
1515                         error = ERANGE;
1516                         break;
1517                 }
1518                 if (ipcs.u64.ipcs_cursor != 0) {        /* fwd. compat. */
1519                         error = EINVAL;
1520                         break;
1521                 }
1522                 error = copyout(&seminfo, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1523                 break;
1524
1525         case IPCS_SEM_ITER:     /* Iterate over existing segments */
1526                 cursor = ipcs.u64.ipcs_cursor;
1527                 if (cursor < 0 || cursor >= seminfo.semmni) {
1528                         error = ERANGE;
1529                         break;
1530                 }
1531                 if (ipcs.u64.ipcs_datalen != (int)semid_ds_sz ) {
1532                         error = EINVAL;
1533                         break;
1534                 }
1535                 for( ; cursor < seminfo.semmni; cursor++) {
1536                         if (sema[cursor].sem_perm.mode & SEM_ALLOC)
1537                                 break;
1538                         continue;
1539                 }
1540                 if (cursor == seminfo.semmni) {
1541                         error = ENOENT;
1542                         break;
1543                 }
1544
1545                 semid_dsp = &sema[cursor];      /* default: 64 bit */
1546
1547                 /*
1548                  * If necessary, convert the 64 bit kernel segment
1549                  * descriptor to a 32 bit user one.
1550                  */
1551                 if (!IS_64BIT_PROCESS(p)) {
1552                         semid_ds_64to32(semid_dsp, &semid_ds32);
1553                         semid_dsp = &semid_ds32;
1554                 }
1555                 error = copyout(semid_dsp, ipcs.u64.ipcs_data, ipcs.u64.ipcs_datalen);
1556                 if (!error) {
1557                         /* update cursor */
1558                         ipcs.u64.ipcs_cursor = cursor + 1;
1559                         error = SYSCTL_OUT(req, &ipcs, ipcs_sz);
1560                 }
1561                 break;
1562
1563         default:
1564                 error = EINVAL;
1565                 break;
1566         }
1567         SYSV_SEM_SUBSYS_UNLOCK();
1568         return(error);
1569 }
1570
1571 SYSCTL_DECL(_kern_sysv_ipcs);
1572 SYSCTL_PROC(_kern_sysv_ipcs, OID_AUTO, sem, CTLFLAG_RW|CTLFLAG_ANYBODY,
1573         0, 0, IPCS_sem_sysctl,
1574         "S,IPCS_sem_command",
1575         "ipcs sem command interface");