file_cmds-321.40.3.tar.gz
[apple/file_cmds.git] / pax / tar.c
1 /* $OpenBSD: tar.c,v 1.41 2006/03/04 20:24:55 otto Exp $ */
2 /* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */
3
4 /*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 #ifndef lint
39 #if 0
40 static const char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94";
41 #else
42 __used static const char rcsid[] = "$OpenBSD: tar.c,v 1.41 2006/03/04 20:24:55 otto Exp $";
43 #endif
44 #endif /* not lint */
45
46 #include <sys/types.h>
47 #include <sys/time.h>
48 #include <sys/stat.h>
49 #include <sys/param.h>
50 #include <string.h>
51 #include <stdio.h>
52 #include <unistd.h>
53 #include <stdlib.h>
54 #include "pax.h"
55 #include "extern.h"
56 #include "tar.h"
57
58 /*
59 * Routines for reading, writing and header identify of various versions of tar
60 */
61
62 static size_t expandname(char *, size_t, char **, const char *, size_t);
63 static u_long tar_chksm(char *, int);
64 static char *name_split(char *, int);
65 static int ul_oct(u_long, char *, int, int);
66 #ifndef LONG_OFF_T
67 static int uqd_oct(u_quad_t, char *, int, int);
68 #endif
69
70 static uid_t uid_nobody;
71 static uid_t uid_warn;
72 static gid_t gid_nobody;
73 static gid_t gid_warn;
74
75 /*
76 * Routines common to all versions of tar
77 */
78
79 static int tar_nodir; /* do not write dirs under old tar */
80 char *gnu_name_string; /* GNU ././@LongLink hackery name */
81 char *gnu_link_string; /* GNU ././@LongLink hackery link */
82
83 /*
84 * tar_endwr()
85 * add the tar trailer of two null blocks
86 * Return:
87 * 0 if ok, -1 otherwise (what wr_skip returns)
88 */
89
90 int
91 tar_endwr(void)
92 {
93 return(wr_skip((off_t)(NULLCNT*BLKMULT)));
94 }
95
96 /*
97 * tar_endrd()
98 * no cleanup needed here, just return size of trailer (for append)
99 * Return:
100 * size of trailer (2 * BLKMULT)
101 */
102
103 off_t
104 tar_endrd(void)
105 {
106 return((off_t)(NULLCNT*BLKMULT));
107 }
108
109 /*
110 * tar_trail()
111 * Called to determine if a header block is a valid trailer. We are passed
112 * the block, the in_sync flag (which tells us we are in resync mode;
113 * looking for a valid header), and cnt (which starts at zero) which is
114 * used to count the number of empty blocks we have seen so far.
115 * Return:
116 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
117 * could never contain a header.
118 */
119
120 int
121 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
122 {
123 int i;
124
125 /*
126 * look for all zero, trailer is two consecutive blocks of zero
127 */
128 for (i = 0; i < BLKMULT; ++i) {
129 if (buf[i] != '\0')
130 break;
131 }
132
133 /*
134 * if not all zero it is not a trailer, but MIGHT be a header.
135 */
136 if (i != BLKMULT)
137 return(-1);
138
139 /*
140 * When given a zero block, we must be careful!
141 * If we are not in resync mode, check for the trailer. Have to watch
142 * out that we do not mis-identify file data as the trailer, so we do
143 * NOT try to id a trailer during resync mode. During resync mode we
144 * might as well throw this block out since a valid header can NEVER be
145 * a block of all 0 (we must have a valid file name).
146 */
147 if (!in_resync && (++*cnt >= NULLCNT))
148 return(0);
149 return(1);
150 }
151
/*
 * ul_oct()
 *	Render an unsigned long as octal digits into the fixed-width field
 *	str of len bytes. The tail of the field receives the terminator
 *	selected by term, and unused leading positions are filled with '0'.
 *	Different tar fields (and cranky old readers) expect different
 *	terminators, hence the selector:
 *	    term 3:		field ends with NUL
 *	    term 2:		field ends with NUL, space
 *	    term 1:		field ends with space
 *	    term 0 / other:	field ends with space, NUL
 * Return:
 *	0 if the number fit into the field, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	char *cur = str + len - 1;

	/*
	 * lay down the terminator character(s), working backwards from the
	 * last byte of the field
	 */
	if (term == 3) {
		*cur-- = '\0';
	} else if (term == 2) {
		*cur-- = ' ';
		*cur-- = '\0';
	} else if (term == 1) {
		*cur-- = ' ';
	} else {
		*cur-- = '\0';
		*cur-- = ' ';
	}

	/*
	 * emit octal digits, least significant first, until the value is
	 * used up or the field is full
	 */
	while (cur >= str) {
		*cur-- = '0' + (char)(val & 0x7);
		val >>= 3;
		if (val == (u_long)0)
			break;
	}

	/*
	 * zero fill whatever is left at the front of the field
	 */
	for (; cur >= str; --cur)
		*cur = '0';

	/*
	 * leftover bits mean the value needed more digits than we had
	 */
	return (val == (u_long)0) ? 0 : -1;
}
205
206 #ifndef LONG_OFF_T
/*
 * uqd_oct()
 *	u_quad_t flavour of ul_oct(): write val as octal digits into the
 *	len byte field str, ending the field with the terminator selected by
 *	term and filling unused leading positions with '0'.
 *	    term 3:		field ends with NUL
 *	    term 2:		field ends with NUL, space
 *	    term 1:		field ends with space
 *	    term 0 / other:	field ends with space, NUL
 * Return:
 *	0 if the number fit into the field, -1 otherwise
 */

static int
uqd_oct(u_quad_t val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * terminator first, from the end of the field backwards
	 */
	switch (term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * octal digits, least significant first, while there is room
	 */
	while (pos >= 0) {
		str[pos--] = '0' + (char)(val & 0x7);
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * zero fill the front of the field
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * anything left in val did not fit
	 */
	return (val != (u_quad_t)0) ? -1 : 0;
}
260 #endif
261
262 /*
263 * tar_chksm()
264 * calculate the checksum for a tar block counting the checksum field as
265 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
266 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
267 * pad headers with 0.
268 * Return:
269 * unsigned long checksum
270 */
271
272 static u_long
273 tar_chksm(char *blk, int len)
274 {
275 char *stop;
276 char *pt;
277 u_long chksm = BLNKSUM; /* initial value is checksum field sum */
278
279 /*
280 * add the part of the block before the checksum field
281 */
282 pt = blk;
283 stop = blk + CHK_OFFSET;
284 while (pt < stop)
285 chksm += (u_long)(*pt++ & 0xff);
286 /*
287 * move past the checksum field and keep going, spec counts the
288 * checksum field as the sum of 8 blanks (which is pre-computed as
289 * BLNKSUM).
290 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
291 * starts, no point in summing zero's)
292 */
293 pt += CHK_LEN;
294 stop = blk + len;
295 while (pt < stop)
296 chksm += (u_long)(*pt++ & 0xff);
297 return(chksm);
298 }
299
300 /*
301 * Routines for old BSD style tar (also made portable to sysV tar)
302 */
303
304 /*
305 * tar_id()
306 * determine if a block given to us is a valid tar header (and not a USTAR
307 * header). We have to be on the lookout for those pesky blocks of all
308 * zero's.
309 * Return:
310 * 0 if a tar header, -1 otherwise
311 */
312
313 int
314 tar_id(char *blk, int size)
315 {
316 HD_TAR *hd;
317 HD_USTAR *uhd;
318
319 if (size < BLKMULT)
320 return(-1);
321 hd = (HD_TAR *)blk;
322 uhd = (HD_USTAR *)blk;
323
324 /*
325 * check for block of zero's first, a simple and fast test, then make
326 * sure this is not a ustar header by looking for the ustar magic
327 * cookie. We should use TMAGLEN, but some USTAR archive programs are
328 * wrong and create archives missing the \0. Last we check the
329 * checksum. If this is ok we have to assume it is a valid header.
330 */
331 if (hd->name[0] == '\0')
332 return(-1);
333 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
334 return(-1);
335 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
336 return(-1);
337 force_one_volume = 1;
338 return(0);
339 }
340
341 /*
342 * tar_opt()
343 * handle tar format specific -o options
344 * Return:
345 * 0 if ok -1 otherwise
346 */
347
348 int
349 tar_opt(void)
350 {
351 OPLIST *opt;
352
353 while ((opt = opt_next()) != NULL) {
354 if (strcmp(opt->name, TAR_OPTION) ||
355 strcmp(opt->value, TAR_NODIR)) {
356 paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
357 opt->name, opt->value);
358 paxwarn(1,"%s=%s is the only supported tar format option",
359 TAR_OPTION, TAR_NODIR);
360 return(-1);
361 }
362
363 /*
364 * we only support one option, and only when writing
365 */
366 if ((act != APPND) && (act != ARCHIVE)) {
367 paxwarn(1, "%s=%s is only supported when writing.",
368 opt->name, opt->value);
369 return(-1);
370 }
371 tar_nodir = 1;
372 }
373 return(0);
374 }
375
376
377 /*
378 * tar_rd()
379 * extract the values out of block already determined to be a tar header.
380 * store the values in the ARCHD parameter.
381 * Return:
382 * 0
383 */
384
385 int
386 tar_rd(ARCHD *arcn, char *buf)
387 {
388 HD_TAR *hd;
389 char *pt;
390
391 /*
392 * we only get proper sized buffers passed to us
393 */
394 if (tar_id(buf, BLKMULT) < 0)
395 return(-1);
396 memset(arcn, 0, sizeof(*arcn));
397 arcn->org_name = arcn->name;
398 arcn->sb.st_nlink = 1;
399
400 /*
401 * copy out the name and values in the stat buffer
402 */
403 hd = (HD_TAR *)buf;
404 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
405 arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
406 &gnu_name_string, hd->name, sizeof(hd->name));
407 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
408 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
409 }
410 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
411 0xfff);
412 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
413 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
414 #ifdef LONG_OFF_T
415 arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
416 #else
417 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
418 #endif
419 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
420 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
421
422 /*
423 * have to look at the last character, it may be a '/' and that is used
424 * to encode this as a directory
425 */
426 pt = &(arcn->name[arcn->nlen - 1]);
427 arcn->pad = 0;
428 arcn->skip = 0;
429 switch (hd->linkflag) {
430 case SYMTYPE:
431 /*
432 * symbolic link, need to get the link name and set the type in
433 * the st_mode so -v printing will look correct.
434 */
435 arcn->type = PAX_SLK;
436 arcn->sb.st_mode |= S_IFLNK;
437 arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname, sizeof(arcn->ln_name));
438 break;
439 case LNKTYPE:
440 /*
441 * hard link, need to get the link name, set the type in the
442 * st_mode and st_nlink so -v printing will look better.
443 */
444 arcn->type = PAX_HLK;
445 arcn->sb.st_nlink = 2;
446 arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname, sizeof(arcn->ln_name));
447
448 /*
449 * no idea of what type this thing really points at, but
450 * we set something for printing only.
451 */
452 arcn->sb.st_mode |= S_IFREG;
453 break;
454 case LONGLINKTYPE:
455 case LONGNAMETYPE:
456 /*
457 * GNU long link/file; we tag these here and let the
458 * pax internals deal with it -- too ugly otherwise.
459 */
460 arcn->type =
461 hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
462 arcn->pad = TAR_PAD(arcn->sb.st_size);
463 arcn->skip = arcn->sb.st_size;
464 break;
465 case DIRTYPE:
466 /*
467 * It is a directory, set the mode for -v printing
468 */
469 arcn->type = PAX_DIR;
470 arcn->sb.st_mode |= S_IFDIR;
471 arcn->sb.st_nlink = 2;
472 break;
473 case AREGTYPE:
474 case REGTYPE:
475 default:
476 /*
477 * If we have a trailing / this is a directory and NOT a file.
478 */
479 arcn->ln_name[0] = '\0';
480 arcn->ln_nlen = 0;
481 if (*pt == '/') {
482 /*
483 * it is a directory, set the mode for -v printing
484 */
485 arcn->type = PAX_DIR;
486 arcn->sb.st_mode |= S_IFDIR;
487 arcn->sb.st_nlink = 2;
488 } else {
489 /*
490 * have a file that will be followed by data. Set the
491 * skip value to the size field and calculate the size
492 * of the padding.
493 */
494 arcn->type = PAX_REG;
495 arcn->sb.st_mode |= S_IFREG;
496 arcn->pad = TAR_PAD(arcn->sb.st_size);
497 arcn->skip = arcn->sb.st_size;
498 }
499 break;
500 }
501
502 /*
503 * strip off any trailing slash.
504 */
505 if (*pt == '/') {
506 *pt = '\0';
507 --arcn->nlen;
508 }
509 return(0);
510 }
511
512 /*
513 * tar_wr()
514 * write a tar header for the file specified in the ARCHD to the archive.
515 * Have to check for file types that cannot be stored and file names that
516 * are too long. Be careful of the term (last arg) to ul_oct, each field
517 * of tar has it own spec for the termination character(s).
518 * ASSUMED: space after header in header block is zero filled
519 * Return:
520 * 0 if file has data to be written after the header, 1 if file has NO
521 * data to write after the header, -1 if archive write failed
522 */
523
524 int
525 tar_wr(ARCHD *arcn)
526 {
527 HD_TAR *hd;
528 int len;
529 HD_TAR hdblk;
530
531 /*
532 * check for those file system types which tar cannot store
533 */
534 switch (arcn->type) {
535 case PAX_DIR:
536 /*
537 * user asked that dirs not be written to the archive
538 */
539 if (tar_nodir)
540 return(1);
541 break;
542 case PAX_CHR:
543 paxwarn(1, "Tar cannot archive a character device %s",
544 arcn->org_name);
545 return(1);
546 case PAX_BLK:
547 paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
548 return(1);
549 case PAX_SCK:
550 paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
551 return(1);
552 case PAX_FIF:
553 paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
554 return(1);
555 case PAX_SLK:
556 case PAX_HLK:
557 case PAX_HRG:
558 if (arcn->ln_nlen >= sizeof(hd->linkname)) {
559 paxwarn(1, "Link name too long for tar %s",
560 arcn->ln_name);
561 return(1);
562 }
563 break;
564 case PAX_REG:
565 case PAX_CTG:
566 default:
567 break;
568 }
569
570 /*
571 * check file name len, remember extra char for dirs (the / at the end)
572 */
573 len = arcn->nlen;
574 if (arcn->type == PAX_DIR)
575 ++len;
576 if (len >= sizeof(hd->name)) {
577 paxwarn(1, "File name too long for tar %s", arcn->name);
578 return(1);
579 }
580
581 /*
582 * Copy the data out of the ARCHD into the tar header based on the type
583 * of the file. Remember, many tar readers want all fields to be
584 * padded with zero so we zero the header first. We then set the
585 * linkflag field (type), the linkname, the size, and set the padding
586 * (if any) to be added after the file data (0 for all other types,
587 * as they only have a header).
588 */
589 memset(&hdblk, 0, sizeof(hdblk));
590 hd = (HD_TAR *)&hdblk;
591 strlcpy(hd->name, arcn->name, sizeof(hd->name));
592 arcn->pad = 0;
593
594 if (arcn->type == PAX_DIR) {
595 /*
596 * directories are the same as files, except have a filename
597 * that ends with a /, we add the slash here. No data follows
598 * dirs, so no pad.
599 */
600 hd->linkflag = AREGTYPE;
601 hd->name[len-1] = '/';
602 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
603 goto out;
604 } else if (arcn->type == PAX_SLK) {
605 /*
606 * no data follows this file, so no pad
607 */
608 hd->linkflag = SYMTYPE;
609 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
610 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
611 goto out;
612 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
613 /*
614 * no data follows this file, so no pad
615 */
616 hd->linkflag = LNKTYPE;
617 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
618 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
619 goto out;
620 } else {
621 /*
622 * data follows this file, so set the pad
623 */
624 hd->linkflag = AREGTYPE;
625 # ifdef LONG_OFF_T
626 if (ul_oct((u_long)arcn->sb.st_size, hd->size,
627 sizeof(hd->size), 1)) {
628 # else
629 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
630 sizeof(hd->size), 1)) {
631 # endif
632 paxwarn(1,"File is too large for tar %s", arcn->org_name);
633 return(1);
634 }
635 arcn->pad = TAR_PAD(arcn->sb.st_size);
636 }
637
638 /*
639 * copy those fields that are independent of the type
640 */
641 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
642 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
643 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
644 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
645 goto out;
646
647 /*
648 * calculate and add the checksum, then write the header. A return of
649 * 0 tells the caller to now write the file data, 1 says no data needs
650 * to be written
651 */
652 if (ul_oct(tar_chksm((char *)&hdblk, sizeof(HD_TAR)), hd->chksum,
653 sizeof(hd->chksum), 3))
654 goto out;
655 if (wr_rdbuf((char *)&hdblk, sizeof(HD_TAR)) < 0)
656 return(-1);
657 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
658 return(-1);
659 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
660 return(0);
661 return(1);
662
663 out:
664 /*
665 * header field is out of range
666 */
667 paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
668 return(1);
669 }
670
671 /*
672 * Routines for POSIX ustar
673 */
674
/*
 * ustar_strd()
 *	set up for reading ustar: start the tables via usrtb_start() and
 *	grptb_start(), in that order, stopping at the first failure.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_strd(void)
{
	if (usrtb_start() < 0)
		return -1;
	if (grptb_start() < 0)
		return -1;
	return 0;
}
689
/*
 * ustar_stwr()
 *	set up for writing ustar: start the tables via uidtb_start() and
 *	gidtb_start(), in that order, stopping at the first failure.
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_stwr(void)
{
	if (uidtb_start() < 0)
		return -1;
	if (gidtb_start() < 0)
		return -1;
	return 0;
}
704
705 /*
706 * ustar_id()
707 * determine if a block given to us is a valid ustar header. We have to
708 * be on the lookout for those pesky blocks of all zero's
709 * Return:
710 * 0 if a ustar header, -1 otherwise
711 */
712
713 int
714 ustar_id(char *blk, int size)
715 {
716 HD_USTAR *hd;
717
718 if (size < BLKMULT)
719 return(-1);
720 hd = (HD_USTAR *)blk;
721
722 /*
723 * check for block of zero's first, a simple and fast test then check
724 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
725 * programs are fouled up and create archives missing the \0. Last we
726 * check the checksum. If ok we have to assume it is a valid header.
727 */
728 if (hd->name[0] == '\0')
729 return(-1);
730 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
731 return(-1);
732 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
733 return(-1);
734 return(0);
735 }
736
737 /*
738 * ustar_rd()
739 * extract the values out of block already determined to be a ustar header.
740 * store the values in the ARCHD parameter.
741 * Return:
742 * 0
743 */
744
745 int
746 ustar_rd(ARCHD *arcn, char *buf)
747 {
748 HD_USTAR *hd;
749 char *dest;
750 int cnt = 0;
751 dev_t devmajor;
752 dev_t devminor;
753
754 /*
755 * we only get proper sized buffers
756 */
757 if (ustar_id(buf, BLKMULT) < 0)
758 return(-1);
759 memset(arcn, 0, sizeof(*arcn));
760 arcn->org_name = arcn->name;
761 arcn->sb.st_nlink = 1;
762 hd = (HD_USTAR *)buf;
763
764 /*
765 * see if the filename is split into two parts. if, so joint the parts.
766 * we copy the prefix first and add a / between the prefix and name.
767 */
768 dest = arcn->name;
769 if (*(hd->prefix) != '\0') {
770 cnt = strlcpy(dest, hd->prefix, sizeof(arcn->name) - 1);
771 dest += cnt;
772 *dest++ = '/';
773 cnt++;
774 } else {
775 cnt = 0;
776 }
777
778 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
779 arcn->nlen = cnt + expandname(dest, sizeof(arcn->name) - cnt,
780 &gnu_name_string, hd->name, sizeof(hd->name));
781 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
782 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
783 }
784
785 /*
786 * follow the spec to the letter. we should only have mode bits, strip
787 * off all other crud we may be passed.
788 */
789 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
790 0xfff);
791 #ifdef LONG_OFF_T
792 arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
793 #else
794 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
795 #endif
796 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
797 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
798
799 /*
800 * If we can find the ascii names for gname and uname in the password
801 * and group files we will use the uid's and gid they bind. Otherwise
802 * we use the uid and gid values stored in the header. (This is what
803 * the POSIX spec wants).
804 */
805 hd->gname[sizeof(hd->gname) - 1] = '\0';
806 if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0)
807 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
808 hd->uname[sizeof(hd->uname) - 1] = '\0';
809 if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0)
810 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
811
812 /*
813 * set the defaults, these may be changed depending on the file type
814 */
815 arcn->pad = 0;
816 arcn->skip = 0;
817 arcn->sb.st_rdev = (dev_t)0;
818
819 /*
820 * set the mode and PAX type according to the typeflag in the header
821 */
822 switch (hd->typeflag) {
823 case FIFOTYPE:
824 arcn->type = PAX_FIF;
825 arcn->sb.st_mode |= S_IFIFO;
826 break;
827 case DIRTYPE:
828 arcn->type = PAX_DIR;
829 arcn->sb.st_mode |= S_IFDIR;
830 arcn->sb.st_nlink = 2;
831
832 /*
833 * Some programs that create ustar archives append a '/'
834 * to the pathname for directories. This clearly violates
835 * ustar specs, but we will silently strip it off anyway.
836 */
837 if (arcn->name[arcn->nlen - 1] == '/')
838 arcn->name[--arcn->nlen] = '\0';
839 break;
840 case BLKTYPE:
841 case CHRTYPE:
842 /*
843 * this type requires the rdev field to be set.
844 */
845 if (hd->typeflag == BLKTYPE) {
846 arcn->type = PAX_BLK;
847 arcn->sb.st_mode |= S_IFBLK;
848 } else {
849 arcn->type = PAX_CHR;
850 arcn->sb.st_mode |= S_IFCHR;
851 }
852 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
853 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
854 arcn->sb.st_rdev = TODEV(devmajor, devminor);
855 break;
856 case SYMTYPE:
857 case LNKTYPE:
858 if (hd->typeflag == SYMTYPE) {
859 arcn->type = PAX_SLK;
860 arcn->sb.st_mode |= S_IFLNK;
861 } else {
862 arcn->type = PAX_HLK;
863 /*
864 * so printing looks better
865 */
866 arcn->sb.st_mode |= S_IFREG;
867 arcn->sb.st_nlink = 2;
868 }
869 break;
870 case LONGLINKTYPE:
871 case LONGNAMETYPE:
872 /*
873 * GNU long link/file; we tag these here and let the
874 * pax internals deal with it -- too ugly otherwise.
875 */
876 arcn->type =
877 hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
878 arcn->pad = TAR_PAD(arcn->sb.st_size);
879 arcn->skip = arcn->sb.st_size;
880 break;
881 case CONTTYPE:
882 case AREGTYPE:
883 case REGTYPE:
884 default:
885 /*
886 * these types have file data that follows. Set the skip and
887 * pad fields.
888 */
889 arcn->type = PAX_REG;
890 arcn->pad = TAR_PAD(arcn->sb.st_size);
891 arcn->skip = arcn->sb.st_size;
892 arcn->sb.st_mode |= S_IFREG;
893 break;
894 }
895 return(0);
896 }
897
898 /*
899 * ustar_wr()
900 * write a ustar header for the file specified in the ARCHD to the archive
901 * Have to check for file types that cannot be stored and file names that
902 * are too long. Be careful of the term (last arg) to ul_oct, we only use
903 * '\0' for the termination character (this is different than picky tar)
904 * ASSUMED: space after header in header block is zero filled
905 * Return:
906 * 0 if file has data to be written after the header, 1 if file has NO
907 * data to write after the header, -1 if archive write failed
908 */
909
910 int
911 ustar_wr(ARCHD *arcn)
912 {
913 HD_USTAR *hd;
914 char *pt;
915 char hdblk[sizeof(HD_USTAR)];
916 mode_t mode12only;
917 int term_char=3; /* orignal setting */
918 term_char=1; /* To pass conformance tests 274, 301 */
919
920 /*
921 * check for those file system types ustar cannot store
922 */
923 if (arcn->type == PAX_SCK) {
924 paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
925 return(1);
926 }
927
928 /*
929 * check the length of the linkname
930 */
931 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
932 (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){
933 paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
934 /*
935 * Conformance: test pax:285 wants error code to be non-zero, and
936 * test tar:12 wants error code from pax to be 0
937 */
938 return(1);
939 }
940
941 /*
942 * split the path name into prefix and name fields (if needed). if
943 * pt != arcn->name, the name has to be split
944 */
945 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
946 paxwarn(1, "File name too long for ustar %s", arcn->name);
947 return(1);
948 }
949
950 /*
951 * zero out the header so we don't have to worry about zero fill below
952 */
953 memset(hdblk, 0, sizeof(hdblk));
954 hd = (HD_USTAR *)hdblk;
955 arcn->pad = 0L;
956
957 /* To pass conformance tests 274/301, always set these fields to "zero" */
958 ul_oct(0, hd->devmajor, sizeof(hd->devmajor), term_char);
959 ul_oct(0, hd->devminor, sizeof(hd->devminor), term_char);
960
961 /*
962 * split the name, or zero out the prefix
963 */
964 if (pt != arcn->name) {
965 /*
966 * name was split, pt points at the / where the split is to
967 * occur, we remove the / and copy the first part to the prefix
968 */
969 *pt = '\0';
970 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
971 *pt++ = '/';
972 }
973
974 /*
975 * copy the name part. this may be the whole path or the part after
976 * the prefix. both the name and prefix may fill the entire field.
977 */
978 if (strlen(pt) == sizeof(hd->name)) { /* must account for name just fits in buffer */
979 strncpy(hd->name, pt, sizeof(hd->name));
980 } else {
981 strlcpy(hd->name, pt, sizeof(hd->name));
982 }
983
984 /*
985 * set the fields in the header that are type dependent
986 */
987 switch (arcn->type) {
988 case PAX_DIR:
989 hd->typeflag = DIRTYPE;
990 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
991 goto out;
992 break;
993 case PAX_CHR:
994 case PAX_BLK:
995 if (arcn->type == PAX_CHR)
996 hd->typeflag = CHRTYPE;
997 else
998 hd->typeflag = BLKTYPE;
999 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1000 sizeof(hd->devmajor), term_char) ||
1001 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1002 sizeof(hd->devminor), term_char) ||
1003 ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1004 goto out;
1005 break;
1006 case PAX_FIF:
1007 hd->typeflag = FIFOTYPE;
1008 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1009 goto out;
1010 break;
1011 case PAX_SLK:
1012 case PAX_HLK:
1013 case PAX_HRG:
1014 if (arcn->type == PAX_SLK)
1015 hd->typeflag = SYMTYPE;
1016 else
1017 hd->typeflag = LNKTYPE;
1018 if (strlen(arcn->ln_name) == sizeof(hd->linkname)) { /* must account for name just fits in buffer */
1019 strncpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1020 } else {
1021 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1022 }
1023 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1024 goto out;
1025 break;
1026 case PAX_REG:
1027 case PAX_CTG:
1028 default:
1029 /*
1030 * file data with this type, set the padding
1031 */
1032 if (arcn->type == PAX_CTG)
1033 hd->typeflag = CONTTYPE;
1034 else
1035 hd->typeflag = REGTYPE;
1036 arcn->pad = TAR_PAD(arcn->sb.st_size);
1037 # ifdef LONG_OFF_T
1038 if (ul_oct((u_long)arcn->sb.st_size, hd->size,
1039 sizeof(hd->size), term_char)) {
1040 # else
1041 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
1042 sizeof(hd->size), term_char)) {
1043 # endif
1044 paxwarn(1,"File is too long for ustar %s",arcn->org_name);
1045 return(1);
1046 }
1047 break;
1048 }
1049
1050 strncpy(hd->magic, TMAGIC, TMAGLEN);
1051 strncpy(hd->version, TVERSION, TVERSLEN);
1052
1053 /*
1054 * set the remaining fields. Some versions want all 16 bits of mode
1055 * we better humor them (they really do not meet spec though)....
1056 */
1057 if (ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), term_char)) {
1058 if (uid_nobody == 0) {
1059 if (uid_name("nobody", &uid_nobody) == -1)
1060 goto out;
1061 }
1062 if (uid_warn != arcn->sb.st_uid) {
1063 uid_warn = arcn->sb.st_uid;
1064 paxwarn(1,
1065 "Ustar header field is too small for uid %lu, "
1066 "using nobody", (u_long)arcn->sb.st_uid);
1067 }
1068 if (ul_oct((u_long)uid_nobody, hd->uid, sizeof(hd->uid), term_char))
1069 goto out;
1070 }
1071 if (ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), term_char)) {
1072 if (gid_nobody == 0) {
1073 if (gid_name("nobody", &gid_nobody) == -1)
1074 goto out;
1075 }
1076 if (gid_warn != arcn->sb.st_gid) {
1077 gid_warn = arcn->sb.st_gid;
1078 paxwarn(1,
1079 "Ustar header field is too small for gid %lu, "
1080 "using nobody", (u_long)arcn->sb.st_gid);
1081 }
1082 if (ul_oct((u_long)gid_nobody, hd->gid, sizeof(hd->gid), term_char))
1083 goto out;
1084 }
1085 /* However, Unix conformance tests do not like MORE than 12 mode bits:
1086 remove all beyond (see definition of stat.st_mode structure) */
1087 mode12only = ((u_long)arcn->sb.st_mode) & 0x00000fff;
1088 if (ul_oct((u_long)mode12only, hd->mode, sizeof(hd->mode), term_char) ||
1089 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),term_char))
1090 goto out;
1091 strncpy(hd->uname, name_uid(arcn->sb.st_uid, 0), sizeof(hd->uname));
1092 strncpy(hd->gname, name_gid(arcn->sb.st_gid, 0), sizeof(hd->gname));
1093
1094 /*
1095 * calculate and store the checksum write the header to the archive
1096 * return 0 tells the caller to now write the file data, 1 says no data
1097 * needs to be written
1098 */
1099 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1100 sizeof(hd->chksum), term_char))
1101 goto out;
1102 if (wr_rdbuf((char *)&hdblk, sizeof(HD_USTAR)) < 0)
1103 return(-1);
1104 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1105 return(-1);
1106 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1107 return(0);
1108 return(1);
1109
1110 out:
1111 /*
1112 * header field is out of range
1113 */
1114 paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1115 return(1);
1116 }
1117
1118 /*
1119 * name_split()
1120 * see if the name has to be split for storage in a ustar header. We try
1121 * to fit the entire name in the name field without splitting if we can.
1122 * The split point is always at a /
1123 * Return
1124 * character pointer to split point (always the / that is to be removed
1125 * if the split is not needed, the points is set to the start of the file
1126 * name (it would violate the spec to split there). A NULL is returned if
1127 * the file name is too long
1128 */
1129
1130 static char *
1131 name_split(char *name, int len)
1132 {
1133 char *start;
1134
1135 /*
1136 * check to see if the file name is small enough to fit in the name
1137 * field. if so just return a pointer to the name.
1138 * The strings can fill the complete name and prefix fields
1139 * without a NUL terminator.
1140 */
1141 if (len <= TNMSZ)
1142 return(name);
1143 if (len > (TPFSZ + TNMSZ + 1))
1144 return(NULL);
1145
1146 /*
1147 * we start looking at the biggest sized piece that fits in the name
1148 * field. We walk forward looking for a slash to split at. The idea is
1149 * to find the biggest piece to fit in the name field (or the smallest
1150 * prefix we can find) (the -1 is correct the biggest piece would
1151 * include the slash between the two parts that gets thrown away)
1152 */
1153 start = name + len - TNMSZ - 1;
1154 if ((*start == '/') && (start == name))
1155 ++start; /* 101 byte paths with leading '/' are dinged otherwise */
1156 while ((*start != '\0') && (*start != '/'))
1157 ++start;
1158
1159 /*
1160 * if we hit the end of the string, this name cannot be split, so we
1161 * cannot store this file.
1162 */
1163 if (*start == '\0')
1164 return(NULL);
1165 len = start - name;
1166
1167 /*
1168 * NOTE: /str where the length of str == TNMSZ can not be stored under
1169 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1170 * the file would then expand on extract to //str. The len == 0 below
1171 * makes this special case follow the spec to the letter.
1172 */
1173 if ((len > TPFSZ) || (len == 0))
1174 return(NULL);
1175
1176 /*
1177 * ok have a split point, return it to the caller
1178 */
1179 return(start);
1180 }
1181
/*
 * expandname()
 *	Store a file or link name into buf (capacity len bytes, terminator
 *	included). If a GNU long name is pending in *gnu_name it is used, then
 *	freed and *gnu_name reset to NULL. Otherwise the name comes from the
 *	header field name, which is name_len bytes wide and may NOT be NUL
 *	terminated when the name fills the field; it is never read beyond
 *	name_len bytes. Names that do not fit in buf are truncated.
 * Return:
 *	number of characters stored in buf, not counting the terminating NUL
 */

static size_t
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t name_len)
{
	const char *src;
	const char *nul;
	size_t nlen;

	if (*gnu_name != NULL) {
		/* *gnu_name is NUL terminated */
		src = *gnu_name;
		nlen = strlen(src);
	} else {
		/* measure the header field without leaving it */
		src = name;
		nul = memchr(name, '\0', name_len);
		nlen = (nul != NULL) ? (size_t)(nul - name) : name_len;
	}

	if (len == 0) {
		/* no room at all, not even for the terminator */
		nlen = 0;
	} else {
		if (nlen >= len)
			nlen = len - 1;
		memcpy(buf, src, nlen);
		buf[nlen] = '\0';
	}

	/* the pending GNU name is consumed whether or not it fit */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return(nlen);
}