file_cmds-220.4.tar.gz
[apple/file_cmds.git] / pax / tar.c
1 /* $OpenBSD: tar.c,v 1.41 2006/03/04 20:24:55 otto Exp $ */
2 /* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */
3
4 /*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #ifndef lint
38 #if 0
39 static const char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94";
40 #else
41 static const char rcsid[] = "$OpenBSD: tar.c,v 1.41 2006/03/04 20:24:55 otto Exp $";
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49 #include <string.h>
50 #include <stdio.h>
51 #include <unistd.h>
52 #include <stdlib.h>
53 #include "pax.h"
54 #include "extern.h"
55 #include "tar.h"
56
/*
 * Routines for reading, writing, and identifying the headers of the various
 * versions of tar
 */
60
61 static size_t expandname(char *, size_t, char **, const char *, size_t);
62 static u_long tar_chksm(char *, int);
63 static char *name_split(char *, int);
64 static int ul_oct(u_long, char *, int, int);
65 #ifndef LONG_OFF_T
66 static int uqd_oct(u_quad_t, char *, int, int);
67 #endif
68
69 static uid_t uid_nobody;
70 static uid_t uid_warn;
71 static gid_t gid_nobody;
72 static gid_t gid_warn;
73
74 /*
75 * Routines common to all versions of tar
76 */
77
78 static int tar_nodir; /* do not write dirs under old tar */
79 char *gnu_name_string; /* GNU ././@LongLink hackery name */
80 char *gnu_link_string; /* GNU ././@LongLink hackery link */
81
82 /*
83 * tar_endwr()
84 * add the tar trailer of two null blocks
85 * Return:
86 * 0 if ok, -1 otherwise (what wr_skip returns)
87 */
88
89 int
90 tar_endwr(void)
91 {
92 return(wr_skip((off_t)(NULLCNT*BLKMULT)));
93 }
94
95 /*
96 * tar_endrd()
97 * no cleanup needed here, just return size of trailer (for append)
98 * Return:
99 * size of trailer (2 * BLKMULT)
100 */
101
102 off_t
103 tar_endrd(void)
104 {
105 return((off_t)(NULLCNT*BLKMULT));
106 }
107
108 /*
109 * tar_trail()
110 * Called to determine if a header block is a valid trailer. We are passed
111 * the block, the in_sync flag (which tells us we are in resync mode;
112 * looking for a valid header), and cnt (which starts at zero) which is
113 * used to count the number of empty blocks we have seen so far.
114 * Return:
115 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
116 * could never contain a header.
117 */
118
119 int
120 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
121 {
122 int i;
123
124 /*
125 * look for all zero, trailer is two consecutive blocks of zero
126 */
127 for (i = 0; i < BLKMULT; ++i) {
128 if (buf[i] != '\0')
129 break;
130 }
131
132 /*
133 * if not all zero it is not a trailer, but MIGHT be a header.
134 */
135 if (i != BLKMULT)
136 return(-1);
137
138 /*
139 * When given a zero block, we must be careful!
140 * If we are not in resync mode, check for the trailer. Have to watch
141 * out that we do not mis-identify file data as the trailer, so we do
142 * NOT try to id a trailer during resync mode. During resync mode we
143 * might as well throw this block out since a valid header can NEVER be
144 * a block of all 0 (we must have a valid file name).
145 */
146 if (!in_resync && (++*cnt >= NULLCNT))
147 return(0);
148 return(1);
149 }
150
/*
 * ul_oct()
 *	store val as octal digits in the len byte field str. The field is
 *	'0' padded on the left. The various tar flavors terminate their
 *	numeric fields differently, term picks the trailing byte(s):
 *	    3       a single NUL
 *	    2       NUL followed by a space
 *	    1       a single space
 *	    0/other space followed by a NUL
 *	ASSUMED: len is large enough to hold the terminator byte(s)
 * Return:
 *	0 if the number fit into the field, -1 otherwise
 */

static int
ul_oct(unsigned long val, char *str, int len, int term)
{
	int pos = len - 1;

	/*
	 * lay down the terminator, working backwards from the last byte
	 */
	if (term == 3) {
		str[pos--] = '\0';
	} else if (term == 2) {
		str[pos--] = ' ';
		str[pos--] = '\0';
	} else if (term == 1) {
		str[pos--] = ' ';
	} else {
		str[pos--] = '\0';
		str[pos--] = ' ';
	}

	/*
	 * emit digits from least to most significant until the value is
	 * used up or the field is full
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * whatever space is left in front is filled with '0'
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * leftover bits mean the field was too small
	 */
	return((val != 0) ? -1 : 0);
}
204
205 #ifndef LONG_OFF_T
206 /*
207 * uqd_oct()
208 * convert an u_quad_t to an octal string. one of many oddball field
209 * termination characters are used by the various versions of tar in the
210 * different fields. term selects which kind to use. str is '0' padded
211 * at the front to len. we are unable to use only one format as many old
212 * tar readers are very cranky about this.
213 * Return:
214 * 0 if the number fit into the string, -1 otherwise
215 */
216
217 static int
218 uqd_oct(u_quad_t val, char *str, int len, int term)
219 {
220 char *pt;
221
222 /*
223 * term selects the appropriate character(s) for the end of the string
224 */
225 pt = str + len - 1;
226 switch (term) {
227 case 3:
228 *pt-- = '\0';
229 break;
230 case 2:
231 *pt-- = ' ';
232 *pt-- = '\0';
233 break;
234 case 1:
235 *pt-- = ' ';
236 break;
237 case 0:
238 default:
239 *pt-- = '\0';
240 *pt-- = ' ';
241 break;
242 }
243
244 /*
245 * convert and blank pad if there is space
246 */
247 while (pt >= str) {
248 *pt-- = '0' + (char)(val & 0x7);
249 if ((val = val >> 3) == 0)
250 break;
251 }
252
253 while (pt >= str)
254 *pt-- = '0';
255 if (val != (u_quad_t)0)
256 return(-1);
257 return(0);
258 }
259 #endif
260
261 /*
262 * tar_chksm()
263 * calculate the checksum for a tar block counting the checksum field as
264 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
265 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
266 * pad headers with 0.
267 * Return:
268 * unsigned long checksum
269 */
270
271 static u_long
272 tar_chksm(char *blk, int len)
273 {
274 char *stop;
275 char *pt;
276 u_long chksm = BLNKSUM; /* initial value is checksum field sum */
277
278 /*
279 * add the part of the block before the checksum field
280 */
281 pt = blk;
282 stop = blk + CHK_OFFSET;
283 while (pt < stop)
284 chksm += (u_long)(*pt++ & 0xff);
285 /*
286 * move past the checksum field and keep going, spec counts the
287 * checksum field as the sum of 8 blanks (which is pre-computed as
288 * BLNKSUM).
289 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
290 * starts, no point in summing zero's)
291 */
292 pt += CHK_LEN;
293 stop = blk + len;
294 while (pt < stop)
295 chksm += (u_long)(*pt++ & 0xff);
296 return(chksm);
297 }
298
299 /*
300 * Routines for old BSD style tar (also made portable to sysV tar)
301 */
302
303 /*
304 * tar_id()
305 * determine if a block given to us is a valid tar header (and not a USTAR
306 * header). We have to be on the lookout for those pesky blocks of all
307 * zero's.
308 * Return:
309 * 0 if a tar header, -1 otherwise
310 */
311
312 int
313 tar_id(char *blk, int size)
314 {
315 HD_TAR *hd;
316 HD_USTAR *uhd;
317
318 if (size < BLKMULT)
319 return(-1);
320 hd = (HD_TAR *)blk;
321 uhd = (HD_USTAR *)blk;
322
323 /*
324 * check for block of zero's first, a simple and fast test, then make
325 * sure this is not a ustar header by looking for the ustar magic
326 * cookie. We should use TMAGLEN, but some USTAR archive programs are
327 * wrong and create archives missing the \0. Last we check the
328 * checksum. If this is ok we have to assume it is a valid header.
329 */
330 if (hd->name[0] == '\0')
331 return(-1);
332 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
333 return(-1);
334 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
335 return(-1);
336 force_one_volume = 1;
337 return(0);
338 }
339
340 /*
341 * tar_opt()
342 * handle tar format specific -o options
343 * Return:
344 * 0 if ok -1 otherwise
345 */
346
347 int
348 tar_opt(void)
349 {
350 OPLIST *opt;
351
352 while ((opt = opt_next()) != NULL) {
353 if (strcmp(opt->name, TAR_OPTION) ||
354 strcmp(opt->value, TAR_NODIR)) {
355 paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
356 opt->name, opt->value);
357 paxwarn(1,"%s=%s is the only supported tar format option",
358 TAR_OPTION, TAR_NODIR);
359 return(-1);
360 }
361
362 /*
363 * we only support one option, and only when writing
364 */
365 if ((act != APPND) && (act != ARCHIVE)) {
366 paxwarn(1, "%s=%s is only supported when writing.",
367 opt->name, opt->value);
368 return(-1);
369 }
370 tar_nodir = 1;
371 }
372 return(0);
373 }
374
375
376 /*
377 * tar_rd()
378 * extract the values out of block already determined to be a tar header.
379 * store the values in the ARCHD parameter.
380 * Return:
381 * 0
382 */
383
384 int
385 tar_rd(ARCHD *arcn, char *buf)
386 {
387 HD_TAR *hd;
388 char *pt;
389
390 /*
391 * we only get proper sized buffers passed to us
392 */
393 if (tar_id(buf, BLKMULT) < 0)
394 return(-1);
395 memset(arcn, 0, sizeof(*arcn));
396 arcn->org_name = arcn->name;
397 arcn->sb.st_nlink = 1;
398
399 /*
400 * copy out the name and values in the stat buffer
401 */
402 hd = (HD_TAR *)buf;
403 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
404 arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
405 &gnu_name_string, hd->name, sizeof(hd->name));
406 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
407 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
408 }
409 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
410 0xfff);
411 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
412 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
413 #ifdef LONG_OFF_T
414 arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
415 #else
416 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
417 #endif
418 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
419 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
420
421 /*
422 * have to look at the last character, it may be a '/' and that is used
423 * to encode this as a directory
424 */
425 pt = &(arcn->name[arcn->nlen - 1]);
426 arcn->pad = 0;
427 arcn->skip = 0;
428 switch (hd->linkflag) {
429 case SYMTYPE:
430 /*
431 * symbolic link, need to get the link name and set the type in
432 * the st_mode so -v printing will look correct.
433 */
434 arcn->type = PAX_SLK;
435 arcn->sb.st_mode |= S_IFLNK;
436 arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname, sizeof(arcn->ln_name));
437 break;
438 case LNKTYPE:
439 /*
440 * hard link, need to get the link name, set the type in the
441 * st_mode and st_nlink so -v printing will look better.
442 */
443 arcn->type = PAX_HLK;
444 arcn->sb.st_nlink = 2;
445 arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname, sizeof(arcn->ln_name));
446
447 /*
448 * no idea of what type this thing really points at, but
449 * we set something for printing only.
450 */
451 arcn->sb.st_mode |= S_IFREG;
452 break;
453 case LONGLINKTYPE:
454 case LONGNAMETYPE:
455 /*
456 * GNU long link/file; we tag these here and let the
457 * pax internals deal with it -- too ugly otherwise.
458 */
459 arcn->type =
460 hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
461 arcn->pad = TAR_PAD(arcn->sb.st_size);
462 arcn->skip = arcn->sb.st_size;
463 break;
464 case DIRTYPE:
465 /*
466 * It is a directory, set the mode for -v printing
467 */
468 arcn->type = PAX_DIR;
469 arcn->sb.st_mode |= S_IFDIR;
470 arcn->sb.st_nlink = 2;
471 break;
472 case AREGTYPE:
473 case REGTYPE:
474 default:
475 /*
476 * If we have a trailing / this is a directory and NOT a file.
477 */
478 arcn->ln_name[0] = '\0';
479 arcn->ln_nlen = 0;
480 if (*pt == '/') {
481 /*
482 * it is a directory, set the mode for -v printing
483 */
484 arcn->type = PAX_DIR;
485 arcn->sb.st_mode |= S_IFDIR;
486 arcn->sb.st_nlink = 2;
487 } else {
488 /*
489 * have a file that will be followed by data. Set the
490 * skip value to the size field and calculate the size
491 * of the padding.
492 */
493 arcn->type = PAX_REG;
494 arcn->sb.st_mode |= S_IFREG;
495 arcn->pad = TAR_PAD(arcn->sb.st_size);
496 arcn->skip = arcn->sb.st_size;
497 }
498 break;
499 }
500
501 /*
502 * strip off any trailing slash.
503 */
504 if (*pt == '/') {
505 *pt = '\0';
506 --arcn->nlen;
507 }
508 return(0);
509 }
510
511 /*
512 * tar_wr()
513 * write a tar header for the file specified in the ARCHD to the archive.
514 * Have to check for file types that cannot be stored and file names that
515 * are too long. Be careful of the term (last arg) to ul_oct, each field
516 * of tar has it own spec for the termination character(s).
517 * ASSUMED: space after header in header block is zero filled
518 * Return:
519 * 0 if file has data to be written after the header, 1 if file has NO
520 * data to write after the header, -1 if archive write failed
521 */
522
523 int
524 tar_wr(ARCHD *arcn)
525 {
526 HD_TAR *hd;
527 int len;
528 HD_TAR hdblk;
529
530 /*
531 * check for those file system types which tar cannot store
532 */
533 switch (arcn->type) {
534 case PAX_DIR:
535 /*
536 * user asked that dirs not be written to the archive
537 */
538 if (tar_nodir)
539 return(1);
540 break;
541 case PAX_CHR:
542 paxwarn(1, "Tar cannot archive a character device %s",
543 arcn->org_name);
544 return(1);
545 case PAX_BLK:
546 paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
547 return(1);
548 case PAX_SCK:
549 paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
550 return(1);
551 case PAX_FIF:
552 paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
553 return(1);
554 case PAX_SLK:
555 case PAX_HLK:
556 case PAX_HRG:
557 if (arcn->ln_nlen >= sizeof(hd->linkname)) {
558 paxwarn(1, "Link name too long for tar %s",
559 arcn->ln_name);
560 return(1);
561 }
562 break;
563 case PAX_REG:
564 case PAX_CTG:
565 default:
566 break;
567 }
568
569 /*
570 * check file name len, remember extra char for dirs (the / at the end)
571 */
572 len = arcn->nlen;
573 if (arcn->type == PAX_DIR)
574 ++len;
575 if (len >= sizeof(hd->name)) {
576 paxwarn(1, "File name too long for tar %s", arcn->name);
577 return(1);
578 }
579
580 /*
581 * Copy the data out of the ARCHD into the tar header based on the type
582 * of the file. Remember, many tar readers want all fields to be
583 * padded with zero so we zero the header first. We then set the
584 * linkflag field (type), the linkname, the size, and set the padding
585 * (if any) to be added after the file data (0 for all other types,
586 * as they only have a header).
587 */
588 memset(&hdblk, 0, sizeof(hdblk));
589 hd = (HD_TAR *)&hdblk;
590 strlcpy(hd->name, arcn->name, sizeof(hd->name));
591 arcn->pad = 0;
592
593 if (arcn->type == PAX_DIR) {
594 /*
595 * directories are the same as files, except have a filename
596 * that ends with a /, we add the slash here. No data follows
597 * dirs, so no pad.
598 */
599 hd->linkflag = AREGTYPE;
600 hd->name[len-1] = '/';
601 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
602 goto out;
603 } else if (arcn->type == PAX_SLK) {
604 /*
605 * no data follows this file, so no pad
606 */
607 hd->linkflag = SYMTYPE;
608 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
609 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
610 goto out;
611 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
612 /*
613 * no data follows this file, so no pad
614 */
615 hd->linkflag = LNKTYPE;
616 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
617 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
618 goto out;
619 } else {
620 /*
621 * data follows this file, so set the pad
622 */
623 hd->linkflag = AREGTYPE;
624 # ifdef LONG_OFF_T
625 if (ul_oct((u_long)arcn->sb.st_size, hd->size,
626 sizeof(hd->size), 1)) {
627 # else
628 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
629 sizeof(hd->size), 1)) {
630 # endif
631 paxwarn(1,"File is too large for tar %s", arcn->org_name);
632 return(1);
633 }
634 arcn->pad = TAR_PAD(arcn->sb.st_size);
635 }
636
637 /*
638 * copy those fields that are independent of the type
639 */
640 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
641 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
642 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
643 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
644 goto out;
645
646 /*
647 * calculate and add the checksum, then write the header. A return of
648 * 0 tells the caller to now write the file data, 1 says no data needs
649 * to be written
650 */
651 if (ul_oct(tar_chksm((char *)&hdblk, sizeof(HD_TAR)), hd->chksum,
652 sizeof(hd->chksum), 3))
653 goto out;
654 if (wr_rdbuf((char *)&hdblk, sizeof(HD_TAR)) < 0)
655 return(-1);
656 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
657 return(-1);
658 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
659 return(0);
660 return(1);
661
662 out:
663 /*
664 * header field is out of range
665 */
666 paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
667 return(1);
668 }
669
670 /*
671 * Routines for POSIX ustar
672 */
673
/*
 * ustar_strd()
 *	set up for reading ustar archives by calling usrtb_start() and
 *	grptb_start()
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_strd(void)
{
	if (usrtb_start() < 0)
		return(-1);
	if (grptb_start() < 0)
		return(-1);
	return(0);
}
688
/*
 * ustar_stwr()
 *	set up for writing ustar archives by calling uidtb_start() and
 *	gidtb_start()
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_stwr(void)
{
	if (uidtb_start() < 0)
		return(-1);
	if (gidtb_start() < 0)
		return(-1);
	return(0);
}
703
704 /*
705 * ustar_id()
706 * determine if a block given to us is a valid ustar header. We have to
707 * be on the lookout for those pesky blocks of all zero's
708 * Return:
709 * 0 if a ustar header, -1 otherwise
710 */
711
712 int
713 ustar_id(char *blk, int size)
714 {
715 HD_USTAR *hd;
716
717 if (size < BLKMULT)
718 return(-1);
719 hd = (HD_USTAR *)blk;
720
721 /*
722 * check for block of zero's first, a simple and fast test then check
723 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
724 * programs are fouled up and create archives missing the \0. Last we
725 * check the checksum. If ok we have to assume it is a valid header.
726 */
727 if (hd->name[0] == '\0')
728 return(-1);
729 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
730 return(-1);
731 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
732 return(-1);
733 return(0);
734 }
735
736 /*
737 * ustar_rd()
738 * extract the values out of block already determined to be a ustar header.
739 * store the values in the ARCHD parameter.
740 * Return:
741 * 0
742 */
743
744 int
745 ustar_rd(ARCHD *arcn, char *buf)
746 {
747 HD_USTAR *hd;
748 char *dest;
749 int cnt = 0;
750 dev_t devmajor;
751 dev_t devminor;
752
753 /*
754 * we only get proper sized buffers
755 */
756 if (ustar_id(buf, BLKMULT) < 0)
757 return(-1);
758 memset(arcn, 0, sizeof(*arcn));
759 arcn->org_name = arcn->name;
760 arcn->sb.st_nlink = 1;
761 hd = (HD_USTAR *)buf;
762
763 /*
764 * see if the filename is split into two parts. if, so joint the parts.
765 * we copy the prefix first and add a / between the prefix and name.
766 */
767 dest = arcn->name;
768 if (*(hd->prefix) != '\0') {
769 cnt = strlcpy(dest, hd->prefix, sizeof(arcn->name) - 1);
770 dest += cnt;
771 *dest++ = '/';
772 cnt++;
773 } else {
774 cnt = 0;
775 }
776
777 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
778 arcn->nlen = cnt + expandname(dest, sizeof(arcn->name) - cnt,
779 &gnu_name_string, hd->name, sizeof(hd->name));
780 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
781 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
782 }
783
784 /*
785 * follow the spec to the letter. we should only have mode bits, strip
786 * off all other crud we may be passed.
787 */
788 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
789 0xfff);
790 #ifdef LONG_OFF_T
791 arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
792 #else
793 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
794 #endif
795 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
796 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
797
798 /*
799 * If we can find the ascii names for gname and uname in the password
800 * and group files we will use the uid's and gid they bind. Otherwise
801 * we use the uid and gid values stored in the header. (This is what
802 * the POSIX spec wants).
803 */
804 hd->gname[sizeof(hd->gname) - 1] = '\0';
805 if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0)
806 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
807 hd->uname[sizeof(hd->uname) - 1] = '\0';
808 if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0)
809 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
810
811 /*
812 * set the defaults, these may be changed depending on the file type
813 */
814 arcn->pad = 0;
815 arcn->skip = 0;
816 arcn->sb.st_rdev = (dev_t)0;
817
818 /*
819 * set the mode and PAX type according to the typeflag in the header
820 */
821 switch (hd->typeflag) {
822 case FIFOTYPE:
823 arcn->type = PAX_FIF;
824 arcn->sb.st_mode |= S_IFIFO;
825 break;
826 case DIRTYPE:
827 arcn->type = PAX_DIR;
828 arcn->sb.st_mode |= S_IFDIR;
829 arcn->sb.st_nlink = 2;
830
831 /*
832 * Some programs that create ustar archives append a '/'
833 * to the pathname for directories. This clearly violates
834 * ustar specs, but we will silently strip it off anyway.
835 */
836 if (arcn->name[arcn->nlen - 1] == '/')
837 arcn->name[--arcn->nlen] = '\0';
838 break;
839 case BLKTYPE:
840 case CHRTYPE:
841 /*
842 * this type requires the rdev field to be set.
843 */
844 if (hd->typeflag == BLKTYPE) {
845 arcn->type = PAX_BLK;
846 arcn->sb.st_mode |= S_IFBLK;
847 } else {
848 arcn->type = PAX_CHR;
849 arcn->sb.st_mode |= S_IFCHR;
850 }
851 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
852 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
853 arcn->sb.st_rdev = TODEV(devmajor, devminor);
854 break;
855 case SYMTYPE:
856 case LNKTYPE:
857 if (hd->typeflag == SYMTYPE) {
858 arcn->type = PAX_SLK;
859 arcn->sb.st_mode |= S_IFLNK;
860 } else {
861 arcn->type = PAX_HLK;
862 /*
863 * so printing looks better
864 */
865 arcn->sb.st_mode |= S_IFREG;
866 arcn->sb.st_nlink = 2;
867 }
868 break;
869 case LONGLINKTYPE:
870 case LONGNAMETYPE:
871 /*
872 * GNU long link/file; we tag these here and let the
873 * pax internals deal with it -- too ugly otherwise.
874 */
875 arcn->type =
876 hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
877 arcn->pad = TAR_PAD(arcn->sb.st_size);
878 arcn->skip = arcn->sb.st_size;
879 break;
880 case CONTTYPE:
881 case AREGTYPE:
882 case REGTYPE:
883 default:
884 /*
885 * these types have file data that follows. Set the skip and
886 * pad fields.
887 */
888 arcn->type = PAX_REG;
889 arcn->pad = TAR_PAD(arcn->sb.st_size);
890 arcn->skip = arcn->sb.st_size;
891 arcn->sb.st_mode |= S_IFREG;
892 break;
893 }
894 return(0);
895 }
896
897 /*
898 * ustar_wr()
899 * write a ustar header for the file specified in the ARCHD to the archive
900 * Have to check for file types that cannot be stored and file names that
901 * are too long. Be careful of the term (last arg) to ul_oct, we only use
902 * '\0' for the termination character (this is different than picky tar)
903 * ASSUMED: space after header in header block is zero filled
904 * Return:
905 * 0 if file has data to be written after the header, 1 if file has NO
906 * data to write after the header, -1 if archive write failed
907 */
908
909 int
910 ustar_wr(ARCHD *arcn)
911 {
912 HD_USTAR *hd;
913 char *pt;
914 char hdblk[sizeof(HD_USTAR)];
915 mode_t mode12only;
916 int term_char=3; /* orignal setting */
917 term_char=1; /* To pass conformance tests 274, 301 */
918
919 /*
920 * check for those file system types ustar cannot store
921 */
922 if (arcn->type == PAX_SCK) {
923 paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
924 return(1);
925 }
926
927 /*
928 * check the length of the linkname
929 */
930 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
931 (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){
932 paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
933 /*
934 * Conformance: test pax:285 wants error code to be non-zero, and
935 * test tar:12 wants error code from pax to be 0
936 */
937 return(1);
938 }
939
940 /*
941 * split the path name into prefix and name fields (if needed). if
942 * pt != arcn->name, the name has to be split
943 */
944 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
945 paxwarn(1, "File name too long for ustar %s", arcn->name);
946 return(1);
947 }
948
949 /*
950 * zero out the header so we don't have to worry about zero fill below
951 */
952 memset(hdblk, 0, sizeof(hdblk));
953 hd = (HD_USTAR *)hdblk;
954 arcn->pad = 0L;
955
956 /* To pass conformance tests 274/301, always set these fields to "zero" */
957 ul_oct(0, hd->devmajor, sizeof(hd->devmajor), term_char);
958 ul_oct(0, hd->devminor, sizeof(hd->devminor), term_char);
959
960 /*
961 * split the name, or zero out the prefix
962 */
963 if (pt != arcn->name) {
964 /*
965 * name was split, pt points at the / where the split is to
966 * occur, we remove the / and copy the first part to the prefix
967 */
968 *pt = '\0';
969 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
970 *pt++ = '/';
971 }
972
973 /*
974 * copy the name part. this may be the whole path or the part after
975 * the prefix. both the name and prefix may fill the entire field.
976 */
977 if (strlen(pt) == sizeof(hd->name)) { /* must account for name just fits in buffer */
978 strncpy(hd->name, pt, sizeof(hd->name));
979 } else {
980 strlcpy(hd->name, pt, sizeof(hd->name));
981 }
982
983 /*
984 * set the fields in the header that are type dependent
985 */
986 switch (arcn->type) {
987 case PAX_DIR:
988 hd->typeflag = DIRTYPE;
989 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
990 goto out;
991 break;
992 case PAX_CHR:
993 case PAX_BLK:
994 if (arcn->type == PAX_CHR)
995 hd->typeflag = CHRTYPE;
996 else
997 hd->typeflag = BLKTYPE;
998 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
999 sizeof(hd->devmajor), term_char) ||
1000 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1001 sizeof(hd->devminor), term_char) ||
1002 ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1003 goto out;
1004 break;
1005 case PAX_FIF:
1006 hd->typeflag = FIFOTYPE;
1007 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1008 goto out;
1009 break;
1010 case PAX_SLK:
1011 case PAX_HLK:
1012 case PAX_HRG:
1013 if (arcn->type == PAX_SLK)
1014 hd->typeflag = SYMTYPE;
1015 else
1016 hd->typeflag = LNKTYPE;
1017 if (strlen(arcn->ln_name) == sizeof(hd->linkname)) { /* must account for name just fits in buffer */
1018 strncpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1019 } else {
1020 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1021 }
1022 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1023 goto out;
1024 break;
1025 case PAX_REG:
1026 case PAX_CTG:
1027 default:
1028 /*
1029 * file data with this type, set the padding
1030 */
1031 if (arcn->type == PAX_CTG)
1032 hd->typeflag = CONTTYPE;
1033 else
1034 hd->typeflag = REGTYPE;
1035 arcn->pad = TAR_PAD(arcn->sb.st_size);
1036 # ifdef LONG_OFF_T
1037 if (ul_oct((u_long)arcn->sb.st_size, hd->size,
1038 sizeof(hd->size), term_char)) {
1039 # else
1040 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
1041 sizeof(hd->size), term_char)) {
1042 # endif
1043 paxwarn(1,"File is too long for ustar %s",arcn->org_name);
1044 return(1);
1045 }
1046 break;
1047 }
1048
1049 strncpy(hd->magic, TMAGIC, TMAGLEN);
1050 strncpy(hd->version, TVERSION, TVERSLEN);
1051
1052 /*
1053 * set the remaining fields. Some versions want all 16 bits of mode
1054 * we better humor them (they really do not meet spec though)....
1055 */
1056 if (ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), term_char)) {
1057 if (uid_nobody == 0) {
1058 if (uid_name("nobody", &uid_nobody) == -1)
1059 goto out;
1060 }
1061 if (uid_warn != arcn->sb.st_uid) {
1062 uid_warn = arcn->sb.st_uid;
1063 paxwarn(1,
1064 "Ustar header field is too small for uid %lu, "
1065 "using nobody", (u_long)arcn->sb.st_uid);
1066 }
1067 if (ul_oct((u_long)uid_nobody, hd->uid, sizeof(hd->uid), term_char))
1068 goto out;
1069 }
1070 if (ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), term_char)) {
1071 if (gid_nobody == 0) {
1072 if (gid_name("nobody", &gid_nobody) == -1)
1073 goto out;
1074 }
1075 if (gid_warn != arcn->sb.st_gid) {
1076 gid_warn = arcn->sb.st_gid;
1077 paxwarn(1,
1078 "Ustar header field is too small for gid %lu, "
1079 "using nobody", (u_long)arcn->sb.st_gid);
1080 }
1081 if (ul_oct((u_long)gid_nobody, hd->gid, sizeof(hd->gid), term_char))
1082 goto out;
1083 }
1084 /* However, Unix conformance tests do not like MORE than 12 mode bits:
1085 remove all beyond (see definition of stat.st_mode structure) */
1086 mode12only = ((u_long)arcn->sb.st_mode) & 0x00000fff;
1087 if (ul_oct((u_long)mode12only, hd->mode, sizeof(hd->mode), term_char) ||
1088 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),term_char))
1089 goto out;
1090 strncpy(hd->uname, name_uid(arcn->sb.st_uid, 0), sizeof(hd->uname));
1091 strncpy(hd->gname, name_gid(arcn->sb.st_gid, 0), sizeof(hd->gname));
1092
1093 /*
1094 * calculate and store the checksum write the header to the archive
1095 * return 0 tells the caller to now write the file data, 1 says no data
1096 * needs to be written
1097 */
1098 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1099 sizeof(hd->chksum), term_char))
1100 goto out;
1101 if (wr_rdbuf((char *)&hdblk, sizeof(HD_USTAR)) < 0)
1102 return(-1);
1103 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1104 return(-1);
1105 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1106 return(0);
1107 return(1);
1108
1109 out:
1110 /*
1111 * header field is out of range
1112 */
1113 paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1114 return(1);
1115 }
1116
1117 /*
1118 * name_split()
1119 * see if the name has to be split for storage in a ustar header. We try
1120 * to fit the entire name in the name field without splitting if we can.
1121 * The split point is always at a /
1122 * Return
1123 * character pointer to split point (always the / that is to be removed
1124 * if the split is not needed, the points is set to the start of the file
1125 * name (it would violate the spec to split there). A NULL is returned if
1126 * the file name is too long
1127 */
1128
1129 static char *
1130 name_split(char *name, int len)
1131 {
1132 char *start;
1133
1134 /*
1135 * check to see if the file name is small enough to fit in the name
1136 * field. if so just return a pointer to the name.
1137 * The strings can fill the complete name and prefix fields
1138 * without a NUL terminator.
1139 */
1140 if (len <= TNMSZ)
1141 return(name);
1142 if (len > (TPFSZ + TNMSZ + 1))
1143 return(NULL);
1144
1145 /*
1146 * we start looking at the biggest sized piece that fits in the name
1147 * field. We walk forward looking for a slash to split at. The idea is
1148 * to find the biggest piece to fit in the name field (or the smallest
1149 * prefix we can find) (the -1 is correct the biggest piece would
1150 * include the slash between the two parts that gets thrown away)
1151 */
1152 start = name + len - TNMSZ - 1;
1153 if ((*start == '/') && (start == name))
1154 ++start; /* 101 byte paths with leading '/' are dinged otherwise */
1155 while ((*start != '\0') && (*start != '/'))
1156 ++start;
1157
1158 /*
1159 * if we hit the end of the string, this name cannot be split, so we
1160 * cannot store this file.
1161 */
1162 if (*start == '\0')
1163 return(NULL);
1164 len = start - name;
1165
1166 /*
1167 * NOTE: /str where the length of str == TNMSZ can not be stored under
1168 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1169 * the file would then expand on extract to //str. The len == 0 below
1170 * makes this special case follow the spec to the letter.
1171 */
1172 if ((len > TPFSZ) || (len == 0))
1173 return(NULL);
1174
1175 /*
1176 * ok have a split point, return it to the caller
1177 */
1178 return(start);
1179 }
1180
/*
 * expandname()
 *	store a file or link name in buf (len bytes, always NUL terminated
 *	when len is not 0). If a GNU long name is pending in *gnu_name it is
 *	used, then freed and *gnu_name reset to NULL. Otherwise the name is
 *	taken from the header field name, which holds at most name_len bytes
 *	and need not be NUL terminated when it is full; the field is never
 *	read beyond name_len bytes. Names too long for buf are truncated.
 * Return:
 *	number of bytes stored in buf, not counting the NUL
 */

static size_t
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t name_len)
{
	const char *src;
	size_t nlen;

	if (*gnu_name != NULL) {
		/* *gnu_name is NUL terminated */
		src = *gnu_name;
		nlen = strlen(src);
	} else {
		/*
		 * header field: it ends at the first NUL or at the end of
		 * the field, whichever comes first
		 */
		const char *nul = memchr(name, '\0', name_len);

		src = name;
		nlen = (nul != NULL) ? (size_t)(nul - name) : name_len;
	}

	if (len == 0) {
		/* no room at all, not even for the NUL */
		nlen = 0;
	} else {
		if (nlen >= len)
			nlen = len - 1;
		memcpy(buf, src, nlen);
		buf[nlen] = '\0';
	}

	/*
	 * a pending GNU name is good for one use only
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return(nlen);
}