]> git.saurik.com Git - apple/file_cmds.git/blob - pax/tar.c
file_cmds-185.2.tar.gz
[apple/file_cmds.git] / pax / tar.c
1 /* $OpenBSD: tar.c,v 1.34 2004/10/23 19:34:14 otto Exp $ */
2 /* $NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $ */
3
4 /*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 * The Regents of the University of California. All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37 #ifndef lint
38 #if 0
39 static const char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94";
40 #else
41 static const char rcsid[] __attribute__((__unused__)) = "$OpenBSD: tar.c,v 1.34 2004/10/23 19:34:14 otto Exp $";
42 #endif
43 #endif /* not lint */
44
45 #include <sys/types.h>
46 #include <sys/time.h>
47 #include <sys/stat.h>
48 #include <sys/param.h>
49 #include <string.h>
50 #include <stdio.h>
51 #include <unistd.h>
52 #include <stdlib.h>
53 #include "pax.h"
54 #include "extern.h"
55 #include "tar.h"
56
/*
 * Routines for reading, writing, and identifying the headers of the various
 * versions of tar
 */
60
61 static size_t expandname(char *, size_t, char **, const char *, size_t);
62 static u_long tar_chksm(char *, int);
63 char *name_split(char *, int);
64 static int ul_oct(u_long, char *, int, int);
65 #ifndef LONG_OFF_T
66 static int uqd_oct(u_quad_t, char *, int, int);
67 #endif
68
69 static uid_t uid_nobody;
70 static uid_t uid_warn;
71 static gid_t gid_nobody;
72 static gid_t gid_warn;
73
74 /*
75 * Routines common to all versions of tar
76 */
77
78 static int tar_nodir; /* do not write dirs under old tar */
79 char *gnu_name_string; /* GNU ././@LongLink hackery name */
80 char *gnu_link_string; /* GNU ././@LongLink hackery link */
81
82 /*
83 * tar_endwr()
84 * add the tar trailer of two null blocks
85 * Return:
86 * 0 if ok, -1 otherwise (what wr_skip returns)
87 */
88
89 int
90 tar_endwr(void)
91 {
92 return(wr_skip((off_t)(NULLCNT*BLKMULT)));
93 }
94
95 /*
96 * tar_endrd()
97 * no cleanup needed here, just return size of trailer (for append)
98 * Return:
99 * size of trailer (2 * BLKMULT)
100 */
101
102 off_t
103 tar_endrd(void)
104 {
105 return((off_t)(NULLCNT*BLKMULT));
106 }
107
108 /*
109 * tar_trail()
110 * Called to determine if a header block is a valid trailer. We are passed
111 * the block, the in_sync flag (which tells us we are in resync mode;
112 * looking for a valid header), and cnt (which starts at zero) which is
113 * used to count the number of empty blocks we have seen so far.
114 * Return:
115 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
116 * could never contain a header.
117 */
118
119 int
120 tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
121 {
122 int i;
123
124 /*
125 * look for all zero, trailer is two consecutive blocks of zero
126 */
127 for (i = 0; i < BLKMULT; ++i) {
128 if (buf[i] != '\0')
129 break;
130 }
131
132 /*
133 * if not all zero it is not a trailer, but MIGHT be a header.
134 */
135 if (i != BLKMULT)
136 return(-1);
137
138 /*
139 * When given a zero block, we must be careful!
140 * If we are not in resync mode, check for the trailer. Have to watch
141 * out that we do not mis-identify file data as the trailer, so we do
142 * NOT try to id a trailer during resync mode. During resync mode we
143 * might as well throw this block out since a valid header can NEVER be
144 * a block of all 0 (we must have a valid file name).
145 */
146 if (!in_resync && (++*cnt >= NULLCNT))
147 return(0);
148 return(1);
149 }
150
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *	    3: digits followed by a NUL
 *	    2: digits followed by a NUL and then a space
 *	    1: digits followed by a space
 *	    0 (or any other value): digits followed by a space and then a NUL
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise. -1 is also
 *	returned, with str left untouched, when len cannot even hold the
 *	requested termination character(s).
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos;	/* index one past the next byte to fill (right to left) */
	int need;	/* number of termination bytes term asks for */

	/*
	 * refuse fields too short for the terminator(s) instead of writing
	 * in front of the caller's buffer
	 */
	need = ((term == 1) || (term == 3)) ? 1 : 2;
	if (len < need)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len;
	switch (term) {
	case 3:
		str[--pos] = '\0';
		break;
	case 2:
		str[--pos] = ' ';
		str[--pos] = '\0';
		break;
	case 1:
		str[--pos] = ' ';
		break;
	case 0:
	default:
		str[--pos] = '\0';
		str[--pos] = ' ';
		break;
	}

	/*
	 * emit the octal digits, least significant first, while there is
	 * space. Working with an index avoids ever forming a pointer that
	 * lies before the start of str.
	 */
	while (pos > 0) {
		str[--pos] = (char)('0' + (val & 0x7));
		if ((val = val >> 3) == (u_long)0)
			break;
	}

	/*
	 * pad whatever is left at the front of the field with '0'
	 */
	while (pos > 0)
		str[--pos] = '0';

	/*
	 * any bits still left in val did not fit in the field
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}
204
205 #ifndef LONG_OFF_T
/*
 * uqd_oct()
 *	convert an u_quad_t to an octal string. one of many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *	    3: digits followed by a NUL
 *	    2: digits followed by a NUL and then a space
 *	    1: digits followed by a space
 *	    0 (or any other value): digits followed by a space and then a NUL
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise. -1 is also
 *	returned, with str left untouched, when len cannot even hold the
 *	requested termination character(s).
 */

static int
uqd_oct(u_quad_t val, char *str, int len, int term)
{
	int pos;	/* index one past the next byte to fill (right to left) */
	int need;	/* number of termination bytes term asks for */

	/*
	 * refuse fields too short for the terminator(s) instead of writing
	 * in front of the caller's buffer
	 */
	need = ((term == 1) || (term == 3)) ? 1 : 2;
	if (len < need)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len;
	switch (term) {
	case 3:
		str[--pos] = '\0';
		break;
	case 2:
		str[--pos] = ' ';
		str[--pos] = '\0';
		break;
	case 1:
		str[--pos] = ' ';
		break;
	case 0:
	default:
		str[--pos] = '\0';
		str[--pos] = ' ';
		break;
	}

	/*
	 * emit the octal digits, least significant first, while there is
	 * space. Working with an index avoids ever forming a pointer that
	 * lies before the start of str.
	 */
	while (pos > 0) {
		str[--pos] = (char)('0' + (val & 0x7));
		if ((val = val >> 3) == 0)
			break;
	}

	/*
	 * pad whatever is left at the front of the field with '0'
	 */
	while (pos > 0)
		str[--pos] = '0';

	/*
	 * any bits still left in val did not fit in the field
	 */
	if (val != (u_quad_t)0)
		return(-1);
	return(0);
}
259 #endif
260
261 /*
262 * tar_chksm()
263 * calculate the checksum for a tar block counting the checksum field as
264 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
265 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS
266 * pad headers with 0.
267 * Return:
268 * unsigned long checksum
269 */
270
271 static u_long
272 tar_chksm(char *blk, int len)
273 {
274 char *stop;
275 char *pt;
276 u_long chksm = BLNKSUM; /* initial value is checksum field sum */
277
278 /*
279 * add the part of the block before the checksum field
280 */
281 pt = blk;
282 stop = blk + CHK_OFFSET;
283 while (pt < stop)
284 chksm += (u_long)(*pt++ & 0xff);
285 /*
286 * move past the checksum field and keep going, spec counts the
287 * checksum field as the sum of 8 blanks (which is pre-computed as
288 * BLNKSUM).
289 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
290 * starts, no point in summing zero's)
291 */
292 pt += CHK_LEN;
293 stop = blk + len;
294 while (pt < stop)
295 chksm += (u_long)(*pt++ & 0xff);
296 return(chksm);
297 }
298
299 /*
300 * Routines for old BSD style tar (also made portable to sysV tar)
301 */
302
303 /*
304 * tar_id()
305 * determine if a block given to us is a valid tar header (and not a USTAR
306 * header). We have to be on the lookout for those pesky blocks of all
307 * zero's.
308 * Return:
309 * 0 if a tar header, -1 otherwise
310 */
311
312 int
313 tar_id(char *blk, int size)
314 {
315 HD_TAR *hd;
316 HD_USTAR *uhd;
317
318 if (size < BLKMULT)
319 return(-1);
320 hd = (HD_TAR *)blk;
321 uhd = (HD_USTAR *)blk;
322
323 /*
324 * check for block of zero's first, a simple and fast test, then make
325 * sure this is not a ustar header by looking for the ustar magic
326 * cookie. We should use TMAGLEN, but some USTAR archive programs are
327 * wrong and create archives missing the \0. Last we check the
328 * checksum. If this is ok we have to assume it is a valid header.
329 */
330 if (hd->name[0] == '\0')
331 return(-1);
332 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
333 return(-1);
334 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
335 return(-1);
336 force_one_volume = 1;
337 return(0);
338 }
339
340 /*
341 * tar_opt()
342 * handle tar format specific -o options
343 * Return:
344 * 0 if ok -1 otherwise
345 */
346
347 int
348 tar_opt(void)
349 {
350 OPLIST *opt;
351
352 while ((opt = opt_next()) != NULL) {
353 if (strcmp(opt->name, TAR_OPTION) ||
354 strcmp(opt->value, TAR_NODIR)) {
355 paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
356 opt->name, opt->value);
357 paxwarn(1,"%s=%s is the only supported tar format option",
358 TAR_OPTION, TAR_NODIR);
359 return(-1);
360 }
361
362 /*
363 * we only support one option, and only when writing
364 */
365 if ((act != APPND) && (act != ARCHIVE)) {
366 paxwarn(1, "%s=%s is only supported when writing.",
367 opt->name, opt->value);
368 return(-1);
369 }
370 tar_nodir = 1;
371 }
372 return(0);
373 }
374
375
376 /*
377 * tar_rd()
378 * extract the values out of block already determined to be a tar header.
379 * store the values in the ARCHD parameter.
380 * Return:
381 * 0
382 */
383
384 int
385 tar_rd(ARCHD *arcn, char *buf)
386 {
387 HD_TAR *hd;
388 char *pt;
389
390 /*
391 * we only get proper sized buffers passed to us
392 */
393 if (tar_id(buf, BLKMULT) < 0)
394 return(-1);
395 memset(arcn, 0, sizeof(*arcn));
396 arcn->org_name = arcn->name;
397 arcn->sb.st_nlink = 1;
398
399 /*
400 * copy out the name and values in the stat buffer
401 */
402 hd = (HD_TAR *)buf;
403 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
404 arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
405 &gnu_name_string, hd->name, sizeof(hd->name));
406 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
407 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
408 }
409 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
410 0xfff);
411 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
412 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
413 #ifdef LONG_OFF_T
414 arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
415 #else
416 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
417 #endif
418 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
419 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
420
421 /*
422 * have to look at the last character, it may be a '/' and that is used
423 * to encode this as a directory
424 */
425 pt = &(arcn->name[arcn->nlen - 1]);
426 arcn->pad = 0;
427 arcn->skip = 0;
428 switch (hd->linkflag) {
429 case SYMTYPE:
430 /*
431 * symbolic link, need to get the link name and set the type in
432 * the st_mode so -v printing will look correct.
433 */
434 arcn->type = PAX_SLK;
435 arcn->sb.st_mode |= S_IFLNK;
436 break;
437 case LNKTYPE:
438 /*
439 * hard link, need to get the link name, set the type in the
440 * st_mode and st_nlink so -v printing will look better.
441 */
442 arcn->type = PAX_HLK;
443 arcn->sb.st_nlink = 2;
444
445 /*
446 * no idea of what type this thing really points at, but
447 * we set something for printing only.
448 */
449 arcn->sb.st_mode |= S_IFREG;
450 break;
451 case LONGLINKTYPE:
452 case LONGNAMETYPE:
453 /*
454 * GNU long link/file; we tag these here and let the
455 * pax internals deal with it -- too ugly otherwise.
456 */
457 arcn->type =
458 hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
459 arcn->pad = TAR_PAD(arcn->sb.st_size);
460 arcn->skip = arcn->sb.st_size;
461 break;
462 case DIRTYPE:
463 /*
464 * It is a directory, set the mode for -v printing
465 */
466 arcn->type = PAX_DIR;
467 arcn->sb.st_mode |= S_IFDIR;
468 arcn->sb.st_nlink = 2;
469 break;
470 case AREGTYPE:
471 case REGTYPE:
472 default:
473 /*
474 * If we have a trailing / this is a directory and NOT a file.
475 */
476 arcn->ln_name[0] = '\0';
477 arcn->ln_nlen = 0;
478 if (*pt == '/') {
479 /*
480 * it is a directory, set the mode for -v printing
481 */
482 arcn->type = PAX_DIR;
483 arcn->sb.st_mode |= S_IFDIR;
484 arcn->sb.st_nlink = 2;
485 } else {
486 /*
487 * have a file that will be followed by data. Set the
488 * skip value to the size field and calculate the size
489 * of the padding.
490 */
491 arcn->type = PAX_REG;
492 arcn->sb.st_mode |= S_IFREG;
493 arcn->pad = TAR_PAD(arcn->sb.st_size);
494 arcn->skip = arcn->sb.st_size;
495 }
496 break;
497 }
498
499 /*
500 * strip off any trailing slash.
501 */
502 if (*pt == '/') {
503 *pt = '\0';
504 --arcn->nlen;
505 }
506 return(0);
507 }
508
509 /*
510 * tar_wr()
511 * write a tar header for the file specified in the ARCHD to the archive.
512 * Have to check for file types that cannot be stored and file names that
513 * are too long. Be careful of the term (last arg) to ul_oct, each field
514 * of tar has it own spec for the termination character(s).
515 * ASSUMED: space after header in header block is zero filled
516 * Return:
517 * 0 if file has data to be written after the header, 1 if file has NO
518 * data to write after the header, -1 if archive write failed
519 */
520
521 int
522 tar_wr(ARCHD *arcn)
523 {
524 HD_TAR *hd;
525 int len;
526 char hdblk[sizeof(HD_TAR)];
527
528 /*
529 * check for those file system types which tar cannot store
530 */
531 switch (arcn->type) {
532 case PAX_DIR:
533 /*
534 * user asked that dirs not be written to the archive
535 */
536 if (tar_nodir)
537 return(1);
538 break;
539 case PAX_CHR:
540 paxwarn(1, "Tar cannot archive a character device %s",
541 arcn->org_name);
542 return(1);
543 case PAX_BLK:
544 paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
545 return(1);
546 case PAX_SCK:
547 paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
548 return(1);
549 case PAX_FIF:
550 paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
551 return(1);
552 case PAX_SLK:
553 case PAX_HLK:
554 case PAX_HRG:
555 if (arcn->ln_nlen > sizeof(hd->linkname)) {
556 paxwarn(1,"Link name too long for tar %s", arcn->ln_name);
557 return(1);
558 }
559 break;
560 case PAX_REG:
561 case PAX_CTG:
562 default:
563 break;
564 }
565
566 /*
567 * check file name len, remember extra char for dirs (the / at the end)
568 */
569 len = arcn->nlen;
570 if (arcn->type == PAX_DIR)
571 ++len;
572 if (len >= sizeof(hd->name)) {
573 paxwarn(1, "File name too long for tar %s", arcn->name);
574 return(1);
575 }
576
577 /*
578 * Copy the data out of the ARCHD into the tar header based on the type
579 * of the file. Remember, many tar readers want all fields to be
580 * padded with zero so we zero the header first. We then set the
581 * linkflag field (type), the linkname, the size, and set the padding
582 * (if any) to be added after the file data (0 for all other types,
583 * as they only have a header).
584 */
585 memset(hdblk, 0, sizeof(hdblk));
586 hd = (HD_TAR *)hdblk;
587 strlcpy(hd->name, arcn->name, sizeof(hd->name));
588 arcn->pad = 0;
589
590 if (arcn->type == PAX_DIR) {
591 /*
592 * directories are the same as files, except have a filename
593 * that ends with a /, we add the slash here. No data follows
594 * dirs, so no pad.
595 */
596 hd->linkflag = AREGTYPE;
597 hd->name[len-1] = '/';
598 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
599 goto out;
600 } else if (arcn->type == PAX_SLK) {
601 /*
602 * no data follows this file, so no pad
603 */
604 hd->linkflag = SYMTYPE;
605 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
606 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
607 goto out;
608 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
609 /*
610 * no data follows this file, so no pad
611 */
612 hd->linkflag = LNKTYPE;
613 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
614 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
615 goto out;
616 } else {
617 /*
618 * data follows this file, so set the pad
619 */
620 hd->linkflag = AREGTYPE;
621 # ifdef LONG_OFF_T
622 if (ul_oct((u_long)arcn->sb.st_size, hd->size,
623 sizeof(hd->size), 1)) {
624 # else
625 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
626 sizeof(hd->size), 1)) {
627 # endif
628 paxwarn(1,"File is too large for tar %s", arcn->org_name);
629 return(1);
630 }
631 arcn->pad = TAR_PAD(arcn->sb.st_size);
632 }
633
634 /*
635 * copy those fields that are independent of the type
636 */
637 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
638 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
639 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
640 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
641 goto out;
642
643 /*
644 * calculate and add the checksum, then write the header. A return of
645 * 0 tells the caller to now write the file data, 1 says no data needs
646 * to be written
647 */
648 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
649 sizeof(hd->chksum), 3))
650 goto out;
651 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
652 return(-1);
653 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
654 return(-1);
655 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
656 return(0);
657 return(1);
658
659 out:
660 /*
661 * header field is out of range
662 */
663 paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
664 return(1);
665 }
666
667 /*
668 * Routines for POSIX ustar
669 */
670
/*
 * ustar_strd()
 *	prepare for reading a ustar archive by calling usrtb_start() and then
 *	grptb_start()
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_strd(void)
{
	if (usrtb_start() < 0)
		return(-1);
	if (grptb_start() < 0)
		return(-1);
	return(0);
}
685
/*
 * ustar_stwr()
 *	prepare for writing a ustar archive by calling uidtb_start() and then
 *	gidtb_start()
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_stwr(void)
{
	if (uidtb_start() < 0)
		return(-1);
	if (gidtb_start() < 0)
		return(-1);
	return(0);
}
700
701 /*
702 * ustar_id()
703 * determine if a block given to us is a valid ustar header. We have to
704 * be on the lookout for those pesky blocks of all zero's
705 * Return:
706 * 0 if a ustar header, -1 otherwise
707 */
708
709 int
710 ustar_id(char *blk, int size)
711 {
712 HD_USTAR *hd;
713
714 if (size < BLKMULT)
715 return(-1);
716 hd = (HD_USTAR *)blk;
717
718 /*
719 * check for block of zero's first, a simple and fast test then check
720 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
721 * programs are fouled up and create archives missing the \0. Last we
722 * check the checksum. If ok we have to assume it is a valid header.
723 */
724 if (hd->name[0] == '\0')
725 return(-1);
726 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
727 return(-1);
728 if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
729 return(-1);
730 return(0);
731 }
732
733 /*
734 * ustar_rd()
735 * extract the values out of block already determined to be a ustar header.
736 * store the values in the ARCHD parameter.
737 * Return:
738 * 0
739 */
740
741 int
742 ustar_rd(ARCHD *arcn, char *buf)
743 {
744 HD_USTAR *hd;
745 char *dest;
746 int cnt = 0;
747 dev_t devmajor;
748 dev_t devminor;
749
750 /*
751 * we only get proper sized buffers
752 */
753 if (ustar_id(buf, BLKMULT) < 0)
754 return(-1);
755 memset(arcn, 0, sizeof(*arcn));
756 arcn->org_name = arcn->name;
757 arcn->sb.st_nlink = 1;
758 hd = (HD_USTAR *)buf;
759
760 /*
761 * see if the filename is split into two parts. if, so joint the parts.
762 * we copy the prefix first and add a / between the prefix and name.
763 */
764 dest = arcn->name;
765 if (*(hd->prefix) != '\0') {
766 cnt = strlcpy(dest, hd->prefix, sizeof(arcn->name) - 1);
767 dest += cnt;
768 *dest++ = '/';
769 cnt++;
770 } else {
771 cnt = 0;
772 }
773
774 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
775 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
776 &gnu_name_string, hd->name, sizeof(hd->name));
777 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
778 &gnu_link_string, hd->linkname, sizeof(hd->linkname));
779 }
780
781 /*
782 * follow the spec to the letter. we should only have mode bits, strip
783 * off all other crud we may be passed.
784 */
785 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
786 0xfff);
787 #ifdef LONG_OFF_T
788 arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
789 #else
790 arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
791 #endif
792 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
793 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
794
795 /*
796 * If we can find the ascii names for gname and uname in the password
797 * and group files we will use the uid's and gid they bind. Otherwise
798 * we use the uid and gid values stored in the header. (This is what
799 * the posix spec wants).
800 */
801 hd->gname[sizeof(hd->gname) - 1] = '\0';
802 if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0)
803 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
804 hd->uname[sizeof(hd->uname) - 1] = '\0';
805 if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0)
806 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
807
808 /*
809 * set the defaults, these may be changed depending on the file type
810 */
811 arcn->pad = 0;
812 arcn->skip = 0;
813 arcn->sb.st_rdev = (dev_t)0;
814
815 /*
816 * set the mode and PAX type according to the typeflag in the header
817 */
818 switch (hd->typeflag) {
819 case FIFOTYPE:
820 arcn->type = PAX_FIF;
821 arcn->sb.st_mode |= S_IFIFO;
822 break;
823 case DIRTYPE:
824 arcn->type = PAX_DIR;
825 arcn->sb.st_mode |= S_IFDIR;
826 arcn->sb.st_nlink = 2;
827
828 /*
829 * Some programs that create ustar archives append a '/'
830 * to the pathname for directories. This clearly violates
831 * ustar specs, but we will silently strip it off anyway.
832 */
833 if (arcn->name[arcn->nlen - 1] == '/')
834 arcn->name[--arcn->nlen] = '\0';
835 break;
836 case BLKTYPE:
837 case CHRTYPE:
838 /*
839 * this type requires the rdev field to be set.
840 */
841 if (hd->typeflag == BLKTYPE) {
842 arcn->type = PAX_BLK;
843 arcn->sb.st_mode |= S_IFBLK;
844 } else {
845 arcn->type = PAX_CHR;
846 arcn->sb.st_mode |= S_IFCHR;
847 }
848 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
849 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
850 arcn->sb.st_rdev = TODEV(devmajor, devminor);
851 break;
852 case SYMTYPE:
853 case LNKTYPE:
854 if (hd->typeflag == SYMTYPE) {
855 arcn->type = PAX_SLK;
856 arcn->sb.st_mode |= S_IFLNK;
857 } else {
858 arcn->type = PAX_HLK;
859 /*
860 * so printing looks better
861 */
862 arcn->sb.st_mode |= S_IFREG;
863 arcn->sb.st_nlink = 2;
864 }
865 break;
866 case LONGLINKTYPE:
867 case LONGNAMETYPE:
868 /*
869 * GNU long link/file; we tag these here and let the
870 * pax internals deal with it -- too ugly otherwise.
871 */
872 arcn->type =
873 hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
874 arcn->pad = TAR_PAD(arcn->sb.st_size);
875 arcn->skip = arcn->sb.st_size;
876 break;
877 case CONTTYPE:
878 case AREGTYPE:
879 case REGTYPE:
880 default:
881 /*
882 * these types have file data that follows. Set the skip and
883 * pad fields.
884 */
885 arcn->type = PAX_REG;
886 arcn->pad = TAR_PAD(arcn->sb.st_size);
887 arcn->skip = arcn->sb.st_size;
888 arcn->sb.st_mode |= S_IFREG;
889 break;
890 }
891 return(0);
892 }
893
894 /*
895 * ustar_wr()
896 * write a ustar header for the file specified in the ARCHD to the archive
897 * Have to check for file types that cannot be stored and file names that
898 * are too long. Be careful of the term (last arg) to ul_oct, we only use
899 * '\0' for the termination character (this is different than picky tar)
900 * ASSUMED: space after header in header block is zero filled
901 * Return:
902 * 0 if file has data to be written after the header, 1 if file has NO
903 * data to write after the header, -1 if archive write failed
904 */
905
906 int
907 ustar_wr(ARCHD *arcn)
908 {
909 HD_USTAR *hd;
910 char *pt;
911 char hdblk[sizeof(HD_USTAR)];
912 mode_t mode12only;
913 int term_char=3; /* orignal setting */
914 term_char=1; /* To pass conformance tests 274, 301 */
915
916 /*
917 * check for those file system types ustar cannot store
918 */
919 if (arcn->type == PAX_SCK) {
920 paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
921 return(1);
922 }
923
924 /*
925 * check the length of the linkname
926 */
927 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
928 (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){
929 paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
930 /*
931 * Conformance: test pax:285 wants error code to be non-zero, and
932 * test tar:12 wants error code from pax to be 0
933 */
934 return(1);
935 }
936
937 /*
938 * split the path name into prefix and name fields (if needed). if
939 * pt != arcn->name, the name has to be split
940 */
941 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
942 paxwarn(1, "File name too long for ustar %s", arcn->name);
943 return(1);
944 }
945
946 /*
947 * zero out the header so we don't have to worry about zero fill below
948 */
949 memset(hdblk, 0, sizeof(hdblk));
950 hd = (HD_USTAR *)hdblk;
951 arcn->pad = 0L;
952 /* To pass conformance tests 274/301, always set these fields to "zero" */
953 ul_oct(0, hd->devmajor, sizeof(hd->devmajor), term_char);
954 ul_oct(0, hd->devminor, sizeof(hd->devminor), term_char);
955
956 /*
957 * split the name, or zero out the prefix
958 */
959 if (pt != arcn->name) {
960 /*
961 * name was split, pt points at the / where the split is to
962 * occur, we remove the / and copy the first part to the prefix
963 */
964 *pt = '\0';
965 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
966 *pt++ = '/';
967 }
968
969 /*
970 * copy the name part. this may be the whole path or the part after
971 * the prefix
972 */
973 if (strlen(pt) == sizeof(hd->name)) { /* must account for name just fits in buffer */
974 strncpy(hd->name, pt, sizeof(hd->name));
975 } else {
976 strlcpy(hd->name, pt, sizeof(hd->name));
977 }
978
979 /*
980 * set the fields in the header that are type dependent
981 */
982 switch (arcn->type) {
983 case PAX_DIR:
984 hd->typeflag = DIRTYPE;
985 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
986 goto out;
987 break;
988 case PAX_CHR:
989 case PAX_BLK:
990 if (arcn->type == PAX_CHR)
991 hd->typeflag = CHRTYPE;
992 else
993 hd->typeflag = BLKTYPE;
994 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
995 sizeof(hd->devmajor), term_char) ||
996 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
997 sizeof(hd->devminor), term_char) ||
998 ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
999 goto out;
1000 break;
1001 case PAX_FIF:
1002 hd->typeflag = FIFOTYPE;
1003 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1004 goto out;
1005 break;
1006 case PAX_SLK:
1007 case PAX_HLK:
1008 case PAX_HRG:
1009 if (arcn->type == PAX_SLK)
1010 hd->typeflag = SYMTYPE;
1011 else
1012 hd->typeflag = LNKTYPE;
1013 if (strlen(arcn->ln_name) == sizeof(hd->linkname)) { /* must account for name just fits in buffer */
1014 strncpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1015 } else {
1016 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1017 }
1018 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1019 goto out;
1020 break;
1021 case PAX_REG:
1022 case PAX_CTG:
1023 default:
1024 /*
1025 * file data with this type, set the padding
1026 */
1027 if (arcn->type == PAX_CTG)
1028 hd->typeflag = CONTTYPE;
1029 else
1030 hd->typeflag = REGTYPE;
1031 arcn->pad = TAR_PAD(arcn->sb.st_size);
1032 # ifdef LONG_OFF_T
1033 if (ul_oct((u_long)arcn->sb.st_size, hd->size,
1034 sizeof(hd->size), term_char)) {
1035 # else
1036 if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
1037 sizeof(hd->size), term_char)) {
1038 # endif
1039 paxwarn(1,"File is too long for ustar %s",arcn->org_name);
1040 return(1);
1041 }
1042 break;
1043 }
1044
1045 strncpy(hd->magic, TMAGIC, TMAGLEN);
1046 strncpy(hd->version, TVERSION, TVERSLEN);
1047
1048 /*
1049 * set the remaining fields. Some versions want all 16 bits of mode
1050 * we better humor them (they really do not meet spec though)....
1051 */
1052 if (ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), term_char)) {
1053 if (uid_nobody == 0) {
1054 if (uid_name("nobody", &uid_nobody) == -1)
1055 goto out;
1056 }
1057 if (uid_warn != arcn->sb.st_uid) {
1058 uid_warn = arcn->sb.st_uid;
1059 paxwarn(1,
1060 "Ustar header field is too small for uid %lu, "
1061 "using nobody", (u_long)arcn->sb.st_uid);
1062 }
1063 if (ul_oct((u_long)uid_nobody, hd->uid, sizeof(hd->uid), term_char))
1064 goto out;
1065 }
1066 if (ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), term_char)) {
1067 if (gid_nobody == 0) {
1068 if (gid_name("nobody", &gid_nobody) == -1)
1069 goto out;
1070 }
1071 if (gid_warn != arcn->sb.st_gid) {
1072 gid_warn = arcn->sb.st_gid;
1073 paxwarn(1,
1074 "Ustar header field is too small for gid %lu, "
1075 "using nobody", (u_long)arcn->sb.st_gid);
1076 }
1077 if (ul_oct((u_long)gid_nobody, hd->gid, sizeof(hd->gid), term_char))
1078 goto out;
1079 }
1080 /* However, Unix conformance tests do not like MORE than 12 mode bits:
1081 remove all beyond (see definition of stat.st_mode structure) */
1082 mode12only = ((u_long)arcn->sb.st_mode) & 0x00000fff;
1083 if (ul_oct((u_long)mode12only, hd->mode, sizeof(hd->mode), term_char) ||
1084 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),term_char))
1085 goto out;
1086 strncpy(hd->uname, name_uid(arcn->sb.st_uid, 0), sizeof(hd->uname));
1087 strncpy(hd->gname, name_gid(arcn->sb.st_gid, 0), sizeof(hd->gname));
1088
1089 /*
1090 * calculate and store the checksum write the header to the archive
1091 * return 0 tells the caller to now write the file data, 1 says no data
1092 * needs to be written
1093 */
1094 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1095 sizeof(hd->chksum), term_char))
1096 goto out;
1097 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1098 return(-1);
1099 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1100 return(-1);
1101 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1102 return(0);
1103 return(1);
1104
1105 out:
1106 /*
1107 * header field is out of range
1108 */
1109 paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1110 return(1);
1111 }
1112
1113 /*
1114 * name_split()
1115 * see if the name has to be split for storage in a ustar header. We try
1116 * to fit the entire name in the name field without splitting if we can.
1117 * The split point is always at a /
1118 * Return
1119 * character pointer to split point (always the / that is to be removed
1120 * if the split is not needed, the points is set to the start of the file
1121 * name (it would violate the spec to split there). A NULL is returned if
1122 * the file name is too long
1123 */
1124
1125 char *
1126 name_split(char *name, int len)
1127 {
1128 char *start;
1129
1130 /*
1131 * check to see if the file name is small enough to fit in the name
1132 * field. if so just return a pointer to the name.
1133 */
1134 if (len <= TNMSZ)
1135 return(name);
1136 if (len > (TPFSZ + TNMSZ))
1137 return(NULL);
1138
1139 /*
1140 * we start looking at the biggest sized piece that fits in the name
1141 * field. We walk forward looking for a slash to split at. The idea is
1142 * to find the biggest piece to fit in the name field (or the smallest
1143 * prefix we can find)
1144 */
1145 start = name + len - TNMSZ -1;
1146 if ((*start == '/') && (start == name))
1147 ++start; /* 101 byte paths with leading '/' are dinged otherwise */
1148 while ((*start != '\0') && (*start != '/'))
1149 ++start;
1150
1151 /*
1152 * if we hit the end of the string, this name cannot be split, so we
1153 * cannot store this file.
1154 */
1155 if (*start == '\0')
1156 return(NULL);
1157 len = start - name;
1158
1159 /*
1160 * NOTE: /str where the length of str == TNMSZ can not be stored under
1161 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1162 * the file would then expand on extract to //str. The len == 0 below
1163 * makes this special case follow the spec to the letter.
1164 */
1165 if ((len >= TPFSZ) || (len == 0))
1166 return(NULL);
1167
1168 /*
1169 * ok have a split point, return it to the caller
1170 */
1171 return(start);
1172 }
1173
/*
 * expandname()
 *	store a file or link name in buf (which holds len bytes), always NUL
 *	terminated and cut short if it does not fit. When *gnu_name is set
 *	(a pending GNU long name) that string is used, then freed and
 *	*gnu_name is reset to NULL. Otherwise the name comes from the header
 *	field name, which is name_len bytes wide and need not be NUL
 *	terminated when the name fills the whole field; no byte beyond
 *	name_len is ever read from it.
 * Return:
 *	number of characters stored in buf, not counting the NUL. When len is
 *	0 nothing is stored and 0 is returned (a pending GNU name is still
 *	released).
 */

static size_t
expandname(char *buf, size_t len, char **gnu_name, const char *name, size_t name_len)
{
	const char *src;
	const char *nul;
	size_t nlen;

	if (*gnu_name != NULL) {
		/*
		 * the GNU name is an ordinary NUL terminated string
		 */
		src = *gnu_name;
		nlen = strlen(src);
	} else {
		/*
		 * the name ends at the first NUL inside the field, or at the
		 * end of the field when there is none
		 */
		src = name;
		nul = memchr(name, '\0', name_len);
		nlen = (nul != NULL) ? (size_t)(nul - name) : name_len;
	}

	if (len == 0) {
		/*
		 * not even room for the terminator; len - 1 below would wrap
		 */
		nlen = 0;
	} else {
		/*
		 * keep one byte of buf for the terminating NUL
		 */
		if (nlen >= len)
			nlen = len - 1;
		memcpy(buf, src, nlen);
		buf[nlen] = '\0';
	}

	/*
	 * a pending GNU name is used up by this call
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return(nlen);
}