/*
- * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 1999, 2000, 2003, 2005, 2008, 2012 Apple Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
- *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#ifdef __BLOCKS__
+#include <Block.h>
+#endif /* __BLOCKS__ */
static FTSENT *fts_alloc(FTS *, char *, int);
static FTSENT *fts_build(FTS *, int);
#define BNAMES 2 /* fts_children, names only */
#define BREAD 3 /* fts_read */
-FTS *
-fts_open(argv, options, compar)
+/* 5653270
+ * For directories containing > 64k subdirectories (or HFS+ with > 64k files
+ * and subdirectories), struct stat's st_nlink (16 bits) will overflow. This
+ * causes the case with FTS_NOSTAT and FTS_PHYSICAL set to prematurely stop
+ * recursing into subdirectories, because of an optimization that expects
+ * st_nlink to be the number of subdirectories (once that number has been
+ * encountered, no further calls to stat should be needed).
+ *
+ * However, on Mac OS X, another optimization largely nullifies the st_nlink
+ * optimization. struct dirent contains d_type, which can distinguish
+ * directories from files without initially calling stat. So stat is only
+ * called on known directories, rather than on other files. With this
+ * optimization, the difference in also using the st_nlink optimization is
+ * pretty minimal (tests show an improvement of a percent or two, probably
+ * due to additional if statement clauses that need to be evaluated).
+ *
+ * So removing the st_nlink optimization code will fix the > 64k subdirectories
+ * problem. And if we replace the multiple if clause logic with a single
+ * switch statement, we can recover the minimal performance lose. We can
+ * go even further and for the case of FTS_NOSTAT and FTS_LOGICAL set, we
+ * can use d_type to also distinguish symbolic links, and so we only need to
+ * call stat on directories and symlinks, not on all files. This provides
+ * a significant performance boost in that special case.
+ */
+/*
+ * The following macros defines values of the dostat variable, which is or-ed
+ * with the value of d_type, and the result used in a switch statement to
+ * determine whether to call stat or not. (We order the macros to minimize
+ * the size of any jump table that the compiler may generate.)
+ */
+#define F_SHIFT 4 /* shift to leave space for d_type */
+#define F_NOSTAT (0 << F_SHIFT) /* don't do any stat's */
+#define F_STATDIRSYM (1 << F_SHIFT) /* only stat directories and symlinks (and unknowns) */
+#define F_ALWAYSSTAT (2 << F_SHIFT) /* always stat */
+#define F_STATDIR (3 << F_SHIFT) /* only stat directories (and unknowns) */
+#define F_D_TYPE (4 << F_SHIFT) /* only stat directories but use d_type */
+#define F_D_TYPESYM (5 << F_SHIFT) /* only stat directories and symlinks but use d_type */
+
+static FTS *
+__fts_open(argv, sp)
char * const *argv;
- register int options;
- int (*compar)();
-{
register FTS *sp;
+{
register FTSENT *p, *root;
register int nitems;
- FTSENT *parent, *tmp;
+ FTSENT *parent, *tmp = NULL;
int len;
- /* Options check. */
- if (options & ~FTS_OPTIONMASK) {
- errno = EINVAL;
- return (NULL);
- }
-
- /* Allocate/initialize the stream */
- if ((sp = malloc((u_int)sizeof(FTS))) == NULL)
- return (NULL);
- memset(sp, 0, sizeof(FTS));
- sp->fts_compar = compar;
- sp->fts_options = options;
-
/* Logical walks turn on NOCHDIR; symbolic links are too hard. */
if (ISSET(FTS_LOGICAL))
SET(FTS_NOCHDIR);
p->fts_level = FTS_ROOTLEVEL;
p->fts_parent = parent;
p->fts_accpath = p->fts_name;
- p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOW));
+ p->fts_info = fts_stat(sp, p, ISSET(FTS_COMFOLLOWDIR) ? -1 : ISSET(FTS_COMFOLLOW));
/* Command-line "." and ".." are real directories. */
if (p->fts_info == FTS_DOT)
* If comparison routine supplied, traverse in sorted
* order; otherwise traverse in the order specified.
*/
- if (compar) {
+ if (sp->fts_compar) {
p->fts_link = root;
root = p;
} else {
}
}
}
- if (compar && nitems > 1)
+ if (sp->fts_compar && nitems > 1)
root = fts_sort(sp, root, nitems);
/*
return (NULL);
}
+FTS *
+fts_open(argv, options, compar)
+ char * const *argv;
+ int options;
+ int (*compar)();
+{
+ register FTS *sp;
+
+ /* Options check. */
+ if (options & ~FTS_OPTIONMASK) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ if (options & FTS_NOSTAT_TYPE) options |= FTS_NOSTAT;
+
+ /* Allocate/initialize the stream */
+ if ((sp = malloc((u_int)sizeof(FTS))) == NULL)
+ return (NULL);
+ memset(sp, 0, sizeof(FTS));
+ sp->fts_compar = compar;
+ sp->fts_options = options;
+
+ return __fts_open(argv, sp);
+}
+
+#ifdef __BLOCKS__
+FTS *
+fts_open_b(argv, options, compar)
+ char * const *argv;
+ int options;
+ int (^compar)(const FTSENT **, const FTSENT **);
+{
+ register FTS *sp;
+
+ /* Options check. */
+ if (options & ~FTS_OPTIONMASK) {
+ errno = EINVAL;
+ return (NULL);
+ }
+ if (options & FTS_NOSTAT_TYPE) options |= FTS_NOSTAT;
+
+ /* Allocate/initialize the stream */
+ if ((sp = malloc((u_int)sizeof(FTS))) == NULL)
+ return (NULL);
+ memset(sp, 0, sizeof(FTS));
+ sp->fts_compar_b = (int (^)())Block_copy(compar);
+ sp->fts_options = options | FTS_BLOCK_COMPAR;
+
+ return __fts_open(argv, sp);
+}
+#endif /* __BLOCKS__ */
+
static void
fts_load(sp, p)
FTS *sp;
(void)close(sp->fts_rfd);
}
+#ifdef __BLOCKS__
+ /* Free up any block pointer. */
+ if (ISSET(FTS_BLOCK_COMPAR) && sp->fts_compar_b != NULL)
+ Block_release(sp->fts_compar_b);
+#endif /* __BLOCKS__ */
+
/* Free up the stream pointer. */
free(sp);
if (instr == FTS_FOLLOW &&
(p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE)) {
p->fts_info = fts_stat(sp, p, 1);
- if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR))
+ if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd = open(".", O_RDONLY, 0)) < 0) {
p->fts_errno = errno;
p->fts_info = FTS_ERR;
- } else
+ } else {
p->fts_flags |= FTS_SYMFOLLOW;
+ }
+ }
return (p);
}
if (p->fts_info == FTS_D) {
/* If skipped or crossed mount point, do post-order visit. */
if (instr == FTS_SKIP ||
- ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev) {
+ (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) {
if (p->fts_flags & FTS_SYMFOLLOW)
(void)close(p->fts_symfd);
if (sp->fts_child) {
}
p->fts_info = FTS_DP;
return (p);
- }
+ }
/* Rebuild if only read the names and now traversing. */
if (sp->fts_child && sp->fts_options & FTS_NAMEONLY) {
/* Move to the next node on this level. */
next: tmp = p;
- if (p = p->fts_link) {
- free(tmp);
-
+ if ((p = p->fts_link)) {
/*
* If reached the top, return to the original directory, and
* load the paths for the next root.
return (NULL);
}
fts_load(sp, p);
+ free(tmp);
return (sp->fts_cur = p);
}
* ignore. If followed, get a file descriptor so we can
* get back if necessary.
*/
- if (p->fts_instr == FTS_SKIP)
+ if (p->fts_instr == FTS_SKIP) {
+ free(tmp);
goto next;
+ }
if (p->fts_instr == FTS_FOLLOW) {
p->fts_info = fts_stat(sp, p, 1);
- if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR))
+ if (p->fts_info == FTS_D && !ISSET(FTS_NOCHDIR)) {
if ((p->fts_symfd =
open(".", O_RDONLY, 0)) < 0) {
p->fts_errno = errno;
p->fts_info = FTS_ERR;
- } else
+ } else {
p->fts_flags |= FTS_SYMFOLLOW;
+ }
+ }
p->fts_instr = FTS_NOINSTR;
}
+ free(tmp);
name: t = sp->fts_path + NAPPEND(p->fts_parent);
*t++ = '/';
memmove(t, p->fts_name, p->fts_namelen + 1);
/* Move up to the parent node. */
p = tmp->fts_parent;
- free(tmp);
if (p->fts_level == FTS_ROOTPARENTLEVEL) {
/*
* Done; free everything up and set errno to 0 so the user
* can distinguish between error and EOF.
*/
+ free(tmp);
free(p);
errno = 0;
return (sp->fts_cur = NULL);
return (NULL);
}
}
+ free(tmp);
p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP;
return (sp->fts_cur = p);
}
* and fts_read. There are lots of special cases.
*
* The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is
- * set and it's a physical walk (so that symbolic links can't be directories),
- * we can do things quickly. First, if it's a 4.4BSD file system, the type
- * of the file is in the directory entry. Otherwise, we assume that the number
- * of subdirectories in a node is equal to the number of links to the parent.
- * The former skips all stat calls. The latter skips stat calls in any leaf
- * directories and for any files after the subdirectories in the directory have
- * been found, cutting the stat calls by about 2/3.
+ * set, we can use d_type to determine if the entry is a directory (or for
+ * logical walks, a directory or symlink) and not call stat for other file
+ * types. This cuts the number of stat calls significantly.
*/
static FTSENT *
fts_build(sp, type)
FTSENT *cur, *tail;
DIR *dirp;
void *adjaddr;
- int cderrno, descend, len, level, maxlen, nlinks, oflag, saved_errno;
- char *cp;
+ int cderrno, descend, len, level, maxlen, dostat, oflag, saved_errno;
+ char *cp = NULL;
/* Set current node pointer. */
cur = sp->fts_cur;
return (NULL);
}
- /*
- * Nlinks is the number of possible entries of type directory in the
- * directory if we're cheating on stat calls, 0 if we're not doing
- * any stat calls at all, -1 if we're doing stats on everything.
- */
if (type == BNAMES)
- nlinks = 0;
- else if (ISSET(FTS_NOSTAT) && ISSET(FTS_PHYSICAL))
- nlinks = cur->fts_nlink - (ISSET(FTS_SEEDOT) ? 0 : 2);
+ dostat = F_NOSTAT;
+ else if (ISSET(FTS_NOSTAT_TYPE))
+ dostat = ISSET(FTS_PHYSICAL) ? F_D_TYPE : F_D_TYPESYM;
+ else if (ISSET(FTS_NOSTAT))
+ dostat = ISSET(FTS_PHYSICAL) ? F_STATDIR : F_STATDIRSYM;
else
- nlinks = -1;
+ dostat = F_ALWAYSSTAT;
#ifdef notdef
- (void)printf("nlinks == %d (cur: %d)\n", nlinks, cur->fts_nlink);
+ (void)printf("dostat == %d\n", dostat);
(void)printf("NOSTAT %d PHYSICAL %d SEEDOT %d\n",
ISSET(FTS_NOSTAT), ISSET(FTS_PHYSICAL), ISSET(FTS_SEEDOT));
#endif
* checking FTS_NS on the returned nodes.
*/
cderrno = 0;
- if (nlinks || type == BREAD)
+ if (dostat || type == BREAD)
if (FCHDIR(sp, dirfd(dirp))) {
- if (nlinks && type == BREAD)
+ if (dostat && type == BREAD)
cur->fts_errno = errno;
cur->fts_flags |= FTS_DONTCHDIR;
descend = 0;
* If not changing directories set a pointer so that can just append
* each new name into the path.
*/
- maxlen = sp->fts_pathlen - cur->fts_pathlen - 1;
len = NAPPEND(cur);
if (ISSET(FTS_NOCHDIR)) {
cp = sp->fts_path + len;
*cp++ = '/';
}
+ len++;
+ maxlen = sp->fts_pathlen - len;
level = cur->fts_level + 1;
/* Read the directory, attaching each entry to the `link' pointer. */
adjaddr = NULL;
- for (head = tail = NULL, nitems = 0; dp = readdir(dirp);) {
+ for (head = tail = NULL, nitems = 0; (dp = readdir(dirp)) ; ) {
if (!ISSET(FTS_SEEDOT) && ISDOT(dp->d_name))
continue;
if ((p = fts_alloc(sp, dp->d_name, (int)dp->d_namlen)) == NULL)
goto mem1;
- if (dp->d_namlen > maxlen) {
+ if (dp->d_namlen >= maxlen) { /* include space for NUL */
if (fts_palloc(sp, (size_t)dp->d_namlen)) {
/*
* No more memory for path or structures. Save
free(p);
fts_lfree(head);
(void)closedir(dirp);
- errno = saved_errno;
cur->fts_info = FTS_ERR;
SET(FTS_STOP);
+ errno = saved_errno;
return (NULL);
}
adjaddr = sp->fts_path;
maxlen = sp->fts_pathlen - sp->fts_cur->fts_pathlen - 1;
}
- p->fts_pathlen = len + dp->d_namlen + 1;
- p->fts_parent = sp->fts_cur;
p->fts_level = level;
+ p->fts_parent = sp->fts_cur;
+ p->fts_pathlen = len + dp->d_namlen;
#ifdef FTS_WHITEOUT
if (dp->d_type == DT_WHT)
#endif
if (cderrno) {
- if (nlinks) {
+ if (dostat) {
p->fts_info = FTS_NS;
p->fts_errno = cderrno;
} else
p->fts_info = FTS_NSOK;
p->fts_accpath = cur->fts_accpath;
- } else if (nlinks == 0
-#ifdef DT_DIR
- || nlinks > 0 &&
- dp->d_type != DT_DIR && dp->d_type != DT_UNKNOWN
-#endif
- ) {
- p->fts_accpath =
- ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name;
- p->fts_info = FTS_NSOK;
} else {
- /* Build a file name for fts_stat to stat. */
- if (ISSET(FTS_NOCHDIR)) {
- p->fts_accpath = p->fts_path;
- memmove(cp, p->fts_name, p->fts_namelen + 1);
- } else
- p->fts_accpath = p->fts_name;
- /* Stat it. */
- p->fts_info = fts_stat(sp, p, 0);
-
- /* Decrement link count if applicable. */
- if (nlinks > 0 && (p->fts_info == FTS_D ||
- p->fts_info == FTS_DC || p->fts_info == FTS_DOT))
- --nlinks;
+ /*
+ * We need to know all file types values that d_type may
+ * be set to. So if that changes, the following needs
+ * to be modified appropriately.
+ */
+ switch(dostat | dp->d_type) {
+ case (F_STATDIR | DT_UNKNOWN):
+ case (F_STATDIR | DT_DIR):
+ case (F_STATDIRSYM | DT_UNKNOWN):
+ case (F_STATDIRSYM | DT_DIR):
+ case (F_STATDIRSYM | DT_LNK):
+ case (F_ALWAYSSTAT | DT_UNKNOWN):
+ case (F_ALWAYSSTAT | DT_FIFO):
+ case (F_ALWAYSSTAT | DT_CHR):
+ case (F_ALWAYSSTAT | DT_DIR):
+ case (F_ALWAYSSTAT | DT_BLK):
+ case (F_ALWAYSSTAT | DT_REG):
+ case (F_ALWAYSSTAT | DT_LNK):
+ case (F_ALWAYSSTAT | DT_SOCK):
+ case (F_ALWAYSSTAT | DT_WHT):
+ case (F_D_TYPE | DT_UNKNOWN):
+ case (F_D_TYPE | DT_DIR):
+ case (F_D_TYPESYM | DT_UNKNOWN):
+ case (F_D_TYPESYM | DT_DIR):
+ case (F_D_TYPESYM | DT_LNK):
+ /* Build a file name for fts_stat to stat. */
+ if (ISSET(FTS_NOCHDIR)) {
+ p->fts_accpath = p->fts_path;
+ memmove(cp, p->fts_name, p->fts_namelen + 1);
+ } else
+ p->fts_accpath = p->fts_name;
+ /* Stat it. */
+ p->fts_info = fts_stat(sp, p, 0);
+ break;
+ case (F_D_TYPE | DT_FIFO):
+ case (F_D_TYPE | DT_CHR):
+ case (F_D_TYPE | DT_BLK):
+ case (F_D_TYPE | DT_SOCK):
+ case (F_D_TYPESYM | DT_FIFO):
+ case (F_D_TYPESYM | DT_CHR):
+ case (F_D_TYPESYM | DT_BLK):
+ case (F_D_TYPESYM | DT_SOCK):
+ p->fts_info = FTS_DEFAULT;
+ goto common_no_stat;
+ case (F_D_TYPE | DT_REG):
+ case (F_D_TYPESYM | DT_REG):
+ p->fts_info = FTS_F;
+ goto common_no_stat;
+ case (F_D_TYPE | DT_LNK):
+ p->fts_info = FTS_SL;
+ goto common_no_stat;
+ case (F_D_TYPE | DT_WHT):
+ case (F_D_TYPESYM | DT_WHT):
+ p->fts_info = FTS_W;
+ goto common_no_stat;
+ default:
+ /* No stat necessary */
+ p->fts_info = FTS_NSOK;
+common_no_stat:
+ p->fts_accpath =
+ ISSET(FTS_NOCHDIR) ? p->fts_path : p->fts_name;
+ break;
+ }
}
/* We walk in directory order so "ls -f" doesn't get upset. */
if (stat(p->fts_accpath, sbp)) {
saved_errno = errno;
if (!lstat(p->fts_accpath, sbp)) {
+ if (saved_errno == ELOOP)
+ p->fts_errno = ELOOP;
errno = 0;
return (FTS_SLNONE);
}
p->fts_errno = saved_errno;
goto err;
}
+ /*
+ * For FTS_COMFOLLOWDIR, drop back to lstat unless we have
+ * a directory
+ */
+ if (follow == -1 && !S_ISDIR(sbp->st_mode)) {
+ if (lstat(p->fts_accpath, sbp)) {
+ p->fts_errno = errno;
+ goto err;
+ }
+ }
} else if (lstat(p->fts_accpath, sbp)) {
p->fts_errno = errno;
err: memset(sbp, 0, sizeof(struct stat));
}
for (ap = sp->fts_array, p = head; p; p = p->fts_link)
*ap++ = p;
- qsort((void *)sp->fts_array, nitems, sizeof(FTSENT *), sp->fts_compar);
+#ifdef __BLOCKS__
+ if (ISSET(FTS_BLOCK_COMPAR))
+ qsort_b((void *)sp->fts_array, nitems, sizeof(FTSENT *), (int (^)(const void *, const void *))sp->fts_compar_b);
+ else
+#endif /* __BLOCKS__ */
+ qsort((void *)sp->fts_array, nitems, sizeof(FTSENT *), sp->fts_compar);
for (head = *(ap = sp->fts_array); --nitems; ++ap)
ap[0]->fts_link = ap[1];
ap[0]->fts_link = NULL;
register FTSENT *p;
/* Free a linked list of structures. */
- while (p = head) {
+ while ((p = head)) {
head = head->fts_link;
free(p);
}
* Allow essentially unlimited paths; find, rm, ls should all work on any tree.
* Most systems will allow creation of paths much longer than MAXPATHLEN, even
* though the kernel won't resolve them. Add the size (not just what's needed)
- * plus 256 bytes so don't realloc the path 2 bytes at a time.
+ * plus 256 bytes so don't realloc the path 2 bytes at a time.
*/
static int
fts_palloc(sp, more)
* already returned.
*/
static void
-fts_padjust(sp, addr)
- FTS *sp;
- void *addr;
+fts_padjust(FTS *sp, void *addr)
{
FTSENT *p;
-#define ADJUST(p) { \
- (p)->fts_accpath = \
- (char *)addr + ((p)->fts_accpath - (p)->fts_path); \
+#define ADJUST(p) do { \
+ if ((p)->fts_accpath != (p)->fts_name) { \
+ (p)->fts_accpath = \
+ (char *)addr + ((p)->fts_accpath - (p)->fts_path); \
+ } \
(p)->fts_path = addr; \
-}
+} while (0)
/* Adjust the current set of children. */
for (p = sp->fts_child; p; p = p->fts_link)
ADJUST(p);
for (max = 0; *argv; ++argv)
if ((len = strlen(*argv)) > max)
max = len;
- return (max);
+ return (max + 1);
}