summaryrefslogtreecommitdiff
path: root/dir.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2020-04-29 23:15:30 (GMT)
committerJunio C Hamano <gitster@pobox.com>2020-04-29 23:15:31 (GMT)
commit6eacc39b6d2508b6a7522902330c29714c99f5f2 (patch)
treeeb45e53464fed0f3251b6e81d4ea39fc87df3e65 /dir.c
parent48eee46d6accbb6ef491f811fd9a9c72843d469b (diff)
parentc0af173a136785b3cfad4bd414b2fb10a130760a (diff)
downloadgit-6eacc39b6d2508b6a7522902330c29714c99f5f2.zip
git-6eacc39b6d2508b6a7522902330c29714c99f5f2.tar.gz
git-6eacc39b6d2508b6a7522902330c29714c99f5f2.tar.bz2
Merge branch 'en/fill-directory-exponential'
The directory traversal code had redundant recursive calls which made its performance characteristics exponential with respect to the depth of the tree, which was corrected.

* en/fill-directory-exponential:
  completion: fix 'git add' on paths under an untracked directory
  Fix error-prone fill_directory() API; make it only return matches
  dir: replace double pathspec matching with single in treat_directory()
  dir: include DIR_KEEP_UNTRACKED_CONTENTS handling in treat_directory()
  dir: replace exponential algorithm with a linear one
  dir: refactor treat_directory to clarify control flow
  dir: fix confusion based on variable tense
  dir: fix broken comment
  dir: consolidate treat_path() and treat_one_path()
  dir: fix simple typo in comment
  t3000: add more testcases testing a variety of ls-files issues
  t7063: more thorough status checking
Diffstat (limited to 'dir.c')
-rw-r--r--dir.c422
1 files changed, 248 insertions, 174 deletions
diff --git a/dir.c b/dir.c
index 0ffb1b3..d97e955 100644
--- a/dir.c
+++ b/dir.c
@@ -1727,36 +1727,59 @@ static enum exist_status directory_exists_in_index(struct index_state *istate,
static enum path_treatment treat_directory(struct dir_struct *dir,
struct index_state *istate,
struct untracked_cache_dir *untracked,
- const char *dirname, int len, int baselen, int exclude,
+ const char *dirname, int len, int baselen, int excluded,
const struct pathspec *pathspec)
{
- int nested_repo = 0;
-
+ /*
+ * WARNING: From this function, you can return path_recurse or you
+ * can call read_directory_recursive() (or neither), but
+ * you CAN'T DO BOTH.
+ */
+ enum path_treatment state;
+ int matches_how = 0;
+ int nested_repo = 0, check_only, stop_early;
+ int old_ignored_nr, old_untracked_nr;
/* The "len-1" is to strip the final '/' */
- switch (directory_exists_in_index(istate, dirname, len-1)) {
- case index_directory:
- return path_recurse;
+ enum exist_status status = directory_exists_in_index(istate, dirname, len-1);
- case index_gitdir:
+ if (status == index_directory)
+ return path_recurse;
+ if (status == index_gitdir)
return path_none;
+ if (status != index_nonexistent)
+ BUG("Unhandled value for directory_exists_in_index: %d\n", status);
- case index_nonexistent:
- if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
- !(dir->flags & DIR_NO_GITLINKS)) {
- struct strbuf sb = STRBUF_INIT;
- strbuf_addstr(&sb, dirname);
- nested_repo = is_nonbare_repository_dir(&sb);
- strbuf_release(&sb);
- }
- if (nested_repo)
- return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
- (exclude ? path_excluded : path_untracked));
+ /*
+ * We don't want to descend into paths that don't match the necessary
+ * patterns. Clearly, if we don't have a pathspec, then we can't check
+ * for matching patterns. Also, if (excluded) then we know we matched
+ * the exclusion patterns so as an optimization we can skip checking
+ * for matching patterns.
+ */
+ if (pathspec && !excluded) {
+ matches_how = do_match_pathspec(istate, pathspec, dirname, len,
+ 0 /* prefix */, NULL /* seen */,
+ DO_MATCH_LEADING_PATHSPEC);
+ if (!matches_how)
+ return path_none;
+ }
- if (dir->flags & DIR_SHOW_OTHER_DIRECTORIES)
- break;
- if (exclude &&
- (dir->flags & DIR_SHOW_IGNORED_TOO) &&
- (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) {
+
+ if ((dir->flags & DIR_SKIP_NESTED_GIT) ||
+ !(dir->flags & DIR_NO_GITLINKS)) {
+ struct strbuf sb = STRBUF_INIT;
+ strbuf_addstr(&sb, dirname);
+ nested_repo = is_nonbare_repository_dir(&sb);
+ strbuf_release(&sb);
+ }
+ if (nested_repo)
+ return ((dir->flags & DIR_SKIP_NESTED_GIT) ? path_none :
+ (excluded ? path_excluded : path_untracked));
+
+ if (!(dir->flags & DIR_SHOW_OTHER_DIRECTORIES)) {
+ if (excluded &&
+ (dir->flags & DIR_SHOW_IGNORED_TOO) &&
+ (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING)) {
/*
* This is an excluded directory and we are
@@ -1783,18 +1806,134 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
/* This is the "show_other_directories" case */
- if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
- return exclude ? path_excluded : path_untracked;
+ /*
+ * If we have a pathspec which could match something _below_ this
+ * directory (e.g. when checking 'subdir/' having a pathspec like
+ * 'subdir/some/deep/path/file' or 'subdir/widget-*.c'), then we
+ * need to recurse.
+ */
+ if (matches_how == MATCHED_RECURSIVELY_LEADING_PATHSPEC)
+ return path_recurse;
+
+ /*
+ * Other than the path_recurse case immediately above, we only need
+ * to recurse into untracked/ignored directories if either of the
+ * following bits is set:
+ * - DIR_SHOW_IGNORED_TOO (because then we need to determine if
+ * there are ignored directories below)
+ * - DIR_HIDE_EMPTY_DIRECTORIES (because we have to determine if
+ * the directory is empty)
+ */
+ if (!(dir->flags & (DIR_SHOW_IGNORED_TOO | DIR_HIDE_EMPTY_DIRECTORIES)))
+ return excluded ? path_excluded : path_untracked;
+ /*
+ * ...and even if DIR_SHOW_IGNORED_TOO is set, we can still avoid
+ * recursing into ignored directories if the path is excluded and
+ * DIR_SHOW_IGNORED_TOO_MODE_MATCHING is also set.
+ */
+ if (excluded &&
+ (dir->flags & DIR_SHOW_IGNORED_TOO) &&
+ (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING))
+ return path_excluded;
+
+ /*
+ * If we don't want to know all the paths under an
+ * untracked or ignored directory, we still need to go into the
+ * directory to determine if it is empty (because an empty directory
+ * should be path_none instead of path_excluded or path_untracked).
+ */
+ check_only = ((dir->flags & DIR_HIDE_EMPTY_DIRECTORIES) &&
+ !(dir->flags & DIR_SHOW_IGNORED_TOO));
+
+ /*
+ * However, there's another optimization possible as a subset of
+ * check_only, based on the cases we have to consider:
+ * A) Directory matches no exclude patterns:
+ * * Directory is empty => path_none
+ * * Directory has an untracked file under it => path_untracked
+ * * Directory has only ignored files under it => path_excluded
+ * B) Directory matches an exclude pattern:
+ * * Directory is empty => path_none
+ * * Directory has an untracked file under it => path_excluded
+ * * Directory has only ignored files under it => path_excluded
+ * In case A, we can exit as soon as we've found an untracked
+ * file but otherwise have to walk all files. In case B, though,
+ * we can stop at the first file we find under the directory.
+ */
+ stop_early = check_only && excluded;
+
+ /*
+ * If /every/ file within an untracked directory is ignored, then
+ * we want to treat the directory as ignored (for e.g. status
+ * --porcelain), without listing the individual ignored files
+ * underneath. To do so, we'll save the current ignored_nr, and
+ * pop all the ones added after it if it turns out the entire
+ * directory is ignored. Also, when DIR_SHOW_IGNORED_TOO and
+ * !DIR_KEEP_UNTRACKED_CONTENTS then we don't want to show
+ * untracked paths so will need to pop all those off the list
+ * after we traverse.
+ */
+ old_ignored_nr = dir->ignored_nr;
+ old_untracked_nr = dir->nr;
+
+ /* Actually recurse into dirname now, we'll fixup the state later. */
untracked = lookup_untracked(dir->untracked, untracked,
dirname + baselen, len - baselen);
+ state = read_directory_recursive(dir, istate, dirname, len, untracked,
+ check_only, stop_early, pathspec);
+
+ /* There are a variety of reasons we may need to fixup the state... */
+ if (state == path_excluded) {
+ /* state == path_excluded implies all paths under
+ * dirname were ignored...
+ *
+ * if running e.g. `git status --porcelain --ignored=matching`,
+ * then we want to see the subpaths that are ignored.
+ *
+ * if running e.g. just `git status --porcelain`, then
+ * we just want the directory itself to be listed as ignored
+ * and not the individual paths underneath.
+ */
+ int want_ignored_subpaths =
+ ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
+ (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING));
+
+ if (want_ignored_subpaths) {
+ /*
+ * with --ignored=matching, we want the subpaths
+ * INSTEAD of the directory itself.
+ */
+ state = path_none;
+ } else {
+ int i;
+ for (i = old_ignored_nr + 1; i<dir->ignored_nr; ++i)
+ FREE_AND_NULL(dir->ignored[i]);
+ dir->ignored_nr = old_ignored_nr;
+ }
+ }
/*
- * If this is an excluded directory, then we only need to check if
- * the directory contains any files.
+ * We may need to ignore some of the untracked paths we found while
+ * traversing subdirectories.
*/
- return read_directory_recursive(dir, istate, dirname, len,
- untracked, 1, exclude, pathspec);
+ if ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
+ !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) {
+ int i;
+ for (i = old_untracked_nr + 1; i<dir->nr; ++i)
+ FREE_AND_NULL(dir->entries[i]);
+ dir->nr = old_untracked_nr;
+ }
+
+ /*
+ * If there is nothing under the current directory and we are not
+ * hiding empty directories, then we need to report on the
+ * untracked or ignored status of the directory itself.
+ */
+ if (state == path_none && !(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
+ state = excluded ? path_excluded : path_untracked;
+
+ return state;
}
/*
@@ -1934,85 +2073,6 @@ static int resolve_dtype(int dtype, struct index_state *istate,
return dtype;
}
-static enum path_treatment treat_one_path(struct dir_struct *dir,
- struct untracked_cache_dir *untracked,
- struct index_state *istate,
- struct strbuf *path,
- int baselen,
- const struct pathspec *pathspec,
- int dtype)
-{
- int exclude;
- int has_path_in_index = !!index_file_exists(istate, path->buf, path->len, ignore_case);
- enum path_treatment path_treatment;
-
- dtype = resolve_dtype(dtype, istate, path->buf, path->len);
-
- /* Always exclude indexed files */
- if (dtype != DT_DIR && has_path_in_index)
- return path_none;
-
- /*
- * When we are looking at a directory P in the working tree,
- * there are three cases:
- *
- * (1) P exists in the index. Everything inside the directory P in
- * the working tree needs to go when P is checked out from the
- * index.
- *
- * (2) P does not exist in the index, but there is P/Q in the index.
- * We know P will stay a directory when we check out the contents
- * of the index, but we do not know yet if there is a directory
- * P/Q in the working tree to be killed, so we need to recurse.
- *
- * (3) P does not exist in the index, and there is no P/Q in the index
- * to require P to be a directory, either. Only in this case, we
- * know that everything inside P will not be killed without
- * recursing.
- */
- if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
- (dtype == DT_DIR) &&
- !has_path_in_index &&
- (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
- return path_none;
-
- exclude = is_excluded(dir, istate, path->buf, &dtype);
-
- /*
- * Excluded? If we don't explicitly want to show
- * ignored files, ignore it
- */
- if (exclude && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
- return path_excluded;
-
- switch (dtype) {
- default:
- return path_none;
- case DT_DIR:
- strbuf_addch(path, '/');
- path_treatment = treat_directory(dir, istate, untracked,
- path->buf, path->len,
- baselen, exclude, pathspec);
- /*
- * If 1) we only want to return directories that
- * match an exclude pattern and 2) this directory does
- * not match an exclude pattern but all of its
- * contents are excluded, then indicate that we should
- * recurse into this directory (instead of marking the
- * directory itself as an ignored path).
- */
- if (!exclude &&
- path_treatment == path_excluded &&
- (dir->flags & DIR_SHOW_IGNORED_TOO) &&
- (dir->flags & DIR_SHOW_IGNORED_TOO_MODE_MATCHING))
- return path_recurse;
- return path_treatment;
- case DT_REG:
- case DT_LNK:
- return exclude ? path_excluded : path_untracked;
- }
-}
-
static enum path_treatment treat_path_fast(struct dir_struct *dir,
struct untracked_cache_dir *untracked,
struct cached_dir *cdir,
@@ -2021,6 +2081,11 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
int baselen,
const struct pathspec *pathspec)
{
+ /*
+ * WARNING: From this function, you can return path_recurse or you
+ * can call read_directory_recursive() (or neither), but
+ * you CAN'T DO BOTH.
+ */
strbuf_setlen(path, baselen);
if (!cdir->ucd) {
strbuf_addstr(path, cdir->file);
@@ -2054,6 +2119,8 @@ static enum path_treatment treat_path(struct dir_struct *dir,
int baselen,
const struct pathspec *pathspec)
{
+ int has_path_in_index, dtype, excluded;
+
if (!cdir->d_name)
return treat_path_fast(dir, untracked, cdir, istate, path,
baselen, pathspec);
@@ -2064,8 +2131,72 @@ static enum path_treatment treat_path(struct dir_struct *dir,
if (simplify_away(path->buf, path->len, pathspec))
return path_none;
- return treat_one_path(dir, untracked, istate, path, baselen, pathspec,
- cdir->d_type);
+ dtype = resolve_dtype(cdir->d_type, istate, path->buf, path->len);
+
+ /* Always exclude indexed files */
+ has_path_in_index = !!index_file_exists(istate, path->buf, path->len,
+ ignore_case);
+ if (dtype != DT_DIR && has_path_in_index)
+ return path_none;
+
+ /*
+ * When we are looking at a directory P in the working tree,
+ * there are three cases:
+ *
+ * (1) P exists in the index. Everything inside the directory P in
+ * the working tree needs to go when P is checked out from the
+ * index.
+ *
+ * (2) P does not exist in the index, but there is P/Q in the index.
+ * We know P will stay a directory when we check out the contents
+ * of the index, but we do not know yet if there is a directory
+ * P/Q in the working tree to be killed, so we need to recurse.
+ *
+ * (3) P does not exist in the index, and there is no P/Q in the index
+ * to require P to be a directory, either. Only in this case, we
+ * know that everything inside P will not be killed without
+ * recursing.
+ */
+ if ((dir->flags & DIR_COLLECT_KILLED_ONLY) &&
+ (dtype == DT_DIR) &&
+ !has_path_in_index &&
+ (directory_exists_in_index(istate, path->buf, path->len) == index_nonexistent))
+ return path_none;
+
+ excluded = is_excluded(dir, istate, path->buf, &dtype);
+
+ /*
+ * Excluded? If we don't explicitly want to show
+ * ignored files, ignore it
+ */
+ if (excluded && !(dir->flags & (DIR_SHOW_IGNORED|DIR_SHOW_IGNORED_TOO)))
+ return path_excluded;
+
+ switch (dtype) {
+ default:
+ return path_none;
+ case DT_DIR:
+ /*
+ * WARNING: Do not ignore/amend the return value from
+ * treat_directory(), and especially do not change it to return
+ * path_recurse as that can cause exponential slowdown.
+ * Instead, modify treat_directory() to return the right value.
+ */
+ strbuf_addch(path, '/');
+ return treat_directory(dir, istate, untracked,
+ path->buf, path->len,
+ baselen, excluded, pathspec);
+ case DT_REG:
+ case DT_LNK:
+ if (excluded)
+ return path_excluded;
+ if (pathspec &&
+ !do_match_pathspec(istate, pathspec, path->buf, path->len,
+ 0 /* prefix */, NULL /* seen */,
+ 0 /* flags */))
+ return path_none;
+ return path_untracked;
+ }
}
static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -2245,7 +2376,7 @@ static void add_path_to_appropriate_result_list(struct dir_struct *dir,
* If 'stop_at_first_file' is specified, 'path_excluded' is returned
* to signal that a file was found. This is the least significant value that
* indicates that a file was encountered that does not depend on the order of
- * whether an untracked or exluded path was encountered first.
+ * whether an untracked or excluded path was encountered first.
*
* Returns the most significant path_treatment value encountered in the scan.
* If 'stop_at_first_file' is specified, `path_excluded` is the most
@@ -2258,14 +2389,10 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
int stop_at_first_file, const struct pathspec *pathspec)
{
/*
- * WARNING WARNING WARNING:
- *
- * Any updates to the traversal logic here may need corresponding
- * updates in treat_leading_path(). See the commit message for the
- * commit adding this warning as well as the commit preceding it
- * for details.
+ * WARNING: Do NOT recurse unless path_recurse is returned from
+ * treat_path(). Recursing on any other return value
+ * can result in exponential slowdown.
*/
-
struct cached_dir cdir;
enum path_treatment state, subdir_state, dir_state = path_none;
struct strbuf path = STRBUF_INIT;
@@ -2287,13 +2414,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
dir_state = state;
/* recurse into subdir if instructed by treat_path */
- if ((state == path_recurse) ||
- ((state == path_untracked) &&
- (resolve_dtype(cdir.d_type, istate, path.buf, path.len) == DT_DIR) &&
- ((dir->flags & DIR_SHOW_IGNORED_TOO) ||
- (pathspec &&
- do_match_pathspec(istate, pathspec, path.buf, path.len,
- baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)))) {
+ if (state == path_recurse) {
struct untracked_cache_dir *ud;
ud = lookup_untracked(dir->untracked, untracked,
path.buf + baselen,
@@ -2341,7 +2462,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
add_untracked(untracked, path.buf + baselen);
break;
}
- /* skip the dir_add_* part */
+ /* skip the add_path_to_appropriate_result_list() */
continue;
}
@@ -2377,15 +2498,6 @@ static int treat_leading_path(struct dir_struct *dir,
const char *path, int len,
const struct pathspec *pathspec)
{
- /*
- * WARNING WARNING WARNING:
- *
- * Any updates to the traversal logic here may need corresponding
- * updates in read_directory_recursive(). See 777b420347 (dir:
- * synchronize treat_leading_path() and read_directory_recursive(),
- * 2019-12-19) and its parent commit for details.
- */
-
struct strbuf sb = STRBUF_INIT;
struct strbuf subdir = STRBUF_INIT;
int prevlen, baselen;
@@ -2436,23 +2548,7 @@ static int treat_leading_path(struct dir_struct *dir,
strbuf_reset(&subdir);
strbuf_add(&subdir, path+prevlen, baselen-prevlen);
cdir.d_name = subdir.buf;
- state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen,
- pathspec);
- if (state == path_untracked &&
- resolve_dtype(cdir.d_type, istate, sb.buf, sb.len) == DT_DIR &&
- (dir->flags & DIR_SHOW_IGNORED_TOO ||
- do_match_pathspec(istate, pathspec, sb.buf, sb.len,
- baselen, NULL, DO_MATCH_LEADING_PATHSPEC) == MATCHED_RECURSIVELY_LEADING_PATHSPEC)) {
- if (!match_pathspec(istate, pathspec, sb.buf, sb.len,
- 0 /* prefix */, NULL,
- 0 /* do NOT special case dirs */))
- state = path_none;
- add_path_to_appropriate_result_list(dir, NULL, &cdir,
- istate,
- &sb, baselen,
- pathspec, state);
- state = path_recurse;
- }
+ state = treat_path(dir, NULL, &cdir, istate, &sb, prevlen, pathspec);
if (state != path_recurse)
break; /* do not recurse into it */
@@ -2652,28 +2748,6 @@ int read_directory(struct dir_struct *dir, struct index_state *istate,
QSORT(dir->entries, dir->nr, cmp_dir_entry);
QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry);
- /*
- * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will
- * also pick up untracked contents of untracked dirs; by default
- * we discard these, but given DIR_KEEP_UNTRACKED_CONTENTS we do not.
- */
- if ((dir->flags & DIR_SHOW_IGNORED_TOO) &&
- !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) {
- int i, j;
-
- /* remove from dir->entries untracked contents of untracked dirs */
- for (i = j = 0; j < dir->nr; j++) {
- if (i &&
- check_dir_entry_contains(dir->entries[i - 1], dir->entries[j])) {
- FREE_AND_NULL(dir->entries[j]);
- } else {
- dir->entries[i++] = dir->entries[j];
- }
- }
-
- dir->nr = i;
- }
-
trace_performance_leave("read directory %.*s", len, path);
if (dir->untracked) {
static int force_untracked_cache = -1;