From b3487ccc0bffc28e134333b164d0d84fdd15d9f4 Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:49 -0400 Subject: t7300: clean -d should skip dirs with ignored files If git sees a directory which contains only untracked and ignored files, clean -d should not remove that directory. It was recently discovered that this is *not* true of git clean -d, and it's possible that this has never worked correctly; this test and its accompanying patch series aims to fix that. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index b89fd2a..3a2d709 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -653,4 +653,20 @@ test_expect_success 'git clean -d respects pathspecs (pathspec is prefix of dir) test_path_is_dir foobar ' +test_expect_failure 'git clean -d skips untracked dirs containing ignored files' ' + echo /foo/bar >.gitignore && + echo ignoreme >>.gitignore && + rm -rf foo && + mkdir -p foo/a/aa/aaa foo/b/bb/bbb && + touch foo/bar foo/baz foo/a/aa/ignoreme foo/b/ignoreme foo/b/bb/1 foo/b/bb/2 && + git clean -df && + test_path_is_dir foo && + test_path_is_file foo/bar && + test_path_is_missing foo/baz && + test_path_is_file foo/a/aa/ignoreme && + test_path_is_missing foo/a/aa/aaa && + test_path_is_file foo/b/ignoreme && + test_path_is_missing foo/b/bb +' + test_done -- cgit v0.10.2-6-g49f6 From 0a81d4a55917514091727696a09ac649d03b57ff Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:50 -0400 Subject: t7061: status --ignored should search untracked dirs Per eb8c5b87, `status --ignored` by design does not list ignored files if they are in a directory which contains only ignored and untracked files (which is itself considered to be untracked) without `-uall`. This does not make sense for `--ignored`, which claims to "Show ignored files as well." Thus we revisit eb8c5b87 and decide that for such directories, `status --ignored` will list the directory as untracked *and* list all ignored files within said directory even without `-uall`. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index cdc0747..15e7592 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -9,9 +9,10 @@ cat >expected <<\EOF ?? actual ?? expected ?? untracked/ +!! untracked/ignored EOF -test_expect_success 'status untracked directory with --ignored' ' +test_expect_failure 'status untracked directory with --ignored' ' echo "ignored" >.gitignore && mkdir untracked && : >untracked/ignored && @@ -20,7 +21,7 @@ test_expect_success 'status untracked directory with --ignored' ' test_cmp expected actual ' -test_expect_success 'same with gitignore starting with BOM' ' +test_expect_failure 'same with gitignore starting with BOM' ' printf "\357\273\277ignored\n" >.gitignore && mkdir -p untracked && : >untracked/ignored && -- cgit v0.10.2-6-g49f6 From df5bcdf83aeb94718602ebc8c0f597166bb493f1 Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:51 -0400 Subject: dir: recurse into untracked dirs for ignored files We consider directories containing only untracked and ignored files to be themselves untracked, which in the usual case means we don't have to search these directories. This is problematic when we want to collect ignored files with DIR_SHOW_IGNORED_TOO, though, so we teach read_directory_recursive() to recurse into untracked directories to find the ignored files they contain when DIR_SHOW_IGNORED_TOO is set. This has the side effect of also collecting all untracked files in untracked directories as well. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano diff --git a/dir.c b/dir.c index aeeb5ce..6b4eeef 100644 --- a/dir.c +++ b/dir.c @@ -1747,7 +1747,10 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, dir_state = state; /* recurse into subdir if instructed by treat_path */ - if (state == path_recurse) { + if ((state == path_recurse) || + ((state == path_untracked) && + (dir->flags & DIR_SHOW_IGNORED_TOO) && + (get_dtype(cdir.de, path.buf, path.len) == DT_DIR))) { struct untracked_cache_dir *ud; ud = lookup_untracked(dir->untracked, untracked, path.buf + baselen, -- cgit v0.10.2-6-g49f6 From fb898888491b83c9a3396fb559032ca78807a0c0 Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:52 -0400 Subject: dir: hide untracked contents of untracked dirs When we taught read_directory_recursive() to recurse into untracked directories in search of ignored files given DIR_SHOW_IGNORED_TOO, that had the side effect of teaching it to collect the untracked contents of untracked directories. It doesn't always make sense to return these, though (we do need them for `clean -d`), so we introduce a flag (DIR_KEEP_UNTRACKED_CONTENTS) to control whether or not read_directory() strips dir->entries of the untracked contents of untracked dirs. We also introduce check_contains() to check if one dir_entry corresponds to a path which contains the path corresponding to another dir_entry. This also fixes known breakages in t7061, since status --ignored now searches untracked directories for ignored files. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/api-directory-listing.txt b/Documentation/technical/api-directory-listing.txt index 7f8e78d..6c77b49 100644 --- a/Documentation/technical/api-directory-listing.txt +++ b/Documentation/technical/api-directory-listing.txt @@ -33,6 +33,12 @@ The notable options are: Similar to `DIR_SHOW_IGNORED`, but return ignored files in `ignored[]` in addition to untracked files in `entries[]`. +`DIR_KEEP_UNTRACKED_CONTENTS`::: + + Only has meaning if `DIR_SHOW_IGNORED_TOO` is also set; if this is set, the + untracked contents of untracked directories are also returned in + `entries[]`. + `DIR_COLLECT_IGNORED`::: Special mode for git-add. Return ignored files in `ignored[]` and diff --git a/dir.c b/dir.c index 6b4eeef..61561ea 100644 --- a/dir.c +++ b/dir.c @@ -1813,6 +1813,14 @@ static int cmp_name(const void *p1, const void *p2) return name_compare(e1->name, e1->len, e2->name, e2->len); } +/* check if *out lexically strictly contains *in */ +static int check_contains(const struct dir_entry *out, const struct dir_entry *in) +{ + return (out->len < in->len) && + (out->name[out->len - 1] == '/') && + !memcmp(out->name, in->name, out->len); +} + static int treat_leading_path(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec) @@ -2028,6 +2036,29 @@ int read_directory(struct dir_struct *dir, const char *path, read_directory_recursive(dir, path, len, untracked, 0, pathspec); QSORT(dir->entries, dir->nr, cmp_name); QSORT(dir->ignored, dir->ignored_nr, cmp_name); + + /* + * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will + * also pick up untracked contents of untracked dirs; by default + * we discard these, but given DIR_KEEP_UNTRACKED_CONTENTS we do not. + */ + if ((dir->flags & DIR_SHOW_IGNORED_TOO) && + !(dir->flags & DIR_KEEP_UNTRACKED_CONTENTS)) { + int i, j; + + /* remove from dir->entries untracked contents of untracked dirs */ + for (i = j = 0; j < dir->nr; j++) { + if (i && check_contains(dir->entries[i - 1], dir->entries[j])) { + free(dir->entries[j]); + dir->entries[j] = NULL; + } else { + dir->entries[i++] = dir->entries[j]; + } + } + + dir->nr = i; + } + if (dir->untracked) { static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS); trace_printf_key(&trace_untracked_stats, diff --git a/dir.h b/dir.h index bf23a47..650e54b 100644 --- a/dir.h +++ b/dir.h @@ -151,7 +151,8 @@ struct dir_struct { DIR_NO_GITLINKS = 1<<3, DIR_COLLECT_IGNORED = 1<<4, DIR_SHOW_IGNORED_TOO = 1<<5, - DIR_COLLECT_KILLED_ONLY = 1<<6 + DIR_COLLECT_KILLED_ONLY = 1<<6, + DIR_KEEP_UNTRACKED_CONTENTS = 1<<7 } flags; struct dir_entry **entries; struct dir_entry **ignored; diff --git a/t/t7061-wtstatus-ignore.sh b/t/t7061-wtstatus-ignore.sh index 15e7592..fc6013b 100755 --- a/t/t7061-wtstatus-ignore.sh +++ b/t/t7061-wtstatus-ignore.sh @@ -12,7 +12,7 @@ cat >expected <<\EOF !! untracked/ignored EOF -test_expect_failure 'status untracked directory with --ignored' ' +test_expect_success 'status untracked directory with --ignored' ' echo "ignored" >.gitignore && mkdir untracked && : >untracked/ignored && @@ -21,7 +21,7 @@ test_expect_failure 'status untracked directory with --ignored' ' test_cmp expected actual ' -test_expect_failure 'same with gitignore starting with BOM' ' +test_expect_success 'same with gitignore starting with BOM' ' printf "\357\273\277ignored\n" >.gitignore && mkdir -p untracked && : >untracked/ignored && -- cgit v0.10.2-6-g49f6 From bbf504a9957e8a2a262619641ffa30348d71a76f Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Thu, 18 May 2017 04:21:53 -0400 Subject: dir: expose cmp_name() and check_contains() We want to use cmp_name() and check_contains() (which both compare `struct dir_entry`s, the former in terms of the sort order, the latter in terms of whether one lexically contains another) outside of dir.c, so we have to (1) change their linkage and (2) rename them as appropriate for the global namespace. The second is achieved by renaming cmp_name() to cmp_dir_entry() and check_contains() to check_dir_entry_contains(). Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano diff --git a/dir.c b/dir.c index 61561ea..31c6e1d 100644 --- a/dir.c +++ b/dir.c @@ -1805,7 +1805,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir, return dir_state; } -static int cmp_name(const void *p1, const void *p2) +int cmp_dir_entry(const void *p1, const void *p2) { const struct dir_entry *e1 = *(const struct dir_entry **)p1; const struct dir_entry *e2 = *(const struct dir_entry **)p2; @@ -1814,7 +1814,7 @@ static int cmp_name(const void *p1, const void *p2) } /* check if *out lexically strictly contains *in */ -static int check_contains(const struct dir_entry *out, const struct dir_entry *in) +int check_dir_entry_contains(const struct dir_entry *out, const struct dir_entry *in) { return (out->len < in->len) && (out->name[out->len - 1] == '/') && @@ -2034,8 +2034,8 @@ int read_directory(struct dir_struct *dir, const char *path, dir->untracked = NULL; if (!len || treat_leading_path(dir, path, len, pathspec)) read_directory_recursive(dir, path, len, untracked, 0, pathspec); - QSORT(dir->entries, dir->nr, cmp_name); - QSORT(dir->ignored, dir->ignored_nr, cmp_name); + QSORT(dir->entries, dir->nr, cmp_dir_entry); + QSORT(dir->ignored, dir->ignored_nr, cmp_dir_entry); /* * If DIR_SHOW_IGNORED_TOO is set, read_directory_recursive() will @@ -2048,7 +2048,8 @@ int read_directory(struct dir_struct *dir, const char *path, /* remove from dir->entries untracked contents of untracked dirs */ for (i = j = 0; j < dir->nr; j++) { - if (i && check_contains(dir->entries[i - 1], dir->entries[j])) { + if (i && + check_dir_entry_contains(dir->entries[i - 1], dir->entries[j])) { free(dir->entries[j]); dir->entries[j] = NULL; } else { diff --git a/dir.h b/dir.h index 650e54b..edb5fda 100644 --- a/dir.h +++ b/dir.h @@ -327,6 +327,9 @@ static inline int dir_path_match(const struct dir_entry *ent, has_trailing_dir); } +int cmp_dir_entry(const void *p1, const void *p2); +int check_dir_entry_contains(const struct dir_entry *out, const struct dir_entry *in); + void untracked_cache_invalidate_path(struct index_state *, const char *); void untracked_cache_remove_from_index(struct index_state *, const char *); void untracked_cache_add_to_index(struct index_state *, const char *); -- cgit v0.10.2-6-g49f6 From 6b1db43109ab3d4c92e61874cd149779c66016db Mon Sep 17 00:00:00 2001 From: Samuel Lijin Date: Tue, 23 May 2017 06:09:37 -0400 Subject: clean: teach clean -d to preserve ignored paths There is an implicit assumption that a directory containing only untracked and ignored paths should itself be considered untracked. This makes sense in use cases where we're asking if a directory should be added to the git database, but not when we're asking if a directory can be safely removed from the working tree; as a result, clean -d would assume that an "untracked" directory containing ignored paths could be deleted, even though doing so would also remove the ignored paths. To get around this, we teach clean -d to collect ignored paths and skip an untracked directory if it contained an ignored path, instead just removing the untracked contents thereof. To achieve this, cmd_clean() has to collect all untracked contents of untracked directories, in addition to all ignored paths, to determine which untracked dirs must be skipped (because they contain ignored paths) and which ones should *not* be skipped. For this purpose, correct_untracked_entries() is introduced to prune a given dir_struct of untracked entries containing ignored paths and those untracked entries encompassed by the untracked entries which are not pruned away. A memory leak is also fixed in cmd_clean(). This also fixes the known breakage in t7300, since clean -d now skips untracked directories containing ignored paths. Signed-off-by: Samuel Lijin Signed-off-by: Junio C Hamano diff --git a/builtin/clean.c b/builtin/clean.c index d6bc3aa..7272033 100644 --- a/builtin/clean.c +++ b/builtin/clean.c @@ -851,6 +851,38 @@ static void interactive_main_loop(void) } } +static void correct_untracked_entries(struct dir_struct *dir) +{ + int src, dst, ign; + + for (src = dst = ign = 0; src < dir->nr; src++) { + /* skip paths in ignored[] that cannot be inside entries[src] */ + while (ign < dir->ignored_nr && + 0 <= cmp_dir_entry(&dir->entries[src], &dir->ignored[ign])) + ign++; + + if (ign < dir->ignored_nr && + check_dir_entry_contains(dir->entries[src], dir->ignored[ign])) { + /* entries[src] contains an ignored path, so we drop it */ + free(dir->entries[src]); + } else { + struct dir_entry *ent = dir->entries[src++]; + + /* entries[src] does not contain an ignored path, so we keep it */ + dir->entries[dst++] = ent; + + /* then discard paths in entries[] contained inside entries[src] */ + while (src < dir->nr && + check_dir_entry_contains(ent, dir->entries[src])) + free(dir->entries[src++]); + + /* compensate for the outer loop's loop control */ + src--; + } + } + dir->nr = dst; +} + int cmd_clean(int argc, const char **argv, const char *prefix) { int i, res; @@ -910,6 +942,9 @@ int cmd_clean(int argc, const char **argv, const char *prefix) dir.flags |= DIR_SHOW_OTHER_DIRECTORIES; + if (remove_directories) + dir.flags |= DIR_SHOW_IGNORED_TOO | DIR_KEEP_UNTRACKED_CONTENTS; + if (read_cache() < 0) die(_("index file corrupt")); @@ -925,6 +960,7 @@ int cmd_clean(int argc, const char **argv, const char *prefix) prefix, argv); fill_directory(&dir, &pathspec); + correct_untracked_entries(&dir); for (i = 0; i < dir.nr; i++) { struct dir_entry *ent = dir.entries[i]; @@ -952,6 +988,12 @@ int cmd_clean(int argc, const char **argv, const char *prefix) string_list_append(&del_list, rel); } + for (i = 0; i < dir.nr; i++) + free(dir.entries[i]); + + for (i = 0; i < dir.ignored_nr; i++) + free(dir.ignored[i]); + if (interactive && del_list.nr > 0) interactive_main_loop(); diff --git a/t/t7300-clean.sh b/t/t7300-clean.sh index 3a2d709..7b36954 100755 --- a/t/t7300-clean.sh +++ b/t/t7300-clean.sh @@ -653,7 +653,7 @@ test_expect_success 'git clean -d respects pathspecs (pathspec is prefix of dir) test_path_is_dir foobar ' -test_expect_failure 'git clean -d skips untracked dirs containing ignored files' ' +test_expect_success 'git clean -d skips untracked dirs containing ignored files' ' echo /foo/bar >.gitignore && echo ignoreme >>.gitignore && rm -rf foo && -- cgit v0.10.2-6-g49f6