From d11b8d342529a8fe2164ceb563ad9213902d3533 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 20 May 2009 11:04:35 -0700 Subject: write-tree --ignore-cache-tree This allows you to discard the cache-tree information before writing the tree out of the index (i.e. it always recomputes the tree object names for all the subtrees). This is only useful as a debug option, so I did not bother documenting it. Signed-off-by: Junio C Hamano diff --git a/builtin-write-tree.c b/builtin-write-tree.c index 9d64050..3a24ce8 100644 --- a/builtin-write-tree.c +++ b/builtin-write-tree.c @@ -13,7 +13,7 @@ static const char write_tree_usage[] = int cmd_write_tree(int argc, const char **argv, const char *unused_prefix) { - int missing_ok = 0, ret; + int flags = 0, ret; const char *prefix = NULL; unsigned char sha1[20]; const char *me = "git-write-tree"; @@ -22,9 +22,15 @@ int cmd_write_tree(int argc, const char **argv, const char *unused_prefix) while (1 < argc) { const char *arg = argv[1]; if (!strcmp(arg, "--missing-ok")) - missing_ok = 1; + flags |= WRITE_TREE_MISSING_OK; else if (!prefixcmp(arg, "--prefix=")) prefix = arg + 9; + else if (!prefixcmp(arg, "--ignore-cache-tree")) + /* + * This is only useful for debugging, so I + * do not bother documenting it. 
+ */ + flags |= WRITE_TREE_IGNORE_CACHE_TREE; else usage(write_tree_usage); argc--; argv++; @@ -33,7 +39,7 @@ int cmd_write_tree(int argc, const char **argv, const char *unused_prefix) if (argc > 2) die("too many options"); - ret = write_cache_as_tree(sha1, missing_ok, prefix); + ret = write_cache_as_tree(sha1, flags, prefix); switch (ret) { case 0: printf("%s\n", sha1_to_hex(sha1)); diff --git a/cache-tree.c b/cache-tree.c index 37bf35e..6dd8411 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -538,28 +538,32 @@ static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *pat return it; } -int write_cache_as_tree(unsigned char *sha1, int missing_ok, const char *prefix) +int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix) { int entries, was_valid, newfd; + struct lock_file *lock_file; /* * We can't free this memory, it becomes part of a linked list * parsed atexit() */ - struct lock_file *lock_file = xcalloc(1, sizeof(struct lock_file)); + lock_file = xcalloc(1, sizeof(struct lock_file)); newfd = hold_locked_index(lock_file, 1); entries = read_cache(); if (entries < 0) return WRITE_TREE_UNREADABLE_INDEX; + if (flags & WRITE_TREE_IGNORE_CACHE_TREE) + cache_tree_free(&(active_cache_tree)); if (!active_cache_tree) active_cache_tree = cache_tree(); was_valid = cache_tree_fully_valid(active_cache_tree); - if (!was_valid) { + int missing_ok = flags & WRITE_TREE_MISSING_OK; + if (cache_tree_update(active_cache_tree, active_cache, active_nr, missing_ok, 0) < 0) diff --git a/cache-tree.h b/cache-tree.h index e958835..eadcad8 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -30,11 +30,16 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size); int cache_tree_fully_valid(struct cache_tree *); int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int, int); +/* bitmasks to write_cache_as_tree flags */ +#define WRITE_TREE_MISSING_OK 1 +#define WRITE_TREE_IGNORE_CACHE_TREE 2 + +/* error return codes */ 
#define WRITE_TREE_UNREADABLE_INDEX (-1) #define WRITE_TREE_UNMERGED_INDEX (-2) #define WRITE_TREE_PREFIX_ERROR (-3) -int write_cache_as_tree(unsigned char *sha1, int missing_ok, const char *prefix); +int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix); void prime_cache_tree(struct cache_tree **, struct tree *); #endif -- cgit v0.10.2-6-g49f6 From b87fc96476c4218de044cf77b4c6d10d49aee78a Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 20 May 2009 15:53:57 -0700 Subject: cache-tree.c::cache_tree_find(): simplify internal API Earlier, cache_tree_find() needed to be called with a valid cache_tree, but repeated look-up may find an invalid or missing cache_tree in between. Help simplify the callers by returning NULL to mean "nothing appropriate found" when the input is NULL. Signed-off-by: Junio C Hamano diff --git a/cache-tree.c b/cache-tree.c index 6dd8411..5481e43 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -514,6 +514,8 @@ struct cache_tree *cache_tree_read(const char *buffer, unsigned long size) static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path) { + if (!it) + return NULL; while (*path) { const char *slash; struct cache_tree_sub *sub; -- cgit v0.10.2-6-g49f6 From aceae2ef9261718324bf1853401d5ee9686a54a8 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 22 May 2009 23:06:58 -0700 Subject: t4007: modernize the style This is one of the oldest scripts; update it to match more modern style. Notably, we should: - Put the test title on the same line as the "test_expect_success", and end the line with a single-quote to begin the body of the test which is one multi-line string; and - Run as many commands as possible inside test_expect_success, not outside, to catch unexpected breakages. 
Signed-off-by: Junio C Hamano diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh index 42072d7..25e7a83 100755 --- a/t/t4007-rename-3.sh +++ b/t/t4007-rename-3.sh @@ -9,32 +9,31 @@ test_description='Rename interaction with pathspec. . ./test-lib.sh . "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash -test_expect_success \ - 'prepare reference tree' \ - 'mkdir path0 path1 && - cp "$TEST_DIRECTORY"/../COPYING path0/COPYING && - git update-index --add path0/COPYING && - tree=$(git write-tree) && - echo $tree' - -test_expect_success \ - 'prepare work tree' \ - 'cp path0/COPYING path1/COPYING && - git update-index --add --remove path0/COPYING path1/COPYING' +test_expect_success 'prepare reference tree' ' + mkdir path0 path1 && + cp "$TEST_DIRECTORY"/../COPYING path0/COPYING && + git update-index --add path0/COPYING && + tree=$(git write-tree) && + echo $tree +' + +test_expect_success 'prepare work tree' ' + cp path0/COPYING path1/COPYING && + git update-index --add --remove path0/COPYING path1/COPYING +' # In the tree, there is only path0/COPYING. In the cache, path0 and # path1 both have COPYING and the latter is a copy of path0/COPYING. # Comparing the full tree with cache should tell us so. -git diff-index -C --find-copies-harder $tree >current - cat >expected <<\EOF :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 C100 path0/COPYING path1/COPYING EOF -test_expect_success \ - 'validate the result (#1)' \ - 'compare_diff_raw current expected' +test_expect_success 'copy detection' ' + git diff-index -C --find-copies-harder $tree >current && + compare_diff_raw current expected +' # In the tree, there is only path0/COPYING. In the cache, path0 and # path1 both have COPYING and the latter is a copy of path0/COPYING. @@ -42,49 +41,45 @@ test_expect_success \ # path1/COPYING suddenly appearing from nowhere, not detected as # a copy from path0/COPYING. 
-git diff-index -C $tree path1 >current - cat >expected <<\EOF :000000 100644 0000000000000000000000000000000000000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 A path1/COPYING EOF -test_expect_success \ - 'validate the result (#2)' \ - 'compare_diff_raw current expected' - -test_expect_success \ - 'tweak work tree' \ - 'rm -f path0/COPYING && - git update-index --remove path0/COPYING' +test_expect_success 'copy, limited to a subtree' ' + git diff-index -C --find-copies-harder $tree path1 >current && + compare_diff_raw current expected +' +test_expect_success 'tweak work tree' ' + rm -f path0/COPYING && + git update-index --remove path0/COPYING +' # In the tree, there is only path0/COPYING. In the cache, path0 does # not have COPYING anymore and path1 has COPYING which is a copy of # path0/COPYING. Showing the full tree with cache should tell us about # the rename. -git diff-index -C $tree >current - cat >expected <<\EOF :100644 100644 6ff87c4664981e4397625791c8ea3bbb5f2279a3 6ff87c4664981e4397625791c8ea3bbb5f2279a3 R100 path0/COPYING path1/COPYING EOF -test_expect_success \ - 'validate the result (#3)' \ - 'compare_diff_raw current expected' +test_expect_success 'rename detection' ' + git diff-index -C --find-copies-harder $tree >current && + compare_diff_raw current expected +' # In the tree, there is only path0/COPYING. In the cache, path0 does # not have COPYING anymore and path1 has COPYING which is a copy of # path0/COPYING. When we say we care only about path1, we should just # see path1/COPYING appearing from nowhere. 
-git diff-index -C $tree path1 >current - cat >expected <<\EOF :000000 100644 0000000000000000000000000000000000000000 6ff87c4664981e4397625791c8ea3bbb5f2279a3 A path1/COPYING EOF -test_expect_success \ - 'validate the result (#4)' \ - 'compare_diff_raw current expected' +test_expect_success 'rename, limited to a subtree' ' + git diff-index -C --find-copies-harder $tree path1 >current && + compare_diff_raw current expected +' test_done -- cgit v0.10.2-6-g49f6 From b65982b60876c8f5f4d3b2898d5174f4812552b1 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 20 May 2009 15:57:22 -0700 Subject: Optimize "diff-index --cached" using cache-tree When running "diff-index --cached" after making a change to only a small portion of the index, there is no point unpacking unchanged subtrees into the index recursively, only to find that all entries match anyway. Tweak unpack_trees() logic that is used to read in the tree object to catch the case where the tree entry we are looking at matches the index as a whole by looking at the cache-tree. As an exercise, after modifying a few paths in the kernel tree, here are a few numbers on my Athlon 64X2 3800+: (without patch, hot cache) $ /usr/bin/time git diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+9407minor)pagefaults 0swaps (with patch, hot cache) $ /usr/bin/time ../git.git/git-diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... 
A arche 0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+2446minor)pagefaults 0swaps Cold cache numbers are very impressive, but it does not matter very much in practice: (without patch, cold cache) $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches' $ /usr/bin/time git diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k 247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps (with patch, cold cache) $ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches' $ /usr/bin/time ../git.git/git-diff --cached --raw :100644 100644 b57e1f5... e69de29... M Makefile :100644 000000 8c86b72... 0000000... D arch/x86/Makefile :000000 100644 0000000... e69de29... A arche 0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k 18440inputs+0outputs (79major+2369minor)pagefaults 0swaps This of course helps "git status" as well. (without patch, hot cache) $ /usr/bin/time ../git.git/git-status >/dev/null 0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+5336outputs (0major+10970minor)pagefaults 0swaps (with patch, hot cache) $ /usr/bin/time ../git.git/git-status >/dev/null 0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+5336outputs (0major+3921minor)pagefaults 0swaps Signed-off-by: Junio C Hamano diff --git a/cache-tree.c b/cache-tree.c index 5481e43..16a65df 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -631,3 +631,35 @@ void prime_cache_tree(struct cache_tree **it, struct tree *tree) *it = cache_tree(); prime_cache_tree_rec(*it, tree); } + +/* + * find the cache_tree that corresponds to the current level without + * exploding the full path into textual form. The root of the + * cache tree is given as "root", and our current level is "info". 
+ * (1) When at root level, info->prev is NULL, so it is "root" itself. + * (2) Otherwise, find the cache_tree that corresponds to one level + * above us, and find ourselves in there. + */ +static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root, + struct traverse_info *info) +{ + struct cache_tree *our_parent; + + if (!info->prev) + return root; + our_parent = find_cache_tree_from_traversal(root, info->prev); + return cache_tree_find(our_parent, info->name.path); +} + +int cache_tree_matches_traversal(struct cache_tree *root, + struct name_entry *ent, + struct traverse_info *info) +{ + struct cache_tree *it; + + it = find_cache_tree_from_traversal(root, info); + it = cache_tree_find(it, ent->path); + if (it && it->entry_count > 0 && !hashcmp(ent->sha1, it->sha1)) + return it->entry_count; + return 0; +} diff --git a/cache-tree.h b/cache-tree.h index eadcad8..3df641f 100644 --- a/cache-tree.h +++ b/cache-tree.h @@ -2,6 +2,7 @@ #define CACHE_TREE_H #include "tree.h" +#include "tree-walk.h" struct cache_tree; struct cache_tree_sub { @@ -42,4 +43,6 @@ int cache_tree_update(struct cache_tree *, struct cache_entry **, int, int, int) int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix); void prime_cache_tree(struct cache_tree **, struct tree *); +extern int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, struct traverse_info *info); + #endif diff --git a/diff-lib.c b/diff-lib.c index a310fb2..1cb97af 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -446,6 +446,7 @@ int run_diff_index(struct rev_info *revs, int cached) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; + opts.diff_index_cached = cached; opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; @@ -502,6 +503,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; + opts.diff_index_cached = 1; opts.merge = 1; 
opts.fn = oneway_diff; opts.unpack_data = &revs; diff --git a/unpack-trees.c b/unpack-trees.c index aaacaf1..8eb3ddb 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -326,6 +326,23 @@ static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, str if (src[0]) conflicts |= 1; } + + /* special case: "diff-index --cached" looking at a tree */ + if (o->diff_index_cached && + n == 1 && dirmask == 1 && S_ISDIR(names->mode)) { + int matches; + matches = cache_tree_matches_traversal(o->src_index->cache_tree, + names, info); + /* + * Everything under the name matches. Adjust o->pos to + * skip the entire hierarchy. + */ + if (matches) { + o->pos += matches; + return mask; + } + } + if (traverse_trees_recursive(n, dirmask, conflicts, names, info) < 0) return -1; diff --git a/unpack-trees.h b/unpack-trees.h index 0d26f3d..1e0e232 100644 --- a/unpack-trees.h +++ b/unpack-trees.h @@ -27,6 +27,7 @@ struct unpack_trees_options { aggressive:1, skip_unmerged:1, initial_checkout:1, + diff_index_cached:1, gently:1; const char *prefix; int pos; -- cgit v0.10.2-6-g49f6 From a0919ced8a5efe938cf97c74a0f851cbbe00aaf6 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 22 May 2009 23:14:25 -0700 Subject: Avoid "diff-index --cached" optimization under --find-copies-harder When find-copies-harder is in effect, the diff frontends are expected to feed all paths, not just changed paths, to the diffcore, so that copy sources can be picked up. In such a case, not descending into subtrees using the cache-tree information is simply wrong. 
Signed-off-by: Junio C Hamano diff --git a/diff-lib.c b/diff-lib.c index 1cb97af..ae75eac 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -446,7 +446,8 @@ int run_diff_index(struct rev_info *revs, int cached) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = cached; - opts.diff_index_cached = cached; + opts.diff_index_cached = (cached && + !DIFF_OPT_TST(&revs->diffopt, FIND_COPIES_HARDER)); opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = revs; @@ -503,7 +504,7 @@ int do_diff_cache(const unsigned char *tree_sha1, struct diff_options *opt) memset(&opts, 0, sizeof(opts)); opts.head_idx = 1; opts.index_only = 1; - opts.diff_index_cached = 1; + opts.diff_index_cached = !DIFF_OPT_TST(opt, FIND_COPIES_HARDER); opts.merge = 1; opts.fn = oneway_diff; opts.unpack_data = &revs; diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh index 25e7a83..11502b7 100755 --- a/t/t4007-rename-3.sh +++ b/t/t4007-rename-3.sh @@ -35,6 +35,11 @@ test_expect_success 'copy detection' ' compare_diff_raw current expected ' +test_expect_success 'copy detection, cached' ' + git diff-index -C --find-copies-harder --cached $tree >current && + compare_diff_raw current expected +' + # In the tree, there is only path0/COPYING. In the cache, path0 and # path1 both have COPYING and the latter is a copy of path0/COPYING. # However when we say we care only about path1, we should just see -- cgit v0.10.2-6-g49f6