summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2019-02-07 06:05:24 (GMT)
committerJunio C Hamano <gitster@pobox.com>2019-02-07 06:05:25 (GMT)
commit5fda343321f36384892061b21dcbe1d477145d2c (patch)
tree3bc2ed0d898f0c4768529f5ecc9f5ec58dee9e49
parentd8d62e61353c7e34006cc5714f07f507256612df (diff)
parent99dbbfa8ddbba2b620965d026d4ec199b8837a6f (diff)
downloadgit-5fda343321f36384892061b21dcbe1d477145d2c.zip
git-5fda343321f36384892061b21dcbe1d477145d2c.tar.gz
git-5fda343321f36384892061b21dcbe1d477145d2c.tar.bz2
Merge branch 'ds/push-sparse-tree-walk'
"git pack-objects" learned another algorithm to compute the set of objects to send, that trades the resulting packfile off to save traversal cost to favor small pushes. * ds/push-sparse-tree-walk: pack-objects: create GIT_TEST_PACK_SPARSE pack-objects: create pack.useSparse setting revision: implement sparse algorithm list-objects: consume sparse tree walk revision: add mark_tree_uninteresting_sparse
-rw-r--r--Documentation/config/pack.txt9
-rw-r--r--Documentation/git-pack-objects.txt11
-rw-r--r--bisect.c2
-rw-r--r--builtin/pack-objects.c10
-rw-r--r--builtin/rev-list.c2
-rw-r--r--http-push.c2
-rw-r--r--list-objects.c70
-rw-r--r--list-objects.h4
-rw-r--r--revision.c143
-rw-r--r--revision.h2
-rw-r--r--t/README4
-rwxr-xr-xt/t5322-pack-objects-sparse.sh136
12 files changed, 378 insertions, 17 deletions
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index edac75c..425c73a 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -105,6 +105,15 @@ pack.useBitmaps::
true. You should not generally need to turn this off unless
you are debugging pack bitmaps.
+pack.useSparse::
+ When true, git will default to using the '--sparse' option in
+ 'git pack-objects' when the '--revs' option is present. This
+ algorithm only walks trees that appear in paths that introduce new
+ objects. This can have significant performance benefits when
+ computing a pack to send a small change. However, it is possible
+ that extra objects are added to the pack-file if the included
+ commits contain certain types of direct renames.
+
pack.writeBitmaps (deprecated)::
This is a deprecated synonym for `repack.writeBitmaps`.
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index 40c825c..e45f3e6 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -14,7 +14,7 @@ SYNOPSIS
[--local] [--incremental] [--window=<n>] [--depth=<n>]
[--revs [--unpacked | --all]] [--keep-pack=<pack-name>]
[--stdout [--filter=<filter-spec>] | base-name]
- [--shallow] [--keep-true-parents] < object-list
+ [--shallow] [--keep-true-parents] [--sparse] < object-list
DESCRIPTION
@@ -196,6 +196,15 @@ depth is 4095.
Add --no-reuse-object if you want to force a uniform compression
level on all data no matter the source.
+--sparse::
+ Use the "sparse" algorithm to determine which objects to include in
+ the pack, when combined with the "--revs" option. This algorithm
+ only walks trees that appear in paths that introduce new objects.
+ This can have significant performance benefits when computing
+ a pack to send a small change. However, it is possible that extra
+ objects are added to the pack-file if the included commits contain
+ certain types of direct renames.
+
--thin::
Create a "thin" pack by omitting the common objects between a
sender and a receiver in order to reduce network transfer. This
diff --git a/bisect.c b/bisect.c
index 6bf5211..3af955c 100644
--- a/bisect.c
+++ b/bisect.c
@@ -658,7 +658,7 @@ static void bisect_common(struct rev_info *revs)
if (prepare_revision_walk(revs))
die("revision walk setup failed");
if (revs->tree_objects)
- mark_edges_uninteresting(revs, NULL);
+ mark_edges_uninteresting(revs, NULL, 0);
}
static void exit_if_skipped_commits(struct commit_list *tried,
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index ffef8dc..bd67491 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -84,6 +84,7 @@ static unsigned long pack_size_limit;
static int depth = 50;
static int delta_search_threads;
static int pack_to_stdout;
+static int sparse;
static int thin;
static int num_preferred_base;
static struct progress *progress_state;
@@ -2703,6 +2704,10 @@ static int git_pack_config(const char *k, const char *v, void *cb)
use_bitmap_index_default = git_config_bool(k, v);
return 0;
}
+ if (!strcmp(k, "pack.usesparse")) {
+ sparse = git_config_bool(k, v);
+ return 0;
+ }
if (!strcmp(k, "pack.threads")) {
delta_search_threads = git_config_int(k, v);
if (delta_search_threads < 0)
@@ -3130,7 +3135,7 @@ static void get_object_list(int ac, const char **av)
if (prepare_revision_walk(&revs))
die(_("revision walk setup failed"));
- mark_edges_uninteresting(&revs, show_edge);
+ mark_edges_uninteresting(&revs, show_edge, sparse);
if (!fn_show_object)
fn_show_object = show_object;
@@ -3287,6 +3292,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
{ OPTION_CALLBACK, 0, "unpack-unreachable", NULL, N_("time"),
N_("unpack unreachable objects newer than <time>"),
PARSE_OPT_OPTARG, option_parse_unpack_unreachable },
+ OPT_BOOL(0, "sparse", &sparse,
+ N_("use the sparse reachability algorithm")),
OPT_BOOL(0, "thin", &thin,
N_("create thin packs")),
OPT_BOOL(0, "shallow", &shallow,
@@ -3319,6 +3326,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
read_replace_refs = 0;
+ sparse = git_env_bool("GIT_TEST_PACK_SPARSE", 0);
reset_pack_idx_option(&pack_idx_opts);
git_config(git_pack_config, NULL);
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index 14ef659..5b5b6db 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -546,7 +546,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix)
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
if (revs.tree_objects)
- mark_edges_uninteresting(&revs, show_edge);
+ mark_edges_uninteresting(&revs, show_edge, 0);
if (bisect_list) {
int reaches, all;
diff --git a/http-push.c b/http-push.c
index bb802d8..77e2e22 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1933,7 +1933,7 @@ int cmd_main(int argc, const char **argv)
pushing = 0;
if (prepare_revision_walk(&revs))
die("revision walk setup failed");
- mark_edges_uninteresting(&revs, NULL);
+ mark_edges_uninteresting(&revs, NULL, 0);
objects_to_send = get_delta(&revs, ref_lock);
finish_all_active_slots();
diff --git a/list-objects.c b/list-objects.c
index a2296a8..dc77361 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -226,25 +226,73 @@ static void mark_edge_parents_uninteresting(struct commit *commit,
}
}
-void mark_edges_uninteresting(struct rev_info *revs, show_edge_fn show_edge)
+static void add_edge_parents(struct commit *commit,
+ struct rev_info *revs,
+ show_edge_fn show_edge,
+ struct oidset *set)
+{
+ struct commit_list *parents;
+
+ for (parents = commit->parents; parents; parents = parents->next) {
+ struct commit *parent = parents->item;
+ struct tree *tree = get_commit_tree(parent);
+
+ if (!tree)
+ continue;
+
+ oidset_insert(set, &tree->object.oid);
+
+ if (!(parent->object.flags & UNINTERESTING))
+ continue;
+ tree->object.flags |= UNINTERESTING;
+
+ if (revs->edge_hint && !(parent->object.flags & SHOWN)) {
+ parent->object.flags |= SHOWN;
+ show_edge(parent);
+ }
+ }
+}
+
+void mark_edges_uninteresting(struct rev_info *revs,
+ show_edge_fn show_edge,
+ int sparse)
{
struct commit_list *list;
int i;
- for (list = revs->commits; list; list = list->next) {
- struct commit *commit = list->item;
+ if (sparse) {
+ struct oidset set;
+ oidset_init(&set, 16);
- if (commit->object.flags & UNINTERESTING) {
- mark_tree_uninteresting(revs->repo,
- get_commit_tree(commit));
- if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
- commit->object.flags |= SHOWN;
- show_edge(commit);
+ for (list = revs->commits; list; list = list->next) {
+ struct commit *commit = list->item;
+ struct tree *tree = get_commit_tree(commit);
+
+ if (commit->object.flags & UNINTERESTING)
+ tree->object.flags |= UNINTERESTING;
+
+ oidset_insert(&set, &tree->object.oid);
+ add_edge_parents(commit, revs, show_edge, &set);
+ }
+
+ mark_trees_uninteresting_sparse(revs->repo, &set);
+ oidset_clear(&set);
+ } else {
+ for (list = revs->commits; list; list = list->next) {
+ struct commit *commit = list->item;
+ if (commit->object.flags & UNINTERESTING) {
+ mark_tree_uninteresting(revs->repo,
+ get_commit_tree(commit));
+ if (revs->edge_hint_aggressive && !(commit->object.flags & SHOWN)) {
+ commit->object.flags |= SHOWN;
+ show_edge(commit);
+ }
+ continue;
}
- continue;
+ mark_edge_parents_uninteresting(commit, revs, show_edge);
}
- mark_edge_parents_uninteresting(commit, revs, show_edge);
}
+
if (revs->edge_hint_aggressive) {
for (i = 0; i < revs->cmdline.nr; i++) {
struct object *obj = revs->cmdline.rev[i].item;
diff --git a/list-objects.h b/list-objects.h
index ad40762..a952680 100644
--- a/list-objects.h
+++ b/list-objects.h
@@ -10,7 +10,9 @@ typedef void (*show_object_fn)(struct object *, const char *, void *);
void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *);
typedef void (*show_edge_fn)(struct commit *);
-void mark_edges_uninteresting(struct rev_info *, show_edge_fn);
+void mark_edges_uninteresting(struct rev_info *revs,
+ show_edge_fn show_edge,
+ int sparse);
struct oidset;
struct list_objects_filter_options;
diff --git a/revision.c b/revision.c
index 5c7d3c7..162d511 100644
--- a/revision.c
+++ b/revision.c
@@ -27,6 +27,7 @@
#include "commit-reach.h"
#include "commit-graph.h"
#include "prio-queue.h"
+#include "hashmap.h"
volatile show_early_output_fn_t show_early_output;
@@ -99,6 +100,148 @@ void mark_tree_uninteresting(struct repository *r, struct tree *tree)
mark_tree_contents_uninteresting(r, tree);
}
+struct path_and_oids_entry {
+ struct hashmap_entry ent;
+ char *path;
+ struct oidset trees;
+};
+
+static int path_and_oids_cmp(const void *hashmap_cmp_fn_data,
+ const struct path_and_oids_entry *e1,
+ const struct path_and_oids_entry *e2,
+ const void *keydata)
+{
+ return strcmp(e1->path, e2->path);
+}
+
+static void paths_and_oids_init(struct hashmap *map)
+{
+ hashmap_init(map, (hashmap_cmp_fn) path_and_oids_cmp, NULL, 0);
+}
+
+static void paths_and_oids_clear(struct hashmap *map)
+{
+ struct hashmap_iter iter;
+ struct path_and_oids_entry *entry;
+ hashmap_iter_init(map, &iter);
+
+ while ((entry = (struct path_and_oids_entry *)hashmap_iter_next(&iter))) {
+ oidset_clear(&entry->trees);
+ free(entry->path);
+ }
+
+ hashmap_free(map, 1);
+}
+
+static void paths_and_oids_insert(struct hashmap *map,
+ const char *path,
+ const struct object_id *oid)
+{
+ int hash = strhash(path);
+ struct path_and_oids_entry key;
+ struct path_and_oids_entry *entry;
+
+ hashmap_entry_init(&key, hash);
+
+ /* use a shallow copy for the lookup */
+ key.path = (char *)path;
+ oidset_init(&key.trees, 0);
+
+ if (!(entry = (struct path_and_oids_entry *)hashmap_get(map, &key, NULL))) {
+ entry = xcalloc(1, sizeof(struct path_and_oids_entry));
+ hashmap_entry_init(entry, hash);
+ entry->path = xstrdup(key.path);
+ oidset_init(&entry->trees, 16);
+ hashmap_put(map, entry);
+ }
+
+ oidset_insert(&entry->trees, oid);
+}
+
+static void add_children_by_path(struct repository *r,
+ struct tree *tree,
+ struct hashmap *map)
+{
+ struct tree_desc desc;
+ struct name_entry entry;
+
+ if (!tree)
+ return;
+
+ if (parse_tree_gently(tree, 1) < 0)
+ return;
+
+ init_tree_desc(&desc, tree->buffer, tree->size);
+ while (tree_entry(&desc, &entry)) {
+ switch (object_type(entry.mode)) {
+ case OBJ_TREE:
+ paths_and_oids_insert(map, entry.path, &entry.oid);
+
+ if (tree->object.flags & UNINTERESTING) {
+ struct tree *child = lookup_tree(r, &entry.oid);
+ if (child)
+ child->object.flags |= UNINTERESTING;
+ }
+ break;
+ case OBJ_BLOB:
+ if (tree->object.flags & UNINTERESTING) {
+ struct blob *child = lookup_blob(r, &entry.oid);
+ if (child)
+ child->object.flags |= UNINTERESTING;
+ }
+ break;
+ default:
+ /* Subproject commit - not in this repository */
+ break;
+ }
+ }
+
+ free_tree_buffer(tree);
+}
+
+void mark_trees_uninteresting_sparse(struct repository *r,
+ struct oidset *trees)
+{
+ unsigned has_interesting = 0, has_uninteresting = 0;
+ struct hashmap map;
+ struct hashmap_iter map_iter;
+ struct path_and_oids_entry *entry;
+ struct object_id *oid;
+ struct oidset_iter iter;
+
+ oidset_iter_init(trees, &iter);
+ while ((!has_interesting || !has_uninteresting) &&
+ (oid = oidset_iter_next(&iter))) {
+ struct tree *tree = lookup_tree(r, oid);
+
+ if (!tree)
+ continue;
+
+ if (tree->object.flags & UNINTERESTING)
+ has_uninteresting = 1;
+ else
+ has_interesting = 1;
+ }
+
+ /* Do not walk unless we have both types of trees. */
+ if (!has_uninteresting || !has_interesting)
+ return;
+
+ paths_and_oids_init(&map);
+
+ oidset_iter_init(trees, &iter);
+ while ((oid = oidset_iter_next(&iter))) {
+ struct tree *tree = lookup_tree(r, oid);
+ add_children_by_path(r, tree, &map);
+ }
+
+ hashmap_iter_init(&map, &map_iter);
+ while ((entry = hashmap_iter_next(&map_iter)))
+ mark_trees_uninteresting_sparse(r, &entry->trees);
+
+ paths_and_oids_clear(&map);
+}
+
struct commit_stack {
struct commit **items;
size_t nr, alloc;
diff --git a/revision.h b/revision.h
index 52e5a88..d32d62a 100644
--- a/revision.h
+++ b/revision.h
@@ -67,6 +67,7 @@ struct rev_cmdline_info {
#define REVISION_WALK_NO_WALK_SORTED 1
#define REVISION_WALK_NO_WALK_UNSORTED 2
+struct oidset;
struct topo_walk_info;
struct rev_info {
@@ -327,6 +328,7 @@ void put_revision_mark(const struct rev_info *revs,
void mark_parents_uninteresting(struct commit *commit);
void mark_tree_uninteresting(struct repository *r, struct tree *tree);
+void mark_trees_uninteresting_sparse(struct repository *r, struct oidset *trees);
void show_object_with_name(FILE *, struct object *, const char *);
diff --git a/t/README b/t/README
index 25864ec..6140b8c 100644
--- a/t/README
+++ b/t/README
@@ -358,6 +358,10 @@ GIT_TEST_INDEX_VERSION=<n> exercises the index read/write code path
for the index version specified. Can be set to any valid version
(currently 2, 3, or 4).
+GIT_TEST_PACK_SPARSE=<boolean> if enabled will default the pack-objects
+builtin to use the sparse object walk. This can still be overridden by
+the --no-sparse command-line argument.
+
GIT_TEST_PRELOAD_INDEX=<boolean> exercises the preload-index code path
by overriding the minimum number of cache entries required per thread.
diff --git a/t/t5322-pack-objects-sparse.sh b/t/t5322-pack-objects-sparse.sh
new file mode 100755
index 0000000..7124b55
--- /dev/null
+++ b/t/t5322-pack-objects-sparse.sh
@@ -0,0 +1,136 @@
+#!/bin/sh
+
+test_description='pack-objects object selection using sparse algorithm'
+. ./test-lib.sh
+
+test_expect_success 'setup repo' '
+ test_commit initial &&
+ for i in $(test_seq 1 3)
+ do
+ mkdir f$i &&
+ for j in $(test_seq 1 3)
+ do
+ mkdir f$i/f$j &&
+ echo $j >f$i/f$j/data.txt
+ done
+ done &&
+ git add . &&
+ git commit -m "Initialized trees" &&
+ for i in $(test_seq 1 3)
+ do
+ git checkout -b topic$i master &&
+ echo change-$i >f$i/f$i/data.txt &&
+ git commit -a -m "Changed f$i/f$i/data.txt"
+ done &&
+ cat >packinput.txt <<-EOF &&
+ topic1
+ ^topic2
+ ^topic3
+ EOF
+ git rev-parse \
+ topic1 \
+ topic1^{tree} \
+ topic1:f1 \
+ topic1:f1/f1 \
+ topic1:f1/f1/data.txt | sort >expect_objects.txt
+'
+
+test_expect_success 'non-sparse pack-objects' '
+ git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack &&
+ git index-pack -o nonsparse.idx nonsparse.pack &&
+ git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt &&
+ test_cmp expect_objects.txt nonsparse_objects.txt
+'
+
+test_expect_success 'sparse pack-objects' '
+ git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack &&
+ git index-pack -o sparse.idx sparse.pack &&
+ git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
+ test_cmp expect_objects.txt sparse_objects.txt
+'
+
+test_expect_success 'duplicate a folder from f3 and commit to topic1' '
+ git checkout topic1 &&
+ echo change-3 >f3/f3/data.txt &&
+ git commit -a -m "Changed f3/f3/data.txt" &&
+ git rev-parse \
+ topic1~1 \
+ topic1~1^{tree} \
+ topic1^{tree} \
+ topic1 \
+ topic1:f1 \
+ topic1:f1/f1 \
+ topic1:f1/f1/data.txt | sort >required_objects.txt
+'
+
+test_expect_success 'non-sparse pack-objects' '
+ git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack &&
+ git index-pack -o nonsparse.idx nonsparse.pack &&
+ git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt &&
+ comm -1 -2 required_objects.txt nonsparse_objects.txt >nonsparse_required_objects.txt &&
+ test_cmp required_objects.txt nonsparse_required_objects.txt
+'
+
+test_expect_success 'sparse pack-objects' '
+ git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack &&
+ git index-pack -o sparse.idx sparse.pack &&
+ git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
+ comm -1 -2 required_objects.txt sparse_objects.txt >sparse_required_objects.txt &&
+ test_cmp required_objects.txt sparse_required_objects.txt
+'
+
+# Demonstrate that the algorithms differ when we copy a tree wholesale
+# from one folder to another.
+
+test_expect_success 'duplicate a folder from f1 into f3' '
+ mkdir f3/f4 &&
+ cp -r f1/f1/* f3/f4 &&
+ git add f3/f4 &&
+ git commit -m "Copied f1/f1 to f3/f4" &&
+ cat >packinput.txt <<-EOF &&
+ topic1
+ ^topic1~1
+ EOF
+ git rev-parse \
+ topic1 \
+ topic1^{tree} \
+ topic1:f3 | sort >required_objects.txt
+'
+
+test_expect_success 'non-sparse pack-objects' '
+ git pack-objects --stdout --revs --no-sparse <packinput.txt >nonsparse.pack &&
+ git index-pack -o nonsparse.idx nonsparse.pack &&
+ git show-index <nonsparse.idx | awk "{print \$2}" >nonsparse_objects.txt &&
+ comm -1 -2 required_objects.txt nonsparse_objects.txt >nonsparse_required_objects.txt &&
+ test_cmp required_objects.txt nonsparse_required_objects.txt
+'
+
+test_expect_success 'sparse pack-objects' '
+ git rev-parse \
+ topic1 \
+ topic1^{tree} \
+ topic1:f3 \
+ topic1:f3/f4 \
+ topic1:f3/f4/data.txt | sort >expect_sparse_objects.txt &&
+ git pack-objects --stdout --revs --sparse <packinput.txt >sparse.pack &&
+ git index-pack -o sparse.idx sparse.pack &&
+ git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
+ test_cmp expect_sparse_objects.txt sparse_objects.txt
+'
+
+test_expect_success 'pack.useSparse enables algorithm' '
+ git config pack.useSparse true &&
+ git pack-objects --stdout --revs <packinput.txt >sparse.pack &&
+ git index-pack -o sparse.idx sparse.pack &&
+ git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
+ test_cmp expect_sparse_objects.txt sparse_objects.txt
+'
+
+test_expect_success 'pack.useSparse overridden' '
+ git pack-objects --stdout --revs --no-sparse <packinput.txt >sparse.pack &&
+ git index-pack -o sparse.idx sparse.pack &&
+ git show-index <sparse.idx | awk "{print \$2}" >sparse_objects.txt &&
+ test_cmp required_objects.txt sparse_objects.txt
+'
+
+test_done