summaryrefslogtreecommitdiff
path: root/t
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2020-05-01 20:39:53 (GMT)
committerJunio C Hamano <gitster@pobox.com>2020-05-01 20:39:53 (GMT)
commit9b6606f43d55bbf33b9924d16e02e60e1c09660a (patch)
tree0082094df8d1e99873fa1e72628fa4e02ec3a282 /t
parentcf054f817a30cf4a6531548f52cd7d5cbed6f4fc (diff)
parentcaf388caa101be90b7ec43d7f78ca4e935fc0150 (diff)
downloadgit-9b6606f43d55bbf33b9924d16e02e60e1c09660a.zip
git-9b6606f43d55bbf33b9924d16e02e60e1c09660a.tar.gz
git-9b6606f43d55bbf33b9924d16e02e60e1c09660a.tar.bz2
Merge branch 'gs/commit-graph-path-filter'
Introduce an extension to the commit-graph to make it efficient to check for the paths that were modified at each commit using Bloom filters. * gs/commit-graph-path-filter: bloom: ignore renames when computing changed paths commit-graph: add GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS test flag t4216: add end to end tests for git log with Bloom filters revision.c: add trace2 stats around Bloom filter usage revision.c: use Bloom filters to speed up path based revision walks commit-graph: add --changed-paths option to write subcommand commit-graph: reuse existing Bloom filters during write commit-graph: write Bloom filters to commit graph file commit-graph: examine commits by generation number commit-graph: examine changed-path objects in pack order commit-graph: compute Bloom filters for changed paths diff: halt tree-diff early after max_changes bloom.c: core Bloom filter implementation for changed paths. bloom.c: introduce core Bloom filter constructs bloom.c: add the murmur3 hash implementation commit-graph: define and use MAX_NUM_CHUNKS
Diffstat (limited to 't')
-rw-r--r--t/README5
-rw-r--r--t/helper/test-bloom.c81
-rw-r--r--t/helper/test-read-graph.c4
-rw-r--r--t/helper/test-tool.c1
-rw-r--r--t/helper/test-tool.h1
-rwxr-xr-xt/t0095-bloom.sh117
-rwxr-xr-xt/t4216-log-bloom.sh155
-rwxr-xr-xt/t5318-commit-graph.sh2
-rwxr-xr-xt/t5324-split-commit-graph.sh1
9 files changed, 367 insertions, 0 deletions
diff --git a/t/README b/t/README
index 13747f1..cf86383 100644
--- a/t/README
+++ b/t/README
@@ -379,6 +379,11 @@ GIT_TEST_COMMIT_GRAPH=<boolean>, when true, forces the commit-graph to
be written after every 'git commit' command, and overrides the
'core.commitGraph' setting to true.
+GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=<boolean>, when true, forces
+commit-graph write to compute and write changed path Bloom filters for
+every 'git commit-graph write', as if the `--changed-paths` option was
+passed in.
+
GIT_TEST_FSMONITOR=$PWD/t7519/fsmonitor-all exercises the fsmonitor
code path for utilizing a file system monitor to speed up detecting
new or changed files.
diff --git a/t/helper/test-bloom.c b/t/helper/test-bloom.c
new file mode 100644
index 0000000..ce41266
--- /dev/null
+++ b/t/helper/test-bloom.c
@@ -0,0 +1,81 @@
+#include "git-compat-util.h"
+#include "bloom.h"
+#include "test-tool.h"
+#include "commit.h"
+
+struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
+
+static void add_string_to_filter(const char *data, struct bloom_filter *filter) {
+ struct bloom_key key;
+ int i;
+
+ fill_bloom_key(data, strlen(data), &key, &settings);
+ printf("Hashes:");
+ for (i = 0; i < settings.num_hashes; i++){
+ printf("0x%08x|", key.hashes[i]);
+ }
+ printf("\n");
+ add_key_to_filter(&key, filter, &settings);
+}
+
+static void print_bloom_filter(struct bloom_filter *filter) {
+ int i;
+
+ if (!filter) {
+ printf("No filter.\n");
+ return;
+ }
+ printf("Filter_Length:%d\n", (int)filter->len);
+ printf("Filter_Data:");
+ for (i = 0; i < filter->len; i++){
+ printf("%02x|", filter->data[i]);
+ }
+ printf("\n");
+}
+
+static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
+{
+ struct commit *c;
+ struct bloom_filter *filter;
+ setup_git_directory();
+ c = lookup_commit(the_repository, commit_oid);
+ filter = get_bloom_filter(the_repository, c, 1);
+ print_bloom_filter(filter);
+}
+
+int cmd__bloom(int argc, const char **argv)
+{
+ if (!strcmp(argv[1], "get_murmur3")) {
+ uint32_t hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
+ printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
+ }
+
+ if (!strcmp(argv[1], "generate_filter")) {
+ struct bloom_filter filter;
+ int i = 2;
+ filter.len = (settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+ filter.data = xcalloc(filter.len, sizeof(unsigned char));
+
+ if (!argv[2]){
+ die("at least one input string expected");
+ }
+
+ while (argv[i]) {
+ add_string_to_filter(argv[i], &filter);
+ i++;
+ }
+
+ print_bloom_filter(&filter);
+ }
+
+ if (!strcmp(argv[1], "get_filter_for_commit")) {
+ struct object_id oid;
+ const char *end;
+ if (parse_oid_hex(argv[2], &oid, &end))
+ die("cannot parse oid '%s'", argv[2]);
+ init_bloom_filters();
+ get_bloom_filter_for_commit(&oid);
+ }
+
+ return 0;
+} \ No newline at end of file
diff --git a/t/helper/test-read-graph.c b/t/helper/test-read-graph.c
index 4846040..6d0c962 100644
--- a/t/helper/test-read-graph.c
+++ b/t/helper/test-read-graph.c
@@ -34,6 +34,10 @@ int cmd__read_graph(int argc, const char **argv)
printf(" commit_metadata");
if (graph->chunk_extra_edges)
printf(" extra_edges");
+ if (graph->chunk_bloom_indexes)
+ printf(" bloom_indexes");
+ if (graph->chunk_bloom_data)
+ printf(" bloom_data");
printf("\n");
UNLEAK(graph);
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 2ece4d1..590b2ef 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -15,6 +15,7 @@ struct test_cmd {
static struct test_cmd cmds[] = {
{ "advise", cmd__advise_if_enabled },
+ { "bloom", cmd__bloom },
{ "chmtime", cmd__chmtime },
{ "config", cmd__config },
{ "ctype", cmd__ctype },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 1cbaec0..ddc8e99 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -5,6 +5,7 @@
#include "git-compat-util.h"
int cmd__advise_if_enabled(int argc, const char **argv);
+int cmd__bloom(int argc, const char **argv);
int cmd__chmtime(int argc, const char **argv);
int cmd__config(int argc, const char **argv);
int cmd__ctype(int argc, const char **argv);
diff --git a/t/t0095-bloom.sh b/t/t0095-bloom.sh
new file mode 100755
index 0000000..8f9eef1
--- /dev/null
+++ b/t/t0095-bloom.sh
@@ -0,0 +1,117 @@
+#!/bin/sh
+
+test_description='Testing the various Bloom filter computations in bloom.c'
+. ./test-lib.sh
+
+test_expect_success 'compute unseeded murmur3 hash for empty string' '
+ cat >expect <<-\EOF &&
+ Murmur3 Hash with seed=0:0x00000000
+ EOF
+ test-tool bloom get_murmur3 "" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'compute unseeded murmur3 hash for test string 1' '
+ cat >expect <<-\EOF &&
+ Murmur3 Hash with seed=0:0x627b0c2c
+ EOF
+ test-tool bloom get_murmur3 "Hello world!" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'compute unseeded murmur3 hash for test string 2' '
+ cat >expect <<-\EOF &&
+ Murmur3 Hash with seed=0:0x2e4ff723
+ EOF
+ test-tool bloom get_murmur3 "The quick brown fox jumps over the lazy dog" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'compute bloom key for empty string' '
+ cat >expect <<-\EOF &&
+ Hashes:0x5615800c|0x5b966560|0x61174ab4|0x66983008|0x6c19155c|0x7199fab0|0x771ae004|
+ Filter_Length:2
+ Filter_Data:11|11|
+ EOF
+ test-tool bloom generate_filter "" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'compute bloom key for whitespace' '
+ cat >expect <<-\EOF &&
+ Hashes:0xf178874c|0x5f3d6eb6|0xcd025620|0x3ac73d8a|0xa88c24f4|0x16510c5e|0x8415f3c8|
+ Filter_Length:2
+ Filter_Data:51|55|
+ EOF
+ test-tool bloom generate_filter " " >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'compute bloom key for test string 1' '
+ cat >expect <<-\EOF &&
+ Hashes:0xb270de9b|0x1bb6f26e|0x84fd0641|0xee431a14|0x57892de7|0xc0cf41ba|0x2a15558d|
+ Filter_Length:2
+ Filter_Data:92|6c|
+ EOF
+ test-tool bloom generate_filter "Hello world!" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'compute bloom key for test string 2' '
+ cat >expect <<-\EOF &&
+ Hashes:0x20ab385b|0xf5237fe2|0xc99bc769|0x9e140ef0|0x728c5677|0x47049dfe|0x1b7ce585|
+ Filter_Length:2
+ Filter_Data:a5|4a|
+ EOF
+ test-tool bloom generate_filter "file.txt" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'get bloom filters for commit with no changes' '
+ git init &&
+ git commit --allow-empty -m "c0" &&
+ cat >expect <<-\EOF &&
+ Filter_Length:0
+ Filter_Data:
+ EOF
+ test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success 'get bloom filter for commit with 10 changes' '
+ rm actual &&
+ rm expect &&
+ mkdir smallDir &&
+ for i in $(test_seq 0 9)
+ do
+ echo $i >smallDir/$i
+ done &&
+ git add smallDir &&
+ git commit -m "commit with 10 changes" &&
+ cat >expect <<-\EOF &&
+ Filter_Length:25
+ Filter_Data:82|a0|65|47|0c|92|90|c0|a1|40|02|a0|e2|40|e0|04|0a|9a|66|cf|80|19|85|42|23|
+ EOF
+ test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
+ test_cmp expect actual
+'
+
+test_expect_success EXPENSIVE 'get bloom filter for commit with 513 changes' '
+ rm actual &&
+ rm expect &&
+ mkdir bigDir &&
+ for i in $(test_seq 0 512)
+ do
+ echo $i >bigDir/$i
+ done &&
+ git add bigDir &&
+ git commit -m "commit with 513 changes" &&
+ cat >expect <<-\EOF &&
+ Filter_Length:0
+ Filter_Data:
+ EOF
+ test-tool bloom get_filter_for_commit "$(git rev-parse HEAD)" >actual &&
+ test_cmp expect actual
+'
+
+test_done \ No newline at end of file
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
new file mode 100755
index 0000000..c7011f3
--- /dev/null
+++ b/t/t4216-log-bloom.sh
@@ -0,0 +1,155 @@
+#!/bin/sh
+
+test_description='git log for a path with Bloom filters'
+. ./test-lib.sh
+
+GIT_TEST_COMMIT_GRAPH=0
+GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
+
+test_expect_success 'setup test - repo, commits, commit graph, log outputs' '
+ git init &&
+ mkdir A A/B A/B/C &&
+ test_commit c1 A/file1 &&
+ test_commit c2 A/B/file2 &&
+ test_commit c3 A/B/C/file3 &&
+ test_commit c4 A/file1 &&
+ test_commit c5 A/B/file2 &&
+ test_commit c6 A/B/C/file3 &&
+ test_commit c7 A/file1 &&
+ test_commit c8 A/B/file2 &&
+ test_commit c9 A/B/C/file3 &&
+ test_commit c10 file_to_be_deleted &&
+ git checkout -b side HEAD~4 &&
+ test_commit side-1 file4 &&
+ git checkout master &&
+ git merge side &&
+ test_commit c11 file5 &&
+ mv file5 file5_renamed &&
+ git add file5_renamed &&
+ git commit -m "rename" &&
+ rm file_to_be_deleted &&
+ git add . &&
+ git commit -m "file removed" &&
+ git commit-graph write --reachable --changed-paths
+'
+graph_read_expect () {
+ NUM_CHUNKS=5
+ cat >expect <<- EOF
+ header: 43475048 1 1 $NUM_CHUNKS 0
+ num_commits: $1
+ chunks: oid_fanout oid_lookup commit_metadata bloom_indexes bloom_data
+ EOF
+ test-tool read-graph >actual &&
+ test_cmp expect actual
+}
+
+test_expect_success 'commit-graph write wrote out the bloom chunks' '
+ graph_read_expect 15
+'
+
+# Turn off any inherited trace2 settings for this test.
+sane_unset GIT_TRACE2 GIT_TRACE2_PERF GIT_TRACE2_EVENT
+sane_unset GIT_TRACE2_PERF_BRIEF
+sane_unset GIT_TRACE2_CONFIG_PARAMS
+
+setup () {
+ rm "$TRASH_DIRECTORY/trace.perf"
+ git -c core.commitGraph=false log --pretty="format:%s" $1 >log_wo_bloom &&
+ GIT_TRACE2_PERF="$TRASH_DIRECTORY/trace.perf" git -c core.commitGraph=true log --pretty="format:%s" $1 >log_w_bloom
+}
+
+test_bloom_filters_used () {
+ log_args=$1
+ bloom_trace_prefix="statistics:{\"filter_not_present\":0,\"zero_length_filter\":0,\"maybe\""
+ setup "$log_args" &&
+ grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
+ test_cmp log_wo_bloom log_w_bloom &&
+ test_path_is_file "$TRASH_DIRECTORY/trace.perf"
+}
+
+test_bloom_filters_not_used () {
+ log_args=$1
+ setup "$log_args" &&
+ !(grep -q "statistics:{\"filter_not_present\":" "$TRASH_DIRECTORY/trace.perf") &&
+ test_cmp log_wo_bloom log_w_bloom
+}
+
+for path in A A/B A/B/C A/file1 A/B/file2 A/B/C/file3 file4 file5 file5_renamed file_to_be_deleted
+do
+ for option in "" \
+ "--all" \
+ "--full-history" \
+ "--full-history --simplify-merges" \
+ "--simplify-merges" \
+ "--simplify-by-decoration" \
+ "--follow" \
+ "--first-parent" \
+ "--topo-order" \
+ "--date-order" \
+ "--author-date-order" \
+ "--ancestry-path side..master"
+ do
+ test_expect_success "git log option: $option for path: $path" '
+ test_bloom_filters_used "$option -- $path"
+ '
+ done
+done
+
+test_expect_success 'git log -- folder works with and without the trailing slash' '
+ test_bloom_filters_used "-- A" &&
+ test_bloom_filters_used "-- A/"
+'
+
+test_expect_success 'git log for path that does not exist. ' '
+ test_bloom_filters_used "-- path_does_not_exist"
+'
+
+test_expect_success 'git log with --walk-reflogs does not use Bloom filters' '
+ test_bloom_filters_not_used "--walk-reflogs -- A"
+'
+
+test_expect_success 'git log -- multiple path specs does not use Bloom filters' '
+ test_bloom_filters_not_used "-- file4 A/file1"
+'
+
+test_expect_success 'git log with wildcard that resolves to a single path uses Bloom filters' '
+ test_bloom_filters_used "-- *4" &&
+ test_bloom_filters_used "-- *renamed"
+'
+
+test_expect_success 'git log with wildcard that resolves to a multiple paths does not uses Bloom filters' '
+ test_bloom_filters_not_used "-- *" &&
+ test_bloom_filters_not_used "-- file*"
+'
+
+test_expect_success 'setup - add commit-graph to the chain without Bloom filters' '
+ test_commit c14 A/anotherFile2 &&
+ test_commit c15 A/B/anotherFile2 &&
+ test_commit c16 A/B/C/anotherFile2 &&
+ GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0 git commit-graph write --reachable --split &&
+ test_line_count = 2 .git/objects/info/commit-graphs/commit-graph-chain
+'
+
+test_expect_success 'Do not use Bloom filters if the latest graph does not have Bloom filters.' '
+ test_bloom_filters_not_used "-- A/B"
+'
+
+test_expect_success 'setup - add commit-graph to the chain with Bloom filters' '
+ test_commit c17 A/anotherFile3 &&
+ git commit-graph write --reachable --changed-paths --split &&
+ test_line_count = 3 .git/objects/info/commit-graphs/commit-graph-chain
+'
+
+test_bloom_filters_used_when_some_filters_are_missing () {
+ log_args=$1
+ bloom_trace_prefix="statistics:{\"filter_not_present\":3,\"zero_length_filter\":0,\"maybe\":8,\"definitely_not\":6"
+ setup "$log_args" &&
+ grep -q "$bloom_trace_prefix" "$TRASH_DIRECTORY/trace.perf" &&
+ test_cmp log_wo_bloom log_w_bloom
+}
+
+test_expect_success 'Use Bloom filters if they exist in the latest but not all commit graphs in the chain.' '
+ test_bloom_filters_used_when_some_filters_are_missing "-- A/B"
+'
+
+test_done \ No newline at end of file
diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh
index e874a12..39e2918 100755
--- a/t/t5318-commit-graph.sh
+++ b/t/t5318-commit-graph.sh
@@ -3,6 +3,8 @@
test_description='commit graph'
. ./test-lib.sh
+GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
+
test_expect_success 'setup full repo' '
mkdir full &&
cd "$TRASH_DIRECTORY/full" &&
diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh
index 4146b82..594edb7 100755
--- a/t/t5324-split-commit-graph.sh
+++ b/t/t5324-split-commit-graph.sh
@@ -4,6 +4,7 @@ test_description='split commit graph'
. ./test-lib.sh
GIT_TEST_COMMIT_GRAPH=0
+GIT_TEST_COMMIT_GRAPH_CHANGED_PATHS=0
test_expect_success 'setup repo' '
git init &&