summaryrefslogtreecommitdiff
path: root/bloom.c
diff options
context:
space:
mode:
authorGarima Singh <garima.singh@microsoft.com>2020-04-06 16:59:50 (GMT)
committerJunio C Hamano <gitster@pobox.com>2020-04-06 18:08:37 (GMT)
commit1217c03e7b87b15f2c78af5b1e1915a675050454 (patch)
treee19c660138425048b891a39028c6b6ce567c62d2 /bloom.c
parent76ffbca71a9c89d1e530f734e16a70b3924f4bea (diff)
downloadgit-1217c03e7b87b15f2c78af5b1e1915a675050454.zip
git-1217c03e7b87b15f2c78af5b1e1915a675050454.tar.gz
git-1217c03e7b87b15f2c78af5b1e1915a675050454.tar.bz2
commit-graph: reuse existing Bloom filters during write
Add logic to a) parse Bloom filter information from the commit graph file and, b) re-use existing Bloom filters. See Documentation/technical/commit-graph-format for the format in which the Bloom filter information is written to the commit graph file. To read Bloom filter for a given commit with lexicographic position 'i' we need to: 1. Read BIDX[i] which essentially gives us the starting index in BDAT for filter of commit i+1. It is essentially the index past the end of the filter of commit i. It is called end_index in the code. 2. For i>0, read BIDX[i-1] which will give us the starting index in BDAT for filter of commit i. It is called the start_index in the code. For the first commit, where i = 0, Bloom filter data starts at the beginning, just past the header in the BDAT chunk. Hence, start_index will be 0. 3. The length of the filter will be end_index - start_index, because BIDX[i] gives the cumulative 8-byte words including the ith commit's filter. We toggle whether Bloom filters should be recomputed based on the compute_if_not_present flag. Helped-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Garima Singh <garima.singh@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'bloom.c')
-rw-r--r--bloom.c49
1 files changed, 48 insertions, 1 deletions
diff --git a/bloom.c b/bloom.c
index a16eee9..0f714dd 100644
--- a/bloom.c
+++ b/bloom.c
@@ -4,6 +4,8 @@
#include "diffcore.h"
#include "revision.h"
#include "hashmap.h"
+#include "commit-graph.h"
+#include "commit.h"
define_commit_slab(bloom_filter_slab, struct bloom_filter);
@@ -26,6 +28,36 @@ static inline unsigned char get_bitmask(uint32_t pos)
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
}
+static int load_bloom_filter_from_graph(struct commit_graph *g,
+ struct bloom_filter *filter,
+ struct commit *c)
+{
+ uint32_t lex_pos, start_index, end_index;
+
+ while (c->graph_pos < g->num_commits_in_base)
+ g = g->base_graph;
+
+ /* The commit graph commit 'c' lives in doesn't carry bloom filters. */
+ if (!g->chunk_bloom_indexes)
+ return 0;
+
+ lex_pos = c->graph_pos - g->num_commits_in_base;
+
+ end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
+
+ if (lex_pos > 0)
+ start_index = get_be32(g->chunk_bloom_indexes + 4 * (lex_pos - 1));
+ else
+ start_index = 0;
+
+ filter->len = end_index - start_index;
+ filter->data = (unsigned char *)(g->chunk_bloom_data +
+ sizeof(unsigned char) * start_index +
+ BLOOMDATA_CHUNK_HEADER_SIZE);
+
+ return 1;
+}
+
/*
* Calculate the murmur3 32-bit hash value for the given data
* using the given seed.
@@ -127,7 +159,8 @@ void init_bloom_filters(void)
}
struct bloom_filter *get_bloom_filter(struct repository *r,
- struct commit *c)
+ struct commit *c,
+ int compute_if_not_present)
{
struct bloom_filter *filter;
struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
@@ -140,6 +173,20 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
filter = bloom_filter_slab_at(&bloom_filters, c);
+ if (!filter->data) {
+ load_commit_graph_info(r, c);
+ if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
+ r->objects->commit_graph->chunk_bloom_indexes) {
+ if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
+ return filter;
+ else
+ return NULL;
+ }
+ }
+
+ if (filter->data || !compute_if_not_present)
+ return filter;
+
repo_diff_setup(r, &diffopt);
diffopt.flags.recursive = 1;
diffopt.max_changes = max_changes;