summaryrefslogtreecommitdiff
path: root/bloom.c
diff options
context:
space:
mode:
Diffstat (limited to 'bloom.c')
-rw-r--r--bloom.c131
1 files changed, 98 insertions, 33 deletions
diff --git a/bloom.c b/bloom.c
index 9b86aa3..e529f76 100644
--- a/bloom.c
+++ b/bloom.c
@@ -2,10 +2,10 @@
#include "bloom.h"
#include "diff.h"
#include "diffcore.h"
-#include "revision.h"
#include "hashmap.h"
#include "commit-graph.h"
#include "commit.h"
+#include "commit-slab.h"
define_commit_slab(bloom_filter_slab, struct bloom_filter);
@@ -28,20 +28,40 @@ static inline unsigned char get_bitmask(uint32_t pos)
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
}
+static int check_bloom_offset(struct commit_graph *g, uint32_t pos,
+ uint32_t offset)
+{
+ /*
+ * Note that we allow offsets equal to the data size, which would set
+ * our pointers at one past the end of the chunk memory. This is
+ * necessary because the on-disk index points to the end of the
+ * entries (so we can compute size by comparing adjacent ones). And
+ * naturally the final entry's end is one-past-the-end of the chunk.
+ */
+ if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE)
+ return 0;
+
+ warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path"
+ " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")",
+ (uintmax_t)offset, (uintmax_t)pos,
+ g->filename, (uintmax_t)g->chunk_bloom_data_size);
+ return -1;
+}
+
static int load_bloom_filter_from_graph(struct commit_graph *g,
struct bloom_filter *filter,
- struct commit *c)
+ uint32_t graph_pos)
{
uint32_t lex_pos, start_index, end_index;
- while (c->graph_pos < g->num_commits_in_base)
+ while (graph_pos < g->num_commits_in_base)
g = g->base_graph;
- /* The commit graph commit 'c' lives in doesn't carry bloom filters. */
+ /* The commit graph commit 'c' lives in doesn't carry Bloom filters. */
if (!g->chunk_bloom_indexes)
return 0;
- lex_pos = c->graph_pos - g->num_commits_in_base;
+ lex_pos = graph_pos - g->num_commits_in_base;
end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos);
@@ -50,6 +70,20 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
else
start_index = 0;
+ if (check_bloom_offset(g, lex_pos, end_index) < 0 ||
+ check_bloom_offset(g, lex_pos - 1, start_index) < 0)
+ return 0;
+
+ if (end_index < start_index) {
+ warning("ignoring decreasing changed-path index offsets"
+ " (%"PRIuMAX" > %"PRIuMAX") for positions"
+ " %"PRIuMAX" and %"PRIuMAX" of %s",
+ (uintmax_t)start_index, (uintmax_t)end_index,
+ (uintmax_t)(lex_pos-1), (uintmax_t)lex_pos,
+ g->filename);
+ return 0;
+ }
+
filter->len = end_index - start_index;
filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index +
@@ -138,6 +172,11 @@ void fill_bloom_key(const char *data,
key->hashes[i] = hash0 + i * hash1;
}
+void clear_bloom_key(struct bloom_key *key)
+{
+ FREE_AND_NULL(key->hashes);
+}
+
void add_key_to_filter(const struct bloom_key *key,
struct bloom_filter *filter,
const struct bloom_filter_settings *settings)
@@ -158,10 +197,10 @@ void init_bloom_filters(void)
init_bloom_filter_slab(&bloom_filters);
}
-static int pathmap_cmp(const void *hashmap_cmp_fn_data,
+static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED,
const struct hashmap_entry *eptr,
const struct hashmap_entry *entry_or_key,
- const void *keydata)
+ const void *keydata UNUSED)
{
const struct pathmap_hash_entry *e1, *e2;
@@ -171,39 +210,47 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data,
return strcmp(e1->path, e2->path);
}
-struct bloom_filter *get_bloom_filter(struct repository *r,
- struct commit *c,
- int compute_if_not_present)
+static void init_truncated_large_filter(struct bloom_filter *filter)
+{
+ filter->data = xmalloc(1);
+ filter->data[0] = 0xFF;
+ filter->len = 1;
+}
+
+struct bloom_filter *get_or_compute_bloom_filter(struct repository *r,
+ struct commit *c,
+ int compute_if_not_present,
+ const struct bloom_filter_settings *settings,
+ enum bloom_filter_computed *computed)
{
struct bloom_filter *filter;
- struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS;
int i;
struct diff_options diffopt;
- int max_changes = 512;
- if (bloom_filters.slab_size == 0)
+ if (computed)
+ *computed = BLOOM_NOT_COMPUTED;
+
+ if (!bloom_filters.slab_size)
return NULL;
filter = bloom_filter_slab_at(&bloom_filters, c);
if (!filter->data) {
- load_commit_graph_info(r, c);
- if (c->graph_pos != COMMIT_NOT_FROM_GRAPH &&
- r->objects->commit_graph->chunk_bloom_indexes) {
- if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c))
- return filter;
- else
- return NULL;
- }
+ uint32_t graph_pos;
+ if (repo_find_commit_pos_in_graph(r, c, &graph_pos))
+ load_bloom_filter_from_graph(r->objects->commit_graph,
+ filter, graph_pos);
}
- if (filter->data || !compute_if_not_present)
+ if (filter->data && filter->len)
return filter;
+ if (!compute_if_not_present)
+ return NULL;
repo_diff_setup(r, &diffopt);
diffopt.flags.recursive = 1;
diffopt.detect_rename = 0;
- diffopt.max_changes = max_changes;
+ diffopt.max_changes = settings->max_changed_paths;
diff_setup_done(&diffopt);
/* ensure commit is parsed so we have parent information */
@@ -215,11 +262,10 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
diff_tree_oid(NULL, &c->object.oid, "", &diffopt);
diffcore_std(&diffopt);
- if (diffopt.num_changes <= max_changes) {
- struct hashmap pathmap;
+ if (diff_queued_diff.nr <= settings->max_changed_paths) {
+ struct hashmap pathmap = HASHMAP_INIT(pathmap_cmp, NULL);
struct pathmap_hash_entry *e;
struct hashmap_iter iter;
- hashmap_init(&pathmap, pathmap_cmp, NULL, 0);
for (i = 0; i < diff_queued_diff.nr; i++) {
const char *path = diff_queued_diff.queue[i]->two->path;
@@ -252,23 +298,42 @@ struct bloom_filter *get_bloom_filter(struct repository *r,
diff_free_filepair(diff_queued_diff.queue[i]);
}
- filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
- filter->data = xcalloc(filter->len, sizeof(unsigned char));
+ if (hashmap_get_size(&pathmap) > settings->max_changed_paths) {
+ init_truncated_large_filter(filter);
+ if (computed)
+ *computed |= BLOOM_TRUNC_LARGE;
+ goto cleanup;
+ }
+
+ filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD;
+ if (!filter->len) {
+ if (computed)
+ *computed |= BLOOM_TRUNC_EMPTY;
+ filter->len = 1;
+ }
+ CALLOC_ARRAY(filter->data, filter->len);
hashmap_for_each_entry(&pathmap, &iter, e, entry) {
struct bloom_key key;
- fill_bloom_key(e->path, strlen(e->path), &key, &settings);
- add_key_to_filter(&key, filter, &settings);
+ fill_bloom_key(e->path, strlen(e->path), &key, settings);
+ add_key_to_filter(&key, filter, settings);
+ clear_bloom_key(&key);
}
- hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry);
+ cleanup:
+ hashmap_clear_and_free(&pathmap, struct pathmap_hash_entry, entry);
} else {
for (i = 0; i < diff_queued_diff.nr; i++)
diff_free_filepair(diff_queued_diff.queue[i]);
- filter->data = NULL;
- filter->len = 0;
+ init_truncated_large_filter(filter);
+
+ if (computed)
+ *computed |= BLOOM_TRUNC_LARGE;
}
+ if (computed)
+ *computed |= BLOOM_COMPUTED;
+
free(diff_queued_diff.queue);
DIFF_QUEUE_CLEAR(&diff_queued_diff);