diff options
Diffstat (limited to 'bloom.c')
-rw-r--r-- | bloom.c | 131 |
1 files changed, 98 insertions, 33 deletions
@@ -2,10 +2,10 @@ #include "bloom.h" #include "diff.h" #include "diffcore.h" -#include "revision.h" #include "hashmap.h" #include "commit-graph.h" #include "commit.h" +#include "commit-slab.h" define_commit_slab(bloom_filter_slab, struct bloom_filter); @@ -28,20 +28,40 @@ static inline unsigned char get_bitmask(uint32_t pos) return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1)); } +static int check_bloom_offset(struct commit_graph *g, uint32_t pos, + uint32_t offset) +{ + /* + * Note that we allow offsets equal to the data size, which would set + * our pointers at one past the end of the chunk memory. This is + * necessary because the on-disk index points to the end of the + * entries (so we can compute size by comparing adjacent ones). And + * naturally the final entry's end is one-past-the-end of the chunk. + */ + if (offset <= g->chunk_bloom_data_size - BLOOMDATA_CHUNK_HEADER_SIZE) + return 0; + + warning("ignoring out-of-range offset (%"PRIuMAX") for changed-path" + " filter at pos %"PRIuMAX" of %s (chunk size: %"PRIuMAX")", + (uintmax_t)offset, (uintmax_t)pos, + g->filename, (uintmax_t)g->chunk_bloom_data_size); + return -1; +} + static int load_bloom_filter_from_graph(struct commit_graph *g, struct bloom_filter *filter, - struct commit *c) + uint32_t graph_pos) { uint32_t lex_pos, start_index, end_index; - while (c->graph_pos < g->num_commits_in_base) + while (graph_pos < g->num_commits_in_base) g = g->base_graph; - /* The commit graph commit 'c' lives in doesn't carry bloom filters. */ + /* The commit graph commit 'c' lives in doesn't carry Bloom filters. */ if (!g->chunk_bloom_indexes) return 0; - lex_pos = c->graph_pos - g->num_commits_in_base; + lex_pos = graph_pos - g->num_commits_in_base; end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos); @@ -50,6 +70,20 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, else start_index = 0; + if (check_bloom_offset(g, lex_pos, end_index) < 0 || + check_bloom_offset(g, lex_pos - 1, start_index) < 0) + return 0; + + if (end_index < start_index) { + warning("ignoring decreasing changed-path index offsets" + " (%"PRIuMAX" > %"PRIuMAX") for positions" + " %"PRIuMAX" and %"PRIuMAX" of %s", + (uintmax_t)start_index, (uintmax_t)end_index, + (uintmax_t)(lex_pos-1), (uintmax_t)lex_pos, + g->filename); + return 0; + } + filter->len = end_index - start_index; filter->data = (unsigned char *)(g->chunk_bloom_data + sizeof(unsigned char) * start_index + @@ -138,6 +172,11 @@ void fill_bloom_key(const char *data, key->hashes[i] = hash0 + i * hash1; } +void clear_bloom_key(struct bloom_key *key) +{ + FREE_AND_NULL(key->hashes); +} + void add_key_to_filter(const struct bloom_key *key, struct bloom_filter *filter, const struct bloom_filter_settings *settings) @@ -158,10 +197,10 @@ void init_bloom_filters(void) init_bloom_filter_slab(&bloom_filters); } -static int pathmap_cmp(const void *hashmap_cmp_fn_data, +static int pathmap_cmp(const void *hashmap_cmp_fn_data UNUSED, const struct hashmap_entry *eptr, const struct hashmap_entry *entry_or_key, - const void *keydata) + const void *keydata UNUSED) { const struct pathmap_hash_entry *e1, *e2; @@ -171,39 +210,47 @@ static int pathmap_cmp(const void *hashmap_cmp_fn_data, return strcmp(e1->path, e2->path); } -struct bloom_filter *get_bloom_filter(struct repository *r, - struct commit *c, - int compute_if_not_present) +static void init_truncated_large_filter(struct bloom_filter *filter) +{ + filter->data = xmalloc(1); + filter->data[0] = 0xFF; + filter->len = 1; +} + +struct bloom_filter *get_or_compute_bloom_filter(struct repository *r, + struct commit *c, + int compute_if_not_present, + const struct bloom_filter_settings *settings, + enum bloom_filter_computed *computed) { struct bloom_filter *filter; - struct bloom_filter_settings settings = DEFAULT_BLOOM_FILTER_SETTINGS; int i; struct diff_options diffopt; - int max_changes = 512; - if (bloom_filters.slab_size == 0) + if (computed) + *computed = BLOOM_NOT_COMPUTED; + + if (!bloom_filters.slab_size) return NULL; filter = bloom_filter_slab_at(&bloom_filters, c); if (!filter->data) { - load_commit_graph_info(r, c); - if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && - r->objects->commit_graph->chunk_bloom_indexes) { - if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) - return filter; - else - return NULL; - } + uint32_t graph_pos; + if (repo_find_commit_pos_in_graph(r, c, &graph_pos)) + load_bloom_filter_from_graph(r->objects->commit_graph, + filter, graph_pos); } - if (filter->data || !compute_if_not_present) + if (filter->data && filter->len) return filter; + if (!compute_if_not_present) + return NULL; repo_diff_setup(r, &diffopt); diffopt.flags.recursive = 1; diffopt.detect_rename = 0; - diffopt.max_changes = max_changes; + diffopt.max_changes = settings->max_changed_paths; diff_setup_done(&diffopt); /* ensure commit is parsed so we have parent information */ @@ -215,11 +262,10 @@ struct bloom_filter *get_bloom_filter(struct repository *r, diff_tree_oid(NULL, &c->object.oid, "", &diffopt); diffcore_std(&diffopt); - if (diffopt.num_changes <= max_changes) { - struct hashmap pathmap; + if (diff_queued_diff.nr <= settings->max_changed_paths) { + struct hashmap pathmap = HASHMAP_INIT(pathmap_cmp, NULL); struct pathmap_hash_entry *e; struct hashmap_iter iter; - hashmap_init(&pathmap, pathmap_cmp, NULL, 0); for (i = 0; i < diff_queued_diff.nr; i++) { const char *path = diff_queued_diff.queue[i]->two->path; @@ -252,23 +298,42 @@ struct bloom_filter *get_bloom_filter(struct repository *r, diff_free_filepair(diff_queued_diff.queue[i]); } - filter->len = (hashmap_get_size(&pathmap) * settings.bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; - filter->data = xcalloc(filter->len, sizeof(unsigned char)); + if (hashmap_get_size(&pathmap) > settings->max_changed_paths) { + init_truncated_large_filter(filter); + if (computed) + *computed |= BLOOM_TRUNC_LARGE; + goto cleanup; + } + + filter->len = (hashmap_get_size(&pathmap) * settings->bits_per_entry + BITS_PER_WORD - 1) / BITS_PER_WORD; + if (!filter->len) { + if (computed) + *computed |= BLOOM_TRUNC_EMPTY; + filter->len = 1; + } + CALLOC_ARRAY(filter->data, filter->len); hashmap_for_each_entry(&pathmap, &iter, e, entry) { struct bloom_key key; - fill_bloom_key(e->path, strlen(e->path), &key, &settings); - add_key_to_filter(&key, filter, &settings); + fill_bloom_key(e->path, strlen(e->path), &key, settings); + add_key_to_filter(&key, filter, settings); + clear_bloom_key(&key); } - hashmap_free_entries(&pathmap, struct pathmap_hash_entry, entry); + cleanup: + hashmap_clear_and_free(&pathmap, struct pathmap_hash_entry, entry); } else { for (i = 0; i < diff_queued_diff.nr; i++) diff_free_filepair(diff_queued_diff.queue[i]); - filter->data = NULL; - filter->len = 0; + init_truncated_large_filter(filter); + + if (computed) + *computed |= BLOOM_TRUNC_LARGE; } + if (computed) + *computed |= BLOOM_COMPUTED; + free(diff_queued_diff.queue); DIFF_QUEUE_CLEAR(&diff_queued_diff); |