summaryrefslogtreecommitdiff
path: root/revision.c
diff options
context:
space:
mode:
Diffstat (limited to 'revision.c')
-rw-r--r--revision.c274
1 files changed, 248 insertions, 26 deletions
diff --git a/revision.c b/revision.c
index 8136929..6de29cd 100644
--- a/revision.c
+++ b/revision.c
@@ -29,6 +29,8 @@
#include "prio-queue.h"
#include "hashmap.h"
#include "utf8.h"
+#include "bloom.h"
+#include "json-writer.h"
volatile show_early_output_fn_t show_early_output;
@@ -37,6 +39,8 @@ static const char *term_good;
implement_shared_commit_slab(revision_sources, char *);
+static inline int want_ancestry(const struct rev_info *revs);
+
void show_object_with_name(FILE *out, struct object *obj, const char *name)
{
const char *p;
@@ -624,11 +628,161 @@ static void file_change(struct diff_options *options,
options->flags.has_changes = 1;
}
+static int bloom_filter_atexit_registered;
+static unsigned int count_bloom_filter_maybe;
+static unsigned int count_bloom_filter_definitely_not;
+static unsigned int count_bloom_filter_false_positive;
+static unsigned int count_bloom_filter_not_present;
+
+static void trace2_bloom_filter_statistics_atexit(void)
+{
+ struct json_writer jw = JSON_WRITER_INIT;
+
+ jw_object_begin(&jw, 0);
+ jw_object_intmax(&jw, "filter_not_present", count_bloom_filter_not_present);
+ jw_object_intmax(&jw, "maybe", count_bloom_filter_maybe);
+ jw_object_intmax(&jw, "definitely_not", count_bloom_filter_definitely_not);
+ jw_object_intmax(&jw, "false_positive", count_bloom_filter_false_positive);
+ jw_end(&jw);
+
+ trace2_data_json("bloom", the_repository, "statistics", &jw);
+
+ jw_release(&jw);
+}
+
+static int forbid_bloom_filters(struct pathspec *spec)
+{
+ if (spec->has_wildcard)
+ return 1;
+ if (spec->nr > 1)
+ return 1;
+ if (spec->magic & ~PATHSPEC_LITERAL)
+ return 1;
+ if (spec->nr && (spec->items[0].magic & ~PATHSPEC_LITERAL))
+ return 1;
+
+ return 0;
+}
+
+static void prepare_to_use_bloom_filter(struct rev_info *revs)
+{
+ struct pathspec_item *pi;
+ char *path_alloc = NULL;
+ const char *path, *p;
+ int last_index;
+ size_t len;
+ int path_component_nr = 1;
+
+ if (!revs->commits)
+ return;
+
+ if (forbid_bloom_filters(&revs->prune_data))
+ return;
+
+ repo_parse_commit(revs->repo, revs->commits->item);
+
+ if (!revs->repo->objects->commit_graph)
+ return;
+
+ revs->bloom_filter_settings = revs->repo->objects->commit_graph->bloom_filter_settings;
+ if (!revs->bloom_filter_settings)
+ return;
+
+ if (!revs->pruning.pathspec.nr)
+ return;
+
+ pi = &revs->pruning.pathspec.items[0];
+ last_index = pi->len - 1;
+
+ /* remove single trailing slash from path, if needed */
+ if (pi->match[last_index] == '/') {
+ path_alloc = xstrdup(pi->match);
+ path_alloc[last_index] = '\0';
+ path = path_alloc;
+ } else
+ path = pi->match;
+
+ len = strlen(path);
+ if (!len) {
+ revs->bloom_filter_settings = NULL;
+ return;
+ }
+
+ p = path;
+ while (*p) {
+ /*
+ * At this point, the path is normalized to use Unix-style
+ * path separators. This is required due to how the
+ * changed-path Bloom filters store the paths.
+ */
+ if (*p == '/')
+ path_component_nr++;
+ p++;
+ }
+
+ revs->bloom_keys_nr = path_component_nr;
+ ALLOC_ARRAY(revs->bloom_keys, revs->bloom_keys_nr);
+
+ fill_bloom_key(path, len, &revs->bloom_keys[0],
+ revs->bloom_filter_settings);
+ path_component_nr = 1;
+
+ p = path + len - 1;
+ while (p > path) {
+ if (*p == '/')
+ fill_bloom_key(path, p - path,
+ &revs->bloom_keys[path_component_nr++],
+ revs->bloom_filter_settings);
+ p--;
+ }
+
+ if (trace2_is_enabled() && !bloom_filter_atexit_registered) {
+ atexit(trace2_bloom_filter_statistics_atexit);
+ bloom_filter_atexit_registered = 1;
+ }
+
+ free(path_alloc);
+}
+
+static int check_maybe_different_in_bloom_filter(struct rev_info *revs,
+ struct commit *commit)
+{
+ struct bloom_filter *filter;
+ int result = 1, j;
+
+ if (!revs->repo->objects->commit_graph)
+ return -1;
+
+ if (commit_graph_generation(commit) == GENERATION_NUMBER_INFINITY)
+ return -1;
+
+ filter = get_bloom_filter(revs->repo, commit, 0);
+
+ if (!filter) {
+ count_bloom_filter_not_present++;
+ return -1;
+ }
+
+ for (j = 0; result && j < revs->bloom_keys_nr; j++) {
+ result = bloom_filter_contains(filter,
+ &revs->bloom_keys[j],
+ revs->bloom_filter_settings);
+ }
+
+ if (result)
+ count_bloom_filter_maybe++;
+ else
+ count_bloom_filter_definitely_not++;
+
+ return result;
+}
+
static int rev_compare_tree(struct rev_info *revs,
- struct commit *parent, struct commit *commit)
+ struct commit *parent, struct commit *commit, int nth_parent)
{
struct tree *t1 = get_commit_tree(parent);
struct tree *t2 = get_commit_tree(commit);
+ int bloom_ret = 1;
if (!t1)
return REV_TREE_NEW;
@@ -653,17 +807,26 @@ static int rev_compare_tree(struct rev_info *revs,
return REV_TREE_SAME;
}
+ if (revs->bloom_keys_nr && !nth_parent) {
+ bloom_ret = check_maybe_different_in_bloom_filter(revs, commit);
+
+ if (bloom_ret == 0)
+ return REV_TREE_SAME;
+ }
+
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
- if (diff_tree_oid(&t1->object.oid, &t2->object.oid, "",
- &revs->pruning) < 0)
- return REV_TREE_DIFFERENT;
+ diff_tree_oid(&t1->object.oid, &t2->object.oid, "", &revs->pruning);
+
+ if (!nth_parent)
+ if (bloom_ret == 1 && tree_difference == REV_TREE_SAME)
+ count_bloom_filter_false_positive++;
+
return tree_difference;
}
static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
{
- int retval;
struct tree *t1 = get_commit_tree(commit);
if (!t1)
@@ -671,9 +834,9 @@ static int rev_same_tree_as_empty(struct rev_info *revs, struct commit *commit)
tree_difference = REV_TREE_SAME;
revs->pruning.flags.has_changes = 0;
- retval = diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
+ diff_tree_oid(NULL, &t1->object.oid, "", &revs->pruning);
- return retval >= 0 && (tree_difference == REV_TREE_SAME);
+ return tree_difference == REV_TREE_SAME;
}
struct treesame_state {
@@ -855,7 +1018,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
die("cannot simplify commit %s (because of %s)",
oid_to_hex(&commit->object.oid),
oid_to_hex(&p->object.oid));
- switch (rev_compare_tree(revs, p, commit)) {
+ switch (rev_compare_tree(revs, p, commit, nth_parent)) {
case REV_TREE_SAME:
if (!revs->simplify_history || !relevant_commit(p)) {
/* Even if a merge with an uninteresting
@@ -870,7 +1033,19 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
}
parent->next = NULL;
commit->parents = parent;
- commit->object.flags |= TREESAME;
+
+ /*
+ * A merge commit is a "diversion" if it is not
+ * TREESAME to its first parent but is TREESAME
+ * to a later parent. In the simplified history,
+ * we "divert" the history walk to the later
+ * parent. These commits are shown when "show_pulls"
+ * is enabled, so do not mark the object as
+ * TREESAME here.
+ */
+ if (!revs->show_pulls || !nth_parent)
+ commit->object.flags |= TREESAME;
+
return;
case REV_TREE_NEW:
@@ -897,6 +1072,10 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
relevant_change = 1;
else
irrelevant_change = 1;
+
+ if (!nth_parent)
+ commit->object.flags |= PULL_MERGE;
+
continue;
}
die("bad tree compare for commit %s", oid_to_hex(&commit->object.oid));
@@ -1253,7 +1432,8 @@ static int limit_list(struct rev_info *revs)
continue;
break;
}
- if (revs->min_age != -1 && (commit->date > revs->min_age))
+ if (revs->min_age != -1 && (commit->date > revs->min_age) &&
+ !revs->line_level_traverse)
continue;
date = commit->date;
p = &commit_list_insert(commit, p)->next;
@@ -1452,7 +1632,7 @@ static void add_other_reflogs_to_pending(struct all_refs_cb *cb)
{
struct worktree **worktrees, **p;
- worktrees = get_worktrees(0);
+ worktrees = get_worktrees();
for (p = worktrees; *p; p++) {
struct worktree *wt = *p;
@@ -1540,7 +1720,7 @@ void add_index_objects_to_pending(struct rev_info *revs, unsigned int flags)
if (revs->single_worktree)
return;
- worktrees = get_worktrees(0);
+ worktrees = get_worktrees();
for (p = worktrees; *p; p++) {
struct worktree *wt = *p;
struct index_state istate = { NULL };
@@ -2241,6 +2421,10 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
revs->topo_order = 1;
revs->rewrite_parents = 1;
revs->graph = graph_init(revs);
+ } else if (!strcmp(arg, "--encode-email-headers")) {
+ revs->encode_email_headers = 1;
+ } else if (!strcmp(arg, "--no-encode-email-headers")) {
+ revs->encode_email_headers = 0;
} else if (!strcmp(arg, "--root")) {
revs->show_root_diff = 1;
} else if (!strcmp(arg, "--no-commit-id")) {
@@ -2265,6 +2449,8 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
} else if (!strcmp(arg, "--full-diff")) {
revs->diff = 1;
revs->full_diff = 1;
+ } else if (!strcmp(arg, "--show-pulls")) {
+ revs->show_pulls = 1;
} else if (!strcmp(arg, "--full-history")) {
revs->simplify_history = 0;
} else if (!strcmp(arg, "--relative-date")) {
@@ -2652,6 +2838,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
if (revs->diffopt.objfind)
revs->simplify_history = 0;
+ if (revs->line_level_traverse) {
+ if (want_ancestry(revs))
+ revs->limited = 1;
+ revs->topo_order = 1;
+ }
+
if (revs->topo_order && !generation_numbers_enabled(the_repository))
revs->limited = 1;
@@ -2671,11 +2863,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
revs->diffopt.abbrev = revs->abbrev;
- if (revs->line_level_traverse) {
- revs->limited = 1;
- revs->topo_order = 1;
- }
-
diff_setup_done(&revs->diffopt);
grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED,
@@ -3019,7 +3206,8 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c
if (!cnt ||
(commit->object.flags & UNINTERESTING) ||
!(commit->object.flags & TREESAME) ||
- (parent = one_relevant_parent(revs, commit->parents)) == NULL)
+ (parent = one_relevant_parent(revs, commit->parents)) == NULL ||
+ (revs->show_pulls && (commit->object.flags & PULL_MERGE)))
st->simplified = commit;
else {
pst = locate_simplify_state(revs, parent);
@@ -3155,7 +3343,7 @@ static void explore_to_depth(struct rev_info *revs,
struct topo_walk_info *info = revs->topo_walk_info;
struct commit *c;
while ((c = prio_queue_peek(&info->explore_queue)) &&
- c->generation >= gen_cutoff)
+ commit_graph_generation(c) >= gen_cutoff)
explore_walk_step(revs);
}
@@ -3171,7 +3359,7 @@ static void indegree_walk_step(struct rev_info *revs)
if (parse_commit_gently(c, 1) < 0)
return;
- explore_to_depth(revs, c->generation);
+ explore_to_depth(revs, commit_graph_generation(c));
for (p = c->parents; p; p = p->next) {
struct commit *parent = p->item;
@@ -3195,7 +3383,7 @@ static void compute_indegrees_to_depth(struct rev_info *revs,
struct topo_walk_info *info = revs->topo_walk_info;
struct commit *c;
while ((c = prio_queue_peek(&info->indegree_queue)) &&
- c->generation >= gen_cutoff)
+ commit_graph_generation(c) >= gen_cutoff)
indegree_walk_step(revs);
}
@@ -3248,6 +3436,7 @@ static void init_topo_walk(struct rev_info *revs)
info->min_generation = GENERATION_NUMBER_INFINITY;
for (list = revs->commits; list; list = list->next) {
struct commit *c = list->item;
+ uint32_t generation;
if (parse_commit_gently(c, 1))
continue;
@@ -3255,8 +3444,9 @@ static void init_topo_walk(struct rev_info *revs)
test_flag_and_insert(&info->explore_queue, c, TOPO_WALK_EXPLORED);
test_flag_and_insert(&info->indegree_queue, c, TOPO_WALK_INDEGREE);
- if (c->generation < info->min_generation)
- info->min_generation = c->generation;
+ generation = commit_graph_generation(c);
+ if (generation < info->min_generation)
+ info->min_generation = generation;
*(indegree_slab_at(&info->indegree, c)) = 1;
@@ -3307,6 +3497,7 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
for (p = commit->parents; p; p = p->next) {
struct commit *parent = p->item;
int *pi;
+ uint32_t generation;
if (parent->object.flags & UNINTERESTING)
continue;
@@ -3314,8 +3505,9 @@ static void expand_topo_walk(struct rev_info *revs, struct commit *commit)
if (parse_commit_gently(parent, 1) < 0)
continue;
- if (parent->generation < info->min_generation) {
- info->min_generation = parent->generation;
+ generation = commit_graph_generation(parent);
+ if (generation < info->min_generation) {
+ info->min_generation = generation;
compute_indegrees_to_depth(revs, info->min_generation);
}
@@ -3362,6 +3554,8 @@ int prepare_revision_walk(struct rev_info *revs)
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
+ if (!revs->reflog_info)
+ prepare_to_use_bloom_filter(revs);
if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
commit_list_sort_by_date(&revs->commits);
if (revs->no_walk)
@@ -3373,12 +3567,20 @@ int prepare_revision_walk(struct rev_info *revs)
sort_in_topological_order(&revs->commits, revs->sort_order);
} else if (revs->topo_order)
init_topo_walk(revs);
- if (revs->line_level_traverse)
+ if (revs->line_level_traverse && want_ancestry(revs))
+ /*
+ * At the moment we can only do line-level log with parent
+ * rewriting by performing this expensive pre-filtering step.
+ * If parent rewriting is not requested, then we rather
+ * perform the line-level log filtering during the regular
+ * history traversal.
+ */
line_log_filter(revs);
if (revs->simplify_merges)
simplify_merges(revs);
if (revs->children.name)
set_children(revs);
+
return 0;
}
@@ -3583,6 +3785,22 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
return commit_ignore;
if (commit->object.flags & UNINTERESTING)
return commit_ignore;
+ if (revs->line_level_traverse && !want_ancestry(revs)) {
+ /*
+ * In case of line-level log with parent rewriting
+ * prepare_revision_walk() already took care of all line-level
+ * log filtering, and there is nothing left to do here.
+ *
+ * If parent rewriting was not requested, then this is the
+ * place to perform the line-level log filtering. Notably,
+ * this check, though expensive, must come before the other,
+ * cheaper filtering conditions, because the tracked line
+ * ranges must be adjusted even when the commit will end up
+ * being ignored based on other conditions.
+ */
+ if (!line_log_process_ranges_arbitrary_commit(revs, commit))
+ return commit_ignore;
+ }
if (revs->min_age != -1 &&
comparison_date(revs, commit) > revs->min_age)
return commit_ignore;
@@ -3602,6 +3820,10 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
/* drop merges unless we want parenthood */
if (!want_ancestry(revs))
return commit_ignore;
+
+ if (revs->show_pulls && (commit->object.flags & PULL_MERGE))
+ return commit_show;
+
/*
* If we want ancestry, then need to keep any merges
* between relevant commits to tie together topology.