From 8fb572af5f4e3bef2415ac2dba8d76d37235b489 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 25 Apr 2018 14:37:54 +0000 Subject: ref-filter: fix outdated comment on in_commit_list The in_commit_list() method does not check the parents of the candidate for containment in the list. Fix the comment that incorrectly states that it does. Reported-by: Jakub Narebski Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/ref-filter.c b/ref-filter.c index cffd8bf..aff24d9 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1582,7 +1582,7 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) } /* - * Test whether the candidate or one of its parents is contained in the list. + * Test whether the candidate is contained in the list. * Do not recurse to find out, though, but return -1 if inconclusive. */ static enum contains_result contains_test(struct commit *candidate, -- cgit v0.10.2-6-g49f6 From 83073cc994cc3cd364f3f213478b9162476e8e44 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 25 Apr 2018 14:37:55 +0000 Subject: commit: add generation number to struct commit The generation number of a commit is defined recursively as follows: * If a commit A has no parents, then the generation number of A is one. * If a commit A has parents, then the generation number of A is one more than the maximum generation number among the parents of A. Add a uint32_t generation field to struct commit so we can pass this information to revision walks. We use three special values to signal the generation number is invalid: GENERATION_NUMBER_INFINITY 0xFFFFFFFF GENERATION_NUMBER_MAX 0x3FFFFFFF GENERATION_NUMBER_ZERO 0 The first (_INFINITY) means the generation number has not been loaded or computed. The second (_MAX) means the generation number is too large to store in the commit-graph file. The third (_ZERO) means the generation number was loaded from a commit graph file that was written by a version of git that did not support generation numbers. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index cf4f8b6..e8ab14f 100644 --- a/alloc.c +++ b/alloc.c @@ -94,6 +94,7 @@ void *alloc_commit_node(void) c->object.type = OBJ_COMMIT; c->index = alloc_commit_index(); c->graph_pos = COMMIT_NOT_FROM_GRAPH; + c->generation = GENERATION_NUMBER_INFINITY; return c; } diff --git a/commit-graph.c b/commit-graph.c index 70fa1b2..9ad21c3 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -262,6 +262,8 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin date_low = get_be32(commit_data + g->hash_len + 12); item->date = (timestamp_t)((date_high << 32) | date_low); + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; + pptr = &item->parents; edge_value = get_be32(commit_data + g->hash_len); diff --git a/commit.h b/commit.h index 23a3f36..aac3b8c 100644 --- a/commit.h +++ b/commit.h @@ -10,6 +10,9 @@ #include "pretty.h" #define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF +#define GENERATION_NUMBER_INFINITY 0xFFFFFFFF +#define GENERATION_NUMBER_MAX 0x3FFFFFFF +#define GENERATION_NUMBER_ZERO 0 struct commit_list { struct commit *item; @@ -30,6 +33,7 @@ struct commit { */ struct tree *maybe_tree; uint32_t graph_pos; + uint32_t generation; }; extern int save_commit_buffer; -- cgit v0.10.2-6-g49f6 From 3258c66332abaf6e3e8fd81cab07ae804760cd08 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:09 +0000 Subject: commit-graph: compute generation numbers While preparing commits to be written into a commit-graph file, compute the generation numbers using a depth-first strategy. The only commits that are walked in this depth-first search are those without a precomputed generation number. Thus, computation time will be relative to the number of new commits to the commit-graph file. If a computed generation number would exceed GENERATION_NUMBER_MAX, then use GENERATION_NUMBER_MAX instead. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 9ad21c3..36d765e 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -439,6 +439,8 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, else packedDate[0] = 0; + packedDate[0] |= htonl((*list)->generation << 2); + packedDate[1] = htonl((*list)->date); hashwrite(f, packedDate, 8); @@ -571,6 +573,45 @@ static void close_reachable(struct packed_oid_list *oids) } } +static void compute_generation_numbers(struct packed_commit_list* commits) +{ + int i; + struct commit_list *list = NULL; + + for (i = 0; i < commits->nr; i++) { + if (commits->list[i]->generation != GENERATION_NUMBER_INFINITY && + commits->list[i]->generation != GENERATION_NUMBER_ZERO) + continue; + + commit_list_insert(commits->list[i], &list); + while (list) { + struct commit *current = list->item; + struct commit_list *parent; + int all_parents_computed = 1; + uint32_t max_generation = 0; + + for (parent = current->parents; parent; parent = parent->next) { + if (parent->item->generation == GENERATION_NUMBER_INFINITY || + parent->item->generation == GENERATION_NUMBER_ZERO) { + all_parents_computed = 0; + commit_list_insert(parent->item, &list); + break; + } else if (parent->item->generation > max_generation) { + max_generation = parent->item->generation; + } + } + + if (all_parents_computed) { + current->generation = max_generation + 1; + pop_commit(&list); + + if (current->generation > GENERATION_NUMBER_MAX) + current->generation = GENERATION_NUMBER_MAX; + } + } + } +} + void write_commit_graph(const char *obj_dir, const char **pack_indexes, int nr_packs, @@ -694,6 +735,8 @@ void write_commit_graph(const char *obj_dir, if (commits.nr >= GRAPH_PARENT_MISSING) die(_("too many commits to write graph")); + compute_generation_numbers(&commits); + graph_name = get_commit_graph_filename(obj_dir); fd = hold_lock_file_for_update(&lk, graph_name, 0); -- cgit v0.10.2-6-g49f6 From 3afc679b3c13d99e4f02bceb686f11d51576d3ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:11 +0000 Subject: commit: use generations in paint_down_to_common() Define compare_commits_by_gen_then_commit_date(), which uses generation numbers as a primary comparison and commit date to break ties (or as a comparison when both commits do not have computed generation numbers). Since the commit-graph file is closed under reachability, we know that all commits in the file have generation at most GENERATION_NUMBER_MAX which is less than GENERATION_NUMBER_INFINITY. This change does not affect the number of commits that are walked during the execution of paint_down_to_common(), only the order that those commits are inspected. In the case that commit dates violate topological order (i.e. a parent is "newer" than a child), the previous code could walk a commit twice: if a commit is reached with the PARENT1 bit, but later is re-visited with the PARENT2 bit, then that PARENT2 bit must be propagated to its parents. Using generation numbers avoids this extra effort, even if it is somewhat rare. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index 711f674..4d00b0a 100644 --- a/commit.c +++ b/commit.c @@ -640,6 +640,24 @@ static int compare_commits_by_author_date(const void *a_, const void *b_, return 0; } +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused) +{ + const struct commit *a = a_, *b = b_; + + /* newer commits first */ + if (a->generation < b->generation) + return 1; + else if (a->generation > b->generation) + return -1; + + /* use date as a heuristic when generations are equal */ + if (a->date < b->date) + return 1; + else if (a->date > b->date) + return -1; + return 0; +} + int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused) { const struct commit *a = a_, *b = b_; @@ -789,7 +807,7 @@ static int queue_has_nonstale(struct prio_queue *queue) /* all input commits in one and twos[] must have been parsed! */ static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) { - struct prio_queue queue = { compare_commits_by_commit_date }; + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; diff --git a/commit.h b/commit.h index aac3b8c..64436ff 100644 --- a/commit.h +++ b/commit.h @@ -341,6 +341,7 @@ extern int remove_signature(struct strbuf *buf); extern int check_commit_signature(const struct commit *commit, struct signature_check *sigc); int compare_commits_by_commit_date(const void *a_, const void *b_, void *unused); +int compare_commits_by_gen_then_commit_date(const void *a_, const void *b_, void *unused); LAST_ARG_MUST_BE_NULL extern int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...); -- cgit v0.10.2-6-g49f6 From e2838d85b6d35592ff5851d67f0232a78083ada7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:13 +0000 Subject: commit-graph: always load commit-graph information Most code paths load commits using lookup_commit() and then parse_commit(). In some cases, including some branch lookups, the commit is parsed using parse_object_buffer() which side-steps parse_commit() in favor of parse_commit_buffer(). With generation numbers in the commit-graph, we need to ensure that any commit that exists in the commit-graph file has its generation number loaded. Create new load_commit_graph_info() method to fill in the information for a commit that exists only in the commit-graph file. Call it from parse_commit_buffer() after loading the other commit information from the given buffer. Only fill this information when specified by the 'check_graph' parameter. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 36d765e..a8c337d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -245,6 +245,13 @@ static struct commit_list **insert_parent_or_die(struct commit_graph *g, return &commit_list_insert(c, pptr)->next; } +static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos) +{ + const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos; + item->graph_pos = pos; + item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; +} + static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t pos) { uint32_t edge_value; @@ -292,31 +299,40 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin return 1; } +static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) +{ + if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { + *pos = item->graph_pos; + return 1; + } else { + return bsearch_graph(g, &(item->object.oid), pos); + } +} + int parse_commit_in_graph(struct commit *item) { + uint32_t pos; + if (!core_commit_graph) return 0; if (item->object.parsed) return 1; - prepare_commit_graph(); - if (commit_graph) { - uint32_t pos; - int found; - if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { - pos = item->graph_pos; - found = 1; - } else { - found = bsearch_graph(commit_graph, &(item->object.oid), &pos); - } - - if (found) - return fill_commit_in_graph(item, commit_graph, pos); - } - + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) + return fill_commit_in_graph(item, commit_graph, pos); return 0; } +void load_commit_graph_info(struct commit *item) +{ + uint32_t pos; + if (!core_commit_graph) + return; + prepare_commit_graph(); + if (commit_graph && find_commit_in_graph(item, commit_graph, &pos)) + fill_commit_graph_info(item, commit_graph, pos); +} + static struct tree *load_tree_for_commit(struct commit_graph *g, struct commit *c) { struct object_id oid; diff --git a/commit-graph.h b/commit-graph.h index 260a468..96cccb1 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -17,6 +17,14 @@ char *get_commit_graph_filename(const char *obj_dir); */ int parse_commit_in_graph(struct commit *item); +/* + * It is possible that we loaded commit contents from the commit buffer, + * but we also want to ensure the commit-graph content is correctly + * checked and filled. Fill the graph_pos and generation members of + * the given commit. + */ +void load_commit_graph_info(struct commit *item); + struct tree *get_commit_tree_in_graph(const struct commit *c); struct commit_graph { diff --git a/commit.c b/commit.c index 4d00b0a..39a3749 100644 --- a/commit.c +++ b/commit.c @@ -331,7 +331,7 @@ const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) return ret; } -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph) { const char *tail = buffer; const char *bufptr = buffer; @@ -386,6 +386,9 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s } item->date = parse_commit_date(bufptr, tail); + if (check_graph) + load_commit_graph_info(item); + return 0; } @@ -412,7 +415,7 @@ int parse_commit_gently(struct commit *item, int quiet_on_missing) return error("Object %s not a commit", oid_to_hex(&item->object.oid)); } - ret = parse_commit_buffer(item, buffer, size); + ret = parse_commit_buffer(item, buffer, size, 0); if (save_commit_buffer && !ret) { set_commit_buffer(item, buffer, size); return 0; diff --git a/commit.h b/commit.h index 64436ff..b5afde1 100644 --- a/commit.h +++ b/commit.h @@ -72,7 +72,7 @@ struct commit *lookup_commit_reference_by_name(const char *name); */ struct commit *lookup_commit_or_die(const struct object_id *oid, const char *ref_name); -int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size); +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size, int check_graph); int parse_commit_gently(struct commit *item, int quiet_on_missing); static inline int parse_commit(struct commit *item) { diff --git a/object.c b/object.c index e6ad3f6..efe4871 100644 --- a/object.c +++ b/object.c @@ -207,7 +207,7 @@ struct object *parse_object_buffer(const struct object_id *oid, enum object_type } else if (type == OBJ_COMMIT) { struct commit *commit = lookup_commit(oid); if (commit) { - if (parse_commit_buffer(commit, buffer, size)) + if (parse_commit_buffer(commit, buffer, size, 1)) return NULL; if (!get_cached_commit_buffer(commit, NULL)) { set_commit_buffer(commit, buffer, size); diff --git a/sha1_file.c b/sha1_file.c index 1b94f39..0fd4f0b 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1755,7 +1755,7 @@ static void check_commit(const void *buf, size_t size) { struct commit c; memset(&c, 0, sizeof(c)); - if (parse_commit_buffer(&c, buf, size)) + if (parse_commit_buffer(&c, buf, size, 0)) die("corrupt commit"); } -- cgit v0.10.2-6-g49f6 From 819807b33f820dc17d96f043747daf18c5e38516 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:15 +0000 Subject: ref-filter: use generation number for --contains A commit A can reach a commit B only if the generation number of A is strictly larger than the generation number of B. This condition allows significantly short-circuiting commit-graph walks. Use generation number for '--contains' type queries. On a copy of the Linux repository where HEAD is contained in v4.13 but no earlier tag, the command 'git tag --contains HEAD' had the following peformance improvement: Before: 0.81s After: 0.04s Rel %: -95% Helped-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/ref-filter.c b/ref-filter.c index aff24d9..fb35067 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -16,6 +16,7 @@ #include "trailer.h" #include "wt-status.h" #include "commit-slab.h" +#include "commit-graph.h" static struct ref_msg { const char *gone; @@ -1587,7 +1588,8 @@ static int in_commit_list(const struct commit_list *want, struct commit *c) */ static enum contains_result contains_test(struct commit *candidate, const struct commit_list *want, - struct contains_cache *cache) + struct contains_cache *cache, + uint32_t cutoff) { enum contains_result *cached = contains_cache_at(cache, candidate); @@ -1603,6 +1605,10 @@ static enum contains_result contains_test(struct commit *candidate, /* Otherwise, we don't know; prepare to recurse */ parse_commit_or_die(candidate); + + if (candidate->generation < cutoff) + return CONTAINS_NO; + return CONTAINS_UNKNOWN; } @@ -1618,8 +1624,18 @@ static enum contains_result contains_tag_algo(struct commit *candidate, struct contains_cache *cache) { struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result = contains_test(candidate, want, cache); + enum contains_result result; + uint32_t cutoff = GENERATION_NUMBER_INFINITY; + const struct commit_list *p; + + for (p = want; p; p = p->next) { + struct commit *c = p->item; + load_commit_graph_info(c); + if (c->generation < cutoff) + cutoff = c->generation; + } + result = contains_test(candidate, want, cache, cutoff); if (result != CONTAINS_UNKNOWN) return result; @@ -1637,7 +1653,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, * If we just popped the stack, parents->item has been marked, * therefore contains_test will return a meaningful yes/no. */ - else switch (contains_test(parents->item, want, cache)) { + else switch (contains_test(parents->item, want, cache, cutoff)) { case CONTAINS_YES: *contains_cache_at(cache, commit) = CONTAINS_YES; contains_stack.nr--; @@ -1651,7 +1667,7 @@ static enum contains_result contains_tag_algo(struct commit *candidate, } } free(contains_stack.contains_stack); - return contains_test(candidate, want, cache); + return contains_test(candidate, want, cache, cutoff); } static int commit_contains(struct ref_filter *filter, struct commit *commit, -- cgit v0.10.2-6-g49f6 From f9b8908b85247ef60001a683c281af0080e9ee77 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:17 +0000 Subject: commit: use generation numbers for in_merge_bases() The containment algorithm for 'git branch --contains' is different from that for 'git tag --contains' in that it uses is_descendant_of() instead of contains_tag_algo(). The expensive portion of the branch algorithm is computing merge bases. When a commit-graph file exists with generation numbers computed, we can avoid this merge-base calculation when the target commit has a larger generation number than the initial commits. Performance tests were run on a copy of the Linux repository where HEAD is contained in v4.13 but no earlier tag. Also, all tags were copied to branches and 'git branch --contains' was tested: Before: 60.0s After: 0.4s Rel %: -99.3% Reported-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index 39a3749..3ecdc13 100644 --- a/commit.c +++ b/commit.c @@ -1056,12 +1056,19 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit * { struct commit_list *bases; int ret = 0, i; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; if (parse_commit(commit)) return ret; - for (i = 0; i < nr_reference; i++) + for (i = 0; i < nr_reference; i++) { if (parse_commit(reference[i])) return ret; + if (reference[i]->generation < min_generation) + min_generation = reference[i]->generation; + } + + if (commit->generation > min_generation) + return ret; bases = paint_down_to_common(commit, nr_reference, reference); if (commit->object.flags & PARENT2) -- cgit v0.10.2-6-g49f6 From d7c1ec3efd0092ee665085cba4e8a2bcef95143b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:19 +0000 Subject: commit: add short-circuit to paint_down_to_common() When running 'git branch --contains', the in_merge_bases_many() method calls paint_down_to_common() to discover if a specific commit is reachable from a set of branches. Commits with lower generation number are not needed to correctly answer the containment query of in_merge_bases_many(). Add a new parameter, min_generation, to paint_down_to_common() that prevents walking commits with generation number strictly less than min_generation. If 0 is given, then there is no functional change. For in_merge_bases_many(), we can pass commit->generation as the cutoff, and this saves time during 'git branch --contains' queries that would otherwise walk "around" the commit we are inspecting. For a copy of the Linux repository, where HEAD is checked out at v4.13~100, we get the following performance improvement for 'git branch --contains' over the previous commit: Before: 0.21s After: 0.13s Rel %: -38% Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index 3ecdc13..9875fee 100644 --- a/commit.c +++ b/commit.c @@ -808,11 +808,14 @@ static int queue_has_nonstale(struct prio_queue *queue) } /* all input commits in one and twos[] must have been parsed! */ -static struct commit_list *paint_down_to_common(struct commit *one, int n, struct commit **twos) +static struct commit_list *paint_down_to_common(struct commit *one, int n, + struct commit **twos, + int min_generation) { struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; struct commit_list *result = NULL; int i; + uint32_t last_gen = GENERATION_NUMBER_INFINITY; one->object.flags |= PARENT1; if (!n) { @@ -831,6 +834,15 @@ static struct commit_list *paint_down_to_common(struct commit *one, int n, struc struct commit_list *parents; int flags; + if (commit->generation > last_gen) + BUG("bad generation skip %8x > %8x at %s", + commit->generation, last_gen, + oid_to_hex(&commit->object.oid)); + last_gen = commit->generation; + + if (commit->generation < min_generation) + break; + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); if (flags == (PARENT1 | PARENT2)) { if (!(commit->object.flags & RESULT)) { @@ -879,7 +891,7 @@ static struct commit_list *merge_bases_many(struct commit *one, int n, struct co return NULL; } - list = paint_down_to_common(one, n, twos); + list = paint_down_to_common(one, n, twos, 0); while (list) { struct commit *commit = pop_commit(&list); @@ -946,7 +958,7 @@ static int remove_redundant(struct commit **array, int cnt) filled_index[filled] = j; work[filled++] = array[j]; } - common = paint_down_to_common(array[i], filled, work); + common = paint_down_to_common(array[i], filled, work, 0); if (array[i]->object.flags & PARENT2) redundant[i] = 1; for (j = 0; j < filled; j++) @@ -1070,7 +1082,7 @@ int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit * if (commit->generation > min_generation) return ret; - bases = paint_down_to_common(commit, nr_reference, reference); + bases = paint_down_to_common(commit, nr_reference, reference, commit->generation); if (commit->object.flags & PARENT2) ret = 1; clear_commit_marks(commit, all_flags); -- cgit v0.10.2-6-g49f6 From 04bc8d1ecc30bae9589f3d77a0a89f3dca28a024 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:21 +0000 Subject: commit: use generation number in remove_redundant() The static remove_redundant() method is used to filter a list of commits by removing those that are reachable from another commit in the list. This is used to remove all possible merge- bases except a maximal, mutually independent set. To determine these commits are independent, we use a number of paint_down_to_common() walks and use the PARENT1, PARENT2 flags to determine reachability. Since we only care about reachability and not the full set of merge-bases between 'one' and 'twos', we can use the 'min_generation' parameter to short-circuit the walk. When no commit-graph exists, there is no change in behavior. For a copy of the Linux repository, we measured the following performance improvements: git merge-base v3.3 v4.5 Before: 234 ms After: 208 ms Rel %: -11% git merge-base v4.3 v4.5 Before: 102 ms After: 83 ms Rel %: -19% The experiments above were chosen to demonstrate that we are improving the filtering of the merge-base set. In the first example, more time is spent walking the history to find the set of merge bases before the remove_redundant() call. The starting commits are closer together in the second example, therefore more time is spent in remove_redundant(). The relative change in performance differs as expected. Reported-by: Jakub Narebski Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index 9875fee..1d28677 100644 --- a/commit.c +++ b/commit.c @@ -949,6 +949,7 @@ static int remove_redundant(struct commit **array, int cnt) parse_commit(array[i]); for (i = 0; i < cnt; i++) { struct commit_list *common; + uint32_t min_generation = array[i]->generation; if (redundant[i]) continue; @@ -957,8 +958,12 @@ static int remove_redundant(struct commit **array, int cnt) continue; filled_index[filled] = j; work[filled++] = array[j]; + + if (array[j]->generation < min_generation) + min_generation = array[j]->generation; } - common = paint_down_to_common(array[i], filled, work, 0); + common = paint_down_to_common(array[i], filled, work, + min_generation); if (array[i]->object.flags & PARENT2) redundant[i] = 1; for (j = 0; j < filled; j++) -- cgit v0.10.2-6-g49f6 From 7adf526670c5c050ddc22ddcf2fdc7bd654f8fc5 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:23 +0000 Subject: merge: check config before loading commits Now that we use generation numbers from the commit-graph, we must ensure that all commits that exist in the commit-graph are loaded from that file instead of from the object database. Since the commit-graph file is only checked if core.commitGraph is true, we must check the default config before we load any commits. In the merge builtin, the config was checked after loading the HEAD commit. This was due to the use of the global 'branch' when checking merge-specific config settings. Move the config load to be between the initialization of 'branch' and the commit lookup. Without this change, a fast-forward merge would hit a BUG("bad generation skip") statement in commit.c during paint_down_to_common(). This is because the HEAD commit would be loaded with "infinite" generation but then reached by commits with "finite" generation numbers. Add a test to t5318-commit-graph.sh that exercises this code path to prevent a regression. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/builtin/merge.c b/builtin/merge.c index 5e5e449..b819756 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -1148,14 +1148,15 @@ int cmd_merge(int argc, const char **argv, const char *prefix) branch = branch_to_free = resolve_refdup("HEAD", 0, &head_oid, NULL); if (branch) skip_prefix(branch, "refs/heads/", &branch); + + init_diff_ui_defaults(); + git_config(git_merge_config, NULL); + if (!branch || is_null_oid(&head_oid)) head_commit = NULL; else head_commit = lookup_commit_or_die(&head_oid, "HEAD"); - init_diff_ui_defaults(); - git_config(git_merge_config, NULL); - if (branch_mergeoptions) parse_branch_merge_options(branch_mergeoptions); argc = parse_options(argc, argv, prefix, builtin_merge_options, diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index a380419..77d85ae 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -221,4 +221,13 @@ test_expect_success 'write graph in bare repo' ' graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1 graph_git_behavior 'bare repo with graph, commit 8 vs merge 2' bare commits/8 merge/2 +test_expect_success 'perform fast-forward merge in full repo' ' + cd "$TRASH_DIRECTORY/full" && + git checkout -b merge-5-to-8 commits/5 && + git merge commits/8 && + git show-ref -s merge-5-to-8 >output && + git show-ref -s commits/8 >expect && + test_cmp expect output +' + test_done -- cgit v0.10.2-6-g49f6 From 1472978ec670e57913836dcc98ba9cf560d94a1c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 1 May 2018 12:47:25 +0000 Subject: commit-graph.txt: update design document We now calculate generation numbers in the commit-graph file and use them in paint_down_to_common(). Expand the section on generation numbers to discuss how the three special generation numbers GENERATION_NUMBER_INFINITY, _ZERO, and _MAX interact with other generation numbers. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 0550c6d..e1a883e 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -77,6 +77,29 @@ in the commit graph. We can treat these commits as having "infinite" generation number and walk until reaching commits with known generation number. +We use the macro GENERATION_NUMBER_INFINITY = 0xFFFFFFFF to mark commits not +in the commit-graph file. If a commit-graph file was written by a version +of Git that did not compute generation numbers, then those commits will +have generation number represented by the macro GENERATION_NUMBER_ZERO = 0. + +Since the commit-graph file is closed under reachability, we can guarantee +the following weaker condition on all commits: + + If A and B are commits with generation numbers N amd M, respectively, + and N < M, then A cannot reach B. + +Note how the strict inequality differs from the inequality when we have +fully-computed generation numbers. Using strict inequality may result in +walking a few extra commits, but the simplicity in dealing with commits +with generation number *_INFINITY or *_ZERO is valuable. + +We use the macro GENERATION_NUMBER_MAX = 0x3FFFFFFF to for commits whose +generation numbers are computed to be at least this value. We limit at +this value since it is the largest value that can be stored in the +commit-graph file using the 30 bits available to generation numbers. This +presents another case where a commit can have generation number equal to +that of a parent. + Design Details -------------- @@ -98,18 +121,14 @@ Future Work - The 'commit-graph' subcommand does not have a "verify" mode that is necessary for integration with fsck. -- The file format includes room for precomputed generation numbers. These - are not currently computed, so all generation numbers will be marked as - 0 (or "uncomputed"). A later patch will include this calculation. - - After computing and storing generation numbers, we must make graph walks aware of generation numbers to gain the performance benefits they enable. This will mostly be accomplished by swapping a commit-date-ordered priority queue with one ordered by generation number. The following operations are important candidates: - - paint_down_to_common() - 'log --topo-order' + - 'tag --merged' - Currently, parse_commit_gently() requires filling in the root tree object for a commit. This passes through lookup_tree() and consequently -- cgit v0.10.2-6-g49f6 From 33286dcd6d79eeb81b74f36a324f260275582639 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 10 May 2018 17:42:52 +0000 Subject: commit-graph: fix UX issue when .lock file exists We use the lockfile API to avoid multiple Git processes from writing to the commit-graph file in the .git/objects/info directory. In some cases, this directory may not exist, so we check for its existence. The existing code does the following when acquiring the lock: 1. Try to acquire the lock. 2. If it fails, try to create the .git/object/info directory. 3. Try to acquire the lock, failing if necessary. The problem is that if the lockfile exists, then the mkdir fails, giving an error that doesn't help the user: "fatal: cannot mkdir .git/objects/info: File exists" While technically this honors the lockfile, it does not help the user. Instead, do the following: 1. Check for existence of .git/objects/info; create if necessary. 2. Try to acquire the lock, failing if necessary. The new output looks like: fatal: Unable to create '/.git/objects/info/commit-graph.lock': File exists. Another git process seems to be running in this repository, e.g. an editor opened by 'git commit'. Please make sure all processes are terminated then try again. If it still fails, a git process may have crashed in this repository earlier: remove the file manually to continue. Helped-by: Jeff King Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index a8c337d..bb54c12 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1,5 +1,6 @@ #include "cache.h" #include "config.h" +#include "dir.h" #include "git-compat-util.h" #include "lockfile.h" #include "pack.h" @@ -640,7 +641,6 @@ void write_commit_graph(const char *obj_dir, struct hashfile *f; uint32_t i, count_distinct = 0; char *graph_name; - int fd; struct lock_file lk = LOCK_INIT; uint32_t chunk_ids[5]; uint64_t chunk_offsets[5]; @@ -754,23 +754,11 @@ void write_commit_graph(const char *obj_dir, compute_generation_numbers(&commits); graph_name = get_commit_graph_filename(obj_dir); - fd = hold_lock_file_for_update(&lk, graph_name, 0); - - if (fd < 0) { - struct strbuf folder = STRBUF_INIT; - strbuf_addstr(&folder, graph_name); - strbuf_setlen(&folder, strrchr(folder.buf, '/') - folder.buf); - - if (mkdir(folder.buf, 0777) < 0) - die_errno(_("cannot mkdir %s"), folder.buf); - strbuf_release(&folder); - - fd = hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); - - if (fd < 0) - die_errno("unable to create '%s'", graph_name); - } + if (safe_create_leading_directories(graph_name)) + die_errno(_("unable to create leading directories of %s"), + graph_name); + hold_lock_file_for_update(&lk, graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); hashwrite_be32(f, GRAPH_SIGNATURE); -- cgit v0.10.2-6-g49f6