From 890345ac106f78dfcfcde002de39ebbb410e0b39 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:23 -0700 Subject: commit-graph: document commit-graph chains Add a basic description of commit-graph chains. More details about the feature will be added as we add functionality. This introduction gives a high-level overview to the goals of the feature and the basic layout of commit-graph chains. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index fb53341..1dca3bd 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -127,6 +127,65 @@ Design Details helpful for these clones, anyway. The commit-graph will not be read or written when shallow commits are present. +Commit Graphs Chains +-------------------- + +Typically, repos grow with near-constant velocity (commits per day). Over time, +the number of commits added by a fetch operation is much smaller than the +number of commits in the full history. By creating a "chain" of commit-graphs, +we enable fast writes of new commit data without rewriting the entire commit +history -- at least, most of the time. + +## File Layout + +A commit-graph chain uses multiple files, and we use a fixed naming convention +to organize these files. Each commit-graph file has a name +`$OBJDIR/info/commit-graphs/graph-{hash}.graph` where `{hash}` is the hex- +valued hash stored in the footer of that file (which is a hash of the file's +contents before that hash). For a chain of commit-graph files, a plain-text +file at `$OBJDIR/info/commit-graphs/commit-graph-chain` contains the +hashes for the files in order from "lowest" to "highest". + +For example, if the `commit-graph-chain` file contains the lines + +``` + {hash0} + {hash1} + {hash2} +``` + +then the commit-graph chain looks like the following diagram: + + +-----------------------+ + | graph-{hash2}.graph | + +-----------------------+ + | + +-----------------------+ + | | + | graph-{hash1}.graph | + | | + +-----------------------+ + | + +-----------------------+ + | | + | | + | | + | graph-{hash0}.graph | + | | + | | + | | + +-----------------------+ + +Let X0 be the number of commits in `graph-{hash0}.graph`, X1 be the number of +commits in `graph-{hash1}.graph`, and X2 be the number of commits in +`graph-{hash2}.graph`. If a commit appears in position i in `graph-{hash2}.graph`, +then we interpret this as being the commit in position (X0 + X1 + i), and that +will be used as its "graph position". The commits in `graph-{hash2}.graph` use these +positions to refer to their parents, which may be in `graph-{hash1}.graph` or +`graph-{hash0}.graph`. We can navigate to an arbitrary commit in position j by checking +its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 + +X2). + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 -- cgit v0.10.2-6-g49f6 From d4f4d60f6da8b93f768b2b4958c04cc1b3cea443 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:24 -0700 Subject: commit-graph: prepare for commit-graph chains To prepare for a chain of commit-graph files, augment the commit_graph struct to point to a base commit_graph. As we load commits from the graph, we may actually want to read from a base file according to the graph position. The "graph position" of a commit is given by concatenating the lexicographic commit orders from each of the commit-graph files in the chain. This means that we must distinguish two values: * lexicographic index : the position within the lexicographic order in a single commit-graph file. * graph position: the position within the concatenated order of multiple commit-graph files Given the lexicographic index of a commit in a graph, we can compute the graph position by adding the number of commits in the lower-level graphs. To find the lexicographic index of a commit, we subtract the number of commits in lower-level graphs. While here, change insert_parent_or_die() to take a uint32_t position, as that is the type used by its only caller and that makes more sense with the limits in the commit-graph format. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 76d189d..8f5c093 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -359,9 +359,18 @@ int generation_numbers_enabled(struct repository *r) return !!first_generation; } +static void close_commit_graph_one(struct commit_graph *g) +{ + if (!g) + return; + + close_commit_graph_one(g->base_graph); + free_commit_graph(g); +} + void close_commit_graph(struct raw_object_store *o) { - free_commit_graph(o->commit_graph); + close_commit_graph_one(o->commit_graph); o->commit_graph = NULL; } @@ -371,18 +380,38 @@ static int bsearch_graph(struct commit_graph *g, struct object_id *oid, uint32_t g->chunk_oid_lookup, g->hash_len, pos); } +static void load_oid_from_graph(struct commit_graph *g, + uint32_t pos, + struct object_id *oid) +{ + uint32_t lex_index; + + while (g && pos < g->num_commits_in_base) + g = g->base_graph; + + if (!g) + BUG("NULL commit-graph"); + + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + + lex_index = pos - g->num_commits_in_base; + + hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index); +} + static struct commit_list **insert_parent_or_die(struct repository *r, struct commit_graph *g, - uint64_t pos, + uint32_t pos, struct commit_list **pptr) { struct commit *c; struct object_id oid; - if (pos >= g->num_commits) - die("invalid parent position %"PRIu64, pos); + if (pos >= g->num_commits + g->num_commits_in_base) + die("invalid parent position %"PRIu32, pos); - hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * pos); + load_oid_from_graph(g, pos, &oid); c = lookup_commit(r, &oid); if (!c) die(_("could not find commit %s"), oid_to_hex(&oid)); @@ -392,7 +421,14 @@ static struct commit_list **insert_parent_or_die(struct repository *r, static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, uint32_t pos) { - const unsigned char *commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * pos; + const unsigned char *commit_data; + uint32_t lex_index; + + while (pos < g->num_commits_in_base) + g = g->base_graph; + + lex_index = pos - g->num_commits_in_base; + commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; item->graph_pos = pos; item->generation = get_be32(commit_data + g->hash_len + 8) >> 2; } @@ -405,10 +441,25 @@ static int fill_commit_in_graph(struct repository *r, uint32_t *parent_data_ptr; uint64_t date_low, date_high; struct commit_list **pptr; - const unsigned char *commit_data = g->chunk_commit_data + (g->hash_len + 16) * pos; + const unsigned char *commit_data; + uint32_t lex_index; - item->object.parsed = 1; + while (pos < g->num_commits_in_base) + g = g->base_graph; + + if (pos >= g->num_commits + g->num_commits_in_base) + die(_("invalid commit position. commit-graph is likely corrupt")); + + /* + * Store the "full" position, but then use the + * "local" position for the rest of the calculation. + */ item->graph_pos = pos; + lex_index = pos - g->num_commits_in_base; + + commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; + + item->object.parsed = 1; item->maybe_tree = NULL; @@ -452,7 +503,18 @@ static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uin *pos = item->graph_pos; return 1; } else { - return bsearch_graph(g, &(item->object.oid), pos); + struct commit_graph *cur_g = g; + uint32_t lex_index; + + while (cur_g && !bsearch_graph(cur_g, &(item->object.oid), &lex_index)) + cur_g = cur_g->base_graph; + + if (cur_g) { + *pos = lex_index + cur_g->num_commits_in_base; + return 1; + } + + return 0; } } @@ -492,8 +554,13 @@ static struct tree *load_tree_for_commit(struct repository *r, struct commit *c) { struct object_id oid; - const unsigned char *commit_data = g->chunk_commit_data + - GRAPH_DATA_WIDTH * (c->graph_pos); + const unsigned char *commit_data; + + while (c->graph_pos < g->num_commits_in_base) + g = g->base_graph; + + commit_data = g->chunk_commit_data + + GRAPH_DATA_WIDTH * (c->graph_pos - g->num_commits_in_base); hashcpy(oid.hash, commit_data); c->maybe_tree = lookup_tree(r, &oid); diff --git a/commit-graph.h b/commit-graph.h index 390c7f6..5819910 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,9 @@ struct commit_graph { uint32_t num_commits; struct object_id oid; + uint32_t num_commits_in_base; + struct commit_graph *base_graph; + const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; -- cgit v0.10.2-6-g49f6 From 3cbc6ed3ee02ab7c8254d51528ca75644c52d52d Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:24 -0700 Subject: commit-graph: rename commit_compare to oid_compare The helper function commit_compare() actually compares object_id structs, not commits. A future change to commit-graph.c will need to sort commit structs, so rename this function in advance. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 8f5c093..96b07a6 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -770,7 +770,7 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, } } -static int commit_compare(const void *_a, const void *_b) +static int oid_compare(const void *_a, const void *_b) { const struct object_id *a = (const struct object_id *)_a; const struct object_id *b = (const struct object_id *)_b; @@ -1039,7 +1039,7 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) _("Counting distinct commits in commit graph"), ctx->oids.nr); display_progress(ctx->progress, 0); /* TODO: Measure QSORT() progress */ - QSORT(ctx->oids.list, ctx->oids.nr, commit_compare); + QSORT(ctx->oids.list, ctx->oids.nr, oid_compare); for (i = 1; i < ctx->oids.nr; i++) { display_progress(ctx->progress, i + 1); -- cgit v0.10.2-6-g49f6 From 5c84b3396c73382eb49285b4e4bbb1bf82be3a9c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:25 -0700 Subject: commit-graph: load commit-graph chains Prepare the logic for reading a chain of commit-graphs. First, look for a file at $OBJDIR/info/commit-graph. If it exists, then use that file and stop. Next, look for the chain file at $OBJDIR/info/commit-graphs/commit-graph-chain. If this file exists, then load the hash values as line-separated values in that file and load $OBJDIR/info/commit-graphs/graph-{hash[i]}.graph for each hash[i] in that file. The file is given in order, so the first hash corresponds to the "base" file and the final hash corresponds to the "tip" file. This implementation assumes that all of the graph-{hash}.graph files are in the same object directory as the commit-graph-chain file. This will be updated in a future change. This change is purposefully simple so we can isolate the different concerns. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 96b07a6..f7dfc6a 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -45,6 +45,19 @@ char *get_commit_graph_filename(const char *obj_dir) return xstrfmt("%s/info/commit-graph", obj_dir); } +static char *get_split_graph_filename(const char *obj_dir, + const char *oid_hex) +{ + return xstrfmt("%s/info/commit-graphs/graph-%s.graph", + obj_dir, + oid_hex); +} + +static char *get_chain_filename(const char *obj_dir) +{ + return xstrfmt("%s/info/commit-graphs/commit-graph-chain", obj_dir); +} + static uint8_t oid_version(void) { return 1; @@ -286,18 +299,105 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) return load_commit_graph_one_fd_st(fd, &st); } +static struct commit_graph *load_commit_graph_v1(struct repository *r, const char *obj_dir) +{ + char *graph_name = get_commit_graph_filename(obj_dir); + struct commit_graph *g = load_commit_graph_one(graph_name); + free(graph_name); + + return g; +} + +static int add_graph_to_chain(struct commit_graph *g, + struct commit_graph *chain, + struct object_id *oids, + int n) +{ + struct commit_graph *cur_g = chain; + + while (n) { + n--; + cur_g = cur_g->base_graph; + } + + g->base_graph = chain; + + if (chain) + g->num_commits_in_base = chain->num_commits + chain->num_commits_in_base; + + return 1; +} + +static struct commit_graph *load_commit_graph_chain(struct repository *r, const char *obj_dir) +{ + struct commit_graph *graph_chain = NULL; + struct strbuf line = STRBUF_INIT; + struct stat st; + struct object_id *oids; + int i = 0, valid = 1, count; + char *chain_name = get_chain_filename(obj_dir); + FILE *fp; + int stat_res; + + fp = fopen(chain_name, "r"); + stat_res = stat(chain_name, &st); + free(chain_name); + + if (!fp || + stat_res || + st.st_size <= the_hash_algo->hexsz) + return NULL; + + count = st.st_size / (the_hash_algo->hexsz + 1); + oids = xcalloc(count, sizeof(struct object_id)); + + for (i = 0; i < count && valid; i++) { + char *graph_name; + struct commit_graph *g; + + if (strbuf_getline_lf(&line, fp) == EOF) + break; + + if (get_oid_hex(line.buf, &oids[i])) { + warning(_("invalid commit-graph chain: line '%s' not a hash"), + line.buf); + valid = 0; + break; + } + + graph_name = get_split_graph_filename(obj_dir, line.buf); + g = load_commit_graph_one(graph_name); + free(graph_name); + + if (g && add_graph_to_chain(g, graph_chain, oids, i)) + graph_chain = g; + else + valid = 0; + } + + free(oids); + fclose(fp); + + return graph_chain; +} + +static struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) +{ + struct commit_graph *g = load_commit_graph_v1(r, obj_dir); + + if (!g) + g = load_commit_graph_chain(r, obj_dir); + + return g; +} + static void prepare_commit_graph_one(struct repository *r, const char *obj_dir) { - char *graph_name; if (r->objects->commit_graph) return; - graph_name = get_commit_graph_filename(obj_dir); - r->objects->commit_graph = - load_commit_graph_one(graph_name); - - FREE_AND_NULL(graph_name); + r->objects->commit_graph = read_commit_graph_one(r, obj_dir); } /* -- cgit v0.10.2-6-g49f6 From 118bd570029f5610abdbf1a220e87d3a5c241f5f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:26 -0700 Subject: commit-graph: add base graphs chunk To quickly verify a commit-graph chain is valid on load, we will read from the new "Base Graphs Chunk" of each file in the chain. This will prevent accidentally loading incorrect data from manually editing the commit-graph-chain file or renaming graph-{hash}.graph files. The commit_graph struct already had an object_id struct "oid", but it was never initialized or used. Add a line to read the hash from the end of the commit-graph file and into the oid member. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index 16452a0..a4f1744 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -44,8 +44,9 @@ HEADER: 1-byte number (C) of "chunks" - 1-byte (reserved for later use) - Current clients should ignore this value. + 1-byte number (B) of base commit-graphs + We infer the length (H*B) of the Base Graphs chunk + from this value. CHUNK LOOKUP: @@ -92,6 +93,12 @@ CHUNK DATA: positions for the parents until reaching a value with the most-significant bit on. The other bits correspond to the position of the last parent. + Base Graphs List (ID: {'B', 'A', 'S', 'E'}) [Optional] + This list of H-byte hashes describe a set of B commit-graph files that + form a commit-graph chain. The graph position for the ith commit in this + file's OID Lookup chunk is equal to i plus the number of commits in all + base graphs. If B is non-zero, this chunk must exist. + TRAILER: H-byte HASH-checksum of all of the above. diff --git a/commit-graph.c b/commit-graph.c index f7dfc6a..6f7961d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -22,6 +22,7 @@ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */ #define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */ +#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */ #define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16) @@ -262,6 +263,12 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, else graph->chunk_extra_edges = data + chunk_offset; break; + + case GRAPH_CHUNKID_BASE: + if (graph->chunk_base_graphs) + chunk_repeated = 1; + else + graph->chunk_base_graphs = data + chunk_offset; } if (chunk_repeated) { @@ -280,6 +287,8 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, last_chunk_offset = chunk_offset; } + hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len); + if (verify_commit_graph_lite(graph)) return NULL; @@ -315,8 +324,21 @@ static int add_graph_to_chain(struct commit_graph *g, { struct commit_graph *cur_g = chain; + if (n && !g->chunk_base_graphs) { + warning(_("commit-graph has no base graphs chunk")); + return 0; + } + while (n) { n--; + + if (!cur_g || + !oideq(&oids[n], &cur_g->oid) || + !hasheq(oids[n].hash, g->chunk_base_graphs + g->hash_len * n)) { + warning(_("commit-graph chain does not match")); + return 0; + } + cur_g = cur_g->base_graph; } diff --git a/commit-graph.h b/commit-graph.h index 5819910..6e7d42c 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -55,6 +55,7 @@ struct commit_graph { const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; const unsigned char *chunk_extra_edges; + const unsigned char *chunk_base_graphs; }; struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); -- cgit v0.10.2-6-g49f6 From 144354b054c99b2231be6bae517eed5f8da55268 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:27 -0700 Subject: commit-graph: rearrange chunk count logic The number of chunks in a commit-graph file can change depending on whether we need the Extra Edges Chunk. We are going to add more optional chunks, and it will be helpful to rearrange this logic around the chunk count before doing so. Specifically, we need to finalize the number of chunks before writing the commit-graph header. Further, we also need to fill out the chunk lookup table dynamically and using "num_chunks" as we add optional chunks is useful for adding optional chunks in the future. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 6f7961d..f2163e1 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1213,7 +1213,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) uint64_t chunk_offsets[5]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; - int num_chunks = ctx->num_extra_edges ? 4 : 3; + int num_chunks = 3; ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); if (safe_create_leading_directories(ctx->graph_name)) { @@ -1226,27 +1226,34 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); - hashwrite_be32(f, GRAPH_SIGNATURE); - - hashwrite_u8(f, GRAPH_VERSION); - hashwrite_u8(f, oid_version()); - hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); /* unused padding byte */ - chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; - if (ctx->num_extra_edges) - chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; - else - chunk_ids[3] = 0; - chunk_ids[4] = 0; + if (ctx->num_extra_edges) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; + num_chunks++; + } + + chunk_ids[num_chunks] = 0; chunk_offsets[0] = 8 + (num_chunks + 1) * GRAPH_CHUNKLOOKUP_WIDTH; chunk_offsets[1] = chunk_offsets[0] + GRAPH_FANOUT_SIZE; chunk_offsets[2] = chunk_offsets[1] + hashsz * ctx->commits.nr; chunk_offsets[3] = chunk_offsets[2] + (hashsz + 16) * ctx->commits.nr; - chunk_offsets[4] = chunk_offsets[3] + 4 * ctx->num_extra_edges; + + num_chunks = 3; + if (ctx->num_extra_edges) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + 4 * ctx->num_extra_edges; + num_chunks++; + } + + hashwrite_be32(f, GRAPH_SIGNATURE); + + hashwrite_u8(f, GRAPH_VERSION); + hashwrite_u8(f, oid_version()); + hashwrite_u8(f, num_chunks); + hashwrite_u8(f, 0); for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; -- cgit v0.10.2-6-g49f6 From 6c622f9f0bbb38a23341dc4294f56d0d909b3d50 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:27 -0700 Subject: commit-graph: write commit-graph chains Extend write_commit_graph() to write a commit-graph chain when given the COMMIT_GRAPH_SPLIT flag. This implementation is purposefully simplistic in how it creates a new chain. The commits not already in the chain are added to a new tip commit-graph file. Much of the logic around writing a graph-{hash}.graph file and updating the commit-graph-chain file is the same as the commit-graph file case. However, there are several places where we need to do some extra logic in the split case. Track the list of graph filenames before and after the planned write. This will be more important when we start merging graph files, but it also allows us to upgrade our commit-graph file to the appropriate graph-{hash}.graph file when we upgrade to a chain of commit-graphs. Note that we use the eighth byte of the commit-graph header to store the number of base graph files. This determines the length of the base graphs chunk. A subtle change of behavior with the new logic is that we do not write a commit-graph if we our commit list is empty. This extends to the typical case, which is reflected in t5318-commit-graph.sh. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index f2163e1..f0698b0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -300,12 +300,18 @@ static struct commit_graph *load_commit_graph_one(const char *graph_file) struct stat st; int fd; + struct commit_graph *g; int open_ok = open_commit_graph(graph_file, &fd, &st); if (!open_ok) return NULL; - return load_commit_graph_one_fd_st(fd, &st); + g = load_commit_graph_one_fd_st(fd, &st); + + if (g) + g->filename = xstrdup(graph_file); + + return g; } static struct commit_graph *load_commit_graph_v1(struct repository *r, const char *obj_dir) @@ -730,8 +736,19 @@ struct write_commit_graph_context { struct progress *progress; int progress_done; uint64_t progress_cnt; + + char *base_graph_name; + int num_commit_graphs_before; + int num_commit_graphs_after; + char **commit_graph_filenames_before; + char **commit_graph_filenames_after; + char **commit_graph_hash_after; + uint32_t new_num_commits_in_base; + struct commit_graph *new_base_graph; + unsigned append:1, - report_progress:1; + report_progress:1, + split:1; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -801,6 +818,16 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, ctx->commits.nr, commit_to_sha1); + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -821,6 +848,17 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, ctx->commits.list, ctx->commits.nr, commit_to_sha1); + + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -878,6 +916,16 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, ctx->commits.nr, commit_to_sha1); + if (edge_value >= 0) + edge_value += ctx->new_num_commits_in_base; + else { + uint32_t pos; + if (find_commit_in_graph(parent->item, + ctx->new_base_graph, + &pos)) + edge_value = pos; + } + if (edge_value < 0) BUG("missing parent %s for commit %s", oid_to_hex(&parent->item->object.oid), @@ -969,7 +1017,13 @@ static void close_reachable(struct write_commit_graph_context *ctx) display_progress(ctx->progress, i + 1); commit = lookup_commit(ctx->r, &ctx->oids.list[i]); - if (commit && !parse_commit_no_graph(commit)) + if (!commit) + continue; + if (ctx->split) { + if (!parse_commit(commit) && + commit->graph_pos == COMMIT_NOT_FROM_GRAPH) + add_missing_parents(ctx, commit); + } else if (!parse_commit_no_graph(commit)) add_missing_parents(ctx, commit); } stop_progress(&ctx->progress); @@ -1165,8 +1219,16 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) for (i = 1; i < ctx->oids.nr; i++) { display_progress(ctx->progress, i + 1); - if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) + if (!oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) { + if (ctx->split) { + struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]); + + if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH) + continue; + } + count_distinct++; + } } stop_progress(&ctx->progress); @@ -1189,7 +1251,13 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) if (i > 0 && oideq(&ctx->oids.list[i - 1], &ctx->oids.list[i])) continue; + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + 1, ctx->commits.alloc); ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); + + if (ctx->split && + ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH) + continue; + parse_commit_no_graph(ctx->commits.list[ctx->commits.nr]); for (parent = ctx->commits.list[ctx->commits.nr]->parents; @@ -1204,18 +1272,86 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) stop_progress(&ctx->progress); } +static int write_graph_chunk_base_1(struct hashfile *f, + struct commit_graph *g) +{ + int num = 0; + + if (!g) + return 0; + + num = write_graph_chunk_base_1(f, g->base_graph); + hashwrite(f, g->oid.hash, the_hash_algo->rawsz); + return num + 1; +} + +static int write_graph_chunk_base(struct hashfile *f, + struct write_commit_graph_context *ctx) +{ + int num = write_graph_chunk_base_1(f, ctx->new_base_graph); + + if (num != ctx->num_commit_graphs_after - 1) { + error(_("failed to write correct number of base graph ids")); + return -1; + } + + return 0; +} + +static void init_commit_graph_chain(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t i; + + ctx->new_base_graph = g; + ctx->base_graph_name = xstrdup(g->filename); + ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; + + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + + for (i = 0; i < ctx->num_commit_graphs_before - 1; i++) + ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); + + if (ctx->num_commit_graphs_before) + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] = + get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid)); + + i = ctx->num_commit_graphs_before - 1; + + while (g) { + ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + i--; + g = g->base_graph; + } +} + static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; + int fd; struct hashfile *f; struct lock_file lk = LOCK_INIT; - uint32_t chunk_ids[5]; - uint64_t chunk_offsets[5]; + uint32_t chunk_ids[6]; + uint64_t chunk_offsets[6]; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; int num_chunks = 3; + struct object_id file_hash; + + if (ctx->split) { + struct strbuf tmp_file = STRBUF_INIT; + + strbuf_addf(&tmp_file, + "%s/info/commit-graphs/tmp_graph_XXXXXX", + ctx->obj_dir); + ctx->graph_name = strbuf_detach(&tmp_file, NULL); + } else { + ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); + } - ctx->graph_name = get_commit_graph_filename(ctx->obj_dir); if (safe_create_leading_directories(ctx->graph_name)) { UNLEAK(ctx->graph_name); error(_("unable to create leading directories of %s"), @@ -1223,8 +1359,23 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return -1; } - hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); - f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + if (ctx->split) { + char *lock_name = get_chain_filename(ctx->obj_dir); + + hold_lock_file_for_update(&lk, lock_name, LOCK_DIE_ON_ERROR); + + fd = git_mkstemp_mode(ctx->graph_name, 0444); + if (fd < 0) { + error(_("unable to create '%s'"), ctx->graph_name); + return -1; + } + + f = hashfd(fd, ctx->graph_name); + } else { + hold_lock_file_for_update(&lk, ctx->graph_name, LOCK_DIE_ON_ERROR); + fd = lk.tempfile->fd; + f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf); + } chunk_ids[0] = GRAPH_CHUNKID_OIDFANOUT; chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; @@ -1233,6 +1384,10 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) chunk_ids[num_chunks] = GRAPH_CHUNKID_EXTRAEDGES; num_chunks++; } + if (ctx->num_commit_graphs_after > 1) { + chunk_ids[num_chunks] = GRAPH_CHUNKID_BASE; + num_chunks++; + } chunk_ids[num_chunks] = 0; @@ -1247,13 +1402,18 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) 4 * ctx->num_extra_edges; num_chunks++; } + if (ctx->num_commit_graphs_after > 1) { + chunk_offsets[num_chunks + 1] = chunk_offsets[num_chunks] + + hashsz * (ctx->num_commit_graphs_after - 1); + num_chunks++; + } hashwrite_be32(f, GRAPH_SIGNATURE); hashwrite_u8(f, GRAPH_VERSION); hashwrite_u8(f, oid_version()); hashwrite_u8(f, num_chunks); - hashwrite_u8(f, 0); + hashwrite_u8(f, ctx->num_commit_graphs_after - 1); for (i = 0; i <= num_chunks; i++) { uint32_t chunk_write[3]; @@ -1279,11 +1439,67 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) write_graph_chunk_data(f, hashsz, ctx); if (ctx->num_extra_edges) write_graph_chunk_extra_edges(f, ctx); + if (ctx->num_commit_graphs_after > 1 && + write_graph_chunk_base(f, ctx)) { + return -1; + } stop_progress(&ctx->progress); strbuf_release(&progress_title); + if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) { + char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid)); + char *new_base_name = get_split_graph_filename(ctx->obj_dir, new_base_hash); + + free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]); + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2] = new_base_name; + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2] = new_base_hash; + } + close_commit_graph(ctx->r->objects); - finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + + if (ctx->split) { + FILE *chainf = fdopen_lock_file(&lk, "w"); + char *final_graph_name; + int result; + + close(fd); + + if (!chainf) { + error(_("unable to open commit-graph chain file")); + return -1; + } + + if (ctx->base_graph_name) { + result = rename(ctx->base_graph_name, + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + + if (result) { + error(_("failed to rename base commit-graph file")); + return -1; + } + } else { + char *graph_name = get_commit_graph_filename(ctx->obj_dir); + unlink(graph_name); + } + + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash)); + final_graph_name = get_split_graph_filename(ctx->obj_dir, + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]); + ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name; + + result = rename(ctx->graph_name, final_graph_name); + + for (i = 0; i < ctx->num_commit_graphs_after; i++) + fprintf(lk.tempfile->fp, "%s\n", ctx->commit_graph_hash_after[i]); + + if (result) { + error(_("failed to rename temporary commit-graph file")); + return -1; + } + } + commit_lock_file(&lk); return 0; @@ -1306,6 +1522,30 @@ int write_commit_graph(const char *obj_dir, ctx->obj_dir = obj_dir; ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; + ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + + if (ctx->split) { + struct commit_graph *g; + prepare_commit_graph(ctx->r); + + g = ctx->r->objects->commit_graph; + + while (g) { + ctx->num_commit_graphs_before++; + g = g->base_graph; + } + + if (ctx->num_commit_graphs_before) { + ALLOC_ARRAY(ctx->commit_graph_filenames_before, ctx->num_commit_graphs_before); + i = ctx->num_commit_graphs_before; + g = ctx->r->objects->commit_graph; + + while (g) { + ctx->commit_graph_filenames_before[--i] = xstrdup(g->filename); + g = g->base_graph; + } + } + } ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; @@ -1360,6 +1600,14 @@ int write_commit_graph(const char *obj_dir, goto cleanup; } + if (!ctx->commits.nr) + goto cleanup; + + if (ctx->split) + init_commit_graph_chain(ctx); + else + ctx->num_commit_graphs_after = 1; + compute_generation_numbers(ctx); res = write_commit_graph_file(ctx); @@ -1368,6 +1616,21 @@ cleanup: free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); + + if (ctx->commit_graph_filenames_after) { + for (i = 0; i < ctx->num_commit_graphs_after; i++) { + free(ctx->commit_graph_filenames_after[i]); + free(ctx->commit_graph_hash_after[i]); + } + + for (i = 0; i < ctx->num_commit_graphs_before; i++) + free(ctx->commit_graph_filenames_before[i]); + + free(ctx->commit_graph_filenames_after); + free(ctx->commit_graph_filenames_before); + free(ctx->commit_graph_hash_after); + } + free(ctx); return res; @@ -1555,5 +1818,6 @@ void free_commit_graph(struct commit_graph *g) g->data = NULL; close(g->graph_fd); } + free(g->filename); free(g); } diff --git a/commit-graph.h b/commit-graph.h index 6e7d42c..c321834 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -47,6 +47,7 @@ struct commit_graph { unsigned char num_chunks; uint32_t num_commits; struct object_id oid; + char *filename; uint32_t num_commits_in_base; struct commit_graph *base_graph; @@ -71,6 +72,7 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_APPEND (1 << 0) #define COMMIT_GRAPH_PROGRESS (1 << 1) +#define COMMIT_GRAPH_SPLIT (1 << 2) /* * The write_commit_graph* methods return zero on success diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 3b6fd0d..063f906 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -20,7 +20,7 @@ test_expect_success 'verify graph with no graph file' ' test_expect_success 'write graph with no packs' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write --object-dir . && - test_path_is_file info/commit-graph + test_path_is_missing info/commit-graph ' test_expect_success 'close with correct error on bad input' ' -- cgit v0.10.2-6-g49f6 From 135a7123755bfdde05da18012bebd2776f82b26c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:28 -0700 Subject: commit-graph: add --split option to builtin Add a new "--split" option to the 'git commit-graph write' subcommand. This option allows the optional behavior of writing a commit-graph chain. The current behavior will add a tip commit-graph containing any commits that are not in the existing commit-graph or commit-graph chain. Later changes will allow merging the chain and expiring out-dated files. Add a new test script (t5324-split-commit-graph.sh) that demonstrates this behavior. Helped-by: Johannes Schindelin Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index d8efa5b..ff8bc3c 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), NULL }; @@ -35,9 +35,9 @@ static struct opts_commit_graph { int stdin_packs; int stdin_commits; int append; + int split; } opts; - static int graph_verify(int argc, const char **argv) { struct commit_graph *graph = NULL; @@ -156,6 +156,8 @@ static int graph_write(int argc, const char **argv) N_("start walk at commits listed by stdin")), OPT_BOOL(0, "append", &opts.append, N_("include all commits already in the commit-graph file")), + OPT_BOOL(0, "split", &opts.split, + N_("allow writing an incremental commit-graph file")), OPT_END(), }; @@ -169,6 +171,8 @@ static int graph_write(int argc, const char **argv) opts.obj_dir = get_object_directory(); if (opts.append) flags |= COMMIT_GRAPH_APPEND; + if (opts.split) + flags |= COMMIT_GRAPH_SPLIT; read_replace_refs = 0; diff --git a/commit-graph.c b/commit-graph.c index f0698b0..1224309 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1472,12 +1472,16 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } if (ctx->base_graph_name) { - result = rename(ctx->base_graph_name, - ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); + const char *dest = ctx->commit_graph_filenames_after[ + ctx->num_commit_graphs_after - 2]; - if (result) { - error(_("failed to rename base commit-graph file")); - return -1; + if (strcmp(ctx->base_graph_name, dest)) { + result = rename(ctx->base_graph_name, dest); + + if (result) { + error(_("failed to rename base commit-graph file")); + return -1; + } } } else { char *graph_name = get_commit_graph_filename(ctx->obj_dir); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh new file mode 100755 index 0000000..ccd24bd --- /dev/null +++ b/t/t5324-split-commit-graph.sh @@ -0,0 +1,122 @@ +#!/bin/sh + +test_description='split commit graph' +. ./test-lib.sh + +GIT_TEST_COMMIT_GRAPH=0 + +test_expect_success 'setup repo' ' + git init && + git config core.commitGraph true && + infodir=".git/objects/info" && + graphdir="$infodir/commit-graphs" && + test_oid_init +' + +graph_read_expect() { + NUM_BASE=0 + if test ! -z $2 + then + NUM_BASE=$2 + fi + cat >expect <<- EOF + header: 43475048 1 1 3 $NUM_BASE + num_commits: $1 + chunks: oid_fanout oid_lookup commit_metadata + EOF + git commit-graph read >output && + test_cmp expect output +} + +test_expect_success 'create commits and write commit-graph' ' + for i in $(test_seq 3) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git commit-graph write --reachable && + test_path_is_file $infodir/commit-graph && + graph_read_expect 3 +' + +graph_git_two_modes() { + git -c core.commitGraph=true $1 >output + git -c core.commitGraph=false $1 >expect + test_cmp expect output +} + +graph_git_behavior() { + MSG=$1 + BRANCH=$2 + COMPARE=$3 + test_expect_success "check normal git operations: $MSG" ' + graph_git_two_modes "log --oneline $BRANCH" && + graph_git_two_modes "log --topo-order $BRANCH" && + graph_git_two_modes "log --graph $COMPARE..$BRANCH" && + graph_git_two_modes "branch -vv" && + graph_git_two_modes "merge-base -a $BRANCH $COMPARE" + ' +} + +graph_git_behavior 'graph exists' commits/3 commits/1 + +verify_chain_files_exist() { + for hash in $(cat $1/commit-graph-chain) + do + test_path_is_file $1/graph-$hash.graph || return 1 + done +} + +test_expect_success 'add more commits, and write a new base graph' ' + git reset --hard commits/1 && + for i in $(test_seq 4 5) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git reset --hard commits/2 && + for i in $(test_seq 6 10) + do + test_commit $i && + git branch commits/$i || return 1 + done && + git reset --hard commits/2 && + git merge commits/4 && + git branch merge/1 && + git reset --hard commits/4 && + git merge commits/6 && + git branch merge/2 && + git commit-graph write --reachable && + graph_read_expect 12 +' + +test_expect_success 'add three more commits, write a tip graph' ' + git reset --hard commits/3 && + git merge merge/1 && + git merge commits/5 && + git merge merge/2 && + git branch merge/3 && + git commit-graph write --reachable --split && + test_path_is_missing $infodir/commit-graph && + test_path_is_file $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 2 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'split commit-graph: merge 3 vs 2' merge/3 merge/2 + +test_expect_success 'add one commit, write a tip graph' ' + test_commit 11 && + git branch commits/11 && + git commit-graph write --reachable --split && + test_path_is_missing $infodir/commit-graph && + test_path_is_file $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 3 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6 + +test_done -- cgit v0.10.2-6-g49f6 From 1771be90c8e4797c2466296d1d570dbfa39d9743 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:29 -0700 Subject: commit-graph: merge commit-graph chains When searching for a commit in a commit-graph chain of G graphs with N commits, the search takes O(G log N) time. If we always add a new tip graph with every write, the linear G term will start to dominate and slow the lookup process. To keep lookups fast, but also keep most incremental writes fast, create a strategy for merging levels of the commit-graph chain. The strategy is detailed in the commit-graph design document, but is summarized by these two conditions: 1. If the number of commits we are adding is more than half the number of commits in the graph below, then merge with that graph. 2. If we are writing more than 64,000 commits into a single graph, then merge with all lower graphs. The numeric values in the conditions above are currently constant, but can become config options in a future update. As we merge levels of the commit-graph chain, check that the commits still exist in the repository. A garbage-collection operation may have removed those commits from the object store and we do not want to persist them in the commit-graph chain. This is a non-issue if the 'git gc' process wrote a new, single-level commit-graph file. After we merge levels, the old graph-{hash}.graph files are no longer referenced by the commit-graph-chain file. We will expire these files in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 1dca3bd..d9c6253 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -186,6 +186,86 @@ positions to refer to their parents, which may be in `graph-{hash1}.graph` or its containment in the intervals [0, X0), [X0, X0 + X1), [X0 + X1, X0 + X1 + X2). +Each commit-graph file (except the base, `graph-{hash0}.graph`) contains data +specifying the hashes of all files in the lower layers. In the above example, +`graph-{hash1}.graph` contains `{hash0}` while `graph-{hash2}.graph` contains +`{hash0}` and `{hash1}`. + +## Merging commit-graph files + +If we only added a new commit-graph file on every write, we would run into a +linear search problem through many commit-graph files. Instead, we use a merge +strategy to decide when the stack should collapse some number of levels. + +The diagram below shows such a collapse. As a set of new commits are added, it +is determined by the merge strategy that the files should collapse to +`graph-{hash1}`. Thus, the new commits, the commits in `graph-{hash2}` and +the commits in `graph-{hash1}` should be combined into a new `graph-{hash3}` +file. + + +---------------------+ + | | + | (new commits) | + | | + +---------------------+ + | | + +-----------------------+ +---------------------+ + | graph-{hash2} |->| | + +-----------------------+ +---------------------+ + | | | + +-----------------------+ +---------------------+ + | | | | + | graph-{hash1} |->| | + | | | | + +-----------------------+ +---------------------+ + | tmp_graphXXX + +-----------------------+ + | | + | | + | | + | graph-{hash0} | + | | + | | + | | + +-----------------------+ + +During this process, the commits to write are combined, sorted and we write the +contents to a temporary file, all while holding a `commit-graph-chain.lock` +lock-file. When the file is flushed, we rename it to `graph-{hash3}` +according to the computed `{hash3}`. Finally, we write the new chain data to +`commit-graph-chain.lock`: + +``` + {hash3} + {hash0} +``` + +We then close the lock-file. + +## Merge Strategy + +When writing a set of commits that do not exist in the commit-graph stack of +height N, we default to creating a new file at level N + 1. We then decide to +merge with the Nth level if one of two conditions hold: + + 1. The expected file size for level N + 1 is at least half the file size for + level N. + + 2. Level N + 1 contains more than 64,0000 commits. + +This decision cascades down the levels: when we merge a level we create a new +set of commits that then compares to the next level. + +The first condition bounds the number of levels to be logarithmic in the total +number of commits. The second condition bounds the total number of commits in +a `graph-{hashN}` file and not in the `commit-graph` file, preventing +significant performance issues when the stack merges and another process only +partially reads the previous stack. + +The merge strategy values (2 for the size multiple, 64,000 for the maximum +number of commits) could be extracted into config settings for full +flexibility. + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 diff --git a/commit-graph.c b/commit-graph.c index 1224309..fb31009 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1298,36 +1298,6 @@ static int write_graph_chunk_base(struct hashfile *f, return 0; } -static void init_commit_graph_chain(struct write_commit_graph_context *ctx) -{ - struct commit_graph *g = ctx->r->objects->commit_graph; - uint32_t i; - - ctx->new_base_graph = g; - ctx->base_graph_name = xstrdup(g->filename); - ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; - - ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - - ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); - ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); - - for (i = 0; i < ctx->num_commit_graphs_before - 1; i++) - ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); - - if (ctx->num_commit_graphs_before) - ctx->commit_graph_filenames_after[ctx->num_commit_graphs_before - 1] = - get_split_graph_filename(ctx->obj_dir, oid_to_hex(&g->oid)); - - i = ctx->num_commit_graphs_before - 1; - - while (g) { - ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); - i--; - g = g->base_graph; - } -} - static int write_commit_graph_file(struct write_commit_graph_context *ctx) { uint32_t i; @@ -1509,6 +1479,145 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return 0; } +static int split_strategy_max_commits = 64000; +static float split_strategy_size_mult = 2.0f; + +static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t num_commits = ctx->commits.nr; + uint32_t i; + + g = ctx->r->objects->commit_graph; + ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; + + while (g && (g->num_commits <= split_strategy_size_mult * num_commits || + num_commits > split_strategy_max_commits)) { + num_commits += g->num_commits; + g = g->base_graph; + + ctx->num_commit_graphs_after--; + } + + ctx->new_base_graph = g; + + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + + for (i = 0; i < ctx->num_commit_graphs_after && + i < ctx->num_commit_graphs_before; i++) + ctx->commit_graph_filenames_after[i] = xstrdup(ctx->commit_graph_filenames_before[i]); + + i = ctx->num_commit_graphs_before - 1; + g = ctx->r->objects->commit_graph; + + while (g) { + if (i < ctx->num_commit_graphs_after) + ctx->commit_graph_hash_after[i] = xstrdup(oid_to_hex(&g->oid)); + + i--; + g = g->base_graph; + } +} + +static void merge_commit_graph(struct write_commit_graph_context *ctx, + struct commit_graph *g) +{ + uint32_t i; + uint32_t offset = g->num_commits_in_base; + + ALLOC_GROW(ctx->commits.list, ctx->commits.nr + g->num_commits, ctx->commits.alloc); + + for (i = 0; i < g->num_commits; i++) { + struct object_id oid; + struct commit *result; + + display_progress(ctx->progress, i + 1); + + load_oid_from_graph(g, i + offset, &oid); + + /* only add commits if they still exist in the repo */ + result = lookup_commit_reference_gently(ctx->r, &oid, 1); + + if (result) { + ctx->commits.list[ctx->commits.nr] = result; + ctx->commits.nr++; + } + } +} + +static int commit_compare(const void *_a, const void *_b) +{ + const struct commit *a = *(const struct commit **)_a; + const struct commit *b = *(const struct commit **)_b; + return oidcmp(&a->object.oid, &b->object.oid); +} + +static void sort_and_scan_merged_commits(struct write_commit_graph_context *ctx) +{ + uint32_t i, num_parents; + struct commit_list *parent; + + if (ctx->report_progress) + ctx->progress = start_delayed_progress( + _("Scanning merged commits"), + ctx->commits.nr); + + QSORT(ctx->commits.list, ctx->commits.nr, commit_compare); + + ctx->num_extra_edges = 0; + for (i = 0; i < ctx->commits.nr; i++) { + display_progress(ctx->progress, i); + + if (i && oideq(&ctx->commits.list[i - 1]->object.oid, + &ctx->commits.list[i]->object.oid)) { + die(_("unexpected duplicate commit id %s"), + oid_to_hex(&ctx->commits.list[i]->object.oid)); + } else { + num_parents = 0; + for (parent = ctx->commits.list[i]->parents; parent; parent = parent->next) + num_parents++; + + if (num_parents > 2) + ctx->num_extra_edges += num_parents - 2; + } + } + + stop_progress(&ctx->progress); +} + +static void merge_commit_graphs(struct write_commit_graph_context *ctx) +{ + struct commit_graph *g = ctx->r->objects->commit_graph; + uint32_t current_graph_number = ctx->num_commit_graphs_before; + struct strbuf progress_title = STRBUF_INIT; + + while (g && current_graph_number >= ctx->num_commit_graphs_after) { + current_graph_number--; + + if (ctx->report_progress) { + strbuf_addstr(&progress_title, _("Merging commit-graph")); + ctx->progress = start_delayed_progress(progress_title.buf, 0); + } + + merge_commit_graph(ctx, g); + stop_progress(&ctx->progress); + strbuf_release(&progress_title); + + g = g->base_graph; + } + + if (g) { + ctx->new_base_graph = g; + ctx->new_num_commits_in_base = g->num_commits + g->num_commits_in_base; + } + + if (ctx->new_base_graph) + ctx->base_graph_name = xstrdup(ctx->new_base_graph->filename); + + sort_and_scan_merged_commits(ctx); +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1554,6 +1663,9 @@ int write_commit_graph(const char *obj_dir, ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; + if (ctx->split && ctx->oids.alloc > split_strategy_max_commits) + ctx->oids.alloc = split_strategy_max_commits; + if (ctx->append) { prepare_commit_graph_one(ctx->r, ctx->obj_dir); if (ctx->r->objects->commit_graph) @@ -1607,9 +1719,11 @@ int write_commit_graph(const char *obj_dir, if (!ctx->commits.nr) goto cleanup; - if (ctx->split) - init_commit_graph_chain(ctx); - else + if (ctx->split) { + split_graph_merge_strategy(ctx); + + merge_commit_graphs(ctx); + } else ctx->num_commit_graphs_after = 1; compute_generation_numbers(ctx); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index ccd24bd..5cb5663 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -119,4 +119,17 @@ test_expect_success 'add one commit, write a tip graph' ' graph_git_behavior 'three-layer commit-graph: commit 11 vs 6' commits/11 commits/6 +test_expect_success 'add one commit, write a merged graph' ' + test_commit 12 && + git branch commits/12 && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 2 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 4 graph-files && + verify_chain_files_exist $graphdir +' + +graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6 + test_done -- cgit v0.10.2-6-g49f6 From c523035cbd8515d095dc15e9661fd896733bedbc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:30 -0700 Subject: commit-graph: allow cross-alternate chains In an environment like a fork network, it is helpful to have a commit-graph chain that spans both the base repo and the fork repo. The fork is usually a small set of data on top of the large repo, but sometimes the fork is much larger. For example, git-for-windows/git has almost double the number of commits as git/git because it rebases its commits on every major version update. To allow cross-alternate commit-graph chains, we need a few pieces: 1. When looking for a graph-{hash}.graph file, check all alternates. 2. When merging commit-graph chains, do not merge across alternates. 3. When writing a new commit-graph chain based on a commit-graph file in another object directory, do not allow success if the base file has of the name "commit-graph" instead of "commit-graphs/graph-{hash}.graph". Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index d9c6253..473032e 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -266,6 +266,42 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Chains across multiple object directories + +In a repo with alternates, we look for the `commit-graph-chain` file starting +in the local object directory and then in each alternate. The first file that +exists defines our chain. As we look for the `graph-{hash}` files for +each `{hash}` in the chain file, we follow the same pattern for the host +directories. + +This allows commit-graphs to be split across multiple forks in a fork network. +The typical case is a large "base" repo with many smaller forks. + +As the base repo advances, it will likely update and merge its commit-graph +chain more frequently than the forks. If a fork updates their commit-graph after +the base repo, then it should "reparent" the commit-graph chain onto the new +chain in the base repo. When reading each `graph-{hash}` file, we track +the object directory containing it. During a write of a new commit-graph file, +we check for any changes in the source object directory and read the +`commit-graph-chain` file for that source and create a new file based on those +files. During this "reparent" operation, we necessarily need to collapse all +levels in the fork, as all of the files are invalid against the new base file. + +It is crucial to be careful when cleaning up "unreferenced" `graph-{hash}.graph` +files in this scenario. It falls to the user to define the proper settings for +their custom environment: + + 1. When merging levels in the base repo, the unreferenced files may still be + referenced by chains from fork repos. + + 2. The expiry time should be set to a length of time such that every fork has + time to recompute their commit-graph chain to "reparent" onto the new base + file(s). + + 3. If the commit-graph chain is updated in the base, the fork will not have + access to the new chain until its chain is updated to reference those files. + (This may change in the future [5].) + Related Links ------------- [0] https://bugs.chromium.org/p/git/issues/detail?id=8 @@ -292,3 +328,7 @@ Related Links [4] https://public-inbox.org/git/20180108154822.54829-1-git@jeffhostetler.com/T/#u A patch to remove the ahead-behind calculation from 'status'. + +[5] https://public-inbox.org/git/f27db281-abad-5043-6d71-cbb083b1c877@gmail.com/ + A discussion of a "two-dimensional graph position" that can allow reading + multiple commit-graph chains at the same time. diff --git a/commit-graph.c b/commit-graph.c index fb31009..fba705b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -320,6 +320,9 @@ static struct commit_graph *load_commit_graph_v1(struct repository *r, const cha struct commit_graph *g = load_commit_graph_one(graph_name); free(graph_name); + if (g) + g->obj_dir = obj_dir; + return g; } @@ -379,9 +382,10 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const count = st.st_size / (the_hash_algo->hexsz + 1); oids = xcalloc(count, sizeof(struct object_id)); - for (i = 0; i < count && valid; i++) { - char *graph_name; - struct commit_graph *g; + prepare_alt_odb(r); + + for (i = 0; i < count; i++) { + struct object_directory *odb; if (strbuf_getline_lf(&line, fp) == EOF) break; @@ -393,14 +397,29 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const break; } - graph_name = get_split_graph_filename(obj_dir, line.buf); - g = load_commit_graph_one(graph_name); - free(graph_name); + valid = 0; + for (odb = r->objects->odb; odb; odb = odb->next) { + char *graph_name = get_split_graph_filename(odb->path, line.buf); + struct commit_graph *g = load_commit_graph_one(graph_name); - if (g && add_graph_to_chain(g, graph_chain, oids, i)) - graph_chain = g; - else - valid = 0; + free(graph_name); + + if (g) { + g->obj_dir = odb->path; + + if (add_graph_to_chain(g, graph_chain, oids, i)) { + graph_chain = g; + valid = 1; + } + + break; + } + } + + if (!valid) { + warning(_("unable to find all commit-graph files")); + break; + } } free(oids); @@ -1418,7 +1437,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) if (ctx->split && ctx->base_graph_name && ctx->num_commit_graphs_after > 1) { char *new_base_hash = xstrdup(oid_to_hex(&ctx->new_base_graph->oid)); - char *new_base_name = get_split_graph_filename(ctx->obj_dir, new_base_hash); + char *new_base_name = get_split_graph_filename(ctx->new_base_graph->obj_dir, new_base_hash); free(ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 2]); free(ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 2]); @@ -1493,6 +1512,9 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) while (g && (g->num_commits <= split_strategy_size_mult * num_commits || num_commits > split_strategy_max_commits)) { + if (strcmp(g->obj_dir, ctx->obj_dir)) + break; + num_commits += g->num_commits; g = g->base_graph; @@ -1501,6 +1523,18 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) ctx->new_base_graph = g; + if (ctx->num_commit_graphs_after == 2) { + char *old_graph_name = get_commit_graph_filename(g->obj_dir); + + if (!strcmp(g->filename, old_graph_name) && + strcmp(g->obj_dir, ctx->obj_dir)) { + ctx->num_commit_graphs_after = 1; + ctx->new_base_graph = NULL; + } + + free(old_graph_name); + } + ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); diff --git a/commit-graph.h b/commit-graph.h index c321834..802d352 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -48,6 +48,7 @@ struct commit_graph { uint32_t num_commits; struct object_id oid; char *filename; + const char *obj_dir; uint32_t num_commits_in_base; struct commit_graph *base_graph; diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 5cb5663..46f0832 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -90,6 +90,21 @@ test_expect_success 'add more commits, and write a new base graph' ' graph_read_expect 12 ' +test_expect_success 'fork and fail to base a chain on a commit-graph file' ' + test_when_finished rm -rf fork && + git clone . fork && + ( + cd fork && + rm .git/objects/info/commit-graph && + echo "$(pwd)/../.git/objects" >.git/objects/info/alternates && + test_commit new-commit && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 1 $graphdir/commit-graph-chain && + verify_chain_files_exist $graphdir + ) +' + test_expect_success 'add three more commits, write a tip graph' ' git reset --hard commits/3 && git merge merge/1 && @@ -132,4 +147,26 @@ test_expect_success 'add one commit, write a merged graph' ' graph_git_behavior 'merged commit-graph: commit 12 vs 6' commits/12 commits/6 +test_expect_success 'create fork and chain across alternate' ' + git clone . fork && + ( + cd fork && + git config core.commitGraph true && + rm -rf $graphdir && + echo "$(pwd)/../.git/objects" >.git/objects/info/alternates && + test_commit 13 && + git branch commits/13 && + git commit-graph write --reachable --split && + test_path_is_file $graphdir/commit-graph-chain && + test_line_count = 3 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files && + git -c core.commitGraph=true rev-list HEAD >expect && + git -c core.commitGraph=false rev-list HEAD >actual && + test_cmp expect actual + ) +' + +graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6 + test_done -- cgit v0.10.2-6-g49f6 From 8d84097f9655a954ea7e94ba08d93e5940ccb2ae Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:31 -0700 Subject: commit-graph: expire commit-graph files As we merge commit-graph files in a commit-graph chain, we should clean up the files that are no longer used. This change introduces an 'expiry_window' value to the context, which is always zero (for now). We then check the modified time of each graph-{hash}.graph file in the $OBJDIR/info/commit-graphs folder and unlink the files that are older than the expiry_window. Since this is always zero, this immediately clears all unused graph files. We will update the value to match a config setting in a future change. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index 473032e..aed4350 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -266,6 +266,21 @@ The merge strategy values (2 for the size multiple, 64,000 for the maximum number of commits) could be extracted into config settings for full flexibility. +## Deleting graph-{hash} files + +After a new tip file is written, some `graph-{hash}` files may no longer +be part of a chain. It is important to remove these files from disk, eventually. +The main reason to delay removal is that another process could read the +`commit-graph-chain` file before it is rewritten, but then look for the +`graph-{hash}` files after they are deleted. + +To allow holding old split commit-graphs for a while after they are unreferenced, +we update the modified times of the files when they become unreferenced. Then, +we scan the `$OBJDIR/info/commit-graphs/` directory for `graph-{hash}` +files whose modified times are older than a given expiry window. This window +defaults to zero, but can be changed using command-line arguments or a config +setting. + ## Chains across multiple object directories In a repo with alternates, we look for the `commit-graph-chain` file starting diff --git a/commit-graph.c b/commit-graph.c index fba705b..0cc2ceb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1652,6 +1652,70 @@ static void merge_commit_graphs(struct write_commit_graph_context *ctx) sort_and_scan_merged_commits(ctx); } +static void mark_commit_graphs(struct write_commit_graph_context *ctx) +{ + uint32_t i; + time_t now = time(NULL); + + for (i = ctx->num_commit_graphs_after - 1; i < ctx->num_commit_graphs_before; i++) { + struct stat st; + struct utimbuf updated_time; + + stat(ctx->commit_graph_filenames_before[i], &st); + + updated_time.actime = st.st_atime; + updated_time.modtime = now; + utime(ctx->commit_graph_filenames_before[i], &updated_time); + } +} + +static void expire_commit_graphs(struct write_commit_graph_context *ctx) +{ + struct strbuf path = STRBUF_INIT; + DIR *dir; + struct dirent *de; + size_t dirnamelen; + time_t expire_time = time(NULL); + + strbuf_addstr(&path, ctx->obj_dir); + strbuf_addstr(&path, "/info/commit-graphs"); + dir = opendir(path.buf); + + if (!dir) { + strbuf_release(&path); + return; + } + + strbuf_addch(&path, '/'); + dirnamelen = path.len; + while ((de = readdir(dir)) != NULL) { + struct stat st; + uint32_t i, found = 0; + + strbuf_setlen(&path, dirnamelen); + strbuf_addstr(&path, de->d_name); + + stat(path.buf, &st); + + if (st.st_mtime > expire_time) + continue; + if (path.len < 6 || strcmp(path.buf + path.len - 6, ".graph")) + continue; + + for (i = 0; i < ctx->num_commit_graphs_after; i++) { + if (!strcmp(ctx->commit_graph_filenames_after[i], + path.buf)) { + found = 1; + break; + } + } + + if (!found) + unlink(path.buf); + + } +} + int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, @@ -1764,6 +1828,11 @@ int write_commit_graph(const char *obj_dir, res = write_commit_graph_file(ctx); + if (ctx->split) { + mark_commit_graphs(ctx); + expire_commit_graphs(ctx); + } + cleanup: free(ctx->graph_name); free(ctx->commits.list); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 46f0832..76068ee 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -141,7 +141,7 @@ test_expect_success 'add one commit, write a merged graph' ' test_path_is_file $graphdir/commit-graph-chain && test_line_count = 2 $graphdir/commit-graph-chain && ls $graphdir/graph-*.graph >graph-files && - test_line_count = 4 graph-files && + test_line_count = 2 graph-files && verify_chain_files_exist $graphdir ' -- cgit v0.10.2-6-g49f6 From c2bc6e6ab0ade70c475a73d06326e677e70840d2 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:32 -0700 Subject: commit-graph: create options for split files The split commit-graph feature is now fully implemented, but needs some more run-time configurability. Allow direct callers to 'git commit-graph write --split' to specify the values used in the merge strategy and the expire time. Update the documentation to specify these values. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 624470e..365e145 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -26,7 +26,7 @@ OPTIONS Use given directory for the location of packfiles and commit-graph file. This parameter exists to specify the location of an alternate that only has the objects directory, not a full `.git` directory. The - commit-graph file is expected to be at `/info/commit-graph` and + commit-graph file is expected to be in the `/info` directory and the packfiles are expected to be in `/pack`. @@ -51,6 +51,25 @@ or `--stdin-packs`.) + With the `--append` option, include all commits that are present in the existing commit-graph file. ++ +With the `--split` option, write the commit-graph as a chain of multiple +commit-graph files stored in `/info/commit-graphs`. The new commits +not already in the commit-graph are added in a new "tip" file. This file +is merged with the existing file if the following merge conditions are +met: ++ +* If `--size-multiple=` is not specified, let `X` equal 2. If the new +tip file would have `N` commits and the previous tip has `M` commits and +`X` times `N` is greater than `M`, instead merge the two files into a +single file. ++ +* If `--max-commits=` is specified with `M` a positive integer, and the +new tip file would have more than `M` commits, then instead merge the new +tip with the previous tip. ++ +Finally, if `--expire-time=` is not specified, let `datetime` +be the current time. After writing the split commit-graph, delete all +unused commit-graph whose modified times are older than `datetime`. 'read':: diff --git a/Documentation/technical/commit-graph.txt b/Documentation/technical/commit-graph.txt index aed4350..729fbcb 100644 --- a/Documentation/technical/commit-graph.txt +++ b/Documentation/technical/commit-graph.txt @@ -248,10 +248,11 @@ When writing a set of commits that do not exist in the commit-graph stack of height N, we default to creating a new file at level N + 1. We then decide to merge with the Nth level if one of two conditions hold: - 1. The expected file size for level N + 1 is at least half the file size for - level N. + 1. `--size-multiple=` is specified or X = 2, and the number of commits in + level N is less than X times the number of commits in level N + 1. - 2. Level N + 1 contains more than 64,0000 commits. + 2. `--max-commits=` is specified with non-zero C and the number of commits + in level N + 1 is more than C commits. This decision cascades down the levels: when we merge a level we create a new set of commits that then compares to the next level. diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index ff8bc3c..6c0b5b1 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -10,7 +10,7 @@ static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), N_("git commit-graph verify [--object-dir ]"), - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; @@ -25,7 +25,7 @@ static const char * const builtin_commit_graph_read_usage[] = { }; static const char * const builtin_commit_graph_write_usage[] = { - N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits]"), + N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; @@ -135,6 +135,7 @@ static int graph_read(int argc, const char **argv) } extern int read_replace_refs; +static struct split_commit_graph_opts split_opts; static int graph_write(int argc, const char **argv) { @@ -158,9 +159,19 @@ static int graph_write(int argc, const char **argv) N_("include all commits already in the commit-graph file")), OPT_BOOL(0, "split", &opts.split, N_("allow writing an incremental commit-graph file")), + OPT_INTEGER(0, "max-commits", &split_opts.max_commits, + N_("maximum number of commits in a non-base split commit-graph")), + OPT_INTEGER(0, "size-multiple", &split_opts.size_multiple, + N_("maximum ratio between two levels of a split commit-graph")), + OPT_EXPIRY_DATE(0, "expire-time", &split_opts.expire_time, + N_("maximum number of commits in a non-base split commit-graph")), OPT_END(), }; + split_opts.size_multiple = 2; + split_opts.max_commits = 0; + split_opts.expire_time = 0; + argc = parse_options(argc, argv, NULL, builtin_commit_graph_write_options, builtin_commit_graph_write_usage, 0); @@ -176,8 +187,11 @@ static int graph_write(int argc, const char **argv) read_replace_refs = 0; - if (opts.reachable) - return write_commit_graph_reachable(opts.obj_dir, flags); + if (opts.reachable) { + if (write_commit_graph_reachable(opts.obj_dir, flags, &split_opts)) + return 1; + return 0; + } string_list_init(&lines, 0); if (opts.stdin_packs || opts.stdin_commits) { @@ -197,7 +211,8 @@ static int graph_write(int argc, const char **argv) if (write_commit_graph(opts.obj_dir, pack_indexes, commit_hex, - flags)) + flags, + &split_opts)) result = 1; UNLEAK(lines); diff --git a/builtin/commit.c b/builtin/commit.c index b001ef5..9216e9c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1670,7 +1670,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix) "not exceeded, and then \"git reset HEAD\" to recover.")); if (git_env_bool(GIT_TEST_COMMIT_GRAPH, 0) && - write_commit_graph_reachable(get_object_directory(), 0)) + write_commit_graph_reachable(get_object_directory(), 0, NULL)) return 1; repo_rerere(the_repository, 0); diff --git a/builtin/gc.c b/builtin/gc.c index 20c8f1b..0aa8eac 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -666,7 +666,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix) if (gc_write_commit_graph && write_commit_graph_reachable(get_object_directory(), - !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0)) + !quiet && !daemonized ? COMMIT_GRAPH_PROGRESS : 0, + NULL)) return 1; if (auto_gc && too_many_loose_objects()) diff --git a/commit-graph.c b/commit-graph.c index 0cc2ceb..315088d 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -768,6 +768,8 @@ struct write_commit_graph_context { unsigned append:1, report_progress:1, split:1; + + const struct split_commit_graph_opts *split_opts; }; static void write_graph_chunk_fanout(struct hashfile *f, @@ -1116,14 +1118,15 @@ static int add_ref_to_list(const char *refname, return 0; } -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags) +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, + const struct split_commit_graph_opts *split_opts) { struct string_list list = STRING_LIST_INIT_DUP; int result; for_each_ref(add_ref_to_list, &list); result = write_commit_graph(obj_dir, NULL, &list, - flags); + flags, split_opts); string_list_clear(&list, 0); return result; @@ -1498,20 +1501,25 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) return 0; } -static int split_strategy_max_commits = 64000; -static float split_strategy_size_mult = 2.0f; - static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) { struct commit_graph *g = ctx->r->objects->commit_graph; uint32_t num_commits = ctx->commits.nr; uint32_t i; + int max_commits = 0; + int size_mult = 2; + + if (ctx->split_opts) { + max_commits = ctx->split_opts->max_commits; + size_mult = ctx->split_opts->size_multiple; + } + g = ctx->r->objects->commit_graph; ctx->num_commit_graphs_after = ctx->num_commit_graphs_before + 1; - while (g && (g->num_commits <= split_strategy_size_mult * num_commits || - num_commits > split_strategy_max_commits)) { + while (g && (g->num_commits <= size_mult * num_commits || + (max_commits && num_commits > max_commits))) { if (strcmp(g->obj_dir, ctx->obj_dir)) break; @@ -1675,7 +1683,10 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) DIR *dir; struct dirent *de; size_t dirnamelen; - time_t expire_time = time(NULL); + timestamp_t expire_time = time(NULL); + + if (ctx->split_opts && ctx->split_opts->expire_time) + expire_time -= ctx->split_opts->expire_time; strbuf_addstr(&path, ctx->obj_dir); strbuf_addstr(&path, "/info/commit-graphs"); @@ -1719,7 +1730,8 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags) + unsigned int flags, + const struct split_commit_graph_opts *split_opts) { struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; @@ -1734,6 +1746,7 @@ int write_commit_graph(const char *obj_dir, ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; + ctx->split_opts = split_opts; if (ctx->split) { struct commit_graph *g; @@ -1761,8 +1774,8 @@ int write_commit_graph(const char *obj_dir, ctx->approx_nr_objects = approximate_object_count(); ctx->oids.alloc = ctx->approx_nr_objects / 32; - if (ctx->split && ctx->oids.alloc > split_strategy_max_commits) - ctx->oids.alloc = split_strategy_max_commits; + if (ctx->split && split_opts && ctx->oids.alloc > split_opts->max_commits) + ctx->oids.alloc = split_opts->max_commits; if (ctx->append) { prepare_commit_graph_one(ctx->r, ctx->obj_dir); diff --git a/commit-graph.h b/commit-graph.h index 802d352..a84c22a 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -75,17 +75,25 @@ int generation_numbers_enabled(struct repository *r); #define COMMIT_GRAPH_PROGRESS (1 << 1) #define COMMIT_GRAPH_SPLIT (1 << 2) +struct split_commit_graph_opts { + int size_multiple; + int max_commits; + timestamp_t expire_time; +}; + /* * The write_commit_graph* methods return zero on success * and a negative value on failure. Note that if the repository * is not compatible with the commit-graph feature, then the * methods will return 0 without writing a commit-graph. */ -int write_commit_graph_reachable(const char *obj_dir, unsigned int flags); +int write_commit_graph_reachable(const char *obj_dir, unsigned int flags, + const struct split_commit_graph_opts *split_opts); int write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, struct string_list *commit_hex, - unsigned int flags); + unsigned int flags, + const struct split_commit_graph_opts *split_opts); int verify_commit_graph(struct repository *r, struct commit_graph *g); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 76068ee..1b699a5 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -169,4 +169,51 @@ test_expect_success 'create fork and chain across alternate' ' graph_git_behavior 'alternate: commit 13 vs 6' commits/13 commits/6 +test_expect_success 'test merge stragety constants' ' + git clone . merge-2 && + ( + cd merge-2 && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 14 && + git commit-graph write --reachable --split --size-multiple=2 && + test_line_count = 3 $graphdir/commit-graph-chain + + ) && + git clone . merge-10 && + ( + cd merge-10 && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 14 && + git commit-graph write --reachable --split --size-multiple=10 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files + ) && + git clone . merge-10-expire && + ( + cd merge-10-expire && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 15 && + git commit-graph write --reachable --split --size-multiple=10 --expire-time=1980-01-01 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 3 graph-files + ) && + git clone --no-hardlinks . max-commits && + ( + cd max-commits && + git config core.commitGraph true && + test_line_count = 2 $graphdir/commit-graph-chain && + test_commit 16 && + test_commit 17 && + git commit-graph write --reachable --split --max-commits=1 && + test_line_count = 1 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files + ) +' + test_done -- cgit v0.10.2-6-g49f6 From 3da4b609bb14b13672f64af908706462617f53cb Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:32 -0700 Subject: commit-graph: verify chains with --shallow mode If we wrote a commit-graph chain, we only modified the tip file in the chain. It is valuable to verify what we wrote, but not waste time checking files we did not write. Add a '--shallow' option to the 'git commit-graph verify' subcommand and check that it does not read the base graph in a two-file chain. Making the verify subcommand read from a chain of commit-graphs takes some rearranging of the builtin code. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt index 365e145..eb5e786 100644 --- a/Documentation/git-commit-graph.txt +++ b/Documentation/git-commit-graph.txt @@ -10,7 +10,7 @@ SYNOPSIS -------- [verse] 'git commit-graph read' [--object-dir ] -'git commit-graph verify' [--object-dir ] +'git commit-graph verify' [--object-dir ] [--shallow] 'git commit-graph write' [--object-dir ] @@ -80,6 +80,9 @@ Used for debugging purposes. Read the commit-graph file and verify its contents against the object database. Used to check for corrupted data. ++ +With the `--shallow` option, only check the tip commit-graph file in +a chain of split commit-graphs. EXAMPLES diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index 6c0b5b1..38027b8 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -5,17 +5,18 @@ #include "parse-options.h" #include "repository.h" #include "commit-graph.h" +#include "object-store.h" static char const * const builtin_commit_graph_usage[] = { N_("git commit-graph [--object-dir ]"), N_("git commit-graph read [--object-dir ]"), - N_("git commit-graph verify [--object-dir ]"), + N_("git commit-graph verify [--object-dir ] [--shallow]"), N_("git commit-graph write [--object-dir ] [--append|--split] [--reachable|--stdin-packs|--stdin-commits] "), NULL }; static const char * const builtin_commit_graph_verify_usage[] = { - N_("git commit-graph verify [--object-dir ]"), + N_("git commit-graph verify [--object-dir ] [--shallow]"), NULL }; @@ -36,6 +37,7 @@ static struct opts_commit_graph { int stdin_commits; int append; int split; + int shallow; } opts; static int graph_verify(int argc, const char **argv) @@ -45,11 +47,14 @@ static int graph_verify(int argc, const char **argv) int open_ok; int fd; struct stat st; + int flags = 0; static struct option builtin_commit_graph_verify_options[] = { OPT_STRING(0, "object-dir", &opts.obj_dir, N_("dir"), N_("The object directory to store the graph")), + OPT_BOOL(0, "shallow", &opts.shallow, + N_("if the commit-graph is split, only verify the tip file")), OPT_END(), }; @@ -59,21 +64,27 @@ static int graph_verify(int argc, const char **argv) if (!opts.obj_dir) opts.obj_dir = get_object_directory(); + if (opts.shallow) + flags |= COMMIT_GRAPH_VERIFY_SHALLOW; graph_name = get_commit_graph_filename(opts.obj_dir); open_ok = open_commit_graph(graph_name, &fd, &st); - if (!open_ok && errno == ENOENT) - return 0; - if (!open_ok) + if (!open_ok && errno != ENOENT) die_errno(_("Could not open commit-graph '%s'"), graph_name); - graph = load_commit_graph_one_fd_st(fd, &st); + FREE_AND_NULL(graph_name); + if (open_ok) + graph = load_commit_graph_one_fd_st(fd, &st); + else + graph = read_commit_graph_one(the_repository, opts.obj_dir); + + /* Return failure if open_ok predicted success */ if (!graph) - return 1; + return !!open_ok; UNLEAK(graph); - return verify_commit_graph(the_repository, graph); + return verify_commit_graph(the_repository, graph, flags); } static int graph_read(int argc, const char **argv) diff --git a/commit-graph.c b/commit-graph.c index 315088d..f33f4fe 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -428,7 +428,7 @@ static struct commit_graph *load_commit_graph_chain(struct repository *r, const return graph_chain; } -static struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) +struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir) { struct commit_graph *g = load_commit_graph_v1(r, obj_dir); @@ -1887,7 +1887,7 @@ static void graph_report(const char *fmt, ...) #define GENERATION_ZERO_EXISTS 1 #define GENERATION_NUMBER_EXISTS 2 -int verify_commit_graph(struct repository *r, struct commit_graph *g) +int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) { uint32_t i, cur_fanout_pos = 0; struct object_id prev_oid, cur_oid, checksum; @@ -1895,6 +1895,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) struct hashfile *f; int devnull; struct progress *progress = NULL; + int local_error = 0; if (!g) { graph_report("no commit-graph file loaded"); @@ -1989,6 +1990,9 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) break; } + /* parse parent in case it is in a base graph */ + parse_commit_in_graph_one(r, g, graph_parents->item); + if (!oideq(&graph_parents->item->object.oid, &odb_parents->item->object.oid)) graph_report(_("commit-graph parent for %s is %s != %s"), oid_to_hex(&cur_oid), @@ -2040,7 +2044,12 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g) } stop_progress(&progress); - return verify_commit_graph_error; + local_error = verify_commit_graph_error; + + if (!(flags & COMMIT_GRAPH_VERIFY_SHALLOW) && g->base_graph) + local_error |= verify_commit_graph(r, g->base_graph, flags); + + return local_error; } void free_commit_graph(struct commit_graph *g) diff --git a/commit-graph.h b/commit-graph.h index a84c22a..df9a3b2 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -61,7 +61,7 @@ struct commit_graph { }; struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st); - +struct commit_graph *read_commit_graph_one(struct repository *r, const char *obj_dir); struct commit_graph *parse_commit_graph(void *graph_map, int fd, size_t graph_size); @@ -95,7 +95,9 @@ int write_commit_graph(const char *obj_dir, unsigned int flags, const struct split_commit_graph_opts *split_opts); -int verify_commit_graph(struct repository *r, struct commit_graph *g); +#define COMMIT_GRAPH_VERIFY_SHALLOW (1 << 0) + +int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags); void close_commit_graph(struct raw_object_store *); void free_commit_graph(struct commit_graph *); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 1b699a5..3df90ae 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -216,4 +216,66 @@ test_expect_success 'test merge stragety constants' ' ) ' +corrupt_file() { + file=$1 + pos=$2 + data="${3:-\0}" + printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc +} + +test_expect_success 'verify hashes along chain, even in shallow' ' + git clone --no-hardlinks . verify && + ( + cd verify && + git commit-graph verify && + base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1760 "\01" && + test_must_fail git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'verify --shallow does not check base contents' ' + git clone --no-hardlinks . verify-shallow && + ( + cd verify-shallow && + git commit-graph verify && + base_file=$graphdir/graph-$(head -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1000 "\01" && + git commit-graph verify --shallow && + test_must_fail git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "incorrect checksum" err + ) +' + +test_expect_success 'warn on base graph chunk incorrect' ' + git clone --no-hardlinks . base-chunk && + ( + cd base-chunk && + git commit-graph verify && + base_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$base_file" 1376 "\01" && + git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "commit-graph chain does not match" err + ) +' + +test_expect_success 'verify after commit-graph-chain corruption' ' + git clone --no-hardlinks . verify-chain && + ( + cd verify-chain && + corrupt_file "$graphdir/commit-graph-chain" 60 "G" && + git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "invalid commit-graph chain" err && + corrupt_file "$graphdir/commit-graph-chain" 60 "A" && + git commit-graph verify 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "unable to find all commit-graph files" err + ) +' + test_done -- cgit v0.10.2-6-g49f6 From ba41112a6331e8b454cac6cfd3ee67ae93957a2c Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:33 -0700 Subject: commit-graph: clean up chains after flattened write If we write a commit-graph file without the split option, then we write to $OBJDIR/info/commit-graph and start to ignore the chains in $OBJDIR/info/commit-graphs/. Unlink the commit-graph-chain file and expire the graph-{hash}.graph files in $OBJDIR/info/commit-graphs/ during every write. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index f33f4fe..3599ae6 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1687,6 +1687,12 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) if (ctx->split_opts && ctx->split_opts->expire_time) expire_time -= ctx->split_opts->expire_time; + if (!ctx->split) { + char *chain_file_name = get_chain_filename(ctx->obj_dir); + unlink(chain_file_name); + free(chain_file_name); + ctx->num_commit_graphs_after = 0; + } strbuf_addstr(&path, ctx->obj_dir); strbuf_addstr(&path, "/info/commit-graphs"); @@ -1841,10 +1847,10 @@ int write_commit_graph(const char *obj_dir, res = write_commit_graph_file(ctx); - if (ctx->split) { + if (ctx->split) mark_commit_graphs(ctx); - expire_commit_graphs(ctx); - } + + expire_commit_graphs(ctx); cleanup: free(ctx->graph_name); diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 3df90ae..e8df35c 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -216,6 +216,18 @@ test_expect_success 'test merge stragety constants' ' ) ' +test_expect_success 'remove commit-graph-chain file after flattening' ' + git clone . flatten && + ( + cd flatten && + test_line_count = 2 $graphdir/commit-graph-chain && + git commit-graph write --reachable && + test_path_is_missing $graphdir/commit-graph-chain && + ls $graphdir >graph-files && + test_line_count = 0 graph-files + ) +' + corrupt_file() { file=$1 pos=$2 -- cgit v0.10.2-6-g49f6 From e2017c48fe95d44f95bc4e9f36e2210dff698c40 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:34 -0700 Subject: commit-graph: test octopus merges with --split Octopus merges require an extra chunk of data in the commit-graph file format. Create a test that ensures the new --split option continues to work with an octopus merge. Specifically, ensure that the octopus merge has parents across layers to truly check that our graph position logic holds up correctly. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index e8df35c..704def7 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -290,4 +290,15 @@ test_expect_success 'verify after commit-graph-chain corruption' ' ) ' +test_expect_success 'add octopus merge' ' + git reset --hard commits/10 && + git merge commits/3 commits/4 && + git branch merge/octopus && + git commit-graph write --reachable --split && + git commit-graph verify && + test_line_count = 3 $graphdir/commit-graph-chain +' + +graph_git_behavior 'graph exists' merge/octopus commits/12 + test_done -- cgit v0.10.2-6-g49f6 From a09c1301ce5986a374ab008b2125fa26a9bdd2fe Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:35 -0700 Subject: commit-graph: test --split across alternate without --split We allow sharing commit-graph files across alternates. When we are writing a split commit-graph, we allow adding tip graph files that are not in the alternate, but include commits from our local repo. However, if our alternate is not using the split commit-graph format, its file is at .git/objects/info/commit-graph and we are trying to write files in .git/objects/info/commit-graphs/graph-{hash}.graph. We already have logic to ensure we do not merge across alternate boundaries, but we also cannot have a commit-graph chain to our alternate if uses the old filename structure. Create a test that verifies we create a new split commit-graph with only one level and we do not modify the existing commit-graph in the alternate. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 704def7..130f2ba 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -301,4 +301,19 @@ test_expect_success 'add octopus merge' ' graph_git_behavior 'graph exists' merge/octopus commits/12 +test_expect_success 'split across alternate where alternate is not split' ' + git commit-graph write --reachable && + test_path_is_file .git/objects/info/commit-graph && + cp .git/objects/info/commit-graph . && + git clone --no-hardlinks . alt-split && + ( + cd alt-split && + echo "$(pwd)"/../.git/objects >.git/objects/info/alternates && + test_commit 18 && + git commit-graph write --reachable --split && + test_line_count = 1 $graphdir/commit-graph-chain + ) && + test_cmp commit-graph .git/objects/info/commit-graph +' + test_done -- cgit v0.10.2-6-g49f6 From 16110c9348fc2022e78f2581968cf428c056dc5b Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:36 -0700 Subject: commit-graph: normalize commit-graph filenames When writing commit-graph files, we append path data to an object directory, which may be specified by the user via the '--object-dir' option. If the user supplies a trailing slash, or some other alternative path format, the resulting path may be usable for writing to the correct location. However, when expiring graph files from the /info/commit-graphs directory during a write, we need to compare paths with exact string matches. Normalize the commit-graph filenames to avoid ambiguity. This creates extra allocations, but this is a constant multiple of the number of commit-graph files, which should be a number in the single digits. Further normalize the object directory in the context. Due to a comparison between g->obj_dir and ctx->obj_dir in split_graph_merge_strategy(), a trailing slash would prevent any merging of layers within the same object directory. The check is there to ensure we do not merge across alternates. Update the tests to include a case with this trailing slash problem. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 3599ae6..c91e6f0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -43,15 +43,23 @@ char *get_commit_graph_filename(const char *obj_dir) { - return xstrfmt("%s/info/commit-graph", obj_dir); + char *filename = xstrfmt("%s/info/commit-graph", obj_dir); + char *normalized = xmalloc(strlen(filename) + 1); + normalize_path_copy(normalized, filename); + free(filename); + return normalized; } static char *get_split_graph_filename(const char *obj_dir, const char *oid_hex) { - return xstrfmt("%s/info/commit-graphs/graph-%s.graph", - obj_dir, - oid_hex); + char *filename = xstrfmt("%s/info/commit-graphs/graph-%s.graph", + obj_dir, + oid_hex); + char *normalized = xmalloc(strlen(filename) + 1); + normalize_path_copy(normalized, filename); + free(filename); + return normalized; } static char *get_chain_filename(const char *obj_dir) @@ -746,7 +754,7 @@ struct packed_oid_list { struct write_commit_graph_context { struct repository *r; - const char *obj_dir; + char *obj_dir; char *graph_name; struct packed_oid_list oids; struct packed_commit_list commits; @@ -1729,7 +1737,6 @@ static void expire_commit_graphs(struct write_commit_graph_context *ctx) if (!found) unlink(path.buf); - } } @@ -1741,6 +1748,7 @@ int write_commit_graph(const char *obj_dir, { struct write_commit_graph_context *ctx; uint32_t i, count_distinct = 0; + size_t len; int res = 0; if (!commit_graph_compatible(the_repository)) @@ -1748,7 +1756,14 @@ int write_commit_graph(const char *obj_dir, ctx = xcalloc(1, sizeof(struct write_commit_graph_context)); ctx->r = the_repository; - ctx->obj_dir = obj_dir; + + /* normalize object dir with no trailing slash */ + ctx->obj_dir = xmallocz(strlen(obj_dir) + 1); + normalize_path_copy(ctx->obj_dir, obj_dir); + len = strlen(ctx->obj_dir); + if (len && ctx->obj_dir[len - 1] == '/') + ctx->obj_dir[len - 1] = 0; + ctx->append = flags & COMMIT_GRAPH_APPEND ? 1 : 0; ctx->report_progress = flags & COMMIT_GRAPH_PROGRESS ? 1 : 0; ctx->split = flags & COMMIT_GRAPH_SPLIT ? 1 : 0; @@ -1856,6 +1871,7 @@ cleanup: free(ctx->graph_name); free(ctx->commits.list); free(ctx->oids.list); + free(ctx->obj_dir); if (ctx->commit_graph_filenames_after) { for (i = 0; i < ctx->num_commit_graphs_after; i++) { diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 130f2ba..fc0d007 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -163,7 +163,12 @@ test_expect_success 'create fork and chain across alternate' ' test_line_count = 1 graph-files && git -c core.commitGraph=true rev-list HEAD >expect && git -c core.commitGraph=false rev-list HEAD >actual && - test_cmp expect actual + test_cmp expect actual && + test_commit 14 && + git commit-graph write --reachable --split --object-dir=.git/objects/ && + test_line_count = 3 $graphdir/commit-graph-chain && + ls $graphdir/graph-*.graph >graph-files && + test_line_count = 1 graph-files ) ' -- cgit v0.10.2-6-g49f6 From 5b15eb397d176c557a82e872d6b4043aa7589874 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 18 Jun 2019 11:14:36 -0700 Subject: commit-graph: test verify across alternates The 'git commit-graph verify' subcommand loads a commit-graph from a given object directory instead of using the standard method prepare_commit_graph(). During development of load_commit_graph_chain(), a version did not include prepare_alt_odb() as it was previously run by prepare_commit_graph() in most cases. Add a test that prevents that mistake from happening again. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index fc0d007..03f45a1 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -237,6 +237,7 @@ corrupt_file() { file=$1 pos=$2 data="${3:-\0}" + chmod a+w "$file" && printf "$data" | dd of="$file" bs=1 seek="$pos" conv=notrunc } @@ -295,6 +296,24 @@ test_expect_success 'verify after commit-graph-chain corruption' ' ) ' +test_expect_success 'verify across alternates' ' + git clone --no-hardlinks . verify-alt && + ( + cd verify-alt && + rm -rf $graphdir && + altdir="$(pwd)/../.git/objects" && + echo "$altdir" >.git/objects/info/alternates && + git commit-graph verify --object-dir="$altdir/" && + test_commit extra && + git commit-graph write --reachable --split && + tip_file=$graphdir/graph-$(tail -n 1 $graphdir/commit-graph-chain).graph && + corrupt_file "$tip_file" 100 "\01" && + test_must_fail git commit-graph verify --shallow 2>test_err && + grep -v "^+" test_err >err && + test_i18ngrep "commit-graph has incorrect fanout value" err + ) +' + test_expect_success 'add octopus merge' ' git reset --hard commits/10 && git merge commits/3 commits/4 && -- cgit v0.10.2-6-g49f6