From 5af7417bd869d935715a56b2ccdee04d3a79a328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Sat, 19 Jan 2019 21:21:13 +0100 Subject: commit-graph: rename "large edges" to "extra edges" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optional 'Large Edge List' chunk of the commit graph file stores parent information for commits with more than two parents, and the names of most of the macros, variables, struct fields, and functions related to this chunk contain the term "large edges", e.g. write_graph_chunk_large_edges(). However, it's not really a great term, as the edges to the second and subsequent parents stored in this chunk are not any larger than the edges to the first and second parents stored in the "main" 'Commit Data' chunk. It's the number of edges, IOW number of parents, that is larger compared to non-merge and "regular" two-parent merge commits. And indeed, two functions in 'commit-graph.c' have a local variable called 'num_extra_edges' that refers to the same thing, and this "extra edges" term is much better at describing these edges. So let's rename all these references to "large edges" in macro, variable, function, etc. names to "extra edges". There is a GRAPH_OCTOPUS_EDGES_NEEDED macro as well; for the sake of consistency rename it to GRAPH_EXTRA_EDGES_NEEDED. We can do so safely without causing any incompatibility issues, because the term "large edges" doesn't come up in the file format itself in any form (the chunk's magic is {'E', 'D', 'G', 'E'}, there is no 'L' in there), but only in the specification text. The string "large edges", however, does come up in the output of 'git commit-graph read' and in tests looking at its output, but that command is explicitly documented as a debugging aid, so we can change its output and the affected tests safely. 
Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/commit-graph-format.txt b/Documentation/technical/commit-graph-format.txt index cc0474b..16452a0 100644 --- a/Documentation/technical/commit-graph-format.txt +++ b/Documentation/technical/commit-graph-format.txt @@ -76,7 +76,7 @@ CHUNK DATA: of the ith commit. Stores value 0x7000000 if no parent in that position. If there are more than two parents, the second value has its most-significant bit on and the other bits store an array - position into the Large Edge List chunk. + position into the Extra Edge List chunk. * The next 8 bytes store the generation number of the commit and the commit time in seconds since EPOCH. The generation number uses the higher 30 bits of the first 4 bytes, while the commit @@ -84,7 +84,7 @@ CHUNK DATA: 2 bits of the lowest byte, storing the 33rd and 34th bit of the commit time. - Large Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] + Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] This list of 4-byte values store the second through nth parents for all octopus merges. 
The second parent value in the commit data stores an array position within this list along with the most-significant bit diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c index c02a3f1..4ae5027 100644 --- a/builtin/commit-graph.c +++ b/builtin/commit-graph.c @@ -110,8 +110,8 @@ static int graph_read(int argc, const char **argv) printf(" oid_lookup"); if (graph->chunk_commit_data) printf(" commit_metadata"); - if (graph->chunk_large_edges) - printf(" large_edges"); + if (graph->chunk_extra_edges) + printf(" extra_edges"); printf("\n"); UNLEAK(graph); diff --git a/commit-graph.c b/commit-graph.c index 981faf0..c5c6ab5 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -21,7 +21,7 @@ #define GRAPH_CHUNKID_OIDFANOUT 0x4f494446 /* "OIDF" */ #define GRAPH_CHUNKID_OIDLOOKUP 0x4f49444c /* "OIDL" */ #define GRAPH_CHUNKID_DATA 0x43444154 /* "CDAT" */ -#define GRAPH_CHUNKID_LARGEEDGES 0x45444745 /* "EDGE" */ +#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */ #define GRAPH_DATA_WIDTH 36 @@ -33,7 +33,7 @@ #define GRAPH_OID_VERSION GRAPH_OID_VERSION_SHA1 #define GRAPH_OID_LEN GRAPH_OID_LEN_SHA1 -#define GRAPH_OCTOPUS_EDGES_NEEDED 0x80000000 +#define GRAPH_EXTRA_EDGES_NEEDED 0x80000000 #define GRAPH_PARENT_MISSING 0x7fffffff #define GRAPH_EDGE_LAST_MASK 0x7fffffff #define GRAPH_PARENT_NONE 0x70000000 @@ -177,11 +177,11 @@ struct commit_graph *load_commit_graph_one(const char *graph_file) graph->chunk_commit_data = data + chunk_offset; break; - case GRAPH_CHUNKID_LARGEEDGES: - if (graph->chunk_large_edges) + case GRAPH_CHUNKID_EXTRAEDGES: + if (graph->chunk_extra_edges) chunk_repeated = 1; else - graph->chunk_large_edges = data + chunk_offset; + graph->chunk_extra_edges = data + chunk_offset; break; } @@ -343,12 +343,12 @@ static int fill_commit_in_graph(struct commit *item, struct commit_graph *g, uin edge_value = get_be32(commit_data + g->hash_len + 4); if (edge_value == GRAPH_PARENT_NONE) return 1; - if (!(edge_value & GRAPH_OCTOPUS_EDGES_NEEDED)) { + if 
(!(edge_value & GRAPH_EXTRA_EDGES_NEEDED)) { pptr = insert_parent_or_die(g, edge_value, pptr); return 1; } - parent_data_ptr = (uint32_t*)(g->chunk_large_edges + + parent_data_ptr = (uint32_t*)(g->chunk_extra_edges + 4 * (uint64_t)(edge_value & GRAPH_EDGE_LAST_MASK)); do { edge_value = get_be32(parent_data_ptr); @@ -504,7 +504,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, if (!parent) edge_value = GRAPH_PARENT_NONE; else if (parent->next) - edge_value = GRAPH_OCTOPUS_EDGES_NEEDED | num_extra_edges; + edge_value = GRAPH_EXTRA_EDGES_NEEDED | num_extra_edges; else { edge_value = sha1_pos(parent->item->object.oid.hash, commits, @@ -516,7 +516,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, hashwrite_be32(f, edge_value); - if (edge_value & GRAPH_OCTOPUS_EDGES_NEEDED) { + if (edge_value & GRAPH_EXTRA_EDGES_NEEDED) { do { num_extra_edges++; parent = parent->next; @@ -537,7 +537,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, } } -static void write_graph_chunk_large_edges(struct hashfile *f, +static void write_graph_chunk_extra_edges(struct hashfile *f, struct commit **commits, int nr_commits) { @@ -923,7 +923,7 @@ void write_commit_graph(const char *obj_dir, chunk_ids[1] = GRAPH_CHUNKID_OIDLOOKUP; chunk_ids[2] = GRAPH_CHUNKID_DATA; if (num_extra_edges) - chunk_ids[3] = GRAPH_CHUNKID_LARGEEDGES; + chunk_ids[3] = GRAPH_CHUNKID_EXTRAEDGES; else chunk_ids[3] = 0; chunk_ids[4] = 0; @@ -946,7 +946,7 @@ void write_commit_graph(const char *obj_dir, write_graph_chunk_fanout(f, commits.list, commits.nr); write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr); write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr); - write_graph_chunk_large_edges(f, commits.list, commits.nr); + write_graph_chunk_extra_edges(f, commits.list, commits.nr); close_commit_graph(the_repository); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); diff --git a/commit-graph.h b/commit-graph.h 
index 9db40b4..e6aff2c 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -49,7 +49,7 @@ struct commit_graph { const uint32_t *chunk_oid_fanout; const unsigned char *chunk_oid_lookup; const unsigned char *chunk_commit_data; - const unsigned char *chunk_large_edges; + const unsigned char *chunk_extra_edges; }; struct commit_graph *load_commit_graph_one(const char *graph_file); diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index 5fe21db..f4deb13 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -122,7 +122,7 @@ test_expect_success 'write graph with merges' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && test_path_is_file $objdir/info/commit-graph && - graph_read_expect "10" "large_edges" + graph_read_expect "10" "extra_edges" ' graph_git_behavior 'merge 1 vs 2' full merge/1 merge/2 @@ -157,7 +157,7 @@ test_expect_success 'write graph with new commit' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && test_path_is_file $objdir/info/commit-graph && - graph_read_expect "11" "large_edges" + graph_read_expect "11" "extra_edges" ' graph_git_behavior 'full graph, commit 8 vs merge 1' full commits/8 merge/1 @@ -167,7 +167,7 @@ test_expect_success 'write graph with nothing new' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write && test_path_is_file $objdir/info/commit-graph && - graph_read_expect "11" "large_edges" + graph_read_expect "11" "extra_edges" ' graph_git_behavior 'cleared graph, commit 8 vs merge 1' full commits/8 merge/1 @@ -177,7 +177,7 @@ test_expect_success 'build graph from latest pack with closure' ' cd "$TRASH_DIRECTORY/full" && cat new-idx | git commit-graph write --stdin-packs && test_path_is_file $objdir/info/commit-graph && - graph_read_expect "9" "large_edges" + graph_read_expect "9" "extra_edges" ' graph_git_behavior 'graph from pack, commit 8 vs merge 1' full commits/8 merge/1 @@ -200,7 +200,7 @@ test_expect_success 'build graph from commits with append' ' cd "$TRASH_DIRECTORY/full" && git 
rev-parse merge/3 | git commit-graph write --stdin-commits --append && test_path_is_file $objdir/info/commit-graph && - graph_read_expect "10" "large_edges" + graph_read_expect "10" "extra_edges" ' graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1 @@ -210,7 +210,7 @@ test_expect_success 'build graph using --reachable' ' cd "$TRASH_DIRECTORY/full" && git commit-graph write --reachable && test_path_is_file $objdir/info/commit-graph && - graph_read_expect "11" "large_edges" + graph_read_expect "11" "extra_edges" ' graph_git_behavior 'append graph, commit 8 vs merge 1' full commits/8 merge/1 @@ -231,7 +231,7 @@ test_expect_success 'write graph in bare repo' ' cd "$TRASH_DIRECTORY/bare" && git commit-graph write && test_path_is_file $baredir/info/commit-graph && - graph_read_expect "11" "large_edges" + graph_read_expect "11" "extra_edges" ' graph_git_behavior 'bare repo with graph, commit 8 vs merge 1' bare commits/8 merge/1 -- cgit v0.10.2-6-g49f6 From 857ba928a4125251f2b4a728ff8021d2adcdbd91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?SZEDER=20G=C3=A1bor?= Date: Wed, 23 Jan 2019 18:51:22 +0100 Subject: commit-graph: don't call write_graph_chunk_extra_edges() unnecessarily MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optional 'Extra Edge List' chunk of the commit graph file stores parent information for commits with more than two parents. Since the chunk is optional, write_commit_graph() looks through all commits to find those with more than two parents, and then writes the commit graph file header accordingly, i.e. if there are no such commits, then there won't be an 'Extra Edge List' chunk written, only the three mandatory chunks. However, when it later comes to writing actual chunk data, write_commit_graph() unconditionally invokes write_graph_chunk_extra_edges(), even when it was decided earlier that that chunk won't be written. 
Strictly speaking there is no bug here, because write_graph_chunk_extra_edges() won't write anything if it doesn't find any commits with more than two parents, but then it unnecessarily and in vain looks through all commits once again in search for such commits. Don't call write_graph_chunk_extra_edges() when that chunk won't be written to spare an unnecessary iteration over all commits. Signed-off-by: SZEDER Gábor Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index c5c6ab5..e733ba1 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -946,7 +946,8 @@ void write_commit_graph(const char *obj_dir, write_graph_chunk_fanout(f, commits.list, commits.nr); write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr); write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr); - write_graph_chunk_extra_edges(f, commits.list, commits.nr); + if (num_extra_edges) + write_graph_chunk_extra_edges(f, commits.list, commits.nr); close_commit_graph(the_repository); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); -- cgit v0.10.2-6-g49f6 From 53035c4f0b06c7e556ab35acb2ce3aff1ba3ff5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 19 Jan 2019 21:21:15 +0100 Subject: commit-graph write: add "Writing out" progress output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add progress output to be shown when we're writing out the commit-graph, this adds to the output already added in 7b0f229222 ("commit-graph write: add progress output", 2018-09-17). As noted in that commit most of the progress output isn't displayed on small repositories, but before this change we'd noticeably hang for 2-3 seconds at the end on medium sized repositories such as linux.git. 
Now we'll instead show output like this, and reduce the human-observable times at which we're not producing progress output: $ ~/g/git/git --exec-path=$HOME/g/git -C ~/g/2015-04-03-1M-git commit-graph write Finding commits for commit graph: 13064614, done. Expanding reachable commits in commit graph: 1000447, done. Computing commit graph generation numbers: 100% (1000447/1000447), done. Writing out commit graph: 100% (3001341/3001341), done. This "Writing out" number is 3x or 4x the number of commits, depending on the graph we're processing. A later change will make this explicit to the user. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index e733ba1..a407d5b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -430,7 +430,9 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit static void write_graph_chunk_fanout(struct hashfile *f, struct commit **commits, - int nr_commits) + int nr_commits, + struct progress *progress, + uint64_t *progress_cnt) { int i, count = 0; struct commit **list = commits; @@ -444,6 +446,7 @@ static void write_graph_chunk_fanout(struct hashfile *f, while (count < nr_commits) { if ((*list)->object.oid.hash[0] != i) break; + display_progress(progress, ++*progress_cnt); count++; list++; } @@ -453,12 +456,16 @@ static void write_graph_chunk_fanout(struct hashfile *f, } static void write_graph_chunk_oids(struct hashfile *f, int hash_len, - struct commit **commits, int nr_commits) + struct commit **commits, int nr_commits, + struct progress *progress, + uint64_t *progress_cnt) { struct commit **list = commits; int count; - for (count = 0; count < nr_commits; count++, list++) + for (count = 0; count < nr_commits; count++, list++) { + display_progress(progress, ++*progress_cnt); hashwrite(f, (*list)->object.oid.hash, (int)hash_len); + } } static const unsigned char *commit_to_sha1(size_t index, void *table) @@ -468,7 +475,9 @@ static const unsigned 
char *commit_to_sha1(size_t index, void *table) } static void write_graph_chunk_data(struct hashfile *f, int hash_len, - struct commit **commits, int nr_commits) + struct commit **commits, int nr_commits, + struct progress *progress, + uint64_t *progress_cnt) { struct commit **list = commits; struct commit **last = commits + nr_commits; @@ -478,6 +487,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, struct commit_list *parent; int edge_value; uint32_t packedDate[2]; + display_progress(progress, ++*progress_cnt); parse_commit(*list); hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); @@ -539,7 +549,9 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, static void write_graph_chunk_extra_edges(struct hashfile *f, struct commit **commits, - int nr_commits) + int nr_commits, + struct progress *progress, + uint64_t *progress_cnt) { struct commit **list = commits; struct commit **last = commits + nr_commits; @@ -547,6 +559,9 @@ static void write_graph_chunk_extra_edges(struct hashfile *f, while (list < last) { int num_parents = 0; + + display_progress(progress, ++*progress_cnt); + for (parent = (*list)->parents; num_parents < 3 && parent; parent = parent->next) num_parents++; @@ -768,6 +783,7 @@ void write_commit_graph(const char *obj_dir, int num_extra_edges; struct commit_list *parent; struct progress *progress = NULL; + uint64_t progress_cnt = 0; if (!commit_graph_compatible(the_repository)) return; @@ -943,11 +959,16 @@ void write_commit_graph(const char *obj_dir, hashwrite(f, chunk_write, 12); } - write_graph_chunk_fanout(f, commits.list, commits.nr); - write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr); - write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr); + if (report_progress) + progress = start_delayed_progress( + _("Writing out commit graph"), + num_chunks * commits.nr); + write_graph_chunk_fanout(f, commits.list, commits.nr, progress, &progress_cnt); + write_graph_chunk_oids(f, 
GRAPH_OID_LEN, commits.list, commits.nr, progress, &progress_cnt); + write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr, progress, &progress_cnt); if (num_extra_edges) - write_graph_chunk_extra_edges(f, commits.list, commits.nr); + write_graph_chunk_extra_edges(f, commits.list, commits.nr, progress, &progress_cnt); + stop_progress(&progress); close_commit_graph(the_repository); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); -- cgit v0.10.2-6-g49f6 From 289447397c311d7f8b3c7ed2e54e11b6e57a1d89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 19 Jan 2019 21:21:16 +0100 Subject: commit-graph write: more descriptive "writing out" output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the "Writing out" part of the progress output more descriptive. Depending on the shape of the graph we either make 3 or 4 passes over it. Let's present this information to the user in case they're wondering what this number, which is much larger than their number of commits, has to do with writing out the commit graph. Now e.g. on linux.git we emit: $ ~/g/git/git --exec-path=$HOME/g/git -C ~/g/linux commit-graph write Finding commits for commit graph: 6529159, done. Expanding reachable commits in commit graph: 815990, done. Computing commit graph generation numbers: 100% (815983/815983), done. Writing out commit graph in 4 passes: 100% (3263932/3263932), done. A note on i18n: Why are we using the Q_() function and passing a number & English text for a singular which'll never be used? Because the plural rules of translated languages may not match those of English, and to use the plural function we need to use this format. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index a407d5b..7c639c6 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -784,6 +784,7 @@ void write_commit_graph(const char *obj_dir, struct commit_list *parent; struct progress *progress = NULL; uint64_t progress_cnt = 0; + struct strbuf progress_title = STRBUF_INIT; if (!commit_graph_compatible(the_repository)) return; @@ -959,16 +960,23 @@ void write_commit_graph(const char *obj_dir, hashwrite(f, chunk_write, 12); } - if (report_progress) + if (report_progress) { + strbuf_addf(&progress_title, + Q_("Writing out commit graph in %d pass", + "Writing out commit graph in %d passes", + num_chunks), + num_chunks); progress = start_delayed_progress( - _("Writing out commit graph"), + progress_title.buf, num_chunks * commits.nr); + } write_graph_chunk_fanout(f, commits.list, commits.nr, progress, &progress_cnt); write_graph_chunk_oids(f, GRAPH_OID_LEN, commits.list, commits.nr, progress, &progress_cnt); write_graph_chunk_data(f, GRAPH_OID_LEN, commits.list, commits.nr, progress, &progress_cnt); if (num_extra_edges) write_graph_chunk_extra_edges(f, commits.list, commits.nr, progress, &progress_cnt); stop_progress(&progress); + strbuf_release(&progress_title); close_commit_graph(the_repository); finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC); -- cgit v0.10.2-6-g49f6 From d9b1b309cfc0ebbe5ac689e1131f8c85b025c8c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 19 Jan 2019 21:21:17 +0100 Subject: commit-graph write: show progress for object search MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Show the percentage progress for the "Finding commits for commit graph" phase for the common case where we're operating on all packs in the repository, as "commit-graph write" or "gc" will do. Before we'd emit on e.g. 
linux.git with "commit-graph write": Finding commits for commit graph: 6529159, done. [...] And now: Finding commits for commit graph: 100% (6529159/6529159), done. [...] Since the commit graph only includes those commits that are packed (via for_each_packed_object(...)) the approximate_object_count() returns the actual number of objects we're going to process. Still, it is possible due to a race with "gc" or another process maintaining packs that the number of objects we're going to process is lower than what approximate_object_count() reported. In that case we don't want to stop the progress bar short of 100%. So let's make sure it snaps to 100% at the end. The inverse case is also possible and more likely. I.e. that a new pack has been added between approximate_object_count() and for_each_packed_object(). In that case the percentage will go beyond 100%, and we'll do nothing to snap it back to 100% at the end. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 7c639c6..3124dfa 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -785,12 +785,14 @@ void write_commit_graph(const char *obj_dir, struct progress *progress = NULL; uint64_t progress_cnt = 0; struct strbuf progress_title = STRBUF_INIT; + unsigned long approx_nr_objects; if (!commit_graph_compatible(the_repository)) return; oids.nr = 0; - oids.alloc = approximate_object_count() / 32; + approx_nr_objects = approximate_object_count(); + oids.alloc = approx_nr_objects / 32; oids.progress = NULL; oids.progress_done = 0; @@ -871,9 +873,12 @@ void write_commit_graph(const char *obj_dir, if (!pack_indexes && !commit_hex) { if (report_progress) oids.progress = start_delayed_progress( - _("Finding commits for commit graph"), 0); + _("Finding commits for commit graph"), + approx_nr_objects); for_each_packed_object(add_packed_commits, &oids, FOR_EACH_OBJECT_PACK_ORDER); + if (oids.progress_done < approx_nr_objects) + display_progress(oids.progress, 
approx_nr_objects); stop_progress(&oids.progress); } -- cgit v0.10.2-6-g49f6 From 7c7b8a7fc7c87011d5b3e384122ce8b23ef280e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 19 Jan 2019 21:21:18 +0100 Subject: commit-graph write: add more descriptive progress output MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make the progress output shown when we're searching for commits to include in the graph more descriptive. This amends code I added in 7b0f229222 ("commit-graph write: add progress output", 2018-09-17). Now, on linux.git, we'll emit this sort of output in the various modes we support: $ git commit-graph write Finding commits for commit graph among packed objects: 100% (6529159/6529159), done. [...] # Actually we don't emit this since this takes almost no time at # all. But if we did (s/_delayed//) we'd show: $ git for-each-ref --format='%(objectname)' | git commit-graph write --stdin-commits Finding commits for commit graph from 630 refs: 100% (630/630), done. [...] $ (cd .git/objects/pack/ && ls *idx) | git commit-graph write --stdin-packs Finding commits for commit graph in 3 packs: 6529159, done. [...] The middle one of those is going to be the output users might see in practice, since it'll be emitted when they get the commit graph via gc.writeCommitGraph=true. But as noted above you need a really large number of refs for this message to show. It'll show up on a test repository I have with ~165k refs: Finding commits for commit graph from 165203 refs: 100% (165203/165203), done. 
Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 3124dfa..936119b 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -822,8 +822,12 @@ void write_commit_graph(const char *obj_dir, strbuf_addf(&packname, "%s/pack/", obj_dir); dirlen = packname.len; if (report_progress) { - oids.progress = start_delayed_progress( - _("Finding commits for commit graph"), 0); + strbuf_addf(&progress_title, + Q_("Finding commits for commit graph in %d pack", + "Finding commits for commit graph in %d packs", + pack_indexes->nr), + pack_indexes->nr); + oids.progress = start_delayed_progress(progress_title.buf, 0); oids.progress_done = 0; } for (i = 0; i < pack_indexes->nr; i++) { @@ -841,14 +845,20 @@ void write_commit_graph(const char *obj_dir, free(p); } stop_progress(&oids.progress); + strbuf_reset(&progress_title); strbuf_release(&packname); } if (commit_hex) { - if (report_progress) - progress = start_delayed_progress( - _("Finding commits for commit graph"), - commit_hex->nr); + if (report_progress) { + strbuf_addf(&progress_title, + Q_("Finding commits for commit graph from %d ref", + "Finding commits for commit graph from %d refs", + commit_hex->nr), + commit_hex->nr); + progress = start_delayed_progress(progress_title.buf, + commit_hex->nr); + } for (i = 0; i < commit_hex->nr; i++) { const char *end; struct object_id oid; @@ -868,12 +878,13 @@ void write_commit_graph(const char *obj_dir, } } stop_progress(&progress); + strbuf_reset(&progress_title); } if (!pack_indexes && !commit_hex) { if (report_progress) oids.progress = start_delayed_progress( - _("Finding commits for commit graph"), + _("Finding commits for commit graph among packed objects"), approx_nr_objects); for_each_packed_object(add_packed_commits, &oids, FOR_EACH_OBJECT_PACK_ORDER); -- cgit v0.10.2-6-g49f6 From e59c615e3c91c481587f9a13d05886082f518cec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 
19 Jan 2019 21:21:19 +0100 Subject: commit-graph write: remove empty line for readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the empty line between a QSORT(...) and the subsequent oideq() for-loop. This makes it clearer that the QSORT(...) is being done so that we can run the oideq() loop on adjacent OIDs. Amends code added in 08fd81c9b6 ("commit-graph: implement write_commit_graph()", 2018-04-02). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 936119b..d4a7280 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -896,7 +896,6 @@ void write_commit_graph(const char *obj_dir, close_reachable(&oids, report_progress); QSORT(oids.list, oids.nr, commit_compare); - count_distinct = 1; for (i = 1; i < oids.nr; i++) { if (!oideq(&oids.list[i - 1], &oids.list[i])) -- cgit v0.10.2-6-g49f6 From 890226ccb57d6f9657c29aadfe1c106b939afbc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 19 Jan 2019 21:21:20 +0100 Subject: commit-graph write: add intermediate progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add progress output to sections of code between "Annotating[...]" and "Computing[...]generation numbers". This can collectively take 5-10 seconds on a large enough repository. On a test repository I have with ~7 million commits and ~50 million objects we'll now emit: $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write Finding commits for commit graph among packed objects: 100% (124763727/124763727), done. Loading known commits in commit graph: 100% (18989461/18989461), done. Expanding reachable commits in commit graph: 100% (18989507/18989461), done. Clearing commit marks in commit graph: 100% (18989507/18989507), done. Counting distinct commits in commit graph: 100% (18989507/18989507), done. 
Finding extra edges in commit graph: 100% (18989507/18989507), done. Computing commit graph generation numbers: 100% (7250302/7250302), done. Writing out commit graph in 4 passes: 100% (29001208/29001208), done. Whereas on a medium-sized repository such as linux.git these new progress bars won't have time to kick in and, as before, we'll still emit output like: $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write Finding commits for commit graph among packed objects: 100% (6529159/6529159), done. Expanding reachable commits in commit graph: 815990, done. Computing commit graph generation numbers: 100% (815983/815983), done. Writing out commit graph in 4 passes: 100% (3263932/3263932), done. The "Counting distinct commits in commit graph" phase will spend most of its time paused at "0/*" as we QSORT(...) the list. That's not optimal, but at least we don't seem to be stalling anymore most of the time. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index d4a7280..889cdef 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -895,12 +895,19 @@ void write_commit_graph(const char *obj_dir, close_reachable(&oids, report_progress); + if (report_progress) + progress = start_delayed_progress( + _("Counting distinct commits in commit graph"), + oids.nr); + display_progress(progress, 0); /* TODO: Measure QSORT() progress */ QSORT(oids.list, oids.nr, commit_compare); count_distinct = 1; for (i = 1; i < oids.nr; i++) { + display_progress(progress, i + 1); if (!oideq(&oids.list[i - 1], &oids.list[i])) count_distinct++; } + stop_progress(&progress); if (count_distinct >= GRAPH_PARENT_MISSING) die(_("the commit graph format cannot write %d commits"), count_distinct); @@ -910,8 +917,13 @@ void write_commit_graph(const char *obj_dir, ALLOC_ARRAY(commits.list, commits.alloc); num_extra_edges = 0; + if (report_progress) + progress = start_delayed_progress( + _("Finding extra edges in commit graph"), + oids.nr); for 
(i = 0; i < oids.nr; i++) { int num_parents = 0; + display_progress(progress, i + 1); if (i > 0 && oideq(&oids.list[i - 1], &oids.list[i])) continue; @@ -928,6 +940,7 @@ void write_commit_graph(const char *obj_dir, commits.nr++; } num_chunks = num_extra_edges ? 4 : 3; + stop_progress(&progress); if (commits.nr >= GRAPH_PARENT_MISSING) die(_("too many commits to write graph")); -- cgit v0.10.2-6-g49f6 From 49bbc57a5728880bcf2c4a02289508f7d923e32a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Sat, 19 Jan 2019 21:21:21 +0100 Subject: commit-graph write: emit a percentage for all progress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up 01ca387774 ("commit-graph: split up close_reachable() progress output", 2018-11-19) by making the progress bars in close_reachable() report a completion percentage. This fixes the last place in the commit graph writing where we didn't report that. The change in 01ca387774 split up the 1x progress bar in close_reachable() into 3x, but left them as dumb counters without a percentage completion. Fixing that is easy, and the only reason it wasn't done already is because that commit was rushed in during the v2.20.0 RC period to fix the unrelated issue of over-reporting commit numbers. See [1] and follow-ups for ML activity at the time and [2] for an alternative approach where the progress bars weren't split up. Now for e.g. linux.git we'll emit: $ ~/g/git/git --exec-path=$HOME/g/git commit-graph write Finding commits for commit graph among packed objects: 100% (6529159/6529159), done. Expanding reachable commits in commit graph: 100% (815990/815980), done. Computing commit graph generation numbers: 100% (815983/815983), done. Writing out commit graph in 4 passes: 100% (3263932/3263932), done. 1. https://public-inbox.org/git/20181119202300.18670-1-avarab@gmail.com/ 2. 
https://public-inbox.org/git/20181122153922.16912-11-avarab@gmail.com/ Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 889cdef..017225c 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -653,15 +653,15 @@ static void add_missing_parents(struct packed_oid_list *oids, struct commit *com static void close_reachable(struct packed_oid_list *oids, int report_progress) { - int i, j; + int i; struct commit *commit; struct progress *progress = NULL; if (report_progress) progress = start_delayed_progress( - _("Loading known commits in commit graph"), j = 0); + _("Loading known commits in commit graph"), oids->nr); for (i = 0; i < oids->nr; i++) { - display_progress(progress, ++j); + display_progress(progress, i + 1); commit = lookup_commit(the_repository, &oids->list[i]); if (commit) commit->object.flags |= UNINTERESTING; @@ -675,9 +675,9 @@ static void close_reachable(struct packed_oid_list *oids, int report_progress) */ if (report_progress) progress = start_delayed_progress( - _("Expanding reachable commits in commit graph"), j = 0); + _("Expanding reachable commits in commit graph"), oids->nr); for (i = 0; i < oids->nr; i++) { - display_progress(progress, ++j); + display_progress(progress, i + 1); commit = lookup_commit(the_repository, &oids->list[i]); if (commit && !parse_commit(commit)) @@ -687,9 +687,9 @@ static void close_reachable(struct packed_oid_list *oids, int report_progress) if (report_progress) progress = start_delayed_progress( - _("Clearing commit marks in commit graph"), j = 0); + _("Clearing commit marks in commit graph"), oids->nr); for (i = 0; i < oids->nr; i++) { - display_progress(progress, ++j); + display_progress(progress, i + 1); commit = lookup_commit(the_repository, &oids->list[i]); if (commit) -- cgit v0.10.2-6-g49f6