From f26eef302fc315394d1016eb06360637ac86f62e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:26:29 -0400 Subject: check_everything_connected: always pass --quiet to rev-list The check_everything_connected function takes a "quiet" parameter which does two things if non-zero: 1. redirect rev-list's stderr to /dev/null to avoid showing errors to the user 2. pass "--quiet" to rev-list Item (1) is obviously useful. But item (2) is surprisingly not. For rev-list, "--quiet" does not have anything to do with chattiness on stderr; it tells rev-list not to bother writing the list of traversed objects to stdout, for efficiency. And since we always redirect rev-list's stdout to /dev/null in this function, there is no point in asking it to ever write anything to stdout. The efficiency gains are modest; a best-of-five run of "git rev-list --objects --all" on linux.git dropped from 32.013s to 30.502s when adding "--quiet". That's only about 5%, but given how easy it is, it's worth doing. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/connected.c b/connected.c index bf1b12e..7560a31 100644 --- a/connected.c +++ b/connected.c @@ -56,8 +56,7 @@ static int check_everything_connected_real(sha1_iterate_fn fn, argv[ac++] = "--stdin"; argv[ac++] = "--not"; argv[ac++] = "--all"; - if (quiet) - argv[ac++] = "--quiet"; + argv[ac++] = "--quiet"; argv[ac] = NULL; rev_list.argv = argv; -- cgit v0.10.2-6-g49f6 From 434ea3cdadf8c7592ba167ce122d3984e984a158 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 20 Jul 2016 07:28:09 -0600 Subject: rev-list: add optional progress reporting It's easy to ask rev-list to do a traversal that may takes many seconds (e.g., by calling "--objects --all"). In theory you can monitor its progress by the output you get to stdout, but this isn't always easy. Some operations, like "--count", don't make any output until the end. And some callers, like check_everything_connected(), are using it just for the error-checking of the traversal, and throw away stdout entirely. This patch adds a "--progress" option which can be used to give some eye-candy for a user waiting for a long traversal. This is just a rev-list option and not a regular traversal option, because it needs cooperation from the callbacks in builtin/rev-list.c to do the actual count. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index c5bd218..f39cb6d 100644 --- a/Documentation/rev-list-options.txt +++ b/Documentation/rev-list-options.txt @@ -274,6 +274,10 @@ ifdef::git-rev-list[] Try to speed up the traversal using the pack bitmap index (if one is available). Note that when traversing with `--objects`, trees and blobs will not have their associated path printed. + +--progress=
:: + Show progress reports on stderr as objects are considered. The + `
` text will be printed with each progress update. endif::git-rev-list[] -- diff --git a/builtin/rev-list.c b/builtin/rev-list.c index b82bcc3..0ba82b1 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -9,6 +9,7 @@ #include "log-tree.h" #include "graph.h" #include "bisect.h" +#include "progress.h" static const char rev_list_usage[] = "git rev-list [OPTION] ... [ -- paths... ]\n" @@ -49,12 +50,17 @@ static const char rev_list_usage[] = " --bisect-all" ; +static struct progress *progress; +static unsigned progress_counter; + static void finish_commit(struct commit *commit, void *data); static void show_commit(struct commit *commit, void *data) { struct rev_list_info *info = data; struct rev_info *revs = info->revs; + display_progress(progress, ++progress_counter); + if (info->flags & REV_LIST_QUIET) { finish_commit(commit, data); return; @@ -190,6 +196,7 @@ static void show_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; finish_object(obj, name, cb_data); + display_progress(progress, ++progress_counter); if (info->flags & REV_LIST_QUIET) return; show_object_with_name(stdout, obj, name); @@ -276,6 +283,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) int bisect_show_vars = 0; int bisect_find_all = 0; int use_bitmap_index = 0; + const char *show_progress = NULL; git_config(git_default_config, NULL); init_revisions(&revs, prefix); @@ -325,6 +333,10 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) test_bitmap_walk(&revs); return 0; } + if (skip_prefix(arg, "--progress=", &arg)) { + show_progress = arg; + continue; + } usage(rev_list_usage); } @@ -355,6 +367,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (bisect_list) revs.limited = 1; + if (show_progress) + progress = start_progress_delay(show_progress, 0, 0, 2); + if (use_bitmap_index && !revs.prune) { if (revs.count && !revs.left_right && !revs.cherry_mark) { uint32_t commit_count; @@ -392,6 +407,8 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) traverse_commit_list(&revs, show_commit, show_object, &info); + stop_progress(&progress); + if (revs.count) { if (revs.left_right && revs.cherry_mark) printf("%d\t%d\t%d\n", revs.count_left, revs.count_right, revs.count_same); -- cgit v0.10.2-6-g49f6 From 3be89f9b86cb3891a7865ad004230a50977e3d8c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:28:32 -0400 Subject: check_everything_connected: convert to argv_array This avoids the magic "9" array-size which we must avoid overflowing, making further patches simpler. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/connected.c b/connected.c index 7560a31..a3bfc4e 100644 --- a/connected.c +++ b/connected.c @@ -26,10 +26,9 @@ static int check_everything_connected_real(sha1_iterate_fn fn, const char *shallow_file) { struct child_process rev_list = CHILD_PROCESS_INIT; - const char *argv[9]; char commit[41]; unsigned char sha1[20]; - int err = 0, ac = 0; + int err = 0; struct packed_git *new_pack = NULL; size_t base_len; @@ -48,18 +47,16 @@ static int check_everything_connected_real(sha1_iterate_fn fn, } if (shallow_file) { - argv[ac++] = "--shallow-file"; - argv[ac++] = shallow_file; + argv_array_push(&rev_list.args, "--shallow-file"); + argv_array_push(&rev_list.args, shallow_file); } - argv[ac++] = "rev-list"; - argv[ac++] = "--objects"; - argv[ac++] = "--stdin"; - argv[ac++] = "--not"; - argv[ac++] = "--all"; - argv[ac++] = "--quiet"; - argv[ac] = NULL; + argv_array_push(&rev_list.args,"rev-list"); + argv_array_push(&rev_list.args, "--objects"); + argv_array_push(&rev_list.args, "--stdin"); + argv_array_push(&rev_list.args, "--not"); + argv_array_push(&rev_list.args, "--all"); + argv_array_push(&rev_list.args, "--quiet"); - rev_list.argv = argv; rev_list.git_cmd = 1; rev_list.in = -1; rev_list.no_stdout = 1; -- cgit v0.10.2-6-g49f6 From 7043c7071cdbdef60ce8413f425da622b8e6dc85 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:30:40 -0400 Subject: check_everything_connected: use a struct with named options The number of variants of check_everything_connected has grown over the years, so that the "real" function takes several possibly-zero, possibly-NULL arguments. We hid the complexity behind some wrapper functions, but this doesn't scale well when we want to add new options. If we add more wrapper variants to handle the new options, then we can get a combinatorial explosion when those options might be used together (right now nobody wants to use both "shallow" and "transport" together, so we get by with just a few wrappers). If instead we add new parameters to each function, each of which can have a default value, then callers who want the defaults end up with confusing invocations like: check_everything_connected(fn, 0, data, -1, 0, NULL); where it is unclear which parameter is which (and every caller needs updated when we add new options). Instead, let's add a struct to hold all of the optional parameters. This is a little more verbose for the callers (who have to declare the struct and fill it in), but it makes their code much easier to follow, because every option is named as it is set (and unused options do not have to be mentioned at all). Note that we could also stick the iteration function and its callback data into the option struct, too. But since those are required for each call, by avoiding doing so, we can let very simple callers just pass "NULL" for the options and not worry about the struct at all. While we're touching each site, let's also rename the function to check_connected(). The existing name was quite long, and not all of the wrappers even used the full name. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/clone.c b/builtin/clone.c index 31ea247..32fe606 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -624,10 +624,13 @@ static void update_remote_refs(const struct ref *refs, const struct ref *rm = mapped_refs; if (check_connectivity) { + struct check_connected_options opt = CHECK_CONNECTED_INIT; + + opt.transport = transport; + if (transport->progress) fprintf(stderr, _("Checking connectivity... ")); - if (check_everything_connected_with_transport(iterate_ref_map, - 0, &rm, transport)) + if (check_connected(iterate_ref_map, &rm, &opt)) die(_("remote did not send all necessary objects")); if (transport->progress) fprintf(stderr, _("done.\n")); diff --git a/builtin/fetch.c b/builtin/fetch.c index d9ea6f3..75ee480 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -729,7 +729,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = xstrdup("foreign"); rm = ref_map; - if (check_everything_connected(iterate_ref_map, 0, &rm)) { + if (check_connected(iterate_ref_map, &rm, NULL)) { rc = error(_("%s did not send all necessary objects\n"), url); goto abort; } @@ -866,6 +866,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, static int quickfetch(struct ref *ref_map) { struct ref *rm = ref_map; + struct check_connected_options opt = CHECK_CONNECTED_INIT; /* * If we are deepening a shallow clone we already have these @@ -876,7 +877,8 @@ static int quickfetch(struct ref *ref_map) */ if (depth) return -1; - return check_everything_connected(iterate_ref_map, 1, &rm); + opt.quiet = 1; + return check_connected(iterate_ref_map, &rm, &opt); } static int fetch_refs(struct transport *transport, struct ref *ref_map) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 15c323a..ce81920 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -737,7 +737,7 @@ static int update_shallow_ref(struct command *cmd, struct shallow_info *si) { static struct lock_file shallow_lock; struct sha1_array extra = SHA1_ARRAY_INIT; - const char *alt_file; + struct check_connected_options opt = CHECK_CONNECTED_INIT; uint32_t mask = 1 << (cmd->index % 32); int i; @@ -749,9 +749,8 @@ static int update_shallow_ref(struct command *cmd, struct shallow_info *si) !delayed_reachability_test(si, i)) sha1_array_append(&extra, si->shallow->sha1[i]); - setup_alternate_shallow(&shallow_lock, &alt_file, &extra); - if (check_shallow_connected(command_singleton_iterator, - 0, cmd, alt_file)) { + setup_alternate_shallow(&shallow_lock, &opt.shallow_file, &extra); + if (check_connected(command_singleton_iterator, cmd, &opt)) { rollback_lock_file(&shallow_lock); sha1_array_clear(&extra); return -1; @@ -1160,8 +1159,8 @@ static void set_connectivity_errors(struct command *commands, if (shallow_update && si->shallow_ref[cmd->index]) /* to be checked in update_shallow_ref() */ continue; - if (!check_everything_connected(command_singleton_iterator, - 0, &singleton)) + if (!check_connected(command_singleton_iterator, &singleton, + NULL)) continue; cmd->error_string = "missing necessary objects"; } @@ -1330,7 +1329,7 @@ static void execute_commands(struct command *commands, data.cmds = commands; data.si = si; - if (check_everything_connected(iterate_receive_command_list, 0, &data)) + if (check_connected(iterate_receive_command_list, &data, NULL)) set_connectivity_errors(commands, si); reject_updates_to_hidden(commands); diff --git a/connected.c b/connected.c index a3bfc4e..2a51eac 100644 --- a/connected.c +++ b/connected.c @@ -4,10 +4,6 @@ #include "connected.h" #include "transport.h" -int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data) -{ - return check_everything_connected_with_transport(fn, quiet, cb_data, NULL); -} /* * If we feed all the commits we want to verify to this command * @@ -19,19 +15,22 @@ int check_everything_connected(sha1_iterate_fn fn, int quiet, void *cb_data) * * Returns 0 if everything is connected, non-zero otherwise. */ -static int check_everything_connected_real(sha1_iterate_fn fn, - int quiet, - void *cb_data, - struct transport *transport, - const char *shallow_file) +int check_connected(sha1_iterate_fn fn, void *cb_data, + struct check_connected_options *opt) { struct child_process rev_list = CHILD_PROCESS_INIT; + struct check_connected_options defaults = CHECK_CONNECTED_INIT; char commit[41]; unsigned char sha1[20]; int err = 0; struct packed_git *new_pack = NULL; + struct transport *transport; size_t base_len; + if (!opt) + opt = &defaults; + transport = opt->transport; + if (fn(cb_data, sha1)) return err; @@ -46,9 +45,9 @@ static int check_everything_connected_real(sha1_iterate_fn fn, strbuf_release(&idx_file); } - if (shallow_file) { + if (opt->shallow_file) { argv_array_push(&rev_list.args, "--shallow-file"); - argv_array_push(&rev_list.args, shallow_file); + argv_array_push(&rev_list.args, opt->shallow_file); } argv_array_push(&rev_list.args,"rev-list"); argv_array_push(&rev_list.args, "--objects"); @@ -60,7 +59,7 @@ static int check_everything_connected_real(sha1_iterate_fn fn, rev_list.git_cmd = 1; rev_list.in = -1; rev_list.no_stdout = 1; - rev_list.no_stderr = quiet; + rev_list.no_stderr = opt->quiet; if (start_command(&rev_list)) return error(_("Could not run 'git rev-list'")); @@ -94,19 +93,3 @@ static int check_everything_connected_real(sha1_iterate_fn fn, sigchain_pop(SIGPIPE); return finish_command(&rev_list) || err; } - -int check_everything_connected_with_transport(sha1_iterate_fn fn, - int quiet, - void *cb_data, - struct transport *transport) -{ - return check_everything_connected_real(fn, quiet, cb_data, - transport, NULL); -} - -int check_shallow_connected(sha1_iterate_fn fn, int quiet, void *cb_data, - const char *shallow_file) -{ - return check_everything_connected_real(fn, quiet, cb_data, - NULL, shallow_file); -} diff --git a/connected.h b/connected.h index 071d408..12594ef 100644 --- a/connected.h +++ b/connected.h @@ -11,17 +11,32 @@ struct transport; typedef int (*sha1_iterate_fn)(void *, unsigned char [20]); /* + * Named-arguments struct for check_connected. All arguments are + * optional, and can be left to defaults as set by CHECK_CONNECTED_INIT. + */ +struct check_connected_options { + /* Avoid printing any errors to stderr. */ + int quiet; + + /* --shallow-file to pass to rev-list sub-process */ + const char *shallow_file; + + /* Transport whose objects we are checking, if available. */ + struct transport *transport; +}; + +#define CHECK_CONNECTED_INIT { 0 } + +/* * Make sure that our object store has all the commits necessary to * connect the ancestry chain to some of our existing refs, and all * the trees and blobs that these commits use. * * Return 0 if Ok, non zero otherwise (i.e. some missing objects) + * + * If "opt" is NULL, behaves as if CHECK_CONNECTED_INIT was passed. */ -extern int check_everything_connected(sha1_iterate_fn, int quiet, void *cb_data); -extern int check_shallow_connected(sha1_iterate_fn, int quiet, void *cb_data, - const char *shallow_file); -extern int check_everything_connected_with_transport(sha1_iterate_fn, int quiet, - void *cb_data, - struct transport *transport); +int check_connected(sha1_iterate_fn fn, void *cb_data, + struct check_connected_options *opt); #endif /* CONNECTED_H */ -- cgit v0.10.2-6-g49f6 From e0331849a081fe4919f4130540165ce7d7355748 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:32:03 -0400 Subject: check_connected: relay errors to alternate descriptor Unless the "quiet" flag is given, check_connected sends any errors to the stderr of the caller (because the child rev-list inherits that descriptor). However, server-side callers may want to send these over a sideband channel instead. Let's make that possible. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/connected.c b/connected.c index 2a51eac..5f5c8bd 100644 --- a/connected.c +++ b/connected.c @@ -31,8 +31,11 @@ int check_connected(sha1_iterate_fn fn, void *cb_data, opt = &defaults; transport = opt->transport; - if (fn(cb_data, sha1)) + if (fn(cb_data, sha1)) { + if (opt->err_fd) + close(opt->err_fd); return err; + } if (transport && transport->smart_options && transport->smart_options->self_contained_and_connected && @@ -59,7 +62,11 @@ int check_connected(sha1_iterate_fn fn, void *cb_data, rev_list.git_cmd = 1; rev_list.in = -1; rev_list.no_stdout = 1; - rev_list.no_stderr = opt->quiet; + if (opt->err_fd) + rev_list.err = opt->err_fd; + else + rev_list.no_stderr = opt->quiet; + if (start_command(&rev_list)) return error(_("Could not run 'git rev-list'")); diff --git a/connected.h b/connected.h index 12594ef..5d88e26 100644 --- a/connected.h +++ b/connected.h @@ -23,6 +23,13 @@ struct check_connected_options { /* Transport whose objects we are checking, if available. */ struct transport *transport; + + /* + * If non-zero, send error messages to this descriptor rather + * than stderr. The descriptor is closed before check_connected + * returns. + */ + int err_fd; }; #define CHECK_CONNECTED_INIT { 0 } -- cgit v0.10.2-6-g49f6 From 70d5e2d77b4c2afdb442b37924bc252793e106d9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:32:28 -0400 Subject: check_connected: add progress flag Connectivity checks have to traverse the entire object graph in the worst case (e.g., a full clone or a full push). For large repositories like linux.git, this can take 30-60 seconds, during which time git may produce little or no output. Let's add the option of showing progress, which is taken care of by rev-list. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/connected.c b/connected.c index 5f5c8bd..8e3e4b1 100644 --- a/connected.c +++ b/connected.c @@ -58,6 +58,9 @@ int check_connected(sha1_iterate_fn fn, void *cb_data, argv_array_push(&rev_list.args, "--not"); argv_array_push(&rev_list.args, "--all"); argv_array_push(&rev_list.args, "--quiet"); + if (opt->progress) + argv_array_pushf(&rev_list.args, "--progress=%s", + _("Checking connectivity")); rev_list.git_cmd = 1; rev_list.in = -1; diff --git a/connected.h b/connected.h index 5d88e26..afa48cc 100644 --- a/connected.h +++ b/connected.h @@ -30,6 +30,9 @@ struct check_connected_options { * returns. */ int err_fd; + + /* If non-zero, show progress as we traverse the objects. */ + int progress; }; #define CHECK_CONNECTED_INIT { 0 } -- cgit v0.10.2-6-g49f6 From 38e590ea12336bcfae0310092885743e6ba79781 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:33:18 -0400 Subject: clone: use a real progress meter for connectivity check Because the initial connectivity check for a cloned repository can be slow, 0781aa4 (clone: let the user know when check_everything_connected is run, 2013-05-03) added a "fake" progress meter; we simply say "Checking connectivity" when it starts, and "done" at the end, with nothing between. Since check_connected() now knows how to do a real progress meter, we can drop our fake one and use that one instead. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/clone.c b/builtin/clone.c index 32fe606..f044a8c 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -627,13 +627,10 @@ static void update_remote_refs(const struct ref *refs, struct check_connected_options opt = CHECK_CONNECTED_INIT; opt.transport = transport; + opt.progress = transport->progress; - if (transport->progress) - fprintf(stderr, _("Checking connectivity... ")); if (check_connected(iterate_ref_map, &rm, &opt)) die(_("remote did not send all necessary objects")); - if (transport->progress) - fprintf(stderr, _("done.\n")); } if (refs) { -- cgit v0.10.2-6-g49f6 From e376f17fd1a1efd4fe425d023610172bcba3fe97 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:34:22 -0400 Subject: index-pack: add flag for showing delta-resolution progress The index-pack command has two progress meters: one for "receiving objects", and one for "resolving deltas". You get neither by default, or both with "-v". But for a push through receive-pack, we would want only the "resolving deltas" phase, _not_ the "receiving objects" progress. There are two reasons for this. One is simply that existing clients are already printing "writing objects" progress at the same time. Arguably "receiving" from the far end is more useful, because it tells you what has actually gotten there, as opposed to what might be stuck in a buffer somewhere between the client and server. But that would require a protocol extension to tell clients not to print their progress. Possible, but complexity for little gain. The second reason is much more important. In a full-duplex connection like git-over-ssh, we can print progress while the pack is incoming, and it will immediately get to the client. But for a half-duplex connection like git-over-http, we should not say anything until we have received the full request. Anything we write is subject to being stuck in a buffer by the webserver. Worse, we can end up in a deadlock if that buffer fills up. So our best bet is to avoid writing anything that isn't a small fixed size until we've received the full pack. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/index-pack.c b/builtin/index-pack.c index e8c71fc..1cba120 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -77,6 +77,7 @@ static int strict; static int do_fsck_object; static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT; static int verbose; +static int show_resolving_progress; static int show_stat; static int check_self_contained_and_connected; @@ -1190,7 +1191,7 @@ static void resolve_deltas(void) qsort(ref_deltas, nr_ref_deltas, sizeof(struct ref_delta_entry), compare_ref_delta_entry); - if (verbose) + if (verbose || show_resolving_progress) progress = start_progress(_("Resolving deltas"), nr_ref_deltas + nr_ofs_deltas); @@ -1694,6 +1695,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) input_len = sizeof(*hdr); } else if (!strcmp(arg, "-v")) { verbose = 1; + } else if (!strcmp(arg, "--show-resolving-progress")) { + show_resolving_progress = 1; } else if (!strcmp(arg, "-o")) { if (index_name || (i+1) >= argc) usage(index_pack_usage); -- cgit v0.10.2-6-g49f6 From d06303bb9a6c3791ff67078b480a86cfd9b691ea Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:35:28 -0400 Subject: receive-pack: turn on index-pack resolving progress When we receive a large push, the server side may have to spend a lot of CPU processing the incoming packfile. During the "receiving" phase, we are typically network bound, and the client is writing its own progress to the user. But during the delta resolution phase, we may spend minutes (e.g., for a full push of linux.git) without making any indication to the user that the connection has not hung. Let's ask index-pack to produce progress output for this phase (unless the client asked us to be quiet, of course). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index ce81920..de322bc 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1547,6 +1547,8 @@ static const char *unpack(int err_fd, struct shallow_info *si) (uintmax_t)getpid(), hostname); + if (!quiet && err_fd) + argv_array_push(&child.args, "--show-resolving-progress"); if (fsck_objects) argv_array_pushf(&child.args, "--strict%s", fsck_msg_types.buf); -- cgit v0.10.2-6-g49f6 From d415092ac4a7fb474dc9823a1cc08ae824267e35 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:36:14 -0400 Subject: receive-pack: relay connectivity errors to sideband If the connectivity check encounters a problem when receiving a push, the error output goes to receive-pack's stderr, whose destination depends on the protocol used (ssh tends to send it to the user, though without a "remote" prefix; http will generally eat it in the server's error log). The information should consistently go back to the user, as there is a reasonable chance their client is buggy and generating a bad pack. We can do so by muxing it over the sideband as we do with other sub-process stderr. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index de322bc..d309109 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1317,9 +1317,12 @@ static void execute_commands(struct command *commands, const char *unpacker_error, struct shallow_info *si) { + struct check_connected_options opt = CHECK_CONNECTED_INIT; struct command *cmd; unsigned char sha1[20]; struct iterate_data data; + struct async muxer; + int err_fd = 0; if (unpacker_error) { for (cmd = commands; cmd; cmd = cmd->next) @@ -1327,11 +1330,24 @@ static void execute_commands(struct command *commands, return; } + if (use_sideband) { + memset(&muxer, 0, sizeof(muxer)); + muxer.proc = copy_to_sideband; + muxer.in = -1; + if (!start_async(&muxer)) + err_fd = muxer.in; + /* ...else, continue without relaying sideband */ + } + data.cmds = commands; data.si = si; - if (check_connected(iterate_receive_command_list, &data, NULL)) + opt.err_fd = err_fd; + if (check_connected(iterate_receive_command_list, &data, &opt)) set_connectivity_errors(commands, si); + if (use_sideband) + finish_async(&muxer); + reject_updates_to_hidden(commands); if (run_receive_hook(commands, "pre-receive", 0)) { -- cgit v0.10.2-6-g49f6 From 6b4cd2f82791b0c9726616832382e0af8d895d99 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:36:35 -0400 Subject: receive-pack: turn on connectivity progress When we receive a large push, the server side of the connection may spend a lot of time (30s or more for a full push of linux.git) walking the object graph without producing any output. Let's give the user some indication that we're actually working. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index d309109..7db1639 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1342,6 +1342,7 @@ static void execute_commands(struct command *commands, data.cmds = commands; data.si = si; opt.err_fd = err_fd; + opt.progress = err_fd && !quiet; if (check_connected(iterate_receive_command_list, &data, &opt)) set_connectivity_errors(commands, si); -- cgit v0.10.2-6-g49f6 From 83558686ceaeb634b630d5177818d571cafafbf4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 15 Jul 2016 06:43:47 -0400 Subject: receive-pack: send keepalives during quiet periods After a client has sent us the complete pack, we may spend some time processing the data and running hooks. If the client asked us to be quiet, receive-pack won't send any progress data during the index-pack or connectivity-check steps. And hooks may or may not produce their own progress output. In these cases, the network connection is totally silent from both ends. Git itself doesn't care about this (it will wait forever), but other parts of the system (e.g., firewalls, load-balancers, etc) might hang up the connection. So we'd like to send some sort of keepalive to let the network and the client side know that we're still alive and processing. We can use the same trick we did in 05e9515 (upload-pack: send keepalive packets during pack computation, 2013-09-08). Namely, we will send an empty sideband data packet every `N` seconds that we do not relay any stderr data over the sideband channel. As with 05e9515, this means that we won't bother sending keepalives when there's actual progress data, but will kick in when it has been disabled (or if there is a lull in the progress data). The concept is simple, but the details are subtle enough that they need discussing here. Before the client sends us the pack, we don't want to do any keepalives. We'll have sent our ref advertisement, and we're waiting for them to send us the pack (and tell us that they support sidebands at all). While we're receiving the pack from the client (or waiting for it to start), there's no need for keepalives; it's up to them to keep the connection active by sending data. Moreover, it would be wrong for us to do so. When we are the server in the smart-http protocol, we must treat our connection as half-duplex. So any keepalives we send while receiving the pack would potentially be buffered by the webserver. Not only does this make them useless (since they would not be delivered in a timely manner), but it could actually cause a deadlock if we fill up the buffer with keepalives. (It wouldn't be wrong to send keepalives in this phase for a full-duplex connection like ssh; it's simply pointless, as it is the client's responsibility to speak). As soon as we've gotten all of the pack data, then the client is waiting for us to speak, and we should start keepalives immediately. From here until the end of the connection, we send one any time we are not otherwise sending data. But there's a catch. Receive-pack doesn't know the moment we've gotten all the data. It passes the descriptor to index-pack, who reads all of the data, and then starts resolving the deltas. We have to communicate that back. To make this work, we instruct the sideband muxer to enable keepalives in three phases: 1. In the beginning, not at all. 2. While reading from index-pack, wait for a signal indicating end-of-input, and then start them. 3. Afterwards, always. The signal from index-pack in phase 2 has to come over the stderr channel which the muxer is reading. We can't use an extra pipe because the portable run-command interface only gives us stderr and stdout. Stdout is already used to pass the .keep filename back to receive-pack. We could also send a signal there, but then we would find out about it in the main thread. And the keepalive needs to be done by the async muxer thread (since it's the one writing sideband data back to the client). And we can't reliably signal the async thread from the main thread, because the async code sometimes uses threads and sometimes uses forked processes. Therefore the signal must come over the stderr channel, where it may be interspersed with other random human-readable messages from index-pack. This patch makes the signal a single NUL byte. This is easy to parse, should not appear in any normal stderr output, and we don't have to worry about any timing issues (like seeing half the signal bytes in one read(), and half in a subsequent one). This is a bit ugly, but it's simple to code and should work reliably. Another option would be to stop using an async thread for muxing entirely, and just poll() both stderr and stdout of index-pack from the main thread. This would work for index-pack (because we aren't doing anything useful in the main thread while it runs anyway). But it would make the connectivity check and the hook muxers much more complicated, as they need to simultaneously feed the sub-programs while reading their stderr. The index-pack phase is the only one that needs this signaling, so it could simply behave differently than the other two. That would mean having two separate implementations of copy_to_sideband (and the keepalive code), though. And it still doesn't get rid of the signaling; it just means we can write a nicer message like "END_OF_INPUT" or something on stdout, since we don't have to worry about separating it from the stderr cruft. One final note: this signaling trick is only done with index-pack, not with unpack-objects. There's no point in doing it for the latter, because by definition it only kicks in for a small number of objects, where keepalives are not as useful (and this conveniently lets us avoid duplicating the implementation). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index e455fae..25c8e36 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2485,6 +2485,15 @@ receive.fsck.skipList:: can be safely ignored such as invalid committer email addresses. Note: corrupt objects cannot be skipped with this setting. +receive.keepAlive:: + After receiving the pack from the client, `receive-pack` may + produce no output (if `--quiet` was specified) while processing + the pack, causing some networks to drop the TCP connection. + With this option set, if `receive-pack` does not transmit + any data in this phase for `receive.keepAlive` seconds, it will + send a short keepalive packet. The default is 5 seconds; set + to 0 to disable keepalives entirely. + receive.unpackLimit:: If the number of objects received in a push is below this limit then the objects will be unpacked into loose object diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 1cba120..54f2cfb 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1626,6 +1626,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) struct pack_idx_option opts; unsigned char pack_sha1[20]; unsigned foreign_nr = 1; /* zero is a "good" value, assume bad */ + int report_end_of_input = 0; if (argc == 2 && !strcmp(argv[1], "-h")) usage(index_pack_usage); @@ -1697,6 +1698,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) verbose = 1; } else if (!strcmp(arg, "--show-resolving-progress")) { show_resolving_progress = 1; + } else if (!strcmp(arg, "--report-end-of-input")) { + report_end_of_input = 1; } else if (!strcmp(arg, "-o")) { if (index_name || (i+1) >= argc) usage(index_pack_usage); @@ -1754,6 +1757,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) obj_stat = xcalloc(st_add(nr_objects, 1), sizeof(struct object_stat)); ofs_deltas = xcalloc(nr_objects, sizeof(struct ofs_delta_entry)); parse_pack_objects(pack_sha1); + if (report_end_of_input) + write_in_full(2, "\0", 1); resolve_deltas(); conclude_pack(fix_thin_pack, curr_pack, pack_sha1); free(ofs_deltas); diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 7db1639..e41f55f 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -76,6 +76,13 @@ static long nonce_stamp_slop; static unsigned long nonce_stamp_slop_limit; static struct ref_transaction *transaction; +static enum { + KEEPALIVE_NEVER = 0, + KEEPALIVE_AFTER_NUL, + KEEPALIVE_ALWAYS +} use_keepalive; +static int keepalive_in_sec = 5; + static enum deny_action parse_deny_action(const char *var, const char *value) { if (value) { @@ -193,6 +200,11 @@ static int receive_pack_config(const char *var, const char *value, void *cb) return 0; } + if (strcmp(var, "receive.keepalive") == 0) { + keepalive_in_sec = git_config_int(var, value); + return 0; + } + return git_default_config(var, value, cb); } @@ -319,10 +331,60 @@ static void rp_error(const char *err, ...) static int copy_to_sideband(int in, int out, void *arg) { char data[128]; + int keepalive_active = 0; + + if (keepalive_in_sec <= 0) + use_keepalive = KEEPALIVE_NEVER; + if (use_keepalive == KEEPALIVE_ALWAYS) + keepalive_active = 1; + while (1) { - ssize_t sz = xread(in, data, sizeof(data)); + ssize_t sz; + + if (keepalive_active) { + struct pollfd pfd; + int ret; + + pfd.fd = in; + pfd.events = POLLIN; + ret = poll(&pfd, 1, 1000 * keepalive_in_sec); + + if (ret < 0) { + if (errno == EINTR) + continue; + else + break; + } else if (ret == 0) { + /* no data; send a keepalive packet */ + static const char buf[] = "0005\1"; + write_or_die(1, buf, sizeof(buf) - 1); + continue; + } /* else there is actual data to read */ + } + + sz = xread(in, data, sizeof(data)); if (sz <= 0) break; + + if (use_keepalive == KEEPALIVE_AFTER_NUL && !keepalive_active) { + const char *p = memchr(data, '\0', sz); + if (p) { + /* + * The NUL tells us to start sending keepalives. Make + * sure we send any other data we read along + * with it. + */ + keepalive_active = 1; + send_sideband(1, 2, data, p - data, use_sideband); + send_sideband(1, 2, p + 1, sz - (p - data + 1), use_sideband); + continue; + } + } + + /* + * Either we're not looking for a NUL signal, or we didn't see + * it yet; just pass along the data. + */ send_sideband(1, 2, data, sz, use_sideband); } close(in); @@ -1566,6 +1628,8 @@ static const char *unpack(int err_fd, struct shallow_info *si) if (!quiet && err_fd) argv_array_push(&child.args, "--show-resolving-progress"); + if (use_sideband) + argv_array_push(&child.args, "--report-end-of-input"); if (fsck_objects) argv_array_pushf(&child.args, "--strict%s", fsck_msg_types.buf); @@ -1595,6 +1659,7 @@ static const char *unpack_with_sideband(struct shallow_info *si) if (!use_sideband) return unpack(0, si); + use_keepalive = KEEPALIVE_AFTER_NUL; memset(&muxer, 0, sizeof(muxer)); muxer.proc = copy_to_sideband; muxer.in = -1; @@ -1782,6 +1847,7 @@ int cmd_receive_pack(int argc, const char **argv, const char *prefix) unpack_status = unpack_with_sideband(&si); update_shallow_info(commands, &si, &ref); } + use_keepalive = KEEPALIVE_ALWAYS; execute_commands(commands, unpack_status, &si); if (pack_lockfile) unlink_or_warn(pack_lockfile); -- cgit v0.10.2-6-g49f6