From 4bcd37d3bc436f202307897093d5ca162a688be2 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 20 Jun 2018 14:32:28 -0700 Subject: test-pkt-line: add unpack-sideband subcommand Add an 'unpack-sideband' subcommand to the test-pkt-line helper to enable unpacking packet line data sent multiplexed using a sideband. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/t/helper/test-pkt-line.c b/t/helper/test-pkt-line.c index 0f19e53..30775f9 100644 --- a/t/helper/test-pkt-line.c +++ b/t/helper/test-pkt-line.c @@ -1,3 +1,4 @@ +#include "cache.h" #include "pkt-line.h" static void pack_line(const char *line) @@ -48,6 +49,36 @@ static void unpack(void) } } +static void unpack_sideband(void) +{ + struct packet_reader reader; + packet_reader_init(&reader, 0, NULL, 0, + PACKET_READ_GENTLE_ON_EOF | + PACKET_READ_CHOMP_NEWLINE); + + while (packet_reader_read(&reader) != PACKET_READ_EOF) { + int band; + int fd; + + switch (reader.status) { + case PACKET_READ_EOF: + break; + case PACKET_READ_NORMAL: + band = reader.line[0] & 0xff; + if (band < 1 || band > 2) + die("unexpected side band %d", band); + fd = band; + + write_or_die(fd, reader.line + 1, reader.pktlen - 1); + break; + case PACKET_READ_FLUSH: + return; + case PACKET_READ_DELIM: + break; + } + } +} + int cmd_main(int argc, const char **argv) { if (argc < 2) @@ -57,6 +88,8 @@ int cmd_main(int argc, const char **argv) pack(argc - 2, argv + 2); else if (!strcmp(argv[1], "unpack")) unpack(); + else if (!strcmp(argv[1], "unpack-sideband")) + unpack_sideband(); else die("invalid argument '%s'", argv[1]); -- cgit v0.10.2-6-g49f6 From 516e2b76bdcf53e757309481fa0e663217ee8039 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:17 -0700 Subject: upload-pack: implement ref-in-want Currently, while performing packfile negotiation, clients are only allowed to specify their desired objects using object ids. This causes a vulnerability to failure when an object turns non-existent during negotiation, which may happen if, for example, the desired repository is provided by multiple Git servers in a load-balancing arrangement and there exists replication delay. In order to eliminate this vulnerability, implement the ref-in-want feature for the 'fetch' command in protocol version 2. This feature enables the 'fetch' command to support requests in the form of ref names through a new "want-ref " parameter. At the conclusion of negotiation, the server will send a list of all of the wanted references (as provided by "want-ref" lines) in addition to the generated packfile. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index ab641bf..fb1dd74 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3479,6 +3479,13 @@ Note that this configuration variable is ignored if it is seen in the repository-level config (this is a safety measure against fetching from untrusted repositories). +uploadpack.allowRefInWant:: + If this option is set, `upload-pack` will support the `ref-in-want` + feature of the protocol version 2 `fetch` command. This feature + is intended for the benefit of load-balanced servers which may + not have the same view of what OIDs their refs point to due to + replication delay. + url..insteadOf:: Any URL that starts with this value will be rewritten to start, instead, with . In cases where some site serves a diff --git a/Documentation/technical/protocol-v2.txt b/Documentation/technical/protocol-v2.txt index 49bda76..1da71d0 100644 --- a/Documentation/technical/protocol-v2.txt +++ b/Documentation/technical/protocol-v2.txt @@ -299,12 +299,21 @@ included in the client's request: for use with partial clone and partial fetch operations. See `rev-list` for possible "filter-spec" values. +If the 'ref-in-want' feature is advertised, the following argument can +be included in the client's request as well as the potential addition of +the 'wanted-refs' section in the server's response as explained below. + + want-ref + Indicates to the server that the client wants to retrieve a + particular ref, where is the full name of a ref on the + server. + The response of `fetch` is broken into a number of sections separated by delimiter packets (0001), with each section beginning with its section header. output = *section - section = (acknowledgments | shallow-info | packfile) + section = (acknowledgments | shallow-info | wanted-refs | packfile) (flush-pkt | delim-pkt) acknowledgments = PKT-LINE("acknowledgments" LF) @@ -319,6 +328,10 @@ header. shallow = "shallow" SP obj-id unshallow = "unshallow" SP obj-id + wanted-refs = PKT-LINE("wanted-refs" LF) + *PKT-LINE(wanted-ref LF) + wanted-ref = obj-id SP refname + packfile = PKT-LINE("packfile" LF) *PKT-LINE(%x01-03 *%x00-ff) @@ -379,6 +392,19 @@ header. * This section is only included if a packfile section is also included in the response. + wanted-refs section + * This section is only included if the client has requested a + ref using a 'want-ref' line and if a packfile section is also + included in the response. + + * Always begins with the section header "wanted-refs". + + * The server will send a ref listing (" ") for + each reference requested using 'want-ref' lines. + + * The server MUST NOT send any refs which were not requested + using 'want-ref' lines. + packfile section * This section is only included if the client has sent 'want' lines in its request and either requested that no more diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh new file mode 100755 index 0000000..da86f7c --- /dev/null +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -0,0 +1,160 @@ +#!/bin/sh + +test_description='upload-pack ref-in-want' + +. ./test-lib.sh + +get_actual_refs () { + sed -n -e '/wanted-refs/,/0001/{ + /wanted-refs/d + /0001/d + p + }' actual_refs +} + +get_actual_commits () { + sed -n -e '/packfile/,/0000/{ + /packfile/d + p + }' o.pack && + git index-pack o.pack && + git verify-pack -v o.idx | grep commit | cut -c-40 | sort >actual_commits +} + +check_output () { + get_actual_refs && + test_cmp expected_refs actual_refs && + get_actual_commits && + test_cmp expected_commits actual_commits +} + +# c(o/foo) d(o/bar) +# \ / +# b e(baz) f(master) +# \__ | __/ +# \ | / +# a +test_expect_success 'setup repository' ' + test_commit a && + git checkout -b o/foo && + test_commit b && + test_commit c && + git checkout -b o/bar b && + test_commit d && + git checkout -b baz a && + test_commit e && + git checkout master && + test_commit f +' + +test_expect_success 'config controls ref-in-want advertisement' ' + git serve --advertise-capabilities >out && + ! grep -a ref-in-want out && + + git config uploadpack.allowRefInWant false && + git serve --advertise-capabilities >out && + ! grep -a ref-in-want out && + + git config uploadpack.allowRefInWant true && + git serve --advertise-capabilities >out && + grep -a ref-in-want out +' + +test_expect_success 'invalid want-ref line' ' + test-pkt-line pack >in <<-EOF && + command=fetch + 0001 + no-progress + want-ref refs/heads/non-existent + done + 0000 + EOF + + test_must_fail git serve --stateless-rpc 2>out expected_refs <<-EOF && + $(git rev-parse f) refs/heads/master + EOF + git rev-parse f | sort >expected_commits && + + test-pkt-line pack >in <<-EOF && + command=fetch + 0001 + no-progress + want-ref refs/heads/master + have $(git rev-parse a) + done + 0000 + EOF + + git serve --stateless-rpc >out expected_refs <<-EOF && + $(git rev-parse c) refs/heads/o/foo + $(git rev-parse d) refs/heads/o/bar + EOF + git rev-parse c d | sort >expected_commits && + + test-pkt-line pack >in <<-EOF && + command=fetch + 0001 + no-progress + want-ref refs/heads/o/foo + want-ref refs/heads/o/bar + have $(git rev-parse b) + done + 0000 + EOF + + git serve --stateless-rpc >out expected_refs <<-EOF && + $(git rev-parse f) refs/heads/master + EOF + git rev-parse e f | sort >expected_commits && + + test-pkt-line pack >in <<-EOF && + command=fetch + 0001 + no-progress + want-ref refs/heads/master + want $(git rev-parse e) + have $(git rev-parse a) + done + 0000 + EOF + + git serve --stateless-rpc >out expected_refs <<-EOF && + $(git rev-parse c) refs/heads/o/foo + EOF + >expected_commits && + + test-pkt-line pack >in <<-EOF && + command=fetch + 0001 + no-progress + want-ref refs/heads/o/foo + have $(git rev-parse c) + done + 0000 + EOF + + git serve --stateless-rpc >out wants = wants; + data->wanted_refs = wanted_refs; data->haves = haves; data->shallows = shallows; data->deepen_not = deepen_not; @@ -1149,6 +1155,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) static void upload_pack_data_clear(struct upload_pack_data *data) { object_array_clear(&data->wants); + string_list_clear(&data->wanted_refs, 1); oid_array_clear(&data->haves); object_array_clear(&data->shallows); string_list_clear(&data->deepen_not, 0); @@ -1185,6 +1192,34 @@ static int parse_want(const char *line) return 0; } +static int parse_want_ref(const char *line, struct string_list *wanted_refs) +{ + const char *arg; + if (skip_prefix(line, "want-ref ", &arg)) { + struct object_id oid; + struct string_list_item *item; + struct object *o; + + if (read_ref(arg, &oid)) { + packet_write_fmt(1, "ERR unknown ref %s", arg); + die("unknown ref %s", arg); + } + + item = string_list_append(wanted_refs, arg); + item->util = oiddup(&oid); + + o = parse_object_or_die(&oid, arg); + if (!(o->flags & WANTED)) { + o->flags |= WANTED; + add_object_array(o, NULL, &want_obj); + } + + return 1; + } + + return 0; +} + static int parse_have(const char *line, struct oid_array *haves) { const char *arg; @@ -1210,6 +1245,8 @@ static void process_args(struct packet_reader *request, /* process want */ if (parse_want(arg)) continue; + if (allow_ref_in_want && parse_want_ref(arg, &data->wanted_refs)) + continue; /* process have line */ if (parse_have(arg, &data->haves)) continue; @@ -1352,6 +1389,24 @@ static int process_haves_and_send_acks(struct upload_pack_data *data) return ret; } +static void send_wanted_ref_info(struct upload_pack_data *data) +{ + const struct string_list_item *item; + + if (!data->wanted_refs.nr) + return; + + packet_write_fmt(1, "wanted-refs\n"); + + for_each_string_list_item(item, &data->wanted_refs) { + packet_write_fmt(1, "%s %s\n", + oid_to_hex(item->util), + item->string); + } + + packet_delim(1); +} + static void send_shallow_info(struct upload_pack_data *data) { /* No shallow info needs to be sent */ @@ -1418,6 +1473,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys, state = FETCH_DONE; break; case FETCH_SEND_PACK: + send_wanted_ref_info(&data); send_shallow_info(&data); packet_write_fmt(1, "packfile\n"); @@ -1438,12 +1494,22 @@ int upload_pack_advertise(struct repository *r, { if (value) { int allow_filter_value; + int allow_ref_in_want; + strbuf_addstr(value, "shallow"); + if (!repo_config_get_bool(the_repository, "uploadpack.allowfilter", &allow_filter_value) && allow_filter_value) strbuf_addstr(value, " filter"); + + if (!repo_config_get_bool(the_repository, + "uploadpack.allowrefinwant", + &allow_ref_in_want) && + allow_ref_in_want) + strbuf_addstr(value, " ref-in-want"); } + return 1; } -- cgit v0.10.2-6-g49f6 From 3374292e55564fa6107b31a63a5e5432cd8c2265 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:18 -0700 Subject: upload-pack: test negotiation with changing repository Add tests to check the behavior of fetching from a repository which changes between rounds of negotiation (for example, when different servers in a load-balancing agreement participate in the same stateless RPC negotiation). This forms a baseline of comparison to the ref-in-want functionality (which will be introduced to the client in subsequent commits), and ensures that subsequent commits do not change existing behavior. As part of this effort, a mechanism to substitute strings in a single HTTP response is added. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh index 435a374..ed41b15 100644 --- a/t/lib-httpd.sh +++ b/t/lib-httpd.sh @@ -132,6 +132,7 @@ prepare_httpd() { cp "$TEST_PATH"/passwd "$HTTPD_ROOT_PATH" install_script broken-smart-http.sh install_script error.sh + install_script apply-one-time-sed.sh ln -s "$LIB_HTTPD_MODULE_PATH" "$HTTPD_ROOT_PATH/modules" diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf index 724d9ae..581c010 100644 --- a/t/lib-httpd/apache.conf +++ b/t/lib-httpd/apache.conf @@ -111,9 +111,14 @@ Alias /auth/dumb/ www/auth/dumb/ SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH} SetEnv GIT_HTTP_EXPORT_ALL + + SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH} + SetEnv GIT_HTTP_EXPORT_ALL + ScriptAliasMatch /smart_*[^/]*/(.*) ${GIT_EXEC_PATH}/git-http-backend/$1 ScriptAlias /broken_smart/ broken-smart-http.sh/ ScriptAlias /error/ error.sh/ +ScriptAliasMatch /one_time_sed/(.*) apply-one-time-sed.sh/$1 Options FollowSymlinks @@ -123,6 +128,9 @@ ScriptAlias /error/ error.sh/ Options ExecCGI + + Options ExecCGI + Options ExecCGI diff --git a/t/lib-httpd/apply-one-time-sed.sh b/t/lib-httpd/apply-one-time-sed.sh new file mode 100644 index 0000000..fcef728 --- /dev/null +++ b/t/lib-httpd/apply-one-time-sed.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +# If "one-time-sed" exists in $HTTPD_ROOT_PATH, run sed on the HTTP response, +# using the contents of "one-time-sed" as the sed command to be run. If the +# response was modified as a result, delete "one-time-sed" so that subsequent +# HTTP responses are no longer modified. +# +# This can be used to simulate the effects of the repository changing in +# between HTTP request-response pairs. +if [ -e one-time-sed ]; then + "$GIT_EXEC_PATH/git-http-backend" >out + sed "$(cat one-time-sed)" out_modified + + if diff out out_modified >/dev/null; then + cat out + else + cat out_modified + rm one-time-sed + fi +else + "$GIT_EXEC_PATH/git-http-backend" +fi diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh index da86f7c..32527a5 100755 --- a/t/t5703-upload-pack-ref-in-want.sh +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -157,4 +157,72 @@ test_expect_success 'want-ref with ref we already have commit for' ' check_output ' +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +REPO="$HTTPD_DOCUMENT_ROOT_PATH/repo" +LOCAL_PRISTINE="$(pwd)/local_pristine" + +test_expect_success 'setup repos for change-while-negotiating test' ' + ( + git init "$REPO" && + cd "$REPO" && + >.git/git-daemon-export-ok && + test_commit m1 && + git tag -d m1 && + + # Local repo with many commits (so that negotiation will take + # more than 1 request/response pair) + git clone "http://127.0.0.1:$LIB_HTTPD_PORT/smart/repo" "$LOCAL_PRISTINE" && + cd "$LOCAL_PRISTINE" && + git checkout -b side && + for i in $(seq 1 33); do test_commit s$i; done && + + # Add novel commits to upstream + git checkout master && + cd "$REPO" && + test_commit m2 && + test_commit m3 && + git tag -d m2 m3 + ) && + git -C "$LOCAL_PRISTINE" remote set-url origin "http://127.0.0.1:$LIB_HTTPD_PORT/one_time_sed/repo" && + git -C "$LOCAL_PRISTINE" config protocol.version 2 +' + +inconsistency () { + # Simulate that the server initially reports $2 as the ref + # corresponding to $1, and after that, $1 as the ref corresponding to + # $1. This corresponds to the real-life situation where the server's + # repository appears to change during negotiation, for example, when + # different servers in a load-balancing arrangement serve (stateless) + # RPCs during a single negotiation. + printf "s/%s/%s/" \ + $(git -C "$REPO" rev-parse $1 | tr -d "\n") \ + $(git -C "$REPO" rev-parse $2 | tr -d "\n") \ + >"$HTTPD_ROOT_PATH/one-time-sed" +} + +test_expect_success 'server is initially ahead - no ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant false && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master 1234567890123456789012345678901234567890 && + test_must_fail git -C local fetch 2>err && + grep "ERR upload-pack: not our ref" err +' + +test_expect_success 'server is initially behind - no ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant false && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master "master^" && + git -C local fetch && + + git -C "$REPO" rev-parse --verify "master^" >expected && + git -C local rev-parse --verify refs/remotes/origin/master >actual && + test_cmp expected actual +' + +stop_httpd + test_done -- cgit v0.10.2-6-g49f6 From 14b8ced3765b8916683b445f5cc3d6d80acc5f93 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:19 -0700 Subject: fetch: refactor the population of peer ref OIDs Populate peer ref OIDs in get_ref_map instead of do_fetch. Besides tightening scopes of variables in the code, this also prepares for get_ref_map being able to be called multiple times within do_fetch. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/builtin/fetch.c b/builtin/fetch.c index ea5b966..5456354 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -351,6 +351,7 @@ static struct ref *get_ref_map(struct transport *transport, /* opportunistically-updated references: */ struct ref *orefs = NULL, **oref_tail = &orefs; + struct string_list existing_refs = STRING_LIST_INIT_DUP; const struct ref *remote_refs; if (rs->nr) @@ -458,7 +459,23 @@ static struct ref *get_ref_map(struct transport *transport, tail = &rm->next; } - return ref_remove_duplicates(ref_map); + ref_map = ref_remove_duplicates(ref_map); + + for_each_ref(add_existing, &existing_refs); + for (rm = ref_map; rm; rm = rm->next) { + if (rm->peer_ref) { + struct string_list_item *peer_item = + string_list_lookup(&existing_refs, + rm->peer_ref->name); + if (peer_item) { + struct object_id *old_oid = peer_item->util; + oidcpy(&rm->peer_ref->old_oid, old_oid); + } + } + } + string_list_clear(&existing_refs, 1); + + return ref_map; } #define STORE_REF_ERROR_OTHER 1 @@ -1110,14 +1127,10 @@ static void backfill_tags(struct transport *transport, struct ref *ref_map) static int do_fetch(struct transport *transport, struct refspec *rs) { - struct string_list existing_refs = STRING_LIST_INIT_DUP; struct ref *ref_map; - struct ref *rm; int autotags = (transport->remote->fetch_tags == 1); int retcode = 0; - for_each_ref(add_existing, &existing_refs); - if (tags == TAGS_DEFAULT) { if (transport->remote->fetch_tags == 2) tags = TAGS_SET; @@ -1136,18 +1149,6 @@ static int do_fetch(struct transport *transport, if (!update_head_ok) check_not_current_branch(ref_map); - for (rm = ref_map; rm; rm = rm->next) { - if (rm->peer_ref) { - struct string_list_item *peer_item = - string_list_lookup(&existing_refs, - rm->peer_ref->name); - if (peer_item) { - struct object_id *old_oid = peer_item->util; - oidcpy(&rm->peer_ref->old_oid, old_oid); - } - } - } - if (tags == TAGS_DEFAULT && autotags) transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, "1"); if (prune) { @@ -1183,7 +1184,6 @@ static int do_fetch(struct transport *transport, } cleanup: - string_list_clear(&existing_refs, 1); return retcode; } -- cgit v0.10.2-6-g49f6 From 05c4422676f6e854aff0277984f4eadcd8912d56 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:20 -0700 Subject: fetch: refactor fetch_refs into two functions Refactor the fetch_refs function into a function that does the fetching of refs and another function that stores them. This is in preparation for allowing additional processing of the fetched refs before updating the local ref store. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/builtin/fetch.c b/builtin/fetch.c index 5456354..2fabfed 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -968,9 +968,21 @@ static int fetch_refs(struct transport *transport, struct ref *ref_map) if (ret) ret = transport_fetch_refs(transport, ref_map); if (!ret) - ret |= store_updated_refs(transport->url, - transport->remote->name, - ref_map); + /* + * Keep the new pack's ".keep" file around to allow the caller + * time to update refs to reference the new objects. + */ + return 0; + transport_unlock_pack(transport); + return ret; +} + +/* Update local refs based on the ref values fetched from a remote */ +static int consume_refs(struct transport *transport, struct ref *ref_map) +{ + int ret = store_updated_refs(transport->url, + transport->remote->name, + ref_map); transport_unlock_pack(transport); return ret; } @@ -1116,7 +1128,8 @@ static void backfill_tags(struct transport *transport, struct ref *ref_map) transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL); transport_set_option(transport, TRANS_OPT_DEPTH, "0"); transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL); - fetch_refs(transport, ref_map); + if (!fetch_refs(transport, ref_map)) + consume_refs(transport, ref_map); if (gsecondary) { transport_disconnect(gsecondary); @@ -1165,7 +1178,7 @@ static int do_fetch(struct transport *transport, transport->url); } } - if (fetch_refs(transport, ref_map)) { + if (fetch_refs(transport, ref_map) || consume_refs(transport, ref_map)) { free_refs(ref_map); retcode = 1; goto cleanup; -- cgit v0.10.2-6-g49f6 From 6d1700d564bbd3ecfe11f9889ed3f35a118b9f6a Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:21 -0700 Subject: fetch: refactor to make function args narrower Refactor find_non_local_tags and get_ref_map to only take the information they need instead of the entire transport struct. Besides improving code clarity, this also improves their flexibility, allowing for a different set of refs to be used instead of relying on the ones stored in the transport struct. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/builtin/fetch.c b/builtin/fetch.c index 2fabfed..bda00e8 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -254,9 +254,9 @@ static int will_fetch(struct ref **head, const unsigned char *sha1) return 0; } -static void find_non_local_tags(struct transport *transport, - struct ref **head, - struct ref ***tail) +static void find_non_local_tags(const struct ref *refs, + struct ref **head, + struct ref ***tail) { struct string_list existing_refs = STRING_LIST_INIT_DUP; struct string_list remote_refs = STRING_LIST_INIT_NODUP; @@ -264,7 +264,7 @@ static void find_non_local_tags(struct transport *transport, struct string_list_item *item = NULL; for_each_ref(add_existing, &existing_refs); - for (ref = transport_get_remote_refs(transport, NULL); ref; ref = ref->next) { + for (ref = refs; ref; ref = ref->next) { if (!starts_with(ref->name, "refs/tags/")) continue; @@ -338,7 +338,8 @@ static void find_non_local_tags(struct transport *transport, string_list_clear(&remote_refs, 0); } -static struct ref *get_ref_map(struct transport *transport, +static struct ref *get_ref_map(struct remote *remote, + const struct ref *remote_refs, struct refspec *rs, int tags, int *autotags) { @@ -346,27 +347,11 @@ static struct ref *get_ref_map(struct transport *transport, struct ref *rm; struct ref *ref_map = NULL; struct ref **tail = &ref_map; - struct argv_array ref_prefixes = ARGV_ARRAY_INIT; /* opportunistically-updated references: */ struct ref *orefs = NULL, **oref_tail = &orefs; struct string_list existing_refs = STRING_LIST_INIT_DUP; - const struct ref *remote_refs; - - if (rs->nr) - refspec_ref_prefixes(rs, &ref_prefixes); - else if (transport->remote && transport->remote->fetch.nr) - refspec_ref_prefixes(&transport->remote->fetch, &ref_prefixes); - - if (ref_prefixes.argc && - (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) { - argv_array_push(&ref_prefixes, "refs/tags/"); - } - - remote_refs = transport_get_remote_refs(transport, &ref_prefixes); - - argv_array_clear(&ref_prefixes); if (rs->nr) { struct refspec *fetch_refspec; @@ -403,7 +388,7 @@ static struct ref *get_ref_map(struct transport *transport, if (refmap.nr) fetch_refspec = &refmap; else - fetch_refspec = &transport->remote->fetch; + fetch_refspec = &remote->fetch; for (i = 0; i < fetch_refspec->nr; i++) get_fetch_map(ref_map, &fetch_refspec->items[i], &oref_tail, 1); @@ -411,7 +396,6 @@ static struct ref *get_ref_map(struct transport *transport, die("--refmap option is only meaningful with command-line refspec(s)."); } else { /* Use the defaults */ - struct remote *remote = transport->remote; struct branch *branch = branch_get(NULL); int has_merge = branch_has_merge_config(branch); if (remote && @@ -450,7 +434,7 @@ static struct ref *get_ref_map(struct transport *transport, /* also fetch all tags */ get_fetch_map(remote_refs, tag_refspec, &tail, 0); else if (tags == TAGS_DEFAULT && *autotags) - find_non_local_tags(transport, &ref_map, &tail); + find_non_local_tags(remote_refs, &ref_map, &tail); /* Now append any refs to be updated opportunistically: */ *tail = orefs; @@ -1143,6 +1127,8 @@ static int do_fetch(struct transport *transport, struct ref *ref_map; int autotags = (transport->remote->fetch_tags == 1); int retcode = 0; + const struct ref *remote_refs; + struct argv_array ref_prefixes = ARGV_ARRAY_INIT; if (tags == TAGS_DEFAULT) { if (transport->remote->fetch_tags == 2) @@ -1158,7 +1144,21 @@ static int do_fetch(struct transport *transport, goto cleanup; } - ref_map = get_ref_map(transport, rs, tags, &autotags); + if (rs->nr) + refspec_ref_prefixes(rs, &ref_prefixes); + else if (transport->remote && transport->remote->fetch.nr) + refspec_ref_prefixes(&transport->remote->fetch, &ref_prefixes); + + if (ref_prefixes.argc && + (tags == TAGS_SET || (tags == TAGS_DEFAULT && !rs->nr))) { + argv_array_push(&ref_prefixes, "refs/tags/"); + } + + remote_refs = transport_get_remote_refs(transport, &ref_prefixes); + argv_array_clear(&ref_prefixes); + + ref_map = get_ref_map(transport->remote, remote_refs, rs, + tags, &autotags); if (!update_head_ok) check_not_current_branch(ref_map); @@ -1190,7 +1190,7 @@ static int do_fetch(struct transport *transport, if (tags == TAGS_DEFAULT && autotags) { struct ref **tail = &ref_map; ref_map = NULL; - find_non_local_tags(transport, &ref_map, &tail); + find_non_local_tags(remote_refs, &ref_map, &tail); if (ref_map) backfill_tags(transport, ref_map); free_refs(ref_map); -- cgit v0.10.2-6-g49f6 From 989b8c4452f63f415c276df348defce6df613696 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:22 -0700 Subject: fetch-pack: put shallow info in output parameter Expand the transport fetch method signature, by adding an output parameter, to allow transports to return information about the refs they have fetched. Then communicate shallow status information through this mechanism instead of by modifying the input list of refs. This does require clients to sometimes generate the ref map twice: once from the list of refs provided by the remote (as is currently done) and potentially once from the new list of refs that the fetch mechanism provides. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/builtin/clone.c b/builtin/clone.c index 99e73da..8f86d99 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1155,7 +1155,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } if (!is_local && !complete_refs_before_fetch) - transport_fetch_refs(transport, mapped_refs); + transport_fetch_refs(transport, mapped_refs, NULL); remote_head = find_ref_by_name(refs, "HEAD"); remote_head_points_at = @@ -1197,7 +1197,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (is_local) clone_local(path, git_dir); else if (refs && complete_refs_before_fetch) - transport_fetch_refs(transport, mapped_refs); + transport_fetch_refs(transport, mapped_refs, NULL); update_remote_refs(refs, mapped_refs, remote_head_points_at, branch_top.buf, reflog_msg.buf, transport, diff --git a/builtin/fetch.c b/builtin/fetch.c index bda00e8..0347cf0 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -946,11 +946,13 @@ static int quickfetch(struct ref *ref_map) return check_connected(iterate_ref_map, &rm, &opt); } -static int fetch_refs(struct transport *transport, struct ref *ref_map) +static int fetch_refs(struct transport *transport, struct ref *ref_map, + struct ref **updated_remote_refs) { int ret = quickfetch(ref_map); if (ret) - ret = transport_fetch_refs(transport, ref_map); + ret = transport_fetch_refs(transport, ref_map, + updated_remote_refs); if (!ret) /* * Keep the new pack's ".keep" file around to allow the caller @@ -1112,7 +1114,7 @@ static void backfill_tags(struct transport *transport, struct ref *ref_map) transport_set_option(transport, TRANS_OPT_FOLLOWTAGS, NULL); transport_set_option(transport, TRANS_OPT_DEPTH, "0"); transport_set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, NULL); - if (!fetch_refs(transport, ref_map)) + if (!fetch_refs(transport, ref_map, NULL)) consume_refs(transport, ref_map); if (gsecondary) { @@ -1128,6 +1130,7 @@ static int do_fetch(struct transport *transport, int autotags = (transport->remote->fetch_tags == 1); int retcode = 0; const struct ref *remote_refs; + struct ref *updated_remote_refs = NULL; struct argv_array ref_prefixes = ARGV_ARRAY_INIT; if (tags == TAGS_DEFAULT) { @@ -1178,7 +1181,24 @@ static int do_fetch(struct transport *transport, transport->url); } } - if (fetch_refs(transport, ref_map) || consume_refs(transport, ref_map)) { + + if (fetch_refs(transport, ref_map, &updated_remote_refs)) { + free_refs(ref_map); + retcode = 1; + goto cleanup; + } + if (updated_remote_refs) { + /* + * Regenerate ref_map using the updated remote refs. This is + * to account for additional information which may be provided + * by the transport (e.g. shallow info). + */ + free_refs(ref_map); + ref_map = get_ref_map(transport->remote, updated_remote_refs, rs, + tags, &autotags); + free_refs(updated_remote_refs); + } + if (consume_refs(transport, ref_map)) { free_refs(ref_map); retcode = 1; goto cleanup; diff --git a/fetch-object.c b/fetch-object.c index 853624f..48fe63d 100644 --- a/fetch-object.c +++ b/fetch-object.c @@ -19,7 +19,7 @@ static void fetch_refs(const char *remote_name, struct ref *ref) transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); - transport_fetch_refs(transport, ref); + transport_fetch_refs(transport, ref, NULL); fetch_if_missing = original_fetch_if_missing; } diff --git a/fetch-pack.c b/fetch-pack.c index a320ce9..73890b8 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1470,12 +1470,13 @@ static int remove_duplicates_in_refs(struct ref **ref, int nr) } static void update_shallow(struct fetch_pack_args *args, - struct ref **sought, int nr_sought, + struct ref *refs, struct shallow_info *si) { struct oid_array ref = OID_ARRAY_INIT; int *status; int i; + struct ref *r; if (args->deepen && alternate_shallow_file) { if (*alternate_shallow_file == '\0') { /* --unshallow */ @@ -1517,8 +1518,8 @@ static void update_shallow(struct fetch_pack_args *args, remove_nonexistent_theirs_shallow(si); if (!si->nr_ours && !si->nr_theirs) return; - for (i = 0; i < nr_sought; i++) - oid_array_append(&ref, &sought[i]->old_oid); + for (r = refs; r; r = r->next) + oid_array_append(&ref, &r->old_oid); si->ref = &ref; if (args->update_shallow) { @@ -1552,12 +1553,12 @@ static void update_shallow(struct fetch_pack_args *args, * remote is also shallow, check what ref is safe to update * without updating .git/shallow */ - status = xcalloc(nr_sought, sizeof(*status)); + status = xcalloc(ref.nr, sizeof(*status)); assign_shallow_commits_to_refs(si, NULL, status); if (si->nr_ours || si->nr_theirs) { - for (i = 0; i < nr_sought; i++) + for (r = refs, i = 0; r; r = r->next, i++) if (status[i]) - sought[i]->status = REF_STATUS_REJECT_SHALLOW; + r->status = REF_STATUS_REJECT_SHALLOW; } free(status); oid_array_clear(&ref); @@ -1591,7 +1592,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args, ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, &si, pack_lockfile); reprepare_packed_git(the_repository); - update_shallow(args, sought, nr_sought, &si); + update_shallow(args, ref_cpy, &si); clear_shallow_info(&si); return ref_cpy; } diff --git a/transport-helper.c b/transport-helper.c index 1f8ff7e..8b5abca 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -651,14 +651,16 @@ static int connect_helper(struct transport *transport, const char *name, } static int fetch(struct transport *transport, - int nr_heads, struct ref **to_fetch) + int nr_heads, struct ref **to_fetch, + struct ref **fetched_refs) { struct helper_data *data = transport->data; int i, count; if (process_connect(transport, 0)) { do_take_over(transport); - return transport->vtable->fetch(transport, nr_heads, to_fetch); + return transport->vtable->fetch(transport, nr_heads, to_fetch, + fetched_refs); } count = 0; diff --git a/transport-internal.h b/transport-internal.h index 1cde625..eeb6c34 100644 --- a/transport-internal.h +++ b/transport-internal.h @@ -36,11 +36,18 @@ struct transport_vtable { * Fetch the objects for the given refs. Note that this gets * an array, and should ignore the list structure. * + * The transport *may* provide, in fetched_refs, the list of refs that + * it fetched. If the transport knows anything about the fetched refs + * that the caller does not know (for example, shallow status), it + * should provide that list of refs and include that information in the + * list. + * * If the transport did not get hashes for refs in * get_refs_list(), it should set the old_sha1 fields in the * provided refs now. **/ - int (*fetch)(struct transport *transport, int refs_nr, struct ref **refs); + int (*fetch)(struct transport *transport, int refs_nr, struct ref **refs, + struct ref **fetched_refs); /** * Push the objects and refs. Send the necessary objects, and diff --git a/transport.c b/transport.c index a32da30..39d8c2f 100644 --- a/transport.c +++ b/transport.c @@ -151,7 +151,8 @@ static struct ref *get_refs_from_bundle(struct transport *transport, } static int fetch_refs_from_bundle(struct transport *transport, - int nr_heads, struct ref **to_fetch) + int nr_heads, struct ref **to_fetch, + struct ref **fetched_refs) { struct bundle_transport_data *data = transport->data; return unbundle(&data->header, data->fd, @@ -287,7 +288,8 @@ static struct ref *get_refs_via_connect(struct transport *transport, int for_pus } static int fetch_refs_via_pack(struct transport *transport, - int nr_heads, struct ref **to_fetch) + int nr_heads, struct ref **to_fetch, + struct ref **fetched_refs) { int ret = 0; struct git_transport_data *data = transport->data; @@ -354,8 +356,12 @@ static int fetch_refs_via_pack(struct transport *transport, if (report_unmatched_refs(to_fetch, nr_heads)) ret = -1; + if (fetched_refs) + *fetched_refs = refs; + else + free_refs(refs); + free_refs(refs_tmp); - free_refs(refs); free(dest); return ret; } @@ -1215,19 +1221,31 @@ const struct ref *transport_get_remote_refs(struct transport *transport, return transport->remote_refs; } -int transport_fetch_refs(struct transport *transport, struct ref *refs) +int transport_fetch_refs(struct transport *transport, struct ref *refs, + struct ref **fetched_refs) { int rc; int nr_heads = 0, nr_alloc = 0, nr_refs = 0; struct ref **heads = NULL; + struct ref *nop_head = NULL, **nop_tail = &nop_head; struct ref *rm; for (rm = refs; rm; rm = rm->next) { nr_refs++; if (rm->peer_ref && !is_null_oid(&rm->old_oid) && - !oidcmp(&rm->peer_ref->old_oid, &rm->old_oid)) + !oidcmp(&rm->peer_ref->old_oid, &rm->old_oid)) { + /* + * These need to be reported as fetched, but we don't + * actually need to fetch them. + */ + if (fetched_refs) { + struct ref *nop_ref = copy_ref(rm); + *nop_tail = nop_ref; + nop_tail = &nop_ref->next; + } continue; + } ALLOC_GROW(heads, nr_heads + 1, nr_alloc); heads[nr_heads++] = rm; } @@ -1245,7 +1263,11 @@ int transport_fetch_refs(struct transport *transport, struct ref *refs) heads[nr_heads++] = rm; } - rc = transport->vtable->fetch(transport, nr_heads, heads); + rc = transport->vtable->fetch(transport, nr_heads, heads, fetched_refs); + if (fetched_refs && nop_head) { + *nop_tail = *fetched_refs; + *fetched_refs = nop_head; + } free(heads); return rc; diff --git a/transport.h b/transport.h index 7792b08..3dff767 100644 --- a/transport.h +++ b/transport.h @@ -218,7 +218,8 @@ int transport_push(struct transport *connection, const struct ref *transport_get_remote_refs(struct transport *transport, const struct argv_array *ref_prefixes); -int transport_fetch_refs(struct transport *transport, struct ref *refs); +int transport_fetch_refs(struct transport *transport, struct ref *refs, + struct ref **fetched_refs); void transport_unlock_pack(struct transport *transport); int transport_disconnect(struct transport *transport); char *transport_anonymize_url(const char *url); -- cgit v0.10.2-6-g49f6 From 733020517a1baa6f4f76bb7bf48d8d8d14eecd6c Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Wed, 27 Jun 2018 15:30:23 -0700 Subject: fetch-pack: implement ref-in-want Implement ref-in-want on the client side so that when a server supports the "ref-in-want" feature, a client will send "want-ref" lines for each reference the client wants to fetch. This feature allows clients to tolerate inconsistencies that exist when a remote repository's refs change during the course of negotiation. This allows a client to request to request a particular ref without specifying the OID of the ref. This means that instead of hitting an error when a ref no longer points at the OID it did at the beginning of negotiation, negotiation can continue and the value of that ref will be sent at the termination of negotiation, just before a packfile is sent. More information on the ref-in-want feature can be found in Documentation/technical/protocol-v2.txt. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/fetch-pack.c b/fetch-pack.c index 73890b8..0b4a9f2 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1102,9 +1102,10 @@ static void add_shallow_requests(struct strbuf *req_buf, static void add_wants(const struct ref *wants, struct strbuf *req_buf) { + int use_ref_in_want = server_supports_feature("fetch", "ref-in-want", 0); + for ( ; wants ; wants = wants->next) { const struct object_id *remote = &wants->old_oid; - const char *remote_hex; struct object *o; /* @@ -1122,8 +1123,10 @@ static void add_wants(const struct ref *wants, struct strbuf *req_buf) continue; } - remote_hex = oid_to_hex(remote); - packet_buf_write(req_buf, "want %s\n", remote_hex); + if (!use_ref_in_want || wants->exact_oid) + packet_buf_write(req_buf, "want %s\n", oid_to_hex(remote)); + else + packet_buf_write(req_buf, "want-ref %s\n", wants->name); } } @@ -1334,6 +1337,32 @@ static void receive_shallow_info(struct fetch_pack_args *args, args->deepen = 1; } +static void receive_wanted_refs(struct packet_reader *reader, struct ref *refs) +{ + process_section_header(reader, "wanted-refs", 0); + while (packet_reader_read(reader) == PACKET_READ_NORMAL) { + struct object_id oid; + const char *end; + struct ref *r = NULL; + + if (parse_oid_hex(reader->line, &oid, &end) || *end++ != ' ') + die("expected wanted-ref, got '%s'", reader->line); + + for (r = refs; r; r = r->next) { + if (!strcmp(end, r->name)) { + oidcpy(&r->old_oid, &oid); + break; + } + } + + if (!r) + die("unexpected wanted-ref: '%s'", reader->line); + } + + if (reader->status != PACKET_READ_DELIM) + die("error processing wanted refs: %d", reader->status); +} + enum fetch_state { FETCH_CHECK_LOCAL = 0, FETCH_SEND_REQUEST, @@ -1408,6 +1437,9 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (process_section_header(&reader, "shallow-info", 1)) receive_shallow_info(args, &reader); + if (process_section_header(&reader, "wanted-refs", 1)) + receive_wanted_refs(&reader, ref); + /* get the pack */ process_section_header(&reader, "packfile", 0); if (get_pack(args, fd, pack_lockfile)) diff --git a/remote.c b/remote.c index abe80c1..2c2376f 100644 --- a/remote.c +++ b/remote.c @@ -1735,6 +1735,7 @@ int get_fetch_map(const struct ref *remote_refs, if (refspec->exact_sha1) { ref_map = alloc_ref(name); get_oid_hex(name, &ref_map->old_oid); + ref_map->exact_oid = 1; } else { ref_map = get_remote_ref(remote_refs, name); } diff --git a/remote.h b/remote.h index 45ecc6c..9762921 100644 --- a/remote.h +++ b/remote.h @@ -73,6 +73,7 @@ struct ref { force:1, forced_update:1, expect_old_sha1:1, + exact_oid:1, deletion:1; enum { diff --git a/t/t5703-upload-pack-ref-in-want.sh b/t/t5703-upload-pack-ref-in-want.sh index 32527a5..a73c55a 100755 --- a/t/t5703-upload-pack-ref-in-want.sh +++ b/t/t5703-upload-pack-ref-in-want.sh @@ -211,6 +211,18 @@ test_expect_success 'server is initially ahead - no ref in want' ' grep "ERR upload-pack: not our ref" err ' +test_expect_success 'server is initially ahead - ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant true && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master 1234567890123456789012345678901234567890 && + git -C local fetch && + + git -C "$REPO" rev-parse --verify master >expected && + git -C local rev-parse --verify refs/remotes/origin/master >actual && + test_cmp expected actual +' + test_expect_success 'server is initially behind - no ref in want' ' git -C "$REPO" config uploadpack.allowRefInWant false && rm -rf local && @@ -223,6 +235,143 @@ test_expect_success 'server is initially behind - no ref in want' ' test_cmp expected actual ' +test_expect_success 'server is initially behind - ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant true && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + inconsistency master "master^" && + git -C local fetch && + + git -C "$REPO" rev-parse --verify "master" >expected && + git -C local rev-parse --verify refs/remotes/origin/master >actual && + test_cmp expected actual +' + +test_expect_success 'server loses a ref - ref in want' ' + git -C "$REPO" config uploadpack.allowRefInWant true && + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + echo "s/master/raster/" >"$HTTPD_ROOT_PATH/one-time-sed" && + test_must_fail git -C local fetch 2>err && + + grep "ERR unknown ref refs/heads/raster" err +' + stop_httpd +REPO="$(pwd)/repo" +LOCAL_PRISTINE="$(pwd)/local_pristine" + +# $REPO +# c(o/foo) d(o/bar) +# \ / +# b e(baz) f(master) +# \__ | __/ +# \ | / +# a +# +# $LOCAL_PRISTINE +# s32(side) +# | +# . +# . +# | +# a(master) +test_expect_success 'setup repos for fetching with ref-in-want tests' ' + ( + git init "$REPO" && + cd "$REPO" && + test_commit a && + + # Local repo with many commits (so that negotiation will take + # more than 1 request/response pair) + rm -rf "$LOCAL_PRISTINE" && + git clone "file://$REPO" "$LOCAL_PRISTINE" && + cd "$LOCAL_PRISTINE" && + git checkout -b side && + for i in $(seq 1 33); do test_commit s$i; done && + + # Add novel commits to upstream + git checkout master && + cd "$REPO" && + git checkout -b o/foo && + test_commit b && + test_commit c && + git checkout -b o/bar b && + test_commit d && + git checkout -b baz a && + test_commit e && + git checkout master && + test_commit f + ) && + git -C "$REPO" config uploadpack.allowRefInWant true && + git -C "$LOCAL_PRISTINE" config protocol.version 2 +' + +test_expect_success 'fetching with exact OID' ' + test_when_finished "rm -f log" && + + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + GIT_TRACE_PACKET="$(pwd)/log" git -C local fetch origin \ + $(git -C "$REPO" rev-parse d):refs/heads/actual && + + git -C "$REPO" rev-parse "d" >expected && + git -C local rev-parse refs/heads/actual >actual && + test_cmp expected actual && + grep "want $(git -C "$REPO" rev-parse d)" log +' + +test_expect_success 'fetching multiple refs' ' + test_when_finished "rm -f log" && + + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + GIT_TRACE_PACKET="$(pwd)/log" git -C local fetch origin master baz && + + git -C "$REPO" rev-parse "master" "baz" >expected && + git -C local rev-parse refs/remotes/origin/master refs/remotes/origin/baz >actual && + test_cmp expected actual && + grep "want-ref refs/heads/master" log && + grep "want-ref refs/heads/baz" log +' + +test_expect_success 'fetching ref and exact OID' ' + test_when_finished "rm -f log" && + + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + GIT_TRACE_PACKET="$(pwd)/log" git -C local fetch origin \ + master $(git -C "$REPO" rev-parse b):refs/heads/actual && + + git -C "$REPO" rev-parse "master" "b" >expected && + git -C local rev-parse refs/remotes/origin/master refs/heads/actual >actual && + test_cmp expected actual && + grep "want $(git -C "$REPO" rev-parse b)" log && + grep "want-ref refs/heads/master" log +' + +test_expect_success 'fetching with wildcard that does not match any refs' ' + test_when_finished "rm -f log" && + + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + git -C local fetch origin refs/heads/none*:refs/heads/* >out && + test_must_be_empty out +' + +test_expect_success 'fetching with wildcard that matches multiple refs' ' + test_when_finished "rm -f log" && + + rm -rf local && + cp -r "$LOCAL_PRISTINE" local && + GIT_TRACE_PACKET="$(pwd)/log" git -C local fetch origin refs/heads/o*:refs/heads/o* && + + git -C "$REPO" rev-parse "o/foo" "o/bar" >expected && + git -C local rev-parse "o/foo" "o/bar" >actual && + test_cmp expected actual && + grep "want-ref refs/heads/o/foo" log && + grep "want-ref refs/heads/o/bar" log +' + test_done -- cgit v0.10.2-6-g49f6 From cf1e7c07705eb21c30d0ee414810e7bc8fdf7d82 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 2 Jul 2018 15:08:43 -0700 Subject: fetch-pack: write shallow, then check connectivity When fetching, connectivity is checked after the shallow file is updated. There are 2 issues with this: (1) the connectivity check is only performed up to ancestors of existing refs (which is not thorough enough if we were deepening an existing ref in the first place), and (2) there is no rollback of the shallow file if the connectivity check fails. To solve (1), update the connectivity check to check the ancestry chain completely in the case of a deepening fetch by refraining from passing "--not --all" when invoking rev-list in connected.c. To solve (2), have fetch_pack() perform its own connectivity check before updating the shallow file. To support existing use cases in which "git fetch-pack" is used to download objects without much regard as to the connectivity of the resulting objects with respect to the existing repository, the connectivity check is only done if necessary (that is, the fetch is not a clone, and the fetch involves shallow/deepen functionality). "git fetch" still performs its own connectivity check, preserving correctness but sometimes performing redundant work. This redundancy is mitigated by the fact that fetch_pack() reports if it has performed a connectivity check itself, and if the transport supports connect or stateless-connect, it will bubble up that report so that "git fetch" knows not to perform the connectivity check in such a case. This was noticed when a user tried to deepen an existing repository by fetching with --no-shallow from a server that did not send all necessary objects - the connectivity check as run by "git fetch" succeeded, but a subsequent "git fsck" failed. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano diff --git a/builtin/fetch.c b/builtin/fetch.c index 0347cf0..d4b2767 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -769,7 +769,7 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid) } static int store_updated_refs(const char *raw_url, const char *remote_name, - struct ref *ref_map) + int connectivity_checked, struct ref *ref_map) { FILE *fp; struct commit *commit; @@ -791,10 +791,12 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, else url = xstrdup("foreign"); - rm = ref_map; - if (check_connected(iterate_ref_map, &rm, NULL)) { - rc = error(_("%s did not send all necessary objects\n"), url); - goto abort; + if (!connectivity_checked) { + rm = ref_map; + if (check_connected(iterate_ref_map, &rm, NULL)) { + rc = error(_("%s did not send all necessary objects\n"), url); + goto abort; + } } prepare_format_display(ref_map); @@ -966,8 +968,11 @@ static int fetch_refs(struct transport *transport, struct ref *ref_map, /* Update local refs based on the ref values fetched from a remote */ static int consume_refs(struct transport *transport, struct ref *ref_map) { + int connectivity_checked = transport->smart_options + ? transport->smart_options->connectivity_checked : 0; int ret = store_updated_refs(transport->url, transport->remote->name, + connectivity_checked, ref_map); transport_unlock_pack(transport); return ret; diff --git a/connected.c b/connected.c index 91feb78..1bba888 100644 --- a/connected.c +++ b/connected.c @@ -58,8 +58,10 @@ int check_connected(oid_iterate_fn fn, void *cb_data, argv_array_push(&rev_list.args, "--stdin"); if (repository_format_partial_clone) argv_array_push(&rev_list.args, "--exclude-promisor-objects"); - argv_array_push(&rev_list.args, "--not"); - argv_array_push(&rev_list.args, "--all"); + if (!opt->is_deepening_fetch) { + argv_array_push(&rev_list.args, "--not"); + argv_array_push(&rev_list.args, "--all"); + } argv_array_push(&rev_list.args, "--quiet"); if (opt->progress) argv_array_pushf(&rev_list.args, "--progress=%s", diff --git a/connected.h b/connected.h index a53f03a..322dc76 100644 --- a/connected.h +++ b/connected.h @@ -38,6 +38,13 @@ struct check_connected_options { * Insert these variables into the environment of the child process. */ const char **env; + + /* + * If non-zero, check the ancestry chain completely, not stopping at + * any existing ref. This is necessary when deepening existing refs + * during a fetch. + */ + unsigned is_deepening_fetch : 1; }; #define CHECK_CONNECTED_INIT { 0 } diff --git a/fetch-pack.c b/fetch-pack.c index 0b4a9f2..60bbffb 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -19,6 +19,7 @@ #include "sha1-array.h" #include "oidset.h" #include "packfile.h" +#include "connected.h" static int transfer_unpack_limit = -1; static int fetch_unpack_limit = -1; @@ -1596,6 +1597,18 @@ static void update_shallow(struct fetch_pack_args *args, oid_array_clear(&ref); } +static int iterate_ref_map(void *cb_data, struct object_id *oid) +{ + struct ref **rm = cb_data; + struct ref *ref = *rm; + + if (!ref) + return -1; /* end of the list */ + *rm = ref->next; + oidcpy(oid, &ref->old_oid); + return 0; +} + struct ref *fetch_pack(struct fetch_pack_args *args, int fd[], struct child_process *conn, const struct ref *ref, @@ -1624,7 +1637,25 @@ struct ref *fetch_pack(struct fetch_pack_args *args, ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought, &si, pack_lockfile); reprepare_packed_git(the_repository); + + if (!args->cloning && args->deepen) { + struct check_connected_options opt = CHECK_CONNECTED_INIT; + struct ref *iterator = ref_cpy; + opt.shallow_file = alternate_shallow_file; + if (args->deepen) + opt.is_deepening_fetch = 1; + if (check_connected(iterate_ref_map, &iterator, &opt)) { + error(_("remote did not send all necessary objects")); + free_refs(ref_cpy); + ref_cpy = NULL; + rollback_lock_file(&shallow_lock); + goto cleanup; + } + args->connectivity_checked = 1; + } + update_shallow(args, ref_cpy, &si); +cleanup: clear_shallow_info(&si); return ref_cpy; } diff --git a/fetch-pack.h b/fetch-pack.h index bb45a36..2160be9 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -41,6 +41,21 @@ struct fetch_pack_args { * regardless of which object flags it uses (if any). */ unsigned no_dependents:1; + + /* + * Because fetch_pack() overwrites the shallow file upon a + * successful deepening non-clone fetch, if this struct + * specifies such a fetch, fetch_pack() needs to perform a + * connectivity check before deciding if a fetch is successful + * (and overwriting the shallow file). fetch_pack() sets this + * field to 1 if such a connectivity check was performed. + * + * This is different from check_self_contained_and_connected + * in that the former allows existing objects in the + * repository to satisfy connectivity needs, whereas the + * latter doesn't. + */ + unsigned connectivity_checked:1; }; /* diff --git a/t/t5537-fetch-shallow.sh b/t/t5537-fetch-shallow.sh index df8d2f0..a7afb66 100755 --- a/t/t5537-fetch-shallow.sh +++ b/t/t5537-fetch-shallow.sh @@ -186,4 +186,47 @@ EOF test_cmp expect actual ' +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +REPO="$HTTPD_DOCUMENT_ROOT_PATH/repo" + +test_expect_success 'shallow fetches check connectivity before writing shallow file' ' + rm -rf "$REPO" client && + + git init "$REPO" && + test_commit -C "$REPO" one && + test_commit -C "$REPO" two && + test_commit -C "$REPO" three && + + git init client && + + # Use protocol v2 to ensure that shallow information is sent exactly + # once by the server, since we are planning to manipulate it. + git -C "$REPO" config protocol.version 2 && + git -C client config protocol.version 2 && + + git -C client fetch --depth=2 "$HTTPD_URL/one_time_sed/repo" master:a_branch && + + # Craft a situation in which the server sends back an unshallow request + # with an empty packfile. This is done by refetching with a shorter + # depth (to ensure that the packfile is empty), and overwriting the + # shallow line in the response with the unshallow line we want. + printf "s/0034shallow %s/0036unshallow %s/" \ + "$(git -C "$REPO" rev-parse HEAD)" \ + "$(git -C "$REPO" rev-parse HEAD^)" \ + >"$HTTPD_ROOT_PATH/one-time-sed" && + test_must_fail git -C client fetch --depth=1 "$HTTPD_URL/one_time_sed/repo" \ + master:a_branch && + + # Ensure that the one-time-sed script was used. + ! test -e "$HTTPD_ROOT_PATH/one-time-sed" && + + # Ensure that the resulting repo is consistent, despite our failure to + # fetch. + git -C client fsck +' + +stop_httpd + test_done diff --git a/transport.c b/transport.c index 39d8c2f..fdd813f 100644 --- a/transport.c +++ b/transport.c @@ -350,6 +350,7 @@ static int fetch_refs_via_pack(struct transport *transport, data->got_remote_heads = 0; data->options.self_contained_and_connected = args.self_contained_and_connected; + data->options.connectivity_checked = args.connectivity_checked; if (refs == NULL) ret = -1; diff --git a/transport.h b/transport.h index 3dff767..7a9a7fc 100644 --- a/transport.h +++ b/transport.h @@ -18,6 +18,17 @@ struct git_transport_options { unsigned deepen_relative : 1; unsigned from_promisor : 1; unsigned no_dependents : 1; + + /* + * If this transport supports connect or stateless-connect, + * the corresponding field in struct fetch_pack_args is copied + * here after fetching. + * + * See the definition of connectivity_checked in struct + * fetch_pack_args for more information. + */ + unsigned connectivity_checked:1; + int depth; const char *deepen_since; const struct string_list *deepen_not; -- cgit v0.10.2-6-g49f6