From 10ac85c785afed4163ed931149a87b30c7c5eaf9 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:39 +0000 Subject: upload-pack: add object filtering for partial clone Teach upload-pack to negotiate object filtering over the protocol and to send filter parameters to pack-objects. This is intended for partial clone and fetch. The idea to make upload-pack configurable using uploadpack.allowFilter comes from Jonathan Tan's work in [1]. [1] https://public-inbox.org/git/f211093280b422c32cc1b7034130072f35c5ed51.1506714999.git.jonathantanmy@google.com/ Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index 1ac0ae6..e528210 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -3268,6 +3268,10 @@ uploadpack.packObjectsHook:: was run. I.e., `upload-pack` will feed input intended for `pack-objects` to the hook, and expects a completed packfile on stdout. + +uploadpack.allowFilter:: + If this option is set, `upload-pack` will advertise partial + clone and partial fetch object filtering. + Note that this configuration variable is ignored if it is seen in the repository-level config (this is a safety measure against fetching from diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index ed1eae8..a43a113 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -212,6 +212,7 @@ out of what the server said it could do with the first 'want' line. upload-request = want-list *shallow-line *1depth-request + [filter-request] flush-pkt want-list = first-want @@ -227,6 +228,8 @@ out of what the server said it could do with the first 'want' line. additional-want = PKT-LINE("want" SP obj-id) depth = 1*DIGIT + + filter-request = PKT-LINE("filter" SP filter-spec) ---- Clients MUST send all the obj-ids it wants from the reference @@ -249,6 +252,11 @@ complete those commits. Commits whose parents are not received as a result are defined as shallow and marked as such in the server. This information is sent back to the client in the next step. +The client can optionally request that pack-objects omit various +objects from the packfile using one of several filtering techniques. +These are intended for use with partial clone and partial fetch +operations. See `rev-list` for possible "filter-spec" values. + Once all the 'want's and 'shallow's (and optional 'deepen') are transferred, clients MUST send a flush-pkt, to tell the server side that it is done sending the list. diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 26dcc6f..332d209 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -309,3 +309,11 @@ to accept a signed push certificate, and asks the to be included in the push certificate. A send-pack client MUST NOT send a push-cert packet unless the receive-pack server advertises this capability. + +filter +------ + +If the upload-pack server advertises the 'filter' capability, +fetch-pack may send "filter" commands to request a partial clone +or partial fetch and request that the server omit various objects +from the packfile. diff --git a/upload-pack.c b/upload-pack.c index e25f725..e6d38b9 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -10,6 +10,8 @@ #include "diff.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter.h" +#include "list-objects-filter-options.h" #include "run-command.h" #include "connect.h" #include "sigchain.h" @@ -18,6 +20,7 @@ #include "parse-options.h" #include "argv-array.h" #include "prio-queue.h" +#include "quote.h" static const char * const upload_pack_usage[] = { N_("git upload-pack [] "), @@ -64,6 +67,10 @@ static int advertise_refs; static int stateless_rpc; static const char *pack_objects_hook; +static int filter_capability_requested; +static int filter_advertise; +static struct list_objects_filter_options filter_options; + static void reset_timeout(void) { alarm(timeout); @@ -131,6 +138,12 @@ static void create_pack_file(void) argv_array_push(&pack_objects.args, "--delta-base-offset"); if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); + if (filter_options.filter_spec) { + struct strbuf buf = STRBUF_INIT; + sq_quote_buf(&buf, filter_options.filter_spec); + argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); + strbuf_release(&buf); + } pack_objects.in = -1; pack_objects.out = -1; @@ -794,6 +807,12 @@ static void receive_needs(void) deepen_rev_list = 1; continue; } + if (skip_prefix(line, "filter ", &arg)) { + if (!filter_capability_requested) + die("git upload-pack: filtering capability not negotiated"); + parse_list_objects_filter(&filter_options, arg); + continue; + } if (!skip_prefix(line, "want ", &arg) || get_oid_hex(arg, &oid_buf)) die("git upload-pack: protocol error, " @@ -821,6 +840,8 @@ static void receive_needs(void) no_progress = 1; if (parse_feature_request(features, "include-tag")) use_include_tag = 1; + if (parse_feature_request(features, "filter")) + filter_capability_requested = 1; o = parse_object(&oid_buf); if (!o) { @@ -940,7 +961,7 @@ static int send_ref(const char *refname, const struct object_id *oid, struct strbuf symref_info = STRBUF_INIT; format_symref_info(&symref_info, cb_data); - packet_write_fmt(1, "%s %s%c%s%s%s%s%s agent=%s\n", + packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n", oid_to_hex(oid), refname_nons, 0, capabilities, (allow_unadvertised_object_request & ALLOW_TIP_SHA1) ? @@ -949,6 +970,7 @@ static int send_ref(const char *refname, const struct object_id *oid, " allow-reachable-sha1-in-want" : "", stateless_rpc ? " no-done" : "", symref_info.buf, + filter_advertise ? " filter" : "", git_user_agent_sanitized()); strbuf_release(&symref_info); } else { @@ -1027,6 +1049,8 @@ static int upload_pack_config(const char *var, const char *value, void *unused) } else if (current_config_scope() != CONFIG_SCOPE_REPO) { if (!strcmp("uploadpack.packobjectshook", var)) return git_config_string(&pack_objects_hook, var, value); + } else if (!strcmp("uploadpack.allowfilter", var)) { + filter_advertise = git_config_bool(var, value); } return parse_hide_refs_config(var, value, "uploadpack"); } -- cgit v0.10.2-6-g49f6 From 640d8b72feaae0b96d5faa663ad624963e416d54 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:40 +0000 Subject: fetch-pack, index-pack, transport: partial clone Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 15eeed7..7957807 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -153,6 +153,10 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.no_dependents = 1; continue; } + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&args.filter_options, arg); + continue; + } usage(fetch_pack_usage); } if (deepen_not.nr) diff --git a/fetch-pack.c b/fetch-pack.c index 0798e0b..3c5f064 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -29,6 +29,7 @@ static int deepen_not_ok; static int fetch_fsck_objects = -1; static int transfer_fsck_objects = -1; static int agent_supported; +static int server_supports_filtering; static struct lock_file shallow_lock; static const char *alternate_shallow_file; @@ -379,6 +380,8 @@ static int find_common(struct fetch_pack_args *args, if (deepen_not_ok) strbuf_addstr(&c, " deepen-not"); if (agent_supported) strbuf_addf(&c, " agent=%s", git_user_agent_sanitized()); + if (args->filter_options.choice) + strbuf_addstr(&c, " filter"); packet_buf_write(&req_buf, "want %s%s\n", remote_hex, c.buf); strbuf_release(&c); } else @@ -407,6 +410,9 @@ static int find_common(struct fetch_pack_args *args, packet_buf_write(&req_buf, "deepen-not %s", s->string); } } + if (server_supports_filtering && args->filter_options.choice) + packet_buf_write(&req_buf, "filter %s", + args->filter_options.filter_spec); packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -969,6 +975,13 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, else prefer_ofs_delta = 0; + if (server_supports("filter")) { + server_supports_filtering = 1; + print_verbose(args, _("Server supports filter")); + } else if (args->filter_options.choice) { + warning("filtering not recognized by server, ignoring"); + } + if ((agent_feature = server_feature_value("agent", &agent_len))) { agent_supported = 1; if (agent_len) diff --git a/fetch-pack.h b/fetch-pack.h index aeac152..3e224a1 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -3,6 +3,7 @@ #include "string-list.h" #include "run-command.h" +#include "list-objects-filter-options.h" struct oid_array; @@ -12,6 +13,7 @@ struct fetch_pack_args { int depth; const char *deepen_since; const struct string_list *deepen_not; + struct list_objects_filter_options filter_options; unsigned deepen_relative:1; unsigned quiet:1; unsigned keep_pack:1; diff --git a/transport-helper.c b/transport-helper.c index c948d52..0650ca0 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -671,6 +671,11 @@ static int fetch(struct transport *transport, if (data->transport_options.update_shallow) set_helper_option(transport, "update-shallow", "true"); + if (data->transport_options.filter_options.choice) + set_helper_option( + transport, "filter", + data->transport_options.filter_options.filter_spec); + if (data->fetch) return fetch_with_fetch(transport, nr_heads, to_fetch); diff --git a/transport.c b/transport.c index f2fbc6f..cce50f5 100644 --- a/transport.c +++ b/transport.c @@ -166,6 +166,9 @@ static int set_git_option(struct git_transport_options *opts, } else if (!strcmp(name, TRANS_OPT_NO_DEPENDENTS)) { opts->no_dependents = !!value; return 0; + } else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) { + parse_list_objects_filter(&opts->filter_options, value); + return 0; } return 1; } @@ -236,6 +239,7 @@ static int fetch_refs_via_pack(struct transport *transport, args.update_shallow = data->options.update_shallow; args.from_promisor = data->options.from_promisor; args.no_dependents = data->options.no_dependents; + args.filter_options = data->options.filter_options; if (!data->got_remote_heads) { connect_setup(transport, 0); diff --git a/transport.h b/transport.h index c49a8ed..31b1936 100644 --- a/transport.h +++ b/transport.h @@ -4,6 +4,7 @@ #include "cache.h" #include "run-command.h" #include "remote.h" +#include "list-objects-filter-options.h" struct string_list; @@ -23,6 +24,7 @@ struct git_transport_options { const char *uploadpack; const char *receivepack; struct push_cas_option *cas; + struct list_objects_filter_options filter_options; }; enum transport_family { @@ -221,6 +223,9 @@ void transport_check_allowed(const char *type); */ #define TRANS_OPT_NO_DEPENDENTS "no-dependents" +/* Filter objects for partial clone and fetch */ +#define TRANS_OPT_LIST_OBJECTS_FILTER "filter" + /** * Returns 0 if the option was used, non-zero otherwise. Prints a * message to stderr if the option is not used. -- cgit v0.10.2-6-g49f6 From bc2d0c3396fad7b7064c6e9599e6321742538aa0 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:41 +0000 Subject: fetch-pack: add --no-filter Fixup fetch-pack to accept --no-filter to be consistent with rev-list and pack-objects. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 7957807..cbf5035 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -157,6 +157,10 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) parse_list_objects_filter(&args.filter_options, arg); continue; } + if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { + list_objects_filter_release(&args.filter_options); + continue; + } usage(fetch_pack_usage); } if (deepen_not.nr) -- cgit v0.10.2-6-g49f6 From 0b6069fe0a1e694d632e54a43fc6e546e83edad6 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 8 Dec 2017 15:58:42 +0000 Subject: fetch-pack: test support excluding large blobs Created tests to verify fetch-pack and upload-pack support for excluding large blobs using --filter=blobs:limit= parameter. Signed-off-by: Jonathan Tan Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index 80a1a32..c57916b 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -755,4 +755,31 @@ test_expect_success 'fetching deepen' ' ) ' +test_expect_success 'filtering by size' ' + rm -rf server client && + test_create_repo server && + test_commit -C server one && + test_config -C server uploadpack.allowfilter 1 && + + test_create_repo client && + git -C client fetch-pack --filter=blob:limit=0 ../server HEAD && + + # Ensure that object is not inadvertently fetched + test_must_fail git -C client cat-file -e $(git hash-object server/one.t) +' + +test_expect_success 'filtering by size has no effect if support for it is not advertised' ' + rm -rf server client && + test_create_repo server && + test_commit -C server one && + + test_create_repo client && + git -C client fetch-pack --filter=blob:limit=0 ../server HEAD 2> err && + + # Ensure that object is fetched + git -C client cat-file -e $(git hash-object server/one.t) && + + test_i18ngrep "filtering not recognized by server" err +' + test_done diff --git a/upload-pack.c b/upload-pack.c index e6d38b9..15b6605 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -139,10 +139,15 @@ static void create_pack_file(void) if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); if (filter_options.filter_spec) { - struct strbuf buf = STRBUF_INIT; - sq_quote_buf(&buf, filter_options.filter_spec); - argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); - strbuf_release(&buf); + if (pack_objects.use_shell) { + struct strbuf buf = STRBUF_INIT; + sq_quote_buf(&buf, filter_options.filter_spec); + argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf); + strbuf_release(&buf); + } else { + argv_array_pushf(&pack_objects.args, "--filter=%s", + filter_options.filter_spec); + } } pack_objects.in = -1; -- cgit v0.10.2-6-g49f6 From a1743343f410290578fbd6e0ada50b8cdf1e7df8 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 8 Dec 2017 15:58:43 +0000 Subject: fetch: refactor calculation of remote list Separate out the calculation of remotes to be fetched from and the actual fetching. This will allow us to include an additional step before the actual fetching in a subsequent commit. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano diff --git a/builtin/fetch.c b/builtin/fetch.c index 225c734..1b1f039 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1322,7 +1322,7 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) { int i; struct string_list list = STRING_LIST_INIT_DUP; - struct remote *remote; + struct remote *remote = NULL; int result = 0; struct argv_array argv_gc_auto = ARGV_ARRAY_INIT; @@ -1367,17 +1367,14 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) else if (argc > 1) die(_("fetch --all does not make sense with refspecs")); (void) for_each_remote(get_one_remote_for_fetch, &list); - result = fetch_multiple(&list); } else if (argc == 0) { /* No arguments -- use default remote */ remote = remote_get(NULL); - result = fetch_one(remote, argc, argv); } else if (multiple) { /* All arguments are assumed to be remotes or groups */ for (i = 0; i < argc; i++) if (!add_remote_or_group(argv[i], &list)) die(_("No such remote or remote group: %s"), argv[i]); - result = fetch_multiple(&list); } else { /* Single remote or group */ (void) add_remote_or_group(argv[0], &list); @@ -1385,14 +1382,19 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) /* More than one remote */ if (argc > 1) die(_("Fetching a group and specifying refspecs does not make sense")); - result = fetch_multiple(&list); } else { /* Zero or one remotes */ remote = remote_get(argv[0]); - result = fetch_one(remote, argc-1, argv+1); + argc--; + argv++; } } + if (remote) + result = fetch_one(remote, argc, argv); + else + result = fetch_multiple(&list); + if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) { struct argv_array options = ARGV_ARRAY_INIT; -- cgit v0.10.2-6-g49f6 From acb0c57260aa78fc99939de2a27c48b5a3fb4f21 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:44 +0000 Subject: fetch: support filters Teach fetch to support filters. This is only allowed for the remote configured in extensions.partialcloneremote. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano diff --git a/builtin/fetch.c b/builtin/fetch.c index 1b1f039..14aab71 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -18,6 +18,7 @@ #include "argv-array.h" #include "utf8.h" #include "packfile.h" +#include "list-objects-filter-options.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [] [ [...]]"), @@ -55,6 +56,7 @@ static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static int refmap_alloc, refmap_nr; static const char **refmap_array; +static struct list_objects_filter_options filter_options; static int git_fetch_config(const char *k, const char *v, void *cb) { @@ -160,6 +162,7 @@ static struct option builtin_fetch_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -1044,6 +1047,11 @@ static struct transport *prepare_transport(struct remote *remote, int deepen) set_option(transport, TRANS_OPT_DEEPEN_RELATIVE, "yes"); if (update_shallow) set_option(transport, TRANS_OPT_UPDATE_SHALLOW, "yes"); + if (filter_options.choice) { + set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.filter_spec); + set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + } return transport; } @@ -1328,6 +1336,8 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) packet_trace_identity("fetch"); + fetch_if_missing = 0; + /* Record the command line for the reflog */ strbuf_addstr(&default_rla, "fetch"); for (i = 1; i < argc; i++) @@ -1361,6 +1371,9 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) if (depth || deepen_since || deepen_not.nr) deepen = 1; + if (filter_options.choice && !repository_format_partial_clone) + die("--filter can only be used when extensions.partialClone is set"); + if (all) { if (argc == 1) die(_("fetch --all does not take a repository argument")); @@ -1390,10 +1403,16 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) } } - if (remote) + if (remote) { + if (filter_options.choice && + strcmp(remote->name, repository_format_partial_clone)) + die(_("--filter can only be used with the remote configured in core.partialClone")); result = fetch_one(remote, argc, argv); - else + } else { + if (filter_options.choice) + die(_("--filter can only be used with the remote configured in core.partialClone")); result = fetch_multiple(&list); + } if (!result && (recurse_submodules != RECURSE_SUBMODULES_OFF)) { struct argv_array options = ARGV_ARRAY_INIT; diff --git a/connected.c b/connected.c index f416b05..3a5bd67 100644 --- a/connected.c +++ b/connected.c @@ -56,6 +56,8 @@ int check_connected(sha1_iterate_fn fn, void *cb_data, argv_array_push(&rev_list.args,"rev-list"); argv_array_push(&rev_list.args, "--objects"); argv_array_push(&rev_list.args, "--stdin"); + if (repository_format_partial_clone) + argv_array_push(&rev_list.args, "--exclude-promisor-objects"); argv_array_push(&rev_list.args, "--not"); argv_array_push(&rev_list.args, "--all"); argv_array_push(&rev_list.args, "--quiet"); diff --git a/remote-curl.c b/remote-curl.c index 4318391..6ec5352 100644 --- a/remote-curl.c +++ b/remote-curl.c @@ -24,6 +24,7 @@ struct options { char *deepen_since; struct string_list deepen_not; struct string_list push_options; + char *filter; unsigned progress : 1, check_self_contained_and_connected : 1, cloning : 1, @@ -165,6 +166,9 @@ static int set_option(const char *name, const char *value) } else if (!strcmp(name, "no-dependents")) { options.no_dependents = 1; return 0; + } else if (!strcmp(name, "filter")) { + options.filter = xstrdup(value);; + return 0; } else { return 1 /* unsupported */; } @@ -834,6 +838,8 @@ static int fetch_git(struct discovery *heads, argv_array_push(&args, "--from-promisor"); if (options.no_dependents) argv_array_push(&args, "--no-dependents"); + if (options.filter) + argv_array_pushf(&args, "--filter=%s", options.filter); argv_array_push(&args, url.buf); for (i = 0; i < nr_heads; i++) { diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh index c57916b..ec9ba9b 100755 --- a/t/t5500-fetch-pack.sh +++ b/t/t5500-fetch-pack.sh @@ -782,4 +782,40 @@ test_expect_success 'filtering by size has no effect if support for it is not ad test_i18ngrep "filtering not recognized by server" err ' +fetch_filter_blob_limit_zero () { + SERVER="$1" + URL="$2" + + rm -rf "$SERVER" client && + test_create_repo "$SERVER" && + test_commit -C "$SERVER" one && + test_config -C "$SERVER" uploadpack.allowfilter 1 && + + git clone "$URL" client && + test_config -C client extensions.partialclone origin && + + test_commit -C "$SERVER" two && + + git -C client fetch --filter=blob:limit=0 origin HEAD:somewhere && + + # Ensure that commit is fetched, but blob is not + test_config -C client extensions.partialclone "arbitrary string" && + git -C client cat-file -e $(git -C "$SERVER" rev-parse two) && + test_must_fail git -C client cat-file -e $(git hash-object "$SERVER/two.t") +} + +test_expect_success 'fetch with --filter=blob:limit=0' ' + fetch_filter_blob_limit_zero server server +' + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' ' + fetch_filter_blob_limit_zero "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server" +' + +stop_httpd + + test_done -- cgit v0.10.2-6-g49f6 From 1e1e39b308fe9dd68a3312992a06270da7dd07af Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:45 +0000 Subject: partial-clone: define partial clone settings in config Create get and set routines for "partial clone" config settings. These will be used in a future commit by clone and fetch to remember the promisor remote and the default filter-spec. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 6980072..bccc510 100644 --- a/cache.h +++ b/cache.h @@ -861,6 +861,7 @@ extern int grafts_replace_parents; #define GIT_REPO_VERSION_READ 1 extern int repository_format_precious_objects; extern char *repository_format_partial_clone; +extern const char *core_partial_clone_filter_default; struct repository_format { int version; diff --git a/config.c b/config.c index adb7d7a..adeee04 100644 --- a/config.c +++ b/config.c @@ -1241,6 +1241,11 @@ static int git_default_core_config(const char *var, const char *value) return 0; } + if (!strcmp(var, "core.partialclonefilter")) { + return git_config_string(&core_partial_clone_filter_default, + var, value); + } + /* Add other config variables here and to Documentation/config.txt. */ return 0; } diff --git a/environment.c b/environment.c index e52aab3..7537565 100644 --- a/environment.c +++ b/environment.c @@ -28,6 +28,7 @@ int warn_on_object_refname_ambiguity = 1; int ref_paranoia = -1; int repository_format_precious_objects; char *repository_format_partial_clone; +const char *core_partial_clone_filter_default; const char *git_commit_encoding; const char *git_log_output_encoding; const char *apply_default_whitespace; diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 4c5b34e..5c47e2b 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -21,29 +21,36 @@ * subordinate commands when necessary. We also "intern" the arg for * the convenience of the current command. */ -int parse_list_objects_filter(struct list_objects_filter_options *filter_options, - const char *arg) +static int gently_parse_list_objects_filter( + struct list_objects_filter_options *filter_options, + const char *arg, + struct strbuf *errbuf) { const char *v0; - if (filter_options->choice) - die(_("multiple object filter types cannot be combined")); + if (filter_options->choice) { + if (errbuf) { + strbuf_init(errbuf, 0); + strbuf_addstr( + errbuf, + _("multiple filter-specs cannot be combined")); + } + return 1; + } filter_options->filter_spec = strdup(arg); if (!strcmp(arg, "blob:none")) { filter_options->choice = LOFC_BLOB_NONE; return 0; - } - if (skip_prefix(arg, "blob:limit=", &v0)) { - if (!git_parse_ulong(v0, &filter_options->blob_limit_value)) - die(_("invalid filter-spec expression '%s'"), arg); - filter_options->choice = LOFC_BLOB_LIMIT; - return 0; - } + } else if (skip_prefix(arg, "blob:limit=", &v0)) { + if (git_parse_ulong(v0, &filter_options->blob_limit_value)) { + filter_options->choice = LOFC_BLOB_LIMIT; + return 0; + } - if (skip_prefix(arg, "sparse:oid=", &v0)) { + } else if (skip_prefix(arg, "sparse:oid=", &v0)) { struct object_context oc; struct object_id sparse_oid; @@ -57,15 +64,27 @@ int parse_list_objects_filter(struct list_objects_filter_options *filter_options filter_options->sparse_oid_value = oiddup(&sparse_oid); filter_options->choice = LOFC_SPARSE_OID; return 0; - } - if (skip_prefix(arg, "sparse:path=", &v0)) { + } else if (skip_prefix(arg, "sparse:path=", &v0)) { filter_options->choice = LOFC_SPARSE_PATH; filter_options->sparse_path_value = strdup(v0); return 0; } - die(_("invalid filter-spec expression '%s'"), arg); + if (errbuf) { + strbuf_init(errbuf, 0); + strbuf_addf(errbuf, "invalid filter-spec '%s'", arg); + } + memset(filter_options, 0, sizeof(*filter_options)); + return 1; +} + +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg) +{ + struct strbuf buf = STRBUF_INIT; + if (gently_parse_list_objects_filter(filter_options, arg, &buf)) + die("%s", buf.buf); return 0; } @@ -90,3 +109,44 @@ void list_objects_filter_release( free(filter_options->sparse_path_value); memset(filter_options, 0, sizeof(*filter_options)); } + +void partial_clone_register( + const char *remote, + const struct list_objects_filter_options *filter_options) +{ + /* + * Record the name of the partial clone remote in the + * config and in the global variable -- the latter is + * used throughout to indicate that partial clone is + * enabled and to expect missing objects. + */ + if (repository_format_partial_clone && + *repository_format_partial_clone && + strcmp(remote, repository_format_partial_clone)) + die(_("cannot change partial clone promisor remote")); + + git_config_set("core.repositoryformatversion", "1"); + git_config_set("extensions.partialclone", remote); + + repository_format_partial_clone = xstrdup(remote); + + /* + * Record the initial filter-spec in the config as + * the default for subsequent fetches from this remote. + */ + core_partial_clone_filter_default = + xstrdup(filter_options->filter_spec); + git_config_set("core.partialclonefilter", + core_partial_clone_filter_default); +} + +void partial_clone_get_default_filter_spec( + struct list_objects_filter_options *filter_options) +{ + /* + * Parse default value, but silently ignore it if it is invalid. + */ + gently_parse_list_objects_filter(filter_options, + core_partial_clone_filter_default, + NULL); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index eea44a1..1143539 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -58,4 +58,10 @@ int opt_parse_list_objects_filter(const struct option *opt, void list_objects_filter_release( struct list_objects_filter_options *filter_options); +void partial_clone_register( + const char *remote, + const struct list_objects_filter_options *filter_options); +void partial_clone_get_default_filter_spec( + struct list_objects_filter_options *filter_options); + #endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ -- cgit v0.10.2-6-g49f6 From 548719fbdc4ccd5d678afafbb5e3b5c8dac28489 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 8 Dec 2017 15:58:46 +0000 Subject: clone: partial clone Signed-off-by: Jonathan Tan Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/builtin/clone.c b/builtin/clone.c index dbddd98..f519bd4 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -26,6 +26,7 @@ #include "run-command.h" #include "connected.h" #include "packfile.h" +#include "list-objects-filter-options.h" /* * Overall FIXMEs: @@ -60,6 +61,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; +static struct list_objects_filter_options filter_options; static int recurse_submodules_cb(const struct option *opt, const char *arg, int unset) @@ -135,6 +137,7 @@ static struct option builtin_clone_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -886,6 +889,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) struct refspec *refspec; const char *fetch_pattern; + fetch_if_missing = 0; + packet_trace_identity("clone"); argc = parse_options(argc, argv, prefix, builtin_clone_options, builtin_clone_usage, 0); @@ -1073,6 +1078,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) warning(_("--shallow-since is ignored in local clones; use file:// instead.")); if (option_not.nr) warning(_("--shallow-exclude is ignored in local clones; use file:// instead.")); + if (filter_options.choice) + warning(_("--filter is ignored in local clones; use file:// instead.")); if (!access(mkpath("%s/shallow", path), F_OK)) { if (option_local > 0) warning(_("source repository is shallow, ignoring --local")); @@ -1104,7 +1111,13 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_set_option(transport, TRANS_OPT_UPLOADPACK, option_upload_pack); - if (transport->smart_options && !deepen) + if (filter_options.choice) { + transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.filter_spec); + transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); + } + + if (transport->smart_options && !deepen && !filter_options.choice) transport->smart_options->check_self_contained_and_connected = 1; refs = transport_get_remote_refs(transport); @@ -1164,13 +1177,17 @@ int cmd_clone(int argc, const char **argv, const char *prefix) write_refspec_config(src_ref_prefix, our_head_points_at, remote_head_points_at, &branch_top); + if (filter_options.choice) + partial_clone_register("origin", &filter_options); + if (is_local) clone_local(path, git_dir); else if (refs && complete_refs_before_fetch) transport_fetch_refs(transport, mapped_refs); update_remote_refs(refs, mapped_refs, remote_head_points_at, - branch_top.buf, reflog_msg.buf, transport, !is_local); + branch_top.buf, reflog_msg.buf, transport, + !is_local && !filter_options.choice); update_head(our_head_points_at, remote_head, reflog_msg.buf); @@ -1191,6 +1208,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) } junk_mode = JUNK_LEAVE_REPO; + fetch_if_missing = 1; err = checkout(submodule_progress); strbuf_release(&reflog_msg); diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 9c56f77..6d37c6d 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -571,4 +571,53 @@ test_expect_success 'GIT_TRACE_PACKFILE produces a usable pack' ' git -C replay.git index-pack -v --stdin err && + + test_i18ngrep "filtering not recognized by server" err +' + +. "$TEST_DIRECTORY"/lib-httpd.sh +start_httpd + +test_expect_success 'partial clone using HTTP' ' + partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server" +' + +stop_httpd + test_done -- cgit v0.10.2-6-g49f6 From c0c578b33ca48ee6003d85706e13d97d12781354 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 8 Dec 2017 15:58:47 +0000 Subject: unpack-trees: batch fetching of missing blobs When running checkout, first prefetch all blobs that are to be updated but are missing. This means that only one pack is downloaded during such operations, instead of one per missing blob. This operates only on the blob level - if a repository has a missing tree, they are still fetched one at a time. This does not use the delayed checkout mechanism introduced in commit 2841e8f ("convert: add "status=delayed" to filter process protocol", 2017-06-30) due to significant conceptual differences - in particular, for partial clones, we already know what needs to be fetched based on the contents of the local repo alone, whereas for status=delayed, it is the filter process that tells us what needs to be checked in the end. Signed-off-by: Jonathan Tan Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/fetch-object.c b/fetch-object.c index 258fcfa..853624f 100644 --- a/fetch-object.c +++ b/fetch-object.c @@ -5,11 +5,10 @@ #include "transport.h" #include "fetch-object.h" -void fetch_object(const char *remote_name, const unsigned char *sha1) +static void fetch_refs(const char *remote_name, struct ref *ref) { struct remote *remote; struct transport *transport; - struct ref *ref; int original_fetch_if_missing = fetch_if_missing; fetch_if_missing = 0; @@ -18,10 +17,29 @@ void fetch_object(const char *remote_name, const unsigned char *sha1) die(_("Remote with no URL")); transport = transport_get(remote, remote->url[0]); - ref = alloc_ref(sha1_to_hex(sha1)); - hashcpy(ref->old_oid.hash, sha1); transport_set_option(transport, TRANS_OPT_FROM_PROMISOR, "1"); transport_set_option(transport, TRANS_OPT_NO_DEPENDENTS, "1"); transport_fetch_refs(transport, ref); fetch_if_missing = original_fetch_if_missing; } + +void fetch_object(const char *remote_name, const unsigned char *sha1) +{ + struct ref *ref = alloc_ref(sha1_to_hex(sha1)); + hashcpy(ref->old_oid.hash, sha1); + fetch_refs(remote_name, ref); +} + +void fetch_objects(const char *remote_name, const struct oid_array *to_fetch) +{ + struct ref *ref = NULL; + int i; + + for (i = 0; i < to_fetch->nr; i++) { + struct ref *new_ref = alloc_ref(oid_to_hex(&to_fetch->oid[i])); + oidcpy(&new_ref->old_oid, &to_fetch->oid[i]); + new_ref->next = ref; + ref = new_ref; + } + fetch_refs(remote_name, ref); +} diff --git a/fetch-object.h b/fetch-object.h index f371300..4b269d0 100644 --- a/fetch-object.h +++ b/fetch-object.h @@ -1,6 +1,11 @@ #ifndef FETCH_OBJECT_H #define FETCH_OBJECT_H +#include "sha1-array.h" + extern void fetch_object(const char *remote_name, const unsigned char *sha1); +extern void fetch_objects(const char *remote_name, + const struct oid_array *to_fetch); + #endif diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh index 6d37c6d..13610b7 100755 --- a/t/t5601-clone.sh +++ b/t/t5601-clone.sh @@ -611,6 +611,58 @@ test_expect_success 'partial clone: warn if server does not support object filte test_i18ngrep "filtering not recognized by server" err ' +test_expect_success 'batch missing blob request during checkout' ' + rm -rf server client && + + test_create_repo server && + echo a >server/a && + echo b >server/b && + git -C server add a b && + + git -C server commit -m x && + echo aa >server/a && + echo bb >server/b && + git -C server add a b && + git -C server commit -m x && + + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + + git clone --filter=blob:limit=0 "file://$(pwd)/server" client && + + # Ensure that there is only one negotiation by checking that there is + # only "done" line sent. ("done" marks the end of negotiation.) + GIT_TRACE_PACKET="$(pwd)/trace" git -C client checkout HEAD^ && + grep "git> done" trace >done_lines && + test_line_count = 1 done_lines +' + +test_expect_success 'batch missing blob request does not inadvertently try to fetch gitlinks' ' + rm -rf server client && + + test_create_repo repo_for_submodule && + test_commit -C repo_for_submodule x && + + test_create_repo server && + echo a >server/a && + echo b >server/b && + git -C server add a b && + git -C server commit -m x && + + echo aa >server/a && + echo bb >server/b && + # Also add a gitlink pointing to an arbitrary repository + git -C server submodule add "$(pwd)/repo_for_submodule" c && + git -C server add a b c && + git -C server commit -m x && + + test_config -C server uploadpack.allowfilter 1 && + test_config -C server uploadpack.allowanysha1inwant 1 && + + # Make sure that it succeeds + git clone --filter=blob:limit=0 "file://$(pwd)/server" client +' + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd diff --git a/unpack-trees.c b/unpack-trees.c index 71b70cc..73a1cdb 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -14,6 +14,7 @@ #include "dir.h" #include "submodule.h" #include "submodule-config.h" +#include "fetch-object.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -369,6 +370,27 @@ static int check_updates(struct unpack_trees_options *o) load_gitmodules_file(index, &state); enable_delayed_checkout(&state); + if (repository_format_partial_clone && o->update && !o->dry_run) { + /* + * Prefetch the objects that are to be checked out in the loop + * below. + */ + struct oid_array to_fetch = OID_ARRAY_INIT; + int fetch_if_missing_store = fetch_if_missing; + fetch_if_missing = 0; + for (i = 0; i < index->cache_nr; i++) { + struct cache_entry *ce = index->cache[i]; + if ((ce->ce_flags & CE_UPDATE) && + !S_ISGITLINK(ce->ce_mode)) { + if (!has_object_file(&ce->oid)) + oid_array_append(&to_fetch, &ce->oid); + } + } + if (to_fetch.nr) + fetch_objects(repository_format_partial_clone, + &to_fetch); + fetch_if_missing = fetch_if_missing_store; + } for (i = 0; i < index->cache_nr; i++) { struct cache_entry *ce = index->cache[i]; -- cgit v0.10.2-6-g49f6 From a1c6d7c1a792bd2c60bbfb07597d13f7d0c4eb37 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Fri, 8 Dec 2017 15:58:48 +0000 Subject: fetch-pack: restore save_commit_buffer after use In fetch-pack, the global variable save_commit_buffer is set to 0, but not restored to its original value after use. In particular, if show_log() (in log-tree.c) is invoked after fetch_pack() in the same process, show_log() will return before printing out the commit message (because the invocation to get_cached_commit_buffer() returns NULL, because the commit buffer was not saved). I discovered this when attempting to run "git log -S" in a partial clone, triggering the case where revision walking lazily loads missing objects. Therefore, restore save_commit_buffer to its original value after use. An alternative to solve the problem I had is to replace get_cached_commit_buffer() with get_commit_buffer(). That invocation was introduced in commit a97934d ("use get_cached_commit_buffer where appropriate", 2014-06-13) to replace "commit->buffer" introduced in commit 3131b71 ("Add "--show-all" revision walker flag for debugging", 2008-02-13). In the latter commit, the commit author seems to be deciding between not showing an unparsed commit at all and showing an unparsed commit without the message (which is what the commit does), and did not mention parsing the unparsed commit, so I prefer to preserve the existing behavior. Signed-off-by: Jonathan Tan Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/fetch-pack.c b/fetch-pack.c index 3c5f064..773453c 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -717,6 +717,7 @@ static int everything_local(struct fetch_pack_args *args, { struct ref *ref; int retval; + int old_save_commit_buffer = save_commit_buffer; timestamp_t cutoff = 0; save_commit_buffer = 0; @@ -786,6 +787,9 @@ static int everything_local(struct fetch_pack_args *args, print_verbose(args, _("already have %s (%s)"), oid_to_hex(remote), ref->name); } + + save_commit_buffer = old_save_commit_buffer; + return retval; } -- cgit v0.10.2-6-g49f6 From 35a7ae952ffee0d47d4f6302163488abea3bf418 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:49 +0000 Subject: t5616: end-to-end tests for partial clone Additional end-to-end tests for partial clone. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh new file mode 100755 index 0000000..3b8cc0b --- /dev/null +++ b/t/t5616-partial-clone.sh @@ -0,0 +1,96 @@ +#!/bin/sh + +test_description='git partial clone' + +. ./test-lib.sh + +# create a normal "src" repo where we can later create new commits. +# expect_1.oids will contain a list of the OIDs of all blobs. +test_expect_success 'setup normal src repo' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + git init src && + for n in 1 2 3 4 + do + echo "This is file: $n" > src/file.$n.txt + git -C src add file.$n.txt + git -C src commit -m "file $n" + git -C src ls-files -s file.$n.txt >>temp + done && + awk -f print_2.awk expect_1.oids && + test_line_count = 4 expect_1.oids +' + +# bare clone "src" giving "srv.bare" for use as our server. +test_expect_success 'setup bare clone for server' ' + git clone --bare "file://$(pwd)/src" srv.bare && + git -C srv.bare config --local uploadpack.allowfilter 1 && + git -C srv.bare config --local uploadpack.allowanysha1inwant 1 +' + +# do basic partial clone from "srv.bare" +# confirm we are missing all of the known blobs. +# confirm partial clone was registered in the local config. +test_expect_success 'do partial clone 1' ' + git clone --no-checkout --filter=blob:none "file://$(pwd)/srv.bare" pc1 && + git -C pc1 rev-list HEAD --quiet --objects --missing=print \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed.oids && + test_cmp expect_1.oids observed.oids && + test "$(git -C pc1 config --local core.repositoryformatversion)" = "1" && + test "$(git -C pc1 config --local extensions.partialclone)" = "origin" && + test "$(git -C pc1 config --local core.partialclonefilter)" = "blob:none" +' + +# checkout master to force dynamic object fetch of blobs at HEAD. +test_expect_success 'verify checkout with dynamic object fetch' ' + git -C pc1 rev-list HEAD --quiet --objects --missing=print >observed && + test_line_count = 4 observed && + git -C pc1 checkout master && + git -C pc1 rev-list HEAD --quiet --objects --missing=print >observed && + test_line_count = 0 observed +' + +# create new commits in "src" repo to establish a blame history on file.1.txt +# and push to "srv.bare". +test_expect_success 'push new commits to server' ' + git -C src remote add srv "file://$(pwd)/srv.bare" && + for x in a b c d e + do + echo "Mod $x" >>src/file.1.txt + git -C src add file.1.txt + git -C src commit -m "mod $x" + done && + git -C src blame master -- file.1.txt >expect.blame && + git -C src push -u srv master +' + +# (partial) fetch in the partial clone repo from the promisor remote. +# verify that fetch inherited the filter-spec from the config and DOES NOT +# have the new blobs. +test_expect_success 'partial fetch inherits filter settings' ' + git -C pc1 fetch origin && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 5 observed +' + +# force dynamic object fetch using diff. +# we should only get 1 new blob (for the file in origin/master). +test_expect_success 'verify diff causes dynamic object fetch' ' + git -C pc1 diff master..origin/master -- file.1.txt && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 4 observed +' + +# force full dynamic object fetch of the file's history using blame. +# we should get the intermediate blobs for the file. +test_expect_success 'verify blame causes dynamic object fetch' ' + git -C pc1 blame origin/master -- file.1.txt >observed.blame && + test_cmp expect.blame observed.blame && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 0 observed +' + +test_done -- cgit v0.10.2-6-g49f6 From aa57b871dad3cca07abead9a8d1fefceffe7578d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:50 +0000 Subject: fetch: inherit filter-spec from partial clone Teach (partial) fetch to inherit the filter-spec used by the partial clone. Extend --no-filter to override this inheritance. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index cbf5035..a7bc136 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -158,7 +158,7 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) continue; } if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { - list_objects_filter_release(&args.filter_options); + list_objects_filter_set_no_filter(&args.filter_options); continue; } usage(fetch_pack_usage); diff --git a/builtin/fetch.c b/builtin/fetch.c index 14aab71..79c866c 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -1275,6 +1275,56 @@ static int fetch_multiple(struct string_list *list) return result; } +/* + * Fetching from the promisor remote should use the given filter-spec + * or inherit the default filter-spec from the config. + */ +static inline void fetch_one_setup_partial(struct remote *remote) +{ + /* + * Explicit --no-filter argument overrides everything, regardless + * of any prior partial clones and fetches. + */ + if (filter_options.no_filter) + return; + + /* + * If no prior partial clone/fetch and the current fetch DID NOT + * request a partial-fetch, do a normal fetch. + */ + if (!repository_format_partial_clone && !filter_options.choice) + return; + + /* + * If this is the FIRST partial-fetch request, we enable partial + * on this repo and remember the given filter-spec as the default + * for subsequent fetches to this remote. + */ + if (!repository_format_partial_clone && filter_options.choice) { + partial_clone_register(remote->name, &filter_options); + return; + } + + /* + * We are currently limited to only ONE promisor remote and only + * allow partial-fetches from the promisor remote. + */ + if (strcmp(remote->name, repository_format_partial_clone)) { + if (filter_options.choice) + die(_("--filter can only be used with the remote configured in core.partialClone")); + return; + } + + /* + * Do a partial-fetch from the promisor remote using either the + * explicitly given filter-spec or inherit the filter-spec from + * the config. + */ + if (!filter_options.choice) + partial_clone_get_default_filter_spec(&filter_options); + return; +} + static int fetch_one(struct remote *remote, int argc, const char **argv) { static const char **refs = NULL; @@ -1404,13 +1454,13 @@ int cmd_fetch(int argc, const char **argv, const char *prefix) } if (remote) { - if (filter_options.choice && - strcmp(remote->name, repository_format_partial_clone)) - die(_("--filter can only be used with the remote configured in core.partialClone")); + if (filter_options.choice || repository_format_partial_clone) + fetch_one_setup_partial(remote); result = fetch_one(remote, argc, argv); } else { if (filter_options.choice) die(_("--filter can only be used with the remote configured in core.partialClone")); + /* TODO should this also die if we have a previous partial-clone? */ result = fetch_multiple(&list); } diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 48f922d..8503dea 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -460,7 +460,7 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) continue; } if (!strcmp(arg, ("--no-" CL_ARG__FILTER))) { - list_objects_filter_release(&filter_options); + list_objects_filter_set_no_filter(&filter_options); continue; } if (!strcmp(arg, "--filter-print-omitted")) { diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c index 5c47e2b..6a3cc98 100644 --- a/list-objects-filter-options.c +++ b/list-objects-filter-options.c @@ -94,7 +94,7 @@ int opt_parse_list_objects_filter(const struct option *opt, struct list_objects_filter_options *filter_options = opt->value; if (unset || !arg) { - list_objects_filter_release(filter_options); + list_objects_filter_set_no_filter(filter_options); return 0; } diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h index 1143539..0000a61 100644 --- a/list-objects-filter-options.h +++ b/list-objects-filter-options.h @@ -31,6 +31,11 @@ struct list_objects_filter_options { enum list_objects_filter_choice choice; /* + * Choice is LOFC_DISABLED because "--no-filter" was requested. + */ + unsigned int no_filter : 1; + + /* * Parsed values (fields) from within the filter-spec. These are * choice-specific; not all values will be defined for any given * choice. @@ -58,6 +63,13 @@ int opt_parse_list_objects_filter(const struct option *opt, void list_objects_filter_release( struct list_objects_filter_options *filter_options); +static inline void list_objects_filter_set_no_filter( + struct list_objects_filter_options *filter_options) +{ + list_objects_filter_release(filter_options); + filter_options->no_filter = 1; +} + void partial_clone_register( const char *remote, const struct list_objects_filter_options *filter_options); diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 3b8cc0b..433e07e 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -59,7 +59,7 @@ test_expect_success 'push new commits to server' ' git -C src remote add srv "file://$(pwd)/srv.bare" && for x in a b c d e do - echo "Mod $x" >>src/file.1.txt + echo "Mod file.1.txt $x" >>src/file.1.txt git -C src add file.1.txt git -C src commit -m "mod $x" done && @@ -93,4 +93,24 @@ test_expect_success 'verify blame causes dynamic object fetch' ' test_line_count = 0 observed ' +# create new commits in "src" repo to establish a history on file.2.txt +# and push to "srv.bare". +test_expect_success 'push new commits to server for file.2.txt' ' + for x in a b c d e f + do + echo "Mod file.2.txt $x" >>src/file.2.txt + git -C src add file.2.txt + git -C src commit -m "mod $x" + done && + git -C src push -u srv master +' + +# Do FULL fetch by disabling filter-spec using --no-filter. +# Verify we have all the new blobs. +test_expect_success 'override inherited filter-spec using --no-filter' ' + git -C pc1 fetch --no-filter origin && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print >observed && + test_line_count = 0 observed +' + test_done -- cgit v0.10.2-6-g49f6 From 3aa6694fb3d38a3afe623ccbdf59fb15f338a94d Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 8 Dec 2017 15:58:51 +0000 Subject: t5616: test bulk prefetch after partial fetch Add test to t5616 to bulk fetch missing objects following a partial fetch. A technique like this could be used in a pre-command hook for example. Signed-off-by: Jeff Hostetler Signed-off-by: Junio C Hamano diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index 433e07e..29d8631 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -105,7 +105,7 @@ test_expect_success 'push new commits to server for file.2.txt' ' git -C src push -u srv master ' -# Do FULL fetch by disabling filter-spec using --no-filter. +# Do FULL fetch by disabling inherited filter-spec using --no-filter. # Verify we have all the new blobs. test_expect_success 'override inherited filter-spec using --no-filter' ' git -C pc1 fetch --no-filter origin && @@ -113,4 +113,34 @@ test_expect_success 'override inherited filter-spec using --no-filter' ' test_line_count = 0 observed ' +# create new commits in "src" repo to establish a history on file.3.txt +# and push to "srv.bare". +test_expect_success 'push new commits to server for file.3.txt' ' + for x in a b c d e f + do + echo "Mod file.3.txt $x" >>src/file.3.txt + git -C src add file.3.txt + git -C src commit -m "mod $x" + done && + git -C src push -u srv master +' + +# Do a partial fetch and then try to manually fetch the missing objects. +# This can be used as the basis of a pre-command hook to bulk fetch objects +# perhaps combined with a command in dry-run mode. +test_expect_success 'manual prefetch of missing objects' ' + git -C pc1 fetch --filter=blob:none origin && + git -C pc1 rev-list master..origin/master --quiet --objects --missing=print \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed.oids && + test_line_count = 6 observed.oids && + git -C pc1 fetch-pack --stdin "file://$(pwd)/srv.bare" observed.oids && + test_line_count = 0 observed.oids +' + test_done -- cgit v0.10.2-6-g49f6