From a039a1fcf987de3a8209cc5229324d37d9ead16e Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 6 Apr 2021 18:47:46 +0000 Subject: maintenance: simplify prefetch logic The previous logic filled a string list with the names of each remote, but instead we could simply run the appropriate 'git fetch' data directly in the remote iterator. Do this for reduced code size, but also because it sets up an upcoming change to use the remote's refspec. This data is accessible from the 'struct remote' data that is now accessible in fetch_remote(). Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/builtin/gc.c b/builtin/gc.c index ef7226d..fa8128d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -873,55 +873,38 @@ static int maintenance_task_commit_graph(struct maintenance_run_opts *opts) return 0; } -static int fetch_remote(const char *remote, struct maintenance_run_opts *opts) +static int fetch_remote(struct remote *remote, void *cbdata) { + struct maintenance_run_opts *opts = cbdata; struct child_process child = CHILD_PROCESS_INIT; child.git_cmd = 1; - strvec_pushl(&child.args, "fetch", remote, "--prune", "--no-tags", + strvec_pushl(&child.args, "fetch", remote->name, "--prune", "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", "--refmap=", NULL); if (opts->quiet) strvec_push(&child.args, "--quiet"); - strvec_pushf(&child.args, "+refs/heads/*:refs/prefetch/%s/*", remote); + strvec_pushf(&child.args, "+refs/heads/*:refs/prefetch/%s/*", remote->name); return !!run_command(&child); } -static int append_remote(struct remote *remote, void *cbdata) -{ - struct string_list *remotes = (struct string_list *)cbdata; - - string_list_append(remotes, remote->name); - return 0; -} - static int maintenance_task_prefetch(struct maintenance_run_opts *opts) { - int result = 0; - struct string_list_item *item; - struct string_list remotes = STRING_LIST_INIT_DUP; - git_config_set_multivar_gently("log.excludedecoration", "refs/prefetch/", "refs/prefetch/", CONFIG_FLAGS_FIXED_VALUE | CONFIG_FLAGS_MULTI_REPLACE); - if (for_each_remote(append_remote, &remotes)) { - error(_("failed to fill remotes")); - result = 1; - goto cleanup; + if (for_each_remote(fetch_remote, opts)) { + error(_("failed to prefetch remotes")); + return 1; } - for_each_string_list_item(item, &remotes) - result |= fetch_remote(item->string, opts); - -cleanup: - string_list_clear(&remotes, 0); - return result; + return 0; } static int maintenance_task_gc(struct maintenance_run_opts *opts) -- cgit v0.10.2-6-g49f6 From 2e03115d0c253843953ef9d113c72e0375892df4 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 Apr 2021 12:49:57 +0000 Subject: fetch: add --prefetch option The --prefetch option will be used by the 'prefetch' maintenance task instead of sending refspecs explicitly across the command-line. The intention is to modify the refspec to place all results in refs/prefetch/ instead of anywhere else. Create helper method filter_prefetch_refspec() to modify a given refspec to fit the rules expected of the prefetch task: * Negative refspecs are preserved. * Refspecs without a destination are removed. * Refspecs whose source starts with "refs/tags/" are removed. * Other refspecs are placed within "refs/prefetch/". Finally, we add the 'force' option to ensure that prefetch refs are replaced as necessary. There are some interesting cases that are worth testing. An earlier version of this change dropped the "i--" from the loop that deletes a refspec item and shifts the remaining entries down. This allowed some refspecs to not be modified. The subtle part about the first --prefetch test is that the "refs/tags/*" refspec appears directly before the "refs/heads/bogus/*" refspec. Without that "i--", this ordering would remove the "refs/tags/*" refspec and leave the last one unmodified, placing the result in "refs/heads/*". It is possible to have an empty refspec. This is typically the case for remotes other than the origin, where users want to fetch a specific tag or branch. To correctly test this case, we need to further remove the upstream remote for the local branch. Thus, we are testing a refspec that will be deleted, leaving nothing to fetch. Helped-by: Tom Saeger Helped-by: Ramsay Jones Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt index 07783de..9e7b4e1 100644 --- a/Documentation/fetch-options.txt +++ b/Documentation/fetch-options.txt @@ -110,6 +110,11 @@ ifndef::git-pull[] setting `fetch.writeCommitGraph`. endif::git-pull[] +--prefetch:: + Modify the configured refspec to place all refs into the + `refs/prefetch/` namespace. See the `prefetch` task in + linkgit:git-maintenance[1]. + -p:: --prune:: Before fetching, remove any remote-tracking references that no diff --git a/builtin/fetch.c b/builtin/fetch.c index 0b90de8..97c4fe6 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -48,6 +48,7 @@ enum { static int fetch_prune_config = -1; /* unspecified */ static int fetch_show_forced_updates = 1; static uint64_t forced_updates_ms = 0; +static int prefetch = 0; static int prune = -1; /* unspecified */ #define PRUNE_BY_DEFAULT 0 /* do we prune by default? */ @@ -158,6 +159,8 @@ static struct option builtin_fetch_options[] = { N_("do not fetch all tags (--no-tags)"), TAGS_UNSET), OPT_INTEGER('j', "jobs", &max_jobs, N_("number of submodules fetched in parallel")), + OPT_BOOL(0, "prefetch", &prefetch, + N_("modify the refspec to place all refs within refs/prefetch/")), OPT_BOOL('p', "prune", &prune, N_("prune remote-tracking branches no longer on remote")), OPT_BOOL('P', "prune-tags", &prune_tags, @@ -436,6 +439,56 @@ static void find_non_local_tags(const struct ref *refs, oidset_clear(&fetch_oids); } +static void filter_prefetch_refspec(struct refspec *rs) +{ + int i; + + if (!prefetch) + return; + + for (i = 0; i < rs->nr; i++) { + struct strbuf new_dst = STRBUF_INIT; + char *old_dst; + const char *sub = NULL; + + if (rs->items[i].negative) + continue; + if (!rs->items[i].dst || + (rs->items[i].src && + !strncmp(rs->items[i].src, "refs/tags/", 10))) { + int j; + + free(rs->items[i].src); + free(rs->items[i].dst); + + for (j = i + 1; j < rs->nr; j++) { + rs->items[j - 1] = rs->items[j]; + rs->raw[j - 1] = rs->raw[j]; + } + rs->nr--; + i--; + continue; + } + + old_dst = rs->items[i].dst; + strbuf_addstr(&new_dst, "refs/prefetch/"); + + /* + * If old_dst starts with "refs/", then place + * sub after that prefix. Otherwise, start at + * the beginning of the string. + */ + if (!skip_prefix(old_dst, "refs/", &sub)) + sub = old_dst; + strbuf_addstr(&new_dst, sub); + + rs->items[i].dst = strbuf_detach(&new_dst, NULL); + rs->items[i].force = 1; + + free(old_dst); + } +} + static struct ref *get_ref_map(struct remote *remote, const struct ref *remote_refs, struct refspec *rs, @@ -452,6 +505,10 @@ static struct ref *get_ref_map(struct remote *remote, struct hashmap existing_refs; int existing_refs_populated = 0; + filter_prefetch_refspec(rs); + if (remote) + filter_prefetch_refspec(&remote->fetch); + if (rs->nr) { struct refspec *fetch_refspec; @@ -520,7 +577,7 @@ static struct ref *get_ref_map(struct remote *remote, if (has_merge && !strcmp(branch->remote_name, remote->name)) add_merge_config(&ref_map, remote_refs, branch, &tail); - } else { + } else if (!prefetch) { ref_map = get_remote_ref(remote_refs, "HEAD"); if (!ref_map) die(_("Couldn't find remote ref HEAD")); diff --git a/t/t5582-fetch-negative-refspec.sh b/t/t5582-fetch-negative-refspec.sh index f345097..e5d2e79 100755 --- a/t/t5582-fetch-negative-refspec.sh +++ b/t/t5582-fetch-negative-refspec.sh @@ -240,4 +240,47 @@ test_expect_success "push with matching +: and negative refspec" ' git -C two push -v one ' +test_expect_success '--prefetch correctly modifies refspecs' ' + git -C one config --unset-all remote.origin.fetch && + git -C one config --add remote.origin.fetch ^refs/heads/bogus/ignore && + git -C one config --add remote.origin.fetch "refs/tags/*:refs/tags/*" && + git -C one config --add remote.origin.fetch "refs/heads/bogus/*:bogus/*" && + + git tag -a -m never never-fetch-tag HEAD && + + git branch bogus/fetched HEAD~1 && + git branch bogus/ignore HEAD && + + git -C one fetch --prefetch --no-tags && + test_must_fail git -C one rev-parse never-fetch-tag && + git -C one rev-parse refs/prefetch/bogus/fetched && + test_must_fail git -C one rev-parse refs/prefetch/bogus/ignore && + + # correctly handle when refspec set becomes empty + # after removing the refs/tags/* refspec. + git -C one config --unset-all remote.origin.fetch && + git -C one config --add remote.origin.fetch "refs/tags/*:refs/tags/*" && + + git -C one fetch --prefetch --no-tags && + test_must_fail git -C one rev-parse never-fetch-tag && + + # The refspec for refs that are not fully qualified + # are filtered multiple times. + git -C one rev-parse refs/prefetch/bogus/fetched && + test_must_fail git -C one rev-parse refs/prefetch/bogus/ignore +' + +test_expect_success '--prefetch succeeds when refspec becomes empty' ' + git checkout bogus/fetched && + test_commit extra && + + git -C one config --unset-all remote.origin.fetch && + git -C one config --unset branch.main.remote && + git -C one config remote.origin.fetch "+refs/tags/extra" && + git -C one config remote.origin.skipfetchall true && + git -C one config remote.origin.tagopt "--no-tags" && + + git -C one fetch --prefetch +' + test_done -- cgit v0.10.2-6-g49f6 From cfd781ea22e0f334d3c0104e1f34c47327934314 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 Apr 2021 12:49:58 +0000 Subject: maintenance: use 'git fetch --prefetch' The 'prefetch' maintenance task previously forced the following refspec for each remote: +refs/heads/*:refs/prefetch//* If a user has specified a more strict refspec for the remote, then this prefetch task downloads more objects than necessary. The previous change introduced the '--prefetch' option to 'git fetch' which manipulates the remote's refspec to place all resulting refs into refs/prefetch/, with further partitioning based on the destinations of those refspecs. Update the documentation to be more generic about the destination refs. Do not mention custom refspecs explicitly, as that does not need to be highlighted in this documentation. The important part of placing refs in refs/prefetch/ remains. Reported-by: Tom Saeger Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/Documentation/git-maintenance.txt b/Documentation/git-maintenance.txt index 80ddd33..1e738ad 100644 --- a/Documentation/git-maintenance.txt +++ b/Documentation/git-maintenance.txt @@ -92,10 +92,8 @@ commit-graph:: prefetch:: The `prefetch` task updates the object directory with the latest objects from all registered remotes. For each remote, a `git fetch` - command is run. The refmap is custom to avoid updating local or remote - branches (those in `refs/heads` or `refs/remotes`). Instead, the - remote refs are stored in `refs/prefetch//`. Also, tags are - not updated. + command is run. The configured refspec is modified to place all + requested refs within `refs/prefetch/`. Also, tags are not updated. + This is done to avoid disrupting the remote-tracking branches. The end users expect these refs to stay unmoved unless they initiate a fetch. With prefetch diff --git a/builtin/gc.c b/builtin/gc.c index fa8128d..9d35f7d 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -879,15 +879,14 @@ static int fetch_remote(struct remote *remote, void *cbdata) struct child_process child = CHILD_PROCESS_INIT; child.git_cmd = 1; - strvec_pushl(&child.args, "fetch", remote->name, "--prune", "--no-tags", + strvec_pushl(&child.args, "fetch", remote->name, + "--prefetch", "--prune", "--no-tags", "--no-write-fetch-head", "--recurse-submodules=no", - "--refmap=", NULL); + NULL); if (opts->quiet) strvec_push(&child.args, "--quiet"); - strvec_pushf(&child.args, "+refs/heads/*:refs/prefetch/%s/*", remote->name); - return !!run_command(&child); } diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index 2412d8c..eadb800 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -141,15 +141,15 @@ test_expect_success 'prefetch multiple remotes' ' test_commit -C clone1 one && test_commit -C clone2 two && GIT_TRACE2_EVENT="$(pwd)/run-prefetch.txt" git maintenance run --task=prefetch 2>/dev/null && - fetchargs="--prune --no-tags --no-write-fetch-head --recurse-submodules=no --refmap= --quiet" && - test_subcommand git fetch remote1 $fetchargs +refs/heads/\\*:refs/prefetch/remote1/\\* log && -- cgit v0.10.2-6-g49f6 From 32f67888d85bd0af30b857a29ca25a55999b6d01 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 16 Apr 2021 12:49:59 +0000 Subject: maintenance: respect remote.*.skipFetchAll If a remote has the skipFetchAll setting enabled, then that remote is not intended for frequent fetching. It makes sense to not fetch that data during the 'prefetch' maintenance task. Skip that remote in the iteration without error. The skip_default_update member is initialized in remote.c:handle_config() as part of initializing the 'struct remote'. Signed-off-by: Derrick Stolee Signed-off-by: Junio C Hamano diff --git a/builtin/gc.c b/builtin/gc.c index 9d35f7d..98a8031 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -878,6 +878,9 @@ static int fetch_remote(struct remote *remote, void *cbdata) struct maintenance_run_opts *opts = cbdata; struct child_process child = CHILD_PROCESS_INIT; + if (remote->skip_default_update) + return 0; + child.git_cmd = 1; strvec_pushl(&child.args, "fetch", remote->name, "--prefetch", "--prune", "--no-tags", diff --git a/t/t7900-maintenance.sh b/t/t7900-maintenance.sh index eadb800..b93ae01 100755 --- a/t/t7900-maintenance.sh +++ b/t/t7900-maintenance.sh @@ -153,7 +153,13 @@ test_expect_success 'prefetch multiple remotes' ' test_cmp_config refs/prefetch/ log.excludedecoration && git log --oneline --decorate --all >log && - ! grep "prefetch" log + ! grep "prefetch" log && + + test_when_finished git config --unset remote.remote1.skipFetchAll && + git config remote.remote1.skipFetchAll true && + GIT_TRACE2_EVENT="$(pwd)/skip-remote1.txt" git maintenance run --task=prefetch 2>/dev/null && + test_subcommand ! git fetch remote1 $fetchargs