path: root/builtin/fetch.c
diff options
author Patrick Steinhardt <> 2021-01-12 12:27:52 (GMT)
committer Junio C Hamano <> 2021-01-12 20:06:15 (GMT)
commit c7b190dabdea16d5b1c29ae50430449a8547818f (patch)
tree f98db780a9f0d2ab47e307e3773f5e048c1fbee1 /builtin/fetch.c
parent d4c8db8f1b43cb26efdd53d74afce60e6d9d195a (diff)
fetch: implement support for atomic reference updates
When executing a fetch, git currently allocates one reference transaction per reference update and directly commits it. This means that fetches are non-atomic: even if some of the reference updates fail, others may still succeed and modify local references.

This is fine in many scenarios, but this strategy has its downsides.

- The view of remote references may be inconsistent and may show a bastardized state of the remote repository.

- Batching together updates may improve performance in certain scenarios. While the impact probably isn't as pronounced with loose references, the upcoming reftable backend may benefit as it needs to write fewer files in case the update is batched.

- The reference-transaction hook is currently being executed twice per updated reference. While this doesn't matter when there is no such hook, we have seen severe performance regressions when doing a git-fetch(1) with a reference-transaction hook when the remote repository has hundreds of thousands of references.

Similar to `git push --atomic`, this commit thus introduces atomic fetches. Instead of allocating one reference transaction per updated reference, it causes us to only allocate a single transaction and commit it as soon as all updates were received. If locking of any reference fails, then we abort the complete transaction and don't update any reference, which gives us an all-or-nothing fetch.

Note that this may not completely fix the first of the above downsides, as the consistent view also depends on the server side. If the server doesn't have a consistent view of its own references during the reference negotiation phase, then the client would get the same inconsistent view the server has. This is a separate problem though and, if it actually exists, can be fixed at a later point.

This commit also changes the way we write FETCH_HEAD in case `--atomic` is passed.
Instead of writing changes as we go, we need to accumulate all changes first and only commit them at the end when we know that all reference updates succeeded. Ideally, we'd just do so via a temporary file so that we don't need to carry all updates in memory. This isn't trivially doable though considering the `--append` mode, where we do not truncate the file but simply append to it. And given that we support concurrent processes appending to FETCH_HEAD at the same time without any loss of data, seeding the temporary file with the current contents of FETCH_HEAD initially and then doing a rename wouldn't work either. So this commit implements the simple strategy of buffering all changes and appending them to the file on commit.

Signed-off-by: Patrick Steinhardt <>
Signed-off-by: Junio C Hamano <>
Diffstat (limited to 'builtin/fetch.c')
1 files changed, 41 insertions, 5 deletions
diff --git a/builtin/fetch.c b/builtin/fetch.c
index cada732..91f3d20 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -63,6 +63,7 @@ static int enable_auto_gc = 1;
static int tags = TAGS_DEFAULT, unshallow, update_shallow, deepen;
static int max_jobs = -1, submodule_fetch_jobs_config = -1;
static int fetch_parallel_config = 1;
+static int atomic_fetch;
static enum transport_family family;
static const char *depth;
static const char *deepen_since;
@@ -144,6 +145,8 @@ static struct option builtin_fetch_options[] = {
N_("set upstream for git pull/fetch")),
OPT_BOOL('a', "append", &append,
N_("append to .git/FETCH_HEAD instead of overwriting")),
+ OPT_BOOL(0, "atomic", &atomic_fetch,
+ N_("use atomic transaction to update references")),
OPT_STRING(0, "upload-pack", &upload_pack, N_("path"),
N_("path to upload pack on remote end")),
OPT__FORCE(&force, N_("force overwrite of local reference"), 0),
@@ -970,13 +973,23 @@ static void append_fetch_head(struct fetch_head *fetch_head,
strbuf_addch(&fetch_head->buf, url[i]);
strbuf_addch(&fetch_head->buf, '\n');
- strbuf_write(&fetch_head->buf, fetch_head->fp);
- strbuf_reset(&fetch_head->buf);
+ /*
+ * When using an atomic fetch, we do not want to update FETCH_HEAD if
+ * any of the reference updates fails. We thus have to write all
+ * updates to a buffer first and only commit it as soon as all
+ * references have been successfully updated.
+ */
+ if (!atomic_fetch) {
+ strbuf_write(&fetch_head->buf, fetch_head->fp);
+ strbuf_reset(&fetch_head->buf);
+ }
static void commit_fetch_head(struct fetch_head *fetch_head)
- /* Nothing to commit yet. */
+ if (!fetch_head->fp || !atomic_fetch)
+ return;
+ strbuf_write(&fetch_head->buf, fetch_head->fp);
static void close_fetch_head(struct fetch_head *fetch_head)
@@ -1003,7 +1016,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
struct fetch_head fetch_head;
struct commit *commit;
int url_len, i, rc = 0;
- struct strbuf note = STRBUF_INIT;
+ struct strbuf note = STRBUF_INIT, err = STRBUF_INIT;
+ struct ref_transaction *transaction = NULL;
const char *what, *kind;
struct ref *rm;
char *url;
@@ -1029,6 +1043,14 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
+ if (atomic_fetch) {
+ transaction = ref_transaction_begin(&err);
+ if (!transaction) {
+ error("%s", err.buf);
+ goto abort;
+ }
+ }
@@ -1105,7 +1127,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
if (ref) {
- rc |= update_local_ref(ref, NULL, what,
+ rc |= update_local_ref(ref, transaction, what,
rm, &note, summary_width);
} else if (write_fetch_head || dry_run) {
@@ -1131,6 +1153,14 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
+ if (!rc && transaction) {
+ rc = ref_transaction_commit(transaction, &err);
+ if (rc) {
+ error("%s", err.buf);
+ goto abort;
+ }
+ }
if (!rc)
@@ -1150,6 +1180,8 @@ static int store_updated_refs(const char *raw_url, const char *remote_name,
+ strbuf_release(&err);
+ ref_transaction_free(transaction);
return rc;
@@ -1961,6 +1993,10 @@ int cmd_fetch(int argc, const char **argv, const char *prefix)
die(_("--filter can only be used with the remote "
"configured in extensions.partialclone"));
+ if (atomic_fetch)
+ die(_("--atomic can only be used when fetching "
+ "from one remote"));
if (stdin_refspecs)
die(_("--stdin can only be used when fetching "
"from one remote"));