From 526f108a271b331af9ae92796215e560e5ec4677 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 3 Oct 2016 16:49:08 -0400 Subject: check_connected: accept an env argument This lets callers influence the environment seen by rev-list, which will be useful when we start providing quarantined objects. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/connected.c b/connected.c index 8e3e4b1..136c2ac 100644 --- a/connected.c +++ b/connected.c @@ -63,6 +63,7 @@ int check_connected(sha1_iterate_fn fn, void *cb_data, _("Checking connectivity")); rev_list.git_cmd = 1; + rev_list.env = opt->env; rev_list.in = -1; rev_list.no_stdout = 1; if (opt->err_fd) diff --git a/connected.h b/connected.h index afa48cc..4ca325f 100644 --- a/connected.h +++ b/connected.h @@ -33,6 +33,11 @@ struct check_connected_options { /* If non-zero, show progress as we traverse the objects. */ int progress; + + /* + * Insert these variables into the environment of the child process. + */ + const char **env; }; #define CHECK_CONNECTED_INIT { 0 } -- cgit v0.10.2-6-g49f6 From 2564d994c9c91aea58d59565d68d42bbc017f536 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 3 Oct 2016 16:49:11 -0400 Subject: tmp-objdir: introduce API for temporary object directories Once objects are added to the object database by a process, they cannot easily be deleted, as we don't know what other processes may have started referencing them. We have to clean them up with git-gc, which will apply the usual reachability and grace-period checks. This patch provides an alternative: it helps callers create a temporary directory inside the object directory, and a temporary environment which can be passed to sub-programs to ask them to write there (the original object directory remains accessible as an alternate of the temporary one). See tmp-objdir.h for details on the API. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 1aad150..4e3becb 100644 --- a/Makefile +++ b/Makefile @@ -831,6 +831,7 @@ LIB_OBJS += submodule-config.o LIB_OBJS += symlinks.o LIB_OBJS += tag.o LIB_OBJS += tempfile.o +LIB_OBJS += tmp-objdir.o LIB_OBJS += trace.o LIB_OBJS += trailer.o LIB_OBJS += transport.o diff --git a/tmp-objdir.c b/tmp-objdir.c new file mode 100644 index 0000000..9443868 --- /dev/null +++ b/tmp-objdir.c @@ -0,0 +1,273 @@ +#include "cache.h" +#include "tmp-objdir.h" +#include "dir.h" +#include "sigchain.h" +#include "string-list.h" +#include "strbuf.h" +#include "argv-array.h" + +struct tmp_objdir { + struct strbuf path; + struct argv_array env; +}; + +/* + * Allow only one tmp_objdir at a time in a running process, which simplifies + * our signal/atexit cleanup routines. It's doubtful callers will ever need + * more than one, and we can expand later if so. You can have many such + * tmp_objdirs simultaneously in many processes, of course. + */ +static struct tmp_objdir *the_tmp_objdir; + +static void tmp_objdir_free(struct tmp_objdir *t) +{ + strbuf_release(&t->path); + argv_array_clear(&t->env); + free(t); +} + +static int tmp_objdir_destroy_1(struct tmp_objdir *t, int on_signal) +{ + int err; + + if (!t) + return 0; + + if (t == the_tmp_objdir) + the_tmp_objdir = NULL; + + /* + * This may use malloc via strbuf_grow(), but we should + * have pre-grown t->path sufficiently so that this + * doesn't happen in practice. + */ + err = remove_dir_recursively(&t->path, 0); + + /* + * When we are cleaning up due to a signal, we won't bother + * freeing memory; it may cause a deadlock if the signal + * arrived while libc's allocator lock is held. + */ + if (!on_signal) + tmp_objdir_free(t); + return err; +} + +int tmp_objdir_destroy(struct tmp_objdir *t) +{ + return tmp_objdir_destroy_1(t, 0); +} + +static void remove_tmp_objdir(void) +{ + tmp_objdir_destroy(the_tmp_objdir); +} + +static void remove_tmp_objdir_on_signal(int signo) +{ + tmp_objdir_destroy_1(the_tmp_objdir, 1); + sigchain_pop(signo); + raise(signo); +} + +/* + * These env_* functions are for setting up the child environment; the + * "replace" variant overrides the value of any existing variable with that + * "key". The "append" variant puts our new value at the end of a list, + * separated by PATH_SEP (which is what separate values in + * GIT_ALTERNATE_OBJECT_DIRECTORIES). + */ +static void env_append(struct argv_array *env, const char *key, const char *val) +{ + const char *old = getenv(key); + + if (!old) + argv_array_pushf(env, "%s=%s", key, val); + else + argv_array_pushf(env, "%s=%s%c%s", key, old, PATH_SEP, val); +} + +static void env_replace(struct argv_array *env, const char *key, const char *val) +{ + argv_array_pushf(env, "%s=%s", key, val); +} + +static int setup_tmp_objdir(const char *root) +{ + char *path; + int ret = 0; + + path = xstrfmt("%s/pack", root); + ret = mkdir(path, 0777); + free(path); + + return ret; +} + +struct tmp_objdir *tmp_objdir_create(void) +{ + static int installed_handlers; + struct tmp_objdir *t; + + if (the_tmp_objdir) + die("BUG: only one tmp_objdir can be used at a time"); + + t = xmalloc(sizeof(*t)); + strbuf_init(&t->path, 0); + argv_array_init(&t->env); + + strbuf_addf(&t->path, "%s/incoming-XXXXXX", get_object_directory()); + + /* + * Grow the strbuf beyond any filename we expect to be placed in it. + * If tmp_objdir_destroy() is called by a signal handler, then + * we should be able to use the strbuf to remove files without + * having to call malloc. + */ + strbuf_grow(&t->path, 1024); + + if (!mkdtemp(t->path.buf)) { + /* free, not destroy, as we never touched the filesystem */ + tmp_objdir_free(t); + return NULL; + } + + the_tmp_objdir = t; + if (!installed_handlers) { + atexit(remove_tmp_objdir); + sigchain_push_common(remove_tmp_objdir_on_signal); + installed_handlers++; + } + + if (setup_tmp_objdir(t->path.buf)) { + tmp_objdir_destroy(t); + return NULL; + } + + env_append(&t->env, ALTERNATE_DB_ENVIRONMENT, + absolute_path(get_object_directory())); + env_replace(&t->env, DB_ENVIRONMENT, absolute_path(t->path.buf)); + + return t; +} + +/* + * Make sure we copy packfiles and their associated metafiles in the correct + * order. All of these ends_with checks are slightly expensive to do in + * the midst of a sorting routine, but in practice it shouldn't matter. + * We will have a relatively small number of packfiles to order, and loose + * objects exit early in the first line. + */ +static int pack_copy_priority(const char *name) +{ + if (!starts_with(name, "pack")) + return 0; + if (ends_with(name, ".keep")) + return 1; + if (ends_with(name, ".pack")) + return 2; + if (ends_with(name, ".idx")) + return 3; + return 4; +} + +static int pack_copy_cmp(const char *a, const char *b) +{ + return pack_copy_priority(a) - pack_copy_priority(b); +} + +static int read_dir_paths(struct string_list *out, const char *path) +{ + DIR *dh; + struct dirent *de; + + dh = opendir(path); + if (!dh) + return -1; + + while ((de = readdir(dh))) + if (!is_dot_or_dotdot(de->d_name)) + string_list_append(out, de->d_name); + + closedir(dh); + return 0; +} + +static int migrate_paths(struct strbuf *src, struct strbuf *dst); + +static int migrate_one(struct strbuf *src, struct strbuf *dst) +{ + struct stat st; + + if (stat(src->buf, &st) < 0) + return -1; + if (S_ISDIR(st.st_mode)) { + if (!mkdir(dst->buf, 0777)) { + if (adjust_shared_perm(dst->buf)) + return -1; + } else if (errno != EEXIST) + return -1; + return migrate_paths(src, dst); + } + return finalize_object_file(src->buf, dst->buf); +} + +static int migrate_paths(struct strbuf *src, struct strbuf *dst) +{ + size_t src_len = src->len, dst_len = dst->len; + struct string_list paths = STRING_LIST_INIT_DUP; + int i; + int ret = 0; + + if (read_dir_paths(&paths, src->buf) < 0) + return -1; + paths.cmp = pack_copy_cmp; + string_list_sort(&paths); + + for (i = 0; i < paths.nr; i++) { + const char *name = paths.items[i].string; + + strbuf_addf(src, "/%s", name); + strbuf_addf(dst, "/%s", name); + + ret |= migrate_one(src, dst); + + strbuf_setlen(src, src_len); + strbuf_setlen(dst, dst_len); + } + + string_list_clear(&paths, 0); + return ret; +} + +int tmp_objdir_migrate(struct tmp_objdir *t) +{ + struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT; + int ret; + + if (!t) + return 0; + + strbuf_addbuf(&src, &t->path); + strbuf_addstr(&dst, get_object_directory()); + + ret = migrate_paths(&src, &dst); + + strbuf_release(&src); + strbuf_release(&dst); + + tmp_objdir_destroy(t); + return ret; +} + +const char **tmp_objdir_env(const struct tmp_objdir *t) +{ + if (!t) + return NULL; + return t->env.argv; +} + +void tmp_objdir_add_as_alternate(const struct tmp_objdir *t) +{ + add_to_alternates_memory(t->path.buf); +} diff --git a/tmp-objdir.h b/tmp-objdir.h new file mode 100644 index 0000000..b1e45b4 --- /dev/null +++ b/tmp-objdir.h @@ -0,0 +1,54 @@ +#ifndef TMP_OBJDIR_H +#define TMP_OBJDIR_H + +/* + * This API allows you to create a temporary object directory, advertise it to + * sub-processes via GIT_OBJECT_DIRECTORY and GIT_ALTERNATE_OBJECT_DIRECTORIES, + * and then either migrate its object into the main object directory, or remove + * it. The library handles unexpected signal/exit death by cleaning up the + * temporary directory. + * + * Example: + * + * struct tmp_objdir *t = tmp_objdir_create(); + * if (!run_command_v_opt_cd_env(cmd, 0, NULL, tmp_objdir_env(t)) && + * !tmp_objdir_migrate(t)) + * printf("success!\n"); + * else + * die("failed...tmp_objdir will clean up for us"); + * + */ + +struct tmp_objdir; + +/* + * Create a new temporary object directory; returns NULL on failure. + */ +struct tmp_objdir *tmp_objdir_create(void); + +/* + * Return a list of environment strings, suitable for use with + * child_process.env, that can be passed to child programs to make use of the + * temporary object directory. + */ +const char **tmp_objdir_env(const struct tmp_objdir *); + +/* + * Finalize a temporary object directory by migrating its objects into the main + * object database, removing the temporary directory, and freeing any + * associated resources. + */ +int tmp_objdir_migrate(struct tmp_objdir *); + +/* + * Destroy a temporary object directory, discarding any objects it contains. + */ +int tmp_objdir_destroy(struct tmp_objdir *); + +/* + * Add the temporary object directory as an alternate object store in the + * current process. + */ +void tmp_objdir_add_as_alternate(const struct tmp_objdir *); + +#endif /* TMP_OBJDIR_H */ -- cgit v0.10.2-6-g49f6 From 722ff7f876c8a2ad99c42434f58af098e61b96e8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 3 Oct 2016 16:49:14 -0400 Subject: receive-pack: quarantine objects until pre-receive accepts When a client pushes objects to us, index-pack checks the objects themselves and then installs them into place. If we then reject the push due to a pre-receive hook, we cannot just delete the packfile; other processes may be depending on it. We have to do a normal reachability check at this point via `git gc`. But such objects may hang around for weeks due to the gc.pruneExpire grace period. And worse, during that time they may be exploded from the pack into inefficient loose objects. Instead, this patch teaches receive-pack to put the new objects into a "quarantine" temporary directory. We make these objects available to the connectivity check and to the pre-receive hook, and then install them into place only if it is successful (and otherwise remove them as tempfiles). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 896b16f..267d320 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -20,6 +20,7 @@ #include "gpg-interface.h" #include "sigchain.h" #include "fsck.h" +#include "tmp-objdir.h" static const char * const receive_pack_usage[] = { N_("git receive-pack "), @@ -86,6 +87,8 @@ static enum { } use_keepalive; static int keepalive_in_sec = 5; +static struct tmp_objdir *tmp_objdir; + static enum deny_action parse_deny_action(const char *var, const char *value) { if (value) { @@ -663,6 +666,9 @@ static int run_and_feed_hook(const char *hook_name, feed_fn feed, } else argv_array_pushf(&proc.env_array, "GIT_PUSH_OPTION_COUNT"); + if (tmp_objdir) + argv_array_pushv(&proc.env_array, tmp_objdir_env(tmp_objdir)); + if (use_sideband) { memset(&muxer, 0, sizeof(muxer)); muxer.proc = copy_to_sideband; @@ -762,6 +768,7 @@ static int run_update_hook(struct command *cmd) proc.stdout_to_stderr = 1; proc.err = use_sideband ? -1 : 0; proc.argv = argv; + proc.env = tmp_objdir_env(tmp_objdir); code = start_command(&proc); if (code) @@ -833,6 +840,7 @@ static int update_shallow_ref(struct command *cmd, struct shallow_info *si) !delayed_reachability_test(si, i)) sha1_array_append(&extra, si->shallow->sha1[i]); + opt.env = tmp_objdir_env(tmp_objdir); setup_alternate_shallow(&shallow_lock, &opt.shallow_file, &extra); if (check_connected(command_singleton_iterator, cmd, &opt)) { rollback_lock_file(&shallow_lock); @@ -1240,12 +1248,17 @@ static void set_connectivity_errors(struct command *commands, for (cmd = commands; cmd; cmd = cmd->next) { struct command *singleton = cmd; + struct check_connected_options opt = CHECK_CONNECTED_INIT; + if (shallow_update && si->shallow_ref[cmd->index]) /* to be checked in update_shallow_ref() */ continue; + + opt.env = tmp_objdir_env(tmp_objdir); if (!check_connected(command_singleton_iterator, &singleton, - NULL)) + &opt)) continue; + cmd->error_string = "missing necessary objects"; } } @@ -1428,6 +1441,7 @@ static void execute_commands(struct command *commands, data.si = si; opt.err_fd = err_fd; opt.progress = err_fd && !quiet; + opt.env = tmp_objdir_env(tmp_objdir); if (check_connected(iterate_receive_command_list, &data, &opt)) set_connectivity_errors(commands, si); @@ -1444,6 +1458,19 @@ static void execute_commands(struct command *commands, return; } + /* + * Now we'll start writing out refs, which means the objects need + * to be in their final positions so that other processes can see them. + */ + if (tmp_objdir_migrate(tmp_objdir) < 0) { + for (cmd = commands; cmd; cmd = cmd->next) { + if (!cmd->error_string) + cmd->error_string = "unable to migrate objects to permanent storage"; + } + return; + } + tmp_objdir = NULL; + check_aliased_updates(commands); free(head_name_to_free); @@ -1639,6 +1666,18 @@ static const char *unpack(int err_fd, struct shallow_info *si) argv_array_push(&child.args, alt_shallow_file); } + tmp_objdir = tmp_objdir_create(); + if (!tmp_objdir) + return "unable to create temporary object directory"; + child.env = tmp_objdir_env(tmp_objdir); + + /* + * Normally we just pass the tmp_objdir environment to the child + * processes that do the heavy lifting, but we may need to see these + * objects ourselves to set up shallow information. + */ + tmp_objdir_add_as_alternate(tmp_objdir); + if (ntohl(hdr.hdr_entries) < unpack_limit) { argv_array_pushl(&child.args, "unpack-objects", hdr_arg, NULL); if (quiet) diff --git a/t/t5547-push-quarantine.sh b/t/t5547-push-quarantine.sh new file mode 100755 index 0000000..1e5d32d --- /dev/null +++ b/t/t5547-push-quarantine.sh @@ -0,0 +1,36 @@ +#!/bin/sh + +test_description='check quarantine of objects during push' +. ./test-lib.sh + +test_expect_success 'create picky dest repo' ' + git init --bare dest.git && + write_script dest.git/hooks/pre-receive <<-\EOF + while read old new ref; do + test "$(git log -1 --format=%s $new)" = reject && exit 1 + done + exit 0 + EOF +' + +test_expect_success 'accepted objects work' ' + test_commit ok && + git push dest.git HEAD && + commit=$(git rev-parse HEAD) && + git --git-dir=dest.git cat-file commit $commit +' + +test_expect_success 'rejected objects are not installed' ' + test_commit reject && + commit=$(git rev-parse HEAD) && + test_must_fail git push dest.git reject && + test_must_fail git --git-dir=dest.git cat-file commit $commit +' + +test_expect_success 'rejected objects are removed' ' + echo "incoming-*" >expect && + (cd dest.git/objects && echo incoming-*) >actual && + test_cmp expect actual +' + +test_done -- cgit v0.10.2-6-g49f6 From e34c2e010f860117dc7f0f992850dfb77ba48289 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 3 Oct 2016 16:49:18 -0400 Subject: tmp-objdir: put quarantine information in the environment The presence of the GIT_QUARANTINE_PATH variable lets any called programs know that they're operating in a temporary object directory (and where that directory is). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index 5d36ffa..c898a1d 100644 --- a/cache.h +++ b/cache.h @@ -433,6 +433,7 @@ static inline enum object_type object_type(unsigned int mode) #define GIT_GLOB_PATHSPECS_ENVIRONMENT "GIT_GLOB_PATHSPECS" #define GIT_NOGLOB_PATHSPECS_ENVIRONMENT "GIT_NOGLOB_PATHSPECS" #define GIT_ICASE_PATHSPECS_ENVIRONMENT "GIT_ICASE_PATHSPECS" +#define GIT_QUARANTINE_ENVIRONMENT "GIT_QUARANTINE_PATH" /* * This environment variable is expected to contain a boolean indicating diff --git a/tmp-objdir.c b/tmp-objdir.c index 9443868..780af8e 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -147,6 +147,8 @@ struct tmp_objdir *tmp_objdir_create(void) env_append(&t->env, ALTERNATE_DB_ENVIRONMENT, absolute_path(get_object_directory())); env_replace(&t->env, DB_ENVIRONMENT, absolute_path(t->path.buf)); + env_replace(&t->env, GIT_QUARANTINE_ENVIRONMENT, + absolute_path(t->path.buf)); return t; } -- cgit v0.10.2-6-g49f6 From 62fe0eb4804c297486a1d421a4f893865fcbc911 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 3 Oct 2016 16:49:22 -0400 Subject: tmp-objdir: do not migrate files starting with '.' This avoids "." and "..", as we already do, but also leaves room for index-pack to store extra data in the quarantine area (e.g., for passing back any analysis to be read by the pre-receive hook). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/tmp-objdir.c b/tmp-objdir.c index 780af8e..64435f2 100644 --- a/tmp-objdir.c +++ b/tmp-objdir.c @@ -188,7 +188,7 @@ static int read_dir_paths(struct string_list *out, const char *path) return -1; while ((de = readdir(dh))) - if (!is_dot_or_dotdot(de->d_name)) + if (de->d_name[0] != '.') string_list_append(out, de->d_name); closedir(dh); -- cgit v0.10.2-6-g49f6