From ea2fa5a3388281b880ae2aa75c8eac0d4b1a6c69 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:11 -0800 Subject: submodule-config: keep update strategy around Currently submodule..update is only handled by git-submodule.sh. C code will start to need to make use of that value as more of the functionality of git-submodule.sh moves into library code in C. Add the update field to 'struct submodule' and populate it so it can be read as sm->update or from sm->update_command. Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/submodule-config.c b/submodule-config.c index afe0ea8..a5cd2ee 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -59,6 +59,7 @@ static void free_one_config(struct submodule_entry *entry) { free((void *) entry->config->path); free((void *) entry->config->name); + free((void *) entry->config->update_strategy.command); free(entry->config); } @@ -194,6 +195,8 @@ static struct submodule *lookup_or_create_by_name(struct submodule_cache *cache, submodule->path = NULL; submodule->url = NULL; + submodule->update_strategy.type = SM_UPDATE_UNSPECIFIED; + submodule->update_strategy.command = NULL; submodule->fetch_recurse = RECURSE_SUBMODULES_NONE; submodule->ignore = NULL; @@ -311,6 +314,16 @@ static int parse_config(const char *var, const char *value, void *data) free((void *) submodule->url); submodule->url = xstrdup(value); } + } else if (!strcmp(item.buf, "update")) { + if (!value) + ret = config_error_nonbool(var); + else if (!me->overwrite && + submodule->update_strategy.type != SM_UPDATE_UNSPECIFIED) + warn_multiple_config(me->commit_sha1, submodule->name, + "update"); + else if (parse_submodule_update_strategy(value, + &submodule->update_strategy) < 0) + die(_("invalid value for %s"), var); } strbuf_release(&name); diff --git a/submodule-config.h b/submodule-config.h index 9061e4e..092ebfc 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -2,6 +2,7 @@ #define SUBMODULE_CONFIG_CACHE_H #include "hashmap.h" +#include "submodule.h" #include "strbuf.h" /* @@ -14,6 +15,7 @@ struct submodule { const char *url; int fetch_recurse; const char *ignore; + struct submodule_update_strategy update_strategy; /* the sha1 blob id of the responsible .gitmodules file */ unsigned char gitmodules_sha1[20]; }; diff --git a/submodule.c b/submodule.c index 916fc84..c3a1300 100644 --- a/submodule.c +++ b/submodule.c @@ -210,6 +210,27 @@ void gitmodules_config(void) } } +int parse_submodule_update_strategy(const char *value, + struct submodule_update_strategy *dst) +{ + free((void*)dst->command); + dst->command = NULL; + if (!strcmp(value, "none")) + dst->type = SM_UPDATE_NONE; + else if (!strcmp(value, "checkout")) + dst->type = SM_UPDATE_CHECKOUT; + else if (!strcmp(value, "rebase")) + dst->type = SM_UPDATE_REBASE; + else if (!strcmp(value, "merge")) + dst->type = SM_UPDATE_MERGE; + else if (skip_prefix(value, "!", &value)) { + dst->type = SM_UPDATE_COMMAND; + dst->command = xstrdup(value); + } else + return -1; + return 0; +} + void handle_ignore_submodules_arg(struct diff_options *diffopt, const char *arg) { diff --git a/submodule.h b/submodule.h index cbc0003..3464500 100644 --- a/submodule.h +++ b/submodule.h @@ -13,6 +13,20 @@ enum { RECURSE_SUBMODULES_ON = 2 }; +enum submodule_update_type { + SM_UPDATE_UNSPECIFIED = 0, + SM_UPDATE_CHECKOUT, + SM_UPDATE_REBASE, + SM_UPDATE_MERGE, + SM_UPDATE_NONE, + SM_UPDATE_COMMAND +}; + +struct submodule_update_strategy { + enum submodule_update_type type; + const char *command; +}; + int is_staging_gitmodules_ok(void); int update_path_in_gitmodules(const char *oldpath, const char *newpath); int remove_path_from_gitmodules(const char *path); @@ -21,6 +35,8 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path); int submodule_config(const char *var, const char *value, void *cb); void gitmodules_config(void); +int parse_submodule_update_strategy(const char *value, + struct submodule_update_strategy *dst); void handle_ignore_submodules_arg(struct diff_options *diffopt, const char *); void show_submodule_summary(FILE *f, const char *path, const char *line_prefix, -- cgit v0.10.2-6-g49f6 From f73da11024adba18fe7443a420259ad07e0139d5 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:12 -0800 Subject: submodule-config: drop check against NULL Adhere to the common coding style of Git and not check explicitly for NULL throughout the file. There are still other occurrences in the code base but that is usually inside of conditions with side effects. Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/submodule-config.c b/submodule-config.c index a5cd2ee..9fa2269 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -267,7 +267,7 @@ static int parse_config(const char *var, const char *value, void *data) if (!strcmp(item.buf, "path")) { if (!value) ret = config_error_nonbool(var); - else if (!me->overwrite && submodule->path != NULL) + else if (!me->overwrite && submodule->path) warn_multiple_config(me->commit_sha1, submodule->name, "path"); else { @@ -291,7 +291,7 @@ static int parse_config(const char *var, const char *value, void *data) } else if (!strcmp(item.buf, "ignore")) { if (!value) ret = config_error_nonbool(var); - else if (!me->overwrite && submodule->ignore != NULL) + else if (!me->overwrite && submodule->ignore) warn_multiple_config(me->commit_sha1, submodule->name, "ignore"); else if (strcmp(value, "untracked") && @@ -307,7 +307,7 @@ static int parse_config(const char *var, const char *value, void *data) } else if (!strcmp(item.buf, "url")) { if (!value) { ret = config_error_nonbool(var); - } else if (!me->overwrite && submodule->url != NULL) { + } else if (!me->overwrite && submodule->url) { warn_multiple_config(me->commit_sha1, submodule->name, "url"); } else { -- cgit v0.10.2-6-g49f6 From a028a1930c6b4b848e8fb47cc92c30b23d99a75e Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:13 -0800 Subject: fetching submodules: respect `submodule.fetchJobs` config option This allows to configure fetching and updating in parallel without having the command line option. This moved the responsibility to determine how many parallel processes to start from builtin/fetch to submodule.c as we need a way to communicate "The user did not specify the number of parallel processes in the command line options" in the builtin fetch. The submodule code takes care of the precedence (CLI > config > default). Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index 2d06b11..3b02732 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -2646,6 +2646,12 @@ submodule..ignore:: "--ignore-submodules" option. The 'git submodule' commands are not affected by this setting. +submodule.fetchJobs:: + Specifies how many submodules are fetched/cloned at the same time. + A positive integer allows up to that number of submodules fetched + in parallel. A value of 0 will give some reasonable default. + If unset, it defaults to 1. + tag.sort:: This variable controls the sort ordering of tags when displayed by linkgit:git-tag[1]. Without the "--sort=" option provided, the diff --git a/builtin/fetch.c b/builtin/fetch.c index 586840d..5aa1c2d 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -37,7 +37,7 @@ static int prune = -1; /* unspecified */ static int all, append, dry_run, force, keep, multiple, update_head_ok, verbosity; static int progress = -1, recurse_submodules = RECURSE_SUBMODULES_DEFAULT; static int tags = TAGS_DEFAULT, unshallow, update_shallow; -static int max_children = 1; +static int max_children = -1; static const char *depth; static const char *upload_pack; static struct strbuf default_rla = STRBUF_INIT; diff --git a/submodule.c b/submodule.c index c3a1300..4bd14de 100644 --- a/submodule.c +++ b/submodule.c @@ -15,6 +15,7 @@ #include "thread-utils.h" static int config_fetch_recurse_submodules = RECURSE_SUBMODULES_ON_DEMAND; +static int parallel_jobs = 1; static struct string_list changed_submodule_paths; static int initialized_fetch_ref_tips; static struct sha1_array ref_tips_before_fetch; @@ -169,7 +170,12 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, int submodule_config(const char *var, const char *value, void *cb) { - if (starts_with(var, "submodule.")) + if (!strcmp(var, "submodule.fetchjobs")) { + parallel_jobs = git_config_int(var, value); + if (parallel_jobs < 0) + die(_("negative values not allowed for submodule.fetchJobs")); + return 0; + } else if (starts_with(var, "submodule.")) return parse_submodule_config_option(var, value); else if (!strcmp(var, "fetch.recursesubmodules")) { config_fetch_recurse_submodules = parse_fetch_recurse_submodules_arg(var, value); @@ -771,6 +777,9 @@ int fetch_populated_submodules(const struct argv_array *options, argv_array_push(&spf.args, "--recurse-submodules-default"); /* default value, "--submodule-prefix" and its value are added later */ + if (max_parallel_jobs < 0) + max_parallel_jobs = parallel_jobs; + calculate_changed_submodule_paths(); run_processes_parallel(max_parallel_jobs, get_next_submodule, @@ -1117,3 +1126,8 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir) strbuf_release(&rel_path); free((void *)real_work_tree); } + +int parallel_submodules(void) +{ + return parallel_jobs; +} diff --git a/submodule.h b/submodule.h index 3464500..3166608 100644 --- a/submodule.h +++ b/submodule.h @@ -26,6 +26,7 @@ struct submodule_update_strategy { enum submodule_update_type type; const char *command; }; +#define SUBMODULE_UPDATE_STRATEGY_INIT {SM_UPDATE_UNSPECIFIED, NULL} int is_staging_gitmodules_ok(void); int update_path_in_gitmodules(const char *oldpath, const char *newpath); @@ -57,5 +58,6 @@ int find_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_nam struct string_list *needs_pushing); int push_unpushed_submodules(unsigned char new_sha1[20], const char *remotes_name); void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir); +int parallel_submodules(void); #endif diff --git a/t/t5526-fetch-submodules.sh b/t/t5526-fetch-submodules.sh index 1241146..954d0e4 100755 --- a/t/t5526-fetch-submodules.sh +++ b/t/t5526-fetch-submodules.sh @@ -471,4 +471,18 @@ test_expect_success "don't fetch submodule when newly recorded commits are alrea test_i18ncmp expect.err actual.err ' +test_expect_success 'fetching submodules respects parallel settings' ' + git config fetch.recurseSubmodules true && + ( + cd downstream && + GIT_TRACE=$(pwd)/trace.out git fetch --jobs 7 && + grep "7 tasks" trace.out && + git config submodule.fetchJobs 8 && + GIT_TRACE=$(pwd)/trace.out git fetch && + grep "8 tasks" trace.out && + GIT_TRACE=$(pwd)/trace.out git fetch --jobs 9 && + grep "9 tasks" trace.out + ) +' + test_done -- cgit v0.10.2-6-g49f6 From 8c6b5491182510f2344dae71784fcb5c3992d005 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:14 -0800 Subject: submodule update: direct error message to stderr Reroute the error message for specified but initialized submodules to stderr instead of stdout. Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/git-submodule.sh b/git-submodule.sh index 9bc5c5f..9ee86d4 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -693,7 +693,7 @@ cmd_update() if test "$update_module" = "none" then - echo "Skipping submodule '$displaypath'" + echo >&2 "Skipping submodule '$displaypath'" continue fi @@ -702,7 +702,7 @@ cmd_update() # Only mention uninitialized submodules when its # path have been specified test "$#" != "0" && - say "$(eval_gettext "Submodule path '\$displaypath' not initialized + say >&2 "$(eval_gettext "Submodule path '\$displaypath' not initialized Maybe you want to use 'update --init'?")" continue fi diff --git a/t/t7400-submodule-basic.sh b/t/t7400-submodule-basic.sh index 540771c..5991e3c 100755 --- a/t/t7400-submodule-basic.sh +++ b/t/t7400-submodule-basic.sh @@ -462,7 +462,7 @@ test_expect_success 'update --init' ' git config --remove-section submodule.example && test_must_fail git config submodule.example.url && - git submodule update init > update.out && + git submodule update init 2> update.out && cat update.out && test_i18ngrep "not initialized" update.out && test_must_fail git rev-parse --resolve-git-dir init/.git && @@ -480,7 +480,7 @@ test_expect_success 'update --init from subdirectory' ' mkdir -p sub && ( cd sub && - git submodule update ../init >update.out && + git submodule update ../init 2>update.out && cat update.out && test_i18ngrep "not initialized" update.out && test_must_fail git rev-parse --resolve-git-dir ../init/.git && -- cgit v0.10.2-6-g49f6 From 2dac9b5637abe580e8b8ea36e00dbbf28ec99af6 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:15 -0800 Subject: run_processes_parallel: treat output of children as byte array We do not want the output to be interrupted by a NUL byte, so we cannot use raw fputs. Introduce strbuf_write to avoid having long arguments in run-command.c. Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/run-command.c b/run-command.c index 8e3ad07..bdda940 100644 --- a/run-command.c +++ b/run-command.c @@ -994,7 +994,7 @@ static void pp_cleanup(struct parallel_processes *pp) * When get_next_task added messages to the buffer in its last * iteration, the buffered output is non empty. */ - fputs(pp->buffered_output.buf, stderr); + strbuf_write(&pp->buffered_output, stderr); strbuf_release(&pp->buffered_output); sigchain_pop_common(); @@ -1079,7 +1079,7 @@ static void pp_output(struct parallel_processes *pp) int i = pp->output_owner; if (pp->children[i].state == GIT_CP_WORKING && pp->children[i].err.len) { - fputs(pp->children[i].err.buf, stderr); + strbuf_write(&pp->children[i].err, stderr); strbuf_reset(&pp->children[i].err); } } @@ -1117,11 +1117,11 @@ static int pp_collect_finished(struct parallel_processes *pp) strbuf_addbuf(&pp->buffered_output, &pp->children[i].err); strbuf_reset(&pp->children[i].err); } else { - fputs(pp->children[i].err.buf, stderr); + strbuf_write(&pp->children[i].err, stderr); strbuf_reset(&pp->children[i].err); /* Output all other finished child processes */ - fputs(pp->buffered_output.buf, stderr); + strbuf_write(&pp->buffered_output, stderr); strbuf_reset(&pp->buffered_output); /* diff --git a/strbuf.c b/strbuf.c index 38686ff..5f6da82 100644 --- a/strbuf.c +++ b/strbuf.c @@ -395,6 +395,12 @@ ssize_t strbuf_read_once(struct strbuf *sb, int fd, size_t hint) return cnt; } +ssize_t strbuf_write(struct strbuf *sb, FILE *f) +{ + return sb->len ? fwrite(sb->buf, 1, sb->len, f) : 0; +} + + #define STRBUF_MAXLINK (2*PATH_MAX) int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) diff --git a/strbuf.h b/strbuf.h index 2bf90e7..d4f2aa1 100644 --- a/strbuf.h +++ b/strbuf.h @@ -387,6 +387,12 @@ extern ssize_t strbuf_read_file(struct strbuf *sb, const char *path, size_t hint extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); /** + * Write the whole content of the strbuf to the stream not stopping at + * NUL bytes. + */ +extern ssize_t strbuf_write(struct strbuf *sb, FILE *stream); + +/** * Read a line from a FILE *, overwriting the existing contents * of the strbuf. The second argument specifies the line * terminator character, typically `'\n'`. -- cgit v0.10.2-6-g49f6 From aa71049485c7b789fb9121c7fc2bece431bc589b Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:16 -0800 Subject: run_processes_parallel: rename parameters for the callbacks The refs code has a similar pattern of passing around 'struct strbuf *err', which is strictly used for error reporting. This is not the case here, as the strbuf is used to accumulate all the output (whether it is error or not) for the user. Rename it to 'out'. Suggested-by: Jonathan Nieder Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/run-command.c b/run-command.c index bdda940..62c6721 100644 --- a/run-command.c +++ b/run-command.c @@ -902,7 +902,7 @@ struct parallel_processes { struct strbuf buffered_output; /* of finished children */ }; -static int default_start_failure(struct strbuf *err, +static int default_start_failure(struct strbuf *out, void *pp_cb, void *pp_task_cb) { @@ -910,7 +910,7 @@ static int default_start_failure(struct strbuf *err, } static int default_task_finished(int result, - struct strbuf *err, + struct strbuf *out, void *pp_cb, void *pp_task_cb) { diff --git a/run-command.h b/run-command.h index 6e17894..65bd424 100644 --- a/run-command.h +++ b/run-command.h @@ -139,7 +139,7 @@ int in_async(void); * return the negative signal number. */ typedef int (*get_next_task_fn)(struct child_process *cp, - struct strbuf *err, + struct strbuf *out, void *pp_cb, void **pp_task_cb); @@ -148,7 +148,7 @@ typedef int (*get_next_task_fn)(struct child_process *cp, * a new process. * * You must not write to stdout or stderr in this function. Add your - * message to the strbuf err instead, which will be printed without + * message to the strbuf out instead, which will be printed without * messing up the output of the other parallel processes. * * pp_cb is the callback cookie as passed into run_processes_parallel, @@ -158,7 +158,7 @@ typedef int (*get_next_task_fn)(struct child_process *cp, * To send a signal to other child processes for abortion, return * the negative signal number. */ -typedef int (*start_failure_fn)(struct strbuf *err, +typedef int (*start_failure_fn)(struct strbuf *out, void *pp_cb, void *pp_task_cb); @@ -166,7 +166,7 @@ typedef int (*start_failure_fn)(struct strbuf *err, * This callback is called on every child process that finished processing. * * You must not write to stdout or stderr in this function. Add your - * message to the strbuf err instead, which will be printed without + * message to the strbuf out instead, which will be printed without * messing up the output of the other parallel processes. * * pp_cb is the callback cookie as passed into run_processes_parallel, @@ -177,7 +177,7 @@ typedef int (*start_failure_fn)(struct strbuf *err, * the negative signal number. */ typedef int (*task_finished_fn)(int result, - struct strbuf *err, + struct strbuf *out, void *pp_cb, void *pp_task_cb); -- cgit v0.10.2-6-g49f6 From 48308681b072a1d32e1361c255347324a8ad151e Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:17 -0800 Subject: git submodule update: have a dedicated helper for cloning This introduces a new helper function in git submodule--helper which takes care of cloning all submodules, which we want to parallelize eventually. Some tests (such as empty URL, update_mode=none) are required in the helper to make the decision for cloning. These checks have been moved into the C function as well (no need to repeat them in the shell script). Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index f4c3eff..fd2b168 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -255,6 +255,251 @@ static int module_clone(int argc, const char **argv, const char *prefix) return 0; } +struct submodule_update_clone { + /* index into 'list', the list of submodules to look into for cloning */ + int current; + struct module_list list; + unsigned warn_if_uninitialized : 1; + + /* update parameter passed via commandline */ + struct submodule_update_strategy update; + + /* configuration parameters which are passed on to the children */ + int quiet; + const char *reference; + const char *depth; + const char *recursive_prefix; + const char *prefix; + + /* Machine-readable status lines to be consumed by git-submodule.sh */ + struct string_list projectlines; + + /* If we want to stop as fast as possible and return an error */ + unsigned quickstop : 1; +}; +#define SUBMODULE_UPDATE_CLONE_INIT {0, MODULE_LIST_INIT, 0, \ + SUBMODULE_UPDATE_STRATEGY_INIT, 0, NULL, NULL, NULL, NULL, \ + STRING_LIST_INIT_DUP, 0} + +/** + * Determine whether 'ce' needs to be cloned. If so, prepare the 'child' to + * run the clone. Returns 1 if 'ce' needs to be cloned, 0 otherwise. + */ +static int prepare_to_clone_next_submodule(const struct cache_entry *ce, + struct child_process *child, + struct submodule_update_clone *suc, + struct strbuf *out) +{ + const struct submodule *sub = NULL; + struct strbuf displaypath_sb = STRBUF_INIT; + struct strbuf sb = STRBUF_INIT; + const char *displaypath = NULL; + char *url = NULL; + int needs_cloning = 0; + + if (ce_stage(ce)) { + if (suc->recursive_prefix) + strbuf_addf(&sb, "%s/%s", suc->recursive_prefix, ce->name); + else + strbuf_addf(&sb, "%s", ce->name); + strbuf_addf(out, _("Skipping unmerged submodule %s"), sb.buf); + strbuf_addch(out, '\n'); + goto cleanup; + } + + sub = submodule_from_path(null_sha1, ce->name); + + if (suc->recursive_prefix) + displaypath = relative_path(suc->recursive_prefix, + ce->name, &displaypath_sb); + else + displaypath = ce->name; + + if (suc->update.type == SM_UPDATE_NONE + || (suc->update.type == SM_UPDATE_UNSPECIFIED + && sub->update_strategy.type == SM_UPDATE_NONE)) { + strbuf_addf(out, _("Skipping submodule '%s'"), displaypath); + strbuf_addch(out, '\n'); + goto cleanup; + } + + /* + * Looking up the url in .git/config. + * We must not fall back to .gitmodules as we only want + * to process configured submodules. + */ + strbuf_reset(&sb); + strbuf_addf(&sb, "submodule.%s.url", sub->name); + git_config_get_string(sb.buf, &url); + if (!url) { + /* + * Only mention uninitialized submodules when their + * path have been specified + */ + if (suc->warn_if_uninitialized) { + strbuf_addf(out, + _("Submodule path '%s' not initialized"), + displaypath); + strbuf_addch(out, '\n'); + strbuf_addstr(out, + _("Maybe you want to use 'update --init'?")); + strbuf_addch(out, '\n'); + } + goto cleanup; + } + + strbuf_reset(&sb); + strbuf_addf(&sb, "%s/.git", ce->name); + needs_cloning = !file_exists(sb.buf); + + strbuf_reset(&sb); + strbuf_addf(&sb, "%06o %s %d %d\t%s\n", ce->ce_mode, + sha1_to_hex(ce->sha1), ce_stage(ce), + needs_cloning, ce->name); + string_list_append(&suc->projectlines, sb.buf); + + if (!needs_cloning) + goto cleanup; + + child->git_cmd = 1; + child->no_stdin = 1; + child->stdout_to_stderr = 1; + child->err = -1; + argv_array_push(&child->args, "submodule--helper"); + argv_array_push(&child->args, "clone"); + if (suc->quiet) + argv_array_push(&child->args, "--quiet"); + if (suc->prefix) + argv_array_pushl(&child->args, "--prefix", suc->prefix, NULL); + argv_array_pushl(&child->args, "--path", sub->path, NULL); + argv_array_pushl(&child->args, "--name", sub->name, NULL); + argv_array_pushl(&child->args, "--url", url, NULL); + if (suc->reference) + argv_array_push(&child->args, suc->reference); + if (suc->depth) + argv_array_push(&child->args, suc->depth); + +cleanup: + free(url); + strbuf_reset(&displaypath_sb); + strbuf_reset(&sb); + + return needs_cloning; +} + +static int update_clone_get_next_task(struct child_process *child, + struct strbuf *err, + void *suc_cb, + void **void_task_cb) +{ + struct submodule_update_clone *suc = suc_cb; + + for (; suc->current < suc->list.nr; suc->current++) { + const struct cache_entry *ce = suc->list.entries[suc->current]; + if (prepare_to_clone_next_submodule(ce, child, suc, err)) { + suc->current++; + return 1; + } + } + return 0; +} + +static int update_clone_start_failure(struct strbuf *err, + void *suc_cb, + void *void_task_cb) +{ + struct submodule_update_clone *suc = suc_cb; + suc->quickstop = 1; + return 1; +} + +static int update_clone_task_finished(int result, + struct strbuf *err, + void *suc_cb, + void *void_task_cb) +{ + struct submodule_update_clone *suc = suc_cb; + + if (!result) + return 0; + + suc->quickstop = 1; + return 1; +} + +static int update_clone(int argc, const char **argv, const char *prefix) +{ + const char *update = NULL; + struct string_list_item *item; + struct pathspec pathspec; + struct submodule_update_clone suc = SUBMODULE_UPDATE_CLONE_INIT; + + struct option module_update_clone_options[] = { + OPT_STRING(0, "prefix", &prefix, + N_("path"), + N_("path into the working tree")), + OPT_STRING(0, "recursive-prefix", &suc.recursive_prefix, + N_("path"), + N_("path into the working tree, across nested " + "submodule boundaries")), + OPT_STRING(0, "update", &update, + N_("string"), + N_("rebase, merge, checkout or none")), + OPT_STRING(0, "reference", &suc.reference, N_("repo"), + N_("reference repository")), + OPT_STRING(0, "depth", &suc.depth, "", + N_("Create a shallow clone truncated to the " + "specified number of revisions")), + OPT__QUIET(&suc.quiet, N_("don't print cloning progress")), + OPT_END() + }; + + const char *const git_submodule_helper_usage[] = { + N_("git submodule--helper update_clone [--prefix=] [...]"), + NULL + }; + suc.prefix = prefix; + + argc = parse_options(argc, argv, prefix, module_update_clone_options, + git_submodule_helper_usage, 0); + + if (update) + if (parse_submodule_update_strategy(update, &suc.update) < 0) + die(_("bad value for update parameter")); + + if (module_list_compute(argc, argv, prefix, &pathspec, &suc.list) < 0) + return 1; + + if (pathspec.nr) + suc.warn_if_uninitialized = 1; + + /* Overlay the parsed .gitmodules file with .git/config */ + gitmodules_config(); + git_config(submodule_config, NULL); + + run_processes_parallel(1, + update_clone_get_next_task, + update_clone_start_failure, + update_clone_task_finished, + &suc); + + /* + * We saved the output and put it out all at once now. + * That means: + * - the listener does not have to interleave their (checkout) + * work with our fetching. The writes involved in a + * checkout involve more straightforward sequential I/O. + * - the listener can avoid doing any work if fetching failed. + */ + if (suc.quickstop) + return 1; + + for_each_string_list_item(item, &suc.projectlines) + utf8_fprintf(stdout, "%s", item->string); + + return 0; +} + struct cmd_struct { const char *cmd; int (*fn)(int, const char **, const char *); @@ -264,6 +509,7 @@ static struct cmd_struct commands[] = { {"list", module_list}, {"name", module_name}, {"clone", module_clone}, + {"update-clone", update_clone} }; int cmd_submodule__helper(int argc, const char **argv, const char *prefix) diff --git a/git-submodule.sh b/git-submodule.sh index 9ee86d4..a6a82d2 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -664,17 +664,20 @@ cmd_update() cmd_init "--" "$@" || return fi - cloned_modules= - git submodule--helper list --prefix "$wt_prefix" "$@" | { + { + git submodule--helper update-clone ${GIT_QUIET:+--quiet} \ + ${wt_prefix:+--prefix "$wt_prefix"} \ + ${prefix:+--recursive-prefix "$prefix"} \ + ${update:+--update "$update"} \ + ${reference:+--reference "$reference"} \ + ${depth:+--depth "$depth"} \ + "$@" || echo "#unmatched" + } | { err= - while read mode sha1 stage sm_path + while read mode sha1 stage just_cloned sm_path do die_if_unmatched "$mode" - if test "$stage" = U - then - echo >&2 "Skipping unmerged submodule $prefix$sm_path" - continue - fi + name=$(git submodule--helper name "$sm_path") || exit url=$(git config submodule."$name".url) branch=$(get_submodule_config "$name" branch master) @@ -691,27 +694,10 @@ cmd_update() displaypath=$(relative_path "$prefix$sm_path") - if test "$update_module" = "none" - then - echo >&2 "Skipping submodule '$displaypath'" - continue - fi - - if test -z "$url" - then - # Only mention uninitialized submodules when its - # path have been specified - test "$#" != "0" && - say >&2 "$(eval_gettext "Submodule path '\$displaypath' not initialized -Maybe you want to use 'update --init'?")" - continue - fi - - if ! test -d "$sm_path"/.git && ! test -f "$sm_path"/.git + if test $just_cloned -eq 1 then - git submodule--helper clone ${GIT_QUIET:+--quiet} --prefix "$prefix" --path "$sm_path" --name "$name" --url "$url" "$reference" "$depth" || exit - cloned_modules="$cloned_modules;$name" subsha1= + update_module=checkout else subsha1=$(clear_local_git_env; cd "$sm_path" && git rev-parse --verify HEAD) || @@ -751,13 +737,6 @@ Maybe you want to use 'update --init'?")" die "$(eval_gettext "Unable to fetch in submodule path '\$displaypath'")" fi - # Is this something we just cloned? - case ";$cloned_modules;" in - *";$name;"*) - # then there is no local change to integrate - update_module=checkout ;; - esac - must_die_on_failure= case "$update_module" in checkout) -- cgit v0.10.2-6-g49f6 From cdc04b65b4bc8094e082ac65a7dce75a7a990163 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:18 -0800 Subject: submodule helper: remove double 'fatal: ' prefix The prefix is added by die(...), so we don't have to do it. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index fd2b168..9e8109e 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -516,13 +516,13 @@ int cmd_submodule__helper(int argc, const char **argv, const char *prefix) { int i; if (argc < 2) - die(_("fatal: submodule--helper subcommand must be " + die(_("submodule--helper subcommand must be " "called with a subcommand")); for (i = 0; i < ARRAY_SIZE(commands); i++) if (!strcmp(argv[1], commands[i].cmd)) return commands[i].fn(argc - 1, argv + 1, prefix); - die(_("fatal: '%s' is not a valid submodule--helper " + die(_("'%s' is not a valid submodule--helper " "subcommand"), argv[1]); } -- cgit v0.10.2-6-g49f6 From 2335b870fa7942d9ca9e73e96e35171fda741376 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:19 -0800 Subject: submodule update: expose parallelism to the user Expose possible parallelism either via the "--jobs" CLI parameter or the "submodule.fetchJobs" setting. By having the variable initialized to -1, we make sure 0 can be passed into the parallel processing machine, which will then pick as many parallel workers as there are CPUs. Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/git-submodule.txt b/Documentation/git-submodule.txt index 1572f05..13adebf 100644 --- a/Documentation/git-submodule.txt +++ b/Documentation/git-submodule.txt @@ -16,7 +16,7 @@ SYNOPSIS 'git submodule' [--quiet] deinit [-f|--force] [--] ... 'git submodule' [--quiet] update [--init] [--remote] [-N|--no-fetch] [-f|--force] [--rebase|--merge] [--reference ] - [--depth ] [--recursive] [--] [...] + [--depth ] [--recursive] [--jobs ] [--] [...] 'git submodule' [--quiet] summary [--cached|--files] [(-n|--summary-limit) ] [commit] [--] [...] 'git submodule' [--quiet] foreach [--recursive] @@ -377,6 +377,11 @@ for linkgit:git-clone[1]'s `--reference` and `--shared` options carefully. clone with a history truncated to the specified number of revisions. See linkgit:git-clone[1] +-j :: +--jobs :: + This option is only valid for the update command. + Clone new submodules in parallel with as many jobs. + Defaults to the `submodule.fetchJobs` option. ...:: Paths to submodule(s). When specified this will restrict the command diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 9e8109e..a484945 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -430,6 +430,7 @@ static int update_clone_task_finished(int result, static int update_clone(int argc, const char **argv, const char *prefix) { const char *update = NULL; + int max_jobs = -1; struct string_list_item *item; struct pathspec pathspec; struct submodule_update_clone suc = SUBMODULE_UPDATE_CLONE_INIT; @@ -450,6 +451,8 @@ static int update_clone(int argc, const char **argv, const char *prefix) OPT_STRING(0, "depth", &suc.depth, "", N_("Create a shallow clone truncated to the " "specified number of revisions")), + OPT_INTEGER('j', "jobs", &max_jobs, + N_("parallel jobs")), OPT__QUIET(&suc.quiet, N_("don't print cloning progress")), OPT_END() }; @@ -477,7 +480,10 @@ static int update_clone(int argc, const char **argv, const char *prefix) gitmodules_config(); git_config(submodule_config, NULL); - run_processes_parallel(1, + if (max_jobs < 0) + max_jobs = parallel_submodules(); + + run_processes_parallel(max_jobs, update_clone_get_next_task, update_clone_start_failure, update_clone_task_finished, diff --git a/git-submodule.sh b/git-submodule.sh index a6a82d2..86018ee 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -645,6 +645,14 @@ cmd_update() --depth=*) depth=$1 ;; + -j|--jobs) + case "$2" in '') usage ;; esac + jobs="--jobs=$2" + shift + ;; + --jobs=*) + jobs=$1 + ;; --) shift break @@ -671,6 +679,7 @@ cmd_update() ${update:+--update "$update"} \ ${reference:+--reference "$reference"} \ ${depth:+--depth "$depth"} \ + ${jobs:+$jobs} \ "$@" || echo "#unmatched" } | { err= diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index dda3929..7fd5142 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -774,4 +774,16 @@ test_expect_success 'submodule update --recursive drops module name before recur test_i18ngrep "Submodule path .deeper/submodule/subsubmodule.: checked out" actual ) ' + +test_expect_success 'submodule update can be run in parallel' ' + (cd super2 && + GIT_TRACE=$(pwd)/trace.out git submodule update --jobs 7 && + grep "7 tasks" trace.out && + git config submodule.fetchJobs 8 && + GIT_TRACE=$(pwd)/trace.out git submodule update && + grep "8 tasks" trace.out && + GIT_TRACE=$(pwd)/trace.out git submodule update --jobs 9 && + grep "9 tasks" trace.out + ) +' test_done -- cgit v0.10.2-6-g49f6 From 72290d6a1d8cccd22f58c21418c0f75bfcdf574d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 29 Feb 2016 18:07:20 -0800 Subject: clone: allow an explicit argument for parallel submodule clones Just pass it along to "git submodule update", which may pick reasonable defaults if you don't specify an explicit number. Reviewed-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 6bf000d..6db7b6d 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -14,7 +14,7 @@ SYNOPSIS [-o ] [-b ] [-u ] [--reference ] [--dissociate] [--separate-git-dir ] [--depth ] [--[no-]single-branch] - [--recursive | --recurse-submodules] [--] + [--recursive | --recurse-submodules] [--jobs ] [--] [] DESCRIPTION @@ -221,6 +221,10 @@ objects from the source repository into a pack in the cloned repository. The result is Git repository can be separated from working tree. +-j :: +--jobs :: + The number of submodules fetched at the same time. + Defaults to the `submodule.fetchJobs` option. :: The (possibly remote) repository to clone from. See the diff --git a/builtin/clone.c b/builtin/clone.c index a0b3cd9..b004fb4 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -50,6 +50,7 @@ static int option_progress = -1; static struct string_list option_config; static struct string_list option_reference; static int option_dissociate; +static int max_jobs = -1; static struct option builtin_clone_options[] = { OPT__VERBOSITY(&option_verbosity), @@ -72,6 +73,8 @@ static struct option builtin_clone_options[] = { N_("initialize submodules in the clone")), OPT_BOOL(0, "recurse-submodules", &option_recursive, N_("initialize submodules in the clone")), + OPT_INTEGER('j', "jobs", &max_jobs, + N_("number of submodules cloned in parallel")), OPT_STRING(0, "template", &option_template, N_("template-directory"), N_("directory from which templates will be used")), OPT_STRING_LIST(0, "reference", &option_reference, N_("repo"), @@ -95,10 +98,6 @@ static struct option builtin_clone_options[] = { OPT_END() }; -static const char *argv_submodule[] = { - "submodule", "update", "--init", "--recursive", NULL -}; - static const char *get_repo_path_1(struct strbuf *path, int *is_bundle) { static char *suffix[] = { "/.git", "", ".git/.git", ".git" }; @@ -724,8 +723,16 @@ static int checkout(void) err |= run_hook_le(NULL, "post-checkout", sha1_to_hex(null_sha1), sha1_to_hex(sha1), "1", NULL); - if (!err && option_recursive) - err = run_command_v_opt(argv_submodule, RUN_GIT_CMD); + if (!err && option_recursive) { + struct argv_array args = ARGV_ARRAY_INIT; + argv_array_pushl(&args, "submodule", "update", "--init", "--recursive", NULL); + + if (max_jobs != -1) + argv_array_pushf(&args, "--jobs=%d", max_jobs); + + err = run_command_v_opt(args.argv, RUN_GIT_CMD); + argv_array_clear(&args); + } return err; } diff --git a/t/t7406-submodule-update.sh b/t/t7406-submodule-update.sh index 7fd5142..090891e 100755 --- a/t/t7406-submodule-update.sh +++ b/t/t7406-submodule-update.sh @@ -786,4 +786,19 @@ test_expect_success 'submodule update can be run in parallel' ' grep "9 tasks" trace.out ) ' + +test_expect_success 'git clone passes the parallel jobs config on to submodules' ' + test_when_finished "rm -rf super4" && + GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules --jobs 7 . super4 && + grep "7 tasks" trace.out && + rm -rf super4 && + git config --global submodule.fetchJobs 8 && + GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules . super4 && + grep "8 tasks" trace.out && + rm -rf super4 && + GIT_TRACE=$(pwd)/trace.out git clone --recurse-submodules --jobs 9 . super4 && + grep "9 tasks" trace.out && + rm -rf super4 +' + test_done -- cgit v0.10.2-6-g49f6