From c460c0ecdca46be85f6d9c845f9df7ce0e45c3c2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:33:26 -0400 Subject: run-command: store an optional argv_array All child_process structs need to point to an argv. For flexibility, we do not mandate the use of a dynamic argv_array. However, because the child_process does not own the memory, this can make memory management with a separate argv_array difficult. For example, if a function calls start_command but not finish_command, the argv memory must persist. The code needs to arrange to clean up the argv_array separately after finish_command runs. As a result, some of our code in this situation just leaks the memory. To help such cases, this patch adds a built-in argv_array to the child_process, which gets cleaned up automatically (both in finish_command and when start_command fails). Callers may use it if they choose, but can continue to use the raw argv if they wish. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/api-run-command.txt b/Documentation/technical/api-run-command.txt index 5d7d7f2..69510ae 100644 --- a/Documentation/technical/api-run-command.txt +++ b/Documentation/technical/api-run-command.txt @@ -109,6 +109,13 @@ terminated), of which .argv[0] is the program name to run (usually without a path). If the command to run is a git command, set argv[0] to the command name without the 'git-' prefix and set .git_cmd = 1. +Note that the ownership of the memory pointed to by .argv stays with the +caller, but it should survive until `finish_command` completes. If the +.argv member is NULL, `start_command` will point it at the .args +`argv_array` (so you may use one or the other, but you must use exactly +one). The memory in .args will be cleaned up automatically during +`finish_command` (or during `start_command` when it is unsuccessful). + The members .in, .out, .err are used to redirect stdin, stdout, stderr as follows: diff --git a/run-command.c b/run-command.c index 75abc47..be07d4a 100644 --- a/run-command.c +++ b/run-command.c @@ -279,6 +279,9 @@ int start_command(struct child_process *cmd) int failed_errno; char *str; + if (!cmd->argv) + cmd->argv = cmd->args.argv; + /* * In case of errors we must keep the promise to close FDs * that have been passed in via ->in and ->out. @@ -328,6 +331,7 @@ int start_command(struct child_process *cmd) fail_pipe: error("cannot create %s pipe for %s: %s", str, cmd->argv[0], strerror(failed_errno)); + argv_array_clear(&cmd->args); errno = failed_errno; return -1; } @@ -519,6 +523,7 @@ fail_pipe: close_pair(fderr); else if (cmd->err) close(cmd->err); + argv_array_clear(&cmd->args); errno = failed_errno; return -1; } @@ -543,7 +548,9 @@ fail_pipe: int finish_command(struct child_process *cmd) { - return wait_or_whine(cmd->pid, cmd->argv[0]); + int ret = wait_or_whine(cmd->pid, cmd->argv[0]); + argv_array_clear(&cmd->args); + return ret; } int run_command(struct child_process *cmd) diff --git a/run-command.h b/run-command.h index 3653bfa..ea73de3 100644 --- a/run-command.h +++ b/run-command.h @@ -5,8 +5,11 @@ #include #endif +#include "argv-array.h" + struct child_process { const char **argv; + struct argv_array args; pid_t pid; /* * Using .in, .out, .err: -- cgit v0.10.2-6-g49f6 From 5eb7f7ead8537420fde3eb344640096fc330bc16 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:33:40 -0400 Subject: run_column_filter: use argv_array We currently set up the argv array by hand in a fixed-size stack-local array. Using an argv array is more readable, as it handles buffer allocation us (not to mention makes it obvious we do not overflow the array). However, there's a more subtle benefit, too. We leave the function having run start_command (with the child_process in a static global), and then later run finish_command from another function. That means when we run finish_command, neither column_process.argv nor the memory it points to is valid any longer. Most of the time finish_command does not bother looking at argv, but it may if it encounters an error (e.g., waitpid failure or signal death). This is unusual, which is why nobody has noticed. But by using run-command's built-in argv_array, the memory ownership is handled for us automatically. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/column.c b/column.c index 8d1ce88..1a468de 100644 --- a/column.c +++ b/column.c @@ -370,46 +370,29 @@ static struct child_process column_process; int run_column_filter(int colopts, const struct column_options *opts) { - const char *av[10]; - int ret, ac = 0; - struct strbuf sb_colopt = STRBUF_INIT; - struct strbuf sb_width = STRBUF_INIT; - struct strbuf sb_padding = STRBUF_INIT; + struct argv_array *argv; if (fd_out != -1) return -1; - av[ac++] = "column"; - strbuf_addf(&sb_colopt, "--raw-mode=%d", colopts); - av[ac++] = sb_colopt.buf; - if (opts && opts->width) { - strbuf_addf(&sb_width, "--width=%d", opts->width); - av[ac++] = sb_width.buf; - } - if (opts && opts->indent) { - av[ac++] = "--indent"; - av[ac++] = opts->indent; - } - if (opts && opts->padding) { - strbuf_addf(&sb_padding, "--padding=%d", opts->padding); - av[ac++] = sb_padding.buf; - } - av[ac] = NULL; + memset(&column_process, 0, sizeof(column_process)); + argv = &column_process.args; + + argv_array_push(argv, "column"); + argv_array_pushf(argv, "--raw-mode=%d", colopts); + if (opts && opts->width) + argv_array_pushf(argv, "--width=%d", opts->width); + if (opts && opts->indent) + argv_array_pushf(argv, "--indent=%s", opts->indent); + if (opts && opts->padding) + argv_array_pushf(argv, "--padding=%d", opts->padding); fflush(stdout); - memset(&column_process, 0, sizeof(column_process)); column_process.in = -1; column_process.out = dup(1); column_process.git_cmd = 1; - column_process.argv = av; - - ret = start_command(&column_process); - - strbuf_release(&sb_colopt); - strbuf_release(&sb_width); - strbuf_release(&sb_padding); - if (ret) + if (start_command(&column_process)) return -2; fd_out = dup(1); -- cgit v0.10.2-6-g49f6 From 1823bea10fceb371c7876e598d2413c85890cafc Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:34:09 -0400 Subject: git_connect: use argv_array This avoids magic numbers when we allocate fixed-size argv arrays, and makes it more obvious that we are not overflowing. It is also the first step to fixing a memory leak. When git_connect returns a child_process struct, the argv array in the struct is dynamically allocated, but the individual strings are not (they are either owned elsewhere, or are freed). Later, in finish_connect, we free the array but leave the strings alone. This works for the child_process created by git_connect, but if we use transport_take_over, we may also end up with a child_process created by transport-helper's get_helper. In that case, the strings are freshly allocated, and we would want to free them. However, we have no idea in finish_connect which type we have. By consistently using run-command's internal argv-array, we do not have to worry about this issue at all; finish_command takes care of it for us, and we can drop our manual free entirely. Note that this actually makes the get_helper leak slightly worse; now we are leaking both the strings and the array. But when we adjust it in a future patch, that leak will go away entirely. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/connect.c b/connect.c index a983d06..94a6650 100644 --- a/connect.c +++ b/connect.c @@ -534,22 +534,18 @@ static int git_use_proxy(const char *host) static struct child_process *git_proxy_connect(int fd[2], char *host) { const char *port = STR(DEFAULT_GIT_PORT); - const char **argv; struct child_process *proxy; get_host_and_port(&host, &port); - argv = xmalloc(sizeof(*argv) * 4); - argv[0] = git_proxy_command; - argv[1] = host; - argv[2] = port; - argv[3] = NULL; proxy = xcalloc(1, sizeof(*proxy)); - proxy->argv = argv; + argv_array_push(&proxy->args, git_proxy_command); + argv_array_push(&proxy->args, host); + argv_array_push(&proxy->args, port); proxy->in = -1; proxy->out = -1; if (start_command(proxy)) - die("cannot start proxy %s", argv[0]); + die("cannot start proxy %s", git_proxy_command); fd[0] = proxy->out; /* read from proxy stdout */ fd[1] = proxy->in; /* write to proxy stdin */ return proxy; @@ -663,7 +659,6 @@ struct child_process *git_connect(int fd[2], const char *url, char *hostandport, *path; struct child_process *conn = &no_fork; enum protocol protocol; - const char **arg; struct strbuf cmd = STRBUF_INIT; /* Without this we cannot rely on waitpid() to tell @@ -707,7 +702,6 @@ struct child_process *git_connect(int fd[2], const char *url, sq_quote_buf(&cmd, path); conn->in = conn->out = -1; - conn->argv = arg = xcalloc(7, sizeof(*arg)); if (protocol == PROTO_SSH) { const char *ssh = getenv("GIT_SSH"); int putty = ssh && strcasestr(ssh, "plink"); @@ -718,22 +712,21 @@ struct child_process *git_connect(int fd[2], const char *url, if (!ssh) ssh = "ssh"; - *arg++ = ssh; + argv_array_push(&conn->args, ssh); if (putty && !strcasestr(ssh, "tortoiseplink")) - *arg++ = "-batch"; + argv_array_push(&conn->args, "-batch"); if (port) { /* P is for PuTTY, p is for OpenSSH */ - *arg++ = putty ? "-P" : "-p"; - *arg++ = port; + argv_array_push(&conn->args, putty ? "-P" : "-p"); + argv_array_push(&conn->args, port); } - *arg++ = ssh_host; + argv_array_push(&conn->args, ssh_host); } else { /* remove repo-local variables from the environment */ conn->env = local_repo_env; conn->use_shell = 1; } - *arg++ = cmd.buf; - *arg = NULL; + argv_array_push(&conn->args, cmd.buf); if (start_command(conn)) die("unable to fork"); @@ -759,7 +752,6 @@ int finish_connect(struct child_process *conn) return 0; code = finish_command(conn); - free(conn->argv); free(conn); return code; } -- cgit v0.10.2-6-g49f6 From e0ab2ac6c553cbba5d0275cfd35beb3351cae034 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:34:18 -0400 Subject: get_helper: use run-command's internal argv_array The get_helper functions dynamically allocates an argv_array, feeds it to start_command, and then returns. We then have to later clean up the memory manually after calling finish_command. We can make this simpler by just using run-command's internal argv_array, which handles cleanup for us. This also prevents a memory leak in the case that transport_take_over is used, in which case we free the child in finish_connect, which does not manually free the array. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/transport-helper.c b/transport-helper.c index b468e4f..fefd34f 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -101,7 +101,6 @@ static void do_take_over(struct transport *transport) static struct child_process *get_helper(struct transport *transport) { struct helper_data *data = transport->data; - struct argv_array argv = ARGV_ARRAY_INIT; struct strbuf buf = STRBUF_INIT; struct child_process *helper; const char **refspecs = NULL; @@ -123,10 +122,9 @@ static struct child_process *get_helper(struct transport *transport) helper->in = -1; helper->out = -1; helper->err = 0; - argv_array_pushf(&argv, "git-remote-%s", data->name); - argv_array_push(&argv, transport->remote->name); - argv_array_push(&argv, remove_ext_force(transport->url)); - helper->argv = argv_array_detach(&argv, NULL); + argv_array_pushf(&helper->args, "git-remote-%s", data->name); + argv_array_push(&helper->args, transport->remote->name); + argv_array_push(&helper->args, remove_ext_force(transport->url)); helper->git_cmd = 0; helper->silent_exec_failure = 1; @@ -245,7 +243,6 @@ static int disconnect_helper(struct transport *transport) close(data->helper->out); fclose(data->out); res = finish_command(data->helper); - argv_array_free_detached(data->helper->argv); free(data->helper); data->helper = NULL; } -- cgit v0.10.2-6-g49f6 From 2aeae40a754ed8296df95df263e694ad4eab3a49 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:34:44 -0400 Subject: get_exporter: use argv_array This simplifies the code and avoids a fixed array size that we might accidentally overflow. It also prevents a leak after finish_command is run, by using the argv_array that run-command manages for us. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/transport-helper.c b/transport-helper.c index fefd34f..9f8f3b1 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -418,30 +418,24 @@ static int get_exporter(struct transport *transport, { struct helper_data *data = transport->data; struct child_process *helper = get_helper(transport); - int argc = 0, i; - struct strbuf tmp = STRBUF_INIT; + int i; memset(fastexport, 0, sizeof(*fastexport)); /* we need to duplicate helper->in because we want to use it after * fastexport is done with it. */ fastexport->out = dup(helper->in); - fastexport->argv = xcalloc(6 + revlist_args->nr, sizeof(*fastexport->argv)); - fastexport->argv[argc++] = "fast-export"; - fastexport->argv[argc++] = "--use-done-feature"; - fastexport->argv[argc++] = data->signed_tags ? - "--signed-tags=verbatim" : "--signed-tags=warn-strip"; - if (data->export_marks) { - strbuf_addf(&tmp, "--export-marks=%s.tmp", data->export_marks); - fastexport->argv[argc++] = strbuf_detach(&tmp, NULL); - } - if (data->import_marks) { - strbuf_addf(&tmp, "--import-marks=%s", data->import_marks); - fastexport->argv[argc++] = strbuf_detach(&tmp, NULL); - } + argv_array_push(&fastexport->args, "fast-export"); + argv_array_push(&fastexport->args, "--use-done-feature"); + argv_array_push(&fastexport->args, data->signed_tags ? + "--signed-tags=verbatim" : "--signed-tags=warn-strip"); + if (data->export_marks) + argv_array_pushf(&fastexport->args, "--export-marks=%s.tmp", data->export_marks); + if (data->import_marks) + argv_array_pushf(&fastexport->args, "--import-marks=%s", data->import_marks); for (i = 0; i < revlist_args->nr; i++) - fastexport->argv[argc++] = revlist_args->items[i].string; + argv_array_push(&fastexport->args, revlist_args->items[i].string); fastexport->git_cmd = 1; return start_command(fastexport); -- cgit v0.10.2-6-g49f6 From 173fd1a1a44b89a204eb1289e4ff1f9d733e0cf1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:35:06 -0400 Subject: get_importer: use run-command's internal argv_array This saves a few lines and lets us avoid having to clean up the memory manually when the command finishes. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/transport-helper.c b/transport-helper.c index 9f8f3b1..d9063d7 100644 --- a/transport-helper.c +++ b/transport-helper.c @@ -394,18 +394,16 @@ static int get_importer(struct transport *transport, struct child_process *fasti { struct child_process *helper = get_helper(transport); struct helper_data *data = transport->data; - struct argv_array argv = ARGV_ARRAY_INIT; int cat_blob_fd, code; memset(fastimport, 0, sizeof(*fastimport)); fastimport->in = helper->out; - argv_array_push(&argv, "fast-import"); - argv_array_push(&argv, debug ? "--stats" : "--quiet"); + argv_array_push(&fastimport->args, "fast-import"); + argv_array_push(&fastimport->args, debug ? "--stats" : "--quiet"); if (data->bidi_import) { cat_blob_fd = xdup(helper->in); - argv_array_pushf(&argv, "--cat-blob-fd=%d", cat_blob_fd); + argv_array_pushf(&fastimport->args, "--cat-blob-fd=%d", cat_blob_fd); } - fastimport->argv = argv.argv; fastimport->git_cmd = 1; code = start_command(fastimport); @@ -476,7 +474,6 @@ static int fetch_with_import(struct transport *transport, if (finish_command(&fastimport)) die("Error while running fast-import"); - argv_array_free_detached(fastimport.argv); /* * The fast-import stream of a remote helper that advertises -- cgit v0.10.2-6-g49f6 From ff857e4ee8680af3988aff3383b1158f396a6fb2 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 15 May 2014 04:41:03 -0400 Subject: argv-array: drop "detach" code The argv_array_detach function (and associated free() function) was really only useful for transferring ownership of the memory to a "struct child_process". Now that we have an internal argv_array in that struct, there are no callers left. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/api-argv-array.txt b/Documentation/technical/api-argv-array.txt index a6b7d83..1a79781 100644 --- a/Documentation/technical/api-argv-array.txt +++ b/Documentation/technical/api-argv-array.txt @@ -53,11 +53,3 @@ Functions `argv_array_clear`:: Free all memory associated with the array and return it to the initial, empty state. - -`argv_array_detach`:: - Detach the argv array from the `struct argv_array`, transferring - ownership of the allocated array and strings. - -`argv_array_free_detached`:: - Free the memory allocated by a `struct argv_array` that was later - detached and is now no longer needed. diff --git a/argv-array.c b/argv-array.c index 9e960d5..256741d 100644 --- a/argv-array.c +++ b/argv-array.c @@ -68,23 +68,3 @@ void argv_array_clear(struct argv_array *array) } argv_array_init(array); } - -const char **argv_array_detach(struct argv_array *array, int *argc) -{ - const char **argv = - array->argv == empty_argv || array->argc == 0 ? NULL : array->argv; - if (argc) - *argc = array->argc; - argv_array_init(array); - return argv; -} - -void argv_array_free_detached(const char **argv) -{ - if (argv) { - int i; - for (i = 0; argv[i]; i++) - free((char **)argv[i]); - free(argv); - } -} diff --git a/argv-array.h b/argv-array.h index 85ba438..c65e6e8 100644 --- a/argv-array.h +++ b/argv-array.h @@ -19,7 +19,5 @@ LAST_ARG_MUST_BE_NULL void argv_array_pushl(struct argv_array *, ...); void argv_array_pop(struct argv_array *); void argv_array_clear(struct argv_array *); -const char **argv_array_detach(struct argv_array *array, int *argc); -void argv_array_free_detached(const char **argv); #endif /* ARGV_ARRAY_H */ -- cgit v0.10.2-6-g49f6