From 163ed566db7fd0f286413040e368324a59c642f9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:57:34 -0400 Subject: add sha1_array API docs This API was introduced in 902bb36, but never documented. Let's be nice to future users of the code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/api-sha1-array.txt b/Documentation/technical/api-sha1-array.txt new file mode 100644 index 0000000..4a4bae8 --- /dev/null +++ b/Documentation/technical/api-sha1-array.txt @@ -0,0 +1,79 @@ +sha1-array API +============== + +The sha1-array API provides storage and manipulation of sets of SHA1 +identifiers. The emphasis is on storage and processing efficiency, +making them suitable for large lists. Note that the ordering of items is +not preserved over some operations. + +Data Structures +--------------- + +`struct sha1_array`:: + + A single array of SHA1 hashes. This should be initialized by + assignment from `SHA1_ARRAY_INIT`. The `sha1` member contains + the actual data. The `nr` member contains the number of items in + the set. The `alloc` and `sorted` members are used internally, + and should not be needed by API callers. + +Functions +--------- + +`sha1_array_append`:: + Add an item to the set. The sha1 will be placed at the end of + the array (but note that some operations below may lose this + ordering). + +`sha1_array_sort`:: + Sort the elements in the array. + +`sha1_array_lookup`:: + Perform a binary search of the array for a specific sha1. + If found, returns the offset (in number of elements) of the + sha1. If not found, returns a negative integer. If the array is + not sorted, this function has the side effect of sorting it. + +`sha1_array_clear`:: + Free all memory associated with the array and return it to the + initial, empty state. + +`sha1_array_for_each_unique`:: + Efficiently iterate over each unique element of the list, + executing the callback function for each one. If the array is + not sorted, this function has the side effect of sorting it. + +Examples +-------- + +----------------------------------------- +void print_callback(const unsigned char sha1[20], + void *data) +{ + printf("%s\n", sha1_to_hex(sha1)); +} + +void some_func(void) +{ + struct sha1_array hashes = SHA1_ARRAY_INIT; + unsigned char sha1[20]; + + /* Read objects into our set */ + while (read_object_from_stdin(sha1)) + sha1_array_append(&hashes, sha1); + + /* Check if some objects are in our set */ + while (read_object_from_stdin(sha1)) { + if (sha1_array_lookup(&hashes, sha1) >= 0) + printf("it's in there!\n"); + + /* + * Print the unique set of objects. We could also have + * avoided adding duplicate objects in the first place, + * but we would end up re-sorting the array repeatedly. + * Instead, this will sort once and then skip duplicates + * in linear time. + */ + sha1_array_for_each_unique(&hashes, print_callback, NULL); +} +----------------------------------------- -- cgit v0.10.2-6-g49f6 From 7878b07c0d86d05fa505f2464557c69addcc2c05 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:57:47 -0400 Subject: quote.h: fix bogus comment Commit 758e915 made sq_quote_next static, removing it from quote.h. However, it forgot to update the related comment, making it appear as a confusing description of sq_quote_to_argv. Let's remove the crufty bits, and elaborate more on sq_quote_to_argv. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/quote.h b/quote.h index 024e21d..252b0df 100644 --- a/quote.h +++ b/quote.h @@ -40,9 +40,8 @@ extern char *sq_dequote(char *); /* * Same as the above, but can be used to unwrap many arguments in the - * same string separated by space. "next" is changed to point to the - * next argument that should be passed as first parameter. When there - * is no more argument to be dequoted, "next" is updated to point to NULL. + * same string separated by space. Like sq_quote, it works in place, + * modifying arg and appending pointers into it to argv. */ extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); -- cgit v0.10.2-6-g49f6 From c1189caeaf726e6c16c8bca7da8bf0b243da478d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:57:57 -0400 Subject: refactor argv_array into generic code The submodule code recently grew generic code to build a dynamic argv array. Many other parts of the code can reuse this, too, so let's make it generically available. There are two enhancements not found in the original code: 1. We now handle the NULL-termination invariant properly, even when no strings have been pushed (before, you could have an empty, NULL argv). This was not a problem for the submodule code, which always pushed at least one argument, but was not sufficiently safe for generic code. 2. There is a formatted variant of the "push" function. This is a convenience function which was not needed by the submodule code, but will make it easier to port other users to the new code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/Documentation/technical/api-argv-array.txt b/Documentation/technical/api-argv-array.txt new file mode 100644 index 0000000..49b3d52 --- /dev/null +++ b/Documentation/technical/api-argv-array.txt @@ -0,0 +1,46 @@ +argv-array API +============== + +The argv-array API allows one to dynamically build and store +NULL-terminated lists. An argv-array maintains the invariant that the +`argv` member always points to a non-NULL array, and that the array is +always NULL-terminated at the element pointed to by `argv[argc]`. This +makes the result suitable for passing to functions expecting to receive +argv from main(), or the link:api-run-command.html[run-command API]. + +The link:api-string-list.html[string-list API] is similar, but cannot be +used for these purposes; instead of storing a straight string pointer, +it contains an item structure with a `util` field that is not compatible +with the traditional argv interface. + +Each `argv_array` manages its own memory. Any strings pushed into the +array are duplicated, and all memory is freed by argv_array_clear(). + +Data Structures +--------------- + +`struct argv_array`:: + + A single array. This should be initialized by assignment from + `ARGV_ARRAY_INIT`, or by calling `argv_array_init`. The `argv` + member contains the actual array; the `argc` member contains the + number of elements in the array, not including the terminating + NULL. + +Functions +--------- + +`argv_array_init`:: + Initialize an array. This is no different than assigning from + `ARGV_ARRAY_INIT`. + +`argv_array_push`:: + Push a copy of a string onto the end of the array. + +`argv_array_pushf`:: + Format a string and push it onto the end of the array. This is a + convenience wrapper combining `strbuf_addf` and `argv_array_push`. + +`argv_array_clear`:: + Free all memory associated with the array and return it to the + initial, empty state. diff --git a/Makefile b/Makefile index e40ac0c..ec7d06c 100644 --- a/Makefile +++ b/Makefile @@ -497,6 +497,7 @@ VCSSVN_LIB=vcs-svn/lib.a LIB_H += advice.h LIB_H += archive.h +LIB_H += argv-array.h LIB_H += attr.h LIB_H += blob.h LIB_H += builtin.h @@ -575,6 +576,7 @@ LIB_OBJS += alloc.o LIB_OBJS += archive.o LIB_OBJS += archive-tar.o LIB_OBJS += archive-zip.o +LIB_OBJS += argv-array.o LIB_OBJS += attr.o LIB_OBJS += base85.o LIB_OBJS += bisect.o diff --git a/argv-array.c b/argv-array.c new file mode 100644 index 0000000..a4e0420 --- /dev/null +++ b/argv-array.c @@ -0,0 +1,51 @@ +#include "cache.h" +#include "argv-array.h" +#include "strbuf.h" + +static const char *empty_argv_storage = NULL; +const char **empty_argv = &empty_argv_storage; + +void argv_array_init(struct argv_array *array) +{ + array->argv = empty_argv; + array->argc = 0; + array->alloc = 0; +} + +static void argv_array_push_nodup(struct argv_array *array, const char *value) +{ + if (array->argv == empty_argv) + array->argv = NULL; + + ALLOC_GROW(array->argv, array->argc + 2, array->alloc); + array->argv[array->argc++] = value; + array->argv[array->argc] = NULL; +} + +void argv_array_push(struct argv_array *array, const char *value) +{ + argv_array_push_nodup(array, xstrdup(value)); +} + +void argv_array_pushf(struct argv_array *array, const char *fmt, ...) +{ + va_list ap; + struct strbuf v = STRBUF_INIT; + + va_start(ap, fmt); + strbuf_vaddf(&v, fmt, ap); + va_end(ap); + + argv_array_push_nodup(array, strbuf_detach(&v, NULL)); +} + +void argv_array_clear(struct argv_array *array) +{ + if (array->argv != empty_argv) { + int i; + for (i = 0; i < array->argc; i++) + free((char **)array->argv[i]); + free(array->argv); + } + argv_array_init(array); +} diff --git a/argv-array.h b/argv-array.h new file mode 100644 index 0000000..74dd2b1 --- /dev/null +++ b/argv-array.h @@ -0,0 +1,20 @@ +#ifndef ARGV_ARRAY_H +#define ARGV_ARRAY_H + +extern const char **empty_argv; + +struct argv_array { + const char **argv; + int argc; + int alloc; +}; + +#define ARGV_ARRAY_INIT { empty_argv, 0, 0 } + +void argv_array_init(struct argv_array *); +void argv_array_push(struct argv_array *, const char *); +__attribute__((format (printf,2,3))) +void argv_array_pushf(struct argv_array *, const char *fmt, ...); +void argv_array_clear(struct argv_array *); + +#endif /* ARGV_ARRAY_H */ diff --git a/submodule.c b/submodule.c index 9431c42..6306737 100644 --- a/submodule.c +++ b/submodule.c @@ -9,6 +9,7 @@ #include "refs.h" #include "string-list.h" #include "sha1-array.h" +#include "argv-array.h" static struct string_list config_name_for_path; static struct string_list config_fetch_recurse_submodules_for_name; @@ -388,52 +389,22 @@ void check_for_new_submodule_commits(unsigned char new_sha1[20]) sha1_array_append(&ref_tips_after_fetch, new_sha1); } -struct argv_array { - const char **argv; - unsigned int argc; - unsigned int alloc; -}; - -static void init_argv(struct argv_array *array) -{ - array->argv = NULL; - array->argc = 0; - array->alloc = 0; -} - -static void push_argv(struct argv_array *array, const char *value) -{ - ALLOC_GROW(array->argv, array->argc + 2, array->alloc); - array->argv[array->argc++] = xstrdup(value); - array->argv[array->argc] = NULL; -} - -static void clear_argv(struct argv_array *array) -{ - int i; - for (i = 0; i < array->argc; i++) - free((char **)array->argv[i]); - free(array->argv); - init_argv(array); -} - static void add_sha1_to_argv(const unsigned char sha1[20], void *data) { - push_argv(data, sha1_to_hex(sha1)); + argv_array_push(data, sha1_to_hex(sha1)); } static void calculate_changed_submodule_paths(void) { struct rev_info rev; struct commit *commit; - struct argv_array argv; + struct argv_array argv = ARGV_ARRAY_INIT; init_revisions(&rev, NULL); - init_argv(&argv); - push_argv(&argv, "--"); /* argv[0] program name */ + argv_array_push(&argv, "--"); /* argv[0] program name */ sha1_array_for_each_unique(&ref_tips_after_fetch, add_sha1_to_argv, &argv); - push_argv(&argv, "--not"); + argv_array_push(&argv, "--not"); sha1_array_for_each_unique(&ref_tips_before_fetch, add_sha1_to_argv, &argv); setup_revisions(argv.argc, argv.argv, &rev, NULL); @@ -460,7 +431,7 @@ static void calculate_changed_submodule_paths(void) } } - clear_argv(&argv); + argv_array_clear(&argv); sha1_array_clear(&ref_tips_before_fetch); sha1_array_clear(&ref_tips_after_fetch); initialized_fetch_ref_tips = 0; -- cgit v0.10.2-6-g49f6 From 37e8161a04360d01edd0611135f5eae8e6a08224 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:58:08 -0400 Subject: quote: provide sq_dequote_to_argv_array This is similar to sq_dequote_to_argv, but more convenient if you have an argv_array. It's tempting to just feed the components of the argv_array to sq_dequote_to_argv instead, but: 1. It wouldn't maintain the NULL-termination invariant of argv_array. 2. It doesn't match the memory ownership policy of argv_array (in which each component is free-able, not a pointer into a separate buffer). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/quote.c b/quote.c index 63d3b01..87bc65e 100644 --- a/quote.c +++ b/quote.c @@ -1,5 +1,6 @@ #include "cache.h" #include "quote.h" +#include "argv-array.h" int quote_path_fully = 1; @@ -120,7 +121,9 @@ char *sq_dequote(char *arg) return sq_dequote_step(arg, NULL); } -int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) +static int sq_dequote_to_argv_internal(char *arg, + const char ***argv, int *nr, int *alloc, + struct argv_array *array) { char *next = arg; @@ -130,13 +133,27 @@ int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) char *dequoted = sq_dequote_step(next, &next); if (!dequoted) return -1; - ALLOC_GROW(*argv, *nr + 1, *alloc); - (*argv)[(*nr)++] = dequoted; + if (argv) { + ALLOC_GROW(*argv, *nr + 1, *alloc); + (*argv)[(*nr)++] = dequoted; + } + if (array) + argv_array_push(array, dequoted); } while (next); return 0; } +int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) +{ + return sq_dequote_to_argv_internal(arg, argv, nr, alloc, NULL); +} + +int sq_dequote_to_argv_array(char *arg, struct argv_array *array) +{ + return sq_dequote_to_argv_internal(arg, NULL, NULL, NULL, array); +} + /* 1 means: quote as octal * 0 means: quote as octal if (quote_path_fully) * -1 means: never quote diff --git a/quote.h b/quote.h index 252b0df..133155a 100644 --- a/quote.h +++ b/quote.h @@ -45,6 +45,14 @@ extern char *sq_dequote(char *); */ extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); +/* + * Same as above, but store the unquoted strings in an argv_array. We will + * still modify arg in place, but unlike sq_dequote_to_argv, the argv_array + * will duplicate and take ownership of the strings. + */ +struct argv_array; +extern int sq_dequote_to_argv_array(char *arg, struct argv_array *); + extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); -- cgit v0.10.2-6-g49f6 From 8a534b612410b9746ef33af9e4631fc18bd77621 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:58:14 -0400 Subject: bisect: use argv_array API Now that the argv_array API exists, we can save some lines of code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/bisect.c b/bisect.c index dd7e8ed..ef92871 100644 --- a/bisect.c +++ b/bisect.c @@ -10,18 +10,13 @@ #include "log-tree.h" #include "bisect.h" #include "sha1-array.h" +#include "argv-array.h" static struct sha1_array good_revs; static struct sha1_array skipped_revs; static const unsigned char *current_bad_sha1; -struct argv_array { - const char **argv; - int argv_nr; - int argv_alloc; -}; - static const char *argv_checkout[] = {"checkout", "-q", NULL, "--", NULL}; static const char *argv_show_branch[] = {"show-branch", NULL, NULL}; @@ -404,21 +399,6 @@ struct commit_list *find_bisection(struct commit_list *list, return best; } -static void argv_array_push(struct argv_array *array, const char *string) -{ - ALLOC_GROW(array->argv, array->argv_nr + 1, array->argv_alloc); - array->argv[array->argv_nr++] = string; -} - -static void argv_array_push_sha1(struct argv_array *array, - const unsigned char *sha1, - const char *format) -{ - struct strbuf buf = STRBUF_INIT; - strbuf_addf(&buf, format, sha1_to_hex(sha1)); - argv_array_push(array, strbuf_detach(&buf, NULL)); -} - static int register_ref(const char *refname, const unsigned char *sha1, int flags, void *cb_data) { @@ -448,16 +428,10 @@ static void read_bisect_paths(struct argv_array *array) die_errno("Could not open file '%s'", filename); while (strbuf_getline(&str, fp, '\n') != EOF) { - char *quoted; - int res; - strbuf_trim(&str); - quoted = strbuf_detach(&str, NULL); - res = sq_dequote_to_argv(quoted, &array->argv, - &array->argv_nr, &array->argv_alloc); - if (res) + if (sq_dequote_to_argv_array(str.buf, array)) die("Badly quoted content in file '%s': %s", - filename, quoted); + filename, str.buf); } strbuf_release(&str); @@ -622,7 +596,7 @@ static void bisect_rev_setup(struct rev_info *revs, const char *prefix, const char *bad_format, const char *good_format, int read_paths) { - struct argv_array rev_argv = { NULL, 0, 0 }; + struct argv_array rev_argv = ARGV_ARRAY_INIT; int i; init_revisions(revs, prefix); @@ -630,17 +604,17 @@ static void bisect_rev_setup(struct rev_info *revs, const char *prefix, revs->commit_format = CMIT_FMT_UNSPECIFIED; /* rev_argv.argv[0] will be ignored by setup_revisions */ - argv_array_push(&rev_argv, xstrdup("bisect_rev_setup")); - argv_array_push_sha1(&rev_argv, current_bad_sha1, bad_format); + argv_array_push(&rev_argv, "bisect_rev_setup"); + argv_array_pushf(&rev_argv, bad_format, sha1_to_hex(current_bad_sha1)); for (i = 0; i < good_revs.nr; i++) - argv_array_push_sha1(&rev_argv, good_revs.sha1[i], - good_format); - argv_array_push(&rev_argv, xstrdup("--")); + argv_array_pushf(&rev_argv, good_format, + sha1_to_hex(good_revs.sha1[i])); + argv_array_push(&rev_argv, "--"); if (read_paths) read_bisect_paths(&rev_argv); - argv_array_push(&rev_argv, NULL); - setup_revisions(rev_argv.argv_nr, rev_argv.argv, revs, NULL); + setup_revisions(rev_argv.argc, rev_argv.argv, revs, NULL); + /* XXX leak rev_argv, as "revs" may still be pointing to it */ } static void bisect_common(struct rev_info *revs) -- cgit v0.10.2-6-g49f6 From 7bf0b01750e975f8a4df925ac75efa7f984bf393 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:58:19 -0400 Subject: checkout: use argv_array API We were using a similar ad-hoc rev_list_args structure, but this saves some code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/checkout.c b/builtin/checkout.c index 28cdc51..bb056e4 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -19,6 +19,7 @@ #include "ll-merge.h" #include "resolve-undo.h" #include "submodule.h" +#include "argv-array.h" static const char * const checkout_usage[] = { "git checkout [options] ", @@ -588,24 +589,12 @@ static void update_refs_for_switch(struct checkout_opts *opts, report_tracking(new); } -struct rev_list_args { - int argc; - int alloc; - const char **argv; -}; - -static void add_one_rev_list_arg(struct rev_list_args *args, const char *s) -{ - ALLOC_GROW(args->argv, args->argc + 1, args->alloc); - args->argv[args->argc++] = s; -} - static int add_one_ref_to_rev_list_arg(const char *refname, const unsigned char *sha1, int flags, void *cb_data) { - add_one_rev_list_arg(cb_data, refname); + argv_array_push(cb_data, refname); return 0; } @@ -684,15 +673,14 @@ static void suggest_reattach(struct commit *commit, struct rev_info *revs) */ static void orphaned_commit_warning(struct commit *commit) { - struct rev_list_args args = { 0, 0, NULL }; + struct argv_array args = ARGV_ARRAY_INIT; struct rev_info revs; - add_one_rev_list_arg(&args, "(internal)"); - add_one_rev_list_arg(&args, sha1_to_hex(commit->object.sha1)); - add_one_rev_list_arg(&args, "--not"); + argv_array_push(&args, "(internal)"); + argv_array_push(&args, sha1_to_hex(commit->object.sha1)); + argv_array_push(&args, "--not"); for_each_ref(add_one_ref_to_rev_list_arg, &args); - add_one_rev_list_arg(&args, "--"); - add_one_rev_list_arg(&args, NULL); + argv_array_push(&args, "--"); init_revisions(&revs, NULL); if (setup_revisions(args.argc - 1, args.argv, &revs, NULL) != 1) @@ -704,6 +692,7 @@ static void orphaned_commit_warning(struct commit *commit) else describe_detached_head(_("Previous HEAD position was"), commit); + argv_array_clear(&args); clear_commit_marks(commit, -1); for_each_ref(clear_commit_marks_from_one_ref, NULL); } -- cgit v0.10.2-6-g49f6 From 5d40a179855a39ae9e8ac22e1874720f2b98a91c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 13 Sep 2011 17:58:25 -0400 Subject: run_hook: use argv_array API This was a pretty straightforward use, so it really doesn't save that many lines. Still, perhaps it's a little bit more readable. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/run-command.c b/run-command.c index 70e8a24..73e013e 100644 --- a/run-command.c +++ b/run-command.c @@ -1,6 +1,7 @@ #include "cache.h" #include "run-command.h" #include "exec_cmd.h" +#include "argv-array.h" static inline void close_pair(int fd[2]) { @@ -609,26 +610,23 @@ int finish_async(struct async *async) int run_hook(const char *index_file, const char *name, ...) { struct child_process hook; - const char **argv = NULL, *env[2]; + struct argv_array argv = ARGV_ARRAY_INIT; + const char *p, *env[2]; char index[PATH_MAX]; va_list args; int ret; - size_t i = 0, alloc = 0; if (access(git_path("hooks/%s", name), X_OK) < 0) return 0; va_start(args, name); - ALLOC_GROW(argv, i + 1, alloc); - argv[i++] = git_path("hooks/%s", name); - while (argv[i-1]) { - ALLOC_GROW(argv, i + 1, alloc); - argv[i++] = va_arg(args, const char *); - } + argv_array_push(&argv, git_path("hooks/%s", name)); + while ((p = va_arg(args, const char *))) + argv_array_push(&argv, p); va_end(args); memset(&hook, 0, sizeof(hook)); - hook.argv = argv; + hook.argv = argv.argv; hook.no_stdin = 1; hook.stdout_to_stderr = 1; if (index_file) { @@ -639,6 +637,6 @@ int run_hook(const char *index_file, const char *name, ...) } ret = run_command(&hook); - free(argv); + argv_array_clear(&argv); return ret; } -- cgit v0.10.2-6-g49f6