From 05b458c104708141d2fad211d79703b3b99cc5a8 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 12 Dec 2016 10:16:52 -0800 Subject: real_path: resolve symlinks by hand The current implementation of real_path uses chdir() in order to resolve symlinks. Unfortunately this isn't thread-safe as chdir() affects a process as a whole and not just an individual thread. Instead perform the symlink resolution by hand so that the calls to chdir() can be removed, making real_path one step closer to being reentrant. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/abspath.c b/abspath.c index 2825de8..cafcae0 100644 --- a/abspath.c +++ b/abspath.c @@ -11,8 +11,45 @@ int is_directory(const char *path) return (!stat(path, &st) && S_ISDIR(st.st_mode)); } +/* removes the last path component from 'path' except if 'path' is root */ +static void strip_last_component(struct strbuf *path) +{ + size_t offset = offset_1st_component(path->buf); + size_t len = path->len; + + /* Find start of the last component */ + while (offset < len && !is_dir_sep(path->buf[len - 1])) + len--; + /* Skip sequences of multiple path-separators */ + while (offset < len && is_dir_sep(path->buf[len - 1])) + len--; + + strbuf_setlen(path, len); +} + +/* get (and remove) the next component in 'remaining' and place it in 'next' */ +static void get_next_component(struct strbuf *next, struct strbuf *remaining) +{ + char *start = NULL; + char *end = NULL; + + strbuf_reset(next); + + /* look for the next component */ + /* Skip sequences of multiple path-separators */ + for (start = remaining->buf; is_dir_sep(*start); start++) + ; /* nothing */ + /* Find end of the path component */ + for (end = start; *end && !is_dir_sep(*end); end++) + ; /* nothing */ + + strbuf_add(next, start, end - start); + /* remove the component from 'remaining' */ + strbuf_remove(remaining, 0, end - remaining->buf); +} + /* We allow "recursive" symbolic links. Only within reason, though. 
*/ -#define MAXDEPTH 5 +#define MAXSYMLINKS 5 /* * Return the real path (i.e., absolute path, with symlinks resolved @@ -21,7 +58,6 @@ int is_directory(const char *path) * absolute_path().) The return value is a pointer to a static * buffer. * - * The input and all intermediate paths must be shorter than MAX_PATH. * The directory part of path (i.e., everything up to the last * dir_sep) must denote a valid, existing directory, but the last * component need not exist. If die_on_error is set, then die with an @@ -33,22 +69,16 @@ int is_directory(const char *path) */ static const char *real_path_internal(const char *path, int die_on_error) { - static struct strbuf sb = STRBUF_INIT; + static struct strbuf resolved = STRBUF_INIT; + struct strbuf remaining = STRBUF_INIT; + struct strbuf next = STRBUF_INIT; + struct strbuf symlink = STRBUF_INIT; char *retval = NULL; - - /* - * If we have to temporarily chdir(), store the original CWD - * here so that we can chdir() back to it at the end of the - * function: - */ - struct strbuf cwd = STRBUF_INIT; - - int depth = MAXDEPTH; - char *last_elem = NULL; + int num_symlinks = 0; struct stat st; /* We've already done it */ - if (path == sb.buf) + if (path == resolved.buf) return path; if (!*path) { @@ -58,74 +88,112 @@ static const char *real_path_internal(const char *path, int die_on_error) goto error_out; } - strbuf_reset(&sb); - strbuf_addstr(&sb, path); - - while (depth--) { - if (!is_directory(sb.buf)) { - char *last_slash = find_last_dir_sep(sb.buf); - if (last_slash) { - last_elem = xstrdup(last_slash + 1); - strbuf_setlen(&sb, last_slash - sb.buf + 1); - } else { - last_elem = xmemdupz(sb.buf, sb.len); - strbuf_reset(&sb); - } + strbuf_reset(&resolved); + + if (is_absolute_path(path)) { + /* absolute path; start with only root as being resolved */ + int offset = offset_1st_component(path); + strbuf_add(&resolved, path, offset); + strbuf_addstr(&remaining, path + offset); + } else { + /* relative path; can use CWD as the 
initial resolved path */ + if (strbuf_getcwd(&resolved)) { + if (die_on_error) + die_errno("unable to get current working directory"); + else + goto error_out; } + strbuf_addstr(&remaining, path); + } - if (sb.len) { - if (!cwd.len && strbuf_getcwd(&cwd)) { + /* Iterate over the remaining path components */ + while (remaining.len > 0) { + get_next_component(&next, &remaining); + + if (next.len == 0) { + continue; /* empty component */ + } else if (next.len == 1 && !strcmp(next.buf, ".")) { + continue; /* '.' component */ + } else if (next.len == 2 && !strcmp(next.buf, "..")) { + /* '..' component; strip the last path component */ + strip_last_component(&resolved); + continue; + } + + /* append the next component and resolve resultant path */ + if (!is_dir_sep(resolved.buf[resolved.len - 1])) + strbuf_addch(&resolved, '/'); + strbuf_addbuf(&resolved, &next); + + if (lstat(resolved.buf, &st)) { + /* error out unless this was the last component */ + if (errno != ENOENT || remaining.len) { if (die_on_error) - die_errno("Could not get current working directory"); + die_errno("Invalid path '%s'", + resolved.buf); else goto error_out; } + } else if (S_ISLNK(st.st_mode)) { + ssize_t len; + strbuf_reset(&symlink); - if (chdir(sb.buf)) { + if (num_symlinks++ > MAXSYMLINKS) { if (die_on_error) - die_errno("Could not switch to '%s'", - sb.buf); + die("More than %d nested symlinks " + "on path '%s'", MAXSYMLINKS, path); else goto error_out; } - } - if (strbuf_getcwd(&sb)) { - if (die_on_error) - die_errno("Could not get current working directory"); - else - goto error_out; - } - - if (last_elem) { - if (sb.len && !is_dir_sep(sb.buf[sb.len - 1])) - strbuf_addch(&sb, '/'); - strbuf_addstr(&sb, last_elem); - free(last_elem); - last_elem = NULL; - } - if (!lstat(sb.buf, &st) && S_ISLNK(st.st_mode)) { - struct strbuf next_sb = STRBUF_INIT; - ssize_t len = strbuf_readlink(&next_sb, sb.buf, 0); + len = strbuf_readlink(&symlink, resolved.buf, + st.st_size); if (len < 0) { if 
(die_on_error) die_errno("Invalid symlink '%s'", - sb.buf); + resolved.buf); else goto error_out; } - strbuf_swap(&sb, &next_sb); - strbuf_release(&next_sb); - } else - break; + + if (is_absolute_path(symlink.buf)) { + /* absolute symlink; set resolved to root */ + int offset = offset_1st_component(symlink.buf); + strbuf_reset(&resolved); + strbuf_add(&resolved, symlink.buf, offset); + strbuf_remove(&symlink, 0, offset); + } else { + /* + * relative symlink + * strip off the last component since it will + * be replaced with the contents of the symlink + */ + strip_last_component(&resolved); + } + + /* + * if there are still remaining components to resolve + * then append them to symlink + */ + if (remaining.len) { + strbuf_addch(&symlink, '/'); + strbuf_addbuf(&symlink, &remaining); + } + + /* + * use the symlink as the remaining components that + * need to be resloved + */ + strbuf_swap(&symlink, &remaining); + } } - retval = sb.buf; + retval = resolved.buf; + error_out: - free(last_elem); - if (cwd.len && chdir(cwd.buf)) - die_errno("Could not change back to '%s'", cwd.buf); - strbuf_release(&cwd); + strbuf_release(&remaining); + strbuf_release(&next); + strbuf_release(&symlink); return retval; } -- cgit v0.10.2-6-g49f6 From a1ae48410dce23c1e81e76aabaeb4eb01b065763 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 12 Dec 2016 10:16:53 -0800 Subject: real_path: convert real_path_internal to strbuf_realpath Change the name of real_path_internal to strbuf_realpath. In addition push the static strbuf up to its callers and instead take as a parameter a pointer to a strbuf to use for the final result. This change makes strbuf_realpath reentrant. 
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/abspath.c b/abspath.c index cafcae0..8c6c76b 100644 --- a/abspath.c +++ b/abspath.c @@ -55,21 +55,17 @@ static void get_next_component(struct strbuf *next, struct strbuf *remaining) * Return the real path (i.e., absolute path, with symlinks resolved * and extra slashes removed) equivalent to the specified path. (If * you want an absolute path but don't mind links, use - * absolute_path().) The return value is a pointer to a static - * buffer. + * absolute_path().) Places the resolved realpath in the provided strbuf. * * The directory part of path (i.e., everything up to the last * dir_sep) must denote a valid, existing directory, but the last * component need not exist. If die_on_error is set, then die with an * informative error message if there is a problem. Otherwise, return * NULL on errors (without generating any output). - * - * If path is our buffer, then return path, as it's already what the - * user wants. 
*/ -static const char *real_path_internal(const char *path, int die_on_error) +char *strbuf_realpath(struct strbuf *resolved, const char *path, + int die_on_error) { - static struct strbuf resolved = STRBUF_INIT; struct strbuf remaining = STRBUF_INIT; struct strbuf next = STRBUF_INIT; struct strbuf symlink = STRBUF_INIT; @@ -77,10 +73,6 @@ static const char *real_path_internal(const char *path, int die_on_error) int num_symlinks = 0; struct stat st; - /* We've already done it */ - if (path == resolved.buf) - return path; - if (!*path) { if (die_on_error) die("The empty string is not a valid path"); @@ -88,16 +80,16 @@ static const char *real_path_internal(const char *path, int die_on_error) goto error_out; } - strbuf_reset(&resolved); + strbuf_reset(resolved); if (is_absolute_path(path)) { /* absolute path; start with only root as being resolved */ int offset = offset_1st_component(path); - strbuf_add(&resolved, path, offset); + strbuf_add(resolved, path, offset); strbuf_addstr(&remaining, path + offset); } else { /* relative path; can use CWD as the initial resolved path */ - if (strbuf_getcwd(&resolved)) { + if (strbuf_getcwd(resolved)) { if (die_on_error) die_errno("unable to get current working directory"); else @@ -116,21 +108,21 @@ static const char *real_path_internal(const char *path, int die_on_error) continue; /* '.' component */ } else if (next.len == 2 && !strcmp(next.buf, "..")) { /* '..' 
component; strip the last path component */ - strip_last_component(&resolved); + strip_last_component(resolved); continue; } /* append the next component and resolve resultant path */ - if (!is_dir_sep(resolved.buf[resolved.len - 1])) - strbuf_addch(&resolved, '/'); - strbuf_addbuf(&resolved, &next); + if (!is_dir_sep(resolved->buf[resolved->len - 1])) + strbuf_addch(resolved, '/'); + strbuf_addbuf(resolved, &next); - if (lstat(resolved.buf, &st)) { + if (lstat(resolved->buf, &st)) { /* error out unless this was the last component */ if (errno != ENOENT || remaining.len) { if (die_on_error) die_errno("Invalid path '%s'", - resolved.buf); + resolved->buf); else goto error_out; } @@ -146,12 +138,12 @@ static const char *real_path_internal(const char *path, int die_on_error) goto error_out; } - len = strbuf_readlink(&symlink, resolved.buf, + len = strbuf_readlink(&symlink, resolved->buf, st.st_size); if (len < 0) { if (die_on_error) die_errno("Invalid symlink '%s'", - resolved.buf); + resolved->buf); else goto error_out; } @@ -159,8 +151,8 @@ static const char *real_path_internal(const char *path, int die_on_error) if (is_absolute_path(symlink.buf)) { /* absolute symlink; set resolved to root */ int offset = offset_1st_component(symlink.buf); - strbuf_reset(&resolved); - strbuf_add(&resolved, symlink.buf, offset); + strbuf_reset(resolved); + strbuf_add(resolved, symlink.buf, offset); strbuf_remove(&symlink, 0, offset); } else { /* @@ -168,7 +160,7 @@ static const char *real_path_internal(const char *path, int die_on_error) * strip off the last component since it will * be replaced with the contents of the symlink */ - strip_last_component(&resolved); + strip_last_component(resolved); } /* @@ -188,24 +180,29 @@ static const char *real_path_internal(const char *path, int die_on_error) } } - retval = resolved.buf; + retval = resolved->buf; error_out: strbuf_release(&remaining); strbuf_release(&next); strbuf_release(&symlink); + if (!retval) + strbuf_reset(resolved); + 
return retval; } const char *real_path(const char *path) { - return real_path_internal(path, 1); + static struct strbuf realpath = STRBUF_INIT; + return strbuf_realpath(&realpath, path, 1); } const char *real_path_if_valid(const char *path) { - return real_path_internal(path, 0); + static struct strbuf realpath = STRBUF_INIT; + return strbuf_realpath(&realpath, path, 0); } /* diff --git a/cache.h b/cache.h index a50a61a..7a81294 100644 --- a/cache.h +++ b/cache.h @@ -1064,6 +1064,8 @@ static inline int is_absolute_path(const char *path) return is_dir_sep(path[0]) || has_dos_drive_prefix(path); } int is_directory(const char *); +char *strbuf_realpath(struct strbuf *resolved, const char *path, + int die_on_error); const char *real_path(const char *path); const char *real_path_if_valid(const char *path); const char *absolute_path(const char *path); -- cgit v0.10.2-6-g49f6 From 72417640769c91408d15cdbab3160bc494f49c7f Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 12 Dec 2016 10:16:54 -0800 Subject: real_path: create real_pathdup Create real_pathdup which returns a caller-owned string of the resolved realpath based on the provided path. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/abspath.c b/abspath.c index 8c6c76b..79ee310 100644 --- a/abspath.c +++ b/abspath.c @@ -205,6 +205,19 @@ const char *real_path_if_valid(const char *path) return strbuf_realpath(&realpath, path, 0); } +char *real_pathdup(const char *path) +{ + struct strbuf realpath = STRBUF_INIT; + char *retval = NULL; + + if (strbuf_realpath(&realpath, path, 0)) + retval = strbuf_detach(&realpath, NULL); + + strbuf_release(&realpath); + + return retval; +} + /* * Use this to get an absolute path from a relative one. If you want * to resolve links, you should use real_path.
diff --git a/cache.h b/cache.h index 7a81294..e12a5d9 100644 --- a/cache.h +++ b/cache.h @@ -1068,6 +1068,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, int die_on_error); const char *real_path(const char *path); const char *real_path_if_valid(const char *path); +char *real_pathdup(const char *path); const char *absolute_path(const char *path); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); -- cgit v0.10.2-6-g49f6 From 4ac9006f832d98ca1f25d956e12f3ff79e0d25bc Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Mon, 12 Dec 2016 10:16:55 -0800 Subject: real_path: have callers use real_pathdup and strbuf_realpath Migrate callers of real_path() who duplicate the return value to use real_pathdup or strbuf_realpath. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/builtin/init-db.c b/builtin/init-db.c index 2399b97..76d68fa 100644 --- a/builtin/init-db.c +++ b/builtin/init-db.c @@ -338,7 +338,7 @@ int init_db(const char *git_dir, const char *real_git_dir, { int reinit; int exist_ok = flags & INIT_DB_EXIST_OK; - char *original_git_dir = xstrdup(real_path(git_dir)); + char *original_git_dir = real_pathdup(git_dir); if (real_git_dir) { struct stat st; @@ -489,7 +489,7 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, prefix, init_db_options, init_db_usage, 0); if (real_git_dir && !is_absolute_path(real_git_dir)) - real_git_dir = xstrdup(real_path(real_git_dir)); + real_git_dir = real_pathdup(real_git_dir); if (argc == 1) { int mkdir_tried = 0; @@ -560,7 +560,7 @@ int cmd_init_db(int argc, const char **argv, const char *prefix) const char *git_dir_parent = strrchr(git_dir, '/'); if (git_dir_parent) { char *rel = xstrndup(git_dir, git_dir_parent - git_dir); - git_work_tree_cfg = xstrdup(real_path(rel)); + git_work_tree_cfg = real_pathdup(rel); free(rel); } if
(!git_work_tree_cfg) diff --git a/environment.c b/environment.c index 0935ec6..9b943d2 100644 --- a/environment.c +++ b/environment.c @@ -259,7 +259,7 @@ void set_git_work_tree(const char *new_work_tree) return; } git_work_tree_initialized = 1; - work_tree = xstrdup(real_path(new_work_tree)); + work_tree = real_pathdup(new_work_tree); } const char *get_git_work_tree(void) diff --git a/setup.c b/setup.c index fe572b8..1b534a7 100644 --- a/setup.c +++ b/setup.c @@ -256,8 +256,10 @@ int get_common_dir_noenv(struct strbuf *sb, const char *gitdir) strbuf_addbuf(&path, &data); strbuf_addstr(sb, real_path(path.buf)); ret = 1; - } else + } else { strbuf_addstr(sb, gitdir); + } + strbuf_release(&data); strbuf_release(&path); return ret; @@ -692,7 +694,7 @@ static const char *setup_discovered_git_dir(const char *gitdir, /* --work-tree is set without --git-dir; use discovered one */ if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) { if (offset != cwd->len && !is_absolute_path(gitdir)) - gitdir = xstrdup(real_path(gitdir)); + gitdir = real_pathdup(gitdir); if (chdir(cwd->buf)) die_errno("Could not come back to cwd"); return setup_explicit_git_dir(gitdir, cwd, nongit_ok); @@ -800,11 +802,12 @@ static int canonicalize_ceiling_entry(struct string_list_item *item, /* Keep entry but do not canonicalize it */ return 1; } else { - const char *real_path = real_path_if_valid(ceil); - if (!real_path) + char *real_path = real_pathdup(ceil); + if (!real_path) { return 0; + } free(item->string); - item->string = xstrdup(real_path); + item->string = real_path; return 1; } } diff --git a/sha1_file.c b/sha1_file.c index 9c86d19..6a03cc3 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -291,7 +291,7 @@ static int link_alt_odb_entry(const char *entry, const char *relative_base, struct strbuf pathbuf = STRBUF_INIT; if (!is_absolute_path(entry) && relative_base) { - strbuf_addstr(&pathbuf, real_path(relative_base)); + strbuf_realpath(&pathbuf, relative_base, 1); strbuf_addch(&pathbuf, 
'/'); } strbuf_addstr(&pathbuf, entry); diff --git a/submodule.c b/submodule.c index 6f7d883..c85ba50 100644 --- a/submodule.c +++ b/submodule.c @@ -1227,7 +1227,7 @@ void connect_work_tree_and_git_dir(const char *work_tree, const char *git_dir) { struct strbuf file_name = STRBUF_INIT; struct strbuf rel_path = STRBUF_INIT; - const char *real_work_tree = xstrdup(real_path(work_tree)); + const char *real_work_tree = real_pathdup(work_tree); /* Update gitfile */ strbuf_addf(&file_name, "%s/.git", work_tree); diff --git a/transport.c b/transport.c index d57e8de..236c6f6 100644 --- a/transport.c +++ b/transport.c @@ -1130,7 +1130,7 @@ static int refs_from_alternate_cb(struct alternate_object_database *e, const struct ref *extra; struct alternate_refs_data *cb = data; - other = xstrdup(real_path(e->path)); + other = real_pathdup(e->path); len = strlen(other); while (other[len-1] == '/') diff --git a/worktree.c b/worktree.c index f7869f8..c90e013 100644 --- a/worktree.c +++ b/worktree.c @@ -255,7 +255,7 @@ struct worktree *find_worktree(struct worktree **list, return wt; arg = prefix_filename(prefix, strlen(prefix), arg); - path = xstrdup(real_path(arg)); + path = real_pathdup(arg); for (; *list; list++) if (!fspathcmp(path, real_path((*list)->path))) break; -- cgit v0.10.2-6-g49f6 From e9a379c352b02ca560c58a56aa723994aa42666f Mon Sep 17 00:00:00 2001 From: Johannes Sixt Date: Wed, 21 Dec 2016 22:51:35 +0100 Subject: real_path: canonicalize directory separators in root parts When an absolute path is resolved, resolution begins at the first path component after the root part. The root part is just copied verbatim, because it must not be inspected for symbolic links. For POSIX paths, this is just the initial slash, but on Windows, the root part has the forms c:\ or \\server\share. We do want to canonicalize the back-slashes in the root part because these parts are compared to the result of getcwd(), which does return a fully canonicalized path. 
Factor out a helper that splits off the root part, and have it canonicalize the copied part. This change was prompted because t1504-ceiling-dirs.sh caught a breakage in GIT_CEILING_DIRECTORIES handling on Windows. Signed-off-by: Johannes Sixt Acked-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/abspath.c b/abspath.c index 79ee310..1d56f5e 100644 --- a/abspath.c +++ b/abspath.c @@ -48,6 +48,19 @@ static void get_next_component(struct strbuf *next, struct strbuf *remaining) strbuf_remove(remaining, 0, end - remaining->buf); } +/* copies root part from remaining to resolved, canonicalizing it on the way */ +static void get_root_part(struct strbuf *resolved, struct strbuf *remaining) +{ + int offset = offset_1st_component(remaining->buf); + + strbuf_reset(resolved); + strbuf_add(resolved, remaining->buf, offset); +#ifdef GIT_WINDOWS_NATIVE + convert_slashes(resolved->buf); +#endif + strbuf_remove(remaining, 0, offset); +} + /* We allow "recursive" symbolic links. Only within reason, though. 
*/ #define MAXSYMLINKS 5 @@ -80,14 +93,10 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, goto error_out; } - strbuf_reset(resolved); + strbuf_addstr(&remaining, path); + get_root_part(resolved, &remaining); - if (is_absolute_path(path)) { - /* absolute path; start with only root as being resolved */ - int offset = offset_1st_component(path); - strbuf_add(resolved, path, offset); - strbuf_addstr(&remaining, path + offset); - } else { + if (!resolved->len) { /* relative path; can use CWD as the initial resolved path */ if (strbuf_getcwd(resolved)) { if (die_on_error) @@ -95,7 +104,6 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, else goto error_out; } - strbuf_addstr(&remaining, path); } /* Iterate over the remaining path components */ @@ -150,10 +158,7 @@ char *strbuf_realpath(struct strbuf *resolved, const char *path, if (is_absolute_path(symlink.buf)) { /* absolute symlink; set resolved to root */ - int offset = offset_1st_component(symlink.buf); - strbuf_reset(resolved); - strbuf_add(resolved, symlink.buf, offset); - strbuf_remove(&symlink, 0, offset); + get_root_part(resolved, &symlink); } else { /* * relative symlink -- cgit v0.10.2-6-g49f6 From 5688c28d81e9103a234efeedcb0568c2c4dd0bfb Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:16 -0800 Subject: submodules: add helper to determine if a submodule is populated Add the `is_submodule_populated()` helper function to submodules.c. `is_submodule_populated()` performs a check to see if a submodule has been checked out (and has a valid .git directory/file) at the given path.
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/submodule.c b/submodule.c index c85ba50..ee3198d 100644 --- a/submodule.c +++ b/submodule.c @@ -198,6 +198,21 @@ void gitmodules_config(void) } } +/* + * Determine if a submodule has been populated at a given 'path' + */ +int is_submodule_populated(const char *path) +{ + int ret = 0; + char *gitdir = xstrfmt("%s/.git", path); + + if (resolve_gitdir(gitdir)) + ret = 1; + + free(gitdir); + return ret; +} + int parse_submodule_update_strategy(const char *value, struct submodule_update_strategy *dst) { diff --git a/submodule.h b/submodule.h index d9e197a..c4af505 100644 --- a/submodule.h +++ b/submodule.h @@ -37,6 +37,7 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path); int submodule_config(const char *var, const char *value, void *cb); void gitmodules_config(void); +extern int is_submodule_populated(const char *path); int parse_submodule_update_strategy(const char *value, struct submodule_update_strategy *dst); const char *submodule_strategy_to_string(const struct submodule_update_strategy *s); -- cgit v0.10.2-6-g49f6 From f9f42560e2911a5eef9a3d463a63cfd48d54dd07 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:17 -0800 Subject: submodules: add helper to determine if a submodule is initialized Add the `is_submodule_initialized()` helper function to submodules.c. `is_submodule_initialized()` performs a check to determine if the submodule at the given path has been initialized. 
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/submodule.c b/submodule.c index ee3198d..edffaa1 100644 --- a/submodule.c +++ b/submodule.c @@ -199,6 +199,29 @@ void gitmodules_config(void) } /* + * Determine if a submodule has been initialized at a given 'path' + */ +int is_submodule_initialized(const char *path) +{ + int ret = 0; + const struct submodule *module = NULL; + + module = submodule_from_path(null_sha1, path); + + if (module) { + char *key = xstrfmt("submodule.%s.url", module->name); + char *value = NULL; + + ret = !git_config_get_string(key, &value); + + free(value); + free(key); + } + + return ret; +} + +/* * Determine if a submodule has been populated at a given 'path' */ int is_submodule_populated(const char *path) diff --git a/submodule.h b/submodule.h index c4af505..6ec5f2f 100644 --- a/submodule.h +++ b/submodule.h @@ -37,6 +37,7 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path); int submodule_config(const char *var, const char *value, void *cb); void gitmodules_config(void); +extern int is_submodule_initialized(const char *path); extern int is_submodule_populated(const char *path); int parse_submodule_update_strategy(const char *value, struct submodule_update_strategy *dst); -- cgit v0.10.2-6-g49f6 From 9ebf689aad72bfc091da21e1d73a05308f1ace85 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:18 -0800 Subject: submodules: load gitmodules file from commit sha1 Teach submodules to load a '.gitmodules' file from a commit sha1. This enables the population of the submodule_cache to be based on the state of the '.gitmodules' file from a particular commit.
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index e12a5d9..de237ca 100644 --- a/cache.h +++ b/cache.h @@ -1693,6 +1693,8 @@ extern int git_default_config(const char *, const char *, void *); extern int git_config_from_file(config_fn_t fn, const char *, void *); extern int git_config_from_mem(config_fn_t fn, const enum config_origin_type, const char *name, const char *buf, size_t len, void *data); +extern int git_config_from_blob_sha1(config_fn_t fn, const char *name, + const unsigned char *sha1, void *data); extern void git_config_push_parameter(const char *text); extern int git_config_from_parameters(config_fn_t fn, void *data); extern void git_config(config_fn_t fn, void *); diff --git a/config.c b/config.c index 83fdecb..4d78e72 100644 --- a/config.c +++ b/config.c @@ -1214,10 +1214,10 @@ int git_config_from_mem(config_fn_t fn, const enum config_origin_type origin_typ return do_config_from(&top, fn, data); } -static int git_config_from_blob_sha1(config_fn_t fn, - const char *name, - const unsigned char *sha1, - void *data) +int git_config_from_blob_sha1(config_fn_t fn, + const char *name, + const unsigned char *sha1, + void *data) { enum object_type type; char *buf; diff --git a/submodule-config.c b/submodule-config.c index 098085b..8b9a2ef 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -379,9 +379,9 @@ static int parse_config(const char *var, const char *value, void *data) return ret; } -static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1, - unsigned char *gitmodules_sha1, - struct strbuf *rev) +int gitmodule_sha1_from_commit(const unsigned char *commit_sha1, + unsigned char *gitmodules_sha1, + struct strbuf *rev) { int ret = 0; diff --git a/submodule-config.h b/submodule-config.h index d05c542..78584ba 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -29,6 +29,9 @@ const struct submodule *submodule_from_name(const unsigned char *commit_sha1, const char *name); const 
struct submodule *submodule_from_path(const unsigned char *commit_sha1, const char *path); +extern int gitmodule_sha1_from_commit(const unsigned char *commit_sha1, + unsigned char *gitmodules_sha1, + struct strbuf *rev); void submodule_free(void); #endif /* SUBMODULE_CONFIG_H */ diff --git a/submodule.c b/submodule.c index edffaa1..2600908 100644 --- a/submodule.c +++ b/submodule.c @@ -198,6 +198,18 @@ void gitmodules_config(void) } } +void gitmodules_config_sha1(const unsigned char *commit_sha1) +{ + struct strbuf rev = STRBUF_INIT; + unsigned char sha1[20]; + + if (gitmodule_sha1_from_commit(commit_sha1, sha1, &rev)) { + git_config_from_blob_sha1(submodule_config, rev.buf, + sha1, NULL); + } + strbuf_release(&rev); +} + /* * Determine if a submodule has been initialized at a given 'path' */ diff --git a/submodule.h b/submodule.h index 6ec5f2f..9203d89 100644 --- a/submodule.h +++ b/submodule.h @@ -37,6 +37,7 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path); int submodule_config(const char *var, const char *value, void *cb); void gitmodules_config(void); +extern void gitmodules_config_sha1(const unsigned char *commit_sha1); extern int is_submodule_initialized(const char *path); extern int is_submodule_populated(const char *path); int parse_submodule_update_strategy(const char *value, -- cgit v0.10.2-6-g49f6 From 4538eef564c81c96f2874ccadc54d3c69cc0e19c Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:19 -0800 Subject: grep: add submodules as a grep source type Add `GREP_SOURCE_SUBMODULE` as a grep_source type and cases for this new type in the various switch statements in grep.c. When initializing a grep_source with type `GREP_SOURCE_SUBMODULE` the identifier can either be NULL (to indicate that the working tree will be used) or a SHA1 (the REV of the submodule to be grep'd). 
If the identifier is a SHA1 then we want to fall through to the `GREP_SOURCE_SHA1` case to handle the copying of the SHA1. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/grep.c b/grep.c index 1194d35..0dbdc1d 100644 --- a/grep.c +++ b/grep.c @@ -1735,12 +1735,23 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, case GREP_SOURCE_FILE: gs->identifier = xstrdup(identifier); break; + case GREP_SOURCE_SUBMODULE: + if (!identifier) { + gs->identifier = NULL; + break; + } + /* + * FALL THROUGH + * If the identifier is non-NULL (in the submodule case) it + * will be a SHA1 that needs to be copied. + */ case GREP_SOURCE_SHA1: gs->identifier = xmalloc(20); hashcpy(gs->identifier, identifier); break; case GREP_SOURCE_BUF: gs->identifier = NULL; + break; } } @@ -1760,6 +1771,7 @@ void grep_source_clear_data(struct grep_source *gs) switch (gs->type) { case GREP_SOURCE_FILE: case GREP_SOURCE_SHA1: + case GREP_SOURCE_SUBMODULE: free(gs->buf); gs->buf = NULL; gs->size = 0; @@ -1831,8 +1843,10 @@ static int grep_source_load(struct grep_source *gs) return grep_source_load_sha1(gs); case GREP_SOURCE_BUF: return gs->buf ? 0 : -1; + case GREP_SOURCE_SUBMODULE: + break; } - die("BUG: invalid grep_source type"); + die("BUG: invalid grep_source type to load"); } void grep_source_load_driver(struct grep_source *gs) diff --git a/grep.h b/grep.h index 5856a23..267534c 100644 --- a/grep.h +++ b/grep.h @@ -161,6 +161,7 @@ struct grep_source { GREP_SOURCE_SHA1, GREP_SOURCE_FILE, GREP_SOURCE_BUF, + GREP_SOURCE_SUBMODULE, } type; void *identifier; -- cgit v0.10.2-6-g49f6 From 0281e487fd913bd9a32a710f3109ff3002f3e4a9 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:20 -0800 Subject: grep: optionally recurse into submodules Allow grep to recognize submodules and recursively search for patterns in each submodule. This is done by forking off a process to recursively call grep on each submodule. 
The top level --super-prefix option is used to pass a path to the submodule which can in turn be used to prepend to output or in pathspec matching logic. Recursion only occurs for submodules which have been initialized and checked out by the parent project. If a submodule hasn't been initialized and checked out it is simply skipped. In order to support the existing multi-threading infrastructure in grep, output from each child process is captured in a strbuf so that it can be later printed to the console in an ordered fashion. To limit the number of threads that are created, each child process has half the number of threads as its parent (minimum of 1), otherwise we potentially have a fork-bomb. Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 0ecea6e..17aa1ba 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -26,6 +26,7 @@ SYNOPSIS [--threads ] [-f ] [-e] [--and|--or|--not|(|)|-e ...] + [--recurse-submodules] [ [--[no-]exclude-standard] [--cached | --no-index | --untracked] | ...] [--] [...] @@ -88,6 +89,10 @@ OPTIONS mechanism. Only useful when searching files in the current directory with `--no-index`. +--recurse-submodules:: + Recursively search in each submodule that has been initialized and + checked out in the repository. + -a:: --text:: Process binary files as if they were text. diff --git a/builtin/grep.c b/builtin/grep.c index 8887b6a..dca0be6 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -18,12 +18,20 @@ #include "quote.h" #include "dir.h" #include "pathspec.h" +#include "submodule.h" static char const * const grep_usage[] = { N_("git grep [] [-e] [...]
[[--] ...]"), NULL }; +static const char *super_prefix; +static int recurse_submodules; +static struct argv_array submodule_options = ARGV_ARRAY_INIT; + +static int grep_submodule_launch(struct grep_opt *opt, + const struct grep_source *gs); + #define GREP_NUM_THREADS_DEFAULT 8 static int num_threads; @@ -174,7 +182,10 @@ static void *run(void *arg) break; opt->output_priv = w; - hit |= grep_source(opt, &w->source); + if (w->source.type == GREP_SOURCE_SUBMODULE) + hit |= grep_submodule_launch(opt, &w->source); + else + hit |= grep_source(opt, &w->source); grep_source_clear_data(&w->source); work_done(w); } @@ -300,6 +311,10 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, if (opt->relative && opt->prefix_length) { quote_path_relative(filename + tree_name_len, opt->prefix, &pathbuf); strbuf_insert(&pathbuf, 0, filename, tree_name_len); + } else if (super_prefix) { + strbuf_add(&pathbuf, filename, tree_name_len); + strbuf_addstr(&pathbuf, super_prefix); + strbuf_addstr(&pathbuf, filename + tree_name_len); } else { strbuf_addstr(&pathbuf, filename); } @@ -328,10 +343,13 @@ static int grep_file(struct grep_opt *opt, const char *filename) { struct strbuf buf = STRBUF_INIT; - if (opt->relative && opt->prefix_length) + if (opt->relative && opt->prefix_length) { quote_path_relative(filename, opt->prefix, &buf); - else + } else { + if (super_prefix) + strbuf_addstr(&buf, super_prefix); strbuf_addstr(&buf, filename); + } #ifndef NO_PTHREADS if (num_threads) { @@ -378,31 +396,260 @@ static void run_pager(struct grep_opt *opt, const char *prefix) exit(status); } -static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int cached) +static void compile_submodule_options(const struct grep_opt *opt, + const struct pathspec *pathspec, + int cached, int untracked, + int opt_exclude, int use_index, + int pattern_type_arg) +{ + struct grep_pat *pattern; + int i; + + if (recurse_submodules) + argv_array_push(&submodule_options, 
"--recurse-submodules"); + + if (cached) + argv_array_push(&submodule_options, "--cached"); + if (!use_index) + argv_array_push(&submodule_options, "--no-index"); + if (untracked) + argv_array_push(&submodule_options, "--untracked"); + if (opt_exclude > 0) + argv_array_push(&submodule_options, "--exclude-standard"); + + if (opt->invert) + argv_array_push(&submodule_options, "-v"); + if (opt->ignore_case) + argv_array_push(&submodule_options, "-i"); + if (opt->word_regexp) + argv_array_push(&submodule_options, "-w"); + switch (opt->binary) { + case GREP_BINARY_NOMATCH: + argv_array_push(&submodule_options, "-I"); + break; + case GREP_BINARY_TEXT: + argv_array_push(&submodule_options, "-a"); + break; + default: + break; + } + if (opt->allow_textconv) + argv_array_push(&submodule_options, "--textconv"); + if (opt->max_depth != -1) + argv_array_pushf(&submodule_options, "--max-depth=%d", + opt->max_depth); + if (opt->linenum) + argv_array_push(&submodule_options, "-n"); + if (!opt->pathname) + argv_array_push(&submodule_options, "-h"); + if (!opt->relative) + argv_array_push(&submodule_options, "--full-name"); + if (opt->name_only) + argv_array_push(&submodule_options, "-l"); + if (opt->unmatch_name_only) + argv_array_push(&submodule_options, "-L"); + if (opt->null_following_name) + argv_array_push(&submodule_options, "-z"); + if (opt->count) + argv_array_push(&submodule_options, "-c"); + if (opt->file_break) + argv_array_push(&submodule_options, "--break"); + if (opt->heading) + argv_array_push(&submodule_options, "--heading"); + if (opt->pre_context) + argv_array_pushf(&submodule_options, "--before-context=%d", + opt->pre_context); + if (opt->post_context) + argv_array_pushf(&submodule_options, "--after-context=%d", + opt->post_context); + if (opt->funcname) + argv_array_push(&submodule_options, "-p"); + if (opt->funcbody) + argv_array_push(&submodule_options, "-W"); + if (opt->all_match) + argv_array_push(&submodule_options, "--all-match"); + if (opt->debug) + 
argv_array_push(&submodule_options, "--debug"); + if (opt->status_only) + argv_array_push(&submodule_options, "-q"); + + switch (pattern_type_arg) { + case GREP_PATTERN_TYPE_BRE: + argv_array_push(&submodule_options, "-G"); + break; + case GREP_PATTERN_TYPE_ERE: + argv_array_push(&submodule_options, "-E"); + break; + case GREP_PATTERN_TYPE_FIXED: + argv_array_push(&submodule_options, "-F"); + break; + case GREP_PATTERN_TYPE_PCRE: + argv_array_push(&submodule_options, "-P"); + break; + case GREP_PATTERN_TYPE_UNSPECIFIED: + break; + } + + for (pattern = opt->pattern_list; pattern != NULL; + pattern = pattern->next) { + switch (pattern->token) { + case GREP_PATTERN: + argv_array_pushf(&submodule_options, "-e%s", + pattern->pattern); + break; + case GREP_AND: + case GREP_OPEN_PAREN: + case GREP_CLOSE_PAREN: + case GREP_NOT: + case GREP_OR: + argv_array_push(&submodule_options, pattern->pattern); + break; + /* BODY and HEAD are not used by git-grep */ + case GREP_PATTERN_BODY: + case GREP_PATTERN_HEAD: + break; + } + } + + /* + * Limit number of threads for child process to use. + * This is to prevent potential fork-bomb behavior of git-grep as each + * submodule process has its own thread pool. + */ + argv_array_pushf(&submodule_options, "--threads=%d", + (num_threads + 1) / 2); + + /* Add Pathspecs */ + argv_array_push(&submodule_options, "--"); + for (i = 0; i < pathspec->nr; i++) + argv_array_push(&submodule_options, + pathspec->items[i].original); +} + +/* + * Launch child process to grep contents of a submodule + */ +static int grep_submodule_launch(struct grep_opt *opt, + const struct grep_source *gs) +{ + struct child_process cp = CHILD_PROCESS_INIT; + int status, i; + struct work_item *w = opt->output_priv; + + prepare_submodule_repo_env(&cp.env_array); + + /* Add super prefix */ + argv_array_pushf(&cp.args, "--super-prefix=%s%s/", + super_prefix ? 
super_prefix : "", + gs->name); + argv_array_push(&cp.args, "grep"); + + /* Add options */ + for (i = 0; i < submodule_options.argc; i++) + argv_array_push(&cp.args, submodule_options.argv[i]); + + cp.git_cmd = 1; + cp.dir = gs->path; + + /* + * Capture output to output buffer and check the return code from the + * child process. A '0' indicates a hit, a '1' indicates no hit and + * anything else is an error. + */ + status = capture_command(&cp, &w->out, 0); + if (status && (status != 1)) { + /* flush the buffer */ + write_or_die(1, w->out.buf, w->out.len); + die("process for submodule '%s' failed with exit code: %d", + gs->name, status); + } + + /* invert the return code to make a hit equal to 1 */ + return !status; +} + +/* + * Prep grep structures for a submodule grep + * sha1: the sha1 of the submodule or NULL if using the working tree + * filename: name of the submodule including tree name of parent + * path: location of the submodule + */ +static int grep_submodule(struct grep_opt *opt, const unsigned char *sha1, + const char *filename, const char *path) +{ + if (!is_submodule_initialized(path)) + return 0; + if (!is_submodule_populated(path)) + return 0; + +#ifndef NO_PTHREADS + if (num_threads) { + add_work(opt, GREP_SOURCE_SUBMODULE, filename, path, sha1); + return 0; + } else +#endif + { + struct work_item w; + int hit; + + grep_source_init(&w.source, GREP_SOURCE_SUBMODULE, + filename, path, sha1); + strbuf_init(&w.out, 0); + opt->output_priv = &w; + hit = grep_submodule_launch(opt, &w.source); + + write_or_die(1, w.out.buf, w.out.len); + + grep_source_clear(&w.source); + strbuf_release(&w.out); + return hit; + } +} + +static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, + int cached) { int hit = 0; int nr; + struct strbuf name = STRBUF_INIT; + int name_base_len = 0; + if (super_prefix) { + name_base_len = strlen(super_prefix); + strbuf_addstr(&name, super_prefix); + } + read_cache(); for (nr = 0; nr < active_nr; nr++) { const 
struct cache_entry *ce = active_cache[nr]; - if (!S_ISREG(ce->ce_mode)) - continue; - if (!ce_path_match(ce, pathspec, NULL)) + strbuf_setlen(&name, name_base_len); + strbuf_addstr(&name, ce->name); + + if (S_ISREG(ce->ce_mode) && + match_pathspec(pathspec, name.buf, name.len, 0, NULL, + S_ISDIR(ce->ce_mode) || + S_ISGITLINK(ce->ce_mode))) { + /* + * If CE_VALID is on, we assume worktree file and its + * cache entry are identical, even if worktree file has + * been modified, so use cache version instead + */ + if (cached || (ce->ce_flags & CE_VALID) || + ce_skip_worktree(ce)) { + if (ce_stage(ce) || ce_intent_to_add(ce)) + continue; + hit |= grep_sha1(opt, ce->oid.hash, ce->name, + 0, ce->name); + } else { + hit |= grep_file(opt, ce->name); + } + } else if (recurse_submodules && S_ISGITLINK(ce->ce_mode) && + submodule_path_match(pathspec, name.buf, NULL)) { + hit |= grep_submodule(opt, NULL, ce->name, ce->name); + } else { continue; - /* - * If CE_VALID is on, we assume worktree file and its cache entry - * are identical, even if worktree file has been modified, so use - * cache version instead - */ - if (cached || (ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) { - if (ce_stage(ce) || ce_intent_to_add(ce)) - continue; - hit |= grep_sha1(opt, ce->oid.hash, ce->name, 0, - ce->name); } - else - hit |= grep_file(opt, ce->name); + if (ce_stage(ce)) { do { nr++; @@ -413,6 +660,8 @@ static int grep_cache(struct grep_opt *opt, const struct pathspec *pathspec, int if (hit && opt->status_only) break; } + + strbuf_release(&name); return hit; } @@ -651,6 +900,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) N_("search in both tracked and untracked files")), OPT_SET_INT(0, "exclude-standard", &opt_exclude, N_("ignore files specified via '.gitignore'"), 1), + OPT_BOOL(0, "recurse-submodules", &recurse_submodules, + N_("recursivley search in each submodule")), OPT_GROUP(""), OPT_BOOL('v', "invert-match", &opt.invert, N_("show non-matching lines")), @@ -755,6 
+1006,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) init_grep_defaults(); git_config(grep_cmd_config, NULL); grep_init(&opt, prefix); + super_prefix = get_super_prefix(); /* * If there is no -- then the paths must exist in the working @@ -872,6 +1124,13 @@ int cmd_grep(int argc, const char **argv, const char *prefix) pathspec.max_depth = opt.max_depth; pathspec.recursive = 1; + if (recurse_submodules) { + gitmodules_config(); + compile_submodule_options(&opt, &pathspec, cached, untracked, + opt_exclude, use_index, + pattern_type_arg); + } + if (show_in_pager && (cached || list.nr)) die(_("--open-files-in-pager only works on the worktree")); @@ -895,6 +1154,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix) } } + if (recurse_submodules && (!use_index || untracked || list.nr)) + die(_("option not supported with --recurse-submodules.")); + if (!show_in_pager && !opt.status_only) setup_pager(); diff --git a/git.c b/git.c index dce529f..c95d3e3 100644 --- a/git.c +++ b/git.c @@ -434,7 +434,7 @@ static struct cmd_struct commands[] = { { "fsck-objects", cmd_fsck, RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, { "get-tar-commit-id", cmd_get_tar_commit_id }, - { "grep", cmd_grep, RUN_SETUP_GENTLY }, + { "grep", cmd_grep, RUN_SETUP_GENTLY | SUPPORT_SUPER_PREFIX }, { "hash-object", cmd_hash_object }, { "help", cmd_help }, { "index-pack", cmd_index_pack, RUN_SETUP_GENTLY }, diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh new file mode 100755 index 0000000..1019125 --- /dev/null +++ b/t/t7814-grep-recurse-submodules.sh @@ -0,0 +1,99 @@ +#!/bin/sh + +test_description='Test grep recurse-submodules feature + +This test verifies the recurse-submodules feature correctly greps across +submodules. +' + +. 
./test-lib.sh + +test_expect_success 'setup directory structure and submodule' ' + echo "foobar" >a && + mkdir b && + echo "bar" >b/b && + git add a b && + git commit -m "add a and b" && + git init submodule && + echo "foobar" >submodule/a && + git -C submodule add a && + git -C submodule commit -m "add a" && + git submodule add ./submodule && + git commit -m "added submodule" +' + +test_expect_success 'grep correctly finds patterns in a submodule' ' + cat >expect <<-\EOF && + a:foobar + b/b:bar + submodule/a:foobar + EOF + + git grep -e "bar" --recurse-submodules >actual && + test_cmp expect actual +' + +test_expect_success 'grep and basic pathspecs' ' + cat >expect <<-\EOF && + submodule/a:foobar + EOF + + git grep -e. --recurse-submodules -- submodule >actual && + test_cmp expect actual +' + +test_expect_success 'grep and nested submodules' ' + git init submodule/sub && + echo "foobar" >submodule/sub/a && + git -C submodule/sub add a && + git -C submodule/sub commit -m "add a" && + git -C submodule submodule add ./sub && + git -C submodule add sub && + git -C submodule commit -m "added sub" && + git add submodule && + git commit -m "updated submodule" && + + cat >expect <<-\EOF && + a:foobar + b/b:bar + submodule/a:foobar + submodule/sub/a:foobar + EOF + + git grep -e "bar" --recurse-submodules >actual && + test_cmp expect actual +' + +test_expect_success 'grep and multiple patterns' ' + cat >expect <<-\EOF && + a:foobar + submodule/a:foobar + submodule/sub/a:foobar + EOF + + git grep -e "bar" --and -e "foo" --recurse-submodules >actual && + test_cmp expect actual +' + +test_expect_success 'grep and multiple patterns' ' + cat >expect <<-\EOF && + b/b:bar + EOF + + git grep -e "bar" --and --not -e "foo" --recurse-submodules >actual && + test_cmp expect actual +' + +test_incompatible_with_recurse_submodules () +{ + test_expect_success "--recurse-submodules and $1 are incompatible" " + test_must_fail git grep -e. 
--recurse-submodules $1 2>actual && + test_i18ngrep 'not supported with --recurse-submodules' actual + " +} + +test_incompatible_with_recurse_submodules --untracked +test_incompatible_with_recurse_submodules --no-index +test_incompatible_with_recurse_submodules HEAD + +test_done -- cgit v0.10.2-6-g49f6 From 74ed43711fd1cd7ce155d338f87ebe52cb74d9e2 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:21 -0800 Subject: grep: enable recurse-submodules to work on objects Teach grep to recursively search in submodules when provided with a tree object. This allows grep to search a submodule based on the state of the submodule that is present in a commit of the super project. When grep is provided with a tree object, the name of the object is prefixed to all output. In order to provide uniformity of output between the parent and child processes the option `--parent-basename` has been added so that the child can preface all of its output with the name of the parent's object instead of the name of the commit SHA1 of the submodule. This changes output from the command `git grep -e. -l --recurse-submodules HEAD` from: HEAD:file :sub/file to: HEAD:file HEAD:sub/file Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt index 17aa1ba..71f32f3 100644 --- a/Documentation/git-grep.txt +++ b/Documentation/git-grep.txt @@ -26,7 +26,7 @@ SYNOPSIS [--threads ] [-f ] [-e] [--and|--or|--not|(|)|-e ...] - [--recurse-submodules] + [--recurse-submodules] [--parent-basename ] [ [--[no-]exclude-standard] [--cached | --no-index | --untracked] | ...] [--] [...] @@ -91,7 +91,16 @@ OPTIONS --recurse-submodules:: Recursively search in each submodule that has been initialized and - checked out in the repository. + checked out in the repository. When used in combination with the + option the prefix of all submodule output will be the name of + the parent project's object.
+ +--parent-basename :: + For internal use only. In order to produce uniform output with the + --recurse-submodules option, this option can be used to provide the + basename of a parent's object to a submodule so the submodule + can prefix its output with the parent's name rather than the SHA1 of + the submodule. -a:: --text:: diff --git a/builtin/grep.c b/builtin/grep.c index dca0be6..5918a26 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -19,6 +19,7 @@ #include "dir.h" #include "pathspec.h" #include "submodule.h" +#include "submodule-config.h" static char const * const grep_usage[] = { N_("git grep [] [-e] [...] [[--] ...]"), @@ -28,6 +29,7 @@ static char const * const grep_usage[] = { static const char *super_prefix; static int recurse_submodules; static struct argv_array submodule_options = ARGV_ARRAY_INIT; +static const char *parent_basename; static int grep_submodule_launch(struct grep_opt *opt, const struct grep_source *gs); @@ -534,19 +536,53 @@ static int grep_submodule_launch(struct grep_opt *opt, { struct child_process cp = CHILD_PROCESS_INIT; int status, i; + const char *end_of_base; + const char *name; struct work_item *w = opt->output_priv; + end_of_base = strchr(gs->name, ':'); + if (gs->identifier && end_of_base) + name = end_of_base + 1; + else + name = gs->name; + prepare_submodule_repo_env(&cp.env_array); /* Add super prefix */ argv_array_pushf(&cp.args, "--super-prefix=%s%s/", super_prefix ? super_prefix : "", - gs->name); + name); argv_array_push(&cp.args, "grep"); + /* + * Add basename of parent project + * When performing grep on a tree object the filename is prefixed + * with the object's name: 'tree-name:filename'. In order to + * provide uniformity of output we want to pass the name of the + * parent project's object name to the submodule so the submodule can + * prefix its output with the parent's name and not its own SHA1. 
+ */ + if (gs->identifier && end_of_base) + argv_array_pushf(&cp.args, "--parent-basename=%.*s", + (int) (end_of_base - gs->name), + gs->name); + /* Add options */ - for (i = 0; i < submodule_options.argc; i++) + for (i = 0; i < submodule_options.argc; i++) { + /* + * If there is a tree identifier for the submodule, add the + * rev after adding the submodule options but before the + * pathspecs. To do this we listen for the '--' and insert the + * sha1 before pushing the '--' onto the child process argv + * array. + */ + if (gs->identifier && + !strcmp("--", submodule_options.argv[i])) { + argv_array_push(&cp.args, sha1_to_hex(gs->identifier)); + } + argv_array_push(&cp.args, submodule_options.argv[i]); + } cp.git_cmd = 1; cp.dir = gs->path; @@ -673,12 +709,22 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, enum interesting match = entry_not_interesting; struct name_entry entry; int old_baselen = base->len; + struct strbuf name = STRBUF_INIT; + int name_base_len = 0; + if (super_prefix) { + strbuf_addstr(&name, super_prefix); + name_base_len = name.len; + } while (tree_entry(tree, &entry)) { int te_len = tree_entry_len(&entry); if (match != all_entries_interesting) { - match = tree_entry_interesting(&entry, base, tn_len, pathspec); + strbuf_addstr(&name, base->buf + tn_len); + match = tree_entry_interesting(&entry, &name, + 0, pathspec); + strbuf_setlen(&name, name_base_len); + if (match == all_entries_not_interesting) break; if (match == entry_not_interesting) @@ -690,8 +736,7 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, if (S_ISREG(entry.mode)) { hit |= grep_sha1(opt, entry.oid->hash, base->buf, tn_len, check_attr ? 
base->buf + tn_len : NULL); - } - else if (S_ISDIR(entry.mode)) { + } else if (S_ISDIR(entry.mode)) { enum object_type type; struct tree_desc sub; void *data; @@ -707,12 +752,18 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec, hit |= grep_tree(opt, pathspec, &sub, base, tn_len, check_attr); free(data); + } else if (recurse_submodules && S_ISGITLINK(entry.mode)) { + hit |= grep_submodule(opt, entry.oid->hash, base->buf, + base->buf + tn_len); } + strbuf_setlen(base, old_baselen); if (hit && opt->status_only) break; } + + strbuf_release(&name); return hit; } @@ -736,6 +787,10 @@ static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec, if (!data) die(_("unable to read tree (%s)"), oid_to_hex(&obj->oid)); + /* Use parent's name as base when recursing submodules */ + if (recurse_submodules && parent_basename) + name = parent_basename; + len = name ? strlen(name) : 0; strbuf_init(&base, PATH_MAX + len + 1); if (len) { @@ -762,6 +817,12 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec, for (i = 0; i < nr; i++) { struct object *real_obj; real_obj = deref_tag(list->objects[i].item, NULL, 0); + + /* load the gitmodules file for this rev */ + if (recurse_submodules) { + submodule_free(); + gitmodules_config_sha1(real_obj->oid.hash); + } if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].path)) { hit = 1; if (opt->status_only) @@ -902,6 +963,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix) N_("ignore files specified via '.gitignore'"), 1), OPT_BOOL(0, "recurse-submodules", &recurse_submodules, N_("recursivley search in each submodule")), + OPT_STRING(0, "parent-basename", &parent_basename, + N_("basename"), + N_("prepend parent project's basename to output")), OPT_GROUP(""), OPT_BOOL('v', "invert-match", &opt.invert, N_("show non-matching lines")), @@ -1154,7 +1218,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) } } - if 
(recurse_submodules && (!use_index || untracked || list.nr)) + if (recurse_submodules && (!use_index || untracked)) die(_("option not supported with --recurse-submodules.")); if (!show_in_pager && !opt.status_only) diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh index 1019125..d5fc316 100755 --- a/t/t7814-grep-recurse-submodules.sh +++ b/t/t7814-grep-recurse-submodules.sh @@ -84,6 +84,108 @@ test_expect_success 'grep and multiple patterns' ' test_cmp expect actual ' +test_expect_success 'basic grep tree' ' + cat >expect <<-\EOF && + HEAD:a:foobar + HEAD:b/b:bar + HEAD:submodule/a:foobar + HEAD:submodule/sub/a:foobar + EOF + + git grep -e "bar" --recurse-submodules HEAD >actual && + test_cmp expect actual +' + +test_expect_success 'grep tree HEAD^' ' + cat >expect <<-\EOF && + HEAD^:a:foobar + HEAD^:b/b:bar + HEAD^:submodule/a:foobar + EOF + + git grep -e "bar" --recurse-submodules HEAD^ >actual && + test_cmp expect actual +' + +test_expect_success 'grep tree HEAD^^' ' + cat >expect <<-\EOF && + HEAD^^:a:foobar + HEAD^^:b/b:bar + EOF + + git grep -e "bar" --recurse-submodules HEAD^^ >actual && + test_cmp expect actual +' + +test_expect_success 'grep tree and pathspecs' ' + cat >expect <<-\EOF && + HEAD:submodule/a:foobar + HEAD:submodule/sub/a:foobar + EOF + + git grep -e "bar" --recurse-submodules HEAD -- submodule >actual && + test_cmp expect actual +' + +test_expect_success 'grep tree and pathspecs' ' + cat >expect <<-\EOF && + HEAD:submodule/a:foobar + HEAD:submodule/sub/a:foobar + EOF + + git grep -e "bar" --recurse-submodules HEAD -- "submodule*a" >actual && + test_cmp expect actual +' + +test_expect_success 'grep tree and more pathspecs' ' + cat >expect <<-\EOF && + HEAD:submodule/a:foobar + EOF + + git grep -e "bar" --recurse-submodules HEAD -- "submodul?/a" >actual && + test_cmp expect actual +' + +test_expect_success 'grep tree and more pathspecs' ' + cat >expect <<-\EOF && + HEAD:submodule/sub/a:foobar + EOF + + git 
grep -e "bar" --recurse-submodules HEAD -- "submodul*/sub/a" >actual && + test_cmp expect actual +' + +test_expect_success !MINGW 'grep recurse submodule colon in name' ' + git init parent && + test_when_finished "rm -rf parent" && + echo "foobar" >"parent/fi:le" && + git -C parent add "fi:le" && + git -C parent commit -m "add fi:le" && + + git init "su:b" && + test_when_finished "rm -rf su:b" && + echo "foobar" >"su:b/fi:le" && + git -C "su:b" add "fi:le" && + git -C "su:b" commit -m "add fi:le" && + + git -C parent submodule add "../su:b" "su:b" && + git -C parent commit -m "add submodule" && + + cat >expect <<-\EOF && + fi:le:foobar + su:b/fi:le:foobar + EOF + git -C parent grep -e "foobar" --recurse-submodules >actual && + test_cmp expect actual && + + cat >expect <<-\EOF && + HEAD:fi:le:foobar + HEAD:su:b/fi:le:foobar + EOF + git -C parent grep -e "foobar" --recurse-submodules HEAD >actual && + test_cmp expect actual +' + test_incompatible_with_recurse_submodules () { test_expect_success "--recurse-submodules and $1 are incompatible" " @@ -94,6 +196,5 @@ test_incompatible_with_recurse_submodules () test_incompatible_with_recurse_submodules --untracked test_incompatible_with_recurse_submodules --no-index -test_incompatible_with_recurse_submodules HEAD test_done diff --git a/tree-walk.c b/tree-walk.c index 828f435..ff77605 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -1004,6 +1004,19 @@ static enum interesting do_match(const struct name_entry *entry, */ if (ps->recursive && S_ISDIR(entry->mode)) return entry_interesting; + + /* + * When matching against submodules with + * wildcard characters, ensure that the entry + * at least matches up to the first wild + * character. More accurate matching can then + * be performed in the submodule itself. 
+ */ + if (ps->recursive && S_ISGITLINK(entry->mode) && + !ps_strncmp(item, match + baselen, + entry->path, + item->nowildcard_len - baselen)) + return entry_interesting; } continue; @@ -1040,6 +1053,21 @@ match_wildcards: strbuf_setlen(base, base_offset + baselen); return entry_interesting; } + + /* + * When matching against submodules with + * wildcard characters, ensure that the entry + * at least matches up to the first wild + * character. More accurate matching can then + * be performed in the submodule itself. + */ + if (ps->recursive && S_ISGITLINK(entry->mode) && + !ps_strncmp(item, match, base->buf + base_offset, + item->nowildcard_len)) { + strbuf_setlen(base, base_offset + baselen); + return entry_interesting; + } + strbuf_setlen(base, base_offset + baselen); /* -- cgit v0.10.2-6-g49f6 From e6fac7f3d3e313a93fe9b1243917669267b33153 Mon Sep 17 00:00:00 2001 From: Brandon Williams Date: Fri, 16 Dec 2016 11:03:22 -0800 Subject: grep: search history of moved submodules If a submodule was renamed at any point since its inception then if you were to try and grep on a commit prior to the submodule being moved, you wouldn't be able to find a working directory for the submodule since the path in the past is different from the current path. This patch teaches grep to find the .git directory for a submodule in the parent's .git/modules/ directory in the event the path to the submodule in the commit that is being searched differs from the state of the currently checked out commit. If found, the child process that is spawned to grep the submodule will chdir into its gitdir instead of a working directory. In order to override the explicit setting of submodule child process's gitdir environment variable (which was introduced in '10f5c526') `GIT_DIR_ENVIRONMENT` needs to be pushed onto child process's env_array. This allows the searching of history from a submodule's gitdir, rather than from a working directory.
Signed-off-by: Brandon Williams Signed-off-by: Junio C Hamano diff --git a/builtin/grep.c b/builtin/grep.c index 5918a26..2c727ef 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -547,6 +547,7 @@ static int grep_submodule_launch(struct grep_opt *opt, name = gs->name; prepare_submodule_repo_env(&cp.env_array); + argv_array_push(&cp.env_array, GIT_DIR_ENVIRONMENT); /* Add super prefix */ argv_array_pushf(&cp.args, "--super-prefix=%s%s/", @@ -615,8 +616,23 @@ static int grep_submodule(struct grep_opt *opt, const unsigned char *sha1, { if (!is_submodule_initialized(path)) return 0; - if (!is_submodule_populated(path)) - return 0; + if (!is_submodule_populated(path)) { + /* + * If searching history, check for the presense of the + * submodule's gitdir before skipping the submodule. + */ + if (sha1) { + const struct submodule *sub = + submodule_from_path(null_sha1, path); + if (sub) + path = git_path("modules/%s", sub->name); + + if (!(is_directory(path) && is_git_directory(path))) + return 0; + } else { + return 0; + } + } #ifndef NO_PTHREADS if (num_threads) { diff --git a/t/t7814-grep-recurse-submodules.sh b/t/t7814-grep-recurse-submodules.sh index d5fc316..67247a0 100755 --- a/t/t7814-grep-recurse-submodules.sh +++ b/t/t7814-grep-recurse-submodules.sh @@ -186,6 +186,47 @@ test_expect_success !MINGW 'grep recurse submodule colon in name' ' test_cmp expect actual ' +test_expect_success 'grep history with moved submoules' ' + git init parent && + test_when_finished "rm -rf parent" && + echo "foobar" >parent/file && + git -C parent add file && + git -C parent commit -m "add file" && + + git init sub && + test_when_finished "rm -rf sub" && + echo "foobar" >sub/file && + git -C sub add file && + git -C sub commit -m "add file" && + + git -C parent submodule add ../sub dir/sub && + git -C parent commit -m "add submodule" && + + cat >expect <<-\EOF && + dir/sub/file:foobar + file:foobar + EOF + git -C parent grep -e "foobar" --recurse-submodules >actual && + test_cmp 
expect actual && + + git -C parent mv dir/sub sub-moved && + git -C parent commit -m "moved submodule" && + + cat >expect <<-\EOF && + file:foobar + sub-moved/file:foobar + EOF + git -C parent grep -e "foobar" --recurse-submodules >actual && + test_cmp expect actual && + + cat >expect <<-\EOF && + HEAD^:dir/sub/file:foobar + HEAD^:file:foobar + EOF + git -C parent grep -e "foobar" --recurse-submodules HEAD^ >actual && + test_cmp expect actual +' + test_incompatible_with_recurse_submodules () { test_expect_success "--recurse-submodules and $1 are incompatible" " -- cgit v0.10.2-6-g49f6