From a26345b6085340ebd61e156aa8154a80196bee0f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 10 Jan 2010 22:39:36 -0800 Subject: grep: optimize built-in grep by skipping lines that do not hit The internal "grep" engine we use checks for hits line-by-line, instead of letting the underlying regexec()/fixmatch() routines scan for the first match from the rest of the buffer. This was a major source of overhead compared to the external grep. Introduce a "look-ahead" mechanism to find the next line that would potentially match by using regexec()/fixmatch() in the remainder of the text to skip unmatching lines, and use it when the query criteria is simple enough (i.e. punt for an advanced grep boolean expression like "lines that have both X and Y but not Z" for now) and we are not running under "-v" (aka "--invert-match") option. Note that "-L" (aka "--files-without-match") is not a reason to disable this optimization. Under the option, we are interested if the file has any hit at all, and that is what we determine reliably with or without the optimization. Signed-off-by: Junio C Hamano diff --git a/grep.c b/grep.c index 5d162da..03ffcd4 100644 --- a/grep.c +++ b/grep.c @@ -608,6 +608,65 @@ static void show_pre_context(struct grep_opt *opt, const char *name, char *buf, } } +static int should_lookahead(struct grep_opt *opt) +{ + struct grep_pat *p; + + if (opt->extended) + return 0; /* punt for too complex stuff */ + if (opt->invert) + return 0; + for (p = opt->pattern_list; p; p = p->next) { + if (p->token != GREP_PATTERN) + return 0; /* punt for "header only" and stuff */ + } + return 1; +} + +static int look_ahead(struct grep_opt *opt, + unsigned long *left_p, + unsigned *lno_p, + char **bol_p) +{ + unsigned lno = *lno_p; + char *bol = *bol_p; + struct grep_pat *p; + char *sp, *last_bol; + regoff_t earliest = -1; + + for (p = opt->pattern_list; p; p = p->next) { + int hit; + regmatch_t m; + + if (p->fixed) + hit = !fixmatch(p->pattern, bol, &m); + else + hit = !regexec(&p->regexp, bol, 1, &m, 0); + if (!hit || m.rm_so < 0 || m.rm_eo < 0) + continue; + if (earliest < 0 || m.rm_so < earliest) + earliest = m.rm_so; + } + + if (earliest < 0) { + *bol_p = bol + *left_p; + *left_p = 0; + return 1; + } + for (sp = bol + earliest; bol < sp && sp[-1] != '\n'; sp--) + ; /* find the beginning of the line */ + last_bol = sp; + + for (sp = bol; sp < last_bol; sp++) { + if (*sp == '\n') + lno++; + } + *left_p -= last_bol - bol; + *bol_p = last_bol; + *lno_p = lno; + return 0; +} + static int grep_buffer_1(struct grep_opt *opt, const char *name, char *buf, unsigned long size, int collect_hits) { @@ -617,6 +676,7 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, unsigned last_hit = 0; int binary_match_only = 0; unsigned count = 0; + int try_lookahead = 0; enum grep_context ctx = GREP_CONTEXT_HEAD; xdemitconf_t xecfg; @@ -645,11 +705,26 @@ static int grep_buffer_1(struct grep_opt *opt, const char *name, opt->priv = &xecfg; } } + try_lookahead = should_lookahead(opt); while (left) { char *eol, ch; int hit; + /* + * look_ahead() skips quicly to the line that possibly + * has the next hit; don't call it if we need to do + * something more than just skipping the current line + * in response to an unmatch for the current line. E.g. + * inside a post-context window, we will show the current + * line as a context around the previous hit when it + * doesn't hit. + */ + if (try_lookahead + && !(last_hit + && lno <= last_hit + opt->post_context) + && look_ahead(opt, &left, &lno, &bol)) + break; eol = end_of_line(bol, &left); ch = *eol; *eol = 0; -- cgit v0.10.2-6-g49f6 From bbc09c22b9f7784b1aab71d4876227956e6e8f4f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 12 Jan 2010 19:06:41 -0800 Subject: grep: rip out support for external grep We still allow people to pass --[no-]ext-grep on the command line, but the option is ignored. Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index a1e36d7..6a96d9d 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -645,14 +645,6 @@ color.grep:: `never`), never. When set to `true` or `auto`, use color only when the output is written to the terminal. Defaults to `false`. -color.grep.external:: - The string value of this variable is passed to an external 'grep' - command as a command line option if match highlighting is turned - on. If set to an empty string, no option is passed at all, - turning off coloring for external 'grep' calls; this is the default. - For GNU grep, set it to `--color=always` to highlight matches even - when a pager is used. - color.grep.match:: Use customized color for matches. The value of this variable may be specified as in color.branch.. It is passed using diff --git a/Makefile b/Makefile index 4a1e5bc..a4b922e 100644 --- a/Makefile +++ b/Makefile @@ -185,10 +185,6 @@ all:: # is a simplified version of the merge sort used in glibc. This is # recommended if Git triggers O(n^2) behavior in your platform's qsort(). # -# Define NO_EXTERNAL_GREP if you don't want "git grep" to ever call -# your external grep (e.g., if your system lacks grep, if its grep is -# broken, or spawning external process is slower than built-in grep git has). -# # Define UNRELIABLE_FSTAT if your system's fstat does not return the same # information on a not yet closed file that lstat would return for the same # file after it was closed. @@ -777,7 +773,6 @@ ifeq ($(uname_S),SunOS) NO_MKDTEMP = YesPlease NO_MKSTEMPS = YesPlease NO_REGEX = YesPlease - NO_EXTERNAL_GREP = YesPlease THREADED_DELTA_SEARCH = YesPlease ifeq ($(uname_R),5.7) NEEDS_RESOLV = YesPlease @@ -895,7 +890,6 @@ ifeq ($(uname_S),IRIX) # NO_MMAP. If you suspect that your compiler is not affected by this # issue, comment out the NO_MMAP statement. NO_MMAP = YesPlease - NO_EXTERNAL_GREP = UnfortunatelyYes SNPRINTF_RETURNS_BOGUS = YesPlease SHELL_PATH = /usr/gnu/bin/bash NEEDS_LIBGEN = YesPlease @@ -915,7 +909,6 @@ ifeq ($(uname_S),IRIX64) # NO_MMAP. If you suspect that your compiler is not affected by this # issue, comment out the NO_MMAP statement. NO_MMAP = YesPlease - NO_EXTERNAL_GREP = UnfortunatelyYes SNPRINTF_RETURNS_BOGUS = YesPlease SHELL_PATH=/usr/gnu/bin/bash NEEDS_LIBGEN = YesPlease @@ -1322,9 +1315,6 @@ endif ifdef DIR_HAS_BSD_GROUP_SEMANTICS COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS endif -ifdef NO_EXTERNAL_GREP - BASIC_CFLAGS += -DNO_EXTERNAL_GREP -endif ifdef UNRELIABLE_FSTAT BASIC_CFLAGS += -DUNRELIABLE_FSTAT endif diff --git a/builtin-grep.c b/builtin-grep.c index a5b6719..3d6ebb5 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -15,14 +15,6 @@ #include "grep.h" #include "quote.h" -#ifndef NO_EXTERNAL_GREP -#ifdef __unix__ -#define NO_EXTERNAL_GREP 0 -#else -#define NO_EXTERNAL_GREP 1 -#endif -#endif - static char const * const grep_usage[] = { "git grep [options] [-e] [...] [[--] path...]", NULL @@ -42,8 +34,6 @@ static int grep_config(const char *var, const char *value, void *cb) opt->color = git_config_colorbool(var, value, -1); return 0; } - if (!strcmp(var, "color.grep.external")) - return git_config_string(&(opt->color_external), var, value); if (!strcmp(var, "color.grep.match")) { if (!value) return config_error_nonbool(var); @@ -215,292 +205,12 @@ static int grep_file(struct grep_opt *opt, const char *filename) return i; } -#if !NO_EXTERNAL_GREP -static int exec_grep(int argc, const char **argv) -{ - pid_t pid; - int status; - - argv[argc] = NULL; - pid = fork(); - if (pid < 0) - return pid; - if (!pid) { - execvp("grep", (char **) argv); - exit(255); - } - while (waitpid(pid, &status, 0) < 0) { - if (errno == EINTR) - continue; - return -1; - } - if (WIFEXITED(status)) { - if (!WEXITSTATUS(status)) - return 1; - return 0; - } - return -1; -} - -#define MAXARGS 1000 -#define ARGBUF 4096 -#define push_arg(a) do { \ - if (nr < MAXARGS) argv[nr++] = (a); \ - else die("maximum number of args exceeded"); \ - } while (0) - -/* - * If you send a singleton filename to grep, it does not give - * the name of the file. GNU grep has "-H" but we would want - * that behaviour in a portable way. - * - * So we keep two pathnames in argv buffer unsent to grep in - * the main loop if we need to do more than one grep. - */ -static int flush_grep(struct grep_opt *opt, - int argc, int arg0, const char **argv, int *kept) -{ - int status; - int count = argc - arg0; - const char *kept_0 = NULL; - - if (count <= 2) { - /* - * Because we keep at least 2 paths in the call from - * the main loop (i.e. kept != NULL), and MAXARGS is - * far greater than 2, this usually is a call to - * conclude the grep. However, the user could attempt - * to overflow the argv buffer by giving too many - * options to leave very small number of real - * arguments even for the call in the main loop. - */ - if (kept) - die("insanely many options to grep"); - - /* - * If we have two or more paths, we do not have to do - * anything special, but we need to push /dev/null to - * get "-H" behaviour of GNU grep portably but when we - * are not doing "-l" nor "-L" nor "-c". - */ - if (count == 1 && - !opt->name_only && - !opt->unmatch_name_only && - !opt->count) { - argv[argc++] = "/dev/null"; - argv[argc] = NULL; - } - } - - else if (kept) { - /* - * Called because we found many paths and haven't finished - * iterating over the cache yet. We keep two paths - * for the concluding call. argv[argc-2] and argv[argc-1] - * has the last two paths, so save the first one away, - * replace it with NULL while sending the list to grep, - * and recover them after we are done. - */ - *kept = 2; - kept_0 = argv[argc-2]; - argv[argc-2] = NULL; - argc -= 2; - } - - if (opt->pre_context || opt->post_context) { - /* - * grep handles hunk marks between files, but we need to - * do that ourselves between multiple calls. - */ - if (opt->show_hunk_mark) - write_or_die(1, "--\n", 3); - else - opt->show_hunk_mark = 1; - } - - status = exec_grep(argc, argv); - - if (kept_0) { - /* - * Then recover them. Now the last arg is beyond the - * terminating NULL which is at argc, and the second - * from the last is what we saved away in kept_0 - */ - argv[arg0++] = kept_0; - argv[arg0] = argv[argc+1]; - } - return status; -} - -static void grep_add_color(struct strbuf *sb, const char *escape_seq) -{ - size_t orig_len = sb->len; - - while (*escape_seq) { - if (*escape_seq == 'm') - strbuf_addch(sb, ';'); - else if (*escape_seq != '\033' && *escape_seq != '[') - strbuf_addch(sb, *escape_seq); - escape_seq++; - } - if (sb->len > orig_len && sb->buf[sb->len - 1] == ';') - strbuf_setlen(sb, sb->len - 1); -} - -static int external_grep(struct grep_opt *opt, const char **paths, int cached) -{ - int i, nr, argc, hit, len, status; - const char *argv[MAXARGS+1]; - char randarg[ARGBUF]; - char *argptr = randarg; - struct grep_pat *p; - - if (opt->extended || (opt->relative && opt->prefix_length)) - return -1; - len = nr = 0; - push_arg("grep"); - if (opt->fixed) - push_arg("-F"); - if (opt->linenum) - push_arg("-n"); - if (!opt->pathname) - push_arg("-h"); - if (opt->regflags & REG_EXTENDED) - push_arg("-E"); - if (opt->ignore_case) - push_arg("-i"); - if (opt->binary == GREP_BINARY_NOMATCH) - push_arg("-I"); - if (opt->word_regexp) - push_arg("-w"); - if (opt->name_only) - push_arg("-l"); - if (opt->unmatch_name_only) - push_arg("-L"); - if (opt->null_following_name) - /* in GNU grep git's "-z" translates to "-Z" */ - push_arg("-Z"); - if (opt->count) - push_arg("-c"); - if (opt->post_context || opt->pre_context) { - if (opt->post_context != opt->pre_context) { - if (opt->pre_context) { - push_arg("-B"); - len += snprintf(argptr, sizeof(randarg)-len, - "%u", opt->pre_context) + 1; - if (sizeof(randarg) <= len) - die("maximum length of args exceeded"); - push_arg(argptr); - argptr += len; - } - if (opt->post_context) { - push_arg("-A"); - len += snprintf(argptr, sizeof(randarg)-len, - "%u", opt->post_context) + 1; - if (sizeof(randarg) <= len) - die("maximum length of args exceeded"); - push_arg(argptr); - argptr += len; - } - } - else { - push_arg("-C"); - len += snprintf(argptr, sizeof(randarg)-len, - "%u", opt->post_context) + 1; - if (sizeof(randarg) <= len) - die("maximum length of args exceeded"); - push_arg(argptr); - argptr += len; - } - } - for (p = opt->pattern_list; p; p = p->next) { - push_arg("-e"); - push_arg(p->pattern); - } - if (opt->color) { - struct strbuf sb = STRBUF_INIT; - - grep_add_color(&sb, opt->color_match); - setenv("GREP_COLOR", sb.buf, 1); - - strbuf_reset(&sb); - strbuf_addstr(&sb, "mt="); - grep_add_color(&sb, opt->color_match); - strbuf_addstr(&sb, ":sl=:cx=:fn=:ln=:bn=:se="); - setenv("GREP_COLORS", sb.buf, 1); - - strbuf_release(&sb); - - if (opt->color_external && strlen(opt->color_external) > 0) - push_arg(opt->color_external); - } else { - unsetenv("GREP_COLOR"); - unsetenv("GREP_COLORS"); - } - unsetenv("GREP_OPTIONS"); - - hit = 0; - argc = nr; - for (i = 0; i < active_nr; i++) { - struct cache_entry *ce = active_cache[i]; - char *name; - int kept; - if (!S_ISREG(ce->ce_mode)) - continue; - if (!pathspec_matches(paths, ce->name, opt->max_depth)) - continue; - name = ce->name; - if (name[0] == '-') { - int len = ce_namelen(ce); - name = xmalloc(len + 3); - memcpy(name, "./", 2); - memcpy(name + 2, ce->name, len + 1); - } - argv[argc++] = name; - if (MAXARGS <= argc) { - status = flush_grep(opt, argc, nr, argv, &kept); - if (0 < status) - hit = 1; - argc = nr + kept; - } - if (ce_stage(ce)) { - do { - i++; - } while (i < active_nr && - !strcmp(ce->name, active_cache[i]->name)); - i--; /* compensate for loop control */ - } - } - if (argc > nr) { - status = flush_grep(opt, argc, nr, argv, NULL); - if (0 < status) - hit = 1; - } - return hit; -} -#endif - -static int grep_cache(struct grep_opt *opt, const char **paths, int cached, - int external_grep_allowed) +static int grep_cache(struct grep_opt *opt, const char **paths, int cached) { int hit = 0; int nr; read_cache(); -#if !NO_EXTERNAL_GREP - /* - * Use the external "grep" command for the case where - * we grep through the checked-out files. It tends to - * be a lot more optimized - */ - if (!cached && external_grep_allowed) { - hit = external_grep(opt, paths, cached); - if (hit >= 0) - return hit; - hit = 0; - } -#endif - for (nr = 0; nr < active_nr; nr++) { struct cache_entry *ce = active_cache[nr]; if (!S_ISREG(ce->ce_mode)) @@ -697,8 +407,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) { int hit = 0; int cached = 0; - int external_grep_allowed = 1; int seen_dashdash = 0; + int external_grep_allowed__ignored; struct grep_opt opt; struct object_array list = { 0, 0, NULL }; const char **paths = NULL; @@ -780,13 +490,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) OPT_BOOLEAN(0, "all-match", &opt.all_match, "show only matches from files that match all patterns"), OPT_GROUP(""), -#if NO_EXTERNAL_GREP - OPT_BOOLEAN(0, "ext-grep", &external_grep_allowed, - "allow calling of grep(1) (ignored by this build)"), -#else - OPT_BOOLEAN(0, "ext-grep", &external_grep_allowed, - "allow calling of grep(1) (default)"), -#endif + OPT_BOOLEAN(0, "ext-grep", &external_grep_allowed__ignored, + "allow calling of grep(1) (ignored by this build)"), { OPTION_CALLBACK, 0, "help-all", &options, NULL, "show usage", PARSE_OPT_HIDDEN | PARSE_OPT_NOARG, help_callback }, OPT_END() @@ -837,8 +542,6 @@ int cmd_grep(int argc, const char **argv, const char *prefix) argc--; } - if ((opt.color && !opt.color_external) || opt.funcname) - external_grep_allowed = 0; if (!opt.pattern_list) die("no pattern given."); if (!opt.fixed && opt.ignore_case) @@ -884,7 +587,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) if (!list.nr) { if (!cached) setup_work_tree(); - return !grep_cache(&opt, paths, cached, external_grep_allowed); + return !grep_cache(&opt, paths, cached); } if (cached) diff --git a/grep.h b/grep.h index 75370f6..0c61b00 100644 --- a/grep.h +++ b/grep.h @@ -85,7 +85,6 @@ struct grep_opt { int max_depth; int funcname; char color_match[COLOR_MAXLEN]; - const char *color_external; int regflags; unsigned pre_context; unsigned post_context; diff --git a/t/t7002-grep.sh b/t/t7002-grep.sh index abd14bf..c369cdb 100755 --- a/t/t7002-grep.sh +++ b/t/t7002-grep.sh @@ -302,8 +302,8 @@ test_expect_success 'grep -C1, hunk mark between files' ' test_cmp expected actual ' -test_expect_success 'grep -C1 --no-ext-grep, hunk mark between files' ' - git grep -C1 --no-ext-grep "^[yz]" >actual && +test_expect_success 'grep -C1 hunk mark between files' ' + git grep -C1 "^[yz]" >actual && test_cmp expected actual ' @@ -359,7 +359,7 @@ test_expect_success 'log grep (6)' ' test_expect_success 'grep with CE_VALID file' ' git update-index --assume-unchanged t/t && rm t/t && - test "$(git grep --no-ext-grep test)" = "t/t:test" && + test "$(git grep test)" = "t/t:test" && git update-index --no-assume-unchanged t/t && git checkout t/t ' -- cgit v0.10.2-6-g49f6 From 885d211e714b3787cd059e347694f9b24e1db1f3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 12 Jan 2010 19:21:46 -0800 Subject: grep: rip out pessimization to use fixmatch() Even when running without the -F (--fixed-strings) option, we checked the pattern and used fixmatch() codepath when it does not contain any regex magic. Finding fixed strings with strstr() surely must be faster than running the regular expression crud. Not so. It turns out that on some libc implementations, using the regcomp()/regexec() pair is a lot faster than running strstr() and strcasestr() the fixmatch() codepath uses. Drop the optimization and use the fixmatch() codepath only when the user explicitly asked for it with the -F option. Signed-off-by: Junio C Hamano diff --git a/grep.c b/grep.c index 62723da..8e1f7de 100644 --- a/grep.c +++ b/grep.c @@ -29,13 +29,6 @@ void append_grep_pattern(struct grep_opt *opt, const char *pat, p->next = NULL; } -static int is_fixed(const char *s) -{ - while (*s && !is_regex_special(*s)) - s++; - return !*s; -} - static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { int err; @@ -43,7 +36,7 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - if (opt->fixed || is_fixed(p->pattern)) + if (opt->fixed) p->fixed = 1; if (opt->regflags & REG_ICASE) p->fixed = 0; -- cgit v0.10.2-6-g49f6 From 7e622650d756955a94f546866eddd51506aa93a3 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 15 Jan 2010 12:50:54 -0800 Subject: grep: prepare to run outside of a work tree This moves the call to setup_git_directory() for running "grep" from the "git" wrapper to the implementation of the "grep" subcommand. A new variable "use_index" is always true at this stage in the series, and when it is on, we require that we are in a directory that is under git control. To make sure we die the same way, we make a second call into setup_git_directory() when we detect this situation. Signed-off-by: Junio C Hamano diff --git a/builtin-grep.c b/builtin-grep.c index 3d6ebb5..229555d 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -414,6 +414,7 @@ int cmd_grep(int argc, const char **argv, const char *prefix) const char **paths = NULL; int i; int dummy; + int nongit = 0, use_index = 1; struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), @@ -497,6 +498,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) OPT_END() }; + prefix = setup_git_directory_gently(&nongit); + /* * 'git grep -h', unlike 'git grep -h ', is a request * to show usage information and exit. @@ -534,6 +537,10 @@ int cmd_grep(int argc, const char **argv, const char *prefix) PARSE_OPT_STOP_AT_NON_OPTION | PARSE_OPT_NO_INTERNAL_HELP); + if (use_index && nongit) + /* die the same way as if we did it at the beginning */ + setup_git_directory(); + /* First unrecognized non-option token */ if (argc > 0 && !opt.pattern_list) { append_grep_pattern(&opt, argv[0], "command line", 0, diff --git a/git.c b/git.c index 11544cd..ad07473 100644 --- a/git.c +++ b/git.c @@ -317,7 +317,7 @@ static void handle_internal_command(int argc, const char **argv) { "fsck-objects", cmd_fsck, RUN_SETUP }, { "gc", cmd_gc, RUN_SETUP }, { "get-tar-commit-id", cmd_get_tar_commit_id }, - { "grep", cmd_grep, RUN_SETUP | USE_PAGER }, + { "grep", cmd_grep, USE_PAGER }, { "help", cmd_help }, { "init", cmd_init_db }, { "init-db", cmd_init_db }, -- cgit v0.10.2-6-g49f6 From 308162372d0aa202ff45743e02253e20a4fac4d7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 15 Jan 2010 12:52:40 -0800 Subject: grep --no-index: allow use of "git grep" outside a git repository Just like some people wanted diff features that are not found in other people's diff implementations outside of a git repository and added --no-index mode to the command, this adds --no-index mode to the "git grep" command. Also, inside a git repository, --no-index mode allows you to grep in untracked (but not ignored) files. Signed-off-by: Junio C Hamano diff --git a/builtin-grep.c b/builtin-grep.c index 229555d..1283373 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -14,6 +14,7 @@ #include "userdiff.h" #include "grep.h" #include "quote.h" +#include "dir.h" static char const * const grep_usage[] = { "git grep [options] [-e] [...] [[--] path...]", @@ -320,6 +321,21 @@ static int grep_object(struct grep_opt *opt, const char **paths, die("unable to grep from object of type %s", typename(obj->type)); } +static int grep_directory(struct grep_opt *opt, const char **paths) +{ + struct dir_struct dir; + int i, hit = 0; + + memset(&dir, 0, sizeof(dir)); + setup_standard_excludes(&dir); + + fill_directory(&dir, paths); + for (i = 0; i < dir.nr; i++) + hit |= grep_file(opt, dir.entries[i]->name); + free_grep_patterns(opt); + return hit; +} + static int context_callback(const struct option *opt, const char *arg, int unset) { @@ -418,6 +434,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix) struct option options[] = { OPT_BOOLEAN(0, "cached", &cached, "search in index instead of in the work tree"), + OPT_BOOLEAN(0, "index", &use_index, + "--no-index finds in contents not managed by git"), OPT_GROUP(""), OPT_BOOLEAN('v', "invert-match", &opt.invert, "show non-matching lines"), @@ -591,6 +609,14 @@ int cmd_grep(int argc, const char **argv, const char *prefix) paths[1] = NULL; } + if (!use_index) { + if (cached) + die("--cached cannot be used with --no-index."); + if (list.nr) + die("--no-index cannot be used with revs."); + return !grep_directory(&opt, paths); + } + if (!list.nr) { if (!cached) setup_work_tree(); diff --git a/t/t7002-grep.sh b/t/t7002-grep.sh index c369cdb..7eceb08 100755 --- a/t/t7002-grep.sh +++ b/t/t7002-grep.sh @@ -426,4 +426,56 @@ test_expect_success 'grep -Fi' ' test_cmp expected actual ' +test_expect_success 'outside of git repository' ' + rm -fr non && + mkdir -p non/git/sub && + echo hello >non/git/file1 && + echo world >non/git/sub/file2 && + echo ".*o*" >non/git/.gitignore && + { + echo file1:hello && + echo sub/file2:world + } >non/expect.full && + echo file2:world >non/expect.sub + ( + GIT_CEILING_DIRECTORIES="$(pwd)/non/git" && + export GIT_CEILING_DIRECTORIES && + cd non/git && + test_must_fail git grep o && + git grep --no-index o >../actual.full && + test_cmp ../expect.full ../actual.full + cd sub && + test_must_fail git grep o && + git grep --no-index o >../../actual.sub && + test_cmp ../../expect.sub ../../actual.sub + ) +' + +test_expect_success 'inside git repository but with --no-index' ' + rm -fr is && + mkdir -p is/git/sub && + echo hello >is/git/file1 && + echo world >is/git/sub/file2 && + echo ".*o*" >is/git/.gitignore && + { + echo file1:hello && + echo sub/file2:world + } >is/expect.full && + : >is/expect.empty && + echo file2:world >is/expect.sub + ( + cd is/git && + git init && + test_must_fail git grep o >../actual.full && + test_cmp ../expect.empty ../actual.full && + git grep --no-index o >../actual.full && + test_cmp ../expect.full ../actual.full && + cd sub && + test_must_fail git grep o >../../actual.sub && + test_cmp ../../expect.empty ../../actual.sub && + git grep --no-index o >../../actual.sub && + test_cmp ../../expect.sub ../../actual.sub + ) +' + test_done -- cgit v0.10.2-6-g49f6