summaryrefslogtreecommitdiff
path: root/grep.c
diff options
context:
space:
mode:
authorRené Scharfe <rene.scharfe@lsrfire.ath.cx>2009-01-09 23:08:40 (GMT)
committerJunio C Hamano <gitster@pobox.com>2009-01-10 05:33:35 (GMT)
commitfb62eb7fab97cea880ea7fe4f341a4dfad14ab48 (patch)
tree281d1b9daeb9a1d0a7ecd86b1e03d686a3394633 /grep.c
parentc123b7c5fb596d93cd015645212c379fc3c381d5 (diff)
downloadgit-fb62eb7fab97cea880ea7fe4f341a4dfad14ab48.zip
git-fb62eb7fab97cea880ea7fe4f341a4dfad14ab48.tar.gz
git-fb62eb7fab97cea880ea7fe4f341a4dfad14ab48.tar.bz2
grep -w: forward to next possible position after rejected match
grep -w accepts matches between non-word characters, only. If a match from regexec() doesn't meet this criteria, grep continues its search after the first character of that match. We can be a bit smarter here and skip all positions that follow a word character first, as they can't match our criteria. This way we can consume characters quite cheaply and don't need to special-case the handling of the beginning of a line. Here's a contrived example command on msysgit (best of five runs): $ time git grep -w ...... v1.6.1 >/dev/null real 0m1.611s user 0m0.000s sys 0m0.015s With the patch it's quite a bit faster: $ time git grep -w ...... v1.6.1 >/dev/null real 0m1.179s user 0m0.000s sys 0m0.015s More common search patterns will gain a lot less, but it's a nice clean up anyway. Signed-off-by: Rene Scharfe <rene.scharfe@lsrfire.ath.cx> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'grep.c')
-rw-r--r--grep.c11
1 files changed, 7 insertions, 4 deletions
diff --git a/grep.c b/grep.c
index 49e9319..22a56b5 100644
--- a/grep.c
+++ b/grep.c
@@ -294,7 +294,6 @@ static struct {
static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol, char *eol, enum grep_context ctx)
{
int hit = 0;
- int at_true_bol = 1;
int saved_ch = 0;
regmatch_t pmatch[10];
@@ -337,7 +336,7 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
* either end of the line, or at word boundary
* (i.e. the next char must not be a word char).
*/
- if ( ((pmatch[0].rm_so == 0 && at_true_bol) ||
+ if ( ((pmatch[0].rm_so == 0) ||
!word_char(bol[pmatch[0].rm_so-1])) &&
((pmatch[0].rm_eo == (eol-bol)) ||
!word_char(bol[pmatch[0].rm_eo])) )
@@ -349,10 +348,14 @@ static int match_one_pattern(struct grep_opt *opt, struct grep_pat *p, char *bol
/* There could be more than one match on the
* line, and the first match might not be
* strict word match. But later ones could be!
+ * Forward to the next possible start, i.e. the
+ * next position following a non-word char.
*/
bol = pmatch[0].rm_so + bol + 1;
- at_true_bol = 0;
- goto again;
+ while (word_char(bol[-1]) && bol < eol)
+ bol++;
+ if (bol < eol)
+ goto again;
}
}
if (p->token == GREP_PATTERN_HEAD && saved_ch)