summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jeff King <peff@peff.net> 2012-02-02 08:21:11 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2012-02-02 18:36:08 (GMT)
commit: 08265798e1ff6abc1b0aaff31c1471f83bd51425 (patch)
tree: a18db0e50f3687e3ccaf14766ac07e1bfc96e99e
parent: 41b59bfcb16abb738e5c95c95fb462e717d47d4d (diff)
download: git-08265798e1ff6abc1b0aaff31c1471f83bd51425.zip
git-08265798e1ff6abc1b0aaff31c1471f83bd51425.tar.gz
git-08265798e1ff6abc1b0aaff31c1471f83bd51425.tar.bz2
grep: load file data after checking binary-ness
Usually we load each file to grep into memory, check whether it's binary, and then either grep it (the default) or not (if "-I" was given).

In the "-I" case, we can skip loading the file entirely if it is marked as binary via gitattributes. On my giant 3-gigabyte media repository, doing "git grep -I foo" went from:

  real    0m0.712s
  user    0m0.044s
  sys     0m4.780s

to:

  real    0m0.026s
  user    0m0.016s
  sys     0m0.020s

Obviously this is an extreme example. The repo is almost entirely binary files, and you can see that we spent all of our time asking the kernel to read() the data. However, with a cold disk cache, even avoiding a few binary files can have an impact.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- grep.c 6
1 files changed, 3 insertions, 3 deletions
diff --git a/grep.c b/grep.c
index a50d161..3821400 100644
--- a/grep.c
+++ b/grep.c
@@ -1019,9 +1019,6 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
}
opt->last_shown = 0;
- if (grep_source_load(gs) < 0)
- return 0;
-
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
if (grep_source_is_binary(gs))
@@ -1042,6 +1039,9 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
try_lookahead = should_lookahead(opt);
+ if (grep_source_load(gs) < 0)
+ return 0;
+
bol = gs->buf;
left = gs->size;
while (left) {