From 08abe669c05521499149dbf84fedefb04a8fa34d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 24 Apr 2006 23:07:47 -0400 Subject: split the diff-delta interface This patch splits the diff-delta interface into index creation and delta generation. A wrapper is provided to preserve the diff-delta() call. This will allow for an optimization in pack-objects.c where the source object could be fixed and a full window of objects tentatively tried against that same source object without recomputing the source index each time. This patch only restructure things, plus a couple cleanups for good measure. There is no performance change yet. Signed-off-by: Nicolas Pitre diff --git a/delta.h b/delta.h index 9464f3e..727ae30 100644 --- a/delta.h +++ b/delta.h @@ -1,12 +1,73 @@ #ifndef DELTA_H #define DELTA_H -/* handling of delta buffers */ -extern void *diff_delta(void *from_buf, unsigned long from_size, - void *to_buf, unsigned long to_size, - unsigned long *delta_size, unsigned long max_size); -extern void *patch_delta(void *src_buf, unsigned long src_size, - void *delta_buf, unsigned long delta_size, +/* opaque object for delta index */ +struct delta_index; + +/* + * create_delta_index: compute index data from given buffer + * + * This returns a pointer to a struct delta_index that should be passed to + * subsequent create_delta() calls, or to free_delta_index(). A NULL pointer + * is returned on failure. The given buffer must not be freed nor altered + * before free_delta_index() is called. The returned pointer must be freed + * using free_delta_index(). + */ +extern struct delta_index * +create_delta_index(const void *buf, unsigned long bufsize); + +/* + * free_delta_index: free the index created by create_delta_index() + */ +extern void free_delta_index(struct delta_index *index); + +/* + * create_delta: create a delta from given index for the given buffer + * + * This function may be called multiple times with different buffers using + * the same delta_index pointer. If max_delta_size is non-zero and the + * resulting delta is to be larger than max_delta_size then NULL is returned. + * On success, a non-NULL pointer to the buffer with the delta data is + * returned and *delta_size is updated with its size. The returned buffer + * must be freed by the caller. + */ +extern void * +create_delta(const struct delta_index *index, + const void *buf, unsigned long bufsize, + unsigned long *delta_size, unsigned long max_delta_size); + +/* + * diff_delta: create a delta from source buffer to target buffer + * + * If max_delta_size is non-zero and the resulting delta is to be larger + * than max_delta_size then NULL is returned. On success, a non-NULL + * pointer to the buffer with the delta data is returned and *delta_size is + * updated with its size. The returned buffer must be freed by the caller. + */ +static inline void * +diff_delta(const void *src_buf, unsigned long src_bufsize, + const void *trg_buf, unsigned long trg_bufsize, + unsigned long *delta_size, unsigned long max_delta_size) +{ + struct delta_index *index = create_delta_index(src_buf, src_bufsize); + if (index) { + void *delta = create_delta(index, trg_buf, trg_bufsize, + delta_size, max_delta_size); + free_delta_index(index); + return delta; + } + return NULL; +} + +/* + * patch_delta: recreate target buffer given source buffer and delta data + * + * On success, a non-NULL pointer to the target buffer is returned and + * *trg_bufsize is updated with its size. On failure a NULL pointer is + * returned. The returned buffer must be freed by the caller. + */ +extern void *patch_delta(const void *src_buf, unsigned long src_size, + const void *delta_buf, unsigned long delta_size, unsigned long *dst_size); /* the smallest possible delta size is 4 bytes */ @@ -14,7 +75,7 @@ extern void *patch_delta(void *src_buf, unsigned long src_size, /* * This must be called twice on the delta data buffer, first to get the - * expected reference buffer size, and again to get the result buffer size. + * expected source buffer size, and again to get the target buffer size. */ static inline unsigned long get_delta_hdr_size(const unsigned char **datap, const unsigned char *top) diff --git a/diff-delta.c b/diff-delta.c index 1188b31..fdedf94 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -27,53 +27,70 @@ /* block size: min = 16, max = 64k, power of 2 */ #define BLK_SIZE 16 -#define MIN(a, b) ((a) < (b) ? (a) : (b)) +/* maximum hash entry list for the same hash bucket */ +#define HASH_LIMIT 64 #define GR_PRIME 0x9e370001 #define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift)) -struct index { +struct index_entry { const unsigned char *ptr; unsigned int val; - struct index *next; + struct index_entry *next; }; -static struct index ** delta_index(const unsigned char *buf, - unsigned long bufsize, - unsigned long trg_bufsize, - unsigned int *hash_shift) +struct delta_index { + const void *src_buf; + unsigned long src_size; + unsigned int hash_shift; + struct index_entry *hash[0]; +}; + +struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) { - unsigned int i, hsize, hshift, hlimit, entries, *hash_count; - const unsigned char *data; - struct index *entry, **hash; + unsigned int i, hsize, hshift, entries, *hash_count; + const unsigned char *data, *buffer = buf; + struct delta_index *index; + struct index_entry *entry, **hash; void *mem; + if (!buf || !bufsize) + return NULL; + /* determine index hash size */ entries = bufsize / BLK_SIZE; hsize = entries / 4; for (i = 4; (1 << i) < hsize && i < 31; i++); hsize = 1 << i; hshift = 32 - i; - *hash_shift = hshift; /* allocate lookup index */ - mem = malloc(hsize * sizeof(*hash) + entries * sizeof(*entry)); + mem = malloc(sizeof(*index) + + sizeof(*hash) * hsize + + sizeof(*entry) * entries); if (!mem) return NULL; + index = mem; + mem = index + 1; hash = mem; - entry = mem + hsize * sizeof(*hash); + mem = hash + hsize; + entry = mem; + + index->src_buf = buf; + index->src_size = bufsize; + index->hash_shift = hshift; memset(hash, 0, hsize * sizeof(*hash)); /* allocate an array to count hash entries */ hash_count = calloc(hsize, sizeof(*hash_count)); if (!hash_count) { - free(hash); + free(index); return NULL; } /* then populate the index */ - data = buf + entries * BLK_SIZE - BLK_SIZE; - while (data >= buf) { + data = buffer + entries * BLK_SIZE - BLK_SIZE; + while (data >= buffer) { unsigned int val = adler32(0, data, BLK_SIZE); i = HASH(val, hshift); entry->ptr = data; @@ -91,27 +108,18 @@ static struct index ** delta_index(const unsigned char *buf, * bucket that would bring us to O(m*n) computing costs (m and n * corresponding to reference and target buffer sizes). * - * The more the target buffer is large, the more it is important to - * have small entry lists for each hash buckets. With such a limit - * the cost is bounded to something more like O(m+n). - */ - hlimit = (1 << 26) / trg_bufsize; - if (hlimit < 4*BLK_SIZE) - hlimit = 4*BLK_SIZE; - - /* - * Now make sure none of the hash buckets has more entries than + * Make sure none of the hash buckets has more entries than * we're willing to test. Otherwise we cull the entry list * uniformly to still preserve a good repartition across * the reference buffer. */ for (i = 0; i < hsize; i++) { - if (hash_count[i] < hlimit) + if (hash_count[i] < HASH_LIMIT) continue; entry = hash[i]; do { - struct index *keep = entry; - int skip = hash_count[i] / hlimit / 2; + struct index_entry *keep = entry; + int skip = hash_count[i] / HASH_LIMIT / 2; do { entry = entry->next; } while(--skip && entry); @@ -120,7 +128,12 @@ static struct index ** delta_index(const unsigned char *buf, } free(hash_count); - return hash; + return index; +} + +void free_delta_index(struct delta_index *index) +{ + free(index); } /* provide the size of the copy opcode given the block offset and size */ @@ -131,21 +144,17 @@ static struct index ** delta_index(const unsigned char *buf, /* the maximum size for any opcode */ #define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff) -void *diff_delta(void *from_buf, unsigned long from_size, - void *to_buf, unsigned long to_size, - unsigned long *delta_size, - unsigned long max_size) +void * +create_delta(const struct delta_index *index, + const void *trg_buf, unsigned long trg_size, + unsigned long *delta_size, unsigned long max_size) { unsigned int i, outpos, outsize, hash_shift; int inscnt; const unsigned char *ref_data, *ref_top, *data, *top; unsigned char *out; - struct index *entry, **hash; - if (!from_size || !to_size) - return NULL; - hash = delta_index(from_buf, from_size, to_size, &hash_shift); - if (!hash) + if (!trg_buf || !trg_size) return NULL; outpos = 0; @@ -153,60 +162,55 @@ void *diff_delta(void *from_buf, unsigned long from_size, if (max_size && outsize >= max_size) outsize = max_size + MAX_OP_SIZE + 1; out = malloc(outsize); - if (!out) { - free(hash); + if (!out) return NULL; - } - - ref_data = from_buf; - ref_top = from_buf + from_size; - data = to_buf; - top = to_buf + to_size; /* store reference buffer size */ - out[outpos++] = from_size; - from_size >>= 7; - while (from_size) { - out[outpos - 1] |= 0x80; - out[outpos++] = from_size; - from_size >>= 7; + i = index->src_size; + while (i >= 0x80) { + out[outpos++] = i | 0x80; + i >>= 7; } + out[outpos++] = i; /* store target buffer size */ - out[outpos++] = to_size; - to_size >>= 7; - while (to_size) { - out[outpos - 1] |= 0x80; - out[outpos++] = to_size; - to_size >>= 7; + i = trg_size; + while (i >= 0x80) { + out[outpos++] = i | 0x80; + i >>= 7; } + out[outpos++] = i; + ref_data = index->src_buf; + ref_top = ref_data + index->src_size; + data = trg_buf; + top = trg_buf + trg_size; + hash_shift = index->hash_shift; inscnt = 0; while (data < top) { unsigned int moff = 0, msize = 0; - if (data + BLK_SIZE <= top) { - unsigned int val = adler32(0, data, BLK_SIZE); - i = HASH(val, hash_shift); - for (entry = hash[i]; entry; entry = entry->next) { - const unsigned char *ref = entry->ptr; - const unsigned char *src = data; - unsigned int ref_size = ref_top - ref; - if (entry->val != val) - continue; - if (ref_size > top - src) - ref_size = top - src; - if (ref_size > 0x10000) - ref_size = 0x10000; - if (ref_size <= msize) - break; - while (ref_size-- && *src++ == *ref) - ref++; - if (msize < ref - entry->ptr) { - /* this is our best match so far */ - msize = ref - entry->ptr; - moff = entry->ptr - ref_data; - } + struct index_entry *entry; + unsigned int val = adler32(0, data, BLK_SIZE); + i = HASH(val, hash_shift); + for (entry = index->hash[i]; entry; entry = entry->next) { + const unsigned char *ref = entry->ptr; + const unsigned char *src = data; + unsigned int ref_size = ref_top - ref; + if (entry->val != val) + continue; + if (ref_size > top - src) + ref_size = top - src; + if (ref_size > 0x10000) + ref_size = 0x10000; + if (ref_size <= msize) + break; + while (ref_size-- && *src++ == *ref) + ref++; + if (msize < ref - entry->ptr) { + /* this is our best match so far */ + msize = ref - entry->ptr; + moff = entry->ptr - ref_data; } } @@ -271,7 +275,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, out = realloc(out, outsize); if (!out) { free(tmp); - free(hash); return NULL; } } @@ -280,7 +283,6 @@ void *diff_delta(void *from_buf, unsigned long from_size, if (inscnt) out[outpos - inscnt - 1] = inscnt; - free(hash); *delta_size = outpos; return out; } diff --git a/patch-delta.c b/patch-delta.c index d95f0d9..8f318ed 100644 --- a/patch-delta.c +++ b/patch-delta.c @@ -13,8 +13,8 @@ #include #include "delta.h" -void *patch_delta(void *src_buf, unsigned long src_size, - void *delta_buf, unsigned long delta_size, +void *patch_delta(const void *src_buf, unsigned long src_size, + const void *delta_buf, unsigned long delta_size, unsigned long *dst_size) { const unsigned char *data, *top; -- cgit v0.10.2-6-g49f6 From 992793c832acfd98107068d90b886643f0344d04 Mon Sep 17 00:00:00 2001 From: Martin Langhoff Date: Wed, 26 Apr 2006 12:26:16 +1200 Subject: git-cvsexportcommit: Add -f(orce) and -m(essage prefix) flags, small cleanups. diff --git a/Documentation/git-cvsexportcommit.txt b/Documentation/git-cvsexportcommit.txt index d30435a..56bd3e5 100644 --- a/Documentation/git-cvsexportcommit.txt +++ b/Documentation/git-cvsexportcommit.txt @@ -8,7 +8,7 @@ git-cvsexportcommit - Export a commit to a CVS checkout SYNOPSIS -------- -'git-cvsexportcommmit' [-h] [-v] [-c] [-p] [PARENTCOMMIT] COMMITID +'git-cvsexportcommmit' [-h] [-v] [-c] [-p] [-f] [-m msgprefix] [PARENTCOMMIT] COMMITID DESCRIPTION @@ -39,6 +39,13 @@ OPTIONS Be pedantic (paranoid) when applying patches. Invokes patch with --fuzz=0 +-f:: + Force the merge even if the files are not up to date. + +-m:: + Prepend the commit message with the provided prefix. + Useful for patch series and the like. + -v:: Verbose. diff --git a/git-cvsexportcommit.perl b/git-cvsexportcommit.perl index 7b3a3d3..f994443 100755 --- a/git-cvsexportcommit.perl +++ b/git-cvsexportcommit.perl @@ -10,9 +10,9 @@ unless ($ENV{GIT_DIR} && -r $ENV{GIT_DIR}){ die "GIT_DIR is not defined or is unreadable"; } -our ($opt_h, $opt_p, $opt_v, $opt_c ); +our ($opt_h, $opt_p, $opt_v, $opt_c, $opt_f, $opt_m ); -getopts('hpvc'); +getopts('hpvcfm:'); $opt_h && usage(); @@ -77,12 +77,16 @@ if ($parent) { $opt_v && print "Applying to CVS commit $commit from parent $parent\n"; # grab the commit message -`git-cat-file commit $commit | sed -e '1,/^\$/d' > .msg`; +open(MSG, ">.msg") or die "Cannot open .msg for writing"; +print MSG $opt_m; +close MSG; + +`git-cat-file commit $commit | sed -e '1,/^\$/d' >> .msg`; $? && die "Error extracting the commit message"; my (@afiles, @dfiles, @mfiles); my @files = safe_pipe_capture('git-diff-tree', '-r', $parent, $commit); -print @files; +#print @files; $? && die "Error in git-diff-tree"; foreach my $f (@files) { chomp $f; @@ -109,7 +113,7 @@ foreach my $f (@afiles) { if (@status > 1) { warn 'Strange! cvs status returned more than one line?'}; unless ($status[0] =~ m/Status: Unknown$/) { $dirty = 1; - warn "File $f is already known in your CVS checkout!\n"; + warn "File $f is already known in your CVS checkout -- perhaps it has been added by another user. Or this may indicate that it exists on a different branch. If this is the case, use -f to force the merge.\n"; } } foreach my $f (@mfiles, @dfiles) { @@ -122,7 +126,11 @@ foreach my $f (@mfiles, @dfiles) { } } if ($dirty) { - die "Exiting: your CVS tree is not clean for this merge."; + if ($opt_f) { warn "The tree is not clean -- forced merge\n"; + $dirty = 0; + } else { + die "Exiting: your CVS tree is not clean for this merge."; + } } ### @@ -215,7 +223,7 @@ if ($opt_c) { } sub usage { print STDERR < Date: Wed, 26 Apr 2006 23:58:00 -0400 Subject: use delta index data when finding best delta matches This patch allows for computing the delta index for each base object only once and reuse it when trying to find the best delta match. This should set the mark and pave the way for possibly better delta generator algorithms. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano diff --git a/pack-objects.c b/pack-objects.c index c0acc46..5b2ef9a 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -994,6 +994,7 @@ static int type_size_sort(const struct object_entry *a, const struct object_entr struct unpacked { struct object_entry *entry; void *data; + struct delta_index *index; }; /* @@ -1004,64 +1005,56 @@ struct unpacked { * more importantly, the bigger file is likely the more recent * one. */ -static int try_delta(struct unpacked *cur, struct unpacked *old, unsigned max_depth) +static int try_delta(struct unpacked *trg, struct unpacked *src, + struct delta_index *src_index, unsigned max_depth) { - struct object_entry *cur_entry = cur->entry; - struct object_entry *old_entry = old->entry; - unsigned long size, oldsize, delta_size, sizediff; - long max_size; + struct object_entry *trg_entry = trg->entry; + struct object_entry *src_entry = src->entry; + unsigned long size, src_size, delta_size, sizediff, max_size; void *delta_buf; /* Don't bother doing diffs between different types */ - if (cur_entry->type != old_entry->type) + if (trg_entry->type != src_entry->type) return -1; /* We do not compute delta to *create* objects we are not * going to pack. */ - if (cur_entry->preferred_base) + if (trg_entry->preferred_base) return -1; - /* If the current object is at pack edge, take the depth the + /* + * If the current object is at pack edge, take the depth the * objects that depend on the current object into account -- * otherwise they would become too deep. */ - if (cur_entry->delta_child) { - if (max_depth <= cur_entry->delta_limit) + if (trg_entry->delta_child) { + if (max_depth <= trg_entry->delta_limit) return 0; - max_depth -= cur_entry->delta_limit; + max_depth -= trg_entry->delta_limit; } - - size = cur_entry->size; - oldsize = old_entry->size; - sizediff = oldsize > size ? oldsize - size : size - oldsize; - - if (size < 50) - return -1; - if (old_entry->depth >= max_depth) + if (src_entry->depth >= max_depth) return 0; - /* - * NOTE! - * - * We always delta from the bigger to the smaller, since that's - * more space-efficient (deletes don't have to say _what_ they - * delete). - */ + /* Now some size filtering euristics. */ + size = trg_entry->size; max_size = size / 2 - 20; - if (cur_entry->delta) - max_size = cur_entry->delta_size-1; + if (trg_entry->delta) + max_size = trg_entry->delta_size-1; + src_size = src_entry->size; + sizediff = src_size < size ? size - src_size : 0; if (sizediff >= max_size) return 0; - delta_buf = diff_delta(old->data, oldsize, - cur->data, size, &delta_size, max_size); + + delta_buf = create_delta(src_index, trg->data, size, &delta_size, max_size); if (!delta_buf) return 0; - cur_entry->delta = old_entry; - cur_entry->delta_size = delta_size; - cur_entry->depth = old_entry->depth + 1; + + trg_entry->delta = src_entry; + trg_entry->delta_size = delta_size; + trg_entry->depth = src_entry->depth + 1; free(delta_buf); - return 0; + return 1; } static void progress_interval(int signum) @@ -1109,11 +1102,19 @@ static void find_deltas(struct object_entry **list, int window, int depth) */ continue; + if (entry->size < 50) + continue; + if (n->index) + free_delta_index(n->index); free(n->data); n->entry = entry; n->data = read_sha1_file(entry->sha1, type, &size); if (size != entry->size) - die("object %s inconsistent object length (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size); + die("object %s inconsistent object length (%lu vs %lu)", + sha1_to_hex(entry->sha1), size, entry->size); + n->index = create_delta_index(n->data, size); + if (!n->index) + die("out of memory"); j = window; while (--j > 0) { @@ -1124,7 +1125,7 @@ static void find_deltas(struct object_entry **list, int window, int depth) m = array + other_idx; if (!m->entry) break; - if (try_delta(n, m, depth) < 0) + if (try_delta(n, m, m->index, depth) < 0) break; } #if 0 @@ -1144,8 +1145,11 @@ static void find_deltas(struct object_entry **list, int window, int depth) if (progress) fputc('\n', stderr); - for (i = 0; i < window; ++i) + for (i = 0; i < window; ++i) { + if (array[i].index) + free_delta_index(array[i].index); free(array[i].data); + } free(array); } -- cgit v0.10.2-6-g49f6 From 3dc5a9e4cdcc7124c665a050547d1285d86a421f Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Sat, 29 Apr 2006 00:58:05 -0400 Subject: replace adler32 with Rabin's polynomial in diff-delta This brings another small repacking speedup for sensibly the same pack size. On the Linux kernel repo, git-repack -a -f is 3.7% faster for a 0.4% larger pack. Credits to Geert Bosch who brought the Rabin's polynomial idea to my attention. This also eliminate the issue of adler32() reading past the data buffer, as noticed by Johannes Schindelin. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index d9a3a82..38c980b 100644 --- a/Makefile +++ b/Makefile @@ -609,7 +609,7 @@ test-date$X: test-date.c date.o ctype.o $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) test-date.c date.o ctype.o test-delta$X: test-delta.c diff-delta.o patch-delta.o - $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ -lz + $(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $^ check: for i in *.c; do sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; done diff --git a/diff-delta.c b/diff-delta.c index fdedf94..35e517d 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -20,18 +20,106 @@ #include #include -#include #include "delta.h" -/* block size: min = 16, max = 64k, power of 2 */ -#define BLK_SIZE 16 - /* maximum hash entry list for the same hash bucket */ #define HASH_LIMIT 64 -#define GR_PRIME 0x9e370001 -#define HASH(v, shift) (((unsigned int)(v) * GR_PRIME) >> (shift)) +#define RABIN_SHIFT 23 +#define RABIN_WINDOW 16 + +static const unsigned int T[256] = { + 0x00000000, 0xab59b4d1, 0x56b369a2, 0xfdeadd73, 0x063f6795, 0xad66d344, + 0x508c0e37, 0xfbd5bae6, 0x0c7ecf2a, 0xa7277bfb, 0x5acda688, 0xf1941259, + 0x0a41a8bf, 0xa1181c6e, 0x5cf2c11d, 0xf7ab75cc, 0x18fd9e54, 0xb3a42a85, + 0x4e4ef7f6, 0xe5174327, 0x1ec2f9c1, 0xb59b4d10, 0x48719063, 0xe32824b2, + 0x1483517e, 0xbfdae5af, 0x423038dc, 0xe9698c0d, 0x12bc36eb, 0xb9e5823a, + 0x440f5f49, 0xef56eb98, 0x31fb3ca8, 0x9aa28879, 0x6748550a, 0xcc11e1db, + 0x37c45b3d, 0x9c9defec, 0x6177329f, 0xca2e864e, 0x3d85f382, 0x96dc4753, + 0x6b369a20, 0xc06f2ef1, 0x3bba9417, 0x90e320c6, 0x6d09fdb5, 0xc6504964, + 0x2906a2fc, 0x825f162d, 0x7fb5cb5e, 0xd4ec7f8f, 0x2f39c569, 0x846071b8, + 0x798aaccb, 0xd2d3181a, 0x25786dd6, 0x8e21d907, 0x73cb0474, 0xd892b0a5, + 0x23470a43, 0x881ebe92, 0x75f463e1, 0xdeadd730, 0x63f67950, 0xc8afcd81, + 0x354510f2, 0x9e1ca423, 0x65c91ec5, 0xce90aa14, 0x337a7767, 0x9823c3b6, + 0x6f88b67a, 0xc4d102ab, 0x393bdfd8, 0x92626b09, 0x69b7d1ef, 0xc2ee653e, + 0x3f04b84d, 0x945d0c9c, 0x7b0be704, 0xd05253d5, 0x2db88ea6, 0x86e13a77, + 0x7d348091, 0xd66d3440, 0x2b87e933, 0x80de5de2, 0x7775282e, 0xdc2c9cff, + 0x21c6418c, 0x8a9ff55d, 0x714a4fbb, 0xda13fb6a, 0x27f92619, 0x8ca092c8, + 0x520d45f8, 0xf954f129, 0x04be2c5a, 0xafe7988b, 0x5432226d, 0xff6b96bc, + 0x02814bcf, 0xa9d8ff1e, 0x5e738ad2, 0xf52a3e03, 0x08c0e370, 0xa39957a1, + 0x584ced47, 0xf3155996, 0x0eff84e5, 0xa5a63034, 0x4af0dbac, 0xe1a96f7d, + 0x1c43b20e, 0xb71a06df, 0x4ccfbc39, 0xe79608e8, 0x1a7cd59b, 0xb125614a, + 0x468e1486, 0xedd7a057, 0x103d7d24, 0xbb64c9f5, 0x40b17313, 0xebe8c7c2, + 0x16021ab1, 0xbd5bae60, 0x6cb54671, 0xc7ecf2a0, 0x3a062fd3, 0x915f9b02, + 0x6a8a21e4, 0xc1d39535, 0x3c394846, 0x9760fc97, 0x60cb895b, 0xcb923d8a, + 0x3678e0f9, 0x9d215428, 0x66f4eece, 0xcdad5a1f, 0x3047876c, 0x9b1e33bd, + 0x7448d825, 0xdf116cf4, 0x22fbb187, 0x89a20556, 0x7277bfb0, 0xd92e0b61, + 0x24c4d612, 0x8f9d62c3, 0x7836170f, 0xd36fa3de, 0x2e857ead, 0x85dcca7c, + 0x7e09709a, 0xd550c44b, 0x28ba1938, 0x83e3ade9, 0x5d4e7ad9, 0xf617ce08, + 0x0bfd137b, 0xa0a4a7aa, 0x5b711d4c, 0xf028a99d, 0x0dc274ee, 0xa69bc03f, + 0x5130b5f3, 0xfa690122, 0x0783dc51, 0xacda6880, 0x570fd266, 0xfc5666b7, + 0x01bcbbc4, 0xaae50f15, 0x45b3e48d, 0xeeea505c, 0x13008d2f, 0xb85939fe, + 0x438c8318, 0xe8d537c9, 0x153feaba, 0xbe665e6b, 0x49cd2ba7, 0xe2949f76, + 0x1f7e4205, 0xb427f6d4, 0x4ff24c32, 0xe4abf8e3, 0x19412590, 0xb2189141, + 0x0f433f21, 0xa41a8bf0, 0x59f05683, 0xf2a9e252, 0x097c58b4, 0xa225ec65, + 0x5fcf3116, 0xf49685c7, 0x033df00b, 0xa86444da, 0x558e99a9, 0xfed72d78, + 0x0502979e, 0xae5b234f, 0x53b1fe3c, 0xf8e84aed, 0x17bea175, 0xbce715a4, + 0x410dc8d7, 0xea547c06, 0x1181c6e0, 0xbad87231, 0x4732af42, 0xec6b1b93, + 0x1bc06e5f, 0xb099da8e, 0x4d7307fd, 0xe62ab32c, 0x1dff09ca, 0xb6a6bd1b, + 0x4b4c6068, 0xe015d4b9, 0x3eb80389, 0x95e1b758, 0x680b6a2b, 0xc352defa, + 0x3887641c, 0x93ded0cd, 0x6e340dbe, 0xc56db96f, 0x32c6cca3, 0x999f7872, + 0x6475a501, 0xcf2c11d0, 0x34f9ab36, 0x9fa01fe7, 0x624ac294, 0xc9137645, + 0x26459ddd, 0x8d1c290c, 0x70f6f47f, 0xdbaf40ae, 0x207afa48, 0x8b234e99, + 0x76c993ea, 0xdd90273b, 0x2a3b52f7, 0x8162e626, 0x7c883b55, 0xd7d18f84, + 0x2c043562, 0x875d81b3, 0x7ab75cc0, 0xd1eee811 +}; + +static const unsigned int U[256] = { + 0x00000000, 0x7eb5200d, 0x5633f4cb, 0x2886d4c6, 0x073e5d47, 0x798b7d4a, + 0x510da98c, 0x2fb88981, 0x0e7cba8e, 0x70c99a83, 0x584f4e45, 0x26fa6e48, + 0x0942e7c9, 0x77f7c7c4, 0x5f711302, 0x21c4330f, 0x1cf9751c, 0x624c5511, + 0x4aca81d7, 0x347fa1da, 0x1bc7285b, 0x65720856, 0x4df4dc90, 0x3341fc9d, + 0x1285cf92, 0x6c30ef9f, 0x44b63b59, 0x3a031b54, 0x15bb92d5, 0x6b0eb2d8, + 0x4388661e, 0x3d3d4613, 0x39f2ea38, 0x4747ca35, 0x6fc11ef3, 0x11743efe, + 0x3eccb77f, 0x40799772, 0x68ff43b4, 0x164a63b9, 0x378e50b6, 0x493b70bb, + 0x61bda47d, 0x1f088470, 0x30b00df1, 0x4e052dfc, 0x6683f93a, 0x1836d937, + 0x250b9f24, 0x5bbebf29, 0x73386bef, 0x0d8d4be2, 0x2235c263, 0x5c80e26e, + 0x740636a8, 0x0ab316a5, 0x2b7725aa, 0x55c205a7, 0x7d44d161, 0x03f1f16c, + 0x2c4978ed, 0x52fc58e0, 0x7a7a8c26, 0x04cfac2b, 0x73e5d470, 0x0d50f47d, + 0x25d620bb, 0x5b6300b6, 0x74db8937, 0x0a6ea93a, 0x22e87dfc, 0x5c5d5df1, + 0x7d996efe, 0x032c4ef3, 0x2baa9a35, 0x551fba38, 0x7aa733b9, 0x041213b4, + 0x2c94c772, 0x5221e77f, 0x6f1ca16c, 0x11a98161, 0x392f55a7, 0x479a75aa, + 0x6822fc2b, 0x1697dc26, 0x3e1108e0, 0x40a428ed, 0x61601be2, 0x1fd53bef, + 0x3753ef29, 0x49e6cf24, 0x665e46a5, 0x18eb66a8, 0x306db26e, 0x4ed89263, + 0x4a173e48, 0x34a21e45, 0x1c24ca83, 0x6291ea8e, 0x4d29630f, 0x339c4302, + 0x1b1a97c4, 0x65afb7c9, 0x446b84c6, 0x3adea4cb, 0x1258700d, 0x6ced5000, + 0x4355d981, 0x3de0f98c, 0x15662d4a, 0x6bd30d47, 0x56ee4b54, 0x285b6b59, + 0x00ddbf9f, 0x7e689f92, 0x51d01613, 0x2f65361e, 0x07e3e2d8, 0x7956c2d5, + 0x5892f1da, 0x2627d1d7, 0x0ea10511, 0x7014251c, 0x5facac9d, 0x21198c90, + 0x099f5856, 0x772a785b, 0x4c921c31, 0x32273c3c, 0x1aa1e8fa, 0x6414c8f7, + 0x4bac4176, 0x3519617b, 0x1d9fb5bd, 0x632a95b0, 0x42eea6bf, 0x3c5b86b2, + 0x14dd5274, 0x6a687279, 0x45d0fbf8, 0x3b65dbf5, 0x13e30f33, 0x6d562f3e, + 0x506b692d, 0x2ede4920, 0x06589de6, 0x78edbdeb, 0x5755346a, 0x29e01467, + 0x0166c0a1, 0x7fd3e0ac, 0x5e17d3a3, 0x20a2f3ae, 0x08242768, 0x76910765, + 0x59298ee4, 0x279caee9, 0x0f1a7a2f, 0x71af5a22, 0x7560f609, 0x0bd5d604, + 0x235302c2, 0x5de622cf, 0x725eab4e, 0x0ceb8b43, 0x246d5f85, 0x5ad87f88, + 0x7b1c4c87, 0x05a96c8a, 0x2d2fb84c, 0x539a9841, 0x7c2211c0, 0x029731cd, + 0x2a11e50b, 0x54a4c506, 0x69998315, 0x172ca318, 0x3faa77de, 0x411f57d3, + 0x6ea7de52, 0x1012fe5f, 0x38942a99, 0x46210a94, 0x67e5399b, 0x19501996, + 0x31d6cd50, 0x4f63ed5d, 0x60db64dc, 0x1e6e44d1, 0x36e89017, 0x485db01a, + 0x3f77c841, 0x41c2e84c, 0x69443c8a, 0x17f11c87, 0x38499506, 0x46fcb50b, + 0x6e7a61cd, 0x10cf41c0, 0x310b72cf, 0x4fbe52c2, 0x67388604, 0x198da609, + 0x36352f88, 0x48800f85, 0x6006db43, 0x1eb3fb4e, 0x238ebd5d, 0x5d3b9d50, + 0x75bd4996, 0x0b08699b, 0x24b0e01a, 0x5a05c017, 0x728314d1, 0x0c3634dc, + 0x2df207d3, 0x534727de, 0x7bc1f318, 0x0574d315, 0x2acc5a94, 0x54797a99, + 0x7cffae5f, 0x024a8e52, 0x06852279, 0x78300274, 0x50b6d6b2, 0x2e03f6bf, + 0x01bb7f3e, 0x7f0e5f33, 0x57888bf5, 0x293dabf8, 0x08f998f7, 0x764cb8fa, + 0x5eca6c3c, 0x207f4c31, 0x0fc7c5b0, 0x7172e5bd, 0x59f4317b, 0x27411176, + 0x1a7c5765, 0x64c97768, 0x4c4fa3ae, 0x32fa83a3, 0x1d420a22, 0x63f72a2f, + 0x4b71fee9, 0x35c4dee4, 0x1400edeb, 0x6ab5cde6, 0x42331920, 0x3c86392d, + 0x133eb0ac, 0x6d8b90a1, 0x450d4467, 0x3bb8646a +}; struct index_entry { const unsigned char *ptr; @@ -42,13 +130,13 @@ struct index_entry { struct delta_index { const void *src_buf; unsigned long src_size; - unsigned int hash_shift; + unsigned int hash_mask; struct index_entry *hash[0]; }; struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) { - unsigned int i, hsize, hshift, entries, *hash_count; + unsigned int i, hsize, hmask, entries, *hash_count; const unsigned char *data, *buffer = buf; struct delta_index *index; struct index_entry *entry, **hash; @@ -57,12 +145,14 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) if (!buf || !bufsize) return NULL; - /* determine index hash size */ - entries = bufsize / BLK_SIZE; + /* Determine index hash size. Note that indexing skips the + first byte to allow for optimizing the rabin polynomial + initialization in create_delta(). */ + entries = (bufsize - 1) / RABIN_WINDOW; hsize = entries / 4; for (i = 4; (1 << i) < hsize && i < 31; i++); hsize = 1 << i; - hshift = 32 - i; + hmask = hsize - 1; /* allocate lookup index */ mem = malloc(sizeof(*index) + @@ -78,7 +168,7 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) index->src_buf = buf; index->src_size = bufsize; - index->hash_shift = hshift; + index->hash_mask = hmask; memset(hash, 0, hsize * sizeof(*hash)); /* allocate an array to count hash entries */ @@ -89,17 +179,19 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize) } /* then populate the index */ - data = buffer + entries * BLK_SIZE - BLK_SIZE; + data = buffer + entries * RABIN_WINDOW - RABIN_WINDOW; while (data >= buffer) { - unsigned int val = adler32(0, data, BLK_SIZE); - i = HASH(val, hshift); - entry->ptr = data; + unsigned int val = 0; + for (i = 1; i <= RABIN_WINDOW; i++) + val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT]; + i = val & hmask; + entry->ptr = data + RABIN_WINDOW; entry->val = val; entry->next = hash[i]; hash[i] = entry++; hash_count[i]++; - data -= BLK_SIZE; - } + data -= RABIN_WINDOW; + } /* * Determine a limit on the number of entries in the same hash @@ -136,20 +228,18 @@ void free_delta_index(struct delta_index *index) free(index); } -/* provide the size of the copy opcode given the block offset and size */ -#define COPYOP_SIZE(o, s) \ - (!!(o & 0xff) + !!(o & 0xff00) + !!(o & 0xff0000) + !!(o & 0xff000000) + \ - !!(s & 0xff) + !!(s & 0xff00) + 1) - -/* the maximum size for any opcode */ -#define MAX_OP_SIZE COPYOP_SIZE(0xffffffff, 0xffffffff) +/* + * The maximum size for any opcode sequence, including the initial header + * plus rabin window plus biggest copy. + */ +#define MAX_OP_SIZE (5 + 5 + 1 + RABIN_WINDOW + 7) void * create_delta(const struct delta_index *index, const void *trg_buf, unsigned long trg_size, unsigned long *delta_size, unsigned long max_size) { - unsigned int i, outpos, outsize, hash_shift; + unsigned int i, outpos, outsize, hash_mask, val; int inscnt; const unsigned char *ref_data, *ref_top, *data, *top; unsigned char *out; @@ -185,14 +275,22 @@ create_delta(const struct delta_index *index, ref_top = ref_data + index->src_size; data = trg_buf; top = trg_buf + trg_size; - hash_shift = index->hash_shift; - inscnt = 0; + hash_mask = index->hash_mask; + + outpos++; + val = 0; + for (i = 0; i < RABIN_WINDOW && data < top; i++, data++) { + out[outpos++] = *data; + val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; + } + inscnt = i; while (data < top) { unsigned int moff = 0, msize = 0; struct index_entry *entry; - unsigned int val = adler32(0, data, BLK_SIZE); - i = HASH(val, hash_shift); + val ^= U[data[-RABIN_WINDOW]]; + val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT]; + i = val & hash_mask; for (entry = index->hash[i]; entry; entry = entry->next) { const unsigned char *ref = entry->ptr; const unsigned char *src = data; @@ -214,7 +312,7 @@ create_delta(const struct delta_index *index, } } - if (!msize || msize < COPYOP_SIZE(moff, msize)) { + if (msize < 4) { if (!inscnt) outpos++; out[outpos++] = *data++; @@ -226,6 +324,20 @@ create_delta(const struct delta_index *index, } else { unsigned char *op; + if (msize >= RABIN_WINDOW) { + const unsigned char *sk; + sk = data + msize - RABIN_WINDOW; + val = 0; + for (i = 0; i < RABIN_WINDOW; i++) + val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT]; + } else { + const unsigned char *sk = data + 1; + for (i = 1; i < msize; i++) { + val ^= U[sk[-RABIN_WINDOW]]; + val = ((val << 8) | *sk++) ^ T[val >> RABIN_SHIFT]; + } + } + if (inscnt) { while (moff && ref_data[moff-1] == data[-1]) { if (msize == 0x10000) @@ -270,9 +382,8 @@ create_delta(const struct delta_index *index, if (max_size && outsize >= max_size) outsize = max_size + MAX_OP_SIZE + 1; if (max_size && outpos > max_size) - out = NULL; - else - out = realloc(out, outsize); + break; + out = realloc(out, outsize); if (!out) { free(tmp); return NULL; @@ -283,6 +394,11 @@ create_delta(const struct delta_index *index, if (inscnt) out[outpos - inscnt - 1] = inscnt; + if (max_size && outpos > max_size) { + free(out); + return NULL; + } + *delta_size = outpos; return out; } -- cgit v0.10.2-6-g49f6 From 73b0e5af9daf38734d94ed170f8800481e15567f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 21 Apr 2006 17:31:04 -0700 Subject: get_sha1(): :path and :[0-3]:path to extract from index. Earlier patch to say : by Linus was very useful, and this extends the same idea to the current index. An sha1 expression : extracts the object name for the named path from the current index. Signed-off-by: Junio C Hamano diff --git a/sha1_name.c b/sha1_name.c index 345935b..ec5cd2c 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -458,17 +458,55 @@ int get_sha1(const char *name, unsigned char *sha1) { int ret; unsigned unused; + int namelen = strlen(name); + const char *cp; prepare_alt_odb(); - ret = get_sha1_1(name, strlen(name), sha1); - if (ret < 0) { - const char *cp = strchr(name, ':'); - if (cp) { - unsigned char tree_sha1[20]; - if (!get_sha1_1(name, cp-name, tree_sha1)) - return get_tree_entry(tree_sha1, cp+1, sha1, - &unused); + ret = get_sha1_1(name, namelen, sha1); + if (!ret) + return ret; + /* sha1:path --> object name of path in ent sha1 + * :path -> object name of path in index + * :[0-3]:path -> object name of path in index at stage + */ + if (name[0] == ':') { + int stage = 0; + struct cache_entry *ce; + int pos; + if (namelen < 3 || + name[2] != ':' || + name[1] < '0' || '3' < name[1]) + cp = name + 1; + else { + stage = name[1] - '0'; + cp = name + 3; } + namelen = namelen - (cp - name); + if (!active_cache) + read_cache(); + if (active_nr < 0) + return -1; + pos = cache_name_pos(cp, namelen); + if (pos < 0) + pos = -pos - 1; + while (pos < active_nr) { + ce = active_cache[pos]; + if (ce_namelen(ce) != namelen || + memcmp(ce->name, cp, namelen)) + break; + if (ce_stage(ce) == stage) { + memcpy(sha1, ce->sha1, 20); + return 0; + } + } + return -1; + } + cp = strchr(name, ':'); + if (cp) { + unsigned char tree_sha1[20]; + if (!get_sha1_1(name, cp-name, tree_sha1)) + return get_tree_entry(tree_sha1, cp+1, sha1, + &unused); } return ret; } -- cgit v0.10.2-6-g49f6 From 5010cb5fcca30269ad25f2eb38b31455af3205d7 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 30 Apr 2006 23:28:15 -0700 Subject: built-in "git grep" This attempts to set up built-in "git grep" to further reduce our dependence on the shell, while at the same time optionally allowing to run grep against object database. You could do funky things like these: git grep --cached -e pattern ;# grep from index git grep -e pattern master ;# or in a rev git grep -e pattern master next ;# or in multiple revs git grep -e pattern pu^@ ;# even like this with an ;# extension from another topic ;-) git grep -e pattern master..next ;# or even from rev ranges git grep -e pattern master~20:Documentation ;# or an arbitrary tree git grep -e pattern next:git-commit.sh ;# or an arbitrary blob Right now, it does not understand and/or obey many options grep should accept, and the pattern must be given with -e option due to the way the parameter parser is structured, both of which obviously need to be fixed for usability. But this is going in the right direction. The shell script version is one of the worst Portability offender in the git barebone Porcelainish; it uses xargs -0 to pass paths around and shell arrays to sift flags and parameters. Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 8ce27a6..8d5122b 100644 --- a/Makefile +++ b/Makefile @@ -214,7 +214,7 @@ LIB_OBJS = \ $(DIFF_OBJS) BUILTIN_OBJS = \ - builtin-log.o builtin-help.o + builtin-log.o builtin-help.o builtin-grep.o GITLIBS = $(LIB_FILE) $(XDIFF_LIB) LIBS = $(GITLIBS) -lz diff --git a/builtin-grep.c b/builtin-grep.c new file mode 100644 index 0000000..36150bf --- /dev/null +++ b/builtin-grep.c @@ -0,0 +1,454 @@ +/* + * Builtin "git grep" + * + * Copyright (c) 2006 Junio C Hamano + */ +#include "cache.h" +#include "blob.h" +#include "tree.h" +#include "commit.h" +#include "tag.h" +#include "diff.h" +#include "revision.h" +#include "builtin.h" +#include + +static int pathspec_matches(struct diff_options *opt, const char *name) +{ + int i, j; + int namelen; + if (!opt->nr_paths) + return 1; + namelen = strlen(name); + for (i = 0; i < opt->nr_paths; i++) { + const char *match = opt->paths[i]; + int matchlen = opt->pathlens[i]; + if (matchlen <= namelen) { + if (!strncmp(name, match, matchlen)) + return 1; + continue; + } + /* If name is "Documentation" and pathspec is + * "Documentation/", they should match. Maybe + * we would want to strip it in get_pathspec()??? + */ + if (strncmp(name, match, namelen)) + continue; + for (j = namelen; j < matchlen; j++) + if (match[j] != '/') + break; + if (matchlen <= j) + return 1; + } + return 0; +} + +struct grep_opt { + const char *pattern; + regex_t regexp; + unsigned linenum:1; + unsigned invert:1; + int regflags; + unsigned pre_context; + unsigned post_context; +}; + +static char *end_of_line(char *cp, unsigned long *left) +{ + unsigned long l = *left; + while (l && *cp != '\n') { + l--; + cp++; + } + *left = l; + return cp; +} + +static void show_line(struct grep_opt *opt, const char *bol, const char *eol, + const char *name, unsigned lno, char sign) +{ + printf("%s%c", name, sign); + if (opt->linenum) + printf("%d%c", lno, sign); + printf("%.*s\n", eol-bol, bol); +} + +static int grep_buffer(struct grep_opt *opt, const char *name, + char *buf, unsigned long size) +{ + char *bol = buf; + unsigned long left = size; + unsigned lno = 1; + struct pre_context_line { + char *bol; + char *eol; + } *prev = NULL, *pcl; + unsigned last_hit = 0; + unsigned last_shown = 0; + const char *hunk_mark = ""; + + if (opt->pre_context) + prev = xcalloc(opt->pre_context, sizeof(*prev)); + if (opt->pre_context || opt->post_context) + hunk_mark = "--\n"; + + while (left) { + regmatch_t pmatch[10]; + char *eol, ch; + int hit; + + eol = end_of_line(bol, &left); + ch = *eol; + *eol = 0; + + hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch), + pmatch, 0); + if (opt->invert) + hit = !hit; + if (hit) { + /* Hit at this line. If we haven't shown the + * pre-context lines, we would need to show them. + */ + if (opt->pre_context) { + unsigned from; + if (opt->pre_context < lno) + from = lno - opt->pre_context; + else + from = 1; + if (from <= last_shown) + from = last_shown + 1; + if (last_shown && from != last_shown + 1) + printf(hunk_mark); + while (from < lno) { + pcl = &prev[lno-from-1]; + show_line(opt, pcl->bol, pcl->eol, + name, from, '-'); + from++; + } + last_shown = lno-1; + } + if (last_shown && lno != last_shown + 1) + printf(hunk_mark); + show_line(opt, bol, eol, name, lno, ':'); + last_shown = last_hit = lno; + } + else if (last_hit && + lno <= last_hit + opt->post_context) { + /* If the last hit is within the post context, + * we need to show this line. + */ + if (last_shown && lno != last_shown + 1) + printf(hunk_mark); + show_line(opt, bol, eol, name, lno, '-'); + last_shown = lno; + } + if (opt->pre_context) { + memmove(prev+1, prev, + (opt->pre_context-1) * sizeof(*prev)); + prev->bol = bol; + prev->eol = eol; + } + *eol = ch; + bol = eol + 1; + left--; + lno++; + } + return !!last_hit; +} + +static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name) +{ + unsigned long size; + char *data; + char type[20]; + int hit; + data = read_sha1_file(sha1, type, &size); + if (!data) { + error("'%s': unable to read %s", name, sha1_to_hex(sha1)); + return 0; + } + hit = grep_buffer(opt, name, data, size); + free(data); + return hit; +} + +static int grep_file(struct grep_opt *opt, const char *filename) +{ + struct stat st; + int i; + char *data; + if (lstat(filename, &st) < 0) { + err_ret: + if (errno != ENOENT) + error("'%s': %s", filename, strerror(errno)); + return 0; + } + if (!st.st_size) + return 0; /* empty file -- no grep hit */ + if (!S_ISREG(st.st_mode)) + return 0; + i = open(filename, O_RDONLY); + if (i < 0) + goto err_ret; + data = xmalloc(st.st_size + 1); + if (st.st_size != xread(i, data, st.st_size)) { + error("'%s': short read %s", filename, strerror(errno)); + close(i); + free(data); + return 0; + } + close(i); + i = grep_buffer(opt, filename, data, st.st_size); + free(data); + return i; +} + +static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) +{ + int hit = 0; + int nr; + read_cache(); + + for (nr = 0; nr < active_nr; nr++) { + struct cache_entry *ce = active_cache[nr]; + if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode))) + continue; + if (!pathspec_matches(&revs->diffopt, ce->name)) + continue; + if (cached) + hit |= grep_sha1(opt, ce->sha1, ce->name); + else + hit |= grep_file(opt, ce->name); + } + return hit; +} + +static int grep_tree(struct grep_opt *opt, struct rev_info *revs, + struct tree_desc *tree, + const char *tree_name, const char *base) +{ + unsigned mode; + int len; + int hit = 0; + const char *path; + const unsigned char *sha1; + char *down_base; + char *path_buf = xmalloc(PATH_MAX + strlen(tree_name) + 100); + + if (tree_name[0]) { + int offset = sprintf(path_buf, "%s:", tree_name); + down_base = path_buf + offset; + strcat(down_base, base); + } + else { + down_base = path_buf; + strcpy(down_base, base); + } + len = strlen(path_buf); + + while (tree->size) { + int pathlen; + sha1 = tree_entry_extract(tree, &path, &mode); + pathlen = strlen(path); + strcpy(path_buf + len, path); + + if (!pathspec_matches(&revs->diffopt, down_base)) + ; + else if (S_ISREG(mode)) + hit |= grep_sha1(opt, sha1, path_buf); + else if (S_ISDIR(mode)) { + char type[20]; + struct tree_desc sub; + void *data; + data = read_sha1_file(sha1, type, &sub.size); + if (!data) + die("unable to read tree (%s)", + sha1_to_hex(sha1)); + strcpy(path_buf + len + pathlen, "/"); + sub.buf = data; + hit = grep_tree(opt, revs, &sub, tree_name, down_base); + free(data); + } + update_tree_entry(tree); + } + return hit; +} + +static int grep_object(struct grep_opt *opt, struct rev_info *revs, + struct object *obj, const char *name) +{ + if (!strcmp(obj->type, blob_type)) + return grep_sha1(opt, obj->sha1, name); + if (!strcmp(obj->type, commit_type) || + !strcmp(obj->type, tree_type)) { + struct tree_desc tree; + void *data; + int hit; + data = read_object_with_reference(obj->sha1, tree_type, + &tree.size, NULL); + if (!data) + die("unable to read tree (%s)", sha1_to_hex(obj->sha1)); + tree.buf = data; + hit = grep_tree(opt, revs, &tree, name, ""); + free(data); + return hit; + } + die("unable to grep from object of type %s", obj->type); +} + +static const char builtin_grep_usage[] = +"git-grep