summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--diff-tree.c3
-rw-r--r--diff.c83
-rw-r--r--diff.h2
-rw-r--r--diffcore-pickaxe.c2
-rw-r--r--diffcore-rename.c19
-rw-r--r--diffcore.h2
6 files changed, 102 insertions, 9 deletions
diff --git a/diff-tree.c b/diff-tree.c
index c66c787..8bdb1db 100644
--- a/diff-tree.c
+++ b/diff-tree.c
@@ -578,6 +578,9 @@ int main(int argc, const char **argv)
if (!read_stdin)
return 0;
+ if (detect_rename)
+ diff_setup_opt |= (DIFF_SETUP_USE_SIZE_CACHE |
+ DIFF_SETUP_USE_CACHE);
while (fgets(line, sizeof(line), stdin))
diff_tree_stdin(line);
diff --git a/diff.c b/diff.c
index ebec71a..357c4ef 100644
--- a/diff.c
+++ b/diff.c
@@ -12,6 +12,7 @@ static const char *diff_opts = "-pu";
static unsigned char null_sha1[20] = { 0, };
static int reverse_diff;
+static int use_size_cache;
static const char *external_diff(void)
{
@@ -222,12 +223,60 @@ static int work_tree_matches(const char *name, const unsigned char *sha1)
return 1;
}
+static struct sha1_size_cache {
+ unsigned char sha1[20]; /* lookup key: the 20 bytes compared by locate_size_cache() */
+ unsigned long size; /* size recorded for that key */
+} **sha1_size_cache; /* array of pointers to entries; locate_size_cache()
+ * keeps it ordered by descending memcmp() of sha1 */
+static int sha1_size_cache_nr, sha1_size_cache_alloc; /* entries in use / slots allocated */
+/*
+ * Look up, and optionally record, the cached size for a 20-byte SHA-1.
+ *
+ * The cache array is binary-searched.  It is kept in descending memcmp()
+ * order: an entry that compares lower than the key sends the search
+ * toward lower indices.
+ *
+ * sha1: the 20-byte key to look for.
+ * size: the size to record when the key is absent; passing UINT_MAX
+ *       means "look up only, do not insert".
+ *
+ * Returns the existing entry when the key is present (its stored size is
+ * left untouched), NULL on a miss with size == UINT_MAX, and otherwise a
+ * newly allocated entry inserted at its sorted position.
+ *
+ * NOTE(review): size is unsigned long but the sentinel is UINT_MAX, so
+ * where unsigned long is wider than unsigned int an object of exactly
+ * UINT_MAX bytes would never be recorded -- confirm this is acceptable.
+ * NOTE(review): nothing visible in this patch frees the entries or the
+ * array.
+ */
+static struct sha1_size_cache *locate_size_cache(unsigned char *sha1,
+ unsigned long size)
+{
+ int first, last;
+ struct sha1_size_cache *e;
+
+ first = 0;
+ last = sha1_size_cache_nr; /* search the half-open range [first, last) */
+ while (last > first) {
+ int next = (last + first) >> 1; /* midpoint of the remaining range */
+ e = sha1_size_cache[next];
+ int cmp = memcmp(e->sha1, sha1, 20); /* NOTE(review): declaration after a statement; needs C99 */
+ if (!cmp)
+ return e;
+ if (cmp < 0) { /* entry sorts below the key: continue in [first, next) */
+ last = next;
+ continue;
+ }
+ first = next+1; /* entry sorts above the key: continue in [next+1, last) */
+ }
+ /* not found; "first" is now the index where the key belongs */
+ if (size == UINT_MAX)
+ return NULL; /* lookup-only request: report the miss without inserting */
+ /* insert to make it at "first"; grow the pointer array first when it
+ * is full (alloc_nr() and xrealloc() are helpers defined outside this
+ * patch) */
+ if (sha1_size_cache_alloc <= sha1_size_cache_nr) {
+ sha1_size_cache_alloc = alloc_nr(sha1_size_cache_alloc);
+ sha1_size_cache = xrealloc(sha1_size_cache,
+ sha1_size_cache_alloc *
+ sizeof(*sha1_size_cache));
+ }
+ sha1_size_cache_nr++;
+ /* "first" never exceeds the old count, so this test always holds after
+ * the increment; when appending at the end the move length is zero */
+ if (first < sha1_size_cache_nr)
+ memmove(sha1_size_cache + first + 1, sha1_size_cache + first,
+ (sha1_size_cache_nr - first - 1) *
+ sizeof(*sha1_size_cache));
+ e = xmalloc(sizeof(struct sha1_size_cache));
+ sha1_size_cache[first] = e; /* slot "first" was vacated by the shift above */
+ memcpy(e->sha1, sha1, 20);
+ e->size = size;
+ return e;
+}
+
/*
* While doing rename detection and pickaxe operation, we may need to
* grab the data for the blob (or file) for our own in-core comparison.
* diff_filespec has data and size fields for this purpose.
*/
-int diff_populate_filespec(struct diff_filespec *s)
+int diff_populate_filespec(struct diff_filespec *s, int size_only)
{
int err = 0;
if (!DIFF_FILE_VALID(s))
@@ -235,6 +284,9 @@ int diff_populate_filespec(struct diff_filespec *s)
if (S_ISDIR(s->mode))
return -1;
+ if (!use_size_cache)
+ size_only = 0;
+
if (s->data)
return err;
if (!s->sha1_valid ||
@@ -254,6 +306,8 @@ int diff_populate_filespec(struct diff_filespec *s)
s->size = st.st_size;
if (!s->size)
goto empty;
+ if (size_only)
+ return 0;
if (S_ISLNK(st.st_mode)) {
int ret;
s->data = xmalloc(s->size);
@@ -273,9 +327,21 @@ int diff_populate_filespec(struct diff_filespec *s)
close(fd);
}
else {
+ /* We cannot do size only for SHA1 blobs */
char type[20];
+ struct sha1_size_cache *e;
+
+ if (size_only) {
+ e = locate_size_cache(s->sha1, UINT_MAX);
+ if (e) {
+ s->size = e->size;
+ return 0;
+ }
+ }
s->data = read_sha1_file(s->sha1, type, &s->size);
s->should_free = 1;
+ if (s->data && size_only)
+ locate_size_cache(s->sha1, s->size);
}
return 0;
}
@@ -361,7 +427,7 @@ static void prepare_temp_file(const char *name,
return;
}
else {
- if (diff_populate_filespec(one))
+ if (diff_populate_filespec(one, 0))
die("cannot read data blob for %s", one->path);
prep_temp_blob(temp, one->data, one->size,
one->sha1, one->mode);
@@ -496,6 +562,19 @@ void diff_setup(int flags)
{
if (flags & DIFF_SETUP_REVERSE)
reverse_diff = 1;
+ if (flags & DIFF_SETUP_USE_CACHE) {
+ if (!active_cache)
+ /* read-cache does not die even when it fails
+ * so it is safe for us to do this here. Also
+ * it does not smudge active_cache or active_nr
+ * when it fails, so we do not have to worry about
+ * cleaning it up ourselves either.
+ */
+ read_cache();
+ }
+ if (flags & DIFF_SETUP_USE_SIZE_CACHE)
+ use_size_cache = 1;
+
}
struct diff_queue_struct diff_queued_diff;
diff --git a/diff.h b/diff.h
index 40a6757..a07ee9f 100644
--- a/diff.h
+++ b/diff.h
@@ -29,6 +29,8 @@ extern void diff_unmerge(const char *path);
extern int diff_scoreopt_parse(const char *opt);
#define DIFF_SETUP_REVERSE 1
+#define DIFF_SETUP_USE_CACHE 2
+#define DIFF_SETUP_USE_SIZE_CACHE 4
extern void diff_setup(int flags);
#define DIFF_DETECT_RENAME 1
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 9cf3a50..ef9c5c1 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -11,7 +11,7 @@ static int contains(struct diff_filespec *one,
{
unsigned long offset, sz;
const char *data;
- if (diff_populate_filespec(one))
+ if (diff_populate_filespec(one, 0))
return 0;
sz = one->size;
data = one->data;
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 6389ded..035d4eb 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -99,8 +99,11 @@ static int is_exact_match(struct diff_filespec *src, struct diff_filespec *dst)
if (src->sha1_valid && dst->sha1_valid &&
!memcmp(src->sha1, dst->sha1, 20))
return 1;
- if (diff_populate_filespec(src) || diff_populate_filespec(dst))
- /* this is an error but will be caught downstream */
+ if (diff_populate_filespec(src, 1) || diff_populate_filespec(dst, 1))
+ return 0;
+ if (src->size != dst->size)
+ return 0;
+ if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
return 0;
if (src->size == dst->size &&
!memcmp(src->data, dst->data, src->size))
@@ -125,9 +128,11 @@ static int estimate_similarity(struct diff_filespec *src,
* dst, and then some edit has been applied to dst.
*
* Compare them and return how similar they are, representing
- * the score as an integer between 0 and 10000, except
- * where they match exactly it is considered better than anything
- * else.
+ * the score as an integer between 0 and MAX_SCORE.
+ *
+ * When there is an exact match, it is considered a better
+ * match than anything else; the destination does not even
+ * call into this function in that case.
*/
void *delta;
unsigned long delta_size, base_size;
@@ -147,6 +152,7 @@ static int estimate_similarity(struct diff_filespec *src,
/* We would not consider edits that change the file size so
* drastically. delta_size must be smaller than
* (MAX_SCORE-minimum_score)/MAX_SCORE * min(src->size, dst->size).
+ *
* Note that base_size == 0 case is handled here already
* and the final score computation below would not have a
* divide-by-zero issue.
@@ -154,6 +160,9 @@ static int estimate_similarity(struct diff_filespec *src,
if (base_size * (MAX_SCORE-minimum_score) < delta_size * MAX_SCORE)
return 0;
+ if (diff_populate_filespec(src, 0) || diff_populate_filespec(dst, 0))
+ return 0; /* error but caught downstream */
+
delta = diff_delta(src->data, src->size,
dst->data, dst->size,
&delta_size);
diff --git a/diffcore.h b/diffcore.h
index 462014b..60ee775 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -33,7 +33,7 @@ extern struct diff_filespec *alloc_filespec(const char *);
extern void fill_filespec(struct diff_filespec *, const unsigned char *,
unsigned short);
-extern int diff_populate_filespec(struct diff_filespec *);
+extern int diff_populate_filespec(struct diff_filespec *, int);
extern void diff_free_filespec_data(struct diff_filespec *);
struct diff_filepair {