From 355e76a4a3c5e49ae15a642806457bce10fe2ef4 Mon Sep 17 00:00:00 2001
From: Junio C Hamano
Date: Fri, 3 Jun 2005 01:36:03 -0700
Subject: [PATCH] Tweak count-delta interface

Make it return copied source and insertion separately, so that later
implementation of heuristics can use them more flexibly. This does
not change the heuristics implemented in diffcore-rename nor
diffcore-break in any way.

Signed-off-by: Junio C Hamano
Signed-off-by: Linus Torvalds

diff --git a/count-delta.c b/count-delta.c
index e10c832..c7f3767 100644
--- a/count-delta.c
+++ b/count-delta.c
@@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const unsigned char **datap)
 /*
  * NOTE. We do not _interpret_ delta fully. As an approximation, we
  * just count the number of bytes that are copied from the source, and
- * the number of literal data bytes that are inserted. Number of
- * bytes that are _not_ copied from the source is deletion, and number
- * of inserted literal bytes are addition, so sum of them is what we
- * return. xdelta can express an edit that copies data inside of the
- * destination which originally came from the source. We do not count
- * that in the following routine, so we are undercounting the source
- * material that remains in the final output that way.
+ * the number of literal data bytes that are inserted.
+ *
+ * Number of bytes that are _not_ copied from the source is deletion,
+ * and number of inserted literal bytes are addition, so sum of them
+ * is the extent of damage. xdelta can express an edit that copies
+ * data inside of the destination which originally came from the
+ * source. We do not count that in the following routine, so we are
+ * undercounting the source material that remains in the final output
+ * that way.
  */
-unsigned long count_delta(void *delta_buf, unsigned long delta_size)
+int count_delta(void *delta_buf, unsigned long delta_size,
+		unsigned long *src_copied, unsigned long *literal_added)
 {
 	unsigned long copied_from_source, added_literal;
 	const unsigned char *data, *top;
@@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
 
 	/* the smallest delta size possible is 6 bytes */
 	if (delta_size < 6)
-		return UINT_MAX;
+		return -1;
 
 	data = delta_buf;
 	top = delta_buf + delta_size;
@@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
 
 	/* sanity check */
 	if (data != top || out != dst_size)
-		return UINT_MAX;
+		return -1;
 
 	/* delete size is what was _not_ copied from source.
 	 * edit size is that and literal additions.
 	 */
-	if (src_size + added_literal < copied_from_source)
-		/* we ended up overcounting and underflowed */
-		return 0;
-	return (src_size - copied_from_source) + added_literal;
+	*src_copied = copied_from_source;
+	*literal_added = added_literal;
+	return 0;
 }
diff --git a/count-delta.h b/count-delta.h
index 4e6b584..7359629 100644
--- a/count-delta.h
+++ b/count-delta.h
@@ -4,6 +4,7 @@
 #ifndef COUNT_DELTA_H
 #define COUNT_DELTA_H
 
-unsigned long count_delta(void *, unsigned long);
+int count_delta(void *, unsigned long,
+		unsigned long *src_copied, unsigned long *literal_added);
 
 #endif
diff --git a/diffcore-break.c b/diffcore-break.c
index c5e006d..cab91a2 100644
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -23,7 +23,7 @@ static int very_different(struct diff_filespec *src,
 	 * want to get the filepair broken.
 	 */
 	void *delta;
-	unsigned long delta_size, base_size;
+	unsigned long delta_size, base_size, src_copied, literal_added;
 
 	if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
 		return 0; /* leave symlink rename alone */
@@ -61,10 +61,17 @@ static int very_different(struct diff_filespec *src,
 		return MAX_SCORE;
 
 	/* Estimate the edit size by interpreting delta. */
-	delta_size = count_delta(delta, delta_size);
+	if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+		free(delta);
+		return 0;
+	}
 	free(delta);
-	if (delta_size == UINT_MAX)
-		return 0; /* error in delta computation */
+
+	/* Extent of damage */
+	if (src->size + literal_added < src_copied)
+		delta_size = 0;
+	else
+		delta_size = (src->size - src_copied) + literal_added;
 
 	if (base_size < delta_size)
 		return MAX_SCORE;
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 8ed37da..eac782b 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -135,7 +135,7 @@ static int estimate_similarity(struct diff_filespec *src,
 	 * call into this function in that case.
 	 */
 	void *delta;
-	unsigned long delta_size, base_size;
+	unsigned long delta_size, base_size, src_copied, literal_added;
 	int score;
 
 	/* We deal only with regular files. Symlink renames are handled
@@ -174,10 +174,17 @@ static int estimate_similarity(struct diff_filespec *src,
 		return 0;
 
 	/* Estimate the edit size by interpreting delta. */
-	delta_size = count_delta(delta, delta_size);
-	free(delta);
-	if (delta_size == UINT_MAX)
+	if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+		free(delta);
 		return 0;
+	}
+	free(delta);
+
+	/* Extent of damage */
+	if (src->size + literal_added < src_copied)
+		delta_size = 0;
+	else
+		delta_size = (src->size - src_copied) + literal_added;
 
 	/*
 	 * Now we will give some score to it. 100% edit gets 0 points
diff --git a/diffcore.h b/diffcore.h
index 981ee05..1f4b32c 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -12,8 +12,6 @@
 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
 #define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
 
-#define RENAME_DST_MATCHED 01
-
 struct diff_filespec {
 	unsigned char sha1[20];
 	char *path;
-- 
cgit v0.10.2-6-g49f6