summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <junkio@cox.net> 2005-06-03 08:36:03 (GMT)
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-03 18:23:03 (GMT)
commit: 355e76a4a3c5e49ae15a642806457bce10fe2ef4 (patch)
tree: 61d1a1313ab1732458e47f55e14d6c1c7f1225ea
parent: 5b86040679626e36c12345039f6df62f4622aef2 (diff)
download: git-355e76a4a3c5e49ae15a642806457bce10fe2ef4.zip
git-355e76a4a3c5e49ae15a642806457bce10fe2ef4.tar.gz
git-355e76a4a3c5e49ae15a642806457bce10fe2ef4.tar.bz2
[PATCH] Tweak count-delta interface

Make it return copied source and insertion separately, so that
later implementation of heuristics can use them more flexibly.

This does not change the heuristics implemented in
diffcore-rename nor diffcore-break in any way.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

-rw-r--r--  count-delta.c      30
-rw-r--r--  count-delta.h       3
-rw-r--r--  diffcore-break.c   15
-rw-r--r--  diffcore-rename.c  15
-rw-r--r--  diffcore.h          2
5 files changed, 40 insertions, 25 deletions
diff --git a/count-delta.c b/count-delta.c
index e10c832..c7f3767 100644
--- a/count-delta.c
+++ b/count-delta.c
@@ -29,15 +29,18 @@ static unsigned long get_hdr_size(const unsigned char **datap)
/*
* NOTE. We do not _interpret_ delta fully. As an approximation, we
* just count the number of bytes that are copied from the source, and
- * the number of literal data bytes that are inserted. Number of
- * bytes that are _not_ copied from the source is deletion, and number
- * of inserted literal bytes are addition, so sum of them is what we
- * return. xdelta can express an edit that copies data inside of the
- * destination which originally came from the source. We do not count
- * that in the following routine, so we are undercounting the source
- * material that remains in the final output that way.
+ * the number of literal data bytes that are inserted.
+ *
+ * Number of bytes that are _not_ copied from the source is deletion,
+ * and number of inserted literal bytes are addition, so sum of them
+ * is the extent of damage. xdelta can express an edit that copies
+ * data inside of the destination which originally came from the
+ * source. We do not count that in the following routine, so we are
+ * undercounting the source material that remains in the final output
+ * that way.
*/
-unsigned long count_delta(void *delta_buf, unsigned long delta_size)
+int count_delta(void *delta_buf, unsigned long delta_size,
+ unsigned long *src_copied, unsigned long *literal_added)
{
unsigned long copied_from_source, added_literal;
const unsigned char *data, *top;
@@ -46,7 +49,7 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
/* the smallest delta size possible is 6 bytes */
if (delta_size < 6)
- return UINT_MAX;
+ return -1;
data = delta_buf;
top = delta_buf + delta_size;
@@ -83,13 +86,12 @@ unsigned long count_delta(void *delta_buf, unsigned long delta_size)
/* sanity check */
if (data != top || out != dst_size)
- return UINT_MAX;
+ return -1;
/* delete size is what was _not_ copied from source.
* edit size is that and literal additions.
*/
- if (src_size + added_literal < copied_from_source)
- /* we ended up overcounting and underflowed */
- return 0;
- return (src_size - copied_from_source) + added_literal;
+ *src_copied = copied_from_source;
+ *literal_added = added_literal;
+ return 0;
}
diff --git a/count-delta.h b/count-delta.h
index 4e6b584..7359629 100644
--- a/count-delta.h
+++ b/count-delta.h
@@ -4,6 +4,7 @@
#ifndef COUNT_DELTA_H
#define COUNT_DELTA_H
-unsigned long count_delta(void *, unsigned long);
+int count_delta(void *, unsigned long,
+ unsigned long *src_copied, unsigned long *literal_added);
#endif
diff --git a/diffcore-break.c b/diffcore-break.c
index c5e006d..cab91a2 100644
--- a/diffcore-break.c
+++ b/diffcore-break.c
@@ -23,7 +23,7 @@ static int very_different(struct diff_filespec *src,
* want to get the filepair broken.
*/
void *delta;
- unsigned long delta_size, base_size;
+ unsigned long delta_size, base_size, src_copied, literal_added;
if (!S_ISREG(src->mode) || !S_ISREG(dst->mode))
return 0; /* leave symlink rename alone */
@@ -61,10 +61,17 @@ static int very_different(struct diff_filespec *src,
return MAX_SCORE;
/* Estimate the edit size by interpreting delta. */
- delta_size = count_delta(delta, delta_size);
+ if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+ free(delta);
+ return 0;
+ }
free(delta);
- if (delta_size == UINT_MAX)
- return 0; /* error in delta computation */
+
+ /* Extent of damage */
+ if (src->size + literal_added < src_copied)
+ delta_size = 0;
+ else
+ delta_size = (src->size - src_copied) + literal_added;
if (base_size < delta_size)
return MAX_SCORE;
diff --git a/diffcore-rename.c b/diffcore-rename.c
index 8ed37da..eac782b 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -135,7 +135,7 @@ static int estimate_similarity(struct diff_filespec *src,
* call into this function in that case.
*/
void *delta;
- unsigned long delta_size, base_size;
+ unsigned long delta_size, base_size, src_copied, literal_added;
int score;
/* We deal only with regular files. Symlink renames are handled
@@ -174,10 +174,17 @@ static int estimate_similarity(struct diff_filespec *src,
return 0;
/* Estimate the edit size by interpreting delta. */
- delta_size = count_delta(delta, delta_size);
- free(delta);
- if (delta_size == UINT_MAX)
+ if (count_delta(delta, delta_size, &src_copied, &literal_added)) {
+ free(delta);
return 0;
+ }
+ free(delta);
+
+ /* Extent of damage */
+ if (src->size + literal_added < src_copied)
+ delta_size = 0;
+ else
+ delta_size = (src->size - src_copied) + literal_added;
/*
* Now we will give some score to it. 100% edit gets 0 points
diff --git a/diffcore.h b/diffcore.h
index 981ee05..1f4b32c 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -12,8 +12,6 @@
#define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */
#define DEFAULT_BREAK_SCORE 59400 /* minimum for break to happen (99%)*/
-#define RENAME_DST_MATCHED 01
-
struct diff_filespec {
unsigned char sha1[20];
char *path;