From 4d0f39cecf93222401a81216e5cf2528e8abc6ae Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sat, 4 Mar 2006 01:03:53 -0800 Subject: diffcore-break: similarity estimator fix. This is a companion patch to the previous fix to diffcore-rename. The merging-back process should use a logic similar to what is used there. Signed-off-by: Junio C Hamano diff --git a/diffcore-break.c b/diffcore-break.c index 0fc2b86..71ad58a 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -45,8 +45,8 @@ static int should_break(struct diff_filespec *src, * The value we return is 1 if we want the pair to be broken, * or 0 if we do not. */ - unsigned long delta_size, base_size, src_copied, literal_added; - int to_break = 0; + unsigned long delta_size, base_size, src_copied, literal_added, + src_removed; *merge_score_p = 0; /* assume no deletion --- "do not break" * is the default. @@ -72,33 +72,40 @@ static int should_break(struct diff_filespec *src, &src_copied, &literal_added)) return 0; + /* sanity */ + if (src->size < src_copied) + src_copied = src->size; + if (dst->size < literal_added + src_copied) { + if (src_copied < dst->size) + literal_added = dst->size - src_copied; + else + literal_added = 0; + } + src_removed = src->size - src_copied; + /* Compute merge-score, which is "how much is removed * from the source material". The clean-up stage will * merge the surviving pair together if the score is * less than the minimum, after rename/copy runs. */ - if (src->size <= src_copied) - ; /* all copied, nothing removed */ - else { - delta_size = src->size - src_copied; - *merge_score_p = delta_size * MAX_SCORE / src->size; - } - + *merge_score_p = src_removed * MAX_SCORE / src->size; + /* Extent of damage, which counts both inserts and * deletes. */ - if (src->size + literal_added <= src_copied) - delta_size = 0; /* avoid wrapping around */ - else - delta_size = (src->size - src_copied) + literal_added; - - /* We break if the edit exceeds the minimum. - * i.e. (break_score / MAX_SCORE < delta_size / base_size) + delta_size = src_removed + literal_added; + if (delta_size * MAX_SCORE / base_size < break_score) + return 0; + + /* If you removed a lot without adding new material, that is + * not really a rewrite. */ - if (break_score * base_size < delta_size * MAX_SCORE) - to_break = 1; + if ((src->size * break_score < src_removed * MAX_SCORE) && + (literal_added * 20 < src_removed) && + (literal_added * 20 < src_copied)) + return 0; - return to_break; + return 1; } void diffcore_break(int break_score) diff --git a/diffcore.h b/diffcore.h index dba4f17..d31b3b4 100644 --- a/diffcore.h +++ b/diffcore.h @@ -17,8 +17,8 @@ */ #define MAX_SCORE 60000.0 #define DEFAULT_RENAME_SCORE 30000 /* rename/copy similarity minimum (50%) */ -#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%)*/ -#define DEFAULT_MERGE_SCORE 48000 /* maximum for break-merge to happen (80%)*/ +#define DEFAULT_BREAK_SCORE 30000 /* minimum for break to happen (50%) */ +#define DEFAULT_MERGE_SCORE 36000 /* maximum for break-merge to happen 60%) */ #define MINIMUM_BREAK_SIZE 400 /* do not break a file smaller than this */ -- cgit v0.10.2-6-g49f6