path: root/diff.c
diff options
author Junio C Hamano <>2005-06-03 08:40:28 (GMT)
committer Linus Torvalds <>2005-06-03 18:23:03 (GMT)
commit eeaa4603147974b988d7b958571628d6ecd697f3 (patch)
tree 5a60f9dbaeae8bddd3dda7130b5f5af471c0991a /diff.c
parent 0e3994fa97e9876b571531444b97ae6e63fd744d (diff)
[PATCH] diff: Update -B heuristics.
As Linus pointed out in the mailing list discussion, -B should break a file that has many inserts even if it still keeps enough of the original contents, so that the broken pieces can later be matched with other files by -M or -C. However, if such a broken pair does not get picked up by -M or -C, we would want to apply different criteria; namely, regardless of the amount of new material in the result, the determination of "rewrite" should be done by looking at the amount of original material still left in the result. If you still have the original 97 lines from a 100-line document, it does not matter if you add your own 13 lines to make a 110-line document, or if you add 903 lines to make a 1000-line document. It is not a rewrite but an in-place edit. On the other hand, if you did lose 97 lines from the original, it does not matter if you added 27 lines to make a 30-line document or if you added 997 lines to make a 1000-line document. You did a complete rewrite in either case. This patch introduces a post-processing phase that runs after diffcore-rename matches up the broken pairs that diffcore-break creates. The purpose of this post-processing is to pick up these broken pieces and merge them back into in-place modifications. For this, the score parameter that the -B option takes is changed into a pair of numbers, and it takes the "-B99/80" format when fully spelled out. The first number is the minimum amount of "edit" (same definition as what diffcore-rename uses, which is "sum of deletion and insertion") that a modification needs to have to be broken, and the second number is the minimum amount of "delete" a surviving broken pair must have to avoid being merged back together. It can be abbreviated to "-B" to use the defaults for both, "-B9" or "-B9/" to use 90% for "edit" but the default (80%) for merge avoidance, or "-B/75" to use the default (99%) for "edit" and 75% for merge avoidance. Signed-off-by: Junio C Hamano <> Signed-off-by: Linus Torvalds <>
Diffstat (limited to 'diff.c')
1 files changed, 16 insertions, 2 deletions
diff --git a/diff.c b/diff.c
index 7ccc1ed..315eb5c 100644
--- a/diff.c
+++ b/diff.c
@@ -614,7 +614,7 @@ static int parse_num(const char **cp_p)
int diff_scoreopt_parse(const char *opt)
- int opt1, cmd;
+ int opt1, opt2, cmd;
if (*opt++ != '-')
return -1;
@@ -623,9 +623,21 @@ int diff_scoreopt_parse(const char *opt)
return -1; /* that is not a -M, -C nor -B option */
opt1 = parse_num(&opt);
+ if (cmd != 'B')
+ opt2 = 0;
+ else {
+ if (*opt == 0)
+ opt2 = 0;
+ else if (*opt != '/')
+ return -1; /* we expect -B80/99 or -B80 */
+ else {
+ opt++;
+ opt2 = parse_num(&opt);
+ }
+ }
if (*opt != 0)
return -1;
- return opt1;
+ return opt1 | (opt2 << 16);
struct diff_queue_struct diff_queued_diff;
@@ -955,6 +967,8 @@ void diffcore_std(const char **paths,
if (detect_rename)
diffcore_rename(detect_rename, rename_score);
+ if (0 <= break_opt)
+ diffcore_merge_broken();
if (pickaxe)
diffcore_pickaxe(pickaxe, pickaxe_opts);
if (orderfile)