From 25790be634afc3eb28a2ba8d0f6579aaab10bc27 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:34 -0700 Subject: xdiff/xdiff.h: remove unused flags These flags have been there since the beginning (3443546f6e (Use a *real* built-in diff generator, 2006-03-24)), but were never used. Remove them. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index c1937a2..2356da5 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -52,14 +52,6 @@ extern "C" { #define XDL_EMIT_FUNCNAMES (1 << 0) #define XDL_EMIT_FUNCCONTEXT (1 << 2) -#define XDL_MMB_READONLY (1 << 0) - -#define XDL_MMF_ATOMIC (1 << 0) - -#define XDL_BDOP_INS 1 -#define XDL_BDOP_CPY 2 -#define XDL_BDOP_INSB 3 - /* merge simplification levels */ #define XDL_MERGE_MINIMAL 0 #define XDL_MERGE_EAGER 1 -- cgit v0.10.2-6-g49f6 From 21c770b63ea7ddcb6e52527d82089068b33eb9f3 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:35 -0700 Subject: xdiff/xdiffi.c: remove unneeded function declarations There is no need to forward-declare these functions, as they are used after their implementation only. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 0de1ef4..3e8aff9 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -22,34 +22,17 @@ #include "xinclude.h" - - #define XDL_MAX_COST_MIN 256 #define XDL_HEUR_MIN_COST 256 #define XDL_LINE_MAX (long)((1UL << (CHAR_BIT * sizeof(long) - 1)) - 1) #define XDL_SNAKE_CNT 20 #define XDL_K_HEUR 4 - - typedef struct s_xdpsplit { long i1, i2; int min_lo, min_hi; } xdpsplit_t; - - - -static long xdl_split(unsigned long const *ha1, long off1, long lim1, - unsigned long const *ha2, long off2, long lim2, - long *kvdf, long *kvdb, int need_min, xdpsplit_t *spl, - xdalgoenv_t *xenv); -static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, long chg2); - - - - - /* * See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. * Basically considers a "box" (off1, off2, lim1, lim2) and scan from both -- cgit v0.10.2-6-g49f6 From 74cfa7bed96ea003e0a3ea20455096187c517b19 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:36 -0700 Subject: t4015: avoid git as a pipe input In t4015 we have a pattern of git diff [] | grep -v "index" | test_decode_color >actual && to produce output that we want to test against. This pattern was introduced in 86b452e2769 (diff.c: add dimming to moved line detection, 2017-06-30), as the focus back then was on getting the colors right. However the pattern used is not best practice as we do care about the exit code of Git. So let's not have Git as the upstream of a pipe. Piping the output of grep to some function is fine as we assume grep to be un-flawed in our test suite. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 17df491..ddbc390 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1271,9 +1271,8 @@ test_expect_success 'detect permutations inside moved code -- dimmed_zebra' ' test_config color.diff.newMovedDimmed "normal cyan" && test_config color.diff.oldMovedAlternativeDimmed "normal blue" && test_config color.diff.newMovedAlternativeDimmed "normal yellow" && - git diff HEAD --no-renames --color-moved=dimmed_zebra --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames --color-moved=dimmed_zebra --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1315,9 +1314,8 @@ test_expect_success 'cmd option assumes configured colored-moved' ' test_config color.diff.oldMovedAlternativeDimmed "normal blue" && test_config color.diff.newMovedAlternativeDimmed "normal yellow" && test_config diff.colorMoved zebra && - git diff HEAD --no-renames --color-moved --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1395,9 +1393,8 @@ test_expect_success 'move detection ignoring whitespace ' ' line 4 line 5 EOF - git diff HEAD --no-renames --color-moved --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1419,9 +1416,8 @@ test_expect_success 'move detection ignoring whitespace ' ' EOF test_cmp expected actual && - git diff HEAD --no-renames -w --color-moved --color | - grep -v 
"index" | - test_decode_color >actual && + git diff HEAD --no-renames -w --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1459,9 +1455,8 @@ test_expect_success 'move detection ignoring whitespace changes' ' line 5 EOF - git diff HEAD --no-renames --color-moved --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1483,9 +1478,8 @@ test_expect_success 'move detection ignoring whitespace changes' ' EOF test_cmp expected actual && - git diff HEAD --no-renames -b --color-moved --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames -b --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1526,9 +1520,8 @@ test_expect_success 'move detection ignoring whitespace at eol' ' # avoid cluttering the output with complaints about our eol whitespace test_config core.whitespace -blank-at-eol && - git diff HEAD --no-renames --color-moved --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1550,9 +1543,8 @@ test_expect_success 'move detection ignoring whitespace at eol' ' EOF test_cmp expected actual && - git diff HEAD --no-renames --ignore-space-at-eol --color-moved --color | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --no-renames --ignore-space-at-eol --color-moved --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat 
<<-\EOF >expected && diff --git a/lines.txt b/lines.txt --- a/lines.txt @@ -1597,9 +1589,8 @@ test_expect_success '--color-moved block at end of diff output respects MIN_ALNU irrelevant_line EOF - git diff HEAD --color-moved=zebra --color --no-renames | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --color-moved=zebra --color --no-renames >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat >expected <<-\EOF && diff --git a/bar b/bar --- a/bar @@ -1636,9 +1627,8 @@ test_expect_success '--color-moved respects MIN_ALNUM_COUNT' ' nineteen chars 456789 EOF - git diff HEAD --color-moved=zebra --color --no-renames | - grep -v "index" | - test_decode_color >actual && + git diff HEAD --color-moved=zebra --color --no-renames >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat >expected <<-\EOF && diff --git a/bar b/bar --- a/bar -- cgit v0.10.2-6-g49f6 From 3783aad4c8e08ed7905bada47dc7b5df94bfa74a Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:37 -0700 Subject: diff.c: do not pass diff options as keydata to hashmap When we initialize the hashmap, we give it a pointer to the diff_options, which it then passes along to each call of the hashmap_cmp_fn function. There's no need to pass it a second time as the "keydata" parameter, and our comparison functions never look at keydata. This was a mistake left over from an earlier round of 2e2d5ac184 (diff.c: color moved lines differently, 2017-06-30), before hashmap learned to pass the data pointer for us. 
Explanation-by: Jeff King Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index 1289df4..ce7bedc 100644 --- a/diff.c +++ b/diff.c @@ -842,13 +842,13 @@ static void mark_color_as_moved(struct diff_options *o, case DIFF_SYMBOL_PLUS: hm = del_lines; key = prepare_entry(o, n); - match = hashmap_get(hm, key, o); + match = hashmap_get(hm, key, NULL); free(key); break; case DIFF_SYMBOL_MINUS: hm = add_lines; key = prepare_entry(o, n); - match = hashmap_get(hm, key, o); + match = hashmap_get(hm, key, NULL); free(key); break; default: -- cgit v0.10.2-6-g49f6 From ee1df66f7cfd0f37bb0e235c4f7ef12bd09a8d2d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:38 -0700 Subject: diff.c: adjust hash function signature to match hashmap expectation This makes the follow up patch easier. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index ce7bedc..d1bae90 100644 --- a/diff.c +++ b/diff.c @@ -707,11 +707,15 @@ struct moved_entry { struct moved_entry *next_line; }; -static int moved_entry_cmp(const struct diff_options *diffopt, - const struct moved_entry *a, - const struct moved_entry *b, +static int moved_entry_cmp(const void *hashmap_cmp_fn_data, + const void *entry, + const void *entry_or_key, const void *keydata) { + const struct diff_options *diffopt = hashmap_cmp_fn_data; + const struct moved_entry *a = entry; + const struct moved_entry *b = entry_or_key; + return !xdiff_compare_lines(a->es->line, a->es->len, b->es->line, b->es->len, diffopt->xdl_opts); @@ -5534,10 +5538,8 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) if (o->color_moved) { struct hashmap add_lines, del_lines; - hashmap_init(&del_lines, - (hashmap_cmp_fn)moved_entry_cmp, o, 0); - hashmap_init(&add_lines, - (hashmap_cmp_fn)moved_entry_cmp, o, 0); + hashmap_init(&del_lines, moved_entry_cmp, o, 0); + hashmap_init(&add_lines, moved_entry_cmp, o, 0); add_lines_to_move_detection(o, &add_lines, 
&del_lines); mark_color_as_moved(o, &add_lines, &del_lines); -- cgit v0.10.2-6-g49f6 From 51da15eb2301667c6ac01dd18851b7a424fbf2b1 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:39 -0700 Subject: diff.c: add a blocks mode for moved code detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new "blocks" mode provides a middle ground between plain and zebra. It is as intuitive (few colors) as plain, but still has the requirement for a minimum of lines/characters to count a block as moved. Suggested-by: Ævar Arnfjörð Bjarmason (https://public-inbox.org/git/87o9j0uljo.fsf@evledraar.gmail.com/) Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index e3a44f0..ba56169 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -276,10 +276,14 @@ plain:: that are added somewhere else in the diff. This mode picks up any moved line, but it is not very useful in a review to determine if a block of code was moved without permutation. -zebra:: +blocks:: Blocks of moved text of at least 20 alphanumeric characters are detected greedily. The detected blocks are - painted using either the 'color.diff.{old,new}Moved' color or + painted using either the 'color.diff.{old,new}Moved' color. + Adjacent blocks cannot be told apart. +zebra:: + Blocks of moved text are detected as in 'blocks' mode. The blocks + are painted using either the 'color.diff.{old,new}Moved' color or 'color.diff.{old,new}MovedAlternative'. The change between the two colors indicates that a new block was detected. 
dimmed_zebra:: diff --git a/diff.c b/diff.c index d1bae90..95c51c0 100644 --- a/diff.c +++ b/diff.c @@ -271,6 +271,8 @@ static int parse_color_moved(const char *arg) return COLOR_MOVED_NO; else if (!strcmp(arg, "plain")) return COLOR_MOVED_PLAIN; + else if (!strcmp(arg, "blocks")) + return COLOR_MOVED_BLOCKS; else if (!strcmp(arg, "zebra")) return COLOR_MOVED_ZEBRA; else if (!strcmp(arg, "default")) @@ -278,7 +280,7 @@ static int parse_color_moved(const char *arg) else if (!strcmp(arg, "dimmed_zebra")) return COLOR_MOVED_ZEBRA_DIM; else - return error(_("color moved setting must be one of 'no', 'default', 'zebra', 'dimmed_zebra', 'plain'")); + return error(_("color moved setting must be one of 'no', 'default', 'blocks', 'zebra', 'dimmed_zebra', 'plain'")); } int git_diff_ui_config(const char *var, const char *value, void *cb) @@ -903,7 +905,7 @@ static void mark_color_as_moved(struct diff_options *o, block_length++; - if (flipped_block) + if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; } adjust_last_block(o, n, block_length); diff --git a/diff.h b/diff.h index d29560f..7bd4f18 100644 --- a/diff.h +++ b/diff.h @@ -208,8 +208,9 @@ struct diff_options { enum { COLOR_MOVED_NO = 0, COLOR_MOVED_PLAIN = 1, - COLOR_MOVED_ZEBRA = 2, - COLOR_MOVED_ZEBRA_DIM = 3, + COLOR_MOVED_BLOCKS = 2, + COLOR_MOVED_ZEBRA = 3, + COLOR_MOVED_ZEBRA_DIM = 4, } color_moved; #define COLOR_MOVED_DEFAULT COLOR_MOVED_ZEBRA #define COLOR_MOVED_MIN_ALNUM_COUNT 20 diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index ddbc390..e54529f 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1223,7 +1223,7 @@ test_expect_success 'plain moved code, inside file' ' test_cmp expected actual ' -test_expect_success 'detect permutations inside moved code -- dimmed_zebra' ' +test_expect_success 'detect blocks of moved code' ' git reset --hard && cat <<-\EOF >lines.txt && long line 1 @@ -1271,6 +1271,50 @@ 
test_expect_success 'detect permutations inside moved code -- dimmed_zebra' ' test_config color.diff.newMovedDimmed "normal cyan" && test_config color.diff.oldMovedAlternativeDimmed "normal blue" && test_config color.diff.newMovedAlternativeDimmed "normal yellow" && + git diff HEAD --no-renames --color-moved=blocks --color >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && + cat <<-\EOF >expected && + diff --git a/lines.txt b/lines.txt + --- a/lines.txt + +++ b/lines.txt + @@ -1,16 +1,16 @@ + -long line 1 + -long line 2 + -long line 3 + line 4 + line 5 + line 6 + line 7 + line 8 + line 9 + +long line 1 + +long line 2 + +long line 3 + +long line 14 + +long line 15 + +long line 16 + line 10 + line 11 + line 12 + line 13 + -long line 14 + -long line 15 + -long line 16 + EOF + test_cmp expected actual + +' + +test_expect_success 'detect permutations inside moved code -- dimmed_zebra' ' + # reuse setup from test before! + test_config color.diff.oldMoved "magenta" && + test_config color.diff.newMoved "cyan" && + test_config color.diff.oldMovedAlternative "blue" && + test_config color.diff.newMovedAlternative "yellow" && + test_config color.diff.oldMovedDimmed "normal magenta" && + test_config color.diff.newMovedDimmed "normal cyan" && + test_config color.diff.oldMovedAlternativeDimmed "normal blue" && + test_config color.diff.newMovedAlternativeDimmed "normal yellow" && git diff HEAD --no-renames --color-moved=dimmed_zebra --color >actual.raw && grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && @@ -1669,7 +1713,8 @@ test_expect_success '--color-moved treats adjacent blocks as separate for MIN_AL 7charsA EOF - git diff HEAD --color-moved=zebra --color --no-renames | grep -v "index" | test_decode_color >actual && + git diff HEAD --color-moved=zebra --color --no-renames >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && cat >expected <<-\EOF && diff --git a/bar b/bar --- a/bar -- cgit 
v0.10.2-6-g49f6 From b3095712f9d81572c3658d579e50db907b72f46d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:40 -0700 Subject: diff.c: decouple white space treatment from move detection algorithm In the original implementation of the move detection logic the choice for ignoring white space changes is the same for the move detection as it is for the regular diff. Some cases came up where different treatment would have been nice. Allow the user to specify that white space should be ignored differently during detection of moved lines than during generation of added and removed lines. This is done by providing analogs to the --ignore-space-at-eol, -b, and -w options by introducing the option --color-moved-ws= with the modes named "ignore-space-at-eol", "ignore-space-change" and "ignore-all-space", which is used only during the move detection phase. As we change the default, we'll adjust the tests. For now we do not infer any options to treat white spaces in the move detection from the generic white space options given to diff. This can be tuned later to a reasonable default. As we plan on adding more white space related options in a later patch that interfere with the current white space options, use a flag field and clamp it down to XDF_WHITESPACE_FLAGS, as that (a) allows to easily check at parse time if we give invalid combinations and (b) can reuse parts of this patch. By having the white space treatment in its own option, we'll also make it easier for a later patch to have a config option for spaces in the move detection. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index ba56169..80e29e3 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -292,6 +292,23 @@ dimmed_zebra:: blocks are considered interesting, the rest is uninteresting. 
-- +--color-moved-ws=:: + This configures how white spaces are ignored when performing the + move detection for `--color-moved`. These modes can be given + as a comma separated list: ++ +-- +ignore-space-at-eol:: + Ignore changes in whitespace at EOL. +ignore-space-change:: + Ignore changes in amount of whitespace. This ignores whitespace + at line end, and considers all other sequences of one or + more whitespace characters to be equivalent. +ignore-all-space:: + Ignore whitespace when comparing lines. This ignores differences + even if one line has whitespace where the other line has none. +-- + --word-diff[=]:: Show a word diff, using the to delimit changed words. By default, words are delimited by whitespace; see diff --git a/diff.c b/diff.c index 95c51c0..70eeb40 100644 --- a/diff.c +++ b/diff.c @@ -283,6 +283,36 @@ static int parse_color_moved(const char *arg) return error(_("color moved setting must be one of 'no', 'default', 'blocks', 'zebra', 'dimmed_zebra', 'plain'")); } +static int parse_color_moved_ws(const char *arg) +{ + int ret = 0; + struct string_list l = STRING_LIST_INIT_DUP; + struct string_list_item *i; + + string_list_split(&l, arg, ',', -1); + + for_each_string_list_item(i, &l) { + struct strbuf sb = STRBUF_INIT; + strbuf_addstr(&sb, i->string); + strbuf_trim(&sb); + + if (!strcmp(sb.buf, "ignore-space-change")) + ret |= XDF_IGNORE_WHITESPACE_CHANGE; + else if (!strcmp(sb.buf, "ignore-space-at-eol")) + ret |= XDF_IGNORE_WHITESPACE_AT_EOL; + else if (!strcmp(sb.buf, "ignore-all-space")) + ret |= XDF_IGNORE_WHITESPACE; + else + error(_("ignoring unknown color-moved-ws mode '%s'"), sb.buf); + + strbuf_release(&sb); + } + + string_list_clear(&l, 0); + + return ret; +} + int git_diff_ui_config(const char *var, const char *value, void *cb) { if (!strcmp(var, "diff.color") || !strcmp(var, "color.diff")) { @@ -717,10 +747,12 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data, const struct diff_options *diffopt = hashmap_cmp_fn_data; const 
struct moved_entry *a = entry; const struct moved_entry *b = entry_or_key; + unsigned flags = diffopt->color_moved_ws_handling + & XDF_WHITESPACE_FLAGS; return !xdiff_compare_lines(a->es->line, a->es->len, b->es->line, b->es->len, - diffopt->xdl_opts); + flags); } static struct moved_entry *prepare_entry(struct diff_options *o, @@ -728,8 +760,9 @@ static struct moved_entry *prepare_entry(struct diff_options *o, { struct moved_entry *ret = xmalloc(sizeof(*ret)); struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no]; + unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; - ret->ent.hash = xdiff_hash_string(l->line, l->len, o->xdl_opts); + ret->ent.hash = xdiff_hash_string(l->line, l->len, flags); ret->es = l; ret->next_line = NULL; @@ -4710,6 +4743,8 @@ int diff_opt_parse(struct diff_options *options, if (cm < 0) die("bad --color-moved argument: %s", arg); options->color_moved = cm; + } else if (skip_prefix(arg, "--color-moved-ws=", &arg)) { + options->color_moved_ws_handling = parse_color_moved_ws(arg); } else if (skip_to_optional_arg_default(arg, "--color-words", &options->word_regex, NULL)) { options->use_color = 1; options->word_diff = DIFF_WORDS_COLOR; diff --git a/diff.h b/diff.h index 7bd4f18..de5dc68 100644 --- a/diff.h +++ b/diff.h @@ -214,6 +214,7 @@ struct diff_options { } color_moved; #define COLOR_MOVED_DEFAULT COLOR_MOVED_ZEBRA #define COLOR_MOVED_MIN_ALNUM_COUNT 20 + int color_moved_ws_handling; }; void diff_emit_submodule_del(struct diff_options *o, const char *line); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index e54529f..000c3a2 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1460,7 +1460,8 @@ test_expect_success 'move detection ignoring whitespace ' ' EOF test_cmp expected actual && - git diff HEAD --no-renames -w --color-moved --color >actual.raw && + git diff HEAD --no-renames --color-moved --color \ + --color-moved-ws=ignore-all-space >actual.raw && grep -v 
"index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt @@ -1522,7 +1523,8 @@ test_expect_success 'move detection ignoring whitespace changes' ' EOF test_cmp expected actual && - git diff HEAD --no-renames -b --color-moved --color >actual.raw && + git diff HEAD --no-renames --color-moved --color \ + --color-moved-ws=ignore-space-change >actual.raw && grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt @@ -1587,7 +1589,8 @@ test_expect_success 'move detection ignoring whitespace at eol' ' EOF test_cmp expected actual && - git diff HEAD --no-renames --ignore-space-at-eol --color-moved --color >actual.raw && + git diff HEAD --no-renames --color-moved --color \ + --color-moved-ws=ignore-space-at-eol >actual.raw && grep -v "index" actual.raw | test_decode_color >actual && cat <<-\EOF >expected && diff --git a/lines.txt b/lines.txt @@ -1757,7 +1760,58 @@ test_expect_success 'move detection with submodules' ' # nor did we mess with it another way git diff --submodule=diff --color | test_decode_color >expect && - test_cmp expect decoded_actual + test_cmp expect decoded_actual && + rm -rf bananas && + git submodule deinit bananas +' + +test_expect_success 'only move detection ignores white spaces' ' + git reset --hard && + q_to_tab <<-\EOF >text.txt && + a long line to exceed per-line minimum + another long line to exceed per-line minimum + original file + EOF + git add text.txt && + git commit -m "add text" && + q_to_tab <<-\EOF >text.txt && + Qa long line to exceed per-line minimum + Qanother long line to exceed per-line minimum + new file + EOF + + # Make sure we get a different diff using -w + git diff --color --color-moved -w >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && + q_to_tab <<-\EOF >expected && + diff --git a/text.txt b/text.txt + --- a/text.txt + +++ b/text.txt + @@ -1,3 +1,3 @@ + Qa long line to exceed per-line 
minimum + Qanother long line to exceed per-line minimum + -original file + +new file + EOF + test_cmp expected actual && + + # And now ignoring white space only in the move detection + git diff --color --color-moved \ + --color-moved-ws=ignore-all-space,ignore-space-change,ignore-space-at-eol >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && + q_to_tab <<-\EOF >expected && + diff --git a/text.txt b/text.txt + --- a/text.txt + +++ b/text.txt + @@ -1,3 +1,3 @@ + -a long line to exceed per-line minimum + -another long line to exceed per-line minimum + -original file + +Qa long line to exceed per-line minimum + +Qanother long line to exceed per-line minimum + +new file + EOF + test_cmp expected actual ' test_done -- cgit v0.10.2-6-g49f6 From e2fe6abc3b8cc78f8dd3dac938bcd63e918e647f Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Mon, 16 Jul 2018 16:05:41 -0700 Subject: diff.c: factor advance_or_nullify out of mark_color_as_moved This moves the part of code that checks if we're still in a block into its own function. We'll need a different approach on advancing the blocks in a later patch, so having it as a separate function will prove useful. While at it rename the variable `p` to `prev` to indicate that it refers to the previous line. This is as pmb[i] was assigned in the last iteration of the outmost for loop. Further rename `pnext` to `cur` to indicate that this should match up with the current line of the outmost for loop. Also replace the advancement of pmb[i] to reuse `cur` instead of using `p->next` (which is how the name for pnext could be explained. 
Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index 70eeb40..4963819 100644 --- a/diff.c +++ b/diff.c @@ -801,6 +801,25 @@ static void add_lines_to_move_detection(struct diff_options *o, } } +static void pmb_advance_or_null(struct diff_options *o, + struct moved_entry *match, + struct hashmap *hm, + struct moved_entry **pmb, + int pmb_nr) +{ + int i; + for (i = 0; i < pmb_nr; i++) { + struct moved_entry *prev = pmb[i]; + struct moved_entry *cur = (prev && prev->next_line) ? + prev->next_line : NULL; + if (cur && !hm->cmpfn(o, cur, match, NULL)) { + pmb[i] = cur; + } else { + pmb[i] = NULL; + } + } +} + static int shrink_potential_moved_blocks(struct moved_entry **pmb, int pmb_nr) { @@ -875,7 +894,6 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_entry *key; struct moved_entry *match = NULL; struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; - int i; switch (l->s) { case DIFF_SYMBOL_PLUS: @@ -906,17 +924,7 @@ static void mark_color_as_moved(struct diff_options *o, if (o->color_moved == COLOR_MOVED_PLAIN) continue; - /* Check any potential block runs, advance each or nullify */ - for (i = 0; i < pmb_nr; i++) { - struct moved_entry *p = pmb[i]; - struct moved_entry *pnext = (p && p->next_line) ? - p->next_line : NULL; - if (pnext && !hm->cmpfn(o, pnext, match, NULL)) { - pmb[i] = p->next_line; - } else { - pmb[i] = NULL; - } - } + pmb_advance_or_null(o, match, hm, pmb, pmb_nr); pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr); -- cgit v0.10.2-6-g49f6 From ca1f4ae4dfade677647928d28728a0cad125981d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 18 Jul 2018 12:31:55 -0700 Subject: diff.c: add white space mode to move detection that allows indent changes The option of --color-moved has proven to be useful as observed on the mailing list. 
However when refactoring sometimes the indentation changes, for example when partitioning a function into smaller helper functions the code is usually mostly moved around except for a decrease in indentation. To just review the moved code ignoring the change in indentation, a mode to ignore spaces in the move detection as implemented in a previous patch would be enough. However the whole move coloring as motivated in commit 2e2d5ac (diff.c: color moved lines differently, 2017-06-30), brought up the notion of the reviewer being able to trust the move of a "block". As there are languages such as Python, which depend on proper relative indentation for the control flow of the program, ignoring any white space change in a block would not uphold the promises of 2e2d5ac that allow reviewers to pay less attention to the inside of a block, as inside the reviewer wants to assume the same program flow. This new mode of white space ignorance will take this into account and will only allow the same white space changes per line in each block. This patch even allows only for the same change at the beginning of the lines. As this is a white space mode, it is made exclusive to other white space modes in the move detection. This patch brings some challenges, related to the detection of blocks. We need a wide net to catch the possible moved lines, but then need to narrow down to check if the blocks are still intact. Consider this example (ignoring block sizes): - A - B - C + A + B + C At the beginning of a block when checking if there is a counterpart for A, we have to ignore all space changes. However at the following lines we have to check if the indent change stayed the same. Checking if the indentation change did stay the same is done by computing the indentation change from the difference in line length, and then assuming that the change is only at the beginning of the longer line and that the common tail is the same. That is why the test contains lines like: - A ... + A ... 
As the first line starting a block is caught using a compare function that ignores white spaces unlike the rest of the block, where the white space delta is taken into account for the comparison, we also have to think about the following situation: - A - B - A - B + A + B + A + B When checking if the first A (both in the + and - lines) is a start of a block, we have to check all 'A' and record all the white space deltas such that we can find the example above to be just one block that is indented. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 80e29e3..143acd9 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -307,6 +307,11 @@ ignore-space-change:: ignore-all-space:: Ignore whitespace when comparing lines. This ignores differences even if one line has whitespace where the other line has none. +allow-indentation-change:: + Initially ignore any white spaces in the move detection, then + group the moved code blocks only into a block if the change in + whitespace is the same per line. This is incompatible with the + other modes. 
-- --word-diff[=]:: diff --git a/diff.c b/diff.c index 4963819..7810a47 100644 --- a/diff.c +++ b/diff.c @@ -302,12 +302,18 @@ static int parse_color_moved_ws(const char *arg) ret |= XDF_IGNORE_WHITESPACE_AT_EOL; else if (!strcmp(sb.buf, "ignore-all-space")) ret |= XDF_IGNORE_WHITESPACE; + else if (!strcmp(sb.buf, "allow-indentation-change")) + ret |= COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE; else error(_("ignoring unknown color-moved-ws mode '%s'"), sb.buf); strbuf_release(&sb); } + if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) && + (ret & XDF_WHITESPACE_FLAGS)) + die(_("color-moved-ws: allow-indentation-change cannot be combined with other white space modes")); + string_list_clear(&l, 0); return ret; @@ -737,7 +743,91 @@ struct moved_entry { struct hashmap_entry ent; const struct emitted_diff_symbol *es; struct moved_entry *next_line; + struct ws_delta *wsd; +}; + +/** + * The struct ws_delta holds white space differences between moved lines, i.e. + * between '+' and '-' lines that have been detected to be a move. + * The string contains the difference in leading white spaces, before the + * rest of the line is compared using the white space config for move + * coloring. The current_longer indicates if the first string in the + * comparision is longer than the second. + */ +struct ws_delta { + char *string; + unsigned int current_longer : 1; }; +#define WS_DELTA_INIT { NULL, 0 } + +static int compute_ws_delta(const struct emitted_diff_symbol *a, + const struct emitted_diff_symbol *b, + struct ws_delta *out) +{ + const struct emitted_diff_symbol *longer = a->len > b->len ? a : b; + const struct emitted_diff_symbol *shorter = a->len > b->len ? 
b : a; + int d = longer->len - shorter->len; + + out->string = xmemdupz(longer->line, d); + out->current_longer = (a == longer); + + return !strncmp(longer->line + d, shorter->line, shorter->len); +} + +static int cmp_in_block_with_wsd(const struct diff_options *o, + const struct moved_entry *cur, + const struct moved_entry *match, + struct moved_entry *pmb, + int n) +{ + struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; + int al = cur->es->len, cl = l->len; + const char *a = cur->es->line, + *b = match->es->line, + *c = l->line; + + int wslen; + + /* + * We need to check if 'cur' is equal to 'match'. + * As those are from the same (+/-) side, we do not need to adjust for + * indent changes. However these were found using fuzzy matching + * so we do have to check if they are equal. + */ + if (strcmp(a, b)) + return 1; + + if (!pmb->wsd) + /* + * No white space delta was carried forward? This can happen + * when we exit early in this function and do not carry + * forward ws. + */ + return 1; + + /* + * The indent changes of the block are known and carried forward in + * pmb->wsd; however we need to check if the indent changes of the + * current line are still the same as before. + * + * To do so we need to compare 'l' to 'cur', adjusting the + * one of them for the white spaces, depending which was longer. + */ + + wslen = strlen(pmb->wsd->string); + if (pmb->wsd->current_longer) { + c += wslen; + cl -= wslen; + } else { + a += wslen; + al -= wslen; + } + + if (strcmp(a, c)) + return 1; + + return 0; +} static int moved_entry_cmp(const void *hashmap_cmp_fn_data, const void *entry, @@ -750,6 +840,16 @@ static int moved_entry_cmp(const void *hashmap_cmp_fn_data, unsigned flags = diffopt->color_moved_ws_handling & XDF_WHITESPACE_FLAGS; + if (diffopt->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + /* + * As there is not specific white space config given, + * we'd need to check for a new block, so ignore all + * white space. 
The setup of the white space + * configuration for the next block is done else where + */ + flags |= XDF_IGNORE_WHITESPACE; + return !xdiff_compare_lines(a->es->line, a->es->len, b->es->line, b->es->len, flags); @@ -765,6 +865,7 @@ static struct moved_entry *prepare_entry(struct diff_options *o, ret->ent.hash = xdiff_hash_string(l->line, l->len, flags); ret->es = l; ret->next_line = NULL; + ret->wsd = NULL; return ret; } @@ -820,6 +921,37 @@ static void pmb_advance_or_null(struct diff_options *o, } } +static void pmb_advance_or_null_multi_match(struct diff_options *o, + struct moved_entry *match, + struct hashmap *hm, + struct moved_entry **pmb, + int pmb_nr, int n) +{ + int i; + char *got_match = xcalloc(1, pmb_nr); + + for (; match; match = hashmap_get_next(hm, match)) { + for (i = 0; i < pmb_nr; i++) { + struct moved_entry *prev = pmb[i]; + struct moved_entry *cur = (prev && prev->next_line) ? + prev->next_line : NULL; + if (!cur) + continue; + if (!cmp_in_block_with_wsd(o, cur, match, pmb[i], n)) + got_match[i] |= 1; + } + } + + for (i = 0; i < pmb_nr; i++) { + if (got_match[i]) { + /* Carry the white space delta forward */ + pmb[i]->next_line->wsd = pmb[i]->wsd; + pmb[i] = pmb[i]->next_line; + } else + pmb[i] = NULL; + } +} + static int shrink_potential_moved_blocks(struct moved_entry **pmb, int pmb_nr) { @@ -837,6 +969,10 @@ static int shrink_potential_moved_blocks(struct moved_entry **pmb, if (lp < pmb_nr && rp > -1 && lp < rp) { pmb[lp] = pmb[rp]; + if (pmb[rp]->wsd) { + free(pmb[rp]->wsd->string); + FREE_AND_NULL(pmb[rp]->wsd); + } pmb[rp] = NULL; rp--; lp++; @@ -924,7 +1060,11 @@ static void mark_color_as_moved(struct diff_options *o, if (o->color_moved == COLOR_MOVED_PLAIN) continue; - pmb_advance_or_null(o, match, hm, pmb, pmb_nr); + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + pmb_advance_or_null_multi_match(o, match, hm, pmb, pmb_nr, n); + else + pmb_advance_or_null(o, match, hm, pmb, pmb_nr); pmb_nr = 
shrink_potential_moved_blocks(pmb, pmb_nr); @@ -935,7 +1075,17 @@ static void mark_color_as_moved(struct diff_options *o, */ for (; match; match = hashmap_get_next(hm, match)) { ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc); - pmb[pmb_nr++] = match; + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) { + struct ws_delta *wsd = xmalloc(sizeof(*match->wsd)); + if (compute_ws_delta(l, match->es, wsd)) { + match->wsd = wsd; + pmb[pmb_nr++] = match; + } else + free(wsd); + } else { + pmb[pmb_nr++] = match; + } } flipped_block = (flipped_block + 1) % 2; @@ -5583,6 +5733,10 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o) if (o->color_moved) { struct hashmap add_lines, del_lines; + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE; + hashmap_init(&del_lines, moved_entry_cmp, o, 0); hashmap_init(&add_lines, moved_entry_cmp, o, 0); diff --git a/diff.h b/diff.h index de5dc68..5e6bcf0 100644 --- a/diff.h +++ b/diff.h @@ -214,6 +214,9 @@ struct diff_options { } color_moved; #define COLOR_MOVED_DEFAULT COLOR_MOVED_ZEBRA #define COLOR_MOVED_MIN_ALNUM_COUNT 20 + + /* XDF_WHITESPACE_FLAGS regarding block detection are set at 2, 3, 4 */ + #define COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE (1<<5) int color_moved_ws_handling; }; diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index 000c3a2..41facf7 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1814,4 +1814,92 @@ test_expect_success 'only move detection ignores white spaces' ' test_cmp expected actual ' +test_expect_success 'compare whitespace delta across moved blocks' ' + + git reset --hard && + q_to_tab <<-\EOF >text.txt && + QIndented + QText across + Qsome lines + QBut! 
<- this stands out + QAdjusting with + QQdifferent starting + Qwhite spaces + QAnother outlier + QQQIndented + QQQText across + QQQfive lines + QQQthat has similar lines + QQQto previous blocks, but with different indent + QQQYetQAnotherQoutlierQ + EOF + + git add text.txt && + git commit -m "add text.txt" && + + q_to_tab <<-\EOF >text.txt && + QQIndented + QQText across + QQsome lines + QQQBut! <- this stands out + Adjusting with + Qdifferent starting + white spaces + AnotherQoutlier + QQIndented + QQText across + QQfive lines + QQthat has similar lines + QQto previous blocks, but with different indent + QQYetQAnotherQoutlier + EOF + + git diff --color --color-moved --color-moved-ws=allow-indentation-change >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && + + q_to_tab <<-\EOF >expected && + diff --git a/text.txt b/text.txt + --- a/text.txt + +++ b/text.txt + @@ -1,14 +1,14 @@ + -QIndented + -QText across + -Qsome lines + -QBut! <- this stands out + -QAdjusting with + -QQdifferent starting + -Qwhite spaces + -QAnother outlier + -QQQIndented + -QQQText across + -QQQfive lines + -QQQthat has similar lines + -QQQto previous blocks, but with different indent + -QQQYetQAnotherQoutlierQ + +QQIndented + +QQText across + +QQsome lines + +QQQBut! 
<- this stands out + +Adjusting with + +Qdifferent starting + +white spaces + +AnotherQoutlier + +QQIndented + +QQText across + +QQfive lines + +QQthat has similar lines + +QQto previous blocks, but with different indent + +QQYetQAnotherQoutlier + EOF + + test_cmp expected actual +' + +test_expect_success 'compare whitespace delta incompatible with other space options' ' + test_must_fail git diff \ + --color-moved-ws=allow-indentation-change,ignore-all-space \ + 2>err && + test_i18ngrep allow-indentation-change err +' + test_done -- cgit v0.10.2-6-g49f6 From 626c0b5d395196ddabcf9b9a430db385722689d4 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Wed, 18 Jul 2018 12:31:56 -0700 Subject: diff.c: offer config option to control ws handling in move detection Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index 2659153..6ca7118 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1122,6 +1122,11 @@ diff.colorMoved:: true the default color mode will be used. When set to false, moved lines are not colored. +diff.colorMovedWS:: + When moved lines are colored using e.g. the `diff.colorMoved` setting, + this option controls the `<mode>` how spaces are treated + for details of valid modes see '--color-moved-ws' in linkgit:git-diff[1]. + color.diff.<slot>:: Use customized color for diff colorization. `<slot>` specifies which part of the patch to use the specified color, and is one diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 143acd9..8da7fed 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -294,8 +294,11 @@ dimmed_zebra:: --color-moved-ws=<modes>:: This configures how white spaces are ignored when performing the - move detection for `--color-moved`. These modes can be given - as a comma separated list: + move detection for `--color-moved`. +ifdef::git-diff[] + It can be set by the `diff.colorMovedWS` configuration setting. 
+endif::git-diff[] + These modes can be given as a comma separated list: + -- ignore-space-at-eol:: diff --git a/diff.c b/diff.c index 7810a47..5089c6e 100644 --- a/diff.c +++ b/diff.c @@ -35,6 +35,7 @@ static int diff_rename_limit_default = 400; static int diff_suppress_blank_empty; static int diff_use_color_default = -1; static int diff_color_moved_default; +static int diff_color_moved_ws_default; static int diff_context_default = 3; static int diff_interhunk_context_default; static const char *diff_word_regex_cfg; @@ -332,6 +333,13 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) diff_color_moved_default = cm; return 0; } + if (!strcmp(var, "diff.colormovedws")) { + int cm = parse_color_moved_ws(value); + if (cm < 0) + return -1; + diff_color_moved_ws_default = cm; + return 0; + } if (!strcmp(var, "diff.context")) { diff_context_default = git_config_int(var, value); if (diff_context_default < 0) @@ -4327,6 +4335,7 @@ void diff_setup(struct diff_options *options) } options->color_moved = diff_color_moved_default; + options->color_moved_ws_handling = diff_color_moved_ws_default; } void diff_setup_done(struct diff_options *options) -- cgit v0.10.2-6-g49f6