From fbafb7c682272e039e7aacbf8c0ef8de3c0eb157 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:50 +0000 Subject: diff: document --no-color-moved Add documentation for --no-color-moved. Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 0378cd5..151690f 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -293,6 +293,10 @@ dimmed-zebra:: `dimmed_zebra` is a deprecated synonym. -- +--no-color-moved:: + Turn off move detection. This can be used to override configuration + settings. It is the same as `--color-moved=no`. + --color-moved-ws=:: This configures how white spaces are ignored when performing the move detection for `--color-moved`. -- cgit v0.10.2-6-g49f6 From 748aa1aa34a316b8f26a3664f3fdcd273e1c5c39 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:51 +0000 Subject: Use "whitespace" consistently Most of the messages and documentation use 'whitespace' rather than 'white space' or 'white spaces'; convert the latter two to the former for consistency. Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 151690f..57a2f4c 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -298,7 +298,7 @@ dimmed-zebra:: settings. It is the same as `--color-moved=no`. --color-moved-ws=:: - This configures how white spaces are ignored when performing the + This configures how whitespace is ignored when performing the move detection for `--color-moved`. ifdef::git-diff[] It can be set by the `diff.colorMovedWS` configuration setting. @@ -316,7 +316,7 @@ ignore-all-space:: Ignore whitespace when comparing lines. This ignores differences even if one line has whitespace where the other line has none. 
allow-indentation-change:: - Initially ignore any white spaces in the move detection, then + Initially ignore any whitespace in the move detection, then group the moved code blocks only into a block if the change in whitespace is the same per line. This is incompatible with the other modes. diff --git a/Documentation/git-cat-file.txt b/Documentation/git-cat-file.txt index 7401333..9a2e9cd 100644 --- a/Documentation/git-cat-file.txt +++ b/Documentation/git-cat-file.txt @@ -23,8 +23,8 @@ In the second form, a list of objects (separated by linefeeds) is provided on stdin, and the SHA-1, type, and size of each object is printed on stdout. The output format can be overridden using the optional `` argument. If either `--textconv` or `--filters` was specified, the input is expected to -list the object names followed by the path name, separated by a single white -space, so that the appropriate drivers can be determined. +list the object names followed by the path name, separated by a single +whitespace, so that the appropriate drivers can be determined. OPTIONS ------- @@ -79,7 +79,7 @@ OPTIONS Print object information and contents for each object provided on stdin. May not be combined with any other options or arguments except `--textconv` or `--filters`, in which case the input lines - also need to specify the path, separated by white space. See the + also need to specify the path, separated by whitespace. See the section `BATCH OUTPUT` below for details. --batch-check:: @@ -87,7 +87,7 @@ OPTIONS Print object information for each object provided on stdin. May not be combined with any other options or arguments except `--textconv` or `--filters`, in which case the input lines also - need to specify the path, separated by white space. See the + need to specify the path, separated by whitespace. See the section `BATCH OUTPUT` below for details. 
--batch-all-objects:: diff --git a/diff.c b/diff.c index dc9965e..7fc92dc 100644 --- a/diff.c +++ b/diff.c @@ -320,7 +320,7 @@ static int parse_color_moved_ws(const char *arg) if ((ret & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) && (ret & XDF_WHITESPACE_FLAGS)) - die(_("color-moved-ws: allow-indentation-change cannot be combined with other white space modes")); + die(_("color-moved-ws: allow-indentation-change cannot be combined with other whitespace modes")); string_list_clear(&l, 0); -- cgit v0.10.2-6-g49f6 From b73bcbac4a9d8c76ccdda0f0ac4b122b8e96e4b7 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:52 +0000 Subject: diff: allow --no-color-moved-ws Allow --no-color-moved-ws and --color-moved-ws=no to cancel any previous --color-moved-ws option. Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 57a2f4c..e1744fa 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -306,6 +306,8 @@ endif::git-diff[] These modes can be given as a comma separated list: + -- +no:: + Do not ignore whitespace when performing move detection. ignore-space-at-eol:: Ignore changes in whitespace at EOL. ignore-space-change:: @@ -322,6 +324,11 @@ allow-indentation-change:: other modes. -- +--no-color-moved-ws:: + Do not ignore whitespace when performing move detection. This can be + used to override configuration settings. It is the same as + `--color-moved-ws=no`. + --word-diff[=]:: Show a word diff, using the to delimit changed words. 
By default, words are delimited by whitespace; see diff --git a/diff.c b/diff.c index 7fc92dc..4ef0042 100644 --- a/diff.c +++ b/diff.c @@ -304,7 +304,9 @@ static int parse_color_moved_ws(const char *arg) strbuf_addstr(&sb, i->string); strbuf_trim(&sb); - if (!strcmp(sb.buf, "ignore-space-change")) + if (!strcmp(sb.buf, "no")) + ret = 0; + else if (!strcmp(sb.buf, "ignore-space-change")) ret |= XDF_IGNORE_WHITESPACE_CHANGE; else if (!strcmp(sb.buf, "ignore-space-at-eol")) ret |= XDF_IGNORE_WHITESPACE_AT_EOL; @@ -5036,6 +5038,8 @@ int diff_opt_parse(struct diff_options *options, if (cm < 0) die("bad --color-moved argument: %s", arg); options->color_moved = cm; + } else if (!strcmp(arg, "--no-color-moved-ws")) { + options->color_moved_ws_handling = 0; } else if (skip_prefix(arg, "--color-moved-ws=", &arg)) { options->color_moved_ws_handling = parse_color_moved_ws(arg); } else if (skip_to_optional_arg_default(arg, "--color-words", &options->word_regex, NULL)) { -- cgit v0.10.2-6-g49f6 From 10acc5f750789fcfd38e43810f2c372b134a807c Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:53 +0000 Subject: diff --color-moved-ws: demonstrate false positives 'diff --color-moved-ws=allow-indentation-change' can highlight lines that have internal whitespace changes rather than indentation changes. For example in commit 1a07e59c3e ("Update messages in preparation for i18n", 2018-07-21) the lines - die (_("must end with a color")); + die(_("must end with a color")); are highlighted as moved when they should not be. Modify an existing test to show the problem that will be fixed in the next commit. 
Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index a9fb226..eee81a1 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1809,7 +1809,7 @@ test_expect_success 'only move detection ignores white spaces' ' test_cmp expected actual ' -test_expect_success 'compare whitespace delta across moved blocks' ' +test_expect_failure 'compare whitespace delta across moved blocks' ' git reset --hard && q_to_tab <<-\EOF >text.txt && @@ -1827,6 +1827,7 @@ test_expect_success 'compare whitespace delta across moved blocks' ' QQQthat has similar lines QQQto previous blocks, but with different indent QQQYetQAnotherQoutlierQ + QLine with internal w h i t e s p a c e change EOF git add text.txt && @@ -1847,6 +1848,7 @@ test_expect_success 'compare whitespace delta across moved blocks' ' QQthat has similar lines QQto previous blocks, but with different indent QQYetQAnotherQoutlier + QLine with internal whitespace change EOF git diff --color --color-moved --color-moved-ws=allow-indentation-change >actual.raw && @@ -1856,7 +1858,7 @@ test_expect_success 'compare whitespace delta across moved blocks' ' diff --git a/text.txt b/text.txt --- a/text.txt +++ b/text.txt - @@ -1,14 +1,14 @@ + @@ -1,15 +1,15 @@ -QIndented -QText across -Qsome lines @@ -1871,6 +1873,7 @@ test_expect_success 'compare whitespace delta across moved blocks' ' -QQQthat has similar lines -QQQto previous blocks, but with different indent -QQQYetQAnotherQoutlierQ + -QLine with internal w h i t e s p a c e change +QQIndented +QQText across +QQsome lines @@ -1885,6 +1888,7 @@ test_expect_success 'compare whitespace delta across moved blocks' ' +QQthat has similar lines +QQto previous blocks, but with different indent +QQYetQAnotherQoutlier + +QLine with internal whitespace change EOF test_cmp expected actual -- cgit v0.10.2-6-g49f6 From 2034b473e1316d206c39be67dcb3f71fa93c06f5 Mon Sep 17 00:00:00 
2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:54 +0000 Subject: diff --color-moved-ws: fix false positives 'diff --color-moved-ws=allow-indentation-change' can color lines as moved when they are in fact different. For example in commit 1a07e59c3e ("Update messages in preparation for i18n", 2018-07-21) the lines - die (_("must end with a color")); + die(_("must end with a color")); are colored as moved even though they are different. This is because if there is a fuzzy match for the first line of a potential moved block the line is marked as moved before the potential match is checked to see if it actually matches. The fix is to delay marking the line as moved until after we have checked that there really is at least one matching potential moved block. Note that the test modified in the last commit still fails because adding an unmoved line between two moved blocks that are already separated by unmoved lines changes the color of the block following the addition. This should not be the case and will be fixed in the next commit. 
Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index 4ef0042..8c0778d 100644 --- a/diff.c +++ b/diff.c @@ -1106,10 +1106,10 @@ static void mark_color_as_moved(struct diff_options *o, continue; } - l->flags |= DIFF_SYMBOL_MOVED_LINE; - - if (o->color_moved == COLOR_MOVED_PLAIN) + if (o->color_moved == COLOR_MOVED_PLAIN) { + l->flags |= DIFF_SYMBOL_MOVED_LINE; continue; + } if (o->color_moved_ws_handling & COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) @@ -1143,10 +1143,13 @@ static void mark_color_as_moved(struct diff_options *o, block_length = 0; } - block_length++; + if (pmb_nr) { + block_length++; - if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) - l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; + l->flags |= DIFF_SYMBOL_MOVED_LINE; + if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) + l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; + } } adjust_last_block(o, n, block_length); -- cgit v0.10.2-6-g49f6 From b0a2ba47761fa7bffb5a33e5a76f85da50a00ba5 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:55 +0000 Subject: diff --color-moved=zebra: be stricter with color alternation Currently when using --color-moved=zebra the color of moved blocks depends on the number of lines separating them. This means that adding an odd number of unmoved lines between blocks that are already separated by one or more unmoved lines will change the color of subsequent moved blocks. This does not make much sense as the blocks were already separated by unmoved lines and causes problems when adding lines to test cases. Fix this by only using the alternate colors for adjacent moved blocks. 
Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index 8c0778d..b648f67 100644 --- a/diff.c +++ b/diff.c @@ -1040,14 +1040,17 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb, * The last block consists of the (n - block_length)'th line up to but not * including the nth line. * + * Returns 0 if the last block is empty or is unset by this function, non zero + * otherwise. + * * NEEDSWORK: This uses the same heuristic as blame_entry_score() in blame.c. * Think of a way to unify them. */ -static void adjust_last_block(struct diff_options *o, int n, int block_length) +static int adjust_last_block(struct diff_options *o, int n, int block_length) { int i, alnum_count = 0; if (o->color_moved == COLOR_MOVED_PLAIN) - return; + return block_length; for (i = 1; i < block_length + 1; i++) { const char *c = o->emitted_symbols->buf[n - i].line; for (; *c; c++) { @@ -1055,11 +1058,12 @@ static void adjust_last_block(struct diff_options *o, int n, int block_length) continue; alnum_count++; if (alnum_count >= COLOR_MOVED_MIN_ALNUM_COUNT) - return; + return 1; } } for (i = 1; i < block_length + 1; i++) o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE; + return 0; } /* Find blocks of moved code, delegate actual coloring decision to helper */ @@ -1069,7 +1073,7 @@ static void mark_color_as_moved(struct diff_options *o, { struct moved_block *pmb = NULL; /* potentially moved blocks */ int pmb_nr = 0, pmb_alloc = 0; - int n, flipped_block = 1, block_length = 0; + int n, flipped_block = 0, block_length = 0; for (n = 0; n < o->emitted_symbols->nr; n++) { @@ -1077,6 +1081,7 @@ static void mark_color_as_moved(struct diff_options *o, struct moved_entry *key; struct moved_entry *match = NULL; struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; + enum diff_symbol last_symbol = 0; switch (l->s) { case DIFF_SYMBOL_PLUS: @@ -1092,7 +1097,7 @@ static void mark_color_as_moved(struct 
diff_options *o, free(key); break; default: - flipped_block = 1; + flipped_block = 0; } if (!match) { @@ -1103,10 +1108,13 @@ static void mark_color_as_moved(struct diff_options *o, moved_block_clear(&pmb[i]); pmb_nr = 0; block_length = 0; + flipped_block = 0; + last_symbol = l->s; continue; } if (o->color_moved == COLOR_MOVED_PLAIN) { + last_symbol = l->s; l->flags |= DIFF_SYMBOL_MOVED_LINE; continue; } @@ -1137,19 +1145,22 @@ static void mark_color_as_moved(struct diff_options *o, } } - flipped_block = (flipped_block + 1) % 2; + if (adjust_last_block(o, n, block_length) && + pmb_nr && last_symbol != l->s) + flipped_block = (flipped_block + 1) % 2; + else + flipped_block = 0; - adjust_last_block(o, n, block_length); block_length = 0; } if (pmb_nr) { block_length++; - l->flags |= DIFF_SYMBOL_MOVED_LINE; if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS) l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT; } + last_symbol = l->s; } adjust_last_block(o, n, block_length); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index eee81a1..fe8a2ab 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1802,14 +1802,14 @@ test_expect_success 'only move detection ignores white spaces' ' -a long line to exceed per-line minimum -another long line to exceed per-line minimum -original file - +Qa long line to exceed per-line minimum - +Qanother long line to exceed per-line minimum + +Qa long line to exceed per-line minimum + +Qanother long line to exceed per-line minimum +new file EOF test_cmp expected actual ' -test_expect_failure 'compare whitespace delta across moved blocks' ' +test_expect_success 'compare whitespace delta across moved blocks' ' git reset --hard && q_to_tab <<-\EOF >text.txt && -- cgit v0.10.2-6-g49f6 From 7a4252c4df49fe07bf91dbb5be2c6012f6a65329 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:56 +0000 Subject: diff --color-moved-ws: optimize allow-indentation-change When running git diff 
--color-moved-ws=allow-indentation-change v2.18.0 v2.19.0 cmp_in_block_with_wsd() is called 694908327 times. Of those 42.7% return after comparing a and b. By comparing the lengths first we can return early in all but 0.03% of those cases without dereferencing the string pointers. The comparison between a and c fails in 6.8% of calls, by comparing the lengths first we reject all the failing calls without dereferencing the string pointers. This reduces the time to run the command above by 42% from 14.6s to 8.5s. This is still much slower than the normal --color-moved which takes ~0.6-0.7s to run but is a significant improvement. The next commits will replace the current implementation with one that works with mixed tabs and spaces in the indentation. I think it is worth optimizing the current implementation first to enable a fair comparison between the two implementations. Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index b648f67..4ee5801 100644 --- a/diff.c +++ b/diff.c @@ -831,20 +831,23 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, int n) { struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n]; - int al = cur->es->len, cl = l->len; + int al = cur->es->len, bl = match->es->len, cl = l->len; const char *a = cur->es->line, *b = match->es->line, *c = l->line; - + const char *orig_a = a; int wslen; /* - * We need to check if 'cur' is equal to 'match'. - * As those are from the same (+/-) side, we do not need to adjust for - * indent changes. However these were found using fuzzy matching - * so we do have to check if they are equal. + * We need to check if 'cur' is equal to 'match'. As those + * are from the same (+/-) side, we do not need to adjust for + * indent changes. However these were found using fuzzy + * matching so we do have to check if they are equal. Here we + * just check the lengths. 
We delay calling memcmp() to check + * the contents until later as if the length comparison for a + * and c fails we can avoid the call all together. */ - if (strcmp(a, b)) + if (al != bl) return 1; if (!pmb->wsd.string) @@ -872,7 +875,7 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, al -= wslen; } - if (al != cl || memcmp(a, c, al)) + if (al != cl || memcmp(orig_a, b, bl) || memcmp(a, c, al)) return 1; return 0; -- cgit v0.10.2-6-g49f6 From 21536d077f4b7b8a249f1fc894dafc38f06cef0f Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:57 +0000 Subject: diff --color-moved-ws: modify allow-indentation-change Currently diff --color-moved-ws=allow-indentation-change does not support indentation that contains a mix of tabs and spaces. For example in commit 546f70f377 ("convert.h: drop 'extern' from function declaration", 2018-06-30) the function parameters in the following lines are not colored as moved [1]. -extern int stream_filter(struct stream_filter *, - const char *input, size_t *isize_p, - char *output, size_t *osize_p); +int stream_filter(struct stream_filter *, + const char *input, size_t *isize_p, + char *output, size_t *osize_p); This commit changes the way the indentation is handled to track the visual size of the indentation rather than the characters in the indentation. This has the benefit that any whitespace errors do not interfere with the move detection (the whitespace errors will still be highlighted according to --ws-error-highlight). During the discussion of this feature there were concerns about the correct detection of indentation for python. However those concerns apply whether or not we're detecting moved lines so no attempt is made to determine if the indentation is 'pythonic'. [1] Note that before the commit to fix the erroneous coloring of moved lines each line was colored as a different block, since that commit they are uncolored. 
Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index 4ee5801..941956d 100644 --- a/diff.c +++ b/diff.c @@ -752,6 +752,8 @@ struct emitted_diff_symbol { const char *line; int len; int flags; + int indent_off; /* Offset to first non-whitespace character */ + int indent_width; /* The visual width of the indentation */ enum diff_symbol s; }; #define EMITTED_DIFF_SYMBOL_INIT {NULL} @@ -782,44 +784,68 @@ struct moved_entry { struct moved_entry *next_line; }; -/** - * The struct ws_delta holds white space differences between moved lines, i.e. - * between '+' and '-' lines that have been detected to be a move. - * The string contains the difference in leading white spaces, before the - * rest of the line is compared using the white space config for move - * coloring. The current_longer indicates if the first string in the - * comparision is longer than the second. - */ -struct ws_delta { - char *string; - unsigned int current_longer : 1; -}; -#define WS_DELTA_INIT { NULL, 0 } - struct moved_block { struct moved_entry *match; - struct ws_delta wsd; + int wsd; /* The whitespace delta of this block */ }; static void moved_block_clear(struct moved_block *b) { - FREE_AND_NULL(b->wsd.string); - b->match = NULL; + memset(b, 0, sizeof(*b)); } -static int compute_ws_delta(const struct emitted_diff_symbol *a, - const struct emitted_diff_symbol *b, - struct ws_delta *out) +static void fill_es_indent_data(struct emitted_diff_symbol *es) { - const struct emitted_diff_symbol *longer = a->len > b->len ? a : b; - const struct emitted_diff_symbol *shorter = a->len > b->len ? 
b : a; - int d = longer->len - shorter->len; + unsigned int off = 0; + int width = 0, tab_width = es->flags & WS_TAB_WIDTH_MASK; + const char *s = es->line; + const int len = es->len; + + /* skip any \v \f \r at start of indentation */ + while (s[off] == '\f' || s[off] == '\v' || + (s[off] == '\r' && off < len - 1)) + off++; + + /* calculate the visual width of indentation */ + while(1) { + if (s[off] == ' ') { + width++; + off++; + } else if (s[off] == '\t') { + width += tab_width - (width % tab_width); + while (s[++off] == '\t') + width += tab_width; + } else { + break; + } + } + + es->indent_off = off; + es->indent_width = width; +} + +static int compute_ws_delta(const struct emitted_diff_symbol *a, + const struct emitted_diff_symbol *b, + int *out) +{ + int a_len = a->len, + b_len = b->len, + a_off = a->indent_off, + a_width = a->indent_width, + b_off = b->indent_off, + b_width = b->indent_width; + int delta; + + if (a->s == DIFF_SYMBOL_PLUS) + delta = a_width - b_width; + else + delta = b_width - a_width; - if (strncmp(longer->line + d, shorter->line, shorter->len)) + if (a_len - a_off != b_len - b_off || + memcmp(a->line + a_off, b->line + b_off, a_len - a_off)) return 0; - out->string = xmemdupz(longer->line, d); - out->current_longer = (a == longer); + *out = delta; return 1; } @@ -835,8 +861,11 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, const char *a = cur->es->line, *b = match->es->line, *c = l->line; - const char *orig_a = a; - int wslen; + int a_off = cur->es->indent_off, + a_width = cur->es->indent_width, + c_off = l->indent_off, + c_width = l->indent_width; + int delta; /* * We need to check if 'cur' is equal to 'match'. As those @@ -850,35 +879,20 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, if (al != bl) return 1; - if (!pmb->wsd.string) - /* - * The white space delta is not active? This can happen - * when we exit early in this function. 
- */ - return 1; - /* - * The indent changes of the block are known and stored in - * pmb->wsd; however we need to check if the indent changes of the - * current line are still the same as before. - * - * To do so we need to compare 'l' to 'cur', adjusting the - * one of them for the white spaces, depending which was longer. + * The indent changes of the block are known and stored in pmb->wsd; + * however we need to check if the indent changes of the current line + * match those of the current block and that the text of 'l' and 'cur' + * after the indentation match. */ + if (cur->es->s == DIFF_SYMBOL_PLUS) + delta = a_width - c_width; + else + delta = c_width - a_width; - wslen = strlen(pmb->wsd.string); - if (pmb->wsd.current_longer) { - c += wslen; - cl -= wslen; - } else { - a += wslen; - al -= wslen; - } - - if (al != cl || memcmp(orig_a, b, bl) || memcmp(a, c, al)) - return 1; - - return 0; + return !(delta == pmb->wsd && al - a_off == cl - c_off && + !memcmp(a, b, al) && ! + memcmp(a + a_off, c + c_off, al - a_off)); } static int moved_entry_cmp(const void *hashmap_cmp_fn_data, @@ -944,6 +958,9 @@ static void add_lines_to_move_detection(struct diff_options *o, continue; } + if (o->color_moved_ws_handling & + COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) + fill_es_indent_data(&o->emitted_symbols->buf[n]); key = prepare_entry(o, n); if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s) prev_line->next_line = key; @@ -1022,8 +1039,7 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb, if (lp < pmb_nr && rp > -1 && lp < rp) { pmb[lp] = pmb[rp]; - pmb[rp].match = NULL; - pmb[rp].wsd.string = NULL; + memset(&pmb[rp], 0, sizeof(pmb[rp])); rp--; lp++; } @@ -1143,7 +1159,7 @@ static void mark_color_as_moved(struct diff_options *o, &pmb[pmb_nr].wsd)) pmb[pmb_nr++].match = match; } else { - pmb[pmb_nr].wsd.string = NULL; + pmb[pmb_nr].wsd = 0; pmb[pmb_nr++].match = match; } } @@ -1507,7 +1523,7 @@ static void emit_diff_symbol_from_struct(struct 
diff_options *o, static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s, const char *line, int len, unsigned flags) { - struct emitted_diff_symbol e = {line, len, flags, s}; + struct emitted_diff_symbol e = {line, len, flags, 0, 0, s}; if (o->emitted_symbols) append_emitted_diff_symbol(o, &e); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index fe8a2ab..e023839 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1901,4 +1901,60 @@ test_expect_success 'compare whitespace delta incompatible with other space opti test_i18ngrep allow-indentation-change err ' +test_expect_success 'compare mixed whitespace delta across moved blocks' ' + + git reset --hard && + tr Q_ "\t " <<-EOF >text.txt && + ____Indented text to + _Q____be further indented by four spaces across + ____Qseveral lines + QQ____These two lines have had their + ____indentation reduced by four spaces + Qdifferent indentation change + ____too short + EOF + + git add text.txt && + git commit -m "add text.txt" && + + tr Q_ "\t " <<-EOF >text.txt && + QIndented text to + QQbe further indented by four spaces across + Q____several lines + Q_QThese two lines have had their + indentation reduced by four spaces + QQdifferent indentation change + __Qtoo short + EOF + + git -c color.diff.whitespace="normal red" \ + -c core.whitespace=space-before-tab \ + diff --color --color-moved --ws-error-highlight=all \ + --color-moved-ws=allow-indentation-change >actual.raw && + grep -v "index" actual.raw | test_decode_color >actual && + + cat <<-\EOF >expected && + diff --git a/text.txt b/text.txt + --- a/text.txt + +++ b/text.txt + @@ -1,7 +1,7 @@ + - Indented text to + - be further indented by four spaces across + - several lines + - These two lines have had their + - indentation reduced by four spaces + - different indentation change + - too short + + Indented text to + + be further indented by four spaces across + + several lines + + These two lines have had 
their + +indentation reduced by four spaces + + different indentation change + + too short + EOF + + test_cmp expected actual +' + test_done -- cgit v0.10.2-6-g49f6 From 0cd51e9d05e65608126c30fbb65a0cdd197cd570 Mon Sep 17 00:00:00 2001 From: Phillip Wood Date: Fri, 23 Nov 2018 11:16:58 +0000 Subject: diff --color-moved-ws: handle blank lines When using --color-moved-ws=allow-indentation-change allow lines with the same indentation change to be grouped across blank lines. For now this only works if the blank lines have been moved as well, not for blocks that have just had their indentation changed. This completes the changes to the implementation of --color-moved-ws=allow-indentation-change. Running git diff --color-moved-ws=allow-indentation-change v2.18.0 v2.19.0 now takes 5.0s. This is a saving of 41% from 8.5s for the optimized version of the previous implementation and 66% from the original which took 14.6s. Signed-off-by: Phillip Wood Reviewed-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/diff.c b/diff.c index 941956d..03ffe46 100644 --- a/diff.c +++ b/diff.c @@ -794,9 +794,11 @@ static void moved_block_clear(struct moved_block *b) memset(b, 0, sizeof(*b)); } +#define INDENT_BLANKLINE INT_MIN + static void fill_es_indent_data(struct emitted_diff_symbol *es) { - unsigned int off = 0; + unsigned int off = 0, i; int width = 0, tab_width = es->flags & WS_TAB_WIDTH_MASK; const char *s = es->line; const int len = es->len; @@ -820,8 +822,18 @@ static void fill_es_indent_data(struct emitted_diff_symbol *es) } } - es->indent_off = off; - es->indent_width = width; + /* check if this line is blank */ + for (i = off; i < len; i++) + if (!isspace(s[i])) + break; + + if (i == len) { + es->indent_width = INDENT_BLANKLINE; + es->indent_off = len; + } else { + es->indent_off = off; + es->indent_width = width; + } } static int compute_ws_delta(const struct emitted_diff_symbol *a, @@ -836,6 +848,11 @@ static int compute_ws_delta(const struct emitted_diff_symbol *a, 
b_width = b->indent_width; int delta; + if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) { + *out = INDENT_BLANKLINE; + return 1; + } + if (a->s == DIFF_SYMBOL_PLUS) delta = a_width - b_width; else @@ -879,6 +896,10 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, if (al != bl) return 1; + /* If 'l' and 'cur' are both blank then they match. */ + if (a_width == INDENT_BLANKLINE && c_width == INDENT_BLANKLINE) + return 0; + /* * The indent changes of the block are known and stored in pmb->wsd; * however we need to check if the indent changes of the current line @@ -890,6 +911,13 @@ static int cmp_in_block_with_wsd(const struct diff_options *o, else delta = c_width - a_width; + /* + * If the previous lines of this block were all blank then set its + * whitespace delta. + */ + if (pmb->wsd == INDENT_BLANKLINE) + pmb->wsd = delta; + return !(delta == pmb->wsd && al - a_off == cl - c_off && !memcmp(a, b, al) && ! memcmp(a + a_off, c + c_off, al - a_off)); diff --git a/t/t4015-diff-whitespace.sh b/t/t4015-diff-whitespace.sh index e023839..9d6f88b 100755 --- a/t/t4015-diff-whitespace.sh +++ b/t/t4015-diff-whitespace.sh @@ -1901,10 +1901,20 @@ test_expect_success 'compare whitespace delta incompatible with other space opti test_i18ngrep allow-indentation-change err ' +EMPTY='' test_expect_success 'compare mixed whitespace delta across moved blocks' ' git reset --hard && tr Q_ "\t " <<-EOF >text.txt && + ${EMPTY} + ____too short without + ${EMPTY} + ___being grouped across blank line + ${EMPTY} + context + lines + to + anchor ____Indented text to _Q____be further indented by four spaces across ____Qseveral lines @@ -1918,9 +1928,18 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' git commit -m "add text.txt" && tr Q_ "\t " <<-EOF >text.txt && + context + lines + to + anchor QIndented text to QQbe further indented by four spaces across Q____several lines + ${EMPTY} + QQtoo short without + ${EMPTY} + Q_______being 
grouped across blank line + ${EMPTY} Q_QThese two lines have had their indentation reduced by four spaces QQdifferent indentation change @@ -1937,7 +1956,16 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' diff --git a/text.txt b/text.txt --- a/text.txt +++ b/text.txt - @@ -1,7 +1,7 @@ + @@ -1,16 +1,16 @@ + - + - too short without + - + - being grouped across blank line + - + context + lines + to + anchor - Indented text to - be further indented by four spaces across - several lines @@ -1948,9 +1976,14 @@ test_expect_success 'compare mixed whitespace delta across moved blocks' ' + Indented text to + be further indented by four spaces across + several lines - + These two lines have had their - +indentation reduced by four spaces - + different indentation change + + + + too short without + + + + being grouped across blank line + + + + These two lines have had their + +indentation reduced by four spaces + + different indentation change + too short EOF -- cgit v0.10.2-6-g49f6