summaryrefslogtreecommitdiff
path: root/diff.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2022-01-05 22:01:29 (GMT)
committerJunio C Hamano <gitster@pobox.com>2022-01-05 22:01:29 (GMT)
commit2b755b337134aac9b1386e2d216cf734e1e229d2 (patch)
treea1ffb96dc36429bc60a9b4b4a53a96b0ed140f42 /diff.c
parentead6767ad7f835f4802248371709c4506ad6a4f1 (diff)
parent72962e8b3c3ea3a631166876b4668718103be4fe (diff)
downloadgit-2b755b337134aac9b1386e2d216cf734e1e229d2.zip
git-2b755b337134aac9b1386e2d216cf734e1e229d2.tar.gz
git-2b755b337134aac9b1386e2d216cf734e1e229d2.tar.bz2
Merge branch 'pw/diff-color-moved-fix'
Correctness and performance update to "diff --color-moved" feature. * pw/diff-color-moved-fix: diff --color-moved: intern strings diff: use designated initializers for emitted_diff_symbol diff --color-moved-ws=allow-indentation-change: improve hash lookups diff --color-moved: stop clearing potential moved blocks diff --color-moved: shrink potential moved blocks as we go diff --color-moved: unify moved block growth functions diff --color-moved: call comparison function directly diff --color-moved-ws=allow-indentation-change: simplify and optimize diff: simplify allow-indentation-change delta calculation diff --color-moved: avoid false short line matches and bad zebra coloring diff --color-moved=zebra: fix alternate coloring diff --color-moved: rewind when discarding pmb diff --color-moved: factor out function diff --color-moved: clear all flags on blocks that are too short diff --color-moved: add perf tests
Diffstat (limited to 'diff.c')
-rw-r--r--diff.c431
1 files changed, 186 insertions, 245 deletions
diff --git a/diff.c b/diff.c
index 4107685..0b4f9b5 100644
--- a/diff.c
+++ b/diff.c
@@ -18,6 +18,7 @@
#include "submodule-config.h"
#include "submodule.h"
#include "hashmap.h"
+#include "mem-pool.h"
#include "ll-merge.h"
#include "string-list.h"
#include "strvec.h"
@@ -773,6 +774,7 @@ struct emitted_diff_symbol {
int flags;
int indent_off; /* Offset to first non-whitespace character */
int indent_width; /* The visual width of the indentation */
+ unsigned id;
enum diff_symbol s;
};
#define EMITTED_DIFF_SYMBOL_INIT { 0 }
@@ -798,9 +800,9 @@ static void append_emitted_diff_symbol(struct diff_options *o,
}
struct moved_entry {
- struct hashmap_entry ent;
const struct emitted_diff_symbol *es;
struct moved_entry *next_line;
+ struct moved_entry *next_match;
};
struct moved_block {
@@ -808,11 +810,6 @@ struct moved_block {
int wsd; /* The whitespace delta of this block */
};
-static void moved_block_clear(struct moved_block *b)
-{
- memset(b, 0, sizeof(*b));
-}
-
#define INDENT_BLANKLINE INT_MIN
static void fill_es_indent_data(struct emitted_diff_symbol *es)
@@ -856,79 +853,41 @@ static void fill_es_indent_data(struct emitted_diff_symbol *es)
}
static int compute_ws_delta(const struct emitted_diff_symbol *a,
- const struct emitted_diff_symbol *b,
- int *out)
-{
- int a_len = a->len,
- b_len = b->len,
- a_off = a->indent_off,
- a_width = a->indent_width,
- b_off = b->indent_off,
+ const struct emitted_diff_symbol *b)
+{
+ int a_width = a->indent_width,
b_width = b->indent_width;
- int delta;
- if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE) {
- *out = INDENT_BLANKLINE;
- return 1;
- }
-
- if (a->s == DIFF_SYMBOL_PLUS)
- delta = a_width - b_width;
- else
- delta = b_width - a_width;
-
- if (a_len - a_off != b_len - b_off ||
- memcmp(a->line + a_off, b->line + b_off, a_len - a_off))
- return 0;
+ if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE)
+ return INDENT_BLANKLINE;
- *out = delta;
-
- return 1;
+ return a_width - b_width;
}
-static int cmp_in_block_with_wsd(const struct diff_options *o,
- const struct moved_entry *cur,
- const struct moved_entry *match,
- struct moved_block *pmb,
- int n)
-{
- struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
- int al = cur->es->len, bl = match->es->len, cl = l->len;
- const char *a = cur->es->line,
- *b = match->es->line,
- *c = l->line;
- int a_off = cur->es->indent_off,
- a_width = cur->es->indent_width,
- c_off = l->indent_off,
- c_width = l->indent_width;
+static int cmp_in_block_with_wsd(const struct moved_entry *cur,
+ const struct emitted_diff_symbol *l,
+ struct moved_block *pmb)
+{
+ int a_width = cur->es->indent_width, b_width = l->indent_width;
int delta;
- /*
- * We need to check if 'cur' is equal to 'match'. As those
- * are from the same (+/-) side, we do not need to adjust for
- * indent changes. However these were found using fuzzy
- * matching so we do have to check if they are equal. Here we
- * just check the lengths. We delay calling memcmp() to check
- * the contents until later as if the length comparison for a
- * and c fails we can avoid the call all together.
- */
- if (al != bl)
+ /* The text of each line must match */
+ if (cur->es->id != l->id)
return 1;
- /* If 'l' and 'cur' are both blank then they match. */
- if (a_width == INDENT_BLANKLINE && c_width == INDENT_BLANKLINE)
+ /*
+ * If 'l' and 'cur' are both blank then we don't need to check the
+ * indent. We only need to check cur as we know the strings match.
+ * */
+ if (a_width == INDENT_BLANKLINE)
return 0;
/*
* The indent changes of the block are known and stored in pmb->wsd;
* however we need to check if the indent changes of the current line
- * match those of the current block and that the text of 'l' and 'cur'
- * after the indentation match.
+ * match those of the current block.
*/
- if (cur->es->s == DIFF_SYMBOL_PLUS)
- delta = a_width - c_width;
- else
- delta = c_width - a_width;
+ delta = b_width - a_width;
/*
* If the previous lines of this block were all blank then set its
@@ -937,166 +896,165 @@ static int cmp_in_block_with_wsd(const struct diff_options *o,
if (pmb->wsd == INDENT_BLANKLINE)
pmb->wsd = delta;
- return !(delta == pmb->wsd && al - a_off == cl - c_off &&
- !memcmp(a, b, al) && !
- memcmp(a + a_off, c + c_off, al - a_off));
+ return delta != pmb->wsd;
}
-static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
- const struct hashmap_entry *eptr,
- const struct hashmap_entry *entry_or_key,
- const void *keydata)
+struct interned_diff_symbol {
+ struct hashmap_entry ent;
+ struct emitted_diff_symbol *es;
+};
+
+static int interned_diff_symbol_cmp(const void *hashmap_cmp_fn_data,
+ const struct hashmap_entry *eptr,
+ const struct hashmap_entry *entry_or_key,
+ const void *keydata)
{
const struct diff_options *diffopt = hashmap_cmp_fn_data;
- const struct moved_entry *a, *b;
+ const struct emitted_diff_symbol *a, *b;
unsigned flags = diffopt->color_moved_ws_handling
& XDF_WHITESPACE_FLAGS;
- a = container_of(eptr, const struct moved_entry, ent);
- b = container_of(entry_or_key, const struct moved_entry, ent);
-
- if (diffopt->color_moved_ws_handling &
- COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
- /*
- * As there is not specific white space config given,
- * we'd need to check for a new block, so ignore all
- * white space. The setup of the white space
- * configuration for the next block is done else where
- */
- flags |= XDF_IGNORE_WHITESPACE;
+ a = container_of(eptr, const struct interned_diff_symbol, ent)->es;
+ b = container_of(entry_or_key, const struct interned_diff_symbol, ent)->es;
- return !xdiff_compare_lines(a->es->line, a->es->len,
- b->es->line, b->es->len,
- flags);
+ return !xdiff_compare_lines(a->line + a->indent_off,
+ a->len - a->indent_off,
+ b->line + b->indent_off,
+ b->len - b->indent_off, flags);
}
-static struct moved_entry *prepare_entry(struct diff_options *o,
- int line_no)
+static void prepare_entry(struct diff_options *o, struct emitted_diff_symbol *l,
+ struct interned_diff_symbol *s)
{
- struct moved_entry *ret = xmalloc(sizeof(*ret));
- struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no];
unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
- unsigned int hash = xdiff_hash_string(l->line, l->len, flags);
-
- hashmap_entry_init(&ret->ent, hash);
- ret->es = l;
- ret->next_line = NULL;
+ unsigned int hash = xdiff_hash_string(l->line + l->indent_off,
+ l->len - l->indent_off, flags);
- return ret;
+ hashmap_entry_init(&s->ent, hash);
+ s->es = l;
}
-static void add_lines_to_move_detection(struct diff_options *o,
- struct hashmap *add_lines,
- struct hashmap *del_lines)
+struct moved_entry_list {
+ struct moved_entry *add, *del;
+};
+
+static struct moved_entry_list *add_lines_to_move_detection(struct diff_options *o,
+ struct mem_pool *entry_mem_pool)
{
struct moved_entry *prev_line = NULL;
-
+ struct mem_pool interned_pool;
+ struct hashmap interned_map;
+ struct moved_entry_list *entry_list = NULL;
+ size_t entry_list_alloc = 0;
+ unsigned id = 0;
int n;
+
+ hashmap_init(&interned_map, interned_diff_symbol_cmp, o, 8096);
+ mem_pool_init(&interned_pool, 1024 * 1024);
+
for (n = 0; n < o->emitted_symbols->nr; n++) {
- struct hashmap *hm;
- struct moved_entry *key;
+ struct interned_diff_symbol key;
+ struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
+ struct interned_diff_symbol *s;
+ struct moved_entry *entry;
- switch (o->emitted_symbols->buf[n].s) {
- case DIFF_SYMBOL_PLUS:
- hm = add_lines;
- break;
- case DIFF_SYMBOL_MINUS:
- hm = del_lines;
- break;
- default:
+ if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS) {
prev_line = NULL;
continue;
}
if (o->color_moved_ws_handling &
COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
- fill_es_indent_data(&o->emitted_symbols->buf[n]);
- key = prepare_entry(o, n);
- if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s)
- prev_line->next_line = key;
+ fill_es_indent_data(l);
- hashmap_add(hm, &key->ent);
- prev_line = key;
+ prepare_entry(o, l, &key);
+ s = hashmap_get_entry(&interned_map, &key, ent, &key.ent);
+ if (s) {
+ l->id = s->es->id;
+ } else {
+ l->id = id;
+ ALLOC_GROW_BY(entry_list, id, 1, entry_list_alloc);
+ hashmap_add(&interned_map,
+ memcpy(mem_pool_alloc(&interned_pool,
+ sizeof(key)),
+ &key, sizeof(key)));
+ }
+ entry = mem_pool_alloc(entry_mem_pool, sizeof(*entry));
+ entry->es = l;
+ entry->next_line = NULL;
+ if (prev_line && prev_line->es->s == l->s)
+ prev_line->next_line = entry;
+ prev_line = entry;
+ if (l->s == DIFF_SYMBOL_PLUS) {
+ entry->next_match = entry_list[l->id].add;
+ entry_list[l->id].add = entry;
+ } else {
+ entry->next_match = entry_list[l->id].del;
+ entry_list[l->id].del = entry;
+ }
}
+
+ hashmap_clear(&interned_map);
+ mem_pool_discard(&interned_pool, 0);
+
+ return entry_list;
}
static void pmb_advance_or_null(struct diff_options *o,
- struct moved_entry *match,
- struct hashmap *hm,
+ struct emitted_diff_symbol *l,
struct moved_block *pmb,
- int pmb_nr)
+ int *pmb_nr)
{
- int i;
- for (i = 0; i < pmb_nr; i++) {
+ int i, j;
+
+ for (i = 0, j = 0; i < *pmb_nr; i++) {
+ int match;
struct moved_entry *prev = pmb[i].match;
struct moved_entry *cur = (prev && prev->next_line) ?
prev->next_line : NULL;
- if (cur && !hm->cmpfn(o, &cur->ent, &match->ent, NULL)) {
- pmb[i].match = cur;
- } else {
- pmb[i].match = NULL;
- }
- }
-}
-static void pmb_advance_or_null_multi_match(struct diff_options *o,
- struct moved_entry *match,
- struct hashmap *hm,
- struct moved_block *pmb,
- int pmb_nr, int n)
-{
- int i;
- char *got_match = xcalloc(1, pmb_nr);
-
- hashmap_for_each_entry_from(hm, match, ent) {
- for (i = 0; i < pmb_nr; i++) {
- struct moved_entry *prev = pmb[i].match;
- struct moved_entry *cur = (prev && prev->next_line) ?
- prev->next_line : NULL;
- if (!cur)
- continue;
- if (!cmp_in_block_with_wsd(o, cur, match, &pmb[i], n))
- got_match[i] |= 1;
- }
- }
+ if (o->color_moved_ws_handling &
+ COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+ match = cur &&
+ !cmp_in_block_with_wsd(cur, l, &pmb[i]);
+ else
+ match = cur && cur->es->id == l->id;
- for (i = 0; i < pmb_nr; i++) {
- if (got_match[i]) {
- /* Advance to the next line */
- pmb[i].match = pmb[i].match->next_line;
- } else {
- moved_block_clear(&pmb[i]);
+ if (match) {
+ pmb[j] = pmb[i];
+ pmb[j++].match = cur;
}
}
-
- free(got_match);
+ *pmb_nr = j;
}
-static int shrink_potential_moved_blocks(struct moved_block *pmb,
- int pmb_nr)
-{
- int lp, rp;
-
- /* Shrink the set of potential block to the remaining running */
- for (lp = 0, rp = pmb_nr - 1; lp <= rp;) {
- while (lp < pmb_nr && pmb[lp].match)
- lp++;
- /* lp points at the first NULL now */
+static void fill_potential_moved_blocks(struct diff_options *o,
+ struct moved_entry *match,
+ struct emitted_diff_symbol *l,
+ struct moved_block **pmb_p,
+ int *pmb_alloc_p, int *pmb_nr_p)
- while (rp > -1 && !pmb[rp].match)
- rp--;
- /* rp points at the last non-NULL */
+{
+ struct moved_block *pmb = *pmb_p;
+ int pmb_alloc = *pmb_alloc_p, pmb_nr = *pmb_nr_p;
- if (lp < pmb_nr && rp > -1 && lp < rp) {
- pmb[lp] = pmb[rp];
- memset(&pmb[rp], 0, sizeof(pmb[rp]));
- rp--;
- lp++;
- }
+ /*
+ * The current line is the start of a new block.
+ * Setup the set of potential blocks.
+ */
+ for (; match; match = match->next_match) {
+ ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
+ if (o->color_moved_ws_handling &
+ COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
+ pmb[pmb_nr].wsd = compute_ws_delta(l, match->es);
+ else
+ pmb[pmb_nr].wsd = 0;
+ pmb[pmb_nr++].match = match;
}
- /* Remember the number of running sets */
- return rp + 1;
+ *pmb_p = pmb;
+ *pmb_alloc_p = pmb_alloc;
+ *pmb_nr_p = pmb_nr;
}
/*
@@ -1115,6 +1073,8 @@ static int shrink_potential_moved_blocks(struct moved_block *pmb,
* NEEDSWORK: This uses the same heuristic as blame_entry_score() in blame.c.
* Think of a way to unify them.
*/
+#define DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK \
+ (DIFF_SYMBOL_MOVED_LINE | DIFF_SYMBOL_MOVED_LINE_ALT)
static int adjust_last_block(struct diff_options *o, int n, int block_length)
{
int i, alnum_count = 0;
@@ -1131,95 +1091,85 @@ static int adjust_last_block(struct diff_options *o, int n, int block_length)
}
}
for (i = 1; i < block_length + 1; i++)
- o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE;
+ o->emitted_symbols->buf[n - i].flags &= ~DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK;
return 0;
}
/* Find blocks of moved code, delegate actual coloring decision to helper */
static void mark_color_as_moved(struct diff_options *o,
- struct hashmap *add_lines,
- struct hashmap *del_lines)
+ struct moved_entry_list *entry_list)
{
struct moved_block *pmb = NULL; /* potentially moved blocks */
int pmb_nr = 0, pmb_alloc = 0;
int n, flipped_block = 0, block_length = 0;
+ enum diff_symbol moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER;
for (n = 0; n < o->emitted_symbols->nr; n++) {
- struct hashmap *hm = NULL;
- struct moved_entry *key;
struct moved_entry *match = NULL;
struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
- enum diff_symbol last_symbol = 0;
switch (l->s) {
case DIFF_SYMBOL_PLUS:
- hm = del_lines;
- key = prepare_entry(o, n);
- match = hashmap_get_entry(hm, key, ent, NULL);
- free(key);
+ match = entry_list[l->id].del;
break;
case DIFF_SYMBOL_MINUS:
- hm = add_lines;
- key = prepare_entry(o, n);
- match = hashmap_get_entry(hm, key, ent, NULL);
- free(key);
+ match = entry_list[l->id].add;
break;
default:
flipped_block = 0;
}
- if (!match) {
- int i;
-
- adjust_last_block(o, n, block_length);
- for(i = 0; i < pmb_nr; i++)
- moved_block_clear(&pmb[i]);
+ if (pmb_nr && (!match || l->s != moved_symbol)) {
+ if (!adjust_last_block(o, n, block_length) &&
+ block_length > 1) {
+ /*
+ * Rewind in case there is another match
+ * starting at the second line of the block
+ */
+ match = NULL;
+ n -= block_length;
+ }
pmb_nr = 0;
block_length = 0;
flipped_block = 0;
- last_symbol = l->s;
+ }
+ if (!match) {
+ moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER;
continue;
}
if (o->color_moved == COLOR_MOVED_PLAIN) {
- last_symbol = l->s;
l->flags |= DIFF_SYMBOL_MOVED_LINE;
continue;
}
- if (o->color_moved_ws_handling &
- COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
- pmb_advance_or_null_multi_match(o, match, hm, pmb, pmb_nr, n);
- else
- pmb_advance_or_null(o, match, hm, pmb, pmb_nr);
-
- pmb_nr = shrink_potential_moved_blocks(pmb, pmb_nr);
+ pmb_advance_or_null(o, l, pmb, &pmb_nr);
if (pmb_nr == 0) {
- /*
- * The current line is the start of a new block.
- * Setup the set of potential blocks.
- */
- hashmap_for_each_entry_from(hm, match, ent) {
- ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
- if (o->color_moved_ws_handling &
- COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE) {
- if (compute_ws_delta(l, match->es,
- &pmb[pmb_nr].wsd))
- pmb[pmb_nr++].match = match;
- } else {
- pmb[pmb_nr].wsd = 0;
- pmb[pmb_nr++].match = match;
- }
- }
+ int contiguous = adjust_last_block(o, n, block_length);
+
+ if (!contiguous && block_length > 1)
+ /*
+ * Rewind in case there is another match
+ * starting at the second line of the block
+ */
+ n -= block_length;
+ else
+ fill_potential_moved_blocks(o, match, l,
+ &pmb, &pmb_alloc,
+ &pmb_nr);
- if (adjust_last_block(o, n, block_length) &&
- pmb_nr && last_symbol != l->s)
+ if (contiguous && pmb_nr && moved_symbol == l->s)
flipped_block = (flipped_block + 1) % 2;
else
flipped_block = 0;
+ if (pmb_nr)
+ moved_symbol = l->s;
+ else
+ moved_symbol = DIFF_SYMBOL_BINARY_DIFF_HEADER;
+
block_length = 0;
}
@@ -1229,17 +1179,12 @@ static void mark_color_as_moved(struct diff_options *o,
if (flipped_block && o->color_moved != COLOR_MOVED_BLOCKS)
l->flags |= DIFF_SYMBOL_MOVED_LINE_ALT;
}
- last_symbol = l->s;
}
adjust_last_block(o, n, block_length);
- for(n = 0; n < pmb_nr; n++)
- moved_block_clear(&pmb[n]);
free(pmb);
}
-#define DIFF_SYMBOL_MOVED_LINE_ZEBRA_MASK \
- (DIFF_SYMBOL_MOVED_LINE | DIFF_SYMBOL_MOVED_LINE_ALT)
static void dim_moved_lines(struct diff_options *o)
{
int n;
@@ -1573,7 +1518,9 @@ static void emit_diff_symbol_from_struct(struct diff_options *o,
static void emit_diff_symbol(struct diff_options *o, enum diff_symbol s,
const char *line, int len, unsigned flags)
{
- struct emitted_diff_symbol e = {line, len, flags, 0, 0, s};
+ struct emitted_diff_symbol e = {
+ .line = line, .len = len, .flags = flags, .s = s
+ };
if (o->emitted_symbols)
append_emitted_diff_symbol(o, &e);
@@ -6345,24 +6292,18 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
if (o->emitted_symbols) {
if (o->color_moved) {
- struct hashmap add_lines, del_lines;
-
- if (o->color_moved_ws_handling &
- COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
- o->color_moved_ws_handling |= XDF_IGNORE_WHITESPACE;
-
- hashmap_init(&del_lines, moved_entry_cmp, o, 0);
- hashmap_init(&add_lines, moved_entry_cmp, o, 0);
+ struct mem_pool entry_pool;
+ struct moved_entry_list *entry_list;
- add_lines_to_move_detection(o, &add_lines, &del_lines);
- mark_color_as_moved(o, &add_lines, &del_lines);
+ mem_pool_init(&entry_pool, 1024 * 1024);
+ entry_list = add_lines_to_move_detection(o,
+ &entry_pool);
+ mark_color_as_moved(o, entry_list);
if (o->color_moved == COLOR_MOVED_ZEBRA_DIM)
dim_moved_lines(o);
- hashmap_clear_and_free(&add_lines, struct moved_entry,
- ent);
- hashmap_clear_and_free(&del_lines, struct moved_entry,
- ent);
+ mem_pool_discard(&entry_pool, 0);
+ free(entry_list);
}
for (i = 0; i < esm.nr; i++)