Merge branch 'br/blame-ignore'

"git blame" learned to "ignore" commits in the history, whose effects (as well as their presence) get ignored. * br/blame-ignore: t8014: remove unnecessary braces blame: drop some unused function parameters blame: add a test to cover blame_coalesce() blame: use the fingerprint heuristic to match ignored lines blame: add a fingerprint heuristic to match ignored lines blame: optionally track line fingerprints during fill_blame_origin() blame: add config options for the output of ignored or unblamable lines blame: add the ability to ignore commits and their changes blame: use a helper function in blame_chunk() Move oidset_parse_file() to oidset.c fsck: rename and touch up init_skiplist()
author: Junio C Hamano <gitster@pobox.com> 2019-07-19 18:30:20 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2019-07-19 18:30:20 (GMT)
commit: 209f0755934a0c9b448408f9b7c9849c15041ecc (patch)
tree: ea04d9cd2847c3753c778b4dc5b976f37b19c910
parent: c62bff2cedead7df6fc55a745f073715da30727e (diff)
parent: 78fafbb2800915047ad2f1578c34ca19c316eca2 (diff)
download: git-209f0755934a0c9b448408f9b7c9849c15041ecc.zip
git-209f0755934a0c9b448408f9b7c9849c15041ecc.tar.gz
git-209f0755934a0c9b448408f9b7c9849c15041ecc.tar.bz2
13 files changed, 1855 insertions, 99 deletions
diff --git a/Documentation/blame-options.txt b/Documentation/blame-options.txt
index dc41957..5d122db 100644
--- a/Documentation/blame-options.txt
+++ b/Documentation/blame-options.txt
@@ -110,5 +110,24 @@ commit. And the default value is 40. If there are more than one
 `-C` options given, the <num> argument of the last `-C` will
 take effect.
 
+--ignore-rev <rev>::
+	Ignore changes made by the revision when assigning blame, as if the
+	change never happened.  Lines that were changed or added by an ignored
+	commit will be blamed on the previous commit that changed that line or
+	nearby lines.  This option may be specified multiple times to ignore
+	more than one revision.  If the `blame.markIgnoredLines` config option
+	is set, then lines that were changed by an ignored commit and attributed to
+	another commit will be marked with a `?` in the blame output.  If the
+	`blame.markUnblamableLines` config option is set, then those lines touched
+	by an ignored commit that we could not attribute to another revision are
+	marked with a '*'.
+
+--ignore-revs-file <file>::
+	Ignore revisions listed in `file`, which must be in the same format as an
+	`fsck.skipList`.  This option may be repeated, and these files will be
+	processed after any files specified with the `blame.ignoreRevsFile` config
+	option.  An empty file name, `""`, will clear the list of revs from
+	previously processed files.
+
 -h::
 	Show help message.
diff --git a/Documentation/config/blame.txt b/Documentation/config/blame.txt
index 67b5c1d..9468e85 100644
--- a/Documentation/config/blame.txt
+++ b/Documentation/config/blame.txt
@@ -19,3 +19,19 @@ blame.showEmail::
 blame.showRoot::
 	Do not treat root commits as boundaries in linkgit:git-blame[1].
 	This option defaults to false.
+
+blame.ignoreRevsFile::
+	Ignore revisions listed in the file, one unabbreviated object name per
+	line, in linkgit:git-blame[1].  Whitespace and comments beginning with
+	`#` are ignored.  This option may be repeated multiple times.  Empty
+	file names will reset the list of ignored revisions.  This option will
+	be handled before the command line option `--ignore-revs-file`.
+
+blame.markUnblamables::
+	Mark lines that were changed by an ignored revision that we could not
+	attribute to another commit with a '*' in the output of
+	linkgit:git-blame[1].
+
+blame.markIgnoredLines::
+	Mark lines that were changed by an ignored revision that we attributed to
+	another commit with a '?' in the output of linkgit:git-blame[1].
diff --git a/Documentation/git-blame.txt b/Documentation/git-blame.txt
index 16323eb..7e81541 100644
--- a/Documentation/git-blame.txt
+++ b/Documentation/git-blame.txt
@@ -10,6 +10,7 @@ SYNOPSIS
 [verse]
 'git blame' [-c] [-b] [-l] [--root] [-t] [-f] [-n] [-s] [-e] [-p] [-w] [--incremental]
 	    [-L <range>] [-S <revs-file>] [-M] [-C] [-C] [-C] [--since=<date>]
+	    [--ignore-rev <rev>] [--ignore-revs-file <file>]
 	    [--progress] [--abbrev=<n>] [<rev> | --contents <file> | --reverse <rev>..<rev>]
 	    [--] <file>
 
diff --git a/blame.c b/blame.c
index 145eaf2..7f04580 100644
--- a/blame.c
+++ b/blame.c
@@ -311,12 +311,707 @@ static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
 	return xdi_diff(file_a, file_b, &xpp, &xecfg, &ecb);
 }
 
+static const char *get_next_line(const char *start, const char *end)
+{
+	const char *nl = memchr(start, '\n', end - start);
+
+	return nl ? nl + 1 : end;
+}
+
+static int find_line_starts(int **line_starts, const char *buf,
+			    unsigned long len)
+{
+	const char *end = buf + len;
+	const char *p;
+	int *lineno;
+	int num = 0;
+
+	for (p = buf; p < end; p = get_next_line(p, end))
+		num++;
+
+	ALLOC_ARRAY(*line_starts, num + 1);
+	lineno = *line_starts;
+
+	for (p = buf; p < end; p = get_next_line(p, end))
+		*lineno++ = p - buf;
+
+	*lineno = len;
+
+	return num;
+}
+
+struct fingerprint_entry;
+
+/* A fingerprint is intended to loosely represent a string, such that two
+ * fingerprints can be quickly compared to give an indication of the similarity
+ * of the strings that they represent.
+ *
+ * A fingerprint is represented as a multiset of the lower-cased byte pairs in
+ * the string that it represents. Whitespace is added at each end of the
+ * string. Whitespace pairs are ignored. Whitespace is converted to '\0'.
+ * For example, the string "Darth   Radar" will be converted to the following
+ * fingerprint:
+ * {"\0d", "da", "da", "ar", "ar", "rt", "th", "h\0", "\0r", "ra", "ad", "r\0"}
+ *
+ * The similarity between two fingerprints is the size of the intersection of
+ * their multisets, including repeated elements. See fingerprint_similarity for
+ * examples.
+ *
+ * For ease of implementation, the fingerprint is implemented as a map
+ * of byte pairs to the count of that byte pair in the string, instead of
+ * allowing repeated elements in a set.
+ */
+struct fingerprint {
+	struct hashmap map;
+	/* As we know the maximum number of entries in advance, it's
+	 * convenient to store the entries in a single array instead of having
+	 * the hashmap manage the memory.
+	 */
+	struct fingerprint_entry *entries;
+};
+
+/* A byte pair in a fingerprint. Stores the number of times the byte pair
+ * occurs in the string that the fingerprint represents.
+ */
+struct fingerprint_entry {
+	/* The hashmap entry - the hash represents the byte pair in its
+	 * entirety so we don't need to store the byte pair separately.
+	 */
+	struct hashmap_entry entry;
+	/* The number of times the byte pair occurs in the string that the
+	 * fingerprint represents.
+	 */
+	int count;
+};
+
+/* See `struct fingerprint` for an explanation of what a fingerprint is.
+ * \param result the fingerprint of the string is stored here. This must be
+ * 		 freed later using free_fingerprint.
+ * \param line_begin the start of the string
+ * \param line_end the end of the string
+ */
+static void get_fingerprint(struct fingerprint *result,
+			    const char *line_begin,
+			    const char *line_end)
+{
+	unsigned int hash, c0 = 0, c1;
+	const char *p;
+	int max_map_entry_count = 1 + line_end - line_begin;
+	struct fingerprint_entry *entry = xcalloc(max_map_entry_count,
+		sizeof(struct fingerprint_entry));
+	struct fingerprint_entry *found_entry;
+
+	hashmap_init(&result->map, NULL, NULL, max_map_entry_count);
+	result->entries = entry;
+	for (p = line_begin; p <= line_end; ++p, c0 = c1) {
+		/* Always terminate the string with whitespace.
+		 * Normalise whitespace to 0, and normalise letters to
+		 * lower case. This won't work for multibyte characters but at
+		 * worst will match some unrelated characters.
+		 */
+		if ((p == line_end) || isspace(*p))
+			c1 = 0;
+		else
+			c1 = tolower(*p);
+		hash = c0 | (c1 << 8);
+		/* Ignore whitespace pairs */
+		if (hash == 0)
+			continue;
+		hashmap_entry_init(entry, hash);
+
+		found_entry = hashmap_get(&result->map, entry, NULL);
+		if (found_entry) {
+			found_entry->count += 1;
+		} else {
+			entry->count = 1;
+			hashmap_add(&result->map, entry);
+			++entry;
+		}
+	}
+}
+
+static void free_fingerprint(struct fingerprint *f)
+{
+	hashmap_free(&f->map, 0);
+	free(f->entries);
+}
+
+/* Calculates the similarity between two fingerprints as the size of the
+ * intersection of their multisets, including repeated elements. See
+ * `struct fingerprint` for an explanation of the fingerprint representation.
+ * The similarity between "cat mat" and "father rather" is 2 because "at" is
+ * present twice in both strings while the similarity between "tim" and "mit"
+ * is 0.
+ */
+static int fingerprint_similarity(struct fingerprint *a, struct fingerprint *b)
+{
+	int intersection = 0;
+	struct hashmap_iter iter;
+	const struct fingerprint_entry *entry_a, *entry_b;
+
+	hashmap_iter_init(&b->map, &iter);
+
+	while ((entry_b = hashmap_iter_next(&iter))) {
+		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+			intersection += entry_a->count < entry_b->count ?
+					entry_a->count : entry_b->count;
+		}
+	}
+	return intersection;
+}
+
+/* Subtracts byte-pair elements in B from A, modifying A in place.
+ */
+static void fingerprint_subtract(struct fingerprint *a, struct fingerprint *b)
+{
+	struct hashmap_iter iter;
+	struct fingerprint_entry *entry_a;
+	const struct fingerprint_entry *entry_b;
+
+	hashmap_iter_init(&b->map, &iter);
+
+	while ((entry_b = hashmap_iter_next(&iter))) {
+		if ((entry_a = hashmap_get(&a->map, entry_b, NULL))) {
+			if (entry_a->count <= entry_b->count)
+				hashmap_remove(&a->map, entry_b, NULL);
+			else
+				entry_a->count -= entry_b->count;
+		}
+	}
+}
+
+/* Calculate fingerprints for a series of lines.
+ * Puts the fingerprints in the fingerprints array, which must have been
+ * preallocated to allow storing line_count elements.
+ */
+static void get_line_fingerprints(struct fingerprint *fingerprints,
+				  const char *content, const int *line_starts,
+				  long first_line, long line_count)
+{
+	int i;
+	const char *linestart, *lineend;
+
+	line_starts += first_line;
+	for (i = 0; i < line_count; ++i) {
+		linestart = content + line_starts[i];
+		lineend = content + line_starts[i + 1];
+		get_fingerprint(fingerprints + i, linestart, lineend);
+	}
+}
+
+static void free_line_fingerprints(struct fingerprint *fingerprints,
+				   int nr_fingerprints)
+{
+	int i;
+
+	for (i = 0; i < nr_fingerprints; i++)
+		free_fingerprint(&fingerprints[i]);
+}
+
+/* This contains the data necessary to linearly map a line number in one half
+ * of a diff chunk to the line in the other half of the diff chunk that is
+ * closest in terms of its position as a fraction of the length of the chunk.
+ */
+struct line_number_mapping {
+	int destination_start, destination_length,
+		source_start, source_length;
+};
+
+/* Given a line number in one range, offset and scale it to map it onto the
+ * other range.
+ * Essentially this mapping is a simple linear equation but the calculation is
+ * more complicated to allow performing it with integer operations.
+ * Another complication is that if a line could map onto many lines in the
+ * destination range then we want to choose the line at the center of those
+ * possibilities.
+ * Example: if the chunk is 2 lines long in A and 10 lines long in B then the
+ * first 5 lines in B will map onto the first line in the A chunk, while the
+ * last 5 lines will all map onto the second line in the A chunk.
+ * Example: if the chunk is 10 lines long in A and 2 lines long in B then line
+ * 0 in B will map onto line 2 in A, and line 1 in B will map onto line 7 in A.
+ */
+static int map_line_number(int line_number,
+	const struct line_number_mapping *mapping)
+{
+	return ((line_number - mapping->source_start) * 2 + 1) *
+	       mapping->destination_length /
+	       (mapping->source_length * 2) +
+	       mapping->destination_start;
+}
+
+/* Get a pointer to the element storing the similarity between a line in A
+ * and a line in B.
+ *
+ * The similarities are stored in a 2-dimensional array. Each "row" in the
+ * array contains the similarities for a line in B. The similarities stored in
+ * a row are the similarities between the line in B and the nearby lines in A.
+ * To keep the length of each row the same, it is padded out with values of -1
+ * where the search range extends beyond the lines in A.
+ * For example, if max_search_distance_a is 2 and the two sides of a diff chunk
+ * look like this:
+ * a | m
+ * b | n
+ * c | o
+ * d | p
+ * e | q
+ * Then the similarity array will contain:
+ * [-1, -1, am, bm, cm,
+ *  -1, an, bn, cn, dn,
+ *  ao, bo, co, do, eo,
+ *  bp, cp, dp, ep, -1,
+ *  cq, dq, eq, -1, -1]
+ * Where similarities are denoted either by -1 for invalid, or the
+ * concatenation of the two lines in the diff being compared.
+ *
+ * \param similarities array of similarities between lines in A and B
+ * \param line_a the index of the line in A, in the same frame of reference as
+ *	closest_line_a.
+ * \param local_line_b the index of the line in B, relative to the first line
+ *		       in B that similarities represents.
+ * \param closest_line_a the index of the line in A that is deemed to be
+ *			 closest to local_line_b. This must be in the same
+ *			 frame of reference as line_a. This value defines
+ *			 where similarities is centered for the line in B.
+ * \param max_search_distance_a maximum distance in lines from the closest line
+ * 				in A for other lines in A for which
+ * 				similarities may be calculated.
+ */
+static int *get_similarity(int *similarities,
+			   int line_a, int local_line_b,
+			   int closest_line_a, int max_search_distance_a)
+{
+	assert(abs(line_a - closest_line_a) <=
+	       max_search_distance_a);
+	return similarities + line_a - closest_line_a +
+	       max_search_distance_a +
+	       local_line_b * (max_search_distance_a * 2 + 1);
+}
+
+#define CERTAIN_NOTHING_MATCHES -2
+#define CERTAINTY_NOT_CALCULATED -1
+
+/* Given a line in B, first calculate its similarities with nearby lines in A
+ * if not already calculated, then identify the most similar and second most
+ * similar lines. The "certainty" is calculated based on those two
+ * similarities.
+ *
+ * \param start_a the index of the first line of the chunk in A
+ * \param length_a the length in lines of the chunk in A
+ * \param local_line_b the index of the line in B, relative to the first line
+ * 		       in the chunk.
+ * \param fingerprints_a array of fingerprints for the chunk in A
+ * \param fingerprints_b array of fingerprints for the chunk in B
+ * \param similarities 2-dimensional array of similarities between lines in A
+ * 		       and B. See get_similarity() for more details.
+ * \param certainties array of values indicating how strongly a line in B is
+ * 		      matched with some line in A.
+ * \param second_best_result array of absolute indices in A for the second
+ * 			     closest match of a line in B.
+ * \param result array of absolute indices in A for the closest match of a line
+ * 		 in B.
+ * \param max_search_distance_a maximum distance in lines from the closest line
+ * 				in A for other lines in A for which
+ * 				similarities may be calculated.
+ * \param map_line_number_in_b_to_a parameter to map_line_number().
+ */
+static void find_best_line_matches(
+	int start_a,
+	int length_a,
+	int start_b,
+	int local_line_b,
+	struct fingerprint *fingerprints_a,
+	struct fingerprint *fingerprints_b,
+	int *similarities,
+	int *certainties,
+	int *second_best_result,
+	int *result,
+	const int max_search_distance_a,
+	const struct line_number_mapping *map_line_number_in_b_to_a)
+{
+
+	int i, search_start, search_end, closest_local_line_a, *similarity,
+		best_similarity = 0, second_best_similarity = 0,
+		best_similarity_index = 0, second_best_similarity_index = 0;
+
+	/* certainty has already been calculated so no need to redo the work */
+	if (certainties[local_line_b] != CERTAINTY_NOT_CALCULATED)
+		return;
+
+	closest_local_line_a = map_line_number(
+		local_line_b + start_b, map_line_number_in_b_to_a) - start_a;
+
+	search_start = closest_local_line_a - max_search_distance_a;
+	if (search_start < 0)
+		search_start = 0;
+
+	search_end = closest_local_line_a + max_search_distance_a + 1;
+	if (search_end > length_a)
+		search_end = length_a;
+
+	for (i = search_start; i < search_end; ++i) {
+		similarity = get_similarity(similarities,
+					    i, local_line_b,
+					    closest_local_line_a,
+					    max_search_distance_a);
+		if (*similarity == -1) {
+			/* This value will never exceed 10 but assert just in
+			 * case
+			 */
+			assert(abs(i - closest_local_line_a) < 1000);
+			/* scale the similarity by (1000 - distance from
+			 * closest line) to act as a tie break between lines
+			 * that otherwise are equally similar.
+			 */
+			*similarity = fingerprint_similarity(
+				fingerprints_b + local_line_b,
+				fingerprints_a + i) *
+				(1000 - abs(i - closest_local_line_a));
+		}
+		if (*similarity > best_similarity) {
+			second_best_similarity = best_similarity;
+			second_best_similarity_index = best_similarity_index;
+			best_similarity = *similarity;
+			best_similarity_index = i;
+		} else if (*similarity > second_best_similarity) {
+			second_best_similarity = *similarity;
+			second_best_similarity_index = i;
+		}
+	}
+
+	if (best_similarity == 0) {
+		/* this line definitely doesn't match with anything. Mark it
+		 * with this special value so it doesn't get invalidated and
+		 * won't be recalculated.
+		 */
+		certainties[local_line_b] = CERTAIN_NOTHING_MATCHES;
+		result[local_line_b] = -1;
+	} else {
+		/* Calculate the certainty with which this line matches.
+		 * If the line matches well with two lines then that reduces
+		 * the certainty. However we still want to prioritise matching
+		 * a line that matches very well with two lines over matching a
+		 * line that matches poorly with one line, hence doubling
+		 * best_similarity.
+		 * This means that if we have
+		 * line X that matches only one line with a score of 3,
+		 * line Y that matches two lines equally with a score of 5,
+		 * and line Z that matches only one line with a score or 2,
+		 * then the lines in order of certainty are X, Y, Z.
+		 */
+		certainties[local_line_b] = best_similarity * 2 -
+			second_best_similarity;
+
+		/* We keep both the best and second best results to allow us to
+		 * check at a later stage of the matching process whether the
+		 * result needs to be invalidated.
+		 */
+		result[local_line_b] = start_a + best_similarity_index;
+		second_best_result[local_line_b] =
+			start_a + second_best_similarity_index;
+	}
+}
+
+/*
+ * This finds the line that we can match with the most confidence, and
+ * uses it as a partition. It then calls itself on the lines on either side of
+ * that partition. In this way we avoid lines appearing out of order, and
+ * retain a sensible line ordering.
+ * \param start_a index of the first line in A with which lines in B may be
+ * 		  compared.
+ * \param start_b index of the first line in B for which matching should be
+ * 		  done.
+ * \param length_a number of lines in A with which lines in B may be compared.
+ * \param length_b number of lines in B for which matching should be done.
+ * \param fingerprints_a mutable array of fingerprints in A. The first element
+ * 			 corresponds to the line at start_a.
+ * \param fingerprints_b array of fingerprints in B. The first element
+ * 			 corresponds to the line at start_b.
+ * \param similarities 2-dimensional array of similarities between lines in A
+ * 		       and B. See get_similarity() for more details.
+ * \param certainties array of values indicating how strongly a line in B is
+ * 		      matched with some line in A.
+ * \param second_best_result array of absolute indices in A for the second
+ * 			     closest match of a line in B.
+ * \param result array of absolute indices in A for the closest match of a line
+ * 		 in B.
+ * \param max_search_distance_a maximum distance in lines from the closest line
+ * 			      in A for other lines in A for which
+ * 			      similarities may be calculated.
+ * \param max_search_distance_b an upper bound on the greatest possible
+ * 			      distance between lines in B such that they will
+ *                              both be compared with the same line in A
+ * 			      according to max_search_distance_a.
+ * \param map_line_number_in_b_to_a parameter to map_line_number().
+ */
+static void fuzzy_find_matching_lines_recurse(
+	int start_a, int start_b,
+	int length_a, int length_b,
+	struct fingerprint *fingerprints_a,
+	struct fingerprint *fingerprints_b,
+	int *similarities,
+	int *certainties,
+	int *second_best_result,
+	int *result,
+	int max_search_distance_a,
+	int max_search_distance_b,
+	const struct line_number_mapping *map_line_number_in_b_to_a)
+{
+	int i, invalidate_min, invalidate_max, offset_b,
+		second_half_start_a, second_half_start_b,
+		second_half_length_a, second_half_length_b,
+		most_certain_line_a, most_certain_local_line_b = -1,
+		most_certain_line_certainty = -1,
+		closest_local_line_a;
+
+	for (i = 0; i < length_b; ++i) {
+		find_best_line_matches(start_a,
+				       length_a,
+				       start_b,
+				       i,
+				       fingerprints_a,
+				       fingerprints_b,
+				       similarities,
+				       certainties,
+				       second_best_result,
+				       result,
+				       max_search_distance_a,
+				       map_line_number_in_b_to_a);
+
+		if (certainties[i] > most_certain_line_certainty) {
+			most_certain_line_certainty = certainties[i];
+			most_certain_local_line_b = i;
+		}
+	}
+
+	/* No matches. */
+	if (most_certain_local_line_b == -1)
+		return;
+
+	most_certain_line_a = result[most_certain_local_line_b];
+
+	/*
+	 * Subtract the most certain line's fingerprint in B from the matched
+	 * fingerprint in A. This means that other lines in B can't also match
+	 * the same parts of the line in A.
+	 */
+	fingerprint_subtract(fingerprints_a + most_certain_line_a - start_a,
+			     fingerprints_b + most_certain_local_line_b);
+
+	/* Invalidate results that may be affected by the choice of most
+	 * certain line.
+	 */
+	invalidate_min = most_certain_local_line_b - max_search_distance_b;
+	invalidate_max = most_certain_local_line_b + max_search_distance_b + 1;
+	if (invalidate_min < 0)
+		invalidate_min = 0;
+	if (invalidate_max > length_b)
+		invalidate_max = length_b;
+
+	/* As the fingerprint in A has changed, discard previously calculated
+	 * similarity values with that fingerprint.
+	 */
+	for (i = invalidate_min; i < invalidate_max; ++i) {
+		closest_local_line_a = map_line_number(
+			i + start_b, map_line_number_in_b_to_a) - start_a;
+
+		/* Check that the lines in A and B are close enough that there
+		 * is a similarity value for them.
+		 */
+		if (abs(most_certain_line_a - start_a - closest_local_line_a) >
+			max_search_distance_a) {
+			continue;
+		}
+
+		*get_similarity(similarities, most_certain_line_a - start_a,
+				i, closest_local_line_a,
+				max_search_distance_a) = -1;
+	}
+
+	/* More invalidating of results that may be affected by the choice of
+	 * most certain line.
+	 * Discard the matches for lines in B that are currently matched with a
+	 * line in A such that their ordering contradicts the ordering imposed
+	 * by the choice of most certain line.
+	 */
+	for (i = most_certain_local_line_b - 1; i >= invalidate_min; --i) {
+		/* In this loop we discard results for lines in B that are
+		 * before most-certain-line-B but are matched with a line in A
+		 * that is after most-certain-line-A.
+		 */
+		if (certainties[i] >= 0 &&
+		    (result[i] >= most_certain_line_a ||
+		     second_best_result[i] >= most_certain_line_a)) {
+			certainties[i] = CERTAINTY_NOT_CALCULATED;
+		}
+	}
+	for (i = most_certain_local_line_b + 1; i < invalidate_max; ++i) {
+		/* In this loop we discard results for lines in B that are
+		 * after most-certain-line-B but are matched with a line in A
+		 * that is before most-certain-line-A.
+		 */
+		if (certainties[i] >= 0 &&
+		    (result[i] <= most_certain_line_a ||
+		     second_best_result[i] <= most_certain_line_a)) {
+			certainties[i] = CERTAINTY_NOT_CALCULATED;
+		}
+	}
+
+	/* Repeat the matching process for lines before the most certain line.
+	 */
+	if (most_certain_local_line_b > 0) {
+		fuzzy_find_matching_lines_recurse(
+			start_a, start_b,
+			most_certain_line_a + 1 - start_a,
+			most_certain_local_line_b,
+			fingerprints_a, fingerprints_b, similarities,
+			certainties, second_best_result, result,
+			max_search_distance_a,
+			max_search_distance_b,
+			map_line_number_in_b_to_a);
+	}
+	/* Repeat the matching process for lines after the most certain line.
+	 */
+	if (most_certain_local_line_b + 1 < length_b) {
+		second_half_start_a = most_certain_line_a;
+		offset_b = most_certain_local_line_b + 1;
+		second_half_start_b = start_b + offset_b;
+		second_half_length_a =
+			length_a + start_a - second_half_start_a;
+		second_half_length_b =
+			length_b + start_b - second_half_start_b;
+		fuzzy_find_matching_lines_recurse(
+			second_half_start_a, second_half_start_b,
+			second_half_length_a, second_half_length_b,
+			fingerprints_a + second_half_start_a - start_a,
+			fingerprints_b + offset_b,
+			similarities +
+				offset_b * (max_search_distance_a * 2 + 1),
+			certainties + offset_b,
+			second_best_result + offset_b, result + offset_b,
+			max_search_distance_a,
+			max_search_distance_b,
+			map_line_number_in_b_to_a);
+	}
+}
+
+/* Find the lines in the parent line range that most closely match the lines in
+ * the target line range. This is accomplished by matching fingerprints in each
+ * blame_origin, and choosing the best matches that preserve the line ordering.
+ * See struct fingerprint for details of fingerprint matching, and
+ * fuzzy_find_matching_lines_recurse for details of preserving line ordering.
+ *
+ * The performance is believed to be O(n log n) in the typical case and O(n^2)
+ * in a pathological case, where n is the number of lines in the target range.
+ */
+static int *fuzzy_find_matching_lines(struct blame_origin *parent,
+				      struct blame_origin *target,
+				      int tlno, int parent_slno, int same,
+				      int parent_len)
+{
+	/* We use the terminology "A" for the left hand side of the diff AKA
+	 * parent, and "B" for the right hand side of the diff AKA target. */
+	int start_a = parent_slno;
+	int length_a = parent_len;
+	int start_b = tlno;
+	int length_b = same - tlno;
+
+	struct line_number_mapping map_line_number_in_b_to_a = {
+		start_a, length_a, start_b, length_b
+	};
+
+	struct fingerprint *fingerprints_a = parent->fingerprints;
+	struct fingerprint *fingerprints_b = target->fingerprints;
+
+	int i, *result, *second_best_result,
+		*certainties, *similarities, similarity_count;
+
+	/*
+	 * max_search_distance_a means that given a line in B, compare it to
+	 * the line in A that is closest to its position, and the lines in A
+	 * that are no greater than max_search_distance_a lines away from the
+	 * closest line in A.
+	 *
+	 * max_search_distance_b is an upper bound on the greatest possible
+	 * distance between lines in B such that they will both be compared
+	 * with the same line in A according to max_search_distance_a.
+	 */
+	int max_search_distance_a = 10, max_search_distance_b;
+
+	if (length_a <= 0)
+		return NULL;
+
+	if (max_search_distance_a >= length_a)
+		max_search_distance_a = length_a ? length_a - 1 : 0;
+
+	max_search_distance_b = ((2 * max_search_distance_a + 1) * length_b
+				 - 1) / length_a;
+
+	result = xcalloc(sizeof(int), length_b);
+	second_best_result = xcalloc(sizeof(int), length_b);
+	certainties = xcalloc(sizeof(int), length_b);
+
+	/* See get_similarity() for details of similarities. */
+	similarity_count = length_b * (max_search_distance_a * 2 + 1);
+	similarities = xcalloc(sizeof(int), similarity_count);
+
+	for (i = 0; i < length_b; ++i) {
+		result[i] = -1;
+		second_best_result[i] = -1;
+		certainties[i] = CERTAINTY_NOT_CALCULATED;
+	}
+
+	for (i = 0; i < similarity_count; ++i)
+		similarities[i] = -1;
+
+	fuzzy_find_matching_lines_recurse(start_a, start_b,
+					  length_a, length_b,
+					  fingerprints_a + start_a,
+					  fingerprints_b + start_b,
+					  similarities,
+					  certainties,
+					  second_best_result,
+					  result,
+					  max_search_distance_a,
+					  max_search_distance_b,
+					  &map_line_number_in_b_to_a);
+
+	free(similarities);
+	free(certainties);
+	free(second_best_result);
+
+	return result;
+}
+
+static void fill_origin_fingerprints(struct blame_origin *o)
+{
+	int *line_starts;
+
+	if (o->fingerprints)
+		return;
+	o->num_lines = find_line_starts(&line_starts, o->file.ptr,
+					o->file.size);
+	o->fingerprints = xcalloc(sizeof(struct fingerprint), o->num_lines);
+	get_line_fingerprints(o->fingerprints, o->file.ptr, line_starts,
+			      0, o->num_lines);
+	free(line_starts);
+}
+
+static void drop_origin_fingerprints(struct blame_origin *o)
+{
+	if (o->fingerprints) {
+		free_line_fingerprints(o->fingerprints, o->num_lines);
+		o->num_lines = 0;
+		FREE_AND_NULL(o->fingerprints);
+	}
+}
+
 /*
  * Given an origin, prepare mmfile_t structure to be used by the
  * diff machinery
  */
 static void fill_origin_blob(struct diff_options *opt,
-			     struct blame_origin *o, mmfile_t *file, int *num_read_blob)
+			     struct blame_origin *o, mmfile_t *file,
+			     int *num_read_blob, int fill_fingerprints)
 {
 	if (!o->file.ptr) {
 		enum object_type type;
@@ -340,11 +1035,14 @@ static void fill_origin_blob(struct diff_options *opt,
 	}
 	else
 		*file = o->file;
+	if (fill_fingerprints)
+		fill_origin_fingerprints(o);
 }
 
 static void drop_origin_blob(struct blame_origin *o)
 {
 	FREE_AND_NULL(o->file.ptr);
+	drop_origin_fingerprints(o);
 }
 
 /*
@@ -480,7 +1178,9 @@ void blame_coalesce(struct blame_scoreboard *sb)
 
 	for (ent = sb->ent; ent && (next = ent->next); ent = next) {
 		if (ent->suspect == next->suspect &&
-		    ent->s_lno + ent->num_lines == next->s_lno) {
+		    ent->s_lno + ent->num_lines == next->s_lno &&
+		    ent->ignored == next->ignored &&
+		    ent->unblamable == next->unblamable) {
 			ent->num_lines += next->num_lines;
 			ent->next = next->next;
 			blame_origin_decref(next->suspect);
@@ -730,8 +1430,14 @@ static void split_overlap(struct blame_entry *split,
 			  struct blame_origin *parent)
 {
 	int chunk_end_lno;
+	int i;
 	memset(split, 0, sizeof(struct blame_entry [3]));
 
+	for (i = 0; i < 3; i++) {
+		split[i].ignored = e->ignored;
+		split[i].unblamable = e->unblamable;
+	}
+
 	if (e->s_lno < tlno) {
 		/* there is a pre-chunk part not blamed on parent */
 		split[0].suspect = blame_origin_incref(e->suspect);
@@ -840,6 +1546,164 @@ static struct blame_entry *reverse_blame(struct blame_entry *head,
 }
 
 /*
+ * Splits a blame entry into two entries at 'len' lines.  The original 'e'
+ * consists of len lines, i.e. [e->lno, e->lno + len), and the second part,
+ * which is returned, consists of the remainder: [e->lno + len, e->lno +
+ * e->num_lines).  The caller needs to sort out the reference counting for the
+ * new entry's suspect.
+ */
+static struct blame_entry *split_blame_at(struct blame_entry *e, int len,
+					  struct blame_origin *new_suspect)
+{
+	struct blame_entry *n = xcalloc(1, sizeof(struct blame_entry));
+
+	n->suspect = new_suspect;
+	n->ignored = e->ignored;
+	n->unblamable = e->unblamable;
+	n->lno = e->lno + len;
+	n->s_lno = e->s_lno + len;
+	n->num_lines = e->num_lines - len;
+	e->num_lines = len;
+	e->score = 0;
+	return n;
+}
+
+struct blame_line_tracker {
+	int is_parent;
+	int s_lno;
+};
+
+static int are_lines_adjacent(struct blame_line_tracker *first,
+			      struct blame_line_tracker *second)
+{
+	return first->is_parent == second->is_parent &&
+	       first->s_lno + 1 == second->s_lno;
+}
+
+static int scan_parent_range(struct fingerprint *p_fps,
+			     struct fingerprint *t_fps, int t_idx,
+			     int from, int nr_lines)
+{
+	int sim, p_idx;
+	#define FINGERPRINT_FILE_THRESHOLD	10
+	int best_sim_val = FINGERPRINT_FILE_THRESHOLD;
+	int best_sim_idx = -1;
+
+	for (p_idx = from; p_idx < from + nr_lines; p_idx++) {
+		sim = fingerprint_similarity(&t_fps[t_idx], &p_fps[p_idx]);
+		if (sim < best_sim_val)
+			continue;
+		/* Break ties with the closest-to-target line number */
+		if (sim == best_sim_val && best_sim_idx != -1 &&
+		    abs(best_sim_idx - t_idx) < abs(p_idx - t_idx))
+			continue;
+		best_sim_val = sim;
+		best_sim_idx = p_idx;
+	}
+	return best_sim_idx;
+}
+
+/*
+ * The first pass checks the blame entry (from the target) against the parent's
+ * diff chunk.  If that fails for a line, the second pass tries to match that
+ * line to any part of parent file.  That catches cases where a change was
+ * broken into two chunks by 'context.'
+ */
+static void guess_line_blames(struct blame_origin *parent,
+			      struct blame_origin *target,
+			      int tlno, int offset, int same, int parent_len,
+			      struct blame_line_tracker *line_blames)
+{
+	int i, best_idx, target_idx;
+	int parent_slno = tlno + offset;
+	int *fuzzy_matches;
+
+	fuzzy_matches = fuzzy_find_matching_lines(parent, target,
+						  tlno, parent_slno, same,
+						  parent_len);
+	for (i = 0; i < same - tlno; i++) {
+		target_idx = tlno + i;
+		if (fuzzy_matches && fuzzy_matches[i] >= 0) {
+			best_idx = fuzzy_matches[i];
+		} else {
+			best_idx = scan_parent_range(parent->fingerprints,
+						     target->fingerprints,
+						     target_idx, 0,
+						     parent->num_lines);
+		}
+		if (best_idx >= 0) {
+			line_blames[i].is_parent = 1;
+			line_blames[i].s_lno = best_idx;
+		} else {
+			line_blames[i].is_parent = 0;
+			line_blames[i].s_lno = target_idx;
+		}
+	}
+	free(fuzzy_matches);
+}
+
+/*
+ * This decides which parts of a blame entry go to the parent (added to the
+ * ignoredp list) and which stay with the target (added to the diffp list).  The
+ * actual decision was made in a separate heuristic function, and those answers
+ * for the lines in 'e' are in line_blames.  This consumes e, essentially
+ * putting it on a list.
+ *
+ * Note that the blame entries on the ignoredp list are not necessarily sorted
+ * with respect to the parent's line numbers yet.
+ */
+static void ignore_blame_entry(struct blame_entry *e,
+			       struct blame_origin *parent,
+			       struct blame_entry **diffp,
+			       struct blame_entry **ignoredp,
+			       struct blame_line_tracker *line_blames)
+{
+	int entry_len, nr_lines, i;
+
+	/*
+	 * We carve new entries off the front of e.  Each entry comes from a
+	 * contiguous chunk of lines: adjacent lines from the same origin
+	 * (either the parent or the target).
+	 */
+	entry_len = 1;
+	nr_lines = e->num_lines;	/* e changes in the loop */
+	for (i = 0; i < nr_lines; i++) {
+		struct blame_entry *next = NULL;
+
+		/*
+		 * We are often adjacent to the next line - only split the blame
+		 * entry when we have to.
+		 */
+		if (i + 1 < nr_lines) {
+			if (are_lines_adjacent(&line_blames[i],
+					       &line_blames[i + 1])) {
+				entry_len++;
+				continue;
+			}
+			next = split_blame_at(e, entry_len,
+					      blame_origin_incref(e->suspect));
+		}
+		if (line_blames[i].is_parent) {
+			e->ignored = 1;
+			blame_origin_decref(e->suspect);
+			e->suspect = blame_origin_incref(parent);
+			e->s_lno = line_blames[i - entry_len + 1].s_lno;
+			e->next = *ignoredp;
+			*ignoredp = e;
+		} else {
+			e->unblamable = 1;
+			/* e->s_lno is already in the target's address space. */
+			e->next = *diffp;
+			*diffp = e;
+		}
+		assert(e->num_lines == entry_len);
+		e = next;
+		entry_len = 1;
+	}
+	assert(!e);
+}
+
+/*
  * Process one hunk from the patch between the current suspect for
  * blame_entry e and its parent.  This first blames any unfinished
  * entries before the chunk (which is where target and parent start
@@ -848,13 +1712,20 @@ static struct blame_entry *reverse_blame(struct blame_entry *head,
  * -C options may lead to overlapping/duplicate source line number
  * ranges, all we can rely on from sorting/merging is the order of the
  * first suspect line number.
+ *
+ * tlno: line number in the target where this chunk begins
+ * same: line number in the target where this chunk ends
+ * offset: add to tlno to get the chunk starting point in the parent
+ * parent_len: number of lines in the parent chunk
  */
 static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
-			int tlno, int offset, int same,
-			struct blame_origin *parent)
+			int tlno, int offset, int same, int parent_len,
+			struct blame_origin *parent,
+			struct blame_origin *target, int ignore_diffs)
 {
 	struct blame_entry *e = **srcq;
-	struct blame_entry *samep = NULL, *diffp = NULL;
+	struct blame_entry *samep = NULL, *diffp = NULL, *ignoredp = NULL;
+	struct blame_line_tracker *line_blames = NULL;
 
 	while (e && e->s_lno < tlno) {
 		struct blame_entry *next = e->next;
@@ -865,14 +1736,9 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 		 */
 		if (e->s_lno + e->num_lines > tlno) {
 			/* Move second half to a new record */
-			int len = tlno - e->s_lno;
-			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
-			n->suspect = e->suspect;
-			n->lno = e->lno + len;
-			n->s_lno = e->s_lno + len;
-			n->num_lines = e->num_lines - len;
-			e->num_lines = len;
-			e->score = 0;
+			struct blame_entry *n;
+
+			n = split_blame_at(e, tlno - e->s_lno, e->suspect);
 			/* Push new record to diffp */
 			n->next = diffp;
 			diffp = n;
@@ -908,6 +1774,14 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 	 */
 	samep = NULL;
 	diffp = NULL;
+
+	if (ignore_diffs && same - tlno > 0) {
+		line_blames = xcalloc(sizeof(struct blame_line_tracker),
+				      same - tlno);
+		guess_line_blames(parent, target, tlno, offset, same,
+				  parent_len, line_blames);
+	}
+
 	while (e && e->s_lno < same) {
 		struct blame_entry *next = e->next;
 
@@ -919,22 +1793,37 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 			 * Move second half to a new record to be
 			 * processed by later chunks
 			 */
-			int len = same - e->s_lno;
-			struct blame_entry *n = xcalloc(1, sizeof (struct blame_entry));
-			n->suspect = blame_origin_incref(e->suspect);
-			n->lno = e->lno + len;
-			n->s_lno = e->s_lno + len;
-			n->num_lines = e->num_lines - len;
-			e->num_lines = len;
-			e->score = 0;
+			struct blame_entry *n;
+
+			n = split_blame_at(e, same - e->s_lno,
+					   blame_origin_incref(e->suspect));
 			/* Push new record to samep */
 			n->next = samep;
 			samep = n;
 		}
-		e->next = diffp;
-		diffp = e;
+		if (ignore_diffs) {
+			ignore_blame_entry(e, parent, &diffp, &ignoredp,
+					   line_blames + e->s_lno - tlno);
+		} else {
+			e->next = diffp;
+			diffp = e;
+		}
 		e = next;
 	}
+	free(line_blames);
+	if (ignoredp) {
+		/*
+		 * Note ignoredp is not sorted yet, and thus neither is dstq.
+		 * That list must be sorted before we queue_blames().  We defer
+		 * sorting until after all diff hunks are processed, so that
+		 * guess_line_blames() can pick *any* line in the parent.  The
+		 * slight drawback is that we end up sorting all blame entries
+		 * passed to the parent, including those that are unrelated to
+		 * changes made by the ignored commit.
+		 */
+		**dstq = reverse_blame(ignoredp, **dstq);
+		*dstq = &ignoredp->next;
+	}
 	**srcq = reverse_blame(diffp, reverse_blame(samep, e));
 	/* Move across elements that are in the unblamable portion */
 	if (diffp)
@@ -943,7 +1832,9 @@ static void blame_chunk(struct blame_entry ***dstq, struct blame_entry ***srcq,
 
 struct blame_chunk_cb_data {
 	struct blame_origin *parent;
+	struct blame_origin *target;
 	long offset;
+	int ignore_diffs;
 	struct blame_entry **dstq;
 	struct blame_entry **srcq;
 };
@@ -956,7 +1847,8 @@ static int blame_chunk_cb(long start_a, long count_a,
 	if (start_a - start_b != d->offset)
 		die("internal error in blame::blame_chunk_cb");
 	blame_chunk(&d->dstq, &d->srcq, start_b, start_a - start_b,
-		    start_b + count_b, d->parent);
+		    start_b + count_b, count_a, d->parent, d->target,
+		    d->ignore_diffs);
 	d->offset = start_a + count_a - (start_b + count_b);
 	return 0;
 }
@@ -968,7 +1860,7 @@ static int blame_chunk_cb(long start_a, long count_a,
  */
 static void pass_blame_to_parent(struct blame_scoreboard *sb,
 				 struct blame_origin *target,
-				 struct blame_origin *parent)
+				 struct blame_origin *parent, int ignore_diffs)
 {
 	mmfile_t file_p, file_o;
 	struct blame_chunk_cb_data d;
@@ -978,11 +1870,15 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 		return; /* nothing remains for this target */
 
 	d.parent = parent;
+	d.target = target;
 	d.offset = 0;
+	d.ignore_diffs = ignore_diffs;
 	d.dstq = &newdest; d.srcq = &target->suspects;
 
-	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
-	fill_origin_blob(&sb->revs->diffopt, target, &file_o, &sb->num_read_blob);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+			 &sb->num_read_blob, ignore_diffs);
+	fill_origin_blob(&sb->revs->diffopt, target, &file_o,
+			 &sb->num_read_blob, ignore_diffs);
 	sb->num_get_patch++;
 
 	if (diff_hunks(&file_p, &file_o, blame_chunk_cb, &d, sb->xdl_opts))
@@ -990,8 +1886,13 @@ static void pass_blame_to_parent(struct blame_scoreboard *sb,
 		    oid_to_hex(&parent->commit->object.oid),
 		    oid_to_hex(&target->commit->object.oid));
 	/* The rest are the same as the parent */
-	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, parent);
+	blame_chunk(&d.dstq, &d.srcq, INT_MAX, d.offset, INT_MAX, 0,
+		    parent, target, 0);
 	*d.dstq = NULL;
+	if (ignore_diffs)
+		newdest = llist_mergesort(newdest, get_next_blame,
+					  set_next_blame,
+					  compare_blame_suspect);
 	queue_blames(sb, parent, newdest);
 
 	return;
@@ -1188,7 +2089,8 @@ static void find_move_in_parent(struct blame_scoreboard *sb,
 	if (!unblamed)
 		return; /* nothing remains for this target */
 
-	fill_origin_blob(&sb->revs->diffopt, parent, &file_p, &sb->num_read_blob);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p,
+			 &sb->num_read_blob, 0);
 	if (!file_p.ptr)
 		return;
 
@@ -1317,7 +2219,8 @@ static void find_copy_in_parent(struct blame_scoreboard *sb,
 			norigin = get_origin(parent, p->one->path);
 			oidcpy(&norigin->blob_oid, &p->one->oid);
 			norigin->mode = p->one->mode;
-			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p, &sb->num_read_blob);
+			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p,
+					 &sb->num_read_blob, 0);
 			if (!file_p.ptr)
 				continue;
 
@@ -1495,12 +2398,35 @@ static void pass_blame(struct blame_scoreboard *sb, struct blame_origin *origin,
 			blame_origin_incref(porigin);
 			origin->previous = porigin;
 		}
-		pass_blame_to_parent(sb, origin, porigin);
+		pass_blame_to_parent(sb, origin, porigin, 0);
 		if (!origin->suspects)
 			goto finish;
 	}
 
 	/*
+	 * Pass remaining suspects for ignored commits to their parents.
+	 */
+	if (oidset_contains(&sb->ignore_list, &commit->object.oid)) {
+		for (i = 0, sg = first_scapegoat(revs, commit, sb->reverse);
+		     i < num_sg && sg;
+		     sg = sg->next, i++) {
+			struct blame_origin *porigin = sg_origin[i];
+
+			if (!porigin)
+				continue;
+			pass_blame_to_parent(sb, origin, porigin, 1);
+			/*
+			 * Preemptively drop porigin so we can refresh the
+			 * fingerprints if we use the parent again, which can
+			 * occur if you ignore back-to-back commits.
+			 */
+			drop_origin_blob(porigin);
+			if (!origin->suspects)
+				goto finish;
+		}
+	}
+
+	/*
 	 * Optionally find moves in parents' files.
 	 */
 	if (opt & PICKAXE_BLAME_MOVE) {
@@ -1640,37 +2566,14 @@ void assign_blame(struct blame_scoreboard *sb, int opt)
 	}
 }
 
-static const char *get_next_line(const char *start, const char *end)
-{
-	const char *nl = memchr(start, '\n', end - start);
-	return nl ? nl + 1 : end;
-}
-
 /*
  * To allow quick access to the contents of nth line in the
  * final image, prepare an index in the scoreboard.
  */
 static int prepare_lines(struct blame_scoreboard *sb)
 {
-	const char *buf = sb->final_buf;
-	unsigned long len = sb->final_buf_size;
-	const char *end = buf + len;
-	const char *p;
-	int *lineno;
-	int num = 0;
-
-	for (p = buf; p < end; p = get_next_line(p, end))
-		num++;
-
-	ALLOC_ARRAY(sb->lineno, num + 1);
-	lineno = sb->lineno;
-
-	for (p = buf; p < end; p = get_next_line(p, end))
-		*lineno++ = p - buf;
-
-	*lineno = len;
-
-	sb->num_lines = num;
+	sb->num_lines = find_line_starts(&sb->lineno, sb->final_buf,
+					 sb->final_buf_size);
 	return sb->num_lines;
 }
 
diff --git a/blame.h b/blame.h
index d62f80f..4a9e127 100644
--- a/blame.h
+++ b/blame.h
@@ -51,6 +51,8 @@ struct blame_origin {
 	 */
 	struct blame_entry *suspects;
 	mmfile_t file;
+	int num_lines;
+	void *fingerprints;
 	struct object_id blob_oid;
 	unsigned short mode;
 	/* guilty gets set when shipping any suspects to the final
@@ -92,6 +94,8 @@ struct blame_entry {
 	 * scanning the lines over and over.
 	 */
 	unsigned score;
+	int ignored;
+	int unblamable;
 };
 
 /*
@@ -117,6 +121,8 @@ struct blame_scoreboard {
 	/* linked list of blames */
 	struct blame_entry *ent;
 
+	struct oidset ignore_list;
+
 	/* look-up a line in the final buffer */
 	int num_lines;
 	int *lineno;
diff --git a/builtin/blame.c b/builtin/blame.c
index 50e3d4a..b6534d4 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -53,6 +53,9 @@ static int no_whole_file_rename;
 static int show_progress;
 static char repeated_meta_color[COLOR_MAXLEN];
 static int coloring_mode;
+static struct string_list ignore_revs_file_list = STRING_LIST_INIT_NODUP;
+static int mark_unblamable_lines;
+static int mark_ignored_lines;
 
 static struct date_mode blame_date_mode = { DATE_ISO8601 };
 static size_t blame_date_width;
@@ -480,6 +483,14 @@ static void emit_other(struct blame_scoreboard *sb, struct blame_entry *ent, int
 			}
 		}
 
+		if (mark_unblamable_lines && ent->unblamable) {
+			length--;
+			putchar('*');
+		}
+		if (mark_ignored_lines && ent->ignored) {
+			length--;
+			putchar('?');
+		}
 		printf("%.*s", length, hex);
 		if (opt & OUTPUT_ANNOTATE_COMPAT) {
 			const char *name;
@@ -696,6 +707,24 @@ static int git_blame_config(const char *var, const char *value, void *cb)
 		parse_date_format(value, &blame_date_mode);
 		return 0;
 	}
+	if (!strcmp(var, "blame.ignorerevsfile")) {
+		const char *str;
+		int ret;
+
+		ret = git_config_pathname(&str, var, value);
+		if (ret)
+			return ret;
+		string_list_insert(&ignore_revs_file_list, str);
+		return 0;
+	}
+	if (!strcmp(var, "blame.markunblamablelines")) {
+		mark_unblamable_lines = git_config_bool(var, value);
+		return 0;
+	}
+	if (!strcmp(var, "blame.markignoredlines")) {
+		mark_ignored_lines = git_config_bool(var, value);
+		return 0;
+	}
 	if (!strcmp(var, "color.blame.repeatedlines")) {
 		if (color_parse_mem(value, strlen(value), repeated_meta_color))
 			warning(_("invalid color '%s' in color.blame.repeatedLines"),
@@ -775,6 +804,27 @@ static int is_a_rev(const char *name)
 	return OBJ_NONE < oid_object_info(the_repository, &oid, NULL);
 }
 
+static void build_ignorelist(struct blame_scoreboard *sb,
+			     struct string_list *ignore_revs_file_list,
+			     struct string_list *ignore_rev_list)
+{
+	struct string_list_item *i;
+	struct object_id oid;
+
+	oidset_init(&sb->ignore_list, 0);
+	for_each_string_list_item(i, ignore_revs_file_list) {
+		if (!strcmp(i->string, ""))
+			oidset_clear(&sb->ignore_list);
+		else
+			oidset_parse_file(&sb->ignore_list, i->string);
+	}
+	for_each_string_list_item(i, ignore_rev_list) {
+		if (get_oid_committish(i->string, &oid))
+			die(_("cannot find revision %s to ignore"), i->string);
+		oidset_insert(&sb->ignore_list, &oid);
+	}
+}
+
 int cmd_blame(int argc, const char **argv, const char *prefix)
 {
 	struct rev_info revs;
@@ -786,6 +836,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	struct progress_info pi = { NULL, 0 };
 
 	struct string_list range_list = STRING_LIST_INIT_NODUP;
+	struct string_list ignore_rev_list = STRING_LIST_INIT_NODUP;
 	int output_option = 0, opt = 0;
 	int show_stats = 0;
 	const char *revs_file = NULL;
@@ -807,6 +858,8 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 		OPT_BIT('s', NULL, &output_option, N_("Suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
 		OPT_BIT('e', "show-email", &output_option, N_("Show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
 		OPT_BIT('w', NULL, &xdl_opts, N_("Ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+		OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("Ignore <rev> when blaming")),
+		OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("Ignore revisions from <file>")),
 		OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
 		OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
 
@@ -1012,6 +1065,9 @@ parse_done:
 	sb.contents_from = contents_from;
 	sb.reverse = reverse;
 	sb.repo = the_repository;
+	build_ignorelist(&sb, &ignore_revs_file_list, &ignore_rev_list);
+	string_list_clear(&ignore_revs_file_list, 0);
+	string_list_clear(&ignore_rev_list, 0);
 	setup_scoreboard(&sb, path, &o);
 	lno = sb.num_lines;
 
diff --git a/fsck.c b/fsck.c
index 117c4a9..cdb7d8d 100644
--- a/fsck.c
+++ b/fsck.c
@@ -181,41 +181,6 @@ static int fsck_msg_type(enum fsck_msg_id msg_id,
 	return msg_type;
 }
 
-static void init_skiplist(struct fsck_options *options, const char *path)
-{
-	FILE *fp;
-	struct strbuf sb = STRBUF_INIT;
-	struct object_id oid;
-
-	fp = fopen(path, "r");
-	if (!fp)
-		die("Could not open skip list: %s", path);
-	while (!strbuf_getline(&sb, fp)) {
-		const char *p;
-		const char *hash;
-
-		/*
-		 * Allow trailing comments, leading whitespace
-		 * (including before commits), and empty or whitespace
-		 * only lines.
-		 */
-		hash = strchr(sb.buf, '#');
-		if (hash)
-			strbuf_setlen(&sb, hash - sb.buf);
-		strbuf_trim(&sb);
-		if (!sb.len)
-			continue;
-
-		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
-			die("Invalid SHA-1: %s", sb.buf);
-		oidset_insert(&options->skiplist, &oid);
-	}
-	if (ferror(fp))
-		die_errno("Could not read '%s'", path);
-	fclose(fp);
-	strbuf_release(&sb);
-}
-
 static int parse_msg_type(const char *str)
 {
 	if (!strcmp(str, "error"))
@@ -284,7 +249,7 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
 		if (!strcmp(buf, "skiplist")) {
 			if (equal == len)
 				die("skiplist requires a path");
-			init_skiplist(options, buf + equal + 1);
+			oidset_parse_file(&options->skiplist, buf + equal + 1);
 			buf += len + 1;
 			continue;
 		}
diff --git a/oidset.c b/oidset.c
index 8bdecb1..f63ce81 100644
--- a/oidset.c
+++ b/oidset.c
@@ -35,3 +35,38 @@ void oidset_clear(struct oidset *set)
 	kh_release_oid_set(&set->set);
 	oidset_init(set, 0);
 }
+
+void oidset_parse_file(struct oidset *set, const char *path)
+{
+	FILE *fp;
+	struct strbuf sb = STRBUF_INIT;
+	struct object_id oid;
+
+	fp = fopen(path, "r");
+	if (!fp)
+		die("could not open object name list: %s", path);
+	while (!strbuf_getline(&sb, fp)) {
+		const char *p;
+		const char *name;
+
+		/*
+		 * Allow trailing comments, leading whitespace
+		 * (including before commits), and empty or whitespace
+		 * only lines.
+		 */
+		name = strchr(sb.buf, '#');
+		if (name)
+			strbuf_setlen(&sb, name - sb.buf);
+		strbuf_trim(&sb);
+		if (!sb.len)
+			continue;
+
+		if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0')
+			die("invalid object name: %s", sb.buf);
+		oidset_insert(set, &oid);
+	}
+	if (ferror(fp))
+		die_errno("Could not read '%s'", path);
+	fclose(fp);
+	strbuf_release(&sb);
+}
diff --git a/oidset.h b/oidset.h
index 505fad5..5346563 100644
--- a/oidset.h
+++ b/oidset.h
@@ -61,6 +61,14 @@ int oidset_remove(struct oidset *set, const struct object_id *oid);
  */
 void oidset_clear(struct oidset *set);
 
+/**
+ * Add the contents of the file 'path' to an initialized oidset.  Each line is
+ * an unabbreviated object name.  Comments begin with '#', and trailing comments
+ * are allowed.  Leading whitespace and empty or white-space only lines are
+ * ignored.
+ */
+void oidset_parse_file(struct oidset *set, const char *path);
+
 struct oidset_iter {
 	kh_oid_set_t *set;
 	khiter_t iter;
diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh
index 7bc7068..fdfe179 100755
--- a/t/t5504-fetch-receive-strict.sh
+++ b/t/t5504-fetch-receive-strict.sh
@@ -164,9 +164,9 @@ test_expect_success 'fsck with unsorted skipList' '
 test_expect_success 'fsck with invalid or bogus skipList input' '
 	git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck &&
 	test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err &&
-	test_i18ngrep "Could not open skip list: does-not-exist" err &&
+	test_i18ngrep "could not open.*: does-not-exist" err &&
 	test_must_fail git -c fsck.skipList=.git/config -c fsck.missingEmail=ignore fsck 2>err &&
-	test_i18ngrep "Invalid SHA-1: \[core\]" err
+	test_i18ngrep "invalid object name: \[core\]" err
 '
 
 test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' '
@@ -193,7 +193,7 @@ test_expect_success 'fsck no garbage output from comments & empty lines errors'
 test_expect_success 'fsck with invalid abbreviated skipList input' '
 	echo $commit | test_copy_bytes 20 >SKIP.abbreviated &&
 	test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated &&
-	test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated
+	test_i18ngrep "^fatal: invalid object name: " err-abbreviated
 '
 
 test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' '
@@ -226,10 +226,10 @@ test_expect_success 'push with receive.fsck.skipList' '
 	test_must_fail git push --porcelain dst bogus &&
 	git --git-dir=dst/.git config receive.fsck.skipList does-not-exist &&
 	test_must_fail git push --porcelain dst bogus 2>err &&
-	test_i18ngrep "Could not open skip list: does-not-exist" err &&
+	test_i18ngrep "could not open.*: does-not-exist" err &&
 	git --git-dir=dst/.git config receive.fsck.skipList config &&
 	test_must_fail git push --porcelain dst bogus 2>err &&
-	test_i18ngrep "Invalid SHA-1: \[core\]" err &&
+	test_i18ngrep "invalid object name: \[core\]" err &&
 
 	git --git-dir=dst/.git config receive.fsck.skipList SKIP &&
 	git push --porcelain dst bogus
@@ -255,10 +255,10 @@ test_expect_success 'fetch with fetch.fsck.skipList' '
 	test_must_fail git --git-dir=dst/.git fetch "file://$(pwd)" $refspec &&
 	git --git-dir=dst/.git config fetch.fsck.skipList does-not-exist &&
 	test_must_fail git --git-dir=dst/.git fetch "file://$(pwd)" $refspec 2>err &&
-	test_i18ngrep "Could not open skip list: does-not-exist" err &&
+	test_i18ngrep "could not open.*: does-not-exist" err &&
 	git --git-dir=dst/.git config fetch.fsck.skipList dst/.git/config &&
 	test_must_fail git --git-dir=dst/.git fetch "file://$(pwd)" $refspec 2>err &&
-	test_i18ngrep "Invalid SHA-1: \[core\]" err &&
+	test_i18ngrep "invalid object name: \[core\]" err &&
 
 	git --git-dir=dst/.git config fetch.fsck.skipList dst/.git/SKIP &&
 	git --git-dir=dst/.git fetch "file://$(pwd)" $refspec
diff --git a/t/t8003-blame-corner-cases.sh b/t/t8003-blame-corner-cases.sh
index c92a47b..1c5fb1d 100755
--- a/t/t8003-blame-corner-cases.sh
+++ b/t/t8003-blame-corner-cases.sh
@@ -275,4 +275,40 @@ test_expect_success 'blame file with CRLF core.autocrlf=true' '
 	grep "A U Thor" actual
 '
 
+# Tests the splitting and merging of blame entries in blame_coalesce().
+# The output of blame is the same, regardless of whether blame_coalesce() runs
+# or not, so we'd likely only notice a problem if blame crashes or assigned
+# blame to the "splitting" commit ('SPLIT' below).
+test_expect_success 'blame coalesce' '
+	cat >giraffe <<-\EOF &&
+	ABC
+	DEF
+	EOF
+	git add giraffe &&
+	git commit -m "original file" &&
+	oid=$(git rev-parse HEAD) &&
+
+	cat >giraffe <<-\EOF &&
+	ABC
+	SPLIT
+	DEF
+	EOF
+	git add giraffe &&
+	git commit -m "interior SPLIT line" &&
+
+	cat >giraffe <<-\EOF &&
+	ABC
+	DEF
+	EOF
+	git add giraffe &&
+	git commit -m "same contents as original" &&
+
+	cat >expect <<-EOF &&
+	$oid 1) ABC
+	$oid 2) DEF
+	EOF
+	git -c core.abbrev=40 blame -s giraffe >actual &&
+	test_cmp expect actual
+'
+
 test_done
diff --git a/t/t8013-blame-ignore-revs.sh b/t/t8013-blame-ignore-revs.sh
new file mode 100755
index 0000000..36dc31e
--- /dev/null
+++ b/t/t8013-blame-ignore-revs.sh
@@ -0,0 +1,274 @@
+#!/bin/sh
+
+test_description='ignore revisions when blaming'
+. ./test-lib.sh
+
+# Creates:
+# 	A--B--X
+# A added line 1 and B added line 2.  X makes changes to those lines.  Sanity
+# check that X is blamed for both lines.
+test_expect_success setup '
+	test_commit A file line1 &&
+
+	echo line2 >>file &&
+	git add file &&
+	test_tick &&
+	git commit -m B &&
+	git tag B &&
+
+	test_write_lines line-one line-two >file &&
+	git add file &&
+	test_tick &&
+	git commit -m X &&
+	git tag X &&
+
+	git blame --line-porcelain file >blame_raw &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 1" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse X >expect &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse X >expect &&
+	test_cmp expect actual
+	'
+
+# Ignore X, make sure A is blamed for line 1 and B for line 2.
+test_expect_success ignore_rev_changing_lines '
+	git blame --line-porcelain --ignore-rev X file >blame_raw &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 1" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse A >expect &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse B >expect &&
+	test_cmp expect actual
+	'
+
+# For ignored revs that have added 'unblamable' lines, attribute those to the
+# ignored commit.
+# 	A--B--X--Y
+# Where Y changes lines 1 and 2, and adds lines 3 and 4.  The added lines ought
+# to have nothing in common with "line-one" or "line-two", to keep any
+# heuristics from matching them with any lines in the parent.
+test_expect_success ignore_rev_adding_unblamable_lines '
+	test_write_lines line-one-change line-two-changed y3 y4 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m Y &&
+	git tag Y &&
+
+	git rev-parse Y >expect &&
+	git blame --line-porcelain file --ignore-rev Y >blame_raw &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 3" blame_raw | sed -e "s/ .*//" >actual &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 4" blame_raw | sed -e "s/ .*//" >actual &&
+	test_cmp expect actual
+	'
+
+# Ignore X and Y, both in separate files.  Lines 1 == A, 2 == B.
+test_expect_success ignore_revs_from_files '
+	git rev-parse X >ignore_x &&
+	git rev-parse Y >ignore_y &&
+	git blame --line-porcelain file --ignore-revs-file ignore_x --ignore-revs-file ignore_y >blame_raw &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 1" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse A >expect &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse B >expect &&
+	test_cmp expect actual
+	'
+
+# Ignore X from the config option, Y from a file.
+test_expect_success ignore_revs_from_configs_and_files '
+	git config --add blame.ignoreRevsFile ignore_x &&
+	git blame --line-porcelain file --ignore-revs-file ignore_y >blame_raw &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 1" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse A >expect &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse B >expect &&
+	test_cmp expect actual
+	'
+
+# Override blame.ignoreRevsFile (ignore_x) with an empty string.  X should be
+# blamed now for lines 1 and 2, since we are no longer ignoring X.
+test_expect_success override_ignore_revs_file '
+	git blame --line-porcelain file --ignore-revs-file "" --ignore-revs-file ignore_y >blame_raw &&
+	git rev-parse X >expect &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 1" blame_raw | sed -e "s/ .*//" >actual &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 2" blame_raw | sed -e "s/ .*//" >actual &&
+	test_cmp expect actual
+	'
+test_expect_success bad_files_and_revs '
+	test_must_fail git blame file --ignore-rev NOREV 2>err &&
+	test_i18ngrep "cannot find revision NOREV to ignore" err &&
+
+	test_must_fail git blame file --ignore-revs-file NOFILE 2>err &&
+	test_i18ngrep "could not open.*: NOFILE" err &&
+
+	echo NOREV >ignore_norev &&
+	test_must_fail git blame file --ignore-revs-file ignore_norev 2>err &&
+	test_i18ngrep "invalid object name: NOREV" err
+	'
+
+# For ignored revs that have added 'unblamable' lines, mark those lines with a
+# '*'
+# 	A--B--X--Y
+# Lines 3 and 4 are from Y and unblamable.  This was set up in
+# ignore_rev_adding_unblamable_lines.
+test_expect_success mark_unblamable_lines '
+	git config --add blame.markUnblamableLines true &&
+
+	git blame --ignore-rev Y file >blame_raw &&
+	echo "*" >expect &&
+
+	sed -n "3p" blame_raw | cut -c1 >actual &&
+	test_cmp expect actual &&
+
+	sed -n "4p" blame_raw | cut -c1 >actual &&
+	test_cmp expect actual
+	'
+
+# Commit Z will touch the first two lines.  Y touched all four.
+# 	A--B--X--Y--Z
+# The blame output when ignoring Z should be:
+# ?Y ... 1)
+# ?Y ... 2)
+# Y  ... 3)
+# Y  ... 4)
+# We're checking only the first character
+test_expect_success mark_ignored_lines '
+	git config --add blame.markIgnoredLines true &&
+
+	test_write_lines line-one-Z line-two-Z y3 y4 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m Z &&
+	git tag Z &&
+
+	git blame --ignore-rev Z file >blame_raw &&
+	echo "?" >expect &&
+
+	sed -n "1p" blame_raw | cut -c1 >actual &&
+	test_cmp expect actual &&
+
+	sed -n "2p" blame_raw | cut -c1 >actual &&
+	test_cmp expect actual &&
+
+	sed -n "3p" blame_raw | cut -c1 >actual &&
+	! test_cmp expect actual &&
+
+	sed -n "4p" blame_raw | cut -c1 >actual &&
+	! test_cmp expect actual
+	'
+
+# For ignored revs that added 'unblamable' lines and more recent commits changed
+# the blamable lines, mark the unblamable lines with a
+# '*'
+# 	A--B--X--Y--Z
+# Lines 3 and 4 are from Y and unblamable, as set up in
+# ignore_rev_adding_unblamable_lines.  Z changed lines 1 and 2.
+test_expect_success mark_unblamable_lines_intermediate '
+	git config --add blame.markUnblamableLines true &&
+
+	git blame --ignore-rev Y file >blame_raw 2>stderr &&
+	echo "*" >expect &&
+
+	sed -n "3p" blame_raw | cut -c1 >actual &&
+	test_cmp expect actual &&
+
+	sed -n "4p" blame_raw | cut -c1 >actual &&
+	test_cmp expect actual
+	'
+
+# The heuristic called by guess_line_blames() tries to find the size of a
+# blame_entry 'e' in the parent's address space.  Those calculations need to
+# check for negative or zero values for when a blame entry is completely outside
+# the window of the parent's version of a file.
+#
+# This happens when one commit adds several lines (commit B below).  A later
+# commit (C) changes one line in the middle of B's change.  Commit C gets blamed
+# for its change, and that breaks up B's change into multiple blame entries.
+# When processing B, one of the blame_entries is outside A's window (which was
+# zero - it had no lines added on its side of the diff).
+#
+# A--B--C, ignore B to test the ignore heuristic's boundary checks.
+test_expect_success ignored_chunk_negative_parent_size '
+	rm -rf .git/ &&
+	git init &&
+
+	test_write_lines L1 L2 L7 L8 L9 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m A &&
+	git tag A &&
+
+	test_write_lines L1 L2 L3 L4 L5 L6 L7 L8 L9 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m B &&
+	git tag B &&
+
+	test_write_lines L1 L2 L3 L4 xxx L6 L7 L8 L9 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m C &&
+	git tag C &&
+
+	git blame file --ignore-rev B >blame_raw
+	'
+
+# Resetting the repo and creating:
+#
+# A--B--M
+#  \   /
+#   C-+
+#
+# 'A' creates a file.  B changes line 1, and C changes line 9.  M merges.
+test_expect_success ignore_merge '
+	rm -rf .git/ &&
+	git init &&
+
+	test_write_lines L1 L2 L3 L4 L5 L6 L7 L8 L9 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m A &&
+	git tag A &&
+
+	test_write_lines BB L2 L3 L4 L5 L6 L7 L8 L9 >file &&
+	git add file &&
+	test_tick &&
+	git commit -m B &&
+	git tag B &&
+
+	git reset --hard A &&
+	test_write_lines L1 L2 L3 L4 L5 L6 L7 L8 CC >file &&
+	git add file &&
+	test_tick &&
+	git commit -m C &&
+	git tag C &&
+
+	test_merge M B &&
+	git blame --line-porcelain file --ignore-rev M >blame_raw &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 1" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse B >expect &&
+	test_cmp expect actual &&
+
+	grep -E "^[0-9a-f]+ [0-9]+ 9" blame_raw | sed -e "s/ .*//" >actual &&
+	git rev-parse C >expect &&
+	test_cmp expect actual
+	'
+
+test_done
diff --git a/t/t8014-blame-ignore-fuzzy.sh b/t/t8014-blame-ignore-fuzzy.sh
new file mode 100755
index 0000000..6e61882
--- /dev/null
+++ b/t/t8014-blame-ignore-fuzzy.sh
@@ -0,0 +1,437 @@
+#!/bin/sh
+
+test_description='git blame ignore fuzzy heuristic'
+. ./test-lib.sh
+
+pick_author='s/^[0-9a-f^]* *(\([^ ]*\) .*/\1/'
+
+# Each test is composed of 4 variables:
+# titleN - the test name
+# aN - the initial content
+# bN - the final content
+# expectedN - the line numbers from aN that we expect git blame
+#             on bN to identify, or "Final" if bN itself should
+#             be identified as the origin of that line.
+
+# We start at test 2 because setup will show as test 1
+title2="Regression test for partially overlapping search ranges"
+cat <<EOF >a2
+1
+2
+3
+abcdef
+5
+6
+7
+ijkl
+9
+10
+11
+pqrs
+13
+14
+15
+wxyz
+17
+18
+19
+EOF
+cat <<EOF >b2
+abcde
+ijk
+pqr
+wxy
+EOF
+cat <<EOF >expected2
+4
+8
+12
+16
+EOF
+
+title3="Combine 3 lines into 2"
+cat <<EOF >a3
+if ((maxgrow==0) ||
+	( single_line_field && (field->dcols < maxgrow)) ||
+	(!single_line_field && (field->drows < maxgrow)))
+EOF
+cat <<EOF >b3
+if ((maxgrow == 0) || (single_line_field && (field->dcols < maxgrow)) ||
+	(!single_line_field && (field->drows < maxgrow))) {
+EOF
+cat <<EOF >expected3
+2
+3
+EOF
+
+title4="Add curly brackets"
+cat <<EOF >a4
+	if (rows) *rows = field->rows;
+	if (cols) *cols = field->cols;
+	if (frow) *frow = field->frow;
+	if (fcol) *fcol = field->fcol;
+EOF
+cat <<EOF >b4
+	if (rows) {
+		*rows = field->rows;
+	}
+	if (cols) {
+		*cols = field->cols;
+	}
+	if (frow) {
+		*frow = field->frow;
+	}
+	if (fcol) {
+		*fcol = field->fcol;
+	}
+EOF
+cat <<EOF >expected4
+1
+1
+Final
+2
+2
+Final
+3
+3
+Final
+4
+4
+Final
+EOF
+
+
+title5="Combine many lines and change case"
+cat <<EOF >a5
+for(row=0,pBuffer=field->buf;
+	row<height;
+	row++,pBuffer+=width )
+{
+	if ((len = (int)( After_End_Of_Data( pBuffer, width ) - pBuffer )) > 0)
+	{
+		wmove( win, row, 0 );
+		waddnstr( win, pBuffer, len );
+EOF
+cat <<EOF >b5
+for (Row = 0, PBuffer = field->buf; Row < Height; Row++, PBuffer += Width) {
+	if ((Len = (int)(afterEndOfData(PBuffer, Width) - PBuffer)) > 0) {
+		wmove(win, Row, 0);
+		waddnstr(win, PBuffer, Len);
+EOF
+cat <<EOF >expected5
+1
+5
+7
+8
+EOF
+
+title6="Rename and combine lines"
+cat <<EOF >a6
+bool need_visual_update = ((form != (FORM *)0)      &&
+	(form->status & _POSTED) &&
+	(form->current==field));
+
+if (need_visual_update)
+	Synchronize_Buffer(form);
+
+if (single_line_field)
+{
+	growth = field->cols * amount;
+	if (field->maxgrow)
+		growth = Minimum(field->maxgrow - field->dcols,growth);
+	field->dcols += growth;
+	if (field->dcols == field->maxgrow)
+EOF
+cat <<EOF >b6
+bool NeedVisualUpdate = ((Form != (FORM *)0) && (Form->status & _POSTED) &&
+	(Form->current == field));
+
+if (NeedVisualUpdate) {
+	synchronizeBuffer(Form);
+}
+
+if (SingleLineField) {
+	Growth = field->cols * amount;
+	if (field->maxgrow) {
+		Growth = Minimum(field->maxgrow - field->dcols, Growth);
+	}
+	field->dcols += Growth;
+	if (field->dcols == field->maxgrow) {
+EOF
+cat <<EOF >expected6
+1
+3
+4
+5
+6
+Final
+7
+8
+10
+11
+12
+Final
+13
+14
+EOF
+
+# Both lines match identically so position must be used to tie-break.
+title7="Same line twice"
+cat <<EOF >a7
+abc
+abc
+EOF
+cat <<EOF >b7
+abcd
+abcd
+EOF
+cat <<EOF >expected7
+1
+2
+EOF
+
+title8="Enforce line order"
+cat <<EOF >a8
+abcdef
+ghijkl
+ab
+EOF
+cat <<EOF >b8
+ghijk
+abcd
+EOF
+cat <<EOF >expected8
+2
+3
+EOF
+
+title9="Expand lines and rename variables"
+cat <<EOF >a9
+int myFunction(int ArgumentOne, Thing *ArgTwo, Blah XuglyBug) {
+	Squiggle FabulousResult = squargle(ArgumentOne, *ArgTwo,
+		XuglyBug) + EwwwGlobalWithAReallyLongNameYepTooLong;
+	return FabulousResult * 42;
+}
+EOF
+cat <<EOF >b9
+int myFunction(int argument_one, Thing *arg_asdfgh,
+	Blah xugly_bug) {
+	Squiggle fabulous_result = squargle(argument_one,
+		*arg_asdfgh, xugly_bug)
+		+ g_ewww_global_with_a_really_long_name_yep_too_long;
+	return fabulous_result * 42;
+}
+EOF
+cat <<EOF >expected9
+1
+1
+2
+3
+3
+4
+5
+EOF
+
+title10="Two close matches versus one less close match"
+cat <<EOF >a10
+abcdef
+abcdef
+ghijkl
+EOF
+cat <<EOF >b10
+gh
+abcdefx
+EOF
+cat <<EOF >expected10
+Final
+2
+EOF
+
+# The first line of b matches best with the last line of a, but the overall
+# match is better if we match it with the the first line of a.
+title11="Piggy in the middle"
+cat <<EOF >a11
+abcdefg
+ijklmn
+abcdefgh
+EOF
+cat <<EOF >b11
+abcdefghx
+ijklm
+EOF
+cat <<EOF >expected11
+1
+2
+EOF
+
+title12="No trailing newline"
+printf "abc\ndef" >a12
+printf "abx\nstu" >b12
+cat <<EOF >expected12
+1
+Final
+EOF
+
+title13="Reorder includes"
+cat <<EOF >a13
+#include "c.h"
+#include "b.h"
+#include "a.h"
+#include "e.h"
+#include "d.h"
+EOF
+cat <<EOF >b13
+#include "a.h"
+#include "b.h"
+#include "c.h"
+#include "d.h"
+#include "e.h"
+EOF
+cat <<EOF >expected13
+3
+2
+1
+5
+4
+EOF
+
+last_test=13
+
+test_expect_success setup '
+	for i in $(test_seq 2 $last_test)
+	do
+		# Append each line in a separate commit to make it easy to
+		# check which original line the blame output relates to.
+
+		line_count=0 &&
+		while IFS= read line
+		do
+			line_count=$((line_count+1)) &&
+			echo "$line" >>"$i" &&
+			git add "$i" &&
+			test_tick &&
+			GIT_AUTHOR_NAME="$line_count" git commit -m "$line_count"
+		done <"a$i"
+	done &&
+
+	for i in $(test_seq 2 $last_test)
+	do
+		# Overwrite the files with the final content.
+		cp b$i $i &&
+		git add $i
+	done &&
+	test_tick &&
+
+	# Commit the final content all at once so it can all be
+	# referred to with the same commit ID.
+	GIT_AUTHOR_NAME=Final git commit -m Final &&
+
+	IGNOREME=$(git rev-parse HEAD)
+'
+
+for i in $(test_seq 2 $last_test); do
+	eval title="\$title$i"
+	test_expect_success "$title" \
+	"git blame -M9 --ignore-rev $IGNOREME $i >output &&
+	sed -e \"$pick_author\" output >actual &&
+	test_cmp expected$i actual"
+done
+
+# This invoked a null pointer dereference when the chunk callback was called
+# with a zero length parent chunk and there were no more suspects.
+test_expect_success 'Diff chunks with no suspects' '
+	test_write_lines xy1 A B C xy1 >file &&
+	git add file &&
+	test_tick &&
+	GIT_AUTHOR_NAME=1 git commit -m 1 &&
+
+	test_write_lines xy2 A B xy2 C xy2 >file &&
+	git add file &&
+	test_tick &&
+	GIT_AUTHOR_NAME=2 git commit -m 2 &&
+	REV_2=$(git rev-parse HEAD) &&
+
+	test_write_lines xy3 A >file &&
+	git add file &&
+	test_tick &&
+	GIT_AUTHOR_NAME=3 git commit -m 3 &&
+	REV_3=$(git rev-parse HEAD) &&
+
+	test_write_lines 1 1 >expected &&
+
+	git blame --ignore-rev $REV_2 --ignore-rev $REV_3 file >output &&
+	sed -e "$pick_author" output >actual &&
+
+	test_cmp expected actual
+	'
+
+test_expect_success 'position matching' '
+	test_write_lines abc def >file2 &&
+	git add file2 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=1 git commit -m 1 &&
+
+	test_write_lines abc def abc def >file2 &&
+	git add file2 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=2 git commit -m 2 &&
+
+	test_write_lines abcx defx abcx defx >file2 &&
+	git add file2 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=3 git commit -m 3 &&
+	REV_3=$(git rev-parse HEAD) &&
+
+	test_write_lines abcy defy abcx defx >file2 &&
+	git add file2 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=4 git commit -m 4 &&
+	REV_4=$(git rev-parse HEAD) &&
+
+	test_write_lines 1 1 2 2 >expected &&
+
+	git blame --ignore-rev $REV_3 --ignore-rev $REV_4 file2 >output &&
+	sed -e "$pick_author" output >actual &&
+
+	test_cmp expected actual
+	'
+
+# This fails if each blame entry is processed independently instead of
+# processing each diff change in full.
+test_expect_success 'preserve order' '
+	test_write_lines bcde >file3 &&
+	git add file3 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=1 git commit -m 1 &&
+
+	test_write_lines bcde fghij >file3 &&
+	git add file3 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=2 git commit -m 2 &&
+
+	test_write_lines bcde fghij abcd >file3 &&
+	git add file3 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=3 git commit -m 3 &&
+
+	test_write_lines abcdx fghijx bcdex >file3 &&
+	git add file3 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=4 git commit -m 4 &&
+	REV_4=$(git rev-parse HEAD) &&
+
+	test_write_lines abcdx fghijy bcdex >file3 &&
+	git add file3 &&
+	test_tick &&
+	GIT_AUTHOR_NAME=5 git commit -m 5 &&
+	REV_5=$(git rev-parse HEAD) &&
+
+	test_write_lines 1 2 3 >expected &&
+
+	git blame --ignore-rev $REV_4 --ignore-rev $REV_5 file3 >output &&
+	sed -e "$pick_author" output >actual &&
+
+	test_cmp expected actual
+	'
+
+test_done
author	Junio C Hamano <gitster@pobox.com>	2019-07-19 18:30:20 (GMT)
committer	Junio C Hamano <gitster@pobox.com>	2019-07-19 18:30:20 (GMT)
commit	209f0755934a0c9b448408f9b7c9849c15041ecc (patch)
tree	ea04d9cd2847c3753c778b4dc5b976f37b19c910
parent	c62bff2cedead7df6fc55a745f073715da30727e (diff)
parent	78fafbb2800915047ad2f1578c34ca19c316eca2 (diff)
download	git-209f0755934a0c9b448408f9b7c9849c15041ecc.zip git-209f0755934a0c9b448408f9b7c9849c15041ecc.tar.gz git-209f0755934a0c9b448408f9b7c9849c15041ecc.tar.bz2