xdiff: drop XDL_FAST_HASH

The xdiff code hashes every line of both sides of a diff, and then compares those hashes to find duplicates. The overall performance depends both on how fast we can compute the hashes, but also on how many hash collisions we see. The idea of XDL_FAST_HASH is to speed up the hash computation. But the generated hashes have worse collision behavior. This means that in some cases it speeds diffs up (running "git log -p" on git.git improves by ~8% with it), but in others it can slow things down. One pathological case saw over a 100x slowdown[1]. There may be a better hash function that covers both properties, but in the meantime we are better off with the original hash. It's slightly slower in the common case, but it has fewer surprising pathological cases. [1] http://public-inbox.org/git/20141222041944.GA441@peff.net/ Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
author: Jeff King <peff@peff.net> 2016-12-01 04:52:43 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2016-12-06 21:27:11 (GMT)
commit: 1f7c9261320576fcaaa5b4e50ad73336b17183e8 (patch)
tree: 358c866f7964837cd50c059184a9735d08553a78 /xdiff
parent: 8d7a455ed52e2a96debc080dfc011b6bb00db5d2 (diff)
download: git-1f7c9261320576fcaaa5b4e50ad73336b17183e8.zip
git-1f7c9261320576fcaaa5b4e50ad73336b17183e8.tar.gz
git-1f7c9261320576fcaaa5b4e50ad73336b17183e8.tar.bz2
1 files changed, 0 insertions, 106 deletions
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 027192a..04d7b32 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -264,110 +264,6 @@ static unsigned long xdl_hash_record_with_whitespace(char const **data,
 	return ha;
 }
 
-#ifdef XDL_FAST_HASH
-
-#define REPEAT_BYTE(x)  ((~0ul / 0xff) * (x))
-
-#define ONEBYTES	REPEAT_BYTE(0x01)
-#define NEWLINEBYTES	REPEAT_BYTE(0x0a)
-#define HIGHBITS	REPEAT_BYTE(0x80)
-
-/* Return the high bit set in the first byte that is a zero */
-static inline unsigned long has_zero(unsigned long a)
-{
-	return ((a - ONEBYTES) & ~a) & HIGHBITS;
-}
-
-static inline long count_masked_bytes(unsigned long mask)
-{
-	if (sizeof(long) == 8) {
-		/*
-		 * Jan Achrenius on G+: microoptimized version of
-		 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
-		 * that works for the bytemasks without having to
-		 * mask them first.
-		 */
-		/*
-		 * return mask * 0x0001020304050608 >> 56;
-		 *
-		 * Doing it like this avoids warnings on 32-bit machines.
-		 */
-		long a = (REPEAT_BYTE(0x01) / 0xff + 1);
-		return mask * a >> (sizeof(long) * 7);
-	} else {
-		/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-		/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
-		long a = (0x0ff0001 + mask) >> 23;
-		/* Fix the 1 for 00 case */
-		return a & mask;
-	}
-}
-
-unsigned long xdl_hash_record(char const **data, char const *top, long flags)
-{
-	unsigned long hash = 5381;
-	unsigned long a = 0, mask = 0;
-	char const *ptr = *data;
-	char const *end = top - sizeof(unsigned long) + 1;
-
-	if (flags & XDF_WHITESPACE_FLAGS)
-		return xdl_hash_record_with_whitespace(data, top, flags);
-
-	ptr -= sizeof(unsigned long);
-	do {
-		hash += hash << 5;
-		hash ^= a;
-		ptr += sizeof(unsigned long);
-		if (ptr >= end)
-			break;
-		a = *(unsigned long *)ptr;
-		/* Do we have any '\n' bytes in this word? */
-		mask = has_zero(a ^ NEWLINEBYTES);
-	} while (!mask);
-
-	if (ptr >= end) {
-		/*
-		 * There is only a partial word left at the end of the
-		 * buffer. Because we may work with a memory mapping,
-		 * we have to grab the rest byte by byte instead of
-		 * blindly reading it.
-		 *
-		 * To avoid problems with masking in a signed value,
-		 * we use an unsigned char here.
-		 */
-		const char *p;
-		for (p = top - 1; p >= ptr; p--)
-			a = (a << 8) + *((const unsigned char *)p);
-		mask = has_zero(a ^ NEWLINEBYTES);
-		if (!mask)
-			/*
-			 * No '\n' found in the partial word.  Make a
-			 * mask that matches what we read.
-			 */
-			mask = 1UL << (8 * (top - ptr) + 7);
-	}
-
-	/* The mask *below* the first high bit set */
-	mask = (mask - 1) & ~mask;
-	mask >>= 7;
-	hash += hash << 5;
-	hash ^= a & mask;
-
-	/* Advance past the last (possibly partial) word */
-	ptr += count_masked_bytes(mask);
-
-	if (ptr < top) {
-		assert(*ptr == '\n');
-		ptr++;
-	}
-
-	*data = ptr;
-
-	return hash;
-}
-
-#else /* XDL_FAST_HASH */
-
 unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	unsigned long ha = 5381;
 	char const *ptr = *data;
@@ -384,8 +280,6 @@ unsigned long xdl_hash_record(char const **data, char const *top, long flags) {
 	return ha;
 }
 
-#endif /* XDL_FAST_HASH */
-
 unsigned int xdl_hashbits(unsigned int size) {
 	unsigned int val = 1, bits = 0;
author	Jeff King <peff@peff.net>	2016-12-01 04:52:43 (GMT)
committer	Junio C Hamano <gitster@pobox.com>	2016-12-06 21:27:11 (GMT)
commit	1f7c9261320576fcaaa5b4e50ad73336b17183e8 (patch)
tree	358c866f7964837cd50c059184a9735d08553a78 /xdiff
parent	8d7a455ed52e2a96debc080dfc011b6bb00db5d2 (diff)
download	git-1f7c9261320576fcaaa5b4e50ad73336b17183e8.zip git-1f7c9261320576fcaaa5b4e50ad73336b17183e8.tar.gz git-1f7c9261320576fcaaa5b4e50ad73336b17183e8.tar.bz2