Merge branch 'jh/unpack-trees-micro-optim'

In a 2- and 3-way merge of trees, more than one source trees often end up sharing an identical subtree; optimize by not reading the same tree multiple times in such a case. * jh/unpack-trees-micro-optim: unpack-trees: avoid duplicate ODB lookups during checkout
author: Junio C Hamano <gitster@pobox.com> 2017-04-24 05:07:48 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2017-04-24 05:07:48 (GMT)
commit: 8868ba19626a72d5883241c36fd5477897ef8dd1 (patch)
tree: 89157def2f48bd982313403bd658420be10624a6
parent: 8b6bba66633cf16807ae962e0b1126af164f0e78 (diff)
parent: d12a8cf0af18804c2000efc7a0224da631e04cd1 (diff)
download: git-8868ba19626a72d5883241c36fd5477897ef8dd1.zip
git-8868ba19626a72d5883241c36fd5477897ef8dd1.tar.gz
git-8868ba19626a72d5883241c36fd5477897ef8dd1.tar.bz2
1 files changed, 33 insertions, 5 deletions
diff --git a/unpack-trees.c b/unpack-trees.c
index 6b7356d..aa15111 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -604,12 +604,18 @@ static int switch_cache_bottom(struct traverse_info *info)
 	return ret;
 }
 
+static inline int are_same_oid(struct name_entry *name_j, struct name_entry *name_k)
+{
+	return name_j->oid && name_k->oid && !oidcmp(name_j->oid, name_k->oid);
+}
+
 static int traverse_trees_recursive(int n, unsigned long dirmask,
 				    unsigned long df_conflicts,
 				    struct name_entry *names,
 				    struct traverse_info *info)
 {
 	int i, ret, bottom;
+	int nr_buf = 0;
 	struct tree_desc t[MAX_UNPACK_TREES];
 	void *buf[MAX_UNPACK_TREES];
 	struct traverse_info newinfo;
@@ -626,18 +632,40 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
 	newinfo.pathlen += tree_entry_len(p) + 1;
 	newinfo.df_conflicts |= df_conflicts;
 
+	/*
+	 * Fetch the tree from the ODB for each peer directory in the
+	 * n commits.
+	 *
+	 * For 2- and 3-way traversals, we try to avoid hitting the
+	 * ODB twice for the same OID.  This should yield a nice speed
+	 * up in checkouts and merges when the commits are similar.
+	 *
+	 * We don't bother doing the full O(n^2) search for larger n,
+	 * because wider traversals don't happen that often and we
+	 * avoid the search setup.
+	 *
+	 * When 2 peer OIDs are the same, we just copy the tree
+	 * descriptor data.  This implicitly borrows the buffer
+	 * data from the earlier cell.
+	 */
 	for (i = 0; i < n; i++, dirmask >>= 1) {
-		const unsigned char *sha1 = NULL;
-		if (dirmask & 1)
-			sha1 = names[i].oid->hash;
-		buf[i] = fill_tree_descriptor(t+i, sha1);
+		if (i > 0 && are_same_oid(&names[i], &names[i - 1]))
+			t[i] = t[i - 1];
+		else if (i > 1 && are_same_oid(&names[i], &names[i - 2]))
+			t[i] = t[i - 2];
+		else {
+			const unsigned char *sha1 = NULL;
+			if (dirmask & 1)
+				sha1 = names[i].oid->hash;
+			buf[nr_buf++] = fill_tree_descriptor(t+i, sha1);
+		}
 	}
 
 	bottom = switch_cache_bottom(&newinfo);
 	ret = traverse_trees(n, t, &newinfo);
 	restore_cache_bottom(&newinfo, bottom);
 
-	for (i = 0; i < n; i++)
+	for (i = 0; i < nr_buf; i++)
 		free(buf[i]);
 
 	return ret;
author	Junio C Hamano <gitster@pobox.com>	2017-04-24 05:07:48 (GMT)
committer	Junio C Hamano <gitster@pobox.com>	2017-04-24 05:07:48 (GMT)
commit	8868ba19626a72d5883241c36fd5477897ef8dd1 (patch)
tree	89157def2f48bd982313403bd658420be10624a6
parent	8b6bba66633cf16807ae962e0b1126af164f0e78 (diff)
parent	d12a8cf0af18804c2000efc7a0224da631e04cd1 (diff)
download	git-8868ba19626a72d5883241c36fd5477897ef8dd1.zip git-8868ba19626a72d5883241c36fd5477897ef8dd1.tar.gz git-8868ba19626a72d5883241c36fd5477897ef8dd1.tar.bz2