sha1_name: cache readdir(3) results in find_short_object_filename()

Read each loose object subdirectory at most once when looking for unique
abbreviated hashes.  This speeds up commands like "git log --pretty=%h"
considerably, which previously caused one readdir(3) call for each
candidate, even for subdirectories that were visited before.

The new cache is kept until the program ends and never invalidated.  The
same is already true for pack indexes.  The inherent racy nature of
finding unique short hashes makes it still fit for this purpose -- a
conflicting new object may be added at any time.  Tasks with higher
consistency requirements should not use it, though.

The cached object names are stored in an oid_array, which is quite
compact.  The bitmap for remembering which subdir was already read is
stored as a char array, with one char per directory -- that's not quite
as compact, but really simple and incurs only an overhead equivalent to
11 hashes after all.

Suggested-by: Jeff King <peff@peff.net>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
author: René Scharfe <l.s.r@web.de> 2017-06-22 18:19:48 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2017-06-22 19:07:51 (GMT)
commit: cc817ca3ef2267c21af9589a7f92190a3659906c (patch)
tree: df2fef1adc0a4c45d9d7a3275a6aff2081b9e52e /sha1_name.c
parent: fd99e2bda0ca6a361ef03c04d6d7fdc7a9c40b78 (diff)
download: git-cc817ca3ef2267c21af9589a7f92190a3659906c.zip
git-cc817ca3ef2267c21af9589a7f92190a3659906c.tar.gz
git-cc817ca3ef2267c21af9589a7f92190a3659906c.tar.bz2
1 files changed, 30 insertions, 20 deletions
diff --git a/sha1_name.c b/sha1_name.c
index 8eec9f7..76cb76a 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -77,10 +77,19 @@ static void update_candidates(struct disambiguate_state *ds, const struct object
 	/* otherwise, current can be discarded and candidate is still good */
 }
 
+static int append_loose_object(const struct object_id *oid, const char *path,
+			       void *data)
+{
+	oid_array_append(data, oid);
+	return 0;
+}
+
+static int match_sha(unsigned, const unsigned char *, const unsigned char *);
+
 static void find_short_object_filename(struct disambiguate_state *ds)
 {
+	int subdir_nr = ds->bin_pfx.hash[0];
 	struct alternate_object_database *alt;
-	char hex[GIT_MAX_HEXSZ];
 	static struct alternate_object_database *fakeent;
 
 	if (!fakeent) {
@@ -95,29 +104,30 @@ static void find_short_object_filename(struct disambiguate_state *ds)
 	}
 	fakeent->next = alt_odb_list;
 
-	xsnprintf(hex, sizeof(hex), "%.2s", ds->hex_pfx);
 	for (alt = fakeent; alt && !ds->ambiguous; alt = alt->next) {
-		struct strbuf *buf = alt_scratch_buf(alt);
-		struct dirent *de;
-		DIR *dir;
-
-		strbuf_addf(buf, "%.2s/", ds->hex_pfx);
-		dir = opendir(buf->buf);
-		if (!dir)
-			continue;
+		int pos;
 
-		while (!ds->ambiguous && (de = readdir(dir)) != NULL) {
-			struct object_id oid;
+		if (!alt->loose_objects_subdir_seen[subdir_nr]) {
+			struct strbuf *buf = alt_scratch_buf(alt);
+			strbuf_addf(buf, "%02x/", subdir_nr);
+			for_each_file_in_obj_subdir(subdir_nr, buf,
+						    append_loose_object,
+						    NULL, NULL,
+						    &alt->loose_objects_cache);
+			alt->loose_objects_subdir_seen[subdir_nr] = 1;
+		}
 
-			if (strlen(de->d_name) != GIT_SHA1_HEXSZ - 2)
-				continue;
-			if (memcmp(de->d_name, ds->hex_pfx + 2, ds->len - 2))
-				continue;
-			memcpy(hex + 2, de->d_name, GIT_SHA1_HEXSZ - 2);
-			if (!get_oid_hex(hex, &oid))
-				update_candidates(ds, &oid);
+		pos = oid_array_lookup(&alt->loose_objects_cache, &ds->bin_pfx);
+		if (pos < 0)
+			pos = -1 - pos;
+		while (!ds->ambiguous && pos < alt->loose_objects_cache.nr) {
+			const struct object_id *oid;
+			oid = alt->loose_objects_cache.oid + pos;
+			if (!match_sha(ds->len, ds->bin_pfx.hash, oid->hash))
+				break;
+			update_candidates(ds, oid);
+			pos++;
 		}
-		closedir(dir);
 	}
 }
author	René Scharfe <l.s.r@web.de>	2017-06-22 18:19:48 (GMT)
committer	Junio C Hamano <gitster@pobox.com>	2017-06-22 19:07:51 (GMT)
commit	cc817ca3ef2267c21af9589a7f92190a3659906c (patch)
tree	df2fef1adc0a4c45d9d7a3275a6aff2081b9e52e /sha1_name.c
parent	fd99e2bda0ca6a361ef03c04d6d7fdc7a9c40b78 (diff)
download	git-cc817ca3ef2267c21af9589a7f92190a3659906c.zip git-cc817ca3ef2267c21af9589a7f92190a3659906c.tar.gz git-cc817ca3ef2267c21af9589a7f92190a3659906c.tar.bz2