From 207899137dd75916f65bb9988ccf0559760427d6 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:26 +0000
Subject: builtin/pack-objects: make hash agnostic

Avoid hard-coding a hash size, instead preferring to use the_hash_algo.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 940fbcb..fceac7b 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -879,7 +879,7 @@ static void write_reused_pack_one(size_t pos, struct hashfile *out,
 			len = encode_in_pack_object_header(header, sizeof(header),
 							   OBJ_REF_DELTA, size);
 			hashwrite(out, header, len);
-			hashwrite(out, base_sha1, 20);
+			hashwrite(out, base_sha1, the_hash_algo->rawsz);
 			copy_pack_data(out, reuse_packfile, w_curs, cur, next - cur);
 			return;
 		}
-- 
cgit v0.10.2-6-g49f6


From 768e30ea27c58aa69893e10b96ba5ba5680dc3cf Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:27 +0000
Subject: hash: implement and use a context cloning function

For all of our SHA-1 implementations and most of our SHA-256
implementations, the hash context we use is a real struct.  For these
implementations, it's possible to copy a hash context by making a copy
of the struct.

However, for our libgcrypt implementation, our hash context is a
pointer.  Consequently, copying it does not lead to an independent hash
context like we intended.

Fortunately, however, libgcrypt provides us with a handy function to
copy hash contexts.  Let's add a cloning function to the hash algorithm
API, and use it in the one place we need to make a hash context copy.
With this change, our libgcrypt SHA-256 implementation is fully
functional with all of our other hash implementations.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/csum-file.c b/csum-file.c
index 53ce37f..0f35fa5 100644
--- a/csum-file.c
+++ b/csum-file.c
@@ -157,7 +157,7 @@ void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpo
 {
 	hashflush(f);
 	checkpoint->offset = f->total;
-	checkpoint->ctx = f->ctx;
+	the_hash_algo->clone_fn(&checkpoint->ctx, &f->ctx);
 }
 
 int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
diff --git a/hash.h b/hash.h
index 52a4f1a..e0f3f16 100644
--- a/hash.h
+++ b/hash.h
@@ -16,6 +16,7 @@
 #endif
 
 #if defined(SHA256_GCRYPT)
+#define SHA256_NEEDS_CLONE_HELPER
 #include "sha256/gcrypt.h"
 #elif defined(SHA256_OPENSSL)
 #include <openssl/sha.h>
@@ -54,12 +55,28 @@
 #define git_SHA256_Update	platform_SHA256_Update
 #define git_SHA256_Final	platform_SHA256_Final
 
+#ifdef platform_SHA256_Clone
+#define git_SHA256_Clone	platform_SHA256_Clone
+#endif
+
 #ifdef SHA1_MAX_BLOCK_SIZE
 #include "compat/sha1-chunked.h"
 #undef git_SHA1_Update
 #define git_SHA1_Update		git_SHA1_Update_Chunked
 #endif
 
+static inline void git_SHA1_Clone(git_SHA_CTX *dst, const git_SHA_CTX *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+
+#ifndef SHA256_NEEDS_CLONE_HELPER
+static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *src)
+{
+	memcpy(dst, src, sizeof(*dst));
+}
+#endif
+
 /*
  * Note that these constants are suitable for indexing the hash_algos array and
  * comparing against each other, but are otherwise arbitrary, so they should not
@@ -85,6 +102,7 @@ union git_hash_ctx {
 typedef union git_hash_ctx git_hash_ctx;
 
 typedef void (*git_hash_init_fn)(git_hash_ctx *ctx);
+typedef void (*git_hash_clone_fn)(git_hash_ctx *dst, const git_hash_ctx *src);
 typedef void (*git_hash_update_fn)(git_hash_ctx *ctx, const void *in, size_t len);
 typedef void (*git_hash_final_fn)(unsigned char *hash, git_hash_ctx *ctx);
 
@@ -110,6 +128,9 @@ struct git_hash_algo {
 	/* The hash initialization function. */
 	git_hash_init_fn init_fn;
 
+	/* The hash context cloning function. */
+	git_hash_clone_fn clone_fn;
+
 	/* The hash update function. */
 	git_hash_update_fn update_fn;
 
diff --git a/sha1-file.c b/sha1-file.c
index d785de8..c36ca5a 100644
--- a/sha1-file.c
+++ b/sha1-file.c
@@ -74,6 +74,11 @@ static void git_hash_sha1_init(git_hash_ctx *ctx)
 	git_SHA1_Init(&ctx->sha1);
 }
 
+static void git_hash_sha1_clone(git_hash_ctx *dst, const git_hash_ctx *src)
+{
+	git_SHA1_Clone(&dst->sha1, &src->sha1);
+}
+
 static void git_hash_sha1_update(git_hash_ctx *ctx, const void *data, size_t len)
 {
 	git_SHA1_Update(&ctx->sha1, data, len);
@@ -90,6 +95,11 @@ static void git_hash_sha256_init(git_hash_ctx *ctx)
 	git_SHA256_Init(&ctx->sha256);
 }
 
+static void git_hash_sha256_clone(git_hash_ctx *dst, const git_hash_ctx *src)
+{
+	git_SHA256_Clone(&dst->sha256, &src->sha256);
+}
+
 static void git_hash_sha256_update(git_hash_ctx *ctx, const void *data, size_t len)
 {
 	git_SHA256_Update(&ctx->sha256, data, len);
@@ -105,6 +115,11 @@ static void git_hash_unknown_init(git_hash_ctx *ctx)
 	BUG("trying to init unknown hash");
 }
 
+static void git_hash_unknown_clone(git_hash_ctx *dst, const git_hash_ctx *src)
+{
+	BUG("trying to clone unknown hash");
+}
+
 static void git_hash_unknown_update(git_hash_ctx *ctx, const void *data, size_t len)
 {
 	BUG("trying to update unknown hash");
@@ -123,6 +138,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		0,
 		0,
 		git_hash_unknown_init,
+		git_hash_unknown_clone,
 		git_hash_unknown_update,
 		git_hash_unknown_final,
 		NULL,
@@ -136,6 +152,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		GIT_SHA1_HEXSZ,
 		GIT_SHA1_BLKSZ,
 		git_hash_sha1_init,
+		git_hash_sha1_clone,
 		git_hash_sha1_update,
 		git_hash_sha1_final,
 		&empty_tree_oid,
@@ -149,6 +166,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = {
 		GIT_SHA256_HEXSZ,
 		GIT_SHA256_BLKSZ,
 		git_hash_sha256_init,
+		git_hash_sha256_clone,
 		git_hash_sha256_update,
 		git_hash_sha256_final,
 		&empty_tree_oid_sha256,
diff --git a/sha256/gcrypt.h b/sha256/gcrypt.h
index 09bd8bb..501da5e 100644
--- a/sha256/gcrypt.h
+++ b/sha256/gcrypt.h
@@ -22,8 +22,14 @@ inline void gcrypt_SHA256_Final(unsigned char *digest, gcrypt_SHA256_CTX *ctx)
 	memcpy(digest, gcry_md_read(*ctx, GCRY_MD_SHA256), SHA256_DIGEST_SIZE);
 }
 
+inline void gcrypt_SHA256_Clone(gcrypt_SHA256_CTX *dst, const gcrypt_SHA256_CTX *src)
+{
+	gcry_md_copy(dst, *src);
+}
+
 #define platform_SHA256_CTX gcrypt_SHA256_CTX
 #define platform_SHA256_Init gcrypt_SHA256_Init
+#define platform_SHA256_Clone gcrypt_SHA256_Clone
 #define platform_SHA256_Update gcrypt_SHA256_Update
 #define platform_SHA256_Final gcrypt_SHA256_Final
 
-- 
cgit v0.10.2-6-g49f6


From dadacf10dc9e11046e2c8c49347174e71cef3fa3 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:28 +0000
Subject: hex: introduce parsing variants taking hash algorithms

Introduce variants of get_oid_hex and parse_oid_hex that parse an
arbitrary hash algorithm, implementing internal functions to avoid
duplication.  These functions can be used in the transport code to parse
refs properly.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/cache.h b/cache.h
index 37c899b..4c9c21a 100644
--- a/cache.h
+++ b/cache.h
@@ -1481,6 +1481,9 @@ int set_disambiguate_hint_config(const char *var, const char *value);
 int get_sha1_hex(const char *hex, unsigned char *sha1);
 int get_oid_hex(const char *hex, struct object_id *sha1);
 
+/* Like get_oid_hex, but for an arbitrary hash algorithm. */
+int get_oid_hex_algop(const char *hex, struct object_id *oid, const struct git_hash_algo *algop);
+
 /*
  * Read `len` pairs of hexadecimal digits from `hex` and write the
  * values to `binary` as `len` bytes. Return 0 on success, or -1 if
@@ -1516,6 +1519,10 @@ char *oid_to_hex(const struct object_id *oid);						/* same static buffer */
  */
 int parse_oid_hex(const char *hex, struct object_id *oid, const char **end);
 
+/* Like parse_oid_hex, but for an arbitrary hash algorithm. */
+int parse_oid_hex_algop(const char *hex, struct object_id *oid, const char **end,
+			const struct git_hash_algo *algo);
+
 /*
  * This reads short-hand syntax that not only evaluates to a commit
  * object name, but also can act as if the end user spelled the name
diff --git a/hex.c b/hex.c
index fd7f00c..10e24dc 100644
--- a/hex.c
+++ b/hex.c
@@ -47,32 +47,51 @@ int hex_to_bytes(unsigned char *binary, const char *hex, size_t len)
 	return 0;
 }
 
-int get_sha1_hex(const char *hex, unsigned char *sha1)
+static int get_hash_hex_algop(const char *hex, unsigned char *hash,
+			      const struct git_hash_algo *algop)
 {
 	int i;
-	for (i = 0; i < the_hash_algo->rawsz; i++) {
+	for (i = 0; i < algop->rawsz; i++) {
 		int val = hex2chr(hex);
 		if (val < 0)
 			return -1;
-		*sha1++ = val;
+		*hash++ = val;
 		hex += 2;
 	}
 	return 0;
 }
 
+int get_sha1_hex(const char *hex, unsigned char *sha1)
+{
+	return get_hash_hex_algop(hex, sha1, the_hash_algo);
+}
+
+int get_oid_hex_algop(const char *hex, struct object_id *oid,
+		      const struct git_hash_algo *algop)
+{
+	return get_hash_hex_algop(hex, oid->hash, algop);
+}
+
 int get_oid_hex(const char *hex, struct object_id *oid)
 {
-	return get_sha1_hex(hex, oid->hash);
+	return get_oid_hex_algop(hex, oid, the_hash_algo);
 }
 
-int parse_oid_hex(const char *hex, struct object_id *oid, const char **end)
+int parse_oid_hex_algop(const char *hex, struct object_id *oid,
+			const char **end,
+			const struct git_hash_algo *algop)
 {
-	int ret = get_oid_hex(hex, oid);
+	int ret = get_hash_hex_algop(hex, oid->hash, algop);
 	if (!ret)
-		*end = hex + the_hash_algo->hexsz;
+		*end = hex + algop->hexsz;
 	return ret;
 }
 
+int parse_oid_hex(const char *hex, struct object_id *oid, const char **end)
+{
+	return parse_oid_hex_algop(hex, oid, end, the_hash_algo);
+}
+
 char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash,
 			  const struct git_hash_algo *algop)
 {
-- 
cgit v0.10.2-6-g49f6


From 61e2a70ff26f83f761fa36e0cc16996878d8dd59 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:29 +0000
Subject: hex: add functions to parse hex object IDs in any algorithm

There are some places where we need to parse a hex object ID in any
algorithm without knowing beforehand which algorithm is in use. An
example is when parsing fast-import marks.

Add a get_oid_hex_any to parse an object ID and return the algorithm it
belongs to, and additionally add parse_oid_hex_any which is the
equivalent change for parse_oid_hex. If the object is not parseable, we
return GIT_HASH_UNKNOWN.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/cache.h b/cache.h
index 4c9c21a..158d7cc 100644
--- a/cache.h
+++ b/cache.h
@@ -1523,6 +1523,16 @@ int parse_oid_hex(const char *hex, struct object_id *oid, const char **end);
 int parse_oid_hex_algop(const char *hex, struct object_id *oid, const char **end,
 			const struct git_hash_algo *algo);
 
+
+/*
+ * These functions work like get_oid_hex and parse_oid_hex, but they will parse
+ * a hex value for any algorithm. The algorithm is detected based on the length
+ * and the algorithm in use is returned. If this is not a hex object ID in any
+ * algorithm, returns GIT_HASH_UNKNOWN.
+ */
+int get_oid_hex_any(const char *hex, struct object_id *oid);
+int parse_oid_hex_any(const char *hex, struct object_id *oid, const char **end);
+
 /*
  * This reads short-hand syntax that not only evaluates to a commit
  * object name, but also can act as if the end user spelled the name
diff --git a/hex.c b/hex.c
index 10e24dc..da51e64 100644
--- a/hex.c
+++ b/hex.c
@@ -72,6 +72,20 @@ int get_oid_hex_algop(const char *hex, struct object_id *oid,
 	return get_hash_hex_algop(hex, oid->hash, algop);
 }
 
+/*
+ * NOTE: This function relies on hash algorithms being in order from shortest
+ * length to longest length.
+ */
+int get_oid_hex_any(const char *hex, struct object_id *oid)
+{
+	int i;
+	for (i = GIT_HASH_NALGOS - 1; i > 0; i--) {
+		if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i]))
+			return i;
+	}
+	return GIT_HASH_UNKNOWN;
+}
+
 int get_oid_hex(const char *hex, struct object_id *oid)
 {
 	return get_oid_hex_algop(hex, oid, the_hash_algo);
@@ -87,6 +101,14 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid,
 	return ret;
 }
 
+int parse_oid_hex_any(const char *hex, struct object_id *oid, const char **end)
+{
+	int ret = get_oid_hex_any(hex, oid);
+	if (ret)
+		*end = hex + hash_algos[ret].hexsz;
+	return ret;
+}
+
 int parse_oid_hex(const char *hex, struct object_id *oid, const char **end)
 {
 	return parse_oid_hex_algop(hex, oid, end, the_hash_algo);
-- 
cgit v0.10.2-6-g49f6


From 94127599252d85e85571ecde740ea97e59e4bede Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:30 +0000
Subject: repository: require a build flag to use SHA-256

At this point, SHA-256 support is experimental and some behavior may
change. To avoid surprising unsuspecting users, require a build flag,
ENABLE_SHA256, to allow use of a non-SHA-1 algorithm. Enable this flag
by default when the DEVELOPER make flag is set so that contributors can
test this case adequately.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/config.mak.dev b/config.mak.dev
index bf1f3fc..2e19435 100644
--- a/config.mak.dev
+++ b/config.mak.dev
@@ -17,6 +17,8 @@ DEVELOPER_CFLAGS += -Wstrict-prototypes
 DEVELOPER_CFLAGS += -Wunused
 DEVELOPER_CFLAGS += -Wvla
 
+DEVELOPER_CFLAGS += -DENABLE_SHA256
+
 ifndef COMPILER_FEATURES
 COMPILER_FEATURES := $(shell ./detect-compiler $(CC))
 endif
diff --git a/repository.c b/repository.c
index a4174dd..6f7f6f0 100644
--- a/repository.c
+++ b/repository.c
@@ -89,6 +89,10 @@ void repo_set_gitdir(struct repository *repo,
 void repo_set_hash_algo(struct repository *repo, int hash_algo)
 {
 	repo->hash_algo = &hash_algos[hash_algo];
+#ifndef ENABLE_SHA256
+	if (hash_algo != GIT_HASH_SHA1)
+		die(_("The hash algorithm %s is not supported in this build."), repo->hash_algo->name);
+#endif
 }
 
 /*
-- 
cgit v0.10.2-6-g49f6


From 192b517589fc927af6b01914c67cb6d1973a1c29 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:31 +0000
Subject: t: use hash-specific lookup tables to define test constants

In the future, we'll allow developers to run the testsuite with a hash
algorithm of their choice.  To make this easier, compute the fixed
constants using test_oid. Move the constant initialization down below
the point where test-lib-functions.sh is loaded so the functions are
defined.

Note that we don't provide a value for the OID_REGEX value directly
because writing a large number of instances of "[0-9a-f]" in the
oid-info files is unwieldy and there isn't a way to compute it based on
those values. Instead, compute it based on ZERO_OID.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 0ea1e5a..9fe390b 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -494,21 +494,6 @@ case $(echo $GIT_TRACE |tr "[A-Z]" "[a-z]") in
 	;;
 esac
 
-# Convenience
-#
-# A regexp to match 5, 35 and 40 hexdigits
-_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
-_x35="$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
-_x40="$_x35$_x05"
-
-# Zero SHA-1
-_z40=0000000000000000000000000000000000000000
-
-OID_REGEX="$_x40"
-ZERO_OID=$_z40
-EMPTY_TREE=4b825dc642cb6eb9a060e54bf8d69288fbee4904
-EMPTY_BLOB=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
-
 # Line feed
 LF='
 '
@@ -1383,6 +1368,20 @@ then
 	fi
 fi
 
+# Convenience
+# A regexp to match 5, 35 and 40 hexdigits
+_x05='[0-9a-f][0-9a-f][0-9a-f][0-9a-f][0-9a-f]'
+_x35="$_x05$_x05$_x05$_x05$_x05$_x05$_x05"
+_x40="$_x35$_x05"
+
+test_oid_init
+
+ZERO_OID=$(test_oid zero)
+OID_REGEX=$(echo $ZERO_OID | sed -e 's/0/[0-9a-f]/g')
+EMPTY_TREE=$(test_oid empty_tree)
+EMPTY_BLOB=$(test_oid empty_blob)
+_z40=$ZERO_OID
+
 # Provide an implementation of the 'yes' utility; the upper bound
 # limit is there to help Windows that cannot stop this loop from
 # wasting cycles when the downstream stops reading, so do not be
-- 
cgit v0.10.2-6-g49f6


From 1f5f8f3e85d9afeb4c2688de9a192351f616f143 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:32 +0000
Subject: t6300: abstract away SHA-1-specific constants

Adjust the test so that it computes variables for object IDs instead of
using hard-coded hashes.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 9c910ce..2406b93 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -20,6 +20,10 @@ setdate_and_increment () {
 }
 
 test_expect_success setup '
+	test_oid_cache <<-EOF &&
+	disklen sha1:138
+	disklen sha256:154
+	EOF
 	setdate_and_increment &&
 	echo "Using $datestamp" > one &&
 	git add one &&
@@ -50,6 +54,9 @@ test_atom() {
 	"
 }
 
+hexlen=$(test_oid hexsz)
+disklen=$(test_oid disklen)
+
 test_atom head refname refs/heads/master
 test_atom head refname: refs/heads/master
 test_atom head refname:short master
@@ -82,9 +89,9 @@ test_atom head push:rstrip=-1 refs
 test_atom head push:strip=1 remotes/myfork/master
 test_atom head push:strip=-1 master
 test_atom head objecttype commit
-test_atom head objectsize 171
-test_atom head objectsize:disk 138
-test_atom head deltabase 0000000000000000000000000000000000000000
+test_atom head objectsize $((131 + hexlen))
+test_atom head objectsize:disk $disklen
+test_atom head deltabase $ZERO_OID
 test_atom head objectname $(git rev-parse refs/heads/master)
 test_atom head objectname:short $(git rev-parse --short refs/heads/master)
 test_atom head objectname:short=1 $(git rev-parse --short=1 refs/heads/master)
@@ -125,11 +132,11 @@ test_atom tag refname:short testtag
 test_atom tag upstream ''
 test_atom tag push ''
 test_atom tag objecttype tag
-test_atom tag objectsize 154
-test_atom tag objectsize:disk 138
-test_atom tag '*objectsize:disk' 138
-test_atom tag deltabase 0000000000000000000000000000000000000000
-test_atom tag '*deltabase' 0000000000000000000000000000000000000000
+test_atom tag objectsize $((114 + hexlen))
+test_atom tag objectsize:disk $disklen
+test_atom tag '*objectsize:disk' $disklen
+test_atom tag deltabase $ZERO_OID
+test_atom tag '*deltabase' $ZERO_OID
 test_atom tag objectname $(git rev-parse refs/tags/testtag)
 test_atom tag objectname:short $(git rev-parse --short refs/tags/testtag)
 test_atom head objectname:short=1 $(git rev-parse --short=1 refs/heads/master)
@@ -139,7 +146,7 @@ test_atom tag parent ''
 test_atom tag numparent ''
 test_atom tag object $(git rev-parse refs/tags/testtag^0)
 test_atom tag type 'commit'
-test_atom tag '*objectname' 'ea122842f48be4afb2d1fc6a4b96c05885ab7463'
+test_atom tag '*objectname' $(git rev-parse refs/tags/testtag^{})
 test_atom tag '*objecttype' 'commit'
 test_atom tag author ''
 test_atom tag authorname ''
-- 
cgit v0.10.2-6-g49f6


From 8bd5a2906ebca9e2d7fcecd1628c1585ffbd85d3 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:33 +0000
Subject: t6300: make hash algorithm independent

One of the git for-each-ref tests asks to sort by object ID.  However,
when sorted, the order of the refs differs between SHA-1 and SHA-256.
Sort the expected output so that the test passes.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/t6300-for-each-ref.sh b/t/t6300-for-each-ref.sh
index 2406b93..b3c1092 100755
--- a/t/t6300-for-each-ref.sh
+++ b/t/t6300-for-each-ref.sh
@@ -650,7 +650,7 @@ test_atom refs/tags/signed-long contents "subject line
 body contents
 $sig"
 
-cat >expected <<EOF
+sort >expected <<EOF
 $(git rev-parse refs/tags/bogo) <committer@example.com> refs/tags/bogo
 $(git rev-parse refs/tags/master) <committer@example.com> refs/tags/master
 EOF
-- 
cgit v0.10.2-6-g49f6


From 6946e525ae19dc756ff0a2a46abf1d1d30511594 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:34 +0000
Subject: t/helper/test-dump-split-index: initialize git repository

In this test helper, we read the index.  In order to have the proper
hash algorithm set up, we must call setup_git_directory.  Do so, so that
the test works when extensions.objectFormat is set.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/helper/test-dump-split-index.c b/t/helper/test-dump-split-index.c
index 63c689d..a209880 100644
--- a/t/helper/test-dump-split-index.c
+++ b/t/helper/test-dump-split-index.c
@@ -13,6 +13,8 @@ int cmd__dump_split_index(int ac, const char **av)
 	struct split_index *si;
 	int i;
 
+	setup_git_directory();
+
 	do_read_index(&the_index, av[1], 1);
 	printf("own %s\n", oid_to_hex(&the_index.oid));
 	si = the_index.split_index;
-- 
cgit v0.10.2-6-g49f6


From 8dca7f30e4e5da31b375ecaeeac995305c9c0c5c Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:35 +0000
Subject: t/helper: initialize repository if necessary

The repository helper is used in t5318 to read commit graphs whether
we're in a repository or not. However, without a repository, we have no
way to properly initialize the hash algorithm, meaning that the file is
misread.

Fix this by calling setup_git_directory_gently, which will read the
environment variable the testsuite sets to ensure that the correct hash
algorithm is set.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c
index f7f8618..ecc768e 100644
--- a/t/helper/test-repository.c
+++ b/t/helper/test-repository.c
@@ -75,6 +75,10 @@ static void test_get_commit_tree_in_graph(const char *gitdir,
 
 int cmd__repository(int argc, const char **argv)
 {
+	int nongit_ok = 0;
+
+	setup_git_directory_gently(&nongit_ok);
+
 	if (argc < 2)
 		die("must have at least 2 arguments");
 	if (!strcmp(argv[1], "parse_commit_in_graph")) {
-- 
cgit v0.10.2-6-g49f6


From bf154a878281b6a971ece0fb6d917938298be60d Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:36 +0000
Subject: t/helper: make repository tests hash independent

This test currently hard-codes the hash algorithm as SHA-1 when calling
repo_set_hash_algo so that the_hash_algo is properly initialized.
However, this does not work with SHA-256 repositories. Read the
repository value that repo_init has read into the local repository
variable and set the algorithm based on that value.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c
index ecc768e..56f0e3c 100644
--- a/t/helper/test-repository.c
+++ b/t/helper/test-repository.c
@@ -19,12 +19,11 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree,
 
 	memset(the_repository, 0, sizeof(*the_repository));
 
-	/* TODO: Needed for temporary hack in hashcmp, see 183a638b7da. */
-	repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
-
 	if (repo_init(&r, gitdir, worktree))
 		die("Couldn't init repo");
 
+	repo_set_hash_algo(the_repository, hash_algo_by_ptr(r.hash_algo));
+
 	c = lookup_commit(&r, commit_oid);
 
 	if (!parse_commit_in_graph(&r, c))
@@ -50,12 +49,11 @@ static void test_get_commit_tree_in_graph(const char *gitdir,
 
 	memset(the_repository, 0, sizeof(*the_repository));
 
-	/* TODO: Needed for temporary hack in hashcmp, see 183a638b7da. */
-	repo_set_hash_algo(the_repository, GIT_HASH_SHA1);
-
 	if (repo_init(&r, gitdir, worktree))
 		die("Couldn't init repo");
 
+	repo_set_hash_algo(the_repository, hash_algo_by_ptr(r.hash_algo));
+
 	c = lookup_commit(&r, commit_oid);
 
 	/*
-- 
cgit v0.10.2-6-g49f6


From cfe3917c85f38c3367de7b6f5838ecaf6d1e148d Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:37 +0000
Subject: setup: allow check_repository_format to read repository format

In some cases, we will want to not only check the repository format, but
extract the information that we've gained.  To do so, allow
check_repository_format to take a pointer to struct repository_format.
Allow passing NULL for this argument if we're not interested in the
information, and pass NULL for all existing callers.  A future patch
will make use of this information.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/builtin/init-db.c b/builtin/init-db.c
index 944ec77..b11f070 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -378,7 +378,7 @@ int init_db(const char *git_dir, const char *real_git_dir,
 	 * config file, so this will not fail.  What we are catching
 	 * is an attempt to reinitialize new repository with an old tool.
 	 */
-	check_repository_format();
+	check_repository_format(NULL);
 
 	reinit = create_default_files(template_dir, original_git_dir);
 
diff --git a/cache.h b/cache.h
index 158d7cc..29ee02a 100644
--- a/cache.h
+++ b/cache.h
@@ -1086,8 +1086,10 @@ int verify_repository_format(const struct repository_format *format,
  * and die if it is a version we don't understand. Generally one would
  * set_git_dir() before calling this, and use it only for "are we in a valid
  * repo?".
+ *
+ * If successful and fmt is not NULL, fill fmt with data.
  */
-void check_repository_format(void);
+void check_repository_format(struct repository_format *fmt);
 
 #define MTIME_CHANGED	0x0001
 #define CTIME_CHANGED	0x0002
diff --git a/path.c b/path.c
index 88cf593..a10b62c 100644
--- a/path.c
+++ b/path.c
@@ -851,7 +851,7 @@ const char *enter_repo(const char *path, int strict)
 
 	if (is_git_directory(".")) {
 		set_git_dir(".");
-		check_repository_format();
+		check_repository_format(NULL);
 		return path;
 	}
 
diff --git a/setup.c b/setup.c
index 4ea7a0b..af20c3d 100644
--- a/setup.c
+++ b/setup.c
@@ -1253,10 +1253,12 @@ int git_config_perm(const char *var, const char *value)
 	return -(i & 0666);
 }
 
-void check_repository_format(void)
+void check_repository_format(struct repository_format *fmt)
 {
 	struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
-	check_repository_format_gently(get_git_dir(), &repo_fmt, NULL);
+	if (!fmt)
+		fmt = &repo_fmt;
+	check_repository_format_gently(get_git_dir(), fmt, NULL);
 	startup_info->have_repository = 1;
 	clear_repository_format(&repo_fmt);
 }
-- 
cgit v0.10.2-6-g49f6


From 8b8f7189dff1f9ee2e3d65dfaafc2fc9f4956232 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:38 +0000
Subject: builtin/init-db: allow specifying hash algorithm on command line

Allow the user to specify the hash algorithm on the command line by
using the --object-format option to git init.  Validate that the user is
not attempting to reinitialize a repository with a different hash
algorithm.  Ensure that if we are writing a non-SHA-1 repository that we
set the repository version to 1 and write the objectFormat extension.

Restrict this option to work only when ENABLE_SHA256 is set until the
codebase is in a situation to fully support this.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/Documentation/git-init.txt b/Documentation/git-init.txt
index 32880aa..adc6adf 100644
--- a/Documentation/git-init.txt
+++ b/Documentation/git-init.txt
@@ -10,7 +10,7 @@ SYNOPSIS
 --------
 [verse]
 'git init' [-q | --quiet] [--bare] [--template=<template_directory>]
-	  [--separate-git-dir <git dir>]
+	  [--separate-git-dir <git dir>] [--object-format=<format]
 	  [--shared[=<permissions>]] [directory]
 
 
@@ -48,6 +48,11 @@ Only print error and warning messages; all other output will be suppressed.
 Create a bare repository. If `GIT_DIR` environment is not set, it is set to the
 current working directory.
 
+--object-format=<format>::
+
+Specify the given object format (hash algorithm) for the repository.  The valid
+values are 'sha1' and (if enabled) 'sha256'.  'sha1' is the default.
+
 --template=<template_directory>::
 
 Specify the directory from which templates will be used.  (See the "TEMPLATE
diff --git a/builtin/clone.c b/builtin/clone.c
index 4f6150c..961996a 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1097,7 +1097,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
 		}
 	}
 
-	init_db(git_dir, real_git_dir, option_template, INIT_DB_QUIET);
+	init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, INIT_DB_QUIET);
 
 	if (real_git_dir)
 		git_dir = real_git_dir;
diff --git a/builtin/init-db.c b/builtin/init-db.c
index b11f070..d05552f 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -177,7 +177,8 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree)
 }
 
 static int create_default_files(const char *template_path,
-				const char *original_git_dir)
+				const char *original_git_dir,
+				const struct repository_format *fmt)
 {
 	struct stat st1;
 	struct strbuf buf = STRBUF_INIT;
@@ -187,6 +188,7 @@ static int create_default_files(const char *template_path,
 	int reinit;
 	int filemode;
 	struct strbuf err = STRBUF_INIT;
+	int repo_version = GIT_REPO_VERSION;
 
 	/* Just look for `init.templatedir` */
 	init_db_template_dir = NULL; /* re-set in case it was set before */
@@ -244,11 +246,23 @@ static int create_default_files(const char *template_path,
 			exit(1);
 	}
 
+#ifndef ENABLE_SHA256
+	if (fmt->hash_algo != GIT_HASH_SHA1)
+		die(_("The hash algorithm %s is not supported in this build."), hash_algos[fmt->hash_algo].name);
+#endif
+
+	if (fmt->hash_algo != GIT_HASH_SHA1)
+		repo_version = GIT_REPO_VERSION_READ;
+
 	/* This forces creation of new config file */
 	xsnprintf(repo_version_string, sizeof(repo_version_string),
-		  "%d", GIT_REPO_VERSION);
+		  "%d", repo_version);
 	git_config_set("core.repositoryformatversion", repo_version_string);
 
+	if (fmt->hash_algo != GIT_HASH_SHA1)
+		git_config_set("extensions.objectformat",
+			       hash_algos[fmt->hash_algo].name);
+
 	/* Check filemode trustability */
 	path = git_path_buf(&buf, "config");
 	filemode = TEST_FILEMODE;
@@ -340,12 +354,26 @@ static void separate_git_dir(const char *git_dir, const char *git_link)
 	write_file(git_link, "gitdir: %s", git_dir);
 }
 
+static void validate_hash_algorithm(struct repository_format *repo_fmt, int hash)
+{
+	/*
+	 * If we already have an initialized repo, don't allow the user to
+	 * specify a different algorithm, as that could cause corruption.
+	 * Otherwise, if the user has specified one on the command line, use it.
+	 */
+	if (repo_fmt->version >= 0 && hash != GIT_HASH_UNKNOWN && hash != repo_fmt->hash_algo)
+		die(_("attempt to reinitialize repository with different hash"));
+	else if (hash != GIT_HASH_UNKNOWN)
+		repo_fmt->hash_algo = hash;
+}
+
 int init_db(const char *git_dir, const char *real_git_dir,
-	    const char *template_dir, unsigned int flags)
+	    const char *template_dir, int hash, unsigned int flags)
 {
 	int reinit;
 	int exist_ok = flags & INIT_DB_EXIST_OK;
 	char *original_git_dir = real_pathdup(git_dir, 1);
+	struct repository_format repo_fmt = REPOSITORY_FORMAT_INIT;
 
 	if (real_git_dir) {
 		struct stat st;
@@ -378,9 +406,11 @@ int init_db(const char *git_dir, const char *real_git_dir,
 	 * config file, so this will not fail.  What we are catching
 	 * is an attempt to reinitialize new repository with an old tool.
 	 */
-	check_repository_format(NULL);
+	check_repository_format(&repo_fmt);
 
-	reinit = create_default_files(template_dir, original_git_dir);
+	validate_hash_algorithm(&repo_fmt, hash);
+
+	reinit = create_default_files(template_dir, original_git_dir, &repo_fmt);
 
 	create_object_directory();
 
@@ -482,6 +512,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 	const char *work_tree;
 	const char *template_dir = NULL;
 	unsigned int flags = 0;
+	const char *object_format = NULL;
+	int hash_algo = GIT_HASH_UNKNOWN;
 	const struct option init_db_options[] = {
 		OPT_STRING(0, "template", &template_dir, N_("template-directory"),
 				N_("directory from which templates will be used")),
@@ -494,6 +526,8 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 		OPT_BIT('q', "quiet", &flags, N_("be quiet"), INIT_DB_QUIET),
 		OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
 			   N_("separate git dir from working tree")),
+		OPT_STRING(0, "object-format", &object_format, N_("hash"),
+			   N_("specify the hash algorithm to use")),
 		OPT_END()
 	};
 
@@ -546,6 +580,12 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 		free(cwd);
 	}
 
+	if (object_format) {
+		hash_algo = hash_algo_by_name(object_format);
+		if (hash_algo == GIT_HASH_UNKNOWN)
+			die(_("unknown hash algorithm '%s'"), object_format);
+	}
+
 	if (init_shared_repository != -1)
 		set_shared_repository(init_shared_repository);
 
@@ -597,5 +637,5 @@ int cmd_init_db(int argc, const char **argv, const char *prefix)
 	UNLEAK(work_tree);
 
 	flags |= INIT_DB_EXIST_OK;
-	return init_db(git_dir, real_git_dir, template_dir, flags);
+	return init_db(git_dir, real_git_dir, template_dir, hash_algo, flags);
 }
diff --git a/cache.h b/cache.h
index 29ee02a..7a47e02 100644
--- a/cache.h
+++ b/cache.h
@@ -627,7 +627,8 @@ int path_inside_repo(const char *prefix, const char *path);
 #define INIT_DB_EXIST_OK 0x0002
 
 int init_db(const char *git_dir, const char *real_git_dir,
-	    const char *template_dir, unsigned int flags);
+	    const char *template_dir, int hash_algo,
+	    unsigned int flags);
 
 void sanitize_stdfds(void);
 int daemonize(void);
-- 
cgit v0.10.2-6-g49f6


From 3c9331a12980bb897ba4da24789bf36b18222f4f Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:39 +0000
Subject: builtin/init-db: add environment variable for new repo hash

For the foreseeable future, SHA-1 will be the default algorithm for Git.
However, when running the testsuite, we want to be able to test an
arbitrary algorithm. It would be quite burdensome and very untidy to
have to specify the algorithm we'd like to test every time we
initialized a new repository somewhere in the testsuite, so add an
environment variable to allow us to specify the default hash algorithm
for Git.

This has the benefit that we can set it once for the entire testsuite
and not have to think about it. In the future, users can also use it to
set the default for their repositories if they would like to do so.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/Documentation/git.txt b/Documentation/git.txt
index b0672bd..9d6769e 100644
--- a/Documentation/git.txt
+++ b/Documentation/git.txt
@@ -493,6 +493,12 @@ double-quotes and respecting backslash escapes. E.g., the value
 	details. This variable has lower precedence than other path
 	variables such as GIT_INDEX_FILE, GIT_OBJECT_DIRECTORY...
 
+`GIT_DEFAULT_HASH_ALGORITHM`::
+	If this variable is set, the default hash algorithm for new
+	repositories will be set to this value. This value is currently
+	ignored when cloning; the setting of the remote repository
+	is used instead. The default is "sha1".
+
 Git Commits
 ~~~~~~~~~~~
 `GIT_AUTHOR_NAME`::
diff --git a/builtin/init-db.c b/builtin/init-db.c
index d05552f..ab4fd68 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -20,6 +20,8 @@
 #define TEST_FILEMODE 1
 #endif
 
+#define GIT_DEFAULT_HASH_ENVIRONMENT "GIT_DEFAULT_HASH"
+
 static int init_is_bare_repository = 0;
 static int init_shared_repository = -1;
 static const char *init_db_template_dir;
@@ -356,6 +358,7 @@ static void separate_git_dir(const char *git_dir, const char *git_link)
 
 static void validate_hash_algorithm(struct repository_format *repo_fmt, int hash)
 {
+	const char *env = getenv(GIT_DEFAULT_HASH_ENVIRONMENT);
 	/*
 	 * If we already have an initialized repo, don't allow the user to
 	 * specify a different algorithm, as that could cause corruption.
@@ -365,6 +368,12 @@ static void validate_hash_algorithm(struct repository_format *repo_fmt, int hash
 		die(_("attempt to reinitialize repository with different hash"));
 	else if (hash != GIT_HASH_UNKNOWN)
 		repo_fmt->hash_algo = hash;
+	else if (env) {
+		int env_algo = hash_algo_by_name(env);
+		if (env_algo == GIT_HASH_UNKNOWN)
+			die(_("unknown hash algorithm '%s'"), env);
+		repo_fmt->hash_algo = env_algo;
+	}
 }
 
 int init_db(const char *git_dir, const char *real_git_dir,
-- 
cgit v0.10.2-6-g49f6


From efa7ae36c134cc0de6f766c1ab5dfd6b55fab65c Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:40 +0000
Subject: init-db: move writing repo version into a function

When we perform a clone, we won't know the remote side's hash algorithm
until we've read the heads.  Consequently, we'll need to rewrite the
repository format version and hash algorithm once we know what the
remote side has.  Move the code that does this into its own function so
that we can call it from clone in the future.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/builtin/init-db.c b/builtin/init-db.c
index ab4fd68..5d96e59 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -178,6 +178,29 @@ static int needs_work_tree_config(const char *git_dir, const char *work_tree)
 	return 1;
 }
 
+void initialize_repository_version(int hash_algo)
+{
+	char repo_version_string[10];
+	int repo_version = GIT_REPO_VERSION;
+
+#ifndef ENABLE_SHA256
+	if (hash_algo != GIT_HASH_SHA1)
+		die(_("The hash algorithm %s is not supported in this build."), hash_algos[hash_algo].name);
+#endif
+
+	if (hash_algo != GIT_HASH_SHA1)
+		repo_version = GIT_REPO_VERSION_READ;
+
+	/* This forces creation of new config file */
+	xsnprintf(repo_version_string, sizeof(repo_version_string),
+		  "%d", repo_version);
+	git_config_set("core.repositoryformatversion", repo_version_string);
+
+	if (hash_algo != GIT_HASH_SHA1)
+		git_config_set("extensions.objectformat",
+			       hash_algos[hash_algo].name);
+}
+
 static int create_default_files(const char *template_path,
 				const char *original_git_dir,
 				const struct repository_format *fmt)
@@ -185,12 +208,10 @@ static int create_default_files(const char *template_path,
 	struct stat st1;
 	struct strbuf buf = STRBUF_INIT;
 	char *path;
-	char repo_version_string[10];
 	char junk[2];
 	int reinit;
 	int filemode;
 	struct strbuf err = STRBUF_INIT;
-	int repo_version = GIT_REPO_VERSION;
 
 	/* Just look for `init.templatedir` */
 	init_db_template_dir = NULL; /* re-set in case it was set before */
@@ -248,22 +269,7 @@ static int create_default_files(const char *template_path,
 			exit(1);
 	}
 
-#ifndef ENABLE_SHA256
-	if (fmt->hash_algo != GIT_HASH_SHA1)
-		die(_("The hash algorithm %s is not supported in this build."), hash_algos[fmt->hash_algo].name);
-#endif
-
-	if (fmt->hash_algo != GIT_HASH_SHA1)
-		repo_version = GIT_REPO_VERSION_READ;
-
-	/* This forces creation of new config file */
-	xsnprintf(repo_version_string, sizeof(repo_version_string),
-		  "%d", repo_version);
-	git_config_set("core.repositoryformatversion", repo_version_string);
-
-	if (fmt->hash_algo != GIT_HASH_SHA1)
-		git_config_set("extensions.objectformat",
-			       hash_algos[fmt->hash_algo].name);
+	initialize_repository_version(fmt->hash_algo);
 
 	/* Check filemode trustability */
 	path = git_path_buf(&buf, "config");
diff --git a/cache.h b/cache.h
index 7a47e02..0653318 100644
--- a/cache.h
+++ b/cache.h
@@ -629,6 +629,7 @@ int path_inside_repo(const char *prefix, const char *path);
 int init_db(const char *git_dir, const char *real_git_dir,
 	    const char *template_dir, int hash_algo,
 	    unsigned int flags);
+void initialize_repository_version(int hash_algo);
 
 void sanitize_stdfds(void);
 int daemonize(void);
-- 
cgit v0.10.2-6-g49f6


From e02a7141f83326f7098800fed764061ecf1f0eff Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:41 +0000
Subject: worktree: allow repository version 1

Git supports both repository versions 0 and 1.  These formats are
identical except for the presence of extensions.  When using an
extension, such as for a different hash algorithm, a check for only
version 0 causes the check to fail.  Instead, call
verify_repository_format to verify that we have an appropriate version
and no unknown extensions.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/worktree.c b/worktree.c
index 5b4793c..d1d23aa 100644
--- a/worktree.c
+++ b/worktree.c
@@ -449,7 +449,7 @@ const struct worktree *find_shared_symref(const char *symref,
 int submodule_uses_worktrees(const char *path)
 {
 	char *submodule_gitdir;
-	struct strbuf sb = STRBUF_INIT;
+	struct strbuf sb = STRBUF_INIT, err = STRBUF_INIT;
 	DIR *dir;
 	struct dirent *d;
 	int ret = 0;
@@ -463,18 +463,16 @@ int submodule_uses_worktrees(const char *path)
 	get_common_dir_noenv(&sb, submodule_gitdir);
 	free(submodule_gitdir);
 
-	/*
-	 * The check below is only known to be good for repository format
-	 * version 0 at the time of writing this code.
-	 */
 	strbuf_addstr(&sb, "/config");
 	read_repository_format(&format, sb.buf);
-	if (format.version != 0) {
+	if (verify_repository_format(&format, &err)) {
+		strbuf_release(&err);
 		strbuf_release(&sb);
 		clear_repository_format(&format);
 		return 1;
 	}
 	clear_repository_format(&format);
+	strbuf_release(&err);
 
 	/* Replace config by worktrees. */
 	strbuf_setlen(&sb, sb.len - strlen("config"));
-- 
cgit v0.10.2-6-g49f6


From 42d4e1d1128fa1cb56032ac58f65ea3dd1296a9a Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:42 +0000
Subject: commit: use expected signature header for SHA-256

The transition plan anticipates that we will allow signatures using
multiple algorithms in a single commit. In order to do so, we need to
use a different header per algorithm so that it will be obvious over
which data to compute the signature.

The transition plan specifies that we should use "gpgsig-sha256", so
wire up the commit code such that it can write and parse the current
algorithm, and it can remove the headers for any algorithm when creating
a new commit. Add tests to ensure that we write using the right header
and that git fsck doesn't reject these commits.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/builtin/commit.c b/builtin/commit.c
index 7ba33a3..798d362 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1659,7 +1659,7 @@ int cmd_commit(int argc, const char **argv, const char *prefix)
 	}
 
 	if (amend) {
-		const char *exclude_gpgsig[2] = { "gpgsig", NULL };
+		const char *exclude_gpgsig[3] = { "gpgsig", "gpgsig-sha256", NULL };
 		extra = read_commit_extra_headers(current_head, exclude_gpgsig);
 	} else {
 		struct commit_extra_header **tail = &extra;
diff --git a/commit.c b/commit.c
index a6cfa41..534e14f 100644
--- a/commit.c
+++ b/commit.c
@@ -961,14 +961,22 @@ cleanup_return:
 	return ret;
 }
 
-static const char gpg_sig_header[] = "gpgsig";
-static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1;
+/*
+ * Indexed by hash algorithm identifier.
+ */
+static const char *gpg_sig_headers[] = {
+	NULL,
+	"gpgsig",
+	"gpgsig-sha256",
+};
 
 static int do_sign_commit(struct strbuf *buf, const char *keyid)
 {
 	struct strbuf sig = STRBUF_INIT;
 	int inspos, copypos;
 	const char *eoh;
+	const char *gpg_sig_header = gpg_sig_headers[hash_algo_by_ptr(the_hash_algo)];
+	int gpg_sig_header_len = strlen(gpg_sig_header);
 
 	/* find the end of the header */
 	eoh = strstr(buf->buf, "\n\n");
@@ -1010,6 +1018,8 @@ int parse_signed_commit(const struct commit *commit,
 	const char *buffer = get_commit_buffer(commit, &size);
 	int in_signature, saw_signature = -1;
 	const char *line, *tail;
+	const char *gpg_sig_header = gpg_sig_headers[hash_algo_by_ptr(the_hash_algo)];
+	int gpg_sig_header_len = strlen(gpg_sig_header);
 
 	line = buffer;
 	tail = buffer + size;
@@ -1056,11 +1066,17 @@ int remove_signature(struct strbuf *buf)
 
 		if (in_signature && line[0] == ' ')
 			sig_end = next;
-		else if (starts_with(line, gpg_sig_header) &&
-			 line[gpg_sig_header_len] == ' ') {
-			sig_start = line;
-			sig_end = next;
-			in_signature = 1;
+		else if (starts_with(line, "gpgsig")) {
+			int i;
+			for (i = 1; i < GIT_HASH_NALGOS; i++) {
+				const char *p;
+				if (skip_prefix(line, gpg_sig_headers[i], &p) &&
+				    *p == ' ') {
+					sig_start = line;
+					sig_end = next;
+					in_signature = 1;
+				}
+			}
 		} else {
 			if (*line == '\n')
 				/* dump the whole remainder of the buffer */
diff --git a/sequencer.c b/sequencer.c
index ba90a51..cbd920a 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -1321,7 +1321,7 @@ static int try_to_commit(struct repository *r,
 		return -1;
 
 	if (flags & AMEND_MSG) {
-		const char *exclude_gpgsig[] = { "gpgsig", NULL };
+		const char *exclude_gpgsig[] = { "gpgsig", "gpgsig-sha256", NULL };
 		const char *out_enc = get_commit_output_encoding();
 		const char *message = logmsg_reencode(current_head, NULL,
 						      out_enc);
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 02478bc..70a8307 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -133,6 +133,30 @@ test_expect_success 'other worktree HEAD link pointing at a funny place' '
 	test_i18ngrep "worktrees/other/HEAD points to something strange" out
 '
 
+test_expect_success 'commit with multiple signatures is okay' '
+	git cat-file commit HEAD >basis &&
+	cat >sigs <<-EOF &&
+	gpgsig -----BEGIN PGP SIGNATURE-----
+	  VGhpcyBpcyBub3QgcmVhbGx5IGEgc2lnbmF0dXJlLg==
+	  -----END PGP SIGNATURE-----
+	gpgsig-sha256 -----BEGIN PGP SIGNATURE-----
+	  VGhpcyBpcyBub3QgcmVhbGx5IGEgc2lnbmF0dXJlLg==
+	  -----END PGP SIGNATURE-----
+	EOF
+	sed -e "/^committer/q" basis >okay &&
+	cat sigs >>okay &&
+	echo >>okay &&
+	sed -e "1,/^$/d" basis >>okay &&
+	cat okay &&
+	new=$(git hash-object -t commit -w --stdin <okay) &&
+	test_when_finished "remove_object $new" &&
+	git update-ref refs/heads/bogus "$new" &&
+	test_when_finished "git update-ref -d refs/heads/bogus" &&
+	git fsck 2>out &&
+	cat out &&
+	! grep "commit $new" out
+'
+
 test_expect_success 'email without @ is okay' '
 	git cat-file commit HEAD >basis &&
 	sed "s/@/AT/" basis >okay &&
diff --git a/t/t7510-signed-commit.sh b/t/t7510-signed-commit.sh
index 0c06d22..6baaa1a 100755
--- a/t/t7510-signed-commit.sh
+++ b/t/t7510-signed-commit.sh
@@ -6,6 +6,11 @@ GNUPGHOME_NOT_USED=$GNUPGHOME
 . "$TEST_DIRECTORY/lib-gpg.sh"
 
 test_expect_success GPG 'create signed commits' '
+	test_oid_cache <<-\EOF &&
+	header sha1:gpgsig
+	header sha256:gpgsig-sha256
+	EOF
+
 	test_when_finished "test_unconfig commit.gpgsign" &&
 
 	echo 1 >file && git add file &&
@@ -155,6 +160,11 @@ test_expect_success GPG 'verify signatures with --raw' '
 	)
 '
 
+test_expect_success GPG 'proper header is used for hash algorithm' '
+	git cat-file commit fourth-signed >output &&
+	grep "^$(test_oid header) -----BEGIN PGP SIGNATURE-----" output
+'
+
 test_expect_success GPG 'show signed commit with signature' '
 	git show -s initial >commit &&
 	git show -s --show-signature initial >show &&
@@ -162,7 +172,7 @@ test_expect_success GPG 'show signed commit with signature' '
 	git cat-file commit initial >cat &&
 	grep -v -e "gpg: " -e "Warning: " show >show.commit &&
 	grep -e "gpg: " -e "Warning: " show >show.gpg &&
-	grep -v "^ " cat | grep -v "^gpgsig " >cat.commit &&
+	grep -v "^ " cat | grep -v "^$(test_oid header) " >cat.commit &&
 	test_cmp show.commit commit &&
 	test_cmp show.gpg verify.2 &&
 	test_cmp cat.commit verify.1
@@ -299,10 +309,10 @@ test_expect_success GPG 'check config gpg.format values' '
 test_expect_success GPG 'detect fudged commit with double signature' '
 	sed -e "/gpgsig/,/END PGP/d" forged1 >double-base &&
 	sed -n -e "/gpgsig/,/END PGP/p" forged1 | \
-		sed -e "s/^gpgsig//;s/^ //" | gpg --dearmor >double-sig1.sig &&
+		sed -e "s/^$(test_oid header)//;s/^ //" | gpg --dearmor >double-sig1.sig &&
 	gpg -o double-sig2.sig -u 29472784 --detach-sign double-base &&
 	cat double-sig1.sig double-sig2.sig | gpg --enarmor >double-combined.asc &&
-	sed -e "s/^\(-.*\)ARMORED FILE/\1SIGNATURE/;1s/^/gpgsig /;2,\$s/^/ /" \
+	sed -e "s/^\(-.*\)ARMORED FILE/\1SIGNATURE/;1s/^/$(test_oid header) /;2,\$s/^/ /" \
 		double-combined.asc > double-gpgsig &&
 	sed -e "/committer/r double-gpgsig" double-base >double-commit &&
 	git hash-object -w -t commit double-commit >double-commit.commit &&
-- 
cgit v0.10.2-6-g49f6


From ddddf8d7e254f4af6297d0ed62ea6a5d7eabdb64 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:45 +0000
Subject: fast-import: permit reading multiple marks files

In the future, we'll want to read marks files for submodules as well.
Refactor the existing code to make it possible to read multiple marks
files, each into their own marks set.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/fast-import.c b/fast-import.c
index b8b65a8..b9ecd89 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -493,9 +493,8 @@ static char *pool_strdup(const char *s)
 	return r;
 }
 
-static void insert_mark(uintmax_t idnum, struct object_entry *oe)
+static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe)
 {
-	struct mark_set *s = marks;
 	while ((idnum >> s->shift) >= 1024) {
 		s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
 		s->shift = marks->shift + 10;
@@ -919,7 +918,7 @@ static int store_object(
 
 	e = insert_object(&oid);
 	if (mark)
-		insert_mark(mark, e);
+		insert_mark(marks, mark, e);
 	if (e->idx.offset) {
 		duplicate_count_by_type[type]++;
 		return 1;
@@ -1117,7 +1116,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
 	e = insert_object(&oid);
 
 	if (mark)
-		insert_mark(mark, e);
+		insert_mark(marks, mark, e);
 
 	if (e->idx.offset) {
 		duplicate_count_by_type[OBJ_BLOB]++;
@@ -1712,16 +1711,9 @@ static void dump_marks(void)
 	}
 }
 
-static void read_marks(void)
+static void read_mark_file(struct mark_set *s, FILE *f)
 {
 	char line[512];
-	FILE *f = fopen(import_marks_file, "r");
-	if (f)
-		;
-	else if (import_marks_file_ignore_missing && errno == ENOENT)
-		goto done; /* Marks file does not exist */
-	else
-		die_errno("cannot read '%s'", import_marks_file);
 	while (fgets(line, sizeof(line), f)) {
 		uintmax_t mark;
 		char *end;
@@ -1747,8 +1739,20 @@ static void read_marks(void)
 			e->pack_id = MAX_PACK_ID;
 			e->idx.offset = 1; /* just not zero! */
 		}
-		insert_mark(mark, e);
+		insert_mark(s, mark, e);
 	}
+}
+
+static void read_marks(void)
+{
+	FILE *f = fopen(import_marks_file, "r");
+	if (f)
+		;
+	else if (import_marks_file_ignore_missing && errno == ENOENT)
+		goto done; /* Marks file does not exist */
+	else
+		die_errno("cannot read '%s'", import_marks_file);
+	read_mark_file(marks, f);
 	fclose(f);
 done:
 	import_marks_file_done = 1;
@@ -3130,7 +3134,7 @@ static void parse_alias(void)
 		die(_("Expected 'to' command, got %s"), command_buf.buf);
 	e = find_object(&b.oid);
 	assert(e);
-	insert_mark(next_mark, e);
+	insert_mark(marks, next_mark, e);
 }
 
 static char* make_fast_import_path(const char *path)
-- 
cgit v0.10.2-6-g49f6


From abe0cc536414f2b9cfa37f208b36df5126e6356a Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:46 +0000
Subject: fast-import: add helper function for inserting mark object entries

Currently, everything we want to insert into a mark set is an object
entry. However, in the future, we will want to insert objects of other
types. Teach read_mark_file to take a function pointer which helps us
insert the object we want into our mark set.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/fast-import.c b/fast-import.c
index b9ecd89..3ce4a04 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -131,6 +131,8 @@ struct recent_command {
 	char *buf;
 };
 
+typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+
 /* Configured limits on output */
 static unsigned long max_depth = 50;
 static off_t max_packsize;
@@ -1711,14 +1713,30 @@ static void dump_marks(void)
 	}
 }
 
-static void read_mark_file(struct mark_set *s, FILE *f)
+static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+{
+	struct object_entry *e;
+	e = find_object(oid);
+	if (!e) {
+		enum object_type type = oid_object_info(the_repository,
+							oid, NULL);
+		if (type < 0)
+			die("object not found: %s", oid_to_hex(oid));
+		e = insert_object(oid);
+		e->type = type;
+		e->pack_id = MAX_PACK_ID;
+		e->idx.offset = 1; /* just not zero! */
+	}
+	insert_mark(s, mark, e);
+}
+
+static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
 {
 	char line[512];
 	while (fgets(line, sizeof(line), f)) {
 		uintmax_t mark;
 		char *end;
 		struct object_id oid;
-		struct object_entry *e;
 
 		end = strchr(line, '\n');
 		if (line[0] != ':' || !end)
@@ -1728,18 +1746,7 @@ static void read_mark_file(struct mark_set *s, FILE *f)
 		if (!mark || end == line + 1
 			|| *end != ' ' || get_oid_hex(end + 1, &oid))
 			die("corrupt mark line: %s", line);
-		e = find_object(&oid);
-		if (!e) {
-			enum object_type type = oid_object_info(the_repository,
-								&oid, NULL);
-			if (type < 0)
-				die("object not found: %s", oid_to_hex(&oid));
-			e = insert_object(&oid);
-			e->type = type;
-			e->pack_id = MAX_PACK_ID;
-			e->idx.offset = 1; /* just not zero! */
-		}
-		insert_mark(s, mark, e);
+		inserter(s, &oid, mark);
 	}
 }
 
@@ -1752,7 +1759,7 @@ static void read_marks(void)
 		goto done; /* Marks file does not exist */
 	else
 		die_errno("cannot read '%s'", import_marks_file);
-	read_mark_file(marks, f);
+	read_mark_file(marks, f, insert_object_entry);
 	fclose(f);
 done:
 	import_marks_file_done = 1;
-- 
cgit v0.10.2-6-g49f6


From 11d8ef3ee7ab8cfe56a4276ea4b62794d23293e3 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:47 +0000
Subject: fast-import: make find_marks work on any mark set

In the future, we'll use multiple different mark sets with this
function, so make it take an argument that points to the mark set to
operate on.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/fast-import.c b/fast-import.c
index 3ce4a04..8aaa7f6 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -517,10 +517,9 @@ static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry
 	s->data.marked[idnum] = oe;
 }
 
-static struct object_entry *find_mark(uintmax_t idnum)
+static void *find_mark(struct mark_set *s, uintmax_t idnum)
 {
 	uintmax_t orig_idnum = idnum;
-	struct mark_set *s = marks;
 	struct object_entry *oe = NULL;
 	if ((idnum >> s->shift) < 1024) {
 		while (s && s->shift) {
@@ -2225,7 +2224,7 @@ static void file_change_m(const char *p, struct branch *b)
 	}
 
 	if (*p == ':') {
-		oe = find_mark(parse_mark_ref_space(&p));
+		oe = find_mark(marks, parse_mark_ref_space(&p));
 		oidcpy(&oid, &oe->idx.oid);
 	} else if (skip_prefix(p, "inline ", &p)) {
 		inline_data = 1;
@@ -2399,7 +2398,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 	/* Now parse the notemodify command. */
 	/* <dataref> or 'inline' */
 	if (*p == ':') {
-		oe = find_mark(parse_mark_ref_space(&p));
+		oe = find_mark(marks, parse_mark_ref_space(&p));
 		oidcpy(&oid, &oe->idx.oid);
 	} else if (skip_prefix(p, "inline ", &p)) {
 		inline_data = 1;
@@ -2420,7 +2419,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 		oidcpy(&commit_oid, &s->oid);
 	} else if (*p == ':') {
 		uintmax_t commit_mark = parse_mark_ref_eol(p);
-		struct object_entry *commit_oe = find_mark(commit_mark);
+		struct object_entry *commit_oe = find_mark(marks, commit_mark);
 		if (commit_oe->type != OBJ_COMMIT)
 			die("Mark :%" PRIuMAX " not a commit", commit_mark);
 		oidcpy(&commit_oid, &commit_oe->idx.oid);
@@ -2524,7 +2523,7 @@ static int parse_objectish(struct branch *b, const char *objectish)
 		oidcpy(&b->branch_tree.versions[1].oid, t);
 	} else if (*objectish == ':') {
 		uintmax_t idnum = parse_mark_ref_eol(objectish);
-		struct object_entry *oe = find_mark(idnum);
+		struct object_entry *oe = find_mark(marks, idnum);
 		if (oe->type != OBJ_COMMIT)
 			die("Mark :%" PRIuMAX " not a commit", idnum);
 		if (!oideq(&b->oid, &oe->idx.oid)) {
@@ -2588,7 +2587,7 @@ static struct hash_list *parse_merge(unsigned int *count)
 			oidcpy(&n->oid, &s->oid);
 		else if (*from == ':') {
 			uintmax_t idnum = parse_mark_ref_eol(from);
-			struct object_entry *oe = find_mark(idnum);
+			struct object_entry *oe = find_mark(marks, idnum);
 			if (oe->type != OBJ_COMMIT)
 				die("Mark :%" PRIuMAX " not a commit", idnum);
 			oidcpy(&n->oid, &oe->idx.oid);
@@ -2762,7 +2761,7 @@ static void parse_new_tag(const char *arg)
 	} else if (*from == ':') {
 		struct object_entry *oe;
 		from_mark = parse_mark_ref_eol(from);
-		oe = find_mark(from_mark);
+		oe = find_mark(marks, from_mark);
 		type = oe->type;
 		oidcpy(&oid, &oe->idx.oid);
 	} else if (!get_oid(from, &oid)) {
@@ -2920,7 +2919,7 @@ static void parse_get_mark(const char *p)
 	if (*p != ':')
 		die("Not a mark: %s", p);
 
-	oe = find_mark(parse_mark_ref_eol(p));
+	oe = find_mark(marks, parse_mark_ref_eol(p));
 	if (!oe)
 		die("Unknown mark: %s", command_buf.buf);
 
@@ -2935,7 +2934,7 @@ static void parse_cat_blob(const char *p)
 
 	/* cat-blob SP <object> LF */
 	if (*p == ':') {
-		oe = find_mark(parse_mark_ref_eol(p));
+		oe = find_mark(marks, parse_mark_ref_eol(p));
 		if (!oe)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &oe->idx.oid);
@@ -3010,7 +3009,7 @@ static struct object_entry *parse_treeish_dataref(const char **p)
 	struct object_entry *e;
 
 	if (**p == ':') {	/* <mark> */
-		e = find_mark(parse_mark_ref_space(p));
+		e = find_mark(marks, parse_mark_ref_space(p));
 		if (!e)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &e->idx.oid);
-- 
cgit v0.10.2-6-g49f6


From d9db599ca85b008faedc1d5e771e28b6fb648c4e Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:48 +0000
Subject: fast-import: add a generic function to iterate over marks

Currently, we can iterate over marks only to dump them to a file. In the
future, we'll want to perform an arbitrary operation over the items of a
mark set. Add a function, for_each_mark, that iterates over marks in a
set and performs an arbitrary callback function for each mark. Switch
the mark dumping routine to use this function now that it's available.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/fast-import.c b/fast-import.c
index 8aaa7f6..6711f71 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -132,6 +132,7 @@ struct recent_command {
 };
 
 typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp);
 
 /* Configured limits on output */
 static unsigned long max_depth = 50;
@@ -232,6 +233,29 @@ static void parse_get_mark(const char *p);
 static void parse_cat_blob(const char *p);
 static void parse_ls(const char *p, struct branch *b);
 
+static void for_each_mark(struct mark_set *m, uintmax_t base, each_mark_fn_t callback, void *p)
+{
+	uintmax_t k;
+	if (m->shift) {
+		for (k = 0; k < 1024; k++) {
+			if (m->data.sets[k])
+				for_each_mark(m->data.sets[k], base + (k << m->shift), callback, p);
+		}
+	} else {
+		for (k = 0; k < 1024; k++) {
+			if (m->data.marked[k])
+				callback(base + k, m->data.marked[k], p);
+		}
+	}
+}
+
+static void dump_marks_fn(uintmax_t mark, void *object, void *cbp) {
+	struct object_entry *e = object;
+	FILE *f = cbp;
+
+	fprintf(f, ":%" PRIuMAX " %s\n", mark, oid_to_hex(&e->idx.oid));
+}
+
 static void write_branch_report(FILE *rpt, struct branch *b)
 {
 	fprintf(rpt, "%s:\n", b->name);
@@ -260,8 +284,6 @@ static void write_branch_report(FILE *rpt, struct branch *b)
 	fputc('\n', rpt);
 }
 
-static void dump_marks_helper(FILE *, uintmax_t, struct mark_set *);
-
 static void write_crash_report(const char *err)
 {
 	char *loc = git_pathdup("fast_import_crash_%"PRIuMAX, (uintmax_t) getpid());
@@ -340,7 +362,7 @@ static void write_crash_report(const char *err)
 	if (export_marks_file)
 		fprintf(rpt, "  exported to %s\n", export_marks_file);
 	else
-		dump_marks_helper(rpt, 0, marks);
+		for_each_mark(marks, 0, dump_marks_fn, rpt);
 
 	fputc('\n', rpt);
 	fputs("-------------------\n", rpt);
@@ -1655,26 +1677,6 @@ static void dump_tags(void)
 	strbuf_release(&err);
 }
 
-static void dump_marks_helper(FILE *f,
-	uintmax_t base,
-	struct mark_set *m)
-{
-	uintmax_t k;
-	if (m->shift) {
-		for (k = 0; k < 1024; k++) {
-			if (m->data.sets[k])
-				dump_marks_helper(f, base + (k << m->shift),
-					m->data.sets[k]);
-		}
-	} else {
-		for (k = 0; k < 1024; k++) {
-			if (m->data.marked[k])
-				fprintf(f, ":%" PRIuMAX " %s\n", base + k,
-					oid_to_hex(&m->data.marked[k]->idx.oid));
-		}
-	}
-}
-
 static void dump_marks(void)
 {
 	struct lock_file mark_lock = LOCK_INIT;
@@ -1704,7 +1706,7 @@ static void dump_marks(void)
 		return;
 	}
 
-	dump_marks_helper(f, 0, marks);
+	for_each_mark(marks, 0, dump_marks_fn, f);
 	if (commit_lock_file(&mark_lock)) {
 		failure |= error_errno("Unable to write file %s",
 				       export_marks_file);
-- 
cgit v0.10.2-6-g49f6


From 1bdca816412910e1206c15ef47f2a8a6b369b831 Mon Sep 17 00:00:00 2001
From: "brian m. carlson" <sandals@crustytoothpaste.net>
Date: Sat, 22 Feb 2020 20:17:49 +0000
Subject: fast-import: add options for rewriting submodules

When converting a repository using submodules from one hash algorithm to
another, it is necessary to rewrite the submodules from the old
algorithm to the new algorithm, since only references to submodules, not
their contents, are written to the fast-export stream. Without rewriting
the submodules, fast-import fails with an "Invalid dataref" error when
encountering a submodule in another algorithm.

Add a pair of options, --rewrite-submodules-from and
--rewrite-submodules-to, that take a list of marks produced by
fast-export and fast-import, respectively, when processing the
submodule. Use these marks to map the submodule commits from the old
algorithm to the new algorithm.

We read marks into two corresponding struct mark_set objects and then
perform a mapping from the old to the new using a hash table. This lets
us reuse the same mark parsing code that is used elsewhere and allows us
to efficiently read and match marks based on their ID, since mark files
need not be sorted.

Note that because we're using a khash table for the object IDs, and this
table copies values of struct object_id instead of taking references to
them, it's necessary to zero the struct object_id values that we use to
insert and look up in the table. Otherwise, we would end up with SHA-1
values that don't match because of whatever stack garbage might be left
in the unused area.

Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index 7889f95..77c6b3d 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -122,6 +122,26 @@ Locations of Marks Files
 Relative and non-relative marks may be combined by interweaving
 --(no-)-relative-marks with the --(import|export)-marks= options.
 
+Submodule Rewriting
+~~~~~~~~~~~~~~~~~~~
+
+--rewrite-submodules-from=<name>:<file>::
+--rewrite-submodules-to=<name>:<file>::
+  Rewrite the object IDs for the submodule specified by <name> from the values
+	used in the from <file> to those used in the to <file>. The from marks should
+	have been created by `git fast-export`, and the to marks should have been
+	created by `git fast-import` when importing that same submodule.
++
+<name> may be any arbitrary string not containing a colon character, but the
+same value must be used with both options when specifying corresponding marks.
+Multiple submodules may be specified with different values for <name>. It is an
+error not to use these options in corresponding pairs.
++
+These options are primarily useful when converting a repository from one hash
+algorithm to another; without them, fast-import will fail if it encounters a
+submodule because it has no way of writing the object ID into the new hash
+algorithm.
+
 Performance and Compression Tuning
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/fast-import.c b/fast-import.c
index 6711f71..202dda1 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -18,6 +18,7 @@
 #include "object-store.h"
 #include "mem-pool.h"
 #include "commit-reach.h"
+#include "khash.h"
 
 #define PACK_ID_BITS 16
 #define MAX_PACK_ID ((1<<PACK_ID_BITS)-1)
@@ -53,6 +54,7 @@ struct object_entry_pool {
 
 struct mark_set {
 	union {
+		struct object_id *oids[1024];
 		struct object_entry *marked[1024];
 		struct mark_set *sets[1024];
 	} data;
@@ -225,6 +227,11 @@ static int allow_unsafe_features;
 /* Signal handling */
 static volatile sig_atomic_t checkpoint_requested;
 
+/* Submodule marks */
+static struct string_list sub_marks_from = STRING_LIST_INIT_DUP;
+static struct string_list sub_marks_to = STRING_LIST_INIT_DUP;
+static kh_oid_map_t *sub_oid_map;
+
 /* Where to write output of cat-blob commands */
 static int cat_blob_fd = STDOUT_FILENO;
 
@@ -1731,6 +1738,11 @@ static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintm
 	insert_mark(s, mark, e);
 }
 
+static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+{
+	insert_mark(s, mark, xmemdupz(oid, sizeof(*oid)));
+}
+
 static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
 {
 	char line[512];
@@ -1739,13 +1751,17 @@ static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inse
 		char *end;
 		struct object_id oid;
 
+		/* Ensure SHA-1 objects are padded with zeros. */
+		memset(oid.hash, 0, sizeof(oid.hash));
+
 		end = strchr(line, '\n');
 		if (line[0] != ':' || !end)
 			die("corrupt mark line: %s", line);
 		*end = 0;
 		mark = strtoumax(line + 1, &end, 10);
 		if (!mark || end == line + 1
-			|| *end != ' ' || get_oid_hex(end + 1, &oid))
+			|| *end != ' '
+			|| get_oid_hex_any(end + 1, &oid) == GIT_HASH_UNKNOWN)
 			die("corrupt mark line: %s", line);
 		inserter(s, &oid, mark);
 	}
@@ -2146,6 +2162,30 @@ static uintmax_t change_note_fanout(struct tree_entry *root,
 	return do_change_note_fanout(root, root, hex_oid, 0, path, 0, fanout);
 }
 
+static int parse_mapped_oid_hex(const char *hex, struct object_id *oid, const char **end)
+{
+	int algo;
+	khiter_t it;
+
+	/* Make SHA-1 object IDs have all-zero padding. */
+	memset(oid->hash, 0, sizeof(oid->hash));
+
+	algo = parse_oid_hex_any(hex, oid, end);
+	if (algo == GIT_HASH_UNKNOWN)
+		return -1;
+
+	it = kh_get_oid_map(sub_oid_map, *oid);
+	/* No such object? */
+	if (it == kh_end(sub_oid_map)) {
+		/* If we're using the same algorithm, pass it through. */
+		if (hash_algos[algo].format_id == the_hash_algo->format_id)
+			return 0;
+		return -1;
+	}
+	oidcpy(oid, kh_value(sub_oid_map, it));
+	return 0;
+}
+
 /*
  * Given a pointer into a string, parse a mark reference:
  *
@@ -2232,7 +2272,7 @@ static void file_change_m(const char *p, struct branch *b)
 		inline_data = 1;
 		oe = NULL; /* not used with inline_data, but makes gcc happy */
 	} else {
-		if (parse_oid_hex(p, &oid, &p))
+		if (parse_mapped_oid_hex(p, &oid, &p))
 			die("Invalid dataref: %s", command_buf.buf);
 		oe = find_object(&oid);
 		if (*p++ != ' ')
@@ -2406,7 +2446,7 @@ static void note_change_n(const char *p, struct branch *b, unsigned char *old_fa
 		inline_data = 1;
 		oe = NULL; /* not used with inline_data, but makes gcc happy */
 	} else {
-		if (parse_oid_hex(p, &oid, &p))
+		if (parse_mapped_oid_hex(p, &oid, &p))
 			die("Invalid dataref: %s", command_buf.buf);
 		oe = find_object(&oid);
 		if (*p++ != ' ')
@@ -2941,7 +2981,7 @@ static void parse_cat_blob(const char *p)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &oe->idx.oid);
 	} else {
-		if (parse_oid_hex(p, &oid, &p))
+		if (parse_mapped_oid_hex(p, &oid, &p))
 			die("Invalid dataref: %s", command_buf.buf);
 		if (*p)
 			die("Garbage after SHA1: %s", command_buf.buf);
@@ -3005,6 +3045,42 @@ static struct object_entry *dereference(struct object_entry *oe,
 	return find_object(oid);
 }
 
+static void insert_mapped_mark(uintmax_t mark, void *object, void *cbp)
+{
+	struct object_id *fromoid = object;
+	struct object_id *tooid = find_mark(cbp, mark);
+	int ret;
+	khiter_t it;
+
+	it = kh_put_oid_map(sub_oid_map, *fromoid, &ret);
+	/* We've already seen this object. */
+	if (ret == 0)
+		return;
+	kh_value(sub_oid_map, it) = tooid;
+}
+
+static void build_mark_map_one(struct mark_set *from, struct mark_set *to)
+{
+	for_each_mark(from, 0, insert_mapped_mark, to);
+}
+
+static void build_mark_map(struct string_list *from, struct string_list *to)
+{
+	struct string_list_item *fromp, *top;
+
+	sub_oid_map = kh_init_oid_map();
+
+	for_each_string_list_item(fromp, from) {
+		top = string_list_lookup(to, fromp->string);
+		if (!fromp->util) {
+			die(_("Missing from marks for submodule '%s'"), fromp->string);
+		} else if (!top || !top->util) {
+			die(_("Missing to marks for submodule '%s'"), fromp->string);
+		}
+		build_mark_map_one(fromp->util, top->util);
+	}
+}
+
 static struct object_entry *parse_treeish_dataref(const char **p)
 {
 	struct object_id oid;
@@ -3016,7 +3092,7 @@ static struct object_entry *parse_treeish_dataref(const char **p)
 			die("Unknown mark: %s", command_buf.buf);
 		oidcpy(&oid, &e->idx.oid);
 	} else {	/* <sha1> */
-		if (parse_oid_hex(*p, &oid, p))
+		if (parse_mapped_oid_hex(*p, &oid, p))
 			die("Invalid dataref: %s", command_buf.buf);
 		e = find_object(&oid);
 		if (*(*p)++ != ' ')
@@ -3222,6 +3298,26 @@ static void option_export_pack_edges(const char *edges)
 	pack_edges = xfopen(edges, "a");
 }
 
+static void option_rewrite_submodules(const char *arg, struct string_list *list)
+{
+	struct mark_set *ms;
+	FILE *fp;
+	char *s = xstrdup(arg);
+	char *f = strchr(s, ':');
+	if (!f)
+		die(_("Expected format name:filename for submodule rewrite option"));
+	*f = '\0';
+	f++;
+	ms = xcalloc(1, sizeof(*ms));
+	string_list_insert(list, s)->util = ms;
+
+	fp = fopen(f, "r");
+	if (!fp)
+		die_errno("cannot read '%s'", f);
+	read_mark_file(ms, fp, insert_oid_entry);
+	fclose(fp);
+}
+
 static int parse_one_option(const char *option)
 {
 	if (skip_prefix(option, "max-pack-size=", &option)) {
@@ -3284,6 +3380,11 @@ static int parse_one_feature(const char *feature, int from_stream)
 		option_export_marks(arg);
 	} else if (!strcmp(feature, "alias")) {
 		; /* Don't die - this feature is supported */
+	} else if (skip_prefix(feature, "rewrite-submodules-to=", &arg)) {
+		option_rewrite_submodules(arg, &sub_marks_to);
+	} else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
+		option_rewrite_submodules(arg, &sub_marks_from);
+	} else if (skip_prefix(feature, "rewrite-submodules-from=", &arg)) {
 	} else if (!strcmp(feature, "get-mark")) {
 		; /* Don't die - this feature is supported */
 	} else if (!strcmp(feature, "cat-blob")) {
@@ -3389,6 +3490,7 @@ static void parse_argv(void)
 	seen_data_command = 1;
 	if (import_marks_file)
 		read_marks();
+	build_mark_map(&sub_marks_from, &sub_marks_to);
 }
 
 int cmd_main(int argc, const char **argv)
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index ae9950a..22c6c27 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -3382,4 +3382,113 @@ test_expect_success 'X: handling encoding' '
 	git log -1 --format=%B encoding | grep $(printf "\317\200")
 '
 
+###
+### series Y (submodules and hash algorithms)
+###
+
+cat >Y-sub-input <<\Y_INPUT_END
+blob
+mark :1
+data 4
+foo
+
+reset refs/heads/master
+commit refs/heads/master
+mark :2
+author Full Name <user@company.tld> 1000000000 +0100
+committer Full Name <user@company.tld> 1000000000 +0100
+data 24
+Test submodule commit 1
+M 100644 :1 file
+
+blob
+mark :3
+data 8
+foo
+bar
+
+commit refs/heads/master
+mark :4
+author Full Name <user@company.tld> 1000000001 +0100
+committer Full Name <user@company.tld> 1000000001 +0100
+data 24
+Test submodule commit 2
+from :2
+M 100644 :3 file
+Y_INPUT_END
+
+# Note that the submodule object IDs are intentionally not translated.
+cat >Y-main-input <<\Y_INPUT_END
+blob
+mark :1
+data 4
+foo
+
+reset refs/heads/master
+commit refs/heads/master
+mark :2
+author Full Name <user@company.tld> 2000000000 +0100
+committer Full Name <user@company.tld> 2000000000 +0100
+data 14
+Test commit 1
+M 100644 :1 file
+
+blob
+mark :3
+data 73
+[submodule "sub1"]
+	path = sub1
+	url = https://void.example.com/main.git
+
+commit refs/heads/master
+mark :4
+author Full Name <user@company.tld> 2000000001 +0100
+committer Full Name <user@company.tld> 2000000001 +0100
+data 14
+Test commit 2
+from :2
+M 100644 :3 .gitmodules
+M 160000 0712c5be7cf681388e355ef47525aaf23aee1a6d sub1
+
+blob
+mark :5
+data 8
+foo
+bar
+
+commit refs/heads/master
+mark :6
+author Full Name <user@company.tld> 2000000002 +0100
+committer Full Name <user@company.tld> 2000000002 +0100
+data 14
+Test commit 3
+from :4
+M 100644 :5 file
+M 160000 ff729f5e62f72c0c3978207d9a80e5f3a65f14d7 sub1
+Y_INPUT_END
+
+cat >Y-marks <<\Y_INPUT_END
+:2 0712c5be7cf681388e355ef47525aaf23aee1a6d
+:4 ff729f5e62f72c0c3978207d9a80e5f3a65f14d7
+Y_INPUT_END
+
+test_expect_success 'Y: setup' '
+	test_oid_cache <<-EOF
+	Ymaster sha1:9afed2f9161ddf416c0a1863b8b0725b00070504
+	Ymaster sha256:c0a1010da1df187b2e287654793df01b464bd6f8e3f17fc1481a7dadf84caee3
+	EOF
+'
+
+test_expect_success 'Y: rewrite submodules' '
+	git init main1 &&
+	(
+		cd main1 &&
+		git init sub2 &&
+		git -C sub2 fast-import --export-marks=../sub2-marks <../Y-sub-input &&
+		git fast-import --rewrite-submodules-from=sub:../Y-marks \
+			--rewrite-submodules-to=sub:sub2-marks <../Y-main-input &&
+		test "$(git rev-parse master)" = "$(test_oid Ymaster)"
+	)
+'
+
 test_done
-- 
cgit v0.10.2-6-g49f6