From fae9627470615df5d40f2892e518447058567316 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:37:13 -0500
Subject: upload-pack: drop separate v2 "haves" array

When upload-pack sees a "have" line in the v0 protocol, it immediately
calls got_oid() with its argument and potentially produces an ACK
response. In the v2 protocol, we simply record the argument in an
oid_array, and only later process all of the "have" objects by calling
the equivalent of got_oid() on the contents of the array.

This makes some sense, as v2 is a pure request/response protocol, as
opposed to v0's asynchronous negotiation phase. But there's a downside:
a client can send us an infinite number of garbage "have" lines, which
we'll happily slurp into the array, consuming memory. Whereas in v0,
they are limited by the number of objects in the repository (because
got_oid() only records objects we have ourselves, and we avoid
duplicates by setting a flag on the object struct).

We can make v2 behave more like v0 by also calling got_oid() directly
when v2 parses a "have" line. Calling it early like this is OK because
got_oid() itself does not interact with the client; it only confirms
that we have the object and sets a few flags. Note that unlike v0, v2
does not ever (before or after this patch) check the return code of
got_oid(), which lets the caller know whether we have the object. But
again, that makes sense; v0 is using it to asynchronously tell the
client to stop sending. In v2's synchronous protocol, we just discard
those entries (and decide how to ACK at the end of each round).

There is one slight tweak we need, though. In v2's state machine, we
reach the SEND_ACKS state if the other side sent us any "have" lines,
whether they were useful or not. Right now we do that by checking
whether the "have" array had any entries, but if we record only the
useful ones, that doesn't work. Instead, we can add a simple boolean
that tells us whether we saw any have line (even if it was useless).

This lets us drop the "haves" array entirely, as we're now placing
objects directly into the "have_obj" object array (which is where
got_oid() put them in the long run anyway). And as a bonus, we can drop
the secondary "common" array used in process_haves_and_send_acks(). It
was essentially a copy of "haves" minus the objects we do not have. But
now that we are using "have_obj" directly, we know everything in it is
useful. So in addition to protecting ourselves against malicious input,
we should slightly lower our memory usage for normal inputs.

Note that there is one user-visible effect. The trace2 output records
the number of "haves". Previously this was the total number of "have"
lines we saw, but now is the number of useful ones. We could retain the
original meaning by keeping a separate counter, but it doesn't seem
worth the effort; this trace info is for debugging and metrics, and
arguably the count of common oids is at least as useful as the total
count.

Reported-by: Benjamin Flesch <benjaminflesch@icloud.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/upload-pack.c b/upload-pack.c
index 2537aff..7a83127 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -61,7 +61,6 @@ struct upload_pack_data {
 	struct string_list symref;				/* v0 only */
 	struct object_array want_obj;
 	struct object_array have_obj;
-	struct oid_array haves;					/* v2 only */
 	struct string_list wanted_refs;				/* v2 only */
 	struct strvec hidden_refs;
 
@@ -113,6 +112,7 @@ struct upload_pack_data {
 	unsigned done : 1;					/* v2 only */
 	unsigned allow_ref_in_want : 1;				/* v2 only */
 	unsigned allow_sideband_all : 1;			/* v2 only */
+	unsigned seen_haves : 1;				/* v2 only */
 	unsigned advertise_sid : 1;
 	unsigned sent_capabilities : 1;
 };
@@ -124,7 +124,6 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	struct strvec hidden_refs = STRVEC_INIT;
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct object_array have_obj = OBJECT_ARRAY_INIT;
-	struct oid_array haves = OID_ARRAY_INIT;
 	struct object_array shallows = OBJECT_ARRAY_INIT;
 	struct string_list deepen_not = STRING_LIST_INIT_DUP;
 	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
@@ -137,7 +136,6 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	data->hidden_refs = hidden_refs;
 	data->want_obj = want_obj;
 	data->have_obj = have_obj;
-	data->haves = haves;
 	data->shallows = shallows;
 	data->deepen_not = deepen_not;
 	data->uri_protocols = uri_protocols;
@@ -159,7 +157,6 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	strvec_clear(&data->hidden_refs);
 	object_array_clear(&data->want_obj);
 	object_array_clear(&data->have_obj);
-	oid_array_clear(&data->haves);
 	object_array_clear(&data->shallows);
 	string_list_clear(&data->deepen_not, 0);
 	object_array_clear(&data->extra_edge_obj);
@@ -1532,15 +1529,14 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
 	return 0;
 }
 
-static int parse_have(const char *line, struct oid_array *haves)
+static int parse_have(const char *line, struct upload_pack_data *data)
 {
 	const char *arg;
 	if (skip_prefix(line, "have ", &arg)) {
 		struct object_id oid;
 
-		if (get_oid_hex(arg, &oid))
-			die("git upload-pack: expected SHA1 object, got '%s'", arg);
-		oid_array_append(haves, &oid);
+		got_oid(data, arg, &oid);
+		data->seen_haves = 1;
 		return 1;
 	}
 
@@ -1552,7 +1548,7 @@ static void trace2_fetch_info(struct upload_pack_data *data)
 	struct json_writer jw = JSON_WRITER_INIT;
 
 	jw_object_begin(&jw, 0);
-	jw_object_intmax(&jw, "haves", data->haves.nr);
+	jw_object_intmax(&jw, "haves", data->have_obj.nr);
 	jw_object_intmax(&jw, "wants", data->want_obj.nr);
 	jw_object_intmax(&jw, "want-refs", data->wanted_refs.nr);
 	jw_object_intmax(&jw, "depth", data->depth);
@@ -1586,7 +1582,7 @@ static void process_args(struct packet_reader *request,
 				   &data->hidden_refs, &data->want_obj))
 			continue;
 		/* process have line */
-		if (parse_have(arg, &data->haves))
+		if (parse_have(arg, data))
 			continue;
 
 		/* process args like thin-pack */
@@ -1664,27 +1660,7 @@ static void process_args(struct packet_reader *request,
 		trace2_fetch_info(data);
 }
 
-static int process_haves(struct upload_pack_data *data, struct oid_array *common)
-{
-	int i;
-
-	/* Process haves */
-	for (i = 0; i < data->haves.nr; i++) {
-		const struct object_id *oid = &data->haves.oid[i];
-
-		if (!repo_has_object_file_with_flags(the_repository, oid,
-						     OBJECT_INFO_QUICK | OBJECT_INFO_SKIP_FETCH_OBJECT))
-			continue;
-
-		oid_array_append(common, oid);
-
-		do_got_oid(data, oid);
-	}
-
-	return 0;
-}
-
-static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
+static int send_acks(struct upload_pack_data *data, struct object_array *acks)
 {
 	int i;
 
@@ -1696,7 +1672,7 @@ static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
 
 	for (i = 0; i < acks->nr; i++) {
 		packet_writer_write(&data->writer, "ACK %s\n",
-				    oid_to_hex(&acks->oid[i]));
+				    oid_to_hex(&acks->objects[i].item->oid));
 	}
 
 	if (!data->wait_for_done && ok_to_give_up(data)) {
@@ -1710,13 +1686,11 @@ static int send_acks(struct upload_pack_data *data, struct oid_array *acks)
 
 static int process_haves_and_send_acks(struct upload_pack_data *data)
 {
-	struct oid_array common = OID_ARRAY_INIT;
 	int ret = 0;
 
-	process_haves(data, &common);
 	if (data->done) {
 		ret = 1;
-	} else if (send_acks(data, &common)) {
+	} else if (send_acks(data, &data->have_obj)) {
 		packet_writer_delim(&data->writer);
 		ret = 1;
 	} else {
@@ -1725,8 +1699,6 @@ static int process_haves_and_send_acks(struct upload_pack_data *data)
 		ret = 0;
 	}
 
-	oid_array_clear(&data->haves);
-	oid_array_clear(&common);
 	return ret;
 }
 
@@ -1796,7 +1768,7 @@ int upload_pack_v2(struct repository *r UNUSED, struct packet_reader *request)
 				 * they didn't want anything.
 				 */
 				state = FETCH_DONE;
-			} else if (data.haves.nr) {
+			} else if (data.seen_haves) {
 				/*
 				 * Request had 'have' lines, so lets ACK them.
 				 */
-- 
cgit v0.10.2-6-g49f6


From 720ba25d993423ab7b12b9bbdc62dadaa78d315e Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:37:20 -0500
Subject: upload-pack: switch deepen-not list to an oid_array

When we see a "deepen-not" line from the client, we verify that the
given name can be resolved as a ref, and then add it to a string list to
be passed later to an internal "rev-list --not" traversal. We record the
actual refname in the string list (so the traversal resolves it again
later), but we'd be better off recording the resolved oid:

  1. There's a tiny bit of wasted work in resolving it twice.

  2. There's a small race condition with simultaneous updates; the later
     traversal may resolve to a different value (or not at all). This
     shouldn't cause any bad behavior (we do not care about the value
     in this first resolution, so whatever value rev-list gets is OK)
     but it could mean a confusing error message (if upload-pack fails
     to resolve the ref it produces a useful message, but a failing
     traversal later results in just "revision walk setup failed").

  3. It makes it simpler to de-duplicate the results. We don't de-dup at
     all right now, but we will in the next patch.

>From the client's perspective the behavior should be the same.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/upload-pack.c b/upload-pack.c
index 7a83127..f6395b2 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -65,7 +65,7 @@ struct upload_pack_data {
 	struct strvec hidden_refs;
 
 	struct object_array shallows;
-	struct string_list deepen_not;
+	struct oid_array deepen_not;
 	struct object_array extra_edge_obj;
 	int depth;
 	timestamp_t deepen_since;
@@ -125,7 +125,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct object_array have_obj = OBJECT_ARRAY_INIT;
 	struct object_array shallows = OBJECT_ARRAY_INIT;
-	struct string_list deepen_not = STRING_LIST_INIT_DUP;
+	struct oid_array deepen_not = OID_ARRAY_INIT;
 	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
 	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
 	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
@@ -158,7 +158,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	object_array_clear(&data->want_obj);
 	object_array_clear(&data->have_obj);
 	object_array_clear(&data->shallows);
-	string_list_clear(&data->deepen_not, 0);
+	oid_array_clear(&data->deepen_not);
 	object_array_clear(&data->extra_edge_obj);
 	list_objects_filter_release(&data->filter_options);
 	string_list_clear(&data->allowed_filters, 0);
@@ -926,8 +926,8 @@ static int send_shallow_list(struct upload_pack_data *data)
 		if (data->deepen_not.nr) {
 			strvec_push(&av, "--not");
 			for (i = 0; i < data->deepen_not.nr; i++) {
-				struct string_list_item *s = data->deepen_not.items + i;
-				strvec_push(&av, s->string);
+				struct object_id *oid = data->deepen_not.oid + i;
+				strvec_push(&av, oid_to_hex(oid));
 			}
 			strvec_push(&av, "--not");
 		}
@@ -1004,7 +1004,7 @@ static int process_deepen_since(const char *line, timestamp_t *deepen_since, int
 	return 0;
 }
 
-static int process_deepen_not(const char *line, struct string_list *deepen_not, int *deepen_rev_list)
+static int process_deepen_not(const char *line, struct oid_array *deepen_not, int *deepen_rev_list)
 {
 	const char *arg;
 	if (skip_prefix(line, "deepen-not ", &arg)) {
@@ -1012,7 +1012,7 @@ static int process_deepen_not(const char *line, struct string_list *deepen_not,
 		struct object_id oid;
 		if (expand_ref(the_repository, arg, strlen(arg), &oid, &ref) != 1)
 			die("git upload-pack: ambiguous deepen-not: %s", line);
-		string_list_append(deepen_not, ref);
+		oid_array_append(deepen_not, &oid);
 		free(ref);
 		*deepen_rev_list = 1;
 		return 1;
-- 
cgit v0.10.2-6-g49f6


From 388b96df319c62e9c8d984921b8967db37481d8a Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:37:44 -0500
Subject: upload-pack: use oidset for deepen_not list

We record the oid of every deepen-not line the client sends to us. For a
well-behaved client, the resulting array should be bounded by the number
of unique refs we have. But because there's no de-duplication, a
malicious client can cause the array to grow unbounded by just sending
the same "refs/heads/foo" over and over (assuming such a ref exists).

Since the deepen-not list is just being fed to a "rev-list --not"
traversal, the order of items doesn't matter. So we can replace the
oid_array with an oidset which notices and skips duplicates.

That bounds the memory in malicious cases to be linear in the number of
unique refs. And even in non-malicious cases, there may be a slight
improvement in memory usage if multiple refs point to the same oid
(though in practice this list is probably pretty tiny anyway, as it
comes from the user specifying "--shallow-exclude" on the client fetch).

Note that in the trace2 output we'll now output the number of
de-duplicated objects, rather than the total number of "deepen-not"
lines we received. This is arguably a more useful value for tracing /
debugging anyway.

Reported-by: Benjamin Flesch <benjaminflesch@icloud.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/upload-pack.c b/upload-pack.c
index f6395b2..ebad902 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -65,7 +65,7 @@ struct upload_pack_data {
 	struct strvec hidden_refs;
 
 	struct object_array shallows;
-	struct oid_array deepen_not;
+	struct oidset deepen_not;
 	struct object_array extra_edge_obj;
 	int depth;
 	timestamp_t deepen_since;
@@ -125,7 +125,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct object_array have_obj = OBJECT_ARRAY_INIT;
 	struct object_array shallows = OBJECT_ARRAY_INIT;
-	struct oid_array deepen_not = OID_ARRAY_INIT;
+	struct oidset deepen_not = OID_ARRAY_INIT;
 	struct string_list uri_protocols = STRING_LIST_INIT_DUP;
 	struct object_array extra_edge_obj = OBJECT_ARRAY_INIT;
 	struct string_list allowed_filters = STRING_LIST_INIT_DUP;
@@ -158,7 +158,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	object_array_clear(&data->want_obj);
 	object_array_clear(&data->have_obj);
 	object_array_clear(&data->shallows);
-	oid_array_clear(&data->deepen_not);
+	oidset_clear(&data->deepen_not);
 	object_array_clear(&data->extra_edge_obj);
 	list_objects_filter_release(&data->filter_options);
 	string_list_clear(&data->allowed_filters, 0);
@@ -923,12 +923,13 @@ static int send_shallow_list(struct upload_pack_data *data)
 		strvec_push(&av, "rev-list");
 		if (data->deepen_since)
 			strvec_pushf(&av, "--max-age=%"PRItime, data->deepen_since);
-		if (data->deepen_not.nr) {
+		if (oidset_size(&data->deepen_not)) {
+			const struct object_id *oid;
+			struct oidset_iter iter;
 			strvec_push(&av, "--not");
-			for (i = 0; i < data->deepen_not.nr; i++) {
-				struct object_id *oid = data->deepen_not.oid + i;
+			oidset_iter_init(&data->deepen_not, &iter);
+			while ((oid = oidset_iter_next(&iter)))
 				strvec_push(&av, oid_to_hex(oid));
-			}
 			strvec_push(&av, "--not");
 		}
 		for (i = 0; i < data->want_obj.nr; i++) {
@@ -1004,7 +1005,7 @@ static int process_deepen_since(const char *line, timestamp_t *deepen_since, int
 	return 0;
 }
 
-static int process_deepen_not(const char *line, struct oid_array *deepen_not, int *deepen_rev_list)
+static int process_deepen_not(const char *line, struct oidset *deepen_not, int *deepen_rev_list)
 {
 	const char *arg;
 	if (skip_prefix(line, "deepen-not ", &arg)) {
@@ -1012,7 +1013,7 @@ static int process_deepen_not(const char *line, struct oid_array *deepen_not, in
 		struct object_id oid;
 		if (expand_ref(the_repository, arg, strlen(arg), &oid, &ref) != 1)
 			die("git upload-pack: ambiguous deepen-not: %s", line);
-		oid_array_append(deepen_not, &oid);
+		oidset_insert(deepen_not, &oid);
 		free(ref);
 		*deepen_rev_list = 1;
 		return 1;
@@ -1554,7 +1555,7 @@ static void trace2_fetch_info(struct upload_pack_data *data)
 	jw_object_intmax(&jw, "depth", data->depth);
 	jw_object_intmax(&jw, "shallows", data->shallows.nr);
 	jw_object_bool(&jw, "deepen-since", data->deepen_since);
-	jw_object_intmax(&jw, "deepen-not", data->deepen_not.nr);
+	jw_object_intmax(&jw, "deepen-not", oidset_size(&data->deepen_not));
 	jw_object_bool(&jw, "deepen-relative", data->deepen_relative);
 	if (data->filter_options.choice)
 		jw_object_string(&jw, "filter", list_object_filter_config_name(data->filter_options.choice));
-- 
cgit v0.10.2-6-g49f6


From b065063c570f8dfb25eccb5094fef7e89a1883f2 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:38:40 -0500
Subject: upload-pack: use a strmap for want-ref lines

When the "ref-in-want" capability is advertised (which it is not by
default), then upload-pack processes a "want-ref" line from the client
by checking that the name is a valid ref and recording it in a
string-list.

In theory this list should grow no larger than the number of refs in the
server-side repository. But since we don't do any de-duplication, a
client which sends "want-ref refs/heads/foo" over and over will cause
the array to grow without bound.

We can fix this by switching to strmap, which efficiently detects
duplicates. There are two client-visible changes here:

  1. The "wanted-refs" response will now be in an apparently-random
     order (based on iterating the hashmap) rather than the order given
     by the client. The protocol documentation is quiet on ordering
     here. The current fetch-pack implementation is happy with any
     order, as it looks up each returned ref using a binary search in
     its local sorted list. JGit seems to implement want-ref on the
     server side, but has no client-side support. libgit2 doesn't
     support either side.

     It would obviously be possible to record the original order or to
     use the strmap as an auxiliary data structure. But if the client
     doesn't care, we may as well do the simplest thing.

  2. We'll now reject duplicates explicitly as a protocol error. The
     client should never send them (and our current implementation, even
     when asked to "git fetch master:one master:two" will de-dup on the
     client side).

     If we wanted to be more forgiving, we could perhaps just throw away
     the duplicates. But then our "wanted-refs" response back to the
     client would omit the duplicates, and it's hard to say what a
     client that accidentally sent a duplicate would do with that. So I
     think we're better off to complain loudly before anybody
     accidentally writes such a client.

Let's also add a note to the protocol documentation clarifying that
duplicates are forbidden. As discussed above, this was already the
intent, but it's not very explicit.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/Documentation/gitprotocol-v2.txt b/Documentation/gitprotocol-v2.txt
index 0b800ab..837ea61 100644
--- a/Documentation/gitprotocol-v2.txt
+++ b/Documentation/gitprotocol-v2.txt
@@ -346,7 +346,8 @@ the 'wanted-refs' section in the server's response as explained below.
     want-ref <ref>
 	Indicates to the server that the client wants to retrieve a
 	particular ref, where <ref> is the full name of a ref on the
-	server.
+	server.  It is a protocol error to send want-ref for the
+	same ref more than once.
 
 If the 'sideband-all' feature is advertised, the following argument can be
 included in the client's request:
diff --git a/upload-pack.c b/upload-pack.c
index ebad902..8b47576 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -28,6 +28,7 @@
 #include "shallow.h"
 #include "write-or-die.h"
 #include "json-writer.h"
+#include "strmap.h"
 
 /* Remember to update object flag allocation in object.h */
 #define THEY_HAVE	(1u << 11)
@@ -61,7 +62,7 @@ struct upload_pack_data {
 	struct string_list symref;				/* v0 only */
 	struct object_array want_obj;
 	struct object_array have_obj;
-	struct string_list wanted_refs;				/* v2 only */
+	struct strmap wanted_refs;				/* v2 only */
 	struct strvec hidden_refs;
 
 	struct object_array shallows;
@@ -120,7 +121,7 @@ struct upload_pack_data {
 static void upload_pack_data_init(struct upload_pack_data *data)
 {
 	struct string_list symref = STRING_LIST_INIT_DUP;
-	struct string_list wanted_refs = STRING_LIST_INIT_DUP;
+	struct strmap wanted_refs = STRMAP_INIT;
 	struct strvec hidden_refs = STRVEC_INIT;
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct object_array have_obj = OBJECT_ARRAY_INIT;
@@ -153,7 +154,7 @@ static void upload_pack_data_init(struct upload_pack_data *data)
 static void upload_pack_data_clear(struct upload_pack_data *data)
 {
 	string_list_clear(&data->symref, 1);
-	string_list_clear(&data->wanted_refs, 1);
+	strmap_clear(&data->wanted_refs, 1);
 	strvec_clear(&data->hidden_refs);
 	object_array_clear(&data->want_obj);
 	object_array_clear(&data->have_obj);
@@ -1488,14 +1489,13 @@ static int parse_want(struct packet_writer *writer, const char *line,
 }
 
 static int parse_want_ref(struct packet_writer *writer, const char *line,
-			  struct string_list *wanted_refs,
+			  struct strmap *wanted_refs,
 			  struct strvec *hidden_refs,
 			  struct object_array *want_obj)
 {
 	const char *refname_nons;
 	if (skip_prefix(line, "want-ref ", &refname_nons)) {
 		struct object_id oid;
-		struct string_list_item *item;
 		struct object *o = NULL;
 		struct strbuf refname = STRBUF_INIT;
 
@@ -1507,8 +1507,11 @@ static int parse_want_ref(struct packet_writer *writer, const char *line,
 		}
 		strbuf_release(&refname);
 
-		item = string_list_append(wanted_refs, refname_nons);
-		item->util = oiddup(&oid);
+		if (strmap_put(wanted_refs, refname_nons, oiddup(&oid))) {
+			packet_writer_error(writer, "duplicate want-ref %s",
+					    refname_nons);
+			die("duplicate want-ref %s", refname_nons);
+		}
 
 		if (!starts_with(refname_nons, "refs/tags/")) {
 			struct commit *commit = lookup_commit_in_graph(the_repository, &oid);
@@ -1551,7 +1554,7 @@ static void trace2_fetch_info(struct upload_pack_data *data)
 	jw_object_begin(&jw, 0);
 	jw_object_intmax(&jw, "haves", data->have_obj.nr);
 	jw_object_intmax(&jw, "wants", data->want_obj.nr);
-	jw_object_intmax(&jw, "want-refs", data->wanted_refs.nr);
+	jw_object_intmax(&jw, "want-refs", strmap_get_size(&data->wanted_refs));
 	jw_object_intmax(&jw, "depth", data->depth);
 	jw_object_intmax(&jw, "shallows", data->shallows.nr);
 	jw_object_bool(&jw, "deepen-since", data->deepen_since);
@@ -1705,17 +1708,18 @@ static int process_haves_and_send_acks(struct upload_pack_data *data)
 
 static void send_wanted_ref_info(struct upload_pack_data *data)
 {
-	const struct string_list_item *item;
+	struct hashmap_iter iter;
+	const struct strmap_entry *e;
 
-	if (!data->wanted_refs.nr)
+	if (strmap_empty(&data->wanted_refs))
 		return;
 
 	packet_writer_write(&data->writer, "wanted-refs\n");
 
-	for_each_string_list_item(item, &data->wanted_refs) {
+	strmap_for_each_entry(&data->wanted_refs, &iter, e) {
 		packet_writer_write(&data->writer, "%s %s\n",
-				    oid_to_hex(item->util),
-				    item->string);
+				    oid_to_hex(e->value),
+				    e->key);
 	}
 
 	packet_writer_delim(&data->writer);
-- 
cgit v0.10.2-6-g49f6


From 179776f9e6c4bd2f423c78fcf8b1e4cb4afc4c45 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:38:46 -0500
Subject: upload-pack: accept only a single packfile-uri line

When we see a packfile-uri line from the client, we use
string_list_split() to split it on commas and store the result in a
string_list.  A single packfile-uri line is therefore limited to storing
~64kb, the size of a pkt-line.

But we'll happily accept multiple such lines, and each line appends to
the string list, growing without bound.

In theory this could be useful, making:

  0017packfile-uris http
  0018packfile-uris https

equivalent to:

  001dpackfile-uris http,https

But the protocol documentation doesn't indicate that this should work
(and indeed, refers to this in the singular as "the following argument
can be included in the client's request"). And the client-side
implementation in fetch-pack has always sent a single line (JGit appears
to understand the line on the server side but has no client-side
implementation, and libgit2 understands neither).

If we were worried about compatibility, we could instead just put a
limit on the maximum number of values we'd accept. The current client
implementation limits itself to only two values: "http" and "https", so
something like "256" would be more than enough. But accepting only a
single line seems more in line with the protocol documentation, and
matches other parts of the protocol (e.g., we will not accept a second
"filter" line).

We'll also make this more explicit in the protocol documentation; as
above, I think this was always the intent, but there's no harm in making
it clear.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/Documentation/gitprotocol-v2.txt b/Documentation/gitprotocol-v2.txt
index 837ea61..414bc62 100644
--- a/Documentation/gitprotocol-v2.txt
+++ b/Documentation/gitprotocol-v2.txt
@@ -362,7 +362,8 @@ included in the client's request:
 If the 'packfile-uris' feature is advertised, the following argument
 can be included in the client's request as well as the potential
 addition of the 'packfile-uris' section in the server's response as
-explained below.
+explained below. Note that at most one `packfile-uris` line can be sent
+to the server.
 
     packfile-uris <comma-separated-list-of-protocols>
 	Indicates to the server that the client is willing to receive
diff --git a/upload-pack.c b/upload-pack.c
index 8b47576..2a5c526 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -1646,6 +1646,9 @@ static void process_args(struct packet_reader *request,
 		}
 
 		if (skip_prefix(arg, "packfile-uris ", &p)) {
+			if (data->uri_protocols.nr)
+				send_err_and_die(data,
+						 "multiple packfile-uris lines forbidden");
 			string_list_split(&data->uri_protocols, p, ',', -1);
 			continue;
 		}
-- 
cgit v0.10.2-6-g49f6


From 8c735b11decc96d673140b268e4a257629e1dad4 Mon Sep 17 00:00:00 2001
From: Taylor Blau <me@ttaylorr.com>
Date: Wed, 28 Feb 2024 17:38:58 -0500
Subject: upload-pack: disallow object-info capability by default

We added an "object-info" capability to the v2 upload-pack protocol in
a2ba162cda (object-info: support for retrieving object info,
2021-04-20). In the almost 3 years since, we have not added any
client-side support, and it does not appear to exist in other
implementations either (JGit understands the verb on the server side,
but not on the client side).

Since this largely unused code is accessible over the network by
default, it increases the attack surface of upload-pack. I don't know of
any particularly severe problem, but one issue is that because of the
request/response nature of the v2 protocol, it will happily read an
unbounded number of packets, adding each one to a string list (without
regard to whether they are objects we know about, duplicates, etc).

This may be something we want to improve in the long run, but in the
short term it makes sense to disable the feature entirely. We'll add a
config option as an escape hatch for anybody who wants to develop the
feature further.

A more gentle option would be to add the config option to let people
disable it manually, but leave it enabled by default. But given that
there's no client side support, that seems like the wrong balance with
security.

Disabling by default will slow adoption a bit once client-side support
does become available (there were some patches[1] in 2022, but nothing
got merged and there's been nothing since). But clients have to deal
with older servers that do not understand the option anyway (and the
capability system handles that), so it will just be a matter of servers
flipping their config at that point (and hopefully once any unbounded
allocations have been addressed).

[jk: this is a patch that GitHub has been running for several years, but
     rebased forward and with a new commit message for upstream]

[1] https://lore.kernel.org/git/20220208231911.725273-1-calvinwan@google.com/

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/Documentation/config/transfer.txt b/Documentation/config/transfer.txt
index a9cbdb8..f1ce50f 100644
--- a/Documentation/config/transfer.txt
+++ b/Documentation/config/transfer.txt
@@ -121,3 +121,7 @@ transfer.bundleURI::
 	information from the remote server (if advertised) and download
 	bundles before continuing the clone through the Git protocol.
 	Defaults to `false`.
+
+transfer.advertiseObjectInfo::
+	When `true`, the `object-info` capability is advertised by
+	servers. Defaults to false.
diff --git a/serve.c b/serve.c
index a1d7113..aa651b7 100644
--- a/serve.c
+++ b/serve.c
@@ -12,6 +12,7 @@
 #include "trace2.h"
 
 static int advertise_sid = -1;
+static int advertise_object_info = -1;
 static int client_hash_algo = GIT_HASH_SHA1;
 
 static int always_advertise(struct repository *r UNUSED,
@@ -67,6 +68,17 @@ static void session_id_receive(struct repository *r UNUSED,
 	trace2_data_string("transfer", NULL, "client-sid", client_sid);
 }
 
+static int object_info_advertise(struct repository *r, struct strbuf *value UNUSED)
+{
+	if (advertise_object_info == -1 &&
+	    repo_config_get_bool(r, "transfer.advertiseobjectinfo",
+				 &advertise_object_info)) {
+		/* disabled by default */
+		advertise_object_info = 0;
+	}
+	return advertise_object_info;
+}
+
 struct protocol_capability {
 	/*
 	 * The name of the capability.  The server uses this name when
@@ -135,7 +147,7 @@ static struct protocol_capability capabilities[] = {
 	},
 	{
 		.name = "object-info",
-		.advertise = always_advertise,
+		.advertise = object_info_advertise,
 		.command = cap_object_info,
 	},
 	{
diff --git a/t/t5555-http-smart-common.sh b/t/t5555-http-smart-common.sh
index b1cfe8b..3dcb334 100755
--- a/t/t5555-http-smart-common.sh
+++ b/t/t5555-http-smart-common.sh
@@ -131,7 +131,6 @@ test_expect_success 'git upload-pack --advertise-refs: v2' '
 	fetch=shallow wait-for-done
 	server-option
 	object-format=$(test_oid algo)
-	object-info
 	0000
 	EOF
 
diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh
index 3591bc2..c48830d 100755
--- a/t/t5701-git-serve.sh
+++ b/t/t5701-git-serve.sh
@@ -20,7 +20,6 @@ test_expect_success 'test capability advertisement' '
 	fetch=shallow wait-for-done
 	server-option
 	object-format=$(test_oid algo)
-	object-info
 	EOF
 	cat >expect.trailer <<-EOF &&
 	0000
@@ -323,6 +322,8 @@ test_expect_success 'unexpected lines are not allowed in fetch request' '
 # Test the basics of object-info
 #
 test_expect_success 'basics of object-info' '
+	test_config transfer.advertiseObjectInfo true &&
+
 	test-tool pkt-line pack >in <<-EOF &&
 	command=object-info
 	object-format=$(test_oid algo)
@@ -380,4 +381,25 @@ test_expect_success 'basics of bundle-uri: dies if not enabled' '
 	test_must_be_empty out
 '
 
+test_expect_success 'object-info missing from capabilities when disabled' '
+	test_config transfer.advertiseObjectInfo false &&
+
+	GIT_TEST_SIDEBAND_ALL=0 test-tool serve-v2 \
+		--advertise-capabilities >out &&
+	test-tool pkt-line unpack <out >actual &&
+
+	! grep object.info actual
+'
+
+test_expect_success 'object-info commands rejected when disabled' '
+	test_config transfer.advertiseObjectInfo false &&
+
+	test-tool pkt-line pack >in <<-EOF &&
+	command=object-info
+	EOF
+
+	test_must_fail test-tool serve-v2 --stateless-rpc <in 2>err &&
+	grep invalid.command err
+'
+
 test_done
-- 
cgit v0.10.2-6-g49f6


From 5f64279443520922949ea89babe9bd3712c11fec Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:39:00 -0500
Subject: upload-pack: always turn off save_commit_buffer

When the client sends us "want $oid" lines, we call parse_object($oid)
to get an object struct. It's important to parse the commits because we
need to traverse them in the negotiation phase. But of course we don't
need to hold on to the commit messages for each one.

We've turned off the save_commit_buffer flag in get_common_commits() for
a long time, since f0243f26f6 (git-upload-pack: More efficient usage of
the has_sha1 array, 2005-10-28). That helps with the commits we see
while actually traversing. But:

  1. That function is only used by the v0 protocol. I think the v2
     protocol's code path leaves the flag on (and thus pays the extra
     memory penalty), though I didn't measure it specifically.

  2. If the client sends us a bunch of "want" lines, that happens before
     the negotiation phase. So we'll hold on to all of those commit
     messages. Generally the number of "want" lines scales with the
     refs, not with the number of objects in the repo. But a malicious
     client could send a lot in order to waste memory.

As an example of (2), if I generate a request to fetch all commits in
git.git like this:

  pktline() {
    local msg="$*"
    printf "%04x%s\n" $((1+4+${#msg})) "$msg"
  }

  want_commits() {
    pktline command=fetch
    printf 0001
    git cat-file --batch-all-objects --batch-check='%(objectname) %(objecttype)' |
      while read oid type; do
        test "$type" = "commit" || continue
        pktline want $oid
      done
      pktline done
      printf 0000
  }

  want_commits | GIT_PROTOCOL=version=2 valgrind --tool=massif git-upload-pack . >/dev/null

before this patch upload-pack peaks at ~125MB, and after at ~35MB. The
difference is not coincidentally about the same as the sum of all commit
object sizes as computed by:

  git cat-file --batch-all-objects --batch-check='%(objecttype) %(objectsize)' |
  perl -alne '$v += $F[1] if $F[0] eq "commit"; END { print $v }'

In a larger repository like linux.git, that number is ~1GB.

In a repository with a full commit-graph file this will have no impact
(and the commit graph would save us from parsing at all, so is a much
better solution!). But it's easy to do, might help a little in
real-world cases (where even if you have a commit graph it might not be
fully up to date), and helps a lot for a worst-case malicious request.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c
index 9b021ef..15afb97 100644
--- a/builtin/upload-pack.c
+++ b/builtin/upload-pack.c
@@ -8,6 +8,7 @@
 #include "replace-object.h"
 #include "upload-pack.h"
 #include "serve.h"
+#include "commit.h"
 
 static const char * const upload_pack_usage[] = {
 	N_("git-upload-pack [--[no-]strict] [--timeout=<n>] [--stateless-rpc]\n"
@@ -37,6 +38,7 @@ int cmd_upload_pack(int argc, const char **argv, const char *prefix)
 
 	packet_trace_identity("upload-pack");
 	disable_replace_refs();
+	save_commit_buffer = 0;
 
 	argc = parse_options(argc, argv, prefix, options, upload_pack_usage, 0);
 
diff --git a/upload-pack.c b/upload-pack.c
index 2a5c526..3970bb9 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -526,8 +526,6 @@ static int get_common_commits(struct upload_pack_data *data,
 	int got_other = 0;
 	int sent_ready = 0;
 
-	save_commit_buffer = 0;
-
 	for (;;) {
 		const char *arg;
 
-- 
cgit v0.10.2-6-g49f6


From a6ca601cdf82dda85db86c12973f2ecd746cb4bd Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:39:03 -0500
Subject: upload-pack: use PARSE_OBJECT_SKIP_HASH_CHECK in more places

In commit 0bc2557951 (upload-pack: skip parse-object re-hashing of
"want" objects, 2022-09-06), we optimized the parse_object() calls for
v2 "want" lines from the client so that they avoided parsing blobs, and
so that they used the commit-graph rather than parsing commit objects
from scratch.

We should extend that to two other spots:

  1. We parse "have" objects in the got_oid() function. These won't
     generally be non-commits (unlike "want" lines from a partial
     clone). But we still benefit from the use of the commit-graph.

  2. For v0, the "want" lines are parsed in receive_needs(). These are
     also less likely to be non-commits because by default they have to
     be ref tips. There are config options you might set to allow
     non-tip objects, but you'd mostly do so to support partial clones,
     and clients recent enough to support partial clone will generally
     speak v2 anyway.

So I don't expect this change to improve performance much for day-to-day
operations. But both are possible denial-of-service vectors, where an
attacker can waste our time by sending over a large number of objects to
parse (of course we may waste even more time serving a pack to them, but
we try as much as possible to optimize that in pack-objects; we should
do what we can here in upload-pack, too).

With this patch, running p5600 with GIT_TEST_PROTOCOL_VERSION=0 shows
similar results to what we saw in 0bc2557951 (which ran with the v2
protocol by default). Here are the numbers for linux.git:

  Test                          HEAD^                 HEAD
  -----------------------------------------------------------------------------
  5600.3: checkout of result    50.91(87.95+2.93)     41.75(79.00+3.18) -18.0%

Or for a more extreme (and malicious) case, we can claim to "have" every
blob in git.git over the v0 protocol:

  $ {
      echo "0032want $(git rev-parse HEAD)"
      printf 0000
      git cat-file --batch-all-objects --batch-check='%(objectname) %(objecttype)' |
      perl -alne 'print "0032have $F[0]" if $F[1] eq "blob"'
    } >input

  $ time ./git.old upload-pack . <input >/dev/null
  real	0m52.951s
  user	0m51.633s
  sys	0m1.304s

  $ time ./git.new upload-pack . <input >/dev/null
  real	0m0.261s
  user	0m0.156s
  sys	0m0.105s

(Note that these don't actually compute a pack because of the hacky
protocol usage, so those numbers are representing the raw blob-parsing
effort done by upload-pack).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/upload-pack.c b/upload-pack.c
index 3970bb9..b721155 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -469,7 +469,8 @@ static void create_pack_file(struct upload_pack_data *pack_data,
 static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid)
 {
 	int we_knew_they_have = 0;
-	struct object *o = parse_object(the_repository, oid);
+	struct object *o = parse_object_with_flags(the_repository, oid,
+						   PARSE_OBJECT_SKIP_HASH_CHECK);
 
 	if (!o)
 		die("oops (%s)", oid_to_hex(oid));
@@ -1148,7 +1149,8 @@ static void receive_needs(struct upload_pack_data *data,
 			free(client_sid);
 		}
 
-		o = parse_object(the_repository, &oid_buf);
+		o = parse_object_with_flags(the_repository, &oid_buf,
+					    PARSE_OBJECT_SKIP_HASH_CHECK);
 		if (!o) {
 			packet_writer_error(&data->writer,
 					    "upload-pack: not our ref %s",
-- 
cgit v0.10.2-6-g49f6


From 6cd05e768b7e54ca48b16fb0214df4c70aecd46c Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 28 Feb 2024 17:39:07 -0500
Subject: upload-pack: free tree buffers after parsing

When a client sends us a "want" or "have" line, we call parse_object()
to get an object struct. If the object is a tree, then the parsed state
means that tree->buffer points to the uncompressed contents of the tree.
But we don't really care about it. We only really need to parse commits
and tags; for trees and blobs, the important output is just a "struct
object" with the correct type.

But much worse, we do not ever free that tree buffer. It's not leaked in
the traditional sense, in that we still have a pointer to it from the
global object hash. But if the client requests many trees, we'll hold
all of their contents in memory at the same time.

Nobody really noticed because it's rare for clients to directly request
a tree. It might happen for a lightweight tag pointing straight at a
tree, or it might happen for a "tree:depth" partial clone filling in
missing trees.

But it's also possible for a malicious client to request a lot of trees,
causing upload-pack's memory to balloon. For example, without this
patch, requesting every tree in git.git like:

  pktline() {
    local msg="$*"
    printf "%04x%s\n" $((1+4+${#msg})) "$msg"
  }

  want_trees() {
    pktline command=fetch
    printf 0001
    git cat-file --batch-all-objects --batch-check='%(objectname) %(objecttype)' |
      while read oid type; do
        test "$type" = "tree" || continue
        pktline want $oid
      done
      pktline done
      printf 0000
  }

  want_trees | GIT_PROTOCOL=version=2 valgrind --tool=massif ./git upload-pack . >/dev/null

shows a peak heap usage of ~3.7GB. Which is just about the sum of the
sizes of all of the uncompressed trees. For linux.git, it's closer to
17GB.

So the obvious thing to do is to call free_tree_buffer() after we
realize that we've parsed a tree. We know that upload-pack won't need it
later. But let's push the logic into parse_object_with_flags(), telling
it to discard the tree buffer immediately. There are two reasons for
this. One, all of the relevant call-sites already call the with_options
variant to pass the SKIP_HASH flag. So it actually ends up as less code
than manually free-ing in each spot. And two, it enables an extra
optimization that I'll discuss below.

I've touched all of the sites that currently use SKIP_HASH in
upload-pack. That drops the peak heap of the upload-pack invocation
above from 3.7GB to ~24MB.

I've also modified the caller in get_reference(); a partial clone
benefits from its use in pack-objects for the reasons given in
0bc2557951 (upload-pack: skip parse-object re-hashing of "want" objects,
2022-09-06), where we were measuring blob requests. But note that the
results of get_reference() are used for traversing, as well; so we
really would _eventually_ use the tree contents. That makes this at
first glance a space/time tradeoff: we won't hold all of the trees in
memory at once, but we'll have to reload them each when it comes time to
traverse.

And here's where our extra optimization comes in. If the caller is not
going to immediately look at the tree contents, and it doesn't care
about checking the hash, then parse_object() can simply skip loading the
tree entirely, just like we do for blobs! And now it's not a space/time
tradeoff in get_reference() anymore. It's just a lazy-load: we're
delaying reading the tree contents until it's time to actually traverse
them one by one.

And of course for upload-pack, this optimization means we never load the
trees at all, saving lots of CPU time. Timing the "every tree from
git.git" request above shows upload-pack dropping from 32 seconds of CPU
to 19 (the remainder is mostly due to pack-objects actually sending the
pack; timing just the upload-pack portion shows we go from 13s to
~0.28s).

These are all highly gamed numbers, of course. For real-world
partial-clone requests we're saving only a small bit of time in
practice. But it does help harden upload-pack against malicious
denial-of-service attacks.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>

diff --git a/object.c b/object.c
index 2c61e4c..e305a30 100644
--- a/object.c
+++ b/object.c
@@ -272,6 +272,7 @@ struct object *parse_object_with_flags(struct repository *r,
 				       enum parse_object_flags flags)
 {
 	int skip_hash = !!(flags & PARSE_OBJECT_SKIP_HASH_CHECK);
+	int discard_tree = !!(flags & PARSE_OBJECT_DISCARD_TREE);
 	unsigned long size;
 	enum object_type type;
 	int eaten;
@@ -299,6 +300,17 @@ struct object *parse_object_with_flags(struct repository *r,
 		return lookup_object(r, oid);
 	}
 
+	/*
+	 * If the caller does not care about the tree buffer and does not
+	 * care about checking the hash, we can simply verify that we
+	 * have the on-disk object with the correct type.
+	 */
+	if (skip_hash && discard_tree &&
+	    (!obj || obj->type == OBJ_TREE) &&
+	    oid_object_info(r, oid, NULL) == OBJ_TREE) {
+		return &lookup_tree(r, oid)->object;
+	}
+
 	buffer = repo_read_object_file(r, oid, &type, &size);
 	if (buffer) {
 		if (!skip_hash &&
@@ -312,6 +324,8 @@ struct object *parse_object_with_flags(struct repository *r,
 					  buffer, &eaten);
 		if (!eaten)
 			free(buffer);
+		if (discard_tree && type == OBJ_TREE)
+			free_tree_buffer((struct tree *)obj);
 		return obj;
 	}
 	return NULL;
diff --git a/object.h b/object.h
index 114d459..c7123ca 100644
--- a/object.h
+++ b/object.h
@@ -197,6 +197,7 @@ void *object_as_type(struct object *obj, enum object_type type, int quiet);
  */
 enum parse_object_flags {
 	PARSE_OBJECT_SKIP_HASH_CHECK = 1 << 0,
+	PARSE_OBJECT_DISCARD_TREE = 1 << 1,
 };
 struct object *parse_object(struct repository *r, const struct object_id *oid);
 struct object *parse_object_with_flags(struct repository *r,
diff --git a/revision.c b/revision.c
index 2424c9b..b10f63a 100644
--- a/revision.c
+++ b/revision.c
@@ -381,7 +381,8 @@ static struct object *get_reference(struct rev_info *revs, const char *name,
 
 	object = parse_object_with_flags(revs->repo, oid,
 					 revs->verify_objects ? 0 :
-					 PARSE_OBJECT_SKIP_HASH_CHECK);
+					 PARSE_OBJECT_SKIP_HASH_CHECK |
+					 PARSE_OBJECT_DISCARD_TREE);
 
 	if (!object) {
 		if (revs->ignore_missing)
diff --git a/upload-pack.c b/upload-pack.c
index b721155..761af4a 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -470,7 +470,8 @@ static int do_got_oid(struct upload_pack_data *data, const struct object_id *oid
 {
 	int we_knew_they_have = 0;
 	struct object *o = parse_object_with_flags(the_repository, oid,
-						   PARSE_OBJECT_SKIP_HASH_CHECK);
+						   PARSE_OBJECT_SKIP_HASH_CHECK |
+						   PARSE_OBJECT_DISCARD_TREE);
 
 	if (!o)
 		die("oops (%s)", oid_to_hex(oid));
@@ -1150,7 +1151,8 @@ static void receive_needs(struct upload_pack_data *data,
 		}
 
 		o = parse_object_with_flags(the_repository, &oid_buf,
-					    PARSE_OBJECT_SKIP_HASH_CHECK);
+					    PARSE_OBJECT_SKIP_HASH_CHECK |
+					    PARSE_OBJECT_DISCARD_TREE);
 		if (!o) {
 			packet_writer_error(&data->writer,
 					    "upload-pack: not our ref %s",
@@ -1467,7 +1469,8 @@ static int parse_want(struct packet_writer *writer, const char *line,
 			    "expected to get oid, not '%s'", line);
 
 		o = parse_object_with_flags(the_repository, &oid,
-					    PARSE_OBJECT_SKIP_HASH_CHECK);
+					    PARSE_OBJECT_SKIP_HASH_CHECK |
+					    PARSE_OBJECT_DISCARD_TREE);
 
 		if (!o) {
 			packet_writer_error(writer,
-- 
cgit v0.10.2-6-g49f6