From db5a58c1bda5b20169b9958af1e8b05ddd178b01 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 16:37:09 -0400 Subject: index-pack: make fsck error message more specific If fsck reports an error, we say only "Error in object". This isn't quite as bad as it might seem, since the fsck code would have dumped some errors to stderr already. But it might help to give a little more context. The earlier output would not have even mentioned "fsck", and that may be a clue that the "fsck.*" or "*.fsckObjects" config may be relevant. Signed-off-by: Jeff King diff --git a/builtin/index-pack.c b/builtin/index-pack.c index bda84a9..d15b24e 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -853,7 +853,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, die(_("invalid %s"), type_name(type)); if (do_fsck_object && fsck_object(obj, buf, size, &fsck_options)) - die(_("Error in object")); + die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6620fee..648b952 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op if (!obj_buf) die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid)); if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) - die("Error in object"); + die("fsck error in packed object"); fsck_options.walk = check_object; if (fsck_walk(obj, NULL, &fsck_options)) die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); -- cgit v0.10.2-6-g49f6 From ed9c3220621d634d543bc4dd998d12167dfc57d4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 13 May 2018 12:35:37 -0400 Subject: fsck: simplify ".git" check There's no need for us to manually check for ".git"; it's a subset of the other filesystem-specific tests. Dropping it makes our code slightly shorter. More importantly, the existing code may make a reader wonder why ".GIT" is not covered here, and whether that is a bug (it isn't, as it's also covered in the filesystem-specific tests). Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 5c8c12d..f7be966 100644 --- a/fsck.c +++ b/fsck.c @@ -561,9 +561,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_empty_name |= !*name; has_dot |= !strcmp(name, "."); has_dotdot |= !strcmp(name, ".."); - has_dotgit |= (!strcmp(name, ".git") || - is_hfs_dotgit(name) || - is_ntfs_dotgit(name)); + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); -- cgit v0.10.2-6-g49f6 From 7ac4f3a007e2567f9d2492806186aa063f9a08d6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 15:44:51 -0400 Subject: fsck: actually fsck blob data Because fscking a blob has always been a noop, we didn't bother passing around the blob data. In preparation for content-level checks, let's fix up a few things: 1. The fsck_object() function just returns success for any blob. Let's a noop fsck_blob(), which we can fill in with actual logic later. 2. The fsck_loose() function in builtin/fsck.c just threw away blob content after loading it. Let's hold onto it until after we've called fsck_object(). The easiest way to do this is to just drop the parse_loose_object() helper entirely. Incidentally, this also fixes a memory leak: if we successfully loaded the object data but did not parse it, we would have left the function without freeing it. 3. When fsck_loose() loads the object data, it does so with a custom read_loose_object() helper. This function streams any blobs, regardless of size, under the assumption that we're only checking the sha1. Instead, let's actually load blobs smaller than big_file_threshold, as the normal object-reading code-paths would do. This lets us fsck small files, and a NULL return is an indication that the blob was so big that it needed to be streamed, and we can pass that information along to fsck_blob(). Signed-off-by: Jeff King diff --git a/builtin/fsck.c b/builtin/fsck.c index ef78c6c..f91d5f3 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -337,7 +337,7 @@ static void check_connectivity(void) } } -static int fsck_obj(struct object *obj) +static int fsck_obj(struct object *obj, void *buffer, unsigned long size) { int err; @@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj) if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); - err = fsck_object(obj, NULL, 0, &fsck_obj_options); + err = fsck_object(obj, buffer, size, &fsck_obj_options); if (err) goto out; @@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - return fsck_obj(obj); + return fsck_obj(obj, buffer, size); } static int default_refs; @@ -504,44 +504,42 @@ static void get_default_heads(void) } } -static struct object *parse_loose_object(const struct object_id *oid, - const char *path) +static int fsck_loose(const struct object_id *oid, const char *path, void *data) { struct object *obj; - void *contents; enum object_type type; unsigned long size; + void *contents; int eaten; - if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) - return NULL; + if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) { + errors_found |= ERROR_OBJECT; + error("%s: object corrupt or missing: %s", + oid_to_hex(oid), path); + return 0; /* keep checking other objects */ + } if (!contents && type != OBJ_BLOB) - die("BUG: read_loose_object streamed a non-blob"); + BUG("read_loose_object streamed a non-blob"); obj = parse_object_buffer(oid, type, size, contents, &eaten); - - if (!eaten) - free(contents); - return obj; -} - -static int fsck_loose(const struct object_id *oid, const char *path, void *data) -{ - struct object *obj = parse_loose_object(oid, path); - if (!obj) { errors_found |= ERROR_OBJECT; - error("%s: object corrupt or missing: %s", + error("%s: object could not be parsed: %s", oid_to_hex(oid), path); + if (!eaten) + free(contents); return 0; /* keep checking other objects */ } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - if (fsck_obj(obj)) + if (fsck_obj(obj, contents, size)) errors_found |= ERROR_OBJECT; - return 0; + + if (!eaten) + free(contents); + return 0; /* keep checking other objects, even if we saw an error */ } static int fsck_cruft(const char *basename, const char *path, void *data) diff --git a/fsck.c b/fsck.c index f7be966..76a0ad3 100644 --- a/fsck.c +++ b/fsck.c @@ -899,6 +899,12 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +static int fsck_blob(struct blob *blob, const char *buf, + unsigned long size, struct fsck_options *options) +{ + return 0; +} + int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options) { @@ -906,7 +912,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size, return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); if (obj->type == OBJ_BLOB) - return 0; + return fsck_blob((struct blob *)obj, data, size, options); if (obj->type == OBJ_TREE) return fsck_tree((struct tree *) obj, options); if (obj->type == OBJ_COMMIT) diff --git a/sha1_file.c b/sha1_file.c index cc0f43e..53f0a36 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2209,7 +2209,7 @@ int read_loose_object(const char *path, goto out; } - if (*type == OBJ_BLOB) { + if (*type == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) goto out; } else { -- cgit v0.10.2-6-g49f6 From 159e7b080bfa5d34559467cacaa79df89a01afc0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 17:20:08 -0400 Subject: fsck: detect gitmodules files In preparation for performing fsck checks on .gitmodules files, this commit plumbs in the actual detection of the files. Note that unlike most other fsck checks, this cannot be a property of a single object: we must know that the object is found at a ".gitmodules" path at the root tree of a commit. Since the fsck code only sees one object at a time, we have to mark the related objects to fit the puzzle together. When we see a commit we mark its tree as a root tree, and when we see a root tree with a .gitmodules file, we mark the corresponding blob to be checked. In an ideal world, we'd check the objects in topological order: commits followed by trees followed by blobs. In that case we can avoid ever loading an object twice, since all markings would be complete by the time we get to the marked objects. And indeed, if we are checking a single packfile, this is the order in which Git will generally write the objects. But we can't count on that: 1. git-fsck may show us the objects in arbitrary order (loose objects are fed in sha1 order, but we may also have multiple packs, and we process each pack fully in sequence). 2. The type ordering is just what git-pack-objects happens to write now. The pack format does not require a specific order, and it's possible that future versions of Git (or a custom version trying to fool official Git's fsck checks!) may order it differently. 3. We may not even be fscking all of the relevant objects at once. Consider pushing with transfer.fsckObjects, where one push adds a blob at path "foo", and then a second push adds the same blob at path ".gitmodules". The blob is not part of the second push at all, but we need to mark and check it. So in the general case, we need to make up to three passes over the objects: once to make sure we've seen all commits, then once to cover any trees we might have missed, and then a final pass to cover any .gitmodules blobs we found in the second pass. We can simplify things a bit by loosening the requirement that we find .gitmodules only at root trees. Technically a file like "subdir/.gitmodules" is not parsed by Git, but it's not unreasonable for us to declare that Git is aware of all ".gitmodules" files and make them eligible for checking. That lets us drop the root-tree requirement, which eliminates one pass entirely. And it makes our worst case much better: instead of potentially queueing every root tree to be re-examined, the worst case is that we queue each unique .gitmodules blob for a second look. This patch just adds the boilerplate to find .gitmodules files. The actual content checks will come in a subsequent commit. Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 76a0ad3..105b3e7 100644 --- a/fsck.c +++ b/fsck.c @@ -10,6 +10,10 @@ #include "utf8.h" #include "sha1-array.h" #include "decorate.h" +#include "oidset.h" + +static struct oidset gitmodules_found = OIDSET_INIT; +static struct oidset gitmodules_done = OIDSET_INIT; #define FSCK_FATAL -1 #define FSCK_INFO -2 @@ -44,6 +48,7 @@ FUNC(MISSING_TAG_ENTRY, ERROR) \ FUNC(MISSING_TAG_OBJECT, ERROR) \ FUNC(MISSING_TREE, ERROR) \ + FUNC(MISSING_TREE_OBJECT, ERROR) \ FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ @@ -51,6 +56,8 @@ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -563,6 +570,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_dotdot |= !strcmp(name, ".."); has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; + + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) + oidset_insert(&gitmodules_found, oid); + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; @@ -936,3 +947,50 @@ int fsck_error_function(struct fsck_options *o, error("object %s: %s", describe_object(o, obj), message); return 1; } + +int fsck_finish(struct fsck_options *options) +{ + int ret = 0; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&gitmodules_found, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct blob *blob; + enum object_type type; + unsigned long size; + char *buf; + + if (oidset_contains(&gitmodules_done, oid)) + continue; + + blob = lookup_blob(oid); + if (!blob) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + continue; + } + + buf = read_sha1_file(oid->hash, &type, &size); + if (!buf) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_MISSING, + "unable to read .gitmodules blob"); + continue; + } + + if (type == OBJ_BLOB) + ret |= fsck_blob(blob, buf, size, options); + else + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + free(buf); + } + + + oidset_clear(&gitmodules_found); + oidset_clear(&gitmodules_done); + return ret; +} diff --git a/fsck.h b/fsck.h index 4525510..c3cf5e0 100644 --- a/fsck.h +++ b/fsck.h @@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options); int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options); +/* + * Some fsck checks are context-dependent, and may end up queued; run this + * after completing all fsck_object() calls in order to resolve any remaining + * checks. + */ +int fsck_finish(struct fsck_options *options); + #endif -- cgit v0.10.2-6-g49f6 From 2738744426c161a98c2ec494d41241a4c5eef9ef Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 May 2018 12:22:48 -0400 Subject: fsck: handle promisor objects in .gitmodules check If we have a tree that points to a .gitmodules blob but don't have that blob, we can't check its contents. This produces an fsck error when we encounter it. But in the case of a promisor object, this absence is expected, and we must not complain. Note that this can technically circumvent our transfer.fsckObjects check. Imagine a client fetches a tree, but not the matching .gitmodules blob. An fsck of the incoming objects will show that we don't have enough information. Later, we do fetch the actual blob. But we have no idea that it's a .gitmodules file. The only ways to get around this would be to re-scan all of the existing trees whenever new ones enter (which is expensive), or to somehow persist the gitmodules_found set between fsck runs (which is complicated). In practice, it's probably OK to ignore the problem. Any repository which has all of the objects (including the one serving the promisor packs) can perform the checks. Since promisor packs are inherently about a hierarchical topology in which clients rely on upstream repositories, those upstream repositories can protect all of their downstream clients from broken objects. Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 105b3e7..a91e6ce 100644 --- a/fsck.c +++ b/fsck.c @@ -11,6 +11,7 @@ #include "sha1-array.h" #include "decorate.h" #include "oidset.h" +#include "packfile.h" static struct oidset gitmodules_found = OIDSET_INIT; static struct oidset gitmodules_done = OIDSET_INIT; @@ -974,6 +975,8 @@ int fsck_finish(struct fsck_options *options) buf = read_sha1_file(oid->hash, &type, &size); if (!buf) { + if (is_promisor_object(&blob->object.oid)) + continue; ret |= report(options, &blob->object, FSCK_MSG_GITMODULES_MISSING, "unable to read .gitmodules blob"); -- cgit v0.10.2-6-g49f6 From ed8b10f631c9a71df3351d46187bf7f3fa4f9b7e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 17:25:27 -0400 Subject: fsck: check .gitmodules content This patch detects and blocks submodule names which do not match the policy set forth in submodule-config. These should already be caught by the submodule code itself, but putting the check here means that newer versions of Git can protect older ones from malicious entries (e.g., a server with receive.fsckObjects will block the objects, protecting clients which fetch from it). As a side effect, this means fsck will also complain about .gitmodules files that cannot be parsed (or were larger than core.bigFileThreshold). Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index a91e6ce..2eddfc3 100644 --- a/fsck.c +++ b/fsck.c @@ -12,6 +12,8 @@ #include "decorate.h" #include "oidset.h" #include "packfile.h" +#include "submodule-config.h" +#include "config.h" static struct oidset gitmodules_found = OIDSET_INIT; static struct oidset gitmodules_done = OIDSET_INIT; @@ -59,6 +61,8 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(ZERO_PADDED_DATE, ERROR) \ FUNC(GITMODULES_MISSING, ERROR) \ FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -911,10 +915,64 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +struct fsck_gitmodules_data { + struct object *obj; + struct fsck_options *options; + int ret; +}; + +static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) +{ + struct fsck_gitmodules_data *data = vdata; + const char *subsection, *key; + int subsection_len; + char *name; + + if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || + !subsection) + return 0; + + name = xmemdupz(subsection, subsection_len); + if (check_submodule_name(name) < 0) + data->ret |= report(data->options, data->obj, + FSCK_MSG_GITMODULES_NAME, + "disallowed submodule name: %s", + name); + free(name); + + return 0; +} + static int fsck_blob(struct blob *blob, const char *buf, unsigned long size, struct fsck_options *options) { - return 0; + struct fsck_gitmodules_data data; + + if (!oidset_contains(&gitmodules_found, &blob->object.oid)) + return 0; + oidset_insert(&gitmodules_done, &blob->object.oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + ".gitmodules too large to parse"); + } + + data.obj = &blob->object; + data.options = options; + data.ret = 0; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data)) + data.ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + + return data.ret; } int fsck_object(struct object *obj, void *data, unsigned long size, -- cgit v0.10.2-6-g49f6 From 1995b5e03e1cc97116be58cdc0502d4a23547856 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 17:20:35 -0400 Subject: fsck: call fsck_finish() after fscking objects Now that the internal fsck code is capable of checking .gitmodules files, we just need to teach its callers to use the "finish" function to check any queued objects. With this, we can now catch the malicious case in t7415 with git-fsck. Signed-off-by: Jeff King diff --git a/builtin/fsck.c b/builtin/fsck.c index f91d5f3..028aba5 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -748,6 +748,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } stop_progress(&progress); } + + if (fsck_finish(&fsck_obj_options)) + errors_found |= ERROR_OBJECT; } for (i = 0; i < argc; i++) { diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 75fa071..c8ce2f4 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -73,4 +73,8 @@ test_expect_success 'clone evil superproject' ' ! grep "RUNNING POST CHECKOUT" output ' +test_expect_success 'fsck detects evil superproject' ' + test_must_fail git fsck +' + test_done -- cgit v0.10.2-6-g49f6 From 6e328d6caef218db320978e3e251009135d87d0e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 4 May 2018 19:40:08 -0400 Subject: unpack-objects: call fsck_finish() after fscking objects As with the previous commit, we must call fsck's "finish" function in order to catch any queued objects for .gitmodules checks. This second pass will be able to access any incoming objects, because we will have exploded them to loose objects by now. This isn't quite ideal, because it means that bad objects may have been written to the object database (and a subsequent operation could then reference them, even if the other side doesn't send the objects again). However, this is sufficient when used with receive.fsckObjects, since those loose objects will all be placed in a temporary quarantine area that will get wiped if we find any problems. Signed-off-by: Jeff King diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 648b952..c8f1406 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); the_hash_algo->final_fn(oid.hash, &ctx); - if (strict) + if (strict) { write_rest(); + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } if (hashcmp(fill(the_hash_algo->rawsz), oid.hash)) die("final sha1 did not match"); use(the_hash_algo->rawsz); diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index c8ce2f4..7fdf5d6 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -77,4 +77,11 @@ test_expect_success 'fsck detects evil superproject' ' test_must_fail git fsck ' +test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + test_must_fail git push dst.git HEAD +' + test_done -- cgit v0.10.2-6-g49f6 From 73c3f0f704a91b6792e0199a3f3ab6e3a1971675 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 4 May 2018 19:45:01 -0400 Subject: index-pack: check .gitmodules files with --strict Now that the internal fsck code has all of the plumbing we need, we can start checking incoming .gitmodules files. Naively, it seems like we would just need to add a call to fsck_finish() after we've processed all of the objects. And that would be enough to cover the initial test included here. But there are two extra bits: 1. We currently don't bother calling fsck_object() at all for blobs, since it has traditionally been a noop. We'd actually catch these blobs in fsck_finish() at the end, but it's more efficient to check them when we already have the object loaded in memory. 2. The second pass done by fsck_finish() needs to access the objects, but we're actually indexing the pack in this process. In theory we could give the fsck code a special callback for accessing the in-pack data, but it's actually quite tricky: a. We don't have an internal efficient index mapping oids to packfile offsets. We only generate it on the fly as part of writing out the .idx file. b. We'd still have to reconstruct deltas, which means we'd basically have to replicate all of the reading logic in packfile.c. Instead, let's avoid running fsck_finish() until after we've written out the .idx file, and then just add it to our internal packed_git list. This does mean that the objects are "in the repository" before we finish our fsck checks. But unpack-objects already exhibits this same behavior, and it's an acceptable tradeoff here for the same reason: the quarantine mechanism means that pushes will be fully protected. In addition to a basic push test in t7415, we add a sneaky pack that reverses the usual object order in the pack, requiring that index-pack access the tree and blob during the "finish" step. This already works for unpack-objects (since it will have written out loose objects), but we'll check it with this sneaky pack for good measure. Signed-off-by: Jeff King diff --git a/builtin/index-pack.c b/builtin/index-pack.c index d15b24e..7b2f7c0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -836,6 +836,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, blob->object.flags |= FLAG_CHECKED; else die(_("invalid blob object %s"), oid_to_hex(oid)); + if (do_fsck_object && + fsck_object(&blob->object, (void *)data, size, &fsck_options)) + die(_("fsck error in packed object")); } else { struct object *obj; int eaten; @@ -1477,6 +1480,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, } else chmod(final_index_name, 0444); + if (do_fsck_object) + add_packed_git(final_index_name, strlen(final_index_name), 0); + if (!from_stdin) { printf("%s\n", sha1_to_hex(hash)); } else { @@ -1818,6 +1824,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) pack_hash); else close(input_fd); + + if (do_fsck_object && fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + free(objects); strbuf_release(&index_name_buf); if (pack_name == NULL) diff --git a/t/lib-pack.sh b/t/lib-pack.sh index 7509846..4674899 100644 --- a/t/lib-pack.sh +++ b/t/lib-pack.sh @@ -79,6 +79,18 @@ pack_obj () { ;; esac + # If it's not a delta, we can convince pack-objects to generate a pack + # with just our entry, and then strip off the header (12 bytes) and + # trailer (20 bytes). + if test -z "$2" + then + echo "$1" | git pack-objects --stdout >pack_obj.tmp && + size=$(wc -c &2 "BUG: don't know how to print $1${2:+ (from $2)}" return 1 } diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 7fdf5d6..51361c9 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a real-world setup that confirms we catch this in practice. ' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-pack.sh test_expect_success 'check names' ' cat >expect <<-\EOF && @@ -84,4 +85,41 @@ test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' test_must_fail git push dst.git HEAD ' +test_expect_success 'transfer.fsckObjects detects evil superproject (index)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + git -C dst.git config transfer.unpackLimit 1 && + test_must_fail git push dst.git HEAD +' + +# Normally our packs contain commits followed by trees followed by blobs. This +# reverses the order, which requires backtracking to find the context of a +# blob. We'll start with a fresh gitmodules-only tree to make it simpler. +test_expect_success 'create oddly ordered pack' ' + git checkout --orphan odd && + git rm -rf --cached . && + git add .gitmodules && + git commit -m odd && + { + pack_header 3 && + pack_obj $(git rev-parse HEAD:.gitmodules) && + pack_obj $(git rev-parse HEAD^{tree}) && + pack_obj $(git rev-parse HEAD) + } >odd.pack && + pack_trailer odd.pack +' + +test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + test_must_fail git -C dst.git unpack-objects --strict Date: Fri, 4 May 2018 20:03:35 -0400 Subject: fsck: complain when .gitmodules is a symlink We've recently forbidden .gitmodules to be a symlink in verify_path(). And it's an easy way to circumvent our fsck checks for .gitmodules content. So let's complain when we see it. Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 2eddfc3..9339f31 100644 --- a/fsck.c +++ b/fsck.c @@ -63,6 +63,7 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(GITMODULES_BLOB, ERROR) \ FUNC(GITMODULES_PARSE, ERROR) \ FUNC(GITMODULES_NAME, ERROR) \ + FUNC(GITMODULES_SYMLINK, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -576,8 +577,14 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; - if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) - oidset_insert(&gitmodules_found, oid); + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) { + if (!S_ISLNK(mode)) + oidset_insert(&gitmodules_found, oid); + else + retval += report(options, &item->object, + FSCK_MSG_GITMODULES_SYMLINK, + ".gitmodules is a symbolic link"); + } if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 51361c9..a770d92 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -122,4 +122,33 @@ test_expect_success 'transfer.fsckObjects handles odd pack (index)' ' test_must_fail git -C dst.git index-pack --strict --stdin output && + grep gitmodulesSymlink output + ) +' + test_done -- cgit v0.10.2-6-g49f6