From 66f9722882993f60be656afaae4c5c9ac92957e9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Jun 2018 18:05:00 -0400 Subject: config: turn die_on_error into caller-facing enum The config code has a die_on_error flag, which lets us emit an error() instead of dying when we see a bogus config file. But there's no way for a caller of the config code to set this: it's auto-set based on whether we're reading a file or a blob. Instead, let's add it to the config_options struct. When it's not set (or we have no options) we'll continue to fall back to the existing file/blob behavior. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/config.c b/config.c index f4a208a..a1d445e 100644 --- a/config.c +++ b/config.c @@ -31,7 +31,7 @@ struct config_source { enum config_origin_type origin_type; const char *name; const char *path; - int die_on_error; + enum config_error_action default_error_action; int linenr; int eof; struct strbuf value; @@ -809,10 +809,18 @@ static int git_parse_source(config_fn_t fn, void *data, cf->linenr, cf->name); } - if (cf->die_on_error) + switch (opts && opts->error_action ? + opts->error_action : + cf->default_error_action) { + case CONFIG_ERROR_DIE: die("%s", error_msg); - else + break; + case CONFIG_ERROR_ERROR: error_return = error("%s", error_msg); + break; + case CONFIG_ERROR_UNSET: + BUG("config error action unset"); + } free(error_msg); return error_return; @@ -1520,7 +1528,7 @@ static int do_config_from_file(config_fn_t fn, top.origin_type = origin_type; top.name = name; top.path = path; - top.die_on_error = 1; + top.default_error_action = CONFIG_ERROR_DIE; top.do_fgetc = config_file_fgetc; top.do_ungetc = config_file_ungetc; top.do_ftell = config_file_ftell; @@ -1569,7 +1577,7 @@ int git_config_from_mem(config_fn_t fn, const enum config_origin_type origin_typ top.origin_type = origin_type; top.name = name; top.path = NULL; - top.die_on_error = 0; + top.default_error_action = CONFIG_ERROR_ERROR; top.do_fgetc = config_buf_fgetc; top.do_ungetc = config_buf_ungetc; top.do_ftell = config_buf_ftell; diff --git a/config.h b/config.h index 626d465..ce75bf1 100644 --- a/config.h +++ b/config.h @@ -54,6 +54,11 @@ struct config_options { const char *git_dir; config_parser_event_fn_t event_fn; void *event_fn_data; + enum config_error_action { + CONFIG_ERROR_UNSET = 0, /* use source-specific default */ + CONFIG_ERROR_DIE, /* die() on error */ + CONFIG_ERROR_ERROR, /* error() on error, return -1 */ + } error_action; }; typedef int (*config_fn_t)(const char *, const char *, void *); -- cgit v0.10.2-6-g49f6 From 63583203df51c645aa2bf2988bbdfa3d308ef517 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Jun 2018 18:05:09 -0400 Subject: config: add CONFIG_ERROR_SILENT handler We can currently die() or error(), but there's not yet any way for callers to ask us just to quietly return an error. Let's give them one. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/config.c b/config.c index a1d445e..ce02cea 100644 --- a/config.c +++ b/config.c @@ -818,6 +818,9 @@ static int git_parse_source(config_fn_t fn, void *data, case CONFIG_ERROR_ERROR: error_return = error("%s", error_msg); break; + case CONFIG_ERROR_SILENT: + error_return = -1; + break; case CONFIG_ERROR_UNSET: BUG("config error action unset"); } diff --git a/config.h b/config.h index ce75bf1..c02809f 100644 --- a/config.h +++ b/config.h @@ -58,6 +58,7 @@ struct config_options { CONFIG_ERROR_UNSET = 0, /* use source-specific default */ CONFIG_ERROR_DIE, /* die() on error */ CONFIG_ERROR_ERROR, /* error() on error, return -1 */ + CONFIG_ERROR_SILENT, /* return -1 */ } error_action; }; -- cgit v0.10.2-6-g49f6 From 4574f1aace4ca53ac0fc63a545383dab1a71fec9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Jun 2018 18:05:24 -0400 Subject: config: add options parameter to git_config_from_mem The underlying config parser knows how to handle a config_options struct, but git_config_from_mem() always passes NULL. Let's allow our callers to specify the options struct. We could add a "_with_options" variant, but since there are only a handful of callers, let's just update them to pass NULL. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/config.c b/config.c index ce02cea..60132e3 100644 --- a/config.c +++ b/config.c @@ -1569,8 +1569,10 @@ int git_config_from_file(config_fn_t fn, const char *filename, void *data) return git_config_from_file_with_options(fn, filename, data, NULL); } -int git_config_from_mem(config_fn_t fn, const enum config_origin_type origin_type, - const char *name, const char *buf, size_t len, void *data) +int git_config_from_mem(config_fn_t fn, + const enum config_origin_type origin_type, + const char *name, const char *buf, size_t len, + void *data, const struct config_options *opts) { struct config_source top; @@ -1585,7 +1587,7 @@ int git_config_from_mem(config_fn_t fn, const enum config_origin_type origin_typ top.do_ungetc = config_buf_ungetc; top.do_ftell = config_buf_ftell; - return do_config_from(&top, fn, data, NULL); + return do_config_from(&top, fn, data, opts); } int git_config_from_blob_oid(config_fn_t fn, @@ -1606,7 +1608,8 @@ int git_config_from_blob_oid(config_fn_t fn, return error("reference '%s' does not point to a blob", name); } - ret = git_config_from_mem(fn, CONFIG_ORIGIN_BLOB, name, buf, size, data); + ret = git_config_from_mem(fn, CONFIG_ORIGIN_BLOB, name, buf, size, + data, NULL); free(buf); return ret; diff --git a/config.h b/config.h index c02809f..f2063ce 100644 --- a/config.h +++ b/config.h @@ -68,8 +68,11 @@ extern int git_config_from_file(config_fn_t fn, const char *, void *); extern int git_config_from_file_with_options(config_fn_t fn, const char *, void *, const struct config_options *); -extern int git_config_from_mem(config_fn_t fn, const enum config_origin_type, - const char *name, const char *buf, size_t len, void *data); +extern int git_config_from_mem(config_fn_t fn, + const enum config_origin_type, + const char *name, + const char *buf, size_t len, + void *data, const struct config_options *opts); extern int git_config_from_blob_oid(config_fn_t fn, const char *name, const struct object_id *oid, void *data); extern void git_config_push_parameter(const char *text); diff --git a/fsck.c b/fsck.c index 0b8b20b..aa7a52c 100644 --- a/fsck.c +++ b/fsck.c @@ -1012,7 +1012,7 @@ static int fsck_blob(struct blob *blob, const char *buf, data.options = options; data.ret = 0; if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, - ".gitmodules", buf, size, &data)) + ".gitmodules", buf, size, &data, NULL)) data.ret |= report(options, &blob->object, FSCK_MSG_GITMODULES_PARSE, "could not parse gitmodules blob"); diff --git a/submodule-config.c b/submodule-config.c index 388ef1f..2ca3272 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -561,7 +561,7 @@ static const struct submodule *config_from(struct submodule_cache *cache, parameter.gitmodules_oid = &oid; parameter.overwrite = 0; git_config_from_mem(parse_config, CONFIG_ORIGIN_SUBMODULE_BLOB, rev.buf, - config, config_size, ¶meter); + config, config_size, ¶meter, NULL); strbuf_release(&rev); free(config); -- cgit v0.10.2-6-g49f6 From de6bd9e3eab3077d49d8486f990ed2b80af7c2c9 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 28 Jun 2018 18:06:04 -0400 Subject: fsck: silence stderr when parsing .gitmodules If there's a parsing error we'll already report it via the usual fsck report() function (or not, if the user has asked to skip this object or warning type). The error message from the config parser just adds confusion. Let's suppress it. Note that we didn't test this case at all, so I've added coverage in t7415. We may end up toning down or removing this fsck check in the future. So take this test as checking what happens now with a focus on stderr, and not any ironclad guarantee that we must detect and report parse failures in the future. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/fsck.c b/fsck.c index aa7a52c..87b0e22 100644 --- a/fsck.c +++ b/fsck.c @@ -992,6 +992,7 @@ static int fsck_blob(struct blob *blob, const char *buf, unsigned long size, struct fsck_options *options) { struct fsck_gitmodules_data data; + struct config_options config_opts = { 0 }; if (!oidset_contains(&gitmodules_found, &blob->object.oid)) return 0; @@ -1011,8 +1012,9 @@ static int fsck_blob(struct blob *blob, const char *buf, data.obj = &blob->object; data.options = options; data.ret = 0; + config_opts.error_action = CONFIG_ERROR_SILENT; if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, - ".gitmodules", buf, size, &data, NULL)) + ".gitmodules", buf, size, &data, &config_opts)) data.ret |= report(options, &blob->object, FSCK_MSG_GITMODULES_PARSE, "could not parse gitmodules blob"); diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index b68c5f5..ba8af78 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -176,4 +176,19 @@ test_expect_success 'fsck detects non-blob .gitmodules' ' ) ' +test_expect_success 'fsck detects corrupt .gitmodules' ' + git init corrupt && + ( + cd corrupt && + + echo "[broken" >.gitmodules && + git add .gitmodules && + git commit -m "broken gitmodules" && + + test_must_fail git fsck 2>output && + grep gitmodulesParse output && + test_i18ngrep ! "bad config" output + ) +' + test_done -- cgit v0.10.2-6-g49f6 From 0d68764d94958aece0355d5d27f383e7d4de2e58 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 13 Jul 2018 15:39:53 -0400 Subject: fsck: split ".gitmodules too large" error from parse failure Since ed8b10f631 (fsck: check .gitmodules content, 2018-05-02), we'll report a gitmodulesParse error for two conditions: - a .gitmodules entry is not syntactically valid - a .gitmodules entry is larger than core.bigFileThreshold with the intent that we can detect malicious files and protect downstream clients. E.g., from the issue in 0383bbb901 (submodule-config: verify submodule names as paths, 2018-04-30). But these conditions are actually quite different with respect to that bug: - a syntactically invalid file cannot trigger the problem, as the victim would barf before hitting the problematic code - a too-big .gitmodules _can_ trigger the problem. Even though it is obviously silly to have a 500MB .gitmodules file, the submodule code will happily parse it if you have enough memory. So it may be reasonable to configure their severity separately. Let's add a new class for the "too large" case to allow that. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/fsck.c b/fsck.c index 87b0e22..4129935 100644 --- a/fsck.c +++ b/fsck.c @@ -63,6 +63,7 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(GITMODULES_MISSING, ERROR) \ FUNC(GITMODULES_BLOB, ERROR) \ FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_LARGE, ERROR) \ FUNC(GITMODULES_NAME, ERROR) \ FUNC(GITMODULES_SYMLINK, ERROR) \ /* warnings */ \ @@ -1005,7 +1006,7 @@ static int fsck_blob(struct blob *blob, const char *buf, * that an error. */ return report(options, &blob->object, - FSCK_MSG_GITMODULES_PARSE, + FSCK_MSG_GITMODULES_LARGE, ".gitmodules too large to parse"); } -- cgit v0.10.2-6-g49f6 From 64eb14d31093b9e3af4a35ac7c030f1cfac64895 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 13 Jul 2018 15:39:58 -0400 Subject: fsck: downgrade gitmodulesParse default to "info" We added an fsck check in ed8b10f631 (fsck: check .gitmodules content, 2018-05-02) as a defense against the vulnerability from 0383bbb901 (submodule-config: verify submodule names as paths, 2018-04-30). With the idea that up-to-date hosting sites could protect downstream unpatched clients that fetch from them. As part of that defense, we reject any ".gitmodules" entry that is not syntactically valid. The theory is that if we cannot even parse the file, we cannot accurately check it for vulnerabilities. And anybody with a broken .gitmodules file would eventually want to know anyway. But there are a few reasons this is a bad tradeoff in practice: - for this particular vulnerability, the client has to be able to parse the file. So you cannot sneak an attack through using a broken file, assuming the config parsers for the process running fsck and the eventual victim are functionally equivalent. - a broken .gitmodules file is not necessarily a problem. Our fsck check detects .gitmodules in _any_ tree, not just at the root. And the presence of a .gitmodules file does not necessarily mean it will be used; you'd have to also have gitlinks in the tree. The cgit repository, for example, has a file named .gitmodules from a pre-submodule attempt at sharing code, but does not actually have any gitlinks. - when the fsck check is used to reject a push, it's often hard to work around. The pusher may not have full control over the destination repository (e.g., if it's on a hosting server, they may need to contact the hosting site's support). And the broken .gitmodules may be too far back in history for rewriting to be feasible (again, this is an issue for cgit). So we're being unnecessarily restrictive without actually improving the security in a meaningful way. It would be more convenient to downgrade this check to "info", which means we'd still comment on it, but not reject a push. Site admins can already do this via config, but we should ship sensible defaults. There are a few counterpoints to consider in favor of keeping the check as an error: - the first point above assumes that the config parsers for the victim and the fsck process are equivalent. This is pretty true now, but as time goes on will become less so. Hosting sites are likely to upgrade their version of Git, whereas vulnerable clients will be stagnant (if they did upgrade, they'd cease to be vulnerable!). So in theory we may see drift over time between what two config parsers will accept. In practice, this is probably OK. The config format is pretty established at this point and shouldn't change a lot. And the farther we get from the announcement of the vulnerability, the less interesting this extra layer of protection becomes. I.e., it was _most_ valuable on day 0, when everybody's client was still vulnerable and hosting sites could protect people. But as time goes on and people upgrade, the population of vulnerable clients becomes smaller and smaller. - In theory this could protect us from other vulnerabilities in the future. E.g., .gitmodules are the only way for a malicious repository to feed data to the config parser, so this check could similarly protect clients from a future (to-be-found) bug there. But that's trading a hypothetical case for real-world pain today. If we do find such a bug, the hosting site would need to be updated to fix it, too. At which point we could figure out whether it's possible to detect _just_ the malicious case without hurting existing broken-but-not-evil cases. - Until recently, we hadn't made any restrictions on .gitmodules content. So now in tightening that we're hitting cases where certain things used to work, but don't anymore. There's some moderate pain now. But as time goes on, we'll see more (and more varied) cases that will make tightening harder in the future. So there's some argument for putting rules in place _now_, before users grow more cases that violate them. Again, this is trading pain now for hypothetical benefit in the future. And if we try hard in the future to keep our tightening to a minimum (i.e., rejecting true maliciousness without hurting broken-but-not-evil repos), then that reduces even the hypothetical benefit. Considering both sets of arguments, it makes sense to loosen this check for now. Note that we have to tweak the test in t7415 since fsck will no longer consider this a fatal error. But we still check that it reports the warning, and that we don't get the spurious error from the config code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/fsck.c b/fsck.c index 4129935..69ea8d5 100644 --- a/fsck.c +++ b/fsck.c @@ -62,7 +62,6 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(ZERO_PADDED_DATE, ERROR) \ FUNC(GITMODULES_MISSING, ERROR) \ FUNC(GITMODULES_BLOB, ERROR) \ - FUNC(GITMODULES_PARSE, ERROR) \ FUNC(GITMODULES_LARGE, ERROR) \ FUNC(GITMODULES_NAME, ERROR) \ FUNC(GITMODULES_SYMLINK, ERROR) \ @@ -77,6 +76,7 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(ZERO_PADDED_FILEMODE, WARN) \ FUNC(NUL_IN_COMMIT, WARN) \ /* infos (reported as warnings, but ignored by default) */ \ + FUNC(GITMODULES_PARSE, INFO) \ FUNC(BAD_TAG_NAME, INFO) \ FUNC(MISSING_TAGGER_ENTRY, INFO) diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index ba8af78..293e2e1 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -185,7 +185,7 @@ test_expect_success 'fsck detects corrupt .gitmodules' ' git add .gitmodules && git commit -m "broken gitmodules" && - test_must_fail git fsck 2>output && + git fsck 2>output && grep gitmodulesParse output && test_i18ngrep ! "bad config" output ) -- cgit v0.10.2-6-g49f6