From 0383bbb9015898cbc79abd7b64316484d7713b44 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 30 Apr 2018 03:25:25 -0400 Subject: submodule-config: verify submodule names as paths Submodule "names" come from the untrusted .gitmodules file, but we blindly append them to $GIT_DIR/modules to create our on-disk repo paths. This means you can do bad things by putting "../" into the name (among other things). Let's sanity-check these names to avoid building a path that can be exploited. There are two main decisions: 1. What should the allowed syntax be? It's tempting to reuse verify_path(), since submodule names typically come from in-repo paths. But there are two reasons not to: a. It's technically more strict than what we need, as we really care only about breaking out of the $GIT_DIR/modules/ hierarchy. E.g., having a submodule named "foo/.git" isn't actually dangerous, and it's possible that somebody has manually given such a funny name. b. Since we'll eventually use this checking logic in fsck to prevent downstream repositories, it should be consistent across platforms. Because verify_path() relies on is_dir_sep(), it wouldn't block "foo\..\bar" on a non-Windows machine. 2. Where should we enforce it? These days most of the .gitmodules reads go through submodule-config.c, so I've put it there in the reading step. That should cover all of the C code. We also construct the name for "git submodule add" inside the git-submodule.sh script. This is probably not a big deal for security since the name is coming from the user anyway, but it would be polite to remind them if the name they pick is invalid (and we need to expose the name-checker to the shell anyway for our test scripts). This patch issues a warning when reading .gitmodules and just ignores the related config entry completely. This will generally end up producing a sensible error, as it works the same as a .gitmodules file which is missing a submodule entry (so "submodule update" will barf, but "git clone --recurse-submodules" will print an error but not abort the clone. There is one minor oddity, which is that we print the warning once per malformed config key (since that's how the config subsystem gives us the entries). So in the new test, for example, the user would see three warnings. That's OK, since the intent is that this case should never come up outside of malicious repositories (and then it might even benefit the user to see the message multiple times). Credit for finding this vulnerability and the proof of concept from which the test script was adapted goes to Etienne Stalmans. Signed-off-by: Jeff King diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index cbb17a9..b4b4d29 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -1195,6 +1195,29 @@ static int is_active(int argc, const char **argv, const char *prefix) return !is_submodule_initialized(argv[1]); } +/* + * Exit non-zero if any of the submodule names given on the command line is + * invalid. If no names are given, filter stdin to print only valid names + * (which is primarily intended for testing). + */ +static int check_name(int argc, const char **argv, const char *prefix) +{ + if (argc > 1) { + while (*++argv) { + if (check_submodule_name(*argv) < 0) + return 1; + } + } else { + struct strbuf buf = STRBUF_INIT; + while (strbuf_getline(&buf, stdin) != EOF) { + if (!check_submodule_name(buf.buf)) + printf("%s\n", buf.buf); + } + strbuf_release(&buf); + } + return 0; +} + #define SUPPORT_SUPER_PREFIX (1<<0) struct cmd_struct { @@ -1216,6 +1239,7 @@ static struct cmd_struct commands[] = { {"push-check", push_check, 0}, {"absorb-git-dirs", absorb_git_dirs, SUPPORT_SUPER_PREFIX}, {"is-active", is_active, 0}, + {"check-name", check_name, 0}, }; int cmd_submodule__helper(int argc, const char **argv, const char *prefix) diff --git a/git-submodule.sh b/git-submodule.sh index c0d0e9a..92750b9 100755 --- a/git-submodule.sh +++ b/git-submodule.sh @@ -228,6 +228,11 @@ Use -f if you really want to add it." >&2 sm_name="$sm_path" fi + if ! git submodule--helper check-name "$sm_name" + then + die "$(eval_gettext "'$sm_name' is not a valid submodule name")" + fi + # perhaps the path exists and is already a git repo, else clone it if test -e "$sm_path" then diff --git a/submodule-config.c b/submodule-config.c index 4f58491..de54351 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -163,6 +163,31 @@ static struct submodule *cache_lookup_name(struct submodule_cache *cache, return NULL; } +int check_submodule_name(const char *name) +{ + /* Disallow empty names */ + if (!*name) + return -1; + + /* + * Look for '..' as a path component. Check both '/' and '\\' as + * separators rather than is_dir_sep(), because we want the name rules + * to be consistent across platforms. + */ + goto in_component; /* always start inside component */ + while (*name) { + char c = *name++; + if (c == '/' || c == '\\') { +in_component: + if (name[0] == '.' && name[1] == '.' && + (!name[2] || name[2] == '/' || name[2] == '\\')) + return -1; + } + } + + return 0; +} + static int name_and_item_from_var(const char *var, struct strbuf *name, struct strbuf *item) { @@ -174,6 +199,12 @@ static int name_and_item_from_var(const char *var, struct strbuf *name, return 0; strbuf_add(name, subsection, subsection_len); + if (check_submodule_name(name->buf) < 0) { + warning(_("ignoring suspicious submodule name: %s"), name->buf); + strbuf_release(name); + return 0; + } + strbuf_addstr(item, key); return 1; diff --git a/submodule-config.h b/submodule-config.h index d434ecd..103cc79 100644 --- a/submodule-config.h +++ b/submodule-config.h @@ -35,4 +35,11 @@ extern int gitmodule_sha1_from_commit(const unsigned char *commit_sha1, struct strbuf *rev); extern void submodule_free(void); +/* + * Returns 0 if the name is syntactically acceptable as a submodule "name" + * (e.g., that may be found in the subsection of a .gitmodules file) and -1 + * otherwise. + */ +int check_submodule_name(const char *name); + #endif /* SUBMODULE_CONFIG_H */ diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh new file mode 100755 index 0000000..75fa071 --- /dev/null +++ b/t/t7415-submodule-names.sh @@ -0,0 +1,76 @@ +#!/bin/sh + +test_description='check handling of .. in submodule names + +Exercise the name-checking function on a variety of names, and then give a +real-world setup that confirms we catch this in practice. +' +. ./test-lib.sh + +test_expect_success 'check names' ' + cat >expect <<-\EOF && + valid + valid/with/paths + EOF + + git submodule--helper check-name >actual <<-\EOF && + valid + valid/with/paths + + ../foo + /../foo + ..\foo + \..\foo + foo/.. + foo/../ + foo\.. + foo\..\ + foo/../bar + EOF + + test_cmp expect actual +' + +test_expect_success 'create innocent subrepo' ' + git init innocent && + git -C innocent commit --allow-empty -m foo +' + +test_expect_success 'submodule add refuses invalid names' ' + test_must_fail \ + git submodule add --name ../../modules/evil "$PWD/innocent" evil +' + +test_expect_success 'add evil submodule' ' + git submodule add "$PWD/innocent" evil && + + mkdir modules && + cp -r .git/modules/evil modules && + write_script modules/evil/hooks/post-checkout <<-\EOF && + echo >&2 "RUNNING POST CHECKOUT" + EOF + + git config -f .gitmodules submodule.evil.update checkout && + git config -f .gitmodules --rename-section \ + submodule.evil submodule.../../modules/evil && + git add modules && + git commit -am evil +' + +# This step seems like it shouldn't be necessary, since the payload is +# contained entirely in the evil submodule. But due to the vagaries of the +# submodule code, checking out the evil module will fail unless ".git/modules" +# exists. Adding another submodule (with a name that sorts before "evil") is an +# easy way to make sure this is the case in the victim clone. +test_expect_success 'add other submodule' ' + git submodule add "$PWD/innocent" another-module && + git add another-module && + git commit -am another +' + +test_expect_success 'clone evil superproject' ' + git clone --recurse-submodules . victim >output 2>&1 && + ! grep "RUNNING POST CHECKOUT" output +' + +test_done -- cgit v0.10.2-6-g49f6 From 11a9f4d807a0d71dc6eff51bb87baf4ca2cccf1d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 13 May 2018 12:09:42 -0400 Subject: is_ntfs_dotgit: use a size_t for traversing string We walk through the "name" string using an int, which can wrap to a negative value and cause us to read random memory before our array (e.g., by creating a tree with a name >2GB, since "int" is still 32 bits even on most 64-bit platforms). Worse, this is easy to trigger during the fsck_tree() check, which is supposed to be protecting us from malicious garbage. Note one bit of trickiness in the existing code: we sometimes assign -1 to "len" at the end of the loop, and then rely on the "len++" in the for-loop's increment to take it back to 0. This is still legal with a size_t, since assigning -1 will turn into SIZE_MAX, which then wraps around to 0 on increment. Signed-off-by: Jeff King diff --git a/path.c b/path.c index 0349a0a..9018aa0 100644 --- a/path.c +++ b/path.c @@ -1224,7 +1224,7 @@ static int only_spaces_and_periods(const char *path, size_t len, size_t skip) int is_ntfs_dotgit(const char *name) { - int len; + size_t len; for (len = 0; ; len++) if (!name[len] || name[len] == '\\' || is_dir_sep(name[len])) { -- cgit v0.10.2-6-g49f6 From 0fc333ba20b43a8afee5023e92cb3384ff4e59a6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 15:23:45 -0400 Subject: is_hfs_dotgit: match other .git files Both verify_path() and fsck match ".git", ".GIT", and other variants specific to HFS+. Let's allow matching other special files like ".gitmodules", which we'll later use to enforce extra restrictions via verify_path() and fsck. Signed-off-by: Jeff King diff --git a/utf8.c b/utf8.c index 0c8e011..cbf22a7 100644 --- a/utf8.c +++ b/utf8.c @@ -619,28 +619,33 @@ static ucs_char_t next_hfs_char(const char **in) } } -int is_hfs_dotgit(const char *path) +static int is_hfs_dot_generic(const char *path, + const char *needle, size_t needle_len) { ucs_char_t c; c = next_hfs_char(&path); if (c != '.') return 0; - c = next_hfs_char(&path); /* * there's a great deal of other case-folding that occurs - * in HFS+, but this is enough to catch anything that will - * convert to ".git" + * in HFS+, but this is enough to catch our fairly vanilla + * hard-coded needles. */ - if (c != 'g' && c != 'G') - return 0; - c = next_hfs_char(&path); - if (c != 'i' && c != 'I') - return 0; - c = next_hfs_char(&path); - if (c != 't' && c != 'T') - return 0; + for (; needle_len > 0; needle++, needle_len--) { + c = next_hfs_char(&path); + + /* + * We know our needles contain only ASCII, so we clamp here to + * make the results of tolower() sane. + */ + if (c > 127) + return 0; + if (tolower(c) != *needle) + return 0; + } + c = next_hfs_char(&path); if (c && !is_dir_sep(c)) return 0; @@ -648,6 +653,35 @@ int is_hfs_dotgit(const char *path) return 1; } +/* + * Inline wrapper to make sure the compiler resolves strlen() on literals at + * compile time. + */ +static inline int is_hfs_dot_str(const char *path, const char *needle) +{ + return is_hfs_dot_generic(path, needle, strlen(needle)); +} + +int is_hfs_dotgit(const char *path) +{ + return is_hfs_dot_str(path, "git"); +} + +int is_hfs_dotgitmodules(const char *path) +{ + return is_hfs_dot_str(path, "gitmodules"); +} + +int is_hfs_dotgitignore(const char *path) +{ + return is_hfs_dot_str(path, "gitignore"); +} + +int is_hfs_dotgitattributes(const char *path) +{ + return is_hfs_dot_str(path, "gitattributes"); +} + const char utf8_bom[] = "\357\273\277"; int skip_utf8_bom(char **text, size_t len) diff --git a/utf8.h b/utf8.h index 6bbcf31..da19b43 100644 --- a/utf8.h +++ b/utf8.h @@ -52,8 +52,13 @@ int mbs_chrlen(const char **text, size_t *remainder_p, const char *encoding); * The path should be NUL-terminated, but we will match variants of both ".git\0" * and ".git/..." (but _not_ ".../.git"). This makes it suitable for both fsck * and verify_path(). + * + * Likewise, the is_hfs_dotgitfoo() variants look for ".gitfoo". */ int is_hfs_dotgit(const char *path); +int is_hfs_dotgitmodules(const char *path); +int is_hfs_dotgitignore(const char *path); +int is_hfs_dotgitattributes(const char *path); typedef enum { ALIGN_LEFT, -- cgit v0.10.2-6-g49f6 From e7cb0b4455c85b53aeba40f88ffddcf6d4002498 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 11 May 2018 16:03:54 +0200 Subject: is_ntfs_dotgit: match other .git files When we started to catch NTFS short names that clash with .git, we only looked for GIT~1. This is sufficient because we only ever clone into an empty directory, so .git is guaranteed to be the first subdirectory or file in that directory. However, even with a fresh clone, .gitmodules is *not* necessarily the first file to be written that would want the NTFS short name GITMOD~1: a malicious repository can add .gitmodul0000 and friends, which sorts before `.gitmodules` and is therefore checked out *first*. For that reason, we have to test not only for ~1 short names, but for others, too. It's hard to just adapt the existing checks in is_ntfs_dotgit(): since Windows 2000 (i.e., in all Windows versions still supported by Git), NTFS short names are only generated in the ~ form up to number 4. After that, a *different* prefix is used, calculated from the long file name using an undocumented, but stable algorithm. For example, the short name of .gitmodules would be GITMOD~1, but if it is taken, and all of ~2, ~3 and ~4 are taken, too, the short name GI7EBA~1 will be used. From there, collisions are handled by incrementing the number, shortening the prefix as needed (until ~9999999 is reached, in which case NTFS will not allow the file to be created). We'd also want to handle .gitignore and .gitattributes, which suffer from a similar problem, using the fall-back short names GI250A~1 and GI7D29~1, respectively. To accommodate for that, we could reimplement the hashing algorithm, but it is just safer and simpler to provide the known prefixes. This algorithm has been reverse-engineered and described at https://usn.pw/blog/gen/2015/06/09/filenames/, which is defunct but still available via https://web.archive.org/. These can be recomputed by running the following Perl script: -- snip -- use warnings; use strict; sub compute_short_name_hash ($) { my $checksum = 0; foreach (split('', $_[0])) { $checksum = ($checksum * 0x25 + ord($_)) & 0xffff; } $checksum = ($checksum * 314159269) & 0xffffffff; $checksum = 1 + (~$checksum & 0x7fffffff) if ($checksum & 0x80000000); $checksum -= (($checksum * 1152921497) >> 60) * 1000000007; return scalar reverse sprintf("%x", $checksum & 0xffff); } print compute_short_name_hash($ARGV[0]); -- snap -- E.g., running that with the argument ".gitignore" will result in "250a" (which then becomes "gi250a" in the code). Signed-off-by: Johannes Schindelin Signed-off-by: Jeff King diff --git a/cache.h b/cache.h index c1041cc..041c0fb 100644 --- a/cache.h +++ b/cache.h @@ -1188,7 +1188,15 @@ int normalize_path_copy(char *dst, const char *src); int longest_ancestor_length(const char *path, struct string_list *prefixes); char *strip_path_suffix(const char *path, const char *suffix); int daemon_avoid_alias(const char *path); -extern int is_ntfs_dotgit(const char *name); + +/* + * These functions match their is_hfs_dotgit() counterparts; see utf8.h for + * details. + */ +int is_ntfs_dotgit(const char *name); +int is_ntfs_dotgitmodules(const char *name); +int is_ntfs_dotgitignore(const char *name); +int is_ntfs_dotgitattributes(const char *name); /* * Returns true iff "str" could be confused as a command-line option when diff --git a/path.c b/path.c index 9018aa0..c720625 100644 --- a/path.c +++ b/path.c @@ -1241,6 +1241,90 @@ int is_ntfs_dotgit(const char *name) } } +static int is_ntfs_dot_generic(const char *name, + const char *dotgit_name, + size_t len, + const char *dotgit_ntfs_shortname_prefix) +{ + int saw_tilde; + size_t i; + + if ((name[0] == '.' && !strncasecmp(name + 1, dotgit_name, len))) { + i = len + 1; +only_spaces_and_periods: + for (;;) { + char c = name[i++]; + if (!c) + return 1; + if (c != ' ' && c != '.') + return 0; + } + } + + /* + * Is it a regular NTFS short name, i.e. shortened to 6 characters, + * followed by ~1, ... ~4? + */ + if (!strncasecmp(name, dotgit_name, 6) && name[6] == '~' && + name[7] >= '1' && name[7] <= '4') { + i = 8; + goto only_spaces_and_periods; + } + + /* + * Is it a fall-back NTFS short name (for details, see + * https://en.wikipedia.org/wiki/8.3_filename? + */ + for (i = 0, saw_tilde = 0; i < 8; i++) + if (name[i] == '\0') + return 0; + else if (saw_tilde) { + if (name[i] < '0' || name[i] > '9') + return 0; + } else if (name[i] == '~') { + if (name[++i] < '1' || name[i] > '9') + return 0; + saw_tilde = 1; + } else if (i >= 6) + return 0; + else if (name[i] < 0) { + /* + * We know our needles contain only ASCII, so we clamp + * here to make the results of tolower() sane. + */ + return 0; + } else if (tolower(name[i]) != dotgit_ntfs_shortname_prefix[i]) + return 0; + + goto only_spaces_and_periods; +} + +/* + * Inline helper to make sure compiler resolves strlen() on literals at + * compile time. + */ +static inline int is_ntfs_dot_str(const char *name, const char *dotgit_name, + const char *dotgit_ntfs_shortname_prefix) +{ + return is_ntfs_dot_generic(name, dotgit_name, strlen(dotgit_name), + dotgit_ntfs_shortname_prefix); +} + +int is_ntfs_dotgitmodules(const char *name) +{ + return is_ntfs_dot_str(name, "gitmodules", "gi7eba"); +} + +int is_ntfs_dotgitignore(const char *name) +{ + return is_ntfs_dot_str(name, "gitignore", "gi250a"); +} + +int is_ntfs_dotgitattributes(const char *name) +{ + return is_ntfs_dot_str(name, "gitattributes", "gi7d29"); +} + int looks_like_command_line_option(const char *str) { return str && str[0] == '-'; -- cgit v0.10.2-6-g49f6 From dc2d9ba3187fcd0ca8eeab9aa9ddef70cf8627a6 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Sat, 12 May 2018 22:16:51 +0200 Subject: is_{hfs,ntfs}_dotgitmodules: add tests This tests primarily for NTFS issues, but also adds one example of an HFS+ issue. Thanks go to Congyi Wu for coming up with the list of examples where NTFS would possibly equate the filename with `.gitmodules`. Signed-off-by: Johannes Schindelin Signed-off-by: Jeff King diff --git a/t/helper/test-path-utils.c b/t/helper/test-path-utils.c index 1ebe0f7..77517a4 100644 --- a/t/helper/test-path-utils.c +++ b/t/helper/test-path-utils.c @@ -1,5 +1,6 @@ #include "cache.h" #include "string-list.h" +#include "utf8.h" /* * A "string_list_each_func_t" function that normalizes an entry from @@ -156,6 +157,11 @@ static struct test_data dirname_data[] = { { NULL, NULL } }; +static int is_dotgitmodules(const char *path) +{ + return is_hfs_dotgitmodules(path) || is_ntfs_dotgitmodules(path); +} + int cmd_main(int argc, const char **argv) { if (argc == 3 && !strcmp(argv[1], "normalize_path_copy")) { @@ -256,6 +262,20 @@ int cmd_main(int argc, const char **argv) if (argc == 2 && !strcmp(argv[1], "dirname")) return test_function(dirname_data, dirname, argv[1]); + if (argc > 2 && !strcmp(argv[1], "is_dotgitmodules")) { + int res = 0, expect = 1, i; + for (i = 2; i < argc; i++) + if (!strcmp("--not", argv[i])) + expect = !expect; + else if (expect != is_dotgitmodules(argv[i])) + res = error("'%s' is %s.gitmodules", argv[i], + expect ? "not " : ""); + else + fprintf(stderr, "ok: '%s' is %s.gitmodules\n", + argv[i], expect ? "" : "not "); + return !!res; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; diff --git a/t/t0060-path-utils.sh b/t/t0060-path-utils.sh index 7ea2bb5..3f3357e 100755 --- a/t/t0060-path-utils.sh +++ b/t/t0060-path-utils.sh @@ -349,4 +349,90 @@ test_submodule_relative_url "(null)" "ssh://hostname:22/repo" "../subrepo" "ssh: test_submodule_relative_url "(null)" "user@host:path/to/repo" "../subrepo" "user@host:path/to/subrepo" test_submodule_relative_url "(null)" "user@host:repo" "../subrepo" "user@host:subrepo" +test_expect_success 'match .gitmodules' ' + test-path-utils is_dotgitmodules \ + .gitmodules \ + \ + .git${u200c}modules \ + \ + .Gitmodules \ + .gitmoduleS \ + \ + ".gitmodules " \ + ".gitmodules." \ + ".gitmodules " \ + ".gitmodules. " \ + ".gitmodules ." \ + ".gitmodules.." \ + ".gitmodules " \ + ".gitmodules. " \ + ".gitmodules . " \ + ".gitmodules ." \ + \ + ".Gitmodules " \ + ".Gitmodules." \ + ".Gitmodules " \ + ".Gitmodules. " \ + ".Gitmodules ." \ + ".Gitmodules.." \ + ".Gitmodules " \ + ".Gitmodules. " \ + ".Gitmodules . " \ + ".Gitmodules ." \ + \ + GITMOD~1 \ + gitmod~1 \ + GITMOD~2 \ + gitmod~3 \ + GITMOD~4 \ + \ + "GITMOD~1 " \ + "gitmod~2." \ + "GITMOD~3 " \ + "gitmod~4. " \ + "GITMOD~1 ." \ + "gitmod~2 " \ + "GITMOD~3. " \ + "gitmod~4 . " \ + \ + GI7EBA~1 \ + gi7eba~9 \ + \ + GI7EB~10 \ + GI7EB~11 \ + GI7EB~99 \ + GI7EB~10 \ + GI7E~100 \ + GI7E~101 \ + GI7E~999 \ + ~1000000 \ + ~9999999 \ + \ + --not \ + ".gitmodules x" \ + ".gitmodules .x" \ + \ + " .gitmodules" \ + \ + ..gitmodules \ + \ + gitmodules \ + \ + .gitmodule \ + \ + ".gitmodules x " \ + ".gitmodules .x" \ + \ + GI7EBA~ \ + GI7EBA~0 \ + GI7EBA~~1 \ + GI7EBA~X \ + Gx7EBA~1 \ + GI7EBX~1 \ + \ + GI7EB~1 \ + GI7EB~01 \ + GI7EB~1X +' + test_done -- cgit v0.10.2-6-g49f6 From 41a80924aec0e94309786837b6f954a3b3f19b71 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 13 May 2018 12:57:14 -0400 Subject: skip_prefix: add case-insensitive variant We have the convenient skip_prefix() helper, but if you want to do case-insensitive matching, you're stuck doing it by hand. We could add an extra parameter to the function to let callers ask for this, but the function is small and somewhat performance-critical. Let's just re-implement it for the case-insensitive version. Signed-off-by: Jeff King diff --git a/git-compat-util.h b/git-compat-util.h index 59866d7..4be15e5 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -956,6 +956,23 @@ static inline int sane_iscase(int x, int is_lower) return (x & 0x20) == 0; } +/* + * Like skip_prefix, but compare case-insensitively. Note that the comparison + * is done via tolower(), so it is strictly ASCII (no multi-byte characters or + * locale-specific conversions). + */ +static inline int skip_iprefix(const char *str, const char *prefix, + const char **out) +{ + do { + if (!*prefix) { + *out = str; + return 1; + } + } while (tolower(*str++) == tolower(*prefix++)); + return 0; +} + static inline int strtoul_ui(char const *s, int base, unsigned int *result) { unsigned long ul; -- cgit v0.10.2-6-g49f6 From e19e5e66d691bdeeeb5e0ed2ffcecdd7666b0d7b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 13 May 2018 13:00:23 -0400 Subject: verify_path: drop clever fallthrough We check ".git" and ".." in the same switch statement, and fall through the cases to share the end-of-component check. While this saves us a line or two, it makes modifying the function much harder. Let's just write it out. Signed-off-by: Jeff King diff --git a/read-cache.c b/read-cache.c index 6238df4..5c5dfc6 100644 --- a/read-cache.c +++ b/read-cache.c @@ -810,8 +810,7 @@ static int verify_dotfile(const char *rest) switch (*rest) { /* - * ".git" followed by NUL or slash is bad. This - * shares the path end test with the ".." case. + * ".git" followed by NUL or slash is bad. */ case 'g': case 'G': @@ -819,8 +818,9 @@ static int verify_dotfile(const char *rest) break; if (rest[2] != 't' && rest[2] != 'T') break; - rest += 2; - /* fallthrough */ + if (rest[3] == '\0' || is_dir_sep(rest[3])) + return 0; + break; case '.': if (rest[1] == '\0' || is_dir_sep(rest[1])) return 0; -- cgit v0.10.2-6-g49f6 From 641084b618ddbe099f0992161988c3e479ae848b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 15 May 2018 09:56:50 -0400 Subject: verify_dotfile: mention case-insensitivity in comment We're more restrictive than we need to be in matching ".GIT" on case-sensitive filesystems; let's make a note that this is intentional. Signed-off-by: Jeff King diff --git a/read-cache.c b/read-cache.c index 5c5dfc6..333e0c5 100644 --- a/read-cache.c +++ b/read-cache.c @@ -810,7 +810,10 @@ static int verify_dotfile(const char *rest) switch (*rest) { /* - * ".git" followed by NUL or slash is bad. + * ".git" followed by NUL or slash is bad. Note that we match + * case-insensitively here, even if ignore_case is not set. + * This outlaws ".GIT" everywhere out of an abundance of caution, + * since there's really no good reason to allow it. */ case 'g': case 'G': -- cgit v0.10.2-6-g49f6 From eb12dd0c764d2b71bebd5ffffb7379a3835253ae Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 May 2018 11:00:56 -0400 Subject: update-index: stat updated files earlier In the update_one(), we check verify_path() on the proposed path before doing anything else. In preparation for having verify_path() look at the file mode, let's stat the file earlier, so we can check the mode accurately. This is made a bit trickier by the fact that this function only does an lstat in a few code paths (the ones that flow down through process_path()). So we can speculatively do the lstat() here and pass the results down, and just use a dummy mode for cases where we won't actually be updating the index from the filesystem. Signed-off-by: Jeff King diff --git a/builtin/update-index.c b/builtin/update-index.c index ebfc09f..8d152de 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -358,10 +358,9 @@ static int process_directory(const char *path, int len, struct stat *st) return error("%s: is a directory - add files inside instead", path); } -static int process_path(const char *path) +static int process_path(const char *path, struct stat *st, int stat_errno) { int pos, len; - struct stat st; const struct cache_entry *ce; len = strlen(path); @@ -385,13 +384,13 @@ static int process_path(const char *path) * First things first: get the stat information, to decide * what to do about the pathname! */ - if (lstat(path, &st) < 0) - return process_lstat_error(path, errno); + if (stat_errno) + return process_lstat_error(path, stat_errno); - if (S_ISDIR(st.st_mode)) - return process_directory(path, len, &st); + if (S_ISDIR(st->st_mode)) + return process_directory(path, len, st); - return add_one_path(ce, path, len, &st); + return add_one_path(ce, path, len, st); } static int add_cacheinfo(unsigned int mode, const struct object_id *oid, @@ -443,6 +442,16 @@ static void chmod_path(char flip, const char *path) static void update_one(const char *path) { + int stat_errno = 0; + struct stat st; + + if (mark_valid_only || mark_skip_worktree_only || force_remove) + st.st_mode = 0; + else if (lstat(path, &st) < 0) { + st.st_mode = 0; + stat_errno = errno; + } /* else stat is valid */ + if (!verify_path(path)) { fprintf(stderr, "Ignoring path %s\n", path); return; @@ -464,7 +473,7 @@ static void update_one(const char *path) report("remove '%s'", path); return; } - if (process_path(path)) + if (process_path(path, &st, stat_errno)) die("Unable to process path %s", path); report("add '%s'", path); } -- cgit v0.10.2-6-g49f6 From 10ecfa76491e4923988337b2e2243b05376b40de Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 4 May 2018 20:03:35 -0400 Subject: verify_path: disallow symlinks in .gitmodules There are a few reasons it's not a good idea to make .gitmodules a symlink, including: 1. It won't be portable to systems without symlinks. 2. It may behave inconsistently, since Git may look at this file in the index or a tree without bothering to resolve any symbolic links. We don't do this _yet_, but the config infrastructure is there and it's planned for the future. With some clever code, we could make (2) work. And some people may not care about (1) if they only work on one platform. But there are a few security reasons to simply disallow it: a. A symlinked .gitmodules file may circumvent any fsck checks of the content. b. Git may read and write from the on-disk file without sanity checking the symlink target. So for example, if you link ".gitmodules" to "../oops" and run "git submodule add", we'll write to the file "oops" outside the repository. Again, both of those are problems that _could_ be solved with sufficient code, but given the complications in (1) and (2), we're better off just outlawing it explicitly. Note the slightly tricky call to verify_path() in update-index's update_one(). There we may not have a mode if we're not updating from the filesystem (e.g., we might just be removing the file). Passing "0" as the mode there works fine; since it's not a symlink, we'll just skip the extra checks. Signed-off-by: Jeff King diff --git a/apply.c b/apply.c index f8bf0bd..b96d375 100644 --- a/apply.c +++ b/apply.c @@ -3867,9 +3867,9 @@ static int check_unsafe_path(struct patch *patch) if (!patch->is_delete) new_name = patch->new_name; - if (old_name && !verify_path(old_name)) + if (old_name && !verify_path(old_name, patch->old_mode)) return error(_("invalid path '%s'"), old_name); - if (new_name && !verify_path(new_name)) + if (new_name && !verify_path(new_name, patch->new_mode)) return error(_("invalid path '%s'"), new_name); return 0; } diff --git a/builtin/update-index.c b/builtin/update-index.c index 8d152de..1921659 100644 --- a/builtin/update-index.c +++ b/builtin/update-index.c @@ -399,7 +399,7 @@ static int add_cacheinfo(unsigned int mode, const struct object_id *oid, int size, len, option; struct cache_entry *ce; - if (!verify_path(path)) + if (!verify_path(path, mode)) return error("Invalid path '%s'", path); len = strlen(path); @@ -452,7 +452,7 @@ static void update_one(const char *path) stat_errno = errno; } /* else stat is valid */ - if (!verify_path(path)) { + if (!verify_path(path, st.st_mode)) { fprintf(stderr, "Ignoring path %s\n", path); return; } @@ -543,7 +543,7 @@ static void read_index_info(int nul_term_line) path_name = uq.buf; } - if (!verify_path(path_name)) { + if (!verify_path(path_name, mode)) { fprintf(stderr, "Ignoring path %s\n", path_name); continue; } diff --git a/cache.h b/cache.h index 041c0fb..5a44f79 100644 --- a/cache.h +++ b/cache.h @@ -598,7 +598,7 @@ extern int read_index_unmerged(struct index_state *); extern int write_locked_index(struct index_state *, struct lock_file *lock, unsigned flags); extern int discard_index(struct index_state *); extern int unmerged_index(const struct index_state *); -extern int verify_path(const char *path); +extern int verify_path(const char *path, unsigned mode); extern int strcmp_offset(const char *s1, const char *s2, size_t *first_change); extern int index_dir_exists(struct index_state *istate, const char *name, int namelen); extern void adjust_dirname_case(struct index_state *istate, char *name); diff --git a/read-cache.c b/read-cache.c index 333e0c5..aa99f1f 100644 --- a/read-cache.c +++ b/read-cache.c @@ -732,7 +732,7 @@ struct cache_entry *make_cache_entry(unsigned int mode, int size, len; struct cache_entry *ce, *ret; - if (!verify_path(path)) { + if (!verify_path(path, mode)) { error("Invalid path '%s'", path); return NULL; } @@ -796,7 +796,7 @@ int ce_same_name(const struct cache_entry *a, const struct cache_entry *b) * Also, we don't want double slashes or slashes at the * end that can make pathnames ambiguous. */ -static int verify_dotfile(const char *rest) +static int verify_dotfile(const char *rest, unsigned mode) { /* * The first character was '.', but that @@ -814,6 +814,9 @@ static int verify_dotfile(const char *rest) * case-insensitively here, even if ignore_case is not set. * This outlaws ".GIT" everywhere out of an abundance of caution, * since there's really no good reason to allow it. + * + * Once we've seen ".git", we can also find ".gitmodules", etc (also + * case-insensitively). */ case 'g': case 'G': @@ -823,6 +826,12 @@ static int verify_dotfile(const char *rest) break; if (rest[3] == '\0' || is_dir_sep(rest[3])) return 0; + if (S_ISLNK(mode)) { + rest += 3; + if (skip_iprefix(rest, "modules", &rest) && + (*rest == '\0' || is_dir_sep(*rest))) + return 0; + } break; case '.': if (rest[1] == '\0' || is_dir_sep(rest[1])) @@ -831,7 +840,7 @@ static int verify_dotfile(const char *rest) return 1; } -int verify_path(const char *path) +int verify_path(const char *path, unsigned mode) { char c; @@ -844,12 +853,25 @@ int verify_path(const char *path) return 1; if (is_dir_sep(c)) { inside: - if (protect_hfs && is_hfs_dotgit(path)) - return 0; - if (protect_ntfs && is_ntfs_dotgit(path)) - return 0; + if (protect_hfs) { + if (is_hfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_hfs_dotgitmodules(path)) + return 0; + } + } + if (protect_ntfs) { + if (is_ntfs_dotgit(path)) + return 0; + if (S_ISLNK(mode)) { + if (is_ntfs_dotgitmodules(path)) + return 0; + } + } + c = *path++; - if ((c == '.' && !verify_dotfile(path)) || + if ((c == '.' && !verify_dotfile(path, mode)) || is_dir_sep(c) || c == '\0') return 0; } @@ -1166,7 +1188,7 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e if (!ok_to_add) return -1; - if (!verify_path(ce->name)) + if (!verify_path(ce->name, ce->ce_mode)) return error("Invalid path '%s'", ce->name); if (!skip_df_check && -- cgit v0.10.2-6-g49f6 From db5a58c1bda5b20169b9958af1e8b05ddd178b01 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 16:37:09 -0400 Subject: index-pack: make fsck error message more specific If fsck reports an error, we say only "Error in object". This isn't quite as bad as it might seem, since the fsck code would have dumped some errors to stderr already. But it might help to give a little more context. The earlier output would not have even mentioned "fsck", and that may be a clue that the "fsck.*" or "*.fsckObjects" config may be relevant. Signed-off-by: Jeff King diff --git a/builtin/index-pack.c b/builtin/index-pack.c index bda84a9..d15b24e 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -853,7 +853,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, die(_("invalid %s"), type_name(type)); if (do_fsck_object && fsck_object(obj, buf, size, &fsck_options)) - die(_("Error in object")); + die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6620fee..648b952 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -210,7 +210,7 @@ static int check_object(struct object *obj, int type, void *data, struct fsck_op if (!obj_buf) die("Whoops! Cannot find object '%s'", oid_to_hex(&obj->oid)); if (fsck_object(obj, obj_buf->buffer, obj_buf->size, &fsck_options)) - die("Error in object"); + die("fsck error in packed object"); fsck_options.walk = check_object; if (fsck_walk(obj, NULL, &fsck_options)) die("Error on reachable objects of %s", oid_to_hex(&obj->oid)); -- cgit v0.10.2-6-g49f6 From ed9c3220621d634d543bc4dd998d12167dfc57d4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Sun, 13 May 2018 12:35:37 -0400 Subject: fsck: simplify ".git" check There's no need for us to manually check for ".git"; it's a subset of the other filesystem-specific tests. Dropping it makes our code slightly shorter. More importantly, the existing code may make a reader wonder why ".GIT" is not covered here, and whether that is a bug (it isn't, as it's also covered in the filesystem-specific tests). Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 5c8c12d..f7be966 100644 --- a/fsck.c +++ b/fsck.c @@ -561,9 +561,7 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_empty_name |= !*name; has_dot |= !strcmp(name, "."); has_dotdot |= !strcmp(name, ".."); - has_dotgit |= (!strcmp(name, ".git") || - is_hfs_dotgit(name) || - is_ntfs_dotgit(name)); + has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); -- cgit v0.10.2-6-g49f6 From 7ac4f3a007e2567f9d2492806186aa063f9a08d6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 15:44:51 -0400 Subject: fsck: actually fsck blob data Because fscking a blob has always been a noop, we didn't bother passing around the blob data. In preparation for content-level checks, let's fix up a few things: 1. The fsck_object() function just returns success for any blob. Let's a noop fsck_blob(), which we can fill in with actual logic later. 2. The fsck_loose() function in builtin/fsck.c just threw away blob content after loading it. Let's hold onto it until after we've called fsck_object(). The easiest way to do this is to just drop the parse_loose_object() helper entirely. Incidentally, this also fixes a memory leak: if we successfully loaded the object data but did not parse it, we would have left the function without freeing it. 3. When fsck_loose() loads the object data, it does so with a custom read_loose_object() helper. This function streams any blobs, regardless of size, under the assumption that we're only checking the sha1. Instead, let's actually load blobs smaller than big_file_threshold, as the normal object-reading code-paths would do. This lets us fsck small files, and a NULL return is an indication that the blob was so big that it needed to be streamed, and we can pass that information along to fsck_blob(). Signed-off-by: Jeff King diff --git a/builtin/fsck.c b/builtin/fsck.c index ef78c6c..f91d5f3 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -337,7 +337,7 @@ static void check_connectivity(void) } } -static int fsck_obj(struct object *obj) +static int fsck_obj(struct object *obj, void *buffer, unsigned long size) { int err; @@ -351,7 +351,7 @@ static int fsck_obj(struct object *obj) if (fsck_walk(obj, NULL, &fsck_obj_options)) objerror(obj, "broken links"); - err = fsck_object(obj, NULL, 0, &fsck_obj_options); + err = fsck_object(obj, buffer, size, &fsck_obj_options); if (err) goto out; @@ -396,7 +396,7 @@ static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - return fsck_obj(obj); + return fsck_obj(obj, buffer, size); } static int default_refs; @@ -504,44 +504,42 @@ static void get_default_heads(void) } } -static struct object *parse_loose_object(const struct object_id *oid, - const char *path) +static int fsck_loose(const struct object_id *oid, const char *path, void *data) { struct object *obj; - void *contents; enum object_type type; unsigned long size; + void *contents; int eaten; - if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) - return NULL; + if (read_loose_object(path, oid->hash, &type, &size, &contents) < 0) { + errors_found |= ERROR_OBJECT; + error("%s: object corrupt or missing: %s", + oid_to_hex(oid), path); + return 0; /* keep checking other objects */ + } if (!contents && type != OBJ_BLOB) - die("BUG: read_loose_object streamed a non-blob"); + BUG("read_loose_object streamed a non-blob"); obj = parse_object_buffer(oid, type, size, contents, &eaten); - - if (!eaten) - free(contents); - return obj; -} - -static int fsck_loose(const struct object_id *oid, const char *path, void *data) -{ - struct object *obj = parse_loose_object(oid, path); - if (!obj) { errors_found |= ERROR_OBJECT; - error("%s: object corrupt or missing: %s", + error("%s: object could not be parsed: %s", oid_to_hex(oid), path); + if (!eaten) + free(contents); return 0; /* keep checking other objects */ } obj->flags &= ~(REACHABLE | SEEN); obj->flags |= HAS_OBJ; - if (fsck_obj(obj)) + if (fsck_obj(obj, contents, size)) errors_found |= ERROR_OBJECT; - return 0; + + if (!eaten) + free(contents); + return 0; /* keep checking other objects, even if we saw an error */ } static int fsck_cruft(const char *basename, const char *path, void *data) diff --git a/fsck.c b/fsck.c index f7be966..76a0ad3 100644 --- a/fsck.c +++ b/fsck.c @@ -899,6 +899,12 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +static int fsck_blob(struct blob *blob, const char *buf, + unsigned long size, struct fsck_options *options) +{ + return 0; +} + int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options) { @@ -906,7 +912,7 @@ int fsck_object(struct object *obj, void *data, unsigned long size, return report(options, obj, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck"); if (obj->type == OBJ_BLOB) - return 0; + return fsck_blob((struct blob *)obj, data, size, options); if (obj->type == OBJ_TREE) return fsck_tree((struct tree *) obj, options); if (obj->type == OBJ_COMMIT) diff --git a/sha1_file.c b/sha1_file.c index cc0f43e..53f0a36 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -2209,7 +2209,7 @@ int read_loose_object(const char *path, goto out; } - if (*type == OBJ_BLOB) { + if (*type == OBJ_BLOB && *size > big_file_threshold) { if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0) goto out; } else { -- cgit v0.10.2-6-g49f6 From 159e7b080bfa5d34559467cacaa79df89a01afc0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 17:20:08 -0400 Subject: fsck: detect gitmodules files In preparation for performing fsck checks on .gitmodules files, this commit plumbs in the actual detection of the files. Note that unlike most other fsck checks, this cannot be a property of a single object: we must know that the object is found at a ".gitmodules" path at the root tree of a commit. Since the fsck code only sees one object at a time, we have to mark the related objects to fit the puzzle together. When we see a commit we mark its tree as a root tree, and when we see a root tree with a .gitmodules file, we mark the corresponding blob to be checked. In an ideal world, we'd check the objects in topological order: commits followed by trees followed by blobs. In that case we can avoid ever loading an object twice, since all markings would be complete by the time we get to the marked objects. And indeed, if we are checking a single packfile, this is the order in which Git will generally write the objects. But we can't count on that: 1. git-fsck may show us the objects in arbitrary order (loose objects are fed in sha1 order, but we may also have multiple packs, and we process each pack fully in sequence). 2. The type ordering is just what git-pack-objects happens to write now. The pack format does not require a specific order, and it's possible that future versions of Git (or a custom version trying to fool official Git's fsck checks!) may order it differently. 3. We may not even be fscking all of the relevant objects at once. Consider pushing with transfer.fsckObjects, where one push adds a blob at path "foo", and then a second push adds the same blob at path ".gitmodules". The blob is not part of the second push at all, but we need to mark and check it. So in the general case, we need to make up to three passes over the objects: once to make sure we've seen all commits, then once to cover any trees we might have missed, and then a final pass to cover any .gitmodules blobs we found in the second pass. We can simplify things a bit by loosening the requirement that we find .gitmodules only at root trees. Technically a file like "subdir/.gitmodules" is not parsed by Git, but it's not unreasonable for us to declare that Git is aware of all ".gitmodules" files and make them eligible for checking. That lets us drop the root-tree requirement, which eliminates one pass entirely. And it makes our worst case much better: instead of potentially queueing every root tree to be re-examined, the worst case is that we queue each unique .gitmodules blob for a second look. This patch just adds the boilerplate to find .gitmodules files. The actual content checks will come in a subsequent commit. Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 76a0ad3..105b3e7 100644 --- a/fsck.c +++ b/fsck.c @@ -10,6 +10,10 @@ #include "utf8.h" #include "sha1-array.h" #include "decorate.h" +#include "oidset.h" + +static struct oidset gitmodules_found = OIDSET_INIT; +static struct oidset gitmodules_done = OIDSET_INIT; #define FSCK_FATAL -1 #define FSCK_INFO -2 @@ -44,6 +48,7 @@ FUNC(MISSING_TAG_ENTRY, ERROR) \ FUNC(MISSING_TAG_OBJECT, ERROR) \ FUNC(MISSING_TREE, ERROR) \ + FUNC(MISSING_TREE_OBJECT, ERROR) \ FUNC(MISSING_TYPE, ERROR) \ FUNC(MISSING_TYPE_ENTRY, ERROR) \ FUNC(MULTIPLE_AUTHORS, ERROR) \ @@ -51,6 +56,8 @@ FUNC(TREE_NOT_SORTED, ERROR) \ FUNC(UNKNOWN_TYPE, ERROR) \ FUNC(ZERO_PADDED_DATE, ERROR) \ + FUNC(GITMODULES_MISSING, ERROR) \ + FUNC(GITMODULES_BLOB, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -563,6 +570,10 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_dotdot |= !strcmp(name, ".."); has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; + + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) + oidset_insert(&gitmodules_found, oid); + if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); break; @@ -936,3 +947,50 @@ int fsck_error_function(struct fsck_options *o, error("object %s: %s", describe_object(o, obj), message); return 1; } + +int fsck_finish(struct fsck_options *options) +{ + int ret = 0; + struct oidset_iter iter; + const struct object_id *oid; + + oidset_iter_init(&gitmodules_found, &iter); + while ((oid = oidset_iter_next(&iter))) { + struct blob *blob; + enum object_type type; + unsigned long size; + char *buf; + + if (oidset_contains(&gitmodules_done, oid)) + continue; + + blob = lookup_blob(oid); + if (!blob) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + continue; + } + + buf = read_sha1_file(oid->hash, &type, &size); + if (!buf) { + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_MISSING, + "unable to read .gitmodules blob"); + continue; + } + + if (type == OBJ_BLOB) + ret |= fsck_blob(blob, buf, size, options); + else + ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_BLOB, + "non-blob found at .gitmodules"); + free(buf); + } + + + oidset_clear(&gitmodules_found); + oidset_clear(&gitmodules_done); + return ret; +} diff --git a/fsck.h b/fsck.h index 4525510..c3cf5e0 100644 --- a/fsck.h +++ b/fsck.h @@ -53,4 +53,11 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options); int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options); +/* + * Some fsck checks are context-dependent, and may end up queued; run this + * after completing all fsck_object() calls in order to resolve any remaining + * checks. + */ +int fsck_finish(struct fsck_options *options); + #endif -- cgit v0.10.2-6-g49f6 From 2738744426c161a98c2ec494d41241a4c5eef9ef Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 14 May 2018 12:22:48 -0400 Subject: fsck: handle promisor objects in .gitmodules check If we have a tree that points to a .gitmodules blob but don't have that blob, we can't check its contents. This produces an fsck error when we encounter it. But in the case of a promisor object, this absence is expected, and we must not complain. Note that this can technically circumvent our transfer.fsckObjects check. Imagine a client fetches a tree, but not the matching .gitmodules blob. An fsck of the incoming objects will show that we don't have enough information. Later, we do fetch the actual blob. But we have no idea that it's a .gitmodules file. The only ways to get around this would be to re-scan all of the existing trees whenever new ones enter (which is expensive), or to somehow persist the gitmodules_found set between fsck runs (which is complicated). In practice, it's probably OK to ignore the problem. Any repository which has all of the objects (including the one serving the promisor packs) can perform the checks. Since promisor packs are inherently about a hierarchical topology in which clients rely on upstream repositories, those upstream repositories can protect all of their downstream clients from broken objects. Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 105b3e7..a91e6ce 100644 --- a/fsck.c +++ b/fsck.c @@ -11,6 +11,7 @@ #include "sha1-array.h" #include "decorate.h" #include "oidset.h" +#include "packfile.h" static struct oidset gitmodules_found = OIDSET_INIT; static struct oidset gitmodules_done = OIDSET_INIT; @@ -974,6 +975,8 @@ int fsck_finish(struct fsck_options *options) buf = read_sha1_file(oid->hash, &type, &size); if (!buf) { + if (is_promisor_object(&blob->object.oid)) + continue; ret |= report(options, &blob->object, FSCK_MSG_GITMODULES_MISSING, "unable to read .gitmodules blob"); -- cgit v0.10.2-6-g49f6 From ed8b10f631c9a71df3351d46187bf7f3fa4f9b7e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 17:25:27 -0400 Subject: fsck: check .gitmodules content This patch detects and blocks submodule names which do not match the policy set forth in submodule-config. These should already be caught by the submodule code itself, but putting the check here means that newer versions of Git can protect older ones from malicious entries (e.g., a server with receive.fsckObjects will block the objects, protecting clients which fetch from it). As a side effect, this means fsck will also complain about .gitmodules files that cannot be parsed (or were larger than core.bigFileThreshold). Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index a91e6ce..2eddfc3 100644 --- a/fsck.c +++ b/fsck.c @@ -12,6 +12,8 @@ #include "decorate.h" #include "oidset.h" #include "packfile.h" +#include "submodule-config.h" +#include "config.h" static struct oidset gitmodules_found = OIDSET_INIT; static struct oidset gitmodules_done = OIDSET_INIT; @@ -59,6 +61,8 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(ZERO_PADDED_DATE, ERROR) \ FUNC(GITMODULES_MISSING, ERROR) \ FUNC(GITMODULES_BLOB, ERROR) \ + FUNC(GITMODULES_PARSE, ERROR) \ + FUNC(GITMODULES_NAME, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -911,10 +915,64 @@ static int fsck_tag(struct tag *tag, const char *data, return fsck_tag_buffer(tag, data, size, options); } +struct fsck_gitmodules_data { + struct object *obj; + struct fsck_options *options; + int ret; +}; + +static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata) +{ + struct fsck_gitmodules_data *data = vdata; + const char *subsection, *key; + int subsection_len; + char *name; + + if (parse_config_key(var, "submodule", &subsection, &subsection_len, &key) < 0 || + !subsection) + return 0; + + name = xmemdupz(subsection, subsection_len); + if (check_submodule_name(name) < 0) + data->ret |= report(data->options, data->obj, + FSCK_MSG_GITMODULES_NAME, + "disallowed submodule name: %s", + name); + free(name); + + return 0; +} + static int fsck_blob(struct blob *blob, const char *buf, unsigned long size, struct fsck_options *options) { - return 0; + struct fsck_gitmodules_data data; + + if (!oidset_contains(&gitmodules_found, &blob->object.oid)) + return 0; + oidset_insert(&gitmodules_done, &blob->object.oid); + + if (!buf) { + /* + * A missing buffer here is a sign that the caller found the + * blob too gigantic to load into memory. Let's just consider + * that an error. + */ + return report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + ".gitmodules too large to parse"); + } + + data.obj = &blob->object; + data.options = options; + data.ret = 0; + if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB, + ".gitmodules", buf, size, &data)) + data.ret |= report(options, &blob->object, + FSCK_MSG_GITMODULES_PARSE, + "could not parse gitmodules blob"); + + return data.ret; } int fsck_object(struct object *obj, void *data, unsigned long size, -- cgit v0.10.2-6-g49f6 From 1995b5e03e1cc97116be58cdc0502d4a23547856 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 2 May 2018 17:20:35 -0400 Subject: fsck: call fsck_finish() after fscking objects Now that the internal fsck code is capable of checking .gitmodules files, we just need to teach its callers to use the "finish" function to check any queued objects. With this, we can now catch the malicious case in t7415 with git-fsck. Signed-off-by: Jeff King diff --git a/builtin/fsck.c b/builtin/fsck.c index f91d5f3..028aba5 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -748,6 +748,9 @@ int cmd_fsck(int argc, const char **argv, const char *prefix) } stop_progress(&progress); } + + if (fsck_finish(&fsck_obj_options)) + errors_found |= ERROR_OBJECT; } for (i = 0; i < argc; i++) { diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 75fa071..c8ce2f4 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -73,4 +73,8 @@ test_expect_success 'clone evil superproject' ' ! grep "RUNNING POST CHECKOUT" output ' +test_expect_success 'fsck detects evil superproject' ' + test_must_fail git fsck +' + test_done -- cgit v0.10.2-6-g49f6 From 6e328d6caef218db320978e3e251009135d87d0e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 4 May 2018 19:40:08 -0400 Subject: unpack-objects: call fsck_finish() after fscking objects As with the previous commit, we must call fsck's "finish" function in order to catch any queued objects for .gitmodules checks. This second pass will be able to access any incoming objects, because we will have exploded them to loose objects by now. This isn't quite ideal, because it means that bad objects may have been written to the object database (and a subsequent operation could then reference them, even if the other side doesn't send the objects again). However, this is sufficient when used with receive.fsckObjects, since those loose objects will all be placed in a temporary quarantine area that will get wiped if we find any problems. Signed-off-by: Jeff King diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 648b952..c8f1406 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -572,8 +572,11 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); the_hash_algo->final_fn(oid.hash, &ctx); - if (strict) + if (strict) { write_rest(); + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } if (hashcmp(fill(the_hash_algo->rawsz), oid.hash)) die("final sha1 did not match"); use(the_hash_algo->rawsz); diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index c8ce2f4..7fdf5d6 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -77,4 +77,11 @@ test_expect_success 'fsck detects evil superproject' ' test_must_fail git fsck ' +test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + test_must_fail git push dst.git HEAD +' + test_done -- cgit v0.10.2-6-g49f6 From 73c3f0f704a91b6792e0199a3f3ab6e3a1971675 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 4 May 2018 19:45:01 -0400 Subject: index-pack: check .gitmodules files with --strict Now that the internal fsck code has all of the plumbing we need, we can start checking incoming .gitmodules files. Naively, it seems like we would just need to add a call to fsck_finish() after we've processed all of the objects. And that would be enough to cover the initial test included here. But there are two extra bits: 1. We currently don't bother calling fsck_object() at all for blobs, since it has traditionally been a noop. We'd actually catch these blobs in fsck_finish() at the end, but it's more efficient to check them when we already have the object loaded in memory. 2. The second pass done by fsck_finish() needs to access the objects, but we're actually indexing the pack in this process. In theory we could give the fsck code a special callback for accessing the in-pack data, but it's actually quite tricky: a. We don't have an internal efficient index mapping oids to packfile offsets. We only generate it on the fly as part of writing out the .idx file. b. We'd still have to reconstruct deltas, which means we'd basically have to replicate all of the reading logic in packfile.c. Instead, let's avoid running fsck_finish() until after we've written out the .idx file, and then just add it to our internal packed_git list. This does mean that the objects are "in the repository" before we finish our fsck checks. But unpack-objects already exhibits this same behavior, and it's an acceptable tradeoff here for the same reason: the quarantine mechanism means that pushes will be fully protected. In addition to a basic push test in t7415, we add a sneaky pack that reverses the usual object order in the pack, requiring that index-pack access the tree and blob during the "finish" step. This already works for unpack-objects (since it will have written out loose objects), but we'll check it with this sneaky pack for good measure. Signed-off-by: Jeff King diff --git a/builtin/index-pack.c b/builtin/index-pack.c index d15b24e..7b2f7c0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -836,6 +836,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, blob->object.flags |= FLAG_CHECKED; else die(_("invalid blob object %s"), oid_to_hex(oid)); + if (do_fsck_object && + fsck_object(&blob->object, (void *)data, size, &fsck_options)) + die(_("fsck error in packed object")); } else { struct object *obj; int eaten; @@ -1477,6 +1480,9 @@ static void final(const char *final_pack_name, const char *curr_pack_name, } else chmod(final_index_name, 0444); + if (do_fsck_object) + add_packed_git(final_index_name, strlen(final_index_name), 0); + if (!from_stdin) { printf("%s\n", sha1_to_hex(hash)); } else { @@ -1818,6 +1824,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) pack_hash); else close(input_fd); + + if (do_fsck_object && fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + free(objects); strbuf_release(&index_name_buf); if (pack_name == NULL) diff --git a/t/lib-pack.sh b/t/lib-pack.sh index 7509846..4674899 100644 --- a/t/lib-pack.sh +++ b/t/lib-pack.sh @@ -79,6 +79,18 @@ pack_obj () { ;; esac + # If it's not a delta, we can convince pack-objects to generate a pack + # with just our entry, and then strip off the header (12 bytes) and + # trailer (20 bytes). + if test -z "$2" + then + echo "$1" | git pack-objects --stdout >pack_obj.tmp && + size=$(wc -c &2 "BUG: don't know how to print $1${2:+ (from $2)}" return 1 } diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 7fdf5d6..51361c9 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -6,6 +6,7 @@ Exercise the name-checking function on a variety of names, and then give a real-world setup that confirms we catch this in practice. ' . ./test-lib.sh +. "$TEST_DIRECTORY"/lib-pack.sh test_expect_success 'check names' ' cat >expect <<-\EOF && @@ -84,4 +85,41 @@ test_expect_success 'transfer.fsckObjects detects evil superproject (unpack)' ' test_must_fail git push dst.git HEAD ' +test_expect_success 'transfer.fsckObjects detects evil superproject (index)' ' + rm -rf dst.git && + git init --bare dst.git && + git -C dst.git config transfer.fsckObjects true && + git -C dst.git config transfer.unpackLimit 1 && + test_must_fail git push dst.git HEAD +' + +# Normally our packs contain commits followed by trees followed by blobs. This +# reverses the order, which requires backtracking to find the context of a +# blob. We'll start with a fresh gitmodules-only tree to make it simpler. +test_expect_success 'create oddly ordered pack' ' + git checkout --orphan odd && + git rm -rf --cached . && + git add .gitmodules && + git commit -m odd && + { + pack_header 3 && + pack_obj $(git rev-parse HEAD:.gitmodules) && + pack_obj $(git rev-parse HEAD^{tree}) && + pack_obj $(git rev-parse HEAD) + } >odd.pack && + pack_trailer odd.pack +' + +test_expect_success 'transfer.fsckObjects handles odd pack (unpack)' ' + rm -rf dst.git && + git init --bare dst.git && + test_must_fail git -C dst.git unpack-objects --strict Date: Fri, 4 May 2018 20:03:35 -0400 Subject: fsck: complain when .gitmodules is a symlink We've recently forbidden .gitmodules to be a symlink in verify_path(). And it's an easy way to circumvent our fsck checks for .gitmodules content. So let's complain when we see it. Signed-off-by: Jeff King diff --git a/fsck.c b/fsck.c index 2eddfc3..9339f31 100644 --- a/fsck.c +++ b/fsck.c @@ -63,6 +63,7 @@ static struct oidset gitmodules_done = OIDSET_INIT; FUNC(GITMODULES_BLOB, ERROR) \ FUNC(GITMODULES_PARSE, ERROR) \ FUNC(GITMODULES_NAME, ERROR) \ + FUNC(GITMODULES_SYMLINK, ERROR) \ /* warnings */ \ FUNC(BAD_FILEMODE, WARN) \ FUNC(EMPTY_NAME, WARN) \ @@ -576,8 +577,14 @@ static int fsck_tree(struct tree *item, struct fsck_options *options) has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name); has_zero_pad |= *(char *)desc.buffer == '0'; - if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) - oidset_insert(&gitmodules_found, oid); + if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) { + if (!S_ISLNK(mode)) + oidset_insert(&gitmodules_found, oid); + else + retval += report(options, &item->object, + FSCK_MSG_GITMODULES_SYMLINK, + ".gitmodules is a symbolic link"); + } if (update_tree_entry_gently(&desc)) { retval += report(options, &item->object, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); diff --git a/t/t7415-submodule-names.sh b/t/t7415-submodule-names.sh index 51361c9..a770d92 100755 --- a/t/t7415-submodule-names.sh +++ b/t/t7415-submodule-names.sh @@ -122,4 +122,33 @@ test_expect_success 'transfer.fsckObjects handles odd pack (index)' ' test_must_fail git -C dst.git index-pack --strict --stdin output && + grep gitmodulesSymlink output + ) +' + test_done -- cgit v0.10.2-6-g49f6 From 0114f71344844be9e5add321cffea34bac077d75 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 22 May 2018 13:50:36 +0900 Subject: Git 2.13.7 Signed-off-by: Junio C Hamano diff --git a/Documentation/RelNotes/2.13.7.txt b/Documentation/RelNotes/2.13.7.txt new file mode 100644 index 0000000..09fc014 --- /dev/null +++ b/Documentation/RelNotes/2.13.7.txt @@ -0,0 +1,20 @@ +Git v2.13.7 Release Notes +========================= + +Fixes since v2.13.6 +------------------- + + * Submodule "names" come from the untrusted .gitmodules file, but we + blindly append them to $GIT_DIR/modules to create our on-disk repo + paths. This means you can do bad things by putting "../" into the + name. We now enforce some rules for submodule names which will cause + Git to ignore these malicious names (CVE-2018-11235). + + Credit for finding this vulnerability and the proof of concept from + which the test script was adapted goes to Etienne Stalmans. + + * It was possible to trick the code that sanity-checks paths on NTFS + into reading random piece of memory (CVE-2018-11233). + +Credit for fixing for these bugs goes to Jeff King, Johannes +Schindelin and others. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 3db6830..1534654 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.13.6 +DEF_VER=v2.13.7 LF=' ' diff --git a/RelNotes b/RelNotes index c2dd9dd..a7f9eca 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.13.6.txt \ No newline at end of file +Documentation/RelNotes/2.13.7.txt \ No newline at end of file -- cgit v0.10.2-6-g49f6 From 4dde7b8799dec0e7aecb04fdc55c656e674cff6f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 22 May 2018 14:12:02 +0900 Subject: Git 2.14.4 Signed-off-by: Junio C Hamano diff --git a/Documentation/RelNotes/2.14.4.txt b/Documentation/RelNotes/2.14.4.txt new file mode 100644 index 0000000..97755a8 --- /dev/null +++ b/Documentation/RelNotes/2.14.4.txt @@ -0,0 +1,5 @@ +Git v2.14.4 Release Notes +========================= + +This release is to forward-port the fixes made in the v2.13.7 version +of Git. See its release notes for details. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 9c203fb..918b6c2 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.14.3 +DEF_VER=v2.14.4 LF=' ' diff --git a/RelNotes b/RelNotes index 8612bbd..1b1ac35 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.14.3.txt \ No newline at end of file +Documentation/RelNotes/2.14.4.txt \ No newline at end of file -- cgit v0.10.2-6-g49f6 From d33c87517a85f544159b2efa406136dde18a5b9f Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 22 May 2018 14:15:59 +0900 Subject: Git 2.15.2 Signed-off-by: Junio C Hamano diff --git a/Documentation/RelNotes/2.15.2.txt b/Documentation/RelNotes/2.15.2.txt index 9f7e28f..b480e56 100644 --- a/Documentation/RelNotes/2.15.2.txt +++ b/Documentation/RelNotes/2.15.2.txt @@ -43,5 +43,8 @@ Fixes since v2.15.1 * Clarify and enhance documentation for "merge-base --fork-point", as it was clear what it computed but not why/what for. + * This release also contains the fixes made in the v2.13.7 version of + Git. See its release notes for details. + Also contains various documentation updates and code clean-ups. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 879178c..824649f 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.15.1 +DEF_VER=v2.15.2 LF=' ' -- cgit v0.10.2-6-g49f6 From a42a58d7b62cc1d6301440e81a83feed9d7c118c Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 22 May 2018 14:18:51 +0900 Subject: Git 2.16.4 Signed-off-by: Junio C Hamano diff --git a/Documentation/RelNotes/2.16.4.txt b/Documentation/RelNotes/2.16.4.txt new file mode 100644 index 0000000..6be538b --- /dev/null +++ b/Documentation/RelNotes/2.16.4.txt @@ -0,0 +1,5 @@ +Git v2.16.4 Release Notes +========================= + +This release is to forward-port the fixes made in the v2.13.7 version +of Git. See its release notes for details. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 8945e05..f6c7db0 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.16.3 +DEF_VER=v2.16.4 LF=' ' diff --git a/RelNotes b/RelNotes index fdd7e42..d93c6ee 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.16.3.txt \ No newline at end of file +Documentation/RelNotes/2.16.4.txt \ No newline at end of file -- cgit v0.10.2-6-g49f6 From fc54c1af3ec09bab8b8ea09768c2da4069b7f53e Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Tue, 22 May 2018 14:28:26 +0900 Subject: Git 2.17.1 Signed-off-by: Junio C Hamano diff --git a/Documentation/RelNotes/2.17.1.txt b/Documentation/RelNotes/2.17.1.txt new file mode 100644 index 0000000..e01384f --- /dev/null +++ b/Documentation/RelNotes/2.17.1.txt @@ -0,0 +1,16 @@ +Git v2.17.1 Release Notes +========================= + +Fixes since v2.17 +----------------- + + * This release contains the same fixes made in the v2.13.7 version of + Git, covering CVE-2018-11233 and 11235, and forward-ported to + v2.14.4, v2.15.2 and v2.16.4 releases. See release notes to + v2.13.7 for details. + + * In addition to the above fixes, this release has support on the + server side to reject pushes to repositories that attempt to create + such problematic .gitmodules file etc. as tracked contents, to help + hosting sites protect their customers by preventing malicious + contents from spreading. diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 1b4624c..4baa413 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.17.0 +DEF_VER=v2.17.1 LF=' ' diff --git a/RelNotes b/RelNotes index 7a6dc06..cde891d 120000 --- a/RelNotes +++ b/RelNotes @@ -1 +1 @@ -Documentation/RelNotes/2.17.0.txt \ No newline at end of file +Documentation/RelNotes/2.17.1.txt \ No newline at end of file -- cgit v0.10.2-6-g49f6