summaryrefslogtreecommitdiff
path: root/fsck.c
diff options
context:
space:
mode:
Diffstat (limited to 'fsck.c')
-rw-r--r--fsck.c737
1 files changed, 383 insertions, 354 deletions
diff --git a/fsck.c b/fsck.c
index 8bb3ecf..78af29d 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1,7 +1,12 @@
-#include "cache.h"
-#include "object-store.h"
+#include "git-compat-util.h"
+#include "date.h"
+#include "dir.h"
+#include "hex.h"
+#include "object-store-ll.h"
+#include "path.h"
#include "repository.h"
#include "object.h"
+#include "attr.h"
#include "blob.h"
#include "tree.h"
#include "tree-walk.h"
@@ -11,83 +16,13 @@
#include "refs.h"
#include "url.h"
#include "utf8.h"
-#include "decorate.h"
#include "oidset.h"
#include "packfile.h"
#include "submodule-config.h"
#include "config.h"
-#include "credential.h"
#include "help.h"
-static struct oidset gitmodules_found = OIDSET_INIT;
-static struct oidset gitmodules_done = OIDSET_INIT;
-
-#define FSCK_FATAL -1
-#define FSCK_INFO -2
-
-#define FOREACH_MSG_ID(FUNC) \
- /* fatal errors */ \
- FUNC(NUL_IN_HEADER, FATAL) \
- FUNC(UNTERMINATED_HEADER, FATAL) \
- /* errors */ \
- FUNC(BAD_DATE, ERROR) \
- FUNC(BAD_DATE_OVERFLOW, ERROR) \
- FUNC(BAD_EMAIL, ERROR) \
- FUNC(BAD_NAME, ERROR) \
- FUNC(BAD_OBJECT_SHA1, ERROR) \
- FUNC(BAD_PARENT_SHA1, ERROR) \
- FUNC(BAD_TAG_OBJECT, ERROR) \
- FUNC(BAD_TIMEZONE, ERROR) \
- FUNC(BAD_TREE, ERROR) \
- FUNC(BAD_TREE_SHA1, ERROR) \
- FUNC(BAD_TYPE, ERROR) \
- FUNC(DUPLICATE_ENTRIES, ERROR) \
- FUNC(MISSING_AUTHOR, ERROR) \
- FUNC(MISSING_COMMITTER, ERROR) \
- FUNC(MISSING_EMAIL, ERROR) \
- FUNC(MISSING_NAME_BEFORE_EMAIL, ERROR) \
- FUNC(MISSING_OBJECT, ERROR) \
- FUNC(MISSING_SPACE_BEFORE_DATE, ERROR) \
- FUNC(MISSING_SPACE_BEFORE_EMAIL, ERROR) \
- FUNC(MISSING_TAG, ERROR) \
- FUNC(MISSING_TAG_ENTRY, ERROR) \
- FUNC(MISSING_TREE, ERROR) \
- FUNC(MISSING_TREE_OBJECT, ERROR) \
- FUNC(MISSING_TYPE, ERROR) \
- FUNC(MISSING_TYPE_ENTRY, ERROR) \
- FUNC(MULTIPLE_AUTHORS, ERROR) \
- FUNC(TREE_NOT_SORTED, ERROR) \
- FUNC(UNKNOWN_TYPE, ERROR) \
- FUNC(ZERO_PADDED_DATE, ERROR) \
- FUNC(GITMODULES_MISSING, ERROR) \
- FUNC(GITMODULES_BLOB, ERROR) \
- FUNC(GITMODULES_LARGE, ERROR) \
- FUNC(GITMODULES_NAME, ERROR) \
- FUNC(GITMODULES_SYMLINK, ERROR) \
- FUNC(GITMODULES_URL, ERROR) \
- FUNC(GITMODULES_PATH, ERROR) \
- FUNC(GITMODULES_UPDATE, ERROR) \
- /* warnings */ \
- FUNC(BAD_FILEMODE, WARN) \
- FUNC(EMPTY_NAME, WARN) \
- FUNC(FULL_PATHNAME, WARN) \
- FUNC(HAS_DOT, WARN) \
- FUNC(HAS_DOTDOT, WARN) \
- FUNC(HAS_DOTGIT, WARN) \
- FUNC(NULL_SHA1, WARN) \
- FUNC(ZERO_PADDED_FILEMODE, WARN) \
- FUNC(NUL_IN_COMMIT, WARN) \
- /* infos (reported as warnings, but ignored by default) */ \
- FUNC(GITMODULES_PARSE, INFO) \
- FUNC(BAD_TAG_NAME, INFO) \
- FUNC(MISSING_TAGGER_ENTRY, INFO)
-
-#define MSG_ID(id, msg_type) FSCK_MSG_##id,
-enum fsck_msg_id {
- FOREACH_MSG_ID(MSG_ID)
- FSCK_MSG_MAX
-};
-#undef MSG_ID
+static ssize_t max_tree_entry_len = 4096;
#define STR(x) #x
#define MSG_ID(id, msg_type) { STR(id), NULL, NULL, FSCK_##msg_type },
@@ -95,12 +30,13 @@ static struct {
const char *id_string;
const char *downcased;
const char *camelcased;
- int msg_type;
+ enum fsck_msg_type msg_type;
} msg_id_info[FSCK_MSG_MAX + 1] = {
- FOREACH_MSG_ID(MSG_ID)
+ FOREACH_FSCK_MSG_ID(MSG_ID)
{ NULL, NULL, NULL, -1 }
};
#undef MSG_ID
+#undef STR
static void prepare_msg_ids(void)
{
@@ -162,25 +98,23 @@ void list_config_fsck_msg_ids(struct string_list *list, const char *prefix)
list_config_item(list, prefix, msg_id_info[i].camelcased);
}
-static int fsck_msg_type(enum fsck_msg_id msg_id,
+static enum fsck_msg_type fsck_msg_type(enum fsck_msg_id msg_id,
struct fsck_options *options)
{
- int msg_type;
-
assert(msg_id >= 0 && msg_id < FSCK_MSG_MAX);
- if (options->msg_type)
- msg_type = options->msg_type[msg_id];
- else {
- msg_type = msg_id_info[msg_id].msg_type;
+ if (!options->msg_type) {
+ enum fsck_msg_type msg_type = msg_id_info[msg_id].msg_type;
+
if (options->strict && msg_type == FSCK_WARN)
msg_type = FSCK_ERROR;
+ return msg_type;
}
- return msg_type;
+ return options->msg_type[msg_id];
}
-static int parse_msg_type(const char *str)
+static enum fsck_msg_type parse_msg_type(const char *str)
{
if (!strcmp(str, "error"))
return FSCK_ERROR;
@@ -200,28 +134,49 @@ int is_valid_msg_type(const char *msg_id, const char *msg_type)
return 1;
}
-void fsck_set_msg_type(struct fsck_options *options,
- const char *msg_id, const char *msg_type)
+void fsck_set_msg_type_from_ids(struct fsck_options *options,
+ enum fsck_msg_id msg_id,
+ enum fsck_msg_type msg_type)
{
- int id = parse_msg_id(msg_id), type;
-
- if (id < 0)
- die("Unhandled message id: %s", msg_id);
- type = parse_msg_type(msg_type);
-
- if (type != FSCK_ERROR && msg_id_info[id].msg_type == FSCK_FATAL)
- die("Cannot demote %s to %s", msg_id, msg_type);
-
if (!options->msg_type) {
int i;
- int *msg_type;
- ALLOC_ARRAY(msg_type, FSCK_MSG_MAX);
+ enum fsck_msg_type *severity;
+ ALLOC_ARRAY(severity, FSCK_MSG_MAX);
for (i = 0; i < FSCK_MSG_MAX; i++)
- msg_type[i] = fsck_msg_type(i, options);
- options->msg_type = msg_type;
+ severity[i] = fsck_msg_type(i, options);
+ options->msg_type = severity;
}
- options->msg_type[id] = type;
+ options->msg_type[msg_id] = msg_type;
+}
+
+void fsck_set_msg_type(struct fsck_options *options,
+ const char *msg_id_str, const char *msg_type_str)
+{
+ int msg_id = parse_msg_id(msg_id_str);
+ char *to_free = NULL;
+ enum fsck_msg_type msg_type;
+
+ if (msg_id < 0)
+ die("Unhandled message id: %s", msg_id_str);
+
+ if (msg_id == FSCK_MSG_LARGE_PATHNAME) {
+ const char *colon = strchr(msg_type_str, ':');
+ if (colon) {
+ msg_type_str = to_free =
+ xmemdupz(msg_type_str, colon - msg_type_str);
+ colon++;
+ if (!git_parse_ssize_t(colon, &max_tree_entry_len))
+ die("unable to parse max tree entry len: %s", colon);
+ }
+ }
+ msg_type = parse_msg_type(msg_type_str);
+
+ if (msg_type != FSCK_ERROR && msg_id_info[msg_id].msg_type == FSCK_FATAL)
+ die("Cannot demote %s to %s", msg_id_str, msg_type_str);
+
+ fsck_set_msg_type_from_ids(options, msg_id, msg_type);
+ free(to_free);
}
void fsck_set_msg_types(struct fsck_options *options, const char *values)
@@ -262,24 +217,6 @@ void fsck_set_msg_types(struct fsck_options *options, const char *values)
free(to_free);
}
-static void append_msg_id(struct strbuf *sb, const char *msg_id)
-{
- for (;;) {
- char c = *(msg_id)++;
-
- if (!c)
- break;
- if (c != '_')
- strbuf_addch(sb, tolower(c));
- else {
- assert(*msg_id);
- strbuf_addch(sb, *(msg_id)++);
- }
- }
-
- strbuf_addstr(sb, ": ");
-}
-
static int object_on_skiplist(struct fsck_options *opts,
const struct object_id *oid)
{
@@ -289,11 +226,12 @@ static int object_on_skiplist(struct fsck_options *opts,
__attribute__((format (printf, 5, 6)))
static int report(struct fsck_options *options,
const struct object_id *oid, enum object_type object_type,
- enum fsck_msg_id id, const char *fmt, ...)
+ enum fsck_msg_id msg_id, const char *fmt, ...)
{
va_list ap;
struct strbuf sb = STRBUF_INIT;
- int msg_type = fsck_msg_type(id, options), result;
+ enum fsck_msg_type msg_type = fsck_msg_type(msg_id, options);
+ int result;
if (msg_type == FSCK_IGNORE)
return 0;
@@ -306,12 +244,13 @@ static int report(struct fsck_options *options,
else if (msg_type == FSCK_INFO)
msg_type = FSCK_WARN;
- append_msg_id(&sb, msg_id_info[id].id_string);
+ prepare_msg_ids();
+ strbuf_addf(&sb, "%s: ", msg_id_info[msg_id].camelcased);
va_start(ap, fmt);
strbuf_vaddf(&sb, fmt, ap);
result = options->error_func(options, oid, object_type,
- msg_type, sb.buf);
+ msg_type, msg_id, sb.buf);
strbuf_release(&sb);
va_end(ap);
@@ -388,7 +327,8 @@ static int fsck_walk_tree(struct tree *tree, void *data, struct fsck_options *op
return -1;
name = fsck_get_object_name(options, &tree->object.oid);
- if (init_tree_desc_gently(&desc, tree->buffer, tree->size))
+ if (init_tree_desc_gently(&desc, &tree->object.oid,
+ tree->buffer, tree->size, 0))
return -1;
while (tree_entry_gently(&desc, &entry)) {
struct object *obj;
@@ -432,7 +372,7 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
int result;
const char *name;
- if (parse_commit(commit))
+ if (repo_parse_commit(the_repository, commit))
return -1;
name = fsck_get_object_name(options, &commit->object.oid);
@@ -440,7 +380,7 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
fsck_put_object_name(options, get_commit_tree_oid(commit),
"%s:", name);
- result = options->walk((struct object *)get_commit_tree(commit),
+ result = options->walk((struct object *) repo_get_commit_tree(the_repository, commit),
OBJ_TREE, data, options);
if (result < 0)
return result;
@@ -461,6 +401,11 @@ static int fsck_walk_commit(struct commit *commit, void *data, struct fsck_optio
generation += power * (name[--len] - '0');
if (power > 1 && len && name[len - 1] == '~')
name_prefix_len = len - 1;
+ else {
+ /* Maybe a non-first parent, e.g. HEAD^2 */
+ generation = 0;
+ name_prefix_len = len;
+ }
}
}
@@ -598,7 +543,7 @@ static int verify_ordered(unsigned mode1, const char *name1,
/*
* There can be non-consecutive duplicates due to the implicitly
- * add slash, e.g.:
+ * added slash, e.g.:
*
* foo
* foo.bar
@@ -620,7 +565,7 @@ static int verify_ordered(unsigned mode1, const char *name1,
if (!f_name)
break;
if (!skip_prefix(name2, f_name, &p))
- break;
+ continue;
if (!*p)
return TREE_HAS_DUPS;
if (is_less_than_slash(*p)) {
@@ -633,7 +578,7 @@ static int verify_ordered(unsigned mode1, const char *name1,
return c1 < c2 ? 0 : TREE_UNORDERED;
}
-static int fsck_tree(const struct object_id *oid,
+static int fsck_tree(const struct object_id *tree_oid,
const char *buffer, unsigned long size,
struct fsck_options *options)
{
@@ -648,13 +593,17 @@ static int fsck_tree(const struct object_id *oid,
int has_bad_modes = 0;
int has_dup_entries = 0;
int not_properly_sorted = 0;
+ int has_large_name = 0;
struct tree_desc desc;
unsigned o_mode;
const char *o_name;
struct name_stack df_dup_candidates = { NULL };
- if (init_tree_desc_gently(&desc, buffer, size)) {
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
+ if (init_tree_desc_gently(&desc, tree_oid, buffer, size,
+ TREE_DESC_RAW_MODES)) {
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_BAD_TREE,
+ "cannot be parsed as a tree");
return retval;
}
@@ -664,37 +613,63 @@ static int fsck_tree(const struct object_id *oid,
while (desc.size) {
unsigned short mode;
const char *name, *backslash;
- const struct object_id *oid;
+ const struct object_id *entry_oid;
- oid = tree_entry_extract(&desc, &name, &mode);
+ entry_oid = tree_entry_extract(&desc, &name, &mode);
- has_null_sha1 |= is_null_oid(oid);
+ has_null_sha1 |= is_null_oid(entry_oid);
has_full_path |= !!strchr(name, '/');
has_empty_name |= !*name;
has_dot |= !strcmp(name, ".");
has_dotdot |= !strcmp(name, "..");
has_dotgit |= is_hfs_dotgit(name) || is_ntfs_dotgit(name);
has_zero_pad |= *(char *)desc.buffer == '0';
+ has_large_name |= tree_entry_len(&desc.entry) > max_tree_entry_len;
if (is_hfs_dotgitmodules(name) || is_ntfs_dotgitmodules(name)) {
if (!S_ISLNK(mode))
- oidset_insert(&gitmodules_found, oid);
+ oidset_insert(&options->gitmodules_found,
+ entry_oid);
else
retval += report(options,
- oid, OBJ_TREE,
+ tree_oid, OBJ_TREE,
FSCK_MSG_GITMODULES_SYMLINK,
".gitmodules is a symbolic link");
}
+ if (is_hfs_dotgitattributes(name) || is_ntfs_dotgitattributes(name)) {
+ if (!S_ISLNK(mode))
+ oidset_insert(&options->gitattributes_found,
+ entry_oid);
+ else
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_GITATTRIBUTES_SYMLINK,
+ ".gitattributes is a symlink");
+ }
+
+ if (S_ISLNK(mode)) {
+ if (is_hfs_dotgitignore(name) ||
+ is_ntfs_dotgitignore(name))
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_GITIGNORE_SYMLINK,
+ ".gitignore is a symlink");
+ if (is_hfs_dotmailmap(name) ||
+ is_ntfs_dotmailmap(name))
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_MAILMAP_SYMLINK,
+ ".mailmap is a symlink");
+ }
+
if ((backslash = strchr(name, '\\'))) {
while (backslash) {
backslash++;
has_dotgit |= is_ntfs_dotgit(backslash);
if (is_ntfs_dotgitmodules(backslash)) {
if (!S_ISLNK(mode))
- oidset_insert(&gitmodules_found, oid);
+ oidset_insert(&options->gitmodules_found,
+ entry_oid);
else
- retval += report(options, oid, OBJ_TREE,
+ retval += report(options, tree_oid, OBJ_TREE,
FSCK_MSG_GITMODULES_SYMLINK,
".gitmodules is a symbolic link");
}
@@ -703,7 +678,9 @@ static int fsck_tree(const struct object_id *oid,
}
if (update_tree_entry_gently(&desc)) {
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_BAD_TREE,
+ "cannot be parsed as a tree");
break;
}
@@ -751,28 +728,69 @@ static int fsck_tree(const struct object_id *oid,
name_stack_clear(&df_dup_candidates);
if (has_null_sha1)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_NULL_SHA1,
+ "contains entries pointing to null sha1");
if (has_full_path)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_FULL_PATHNAME, "contains full pathnames");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_FULL_PATHNAME,
+ "contains full pathnames");
if (has_empty_name)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_EMPTY_NAME, "contains empty pathname");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_EMPTY_NAME,
+ "contains empty pathname");
if (has_dot)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_HAS_DOT, "contains '.'");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_HAS_DOT,
+ "contains '.'");
if (has_dotdot)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_HAS_DOTDOT, "contains '..'");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_HAS_DOTDOT,
+ "contains '..'");
if (has_dotgit)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_HAS_DOTGIT, "contains '.git'");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_HAS_DOTGIT,
+ "contains '.git'");
if (has_zero_pad)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_ZERO_PADDED_FILEMODE, "contains zero-padded file modes");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_ZERO_PADDED_FILEMODE,
+ "contains zero-padded file modes");
if (has_bad_modes)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_FILEMODE, "contains bad file modes");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_BAD_FILEMODE,
+ "contains bad file modes");
if (has_dup_entries)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_DUPLICATE_ENTRIES, "contains duplicate file entries");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_DUPLICATE_ENTRIES,
+ "contains duplicate file entries");
if (not_properly_sorted)
- retval += report(options, oid, OBJ_TREE, FSCK_MSG_TREE_NOT_SORTED, "not properly sorted");
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_TREE_NOT_SORTED,
+ "not properly sorted");
+ if (has_large_name)
+ retval += report(options, tree_oid, OBJ_TREE,
+ FSCK_MSG_LARGE_PATHNAME,
+ "contains excessively large pathname");
return retval;
}
+/*
+ * Confirm that the headers of a commit or tag object end in a reasonable way,
+ * either with the usual "\n\n" separator, or at least with a trailing newline
+ * on the final header line.
+ *
+ * This property is important for the memory safety of our callers. It allows
+ * them to scan the buffer linewise without constantly checking the remaining
+ * size as long as:
+ *
+ * - they check that there are bytes left in the buffer at the start of any
+ * line (i.e., that the last newline they saw was not the final one we
+ * found here)
+ *
+ * - any intra-line scanning they do will stop at a newline, which will worst
+ * case hit the newline we found here as the end-of-header. This makes it
+ * OK for them to use helpers like parse_oid_hex(), or even skip_prefix().
+ */
static int verify_headers(const void *data, unsigned long size,
const struct object_id *oid, enum object_type type,
struct fsck_options *options)
@@ -833,6 +851,20 @@ static int fsck_ident(const char **ident,
if (*p != ' ')
return report(options, oid, type, FSCK_MSG_MISSING_SPACE_BEFORE_DATE, "invalid author/committer line - missing space before date");
p++;
+ /*
+ * Our timestamp parser is based on the C strto*() functions, which
+ * will happily eat whitespace, including the newline that is supposed
+ * to prevent us walking past the end of the buffer. So do our own
+ * scan, skipping linear whitespace but not newlines, and then
+ * confirming we found a digit. We _could_ be even more strict here,
+ * as we really expect only a single space, but since we have
+ * traditionally allowed extra whitespace, we'll continue to do so.
+ */
+ while (*p == ' ' || *p == '\t')
+ p++;
+ if (!isdigit(*p))
+ return report(options, oid, type, FSCK_MSG_BAD_DATE,
+ "invalid author/committer line - bad date");
if (*p == '0' && p[1] != ' ')
return report(options, oid, type, FSCK_MSG_ZERO_PADDED_DATE, "invalid author/committer line - zero-padded date");
if (date_overflows(parse_timestamp(p, &end, 10)))
@@ -859,12 +891,18 @@ static int fsck_commit(const struct object_id *oid,
unsigned author_count;
int err;
const char *buffer_begin = buffer;
+ const char *buffer_end = buffer + size;
const char *p;
+ /*
+ * We _must_ stop parsing immediately if this reports failure, as the
+ * memory safety of the rest of the function depends on it. See the
+ * comment above the definition of verify_headers() for more details.
+ */
if (verify_headers(buffer, size, oid, OBJ_COMMIT, options))
return -1;
- if (!skip_prefix(buffer, "tree ", &buffer))
+ if (buffer >= buffer_end || !skip_prefix(buffer, "tree ", &buffer))
return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_TREE, "invalid format - expected 'tree' line");
if (parse_oid_hex(buffer, &tree_oid, &p) || *p != '\n') {
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_TREE_SHA1, "invalid 'tree' line format - bad sha1");
@@ -872,7 +910,7 @@ static int fsck_commit(const struct object_id *oid,
return err;
}
buffer = p + 1;
- while (skip_prefix(buffer, "parent ", &buffer)) {
+ while (buffer < buffer_end && skip_prefix(buffer, "parent ", &buffer)) {
if (parse_oid_hex(buffer, &parent_oid, &p) || *p != '\n') {
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_BAD_PARENT_SHA1, "invalid 'parent' line format - bad sha1");
if (err)
@@ -881,7 +919,7 @@ static int fsck_commit(const struct object_id *oid,
buffer = p + 1;
}
author_count = 0;
- while (skip_prefix(buffer, "author ", &buffer)) {
+ while (buffer < buffer_end && skip_prefix(buffer, "author ", &buffer)) {
author_count++;
err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
if (err)
@@ -893,7 +931,7 @@ static int fsck_commit(const struct object_id *oid,
err = report(options, oid, OBJ_COMMIT, FSCK_MSG_MULTIPLE_AUTHORS, "invalid format - multiple 'author' lines");
if (err)
return err;
- if (!skip_prefix(buffer, "committer ", &buffer))
+ if (buffer >= buffer_end || !skip_prefix(buffer, "committer ", &buffer))
return report(options, oid, OBJ_COMMIT, FSCK_MSG_MISSING_COMMITTER, "invalid format - expected 'committer' line");
err = fsck_ident(&buffer, oid, OBJ_COMMIT, options);
if (err)
@@ -911,46 +949,63 @@ static int fsck_tag(const struct object_id *oid, const char *buffer,
unsigned long size, struct fsck_options *options)
{
struct object_id tagged_oid;
+ int tagged_type;
+ return fsck_tag_standalone(oid, buffer, size, options, &tagged_oid,
+ &tagged_type);
+}
+
+int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
+ unsigned long size, struct fsck_options *options,
+ struct object_id *tagged_oid,
+ int *tagged_type)
+{
int ret = 0;
char *eol;
struct strbuf sb = STRBUF_INIT;
+ const char *buffer_end = buffer + size;
const char *p;
+ /*
+ * We _must_ stop parsing immediately if this reports failure, as the
+ * memory safety of the rest of the function depends on it. See the
+ * comment above the definition of verify_headers() for more details.
+ */
ret = verify_headers(buffer, size, oid, OBJ_TAG, options);
if (ret)
goto done;
- if (!skip_prefix(buffer, "object ", &buffer)) {
+ if (buffer >= buffer_end || !skip_prefix(buffer, "object ", &buffer)) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_OBJECT, "invalid format - expected 'object' line");
goto done;
}
- if (parse_oid_hex(buffer, &tagged_oid, &p) || *p != '\n') {
+ if (parse_oid_hex(buffer, tagged_oid, &p) || *p != '\n') {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_OBJECT_SHA1, "invalid 'object' line format - bad sha1");
if (ret)
goto done;
}
buffer = p + 1;
- if (!skip_prefix(buffer, "type ", &buffer)) {
+ if (buffer >= buffer_end || !skip_prefix(buffer, "type ", &buffer)) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE_ENTRY, "invalid format - expected 'type' line");
goto done;
}
- eol = strchr(buffer, '\n');
+ eol = memchr(buffer, '\n', buffer_end - buffer);
if (!eol) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TYPE, "invalid format - unexpected end after 'type' line");
goto done;
}
- if (type_from_string_gently(buffer, eol - buffer, 1) < 0)
+ *tagged_type = type_from_string_gently(buffer, eol - buffer, 1);
+ if (*tagged_type < 0)
ret = report(options, oid, OBJ_TAG, FSCK_MSG_BAD_TYPE, "invalid 'type' value");
if (ret)
goto done;
buffer = eol + 1;
- if (!skip_prefix(buffer, "tag ", &buffer)) {
+ if (buffer >= buffer_end || !skip_prefix(buffer, "tag ", &buffer)) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG_ENTRY, "invalid format - expected 'tag' line");
goto done;
}
- eol = strchr(buffer, '\n');
+ eol = memchr(buffer, '\n', buffer_end - buffer);
if (!eol) {
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAG, "invalid format - unexpected end after 'type' line");
goto done;
@@ -966,7 +1021,7 @@ static int fsck_tag(const struct object_id *oid, const char *buffer,
}
buffer = eol + 1;
- if (!skip_prefix(buffer, "tagger ", &buffer)) {
+ if (buffer >= buffer_end || !skip_prefix(buffer, "tagger ", &buffer)) {
/* early tags do not contain 'tagger' lines; warn only */
ret = report(options, oid, OBJ_TAG, FSCK_MSG_MISSING_TAGGER_ENTRY, "invalid format - expected 'tagger' line");
if (ret)
@@ -975,152 +1030,22 @@ static int fsck_tag(const struct object_id *oid, const char *buffer,
else
ret = fsck_ident(&buffer, oid, OBJ_TAG, options);
-done:
- strbuf_release(&sb);
- return ret;
-}
-
-/*
- * Like builtin/submodule--helper.c's starts_with_dot_slash, but without
- * relying on the platform-dependent is_dir_sep helper.
- *
- * This is for use in checking whether a submodule URL is interpreted as
- * relative to the current directory on any platform, since \ is a
- * directory separator on Windows but not on other platforms.
- */
-static int starts_with_dot_slash(const char *str)
-{
- return str[0] == '.' && (str[1] == '/' || str[1] == '\\');
-}
-
-/*
- * Like starts_with_dot_slash, this is a variant of submodule--helper's
- * helper of the same name with the twist that it accepts backslash as a
- * directory separator even on non-Windows platforms.
- */
-static int starts_with_dot_dot_slash(const char *str)
-{
- return str[0] == '.' && starts_with_dot_slash(str + 1);
-}
-
-static int submodule_url_is_relative(const char *url)
-{
- return starts_with_dot_slash(url) || starts_with_dot_dot_slash(url);
-}
-
-/*
- * Count directory components that a relative submodule URL should chop
- * from the remote_url it is to be resolved against.
- *
- * In other words, this counts "../" components at the start of a
- * submodule URL.
- *
- * Returns the number of directory components to chop and writes a
- * pointer to the next character of url after all leading "./" and
- * "../" components to out.
- */
-static int count_leading_dotdots(const char *url, const char **out)
-{
- int result = 0;
- while (1) {
- if (starts_with_dot_dot_slash(url)) {
- result++;
- url += strlen("../");
- continue;
- }
- if (starts_with_dot_slash(url)) {
- url += strlen("./");
- continue;
- }
- *out = url;
- return result;
- }
-}
-/*
- * Check whether a transport is implemented by git-remote-curl.
- *
- * If it is, returns 1 and writes the URL that would be passed to
- * git-remote-curl to the "out" parameter.
- *
- * Otherwise, returns 0 and leaves "out" untouched.
- *
- * Examples:
- * http::https://example.com/repo.git -> 1, https://example.com/repo.git
- * https://example.com/repo.git -> 1, https://example.com/repo.git
- * git://example.com/repo.git -> 0
- *
- * This is for use in checking for previously exploitable bugs that
- * required a submodule URL to be passed to git-remote-curl.
- */
-static int url_to_curl_url(const char *url, const char **out)
-{
- /*
- * We don't need to check for case-aliases, "http.exe", and so
- * on because in the default configuration, is_transport_allowed
- * prevents URLs with those schemes from being cloned
- * automatically.
- */
- if (skip_prefix(url, "http::", out) ||
- skip_prefix(url, "https::", out) ||
- skip_prefix(url, "ftp::", out) ||
- skip_prefix(url, "ftps::", out))
- return 1;
- if (starts_with(url, "http://") ||
- starts_with(url, "https://") ||
- starts_with(url, "ftp://") ||
- starts_with(url, "ftps://")) {
- *out = url;
- return 1;
- }
- return 0;
-}
-
-static int check_submodule_url(const char *url)
-{
- const char *curl_url;
-
- if (looks_like_command_line_option(url))
- return -1;
-
- if (submodule_url_is_relative(url)) {
- char *decoded;
- const char *next;
- int has_nl;
-
+ if (buffer < buffer_end && !starts_with(buffer, "\n")) {
/*
- * This could be appended to an http URL and url-decoded;
- * check for malicious characters.
+ * The verify_headers() check will allow
+ * e.g. "[...]tagger <tagger>\nsome
+ * garbage\n\nmessage" to pass, thinking "some
+ * garbage" could be a custom header. E.g. "mktag"
+ * doesn't want any unknown headers.
*/
- decoded = url_decode(url);
- has_nl = !!strchr(decoded, '\n');
-
- free(decoded);
- if (has_nl)
- return -1;
-
- /*
- * URLs which escape their root via "../" can overwrite
- * the host field and previous components, resolving to
- * URLs like https::example.com/submodule.git and
- * https:///example.com/submodule.git that were
- * susceptible to CVE-2020-11008.
- */
- if (count_leading_dotdots(url, &next) > 0 &&
- (*next == ':' || *next == '/'))
- return -1;
- }
-
- else if (url_to_curl_url(url, &curl_url)) {
- struct credential c = CREDENTIAL_INIT;
- int ret = 0;
- if (credential_from_url_gently(&c, curl_url, 1) ||
- !*c.host)
- ret = -1;
- credential_clear(&c);
- return ret;
+ ret = report(options, oid, OBJ_TAG, FSCK_MSG_EXTRA_HEADER_ENTRY, "invalid format - extra header(s) after 'tagger'");
+ if (ret)
+ goto done;
}
- return 0;
+done:
+ strbuf_release(&sb);
+ return ret;
}
struct fsck_gitmodules_data {
@@ -1129,7 +1054,9 @@ struct fsck_gitmodules_data {
int ret;
};
-static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
+static int fsck_gitmodules_fn(const char *var, const char *value,
+ const struct config_context *ctx UNUSED,
+ void *vdata)
{
struct fsck_gitmodules_data *data = vdata;
const char *subsection, *key;
@@ -1175,38 +1102,71 @@ static int fsck_gitmodules_fn(const char *var, const char *value, void *vdata)
static int fsck_blob(const struct object_id *oid, const char *buf,
unsigned long size, struct fsck_options *options)
{
- struct fsck_gitmodules_data data;
- struct config_options config_opts = { 0 };
-
- if (!oidset_contains(&gitmodules_found, oid))
- return 0;
- oidset_insert(&gitmodules_done, oid);
+ int ret = 0;
if (object_on_skiplist(options, oid))
return 0;
- if (!buf) {
- /*
- * A missing buffer here is a sign that the caller found the
- * blob too gigantic to load into memory. Let's just consider
- * that an error.
- */
- return report(options, oid, OBJ_BLOB,
- FSCK_MSG_GITMODULES_LARGE,
- ".gitmodules too large to parse");
+ if (oidset_contains(&options->gitmodules_found, oid)) {
+ struct config_options config_opts = { 0 };
+ struct fsck_gitmodules_data data;
+
+ oidset_insert(&options->gitmodules_done, oid);
+
+ if (!buf) {
+ /*
+ * A missing buffer here is a sign that the caller found the
+ * blob too gigantic to load into memory. Let's just consider
+ * that an error.
+ */
+ return report(options, oid, OBJ_BLOB,
+ FSCK_MSG_GITMODULES_LARGE,
+ ".gitmodules too large to parse");
+ }
+
+ data.oid = oid;
+ data.options = options;
+ data.ret = 0;
+ config_opts.error_action = CONFIG_ERROR_SILENT;
+ if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
+ ".gitmodules", buf, size, &data,
+ CONFIG_SCOPE_UNKNOWN, &config_opts))
+ data.ret |= report(options, oid, OBJ_BLOB,
+ FSCK_MSG_GITMODULES_PARSE,
+ "could not parse gitmodules blob");
+ ret |= data.ret;
+ }
+
+ if (oidset_contains(&options->gitattributes_found, oid)) {
+ const char *ptr;
+
+ oidset_insert(&options->gitattributes_done, oid);
+
+ if (!buf || size > ATTR_MAX_FILE_SIZE) {
+ /*
+ * A missing buffer here is a sign that the caller found the
+ * blob too gigantic to load into memory. Let's just consider
+ * that an error.
+ */
+ return report(options, oid, OBJ_BLOB,
+ FSCK_MSG_GITATTRIBUTES_LARGE,
+ ".gitattributes too large to parse");
+ }
+
+ for (ptr = buf; *ptr; ) {
+ const char *eol = strchrnul(ptr, '\n');
+ if (eol - ptr >= ATTR_MAX_LINE_LENGTH) {
+ ret |= report(options, oid, OBJ_BLOB,
+ FSCK_MSG_GITATTRIBUTES_LINE_LENGTH,
+ ".gitattributes has too long lines to parse");
+ break;
+ }
+
+ ptr = *eol ? eol + 1 : eol;
+ }
}
- data.oid = oid;
- data.options = options;
- data.ret = 0;
- config_opts.error_action = CONFIG_ERROR_SILENT;
- if (git_config_from_mem(fsck_gitmodules_fn, CONFIG_ORIGIN_BLOB,
- ".gitmodules", buf, size, &data, &config_opts))
- data.ret |= report(options, oid, OBJ_BLOB,
- FSCK_MSG_GITMODULES_PARSE,
- "could not parse gitmodules blob");
-
- return data.ret;
+ return ret;
}
int fsck_object(struct object *obj, void *data, unsigned long size,
@@ -1215,25 +1175,34 @@ int fsck_object(struct object *obj, void *data, unsigned long size,
if (!obj)
return report(options, NULL, OBJ_NONE, FSCK_MSG_BAD_OBJECT_SHA1, "no valid object to fsck");
- if (obj->type == OBJ_BLOB)
- return fsck_blob(&obj->oid, data, size, options);
- if (obj->type == OBJ_TREE)
- return fsck_tree(&obj->oid, data, size, options);
- if (obj->type == OBJ_COMMIT)
- return fsck_commit(&obj->oid, data, size, options);
- if (obj->type == OBJ_TAG)
- return fsck_tag(&obj->oid, data, size, options);
+ return fsck_buffer(&obj->oid, obj->type, data, size, options);
+}
- return report(options, &obj->oid, obj->type,
+int fsck_buffer(const struct object_id *oid, enum object_type type,
+ void *data, unsigned long size,
+ struct fsck_options *options)
+{
+ if (type == OBJ_BLOB)
+ return fsck_blob(oid, data, size, options);
+ if (type == OBJ_TREE)
+ return fsck_tree(oid, data, size, options);
+ if (type == OBJ_COMMIT)
+ return fsck_commit(oid, data, size, options);
+ if (type == OBJ_TAG)
+ return fsck_tag(oid, data, size, options);
+
+ return report(options, oid, type,
FSCK_MSG_UNKNOWN_TYPE,
"unknown type '%d' (internal fsck error)",
- obj->type);
+ type);
}
int fsck_error_function(struct fsck_options *o,
const struct object_id *oid,
- enum object_type object_type,
- int msg_type, const char *message)
+ enum object_type object_type UNUSED,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id UNUSED,
+ const char *message)
{
if (msg_type == FSCK_WARN) {
warning("object %s: %s", fsck_describe_object(o, oid), message);
@@ -1243,44 +1212,104 @@ int fsck_error_function(struct fsck_options *o,
return 1;
}
-int fsck_finish(struct fsck_options *options)
+static int fsck_blobs(struct oidset *blobs_found, struct oidset *blobs_done,
+ enum fsck_msg_id msg_missing, enum fsck_msg_id msg_type,
+ struct fsck_options *options, const char *blob_type)
{
int ret = 0;
struct oidset_iter iter;
const struct object_id *oid;
- oidset_iter_init(&gitmodules_found, &iter);
+ oidset_iter_init(blobs_found, &iter);
while ((oid = oidset_iter_next(&iter))) {
enum object_type type;
unsigned long size;
char *buf;
- if (oidset_contains(&gitmodules_done, oid))
+ if (oidset_contains(blobs_done, oid))
continue;
- buf = read_object_file(oid, &type, &size);
+ buf = repo_read_object_file(the_repository, oid, &type, &size);
if (!buf) {
if (is_promisor_object(oid))
continue;
ret |= report(options,
- oid, OBJ_BLOB,
- FSCK_MSG_GITMODULES_MISSING,
- "unable to read .gitmodules blob");
+ oid, OBJ_BLOB, msg_missing,
+ "unable to read %s blob", blob_type);
continue;
}
if (type == OBJ_BLOB)
ret |= fsck_blob(oid, buf, size, options);
else
- ret |= report(options,
- oid, type,
- FSCK_MSG_GITMODULES_BLOB,
- "non-blob found at .gitmodules");
+ ret |= report(options, oid, type, msg_type,
+ "non-blob found at %s", blob_type);
free(buf);
}
+ oidset_clear(blobs_found);
+ oidset_clear(blobs_done);
- oidset_clear(&gitmodules_found);
- oidset_clear(&gitmodules_done);
return ret;
}
+
+int fsck_finish(struct fsck_options *options)
+{
+ int ret = 0;
+
+ ret |= fsck_blobs(&options->gitmodules_found, &options->gitmodules_done,
+ FSCK_MSG_GITMODULES_MISSING, FSCK_MSG_GITMODULES_BLOB,
+ options, ".gitmodules");
+ ret |= fsck_blobs(&options->gitattributes_found, &options->gitattributes_done,
+ FSCK_MSG_GITATTRIBUTES_MISSING, FSCK_MSG_GITATTRIBUTES_BLOB,
+ options, ".gitattributes");
+
+ return ret;
+}
+
+int git_fsck_config(const char *var, const char *value,
+ const struct config_context *ctx, void *cb)
+{
+ struct fsck_options *options = cb;
+ const char *msg_id;
+
+ if (strcmp(var, "fsck.skiplist") == 0) {
+ const char *path;
+ struct strbuf sb = STRBUF_INIT;
+
+ if (git_config_pathname(&path, var, value))
+ return 1;
+ strbuf_addf(&sb, "skiplist=%s", path);
+ free((char *)path);
+ fsck_set_msg_types(options, sb.buf);
+ strbuf_release(&sb);
+ return 0;
+ }
+
+ if (skip_prefix(var, "fsck.", &msg_id)) {
+ if (!value)
+ return config_error_nonbool(var);
+ fsck_set_msg_type(options, msg_id, value);
+ return 0;
+ }
+
+ return git_default_config(var, value, ctx, cb);
+}
+
+/*
+ * Custom error callbacks that are used in more than one place.
+ */
+
+int fsck_error_cb_print_missing_gitmodules(struct fsck_options *o,
+ const struct object_id *oid,
+ enum object_type object_type,
+ enum fsck_msg_type msg_type,
+ enum fsck_msg_id msg_id,
+ const char *message)
+{
+ if (msg_id == FSCK_MSG_GITMODULES_MISSING) {
+ puts(oid_to_hex(oid));
+ return 0;
+ }
+ return fsck_error_function(o, oid, object_type, msg_type, msg_id, message);
+}