From 9068cfb20f5699fd639a559af14affaefe43a812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sun, 10 May 2020 18:12:16 +0200 Subject: fsck: report non-consecutive duplicate names in trees MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tree entries are sorted in path order, meaning that directory names get a slash ('/') appended implicitly. Git fsck checks if trees contains consecutive duplicates, but due to that ordering there can be non-consecutive duplicates as well if one of them is a directory and the other one isn't. Such a tree cannot be fully checked out. Find these duplicates by recording candidate file names on a stack and check candidate directory names against that stack to find matches. Suggested-by: Brandon Williams Original-test-by: Brandon Williams Signed-off-by: René Scharfe Reviewed-by: Luke Diamand Signed-off-by: Junio C Hamano diff --git a/fsck.c b/fsck.c index 73f3077..b9b3350 100644 --- a/fsck.c +++ b/fsck.c @@ -523,6 +523,28 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options) } } +struct name_stack { + const char **names; + size_t nr, alloc; +}; + +static void name_stack_push(struct name_stack *stack, const char *name) +{ + ALLOC_GROW(stack->names, stack->nr + 1, stack->alloc); + stack->names[stack->nr++] = name; +} + +static const char *name_stack_pop(struct name_stack *stack) +{ + return stack->nr ? stack->names[--stack->nr] : NULL; +} + +static void name_stack_clear(struct name_stack *stack) +{ + FREE_AND_NULL(stack->names); + stack->nr = stack->alloc = 0; +} + /* * The entries in a tree are ordered in the _path_ order, * which means that a directory entry is ordered by adding @@ -534,7 +556,14 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options) #define TREE_UNORDERED (-1) #define TREE_HAS_DUPS (-2) -static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, const char *name2) +static int is_less_than_slash(unsigned char c) +{ + return '\0' < c && c < '/'; +} + +static int verify_ordered(unsigned mode1, const char *name1, + unsigned mode2, const char *name2, + struct name_stack *candidates) { int len1 = strlen(name1); int len2 = strlen(name2); @@ -566,6 +595,41 @@ static int verify_ordered(unsigned mode1, const char *name1, unsigned mode2, con c1 = '/'; if (!c2 && S_ISDIR(mode2)) c2 = '/'; + + /* + * There can be non-consecutive duplicates due to the implicitly + * add slash, e.g.: + * + * foo + * foo.bar + * foo.bar.baz + * foo.bar/ + * foo/ + * + * Record non-directory candidates (like "foo" and "foo.bar" in + * the example) on a stack and check directory candidates (like + * foo/" and "foo.bar/") against that stack. + */ + if (!c1 && is_less_than_slash(c2)) { + name_stack_push(candidates, name1); + } else if (c2 == '/' && is_less_than_slash(c1)) { + for (;;) { + const char *p; + const char *f_name = name_stack_pop(candidates); + + if (!f_name) + break; + if (!skip_prefix(name2, f_name, &p)) + break; + if (!*p) + return TREE_HAS_DUPS; + if (is_less_than_slash(*p)) { + name_stack_push(candidates, f_name); + break; + } + } + } + return c1 < c2 ? 0 : TREE_UNORDERED; } @@ -587,6 +651,7 @@ static int fsck_tree(const struct object_id *oid, struct tree_desc desc; unsigned o_mode; const char *o_name; + struct name_stack df_dup_candidates = { NULL }; if (init_tree_desc_gently(&desc, buffer, size)) { retval += report(options, oid, OBJ_TREE, FSCK_MSG_BAD_TREE, "cannot be parsed as a tree"); @@ -666,7 +731,8 @@ static int fsck_tree(const struct object_id *oid, } if (o_name) { - switch (verify_ordered(o_mode, o_name, mode, name)) { + switch (verify_ordered(o_mode, o_name, mode, name, + &df_dup_candidates)) { case TREE_UNORDERED: not_properly_sorted = 1; break; @@ -682,6 +748,8 @@ static int fsck_tree(const struct object_id *oid, o_name = name; } + name_stack_clear(&df_dup_candidates); + if (has_null_sha1) retval += report(options, oid, OBJ_TREE, FSCK_MSG_NULL_SHA1, "contains entries pointing to null sha1"); if (has_full_path) diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh index 02478bc..c43bc68 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -234,6 +234,22 @@ test_expect_success 'tree object with duplicate entries' ' test_i18ngrep "error in tree .*contains duplicate file entries" out ' +test_expect_success 'tree object with dublicate names' ' + test_when_finished "remove_object \$blob" && + test_when_finished "remove_object \$tree" && + test_when_finished "remove_object \$badtree" && + blob=$(echo blob | git hash-object -w --stdin) && + printf "100644 blob %s\t%s\n" $blob x.2 >tree && + tree=$(git mktree badtree && + printf "100644 blob %s\t%s\n" $blob x >>badtree && + printf "040000 tree %s\t%s\n" $tree x >>badtree && + badtree=$(git mktree out && + test_i18ngrep "$badtree" out && + test_i18ngrep "error in tree .*contains duplicate file entries" out +' + test_expect_success 'unparseable tree object' ' test_oid_cache <<-\EOF && junk sha1:twenty-bytes-of-junk -- cgit v0.10.2-6-g49f6