From 134b7327d05c76d26e501e00d20b5ade9836ac4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 3 Sep 2018 14:49:19 +0000 Subject: fsck tests: setup of bogus commit object MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several fsck tests used the exact same git-hash-object output, but had copy/pasted that part of the setup code. Let's instead do that setup once and use it in subsequent tests. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index 62f3569..6d268f3 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -133,6 +133,10 @@ committer Bugs Bunny 1234567890 +0000 This commit object intentionally broken EOF +test_expect_success 'setup bogus commit' ' + commit="$(git hash-object -t commit -w --stdin err && @@ -142,7 +146,6 @@ test_expect_success 'fsck with invalid or bogus skipList input' ' ' test_expect_success 'push with receive.fsck.skipList' ' - commit="$(git hash-object -t commit -w --stdin dies' ' ' test_expect_success 'push with receive.fsck.missingEmail=warn' ' - commit="$(git hash-object -t commit -w --stdin Date: Mon, 3 Sep 2018 14:49:20 +0000 Subject: fsck tests: add a test for no skipList input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The recent 65a836fa6b ("fsck: add stress tests for fsck.skipList", 2018-07-27) added various stress tests for odd invocations of fsck.skipList, but didn't tests for some very simple ones, such as asserting that providing to skipList with a bad commit causes fsck to exit with a non-zero exit code. Add such a test. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index 6d268f3..cbae31f 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -137,6 +137,11 @@ test_expect_success 'setup bogus commit' ' commit="$(git hash-object -t commit -w --stdin err && + test_i18ngrep "missingEmail" err +' + test_expect_success 'fsck with invalid or bogus skipList input' ' git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck && test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err && -- cgit v0.10.2-6-g49f6 From 58dc440b3cd34db29b54e87ef80e0da8cd507445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 3 Sep 2018 14:49:21 +0000 Subject: fsck: document and test sorted skipList input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ever since the skipList support was first added in cd94c6f91 ("fsck: git receive-pack: support excluding objects from fsck'ing", 2015-06-22) the documentation for the format has that the file is a sorted list of object names. Thus, anyone using the feature would have thought the list needed to be sorted. E.g. I recently in conjunction with my fetch.fsck.* implementation in 1362df0d41 ("fetch: implement fetch.fsck.*", 2018-07-27) wrote some code to ship a skipList, and went out of my way to sort it. Doing so seems intuitive, since it contains fixed-width records, and has no support for comments, so one might expect it to be binary searched in-place on-disk. However, as documented here this was never a requirement, so let's change the documentation. Since this is a file format change let's also document what was said about this in the past, so e.g. someone like myself reading the new docs can see this never needed to be sorted ("why do I have all this code to sort this thing..."). Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index eb66a11..fd1b583 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1710,7 +1710,7 @@ doing the same for `receive.fsck.` and `fetch.fsck.` will only cause git to warn. fsck.skipList:: - The path to a sorted list of object names (i.e. one SHA-1 per + The path to a list of object names (i.e. one SHA-1 per line) that are known to be broken in a non-fatal way and should be ignored. This feature is useful when an established project should be accepted despite early commits containing errors that @@ -1725,6 +1725,14 @@ Unlike variables like `color.ui` and `core.editor` the fall back on the `fsck.skipList` configuration if they aren't set. To uniformly configure the same fsck settings in different circumstances all three of them they must all set to the same values. ++ +Older versions of Git (before 2.20) documented that the object names +list should be sorted. This was never a requirement, the object names +can appear in any order, but when reading the list we track whether +the list is sorted for the purposes of an internal binary search +implementation, which can save itself some work with an already sorted +list. Unless you have a humongous list there's no reason to go out of +your way to pre-sort the list. gc.aggressiveDepth:: The depth parameter used in the delta compression diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index cbae31f..fa56052 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -142,6 +142,25 @@ test_expect_success 'fsck with no skipList input' ' test_i18ngrep "missingEmail" err ' +test_expect_success 'setup sorted and unsorted skipLists' ' + cat >SKIP.unsorted <<-EOF && + 0000000000000000000000000000000000000004 + 0000000000000000000000000000000000000002 + $commit + 0000000000000000000000000000000000000001 + 0000000000000000000000000000000000000003 + EOF + sort SKIP.unsorted >SKIP.sorted +' + +test_expect_success 'fsck with sorted skipList' ' + git -c fsck.skipList=SKIP.sorted fsck +' + +test_expect_success 'fsck with unsorted skipList' ' + git -c fsck.skipList=SKIP.unsorted fsck +' + test_expect_success 'fsck with invalid or bogus skipList input' ' git -c fsck.skipList=/dev/null -c fsck.missingEmail=ignore fsck && test_must_fail git -c fsck.skipList=does-not-exist -c fsck.missingEmail=ignore fsck 2>err && -- cgit v0.10.2-6-g49f6 From f706c42bab5a2ac4af45e0df7853af1fc5346c45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 3 Sep 2018 14:49:22 +0000 Subject: fsck: document and test commented & empty line skipList input MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is currently no comment syntax for the fsck.skipList, this isn't really by design, and it would be nice to have support for comments. Document that this doesn't work, and test for how this errors out. These tests reveal a current bug, if there's invalid input the output will emit some of the next line, and then go into uninitialized memory. This is fixed in a subsequent change. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index fd1b583..0e1ce7d 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1712,10 +1712,13 @@ will only cause git to warn. fsck.skipList:: The path to a list of object names (i.e. one SHA-1 per line) that are known to be broken in a non-fatal way and should - be ignored. This feature is useful when an established project - should be accepted despite early commits containing errors that - can be safely ignored such as invalid committer email addresses. - Note: corrupt objects cannot be skipped with this setting. + be ignored. Comments ('#') and empty lines are not supported, and + will error out. ++ +This feature is useful when an established project should be accepted +despite early commits containing errors that can be safely ignored +such as invalid committer email addresses. Note: corrupt objects +cannot be skipped with this setting. + Like `fsck.` this variable has corresponding `receive.fsck.skipList` and `fetch.fsck.skipList` variants. diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index fa56052..38aaf3b 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -169,6 +169,27 @@ test_expect_success 'fsck with invalid or bogus skipList input' ' test_i18ngrep "Invalid SHA-1: \[core\]" err ' +test_expect_success 'fsck with invalid or bogus skipList input (comments & empty lines)' ' + cat >SKIP.with-comment <<-EOF && + # Some bad commit + 0000000000000000000000000000000000000001 + EOF + test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment && + test_i18ngrep "^fatal: Invalid SHA-1: # Some bad commit$" err-with-comment && + cat >SKIP.with-empty-line <<-EOF && + 0000000000000000000000000000000000000001 + + 0000000000000000000000000000000000000002 + EOF + test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line && + test_i18ngrep "^fatal: Invalid SHA-1: " err-with-empty-line +' + +test_expect_failure 'fsck no garbage output from comments & empty lines errors' ' + test_line_count = 1 err-with-comment && + test_line_count = 1 err-with-empty-line +' + test_expect_success 'push with receive.fsck.skipList' ' git push . $commit:refs/heads/bogus && rm -rf dst && -- cgit v0.10.2-6-g49f6 From 12b1c50a426d84aadd9909ed42b1ed0b2d12d7e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 3 Sep 2018 14:49:23 +0000 Subject: fsck: document that skipList input must be unabbreviated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Abbreviating the SHA-1s in the skipList input has never worked, but the documentation hasn't unambiguously stated that this is an error, and there was no test for it. Let's fix both since it would be easy for some later refactoring e.g. switch to accidentally switch to a looser OID parsing function, causing the tests before this change to pass, but for older versions of git to be incompatible with the new skipList format. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index 0e1ce7d..3287c7e 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1710,7 +1710,7 @@ doing the same for `receive.fsck.` and `fetch.fsck.` will only cause git to warn. fsck.skipList:: - The path to a list of object names (i.e. one SHA-1 per + The path to a list of object names (i.e. one unabbreviated SHA-1 per line) that are known to be broken in a non-fatal way and should be ignored. Comments ('#') and empty lines are not supported, and will error out. diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index 38aaf3b..96bf9fa 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -190,6 +190,12 @@ test_expect_failure 'fsck no garbage output from comments & empty lines errors' test_line_count = 1 err-with-empty-line ' +test_expect_success 'fsck with invalid abbreviated skipList input' ' + echo $commit | test_copy_bytes 20 >SKIP.abbreviated && + test_must_fail git -c fsck.skipList=SKIP.abbreviated fsck 2>err-abbreviated && + test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated +' + test_expect_success 'push with receive.fsck.skipList' ' git push . $commit:refs/heads/bogus && rm -rf dst && -- cgit v0.10.2-6-g49f6 From 6cb173b5b62c2e86febe06b8575ae808533a3834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 3 Sep 2018 14:49:24 +0000 Subject: fsck: add a performance test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a plain performance test for "fsck". This test will not be used to / referred to in any upcoming commit of mine in this series, but having a simple test for fsck performance is valuable, so let's add it while we're at it. Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/t/perf/p1450-fsck.sh b/t/perf/p1450-fsck.sh new file mode 100755 index 0000000..ae1b841 --- /dev/null +++ b/t/perf/p1450-fsck.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +test_description='Test fsck performance' + +. ./perf-lib.sh + +test_perf_large_repo + +test_perf 'fsck' ' + git fsck +' + +test_done -- cgit v0.10.2-6-g49f6 From 01e0d545ab5f5cf717a04012307204df1e7c9f8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 3 Sep 2018 14:49:25 +0000 Subject: fsck: add a performance test for skipList MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create a performance test to see how the skipList implementation performs. First we setup N bad commits, then we see how progressively working our way up to 0..N in increments of 10x does. I.e. the needle(s) in the haystack get progressively more numerous. Signed-off-by: René Scharfe Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/t/perf/p1451-fsck-skip-list.sh b/t/perf/p1451-fsck-skip-list.sh new file mode 100755 index 0000000..c2b97d2 --- /dev/null +++ b/t/perf/p1451-fsck-skip-list.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +test_description='Test fsck skipList performance' + +. ./perf-lib.sh + +test_perf_fresh_repo + +n=1000000 + +test_expect_success "setup $n bad commits" ' + for i in $(test_seq 1 $n) + do + echo "commit refs/heads/master" && + echo "committer C 1234567890 +0000" && + echo "data <skiplist + ' + + test_perf "fsck with $skip skipped bad commits" ' + git -c fsck.skipList=skiplist fsck + ' + + case $skip in + 0) skip=1 ;; + *) skip=${skip}0 ;; + esac +done + +test_done -- cgit v0.10.2-6-g49f6 From fb8952077dfa11dc1534b5a9d965cbb39b043af4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 3 Sep 2018 14:49:26 +0000 Subject: fsck: use strbuf_getline() to read skiplist file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buffer is unlikely to contain a NUL character, so printing its contents using %s in a die() format is unsafe (detected with ASan). Use an idiomatic strbuf_getline() loop instead, which ensures the buffer is always NUL-terminated, supports CRLF files as well, accepts files without a newline after the last line, supports any hash length automatically, and is shorter. This fixes a bug where emitting an error about an invalid line on say line 1 would continue printing subsequent lines, and usually continue into uninitialized memory. The performance impact of this, on a CentOS 7 box with RedHat GCC 4.8.5-28: $ GIT_PERF_REPEAT_COUNT=5 GIT_PERF_MAKE_OPTS='-j56 CFLAGS="-O3"' ./run HEAD~ HEAD p1451-fsck-skip-list.sh Test HEAD~ HEAD ---------------------------------------------------------------------------------------- 1450.3: fsck with 0 skipped bad commits 7.75(7.39+0.35) 7.68(7.29+0.39) -0.9% 1450.5: fsck with 1 skipped bad commits 7.70(7.30+0.40) 7.80(7.42+0.37) +1.3% 1450.7: fsck with 10 skipped bad commits 7.77(7.37+0.40) 7.87(7.47+0.40) +1.3% 1450.9: fsck with 100 skipped bad commits 7.82(7.41+0.40) 7.88(7.43+0.44) +0.8% 1450.11: fsck with 1000 skipped bad commits 7.88(7.49+0.39) 7.84(7.43+0.40) -0.5% 1450.13: fsck with 10000 skipped bad commits 8.02(7.63+0.39) 8.07(7.67+0.39) +0.6% 1450.15: fsck with 100000 skipped bad commits 8.01(7.60+0.41) 8.08(7.70+0.38) +0.9% 1450.17: fsck with 1000000 skipped bad commits 7.60(7.10+0.50) 7.37(7.18+0.19) -3.0% Helped-by: Jeff King Signed-off-by: Rene Scharfe Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/fsck.c b/fsck.c index a0cee0b..972a26b 100644 --- a/fsck.c +++ b/fsck.c @@ -183,8 +183,9 @@ static int fsck_msg_type(enum fsck_msg_id msg_id, static void init_skiplist(struct fsck_options *options, const char *path) { static struct oid_array skiplist = OID_ARRAY_INIT; - int sorted, fd; - char buffer[GIT_MAX_HEXSZ + 1]; + int sorted; + FILE *fp; + struct strbuf sb = STRBUF_INIT; struct object_id oid; if (options->skiplist) @@ -194,25 +195,23 @@ static void init_skiplist(struct fsck_options *options, const char *path) options->skiplist = &skiplist; } - fd = open(path, O_RDONLY); - if (fd < 0) + fp = fopen(path, "r"); + if (!fp) die("Could not open skip list: %s", path); - for (;;) { + while (!strbuf_getline(&sb, fp)) { const char *p; - int result = read_in_full(fd, buffer, sizeof(buffer)); - if (result < 0) - die_errno("Could not read '%s'", path); - if (!result) - break; - if (parse_oid_hex(buffer, &oid, &p) || *p != '\n') - die("Invalid SHA-1: %s", buffer); + if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0') + die("Invalid SHA-1: %s", sb.buf); oid_array_append(&skiplist, &oid); if (sorted && skiplist.nr > 1 && oidcmp(&skiplist.oid[skiplist.nr - 2], &oid) > 0) sorted = 0; } - close(fd); + if (ferror(fp)) + die_errno("Could not read '%s'", path); + fclose(fp); + strbuf_release(&sb); if (sorted) skiplist.sorted = 1; diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index 96bf9fa..d67ab37 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -185,7 +185,7 @@ test_expect_success 'fsck with invalid or bogus skipList input (comments & empty test_i18ngrep "^fatal: Invalid SHA-1: " err-with-empty-line ' -test_expect_failure 'fsck no garbage output from comments & empty lines errors' ' +test_expect_success 'fsck no garbage output from comments & empty lines errors' ' test_line_count = 1 err-with-comment && test_line_count = 1 err-with-empty-line ' -- cgit v0.10.2-6-g49f6 From 3b41fb0cb217f4b4491f2e67ce4183e5d2a5d873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 3 Sep 2018 14:49:27 +0000 Subject: fsck: use oidset instead of oid_array for skipList MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the implementation of the skipList feature to use oidset instead of oid_array to store SHA-1s for later lookup. This list is parsed once on startup by fsck, fetch-pack or receive-pack depending on the *.skipList config in use. I.e. only once per invocation, but note that for "clone --recurse-submodules" each submodule will re-parse the list, in addition to the main project, and it will be re-parsed when checking .gitmodules blobs, see fb16287719 ("fsck: check skiplist for object in fsck_blob()", 2018-06-27). Memory usage is a bit higher, but we don't need to keep track of the sort order anymore. Embed the oidset into struct fsck_options to make its ownership clear (no hidden sharing) and avoid unnecessary pointer indirection. The cumulative impact on performance of this & the preceding change, using the test setup described in the previous commit: Test HEAD~2 HEAD~ HEAD ---------------------------------------------------------------------------------------------------------------- 1450.3: fsck with 0 skipped bad commits 7.70(7.31+0.38) 7.72(7.33+0.38) +0.3% 7.70(7.30+0.40) +0.0% 1450.5: fsck with 1 skipped bad commits 7.84(7.47+0.37) 7.69(7.32+0.36) -1.9% 7.71(7.29+0.41) -1.7% 1450.7: fsck with 10 skipped bad commits 7.81(7.40+0.40) 7.94(7.57+0.36) +1.7% 7.92(7.55+0.37) +1.4% 1450.9: fsck with 100 skipped bad commits 7.81(7.42+0.38) 7.95(7.53+0.41) +1.8% 7.83(7.42+0.41) +0.3% 1450.11: fsck with 1000 skipped bad commits 7.99(7.62+0.36) 7.90(7.50+0.40) -1.1% 7.86(7.49+0.37) -1.6% 1450.13: fsck with 10000 skipped bad commits 7.98(7.57+0.40) 7.94(7.53+0.40) -0.5% 7.90(7.45+0.44) -1.0% 1450.15: fsck with 100000 skipped bad commits 7.97(7.57+0.39) 8.03(7.67+0.36) +0.8% 7.84(7.43+0.41) -1.6% 1450.17: fsck with 1000000 skipped bad commits 7.72(7.22+0.50) 7.28(7.07+0.20) -5.7% 7.13(6.87+0.25) -7.6% Helped-by: Ævar Arnfjörð Bjarmason Signed-off-by: Rene Scharfe Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index 3287c7e..161ffe2 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1731,11 +1731,12 @@ all three of them they must all set to the same values. + Older versions of Git (before 2.20) documented that the object names list should be sorted. This was never a requirement, the object names -can appear in any order, but when reading the list we track whether -the list is sorted for the purposes of an internal binary search -implementation, which can save itself some work with an already sorted -list. Unless you have a humongous list there's no reason to go out of -your way to pre-sort the list. +could appear in any order, but when reading the list we tracked whether +the list was sorted for the purposes of an internal binary search +implementation, which could save itself some work with an already sorted +list. Unless you had a humongous list there was no reason to go out of +your way to pre-sort the list. After Git version 2.20 a hash implementation +is used instead, so there's now no reason to pre-sort the list. gc.aggressiveDepth:: The depth parameter used in the delta compression diff --git a/fsck.c b/fsck.c index 972a26b..4c643f1 100644 --- a/fsck.c +++ b/fsck.c @@ -10,7 +10,6 @@ #include "fsck.h" #include "refs.h" #include "utf8.h" -#include "sha1-array.h" #include "decorate.h" #include "oidset.h" #include "packfile.h" @@ -182,19 +181,10 @@ static int fsck_msg_type(enum fsck_msg_id msg_id, static void init_skiplist(struct fsck_options *options, const char *path) { - static struct oid_array skiplist = OID_ARRAY_INIT; - int sorted; FILE *fp; struct strbuf sb = STRBUF_INIT; struct object_id oid; - if (options->skiplist) - sorted = options->skiplist->sorted; - else { - sorted = 1; - options->skiplist = &skiplist; - } - fp = fopen(path, "r"); if (!fp) die("Could not open skip list: %s", path); @@ -202,19 +192,12 @@ static void init_skiplist(struct fsck_options *options, const char *path) const char *p; if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0') die("Invalid SHA-1: %s", sb.buf); - oid_array_append(&skiplist, &oid); - if (sorted && skiplist.nr > 1 && - oidcmp(&skiplist.oid[skiplist.nr - 2], - &oid) > 0) - sorted = 0; + oidset_insert(&options->skiplist, &oid); } if (ferror(fp)) die_errno("Could not read '%s'", path); fclose(fp); strbuf_release(&sb); - - if (sorted) - skiplist.sorted = 1; } static int parse_msg_type(const char *str) @@ -319,9 +302,7 @@ static void append_msg_id(struct strbuf *sb, const char *msg_id) static int object_on_skiplist(struct fsck_options *opts, struct object *obj) { - if (opts && opts->skiplist && obj) - return oid_array_lookup(opts->skiplist, &obj->oid) >= 0; - return 0; + return opts && obj && oidset_contains(&opts->skiplist, &obj->oid); } __attribute__((format (printf, 4, 5))) diff --git a/fsck.h b/fsck.h index 0c7e8c9..b95595a 100644 --- a/fsck.h +++ b/fsck.h @@ -1,6 +1,8 @@ #ifndef GIT_FSCK_H #define GIT_FSCK_H +#include "oidset.h" + #define FSCK_ERROR 1 #define FSCK_WARN 2 #define FSCK_IGNORE 3 @@ -35,12 +37,12 @@ struct fsck_options { fsck_error error_func; unsigned strict:1; int *msg_type; - struct oid_array *skiplist; + struct oidset skiplist; struct decoration *object_names; }; -#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL } -#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL } +#define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT } +#define FSCK_OPTIONS_STRICT { NULL, fsck_error_function, 1, NULL, OIDSET_INIT } /* descend in all linked child objects * the return value is: -- cgit v0.10.2-6-g49f6 From 371a6550741e38abe6162d891e8d4102dd20f5a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=86var=20Arnfj=C3=B6r=C3=B0=20Bjarmason?= Date: Mon, 3 Sep 2018 14:49:28 +0000 Subject: fsck: support comments & empty lines in skipList MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's annoying not to be able to put comments and empty lines in the skipList, when e.g. keeping a big central list of commits to skip in /etc/gitconfig, which was my motivation for 1362df0d41 ("fetch: implement fetch.fsck.*", 2018-07-27). Implement that, and document what version of Git this was changed in, since this on-disk format can be expected to be used by multiple versions of git. There is no notable performance impact from this change, using the test setup described a couple of commits back: Test HEAD~ HEAD ---------------------------------------------------------------------------------------- 1450.3: fsck with 0 skipped bad commits 7.69(7.27+0.42) 7.86(7.48+0.37) +2.2% 1450.5: fsck with 1 skipped bad commits 7.69(7.30+0.38) 7.83(7.47+0.36) +1.8% 1450.7: fsck with 10 skipped bad commits 7.76(7.38+0.38) 7.79(7.38+0.41) +0.4% 1450.9: fsck with 100 skipped bad commits 7.76(7.38+0.38) 7.74(7.36+0.38) -0.3% 1450.11: fsck with 1000 skipped bad commits 7.71(7.30+0.41) 7.72(7.34+0.38) +0.1% 1450.13: fsck with 10000 skipped bad commits 7.74(7.34+0.40) 7.72(7.34+0.38) -0.3% 1450.15: fsck with 100000 skipped bad commits 7.75(7.40+0.35) 7.70(7.29+0.40) -0.6% 1450.17: fsck with 1000000 skipped bad commits 7.12(6.86+0.26) 7.13(6.87+0.26) +0.1% Signed-off-by: Ævar Arnfjörð Bjarmason Signed-off-by: Junio C Hamano diff --git a/Documentation/config.txt b/Documentation/config.txt index 161ffe2..0906db3 100644 --- a/Documentation/config.txt +++ b/Documentation/config.txt @@ -1712,8 +1712,9 @@ will only cause git to warn. fsck.skipList:: The path to a list of object names (i.e. one unabbreviated SHA-1 per line) that are known to be broken in a non-fatal way and should - be ignored. Comments ('#') and empty lines are not supported, and - will error out. + be ignored. On versions of Git 2.20 and later comments ('#'), empty + lines, and any leading and trailing whitespace is ignored. Everything + but a SHA-1 per line will error out on older versions. + This feature is useful when an established project should be accepted despite early commits containing errors that can be safely ignored diff --git a/fsck.c b/fsck.c index 4c643f1..859b050 100644 --- a/fsck.c +++ b/fsck.c @@ -190,6 +190,20 @@ static void init_skiplist(struct fsck_options *options, const char *path) die("Could not open skip list: %s", path); while (!strbuf_getline(&sb, fp)) { const char *p; + const char *hash; + + /* + * Allow trailing comments, leading whitespace + * (including before commits), and empty or whitespace + * only lines. + */ + hash = strchr(sb.buf, '#'); + if (hash) + strbuf_setlen(&sb, hash - sb.buf); + strbuf_trim(&sb); + if (!sb.len) + continue; + if (parse_oid_hex(sb.buf, &oid, &p) || *p != '\0') die("Invalid SHA-1: %s", sb.buf); oidset_insert(&options->skiplist, &oid); diff --git a/t/t5504-fetch-receive-strict.sh b/t/t5504-fetch-receive-strict.sh index d67ab37..7bc7068 100755 --- a/t/t5504-fetch-receive-strict.sh +++ b/t/t5504-fetch-receive-strict.sh @@ -169,20 +169,20 @@ test_expect_success 'fsck with invalid or bogus skipList input' ' test_i18ngrep "Invalid SHA-1: \[core\]" err ' -test_expect_success 'fsck with invalid or bogus skipList input (comments & empty lines)' ' +test_expect_success 'fsck with other accepted skipList input (comments & empty lines)' ' cat >SKIP.with-comment <<-EOF && # Some bad commit 0000000000000000000000000000000000000001 EOF test_must_fail git -c fsck.skipList=SKIP.with-comment fsck 2>err-with-comment && - test_i18ngrep "^fatal: Invalid SHA-1: # Some bad commit$" err-with-comment && + test_i18ngrep "missingEmail" err-with-comment && cat >SKIP.with-empty-line <<-EOF && 0000000000000000000000000000000000000001 0000000000000000000000000000000000000002 EOF test_must_fail git -c fsck.skipList=SKIP.with-empty-line fsck 2>err-with-empty-line && - test_i18ngrep "^fatal: Invalid SHA-1: " err-with-empty-line + test_i18ngrep "missingEmail" err-with-empty-line ' test_expect_success 'fsck no garbage output from comments & empty lines errors' ' @@ -196,6 +196,19 @@ test_expect_success 'fsck with invalid abbreviated skipList input' ' test_i18ngrep "^fatal: Invalid SHA-1: " err-abbreviated ' +test_expect_success 'fsck with exhaustive accepted skipList input (various types of comments etc.)' ' + >SKIP.exhaustive && + echo "# A commented line" >>SKIP.exhaustive && + echo "" >>SKIP.exhaustive && + echo " " >>SKIP.exhaustive && + echo " # Comment after whitespace" >>SKIP.exhaustive && + echo "$commit # Our bad commit (with leading whitespace and trailing comment)" >>SKIP.exhaustive && + echo "# Some bad commit (leading whitespace)" >>SKIP.exhaustive && + echo " 0000000000000000000000000000000000000001" >>SKIP.exhaustive && + git -c fsck.skipList=SKIP.exhaustive fsck 2>err && + test_must_be_empty err +' + test_expect_success 'push with receive.fsck.skipList' ' git push . $commit:refs/heads/bogus && rm -rf dst && -- cgit v0.10.2-6-g49f6