From 7d9a2819415663ee5f0676d06cdbb1368fdc02c7 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Feb 2014 02:36:22 -0500 Subject: t4212: test bogus timestamps with git-log When t4212 was originally added by 9dbe7c3d (pretty: handle broken commit headers gracefully, 2013-04-17), it tested our handling of commits with broken ident lines in which the timestamps could not be parsed. It does so using a bogus line like "Name -<> 1234 -0000", because that simulates an error that was seen in the wild. Later, 03818a4 (split_ident: parse timestamp from end of line, 2013-10-14) made our parser smart enough to actually find the timestamp on such a line, and t4212 was adjusted to match. While it's nice that we handle this real-world case, this meant that we were not actually testing the bogus-timestamp case anymore. This patch adds a test with a totally incomprehensible timestamp to make sure we are testing the code path. Note that the behavior is slightly different between regular log output and "--format=%ad". In the former case, we produce a sentinel value and in the latter, we produce an empty string. While at first this seems unnecessarily inconsistent, it matches the original behavior given by 9dbe7c3d. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/t/t4212-log-corrupt.sh b/t/t4212-log-corrupt.sh index ec5099b..611b687 100755 --- a/t/t4212-log-corrupt.sh +++ b/t/t4212-log-corrupt.sh @@ -39,4 +39,25 @@ test_expect_success 'git log --format with broken author email' ' test_cmp expect.err actual.err ' +munge_author_date () { + git cat-file commit "$1" >commit.orig && + sed "s/^\(author .*>\) [0-9]*/\1 $2/" <commit.orig >commit.munge && + git hash-object -w -t commit commit.munge +} + +test_expect_success 'unparsable dates produce sentinel value' ' + commit=$(munge_author_date HEAD totally_bogus) && + echo "Date: Thu Jan 1 00:00:00 1970 +0000" >expect && + git log -1 $commit >actual.full && + grep Date <actual.full >actual && + test_cmp expect actual +' + +test_expect_success 'unparsable dates produce sentinel value (%ad)' ' + commit=$(munge_author_date HEAD totally_bogus) && + echo >expect && + git log -1 --format=%ad $commit >actual + test_cmp expect actual +' + test_done -- cgit v0.10.2-6-g49f6 From d4b8de0420ffcc7a654ddc6c69a96d3c1b25b4fa Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Feb 2014 02:39:04 -0500 Subject: fsck: report integer overflow in author timestamps When we check commit objects, we complain if commit->date is ULONG_MAX, which is an indication that we saw integer overflow when parsing it. However, we do not do any check at all for author lines, which also contain a timestamp. Let's actually check the timestamps on each ident line with strtoul. This catches both author and committer lines, and we can get rid of the now-redundant commit->date check. Note that like the existing check, we compare only against ULONG_MAX. Now that we are calling strtoul at the site of the check, we could be slightly more careful and also check that errno is set to ERANGE. However, this will make further refactoring in future patches a little harder, and it doesn't really matter in practice. 
For 32-bit systems, one would have to create a commit at the exact wrong second in 2038. But by the time we get close to that, all systems will hopefully have moved to 64-bit (and if they haven't, they have a real problem one second later). For 64-bit systems, by the time we get close to ULONG_MAX, all systems will hopefully have been consumed in the fiery wrath of our expanding Sun. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/fsck.c b/fsck.c index 99c0497..760e072 100644 --- a/fsck.c +++ b/fsck.c @@ -245,6 +245,8 @@ static int fsck_tree(struct tree *item, int strict, fsck_error error_func) static int fsck_ident(char **ident, struct object *obj, fsck_error error_func) { + char *end; + if (**ident == '<') return error_func(obj, FSCK_ERROR, "invalid author/committer line - missing space before email"); *ident += strcspn(*ident, "<>\n"); @@ -264,10 +266,11 @@ static int fsck_ident(char **ident, struct object *obj, fsck_error error_func) (*ident)++; if (**ident == '0' && (*ident)[1] != ' ') return error_func(obj, FSCK_ERROR, "invalid author/committer line - zero-padded date"); - *ident += strspn(*ident, "0123456789"); - if (**ident != ' ') + if (strtoul(*ident, &end, 10) == ULONG_MAX) + return error_func(obj, FSCK_ERROR, "invalid author/committer line - date causes integer overflow"); + if (end == *ident || *end != ' ') return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad date"); - (*ident)++; + *ident = end + 1; if ((**ident != '+' && **ident != '-') || !isdigit((*ident)[1]) || !isdigit((*ident)[2]) || @@ -287,9 +290,6 @@ static int fsck_commit(struct commit *commit, fsck_error error_func) int parents = 0; int err; - if (commit->date == ULONG_MAX) - return error_func(&commit->object, FSCK_ERROR, "invalid author/committer line"); - if (memcmp(buffer, "tree ", 5)) return error_func(&commit->object, FSCK_ERROR, "invalid format - expected 'tree' line"); if (get_sha1_hex(buffer+5, tree_sha1) || buffer[45] != '\n') diff --git 
a/t/t1450-fsck.sh b/t/t1450-fsck.sh index d730734..8c739c9 100755 --- a/t/t1450-fsck.sh +++ b/t/t1450-fsck.sh @@ -142,6 +142,20 @@ test_expect_success '> in name is reported' ' grep "error in commit $new" out ' +# date is 2^64 + 1 +test_expect_success 'integer overflow in timestamps is reported' ' + git cat-file commit HEAD >basis && + sed "s/^\\(author .*>\\) [0-9]*/\\1 18446744073709551617/" \ + bad-timestamp && + new=$(git hash-object -t commit -w --stdin out && + cat out && + grep "error in commit $new.*integer overflow" out +' + test_expect_success 'tag pointing to nonexistent' ' cat >invalid-tag <<-\EOF && object ffffffffffffffffffffffffffffffffffffffff -- cgit v0.10.2-6-g49f6 From 7ca36d9398a85e7974d04f8fbd2c6adb088290e1 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Feb 2014 02:39:45 -0500 Subject: date: check date overflow against time_t When we check whether a timestamp has overflowed, we check only against ULONG_MAX, meaning that strtoul has overflowed. However, we also feed these timestamps to system functions like gmtime, which expect a time_t. On many systems, time_t is actually smaller than "unsigned long" (e.g., because it is signed), and we would overflow when using these functions. We don't know the actual size or signedness of time_t, but we can easily check for truncation with a simple assignment. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/cache.h b/cache.h index bb71bf8..9a2c377 100644 --- a/cache.h +++ b/cache.h @@ -909,6 +909,7 @@ void datestamp(char *buf, int bufsize); unsigned long approxidate_careful(const char *, int *); unsigned long approxidate_relative(const char *date, const struct timeval *now); enum date_mode parse_date_format(const char *format); +int date_overflows(unsigned long date); #define IDENT_STRICT 1 #define IDENT_NO_DATE 2 diff --git a/date.c b/date.c index 57331ed..2dae471 100644 --- a/date.c +++ b/date.c @@ -1085,3 +1085,20 @@ unsigned long approxidate_careful(const char *date, int *error_ret) gettimeofday(&tv, NULL); return approxidate_str(date, &tv, error_ret); } + +int date_overflows(unsigned long t) +{ + time_t sys; + + /* If we overflowed our unsigned long, that's bad... */ + if (t == ULONG_MAX) + return 1; + + /* + * ...but we also are going to feed the result to system + * functions that expect time_t, which is often "signed long". + * Make sure that we fit into time_t, as well. 
+ */ + sys = t; + return t != sys || (t < 1) != (sys < 1); +} diff --git a/fsck.c b/fsck.c index 760e072..64bf279 100644 --- a/fsck.c +++ b/fsck.c @@ -266,7 +266,7 @@ static int fsck_ident(char **ident, struct object *obj, fsck_error error_func) (*ident)++; if (**ident == '0' && (*ident)[1] != ' ') return error_func(obj, FSCK_ERROR, "invalid author/committer line - zero-padded date"); - if (strtoul(*ident, &end, 10) == ULONG_MAX) + if (date_overflows(strtoul(*ident, &end, 10))) return error_func(obj, FSCK_ERROR, "invalid author/committer line - date causes integer overflow"); if (end == *ident || *end != ' ') return error_func(obj, FSCK_ERROR, "invalid author/committer line - bad date"); -- cgit v0.10.2-6-g49f6 From 1dca155fe3fac29e847d2d8ff1087d892a129a9c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Feb 2014 02:46:37 -0500 Subject: log: handle integer overflow in timestamps If an ident line has a ridiculous date value like (2^64)+1, we currently just pass ULONG_MAX along to the date code, which can produce nonsensical dates. On systems with a signed long time_t (e.g., 64-bit glibc systems), this actually doesn't end up too bad. The ULONG_MAX is converted to -1, we apply the timezone field to that, and the result ends up somewhere between Dec 31, 1969 and Jan 1, 1970. However, there are still a few good reasons to detect the overflow explicitly: 1. On systems where "unsigned long" is smaller than time_t, we get a nonsensical date in the future. 2. Even where it would produce "Dec 31, 1969", it's easier to recognize "midnight Jan 1" as a consistent sentinel value for "we could not parse this". 3. Values which do not overflow strtoul but do overflow a signed time_t produce nonsensical values in the past. For example, on a 64-bit system with a signed long time_t, a timestamp of 18446744073000000000 produces a date in 1947. We also recognize overflow in the timezone field, which could produce nonsensical results. 
In this case we show the parsed date, but in UTC. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/pretty.c b/pretty.c index acbfceb..4da9a68 100644 --- a/pretty.c +++ b/pretty.c @@ -401,8 +401,14 @@ static const char *show_ident_date(const struct ident_split *ident, if (ident->date_begin && ident->date_end) date = strtoul(ident->date_begin, NULL, 10); - if (ident->tz_begin && ident->tz_end) - tz = strtol(ident->tz_begin, NULL, 10); + if (date_overflows(date)) + date = 0; + else { + if (ident->tz_begin && ident->tz_end) + tz = strtol(ident->tz_begin, NULL, 10); + if (tz == LONG_MAX || tz == LONG_MIN) + tz = 0; + } return show_date(date, tz, mode); } diff --git a/t/t4212-log-corrupt.sh b/t/t4212-log-corrupt.sh index 611b687..80542d6 100755 --- a/t/t4212-log-corrupt.sh +++ b/t/t4212-log-corrupt.sh @@ -60,4 +60,20 @@ test_expect_success 'unparsable dates produce sentinel value (%ad)' ' test_cmp expect actual ' +# date is 2^64 + 1 +test_expect_success 'date parser recognizes integer overflow' ' + commit=$(munge_author_date HEAD 18446744073709551617) && + echo "Thu Jan 1 00:00:00 1970 +0000" >expect && + git log -1 --format=%ad $commit >actual && + test_cmp expect actual +' + +# date is 2^64 - 2 +test_expect_success 'date parser recognizes time_t overflow' ' + commit=$(munge_author_date HEAD 18446744073709551614) && + echo "Thu Jan 1 00:00:00 1970 +0000" >expect && + git log -1 --format=%ad $commit >actual && + test_cmp expect actual +' + test_done -- cgit v0.10.2-6-g49f6 From 2b15846dbfb31df10a69a4d56ae944a01563bc07 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Mon, 24 Feb 2014 02:49:05 -0500 Subject: log: do not segfault on gmtime errors Many code paths assume that show_date and show_ident_date cannot return NULL. For the most part, we handle missing or corrupt timestamps by showing the epoch time t=0. However, we might still return NULL if gmtime rejects the time_t we feed it, resulting in a segfault. Let's catch this case and just format t=0. 
Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/date.c b/date.c index 2dae471..f64bbeb 100644 --- a/date.c +++ b/date.c @@ -184,8 +184,10 @@ const char *show_date(unsigned long time, int tz, enum date_mode mode) tz = local_tzoffset(time); tm = time_to_tm(time, tz); - if (!tm) - return NULL; + if (!tm) { + tm = time_to_tm(0, 0); + tz = 0; + } strbuf_reset(&timebuf); if (mode == DATE_SHORT) diff --git a/t/t4212-log-corrupt.sh b/t/t4212-log-corrupt.sh index 80542d6..85c6df4 100755 --- a/t/t4212-log-corrupt.sh +++ b/t/t4212-log-corrupt.sh @@ -76,4 +76,12 @@ test_expect_success 'date parser recognizes time_t overflow' ' test_cmp expect actual ' +# date is within 2^63-1, but enough to choke glibc's gmtime +test_expect_success 'absurdly far-in-future dates produce sentinel' ' + commit=$(munge_author_date HEAD 999999999999999999) && + echo "Thu Jan 1 00:00:00 1970 +0000" >expect && + git log -1 --format=%ad $commit >actual && + test_cmp expect actual +' + test_done -- cgit v0.10.2-6-g49f6 From 3f419d45ef0dfc33dc301d9ae4737043c091291a Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 7 Mar 2014 12:15:01 -0500 Subject: show_ident_date: fix tz range check Commit 1dca155fe3fa (log: handle integer overflow in timestamps, 2014-02-24) tried to catch integer overflow coming from strtol() on the timezone field by comparing against LONG_MIN/LONG_MAX. However, the intermediate "tz" variable is an "int", which means it can never be LONG_MAX on LP64 systems; we would truncate the output from strtol before the comparison. Clang's -Wtautological-constant-out-of-range-compare notices this and rightly complains. Let's instead store the result of strtol in a long, and then compare it against INT_MIN/INT_MAX. This will catch overflow from strtol, and also overflow when we pass the result as an int to show_date. 
Reported-by: Eric Sunshine Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/pretty.c b/pretty.c index 4da9a68..4d4c1e9 100644 --- a/pretty.c +++ b/pretty.c @@ -397,7 +397,7 @@ static const char *show_ident_date(const struct ident_split *ident, enum date_mode mode) { unsigned long date = 0; - int tz = 0; + long tz = 0; if (ident->date_begin && ident->date_end) date = strtoul(ident->date_begin, NULL, 10); @@ -406,7 +406,7 @@ static const char *show_ident_date(const struct ident_split *ident, else { if (ident->tz_begin && ident->tz_end) tz = strtol(ident->tz_begin, NULL, 10); - if (tz == LONG_MAX || tz == LONG_MIN) + if (tz >= INT_MAX || tz <= INT_MIN) tz = 0; } return show_date(date, tz, mode); -- cgit v0.10.2-6-g49f6