summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2021-11-29 23:41:51 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2021-11-29 23:41:51 (GMT)
commit: f9ba6acaa9348ea7b733bf78adc2f084247a912f (patch)
tree: bd702b925cea098b8e04bd49272c5faba336bf15
parent: ad1260b6c994f7c0f9c259bd39f39979f7f4ecc2 (diff)
parent: 596b5e77c960cc57ad2e68407b298411ec5e8cb8 (diff)
download:
git-f9ba6acaa9348ea7b733bf78adc2f084247a912f.zip
git-f9ba6acaa9348ea7b733bf78adc2f084247a912f.tar.gz
git-f9ba6acaa9348ea7b733bf78adc2f084247a912f.tar.bz2
Merge branch 'mc/clean-smudge-with-llp64'
The clean/smudge conversion code path has been prepared to better
work on platforms where ulong is narrower than size_t.

* mc/clean-smudge-with-llp64:
  clean/smudge: allow clean filters to process extremely large files
  odb: guard against data loss checking out a huge file
  git-compat-util: introduce more size_t helpers
  odb: teach read_blob_entry to use size_t
  t1051: introduce a smudge filter test for extremely large files
  test-lib: add prerequisite for 64-bit platforms
  test-tool genzeros: generate large amounts of data more efficiently
  test-genzeros: allow more than 2G zeros in Windows
-rw-r--r--  convert.c                    2
-rw-r--r--  delta.h                      6
-rw-r--r--  entry.c                      8
-rw-r--r--  entry.h                      2
-rw-r--r--  git-compat-util.h           25
-rw-r--r--  object-file.c                6
-rw-r--r--  packfile.c                   6
-rw-r--r--  parallel-checkout.c          2
-rw-r--r--  t/helper/test-genzeros.c    21
-rwxr-xr-x  t/t1051-large-conversion.sh 26
-rw-r--r--  t/test-lib.sh                4
11 files changed, 89 insertions, 19 deletions
diff --git a/convert.c b/convert.c
index 0d6fb34..df7186b 100644
--- a/convert.c
+++ b/convert.c
@@ -613,7 +613,7 @@ static int crlf_to_worktree(const char *src, size_t len, struct strbuf *buf,
struct filter_params {
const char *src;
- unsigned long size;
+ size_t size;
int fd;
const char *cmd;
const char *path;
diff --git a/delta.h b/delta.h
index 2df5fe1..8a56ec0 100644
--- a/delta.h
+++ b/delta.h
@@ -90,15 +90,15 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
const unsigned char *top)
{
const unsigned char *data = *datap;
- unsigned long cmd, size = 0;
+ size_t cmd, size = 0;
int i = 0;
do {
cmd = *data++;
- size |= (cmd & 0x7f) << i;
+ size |= st_left_shift(cmd & 0x7f, i);
i += 7;
} while (cmd & 0x80 && data < top);
*datap = data;
- return size;
+ return cast_size_t_to_ulong(size);
}
#endif
diff --git a/entry.c b/entry.c
index 9b0f968..1c9df62 100644
--- a/entry.c
+++ b/entry.c
@@ -82,11 +82,13 @@ static int create_file(const char *path, unsigned int mode)
return open(path, O_WRONLY | O_CREAT | O_EXCL, mode);
}
-void *read_blob_entry(const struct cache_entry *ce, unsigned long *size)
+void *read_blob_entry(const struct cache_entry *ce, size_t *size)
{
enum object_type type;
- void *blob_data = read_object_file(&ce->oid, &type, size);
+ unsigned long ul;
+ void *blob_data = read_object_file(&ce->oid, &type, &ul);
+ *size = ul;
if (blob_data) {
if (type == OBJ_BLOB)
return blob_data;
@@ -271,7 +273,7 @@ static int write_entry(struct cache_entry *ce, char *path, struct conv_attrs *ca
int fd, ret, fstat_done = 0;
char *new_blob;
struct strbuf buf = STRBUF_INIT;
- unsigned long size;
+ size_t size;
ssize_t wrote;
size_t newsize = 0;
struct stat st;
diff --git a/entry.h b/entry.h
index 2254c62..252fd24 100644
--- a/entry.h
+++ b/entry.h
@@ -52,7 +52,7 @@ int finish_delayed_checkout(struct checkout *state, int *nr_checkouts,
*/
void unlink_entry(const struct cache_entry *ce);
-void *read_blob_entry(const struct cache_entry *ce, unsigned long *size);
+void *read_blob_entry(const struct cache_entry *ce, size_t *size);
int fstat_checkout_output(int fd, const struct checkout *state, struct stat *st);
void update_ce_after_write(const struct checkout *state, struct cache_entry *ce,
struct stat *st);
diff --git a/git-compat-util.h b/git-compat-util.h
index 7176a43..c6bd2a8 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -113,6 +113,14 @@
#define unsigned_mult_overflows(a, b) \
((a) && (b) > maximum_unsigned_value_of_type(a) / (a))
+/*
+ * Returns true if the left shift of "a" by "shift" bits will
+ * overflow. The type of "a" must be unsigned.
+ */
+#define unsigned_left_shift_overflows(a, shift) \
+ ((shift) < bitsizeof(a) && \
+ (a) > maximum_unsigned_value_of_type(a) >> (shift))
+
#ifdef __GNUC__
#define TYPEOF(x) (__typeof__(x))
#else
@@ -862,6 +870,23 @@ static inline size_t st_sub(size_t a, size_t b)
return a - b;
}
+static inline size_t st_left_shift(size_t a, unsigned shift)
+{
+ if (unsigned_left_shift_overflows(a, shift))
+ die("size_t overflow: %"PRIuMAX" << %u",
+ (uintmax_t)a, shift);
+ return a << shift;
+}
+
+static inline unsigned long cast_size_t_to_ulong(size_t a)
+{
+ if (a != (unsigned long)a)
+ die("object too large to read on this platform: %"
+ PRIuMAX" is cut off to %lu",
+ (uintmax_t)a, (unsigned long)a);
+ return (unsigned long)a;
+}
+
#ifdef HAVE_ALLOCA_H
# include <alloca.h>
# define xalloca(size) (alloca(size))
diff --git a/object-file.c b/object-file.c
index c3d866a..eb972cd 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1306,7 +1306,7 @@ static void *unpack_loose_rest(git_zstream *stream,
int parse_loose_header(const char *hdr, struct object_info *oi)
{
const char *type_buf = hdr;
- unsigned long size;
+ size_t size;
int type, type_len = 0;
/*
@@ -1341,12 +1341,12 @@ int parse_loose_header(const char *hdr, struct object_info *oi)
if (c > 9)
break;
hdr++;
- size = size * 10 + c;
+ size = st_add(st_mult(size, 10), c);
}
}
if (oi->sizep)
- *oi->sizep = size;
+ *oi->sizep = cast_size_t_to_ulong(size);
/*
* The length must be followed by a zero byte
diff --git a/packfile.c b/packfile.c
index 89402cf..6423d77 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1060,7 +1060,7 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
- unsigned long size, c;
+ size_t size, c;
unsigned long used = 0;
c = buf[used++];
@@ -1074,10 +1074,10 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
break;
}
c = buf[used++];
- size += (c & 0x7f) << shift;
+ size = st_add(size, st_left_shift(c & 0x7f, shift));
shift += 7;
}
- *sizep = size;
+ *sizep = cast_size_t_to_ulong(size);
return used;
}
diff --git a/parallel-checkout.c b/parallel-checkout.c
index ed9c999..8dd7e7b 100644
--- a/parallel-checkout.c
+++ b/parallel-checkout.c
@@ -261,7 +261,7 @@ static int write_pc_item_to_fd(struct parallel_checkout_item *pc_item, int fd,
struct stream_filter *filter;
struct strbuf buf = STRBUF_INIT;
char *blob;
- unsigned long size;
+ size_t size;
ssize_t wrote;
/* Sanity check */
diff --git a/t/helper/test-genzeros.c b/t/helper/test-genzeros.c
index 9532f5b..8ca988d 100644
--- a/t/helper/test-genzeros.c
+++ b/t/helper/test-genzeros.c
@@ -3,18 +3,31 @@
int cmd__genzeros(int argc, const char **argv)
{
- long count;
+ /* static, so that it is NUL-initialized */
+ static const char zeros[256 * 1024];
+ intmax_t count;
+ ssize_t n;
if (argc > 2) {
fprintf(stderr, "usage: %s [<count>]\n", argv[0]);
return 1;
}
- count = argc > 1 ? strtol(argv[1], NULL, 0) : -1L;
+ count = argc > 1 ? strtoimax(argv[1], NULL, 0) : -1;
- while (count < 0 || count--) {
- if (putchar(0) == EOF)
+ /* Writing out individual NUL bytes is slow... */
+ while (count < 0)
+ if (write(1, zeros, ARRAY_SIZE(zeros)) < 0)
return -1;
+
+ while (count > 0) {
+ n = write(1, zeros, count < ARRAY_SIZE(zeros) ?
+ count : ARRAY_SIZE(zeros));
+
+ if (n < 0)
+ return -1;
+
+ count -= n;
}
return 0;
diff --git a/t/t1051-large-conversion.sh b/t/t1051-large-conversion.sh
index 8b7640b..042b0e4 100755
--- a/t/t1051-large-conversion.sh
+++ b/t/t1051-large-conversion.sh
@@ -83,4 +83,30 @@ test_expect_success 'ident converts on output' '
test_cmp small.clean large.clean
'
+# This smudge filter prepends 5GB of zeros to the file it checks out. This
+# ensures that smudging doesn't mangle large files on 64-bit Windows.
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
+ 'files over 4GB convert on output' '
+ test_commit test small "a small file" &&
+ small_size=$(test_file_size small) &&
+ test_config filter.makelarge.smudge \
+ "test-tool genzeros $((5*1024*1024*1024)) && cat" &&
+ echo "small filter=makelarge" >.gitattributes &&
+ rm small &&
+ git checkout -- small &&
+ size=$(test_file_size small) &&
+ test "$size" -eq $((5 * 1024 * 1024 * 1024 + $small_size))
+'
+
+# This clean filter writes down the size of input it receives. By checking against
+# the actual size, we ensure that cleaning doesn't mangle large files on 64-bit Windows.
+test_expect_success EXPENSIVE,SIZE_T_IS_64BIT,!LONG_IS_64BIT \
+ 'files over 4GB convert on input' '
+ test-tool genzeros $((5*1024*1024*1024)) >big &&
+ test_config filter.checklarge.clean "wc -c >big.size" &&
+ echo "big filter=checklarge" >.gitattributes &&
+ git add big &&
+ test $(test_file_size big) -eq $(cat big.size)
+'
+
test_done
diff --git a/t/test-lib.sh b/t/test-lib.sh
index 2679a75..57efcc5 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1734,6 +1734,10 @@ build_option () {
sed -ne "s/^$1: //p"
}
+test_lazy_prereq SIZE_T_IS_64BIT '
+ test 8 -eq "$(build_option sizeof-size_t)"
+'
+
test_lazy_prereq LONG_IS_64BIT '
test 8 -le "$(build_option sizeof-long)"
'