From 9fd750461befcaf984d5966606308c8cd6912f3c Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Sat, 6 May 2017 22:10:20 +0000 Subject: Convert the verify_pack callback to struct object_id Make the verify_pack_callback take a pointer to struct object_id. Change the pack checksum to use GIT_MAX_RAWSZ, even though it is not strictly an object ID. Doing so ensures resilience against future hash size changes, and allows us to remove hard-coded assumptions about how big the buffer needs to be. Also, use a union to convert the pointer from nth_packed_object_sha1 to a pointer to struct object_id. This behavior is compatible with GCC and clang and explicitly sanctioned by C11. The alternatives are to just perform a cast, which would run afoul of strict aliasing rules, but should just work, and changing the pointer into an instance of struct object_id and copying the value. The latter operation could seriously bloat memory usage on fsck, which already uses a lot of memory on some repositories. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/builtin/fsck.c b/builtin/fsck.c index 2f67e82..a187054 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -377,7 +377,7 @@ static int fsck_obj(struct object *obj) return 0; } -static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, +static int fsck_obj_buffer(const struct object_id *oid, enum object_type type, unsigned long size, void *buffer, int *eaten) { /* @@ -385,10 +385,10 @@ static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type, * verify_packfile(), data_valid variable for details. 
*/ struct object *obj; - obj = parse_object_buffer(sha1, type, size, buffer, eaten); + obj = parse_object_buffer(oid->hash, type, size, buffer, eaten); if (!obj) { errors_found |= ERROR_OBJECT; - return error("%s: object corrupt or missing", sha1_to_hex(sha1)); + return error("%s: object corrupt or missing", oid_to_hex(oid)); } obj->flags = HAS_OBJ; return fsck_obj(obj); diff --git a/pack-check.c b/pack-check.c index 27f70d3..e1fcb22 100644 --- a/pack-check.c +++ b/pack-check.c @@ -5,7 +5,10 @@ struct idx_entry { off_t offset; - const unsigned char *sha1; + union idx_entry_object { + const unsigned char *hash; + struct object_id *oid; + } oid; unsigned int nr; }; @@ -51,7 +54,7 @@ static int verify_packfile(struct packed_git *p, off_t index_size = p->index_size; const unsigned char *index_base = p->index_data; git_SHA_CTX ctx; - unsigned char sha1[20], *pack_sig; + unsigned char hash[GIT_MAX_RAWSZ], *pack_sig; off_t offset = 0, pack_sig_ofs = 0; uint32_t nr_objects, i; int err = 0; @@ -71,9 +74,9 @@ static int verify_packfile(struct packed_git *p, remaining -= (unsigned int)(offset - pack_sig_ofs); git_SHA1_Update(&ctx, in, remaining); } while (offset < pack_sig_ofs); - git_SHA1_Final(sha1, &ctx); + git_SHA1_Final(hash, &ctx); pack_sig = use_pack(p, w_curs, pack_sig_ofs, NULL); - if (hashcmp(sha1, pack_sig)) + if (hashcmp(hash, pack_sig)) err = error("%s SHA1 checksum mismatch", p->pack_name); if (hashcmp(index_base + index_size - 40, pack_sig)) @@ -90,8 +93,8 @@ static int verify_packfile(struct packed_git *p, entries[nr_objects].offset = pack_sig_ofs; /* first sort entries by pack offset, since unpacking them is more efficient that way */ for (i = 0; i < nr_objects; i++) { - entries[i].sha1 = nth_packed_object_sha1(p, i); - if (!entries[i].sha1) + entries[i].oid.hash = nth_packed_object_sha1(p, i); + if (!entries[i].oid.hash) die("internal error pack-check nth-packed-object"); entries[i].offset = nth_packed_object_offset(p, i); entries[i].nr = i; @@ -112,7 +115,7 
@@ static int verify_packfile(struct packed_git *p, if (check_pack_crc(p, w_curs, offset, len, nr)) err = error("index CRC mismatch for object %s " "from %s at offset %"PRIuMAX"", - sha1_to_hex(entries[i].sha1), + oid_to_hex(entries[i].oid.oid), p->pack_name, (uintmax_t)offset); } @@ -135,14 +138,14 @@ static int verify_packfile(struct packed_git *p, if (data_valid && !data) err = error("cannot unpack %s from %s at offset %"PRIuMAX"", - sha1_to_hex(entries[i].sha1), p->pack_name, + oid_to_hex(entries[i].oid.oid), p->pack_name, (uintmax_t)entries[i].offset); - else if (check_sha1_signature(entries[i].sha1, data, size, typename(type))) + else if (check_sha1_signature(entries[i].oid.hash, data, size, typename(type))) err = error("packed %s from %s is corrupt", - sha1_to_hex(entries[i].sha1), p->pack_name); + oid_to_hex(entries[i].oid.oid), p->pack_name); else if (fn) { int eaten = 0; - err |= fn(entries[i].sha1, type, size, data, &eaten); + err |= fn(entries[i].oid.oid, type, size, data, &eaten); if (eaten) data = NULL; } diff --git a/pack.h b/pack.h index c7de42e..8294341 100644 --- a/pack.h +++ b/pack.h @@ -75,7 +75,7 @@ struct pack_idx_entry { struct progress; /* Note, the data argument could be NULL if object type is blob */ -typedef int (*verify_fn)(const unsigned char*, enum object_type, unsigned long, void*, int*); +typedef int (*verify_fn)(const struct object_id *, enum object_type, unsigned long, void*, int*); extern const char *write_idx_file(const char *index_name, struct pack_idx_entry **objects, int nr_objects, const struct pack_idx_option *, const unsigned char *sha1); extern int check_pack_crc(struct packed_git *p, struct pack_window **w_curs, off_t offset, off_t len, unsigned int nr); -- cgit v0.10.2-6-g49f6