From 325006f2dbeb482f422b2c0e62b485a41cb1c64b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:36:40 +0200 Subject: oidset: make oidset_size() an inline function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit oidset_size() just reads a single word from memory and returns it. Avoid the function call overhead for this trivial operation by turning it into an inline function. While we're at it, declare its parameter const to allow it to be used on read-only oidsets. Suggested-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano diff --git a/oidset.c b/oidset.c index 5aac633..b36a2ba 100644 --- a/oidset.c +++ b/oidset.c @@ -36,11 +36,6 @@ void oidset_clear(struct oidset *set) oidset_init(set, 0); } -int oidset_size(struct oidset *set) -{ - return kh_size(&set->set); -} - void oidset_parse_file(struct oidset *set, const char *path) { oidset_parse_file_carefully(set, path, NULL, NULL); diff --git a/oidset.h b/oidset.h index 01f6560..ba4a5a2 100644 --- a/oidset.h +++ b/oidset.h @@ -57,7 +57,10 @@ int oidset_remove(struct oidset *set, const struct object_id *oid); /** * Returns the number of oids in the set. */ -int oidset_size(struct oidset *set); +static inline int oidset_size(const struct oidset *set) +{ + return kh_size(&set->set); +} /** * Remove all entries from the oidset, freeing any resources associated with -- cgit v0.10.2-6-g49f6 From 893b5635059a7e47b042073bc69ca0c5d9d15dc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:39:31 +0200 Subject: midx: inline nth_midxed_pack_entry() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fill_midx_entry() finds the position of an object ID and passes it to nth_midxed_pack_entry(), which uses the position to look up the object ID for its own purposes. Inline the latter into the former to avoid that lookup. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano diff --git a/midx.c b/midx.c index 321c6fd..8cb0630 100644 --- a/midx.c +++ b/midx.c @@ -276,14 +276,18 @@ uint32_t nth_midxed_pack_int_id(struct multi_pack_index *m, uint32_t pos) (off_t)pos * MIDX_CHUNK_OFFSET_WIDTH); } -static int nth_midxed_pack_entry(struct repository *r, - struct multi_pack_index *m, - struct pack_entry *e, - uint32_t pos) +int fill_midx_entry(struct repository * r, + const struct object_id *oid, + struct pack_entry *e, + struct multi_pack_index *m) { + uint32_t pos; uint32_t pack_int_id; struct packed_git *p; + if (!bsearch_midx(oid, m, &pos)) + return 0; + if (pos >= m->num_objects) return 0; @@ -305,10 +309,8 @@ static int nth_midxed_pack_entry(struct repository *r, if (p->num_bad_objects) { uint32_t i; - struct object_id oid; - nth_midxed_object_oid(&oid, m, pos); for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid.hash, + if (hasheq(oid->hash, p->bad_object_sha1 + the_hash_algo->rawsz * i)) return 0; } @@ -319,19 +321,6 @@ static int nth_midxed_pack_entry(struct repository *r, return 1; } -int fill_midx_entry(struct repository * r, - const struct object_id *oid, - struct pack_entry *e, - struct multi_pack_index *m) -{ - uint32_t pos; - - if (!bsearch_midx(oid, m, &pos)) - return 0; - - return nth_midxed_pack_entry(r, m, e, pos); -} - /* Match "foo.idx" against either "foo.pack" _or_ "foo.idx". */ static int cmp_idx_or_pack_name(const char *idx_or_pack_name, const char *idx_name) -- cgit v0.10.2-6-g49f6 From 751530de5db77196a08897e3c47526c0f0147ef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:40:33 +0200 Subject: packfile: convert mark_bad_packed_object() to object_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All callers have full object IDs, so pass them on instead of just their hash member. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano diff --git a/object-file.c b/object-file.c index a8be899..fb5a385 100644 --- a/object-file.c +++ b/object-file.c @@ -1616,7 +1616,7 @@ static int do_oid_object_info_extended(struct repository *r, return 0; rtype = packed_object_info(r, e.p, e.offset, oi); if (rtype < 0) { - mark_bad_packed_object(e.p, real->hash); + mark_bad_packed_object(e.p, real); return do_oid_object_info_extended(r, real, oi, 0); } else if (oi->whence == OI_PACKED) { oi->u.packed.offset = e.offset; diff --git a/packfile.c b/packfile.c index 4d0d625..fb15fc5 100644 --- a/packfile.c +++ b/packfile.c @@ -1161,17 +1161,17 @@ int unpack_object_header(struct packed_git *p, return type; } -void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1) +void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) { unsigned i; const unsigned hashsz = the_hash_algo->rawsz; for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, p->bad_object_sha1 + hashsz * i)) + if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i)) return; p->bad_object_sha1 = xrealloc(p->bad_object_sha1, st_mult(GIT_MAX_RAWSZ, st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, sha1); + hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash); p->num_bad_objects++; } @@ -1272,7 +1272,7 @@ static int retry_bad_packed_offset(struct repository *r, if (offset_to_pack_pos(p, obj_offset, &pos) < 0) return OBJ_BAD; nth_packed_object_id(&oid, p, pack_pos_to_index(p, pos)); - mark_bad_packed_object(p, oid.hash); + mark_bad_packed_object(p, &oid); type = oid_object_info(r, &oid, NULL); if (type <= OBJ_NONE) return OBJ_BAD; @@ -1722,7 +1722,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, nth_packed_object_id(&oid, p, index_pos); error("bad packed object CRC for %s", oid_to_hex(&oid)); - mark_bad_packed_object(p, oid.hash); + mark_bad_packed_object(p, &oid); data = NULL; goto out; } @@ -1811,7 +1811,7 @@ void *unpack_entry(struct repository *r, struct packed_git *p, off_t obj_offset, " at offset %"PRIuMAX" from %s", oid_to_hex(&base_oid), (uintmax_t)obj_offset, p->pack_name); - mark_bad_packed_object(p, base_oid.hash); + mark_bad_packed_object(p, &base_oid); base = read_object(r, &base_oid, &type, &base_size); external_base = base; } diff --git a/packfile.h b/packfile.h index 3ae117a..a982ed9 100644 --- a/packfile.h +++ b/packfile.h @@ -159,7 +159,7 @@ int packed_object_info(struct repository *r, struct packed_git *pack, off_t offset, struct object_info *); -void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1); +void mark_bad_packed_object(struct packed_git *, const struct object_id *); const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); #define ON_DISK_KEEP_PACKS 1 -- cgit v0.10.2-6-g49f6 From 7407d733a4a99cf946cab0c82e03c41dbd305b7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:42:20 +0200 Subject: packfile: convert has_packed_and_bad() to object_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The single caller has a full object ID, so pass it on instead of just its hash member. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano diff --git a/object-file.c b/object-file.c index fb5a385..01e7058 100644 --- a/object-file.c +++ b/object-file.c @@ -1725,7 +1725,7 @@ void *read_object_file_extended(struct repository *r, die(_("loose object %s (stored in %s) is corrupt"), oid_to_hex(repl), path); - if ((p = has_packed_and_bad(r, repl->hash)) != NULL) + if ((p = has_packed_and_bad(r, repl)) != NULL) die(_("packed object %s (stored in %s) is corrupt"), oid_to_hex(repl), p->pack_name); obj_read_unlock(); diff --git a/packfile.c b/packfile.c index fb15fc5..04080a5 100644 --- a/packfile.c +++ b/packfile.c @@ -1176,14 +1176,14 @@ void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) } const struct packed_git *has_packed_and_bad(struct repository *r, - const unsigned char *sha1) + const struct object_id *oid) { struct packed_git *p; unsigned i; for (p = r->objects->packed_git; p; p = p->next) for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(sha1, + if (hasheq(oid->hash, p->bad_object_sha1 + the_hash_algo->rawsz * i)) return p; return NULL; diff --git a/packfile.h b/packfile.h index a982ed9..1861467 100644 --- a/packfile.h +++ b/packfile.h @@ -160,7 +160,7 @@ int packed_object_info(struct repository *r, off_t offset, struct object_info *); void mark_bad_packed_object(struct packed_git *, const struct object_id *); -const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1); +const struct packed_git *has_packed_and_bad(struct repository *, const struct object_id *); #define ON_DISK_KEEP_PACKS 1 #define IN_CORE_KEEP_PACKS 2 -- cgit v0.10.2-6-g49f6 From 09ef66179b943d03cbe0bea0603e5f40574695a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Sat, 11 Sep 2021 22:43:26 +0200 Subject: packfile: use oidset for bad objects MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Store the object ID of broken pack entries in an oidset instead of keeping only their hashes in an unsorted array. The resulting code is shorter and easier to read. It also handles the (hopefully) very rare case of having a high number of bad objects better. Helped-by: Jeff King Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano diff --git a/midx.c b/midx.c index 8cb0630..76322b7 100644 --- a/midx.c +++ b/midx.c @@ -307,13 +307,9 @@ int fill_midx_entry(struct repository * r, if (!is_pack_valid(p)) return 0; - if (p->num_bad_objects) { - uint32_t i; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return 0; - } + if (oidset_size(&p->bad_objects) && + oidset_contains(&p->bad_objects, oid)) + return 0; e->offset = nth_midxed_offset(m, pos); e->p = p; diff --git a/object-store.h b/object-store.h index b4dc666..c7bead6 100644 --- a/object-store.h +++ b/object-store.h @@ -10,6 +10,7 @@ #include "khash.h" #include "dir.h" #include "oidtree.h" +#include "oidset.h" struct object_directory { struct object_directory *next; @@ -75,9 +76,8 @@ struct packed_git { const void *index_data; size_t index_size; uint32_t num_objects; - uint32_t num_bad_objects; uint32_t crc_offset; - unsigned char *bad_object_sha1; + struct oidset bad_objects; int index_version; time_t mtime; int pack_fd; diff --git a/packfile.c b/packfile.c index 04080a5..caba29c 100644 --- a/packfile.c +++ b/packfile.c @@ -1163,29 +1163,17 @@ int unpack_object_header(struct packed_git *p, void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid) { - unsigned i; - const unsigned hashsz = the_hash_algo->rawsz; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i)) - return; - p->bad_object_sha1 = xrealloc(p->bad_object_sha1, - st_mult(GIT_MAX_RAWSZ, - st_add(p->num_bad_objects, 1))); - hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash); - p->num_bad_objects++; + oidset_insert(&p->bad_objects, oid); } const struct packed_git *has_packed_and_bad(struct repository *r, const struct object_id *oid) { struct packed_git *p; - unsigned i; for (p = r->objects->packed_git; p; p = p->next) - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return p; + if (oidset_contains(&p->bad_objects, oid)) + return p; return NULL; } @@ -2016,13 +2004,9 @@ static int fill_pack_entry(const struct object_id *oid, { off_t offset; - if (p->num_bad_objects) { - unsigned i; - for (i = 0; i < p->num_bad_objects; i++) - if (hasheq(oid->hash, - p->bad_object_sha1 + the_hash_algo->rawsz * i)) - return 0; - } + if (oidset_size(&p->bad_objects) && + oidset_contains(&p->bad_objects, oid)) + return 0; offset = find_pack_entry_one(oid->hash, p); if (!offset) -- cgit v0.10.2-6-g49f6