summaryrefslogtreecommitdiff
path: root/packfile.c
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2021-03-24 21:36:27 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2021-03-24 21:36:27 (GMT)
commit: 2744383cbda9bbbe4219bd3532757ae6d28460e1 (patch)
tree: 8ca08ee3555ef97487136e9e9de5699ca19d8990 /packfile.c
parent: c6617d1e4f462405e6e237a21026c1cf3b557ec0 (diff)
parent: 14e7b8344f0b7349b914331c0aff18f73faf03da (diff)
download: git-2744383cbda9bbbe4219bd3532757ae6d28460e1.zip
git-2744383cbda9bbbe4219bd3532757ae6d28460e1.tar.gz
git-2744383cbda9bbbe4219bd3532757ae6d28460e1.tar.bz2
Merge branch 'tb/geometric-repack'
"git repack" so far has been only capable of repacking everything
under the sun into a single pack (or split by size). A cleverer
strategy to reduce the cost of repacking a repository has been
introduced.

* tb/geometric-repack:
  builtin/pack-objects.c: ignore missing links with --stdin-packs
  builtin/repack.c: reword comment around pack-objects flags
  builtin/repack.c: be more conservative with unsigned overflows
  builtin/repack.c: assign pack split later
  t7703: test --geometric repack with loose objects
  builtin/repack.c: do not repack single packs with --geometric
  builtin/repack.c: add '--geometric' option
  packfile: add kept-pack cache for find_kept_pack_entry()
  builtin/pack-objects.c: rewrite honor-pack-keep logic
  p5303: measure time to repack with keep
  p5303: add missing &&-chains
  builtin/pack-objects.c: add '--stdin-packs' option
  revision: learn '--no-kept-objects'
  packfile: introduce 'find_kept_pack_entry()'
Diffstat (limited to 'packfile.c')
-rw-r--r--packfile.c67
1 files changed, 67 insertions, 0 deletions
diff --git a/packfile.c b/packfile.c
index ea29f4b..6661f33 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2066,12 +2066,79 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
return 0;
}
+/*
+ * Throw away the cached kept-pack array of "r" when it was recorded
+ * under a flags value other than "flags".  Nothing happens when no
+ * array is cached, or when the recorded flags already equal "flags".
+ */
+static void maybe_invalidate_kept_pack_cache(struct repository *r,
+					     unsigned flags)
+{
+	if (r->objects->kept_pack_cache.packs &&
+	    r->objects->kept_pack_cache.flags != flags) {
+		/* Stale: drop the array and reset the recorded flags. */
+		FREE_AND_NULL(r->objects->kept_pack_cache.packs);
+		r->objects->kept_pack_cache.flags = 0;
+	}
+}
+
+/*
+ * Return the NULL-terminated array of packs selected by "flags",
+ * building it and storing it in r->objects->kept_pack_cache on a
+ * cache miss.  A pack is selected when its pack_keep field is set and
+ * ON_DISK_KEEP_PACKS is in "flags", or when its pack_keep_in_core
+ * field is set and IN_CORE_KEEP_PACKS is in "flags".
+ */
+static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
+{
+	struct packed_git **list = NULL;
+	struct packed_git *pack;
+	size_t count = 0, capacity = 0;
+
+	maybe_invalidate_kept_pack_cache(r, flags);
+
+	/* Still cached after the invalidation check: reuse it. */
+	if (r->objects->kept_pack_cache.packs)
+		return r->objects->kept_pack_cache.packs;
+
+	/*
+	 * We want "all" packs here, because we need to cover ones that
+	 * are used by a midx, as well. We need to look in every one of
+	 * them (instead of the midx itself) to cover duplicates. It's
+	 * possible that an object is found in two packs that the midx
+	 * covers, one kept and one not kept, but the midx returns only
+	 * the non-kept version.
+	 */
+	for (pack = get_all_packs(r); pack; pack = pack->next) {
+		int on_disk = pack->pack_keep &&
+			      (flags & ON_DISK_KEEP_PACKS);
+		int in_core = pack->pack_keep_in_core &&
+			      (flags & IN_CORE_KEEP_PACKS);
+
+		if (!on_disk && !in_core)
+			continue;
+
+		ALLOC_GROW(list, count + 1, capacity);
+		list[count++] = pack;
+	}
+
+	/* Reserve one more slot for the terminating NULL. */
+	ALLOC_GROW(list, count + 1, capacity);
+	list[count] = NULL;
+
+	r->objects->kept_pack_cache.flags = flags;
+	r->objects->kept_pack_cache.packs = list;
+
+	return list;
+}
+
+/*
+ * Try fill_pack_entry() for "oid" against each pack returned by
+ * kept_pack_cache(r, flags), in array order.
+ *
+ * Returns 1 as soon as fill_pack_entry() reports a nonzero result for
+ * one of those packs (with "e" passed through to it), and 0 when the
+ * NULL terminator is reached without any such result.
+ */
+int find_kept_pack_entry(struct repository *r,
+			 const struct object_id *oid,
+			 unsigned flags,
+			 struct pack_entry *e)
+{
+	struct packed_git **kept = kept_pack_cache(r, flags);
+	size_t i;
+
+	for (i = 0; kept[i]; i++) {
+		if (fill_pack_entry(oid, e, kept[i]))
+			return 1;
+	}
+
+	return 0;
+}
+
/*
 * Return the result of find_pack_entry() for "oid" in the_repository.
 * The pack entry it fills in is a local and is discarded.
 */
int has_object_pack(const struct object_id *oid)
{
	struct pack_entry e;
	int found = find_pack_entry(the_repository, oid, &e);

	return found;
}
+/*
+ * Return the result of find_kept_pack_entry() for "oid" in
+ * the_repository, restricted by "flags".  The pack entry it fills in
+ * is a local and is discarded.
+ */
+int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+{
+	struct pack_entry e;
+	int found = find_kept_pack_entry(the_repository, oid, flags, &e);
+
+	return found;
+}
+}
+
int has_pack_index(const unsigned char *sha1)
{
struct stat st;