summaryrefslogtreecommitdiff
path: root/builtin/pack-objects.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2016-10-10 21:03:46 (GMT)
committerJunio C Hamano <gitster@pobox.com>2016-10-10 21:03:47 (GMT)
commite6e24c94df9df6d39f2316113c14fe07d2ab03d7 (patch)
tree1f883be2ac3f3a84ce5af3cd1e9e32c45648e21e /builtin/pack-objects.c
parentb8688adb12d086b161aa7c369126bdd56843a01b (diff)
parentc9af708b1a5a92cbde2a0c9a75b13530f792ac84 (diff)
downloadgit-e6e24c94df9df6d39f2316113c14fe07d2ab03d7.zip
git-e6e24c94df9df6d39f2316113c14fe07d2ab03d7.tar.gz
git-e6e24c94df9df6d39f2316113c14fe07d2ab03d7.tar.bz2
Merge branch 'jk/pack-objects-optim-mru'
"git pack-objects" in a repository with many packfiles used to spend a lot of time looking for/at objects in them; the accesses to the packfiles are now optimized by checking the most-recently-used packfile first. * jk/pack-objects-optim-mru: pack-objects: use mru list when iterating over packs pack-objects: break delta cycles before delta-search phase sha1_file: make packed_object_info public provide an initializer for "struct object_info"
Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r--builtin/pack-objects.c92
1 files changed, 90 insertions, 2 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 8aeba6a..1e7c2a9 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -23,6 +23,7 @@
#include "reachable.h"
#include "sha1-array.h"
#include "argv-array.h"
+#include "mru.h"
static const char *pack_usage[] = {
N_("git pack-objects --stdout [<options>...] [< <ref-list> | < <object-list>]"),
@@ -994,7 +995,7 @@ static int want_object_in_pack(const unsigned char *sha1,
struct packed_git **found_pack,
off_t *found_offset)
{
- struct packed_git *p;
+ struct mru_entry *entry;
int want;
if (!exclude && local && has_loose_object_nonlocal(sha1))
@@ -1011,7 +1012,8 @@ static int want_object_in_pack(const unsigned char *sha1,
return want;
}
- for (p = packed_git; p; p = p->next) {
+ for (entry = packed_git_mru->head; entry; entry = entry->next) {
+ struct packed_git *p = entry->item;
off_t offset;
if (p == *found_pack)
@@ -1027,6 +1029,8 @@ static int want_object_in_pack(const unsigned char *sha1,
*found_pack = p;
}
want = want_found_object(exclude, p);
+ if (!exclude && want > 0)
+ mru_mark(packed_git_mru, entry);
if (want != -1)
return want;
}
@@ -1527,6 +1531,83 @@ static int pack_offset_sort(const void *_a, const void *_b)
(a->in_pack_offset > b->in_pack_offset);
}
+/*
+ * Drop an on-disk delta we were planning to reuse. Naively, this would
+ * just involve blanking out the "delta" field, but we have to deal
+ * with some extra book-keeping:
+ *
+ * 1. Removing ourselves from the delta_sibling linked list.
+ *
+ * 2. Updating our size/type to the non-delta representation. These were
+ * either not recorded initially (size) or overwritten with the delta type
+ * (type) when check_object() decided to reuse the delta.
+ */
+static void drop_reused_delta(struct object_entry *entry)
+{
+ struct object_entry **p = &entry->delta->delta_child;
+ struct object_info oi = OBJECT_INFO_INIT;
+
+ while (*p) {
+ if (*p == entry)
+ *p = (*p)->delta_sibling;
+ else
+ p = &(*p)->delta_sibling;
+ }
+ entry->delta = NULL;
+
+ oi.sizep = &entry->size;
+ oi.typep = &entry->type;
+ if (packed_object_info(entry->in_pack, entry->in_pack_offset, &oi) < 0) {
+ /*
+ * We failed to get the info from this pack for some reason;
+ * fall back to sha1_object_info, which may find another copy.
+ * And if that fails, the error will be recorded in entry->type
+ * and dealt with in prepare_pack().
+ */
+ entry->type = sha1_object_info(entry->idx.sha1, &entry->size);
+ }
+}
+
+/*
+ * Follow the chain of deltas from this entry onward, throwing away any links
+ * that cause us to hit a cycle (as determined by the DFS state flags in
+ * the entries).
+ */
+static void break_delta_chains(struct object_entry *entry)
+{
+ /* If it's not a delta, it can't be part of a cycle. */
+ if (!entry->delta) {
+ entry->dfs_state = DFS_DONE;
+ return;
+ }
+
+ switch (entry->dfs_state) {
+ case DFS_NONE:
+ /*
+ * This is the first time we've seen the object. We mark it as
+ * part of the active potential cycle and recurse.
+ */
+ entry->dfs_state = DFS_ACTIVE;
+ break_delta_chains(entry->delta);
+ entry->dfs_state = DFS_DONE;
+ break;
+
+ case DFS_DONE:
+ /* object already examined, and not part of a cycle */
+ break;
+
+ case DFS_ACTIVE:
+ /*
+ * We found a cycle that needs broken. It would be correct to
+ * break any link in the chain, but it's convenient to
+ * break this one.
+ */
+ drop_reused_delta(entry);
+ entry->dfs_state = DFS_DONE;
+ break;
+ }
+}
+
static void get_object_details(void)
{
uint32_t i;
@@ -1544,6 +1625,13 @@ static void get_object_details(void)
entry->no_try_delta = 1;
}
+ /*
+ * This must happen in a second pass, since we rely on the delta
+ * information for the whole list being completed.
+ */
+ for (i = 0; i < to_pack.nr_objects; i++)
+ break_delta_chains(&to_pack.objects[i]);
+
free(sorted_by_offset);
}