summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/git-pack-objects.txt10
-rw-r--r--Documentation/git-repack.txt23
-rw-r--r--builtin/pack-objects.c330
-rw-r--r--builtin/repack.c207
-rw-r--r--object-store.h5
-rw-r--r--packfile.c67
-rw-r--r--packfile.h5
-rw-r--r--revision.c15
-rw-r--r--revision.h4
-rwxr-xr-xt/perf/p5303-many-packs.sh36
-rwxr-xr-xt/t5300-pack-object.sh135
-rwxr-xr-xt/t6114-keep-packs.sh69
-rwxr-xr-xt/t7703-repack-geometric.sh183
13 files changed, 1029 insertions, 60 deletions
diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt
index f85cb7e..25d9fbe 100644
--- a/Documentation/git-pack-objects.txt
+++ b/Documentation/git-pack-objects.txt
@@ -85,6 +85,16 @@ base-name::
reference was included in the resulting packfile. This
can be useful to send new tags to native Git clients.
+--stdin-packs::
+ Read the basenames of packfiles (e.g., `pack-1234abcd.pack`)
+ from the standard input, instead of object names or revision
+ arguments. The resulting pack contains all objects listed in the
+ included packs (those not beginning with `^`), excluding any
+ objects listed in the excluded packs (beginning with `^`).
++
+Incompatible with `--revs`, or options that imply `--revs` (such as
+`--all`), with the exception of `--unpacked`, which is compatible.
+
--window=<n>::
--depth=<n>::
These two options affect how the objects contained in
diff --git a/Documentation/git-repack.txt b/Documentation/git-repack.txt
index fbd4b4a..317d63c 100644
--- a/Documentation/git-repack.txt
+++ b/Documentation/git-repack.txt
@@ -165,6 +165,29 @@ depth is 4095.
Pass the `--delta-islands` option to `git-pack-objects`, see
linkgit:git-pack-objects[1].
+-g=<factor>::
+--geometric=<factor>::
+ Arrange resulting pack structure so that each successive pack
+ contains at least `<factor>` times the number of objects as the
+ next-largest pack.
++
+`git repack` ensures this by determining a "cut" of packfiles that need
+to be repacked into one in order to ensure a geometric progression. It
+picks the smallest set of packfiles such that as many of the larger
+packfiles (by count of objects contained in that pack) may be left
+intact.
++
+Unlike other repack modes, the set of objects to pack is determined
+uniquely by the set of packs being "rolled-up"; in other words, the
+packs determined to need to be combined in order to restore a geometric
+progression.
++
+When `--unpacked` is specified, loose objects are implicitly included in
+this "roll-up", without respect to their reachability. This is subject
+to change in the future. This option (implying a drastically different
+repack mode) is not guaranteed to work with all other combinations of
+option to `git repack`).
+
CONFIGURATION
-------------
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 4bb6026..8319831 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -1188,7 +1188,8 @@ static int have_duplicate_entry(const struct object_id *oid,
return 1;
}
-static int want_found_object(int exclude, struct packed_git *p)
+static int want_found_object(const struct object_id *oid, int exclude,
+ struct packed_git *p)
{
if (exclude)
return 1;
@@ -1204,27 +1205,82 @@ static int want_found_object(int exclude, struct packed_git *p)
* make sure no copy of this object appears in _any_ pack that makes us
* to omit the object, so we need to check all the packs.
*
- * We can however first check whether these options can possible matter;
+ * We can however first check whether these options can possibly matter;
* if they do not matter we know we want the object in generated pack.
* Otherwise, we signal "-1" at the end to tell the caller that we do
* not know either way, and it needs to check more packs.
*/
- if (!ignore_packed_keep_on_disk &&
- !ignore_packed_keep_in_core &&
- (!local || !have_non_local_packs))
- return 1;
+ /*
+ * Objects in packs borrowed from elsewhere are discarded regardless of
+ * if they appear in other packs that weren't borrowed.
+ */
if (local && !p->pack_local)
return 0;
- if (p->pack_local &&
- ((ignore_packed_keep_on_disk && p->pack_keep) ||
- (ignore_packed_keep_in_core && p->pack_keep_in_core)))
- return 0;
+
+ /*
+ * Then handle .keep first, as we have a fast(er) path there.
+ */
+ if (ignore_packed_keep_on_disk || ignore_packed_keep_in_core) {
+ /*
+ * Set the flags for the kept-pack cache to be the ones we want
+ * to ignore.
+ *
+ * That is, if we are ignoring objects in on-disk keep packs,
+ * then we want to search through the on-disk keep and ignore
+ * the in-core ones.
+ */
+ unsigned flags = 0;
+ if (ignore_packed_keep_on_disk)
+ flags |= ON_DISK_KEEP_PACKS;
+ if (ignore_packed_keep_in_core)
+ flags |= IN_CORE_KEEP_PACKS;
+
+ if (ignore_packed_keep_on_disk && p->pack_keep)
+ return 0;
+ if (ignore_packed_keep_in_core && p->pack_keep_in_core)
+ return 0;
+ if (has_object_kept_pack(oid, flags))
+ return 0;
+ }
+
+ /*
+ * At this point we know definitively that either we don't care about
+ * keep-packs, or the object is not in one. Keep checking other
+ * conditions...
+ */
+ if (!local || !have_non_local_packs)
+ return 1;
/* we don't know yet; keep looking for more packs */
return -1;
}
+static int want_object_in_pack_one(struct packed_git *p,
+ const struct object_id *oid,
+ int exclude,
+ struct packed_git **found_pack,
+ off_t *found_offset)
+{
+ off_t offset;
+
+ if (p == *found_pack)
+ offset = *found_offset;
+ else
+ offset = find_pack_entry_one(oid->hash, p);
+
+ if (offset) {
+ if (!*found_pack) {
+ if (!is_pack_valid(p))
+ return -1;
+ *found_offset = offset;
+ *found_pack = p;
+ }
+ return want_found_object(oid, exclude, p);
+ }
+ return -1;
+}
+
/*
* Check whether we want the object in the pack (e.g., we do not want
* objects found in non-local stores if the "--local" option was used).
@@ -1252,7 +1308,7 @@ static int want_object_in_pack(const struct object_id *oid,
* are present we will determine the answer right now.
*/
if (*found_pack) {
- want = want_found_object(exclude, *found_pack);
+ want = want_found_object(oid, exclude, *found_pack);
if (want != -1)
return want;
}
@@ -1260,51 +1316,20 @@ static int want_object_in_pack(const struct object_id *oid,
for (m = get_multi_pack_index(the_repository); m; m = m->next) {
struct pack_entry e;
if (fill_midx_entry(the_repository, oid, &e, m)) {
- struct packed_git *p = e.p;
- off_t offset;
-
- if (p == *found_pack)
- offset = *found_offset;
- else
- offset = find_pack_entry_one(oid->hash, p);
-
- if (offset) {
- if (!*found_pack) {
- if (!is_pack_valid(p))
- continue;
- *found_offset = offset;
- *found_pack = p;
- }
- want = want_found_object(exclude, p);
- if (want != -1)
- return want;
- }
+ want = want_object_in_pack_one(e.p, oid, exclude, found_pack, found_offset);
+ if (want != -1)
+ return want;
}
}
list_for_each(pos, get_packed_git_mru(the_repository)) {
struct packed_git *p = list_entry(pos, struct packed_git, mru);
- off_t offset;
-
- if (p == *found_pack)
- offset = *found_offset;
- else
- offset = find_pack_entry_one(oid->hash, p);
-
- if (offset) {
- if (!*found_pack) {
- if (!is_pack_valid(p))
- continue;
- *found_offset = offset;
- *found_pack = p;
- }
- want = want_found_object(exclude, p);
- if (!exclude && want > 0)
- list_move(&p->mru,
- get_packed_git_mru(the_repository));
- if (want != -1)
- return want;
- }
+ want = want_object_in_pack_one(p, oid, exclude, found_pack, found_offset);
+ if (!exclude && want > 0)
+ list_move(&p->mru,
+ get_packed_git_mru(the_repository));
+ if (want != -1)
+ return want;
}
if (uri_protocols.nr) {
@@ -2986,6 +3011,191 @@ static int git_pack_config(const char *k, const char *v, void *cb)
return git_default_config(k, v, cb);
}
+/* Counters for trace2 output when in --stdin-packs mode. */
+static int stdin_packs_found_nr;
+static int stdin_packs_hints_nr;
+
+static int add_object_entry_from_pack(const struct object_id *oid,
+ struct packed_git *p,
+ uint32_t pos,
+ void *_data)
+{
+ struct rev_info *revs = _data;
+ struct object_info oi = OBJECT_INFO_INIT;
+ off_t ofs;
+ enum object_type type;
+
+ display_progress(progress_state, ++nr_seen);
+
+ if (have_duplicate_entry(oid, 0))
+ return 0;
+
+ ofs = nth_packed_object_offset(p, pos);
+ if (!want_object_in_pack(oid, 0, &p, &ofs))
+ return 0;
+
+ oi.typep = &type;
+ if (packed_object_info(the_repository, p, ofs, &oi) < 0)
+ die(_("could not get type of object %s in pack %s"),
+ oid_to_hex(oid), p->pack_name);
+ else if (type == OBJ_COMMIT) {
+ /*
+ * commits in included packs are used as starting points for the
+ * subsequent revision walk
+ */
+ add_pending_oid(revs, NULL, oid, 0);
+ }
+
+ stdin_packs_found_nr++;
+
+ create_object_entry(oid, type, 0, 0, 0, p, ofs);
+
+ return 0;
+}
+
+static void show_commit_pack_hint(struct commit *commit, void *_data)
+{
+ /* nothing to do; commits don't have a namehash */
+}
+
+static void show_object_pack_hint(struct object *object, const char *name,
+ void *_data)
+{
+ struct object_entry *oe = packlist_find(&to_pack, &object->oid);
+ if (!oe)
+ return;
+
+ /*
+ * Our 'to_pack' list was constructed by iterating all objects packed in
+ * included packs, and so doesn't have a non-zero hash field that you
+ * would typically pick up during a reachability traversal.
+ *
+ * Make a best-effort attempt to fill in the ->hash and ->no_try_delta
+ * here using a now in order to perhaps improve the delta selection
+ * process.
+ */
+ oe->hash = pack_name_hash(name);
+ oe->no_try_delta = name && no_try_delta(name);
+
+ stdin_packs_hints_nr++;
+}
+
+static int pack_mtime_cmp(const void *_a, const void *_b)
+{
+ struct packed_git *a = ((const struct string_list_item*)_a)->util;
+ struct packed_git *b = ((const struct string_list_item*)_b)->util;
+
+ /*
+ * order packs by descending mtime so that objects are laid out
+ * roughly as newest-to-oldest
+ */
+ if (a->mtime < b->mtime)
+ return 1;
+ else if (b->mtime < a->mtime)
+ return -1;
+ else
+ return 0;
+}
+
+static void read_packs_list_from_stdin(void)
+{
+ struct strbuf buf = STRBUF_INIT;
+ struct string_list include_packs = STRING_LIST_INIT_DUP;
+ struct string_list exclude_packs = STRING_LIST_INIT_DUP;
+ struct string_list_item *item = NULL;
+
+ struct packed_git *p;
+ struct rev_info revs;
+
+ repo_init_revisions(the_repository, &revs, NULL);
+ /*
+ * Use a revision walk to fill in the namehash of objects in the include
+ * packs. To save time, we'll avoid traversing through objects that are
+ * in excluded packs.
+ *
+ * That may cause us to avoid populating all of the namehash fields of
+ * all included objects, but our goal is best-effort, since this is only
+ * an optimization during delta selection.
+ */
+ revs.no_kept_objects = 1;
+ revs.keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+ revs.blob_objects = 1;
+ revs.tree_objects = 1;
+ revs.tag_objects = 1;
+ revs.ignore_missing_links = 1;
+
+ while (strbuf_getline(&buf, stdin) != EOF) {
+ if (!buf.len)
+ continue;
+
+ if (*buf.buf == '^')
+ string_list_append(&exclude_packs, buf.buf + 1);
+ else
+ string_list_append(&include_packs, buf.buf);
+
+ strbuf_reset(&buf);
+ }
+
+ string_list_sort(&include_packs);
+ string_list_sort(&exclude_packs);
+
+ for (p = get_all_packs(the_repository); p; p = p->next) {
+ const char *pack_name = pack_basename(p);
+
+ item = string_list_lookup(&include_packs, pack_name);
+ if (!item)
+ item = string_list_lookup(&exclude_packs, pack_name);
+
+ if (item)
+ item->util = p;
+ }
+
+ /*
+ * First handle all of the excluded packs, marking them as kept in-core
+ * so that later calls to add_object_entry() discards any objects that
+ * are also found in excluded packs.
+ */
+ for_each_string_list_item(item, &exclude_packs) {
+ struct packed_git *p = item->util;
+ if (!p)
+ die(_("could not find pack '%s'"), item->string);
+ p->pack_keep_in_core = 1;
+ }
+
+ /*
+ * Order packs by ascending mtime; use QSORT directly to access the
+ * string_list_item's ->util pointer, which string_list_sort() does not
+ * provide.
+ */
+ QSORT(include_packs.items, include_packs.nr, pack_mtime_cmp);
+
+ for_each_string_list_item(item, &include_packs) {
+ struct packed_git *p = item->util;
+ if (!p)
+ die(_("could not find pack '%s'"), item->string);
+ for_each_object_in_pack(p,
+ add_object_entry_from_pack,
+ &revs,
+ FOR_EACH_OBJECT_PACK_ORDER);
+ }
+
+ if (prepare_revision_walk(&revs))
+ die(_("revision walk setup failed"));
+ traverse_commit_list(&revs,
+ show_commit_pack_hint,
+ show_object_pack_hint,
+ NULL);
+
+ trace2_data_intmax("pack-objects", the_repository, "stdin_packs_found",
+ stdin_packs_found_nr);
+ trace2_data_intmax("pack-objects", the_repository, "stdin_packs_hints",
+ stdin_packs_hints_nr);
+
+ strbuf_release(&buf);
+ string_list_clear(&include_packs, 0);
+ string_list_clear(&exclude_packs, 0);
+}
+
static void read_object_list_from_stdin(void)
{
char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2];
@@ -3489,6 +3699,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
struct strvec rp = STRVEC_INIT;
int rev_list_unpacked = 0, rev_list_all = 0, rev_list_reflog = 0;
int rev_list_index = 0;
+ int stdin_packs = 0;
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
struct option pack_objects_options[] = {
OPT_SET_INT('q', "quiet", &progress,
@@ -3539,6 +3750,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
OPT_SET_INT_F(0, "indexed-objects", &rev_list_index,
N_("include objects referred to by the index"),
1, PARSE_OPT_NONEG),
+ OPT_BOOL(0, "stdin-packs", &stdin_packs,
+ N_("read packs from stdin")),
OPT_BOOL(0, "stdout", &pack_to_stdout,
N_("output pack to stdout")),
OPT_BOOL(0, "include-tag", &include_tag,
@@ -3645,7 +3858,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
use_internal_rev_list = 1;
strvec_push(&rp, "--indexed-objects");
}
- if (rev_list_unpacked) {
+ if (rev_list_unpacked && !stdin_packs) {
use_internal_rev_list = 1;
strvec_push(&rp, "--unpacked");
}
@@ -3690,8 +3903,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (filter_options.choice) {
if (!pack_to_stdout)
die(_("cannot use --filter without --stdout"));
+ if (stdin_packs)
+ die(_("cannot use --filter with --stdin-packs"));
}
+ if (stdin_packs && use_internal_rev_list)
+ die(_("cannot use internal rev list with --stdin-packs"));
+
/*
* "soft" reasons not to use bitmaps - for on-disk repack by default we want
*
@@ -3750,7 +3968,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
if (progress)
progress_state = start_progress(_("Enumerating objects"), 0);
- if (!use_internal_rev_list)
+ if (stdin_packs) {
+ /* avoids adding objects in excluded packs */
+ ignore_packed_keep_in_core = 1;
+ read_packs_list_from_stdin();
+ if (rev_list_unpacked)
+ add_unreachable_loose_objects();
+ } else if (!use_internal_rev_list)
read_object_list_from_stdin();
else {
get_object_list(rp.nr, rp.v);
diff --git a/builtin/repack.c b/builtin/repack.c
index 01440de..6ce2556 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -297,6 +297,142 @@ static void repack_promisor_objects(const struct pack_objects_args *args,
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
+struct pack_geometry {
+ struct packed_git **pack;
+ uint32_t pack_nr, pack_alloc;
+ uint32_t split;
+};
+
+static uint32_t geometry_pack_weight(struct packed_git *p)
+{
+ if (open_pack_index(p))
+ die(_("cannot open index for %s"), p->pack_name);
+ return p->num_objects;
+}
+
+static int geometry_cmp(const void *va, const void *vb)
+{
+ uint32_t aw = geometry_pack_weight(*(struct packed_git **)va),
+ bw = geometry_pack_weight(*(struct packed_git **)vb);
+
+ if (aw < bw)
+ return -1;
+ if (aw > bw)
+ return 1;
+ return 0;
+}
+
+static void init_pack_geometry(struct pack_geometry **geometry_p)
+{
+ struct packed_git *p;
+ struct pack_geometry *geometry;
+
+ *geometry_p = xcalloc(1, sizeof(struct pack_geometry));
+ geometry = *geometry_p;
+
+ for (p = get_all_packs(the_repository); p; p = p->next) {
+ if (!pack_kept_objects && p->pack_keep)
+ continue;
+
+ ALLOC_GROW(geometry->pack,
+ geometry->pack_nr + 1,
+ geometry->pack_alloc);
+
+ geometry->pack[geometry->pack_nr] = p;
+ geometry->pack_nr++;
+ }
+
+ QSORT(geometry->pack, geometry->pack_nr, geometry_cmp);
+}
+
+static void split_pack_geometry(struct pack_geometry *geometry, int factor)
+{
+ uint32_t i;
+ uint32_t split;
+ off_t total_size = 0;
+
+ if (!geometry->pack_nr) {
+ geometry->split = geometry->pack_nr;
+ return;
+ }
+
+ /*
+ * First, count the number of packs (in descending order of size) which
+ * already form a geometric progression.
+ */
+ for (i = geometry->pack_nr - 1; i > 0; i--) {
+ struct packed_git *ours = geometry->pack[i];
+ struct packed_git *prev = geometry->pack[i - 1];
+
+ if (unsigned_mult_overflows(factor, geometry_pack_weight(prev)))
+ die(_("pack %s too large to consider in geometric "
+ "progression"),
+ prev->pack_name);
+
+ if (geometry_pack_weight(ours) < factor * geometry_pack_weight(prev))
+ break;
+ }
+
+ split = i;
+
+ if (split) {
+ /*
+ * Move the split one to the right, since the top element in the
+ * last-compared pair can't be in the progression. Only do this
+ * when we split in the middle of the array (otherwise if we got
+ * to the end, then the split is in the right place).
+ */
+ split++;
+ }
+
+ /*
+ * Then, anything to the left of 'split' must be in a new pack. But,
+ * creating that new pack may cause packs in the heavy half to no longer
+ * form a geometric progression.
+ *
+ * Compute an expected size of the new pack, and then determine how many
+ * packs in the heavy half need to be joined into it (if any) to restore
+ * the geometric progression.
+ */
+ for (i = 0; i < split; i++) {
+ struct packed_git *p = geometry->pack[i];
+
+ if (unsigned_add_overflows(total_size, geometry_pack_weight(p)))
+ die(_("pack %s too large to roll up"), p->pack_name);
+ total_size += geometry_pack_weight(p);
+ }
+ for (i = split; i < geometry->pack_nr; i++) {
+ struct packed_git *ours = geometry->pack[i];
+
+ if (unsigned_mult_overflows(factor, total_size))
+ die(_("pack %s too large to roll up"), ours->pack_name);
+
+ if (geometry_pack_weight(ours) < factor * total_size) {
+ if (unsigned_add_overflows(total_size,
+ geometry_pack_weight(ours)))
+ die(_("pack %s too large to roll up"),
+ ours->pack_name);
+
+ split++;
+ total_size += geometry_pack_weight(ours);
+ } else
+ break;
+ }
+
+ geometry->split = split;
+}
+
+static void clear_pack_geometry(struct pack_geometry *geometry)
+{
+ if (!geometry)
+ return;
+
+ free(geometry->pack);
+ geometry->pack_nr = 0;
+ geometry->pack_alloc = 0;
+ geometry->split = 0;
+}
+
int cmd_repack(int argc, const char **argv, const char *prefix)
{
struct child_process cmd = CHILD_PROCESS_INIT;
@@ -304,6 +440,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
struct string_list names = STRING_LIST_INIT_DUP;
struct string_list rollback = STRING_LIST_INIT_NODUP;
struct string_list existing_packs = STRING_LIST_INIT_DUP;
+ struct pack_geometry *geometry = NULL;
struct strbuf line = STRBUF_INIT;
int i, ext, ret;
FILE *out;
@@ -316,6 +453,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
int no_update_server_info = 0;
struct pack_objects_args po_args = {NULL};
+ int geometric_factor = 0;
struct option builtin_repack_options[] = {
OPT_BIT('a', NULL, &pack_everything,
@@ -356,6 +494,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
N_("do not repack this pack")),
+ OPT_INTEGER('g', "geometric", &geometric_factor,
+ N_("find a geometric progression with factor <N>")),
OPT_END()
};
@@ -382,6 +522,13 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (write_bitmaps && !(pack_everything & ALL_INTO_ONE))
die(_(incremental_bitmap_conflict_error));
+ if (geometric_factor) {
+ if (pack_everything)
+ die(_("--geometric is incompatible with -A, -a"));
+ init_pack_geometry(&geometry);
+ split_pack_geometry(geometry, geometric_factor);
+ }
+
packdir = mkpathdup("%s/pack", get_object_directory());
packtmp = mkpathdup("%s/.tmp-%d-pack", packdir, (int)getpid());
@@ -396,9 +543,21 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_pushf(&cmd.args, "--keep-pack=%s",
keep_pack_list.items[i].string);
strvec_push(&cmd.args, "--non-empty");
- strvec_push(&cmd.args, "--all");
- strvec_push(&cmd.args, "--reflog");
- strvec_push(&cmd.args, "--indexed-objects");
+ if (!geometry) {
+ /*
+ * We need to grab all reachable objects, including those that
+ * are reachable from reflogs and the index.
+ *
+ * When repacking into a geometric progression of packs,
+ * however, we ask 'git pack-objects --stdin-packs', and it is
+ * not about packing objects based on reachability but about
+ * repacking all the objects in specified packs and loose ones
+ * (indeed, --stdin-packs is incompatible with these options).
+ */
+ strvec_push(&cmd.args, "--all");
+ strvec_push(&cmd.args, "--reflog");
+ strvec_push(&cmd.args, "--indexed-objects");
+ }
if (has_promisor_remote())
strvec_push(&cmd.args, "--exclude-promisor-objects");
if (write_bitmaps > 0)
@@ -429,17 +588,37 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_push(&cmd.env_array, "GIT_REF_PARANOIA=1");
}
}
+ } else if (geometry) {
+ strvec_push(&cmd.args, "--stdin-packs");
+ strvec_push(&cmd.args, "--unpacked");
} else {
strvec_push(&cmd.args, "--unpacked");
strvec_push(&cmd.args, "--incremental");
}
- cmd.no_stdin = 1;
+ if (geometry)
+ cmd.in = -1;
+ else
+ cmd.no_stdin = 1;
ret = start_command(&cmd);
if (ret)
return ret;
+ if (geometry) {
+ FILE *in = xfdopen(cmd.in, "w");
+ /*
+ * The resulting pack should contain all objects in packs that
+ * are going to be rolled up, but exclude objects in packs which
+ * are being left alone.
+ */
+ for (i = 0; i < geometry->split; i++)
+ fprintf(in, "%s\n", pack_basename(geometry->pack[i]));
+ for (i = geometry->split; i < geometry->pack_nr; i++)
+ fprintf(in, "^%s\n", pack_basename(geometry->pack[i]));
+ fclose(in);
+ }
+
out = xfdopen(cmd.out, "r");
while (strbuf_getline_lf(&line, out) != EOF) {
if (line.len != the_hash_algo->hexsz)
@@ -507,6 +686,25 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
if (!string_list_has_string(&names, sha1))
remove_redundant_pack(packdir, item->string);
}
+
+ if (geometry) {
+ struct strbuf buf = STRBUF_INIT;
+
+ uint32_t i;
+ for (i = 0; i < geometry->split; i++) {
+ struct packed_git *p = geometry->pack[i];
+ if (string_list_has_string(&names,
+ hash_to_hex(p->hash)))
+ continue;
+
+ strbuf_reset(&buf);
+ strbuf_addstr(&buf, pack_basename(p));
+ strbuf_strip_suffix(&buf, ".pack");
+
+ remove_redundant_pack(packdir, buf.buf);
+ }
+ strbuf_release(&buf);
+ }
if (!po_args.quiet && isatty(2))
opts |= PRUNE_PACKED_VERBOSE;
prune_packed_objects(opts);
@@ -528,6 +726,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
string_list_clear(&names, 0);
string_list_clear(&rollback, 0);
string_list_clear(&existing_packs, 0);
+ clear_pack_geometry(geometry);
strbuf_release(&line);
return 0;
diff --git a/object-store.h b/object-store.h
index 541dab0..ec32c23 100644
--- a/object-store.h
+++ b/object-store.h
@@ -153,6 +153,11 @@ struct raw_object_store {
/* A most-recently-used ordered version of the packed_git list. */
struct list_head packed_git_mru;
+ struct {
+ struct packed_git **packs;
+ unsigned flags;
+ } kept_pack_cache;
+
/*
* A map of packfiles to packed_git structs for tracking which
* packs have been loaded already.
diff --git a/packfile.c b/packfile.c
index ea29f4b..6661f33 100644
--- a/packfile.c
+++ b/packfile.c
@@ -2066,12 +2066,79 @@ int find_pack_entry(struct repository *r, const struct object_id *oid, struct pa
return 0;
}
+static void maybe_invalidate_kept_pack_cache(struct repository *r,
+ unsigned flags)
+{
+ if (!r->objects->kept_pack_cache.packs)
+ return;
+ if (r->objects->kept_pack_cache.flags == flags)
+ return;
+ FREE_AND_NULL(r->objects->kept_pack_cache.packs);
+ r->objects->kept_pack_cache.flags = 0;
+}
+
+static struct packed_git **kept_pack_cache(struct repository *r, unsigned flags)
+{
+ maybe_invalidate_kept_pack_cache(r, flags);
+
+ if (!r->objects->kept_pack_cache.packs) {
+ struct packed_git **packs = NULL;
+ size_t nr = 0, alloc = 0;
+ struct packed_git *p;
+
+ /*
+ * We want "all" packs here, because we need to cover ones that
+ * are used by a midx, as well. We need to look in every one of
+ * them (instead of the midx itself) to cover duplicates. It's
+ * possible that an object is found in two packs that the midx
+ * covers, one kept and one not kept, but the midx returns only
+ * the non-kept version.
+ */
+ for (p = get_all_packs(r); p; p = p->next) {
+ if ((p->pack_keep && (flags & ON_DISK_KEEP_PACKS)) ||
+ (p->pack_keep_in_core && (flags & IN_CORE_KEEP_PACKS))) {
+ ALLOC_GROW(packs, nr + 1, alloc);
+ packs[nr++] = p;
+ }
+ }
+ ALLOC_GROW(packs, nr + 1, alloc);
+ packs[nr] = NULL;
+
+ r->objects->kept_pack_cache.packs = packs;
+ r->objects->kept_pack_cache.flags = flags;
+ }
+
+ return r->objects->kept_pack_cache.packs;
+}
+
+int find_kept_pack_entry(struct repository *r,
+ const struct object_id *oid,
+ unsigned flags,
+ struct pack_entry *e)
+{
+ struct packed_git **cache;
+
+ for (cache = kept_pack_cache(r, flags); *cache; cache++) {
+ struct packed_git *p = *cache;
+ if (fill_pack_entry(oid, e, p))
+ return 1;
+ }
+
+ return 0;
+}
+
int has_object_pack(const struct object_id *oid)
{
struct pack_entry e;
return find_pack_entry(the_repository, oid, &e);
}
+int has_object_kept_pack(const struct object_id *oid, unsigned flags)
+{
+ struct pack_entry e;
+ return find_kept_pack_entry(the_repository, oid, flags, &e);
+}
+
int has_pack_index(const unsigned char *sha1)
{
struct stat st;
diff --git a/packfile.h b/packfile.h
index 4cfec9e..3ae117a 100644
--- a/packfile.h
+++ b/packfile.h
@@ -162,13 +162,18 @@ int packed_object_info(struct repository *r,
void mark_bad_packed_object(struct packed_git *p, const unsigned char *sha1);
const struct packed_git *has_packed_and_bad(struct repository *r, const unsigned char *sha1);
+#define ON_DISK_KEEP_PACKS 1
+#define IN_CORE_KEEP_PACKS 2
+
/*
* Iff a pack file in the given repository contains the object named by sha1,
* return true and store its location to e.
*/
int find_pack_entry(struct repository *r, const struct object_id *oid, struct pack_entry *e);
+int find_kept_pack_entry(struct repository *r, const struct object_id *oid, unsigned flags, struct pack_entry *e);
int has_object_pack(const struct object_id *oid);
+int has_object_kept_pack(const struct object_id *oid, unsigned flags);
int has_pack_index(const unsigned char *sha1);
diff --git a/revision.c b/revision.c
index 99c859f..553c0fa 100644
--- a/revision.c
+++ b/revision.c
@@ -2336,6 +2336,16 @@ static int handle_revision_opt(struct rev_info *revs, int argc, const char **arg
revs->unpacked = 1;
} else if (starts_with(arg, "--unpacked=")) {
die(_("--unpacked=<packfile> no longer supported"));
+ } else if (!strcmp(arg, "--no-kept-objects")) {
+ revs->no_kept_objects = 1;
+ revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+ revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
+ } else if (skip_prefix(arg, "--no-kept-objects=", &optarg)) {
+ revs->no_kept_objects = 1;
+ if (!strcmp(optarg, "in-core"))
+ revs->keep_pack_cache_flags |= IN_CORE_KEEP_PACKS;
+ if (!strcmp(optarg, "on-disk"))
+ revs->keep_pack_cache_flags |= ON_DISK_KEEP_PACKS;
} else if (!strcmp(arg, "-r")) {
revs->diff = 1;
revs->diffopt.flags.recursive = 1;
@@ -3795,6 +3805,11 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
return commit_ignore;
if (revs->unpacked && has_object_pack(&commit->object.oid))
return commit_ignore;
+ if (revs->no_kept_objects) {
+ if (has_object_kept_pack(&commit->object.oid,
+ revs->keep_pack_cache_flags))
+ return commit_ignore;
+ }
if (commit->object.flags & UNINTERESTING)
return commit_ignore;
if (revs->line_level_traverse && !want_ancestry(revs)) {
diff --git a/revision.h b/revision.h
index e6be3c8..a20a530 100644
--- a/revision.h
+++ b/revision.h
@@ -148,6 +148,7 @@ struct rev_info {
edge_hint_aggressive:1,
limited:1,
unpacked:1,
+ no_kept_objects:1,
boundary:2,
count:1,
left_right:1,
@@ -317,6 +318,9 @@ struct rev_info {
* This is loaded from the commit-graph being used.
*/
struct bloom_filter_settings *bloom_filter_settings;
+
+ /* misc. flags related to '--no-kept-objects' */
+ unsigned keep_pack_cache_flags;
};
int ref_excluded(struct string_list *, const char *path);
diff --git a/t/perf/p5303-many-packs.sh b/t/perf/p5303-many-packs.sh
index ce0c42c..35c0cbd 100755
--- a/t/perf/p5303-many-packs.sh
+++ b/t/perf/p5303-many-packs.sh
@@ -28,11 +28,18 @@ repack_into_n () {
push @commits, $_ if $. % 5 == 1;
}
print reverse @commits;
- ' "$1" >pushes
+ ' "$1" >pushes &&
# create base packfile
- head -n 1 pushes |
- git pack-objects --delta-base-offset --revs staging/pack
+ base_pack=$(
+ head -n 1 pushes |
+ git pack-objects --delta-base-offset --revs staging/pack
+ ) &&
+ test_export base_pack &&
+
+ # create an empty packfile
+ empty_pack=$(git pack-objects staging/pack </dev/null) &&
+ test_export empty_pack &&
# and then incrementals between each pair of commits
last= &&
@@ -49,6 +56,12 @@ repack_into_n () {
last=$rev
done <pushes &&
+ (
+ find staging -type f -name 'pack-*.pack' |
+ xargs -n 1 basename | grep -v "$base_pack" &&
+ printf "^pack-%s.pack\n" $base_pack
+ ) >stdin.packs
+
# and install the whole thing
rm -f .git/objects/pack/* &&
mv staging/* .git/objects/pack/
@@ -91,6 +104,23 @@ do
--reflog --indexed-objects --delta-base-offset \
--stdout </dev/null >/dev/null
'
+
+ test_perf "repack with kept ($nr_packs)" '
+ git pack-objects --keep-true-parents \
+ --keep-pack=pack-$empty_pack.pack \
+ --honor-pack-keep --non-empty --all \
+ --reflog --indexed-objects --delta-base-offset \
+ --stdout </dev/null >/dev/null
+ '
+
+ test_perf "repack with --stdin-packs ($nr_packs)" '
+ git pack-objects \
+ --keep-true-parents \
+ --stdin-packs \
+ --non-empty \
+ --delta-base-offset \
+ --stdout <stdin.packs >/dev/null
+ '
done
# Measure pack loading with 10,000 packs.
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index d586fdc..b5699e0 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -532,4 +532,139 @@ test_expect_success 'prefetch objects' '
test_line_count = 1 donelines
'
+test_expect_success 'setup for --stdin-packs tests' '
+ git init stdin-packs &&
+ (
+ cd stdin-packs &&
+
+ test_commit A &&
+ test_commit B &&
+ test_commit C &&
+
+ for id in A B C
+ do
+ git pack-objects .git/objects/pack/pack-$id \
+ --incremental --revs <<-EOF
+ refs/tags/$id
+ EOF
+ done &&
+
+ ls -la .git/objects/pack
+ )
+'
+
+test_expect_success '--stdin-packs with excluded packs' '
+ (
+ cd stdin-packs &&
+
+ PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
+ PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
+ PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
+
+ git pack-objects test --stdin-packs <<-EOF &&
+ $PACK_A
+ ^$PACK_B
+ $PACK_C
+ EOF
+
+ (
+ git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
+ git show-index <$(ls .git/objects/pack/pack-C-*.idx)
+ ) >expect.raw &&
+ git show-index <$(ls test-*.idx) >actual.raw &&
+
+ cut -d" " -f2 <expect.raw | sort >expect &&
+ cut -d" " -f2 <actual.raw | sort >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success '--stdin-packs is incompatible with --filter' '
+ (
+ cd stdin-packs &&
+ test_must_fail git pack-objects --stdin-packs --stdout \
+ --filter=blob:none </dev/null 2>err &&
+ test_i18ngrep "cannot use --filter with --stdin-packs" err
+ )
+'
+
+test_expect_success '--stdin-packs is incompatible with --revs' '
+ (
+ cd stdin-packs &&
+ test_must_fail git pack-objects --stdin-packs --revs out \
+ </dev/null 2>err &&
+ test_i18ngrep "cannot use internal rev list with --stdin-packs" err
+ )
+'
+
+test_expect_success '--stdin-packs with loose objects' '
+ (
+ cd stdin-packs &&
+
+ PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
+ PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
+ PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
+
+ test_commit D && # loose
+
+ git pack-objects test2 --stdin-packs --unpacked <<-EOF &&
+ $PACK_A
+ ^$PACK_B
+ $PACK_C
+ EOF
+
+ (
+ git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
+ git show-index <$(ls .git/objects/pack/pack-C-*.idx) &&
+ git rev-list --objects --no-object-names \
+ refs/tags/C..refs/tags/D
+
+ ) >expect.raw &&
+ ls -la . &&
+ git show-index <$(ls test2-*.idx) >actual.raw &&
+
+ cut -d" " -f2 <expect.raw | sort >expect &&
+ cut -d" " -f2 <actual.raw | sort >actual &&
+ test_cmp expect actual
+ )
+'
+
+test_expect_success '--stdin-packs with broken links' '
+ (
+ cd stdin-packs &&
+
+ # make an unreachable object with a bogus parent
+ git cat-file -p HEAD >commit &&
+ sed "s/$(git rev-parse HEAD^)/$(test_oid zero)/" <commit |
+ git hash-object -w -t commit --stdin >in &&
+
+ git pack-objects .git/objects/pack/pack-D <in &&
+
+ PACK_A="$(basename .git/objects/pack/pack-A-*.pack)" &&
+ PACK_B="$(basename .git/objects/pack/pack-B-*.pack)" &&
+ PACK_C="$(basename .git/objects/pack/pack-C-*.pack)" &&
+ PACK_D="$(basename .git/objects/pack/pack-D-*.pack)" &&
+
+ git pack-objects test3 --stdin-packs --unpacked <<-EOF &&
+ $PACK_A
+ ^$PACK_B
+ $PACK_C
+ $PACK_D
+ EOF
+
+ (
+ git show-index <$(ls .git/objects/pack/pack-A-*.idx) &&
+ git show-index <$(ls .git/objects/pack/pack-C-*.idx) &&
+ git show-index <$(ls .git/objects/pack/pack-D-*.idx) &&
+ git rev-list --objects --no-object-names \
+ refs/tags/C..refs/tags/D
+ ) >expect.raw &&
+ git show-index <$(ls test3-*.idx) >actual.raw &&
+
+ cut -d" " -f2 <expect.raw | sort >expect &&
+ cut -d" " -f2 <actual.raw | sort >actual &&
+ test_cmp expect actual
+ )
+'
+
test_done
diff --git a/t/t6114-keep-packs.sh b/t/t6114-keep-packs.sh
new file mode 100755
index 0000000..9239d8a
--- /dev/null
+++ b/t/t6114-keep-packs.sh
@@ -0,0 +1,69 @@
+#!/bin/sh
+
+test_description='rev-list with .keep packs'
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+ test_commit loose &&
+ test_commit packed &&
+ test_commit kept &&
+
+ KEPT_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
+ refs/tags/kept
+ ^refs/tags/packed
+ EOF
+ ) &&
+ MISC_PACK=$(git pack-objects --revs .git/objects/pack/pack <<-EOF
+ refs/tags/packed
+ ^refs/tags/loose
+ EOF
+ ) &&
+
+ touch .git/objects/pack/pack-$KEPT_PACK.keep
+'
+
+rev_list_objects () {
+ git rev-list "$@" >out &&
+ sort out
+}
+
+idx_objects () {
+ git show-index <$1 >expect-idx &&
+ cut -d" " -f2 <expect-idx | sort
+}
+
+test_expect_success '--no-kept-objects excludes trees and blobs in .keep packs' '
+ rev_list_objects --objects --all --no-object-names >kept &&
+ rev_list_objects --objects --all --no-object-names --no-kept-objects >no-kept &&
+
+ idx_objects .git/objects/pack/pack-$KEPT_PACK.idx >expect &&
+ comm -3 kept no-kept >actual &&
+
+ test_cmp expect actual
+'
+
+test_expect_success '--no-kept-objects excludes kept non-MIDX object' '
+ test_config core.multiPackIndex true &&
+
+ # Create a pack with just the commit object in pack, and do not mark it
+ # as kept (even though it appears in $KEPT_PACK, which does have a .keep
+ # file).
+ MIDX_PACK=$(git pack-objects .git/objects/pack/pack <<-EOF
+ $(git rev-parse kept)
+ EOF
+ ) &&
+
+ # Write a MIDX containing all packs, but use the version of the commit
+ # at "kept" in a non-kept pack by touching $MIDX_PACK.
+ touch .git/objects/pack/pack-$MIDX_PACK.pack &&
+ git multi-pack-index write &&
+
+ rev_list_objects --objects --no-object-names --no-kept-objects HEAD >actual &&
+ (
+ idx_objects .git/objects/pack/pack-$MISC_PACK.idx &&
+ git rev-list --objects --no-object-names refs/tags/loose
+ ) | sort >expect &&
+ test_cmp expect actual
+'
+
+test_done
diff --git a/t/t7703-repack-geometric.sh b/t/t7703-repack-geometric.sh
new file mode 100755
index 0000000..5ccaa44
--- /dev/null
+++ b/t/t7703-repack-geometric.sh
@@ -0,0 +1,183 @@
+#!/bin/sh
+
+test_description='git repack --geometric works correctly'
+
+. ./test-lib.sh
+
+GIT_TEST_MULTI_PACK_INDEX=0
+
+objdir=.git/objects
+midx=$objdir/pack/multi-pack-index
+
+test_expect_success '--geometric with no packs' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ git repack --geometric 2 >out &&
+ test_i18ngrep "Nothing new to pack" out
+ )
+'
+
+test_expect_success '--geometric with one pack' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ test_commit "base" &&
+ git repack -d &&
+
+ git repack --geometric 2 >out &&
+
+ test_i18ngrep "Nothing new to pack" out
+ )
+'
+
+test_expect_success '--geometric with an intact progression' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ # These packs already form a geometric progression.
+ test_commit_bulk --start=1 1 && # 3 objects
+ test_commit_bulk --start=2 2 && # 6 objects
+ test_commit_bulk --start=4 4 && # 12 objects
+
+ find $objdir/pack -name "*.pack" | sort >expect &&
+ git repack --geometric 2 -d &&
+ find $objdir/pack -name "*.pack" | sort >actual &&
+
+ test_cmp expect actual
+ )
+'
+
+test_expect_success '--geometric with loose objects' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ # These packs already form a geometric progression.
+ test_commit_bulk --start=1 1 && # 3 objects
+ test_commit_bulk --start=2 2 && # 6 objects
+ # The loose objects are packed together, breaking the
+ # progression.
+ test_commit loose && # 3 objects
+
+ find $objdir/pack -name "*.pack" | sort >before &&
+ git repack --geometric 2 -d &&
+ find $objdir/pack -name "*.pack" | sort >after &&
+
+ comm -13 before after >new &&
+ comm -23 before after >removed &&
+
+ test_line_count = 1 new &&
+ test_must_be_empty removed &&
+
+ git repack --geometric 2 -d &&
+ find $objdir/pack -name "*.pack" | sort >after &&
+
+ # The progression (3, 3, 6) is combined into one new pack.
+ test_line_count = 1 after
+ )
+'
+
+test_expect_success '--geometric with small-pack rollup' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ test_commit_bulk --start=1 1 && # 3 objects
+ test_commit_bulk --start=2 1 && # 3 objects
+ find $objdir/pack -name "*.pack" | sort >small &&
+ test_commit_bulk --start=3 4 && # 12 objects
+ test_commit_bulk --start=7 8 && # 24 objects
+ find $objdir/pack -name "*.pack" | sort >before &&
+
+ git repack --geometric 2 -d &&
+
+ # Three packs in total; two of the existing large ones, and one
+ # new one.
+ find $objdir/pack -name "*.pack" | sort >after &&
+ test_line_count = 3 after &&
+ comm -3 small before | tr -d "\t" >large &&
+ grep -qFf large after
+ )
+'
+
+test_expect_success '--geometric with small- and large-pack rollup' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ # size(small1) + size(small2) > size(medium) / 2
+ test_commit_bulk --start=1 1 && # 3 objects
+ test_commit_bulk --start=2 1 && # 3 objects
+ test_commit_bulk --start=2 3 && # 7 objects
+ test_commit_bulk --start=6 9 && # 27 objects &&
+
+ find $objdir/pack -name "*.pack" | sort >before &&
+
+ git repack --geometric 2 -d &&
+
+ find $objdir/pack -name "*.pack" | sort >after &&
+ comm -12 before after >untouched &&
+
+ # Two packs in total; the largest pack from before running "git
+ # repack", and one new one.
+ test_line_count = 1 untouched &&
+ test_line_count = 2 after
+ )
+'
+
+test_expect_success '--geometric ignores kept packs' '
+ git init geometric &&
+ test_when_finished "rm -fr geometric" &&
+ (
+ cd geometric &&
+
+ test_commit kept && # 3 objects
+ test_commit pack && # 3 objects
+
+ KEPT=$(git pack-objects --revs $objdir/pack/pack <<-EOF
+ refs/tags/kept
+ EOF
+ ) &&
+ PACK=$(git pack-objects --revs $objdir/pack/pack <<-EOF
+ refs/tags/pack
+ ^refs/tags/kept
+ EOF
+ ) &&
+
+ # neither pack contains more than twice the number of objects in
+ # the other, so they should be combined. but, marking one as
+ # .kept on disk will "freeze" it, so the pack structure should
+ # remain unchanged.
+ touch $objdir/pack/pack-$KEPT.keep &&
+
+ find $objdir/pack -name "*.pack" | sort >before &&
+ git repack --geometric 2 -d &&
+ find $objdir/pack -name "*.pack" | sort >after &&
+
+ # both packs should still exist
+ test_path_is_file $objdir/pack/pack-$KEPT.pack &&
+ test_path_is_file $objdir/pack/pack-$PACK.pack &&
+
+ # and no new packs should be created
+ test_cmp before after &&
+
+ # Passing --pack-kept-objects causes packs with a .keep file to
+ # be repacked, too.
+ git repack --geometric 2 -d --pack-kept-objects &&
+
+ find $objdir/pack -name "*.pack" >after &&
+ test_line_count = 1 after
+ )
+'
+
+test_done