summaryrefslogtreecommitdiff
path: root/builtin/repack.c
diff options
context:
space:
mode:
authorChristian Couder <christian.couder@gmail.com>2023-10-02 16:55:01 (GMT)
committerJunio C Hamano <gitster@pobox.com>2023-10-02 21:54:30 (GMT)
commit48a9b67b4301083d0820e90afef208ed1436298f (patch)
treee9ccde728d847d0b6d1cfd5bb628f1d8feaec4c7 /builtin/repack.c
parent0e4747ec8b5558e7106146fd37501649f577bcfa (diff)
downloadgit-48a9b67b4301083d0820e90afef208ed1436298f.zip
git-48a9b67b4301083d0820e90afef208ed1436298f.tar.gz
git-48a9b67b4301083d0820e90afef208ed1436298f.tar.bz2
repack: add `--filter=<filter-spec>` option
This new option puts the objects specified by `<filter-spec>` into a separate packfile. This could be useful if, for example, some blobs take up a lot of precious space on fast storage while they are rarely accessed. It could make sense to move them into a separate cheaper, though slower, storage. It's possible to find which new packfile contains the filtered out objects using one of the following: - `git verify-pack -v ...`, - `test-tool find-pack ...`, which a previous commit added, - `--filter-to=<dir>`, which a following commit will add to specify where the pack containing the filtered out objects will be. This feature is implemented by running `git pack-objects` twice in a row. The first command is run with `--filter=<filter-spec>`, using the specified filter. It packs objects while omitting the objects specified by the filter. Then another `git pack-objects` command is launched using `--stdin-packs`. We pass it all the previously existing packs into its stdin, so that it will pack all the objects in the previously existing packs. But we also pass into its stdin, the pack created by the previous `git pack-objects --filter=<filter-spec>` command as well as the kept packs, all prefixed with '^', so that the objects in these packs will be omitted from the resulting pack. The result is that only the objects filtered out by the first `git pack-objects` command are in the pack resulting from the second `git pack-objects` command. As the interactions with kept packs are a bit tricky, a few related tests are added. Helped-by: Taylor Blau <me@ttaylorr.com> Signed-off-by: John Cai <johncai86@gmail.com> Signed-off-by: Christian Couder <chriscool@tuxfamily.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/repack.c')
-rw-r--r--builtin/repack.c70
1 files changed, 70 insertions, 0 deletions
diff --git a/builtin/repack.c b/builtin/repack.c
index 9ef0044..c7b5641 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -21,6 +21,7 @@
#include "pack.h"
#include "pack-bitmap.h"
#include "refs.h"
+#include "list-objects-filter-options.h"
#define ALL_INTO_ONE 1
#define LOOSEN_UNREACHABLE 2
@@ -56,6 +57,7 @@ struct pack_objects_args {
int no_reuse_object;
int quiet;
int local;
+ struct list_objects_filter_options filter_options;
};
static int repack_config(const char *var, const char *value,
@@ -836,6 +838,56 @@ static int finish_pack_objects_cmd(struct child_process *cmd,
return finish_command(cmd);
}
+static int write_filtered_pack(const struct pack_objects_args *args,
+ const char *destination,
+ const char *pack_prefix,
+ struct existing_packs *existing,
+ struct string_list *names)
+{
+ struct child_process cmd = CHILD_PROCESS_INIT;
+ struct string_list_item *item;
+ FILE *in;
+ int ret;
+ const char *caret;
+ const char *scratch;
+ int local = skip_prefix(destination, packdir, &scratch);
+
+ prepare_pack_objects(&cmd, args, destination);
+
+ strvec_push(&cmd.args, "--stdin-packs");
+
+ if (!pack_kept_objects)
+ strvec_push(&cmd.args, "--honor-pack-keep");
+ for_each_string_list_item(item, &existing->kept_packs)
+ strvec_pushf(&cmd.args, "--keep-pack=%s", item->string);
+
+ cmd.in = -1;
+
+ ret = start_command(&cmd);
+ if (ret)
+ return ret;
+
+ /*
+ * Here 'names' contains only the pack(s) that were just
+ * written, which is exactly the packs we want to keep. Also
+ * 'existing_kept_packs' already contains the packs in
+ * 'keep_pack_list'.
+ */
+ in = xfdopen(cmd.in, "w");
+ for_each_string_list_item(item, names)
+ fprintf(in, "^%s-%s.pack\n", pack_prefix, item->string);
+ for_each_string_list_item(item, &existing->non_kept_packs)
+ fprintf(in, "%s.pack\n", item->string);
+ for_each_string_list_item(item, &existing->cruft_packs)
+ fprintf(in, "%s.pack\n", item->string);
+ caret = pack_kept_objects ? "" : "^";
+ for_each_string_list_item(item, &existing->kept_packs)
+ fprintf(in, "%s%s.pack\n", caret, item->string);
+ fclose(in);
+
+ return finish_pack_objects_cmd(&cmd, names, local);
+}
+
static int write_cruft_pack(const struct pack_objects_args *args,
const char *destination,
const char *pack_prefix,
@@ -966,6 +1018,7 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("limits the maximum number of threads")),
OPT_STRING(0, "max-pack-size", &po_args.max_pack_size, N_("bytes"),
N_("maximum size of each packfile")),
+ OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options),
OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects,
N_("repack objects in packs marked with .keep")),
OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"),
@@ -979,6 +1032,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
OPT_END()
};
+ list_objects_filter_init(&po_args.filter_options);
+
git_config(repack_config, &cruft_po_args);
argc = parse_options(argc, argv, prefix, builtin_repack_options,
@@ -1119,6 +1174,10 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
strvec_push(&cmd.args, "--incremental");
}
+ if (po_args.filter_options.choice)
+ strvec_pushf(&cmd.args, "--filter=%s",
+ expand_list_objects_filter_spec(&po_args.filter_options));
+
if (geometry.split_factor)
cmd.in = -1;
else
@@ -1205,6 +1264,16 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
}
}
+ if (po_args.filter_options.choice) {
+ ret = write_filtered_pack(&po_args,
+ packtmp,
+ find_pack_prefix(packdir, packtmp),
+ &existing,
+ &names);
+ if (ret)
+ goto cleanup;
+ }
+
string_list_sort(&names);
close_object_store(the_repository->objects);
@@ -1297,6 +1366,7 @@ cleanup:
string_list_clear(&names, 1);
existing_packs_release(&existing);
free_pack_geometry(&geometry);
+ list_objects_filter_release(&po_args.filter_options);
return ret;
}