diff options
author | Junio C Hamano <gitster@pobox.com> | 2022-06-03 21:30:37 (GMT) |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2022-06-03 21:30:37 (GMT) |
commit | a50036da1a39806a8ae1aba2e2f2fea6f7fb8e08 (patch) | |
tree | 1e6c184852ee5a5e5860073c04ea73e034b0060f /builtin/pack-objects.c | |
parent | 37d4ae58efcc9f716435f327c39d5552aedb4b7c (diff) | |
parent | a613164257b46700ca583bdcab160c712ad392fe (diff) | |
download | git-a50036da1a39806a8ae1aba2e2f2fea6f7fb8e08.zip git-a50036da1a39806a8ae1aba2e2f2fea6f7fb8e08.tar.gz git-a50036da1a39806a8ae1aba2e2f2fea6f7fb8e08.tar.bz2 |
Merge branch 'tb/cruft-packs'
A mechanism to pack unreachable objects into a "cruft pack",
instead of ejecting them into loose form to be reclaimed later, has
been introduced.
* tb/cruft-packs:
sha1-file.c: don't freshen cruft packs
builtin/gc.c: conditionally avoid pruning objects via loose
builtin/repack.c: add cruft packs to MIDX during geometric repack
builtin/repack.c: use named flags for existing_packs
builtin/repack.c: allow configuring cruft pack generation
builtin/repack.c: support generating a cruft pack
builtin/pack-objects.c: --cruft with expiration
reachable: report precise timestamps from objects in cruft packs
reachable: add options to add_unseen_recent_objects_to_traversal
builtin/pack-objects.c: --cruft without expiration
builtin/pack-objects.c: return from create_object_entry()
t/helper: add 'pack-mtimes' test-tool
pack-mtimes: support writing pack .mtimes files
chunk-format.h: extract oid_version()
pack-write: pass 'struct packing_data' to 'stage_tmp_packfiles'
pack-mtimes: support reading .mtimes files
Documentation/technical: add cruft-packs.txt
Diffstat (limited to 'builtin/pack-objects.c')
-rw-r--r-- | builtin/pack-objects.c | 304 |
1 files changed, 291 insertions, 13 deletions
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 0a26de1..040ceba 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -36,6 +36,7 @@ #include "trace2.h" #include "shallow.h" #include "promisor-remote.h" +#include "pack-mtimes.h" /* * Objects we are going to pack are collected in the `to_pack` structure. @@ -194,6 +195,8 @@ static int reuse_delta = 1, reuse_object = 1; static int keep_unreachable, unpack_unreachable, include_tag; static timestamp_t unpack_unreachable_expiration; static int pack_loose_unreachable; +static int cruft; +static timestamp_t cruft_expiration; static int local; static int have_non_local_packs; static int incremental; @@ -1260,9 +1263,13 @@ static void write_pack_file(void) &to_pack, written_list, nr_written); } + if (cruft) + pack_idx_opts.flags |= WRITE_MTIMES; + stage_tmp_packfiles(&tmpname, pack_tmp_name, written_list, nr_written, - &pack_idx_opts, hash, &idx_tmp_name); + &to_pack, &pack_idx_opts, hash, + &idx_tmp_name); if (write_bitmap_index) { size_t tmpname_len = tmpname.len; @@ -1521,13 +1528,13 @@ static int want_object_in_pack(const struct object_id *oid, return 1; } -static void create_object_entry(const struct object_id *oid, - enum object_type type, - uint32_t hash, - int exclude, - int no_try_delta, - struct packed_git *found_pack, - off_t found_offset) +static struct object_entry *create_object_entry(const struct object_id *oid, + enum object_type type, + uint32_t hash, + int exclude, + int no_try_delta, + struct packed_git *found_pack, + off_t found_offset) { struct object_entry *entry; @@ -1544,6 +1551,8 @@ static void create_object_entry(const struct object_id *oid, } entry->no_try_delta = no_try_delta; + + return entry; } static const char no_closure_warning[] = N_( @@ -3403,6 +3412,217 @@ static void read_packs_list_from_stdin(void) string_list_clear(&exclude_packs, 0); } +static void add_cruft_object_entry(const struct object_id *oid, enum object_type type, + struct packed_git *pack, off_t offset, + const char *name, uint32_t mtime) +{ + struct object_entry *entry; + + display_progress(progress_state, ++nr_seen); + + entry = packlist_find(&to_pack, oid); + if (entry) { + if (name) { + entry->hash = pack_name_hash(name); + entry->no_try_delta = no_try_delta(name); + } + } else { + if (!want_object_in_pack(oid, 0, &pack, &offset)) + return; + if (!pack && type == OBJ_BLOB && !has_loose_object(oid)) { + /* + * If a traversed tree has a missing blob then we want + * to avoid adding that missing object to our pack. + * + * This only applies to missing blobs, not trees, + * because the traversal needs to parse sub-trees but + * not blobs. + * + * Note we only perform this check when we couldn't + * already find the object in a pack, so we're really + * limited to "ensure non-tip blobs which don't exist in + * packs do exist via loose objects". Confused? + */ + return; + } + + entry = create_object_entry(oid, type, pack_name_hash(name), + 0, name && no_try_delta(name), + pack, offset); + } + + if (mtime > oe_cruft_mtime(&to_pack, entry)) + oe_set_cruft_mtime(&to_pack, entry, mtime); + return; +} + +static void show_cruft_object(struct object *obj, const char *name, void *data) +{ + /* + * if we did not record it earlier, it's at least as old as our + * expiration value. Rather than find it exactly, just use that + * value. This may bump it forward from its real mtime, but it + * will still be "too old" next time we run with the same + * expiration. + * + * if obj does appear in the packing list, this call is a noop (or may + * set the namehash). + */ + add_cruft_object_entry(&obj->oid, obj->type, NULL, 0, name, cruft_expiration); +} + +static void show_cruft_commit(struct commit *commit, void *data) +{ + show_cruft_object((struct object*)commit, NULL, data); +} + +static int cruft_include_check_obj(struct object *obj, void *data) +{ + return !has_object_kept_pack(&obj->oid, IN_CORE_KEEP_PACKS); +} + +static int cruft_include_check(struct commit *commit, void *data) +{ + return cruft_include_check_obj((struct object*)commit, data); +} + +static void set_cruft_mtime(const struct object *object, + struct packed_git *pack, + off_t offset, time_t mtime) +{ + add_cruft_object_entry(&object->oid, object->type, pack, offset, NULL, + mtime); +} + +static void mark_pack_kept_in_core(struct string_list *packs, unsigned keep) +{ + struct string_list_item *item = NULL; + for_each_string_list_item(item, packs) { + struct packed_git *p = item->util; + if (!p) + die(_("could not find pack '%s'"), item->string); + p->pack_keep_in_core = keep; + } +} + +static void add_unreachable_loose_objects(void); +static void add_objects_in_unpacked_packs(void); + +static void enumerate_cruft_objects(void) +{ + if (progress) + progress_state = start_progress(_("Enumerating cruft objects"), 0); + + add_objects_in_unpacked_packs(); + add_unreachable_loose_objects(); + + stop_progress(&progress_state); +} + +static void enumerate_and_traverse_cruft_objects(struct string_list *fresh_packs) +{ + struct packed_git *p; + struct rev_info revs; + int ret; + + repo_init_revisions(the_repository, &revs, NULL); + + revs.tag_objects = 1; + revs.tree_objects = 1; + revs.blob_objects = 1; + + revs.include_check = cruft_include_check; + revs.include_check_obj = cruft_include_check_obj; + + revs.ignore_missing_links = 1; + + if (progress) + progress_state = start_progress(_("Enumerating cruft objects"), 0); + ret = add_unseen_recent_objects_to_traversal(&revs, cruft_expiration, + set_cruft_mtime, 1); + stop_progress(&progress_state); + + if (ret) + die(_("unable to add cruft objects")); + + /* + * Re-mark only the fresh packs as kept so that objects in + * unknown packs do not halt the reachability traversal early. + */ + for (p = get_all_packs(the_repository); p; p = p->next) + p->pack_keep_in_core = 0; + mark_pack_kept_in_core(fresh_packs, 1); + + if (prepare_revision_walk(&revs)) + die(_("revision walk setup failed")); + if (progress) + progress_state = start_progress(_("Traversing cruft objects"), 0); + nr_seen = 0; + traverse_commit_list(&revs, show_cruft_commit, show_cruft_object, NULL); + + stop_progress(&progress_state); +} + +static void read_cruft_objects(void) +{ + struct strbuf buf = STRBUF_INIT; + struct string_list discard_packs = STRING_LIST_INIT_DUP; + struct string_list fresh_packs = STRING_LIST_INIT_DUP; + struct packed_git *p; + + ignore_packed_keep_in_core = 1; + + while (strbuf_getline(&buf, stdin) != EOF) { + if (!buf.len) + continue; + + if (*buf.buf == '-') + string_list_append(&discard_packs, buf.buf + 1); + else + string_list_append(&fresh_packs, buf.buf); + strbuf_reset(&buf); + } + + string_list_sort(&discard_packs); + string_list_sort(&fresh_packs); + + for (p = get_all_packs(the_repository); p; p = p->next) { + const char *pack_name = pack_basename(p); + struct string_list_item *item; + + item = string_list_lookup(&fresh_packs, pack_name); + if (!item) + item = string_list_lookup(&discard_packs, pack_name); + + if (item) { + item->util = p; + } else { + /* + * This pack wasn't mentioned in either the "fresh" or + * "discard" list, so the caller didn't know about it. + * + * Mark it as kept so that its objects are ignored by + * add_unseen_recent_objects_to_traversal(). We'll + * unmark it before starting the traversal so it doesn't + * halt the traversal early. + */ + p->pack_keep_in_core = 1; + } + } + + mark_pack_kept_in_core(&fresh_packs, 1); + mark_pack_kept_in_core(&discard_packs, 0); + + if (cruft_expiration) + enumerate_and_traverse_cruft_objects(&fresh_packs); + else + enumerate_cruft_objects(); + + strbuf_release(&buf); + string_list_clear(&discard_packs, 0); + string_list_clear(&fresh_packs, 0); +} + static void read_object_list_from_stdin(void) { char line[GIT_MAX_HEXSZ + 1 + PATH_MAX + 2]; @@ -3535,7 +3755,24 @@ static int add_object_in_unpacked_pack(const struct object_id *oid, uint32_t pos, void *_data) { - add_object_entry(oid, OBJ_NONE, "", 0); + if (cruft) { + off_t offset; + time_t mtime; + + if (pack->is_cruft) { + if (load_pack_mtimes(pack) < 0) + die(_("could not load cruft pack .mtimes")); + mtime = nth_packed_mtime(pack, pos); + } else { + mtime = pack->mtime; + } + offset = nth_packed_object_offset(pack, pos); + + add_cruft_object_entry(oid, OBJ_NONE, pack, offset, + NULL, mtime); + } else { + add_object_entry(oid, OBJ_NONE, "", 0); + } return 0; } @@ -3559,7 +3796,19 @@ static int add_loose_object(const struct object_id *oid, const char *path, return 0; } - add_object_entry(oid, type, "", 0); + if (cruft) { + struct stat st; + if (stat(path, &st) < 0) { + if (errno == ENOENT) + return 0; + return error_errno("unable to stat %s", oid_to_hex(oid)); + } + + add_cruft_object_entry(oid, type, NULL, 0, NULL, + st.st_mtime); + } else { + add_object_entry(oid, type, "", 0); + } return 0; } @@ -3799,7 +4048,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av) if (unpack_unreachable_expiration) { revs->ignore_missing_links = 1; if (add_unseen_recent_objects_to_traversal(revs, - unpack_unreachable_expiration)) + unpack_unreachable_expiration, NULL, 0)) die(_("unable to add recent objects")); if (prepare_revision_walk(revs)) die(_("revision walk setup failed")); @@ -3876,6 +4125,20 @@ static int option_parse_unpack_unreachable(const struct option *opt, return 0; } +static int option_parse_cruft_expiration(const struct option *opt, + const char *arg, int unset) +{ + if (unset) { + cruft = 0; + cruft_expiration = 0; + } else { + cruft = 1; + if (arg) + cruft_expiration = approxidate(arg); + } + return 0; +} + struct po_filter_data { unsigned have_revs:1; struct rev_info revs; @@ -3965,6 +4228,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) OPT_CALLBACK_F(0, "unpack-unreachable", NULL, N_("time"), N_("unpack unreachable objects newer than <time>"), PARSE_OPT_OPTARG, option_parse_unpack_unreachable), + OPT_BOOL(0, "cruft", &cruft, N_("create a cruft pack")), + OPT_CALLBACK_F(0, "cruft-expiration", NULL, N_("time"), + N_("expire cruft objects older than <time>"), + PARSE_OPT_OPTARG, option_parse_cruft_expiration), OPT_BOOL(0, "sparse", &sparse, N_("use the sparse reachability algorithm")), OPT_BOOL(0, "thin", &thin, @@ -4091,7 +4358,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!HAVE_THREADS && delta_search_threads != 1) warning(_("no threads support, ignoring --threads")); - if (!pack_to_stdout && !pack_size_limit) + if (!pack_to_stdout && !pack_size_limit && !cruft) pack_size_limit = pack_size_limit_cfg; if (pack_to_stdout && pack_size_limit) die(_("--max-pack-size cannot be used to build a pack for transfer")); @@ -4118,6 +4385,15 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (stdin_packs && use_internal_rev_list) die(_("cannot use internal rev list with --stdin-packs")); + if (cruft) { + if (use_internal_rev_list) + die(_("cannot use internal rev list with --cruft")); + if (stdin_packs) + die(_("cannot use --stdin-packs with --cruft")); + if (pack_size_limit) + die(_("cannot use --max-pack-size with --cruft")); + } + /* * "soft" reasons not to use bitmaps - for on-disk repack by default we want * @@ -4174,7 +4450,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) the_repository); prepare_packing_data(the_repository, &to_pack); - if (progress) + if (progress && !cruft) progress_state = start_progress(_("Enumerating objects"), 0); if (stdin_packs) { /* avoids adding objects in excluded packs */ @@ -4182,6 +4458,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) read_packs_list_from_stdin(); if (rev_list_unpacked) add_unreachable_loose_objects(); + } else if (cruft) { + read_cruft_objects(); } else if (!use_internal_rev_list) { read_object_list_from_stdin(); } else if (pfd.have_revs) { |