summaryrefslogtreecommitdiff
path: root/builtin/gc.c
diff options
context:
space:
mode:
authorJunio C Hamano <gitster@pobox.com>2018-05-23 05:38:14 (GMT)
committerJunio C Hamano <gitster@pobox.com>2018-05-23 05:38:14 (GMT)
commit30b015bffe21a127d4f349f9e876562c3c94a1a2 (patch)
tree665d489fdc6cbef1a1dd9ed2dfa1f75afb717a35 /builtin/gc.c
parentc89b6e136e421f1e9108b3c5bc050b19b0243844 (diff)
parent5af050437af1c061804b4317154085c65490bbac (diff)
downloadgit-30b015bffe21a127d4f349f9e876562c3c94a1a2.zip
git-30b015bffe21a127d4f349f9e876562c3c94a1a2.tar.gz
git-30b015bffe21a127d4f349f9e876562c3c94a1a2.tar.bz2
Merge branch 'nd/repack-keep-pack'
"git gc" in a large repository takes a lot of time as it considers to repack all objects into one pack by default. The command has been taught to pretend as if the largest existing packfile is marked with ".keep" so that it is left untouched while objects in other packs and loose ones are repacked. * nd/repack-keep-pack: pack-objects: show some progress when counting kept objects gc --auto: exclude base pack if not enough mem to "repack -ad" gc: handle a corner case in gc.bigPackThreshold gc: add gc.bigPackThreshold config gc: add --keep-largest-pack option repack: add --keep-pack option t7700: have closing quote of a test at the beginning of line
Diffstat (limited to 'builtin/gc.c')
-rw-r--r--builtin/gc.c165
1 files changed, 159 insertions, 6 deletions
diff --git a/builtin/gc.c b/builtin/gc.c
index d604940..c4777b2 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -22,6 +22,10 @@
#include "commit.h"
#include "packfile.h"
#include "object-store.h"
+#include "pack.h"
+#include "pack-objects.h"
+#include "blob.h"
+#include "tree.h"
#define FAILED_RUN "failed to run %s"
@@ -41,6 +45,8 @@ static timestamp_t gc_log_expire_time;
static const char *gc_log_expire = "1.day.ago";
static const char *prune_expire = "2.weeks.ago";
static const char *prune_worktrees_expire = "3.months.ago";
+static unsigned long big_pack_threshold;
+static unsigned long max_delta_cache_size = DEFAULT_DELTA_CACHE_SIZE;
static struct argv_array pack_refs_cmd = ARGV_ARRAY_INIT;
static struct argv_array reflog = ARGV_ARRAY_INIT;
@@ -128,6 +134,9 @@ static void gc_config(void)
git_config_get_expiry("gc.worktreepruneexpire", &prune_worktrees_expire);
git_config_get_expiry("gc.logexpiry", &gc_log_expire);
+ git_config_get_ulong("gc.bigpackthreshold", &big_pack_threshold);
+ git_config_get_ulong("pack.deltacachesize", &max_delta_cache_size);
+
git_config(git_default_config, NULL);
}
@@ -166,6 +175,28 @@ static int too_many_loose_objects(void)
return needed;
}
+static struct packed_git *find_base_packs(struct string_list *packs,
+ unsigned long limit)
+{
+ struct packed_git *p, *base = NULL;
+
+ for (p = get_packed_git(the_repository); p; p = p->next) {
+ if (!p->pack_local)
+ continue;
+ if (limit) {
+ if (p->pack_size >= limit)
+ string_list_append(packs, p->pack_name);
+ } else if (!base || base->pack_size < p->pack_size) {
+ base = p;
+ }
+ }
+
+ if (base)
+ string_list_append(packs, base->pack_name);
+
+ return base;
+}
+
static int too_many_packs(void)
{
struct packed_git *p;
@@ -188,7 +219,86 @@ static int too_many_packs(void)
return gc_auto_pack_limit < cnt;
}
-static void add_repack_all_option(void)
+static uint64_t total_ram(void)
+{
+#if defined(HAVE_SYSINFO)
+ struct sysinfo si;
+
+ if (!sysinfo(&si))
+ return si.totalram;
+#elif defined(HAVE_BSD_SYSCTL) && (defined(HW_MEMSIZE) || defined(HW_PHYSMEM))
+ int64_t physical_memory;
+ int mib[2];
+ size_t length;
+
+ mib[0] = CTL_HW;
+# if defined(HW_MEMSIZE)
+ mib[1] = HW_MEMSIZE;
+# else
+ mib[1] = HW_PHYSMEM;
+# endif
+ length = sizeof(int64_t);
+ if (!sysctl(mib, 2, &physical_memory, &length, NULL, 0))
+ return physical_memory;
+#elif defined(GIT_WINDOWS_NATIVE)
+ MEMORYSTATUSEX memInfo;
+
+ memInfo.dwLength = sizeof(MEMORYSTATUSEX);
+ if (GlobalMemoryStatusEx(&memInfo))
+ return memInfo.ullTotalPhys;
+#endif
+ return 0;
+}
+
+static uint64_t estimate_repack_memory(struct packed_git *pack)
+{
+ unsigned long nr_objects = approximate_object_count();
+ size_t os_cache, heap;
+
+ if (!pack || !nr_objects)
+ return 0;
+
+ /*
+ * First we have to scan through at least one pack.
+ * Assume enough room in OS file cache to keep the entire pack
+ * or we may accidentally evict data of other processes from
+ * the cache.
+ */
+ os_cache = pack->pack_size + pack->index_size;
+ /* then pack-objects needs lots more for book keeping */
+ heap = sizeof(struct object_entry) * nr_objects;
+ /*
+ * internal rev-list --all --objects takes up some memory too,
+ * let's say half of it is for blobs
+ */
+ heap += sizeof(struct blob) * nr_objects / 2;
+ /*
+ * and the other half is for trees (commits and tags are
+ * usually insignificant)
+ */
+ heap += sizeof(struct tree) * nr_objects / 2;
+ /* and then obj_hash[], underestimated in fact */
+ heap += sizeof(struct object *) * nr_objects;
+ /* revindex is used also */
+ heap += sizeof(struct revindex_entry) * nr_objects;
+ /*
+ * read_sha1_file() (either at delta calculation phase, or
+ * writing phase) also fills up the delta base cache
+ */
+ heap += delta_base_cache_limit;
+ /* and of course pack-objects has its own delta cache */
+ heap += max_delta_cache_size;
+
+ return os_cache + heap;
+}
+
+static int keep_one_pack(struct string_list_item *item, void *data)
+{
+ argv_array_pushf(&repack, "--keep-pack=%s", basename(item->string));
+ return 0;
+}
+
+static void add_repack_all_option(struct string_list *keep_pack)
{
if (prune_expire && !strcmp(prune_expire, "now"))
argv_array_push(&repack, "-a");
@@ -197,6 +307,9 @@ static void add_repack_all_option(void)
if (prune_expire)
argv_array_pushf(&repack, "--unpack-unreachable=%s", prune_expire);
}
+
+ if (keep_pack)
+ for_each_string_list(keep_pack, keep_one_pack, NULL);
}
static void add_repack_incremental_option(void)
@@ -219,9 +332,35 @@ static int need_to_gc(void)
* we run "repack -A -d -l". Otherwise we tell the caller
* there is no need.
*/
- if (too_many_packs())
- add_repack_all_option();
- else if (too_many_loose_objects())
+ if (too_many_packs()) {
+ struct string_list keep_pack = STRING_LIST_INIT_NODUP;
+
+ if (big_pack_threshold) {
+ find_base_packs(&keep_pack, big_pack_threshold);
+ if (keep_pack.nr >= gc_auto_pack_limit) {
+ big_pack_threshold = 0;
+ string_list_clear(&keep_pack, 0);
+ find_base_packs(&keep_pack, 0);
+ }
+ } else {
+ struct packed_git *p = find_base_packs(&keep_pack, 0);
+ uint64_t mem_have, mem_want;
+
+ mem_have = total_ram();
+ mem_want = estimate_repack_memory(p);
+
+ /*
+ * Only allow 1/2 of memory for pack-objects, leave
+ * the rest for the OS and other processes in the
+ * system.
+ */
+ if (!mem_have || mem_want < mem_have / 2)
+ string_list_clear(&keep_pack, 0);
+ }
+
+ add_repack_all_option(&keep_pack);
+ string_list_clear(&keep_pack, 0);
+ } else if (too_many_loose_objects())
add_repack_incremental_option();
else
return 0;
@@ -354,6 +493,7 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
const char *name;
pid_t pid;
int daemonized = 0;
+ int keep_base_pack = -1;
timestamp_t dummy;
struct option builtin_gc_options[] = {
@@ -367,6 +507,8 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
OPT_BOOL_F(0, "force", &force,
N_("force running gc even if there may be another gc running"),
PARSE_OPT_NOCOMPLETE),
+ OPT_BOOL(0, "keep-largest-pack", &keep_base_pack,
+ N_("repack all other packs except the largest pack")),
OPT_END()
};
@@ -435,8 +577,19 @@ int cmd_gc(int argc, const char **argv, const char *prefix)
*/
daemonized = !daemonize();
}
- } else
- add_repack_all_option();
+ } else {
+ struct string_list keep_pack = STRING_LIST_INIT_NODUP;
+
+ if (keep_base_pack != -1) {
+ if (keep_base_pack)
+ find_base_packs(&keep_pack, 0);
+ } else if (big_pack_threshold) {
+ find_base_packs(&keep_pack, big_pack_threshold);
+ }
+
+ add_repack_all_option(&keep_pack);
+ string_list_clear(&keep_pack, 0);
+ }
name = lock_repo_for_gc(force, &pid);
if (name) {