From 99bf115c879af7e38ef0ca9596fc9db1d6598d5f Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:24 -0700 Subject: repository: introduce parsed objects field Convert the existing global cache for parsed objects (obj_hash) into repository-specific parsed object caches. Existing code that uses obj_hash are modified to use the parsed object cache of the_repository; future patches will use the parsed object caches of other repositories. Another future use case for a pool of objects is ease of memory management in revision walking: If we can free the rev-list related memory early in pack-objects (e.g. part of repack operation) then it could lower memory pressure significantly when running on large repos. While this has been discussed on the mailing list lately, this series doesn't implement this. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/object.c b/object.c index 5044d08..f7c624a 100644 --- a/object.c +++ b/object.c @@ -8,17 +8,14 @@ #include "object-store.h" #include "packfile.h" -static struct object **obj_hash; -static int nr_objs, obj_hash_size; - unsigned int get_max_object_index(void) { - return obj_hash_size; + return the_repository->parsed_objects->obj_hash_size; } struct object *get_indexed_object(unsigned int idx) { - return obj_hash[idx]; + return the_repository->parsed_objects->obj_hash[idx]; } static const char *object_type_strings[] = { @@ -90,15 +87,16 @@ struct object *lookup_object(const unsigned char *sha1) unsigned int i, first; struct object *obj; - if (!obj_hash) + if (!the_repository->parsed_objects->obj_hash) return NULL; - first = i = hash_obj(sha1, obj_hash_size); - while ((obj = obj_hash[i]) != NULL) { + first = i = hash_obj(sha1, + the_repository->parsed_objects->obj_hash_size); + while ((obj = the_repository->parsed_objects->obj_hash[i]) != NULL) { if (!hashcmp(sha1, obj->oid.hash)) break; i++; - if (i == obj_hash_size) + if (i == the_repository->parsed_objects->obj_hash_size) i = 0; } if (obj && i != first) { @@ -107,7 +105,8 @@ struct object *lookup_object(const unsigned char *sha1) * that we do not need to walk the hash table the next * time we look for it. */ - SWAP(obj_hash[i], obj_hash[first]); + SWAP(the_repository->parsed_objects->obj_hash[i], + the_repository->parsed_objects->obj_hash[first]); } return obj; } @@ -124,19 +123,19 @@ static void grow_object_hash(void) * Note that this size must always be power-of-2 to match hash_obj * above. */ - int new_hash_size = obj_hash_size < 32 ? 32 : 2 * obj_hash_size; + int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size; struct object **new_hash; new_hash = xcalloc(new_hash_size, sizeof(struct object *)); - for (i = 0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (!obj) continue; insert_obj_hash(obj, new_hash, new_hash_size); } - free(obj_hash); - obj_hash = new_hash; - obj_hash_size = new_hash_size; + free(the_repository->parsed_objects->obj_hash); + the_repository->parsed_objects->obj_hash = new_hash; + the_repository->parsed_objects->obj_hash_size = new_hash_size; } void *create_object(const unsigned char *sha1, void *o) @@ -147,11 +146,12 @@ void *create_object(const unsigned char *sha1, void *o) obj->flags = 0; hashcpy(obj->oid.hash, sha1); - if (obj_hash_size - 1 <= nr_objs * 2) + if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) grow_object_hash(); - insert_obj_hash(obj, obj_hash, obj_hash_size); - nr_objs++; + insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, + the_repository->parsed_objects->obj_hash_size); + the_repository->parsed_objects->nr_objs++; return obj; } @@ -431,8 +431,8 @@ void clear_object_flags(unsigned flags) { int i; - for (i=0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i=0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (obj) obj->flags &= ~flags; } @@ -442,13 +442,20 @@ void clear_commit_marks_all(unsigned int flags) { int i; - for (i = 0; i < obj_hash_size; i++) { - struct object *obj = obj_hash[i]; + for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { + struct object *obj = the_repository->parsed_objects->obj_hash[i]; if (obj && obj->type == OBJ_COMMIT) obj->flags &= ~flags; } } +struct parsed_object_pool *parsed_object_pool_new(void) +{ + struct parsed_object_pool *o = xmalloc(sizeof(*o)); + memset(o, 0, sizeof(*o)); + return o; +} + struct raw_object_store *raw_object_store_new(void) { struct raw_object_store *o = xmalloc(sizeof(*o)); @@ -488,3 +495,13 @@ void raw_object_store_clear(struct raw_object_store *o) close_all_packs(o); o->packed_git = NULL; } + +void parsed_object_pool_clear(struct parsed_object_pool *o) +{ + /* + * TOOD free objects in o->obj_hash. + * + * As objects are allocated in slabs (see alloc.c), we do + * not need to free each object, but each slab instead. + */ +} diff --git a/object.h b/object.h index f13f85b..cecda7d 100644 --- a/object.h +++ b/object.h @@ -1,6 +1,14 @@ #ifndef OBJECT_H #define OBJECT_H +struct parsed_object_pool { + struct object **obj_hash; + int nr_objs, obj_hash_size; +}; + +struct parsed_object_pool *parsed_object_pool_new(void); +void parsed_object_pool_clear(struct parsed_object_pool *o); + struct object_list { struct object *item; struct object_list *next; diff --git a/repository.c b/repository.c index a4848c1..c234046 100644 --- a/repository.c +++ b/repository.c @@ -2,6 +2,7 @@ #include "repository.h" #include "object-store.h" #include "config.h" +#include "object.h" #include "submodule-config.h" /* The main repository */ @@ -14,6 +15,8 @@ void initialize_the_repository(void) the_repo.index = &the_index; the_repo.objects = raw_object_store_new(); + the_repo.parsed_objects = parsed_object_pool_new(); + repo_set_hash_algo(&the_repo, GIT_HASH_SHA1); } @@ -143,6 +146,7 @@ static int repo_init(struct repository *repo, memset(repo, 0, sizeof(*repo)); repo->objects = raw_object_store_new(); + repo->parsed_objects = parsed_object_pool_new(); if (repo_init_gitdir(repo, gitdir)) goto error; @@ -226,6 +230,9 @@ void repo_clear(struct repository *repo) raw_object_store_clear(repo->objects); FREE_AND_NULL(repo->objects); + parsed_object_pool_clear(repo->parsed_objects); + FREE_AND_NULL(repo->parsed_objects); + if (repo->config) { git_configset_clear(repo->config); FREE_AND_NULL(repo->config); diff --git a/repository.h b/repository.h index e6e00f5..6d19981 100644 --- a/repository.h +++ b/repository.h @@ -26,6 +26,15 @@ struct repository { */ struct raw_object_store *objects; + /* + * All objects in this repository that have been parsed. This structure + * owns all objects it references, so users of "struct object *" + * generally do not need to free them; instead, when a repository is no + * longer used, call parsed_object_pool_clear() on this structure, which + * is called by the repositories repo_clear on its desconstruction. + */ + struct parsed_object_pool *parsed_objects; + /* The store in which the refs are held. */ struct ref_store *refs; -- cgit v0.10.2-6-g49f6 From 68f95d382b51b134b138c91f94adb8d9ef2f557a Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:25 -0700 Subject: object: add repository argument to create_object Add a repository argument to allow the callers of create_object to be more specific about which repository to act on. This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/blob.c b/blob.c index fa2ab4f..85c2143 100644 --- a/blob.c +++ b/blob.c @@ -1,5 +1,6 @@ #include "cache.h" #include "blob.h" +#include "repository.h" const char *blob_type = "blob"; @@ -7,7 +8,8 @@ struct blob *lookup_blob(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_blob_node()); + return create_object(the_repository, oid->hash, + alloc_blob_node()); return object_as_type(obj, OBJ_BLOB, 0); } diff --git a/commit.c b/commit.c index ca474a7..9106acf 100644 --- a/commit.c +++ b/commit.c @@ -50,7 +50,8 @@ struct commit *lookup_commit(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_commit_node()); + return create_object(the_repository, oid->hash, + alloc_commit_node()); return object_as_type(obj, OBJ_COMMIT, 0); } diff --git a/object.c b/object.c index f7c624a..2de0292 100644 --- a/object.c +++ b/object.c @@ -138,7 +138,7 @@ static void grow_object_hash(void) the_repository->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object(const unsigned char *sha1, void *o) +void *create_object_the_repository(const unsigned char *sha1, void *o) { struct object *obj = o; @@ -178,7 +178,8 @@ struct object *lookup_unknown_object(const unsigned char *sha1) { struct object *obj = lookup_object(sha1); if (!obj) - obj = create_object(sha1, alloc_object_node()); + obj = create_object(the_repository, sha1, + alloc_object_node()); return obj; } diff --git a/object.h b/object.h index cecda7d..2cb0b24 100644 --- a/object.h +++ b/object.h @@ -93,7 +93,8 @@ extern struct object *get_indexed_object(unsigned int); */ struct object *lookup_object(const unsigned char *sha1); -extern void *create_object(const unsigned char *sha1, void *obj); +#define create_object(r, s, o) create_object_##r(s, o) +extern void *create_object_the_repository(const unsigned char *sha1, void *obj); void *object_as_type(struct object *obj, enum object_type type, int quiet); diff --git a/tag.c b/tag.c index 3d37c1b..7150b75 100644 --- a/tag.c +++ b/tag.c @@ -93,7 +93,8 @@ struct tag *lookup_tag(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_tag_node()); + return create_object(the_repository, oid->hash, + alloc_tag_node()); return object_as_type(obj, OBJ_TAG, 0); } diff --git a/tree.c b/tree.c index 1c68ea5..63730e3 100644 --- a/tree.c +++ b/tree.c @@ -196,7 +196,8 @@ struct tree *lookup_tree(const struct object_id *oid) { struct object *obj = lookup_object(oid->hash); if (!obj) - return create_object(oid->hash, alloc_tree_node()); + return create_object(the_repository, oid->hash, + alloc_tree_node()); return object_as_type(obj, OBJ_TREE, 0); } -- cgit v0.10.2-6-g49f6 From c077a4526ba456ada28d16db9c945afd9a4a57de Mon Sep 17 00:00:00 2001 From: Jonathan Nieder Date: Tue, 8 May 2018 12:37:26 -0700 Subject: object: add repository argument to grow_object_hash Add a repository argument to allow the caller of grow_object_hash to be more specific about which repository to handle. This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/object.c b/object.c index 2de0292..91edc30 100644 --- a/object.c +++ b/object.c @@ -116,7 +116,8 @@ struct object *lookup_object(const unsigned char *sha1) * power of 2 (but at least 32). Copy the existing values to the new * hash map. */ -static void grow_object_hash(void) +#define grow_object_hash(r) grow_object_hash_##r() +static void grow_object_hash_the_repository(void) { int i; /* @@ -147,7 +148,7 @@ void *create_object_the_repository(const unsigned char *sha1, void *o) hashcpy(obj->oid.hash, sha1); if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) - grow_object_hash(); + grow_object_hash(the_repository); insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, the_repository->parsed_objects->obj_hash_size); -- cgit v0.10.2-6-g49f6 From f0de1d62ae5982630cfb2c713ddcda853b9ee0cf Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:27 -0700 Subject: alloc: add repository argument to alloc_blob_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 12afadf..6c5c376 100644 --- a/alloc.c +++ b/alloc.c @@ -49,7 +49,7 @@ static inline void *alloc_node(struct alloc_state *s, size_t node_size) } static struct alloc_state blob_state; -void *alloc_blob_node(void) +void *alloc_blob_node_the_repository(void) { struct blob *b = alloc_node(&blob_state, sizeof(struct blob)); b->object.type = OBJ_BLOB; diff --git a/blob.c b/blob.c index 85c2143..9e64f30 100644 --- a/blob.c +++ b/blob.c @@ -9,7 +9,7 @@ struct blob *lookup_blob(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_blob_node()); + alloc_blob_node(the_repository)); return object_as_type(obj, OBJ_BLOB, 0); } diff --git a/cache.h b/cache.h index 3a4d80e..2258e61 100644 --- a/cache.h +++ b/cache.h @@ -1764,7 +1764,8 @@ int decode_85(char *dst, const char *line, int linelen); void encode_85(char *buf, const unsigned char *data, int bytes); /* alloc.c */ -extern void *alloc_blob_node(void); +#define alloc_blob_node(r) alloc_blob_node_##r() +extern void *alloc_blob_node_the_repository(void); extern void *alloc_tree_node(void); extern void *alloc_commit_node(void); extern void *alloc_tag_node(void); -- cgit v0.10.2-6-g49f6 From cf7203bdc65f11de89d27f52115cb98052c52ce8 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:28 -0700 Subject: alloc: add repository argument to alloc_tree_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 6c5c376..2c8d143 100644 --- a/alloc.c +++ b/alloc.c @@ -57,7 +57,7 @@ void *alloc_blob_node_the_repository(void) } static struct alloc_state tree_state; -void *alloc_tree_node(void) +void *alloc_tree_node_the_repository(void) { struct tree *t = alloc_node(&tree_state, sizeof(struct tree)); t->object.type = OBJ_TREE; diff --git a/cache.h b/cache.h index 2258e61..1717d07 100644 --- a/cache.h +++ b/cache.h @@ -1766,7 +1766,8 @@ void encode_85(char *buf, const unsigned char *data, int bytes); /* alloc.c */ #define alloc_blob_node(r) alloc_blob_node_##r() extern void *alloc_blob_node_the_repository(void); -extern void *alloc_tree_node(void); +#define alloc_tree_node(r) alloc_tree_node_##r() +extern void *alloc_tree_node_the_repository(void); extern void *alloc_commit_node(void); extern void *alloc_tag_node(void); extern void *alloc_object_node(void); diff --git a/tree.c b/tree.c index 63730e3..58cf19b 100644 --- a/tree.c +++ b/tree.c @@ -197,7 +197,7 @@ struct tree *lookup_tree(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_tree_node()); + alloc_tree_node(the_repository)); return object_as_type(obj, OBJ_TREE, 0); } -- cgit v0.10.2-6-g49f6 From 8ba0e5ec57e4a7da18f735416e3028a9a8b8b1ad Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:29 -0700 Subject: alloc: add repository argument to alloc_commit_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 2c8d143..9e2b897 100644 --- a/alloc.c +++ b/alloc.c @@ -88,7 +88,7 @@ unsigned int alloc_commit_index(void) return count++; } -void *alloc_commit_node(void) +void *alloc_commit_node_the_repository(void) { struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; diff --git a/blame.c b/blame.c index dfa2447..ba9b18e 100644 --- a/blame.c +++ b/blame.c @@ -161,7 +161,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, read_cache(); time(&now); - commit = alloc_commit_node(); + commit = alloc_commit_node(the_repository); commit->object.parsed = 1; commit->date = now; parent_tail = &commit->parents; diff --git a/cache.h b/cache.h index 1717d07..bf6e8c8 100644 --- a/cache.h +++ b/cache.h @@ -1768,7 +1768,8 @@ void encode_85(char *buf, const unsigned char *data, int bytes); extern void *alloc_blob_node_the_repository(void); #define alloc_tree_node(r) alloc_tree_node_##r() extern void *alloc_tree_node_the_repository(void); -extern void *alloc_commit_node(void); +#define alloc_commit_node(r) alloc_commit_node_##r() +extern void *alloc_commit_node_the_repository(void); extern void *alloc_tag_node(void); extern void *alloc_object_node(void); extern void alloc_report(void); diff --git a/commit.c b/commit.c index 9106acf..a9a43e7 100644 --- a/commit.c +++ b/commit.c @@ -51,7 +51,7 @@ struct commit *lookup_commit(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_commit_node()); + alloc_commit_node(the_repository)); return object_as_type(obj, OBJ_COMMIT, 0); } diff --git a/merge-recursive.c b/merge-recursive.c index 0c0d486..6dac890 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -98,7 +98,7 @@ static struct tree *shift_tree_object(struct tree *one, struct tree *two, static struct commit *make_virtual_commit(struct tree *tree, const char *comment) { - struct commit *commit = alloc_commit_node(); + struct commit *commit = alloc_commit_node(the_repository); set_merge_remote_desc(commit, comment, (struct object *)commit); commit->tree = tree; -- cgit v0.10.2-6-g49f6 From a0bd9086bb66fa8cb84bd4fac6441699121e1327 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:30 -0700 Subject: alloc: add repository argument to alloc_tag_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 9e2b897..290250e 100644 --- a/alloc.c +++ b/alloc.c @@ -65,7 +65,7 @@ void *alloc_tree_node_the_repository(void) } static struct alloc_state tag_state; -void *alloc_tag_node(void) +void *alloc_tag_node_the_repository(void) { struct tag *t = alloc_node(&tag_state, sizeof(struct tag)); t->object.type = OBJ_TAG; diff --git a/cache.h b/cache.h index bf6e8c8..32f340c 100644 --- a/cache.h +++ b/cache.h @@ -1770,7 +1770,8 @@ extern void *alloc_blob_node_the_repository(void); extern void *alloc_tree_node_the_repository(void); #define alloc_commit_node(r) alloc_commit_node_##r() extern void *alloc_commit_node_the_repository(void); -extern void *alloc_tag_node(void); +#define alloc_tag_node(r) alloc_tag_node_##r() +extern void *alloc_tag_node_the_repository(void); extern void *alloc_object_node(void); extern void alloc_report(void); extern unsigned int alloc_commit_index(void); diff --git a/tag.c b/tag.c index 7150b75..02ef4ea 100644 --- a/tag.c +++ b/tag.c @@ -94,7 +94,7 @@ struct tag *lookup_tag(const struct object_id *oid) struct object *obj = lookup_object(oid->hash); if (!obj) return create_object(the_repository, oid->hash, - alloc_tag_node()); + alloc_tag_node(the_repository)); return object_as_type(obj, OBJ_TAG, 0); } -- cgit v0.10.2-6-g49f6 From 13e3fdcb767fe860953f8c27eb1985bd5d15674d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:31 -0700 Subject: alloc: add repository argument to alloc_object_node This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 290250e..f031ce4 100644 --- a/alloc.c +++ b/alloc.c @@ -73,7 +73,7 @@ void *alloc_tag_node_the_repository(void) } static struct alloc_state object_state; -void *alloc_object_node(void) +void *alloc_object_node_the_repository(void) { struct object *obj = alloc_node(&object_state, sizeof(union any_object)); obj->type = OBJ_NONE; diff --git a/cache.h b/cache.h index 32f340c..2d60359 100644 --- a/cache.h +++ b/cache.h @@ -1772,7 +1772,8 @@ extern void *alloc_tree_node_the_repository(void); extern void *alloc_commit_node_the_repository(void); #define alloc_tag_node(r) alloc_tag_node_##r() extern void *alloc_tag_node_the_repository(void); -extern void *alloc_object_node(void); +#define alloc_object_node(r) alloc_object_node_##r() +extern void *alloc_object_node_the_repository(void); extern void alloc_report(void); extern unsigned int alloc_commit_index(void); diff --git a/object.c b/object.c index 91edc30..b8c3f92 100644 --- a/object.c +++ b/object.c @@ -180,7 +180,7 @@ struct object *lookup_unknown_object(const unsigned char *sha1) struct object *obj = lookup_object(sha1); if (!obj) obj = create_object(the_repository, sha1, - alloc_object_node()); + alloc_object_node(the_repository)); return obj; } -- cgit v0.10.2-6-g49f6 From 17bfe87369a726ebf3b1156ffd38c014bbd67e88 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:32 -0700 Subject: alloc: add repository argument to alloc_report This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index f031ce4..28b85b2 100644 --- a/alloc.c +++ b/alloc.c @@ -105,7 +105,7 @@ static void report(const char *name, unsigned int count, size_t size) #define REPORT(name, type) \ report(#name, name##_state.count, name##_state.count * sizeof(type) >> 10) -void alloc_report(void) +void alloc_report_the_repository(void) { REPORT(blob, struct blob); REPORT(tree, struct tree); diff --git a/cache.h b/cache.h index 2d60359..01cc207 100644 --- a/cache.h +++ b/cache.h @@ -1774,7 +1774,8 @@ extern void *alloc_commit_node_the_repository(void); extern void *alloc_tag_node_the_repository(void); #define alloc_object_node(r) alloc_object_node_##r() extern void *alloc_object_node_the_repository(void); -extern void alloc_report(void); +#define alloc_report(r) alloc_report_##r() +extern void alloc_report_the_repository(void); extern unsigned int alloc_commit_index(void); /* pkt-line.c */ -- cgit v0.10.2-6-g49f6 From dd5d9deb0155a27cb2d74d1a0faf0af84ef5f355 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:33 -0700 Subject: alloc: add repository argument to alloc_commit_index This is a small mechanical change; it doesn't change the implementation to handle repositories other than the_repository yet. Use a macro to catch callers passing a repository other than the_repository at compile time. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 28b85b2..277dadd 100644 --- a/alloc.c +++ b/alloc.c @@ -82,7 +82,7 @@ void *alloc_object_node_the_repository(void) static struct alloc_state commit_state; -unsigned int alloc_commit_index(void) +unsigned int alloc_commit_index_the_repository(void) { static unsigned int count; return count++; @@ -92,7 +92,7 @@ void *alloc_commit_node_the_repository(void) { struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; - c->index = alloc_commit_index(); + c->index = alloc_commit_index(the_repository); return c; } diff --git a/cache.h b/cache.h index 01cc207..0e6c5dd 100644 --- a/cache.h +++ b/cache.h @@ -1776,7 +1776,8 @@ extern void *alloc_tag_node_the_repository(void); extern void *alloc_object_node_the_repository(void); #define alloc_report(r) alloc_report_##r() extern void alloc_report_the_repository(void); -extern unsigned int alloc_commit_index(void); +#define alloc_commit_index(r) alloc_commit_index_##r() +extern unsigned int alloc_commit_index_the_repository(void); /* pkt-line.c */ void packet_trace_identity(const char *prog); diff --git a/object.c b/object.c index b8c3f92..a365a91 100644 --- a/object.c +++ b/object.c @@ -162,7 +162,7 @@ void *object_as_type(struct object *obj, enum object_type type, int quiet) return obj; else if (obj->type == OBJ_NONE) { if (type == OBJ_COMMIT) - ((struct commit *)obj)->index = alloc_commit_index(); + ((struct commit *)obj)->index = alloc_commit_index(the_repository); obj->type = type; return obj; } -- cgit v0.10.2-6-g49f6 From 346a817a7271b58811a8004fa225ab88bc94e9c3 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:34 -0700 Subject: object: allow grow_object_hash to handle arbitrary repositories Reviewed-by: Jonathan Tan Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/object.c b/object.c index a365a91..0fcd6f6 100644 --- a/object.c +++ b/object.c @@ -116,27 +116,27 @@ struct object *lookup_object(const unsigned char *sha1) * power of 2 (but at least 32). Copy the existing values to the new * hash map. */ -#define grow_object_hash(r) grow_object_hash_##r() -static void grow_object_hash_the_repository(void) +static void grow_object_hash(struct repository *r) { int i; /* * Note that this size must always be power-of-2 to match hash_obj * above. */ - int new_hash_size = the_repository->parsed_objects->obj_hash_size < 32 ? 32 : 2 * the_repository->parsed_objects->obj_hash_size; + int new_hash_size = r->parsed_objects->obj_hash_size < 32 ? 32 : 2 * r->parsed_objects->obj_hash_size; struct object **new_hash; new_hash = xcalloc(new_hash_size, sizeof(struct object *)); - for (i = 0; i < the_repository->parsed_objects->obj_hash_size; i++) { - struct object *obj = the_repository->parsed_objects->obj_hash[i]; + for (i = 0; i < r->parsed_objects->obj_hash_size; i++) { + struct object *obj = r->parsed_objects->obj_hash[i]; + if (!obj) continue; insert_obj_hash(obj, new_hash, new_hash_size); } - free(the_repository->parsed_objects->obj_hash); - the_repository->parsed_objects->obj_hash = new_hash; - the_repository->parsed_objects->obj_hash_size = new_hash_size; + free(r->parsed_objects->obj_hash); + r->parsed_objects->obj_hash = new_hash; + r->parsed_objects->obj_hash_size = new_hash_size; } void *create_object_the_repository(const unsigned char *sha1, void *o) -- cgit v0.10.2-6-g49f6 From 341e45e46bba094ef1274957ef5891f43e91b344 Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 8 May 2018 12:37:35 -0700 Subject: object: allow create_object to handle arbitrary repositories Reviewed-by: Jonathan Tan Signed-off-by: Jonathan Nieder Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/object.c b/object.c index 0fcd6f6..49b952e 100644 --- a/object.c +++ b/object.c @@ -139,7 +139,7 @@ static void grow_object_hash(struct repository *r) r->parsed_objects->obj_hash_size = new_hash_size; } -void *create_object_the_repository(const unsigned char *sha1, void *o) +void *create_object(struct repository *r, const unsigned char *sha1, void *o) { struct object *obj = o; @@ -147,12 +147,12 @@ void *create_object_the_repository(const unsigned char *sha1, void *o) obj->flags = 0; hashcpy(obj->oid.hash, sha1); - if (the_repository->parsed_objects->obj_hash_size - 1 <= the_repository->parsed_objects->nr_objs * 2) - grow_object_hash(the_repository); + if (r->parsed_objects->obj_hash_size - 1 <= r->parsed_objects->nr_objs * 2) + grow_object_hash(r); - insert_obj_hash(obj, the_repository->parsed_objects->obj_hash, - the_repository->parsed_objects->obj_hash_size); - the_repository->parsed_objects->nr_objs++; + insert_obj_hash(obj, r->parsed_objects->obj_hash, + r->parsed_objects->obj_hash_size); + r->parsed_objects->nr_objs++; return obj; } diff --git a/object.h b/object.h index 2cb0b24..b41d7a3 100644 --- a/object.h +++ b/object.h @@ -93,8 +93,7 @@ extern struct object *get_indexed_object(unsigned int); */ struct object *lookup_object(const unsigned char *sha1); -#define create_object(r, s, o) create_object_##r(s, o) -extern void *create_object_the_repository(const unsigned char *sha1, void *obj); +extern void *create_object(struct repository *r, const unsigned char *sha1, void *obj); void *object_as_type(struct object *obj, enum object_type type, int quiet); -- cgit v0.10.2-6-g49f6 From 14ba97f81c7b94e10d591b363688a073023f332d Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 15 May 2018 14:48:42 -0700 Subject: alloc: allow arbitrary repositories for alloc functions We have to convert all of the alloc functions at once, because alloc_report uses a funky macro for reporting. It is better for the sake of mechanical conversion to convert multiple functions at once rather than changing the structure of the reporting function. We record all memory allocation in alloc.c, and free them in clear_alloc_state, which is called for all repositories except the_repository. Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 277dadd..714df63 100644 --- a/alloc.c +++ b/alloc.c @@ -4,8 +4,7 @@ * Copyright (C) 2006 Linus Torvalds * * The standard malloc/free wastes too much space for objects, partly because - * it maintains all the allocation infrastructure (which isn't needed, since - * we never free an object descriptor anyway), but even more because it ends + * it maintains all the allocation infrastructure, but even more because it ends * up with maximal alignment because it doesn't know what the object alignment * for the new allocation is. */ @@ -15,6 +14,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #define BLOCKING 1024 @@ -30,8 +30,27 @@ struct alloc_state { int count; /* total number of nodes allocated */ int nr; /* number of nodes left in current allocation */ void *p; /* first free node in current allocation */ + + /* bookkeeping of allocations */ + void **slabs; + int slab_nr, slab_alloc; }; +void *allocate_alloc_state(void) +{ + return xcalloc(1, sizeof(struct alloc_state)); +} + +void clear_alloc_state(struct alloc_state *s) +{ + while (s->slab_nr > 0) { + s->slab_nr--; + free(s->slabs[s->slab_nr]); + } + + FREE_AND_NULL(s->slabs); +} + static inline void *alloc_node(struct alloc_state *s, size_t node_size) { void *ret; @@ -39,60 +58,57 @@ static inline void *alloc_node(struct alloc_state *s, size_t node_size) if (!s->nr) { s->nr = BLOCKING; s->p = xmalloc(BLOCKING * node_size); + + ALLOC_GROW(s->slabs, s->slab_nr + 1, s->slab_alloc); + s->slabs[s->slab_nr++] = s->p; } s->nr--; s->count++; ret = s->p; s->p = (char *)s->p + node_size; memset(ret, 0, node_size); + return ret; } -static struct alloc_state blob_state; -void *alloc_blob_node_the_repository(void) +void *alloc_blob_node(struct repository *r) { - struct blob *b = alloc_node(&blob_state, sizeof(struct blob)); + struct blob *b = alloc_node(r->parsed_objects->blob_state, sizeof(struct blob)); b->object.type = OBJ_BLOB; return b; } -static struct alloc_state tree_state; -void *alloc_tree_node_the_repository(void) +void *alloc_tree_node(struct repository *r) { - struct tree *t = alloc_node(&tree_state, sizeof(struct tree)); + struct tree *t = alloc_node(r->parsed_objects->tree_state, sizeof(struct tree)); t->object.type = OBJ_TREE; return t; } -static struct alloc_state tag_state; -void *alloc_tag_node_the_repository(void) +void *alloc_tag_node(struct repository *r) { - struct tag *t = alloc_node(&tag_state, sizeof(struct tag)); + struct tag *t = alloc_node(r->parsed_objects->tag_state, sizeof(struct tag)); t->object.type = OBJ_TAG; return t; } -static struct alloc_state object_state; -void *alloc_object_node_the_repository(void) +void *alloc_object_node(struct repository *r) { - struct object *obj = alloc_node(&object_state, sizeof(union any_object)); + struct object *obj = alloc_node(r->parsed_objects->object_state, sizeof(union any_object)); obj->type = OBJ_NONE; return obj; } -static struct alloc_state commit_state; - -unsigned int alloc_commit_index_the_repository(void) +unsigned int alloc_commit_index(struct repository *r) { - static unsigned int count; - return count++; + return r->parsed_objects->commit_count++; } -void *alloc_commit_node_the_repository(void) +void *alloc_commit_node(struct repository *r) { - struct commit *c = alloc_node(&commit_state, sizeof(struct commit)); + struct commit *c = alloc_node(r->parsed_objects->commit_state, sizeof(struct commit)); c->object.type = OBJ_COMMIT; - c->index = alloc_commit_index(the_repository); + c->index = alloc_commit_index(r); return c; } @@ -103,9 +119,10 @@ static void report(const char *name, unsigned int count, size_t size) } #define REPORT(name, type) \ - report(#name, name##_state.count, name##_state.count * sizeof(type) >> 10) + report(#name, r->parsed_objects->name##_state->count, \ + r->parsed_objects->name##_state->count * sizeof(type) >> 10) -void alloc_report_the_repository(void) +void alloc_report(struct repository *r) { REPORT(blob, struct blob); REPORT(tree, struct tree); diff --git a/alloc.h b/alloc.h new file mode 100644 index 0000000..3e4e828 --- /dev/null +++ b/alloc.h @@ -0,0 +1,19 @@ +#ifndef ALLOC_H +#define ALLOC_H + +struct tree; +struct commit; +struct tag; + +void *alloc_blob_node(struct repository *r); +void *alloc_tree_node(struct repository *r); +void *alloc_commit_node(struct repository *r); +void *alloc_tag_node(struct repository *r); +void *alloc_object_node(struct repository *r); +void alloc_report(struct repository *r); +unsigned int alloc_commit_index(struct repository *r); + +void *allocate_alloc_state(void); +void clear_alloc_state(struct alloc_state *s); + +#endif diff --git a/blame.c b/blame.c index ba9b18e..3a11f1c 100644 --- a/blame.c +++ b/blame.c @@ -6,6 +6,7 @@ #include "diffcore.h" #include "tag.h" #include "blame.h" +#include "alloc.h" void blame_origin_decref(struct blame_origin *o) { diff --git a/blob.c b/blob.c index 9e64f30..458dafa 100644 --- a/blob.c +++ b/blob.c @@ -1,6 +1,7 @@ #include "cache.h" #include "blob.h" #include "repository.h" +#include "alloc.h" const char *blob_type = "blob"; diff --git a/cache.h b/cache.h index 0e6c5dd..c75559b 100644 --- a/cache.h +++ b/cache.h @@ -1763,22 +1763,6 @@ extern const char *excludes_file; int decode_85(char *dst, const char *line, int linelen); void encode_85(char *buf, const unsigned char *data, int bytes); -/* alloc.c */ -#define alloc_blob_node(r) alloc_blob_node_##r() -extern void *alloc_blob_node_the_repository(void); -#define alloc_tree_node(r) alloc_tree_node_##r() -extern void *alloc_tree_node_the_repository(void); -#define alloc_commit_node(r) alloc_commit_node_##r() -extern void *alloc_commit_node_the_repository(void); -#define alloc_tag_node(r) alloc_tag_node_##r() -extern void *alloc_tag_node_the_repository(void); -#define alloc_object_node(r) alloc_object_node_##r() -extern void *alloc_object_node_the_repository(void); -#define alloc_report(r) alloc_report_##r() -extern void alloc_report_the_repository(void); -#define alloc_commit_index(r) alloc_commit_index_##r() -extern unsigned int alloc_commit_index_the_repository(void); - /* pkt-line.c */ void packet_trace_identity(const char *prog); diff --git a/commit.c b/commit.c index a9a43e7..5eb4d2f 100644 --- a/commit.c +++ b/commit.c @@ -6,6 +6,7 @@ #include "diff.h" #include "revision.h" #include "notes.h" +#include "alloc.h" #include "gpg-interface.h" #include "mergesort.h" #include "commit-slab.h" @@ -296,6 +297,17 @@ void free_commit_buffer(struct commit *commit) } } +void release_commit_memory(struct commit *c) +{ + c->tree = NULL; + c->index = 0; + free_commit_buffer(c); + free_commit_list(c->parents); + /* TODO: what about commit->util? */ + + c->object.parsed = 0; +} + const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) { struct commit_buffer *v = buffer_slab_peek(&buffer_slab, commit); diff --git a/commit.h b/commit.h index 0fb8271..2d764ab 100644 --- a/commit.h +++ b/commit.h @@ -100,6 +100,12 @@ void unuse_commit_buffer(const struct commit *, const void *buffer); void free_commit_buffer(struct commit *); /* + * Release memory related to a commit, including the parent list and + * any cached object buffer. + */ +void release_commit_memory(struct commit *c); + +/* * Disassociate any cached object buffer from the commit, but do not free it. * The buffer (or NULL, if none) is returned. */ diff --git a/merge-recursive.c b/merge-recursive.c index 6dac890..cbded67 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -15,6 +15,7 @@ #include "diff.h" #include "diffcore.h" #include "tag.h" +#include "alloc.h" #include "unpack-trees.h" #include "string-list.h" #include "xdiff-interface.h" diff --git a/object.c b/object.c index 49b952e..8e29f63 100644 --- a/object.c +++ b/object.c @@ -5,6 +5,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #include "object-store.h" #include "packfile.h" @@ -455,6 +456,13 @@ struct parsed_object_pool *parsed_object_pool_new(void) { struct parsed_object_pool *o = xmalloc(sizeof(*o)); memset(o, 0, sizeof(*o)); + + o->blob_state = allocate_alloc_state(); + o->tree_state = allocate_alloc_state(); + o->commit_state = allocate_alloc_state(); + o->tag_state = allocate_alloc_state(); + o->object_state = allocate_alloc_state(); + return o; } @@ -501,9 +509,39 @@ void raw_object_store_clear(struct raw_object_store *o) void parsed_object_pool_clear(struct parsed_object_pool *o) { /* - * TOOD free objects in o->obj_hash. - * * As objects are allocated in slabs (see alloc.c), we do * not need to free each object, but each slab instead. + * + * Before doing so, we need to free any additional memory + * the objects may hold. */ + unsigned i; + + for (i = 0; i < o->obj_hash_size; i++) { + struct object *obj = o->obj_hash[i]; + + if (!obj) + continue; + + if (obj->type == OBJ_TREE) + free_tree_buffer((struct tree*)obj); + else if (obj->type == OBJ_COMMIT) + release_commit_memory((struct commit*)obj); + else if (obj->type == OBJ_TAG) + release_tag_memory((struct tag*)obj); + } + + FREE_AND_NULL(o->obj_hash); + o->obj_hash_size = 0; + + clear_alloc_state(o->blob_state); + clear_alloc_state(o->tree_state); + clear_alloc_state(o->commit_state); + clear_alloc_state(o->tag_state); + clear_alloc_state(o->object_state); + FREE_AND_NULL(o->blob_state); + FREE_AND_NULL(o->tree_state); + FREE_AND_NULL(o->commit_state); + FREE_AND_NULL(o->tag_state); + FREE_AND_NULL(o->object_state); } diff --git a/object.h b/object.h index b41d7a3..7916edb 100644 --- a/object.h +++ b/object.h @@ -4,6 +4,14 @@ struct parsed_object_pool { struct object **obj_hash; int nr_objs, obj_hash_size; + + /* TODO: migrate alloc_states to mem-pool? */ + struct alloc_state *blob_state; + struct alloc_state *tree_state; + struct alloc_state *commit_state; + struct alloc_state *tag_state; + struct alloc_state *object_state; + unsigned commit_count; }; struct parsed_object_pool *parsed_object_pool_new(void); diff --git a/tag.c b/tag.c index 02ef4ea..7c12426 100644 --- a/tag.c +++ b/tag.c @@ -3,6 +3,7 @@ #include "commit.h" #include "tree.h" #include "blob.h" +#include "alloc.h" #include "gpg-interface.h" const char *tag_type = "tag"; @@ -115,6 +116,14 @@ static timestamp_t parse_tag_date(const char *buf, const char *tail) return parse_timestamp(dateptr, NULL, 10); } +void release_tag_memory(struct tag *t) +{ + free(t->tag); + t->tagged = NULL; + t->object.parsed = 0; + t->date = 0; +} + int parse_tag_buffer(struct tag *item, const void *data, unsigned long size) { struct object_id oid; diff --git a/tag.h b/tag.h index d469534..9057d76 100644 --- a/tag.h +++ b/tag.h @@ -15,6 +15,7 @@ struct tag { extern struct tag *lookup_tag(const struct object_id *oid); extern int parse_tag_buffer(struct tag *item, const void *data, unsigned long size); extern int parse_tag(struct tag *item); +extern void release_tag_memory(struct tag *t); extern struct object *deref_tag(struct object *, const char *, int); extern struct object *deref_tag_noverify(struct object *); extern int gpg_verify_tag(const struct object_id *oid, diff --git a/tree.c b/tree.c index 58cf19b..8f8ef31 100644 --- a/tree.c +++ b/tree.c @@ -5,6 +5,7 @@ #include "blob.h" #include "commit.h" #include "tag.h" +#include "alloc.h" #include "tree-walk.h" const char *tree_type = "tree"; -- cgit v0.10.2-6-g49f6