summaryrefslogtreecommitdiff
path: root/builtin/fast-import.c
diff options
context:
space:
mode:
authorJeff King <peff@peff.net>2020-10-15 15:38:49 (GMT)
committerJunio C Hamano <gitster@pobox.com>2020-10-15 17:30:53 (GMT)
commit3f018ec716292b4d757385686f42f57af3bca685 (patch)
treef343594abcc39e40326912ab48bad3563c337012 /builtin/fast-import.c
parentd4a392452e292ff924e79ec8458611c0f679d6d4 (diff)
downloadgit-3f018ec716292b4d757385686f42f57af3bca685.zip
git-3f018ec716292b4d757385686f42f57af3bca685.tar.gz
git-3f018ec716292b4d757385686f42f57af3bca685.tar.bz2
fast-import: fix over-allocation of marks storage
Fast-import stores its marks in a trie-like structure made of mark_set structs. Each struct has a fixed size (1024). If our id number is too large to fit in the struct, then we allocate a new struct which shifts the id number by 10 bits. Our original struct becomes a child node of this new layer, and the new struct becomes the top level of the trie. This scheme was broken by ddddf8d7e2 (fast-import: permit reading multiple marks files, 2020-02-22). Before then, we had a top-level "marks" pointer, and the push-down worked by assigning the new top-level struct to "marks". But after that commit, insert_mark() takes a pointer to the mark_set, rather than using the global "marks". It continued to assign to the global "marks" variable during the push down, which was wrong for two reasons: - we added a call in option_rewrite_submodules() which uses a separate mark set; pushing down on "marks" is outright wrong here. We'd corrupt the "marks" set, and we'd fail to correctly store any submodule mappings with an id over 1024. - the other callers passed "marks", but the push-down was still wrong. In read_mark_file(), we take the pointer to the mark_set as a parameter. So even though insert_mark() was updating the global "marks", the local pointer we had in read_mark_file() was not updated. As a result, we'd add a new level when needed, but then the next call to insert_mark() wouldn't see it! It would then allocate a new layer, which would also not be seen, and so on. Lookups for the lost layers obviously wouldn't work, but before we even hit any lookup stage, we'd generally run out of memory and die. Our tests didn't notice either of these cases because they didn't have enough marks to trigger the push-down behavior. The new tests in t9304 cover both cases (and fail without this patch). We can solve the problem by having insert_mark() take a pointer-to-pointer of the top-level of the set. Then our push down can assign to it in a way that the caller actually sees. Note the subtle reordering in option_rewrite_submodules(). Our call to read_mark_file() may modify our top-level set pointer, so we have to wait until after it returns to assign its value into the string_list. Reported-by: Sergey Brester <serg.brester@sebres.de> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'builtin/fast-import.c')
-rw-r--r--builtin/fast-import.c31
1 files changed, 17 insertions, 14 deletions
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 1bf50a7..70d7d25 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -150,7 +150,7 @@ struct recent_command {
char *buf;
};
-typedef void (*mark_set_inserter_t)(struct mark_set *s, struct object_id *oid, uintmax_t mark);
+typedef void (*mark_set_inserter_t)(struct mark_set **s, struct object_id *oid, uintmax_t mark);
typedef void (*each_mark_fn_t)(uintmax_t mark, void *obj, void *cbp);
/* Configured limits on output */
@@ -526,13 +526,15 @@ static unsigned int hc_str(const char *s, size_t len)
return r;
}
-static void insert_mark(struct mark_set *s, uintmax_t idnum, struct object_entry *oe)
+static void insert_mark(struct mark_set **top, uintmax_t idnum, struct object_entry *oe)
{
+ struct mark_set *s = *top;
+
while ((idnum >> s->shift) >= 1024) {
s = mem_pool_calloc(&fi_mem_pool, 1, sizeof(struct mark_set));
- s->shift = marks->shift + 10;
- s->data.sets[0] = marks;
- marks = s;
+ s->shift = (*top)->shift + 10;
+ s->data.sets[0] = *top;
+ *top = s;
}
while (s->shift) {
uintmax_t i = idnum >> s->shift;
@@ -944,7 +946,7 @@ static int store_object(
e = insert_object(&oid);
if (mark)
- insert_mark(marks, mark, e);
+ insert_mark(&marks, mark, e);
if (e->idx.offset) {
duplicate_count_by_type[type]++;
return 1;
@@ -1142,7 +1144,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark)
e = insert_object(&oid);
if (mark)
- insert_mark(marks, mark, e);
+ insert_mark(&marks, mark, e);
if (e->idx.offset) {
duplicate_count_by_type[OBJ_BLOB]++;
@@ -1717,7 +1719,7 @@ static void dump_marks(void)
}
}
-static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+static void insert_object_entry(struct mark_set **s, struct object_id *oid, uintmax_t mark)
{
struct object_entry *e;
e = find_object(oid);
@@ -1734,12 +1736,12 @@ static void insert_object_entry(struct mark_set *s, struct object_id *oid, uintm
insert_mark(s, mark, e);
}
-static void insert_oid_entry(struct mark_set *s, struct object_id *oid, uintmax_t mark)
+static void insert_oid_entry(struct mark_set **s, struct object_id *oid, uintmax_t mark)
{
insert_mark(s, mark, xmemdupz(oid, sizeof(*oid)));
}
-static void read_mark_file(struct mark_set *s, FILE *f, mark_set_inserter_t inserter)
+static void read_mark_file(struct mark_set **s, FILE *f, mark_set_inserter_t inserter)
{
char line[512];
while (fgets(line, sizeof(line), f)) {
@@ -1772,7 +1774,7 @@ static void read_marks(void)
goto done; /* Marks file does not exist */
else
die_errno("cannot read '%s'", import_marks_file);
- read_mark_file(marks, f, insert_object_entry);
+ read_mark_file(&marks, f, insert_object_entry);
fclose(f);
done:
import_marks_file_done = 1;
@@ -3228,7 +3230,7 @@ static void parse_alias(void)
die(_("Expected 'to' command, got %s"), command_buf.buf);
e = find_object(&b.oid);
assert(e);
- insert_mark(marks, next_mark, e);
+ insert_mark(&marks, next_mark, e);
}
static char* make_fast_import_path(const char *path)
@@ -3321,13 +3323,14 @@ static void option_rewrite_submodules(const char *arg, struct string_list *list)
*f = '\0';
f++;
ms = xcalloc(1, sizeof(*ms));
- string_list_insert(list, s)->util = ms;
fp = fopen(f, "r");
if (!fp)
die_errno("cannot read '%s'", f);
- read_mark_file(ms, fp, insert_oid_entry);
+ read_mark_file(&ms, fp, insert_oid_entry);
fclose(fp);
+
+ string_list_insert(list, s)->util = ms;
}
static int parse_one_option(const char *option)