From 9441b61dc5c3f1f984114ec8bd470dc20c55dfe0 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 17 Oct 2008 15:57:57 -0400 Subject: index-pack: rationalize delta resolution code Instead of having strange loops for walking unresolved deltas with the same base duplicated in many places, let's rework the code so this is done in a single place instead. This simplifies callers quite a bit too. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano diff --git a/index-pack.c b/index-pack.c index d3a4d31..82843e7 100644 --- a/index-pack.c +++ b/index-pack.c @@ -244,7 +244,8 @@ static void link_base_data(struct base_data *base, struct base_data *c) c->base = base; c->child = NULL; - base_cache_used += c->size; + if (c->data) + base_cache_used += c->size; prune_base_data(c); } @@ -494,8 +495,10 @@ static void *get_base_data(struct base_data *c) free(raw); if (!c->data) bad_object(obj->idx.offset, "failed to apply delta"); - } else + } else { c->data = get_data_from_pack(obj); + c->size = obj->size; + } base_cache_used += c->size; prune_base_data(c); @@ -504,49 +507,73 @@ static void *get_base_data(struct base_data *c) } static void resolve_delta(struct object_entry *delta_obj, - struct base_data *base_obj, enum object_type type) + struct base_data *base, struct base_data *result) { void *delta_data; unsigned long delta_size; - union delta_base delta_base; - int j, first, last; - struct base_data result; - delta_obj->real_type = type; + delta_obj->real_type = base->obj->type; delta_data = get_data_from_pack(delta_obj); delta_size = delta_obj->size; - result.data = patch_delta(get_base_data(base_obj), base_obj->size, - delta_data, delta_size, - &result.size); + result->obj = delta_obj; + result->data = patch_delta(get_base_data(base), base->obj->size, + delta_data, delta_size, &result->size); free(delta_data); - if (!result.data) + if (!result->data) bad_object(delta_obj->idx.offset, "failed to apply delta"); - sha1_object(result.data, result.size, type, delta_obj->idx.sha1); + sha1_object(result->data, result->size, delta_obj->real_type, + delta_obj->idx.sha1); nr_resolved_deltas++; +} + +static void find_unresolved_deltas(struct base_data *base, + struct base_data *prev_base) +{ + int i, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; + + /* + * This is a recursive function. Those brackets should help reducing + * stack usage by limiting the scope of the delta_base union. + */ + { + union delta_base base_spec; + + hashcpy(base_spec.sha1, base->obj->idx.sha1); + ref = !find_delta_children(&base_spec, &ref_first, &ref_last); + + memset(&base_spec, 0, sizeof(base_spec)); + base_spec.offset = base->obj->idx.offset; + ofs = !find_delta_children(&base_spec, &ofs_first, &ofs_last); + } + + if (!ref && !ofs) + return; - result.obj = delta_obj; - link_base_data(base_obj, &result); + link_base_data(prev_base, base); - hashcpy(delta_base.sha1, delta_obj->idx.sha1); - if (!find_delta_children(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &result, type); + if (ref) { + for (i = ref_first; i <= ref_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_REF_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + find_unresolved_deltas(&result, base); + } } } - memset(&delta_base, 0, sizeof(delta_base)); - delta_base.offset = delta_obj->idx.offset; - if (!find_delta_children(&delta_base, &first, &last)) { - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, &result, type); + if (ofs) { + for (i = ofs_first; i <= ofs_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_OFS_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + find_unresolved_deltas(&result, base); + } } } - unlink_base_data(&result); + unlink_base_data(base); } static int compare_delta_entry(const void *a, const void *b) @@ -622,37 +649,13 @@ static void parse_pack_objects(unsigned char *sha1) progress = start_progress("Resolving deltas", nr_deltas); for (i = 0; i < nr_objects; i++) { struct object_entry *obj = &objects[i]; - union delta_base base; - int j, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; struct base_data base_obj; if (obj->type == OBJ_REF_DELTA || obj->type == OBJ_OFS_DELTA) continue; - hashcpy(base.sha1, obj->idx.sha1); - ref = !find_delta_children(&base, &ref_first, &ref_last); - memset(&base, 0, sizeof(base)); - base.offset = obj->idx.offset; - ofs = !find_delta_children(&base, &ofs_first, &ofs_last); - if (!ref && !ofs) - continue; - base_obj.data = get_data_from_pack(obj); - base_obj.size = obj->size; base_obj.obj = obj; - link_base_data(NULL, &base_obj); - - if (ref) - for (j = ref_first; j <= ref_last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &base_obj, obj->type); - } - if (ofs) - for (j = ofs_first; j <= ofs_last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_OFS_DELTA) - resolve_delta(child, &base_obj, obj->type); - } - unlink_base_data(&base_obj); + base_obj.data = NULL; + find_unresolved_deltas(&base_obj, NULL); display_progress(progress, nr_resolved_deltas); } } @@ -745,7 +748,6 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) for (i = 0; i < n; i++) { struct delta_entry *d = sorted_by_pos[i]; enum object_type type; - int j, first, last; struct base_data base_obj; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) @@ -759,16 +761,7 @@ static void fix_unresolved_deltas(struct sha1file *f, int nr_unresolved) die("local object %s is corrupt", sha1_to_hex(d->base.sha1)); base_obj.obj = append_obj_to_pack(f, d->base.sha1, base_obj.data, base_obj.size, type); - link_base_data(NULL, &base_obj); - - find_delta_children(&d->base, &first, &last); - for (j = first; j <= last; j++) { - struct object_entry *child = objects + deltas[j].obj_no; - if (child->real_type == OBJ_REF_DELTA) - resolve_delta(child, &base_obj, type); - } - - unlink_base_data(&base_obj); + find_unresolved_deltas(&base_obj, NULL); display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); -- cgit v0.10.2-6-g49f6 From 6a87ed972c9c7a4b4378b6e34670d3aa2ac7ccc8 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 17 Oct 2008 15:57:58 -0400 Subject: index-pack: smarter memory usage during delta resolution There is no need to keep the base object data around after its last delta has been resolved. This also means that long delta chains with only one delta per base won't grow the cache size unnecessarily as the base will be freed before recursing down. To make it easy, find_delta_children() is modified so the first and last indices are initialized in all cases. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano diff --git a/index-pack.c b/index-pack.c index 82843e7..9def641 100644 --- a/index-pack.c +++ b/index-pack.c @@ -221,17 +221,23 @@ static void bad_object(unsigned long offset, const char *format, ...) die("pack has bad object at offset %lu: %s", offset, buf); } +static void free_base_data(struct base_data *c) +{ + if (c->data) { + free(c->data); + c->data = NULL; + base_cache_used -= c->size; + } +} + static void prune_base_data(struct base_data *retain) { struct base_data *b = base_cache; for (b = base_cache; base_cache_used > delta_base_cache_limit && b; b = b->child) { - if (b->data && b != retain) { - free(b->data); - b->data = NULL; - base_cache_used -= b->size; - } + if (b->data && b != retain) + free_base_data(b); } } @@ -256,10 +262,7 @@ static void unlink_base_data(struct base_data *c) base->child = NULL; else base_cache = NULL; - if (c->data) { - free(c->data); - base_cache_used -= c->size; - } + free_base_data(c); } static void *unpack_entry_data(unsigned long offset, unsigned long size) @@ -409,22 +412,24 @@ static int find_delta(const union delta_base *base) return -first-1; } -static int find_delta_children(const union delta_base *base, - int *first_index, int *last_index) +static void find_delta_children(const union delta_base *base, + int *first_index, int *last_index) { int first = find_delta(base); int last = first; int end = nr_deltas - 1; - if (first < 0) - return -1; + if (first < 0) { + *first_index = 0; + *last_index = -1; + return; + } while (first > 0 && !memcmp(&deltas[first - 1].base, base, UNION_BASE_SZ)) --first; while (last < end && !memcmp(&deltas[last + 1].base, base, UNION_BASE_SZ)) ++last; *first_index = first; *last_index = last; - return 0; } static void sha1_object(const void *data, unsigned long size, @@ -529,7 +534,7 @@ static void resolve_delta(struct object_entry *delta_obj, static void find_unresolved_deltas(struct base_data *base, struct base_data *prev_base) { - int i, ref, ref_first, ref_last, ofs, ofs_first, ofs_last; + int i, ref_first, ref_last, ofs_first, ofs_last; /* * This is a recursive function. Those brackets should help reducing @@ -539,37 +544,37 @@ static void find_unresolved_deltas(struct base_data *base, union delta_base base_spec; hashcpy(base_spec.sha1, base->obj->idx.sha1); - ref = !find_delta_children(&base_spec, &ref_first, &ref_last); + find_delta_children(&base_spec, &ref_first, &ref_last); memset(&base_spec, 0, sizeof(base_spec)); base_spec.offset = base->obj->idx.offset; - ofs = !find_delta_children(&base_spec, &ofs_first, &ofs_last); + find_delta_children(&base_spec, &ofs_first, &ofs_last); } - if (!ref && !ofs) + if (ref_last == -1 && ofs_last == -1) return; link_base_data(prev_base, base); - if (ref) { - for (i = ref_first; i <= ref_last; i++) { - struct object_entry *child = objects + deltas[i].obj_no; - if (child->real_type == OBJ_REF_DELTA) { - struct base_data result; - resolve_delta(child, base, &result); - find_unresolved_deltas(&result, base); - } + for (i = ref_first; i <= ref_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_REF_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + if (i == ref_last && ofs_last == -1) + free_base_data(base); + find_unresolved_deltas(&result, base); } } - if (ofs) { - for (i = ofs_first; i <= ofs_last; i++) { - struct object_entry *child = objects + deltas[i].obj_no; - if (child->real_type == OBJ_OFS_DELTA) { - struct base_data result; - resolve_delta(child, base, &result); - find_unresolved_deltas(&result, base); - } + for (i = ofs_first; i <= ofs_last; i++) { + struct object_entry *child = objects + deltas[i].obj_no; + if (child->real_type == OBJ_OFS_DELTA) { + struct base_data result; + resolve_delta(child, base, &result); + if (i == ofs_last) + free_base_data(base); + find_unresolved_deltas(&result, base); } } -- cgit v0.10.2-6-g49f6 From ce3f6dc655392803a59c9b2f2ec9509fd8061b33 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 20 Oct 2008 16:46:19 -0400 Subject: fix multiple issues in index-pack Since commit 9441b61dc5, two issues affected correct behavior of index-pack: 1) The real_type of a delta object is the 'real_type' of its base, not the 'type' which can be a "delta type". Consequence of this is a corrupted pack index file which only needs to be recreated with a good index-pack command ('git verify-pack' will flag those). 2) The code sequence: result->data = patch_delta(get_base_data(base), base->obj->size, delta_data, delta_size, &result->size); has two issues of its own since base->obj->size should instead be base->size as we want the size of the actual object data and not the size of the delta object it is represented by. Except that simply replacing base->obj->size with base->size won't make the code more correct as the C language doesn't enforce a particular ordering for the evaluation of needed arguments for a function call, hence base->size could be pushed on the stack before get_base_data() which initializes base->size is called. Signed-off-by: Nicolas Pitre Tested-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/index-pack.c b/index-pack.c index 9def641..f109a00 100644 --- a/index-pack.c +++ b/index-pack.c @@ -514,15 +514,14 @@ static void *get_base_data(struct base_data *c) static void resolve_delta(struct object_entry *delta_obj, struct base_data *base, struct base_data *result) { - void *delta_data; - unsigned long delta_size; + void *base_data, *delta_data; - delta_obj->real_type = base->obj->type; + delta_obj->real_type = base->obj->real_type; delta_data = get_data_from_pack(delta_obj); - delta_size = delta_obj->size; + base_data = get_base_data(base); result->obj = delta_obj; - result->data = patch_delta(get_base_data(base), base->obj->size, - delta_data, delta_size, &result->size); + result->data = patch_delta(base_data, base->size, + delta_data, delta_obj->size, &result->size); free(delta_data); if (!result->data) bad_object(delta_obj->idx.offset, "failed to apply delta"); -- cgit v0.10.2-6-g49f6 From 2b5c208f5bd371ea5527046384f9955dbbceb188 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 22 Oct 2008 20:59:22 -0400 Subject: improve index-pack tests Commit 9441b61dc5 introduced serious bugs in index-pack which are described and fixed by commit ce3f6dc655. However, despite the boldness of those bugs, the test suite still passed. This improves t5302-pack-index.sh so to ensure a much better code path coverage. With commit ce3f6dc655 reverted, 17 of the 26 tests do fail now. Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 6424db1..0cb945f 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -11,13 +11,18 @@ test_expect_success \ 'rm -rf .git git init && i=1 && - while test $i -le 100 + while test $i -le 100 do - i=`printf '%03i' $i` - echo $i >file_$i && - test-genrandom "$i" 8192 >>file_$i && - git update-index --add file_$i && - i=`expr $i + 1` || return 1 + iii=`printf '%03i' $i` + test-genrandom "bar" 200 > wide_delta_$iii && + test-genrandom "baz $iii" 50 >> wide_delta_$iii && + test-genrandom "foo"$i 100 > deep_delta_$iii && + test-genrandom "foo"`expr $i + 1` 100 >> deep_delta_$iii && + test-genrandom "foo"`expr $i + 2` 100 >> deep_delta_$iii && + echo $iii >file_$iii && + test-genrandom "$iii" 8192 >>file_$iii && + git update-index --add file_$iii deep_delta_$iii wide_delta_$iii && + i=`expr $i + 1` || return 1 done && { echo 101 && test-genrandom 100 8192; } >file_101 && git update-index --add file_101 && @@ -92,6 +97,31 @@ test_expect_success \ '64-bit offsets: index-pack result should match pack-objects one' \ 'cmp "test-3-${pack3}.idx" "3.idx"' +# returns the object number for given object in given pack index +index_obj_nr() +{ + idx_file=$1 + object_sha1=$2 + nr=0 + git show-index < $idx_file | + while read offs sha1 extra + do + nr=$(($nr + 1)) + test "$sha1" = "$object_sha1" || continue + echo "$(($nr - 1))" + break + done +} + +# returns the pack offset for given object as found in given pack index +index_obj_offset() +{ + idx_file=$1 + object_sha1=$2 + git show-index < $idx_file | grep $object_sha1 | + ( read offs extra && echo "$offs" ) +} + test_expect_success \ '[index v1] 1) stream pack to repository' \ 'git index-pack --index-version=1 --stdin < "test-1-${pack1}.pack" && @@ -102,19 +132,22 @@ test_expect_success \ test_expect_success \ '[index v1] 2) create a stealth corruption in a delta base reference' \ - '# this test assumes a delta smaller than 16 bytes at the end of the pack - git show-index <1.idx | sort -n | sed -ne \$p | ( - read delta_offs delta_sha1 && - git cat-file blob "$delta_sha1" > blob_1 && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" skip=$((256 * 4 + 4)) \ - bs=1 count=20 conv=notrunc && - git cat-file blob "$delta_sha1" > blob_2 )' + '# This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=`git hash-object file_101` && + sha1_099=`git hash-object file_099` && + offs_101=`index_obj_offset 1.idx $sha1_101` && + nr_099=`index_obj_nr 1.idx $sha1_099` && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((4 + 256 * 4 + $nr_099 * 24)) \ + bs=1 count=20 conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo1' test_expect_success \ '[index v1] 3) corrupted delta happily returned wrong data' \ - '! cmp blob_1 blob_2' + 'test -f file_101_foo1 && ! cmp file_101 file_101_foo1' test_expect_success \ '[index v1] 4) confirm that the pack is actually corrupted' \ @@ -140,19 +173,22 @@ test_expect_success \ test_expect_success \ '[index v2] 2) create a stealth corruption in a delta base reference' \ - '# this test assumes a delta smaller than 16 bytes at the end of the pack - git show-index <1.idx | sort -n | sed -ne \$p | ( - read delta_offs delta_sha1 delta_crc && - git cat-file blob "$delta_sha1" > blob_3 && - chmod +w ".git/objects/pack/pack-${pack1}.pack" && - dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($delta_offs + 1)) \ - if=".git/objects/pack/pack-${pack1}.idx" skip=$((8 + 256 * 4)) \ - bs=1 count=20 conv=notrunc && - git cat-file blob "$delta_sha1" > blob_4 )' + '# This test assumes file_101 is a delta smaller than 16 bytes. + # It should be against file_100 but we substitute its base for file_099 + sha1_101=`git hash-object file_101` && + sha1_099=`git hash-object file_099` && + offs_101=`index_obj_offset 1.idx $sha1_101` && + nr_099=`index_obj_nr 1.idx $sha1_099` && + chmod +w ".git/objects/pack/pack-${pack1}.pack" && + dd of=".git/objects/pack/pack-${pack1}.pack" seek=$(($offs_101 + 1)) \ + if=".git/objects/pack/pack-${pack1}.idx" \ + skip=$((8 + 256 * 4 + $nr_099 * 20)) \ + bs=1 count=20 conv=notrunc && + git cat-file blob $sha1_101 > file_101_foo2' test_expect_success \ '[index v2] 3) corrupted delta happily returned wrong data' \ - '! cmp blob_3 blob_4' + 'test -f file_101_foo2 && ! cmp file_101 file_101_foo2' test_expect_success \ '[index v2] 4) confirm that the pack is actually corrupted' \ @@ -167,9 +203,11 @@ test_expect_success \ 'rm -f .git/objects/pack/* && git index-pack --index-version=2 --stdin < "test-1-${pack1}.pack" && git verify-pack ".git/objects/pack/pack-${pack1}.pack" && + obj=`git hash-object file_001` && + nr=`index_obj_nr ".git/objects/pack/pack-${pack1}.idx" $obj` && chmod +w ".git/objects/pack/pack-${pack1}.idx" && dd if=/dev/zero of=".git/objects/pack/pack-${pack1}.idx" conv=notrunc \ - bs=1 count=4 seek=$((8 + 256 * 4 + `wc -l /dev/null || exit 1 done Date: Thu, 23 Oct 2008 15:05:59 -0400 Subject: index-pack: don't leak leaf delta result Another (but minor this time) fallout from commit 9441b61 (index-pack: rationalize delta resolution code, 2008-10-17). Signed-off-by: Nicolas Pitre Signed-off-by: Junio C Hamano diff --git a/index-pack.c b/index-pack.c index f109a00..7db7fbb 100644 --- a/index-pack.c +++ b/index-pack.c @@ -550,8 +550,10 @@ static void find_unresolved_deltas(struct base_data *base, find_delta_children(&base_spec, &ofs_first, &ofs_last); } - if (ref_last == -1 && ofs_last == -1) + if (ref_last == -1 && ofs_last == -1) { + free(base->data); return; + } link_base_data(prev_base, base); -- cgit v0.10.2-6-g49f6