diff options
Diffstat (limited to 'pack-bitmap.c')
-rw-r--r-- | pack-bitmap.c | 1227 |
1 files changed, 972 insertions, 255 deletions
diff --git a/pack-bitmap.c b/pack-bitmap.c index 8504110..35c5ef9 100644 --- a/pack-bitmap.c +++ b/pack-bitmap.c @@ -1,5 +1,8 @@ -#include "cache.h" +#include "git-compat-util.h" #include "commit.h" +#include "gettext.h" +#include "hex.h" +#include "strbuf.h" #include "tag.h" #include "diff.h" #include "revision.h" @@ -11,7 +14,9 @@ #include "pack-objects.h" #include "packfile.h" #include "repository.h" -#include "object-store.h" +#include "trace2.h" +#include "object-file.h" +#include "object-store-ll.h" #include "list-objects-filter-options.h" #include "midx.h" #include "config.h" @@ -46,13 +51,6 @@ struct bitmap_index { struct packed_git *pack; struct multi_pack_index *midx; - /* - * Mark the first `reuse_objects` in the packfile as reused: - * they will be sent as-is without using them for repacking - * calculations - */ - uint32_t reuse_objects; - /* mmapped buffer of the whole bitmap index */ unsigned char *map; size_t map_size; /* size of the mmaped buffer */ @@ -83,6 +81,12 @@ struct bitmap_index { const unsigned char *checksum; /* + * If not NULL, this point into the commit table extension + * (within the memory mapped region `map`). + */ + unsigned char *table_lookup; + + /* * Extended index. * * When trying to perform bitmap operations with objects that are not @@ -111,7 +115,7 @@ static struct ewah_bitmap *lookup_stored_bitmap(struct stored_bitmap *st) struct ewah_bitmap *parent; struct ewah_bitmap *composed; - if (st->xor == NULL) + if (!st->xor) return st->root; composed = ewah_pool_new(); @@ -138,7 +142,7 @@ static struct ewah_bitmap *read_bitmap_1(struct bitmap_index *index) index->map_size - index->map_pos); if (bitmap_size < 0) { - error("Failed to load bitmap index (corrupted?)"); + error(_("failed to load bitmap index (corrupted?)")); ewah_pool_free(b); return NULL; } @@ -160,14 +164,14 @@ static int load_bitmap_header(struct bitmap_index *index) size_t header_size = sizeof(*header) - GIT_MAX_RAWSZ + the_hash_algo->rawsz; if (index->map_size < header_size + the_hash_algo->rawsz) - return error("Corrupted bitmap index (too small)"); + return error(_("corrupted bitmap index (too small)")); if (memcmp(header->magic, BITMAP_IDX_SIGNATURE, sizeof(BITMAP_IDX_SIGNATURE)) != 0) - return error("Corrupted bitmap index file (wrong header)"); + return error(_("corrupted bitmap index file (wrong header)")); index->version = ntohs(header->version); if (index->version != 1) - return error("Unsupported version for bitmap index file (%d)", index->version); + return error(_("unsupported version '%d' for bitmap index file"), index->version); /* Parse known bitmap format options */ { @@ -176,15 +180,25 @@ static int load_bitmap_header(struct bitmap_index *index) unsigned char *index_end = index->map + index->map_size - the_hash_algo->rawsz; if ((flags & BITMAP_OPT_FULL_DAG) == 0) - return error("Unsupported options for bitmap index file " + BUG("unsupported options for bitmap index file " "(Git requires BITMAP_OPT_FULL_DAG)"); if (flags & BITMAP_OPT_HASH_CACHE) { if (cache_size > index_end - index->map - header_size) - return error("corrupted bitmap index file (too short to fit hash cache)"); + return error(_("corrupted bitmap index file (too short to fit hash cache)")); index->hashes = (void *)(index_end - cache_size); index_end -= cache_size; } + + if (flags & BITMAP_OPT_LOOKUP_TABLE) { + size_t table_size = st_mult(ntohl(header->entry_count), + BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH); + if (table_size > index_end - index->map - header_size) + return error(_("corrupted bitmap index file (too short to fit lookup table)")); + if (git_env_bool("GIT_TEST_READ_COMMIT_TABLE", 1)) + index->table_lookup = (void *)(index_end - table_size); + index_end -= table_size; + } } index->entry_count = ntohl(header->entry_count); @@ -211,11 +225,13 @@ static struct stored_bitmap *store_bitmap(struct bitmap_index *index, hash_pos = kh_put_oid_map(index->bitmaps, stored->oid, &ret); - /* a 0 return code means the insertion succeeded with no changes, - * because the SHA1 already existed on the map. this is bad, there - * shouldn't be duplicated commits in the index */ + /* + * A 0 return code means the insertion succeeded with no changes, + * because the SHA1 already existed on the map. This is bad, there + * shouldn't be duplicated commits in the index. + */ if (ret == 0) { - error("Duplicate entry in bitmap index: %s", oid_to_hex(oid)); + error(_("duplicate entry in bitmap index: '%s'"), oid_to_hex(oid)); return NULL; } @@ -259,14 +275,14 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) struct object_id oid; if (index->map_size - index->map_pos < 6) - return error("corrupt ewah bitmap: truncated header for entry %d", i); + return error(_("corrupt ewah bitmap: truncated header for entry %d"), i); commit_idx_pos = read_be32(index->map, &index->map_pos); xor_offset = read_u8(index->map, &index->map_pos); flags = read_u8(index->map, &index->map_pos); if (nth_bitmap_object_oid(index, &oid, commit_idx_pos) < 0) - return error("corrupt ewah bitmap: commit index %u out of range", + return error(_("corrupt ewah bitmap: commit index %u out of range"), (unsigned)commit_idx_pos); bitmap = read_bitmap_1(index); @@ -274,13 +290,13 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) return -1; if (xor_offset > MAX_XOR_OFFSET || xor_offset > i) - return error("Corrupted bitmap pack index"); + return error(_("corrupted bitmap pack index")); if (xor_offset > 0) { xor_bitmap = recent_bitmaps[(i - xor_offset) % MAX_XOR_OFFSET]; - if (xor_bitmap == NULL) - return error("Invalid XOR offset in bitmap pack index"); + if (!xor_bitmap) + return error(_("invalid XOR offset in bitmap pack index")); } recent_bitmaps[i % MAX_XOR_OFFSET] = store_bitmap( @@ -292,9 +308,12 @@ static int load_bitmap_entries_v1(struct bitmap_index *index) char *midx_bitmap_filename(struct multi_pack_index *midx) { - return xstrfmt("%s-%s.bitmap", - get_midx_filename(midx->object_dir), - hash_to_hex(get_midx_checksum(midx))); + struct strbuf buf = STRBUF_INIT; + + get_midx_filename(&buf, midx->object_dir); + strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx))); + + return strbuf_detach(&buf, NULL); } char *pack_bitmap_filename(struct packed_git *p) @@ -310,24 +329,32 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, struct multi_pack_index *midx) { struct stat st; - char *idx_name = midx_bitmap_filename(midx); - int fd = git_open(idx_name); - - free(idx_name); - - if (fd < 0) + char *bitmap_name = midx_bitmap_filename(midx); + int fd = git_open(bitmap_name); + uint32_t i, preferred_pack; + struct packed_git *preferred; + + if (fd < 0) { + if (errno != ENOENT) + warning_errno("cannot open '%s'", bitmap_name); + free(bitmap_name); return -1; + } + free(bitmap_name); if (fstat(fd, &st)) { + error_errno(_("cannot fstat bitmap file")); close(fd); return -1; } if (bitmap_git->pack || bitmap_git->midx) { - /* ignore extra bitmap file; we can only handle one */ - warning("ignoring extra bitmap file: %s", - get_midx_filename(midx->object_dir)); + struct strbuf buf = STRBUF_INIT; + get_midx_filename(&buf, midx->object_dir); + trace2_data_string("bitmap", the_repository, + "ignoring extra midx bitmap file", buf.buf); close(fd); + strbuf_release(&buf); return -1; } @@ -341,19 +368,44 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git, if (load_bitmap_header(bitmap_git) < 0) goto cleanup; - if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) + if (!hasheq(get_midx_checksum(bitmap_git->midx), bitmap_git->checksum)) { + error(_("checksum doesn't match in MIDX and bitmap")); goto cleanup; + } - if (load_midx_revindex(bitmap_git->midx) < 0) { + if (load_midx_revindex(bitmap_git->midx)) { warning(_("multi-pack bitmap is missing required reverse index")); goto cleanup; } + + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) { + warning(_("could not open pack %s"), + bitmap_git->midx->pack_names[i]); + goto cleanup; + } + } + + if (midx_preferred_pack(bitmap_git->midx, &preferred_pack) < 0) { + warning(_("could not determine MIDX preferred pack")); + goto cleanup; + } + + preferred = bitmap_git->midx->packs[preferred_pack]; + if (!is_pack_valid(preferred)) { + warning(_("preferred pack (%s) is invalid"), + preferred->pack_name); + goto cleanup; + } + return 0; cleanup: munmap(bitmap_git->map, bitmap_git->map_size); bitmap_git->map_size = 0; + bitmap_git->map_pos = 0; bitmap_git->map = NULL; + bitmap_git->midx = NULL; return -1; } @@ -361,26 +413,28 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git { int fd; struct stat st; - char *idx_name; + char *bitmap_name; - if (open_pack_index(packfile)) - return -1; - - idx_name = pack_bitmap_filename(packfile); - fd = git_open(idx_name); - free(idx_name); + bitmap_name = pack_bitmap_filename(packfile); + fd = git_open(bitmap_name); - if (fd < 0) + if (fd < 0) { + if (errno != ENOENT) + warning_errno("cannot open '%s'", bitmap_name); + free(bitmap_name); return -1; + } + free(bitmap_name); if (fstat(fd, &st)) { + error_errno(_("cannot fstat bitmap file")); close(fd); return -1; } if (bitmap_git->pack || bitmap_git->midx) { - /* ignore extra bitmap file; we can only handle one */ - warning("ignoring extra bitmap file: %s", packfile->pack_name); + trace2_data_string("bitmap", the_repository, + "ignoring extra bitmap file", packfile->pack_name); close(fd); return -1; } @@ -400,13 +454,17 @@ static int open_pack_bitmap_1(struct bitmap_index *bitmap_git, struct packed_git munmap(bitmap_git->map, bitmap_git->map_size); bitmap_git->map = NULL; bitmap_git->map_size = 0; + bitmap_git->map_pos = 0; + bitmap_git->pack = NULL; return -1; } + trace2_data_string("bitmap", the_repository, "opened bitmap file", + packfile->pack_name); return 0; } -static int load_reverse_index(struct bitmap_index *bitmap_git) +static int load_reverse_index(struct repository *r, struct bitmap_index *bitmap_git) { if (bitmap_is_midx(bitmap_git)) { uint32_t i; @@ -420,25 +478,23 @@ static int load_reverse_index(struct bitmap_index *bitmap_git) * since we will need to make use of them in pack-objects. */ for (i = 0; i < bitmap_git->midx->num_packs; i++) { - if (prepare_midx_pack(the_repository, bitmap_git->midx, i)) - die(_("load_reverse_index: could not open pack")); - ret = load_pack_revindex(bitmap_git->midx->packs[i]); + ret = load_pack_revindex(r, bitmap_git->midx->packs[i]); if (ret) return ret; } return 0; } - return load_pack_revindex(bitmap_git->pack); + return load_pack_revindex(r, bitmap_git->pack); } -static int load_bitmap(struct bitmap_index *bitmap_git) +static int load_bitmap(struct repository *r, struct bitmap_index *bitmap_git) { assert(bitmap_git->map); bitmap_git->bitmaps = kh_init_oid_map(); bitmap_git->ext_index.positions = kh_init_oid_pos(); - if (load_reverse_index(bitmap_git)) + if (load_reverse_index(r, bitmap_git)) goto failed; if (!(bitmap_git->commits = read_bitmap_1(bitmap_git)) || @@ -447,7 +503,7 @@ static int load_bitmap(struct bitmap_index *bitmap_git) !(bitmap_git->tags = read_bitmap_1(bitmap_git))) goto failed; - if (load_bitmap_entries_v1(bitmap_git) < 0) + if (!bitmap_git->table_lookup && load_bitmap_entries_v1(bitmap_git) < 0) goto failed; return 0; @@ -472,11 +528,16 @@ static int open_pack_bitmap(struct repository *r, struct packed_git *p; int ret = -1; - assert(!bitmap_git->map); - for (p = get_all_packs(r); p; p = p->next) { - if (open_pack_bitmap_1(bitmap_git, p) == 0) + if (open_pack_bitmap_1(bitmap_git, p) == 0) { ret = 0; + /* + * The only reason to keep looking is to report + * duplicates. + */ + if (!trace2_is_enabled()) + break; + } } return ret; @@ -485,32 +546,42 @@ static int open_pack_bitmap(struct repository *r, static int open_midx_bitmap(struct repository *r, struct bitmap_index *bitmap_git) { + int ret = -1; struct multi_pack_index *midx; assert(!bitmap_git->map); for (midx = get_multi_pack_index(r); midx; midx = midx->next) { if (!open_midx_bitmap_1(bitmap_git, midx)) - return 0; + ret = 0; } - return -1; + return ret; } static int open_bitmap(struct repository *r, struct bitmap_index *bitmap_git) { + int found; + assert(!bitmap_git->map); - if (!open_midx_bitmap(r, bitmap_git)) - return 0; - return open_pack_bitmap(r, bitmap_git); + found = !open_midx_bitmap(r, bitmap_git); + + /* + * these will all be skipped if we opened a midx bitmap; but run it + * anyway if tracing is enabled to report the duplicates + */ + if (!found || trace2_is_enabled()) + found |= !open_pack_bitmap(r, bitmap_git); + + return found ? 0 : -1; } struct bitmap_index *prepare_bitmap_git(struct repository *r) { struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); - if (!open_bitmap(r, bitmap_git) && !load_bitmap(bitmap_git)) + if (!open_bitmap(r, bitmap_git) && !load_bitmap(r, bitmap_git)) return bitmap_git; free_bitmap_index(bitmap_git); @@ -519,9 +590,10 @@ struct bitmap_index *prepare_bitmap_git(struct repository *r) struct bitmap_index *prepare_midx_bitmap_git(struct multi_pack_index *midx) { + struct repository *r = the_repository; struct bitmap_index *bitmap_git = xcalloc(1, sizeof(*bitmap_git)); - if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(bitmap_git)) + if (!open_midx_bitmap_1(bitmap_git, midx) && !load_bitmap(r, bitmap_git)) return bitmap_git; free_bitmap_index(bitmap_git); @@ -534,13 +606,255 @@ struct include_data { struct bitmap *seen; }; +struct bitmap_lookup_table_triplet { + uint32_t commit_pos; + uint64_t offset; + uint32_t xor_row; +}; + +struct bitmap_lookup_table_xor_item { + struct object_id oid; + uint64_t offset; +}; + +/* + * Given a `triplet` struct pointer and pointer `p`, this + * function reads the triplet beginning at `p` into the struct. + * Note that this function assumes that there is enough memory + * left for filling the `triplet` struct from `p`. + */ +static int bitmap_lookup_table_get_triplet_by_pointer(struct bitmap_lookup_table_triplet *triplet, + const unsigned char *p) +{ + if (!triplet) + return -1; + + triplet->commit_pos = get_be32(p); + p += sizeof(uint32_t); + triplet->offset = get_be64(p); + p += sizeof(uint64_t); + triplet->xor_row = get_be32(p); + return 0; +} + +/* + * This function gets the raw triplet from `row`'th row in the + * lookup table and fills that data to the `triplet`. + */ +static int bitmap_lookup_table_get_triplet(struct bitmap_index *bitmap_git, + uint32_t pos, + struct bitmap_lookup_table_triplet *triplet) +{ + unsigned char *p = NULL; + if (pos >= bitmap_git->entry_count) + return error(_("corrupt bitmap lookup table: triplet position out of index")); + + p = bitmap_git->table_lookup + st_mult(pos, BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH); + + return bitmap_lookup_table_get_triplet_by_pointer(triplet, p); +} + +/* + * Searches for a matching triplet. `commit_pos` is a pointer + * to the wanted commit position value. `table_entry` points to + * a triplet in lookup table. The first 4 bytes of each + * triplet (pointed by `table_entry`) are compared with `*commit_pos`. + */ +static int triplet_cmp(const void *commit_pos, const void *table_entry) +{ + + uint32_t a = *(uint32_t *)commit_pos; + uint32_t b = get_be32(table_entry); + if (a > b) + return 1; + else if (a < b) + return -1; + + return 0; +} + +static uint32_t bitmap_bsearch_pos(struct bitmap_index *bitmap_git, + struct object_id *oid, + uint32_t *result) +{ + int found; + + if (bitmap_is_midx(bitmap_git)) + found = bsearch_midx(oid, bitmap_git->midx, result); + else + found = bsearch_pack(oid, bitmap_git->pack, result); + + return found; +} + +/* + * `bsearch_triplet_by_pos` function searches for the raw triplet + * having commit position same as `commit_pos` and fills `triplet` + * object from the raw triplet. Returns 1 on success and 0 on + * failure. + */ +static int bitmap_bsearch_triplet_by_pos(uint32_t commit_pos, + struct bitmap_index *bitmap_git, + struct bitmap_lookup_table_triplet *triplet) +{ + unsigned char *p = bsearch(&commit_pos, bitmap_git->table_lookup, bitmap_git->entry_count, + BITMAP_LOOKUP_TABLE_TRIPLET_WIDTH, triplet_cmp); + + if (!p) + return -1; + + return bitmap_lookup_table_get_triplet_by_pointer(triplet, p); +} + +static struct stored_bitmap *lazy_bitmap_for_commit(struct bitmap_index *bitmap_git, + struct commit *commit) +{ + uint32_t commit_pos, xor_row; + uint64_t offset; + int flags; + struct bitmap_lookup_table_triplet triplet; + struct object_id *oid = &commit->object.oid; + struct ewah_bitmap *bitmap; + struct stored_bitmap *xor_bitmap = NULL; + const int bitmap_header_size = 6; + static struct bitmap_lookup_table_xor_item *xor_items = NULL; + static size_t xor_items_nr = 0, xor_items_alloc = 0; + static int is_corrupt = 0; + int xor_flags; + khiter_t hash_pos; + struct bitmap_lookup_table_xor_item *xor_item; + + if (is_corrupt) + return NULL; + + if (!bitmap_bsearch_pos(bitmap_git, oid, &commit_pos)) + return NULL; + + if (bitmap_bsearch_triplet_by_pos(commit_pos, bitmap_git, &triplet) < 0) + return NULL; + + xor_items_nr = 0; + offset = triplet.offset; + xor_row = triplet.xor_row; + + while (xor_row != 0xffffffff) { + ALLOC_GROW(xor_items, xor_items_nr + 1, xor_items_alloc); + + if (xor_items_nr + 1 >= bitmap_git->entry_count) { + error(_("corrupt bitmap lookup table: xor chain exceeds entry count")); + goto corrupt; + } + + if (bitmap_lookup_table_get_triplet(bitmap_git, xor_row, &triplet) < 0) + goto corrupt; + + xor_item = &xor_items[xor_items_nr]; + xor_item->offset = triplet.offset; + + if (nth_bitmap_object_oid(bitmap_git, &xor_item->oid, triplet.commit_pos) < 0) { + error(_("corrupt bitmap lookup table: commit index %u out of range"), + triplet.commit_pos); + goto corrupt; + } + + hash_pos = kh_get_oid_map(bitmap_git->bitmaps, xor_item->oid); + + /* + * If desired bitmap is already stored, we don't need + * to iterate further. Because we know that bitmaps + * that are needed to be parsed to parse this bitmap + * has already been stored. So, assign this stored bitmap + * to the xor_bitmap. + */ + if (hash_pos < kh_end(bitmap_git->bitmaps) && + (xor_bitmap = kh_value(bitmap_git->bitmaps, hash_pos))) + break; + xor_items_nr++; + xor_row = triplet.xor_row; + } + + while (xor_items_nr) { + xor_item = &xor_items[xor_items_nr - 1]; + bitmap_git->map_pos = xor_item->offset; + if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) { + error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""), + oid_to_hex(&xor_item->oid)); + goto corrupt; + } + + bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t); + xor_flags = read_u8(bitmap_git->map, &bitmap_git->map_pos); + bitmap = read_bitmap_1(bitmap_git); + + if (!bitmap) + goto corrupt; + + xor_bitmap = store_bitmap(bitmap_git, bitmap, &xor_item->oid, xor_bitmap, xor_flags); + xor_items_nr--; + } + + bitmap_git->map_pos = offset; + if (bitmap_git->map_size - bitmap_git->map_pos < bitmap_header_size) { + error(_("corrupt ewah bitmap: truncated header for bitmap of commit \"%s\""), + oid_to_hex(oid)); + goto corrupt; + } + + /* + * Don't bother reading the commit's index position or its xor + * offset: + * + * - The commit's index position is irrelevant to us, since + * load_bitmap_entries_v1 only uses it to learn the object + * id which is used to compute the hashmap's key. We already + * have an object id, so no need to look it up again. + * + * - The xor_offset is unusable for us, since it specifies how + * many entries previous to ours we should look at. This + * makes sense when reading the bitmaps sequentially (as in + * load_bitmap_entries_v1()), since we can keep track of + * each bitmap as we read them. + * + * But it can't work for us, since the bitmap's don't have a + * fixed size. So we learn the position of the xor'd bitmap + * from the commit table (and resolve it to a bitmap in the + * above if-statement). + * + * Instead, we can skip ahead and immediately read the flags and + * ewah bitmap. + */ + bitmap_git->map_pos += sizeof(uint32_t) + sizeof(uint8_t); + flags = read_u8(bitmap_git->map, &bitmap_git->map_pos); + bitmap = read_bitmap_1(bitmap_git); + + if (!bitmap) + goto corrupt; + + return store_bitmap(bitmap_git, bitmap, oid, xor_bitmap, flags); + +corrupt: + free(xor_items); + is_corrupt = 1; + return NULL; +} + struct ewah_bitmap *bitmap_for_commit(struct bitmap_index *bitmap_git, struct commit *commit) { khiter_t hash_pos = kh_get_oid_map(bitmap_git->bitmaps, commit->object.oid); - if (hash_pos >= kh_end(bitmap_git->bitmaps)) - return NULL; + if (hash_pos >= kh_end(bitmap_git->bitmaps)) { + struct stored_bitmap *bitmap = NULL; + if (!bitmap_git->table_lookup) + return NULL; + + /* this is a fairly hot codepath - no trace2_region please */ + /* NEEDSWORK: cache misses aren't recorded */ + bitmap = lazy_bitmap_for_commit(bitmap_git, commit); + if (!bitmap) + return NULL; + return lookup_stored_bitmap(bitmap); + } return lookup_stored_bitmap(kh_value(bitmap_git->bitmaps, hash_pos)); } @@ -642,7 +956,8 @@ static void show_object(struct object *object, const char *name, void *data_) bitmap_set(data->base, bitmap_pos); } -static void show_commit(struct commit *commit, void *data) +static void show_commit(struct commit *commit UNUSED, + void *data UNUSED) { } @@ -719,7 +1034,7 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, if (!or_with) return 0; - if (*base == NULL) + if (!*base) *base = ewah_to_bitmap(or_with); else bitmap_or_ewah(*base, or_with); @@ -727,11 +1042,165 @@ static int add_commit_to_bitmap(struct bitmap_index *bitmap_git, return 1; } +static struct bitmap *fill_in_bitmap(struct bitmap_index *bitmap_git, + struct rev_info *revs, + struct bitmap *base, + struct bitmap *seen) +{ + struct include_data incdata; + struct bitmap_show_data show_data; + + if (!base) + base = bitmap_new(); + + incdata.bitmap_git = bitmap_git; + incdata.base = base; + incdata.seen = seen; + + revs->include_check = should_include; + revs->include_check_obj = should_include_obj; + revs->include_check_data = &incdata; + + if (prepare_revision_walk(revs)) + die(_("revision walk setup failed")); + + show_data.bitmap_git = bitmap_git; + show_data.base = base; + + traverse_commit_list(revs, show_commit, show_object, &show_data); + + revs->include_check = NULL; + revs->include_check_obj = NULL; + revs->include_check_data = NULL; + + return base; +} + +struct bitmap_boundary_cb { + struct bitmap_index *bitmap_git; + struct bitmap *base; + + struct object_array boundary; +}; + +static void show_boundary_commit(struct commit *commit, void *_data) +{ + struct bitmap_boundary_cb *data = _data; + + if (commit->object.flags & BOUNDARY) + add_object_array(&commit->object, "", &data->boundary); + + if (commit->object.flags & UNINTERESTING) { + if (bitmap_walk_contains(data->bitmap_git, data->base, + &commit->object.oid)) + return; + + add_commit_to_bitmap(data->bitmap_git, &data->base, commit); + } +} + +static void show_boundary_object(struct object *object UNUSED, + const char *name UNUSED, + void *data UNUSED) +{ + BUG("should not be called"); +} + +static struct bitmap *find_boundary_objects(struct bitmap_index *bitmap_git, + struct rev_info *revs, + struct object_list *roots) +{ + struct bitmap_boundary_cb cb; + struct object_list *root; + unsigned int i; + unsigned int tmp_blobs, tmp_trees, tmp_tags; + int any_missing = 0; + + cb.bitmap_git = bitmap_git; + cb.base = bitmap_new(); + object_array_init(&cb.boundary); + + revs->ignore_missing_links = 1; + + /* + * OR in any existing reachability bitmaps among `roots` into + * `cb.base`. + */ + for (root = roots; root; root = root->next) { + struct object *object = root->item; + if (object->type != OBJ_COMMIT || + bitmap_walk_contains(bitmap_git, cb.base, &object->oid)) + continue; + + if (add_commit_to_bitmap(bitmap_git, &cb.base, + (struct commit *)object)) + continue; + + any_missing = 1; + } + + if (!any_missing) + goto cleanup; + + tmp_blobs = revs->blob_objects; + tmp_trees = revs->tree_objects; + tmp_tags = revs->blob_objects; + revs->blob_objects = 0; + revs->tree_objects = 0; + revs->tag_objects = 0; + + /* + * We didn't have complete coverage of the roots. First setup a + * revision walk to (a) OR in any bitmaps that are UNINTERESTING + * between the tips and boundary, and (b) record the boundary. + */ + trace2_region_enter("pack-bitmap", "boundary-prepare", the_repository); + if (prepare_revision_walk(revs)) + die("revision walk setup failed"); + trace2_region_leave("pack-bitmap", "boundary-prepare", the_repository); + + trace2_region_enter("pack-bitmap", "boundary-traverse", the_repository); + revs->boundary = 1; + traverse_commit_list_filtered(revs, + show_boundary_commit, + show_boundary_object, + &cb, NULL); + revs->boundary = 0; + trace2_region_leave("pack-bitmap", "boundary-traverse", the_repository); + + revs->blob_objects = tmp_blobs; + revs->tree_objects = tmp_trees; + revs->tag_objects = tmp_tags; + + reset_revision_walk(); + clear_object_flags(UNINTERESTING); + + /* + * Then add the boundary commit(s) as fill-in traversal tips. + */ + trace2_region_enter("pack-bitmap", "boundary-fill-in", the_repository); + for (i = 0; i < cb.boundary.nr; i++) { + struct object *obj = cb.boundary.objects[i].item; + if (bitmap_walk_contains(bitmap_git, cb.base, &obj->oid)) + obj->flags |= SEEN; + else + add_pending_object(revs, obj, ""); + } + if (revs->pending.nr) + cb.base = fill_in_bitmap(bitmap_git, revs, cb.base, NULL); + trace2_region_leave("pack-bitmap", "boundary-fill-in", the_repository); + +cleanup: + object_array_clear(&cb.boundary); + revs->ignore_missing_links = 0; + + return cb.base; +} + static struct bitmap *find_objects(struct bitmap_index *bitmap_git, struct rev_info *revs, struct object_list *roots, - struct bitmap *seen, - struct list_objects_filter_options *filter) + struct bitmap *seen) { struct bitmap *base = NULL; int needs_walk = 0; @@ -763,7 +1232,7 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, * Best case scenario: We found bitmaps for all the roots, * so the resulting `or` bitmap has the full reachability analysis */ - if (not_mapped == NULL) + if (!not_mapped) return base; roots = not_mapped; @@ -794,35 +1263,23 @@ static struct bitmap *find_objects(struct bitmap_index *bitmap_git, } if (needs_walk) { - struct include_data incdata; - struct bitmap_show_data show_data; - - if (base == NULL) - base = bitmap_new(); - - incdata.bitmap_git = bitmap_git; - incdata.base = base; - incdata.seen = seen; - - revs->include_check = should_include; - revs->include_check_obj = should_include_obj; - revs->include_check_data = &incdata; - - if (prepare_revision_walk(revs)) - die("revision walk setup failed"); - - show_data.bitmap_git = bitmap_git; - show_data.base = base; - - traverse_commit_list_filtered(filter, revs, - show_commit, show_object, - &show_data, NULL); - - revs->include_check = NULL; - revs->include_check_obj = NULL; - revs->include_check_data = NULL; + /* + * This fill-in traversal may walk over some objects + * again, since we have already traversed in order to + * find the boundary. + * + * But this extra walk should be extremely cheap, since + * all commit objects are loaded into memory, and + * because we skip walking to parents that are + * UNINTERESTING, since it will be marked in the haves + * bitmap already (or it has an on-disk bitmap, since + * OR-ing it in covers all of its ancestors). + */ + base = fill_in_bitmap(bitmap_git, revs, base, seen); } + object_list_free(¬_mapped); + return base; } @@ -837,7 +1294,7 @@ static void show_extended_objects(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { struct object *obj; - if (!bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) + if (!bitmap_get(objects, st_add(bitmap_num_objects(bitmap_git), i))) continue; obj = eindex->objects[i]; @@ -1016,7 +1473,7 @@ static void filter_bitmap_exclude_type(struct bitmap_index *bitmap_git, * them individually. */ for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_num_objects(bitmap_git); + size_t pos = st_add(i, bitmap_num_objects(bitmap_git)); if (eindex->objects[i]->type == type && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos)) @@ -1107,7 +1564,7 @@ static void filter_bitmap_blob_limit(struct bitmap_index *bitmap_git, } for (i = 0; i < eindex->count; i++) { - uint32_t pos = i + bitmap_num_objects(bitmap_git); + size_t pos = st_add(i, bitmap_num_objects(bitmap_git)); if (eindex->objects[i]->type == OBJ_BLOB && bitmap_get(to_filter, pos) && !bitmap_get(tips, pos) && @@ -1209,11 +1666,35 @@ static int can_filter_bitmap(struct list_objects_filter_options *filter) return !filter_bitmap(NULL, NULL, NULL, filter); } + +static void filter_packed_objects_from_bitmap(struct bitmap_index *bitmap_git, + struct bitmap *result) +{ + struct eindex *eindex = &bitmap_git->ext_index; + uint32_t objects_nr; + size_t i, pos; + + objects_nr = bitmap_num_objects(bitmap_git); + pos = objects_nr / BITS_IN_EWORD; + + if (pos > result->word_alloc) + pos = result->word_alloc; + + memset(result->words, 0x00, sizeof(eword_t) * pos); + for (i = pos * BITS_IN_EWORD; i < objects_nr; i++) + bitmap_unset(result, i); + + for (i = 0; i < eindex->count; ++i) { + if (has_object_pack(&eindex->objects[i]->oid)) + bitmap_unset(result, objects_nr + i); + } +} + struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, - struct list_objects_filter_options *filter, int filter_provided_objects) { unsigned int i; + int use_boundary_traversal; struct object_list *wants = NULL; struct object_list *haves = NULL; @@ -1231,7 +1712,7 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (revs->prune) return NULL; - if (!can_filter_bitmap(filter)) + if (!can_filter_bitmap(&revs->filter)) return NULL; /* try to open a bitmapped pack, but don't parse it yet @@ -1264,13 +1745,21 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, object_list_insert(object, &wants); } - /* - * if we have a HAVES list, but none of those haves is contained - * in the packfile that has a bitmap, we don't have anything to - * optimize here - */ - if (haves && !in_bitmapped_pack(bitmap_git, haves)) - goto cleanup; + use_boundary_traversal = git_env_bool(GIT_TEST_PACK_USE_BITMAP_BOUNDARY_TRAVERSAL, -1); + if (use_boundary_traversal < 0) { + prepare_repo_settings(revs->repo); + use_boundary_traversal = revs->repo->settings.pack_use_bitmap_boundary_traversal; + } + + if (!use_boundary_traversal) { + /* + * if we have a HAVES list, but none of those haves is contained + * in the packfile that has a bitmap, we don't have anything to + * optimize here + */ + if (haves && !in_bitmapped_pack(bitmap_git, haves)) + goto cleanup; + } /* if we don't want anything, we're done here */ if (!wants) @@ -1281,24 +1770,36 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, * from disk. this is the point of no return; after this the rev_list * becomes invalidated and we must perform the revwalk through bitmaps */ - if (load_bitmap(bitmap_git) < 0) + if (load_bitmap(revs->repo, bitmap_git) < 0) goto cleanup; - object_array_clear(&revs->pending); + if (!use_boundary_traversal) + object_array_clear(&revs->pending); if (haves) { - revs->ignore_missing_links = 1; - haves_bitmap = find_objects(bitmap_git, revs, haves, NULL, - filter); - reset_revision_walk(); - revs->ignore_missing_links = 0; + if (use_boundary_traversal) { + trace2_region_enter("pack-bitmap", "haves/boundary", the_repository); + haves_bitmap = find_boundary_objects(bitmap_git, revs, haves); + trace2_region_leave("pack-bitmap", "haves/boundary", the_repository); + } else { + trace2_region_enter("pack-bitmap", "haves/classic", the_repository); + revs->ignore_missing_links = 1; + haves_bitmap = find_objects(bitmap_git, revs, haves, NULL); + reset_revision_walk(); + revs->ignore_missing_links = 0; + trace2_region_leave("pack-bitmap", "haves/classic", the_repository); + } - if (haves_bitmap == NULL) + if (!haves_bitmap) BUG("failed to perform bitmap walk"); } - wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap, - filter); + if (use_boundary_traversal) { + object_array_clear(&revs->pending); + reset_revision_walk(); + } + + wants_bitmap = find_objects(bitmap_git, revs, wants, haves_bitmap); if (!wants_bitmap) BUG("failed to perform bitmap walk"); @@ -1306,8 +1807,13 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs, if (haves_bitmap) bitmap_and_not(wants_bitmap, haves_bitmap); - filter_bitmap(bitmap_git, (filter && filter_provided_objects) ? NULL : wants, - wants_bitmap, filter); + filter_bitmap(bitmap_git, + (revs->filter.choice && filter_provided_objects) ? NULL : wants, + wants_bitmap, + &revs->filter); + + if (revs->unpacked) + filter_packed_objects_from_bitmap(bitmap_git, wants_bitmap); bitmap_git->result = wants_bitmap; bitmap_git->haves = haves_bitmap; @@ -1328,8 +1834,10 @@ cleanup: * -1 means "stop trying further objects"; 0 means we may or may not have * reused, but you can keep feeding bits. */ -static int try_partial_reuse(struct packed_git *pack, - size_t pos, +static int try_partial_reuse(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + size_t bitmap_pos, + uint32_t pack_pos, struct bitmap *reuse, struct pack_window **w_curs) { @@ -1337,40 +1845,18 @@ static int try_partial_reuse(struct packed_git *pack, enum object_type type; unsigned long size; - /* - * try_partial_reuse() is called either on (a) objects in the - * bitmapped pack (in the case of a single-pack bitmap) or (b) - * objects in the preferred pack of a multi-pack bitmap. - * Importantly, the latter can pretend as if only a single pack - * exists because: - * - * - The first pack->num_objects bits of a MIDX bitmap are - * reserved for the preferred pack, and - * - * - Ties due to duplicate objects are always resolved in - * favor of the preferred pack. - * - * Therefore we do not need to ever ask the MIDX for its copy of - * an object by OID, since it will always select it from the - * preferred pack. Likewise, the selected copy of the base - * object for any deltas will reside in the same pack. - * - * This means that we can reuse pos when looking up the bit in - * the reuse bitmap, too, since bits corresponding to the - * preferred pack precede all bits from other packs. - */ - - if (pos >= pack->num_objects) - return -1; /* not actually in the pack or MIDX preferred pack */ + if (pack_pos >= pack->p->num_objects) + return -1; /* not actually in the pack */ - offset = delta_obj_offset = pack_pos_to_offset(pack, pos); - type = unpack_object_header(pack, w_curs, &offset, &size); + offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos); + type = unpack_object_header(pack->p, w_curs, &offset, &size); if (type < 0) return -1; /* broken packfile, punt */ if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) { off_t base_offset; uint32_t base_pos; + uint32_t base_bitmap_pos; /* * Find the position of the base object so we can look it up @@ -1380,24 +1866,48 @@ static int try_partial_reuse(struct packed_git *pack, * and the normal slow path will complain about it in * more detail. */ - base_offset = get_delta_base(pack, w_curs, &offset, type, + base_offset = get_delta_base(pack->p, w_curs, &offset, type, delta_obj_offset); if (!base_offset) return 0; - if (offset_to_pack_pos(pack, base_offset, &base_pos) < 0) - return 0; - /* - * We assume delta dependencies always point backwards. This - * lets us do a single pass, and is basically always true - * due to the way OFS_DELTAs work. You would not typically - * find REF_DELTA in a bitmapped pack, since we only bitmap - * packs we write fresh, and OFS_DELTA is the default). But - * let's double check to make sure the pack wasn't written with - * odd parameters. - */ - if (base_pos >= pos) - return 0; + offset_to_pack_pos(pack->p, base_offset, &base_pos); + + if (bitmap_is_midx(bitmap_git)) { + /* + * Cross-pack deltas are rejected for now, but could + * theoretically be supported in the future. + * + * We would need to ensure that we're sending both + * halves of the delta/base pair, regardless of whether + * or not the two cross a pack boundary. If they do, + * then we must convert the delta to an REF_DELTA to + * refer back to the base in the other pack. + * */ + if (midx_pair_to_pack_pos(bitmap_git->midx, + pack->pack_int_id, + base_offset, + &base_bitmap_pos) < 0) { + return 0; + } + } else { + if (offset_to_pack_pos(pack->p, base_offset, + &base_pos) < 0) + return 0; + /* + * We assume delta dependencies always point backwards. + * This lets us do a single pass, and is basically + * always true due to the way OFS_DELTAs work. You would + * not typically find REF_DELTA in a bitmapped pack, + * since we only bitmap packs we write fresh, and + * OFS_DELTA is the default). But let's double check to + * make sure the pack wasn't written with odd + * parameters. + */ + if (base_pos >= pack_pos) + return 0; + base_bitmap_pos = pack->bitmap_pos + base_pos; + } /* * And finally, if we're not sending the base as part of our @@ -1407,76 +1917,89 @@ static int try_partial_reuse(struct packed_git *pack, * to REF_DELTA on the fly. Better to just let the normal * object_entry code path handle it. */ - if (!bitmap_get(reuse, base_pos)) + if (!bitmap_get(reuse, base_bitmap_pos)) return 0; } /* * If we got here, then the object is OK to reuse. Mark it. */ - bitmap_set(reuse, pos); + bitmap_set(reuse, bitmap_pos); return 0; } -static uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git) +static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git, + struct bitmapped_pack *pack, + struct bitmap *reuse) { - struct multi_pack_index *m = bitmap_git->midx; - if (!m) - BUG("midx_preferred_pack: requires non-empty MIDX"); - return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0)); -} - -int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, - struct packed_git **packfile_out, - uint32_t *entries, - struct bitmap **reuse_out) -{ - struct packed_git *pack; struct bitmap *result = bitmap_git->result; - struct bitmap *reuse; struct pack_window *w_curs = NULL; - size_t i = 0; - uint32_t offset; - uint32_t objects_nr; + size_t pos = pack->bitmap_pos / BITS_IN_EWORD; - assert(result); + if (!pack->bitmap_pos) { + /* + * If we're processing the first (in the case of a MIDX, the + * preferred pack) or the only (in the case of single-pack + * bitmaps) pack, then we can reuse whole words at a time. + * + * This is because we know that any deltas in this range *must* + * have their bases chosen from the same pack, since: + * + * - In the single pack case, there is no other pack to choose + * them from. + * + * - In the MIDX case, the first pack is the preferred pack, so + * all ties are broken in favor of that pack (i.e. the one + * we're currently processing). So any duplicate bases will be + * resolved in favor of the pack we're processing. + */ + while (pos < result->word_alloc && + pos < pack->bitmap_nr / BITS_IN_EWORD && + result->words[pos] == (eword_t)~0) + pos++; + memset(reuse->words, 0xFF, pos * sizeof(eword_t)); + } - load_reverse_index(bitmap_git); + for (; pos < result->word_alloc; pos++) { + eword_t word = result->words[pos]; + size_t offset; - if (bitmap_is_midx(bitmap_git)) - pack = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)]; - else - pack = bitmap_git->pack; - objects_nr = pack->num_objects; + for (offset = 0; offset < BITS_IN_EWORD; offset++) { + size_t bit_pos; + uint32_t pack_pos; - while (i < result->word_alloc && result->words[i] == (eword_t)~0) - i++; + if (word >> offset == 0) + break; - /* - * Don't mark objects not in the packfile or preferred pack. This bitmap - * marks objects eligible for reuse, but the pack-reuse code only - * understands how to reuse a single pack. Since the preferred pack is - * guaranteed to have all bases for its deltas (in a multi-pack bitmap), - * we use it instead of another pack. In single-pack bitmaps, the choice - * is made for us. - */ - if (i > objects_nr / BITS_IN_EWORD) - i = objects_nr / BITS_IN_EWORD; + offset += ewah_bit_ctz64(word >> offset); - reuse = bitmap_word_alloc(i); - memset(reuse->words, 0xFF, i * sizeof(eword_t)); + bit_pos = pos * BITS_IN_EWORD + offset; + if (bit_pos < pack->bitmap_pos) + continue; + if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr) + goto done; - for (; i < result->word_alloc; ++i) { - eword_t word = result->words[i]; - size_t pos = (i * BITS_IN_EWORD); + if (bitmap_is_midx(bitmap_git)) { + uint32_t midx_pos; + off_t ofs; - for (offset = 0; offset < BITS_IN_EWORD; ++offset) { - if ((word >> offset) == 0) - break; + midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos); + ofs = nth_midxed_offset(bitmap_git->midx, midx_pos); - offset += ewah_bit_ctz64(word >> offset); - if (try_partial_reuse(pack, pos + offset, - reuse, &w_curs) < 0) { + if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0) + BUG("could not find object in pack %s " + "at offset %"PRIuMAX" in MIDX", + pack_basename(pack->p), (uintmax_t)ofs); + } else { + pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos)); + if (pack_pos >= pack->p->num_objects) + BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")", + pack_basename(pack->p), (uintmax_t)pack_pos, + pack->p->num_objects); + } + + if (try_partial_reuse(bitmap_git, pack, bit_pos, + pack_pos, reuse, &w_curs) < 0) { /* * try_partial_reuse indicated we couldn't reuse * any bits, so there is no point in trying more @@ -1493,11 +2016,98 @@ int reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, done: unuse_pack(&w_curs); +} - *entries = bitmap_popcount(reuse); - if (!*entries) { - bitmap_free(reuse); +static int bitmapped_pack_cmp(const void *va, const void *vb) +{ + const struct bitmapped_pack *a = va; + const struct bitmapped_pack *b = vb; + + if (a->bitmap_pos < b->bitmap_pos) return -1; + if (a->bitmap_pos > b->bitmap_pos) + return 1; + return 0; +} + +void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git, + struct bitmapped_pack **packs_out, + size_t *packs_nr_out, + struct bitmap **reuse_out, + int multi_pack_reuse) +{ + struct repository *r = the_repository; + struct bitmapped_pack *packs = NULL; + struct bitmap *result = bitmap_git->result; + struct bitmap *reuse; + size_t i; + size_t packs_nr = 0, packs_alloc = 0; + size_t word_alloc; + uint32_t objects_nr = 0; + + assert(result); + + load_reverse_index(r, bitmap_git); + + if (!bitmap_is_midx(bitmap_git) || !bitmap_git->midx->chunk_bitmapped_packs) + multi_pack_reuse = 0; + + if (multi_pack_reuse) { + for (i = 0; i < bitmap_git->midx->num_packs; i++) { + struct bitmapped_pack pack; + if (nth_bitmapped_pack(r, bitmap_git->midx, &pack, i) < 0) { + warning(_("unable to load pack: '%s', disabling pack-reuse"), + bitmap_git->midx->pack_names[i]); + free(packs); + return; + } + + if (!pack.bitmap_nr) + continue; + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + memcpy(&packs[packs_nr++], &pack, sizeof(pack)); + + objects_nr += pack.p->num_objects; + } + + QSORT(packs, packs_nr, bitmapped_pack_cmp); + } else { + struct packed_git *pack; + + if (bitmap_is_midx(bitmap_git)) { + uint32_t preferred_pack_pos; + + if (midx_preferred_pack(bitmap_git->midx, &preferred_pack_pos) < 0) { + warning(_("unable to compute preferred pack, disabling pack-reuse")); + return; + } + + pack = bitmap_git->midx->packs[preferred_pack_pos]; + } else { + pack = bitmap_git->pack; + } + + ALLOC_GROW(packs, packs_nr + 1, packs_alloc); + packs[packs_nr].p = pack; + packs[packs_nr].bitmap_nr = pack->num_objects; + packs[packs_nr].bitmap_pos = 0; + + objects_nr = packs[packs_nr++].bitmap_nr; + } + + word_alloc = objects_nr / BITS_IN_EWORD; + if (objects_nr % BITS_IN_EWORD) + word_alloc++; + reuse = bitmap_word_alloc(word_alloc); + + for (i = 0; i < packs_nr; i++) + reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse); + + if (bitmap_is_empty(reuse)) { + free(packs); + bitmap_free(reuse); + return; } /* @@ -1505,9 +2115,9 @@ done: * need to be handled separately. */ bitmap_and_not(result, reuse); - *packfile_out = pack; + *packs_out = packs; + *packs_nr_out = packs_nr; *reuse_out = reuse; - return 0; } int bitmap_walk_contains(struct bitmap_index *bitmap_git, @@ -1558,7 +2168,8 @@ static uint32_t count_object_type(struct bitmap_index *bitmap_git, for (i = 0; i < eindex->count; ++i) { if (eindex->objects[i]->type == type && - bitmap_get(objects, bitmap_num_objects(bitmap_git) + i)) + bitmap_get(objects, + st_add(bitmap_num_objects(bitmap_git), i))) count++; } @@ -1619,21 +2230,22 @@ static void test_bitmap_type(struct bitmap_test_data *tdata, } if (bitmap_type == OBJ_NONE) - die("object %s not found in type bitmaps", + die(_("object '%s' not found in type bitmaps"), oid_to_hex(&obj->oid)); if (bitmaps_nr > 1) - die("object %s does not have a unique type", + die(_("object '%s' does not have a unique type"), oid_to_hex(&obj->oid)); if (bitmap_type != obj->type) - die("object %s: real type %s, expected: %s", + die(_("object '%s': real type '%s', expected: '%s'"), oid_to_hex(&obj->oid), type_name(obj->type), type_name(bitmap_type)); } -static void test_show_object(struct object *object, const char *name, +static void test_show_object(struct object *object, + const char *name UNUSED, void *data) { struct bitmap_test_data *tdata = data; @@ -1641,7 +2253,7 @@ static void test_show_object(struct object *object, const char *name, bitmap_pos = bitmap_position(tdata->bitmap_git, &object->oid); if (bitmap_pos < 0) - die("Object not in bitmap: %s\n", oid_to_hex(&object->oid)); + die(_("object not in bitmap: '%s'"), oid_to_hex(&object->oid)); test_bitmap_type(tdata, object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); @@ -1656,7 +2268,7 @@ static void test_show_commit(struct commit *commit, void *data) bitmap_pos = bitmap_position(tdata->bitmap_git, &commit->object.oid); if (bitmap_pos < 0) - die("Object not in bitmap: %s\n", oid_to_hex(&commit->object.oid)); + die(_("object not in bitmap: '%s'"), oid_to_hex(&commit->object.oid)); test_bitmap_type(tdata, &commit->object, bitmap_pos); bitmap_set(tdata->base, bitmap_pos); @@ -1673,26 +2285,28 @@ void test_bitmap_walk(struct rev_info *revs) struct ewah_bitmap *bm; if (!(bitmap_git = prepare_bitmap_git(revs->repo))) - die("failed to load bitmap indexes"); + die(_("failed to load bitmap indexes")); if (revs->pending.nr != 1) - die("you must specify exactly one commit to test"); + die(_("you must specify exactly one commit to test")); - fprintf(stderr, "Bitmap v%d test (%d entries loaded)\n", - bitmap_git->version, bitmap_git->entry_count); + fprintf_ln(stderr, "Bitmap v%d test (%d entries%s)", + bitmap_git->version, + bitmap_git->entry_count, + bitmap_git->table_lookup ? "" : " loaded"); root = revs->pending.objects[0].item; bm = bitmap_for_commit(bitmap_git, (struct commit *)root); if (bm) { - fprintf(stderr, "Found bitmap for %s. %d bits / %08x checksum\n", + fprintf_ln(stderr, "Found bitmap for '%s'. %d bits / %08x checksum", oid_to_hex(&root->oid), (int)bm->bit_size, ewah_checksum(bm)); result = ewah_to_bitmap(bm); } - if (result == NULL) - die("Commit %s doesn't have an indexed bitmap", oid_to_hex(&root->oid)); + if (!result) + die(_("commit '%s' doesn't have an indexed bitmap"), oid_to_hex(&root->oid)); revs->tag_objects = 1; revs->tree_objects = 1; @@ -1701,7 +2315,7 @@ void test_bitmap_walk(struct rev_info *revs) result_popcnt = bitmap_popcount(result); if (prepare_revision_walk(revs)) - die("revision walk setup failed"); + die(_("revision walk setup failed")); tdata.bitmap_git = bitmap_git; tdata.base = bitmap_new(); @@ -1717,24 +2331,39 @@ void test_bitmap_walk(struct rev_info *revs) stop_progress(&tdata.prg); if (bitmap_equals(result, tdata.base)) - fprintf(stderr, "OK!\n"); + fprintf_ln(stderr, "OK!"); else - die("mismatch in bitmap results"); - + die(_("mismatch in bitmap results")); + + bitmap_free(result); + bitmap_free(tdata.base); + bitmap_free(tdata.commits); + bitmap_free(tdata.trees); + bitmap_free(tdata.blobs); + bitmap_free(tdata.tags); free_bitmap_index(bitmap_git); } int test_bitmap_commits(struct repository *r) { - struct bitmap_index *bitmap_git = prepare_bitmap_git(r); struct object_id oid; MAYBE_UNUSED void *value; + struct bitmap_index *bitmap_git = prepare_bitmap_git(r); if (!bitmap_git) - die("failed to load bitmap indexes"); + die(_("failed to load bitmap indexes")); + + /* + * As this function is only used to print bitmap selected + * commits, we don't have to read the commit table. + */ + if (bitmap_git->table_lookup) { + if (load_bitmap_entries_v1(bitmap_git) < 0) + die(_("failed to load bitmap indexes")); + } kh_foreach(bitmap_git->bitmaps, oid, value, { - printf("%s\n", oid_to_hex(&oid)); + printf_ln("%s", oid_to_hex(&oid)); }); free_bitmap_index(bitmap_git); @@ -1742,6 +2371,33 @@ int test_bitmap_commits(struct repository *r) return 0; } +int test_bitmap_hashes(struct repository *r) +{ + struct bitmap_index *bitmap_git = prepare_bitmap_git(r); + struct object_id oid; + uint32_t i, index_pos; + + if (!bitmap_git || !bitmap_git->hashes) + goto cleanup; + + for (i = 0; i < bitmap_num_objects(bitmap_git); i++) { + if (bitmap_is_midx(bitmap_git)) + index_pos = pack_pos_to_midx(bitmap_git->midx, i); + else + index_pos = pack_pos_to_index(bitmap_git->pack, i); + + nth_bitmap_object_oid(bitmap_git, &oid, index_pos); + + printf_ln("%s %"PRIu32"", + oid_to_hex(&oid), get_be32(bitmap_git->hashes + index_pos)); + } + +cleanup: + free_bitmap_index(bitmap_git); + + return 0; +} + int rebuild_bitmap(const uint32_t *reposition, struct ewah_bitmap *source, struct bitmap *dest) @@ -1776,12 +2432,13 @@ int rebuild_bitmap(const uint32_t *reposition, uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, struct packing_data *mapping) { + struct repository *r = the_repository; uint32_t i, num_objects; uint32_t *reposition; if (!bitmap_is_midx(bitmap_git)) - load_reverse_index(bitmap_git); - else if (load_midx_revindex(bitmap_git->midx) < 0) + load_reverse_index(r, bitmap_git); + else if (load_midx_revindex(bitmap_git->midx)) BUG("rebuild_existing_bitmaps: missing required rev-cache " "extension"); @@ -1791,18 +2448,20 @@ uint32_t *create_bitmap_mapping(struct bitmap_index *bitmap_git, for (i = 0; i < num_objects; ++i) { struct object_id oid; struct object_entry *oe; + uint32_t index_pos; if (bitmap_is_midx(bitmap_git)) - nth_midxed_object_oid(&oid, - bitmap_git->midx, - pack_pos_to_midx(bitmap_git->midx, i)); + index_pos = pack_pos_to_midx(bitmap_git->midx, i); else - nth_packed_object_id(&oid, bitmap_git->pack, - pack_pos_to_index(bitmap_git->pack, i)); + index_pos = pack_pos_to_index(bitmap_git->pack, i); + nth_bitmap_object_oid(bitmap_git, &oid, index_pos); oe = packlist_find(mapping, &oid); - if (oe) + if (oe) { reposition[i] = oe_in_pack_pos(mapping, oe) + 1; + if (bitmap_git->hashes && !oe->hash) + oe->hash = get_be32(bitmap_git->hashes + index_pos); + } } return reposition; @@ -1819,9 +2478,17 @@ void free_bitmap_index(struct bitmap_index *b) ewah_pool_free(b->trees); ewah_pool_free(b->blobs); ewah_pool_free(b->tags); + if (b->bitmaps) { + struct stored_bitmap *sb; + kh_foreach_value(b->bitmaps, sb, { + ewah_pool_free(sb->root); + free(sb); + }); + } kh_destroy_oid_map(b->bitmaps); free(b->ext_index.objects); free(b->ext_index.hashes); + kh_destroy_oid_pos(b->ext_index.positions); bitmap_free(b->result); bitmap_free(b->haves); if (bitmap_is_midx(b)) { @@ -1884,7 +2551,7 @@ static off_t get_disk_usage_for_type(struct bitmap_index *bitmap_git, struct object_id oid; nth_midxed_object_oid(&oid, bitmap_git->midx, midx_pos); - die(_("could not find %s in pack %s at offset %"PRIuMAX), + die(_("could not find '%s' in pack '%s' at offset %"PRIuMAX), oid_to_hex(&oid), pack->pack_name, (uintmax_t)offset); @@ -1916,11 +2583,12 @@ static off_t get_disk_usage_for_extended(struct bitmap_index *bitmap_git) for (i = 0; i < eindex->count; i++) { struct object *obj = eindex->objects[i]; - if (!bitmap_get(result, bitmap_num_objects(bitmap_git) + i)) + if (!bitmap_get(result, + st_add(bitmap_num_objects(bitmap_git), i))) continue; if (oid_object_info_extended(the_repository, &obj->oid, &oi, 0) < 0) - die(_("unable to get disk usage of %s"), + die(_("unable to get disk usage of '%s'"), oid_to_hex(&obj->oid)); total += object_size; @@ -1953,7 +2621,11 @@ int bitmap_is_midx(struct bitmap_index *bitmap_git) const struct string_list *bitmap_preferred_tips(struct repository *r) { - return repo_config_get_value_multi(r, "pack.preferbitmaptips"); + const struct string_list *dest; + + if (!repo_config_get_string_multi(r, "pack.preferbitmaptips", &dest)) + return dest; + return NULL; } int bitmap_is_preferred_refname(struct repository *r, const char *refname) @@ -1971,3 +2643,48 @@ int bitmap_is_preferred_refname(struct repository *r, const char *refname) return 0; } + +static int verify_bitmap_file(const char *name) +{ + struct stat st; + unsigned char *data; + int fd = git_open(name); + int res = 0; + + /* It is OK to not have the file. */ + if (fd < 0 || fstat(fd, &st)) { + if (fd >= 0) + close(fd); + return 0; + } + + data = xmmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (!hashfile_checksum_valid(data, st.st_size)) + res = error(_("bitmap file '%s' has invalid checksum"), + name); + + munmap(data, st.st_size); + return res; +} + +int verify_bitmap_files(struct repository *r) +{ + int res = 0; + + for (struct multi_pack_index *m = get_multi_pack_index(r); + m; m = m->next) { + char *midx_bitmap_name = midx_bitmap_filename(m); + res |= verify_bitmap_file(midx_bitmap_name); + free(midx_bitmap_name); + } + + for (struct packed_git *p = get_all_packs(r); + p; p = p->next) { + char *pack_bitmap_name = pack_bitmap_filename(p); + res |= verify_bitmap_file(pack_bitmap_name); + free(pack_bitmap_name); + } + + return res; +} |