summaryrefslogtreecommitdiff
path: root/midx.c
diff options
context:
space:
mode:
authorDerrick Stolee <stolee@gmail.com>2018-07-12 19:39:27 (GMT)
committerJunio C Hamano <gitster@pobox.com>2018-07-20 18:27:28 (GMT)
commit32f3c541e3c8b3ad225c36b1430c9483d0e427ad (patch)
tree061563137f962ea4487730093a29b684b622b006 /midx.c
parent396f257018a6031c4eb0803d4693441ad8a9fd10 (diff)
downloadgit-32f3c541e3c8b3ad225c36b1430c9483d0e427ad.zip
git-32f3c541e3c8b3ad225c36b1430c9483d0e427ad.tar.gz
git-32f3c541e3c8b3ad225c36b1430c9483d0e427ad.tar.bz2
multi-pack-index: write pack names in chunk
The multi-pack-index needs to track which packfiles it indexes. Store these in our first required chunk. Since filenames are not well structured, add padding to keep good alignment in later chunks. Modify the 'git multi-pack-index read' subcommand to output the existence of the pack-file name chunk. Modify t5319-multi-pack-index.sh to reflect this new output and the new expected number of chunks. Defense in depth: A pattern we are using in the multi-pack-index feature is to verify the data as we write it. We want to ensure we never write invalid data to the multi-pack-index. There are many checks that verify that the values we are writing fit the format definitions. This mainly helps developers while working on the feature, but it can also identify issues that only appear when dealing with very large data sets. These large sets are hard to encode into test cases. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'midx.c')
-rw-r--r--midx.c174
1 files changed, 172 insertions, 2 deletions
diff --git a/midx.c b/midx.c
index f742d7c..ca7a32b 100644
--- a/midx.c
+++ b/midx.c
@@ -17,6 +17,11 @@
#define MIDX_HASH_LEN 20
#define MIDX_MIN_SIZE (MIDX_HEADER_SIZE + MIDX_HASH_LEN)
+#define MIDX_MAX_CHUNKS 1
+#define MIDX_CHUNK_ALIGNMENT 4
+#define MIDX_CHUNKID_PACKNAMES 0x504e414d /* "PNAM" */
+#define MIDX_CHUNKLOOKUP_WIDTH (sizeof(uint32_t) + sizeof(uint64_t))
+
static char *get_midx_filename(const char *object_dir)
{
return xstrfmt("%s/pack/multi-pack-index", object_dir);
@@ -31,6 +36,7 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
void *midx_map = NULL;
uint32_t hash_version;
char *midx_name = get_midx_filename(object_dir);
+ uint32_t i;
fd = git_open(midx_name);
@@ -82,6 +88,33 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir)
m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
+ for (i = 0; i < m->num_chunks; i++) {
+ uint32_t chunk_id = get_be32(m->data + MIDX_HEADER_SIZE +
+ MIDX_CHUNKLOOKUP_WIDTH * i);
+ uint64_t chunk_offset = get_be64(m->data + MIDX_HEADER_SIZE + 4 +
+ MIDX_CHUNKLOOKUP_WIDTH * i);
+
+ switch (chunk_id) {
+ case MIDX_CHUNKID_PACKNAMES:
+ m->chunk_pack_names = m->data + chunk_offset;
+ break;
+
+ case 0:
+ die(_("terminating multi-pack-index chunk id appears earlier than expected"));
+ break;
+
+ default:
+ /*
+ * Do nothing on unrecognized chunks, allowing future
+ * extensions to add optional chunks.
+ */
+ break;
+ }
+ }
+
+ if (!m->chunk_pack_names)
+ die(_("multi-pack-index missing required pack-name chunk"));
+
return m;
cleanup_fail:
@@ -113,8 +146,11 @@ static size_t write_midx_header(struct hashfile *f,
struct pack_list {
struct packed_git **list;
+ char **names;
uint32_t nr;
uint32_t alloc_list;
+ uint32_t alloc_names;
+ size_t pack_name_concat_len;
};
static void add_pack_to_midx(const char *full_path, size_t full_path_len,
@@ -124,6 +160,7 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
if (ends_with(file_name, ".idx")) {
ALLOC_GROW(packs->list, packs->nr + 1, packs->alloc_list);
+ ALLOC_GROW(packs->names, packs->nr + 1, packs->alloc_names);
packs->list[packs->nr] = add_packed_git(full_path,
full_path_len,
@@ -134,18 +171,89 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,
return;
}
+ packs->names[packs->nr] = xstrdup(file_name);
+ packs->pack_name_concat_len += strlen(file_name) + 1;
packs->nr++;
}
}
+struct pack_pair {
+ uint32_t pack_int_id;
+ char *pack_name;
+};
+
+static int pack_pair_compare(const void *_a, const void *_b)
+{
+ struct pack_pair *a = (struct pack_pair *)_a;
+ struct pack_pair *b = (struct pack_pair *)_b;
+ return strcmp(a->pack_name, b->pack_name);
+}
+
+static void sort_packs_by_name(char **pack_names, uint32_t nr_packs, uint32_t *perm)
+{
+ uint32_t i;
+ struct pack_pair *pairs;
+
+ ALLOC_ARRAY(pairs, nr_packs);
+
+ for (i = 0; i < nr_packs; i++) {
+ pairs[i].pack_int_id = i;
+ pairs[i].pack_name = pack_names[i];
+ }
+
+ QSORT(pairs, nr_packs, pack_pair_compare);
+
+ for (i = 0; i < nr_packs; i++) {
+ pack_names[i] = pairs[i].pack_name;
+ perm[pairs[i].pack_int_id] = i;
+ }
+
+ free(pairs);
+}
+
+static size_t write_midx_pack_names(struct hashfile *f,
+ char **pack_names,
+ uint32_t num_packs)
+{
+ uint32_t i;
+ unsigned char padding[MIDX_CHUNK_ALIGNMENT];
+ size_t written = 0;
+
+ for (i = 0; i < num_packs; i++) {
+ size_t writelen = strlen(pack_names[i]) + 1;
+
+ if (i && strcmp(pack_names[i], pack_names[i - 1]) <= 0)
+ BUG("incorrect pack-file order: %s before %s",
+ pack_names[i - 1],
+ pack_names[i]);
+
+ hashwrite(f, pack_names[i], writelen);
+ written += writelen;
+ }
+
+ /* add padding to be aligned */
+ i = MIDX_CHUNK_ALIGNMENT - (written % MIDX_CHUNK_ALIGNMENT);
+ if (i < MIDX_CHUNK_ALIGNMENT) {
+ memset(padding, 0, sizeof(padding));
+ hashwrite(f, padding, i);
+ written += i;
+ }
+
+ return written;
+}
+
int write_midx_file(const char *object_dir)
{
- unsigned char num_chunks = 0;
+ unsigned char cur_chunk, num_chunks = 0;
char *midx_name;
uint32_t i;
struct hashfile *f = NULL;
struct lock_file lk;
struct pack_list packs;
+ uint32_t *pack_perm = NULL;
+ uint64_t written = 0;
+ uint32_t chunk_ids[MIDX_MAX_CHUNKS + 1];
+ uint64_t chunk_offsets[MIDX_MAX_CHUNKS + 1];
midx_name = get_midx_filename(object_dir);
if (safe_create_leading_directories(midx_name)) {
@@ -156,16 +264,76 @@ int write_midx_file(const char *object_dir)
packs.nr = 0;
packs.alloc_list = 16;
+ packs.alloc_names = 16;
packs.list = NULL;
+ packs.pack_name_concat_len = 0;
ALLOC_ARRAY(packs.list, packs.alloc_list);
+ ALLOC_ARRAY(packs.names, packs.alloc_names);
for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &packs);
+ if (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT)
+ packs.pack_name_concat_len += MIDX_CHUNK_ALIGNMENT -
+ (packs.pack_name_concat_len % MIDX_CHUNK_ALIGNMENT);
+
+ ALLOC_ARRAY(pack_perm, packs.nr);
+ sort_packs_by_name(packs.names, packs.nr, pack_perm);
+
hold_lock_file_for_update(&lk, midx_name, LOCK_DIE_ON_ERROR);
f = hashfd(lk.tempfile->fd, lk.tempfile->filename.buf);
FREE_AND_NULL(midx_name);
- write_midx_header(f, num_chunks, packs.nr);
+ cur_chunk = 0;
+ num_chunks = 1;
+
+ written = write_midx_header(f, num_chunks, packs.nr);
+
+ chunk_ids[cur_chunk] = MIDX_CHUNKID_PACKNAMES;
+ chunk_offsets[cur_chunk] = written + (num_chunks + 1) * MIDX_CHUNKLOOKUP_WIDTH;
+
+ cur_chunk++;
+ chunk_ids[cur_chunk] = 0;
+ chunk_offsets[cur_chunk] = chunk_offsets[cur_chunk - 1] + packs.pack_name_concat_len;
+
+ for (i = 0; i <= num_chunks; i++) {
+ if (i && chunk_offsets[i] < chunk_offsets[i - 1])
+ BUG("incorrect chunk offsets: %"PRIu64" before %"PRIu64,
+ chunk_offsets[i - 1],
+ chunk_offsets[i]);
+
+ if (chunk_offsets[i] % MIDX_CHUNK_ALIGNMENT)
+ BUG("chunk offset %"PRIu64" is not properly aligned",
+ chunk_offsets[i]);
+
+ hashwrite_be32(f, chunk_ids[i]);
+ hashwrite_be32(f, chunk_offsets[i] >> 32);
+ hashwrite_be32(f, chunk_offsets[i]);
+
+ written += MIDX_CHUNKLOOKUP_WIDTH;
+ }
+
+ for (i = 0; i < num_chunks; i++) {
+ if (written != chunk_offsets[i])
+ BUG("incorrect chunk offset (%"PRIu64" != %"PRIu64") for chunk id %"PRIx32,
+ chunk_offsets[i],
+ written,
+ chunk_ids[i]);
+
+ switch (chunk_ids[i]) {
+ case MIDX_CHUNKID_PACKNAMES:
+ written += write_midx_pack_names(f, packs.names, packs.nr);
+ break;
+
+ default:
+ BUG("trying to write unknown chunk id %"PRIx32,
+ chunk_ids[i]);
+ }
+ }
+
+ if (written != chunk_offsets[num_chunks])
+ BUG("incorrect final offset %"PRIu64" != %"PRIu64,
+ written,
+ chunk_offsets[num_chunks]);
finalize_hashfile(f, NULL, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
commit_lock_file(&lk);
@@ -175,8 +343,10 @@ int write_midx_file(const char *object_dir)
close_pack(packs.list[i]);
free(packs.list[i]);
}
+ free(packs.names[i]);
}
free(packs.list);
+ free(packs.names);
return 0;
}