From cf0983213c72727c4c7dea51ed9b3b38a2968fbe Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:49 +0000 Subject: hash: add an algo member to struct object_id Now that we're working with multiple hash algorithms in the same repo, it's best if we label each object ID with its algorithm so we can determine how to format a given object ID. Add a member called algo to struct object_id. Performance testing on object ID-heavy workloads doesn't reveal a clear change in performance. Out of performance tests t0001 and t1450, there are slight variations in performance both up and down, but all measurements are within the margin of error. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/hash.h b/hash.h index 3fb0c3d..dafdcb3 100644 --- a/hash.h +++ b/hash.h @@ -181,6 +181,7 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) struct object_id { unsigned char hash[GIT_MAX_RAWSZ]; + int algo; }; #define the_hash_algo the_repository->hash_algo -- cgit v0.10.2-6-g49f6 From 92e2cab96b8b8ea9a076dc279864226b3d0863e9 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:50 +0000 Subject: Always use oidread to read into struct object_id In the future, we'll want oidread to automatically set the hash algorithm member for an object ID we read into it, so ensure we use oidread instead of hashcpy everywhere we're copying a hash value into a struct object_id. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/archive.c b/archive.c index 2956155..6cfb9e4 100644 --- a/archive.c +++ b/archive.c @@ -203,7 +203,7 @@ static void queue_directory(const unsigned char *sha1, d->mode = mode; c->bottom = d; d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, filename); - hashcpy(d->oid.hash, sha1); + oidread(&d->oid, sha1); } static int write_directory(struct archiver_context *c) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 3afa81c..9d2a058 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -1276,8 +1276,8 @@ static void load_tree(struct tree_entry *root) e->versions[0].mode = e->versions[1].mode; e->name = to_atom(c, strlen(c)); c += e->name->str_len + 1; - hashcpy(e->versions[0].oid.hash, (unsigned char *)c); - hashcpy(e->versions[1].oid.hash, (unsigned char *)c); + oidread(&e->versions[0].oid, (unsigned char *)c); + oidread(&e->versions[1].oid, (unsigned char *)c); c += the_hash_algo->rawsz; } free(buf); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 15507b5..41e2c24 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -524,7 +524,7 @@ static void *unpack_raw_entry(struct object_entry *obj, switch (obj->type) { case OBJ_REF_DELTA: - hashcpy(ref_oid->hash, fill(the_hash_algo->rawsz)); + oidread(ref_oid, fill(the_hash_algo->rawsz)); use(the_hash_algo->rawsz); break; case OBJ_OFS_DELTA: @@ -1358,7 +1358,7 @@ static struct object_entry *append_obj_to_pack(struct hashfile *f, obj[1].idx.offset += write_compressed(f, buf, size); obj[0].idx.crc32 = crc32_end(f); hashflush(f); - hashcpy(obj->idx.oid.hash, sha1); + oidread(&obj->idx.oid, sha1); return obj; } diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 4a70b17..a8b73ec 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -355,7 +355,7 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, struct object_id base_oid; if (type == OBJ_REF_DELTA) { - hashcpy(base_oid.hash, fill(the_hash_algo->rawsz)); + oidread(&base_oid, fill(the_hash_algo->rawsz)); use(the_hash_algo->rawsz); delta_data = get_data(delta_size); if (dry_run || !delta_data) { diff --git a/commit-graph.c b/commit-graph.c index f18380b..23fef56 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -425,7 +425,7 @@ struct commit_graph *parse_commit_graph(struct repository *r, FREE_AND_NULL(graph->bloom_filter_settings); } - hashcpy(graph->oid.hash, graph->data + graph->data_len - graph->hash_len); + oidread(&graph->oid, graph->data + graph->data_len - graph->hash_len); if (verify_commit_graph_lite(graph)) goto free_and_return; @@ -746,7 +746,7 @@ static void load_oid_from_graph(struct commit_graph *g, lex_index = pos - g->num_commits_in_base; - hashcpy(oid->hash, g->chunk_oid_lookup + g->hash_len * lex_index); + oidread(oid, g->chunk_oid_lookup + g->hash_len * lex_index); } static struct commit_list **insert_parent_or_die(struct repository *r, @@ -939,7 +939,7 @@ static struct tree *load_tree_for_commit(struct repository *r, commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * (graph_pos - g->num_commits_in_base); - hashcpy(oid.hash, commit_data); + oidread(&oid, commit_data); set_commit_tree(c, lookup_tree(r, &oid)); return c->maybe_tree; @@ -2322,7 +2322,7 @@ int write_commit_graph(struct object_directory *odb, struct commit_graph *g = ctx->r->objects->commit_graph; for (i = 0; i < g->num_commits; i++) { struct object_id oid; - hashcpy(oid.hash, g->chunk_oid_lookup + g->hash_len * i); + oidread(&oid, g->chunk_oid_lookup + g->hash_len * i); oid_array_append(&ctx->oids, &oid); } } @@ -2453,7 +2453,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) for (i = 0; i < g->num_commits; i++) { struct commit *graph_commit; - hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i); if (i && oidcmp(&prev_oid, &cur_oid) >= 0) graph_report(_("commit-graph has incorrect OID order: %s then %s"), @@ -2501,7 +2501,7 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) timestamp_t generation; display_progress(progress, i + 1); - hashcpy(cur_oid.hash, g->chunk_oid_lookup + g->hash_len * i); + oidread(&cur_oid, g->chunk_oid_lookup + g->hash_len * i); graph_commit = lookup_commit(r, &cur_oid); odb_commit = (struct commit *)create_object(r, &cur_oid, alloc_commit_node(r)); diff --git a/dir.c b/dir.c index 3474e67..813dd7b 100644 --- a/dir.c +++ b/dir.c @@ -3344,7 +3344,7 @@ static void read_oid(size_t pos, void *cb) rd->data = rd->end + 1; return; } - hashcpy(ud->exclude_oid.hash, rd->data); + oidread(&ud->exclude_oid, rd->data); rd->data += the_hash_algo->rawsz; } @@ -3352,7 +3352,7 @@ static void load_oid_stat(struct oid_stat *oid_stat, const unsigned char *data, const unsigned char *sha1) { stat_data_from_disk(&oid_stat->stat, data); - hashcpy(oid_stat->oid.hash, sha1); + oidread(&oid_stat->oid, sha1); oid_stat->valid = 1; } diff --git a/http-walker.c b/http-walker.c index 4fb1235..90d8ecb 100644 --- a/http-walker.c +++ b/http-walker.c @@ -155,7 +155,7 @@ static void prefetch(struct walker *walker, unsigned char *sha1) newreq = xmalloc(sizeof(*newreq)); newreq->walker = walker; - hashcpy(newreq->oid.hash, sha1); + oidread(&newreq->oid, sha1); newreq->repo = data->alt; newreq->state = WAITING; newreq->req = NULL; diff --git a/match-trees.c b/match-trees.c index f6c194c..df41398 100644 --- a/match-trees.c +++ b/match-trees.c @@ -226,7 +226,7 @@ static int splice_tree(const struct object_id *oid1, const char *prefix, oid_to_hex(oid1)); if (*subpath) { struct object_id tree_oid; - hashcpy(tree_oid.hash, rewrite_here); + oidread(&tree_oid, rewrite_here); status = splice_tree(&tree_oid, subpath, oid2, &subtree); if (status) return status; diff --git a/midx.c b/midx.c index 9e86583..21d6a05 100644 --- a/midx.c +++ b/midx.c @@ -247,7 +247,7 @@ struct object_id *nth_midxed_object_oid(struct object_id *oid, if (n >= m->num_objects) return NULL; - hashcpy(oid->hash, m->chunk_oid_lookup + m->hash_len * n); + oidread(oid, m->chunk_oid_lookup + m->hash_len * n); return oid; } diff --git a/notes.c b/notes.c index a19e4ad..a44b258 100644 --- a/notes.c +++ b/notes.c @@ -352,7 +352,7 @@ static void add_non_note(struct notes_tree *t, char *path, n->next = NULL; n->path = path; n->mode = mode; - hashcpy(n->oid.hash, sha1); + oidread(&n->oid, sha1); t->prev_non_note = n; if (!t->first_non_note) { @@ -1134,7 +1134,7 @@ int remove_note(struct notes_tree *t, const unsigned char *object_sha1) if (!t) t = &default_notes_tree; assert(t->initialized); - hashcpy(l.key_oid.hash, object_sha1); + oidread(&l.key_oid, object_sha1); oidclr(&l.val_oid); note_tree_remove(t, t->root, 0, &l); if (is_null_oid(&l.val_oid)) /* no note was removed */ diff --git a/read-cache.c b/read-cache.c index 5a907af..2944146 100644 --- a/read-cache.c +++ b/read-cache.c @@ -1845,7 +1845,7 @@ static struct cache_entry *create_from_disk(struct mem_pool *ce_mem_pool, ce->ce_flags = flags & ~CE_NAMEMASK; ce->ce_namelen = len; ce->index = 0; - hashcpy(ce->oid.hash, ondisk->data); + oidread(&ce->oid, ondisk->data); memcpy(ce->name, name, len); ce->name[len] = '\0'; @@ -2195,7 +2195,7 @@ int do_read_index(struct index_state *istate, const char *path, int must_exist) if (verify_hdr(hdr, mmap_size) < 0) goto unmap; - hashcpy(istate->oid.hash, (const unsigned char *)hdr + mmap_size - the_hash_algo->rawsz); + oidread(&istate->oid, (const unsigned char *)hdr + mmap_size - the_hash_algo->rawsz); istate->version = ntohl(hdr->hdr_version); istate->cache_nr = ntohl(hdr->hdr_entries); istate->cache_alloc = alloc_nr(istate->cache_nr); diff --git a/split-index.c b/split-index.c index 94937d2..4d6e52d 100644 --- a/split-index.c +++ b/split-index.c @@ -21,7 +21,7 @@ int read_link_extension(struct index_state *istate, if (sz < the_hash_algo->rawsz) return error("corrupt link extension (too short)"); si = init_split_index(istate); - hashcpy(si->base_oid.hash, data); + oidread(&si->base_oid, data); data += the_hash_algo->rawsz; sz -= the_hash_algo->rawsz; if (!sz) diff --git a/tree-walk.c b/tree-walk.c index 2d6226d..3a94959 100644 --- a/tree-walk.c +++ b/tree-walk.c @@ -49,7 +49,7 @@ static int decode_tree_entry(struct tree_desc *desc, const char *buf, unsigned l desc->entry.path = path; desc->entry.mode = canon_mode(mode); desc->entry.pathlen = len - 1; - hashcpy(desc->entry.oid.hash, (const unsigned char *)path + len); + oidread(&desc->entry.oid, (const unsigned char *)path + len); return 0; } -- cgit v0.10.2-6-g49f6 From c3b4e4ee366b8de9802fa752d0a81be92e690e17 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:51 +0000 Subject: http-push: set algorithm when reading object ID In most places in the codebase, we use oidread to properly read an object ID into a struct object_id. However, in the HTTP code, we end up needing to parse a loose object path with a slash in it, so we can't do that. Let's instead explicitly set the algorithm in this function so we can rely on it in the future. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/http-push.c b/http-push.c index b60d5fc..5675cd7 100644 --- a/http-push.c +++ b/http-push.c @@ -1022,6 +1022,8 @@ static void remote_ls(const char *path, int flags, /* extract hex from sharded "xx/x{38}" filename */ static int get_oid_hex_from_objpath(const char *path, struct object_id *oid) { + oid->algo = hash_algo_by_ptr(the_hash_algo); + if (strlen(path) != the_hash_algo->hexsz + 1) return -1; -- cgit v0.10.2-6-g49f6 From ab795f0d77aec52496557eb69a5fc9e4f60ea1fd Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:52 +0000 Subject: hash: add a function to finalize object IDs To avoid the penalty of having to branch in hash comparison functions, we'll want to always compare the full hash member in a struct object_id, which will require that SHA-1 object IDs be zero-padded. To do so, add a function which finalizes a hash context and writes it into an object ID that performs this padding. Move the definition of struct object_id and the constant definitions higher up so we they are available for us to use. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/hash.h b/hash.h index dafdcb3..c8f03d8 100644 --- a/hash.h +++ b/hash.h @@ -95,6 +95,29 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s /* Number of algorithms supported (including unknown). */ #define GIT_HASH_NALGOS (GIT_HASH_SHA256 + 1) +/* The length in bytes and in hex digits of an object name (SHA-1 value). */ +#define GIT_SHA1_RAWSZ 20 +#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ) +/* The block size of SHA-1. */ +#define GIT_SHA1_BLKSZ 64 + +/* The length in bytes and in hex digits of an object name (SHA-256 value). */ +#define GIT_SHA256_RAWSZ 32 +#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ) +/* The block size of SHA-256. */ +#define GIT_SHA256_BLKSZ 64 + +/* The length in byte and in hex digits of the largest possible hash value. */ +#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ +#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ +/* The largest possible block size for any supported hash. */ +#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ + +struct object_id { + unsigned char hash[GIT_MAX_RAWSZ]; + int algo; +}; + /* A suitably aligned type for stack allocations of hash contexts. */ union git_hash_ctx { git_SHA_CTX sha1; @@ -106,6 +129,7 @@ typedef void (*git_hash_init_fn)(git_hash_ctx *ctx); typedef void (*git_hash_clone_fn)(git_hash_ctx *dst, const git_hash_ctx *src); typedef void (*git_hash_update_fn)(git_hash_ctx *ctx, const void *in, size_t len); typedef void (*git_hash_final_fn)(unsigned char *hash, git_hash_ctx *ctx); +typedef void (*git_hash_final_oid_fn)(struct object_id *oid, git_hash_ctx *ctx); struct git_hash_algo { /* @@ -138,6 +162,9 @@ struct git_hash_algo { /* The hash finalization function. */ git_hash_final_fn final_fn; + /* The hash finalization function for object IDs. */ + git_hash_final_oid_fn final_oid_fn; + /* The OID of the empty tree. */ const struct object_id *empty_tree; @@ -161,29 +188,6 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) return p - hash_algos; } -/* The length in bytes and in hex digits of an object name (SHA-1 value). */ -#define GIT_SHA1_RAWSZ 20 -#define GIT_SHA1_HEXSZ (2 * GIT_SHA1_RAWSZ) -/* The block size of SHA-1. */ -#define GIT_SHA1_BLKSZ 64 - -/* The length in bytes and in hex digits of an object name (SHA-256 value). */ -#define GIT_SHA256_RAWSZ 32 -#define GIT_SHA256_HEXSZ (2 * GIT_SHA256_RAWSZ) -/* The block size of SHA-256. */ -#define GIT_SHA256_BLKSZ 64 - -/* The length in byte and in hex digits of the largest possible hash value. */ -#define GIT_MAX_RAWSZ GIT_SHA256_RAWSZ -#define GIT_MAX_HEXSZ GIT_SHA256_HEXSZ -/* The largest possible block size for any supported hash. */ -#define GIT_MAX_BLKSZ GIT_SHA256_BLKSZ - -struct object_id { - unsigned char hash[GIT_MAX_RAWSZ]; - int algo; -}; - #define the_hash_algo the_repository->hash_algo extern const struct object_id null_oid; diff --git a/object-file.c b/object-file.c index 624af40..ef1eb98 100644 --- a/object-file.c +++ b/object-file.c @@ -89,6 +89,12 @@ static void git_hash_sha1_final(unsigned char *hash, git_hash_ctx *ctx) git_SHA1_Final(hash, &ctx->sha1); } +static void git_hash_sha1_final_oid(struct object_id *oid, git_hash_ctx *ctx) +{ + git_SHA1_Final(oid->hash, &ctx->sha1); + memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ); +} + static void git_hash_sha256_init(git_hash_ctx *ctx) { @@ -110,6 +116,16 @@ static void git_hash_sha256_final(unsigned char *hash, git_hash_ctx *ctx) git_SHA256_Final(hash, &ctx->sha256); } +static void git_hash_sha256_final_oid(struct object_id *oid, git_hash_ctx *ctx) +{ + git_SHA256_Final(oid->hash, &ctx->sha256); + /* + * This currently does nothing, so the compiler should optimize it out, + * but keep it in case we extend the hash size again. + */ + memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ); +} + static void git_hash_unknown_init(git_hash_ctx *ctx) { BUG("trying to init unknown hash"); @@ -130,6 +146,12 @@ static void git_hash_unknown_final(unsigned char *hash, git_hash_ctx *ctx) BUG("trying to finalize unknown hash"); } +static void git_hash_unknown_final_oid(struct object_id *oid, git_hash_ctx *ctx) +{ + BUG("trying to finalize unknown hash"); +} + + const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { { NULL, @@ -141,6 +163,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { git_hash_unknown_clone, git_hash_unknown_update, git_hash_unknown_final, + git_hash_unknown_final_oid, NULL, NULL, }, @@ -155,6 +178,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { git_hash_sha1_clone, git_hash_sha1_update, git_hash_sha1_final, + git_hash_sha1_final_oid, &empty_tree_oid, &empty_blob_oid, }, @@ -169,6 +193,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { git_hash_sha256_clone, git_hash_sha256_update, git_hash_sha256_final, + git_hash_sha256_final_oid, &empty_tree_oid_sha256, &empty_blob_oid_sha256, } -- cgit v0.10.2-6-g49f6 From 5951bf467ea92458c3bea3051c8413041f3b27d5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:53 +0000 Subject: Use the final_oid_fn to finalize hashing of object IDs When we're hashing a value which is going to be an object ID, we want to zero-pad that value if necessary. To do so, use the final_oid_fn instead of the final_fn anytime we're going to create an object ID to ensure we perform this operation. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 9d2a058..20406f6 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -940,7 +940,7 @@ static int store_object( the_hash_algo->init_fn(&c); the_hash_algo->update_fn(&c, hdr, hdrlen); the_hash_algo->update_fn(&c, dat->buf, dat->len); - the_hash_algo->final_fn(oid.hash, &c); + the_hash_algo->final_oid_fn(&oid, &c); if (oidout) oidcpy(oidout, &oid); @@ -1136,7 +1136,7 @@ static void stream_blob(uintmax_t len, struct object_id *oidout, uintmax_t mark) } } git_deflate_end(&s); - the_hash_algo->final_fn(oid.hash, &c); + the_hash_algo->final_oid_fn(&oid, &c); if (oidout) oidcpy(oidout, &oid); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 41e2c24..3fbc5d7 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -489,7 +489,7 @@ static void *unpack_entry_data(off_t offset, unsigned long size, bad_object(offset, _("inflate returned %d"), status); git_inflate_end(&stream); if (oid) - the_hash_algo->final_fn(oid->hash, &c); + the_hash_algo->final_oid_fn(oid, &c); return buf == fixed_buf ? NULL : buf; } diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index a8b73ec..6ac90dc 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -576,7 +576,7 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) the_hash_algo->init_fn(&ctx); unpack_all(); the_hash_algo->update_fn(&ctx, buffer, offset); - the_hash_algo->final_fn(oid.hash, &ctx); + the_hash_algo->final_oid_fn(&oid, &ctx); if (strict) { write_rest(); if (fsck_finish(&fsck_options)) diff --git a/bulk-checkin.c b/bulk-checkin.c index 6f3c97c..127312a 100644 --- a/bulk-checkin.c +++ b/bulk-checkin.c @@ -238,7 +238,7 @@ static int deflate_to_pack(struct bulk_checkin_state *state, if (lseek(fd, seekback, SEEK_SET) == (off_t) -1) return error("cannot seek back"); } - the_hash_algo->final_fn(result_oid->hash, &ctx); + the_hash_algo->final_oid_fn(result_oid, &ctx); if (!idx) return 0; diff --git a/diff.c b/diff.c index 4acccd9..97c62f4 100644 --- a/diff.c +++ b/diff.c @@ -6234,7 +6234,7 @@ static int diff_get_patch_id(struct diff_options *options, struct object_id *oid } if (!stable) - the_hash_algo->final_fn(oid->hash, &ctx); + the_hash_algo->final_oid_fn(oid, &ctx); return 0; } diff --git a/http.c b/http.c index 406410f..c83bc33 100644 --- a/http.c +++ b/http.c @@ -2576,7 +2576,7 @@ int finish_http_object_request(struct http_object_request *freq) } git_inflate_end(&freq->stream); - the_hash_algo->final_fn(freq->real_oid.hash, &freq->c); + the_hash_algo->final_oid_fn(&freq->real_oid, &freq->c); if (freq->zret != Z_STREAM_END) { unlink_or_warn(freq->tmpfile.buf); return -1; diff --git a/object-file.c b/object-file.c index ef1eb98..1d8c82f 100644 --- a/object-file.c +++ b/object-file.c @@ -1054,7 +1054,7 @@ int check_object_signature(struct repository *r, const struct object_id *oid, break; r->hash_algo->update_fn(&c, buf, readlen); } - r->hash_algo->final_fn(real_oid.hash, &c); + r->hash_algo->final_oid_fn(&real_oid, &c); close_istream(st); return !oideq(oid, &real_oid) ? -1 : 0; } @@ -1755,7 +1755,7 @@ static void write_object_file_prepare(const struct git_hash_algo *algo, algo->init_fn(&c); algo->update_fn(&c, hdr, *hdrlen); algo->update_fn(&c, buf, len); - algo->final_fn(oid->hash, &c); + algo->final_oid_fn(oid, &c); } /* @@ -1927,7 +1927,7 @@ static int write_loose_object(const struct object_id *oid, char *hdr, if (ret != Z_OK) die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid), ret); - the_hash_algo->final_fn(parano_oid.hash, &c); + the_hash_algo->final_oid_fn(¶no_oid, &c); if (!oideq(oid, ¶no_oid)) die(_("confused by unstable object source data for %s"), oid_to_hex(oid)); @@ -2508,7 +2508,7 @@ static int check_stream_oid(git_zstream *stream, return -1; } - the_hash_algo->final_fn(real_oid.hash, &c); + the_hash_algo->final_oid_fn(&real_oid, &c); if (!oideq(expected_oid, &real_oid)) { error(_("hash mismatch for %s (expected %s)"), path, oid_to_hex(expected_oid)); -- cgit v0.10.2-6-g49f6 From 0e5e2284f1267293e56e8948143f6cd04c74e2ed Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:54 +0000 Subject: builtin/pack-redundant: avoid casting buffers to struct object_id Now that we need our instances of struct object_id to be zero padded, we can no longer cast unsigned char buffers to be pointers to struct object_id. This file reads data out of the pack objects and then inserts it directly into a linked list item which is a pointer to struct object_id. Instead, let's have the linked list item hold its own struct object_id and copy the data into it. In addition, since these are not really pointers to struct object_id, stop passing them around as such, and call them what they really are: pointers to unsigned char. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c index 7102996..8bf5c0a 100644 --- a/builtin/pack-redundant.c +++ b/builtin/pack-redundant.c @@ -20,7 +20,7 @@ static int load_all_packs, verbose, alt_odb; struct llist_item { struct llist_item *next; - const struct object_id *oid; + struct object_id oid; }; static struct llist { struct llist_item *front; @@ -95,10 +95,10 @@ static struct llist * llist_copy(struct llist *list) static inline struct llist_item *llist_insert(struct llist *list, struct llist_item *after, - const struct object_id *oid) + const unsigned char *oid) { struct llist_item *new_item = llist_item_get(); - new_item->oid = oid; + oidread(&new_item->oid, oid); new_item->next = NULL; if (after != NULL) { @@ -118,7 +118,7 @@ static inline struct llist_item *llist_insert(struct llist *list, } static inline struct llist_item *llist_insert_back(struct llist *list, - const struct object_id *oid) + const unsigned char *oid) { return llist_insert(list, list->back, oid); } @@ -130,9 +130,9 @@ static inline struct llist_item *llist_insert_sorted_unique(struct llist *list, l = (hint == NULL) ? list->front : hint; while (l) { - int cmp = oidcmp(l->oid, oid); + int cmp = oidcmp(&l->oid, oid); if (cmp > 0) { /* we insert before this entry */ - return llist_insert(list, prev, oid); + return llist_insert(list, prev, oid->hash); } if (!cmp) { /* already exists */ return l; @@ -141,11 +141,11 @@ static inline struct llist_item *llist_insert_sorted_unique(struct llist *list, l = l->next; } /* insert at the end */ - return llist_insert_back(list, oid); + return llist_insert_back(list, oid->hash); } /* returns a pointer to an item in front of sha1 */ -static inline struct llist_item * llist_sorted_remove(struct llist *list, const struct object_id *oid, struct llist_item *hint) +static inline struct llist_item * llist_sorted_remove(struct llist *list, const unsigned char *oid, struct llist_item *hint) { struct llist_item *prev, *l; @@ -153,7 +153,7 @@ redo_from_start: l = (hint == NULL) ? list->front : hint; prev = NULL; while (l) { - const int cmp = oidcmp(l->oid, oid); + const int cmp = hashcmp(l->oid.hash, oid); if (cmp > 0) /* not in list, since sorted */ return prev; if (!cmp) { /* found */ @@ -188,7 +188,7 @@ static void llist_sorted_difference_inplace(struct llist *A, b = B->front; while (b) { - hint = llist_sorted_remove(A, b->oid, hint); + hint = llist_sorted_remove(A, b->oid.hash, hint); b = b->next; } } @@ -260,10 +260,10 @@ static void cmp_two_packs(struct pack_list *p1, struct pack_list *p2) /* cmp ~ p1 - p2 */ if (cmp == 0) { p1_hint = llist_sorted_remove(p1->unique_objects, - (const struct object_id *)(p1_base + p1_off), + p1_base + p1_off, p1_hint); p2_hint = llist_sorted_remove(p2->unique_objects, - (const struct object_id *)(p1_base + p1_off), + p1_base + p1_off, p2_hint); p1_off += p1_step; p2_off += p2_step; @@ -455,7 +455,7 @@ static void load_all_objects(void) l = pl->remaining_objects->front; while (l) { hint = llist_insert_sorted_unique(all_objects, - l->oid, hint); + &l->oid, hint); l = l->next; } pl = pl->next; @@ -521,7 +521,7 @@ static struct pack_list * add_pack(struct packed_git *p) base += 256 * 4 + ((p->index_version < 2) ? 4 : 8); step = the_hash_algo->rawsz + ((p->index_version < 2) ? 4 : 0); while (off < p->num_objects * step) { - llist_insert_back(l.remaining_objects, (const struct object_id *)(base + off)); + llist_insert_back(l.remaining_objects, base + off); off += step; } l.all_objects_size = l.remaining_objects->size; -- cgit v0.10.2-6-g49f6 From 5a6dce70d7bb12ee2bc7926254c5b6741b91ac5f Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:55 +0000 Subject: hash: set, copy, and use algo field in struct object_id Now that struct object_id has an algorithm field, we should populate it. This will allow us to handle object IDs in any supported algorithm and distinguish between them. Ensure that the field is written whenever we write an object ID by storing it explicitly every time we write an object. Set values for the empty blob and tree values as well. In addition, use the algorithm field to compare object IDs. Note that because we zero-initialize struct object_id in many places throughout the codebase, we default to the default algorithm in cases where the algorithm field is zero rather than explicitly initialize all of those locations. This leads to a branch on every comparison, but the alternative is to compare the entire buffer each time and padding the buffer for SHA-1. That alternative ranges up to 3.9% worse than this approach on the perf t0001, t1450, and t1451. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/hash.h b/hash.h index c8f03d8..0e85e44 100644 --- a/hash.h +++ b/hash.h @@ -192,36 +192,56 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) extern const struct object_id null_oid; -static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) +static inline int hashcmp_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) { /* * Teach the compiler that there are only two possibilities of hash size * here, so that it can optimize for this case as much as possible. */ - if (the_hash_algo->rawsz == GIT_MAX_RAWSZ) + if (algop->rawsz == GIT_MAX_RAWSZ) return memcmp(sha1, sha2, GIT_MAX_RAWSZ); return memcmp(sha1, sha2, GIT_SHA1_RAWSZ); } +static inline int hashcmp(const unsigned char *sha1, const unsigned char *sha2) +{ + return hashcmp_algop(sha1, sha2, the_hash_algo); +} + static inline int oidcmp(const struct object_id *oid1, const struct object_id *oid2) { - return hashcmp(oid1->hash, oid2->hash); + const struct git_hash_algo *algop; + if (!oid1->algo) + algop = the_hash_algo; + else + algop = &hash_algos[oid1->algo]; + return hashcmp_algop(oid1->hash, oid2->hash, algop); } -static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2) +static inline int hasheq_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) { /* * We write this here instead of deferring to hashcmp so that the * compiler can properly inline it and avoid calling memcmp. */ - if (the_hash_algo->rawsz == GIT_MAX_RAWSZ) + if (algop->rawsz == GIT_MAX_RAWSZ) return !memcmp(sha1, sha2, GIT_MAX_RAWSZ); return !memcmp(sha1, sha2, GIT_SHA1_RAWSZ); } +static inline int hasheq(const unsigned char *sha1, const unsigned char *sha2) +{ + return hasheq_algop(sha1, sha2, the_hash_algo); +} + static inline int oideq(const struct object_id *oid1, const struct object_id *oid2) { - return hasheq(oid1->hash, oid2->hash); + const struct git_hash_algo *algop; + if (!oid1->algo) + algop = the_hash_algo; + else + algop = &hash_algos[oid1->algo]; + return hasheq_algop(oid1->hash, oid2->hash, algop); } static inline int is_null_oid(const struct object_id *oid) @@ -237,6 +257,7 @@ static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src) static inline void oidcpy(struct object_id *dst, const struct object_id *src) { memcpy(dst->hash, src->hash, GIT_MAX_RAWSZ); + dst->algo = src->algo; } static inline struct object_id *oiddup(const struct object_id *src) @@ -254,11 +275,13 @@ static inline void hashclr(unsigned char *hash) static inline void oidclr(struct object_id *oid) { memset(oid->hash, 0, GIT_MAX_RAWSZ); + oid->algo = hash_algo_by_ptr(the_hash_algo); } static inline void oidread(struct object_id *oid, const unsigned char *hash) { memcpy(oid->hash, hash, the_hash_algo->rawsz); + oid->algo = hash_algo_by_ptr(the_hash_algo); } static inline int is_empty_blob_sha1(const unsigned char *sha1) @@ -281,6 +304,11 @@ static inline int is_empty_tree_oid(const struct object_id *oid) return oideq(oid, the_hash_algo->empty_tree); } +static inline void oid_set_algo(struct object_id *oid, const struct git_hash_algo *algop) +{ + oid->algo = hash_algo_by_ptr(algop); +} + const char *empty_tree_oid_hex(void); const char *empty_blob_oid_hex(void); diff --git a/hex.c b/hex.c index da51e64..e7af18f 100644 --- a/hex.c +++ b/hex.c @@ -69,7 +69,10 @@ int get_sha1_hex(const char *hex, unsigned char *sha1) int get_oid_hex_algop(const char *hex, struct object_id *oid, const struct git_hash_algo *algop) { - return get_hash_hex_algop(hex, oid->hash, algop); + int ret = get_hash_hex_algop(hex, oid->hash, algop); + if (!ret) + oid_set_algo(oid, algop); + return ret; } /* @@ -80,7 +83,7 @@ int get_oid_hex_any(const char *hex, struct object_id *oid) { int i; for (i = GIT_HASH_NALGOS - 1; i > 0; i--) { - if (!get_hash_hex_algop(hex, oid->hash, &hash_algos[i])) + if (!get_oid_hex_algop(hex, oid, &hash_algos[i])) return i; } return GIT_HASH_UNKNOWN; @@ -95,7 +98,7 @@ int parse_oid_hex_algop(const char *hex, struct object_id *oid, const char **end, const struct git_hash_algo *algop) { - int ret = get_hash_hex_algop(hex, oid->hash, algop); + int ret = get_oid_hex_algop(hex, oid, algop); if (!ret) *end = hex + algop->hexsz; return ret; diff --git a/notes.c b/notes.c index a44b258..135ea13 100644 --- a/notes.c +++ b/notes.c @@ -455,6 +455,8 @@ static void load_subtree(struct notes_tree *t, struct leaf_node *subtree, CALLOC_ARRAY(l, 1); oidcpy(&l->key_oid, &object_oid); oidcpy(&l->val_oid, &entry.oid); + oid_set_algo(&l->key_oid, the_hash_algo); + oid_set_algo(&l->val_oid, the_hash_algo); if (note_tree_insert(t, node, n, l, type, combine_notes_concatenate)) die("Failed to load %s %s into notes tree " @@ -484,6 +486,7 @@ handle_non_note: strbuf_addch(&non_note_path, '/'); } strbuf_addstr(&non_note_path, entry.path); + oid_set_algo(&entry.oid, the_hash_algo); add_non_note(t, strbuf_detach(&non_note_path, NULL), entry.mode, entry.oid.hash); } diff --git a/object-file.c b/object-file.c index 1d8c82f..d4ba0c4 100644 --- a/object-file.c +++ b/object-file.c @@ -57,16 +57,20 @@ const struct object_id null_oid; static const struct object_id empty_tree_oid = { - EMPTY_TREE_SHA1_BIN_LITERAL + .hash = EMPTY_TREE_SHA1_BIN_LITERAL, + .algo = GIT_HASH_SHA1, }; static const struct object_id empty_blob_oid = { - EMPTY_BLOB_SHA1_BIN_LITERAL + .hash = EMPTY_BLOB_SHA1_BIN_LITERAL, + .algo = GIT_HASH_SHA1, }; static const struct object_id empty_tree_oid_sha256 = { - EMPTY_TREE_SHA256_BIN_LITERAL + .hash = EMPTY_TREE_SHA256_BIN_LITERAL, + .algo = GIT_HASH_SHA256, }; static const struct object_id empty_blob_oid_sha256 = { - EMPTY_BLOB_SHA256_BIN_LITERAL + .hash = EMPTY_BLOB_SHA256_BIN_LITERAL, + .algo = GIT_HASH_SHA256, }; static void git_hash_sha1_init(git_hash_ctx *ctx) @@ -93,6 +97,7 @@ static void git_hash_sha1_final_oid(struct object_id *oid, git_hash_ctx *ctx) { git_SHA1_Final(oid->hash, &ctx->sha1); memset(oid->hash + GIT_SHA1_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA1_RAWSZ); + oid->algo = GIT_HASH_SHA1; } @@ -124,6 +129,7 @@ static void git_hash_sha256_final_oid(struct object_id *oid, git_hash_ctx *ctx) * but keep it in case we extend the hash size again. */ memset(oid->hash + GIT_SHA256_RAWSZ, 0, GIT_MAX_RAWSZ - GIT_SHA256_RAWSZ); + oid->algo = GIT_HASH_SHA256; } static void git_hash_unknown_init(git_hash_ctx *ctx) @@ -2340,6 +2346,7 @@ int for_each_file_in_obj_subdir(unsigned int subdir_nr, if (namelen == the_hash_algo->hexsz - 2 && !hex_to_bytes(oid.hash + 1, de->d_name, the_hash_algo->rawsz - 1)) { + oid_set_algo(&oid, the_hash_algo); if (obj_cb) { r = obj_cb(&oid, path->buf, data); if (r) -- cgit v0.10.2-6-g49f6 From 14228447c9ce664a4e9c31ba10344ec5e4ea4ba5 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:56 +0000 Subject: hash: provide per-algorithm null OIDs Up until recently, object IDs did not have an algorithm member, only a hash. Consequently, it was possible to share one null (all-zeros) object ID among all hash algorithms. Now that we're going to be handling objects from multiple hash algorithms, it's important to make sure that all object IDs have a correct algorithm field. Introduce a per-algorithm null OID, and add it to struct hash_algo. Introduce a wrapper function as well, and use it everywhere we used to use the null_oid constant. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/archive.c b/archive.c index 6cfb9e4..ff2bb54 100644 --- a/archive.c +++ b/archive.c @@ -275,9 +275,11 @@ int write_archive_entries(struct archiver_args *args, int err; struct strbuf path_in_archive = STRBUF_INIT; struct strbuf content = STRBUF_INIT; - struct object_id fake_oid = null_oid; + struct object_id fake_oid; int i; + oidcpy(&fake_oid, null_oid()); + if (args->baselen > 0 && args->base[args->baselen - 1] == '/') { size_t len = args->baselen; diff --git a/blame.c b/blame.c index 5018bb8..206c295 100644 --- a/blame.c +++ b/blame.c @@ -242,7 +242,7 @@ static struct commit *fake_working_tree_commit(struct repository *r, switch (st.st_mode & S_IFMT) { case S_IFREG: if (opt->flags.allow_textconv && - textconv_object(r, read_from, mode, &null_oid, 0, &buf_ptr, &buf_len)) + textconv_object(r, read_from, mode, null_oid(), 0, &buf_ptr, &buf_len)) strbuf_attach(&buf, buf_ptr, buf_len, buf_len + 1); else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size) die_errno("cannot open or read '%s'", read_from); diff --git a/branch.c b/branch.c index 9c9dae1..8db10f8 100644 --- a/branch.c +++ b/branch.c @@ -322,7 +322,7 @@ void create_branch(struct repository *r, transaction = ref_transaction_begin(&err); if (!transaction || ref_transaction_update(transaction, ref.buf, - &oid, forcing ? NULL : &null_oid, + &oid, forcing ? NULL : null_oid(), 0, msg, &err) || ref_transaction_commit(transaction, &err)) die("%s", err.buf); diff --git a/builtin/checkout.c b/builtin/checkout.c index 4c696ef..8a12b92 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -106,8 +106,8 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm int changed) { return run_hook_le(NULL, "post-checkout", - oid_to_hex(old_commit ? &old_commit->object.oid : &null_oid), - oid_to_hex(new_commit ? &new_commit->object.oid : &null_oid), + oid_to_hex(old_commit ? &old_commit->object.oid : null_oid()), + oid_to_hex(new_commit ? &new_commit->object.oid : null_oid()), changed ? "1" : "0", NULL); /* "new_commit" can be NULL when checking out from the index before a commit exists. */ @@ -638,7 +638,7 @@ static int reset_tree(struct tree *tree, const struct checkout_opts *o, opts.src_index = &the_index; opts.dst_index = &the_index; init_checkout_metadata(&opts.meta, info->refname, - info->commit ? &info->commit->object.oid : &null_oid, + info->commit ? &info->commit->object.oid : null_oid(), NULL); parse_tree(tree); init_tree_desc(&tree_desc, tree->buffer, tree->size); diff --git a/builtin/clone.c b/builtin/clone.c index f6b0c48..eeb74c0 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -820,7 +820,7 @@ static int checkout(int submodule_progress) if (write_locked_index(&the_index, &lock_file, COMMIT_LOCK)) die(_("unable to write new index file")); - err |= run_hook_le(NULL, "post-checkout", oid_to_hex(&null_oid), + err |= run_hook_le(NULL, "post-checkout", oid_to_hex(null_oid()), oid_to_hex(&oid), "1", NULL); if (!err && (option_recurse_submodules.nr > 0)) { diff --git a/builtin/describe.c b/builtin/describe.c index 40482d8..e912ba5 100644 --- a/builtin/describe.c +++ b/builtin/describe.c @@ -502,7 +502,7 @@ static void describe_blob(struct object_id oid, struct strbuf *dst) { struct rev_info revs; struct strvec args = STRVEC_INIT; - struct process_commit_data pcd = { null_oid, oid, dst, &revs}; + struct process_commit_data pcd = { *null_oid(), oid, dst, &revs}; strvec_pushl(&args, "internal: The first arg is not parsed", "--objects", "--in-commit-order", "--reverse", "HEAD", diff --git a/builtin/diff.c b/builtin/diff.c index 617b9a4..2d87c37 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -98,7 +98,7 @@ static int builtin_diff_b_f(struct rev_info *revs, stuff_change(&revs->diffopt, blob[0]->mode, canon_mode(st.st_mode), - &blob[0]->item->oid, &null_oid, + &blob[0]->item->oid, null_oid(), 1, 0, blob[0]->path ? blob[0]->path : path, path); diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 85a76e0..3c20f16 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -870,7 +870,7 @@ static void handle_tag(const char *name, struct tag *tag) p = rewrite_commit((struct commit *)tagged); if (!p) { printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(&null_oid)); + name, oid_to_hex(null_oid())); free(buf); return; } @@ -884,7 +884,7 @@ static void handle_tag(const char *name, struct tag *tag) if (tagged->type == OBJ_TAG) { printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(&null_oid)); + name, oid_to_hex(null_oid())); } skip_prefix(name, "refs/tags/", &name); printf("tag %s\n", name); @@ -1016,7 +1016,7 @@ static void handle_tags_and_duplicates(struct string_list *extras) * it. */ printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(&null_oid)); + name, oid_to_hex(null_oid())); continue; } @@ -1035,7 +1035,7 @@ static void handle_tags_and_duplicates(struct string_list *extras) if (!reference_excluded_commits) { /* delete the ref */ printf("reset %s\nfrom %s\n\n", - name, oid_to_hex(&null_oid)); + name, oid_to_hex(null_oid())); continue; } /* set ref to commit using oid, not mark */ @@ -1146,7 +1146,7 @@ static void handle_deletes(void) continue; printf("reset %s\nfrom %s\n\n", - refspec->dst, oid_to_hex(&null_oid)); + refspec->dst, oid_to_hex(null_oid())); } } diff --git a/builtin/grep.c b/builtin/grep.c index 5de725f..e0e3260 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -421,7 +421,7 @@ static int grep_submodule(struct grep_opt *opt, struct grep_opt subopt; int hit; - sub = submodule_from_path(superproject, &null_oid, path); + sub = submodule_from_path(superproject, null_oid(), path); if (!is_submodule_active(superproject, path)) return 0; diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 60a2913..c589eb7 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -210,7 +210,7 @@ static void show_submodule(struct repository *superproject, { struct repository subrepo; const struct submodule *sub = submodule_from_path(superproject, - &null_oid, path); + null_oid(), path); if (repo_submodule_init(&subrepo, superproject, sub)) return; diff --git a/builtin/rebase.c b/builtin/rebase.c index 783b526..c9206b4 100644 --- a/builtin/rebase.c +++ b/builtin/rebase.c @@ -485,7 +485,7 @@ static const char * const builtin_rebase_interactive_usage[] = { int cmd_rebase__interactive(int argc, const char **argv, const char *prefix) { struct rebase_options opts = REBASE_OPTIONS_INIT; - struct object_id squash_onto = null_oid; + struct object_id squash_onto = *null_oid(); enum action command = ACTION_NONE; struct option options[] = { OPT_NEGBIT(0, "ff", &opts.flags, N_("allow fast-forward"), @@ -1139,7 +1139,7 @@ static int can_fast_forward(struct commit *onto, struct commit *upstream, merge_bases = get_merge_bases(onto, head); if (!merge_bases || merge_bases->next) { - oidcpy(merge_base, &null_oid); + oidcpy(merge_base, null_oid()); goto done; } diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 6bc12c8..a347425 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -329,7 +329,7 @@ static void write_head_info(void) for_each_alternate_ref(show_one_alternate_ref, &seen); oidset_clear(&seen); if (!sent_capabilities) - show_ref("capabilities^{}", &null_oid); + show_ref("capabilities^{}", null_oid()); advertise_shallow_grafts(1); diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 9d505a6..d55f626 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -426,7 +426,8 @@ static int module_list(int argc, const char **argv, const char *prefix) const struct cache_entry *ce = list.entries[i]; if (ce_stage(ce)) - printf("%06o %s U\t", ce->ce_mode, oid_to_hex(&null_oid)); + printf("%06o %s U\t", ce->ce_mode, + oid_to_hex(null_oid())); else printf("%06o %s %d\t", ce->ce_mode, oid_to_hex(&ce->oid), ce_stage(ce)); @@ -466,7 +467,7 @@ static void runcommand_in_submodule_cb(const struct cache_entry *list_item, displaypath = get_submodule_displaypath(path, info->prefix); - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) die(_("No url found for submodule path '%s' in .gitmodules"), @@ -623,7 +624,7 @@ static void init_submodule(const char *path, const char *prefix, displaypath = get_submodule_displaypath(path, prefix); - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) die(_("No url found for submodule path '%s' in .gitmodules"), @@ -783,14 +784,14 @@ static void status_submodule(const char *path, const struct object_id *ce_oid, struct strbuf buf = STRBUF_INIT; const char *git_dir; - if (!submodule_from_path(the_repository, &null_oid, path)) + if (!submodule_from_path(the_repository, null_oid(), path)) die(_("no submodule mapping found in .gitmodules for path '%s'"), path); displaypath = get_submodule_displaypath(path, prefix); if ((CE_STAGEMASK & ce_flags) >> CE_STAGESHIFT) { - print_status(flags, 'U', path, &null_oid, displaypath); + print_status(flags, 'U', path, null_oid(), displaypath); goto cleanup; } @@ -916,7 +917,7 @@ static int module_name(int argc, const char **argv, const char *prefix) if (argc != 2) usage(_("git submodule--helper name ")); - sub = submodule_from_path(the_repository, &null_oid, argv[1]); + sub = submodule_from_path(the_repository, null_oid(), argv[1]); if (!sub) die(_("no submodule mapping found in .gitmodules for path '%s'"), @@ -1040,7 +1041,7 @@ static void generate_submodule_summary(struct summary_cb *info, char *errmsg = NULL; int total_commits = -1; - if (!info->cached && oideq(&p->oid_dst, &null_oid)) { + if (!info->cached && oideq(&p->oid_dst, null_oid())) { if (S_ISGITLINK(p->mod_dst)) { struct ref_store *refs = get_submodule_ref_store(p->sm_path); if (refs) @@ -1177,7 +1178,7 @@ static void prepare_submodule_summary(struct summary_cb *info, if (info->for_status && p->status != 'A' && (sub = submodule_from_path(the_repository, - &null_oid, p->sm_path))) { + null_oid(), p->sm_path))) { char *config_key = NULL; const char *value; int ignore_all = 0; @@ -1373,7 +1374,7 @@ static void sync_submodule(const char *path, const char *prefix, if (!is_submodule_active(the_repository, path)) return; - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (sub && sub->url) { if (starts_with_dot_dot_slash(sub->url) || @@ -1525,7 +1526,7 @@ static void deinit_submodule(const char *path, const char *prefix, struct strbuf sb_config = STRBUF_INIT; char *sub_git_dir = xstrfmt("%s/.git", path); - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub || !sub->name) goto cleanup; @@ -1925,7 +1926,7 @@ static void determine_submodule_update_strategy(struct repository *r, const char *update, struct submodule_update_strategy *out) { - const struct submodule *sub = submodule_from_path(r, &null_oid, path); + const struct submodule *sub = submodule_from_path(r, null_oid(), path); char *key; const char *val; @@ -2077,7 +2078,7 @@ static int prepare_to_clone_next_submodule(const struct cache_entry *ce, goto cleanup; } - sub = submodule_from_path(the_repository, &null_oid, ce->name); + sub = submodule_from_path(the_repository, null_oid(), ce->name); if (suc->recursive_prefix) displaypath = relative_path(suc->recursive_prefix, @@ -2395,7 +2396,7 @@ static const char *remote_submodule_branch(const char *path) const char *branch = NULL; char *key; - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) return NULL; @@ -2533,7 +2534,7 @@ static int ensure_core_worktree(int argc, const char **argv, const char *prefix) path = argv[1]; - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) BUG("We could get the submodule handle before?"); diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 6ac90dc..4a94662 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -421,7 +421,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, * has not been resolved yet. */ oidclr(&obj_list[nr].oid); - add_delta_to_list(nr, &null_oid, base_offset, delta_data, delta_size); + add_delta_to_list(nr, null_oid(), base_offset, + delta_data, delta_size); return; } } diff --git a/builtin/worktree.c b/builtin/worktree.c index 8771453..f754978 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -331,7 +331,7 @@ static int add_worktree(const char *path, const char *refname, */ strbuf_reset(&sb); strbuf_addf(&sb, "%s/HEAD", sb_repo.buf); - write_file(sb.buf, "%s", oid_to_hex(&null_oid)); + write_file(sb.buf, "%s", oid_to_hex(null_oid())); strbuf_reset(&sb); strbuf_addf(&sb, "%s/commondir", sb_repo.buf); write_file(sb.buf, "../.."); @@ -394,7 +394,7 @@ done: cp.argv = NULL; cp.trace2_hook_name = "post-checkout"; strvec_pushl(&cp.args, absolute_path(hook), - oid_to_hex(&null_oid), + oid_to_hex(null_oid()), oid_to_hex(&commit->object.oid), "1", NULL); ret = run_command(&cp); diff --git a/combine-diff.c b/combine-diff.c index 06635f9..7d925ce 100644 --- a/combine-diff.c +++ b/combine-diff.c @@ -1068,7 +1068,7 @@ static void show_patch_diff(struct combine_diff_path *elem, int num_parent, &result_size, NULL, NULL); } else if (textconv) { struct diff_filespec *df = alloc_filespec(elem->path); - fill_filespec(df, &null_oid, 0, st.st_mode); + fill_filespec(df, null_oid(), 0, st.st_mode); result_size = fill_textconv(opt->repo, textconv, df, &result); free_filespec(df); } else if (0 <= (fd = open(elem->path, O_RDONLY))) { diff --git a/connect.c b/connect.c index 40b5c15..70b1338 100644 --- a/connect.c +++ b/connect.c @@ -254,7 +254,7 @@ static int process_dummy_ref(const struct packet_reader *reader) return 0; name++; - return oideq(&null_oid, &oid) && !strcmp(name, "capabilities^{}"); + return oideq(null_oid(), &oid) && !strcmp(name, "capabilities^{}"); } static void check_no_capabilities(const char *line, int len) diff --git a/diff-lib.c b/diff-lib.c index e5a58c9..c2ac925 100644 --- a/diff-lib.c +++ b/diff-lib.c @@ -232,7 +232,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) ce_intent_to_add(ce)) { newmode = ce_mode_from_stat(ce, st.st_mode); diff_addremove(&revs->diffopt, '+', newmode, - &null_oid, 0, ce->name, 0); + null_oid(), 0, ce->name, 0); continue; } @@ -249,7 +249,7 @@ int run_diff_files(struct rev_info *revs, unsigned int option) } oldmode = ce->ce_mode; old_oid = &ce->oid; - new_oid = changed ? &null_oid : &ce->oid; + new_oid = changed ? null_oid() : &ce->oid; diff_change(&revs->diffopt, oldmode, newmode, old_oid, new_oid, !is_null_oid(old_oid), @@ -307,7 +307,7 @@ static int get_stat_data(const struct index_state *istate, 0, dirty_submodule); if (changed) { mode = ce_mode_from_stat(ce, st.st_mode); - oid = &null_oid; + oid = null_oid(); } } diff --git a/diff-no-index.c b/diff-no-index.c index 7814eab..308922e 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -83,7 +83,7 @@ static struct diff_filespec *noindex_filespec(const char *name, int mode) if (!name) name = "/dev/null"; s = alloc_filespec(name); - fill_filespec(s, &null_oid, 0, mode); + fill_filespec(s, null_oid(), 0, mode); if (name == file_from_standard_input) populate_from_stdin(s); return s; diff --git a/diff.c b/diff.c index 97c62f4..7c730fe 100644 --- a/diff.c +++ b/diff.c @@ -4190,7 +4190,7 @@ static struct diff_tempfile *prepare_temp_file(struct repository *r, die_errno("readlink(%s)", name); prep_temp_blob(r->index, name, temp, sb.buf, sb.len, (one->oid_valid ? - &one->oid : &null_oid), + &one->oid : null_oid()), (one->oid_valid ? one->mode : S_IFLNK)); strbuf_release(&sb); @@ -4199,7 +4199,7 @@ static struct diff_tempfile *prepare_temp_file(struct repository *r, /* we can borrow from the file in the work tree */ temp->name = name; if (!one->oid_valid) - oid_to_hex_r(temp->hex, &null_oid); + oid_to_hex_r(temp->hex, null_oid()); else oid_to_hex_r(temp->hex, &one->oid); /* Even though we may sometimes borrow the diff --git a/dir.c b/dir.c index 813dd7b..0374743 100644 --- a/dir.c +++ b/dir.c @@ -3556,7 +3556,7 @@ static void connect_wt_gitdir_in_nested(const char *sub_worktree, */ i++; - sub = submodule_from_path(&subrepo, &null_oid, ce->name); + sub = submodule_from_path(&subrepo, null_oid(), ce->name); if (!sub || !is_submodule_active(&subrepo, ce->name)) /* .gitmodules broken or inactive sub */ continue; diff --git a/grep.c b/grep.c index c5c348b..8f91af1 100644 --- a/grep.c +++ b/grep.c @@ -1494,7 +1494,7 @@ static int fill_textconv_grep(struct repository *r, fill_filespec(df, gs->identifier, 1, 0100644); break; case GREP_SOURCE_FILE: - fill_filespec(df, &null_oid, 0, 0100644); + fill_filespec(df, null_oid(), 0, 0100644); break; default: BUG("attempt to textconv something without a path?"); diff --git a/hash.h b/hash.h index 0e85e44..2986f99 100644 --- a/hash.h +++ b/hash.h @@ -170,6 +170,9 @@ struct git_hash_algo { /* The OID of the empty blob. */ const struct object_id *empty_blob; + + /* The all-zeros OID. */ + const struct object_id *null_oid; }; extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; @@ -190,7 +193,7 @@ static inline int hash_algo_by_ptr(const struct git_hash_algo *p) #define the_hash_algo the_repository->hash_algo -extern const struct object_id null_oid; +const struct object_id *null_oid(void); static inline int hashcmp_algop(const unsigned char *sha1, const unsigned char *sha2, const struct git_hash_algo *algop) { @@ -246,7 +249,7 @@ static inline int oideq(const struct object_id *oid1, const struct object_id *oi static inline int is_null_oid(const struct object_id *oid) { - return oideq(oid, &null_oid); + return oideq(oid, null_oid()); } static inline void hashcpy(unsigned char *sha_dst, const unsigned char *sha_src) diff --git a/log-tree.c b/log-tree.c index f3178a6..7b82378 100644 --- a/log-tree.c +++ b/log-tree.c @@ -419,7 +419,7 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, { const char *extra_headers = opt->extra_headers; const char *name = oid_to_hex(opt->zero_commit ? - &null_oid : &commit->object.oid); + null_oid() : &commit->object.oid); *need_8bit_cte_p = 0; /* unknown */ diff --git a/merge-ort.c b/merge-ort.c index 5e118a8..3e552cf 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -1291,7 +1291,7 @@ static int handle_content_merge(struct merge_options *opt, two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode)); merge_status = merge_3way(opt, path, - two_way ? &null_oid : &o->oid, + two_way ? null_oid() : &o->oid, &a->oid, &b->oid, pathnames, extra_marker_size, &result_buf); @@ -1313,7 +1313,7 @@ static int handle_content_merge(struct merge_options *opt, } else if (S_ISGITLINK(a->mode)) { int two_way = ((S_IFMT & o->mode) != (S_IFMT & a->mode)); clean = merge_submodule(opt, pathnames[0], - two_way ? &null_oid : &o->oid, + two_way ? null_oid() : &o->oid, &a->oid, &b->oid, &result->oid); if (opt->priv->call_depth && two_way && !clean) { result->mode = o->mode; @@ -2123,7 +2123,7 @@ static int process_renames(struct merge_options *opt, if (type_changed) { /* rename vs. typechange */ /* Mark the original as resolved by removal */ - memcpy(&oldinfo->stages[0].oid, &null_oid, + memcpy(&oldinfo->stages[0].oid, null_oid(), sizeof(oldinfo->stages[0].oid)); oldinfo->stages[0].mode = 0; oldinfo->filemask &= 0x06; @@ -2762,7 +2762,7 @@ static void process_entry(struct merge_options *opt, if (ci->filemask & (1 << i)) continue; ci->stages[i].mode = 0; - oidcpy(&ci->stages[i].oid, &null_oid); + oidcpy(&ci->stages[i].oid, null_oid()); } } else if (ci->df_conflict && ci->merged.result.mode != 0) { /* @@ -2808,7 +2808,7 @@ static void process_entry(struct merge_options *opt, continue; /* zero out any entries related to directories */ new_ci->stages[i].mode = 0; - oidcpy(&new_ci->stages[i].oid, &null_oid); + oidcpy(&new_ci->stages[i].oid, null_oid()); } /* @@ -2909,11 +2909,11 @@ static void process_entry(struct merge_options *opt, new_ci->merged.result.mode = ci->stages[2].mode; oidcpy(&new_ci->merged.result.oid, &ci->stages[2].oid); new_ci->stages[1].mode = 0; - oidcpy(&new_ci->stages[1].oid, &null_oid); + oidcpy(&new_ci->stages[1].oid, null_oid()); new_ci->filemask = 5; if ((S_IFMT & b_mode) != (S_IFMT & o_mode)) { new_ci->stages[0].mode = 0; - oidcpy(&new_ci->stages[0].oid, &null_oid); + oidcpy(&new_ci->stages[0].oid, null_oid()); new_ci->filemask = 4; } @@ -2921,11 +2921,11 @@ static void process_entry(struct merge_options *opt, ci->merged.result.mode = ci->stages[1].mode; oidcpy(&ci->merged.result.oid, &ci->stages[1].oid); ci->stages[2].mode = 0; - oidcpy(&ci->stages[2].oid, &null_oid); + oidcpy(&ci->stages[2].oid, null_oid()); ci->filemask = 3; if ((S_IFMT & a_mode) != (S_IFMT & o_mode)) { ci->stages[0].mode = 0; - oidcpy(&ci->stages[0].oid, &null_oid); + oidcpy(&ci->stages[0].oid, null_oid()); ci->filemask = 2; } @@ -3042,7 +3042,7 @@ static void process_entry(struct merge_options *opt, /* Deleted on both sides */ ci->merged.is_null = 1; ci->merged.result.mode = 0; - oidcpy(&ci->merged.result.oid, &null_oid); + oidcpy(&ci->merged.result.oid, null_oid()); ci->merged.clean = !ci->path_conflict; } diff --git a/merge-recursive.c b/merge-recursive.c index ed31f94..03f5c07 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -486,7 +486,7 @@ static int get_tree_entry_if_blob(struct repository *r, ret = get_tree_entry(r, tree, path, &dfs->oid, &dfs->mode); if (S_ISDIR(dfs->mode)) { - oidcpy(&dfs->oid, &null_oid); + oidcpy(&dfs->oid, null_oid()); dfs->mode = 0; } return ret; @@ -1604,7 +1604,7 @@ static int handle_file_collision(struct merge_options *opt, /* Store things in diff_filespecs for functions that need it */ null.path = (char *)collide_path; - oidcpy(&null.oid, &null_oid); + oidcpy(&null.oid, null_oid()); null.mode = 0; if (merge_mode_and_contents(opt, &null, a, b, collide_path, @@ -2789,11 +2789,11 @@ static int process_renames(struct merge_options *opt, dst_other.mode = ren1->dst_entry->stages[other_stage].mode; try_merge = 0; - if (oideq(&src_other.oid, &null_oid) && + if (oideq(&src_other.oid, null_oid()) && ren1->dir_rename_original_type == 'A') { setup_rename_conflict_info(RENAME_VIA_DIR, opt, ren1, NULL); - } else if (oideq(&src_other.oid, &null_oid)) { + } else if (oideq(&src_other.oid, null_oid())) { setup_rename_conflict_info(RENAME_DELETE, opt, ren1, NULL); } else if ((dst_other.mode == ren1->pair->two->mode) && @@ -2812,7 +2812,7 @@ static int process_renames(struct merge_options *opt, 1, /* update_cache */ 0 /* update_wd */)) clean_merge = -1; - } else if (!oideq(&dst_other.oid, &null_oid)) { + } else if (!oideq(&dst_other.oid, null_oid())) { /* * Probably not a clean merge, but it's * premature to set clean_merge to 0 here, diff --git a/notes-merge.c b/notes-merge.c index d2771fa..53c587f 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -600,7 +600,7 @@ int notes_merge(struct notes_merge_options *o, /* Find merge bases */ bases = get_merge_bases(local, remote); if (!bases) { - base_oid = &null_oid; + base_oid = null_oid(); base_tree_oid = the_hash_algo->empty_tree; if (o->verbosity >= 4) printf("No merge base found; doing history-less merge\n"); diff --git a/notes.c b/notes.c index 135ea13..f87dac4 100644 --- a/notes.c +++ b/notes.c @@ -1327,7 +1327,7 @@ int copy_note(struct notes_tree *t, if (note) return add_note(t, to_obj, note, combine_notes); else if (existing_note) - return add_note(t, to_obj, &null_oid, combine_notes); + return add_note(t, to_obj, null_oid(), combine_notes); return 0; } diff --git a/object-file.c b/object-file.c index d4ba0c4..884855b 100644 --- a/object-file.c +++ b/object-file.c @@ -55,7 +55,6 @@ "\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72" \ "\x18\x13" -const struct object_id null_oid; static const struct object_id empty_tree_oid = { .hash = EMPTY_TREE_SHA1_BIN_LITERAL, .algo = GIT_HASH_SHA1, @@ -64,6 +63,10 @@ static const struct object_id empty_blob_oid = { .hash = EMPTY_BLOB_SHA1_BIN_LITERAL, .algo = GIT_HASH_SHA1, }; +static const struct object_id null_oid_sha1 = { + .hash = {0}, + .algo = GIT_HASH_SHA1, +}; static const struct object_id empty_tree_oid_sha256 = { .hash = EMPTY_TREE_SHA256_BIN_LITERAL, .algo = GIT_HASH_SHA256, @@ -72,6 +75,10 @@ static const struct object_id empty_blob_oid_sha256 = { .hash = EMPTY_BLOB_SHA256_BIN_LITERAL, .algo = GIT_HASH_SHA256, }; +static const struct object_id null_oid_sha256 = { + .hash = {0}, + .algo = GIT_HASH_SHA256, +}; static void git_hash_sha1_init(git_hash_ctx *ctx) { @@ -172,6 +179,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { git_hash_unknown_final_oid, NULL, NULL, + NULL, }, { "sha1", @@ -187,6 +195,7 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { git_hash_sha1_final_oid, &empty_tree_oid, &empty_blob_oid, + &null_oid_sha1, }, { "sha256", @@ -202,9 +211,15 @@ const struct git_hash_algo hash_algos[GIT_HASH_NALGOS] = { git_hash_sha256_final_oid, &empty_tree_oid_sha256, &empty_blob_oid_sha256, + &null_oid_sha256, } }; +const struct object_id *null_oid(void) +{ + return the_hash_algo->null_oid; +} + const char *empty_tree_oid_hex(void) { static char buf[GIT_MAX_HEXSZ + 1]; diff --git a/parse-options-cb.c b/parse-options-cb.c index 4542d4d..3c811e1 100644 --- a/parse-options-cb.c +++ b/parse-options-cb.c @@ -140,7 +140,7 @@ int parse_opt_object_id(const struct option *opt, const char *arg, int unset) struct object_id *target = opt->value; if (unset) { - *target = null_oid; + oidcpy(target, null_oid()); return 0; } if (!arg) diff --git a/range-diff.c b/range-diff.c index 116fb07..1a4471f 100644 --- a/range-diff.c +++ b/range-diff.c @@ -445,7 +445,7 @@ static struct diff_filespec *get_filespec(const char *name, const char *p) { struct diff_filespec *spec = alloc_filespec(name); - fill_filespec(spec, &null_oid, 0, 0100644); + fill_filespec(spec, null_oid(), 0, 0100644); spec->data = (char *)p; spec->size = strlen(p); spec->should_munmap = 0; diff --git a/refs.c b/refs.c index 261fd82..996063f 100644 --- a/refs.c +++ b/refs.c @@ -1107,7 +1107,7 @@ int ref_transaction_create(struct ref_transaction *transaction, if (!new_oid || is_null_oid(new_oid)) BUG("create called without valid new_oid"); return ref_transaction_update(transaction, refname, new_oid, - &null_oid, flags, msg, err); + null_oid(), flags, msg, err); } int ref_transaction_delete(struct ref_transaction *transaction, @@ -1119,7 +1119,7 @@ int ref_transaction_delete(struct ref_transaction *transaction, if (old_oid && is_null_oid(old_oid)) BUG("delete called with old_oid set to zeros"); return ref_transaction_update(transaction, refname, - &null_oid, old_oid, + null_oid(), old_oid, flags, msg, err); } diff --git a/refs/debug.c b/refs/debug.c index 922e64f..2665f94 100644 --- a/refs/debug.c +++ b/refs/debug.c @@ -243,7 +243,7 @@ static int debug_read_raw_ref(struct ref_store *ref_store, const char *refname, struct debug_ref_store *drefs = (struct debug_ref_store *)ref_store; int res = 0; - oidcpy(oid, &null_oid); + oidcpy(oid, null_oid()); res = drefs->refs->be->read_raw_ref(drefs->refs, refname, oid, referent, type); diff --git a/refs/files-backend.c b/refs/files-backend.c index 119972e..3f29f8c 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -1084,7 +1084,7 @@ static void prune_ref(struct files_ref_store *refs, struct ref_to_prune *r) ref_transaction_add_update( transaction, r->name, REF_NO_DEREF | REF_HAVE_NEW | REF_HAVE_OLD | REF_IS_PRUNING, - &null_oid, &r->oid, NULL); + null_oid(), &r->oid, NULL); if (ref_transaction_commit(transaction, &err)) goto cleanup; diff --git a/reset.c b/reset.c index 2f4fbd0..4bea758 100644 --- a/reset.c +++ b/reset.c @@ -128,7 +128,7 @@ reset_head_refs: } if (run_hook) run_hook_le(NULL, "post-checkout", - oid_to_hex(orig ? orig : &null_oid), + oid_to_hex(orig ? orig : null_oid()), oid_to_hex(oid), "1", NULL); leave_reset_head: diff --git a/sequencer.c b/sequencer.c index fd183b5..dbee779 100644 --- a/sequencer.c +++ b/sequencer.c @@ -524,7 +524,7 @@ static int fast_forward_to(struct repository *r, if (!transaction || ref_transaction_update(transaction, "HEAD", to, unborn && !is_rebase_i(opts) ? - &null_oid : from, + null_oid() : from, 0, sb.buf, &err) || ref_transaction_commit(transaction, &err)) { ref_transaction_free(transaction); @@ -1131,7 +1131,7 @@ int update_head_with_reflog(const struct commit *old_head, transaction = ref_transaction_begin(err); if (!transaction || ref_transaction_update(transaction, "HEAD", new_head, - old_head ? &old_head->object.oid : &null_oid, + old_head ? &old_head->object.oid : null_oid(), 0, sb.buf, err) || ref_transaction_commit(transaction, err)) { ret = -1; diff --git a/submodule-config.c b/submodule-config.c index f502505..2026120 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -671,7 +671,7 @@ static int gitmodules_cb(const char *var, const char *value, void *data) parameter.cache = repo->submodule_cache; parameter.treeish_name = NULL; - parameter.gitmodules_oid = &null_oid; + parameter.gitmodules_oid = null_oid(); parameter.overwrite = 1; return parse_config(var, value, ¶meter); diff --git a/submodule.c b/submodule.c index 9767ba9..7eeaf7d 100644 --- a/submodule.c +++ b/submodule.c @@ -113,7 +113,7 @@ int update_path_in_gitmodules(const char *oldpath, const char *newpath) if (is_gitmodules_unmerged(the_repository->index)) die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first")); - submodule = submodule_from_path(the_repository, &null_oid, oldpath); + submodule = submodule_from_path(the_repository, null_oid(), oldpath); if (!submodule || !submodule->name) { warning(_("Could not find section in .gitmodules where path=%s"), oldpath); return -1; @@ -142,7 +142,7 @@ int remove_path_from_gitmodules(const char *path) if (is_gitmodules_unmerged(the_repository->index)) die(_("Cannot change unmerged .gitmodules, resolve merge conflicts first")); - submodule = submodule_from_path(the_repository, &null_oid, path); + submodule = submodule_from_path(the_repository, null_oid(), path); if (!submodule || !submodule->name) { warning(_("Could not find section in .gitmodules where path=%s"), path); return -1; @@ -188,7 +188,8 @@ void set_diffopt_flags_from_submodule_config(struct diff_options *diffopt, const char *path) { const struct submodule *submodule = submodule_from_path(the_repository, - &null_oid, path); + null_oid(), + path); if (submodule) { const char *ignore; char *key; @@ -244,7 +245,7 @@ int is_submodule_active(struct repository *repo, const char *path) const struct string_list *sl; const struct submodule *module; - module = submodule_from_path(repo, &null_oid, path); + module = submodule_from_path(repo, null_oid(), path); /* early return if there isn't a path->module mapping */ if (!module) @@ -745,7 +746,7 @@ const struct submodule *submodule_from_ce(const struct cache_entry *ce) if (!should_update_submodules()) return NULL; - return submodule_from_path(the_repository, &null_oid, ce->name); + return submodule_from_path(the_repository, null_oid(), ce->name); } static struct oid_array *submodule_commits(struct string_list *submodules, @@ -1037,7 +1038,7 @@ int find_unpushed_submodules(struct repository *r, const struct submodule *submodule; const char *path = NULL; - submodule = submodule_from_name(r, &null_oid, name->string); + submodule = submodule_from_name(r, null_oid(), name->string); if (submodule) path = submodule->path; else @@ -1224,7 +1225,7 @@ static void calculate_changed_submodule_paths(struct repository *r, const struct submodule *submodule; const char *path = NULL; - submodule = submodule_from_name(r, &null_oid, name->string); + submodule = submodule_from_name(r, null_oid(), name->string); if (submodule) path = submodule->path; else @@ -1361,7 +1362,7 @@ static struct fetch_task *fetch_task_create(struct repository *r, struct fetch_task *task = xmalloc(sizeof(*task)); memset(task, 0, sizeof(*task)); - task->sub = submodule_from_path(r, &null_oid, path); + task->sub = submodule_from_path(r, null_oid(), path); if (!task->sub) { /* * No entry in .gitmodules? Technically not a submodule, @@ -1917,7 +1918,7 @@ int submodule_move_head(const char *path, if (old_head && !is_submodule_populated_gently(path, error_code_ptr)) return 0; - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) BUG("could not get submodule information for '%s'", path); @@ -2076,7 +2077,7 @@ static void relocate_single_git_dir_into_superproject(const char *path) real_old_git_dir = real_pathdup(old_git_dir, 1); - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) die(_("could not lookup name for submodule '%s'"), path); @@ -2135,7 +2136,7 @@ void absorb_git_dir_into_superproject(const char *path, * superproject did not rewrite the git file links yet, * fix it now. */ - sub = submodule_from_path(the_repository, &null_oid, path); + sub = submodule_from_path(the_repository, null_oid(), path); if (!sub) die(_("could not lookup name for submodule '%s'"), path); connect_work_tree_and_git_dir(path, @@ -2283,7 +2284,8 @@ int submodule_to_gitdir(struct strbuf *buf, const char *submodule) strbuf_addstr(buf, git_dir); } if (!is_git_directory(buf->buf)) { - sub = submodule_from_path(the_repository, &null_oid, submodule); + sub = submodule_from_path(the_repository, null_oid(), + submodule); if (!sub) { ret = -1; goto cleanup; diff --git a/t/helper/test-submodule-nested-repo-config.c b/t/helper/test-submodule-nested-repo-config.c index c5fd452..e3f11ff 100644 --- a/t/helper/test-submodule-nested-repo-config.c +++ b/t/helper/test-submodule-nested-repo-config.c @@ -18,7 +18,7 @@ int cmd__submodule_nested_repo_config(int argc, const char **argv) setup_git_directory(); - sub = submodule_from_path(the_repository, &null_oid, argv[1]); + sub = submodule_from_path(the_repository, null_oid(), argv[1]); if (repo_submodule_init(&subrepo, the_repository, sub)) { die_usage(argv, "Submodule not found."); } diff --git a/tree-diff.c b/tree-diff.c index 7cebbb3..1572615 100644 --- a/tree-diff.c +++ b/tree-diff.c @@ -161,7 +161,7 @@ static struct combine_diff_path *path_appendnew(struct combine_diff_path *last, memcpy(p->path + base->len, path, pathlen); p->path[len] = 0; p->mode = mode; - oidcpy(&p->oid, oid ? oid : &null_oid); + oidcpy(&p->oid, oid ? oid : null_oid()); return p; } @@ -243,7 +243,7 @@ static struct combine_diff_path *emit_path(struct combine_diff_path *p, mode_i = tp[i].entry.mode; } else { - oid_i = &null_oid; + oid_i = null_oid(); mode_i = 0; } diff --git a/wt-status.c b/wt-status.c index 1aed68c..7603c1b 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1687,10 +1687,10 @@ void wt_status_get_state(struct repository *r, if (!sequencer_get_last_command(r, &action)) { if (action == REPLAY_PICK) { state->cherry_pick_in_progress = 1; - oidcpy(&state->cherry_pick_head_oid, &null_oid); + oidcpy(&state->cherry_pick_head_oid, null_oid()); } else { state->revert_in_progress = 1; - oidcpy(&state->revert_head_oid, &null_oid); + oidcpy(&state->revert_head_oid, null_oid()); } } if (get_detached_from) diff --git a/xdiff-interface.c b/xdiff-interface.c index 4d20069..609615d 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -172,7 +172,7 @@ void read_mmblob(mmfile_t *ptr, const struct object_id *oid) unsigned long size; enum object_type type; - if (oideq(oid, &null_oid)) { + if (oideq(oid, null_oid())) { ptr->ptr = xstrdup(""); ptr->size = 0; return; -- cgit v0.10.2-6-g49f6 From dd15f4f4570f9fed3012132adf9bf95cd921442b Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:57 +0000 Subject: builtin/show-index: set the algorithm for object IDs In most cases, when we load the hash of an object into a struct object_id, we load it using one of the oid* or *_oid_hex functions. However, for git show-index, we read it in directly using fread. As a consequence, set the algorithm correctly so the objects can be used correctly both now and in the future. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/builtin/show-index.c b/builtin/show-index.c index 8106b03..0e0b9fb 100644 --- a/builtin/show-index.c +++ b/builtin/show-index.c @@ -71,9 +71,11 @@ int cmd_show_index(int argc, const char **argv, const char *prefix) uint32_t off; } *entries; ALLOC_ARRAY(entries, nr); - for (i = 0; i < nr; i++) + for (i = 0; i < nr; i++) { if (fread(entries[i].oid.hash, hashsz, 1, stdin) != 1) die("unable to read sha1 %u/%u", i, nr); + entries[i].oid.algo = hash_algo_by_ptr(the_hash_algo); + } for (i = 0; i < nr; i++) if (fread(&entries[i].crc, 4, 1, stdin) != 1) die("unable to read crc %u/%u", i, nr); -- cgit v0.10.2-6-g49f6 From 72871b132c7c81d23aa9d4467013651c1c51bbc4 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:58 +0000 Subject: commit-graph: don't store file hashes as struct object_id The idea behind struct object_id is that it is supposed to represent the identifier of a standard Git object or a special pseudo-object like the all-zeros object ID. In this case, we have file hashes, which, while similar, are distinct from the identifiers of objects. Switch these code paths to use an unsigned char array. This is both more logically consistent and it means that we need not set the algorithm identifier for the struct object_id. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/commit-graph.c b/commit-graph.c index 23fef56..2bcb4e0 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1793,8 +1793,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) struct lock_file lk = LOCK_INIT; const unsigned hashsz = the_hash_algo->rawsz; struct strbuf progress_title = STRBUF_INIT; - struct object_id file_hash; struct chunkfile *cf; + unsigned char file_hash[GIT_MAX_RAWSZ]; if (ctx->split) { struct strbuf tmp_file = STRBUF_INIT; @@ -1909,7 +1909,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) } close_commit_graph(ctx->r->objects); - finalize_hashfile(f, file_hash.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); + finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC); free_chunkfile(cf); if (ctx->split) { @@ -1945,7 +1945,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) unlink(graph_name); } - ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(oid_to_hex(&file_hash)); + ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1] = xstrdup(hash_to_hex(file_hash)); final_graph_name = get_split_graph_filename(ctx->odb, ctx->commit_graph_hash_after[ctx->num_commit_graphs_after - 1]); ctx->commit_graph_filenames_after[ctx->num_commit_graphs_after - 1] = final_graph_name; @@ -2425,7 +2425,8 @@ static void graph_report(const char *fmt, ...) int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) { uint32_t i, cur_fanout_pos = 0; - struct object_id prev_oid, cur_oid, checksum; + struct object_id prev_oid, cur_oid; + unsigned char checksum[GIT_MAX_HEXSZ]; int generation_zero = 0; struct hashfile *f; int devnull; @@ -2444,8 +2445,8 @@ int verify_commit_graph(struct repository *r, struct commit_graph *g, int flags) devnull = open("/dev/null", O_WRONLY); f = hashfd(devnull, NULL); hashwrite(f, g->data, g->data_len - g->hash_len); - finalize_hashfile(f, checksum.hash, CSUM_CLOSE); - if (!hasheq(checksum.hash, g->data + g->data_len - g->hash_len)) { + finalize_hashfile(f, checksum, CSUM_CLOSE); + if (!hasheq(checksum, g->data + g->data_len - g->hash_len)) { graph_report(_("the commit-graph file has incorrect checksum and is likely corrupt")); verify_commit_graph_error = VERIFY_COMMIT_GRAPH_ERROR_HASH; } -- cgit v0.10.2-6-g49f6 From 71b7672b674bc9a44fb41428804be428c2981046 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:02:59 +0000 Subject: builtin/pack-objects: avoid using struct object_id for pack hash We use struct object_id for the names of objects. It isn't intended to be used for other hash values that don't name objects such as the pack hash. Because struct object_id will soon need to have its algorithm member set, using it in this code path would mean that we didn't set that member, only the hash member, which would result in a crash. For both of these reasons, switch to using an unsigned char array of size GIT_MAX_RAWSZ. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 525c2d8..5b25382 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1030,7 +1030,7 @@ static void write_pack_file(void) write_order = compute_write_order(); do { - struct object_id oid; + unsigned char hash[GIT_MAX_RAWSZ]; char *pack_tmp_name = NULL; if (pack_to_stdout) @@ -1059,13 +1059,13 @@ static void write_pack_file(void) * If so, rewrite it like in fast-import */ if (pack_to_stdout) { - finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE); + finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE); } else if (nr_written == nr_remaining) { - finalize_hashfile(f, oid.hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); + finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE); } else { - int fd = finalize_hashfile(f, oid.hash, 0); - fixup_pack_header_footer(fd, oid.hash, pack_tmp_name, - nr_written, oid.hash, offset); + int fd = finalize_hashfile(f, hash, 0); + fixup_pack_header_footer(fd, hash, pack_tmp_name, + nr_written, hash, offset); close(fd); if (write_bitmap_index) { if (write_bitmap_index != WRITE_BITMAP_QUIET) @@ -1100,17 +1100,17 @@ static void write_pack_file(void) strbuf_addf(&tmpname, "%s-", base_name); if (write_bitmap_index) { - bitmap_writer_set_checksum(oid.hash); + bitmap_writer_set_checksum(hash); bitmap_writer_build_type_index( &to_pack, written_list, nr_written); } finish_tmp_packfile(&tmpname, pack_tmp_name, written_list, nr_written, - &pack_idx_opts, oid.hash); + &pack_idx_opts, hash); if (write_bitmap_index) { - strbuf_addf(&tmpname, "%s.bitmap", oid_to_hex(&oid)); + strbuf_addf(&tmpname, "%s.bitmap", hash_to_hex(hash)); stop_progress(&progress_state); @@ -1124,7 +1124,7 @@ static void write_pack_file(void) strbuf_release(&tmpname); free(pack_tmp_name); - puts(oid_to_hex(&oid)); + puts(hash_to_hex(hash)); } /* mark written objects as written to previous pack */ -- cgit v0.10.2-6-g49f6 From b8505ecbf2b1e4ef27b9597fd113cb1679792b29 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:03:00 +0000 Subject: hex: default to the_hash_algo on zero algorithm value There are numerous places in the codebase where we assume we can initialize data by zeroing all its bytes. However, when we do that with a struct object_id, it leaves the structure with a zero value for the algorithm, which is invalid. We could forbid this pattern and require that all struct object_id instances be initialized using oidclr, but this seems burdensome and it's unnatural to most C programmers. Instead, if the algorithm is zero, assume we wanted to use the default hash algorithm instead. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/hex.c b/hex.c index e7af18f..74d256f 100644 --- a/hex.c +++ b/hex.c @@ -124,6 +124,13 @@ char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash, char *buf = buffer; int i; + /* + * Our struct object_id has been memset to 0, so default to printing + * using the default hash. + */ + if (algop == &hash_algos[0]) + algop = the_hash_algo; + for (i = 0; i < algop->rawsz; i++) { unsigned int val = *hash++; *buf++ = hex[val >> 4]; -- cgit v0.10.2-6-g49f6 From 3dd71461e25b4cc7ea2a2d8deef1c0486bb32580 Mon Sep 17 00:00:00 2001 From: "brian m. carlson" Date: Mon, 26 Apr 2021 01:03:01 +0000 Subject: hex: print objects using the hash algorithm member Now that all code paths correctly set the hash algorithm member of struct object_id, write an object's hex representation using the hash algorithm member embedded in it. Signed-off-by: brian m. carlson Signed-off-by: Junio C Hamano diff --git a/hex.c b/hex.c index 74d256f..4f64d34 100644 --- a/hex.c +++ b/hex.c @@ -143,7 +143,7 @@ char *hash_to_hex_algop_r(char *buffer, const unsigned char *hash, char *oid_to_hex_r(char *buffer, const struct object_id *oid) { - return hash_to_hex_algop_r(buffer, oid->hash, the_hash_algo); + return hash_to_hex_algop_r(buffer, oid->hash, &hash_algos[oid->algo]); } char *hash_to_hex_algop(const unsigned char *hash, const struct git_hash_algo *algop) @@ -161,5 +161,5 @@ char *hash_to_hex(const unsigned char *hash) char *oid_to_hex(const struct object_id *oid) { - return hash_to_hex_algop(oid->hash, the_hash_algo); + return hash_to_hex_algop(oid->hash, &hash_algos[oid->algo]); } -- cgit v0.10.2-6-g49f6