From 3ffefb54c0515308ceafb6ba071567d9fd379498 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:36:52 -0400 Subject: commit_tree: take a pointer/len pair rather than a const strbuf While strbufs are pretty common throughout our code, it is more flexible for functions to take a pointer/len pair than a strbuf. It's easy to turn a strbuf into such a pair (by dereferencing its members), but less easy to go the other way (you can strbuf_attach, but that has implications about memory ownership). This patch teaches commit_tree (and its associated callers and sub-functions) to take such a pair for the commit message rather than a strbuf. This makes passing the buffer around slightly more verbose, but means we can get rid of some dangerous strbuf_attach calls in the next patch. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/commit-tree.c b/builtin/commit-tree.c index 987a4c3..8a66c74 100644 --- a/builtin/commit-tree.c +++ b/builtin/commit-tree.c @@ -123,8 +123,8 @@ int cmd_commit_tree(int argc, const char **argv, const char *prefix) die_errno("git commit-tree: failed to read"); } - if (commit_tree(&buffer, tree_sha1, parents, commit_sha1, - NULL, sign_commit)) { + if (commit_tree(buffer.buf, buffer.len, tree_sha1, parents, + commit_sha1, NULL, sign_commit)) { strbuf_release(&buffer); return 1; } diff --git a/builtin/commit.c b/builtin/commit.c index 9cfef6c..8b85df4 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1659,8 +1659,8 @@ int cmd_commit(int argc, const char **argv, const char *prefix) append_merge_tag_headers(parents, &tail); } - if (commit_tree_extended(&sb, active_cache_tree->sha1, parents, sha1, - author_ident.buf, sign_commit, extra)) { + if (commit_tree_extended(sb.buf, sb.len, active_cache_tree->sha1, + parents, sha1, author_ident.buf, sign_commit, extra)) { rollback_index_files(); die(_("failed to write commit object")); } diff --git a/builtin/merge.c b/builtin/merge.c index 66d8843..8763b2e 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -852,8 +852,8 @@ static int merge_trivial(struct commit *head, struct commit_list *remoteheads) parent->next->item = remoteheads->item; parent->next->next = NULL; prepare_to_commit(remoteheads); - if (commit_tree(&merge_msg, result_tree, parent, result_commit, NULL, - sign_commit)) + if (commit_tree(merge_msg.buf, merge_msg.len, result_tree, parent, + result_commit, NULL, sign_commit)) die(_("failed to write commit object")); finish(head, remoteheads, result_commit, "In-index merge"); drop_save(); @@ -877,8 +877,8 @@ static int finish_automerge(struct commit *head, commit_list_insert(head, &parents); strbuf_addch(&merge_msg, '\n'); prepare_to_commit(remoteheads); - if (commit_tree(&merge_msg, result_tree, parents, result_commit, - NULL, sign_commit)) + if (commit_tree(merge_msg.buf, merge_msg.len, result_tree, parents, + result_commit, NULL, sign_commit)) die(_("failed to write commit object")); strbuf_addf(&buf, "Merge made by the '%s' strategy.", wt_strategy); finish(head, remoteheads, result_commit, buf.buf); diff --git a/commit.c b/commit.c index f479331..bd3d5af 100644 --- a/commit.c +++ b/commit.c @@ -1344,7 +1344,8 @@ void free_commit_extra_headers(struct commit_extra_header *extra) } } -int commit_tree(const struct strbuf *msg, const unsigned char *tree, +int commit_tree(const char *msg, size_t msg_len, + const unsigned char *tree, struct commit_list *parents, unsigned char *ret, const char *author, const char *sign_commit) { @@ -1352,7 +1353,7 @@ int commit_tree(const struct strbuf *msg, const unsigned char *tree, int result; append_merge_tag_headers(parents, &tail); - result = commit_tree_extended(msg, tree, parents, ret, + result = commit_tree_extended(msg, msg_len, tree, parents, ret, author, sign_commit, extra); free_commit_extra_headers(extra); return result; @@ -1473,7 +1474,8 @@ static const char commit_utf8_warn[] = "You may want to amend it after fixing the message, or set the config\n" "variable i18n.commitencoding to the encoding your project uses.\n"; -int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, +int commit_tree_extended(const char *msg, size_t msg_len, + const unsigned char *tree, struct commit_list *parents, unsigned char *ret, const char *author, const char *sign_commit, struct commit_extra_header *extra) @@ -1484,7 +1486,7 @@ int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, assert_sha1_type(tree, OBJ_TREE); - if (memchr(msg->buf, '\0', msg->len)) + if (memchr(msg, '\0', msg_len)) return error("a NUL byte in commit log message not allowed."); /* Not having i18n.commitencoding is the same as having utf-8 */ @@ -1523,7 +1525,7 @@ int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, strbuf_addch(&buffer, '\n'); /* And add the comment */ - strbuf_addbuf(&buffer, msg); + strbuf_add(&buffer, msg, msg_len); /* And check the encoding */ if (encoding_is_utf8 && !verify_utf8(&buffer)) diff --git a/commit.h b/commit.h index a9f177b..1cb55ae 100644 --- a/commit.h +++ b/commit.h @@ -261,11 +261,13 @@ struct commit_extra_header { extern void append_merge_tag_headers(struct commit_list *parents, struct commit_extra_header ***tail); -extern int commit_tree(const struct strbuf *msg, const unsigned char *tree, +extern int commit_tree(const char *msg, size_t msg_len, + const unsigned char *tree, struct commit_list *parents, unsigned char *ret, const char *author, const char *sign_commit); -extern int commit_tree_extended(const struct strbuf *msg, const unsigned char *tree, +extern int commit_tree_extended(const char *msg, size_t msg_len, + const unsigned char *tree, struct commit_list *parents, unsigned char *ret, const char *author, const char *sign_commit, struct commit_extra_header *); diff --git a/notes-cache.c b/notes-cache.c index eabe4a0..9d27b66 100644 --- a/notes-cache.c +++ b/notes-cache.c @@ -59,7 +59,7 @@ int notes_cache_write(struct notes_cache *c) return -1; strbuf_attach(&msg, c->validity, strlen(c->validity), strlen(c->validity) + 1); - if (commit_tree(&msg, tree_sha1, NULL, commit_sha1, NULL, NULL) < 0) + if (commit_tree(msg.buf, msg.len, tree_sha1, NULL, commit_sha1, NULL, NULL) < 0) return -1; if (update_ref("update notes cache", c->tree.ref, commit_sha1, NULL, 0, QUIET_ON_ERR) < 0) diff --git a/notes-merge.c b/notes-merge.c index 94a1a8a..9d94210 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -644,7 +644,8 @@ int notes_merge(struct notes_merge_options *o, struct commit_list *parents = NULL; commit_list_insert(remote, &parents); /* LIFO order */ commit_list_insert(local, &parents); - create_notes_commit(local_tree, parents, &o->commit_msg, + create_notes_commit(local_tree, parents, + o->commit_msg.buf, o->commit_msg.len, result_sha1); } @@ -720,7 +721,8 @@ int notes_merge_commit(struct notes_merge_options *o, } strbuf_attach(&sb_msg, msg, strlen(msg), strlen(msg) + 1); - create_notes_commit(partial_tree, partial_commit->parents, &sb_msg, + create_notes_commit(partial_tree, partial_commit->parents, + sb_msg.buf, sb_msg.len, result_sha1); if (o->verbosity >= 4) printf("Finalized notes merge commit: %s\n", diff --git a/notes-utils.c b/notes-utils.c index 4aa7023..f6891f7 100644 --- a/notes-utils.c +++ b/notes-utils.c @@ -4,7 +4,8 @@ #include "notes-utils.h" void create_notes_commit(struct notes_tree *t, struct commit_list *parents, - const struct strbuf *msg, unsigned char *result_sha1) + const char *msg, size_t msg_len, + unsigned char *result_sha1) { unsigned char tree_sha1[20]; @@ -25,7 +26,7 @@ void create_notes_commit(struct notes_tree *t, struct commit_list *parents, /* else: t->ref points to nothing, assume root/orphan commit */ } - if (commit_tree(msg, tree_sha1, parents, result_sha1, NULL, NULL)) + if (commit_tree(msg, msg_len, tree_sha1, parents, result_sha1, NULL, NULL)) die("Failed to commit notes tree to database"); } @@ -46,7 +47,7 @@ void commit_notes(struct notes_tree *t, const char *msg) if (buf.buf[buf.len - 1] != '\n') strbuf_addch(&buf, '\n'); /* Make sure msg ends with newline */ - create_notes_commit(t, NULL, &buf, commit_sha1); + create_notes_commit(t, NULL, buf.buf, buf.len, commit_sha1); strbuf_insert(&buf, 0, "notes: ", 7); /* commit message starts at index 7 */ update_ref(buf.buf, t->ref, commit_sha1, NULL, 0, DIE_ON_ERR); diff --git a/notes-utils.h b/notes-utils.h index 564e30c..890ddb3 100644 --- a/notes-utils.h +++ b/notes-utils.h @@ -15,7 +15,7 @@ * The resulting commit SHA1 is stored in result_sha1. */ void create_notes_commit(struct notes_tree *t, struct commit_list *parents, - const struct strbuf *msg, unsigned char *result_sha1); + const char *msg, size_t msg_len, unsigned char *result_sha1); void commit_notes(struct notes_tree *t, const char *msg); -- cgit v0.10.2-6-g49f6 From e6dfcd6767a58816dacec0df39515803b267fbe6 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:38:38 -0400 Subject: replace dangerous uses of strbuf_attach It is not a good idea to strbuf_attach an arbitrary pointer just because a function you are calling wants a strbuf. Attaching implies a transfer of memory ownership; if anyone were to modify or release the resulting strbuf, we would free() the pointer, leading to possible problems: 1. Other users of the original pointer might access freed memory. 2. The pointer might not be the start of a malloc'd area, so calling free() on it in the first place would be wrong. In the two cases modified here, we are fortunate that nobody touches the strbuf once it is attached, but it is an accident waiting to happen. Since the previous commit, commit_tree and friends take a pointer/buf pair, so we can just do away with the strbufs entirely. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/notes-cache.c b/notes-cache.c index 9d27b66..25b20aa 100644 --- a/notes-cache.c +++ b/notes-cache.c @@ -48,7 +48,6 @@ int notes_cache_write(struct notes_cache *c) { unsigned char tree_sha1[20]; unsigned char commit_sha1[20]; - struct strbuf msg = STRBUF_INIT; if (!c || !c->tree.initialized || !c->tree.ref || !*c->tree.ref) return -1; @@ -57,9 +56,8 @@ int notes_cache_write(struct notes_cache *c) if (write_notes_tree(&c->tree, tree_sha1)) return -1; - strbuf_attach(&msg, c->validity, - strlen(c->validity), strlen(c->validity) + 1); - if (commit_tree(msg.buf, msg.len, tree_sha1, NULL, commit_sha1, NULL, NULL) < 0) + if (commit_tree(c->validity, strlen(c->validity), tree_sha1, NULL, + commit_sha1, NULL, NULL) < 0) return -1; if (update_ref("update notes cache", c->tree.ref, commit_sha1, NULL, 0, QUIET_ON_ERR) < 0) diff --git a/notes-merge.c b/notes-merge.c index 9d94210..697cec3 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -673,7 +673,6 @@ int notes_merge_commit(struct notes_merge_options *o, struct dirent *e; struct strbuf path = STRBUF_INIT; char *msg = strstr(partial_commit->buffer, "\n\n"); - struct strbuf sb_msg = STRBUF_INIT; int baselen; strbuf_addstr(&path, git_path(NOTES_MERGE_WORKTREE)); @@ -720,10 +719,8 @@ int notes_merge_commit(struct notes_merge_options *o, strbuf_setlen(&path, baselen); } - strbuf_attach(&sb_msg, msg, strlen(msg), strlen(msg) + 1); create_notes_commit(partial_tree, partial_commit->parents, - sb_msg.buf, sb_msg.len, - result_sha1); + msg, strlen(msg), result_sha1); if (o->verbosity >= 4) printf("Finalized notes merge commit: %s\n", sha1_to_hex(result_sha1)); -- cgit v0.10.2-6-g49f6 From c335d74d349d201cb7627aa7233638d2953a4b02 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:38:47 -0400 Subject: alloc: include any-object allocations in alloc_report When 2c1cbec (Use proper object allocators for unknown object nodes too, 2007-04-16), added a special "any_object" allocator, it never taught alloc_report to report on it. To do so we need to add an extra type argument to the REPORT macro, as that commit did for DEFINE_ALLOCATOR. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index f3ee745..38ff7e7 100644 --- a/alloc.c +++ b/alloc.c @@ -57,13 +57,14 @@ static void report(const char *name, unsigned int count, size_t size) name, count, (uintmax_t) size); } -#define REPORT(name) \ - report(#name, name##_allocs, name##_allocs * sizeof(struct name) >> 10) +#define REPORT(name, type) \ + report(#name, name##_allocs, name##_allocs * sizeof(type) >> 10) void alloc_report(void) { - REPORT(blob); - REPORT(tree); - REPORT(commit); - REPORT(tag); + REPORT(blob, struct blob); + REPORT(tree, struct tree); + REPORT(commit, struct commit); + REPORT(tag, struct tag); + REPORT(object, union any_object); } -- cgit v0.10.2-6-g49f6 From 969eba6341a5af8ac52c67e26462548ed05e23e3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:04 -0400 Subject: commit: push commit_index update into alloc_commit_node Whenever we create a commit object via lookup_commit, we give it a unique index to be used with the commit-slab API. The theory is that any "struct commit" we create would follow this code path, so any such struct would get an index. However, callers could use alloc_commit_node() directly (and get multiple commits with index 0). Let's push the indexing into alloc_commit_node so that it's hard for callers to get it wrong. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/alloc.c b/alloc.c index 38ff7e7..eb22a45 100644 --- a/alloc.c +++ b/alloc.c @@ -47,10 +47,18 @@ union any_object { DEFINE_ALLOCATOR(blob, struct blob) DEFINE_ALLOCATOR(tree, struct tree) -DEFINE_ALLOCATOR(commit, struct commit) +DEFINE_ALLOCATOR(raw_commit, struct commit) DEFINE_ALLOCATOR(tag, struct tag) DEFINE_ALLOCATOR(object, union any_object) +void *alloc_commit_node(void) +{ + static int commit_count; + struct commit *c = alloc_raw_commit_node(); + c->index = commit_count++; + return c; +} + static void report(const char *name, unsigned int count, size_t size) { fprintf(stderr, "%10s: %8u (%"PRIuMAX" kB)\n", @@ -64,7 +72,7 @@ void alloc_report(void) { REPORT(blob, struct blob); REPORT(tree, struct tree); - REPORT(commit, struct commit); + REPORT(raw_commit, struct commit); REPORT(tag, struct tag); REPORT(object, union any_object); } diff --git a/commit.c b/commit.c index bd3d5af..fbdc480 100644 --- a/commit.c +++ b/commit.c @@ -17,7 +17,6 @@ static struct commit_extra_header *read_commit_extra_header_lines(const char *bu int save_commit_buffer = 1; const char *commit_type = "commit"; -static int commit_count; static struct commit *check_commit(struct object *obj, const unsigned char *sha1, @@ -64,7 +63,6 @@ struct commit *lookup_commit(const unsigned char *sha1) struct object *obj = lookup_object(sha1); if (!obj) { struct commit *c = alloc_commit_node(); - c->index = commit_count++; return create_object(sha1, OBJ_COMMIT, c); } if (!obj->type) -- cgit v0.10.2-6-g49f6 From 10322a0aaf84382d8901f9ab59e59c39f0c035bb Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:11 -0400 Subject: do not create "struct commit" with xcalloc In both blame and merge-recursive, we sometimes create a "fake" commit struct for convenience (e.g., to represent the HEAD state as if we would commit it). By allocating ourselves rather than using alloc_commit_node, we do not properly set the "index" field of the commit. This can produce subtle bugs if we then use commit-slab on the resulting commit, as we will share the "0" index with another commit. We can fix this by using alloc_commit_node() to allocate. Note that we cannot free the result, as it is part of our commit allocator. However, both cases were already leaking the allocated commit anyway, so there's nothing to fix up. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/blame.c b/builtin/blame.c index 88cb799..6e22bdf 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2019,7 +2019,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, struct strbuf msg = STRBUF_INIT; time(&now); - commit = xcalloc(1, sizeof(*commit)); + commit = alloc_commit_node(); commit->object.parsed = 1; commit->date = now; commit->object.type = OBJ_COMMIT; diff --git a/merge-recursive.c b/merge-recursive.c index 4177092..fc2a68a 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -40,7 +40,7 @@ static struct tree *shift_tree_object(struct tree *one, struct tree *two, static struct commit *make_virtual_commit(struct tree *tree, const char *comment) { - struct commit *commit = xcalloc(1, sizeof(struct commit)); + struct commit *commit = alloc_commit_node(); struct merge_remote_desc *desc = xmalloc(sizeof(*desc)); desc->name = comment; -- cgit v0.10.2-6-g49f6 From b000c59b0c80fc187e5e0e48dc9396cd60576c4e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:30 -0400 Subject: logmsg_reencode: return const buffer The return value from logmsg_reencode may be either a newly allocated buffer or a pointer to the existing commit->buffer. We would not want the caller to accidentally free() or modify the latter, so let's mark it as const. We can cast away the constness in logmsg_free, but only once we have determined that it is a free-able buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/blame.c b/builtin/blame.c index 6e22bdf..85a3681 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1405,7 +1405,7 @@ static void get_commit_info(struct commit *commit, { int len; const char *subject, *encoding; - char *message; + const char *message; commit_info_init(ret); diff --git a/builtin/reset.c b/builtin/reset.c index f4e0875..b5312c4 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -93,7 +93,7 @@ static int reset_index(const unsigned char *sha1, int reset_type, int quiet) static void print_new_head_line(struct commit *commit) { const char *hex, *body; - char *msg; + const char *msg; hex = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV); printf(_("HEAD is now at %s"), hex); diff --git a/commit.h b/commit.h index 1cb55ae..4df48cb 100644 --- a/commit.h +++ b/commit.h @@ -115,10 +115,10 @@ struct userformat_want { extern int has_non_ascii(const char *text); struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ -extern char *logmsg_reencode(const struct commit *commit, - char **commit_encoding, - const char *output_encoding); -extern void logmsg_free(char *msg, const struct commit *commit); +extern const char *logmsg_reencode(const struct commit *commit, + char **commit_encoding, + const char *output_encoding); +extern void logmsg_free(const char *msg, const struct commit *commit); extern void get_commit_format(const char *arg, struct rev_info *); extern const char *format_subject(struct strbuf *sb, const char *msg, const char *line_separator); diff --git a/pretty.c b/pretty.c index 3c43db5..85b0bb3 100644 --- a/pretty.c +++ b/pretty.c @@ -606,9 +606,9 @@ static char *replace_encoding_header(char *buf, const char *encoding) return strbuf_detach(&tmp, NULL); } -char *logmsg_reencode(const struct commit *commit, - char **commit_encoding, - const char *output_encoding) +const char *logmsg_reencode(const struct commit *commit, + char **commit_encoding, + const char *output_encoding) { static const char *utf8 = "UTF-8"; const char *use_encoding; @@ -687,10 +687,10 @@ char *logmsg_reencode(const struct commit *commit, return out ? out : msg; } -void logmsg_free(char *msg, const struct commit *commit) +void logmsg_free(const char *msg, const struct commit *commit) { if (msg != commit->buffer) - free(msg); + free((void *)msg); } static int mailmap_name(const char **email, size_t *email_len, @@ -796,7 +796,7 @@ struct format_commit_context { struct signature_check signature_check; enum flush_type flush_type; enum trunc_type truncate; - char *message; + const char *message; char *commit_encoding; size_t width, indent1, indent2; int auto_color; @@ -1700,7 +1700,7 @@ void pretty_print_commit(struct pretty_print_context *pp, unsigned long beginning_of_body; int indent = 4; const char *msg; - char *reencoded; + const char *reencoded; const char *encoding; int need_8bit_cte = pp->need_8bit_cte; diff --git a/revision.c b/revision.c index 71e2337..be151ef 100644 --- a/revision.c +++ b/revision.c @@ -2788,7 +2788,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt) { int retval; const char *encoding; - char *message; + const char *message; struct strbuf buf = STRBUF_INIT; if (!opt->grep_filter.pattern_list && !opt->grep_filter.header_list) @@ -2830,12 +2830,19 @@ static int commit_match(struct commit *commit, struct rev_info *opt) format_display_notes(commit->object.sha1, &buf, encoding, 1); } - /* Find either in the original commit message, or in the temporary */ + /* + * Find either in the original commit message, or in the temporary. + * Note that we cast away the constness of "message" here. It is + * const because it may come from the cached commit buffer. That's OK, + * because we know that it is modifiable heap memory, and that while + * grep_buffer may modify it for speed, it will restore any + * changes before returning. + */ if (buf.len) retval = grep_buffer(&opt->grep_filter, buf.buf, buf.len); else retval = grep_buffer(&opt->grep_filter, - message, strlen(message)); + (char *)message, strlen(message)); strbuf_release(&buf); logmsg_free(message, commit); return retval; -- cgit v0.10.2-6-g49f6 From d74a4e57d2703e841deed994752b948a768f31ec Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:39:35 -0400 Subject: sequencer: use logmsg_reencode in get_message This simplifies the code, as logmsg_reencode handles the reencoding for us in a single call. It also means we learn logmsg_reencode's trick of pulling the buffer from disk when commit->buffer is NULL (we currently just silently return!). It is doubtful this matters in practice, though, as sequencer operations would not generally turn off save_commit_buffer. Note that we may be fixing a bug here. The existing code does: if (same_encoding(to, from)) reencode_string(buf, to, from); That probably should have been "!same_encoding". Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/sequencer.c b/sequencer.c index bde5f04..2ee4f43 100644 --- a/sequencer.c +++ b/sequencer.c @@ -116,39 +116,23 @@ static const char *action_name(const struct replay_opts *opts) return opts->action == REPLAY_REVERT ? "revert" : "cherry-pick"; } -static char *get_encoding(const char *message); - struct commit_message { char *parent_label; const char *label; const char *subject; - char *reencoded_message; const char *message; }; static int get_message(struct commit *commit, struct commit_message *out) { - const char *encoding; const char *abbrev, *subject; int abbrev_len, subject_len; char *q; - if (!commit->buffer) - return -1; - encoding = get_encoding(commit->buffer); - if (!encoding) - encoding = "UTF-8"; if (!git_commit_encoding) git_commit_encoding = "UTF-8"; - out->reencoded_message = NULL; - out->message = commit->buffer; - if (same_encoding(encoding, git_commit_encoding)) - out->reencoded_message = reencode_string(commit->buffer, - git_commit_encoding, encoding); - if (out->reencoded_message) - out->message = out->reencoded_message; - + out->message = logmsg_reencode(commit, NULL, git_commit_encoding); abbrev = find_unique_abbrev(commit->object.sha1, DEFAULT_ABBREV); abbrev_len = strlen(abbrev); @@ -167,29 +151,10 @@ static int get_message(struct commit *commit, struct commit_message *out) return 0; } -static void free_message(struct commit_message *msg) +static void free_message(struct commit *commit, struct commit_message *msg) { free(msg->parent_label); - free(msg->reencoded_message); -} - -static char *get_encoding(const char *message) -{ - const char *p = message, *eol; - - while (*p && *p != '\n') { - for (eol = p + 1; *eol && *eol != '\n'; eol++) - ; /* do nothing */ - if (starts_with(p, "encoding ")) { - char *result = xmalloc(eol - 8 - p); - strlcpy(result, p + 9, eol - 8 - p); - return result; - } - p = eol; - if (*p == '\n') - p++; - } - return NULL; + logmsg_free(msg->message, commit); } static void write_cherry_pick_head(struct commit *commit, const char *pseudoref) @@ -485,7 +450,7 @@ static int do_pick_commit(struct commit *commit, struct replay_opts *opts) unsigned char head[20]; struct commit *base, *next, *parent; const char *base_label, *next_label; - struct commit_message msg = { NULL, NULL, NULL, NULL, NULL }; + struct commit_message msg = { NULL, NULL, NULL, NULL }; char *defmsg = NULL; struct strbuf msgbuf = STRBUF_INIT; int res, unborn = 0, allow; @@ -650,7 +615,7 @@ static int do_pick_commit(struct commit *commit, struct replay_opts *opts) res = run_git_commit(defmsg, opts, allow); leave: - free_message(&msg); + free_message(commit, &msg); free(defmsg); return res; -- cgit v0.10.2-6-g49f6 From 0fb370da9ca972f9571530f95c0dacb31368c280 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 12 Jun 2014 18:05:37 -0400 Subject: provide a helper to free commit buffer This converts two lines into one at each caller. But more importantly, it abstracts the concept of freeing the buffer, which will make it easier to change later. Note that we also need to provide a "detach" mechanism for a tricky case in index-pack. We are passed a buffer for the object generated by processing the incoming pack. If we are not using --strict, we just calculate the sha1 on that buffer and return, leaving the caller to free it. But if we are using --strict, we actually attach that buffer to an object, pass the object to the fsck functions, and then detach the buffer from the object again (so that the caller can free it as usual). In this case, we don't want to free the buffer ourselves, but just make sure it is no longer associated with the commit. Note that we are making the assumption here that the attach/detach process does not impact the buffer at all (e.g., it is never reallocated or modified). That holds true now, and we have no plans to change that. However, as we abstract the commit_buffer code, this dependency becomes less obvious. So when we detach, let's also make sure that we get back the same buffer that we gave to the commit_buffer code. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/fsck.c b/builtin/fsck.c index fc150c8..8aadca1 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -310,8 +310,7 @@ static int fsck_obj(struct object *obj) if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); if (!commit->parents && show_root) printf("root %s\n", sha1_to_hex(commit->object.sha1)); diff --git a/builtin/index-pack.c b/builtin/index-pack.c index b9f6e12..42551ce 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -774,7 +774,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - commit->buffer = NULL; + if (detach_commit_buffer(commit) != data) + die("BUG: parse_object_buffer transmogrified our buffer"); } obj->flags |= FLAG_CHECKED; } diff --git a/builtin/log.c b/builtin/log.c index 39e8836..226f8f2 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -349,8 +349,7 @@ static int cmd_log_walk(struct rev_info *rev) rev->max_count++; if (!rev->reflog_info) { /* we allow cycles in reflog ancestry */ - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); } free_commit_list(commit->parents); commit->parents = NULL; @@ -1508,8 +1507,7 @@ int cmd_format_patch(int argc, const char **argv, const char *prefix) reopen_stdout(rev.numbered_files ? NULL : commit, NULL, &rev, quiet)) die(_("Failed to create output files")); shown = log_tree_commit(&rev, commit); - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); /* We put one extra blank line between formatted * patches and this flag is used by log-tree code diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 9f92905..e012ebe 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -173,8 +173,7 @@ static void finish_commit(struct commit *commit, void *data) free_commit_list(commit->parents); commit->parents = NULL; } - free(commit->buffer); - commit->buffer = NULL; + free_commit_buffer(commit); } static void finish_object(struct object *obj, diff --git a/commit.c b/commit.c index fbdc480..11a05c1 100644 --- a/commit.c +++ b/commit.c @@ -245,6 +245,19 @@ int unregister_shallow(const unsigned char *sha1) return 0; } +void free_commit_buffer(struct commit *commit) +{ + free(commit->buffer); + commit->buffer = NULL; +} + +const void *detach_commit_buffer(struct commit *commit) +{ + void *ret = commit->buffer; + commit->buffer = NULL; + return ret; +} + int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) { const char *tail = buffer; diff --git a/commit.h b/commit.h index 4df48cb..d72ed43 100644 --- a/commit.h +++ b/commit.h @@ -51,6 +51,17 @@ int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long s int parse_commit(struct commit *item); void parse_commit_or_die(struct commit *item); +/* + * Free any cached object buffer associated with the commit. + */ +void free_commit_buffer(struct commit *); + +/* + * Disassociate any cached object buffer from the commit, but do not free it. + * The buffer (or NULL, if none) is returned. + */ +const void *detach_commit_buffer(struct commit *); + /* Find beginning and length of commit subject. */ int find_commit_subject(const char *commit_buffer, const char **subject); -- cgit v0.10.2-6-g49f6 From 66c2827ea4deb24ff541e30a5b6239ad5e9f6801 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:14 -0400 Subject: provide a helper to set the commit buffer Right now this is just a one-liner, but abstracting it will make it easier to change later. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/blame.c b/builtin/blame.c index 85a3681..38784ab 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -2046,7 +2046,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ident, ident, path, (!contents_from ? path : (!strcmp(contents_from, "-") ? "standard input" : contents_from))); - commit->buffer = strbuf_detach(&msg, NULL); + set_commit_buffer(commit, strbuf_detach(&msg, NULL)); if (!contents_from || strcmp("-", contents_from)) { struct stat st; diff --git a/commit.c b/commit.c index 11a05c1..fc8b4e2 100644 --- a/commit.c +++ b/commit.c @@ -245,6 +245,11 @@ int unregister_shallow(const unsigned char *sha1) return 0; } +void set_commit_buffer(struct commit *commit, void *buffer) +{ + commit->buffer = buffer; +} + void free_commit_buffer(struct commit *commit) { free(commit->buffer); @@ -335,7 +340,7 @@ int parse_commit(struct commit *item) } ret = parse_commit_buffer(item, buffer, size); if (save_commit_buffer && !ret) { - item->buffer = buffer; + set_commit_buffer(item, buffer); return 0; } free(buffer); diff --git a/commit.h b/commit.h index d72ed43..cc89128 100644 --- a/commit.h +++ b/commit.h @@ -52,6 +52,12 @@ int parse_commit(struct commit *item); void parse_commit_or_die(struct commit *item); /* + * Associate an object buffer with the commit. The ownership of the + * memory is handed over to the commit, and must be free()-able. + */ +void set_commit_buffer(struct commit *, void *buffer); + +/* * Free any cached object buffer associated with the commit. */ void free_commit_buffer(struct commit *); diff --git a/object.c b/object.c index 57a0890..44ca657 100644 --- a/object.c +++ b/object.c @@ -198,7 +198,7 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t if (parse_commit_buffer(commit, buffer, size)) return NULL; if (!commit->buffer) { - commit->buffer = buffer; + set_commit_buffer(commit, buffer); *eaten_p = 1; } obj = &commit->object; -- cgit v0.10.2-6-g49f6 From 152ff1ccebd822fd97f27d2a6c3fa2058f088fd8 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:39 -0400 Subject: provide helpers to access the commit buffer Many sites look at commit->buffer to get more detailed information than what is in the parsed commit struct. However, we sometimes drop commit->buffer to save memory, in which case the caller would need to read the object afresh. Some callers do this (leading to duplicated code), and others do not (which opens the possibility of a segfault if somebody else frees the buffer). Let's provide a pair of helpers, "get" and "unuse", that let callers easily get the buffer. They will use the cached buffer when possible, and otherwise load from disk using read_sha1_file. Note that we also need to add a "get_cached" variant which returns NULL when we do not have a cached buffer. At first glance this seems to defeat the purpose of "get", which is to always provide a return value. However, some log code paths actually use the NULL-ness of commit->buffer as a boolean flag to decide whether to try printing the commit. At least for now, we want to continue supporting that use. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index fc8b4e2..b6b0e0d 100644 --- a/commit.c +++ b/commit.c @@ -250,6 +250,34 @@ void set_commit_buffer(struct commit *commit, void *buffer) commit->buffer = buffer; } +const void *get_cached_commit_buffer(const struct commit *commit) +{ + return commit->buffer; +} + +const void *get_commit_buffer(const struct commit *commit) +{ + const void *ret = get_cached_commit_buffer(commit); + if (!ret) { + enum object_type type; + unsigned long size; + ret = read_sha1_file(commit->object.sha1, &type, &size); + if (!ret) + die("cannot read commit object %s", + sha1_to_hex(commit->object.sha1)); + if (type != OBJ_COMMIT) + die("expected commit for %s, got %s", + sha1_to_hex(commit->object.sha1), typename(type)); + } + return ret; +} + +void unuse_commit_buffer(const struct commit *commit, const void *buffer) +{ + if (commit->buffer != buffer) + free((void *)buffer); +} + void free_commit_buffer(struct commit *commit) { free(commit->buffer); diff --git a/commit.h b/commit.h index cc89128..259c0ae 100644 --- a/commit.h +++ b/commit.h @@ -58,6 +58,27 @@ void parse_commit_or_die(struct commit *item); void set_commit_buffer(struct commit *, void *buffer); /* + * Get any cached object buffer associated with the commit. Returns NULL + * if none. The resulting memory should not be freed. + */ +const void *get_cached_commit_buffer(const struct commit *); + +/* + * Get the commit's object contents, either from cache or by reading the object + * from disk. The resulting memory should not be modified, and must be given + * to unuse_commit_buffer when the caller is done. + */ +const void *get_commit_buffer(const struct commit *); + +/* + * Tell the commit subsytem that we are done with a particular commit buffer. + * The commit and buffer should be the input and return value, respectively, + * from an earlier call to get_commit_buffer. The buffer may or may not be + * freed by this call; callers should not access the memory afterwards. + */ +void unuse_commit_buffer(const struct commit *, const void *buffer); + +/* * Free any cached object buffer associated with the commit. */ void free_commit_buffer(struct commit *); -- cgit v0.10.2-6-g49f6 From a97934d8205772ffd2a528a9e970af7dec725012 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:40:46 -0400 Subject: use get_cached_commit_buffer where appropriate Some call sites check commit->buffer to see whether we have a cached buffer, and if so, do some work with it. In the long run we may want to switch these code paths to make their decision on a different boolean flag (because checking the cache may get a little more expensive in the future). But for now, we can easily support them by converting the calls to use get_cached_commit_buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/rev-list.c b/builtin/rev-list.c index e012ebe..3fcbf21 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data) else putchar('\n'); - if (revs->verbose_header && commit->buffer) { + if (revs->verbose_header && get_cached_commit_buffer(commit)) { struct strbuf buf = STRBUF_INIT; struct pretty_print_context ctx = {0}; ctx.abbrev = revs->abbrev; diff --git a/log-tree.c b/log-tree.c index cf2f86c..e9ef8ab 100644 --- a/log-tree.c +++ b/log-tree.c @@ -588,7 +588,7 @@ void show_log(struct rev_info *opt) show_mergetag(opt, commit); } - if (!commit->buffer) + if (!get_cached_commit_buffer(commit)) return; if (opt->show_notes) { diff --git a/object.c b/object.c index 44ca657..67b6e35 100644 --- a/object.c +++ b/object.c @@ -197,7 +197,7 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t if (commit) { if (parse_commit_buffer(commit, buffer, size)) return NULL; - if (!commit->buffer) { + if (!get_cached_commit_buffer(commit)) { set_commit_buffer(commit, buffer); *eaten_p = 1; } -- cgit v0.10.2-6-g49f6 From ba41c1c93fd9109eae954f75a8cb8e32c3e29530 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:41:02 -0400 Subject: use get_commit_buffer to avoid duplicate code For both of these sites, we already do the "fallback to read_sha1_file" trick. But we can shorten the code by just using get_commit_buffer. Note that the error cases are slightly different when read_sha1_file fails. get_commit_buffer will die() if the object cannot be loaded, or is a non-commit. For get_sha1_oneline, this will almost certainly never happen, as we will have just called parse_object (and if it does, it's probably worth complaining about). For record_author_date, the new behavior is probably better; we notify the user of the error instead of silently ignoring it. And because it's used only for sorting by author-date, somebody examining a corrupt repo can fallback to the regular traversal order. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index b6b0e0d..1903dde 100644 --- a/commit.c +++ b/commit.c @@ -583,22 +583,12 @@ static void record_author_date(struct author_date_slab *author_date, struct commit *commit) { const char *buf, *line_end, *ident_line; - char *buffer = NULL; + const char *buffer = get_commit_buffer(commit); struct ident_split ident; char *date_end; unsigned long date; - if (!commit->buffer) { - unsigned long size; - enum object_type type; - buffer = read_sha1_file(commit->object.sha1, &type, &size); - if (!buffer) - return; - } - - for (buf = commit->buffer ? commit->buffer : buffer; - buf; - buf = line_end + 1) { + for (buf = buffer; buf; buf = line_end + 1) { line_end = strchrnul(buf, '\n'); ident_line = skip_prefix(buf, "author "); if (!ident_line) { @@ -619,7 +609,7 @@ static void record_author_date(struct author_date_slab *author_date, *(author_date_slab_at(author_date, commit)) = date; fail_exit: - free(buffer); + unuse_commit_buffer(commit, buffer); } static int compare_commits_by_author_date(const void *a_, const void *b_, diff --git a/sha1_name.c b/sha1_name.c index 2b6322f..0a65d23 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -862,27 +862,17 @@ static int get_sha1_oneline(const char *prefix, unsigned char *sha1, commit_list_insert(l->item, &backup); } while (list) { - char *p, *to_free = NULL; + const char *p, *buf; struct commit *commit; - enum object_type type; - unsigned long size; int matches; commit = pop_most_recent_commit(&list, ONELINE_SEEN); if (!parse_object(commit->object.sha1)) continue; - if (commit->buffer) - p = commit->buffer; - else { - p = read_sha1_file(commit->object.sha1, &type, &size); - if (!p) - continue; - to_free = p; - } - - p = strstr(p, "\n\n"); + buf = get_commit_buffer(commit); + p = strstr(buf, "\n\n"); matches = p && !regexec(®ex, p + 2, 0, NULL, 0); - free(to_free); + unuse_commit_buffer(commit, buf); if (matches) { hashcpy(sha1, commit->object.sha1); -- cgit v0.10.2-6-g49f6 From b66103c3baa593a39b8b0751213b9fce60e94de4 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:41:39 -0400 Subject: convert logmsg_reencode to get_commit_buffer Like the callsites in the previous commit, logmsg_reencode already falls back to read_sha1_file when necessary. However, I split its conversion out into its own commit because it's a bit more complex. We return either: 1. The original commit->buffer 2. A newly allocated buffer from read_sha1_file 3. A reencoded buffer (based on either 1 or 2 above). while trying to do as few extra reads/allocations as possible. Callers currently free the result with logmsg_free, but we can simplify this by pointing them straight to unuse_commit_buffer. This is a slight layering violation, in that we may be passing a buffer from (3). However, since the end result is to free() anything except (1), which is unlikely to change, and because this makes the interface much simpler, it's a reasonable bending of the rules. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/blame.c b/builtin/blame.c index 38784ab..857d98a 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1416,7 +1416,7 @@ static void get_commit_info(struct commit *commit, &ret->author_time, &ret->author_tz); if (!detailed) { - logmsg_free(message, commit); + unuse_commit_buffer(commit, message); return; } @@ -1430,7 +1430,7 @@ static void get_commit_info(struct commit *commit, else strbuf_addf(&ret->summary, "(%s)", sha1_to_hex(commit->object.sha1)); - logmsg_free(message, commit); + unuse_commit_buffer(commit, message); } /* diff --git a/builtin/reset.c b/builtin/reset.c index b5312c4..6bd6245 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -109,7 +109,7 @@ static void print_new_head_line(struct commit *commit) } else printf("\n"); - logmsg_free(msg, commit); + unuse_commit_buffer(commit, msg); } static void update_index_from_diff(struct diff_queue_struct *q, diff --git a/commit.h b/commit.h index 259c0ae..5ce5ce7 100644 --- a/commit.h +++ b/commit.h @@ -156,7 +156,6 @@ struct rev_info; /* in revision.h, it circularly uses enum cmit_fmt */ extern const char *logmsg_reencode(const struct commit *commit, char **commit_encoding, const char *output_encoding); -extern void logmsg_free(const char *msg, const struct commit *commit); extern void get_commit_format(const char *arg, struct rev_info *); extern const char *format_subject(struct strbuf *sb, const char *msg, const char *line_separator); diff --git a/pretty.c b/pretty.c index 85b0bb3..915bd1e 100644 --- a/pretty.c +++ b/pretty.c @@ -613,22 +613,9 @@ const char *logmsg_reencode(const struct commit *commit, static const char *utf8 = "UTF-8"; const char *use_encoding; char *encoding; - char *msg = commit->buffer; + const char *msg = get_commit_buffer(commit); char *out; - if (!msg) { - enum object_type type; - unsigned long size; - - msg = read_sha1_file(commit->object.sha1, &type, &size); - if (!msg) - die("Cannot read commit object %s", - sha1_to_hex(commit->object.sha1)); - if (type != OBJ_COMMIT) - die("Expected commit for '%s', got %s", - sha1_to_hex(commit->object.sha1), typename(type)); - } - if (!output_encoding || !*output_encoding) { if (commit_encoding) *commit_encoding = @@ -652,12 +639,13 @@ const char *logmsg_reencode(const struct commit *commit, * Otherwise, we still want to munge the encoding header in the * result, which will be done by modifying the buffer. If we * are using a fresh copy, we can reuse it. But if we are using - * the cached copy from commit->buffer, we need to duplicate it - * to avoid munging commit->buffer. + * the cached copy from get_commit_buffer, we need to duplicate it + * to avoid munging the cached copy. */ - out = msg; - if (out == commit->buffer) - out = xstrdup(out); + if (msg == get_cached_commit_buffer(commit)) + out = xstrdup(msg); + else + out = (char *)msg; } else { /* @@ -667,8 +655,8 @@ const char *logmsg_reencode(const struct commit *commit, * copy, we can free it. */ out = reencode_string(msg, output_encoding, use_encoding); - if (out && msg != commit->buffer) - free(msg); + if (out) + unuse_commit_buffer(commit, msg); } /* @@ -687,12 +675,6 @@ const char *logmsg_reencode(const struct commit *commit, return out ? out : msg; } -void logmsg_free(const char *msg, const struct commit *commit) -{ - if (msg != commit->buffer) - free((void *)msg); -} - static int mailmap_name(const char **email, size_t *email_len, const char **name, size_t *name_len) { @@ -1531,7 +1513,7 @@ void format_commit_message(const struct commit *commit, } free(context.commit_encoding); - logmsg_free(context.message, commit); + unuse_commit_buffer(commit, context.message); free(context.signature_check.gpg_output); free(context.signature_check.signer); } @@ -1767,7 +1749,7 @@ void pretty_print_commit(struct pretty_print_context *pp, if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body) strbuf_addch(sb, '\n'); - logmsg_free(reencoded, commit); + unuse_commit_buffer(commit, reencoded); } void pp_commit_easy(enum cmit_fmt fmt, const struct commit *commit, diff --git a/revision.c b/revision.c index be151ef..1cc91e5 100644 --- a/revision.c +++ b/revision.c @@ -2844,7 +2844,7 @@ static int commit_match(struct commit *commit, struct rev_info *opt) retval = grep_buffer(&opt->grep_filter, (char *)message, strlen(message)); strbuf_release(&buf); - logmsg_free(message, commit); + unuse_commit_buffer(commit, message); return retval; } diff --git a/sequencer.c b/sequencer.c index 2ee4f43..7b1f87a 100644 --- a/sequencer.c +++ b/sequencer.c @@ -154,7 +154,7 @@ static int get_message(struct commit *commit, struct commit_message *out) static void free_message(struct commit *commit, struct commit_message *msg) { free(msg->parent_label); - logmsg_free(msg->message, commit); + unuse_commit_buffer(commit, msg->message); } static void write_cherry_pick_head(struct commit *commit, const char *pseudoref) -- cgit v0.10.2-6-g49f6 From bc6b8fc1300ef79c4b4c3c2a79bb3c1e2e032963 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:41:51 -0400 Subject: use get_commit_buffer everywhere Each of these sites assumes that commit->buffer is valid. Since they would segfault if this was not the case, they are likely to be correct in practice. However, we can future-proof them by using get_commit_buffer. And as a side effect, we abstract away the final bare uses of commit->buffer. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/fast-export.c b/builtin/fast-export.c index b8d8a3a..7ee5e08 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -279,6 +279,7 @@ static const char *find_encoding(const char *begin, const char *end) static void handle_commit(struct commit *commit, struct rev_info *rev) { int saved_output_format = rev->diffopt.output_format; + const char *commit_buffer; const char *author, *author_end, *committer, *committer_end; const char *encoding, *message; char *reencoded = NULL; @@ -288,7 +289,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; parse_commit_or_die(commit); - author = strstr(commit->buffer, "\nauthor "); + commit_buffer = get_commit_buffer(commit); + author = strstr(commit_buffer, "\nauthor "); if (!author) die ("Could not find author in commit %s", sha1_to_hex(commit->object.sha1)); @@ -335,6 +337,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) ? strlen(message) : 0), reencoded ? reencoded : message ? message : ""); free(reencoded); + unuse_commit_buffer(commit, commit_buffer); for (i = 0, p = commit->parents; p; p = p->next) { int mark = get_object_mark(&p->item->object); diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index 3906eda..01f6d59 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -230,12 +230,14 @@ static void add_branch_desc(struct strbuf *out, const char *name) static void record_person(int which, struct string_list *people, struct commit *commit) { + const char *buffer; char *name_buf, *name, *name_end; struct string_list_item *elem; const char *field; field = (which == 'a') ? "\nauthor " : "\ncommitter "; - name = strstr(commit->buffer, field); + buffer = get_commit_buffer(commit); + name = strstr(buffer, field); if (!name) return; name += strlen(field); @@ -247,6 +249,7 @@ static void record_person(int which, struct string_list *people, if (name_end < name) return; name_buf = xmemdupz(name, name_end - name + 1); + unuse_commit_buffer(commit, buffer); elem = string_list_lookup(people, name_buf); if (!elem) { diff --git a/builtin/log.c b/builtin/log.c index 226f8f2..2c74260 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -918,9 +918,12 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, log_write_email_headers(rev, head, &pp.subject, &pp.after_subject, &need_8bit_cte); - for (i = 0; !need_8bit_cte && i < nr; i++) - if (has_non_ascii(list[i]->buffer)) + for (i = 0; !need_8bit_cte && i < nr; i++) { + const char *buf = get_commit_buffer(list[i]); + if (has_non_ascii(buf)) need_8bit_cte = 1; + unuse_commit_buffer(list[i], buf); + } if (!branch_name) branch_name = find_branch_name(rev); diff --git a/fsck.c b/fsck.c index abed62b..8223780 100644 --- a/fsck.c +++ b/fsck.c @@ -276,9 +276,10 @@ static int fsck_ident(const char **ident, struct object *obj, fsck_error error_f return 0; } -static int fsck_commit(struct commit *commit, fsck_error error_func) +static int fsck_commit_buffer(struct commit *commit, const char *buffer, + fsck_error error_func) { - const char *buffer = commit->buffer, *tmp; + const char *tmp; unsigned char tree_sha1[20], sha1[20]; struct commit_graft *graft; int parents = 0; @@ -336,6 +337,14 @@ static int fsck_commit(struct commit *commit, fsck_error error_func) return 0; } +static int fsck_commit(struct commit *commit, fsck_error error_func) +{ + const char *buffer = get_commit_buffer(commit); + int ret = fsck_commit_buffer(commit, buffer, error_func); + unuse_commit_buffer(commit, buffer); + return ret; +} + static int fsck_tag(struct tag *tag, fsck_error error_func) { struct object *tagged = tag->tagged; diff --git a/merge-recursive.c b/merge-recursive.c index fc2a68a..78908aa 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -190,9 +190,11 @@ static void output_commit_title(struct merge_options *o, struct commit *commit) printf(_("(bad commit)\n")); else { const char *title; - int len = find_commit_subject(commit->buffer, &title); + const char *msg = get_commit_buffer(commit); + int len = find_commit_subject(msg, &title); if (len) printf("%.*s\n", len, title); + unuse_commit_buffer(commit, msg); } } } diff --git a/notes-merge.c b/notes-merge.c index 697cec3..e804db2 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -672,7 +672,8 @@ int notes_merge_commit(struct notes_merge_options *o, DIR *dir; struct dirent *e; struct strbuf path = STRBUF_INIT; - char *msg = strstr(partial_commit->buffer, "\n\n"); + const char *buffer = get_commit_buffer(partial_commit); + const char *msg = strstr(buffer, "\n\n"); int baselen; strbuf_addstr(&path, git_path(NOTES_MERGE_WORKTREE)); @@ -721,6 +722,7 @@ int notes_merge_commit(struct notes_merge_options *o, create_notes_commit(partial_tree, partial_commit->parents, msg, strlen(msg), result_sha1); + unuse_commit_buffer(partial_commit, buffer); if (o->verbosity >= 4) printf("Finalized notes merge commit: %s\n", sha1_to_hex(result_sha1)); diff --git a/sequencer.c b/sequencer.c index 7b1f87a..69bcf3d 100644 --- a/sequencer.c +++ b/sequencer.c @@ -662,10 +662,12 @@ static int format_todo(struct strbuf *buf, struct commit_list *todo_list, int subject_len; for (cur = todo_list; cur; cur = cur->next) { + const char *commit_buffer = get_commit_buffer(cur->item); sha1_abbrev = find_unique_abbrev(cur->item->object.sha1, DEFAULT_ABBREV); - subject_len = find_commit_subject(cur->item->buffer, &subject); + subject_len = find_commit_subject(commit_buffer, &subject); strbuf_addf(buf, "%s %s %.*s\n", action_str, sha1_abbrev, subject_len, subject); + unuse_commit_buffer(cur->item, commit_buffer); } return 0; } -- cgit v0.10.2-6-g49f6 From 80cdaba569600154d91819d2e252f6b7dd9cff73 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:42:51 -0400 Subject: commit-slab: provide a static initializer Callers currently must use init_foo_slab() at runtime before accessing a slab. For global slabs, it's much nicer if we can initialize them in BSS, so that each user does not have to add code to check-and-initialize. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit-slab.h b/commit-slab.h index cc114b5..375c9c7 100644 --- a/commit-slab.h +++ b/commit-slab.h @@ -117,4 +117,16 @@ static int stat_ ##slabname## realloc * catch because GCC silently parses it by default. */ +/* + * Statically initialize a commit slab named "var". Note that this + * evaluates "stride" multiple times! Example: + * + * struct indegree indegrees = COMMIT_SLAB_INIT(1, indegrees); + * + */ +#define COMMIT_SLAB_INIT(stride, var) { \ + COMMIT_SLAB_SIZE / sizeof(**((var).slab)) / (stride), \ + (stride), 0, NULL \ +} + #endif /* COMMIT_SLAB_H */ -- cgit v0.10.2-6-g49f6 From c1b3c71f4b4571abb2b2a457122fd100dc9f7eb0 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:43:02 -0400 Subject: commit: convert commit->buffer to a slab This will make it easier to manage the buffer cache independently of the "struct commit" objects. It also shrinks "struct commit" by one pointer, which may be helpful. Unfortunately it does not reduce the max memory size of something like "rev-list", because rev-list uses get_cached_commit_buffer() to decide not to show each commit's output (and due to the design of slab_at, accessing the slab requires us to extend it, allocating exactly the same number of buffer pointers we dropped from the commit structs). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index 1903dde..e289c78 100644 --- a/commit.c +++ b/commit.c @@ -245,14 +245,17 @@ int unregister_shallow(const unsigned char *sha1) return 0; } +define_commit_slab(buffer_slab, void *); +static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab); + void set_commit_buffer(struct commit *commit, void *buffer) { - commit->buffer = buffer; + *buffer_slab_at(&buffer_slab, commit) = buffer; } const void *get_cached_commit_buffer(const struct commit *commit) { - return commit->buffer; + return *buffer_slab_at(&buffer_slab, commit); } const void *get_commit_buffer(const struct commit *commit) @@ -274,20 +277,23 @@ const void *get_commit_buffer(const struct commit *commit) void unuse_commit_buffer(const struct commit *commit, const void *buffer) { - if (commit->buffer != buffer) + void *cached = *buffer_slab_at(&buffer_slab, commit); + if (cached != buffer) free((void *)buffer); } void free_commit_buffer(struct commit *commit) { - free(commit->buffer); - commit->buffer = NULL; + void **b = buffer_slab_at(&buffer_slab, commit); + free(*b); + *b = NULL; } const void *detach_commit_buffer(struct commit *commit) { - void *ret = commit->buffer; - commit->buffer = NULL; + void **b = buffer_slab_at(&buffer_slab, commit); + void *ret = *b; + *b = NULL; return ret; } diff --git a/commit.h b/commit.h index 5ce5ce7..e1c2569 100644 --- a/commit.h +++ b/commit.h @@ -20,7 +20,6 @@ struct commit { unsigned long date; struct commit_list *parents; struct tree *tree; - char *buffer; }; extern int save_commit_buffer; -- cgit v0.10.2-6-g49f6 From 8597ea3afea067b39ba7d4adae7ec6c1ee0e7c91 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 10 Jun 2014 17:44:13 -0400 Subject: commit: record buffer length in cache Most callsites which use the commit buffer try to use the cached version attached to the commit, rather than re-reading from disk. Unfortunately, that interface provides only a pointer to the NUL-terminated buffer, with no indication of the original length. For the most part, this doesn't matter. People do not put NULs in their commit messages, and the log code is happy to treat it all as a NUL-terminated string. However, some code paths do care. For example, when checking signatures, we want to be very careful that we verify all the bytes to avoid malicious trickery. This patch just adds an optional "size" out-pointer to get_commit_buffer and friends. The existing callers all pass NULL (there did not seem to be any obvious sites where we could avoid an immediate strlen() call, though perhaps with some further refactoring we could). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/blame.c b/builtin/blame.c index 857d98a..b84e375 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1999,6 +1999,18 @@ static void append_merge_parents(struct commit_list **tail) } /* + * This isn't as simple as passing sb->buf and sb->len, because we + * want to transfer ownership of the buffer to the commit (so we + * must use detach). + */ +static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb) +{ + size_t len; + void *buf = strbuf_detach(sb, &len); + set_commit_buffer(c, buf, len); +} + +/* * Prepare a dummy commit that represents the work tree (or staged) item. * Note that annotating work tree item never works in the reverse. */ @@ -2046,7 +2058,7 @@ static struct commit *fake_working_tree_commit(struct diff_options *opt, ident, ident, path, (!contents_from ? path : (!strcmp(contents_from, "-") ? "standard input" : contents_from))); - set_commit_buffer(commit, strbuf_detach(&msg, NULL)); + set_commit_buffer_from_strbuf(commit, &msg); if (!contents_from || strcmp("-", contents_from)) { struct stat st; diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 7ee5e08..05d161f 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -289,7 +289,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev) rev->diffopt.output_format = DIFF_FORMAT_CALLBACK; parse_commit_or_die(commit); - commit_buffer = get_commit_buffer(commit); + commit_buffer = get_commit_buffer(commit, NULL); author = strstr(commit_buffer, "\nauthor "); if (!author) die ("Could not find author in commit %s", diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index 01f6d59..ef8b254 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -236,7 +236,7 @@ static void record_person(int which, struct string_list *people, const char *field; field = (which == 'a') ? "\nauthor " : "\ncommitter "; - buffer = get_commit_buffer(commit); + buffer = get_commit_buffer(commit, NULL); name = strstr(buffer, field); if (!name) return; diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 42551ce..459b9f0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -774,7 +774,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } if (obj->type == OBJ_COMMIT) { struct commit *commit = (struct commit *) obj; - if (detach_commit_buffer(commit) != data) + if (detach_commit_buffer(commit, NULL) != data) die("BUG: parse_object_buffer transmogrified our buffer"); } obj->flags |= FLAG_CHECKED; diff --git a/builtin/log.c b/builtin/log.c index 2c74260..c599eac 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -919,7 +919,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, &need_8bit_cte); for (i = 0; !need_8bit_cte && i < nr; i++) { - const char *buf = get_commit_buffer(list[i]); + const char *buf = get_commit_buffer(list[i], NULL); if (has_non_ascii(buf)) need_8bit_cte = 1; unuse_commit_buffer(list[i], buf); diff --git a/builtin/rev-list.c b/builtin/rev-list.c index 3fcbf21..ff84a82 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -106,7 +106,7 @@ static void show_commit(struct commit *commit, void *data) else putchar('\n'); - if (revs->verbose_header && get_cached_commit_buffer(commit)) { + if (revs->verbose_header && get_cached_commit_buffer(commit, NULL)) { struct strbuf buf = STRBUF_INIT; struct pretty_print_context ctx = {0}; ctx.abbrev = revs->abbrev; diff --git a/commit.c b/commit.c index e289c78..a036e18 100644 --- a/commit.c +++ b/commit.c @@ -245,22 +245,31 @@ int unregister_shallow(const unsigned char *sha1) return 0; } -define_commit_slab(buffer_slab, void *); +struct commit_buffer { + void *buffer; + unsigned long size; +}; +define_commit_slab(buffer_slab, struct commit_buffer); static struct buffer_slab buffer_slab = COMMIT_SLAB_INIT(1, buffer_slab); -void set_commit_buffer(struct commit *commit, void *buffer) +void set_commit_buffer(struct commit *commit, void *buffer, unsigned long size) { - *buffer_slab_at(&buffer_slab, commit) = buffer; + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + v->buffer = buffer; + v->size = size; } -const void *get_cached_commit_buffer(const struct commit *commit) +const void *get_cached_commit_buffer(const struct commit *commit, unsigned long *sizep) { - return *buffer_slab_at(&buffer_slab, commit); + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + if (sizep) + *sizep = v->size; + return v->buffer; } -const void *get_commit_buffer(const struct commit *commit) +const void *get_commit_buffer(const struct commit *commit, unsigned long *sizep) { - const void *ret = get_cached_commit_buffer(commit); + const void *ret = get_cached_commit_buffer(commit, sizep); if (!ret) { enum object_type type; unsigned long size; @@ -271,29 +280,38 @@ const void *get_commit_buffer(const struct commit *commit) if (type != OBJ_COMMIT) die("expected commit for %s, got %s", sha1_to_hex(commit->object.sha1), typename(type)); + if (sizep) + *sizep = size; } return ret; } void unuse_commit_buffer(const struct commit *commit, const void *buffer) { - void *cached = *buffer_slab_at(&buffer_slab, commit); - if (cached != buffer) + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + if (v->buffer != buffer) free((void *)buffer); } void free_commit_buffer(struct commit *commit) { - void **b = buffer_slab_at(&buffer_slab, commit); - free(*b); - *b = NULL; + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + free(v->buffer); + v->buffer = NULL; + v->size = 0; } -const void *detach_commit_buffer(struct commit *commit) +const void *detach_commit_buffer(struct commit *commit, unsigned long *sizep) { - void **b = buffer_slab_at(&buffer_slab, commit); - void *ret = *b; - *b = NULL; + struct commit_buffer *v = buffer_slab_at(&buffer_slab, commit); + void *ret; + + ret = v->buffer; + if (sizep) + *sizep = v->size; + + v->buffer = NULL; + v->size = 0; return ret; } @@ -374,7 +392,7 @@ int parse_commit(struct commit *item) } ret = parse_commit_buffer(item, buffer, size); if (save_commit_buffer && !ret) { - set_commit_buffer(item, buffer); + set_commit_buffer(item, buffer, size); return 0; } free(buffer); @@ -589,7 +607,7 @@ static void record_author_date(struct author_date_slab *author_date, struct commit *commit) { const char *buf, *line_end, *ident_line; - const char *buffer = get_commit_buffer(commit); + const char *buffer = get_commit_buffer(commit, NULL); struct ident_split ident; char *date_end; unsigned long date; diff --git a/commit.h b/commit.h index e1c2569..61559a9 100644 --- a/commit.h +++ b/commit.h @@ -54,20 +54,20 @@ void parse_commit_or_die(struct commit *item); * Associate an object buffer with the commit. The ownership of the * memory is handed over to the commit, and must be free()-able. */ -void set_commit_buffer(struct commit *, void *buffer); +void set_commit_buffer(struct commit *, void *buffer, unsigned long size); /* * Get any cached object buffer associated with the commit. Returns NULL * if none. The resulting memory should not be freed. */ -const void *get_cached_commit_buffer(const struct commit *); +const void *get_cached_commit_buffer(const struct commit *, unsigned long *size); /* * Get the commit's object contents, either from cache or by reading the object * from disk. The resulting memory should not be modified, and must be given * to unuse_commit_buffer when the caller is done. */ -const void *get_commit_buffer(const struct commit *); +const void *get_commit_buffer(const struct commit *, unsigned long *size); /* * Tell the commit subsytem that we are done with a particular commit buffer. @@ -86,7 +86,7 @@ void free_commit_buffer(struct commit *); * Disassociate any cached object buffer from the commit, but do not free it. * The buffer (or NULL, if none) is returned. */ -const void *detach_commit_buffer(struct commit *); +const void *detach_commit_buffer(struct commit *, unsigned long *sizep); /* Find beginning and length of commit subject. */ int find_commit_subject(const char *commit_buffer, const char **subject); diff --git a/fsck.c b/fsck.c index 8223780..a7233c8 100644 --- a/fsck.c +++ b/fsck.c @@ -339,7 +339,7 @@ static int fsck_commit_buffer(struct commit *commit, const char *buffer, static int fsck_commit(struct commit *commit, fsck_error error_func) { - const char *buffer = get_commit_buffer(commit); + const char *buffer = get_commit_buffer(commit, NULL); int ret = fsck_commit_buffer(commit, buffer, error_func); unuse_commit_buffer(commit, buffer); return ret; diff --git a/log-tree.c b/log-tree.c index e9ef8ab..4447021 100644 --- a/log-tree.c +++ b/log-tree.c @@ -588,7 +588,7 @@ void show_log(struct rev_info *opt) show_mergetag(opt, commit); } - if (!get_cached_commit_buffer(commit)) + if (!get_cached_commit_buffer(commit, NULL)) return; if (opt->show_notes) { diff --git a/merge-recursive.c b/merge-recursive.c index 78908aa..a9ab328 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -190,7 +190,7 @@ static void output_commit_title(struct merge_options *o, struct commit *commit) printf(_("(bad commit)\n")); else { const char *title; - const char *msg = get_commit_buffer(commit); + const char *msg = get_commit_buffer(commit, NULL); int len = find_commit_subject(msg, &title); if (len) printf("%.*s\n", len, title); diff --git a/notes-merge.c b/notes-merge.c index e804db2..fd5fae2 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -672,7 +672,7 @@ int notes_merge_commit(struct notes_merge_options *o, DIR *dir; struct dirent *e; struct strbuf path = STRBUF_INIT; - const char *buffer = get_commit_buffer(partial_commit); + const char *buffer = get_commit_buffer(partial_commit, NULL); const char *msg = strstr(buffer, "\n\n"); int baselen; diff --git a/object.c b/object.c index 67b6e35..9c31e9a 100644 --- a/object.c +++ b/object.c @@ -197,8 +197,8 @@ struct object *parse_object_buffer(const unsigned char *sha1, enum object_type t if (commit) { if (parse_commit_buffer(commit, buffer, size)) return NULL; - if (!get_cached_commit_buffer(commit)) { - set_commit_buffer(commit, buffer); + if (!get_cached_commit_buffer(commit, NULL)) { + set_commit_buffer(commit, buffer, size); *eaten_p = 1; } obj = &commit->object; diff --git a/pretty.c b/pretty.c index 915bd1e..b9fceed 100644 --- a/pretty.c +++ b/pretty.c @@ -613,7 +613,7 @@ const char *logmsg_reencode(const struct commit *commit, static const char *utf8 = "UTF-8"; const char *use_encoding; char *encoding; - const char *msg = get_commit_buffer(commit); + const char *msg = get_commit_buffer(commit, NULL); char *out; if (!output_encoding || !*output_encoding) { @@ -642,7 +642,7 @@ const char *logmsg_reencode(const struct commit *commit, * the cached copy from get_commit_buffer, we need to duplicate it * to avoid munging the cached copy. */ - if (msg == get_cached_commit_buffer(commit)) + if (msg == get_cached_commit_buffer(commit, NULL)) out = xstrdup(msg); else out = (char *)msg; diff --git a/sequencer.c b/sequencer.c index 69bcf3d..bbaddcb 100644 --- a/sequencer.c +++ b/sequencer.c @@ -662,7 +662,7 @@ static int format_todo(struct strbuf *buf, struct commit_list *todo_list, int subject_len; for (cur = todo_list; cur; cur = cur->next) { - const char *commit_buffer = get_commit_buffer(cur->item); + const char *commit_buffer = get_commit_buffer(cur->item, NULL); sha1_abbrev = find_unique_abbrev(cur->item->object.sha1, DEFAULT_ABBREV); subject_len = find_commit_subject(commit_buffer, &subject); strbuf_addf(buf, "%s %s %.*s\n", action_str, sha1_abbrev, diff --git a/sha1_name.c b/sha1_name.c index 0a65d23..c2c938c 100644 --- a/sha1_name.c +++ b/sha1_name.c @@ -869,7 +869,7 @@ static int get_sha1_oneline(const char *prefix, unsigned char *sha1, commit = pop_most_recent_commit(&list, ONELINE_SEEN); if (!parse_object(commit->object.sha1)) continue; - buf = get_commit_buffer(commit); + buf = get_commit_buffer(commit, NULL); p = strstr(buf, "\n\n"); matches = p && !regexec(®ex, p + 2, 0, NULL, 0); unuse_commit_buffer(commit, buf); -- cgit v0.10.2-6-g49f6 From 218aa3a6162b80696a82b8745daa38fa826985ae Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 13 Jun 2014 02:32:11 -0400 Subject: reuse cached commit buffer when parsing signatures When we call show_signature or show_mergetag, we read the commit object fresh via read_sha1_file and reparse its headers. However, in most cases we already have the object data available, attached to the "struct commit". This is partially laziness in dealing with the memory allocation issues, but partially defensive programming, in that we would always want to verify a clean version of the buffer (not one that might have been munged by other users of the commit). However, we do not currently ever munge the commit buffer, and not using the already-available buffer carries a fairly big performance penalty when we are looking at a large number of commits. Here are timings on linux.git: [baseline, no signatures] $ time git log >/dev/null real 0m4.902s user 0m4.784s sys 0m0.120s [before] $ time git log --show-signature >/dev/null real 0m14.735s user 0m9.964s sys 0m0.944s [after] $ time git log --show-signature >/dev/null real 0m9.981s user 0m5.260s sys 0m0.936s Note that our user CPU time drops almost in half, close to the non-signature case, but we do still spend more wall-clock and system time, presumably from dealing with gpg. An alternative to this is to note that most commits do not have signatures (less than 1% in this repo), yet we pay the re-parsing cost for every commit just to find out if it has a mergetag or signature. If we checked that when parsing the commit initially, we could avoid re-examining most commits later on. Even if we did pursue that direction, however, this would still speed up the cases where we _do_ have signatures. So it's probably worth doing either way. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index a036e18..ebd7ad8 100644 --- a/commit.c +++ b/commit.c @@ -1138,17 +1138,14 @@ static int do_sign_commit(struct strbuf *buf, const char *keyid) return 0; } -int parse_signed_commit(const unsigned char *sha1, +int parse_signed_commit(const struct commit *commit, struct strbuf *payload, struct strbuf *signature) { + unsigned long size; - enum object_type type; - char *buffer = read_sha1_file(sha1, &type, &size); + const char *buffer = get_commit_buffer(commit, &size); int in_signature, saw_signature = -1; - char *line, *tail; - - if (!buffer || type != OBJ_COMMIT) - goto cleanup; + const char *line, *tail; line = buffer; tail = buffer + size; @@ -1156,7 +1153,7 @@ int parse_signed_commit(const unsigned char *sha1, saw_signature = 0; while (line < tail) { const char *sig = NULL; - char *next = memchr(line, '\n', tail - line); + const char *next = memchr(line, '\n', tail - line); next = next ? next + 1 : tail; if (in_signature && line[0] == ' ') @@ -1177,8 +1174,7 @@ int parse_signed_commit(const unsigned char *sha1, } line = next; } - cleanup: - free(buffer); + unuse_commit_buffer(commit, buffer); return saw_signature; } @@ -1269,8 +1265,7 @@ void check_commit_signature(const struct commit* commit, struct signature_check sigc->result = 'N'; - if (parse_signed_commit(commit->object.sha1, - &payload, &signature) <= 0) + if (parse_signed_commit(commit, &payload, &signature) <= 0) goto out; status = verify_signed_buffer(payload.buf, payload.len, signature.buf, signature.len, @@ -1315,11 +1310,9 @@ struct commit_extra_header *read_commit_extra_headers(struct commit *commit, { struct commit_extra_header *extra = NULL; unsigned long size; - enum object_type type; - char *buffer = read_sha1_file(commit->object.sha1, &type, &size); - if (buffer && type == OBJ_COMMIT) - extra = read_commit_extra_header_lines(buffer, size, exclude); - free(buffer); + const char *buffer = get_commit_buffer(commit, &size); + extra = read_commit_extra_header_lines(buffer, size, exclude); + unuse_commit_buffer(commit, buffer); return extra; } diff --git a/commit.h b/commit.h index 61559a9..2e1492a 100644 --- a/commit.h +++ b/commit.h @@ -325,7 +325,7 @@ struct merge_remote_desc { */ struct commit *get_merge_parent(const char *name); -extern int parse_signed_commit(const unsigned char *sha1, +extern int parse_signed_commit(const struct commit *commit, struct strbuf *message, struct strbuf *signature); extern void print_commit_list(struct commit_list *list, const char *format_cur, diff --git a/log-tree.c b/log-tree.c index 4447021..10e6844 100644 --- a/log-tree.c +++ b/log-tree.c @@ -376,7 +376,7 @@ static void show_signature(struct rev_info *opt, struct commit *commit) struct strbuf gpg_output = STRBUF_INIT; int status; - if (parse_signed_commit(commit->object.sha1, &payload, &signature) <= 0) + if (parse_signed_commit(commit, &payload, &signature) <= 0) goto out; status = verify_signed_buffer(payload.buf, payload.len, -- cgit v0.10.2-6-g49f6