From fe6eb7f2c506190c817407accf27834005a57f2b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Aug 2014 03:56:01 -0400 Subject: commit: provide a function to find a header in a buffer Usually when we parse a commit, we read it line by line and handle each individual line (e.g., parse_commit and parse_commit_header). Sometimes, however, we only care about extracting a single header. Code in this situation is stuck doing an ad-hoc parse of the commit buffer. Let's provide a reusable function to locate a header within the commit. The code is modeled after pretty.c's get_header, which is used to extract the encoding. Since some callers may not have the "struct commit" to go along with the buffer, we drop that parameter. The only thing lost is a warning for truncated commits, but that's OK. This shouldn't happen in practice, and even if it does, there's no particular reason that this function needs to complain about it. It either finds the header it was asked for, or it doesn't (and in the latter case, the caller will typically complain). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index ae7f2b1..b6ffd62 100644 --- a/commit.c +++ b/commit.c @@ -1660,3 +1660,25 @@ void print_commit_list(struct commit_list *list, printf(format, sha1_to_hex(list->item->object.sha1)); } } + +const char *find_commit_header(const char *msg, const char *key, size_t *out_len) +{ + int key_len = strlen(key); + const char *line = msg; + + while (line) { + const char *eol = strchrnul(line, '\n'); + + if (line == eol) + return NULL; + + if (eol - line > key_len && + !strncmp(line, key, key_len) && + line[key_len] == ' ') { + *out_len = eol - line - key_len - 1; + return line + key_len + 1; + } + line = *eol ? eol + 1 : NULL; + } + return NULL; +} diff --git a/commit.h b/commit.h index a8cbf52..12f4fe1 100644 --- a/commit.h +++ b/commit.h @@ -313,6 +313,17 @@ extern struct commit_extra_header *read_commit_extra_headers(struct commit *, co extern void free_commit_extra_headers(struct commit_extra_header *extra); +/* + * Search the commit object contents given by "msg" for the header "key". + * Returns a pointer to the start of the header contents, or NULL. The length + * of the header, up to the first newline, is returned via out_len. + * + * Note that some headers (like mergetag) may be multi-line. It is the caller's + * responsibility to parse further in this case! + */ +extern const char *find_commit_header(const char *msg, const char *key, + size_t *out_len); + typedef void (*each_mergetag_fn)(struct commit *commit, struct commit_extra_header *extra, void *cb_data); diff --git a/pretty.c b/pretty.c index 3a1da6f..3a70557 100644 --- a/pretty.c +++ b/pretty.c @@ -547,31 +547,11 @@ static void add_merge_info(const struct pretty_print_context *pp, strbuf_addch(sb, '\n'); } -static char *get_header(const struct commit *commit, const char *msg, - const char *key) +static char *get_header(const char *msg, const char *key) { - int key_len = strlen(key); - const char *line = msg; - - while (line) { - const char *eol = strchrnul(line, '\n'), *next; - - if (line == eol) - return NULL; - if (!*eol) { - warning("malformed commit (header is missing newline): %s", - sha1_to_hex(commit->object.sha1)); - next = NULL; - } else - next = eol + 1; - if (eol - line > key_len && - !strncmp(line, key, key_len) && - line[key_len] == ' ') { - return xmemdupz(line + key_len + 1, eol - line - key_len - 1); - } - line = next; - } - return NULL; + size_t len; + const char *v = find_commit_header(msg, key, &len); + return v ? xmemdupz(v, len) : NULL; } static char *replace_encoding_header(char *buf, const char *encoding) @@ -617,11 +597,10 @@ const char *logmsg_reencode(const struct commit *commit, if (!output_encoding || !*output_encoding) { if (commit_encoding) - *commit_encoding = - get_header(commit, msg, "encoding"); + *commit_encoding = get_header(msg, "encoding"); return msg; } - encoding = get_header(commit, msg, "encoding"); + encoding = get_header(msg, "encoding"); if (commit_encoding) *commit_encoding = encoding; use_encoding = encoding ? encoding : utf8; -- cgit v0.10.2-6-g49f6 From 6876618ceaafddba625ed823679d99de0e79d111 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Aug 2014 03:56:31 -0400 Subject: record_author_date(): fix memory leak on malformed commit If we hit the end-of-header without finding an "author" line, we just return from the function. We should jump to the fail_exit path to clean up the buffer that we may have allocated. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index b6ffd62..4ff8c66 100644 --- a/commit.c +++ b/commit.c @@ -594,7 +594,7 @@ static void record_author_date(struct author_date_slab *author_date, line_end = strchrnul(buf, '\n'); if (!skip_prefix(buf, "author ", &ident_line)) { if (!line_end[0] || line_end[1] == '\n') - return; /* end of header */ + goto fail_exit; /* end of header */ continue; } if (split_ident_line(&ident, -- cgit v0.10.2-6-g49f6 From ea5517f04b08bdb40eca72888220bd6a90d3cf17 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Aug 2014 03:56:55 -0400 Subject: record_author_date(): use find_commit_header() This saves us some manual parsing and makes the code more readable. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/commit.c b/commit.c index 4ff8c66..9416d84 100644 --- a/commit.c +++ b/commit.c @@ -584,25 +584,19 @@ define_commit_slab(author_date_slab, unsigned long); static void record_author_date(struct author_date_slab *author_date, struct commit *commit) { - const char *buf, *line_end, *ident_line; const char *buffer = get_commit_buffer(commit, NULL); struct ident_split ident; + const char *ident_line; + size_t ident_len; char *date_end; unsigned long date; - for (buf = buffer; buf; buf = line_end + 1) { - line_end = strchrnul(buf, '\n'); - if (!skip_prefix(buf, "author ", &ident_line)) { - if (!line_end[0] || line_end[1] == '\n') - goto fail_exit; /* end of header */ - continue; - } - if (split_ident_line(&ident, - ident_line, line_end - ident_line) || - !ident.date_begin || !ident.date_end) - goto fail_exit; /* malformed "author" line */ - break; - } + ident_line = find_commit_header(buffer, "author", &ident_len); + if (!ident_line) + goto fail_exit; /* no author line */ + if (split_ident_line(&ident, ident_line, ident_len) || + !ident.date_begin || !ident.date_end) + goto fail_exit; /* malformed "author" line */ date = strtoul(ident.date_begin, &date_end, 10); if (date_end != ident.date_end) -- cgit v0.10.2-6-g49f6 From c33ddc2e33d51da9391a81206a1d9e4a92d97d10 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Aug 2014 03:57:08 -0400 Subject: date: use strbufs in date-formatting functions Many of the date functions write into fixed-size buffers. This is a minor pain, as we have to take special precautions, and frequently end up copying the result into a strbuf or heap-allocated buffer anyway (for which we sometimes use strcpy!). Let's instead teach parse_date, datestamp, etc to write to a strbuf. The obvious downside is that we might need to perform a heap allocation where we otherwise would not need to. However, it turns out that the only two new allocations required are: 1. In test-date.c, where we don't care about efficiency. 2. In determine_author_info, which is not performance critical (and where the use of a strbuf will help later refactoring). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/commit.c b/builtin/commit.c index 5ed6036..8da0a9f 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -520,19 +520,16 @@ static int sane_ident_split(struct ident_split *person) return 1; } -static int parse_force_date(const char *in, char *out, int len) +static int parse_force_date(const char *in, struct strbuf *out) { - if (len < 1) - return -1; - *out++ = '@'; - len--; + strbuf_addch(out, '@'); - if (parse_date(in, out, len) < 0) { + if (parse_date(in, out) < 0) { int errors = 0; unsigned long t = approxidate_careful(in, &errors); if (errors) return -1; - snprintf(out, len, "%lu", t); + strbuf_addf(out, "%lu", t); } return 0; @@ -542,7 +539,7 @@ static void determine_author_info(struct strbuf *author_ident) { char *name, *email, *date; struct ident_split author; - char date_buf[64]; + struct strbuf date_buf = STRBUF_INIT; name = getenv("GIT_AUTHOR_NAME"); email = getenv("GIT_AUTHOR_EMAIL"); @@ -588,9 +585,10 @@ static void determine_author_info(struct strbuf *author_ident) } if (force_date) { - if (parse_force_date(force_date, date_buf, sizeof(date_buf))) + strbuf_reset(&date_buf); + if (parse_force_date(force_date, &date_buf)) die(_("invalid date format: %s"), force_date); - date = date_buf; + date = date_buf.buf; } strbuf_addstr(author_ident, fmt_ident(name, email, date, IDENT_STRICT)); @@ -600,6 +598,8 @@ static void determine_author_info(struct strbuf *author_ident) export_one("GIT_AUTHOR_EMAIL", author.mail_begin, author.mail_end, 0); export_one("GIT_AUTHOR_DATE", author.date_begin, author.tz_end, '@'); } + + strbuf_release(&date_buf); } static void split_ident_or_die(struct ident_split *id, const struct strbuf *buf) diff --git a/cache.h b/cache.h index fcb511d..96ecc1f 100644 --- a/cache.h +++ b/cache.h @@ -1044,10 +1044,10 @@ enum date_mode { const char *show_date(unsigned long time, int timezone, enum date_mode mode); void show_date_relative(unsigned long time, int tz, const struct timeval *now, struct strbuf *timebuf); -int parse_date(const char *date, char *buf, int bufsize); +int parse_date(const char *date, struct strbuf *out); int parse_date_basic(const char *date, unsigned long *timestamp, int *offset); int parse_expiry_date(const char *date, unsigned long *timestamp); -void datestamp(char *buf, int bufsize); +void datestamp(struct strbuf *out); #define approxidate(s) approxidate_careful((s), NULL) unsigned long approxidate_careful(const char *, int *); unsigned long approxidate_relative(const char *date, const struct timeval *now); diff --git a/date.c b/date.c index 782de95..2c33468 100644 --- a/date.c +++ b/date.c @@ -605,7 +605,7 @@ static int match_tz(const char *date, int *offp) return end - date; } -static int date_string(unsigned long date, int offset, char *buf, int len) +static void date_string(unsigned long date, int offset, struct strbuf *buf) { int sign = '+'; @@ -613,7 +613,7 @@ static int date_string(unsigned long date, int offset, char *buf, int len) offset = -offset; sign = '-'; } - return snprintf(buf, len, "%lu %c%02d%02d", date, sign, offset / 60, offset % 60); + strbuf_addf(buf, "%lu %c%02d%02d", date, sign, offset / 60, offset % 60); } /* @@ -735,13 +735,14 @@ int parse_expiry_date(const char *date, unsigned long *timestamp) return errors; } -int parse_date(const char *date, char *result, int maxlen) +int parse_date(const char *date, struct strbuf *result) { unsigned long timestamp; int offset; if (parse_date_basic(date, ×tamp, &offset)) return -1; - return date_string(timestamp, offset, result, maxlen); + date_string(timestamp, offset, result); + return 0; } enum date_mode parse_date_format(const char *format) @@ -766,7 +767,7 @@ enum date_mode parse_date_format(const char *format) die("unknown date format %s", format); } -void datestamp(char *buf, int bufsize) +void datestamp(struct strbuf *out) { time_t now; int offset; @@ -776,7 +777,7 @@ void datestamp(char *buf, int bufsize) offset = tm_to_time_t(localtime(&now)) - now; offset /= 60; - date_string(now, offset, buf, bufsize); + date_string(now, offset, out); } /* diff --git a/fast-import.c b/fast-import.c index d73f58c..dc9f7a8 100644 --- a/fast-import.c +++ b/fast-import.c @@ -1971,7 +1971,7 @@ static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res) return 1; } -static int validate_raw_date(const char *src, char *result, int maxlen) +static int validate_raw_date(const char *src, struct strbuf *result) { const char *orig_src = src; char *endp; @@ -1989,11 +1989,10 @@ static int validate_raw_date(const char *src, char *result, int maxlen) return -1; num = strtoul(src + 1, &endp, 10); - if (errno || endp == src + 1 || *endp || (endp - orig_src) >= maxlen || - 1400 < num) + if (errno || endp == src + 1 || *endp || 1400 < num) return -1; - strcpy(result, orig_src); + strbuf_addstr(result, orig_src); return 0; } @@ -2001,7 +2000,7 @@ static char *parse_ident(const char *buf) { const char *ltgt; size_t name_len; - char *ident; + struct strbuf ident = STRBUF_INIT; /* ensure there is a space delimiter even if there is no name */ if (*buf == '<') @@ -2020,26 +2019,25 @@ static char *parse_ident(const char *buf) die("Missing space after > in ident string: %s", buf); ltgt++; name_len = ltgt - buf; - ident = xmalloc(name_len + 24); - strncpy(ident, buf, name_len); + strbuf_add(&ident, buf, name_len); switch (whenspec) { case WHENSPEC_RAW: - if (validate_raw_date(ltgt, ident + name_len, 24) < 0) + if (validate_raw_date(ltgt, &ident) < 0) die("Invalid raw date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_RFC2822: - if (parse_date(ltgt, ident + name_len, 24) < 0) + if (parse_date(ltgt, &ident) < 0) die("Invalid rfc2822 date \"%s\" in ident: %s", ltgt, buf); break; case WHENSPEC_NOW: if (strcmp("now", ltgt)) die("Date in ident must be 'now': %s", buf); - datestamp(ident + name_len, 24); + datestamp(&ident); break; } - return ident; + return strbuf_detach(&ident, NULL); } static void parse_and_store_blob( diff --git a/ident.c b/ident.c index 1d9b6e7..9bcc4e1 100644 --- a/ident.c +++ b/ident.c @@ -9,7 +9,7 @@ static struct strbuf git_default_name = STRBUF_INIT; static struct strbuf git_default_email = STRBUF_INIT; -static char git_default_date[50]; +static struct strbuf git_default_date = STRBUF_INIT; #define IDENT_NAME_GIVEN 01 #define IDENT_MAIL_GIVEN 02 @@ -129,9 +129,9 @@ const char *ident_default_email(void) static const char *ident_default_date(void) { - if (!git_default_date[0]) - datestamp(git_default_date, sizeof(git_default_date)); - return git_default_date; + if (!git_default_date.len) + datestamp(&git_default_date); + return git_default_date.buf; } static int crud(unsigned char c) @@ -292,7 +292,6 @@ const char *fmt_ident(const char *name, const char *email, const char *date_str, int flag) { static struct strbuf ident = STRBUF_INIT; - char date[50]; int strict = (flag & IDENT_STRICT); int want_date = !(flag & IDENT_NO_DATE); int want_name = !(flag & IDENT_NO_NAME); @@ -320,15 +319,6 @@ const char *fmt_ident(const char *name, const char *email, die("unable to auto-detect email address (got '%s')", email); } - if (want_date) { - if (date_str && date_str[0]) { - if (parse_date(date_str, date, sizeof(date)) < 0) - die("invalid date format: %s", date_str); - } - else - strcpy(date, ident_default_date()); - } - strbuf_reset(&ident); if (want_name) { strbuf_addstr_without_crud(&ident, name); @@ -339,8 +329,14 @@ const char *fmt_ident(const char *name, const char *email, strbuf_addch(&ident, '>'); if (want_date) { strbuf_addch(&ident, ' '); - strbuf_addstr_without_crud(&ident, date); + if (date_str && date_str[0]) { + if (parse_date(date_str, &ident) < 0) + die("invalid date format: %s", date_str); + } + else + strbuf_addstr(&ident, ident_default_date()); } + return ident.buf; } diff --git a/test-date.c b/test-date.c index 10afaab..94a6997 100644 --- a/test-date.c +++ b/test-date.c @@ -19,19 +19,21 @@ static void show_dates(char **argv, struct timeval *now) static void parse_dates(char **argv, struct timeval *now) { + struct strbuf result = STRBUF_INIT; + for (; *argv; argv++) { - char result[100]; unsigned long t; int tz; - result[0] = 0; - parse_date(*argv, result, sizeof(result)); - if (sscanf(result, "%lu %d", &t, &tz) == 2) + strbuf_reset(&result); + parse_date(*argv, &result); + if (sscanf(result.buf, "%lu %d", &t, &tz) == 2) printf("%s -> %s\n", *argv, show_date(t, tz, DATE_ISO8601)); else printf("%s -> bad\n", *argv); } + strbuf_release(&result); } static void parse_approxidate(char **argv, struct timeval *now) -- cgit v0.10.2-6-g49f6 From f0f9662ae9d1c7f58a95397d1c6d5f31760b14be Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Aug 2014 03:57:28 -0400 Subject: determine_author_info(): reuse parsing functions Rather than parsing the header manually to find the "author" field, and then parsing its sub-parts, let's use find_commit_header and split_ident_line. This is shorter and easier to read, and should do a more careful parsing job. For example, the current parser could find the end-of-email right-bracket across a newline (for a malformed commit), and calculate a bogus gigantic length for the date (by using "eol - rb"). As a bonus, this also plugs a memory leak when we pull the date field from an existing commit (we still leak the name and email buffers, which will be fixed in a later commit). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/commit.c b/builtin/commit.c index 8da0a9f..bc03a1c 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -546,42 +546,35 @@ static void determine_author_info(struct strbuf *author_ident) date = getenv("GIT_AUTHOR_DATE"); if (author_message) { - const char *a, *lb, *rb, *eol; + struct ident_split ident; size_t len; + const char *a; - a = strstr(author_message_buffer, "\nauthor "); + a = find_commit_header(author_message_buffer, "author", &len); if (!a) - die(_("invalid commit: %s"), author_message); - - lb = strchrnul(a + strlen("\nauthor "), '<'); - rb = strchrnul(lb, '>'); - eol = strchrnul(rb, '\n'); - if (!*lb || !*rb || !*eol) - die(_("invalid commit: %s"), author_message); - - if (lb == a + strlen("\nauthor ")) - /* \nauthor */ - name = xcalloc(1, 1); - else - name = xmemdupz(a + strlen("\nauthor "), - (lb - strlen(" ") - - (a + strlen("\nauthor ")))); - email = xmemdupz(lb + strlen("<"), rb - (lb + strlen("<"))); - len = eol - (rb + strlen("> ")); - date = xmalloc(len + 2); - *date = '@'; - memcpy(date + 1, rb + strlen("> "), len); - date[len + 1] = '\0'; + die(_("commit '%s' lacks author header"), author_message); + if (split_ident_line(&ident, a, len) < 0) + die(_("commit '%s' has malformed author line"), author_message); + + name = xmemdupz(ident.name_begin, ident.name_end - ident.name_begin); + email = xmemdupz(ident.mail_begin, ident.mail_end - ident.mail_begin); + if (ident.date_begin) { + strbuf_reset(&date_buf); + strbuf_addch(&date_buf, '@'); + strbuf_add(&date_buf, ident.date_begin, ident.date_end - ident.date_begin); + strbuf_addch(&date_buf, ' '); + strbuf_add(&date_buf, ident.tz_begin, ident.tz_end - ident.tz_begin); + date = date_buf.buf; + } } if (force_author) { - const char *lb = strstr(force_author, " <"); - const char *rb = strchr(force_author, '>'); + struct ident_split ident; - if (!lb || !rb) + if (split_ident_line(&ident, force_author, strlen(force_author)) < 0) die(_("malformed --author parameter")); - name = xstrndup(force_author, lb - force_author); - email = xstrndup(lb + 2, rb - (lb + 2)); + name = xmemdupz(ident.name_begin, ident.name_end - ident.name_begin); + email = xmemdupz(ident.mail_begin, ident.mail_end - ident.mail_begin); } if (force_date) { -- cgit v0.10.2-6-g49f6 From f4ef51739343f80c7cb0467244925b3725d65730 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Wed, 27 Aug 2014 03:57:56 -0400 Subject: determine_author_info(): copy getenv output When figuring out the author name for a commit, we may end up either pointing to const storage from getenv("GIT_AUTHOR_*"), or to newly allocated storage based on an existing commit or the --author option. Using const pointers to getenv's return has two problems: 1. It is not guaranteed that the return value from getenv remains valid across multiple calls. 2. We do not know whether to free the values at the end, so we just leak them. We can solve both by duplicating the string returned by getenv(). Signed-off-by: Jeff King Signed-off-by: Junio C Hamano diff --git a/builtin/commit.c b/builtin/commit.c index bc03a1c..8c63720 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -535,15 +535,26 @@ static int parse_force_date(const char *in, struct strbuf *out) return 0; } +static void set_ident_var(char **buf, char *val) +{ + free(*buf); + *buf = val; +} + +static char *envdup(const char *var) +{ + const char *val = getenv(var); + return val ? xstrdup(val) : NULL; +} + static void determine_author_info(struct strbuf *author_ident) { char *name, *email, *date; struct ident_split author; - struct strbuf date_buf = STRBUF_INIT; - name = getenv("GIT_AUTHOR_NAME"); - email = getenv("GIT_AUTHOR_EMAIL"); - date = getenv("GIT_AUTHOR_DATE"); + name = envdup("GIT_AUTHOR_NAME"); + email = envdup("GIT_AUTHOR_EMAIL"); + date = envdup("GIT_AUTHOR_DATE"); if (author_message) { struct ident_split ident; @@ -556,15 +567,16 @@ static void determine_author_info(struct strbuf *author_ident) if (split_ident_line(&ident, a, len) < 0) die(_("commit '%s' has malformed author line"), author_message); - name = xmemdupz(ident.name_begin, ident.name_end - ident.name_begin); - email = xmemdupz(ident.mail_begin, ident.mail_end - ident.mail_begin); + set_ident_var(&name, xmemdupz(ident.name_begin, ident.name_end - ident.name_begin)); + set_ident_var(&email, xmemdupz(ident.mail_begin, ident.mail_end - ident.mail_begin)); + if (ident.date_begin) { - strbuf_reset(&date_buf); + struct strbuf date_buf = STRBUF_INIT; strbuf_addch(&date_buf, '@'); strbuf_add(&date_buf, ident.date_begin, ident.date_end - ident.date_begin); strbuf_addch(&date_buf, ' '); strbuf_add(&date_buf, ident.tz_begin, ident.tz_end - ident.tz_begin); - date = date_buf.buf; + set_ident_var(&date, strbuf_detach(&date_buf, NULL)); } } @@ -573,15 +585,15 @@ static void determine_author_info(struct strbuf *author_ident) if (split_ident_line(&ident, force_author, strlen(force_author)) < 0) die(_("malformed --author parameter")); - name = xmemdupz(ident.name_begin, ident.name_end - ident.name_begin); - email = xmemdupz(ident.mail_begin, ident.mail_end - ident.mail_begin); + set_ident_var(&name, xmemdupz(ident.name_begin, ident.name_end - ident.name_begin)); + set_ident_var(&email, xmemdupz(ident.mail_begin, ident.mail_end - ident.mail_begin)); } if (force_date) { - strbuf_reset(&date_buf); + struct strbuf date_buf = STRBUF_INIT; if (parse_force_date(force_date, &date_buf)) die(_("invalid date format: %s"), force_date); - date = date_buf.buf; + set_ident_var(&date, strbuf_detach(&date_buf, NULL)); } strbuf_addstr(author_ident, fmt_ident(name, email, date, IDENT_STRICT)); @@ -592,7 +604,9 @@ static void determine_author_info(struct strbuf *author_ident) export_one("GIT_AUTHOR_DATE", author.date_begin, author.tz_end, '@'); } - strbuf_release(&date_buf); + free(name); + free(email); + free(date); } static void split_ident_or_die(struct ident_split *id, const struct strbuf *buf) -- cgit v0.10.2-6-g49f6