From 9f23e040615857e3909db51a5420f338c9831b5a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Jun 2016 04:46:39 +0000 Subject: pretty: support "mboxrd" output format This output format prevents format-patch output from breaking readers if somebody copy+pasted an mbox into a commit message. Unlike the traditional "mboxo" format, "mboxrd" is designed to be fully-reversible. "mboxrd" also gracefully degrades to showing extra ">" in existing "mboxo" readers. This degradation is preferable to breaking message splitting completely, a problem I've seen in "mboxcl" due to having multiple, non-existent, or inaccurate Content-Length headers. "mboxcl2" is a non-starter since it's inherits the problems of "mboxcl" while being completely incompatible with existing tooling based around mailsplit. ref: http://homepage.ntlworld.com/jonathan.deboynepollard/FGA/mail-mbox-formats.html Signed-off-by: Eric Wong Signed-off-by: Junio C Hamano diff --git a/builtin/log.c b/builtin/log.c index 099f4f7..6d6f368 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -953,7 +953,7 @@ static void make_cover_letter(struct rev_info *rev, int use_stdout, struct pretty_print_context pp = {0}; struct commit *head = list[0]; - if (rev->commit_format != CMIT_FMT_EMAIL) + if (!cmit_fmt_is_mail(rev->commit_format)) die(_("Cover letter needs email format")); committer = git_committer_info(0); diff --git a/commit.h b/commit.h index b06db4d..1e04d3a 100644 --- a/commit.h +++ b/commit.h @@ -131,11 +131,17 @@ enum cmit_fmt { CMIT_FMT_FULLER, CMIT_FMT_ONELINE, CMIT_FMT_EMAIL, + CMIT_FMT_MBOXRD, CMIT_FMT_USERFORMAT, CMIT_FMT_UNSPECIFIED }; +static inline int cmit_fmt_is_mail(enum cmit_fmt fmt) +{ + return (fmt == CMIT_FMT_EMAIL || fmt == CMIT_FMT_MBOXRD); +} + struct pretty_print_context { /* * Callers should tweak these to change the behavior of pp_* functions. diff --git a/log-tree.c b/log-tree.c index 78a5381..48daf84 100644 --- a/log-tree.c +++ b/log-tree.c @@ -603,7 +603,7 @@ void show_log(struct rev_info *opt) * Print header line of header.. */ - if (opt->commit_format == CMIT_FMT_EMAIL) { + if (cmit_fmt_is_mail(opt->commit_format)) { log_write_email_headers(opt, commit, &ctx.subject, &extra_headers, &ctx.need_8bit_cte); } else if (opt->commit_format != CMIT_FMT_USERFORMAT) { @@ -694,7 +694,7 @@ void show_log(struct rev_info *opt) if ((ctx.fmt != CMIT_FMT_USERFORMAT) && ctx.notes_message && *ctx.notes_message) { - if (ctx.fmt == CMIT_FMT_EMAIL) { + if (cmit_fmt_is_mail(ctx.fmt)) { strbuf_addstr(&msgbuf, "---\n"); opt->shown_dashes = 1; } diff --git a/pretty.c b/pretty.c index 87c4497..6abd8a1 100644 --- a/pretty.c +++ b/pretty.c @@ -92,6 +92,7 @@ static void setup_commit_formats(void) { "medium", CMIT_FMT_MEDIUM, 0, 8 }, { "short", CMIT_FMT_SHORT, 0, 0 }, { "email", CMIT_FMT_EMAIL, 0, 0 }, + { "mboxrd", CMIT_FMT_MBOXRD, 0, 0 }, { "fuller", CMIT_FMT_FULLER, 0, 8 }, { "full", CMIT_FMT_FULL, 0, 8 }, { "oneline", CMIT_FMT_ONELINE, 1, 0 } @@ -444,7 +445,7 @@ void pp_user_info(struct pretty_print_context *pp, if (pp->mailmap) map_user(pp->mailmap, &mailbuf, &maillen, &namebuf, &namelen); - if (pp->fmt == CMIT_FMT_EMAIL) { + if (cmit_fmt_is_mail(pp->fmt)) { if (pp->from_ident && ident_cmp(pp->from_ident, &ident)) { struct strbuf buf = STRBUF_INIT; @@ -494,6 +495,7 @@ void pp_user_info(struct pretty_print_context *pp, show_ident_date(&ident, &pp->date_mode)); break; case CMIT_FMT_EMAIL: + case CMIT_FMT_MBOXRD: strbuf_addf(sb, "Date: %s\n", show_ident_date(&ident, DATE_MODE(RFC2822))); break; @@ -535,7 +537,7 @@ static void add_merge_info(const struct pretty_print_context *pp, { struct commit_list *parent = commit->parents; - if ((pp->fmt == CMIT_FMT_ONELINE) || (pp->fmt == CMIT_FMT_EMAIL) || + if ((pp->fmt == CMIT_FMT_ONELINE) || (cmit_fmt_is_mail(pp->fmt)) || !parent || !parent->next) return; @@ -1614,7 +1616,7 @@ void pp_title_line(struct pretty_print_context *pp, if (pp->after_subject) { strbuf_addstr(sb, pp->after_subject); } - if (pp->fmt == CMIT_FMT_EMAIL) { + if (cmit_fmt_is_mail(pp->fmt)) { strbuf_addch(sb, '\n'); } @@ -1697,6 +1699,16 @@ static void pp_handle_indent(struct pretty_print_context *pp, strbuf_add(sb, line, linelen); } +static int is_mboxrd_from(const char *line, int len) +{ + /* + * a line matching /^From $/ here would only have len == 4 + * at this point because is_empty_line would've trimmed all + * trailing space + */ + return len > 4 && starts_with(line + strspn(line, ">"), "From "); +} + void pp_remainder(struct pretty_print_context *pp, const char **msg_p, struct strbuf *sb, @@ -1725,8 +1737,13 @@ void pp_remainder(struct pretty_print_context *pp, else if (pp->expand_tabs_in_log) strbuf_add_tabexpand(sb, pp->expand_tabs_in_log, line, linelen); - else + else { + if (pp->fmt == CMIT_FMT_MBOXRD && + is_mboxrd_from(line, linelen)) + strbuf_addch(sb, '>'); + strbuf_add(sb, line, linelen); + } strbuf_addch(sb, '\n'); } } @@ -1750,14 +1767,14 @@ void pretty_print_commit(struct pretty_print_context *pp, encoding = get_log_output_encoding(); msg = reencoded = logmsg_reencode(commit, NULL, encoding); - if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL) + if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt)) indent = 0; /* * We need to check and emit Content-type: to mark it * as 8-bit if we haven't done so. */ - if (pp->fmt == CMIT_FMT_EMAIL && need_8bit_cte == 0) { + if (cmit_fmt_is_mail(pp->fmt) && need_8bit_cte == 0) { int i, ch, in_body; for (in_body = i = 0; (ch = msg[i]); i++) { @@ -1785,7 +1802,7 @@ void pretty_print_commit(struct pretty_print_context *pp, msg = skip_empty_lines(msg); /* These formats treat the title line specially. */ - if (pp->fmt == CMIT_FMT_ONELINE || pp->fmt == CMIT_FMT_EMAIL) + if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt)) pp_title_line(pp, &msg, sb, encoding, need_8bit_cte); beginning_of_body = sb->len; @@ -1802,7 +1819,7 @@ void pretty_print_commit(struct pretty_print_context *pp, * format. Make sure we did not strip the blank line * between the header and the body. */ - if (pp->fmt == CMIT_FMT_EMAIL && sb->len <= beginning_of_body) + if (cmit_fmt_is_mail(pp->fmt) && sb->len <= beginning_of_body) strbuf_addch(sb, '\n'); unuse_commit_buffer(commit, reencoded); diff --git a/t/t4014-format-patch.sh b/t/t4014-format-patch.sh index 8049cad..8a1cab5 100755 --- a/t/t4014-format-patch.sh +++ b/t/t4014-format-patch.sh @@ -1565,4 +1565,45 @@ test_expect_success 'format-patch --base overrides format.useAutoBase' ' test_cmp expected actual ' +test_expect_success 'format-patch --pretty=mboxrd' ' + sp=" " && + cat >msg <<-INPUT_END && + mboxrd should escape the body + + From could trip up a loose mbox parser + >From extra escape for reversibility + >>From extra escape for reversibility 2 + from lower case not escaped + Fromm bad speling not escaped + From with leading space not escaped + + F + From + From$sp + From $sp + From $sp + INPUT_END + + cat >expect <<-INPUT_END && + >From could trip up a loose mbox parser + >>From extra escape for reversibility + >>>From extra escape for reversibility 2 + from lower case not escaped + Fromm bad speling not escaped + From with leading space not escaped + + F + From + From + From + From + INPUT_END + + C=$(git commit-tree HEAD^^{tree} -p HEAD patch && + git grep -h --no-index -A11 \ + "^>From could trip up a loose mbox parser" patch >actual && + test_cmp expect actual +' + test_done -- cgit v0.10.2-6-g49f6 From c88098d7f19c6322fbd911bb89e2efd246bf75c4 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Jun 2016 04:46:40 +0000 Subject: mailsplit: support unescaping mboxrd messages This will allow us to parse the output of --pretty=mboxrd and the output of other mboxrd generators. Signed-off-by: Eric Wong Signed-off-by: Junio C Hamano diff --git a/Documentation/git-mailsplit.txt b/Documentation/git-mailsplit.txt index 4d1b871..e3b2a88 100644 --- a/Documentation/git-mailsplit.txt +++ b/Documentation/git-mailsplit.txt @@ -8,7 +8,8 @@ git-mailsplit - Simple UNIX mbox splitter program SYNOPSIS -------- [verse] -'git mailsplit' [-b] [-f] [-d] [--keep-cr] -o [--] [(|)...] +'git mailsplit' [-b] [-f] [-d] [--keep-cr] [--mboxrd] + -o [--] [(|)...] DESCRIPTION ----------- @@ -47,6 +48,10 @@ OPTIONS --keep-cr:: Do not remove `\r` from lines ending with `\r\n`. +--mboxrd:: + Input is of the "mboxrd" format and "^>+From " line escaping is + reversed. + GIT --- Part of the linkgit:git[1] suite diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index 4859ede..3068168 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -45,6 +45,19 @@ static int is_from_line(const char *line, int len) static struct strbuf buf = STRBUF_INIT; static int keep_cr; +static int mboxrd; + +static int is_gtfrom(const struct strbuf *buf) +{ + size_t min = strlen(">From "); + size_t ngt; + + if (buf->len < min) + return 0; + + ngt = strspn(buf->buf, ">"); + return ngt && starts_with(buf->buf + ngt, "From "); +} /* Called with the first line (potentially partial) * already in buf[] -- normally that should begin with @@ -77,6 +90,9 @@ static int split_one(FILE *mbox, const char *name, int allow_bare) strbuf_addch(&buf, '\n'); } + if (mboxrd && is_gtfrom(&buf)) + strbuf_remove(&buf, 0, 1); + if (fwrite(buf.buf, 1, buf.len, output) != buf.len) die_errno("cannot write output"); @@ -271,6 +287,8 @@ int cmd_mailsplit(int argc, const char **argv, const char *prefix) keep_cr = 1; } else if ( arg[1] == 'o' && arg[2] ) { dir = arg+2; + } else if (!strcmp(arg, "--mboxrd")) { + mboxrd = 1; } else if ( arg[1] == '-' && !arg[2] ) { argp++; /* -- marks end of options */ break; diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index 85b3df5..1a5a546 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -111,4 +111,35 @@ test_expect_success 'mailinfo on message with quoted >From' ' test_cmp "$TEST_DIRECTORY"/t5100/quoted-from.expect quoted-from/msg ' +test_expect_success 'mailinfo unescapes with --mboxrd' ' + mkdir mboxrd && + git mailsplit -omboxrd --mboxrd \ + "$TEST_DIRECTORY"/t5100/sample.mboxrd >last && + test x"$(cat last)" = x2 && + for i in 0001 0002 + do + git mailinfo mboxrd/msg mboxrd/patch \ + mboxrd/out && + test_cmp "$TEST_DIRECTORY"/t5100/${i}mboxrd mboxrd/msg + done && + sp=" " && + echo "From " >expect && + echo "From " >>expect && + echo >> expect && + cat >sp <<-INPUT_END && + From mboxrd Mon Sep 17 00:00:00 2001 + From: trailing spacer + Subject: [PATCH] a commit with trailing space + + From$sp + >From$sp + + INPUT_END + + git mailsplit -f2 -omboxrd --mboxrd last && + test x"$(cat last)" = x1 && + git mailinfo mboxrd/msg mboxrd/patch From escaped +From not mangled but this line should have been escaped + diff --git a/t/t5100/0002mboxrd b/t/t5100/0002mboxrd new file mode 100644 index 0000000..71343d4 --- /dev/null +++ b/t/t5100/0002mboxrd @@ -0,0 +1,5 @@ + >From unchanged + From also unchanged +no trailing space, no escaping necessary and '>' was intended: +>From + diff --git a/t/t5100/sample.mboxrd b/t/t5100/sample.mboxrd new file mode 100644 index 0000000..79ad5ae --- /dev/null +++ b/t/t5100/sample.mboxrd @@ -0,0 +1,19 @@ +From mboxrd Mon Sep 17 00:00:00 2001 +From: mboxrd writer +Date: Fri, 9 Jun 2006 00:44:16 -0700 +Subject: [PATCH] a commit with escaped From lines + +>From the beginning, mbox should have been mboxrd +>>From escaped +From not mangled but this line should have been escaped + +From mboxrd Mon Sep 17 00:00:00 2001 +From: mboxrd writer +Date: Fri, 9 Jun 2006 00:44:16 -0700 +Subject: [PATCH 2/2] another with fake From lines + + >From unchanged + From also unchanged +no trailing space, no escaping necessary and '>' was intended: +>From + -- cgit v0.10.2-6-g49f6 From d9925d1a714a440f4063f64e1bd776194d2dd918 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 5 Jun 2016 04:46:41 +0000 Subject: am: support --patch-format=mboxrd Combined with "git format-patch --pretty=mboxrd", this should allow us to round-trip commit messages with embedded mbox "From " lines without corruption. Signed-off-by: Eric Wong Signed-off-by: Junio C Hamano diff --git a/Documentation/git-am.txt b/Documentation/git-am.txt index 13cdd7f..6348c29 100644 --- a/Documentation/git-am.txt +++ b/Documentation/git-am.txt @@ -116,7 +116,8 @@ default. You can use `--no-utf8` to override this. By default the command will try to detect the patch format automatically. This option allows the user to bypass the automatic detection and specify the patch format that the patch(es) should be - interpreted as. Valid formats are mbox, stgit, stgit-series and hg. + interpreted as. Valid formats are mbox, mboxrd, + stgit, stgit-series and hg. -i:: --interactive:: diff --git a/builtin/am.c b/builtin/am.c index 3dfe70b..d5da5fe 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -70,7 +70,8 @@ enum patch_format { PATCH_FORMAT_MBOX, PATCH_FORMAT_STGIT, PATCH_FORMAT_STGIT_SERIES, - PATCH_FORMAT_HG + PATCH_FORMAT_HG, + PATCH_FORMAT_MBOXRD }; enum keep_type { @@ -712,7 +713,8 @@ done: * Splits out individual email patches from `paths`, where each path is either * a mbox file or a Maildir. Returns 0 on success, -1 on failure. */ -static int split_mail_mbox(struct am_state *state, const char **paths, int keep_cr) +static int split_mail_mbox(struct am_state *state, const char **paths, + int keep_cr, int mboxrd) { struct child_process cp = CHILD_PROCESS_INIT; struct strbuf last = STRBUF_INIT; @@ -724,6 +726,8 @@ static int split_mail_mbox(struct am_state *state, const char **paths, int keep_ argv_array_push(&cp.args, "-b"); if (keep_cr) argv_array_push(&cp.args, "--keep-cr"); + if (mboxrd) + argv_array_push(&cp.args, "--mboxrd"); argv_array_push(&cp.args, "--"); argv_array_pushv(&cp.args, paths); @@ -965,13 +969,15 @@ static int split_mail(struct am_state *state, enum patch_format patch_format, switch (patch_format) { case PATCH_FORMAT_MBOX: - return split_mail_mbox(state, paths, keep_cr); + return split_mail_mbox(state, paths, keep_cr, 0); case PATCH_FORMAT_STGIT: return split_mail_conv(stgit_patch_to_mail, state, paths, keep_cr); case PATCH_FORMAT_STGIT_SERIES: return split_mail_stgit_series(state, paths, keep_cr); case PATCH_FORMAT_HG: return split_mail_conv(hg_patch_to_mail, state, paths, keep_cr); + case PATCH_FORMAT_MBOXRD: + return split_mail_mbox(state, paths, keep_cr, 1); default: die("BUG: invalid patch_format"); } @@ -2201,6 +2207,8 @@ static int parse_opt_patchformat(const struct option *opt, const char *arg, int *opt_value = PATCH_FORMAT_STGIT_SERIES; else if (!strcmp(arg, "hg")) *opt_value = PATCH_FORMAT_HG; + else if (!strcmp(arg, "mboxrd")) + *opt_value = PATCH_FORMAT_MBOXRD; else return error(_("Invalid value for --patch-format: %s"), arg); return 0; diff --git a/t/t4150-am.sh b/t/t4150-am.sh index b41bd17..9ce9424 100755 --- a/t/t4150-am.sh +++ b/t/t4150-am.sh @@ -957,4 +957,24 @@ test_expect_success 'am -s unexpected trailer block' ' test_cmp expect actual ' +test_expect_success 'am --patch-format=mboxrd handles mboxrd' ' + rm -fr .git/rebase-apply && + git checkout -f first && + echo mboxrd >>file && + git add file && + cat >msg <<-\INPUT_END && + mboxrd should escape the body + + From could trip up a loose mbox parser + >From extra escape for reversibility + INPUT_END + git commit -F msg && + git format-patch --pretty=mboxrd --stdout -1 >mboxrd1 && + grep "^>From could trip up a loose mbox parser" mboxrd1 && + git checkout -f first && + git am --patch-format=mboxrd mboxrd1 && + git cat-file commit HEAD | tail -n4 >out && + test_cmp msg out +' + test_done -- cgit v0.10.2-6-g49f6