From 597c9cc540c2ca5d0b0eeaa3f453dffa14afab6d Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Wed, 7 Sep 2005 12:22:56 -0700 Subject: Flatten tools/ directory to make build procedure simpler. Also make platform specific part more isolated. Currently we only have Darwin defined, but I've taken a look at SunOS specific patch (which I dropped on the floor for now) as well. Doing things this way would make adding it easier. Signed-off-by: Junio C Hamano diff --git a/Makefile b/Makefile index 9aa0c9a..9122c03 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,9 @@ # # Define PPC_SHA1 environment variable when running make to make use of # a bundled SHA1 routine optimized for PowerPC. - +# +# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). +# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). # Define COLLISION_CHECK below if you believe that SHA1's # 1461501637330902918203684832716283019655932542976 hashes do not give you @@ -66,13 +68,20 @@ SCRIPTS=git git-merge-one-file-script git-prune-script \ git-format-patch-script git-sh-setup-script git-push-script \ git-branch-script git-parse-remote-script git-verify-tag-script \ git-ls-remote-script git-rename-script \ - git-request-pull-script git-bisect-script + git-request-pull-script git-bisect-script \ + git-applymbox git-applypatch SCRIPTS += git-count-objects-script SCRIPTS += git-revert-script SCRIPTS += git-octopus-script SCRIPTS += git-archimport-script +# The ones that do not have to link with lcrypto nor lz. +SIMPLE_PROGRAMS = \ + git-get-tar-commit-id git-mailinfo git-mailsplit git-stripspace \ + git-daemon git-var + +# ... and all the rest PROG= git-update-cache git-diff-files git-init-db git-write-tree \ git-read-tree git-commit-tree git-cat-file git-fsck-cache \ git-checkout-cache git-diff-tree git-rev-tree git-ls-files \ @@ -80,12 +89,13 @@ PROG= git-update-cache git-diff-files git-init-db git-write-tree \ git-unpack-file git-export git-diff-cache git-convert-cache \ git-ssh-push git-ssh-pull git-rev-list git-mktag \ git-diff-helper git-tar-tree git-local-pull git-hash-object \ - git-get-tar-commit-id git-apply git-stripspace \ + git-apply \ git-diff-stages git-rev-parse git-patch-id git-pack-objects \ git-unpack-objects git-verify-pack git-receive-pack git-send-pack \ git-prune-packed git-fetch-pack git-upload-pack git-clone-pack \ - git-show-index git-daemon git-var git-peek-remote git-show-branch \ - git-update-server-info git-show-rev-cache git-build-rev-cache + git-show-index git-peek-remote git-show-branch \ + git-update-server-info git-show-rev-cache git-build-rev-cache \ + $(SIMPLE_PROGRAMS) ifdef WITH_SEND_EMAIL SCRIPTS += git-send-email-script @@ -126,6 +136,11 @@ LIB_OBJS += server-info.o LIBS = $(LIB_FILE) LIBS += -lz +ifeq ($(shell uname -s),Darwin) + NEEDS_SSL_WITH_CRYPTO = YesPlease + NEEDS_LIBICONV = YesPlease +endif + ifndef NO_OPENSSL LIB_OBJS += epoch.o OPENSSL_LIBSSL=-lssl @@ -134,6 +149,16 @@ else MOZILLA_SHA1=1 OPENSSL_LIBSSL= endif +ifdef NEEDS_SSL_WITH_CRYPTO + LIB_4_CRYPTO = -lcrypto -lssl +else + LIB_4_CRYPTO = -lcrypto +endif +ifdef NEEDS_LIBICONV + LIB_4_ICONV = -liconv +else + LIB_4_ICONV = +endif ifdef MOZILLA_SHA1 SHA1_HEADER="mozilla-sha1/sha1.h" LIB_OBJS += mozilla-sha1/sha1.o @@ -143,11 +168,7 @@ else LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o else SHA1_HEADER= - ifeq ($(shell uname -s),Darwin) - LIBS += -lcrypto -lssl - else - LIBS += -lcrypto - endif + LIBS += $(LIB_4_CRYPTO) endif endif @@ -161,7 +182,6 @@ all: $(PROG) all: $(MAKE) -C templates - $(MAKE) -C tools %.o: %.c $(CC) -o $*.o -c $(ALL_CFLAGS) $< @@ -171,6 +191,11 @@ all: git-%: %.o $(LIB_FILE) $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) $(LIBS) +git-mailinfo : SIMPLE_LIB += $(LIB_4_ICONV) +$(SIMPLE_PROGRAMS) : $(LIB_FILE) +$(SIMPLE_PROGRAMS) : git-% : %.o + $(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) $(LIB_FILE) $(SIMPLE_LIB) + git-http-pull: pull.o git-local-pull: pull.o git-ssh-pull: rsh.o pull.o @@ -218,7 +243,6 @@ install: $(PROG) $(SCRIPTS) $(INSTALL) $(PROG) $(SCRIPTS) $(DESTDIR)$(bindir) $(INSTALL) git-revert-script $(DESTDIR)$(bindir)/git-cherry-pick-script $(MAKE) -C templates install - $(MAKE) -C tools install install-doc: $(MAKE) -C Documentation install @@ -258,7 +282,6 @@ clean: rm -f $(GIT_TARNAME).tar.gz git-core_$(GIT_VERSION)-*.tar.gz rm -f git-core_$(GIT_VERSION)-*.deb git-core_$(GIT_VERSION)-*.dsc rm -f git-tk_$(GIT_VERSION)-*.deb - $(MAKE) -C tools/ clean $(MAKE) -C Documentation/ clean $(MAKE) -C templates/ clean $(MAKE) -C t/ clean diff --git a/git-applymbox b/git-applymbox new file mode 100755 index 0000000..e58bb21 --- /dev/null +++ b/git-applymbox @@ -0,0 +1,109 @@ +#!/bin/sh +## +## "dotest" is my stupid name for my patch-application script, which +## I never got around to renaming after I tested it. We're now on the +## second generation of scripts, still called "dotest". +## +## Update: Ryan Anderson finally shamed me into naming this "applymbox". +## +## You give it a mbox-format collection of emails, and it will try to +## apply them to the kernel using "applypatch" +## +## applymbox [-u] [-k] [-q] (-c .dotest/msg-number | mail_archive) [Signoff_file]" +## +## The patch application may fail in the middle. In which case: +## (1) look at .dotest/patch and fix it up to apply +## (2) re-run applymbox with -c .dotest/msg-number for the current one. +## Pay a special attention to the commit log message if you do this and +## use a Signoff_file, because applypatch wants to append the sign-off +## message to msg-clean every time it is run. + +. git-sh-setup-script || die "Not a git archive" + +usage () { + echo >&2 "applymbox [-u] [-k] [-q] (-c .dotest/ | mbox) [signoff]" + exit 1 +} + +keep_subject= query_apply= continue= utf8= resume=t +while case "$#" in 0) break ;; esac +do + case "$1" in + -u) utf8=-u ;; + -k) keep_subject=-k ;; + -q) query_apply=t ;; + -c) continue="$2"; resume=f; shift ;; + -*) usage ;; + *) break ;; + esac + shift +done + +case "$continue" in +'') + rm -rf .dotest + mkdir .dotest + git-mailsplit "$1" .dotest || exit 1 + shift +esac + +files=$(git-diff-cache --cached --name-only HEAD) || exit +if [ "$files" ]; then + echo "Dirty index: cannot apply patches (dirty: $files)" >&2 + exit 1 +fi + +case "$query_apply" in +t) touch .dotest/.query_apply +esac +case "$keep_subject" in +-k) : >.dotest/.keep_subject +esac + +signoff="$1" +set x .dotest/0* +shift +while case "$#" in 0) break;; esac +do + i="$1" + case "$resume,$continue" in + f,$i) resume=t;; + f,*) shift + continue;; + *) + git-mailinfo $keep_subject $utf8 \ + .dotest/msg .dotest/patch <$i >.dotest/info || exit 1 + git-stripspace < .dotest/msg > .dotest/msg-clean + ;; + esac + while :; # for fixing up and retry + do + git-applypatch .dotest/msg-clean .dotest/patch .dotest/info "$signoff" + case "$?" in + 0 | 2 ) + # 2 is a special exit code from applypatch to indicate that + # the patch wasn't applied, but continue anyway + ;; + *) + ret=$? + if test -f .dotest/.query_apply + then + echo >&2 "* Patch failed." + echo >&2 "* You could fix it up in your editor and" + echo >&2 " retry. If you want to do so, say yes here" + echo >&2 " AFTER fixing .dotest/patch up." + echo >&2 -n "Retry [y/N]? " + read yesno + case "$yesno" in + [Yy]*) + continue ;; + esac + fi + exit $ret + esac + break + done + shift +done +# return to pristine +rm -fr .dotest diff --git a/git-applypatch b/git-applypatch new file mode 100755 index 0000000..e5bc3c0 --- /dev/null +++ b/git-applypatch @@ -0,0 +1,118 @@ +#!/bin/sh +## +## applypatch takes four file arguments, and uses those to +## apply the unpacked patch (surprise surprise) that they +## represent to the current tree. +## +## The arguments are: +## $1 - file with commit message +## $2 - file with the actual patch +## $3 - "info" file with Author, email and subject +## $4 - optional file containing signoff to add +## +. git-sh-setup-script || die "Not a git archive." + +final=.dotest/final-commit +## +## If this file exists, we ask before applying +## +query_apply=.dotest/.query_apply + +## We do not munge the first line of the commit message too much +## if this file exists. +keep_subject=.dotest/.keep_subject + + +MSGFILE=$1 +PATCHFILE=$2 +INFO=$3 +SIGNOFF=$4 +EDIT=${VISUAL:-${EDITOR:-vi}} + +export GIT_AUTHOR_NAME="$(sed -n '/^Author/ s/Author: //p' .dotest/info)" +export GIT_AUTHOR_EMAIL="$(sed -n '/^Email/ s/Email: //p' .dotest/info)" +export GIT_AUTHOR_DATE="$(sed -n '/^Date/ s/Date: //p' .dotest/info)" +export SUBJECT="$(sed -n '/^Subject/ s/Subject: //p' .dotest/info)" + +if test '' != "$SIGNOFF" +then + if test -f "$SIGNOFF" + then + SIGNOFF=`cat "$SIGNOFF"` || exit + elif case "$SIGNOFF" in yes | true | me | please) : ;; *) false ;; esac + then + SIGNOFF=`git-var GIT_COMMITTER_IDENT | sed -e ' + s/>.*/>/ + s/^/Signed-off-by: /' + ` + else + SIGNOFF= + fi + if test '' != "$SIGNOFF" + then + LAST_SIGNED_OFF_BY=` + sed -ne '/^Signed-off-by: /p' "$MSGFILE" | + tail -n 1 + ` + test "$LAST_SIGNED_OFF_BY" = "$SIGNOFF" || + echo "$SIGNOFF" >>"$MSGFILE" + fi +fi + +patch_header= +test -f "$keep_subject" || patch_header='[PATCH] ' + +{ + echo "$patch_header$SUBJECT" + if test -s "$MSGFILE" + then + echo + cat "$MSGFILE" + fi +} >"$final" + +interactive=yes +test -f "$query_apply" || interactive=no + +while [ "$interactive" = yes ]; do + echo "Commit Body is:" + echo "--------------------------" + cat "$final" + echo "--------------------------" + echo -n "Apply? [y]es/[n]o/[e]dit/[a]ccept all " + read reply + case "$reply" in + y|Y) interactive=no;; + n|N) exit 2;; # special value to tell dotest to keep going + e|E) "$EDIT" "$final";; + a|A) rm -f "$query_apply" + interactive=no ;; + esac +done + +if test -x "$GIT_DIR"/hooks/applypatch-msg +then + "$GIT_DIR"/hooks/applypatch-msg "$final" || exit +fi + +echo +echo Applying "'$SUBJECT'" +echo + +git-apply --index "$PATCHFILE" || exit 1 + +if test -x "$GIT_DIR"/hooks/pre-applypatch +then + "$GIT_DIR"/hooks/pre-applypatch || exit +fi + +tree=$(git-write-tree) || exit 1 +echo Wrote tree $tree +commit=$(git-commit-tree $tree -p $(cat "$GIT_DIR"/HEAD) < "$final") || exit 1 +echo Committed: $commit +echo $commit > "$GIT_DIR"/HEAD + +if test -x "$GIT_DIR"/hooks/post-applypatch +then + "$GIT_DIR"/hooks/post-applypatch +fi diff --git a/mailinfo.c b/mailinfo.c new file mode 100644 index 0000000..df470bb --- /dev/null +++ b/mailinfo.c @@ -0,0 +1,749 @@ +/* + * Another stupid program, this one parsing the headers of an + * email to figure out authorship and subject + */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +static FILE *cmitmsg, *patchfile; + +static int keep_subject = 0; +static int metainfo_utf8 = 0; +static char line[1000]; +static char date[1000]; +static char name[1000]; +static char email[1000]; +static char subject[1000]; + +static enum { + TE_DONTCARE, TE_QP, TE_BASE64, +} transfer_encoding; +static char charset[256]; + +static char multipart_boundary[1000]; +static int multipart_boundary_len; +static int patch_lines = 0; + +static char *sanity_check(char *name, char *email) +{ + int len = strlen(name); + if (len < 3 || len > 60) + return email; + if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>')) + return email; + return name; +} + +static int handle_from(char *line) +{ + char *at = strchr(line, '@'); + char *dst; + + if (!at) + return 0; + + /* + * If we already have one email, don't take any confusing lines + */ + if (*email && strchr(at+1, '@')) + return 0; + + /* Pick up the string around '@', possibly delimited with <> + * pair; that is the email part. White them out while copying. + */ + while (at > line) { + char c = at[-1]; + if (isspace(c)) + break; + if (c == '<') { + at[-1] = ' '; + break; + } + at--; + } + dst = email; + for (;;) { + unsigned char c = *at; + if (!c || c == '>' || isspace(c)) { + if (c == '>') + *at = ' '; + break; + } + *at++ = ' '; + *dst++ = c; + } + *dst++ = 0; + + /* The remainder is name. It could be "John Doe " + * or "john.doe@xz (John Doe)", but we have whited out the + * email part, so trim from both ends, possibly removing + * the () pair at the end. + */ + at = line + strlen(line); + while (at > line) { + unsigned char c = *--at; + if (!isspace(c)) { + at[(c == ')') ? 0 : 1] = 0; + break; + } + } + + at = line; + for (;;) { + unsigned char c = *at; + if (!c || !isspace(c)) { + if (c == '(') + at++; + break; + } + at++; + } + at = sanity_check(at, email); + strcpy(name, at); + return 1; +} + +static int handle_date(char *line) +{ + strcpy(date, line); + return 0; +} + +static int handle_subject(char *line) +{ + strcpy(subject, line); + return 0; +} + +/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt + * to have enough heuristics to grok MIME encoded patches often found + * on our mailing lists. For example, we do not even treat header lines + * case insensitively. + */ + +static int slurp_attr(const char *line, const char *name, char *attr) +{ + char *ends, *ap = strcasestr(line, name); + size_t sz; + + if (!ap) { + *attr = 0; + return 0; + } + ap += strlen(name); + if (*ap == '"') { + ap++; + ends = "\""; + } + else + ends = "; \t"; + sz = strcspn(ap, ends); + memcpy(attr, ap, sz); + attr[sz] = 0; + return 1; +} + +static int handle_subcontent_type(char *line) +{ + /* We do not want to mess with boundary. Note that we do not + * handle nested multipart. + */ + if (strcasestr(line, "boundary=")) { + fprintf(stderr, "Not handling nested multipart message.\n"); + exit(1); + } + slurp_attr(line, "charset=", charset); + if (*charset) { + int i, c; + for (i = 0; (c = charset[i]) != 0; i++) + charset[i] = tolower(c); + } + return 0; +} + +static int handle_content_type(char *line) +{ + *multipart_boundary = 0; + if (slurp_attr(line, "boundary=", multipart_boundary + 2)) { + memcpy(multipart_boundary, "--", 2); + multipart_boundary_len = strlen(multipart_boundary); + } + slurp_attr(line, "charset=", charset); + return 0; +} + +static int handle_content_transfer_encoding(char *line) +{ + if (strcasestr(line, "base64")) + transfer_encoding = TE_BASE64; + else if (strcasestr(line, "quoted-printable")) + transfer_encoding = TE_QP; + else + transfer_encoding = TE_DONTCARE; + return 0; +} + +static int is_multipart_boundary(const char *line) +{ + return (!memcmp(line, multipart_boundary, multipart_boundary_len)); +} + +static int eatspace(char *line) +{ + int len = strlen(line); + while (len > 0 && isspace(line[len-1])) + line[--len] = 0; + return len; +} + +#define SEEN_FROM 01 +#define SEEN_DATE 02 +#define SEEN_SUBJECT 04 + +/* First lines of body can have From:, Date:, and Subject: */ +static int handle_inbody_header(int *seen, char *line) +{ + if (!memcmp("From:", line, 5) && isspace(line[5])) { + if (!(*seen & SEEN_FROM) && handle_from(line+6)) { + *seen |= SEEN_FROM; + return 1; + } + } + if (!memcmp("Date:", line, 5) && isspace(line[5])) { + if (!(*seen & SEEN_DATE)) { + handle_date(line+6); + *seen |= SEEN_DATE; + return 1; + } + } + if (!memcmp("Subject:", line, 8) && isspace(line[8])) { + if (!(*seen & SEEN_SUBJECT)) { + handle_subject(line+9); + *seen |= SEEN_SUBJECT; + return 1; + } + } + if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { + if (!(*seen & SEEN_SUBJECT)) { + handle_subject(line); + *seen |= SEEN_SUBJECT; + return 1; + } + } + return 0; +} + +static char *cleanup_subject(char *subject) +{ + if (keep_subject) + return subject; + for (;;) { + char *p; + int len, remove; + switch (*subject) { + case 'r': case 'R': + if (!memcmp("e:", subject+1, 2)) { + subject +=3; + continue; + } + break; + case ' ': case '\t': case ':': + subject++; + continue; + + case '[': + p = strchr(subject, ']'); + if (!p) { + subject++; + continue; + } + len = strlen(p); + remove = p - subject; + if (remove <= len *2) { + subject = p+1; + continue; + } + break; + } + return subject; + } +} + +static void cleanup_space(char *buf) +{ + unsigned char c; + while ((c = *buf) != 0) { + buf++; + if (isspace(c)) { + buf[-1] = ' '; + c = *buf; + while (isspace(c)) { + int len = strlen(buf); + memmove(buf, buf+1, len); + c = *buf; + } + } + } +} + +typedef int (*header_fn_t)(char *); +struct header_def { + const char *name; + header_fn_t func; + int namelen; +}; + +static void check_header(char *line, int len, struct header_def *header) +{ + int i; + + if (header[0].namelen <= 0) { + for (i = 0; header[i].name; i++) + header[i].namelen = strlen(header[i].name); + } + for (i = 0; header[i].name; i++) { + int len = header[i].namelen; + if (!strncasecmp(line, header[i].name, len) && + line[len] == ':' && isspace(line[len + 1])) { + header[i].func(line + len + 2); + break; + } + } +} + +static void check_subheader_line(char *line, int len) +{ + static struct header_def header[] = { + { "Content-Type", handle_subcontent_type }, + { "Content-Transfer-Encoding", + handle_content_transfer_encoding }, + { NULL }, + }; + check_header(line, len, header); +} +static void check_header_line(char *line, int len) +{ + static struct header_def header[] = { + { "From", handle_from }, + { "Date", handle_date }, + { "Subject", handle_subject }, + { "Content-Type", handle_content_type }, + { "Content-Transfer-Encoding", + handle_content_transfer_encoding }, + { NULL }, + }; + check_header(line, len, header); +} + +static int read_one_header_line(char *line, int sz, FILE *in) +{ + int ofs = 0; + while (ofs < sz) { + int peek, len; + if (fgets(line + ofs, sz - ofs, in) == NULL) + return ofs; + len = eatspace(line + ofs); + if (len == 0) + return ofs; + peek = fgetc(in); ungetc(peek, in); + if (peek == ' ' || peek == '\t') { + /* Yuck, 2822 header "folding" */ + ofs += len; + continue; + } + return ofs + len; + } + return ofs; +} + +static unsigned hexval(int c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + if (c >= 'a' && c <= 'f') + return c - 'a' + 10; + if (c >= 'A' && c <= 'F') + return c - 'A' + 10; + return ~0; +} + +static int decode_q_segment(char *in, char *ot, char *ep) +{ + int c; + while ((c = *in++) != 0 && (in <= ep)) { + if (c == '=') { + int d = *in++; + if (d == '\n' || !d) + break; /* drop trailing newline */ + *ot++ = ((hexval(d) << 4) | hexval(*in++)); + } + else + *ot++ = c; + } + *ot = 0; + return 0; +} + +static int decode_b_segment(char *in, char *ot, char *ep) +{ + /* Decode in..ep, possibly in-place to ot */ + int c, pos = 0, acc = 0; + + while ((c = *in++) != 0 && (in <= ep)) { + if (c == '+') + c = 62; + else if (c == '/') + c = 63; + else if ('A' <= c && c <= 'Z') + c -= 'A'; + else if ('a' <= c && c <= 'z') + c -= 'a' - 26; + else if ('0' <= c && c <= '9') + c -= '0' - 52; + else if (c == '=') { + /* padding is almost like (c == 0), except we do + * not output NUL resulting only from it; + * for now we just trust the data. + */ + c = 0; + } + else + continue; /* garbage */ + switch (pos++) { + case 0: + acc = (c << 2); + break; + case 1: + *ot++ = (acc | (c >> 4)); + acc = (c & 15) << 4; + break; + case 2: + *ot++ = (acc | (c >> 2)); + acc = (c & 3) << 6; + break; + case 3: + *ot++ = (acc | c); + acc = pos = 0; + break; + } + } + *ot = 0; + return 0; +} + +static void convert_to_utf8(char *line, char *charset) +{ + if (*charset) { + char *in, *out; + size_t insize, outsize, nrc; + char outbuf[4096]; /* cheat */ + iconv_t conv = iconv_open("utf-8", charset); + + if (conv == (iconv_t) -1) { + fprintf(stderr, "cannot convert from %s to utf-8\n", + charset); + *charset = 0; + return; + } + in = line; + insize = strlen(in); + out = outbuf; + outsize = sizeof(outbuf); + nrc = iconv(conv, &in, &insize, &out, &outsize); + iconv_close(conv); + if (nrc == (size_t) -1) + return; + *out = 0; + strcpy(line, outbuf); + } +} + +static void decode_header_bq(char *it) +{ + char *in, *out, *ep, *cp, *sp; + char outbuf[1000]; + + in = it; + out = outbuf; + while ((ep = strstr(in, "=?")) != NULL) { + int sz, encoding; + char charset_q[256], piecebuf[256]; + if (in != ep) { + sz = ep - in; + memcpy(out, in, sz); + out += sz; + in += sz; + } + /* E.g. + * ep : "=?iso-2022-jp?B?GyR...?= foo" + * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" + */ + ep += 2; + cp = strchr(ep, '?'); + if (!cp) + return; /* no munging */ + for (sp = ep; sp < cp; sp++) + charset_q[sp - ep] = tolower(*sp); + charset_q[cp - ep] = 0; + encoding = cp[1]; + if (!encoding || cp[2] != '?') + return; /* no munging */ + ep = strstr(cp + 3, "?="); + if (!ep) + return; /* no munging */ + switch (tolower(encoding)) { + default: + return; /* no munging */ + case 'b': + sz = decode_b_segment(cp + 3, piecebuf, ep); + break; + case 'q': + sz = decode_q_segment(cp + 3, piecebuf, ep); + break; + } + if (sz < 0) + return; + if (metainfo_utf8) + convert_to_utf8(piecebuf, charset_q); + strcpy(out, piecebuf); + out += strlen(out); + in = ep + 2; + } + strcpy(out, in); + strcpy(it, outbuf); +} + +static void decode_transfer_encoding(char *line) +{ + char *ep; + + switch (transfer_encoding) { + case TE_QP: + ep = line + strlen(line); + decode_q_segment(line, line, ep); + break; + case TE_BASE64: + ep = line + strlen(line); + decode_b_segment(line, line, ep); + break; + case TE_DONTCARE: + break; + } +} + +static void handle_info(void) +{ + char *sub; + static int done_info = 0; + + if (done_info) + return; + + done_info = 1; + sub = cleanup_subject(subject); + cleanup_space(name); + cleanup_space(date); + cleanup_space(email); + cleanup_space(sub); + + /* Unwrap inline B and Q encoding, and optionally + * normalize the meta information to utf8. + */ + decode_header_bq(name); + decode_header_bq(date); + decode_header_bq(email); + decode_header_bq(sub); + printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", + name, email, sub, date); +} + +/* We are inside message body and have read line[] already. + * Spit out the commit log. + */ +static int handle_commit_msg(void) +{ + if (!cmitmsg) + return 0; + do { + if (!memcmp("diff -", line, 6) || + !memcmp("---", line, 3) || + !memcmp("Index: ", line, 7)) + break; + if ((multipart_boundary[0] && is_multipart_boundary(line))) { + /* We come here when the first part had only + * the commit message without any patch. We + * pretend we have not seen this line yet, and + * go back to the loop. + */ + return 1; + } + + /* Unwrap transfer encoding and optionally + * normalize the log message to UTF-8. + */ + decode_transfer_encoding(line); + if (metainfo_utf8) + convert_to_utf8(line, charset); + fputs(line, cmitmsg); + } while (fgets(line, sizeof(line), stdin) != NULL); + fclose(cmitmsg); + cmitmsg = NULL; + return 0; +} + +/* We have done the commit message and have the first + * line of the patch in line[]. + */ +static void handle_patch(void) +{ + do { + if (multipart_boundary[0] && is_multipart_boundary(line)) + break; + /* Only unwrap transfer encoding but otherwise do not + * do anything. We do *NOT* want UTF-8 conversion + * here; we are dealing with the user payload. + */ + decode_transfer_encoding(line); + fputs(line, patchfile); + patch_lines++; + } while (fgets(line, sizeof(line), stdin) != NULL); +} + +/* multipart boundary and transfer encoding are set up for us, and we + * are at the end of the sub header. do equivalent of handle_body up + * to the next boundary without closing patchfile --- we will expect + * that the first part to contain commit message and a patch, and + * handle other parts as pure patches. + */ +static int handle_multipart_one_part(void) +{ + int seen = 0; + int n = 0; + int len; + + while (fgets(line, sizeof(line), stdin) != NULL) { + again: + len = eatspace(line); + n++; + if (!len) + continue; + if (is_multipart_boundary(line)) + break; + if (0 <= seen && handle_inbody_header(&seen, line)) + continue; + seen = -1; /* no more inbody headers */ + line[len] = '\n'; + handle_info(); + if (handle_commit_msg()) + goto again; + handle_patch(); + break; + } + if (n == 0) + return -1; + return 0; +} + +static void handle_multipart_body(void) +{ + int part_num = 0; + + /* Skip up to the first boundary */ + while (fgets(line, sizeof(line), stdin) != NULL) + if (is_multipart_boundary(line)) { + part_num = 1; + break; + } + if (!part_num) + return; + /* We are on boundary line. Start slurping the subhead. */ + while (1) { + int len = read_one_header_line(line, sizeof(line), stdin); + if (!len) { + if (handle_multipart_one_part() < 0) + return; + } + else + check_subheader_line(line, len); + } + fclose(patchfile); + if (!patch_lines) { + fprintf(stderr, "No patch found\n"); + exit(1); + } +} + +/* Non multipart message */ +static void handle_body(void) +{ + int seen = 0; + + while (fgets(line, sizeof(line), stdin) != NULL) { + int len = eatspace(line); + if (!len) + continue; + if (0 <= seen && handle_inbody_header(&seen, line)) + continue; + seen = -1; /* no more inbody headers */ + line[len] = '\n'; + handle_info(); + handle_commit_msg(); + handle_patch(); + break; + } + fclose(patchfile); + if (!patch_lines) { + fprintf(stderr, "No patch found\n"); + exit(1); + } +} + +static const char mailinfo_usage[] = + "git-mailinfo [-k] [-u] msg patch info"; + +static void usage(void) { + fprintf(stderr, "%s\n", mailinfo_usage); + exit(1); +} + +int main(int argc, char **argv) +{ + while (1 < argc && argv[1][0] == '-') { + if (!strcmp(argv[1], "-k")) + keep_subject = 1; + else if (!strcmp(argv[1], "-u")) + metainfo_utf8 = 1; + else + usage(); + argc--; argv++; + } + + if (argc != 3) + usage(); + cmitmsg = fopen(argv[1], "w"); + if (!cmitmsg) { + perror(argv[1]); + exit(1); + } + patchfile = fopen(argv[2], "w"); + if (!patchfile) { + perror(argv[2]); + exit(1); + } + while (1) { + int len = read_one_header_line(line, sizeof(line), stdin); + if (!len) { + if (multipart_boundary[0]) + handle_multipart_body(); + else + handle_body(); + break; + } + check_header_line(line, len); + } + return 0; +} diff --git a/mailsplit.c b/mailsplit.c new file mode 100644 index 0000000..a3238c2 --- /dev/null +++ b/mailsplit.c @@ -0,0 +1,145 @@ +/* + * Totally braindamaged mbox splitter program. + * + * It just splits a mbox into a list of files: "0001" "0002" .. + * so you can process them further from there. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int usage(void) +{ + fprintf(stderr, "mailsplit \n"); + exit(1); +} + +static int linelen(const char *map, unsigned long size) +{ + int len = 0, c; + + do { + c = *map; + map++; + size--; + len++; + } while (size && c != '\n'); + return len; +} + +static int is_from_line(const char *line, int len) +{ + const char *colon; + + if (len < 20 || memcmp("From ", line, 5)) + return 0; + + colon = line + len - 2; + line += 5; + for (;;) { + if (colon < line) + return 0; + if (*--colon == ':') + break; + } + + if (!isdigit(colon[-4]) || + !isdigit(colon[-2]) || + !isdigit(colon[-1]) || + !isdigit(colon[ 1]) || + !isdigit(colon[ 2])) + return 0; + + /* year */ + if (strtol(colon+3, NULL, 10) <= 90) + return 0; + + /* Ok, close enough */ + return 1; +} + +static int parse_email(const void *map, unsigned long size) +{ + unsigned long offset; + + if (size < 6 || memcmp("From ", map, 5)) + goto corrupt; + + /* Make sure we don't trigger on this first line */ + map++; size--; offset=1; + + /* + * Search for a line beginning with "From ", and + * having something that looks like a date format. + */ + do { + int len = linelen(map, size); + if (is_from_line(map, len)) + return offset; + map += len; + size -= len; + offset += len; + } while (size); + return offset; + +corrupt: + fprintf(stderr, "corrupt mailbox\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int fd, nr; + struct stat st; + unsigned long size; + void *map; + + if (argc != 3) + usage(); + fd = open(argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + exit(1); + } + if (chdir(argv[2]) < 0) + usage(); + if (fstat(fd, &st) < 0) { + perror("stat"); + exit(1); + } + size = st.st_size; + map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (map == MAP_FAILED) { + perror("mmap"); + close(fd); + exit(1); + } + close(fd); + nr = 0; + do { + char name[10]; + unsigned long len = parse_email(map, size); + assert(len <= size); + sprintf(name, "%04d", ++nr); + fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (fd < 0) { + perror(name); + exit(1); + } + if (write(fd, map, len) != len) { + perror("write"); + exit(1); + } + close(fd); + map += len; + size -= len; + } while (size > 0); + return 0; +} diff --git a/tools/.gitignore b/tools/.gitignore deleted file mode 100644 index d1ea9ea..0000000 --- a/tools/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -git-mailinfo -git-mailsplit diff --git a/tools/Makefile b/tools/Makefile deleted file mode 100644 index 5cc6d14..0000000 --- a/tools/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -# -# Make Linus git-tools -# -CC=gcc -CFLAGS = -O2 -g -Wall -ALL_CFLAGS = $(CFLAGS) -INSTALL=install -prefix=$(HOME) -bindir=$(prefix)/bin -# DESTDIR= - -PROGRAMS=git-mailsplit git-mailinfo -SCRIPTS=git-applymbox git-applypatch - -git-%: %.c - $(CC) $(ALL_CFLAGS) -o $@ $(filter %.c,$^) - -all: $(PROGRAMS) - -install: $(PROGRAMS) $(SCRIPTS) - $(INSTALL) -m755 -d $(DESTDIR)$(bindir) - $(INSTALL) $(PROGRAMS) $(SCRIPTS) $(DESTDIR)$(bindir) - -clean: - rm -f $(PROGRAMS) *.o diff --git a/tools/git-applymbox b/tools/git-applymbox deleted file mode 100755 index e58bb21..0000000 --- a/tools/git-applymbox +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/sh -## -## "dotest" is my stupid name for my patch-application script, which -## I never got around to renaming after I tested it. We're now on the -## second generation of scripts, still called "dotest". -## -## Update: Ryan Anderson finally shamed me into naming this "applymbox". -## -## You give it a mbox-format collection of emails, and it will try to -## apply them to the kernel using "applypatch" -## -## applymbox [-u] [-k] [-q] (-c .dotest/msg-number | mail_archive) [Signoff_file]" -## -## The patch application may fail in the middle. In which case: -## (1) look at .dotest/patch and fix it up to apply -## (2) re-run applymbox with -c .dotest/msg-number for the current one. -## Pay a special attention to the commit log message if you do this and -## use a Signoff_file, because applypatch wants to append the sign-off -## message to msg-clean every time it is run. - -. git-sh-setup-script || die "Not a git archive" - -usage () { - echo >&2 "applymbox [-u] [-k] [-q] (-c .dotest/ | mbox) [signoff]" - exit 1 -} - -keep_subject= query_apply= continue= utf8= resume=t -while case "$#" in 0) break ;; esac -do - case "$1" in - -u) utf8=-u ;; - -k) keep_subject=-k ;; - -q) query_apply=t ;; - -c) continue="$2"; resume=f; shift ;; - -*) usage ;; - *) break ;; - esac - shift -done - -case "$continue" in -'') - rm -rf .dotest - mkdir .dotest - git-mailsplit "$1" .dotest || exit 1 - shift -esac - -files=$(git-diff-cache --cached --name-only HEAD) || exit -if [ "$files" ]; then - echo "Dirty index: cannot apply patches (dirty: $files)" >&2 - exit 1 -fi - -case "$query_apply" in -t) touch .dotest/.query_apply -esac -case "$keep_subject" in --k) : >.dotest/.keep_subject -esac - -signoff="$1" -set x .dotest/0* -shift -while case "$#" in 0) break;; esac -do - i="$1" - case "$resume,$continue" in - f,$i) resume=t;; - f,*) shift - continue;; - *) - git-mailinfo $keep_subject $utf8 \ - .dotest/msg .dotest/patch <$i >.dotest/info || exit 1 - git-stripspace < .dotest/msg > .dotest/msg-clean - ;; - esac - while :; # for fixing up and retry - do - git-applypatch .dotest/msg-clean .dotest/patch .dotest/info "$signoff" - case "$?" in - 0 | 2 ) - # 2 is a special exit code from applypatch to indicate that - # the patch wasn't applied, but continue anyway - ;; - *) - ret=$? - if test -f .dotest/.query_apply - then - echo >&2 "* Patch failed." - echo >&2 "* You could fix it up in your editor and" - echo >&2 " retry. If you want to do so, say yes here" - echo >&2 " AFTER fixing .dotest/patch up." - echo >&2 -n "Retry [y/N]? " - read yesno - case "$yesno" in - [Yy]*) - continue ;; - esac - fi - exit $ret - esac - break - done - shift -done -# return to pristine -rm -fr .dotest diff --git a/tools/git-applypatch b/tools/git-applypatch deleted file mode 100755 index e5bc3c0..0000000 --- a/tools/git-applypatch +++ /dev/null @@ -1,118 +0,0 @@ -#!/bin/sh -## -## applypatch takes four file arguments, and uses those to -## apply the unpacked patch (surprise surprise) that they -## represent to the current tree. -## -## The arguments are: -## $1 - file with commit message -## $2 - file with the actual patch -## $3 - "info" file with Author, email and subject -## $4 - optional file containing signoff to add -## -. git-sh-setup-script || die "Not a git archive." - -final=.dotest/final-commit -## -## If this file exists, we ask before applying -## -query_apply=.dotest/.query_apply - -## We do not munge the first line of the commit message too much -## if this file exists. -keep_subject=.dotest/.keep_subject - - -MSGFILE=$1 -PATCHFILE=$2 -INFO=$3 -SIGNOFF=$4 -EDIT=${VISUAL:-${EDITOR:-vi}} - -export GIT_AUTHOR_NAME="$(sed -n '/^Author/ s/Author: //p' .dotest/info)" -export GIT_AUTHOR_EMAIL="$(sed -n '/^Email/ s/Email: //p' .dotest/info)" -export GIT_AUTHOR_DATE="$(sed -n '/^Date/ s/Date: //p' .dotest/info)" -export SUBJECT="$(sed -n '/^Subject/ s/Subject: //p' .dotest/info)" - -if test '' != "$SIGNOFF" -then - if test -f "$SIGNOFF" - then - SIGNOFF=`cat "$SIGNOFF"` || exit - elif case "$SIGNOFF" in yes | true | me | please) : ;; *) false ;; esac - then - SIGNOFF=`git-var GIT_COMMITTER_IDENT | sed -e ' - s/>.*/>/ - s/^/Signed-off-by: /' - ` - else - SIGNOFF= - fi - if test '' != "$SIGNOFF" - then - LAST_SIGNED_OFF_BY=` - sed -ne '/^Signed-off-by: /p' "$MSGFILE" | - tail -n 1 - ` - test "$LAST_SIGNED_OFF_BY" = "$SIGNOFF" || - echo "$SIGNOFF" >>"$MSGFILE" - fi -fi - -patch_header= -test -f "$keep_subject" || patch_header='[PATCH] ' - -{ - echo "$patch_header$SUBJECT" - if test -s "$MSGFILE" - then - echo - cat "$MSGFILE" - fi -} >"$final" - -interactive=yes -test -f "$query_apply" || interactive=no - -while [ "$interactive" = yes ]; do - echo "Commit Body is:" - echo "--------------------------" - cat "$final" - echo "--------------------------" - echo -n "Apply? [y]es/[n]o/[e]dit/[a]ccept all " - read reply - case "$reply" in - y|Y) interactive=no;; - n|N) exit 2;; # special value to tell dotest to keep going - e|E) "$EDIT" "$final";; - a|A) rm -f "$query_apply" - interactive=no ;; - esac -done - -if test -x "$GIT_DIR"/hooks/applypatch-msg -then - "$GIT_DIR"/hooks/applypatch-msg "$final" || exit -fi - -echo -echo Applying "'$SUBJECT'" -echo - -git-apply --index "$PATCHFILE" || exit 1 - -if test -x "$GIT_DIR"/hooks/pre-applypatch -then - "$GIT_DIR"/hooks/pre-applypatch || exit -fi - -tree=$(git-write-tree) || exit 1 -echo Wrote tree $tree -commit=$(git-commit-tree $tree -p $(cat "$GIT_DIR"/HEAD) < "$final") || exit 1 -echo Committed: $commit -echo $commit > "$GIT_DIR"/HEAD - -if test -x "$GIT_DIR"/hooks/post-applypatch -then - "$GIT_DIR"/hooks/post-applypatch -fi diff --git a/tools/mailinfo.c b/tools/mailinfo.c deleted file mode 100644 index df470bb..0000000 --- a/tools/mailinfo.c +++ /dev/null @@ -1,749 +0,0 @@ -/* - * Another stupid program, this one parsing the headers of an - * email to figure out authorship and subject - */ -#define _GNU_SOURCE -#include -#include -#include -#include -#include - -static FILE *cmitmsg, *patchfile; - -static int keep_subject = 0; -static int metainfo_utf8 = 0; -static char line[1000]; -static char date[1000]; -static char name[1000]; -static char email[1000]; -static char subject[1000]; - -static enum { - TE_DONTCARE, TE_QP, TE_BASE64, -} transfer_encoding; -static char charset[256]; - -static char multipart_boundary[1000]; -static int multipart_boundary_len; -static int patch_lines = 0; - -static char *sanity_check(char *name, char *email) -{ - int len = strlen(name); - if (len < 3 || len > 60) - return email; - if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>')) - return email; - return name; -} - -static int handle_from(char *line) -{ - char *at = strchr(line, '@'); - char *dst; - - if (!at) - return 0; - - /* - * If we already have one email, don't take any confusing lines - */ - if (*email && strchr(at+1, '@')) - return 0; - - /* Pick up the string around '@', possibly delimited with <> - * pair; that is the email part. White them out while copying. - */ - while (at > line) { - char c = at[-1]; - if (isspace(c)) - break; - if (c == '<') { - at[-1] = ' '; - break; - } - at--; - } - dst = email; - for (;;) { - unsigned char c = *at; - if (!c || c == '>' || isspace(c)) { - if (c == '>') - *at = ' '; - break; - } - *at++ = ' '; - *dst++ = c; - } - *dst++ = 0; - - /* The remainder is name. It could be "John Doe " - * or "john.doe@xz (John Doe)", but we have whited out the - * email part, so trim from both ends, possibly removing - * the () pair at the end. - */ - at = line + strlen(line); - while (at > line) { - unsigned char c = *--at; - if (!isspace(c)) { - at[(c == ')') ? 0 : 1] = 0; - break; - } - } - - at = line; - for (;;) { - unsigned char c = *at; - if (!c || !isspace(c)) { - if (c == '(') - at++; - break; - } - at++; - } - at = sanity_check(at, email); - strcpy(name, at); - return 1; -} - -static int handle_date(char *line) -{ - strcpy(date, line); - return 0; -} - -static int handle_subject(char *line) -{ - strcpy(subject, line); - return 0; -} - -/* NOTE NOTE NOTE. We do not claim we do full MIME. We just attempt - * to have enough heuristics to grok MIME encoded patches often found - * on our mailing lists. For example, we do not even treat header lines - * case insensitively. - */ - -static int slurp_attr(const char *line, const char *name, char *attr) -{ - char *ends, *ap = strcasestr(line, name); - size_t sz; - - if (!ap) { - *attr = 0; - return 0; - } - ap += strlen(name); - if (*ap == '"') { - ap++; - ends = "\""; - } - else - ends = "; \t"; - sz = strcspn(ap, ends); - memcpy(attr, ap, sz); - attr[sz] = 0; - return 1; -} - -static int handle_subcontent_type(char *line) -{ - /* We do not want to mess with boundary. Note that we do not - * handle nested multipart. - */ - if (strcasestr(line, "boundary=")) { - fprintf(stderr, "Not handling nested multipart message.\n"); - exit(1); - } - slurp_attr(line, "charset=", charset); - if (*charset) { - int i, c; - for (i = 0; (c = charset[i]) != 0; i++) - charset[i] = tolower(c); - } - return 0; -} - -static int handle_content_type(char *line) -{ - *multipart_boundary = 0; - if (slurp_attr(line, "boundary=", multipart_boundary + 2)) { - memcpy(multipart_boundary, "--", 2); - multipart_boundary_len = strlen(multipart_boundary); - } - slurp_attr(line, "charset=", charset); - return 0; -} - -static int handle_content_transfer_encoding(char *line) -{ - if (strcasestr(line, "base64")) - transfer_encoding = TE_BASE64; - else if (strcasestr(line, "quoted-printable")) - transfer_encoding = TE_QP; - else - transfer_encoding = TE_DONTCARE; - return 0; -} - -static int is_multipart_boundary(const char *line) -{ - return (!memcmp(line, multipart_boundary, multipart_boundary_len)); -} - -static int eatspace(char *line) -{ - int len = strlen(line); - while (len > 0 && isspace(line[len-1])) - line[--len] = 0; - return len; -} - -#define SEEN_FROM 01 -#define SEEN_DATE 02 -#define SEEN_SUBJECT 04 - -/* First lines of body can have From:, Date:, and Subject: */ -static int handle_inbody_header(int *seen, char *line) -{ - if (!memcmp("From:", line, 5) && isspace(line[5])) { - if (!(*seen & SEEN_FROM) && handle_from(line+6)) { - *seen |= SEEN_FROM; - return 1; - } - } - if (!memcmp("Date:", line, 5) && isspace(line[5])) { - if (!(*seen & SEEN_DATE)) { - handle_date(line+6); - *seen |= SEEN_DATE; - return 1; - } - } - if (!memcmp("Subject:", line, 8) && isspace(line[8])) { - if (!(*seen & SEEN_SUBJECT)) { - handle_subject(line+9); - *seen |= SEEN_SUBJECT; - return 1; - } - } - if (!memcmp("[PATCH]", line, 7) && isspace(line[7])) { - if (!(*seen & SEEN_SUBJECT)) { - handle_subject(line); - *seen |= SEEN_SUBJECT; - return 1; - } - } - return 0; -} - -static char *cleanup_subject(char *subject) -{ - if (keep_subject) - return subject; - for (;;) { - char *p; - int len, remove; - switch (*subject) { - case 'r': case 'R': - if (!memcmp("e:", subject+1, 2)) { - subject +=3; - continue; - } - break; - case ' ': case '\t': case ':': - subject++; - continue; - - case '[': - p = strchr(subject, ']'); - if (!p) { - subject++; - continue; - } - len = strlen(p); - remove = p - subject; - if (remove <= len *2) { - subject = p+1; - continue; - } - break; - } - return subject; - } -} - -static void cleanup_space(char *buf) -{ - unsigned char c; - while ((c = *buf) != 0) { - buf++; - if (isspace(c)) { - buf[-1] = ' '; - c = *buf; - while (isspace(c)) { - int len = strlen(buf); - memmove(buf, buf+1, len); - c = *buf; - } - } - } -} - -typedef int (*header_fn_t)(char *); -struct header_def { - const char *name; - header_fn_t func; - int namelen; -}; - -static void check_header(char *line, int len, struct header_def *header) -{ - int i; - - if (header[0].namelen <= 0) { - for (i = 0; header[i].name; i++) - header[i].namelen = strlen(header[i].name); - } - for (i = 0; header[i].name; i++) { - int len = header[i].namelen; - if (!strncasecmp(line, header[i].name, len) && - line[len] == ':' && isspace(line[len + 1])) { - header[i].func(line + len + 2); - break; - } - } -} - -static void check_subheader_line(char *line, int len) -{ - static struct header_def header[] = { - { "Content-Type", handle_subcontent_type }, - { "Content-Transfer-Encoding", - handle_content_transfer_encoding }, - { NULL }, - }; - check_header(line, len, header); -} -static void check_header_line(char *line, int len) -{ - static struct header_def header[] = { - { "From", handle_from }, - { "Date", handle_date }, - { "Subject", handle_subject }, - { "Content-Type", handle_content_type }, - { "Content-Transfer-Encoding", - handle_content_transfer_encoding }, - { NULL }, - }; - check_header(line, len, header); -} - -static int read_one_header_line(char *line, int sz, FILE *in) -{ - int ofs = 0; - while (ofs < sz) { - int peek, len; - if (fgets(line + ofs, sz - ofs, in) == NULL) - return ofs; - len = eatspace(line + ofs); - if (len == 0) - return ofs; - peek = fgetc(in); ungetc(peek, in); - if (peek == ' ' || peek == '\t') { - /* Yuck, 2822 header "folding" */ - ofs += len; - continue; - } - return ofs + len; - } - return ofs; -} - -static unsigned hexval(int c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - return ~0; -} - -static int decode_q_segment(char *in, char *ot, char *ep) -{ - int c; - while ((c = *in++) != 0 && (in <= ep)) { - if (c == '=') { - int d = *in++; - if (d == '\n' || !d) - break; /* drop trailing newline */ - *ot++ = ((hexval(d) << 4) | hexval(*in++)); - } - else - *ot++ = c; - } - *ot = 0; - return 0; -} - -static int decode_b_segment(char *in, char *ot, char *ep) -{ - /* Decode in..ep, possibly in-place to ot */ - int c, pos = 0, acc = 0; - - while ((c = *in++) != 0 && (in <= ep)) { - if (c == '+') - c = 62; - else if (c == '/') - c = 63; - else if ('A' <= c && c <= 'Z') - c -= 'A'; - else if ('a' <= c && c <= 'z') - c -= 'a' - 26; - else if ('0' <= c && c <= '9') - c -= '0' - 52; - else if (c == '=') { - /* padding is almost like (c == 0), except we do - * not output NUL resulting only from it; - * for now we just trust the data. - */ - c = 0; - } - else - continue; /* garbage */ - switch (pos++) { - case 0: - acc = (c << 2); - break; - case 1: - *ot++ = (acc | (c >> 4)); - acc = (c & 15) << 4; - break; - case 2: - *ot++ = (acc | (c >> 2)); - acc = (c & 3) << 6; - break; - case 3: - *ot++ = (acc | c); - acc = pos = 0; - break; - } - } - *ot = 0; - return 0; -} - -static void convert_to_utf8(char *line, char *charset) -{ - if (*charset) { - char *in, *out; - size_t insize, outsize, nrc; - char outbuf[4096]; /* cheat */ - iconv_t conv = iconv_open("utf-8", charset); - - if (conv == (iconv_t) -1) { - fprintf(stderr, "cannot convert from %s to utf-8\n", - charset); - *charset = 0; - return; - } - in = line; - insize = strlen(in); - out = outbuf; - outsize = sizeof(outbuf); - nrc = iconv(conv, &in, &insize, &out, &outsize); - iconv_close(conv); - if (nrc == (size_t) -1) - return; - *out = 0; - strcpy(line, outbuf); - } -} - -static void decode_header_bq(char *it) -{ - char *in, *out, *ep, *cp, *sp; - char outbuf[1000]; - - in = it; - out = outbuf; - while ((ep = strstr(in, "=?")) != NULL) { - int sz, encoding; - char charset_q[256], piecebuf[256]; - if (in != ep) { - sz = ep - in; - memcpy(out, in, sz); - out += sz; - in += sz; - } - /* E.g. - * ep : "=?iso-2022-jp?B?GyR...?= foo" - * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz" - */ - ep += 2; - cp = strchr(ep, '?'); - if (!cp) - return; /* no munging */ - for (sp = ep; sp < cp; sp++) - charset_q[sp - ep] = tolower(*sp); - charset_q[cp - ep] = 0; - encoding = cp[1]; - if (!encoding || cp[2] != '?') - return; /* no munging */ - ep = strstr(cp + 3, "?="); - if (!ep) - return; /* no munging */ - switch (tolower(encoding)) { - default: - return; /* no munging */ - case 'b': - sz = decode_b_segment(cp + 3, piecebuf, ep); - break; - case 'q': - sz = decode_q_segment(cp + 3, piecebuf, ep); - break; - } - if (sz < 0) - return; - if (metainfo_utf8) - convert_to_utf8(piecebuf, charset_q); - strcpy(out, piecebuf); - out += strlen(out); - in = ep + 2; - } - strcpy(out, in); - strcpy(it, outbuf); -} - -static void decode_transfer_encoding(char *line) -{ - char *ep; - - switch (transfer_encoding) { - case TE_QP: - ep = line + strlen(line); - decode_q_segment(line, line, ep); - break; - case TE_BASE64: - ep = line + strlen(line); - decode_b_segment(line, line, ep); - break; - case TE_DONTCARE: - break; - } -} - -static void handle_info(void) -{ - char *sub; - static int done_info = 0; - - if (done_info) - return; - - done_info = 1; - sub = cleanup_subject(subject); - cleanup_space(name); - cleanup_space(date); - cleanup_space(email); - cleanup_space(sub); - - /* Unwrap inline B and Q encoding, and optionally - * normalize the meta information to utf8. - */ - decode_header_bq(name); - decode_header_bq(date); - decode_header_bq(email); - decode_header_bq(sub); - printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", - name, email, sub, date); -} - -/* We are inside message body and have read line[] already. - * Spit out the commit log. - */ -static int handle_commit_msg(void) -{ - if (!cmitmsg) - return 0; - do { - if (!memcmp("diff -", line, 6) || - !memcmp("---", line, 3) || - !memcmp("Index: ", line, 7)) - break; - if ((multipart_boundary[0] && is_multipart_boundary(line))) { - /* We come here when the first part had only - * the commit message without any patch. We - * pretend we have not seen this line yet, and - * go back to the loop. - */ - return 1; - } - - /* Unwrap transfer encoding and optionally - * normalize the log message to UTF-8. - */ - decode_transfer_encoding(line); - if (metainfo_utf8) - convert_to_utf8(line, charset); - fputs(line, cmitmsg); - } while (fgets(line, sizeof(line), stdin) != NULL); - fclose(cmitmsg); - cmitmsg = NULL; - return 0; -} - -/* We have done the commit message and have the first - * line of the patch in line[]. - */ -static void handle_patch(void) -{ - do { - if (multipart_boundary[0] && is_multipart_boundary(line)) - break; - /* Only unwrap transfer encoding but otherwise do not - * do anything. We do *NOT* want UTF-8 conversion - * here; we are dealing with the user payload. - */ - decode_transfer_encoding(line); - fputs(line, patchfile); - patch_lines++; - } while (fgets(line, sizeof(line), stdin) != NULL); -} - -/* multipart boundary and transfer encoding are set up for us, and we - * are at the end of the sub header. do equivalent of handle_body up - * to the next boundary without closing patchfile --- we will expect - * that the first part to contain commit message and a patch, and - * handle other parts as pure patches. - */ -static int handle_multipart_one_part(void) -{ - int seen = 0; - int n = 0; - int len; - - while (fgets(line, sizeof(line), stdin) != NULL) { - again: - len = eatspace(line); - n++; - if (!len) - continue; - if (is_multipart_boundary(line)) - break; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - if (handle_commit_msg()) - goto again; - handle_patch(); - break; - } - if (n == 0) - return -1; - return 0; -} - -static void handle_multipart_body(void) -{ - int part_num = 0; - - /* Skip up to the first boundary */ - while (fgets(line, sizeof(line), stdin) != NULL) - if (is_multipart_boundary(line)) { - part_num = 1; - break; - } - if (!part_num) - return; - /* We are on boundary line. Start slurping the subhead. */ - while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { - if (handle_multipart_one_part() < 0) - return; - } - else - check_subheader_line(line, len); - } - fclose(patchfile); - if (!patch_lines) { - fprintf(stderr, "No patch found\n"); - exit(1); - } -} - -/* Non multipart message */ -static void handle_body(void) -{ - int seen = 0; - - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = eatspace(line); - if (!len) - continue; - if (0 <= seen && handle_inbody_header(&seen, line)) - continue; - seen = -1; /* no more inbody headers */ - line[len] = '\n'; - handle_info(); - handle_commit_msg(); - handle_patch(); - break; - } - fclose(patchfile); - if (!patch_lines) { - fprintf(stderr, "No patch found\n"); - exit(1); - } -} - -static const char mailinfo_usage[] = - "git-mailinfo [-k] [-u] msg patch info"; - -static void usage(void) { - fprintf(stderr, "%s\n", mailinfo_usage); - exit(1); -} - -int main(int argc, char **argv) -{ - while (1 < argc && argv[1][0] == '-') { - if (!strcmp(argv[1], "-k")) - keep_subject = 1; - else if (!strcmp(argv[1], "-u")) - metainfo_utf8 = 1; - else - usage(); - argc--; argv++; - } - - if (argc != 3) - usage(); - cmitmsg = fopen(argv[1], "w"); - if (!cmitmsg) { - perror(argv[1]); - exit(1); - } - patchfile = fopen(argv[2], "w"); - if (!patchfile) { - perror(argv[2]); - exit(1); - } - while (1) { - int len = read_one_header_line(line, sizeof(line), stdin); - if (!len) { - if (multipart_boundary[0]) - handle_multipart_body(); - else - handle_body(); - break; - } - check_header_line(line, len); - } - return 0; -} diff --git a/tools/mailsplit.c b/tools/mailsplit.c deleted file mode 100644 index a3238c2..0000000 --- a/tools/mailsplit.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Totally braindamaged mbox splitter program. - * - * It just splits a mbox into a list of files: "0001" "0002" .. - * so you can process them further from there. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int usage(void) -{ - fprintf(stderr, "mailsplit \n"); - exit(1); -} - -static int linelen(const char *map, unsigned long size) -{ - int len = 0, c; - - do { - c = *map; - map++; - size--; - len++; - } while (size && c != '\n'); - return len; -} - -static int is_from_line(const char *line, int len) -{ - const char *colon; - - if (len < 20 || memcmp("From ", line, 5)) - return 0; - - colon = line + len - 2; - line += 5; - for (;;) { - if (colon < line) - return 0; - if (*--colon == ':') - break; - } - - if (!isdigit(colon[-4]) || - !isdigit(colon[-2]) || - !isdigit(colon[-1]) || - !isdigit(colon[ 1]) || - !isdigit(colon[ 2])) - return 0; - - /* year */ - if (strtol(colon+3, NULL, 10) <= 90) - return 0; - - /* Ok, close enough */ - return 1; -} - -static int parse_email(const void *map, unsigned long size) -{ - unsigned long offset; - - if (size < 6 || memcmp("From ", map, 5)) - goto corrupt; - - /* Make sure we don't trigger on this first line */ - map++; size--; offset=1; - - /* - * Search for a line beginning with "From ", and - * having something that looks like a date format. - */ - do { - int len = linelen(map, size); - if (is_from_line(map, len)) - return offset; - map += len; - size -= len; - offset += len; - } while (size); - return offset; - -corrupt: - fprintf(stderr, "corrupt mailbox\n"); - exit(1); -} - -int main(int argc, char **argv) -{ - int fd, nr; - struct stat st; - unsigned long size; - void *map; - - if (argc != 3) - usage(); - fd = open(argv[1], O_RDONLY); - if (fd < 0) { - perror(argv[1]); - exit(1); - } - if (chdir(argv[2]) < 0) - usage(); - if (fstat(fd, &st) < 0) { - perror("stat"); - exit(1); - } - size = st.st_size; - map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - if (map == MAP_FAILED) { - perror("mmap"); - close(fd); - exit(1); - } - close(fd); - nr = 0; - do { - char name[10]; - unsigned long len = parse_email(map, size); - assert(len <= size); - sprintf(name, "%04d", ++nr); - fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); - if (fd < 0) { - perror(name); - exit(1); - } - if (write(fd, map, len) != len) { - perror("write"); - exit(1); - } - close(fd); - map += len; - size -= len; - } while (size > 0); - return 0; -} -- cgit v0.10.2-6-g49f6