From c5f7674a97e621bfab5544165098b4860ee6e247 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Jul 2005 10:05:26 -0700 Subject: Prepare git-tools for merging into the main git archive Rename into a "tools" subdirectory, and change name of "dotest" to "applymbox". Remove stripspace (which was already copied into git) and cvs2git (which was likewise already copied into git, and then replaced by a much better perl version). All of this was brought on by Ryan Anderson shaming me into it. Thanks. I guess. diff --git a/Makefile b/Makefile deleted file mode 100644 index 9998682..0000000 --- a/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -CC=gcc -CFLAGS=-Wall -O2 -HOME=$(shell echo $$HOME) - -PROGRAMS=mailsplit mailinfo stripspace cvs2git -SCRIPTS=dotest applypatch - -all: $(PROGRAMS) - -install: $(PROGRAMS) $(SCRIPTS) - cp -f $(PROGRAMS) $(SCRIPTS) $(HOME)/bin/ - -clean: - rm -f $(PROGRAMS) *.o diff --git a/applypatch b/applypatch deleted file mode 100755 index 5a3a44b..0000000 --- a/applypatch +++ /dev/null @@ -1,64 +0,0 @@ -#!/bin/sh -## -## applypatch takes four file arguments, and uses those to -## apply the unpacked patch (surprise surprise) that they -## represent to the current tree. -## -## The arguments are: -## $1 - file with commit message -## $2 - file with the actual patch -## $3 - "info" file with Author, email and subject -## $4 - optional file containing signoff to add -## -signoff="$4" -final=.dotest/final-commit -## -## If this file exists, we ask before applying -## -query_apply=.dotest/.query_apply -MSGFILE=$1 -PATCHFILE=$2 -INFO=$3 -EDIT=${VISUAL:-$EDITOR} -EDIT=${EDIT:-vi} - -export GIT_AUTHOR_NAME="$(sed -n '/^Author/ s/Author: //p' .dotest/info)" -export GIT_AUTHOR_EMAIL="$(sed -n '/^Email/ s/Email: //p' .dotest/info)" -export GIT_AUTHOR_DATE="$(sed -n '/^Date/ s/Date: //p' .dotest/info)" -export SUBJECT="$(sed -n '/^Subject/ s/Subject: //p' .dotest/info)" - -if [ -n "$signoff" -a -f "$signoff" ]; then - cat $signoff >> $MSGFILE -fi - -(echo "[PATCH] $SUBJECT" ; if [ -s $MSGFILE ]; then echo ; cat $MSGFILE; fi ) > $final - -f=0 -[ -f $query_apply ] || f=1 - -while [ $f -eq 0 ]; do - echo "Commit Body is:" - echo "--------------------------" - cat $final - echo "--------------------------" - echo -n "Apply? [y]es/[n]o/[e]dit/[a]ccept all " - read reply - case $reply in - y|Y) f=1;; - n|N) exit 2;; # special value to tell dotest to keep going - e|E) $EDIT $final;; - a|A) rm -f $query_apply - f=1;; - esac -done - -echo -echo Applying "'$SUBJECT'" -echo - -git-apply --index $PATCHFILE || exit 1 -tree=$(git-write-tree) || exit 1 -echo Wrote tree $tree -commit=$(git-commit-tree $tree -p $(cat .git/HEAD) < $final) || exit 1 -echo Committed: $commit -echo $commit > .git/HEAD diff --git a/cvs2git.c b/cvs2git.c deleted file mode 100644 index 06dd74b..0000000 --- a/cvs2git.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * cvs2git - * - * Copyright (C) Linus Torvalds 2005 - */ - -#include -#include -#include -#include -#include - -static int verbose = 0; - -/* - * This is a really stupid program that takes cvsps output, and - * generates a a long _shell_script_ that will create the GIT archive - * from it. - * - * You've been warned. I told you it was stupid. - * - * NOTE NOTE NOTE! In order to do branches correctly, this needs - * the fixed cvsps that has the "Ancestor branch" tag output. - * Hopefully David Mansfield will update his distribution soon - * enough (he's the one who wrote the patch, so at least we don't - * have to figt maintainer issues ;) - * - * Usage: - * - * TZ=UTC cvsps -A | - * cvs2git --cvsroot=[root] --module=[module] > script - * - * Creates a shell script that will generate the .git archive of - * the names CVS repository. - * - * IMPORTANT NOTE ABOUT "cvsps"! This requires version 2.1 or better, - * and the "TZ=UTC" and the "-A" flag is required for sane results! - */ -enum state { - Header, - Log, - Members -}; - -static const char *cvsroot; -static const char *cvsmodule; - -static char date[100]; -static char author[100]; -static char branch[100]; -static char ancestor[100]; -static char tag[100]; -static char log[32768]; -static int loglen = 0; -static int initial_commit = 1; - -static void lookup_author(char *n, char **name, char **email) -{ - /* - * FIXME!!! I'm lazy and stupid. - * - * This could be something like - * - * printf("lookup_author '%s'\n", n); - * *name = "$author_name"; - * *email = "$author_email"; - * - * and that would allow the script to do its own - * lookups at run-time. - */ - *name = n; - *email = n; -} - -static void prepare_commit(void) -{ - char *author_name, *author_email; - char *src_branch; - - lookup_author(author, &author_name, &author_email); - - printf("export GIT_COMMITTER_NAME=%s\n", author_name); - printf("export GIT_COMMITTER_EMAIL=%s\n", author_email); - printf("export GIT_COMMITTER_DATE='+0000 %s'\n", date); - - printf("export GIT_AUTHOR_NAME=%s\n", author_name); - printf("export GIT_AUTHOR_EMAIL=%s\n", author_email); - printf("export GIT_AUTHOR_DATE='+0000 %s'\n", date); - - if (initial_commit) - return; - - src_branch = *ancestor ? ancestor : branch; - if (!strcmp(src_branch, "HEAD")) - src_branch = "master"; - printf("ln -sf refs/heads/'%s' .git/HEAD\n", src_branch); - - /* - * Even if cvsps claims an ancestor, we'll let the new - * branch name take precedence if it already exists - */ - if (*ancestor) { - src_branch = branch; - if (!strcmp(src_branch, "HEAD")) - src_branch = "master"; - printf("[ -e .git/refs/heads/'%s' ] && ln -sf refs/heads/'%s' .git/HEAD\n", - src_branch, src_branch); - } - - printf("git-read-tree -m HEAD || exit 1\n"); - printf("git-checkout-cache -f -u -a\n"); -} - -static void commit(void) -{ - const char *cmit_parent = initial_commit ? "" : "-p HEAD"; - const char *dst_branch; - int i; - - printf("tree=$(git-write-tree)\n"); - printf("cat > .cmitmsg < .git/refs/heads/'%s'\n", dst_branch); - - printf("echo 'Committed (to %s):' ; cat .cmitmsg; echo\n", dst_branch); - - *date = 0; - *author = 0; - *branch = 0; - *ancestor = 0; - *tag = 0; - loglen = 0; - - initial_commit = 0; -} - -static void update_file(char *line) -{ - char *name, *version; - char *dir; - - while (isspace(*line)) - line++; - name = line; - line = strchr(line, ':'); - if (!line) - return; - *line++ = 0; - line = strchr(line, '>'); - if (!line) - return; - *line++ = 0; - version = line; - line = strchr(line, '('); - if (line) { /* "(DEAD)" */ - printf("git-update-cache --force-remove '%s'\n", name); - return; - } - - dir = strrchr(name, '/'); - if (dir) - printf("mkdir -p %.*s\n", (int)(dir - name), name); - - printf("cvs -q -d %s checkout -r%s -p '%s/%s' > '%s'\n", cvsroot, version, cvsmodule, name, name); - printf("git-update-cache --add -- '%s'\n", name); -} - -struct hdrentry { - const char *name; - char *dest; -} hdrs[] = { - { "Date:", date }, - { "Author:", author }, - { "Branch:", branch }, - { "Ancestor branch:", ancestor }, - { "Tag:", tag }, - { "Log:", NULL }, - { NULL, NULL } -}; - -int main(int argc, char **argv) -{ - static char line[1000]; - enum state state = Header; - int i; - - for (i = 1; i < argc; i++) { - const char *arg = argv[i]; - if (!memcmp(arg, "--cvsroot=", 10)) { - cvsroot = arg + 10; - continue; - } - if (!memcmp(arg, "--module=", 9)) { - cvsmodule = arg+9; - continue; - } - if (!strcmp(arg, "-v")) { - verbose = 1; - continue; - } - } - - - if (!cvsroot) - cvsroot = getenv("CVSROOT"); - - if (!cvsmodule || !cvsroot) { - fprintf(stderr, "I need a CVSROOT and module name\n"); - exit(1); - } - - printf("[ -d .git ] && exit 1\n"); - printf("git-init-db\n"); - printf("mkdir -p .git/refs/heads\n"); - printf("mkdir -p .git/refs/tags\n"); - printf("ln -sf refs/heads/master .git/HEAD\n"); - - while (fgets(line, sizeof(line), stdin) != NULL) { - int linelen = strlen(line); - - while (linelen && isspace(line[linelen-1])) - line[--linelen] = 0; - - switch (state) { - struct hdrentry *entry; - - case Header: - if (verbose) - printf("# H: %s\n", line); - for (entry = hdrs ; entry->name ; entry++) { - int len = strlen(entry->name); - char *val; - - if (memcmp(entry->name, line, len)) - continue; - if (!entry->dest) { - state = Log; - break; - } - val = line + len; - linelen -= len; - while (isspace(*val)) { - val++; - linelen--; - } - memcpy(entry->dest, val, linelen+1); - break; - } - continue; - - case Log: - if (verbose) - printf("# L: %s\n", line); - if (!strcmp(line, "Members:")) { - while (loglen && isspace(log[loglen-1])) - log[--loglen] = 0; - prepare_commit(); - state = Members; - continue; - } - - if (loglen + linelen + 5 > sizeof(log)) - continue; - memcpy(log + loglen, line, linelen); - loglen += linelen; - log[loglen++] = '\n'; - continue; - - case Members: - if (verbose) - printf("# M: %s\n", line); - if (!linelen) { - commit(); - state = Header; - continue; - } - update_file(line); - continue; - } - } - return 0; -} diff --git a/dotest b/dotest deleted file mode 100755 index c6f44e1c..0000000 --- a/dotest +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/sh -## -## "dotest" is my stupid name for my patch-application script, which -## I never got around to renaming after I tested it. We're now on the -## second generation of scripts, still called "dotest". -## -## You give it a mbox-format collection of emails, and it will try to -## apply them to the kernel using "applypatch" -## -## dotest [ -q ] mail_archive [Signoff_file] -## -rm -rf .dotest -mkdir .dotest -case $1 in - - -q) touch .dotest/.query_apply - shift;; -esac -mailsplit $1 .dotest || exit 1 -for i in .dotest/* -do - mailinfo .dotest/msg .dotest/patch < $i > .dotest/info || exit 1 - stripspace < .dotest/msg > .dotest/msg-clean - applypatch .dotest/msg-clean .dotest/patch .dotest/info "$2" - ret=$? - if [ $ret -ne 0 ]; then - # 2 is a special exit code from applypatch to indicate that - # the patch wasn't applied, but continue anyway - [ $ret -ne 2 ] && exit $ret - fi -done -# return to pristine -rm -fr .dotest diff --git a/mailinfo.c b/mailinfo.c deleted file mode 100644 index ae279bf..0000000 --- a/mailinfo.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Another stupid program, this one parsing the headers of an - * email to figure out authorship and subject - */ -#include -#include -#include -#include - -static FILE *cmitmsg, *patchfile; - -static char line[1000]; -static char date[1000]; -static char name[1000]; -static char email[1000]; -static char subject[1000]; - -static char *sanity_check(char *name, char *email) -{ - int len = strlen(name); - if (len < 3 || len > 60) - return email; - if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>')) - return email; - return name; -} - -static int handle_from(char *line) -{ - char *at = strchr(line, '@'); - char *dst; - - if (!at) - return 0; - - /* - * If we already have one email, don't take any confusing lines - */ - if (*email && strchr(at+1, '@')) - return 0; - - while (at > line) { - char c = at[-1]; - if (isspace(c) || c == '<') - break; - at--; - } - dst = email; - for (;;) { - unsigned char c = *at; - if (!c || c == '>' || isspace(c)) - break; - *at++ = ' '; - *dst++ = c; - } - *dst++ = 0; - - at = line + strlen(line); - while (at > line) { - unsigned char c = *--at; - if (isalnum(c)) - break; - *at = 0; - } - - at = line; - for (;;) { - unsigned char c = *at; - if (!c) - break; - if (isalnum(c)) - break; - at++; - } - - at = sanity_check(at, email); - - strcpy(name, at); - return 1; -} - -static void handle_date(char *line) -{ - strcpy(date, line); -} - -static void handle_subject(char *line) -{ - strcpy(subject, line); -} - -static void add_subject_line(char *line) -{ - while (isspace(*line)) - line++; - *--line = ' '; - strcat(subject, line); -} - -static void check_line(char *line, int len) -{ - static int cont = -1; - if (!memcmp(line, "From:", 5) && isspace(line[5])) { - handle_from(line+6); - cont = 0; - return; - } - if (!memcmp(line, "Date:", 5) && isspace(line[5])) { - handle_date(line+6); - cont = 0; - return; - } - if (!memcmp(line, "Subject:", 8) && isspace(line[8])) { - handle_subject(line+9); - cont = 1; - return; - } - if (isspace(*line)) { - switch (cont) { - case 0: - fprintf(stderr, "I don't do 'Date:' or 'From:' line continuations\n"); - break; - case 1: - add_subject_line(line); - return; - default: - break; - } - } - cont = -1; -} - -static char * cleanup_subject(char *subject) -{ - for (;;) { - char *p; - int len, remove; - switch (*subject) { - case 'r': case 'R': - if (!memcmp("e:", subject+1, 2)) { - subject +=3; - continue; - } - break; - case ' ': case '\t': case ':': - subject++; - continue; - - case '[': - p = strchr(subject, ']'); - if (!p) { - subject++; - continue; - } - len = strlen(p); - remove = p - subject; - if (remove <= len *2) { - subject = p+1; - continue; - } - break; - } - return subject; - } -} - -static void cleanup_space(char *buf) -{ - unsigned char c; - while ((c = *buf) != 0) { - buf++; - if (isspace(c)) { - buf[-1] = ' '; - c = *buf; - while (isspace(c)) { - int len = strlen(buf); - memmove(buf, buf+1, len); - c = *buf; - } - } - } -} - -static void handle_rest(void) -{ - char *sub = cleanup_subject(subject); - cleanup_space(name); - cleanup_space(date); - cleanup_space(email); - cleanup_space(sub); - printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", name, email, sub, date); - FILE *out = cmitmsg; - - do { - if (!memcmp("diff -", line, 6) || - !memcmp("---", line, 3) || - !memcmp("Index: ", line, 7)) - out = patchfile; - - fputs(line, out); - } while (fgets(line, sizeof(line), stdin) != NULL); - - if (out == cmitmsg) { - fprintf(stderr, "No patch found\n"); - exit(1); - } - - fclose(cmitmsg); - fclose(patchfile); -} - -static int eatspace(char *line) -{ - int len = strlen(line); - while (len > 0 && isspace(line[len-1])) - line[--len] = 0; - return len; -} - -static void handle_body(void) -{ - int has_from = 0; - - /* First line of body can be a From: */ - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = eatspace(line); - if (!len) - continue; - if (!memcmp("From:", line, 5) && isspace(line[5])) { - if (!has_from && handle_from(line+6)) { - has_from = 1; - continue; - } - } - line[len] = '\n'; - handle_rest(); - break; - } -} - -static void usage(void) -{ - fprintf(stderr, "mailinfo msg-file path-file < email\n"); - exit(1); -} - -int main(int argc, char ** argv) -{ - if (argc != 3) - usage(); - cmitmsg = fopen(argv[1], "w"); - if (!cmitmsg) { - perror(argv[1]); - exit(1); - } - patchfile = fopen(argv[2], "w"); - if (!patchfile) { - perror(argv[2]); - exit(1); - } - while (fgets(line, sizeof(line), stdin) != NULL) { - int len = eatspace(line); - if (!len) { - handle_body(); - break; - } - check_line(line, len); - } - return 0; -} diff --git a/mailsplit.c b/mailsplit.c deleted file mode 100644 index 9379fbc..0000000 --- a/mailsplit.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Totally braindamaged mbox splitter program. - * - * It just splits a mbox into a list of files: "0001" "0002" .. - * so you can process them further from there. - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static int usage(void) -{ - fprintf(stderr, "mailsplit \n"); - exit(1); -} - -static int linelen(const char *map, unsigned long size) -{ - int len = 0, c; - - do { - c = *map; - map++; - size--; - len++; - } while (size && c != '\n'); - return len; -} - -static int is_from_line(const char *line, int len) -{ - const char *colon; - - if (len < 20 || memcmp("From ", line, 5)) - return 0; - - colon = line + len - 2; - line += 5; - for (;;) { - if (colon < line) - return 0; - if (*--colon == ':') - break; - } - - if (!isdigit(colon[-4]) || - !isdigit(colon[-2]) || - !isdigit(colon[-1]) || - !isdigit(colon[ 1]) || - !isdigit(colon[ 2])) - return 0; - - /* year */ - if (strtol(colon+3, NULL, 10) <= 90) - return 0; - - /* Ok, close enough */ - return 1; -} - -static int parse_email(const void *map, unsigned long size) -{ - unsigned long offset; - - if (size < 6 || memcmp("From ", map, 5)) - goto corrupt; - - /* Make sure we don't trigger on this first line */ - map++; size--; offset=1; - - /* - * Search for a line beginning with "From ", and - * having smething that looks like a date format. - */ - do { - int len = linelen(map, size); - if (is_from_line(map, len)) - return offset; - map += len; - size -= len; - offset += len; - } while (size); - return offset; - -corrupt: - fprintf(stderr, "corrupt mailbox\n"); - exit(1); -} - -int main(int argc, char **argv) -{ - int fd, nr; - struct stat st; - unsigned long size; - void *map; - - if (argc != 3) - usage(); - fd = open(argv[1], O_RDONLY); - if (fd < 0) { - perror(argv[1]); - exit(1); - } - if (chdir(argv[2]) < 0) - usage(); - if (fstat(fd, &st) < 0) { - perror("stat"); - exit(1); - } - size = st.st_size; - map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); - if (-1 == (int)(long)map) { - perror("mmap"); - exit(1); - } - close(fd); - nr = 0; - do { - char name[10]; - unsigned long len = parse_email(map, size); - assert(len <= size); - sprintf(name, "%04d", ++nr); - fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); - if (fd < 0) { - perror(name); - exit(1); - } - if (write(fd, map, len) != len) { - perror("write"); - exit(1); - } - close(fd); - map += len; - size -= len; - } while (size > 0); - return 0; -} diff --git a/stripspace.c b/stripspace.c deleted file mode 100644 index 96cd0a8..0000000 --- a/stripspace.c +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include -#include - -/* - * Remove empty lines from the beginning and end. - * - * Turn multiple consecutive empty lines into just one - * empty line. - */ -static void cleanup(char *line) -{ - int len = strlen(line); - - if (len > 1 && line[len-1] == '\n') { - do { - unsigned char c = line[len-2]; - if (!isspace(c)) - break; - line[len-2] = '\n'; - len--; - line[len] = 0; - } while (len > 1); - } -} - -int main(int argc, char **argv) -{ - int empties = -1; - char line[1024]; - - while (fgets(line, sizeof(line), stdin)) { - cleanup(line); - - /* Not just an empty line? */ - if (line[0] != '\n') { - if (empties > 0) - putchar('\n'); - empties = 0; - fputs(line, stdout); - continue; - } - if (empties < 0) - continue; - empties++; - } - return 0; -} diff --git a/tools/Makefile b/tools/Makefile new file mode 100644 index 0000000..8e7252e --- /dev/null +++ b/tools/Makefile @@ -0,0 +1,14 @@ +CC=gcc +CFLAGS=-Wall -O2 +HOME=$(shell echo $$HOME) + +PROGRAMS=mailsplit mailinfo +SCRIPTS=applymbox applypatch + +all: $(PROGRAMS) + +install: $(PROGRAMS) $(SCRIPTS) + cp -f $(PROGRAMS) $(SCRIPTS) $(HOME)/bin/ + +clean: + rm -f $(PROGRAMS) *.o diff --git a/tools/applymbox b/tools/applymbox new file mode 100755 index 0000000..5ac8d2b --- /dev/null +++ b/tools/applymbox @@ -0,0 +1,35 @@ +#!/bin/sh +## +## "dotest" is my stupid name for my patch-application script, which +## I never got around to renaming after I tested it. We're now on the +## second generation of scripts, still called "dotest". +## +## Update: Ryan Anderson finally shamed me into naming this "applymbox". +## +## You give it a mbox-format collection of emails, and it will try to +## apply them to the kernel using "applypatch" +## +## dotest [ -q ] mail_archive [Signoff_file] +## +rm -rf .dotest +mkdir .dotest +case $1 in + + -q) touch .dotest/.query_apply + shift;; +esac +mailsplit $1 .dotest || exit 1 +for i in .dotest/* +do + mailinfo .dotest/msg .dotest/patch < $i > .dotest/info || exit 1 + git-stripspace < .dotest/msg > .dotest/msg-clean + applypatch .dotest/msg-clean .dotest/patch .dotest/info "$2" + ret=$? + if [ $ret -ne 0 ]; then + # 2 is a special exit code from applypatch to indicate that + # the patch wasn't applied, but continue anyway + [ $ret -ne 2 ] && exit $ret + fi +done +# return to pristine +rm -fr .dotest diff --git a/tools/applypatch b/tools/applypatch new file mode 100755 index 0000000..5a3a44b --- /dev/null +++ b/tools/applypatch @@ -0,0 +1,64 @@ +#!/bin/sh +## +## applypatch takes four file arguments, and uses those to +## apply the unpacked patch (surprise surprise) that they +## represent to the current tree. +## +## The arguments are: +## $1 - file with commit message +## $2 - file with the actual patch +## $3 - "info" file with Author, email and subject +## $4 - optional file containing signoff to add +## +signoff="$4" +final=.dotest/final-commit +## +## If this file exists, we ask before applying +## +query_apply=.dotest/.query_apply +MSGFILE=$1 +PATCHFILE=$2 +INFO=$3 +EDIT=${VISUAL:-$EDITOR} +EDIT=${EDIT:-vi} + +export GIT_AUTHOR_NAME="$(sed -n '/^Author/ s/Author: //p' .dotest/info)" +export GIT_AUTHOR_EMAIL="$(sed -n '/^Email/ s/Email: //p' .dotest/info)" +export GIT_AUTHOR_DATE="$(sed -n '/^Date/ s/Date: //p' .dotest/info)" +export SUBJECT="$(sed -n '/^Subject/ s/Subject: //p' .dotest/info)" + +if [ -n "$signoff" -a -f "$signoff" ]; then + cat $signoff >> $MSGFILE +fi + +(echo "[PATCH] $SUBJECT" ; if [ -s $MSGFILE ]; then echo ; cat $MSGFILE; fi ) > $final + +f=0 +[ -f $query_apply ] || f=1 + +while [ $f -eq 0 ]; do + echo "Commit Body is:" + echo "--------------------------" + cat $final + echo "--------------------------" + echo -n "Apply? [y]es/[n]o/[e]dit/[a]ccept all " + read reply + case $reply in + y|Y) f=1;; + n|N) exit 2;; # special value to tell dotest to keep going + e|E) $EDIT $final;; + a|A) rm -f $query_apply + f=1;; + esac +done + +echo +echo Applying "'$SUBJECT'" +echo + +git-apply --index $PATCHFILE || exit 1 +tree=$(git-write-tree) || exit 1 +echo Wrote tree $tree +commit=$(git-commit-tree $tree -p $(cat .git/HEAD) < $final) || exit 1 +echo Committed: $commit +echo $commit > .git/HEAD diff --git a/tools/mailinfo.c b/tools/mailinfo.c new file mode 100644 index 0000000..ae279bf --- /dev/null +++ b/tools/mailinfo.c @@ -0,0 +1,270 @@ +/* + * Another stupid program, this one parsing the headers of an + * email to figure out authorship and subject + */ +#include +#include +#include +#include + +static FILE *cmitmsg, *patchfile; + +static char line[1000]; +static char date[1000]; +static char name[1000]; +static char email[1000]; +static char subject[1000]; + +static char *sanity_check(char *name, char *email) +{ + int len = strlen(name); + if (len < 3 || len > 60) + return email; + if (strchr(name, '@') || strchr(name, '<') || strchr(name, '>')) + return email; + return name; +} + +static int handle_from(char *line) +{ + char *at = strchr(line, '@'); + char *dst; + + if (!at) + return 0; + + /* + * If we already have one email, don't take any confusing lines + */ + if (*email && strchr(at+1, '@')) + return 0; + + while (at > line) { + char c = at[-1]; + if (isspace(c) || c == '<') + break; + at--; + } + dst = email; + for (;;) { + unsigned char c = *at; + if (!c || c == '>' || isspace(c)) + break; + *at++ = ' '; + *dst++ = c; + } + *dst++ = 0; + + at = line + strlen(line); + while (at > line) { + unsigned char c = *--at; + if (isalnum(c)) + break; + *at = 0; + } + + at = line; + for (;;) { + unsigned char c = *at; + if (!c) + break; + if (isalnum(c)) + break; + at++; + } + + at = sanity_check(at, email); + + strcpy(name, at); + return 1; +} + +static void handle_date(char *line) +{ + strcpy(date, line); +} + +static void handle_subject(char *line) +{ + strcpy(subject, line); +} + +static void add_subject_line(char *line) +{ + while (isspace(*line)) + line++; + *--line = ' '; + strcat(subject, line); +} + +static void check_line(char *line, int len) +{ + static int cont = -1; + if (!memcmp(line, "From:", 5) && isspace(line[5])) { + handle_from(line+6); + cont = 0; + return; + } + if (!memcmp(line, "Date:", 5) && isspace(line[5])) { + handle_date(line+6); + cont = 0; + return; + } + if (!memcmp(line, "Subject:", 8) && isspace(line[8])) { + handle_subject(line+9); + cont = 1; + return; + } + if (isspace(*line)) { + switch (cont) { + case 0: + fprintf(stderr, "I don't do 'Date:' or 'From:' line continuations\n"); + break; + case 1: + add_subject_line(line); + return; + default: + break; + } + } + cont = -1; +} + +static char * cleanup_subject(char *subject) +{ + for (;;) { + char *p; + int len, remove; + switch (*subject) { + case 'r': case 'R': + if (!memcmp("e:", subject+1, 2)) { + subject +=3; + continue; + } + break; + case ' ': case '\t': case ':': + subject++; + continue; + + case '[': + p = strchr(subject, ']'); + if (!p) { + subject++; + continue; + } + len = strlen(p); + remove = p - subject; + if (remove <= len *2) { + subject = p+1; + continue; + } + break; + } + return subject; + } +} + +static void cleanup_space(char *buf) +{ + unsigned char c; + while ((c = *buf) != 0) { + buf++; + if (isspace(c)) { + buf[-1] = ' '; + c = *buf; + while (isspace(c)) { + int len = strlen(buf); + memmove(buf, buf+1, len); + c = *buf; + } + } + } +} + +static void handle_rest(void) +{ + char *sub = cleanup_subject(subject); + cleanup_space(name); + cleanup_space(date); + cleanup_space(email); + cleanup_space(sub); + printf("Author: %s\nEmail: %s\nSubject: %s\nDate: %s\n\n", name, email, sub, date); + FILE *out = cmitmsg; + + do { + if (!memcmp("diff -", line, 6) || + !memcmp("---", line, 3) || + !memcmp("Index: ", line, 7)) + out = patchfile; + + fputs(line, out); + } while (fgets(line, sizeof(line), stdin) != NULL); + + if (out == cmitmsg) { + fprintf(stderr, "No patch found\n"); + exit(1); + } + + fclose(cmitmsg); + fclose(patchfile); +} + +static int eatspace(char *line) +{ + int len = strlen(line); + while (len > 0 && isspace(line[len-1])) + line[--len] = 0; + return len; +} + +static void handle_body(void) +{ + int has_from = 0; + + /* First line of body can be a From: */ + while (fgets(line, sizeof(line), stdin) != NULL) { + int len = eatspace(line); + if (!len) + continue; + if (!memcmp("From:", line, 5) && isspace(line[5])) { + if (!has_from && handle_from(line+6)) { + has_from = 1; + continue; + } + } + line[len] = '\n'; + handle_rest(); + break; + } +} + +static void usage(void) +{ + fprintf(stderr, "mailinfo msg-file path-file < email\n"); + exit(1); +} + +int main(int argc, char ** argv) +{ + if (argc != 3) + usage(); + cmitmsg = fopen(argv[1], "w"); + if (!cmitmsg) { + perror(argv[1]); + exit(1); + } + patchfile = fopen(argv[2], "w"); + if (!patchfile) { + perror(argv[2]); + exit(1); + } + while (fgets(line, sizeof(line), stdin) != NULL) { + int len = eatspace(line); + if (!len) { + handle_body(); + break; + } + check_line(line, len); + } + return 0; +} diff --git a/tools/mailsplit.c b/tools/mailsplit.c new file mode 100644 index 0000000..9379fbc --- /dev/null +++ b/tools/mailsplit.c @@ -0,0 +1,144 @@ +/* + * Totally braindamaged mbox splitter program. + * + * It just splits a mbox into a list of files: "0001" "0002" .. + * so you can process them further from there. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int usage(void) +{ + fprintf(stderr, "mailsplit \n"); + exit(1); +} + +static int linelen(const char *map, unsigned long size) +{ + int len = 0, c; + + do { + c = *map; + map++; + size--; + len++; + } while (size && c != '\n'); + return len; +} + +static int is_from_line(const char *line, int len) +{ + const char *colon; + + if (len < 20 || memcmp("From ", line, 5)) + return 0; + + colon = line + len - 2; + line += 5; + for (;;) { + if (colon < line) + return 0; + if (*--colon == ':') + break; + } + + if (!isdigit(colon[-4]) || + !isdigit(colon[-2]) || + !isdigit(colon[-1]) || + !isdigit(colon[ 1]) || + !isdigit(colon[ 2])) + return 0; + + /* year */ + if (strtol(colon+3, NULL, 10) <= 90) + return 0; + + /* Ok, close enough */ + return 1; +} + +static int parse_email(const void *map, unsigned long size) +{ + unsigned long offset; + + if (size < 6 || memcmp("From ", map, 5)) + goto corrupt; + + /* Make sure we don't trigger on this first line */ + map++; size--; offset=1; + + /* + * Search for a line beginning with "From ", and + * having smething that looks like a date format. + */ + do { + int len = linelen(map, size); + if (is_from_line(map, len)) + return offset; + map += len; + size -= len; + offset += len; + } while (size); + return offset; + +corrupt: + fprintf(stderr, "corrupt mailbox\n"); + exit(1); +} + +int main(int argc, char **argv) +{ + int fd, nr; + struct stat st; + unsigned long size; + void *map; + + if (argc != 3) + usage(); + fd = open(argv[1], O_RDONLY); + if (fd < 0) { + perror(argv[1]); + exit(1); + } + if (chdir(argv[2]) < 0) + usage(); + if (fstat(fd, &st) < 0) { + perror("stat"); + exit(1); + } + size = st.st_size; + map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (-1 == (int)(long)map) { + perror("mmap"); + exit(1); + } + close(fd); + nr = 0; + do { + char name[10]; + unsigned long len = parse_email(map, size); + assert(len <= size); + sprintf(name, "%04d", ++nr); + fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600); + if (fd < 0) { + perror(name); + exit(1); + } + if (write(fd, map, len) != len) { + perror("write"); + exit(1); + } + close(fd); + map += len; + size -= len; + } while (size > 0); + return 0; +} -- cgit v0.10.2-6-g49f6