summaryrefslogtreecommitdiff
path: root/tools/mailsplit.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/mailsplit.c')
-rw-r--r--tools/mailsplit.c144
1 files changed, 144 insertions, 0 deletions
diff --git a/tools/mailsplit.c b/tools/mailsplit.c
new file mode 100644
index 0000000..9379fbc
--- /dev/null
+++ b/tools/mailsplit.c
@@ -0,0 +1,144 @@
+/*
+ * Totally braindamaged mbox splitter program.
+ *
+ * It just splits an mbox into a list of files: "0001" "0002" ..
+ * so you can process them further from there.
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+/*
+ * Print the command-line synopsis on stderr and terminate with status 1.
+ * The declared return type is int, but control never returns to the caller.
+ */
+static int usage(void)
+{
+	fputs("mailsplit <mbox> <directory>\n", stderr);
+	exit(1);
+}
+
+/*
+ * Length of the first line in the buffer.
+ *
+ * map:  start of the data (need not be NUL-terminated)
+ * size: number of valid bytes at map
+ *
+ * Returns the number of bytes up to and including the first '\n', or
+ * "size" when the buffer holds no newline (unterminated last line).
+ * An empty buffer yields 0 and is never dereferenced.
+ *
+ * The result is an int, so a single line is assumed to fit in one.
+ */
+static int linelen(const char *map, unsigned long size)
+{
+	const char *newline;
+
+	if (!size)
+		return 0;
+
+	newline = memchr(map, '\n', size);
+	if (!newline)
+		return (int)size;
+	return (int)(newline - map) + 1;
+}
+
+/*
+ * Heuristic test for an mbox "From " separator line.
+ *
+ * line: start of the line (need not be NUL-terminated)
+ * len:  length of the line in bytes, including any trailing '\n'
+ *
+ * A line qualifies when it is at least 20 bytes long, starts with
+ * "From ", contains a ':' that looks like the last colon of an
+ * "HH:MM:SS" time stamp, and is followed by a number greater than 90
+ * (the year).
+ *
+ * Returns 1 when the line looks like a separator, 0 otherwise.
+ * Nothing outside [line, line + len) is ever read.
+ */
+static int is_from_line(const char *line, int len)
+{
+	const char *start, *end, *colon, *p;
+	long year;
+
+	if (len < 20 || memcmp("From ", line, 5))
+		return 0;
+
+	start = line + 5;
+	end = line + len;
+
+	/*
+	 * Look backwards for the last ':' in the line.  The final two
+	 * bytes are skipped: a colon there could not be followed by the
+	 * two digits checked below.
+	 */
+	for (colon = end - 3; colon >= start; colon--) {
+		if (*colon == ':')
+			break;
+	}
+	if (colon < start)
+		return 0;
+
+	/*
+	 * Expect "d?:dd:dd" around the colon.  colon >= line + 5 and
+	 * colon <= end - 3, so every index below stays inside the line.
+	 * <ctype.h> functions need an unsigned char value.
+	 */
+	if (!isdigit((unsigned char)colon[-4]) ||
+	    !isdigit((unsigned char)colon[-2]) ||
+	    !isdigit((unsigned char)colon[-1]) ||
+	    !isdigit((unsigned char)colon[ 1]) ||
+	    !isdigit((unsigned char)colon[ 2]))
+		return 0;
+
+	/*
+	 * year: parsed by hand rather than with strtol(), because the
+	 * buffer is not NUL-terminated and strtol() would happily skip
+	 * the newline and keep reading into the next line (or past the
+	 * end of the data).
+	 */
+	p = colon + 3;
+	while (p < end && isspace((unsigned char)*p))
+		p++;
+	if (p < end && (*p == '+' || *p == '-')) {
+		/* a negative number can never exceed 90 */
+		if (*p == '-')
+			return 0;
+		p++;
+	}
+	year = 0;
+	for (; p < end && isdigit((unsigned char)*p); p++) {
+		year = year * 10 + (*p - '0');
+		/* further digits only make it larger; also avoids overflow */
+		if (year > 90)
+			return 1;	/* Ok, close enough */
+	}
+
+	return 0;
+}
+
+/*
+ * Find the extent of the first message in an mbox buffer.
+ *
+ * map:  start of the remaining mailbox data; must begin with "From "
+ * size: number of valid bytes at map
+ *
+ * Returns the length in bytes of the first message, i.e. the offset
+ * of the next line accepted by is_from_line(), or "size" when no such
+ * line follows.  The result is an unsigned long so that messages
+ * larger than INT_MAX are not truncated.
+ *
+ * Prints "corrupt mailbox" and exits with status 1 when the data does
+ * not start with "From " or is shorter than 6 bytes.
+ */
+static unsigned long parse_email(const void *map, unsigned long size)
+{
+	/* byte cursor: arithmetic on a void pointer is not standard C */
+	const char *cur = map;
+	unsigned long offset;
+
+	if (size < 6 || memcmp("From ", cur, 5)) {
+		fprintf(stderr, "corrupt mailbox\n");
+		exit(1);
+	}
+
+	/* Make sure we don't trigger on this first line */
+	cur++;
+	size--;
+	offset = 1;
+
+	/*
+	 * Search for a line beginning with "From ", and
+	 * having something that looks like a date format.
+	 */
+	do {
+		int len = linelen(cur, size);
+
+		if (is_from_line(cur, len))
+			return offset;
+		cur += len;
+		size -= len;
+		offset += len;
+	} while (size);
+
+	/* No further separator: the message runs to the end of the data */
+	return offset;
+}
+
+/*
+ * mailsplit <mbox> <directory>
+ *
+ * Maps the mbox file read-only, changes into the target directory and
+ * writes each message to a freshly created file named "0001", "0002",
+ * and so on.  Existing files are never overwritten (O_EXCL).
+ *
+ * Returns 0 on success; exits with status 1 on any error.
+ */
+int main(int argc, char **argv)
+{
+	int fd, nr;
+	struct stat st;
+	unsigned long size;
+	void *base;
+	const char *map;
+
+	if (argc != 3)
+		usage();
+	/* Open the mailbox before chdir() so a relative path still works */
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {
+		perror(argv[1]);
+		exit(1);
+	}
+	if (chdir(argv[2]) < 0)
+		usage();
+	if (fstat(fd, &st) < 0) {
+		perror("stat");
+		exit(1);
+	}
+	size = st.st_size;
+	base = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (base == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	/* The mapping stays valid after the descriptor is closed */
+	close(fd);
+
+	map = base;
+	nr = 0;
+	do {
+		/* room for any int value plus the terminating NUL */
+		char name[16];
+		unsigned long len = parse_email(map, size);
+		unsigned long done = 0;
+
+		assert(len <= size);
+		snprintf(name, sizeof(name), "%04d", ++nr);
+		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
+		if (fd < 0) {
+			perror(name);
+			exit(1);
+		}
+
+		/* write() may accept fewer bytes than requested: keep going */
+		while (done < len) {
+			ssize_t n = write(fd, map + done, len - done);
+
+			if (n < 0) {
+				perror("write");
+				exit(1);
+			}
+			if (n == 0) {
+				fprintf(stderr, "%s: short write\n", name);
+				exit(1);
+			}
+			done += n;
+		}
+		/* Deferred write errors can surface at close() */
+		if (close(fd) < 0) {
+			perror(name);
+			exit(1);
+		}
+
+		map += len;
+		size -= len;
+	} while (size > 0);
+	return 0;
+}