mailinfo: assume input is latin-1 on the header as we do for the body

When the input mbox does not identify what encoding it is in, and has already had its RFC2047 encoding stripped away, we cannot tell what encoding the header text is in. For body text, when the message does not say what charset it is in, we fall back to assuming latin-1 input when converting to utf8. This should be done consistently for the header as well.

Signed-off-by: Junio C Hamano <junkio@cox.net>
author: Junio C Hamano <junkio@cox.net> 2006-07-05 21:17:49 (GMT)
committer: Junio C Hamano <junkio@cox.net> 2006-07-06 07:10:49 (GMT)
commit: b75bf2c3f08b70a3e4850867290a8c215d2c1abf (patch)
tree: 164eba30f02b30ee8c9dd3712ebe821aa4fadd07
parent: ac83aa2e1fffcb4e7560431ddc15b288dc237634 (diff)
download: git-b75bf2c3f08b70a3e4850867290a8c215d2c1abf.zip
git-b75bf2c3f08b70a3e4850867290a8c215d2c1abf.tar.gz
git-b75bf2c3f08b70a3e4850867290a8c215d2c1abf.tar.bz2
1 files changed, 24 insertions, 8 deletions
diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c
index 3e40747..ac53f76 100644
--- a/builtin-mailinfo.c
+++ b/builtin-mailinfo.c
@@ -348,7 +348,7 @@ static void cleanup_space(char *buf)
 	}
 }
 
-static void decode_header_bq(char *it);
+static void decode_header(char *it);
 typedef int (*header_fn_t)(char *);
 struct header_def {
 	const char *name;
@@ -371,7 +371,7 @@ static void check_header(char *line, struct header_def *header)
 			/* Unwrap inline B and Q encoding, and optionally
 			 * normalize the meta information to utf8.
 			 */
-			decode_header_bq(line + len + 2);
+			decode_header(line + len + 2);
 			header[i].func(line + len + 2);
 			break;
 		}
@@ -566,16 +566,19 @@ static void convert_to_utf8(char *line, char *charset)
 #endif
 }
 
-static void decode_header_bq(char *it)
+static int decode_header_bq(char *it)
 {
 	char *in, *out, *ep, *cp, *sp;
 	char outbuf[1000];
+	int rfc2047 = 0;
 
 	in = it;
 	out = outbuf;
 	while ((ep = strstr(in, "=?")) != NULL) {
 		int sz, encoding;
 		char charset_q[256], piecebuf[256];
+		rfc2047 = 1;
+
 		if (in != ep) {
 			sz = ep - in;
 			memcpy(out, in, sz);
@@ -589,19 +592,19 @@ static void decode_header_bq(char *it)
 		ep += 2;
 		cp = strchr(ep, '?');
 		if (!cp)
-			return; /* no munging */
+			return rfc2047; /* no munging */
 		for (sp = ep; sp < cp; sp++)
 			charset_q[sp - ep] = tolower(*sp);
 		charset_q[cp - ep] = 0;
 		encoding = cp[1];
 		if (!encoding || cp[2] != '?')
-			return; /* no munging */
+			return rfc2047; /* no munging */
 		ep = strstr(cp + 3, "?=");
 		if (!ep)
-			return; /* no munging */
+			return rfc2047; /* no munging */
 		switch (tolower(encoding)) {
 		default:
-			return; /* no munging */
+			return rfc2047; /* no munging */
 		case 'b':
 			sz = decode_b_segment(cp + 3, piecebuf, ep);
 			break;
@@ -610,7 +613,7 @@ static void decode_header_bq(char *it)
 			break;
 		}
 		if (sz < 0)
-			return;
+			return rfc2047;
 		if (metainfo_charset)
 			convert_to_utf8(piecebuf, charset_q);
 		strcpy(out, piecebuf);
@@ -619,6 +622,19 @@ static void decode_header_bq(char *it)
 	}
 	strcpy(out, in);
 	strcpy(it, outbuf);
+	return rfc2047;
+}
+
+static void decode_header(char *it)
+{
+
+	if (decode_header_bq(it))
+		return;
+	/* otherwise "it" is a straight copy of the input.
+	 * This can be binary guck but there is no charset specified.
+	 */
+	if (metainfo_charset)
+		convert_to_utf8(it, "");
 }
 
 static void decode_transfer_encoding(char *line)
author	Junio C Hamano <junkio@cox.net>	2006-07-05 21:17:49 (GMT)
committer	Junio C Hamano <junkio@cox.net>	2006-07-06 07:10:49 (GMT)
commit	b75bf2c3f08b70a3e4850867290a8c215d2c1abf (patch)
tree	164eba30f02b30ee8c9dd3712ebe821aa4fadd07
parent	ac83aa2e1fffcb4e7560431ddc15b288dc237634 (diff)
download	git-b75bf2c3f08b70a3e4850867290a8c215d2c1abf.zip git-b75bf2c3f08b70a3e4850867290a8c215d2c1abf.tar.gz git-b75bf2c3f08b70a3e4850867290a8c215d2c1abf.tar.bz2