summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Junio C Hamano <gitster@pobox.com> 2016-09-27 01:09:48 (GMT)
committer: Junio C Hamano <gitster@pobox.com> 2016-09-27 01:16:23 (GMT)
commit: 3270741ea8c2a225183d272bf19ea19d5b3c05d8 (patch)
tree: 5307896f3fe6a1c53b82b080fcfe3e569c3daf78
parent: 0b65a8dbdb38962e700ee16776a3042beb489060 (diff)
download: git-3270741ea8c2a225183d272bf19ea19d5b3c05d8.zip
git-3270741ea8c2a225183d272bf19ea19d5b3c05d8.tar.gz
git-3270741ea8c2a225183d272bf19ea19d5b3c05d8.tar.bz2
utf8: refactor code to decide fallback encoding
The codepath we use to call iconv_open() has a provision to use a fallback encoding when it fails, hoping that "UTF-8" being spelled differently could be the reason why the library function did not like the encoding names we gave it.

Essentially, we turn what we have observed to be used as variants of "UTF-8" (e.g. "utf8") into the most official spelling and use that as a fallback.

We do the same thing for input and output encoding. Introduce a helper function to do just one side and call that twice.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- utf8.c 29
1 file changed, 18 insertions, 11 deletions
diff --git a/utf8.c b/utf8.c
index 00e10c8..550e785 100644
--- a/utf8.c
+++ b/utf8.c
@@ -489,6 +489,21 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outs
return out;
}
+static const char *fallback_encoding(const char *name)
+{
+ /*
+ * Some platforms do not have the variously spelled variants of
+ * UTF-8, so let's fall back to trying the most official
+ * spelling. We do so only as a fallback in case the platform
+ * does understand the user's spelling, but not our official
+ * one.
+ */
+ if (is_encoding_utf8(name))
+ return "UTF-8";
+
+ return name;
+}
+
char *reencode_string_len(const char *in, int insz,
const char *out_encoding, const char *in_encoding,
int *outsz)
@@ -501,17 +516,9 @@ char *reencode_string_len(const char *in, int insz,
conv = iconv_open(out_encoding, in_encoding);
if (conv == (iconv_t) -1) {
- /*
- * Some platforms do not have the variously spelled variants of
- * UTF-8, so let's fall back to trying the most official
- * spelling. We do so only as a fallback in case the platform
- * does understand the user's spelling, but not our official
- * one.
- */
- if (is_encoding_utf8(in_encoding))
- in_encoding = "UTF-8";
- if (is_encoding_utf8(out_encoding))
- out_encoding = "UTF-8";
+ in_encoding = fallback_encoding(in_encoding);
+ out_encoding = fallback_encoding(out_encoding);
+
conv = iconv_open(out_encoding, in_encoding);
if (conv == (iconv_t) -1)
return NULL;