summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Junio C Hamano <junkio@cox.net> 2007-01-14 20:04:25 (GMT)
committer Junio C Hamano <junkio@cox.net> 2007-01-14 20:04:25 (GMT)
commit f4b6c6b90fdce12d69e4ad80ff6082405ec8cfb8 (patch)
tree 53977ce9f7f60973239db3de43060cf947aa5a43
parent 6de33478affbf252066cc5863add213ca5cdc13c (diff)
parent c34c6008bcf2c66e17a97acc89be1144a6216f3f (diff)
download git-f4b6c6b90fdce12d69e4ad80ff6082405ec8cfb8.zip
git-f4b6c6b90fdce12d69e4ad80ff6082405ec8cfb8.tar.gz
git-f4b6c6b90fdce12d69e4ad80ff6082405ec8cfb8.tar.bz2
Merge branch 'jc/int'
* jc/int:
  More tests in t3901.
  Consistent message encoding while reusing log from an existing commit.
  t3901: test "format-patch | am" pipe with i18n
  Use log output encoding in --pretty=email headers.
-rw-r--r-- commit.c 82
-rwxr-xr-x git-commit.sh 7
-rwxr-xr-x git-revert.sh 14
-rwxr-xr-x t/t3901-8859-1.txt 4
-rwxr-xr-x t/t3901-i18n-patch.sh 255
-rwxr-xr-x t/t3901-utf8.txt 4
6 files changed, 333 insertions, 33 deletions
diff --git a/commit.c b/commit.c
index 496d37a..9b2b842 100644
--- a/commit.c
+++ b/commit.c
@@ -464,20 +464,29 @@ static int get_one_line(const char *msg, unsigned long len)
return ret;
}
+/* High bit set, or ISO-2022-INT */
+static int non_ascii(int ch)
+{
+ ch = (ch & 0xff);
+ return ((ch & 0x80) || (ch == 0x1b));
+}
+
static int is_rfc2047_special(char ch)
{
- return ((ch & 0x80) || (ch == '=') || (ch == '?') || (ch == '_'));
+ return (non_ascii(ch) || (ch == '=') || (ch == '?') || (ch == '_'));
}
-static int add_rfc2047(char *buf, const char *line, int len)
+static int add_rfc2047(char *buf, const char *line, int len,
+ const char *encoding)
{
char *bp = buf;
int i, needquote;
- static const char q_utf8[] = "=?utf-8?q?";
+ char q_encoding[128];
+ const char *q_encoding_fmt = "=?%s?q?";
for (i = needquote = 0; !needquote && i < len; i++) {
- unsigned ch = line[i];
- if (ch & 0x80)
+ int ch = line[i];
+ if (non_ascii(ch))
needquote++;
if ((i + 1 < len) &&
(ch == '=' && line[i+1] == '?'))
@@ -486,8 +495,11 @@ static int add_rfc2047(char *buf, const char *line, int len)
if (!needquote)
return sprintf(buf, "%.*s", len, line);
- memcpy(bp, q_utf8, sizeof(q_utf8)-1);
- bp += sizeof(q_utf8)-1;
+ i = snprintf(q_encoding, sizeof(q_encoding), q_encoding_fmt, encoding);
+ if (sizeof(q_encoding) < i)
+ die("Insanely long encoding name %s", encoding);
+ memcpy(bp, q_encoding, i);
+ bp += i;
for (i = 0; i < len; i++) {
unsigned ch = line[i] & 0xFF;
if (is_rfc2047_special(ch)) {
@@ -505,7 +517,8 @@ static int add_rfc2047(char *buf, const char *line, int len)
}
static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
- const char *line, int relative_date)
+ const char *line, int relative_date,
+ const char *encoding)
{
char *date;
int namelen;
@@ -533,7 +546,8 @@ static int add_user_info(const char *what, enum cmit_fmt fmt, char *buf,
filler = "";
strcpy(buf, "From: ");
ret = strlen(buf);
- ret += add_rfc2047(buf + ret, line, display_name_length);
+ ret += add_rfc2047(buf + ret, line, display_name_length,
+ encoding);
memcpy(buf + ret, name_tail, namelen - display_name_length);
ret += namelen - display_name_length;
buf[ret++] = '\n';
@@ -668,21 +682,18 @@ static char *replace_encoding_header(char *buf, char *encoding)
return buf;
}
-static char *logmsg_reencode(const struct commit *commit)
+static char *logmsg_reencode(const struct commit *commit,
+ char *output_encoding)
{
char *encoding;
char *out;
- char *output_encoding = (git_log_output_encoding
- ? git_log_output_encoding
- : git_commit_encoding);
+ char *utf8 = "utf-8";
- if (!output_encoding)
- output_encoding = "utf-8";
- else if (!*output_encoding)
+ if (!*output_encoding)
return NULL;
encoding = get_header(commit, "encoding");
if (!encoding)
- return NULL;
+ encoding = utf8;
if (!strcmp(encoding, output_encoding))
out = strdup(commit->buffer);
else
@@ -691,7 +702,8 @@ static char *logmsg_reencode(const struct commit *commit)
if (out)
out = replace_encoding_header(out, output_encoding);
- free(encoding);
+ if (encoding != utf8)
+ free(encoding);
if (!out)
return NULL;
return out;
@@ -711,8 +723,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
int parents_shown = 0;
const char *msg = commit->buffer;
int plain_non_ascii = 0;
- char *reencoded = logmsg_reencode(commit);
+ char *reencoded;
+ char *encoding;
+ encoding = (git_log_output_encoding
+ ? git_log_output_encoding
+ : git_commit_encoding);
+ if (!encoding)
+ encoding = "utf-8";
+ reencoded = logmsg_reencode(commit, encoding);
if (reencoded)
msg = reencoded;
@@ -738,7 +757,7 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
i + 1 < len && msg[i+1] == '\n')
in_body = 1;
}
- else if (ch & 0x80) {
+ else if (non_ascii(ch)) {
plain_non_ascii = 1;
break;
}
@@ -797,13 +816,15 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
offset += add_user_info("Author", fmt,
buf + offset,
line + 7,
- relative_date);
+ relative_date,
+ encoding);
if (!memcmp(line, "committer ", 10) &&
(fmt == CMIT_FMT_FULL || fmt == CMIT_FMT_FULLER))
offset += add_user_info("Commit", fmt,
buf + offset,
line + 10,
- relative_date);
+ relative_date,
+ encoding);
continue;
}
@@ -826,7 +847,8 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
int slen = strlen(subject);
memcpy(buf + offset, subject, slen);
offset += slen;
- offset += add_rfc2047(buf + offset, line, linelen);
+ offset += add_rfc2047(buf + offset, line, linelen,
+ encoding);
}
else {
memset(buf + offset, ' ', indent);
@@ -837,11 +859,17 @@ unsigned long pretty_print_commit(enum cmit_fmt fmt,
if (fmt == CMIT_FMT_ONELINE)
break;
if (subject && plain_non_ascii) {
- static const char header[] =
- "Content-Type: text/plain; charset=UTF-8\n"
+ int sz;
+ char header[512];
+ const char *header_fmt =
+ "Content-Type: text/plain; charset=%s\n"
"Content-Transfer-Encoding: 8bit\n";
- memcpy(buf + offset, header, sizeof(header)-1);
- offset += sizeof(header)-1;
+ sz = snprintf(header, sizeof(header), header_fmt,
+ encoding);
+ if (sizeof(header) < sz)
+ die("Encoding name %s too long", encoding);
+ memcpy(buf + offset, header, sz);
+ offset += sz;
}
if (after_subject) {
int slen = strlen(after_subject);
diff --git a/git-commit.sh b/git-commit.sh
index 9fdf234..e23918c 100755
--- a/git-commit.sh
+++ b/git-commit.sh
@@ -429,7 +429,9 @@ then
fi
elif test "$use_commit" != ""
then
- git-cat-file commit "$use_commit" | sed -e '1,/^$/d'
+ encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
+ git show -s --pretty=raw --encoding="$encoding" "$use_commit" |
+ sed -e '1,/^$/d' -e 's/^ //'
elif test -f "$GIT_DIR/MERGE_MSG"
then
cat "$GIT_DIR/MERGE_MSG"
@@ -491,7 +493,8 @@ then
q
}
'
- set_author_env=`git-cat-file commit "$use_commit" |
+ encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
+ set_author_env=`git show -s --pretty=raw --encoding="$encoding" "$use_commit" |
LANG=C LC_ALL=C sed -ne "$pick_author_script"`
eval "$set_author_env"
export GIT_AUTHOR_NAME
diff --git a/git-revert.sh b/git-revert.sh
index 224e654..71cbcbc 100755
--- a/git-revert.sh
+++ b/git-revert.sh
@@ -81,6 +81,8 @@ prev=$(git-rev-parse --verify "$commit^1" 2>/dev/null) ||
git-rev-parse --verify "$commit^2" >/dev/null 2>&1 &&
die "Cannot run $me a multi-parent commit."
+encoding=$(git repo-config i18n.commitencoding || echo UTF-8)
+
# "commit" is an existing commit. We would want to apply
# the difference it introduces since its first parent "prev"
# on top of the current HEAD if we are cherry-pick. Or the
@@ -88,10 +90,11 @@ git-rev-parse --verify "$commit^2" >/dev/null 2>&1 &&
case "$me" in
revert)
- git-rev-list --pretty=oneline --max-count=1 $commit |
+ git show -s --pretty=oneline --encoding="$encoding" $commit |
sed -e '
s/^[^ ]* /Revert "/
- s/$/"/'
+ s/$/"/
+ '
echo
echo "This reverts commit $commit."
test "$rev" = "$commit" ||
@@ -120,14 +123,17 @@ cherry-pick)
q
}'
- set_author_env=`git-cat-file commit "$commit" |
+
+ logmsg=`git show -s --pretty=raw --encoding="$encoding" "$commit"`
+ set_author_env=`echo "$logmsg" |
LANG=C LC_ALL=C sed -ne "$pick_author_script"`
eval "$set_author_env"
export GIT_AUTHOR_NAME
export GIT_AUTHOR_EMAIL
export GIT_AUTHOR_DATE
- git-cat-file commit $commit | sed -e '1,/^$/d'
+ echo "$logmsg" |
+ sed -e '1,/^$/d' -e 's/^ //'
case "$replay" in
'')
echo "(cherry picked from commit $commit)"
diff --git a/t/t3901-8859-1.txt b/t/t3901-8859-1.txt
new file mode 100755
index 0000000..38c21a6
--- /dev/null
+++ b/t/t3901-8859-1.txt
@@ -0,0 +1,4 @@
+: to be sourced in t3901 -- this is latin-1
+GIT_AUTHOR_NAME="Áéí óú" &&
+GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&
+export GIT_AUTHOR_NAME GIT_COMMITTER_NAME
diff --git a/t/t3901-i18n-patch.sh b/t/t3901-i18n-patch.sh
new file mode 100755
index 0000000..eda0e2d
--- /dev/null
+++ b/t/t3901-i18n-patch.sh
@@ -0,0 +1,255 @@
+#!/bin/sh
+#
+# Copyright (c) 2006 Junio C Hamano
+#
+
+test_description='i18n settings and format-patch | am pipe'
+
+. ./test-lib.sh
+
+check_encoding () {
+ # Make sure characters are not corrupted
+ cnt="$1" header="$2" i=1 j=0 bad=0
+ while test "$i" -le $cnt
+ do
+ git format-patch --encoding=UTF-8 --stdout HEAD~$i..HEAD~$j |
+ grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" &&
+ git-cat-file commit HEAD~$j |
+ case "$header" in
+ 8859)
+ grep "^encoding ISO-8859-1" ;;
+ *)
+ ! grep "^encoding ISO-8859-1" ;;
+ esac || {
+ bad=1
+ break
+ }
+ j=$i
+ i=$(($i+1))
+ done
+ (exit $bad)
+}
+
+test_expect_success setup '
+ git-repo-config i18n.commitencoding UTF-8 &&
+
+ # use UTF-8 in author and committer name to match the
+ # i18n.commitencoding settings
+ . ../t3901-utf8.txt &&
+
+ test_tick &&
+ echo "$GIT_AUTHOR_NAME" >mine &&
+ git add mine &&
+ git commit -s -m "Initial commit" &&
+
+ test_tick &&
+ echo Hello world >mine &&
+ git add mine &&
+ git commit -s -m "Second on main" &&
+
+ # the first commit on the side branch is UTF-8
+ test_tick &&
+ git checkout -b side master^ &&
+ echo Another file >yours &&
+ git add yours &&
+ git commit -s -m "Second on side" &&
+
+ # the second one on the side branch is ISO-8859-1
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ # use author and committer name in ISO-8859-1 to match it.
+ . ../t3901-8859-1.txt &&
+ test_tick &&
+ echo Yet another >theirs &&
+ git add theirs &&
+ git commit -s -m "Third on side" &&
+
+ # Back to default
+ git-repo-config i18n.commitencoding UTF-8
+'
+
+test_expect_success 'format-patch output (ISO-8859-1)' '
+ git-repo-config i18n.logoutputencoding ISO-8859-1 &&
+
+ git format-patch --stdout master..HEAD^ >out-l1 &&
+ git format-patch --stdout HEAD^ >out-l2 &&
+ grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l1 &&
+ grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l1 &&
+ grep "^Content-Type: text/plain; charset=ISO-8859-1" out-l2 &&
+ grep "^From: =?ISO-8859-1?q?=C1=E9=ED_=F3=FA?=" out-l2
+'
+
+test_expect_success 'format-patch output (UTF-8)' '
+ git repo-config i18n.logoutputencoding UTF-8 &&
+
+ git format-patch --stdout master..HEAD^ >out-u1 &&
+ git format-patch --stdout HEAD^ >out-u2 &&
+ grep "^Content-Type: text/plain; charset=UTF-8" out-u1 &&
+ grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u1 &&
+ grep "^Content-Type: text/plain; charset=UTF-8" out-u2 &&
+ grep "^From: =?UTF-8?q?=C3=81=C3=A9=C3=AD_=C3=B3=C3=BA?=" out-u2
+'
+
+test_expect_success 'rebase (U/U)' '
+ # We want the result of rebase in UTF-8
+ git-repo-config i18n.commitencoding UTF-8 &&
+
+ # The test is about logoutputencoding not affecting the
+ # final outcome -- it is used internally to generate the
+ # patch and the log.
+
+ git repo-config i18n.logoutputencoding UTF-8 &&
+
+ # The result will be committed by GIT_COMMITTER_NAME --
+ # we want UTF-8 encoded name.
+ . ../t3901-utf8.txt &&
+ git checkout -b test &&
+ git-rebase master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase (U/L)' '
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard side &&
+ git-rebase master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase (L/L)' '
+ # In this test we want ISO-8859-1 encoded commits as the result
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase master &&
+
+ check_encoding 2 8859
+'
+
+test_expect_success 'rebase (L/U)' '
+ # This is pathological -- use UTF-8 as intermediate form
+ # to get ISO-8859-1 results.
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase master &&
+
+ check_encoding 2 8859
+'
+
+test_expect_success 'cherry-pick(U/U)' '
+ # Both the commitencoding and logoutputencoding is set to UTF-8.
+
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3
+'
+
+test_expect_success 'cherry-pick(L/L)' '
+ # Both the commitencoding and logoutputencoding is set to ISO-8859-1
+
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3 8859
+'
+
+test_expect_success 'cherry-pick(U/L)' '
+ # Commitencoding is set to UTF-8 but logoutputencoding is ISO-8859-1
+
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3
+'
+
+test_expect_success 'cherry-pick(L/U)' '
+ # Again, the commitencoding is set to ISO-8859-1 but
+ # logoutputencoding is set to UTF-8.
+
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard master &&
+ git cherry-pick side^ &&
+ git cherry-pick side &&
+ EDITOR=: VISUAL=: git revert HEAD &&
+
+ check_encoding 3 8859
+'
+
+test_expect_success 'rebase --merge (U/U)' '
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase --merge (U/L)' '
+ git-repo-config i18n.commitencoding UTF-8 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-utf8.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2
+'
+
+test_expect_success 'rebase --merge (L/L)' '
+ # In this test we want ISO-8859-1 encoded commits as the result
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding ISO-8859-1 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2 8859
+'
+
+test_expect_success 'rebase --merge (L/U)' '
+ # This is pathological -- use UTF-8 as intermediate form
+ # to get ISO-8859-1 results.
+ git-repo-config i18n.commitencoding ISO-8859-1 &&
+ git repo-config i18n.logoutputencoding UTF-8 &&
+ . ../t3901-8859-1.txt &&
+
+ git reset --hard side &&
+ git-rebase --merge master &&
+
+ check_encoding 2 8859
+'
+
+test_done
diff --git a/t/t3901-utf8.txt b/t/t3901-utf8.txt
new file mode 100755
index 0000000..5f5205c
--- /dev/null
+++ b/t/t3901-utf8.txt
@@ -0,0 +1,4 @@
+: to be sourced in t3901 -- this is utf8
+GIT_AUTHOR_NAME="Áéí óú" &&
+GIT_COMMITTER_NAME=$GIT_AUTHOR_NAME &&
+export GIT_AUTHOR_NAME GIT_COMMITTER_NAME