summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorElijah Newren <newren@gmail.com>2019-05-14 04:30:59 (GMT)
committerJunio C Hamano <gitster@pobox.com>2019-05-14 07:48:56 (GMT)
commit3edfcc65fdfc708c1c8f1d314885eecf9beb9b67 (patch)
tree23d4c0a0758dcb4f34fe82781a7749e68aa310b2
parent32615ce7626f8f07990bd1c01f49be6dca7d4a2d (diff)
downloadgit-3edfcc65fdfc708c1c8f1d314885eecf9beb9b67.zip
git-3edfcc65fdfc708c1c8f1d314885eecf9beb9b67.tar.gz
git-3edfcc65fdfc708c1c8f1d314885eecf9beb9b67.tar.bz2
fast-import: support 'encoding' commit header
Since git supports commit messages with an encoding other than UTF-8, allow fast-import to import such commits. This may be useful for folks who do not want to reencode commit messages from an external system, and may also be useful to achieve reversible history rewrites (e.g. sha1sum <-> sha256sum transitions or subtree work) with git repositories that have used specialized encodings in their commit history. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r--Documentation/git-fast-import.txt7
-rw-r--r--fast-import.c11
-rwxr-xr-xt/t9300-fast-import.sh20
3 files changed, 36 insertions, 2 deletions
diff --git a/Documentation/git-fast-import.txt b/Documentation/git-fast-import.txt
index d65cdb3..7baf9e4 100644
--- a/Documentation/git-fast-import.txt
+++ b/Documentation/git-fast-import.txt
@@ -388,6 +388,7 @@ change to the project.
original-oid?
('author' (SP <name>)? SP LT <email> GT SP <when> LF)?
'committer' (SP <name>)? SP LT <email> GT SP <when> LF
+ ('encoding' SP <encoding>)?
data
('from' SP <commit-ish> LF)?
('merge' SP <commit-ish> LF)?
@@ -455,6 +456,12 @@ that was selected by the --date-format=<fmt> command-line option.
See ``Date Formats'' above for the set of supported formats, and
their syntax.
+`encoding`
+^^^^^^^^^^
+The optional `encoding` command indicates the encoding of the commit
+message. Most commits are UTF-8 and the encoding is omitted, but this
+allows importing commit messages into git without first reencoding them.
+
`from`
^^^^^^
The `from` command is used to specify the commit to initialize
diff --git a/fast-import.c b/fast-import.c
index f38d04f..76a7bd3 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -2585,6 +2585,7 @@ static void parse_new_commit(const char *arg)
struct branch *b;
char *author = NULL;
char *committer = NULL;
+ const char *encoding = NULL;
struct hash_list *merge_list = NULL;
unsigned int merge_count;
unsigned char prev_fanout, new_fanout;
@@ -2607,6 +2608,8 @@ static void parse_new_commit(const char *arg)
}
if (!committer)
die("Expected committer but didn't get one");
+ if (skip_prefix(command_buf.buf, "encoding ", &encoding))
+ read_next_command();
parse_data(&msg, 0, NULL);
read_next_command();
parse_from(b);
@@ -2670,9 +2673,13 @@ static void parse_new_commit(const char *arg)
}
strbuf_addf(&new_data,
"author %s\n"
- "committer %s\n"
- "\n",
+ "committer %s\n",
author ? author : committer, committer);
+ if (encoding)
+ strbuf_addf(&new_data,
+ "encoding %s\n",
+ encoding);
+ strbuf_addch(&new_data, '\n');
strbuf_addbuf(&new_data, &msg);
free(author);
free(committer);
diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 3668263..141b7fa 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -3299,4 +3299,24 @@ test_expect_success !MINGW 'W: get-mark & empty orphan commit with erroneous thi
sed -e s/LFs/LLL/ W-input | tr L "\n" | test_must_fail git fast-import
'
+###
+### series X (other new features)
+###
+
+test_expect_success 'X: handling encoding' '
+ test_tick &&
+ cat >input <<-INPUT_END &&
+ commit refs/heads/encoding
+ committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+ encoding iso-8859-7
+ data <<COMMIT
+ INPUT_END
+
+ printf "Pi: \360\nCOMMIT\n" >>input &&
+
+ git fast-import <input &&
+ git cat-file -p encoding | grep $(printf "\360") &&
+ git log -1 --format=%B encoding | grep $(printf "\317\200")
+'
+
test_done