summaryrefslogtreecommitdiff
path: root/t/t9802-git-p4-filetype.sh
diff options
context:
space:
mode:
author Tao Klerks <tao@klerks.biz> 2022-04-04 05:50:36 (GMT)
committer Junio C Hamano <gitster@pobox.com> 2022-04-06 19:59:58 (GMT)
commit fbe5f6b80437adbcd58af1b3751b830910a2ddaa (patch)
tree 21ee632bc2823430943d7f5ff4af33591a06637f /t/t9802-git-p4-filetype.sh
parent faa21c10d44184f616d391c158dcbb13b9c72ef3 (diff)
downloadgit-fbe5f6b80437adbcd58af1b3751b830910a2ddaa.zip
git-fbe5f6b80437adbcd58af1b3751b830910a2ddaa.tar.gz
git-fbe5f6b80437adbcd58af1b3751b830910a2ddaa.tar.bz2
git-p4: preserve utf8 BOM when importing from p4 to git
Perforce has a file type "utf8" which represents a text file with explicit BOM. utf8-encoded files *without* BOM are stored as regular file type "text". The "utf8" file type behaves like text in all but one important way: it is stored, internally, without the leading 3 BOM bytes. git-p4 has historically imported utf8-with-BOM files (files stored, in Perforce, as type "utf8") the same way as regular text files - losing the BOM in the process. Under most circumstances this issue has little functional impact, as most systems consider the BOM to be optional and redundant, but this *is* a correctness failure, and can lead to practical issues, for example when BOMs are explicitly included in test files such as those of a file encoding test suite. Fix the handling of utf8-with-BOM files when importing changes from p4 to git, and introduce a test that checks it is working correctly. Signed-off-by: Tao Klerks <tao@klerks.biz> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't/t9802-git-p4-filetype.sh')
-rwxr-xr-xt/t9802-git-p4-filetype.sh34
1 files changed, 34 insertions, 0 deletions
diff --git a/t/t9802-git-p4-filetype.sh b/t/t9802-git-p4-filetype.sh
index 19073c6..2a6ee2a 100755
--- a/t/t9802-git-p4-filetype.sh
+++ b/t/t9802-git-p4-filetype.sh
@@ -333,4 +333,38 @@ test_expect_success SYMLINKS 'empty symlink target' '
)
'
+test_expect_success SYMLINKS 'utf-8 with and without BOM in text file' '
+ (
+ cd "$cli" &&
+ # submit three files: utf8 text without a BOM, the same text with a
+ # BOM, and a file holding nothing but the BOM
+ echo some tǣxt >utf8-nobom-test &&
+
+ # BOM = bytes EF BB BF; octal printf escapes are portable, sed \xNN is not
+ printf "\357\273\277some tǣxt\n" >utf8-bom-test &&
+
+ # bom only: the first 3 bytes of the file above
+ dd bs=1 count=3 if=utf8-bom-test of=utf8-bom-empty-test &&
+
+ p4 add utf8-nobom-test utf8-bom-test utf8-bom-empty-test &&
+ p4 submit -d "add utf8 test files"
+ ) &&
+ test_when_finished cleanup_git &&
+
+ git p4 clone --dest="$git" //depot@all &&
+ (
+ cd "$git" &&
+ git checkout refs/remotes/p4/master &&
+
+ echo some tǣxt >utf8-nobom-check &&
+ test_cmp utf8-nobom-check utf8-nobom-test &&
+
+ printf "\357\273\277some tǣxt\n" >utf8-bom-check &&
+ test_cmp utf8-bom-check utf8-bom-test &&
+
+ dd bs=1 count=3 if=utf8-bom-check of=utf8-bom-empty-check &&
+ test_cmp utf8-bom-empty-check utf8-bom-empty-test
+ )
+'
+
test_done