summaryrefslogtreecommitdiff
path: root/t/t7812-grep-icase-non-ascii.sh
diff options
context:
space:
mode:
authorHamza Mahfooz <someguy@effective-light.com>2021-10-15 16:13:56 (GMT)
committerJunio C Hamano <gitster@pobox.com>2021-10-15 19:45:39 (GMT)
commitae39ba431ab861548eb60b4bd2e1d8b8813db76f (patch)
treea2c09de1515827b3a657064b689a5280f8b51c8b /t/t7812-grep-icase-non-ascii.sh
parent6a5c337922a5221d1f6d025d84e18b526df9944c (diff)
downloadgit-ae39ba431ab861548eb60b4bd2e1d8b8813db76f.zip
git-ae39ba431ab861548eb60b4bd2e1d8b8813db76f.tar.gz
git-ae39ba431ab861548eb60b4bd2e1d8b8813db76f.tar.bz2
grep/pcre2: fix an edge case concerning ascii patterns and UTF-8 data
If we attempt to grep non-ascii log message text with an ascii pattern, we run into the following issue: $ git log --color --author='.var.*Bjar' -1 origin/master | grep ^Author grep: (standard input): binary file matches So, to fix this teach the grep code to use PCRE2_UTF, as long as the log output is encoded in UTF-8. Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com> Signed-off-by: Hamza Mahfooz <someguy@effective-light.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't/t7812-grep-icase-non-ascii.sh')
-rwxr-xr-xt/t7812-grep-icase-non-ascii.sh48
1 files changed, 48 insertions, 0 deletions
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index e5d1e4e..22487d9 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -53,6 +53,54 @@ test_expect_success REGEX_LOCALE 'pickaxe -i on non-ascii' '
test_cmp expected actual
'
+test_expect_success GETTEXT_LOCALE,PCRE 'log --author with an ascii pattern on UTF-8 data' '
+ cat >expected <<-\EOF &&
+ Author: <BOLD;RED>À Ú Thor<RESET> <author@example.com>
+ EOF
+ test_write_lines "forth" >file4 &&
+ git add file4 &&
+ git commit --author="À Ú Thor <author@example.com>" -m sécond &&
+ git log -1 --color=always --perl-regexp --author=".*Thor" >log &&
+ grep Author log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success GETTEXT_LOCALE,PCRE 'log --committer with an ascii pattern on ISO-8859-1 data' '
+ cat >expected <<-\EOF &&
+ Commit: Ç<BOLD;RED> O Mîtter <committer@example.com><RESET>
+ EOF
+ test_write_lines "fifth" >file5 &&
+ git add file5 &&
+ GIT_COMMITTER_NAME="Ç O Mîtter" &&
+ GIT_COMMITTER_EMAIL="committer@example.com" &&
+ git -c i18n.commitEncoding=latin1 commit -m thïrd &&
+ git -c i18n.logOutputEncoding=latin1 log -1 --pretty=fuller --color=always --perl-regexp --committer=" O.*" >log &&
+ grep Commit: log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on UTF-8 data' '
+ cat >expected <<-\EOF &&
+ sé<BOLD;RED>con<RESET>d
+ EOF
+ git log -1 --color=always --perl-regexp --grep="con" >log &&
+ grep con log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
+test_expect_success GETTEXT_LOCALE,PCRE 'log --grep with an ascii pattern on ISO-8859-1 data' '
+ cat >expected <<-\EOF &&
+ <BOLD;RED>thïrd<RESET>
+ EOF
+ git -c i18n.logOutputEncoding=latin1 log -1 --color=always --perl-regexp --grep="th.*rd" >log &&
+ grep "th.*rd" log >actual.raw &&
+ test_decode_color <actual.raw >actual &&
+ test_cmp expected actual
+'
+
test_expect_success GETTEXT_LOCALE,LIBPCRE2 'PCRE v2: setup invalid UTF-8 data' '
printf "\\200\\n" >invalid-0x80 &&
echo "ævar" >expected &&