From 168c1e012030c65f2c1bcdd56a6ec77e37e5a743 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Kreileder?= Date: Sat, 17 Dec 2011 10:22:21 +0100 Subject: gitweb: Call to_utf8() on input string in chop_and_escape_str() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit a) To fix the comparison with the chopped string, otherwise we compare bytes with characters, as chop_str() must run to_utf8() for correct operation b) To give the title attribute correct encoding; we need to mark strings as UTF-8 before outpur Signed-off-by: Jürgen Kreileder Acked-by: Jakub Narębski Signed-off-by: Junio C Hamano diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index 4f0c3bd..b077032 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -1695,6 +1695,7 @@ sub chop_and_escape_str { my ($str) = @_; my $chopped = chop_str(@_); + $str = to_utf8($str); if ($chopped eq $str) { return esc_html($chopped); } else { -- cgit v0.10.2-6-g49f6 From 5d7910569b2caeb65354d17b26359cb235026b39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Kreileder?= Date: Sat, 17 Dec 2011 10:22:22 +0100 Subject: gitweb: esc_html() site name for title in OPML MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This escapes the site name in OPML (XML uses the same escaping rules as HTML). Also fixes encoding issues because esc_html() uses to_utf8(). Signed-off-by: Jürgen Kreileder Acked-by: Jakub Narębski Signed-off-by: Junio C Hamano diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index b077032..a9f39d5 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -7700,11 +7700,12 @@ sub git_opml { -charset => 'utf-8', -content_disposition => 'inline; filename="opml.xml"'); + my $title = esc_html($site_name); print < - $site_name OPML Export + $title OPML Export -- cgit v0.10.2-6-g49f6 From 57cf4ad6e82af6aaa38bb215ea35ea9c465c6045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrgen=20Kreileder?= Date: Sat, 17 Dec 2011 10:22:23 +0100 Subject: gitweb: Output valid utf8 in git_blame_common('data') MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise when javascript-actions are enabled gitweb shown broken author names in the tooltips on blame pages ('blame_incremental' view). Signed-off-by: Jürgen Kreileder Acked-by: Jakub Narębski Signed-off-by: Junio C Hamano diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index a9f39d5..dc2ad9d 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -6108,7 +6108,9 @@ sub git_blame_common { -type=>"text/plain", -charset => "utf-8", -status=> "200 OK"); local $| = 1; # output autoflush - print while <$fd>; + while (my $line = <$fd>) { + print to_utf8($line); + } close $fd or print "ERROR $!\n"; -- cgit v0.10.2-6-g49f6 From b13e3eacefc0fb6f4f89738f74ba5ef14437bed5 Mon Sep 17 00:00:00 2001 From: Jakub Narebski Date: Sun, 18 Dec 2011 23:00:58 +0100 Subject: gitweb: Fix fallback mode of to_utf8 subroutine e5d3de5 (gitweb: use Perl built-in utf8 function for UTF-8 decoding., 2007-12-04) was meant to make gitweb faster by using Perl's internals (see subsection "Messing with Perl's Internals" in Encode(3pm) manpage) Simple benchmark confirms that (old = 00f429a, new = this version): old new old -- -65% new 189% -- Unfortunately it made fallback mode of to_utf8 do not work... except for default value 'latin1' of $fallback_encoding ('latin1' is Perl native encoding), which is why it was not noticed for such long time. utf8::valid(STRING) is an internal function that tests whether STRING is in a _consistent state_ regarding UTF-8. It returns true is well-formed UTF-8 and has the UTF-8 flag on _*or*_ if string is held as bytes (both these states are 'consistent'). For gitweb the second option was true, as output from git commands is opened without ':utf8' layer. What made it work at all for STRING in 'latin1' encoding is the fact that utf8:decode(STRING) turns on UTF-8 flag only if source string is valid UTF-8 and contains multi-byte UTF-8 characters... and that if string doesn't have UTF-8 flag set it is treated as in native Perl encoding, i.e. 'latin1' / 'iso-8859-1' (unless native encoding it is EBCDIC ;-)). It was ':utf8' layer that actually converted 'latin1' (no UTF-8 flag == native == 'latin1) to 'utf8'. Let's make use of the fact that utf8:decode(STRING) returns false if STRING is invalid as UTF-8 to check whether to enable fallback mode. Signed-off-by: Jakub Narebski Signed-off-by: Junio C Hamano diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl index dc2ad9d..874023a 100755 --- a/gitweb/gitweb.perl +++ b/gitweb/gitweb.perl @@ -1442,8 +1442,8 @@ sub validate_refname { sub to_utf8 { my $str = shift; return undef unless defined $str; - if (utf8::valid($str)) { - utf8::decode($str); + + if (utf8::is_utf8($str) || utf8::decode($str)) { return $str; } else { return decode($fallback_encoding, $str, Encode::FB_DEFAULT); -- cgit v0.10.2-6-g49f6