gitweb: Handle non UTF-8 text better
gitweb assumes that everything is in UTF-8. If a text contains invalid UTF-8 character sequences, the text must be in a different encoding. This commit introduces $fallback_encoding which would be used as input encoding if gitweb encounters text which is not valid UTF-8. Add basic test for this in t/t9500-gitweb-standalone-no-errors.sh Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at> Signed-off-by: Jakub Narebski <jnareb@gmail.com> Tested-by: Alexandre Julliard <julliard@winehq.org> Tested-by: Ismail Dönmez <ismail@pardus.org.tr> Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
2169368fc1
commit
00f429af7b
@ -94,6 +94,13 @@ our $default_text_plain_charset = undef;
|
|||||||
# (relative to the current git repository)
|
# (relative to the current git repository)
|
||||||
our $mimetypes_file = undef;
|
our $mimetypes_file = undef;
|
||||||
|
|
||||||
|
# assume this charset if line contains non-UTF-8 characters;
|
||||||
|
# it should be a valid encoding (see Encode::Supported(3pm) for list),
|
||||||
|
# for which encoding all byte sequences are valid, for example
|
||||||
|
# 'iso-8859-1' aka 'latin1' (it is decoded without checking, so it
|
||||||
|
# could be even 'utf-8' for the old behavior)
|
||||||
|
our $fallback_encoding = 'latin1';
|
||||||
|
|
||||||
# You define site-wide feature defaults here; override them with
|
# You define site-wide feature defaults here; override them with
|
||||||
# $GITWEB_CONFIG as necessary.
|
# $GITWEB_CONFIG as necessary.
|
||||||
our %feature = (
|
our %feature = (
|
||||||
@ -602,6 +609,20 @@ sub validate_refname {
|
|||||||
return $input;
|
return $input;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Decode a sequence of octets into Perl's internal character form,
# which is utf-8 with the utf8 flag set if needed.  gitweb writes out
# in utf-8 thanks to "binmode STDOUT, ':utf8'" at beginning.
#
# The input is first tried as strict UTF-8; if it is not valid UTF-8
# it is decoded using $fallback_encoding instead.
#
# Returns the decoded character string.  Undefined input yields undef,
# and a string that is already a character string (utf8 flag set) is
# returned unchanged, so calling to_utf8 twice on the same data is safe.
sub to_utf8 {
	my $str = shift;

	# Explicit 'return undef' (not a bare 'return'): callers use the
	# result inside hash constructors, e.g. "owner => to_utf8($owner)",
	# where an empty list would shift the remaining key/value pairs.
	return undef unless defined $str;

	# Already decoded; decoding again would die on wide characters
	# (both in the strict attempt and in the fallback below).
	return $str if utf8::is_utf8($str);

	# FB_CROAK makes decode_utf8 die on malformed input, which the eval
	# turns into an undefined $res.  LEAVE_SRC stops Encode from
	# overwriting $str in place, so the fallback sees the whole string.
	my $res = eval {
		decode_utf8($str, Encode::FB_CROAK | Encode::LEAVE_SRC);
	};
	return $res if defined $res;

	# Not valid UTF-8: decode without checking using the site-configured
	# fallback encoding.
	return decode($fallback_encoding, $str, Encode::FB_DEFAULT);
}
|
||||||
|
|
||||||
# quote unsafe chars, but keep the slash, even when it's not
|
# quote unsafe chars, but keep the slash, even when it's not
|
||||||
# correct, but quoted slashes look too horrible in bookmarks
|
# correct, but quoted slashes look too horrible in bookmarks
|
||||||
sub esc_param {
|
sub esc_param {
|
||||||
@ -626,7 +647,7 @@ sub esc_html ($;%) {
|
|||||||
my $str = shift;
|
my $str = shift;
|
||||||
my %opts = @_;
|
my %opts = @_;
|
||||||
|
|
||||||
$str = decode_utf8($str);
|
$str = to_utf8($str);
|
||||||
$str = $cgi->escapeHTML($str);
|
$str = $cgi->escapeHTML($str);
|
||||||
if ($opts{'-nbsp'}) {
|
if ($opts{'-nbsp'}) {
|
||||||
$str =~ s/ / /g;
|
$str =~ s/ / /g;
|
||||||
@ -640,7 +661,7 @@ sub esc_path {
|
|||||||
my $str = shift;
|
my $str = shift;
|
||||||
my %opts = @_;
|
my %opts = @_;
|
||||||
|
|
||||||
$str = decode_utf8($str);
|
$str = to_utf8($str);
|
||||||
$str = $cgi->escapeHTML($str);
|
$str = $cgi->escapeHTML($str);
|
||||||
if ($opts{'-nbsp'}) {
|
if ($opts{'-nbsp'}) {
|
||||||
$str =~ s/ / /g;
|
$str =~ s/ / /g;
|
||||||
@ -925,7 +946,7 @@ sub format_subject_html {
|
|||||||
|
|
||||||
if (length($short) < length($long)) {
|
if (length($short) < length($long)) {
|
||||||
return $cgi->a({-href => $href, -class => "list subject",
|
return $cgi->a({-href => $href, -class => "list subject",
|
||||||
-title => decode_utf8($long)},
|
-title => to_utf8($long)},
|
||||||
esc_html($short) . $extra);
|
esc_html($short) . $extra);
|
||||||
} else {
|
} else {
|
||||||
return $cgi->a({-href => $href, -class => "list subject"},
|
return $cgi->a({-href => $href, -class => "list subject"},
|
||||||
@ -1239,7 +1260,7 @@ sub git_get_projects_list {
|
|||||||
if (check_export_ok("$projectroot/$path")) {
|
if (check_export_ok("$projectroot/$path")) {
|
||||||
my $pr = {
|
my $pr = {
|
||||||
path => $path,
|
path => $path,
|
||||||
owner => decode_utf8($owner),
|
owner => to_utf8($owner),
|
||||||
};
|
};
|
||||||
push @list, $pr;
|
push @list, $pr;
|
||||||
(my $forks_path = $path) =~ s/\.git$//;
|
(my $forks_path = $path) =~ s/\.git$//;
|
||||||
@ -1269,7 +1290,7 @@ sub git_get_project_owner {
|
|||||||
$pr = unescape($pr);
|
$pr = unescape($pr);
|
||||||
$ow = unescape($ow);
|
$ow = unescape($ow);
|
||||||
if ($pr eq $project) {
|
if ($pr eq $project) {
|
||||||
$owner = decode_utf8($ow);
|
$owner = to_utf8($ow);
|
||||||
last;
|
last;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1759,7 +1780,7 @@ sub get_file_owner {
|
|||||||
}
|
}
|
||||||
my $owner = $gcos;
|
my $owner = $gcos;
|
||||||
$owner =~ s/[,;].*$//;
|
$owner =~ s/[,;].*$//;
|
||||||
return decode_utf8($owner);
|
return to_utf8($owner);
|
||||||
}
|
}
|
||||||
|
|
||||||
## ......................................................................
|
## ......................................................................
|
||||||
@ -1842,7 +1863,7 @@ sub git_header_html {
|
|||||||
|
|
||||||
my $title = "$site_name";
|
my $title = "$site_name";
|
||||||
if (defined $project) {
|
if (defined $project) {
|
||||||
$title .= " - " . decode_utf8($project);
|
$title .= " - " . to_utf8($project);
|
||||||
if (defined $action) {
|
if (defined $action) {
|
||||||
$title .= "/$action";
|
$title .= "/$action";
|
||||||
if (defined $file_name) {
|
if (defined $file_name) {
|
||||||
@ -2116,7 +2137,7 @@ sub git_print_page_path {
|
|||||||
|
|
||||||
print "<div class=\"page_path\">";
|
print "<div class=\"page_path\">";
|
||||||
print $cgi->a({-href => href(action=>"tree", hash_base=>$hb),
|
print $cgi->a({-href => href(action=>"tree", hash_base=>$hb),
|
||||||
-title => 'tree root'}, decode_utf8("[$project]"));
|
-title => 'tree root'}, to_utf8("[$project]"));
|
||||||
print " / ";
|
print " / ";
|
||||||
if (defined $name) {
|
if (defined $name) {
|
||||||
my @dirname = split '/', $name;
|
my @dirname = split '/', $name;
|
||||||
@ -2936,7 +2957,7 @@ sub git_project_list_body {
|
|||||||
($pr->{'age'}, $pr->{'age_string'}) = @aa;
|
($pr->{'age'}, $pr->{'age_string'}) = @aa;
|
||||||
if (!defined $pr->{'descr'}) {
|
if (!defined $pr->{'descr'}) {
|
||||||
my $descr = git_get_project_description($pr->{'path'}) || "";
|
my $descr = git_get_project_description($pr->{'path'}) || "";
|
||||||
$pr->{'descr_long'} = decode_utf8($descr);
|
$pr->{'descr_long'} = to_utf8($descr);
|
||||||
$pr->{'descr'} = chop_str($descr, 25, 5);
|
$pr->{'descr'} = chop_str($descr, 25, 5);
|
||||||
}
|
}
|
||||||
if (!defined $pr->{'owner'}) {
|
if (!defined $pr->{'owner'}) {
|
||||||
@ -3981,7 +4002,7 @@ sub git_snapshot {
|
|||||||
my $git = git_cmd_str();
|
my $git = git_cmd_str();
|
||||||
my $name = $project;
|
my $name = $project;
|
||||||
$name =~ s/\047/\047\\\047\047/g;
|
$name =~ s/\047/\047\\\047\047/g;
|
||||||
my $filename = decode_utf8(basename($project));
|
my $filename = to_utf8(basename($project));
|
||||||
my $cmd;
|
my $cmd;
|
||||||
if ($suffix eq 'zip') {
|
if ($suffix eq 'zip') {
|
||||||
$filename .= "-$hash.$suffix";
|
$filename .= "-$hash.$suffix";
|
||||||
|
@ -487,4 +487,32 @@ test_expect_success \
|
|||||||
'gitweb_run "p=.git;a=atom"'
|
'gitweb_run "p=.git;a=atom"'
|
||||||
test_debug 'cat gitweb.log'
|
test_debug 'cat gitweb.log'
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------
|
||||||
|
# encoding/decoding
|
||||||
|
|
||||||
|
test_expect_success \
|
||||||
|
'encode(commit): utf8' \
|
||||||
|
'. ../t3901-utf8.txt &&
|
||||||
|
echo "UTF-8" >> file &&
|
||||||
|
git add file &&
|
||||||
|
git commit -F ../t3900/1-UTF-8.txt &&
|
||||||
|
gitweb_run "p=.git;a=commit"'
|
||||||
|
test_debug 'cat gitweb.log'
|
||||||
|
|
||||||
|
test_expect_success \
|
||||||
|
'encode(commit): iso-8859-1' \
|
||||||
|
'. ../t3901-8859-1.txt &&
|
||||||
|
echo "ISO-8859-1" >> file &&
|
||||||
|
git add file &&
|
||||||
|
git config i18n.commitencoding ISO-8859-1 &&
|
||||||
|
git commit -F ../t3900/ISO-8859-1.txt &&
|
||||||
|
git config --unset i18n.commitencoding &&
|
||||||
|
gitweb_run "p=.git;a=commit"'
|
||||||
|
test_debug 'cat gitweb.log'
|
||||||
|
|
||||||
|
test_expect_success \
|
||||||
|
'encode(log): utf-8 and iso-8859-1' \
|
||||||
|
'gitweb_run "p=.git;a=log"'
|
||||||
|
test_debug 'cat gitweb.log'
|
||||||
|
|
||||||
test_done
|
test_done
|
||||||
|
Loading…
Reference in New Issue
Block a user