1 Subject: [PATCH] gitweb: use to_utf8 on all input lines
3 Git does not mandate a character set encoding in the data it
4 tracks. Therefore the output of any Git command that includes
5 user data could be in any encoding.
7 Just automatically marking it as UTF-8 with the ':utf8' option
8 is wrong and can lead to unexpected errors and/or misdisplayed data.
11 Instead, make sure every input line that may contain user-created
12 bytes passes through the to_utf8 function before being used.
14 Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
16 gitweb/gitweb.perl | 139 +++++++++++++++++++++++++++++++++++------------------
17 1 file changed, 93 insertions(+), 46 deletions(-)
19 diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
20 index be7bf206..f144ab30 100755
21 --- a/gitweb/gitweb.perl
22 +++ b/gitweb/gitweb.perl
23 @@ -1501,6 +1501,7 @@ our $CGI = 'CGI';
26 our $fcgi_nproc_active = 0;
27 +our $fcgi_raw_mode = 0;
30 my $stdinfno = fileno STDIN;
31 @@ -1512,7 +1513,33 @@ sub is_fcgi {
32 sub configure_as_fcgi {
38 + # We have gone to great effort to make sure that all incoming data has
39 + # been converted from whatever format it was in into UTF-8. We have
40 + # even taken care to make sure the output handle is in ':utf8' mode.
41 + # Now along comes FCGI and blows it with:
43 + # Use of wide characters in FCGI::Stream::PRINT is deprecated
44 + # and will stop wprking[sic] in a future version of FCGI
46 + # To fix this we replace FCGI::Stream::PRINT with our own routine that
47 + # first encodes everything and then calls the original routine, but
48 + # not if $fcgi_raw_mode is true (then we just call the original routine).
50 + # Note that we could do this by using utf8::is_utf8 to check instead
51 + # of having a $fcgi_raw_mode global, but that would be slower to run
52 + # the test on each element and much slower than skipping the conversion
53 + # entirely when we know we're outputting raw bytes.
54 + my $orig = \&FCGI::Stream::PRINT;
55 + undef *FCGI::Stream::PRINT;
56 + *FCGI::Stream::PRINT = sub {
57 + @_ = (shift, map {my $x=$_; utf8::encode($x); $x} @_)
58 + unless $fcgi_raw_mode;
62 our $CGI = 'CGI::Fast';
65 @@ -1564,6 +1591,7 @@ sub _reset_globals {
67 our $gitweb_project_owner = undef;
68 our $shown_stale_message = 0;
69 + our $fcgi_raw_mode = 0;
70 keys %known_snapshot_formats; # reset 'each' iterator
73 @@ -3041,7 +3069,7 @@ sub git_parse_project_config {
74 defined(my $fh = git_cmd_pipe "config", '-z', '-l')
77 - while (my $keyval = <$fh>) {
78 + while (my $keyval = to_utf8(scalar <$fh>)) {
80 my ($key, $value) = split(/\n/, $keyval, 2);
82 @@ -3155,7 +3183,7 @@ sub git_get_hash_by_path {
84 defined(my $fd = git_cmd_pipe "ls-tree", $base, "--", $path)
85 or die_error(500, "Open git-ls-tree failed");
87 + my $line = to_utf8(scalar <$fd>);
88 close $fd or return undef;
91 @@ -3182,7 +3210,7 @@ sub git_get_path_by_hash {
93 defined(my $fd = git_cmd_pipe "ls-tree", '-r', '-t', '-z', $base)
95 - while (my $line = <$fd>) {
96 + while (my $line = to_utf8(scalar <$fd>)) {
99 #'040000 tree 595596a6a9117ddba9fe379b6b012b558bac8423 gitweb'
100 @@ -3208,7 +3236,7 @@ sub git_get_file_or_project_config {
101 $git_dir = "$projectroot/$path";
102 open my $fd, '<', "$git_dir/$name"
103 or return git_get_project_config($name);
105 + my $conf = to_utf8(scalar <$fd>);
109 @@ -3249,6 +3277,7 @@ sub git_get_project_ctags {
112 (my $ctag = $tagfile) =~ s#.*/##;
113 + $ctag = to_utf8($ctag);
114 if ($val =~ /^\d+$/) {
115 $ctags->{$ctag} = $val;
117 @@ -3258,7 +3287,7 @@ sub git_get_project_ctags {
120 } elsif (open my $fh, '<', "$git_dir/ctags") {
121 - while (my $line = <$fh>) {
122 + while (my $line = to_utf8(scalar <$fh>)) {
124 $ctags->{$line}++ if $line;
126 @@ -3358,7 +3387,7 @@ sub git_get_project_url_list {
127 or return wantarray ?
128 @{ config_to_multi(git_get_project_config('url')) } :
129 config_to_multi(git_get_project_config('url'));
130 - my @git_project_url_list = map { chomp; $_ } <$fd>;
131 + my @git_project_url_list = map { chomp; to_utf8($_) } <$fd>;
134 return wantarray ? @git_project_url_list : \@git_project_url_list;
135 @@ -3658,7 +3687,7 @@ sub git_get_remotes_list {
137 my $fd = git_cmd_pipe 'remote', '-v';
139 - while (my $remote = <$fd>) {
140 + while (my $remote = to_utf8(scalar <$fd>)) {
142 $remote =~ s!\t(.*?)\s+\((\w+)\)$!!;
143 next if $wanted and not $remote eq $wanted;
144 @@ -3693,7 +3722,7 @@ sub git_get_references {
145 ($type ? ("--", "refs/$type") : ())) # use -- <pattern> if $type
148 - while (my $line = <$fd>) {
149 + while (my $line = to_utf8(scalar <$fd>)) {
151 if ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type.*)$!) {
152 if (defined $refs{$1}) {
153 @@ -3712,7 +3741,7 @@ sub git_get_rev_name_tags {
155 defined(my $fd = git_cmd_pipe "name-rev", "--tags", $hash)
157 - my $name_rev = <$fd>;
158 + my $name_rev = to_utf8(scalar <$fd>);
161 if ($name_rev =~ m|^$hash tags/(.*)$|) {
162 @@ -3767,7 +3796,7 @@ sub parse_tag {
164 defined(my $fd = git_cmd_pipe "cat-file", "tag", $tag_id) or return;
165 $tag{'id'} = $tag_id;
166 - while (my $line = <$fd>) {
167 + while (my $line = to_utf8(scalar <$fd>)) {
169 if ($line =~ m/^object ([0-9a-fA-F]{40})$/) {
171 @@ -3792,7 +3821,7 @@ sub parse_tag {
175 - push @comment, <$fd>;
176 + push @comment, map(to_utf8($_), <$fd>);
177 $tag{'comment'} = \@comment;
179 if (!defined $tag{'name'}) {
180 @@ -3851,6 +3880,7 @@ sub parse_commit_text {
181 $co{'parents'} = \@parents;
182 $co{'parent'} = $parents[0];
184 + @commit_lines = map to_utf8($_), @commit_lines;
185 foreach my $title (@commit_lines) {
188 @@ -4094,7 +4124,7 @@ sub git_get_heads_list {
189 '--format=%(objectname) %(refname) %(subject)%00%(committer)',
192 - while (my $line = <$fd>) {
193 + while (my $line = to_utf8(scalar <$fd>)) {
197 @@ -4142,7 +4172,7 @@ sub git_get_tags_list {
198 '%(*objectname) %(*objecttype) %(subject)%00%(creator)',
199 ($all ? 'refs' : 'refs/tags'))
201 - while (my $line = <$fd>) {
202 + while (my $line = to_utf8(scalar <$fd>)) {
206 @@ -4203,7 +4233,9 @@ sub insert_file {
207 my $filename = shift;
209 open my $fd, '<', $filename;
210 - print map { to_utf8($_) } <$fd>;
217 @@ -4937,7 +4969,9 @@ sub git_print_section {
218 } elsif (ref($content) eq 'SCALAR') {
219 print esc_html($$content);
220 } elsif (ref($content) eq 'GLOB' or ref($content) eq 'IO::Handle') {
222 + while (<$content>) {
225 } elsif (!ref($content) && defined($content)) {
228 @@ -5109,7 +5143,7 @@ sub git_get_link_target {
232 - $link_target = <$fd>;
233 + $link_target = to_utf8(scalar <$fd>);
237 @@ -5848,7 +5882,7 @@ sub git_patchset_body {
238 print "<div class=\"patchset\">\n";
240 # skip to first patch
241 - while ($patch_line = <$fd>) {
242 + while ($patch_line = to_utf8(scalar <$fd>)) {
245 last if ($patch_line =~ m/^diff /);
246 @@ -5916,7 +5950,7 @@ sub git_patchset_body {
247 # print extended diff header
248 print "<div class=\"diff extended_header\">\n";
250 - while ($patch_line = <$fd>) {
251 + while ($patch_line = to_utf8(scalar<$fd>)) {
254 last EXTENDED_HEADER if ($patch_line =~ m/^--- |^diff /);
255 @@ -5935,7 +5969,7 @@ sub git_patchset_body {
256 #assert($patch_line =~ m/^---/) if DEBUG;
258 my $last_patch_line = $patch_line;
259 - $patch_line = <$fd>;
260 + $patch_line = to_utf8(scalar <$fd>);
262 #assert($patch_line =~ m/^\+\+\+/) if DEBUG;
264 @@ -5945,7 +5979,7 @@ sub git_patchset_body {
268 - while ($patch_line = <$fd>) {
269 + while ($patch_line = to_utf8(scalar <$fd>)) {
272 next PATCH if ($patch_line =~ m/^diff /);
273 @@ -6900,7 +6934,7 @@ sub git_search_changes {
277 - while (my $line = <$fd>) {
278 + while (my $line = to_utf8(scalar <$fd>)) {
282 @@ -6984,7 +7018,7 @@ sub git_search_files {
286 - while (my $line = <$fd>) {
287 + while (my $line = to_utf8(scalar <$fd>)) {
289 my ($file, $lno, $ltext, $binary);
290 last if ($matches++ > 1000);
291 @@ -7421,7 +7455,6 @@ sub git_blame_common {
292 $hash_base, '--', $file_name)
293 or die_error(500, "Open git-blame --porcelain failed");
295 - binmode $fd, ':utf8';
297 # incremental blame data returns early
298 if ($format eq 'data') {
299 @@ -7429,8 +7462,8 @@ sub git_blame_common {
300 -type=>"text/plain", -charset => "utf-8",
302 local $| = 1; # output autoflush
303 - while (my $line = <$fd>) {
304 - print to_utf8($line);
309 or print "ERROR $!\n";
310 @@ -7508,7 +7541,7 @@ sub git_blame_common {
314 - while (my $line = <$fd>) {
315 + while (my $line = to_utf8(scalar <$fd>)) {
319 @@ -7527,7 +7560,7 @@ sub git_blame_common {
323 - while (my $line = <$fd>) {
324 + while (my $line = to_utf8(scalar <$fd>)) {
326 # the header: <SHA-1> <src lineno> <dst lineno> [<lines in group>]
327 # no <lines in group> for subsequent lines in group of lines
328 @@ -7538,7 +7571,7 @@ sub git_blame_common {
330 my $meta = $metainfo{$full_rev};
332 - while ($data = <$fd>) {
333 + while ($data = to_utf8(scalar <$fd>)) {
335 last if ($data =~ s/^\t//); # contents of line
336 if ($data =~ /^(\S+)(?: (.*))?$/) {
337 @@ -7726,6 +7759,7 @@ sub git_blob_plain {
339 defined(my $fd = git_cmd_pipe "cat-file", "blob", $hash)
340 or die_error(500, "Open git-cat-file blob '$hash' failed");
343 # content-type (can include charset)
344 $type = blob_contenttype($fd, $file_name, $type);
345 @@ -7762,10 +7796,14 @@ sub git_blob_plain {
346 -content_disposition =>
347 ($sandbox ? 'attachment' : 'inline')
348 . '; filename="' . $save_as . '"');
350 binmode STDOUT, ':raw';
352 + $fcgi_raw_mode = 1;
354 + while (read($fd, $buf, 32768)) {
357 binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
358 + $fcgi_raw_mode = 0;
362 @@ -7848,7 +7886,7 @@ sub git_blob {
366 - while (my $line = <$fd>) {
367 + while (my $line = to_utf8(scalar <$fd>)) {
370 $line = untabify($line);
371 @@ -7885,7 +7923,7 @@ sub git_tree {
372 defined(my $fd = git_cmd_pipe "ls-tree", '-z',
373 ($show_sizes ? '-l' : ()), @extra_options, $hash)
374 or die_error(500, "Open git-ls-tree failed");
375 - @entries = map { chomp; $_ } <$fd>;
376 + @entries = map { chomp; to_utf8($_) } <$fd>;
378 or die_error(404, "Reading tree failed");
380 @@ -8104,9 +8142,15 @@ sub git_snapshot {
382 defined(my $fd = cmd_pipe @cmd)
383 or die_error(500, "Execute git-archive failed");
385 binmode STDOUT, ':raw';
387 + $fcgi_raw_mode = 1;
389 + while (read($fd, $buf, 32768)) {
392 binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
393 + $fcgi_raw_mode = 0;
397 @@ -8239,7 +8283,7 @@ sub git_commit {
398 (@$parents <= 1 ? $parent : '-c'),
400 or die_error(500, "Open git-diff-tree failed");
401 - @difftree = map { chomp; $_ } <$fd>;
402 + @difftree = map { chomp; to_utf8($_) } <$fd>;
403 close $fd or die_error(404, "Reading git-diff-tree failed");
405 # non-textual hash id's can be cached
406 @@ -8333,7 +8377,7 @@ sub git_object {
407 # here errors should not happen
408 defined(my $fd = git_cmd_pipe "ls-tree", $hash_base, "--", $file_name)
409 or die_error(500, "Open git-ls-tree failed");
411 + my $line = to_utf8(scalar <$fd>);
414 #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
415 @@ -8370,7 +8414,7 @@ sub git_blobdiff {
416 $hash_parent_base, $hash_base,
417 "--", (defined $file_parent ? $file_parent : ()), $file_name)
418 or die_error(500, "Open git-diff-tree failed");
419 - @difftree = map { chomp; $_ } <$fd>;
420 + @difftree = map { chomp; to_utf8($_) } <$fd>;
422 or die_error(404, "Reading git-diff-tree failed");
424 @@ -8388,7 +8432,7 @@ sub git_blobdiff {
425 # ':100644 100644 03b21826... 3b93d5e7... M ls-files.c'
427 grep { /^:[0-7]{6} [0-7]{6} [0-9a-fA-F]{40} $hash/ }
428 - map { chomp; $_ } <$fd>;
429 + map { chomp; to_utf8($_) } <$fd>;
431 or die_error(404, "Reading git-diff-tree failed");
433 @@ -8473,7 +8517,7 @@ sub git_blobdiff {
437 - while (my $line = <$fd>) {
438 + while (my $line = to_utf8(scalar <$fd>)) {
439 $line =~ s!a/($hash|$hash_parent)!'a/'.esc_path($diffinfo{'from_file'})!eg;
440 $line =~ s!b/($hash|$hash_parent)!'b/'.esc_path($diffinfo{'to_file'})!eg;
442 @@ -8481,8 +8525,9 @@ sub git_blobdiff {
444 last if $line =~ m!^\+\+\+!;
454 @@ -8614,7 +8659,7 @@ sub git_commitdiff {
455 $hash_parent_param, $hash, "--")
456 or die_error(500, "Open git-diff-tree failed");
458 - while (my $line = <$fd>) {
459 + while (my $line = to_utf8(scalar <$fd>)) {
461 # empty line ends raw part of diff-tree output
463 @@ -8728,13 +8773,15 @@ sub git_commitdiff {
466 } elsif ($format eq 'plain') {
473 or print "Reading git-diff-tree failed\n";
474 } elsif ($format eq 'patch') {
481 or print "Reading git-format-patch failed\n";
483 @@ -9001,7 +9048,7 @@ XML
484 $co{'parent'} || "--root",
485 $co{'id'}, "--", (defined $file_name ? $file_name : ()))
487 - my @difftree = map { chomp; $_ } <$fd>;
488 + my @difftree = map { chomp; to_utf8($_) } <$fd>;