v2.11.1 release
[git-osx-installer.git] / patches / gitweb / q / gitweb-use-to_utf8-on-all-input-lines.diff
blob bef81765993ca905d0333592fcb739607e352fea
1 Subject: [PATCH] gitweb: use to_utf8 on all input lines
3 Git does not mandate a character set encoding in the data it
4 tracks. Therefore the output of any Git command that includes
5 user data could be in any encoding.
7 Just automatically marking it as UTF-8 with the ':utf8' option
8 is wrong and can lead to unexpected errors and/or misdisplayed
9 results.
11 Instead make sure every input line that may contain user-created
12 bytes passes through the to_utf8 function before being used.
14 Signed-off-by: Kyle J. McKay <mackyle@gmail.com>
15 ---
16 gitweb/gitweb.perl | 139 +++++++++++++++++++++++++++++++++++------------------
17 1 file changed, 93 insertions(+), 46 deletions(-)
19 diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl
20 index be7bf206..f144ab30 100755
21 --- a/gitweb/gitweb.perl
22 +++ b/gitweb/gitweb.perl
23 @@ -1501,6 +1501,7 @@ our $CGI = 'CGI';
24 our $cgi;
25 our $fcgi_mode = 0;
26 our $fcgi_nproc_active = 0;
27 +our $fcgi_raw_mode = 0;
28 sub is_fcgi {
29 use Errno;
30 my $stdinfno = fileno STDIN;
31 @@ -1512,7 +1513,33 @@ sub is_fcgi {
32 sub configure_as_fcgi {
33 return if $fcgi_mode;
35 + require FCGI;
36 require CGI::Fast;
38 + # We have gone to great effort to make sure that all incoming data has
39 + # been converted from whatever format it was in into UTF-8. We have
40 + # even taken care to make sure the output handle is in ':utf8' mode.
41 + # Now along comes FCGI and blows it with:
42 + #
43 + # Use of wide characters in FCGI::Stream::PRINT is deprecated
44 + # and will stop wprking[sic] in a future version of FCGI
45 + #
46 + # To fix this we replace FCGI::Stream::PRINT with our own routine that
47 + # first encodes everything and then calls the original routine, but
48 + # not if $fcgi_raw_mode is true (then we just call the original routine).
49 + #
50 + # Note that we could do this by using utf8::is_utf8 to check instead
51 + # of having a $fcgi_raw_mode global, but that would be slower to run
52 + # the test on each element and much slower than skipping the conversion
53 + # entirely when we know we're outputting raw bytes.
54 + my $orig = \&FCGI::Stream::PRINT;
55 + undef *FCGI::Stream::PRINT;
56 + *FCGI::Stream::PRINT = sub {
57 + @_ = (shift, map {my $x=$_; utf8::encode($x); $x} @_)
58 + unless $fcgi_raw_mode;
59 + goto $orig;
60 + };
62 our $CGI = 'CGI::Fast';
64 $fcgi_mode = 1;
65 @@ -1564,6 +1591,7 @@ sub _reset_globals {
66 our %config = ();
67 our $gitweb_project_owner = undef;
68 our $shown_stale_message = 0;
69 + our $fcgi_raw_mode = 0;
70 keys %known_snapshot_formats; # reset 'each' iterator
73 @@ -3041,7 +3069,7 @@ sub git_parse_project_config {
74 defined(my $fh = git_cmd_pipe "config", '-z', '-l')
75 or return;
77 - while (my $keyval = <$fh>) {
78 + while (my $keyval = to_utf8(scalar <$fh>)) {
79 chomp $keyval;
80 my ($key, $value) = split(/\n/, $keyval, 2);
82 @@ -3155,7 +3183,7 @@ sub git_get_hash_by_path {
84 defined(my $fd = git_cmd_pipe "ls-tree", $base, "--", $path)
85 or die_error(500, "Open git-ls-tree failed");
86 - my $line = <$fd>;
87 + my $line = to_utf8(scalar <$fd>);
88 close $fd or return undef;
90 if (!defined $line) {
91 @@ -3182,7 +3210,7 @@ sub git_get_path_by_hash {
93 defined(my $fd = git_cmd_pipe "ls-tree", '-r', '-t', '-z', $base)
94 or return undef;
95 - while (my $line = <$fd>) {
96 + while (my $line = to_utf8(scalar <$fd>)) {
97 chomp $line;
99 #'040000 tree 595596a6a9117ddba9fe379b6b012b558bac8423 gitweb'
100 @@ -3208,7 +3236,7 @@ sub git_get_file_or_project_config {
101 $git_dir = "$projectroot/$path";
102 open my $fd, '<', "$git_dir/$name"
103 or return git_get_project_config($name);
104 - my $conf = <$fd>;
105 + my $conf = to_utf8(scalar <$fd>);
106 close $fd;
107 if (defined $conf) {
108 chomp $conf;
109 @@ -3249,6 +3277,7 @@ sub git_get_project_ctags {
110 close $ct;
112 (my $ctag = $tagfile) =~ s#.*/##;
113 + $ctag = to_utf8($ctag);
114 if ($val =~ /^\d+$/) {
115 $ctags->{$ctag} = $val;
116 } else {
117 @@ -3258,7 +3287,7 @@ sub git_get_project_ctags {
118 closedir $dh;
120 } elsif (open my $fh, '<', "$git_dir/ctags") {
121 - while (my $line = <$fh>) {
122 + while (my $line = to_utf8(scalar <$fh>)) {
123 chomp $line;
124 $ctags->{$line}++ if $line;
126 @@ -3358,7 +3387,7 @@ sub git_get_project_url_list {
127 or return wantarray ?
128 @{ config_to_multi(git_get_project_config('url')) } :
129 config_to_multi(git_get_project_config('url'));
130 - my @git_project_url_list = map { chomp; $_ } <$fd>;
131 + my @git_project_url_list = map { chomp; to_utf8($_) } <$fd>;
132 close $fd;
134 return wantarray ? @git_project_url_list : \@git_project_url_list;
135 @@ -3658,7 +3687,7 @@ sub git_get_remotes_list {
137 my $fd = git_cmd_pipe 'remote', '-v';
138 return unless $fd;
139 - while (my $remote = <$fd>) {
140 + while (my $remote = to_utf8(scalar <$fd>)) {
141 chomp $remote;
142 $remote =~ s!\t(.*?)\s+\((\w+)\)$!!;
143 next if $wanted and not $remote eq $wanted;
144 @@ -3693,7 +3722,7 @@ sub git_get_references {
145 ($type ? ("--", "refs/$type") : ())) # use -- <pattern> if $type
146 or return;
148 - while (my $line = <$fd>) {
149 + while (my $line = to_utf8(scalar <$fd>)) {
150 chomp $line;
151 if ($line =~ m!^([0-9a-fA-F]{40})\srefs/($type.*)$!) {
152 if (defined $refs{$1}) {
153 @@ -3712,7 +3741,7 @@ sub git_get_rev_name_tags {
155 defined(my $fd = git_cmd_pipe "name-rev", "--tags", $hash)
156 or return;
157 - my $name_rev = <$fd>;
158 + my $name_rev = to_utf8(scalar <$fd>);
159 close $fd;
161 if ($name_rev =~ m|^$hash tags/(.*)$|) {
162 @@ -3767,7 +3796,7 @@ sub parse_tag {
164 defined(my $fd = git_cmd_pipe "cat-file", "tag", $tag_id) or return;
165 $tag{'id'} = $tag_id;
166 - while (my $line = <$fd>) {
167 + while (my $line = to_utf8(scalar <$fd>)) {
168 chomp $line;
169 if ($line =~ m/^object ([0-9a-fA-F]{40})$/) {
170 $tag{'object'} = $1;
171 @@ -3792,7 +3821,7 @@ sub parse_tag {
172 last;
175 - push @comment, <$fd>;
176 + push @comment, map(to_utf8($_), <$fd>);
177 $tag{'comment'} = \@comment;
178 close $fd or return;
179 if (!defined $tag{'name'}) {
180 @@ -3851,6 +3880,7 @@ sub parse_commit_text {
181 $co{'parents'} = \@parents;
182 $co{'parent'} = $parents[0];
184 + @commit_lines = map to_utf8($_), @commit_lines;
185 foreach my $title (@commit_lines) {
186 $title =~ s/^ //;
187 if ($title ne "") {
188 @@ -4094,7 +4124,7 @@ sub git_get_heads_list {
189 '--format=%(objectname) %(refname) %(subject)%00%(committer)',
190 @patterns)
191 or return;
192 - while (my $line = <$fd>) {
193 + while (my $line = to_utf8(scalar <$fd>)) {
194 my %ref_item;
196 chomp $line;
197 @@ -4142,7 +4172,7 @@ sub git_get_tags_list {
198 '%(*objectname) %(*objecttype) %(subject)%00%(creator)',
199 ($all ? 'refs' : 'refs/tags'))
200 or return;
201 - while (my $line = <$fd>) {
202 + while (my $line = to_utf8(scalar <$fd>)) {
203 my %ref_item;
205 chomp $line;
206 @@ -4203,7 +4233,9 @@ sub insert_file {
207 my $filename = shift;
209 open my $fd, '<', $filename;
210 - print map { to_utf8($_) } <$fd>;
211 + while (<$fd>) {
212 + print to_utf8($_);
214 close $fd;
217 @@ -4937,7 +4969,9 @@ sub git_print_section {
218 } elsif (ref($content) eq 'SCALAR') {
219 print esc_html($$content);
220 } elsif (ref($content) eq 'GLOB' or ref($content) eq 'IO::Handle') {
221 - print <$content>;
222 + while (<$content>) {
223 + print to_utf8($_);
225 } elsif (!ref($content) && defined($content)) {
226 print $content;
228 @@ -5109,7 +5143,7 @@ sub git_get_link_target {
229 or return;
231 local $/ = undef;
232 - $link_target = <$fd>;
233 + $link_target = to_utf8(scalar <$fd>);
235 close $fd
236 or return;
237 @@ -5848,7 +5882,7 @@ sub git_patchset_body {
238 print "<div class=\"patchset\">\n";
240 # skip to first patch
241 - while ($patch_line = <$fd>) {
242 + while ($patch_line = to_utf8(scalar <$fd>)) {
243 chomp $patch_line;
245 last if ($patch_line =~ m/^diff /);
246 @@ -5916,7 +5950,7 @@ sub git_patchset_body {
247 # print extended diff header
248 print "<div class=\"diff extended_header\">\n";
249 EXTENDED_HEADER:
250 - while ($patch_line = <$fd>) {
251 + while ($patch_line = to_utf8(scalar<$fd>)) {
252 chomp $patch_line;
254 last EXTENDED_HEADER if ($patch_line =~ m/^--- |^diff /);
255 @@ -5935,7 +5969,7 @@ sub git_patchset_body {
256 #assert($patch_line =~ m/^---/) if DEBUG;
258 my $last_patch_line = $patch_line;
259 - $patch_line = <$fd>;
260 + $patch_line = to_utf8(scalar <$fd>);
261 chomp $patch_line;
262 #assert($patch_line =~ m/^\+\+\+/) if DEBUG;
264 @@ -5945,7 +5979,7 @@ sub git_patchset_body {
266 # the patch itself
267 LINE:
268 - while ($patch_line = <$fd>) {
269 + while ($patch_line = to_utf8(scalar <$fd>)) {
270 chomp $patch_line;
272 next PATCH if ($patch_line =~ m/^diff /);
273 @@ -6900,7 +6934,7 @@ sub git_search_changes {
274 my $alternate = 1;
275 undef %co;
276 my @files;
277 - while (my $line = <$fd>) {
278 + while (my $line = to_utf8(scalar <$fd>)) {
279 chomp $line;
280 next unless $line;
282 @@ -6984,7 +7018,7 @@ sub git_search_files {
283 my $matches = 0;
284 my $lastfile = '';
285 my $file_href;
286 - while (my $line = <$fd>) {
287 + while (my $line = to_utf8(scalar <$fd>)) {
288 chomp $line;
289 my ($file, $lno, $ltext, $binary);
290 last if ($matches++ > 1000);
291 @@ -7421,7 +7455,6 @@ sub git_blame_common {
292 $hash_base, '--', $file_name)
293 or die_error(500, "Open git-blame --porcelain failed");
295 - binmode $fd, ':utf8';
297 # incremental blame data returns early
298 if ($format eq 'data') {
299 @@ -7429,8 +7462,8 @@ sub git_blame_common {
300 -type=>"text/plain", -charset => "utf-8",
301 -status=> "200 OK");
302 local $| = 1; # output autoflush
303 - while (my $line = <$fd>) {
304 - print to_utf8($line);
305 + while (<$fd>) {
306 + print to_utf8($_);
308 close $fd
309 or print "ERROR $!\n";
310 @@ -7508,7 +7541,7 @@ sub git_blame_common {
311 #contents of a file
312 my $linenr = 0;
313 LINE:
314 - while (my $line = <$fd>) {
315 + while (my $line = to_utf8(scalar <$fd>)) {
316 chomp $line;
317 $linenr++;
319 @@ -7527,7 +7560,7 @@ sub git_blame_common {
321 # blame data
322 LINE:
323 - while (my $line = <$fd>) {
324 + while (my $line = to_utf8(scalar <$fd>)) {
325 chomp $line;
326 # the header: <SHA-1> <src lineno> <dst lineno> [<lines in group>]
327 # no <lines in group> for subsequent lines in group of lines
328 @@ -7538,7 +7571,7 @@ sub git_blame_common {
330 my $meta = $metainfo{$full_rev};
331 my $data;
332 - while ($data = <$fd>) {
333 + while ($data = to_utf8(scalar <$fd>)) {
334 chomp $data;
335 last if ($data =~ s/^\t//); # contents of line
336 if ($data =~ /^(\S+)(?: (.*))?$/) {
337 @@ -7726,6 +7759,7 @@ sub git_blob_plain {
339 defined(my $fd = git_cmd_pipe "cat-file", "blob", $hash)
340 or die_error(500, "Open git-cat-file blob '$hash' failed");
341 + binmode($fd);
343 # content-type (can include charset)
344 $type = blob_contenttype($fd, $file_name, $type);
345 @@ -7762,10 +7796,14 @@ sub git_blob_plain {
346 -content_disposition =>
347 ($sandbox ? 'attachment' : 'inline')
348 . '; filename="' . $save_as . '"');
349 - local $/ = undef;
350 binmode STDOUT, ':raw';
351 - print <$fd>;
352 + $fcgi_raw_mode = 1;
353 + my $buf;
354 + while (read($fd, $buf, 32768)) {
355 + print $buf;
357 binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
358 + $fcgi_raw_mode = 0;
359 close $fd;
362 @@ -7848,7 +7886,7 @@ sub git_blob {
363 qq!" />\n!;
364 } else {
365 my $nr;
366 - while (my $line = <$fd>) {
367 + while (my $line = to_utf8(scalar <$fd>)) {
368 chomp $line;
369 $nr++;
370 $line = untabify($line);
371 @@ -7885,7 +7923,7 @@ sub git_tree {
372 defined(my $fd = git_cmd_pipe "ls-tree", '-z',
373 ($show_sizes ? '-l' : ()), @extra_options, $hash)
374 or die_error(500, "Open git-ls-tree failed");
375 - @entries = map { chomp; $_ } <$fd>;
376 + @entries = map { chomp; to_utf8($_) } <$fd>;
377 close $fd
378 or die_error(404, "Reading tree failed");
380 @@ -8104,9 +8142,15 @@ sub git_snapshot {
382 defined(my $fd = cmd_pipe @cmd)
383 or die_error(500, "Execute git-archive failed");
384 + binmode($fd);
385 binmode STDOUT, ':raw';
386 - print <$fd>;
387 + $fcgi_raw_mode = 1;
388 + my $buf;
389 + while (read($fd, $buf, 32768)) {
390 + print $buf;
392 binmode STDOUT, ':utf8'; # as set at the beginning of gitweb.cgi
393 + $fcgi_raw_mode = 0;
394 close $fd;
397 @@ -8239,7 +8283,7 @@ sub git_commit {
398 (@$parents <= 1 ? $parent : '-c'),
399 $hash, "--")
400 or die_error(500, "Open git-diff-tree failed");
401 - @difftree = map { chomp; $_ } <$fd>;
402 + @difftree = map { chomp; to_utf8($_) } <$fd>;
403 close $fd or die_error(404, "Reading git-diff-tree failed");
405 # non-textual hash id's can be cached
406 @@ -8333,7 +8377,7 @@ sub git_object {
407 # here errors should not happen
408 defined(my $fd = git_cmd_pipe "ls-tree", $hash_base, "--", $file_name)
409 or die_error(500, "Open git-ls-tree failed");
410 - my $line = <$fd>;
411 + my $line = to_utf8(scalar <$fd>);
412 close $fd;
414 #'100644 blob 0fa3f3a66fb6a137f6ec2c19351ed4d807070ffa panic.c'
415 @@ -8370,7 +8414,7 @@ sub git_blobdiff {
416 $hash_parent_base, $hash_base,
417 "--", (defined $file_parent ? $file_parent : ()), $file_name)
418 or die_error(500, "Open git-diff-tree failed");
419 - @difftree = map { chomp; $_ } <$fd>;
420 + @difftree = map { chomp; to_utf8($_) } <$fd>;
421 close $fd
422 or die_error(404, "Reading git-diff-tree failed");
423 @difftree
424 @@ -8388,7 +8432,7 @@ sub git_blobdiff {
425 # ':100644 100644 03b21826... 3b93d5e7... M ls-files.c'
426 # $hash == to_id
427 grep { /^:[0-7]{6} [0-7]{6} [0-9a-fA-F]{40} $hash/ }
428 - map { chomp; $_ } <$fd>;
429 + map { chomp; to_utf8($_) } <$fd>;
430 close $fd
431 or die_error(404, "Reading git-diff-tree failed");
432 @difftree
433 @@ -8473,7 +8517,7 @@ sub git_blobdiff {
434 git_footer_html();
436 } else {
437 - while (my $line = <$fd>) {
438 + while (my $line = to_utf8(scalar <$fd>)) {
439 $line =~ s!a/($hash|$hash_parent)!'a/'.esc_path($diffinfo{'from_file'})!eg;
440 $line =~ s!b/($hash|$hash_parent)!'b/'.esc_path($diffinfo{'to_file'})!eg;
442 @@ -8481,8 +8525,9 @@ sub git_blobdiff {
444 last if $line =~ m!^\+\+\+!;
446 - local $/ = undef;
447 - print <$fd>;
448 + while (<$fd>) {
449 + print to_utf8($_);
451 close $fd;
454 @@ -8614,7 +8659,7 @@ sub git_commitdiff {
455 $hash_parent_param, $hash, "--")
456 or die_error(500, "Open git-diff-tree failed");
458 - while (my $line = <$fd>) {
459 + while (my $line = to_utf8(scalar <$fd>)) {
460 chomp $line;
461 # empty line ends raw part of diff-tree output
462 last unless $line;
463 @@ -8728,13 +8773,15 @@ sub git_commitdiff {
464 git_footer_html();
466 } elsif ($format eq 'plain') {
467 - local $/ = undef;
468 - print <$fd>;
469 + while (<$fd>) {
470 + print to_utf8($_);
472 close $fd
473 or print "Reading git-diff-tree failed\n";
474 } elsif ($format eq 'patch') {
475 - local $/ = undef;
476 - print <$fd>;
477 + while (<$fd>) {
478 + print to_utf8($_);
480 close $fd
481 or print "Reading git-format-patch failed\n";
483 @@ -9001,7 +9048,7 @@ XML
484 $co{'parent'} || "--root",
485 $co{'id'}, "--", (defined $file_name ? $file_name : ()))
486 or next;
487 - my @difftree = map { chomp; $_ } <$fd>;
488 + my @difftree = map { chomp; to_utf8($_) } <$fd>;
489 close $fd
490 or next;