2 # Copyright (C) all contributors <meta@public-inbox.org>
3 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
6 use PublicInbox::TestCommon;
7 use IO::Uncompress::Gunzip qw(gunzip);
10 use PublicInbox::Config;
11 use PublicInbox::MID qw(mids);
12 use autodie qw(kill rename);
13 require_mods qw(DBD::SQLite Xapian psgi);
14 use_ok 'PublicInbox::WWW';
15 my ($tmpdir, $for_destroy) = tmpdir();
16 my $enc_dup = 'ref-20150309094050.GO3427@x1.example';
18 my $dibx = create_inbox 'v2-dup', version => 2, indexlevel => 'medium',
19 tmpdir => "$tmpdir/dup", sub {
22 Date: Mon, 9 Mar 2015 09:40:50 +0000
26 Message-ID: <$enc_dup>
29 $im->add(PublicInbox::Eml->new($common.<<EOM)) or BAIL_OUT;
30 Content-Type: text/plain; charset=utf-8
31 Content-Disposition: inline
32 Content-Transfer-Encoding: 8bit
35 pipe \x{e2}\x{94}\x{82} or not
37 $im->add(PublicInbox::Eml->new($common.<<EOM)) or BAIL_OUT;
38 Content-Type: text/plain; charset="windows-1252"
39 Content-Transfer-Encoding: quoted-printable
46 my $eml = PublicInbox::Eml->new(<<'EOF');
47 From oldbug-pre-a0c07cba0e5d8b6a Fri Oct 2 00:00:00 1993
50 Subject: this is a subject
52 Date: Fri, 02 Oct 1993 00:00:00 +0000
53 Content-Type: text/plain; charset=iso-8859-1
58 my $ibx = create_inbox 'v2-1', version => 2, indexlevel => 'medium',
59 tmpdir => "$tmpdir/v2", sub {
61 $im->add($eml) or BAIL_OUT;
62 $eml->body_set("hello world!\n");
64 local $SIG{__WARN__} = sub { push @warn, @_ };
65 $eml->header_set(Date => 'Fri, 02 Oct 1993 00:01:00 +0000');
66 $im->add($eml) or BAIL_OUT;
67 is(scalar(@warn), 1, 'got one warning');
68 my $mids = mids($eml->header_obj);
69 $new_mid = $mids->[1];
70 open my $fh, '>', "$ibx->{inboxdir}/new_mid" or BAIL_OUT;
71 print $fh $new_mid or BAIL_OUT;
72 close $fh or BAIL_OUT;
75 open my $fh, '<', "$ibx->{inboxdir}/new_mid" or BAIL_OUT;
80 my $m2t = create_inbox 'mid2tid-1', version => 2, indexlevel => 'medium', sub {
83 $im->add(PublicInbox::Eml->new(<<EOM)) or xbail 'add';
84 Date: Fri, 02 Oct 1993 00:0$n:00 +0000
88 References: <a-mid\@b>
92 $im->add(PublicInbox::Eml->new(<<EOM)) or xbail 'add';
93 Date: Fri, 02 Oct 1993 00:0$n:00 +0000
95 Subject: unrelated tid $n
97 References: <b-mid\@b>
103 my $test_lei_q_threadid = sub {
106 lei_ok qw(q -f text --only), $u, qw(-T t@1 s:unrelated);
107 is $lei_out, '', 'no results on unrelated thread';
108 lei_ok qw(q -f text --only), $u, qw(-T t@1 dt:19931002000300..);
109 my @m = ($lei_out =~ m!^Message-ID: <([^>]+)>\n!gms);
110 is_deeply \@m, ['t@3'], 'got expected result from -T MSGID';
114 $test_lei_q_threadid->($m2t->{inboxdir});
116 my $cfgpath = "$ibx->{inboxdir}/pi_config";
118 open my $fh, '>', $cfgpath or BAIL_OUT $!;
119 print $fh <<EOF or BAIL_OUT $!;
120 [publicinbox "v2test"]
121 inboxdir = $ibx->{inboxdir}
122 address = $ibx->{-primary_address}
124 inboxdir = $dibx->{inboxdir}
125 address = $dibx->{-primary_address}
127 inboxdir = $m2t->{inboxdir}
128 address = $m2t->{-primary_address}
130 close $fh or BAIL_OUT;
133 my $msg = $ibx->msg_by_mid('a-mid@b');
134 like($$msg, qr/\AFrom oldbug/s,
135 '"From_" line stored to test old bug workaround');
136 my $cfg = PublicInbox::Config->new($cfgpath);
137 my $www = PublicInbox::WWW->new($cfg);
138 my ($res, $raw, @from_);
141 $res = $cb->(GET('/v2test/description'));
142 like($res->content, qr!\$INBOX_DIR/description missing!,
143 'got v2 description missing message');
144 $res = $cb->(GET('/v2test/a-mid@b/raw'));
145 is($res->header('Content-Type'), 'text/plain; charset=iso-8859-1',
146 'charset from message used');
147 $raw = $res->content;
148 unlike($raw, qr/^From oldbug/sm, 'buggy "From_" line omitted');
149 like($raw, qr/^hello world$/m, 'got first message');
150 like($raw, qr/^hello world!$/m, 'got second message');
151 @from_ = ($raw =~ m/^From /mg);
152 is(scalar(@from_), 2, 'two From_ lines');
154 $res = $cb->(GET("/v2test/$new_mid/raw"));
155 $raw = $res->content;
156 like($raw, qr/^hello world!$/m, 'second message with new Message-Id');
157 @from_ = ($raw =~ m/^From /mg);
158 is(scalar(@from_), 1, 'only one From_ line');
160 # Atom feed should sort by Date: (if Received is missing)
161 $res = $cb->(GET('/v2test/new.atom'));
162 my @bodies = ($res->content =~ />(hello [^<]+)</mg);
163 is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ],
164 'Atom ordering is chronological');
166 # new.html should sort by Date:, too (if Received is missing)
167 $res = $cb->(GET('/v2test/new.html'));
168 @bodies = ($res->content =~ /^(hello [^<]+)$/mg);
169 is_deeply(\@bodies, [ "hello world!\n", "hello world\n" ],
170 'new.html ordering is chronological');
172 $res = $cb->(GET('/v2test/new.atom'));
173 my @dates = ($res->content =~ m!title><updated>([^<]+)</updated>!g);
174 is_deeply(\@dates, [ "1993-10-02T00:01:00Z", "1993-10-02T00:00:00Z" ],
175 'Date headers made it through');
177 test_psgi(sub { $www->call(@_) }, $client0);
178 my $env = { TMPDIR => $tmpdir, PI_CONFIG => $cfgpath };
179 test_httpd($env, $client0, 9);
181 $eml->header_set('Message-ID', 'a-mid@b');
182 $eml->body_set("hello ghosts\n");
183 my $im = $ibx->importer(0);
186 local $SIG{__WARN__} = sub { push @warn, @_ };
187 ok($im->add($eml), 'added 3rd duplicate-but-different message');
188 is(scalar(@warn), 1, 'got another warning');
189 like($warn[0], qr/mismatched/, 'warned about mismatched messages');
191 my $mids = mids($eml->header_obj);
192 my $third = $mids->[-1];
197 $res = $cb->(GET('/v2test/_/text/config/raw'));
198 my $lm = $res->header('Last-Modified');
199 ok($lm, 'Last-Modified set w/ ->mm');
200 $lm = HTTP::Date::str2time($lm);
201 is($lm, $ibx->mm->created_at,
202 'Last-Modified for text/config/raw matches ->created_at');
205 $res = $cb->(GET("/v2test/$third/raw"));
206 $raw = $res->content;
207 like($raw, qr/^hello ghosts$/m, 'got third message');
208 @from_ = ($raw =~ m/^From /mg);
209 is(scalar(@from_), 1, 'one From_ line');
211 $res = $cb->(GET('/v2test/a-mid@b/raw'));
212 $raw = $res->content;
213 like($raw, qr/^hello world$/m, 'got first message');
214 like($raw, qr/^hello world!$/m, 'got second message');
215 like($raw, qr/^hello ghosts$/m, 'got third message');
216 @from_ = ($raw =~ m/^From /mg);
217 is(scalar(@from_), 3, 'three From_ lines');
218 $cfg->each_inbox(sub { $_[0]->search->reopen });
221 my ($in, $out, $status);
222 my $req = GET('/v2test/a-mid@b/raw');
223 $req->header('Accept-Encoding' => 'gzip');
225 is($res->header('Content-Encoding'), 'gzip', 'gzip encoding');
227 gunzip(\$in => \$out);
228 is($out, $raw, 'gzip response matches');
230 $res = $cb->(GET('/v2test/a-mid@b/t.mbox.gz'));
232 $status = gunzip(\$in => \$out);
233 unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
234 like($out, qr/^hello world$/m, 'got first in t.mbox.gz');
235 like($out, qr/^hello world!$/m, 'got second in t.mbox.gz');
236 like($out, qr/^hello ghosts$/m, 'got third in t.mbox.gz');
237 @from_ = ($out =~ m/^From /mg);
238 is(scalar(@from_), 3, 'three From_ lines in t.mbox.gz');
241 $res = $cb->(POST('/v2test/?q=m:a-mid@b&x=m'));
243 $status = gunzip(\$in => \$out);
244 unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
245 like($out, qr/^hello world$/m, 'got first in mbox POST');
246 like($out, qr/^hello world!$/m, 'got second in mbox POST');
247 like($out, qr/^hello ghosts$/m, 'got third in mbox POST');
248 @from_ = ($out =~ m/^From /mg);
249 is(scalar(@from_), 3, 'three From_ lines in mbox POST');
251 # all.mbox.gz interface
252 $res = $cb->(GET('/v2test/all.mbox.gz'));
254 $status = gunzip(\$in => \$out);
255 unlike($out, qr/^From oldbug/sm, 'buggy "From_" line omitted');
256 like($out, qr/^hello world$/m, 'got first in all.mbox');
257 like($out, qr/^hello world!$/m, 'got second in all.mbox');
258 like($out, qr/^hello ghosts$/m, 'got third in all.mbox');
259 @from_ = ($out =~ m/^From /mg);
260 is(scalar(@from_), 3, 'three From_ lines in all.mbox');
263 $res = $cb->(GET('/v2test/?q=m:a-mid@b&x=t'));
264 is($res->code, 200, 'success with threaded search');
265 my $raw = $res->content;
266 ok($raw =~ s/\A.*>Results 1-3 of 3\b//s, 'got all results');
267 my @over = ($raw =~ m/\d{4}-\d+-\d+\s+\d+:\d+ +(?:\d+\% )?(.+)$/gm);
268 is_deeply(\@over, [ '<a', '` <a', '` <a' ], 'threaded messages show up');
270 $res = $cb->(GET('/v2test/?q=m:a-mid@b&x=A'));
271 is($res->code, 200, 'success with Atom search');
273 require_mods(qw(XML::TreePP), 2);
274 my $t = XML::TreePP->new->parse($res->content);
275 like($t->{feed}->{-xmlns}, qr/\bAtom\b/,
276 'looks like an an Atom feed');
277 is(scalar @{$t->{feed}->{entry}}, 3, 'parsed three entries');
280 local $SIG{__WARN__} = 'DEFAULT';
281 $res = $cb->(GET('/v2test/a-mid@b/'));
282 $raw = $res->content;
283 like($raw, qr/WARNING: multiple messages have this Message-ID/,
284 'warned about duplicate Message-IDs');
285 like($raw, qr/^hello world$/m, 'got first message');
286 like($raw, qr/^hello world!$/m, 'got second message');
287 like($raw, qr/^hello ghosts$/m, 'got third message');
288 @from_ = ($raw =~ m/>From: /mg);
289 is(scalar(@from_), 3, 'three From: lines');
290 foreach my $mid ('a-mid@b', $new_mid, $third) {
291 like($raw, qr!>\Q$mid\E</a>!s, "Message-ID $mid shown");
293 like($raw, qr/\b3\+ messages\b/, 'thread overview shown');
295 $res = $cb->(GET("/dup/$enc_dup/d/"));
296 is($res->code, 200, '/d/ (diff) endpoint works');
297 $raw = $res->content;
298 like($raw, qr!</span> cr_mismatch\n!s,
299 'cr_mismatch is only diff context');
300 like($raw, qr!>\-pipe !s, 'pipe diff del line');
301 like($raw, qr!>\+pipe !s, 'pipe diff ins line');
302 unlike $raw, qr/No newline at end of file/;
305 test_psgi(sub { $www->call(@_) }, $client1);
306 test_httpd($env, $client1, 38);
309 my $exp = [ qw(<a-mid@b> <reuse@mid>) ];
310 $eml->header_set('Message-Id', @$exp);
311 $eml->header_set('Subject', '4th dupe');
312 local $SIG{__WARN__} = sub {};
313 ok($im->add($eml), 'added one message');
315 my @h = $eml->header('Message-ID');
316 is_deeply($exp, \@h, 'reused existing Message-ID');
317 $cfg->each_inbox(sub { $_[0]->search->reopen });
322 my $res = $cb->(GET('/v2test/new.atom'));
323 my @ids = ($res->content =~ m!<id>urn:uuid:([^<]+)</id>!sg);
326 is_deeply([qw(1 1 1 1)], [values %ids], 'feed ids unique');
328 $res = $cb->(GET('/v2test/reuse@mid/T/'));
329 $raw = $res->content;
330 like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /T/');
331 my @over = ($raw =~ m/^\d{4}-\d+-\d+\s+\d+:\d+ (.+)$/gm);
332 is_deeply(\@over, [ '<a', '` <a', '` <a', '` <a' ],
333 'duplicate messages share the same root');
335 $res = $cb->(GET('/v2test/reuse@mid/t/'));
336 $raw = $res->content;
337 like($raw, qr/\b4\+ messages\b/, 'thread overview shown with /t/');
339 $res = $cb->(GET('/v2test/0/info/refs'));
340 is($res->code, 200, 'got info refs for dumb clones');
341 $res = $cb->(GET('/v2test/0.git/info/refs'));
342 is($res->code, 200, 'got info refs for dumb clones w/ .git suffix');
343 $res = $cb->(GET('/v2test/info/refs'));
344 is($res->code, 404, 'v2 git URL w/o shard fails');
347 test_psgi(sub { $www->call(@_) }, $client2);
348 test_httpd($env, $client2, 8);
350 # ensure conflicted attachments can be resolved
351 local $SIG{__WARN__} = sub {};
352 foreach my $body (qw(old new)) {
353 $im->add(eml_load "t/psgi_v2-$body.eml") or BAIL_OUT;
357 $cfg->each_inbox(sub { $_[0]->search->reopen });
361 my $res = $cb->(GET('/v2test/a@dup/'));
362 my @links = ($res->content =~ m!"\.\./([^/]+/2-attach\.txt)\"!g);
363 is(scalar(@links), 2, 'both attachment links exist');
364 isnt($links[0], $links[1], 'attachment links are different');
366 my $old = $cb->(GET('/v2test/' . $links[0]));
367 my $new = $cb->(GET('/v2test/' . $links[1]));
368 is($old->content, 'old', 'got expected old content');
369 is($new->content, 'new', 'got expected new content');
371 $res = $cb->(GET('/v2test/?t=1970'.'01'.'01'.'000000'));
372 is($res->code, 404, '404 for out-of-range t= param');
374 local $SIG{__WARN__} = sub { push @warn, @_ };
375 $res = $cb->(GET('/v2test/?t=1970'.'01'.'01'));
376 is_deeply(\@warn, [], 'no warnings on YYYYMMDD only');
378 $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000300..&x=m"));
379 is($res->code, 200, 'got 200 on mid2tid query');
380 gunzip(\(my $in = $res->content) => \(my $out));
381 my @m = ($out =~ m!^Message-ID: <([^>]+)>\n!gms);
382 is_deeply(\@m, ['t@3'], 'only got latest result from query');
384 $res = $cb->(POST("/m2t/t\@1/?q=dt:19931002000400..&x=m"));
385 is($res->code, 404, '404 on out-of-range mid2tid query');
387 $res = $cb->(POST("/m2t/t\@1/?q=s:unrelated&x=m"));
388 is($res->code, 404, '404 on cross-thread search');
390 my $rmt = $ENV{PLACK_TEST_EXTERNALSERVER_URI};
391 $rmt and $test_lei_q_threadid->("$rmt/m2t/");
393 test_psgi(sub { $www->call(@_) }, $client3);
394 test_httpd($env, $client3, 4);
396 if ($^O eq 'linux' && -r "/proc/$$/stat") {
398 my $search_xh_pid = sub {
400 for my $f (glob('/proc/*/stat')) {
401 open my $fh, '<', $f or next;
402 my @s = split /\s+/, readline($fh) // next;
403 next if $s[3] ne $pid; # look for matching PPID
404 open $fh, '<', "/proc/$s[0]/cmdline" or next;
405 my $cmdline = readline($fh) // next;
406 if ($cmdline =~ /\0-MPublicInbox::XapHelper\0-e\0/ ||
407 $cmdline =~ m!/xap_helper\0!) {
413 my $usr1_test = sub {
415 my $td = $PublicInbox::TestCommon::CURRENT_DAEMON;
416 my $pid = $td->{pid};
417 my $res = $cb->(GET('/v2test/?q=m:a-mid@b'));
418 is $res->code, 200, '-httpd is running w/ search';
420 $search_xh_pid->($pid);
421 my $xh_pid = $search_xh_pid->($pid) or
422 BAIL_OUT "can't find XH pid with $args";
423 my $xh_err = readlink "/proc/$xh_pid/fd/2";
424 is $xh_err, "$env->{TMPDIR}/stderr.log",
425 "initial stderr expected ($args)";
426 rename "$env->{TMPDIR}/stderr.log",
427 "$env->{TMPDIR}/stderr.old";
428 $xh_err = readlink "/proc/$xh_pid/fd/2";
429 is $xh_err, "$env->{TMPDIR}/stderr.old",
430 "stderr followed rename ($args)";
433 $res = $cb->(GET('/v2test/?q=m:a-mid@b'));
434 is $res->code, 200, '-httpd still running w/ search';
435 my $new_xh_pid = $search_xh_pid->($pid) or
436 BAIL_OUT "can't find new XH pid with $args";
437 is $new_xh_pid, $xh_pid, "XH pid unchanged ($args)";
438 $xh_err = readlink "/proc/$new_xh_pid/fd/2";
439 is $xh_err, "$env->{TMPDIR}/stderr.log",
440 "stderr updated ($args)";
442 for my $x ('-X0', '-X1', '-X0 -W1', '-X1 -W1') {
444 local $ENV{TEST_DAEMON_XH} = $args;
445 test_httpd($env, $usr1_test);