Merge branch 'master' of https://github.com/solgenomics/sgn
[sgn.git] / cgi-bin / bulk / display.pl
blob7944733d3d35161d80c2259c086c8be1a54dcc30
1 #!/usr/bin/perl
3 =head1 NAME
5 /bulk/display.pl
7 =head1 DESCRIPTION
9 This perl script is used on the bulk download page. It determines the format
10 of the data given back to the user (submitted to download.pl). This includes
11 the format of the the html pages that display the data as well as determining
12 the fasta format and the text format.
14 =cut
16 use strict;
17 use warnings;
18 use CatalystX::GlobalContext '$c';
20 bulk_display->new($c)->display_page;
22 package bulk_display;
23 use CGI ();
24 use CXGN::Page;
25 use CXGN::Page::FormattingHelpers qw/ html_break_string/;
27 =head2 new
29 Desc: sub new
30 Args: default
31 Ret : $args, $class;
33 When implemented creates a new display object that the methods in the display
34 package can be called on.
36 =cut
38 sub new {
39 my $class = shift;
40 my $c = shift;
41 my $args = {};
43 # define some constants
44 $args->{pagesize} = 50;
45 $args->{content} = "";
46 $args->{tempdir} = $c->path_to( $c->tempfiles_subdir('bulk') );
48 # get cgi arguments
49 $args->{cgi} = my $cgi = CGI->new;
50 $args->{dumpfile} = $cgi->param("dumpfile");
51 $args->{page_number} = $cgi->param("page_number");
52 $args->{outputType} = $cgi->param("outputType");
53 $args->{seq_type} = $cgi->param("seq_type");
54 $args->{idType} = $cgi->param("idType");
55 $args->{summary} = $cgi->param("summary");
56 $args->{download} = $cgi->param("download");
57 $args->{page} = CXGN::Page->new( "Browse Bulk Results", "Lukas Mueller" );
59 return bless $args, $class;
62 =head2 display_page
64 Desc: sub display_page
65 Args: default
66 Ret : page
68 Calls summary page to be displayed. Determines what page to render depending
69 on the outout type the user selects.
71 =cut
73 sub display_page() {
74 my $self = shift;
76 # if summary switch is set, display summary page
77 if ( $self->{summary} ) { $self->display_summary_page(); }
79 elsif ( $self->{outputType} =~ /html/i ) {
80 $self->render_html_table_page();
82 elsif ( $self->{outputType} =~ /text/i ) {
83 $self->render_text_page();
85 elsif ( $self->{outputType} =~ /fasta/i ) {
86 $self->render_fasta_page();
88 elsif ( $self->{outputType} =~ /notfound/i ) {
89 $self->display_ids_notfound();
91 else { $self->render_html_table_page(); }
94 =head2 display_summary_page
96 Desc: sub display_summary_page
97 Args: default
98 Ret : n/a
100 Opens temp file created in download.pl then opens a filehandle and prints the
101 data to that file.
103 =cut
105 sub display_summary_page {
106 my $self = shift;
107 my $file = $self->{tempdir} . "/" . $self->{dumpfile} . ".summary";
108 open( F, "<$file" ) || $self->{page}->error_page("can't open $file");
109 $self->{page}->header();
110 while (<F>) {
111 print $_;
113 $self->{page}->footer();
116 =head2 render_html_page
118 Desc: sub render_html_page
119 Args: default
120 Ret : n/a
122 Creates html page in style specified by object that calls the method.
124 =cut
126 sub render_html_page {
127 my $self = shift;
128 my $style = shift;
129 if ( $style =~ /HTML/i || !defined($style) ) {
130 $self->render_html_table_page();
132 if ( $style =~ /tree/i ) { $self->render_html_tree_page(); }
135 =head2 render_html_table_page
137 Desc: sub render_html_table_page
138 Args: default
139 Ret : n/a
141 Creates the html table that will contain the data on the page. Prints page
142 number and links above and below the table. Also determines the format of text
143 that will be place in the table (by download.pl). For example, sequences and
144 quality values are displayed in a smaller font and appear 60 per line (value
145 can be adjusted).
147 =cut
149 sub render_html_table_page {
150 my $self = shift;
152 $self->{page}->header("Bulk download results");
155 # open the file
157 if ( !exists( $self->{page_number} ) || ( $self->{page_number} == 0 ) ) {
158 $self->{page_number} = 1;
160 if ( !exists( $self->{page_size} ) ) { $self->{page_size} = 50; }
162 $self->{debug} = 0;
163 my $line_count =
164 $self->getFileLines( $self->{tempdir} . "/" . $self->{dumpfile} );
165 $self->{line_count} = $line_count;
166 $self->debug("Line Count in the file: $line_count");
167 if ( $line_count < 2 ) {
168 $self->{content} .=
169 "No data was retrieved. Please verify your input parameters. Thanks.<br /><br />\n";
171 else {
172 open( F, "<" . $self->{tempdir} . "/" . $self->{dumpfile} )
173 || $self->{page}->error_page("Can't open $self->{dumpfile}");
176 # read the column definitions
178 my @output_fields;
179 my $defs = <F>;
180 if ($defs) { chomp($defs); @output_fields = split /\t/, $defs; }
181 $self->debug( "column definitions: " . ( join " ", @output_fields ) );
183 # define the links
184 my %links = (
185 clone_name =>
186 "/search/est.pl?request_type=10&amp;search=Search&amp;request_id=",
187 SGN_U => "/search/unigene.pl?unigene_id=",
188 converted_id => "/search/unigene.pl?unigene_id=",
190 $self->{links} = \%links;
193 # read in the required page
195 my $line = 0;
196 my @data = ();
197 $self->buttons();
198 $self->{content} .= "<table summary=\"\" border=\"1\">\n";
200 # print table header
201 $self->{content} .=
202 "<tr><td>line</td><td>"
203 . ( join "</td><td>", @output_fields )
204 . "</td></tr>";
206 while (<F>) {
207 chomp;
208 $line++;
209 if ( $line >=
210 ( ( ( $self->{page_number} - 1 ) * $self->{page_size} ) + 1 )
211 && (
212 $line <= ( ( $self->{page_number} ) * $self->{page_size} ) )
215 my @fields = split /\t/;
216 my %row;
217 for ( my $i = 0 ; $i < @fields ; $i++ ) {
218 $row{ $output_fields[$i] } = $fields[$i];
221 # format the sequence data for output to browser.
222 # number of letters in sequence or qual to wrap on
223 my $breakspace_num = 60;
225 $row{est_seq} =
226 html_break_string( $row{est_seq}, $breakspace_num );
227 $row{est_seq} =
228 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{est_seq}</span>";
230 $row{unigene_seq} =
231 html_break_string( $row{unigene_seq}, $breakspace_num );
232 $row{unigene_seq} =
233 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{unigene_seq}</span>";
235 $row{protein_seq} =
236 html_break_string( $row{protein_seq}, $breakspace_num );
237 $row{protein_seq} =
238 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{protein_seq}</span>";
239 $row{estscan_seq} =
240 html_break_string( $row{estscan_seq}, $breakspace_num );
241 $row{estscan_seq} =
242 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{estscan_seq}</span>";
243 $row{longest6frame_seq} =
244 html_break_string( $row{longest6frame_seq}, $breakspace_num );
245 $row{longest6frame_seq} =
246 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{longest6frame_seq}</span>";
247 $row{preferred_protein_seq} =
248 html_break_string( $row{preferred_protein_seq},
249 $breakspace_num );
250 $row{preferred_protein_seq} =
251 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{preferred_protein_seq}</span>";
253 $row{bac_end_sequence} =
254 html_break_string( $row{bac_end_sequence}, $breakspace_num );
255 $row{bac_end_sequence} =
256 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{bac_end_sequence}</span>";
258 my $qual = $row{qual_value_seq};
259 my @qual = split /\s+/, $qual;
260 $row{qual_value_seq} = "";
261 s/^(\d)$/&nbsp;$1/ foreach (@qual);
262 while ( my @a = splice( @qual, 0, $breakspace_num ) ) {
263 $row{qual_value_seq} .= join( "&nbsp;", @a ) . "<br />";
265 $row{qual_value_seq} =
266 "<span class=\"sequence\" style=\"font-size: smaller;\"> $row{qual_value_seq}</span>";
268 my @output;
271 # cycle through @output_fields and find the corresponding hash elements
273 $self->{content} .= "<tr><td>$line</td>\n";
274 foreach my $f (@output_fields) {
276 #$self-> debug("outputting $row{$f}...");
277 if ( !exists( $row{$f} ) || $row{$f} eq undef ) {
278 $row{$f} = "N.A.";
282 # add links as required. Links for each output field are stored in the %links hash.
284 if ( exists( $links{$f} ) && $row{$f} ne "N.A." ) {
285 $row{$f} =
286 "<a href=\"$links{$f}$row{$f}\">$row{$f}</a>";
288 if ( $f eq "clone_id" ) {
289 $self->{content} .= "<td>$row{$f}</td>";
291 else {
292 $self->{content} .= "<td>$row{$f}</td>";
295 #push @output, $row{$f};
297 $self->{content} .= "</tr>\n";
299 #$self->{content} .= "<tr><td>".(join "</td><td>", @output) . "</td></tr>";
304 $self->{content} .= "</table><!-- dump info -->\n";
305 $self->buttons();
310 # output to browser
312 print $self->{content};
313 $self->{page}->footer();
314 close(F);
317 =head2 render_fasta
319 Desc: sub render_fasta
320 Args: default
321 Ret : n/a
323 Determines the format of the fasta page (in a similar way as
324 render_html_table_page). No trailing spaces or new lines should be present
325 after this subroutine is called for fasta pages.
327 =cut
329 sub render_fasta_page {
330 my $self = shift;
332 # print header
334 if ( $self->{download} ) {
335 print
336 "Pragma: \"no-cache\"\nContent-Disposition: filename=sequences.fasta\nContent-type: application/data\n\n";
338 else {
339 print "Content-type: text/plain\n\n";
341 open( F, "<" . $self->{tempdir} . "/" . $self->{dumpfile} )
342 || $self->{page}->error_page( "Can't open " . $self->{dumpfile} );
344 # read column definitions
345 my @output_fields;
346 my $defs = <F>;
347 if ($defs) { chomp($defs); @output_fields = split /\t/, $defs; }
349 while (<F>) {
350 chomp;
351 my @f = split /\t/;
352 my %data = ();
354 #convert to hash
355 for ( my $i = 0 ; $i < @output_fields ; $i++ ) {
356 $data{ $output_fields[$i] } = $f[$i];
358 if ( ( join " ", @output_fields ) =~ /est_seq/i ) {
359 $self->{seq_type} = "est_seq";
361 elsif ( ( join " ", @output_fields ) =~ /unigene_seq/i ) {
362 $self->{seq_type} = "unigene_seq";
364 elsif ( ( join " ", @output_fields ) =~ /^protein_seq$/i ) {
365 $self->{seq_type} = "protein_seq";
366 } #added for protein
367 elsif ( ( join " ", @output_fields ) =~ /estscan_seq/i ) {
368 $self->{seq_type} = "estscan_seq";
370 elsif ( ( join " ", @output_fields ) =~ /longest6frame_seq/i ) {
371 $self->{seq_type} = "longest6frame_seq";
373 elsif ( ( join " ", @output_fields ) =~ /preferred_protein_seq/i ) {
374 $self->{seq_type} = "preferred_protein_seq";
377 my $breakspace_num = 60;
379 my $seq = "";
381 # quality values
382 my $qual = "";
383 if ( $data{qual_value_seq} ) {
384 $qual = $data{qual_value_seq};
386 else {
387 $data{qual_value_seq} = "";
389 my @qual = split /\s+/, $qual;
390 $data{qual_value_seq} = "\n";
391 while ( my @a = splice( @qual, 0, $breakspace_num ) ) {
392 $data{qual_value_seq} .= join( " ", @a ) . "\n";
394 if ( $data{qual_value_seq} ) {
395 $seq = $data{qual_value_seq};
396 $data{qual_value_seq} = "";
399 # bac end sequences
400 # only print sequence if both quality value and sequence selected
401 if ( $data{bac_end_sequence} ) {
402 $seq = "\n"
403 . html_break_string( $data{bac_end_sequence}, $breakspace_num,
404 "\n" )
405 . "\n";
406 $data{bac_end_sequence} = "";
408 else {
409 $data{bac_end_sequence} = "";
412 # est vs. unigene seq
413 $data{est_seq} =
414 html_break_string( $data{est_seq}, $breakspace_num, "\n" )
415 if defined( $data{est_seq} );
416 $data{unigene_seq} =
417 html_break_string( $data{unigene_seq}, $breakspace_num, "\n" )
418 if defined( $data{unigene_seq} );
419 $data{protein_seq} =
420 html_break_string( $data{protein_seq}, $breakspace_num, "\n" )
421 if defined( $data{protein_seq} );
422 $data{estscan_seq} =
423 html_break_string( $data{estscan_seq}, $breakspace_num, "\n" )
424 if defined( $data{estscan_seq} );
425 $data{longest6frame_seq} =
426 html_break_string( $data{longest6frame_seq}, $breakspace_num, "\n" )
427 if defined( $data{longest6frame_seq} );
428 $data{preferred_protein_seq} =
429 html_break_string( $data{preferred_protein_seq},
430 $breakspace_num, "\n" )
431 if defined( $data{preferred_protein_seq} );
433 if ( $self->{seq_type} eq "est_seq" ) {
434 $seq = "\n" . $data{est_seq} . "\n";
435 warn "NOTE: est sequence given, seq_type: "
436 . $self->{seq_type} . "\n";
437 $data{est_seq} = "";
439 elsif ( $self->{seq_type} eq "unigene_seq" ) {
440 $seq = "\n" . $data{unigene_seq} . "\n";
441 warn "NOTE: unigene sequence given, seq_type: "
442 . $self->{seq_type} . "\n";
443 $data{unigene_seq} = "";
446 #added for protein
447 elsif ( $self->{seq_type} eq "protein_seq" ) {
448 $seq = "\n" . $data{protein_seq} . "\n";
449 warn "NOTE: protein sequence given, seq_type: "
450 . $self->{seq_type} . "\n";
451 $data{protein_seq} = "";
453 elsif ( $self->{seq_type} eq "estscan_seq" ) {
454 $seq = "\n" . $data{estscan_seq} . "\n";
455 $data{estscan_seq} = "";
457 elsif ( $self->{seq_type} eq "preferred_protein_seq" ) {
458 $seq = "\n" . $data{preferred_protein_seq} . "\n";
459 $data{preferred_protein_seq} = "";
461 elsif ( $self->{seq_type} eq "longest6frame_seq" ) {
462 $seq = "\n" . $data{longest6frame_seq} . "\n";
463 $data{longest6frame_seq} = "";
466 # output
467 my $output = "";
468 foreach my $o (@output_fields) {
469 if ( exists( $data{$o} ) && $data{$o} ne "" ) {
470 $output .= "$o:$data{$o}\t";
474 s/ +/ /g foreach ( $output . $seq );
475 $output =~ s/.*?\://
476 ; # remove the first label (>SGN_U:SGN_U738473 becomes simply >SGN_U738473)
478 print ">$output$seq"; # do not add new lines to this string
482 close(F);
485 =head2 buttons
487 Desc: sub buttons
488 Args: default
489 Ret : n/a
491 Calls subtroutines for buttons that will display on the html display pages.
493 =cut
495 sub buttons {
496 my $self = shift;
497 my $pages = ( int( $self->{line_count} / $self->{page_size} ) ) + 1;
498 $self->{content} .= "<br />Page" . $self->{page_number} . " of $pages | ";
499 $self->previousButton();
500 $self->{content} .= " | ";
501 $self->summaryButton();
502 $self->{content} .= " | ";
503 $self->newSearchButton();
504 $self->{content} .= " | ";
505 $self->nextButton();
506 $self->{content} .= "<br /><br />";
509 =head2 nextButtons
511 Desc: sub nextButtons
512 Args: default
513 Ret : n/a
515 Determines the next button that will display on the html display pages.
517 =cut
519 sub nextButton {
520 my $self = shift;
522 # add next page button if there is a next page
523 if ( ( $self->{line_count} + $self->{page_size} ) >
524 ( ( $self->{page_number} + 1 ) * $self->{page_size} ) )
526 $self->{content} .=
527 "<a href=\"display.pl?dumpfile="
528 . ( $self->{dumpfile} )
529 . "&amp;page_number="
530 . ( $self->{page_number} + 1 )
531 . "\">Next Page</a>";
533 else {
534 $self->{content} .= "Next Page";
538 =head2 previousButtons
540 Desc: sub previousButtons
541 Args: default
542 Ret : n/a
544 Determines the previous button that will display on the html display pages.
546 =cut
548 sub previousButton {
549 my $self = shift;
550 if ( ( $self->{page_number} - 1 ) > 0 ) {
551 $self->{content} .=
552 "<a href=\"display.pl?dumpfile="
553 . ( $self->{dumpfile} )
554 . "&amp;page_number="
555 . ( $self->{page_number} - 1 )
556 . "\">Previous Page</a>";
558 else {
559 $self->{content} .= "Previous Page";
563 =head2 summaryButtons
565 Desc: sub summaryButtons
566 Args: default
567 Ret : n/a
569 Determines the summary button that will display on the html display pages.
571 =cut
573 sub summaryButton {
574 my $self = shift;
575 $self->{content} .=
576 "<a href=\"display.pl?summary=1&amp;dumpfile="
577 . ( $self->{dumpfile} )
578 . "&amp;idType="
579 . ( $self->{idType} )
580 . "\">Summary Page</a>";
583 =head2 newSearchButtons
585 Desc: sub newSearchButtons
586 Args: default
587 Ret : n/a
589 Determines the new search button that will display on the html display pages.
591 =cut
593 sub newSearchButton {
594 my $self = shift;
595 $self->{content} .= "<a href=\"input.pl\">New Search</a>";
598 =head2 getFileLines
600 Desc: sub getFileLines
601 Args: file; example. $self -> getFileLines($self->{tempdir});
602 Ret : $list[0];
604 Counts file lines (used on temp directories).
606 =cut
608 sub getFileLines {
609 my $self = shift;
610 my $file = shift;
611 open my $f, '<', $file or die "$! reading $file";
612 my $cnt = 0;
613 $cnt++ while <$f>;
614 return $cnt;
617 =head2 render_text_page
619 Desc: sub render_text_page
620 Args: default;
621 Ret : n/a
623 Opens dumpfile and displays it (this is the text file).
625 =cut
627 sub render_text_page {
628 my $self = shift;
629 $self->dumptextfile( $self->{tempdir} . "/" . $self->{dumpfile} );
632 =head2 display_ids_notfound
634 Desc: sub display_ids_notfound
635 Args: default;
636 Ret : n/a
638 Opens dumpfile and counts the lines, then compares to the number of IDs
639 submitted to get the count of the IDs that were not found in the database.
641 =cut
643 sub display_ids_notfound {
644 my $self = shift;
645 my $file = $self->{tempdir} . "/" . $self->{dumpfile} . ".notfound";
646 my $count = $self->getFileLines($file);
647 $self->dumptextfile( $file, "IDs not found in the database: $count" );
650 =head2 dumptextfile
652 Desc: sub dumptextfile
653 Args: default;
654 Ret : n/a
656 Cleans up and closes dumpfile.
658 =cut
660 sub dumptextfile {
661 my $self = shift;
662 my $file = shift;
663 my $message = shift;
664 if ( $self->{download} ) {
665 print
666 "Pragma: \"no-cache\"\nContent-Disposition: filename=sgn_dump.txt\nContent-type: application/data\n\n";
668 else {
669 print "Content-type: text/plain\n\n";
672 # open file
673 print $message. "\n";
674 open( F, "<$file" ) || no_data_error_page();
676 while (<F>) {
677 print $_;
679 close(F);
682 =head2 debug
684 Desc: sub debug
685 Args: string; example. $self -> debug("input_ok: Input is NOT OK!");
686 Ret : n/a
688 Function for printing adds break and new line to messages.
690 =cut
692 sub debug {
693 my $self = shift;
694 my $message = shift;
695 if ( $self->{debug} ) { $self->{content} .= "$message<br />\n"; }
698 =head2 no_data_error_page
700 Desc: sub no_data_error_page
701 Args: n/a
702 Ret : n/a
704 Displays message when file can no longer be opened.
706 =cut
708 sub no_data_error_page {
709 print
710 "The results of that search or not available anymore. Please repeat your search.";
713 =head1 BUGS
715 None known.
717 =head1 AUTHOR
719 Lukas Mueller, August 12, 2003
720 Modified and documented by Caroline Nyenke, August, 11, 2005
722 =head1 SEE ALSO
724 /bulk/download.pl
725 /bulk/input.pl
727 =cut