8 This perl script is used on the bulk download page. It structures the tabs on
9 the main download page and determines the mode of each tab, as well as what
10 information fields are available in each mode. It does this by accepting the
11 mode parameter and displaying its corresponding input form. Legal values for
12 mode are clone_search, array_search, unigene_search, bac_search,
13 bac_end_search, unigene_convert and ftp. Other values have undefined results but will
14 probably default to clone_search. It also accepts a debug parameter, which when
15 set to 1 will add a 'print debug info' checkbox to the input forms. This
16 parameter will be used by download.pl to display debug information.
22 use CXGN
::Page
::FormattingHelpers
23 qw
/page_title_html modesel simple_selectbox_html/;
24 use CXGN
::DB
::Connection
;
26 our $page = CXGN
::Page
->new( "Bulk download",
27 "Lukas Mueller, Caroline Nyenke, Rob Buels" );
28 my $db = CXGN
::DB
::Connection
->new();
30 my ( $mode, $debug ) =
31 $page->get_arguments(qw
/mode debug/); #determine mode and whether debug is on
33 $page->header( "Bulk download", 'Bulk download' )
34 ; #print page header and text below
36 <div style="margin-bottom: 1em">Download Unigene or BAC information using a list of identifiers, or complete datasets with FTP.</div>
39 # create tab categories
41 \
&clone_search
, \
&array_search
, \
&unigene_search
, \
&bac_search
,
42 \
&bac_end_search
, \
&ftp_site
, \
&unigene_convert
,
45 # define urls of modes
47 [ '?mode=clone_search', 'Clone name<br />(SGN-C)' ],
48 [ '?mode=microarray', 'Array spot ID<br />(SGN-S)' ],
49 [ '?mode=unigene', 'Unigene ID<br />(SGN-U)' ],
50 [ '?mode=bac', 'BACs' ],
51 [ '?mode=bac_end', 'BAC ends' ],
52 [ '?mode=ftp', 'Full datasets<br />(FTP)' ],
53 [ '?mode=unigene_convert', 'Unigene ID Converter<br />(SGN-U)' ],
54 [ '/bulk/feature', 'Features' ],
55 [ '/bulk/gene', 'Genes' ],
58 ### figure out which mode we're in ###
60 $mode =~ /clone_search/i ?
0
61 : $mode =~ /array/i ?
1
62 : $mode =~ /unigene_convert/i ?
6
63 : $mode =~ /unigene/i ?
2
64 : $mode =~ /bac_end/i ?
4
67 : $mode =~ /feature/i ?
7
68 : 0; #clone search is default
70 ### print out the mode selection buttons at the top of the page ###
71 ### prints out all @mode_links buttons, highlighting the currently selected one ###
72 print modesel
( \
@mode_links, $modenum );
73 print qq|<div
class="indentedcontent">\n|;
75 ### call the appropriate function from the $mode_funcs list to print###
76 ### out the download form, passing the $page object ###
77 $mode_funcs[$modenum]( $page, $db, $debug );
81 <i><b>Note:</b> The SGN bulk download is limited to 10,000 identifiers per request. If you need more, please either split your query into multiple parts, download a full dataset from the FTP site, or <a href="mailto:sgn-feedback\@sgn.cornell.edu">email us</a> and we will be happy to generate a custom dataset for you.</i>
93 Print debug information fed to it by download.pl when set to 1. (see also
94 debug() in download.pl).
98 #one parameter: a string to print, or not, as desired
103 sub ug_build_selectbox
{
104 my ( $db, $filter_sub ) = @_;
106 my $sth = $db->prepare(
107 q
|SELECT ub
.unigene_build_id
,
108 ub
.organism_group_id
,
112 FROM sgn
.unigene_build as ub
, sgn
.groups as g
113 WHERE ub
.organism_group_id
=g
.group_id
119 while ( my @row = $sth->fetchrow_array() ) {
121 next unless $filter_sub->(@row);
124 my ( $unigene_build_id, $organism_group_id, $build_nr, $group_id,
127 $species =~ s/(\S)[a-z]+\s([a-z]+)/uc($1).'. '.$2/ei
128 ; #< abbreviate the species names
129 $builds{$unigene_build_id} = "$species (build $build_nr)";
132 return simple_selectbox_html
(
134 label
=> 'Only include unigene build:',
136 [ all
=> 'include all' ],
137 ( map [ $_, $builds{$_} ], keys %builds ),
145 Desc: sub clone_search
147 Ret : clone tab format
149 Defines the format of the clone tab using html, perl and perl dbi. Specifies
150 information fields available for searching clones. Also, this and all other
151 tabs print the debug checkbox when debug parameter is set to one.
157 # displays the clone search input form
159 my ( $page, $db, $debug ) = @_;
162 my $ug_build_select = ug_build_selectbox
($db);
164 <form name="bulkform" action="download.pl" method="post" enctype="multipart/form-data">
167 <table summary="" cellpadding="10" width="100%"><tr><td valign="top" bgcolor="#EEEEEE" width="320">
172 Enter a list of identifiers or upload a file containing identifers, one per line:<br />
173 <table summary="" width="100%"><tr><td>
174 <textarea name="ids" rows="5" cols="20"></textarea>
177 <i>Example:</i> #example box
178 <pre style="border: 1px solid gray; width: 10em; height: 5em">
187 And/or upload list file: <br /><input type="file" name="file" />
193 print qq|</td
><td valign
="top" bgcolor
="#EEEEEE" width
="320">\n|;
194 output_list
(); #print out checkboxes
202 if ( $debug eq "1" ) {
204 qq|<input type
="checkbox" checked
="checked" name
="debug" /> print debug statements<br /><br
/>\n|;
209 <input type="hidden" name="idType" value="clone" />
210 <input type="reset" />
211 <input type="submit" value="Submit" /><br />
220 Desc: sub array_search
222 Ret : array tab format
224 Defines the format of the array tab using html, perl and perl dbi. Specifies
225 information fields available for searching microarrays. Prints the debug
226 checkbox when debug parameter is set to one.
233 # displays the array search input form
235 my ( $page, $db, $debug ) = @_;
239 $page = CXGN
::Page
->new( "Bulk download", "Lukas Mueller" );
241 my $ug_select = ug_build_selectbox
(
244 ( $_[4] =~ /lycopersicon|tomato/i && $_[4] !~ /demethylated/i )
251 <form name="bulkform" action="download.pl" method="post" enctype="multipart/form-data">
253 <table summary="" cellpadding="10" width="100%"><tr><td valign="top" bgcolor="#EEEEEE" width="320">
258 Enter a list of identifiers or upload a file containing one identifier:<br />
259 <table summary="" width="100%"><tr><td>
260 <textarea name="ids" rows="5" cols="20"></textarea>
264 <pre style="border: 1px solid gray; width: 10em; height: 5em">
272 And/or upload list file: <br /><input type="file" name="file" />
276 </td><td valign="top" bgcolor="#EEEEEE" width="320">
282 print "</td></tr></table>\n";
284 if ( $debug eq "1" ) {
286 qq|<input type
="checkbox" checked
="checked" name
="debug" /> print debug statements<br /><br
/>\n|;
291 <input type="hidden" name="idType" value="microarray" />
292 <input type="reset" />
293 <input type="submit" value="Submit" /><br />
302 =head2 unigene_search
304 Desc: sub unigene_search
306 Ret : unigene tab format
308 Defines the format of the unigene tab using html, perl and perl dbi. Specifies
309 information fields available for searching unigenes. Prints the debug
310 checkbox when debug parameter is set to one.
317 # displays the unigene input form
319 # Note: the unigene input form does not display the drop down of unigene builds, because a unigene ID is by definition mapped to a
322 # This form supports two different types of unigene queries: getting information pertaining to unigenes themselves (annotations and seq)
323 # and unigene membership information. The two queries are distinguished by the unigene_mode radio control. Some adjustments have to
324 # be made in the download.pl program when distinguishing the two modes.
326 my ( $page, $db, $debug ) = @_;
329 my $ug_select = ug_build_selectbox
($db);
333 <form name="bulkform" action="download.pl" method="post" enctype="multipart/form-data">
336 <table summary="" cellpadding="10" width="100%"><tr><td valign="top" bgcolor="#EEEEEE" width="320">
343 Enter a list of identifiers or upload a file containing one identifer separated by whitespace (returns, spaces or tabs):<br />
346 <table summary="" width="100%" cellpadding="0"><tr><td>
347 <textarea name="ids" rows="5" cols="20"></textarea>
351 <pre style="border: 1px solid gray; width: 10em; height: 5em">
360 And/or upload list file: <br /><input type="file" name="file" />
364 </td><td valign="top" bgcolor="#EEEEEE" width="320">
366 <input type="checkbox" name="convert_to_current" checked="checked" /><label for="convert_to_current"><a class="stealth" title="Check this box to convert each unigene in the list to its equivalent in the most current corresponding unigene build(s), if available"><img src="/documents/img/new.gif" />convert unigene list to current build</a></label>
370 <input type="radio" name="unigene_mode" value="unigene_info" checked="checked" /> <b>Download unigene information</b>:<br />
371 <!-- SGN_U for Unigene Info -->
372 <div style="padding-left: 1em">
373 <input type="checkbox" name="SGN_U_U" checked="checked" /> unigene id (SGN-U)<br />
374 <input type="checkbox" name="automatic_annotation" checked="checked" /> automatic (BLAST) annotation<br />
375 <input type="checkbox" name="best_genbank_match" checked="checked" /> best genbank match<br />
376 <input type="checkbox" name="best_arabidopsis_match" checked="checked" /> best arabidopsis match<br />
377 <input type="checkbox" name="associated_loci" checked="checked" /> associated loci<br />
378 <input type="checkbox" name = "uni_seq" checked="checked" onclick="check_fasta_option()"/> sequence<br />
379 <div style="padding-left: 1em">
380 <input type="radio" name="seq_mode" value="unigene_seq" checked="checked" /> unigene nucleotide sequence<br />
381 <input type="radio" name="seq_mode" value="estscan_seq" /> estscan predicted proteins<br />
382 <input type="radio" name="seq_mode" value="longest6frame_seq" /> longest ORF from 6-frame translation<br />
383 <input type="radio" name="seq_mode" value="preferred_protein_seq" /> preferred sequence<br />
387 <input type="radio" name="unigene_mode" value="member_info" /> <b>Download member information</b>:
388 <div style="padding-left: 1em">
389 <input type="checkbox" name="clone_name" checked="checked" /> clone name<br />
390 <input type="checkbox" name="SGN_C" checked="checked" /> clone id (SGN-C)<br />
391 <input type="checkbox" name="SGN_T" checked="checked" /> sequence read id (SGN-T)<br />
392 <input type="checkbox" name="SGN_E" checked="checked" /> est id (SGN-E)<br />
393 <input type="checkbox" name="build_nr" checked="checked" /> unigene build number<br />
394 <!-- SGN_U for Member Info -->
395 <input type="checkbox" name="SGN_U_M" checked="checked" /> unigene id (SGN-U)<br />
396 <input type="checkbox" name="chipname" checked="checked" /> chipname<br />
397 <input type="checkbox" name="SGN_S" checked="checked" /> microarray spot id (SGN-S)<br />
398 <input type="checkbox" name="TUS" checked="checked" /> TUS number<br />
399 <input type="checkbox" name="manual_annotation" /> manual annotation<br />
400 <input type="checkbox" name="est_seq" checked="checked" /> EST sequence<br />
408 if ( $debug eq "1" ) {
410 qq|<input type="checkbox" checked="checked" name=debug /> print debug statements<br /><br />\n|;
415 <input type
=hidden name
="idType" value
="unigene" />
416 <input type
="reset" /> 
; 
;
417 <input type
="submit" value
="Submit" /><br />
426 =head2 unigene_convert
428 Desc: sub unigene_convert
430 Ret : unigene tab format
432 Defines the format of the unigene conversion tab using html, perl and perl dbi. Specifies
433 information fields available for searching unigenes. Prints the debug
434 checkbox when debug parameter is set to one.
438 sub unigene_convert
{
441 # displays the unigene converter input form
443 # Note: the unigene input form does not display the drop down of unigene builds, because a unigene ID is by definition mapped to a
446 my ( $page, $db, $debug ) = @_;
452 <form name="bulkform" action="download.pl" method="post" enctype="multipart/form-data">
455 <table summary="" cellpadding="10" width="100%"><tr><td valign="top" bgcolor="#EEEEEE" width="320">
461 Enter a list of identifiers or upload a file containing one identifer separated by whitespace (returns, spaces or tabs):<br />
464 <table summary="" width="100%" cellpadding="0"><tr><td>
465 <textarea name="ids" rows="5" cols="50"></textarea>
469 <pre style="border: 1px solid grey; width: 20em; height: 5em">
478 And/or upload list file: <br /><input type="file" name="file" />
488 if ( $debug eq "1" ) {
490 qq|<input type="checkbox" checked="checked" name="debug" /> print debug statements<br /><br />\n|;
495 <input type
=hidden name
="idType" value
="unigene_convert" />
496 <input type
="reset" /> 
; 
;
497 <input type
="submit" value
="Convert" /><br />
510 <input type="radio" name="outputType" value="html" checked="checked" /> HTML<br />
511 <input type="radio" name="outputType" value="text" /> text<br />
512 <input type="checkbox" name="fasta" /> Fasta<br />
519 #method used by clone_name and microarray searches to display checkboxes
523 <b
>Please
select the information you would like
for each identifier
:</b><br />
524 <input type
="checkbox" name
="clone_name" checked
="checked" /> clone name<br />
525 <input type
="checkbox" name
="SGN_C" checked
="checked" /> clone id (SGN-C)<br />
526 <input type
="checkbox" name
="SGN_T" checked
="checked" /> sequence read id (SGN-T)<br />
527 <input type
="checkbox" name
="SGN_E" checked
="checked" /> est id (SGN-E)<br />
528 <input type
="checkbox" name
="build_nr" checked
="checked" /> unigene build nr<br />
529 <input type
="checkbox" name
="SGN_U" checked
="checked" /> unigene id (SGN-U)<br />
530 <input type
="checkbox" name
="chipname" checked
="checked" /> chipname<br />
531 <input type
="checkbox" name
="SGN_S" checked
="checked" /> microarray spot id (SGN-S)<br />
532 <input type
="checkbox" name
="TUS" checked
="checked" /> TUS number (used to order clones)<br />
533 <input type
="checkbox" name
="manual_annotation" /> manual annotation<br />
534 <input type
="checkbox" name
="automatic_annotation" /> automatic (BLAST) annotation<br />
535 <input type
="checkbox" name
="sequence" onclick
="check_fasta_option()" /> sequence<br />
536  
; 
; 
;<input type
="radio" name
="seq_type" value
="est_seq" checked
="checked" /> EST sequence<br />
537  
; 
; 
;<input type
="radio" name
="seq_type" value
="unigene_seq" /> Unigene sequence<br />
549 Defines the format of the BAC tab using html & perl. Specifies
550 information fields available for searching BACs. Prints the debug
551 checkbox when debug parameter is set to one.
558 # displays the bac search input form
560 my ( $page, $db, $debug ) = @_;
566 <form name="bulkform" action="download.pl" method="post" enctype="multipart/form-data">
569 <table summary="" cellpadding="10" width="100%"><tr><td valign="top" bgcolor="#EEEEEE" width="320">
573 Enter a list of identifiers or upload a file containing identifers separated by whitespace (returns, spaces or tabs):<br />
574 <table summary="" width="100%"><tr><td>
575 <textarea name="ids" rows="5" cols="20"></textarea>
579 <pre style="border: 1px solid gray; width: 10em; height: 5em">
588 And/or upload list file: <br /><input type="file" name="file" />
592 </td><td valign="top" bgcolor="#EEEEEE" width="320">\n
593 <b>Download BAC information:</b> <br />
594 <div style="margin: 1em; white-space: nowrap">
595 <input type="checkbox" name="arizona_clone_name" checked="checked" /> Arizona-style clone name (LE_HBa0001A01) <br />
596 <input type="checkbox" name="cornell_clone_name" checked="checked" /> Old Cornell-style clone name (P001A01)<br />
597 <input type="checkbox" name="chr_clone_name" checked="checked" /> Clone name with chromosome (C01HBa0001A01)<br />
598 <input type="checkbox" name="clone_type" checked="checked" /> clone type <br />
599 <input type="checkbox" name="org_name" checked="checked" /> organism name <br />
600 <input type="checkbox" name="accession_name" checked="checked" /> accession name <br />
601 <input type="checkbox" name="library_name" checked="checked" /> library name <br />
602 <input type="checkbox" name="estimated_length" checked="checked" /> estimated length <br />
603 <input type="checkbox" name="genbank_accession" checked="checked" /> genbank accession<br />
604 <!-- <input type="checkbox" name="overgo_matches" checked="checked" DISABLED /> overgo matches<br /> -->
610 if ( $debug eq "1" ) {
612 qq|<input type
="checkbox" checked
="checked" name
="debug" /> print debug statements<br /><br
/>\n|;
617 <input type="hidden" name="idType" value="bac" />
618 <input type="reset" />
619 <input type="submit" value="Submit"><br />
626 =head2 bac_end_search
628 Desc: sub bac_end_search
630 Ret : BAC end tab format
632 Defines the format of the BAC end tab using html & perl.
633 Specifies information fields available for searching BAC ends. Prints the debug
634 checkbox when debug parameter is set to one.
641 # displays the bac end search input form
643 my ( $page, $db, $debug ) = @_;
649 <form name="bulkform" action="download.pl" method="post" enctype="multipart/form-data">
652 <table summary="" cellpadding="10" width="100%"><tr><td valign="top" bgcolor="#EEEEEE" width="370">
656 Enter a list of identifiers or upload a file containing identifers separated by whitespace (returns, spaces or tabs):<br />
657 <table summary="" width="100%"><tr><td>
658 <textarea name="ids" rows="5" cols="25"></textarea>
662 <pre style="border: 1px solid gray; width: 15em; height: 5em">
663 LE_HBa0011C24_SP6_121022
664 SL_MboI0033A13_SP6_294865
665 SL_EcoRI0022A07_T7_229350
671 And/or upload list file: <br /><input type="file" name="file" />
675 </td><td valign="top" bgcolor="#EEEEEE" width="280">\n
677 <b>Download BAC end information:</b><br />
678 <div style="margin-left: 1em; white-space: nowrap">
679 <input type="checkbox" name="bac_id" checked= "checked" /> bac end identifier <br />
680 <input type="checkbox" name="clone_type" checked="checked" /> clone type <br />
681 <input type="checkbox" name="org_name" checked="checked" /> organism name <br />
682 <input type="checkbox" name="accession_name" checked="checked" /> accession name <br />
683 <input type="checkbox" name="library_name" checked="checked" /> library name <br />
684 <input type="checkbox" name="estimated_length" checked="checked" /> estimated length <br />
685 <input type="checkbox" name="genbank_accession" checked="checked" /> genbank accession<br />
686 <!-- <input type="checkbox" name="overgo_matches" checked="checked" DISABLED /> overgo matches <br /> --> <br />
687 <b>Choose format and type:</b><br />
688 <input type="checkbox" name="bac_end_sequence" checked="checked" /> bac end sequence <br />
689 <input type="checkbox" name="qual_value_seq" checked="checked" /> quality value <br />
690 <div style="margin-left: 1em">
691 <input type="radio" name="bac_seq_type" value="raw_seq" />raw sequence and/or quality<br />
692 <input type="radio" name="bac_seq_type" value="trim_seq" checked="checked" />trimmed seq. and/or quality<br />
700 if ( $debug eq "1" ) {
702 qq|<input type
="checkbox" checked
="checked" name
="debug" /> print debug statements<br /><br
/>\n|;
707 <input type="hidden" name="idType" value="bac_end" />
708 <input type="reset" />
709 <input type="submit" value="Submit"><br />
722 Defines the format of the ftp tab using html & perl. Specifies
723 links for downloading ftp information.
731 <h3 style="margin-bottom: 0.3em">SGN FTP site</h3>
732 <div style="margin: 0 1em 0 1em">Download complete datasets.</div><br />
734 <div style="margin: 0; padding: 1em; border: 1px solid #ccccff">
735 <a class="folderlink" href="ftp://ftp.solgenomics.net">ftp top level</a>
737 <ul style="margin: 1em 0 0 0; list-style: none">
743 'Sequence, quality, and membership information for all SGN unigene builds',
745 'Sequence and quality files for all SGN ESTs, organized by library',
746 'maps_and_markers' =>
747 'Marker sequences and marker position lists for all SGN maps',
748 'physical_mapping' =>
749 'Raw data files for Tomato HindIII BAC library FPC and overgo analyses',
750 'blast_annotations' =>
751 'Highest-ranked hits vs. Genbank NR and Arabidopsis for all SGN unigenes.',
752 'manual_annotations' =>
753 'Manual annotations for a number of SGN unigenes',
755 'Custom-generated datasets for individual users (mail special requests to <a href="mailto:sgn-feedback@sgn.cornell.edu">sgn-feedback</a>)',
757 'Tomato genomic data, including BAC end and full BAC sequences',
758 'COSII' => 'COSII marker data'
761 foreach my $dir ( sort keys %ftplinks ) {
762 my $desc = $ftplinks{$dir};
764 <li><a class="folderlink" href="ftp://ftp.solgenomics.net/$dir"> $dir</a>
765 <div class="folderdesc">$desc</div>
775 <i>Note:</i> The SGN FTP site can also be accessed directly through <a href="ftp://ftp.solgenomics.net">ftp://ftp.solgenomics.net</a> using a browser or ftp program.
786 Lukas Mueller, Caroline Nyenke, Alexander Naydich and Matthew Crumb