4 show_ssr.pl - find SSR sequences
8 Generates an simple table with SSR sequences from data submitted through the input page (index.pl in this directory). It is a web script that supports the following cgi parameters:
14 seq_data is the actual data uploaded; it should be in FASTA format.
18 upload is an optional file upload containing a FASTA formatted sequence as input
22 denotes the minimum length needed to be found.
26 denotes the maximum length needed to be found.
30 denotes the minimum number of times a substring must be repeated to be found.
36 =item 'html' for HTML output
38 =item 'tab' for tab delimited output
46 Search code by Chenwei Lin (cl295@cornell.edu) with edits by Robert Albright (rfa5). Documentation and HTML interface by Robert Albright (rfa5@cornell.edu).
52 use CXGN
::Page
::FormattingHelpers qw
/ page_title_html
56 our $page = CXGN
::Page
->new("SSR Search Results", "rfa5");
57 use CatalystX
::GlobalContext
qw( $c );
59 if ($c->req->method() ne "POST") {
65 my ($output_type, $unit_low, $unit_high, $repeat_time, $seq_data) = $page->get_arguments("type", "ssr_min", "ssr_max", "ssr_repeat", "seq_data");
67 # Check for input errors
68 my $upload = $page->get_upload("upload");
71 # print STDERR "Uploading file $args{upload_file}...\n";
72 if (defined $upload) {
73 $upload_fh = $upload->fh();
75 while (<$upload_fh>) {
80 if ($seq_data eq "") {
81 user_error($page, "No sequence was entered!");
86 my %out = search($unit_low, $unit_high, $repeat_time, $seq_data);
88 if ($output_type eq "tab") {
91 } elsif ($output_type eq "html") {
93 print page_title_html("SSR Search Results");
94 print blue_section_html("$out{num} SSRs Found",'<table width="100%" cellpadding="5" cellspacing="0" border="0" summary=""><tr><td>' . $out{html} . '</td></tr></table>');
97 user_error($page, "Unrecognized output type!");
101 (@_ == 4) or die "Please supply minimum unit length, maximum unit length, minimum repeat time, input sequence (in blast format), true if output should be HTML, false if tab delimited.";
103 my ($unit_low, $unit_high, $repeat_time, $inseq) = @_;
104 $repeat_time = $repeat_time - 1;
107 # open input string as a file
108 open IN, "+<", \$inseq or die "Couldn't open input file \n";
109 my $seqio = Bio::SeqIO->new( -format=>"fasta",
113 #Search and write result to output file
114 my $result_table = '<table width="100%"><tr><th>Sequence</th><th>Motif</th><th>Repeat</th><th>Start</th><th>Length</th></tr>'."\n";
115 my $result_tab = "Sequence\tMotif\tRepeat\tStart\tLength\n";
117 open OUT_TAB, "+>>", \$result_tab or die "Couldn't open output file\n";
118 open OUT_TABLE, "+>>", \$result_table or die "Couldn't open output file\n";
121 while (my $seqobj = $seqio->next_seq()) {
122 my $id = $seqobj->display_id(); #substr ($_, 1);
123 my $length = $seqobj->length(); # length $seq{$_};
124 my $seq = $seqobj->seq();
126 # print OUT_TABLE '<tr><td colspan="5">'.$seq.'</td></tr>';
128 #seach for patterns repeats
129 while ($seq =~ /([ATGC]{$unit_low,$unit_high}?)(\1{$repeat_time,})/g){
130 my $actual_repeat_time = (length $&)/(length $1);
131 my $start = (length $`) + 1;
135 #screen out single nucleotide repeat
136 if (!($match =~ /^A+$|^T+$|^G+$|^C+$/)) {
137 #for patterns longer than 2 necleotides, print out all the hits
138 if (length $match >=3){
139 print OUT_TAB "$id\t$1\t$actual_repeat_time\t$start\t$length\n";
140 print OUT_TABLE "<tr><td>$id</td><td>$1</td><td>$actual_repeat_time</td><td>$start</td><td>$length</td></tr>\n";
143 #If the repeat time specified in command line is less than 4, for patterns of 2 nucleotides, print out matches of at least 4 repeats.
144 elsif ($actual_repeat_time >3){
145 print OUT_TAB "$id\t$1\t$actual_repeat_time\t$start\t$length\n";
146 print OUT_TABLE "<tr><td>$id</td><td>$1</td><td>$actual_repeat_time</td><td>$start</td><td>$length</td></tr>\n";
152 print OUT_TABLE "</table>\n";
157 $result{html} = $result_table;
158 $result{tab} = $result_tab;
159 $result{num} = $numResults;
170 <h4>SGN SSR Interface Error</h4>
172 <p>SSR subsystem can only accept HTTP POST requests</p>
180 my ($page, $reason) = @_;
186 <h4>SGN SSR Interface Error</h4>