Merge pull request #5069 from solgenomics/topic/accession_upload_file
[sgn.git] / cgi-bin / tools / caps_designer / find_caps.pl
blob9e92756b9ef17be11cfc4d07cf39f5b7660efb29
1 use strict;
2 use warnings;
3 use CXGN::Page;
4 use CXGN::Page::FormattingHelpers qw/ page_title_html
5 blue_section_html /;
6 use CXGN::BioTools::CapsDesigner2;
7 use File::Temp;
8 use CatalystX::GlobalContext '$c';
10 my $page = CXGN::Page->new( "CAPS Designer Result", "Chenwei");
12 my ($format, $cheap_only, $exclude_seq, $cutno, $seq_data) = $page->get_arguments("format", "cheap", "start", "cutno", "seq_data");
15 #########Check if the input sequence is empty
16 if ($seq_data eq ''){
17 err_page ($page, "Please enter sequence!\n");
20 ########Check validity of exclusion number and cut number
21 if ($exclude_seq < 0){
22 err_page ($page, "The number of excluded nucleotides may not be negative!");
24 if ($cutno < 1){
25 err_page ($page, "The number of enzyme cut sites allowed must be greater than zero!");
28 ##########Write the sequence into a temp file for processing
29 my $html_root_path = $c->config->{'basepath'};
30 my $doc_path = $c->tempfiles_subdir('caps_designer');
31 my $path = $c->path_to($doc_path);
32 my ($tmp_fh, $tmp_name);
34 $tmp_fh = new File::Temp(
35 DIR => $path,
36 UNLINK => 0,
38 $tmp_name = $tmp_fh->filename;
39 print $tmp_fh "$seq_data";
40 close $tmp_fh;
45 ########Read enzyme information. allENZYME is a file containing all enzyme information and it is located in the same directory as this script.
46 ##allENZYME last updated August 17, 2009 to 09-10 prices
47 my $support_dir = $c->config->{'support_data_subdir'};
48 my $enzyme_file = $html_root_path . $support_dir . '/caps_designer/allENZYME';
49 my ($cost_ref, $cut_ref) = CXGN::BioTools::CapsDesigner2::read_enzyme_file($enzyme_file);
54 ########Check if the input format is correct
55 my $format_check;
56 if ($format =~ /fasta/i){
57 $format_check = CXGN::BioTools::CapsDesigner2::check_fasta($tmp_name);
58 } elsif ($format =~ /clustal/i){
59 $format_check = CXGN::BioTools::CapsDesigner2::check_clustal($tmp_name);
60 } else {
61 &err_page($page,'Unrecognized format - please enter your input in FASTA or CLUSTAL format.');
64 err_page($page,$format_check) unless $format_check;
66 ########Check if the input has at least two sequences and 12 or fewer sequences
67 my $seq_num;
68 if ($format =~ /fasta/i){
69 $seq_num = CXGN::BioTools::CapsDesigner2::check_input_number($tmp_name);
70 if($seq_num < 2){
71 &err_page($page, "Please enter at least two sequences!");
72 }elsif($seq_num > 12){
73 &err_page($page, "You may only enter up to 12 sequences!");
78 #########Process input sequence and return aligned sequences
79 my ($align_clustal,$align_fasta);
80 eval {
81 ($align_clustal,$align_fasta) = CXGN::BioTools::CapsDesigner2::format_input_file($format, $tmp_name);
84 if (!$align_fasta || ($align_fasta eq "")){
85 err_page($page, "Clustal alignment failed. Please check input sequences!");
87 my ($seq_length, $parent_info_ref) = CXGN::BioTools::CapsDesigner2::get_seqs($align_fasta);
89 &err_page($page, "Please enter sequences containing only DNA nucleotides A, C, G, T or N.") unless CXGN::BioTools::CapsDesigner2::check_seqs($parent_info_ref);
91 &err_page($page, "The number of excluded nucleotides is too large for the sequences given.") if ($exclude_seq*2 >= $seq_length);
95 ##########Find CAPS
96 #my ($position1_ref, $position2_ref, $cap_seq1_ref, $cap_seq2_ref) = CXGN::BioTools::CapsDesigner2::find_caps($parent1_id, $parent1_seq, $parent2_id, $parent2_seq, $seq_length, $cut_ref, $exclude_seq, $exclude_seq, $cutno, $cost_ref, $cheap_only);
99 my ($substrings_ref, $cutsites_ref, $uniques_ref) = CXGN::BioTools::CapsDesigner2::find_caps($parent_info_ref, $seq_length, $cut_ref, $exclude_seq, $cutno, $cost_ref, $cheap_only);
102 ############Predict the size of bands
103 my ($size_ref) = CXGN::BioTools::CapsDesigner2::predict_fragments($cutsites_ref, $seq_length);
107 ##########Prepare HTML printout content
109 my ($help_content, $sum_content, $caps_content, $plain_content);
110 $help_content = "<tr><th>1.</th><td>Polymorphism caused by an ambiguous nucleotide 'N' is not considered.</td></tr>";
111 $help_content .= "<tr><th>2.</th><td>Please check the provided local alignment around the predicted CAPs site in order to make sure it is not caused by alignment gaps.</td></tr>";
112 $help_content .= "<tr><th>3.</th><td>Analysis is based on sequenced parts of the PCR products. Additional cutting sites and digested fragments may exist.</td></tr>";
113 $help_content .= "<tr><th>4.</th><td>Enzymes separated by a slash are isoschizimers.</td></tr>";
114 $help_content .= "<tr><th>5.</th><td>Enzyme price is based on NEB catalogue <a href = 'http://www.neb.com/nebecomm/price_list.pdf'>(http://www.neb.com/nebecomm/price_list.pdf)</a>.</td></tr>";
116 my $temp_str = join ", ", sort keys %$parent_info_ref;
118 $sum_content = "<tr><th>Aligned Sequences</th><td>$temp_str</td></tr>";
119 my $search_start = $exclude_seq + 1;
120 my $search_end = $seq_length - $exclude_seq;
121 $sum_content .= "<tr><th>Alignment Length(w/ gaps)</th><td>$seq_length bp</td><th>Search Range</th><td>bp $search_start - $search_end</td></tr>";
122 $sum_content .= "<tr><th>Cutting Sites Limit</th><td>$cutno</td></tr>";
123 $sum_content .= "<tr><th>Enzyme Selection</th><td>";
124 if ($cheap_only == 1){
125 $sum_content .= "less than \$65/1000u";
127 else {
128 $sum_content .= "All";
130 $sum_content .= "</td></tr>";
132 # my %position1 = %$position1_ref;
133 # my %position2 = %$position2_ref;
134 # my %cap_seq1 = %$cap_seq1_ref;
135 # my %cap_seq2 = %$cap_seq2_ref;
136 my %cut_site = %$cut_ref;
137 my %cost = %$cost_ref;
138 # my %size1 = %$size1_ref;
139 # my %size2 = %$size2_ref;
141 my %position = %$cutsites_ref;
142 my %cap_seq = %$substrings_ref;
143 my %size = %$size_ref;
144 my %uniques = %$uniques_ref;
145 my %parent_info = %$parent_info_ref;
148 for my $current_enzyme(sort keys %cap_seq) {
149 if(!defined $cost{$current_enzyme}) {$cost{$current_enzyme} = 'over $65/1000u';}
150 $caps_content .= '<table width="100%" cellpadding="5" cellspacing="0" border="6"><tr><th>Enzyme</th><td>' . $current_enzyme . '</td><th>Price</th><td>' . $cost{$current_enzyme} . '</td></tr>';
152 $caps_content .= '<tr><th>Recognition Sequence</th><td colspan="3">' . $cut_site{$current_enzyme} . '</td></tr>';
154 for my $id (sort keys %parent_info) {
155 $caps_content .='<tr><th>' . $id . ' Current Site(s) </th><td>';
156 if(@{$position{$current_enzyme}{$id}} == 0) {
157 $caps_content .= 'None.';
158 }else{
159 for (sort {$a<=>$b} @{$position{$current_enzyme}{$id}}){
160 $caps_content .= $_ . ' ';
163 $caps_content .= '<th>' . $id . ' Fragments(s),bp </th><td>';
164 for (@{$size{$current_enzyme}{$id}}){
165 $caps_content .= $_ . ' ';
167 $caps_content .= '</td></tr>';
170 for (sort {$a<=>$b} keys %{$cap_seq{$current_enzyme}}){
171 $caps_content .= '<tr><th>CAPS Site</th><td>'. $_ . '</td><td colspan=2>';
172 $caps_content .= '<table cellspacing="2" style="font-family:Lucida Console"><tr><th>';
173 for my $id (sort keys %parent_info) {
174 if($id eq $uniques{$current_enzyme}{$_}) {
175 $caps_content .= $id . '</th><td><tt>' . $cap_seq{$current_enzyme}{$_}{$id} . '</tt></td></tr><tr><th>';
176 }else{
177 $caps_content .= $id . '</th><td><span style="color:gray"><tt>' . $cap_seq{$current_enzyme}{$_}{$id} . '</tt></span></td></tr><tr><th>';
180 $caps_content .= '</table>';
182 $caps_content .= '</td></tr>';
183 $caps_content .= '</table><br /><br />';
185 $caps_content .= 'Black font denotes sequence with unique cut site(s). <br />' if $caps_content;
187 ###############Prepare plain text result links
188 # my $out = CXGN::BioTools::CapsDesigner2::print_text($cost_ref,$cut_ref,$seq_length,$position1_ref, $position2_ref, $cap_seq1_ref, $cap_seq2_ref, $parent1_id, $parent2_id, $cheap_only, $size1_ref, $size2_ref, $cutno, $exclude_seq, $path);
189 my $out = CXGN::BioTools::CapsDesigner2::print_text($cost_ref, $cut_ref, $seq_length, $cutsites_ref, $substrings_ref, $parent_info_ref, $cheap_only, $size_ref, $uniques_ref, $cutno, $exclude_seq, $path);
191 $out =~ s/$html_root_path//;
192 $align_fasta =~ s/$html_root_path//;
193 $align_clustal =~ s/$html_root_path//;
194 $plain_content .= "<tr><td><a target='blank' href = $out> View/download plain text result file</a></td></tr>";
195 $plain_content .= "<tr><td><a target='blank' href = $align_clustal> View/download alignment in clustal format</a></td></tr>";
196 $plain_content .= "<tr><td><a target='blank' href = $align_fasta> View/dowload alignment in fasta format</a></td></tr>";
198 $page->header();
199 print page_title_html("CAPS Designer Result");
200 print blue_section_html('For experienced users','<table width="100%" cellpadding="5" cellspacing="0" border="0">' . $plain_content . '</table>');
202 #######The user can go back to the caps_input page and keep all the input. Some browsers don't do that.
203 print "<form methed=\"post\" action=\"caps_input.pl\" name=\"capsinput\">";
204 print "<input type=\"hidden\" name=\"format\" value=\"$format\">";
205 print "<input type=\"hidden\" name=\"cheap\" value=\"$cheap_only\">";
206 print "<input type=\"hidden\" name=\"start\" value=\"$exclude_seq\">";
207 print "<input type=\"hidden\" name=\"cutno\" value=\"$cutno\">";
208 print "<input type=\"hidden\" name=\"seq_data\" value=\"$seq_data\">";
209 #print "<input type=\"hidden\" name=\"seq_select\" value=\"$seq_select\">";
210 print "<input type=\"submit\" value=\"Back to input\">";
211 print "</form>";
212 print "<br />";
214 print blue_section_html('Notes','<table width="100%" cellpadding="5" cellspacing="0" border="0">' . $help_content . '</table>');
215 print blue_section_html('Query Summary','<table width="100%" cellpadding="5" cellspacing="0" border="0">' . $sum_content . '</table>');
216 if ($caps_content) {
217 print blue_section_html('CAPS Candidates',$caps_content);
218 }else{
219 print blue_section_html('CAPS Candidates',"None.");
221 $page -> footer();
223 sub err_page {
224 my $err_page = shift;
225 my $err_message = shift;
227 $c->throw( public_message => $err_message, notify => 0, is_client_error => 1 );