1 #Program that inputs a DNA sequence and the name of a restriction enzyme
2 #Shows where the enzyme would cut the DNA
3 #Written by Emily Hart 7/27/06
4 #Refactored by Johnathon Schultz 4/08/07
9 use CXGN
::Page
::FormattingHelpers
;
10 use CXGN
::DB
::Connection
;
11 our $dbh=CXGN
::DB
::Connection
->new();
12 our $page=CXGN
::Page
->new("Sol Genomics Network","john");
13 our ($dna_seq, $enz_name, $action)=$page->get_encoded_arguments("dna_seq", "enz_name", "action");
15 if($action eq "digest"){
16 if (!is_valid_dna
($dna_seq)){
17 # $page-> message_page("Invalid DNA Sequence", "Please make sure that your sequence contains only the letters A, G, T, and C.");
18 $page->header("Sol Genomics Network");
19 print "<div align='center'>Invalid DNA Sequence<br />Please make sure that your sequence contains only the letter A, G, T, and C.</div>";
22 elsif(!is_valid_enzyme
($enz_name, $dbh)){
23 # $page-> message_page("Invalid Enzyme", "Please select an enzyme from the list.");
24 $page->header("Sol Genomics Network");
25 print "<div align='center'>Invalid Enzyme<br /> Please select an enzyme from the list.</div>";
28 elsif(!(find_cut_sites
(is_valid_dna
($dna_seq), $enz_name) || $enz_name eq "All")){
29 no_cuts_found
($enz_name);
30 print "<h1>Error 1</h1>";
32 # $page-> message_page("No Cuts Found", "Your sequence is not cut by $enz_name.");
36 if($enz_name eq "All"){
37 my $enzymes = get_all_enzymes
($dbh);
38 foreach my $enz(@
$enzymes){
39 my $result = find_cut_sites
(is_valid_dna
($dna_seq), $enz);
41 $dna .= "Your sequence was cut by $enz at:<br /> $result";
43 # print "<h1>$enz</h1>";
47 $dna = find_cut_sites
(is_valid_dna
($dna_seq), $enz_name);
50 $page->header("Sol Genomics Network");
51 print "<div align='center'>Cuts Found!<br /> <tt>$dna</tt></div>";
55 no_cuts_found
($enz_name);
60 $page->header("Sol Genomics Network","Virtual DNA Digestion");
66 $page->header("Sol Genomics Network");
67 my $message = "<div align='center'>No cuts found.<br /> Your sequence is not cut by ";
68 if($enz_name eq "All"){
69 $message .= "any enzymes.</div>";
72 $message .= "$enz_name.</div>";
81 print "<p>Enter the sequence of DNA that you would like to have virtually digested and select the enzyme that you would like to use. Please do not enter a DNA sequence containing \"N\"s.</p>";
82 print '<p>Enter DNA sequence:</p><form action="#" method="post"><textarea name="dna_seq" rows="10" cols="60" wrap="virtual"></textarea>';
83 my $menu = enz_menu
($dbh);
84 print "<p>Select enzyme: $menu</p>";
85 print '<input type="hidden" name="action" value="digest" /><input type="submit" value="Submit" /><input type="reset" value="Reset" /></form>';
88 #gets the names of the enzymes form the database and puts them into a drop down list
91 $dbh -> do("set search_path=sgn");
92 my $enzyme_ids = $dbh ->selectcol_arrayref("SELECT DISTINCT enzyme_id FROM enzyme_restriction_sites WHERE restriction_site IS NOT NULL");
93 my $formatted_enz_ids = join ", ", @
$enzyme_ids;
94 my $enzyme_names = $dbh -> selectcol_arrayref
("SELECT enzyme_name FROM enzymes WHERE enzyme_id IN($formatted_enz_ids) ORDER BY enzyme_name");
95 my $menu_string = '<select name="enz_name">';
96 $menu_string .= "<option value='All' default='True'>All Enzymes</option>";
97 foreach my $e(@
$enzyme_names){
98 $menu_string .= "<option value = \"$e\">$e</option>";
100 $menu_string .= "</select>";
103 #if the DNA sequence contains only A,G,T,C, returns the sequence
104 #otherwise returns false
109 if($dna =~ m/^[ACGT]+$/){
117 #returns true if the enzyme is valid, false otherwise
119 my ($enz, $dbh) = @_;
120 $dbh -> do("set search_path=sgn");
121 my $ids = $dbh ->selectcol_arrayref("SELECT DISTINCT enzyme_id FROM enzyme_restriction_sites WHERE restriction_site IS NOT NULL");
122 my $formatted_enz_ids = join ", ", @
$ids;
123 my $enzyme_names = $dbh -> selectcol_arrayref
("SELECT enzyme_name FROM enzymes WHERE enzyme_id IN($formatted_enz_ids) ORDER BY enzyme_name");
125 foreach my $e(@
$enzyme_names){
130 return $is_valid || $enz_name eq "All";
131 #Return the truth of this statement: The enzyme "is valid" or is equal to "All"
134 #returns an array of all possible sequences for the given enzyme
137 $dbh -> do("set search_path=sgn");
138 my $ids_ref = $dbh -> selectcol_arrayref
("SELECT enzyme_id FROM enzymes WHERE enzyme_name = '$enz_name'");
139 my $enz_id = $ids_ref->[0];
140 my $seqs_ref = $dbh -> selectcol_arrayref
("SELECT restriction_site FROM enzyme_restriction_sites WHERE enzyme_id = ?", undef, $enz_id);
141 foreach my $seq(@
$seqs_ref){
147 #finds where the enzyme matches the dna sequence and returns an array of the indexes where they first match
149 my ($dna_seq, $enz_name) = @_;
150 my $seqs_ref = get_enz_seq_arr
($enz_name);
152 for(my $i=0; $i<length($dna_seq); $i++){
153 foreach my $enz_seq(@
$seqs_ref){
155 $enz_seq =~ s/(.*)\(\d*\/\d*\)/$1/;
156 my $enz_length = length($enz_seq);
157 my $sub_seq = substr($dna_seq, $i, $enz_length);
158 if($enz_seq eq $sub_seq){
166 #returns the reverse compliment of the given DNA sequence
167 sub reverse_compliment
{
169 my %base_pair = ("A" => "T",
174 my @seq = split "", $dna_seq;
176 $compliment.=$base_pair{$s};
178 my $rev_comp = reverse($compliment);
182 #returns finished, formatted DNA
184 my($top_seq, $enz_name) = @_;
185 my $rev_bottom_seq = reverse_compliment
($top_seq);
186 my $enzyme_seq_ref = get_enz_seq_arr
($enz_name);
187 my $enzyme_seq = $enzyme_seq_ref->[0];
188 my $length_before_cut = this_length_before_cut
($enzyme_seq);
189 my $top_seq_cuts = find_matches
($top_seq, $enz_name);
190 foreach my $c(@
$top_seq_cuts){
191 my $cut = $c+$length_before_cut;
192 $top_seq = insert_cut
($top_seq, $cut);
193 if (other_length_before_cut
($enzyme_seq)){
194 my $bottom_seq = reverse($rev_bottom_seq);
195 $bottom_seq = insert_cut
($bottom_seq, other_length_before_cut
($enzyme_seq));
196 $rev_bottom_seq = reverse($bottom_seq);
199 my $rev_bottom_seq_cuts = find_matches
($rev_bottom_seq, $enz_name);
200 foreach my $c(@
$rev_bottom_seq_cuts){
201 my $cut = $c+$length_before_cut;
202 $rev_bottom_seq = insert_cut
($rev_bottom_seq, $cut);
203 if (other_length_before_cut
($enzyme_seq)){
204 my $rev_top_seq = insert_cut
(reverse($top_seq), other_length_before_cut
($enzyme_seq));
205 $top_seq = reverse($rev_top_seq);
208 my $bottom_seq = reverse($rev_bottom_seq);
209 if(has_cut_sites
($top_seq) || has_cut_sites
($bottom_seq)){
210 if(length($top_seq) < 100 && length($bottom_seq) < 100){
211 return "$top_seq<br />$bottom_seq<br />";
214 return wrap_text
($top_seq, $bottom_seq);
220 #returns true if a DNA sequence contains a ^
223 my @dna = split "", $dna_seq;
224 my $has_cut_sites = 0;
230 return $has_cut_sites;
233 #returns how far after the match the strand will be cut
234 sub this_length_before_cut
{
235 my ($enzyme_seq) = @_;
236 $enzyme_seq =~ s/\s//g;
237 my $length_before_cut = "";
238 if($enzyme_seq =~ m/^(.*)\^.*$/){
239 $length_before_cut = length($1);
241 elsif($enzyme_seq =~ m/(.*)\((\d*)\/\d
*\
)/){
242 $length_before_cut = length($1) + $2;
244 return $length_before_cut;
247 #for enzymes which cut the strand and its compliment, returns how far after the match the other strand will be cut
248 sub other_length_before_cut
{
249 my ($enzyme_seq) = @_;
250 my $length_before_cut = "";
251 if($enzyme_seq =~ m/(.*)\(\d*\/(\d
*)\
)/){
252 $length_before_cut = length($1) + $2;
254 return $length_before_cut;
257 #insert a cut before the index given, ignoring ^s
259 my ($dna_seq, $index) = @_;
260 my $length = length($dna_seq);
261 if($index > length($dna_seq)){
264 my @dna = split "", $dna_seq;
265 my $letters_count = -1;
269 if ($d =~ m/[ATGC]/){
272 if ($letters_count == $index){
276 if($letters_count == $index-1){
279 elsif($letters_count < $index-1){
283 $dna_seq =~ s/^(.{$char_count})(.*)$/$1^$2/;
288 #Returns the names of all the enzymes in the database.
291 $dbh->do("set search_path=sgn");
292 my $enzymes = $dbh->selectcol_arrayref("SELECT DISTINCT enzyme_name from enzymes");
296 #Wraps the dna text around the screen instead of letting it run off into the abyss.
297 #I know this is like the worst possible way to write this code but, its the only way I can think of for doing it
298 #Since I don't understand a lot of the perl "subtleties"
300 my ($top, $bottom) = @_;
301 print CXGN
::Page
::FormattingHelpers
->html_break_string($top,100,":");
302 my $delimitedtop = CXGN
::Page
::FormattingHelpers
->html_break_string($top,100,":");
303 my $delimitedbottom = CXGN
::Page
::FormattingHelpers
->html_break_string($bottom,100,":");
304 my @toplines = split(/:/,$delimitedtop);
305 my @bottomlines = split(/:/,$delimitedbottom);
307 for(my $i = 0; $i < length(@toplines) && $i < length(@bottomlines); $i++){
308 $result .= "$toplines[$i]<br />$bottomlines[$i]<br /><br />";