4 use Term
::ANSIColor
qw(:constants);
5 use Data
::Dump
qw(ddx);
9 ATTACATGGACTTCTCTGGCTACCAAT
11 blastn to bacteria (taxid:2)
13 http://www.ncbi.nlm.nih.gov/nucleotide/373881580?report=genbank&log$=nuclalign&blast_rank=1&RID=927901S401N
14 LOCUS JQ031552 76221 bp DNA circular BCT 29-JAN-2012
15 DEFINITION Aliivibrio fischeri strain KB1A-97 plasmid pKB1A97-67, complete
18 VERSION JQ031552.1 GI:373881580
20 SOURCE Aliivibrio fischeri (Vibrio fischeri)
21 ORGANISM Aliivibrio fischeri
22 Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales;
23 Vibrionaceae; Aliivibrio.
24 REFERENCE 1 (bases 1 to 76221)
25 AUTHORS Summers,A.O., Wireman,J. and Williams,L.E.
26 TITLE Direct Submission
27 JOURNAL Submitted (14-NOV-2011) Department of Microbiology, University of
28 Georgia, 527 Biological Sciences, Athens, GA 30602, USA
29 FEATURES Location/Qualifiers
31 /organism="Aliivibrio fischeri"
32 /mol_type="genomic DNA"
34 /host="Euprymna scolopes"
37 /country="USA: Hawaii"
38 /collection_date="2005"
39 /note="strain provided by Eric Stabb, University of
40 Georgia, Athens, GA, USA"
41 CDS complement(53120..53287)
44 /product="hypothetical protein"
45 /protein_id="AEY78253.1"
46 /db_xref="GI:373881650"
47 /translation="MIFVCNGLQFYCLVNRHESVINFQACLWVILVLFIYEKWHYVDLRRFNGCFEWLS"
51 /product="Transposase-like protein"
52 /protein_id="AEY78254.1"
53 /db_xref="GI:373881651"
54 /translation="MDFSGYQYPSDIILQAVRYYVSYKLSTRDIEEIFTERGSAIDHS
55 TINRWVITFAPMLEQNARQLKRKVSSSWRMDETYIKIKGEWWYYYRAVDKYGDIVDFY
56 LSKERDEKAAKAFLRKAIHTNGLPDKVVIDKSGANALALHNLNVKLWLSVVFMLNLIE
57 IVDVKYLNNIVEQSYRPIKQKMVQALGWKSVEGATVTMSG"
61 /product="hypothetical protein"
62 /protein_id="AEY78255.1"
63 /db_xref="GI:373881652"
64 /translation="MQHDVGSNSDVYISCYSLNIKESGLSGAHYTISDIRKSIETVKVTSSYRHLHIEMKNSLCVCFSTLDITWVNGLEHGELLAGQFTVFDSECPISYKVTRVGSLCFVFIPKYFYEGVLQKQMVRCGMFEFVYVDAIRFILTRVNSKEDGEQLLISELLALGYLLSVLERKEEAVGKKVAFEDKVHEVIKDNMLNPSLYLDDIALILGCSKRKIQHCLSLQGVSFTKLVTKYRIEYLAEQLIRKKHSRIDVLCYESGFNSPGYASNSFKVIMGMSPKEYRCRYLAKSSVF"
67 # http://www.perlmonks.org/?node_id=1085446
68 my %invert; @invert{ qw
[ A C G T
] } = qw
[ T G C A
];
74 for my $p1 ( 1 .. length( $in ) -2 ) {
75 next unless substr( $in, $p1, 1 ) eq $invert{ substr $in, $p1+1, 1 };
77 for my $p2 ( 1 .. $p1 -1 ) {
78 last unless substr( $in, $p1-$p2, 1 ) eq $invert{ substr $in, $p1+$p2+1, 1 };
83 #printf "%s at %d\n",substr( $in, $p1-$pals, ($pals+1)*2 ), $p1-$pals;
84 my $palindromicDNA = substr( $in, $p1-$pals, ($pals+1)*2 );
85 my $leftPad = $p1-$pals;
86 $leftPad = 0 if $leftPad > 272;
87 printf "%s%s @ %d [%d]", '.'x
$leftPad,
88 $palindromicDNA, $p1-$pals, ($pals+1)*2;
89 push @
{$ret{$palindromicDNA}},$p1-$pals;
90 if (scalar @
{$ret{$palindromicDNA}} > 1) {
91 print BOLD
,RED
,' *',RESET
;
99 my @CDS = (53565, 54167);
103 ($getIN,$getLen) = (0,2000);
105 my $wholeLen = $getIN + $getLen;
106 my ($leftTEs,$rightTEs,%TEs,$allTEs);
109 open I
,'<','pKB1A97-67.fa' or die $?
;
113 my ($id,$desc)=split / /,$_,2;
114 if ($desc && $desc !~ /^\s*$/) {
117 } else { $desc='.';$Head=$id; }
123 my $len=length($seq);
124 print STDERR
"$id:$len,[$desc]\n";
126 my $left = substr $seq,($CDS[0]-1-$getLen),$wholeLen;
127 my $right = substr $seq,($CDS[1]-$getIN),$wholeLen;
128 #local $Term::ANSIColor::AUTORESET = 1;
129 # print substr($left,0,$getLen),BOLD,GREEN,substr($left,$getLen),RESET,"\n";
130 # $leftTEs = getTE($left);
131 # print BOLD,GREEN,substr($right,0,$getIN),RESET,substr($right,$getIN,$getLen),"\n";
132 # $rightTEs = getTE($right);
133 $allTEs = getTE
($seq);
139 my ($pos,$telen) = @_;
140 ++$pos; # now, 1-based
141 my $posR = $pos+$telen;
142 if ( $posR < $CDS[0] ) {
143 return $posR - $CDS[0];
144 } elsif ( $pos > $CDS[1] ) {
145 return $pos - $CDS[1];
149 my (%PatDat,%PatFlag,@Patterns);
151 my ($flag,$mode) = @_;
152 for my $k (@Patterns) {
154 next unless $PatFlag{$k} == $flag;
155 } elsif ($mode == 0) {
156 next unless $PatFlag{$k} & $flag;
157 } elsif ($mode == -1) {
158 next if $PatFlag{$k} == $flag;
160 my $itsLen = length($k);
161 my ($Instde,@Left,@Right,@PosL,@PosR) = (0);
162 for my $p (@
{$PatDat{$k}}) {
163 push @Left,$p if $p < 0;
164 push @Right,$p if $p > 0;
165 $Instde = 1 if $p == 0;
171 push @PosL,$_-$itsLen+$CDS[0] for @Left;
177 push @PosR,$_+$CDS[1] for @Right;
180 #print BOLD,GREEN,"$PatFlag{$k} $k\[$itsLen]: ",join(',',$Left[-1],$Right[0]),' -> ',join(',',$PosL[-1],$PosR[0]),($Instde?' *':''),RESET,"\n";
181 print BOLD
,GREEN
,"$k\[$itsLen]: ",join(',',$Left[-1],$Right[0]),' -> ',join(',',$PosL[-1],$PosR[0]),($Instde?
' *':''),RESET
,"\n";
182 if ((@Left + @Right)>2) {
183 print "\t",join(',',@PosL),' | ',join(',',@PosR);
184 #print " <- ",join(',',@Left),' | ',join(',',@Right);
193 for my $k (keys %{$leftTEs}) {
194 $TEs{$k} = [ ${$leftTEs}{$k} ];
196 for my $k (keys %{$rightTEs}) {
197 push @{$TEs{$k}},${$rightTEs}{$k};
199 for my $k (keys %TEs) {
200 next unless scalar @{$TEs{$k}} == 2;
207 @Patterns = sort { length($b)<=>length($a) || $a cmp $b } keys %TEs;
208 (%PatDat,%PatFlag)=();
209 for my $k (@Patterns) {
211 my $itsLen = length($k);
212 print "\n$k\[$itsLen]: ";
213 while ($seq =~ /(?=$k)/g) { # http://www.perlmonks.org/?node_id=1090633
215 my $chk = checkType
($p,$itsLen);
217 push @
{$PatDat{$k}},$chk;
218 $PatFlag{$k} |= 1 if $chk <0;
219 $PatFlag{$k} |= 2 if $chk >0;
220 $PatFlag{$k} |= 4 if $chk ==0;
226 print BOLD
,RED
,'-'x5
,'Both','-'x10
,"\n";
228 print BOLD
,RED
,'-'x5
,'Instde','-'x10
,"\n";
230 #print BOLD,RED,'-'x5,'Single','-'x10,"\n";
236 #http://repo.hackerzvoice.net/depot_madchat/esprit/texture/hallucinati/finding%20DNA%20palindroms.htm
239 $filename = "668plasmids.fa";
240 open (TEXT
, $filename)||die"Cannot";
245 $re = qr
/[CAGT]{$n}/;
246 $regexes[$n-5] = $re;
248 NEXTLINE
: while ($count < 1000)
252 foreach my $value (@regexes)
255 while ($line =~ /$value/g)
259 $revmatch = reverse($match);
260 $revmatch =~ tr/CAGT/GTCA/;
261 if ($endline =~ /^([CAGT]{0,15})($revmatch)/)
264 $palindrome = $match . "*" . $1 . "*" . $2;
265 $palhash{$palindrome}++;
275 while(($key, $value) = each (%palhash))
277 print "$key => $value\n";