4 use Term
::ANSIColor
qw(:constants);
5 use Data
::Dump
qw(ddx);
9 ATTACATGGACTTCTCTGGCTACCAAT
11 blastn to bacteria (taxid:2)
13 http://www.ncbi.nlm.nih.gov/nucleotide/373881580?report=genbank&log$=nuclalign&blast_rank=1&RID=927901S401N
14 LOCUS JQ031552 76221 bp DNA circular BCT 29-JAN-2012
15 DEFINITION Aliivibrio fischeri strain KB1A-97 plasmid pKB1A97-67, complete
18 VERSION JQ031552.1 GI:373881580
20 SOURCE Aliivibrio fischeri (Vibrio fischeri)
21 ORGANISM Aliivibrio fischeri
22 Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales;
23 Vibrionaceae; Aliivibrio.
24 REFERENCE 1 (bases 1 to 76221)
25 AUTHORS Summers,A.O., Wireman,J. and Williams,L.E.
26 TITLE Direct Submission
27 JOURNAL Submitted (14-NOV-2011) Department of Microbiology, University of
28 Georgia, 527 Biological Sciences, Athens, GA 30602, USA
29 FEATURES Location/Qualifiers
31 /organism="Aliivibrio fischeri"
32 /mol_type="genomic DNA"
34 /host="Euprymna scolopes"
37 /country="USA: Hawaii"
38 /collection_date="2005"
39 /note="strain provided by Eric Stabb, University of
40 Georgia, Athens, GA, USA"
41 CDS complement(53120..53287)
44 /product="hypothetical protein"
45 /protein_id="AEY78253.1"
46 /db_xref="GI:373881650"
47 /translation="MIFVCNGLQFYCLVNRHESVINFQACLWVILVLFIYEKWHYVDLRRFNGCFEWLS"
51 /product="Transposase-like protein"
52 /protein_id="AEY78254.1"
53 /db_xref="GI:373881651"
54 /translation="MDFSGYQYPSDIILQAVRYYVSYKLSTRDIEEIFTERGSAIDHS
55 TINRWVITFAPMLEQNARQLKRKVSSSWRMDETYIKIKGEWWYYYRAVDKYGDIVDFY
56 LSKERDEKAAKAFLRKAIHTNGLPDKVVIDKSGANALALHNLNVKLWLSVVFMLNLIE
57 IVDVKYLNNIVEQSYRPIKQKMVQALGWKSVEGATVTMSG"
61 /product="hypothetical protein"
62 /protein_id="AEY78255.1"
63 /db_xref="GI:373881652"
64 /translation="MQHDVGSNSDVYISCYSLNIKESGLSGAHYTISDIRKSIETVKVTSSYRHLHIEMKNSLCVCFSTLDITWVNGLEHGELLAGQFTVFDSECPISYKVTRVGSLCFVFIPKYFYEGVLQKQMVRCGMFEFVYVDAIRFILTRVNSKEDGEQLLISELLALGYLLSVLERKEEAVGKKVAFEDKVHEVIKDNMLNPSLYLDDIALILGCSKRKIQHCLSLQGVSFTKLVTKYRIEYLAEQLIRKKHSRIDVLCYESGFNSPGYASNSFKVIMGMSPKEYRCRYLAKSSVF"
67 # http://www.perlmonks.org/?node_id=1085446
68 #my %invert; @invert{ qw[ A C G T ] } = qw[ T G C A ];
70 my @CDS = (53565, 54167);
77 # http://www.perlmonks.org/?node_id=197793
79 my $dna = shift; # or my $dna = shift @_;
80 my $revcomp = reverse($dna);
81 $revcomp =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
86 my ($seq1,$seq2) = @_;
87 my $seqlen1 = length $seq1;
88 my $seqlen2 = length $seq2;
90 for my $irLeftPos2 (0 .. ($seqlen2-1)) {
91 #for my $irLen ($irMinLen .. int(($seqlen-$irLeftPos)/2)) {
92 for my $irLen ($irMinLen .. ($seqlen2-1-$irLeftPos2)) {
93 my $irSeq = substr $seq2,-$irLen-$irLeftPos2,$irLen;
94 my $revcmp = revdnacomp
($irSeq);
98 $p = index $seq1,$revcmp,$p+1;
101 last if $thePoses[0] == -1;
103 $Dat{"$_|$irLeftPos2"} = $irLen for @thePoses;
105 print "$irLen\[$irSeq]\t";
107 my $found = substr $seq1,$_,$irLen;
108 my $extra = substr $seq1,$_+$irLen,6;
109 print ".$found",GREEN,BOLD,"$extra $_",RESET;
111 print "\t$irLeftPos2\n";
115 #print "1:$seq1,$seq2\n";
119 my ($dat,$getLen,$seq1,$seq2) = @_;
121 for my $poses (keys %{$dat}) {
122 my $irlen = $dat->{$poses};
123 ++$res{$irlen}{$poses};
125 for my $irlen (sort {$a<=>$b} keys %res) {
127 for my $poses (keys %{$res{$irlen}}) {
128 my @Poses = split /\|/,$poses;
129 my $dis = $Poses[1] + $Poses[0];
130 my $upperPoses = join('|',$Poses[0]-1,$Poses[1]-1);
131 if (exists $res{$irlen+1}) {
132 delete $res{$irlen}{$poses} if exists $res{$irlen+1}{$upperPoses};
136 my @Poses = keys %{$res{$irlen}};
138 my @tmp = sort { $tmp{$a} <=> $tmp{$b} } @Poses;
139 $res{$irlen} = \
@tmp;
144 for my $irlen (sort {$a<=>$b} keys %res) {
145 print "--- $irlen ---\n";
147 for my $poses ( @
{$res{$irlen}} ) {
148 my @Poses = split /\|/,$poses;
149 my $irL = substr $seq1,$Poses[0],$irlen;
150 my $irR = substr $seq2,-$irlen-$Poses[1],$irlen;
151 #my $revcmp = revdnacomp($irR);
152 print "$poses $irL $irR\n";
155 #print "2:$seq1,$seq2\n";
158 open I
,'<','pKB1A97-67.fa' or die $?
;
162 my ($id,$desc)=split / /,$_,2;
163 if ($desc && $desc !~ /^\s*$/) {
166 } else { $desc='.';$Head=$id; }
172 my $len=length($seq);
173 print STDERR
"$id:$len,[$desc]\n";
175 my $left = substr $seq,0,($CDS[0]-1);
176 my $right = substr $seq,($CDS[1]);
177 print "0 Len:(",length $left,',',length $right,")\n";
178 #$left = substr $left,-$getLen;
179 #$right = substr $right,0,$getLen;
180 print "1 Len:(",length $left,',',length $right,")\n";
181 my $res = analyseIR
($left,$right);
183 printRes
($res,$getLen,$left,$right);
185 #print "[$left]\n\n[$right]\n";
186 [TTTATTGACCGGCAGCGGTATGCGAGTGCGAACACCAACTTCTTATGGTGAATCACAAATGTACGACCAGCCTTTGCCTGTCACGCAGATGGACATGGGGCAAATTCCT
187 ... ATACACTTACCAACGCATCTTAATAGGGGTTCTGTCGCATGACAGGGAACTAAATCATCCATGAGATACTCAAAGTTCTGTTTTTTACATTAACTCGAGTATTAC]
189 [GAGGTCTGGACCCAAATTAAACGAAGGCAAGTTGGGGACGTAAATTTACCCGTATGGGAGCGCTTTCACGCGCTCGCTGCATAATTGTGTCTAGAATTCGGCACGTCTA
190 ... TGGATGCACGTATGCCTGCAATGGCAAATCTGACCGTAAAAGATGGGTTGGATTT]