4 use Data
::Dump
qw(ddx);
32 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
33 my $rev = reverse $str;
34 $rev =~ tr/[](){}<>/][)(}{></;
38 my $minLen = 100; # 150
40 print "[@patterns]\n";
41 my (@bothpatterns,%t);
42 push @bothpatterns,revcom
($_) for @patterns;
43 ++$t{$_} for (@patterns,@bothpatterns);
44 @bothpatterns = sort { length($a) <=> length($b) || $a cmp $b } keys %t;
45 print "[@bothpatterns]\n";
50 $in =~ /_(\w+)\.mfa\.gz$/;
52 print "From [$in] to [$out]\n";
54 open IN
, "-|", "zcat $in";
58 my ($seq,$pattLen) = @_;
59 #my $pattLen = length $patt;
60 my $mincpy = int($minLen/$pattLen);
61 my $maxcpy = int(1+$maxLen/$pattLen);
66 $count{uc(substr($seq, $_, $size))}++ for 0..(length($seq) - $size);
68 for (sort {$count{$b} <=> $count{$a} || $a cmp $b} keys %count) {
69 last if $count{$_} < $mincpy;
72 my (@poses,@groups,$lastpos,$groupid);
73 while ($seq =~ /$_/ig) {
74 push @poses,pos($seq)-$size;
76 $lastpos = shift @poses;
79 if ($_-$lastpos == $size) {
80 push @
{$groups[$groupid]},$_; # lacking the 1st one.
87 next unless defined $_;
89 if (@eachposes > $mincpy-1) {
90 my $start = $eachposes[0]-$size;
91 my $left = substr($seq, $start-$flanking, $flanking);
92 my $right = substr($seq, $eachposes[-1]+$size,$flanking);
93 my $ssrlen = (1+@eachposes) * $size;
94 my $ssrseq = substr($seq, $start, $ssrlen);
95 my $all = "$left ($ssrseq) $right";
96 print O
join("\t",$pattern, $ssrlen, 1+$start,$all),"\n";
100 #print ' '.(pos($seq)-$size) while $seq =~ /$_/g;
101 #print " ($count{$_} matches)\n";
104 while($seq=~m/($patt){$mincpy,$maxcpy}/ig){
105 #while($seq=~m/([ATCG]+)(\1){$mincpy,$maxcpy}/ig){
109 print O join("\t",$patt,$len,$-[0],$&,substr($seq, $-[0]-$flanking, $flanking),substr($seq, $+[0],$flanking) ),"\n";# if $len >= $minLen and $len <= $maxLen;
117 while ( my $record = <IN
> ) {
118 chomp $record; # Remove the ">" from the end of $record, and realize that the ">" is already gone from the begining of the record
119 my ($defLine, @seqLines) = split /\n/, $record;
120 my $sequence = join('',@seqLines); # Concatenates all elements of the @seqLines array into a single string.
121 $sequence =~ s/\s//g;
122 print "$defLine\n"; # Print your definition; remember the ">" has already been removed. Remember to print a newline.
123 my $seqlen = length($sequence);
124 print "Seq Length: $seqlen\n"; # Print the sequence length and a newline
125 print O
">$defLine $seqlen\n";
126 #getpattern($sequence,$_) for (@bothpatterns);
127 getpattern
($sequence,$_) for (3,4);
140 scp galaxy
@192.168
.0.83:/Users/Galaxy
/t/ssr
.pl
.
142 find
*.mfa
.gz
|while read a
;do perl ssr
.pl
"$a";done
143 awk
'{print $2}' *.lst
|sort -n
|uniq
-c
145 wc
-l
*.lst
|grep -v
' 2 '|grep lst
|awk
'{print $2}'|xargs tar
-czvf ssr
.tgz