limit fstBC to 30bp in Python3 ver.
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / PbeBefore2015 / 3.make_NEXUS.pl
blob2d1f7267aff7938714cde6b08846fa18da67e972
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
5 open my $fca62, "-|", "gzip -dc Felis_catus-6.2_masked.fa.gz";
6 open my $pbe084, "-|", "gzip -dc PBE084_masked.fa.gz";
7 open my $pbe144, "-|", "gzip -dc PBE144_masked.fa.gz";
8 open my $pvi033, "-|", "gzip -dc PVI033_masked.fa.gz";
9 open OUT, ">", "exons.nex";
11 my %file = (
12 FCA62 => $fca62,
13 PBE084 => $pbe084,
14 PBE144 => $pbe144,
15 PVI033 => $pvi033,
18 my %chr = (
19 chrA1 => 239302903,
20 chrA2 => 169043629,
21 chrA3 => 142459683,
22 chrB1 => 205241052,
23 chrB2 => 154261789,
24 chrB3 => 148491654,
25 chrB4 => 144259557,
26 chrC1 => 221441202,
27 chrC2 => 157659299,
28 chrD1 => 116869131,
29 chrD2 => 89822065,
30 chrD3 => 95741729,
31 chrD4 => 96020406,
32 chrE1 => 63002102,
33 chrE2 => 64039838,
34 chrE3 => 43024555,
35 chrF1 => 68669167,
36 chrF2 => 82763536,
37 chrX => 126427096,
40 my %seq;
41 foreach (sort keys %file) {
42 my $u = $_;
43 $seq{$u} = &readseq($file{$u});
44 warn "Read $u sequence complete!\n";
47 my %loci;
48 my $ncha;
49 foreach my $u (sort keys %chr) {
50 for (my $i=0; $i < $chr{$u}; ++$i) {
51 my $w = substr($seq{FCA62}{$u}, $i, 100);
52 next if (length $w) < 100;
53 next if $w =~ /^N/;
54 my @subseq;
55 foreach my $v (sort keys %seq) {
56 push @subseq, substr($seq{$v}{$u}, $i, 100);
58 if (grep(/N{5}/, @subseq) == 0) { # find 100bp windows without /N{5}/
59 push @{$loci{$u}}, \@subseq;
60 $i += 99;
63 my $w = @{$loci{$u}};
64 $ncha += $w*100;
65 warn "$u exons\t$ncha bp\n";
67 warn "Pick loci complete!\n";
69 print OUT "#NEXUS
70 BEGIN DATA;
71 Dimensions ntax=4 nchar=$ncha;
72 Format datatype=DNA interleave=yes;
73 Matrix\n";
75 foreach my $u (sort keys %loci) {
76 my $v;
77 foreach my $w (@{$loci{$u}}) {
78 ++$v;
79 # print OUT "${u}_$v\t4\t1000\n";
80 print OUT "FCA6.2\t${$w}[0]\n";
81 print OUT "PBE084\t${$w}[1]\n";
82 print OUT "PBE144\t${$w}[2]\n";
83 print OUT "PVI033\t${$w}[3]\n\n";
87 print OUT ";\nEND;\n";
88 close OUT;
89 warn "Done!\n";
91 sub readseq {
92 my $in = $_[0];
93 my %fa;
94 while (<$in>) {
95 last unless /(chr\w+)/;
96 my $name = $1;
97 $/ = ">";
98 my $sequ = <$in>;
99 chomp $sequ;
100 $sequ =~ s/\s//g;
101 $/ = "\n";
102 $fa{$name} = $sequ;
104 close $in;
105 return \%fa;