modified: Makefile
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / PbeBefore2015 / BankIt_submit.pl
blob66275ed4a25d23ff6d9718ce8fdde8ca9d1c093e
#!/usr/bin/perl
use strict;
use warnings;

# Input FASTA files, one per marker group. The bareword handles I1/I2/I3 are
# kept because the parsing loops further down read from them by name.
# Every open is checked: reading from a handle that failed to open would
# otherwise only produce warnings and empty output files.
my $mt_fasta  = "../1.find_need_to_submit/MT/MT_needto.fasta";
my $y_fasta   = "../1.find_need_to_submit/Y/Y_needto.fasta";
my $plp_fasta = "../1.find_need_to_submit/PLP/PLP_needto.fasta";

open I1, "<", $mt_fasta  or die "Cannot open $mt_fasta: $!";
open I2, "<", $y_fasta   or die "Cannot open $y_fasta: $!";
open I3, "<", $plp_fasta or die "Cannot open $plp_fasta: $!";
#open O, ">", "BankIt_submit.fasta";
# Parsed sequences, keyed as $STS{marker}{species_code}{sample_id}.
my %STS;

# MT input: each record is a ">" header line followed by ONE sequence line
# that concatenates three segments in fixed order:
# 367 columns of 16S, 176 of ATP8, then 1249 of CytB.
while (my $header = <I1>) {
    next unless $header =~ /\S/;    # silently ignore blank lines

    # Header carries a three-character species code followed by an MT<n> id.
    my ($species, $id) = $header =~ /^>(\w{3})(MT\d+)/;
    unless (defined $id) {
        # Without this check a failed match would reuse stale captures and
        # overwrite a previously stored record.
        warn "Skipping unrecognised MT header: $header";
        next;
    }

    my $s = <I1>;
    unless (defined $s) {
        warn "No sequence line after MT header: $header";
        last;
    }

    # Four-argument substr returns the leading segment AND removes it from $s.
    $STS{"16S"}{$species}{$id} = substr $s, 0, 367, "";
    substr $s, 0, 176, "";          # ATP8 is consumed but never kept
    $STS{CytB}{$species}{$id} = substr $s, 0, 1249, "";

    # Only the line terminator should be left over; anything else means the
    # sequence line was longer than the expected layout.
    warn "Unexpected trailing data for $species$id: $s"
        unless $s =~ /\A\r?\n?\z/;
}
close I1;
# Y input: each record is a ">" header line followed by ONE sequence line
# that concatenates the segments below, in this order and with these widths.
my @y_segments = (
    [ SMCY3                => 841 ],
    [ SMCY7_STR_upstream   => 224 ],
    [ SMCY7_STR_downstream => 325 ],
    [ DBY7                 => 280 ],
    [ UTY11                => 484 ],
);

while (my $header = <I2>) {
    next unless $header =~ /\S/;    # silently ignore blank lines

    # Header carries a three-character species code followed by a literal "Y",
    # which doubles as the sample id.
    my ($species, $id) = $header =~ /^>(\w{3})(Y)/;
    unless (defined $id) {
        # Without this check a failed match would reuse stale captures and
        # overwrite a previously stored record.
        warn "Skipping unrecognised Y header: $header";
        next;
    }

    my $s = <I2>;
    unless (defined $s) {
        warn "No sequence line after Y header: $header";
        last;
    }

    # Four-argument substr returns the leading segment AND removes it from $s,
    # so walking the table peels the segments off one after another.
    for my $segment (@y_segments) {
        my ($marker, $width) = @$segment;
        $STS{$marker}{$species}{$id} = substr $s, 0, $width, "";
    }

    # Only the line terminator should be left over; anything else means the
    # sequence line was longer than the expected layout.
    warn "Unexpected trailing data for $species$id: $s"
        unless $s =~ /\A\r?\n?\z/;
}
close I2;
# PLP input: the sequence may span several lines, so each record is read as
# a header line followed by everything up to the next ">".
while (my $header = <I3>) {
    my $s;
    {
        # Switch the record separator only for this one read; "local"
        # guarantees it is restored even if something dies in between.
        local $/ = ">";
        $s = <I3>;
        chomp $s if defined $s;     # chomp strips the trailing ">" here
    }
    unless (defined $s) {
        warn "No sequence after PLP header: $header";
        last;
    }
    $s =~ s/\s//g;                  # join the wrapped lines

    # The ">" of every header except the first was consumed by the previous
    # body read, hence the optional ">" in the pattern.
    my ($species, $id) = $header =~ /^>?(\w{3})([\--Z]+)/;
    unless (defined $id) {
        # Without this check a failed match would reuse stale captures and
        # overwrite a previously stored record.
        warn "Skipping unrecognised PLP header: $header";
        next;
    }
    $STS{PLP}{$species}{$id} = $s;
}
close I3;
# Species code => scientific name. The code is the key used to look up the
# name that goes into the [organism=...] modifier of each output header.
my %SOURCE = (
    Pbe => 'Prionailurus bengalensis',
    Pvi => 'Prionailurus viverrinus',
    Ipl => 'Prionailurus planiceps',
    Pte => 'Pardofelis temminckii',
    Pma => 'Pardofelis marmorata',
    Pti => 'Panthera tigris',
    Ppa => 'Panthera pardus',
);
# Marker name => free-text description appended (after the organism name)
# to the title part of each output header.
my %desc = (
    # mitochondrial markers
    '16S' => 'mitochondrial, 16S ribosomal RNA, partial sequence',
    CytB  => 'mitochondrial, cytochrome b complete CDS, tRNA-Thr complete sequence, tRNA-Pro partial sequence',

    # X chromosome marker
    PLP => 'X chromosome, PLP1, partial sequence',

    # Y chromosome markers
    SMCY3                => 'Y chromosome, SMCY, partial sequence',
    SMCY7_STR_upstream   => 'Y chromosome, SMCY, partial sequence',
    SMCY7_STR_downstream => 'Y chromosome, SMCY, partial sequence',
    DBY7                 => 'Y chromosome, DBY, partial sequence',
    UTY11                => 'Y chromosome, UTY, partial sequence',
);
# Write one FASTA file per marker ("submit_<marker>.fa"). Each record gets a
# header made of a sequence name, bracketed source modifiers and a title.
# Loop variables are deliberately not called $a/$b: those names belong to
# sort() and lexical copies of them break sort blocks in the same scope.
foreach my $gene (sort keys %STS) {
    my $out_name = "submit_$gene.fa";
    open my $out, ">", $out_name or die "Cannot write $out_name: $!";

    foreach my $species (sort keys %{ $STS{$gene} }) {
        # Species codes without a known scientific name are not written.
        my $organism = $SOURCE{$species};
        next unless $organism;

        my $samples = $STS{$gene}{$species};
        foreach my $id (sort keys %$samples) {
            # Drop alignment gaps ("-") and unknown positions ("?").
            $samples->{$id} =~ tr/-?//d;
            my $seq    = $samples->{$id};
            my $length = length $seq;
            next if $length < 200;  # too short to be written

            my $org_tag = "[organism=$organism]";
            # Built only when a description exists, so an unknown marker does
            # not trigger an "uninitialized value" warning.
            my $title = exists $desc{$gene} ? "$organism, $desc{$gene}" : "";

            my $header;
            if ($gene eq "PLP") {
                # PLP names carry no marker prefix.
                $header = ">$species$id $org_tag [chromosome=X] [gcode=1] $title";
            } elsif ($gene eq "CytB") {
                # Sequences of 1000 bases or fewer get the "partial CDS"
                # title instead of the full description.
                my $cytb_title = $length > 1000
                    ? $title
                    : "$organism, mitochondrial, cytochrome b, partial CDS";
                $header = ">$gene-$species$id $org_tag [location=mitochondrion] [mgcode=2] $cytb_title";
            } elsif ($gene eq "16S") {
                $header = ">$gene-$species$id $org_tag [location=mitochondrion] $title";
            } elsif ($gene =~ /^(SMCY|DBY7|UTY11)/) {
                $header = ">$gene-$species$id $org_tag [chromosome=Y] [gcode=1] $title";
            } else {
                # Unknown marker: still written, but without a title.
                $header = ">$gene-$species$id $org_tag [gcode=1]";
                warn "Bad gene name!";
            }
            print {$out} "$header\n$seq\n";
        }
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Cannot close $out_name: $!";
}
#close O;