modified: Makefile
[GalaxyCodeBases.git] / perl / etc / submit / BankIt_submit.pl
blob5515e7b9bf147a337795d9a84a1e9086730f8886
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
# Open the three input FASTA files for reading.  The bareword handles
# I1/I2/I3 are kept on purpose: the parsing loops below read from them by
# name.  A missing or unreadable file now stops the script immediately
# instead of surfacing later as "readline() on closed filehandle".
open I1, "<", "Mt.fasta"  or die "Cannot open Mt.fasta: $!";
open I2, "<", "Y.fasta"   or die "Cannot open Y.fasta: $!";
open I3, "<", "PLP.fasta" or die "Cannot open PLP.fasta: $!";
8 #open O, ">", "BankIt_submit.fasta";
# Every sequence is stored as $STS{locus}{species code}{haplotype id}.
my %STS;

# Mt.fasta is consumed two lines at a time: a header line (">" + three word
# characters for the species code + "MT" and digits for the haplotype id)
# followed by one line holding the 16S, ATP8 and CytB fragments back to back.
while ( my $header = <I1> ) {
    my $s = <I1>;

    # A header on the very last line has no sequence to go with it.
    unless ( defined $s ) {
        warn "Mt.fasta: header without a sequence line: $header";
        last;
    }

    # Never fall through with stale or undefined captures: report the
    # offending header and skip this header/sequence pair.
    unless ( $header =~ /^>(\w{3})(MT\d+)/ ) {
        warn "Mt.fasta: skipping unrecognised header: $header";
        next;
    }
    my ( $species, $hap ) = ( $1, $2 );

    # Four-argument substr returns the leading fragment and removes it from
    # $s, so each call continues where the previous one stopped.
    $STS{"16S"}{$species}{$hap} = substr $s, 0, 367, "";
    substr $s, 0, 176, "";    # ATP8 fragment: cut out and discarded, never kept in %STS
    $STS{CytB}{$species}{$hap} = substr $s, 0, 1249, "";

    # After the three fragments only a CRLF terminator should be left;
    # anything else is echoed to STDERR for inspection.
    warn $s unless $s eq "\r\n";
}

close I1;
# Y.fasta is consumed two lines at a time: a header line (">" + three word
# characters for the species code + "Y" and one word character for the
# haplotype id) followed by one line holding five fragments back to back.
while ( my $header = <I2> ) {
    my $s = <I2>;

    # A header on the very last line has no sequence to go with it.
    unless ( defined $s ) {
        warn "Y.fasta: header without a sequence line: $header";
        last;
    }

    # Never fall through with stale or undefined captures: report the
    # offending header and skip this header/sequence pair.
    unless ( $header =~ /^>(\w{3})(Y\w)/ ) {
        warn "Y.fasta: skipping unrecognised header: $header";
        next;
    }
    my ( $species, $hap ) = ( $1, $2 );

    # Fragment names and lengths, in the order they are cut from the line.
    my @fragments = (
        [ SMCY3                => 841 ],
        [ SMCY7_STR_upstream   => 224 ],
        [ SMCY7_STR_downstream => 325 ],
        [ DBY7                 => 280 ],
        [ UTY11                => 484 ],
    );
    for my $fragment (@fragments) {
        my ( $locus, $length ) = @{$fragment};

        # Four-argument substr returns the leading piece and removes it
        # from $s, so the next fragment starts at offset 0 again.
        $STS{$locus}{$species}{$hap} = substr $s, 0, $length, "";
    }

    # After the five fragments only a CRLF terminator should be left;
    # anything else is echoed to STDERR for inspection.
    warn $s unless $s eq "\r\n";
}

close I2;
# PLP.fasta may spread a sequence over several lines.  Each iteration reads
# one header line, then everything up to (and including) the next ">" as the
# sequence.  Because that ">" is consumed with the sequence, every header
# after the first arrives without its leading ">", hence the optional ">?".
while ( my $header = <I3> ) {
    my $s;
    {
        # Switch the record separator only for this one read; "local"
        # guarantees it is restored even on an early exit.
        local $/ = ">";
        $s = <I3>;
        chomp $s if defined $s;    # removes the trailing ">" separator
    }

    # A header on the very last line has no sequence to go with it.
    unless ( defined $s ) {
        warn "PLP.fasta: header without a sequence: $header";
        last;
    }
    $s =~ s/\s//g;                 # join the lines: drop all whitespace

    # Species code is three word characters; the sample id that follows is
    # matched by the character range "-" through "Z".
    unless ( $header =~ /^>?(\w{3})([\--Z]+)/ ) {
        warn "PLP.fasta: skipping unrecognised header: $header";
        next;
    }
    $STS{PLP}{$1}{$2} = $s;
}

close I3;
# Maps the three-letter species code taken from the FASTA headers to the
# organism name written into the output.  Records whose code is not listed
# here are skipped when the output files are written.
my %SOURCE = (
    Pbe => 'Prionailurus bengalensis',
    Pvi => 'Prionailurus viverrinus',
    Ppl => 'Prionailurus planiceps',
    Pte => 'Pardofelis temminckii',
    Pma => 'Pardofelis marmorata',
    Pti => 'Panthera tigris',
    Ppa => 'Panthera pardus',
);
54 =pod
55 print O "TYPE: Pub
56 TITLE:
57 $CITATION
58 AUTHORS:
59 $AUTHORS
60 YEAR: 2014
61 STATUS: 1
62 ||\n\n";
64 print O "TYPE: Source
65 NAME: $SOURCE{$_}
66 ORGANISM: $SOURCE{$_}
67 ||\n\n" foreach sort keys %SOURCE;
69 print O "TYPE: Cont
70 NAME: $CONT_NAME
71 TEL: +86-10-62752307
72 EMAIL: Luo.shujin\@pku.edu.cn
73 LAB: Peking-Tsinghua Center for Life Sciences, College of Life Sciences
74 INST: Peking University
75 ADDR: No. 5 Yiheyuan Road, Haidian District, Beijing 100871, China
76 ||\n\n";
77 =cut
# Free-text description appended to the FASTA definition line, keyed by the
# locus name used as the first-level key of %STS.
my %desc = (
    'PLP'                  => "X chromosome, PLP1, partial CDs",
    'CytB'                 => "mitochondrial haplotype Cytochrome B, partial CDs",
    '16S'                  => "mitochondrial haplotype 16s Ribosomal RNA, partial sequence",
    'UTY11'                => "Y-chromosome haplotype UTY, partial sequence",
    'DBY7'                 => "Y-chromosome haplotype DBY7, partial sequence",
    'SMCY3'                => "Y-chromosome haplotype SMCY3, partial sequence",
    'SMCY7_STR_upstream'   => "Y-chromosome haplotype SMCY7, partial sequence",
    'SMCY7_STR_downstream' => "Y-chromosome haplotype SMCY7, partial sequence",
);
# Write one FASTA file per locus, named "submit_<locus>.fa".  Each record is
#   >ID [organism=...] <bracketed modifiers> <organism> <description>
# followed by the sequence on the next line.  Loci, species codes and sample
# ids are emitted in sorted order.  The loop variables are deliberately not
# called $a/$b: lexical $a/$b shadow the variables sort() relies on.
foreach my $locus ( sort keys %STS ) {
    my $path = "submit_$locus.fa";
    open my $out, ">", $path or die "Cannot write $path: $!";

    foreach my $species ( sort keys %{ $STS{$locus} } ) {
        foreach my $sample ( sort keys %{ $STS{$locus}{$species} } ) {

            # Remove every "-" and "?" from the stored sequence (in place).
            $STS{$locus}{$species}{$sample} =~ tr/\-?//d;
            my $seq = $STS{$locus}{$species}{$sample};

            next unless $seq;                 # nothing left to submit
            next unless $SOURCE{$species};    # species code not in %SOURCE
            my $organism = $SOURCE{$species};

            # PLP records are identified by species+sample only; every
            # other locus is prefixed with the locus name.
            my ( $id, $modifiers );
            if ( $locus eq "PLP" ) {
                $id        = "$species$sample";
                $modifiers = "[chromosome=X] [gcode=1]";
            }
            elsif ( $locus eq "CytB" or $locus eq "16S" ) {
                $id        = "$locus-$species$sample";
                $modifiers = "[location=mitochondrion] [mgcode=2]";
            }
            elsif ( $locus =~ /^(?:SMCY|DBY7|UTY11)/ ) {
                $id        = "$locus-$species$sample";
                $modifiers = "[chromosome=Y] [gcode=1]";
            }
            else {
                # Unknown locus: emit the bare record so the partial file
                # shows what was reached, then abort with an explanation.
                print {$out} ">$locus-$species$sample [organism=$organism] [gcode=1]\n$seq\n";
                die "No FASTA modifiers defined for locus '$locus'\n";
            }

            print {$out} ">$id [organism=$organism] $modifiers $organism $desc{$locus}\n$seq\n";
        }
    }

    # Buffered write errors only surface at close time.
    close $out or die "Cannot close $path: $!";
}
112 #close O;
# Post-processing through the shell.  Each step stays best-effort (the script
# carries on), but a non-zero status is now reported instead of being
# silently ignored.

# Concatenate all per-locus files into one.
system("cat submit_*.fa > submitall.fa") == 0
    or warn "Concatenating submit_*.fa failed (wait status $?)\n";

# Run BankIt_Feature.pl once per submit*.fa file, passing the file name
# without its ".fa" suffix.
# NOTE(review): the pattern submit*.fa also matches the submitall.fa created
# just above, so that file is processed as well - confirm this is intended.
system(q{find submit*.fa|sed 's/\.fa$//'|xargs -n1 perl BankIt_Feature.pl}) == 0
    or warn "BankIt_Feature.pl pipeline failed (wait status $?)\n";

print '-' x 75, "\n";

# List the *.val files present in the current directory.
system('ls -l *.val') == 0
    or warn "Listing *.val failed (wait status $?)\n";

print "To clean, run:[ rm submit* ]\n";