modified: Makefile
[GalaxyCodeBases.git] / perl / etc / WoodyMiaoLin / PbeBefore2015 / BankIt_Feature.pl
blob94d347fdd419955dd7b574f633f25d7361850561
#!/usr/bin/perl
# Convert a two-line-per-record FASTA file (<prefix>.fa) into a tab-delimited
# feature table (<prefix>.tbl), then run tbl2asn on <prefix>.fa.
#
# Usage: BankIt_Feature.pl <prefix>
#
# Every record must be one header line followed by exactly ONE sequence line.
# The first space-separated word of the header is the sequence id.  The id is
# split on '-' into a locus name and an allele name; the locus name selects
# which annotation layout is written for the record.
use strict;
use warnings;

my $in = shift;
die "Usage: $0 <prefix>   (reads <prefix>.fa, writes <prefix>.tbl)\n"
    unless defined $in && length $in;

# CYTB annotation layouts, keyed by the exact sequence length.
#   start       - start coordinate of the gene and CDS features
#   cds_end     - end coordinate of the gene and CDS features
#   codon_start - reading-frame offset written for the CDS
#   thr_end     - last base of the tRNA-Thr feature; the tRNA-Pro feature runs
#                 from the next base to the end of the sequence.  When absent,
#                 no tRNA features are written.
my %CYTB_LAYOUT = (
    1247 => { start => '1',  cds_end => 1140,   codon_start => 1, thr_end => 1209 },
    1249 => { start => '1',  cds_end => 1140,   codon_start => 1, thr_end => 1211 },
    1233 => { start => '<1', cds_end => 1126,   codon_start => 2, thr_end => 1195 },
    853  => { start => '1',  cds_end => '>853', codon_start => 1 },
);

# Format one feature: a "start<TAB>stop<TAB>key" line followed by one
# "<TAB><TAB><TAB>qualifier<TAB>value" line per (qualifier, value) pair.
sub feature {
    my ($start, $stop, $key, @qualifiers) = @_;
    my $text = "$start\t$stop\t$key\n";
    while (@qualifiers) {
        my ($name, $value) = splice @qualifiers, 0, 2;
        $text .= "\t\t\t$name\t$value\n";
    }
    return $text;
}

# Format the CYTB features for a sequence of $len bases.
# Dies when $len is not one of the lengths listed in %CYTB_LAYOUT.
sub cytb_features {
    my ($seq_id, $allele, $len) = @_;
    my $layout = $CYTB_LAYOUT{$len}
        or die "Unsupported CYTB sequence length $len for [$seq_id]\n";

    my @gene_names = (gene => 'CYTB', gene_synonym => 'cytB');
    my $text = feature($layout->{start}, $layout->{cds_end}, 'gene',
                       @gene_names, allele => $allele)
             . feature($layout->{start}, $layout->{cds_end}, 'CDS',
                       @gene_names,
                       codon_start  => $layout->{codon_start},
                       transl_table => 2,
                       product      => 'cytochrome b');

    if (exists $layout->{thr_end}) {
        my $thr_end = $layout->{thr_end};
        $text .= feature($layout->{cds_end} + 1, $thr_end, 'tRNA',
                         product => 'tRNA-Thr')
               . feature($thr_end + 1, ">$len", 'tRNA',
                         product => 'tRNA-Pro');
    }
    return $text;
}

# Format a partial gene made of an intron (bases 1 .. $len - $cds_len - 1)
# followed by a CDS covering the last $cds_len + 1 bases.
# @cds_quals are the qualifiers written after "gene" on the CDS feature.
sub intron_then_cds {
    my ($gene, $allele, $len, $cds_len, @cds_quals) = @_;
    my $cds_start  = $len - $cds_len;
    my $intron_end = $cds_start - 1;
    return feature('<1', ">$len", 'gene', gene => $gene, allele => $allele)
         . feature("<$cds_start", ">$len", 'CDS', gene => $gene, @cds_quals)
         . feature('<1', $intron_end, 'intron', gene => $gene);
}

# Format a partial gene that consists of a single intron spanning the
# whole sequence.
sub intron_only {
    my ($gene, $allele, $len) = @_;
    return feature('<1', ">$len", 'gene', gene => $gene, allele => $allele)
         . feature('<1', ">$len", 'intron', gene => $gene);
}

# Build the complete feature-table record for one sequence.
#   $seq_id - first word of the FASTA header, without the leading '>'
#   $len    - length of the sequence line
# Returns the record text.  Warns when the locus name is not recognised and
# dies when a CYTB sequence has an unsupported length.
sub feature_table {
    my ($seq_id, $len) = @_;

    my ($locus, $allele) = split /-/, $seq_id;
    $locus  = '' unless defined $locus;
    $allele = '' unless defined $allele;    # id without '-': empty allele

    my @smcy_cds = (
        gene_synonym => 'KDM5D',
        product      => 'lysine (K)-specific demethylase 5D',
    );

    my $record = ">Feature $seq_id\n";
    if ($locus =~ /PLP/) {
        # NOTE(review): this branch writes the full sequence id as the allele,
        # unlike every other branch, which writes the part after '-'.
        # Kept as in the original; confirm whether that is intended.
        $record .= feature('<1', ">$len", 'gene',
                           gene => 'PLP1', allele => $seq_id)
                 . feature('<1', '>61', 'CDS',
                           gene        => 'PLP1',
                           codon_start => 3,
                           product     => 'proteolipid protein 1')
                 . feature('62', ">$len", 'intron', gene => 'PLP1');
    }
    elsif ($locus =~ /16S/) {
        $record .= feature('<1', ">$len", 'rRNA',
                           product => 'l-rRNA',
                           note    => '16S ribosomal RNA',
                           allele  => $allele);
    }
    elsif ($locus eq 'CYTB') {
        $record .= cytb_features($seq_id, $allele, $len);
    }
    elsif ($locus eq 'SMCY3') {
        $record .= feature('<1', ">$len", 'gene',
                           gene => 'SMCY', allele => $allele)
                 . feature('<1', '>4', 'CDS',
                           gene         => 'SMCY',
                           gene_synonym => 'KDM5D',
                           codon_start  => 2,
                           product      => 'lysine (K)-specific demethylase 5D')
                 . feature('5', ">$len", 'intron', gene => 'SMCY');
    }
    elsif ($locus eq 'SMCY7_STR_upstream') {
        $record .= intron_only('SMCY', $allele, $len);
    }
    elsif ($locus eq 'SMCY7_STR_downstream') {
        $record .= intron_then_cds('SMCY', $allele, $len, 20,
                                   gene_synonym => 'KDM5D',
                                   codon_start  => 1,
                                   product      => 'lysine (K)-specific demethylase 5D');
    }
    elsif ($locus eq 'DBY7') {
        $record .= intron_then_cds('DBY', $allele, $len, 278 - 248,
                                   codon_start => 1,
                                   product     => 'Y-linked DBY');
    }
    elsif ($locus eq 'UTY11') {
        $record .= intron_only('UTY', $allele, $len);
    }
    else {
        # Unknown locus: still emit a bare gene feature, but tell the user.
        $record .= feature('<1', ">$len", 'gene',
                           gene => $locus, allele => $allele);
        warn "Bad sequence name! [$seq_id]";
    }
    return $record;
}

open my $fasta, '<', "$in.fa"  or die "Cannot read $in.fa: $!\n";
open my $table, '>', "$in.tbl" or die "Cannot write $in.tbl: $!\n";

while (my $header = <$fasta>) {
    $header =~ s/\r?\n\z//;            # tolerate CRLF line endings
    next unless $header =~ /\S/;       # skip blank lines between records
    $header =~ s/^>//
        or die "$in.fa line $.: expected a FASTA header starting with '>'\n";

    my ($seq_id) = split / /, $header;
    $seq_id = '' unless defined $seq_id;

    my $seq = <$fasta>;
    die "$in.fa line $.: header [$seq_id] has no sequence line\n"
        unless defined $seq;
    $seq =~ s/\r?\n\z//;               # a stray CR would inflate the length

    print {$table} feature_table($seq_id, length $seq)
        or die "Cannot write $in.tbl: $!\n";
}

close $fasta;
close $table or die "Cannot finish writing $in.tbl: $!\n";

# List form: the prefix is passed to tbl2asn verbatim, never through a shell.
my @cmd = ('tbl2asn', '-t', 'Pbe.tpm', '-i', "$in.fa", '-a', 's2', '-V', 'vb');
print "Running:[@cmd]\n";
if (system(@cmd) != 0) {
    die "Cannot run tbl2asn: $!\n" if $? == -1;
    die "tbl2asn was killed by signal " . ($? & 127) . "\n" if $? & 127;
    die "tbl2asn failed with exit status " . ($? >> 8) . "\n";
}
#system("ls -l $in.*")