3 Author: Hu Xuesong @ BIOPIC <galaxy001@gmail.com>
4 Version: 1.0.0 @ 20120720
11 use Data
::Dump
qw(ddx);
13 my ($strCDS, $strmRNA) = qw(CDS mRNA);
14 GetOptions
('CDS=s' => \
$strCDS, 'mRNA=s' => \
$strmRNA);
16 die "Usage: $0 [--cds=CDS] [--mrna=mRNA] <gene_file> <db_file>\n" if @ARGV<2;
20 warn "From [$infs] to [$outfs] with [$strCDS] in [$strmRNA]\n";
22 open I
,'<',$infs or die;
23 open O
,'>',$outfs or die;
24 print O
"# From [$infs] to [$outfs]\n";
27 my (%GeneDat,%Gene2Chr,$strand,$seqname, $primary, $start, $end, $frame, $LN, $groups);
29 next if /^(#|((\s)*$))/;
32 if (/^\[([^]]*)\] ([+-])$/) {
33 #ddx $Gene2Chr{$secname};
36 #print "[$secname $strand]\n";
37 die if length $secname == 0;
40 ($seqname, $primary, $start, $end, $frame, $LN, $groups) = split /\t/,$_;
41 if ( $primary eq $strCDS or $primary eq $strmRNA ) {
42 push @
{$GeneDat{"$secname\n$strand"}{$primary}},[$start, $end, $frame];
43 ++$Gene2Chr{$secname}{$seqname};
50 for (sort keys %GeneDat) {
51 my ($secname,$strand) = split /\n/,$_;
52 my @ref = keys %{$Gene2Chr{$secname}};
54 #print O "\n[$secname] $strand $ref[0]\n";
56 my $CDSarray = $GeneDat{$_}{$strCDS};
57 my $mRNAarray = $GeneDat{$_}{$strmRNA};
60 @CDS = sort { $a->[0] <=> $b->[0] } @
{$CDSarray};
61 } elsif ($strand eq '-') {
62 @CDS = sort { $b->[0] <=> $a->[0] } @
{$CDSarray};
64 my $CDSLen = $CDS[0][1] - $CDS[0][0] + 1;
65 $CDS[0][3] = $CDS[0][2];
66 if ( $CDS[0][2] != 0) {
73 my $phase = -$CDSLen % 3;
74 $CDS[$_][3] = $CDS[$_][2];
75 if ( $CDS[$_][2] != $phase) {
79 ++$Err{'Reverse'} if $CDS[$_][2] + $CDS[$_][3] == 3;
82 $CDSLen += $CDS[$_][1] - $CDS[$_][0] + 1;
84 $$mRNAarray[0][3] = $CDSLen;
86 #print "$secname,$strand\n";
90 $flag .= ' InCompCDS';
93 print O
"\n[$secname] $strand $ref[0] $flag\n";
94 print O
join("\t",'mRNA',@
{$$mRNAarray[0]}),"\n";
96 print O
join("\t",'CDS',@
$_),"\n";
99 die "[x] MultiRef(",scalar @ref,") in $secname($strand): [@ref].\n";
109 perl ano_db_from_read
.pl Dasnov3
.gene1 Dasnov3
.db