# modified: n.fq
# [GalaxyCodeBases.git] / tools / annot / annotmix.pl
# blob bc3564cb942fbca9abc70f8551e96939cedf073e
#!/usr/bin/perl -w
# annotmix.pl - merge several annotation tables into one wide, tab-separated
# table with one row per gene and two columns per annotation source.
#
# This part of the script sets up the options, opens the list and output
# files, loads every annotation file named in the list via getHash(), and
# writes the header row of the output.
#use threads 1.73;
use strict;
use warnings;
use Time::HiRes qw ( gettimeofday tv_interval );
use Galaxy::ShowHelp;
#use Galaxy::Data;
#use Fcntl qw(:DEFAULT :flock);

# NOTE(review): a bare 0.1.1 is parsed by Perl as a v-string, not as the text
# "0.1.1". Left unchanged because how ShowHelp() displays it is not visible
# here - confirm before switching to a quoted string.
$main::VERSION=0.1.1;

# Option spec, option variables and help text are package globals;
# presumably Galaxy::ShowHelp reads them when ShowHelp() runs - TODO confirm.
our $opts='i:o:bv';
our($opt_i, $opt_o, $opt_v,$opt_b);

our $help=<<EOH;
\t-i Annotation files list (./anno.lst) [ID\\tPath_to_file\\n]
\t-o Output mixed file (mixed_annot.txt)
\t-v show verbose info to STDOUT
\t-b No pause for batch runs
EOH
# \t-d dbSNP SQLite data file (_tdbSNP.sqlite)

ShowHelp();

# Fall back to the documented defaults. Testing definedness and length
# (rather than truth) keeps a legitimate file name such as "0" usable.
$opt_i = './anno.lst'      unless defined $opt_i and length $opt_i;
$opt_o = 'mixed_annot.txt' unless defined $opt_o and length $opt_o;

print STDERR "From [$opt_i] to [$opt_o]\n";
if (! $opt_b) {
    print STDERR 'press [Enter] to continue...';
    # Read the confirmation from STDIN explicitly: the bare <> operator
    # would open any leftover command-line arguments as input files.
    my $confirm = <STDIN>;
}

my $start_time = [gettimeofday];

# @Annots: one [ID, \%gene_to_value] pair per annotation file, in list order.
# %Keys:   every gene seen in any file (value = number of files containing it).
my (@Annots,%Keys);
open(my $list_fh, '<', $opt_i) or die "Error: cannot open [$opt_i]: $!\n";
# OUT stays a bareword handle because the row-writing code further down the
# file prints to it and closes it under that name.
open(OUT, '>', $opt_o) or die "Error: cannot open [$opt_o]: $!\n";
print OUT 'Gene';
while (my $entry = <$list_fh>) {
    chomp $entry;
    next unless $entry =~ /\S/;    # ignore blank lines in the list

    # split ' ' also discards leading whitespace, so an indented line
    # still yields the ID first and the path second.
    my ($id, $file) = split ' ', $entry;
    die "Error: line $. of [$opt_i] has no file path: [$entry]\n"
        unless defined $file;

    my %dat;
    getHash($file, \%dat);
    ++$Keys{$_} for keys %dat;
    push @Annots, [$id, \%dat];

    # Two header columns per source, matching the two-field values
    # stored by getHash().
    print OUT "\t$id\t${id}_nfo";
}
close $list_fh;
print OUT "\n";
# getHash($file, $hash)
#
# Load one annotation file into the hash referenced by $hash.
#
# Each non-blank line is split on whitespace into at most three fields:
# a key, a second field, and the remainder of the line. A trailing
# ".<digits>" suffix is stripped from the key. The stored value is
# "<second field>\t<remainder>", with tabs inside the remainder turned into
# spaces so that every value occupies exactly two tab-separated columns.
# A missing second field or remainder is stored as an empty string.
# When a key occurs more than once, the last line wins.
#
# Parameters:
#   $file - path of the annotation file to read
#   $hash - reference to the hash that receives key => value pairs
# Returns: nothing useful.
# Dies:    when no file name is given or the file cannot be opened/closed.
sub getHash {
    my ($file, $hash) = @_;
    die "Error: no annotation file name given\n"
        unless defined $file and length $file;

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as an open mode or a pipe command, and the handle does
    # not leak into the package namespace.
    open(my $fh, '<', $file) or die "Error: cannot open [$file]: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ /\S/;    # a blank line would create a bogus '' key

        # Named lexicals instead of $a/$b, which are reserved for sort().
        my ($key, $first, $rest) = split /\s+/, $line, 3;
        for my $field ($first, $rest) {
            $field = '' unless defined $field;    # short line: keep two columns
        }

        $key =~ s/\.\d+$//;
        # Only the remainder can still contain tabs; $first came out of a
        # whitespace split and therefore has none.
        $rest =~ tr/\t/ /;

        $hash->{$key} = "$first\t$rest";
    }
    close $fh or die "Error: cannot close [$file]: $!\n";
    return;
}
# Write one row per gene, in sorted order: the gene name followed by the
# stored two-column value from every annotation source, in the order the
# sources were listed. A source that lacks the gene contributes ".\t." so
# the columns stay aligned with the header.
for my $gene (sort keys %Keys) {
    my @cells = map {
        my $dathash = $_->[1];
        defined $dathash->{$gene} ? $dathash->{$gene} : ".\t.";
    } @Annots;
    print OUT join("\t", $gene, @cells), "\n";
}

# Buffered write errors (e.g. a full disk) only surface at close time.
close OUT or die "Error: failed writing [$opt_o]: $!\n";

my $stop_time = [gettimeofday];
#$|=1;
print STDERR "\n Time Elapsed:\t",tv_interval( $start_time, $stop_time )," second(s)\n";

# Usage hint, printed in bold green; it names the file actually written
# rather than always showing the default name.
print STDERR "\033[32;1mgrep -i 'transcription factor' $opt_o > out\033[0;0m\n";

__END__