2 # Author: Jason Stajich <jason-at-bioperl-dot-org>
3 # Purpose: Bioperl implementation of Sean Eddy's sreformat
4 # We're not as clever as Sean's squid library though so
5 # you have to specify the input format rather than letting
6 # the application guess.
# Help text for the program. The -h/--help option handler prints this
# string to STDERR and then exits with status 0. It lists each
# command-line switch and the keywords that --special accepts.
14 my $USAGE = "bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
16 -h/--help Print this help
17 -if/--informat Specify the input format
18 -of/--outformat Specify the output format
19 -i/--input Specify the input file name
20 (to pass in data on STDIN use minus sign as filename)
21 -o/--output Specify the output file name
22 (to pass data out on STDOUT use minus sign as filename)
23 --msa Specify this is multiple sequence alignment data
24 --special=specialparams Specify special params supported by some formats
25 Comma or space separated please.
27 nointerleaved -- for phylip,non-interleaved format
28 idlinebreak -- for phylip, makes it molphy format
29 percentages -- for clustalw, show % id per line
30 flat -- don't show start-end in seqid
31 linelength -- line length for clustalw
32 mrbayes -- for MrBayes proper NEXUS output
# Variables that receive the command-line settings: input and output file
# names, input and output format names, the MSA flag, and the raw
# --special string.
36 my ($input,$output,$informat,$outformat,$msa,$special);
# Option table: each key names a short|long option and each value is the
# destination reference, or a handler sub for -h/--help (which prints the
# usage text to STDERR and exits with status 0).
# NOTE(review): these pairs look like Getopt::Long specs, where ':s' would
# mark an optional string value; the call that consumes them is not
# visible here -- confirm.
39 'h|help' => sub { print STDERR
($USAGE); exit(0) },
40 'i|input:s' => \
$input,
41 'o|output:s' => \
$output,
42 'if|informat:s' => \
$informat,
43 'of|outformat:s' => \
$outformat,
45 's|special:s' => \
$special,
# Both the input format and the output format are mandatory. If either is
# undefined, abort with a message that echoes what was supplied, showing
# '' in place of whichever value is missing.
48 unless( defined $informat && defined $outformat ) {
49 die(sprintf("Cannot proceed without a defined input and output you gave (%s,%s)\n",
50 defined $informat ?
$informat : "''" ,
51 defined $outformat ?
$outformat : "''"));
# Translate the --special string into constructor arguments held in @extra.
# The string is split on single whitespace or comma characters and each
# resulting keyword is mapped as follows (patterns are unanchored, so a
# keyword merely containing the name matches):
#   nointerleaved  ->  -interleaved => '0'
#   mrbayes        ->  -show_symbols => 0, -show_endblock => 0
#   name=value     ->  -name => value
#   anything else  ->  -keyword => 1
# @extra is later passed to the Bio::AlignIO output constructors.
# NOTE(review): because the split pattern matches one separator character
# at a time, input such as "a, b" yields an empty keyword between the
# comma and the space, which falls through to the last branch as '-' => 1.
57 @extra = map { my @rc;
58 if( /nointerleaved/) {
59 @rc = ('-interleaved' => '0');
60 } elsif( /mrbayes/ ) {
61 @rc = ('-show_symbols' => 0,
62 '-show_endblock' => 0);
63 } elsif( /(\S+)\=(\S+)/ ) { @rc = ( "-$1" => $2) }
64 else{ @rc = ("-$_" => 1) }
66 } split(/[\s,]/,$special);
68 # guess we're talking about MSA if any of the standard MSA names are used
# The condition holds when either format name contains one of the listed
# substrings; the match is case-sensitive and unanchored.
69 if( $informat =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/ ||
70 $outformat =~ /nexus|phylip|clustal|maf|stockholm|bl2seq|msf/ ) {
# Create the alignment reader $in as a Bio::AlignIO object for $informat.
# When an input file name was given, the reader is opened on that file.
76 if( defined $input ) {
77 $in = new Bio
::AlignIO
(-format
=> $informat, -file
=> $input);
# This form reads from the ARGV filehandle instead of a named file
# (presumably the fallback when no input file is given -- confirm).
79 $in = new Bio
::AlignIO
(-format
=> $informat, -fh
=> \
*ARGV
);
# Abort, naming the requested input format in the message.
# NOTE(review): unlike the sequence branch, no distinction is made here
# for a file that could not be opened -- confirm this is intended.
83 die("Unknown MSA format to bioperl $informat\n");
# Create the alignment writer $out as a Bio::AlignIO object for $outformat.
# This form writes to the file named by $output (the '>' prefix requests
# write mode) and forwards the @extra parameters built from --special.
87 $out = new Bio
::AlignIO
(-format
=> $outformat,
88 -file
=> ">$output", @extra);
90 # default to STDOUT for output
# No -file or -fh argument is passed in this form; @extra is still
# forwarded.
91 $out = new Bio
::AlignIO
(-format
=> $outformat,@extra);
# Abort, naming the requested output format in the message.
95 die("Unknown MSA format to bioperl $outformat\n");
# Copy every alignment from the reader to the writer.
# The 'flat' keyword of --special asks for sequence IDs without the
# start-end suffix. It is tested once, before the loop, because $special
# does not change while alignments are copied. The defined() guard avoids
# matching against an undefined value (and the resulting "uninitialized
# value" warning on every alignment) when --special was not given.
my $flat_names = defined $special && $special =~ /flat/;
while ( my $aln = $in->next_aln ) {
    $aln->set_displayname_flat(1) if $flat_names;
    $out->write_aln($aln);
}
# Create the sequence reader $in as a Bio::SeqIO object for $informat.
# When an input file name was given, the reader is opened on that file.
103 if( defined $input ) {
104 $in = new Bio
::SeqIO
(-format
=> $informat, -file
=> $input);
# This form reads from the ARGV filehandle instead of a named file
# (presumably the fallback when no input file is given -- confirm).
106 $in = new Bio
::SeqIO
(-format
=> $informat, -fh
=> \
*ARGV
);
# Error reporting: if the captured error text in $@ mentions
# "Could not open", report the input file as unopenable; the second
# message instead blames an unknown input format.
110 if( $@
=~ /Could not open/ ) {
111 die("Could not open input file: $input\n");
113 die("Unknown sequence format to bioperl $informat\n");
# Create the sequence writer $out as a Bio::SeqIO object for $outformat.
# This form writes to the file named by $output (the '>' prefix requests
# write mode).
118 $out = new Bio
::SeqIO
(-format
=> $outformat,
119 -file
=> ">$output");
121 # default to STDOUT for output
# No -file or -fh argument is passed in this form.
122 $out = new Bio
::SeqIO
(-format
=> $outformat);
# Error reporting: if the captured error text in $@ mentions
# "Could not open", report the output file as unopenable; the second
# message blames an unknown output format and appends the text of $@.
126 if( $@
=~ /Could not open/ ) {
127 die("Could not open output file: $output\n");
129 die("Unknown sequence format to bioperl $outformat: $@\n");
# Copy sequences one at a time from the reader to the writer; the loop
# ends when next_seq returns a false value.
132 while( my $seq = $in->next_seq ) {
133 $out->write_seq($seq);
139 bp_sreformat - convert sequence formats
143 This script uses the SeqIO and AlignIO systems to convert between
144 formats for either sequence data or multiple sequence alignment data. The
145 name comes from the fact that Sean Eddy's program sreformat (part of
146 the HMMER pkg) already does this. Sean's program tries to guess the
147 input format, while in our code we currently require you to specify what
148 the input and output formats are and if the data is from a multiple
149 sequence alignment or from straight sequence files.
153 bp_sreformat -if INFORMAT -of OUTFORMAT -i FILENAME -o output.FORMAT
155 -h/--help Print this help
157 -if/--informat Specify the input format
159 -of/--outformat Specify the output format
161 -i/--input Specify the input file name
162 (to pass in data on STDIN use minus sign as filename)
163 -o/--output Specify the output file name
164 (to pass data out on STDOUT use minus sign as filename)
166 --msa Specify this is multiple sequence alignment data
168 --special Will pass on special parameters to the AlignIO/SeqIO
169 object -- most of these are for Bio::AlignIO objects
170 Comma or space separated list of the following
171 nointerleaved -- for phylip,non-interleaved format
172 idlinebreak -- for phylip, makes it molphy format
173 percentages -- for clustalw, show % id per line