Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / examples / align / aligntutorial.pl
blobfcebbd679b61194513b1d439b7b2e57381c2f3b2
1 #!/usr/bin/perl
3 # An example of how to use the different alignment tools in bioperl
4 # to align some sequences
6 # All these methods except Bio::Tools::pSW will work for DNA sequence
7 # (need to use a different matrix however)
9 use Bio::Factory::EMBOSS;
10 use Bio::SeqIO;
11 use Bio::AlignIO;
12 use Bio::Tools::pSW;
13 use Bio::PrimarySeq;
14 use Bio::Tools::Run::Alignment::Clustalw;
15 use Bio::Tools::Run::Alignment::TCoffee;
16 use Bio::Tools::Run::StandAloneBlast;
18 use strict;
19 # build the sequences since EMBOSS expects seqs to be in files
20 my $seq = new Bio::PrimarySeq(-seq =>
21 'MAVNPELAPFTLSRGIPSFDDQALSTIIQLQDCIQQAIQQLNYSTAEFLAELLYAECSILDKSSVYWSDAVYLYALSLFLNKSYHTAFQISKEFKEYHLGIAYIFGRCALQLSQGVNEAILTLLSIINVFSSNSSNTRINMVLNSNLVHIPDLATLNCLLGNLYMKLDHSKEGAFYHSEALAINPYLWESYEAICKMRATVDLKRVFFDIAGKKSNSHNNNAASSFPSTSLSHFEPRSQPSLYSKTNKNGNNNINNNVNTLFQSSNSPPSTSASSFSSIQHFSRSQQQQANTSIRTCQNKNTQTPKNPAINSKTSSALPNNISMNLVSPSSKQPTISSLAKVYNRNKLLTTPPSKLLNNDRNHQNNNNNNNNNNNNNNNNNNNNNNNNIINKTTFKTPRNLYSSTGRLTTSKKNPRSLIISNSILTSDYQITLPEIMYNFALILRSSSQYNSFKAIRLFESQIPSHIKDTMPWCLVQLGKLHFEIINYDMSLKYFNRLKDLQPARVKDMEIFSTLLWHLHDKVKSSNLANGLMDTMPNKPETWCCIGNLLSLQKDHDAAIKAFEKATQLDPNFAYAYTLQGHEHSSNDSSDSAKTCYRKALACDPQHYNAYYGLGTSAMKLGQYEEALLYFEKARSINPVNVVLICCCGGSLEKLGYKEKALQYYELACHLQPTSSLSKYKMGQLLYSMTRYNVALQTFEELVKLVPDDATAHYLLGQTYRIVGRKKDAIKELTVAMNLDPKGNQVIIDELQKCHMQE',
22 -id => 'seq1'
23 );
24 my $seq2 = new Bio::PrimarySeq( -seq =>
25 'CLIFXRLLLIQMIHPQARRAFTFLQQQEPYRIQSMEQLSTLLWHLADLPALSHLSQSLISISRSSPQAWIAVGNCFSLQKDHDEAMRCFRRATQVDEGCAYAWTLCGYEAVEMEEYERAMAFYRTAIRTDARHYNAWYVLFFFFFFFFVPGDIDSXPKKGMEWGXFISKRIDRGMRSIILKEPSKSIQLIPFFYVALVWXVGVSSYPLETMTNIDFPKKKKALEKSNDVVQALHFYERASKYAPTSAMVQFKRIRALVALQRYDEAISALVPLTHSAPDEANVFFLLGKCLLKKERRQEATMAFTNARELEPK',
26 -id => 'seq2');
28 my $out = new Bio::SeqIO(-format => 'fasta',
29 -file => ">seq1.fa");
30 $out->write_seq($seq);
31 $out->close();
32 $out = new Bio::SeqIO(-format => 'fasta',
33 -file => ">seq2.fa");
34 $out->write_seq($seq2);
35 $out->close();
38 my $embossfactory = Bio::Factory::EMBOSS->new();
40 my @alignprogs = qw(water needle stretcher matcher);
41 my $alignout = new Bio::AlignIO(-format => 'msf');
44 foreach my $prog ( @alignprogs ) {
45 my $alignfactory = $embossfactory->program('water');
48 $alignfactory->run({ '-sequencea' => 'seq1.fa',
49 '-seqall' => 'seq2.fa',
50 '-gapext' => 2.0,
51 '-datafile' => 'EBLOSUM62',
52 '-gapopen' => 14.0,
53 '-outfile' => "seq1_vs_seq2.$prog"});
55 my $alnin = new Bio::AlignIO(-format => 'emboss',
56 -file => "seq1_vs_seq2.$prog");
57 my $aln = $alnin->next_aln();
58 $alignout->write_aln($aln);
61 # this should produce the same alignment as 'water'
62 my $factory = new Bio::Tools::pSW(-matrix=> 'blosum62.bla',
63 -gap => 14,
64 -ext => 2);
65 my $aln = $factory->pairwise_alignment($seq,$seq2);
66 $alignout->write_aln($aln);
68 $factory = new Bio::Tools::Run::Alignment::Clustalw('ktuple' => 2,
69 'matrix' => 'BLOSUM');
70 $aln = $factory->align([$seq,$seq2]);
71 $alignout->write_aln($aln);
73 $factory = new Bio::Tools::Run::Alignment::TCoffee('ktuple' => 2,
74 'matrix' => 'BLOSUM');
75 $aln = $factory->align([$seq,$seq2]);
76 $alignout->write_aln($aln);
78 $factory = new Bio::Tools::Run::StandAloneBlast();
79 $aln = $factory->bl2seq($seq,$seq2);
81 # this actually returns a Bio::Tools::BPbl2seq object
82 # it can be transformed to a SimpleAlign object see
83 # the code in Bio::AlignIO::bl2seq
84 # A transformer object will be written at some point