2 # BioPerl module for Bio::Index::SwissPfam
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
13 Bio::Index::SwissPfam - Interface for indexing swisspfam files
17 use Bio::Index::SwissPfam;
20 my $Index_File_Name = shift;
21 my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name,
22 '-write_flag' => 'WRITE');
23 $inx->make_index(@ARGV);
25 use Bio::Index::SwissPfam;
28 my $Index_File_Name = shift;
29 my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name);
31 foreach my $id (@ARGV) {
32 my $seq = $inx->fetch($id); # Returns stream
44 SwissPfam is one of the flat files released with Pfam. This module
45 provides a way of indexing this file.
47 Inherits functions for managing dbm files from Bio::Index::Abstract.pm, and
48 provides the basic functionality for indexing SwissPfam files. Only
49 retrieves FileStreams at the moment. Once we have something better
50 (ie, an object!), will use that. Heavily snaffled from Index::Fasta system of
51 James Gilbert. Note: for best results 'use strict'.
57 User feedback is an integral part of the evolution of this and other
58 Bioperl modules. Send your comments and suggestions preferably to one
59 of the Bioperl mailing lists. Your participation is much appreciated.
61 bioperl-l@bioperl.org - General discussion
62 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
66 Please direct usage questions or support issues to the mailing list:
68 I<bioperl-l@bioperl.org>
70 rather than to the module maintainer directly. Many experienced and
71 responsive experts will be able to look at the problem and quickly
72 address it. Please include a thorough description of the problem
73 with code and data examples if at all possible.
77 Report bugs to the Bioperl bug tracking system to help us keep track
78 of the bugs and their resolution. Bug reports can be submitted via the
81 https://github.com/bioperl/bioperl-live/issues
83 =head1 AUTHOR - Ewan Birney
87 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
91 # Let's begin the code...
93 package Bio
::Index
::SwissPfam
;
98 use base
qw(Bio::Index::Abstract);
101 return '__SWISSPFAM_FLAT__'; # What kind of index are we?
111 Usage : $index->_index_file( $file_name, $i )
112 Function: Specialist function to index swisspfam format files.
113 Is provided with a filename and an integer
114 by make_index in its SUPER class.
124 $i # Index-number of file being indexed
127 my( $begin, # Offset from start of file of the start
128 # of the last found record.
129 $end, # Offset from start of file of the end
130 # of the last found record.
131 $id, # ID of last found record.
132 $acc, # accession of last record. Also put into the index
133 $nid, $nacc, # new ids for the record just found
139 open my $SP, '<', $file or $self->throw("Could not read file '$file': $!");
141 # In Windows, text files have '\r\n' as line separator, but when reading in
142 # text mode Perl will only show the '\n'. This means that for a line "ABC\r\n",
143 # "length $_" will report 4 although the line is 5 bytes in length.
144 # We assume that all lines have the same line separator and only read current line.
145 my $init_pos = tell($SP);
146 my $curr_line = <$SP>;
147 my $pos_diff = tell($SP) - $init_pos;
148 my $correction = $pos_diff - length $curr_line;
149 seek $SP, $init_pos, 0; # Rewind position to proceed to read the file
153 if (/^>(\S+)\s+\|=*\|\s+(\S+)/) {
156 my $new_begin = tell($SP) - length( $_ ) - $correction;
157 $end = $new_begin - 1;
160 $self->add_record($id, $i, $begin, $end);
162 $self->add_record($acc, $i, $begin, $end);
170 # Don't forget to add the last record
172 $self->add_record($id, $i, $begin, $end) if $id;
182 Usage : $index->fetch( $id )
183 Function: Returns a stream (filehandle) positioned at the start of the record
184 Example : $seq = $index->fetch( 'dJ67B12' )
185 Returns : Stream (filehandle); a Bio::Seq object is not built at the moment
191 my( $self, $id ) = @_;
193 my $db = $self->db();
194 if (my $rec = $db->{ $id }) {
196 my ($file, $begin, $end) = $self->unpack_record( $rec );
197 # Get the (possibly cached) filehandle
198 my $fh = $self->_file_handle( $file );
201 seek($fh, $begin, 0);
205 $self->throw("Unable to find a record for $id in SwissPfam flat file index");