Bio::DB::Universal: move into its own distribution
[bioperl-live.git] / Bio / Index / SwissPfam.pm
blob4ebd032466728312db18988943883343bb697755
2 # BioPerl module for Bio::Index::SwissPfam
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # You may distribute this module under the same terms as perl itself
9 # POD documentation - main docs before the code
11 =head1 NAME
13 Bio::Index::SwissPfam - Interface for indexing swisspfam files
15 =head1 SYNOPSIS
17 use Bio::Index::SwissPfam;
18 use strict;
20 my $Index_File_Name = shift;
21 my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name,
22 '-write_flag' => 'WRITE');
23 $inx->make_index(@ARGV);
25 use Bio::Index::SwissPfam;
26 use strict;
28 my $Index_File_Name = shift;
29 my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name);
31 foreach my $id (@ARGV) {
32 my $seq = $inx->fetch($id); # Returns stream
33 while( <$seq> ) {
34 if(/^>/) {
35 print;
36 last;
42 =head1 DESCRIPTION
44 SwissPfam is one of the flat files released with Pfam. This modules
45 provides a way of indexing this module.
47 Inherits functions for managing dbm files from Bio::Index::Abstract.pm, and
48 provides the basic funtionallity for indexing SwissPfam files. Only
49 retrieves FileStreams at the moment. Once we have something better
50 (ie, an object!), will use that. Heavily snaffled from Index::Fasta system of
51 James Gilbert. Note: for best results 'use strict'.
53 =head1 FEED_BACK
55 =head2 Mailing Lists
57 User feedback is an integral part of the evolution of this and other
58 Bioperl modules. Send your comments and suggestions preferably to one
59 of the Bioperl mailing lists. Your participation is much appreciated.
61 bioperl-l@bioperl.org - General discussion
62 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
64 =head2 Support
66 Please direct usage questions or support issues to the mailing list:
68 I<bioperl-l@bioperl.org>
70 rather than to the module maintainer directly. Many experienced and
71 reponsive experts will be able look at the problem and quickly
72 address it. Please include a thorough description of the problem
73 with code and data examples if at all possible.
75 =head2 Reporting Bugs
77 Report bugs to the Bioperl bug tracking system to help us keep track
78 the bugs and their resolution. Bug reports can be submitted via the
79 web:
81 https://github.com/bioperl/bioperl-live/issues
83 =head1 AUTHOR - Ewan Birney
85 =head1 APPENDIX
87 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
89 =cut
91 # Let's begin the code...
93 package Bio::Index::SwissPfam;
95 use strict;
96 use Bio::Seq;
98 use base qw(Bio::Index::Abstract);
100 sub _type_stamp {
101 return '__SWISSPFAM_FLAT__'; # What kind of index are we?
104 sub _version {
105 return 0.1;
108 =head2 _index_file
110 Title : _index_file
111 Usage : $index->_index_file( $file_name, $i )
112 Function: Specialist function to index swisspfam format files.
113 Is provided with a filename and an integer
114 by make_index in its SUPER class.
115 Example :
116 Returns :
117 Args :
119 =cut
121 sub _index_file {
122 my( $self,
123 $file, # File name
124 $i # Index-number of file being indexed
125 ) = @_;
127 my( $begin, # Offset from start of file of the start
128 # of the last found record.
129 $end, # Offset from start of file of the end
130 # of the last found record.
131 $id, # ID of last found record.
132 $acc, # accession of last record. Also put into the index
133 $nid, $nacc, # new ids for the record just found
136 $begin = 0;
137 $end = 0;
139 open my $SP, '<', $file or $self->throw("Could not read file '$file': $!");
141 # In Windows, text files have '\r\n' as line separator, but when reading in
142 # text mode Perl will only show the '\n'. This means that for a line "ABC\r\n",
143 # "length $_" will report 4 although the line is 5 bytes in length.
144 # We assume that all lines have the same line separator and only read current line.
145 my $init_pos = tell($SP);
146 my $curr_line = <$SP>;
147 my $pos_diff = tell($SP) - $init_pos;
148 my $correction = $pos_diff - length $curr_line;
149 seek $SP, $init_pos, 0; # Rewind position to proceed to read the file
151 # Main indexing loop
152 while (<$SP>) {
153 if (/^>(\S+)\s+\|=*\|\s+(\S+)/) {
154 $nid = $1;
155 $nacc = $2;
156 my $new_begin = tell($SP) - length( $_ ) - $correction;
157 $end = $new_begin - 1;
159 if( $id ) {
160 $self->add_record($id, $i, $begin, $end);
161 if( $acc ne $id ) {
162 $self->add_record($acc, $i, $begin, $end);
165 $begin = $new_begin;
166 $id = $nid;
167 $acc = $nacc;
170 # Don't forget to add the last record
171 $end = tell($SP);
172 $self->add_record($id, $i, $begin, $end) if $id;
174 close $SP;
175 return 1;
179 =head2 fetch
181 Title : fetch
182 Usage : $index->fetch( $id )
183 Function: Returns a Bio::Seq object from the index
184 Example : $seq = $index->fetch( 'dJ67B12' )
185 Returns : Bio::Seq object
186 Args : ID
188 =cut
190 sub fetch {
191 my( $self, $id ) = @_;
192 my $desc;
193 my $db = $self->db();
194 if (my $rec = $db->{ $id }) {
195 my( @record );
196 my ($file, $begin, $end) = $self->unpack_record( $rec );
197 # Get the (possibly cached) filehandle
198 my $fh = $self->_file_handle( $file );
200 # move to start
201 seek($fh, $begin, 0);
203 return $fh;
204 } else {
205 $self->throw("Unable to find a record for $id in SwissPfam flat file index");