Bio/Index/SwissPfam.pm

   1 #
   2 # BioPerl module for Bio::Index::SwissPfam
   3 #
   4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
   5 #
   6
   7 # You may distribute this module under the same terms as perl itself
   8
   9 # POD documentation - main docs before the code
  10
  11 =head1 NAME
  12
  13 Bio::Index::SwissPfam - Interface for indexing swisspfam files
  14
  15 =head1 SYNOPSIS
  16
  17     use Bio::Index::SwissPfam;
  18     use strict;
  19
  20     my $Index_File_Name = shift;
  21     my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name,
  22                                          '-write_flag' => 'WRITE');
  23     $inx->make_index(@ARGV);
  24
  25     use Bio::Index::SwissPfam;
  26     use strict;
  27
  28     my $Index_File_Name = shift;
  29     my $inx = Bio::Index::SwissPfam->new('-filename' => $Index_File_Name);
  30
  31     foreach my $id (@ARGV) {
  32         my $seq = $inx->fetch($id); # Returns stream
  33              while( <$seq> ) {
  34                  if(/^>/) {
  35                        print;
  36                           last;
  37                  }
  38              }
  39     }
  40
  41
  42 =head1 DESCRIPTION
  43
  44 SwissPfam is one of the flat files released with Pfam. This modules
  45 provides a way of indexing this module.
  46
  47 Inherits functions for managing dbm files from Bio::Index::Abstract.pm, and
  48 provides the basic funtionallity for indexing SwissPfam files.  Only
  49 retrieves FileStreams at the moment. Once we have something better
  50 (ie, an object!), will use that. Heavily snaffled from Index::Fasta system of
  51 James Gilbert. Note: for best results 'use strict'.
  52
  53 =head1 FEED_BACK
  54
  55 =head2 Mailing Lists
  56
  57 User feedback is an integral part of the evolution of this and other
  58 Bioperl modules. Send your comments and suggestions preferably to one
  59 of the Bioperl mailing lists.  Your participation is much appreciated.
  60
  61   bioperl-l@bioperl.org                  - General discussion
  62   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  63
  64 =head2 Support
  65
  66 Please direct usage questions or support issues to the mailing list:
  67
  68 I<bioperl-l@bioperl.org>
  69
  70 rather than to the module maintainer directly. Many experienced and
  71 reponsive experts will be able look at the problem and quickly
  72 address it. Please include a thorough description of the problem
  73 with code and data examples if at all possible.
  74
  75 =head2 Reporting Bugs
  76
  77 Report bugs to the Bioperl bug tracking system to help us keep track
  78 the bugs and their resolution.  Bug reports can be submitted via the
  79 web:
  80
  81   https://github.com/bioperl/bioperl-live/issues
  82
  83 =head1 AUTHOR - Ewan Birney
  84
  85 =head1 APPENDIX
  86
  87 The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
  88
  89 =cut
  90
  91 # Let's begin the code...
  92
  93 package Bio::Index::SwissPfam;
  94
  95 use strict;
  96 use Bio::Seq;
  97
  98 use base qw(Bio::Index::Abstract);
  99
 100 sub _type_stamp {
 101     return '__SWISSPFAM_FLAT__'; # What kind of index are we?
 102 }
 103
 104 sub _version {
 105     return 0.1;
 106 }
 107
 108 =head2 _index_file
 109
 110   Title   : _index_file
 111   Usage   : $index->_index_file( $file_name, $i )
 112   Function: Specialist function to index swisspfam format files.
 113             Is provided with a filename and an integer
 114             by make_index in its SUPER class.
 115   Example :
 116   Returns :
 117   Args    :
 118
 119 =cut
 120
 121 sub _index_file {
 122     my( $self,
 123         $file, # File name
 124         $i     # Index-number of file being indexed
 125         ) = @_;
 126
 127     my( $begin, # Offset from start of file of the start
 128                 # of the last found record.
 129         $end,   # Offset from start of file of the end
 130                 # of the last found record.
 131         $id,    # ID of last found record.
 132         $acc,   # accession of last record. Also put into the index
 133         $nid, $nacc, # new ids for the record just found
 134         );
 135
 136     $begin = 0;
 137     $end   = 0;
 138
 139     open my $SP, '<', $file or $self->throw("Could not read file '$file': $!");
 140
 141     # In Windows, text files have '\r\n' as line separator, but when reading in
 142     # text mode Perl will only show the '\n'. This means that for a line "ABC\r\n",
 143     # "length $_" will report 4 although the line is 5 bytes in length.
 144     # We assume that all lines have the same line separator and only read current line.
 145     my $init_pos   = tell($SP);
 146     my $curr_line  = <$SP>;
 147     my $pos_diff   = tell($SP) - $init_pos;
 148     my $correction = $pos_diff - length $curr_line;
 149     seek $SP, $init_pos, 0; # Rewind position to proceed to read the file
 150
 151     # Main indexing loop
 152     while (<$SP>) {
 153         if (/^>(\S+)\s+\|=*\|\s+(\S+)/) {
 154             $nid = $1;
 155             $nacc = $2;
 156             my $new_begin = tell($SP) - length( $_ ) - $correction;
 157             $end = $new_begin - 1;
 158
 159             if( $id ) {
 160                 $self->add_record($id, $i, $begin, $end);
 161                 if( $acc ne $id ) {
 162                     $self->add_record($acc, $i, $begin, $end);
 163                 }
 164             }
 165             $begin = $new_begin;
 166             $id = $nid;
 167             $acc = $nacc;
 168         }
 169     }
 170     # Don't forget to add the last record
 171     $end = tell($SP);
 172     $self->add_record($id, $i, $begin, $end) if $id;
 173
 174     close $SP;
 175     return 1;
 176 }
 177
 178
 179 =head2 fetch
 180
 181   Title   : fetch
 182   Usage   : $index->fetch( $id )
 183   Function: Returns a Bio::Seq object from the index
 184   Example : $seq = $index->fetch( 'dJ67B12' )
 185   Returns : Bio::Seq object
 186   Args    : ID
 187
 188 =cut
 189
 190 sub fetch {
 191     my( $self, $id ) = @_;
 192     my $desc;
 193     my $db = $self->db();
 194     if (my $rec = $db->{ $id }) {
 195         my( @record );
 196         my ($file, $begin, $end) = $self->unpack_record( $rec );
 197         # Get the (possibly cached) filehandle
 198         my $fh = $self->_file_handle( $file );
 199
 200         # move to start
 201         seek($fh, $begin, 0);
 202
 203         return $fh;
 204     } else {
 205         $self->throw("Unable to find a record for $id in SwissPfam flat file index");
 206     }
 207 }
 208
 209 1;