a class to extract sequences from the genome
[cxgn-corelibs.git] / lib / CXGN / Tools / Parse.pm
blob54df7ad33b6b6520a4e8ea7c3c3190bccceb1acd
1 package CXGN::Tools::Parse;
2 use strict;
4 =head1 CXGN::Tools::Parse
6 Interface-like base class for CXGN parsers.
Two usage patterns are supported:

 1) Pass a filename to new(). A filehandle is opened on that file,
    and you can parse one entry at a time with next().
 2) Pass raw data to new(). Everything is parsed at once with
    parse_all(), which pushes each entry, as a hash ref, onto
    @{$self->{entries}}.

15 =head1 Author
17 C. Carpita <csc32@cornell.edu>
19 =head1 Methods
21 =cut
=head2 new()

 Args: (opt) path to an existing file, or raw data to parse
 Ret:  Parser object
 Side: If the argument names an existing plain file, opens it for
       reading and stores the handle in $self->{fh}; dies if the file
       cannot be opened.
       Otherwise the argument is stored as raw data in $self->{data}
       and $self->{data_to_parse}, and, when it is defined, everything
       is parsed immediately.

=cut

sub new {
    my $class        = shift;
    my $data_or_file = shift;
    my $self         = bless {}, $class;

    # Guard with defined() so that a bare new() does not stat undef.
    if ( defined $data_or_file && -f $data_or_file ) {
        $self->{file} = $data_or_file;

        # Three-argument open: the filename can never be interpreted as
        # an open mode or a pipe command.
        open( my $fh, '<', $self->{file} )
          or die "Can't open file for reading: " . $self->{file} . "\n";
        $self->{fh} = $fh;
    }
    else {
        $self->{data}          = $data_or_file;
        $self->{data_to_parse} = $data_or_file;

        # Only parse when there is something to parse.
        if ( defined $data_or_file ) {

            # Prefer a parse_all_data() hook if a subclass supplies one;
            # this class itself only defines parse_all().
            my $parse_method = $self->can('parse_all_data')
              || $self->can('parse_all');
            $self->$parse_method();
        }
    }

    return $self;
}
=head2 parse_all()

 Args: none
 Ret:  nothing meaningful
 Side: Calls next() repeatedly until it returns a false value. Each
       entry is appended to @{$self->{entries}} and indexed in
       $self->{entry_by_id} under the entry's 'id' key.

=cut

sub parse_all {
    my ($self) = @_;

    while ( my $record = $self->next ) {
        my $record_id = $record->{id};
        $self->{entry_by_id}{$record_id} = $record;
        push @{ $self->{entries} }, $record;
    }
}
=head2 get_entry_by_id()

 Args: entry id
 Ret:  The entry hash ref stored under that id in
       $self->{entry_by_id}, or undef if there is none

=cut

sub get_entry_by_id {
    my ( $self, $wanted_id ) = @_;
    return $self->{entry_by_id}{$wanted_id};
}
=head2 get_all_entries()

 Args: none
 Ret:  List of all entries stored in @{$self->{entries}}; an empty
       list if no entry has been stored yet

=cut

sub get_all_entries {
    my ($self) = @_;

    # Fall back to an empty array ref: dereferencing an undefined
    # $self->{entries} is fatal under "use strict".
    return @{ $self->{entries} || [] };
}
=head2 next()

 Args: none
 Ret:  Never returns here; a subclass implementation is expected to
       return one entry as a hash ref
 Side: Always dies in this base class. Subclasses must override it.

=cut

sub next {
    my ($self) = @_;

    # A subclass implementation has two options.
    #
    # Raw-data mode:
    #   my $data = $self->{data_to_parse};
    #   ... grab one entry, build its hash, then store what is left:
    #   $self->{data_to_parse} = $data_with_stuff_chopped_off;
    #   ... and return the hash ref.
    #
    # Filehandle mode (when $self->{fh} exists):
    #   my $fh = $self->{fh};
    #   ... read from the filehandle, grab one entry, build its hash,
    #   ... and return the hash ref.

    die "Override this function in a subclass";
}
=head2 DESTROY()

 Args: none
 Ret:  nothing
 Side: Closes the filehandle stored in $self->{fh}, if there is one

=cut

sub DESTROY {
    my ($self) = @_;

    # A destructor can run at any point, so keep the global status
    # variables of the surrounding code intact.
    local ( $!, $?, $@ );

    # Use the close() builtin rather than the ->close method so that
    # this works on a plain lexical handle without IO::Handle loaded.
    close( $self->{fh} ) if $self->{fh};

    return;
}