1 package CXGN
::Tools
::FeatureTree
;
6 use CXGN
::Chado
::Feature
;
8 use CXGN
::Tools
::Entrez
;
12 =head1 CXGN::Tools::FeatureTree
14 Fetch a record from the NCBI nuccore (Nucleotide) database with efetch and parse the fields needed to fill feature objects.
23 Naama Menda (nm249@cornell.edu)
29 Usage: my $feature_fetch = CXGN::Tools::FeatureTree->new($GBaccession);
32 Args: genbank accession
38 #our $feature_object=undef;
46 my $self = bless $args, $class;
57 my $gb=shift; #GenBank accessions are stored in feature.name!
# Fetch the nuccore record as XML by reading wget's standard output.
# wget is started through list-form pipe open, so no shell ever parses $gb:
# an accession containing quotes, backticks, `$` or `;` can no longer inject
# shell commands (the original interpolated $gb into a backtick command line).
my $feature_xml = do {
    my $efetch_url = "eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=nuccore&id=$gb&rettype=xml&retmode=text";
    my $xml;    # stays undef when wget cannot be started, as with backticks
    if ( open( my $wget_out, '-|', 'wget', $efetch_url, '-O', '-' ) ) {
        local $/;    # slurp the whole response in one read
        $xml = <$wget_out>;
        $xml = '' unless defined $xml;    # wget ran but printed nothing
        close $wget_out;
    }
    $xml;
};
# Build the XML::Twig parser.  Each entry below pairs an element path in the
# fetched record with a reference to a sub of this package; the handler subs
# visible in this file take ($twig, $elt) and store the element's text in
# @Ftree.
# NOTE(review): the option key these path => handler pairs belong to is not
# visible in this part of the file -- confirm before reordering anything.
62 my $twig=XML
::Twig
->new(
65 'Textseq-id/Textseq-id_accession' => \
&name
,
66 'Textseq-id/Textseq-id_version' => \
&version
,
67 'Seq-data_iupacna/IUPACna' => \
&residues
,
68 'Seq-inst/Seq-inst_length' => \
&seqlen
,
69 'Seqdesc/Seqdesc_title' => \
&description
,
70 'Org-ref/Org-ref_taxname' => \
&organism_name
,
71 'Org-ref_db/Dbtag/Dbtag_tag/Object-id/Object-id_id' => \
&organism_taxon_id
,
72 'PubMedId' => \
&pubmed_id
,
73 'Bioseq_id/Seq-id/Seq-id_gi' =>\
&accession
, # "accession" here is the GenBank GI number, not the accession string
74 'MolInfo/MolInfo_biomol' =>\
&molecule_type
,
78 # AbstractText => \&abstract,
81 pretty_print
=> 'indented', # output will be nicely formatted
84 $twig->parse($feature_xml );
87 my $message= "!!NCBI server seems to be down. Please try again later!!.\n $@";
88 print STDERR
$message;
91 print STDERR
"exiting FeatureTree.pm!\n";
96 sub get_feature_list
{
104 Desc: Store the scientific organism name. This is used only for cleaner error messages.
113 my ($twig, $elt) = @_;
114 my $o_name=$elt->text;
115 push @
{$Ftree[0] }, $o_name;
120 =head2 organism_taxon_id
123 Desc: Store the genbank taxon_id
131 sub organism_taxon_id
{
# Twig handler registered for 'Org-ref_db/Dbtag/Dbtag_tag/Object-id/Object-id_id'.
# Receives the twig and the matched element; $twig is not used here.
132 my ($twig, $elt) = @_;
133 my $o_taxon_id=$elt->text;
# Append the taxon id to slot 1 of @Ftree (declared outside this view).
# Unlike the GI and PubMed handlers, no duplicate check is made here.
134 push @
{$Ftree[1] }, $o_taxon_id;
139 my ($twig, $elt)= @_;
141 my $name_data= $elt->text;
142 push @
{ $Ftree[2] }, $name_data;
143 #print STDERR "**name (genbank accession) $name_data\n";
149 my ($twig, $elt)= @_;
# Record each GI only once.  Stored entries are compared with `eq` instead of
# interpolating $gi into a regex, so regex metacharacters in the value cannot
# cause false matches or a pattern compile error.
push @{ $Ftree[3] }, $gi
    unless grep { $_ eq $gi } @{ $Ftree[3] };
157 my ($twig, $elt)= @_;
158 my $pubmed_id = $elt->text;
160 # my @pubmed_ids = $feature_object->get_pubmed_ids();
161 # my @already_exists = grep(/$pubmed_id/, @pubmed_ids);
162 #if(!@already_exists){
163 # $feature_object->add_pubmed_id($pubmed_id);
# Record each PubMed id only once.  Stored entries are compared with `eq`
# instead of interpolating $pubmed_id into a regex, so regex metacharacters in
# the value cannot cause false matches or a pattern compile error.
push @{ $Ftree[4] }, $pubmed_id
    unless grep { $_ eq $pubmed_id } @{ $Ftree[4] };
170 my ($twig, $elt)= @_;
171 my $version= $elt->text;
172 push @
{ $Ftree[5] }, $version;
178 my ($twig, $elt)= @_;
180 push @
{ $Ftree[6] } , $res;
186 my ($twig, $elt)= @_;
187 my $seqlen=$elt->text;
188 push @
{ $Ftree[7] } , $seqlen;
193 my ($twig, $elt)= @_;
195 push @
{ $Ftree[8] } , $desc;
202 my ($twig, $elt)= @_;
# Lookup entries: molecule text as found in the record => molecule type name
# stored for the feature.
# NOTE(review): the '1' key presumably covers records that give the biomol as
# a numeric code -- confirm against NCBI's MolInfo definition.
211 'genomic DNA' => 'genomic_clone',
212 'genomic clone' => 'genomic_clone',
213 '1' => 'genomic_clone',
214 'genomic RNA' => 'RNA',
215 'Pre-RNA' => 'PRE-RNA',
216 'unassigned DNA' => 'DNA',
217 'unassigned RNA' => 'RNA',
# Translate the element's text through %mol_hash (presumably the table above).
224 my $mol_text= ($elt->text);
225 my $molecule = $mol_hash{$mol_text};
227 warn "no molecule type found for type $mol_text!!\n";}
# NOTE(review): when the lookup misses, $molecule appears to be undef here yet
# it is still printed and pushed onto slot 9 of @Ftree -- confirm that is intended.
229 print STDERR
"molecule type = $molecule\n\n";
230 push @
{ $Ftree[9] } , $molecule;