8 John Binns <zombieite@gmail.com>
12 Marker object for retrieving marker data. It is streamlined to work fairly efficiently with both scripts which only want one or two pieces of marker data, and scripts which want all of a marker's data. The module for storing marker data is a subclass of this, CXGN::Marker::Modifiable.
18 use CXGN
::Marker
::Tools
;
19 use CXGN
::Marker
::Location
;
20 use CXGN
::Marker
::PCR
::Experiment
;
21 use CXGN
::Marker
::RFLP
::Experiment
;
23 use CXGN
::Tools
::Text
;
24 use CXGN
::DB
::Connection
;
29 my $marker=CXGN::Marker->new($dbh,$marker_id);
31 Takes a dbh and marker ID and returns a marker object.
35 #this is the constructor for a marker whose data you want to get from the database. you must send in a dbh and a marker id. this returns undef if the marker is not found.
38 my($dbh,$marker_id)=@_;
39 my $self=bless({},$class);
40 if(CXGN
::DB
::Connection
::is_valid_dbh
($dbh)) {
44 croak
"You must supply a dbh as the first argument to the marker constructor";
46 unless($marker_id and $marker_id=~/^\d+$/ and $marker_id>0) {
47 croak
"Marker ID '$marker_id' is not a valid ID";
49 $self->{marker_id
}=CXGN
::Marker
::Tools
::is_valid_marker_id
($dbh,$marker_id);
50 unless($self->{marker_id
}) {
51 warn"Marker ID '$marker_id' not found in database";
59 Usage: my $marker = CXGN::Marker->new($dbh, $name)
60 Desc: retrieves the marker with name $name.
61 The name must be in the marker_alias table
62 as the preferred name (which is unique).
63 Ret: a CXGN::Marker object
64 Args: a database handle, and a marker name [string]
65 Side Effects: accesses the database
74 my $query = "SELECT marker_id FROM sgn.marker_alias WHERE alias ilike ? and preferred='t'";
75 my $sth = $dbh->prepare($query);
77 my ($marker_id) = $sth->fetchrow_array();
78 if (!defined($marker_id)) { return undef; }
79 my $self = $class->new($dbh, $marker_id);
85 =head2 _should_we_run_query
87 For internal use only.
91 #store whether this query has already been run, so we don't have to run it again.
92 #this helps optimize the speed of the display object, for scripts that use a lot of them.
93 #also, it prevents the Modifiable subclass from being able to clobber its own modifications by calling accessors
94 #which reload data from the database.
95 sub _should_we_run_query
{
99 #if we do not have a marker id yet, we are a new marker being created for future insertion. therefore, our
100 #data will not be in the database, and running a query would be unnecessary and/or bad.
101 unless($self->{marker_id
}) {
105 #if this query has already been run, return that we do not need to run it again
106 if($self->{data_populated
}->{$query_name}) {
110 #if it hasn't already been run, note that we assume it will be now be run, and return that it should be run
111 $self->{data_populated
}->{$query_name}=1;
118 print $marker->as_string();
120 Returns a string of this markers data for debugging.
124 #for debugging and such
128 #any time you need a fully populated marker, you must run populate_from_db
129 $self->populate_from_db();
132 $string.="<marker>\n";
133 $string.="Name(s): ".CXGN
::Tools
::Text
::list_to_string
(@
{$self->{marker_names
}});
135 if($self->{marker_id
}) {
136 $string.="\tSGN-M$self->{marker_id}\n";
139 $string.="\t(Marker not yet inserted into database)\n";
142 $string.="Collections: ".CXGN
::Tools
::Text
::list_to_string
(@
{$self->{collections
}})."\n";
143 for my $location(@
{$self->{locations
}}) {
144 $string.="Location:\tMap version ID '$location->{map_version_id}'\tLinkage group ID '$location->{lg_id}'\tPosition '$location->{position}'\tConfidence '$location->{confidence}'\tSubscript '$location->{subscript}'\n";
145 $string.="Mapped via:\tPCR exp ID: '$location->{pcr_experiment_id}'\tRFLP exp ID: '$location->{rflp_experiment_id}'\n";
147 $string.="Non-mapping PCR experiments:\n";
148 for my $pcr_id(@
{$self->{non_mapping_pcr_experiment_ids
}}) {
149 $string.=CXGN
::Marker
::PCR
::Experiment
->new($self->{dbh
},$pcr_id)->as_string();
152 $string.="</marker>\n";
159 my $id=$marker->marker_id();
161 Returns this markers ID.
165 #you cannot set the marker id except in the constructor
168 return $self->{marker_id
};
171 =head2 name_that_marker
173 my $marker_name=$marker->name_that_marker();
174 my @marker_names=$marker->name_that_marker();
176 Returns a the preferred alias, or all aliases starting with the preferred, depending on what you are expecting.
180 #the marker name is stored as a "preferred" alias
181 sub name_that_marker
{
183 if ($self->_should_we_run_query('name_that_marker')) {
184 my $name_q = $self->{dbh
}->prepare("select alias from marker_alias where marker_id=? order by preferred desc,alias");
185 $name_q->execute($self->{marker_id
});
186 while(my ($alias) = $name_q->fetchrow_array()) {
187 push(@
{$self->{marker_names
}},$alias);
191 return @
{$self->{marker_names
}};
194 return $self->{marker_names
}->[0];
211 return $self->name_that_marker();
216 =head2 associated_loci
218 Usage: my @locus_info = $marker->associated_loci()
219 Desc: retrieves information about associated loci.
220 Ret: a list of listrefs of the form [ $locus_id, $locus_name].
222 Side Effects: accesses the database
227 sub associated_loci
{
230 my $kfg_query = $self->{dbh
}->prepare('SELECT locus_id, locus_name FROM phenome.locus_marker inner join phenome.locus using(locus_id) where marker_id=?');
231 $kfg_query->execute($self->marker_id);
233 return unless $kfg_query->rows() > 0;
236 while (my ($locus_id, $locus_name) = $kfg_query->fetchrow_array()){
237 push @loci, [$locus_id, $locus_name];
244 Usage: my $hashref = $marker->rflp_data()
246 Ret: a hashref with the following keys:
247 r.rflp_id, r.marker_id, r.rflp_name, r.insert_size,
248 r.vector, r.cutting_site, r.drug_resistance,
249 fs.fasta_sequence as forward_seq,
250 rs.fasta_sequence as reverse_seq, r.forward_seq_id,
261 my $rflp_query = q{SELECT r.rflp_id, r.marker_id, r.rflp_name, r.insert_size,
262 r.vector, r.cutting_site, r.drug_resistance,
263 fs.fasta_sequence as forward_seq,
264 rs.fasta_sequence as reverse_seq, r.forward_seq_id,
265 r.reverse_seq_id FROM
266 rflp_markers AS r LEFT JOIN rflp_sequences AS fs ON
267 r.forward_seq_id=fs.seq_id LEFT JOIN rflp_sequences AS rs
268 ON r.reverse_seq_id=rs.seq_id WHERE marker_id=?};
270 my $rflp_sth = $self->{dbh
}->prepare($rflp_query);
271 $rflp_sth->execute($self->marker_id());
272 my $r = $rflp_sth->fetchrow_hashref();
273 unless($r->{rflp_id
}){return'';}
279 =head2 rflp_unigene_matches
281 Usage: my ($a_ref, $b_ref) = $marker->rflp_unigene_matches($forward_id, $reverse_id)
282 Desc: returns unigene match data for rflp markers
283 Ret: a listref with the forward matches and a listref
284 with the reverse matches.
288 TO DO: This needs to be refactored, because sorting by evalue is not a good idea.
292 sub rflp_unigene_matches
{
295 my $sth = $self->{dbh
}->prepare(<<'');
297 'SGN-U' || unigene_id
300 FROM rflp_unigene_associations
306 $sth->execute( $id );
307 $sth->fetchall_arrayref
311 =head2 primer_unigene_matches
313 Usage: @unigene_ids = $marker->primer_unigene_matches()
314 Desc: returns a list of unigene ids that have primers
315 matching this marker sequence
316 Ret: a list of unigene ids
318 Side Effects: accesses the database
323 sub primer_unigene_matches
{
325 return $self->{dbh
}->selectcol_arrayref("SELECT DISTINCT unigene_id FROM primer_unigene_match WHERE marker_id=".$self->marker_id());
330 Usage: my @data = $marker->ssr_data()
331 Desc: returns ssr data.
332 Ret: returns a list with the following values:
333 $ssr_id, $marker_id, $ssr_name, $est_trace,
334 $start_primer, $end_primer, $pcr_length, $ann_high,
345 # get legacy information
347 my $ssr_sth = $self->{dbh
}->prepare("SELECT s.ssr_id, s.ssr_name, et.trace_name, s.start_primer, s.end_primer, s.pcr_product_ln, s.ann_high, s.ann_low FROM ssr AS s LEFT JOIN seqread AS et ON s.est_read_id=et.read_id where marker_id=?");
349 $ssr_sth->execute($self->marker_id());
350 if(my ($ssr_id, $ssr_name, $est_trace, $start_primer, $end_primer, $pcr_length, $ann_high, $ann_low) = $ssr_sth->fetchrow_array) {
353 return ($ssr_id, $ssr_name, $est_trace, $start_primer, $end_primer, $pcr_length, $ann_high, $ann_low);
356 # get information from the new way to store things (sequence table)
357 my $ssr_sth = $self->{dbh
}->prepare("SELECT sequence, cvterm.name as seq_type, marker_experiment.protocol FROM sgn.marker_alias join sgn.marker_experiment using(marker_id) JOIN sgn.pcr_experiment using(pcr_experiment_id) JOIN sgn.pcr_experiment_sequence using(pcr_experiment_id) join sgn.sequence using(sequence_id) JOIN cvterm on (type_id=cvterm_id) where marker_alias.marker_id=?");
359 $ssr_sth->execute($self->marker_id());
363 while (my ($seq, $seq_type, $protocol) = $ssr_sth->fetchrow_array()) {
364 $seqs{$seq_type} = $seq;
365 $protocol_name = $protocol;
368 if ($protocol_name=~/ssr/i) {
369 print STDERR
"\n\n\n:-)\n\n\n";
371 return (0, $self->get_name(), 0, $seqs{forward_primer
}, $seqs{reverse_primer
}, 0, undef, undef, undef);
376 =head2 ssr_motif_info
378 Usage: my @motif_info = $marker->ssr_motif_info();
379 Desc: returns motif information if the $marker is an
380 SSR marker, an empty list otherwise
381 Ret: a list of lists with [ $motif, $repeat_count ]
394 my $repeats_sth = $self->{dbh
}->prepare("SELECT repeat_motif, reapeat_nr FROM ssr_repeats WHERE ssr_id=?");
395 $repeats_sth->execute($ssr_id);
397 while (my ($motif, $r_nr) = $repeats_sth->fetchrow_array) {
398 push @motif_info, [ $motif, $r_nr ];
402 #data from sequence table
403 my $repeats_sth = $self->{dbh
}->prepare("SELECT sequence FROM sgn.sequence JOIN sgn.pcr_experiment_sequence using(sequence_id) JOIN cvterm on (type_id=cvterm_id) JOIN sgn.pcr_experiment on (pcr_experiment_sequence.pcr_experiment_id=pcr_experiment.pcr_experiment_id) WHERE marker_id=? and name='repeat_unit'");
404 $repeats_sth->execute($self->marker_id());
405 push @motif_info, [ $repeats_sth->fetchrow_array() ];
416 Usage: my $hashref = $marker->cos_data()
417 Desc: retrieves cos marker related data
418 Ret: a hashref with the following keys:
443 my $marker_id = shift;
445 my $cos_query = q{SELECT c.cos_marker_id, c.marker_id, c.cos_id, c.at_match,
446 c.at_position, c.bac_id, c.best_gb_prot_hit, c.at_evalue,
447 c.at_identities, c.mips_cat, c.description, c.comment,
448 c.gbprot_evalue, c.gbprot_identities, s.trace_name
449 FROM cos_markers AS c LEFT JOIN seqread AS s ON
450 c.est_read_id=s.read_id WHERE c.marker_id = ?};
452 my $cos_sth = $self->{dbh
}->prepare($cos_query);
453 $cos_sth->execute($self->marker_id());
454 my $r = $cos_sth->fetchrow_hashref();
462 my $collections=$marker->collections();
464 Returns an arrayref of this markers collections.
468 #this is a list of groups this marker is considered to be a part of. this is usually a list of one or two collection names.
471 if($self->_should_we_run_query('collections')) {
472 my $collections_q=$self->{dbh
}->prepare
478 inner join marker_collectible using (marker_id)
479 inner join marker_collection using (mc_id)
483 $collections_q->execute($self->{marker_id
});
484 while(my($collection)=$collections_q->fetchrow_array()) {
485 push(@
{$self->{collections
}},$collection);
488 return $self->{collections
};
491 =head2 derived_from_sources
493 my $sources=$marker->derived_from_sources();
495 for my $source(@{$sources}) {
496 my $source_name=$source->{source_name};
497 my $id_in_source=$source->{id_in_source};
498 print"Marker is from source '$source_name' with ID '$id_in_source'\n";
502 Returns an arrayref of sources whence this marker came.
506 #ids of all sources from which this marker was derived
507 sub derived_from_sources
{
509 if($self->_should_we_run_query('derived_from_sources')) {
510 my $sources_q=$self->{dbh
}->prepare
514 derived_from_source_id,
518 inner join derived_from_source using (derived_from_source_id)
522 $sources_q->execute($self->{marker_id
});
523 $self->{derived_from_sources
}=$sources_q->fetchall_arrayref({});
525 return $self->{derived_from_sources
};
530 my $exps=$marker->experiments();
532 for my $exp(@{$exps}) {
533 if($exp->{location}){print $exp->{location}->as_string();}
534 if($exp->{pcr_experiment}){print $exp->{pcr_experiment}->as_string();}
535 if($exp->{rflp_experiment}){print $exp->{rflp_experiment}->as_string();}
539 Returns an arrayref of hashrefs with keys 'location', 'pcr_experiment', and 'rflp_experiment' which have values which are objects of these types.
543 #get information about all of this marker's locations on various maps
546 my $dbh=$self->{dbh
};
547 if($self->_should_we_run_query('experiments')) {
548 #the order-bys in this SQL statement make the marker_experiment entries with MORE information
549 #show up FIRST. the reason for doing this is that the display page (markerinfo.pl) assumes
550 #that if it has already displayed a location or experiment, that it does not need to display
551 #it again. if an entry with MORE information came LATER in the list, it might be overlooked
552 #by markerinfo. so, to reiterate:
554 #SOME OF THESE ORDER-BYS ARE IMPORTANT FOR THE WEBPAGE DISPLAY!
555 #IF YOU CHANGE THEM, SOME DATA MAY NOT SHOW UP ON THE WEBSITE!
556 my $locations_q=$dbh->prepare
559 marker_experiment_id,
566 left join marker_location using (location_id)
570 marker_location.map_version_id desc,
576 #SOME OF THESE ORDER-BYS ARE IMPORTANT FOR THE WEBPAGE DISPLAY!
577 #IF YOU CHANGE THEM, SOME DATA MAY NOT SHOW UP ON THE WEBSITE!
578 $locations_q->execute($self->{marker_id
});
579 while(my ($marker_experiment_id,$location_id,$pcr_experiment_id,$rflp_experiment_id,$protocol)=$locations_q->fetchrow_array()) {
582 $experiment{location
}=CXGN
::Marker
::Location
->new($dbh,$location_id);
584 if($pcr_experiment_id) {
585 $experiment{pcr_experiment
}=CXGN
::Marker
::PCR
::Experiment
->new($dbh,$pcr_experiment_id);
587 if($rflp_experiment_id) {
588 $experiment{rflp_experiment
}=CXGN
::Marker
::RFLP
::Experiment
->new($dbh,$rflp_experiment_id);
590 $experiment{protocol
}=$protocol;
591 $experiment{marker_experiment_id
}=$marker_experiment_id;
592 push(@
{$self->{experiments
}},\
%experiment);
594 my $orphan_pcr_notification='';
595 #grab any remaining pcr_experiments even if they are missing their marker_experiment entries
596 my $orphan_pcr_q=$dbh->prepare
602 left join marker_experiment using (pcr_experiment_id)
604 marker_experiment.pcr_experiment_id is null
605 and pcr_experiment.marker_id=?
607 $orphan_pcr_q->execute($self->{marker_id
});
608 while(my ($orphan_pcr_id)=$orphan_pcr_q->fetchrow_array()) {
610 $experiment{pcr_experiment
}=CXGN
::Marker
::PCR
::Experiment
->new($dbh,$orphan_pcr_id);
611 push(@
{$self->{experiments
}},\
%experiment);
612 $orphan_pcr_notification.=$self->name_that_marker()." has orphan PCR experiment ID '$orphan_pcr_id'\n";
614 if($orphan_pcr_notification) {
615 #turn this once when beth is finished fixing the known ones
616 #CXGN::Apache::Error::notify('found orphan PCR experiment',$orphan_pcr_notification);
619 return $self->{experiments
};
622 =head2 populate_from_db
624 $marker->populate_from_db();
626 Fully populates the object. Mainly for use by CXGN::Marker::Modifiable to ensure that the marker is in a consistent state.
630 #this function MUST contain all of the accessors which populate this object, because it is used by the Modifiable subclass, which MUST have a fully populated object.
631 #if you add a marker accessor which retrieves data from the database (as the others do), you MUST call it here.
632 sub populate_from_db
{
634 $self->name_that_marker();
635 $self->collections();
636 $self->derived_from_sources();
637 $self->experiments();
644 #----------------------------------------------------------------------------
645 #the following functions are different from those above. they do not populate
646 #the object, and they do not store any state. they are just used to get
647 #ancillary data about a marker. probably other markerinfo.pl functions should
648 #be moved into here eventually.
650 #these COULD populate the object, but most of them won't need to be called more
651 #than once. also, this module kind of works hand in hand with CXGN::Marker::Modifiable,
652 #so I think it would be confusing if the marker could store state in itself, but not
653 #store that state to the database. as this module stands now, all state which
654 #it can store in itself it can also store in the database.
655 #----------------------------------------------------------------------------
656 =head2 current_mapping_experiments
658 my $experiments=$marker->current_mapping_experiments();
660 Usually these are the ones you are interested in, right? Note: this does not get the simple non-mapping polymorphism tests.
664 sub current_mapping_experiments
{
666 my @current_mapping_experiments;
667 my $experiments=$self->experiments();
668 if($experiments and @
{$experiments}) {
669 for my $experiment(@
{$self->{experiments
}}) {
670 if($experiment->{location
}) {
671 my $map_version_id=$experiment->{location
}->map_version_id();
672 if(CXGN
::Map
::Tools
::is_current_version
($self->{dbh
},$map_version_id)) {
673 push(@current_mapping_experiments,$experiment);
678 return \
@current_mapping_experiments;
681 =head2 upa_experiments
683 my $experiments=$marker->upa_experiments();
685 This gets the COSII iUPA and eUPA experiments.
689 sub upa_experiments
{
692 my $experiments=$self->experiments();
693 if($experiments and @
{$experiments}) {
694 for my $experiment(@
{$self->{experiments
}}) {
697 !$experiment->{location
}#if there is no associated location
698 and $experiment->{pcr_experiment
}#and this is a PCR experiment (not RFLP)
699 and#and there is some primer type given
701 $experiment->{pcr_experiment
}->primer_type() eq 'iUPA'
702 or $experiment->{pcr_experiment
}->primer_type() eq 'eUPA'
706 #then it is one of feinan's COSII iUPA or eUPA experiments
707 push(@upa_experiments,$experiment);
711 return \
@upa_experiments;
716 print $marker->comments();
718 Returns marker comment text.
724 my $dbh=$self->{dbh
};
725 my $id=$dbh->quote($self->{marker_id
});
726 my ($comment)=$dbh->selectrow_array("SELECT comment_text from metadata.attribution as a inner join metadata.comments as c using(attribution_id) where a.table_name = 'markers' and row_id = $id");
730 =head2 marker_page_link
732 print"<a href=\"".$marker->marker_page_link()."\">[Link to marker]</a>";
734 Returns the stuff that goes in the 'href' attribute of the 'a' tag which will take you to a markers page.
738 #get a link to a marker's info page
739 sub marker_page_link
{
741 return"/search/markers/markerinfo.pl?marker_id=$self->{marker_id}";
744 =head2 cosii_unigenes
746 my @unigenes=$marker->cosii_unigenes();
748 Returns an array of hashrefs with COSII unigene data.
752 #special marker data accessors
755 unless($self->is_in_collection('COSII')){return;}
756 my $dbh=$self->{dbh
};
758 my $unigene_query=$dbh->prepare('select unigene_id,copies,database_name,sequence_name from cosii_ortholog where marker_id=?');
759 $unigene_query->execute($self->{marker_id
});
760 my $unigene_results_ref=$unigene_query->fetchall_arrayref();
761 my @unigene_results=@
{$unigene_results_ref};
762 for(0..$#unigene_results) {
764 $unigenes[$_]->{unigene_id
}=$unigene_results[$_][0];
765 $unigenes[$_]->{copies
}=$unigene_results[$_][1];
766 $unigenes[$_]->{database_name
}=$unigene_results[$_][2];
767 $unigenes[$_]->{sequence_name
}=$unigene_results[$_][3];
768 if($unigenes[$_]->{unigene_id
})
770 my $unigeneq=$dbh->prepare("SELECT groups.comment from unigene LEFT JOIN unigene_build USING (unigene_build_id) LEFT JOIN groups ON (groups.group_id=unigene_build.organism_group_id) where unigene_id=?");
771 $unigeneq->execute($unigenes[$_]->{unigene_id
});
772 my($org_group_name)=$unigeneq->fetchrow_array();
773 $unigenes[$_]->{organism
}=$org_group_name;
775 else{$unigenes[$_]->{organism
}=undef;}
781 ################################################
783 ################################################
784 #finds whether this marker is in a collection
786 =head2 is_in_collection
788 if($marker->is_in_collection('COSII')){print"This marker is a COSII marker."}
790 Takes a collection name and returns a 1 if the marker is in the collection or a 0 if not.
794 sub is_in_collection
{
796 my($collection_maybe)=@_;
797 unless($collection_maybe){return 0;}
798 my $collections=$self->collections();
799 for my $collection(@
{$collections}) {
800 if($collection_maybe eq $collection) {