4 CXGN::Transcript::EST - a class to deal with EST sequences.
8 This class attempts to sanely wrap some of the more esoteric features of the sgn.est table, such as the flags and status fields, and provides accessors for all the database fields of sgn.est and sgn.qc_report.
10 =head2 The meaning of the flag fields
12 The flag bits have the following meaning, according to documentation by Koni:
18 0x1 Vector parsing anomaly
19 0x2 Possibly chimeric (vector parsing triggered)
21 0x8 High expected error (low base calling quality values overall)
23 0x20 E.coli or cloning host contamination
24 0x40 rRNA contamination
25 0x80 Possibly chimeric (arabidopsis screen)
26 0x100 Possibly chimeric (internal screen during unigene assembly)
27 0x200 Manually censored (reason may not be recorded)
31 0x1 Legacy (sequence & identifier tracked for only legacy dataset support)
32 0x2 Discarded (sequence is formally "forgotten")
33 0x4 Deprecated (will end up as 0x2 or 0x1 in subsequent releases)
34 0x8 Censored (see also 0x200 in flags above)
35 0x10 Vector/Quality not assessed
36 0x20 Contaminants not assessed
37 0x40 Chimera not assessed
43 Lukas Mueller <lam87@cornell.edu>
45 Adapted from the unstructured est detail page hacking that occurred over the years by multiple authors.
49 The following class methods are implemented:
55 package CXGN
::Transcript
::EST
;
59 use base qw
| CXGN
::DB
::Object
|;
61 our %flags = ( vector_parsing_anomaly
=> 0,
62 possibly_chimeric
=> 1,
63 insert_too_short
=> 2,
64 high_expected_error
=> 3,
66 ecoli_or_cloning_host_contamination
=> 5,
67 rRNA_contamination
=> 6,
68 possibly_chimeric_arabidopsis_screen
=> 7,
69 possibly_chimeric_unigene_assembly_screen
=> 8,
70 manually_censored
=> 9
73 our %status = ( legacy
=> 1,
77 vector_quality_not_assessed
=>5,
78 contaminats_not_assessed
=>6,
79 chimera_not_assessed
=>7
89 my $self = $class->SUPER::new
($dbh);
91 $self->set_est_id($id);
100 my $query = "SELECT est_id, read_id, flags, status, seq as untrimmed_seq, substring(seq from hqi_start::int for hqi_length::int) as trimmed_seq, hqi_start, hqi_length, clone.clone_name, seqread.read_id, seqread.trace_name,seqread.direction, seqread.trace_location FROM $sgn.est LEFT JOIN $sgn.qc_report using(est_id) LEFT JOIN $sgn.seqread USING(read_id) LEFT JOIN $sgn.clone USING(clone_id) where est_id=?";
101 my $sth = $self->get_dbh()->prepare($query);
102 $sth->execute($self->get_est_id());
103 my $h = $sth->fetchrow_hashref();
104 $self->set_est_id($h->{est_id
});
105 $self->set_read_id($h->{read_id
});
106 $self->set_trimmed_seq($h->{trimmed_seq
});
107 $self->set_raw_seq($h->{untrimmed_seq
});
108 $self->set_hqi_start($h->{hqi_start
});
109 $self->set_hqi_length($h->{hqi_length
});
110 $self->set_clone_name($h->{clone_name
});
111 $self->set_direction($h->{direction
});
112 $self->set_trace_name($h->{trace_name
});
113 $self->set_trace_location($h->{trace_location
});
114 $self->set_status($h->{status
});
115 $self->set_flags($h->{flags
});
119 =head2 new_with_alternate_id
130 sub new_with_alternate_id
{
133 my $alternate_id = shift;
134 my $q = "SELECT internal_id, internal_id_type, t1.comment, t2.comment from id_linkage as il LEFT OUTER JOIN types as t1 ON (t1.type_id=il.internal_id_type) LEFT OUTER JOIN types as t2 ON (t2.type_id=il.link_id_type) where il.link_id=?";
145 =head2 get_est_id, set_est_id
158 return $self->{est_id
};
164 $self->{est_id
}=shift;
167 =head2 get_read_id, set_read_id
180 return $self->{read_id
};
186 $self->{read_id
}=shift;
189 =head2 get_trimmed_seq
192 Desc: gets the trimmed seq. To set the trimmed seq,
193 set the hqi_start and hqi_length parameters.
201 sub get_trimmed_seq
{
203 return $self->{trimmed_seq
};
207 sub set_trimmed_seq
{
209 $self->{trimmed_seq
}=shift;
212 =head2 get_trimmed_qscores
223 sub get_trimmed_qscores
{
227 =head2 get_raw_qscores, set_raw_qscores
238 sub get_raw_qscores
{
240 return $self->{raw_qscores
};
244 sub set_raw_qscores
{
246 $self->{raw_qscores
}=shift;
251 =head2 get_raw_seq, set_raw_seq
264 return $self->{raw_seq
};
270 $self->{raw_seq
}=shift;
276 =head2 get_flags, set_flags
279 Desc: get/set the raw flag information. The flag
280 field contains information about chimera
281 status and contamination. This information
282 is better accessed through dedicated accessors
283 that are also provided instead of manipulating
284 the flag field directly.
285 Property: the flags [byte]
293 return $self->{flags
};
299 $self->{flags
}=shift;
302 =head2 get_status, set_status
305 Desc: get/set the raw status field. To get/set individual status bits,
306 the get_status_bit() and set_status_bit() accessors should be used
307 instead of manipulating the status field directly.
317 return $self->{status
};
323 $self->{status
}=shift;
327 =head2 get_hqi_start, set_hqi_start
340 return $self->{hqi_start
};
346 $self->{hqi_start
}=shift;
349 =head2 get_hqi_length, set_hqi_length
362 return $self->{hqi_length
};
369 $self->{hqi_length
}=shift;
372 =head2 get_qstart, set_qstart
385 return $self->{qstart
};
392 $self->{qstart
}=shift;
395 =head2 get_qend, set_qend
408 return $self->{qend
};
417 =head2 get_istart, set_istart
430 return $self->{istart
};
436 $self->{istart
}=shift;
439 =head2 get_iend, set_iend
452 return $self->{iend
};
461 =head2 get_trace_name
474 return $self->{trace_name
};
478 =head2 set_trace_name
491 $self->{trace_name
}=shift;
507 return $self->{direction
};
524 $self->{direction
}=shift;
527 =head2 get_trace_location
538 sub get_trace_location
{
540 return $self->{trace_location
};
544 =head2 set_trace_location
555 sub set_trace_location
{
557 $self->{trace_location
}=shift;
560 =head2 get_clone_name
573 return $self->{clone_name
};
577 =head2 set_clone_name
590 $self->{clone_name
}=shift;
593 =head2 accessors get_flag_bit, set_flag_bit
595 Usage: $est->set_flag_bit('possibly_chimeric', 1);
596 $est->set_flag_bit('insert_too_short', 0);
597 my $low_complexity_flag = $est->get_flag_bit('low_complexity');
598 Desc: this function sets the flags using common accessors. The flag
599 name has to be provided as an argument. The legal flags are:
600 o vector_parsing_anomaly
603 o high_expected_error
605 o ecoli_or_cloning_host_contamination
607 o possibly_chimeric_arabidopsis_screen
608 o possibly_chimeric_unigene_assembly_screen
610 the program will die if an illegal flag name is supplied.
611 Property: manipulates the flags property in the est table. The flags can
612 be accessed collectively as an int using the accessors
613 get_flags() and set_flags()
620 my $bit_name = shift;
622 if (!exists($flags{$bit_name})) { die "no bit with name $bit_name"; }
623 return $self->get_bit($self->get_flags(), $flags{$bit_name});
629 my $bit_name = shift;
630 my $bit_value = shift;
632 if (!exists($flags{$bit_name})) { die "no bit with name $bit_name"; }
634 my $new_flags = $self->set_bit($self->get_flags(), $flags{$bit_name}, $bit_value);
636 $self->set_flags($new_flags);
640 =head2 accessors get_status_bit, set_status_bit
642 Usage: $est->set_status_bit('contaminats_not_assessed', 1)
643 Desc: sets specific bits in the status fields of the
644 sgn.est table. The currently defined status bit names are:
649 o vector_quality_not_assessed
650 o contaminats_not_assessed
651 o chimera_not_assessed
652 undefined status bit names will cause a die.
653 Property: The function accesses the status bits in the est table
654 through the accessors set_status() and get_status().
660 my $status_bit_name = shift;
662 if (!exists($status{$status_bit_name})) { die "Status bit $status_bit_name does not exist!"; }
663 return $self->get_bit($self->get_status(), $status{$status_bit_name});
668 my $status_bit_name = shift;
671 if (!exists($status{$status_bit_name})) { die "Status bit $status_bit_name does not exist!"; }
672 my $new_status = $self->set_bit($self->get_status(), $status{$status_bit_name}, $value);
673 $self->set_status($new_status);
677 =head2 function get_bit
679 Usage: my $bit = $est->get_bit($int, $n)
680 Desc: reads the $n-th bit (mask 2**$n) of $int and returns
681 the state of that bit.
682 Example: my $bit = $est->get_bit($status, 4);
683 Note: used internally for the get_flag_bit()
684 and get_status_bit() accessors
691 my $which_bit = shift;
692 my $mask = 2 ** $which_bit;
702 =head2 function set_bit
717 my $which_bit = shift;
720 if ($value) { $value =1; }
723 my $ormask = 2 ** $which_bit;
724 my $andmask = 1023 - $ormask;
727 if ($value ==1) { $new_int = ($int | $ormask); }
728 elsif ($value == 0) { $new_int = ($int & $andmask); }