Merge pull request #42 from solgenomics/topic/duplicate_image_warning
[cxgn-corelibs.git] / lib / CXGN / Transcript / EST.pm
blob45924ade3ca7053b1e1632a9e6c98655b213f493
2 =head1 NAME
4 CXGN::Transcript::EST - a class to deal with EST sequences.
6 =head1 DESCRIPTION
8 This class attempts to sanely wrap some of the more esoteric features of the sgn.est table, such as the flags and status fields, and provides accessors for all the database fields of sgn.est and sgn.qc_report.
10 =head2 The meaning of the flag fields
12 The flag bits have the following meaning, according to documentation by Koni:
14 =over 10
16 =item flag
18 0x1 Vector parsing anomaly
19 0x2 Possibly chimeric (vector parsing triggered)
20 0x4 Insert too short
21 0x8 High expected error (low base calling quality values overall)
22 0x10 Low complexity
23 0x20 E.coli or cloning host contamination
24 0x40 rRNA contamination
25 0x80 Possibly chimeric (arabidopsis screen)
26 0x100 Possibly chimeric (internal screen during unigene assembly)
27 0x200 Manually censored (reason may not be recorded)
29 =item status
31 0x1 Legacy (sequence & identifier tracked for only legacy dataset support)
32 0x2 Discarded (sequence is formally "forgotten")
33 0x4 Deprecated (will end up as 0x2 or 0x1 in subsequent releases)
34 0x8 Censored (see also 0x200 in flags above)
35 0x10 Vector/Quality not assessed
36 0x20 Contaminants not assessed
37 0x40 Chimera not assessed
39 =back
41 =head1 AUTHOR
43 Lukas Mueller <lam87@cornell.edu>
45 Adapted from the unstructured est detail page hacking that occurred over the years by multiple authors.
47 =head1 CLASS METHODS
49 The following class methods are implemented:
51 =cut
53 use strict;
55 package CXGN::Transcript::EST;
57 use CXGN::DB::Object;
59 use base qw | CXGN::DB::Object |;
# Maps each flag name to the position of its bit in the flags field.
# Position 0 is the least significant bit; get_bit()/set_bit() turn a
# position n into the mask 2 ** n, shown in the trailing comments.
our %flags = (
    vector_parsing_anomaly                    => 0,    # 0x1
    possibly_chimeric                         => 1,    # 0x2
    insert_too_short                          => 2,    # 0x4
    high_expected_error                       => 3,    # 0x8
    low_complexity                            => 4,    # 0x10
    ecoli_or_cloning_host_contamination       => 5,    # 0x20
    rRNA_contamination                        => 6,    # 0x40
    possibly_chimeric_arabidopsis_screen      => 7,    # 0x80
    possibly_chimeric_unigene_assembly_screen => 8,    # 0x100
    manually_censored                         => 9,    # 0x200
);
# Maps each status name to the position of its bit in the status field.
# Position 0 is the least significant bit; get_bit()/set_bit() turn a
# position n into the mask 2 ** n, shown in the trailing comments.
#
# The positions are 0-based so that they line up with the documented
# masks (0x1 Legacy ... 0x40 Chimera not assessed) and with %flags.
# Previously they started at 1, so every name addressed the bit of its
# neighbour (e.g. 'legacy' tested the 0x2 "Discarded" bit).
our %status = (
    legacy                      => 0,    # 0x1
    discarded                   => 1,    # 0x2
    deprecated                  => 2,    # 0x4
    censored                    => 3,    # 0x8
    vector_quality_not_assessed => 4,    # 0x10
    contaminants_not_assessed   => 5,    # 0x20
    contaminats_not_assessed    => 5,    # 0x20, historical misspelling kept for existing callers
    chimera_not_assessed        => 6,    # 0x40
);
# Constructor. Builds the object through the parent class constructor,
# passing it the database handle. When a true $id is supplied it is
# stored as the est_id and fetch() is called to populate the object;
# otherwise the object is returned unpopulated.
sub new {
    my ($class, $dbh, $id) = @_;

    my $self = $class->SUPER::new($dbh);
    return $self unless $id;

    $self->set_est_id($id);
    $self->fetch();
    return $self;
}
# Runs the EST lookup query for this object's est_id and copies the
# columns of the first returned row into the object via the set_*
# accessors.
# NOTE(review): if no row matches, fetchrow_hashref presumably returns
# undef and every field (including est_id) ends up undef -- confirm that
# callers expect this.
sub fetch {
    my ($self) = @_;

    my $sgn = 'sgn';    # schema name interpolated into the query text
    my $query = "SELECT est_id, read_id, flags, status, seq as untrimmed_seq, substring(seq from hqi_start::int for hqi_length::int) as trimmed_seq, hqi_start, hqi_length, clone.clone_name, seqread.read_id, seqread.trace_name,seqread.direction, seqread.trace_location FROM $sgn.est LEFT JOIN $sgn.qc_report using(est_id) LEFT JOIN $sgn.seqread USING(read_id) LEFT JOIN $sgn.clone USING(clone_id) where est_id=?";

    my $statement = $self->get_dbh()->prepare($query);
    $statement->execute($self->get_est_id());
    my $row = $statement->fetchrow_hashref();

    # Identifiers
    $self->set_est_id($row->{est_id});
    $self->set_read_id($row->{read_id});

    # Sequence data and the high-quality interval columns
    $self->set_trimmed_seq($row->{trimmed_seq});
    $self->set_raw_seq($row->{untrimmed_seq});
    $self->set_hqi_start($row->{hqi_start});
    $self->set_hqi_length($row->{hqi_length});

    # Clone and trace columns
    $self->set_clone_name($row->{clone_name});
    $self->set_direction($row->{direction});
    $self->set_trace_name($row->{trace_name});
    $self->set_trace_location($row->{trace_location});

    # Raw bit fields
    $self->set_status($row->{status});
    $self->set_flags($row->{flags});
}
119 =head2 new_with_alternate_id
121 Usage:
122 Desc:
123 Ret:
124 Args:
125 Side Effects:
126 Example:
128 =cut
# Unfinished alternate constructor: it only builds the id_linkage lookup
# query and never runs it or creates an object.
# TODO: implement. Until then the sub's value is that of its last
# statement, i.e. the query string, not an object.
sub new_with_alternate_id {
    my ($class, $dbh, $alternate_id) = @_;

    # Not yet....
    my $q = "SELECT internal_id, internal_id_type, t1.comment, t2.comment from id_linkage as il LEFT OUTER JOIN types as t1 ON (t1.type_id=il.internal_id_type) LEFT OUTER JOIN types as t2 ON (t2.type_id=il.link_id_type) where il.link_id=?";
}
# Placeholder for writing the object back to the database.
# The body is empty, so nothing is stored and no value is returned.
sub store {
    # intentionally empty
}
145 =head2 get_est_id, set_est_id
147 Usage:
148 Desc:
149 Ret:
150 Args:
151 Side Effects:
152 Example:
154 =cut
# Returns the est_id stored on this object (undef if none has been set).
sub get_est_id {
    my ($self) = @_;
    return $self->{est_id};
}
# Stores the given value as this object's est_id.
sub set_est_id {
    my ($self, $est_id) = @_;
    $self->{est_id} = $est_id;
}
167 =head2 get_read_id, set_read_id
169 Usage:
170 Desc:
171 Ret:
172 Args:
173 Side Effects:
174 Example:
176 =cut
# Returns the read_id stored on this object (undef if none has been set).
sub get_read_id {
    my ($self) = @_;
    return $self->{read_id};
}
# Stores the given value as this object's read_id.
sub set_read_id {
    my ($self, $read_id) = @_;
    $self->{read_id} = $read_id;
}
189 =head2 get_trimmed_seq
191 Usage:
192 Desc: gets the trimmed seq. To set the trimmed seq,
193 set the hqi_start and hqi_length parameters.
194 Ret:
195 Args:
196 Side Effects:
197 Example:
199 =cut
# Returns the trimmed sequence stored on this object (undef if unset).
sub get_trimmed_seq {
    my ($self) = @_;
    return $self->{trimmed_seq};
}
# Stores the given value as this object's trimmed sequence.
sub set_trimmed_seq {
    my ($self, $trimmed_seq) = @_;
    $self->{trimmed_seq} = $trimmed_seq;
}
212 =head2 get_trimmed_qscores
214 Usage:
215 Desc:
216 Ret:
217 Args:
218 Side Effects:
219 Example:
221 =cut
# Placeholder for returning the quality scores of the trimmed sequence.
# The body is empty, so no value is returned.
sub get_trimmed_qscores {
    # intentionally empty
}
227 =head2 get_raw_qscores, set_raw_qscores
229 Usage:
230 Desc:
231 Ret:
232 Args:
233 Side Effects:
234 Example:
236 =cut
# Returns the raw quality scores stored on this object (undef if unset).
sub get_raw_qscores {
    my ($self) = @_;
    return $self->{raw_qscores};
}
# Stores the given value as this object's raw quality scores.
sub set_raw_qscores {
    my ($self, $raw_qscores) = @_;
    $self->{raw_qscores} = $raw_qscores;
}
251 =head2 get_raw_seq, set_raw_seq
253 Usage:
254 Desc:
255 Ret:
256 Args:
257 Side Effects:
258 Example:
260 =cut
# Returns the raw (untrimmed) sequence stored on this object (undef if unset).
sub get_raw_seq {
    my ($self) = @_;
    return $self->{raw_seq};
}
# Stores the given value as this object's raw (untrimmed) sequence.
sub set_raw_seq {
    my ($self, $raw_seq) = @_;
    $self->{raw_seq} = $raw_seq;
}
276 =head2 get_flags, set_flags
278 Usage:
279 Desc: get/set the raw flag information. The flag
280 field contains information about chimera
281 status and contamination. This information
282 is better accessed through dedicated accessors
283 that are also provided instead of manipulating
284 the flag field directly.
285 Property: the flags [byte]
286 Side Effects:
287 Example:
289 =cut
# Returns the raw flags value stored on this object (undef if unset).
# Individual bits are read with get_flag_bit().
sub get_flags {
    my ($self) = @_;
    return $self->{flags};
}
# Stores the given value as this object's raw flags value.
sub set_flags {
    my ($self, $flags_value) = @_;
    $self->{flags} = $flags_value;
}
302 =head2 get_status, set_status
304 Usage:
305 Desc: get/set the raw status field. To get/set individual
306 status bits, the get_status_bit()/set_status_bit() accessors should be used
307 instead of manipulating the status field directly
308 (safer).
309 Property:
310 Side Effects:
311 Example:
313 =cut
# Returns the raw status value stored on this object (undef if unset).
# Individual bits are read with get_status_bit().
sub get_status {
    my ($self) = @_;
    return $self->{status};
}
# Stores the given value as this object's raw status value.
sub set_status {
    my ($self, $status_value) = @_;
    $self->{status} = $status_value;
}
327 =head2 get_hqi_start, set_hqi_start
329 Usage:
330 Desc:
331 Ret:
332 Args:
333 Side Effects:
334 Example:
336 =cut
# Returns the hqi_start value stored on this object (undef if unset).
sub get_hqi_start {
    my ($self) = @_;
    return $self->{hqi_start};
}
# Stores the given value as this object's hqi_start.
sub set_hqi_start {
    my ($self, $hqi_start) = @_;
    $self->{hqi_start} = $hqi_start;
}
349 =head2 get_hqi_length, set_hqi_length
351 Usage:
352 Desc:
353 Ret:
354 Args:
355 Side Effects:
356 Example:
358 =cut
# Returns the hqi_length value stored on this object (undef if unset).
sub get_hqi_length {
    my ($self) = @_;
    return $self->{hqi_length};
}
# Stores the given value as this object's hqi_length.
sub set_hqi_length {
    my ($self, $hqi_length) = @_;
    $self->{hqi_length} = $hqi_length;
}
372 =head2 get_qstart, set_qstart
374 Usage:
375 Desc:
376 Ret:
377 Args:
378 Side Effects:
379 Example:
381 =cut
# Returns the qstart value stored on this object (undef if unset).
sub get_qstart {
    my ($self) = @_;
    return $self->{qstart};
}
# Stores the given value as this object's qstart.
sub set_qstart {
    my ($self, $qstart) = @_;
    $self->{qstart} = $qstart;
}
395 =head2 get_qend, set_qend
397 Usage:
398 Desc:
399 Ret:
400 Args:
401 Side Effects:
402 Example:
404 =cut
# Returns the qend value stored on this object (undef if unset).
sub get_qend {
    my ($self) = @_;
    return $self->{qend};
}
# Stores the given value as this object's qend.
sub set_qend {
    my ($self, $qend) = @_;
    $self->{qend} = $qend;
}
417 =head2 get_istart, set_istart
419 Usage:
420 Desc:
421 Ret:
422 Args:
423 Side Effects:
424 Example:
426 =cut
# Returns the istart value stored on this object (undef if unset).
sub get_istart {
    my ($self) = @_;
    return $self->{istart};
}
# Stores the given value as this object's istart.
sub set_istart {
    my ($self, $istart) = @_;
    $self->{istart} = $istart;
}
439 =head2 get_iend, set_iend
441 Usage:
442 Desc:
443 Ret:
444 Args:
445 Side Effects:
446 Example:
448 =cut
# Returns the iend value stored on this object (undef if unset).
sub get_iend {
    my ($self) = @_;
    return $self->{iend};
}
# Stores the given value as this object's iend.
sub set_iend {
    my ($self, $iend) = @_;
    $self->{iend} = $iend;
}
461 =head2 get_trace_name
463 Usage:
464 Desc:
465 Ret:
466 Args:
467 Side Effects:
468 Example:
470 =cut
# Returns the trace name stored on this object (undef if unset).
sub get_trace_name {
    my ($self) = @_;
    return $self->{trace_name};
}
478 =head2 set_trace_name
480 Usage:
481 Desc:
482 Ret:
483 Args:
484 Side Effects:
485 Example:
487 =cut
# Stores the given value as this object's trace name.
sub set_trace_name {
    my ($self, $trace_name) = @_;
    $self->{trace_name} = $trace_name;
}
494 =head2 get_direction
496 Usage:
497 Desc:
498 Ret:
499 Args:
500 Side Effects:
501 Example:
503 =cut
# Returns the direction value stored on this object (undef if unset).
sub get_direction {
    my ($self) = @_;
    return $self->{direction};
}
511 =head2 set_direction
513 Usage:
514 Desc:
515 Ret:
516 Args:
517 Side Effects:
518 Example:
520 =cut
# Stores the given value as this object's direction.
sub set_direction {
    my ($self, $direction) = @_;
    $self->{direction} = $direction;
}
527 =head2 get_trace_location
529 Usage:
530 Desc:
531 Ret:
532 Args:
533 Side Effects:
534 Example:
536 =cut
# Returns the trace location stored on this object (undef if unset).
sub get_trace_location {
    my ($self) = @_;
    return $self->{trace_location};
}
544 =head2 set_trace_location
546 Usage:
547 Desc:
548 Ret:
549 Args:
550 Side Effects:
551 Example:
553 =cut
# Stores the given value as this object's trace location.
sub set_trace_location {
    my ($self, $trace_location) = @_;
    $self->{trace_location} = $trace_location;
}
560 =head2 get_clone_name
562 Usage:
563 Desc:
564 Ret:
565 Args:
566 Side Effects:
567 Example:
569 =cut
# Returns the clone name stored on this object (undef if unset).
sub get_clone_name {
    my ($self) = @_;
    return $self->{clone_name};
}
577 =head2 set_clone_name
579 Usage:
580 Desc:
581 Ret:
582 Args:
583 Side Effects:
584 Example:
586 =cut
# Stores the given value as this object's clone name.
sub set_clone_name {
    my ($self, $clone_name) = @_;
    $self->{clone_name} = $clone_name;
}
593 =head2 accessors get_flag_bit, set_flag_bit
595 Usage: my $est->set_flag_bit('possibly_chimeric', 1);
596 my $est->set_flag_bit('insert_too_short', 0);
597 my $low_complexity_flag = $est->get_flag_bit('low_complexity');
598 Desc: this function sets the flags using common accessors. The flag
599 name has to be provided as an argument. The legal flags are:
600 o vector_parsing_anomaly
601 o possibly_chimeric
602 o insert_too_short
603 o high_expected_error
604 o low_complexity
605 o ecoli_or_cloning_host_contamination
606 o rRNA_contamination
607 o possibly_chimeric_arabidopsis_screen
608 o possibly_chimeric_unigene_assembly_screen
609 o manually_censored
610 the program will die if an illegal flag name is supplied.
611 Property manipulates the flags property in the est table. The flags can
612 be accessed collectively as an int using the accessors
613 get_flags() and set_flags()
616 =cut
# Returns 1 or 0 depending on whether the named flag bit is set in this
# object's flags value. Dies if $bit_name is not a key of %flags.
sub get_flag_bit {
    my ($self, $bit_name) = @_;

    die "no bit with name $bit_name" unless exists $flags{$bit_name};

    my $position = $flags{$bit_name};
    return $self->get_bit($self->get_flags(), $position);
}
# Sets (true $bit_value) or clears (false $bit_value) the named flag bit
# and writes the resulting value back through set_flags().
# Dies if $bit_name is not a key of %flags.
sub set_flag_bit {
    my ($self, $bit_name, $bit_value) = @_;

    exists $flags{$bit_name}
        or die "no bit with name $bit_name";

    my $updated = $self->set_bit($self->get_flags(), $flags{$bit_name}, $bit_value);
    $self->set_flags($updated);
}
640 =head2 accessors get_status_bit, set_status_bit
642 Usage: $est->set_status_bit('contaminants_not_assessed', 1)
643 Desc: sets specific bits in the status fields of the
644 sgn.est table. The currently defined status bit names are:
645 o legacy
646 o discarded
647 o deprecated
648 o censored
649 o vector_quality_not_assessed
650 o contaminats_not_assessed
651 o chimera_not_assessed
652 undefined status bit names will cause a die.
653 Property: The function accesses the status bits in the est table
654 through the accessors set_status() and get_status().
656 =cut
# Returns 1 or 0 depending on whether the named status bit is set in this
# object's status value. Dies if $status_bit_name is not a key of %status.
sub get_status_bit {
    my ($self, $status_bit_name) = @_;

    die "Status bit $status_bit_name does not exist!"
        unless exists $status{$status_bit_name};

    my $position = $status{$status_bit_name};
    return $self->get_bit($self->get_status(), $position);
}
# Sets (true $value) or clears (false $value) the named status bit and
# writes the resulting value back through set_status().
# Dies if $status_bit_name is not a key of %status.
sub set_status_bit {
    my ($self, $status_bit_name, $value) = @_;

    exists $status{$status_bit_name}
        or die "Status bit $status_bit_name does not exist!";

    my $updated = $self->set_bit($self->get_status(), $status{$status_bit_name}, $value);
    $self->set_status($updated);
}
677 =head2 function get_bit
679 Usage: my $bit = $est->get_bit($int, $n)
680 Desc: tests the $n-th bit (counting from 0) in $int and returns
681 1 if that bit is set, 0 otherwise.
682 Example: my $bit = $est->get_bit($status, 4);
683 Note: used internally for the get_flag_bit()
684 and get_status_bit() accessors
686 =cut
# Tests a single bit of an integer.
#   $int       - the integer to inspect
#   $which_bit - bit position, 0 being the least significant bit
# Returns 1 if that bit is set in $int, 0 otherwise. $self is not used.
sub get_bit {
    my ($self, $int, $which_bit) = @_;

    my $mask = 2 ** $which_bit;
    return ($int & $mask) ? 1 : 0;
}
702 =head2 function set_bit
704 Usage:
705 Desc:
706 Ret:
707 Args:
708 Side Effects:
709 Example:
711 =cut
# Returns a copy of an integer with one bit set or cleared.
#   $int       - the integer to start from (undef is treated as 0)
#   $which_bit - bit position, 0 being the least significant bit
#   $value     - any true value sets the bit, any false value clears it
# Returns the new integer; $int itself and $self are not modified.
#
# Clearing uses the complement of the bit's own mask, so every other bit
# of $int is preserved. The previous implementation ANDed with
# (1023 - mask), which discarded all bits above bit 9 and could not
# clear a bit at position 10 or higher.
sub set_bit {
    my ($self, $int, $which_bit, $value) = @_;

    $int = 0 unless defined $int;
    my $mask = 1 << $which_bit;

    return $value
        ? ($int | $mask)
        : ($int & ~$mask);
}
735 1;#do not remove