Merge pull request #5 from solgenomics/topic/fix_il_maps
[cview.git] / lib / CXGN / Cview / Cview_data_adapter.pm
blob3bd8da9be3705efd439cf45399d46e487ca84258
1 package CXGN::Cview::Cview_data_adapter;
3 # NOTE: THIS MODULE IS DEPRECATED AND WILL BE REMOVED SOON.
4 # --LUKAS, APR 2007.
6 use strict;
7 use warnings;
9 use CXGN::Cview::Marker;
10 use CXGN::Cview::Marker::SequencedBAC;
11 use CXGN::Cview::Marker::FISHMarker;
12 use CXGN::Genomic::Clone;
13 use CXGN::Map::Tools;
14 use CXGN::Cview::Config;
16 our $config = CXGN::Cview::Config->new;
19 =head2 function get_chromosome()
21 Synopsis: gets a chromosome object of the appropriate type given a map_version_id
22 Arguments: database handle, CXGN::Map object, and a chr_nr.
23 Returns: a chromosome object of the appropriate type.
24 Side effects:
25 Description:
27 =cut
29 sub get_chromosome {
30 my $dbh = shift;
31 my $map = shift; # CXGN::Map object
32 my $chr_nr = shift;
33 my $type = shift;
34 my $c;
35 unless (defined($type)) {$type='';}
36 if ($type =~ /fish/i) {
37 $c=CXGN::Cview::Chromosome::PachyteneIdiogram -> new ($chr_nr, 100, 100, 40);
38 CXGN::Cview::Cview_data_adapter::fetch_pachytene_idiogram($c, $chr_nr);
39 CXGN::Cview::Cview_data_adapter::fetch_fish_chromosome($dbh, $c, $map, $chr_nr, 0, $type);
40 $c->set_width(6);
41 $c->set_units("%");
44 else {
45 $c= CXGN::Cview::Chromosome -> new($chr_nr, 60, 100, 40);
46 CXGN::Cview::Cview_data_adapter::fetch_chromosome($dbh, $c, $map, $chr_nr, 0, $type);
49 return $c;
52 sub fetch_chromosome {
53 my $dbh = shift;
54 my $chromosome = shift; # the chromosome object
55 my $map = shift; # CXGN::Map object
56 my $chr_nr = shift; # the chromosome name
57 my $marker_confidence_cutoff = shift; # the confidence cutoff. 3=frame 2=coseg 1=interval LOD=2 0=interval
58 my $type = shift;
60 # if ($type=~/fish/i) {
61 # return fetch_fish_chromosome($dbh, $chromosome, $map_id, $chr_nr);
62 # }
65 if (!$marker_confidence_cutoff) { $marker_confidence_cutoff=-1; }
67 my %seq_bac = ();
69 my $sgn = $dbh->qualify_schema('sgn');
70 my $physical = $dbh->qualify_schema('physical');
72 if ($map->map_id() == CXGN::Map::Tools::current_tomato_map_id()) {
74 my $Sequenced_BAC_query =
76 SELECT
77 distinct $physical.bac_marker_matches.bac_id,
78 $physical.bac_marker_matches.cornell_clone_name,
79 $physical.bac_marker_matches.marker_id,
80 $physical.bac_marker_matches.position
81 FROM
82 $physical.bac_marker_matches
83 LEFT JOIN $sgn.linkage_group USING (lg_id)
84 LEFT JOIN sgn_people.bac_status USING (bac_id)
85 WHERE
86 $sgn.linkage_group.lg_name=?
87 AND sgn_people.bac_status.status='complete'
89 my $sth2 = $dbh->prepare($Sequenced_BAC_query);
90 $sth2->execute($chr_nr);
91 while (my ($bac_id, $name, $marker_id, $offset)=$sth2->fetchrow_array()) {
92 # print STDERR "Sequenced BAC for: $bac_id, $name, $marker_id, $offset...\n";
93 $name = CXGN::Genomic::Clone->retrieve($bac_id)->clone_name();
95 my $m = CXGN::Cview::Marker::SequencedBAC->new($chromosome, $bac_id, $name, "", "", "", "", $offset);
96 $m->get_label()->set_text_color(200,200,80);
97 $m->get_label()->set_line_color(200,200,80);
98 $seq_bac{$marker_id}=$m;
102 # get the "normal" markers
104 my $query =
106 SELECT
107 marker_experiment.marker_id,
108 alias,
109 mc_name,
110 confidence_id,
112 subscript,
113 position,
115 FROM
116 $sgn.map_version
117 inner join $sgn.linkage_group using (map_version_id)
118 inner join $sgn.marker_location using (lg_id)
119 inner join $sgn.marker_experiment using (location_id)
120 inner join $sgn.marker_alias using (marker_id)
121 inner join $sgn.marker_confidence using (confidence_id)
122 left join $sgn.marker_collectible using (marker_id)
123 left join $sgn.marker_collection using (mc_id)
124 WHERE
125 map_version.map_version_id=?
126 and lg_name=?
127 and preferred='t'
128 ORDER BY
129 position,
130 confidence_id desc
134 # GROUP BY
135 # markers.marker_id,
136 # marker_name,
137 # marker_types.type_name,
138 # confidence,
139 # marker_locations.order_in_loc,
140 # location_subscript,
141 # \"offset\",
142 # loc_type
144 my $sth = $dbh -> prepare($query);
145 $sth -> execute($map->map_version_id(), $chr_nr);
147 while (my ($marker_id, $marker_name, $marker_type, $confidence, $order_in_loc, $location_subscript, $offset, $loc_type) = $sth->fetchrow_array()) {
148 #print STDERR "Marker Read: $marker_id\t$marker_name\t$marker_type\t$offset\n";
149 my $m = CXGN::Cview::Marker -> new($chromosome, $marker_id, $marker_name, $marker_type, $confidence, $order_in_loc, $location_subscript, $offset, undef , $loc_type, 0);
150 #print STDERR "dataadapter baccount = $bac_count!\n";
151 if ($loc_type == 100) { $m -> set_frame_marker(); }
152 $m -> set_url("/search/markers/markerinfo.pl?marker_id=".$m->get_id());
153 $chromosome->add_marker($m);
155 if (exists($seq_bac{$marker_id})) {
156 #print STDERR "Adding Sequenced BAC [".($seq_bac{$marker_id}->get_name())."] to map...[$marker_id]\n";
157 $chromosome->add_marker($seq_bac{$marker_id});
161 $chromosome -> _calculate_chromosome_length();
164 sub fetch_fish_chromosome {
165 my $dbh = shift;
166 my $chromosome = shift;
167 my $map = shift; # CXGN::Map object
168 my $chr_nr = shift;
169 # The following query is a composition of 3 subqueries (look for the 'AS'
170 # keywords), joined using the clone_id. Here's what the subqueries do:
172 # * clone_id_and_percent: gets the average percent distance from the
173 # centromere as a signed float between -1.0 and +1.0, for each
174 # BAC on a given chromosome. This is done by first computing the
175 # average absolute distance from the centromere (signed, in um),
176 # and then dividing by the length of the arm that the average
177 # would be located on.
179 # * min_marker_for_clone: finds one marker associated with the BAC
180 # (if any).
182 # * clone_info: finds the library shortname and clone name components.
183 my $query = "
184 SELECT shortname, clone_id, platenum, wellrow, wellcol, percent, marker_id
185 FROM (SELECT clone_id, (CASE WHEN absdist < 0
186 THEN absdist / short_arm_length
187 ELSE absdist / long_arm_length END) AS percent
188 FROM (SELECT clone_id, chromo_num,
189 AVG(percent_from_centromere * arm_length *
190 CASE WHEN r.chromo_arm = 'P' THEN -1 ELSE 1 END)
191 AS absdist
192 FROM fish_result r
193 JOIN fish_karyotype_constants k USING (chromo_num, chromo_arm)
194 WHERE chromo_num = ?
195 GROUP BY clone_id, chromo_num) AS clone_id_and_absdist
196 JOIN (SELECT k1.chromo_num, k1.arm_length AS short_arm_length,
197 k2.arm_length AS long_arm_length
198 FROM fish_karyotype_constants k1
199 JOIN fish_karyotype_constants k2 USING (chromo_num)
200 WHERE k1.chromo_arm = 'P' AND k2.chromo_arm = 'Q')
201 AS karyotype_rearranged USING (chromo_num))
202 AS clone_id_and_percent
203 LEFT JOIN (SELECT clone_id, MIN (m.marker_id) AS marker_id
204 FROM sgn.fish_result AS c
205 LEFT JOIN physical.overgo_associations a ON (c.clone_id = a.bac_id)
206 LEFT JOIN physical.probe_markers m USING (overgo_probe_id)
207 LEFT JOIN marker_experiment e ON (m.marker_id = e.marker_id)
208 LEFT JOIN marker_location l ON (l.location_id = e.location_id)
209 LEFT JOIN linkage_group g ON (g.lg_id = l.lg_id)
210 LEFT JOIN map_version v ON (g.map_version_id = v.map_version_id)
211 WHERE (v.current_version = 't' OR v.current_version IS NULL)
212 AND (v.map_id = ? OR v.map_id IS NULL)
213 GROUP BY c.clone_id)
214 AS min_marker_for_clone USING (clone_id)
215 LEFT JOIN (SELECT shortname, clone_id, platenum, wellrow, wellcol
216 FROM genomic.clone
217 JOIN genomic.library USING (library_id))
218 AS clone_info USING (clone_id)
219 ORDER BY percent
222 my $sth = $dbh -> prepare($query);
223 $sth->execute($chr_nr, CXGN::Map::Tools::current_tomato_map_id);
224 while (my ($library_name, $clone_id, $platenum, $wellcol, $wellrow, $percent, $marker_id) = $sth->fetchrow_array()) {
225 #print STDERR "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Adding marker BAC:$library_name, $wellrow, $wellcol, $chromo_arm etc.\n";
226 my $offset = 0;
227 my $factor = 0;
228 $offset = $percent * 100;
230 my $clone_name = CXGN::Genomic::Clone->retrieve($clone_id)->clone_name();
232 my $m = CXGN::Cview::Marker::FISHMarker -> new($chromosome, $marker_id, $clone_name, "", 3, $offset+100, "", $offset );
233 $m -> set_url("/maps/physical/clone_info.pl?id=".$clone_id);
234 $chromosome->add_marker($m);
240 # sub get_overgo_bac_data {
241 # my $dbh = shift;
242 # my $m = shift;
244 # my $query =
246 # SELECT
247 # if (physical.probe_markers.overgo_probe_id IS NULL, 0, 1),
248 # count(distinct(physical.overgo_associations.bac_id))
249 # FROM
250 # marker
251 # left join physical.probe_markers using (marker_id)
252 # left join physical.overgo_associations using (overgo_probe_id)
253 # where
254 # marker.marker_id=?
255 # group by
256 # marker.marker_id
257 # ";
259 # my $sth = $dbh -> prepare($query);
260 # $sth -> execute($m->get_id());
262 # if (my ($probes, $bacs) = $sth -> fetchrow_array()) {
263 # if ($probes) { $m->set_has_overgo(); }
264 # if ($bacs) { $m->set_has_bacs($bacs); }
270 sub fetch_chromosome_overgo {
271 my $dbh = shift;
272 my $chromosome = shift; # the chromosome object
273 my $map = shift; # CXGN::Map object
274 my $chr_nr = shift; # the chromosome number
275 my $start = shift; # the start of the section in cM
276 my $end = shift; # the end of the section in cM
278 # main query to get the marker data, including the BACs that are associated with this
279 # marker -- needs to be refactored to work with the materialized views for speed improvements.
281 my $query =
283 SELECT
284 marker_experiment.marker_id,
285 alias,
286 mc_name,
287 confidence_id,
289 subscript,
290 position,
292 min(physical.probe_markers.overgo_probe_id),
293 count(distinct(physical.overgo_associations.bac_id)),
294 max(physical.oa_plausibility.plausible)
295 FROM
296 map_version
297 inner join linkage_group using (map_version_id)
298 inner join marker_location using (lg_id)
299 inner join marker_experiment using (location_id)
300 inner join marker_alias using (marker_id)
301 inner join marker_confidence using (confidence_id)
302 left join marker_collectible using (marker_id)
303 left join marker_collection using (mc_id)
304 LEFT JOIN physical.probe_markers ON (marker_experiment.marker_id=physical.probe_markers.marker_id)
305 LEFT JOIN physical.overgo_associations USING (overgo_probe_id)
306 LEFT JOIN physical.oa_plausibility USING (overgo_assoc_id)
307 WHERE
308 map_version.map_version_id=?
309 and lg_name=?
310 and preferred='t'
311 -- and current_version='t'
312 AND position >= ?
313 AND position <= ?
314 GROUP BY
315 marker_experiment.marker_id,
316 alias,
317 mc_name,
318 confidence_id,
319 subscript,
320 position
321 ORDER BY
322 position,
323 confidence_id desc,
324 max(physical.oa_plausibility.plausible),
325 max(physical.probe_markers.overgo_probe_id)
329 my $sth = $dbh -> prepare($query);
330 # print STDERR "START/END: $start/$end\n";
331 $sth -> execute($map->map_version_id, $chr_nr, $start, $end);
333 # for each marker, look if there is a associated fully sequenced BAC, and add that
334 # as a marker of type Sequenced_BAC to the map at the right location
336 my $bac_status_q =
338 SELECT
339 cornell_clone_name,
340 bac_id
341 FROM
342 physical.bac_marker_matches
343 JOIN sgn_people.bac_status using (bac_id)
344 WHERE
345 physical.bac_marker_matches.marker_id=?
346 AND sgn_people.bac_status.status='complete'
349 my $bac_status_h = $dbh->prepare($bac_status_q);
350 my $seq_bac;
352 while (my ($marker_id, $marker_name, $marker_type, $confidence, $order_in_loc, $location_subscript, $offset, $loc_type, $overgo, $bac_count, $plausible, $status, $bac_name, $bac_id) = $sth->fetchrow_array()) {
353 #print STDERR "Marker Read: $marker_id\t$marker_name\t$marker_type\toffset: $offset\tovergo: $overgo\tbac_count: $bac_count\tplausible: $plausible\n";
354 my $seq_bac=undef;
355 my $seq_bac_name="";
356 my $seq_bac_id="";
357 if (!$plausible || $plausible == 0) { $bac_count = 0; }
358 my $m = CXGN::Cview::Marker -> new($chromosome, $marker_id, $marker_name, $marker_type, $confidence, $order_in_loc, $location_subscript, $offset, , $loc_type, 0, $overgo, $bac_count);
359 #print STDERR "dataadapter baccount = $bac_count!\n";
360 if ($loc_type == 100) { $m -> set_frame_marker(); }
362 # only add the sequenced BAC information to the F2-2000.
364 if ($map->map_id == CXGN::Map::Tools::current_tomato_map_id()) {
366 $bac_status_h->execute($marker_id);
367 ($seq_bac_name, $seq_bac_id) = $bac_status_h->fetchrow_array();
369 # change the name to look more standard
371 if ($seq_bac_name) {
372 if ($seq_bac_name =~ m/(\d+)([A-Z])(\d+)/i) {
373 $seq_bac_name = sprintf ("%3s%04d%1s%02d", "HBa",$1,$2,$3);
375 $seq_bac = CXGN::Cview::Marker::SequencedBAC->new($chromosome, $seq_bac_id, $seq_bac_name, "", "", "", "", $offset);
379 # add the marker $m to the chromosome
381 $chromosome->add_marker($m);
383 # add the sequenced BAC to the chromosome
384 # -url link needs to be changed
385 # -add a confidence level of 3 so that it is always displayed.
387 if ($seq_bac) {
388 $seq_bac->set_confidence(3);
389 $seq_bac->set_url("/maps/physical/clone_info.pl?id=$seq_bac_id");
390 $chromosome->add_marker($seq_bac);
393 $chromosome -> _calculate_chromosome_length();
397 =head2 fetch_chromosome_connections
399 Usage:
400 Desc:
401 Ret:
402 Args:
403 Side Effects:
404 Example:
406 =cut
408 sub fetch_chromosome_connections {
409 my $dbh = shift;
410 my $map = shift; # CXGN::Map object
411 my $chr_nr = shift;
413 my $query =
415 SELECT
416 c_map_version.map_version_id,
417 c_map.short_name,
418 c_linkage_group.lg_name,
419 count(distinct(marker.marker_id)) as marker_count
420 from
421 marker
422 join marker_experiment using(marker_id)
423 join marker_location using (location_id)
424 join linkage_group on (marker_location.lg_id=linkage_group.lg_id)
425 join map_version on (linkage_group.map_version_id=map_version.map_version_id)
427 join marker_experiment as c_marker_experiment on
428 (marker.marker_id=c_marker_experiment.marker_id)
429 join marker_location as c_marker_location on
430 (c_marker_experiment.location_id=c_marker_location.location_id)
431 join linkage_group as c_linkage_group on (c_marker_location.lg_id=c_linkage_group.lg_id)
432 join map_version as c_map_version on
433 (c_linkage_group.map_version_id=c_map_version.map_version_id)
434 join map as c_map on (c_map.map_id=c_map_version.map_id)
435 where
436 map_version.map_version_id=?
437 and linkage_group.lg_name=?
438 and c_map_version.map_version_id !=map_version.map_version_id
439 and c_map_version.current_version='t'
440 group by
441 c_map_version.map_version_id,
442 c_linkage_group.lg_name,
443 c_map.short_name
444 order by
445 marker_count desc
448 my $sth = $dbh -> prepare($query);
449 $sth -> execute($map->map_version_id(), $chr_nr);
450 my @chr_list = ();
452 #print STDERR "***************** Done with query..\n";
453 while (my $hashref = $sth->fetchrow_hashref()) {
454 #print STDERR "READING----> $hashref->{map_version_id} $hashref->{chr_nr} $hashref->{marker_count}\n";
455 push @chr_list, $hashref;
458 return @chr_list;
461 =head2 fetch_available_maps
463 Usage:
464 Desc:
465 Ret:
466 Args:
467 Side Effects:
468 Example:
470 =cut
472 sub fetch_available_maps {
473 my $dbh = shift;
475 my $query = "SELECT map_version.map_version_id, short_name
476 FROM map_version JOIN map using (map_id)
477 WHERE current_version='t' order by short_name";
478 my $sth = $dbh -> prepare($query);
479 $sth -> execute();
481 my @maps = ();
482 while (my $map_ref = $sth -> fetchrow_hashref()) {
483 push @maps, $map_ref;
485 return @maps;
488 sub fetch_physical {
489 my $dbh = shift;
490 my $physical = shift;
491 my $map = shift; # CXGN::Map object
492 my $chr_nr = shift;
494 my $query =
496 SELECT
497 distinct(physical.bacs.bac_id),
498 marker_experiment.marker_id,
499 position
500 FROM
501 map_version
502 inner join linkage_group using (map_version_id)
503 inner join marker_location using (lg_id)
504 inner join marker_experiment using (location_id)
505 inner join physical.probe_markers using (marker_id)
506 inner join physical.overgo_associations using (overgo_probe_id)
507 inner join physical.bacs using (bac_id)
508 inner join physical.oa_plausibility using (overgo_assoc_id)
509 where
510 map_version.map_version_id=?
511 and lg_name=?
512 and current_version='t'
513 and physical.oa_plausibility.plausible=1
516 # print STDERR "Query: $query\n";
517 my $sth = $dbh->prepare($query);
518 $sth -> execute($map->map_version_id, $chr_nr);
519 while (my ($bac_id, $marker_id, $offset) = $sth->fetchrow_array()) {
520 #print STDERR "Physical: Marker Read: $bac_id\t$marker_id\t$offset\n";
521 $physical -> add_bac_association($offset, $bac_id, "overgo");
524 my $sgn = $dbh -> qualify_schema("sgn");
525 my $computational_query = "
526 SELECT distinct(physical.computational_associations.clone_id),
527 physical.computational_associations.marker_id,
528 marker_location.position
529 FROM physical.computational_associations
530 JOIN $sgn.marker_experiment using(marker_id)
531 JOIN $sgn.marker_location using (location_id)
532 JOIN $sgn.linkage_group using (lg_id)
533 JOIN $sgn.map_version on (map_version.map_version_id=linkage_group.map_version_id)
534 WHERE map_version.map_version_id=?
535 AND linkage_group.lg_name=?
536 ORDER BY marker_location.position
539 my $cq_sth = $dbh->prepare($computational_query);
540 $cq_sth->execute($map->map_version_id(), $chr_nr);
542 while (my ($clone_id, $marker_id, $offset)=$cq_sth->fetchrow_array()) {
543 $physical -> add_bac_association($offset, $clone_id, "computational");
550 sub fetch_IL {
551 my $IL = shift;
552 my $IL_name = shift;
553 my $chr_nr = shift;
555 my %marker_pos = ();
556 my @m2 = $IL -> get_markers();
557 foreach my $m (@m2) {
558 $marker_pos{$m->get_name()} = $m-> get_offset();
561 my $data_folder= $config->get_conf('basepath') . $config->get_conf('documents_subdir');
563 open (F, '<', "$data_folder/cview/IL_defs/$IL_name".".txt") || die "Can't open IL file IL_defs/$IL_name $!\n";
565 while (<F>) {
566 chomp;
567 my ($chromosome, $name, $start_marker, $end_marker) = split/\t/;
568 if (/^\#/) { next; }
569 if ($chr_nr == $chromosome) {
570 $start_marker =~ s/^\s+(.*)/$1/;
571 $start_marker =~ s/(.*)\s+/$1/;
572 $end_marker =~ s/^\s+(.*)/$1/;
573 $end_marker =~ s/(.*)\s+/$1/;
574 if (exists($marker_pos{$start_marker}) && exists($marker_pos{$end_marker})) {
575 if ($name=~ /^\d+\-\w+/) {
576 $IL -> add_section($name, $start_marker, $marker_pos{$start_marker}, $end_marker, $marker_pos{$end_marker});
578 elsif ($name =~/IL/i) {
579 $IL -> add_fragment($name, $start_marker, $marker_pos{$start_marker},$end_marker,$marker_pos{$end_marker});
582 else { print STDERR "$start_marker or $end_marker where not found.\n";}
585 close(F);
588 # POD Documentation
590 =head2 fetch_pachytene_idiogram(pachytene_object, chromosome_number)
592 for a pachytene object, fetches the definition data from a file. The tab delimited file has the following columns:
594 chromosome number
595 feature type ( can be short_arm, telomere, centromere, )
596 feature start (a signed integer, negative numbers are on the short (top) arm, in arbitrary units, positive values on the long arm.
597 feature end
599 Comment lines starting with # are ignored.
601 =cut
604 sub fetch_pachytene_idiogram {
605 my $pachytene_object = shift;
606 my $chromosome_number = shift;
608 #print STDERR "Fetching $chromosome_number pachytene\n";
609 my $vhost_conf=CXGN::Cview::Config->new();
610 my $data_folder=$vhost_conf->get_conf('basepath').$vhost_conf->get_conf('documents_subdir');
611 open (F, '<', "<$data_folder/cview/pachytene/pachytene_tomato_dapi.txt") || die "Can't open pachytene def file $!";
613 while (<F>) {
614 chomp;
616 my ($chr, $type, $start, $end) = split/\t/;
618 # skip comment lines.
619 if (/^\#/) { next(); }
621 if ($chr == $chromosome_number) {
622 #print STDERR "Adding feature $type ($start, $end)\n";
623 $pachytene_object -> add_feature($type, $start, $end);