Bio::DB::TFBS namespace has been moved to its own distribution named after itself
[bioperl-live.git] / Bio / MapIO / fpc.pm
blob915cf59d53d7598356aa81fb3dcd0bb647d4576a
1 # fpc.pm,v 1.2.2.1 2005/10/09 15:16:27 jason Exp
3 # BioPerl module for Bio::MapIO::fpc
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Gaurav Gupta <gaurav@genome.arizona.edu>
9 # Copyright AGCoL
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::MapIO::fpc - A FPC Map reader
19 =head1 SYNOPSIS
21 # do not use this object directly it is accessed through the Bio::MapIO system
23 use Bio::MapIO;
25 -format : specifies the format of the file format is "fpc",
26 -file : specifies the name of the .fpc file
27 -readcor : boolean argument, indicating if .cor is to be read
28 or not. It looks for the .cor file in the same path
29 as .fpc file.
30 0 : doesn't read .cor file
31 1 : reads the .cor file
32 [default 0]
33 -verbose : indicates the process of loading of fpc file
34 my $mapio = Bio::MapIO->new(-format => "fpc",
35 -file => "rice.fpc",
36 -readcor => 0,
37 -verbose => 0);
39 my $map = $mapio->next_map();
41 foreach my $marker ( $map->each_markerid() ) {
42 # loop through the markers associated with the map
43 # likewise for contigs, clones, etc.
47 =head1 DESCRIPTION
49 This object contains code for parsing and processing FPC files and creating
50 L<Bio::Map::Physical> object from it.
52 For faster access and better optimization, the data is stored internally in
53 hashes. The corresponding objects are created on request.
55 We handle reading of the FPC ourselves, since MapIO module of Bioperl adds
56 too much overhead.
58 =cut
60 # Let the code begin...
62 package Bio::MapIO::fpc;
63 use strict;
64 use POSIX;
66 use Bio::Map::Physical;
67 use Bio::Map::Clone;
68 use Bio::Map::Contig;
69 use Bio::Map::FPCMarker;
70 use Bio::Range;
72 use base qw(Bio::MapIO);
74 my $_readcor;
76 =head1 Initializer
78 =head2 _initialize
80 Title : _initialize
81 Usage : called implicitly
82 Function: calls the SUPER::_initialize
83 Returns : nothing
84 Args : species, readcor
86 =cut
88 sub _initialize{
89 my ($self,@args) = @_;
90 my $species;
91 $self->SUPER::_initialize(@args);
92 ($species,$_readcor) = $self->_rearrange([qw(SPECIES READCOR)], @args);
93 $_readcor = 0 unless (defined($_readcor));
96 =head1 Access Methods
98 These methods let you get and set the member variables
100 =head2 next_map
102 Title : next_map
103 Usage : my $fpcmap = $mapio->next_map();
104 Function: gets the fpcmap from MapIO
105 Returns : object of type L<Bio::Map::MapI>
106 Args : none
108 =cut
110 sub next_map{
112 my ($self) = @_;
114 my $line;
115 my ($name,$fpcver,$moddate,$moduser,$contigcnt,$clonecnt,$markerscnt,
116 $bandcnt,$marker,$seqclone);
117 my ($corfile,$corindex,$BUFFER);
118 my @cordata;
119 my %fpcmarker;
120 my ($contig, $contigNumber);
121 my $curClone = 0;
122 my $curMarker = 0;
123 my $curContig = 0;
124 my %_clones;
125 my %_markers;
126 my %_contigs;
127 my $ctgzeropos = 1;
129 my $map = Bio::Map::Physical->new('-units' => 'CB',
130 '-type' => 'physical');
132 my $filename = $self->file();
133 my $fh = $self->{'_filehandle'};
135 if (defined($_readcor)) {
136 $map->core_exists($_readcor);
138 else {
139 $map->core_exists(0);
142 if ($map->core_exists()) {
143 $corfile = substr($filename,0,length($filename)-3)."cor";
144 if (open my $CORE, '<', $corfile) {
145 while( read($CORE, $BUFFER, 2) ) {
146 push @cordata, unpack('n*', $BUFFER);
149 else {
150 $map->core_exists(0);
154 ## Read in the header
155 while (defined($line = <$fh>)) {
156 chomp($line);
158 if ($line =~ m{^//\s+fpc\s+project\s+(.+)}) { $map->name($1); }
159 if ($line =~ m{^//\s+([\d.]+)}) {
160 my $version = $1;
161 $version =~ /((\d+)\.(\d+))(.*)/;
162 $map->version($1);
163 if ($line =~ /User:\s+(.+)/) { $map->modification_user($1); }
166 if ($line =~ m{^//\s+Framework\s+(\w+)\s+(\w+)\s+([-\w]+)\s+(\w+)\s+(\w+)\s+(.+)$})
168 $map->group_type($3) if ($2 eq "Label");
169 $map->group_abbr($5) if ($4 eq "Abbrev");
172 last unless ($line =~ m{^//});
175 if (!defined($map->group_type()) || !defined($map->group_abbr()) ) {
176 $map->group_type("Chromosome");
177 $map->group_abbr("Chr");
180 $_contigs{0}{'range'}{'end'} = 0;
181 $_contigs{0}{'range'}{'start'} = 0;
183 ## Read in the clone data
184 while (defined($line = <$fh>)) {
185 $marker = 0;
186 $contig = 0;
187 $seqclone = 0;
188 $contigNumber = 0;
190 my ($type,$name);
191 my (@amatch,@pmatch,@ematch);
193 my $bandsread = 0;
195 last if ($line =~ /^Markerdata/);
198 $line =~ /^(\w+)\s+:\s+"(.+)"/;
200 ## these will be set if we did find the clone line
201 ($type, $name) = ($1, $2);
203 if ($name =~ /sd1/) {
204 $seqclone = 1;
207 $_clones{$name}{'type'} = $type;
208 $_clones{$name}{'contig'} = 0;
209 $_contigs{'0'}{'clones'}{$name} = 0;
211 my $temp;
213 ## Loop through the following lines, getting attributes for clone
214 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
216 if ($line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([-\d]+)/) {
217 $_clones{$name}{'contig'} = $1;
218 $_contigs{$1}{'clones'}{$name} = 0;
220 delete($_contigs{'0'}{'clones'}{$name});
222 $temp = $3;
223 $contigNumber = $1;
224 $line = <$fh>;
225 $line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([\d]+)/;
226 $_clones{$name}{'range'}{'start'} = $temp;
228 $_contigs{$contigNumber}{'range'}{'start'} = $temp
229 if (!exists($_contigs{$contigNumber}{'range'}{'start'})
230 || $_contigs{$contigNumber}{'range'}{'start'}
231 > $temp );
233 $_clones{$name}{'range'}{'end'} = $3;
235 $_contigs{$contigNumber}{'range'}{'end'} = $3
236 if (!exists($_contigs{$contigNumber}{'range'}{'end'})
237 || $_contigs{$contigNumber}{'range'}{'end'} < $3 );
240 elsif ($line =~ /^([a-zA-Z]+)_match_to_\w+\s+"(.+)"/) {
241 my $matchtype = "match" . lc(substr($1, 0, 1));
242 $_clones{$name}{$matchtype}{$2} = 0;
244 elsif ($line =~ /^Positive_(\w+)\s+"(.+)"/) {
245 $_clones{$name}{'markers'}{$2} = 0;
246 $_markers{$2}{'clones'}{$name} = 0;
247 $_markers{$2}{'type'} = $1;
248 $_markers{$2}{'contigs'}{$contigNumber} = 0;
249 $_contigs{$contigNumber}{'markers'}{$2} = 0;
251 elsif ($line =~ /^Bands\s+(\d+)\s+(\d+)/ && !$bandsread) {
252 my $i = 0;
253 my @numbands;
254 $bandsread = 1;
256 if ($map->core_exists()) {
257 while($i<$2){
258 push(@numbands,$cordata[($1-1)+$i]);
259 $i++;
261 $_clones{$name}{'bands'} = \@numbands;
263 else {
264 push(@numbands,$1,$2);
265 $_clones{$name}{'bands'} = \@numbands;
267 if (exists($_contigs{0}{'clones'}{$name})) {
268 $_clones{$name}{'range'}{'start'} = $ctgzeropos;
269 $_clones{$name}{'range'}{'end'} = $ctgzeropos + $2;
270 $_contigs{0}{'range'}{'end'} = $ctgzeropos + $2;
271 $ctgzeropos += $2;
274 elsif ($line =~ /^Gel_number\s+(.+)/) {
275 $_clones{$name}{'gel'} = $1;
277 elsif ($line =~ /^Remark\s+"(.+)"/) {
278 $_clones{$name}{'remark'} .= $1;
279 $_clones{$name}{'remark'} .= "\n";
280 if($seqclone == 1 ) {
281 if( $1 =~ /\,\s+Chr(\d+)\s+/){
282 $_clones{$name}{'group'} = $1;
286 elsif ($line =~ /^Fp_number\s+"(.+)"/) {
287 $_clones{$name}{'fp_number'} = $1;
289 elsif ($line =~ /^Shotgun\s+(\w+)\s+(\w+)/) {
290 $_clones{$name}{'sequence_type'} = $1;
291 $_clones{$name}{'sequence_status'} = $2;
293 elsif ($line =~ /^Fpc_remark\s+"(.+)"/) {
294 $_clones{$name}{'fpc_remark'} .= $1;
295 $_clones{$name}{'fpc_remark'} .= "\n";
299 $curClone++;
300 print "Adding clone $curClone...\n\r"
301 if ($self->verbose() && $curClone % 1000 == 0);
304 $map->_setCloneRef(\%_clones);
305 $line = <$fh>;
307 while (defined($line = <$fh>) && $line !~ /Contigdata/) {
308 my ($type,$name);
310 last if ($line !~ /^Marker_(\w+)\s+:\s+"(.+)"/);
312 ($type, $name) = ($1, $2);
314 $_markers{$name}{'type'} = $type;
315 $_markers{$name}{'group'} = 0;
316 $_markers{$name}{'global'} = 0;
317 $_markers{$name}{'anchor'} = 0;
319 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
320 if ($line =~ /^Global_position\s+([\d.]+)\s*(Frame)?/) {
321 my $position = $1 - floor($1/1000)*1000;
322 $position = sprintf("%.2f",$position);
324 $_markers{$name}{'global'} = $position;
325 $_markers{$name}{'group'} = floor($1/1000);
326 $_markers{$name}{'anchor'} = 1;
328 if(defined($2)) {
329 $_markers{$name}{'framework'} = 1;
331 else {
332 $_markers{$name}{'framework'} = 0;
335 elsif ($line =~ /^Anchor_bin\s+"([\w\d.]+)"/) {
336 my $grpmatch = $1;
337 my $grptype = $map->group_type();
339 $grpmatch =~ /(\d+|\w)(.*)/;
341 my ($group,$subgroup);
342 $group = $1;
343 $subgroup = $2;
345 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
347 $_markers{$name}{'group'} = $group;
348 $_markers{$name}{'subgroup'} = $subgroup;
350 elsif ($line =~ /^Anchor_pos\s+([\d.]+)\s+(F|P)?/){
351 $_markers{$name}{'global'} = $1;
352 $_markers{$name}{'anchor'} = 1;
354 if ($2 eq 'F') {
355 $_markers{$name}{'framework'} = 1;
357 else {
358 $_markers{$name}{'framework'} = 0;
361 elsif ($line =~ /^anchor$/) {
362 $_markers{$name}{'anchor'} = 1;
364 elsif ($line =~ /^Remark\s+"(.+)"/) {
365 $_markers{$name}{'remark'} .= $1;
366 $_markers{$name}{'remark'} .= "\n";
369 $curMarker++;
370 print "Adding Marker $curMarker...\n"
371 if ($self->verbose() && $curMarker % 1000 == 0);
374 $map->_setMarkerRef(\%_markers);
376 my $ctgname;
377 my $grpabbr = $map->group_abbr();
378 my $chr_remark;
380 $_contigs{0}{'group'} = 0;
382 while (defined($line = <$fh>)) {
384 if ($line =~ /^Ctg(\d+)/) {
385 $ctgname = $1;
386 $_contigs{$ctgname}{'group'} = 0;
387 $_contigs{$ctgname}{'anchor'} = 0;
388 $_contigs{$ctgname}{'position'} = 0;
390 if ($line =~ /#\w*(.*)\w*$/) {
391 $_contigs{$ctgname}{'remark'} = $1;
392 if ($line =~ /#\s+Chr(\d+)\s+/) {
393 $_contigs{$ctgname}{'group'} = $1;
394 $_contigs{$ctgname}{'anchor'} = 1;
398 elsif ($line =~ /^Chr_remark\s+"(-|\+|Chr(\d+))\s+(.+)"$/) {
400 $_contigs{$ctgname}{'anchor'} = 1;
401 $_contigs{$ctgname}{'chr_remark'} = $3 if(defined($3));
403 if (defined($2)) {
404 $_contigs{$ctgname}{'group'} = $2;
406 else {
407 $_contigs{$ctgname}{'group'} = "?";
410 elsif ($line =~ /^User_remark\s+"(.+)"/) {
411 $_contigs{$ctgname}{'usr_remark'} = $1;
413 elsif ($line =~ /^Trace_remark\s+"(.+)"/) {
414 $_contigs{$ctgname}{'trace_remark'} = $1;
416 elsif ($grpabbr && $line =~ /^Chr_remark\s+"(\W|$grpabbr((\d+)|(\w+)|([.\w\d]+)))\s*(\{(.*)\}|\[(.*)\])?"\s+(Pos\s+((\d.)+|NaN))(NOEDIT)?/)
418 my $grpmatch = $2;
419 my $pos = $10;
420 if ($pos eq "NaN") {
421 $pos = 0;
422 print "Warning: Nan encountered for Contig position \n";
424 $_contigs{$ctgname}{'chr_remark'} = $6;
425 $_contigs{$ctgname}{'position'} = $pos;
426 $_contigs{$ctgname}{'subgroup'} = 0;
428 if (defined($grpmatch)) {
429 $_contigs{$ctgname}{'anchor'} = 1;
431 if ($grpmatch =~ /((\d+)((\D\d.\d+)|(.\d+)))|((\w+)(\.\d+))/) {
433 my ($group,$subgroup);
434 $group = $2 if($grpabbr eq "Chr");
435 $subgroup = $3 if($grpabbr eq "Chr");
437 $group = $7 if($grpabbr eq "Lg");
438 $subgroup = $8 if($grpabbr eq "Lg");
440 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
441 $_contigs{$ctgname}{'group'} = $group;
442 $_contigs{$ctgname}{'subgroup'} = $subgroup;
445 else {
446 $_contigs{$ctgname}{'group'} = $grpmatch;
449 else {
450 $_contigs{$ctgname}{'anchor'} = 1;
451 $_contigs{$ctgname}{'group'} = "?";
454 $curContig++;
455 print "Adding Contig $curContig...\n"
456 if ($self->verbose() && $curContig % 100 == 0);
459 $map->_setContigRef(\%_contigs);
460 $map->_calc_markerposition();
461 $map->_calc_contigposition() if ($map->version() < 7.0);
462 $map->_calc_contiggroup() if ($map->version() == 4.6);
464 return $map;
468 =head2 write_map
470 Title : write_map
471 Usage : $mapio->write_map($map);
472 Function: Write a map out
473 Returns : none
474 Args : Bio::Map::MapI
476 =cut
478 sub write_map{
479 my ($self,@args) = @_;
480 $self->throw_not_implemented();
485 =head1 FEEDBACK
487 =head2 Mailing Lists
489 User feedback is an integral part of the evolution of this and other
490 Bioperl modules. Send your comments and suggestions preferably to
491 the Bioperl mailing list. Your participation is much appreciated.
493 bioperl-l@bioperl.org - General discussion
494 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
496 =head2 Support
498 Please direct usage questions or support issues to the mailing list:
500 I<bioperl-l@bioperl.org>
502 rather than to the module maintainer directly. Many experienced and
503 reponsive experts will be able look at the problem and quickly
504 address it. Please include a thorough description of the problem
505 with code and data examples if at all possible.
507 =head2 Reporting Bugs
509 Report bugs to the Bioperl bug tracking system to help us keep track
510 of the bugs and their resolution. Bug reports can be submitted via the
511 web:
513 https://github.com/bioperl/bioperl-live/issues
515 =head1 AUTHOR - Gaurav Gupta
517 Email gaurav@genome.arizona.edu
519 =head1 PROJECT LEADERS
521 Jamie Hatfield jamie@genome.arizona.edu
523 Dr. Cari Soderlund cari@genome.arizona.edu
525 =head1 PROJECT DESCRIPTION
527 The project was done in Arizona Genomics Computational Laboratory
528 (AGCoL) at University of Arizona.
530 This work was funded by USDA-IFAFS grant #11180 titled "Web Resources
531 for the Computation and Display of Physical Mapping Data".
533 For more information on this project, please refer:
534 http://www.genome.arizona.edu
536 =head1 APPENDIX
538 The rest of the documentation details each of the object methods.
539 Internal methods are usually preceded with a _
541 =cut