1 # fpc.pm,v 1.2.2.1 2005/10/09 15:16:27 jason Exp
3 # BioPerl module for Bio::MapIO::fpc
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Gaurav Gupta <gaurav@genome.arizona.edu>
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
17 Bio::MapIO::fpc - A FPC Map reader
21 # do not use this object directly it is accessed through the Bio::MapIO system
25 -format : specifies the format of the file format is "fpc",
26 -file : specifies the name of the .fpc file
27 -readcor : boolean argument, indicating if .cor is to be read
28 or not. It looks for the .cor file in the same path
30 0 : doesn't read .cor file
31 1 : reads the .cor file
33 -verbose : indicates the process of loading of fpc file
34 my $mapio = Bio::MapIO->new(-format => "fpc",
39 my $map = $mapio->next_map();
41 foreach my $marker ( $map->each_markerid() ) {
42 # loop through the markers associated with the map
43 # likewise for contigs, clones, etc.
49 This object contains code for parsing and processing FPC files and creating
50 L<Bio::Map::Physical> object from it.
52 For faster access and better optimization, the data is stored internally in
53 hashes. The corresponding objects are created on request.
55 We handle reading of the FPC ourselves, since MapIO module of Bioperl adds
60 # Let the code begin...
62 package Bio
::MapIO
::fpc
;
66 use Bio
::Map
::Physical
;
69 use Bio
::Map
::FPCMarker
;
72 use base
qw(Bio::MapIO);
81 Usage : called implicitly
82 Function: calls the SUPER::_initialize
84 Args : species, readcor
89 my ($self,@args) = @_;
91 $self->SUPER::_initialize
(@args);
92 ($species,$_readcor) = $self->_rearrange([qw(SPECIES READCOR)], @args);
93 $_readcor = 0 unless (defined($_readcor));
98 These methods let you get and set the member variables
103 Usage : my $fpcmap = $mapio->next_map();
104 Function: gets the fpcmap from MapIO
105 Returns : object of type L<Bio::Map::MapI>
115 my ($name,$fpcver,$moddate,$moduser,$contigcnt,$clonecnt,$markerscnt,
116 $bandcnt,$marker,$seqclone);
117 my ($corfile,$corindex,$BUFFER);
120 my ($contig, $contigNumber);
129 my $map = Bio
::Map
::Physical
->new('-units' => 'CB',
130 '-type' => 'physical');
132 my $filename = $self->file();
133 my $fh = $self->{'_filehandle'};
135 if (defined($_readcor)) {
136 $map->core_exists($_readcor);
139 $map->core_exists(0);
142 if ($map->core_exists()) {
143 $corfile = substr($filename,0,length($filename)-3)."cor";
144 if (open my $CORE, '<', $corfile) {
145 while( read($CORE, $BUFFER, 2) ) {
146 push @cordata, unpack('n*', $BUFFER);
150 $map->core_exists(0);
154 ## Read in the header
155 while (defined($line = <$fh>)) {
158 if ($line =~ m{^//\s+fpc\s+project\s+(.+)}) { $map->name($1); }
159 if ($line =~ m{^//\s+([\d.]+)}) {
161 $version =~ /((\d+)\.(\d+))(.*)/;
163 if ($line =~ /User:\s+(.+)/) { $map->modification_user($1); }
166 if ($line =~ m{^//\s+Framework\s+(\w+)\s+(\w+)\s+([-\w]+)\s+(\w+)\s+(\w+)\s+(.+)$})
168 $map->group_type($3) if ($2 eq "Label");
169 $map->group_abbr($5) if ($4 eq "Abbrev");
172 last unless ($line =~ m{^//});
175 if (!defined($map->group_type()) || !defined($map->group_abbr()) ) {
176 $map->group_type("Chromosome");
177 $map->group_abbr("Chr");
180 $_contigs{0}{'range'}{'end'} = 0;
181 $_contigs{0}{'range'}{'start'} = 0;
183 ## Read in the clone data
184 while (defined($line = <$fh>)) {
191 my (@amatch,@pmatch,@ematch);
195 last if ($line =~ /^Markerdata/);
198 $line =~ /^(\w+)\s+:\s+"(.+)"/;
200 ## these will be set if we did find the clone line
201 ($type, $name) = ($1, $2);
203 if ($name =~ /sd1/) {
207 $_clones{$name}{'type'} = $type;
208 $_clones{$name}{'contig'} = 0;
209 $_contigs{'0'}{'clones'}{$name} = 0;
213 ## Loop through the following lines, getting attributes for clone
214 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
216 if ($line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([-\d]+)/) {
217 $_clones{$name}{'contig'} = $1;
218 $_contigs{$1}{'clones'}{$name} = 0;
220 delete($_contigs{'0'}{'clones'}{$name});
225 $line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([\d]+)/;
226 $_clones{$name}{'range'}{'start'} = $temp;
228 $_contigs{$contigNumber}{'range'}{'start'} = $temp
229 if (!exists($_contigs{$contigNumber}{'range'}{'start'})
230 || $_contigs{$contigNumber}{'range'}{'start'}
233 $_clones{$name}{'range'}{'end'} = $3;
235 $_contigs{$contigNumber}{'range'}{'end'} = $3
236 if (!exists($_contigs{$contigNumber}{'range'}{'end'})
237 || $_contigs{$contigNumber}{'range'}{'end'} < $3 );
240 elsif ($line =~ /^([a-zA-Z]+)_match_to_\w+\s+"(.+)"/) {
241 my $matchtype = "match" . lc(substr($1, 0, 1));
242 $_clones{$name}{$matchtype}{$2} = 0;
244 elsif ($line =~ /^Positive_(\w+)\s+"(.+)"/) {
245 $_clones{$name}{'markers'}{$2} = 0;
246 $_markers{$2}{'clones'}{$name} = 0;
247 $_markers{$2}{'type'} = $1;
248 $_markers{$2}{'contigs'}{$contigNumber} = 0;
249 $_contigs{$contigNumber}{'markers'}{$2} = 0;
251 elsif ($line =~ /^Bands\s+(\d+)\s+(\d+)/ && !$bandsread) {
256 if ($map->core_exists()) {
258 push(@numbands,$cordata[($1-1)+$i]);
261 $_clones{$name}{'bands'} = \
@numbands;
264 push(@numbands,$1,$2);
265 $_clones{$name}{'bands'} = \
@numbands;
267 if (exists($_contigs{0}{'clones'}{$name})) {
268 $_clones{$name}{'range'}{'start'} = $ctgzeropos;
269 $_clones{$name}{'range'}{'end'} = $ctgzeropos + $2;
270 $_contigs{0}{'range'}{'end'} = $ctgzeropos + $2;
274 elsif ($line =~ /^Gel_number\s+(.+)/) {
275 $_clones{$name}{'gel'} = $1;
277 elsif ($line =~ /^Remark\s+"(.+)"/) {
278 $_clones{$name}{'remark'} .= $1;
279 $_clones{$name}{'remark'} .= "\n";
280 if($seqclone == 1 ) {
281 if( $1 =~ /\,\s+Chr(\d+)\s+/){
282 $_clones{$name}{'group'} = $1;
286 elsif ($line =~ /^Fp_number\s+"(.+)"/) {
287 $_clones{$name}{'fp_number'} = $1;
289 elsif ($line =~ /^Shotgun\s+(\w+)\s+(\w+)/) {
290 $_clones{$name}{'sequence_type'} = $1;
291 $_clones{$name}{'sequence_status'} = $2;
293 elsif ($line =~ /^Fpc_remark\s+"(.+)"/) {
294 $_clones{$name}{'fpc_remark'} .= $1;
295 $_clones{$name}{'fpc_remark'} .= "\n";
300 print "Adding clone $curClone...\n\r"
301 if ($self->verbose() && $curClone % 1000 == 0);
304 $map->_setCloneRef(\
%_clones);
307 while (defined($line = <$fh>) && $line !~ /Contigdata/) {
310 last if ($line !~ /^Marker_(\w+)\s+:\s+"(.+)"/);
312 ($type, $name) = ($1, $2);
314 $_markers{$name}{'type'} = $type;
315 $_markers{$name}{'group'} = 0;
316 $_markers{$name}{'global'} = 0;
317 $_markers{$name}{'anchor'} = 0;
319 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
320 if ($line =~ /^Global_position\s+([\d.]+)\s*(Frame)?/) {
321 my $position = $1 - floor
($1/1000)*1000;
322 $position = sprintf("%.2f",$position);
324 $_markers{$name}{'global'} = $position;
325 $_markers{$name}{'group'} = floor
($1/1000);
326 $_markers{$name}{'anchor'} = 1;
329 $_markers{$name}{'framework'} = 1;
332 $_markers{$name}{'framework'} = 0;
335 elsif ($line =~ /^Anchor_bin\s+"([\w\d.]+)"/) {
337 my $grptype = $map->group_type();
339 $grpmatch =~ /(\d+|\w)(.*)/;
341 my ($group,$subgroup);
345 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
347 $_markers{$name}{'group'} = $group;
348 $_markers{$name}{'subgroup'} = $subgroup;
350 elsif ($line =~ /^Anchor_pos\s+([\d.]+)\s+(F|P)?/){
351 $_markers{$name}{'global'} = $1;
352 $_markers{$name}{'anchor'} = 1;
355 $_markers{$name}{'framework'} = 1;
358 $_markers{$name}{'framework'} = 0;
361 elsif ($line =~ /^anchor$/) {
362 $_markers{$name}{'anchor'} = 1;
364 elsif ($line =~ /^Remark\s+"(.+)"/) {
365 $_markers{$name}{'remark'} .= $1;
366 $_markers{$name}{'remark'} .= "\n";
370 print "Adding Marker $curMarker...\n"
371 if ($self->verbose() && $curMarker % 1000 == 0);
374 $map->_setMarkerRef(\
%_markers);
377 my $grpabbr = $map->group_abbr();
380 $_contigs{0}{'group'} = 0;
382 while (defined($line = <$fh>)) {
384 if ($line =~ /^Ctg(\d+)/) {
386 $_contigs{$ctgname}{'group'} = 0;
387 $_contigs{$ctgname}{'anchor'} = 0;
388 $_contigs{$ctgname}{'position'} = 0;
390 if ($line =~ /#\w*(.*)\w*$/) {
391 $_contigs{$ctgname}{'remark'} = $1;
392 if ($line =~ /#\s+Chr(\d+)\s+/) {
393 $_contigs{$ctgname}{'group'} = $1;
394 $_contigs{$ctgname}{'anchor'} = 1;
398 elsif ($line =~ /^Chr_remark\s+"(-|\+|Chr(\d+))\s+(.+)"$/) {
400 $_contigs{$ctgname}{'anchor'} = 1;
401 $_contigs{$ctgname}{'chr_remark'} = $3 if(defined($3));
404 $_contigs{$ctgname}{'group'} = $2;
407 $_contigs{$ctgname}{'group'} = "?";
410 elsif ($line =~ /^User_remark\s+"(.+)"/) {
411 $_contigs{$ctgname}{'usr_remark'} = $1;
413 elsif ($line =~ /^Trace_remark\s+"(.+)"/) {
414 $_contigs{$ctgname}{'trace_remark'} = $1;
416 elsif ($grpabbr && $line =~ /^Chr_remark\s+"(\W|$grpabbr((\d+)|(\w+)|([.\w\d]+)))\s*(\{(.*)\}|\[(.*)\])?"\s+(Pos\s+((\d.)+|NaN))(NOEDIT)?/)
422 print "Warning: Nan encountered for Contig position \n";
424 $_contigs{$ctgname}{'chr_remark'} = $6;
425 $_contigs{$ctgname}{'position'} = $pos;
426 $_contigs{$ctgname}{'subgroup'} = 0;
428 if (defined($grpmatch)) {
429 $_contigs{$ctgname}{'anchor'} = 1;
431 if ($grpmatch =~ /((\d+)((\D\d.\d+)|(.\d+)))|((\w+)(\.\d+))/) {
433 my ($group,$subgroup);
434 $group = $2 if($grpabbr eq "Chr");
435 $subgroup = $3 if($grpabbr eq "Chr");
437 $group = $7 if($grpabbr eq "Lg");
438 $subgroup = $8 if($grpabbr eq "Lg");
440 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
441 $_contigs{$ctgname}{'group'} = $group;
442 $_contigs{$ctgname}{'subgroup'} = $subgroup;
446 $_contigs{$ctgname}{'group'} = $grpmatch;
450 $_contigs{$ctgname}{'anchor'} = 1;
451 $_contigs{$ctgname}{'group'} = "?";
455 print "Adding Contig $curContig...\n"
456 if ($self->verbose() && $curContig % 100 == 0);
459 $map->_setContigRef(\
%_contigs);
460 $map->_calc_markerposition();
461 $map->_calc_contigposition() if ($map->version() < 7.0);
462 $map->_calc_contiggroup() if ($map->version() == 4.6);
471 Usage : $mapio->write_map($map);
472 Function: Write a map out
474 Args : Bio::Map::MapI
479 my ($self,@args) = @_;
480 $self->throw_not_implemented();
489 User feedback is an integral part of the evolution of this and other
490 Bioperl modules. Send your comments and suggestions preferably to
491 the Bioperl mailing list. Your participation is much appreciated.
493 bioperl-l@bioperl.org - General discussion
494 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
498 Please direct usage questions or support issues to the mailing list:
500 I<bioperl-l@bioperl.org>
502 rather than to the module maintainer directly. Many experienced and
503 reponsive experts will be able look at the problem and quickly
504 address it. Please include a thorough description of the problem
505 with code and data examples if at all possible.
507 =head2 Reporting Bugs
509 Report bugs to the Bioperl bug tracking system to help us keep track
510 of the bugs and their resolution. Bug reports can be submitted via the
513 https://github.com/bioperl/bioperl-live/issues
515 =head1 AUTHOR - Gaurav Gupta
517 Email gaurav@genome.arizona.edu
519 =head1 PROJECT LEADERS
521 Jamie Hatfield jamie@genome.arizona.edu
523 Dr. Cari Soderlund cari@genome.arizona.edu
525 =head1 PROJECT DESCRIPTION
527 The project was done in Arizona Genomics Computational Laboratory
528 (AGCoL) at University of Arizona.
530 This work was funded by USDA-IFAFS grant #11180 titled "Web Resources
531 for the Computation and Display of Physical Mapping Data".
533 For more information on this project, please refer:
534 http://www.genome.arizona.edu
538 The rest of the documentation details each of the object methods.
539 Internal methods are usually preceded with a _