Bio/SeqIO/mbsout.pm

   1 # POD documentation - main docs before the code
   2
   3 =head1 NAME
   4
   5 Bio::SeqIO::mbsout - input stream for output by Teshima et al.'s mbs.
   6
   7 =head1 SYNOPSIS
   8
   9 Do not use this module directly.  Use it via the Bio::SeqIO class.
  10
  11 =head1 DESCRIPTION
  12
  13 mbs (Teshima KM, Innan H (2009) mbs: modifying Hudson's ms software to generate
  14 samples of DNA sequences with a biallelic site under selection. BMC
  15 Bioinformatics 10: 166 ) can be found at
  16 http://www.biomedcentral.com/1471-2105/10/166/additional/.
  17
  18 Currently this object can be used to read output from mbs into seq objects.
  19 However, because bioperl has no support for haplotypes created using an infinite
  20 sites model (where '1' identifies a derived allele and '0' identifies an
  21 ancestral allele), the sequences returned by mbsout are coded using A, T, C and
  22 G. To decode the bases, use the sequence conversion table (a hash) returned by
  23 get_base_conversion_table(). In the table, 4 and 5 are used when the ancestry is
  24 unclear. This should not ever happen when creating files with mbs, but it will
  25 be used when creating mbsOUT files from a collection of seq objects ( To be
  26 added later ). Alternatively, use get_next_hap() to get a string with 1's and
  27 0's instead of a seq object.
  28
  29 =head1 FEEDBACK
  30
  31 =head2 Mailing Lists
  32
  33 User feedback is an integral part of the evolution of this and other
  34 Bioperl modules. Send your comments and suggestions preferably to the
  35 Bioperl mailing list. Your participation is much appreciated.
  36
  37   bioperl-l@bioperl.org                  - General discussion
  38   http://bioperl.org/wiki/Mailing_lists  - About the mailing lists
  39
  40 =head2 Reporting Bugs
  41
  42 Report bugs to the Bioperl bug tracking system to help us keep track
  43 of the bugs and their resolution. Bug reports can be submitted via the
  44 web:
  45
  46   https://github.com/bioperl/bioperl-live/issues
  47
  48 =head1 AUTHOR - Warren Kretzschmar
  49
  50 This module was written by Warren Kretzschmar
  51
  52 email: wkretzsch@gmail.com
  53
  54 This module grew out of a parser written by Aida Andres.
  55
  56 =head1 COPYRIGHT
  57
  58 =head2 Public Domain Notice
  59
  60 This software/database is ``United States Government Work'' under the
  61 terms of the United States Copyright Act. It was written as part of
  62 the authors' official duties for the United States Government and thus
  63 cannot be copyrighted. This software/database is freely available to
  64 the public for use without a copyright notice. Restrictions cannot
  65 be placed on its present or future use.
  66
  67 Although all reasonable efforts have been taken to ensure the accuracy
  68 and reliability of the software and data, the National Human Genome
  69 Research Institute (NHGRI) and the U.S. Government does not and cannot
  70 warrant the performance or results that may be obtained by using this
  71 software or data.  NHGRI and the U.S. Government disclaims all
  72 warranties as to performance, merchantability or fitness for any
  73 particular purpose.
  74
  75 =head1 METHODS
  76
  77 =cut
  78
  79 package Bio::SeqIO::mbsout;
  80
  81 use version;
  82 our $API_VERSION = qv('1.1.3');
  83
  84 use strict;
  85 use base qw(Bio::SeqIO);    # This ISA Bio::SeqIO object
  86 use Bio::Seq::SeqFactory;
  87
  88 =head2 INTERNAL METHODS
  89
  90 =head3 _initialize
  91
  92 Title   : _initialize
  93 Usage   : $stream = Bio::SeqIO::mbsout->new($infile)
  94 Function: extracts basic information about the file.
  95 Returns : Bio::SeqIO object
  96 Args    : no_og
  97 Details : include 'no_og' flag = 0 if the last population of an mbsout file
  98           contains only one haplotype and you want the last haplotype to be
  99           treated as the outgroup.
 100 =cut
 101
 102 sub _initialize {
 103     my ( $self, @args ) = @_;
 104     $self->SUPER::_initialize(@args);
 105
 106     unless ( defined $self->sequence_factory ) {
 107         $self->sequence_factory( Bio::Seq::SeqFactory->new() );
 108     }
 109
 110     # Don't expect mbs to create an outgroup
 111     my ($no_og) = $self->_rearrange( [qw(NO_OG)], @args ) || 1;
 112
 113     my %initial_values = (
 114         RUNS              => undef,
 115         SEGSITES          => undef,
 116         MBS_INFO_LINE     => undef,
 117         TOT_RUN_HAPS      => undef,
 118         NEXT_RUN_NUM      => undef, # What run is the next hap from? undef = EOF
 119         LAST_READ_HAP_NUM => undef, # What did we just read from
 120         LAST_READ_POSITIONS                      => [],
 121         LAST_READ_SEGSITES                       => undef,
 122         BUFFER_HAP                               => undef,
 123         NO_OUTGROUP                              => $no_og,
 124         OPTIONS                                  => {},
 125         LAST_READ_ALLELES                        => [],
 126         LAST_READ_TRAJECTORY_FILE                => undef,
 127         LAST_READ_REPLICATION_OF_TRAJECTORY_FILE => undef,
 128         BASE_CONVERSION_TABLE_HASH_REF           => {
 129             'A' => 0,
 130             'T' => 1,
 131             'C' => 4,
 132             'G' => 5,
 133         },
 134     );
 135
 136     foreach my $key ( keys %initial_values ) {
 137         $self->{$key} = $initial_values{$key};
 138     }
 139
 140     # If the filehandle is defined open it and read a few lines
 141     if ( ref( $self->{_filehandle} ) eq 'GLOB' ) {
 142         $self->_read_start();
 143         return $self;
 144     }
 145
 146     # Otherwise throw a warning
 147     else {
 148         $self->throw(
 149 "No filehandle defined.  Please define a file handle through -file when calling mbsout with Bio::SeqIO"
 150         );
 151     }
 152 }
 153
 154 =head3 _read_start
 155
 156 Title   : _read_start
 157 Usage   : $stream->_read_start()
 158 Function: reads from the filehandle $stream->{_filehandle} all information up to the first haplotype (sequence).
 159 Returns : void
 160 Args    : none
 161
 162 =cut
 163
 164 sub _read_start {
 165     my $self = shift;
 166
 167     my $fh_IN = $self->{_filehandle};
 168
 169     # get the first five lines and parse for important info
 170     my ($mbs_info_line) = $self->_get_next_clean_hap( $fh_IN, 1, 1 );
 171
 172     my @mbs_info_line = split( /\s+/, $mbs_info_line );
 173
 174     # Parsing the mbs header line
 175     shift @mbs_info_line;
 176     shift @mbs_info_line;
 177     my $tot_run_haps = shift @mbs_info_line;
 178     my $runs;
 179
 180     # $pop_mut_param_per_site is the population mutation parameter per site.
 181     my $pop_mut_param_per_site;
 182
 183     # $pop_recomb_param_per_site is the population recombination parameter per
 184     # site.
 185     my $pop_recomb_param_per_site;
 186
 187     # $nsites is length of the simulated region.
 188     # $selpos is position of the target site of selection relative to the first
 189     # site of the simulated region.
 190     my $nsites;
 191     my $selpos;
 192
 193     # $nfile is number of trajectory files.
 194     # $nrep is number of replications for each trajectory.
 195     # $traj_filename is initial part of the name of the trajectory files.
 196     my $nfiles;
 197     my $nreps;
 198     my $traj_filename;
 199
 200     foreach my $word ( 0 .. $#mbs_info_line ) {
 201         if ( $mbs_info_line[$word] eq '-t' ) {
 202             $pop_mut_param_per_site = $mbs_info_line[ $word + 1 ];
 203         }
 204         elsif ( $mbs_info_line[$word] eq '-r' ) {
 205             $pop_recomb_param_per_site = $mbs_info_line[ $word + 1 ];
 206             $selpos                    = $mbs_info_line[ $word + 2 ];
 207         }
 208         elsif ( $mbs_info_line[$word] eq '-s' ) {
 209             $nsites = $mbs_info_line[ $word + 1 ];
 210             $selpos = $mbs_info_line[ $word + 2 ];
 211         }
 212         elsif ( $mbs_info_line[$word] eq '-f' ) {
 213             $nfiles        = $mbs_info_line[ $word + 1 ];
 214             $nreps         = $mbs_info_line[ $word + 2 ];
 215             $traj_filename = $mbs_info_line[ $word + 3 ];
 216             $runs          = $nfiles * $nreps;
 217         }
 218         else { next; }
 219     }
 220
 221     # Save mbs info data
 222     $self->{RUNS}                      = $runs;
 223     $self->{MBS_INFO_LINE}             = $mbs_info_line;
 224     $self->{TOT_RUN_HAPS}              = $tot_run_haps;
 225     $self->{POP_MUT_PARAM_PER_SITE}    = $pop_mut_param_per_site;
 226     $self->{POP_RECOMB_PARAM_PER_SITE} = $pop_recomb_param_per_site;
 227     $self->{NSITES}                    = $nsites;
 228     $self->{SELPOS}                    = $selpos;
 229     $self->{NFILES}                    = $nfiles;
 230     $self->{NREPS}                     = $nreps;
 231     $self->{TRAJ_FILENAME}             = $traj_filename;
 232 }
 233
 234 =head2 Methods to retrieve mbsout data
 235
 236 =head3 get_segsites
 237
 238 Title   : get_segsites
 239 Usage   : $segsites = $stream->get_segsites()
 240 Function: returns the number segsites in the mbsout file (according to the mbsout header line).
 241 Returns : scalar
 242 Args    : NONE
 243
 244 =cut
 245
 246 sub get_segsites {
 247     my $self = shift;
 248     if ( defined $self->{SEGSITES} ) {
 249         return $self->{SEGSITES};
 250     }
 251     else {
 252         return $self->get_current_run_segsites;
 253     }
 254 }
 255
 256 =head3 get_current_run_segsites
 257
 258 Title   : get_current_run_segsites
 259 Usage   : $segsites = $stream->get_current_run_segsites()
 260 Function: returns the number of segsites in the run of the last read haplotype (sequence).
 261 Returns : scalar
 262 Args    : NONE
 263
 264 =cut
 265
 266 sub get_current_run_segsites {
 267     my $self = shift;
 268     return $self->{LAST_READ_SEGSITES};
 269 }
 270
 271 =head3 get_pop_mut_param_per_site
 272
 273 Title   : get_pop_mut_param_per_site
 274 Usage   : $pop_mut_param_per_site = $stream->get_pop_mut_param_per_site()
 275 Function: returns 4*N0*mu or the "population mutation parameter per site"
 276 Returns : scalar
 277 Args    : NONE
 278
 279 =cut
 280
 281 sub get_pop_mut_param_per_site {
 282     my $self = shift;
 283     return $self->{POP_MUT_PARAM_PER_SITE};
 284 }
 285
 286 =head3 get_pop_recomb_param_per_site
 287
 288 Title   : get_pop_recomb_param_per_site
 289 Usage   : $pop_recomb_param_per_site = $stream->get_pop_recomb_param_per_site()
 290 Function: returns 4*N0*r or the "population recombination parameter per site"
 291 Returns : scalar
 292 Args    : NONE
 293
 294 =cut
 295
 296 sub get_pop_recomb_param_per_site {
 297     my $self = shift;
 298     return $self->{POP_RECOMB_PARAM_PER_SITE};
 299 }
 300
 301 =head3 get_nsites
 302
 303 Title   : get_nsites
 304 Usage   : $nsites = $stream->get_nsites()
 305 Function: returns the number of sites simulated by mbs.
 306 Returns : scalar
 307 Args    : NONE
 308
 309 =cut
 310
 311 sub get_nsites {
 312     my $self = shift;
 313     return $self->{NSITES};
 314 }
 315
 316 =head3 get_selpos
 317
 318 Title   : get_selpos
 319 Usage   : $selpos = $stream->get_selpos()
 320 Function: returns the location on the chromosome where the allele is located that was selected for by mbs.
 321 Returns : scalar
 322 Args    : NONE
 323
 324 =cut
 325
 326 sub get_selpos {
 327     my $self = shift;
 328     return $self->{SELPOS};
 329 }
 330
 331 =head3 get_nreps
 332
 333 Title   : get_nreps
 334 Usage   : $nreps = $stream->get_nreps()
 335 Function: returns the number replications done by mbs on each trajectory file to create the mbsout file.
 336 Returns : scalar
 337 Args    : NONE
 338
 339 =cut
 340
 341 sub get_nreps {
 342     my $self = shift;
 343     return $self->{NREPS};
 344 }
 345
 346 =head3 get_nfiles
 347
 348 Title   : get_nfiles
 349 Usage   : $nfiles = $stream->get_nfiles()
 350 Function: returns the number of trajectory files used by mbs to create the mbsout file
 351 Returns : scalar
 352 Args    : NONE
 353
 354 =cut
 355
 356 sub get_nfiles {
 357     my $self = shift;
 358     return $self->{NFILES};
 359 }
 360
 361 =head3 get_traj_filename
 362
 363 Title   : get_traj_filename
 364 Usage   : $traj_filename = $stream->get_traj_filename()
 365 Function: returns the prefix of the trajectory files used by mbs to create the mbsout file
 366 Returns : scalar
 367 Args    : NONE
 368
 369 =cut
 370
 371 sub get_traj_filename {
 372     my $self = shift;
 373     return $self->{TRAJ_FILENAME};
 374 }
 375
 376 =head3 get_runs
 377
 378 Title   : get_runs
 379 Usage   : $runs = $stream->get_runs()
 380 Function: returns the number of runs in the mbsout file
 381 Returns : scalar
 382 Args    : NONE
 383
 384 =cut
 385
 386 sub get_runs {
 387     my $self = shift;
 388     return $self->{RUNS};
 389 }
 390
 391 =head3 get_Positions
 392
 393 Title   : get_Positions
 394 Usage   : @positions = $stream->get_Positions()
 395 Function: returns an array of the names of each segsite of the run of the last read hap.
 396 Returns : array
 397 Args    : NONE
 398
 399 =cut
 400
 401 sub get_Positions {
 402     my $self = shift;
 403     return @{ $self->{LAST_READ_POSITIONS} };
 404 }
 405
 406 =head3 get_tot_run_haps
 407
 408 Title   : get_tot_run_haps
 409 Usage   : $number_of_haps_per_run = $stream->get_tot_run_haps()
 410 Function: returns the number of haplotypes (sequences) in each run of the mbsout file.
 411 Returns : scalar >= 0
 412 Args    : NONE
 413
 414 =cut
 415
 416 sub get_tot_run_haps {
 417     my $self = shift;
 418     return $self->{TOT_RUN_HAPS};
 419 }
 420
 421 =head3 get_mbs_info_line
 422
 423 Title   : get_mbs_info_line
 424 Usage   : $mbs_info_line = $stream->get_mbs_info_line()
 425 Function: returns the header line of the mbsout file.
 426 Returns : scalar
 427 Args    : NONE
 428
 429 =cut
 430
 431 sub get_mbs_info_line {
 432     my $self = shift;
 433     return $self->{MBS_INFO_LINE};
 434 }
 435
 436 =head3 tot_haps
 437
 438 Title   : tot_haps
 439 Usage   : $number_of_haplotypes_in_file = $stream->tot_haps()
 440 Function: returns the number of haplotypes (sequences) in the mbsout file.  Information gathered from mbsout header line.
 441 Returns : scalar
 442 Args    : NONE
 443
 444 =cut
 445
 446 sub get_tot_haps {
 447     my $self = shift;
 448     return ( $self->{TOT_RUN_HAPS} * $self->{RUNS} );
 449 }
 450
 451 =head3 next_run_num
 452
 453 Title   : next_run_num
 454 Usage   : $next_run_number = $stream->next_run_num()
 455 Function: returns the number of the mbs run that the next haplotype (sequence)
 456           will be taken from (starting at 1).  Returns undef if the complete
 457           file has been read.
 458 Returns : scalar > 0 or undef
 459 Args    : NONE
 460
 461 =cut
 462
 463 sub get_next_run_num {
 464     my $self = shift;
 465     return $self->{NEXT_RUN_NUM};
 466 }
 467
 468 =head3 get_last_haps_run_num
 469
 470 Title   : get_last_haps_run_num
 471 Usage   : $last_haps_run_number = $stream->get_last_haps_run_num()
 472 Function: returns the number of the ms run that the last haplotype (sequence)
 473           was taken from (starting at 1).  Returns undef if no hap has been
 474           read yet.
 475 Returns : scalar > 0 or undef
 476 Args    : NONE
 477
 478 =cut
 479
 480 sub get_last_haps_run_num {
 481     my $self = shift;
 482     return $self->{LAST_HAPS_RUN_NUM};
 483 }
 484
 485 =head3 get_last_read_hap_num
 486
 487 Title   : get_last_read_hap_num
 488 Usage   : $last_read_hap_num = $stream->get_last_read_hap_num()
 489           Function: returns the number (starting with 1) of the last haplotype
 490           read from the mbs file
 491 Returns : scalar >= 0
 492 Args    : NONE
 493 Details : 0 means that no haplotype has been read yet.
 494
 495 =cut
 496
 497 sub get_last_read_hap_num {
 498     my $self = shift;
 499     return $self->{LAST_READ_HAP_NUM};
 500 }
 501
 502 =head3 outgroup
 503
 504 Title   : outgroup
 505 Usage   : $outgroup = $stream->outgroup()
 506 Function: returns '1' if the mbsout object has an outgroup.  Returns '0'
 507           otherwise.
 508 Returns :  1 or 0, currently always 0
 509 Args    : NONE
 510 Details : This method will return '1' only if the last population in the mbsout
 511           file contains only one haplotype.  If the last population is not an
 512           outgroup then create the mbsout object using 'no_outgroup' as input
 513           parameter for new() (see mbsout->new()).
 514
 515           Currently there exists no way of introducing an outgroup into an mbs
 516           file, so this function will always return '0'.
 517
 518 =cut
 519
 520 sub outgroup {
 521     my $self = shift;
 522     if   ( $self->{NO_OUTGROUP} ) { return 0; }
 523     else                          { return 0; }
 524 }
 525
 526 =head3 get_next_seq
 527
 528 Title   : get_next_seq
 529 Usage   : $seq = $stream->get_next_seq()
 530 Function: reads and returns the next sequence (haplotype) in the stream
 531 Returns : Bio::Seq object
 532 Args    : NONE
 533 Note    : This function is included only to conform to convention.  It only
 534           calls next_hap() and passes on that method's return value.  Use
 535           next_hap() instead for better performance.
 536
 537 =cut
 538
 539 sub get_next_seq {
 540     my $self      = shift;
 541     my $seqstring = $self->get_next_hap;
 542
 543     return unless defined $seqstring;
 544
 545     # Used to create unique ID;
 546     my $run = $self->get_last_haps_run_num;
 547
 548     # Converting numbers to letters so that the haplotypes can be stored as a
 549     # seq object
 550     my $rh_base_conversion_table = $self->get_base_conversion_table;
 551     foreach my $base ( keys %{$rh_base_conversion_table} ) {
 552         $seqstring =~ s/($rh_base_conversion_table->{$base})/$base/g;
 553     }
 554
 555     my $last_read_hap = $self->get_last_read_hap_num;
 556
 557     my $id = 'Hap_' . $last_read_hap . '_Run_' . $run;
 558     my $description =
 559         'Segsites '
 560       . $self->get_current_run_segsites
 561       . "; Positions $self->positions; Haplotype "
 562       . $last_read_hap
 563       . '; Run '
 564       . $run . ';';
 565     my $seq = $self->sequence_factory->create(
 566         -seq      => $seqstring,
 567         -id       => $id,
 568         -desc     => $description,
 569         -alphabet => q(dna),
 570         -direct   => 1,
 571     );
 572
 573     return $seq;
 574
 575 }
 576
 577 =head3 get_next_hap
 578
 579 Title   : get_next_hap
 580 Usage   : $seq = $stream->get_next_hap()
 581 Function: reads and returns the next sequence (haplotype) in the stream. Returns
 582           void if all sequences in stream have been read.
 583 Returns : Bio::Seq object
 584 Args    : NONE
 585 Note    : Use this instead of get_next_seq().
 586
 587 =cut
 588
 589 sub get_next_hap {
 590     my $self = shift;
 591
 592     # Let's figure out how many haps to read from the input file so that
 593     # we get back to the beginning of the next run.
 594
 595     my $end_run = 0;
 596     if ( $self->{TOT_RUN_HAPS} == $self->{LAST_READ_HAP_NUM} + 1 ) {
 597         $end_run = 1;
 598     }
 599
 600     # Setting last_haps_run_num
 601     $self->{LAST_HAPS_RUN_NUM} = $self->get_next_run_num;
 602
 603     my $fh_IN = $self->{_filehandle};
 604
 605     my ($seqstring) =
 606       $self->_get_next_clean_hap( $self->{_filehandle}, 1, $end_run );
 607
 608     return $seqstring;
 609 }
 610
 611 =head3 get_next_run
 612
 613 Title   : get_next_run
 614 Usage   : @seqs = $stream->get_next_run()
 615 Function: reads and returns all the remaining sequences (haplotypes) in the mbs
 616           run of the next sequence.
 617 Returns : array of Bio::Seq objects
 618 Args    : NONE
 619
 620 =cut
 621
 622 sub get_next_run {
 623     my $self = shift;
 624
 625     # Let's figure out how many haps to read from the input file so that
 626     # we get back to the beginning of the next run.
 627
 628     my $haps_to_pull = $self->{TOT_RUN_HAPS} - $self->{LAST_READ_HAP_NUM};
 629
 630     # Read those haps from the input file
 631     # Next hap read will be the first hap of the next run.
 632
 633     my @seqs;
 634     for ( 1 .. $haps_to_pull ) {
 635         my $seq = $self->get_next_seq;
 636         next unless defined $seq;
 637
 638         push @seqs, $seq;
 639     }
 640
 641     return @seqs;
 642 }
 643
 644 =head2 METHODS TO RETRIEVE CONSTANTS
 645
 646 =head3 base_conversion_table
 647
 648 Title   : get_base_conversion_table
 649 Usage   : $table_hash_ref = $stream->get_base_conversion_table()
 650 Function: returns a reference to a hash.  The keys of the hash are the letters
 651           'A','T','G','C'.  The values associated with each key are the value
 652           that each letter in the sequence of a seq object returned by a
 653           Bio::SeqIO::mbsout stream should be translated to.
 654 Returns : reference to a hash
 655 Args    : NONE
 656 Synopsis:
 657
 658         # retrieve the Bio::Seq object's sequence
 659         my $haplotype = $seq->seq;
 660         my $rh_base_conversion_table = $stream->get_base_conversion_table();
 661
 662         # need to convert all letters to their corresponding numbers.
 663         foreach my $base (keys %{$rh_base_conversion_table}){
 664                 $haplotype =~ s/($base)/$rh_base_conversion_table->{$base}/g;
 665         }
 666
 667         # $haplotype is now an ms style haplotype. (e.g. '100101101455')
 668
 669 =cut
 670
 671 sub get_base_conversion_table {
 672     my $self = shift;
 673     return $self->{BASE_CONVERSION_TABLE_HASH_REF};
 674 }
 675
 676 ##############################################################################
 677 ## subs for internal use only
 678 ##############################################################################
 679
 680 sub _get_next_clean_hap {
 681
 682     #By Warren Kretzschmar
 683
 684     # return the next non-empty line from file handle (chomped line)
 685     # skipps to the next run if '//' is encountered
 686     my ( $self, $fh, $times, $end_run ) = @_;
 687     my @data;
 688
 689     unless ( defined $fh ) { return; }
 690
 691     unless ( defined $times && $times > 0 ) {
 692         $times = 1;
 693     }
 694
 695     if ( defined $self->{BUFFER_HAP} ) {
 696         push @data, $self->{BUFFER_HAP};
 697         $self->{BUFFER_HAP} = undef;
 698         $self->{LAST_READ_HAP_NUM}++;
 699         $times--;
 700     }
 701
 702     while ( 1 <= $times-- ) {
 703
 704         # Find next clean line
 705         my $data = <$fh>;
 706         last if !defined($data);
 707         chomp $data;
 708         while ( $data !~ /./ ) {
 709             $data = <$fh>;
 710             chomp $data;
 711         }
 712
 713         # If the next run is encountered here, then we have a programming
 714         # or format error
 715         if ( $data eq '//' ) { $self->throw("'//' found when not expected\n") }
 716
 717         $self->{LAST_READ_HAP_NUM}++;
 718         push @data, $data;
 719     }
 720
 721     if ($end_run) {
 722         $self->_load_run_info($fh);
 723     }
 724
 725     return (@data);
 726 }
 727
 728 sub _load_run_info {
 729
 730     my ( $self, $fh ) = @_;
 731
 732     my $data = <$fh>;
 733
 734     # In this case we are at EOF
 735     if ( !defined($data) ) { $self->{NEXT_RUN_NUM} = undef; return; }
 736
 737     chomp $data;
 738
 739     while ( $data !~ /./ ) {
 740         $data = <$fh>;
 741
 742         # In this case we are at EOF
 743         if ( !defined($data) ) { $self->{NEXT_RUN_NUM} = undef; return; }
 744         chomp $data;
 745     }
 746
 747     # If the next run is encountered, then skip to the next hap and save it in
 748     # the buffer.
 749     if ( $data =~ /^\/\// ) {
 750         $self->{NEXT_RUN_NUM}++;
 751         $self->{LAST_READ_HAP_NUM} = 0;
 752         my @data = split( /\s+/,  $data );
 753         my @temp = split( /\/\//, $data[0] );
 754         @temp = split( /-/, $temp[0] );
 755         $self->{LAST_READ_TRAJ_FILE}             = $temp[0];
 756         $self->{LAST_LEAD_TRAJ_FILE_REPLICATION} = $temp[1];
 757         $self->{LAST_READ_ALLELES}               = \@data[ 2 .. $#data ];
 758
 759         for ( 1 .. 3 ) {
 760             $data = <$fh>;
 761             while ( $data !~ /./ ) {
 762                 $data = <$fh>;
 763             }
 764             chomp $data;
 765
 766             @data = split( /\s+/, $data );
 767
 768             if ( $_ eq '1' ) {
 769                 $self->{LAST_READ_SEGSITES} = $data[1];
 770             }
 771             elsif ( $_ eq '2' ) {
 772                 $self->{LAST_READ_POSITIONS} = [ @data[ 1 .. $#data ] ];
 773             }
 774             else {
 775                 if ( !defined($data) ) {
 776                     $self->throw("run $self->{NEXT_RUN_NUM} has no haps./n");
 777                 }
 778                 $self->{BUFFER_HAP} = $data;
 779             }
 780         }
 781     }
 782     else { $self->throw("'//' not encountered when expected\n") }
 783
 784 }
 785 1;