lib/data_subs.pm

   1 This file contains the code for the subroutines in
   2 Perl-speaks-NONMEMs data module. It is not functional by itself.
   3 The code should be transferred to the module autogenerated by dia2code
   4 using the fill_diacode.pl script.
   5
   6 TODO: Fix the synced attribute so that all methods that change the state of the
   7 data object sets synced=0
   8
   9 # {{{ include
  10
  11 start include statements
  12 use Digest::MD5 'md5_hex';
  13 use OSspecific;
  14 use File::Copy "cp";
  15 use Carp;
  16 use Carp qw(cluck);
  17 use Config;
  18 use Math::Random;
  19 use Storable;
  20 use debug;
  21 use ui;
  22 use status_bar;
  23 use Data::Dumper;
  24 use Time::HiRes qw(gettimeofday);
  25 my @primary_column_names = ('ID', 'DATE', 'DAT1', 'DAT2', 'DAT3' ,'L1', 'L2', 'DV', 'MDV', 'RAW_', 'MRG_', 'RPT_', 'TIME', 'DROP', 'SKIP', 'EVID', 'AMT', 'RATE', 'SS', 'II', 'ADDL', 'CMT', 'PCMT', 'CALL');
  26 end include
  27
  28 # }}} include statements
  29
  30 # {{{ description
  31
  32 start description
  33     # The structure of the data class is subject-centric, recognising that
  34     # the subjects included in a study often can be regarded as
  35     # independent. A class for the subject level exists within PsN and is
  36     # called the individual class. A data object consists of at least one
  37     # but probably many individual objects plus optional comments.
  38 end description
  39
  40 # }}} description
  41
  42 # {{{ synopsis
  43
  44 start synopsis
  45     #   use data;
  46     #
  47     #   my $data_obj = data -> new ( filename => 'test040314.dta' );
  48     #
  49     #   $data_obj -> renumber_ascending;
  50     #
  51     #   my $subsets_ref = $data_obj -> case_deletion( bins => 10 );
  52     #
  53     #   my @subsets = @{$subsets_ref};
  54 end synopsis
  55
  56 # }}} synopsis
  57
  58 # {{{ see_also
  59
  60 start see_also
  61     # =begin html
  62     #
  63     # <a HREF="model.html">model</a>, <a HREF="output.html">output</a>,
  64     # <a HREF="tool/modelfit.html">tool::modelfit</a>,
  65     # <a HREF="tool.html">tool</a>
  66     #
  67     # =end html
  68     #
  69     # =begin man
  70     #
  71     # model, output, tool::modelfit, tool
  72     #
  73     # =end man
  74 end see_also
  75
  76 # }}} see_also
  77
  78 # {{{ new
  79
  80 start new
  81       {
  82         # If the column holding the subject identifier is not the
  83         # first, it can be specified using the I<idcolumn> attribute
  84         #
  85         # I<ignoresign> determines which rows that are regarded as
  86         # comments. Corresponds to the IGNORE= option in the $DATA
  87         # record in a NONMEM model file.
  88
  89         $this -> {'use_data_table'} = 0;
  90
  91         ( $this -> {'directory'},
  92           $this -> {'filename'} ) = OSspecific::absolute_path( $this -> {'directory'},
  93                                                                $this->{'filename'} );
  94
  95         debug -> warn( level => 2,
  96                        message => "data -> new: Data object initialized from file: ".
  97                        $this -> full_name );
  98
  99 #       sub register_in_database {
 100 #         my $this = shift;
 101 #         # Backslashes messes up the sql syntax
 102 #         my $file_str = $this->{'filename'};
 103 #         my $dir_str = $this->{'directory'};
 104 #         $file_str =~ s/\\/\//g;
 105 #         $dir_str =~ s/\\/\//g;
 106
 107 #         # md5sum
 108 #         my $md5sum = md5_hex(OSspecific::slurp_file($this-> full_name ));
 109 #         my $dbh = DBI ->
 110 #             connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
 111 #                     ";databse=".$PsN::config -> {'_'} -> {'project'},
 112 #                     $PsN::config -> {'_'} -> {'user'},
 113 #                     $PsN::config -> {'_'} -> {'password'},
 114 #                     {'RaiseError' => 1});
 115 #         my $sth;
 116 #         my $sth = $dbh -> prepare( "SELECT data_id FROM ".$PsN::config -> {'_'} -> {'project'}.
 117 #                              ".data ".
 118 #                                    "WHERE filename = '$file_str' AND ".
 119 #                                    "directory = '$dir_str' AND ".
 120 #                                    "md5sum = '".$md5sum."'" );
 121 #         $sth -> execute or debug -> die( message => $sth->errstr ) ;
 122 #         my $select_arr = $sth -> fetchall_arrayref;
 123 #         if ( scalar @{$select_arr} > 0 ) {
 124 #           debug -> warn( level   => 1,
 125 #                          message => "Found an old entry in the database matching the ".
 126 #                          "current data file" );
 127 #           if ( scalar @{$select_arr} > 1 ) {
 128 #             debug -> warn( level   => 1,
 129 #                            message => "Found more than one matching entry in database".
 130 #                            ", using the first" );
 131 #           }
 132 #           $this -> {'data_id'} = $select_arr->[0][0];
 133 #         } else {
 134 #           my ( $date_str, $time_str );
 135 #           if ( $Config{osname} eq 'MSWin32' ) {
 136 #             $date_str = `date /T`;
 137 #             $time_str = ' '.`time /T`;
 138 #           } else {
 139 #             # Assuming UNIX
 140 #             $date_str = `date`;
 141 #           }
 142 #           chomp($date_str);
 143 #           chomp($time_str);
 144 #           my $date_time = $date_str.$time_str;
 145 #           $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
 146 #                              ".data (filename,date,directory,md5sum) ".
 147 #                                  "VALUES ('$file_str', '$date_time', '$dir_str','".
 148 #                                  $md5sum."' )");
 149 #           $sth -> execute;
 150 #           $this -> {'data_id'} = $sth->{'mysql_insertid'};
 151 #         }
 152 #         $sth -> finish;
 153 #         $dbh -> disconnect;
 154 #         }
 155
 156
 157         unless ( ( defined $this -> {'header'} and
 158                    scalar @{$this -> {'header'}} > 0 ) or
 159                  ( defined $this -> {'individuals'} and
 160                    scalar @{$this -> {'individuals'}} > 0 ) ) {
 161           if ( -e $this -> full_name ) {
 162             if ( $this -> {'target'} eq 'mem' ) {
 163 #             &register_in_database( $this ) if ( $PsN::config -> {'_'} -> {'use_database'} and
 164 #                                                 $this -> {'use_data_table'} );
 165               $this -> _read_header;
 166               $this -> _read_individuals;
 167               $this -> {'synced'} = 1;
 168             } else {
 169               $this -> {'synced'} = 0;
 170             }
 171           } else {
 172             debug -> die(message => "No header, individuals, and no file " . $this -> full_name . " on disk.")
 173               unless $this -> {'ignore_missing_files'};
 174             $this ->  {'synced'} = 0;
 175           }
 176         } else {
 177           if ( $this -> {'target'} eq 'mem') {
 178             if ( -e $this -> {'filename'} ) {
 179               $this -> _read_header;
 180 #             &register_in_database if ( $PsN::config -> {'_'} -> {'use_database'} and
 181 #                                        $this -> {'use_data_table'} );
 182               $this -> _read_individuals;
 183               $this -> {'synced'} = 1;
 184             } else {
 185               debug -> die(message => "No file:".$this->{'filename'}." on disk" )
 186                   unless $this -> {'ignore_missing_files'};
 187               $this -> {'synced'} = 0;
 188             }
 189           } else {
 190             $this -> flush;
 191           }
 192         }
 193
 194         if ( $this -> {'synced'} ) {
 195           my $i = 1;
 196           foreach my $head ( @{$this -> {'header'}} ) {
 197             $this -> {'column_head_indices'} -> {$head} = $i;
 198             $i++;
 199           }
 200         }
 201 #       $Data::Dumper::Maxdepth = 3;
 202 #       die Dumper $this -> {'individuals'};
 203       }
 204 end new
 205
 206 # }}} new
 207
 208 # {{{ register_in_database
 209 start register_in_database
 210     if ( $PsN::config -> {'_'} -> {'use_database'} ) {
 211       # Backslashes messes up the sql syntax
 212       my $file_str = $self->{'filename'};
 213       my $dir_str = $self->{'directory'};
 214       $file_str =~ s/\\/\//g;
 215       $dir_str =~ s/\\/\//g;
 216
 217       my $project = $PsN::config -> {'_'} -> {'project'};
 218       # md5sum
 219       my $md5sum = md5_hex(OSspecific::slurp_file($self-> full_name ));
 220
 221       my $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
 222                                ";databse=".$project,
 223                                $PsN::config -> {'_'} -> {'user'},
 224                                $PsN::config -> {'_'} -> {'password'},
 225                                {'RaiseError' => 1});
 226
 227       my $sth;
 228
 229       my $select_arr = [];
 230
 231       if ( not $force ) {
 232         my $sth = $dbh -> prepare( "SELECT data_id FROM ".$project.
 233                                    ".data ".
 234                                    "WHERE filename = '$file_str' AND ".
 235                                    "directory = '$dir_str' AND ".
 236                                    "md5sum = '".$md5sum."'" );
 237         $sth -> execute or debug -> die( message => $sth->errstr ) ;
 238         $select_arr = $sth -> fetchall_arrayref;
 239       }
 240
 241       if ( scalar @{$select_arr} > 0 ) {
 242         'debug' -> warn( level   => 1,
 243                          message => "Found an old entry in the database matching the ".
 244                          "current data file" );
 245         if ( scalar @{$select_arr} > 1 ) {
 246           'debug' -> warn( level   => 1,
 247                            message => "Found more than one data matching entry in database".
 248                            ", using the first" );
 249         }
 250         $self -> {'data_id'} = $select_arr->[0][0];
 251         # Find the id's
 252         my $sth = $dbh -> prepare( "SELECT individual_id FROM ".$project.".data_individual ".
 253                                    "WHERE data_id = '".$self -> {'data_id'}."'" );
 254         $sth -> execute or debug -> die( message => $sth->errstr ) ;
 255         my $id_arr = $sth -> fetchall_arrayref;
 256         map( $_ = $_ -> [0], @{$id_arr} );
 257         $self -> {'individual_ids'} = $id_arr;
 258       } elsif ( defined $self -> {'individuals'} ) {
 259         my ( $date_str, $time_str );
 260         if( $Config{osname} eq 'MSWin32' ){
 261           $date_str = `date /T`;
 262           $time_str = ' '.`time /T`;
 263         } else {
 264           # Assuming UNIX
 265           $date_str = `date`;
 266         }
 267         chomp($date_str);
 268         chomp($time_str);
 269         my $date_time = $date_str.$time_str;
 270         my ( $columns, $values );
 271         my $res_str = $resampled ? '1' : '0';
 272         if ( defined $model_id ) {
 273           $columns = '(model_id, filename, date, directory, md5sum, resampled)';
 274           $values = "('$model_id', '$file_str', '$date_time', '$dir_str','".
 275               $md5sum."', '$res_str' )";
 276         } else {
 277           $columns = '(filename, date, directory, md5sum, resampled)';
 278           $values = "('$file_str', '$date_time', '$dir_str','".$md5sum."', '$res_str' )";
 279         }
 280         $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
 281                                ".data $columns VALUES $values");
 282         $sth -> execute;
 283         $self -> {'data_id'} = $sth->{'mysql_insertid'};
 284
 285         if ( defined $self -> {'data_id'} ) {
 286           my $values;
 287           my $columns = "( id_key, id )";
 288           if( $#individual_ids >= 0 ) {
 289             $self -> register_di_relation( individual_ids => \@individual_ids );
 290           } else {
 291             my $inds = scalar @{$self -> {'individuals'}};
 292             $dbh -> do( "LOCK TABLES ".$PsN::config -> {'_'} -> {'project'}.
 293                         ".individual WRITE" );
 294 #           $sth = $dbh -> prepare( "SELECT MAX(individual_id)".
 295 #                                   " FROM ".$PsN::config -> {'_'} -> {'project'}.
 296 #                                   ".individual" );
 297             $dbh -> do( 'USE '.$PsN::config -> {'_'} -> {'project'} );
 298             $sth = $dbh -> prepare( "SHOW TABLE STATUS LIKE 'individual'" );
 299             $sth -> execute or debug -> die( message => $sth->errstr ) ;
 300             my $select_arr = $sth -> fetchall_arrayref;
 301             my $first_id_id = $select_arr -> [0][10] ?
 302                 $select_arr -> [0][10] : 0;
 303 #           my $first_id_id = $select_arr -> [0][0] ? ($select_arr -> [0][0] + 1) : 0;
 304             my $last_id_id = $first_id_id + $inds - 1;
 305             for( my $i = 0; $i < $inds; $i++ ) {
 306               if( defined $self -> {'individuals'}[$i] ) {
 307                 my $id_id = $self -> {'individuals'}[$i] -> idnumber;
 308                 $values = $values."," if ( defined $values );
 309                 $values = $values."( $i, $id_id )";
 310               }
 311             }
 312             $sth = $dbh -> prepare( "INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
 313                                     ".individual $columns VALUES $values" );
 314             $sth -> execute;
 315             $dbh -> do( "UNLOCK TABLES" );
 316             @individual_ids = ($first_id_id .. $last_id_id);
 317             $self -> register_di_relation( individual_ids => \@individual_ids );
 318           }
 319           $self -> {'individual_ids'} = \@individual_ids;
 320         }
 321         $sth -> finish;
 322         $dbh -> disconnect;
 323       }
 324       $data_id = $self -> {'data_id'}; # return the data_id
 325     }
 326 end register_in_database
 327 # }}} register_in_database
 328
 329 # {{{ register_di_relation
 330 start register_di_relation
 331   if ( $PsN::config -> {'_'} -> {'use_database'} and
 332        defined $self -> {'data_id'} and $#individual_ids >= 0 ) {
 333     my $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
 334                              ";databse=".$PsN::config -> {'_'} -> {'project'},
 335                              $PsN::config -> {'_'} -> {'user'},
 336                              $PsN::config -> {'_'} -> {'password'},
 337                              {'raiseerror' => 1});
 338     my $sth;
 339     my $values;
 340     my $columns = "( data_id, individual_id )";
 341     foreach my $individual_id ( @individual_ids ) {
 342       if ( defined $individual_id ) {
 343         $values = $values."," if ( defined $values );
 344         $values = $values."(".$self -> {'data_id'}.", $individual_id )";
 345       }
 346     }
 347     $sth = $dbh -> prepare( "INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
 348                                ".data_individual $columns VALUES $values" );
 349     $sth -> execute;
 350     $sth -> finish if ( defined $sth );
 351     $dbh -> disconnect;
 352   }
 353 end register_di_relation
 354 # }}} register_di_relation
 355
 356 # {{{ full_name
 357
 358 start full_name
 359       {
 360         $full_name = $self -> {'directory'} . $self -> {'filename'};
 361       }
 362 end full_name
 363
 364 # }}}
 365
 366 # {{{ bootstrap
 367
 368 start bootstrap
 369       {
 370         # The bootstrap method draws I<samples> number of boostrap
 371         # samples from the data set. The I<subjects> arguments
 372         # determines the size of each sample (default equals to the
 373         # number of individuals in the original data set). The method
 374         # returns references to three arrays: I<boot_samples_ref>,
 375         # which holds the bootstrap data sets, I<incl_individuals_ref>
 376         # which holds arrays containing the subject identifiers (ID's)
 377         # for the included individuals of each bootstrap data set and
 378         # I<included_keys_ref> which holds the key or index of the
 379         # included individuals. The key or index is an integer
 380         # starting at 1 for the first individual in the original data
 381         # set and increasing by one for each following.
 382         $self -> synchronize;
 383         my @header      = @{$self -> {'header'}};
 384         my $individuals = $self -> {'individuals'};
 385         my $key_ref;
 386
 387         my $status_bar = status_bar -> new( steps => $samples );
 388         ui -> print( category => 'bootstrap',
 389                      message => $status_bar -> print_step,
 390                      newline => 0);
 391
 392         for ( my $i = 1; $i <= $samples; $i++ ) {
 393           my $new_name = defined $name_stub ? $name_stub."_$i.dta" : "bs$i.dta";
 394           $new_name = $directory.'/'.$new_name;
 395           my ( $boot, $incl_ind_ref, $incl_key_ref ) =
 396             $self -> resample( subjects    => $subjects,
 397                                resume      => $resume,
 398                                new_name    => $new_name,
 399                                target      => $target,
 400                                stratify_on => $stratify_on,
 401                                model_id    => $model_ids[$i-1] );
 402           push( @included_keys, $incl_key_ref );
 403           push( @incl_individuals, $incl_ind_ref );
 404 #         $boot -> renumber_ascending;
 405           push( @boot_samples, $boot );
 406 #          $boot -> synchronize;
 407 #          $boot -> flush;
 408           if( $status_bar -> tick() ){
 409             ui -> print( category => 'bootstrap',
 410                          message => $status_bar -> print_step,
 411                          newline => 0,
 412                          wrap => 0);
 413           }
 414         }
 415         ui -> print( category => 'bootstrap',
 416                      message => ' ... done' );
 417       }
 418 end bootstrap
 419
 420 # }}} bootstrap
 421
 422 # {{{ resample
 423
 424 start resample
 425       {
 426         $self -> synchronize;
 427         my ( @header, $individuals, @bs_inds, $key_ref, @id_ids, @bs_id_ids );
 428         @id_ids = @{$self -> {'individual_ids'}} if( defined $self -> {'individual_ids'} );
 429         if ( defined $stratify_on ) {
 430           unless ( $resume and -e $new_name ) {
 431             @header = @{$self -> {'header'}};
 432             $individuals = $self -> {'individuals'};
 433             my %strata;
 434             if( $stratify_on =~ /\D/ ){
 435               %strata = %{$self -> factors( column_head => $stratify_on )};
 436               if ( $strata{'Non-unique values found'} eq '1' ) {
 437                 debug -> die( message => "Individuals were found to have multiple values in the $stratify_on column. ".
 438                               "The column $stratify_on cannot be used for stratification of the resampling." );
 439               }
 440             } else {
 441               %strata = %{$self -> factors( column => $stratify_on )};
 442               if ( $strata{'Non-unique values found'} eq '1' ) {
 443                 debug -> die( message => "Individuals were found to have multiple values in column number $stratify_on. ".
 444                               "Column $stratify_on cannot be used for stratification of the resampling." );
 445               }
 446             }
 447
 448             while( my ( $factor, $key_list ) = each %strata ) {
 449               my $keys = scalar @{$key_list};
 450               for ( my $i = 0; $i < $keys; $i++ ) {
 451                 my $list_ref = random_uniform_integer(1,0,$keys-1);
 452                 push( @bs_inds, $individuals ->
 453                       [ $key_list -> [$list_ref] ] -> copy );
 454                 push( @included_keys, $key_list -> [$list_ref] );
 455                 push( @incl_individuals, $individuals ->
 456                       [ $key_list -> [$list_ref] ] -> idnumber );
 457                 push( @bs_id_ids, $id_ids[ $key_list -> [$list_ref] ] );
 458               }
 459             }
 460
 461             $boot = data -> new( header      => \@header,
 462                                  idcolumn    => $self -> {'idcolumn'},
 463                                  ignoresign  => $self -> {'ignoresign'},
 464                                  individuals => \@bs_inds,
 465                                  filename    => $new_name,
 466                                  ignore_missing_files => 1,
 467                                  target      => 'mem' );
 468             $boot -> renumber_ascending;
 469             $boot -> _write;
 470             $boot -> flush;
 471             #$boot -> target( $target );
 472           }
 473         } else {
 474           unless ( $resume and -e $new_name ) {
 475             @header = @{$self -> {'header'}};
 476             $individuals = $self -> {'individuals'};
 477             for ( my $i = 1; $i <= $subjects; $i++ ) {
 478               $key_ref = random_uniform_integer(1,0,scalar @{$individuals}-1);
 479               push( @bs_inds, $individuals -> [ $key_ref ] -> copy );
 480               push( @included_keys, $key_ref );
 481               push( @incl_individuals, $individuals -> [ $key_ref ] -> idnumber );
 482               push( @bs_id_ids, $id_ids[ $key_ref ] );
 483             }
 484
 485             # MUST FIX: If a file already exists with the same name,
 486             # the created bs data set will be appended to this. IT
 487             # MUST BE OVERWRITTEN!
 488             $boot = data -> new( header      => \@header,
 489                                  idcolumn    => $self -> {'idcolumn'},
 490                                  ignoresign  => $self -> {'ignoresign'},
 491                                  individuals => \@bs_inds,
 492                                  filename    => $new_name,
 493                                  ignore_missing_files => 1,
 494                                  target      => 'mem' );
 495             $boot -> renumber_ascending;
 496             $boot -> _write;
 497             $boot -> target( $target );
 498           } else {
 499             # If we are resuming, we still need to generate the
 500             # pseudo-random sequence and initiate a data object
 501             for ( my $i = 1; $i <= $subjects; $i++ ) {
 502               random_uniform_integer(1,0,scalar @{$individuals}-1)
 503             }
 504             $boot = data -> new( idcolumn    => $self -> {'idcolumn'},
 505                                  ignoresign  => $self -> {'ignoresign'},
 506                                  filename    => $new_name,
 507                                  ignore_missing_files => 1,
 508                                  target      => $target );
 509             $boot -> _write;
 510             $boot -> flush;
 511           }
 512           if( $target eq 'disk'){
 513             $boot -> flush;
 514           }
 515         }
 516         $boot -> register_in_database( individual_ids => \@bs_id_ids,
 517                                        resampled      => 1,
 518                                        model_id       => $model_id );
 519       }
 520 end resample
 521
 522 # }}} resample
 523
 524 # {{{ case_deletion
 525
 526 start case_deletion
 527       {
 528         # case_deletion creates subsets of the data. The number of
 529         # subsets is specified by the bins argument. The individuals
 530         # of each subset is selected randomly or in ascending
 531         # numerical order depending on the selection argument that can
 532         # be either 'consecutive' or 'random'. case_column must be
 533         # specified to give the method something to base the selection
 534         # on. Valid case_column values are either the column number
 535         # (pure digits) or the name of the column in the (optional)
 536         # header row.
 537         $self -> synchronize;
 538         my @header    = @{$self -> {'header'}};
 539         if ( not defined $case_column ) {
 540           debug -> die( message => "case_column must be specified" );
 541         } else {
 542           if ( not $case_column =~ /^\d/ ) {
 543             for ( my $i = 0; $i <= $#header; $i++ ) {
 544               $case_column = $i+1 if ( $header[$i] eq $case_column );
 545             }
 546           }
 547         }
 548         $bins = defined $bins ? $bins :
 549           scalar keys %{$self -> factors( column => $case_column)};
 550         my %factors   = %{$self -> factors( column => $case_column )};
 551         if ( $factors{'Non-unique values found'} eq '1' ) {
 552           debug -> die( message => "Individuals were found to have multiple values in column number $case_column. ".
 553                         "Column $case_column cannot be used for case deletion." );
 554         }
 555
 556         my $maxbins   = scalar keys %factors;
 557         my @ftrs      = sort { $a <=> $b } keys %factors;
 558         my $individuals = $self -> {'individuals'};
 559         my $maxkey    = scalar @{$individuals} - 1;
 560
 561         my ( @tmp_ftrs, @binsize ) =
 562             ((),());
 563         my ( $k, $j, $i ) = ( 0, 0, 0 );
 564         # Create the binsizes
 565         for ( $j = 0; $j < $maxbins; $j++ ) {
 566           $binsize[ $k++ ]++;
 567           $k = 0 if( $k >= $bins );
 568         }
 569         $self -> _fisher_yates_shuffle( array => \@ftrs ) if( $selection eq 'random' );
 570         for ( $k = 0; $k < $bins; $k++ ) {
 571           for ( $j = 0; $j < $binsize[ $k ]; $j++ ) {
 572 #           print "SK: ",$skipped_keys[ $k ]," F: ",$factors{ $ftrs[ $i ] },"\n";
 573             push( @{$skipped_keys[ $k ]}, @{$factors{ $ftrs[ $i ] }} );
 574             push( @{$skipped_values[ $k ]}, $ftrs[ $i++ ] );
 575           }
 576         }
 577
 578         for ( $k = 0; $k < $bins; $k++ ) {
 579           my @cd_inds = ();
 580           my @del_inds = ();
 581         SELKEYS: foreach my $key ( 0..$maxkey ) {
 582           foreach my $skipped ( @{$skipped_keys[ $k ]} ) {
 583             if ( $key == $skipped ) {
 584               push( @{$skipped_ids[ $k ]}, $individuals ->
 585                     [ $skipped ] -> idnumber );
 586               push( @del_inds, $individuals -> [ $key ] -> copy );
 587               next SELKEYS;
 588             }
 589           }
 590           push( @cd_inds, $individuals -> [ $key ] -> copy );
 591         }
 592           # Set ignore_missing_files = 1 to make it possible to get the result
 593           # in memory only
 594           my $newdata = data ->
 595               new ( header      => \@header,
 596                     ignoresign  => $self -> {'ignoresign'},
 597                     idcolumn    => $self -> {'idcolumn'},
 598                     individuals => \@cd_inds,
 599                     target      => $target,
 600                     filename    => 'cd'.$k+1 .'.dta',
 601                     ignore_missing_files => 1 );
 602           my $deldata = data ->
 603               new ( header      => \@header,
 604                     ignoresign  => $self -> {'ignoresign'},
 605                     idcolumn    => $self -> {'idcolumn'},
 606                     individuals => \@del_inds,
 607                     target      => $target,
 608                     filename    => 'del'.$k+1 .'.dta',
 609                     ignore_missing_files => 1 );
 610           push( @subsets, $newdata );
 611           push( @remainders, $deldata );
 612         }
 613       }
 614 end case_deletion
 615
 616 # }}} case_deletion
 617
 618 # {{{ copy
 619 start copy
 620       {
 621         # filename: new data file name.
 622         #
 623         # target: keep the copy in memory ('mem') or write it to disk and flush the memory ('disk').
 624
 625         ($directory, $filename) = OSspecific::absolute_path( $directory, $filename );
 626
 627         # Clone self into new data object. Why don't the individuals get cloned too?
 628         # strange. need to set synced to 0 AND set the {'individuals'} to undef.
 629         cp($self -> full_name, $directory.$filename );
 630         $new_data = Storable::dclone( $self );
 631         $new_data -> {'synced'} = 0;
 632         $new_data -> {'individuals'} = undef;
 633         $new_data -> synchronize;
 634
 635         # Set the new file name for the copy
 636         $new_data -> directory( $directory );
 637         $new_data -> filename( $filename );
 638       }
 639 end copy
 640
 641 # }}} copy
 642
 643 # {{{ count_ind
 644
 645 start count_ind
 646       {
 647         # Returns the number of individuals in the data set.
 648         $self -> synchronize;
 649         $num = scalar @{$self -> {'individuals'}};
 650       }
 651 end count_ind
 652
 653 # }}} count_ind
 654
 655 # {{{ diff
 656 start diff
 657 {
 658   $self -> synchronize;
 659
 660   my $first_id = $self -> {'individuals'}[0];
 661
 662   debug -> die( message => "No individuals defined in data object based on ".
 663                 $self -> full_name ) unless ( defined $first_id );
 664
 665   # Check if $column(-index) is defined and valid, else try to find index
 666   # using column_head
 667
 668   my @data_row = split( /,/, $first_id -> subject_data -> [0] );
 669   if( $#columns >= 0 ) {
 670     foreach my $column ( @columns ) {
 671       unless ( defined $column && defined( $data_row[$column-1] ) ) {
 672         debug -> die( message => "Error in data -> factors: ".
 673                       "invalid column number: \"$column\"\n".
 674                       "Valid column numbers are 1 to ".
 675                       scalar @{$first_id -> subject_data ->[0]}."\n" );
 676       }
 677     }
 678   } elsif ( $#column_heads >= 0 ) {
 679     foreach my $column_head ( @column_heads ) {
 680       unless (defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})) {
 681         debug -> die( message => "Error in data -> factors: unknown column: \"$column_head\" ".
 682                       "Valid column headers are (in no particular order):\n".
 683                       join(', ',keys(%{$self -> {'column_head_indices'}})) );
 684       } else {
 685         my $column = $self -> {'column_head_indices'}{$column_head};
 686         push( @columns, $column );
 687         debug -> warn( level   => 2,
 688                        message => "$column_head is in column number $column" );
 689       }
 690     }
 691   } else {
 692     debug -> die( message => "No column or column_head defined" );
 693   }
 694
 695   if( $global_largest or $global_smallest or
 696       $largest_per_individual or $smallest_per_individual ) {
 697     if( not scalar @{$self -> {'individuals'}} == scalar @{$against_data -> individuals} ) {
 698       debug -> die( message => "Both data object must hold the same number of individuals ".
 699                     "and observations when calling data -> diff" );
 700     }
 701     for( my $i = 0; $i < scalar @{$self -> {'individuals'}}; $i++ ) {
 702       my %id_diffs = %{$self -> {'individuals'}[$i] ->
 703                            diff( against_individual => $against_data -> individuals -> [$i],
 704                                  columns            => \@columns,
 705                                  absolute_diff      => $absolute_diff,
 706                                  diff_as_fraction   => $diff_as_fraction,
 707                                  largest            => ( $global_largest or $largest_per_individual ),
 708                                  smallest           => ( $global_smallest or $smallest_per_individual ) )};
 709       if( $global_largest ) {
 710         for( my $j = 0; $j <= $#columns; $j++ ) {
 711           my $label = defined $column_heads[$j] ? $column_heads[$j] : $columns[$j];
 712           if( not defined $diff_results{$label} or not defined $diff_results{$label}{'diff'} or
 713               $id_diffs{$columns[$j]}{'diff'} > $diff_results{$label}{'diff'} ) {
 714             $diff_results{$label}{'diff'} = $id_diffs{$columns[$j]}{'diff'};
 715             $diff_results{$label}{'self'} = $id_diffs{$columns[$j]}{'self'};
 716             $diff_results{$label}{'test'} = $id_diffs{$columns[$j]}{'test'};
 717           }
 718         }
 719       }
 720     }
 721   } else {
 722     die "data -> diff is only implemented for finding the largest difference at any observation at this point\n";
 723   }
 724 }
 725 end diff
 726 # }}} diff
 727
 728 # {{{ filename
 729 start filename
 730       {
 731         if ( defined $parm and $parm ne $self -> {'filename'} ) {
 732           $self -> {'filename'} = $parm;
 733           $self -> {'data_id'} = undef;
 734 #         $self -> _write;
 735         }
 736       }
 737 end filename
 738 # }}} filename
 739
 740 # {{{ fractions
 741
 742 start fractions
 743       {
 744         my %factors = $self -> factors( 'return_occurences' => 1,
 745                                         'unique_in_individual' => $unique_in_individual,
 746                                         'column_head' => $column_head,
 747                                         'column' => $column);
 748
 749         my $sum = 0;
 750         while (my ($factor, $amount) = each %factors) {
 751           if ( $factor == $self -> {'missing_data'} && $ignore_missing ) {
 752             next;
 753           } else {
 754             $sum += $amount;
 755           }
 756         }
 757         while (my ($factor, $amount) = each %factors) {
 758           if ( $factor == $self -> {'missing_data'} && $ignore_missing ) {
 759             next;
 760           } else {
 761             $fractions{$factor} = $amount/$sum;
 762           }
 763         }
 764       }
 765 end fractions
 766
 767 # }}} fractions
 768
 769 # {{{ factors
 770
 771 start factors
 772       {
 773         # Either column (number, starting at 1) or column_head must be specified.
 774         #
 775         # The default behaviour is to return a hash with the factors as keys
 776         # and as values references to arrays with the order numbers (not the ID numbers)
 777         # of the individuals that contain this factor
 778         #
 779         # If unique_in_individual is true (1), the returned hash will contain
 780         # an element with key 'Non-unique values found' and value 1 if any
 781         # individual contain more than one value in the specified column.
 782         #
 783         # Return occurences will calculate the occurence of each
 784         # factor value. Several occurences in one individual counts as
 785         # one occurence. The elements of the returned hash will have the factors
 786         # as keys and the number of occurences as values.
 787         #
 788
 789         $self -> synchronize;
 790
 791         # Check if $column(-index) is defined and valid, else try to find index
 792         # using column_head
 793         my $first_id = $self -> {'individuals'}[0];
 794
 795         debug -> die( message => "No individuals defined in data object based on ".
 796                       $self -> full_name ) unless ( defined $first_id );
 797
 798         my @data_row = split( /,/, $first_id -> subject_data -> [0] );
 799         unless ( defined $column && defined( $data_row[$column-1] ) ) {
 800           unless (defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})) {
 801             debug -> die( message => "Error in data -> factors: unknown column: \"$column_head\" ".
 802                           "or invalid column number: \"$column\".\n".
 803                           "Valid column numbers are 1 to ".scalar @data_row ."\n".
 804                           "Valid column headers are (in no particular order):\n".
 805                           join(', ',keys(%{$self -> {'column_head_indices'}})) );
 806           } else {
 807             $column = $self -> {'column_head_indices'}{$column_head};
 808             debug -> warn( level   => 2,
 809                            message => "$column_head is in column number $column" );
 810           }
 811         }
 812
 813         my $key = 0;
 814         foreach my $individual ( @{$self -> {'individuals'}} ) {
 815           my @ifactors = keys %{$individual -> factors( column => $column )};
 816           if ( scalar @ifactors > 1 and $unique_in_individual ) {
 817             %factors = ( 'Non-unique values found' => 1 );
 818             last;
 819           }
 820           debug -> die( message => "No value found in column $column in individual ".
 821                         $individual -> idnumber ) if ( scalar @ifactors == 0 );
 822
 823           # Return occurences will calculate the occurence of each
 824           # factor value. Several occurences in one individual counts as
 825           # one occurence.
 826
 827           if ( $return_occurences ) {
 828             foreach my $ifactor ( @ifactors ) {
 829               $factors{$ifactor}++;
 830             }
 831           } else {
 832             foreach my $ifactor ( @ifactors ) {
 833               push( @{$factors{$ifactor}}, $key );
 834             }
 835           }
 836           $key++;
 837         }
 838       }
 839 end factors
 840
 841 # }}} factors
 842
 843 # {{{ find_individual
 844
 845 # start find_individual
 846 #         foreach my $tmp_ind ( @{$self -> individuals} ) {
 847 #         if ( $tmp_ind -> key == $key ) {
 848 #           $individual = $tmp_ind;
 849 #           last;
 850 #         }
 851 #       }
 852 #         if ( defined $individual ) {
 853 #         if ( $copy ) {
 854 #           $individual = $individual -> copy;
 855 #         }
 856 #       } else {
 857 #         print "No individual with key $key found in call to ".
 858 #           "data -> find_individual\n" if ( $self -> debug );
 859 #         }
 860 # end find_individual
 861
 862 # }}}
 863
 864 # {{{ format_data
 865
 866 start format_data
 867       {
 868         my $header = $self -> {'header'};
 869
 870         # format the data for NONMEM (simple comma-separated layout)
 871         if ( defined $self -> {'comment'} ) {
 872           my @comment   = @{$self -> {'comment'}};
 873           for ( @comment ) {
 874             push( @form_data );
 875           }
 876         }
 877
 878         my $wrap = ( defined $self -> {'wrap_column'} and
 879                      defined $self -> {'cont_column'} );
 880
 881         my @primary_columns = defined $self -> {'primary_columns'} ?
 882           @{$self -> {'primary_columns'}} : ();
 883         my @secondary_columns = defined $self -> {'secondary_columns'} ?
 884           @{$self -> {'secondary_columns'}} : ();
 885         if ( defined $header and defined $self -> {'ignoresign'} ) {
 886           my $istr;
 887           if ( $self -> {'ignoresign'} ne '@' ) {
 888             $istr = $self -> {'ignoresign'};
 889           }
 890           if ( $wrap ) {
 891             my @h_data;
 892             for ( my $i = 0; $i <= $#secondary_columns ; $i++ ) {
 893               my $sstr = $istr;
 894               for ( my $j = 0; $j < scalar @{$secondary_columns[$i]} ; $j++ ) {
 895                 my $jstr = $j == 0 ? '' : ',';
 896                 $sstr = $sstr.$jstr.$secondary_columns[$i][$j][0];
 897               }
 898               push( @h_data, $sstr."\n" );
 899             }
 900             push( @form_data, @h_data );
 901             my $pstr = $istr;
 902             for ( my $i = 0; $i <= $#primary_columns ; $i++ ) {
 903               my $jstr = $i == 0 ? '' : ',';
 904               $pstr = $pstr.$jstr.$primary_columns[$i][0];
 905             }
 906             push( @form_data, $pstr."\n" );
 907           } else {
 908             push( @form_data, $istr.join(',',@{$self -> {'header'}})."\n" );
 909           }
 910         }
 911         if ( $wrap ) {
 912           foreach my $individual ( @{$self -> {'individuals'}} ) {
 913             foreach my $row ( @{$individual -> subject_data} ) {
 914               my @r_data;
 915               for ( my $i = 0; $i <= $#secondary_columns ; $i++ ) {
 916                 my $sstr = '';
 917                 for ( my $j = 0; $j < scalar @{$secondary_columns[$i]} ; $j++ ) {
 918                   my $jstr = $j == 0 ? '' : ',';
 919                   if ( $secondary_columns[$i][$j][0] eq 'CONT' ) {
 920                     $sstr = $sstr.$jstr.'1';
 921                   } else {
 922                     my @data_row = split( /,/, $row );
 923                     $sstr = $sstr.$jstr.$data_row[$secondary_columns[$i][$j][1]];
 924                   }
 925                 }
 926                 push( @r_data, $sstr."\n" );
 927               }
 928               push( @form_data, @r_data );
 929               my $pstr = '';
 930               for ( my $i = 0; $i <= $#primary_columns ; $i++ ) {
 931                 my $jstr = $i == 0 ? '' : ',';
 932                 if ( $primary_columns[$i][0] eq 'CONT' ) {
 933                   $pstr = $pstr.$jstr.'0';
 934                 } else {
 935                   my @data_row = split( /,/, $row );
 936                   $pstr = $pstr.$jstr.$data_row[$primary_columns[$i][1]];
 937                 }
 938               }
 939               push( @form_data, $pstr."\n" );
 940             }
 941           }
 942         } else {
 943           foreach my $individual ( @{$self -> {'individuals'}} ) {
 944             foreach my $row ( @{$individual -> subject_data} ) {
 945               push( @form_data, $row ."\n" );
 946             }
 947           }
 948         }
 949       }
 950 end format_data
 951
 952 # }}} format_data
 953
 954 # {{{ drop_dropped
 955
 956 start drop_dropped
 957       {
 958         # This method removes columns that has '=DROP' value in the
 959         # model header as given by $INPUT. The model header must be
 960         # transfered to this method through the model_header
 961         # argument. The model_header argument should be a
 962         # two-dimensional array where each position in the first
 963         # dimension should be a reference to a 1*2 array holding the
 964         # column name and value. Any ignore-sign must be removed.
 965
 966         debug -> die( message => 'model header must be defined' )
 967           if ( $#model_header < 0 );
 968         # Important that the drop_dropped method of the model::problem
 969         # class is in sync with this method.
 970         $self -> synchronize;
 971
 972         $self -> {'header'} = [];
 973         my @drop;
 974         my $counter = 1;
 975         for( my $i = 0; $i <= $#model_header; $i++ ) {
 976           $self -> {'idcolumn'} = $counter if ( $model_header[$i][0] eq 'ID' );
 977           if( $model_header[$i][1] eq 'DROP' and
 978               not $model_header[$i][0] =~ /DAT(E|1|2|3)/ ) {
 979             push( @drop, 1 );
 980           } else {
 981             $counter++;
 982             push( @drop, 0 );
 983             push( @{$self -> {'header'}}, $model_header[$i][0] );
 984           }
 985         }
 986
 987         foreach my $individual ( @{$self -> {'individuals'}} ) {
 988           $individual -> drop_columns( drop => \@drop );
 989         }
 990
 991         $self -> {'synced'} = 0;
 992 #       $Data::Dumper::Maxdepth = 2;
 993 #       die Dumper $self;
 994 #       die Dumper $self -> {'individuals'};
 995       }
 996 end drop_dropped
 997
 998 # }}} drop_dropped
 999
1000 # {{{ wrap
1001 start wrap
1002       {
1003         $self -> synchronize;
1004         $self -> cont_column( $cont_column ) if ( defined $cont_column );
1005         $self -> wrap_column( $wrap_column ) if ( defined $wrap_column );
1006         $self -> prepare_wrap( model_header => \@model_header );
1007         @secondary_columns = @{$self -> {'secondary_columns'}}
1008           if ( defined $self -> {'secondary_columns'} );
1009         @primary_columns = @{$self -> {'primary_columns'}}
1010           if ( defined $self -> {'primary_columns'} );
1011       }
1012 end wrap
1013 # }}} wrap
1014
1015 # {{{ unwrap
1016 start unwrap
1017       {
1018         $self -> {'cont_column'} = undef;
1019         $self -> {'wrap_column'} = undef;
1020         $self -> {'secondary_columns'} = undef;
1021         $self -> {'primary_columns'} = undef;
1022       }
1023 end unwrap
1024 # }}} unwrap
1025
1026 # {{{ prepare_wrap
1027
1028 start prepare_wrap
1029       {
1030         my $cont_column = $self -> {'cont_column'};
1031         my $wrap_column = $self -> {'wrap_column'};
1032         debug -> die( message => 'cont_column ('.$cont_column.') must be less or equal '.
1033                       'to the requested number of columns in each row ('.
1034                       ($wrap_column).')' )
1035           if ( $cont_column > $wrap_column );
1036         my @header;
1037         if ( scalar @model_header > 0 ) {
1038           @header = @model_header;
1039         } else {
1040           @header = @{$self -> {'header'}};
1041         }
1042
1043         my ( @primary, @secondary, @date_columns );
1044
1045         for ( my $i = 0; $i <= $#header; $i++ ) {
1046           my $name  = ref( $header[$i] ) eq 'ARRAY' ? $header[$i][0] : $header[$i];
1047           my $value = ref( $header[$i] ) eq 'ARRAY' ? $header[$i][1] : undef;
1048           next if ( $name eq 'ID' );
1049           my $found = 0;
1050           foreach my $prim ( @primary_column_names ) {
1051             if ( not $found and
1052                  ( $name eq $prim or $value eq $prim ) ) {
1053               push( @primary, [$name, $i, $value] );
1054               $found = 1;
1055               my $col = ($#primary+2)>= $cont_column ? ($#primary+3) : ($#primary+2);
1056               push( @date_columns, $col ) if ( $name =~ /DAT(E|1|2|3)/ );
1057             }
1058           }
1059           push( @secondary, [$name, $i, $value] ) if ( not $found );
1060         }
1061
1062         my $prim_num = scalar @primary;
1063         debug -> die( message => 'The number of primary columns (that need to '.
1064                       'be part of the row with CONT=0) ('.($prim_num+1).
1065                       ') is larger than the required number of columns (wrap_column='.
1066                       $wrap_column.') - 1' )
1067           if ( scalar $prim_num > ($wrap_column-2) );
1068
1069         my ( $i, $dum ) = ( 0, 1 );
1070         my @tmp;
1071         for ( my $j = 1; $j <= $wrap_column; $j++ ) {
1072           if( $j == 1 ) {
1073             push( @tmp, ['ID', $self -> {'idcolumn'}-1] );
1074           } elsif ( $j == $wrap_column ) {
1075             if ( $j == $cont_column ) {
1076               push( @tmp, ['CONT', undef] );
1077             } else {
1078               my $val;
1079               if ( defined $primary[$i] ) {
1080                 $val = $primary[$i];
1081               } elsif ( defined $secondary[0] ) {
1082                 $val = shift(@secondary);
1083               } else {
1084                 $val = ['XX'.$dum++,$self -> {'idcolumn'}-1];
1085               }
1086               push( @tmp, $val );
1087               $i++;
1088             }
1089             push( @{$self -> {'primary_columns'}}, @tmp );
1090           } else {
1091             if ( $j == $cont_column ) {
1092               push( @tmp, ['CONT', undef] );
1093             } else {
1094               if ( $i <= $#primary ) {
1095                 push( @tmp, $primary[$i] );
1096                 $i++;
1097               } else {
1098                 my $val = defined $secondary[0] ? shift(@secondary) :
1099                   ['XX'.$dum++,$self -> {'idcolumn'}-1];
1100                 push( @tmp, $val );
1101               }
1102             }
1103           }
1104         }
1105
1106         my $i = 0;
1107         while ( $i <= $#secondary ) {
1108           my @tmp;
1109           for ( my $j = 1; $j <= $wrap_column; $j++ ) {
1110             if( $j == 1 ) {
1111               push( @tmp, ['ID', $self -> {'idcolumn'}-1] );
1112             } elsif ( $j == $wrap_column ) {
1113               if ( $j == $cont_column ) {
1114                 push( @tmp, ['CONT', undef] );
1115               } else {
1116                 my $val = defined $secondary[$i] ? $secondary[$i] :
1117                   ['XX'.$dum++,$self -> {'idcolumn'}-1];
1118                 push( @tmp, $val );
1119                 $i++;
1120               }
1121               unshift( @{$self -> {'secondary_columns'}}, \@tmp );
1122             } else {
1123               if ( $j == $cont_column ) {
1124                 push( @tmp, ['CONT', undef] );
1125               } else {
1126                 my $isdate = 0;
1127                 if ( $#date_columns >= 0 ) {
1128                   foreach my $col ( @date_columns ) {
1129                     # This is a date column which may have to be dropped
1130                     # and thus will not appear as a secondary
1131                     # column. Nothing should be pushed. The indexes in
1132                     # model::problem::pk::_format_record will be ok.
1133                     $isdate = 1 if ( $col == $j ) ;
1134                   }
1135                 }
1136                 if ( $isdate ) {
1137                   push( @tmp, ['XX'.$dum++,$self -> {'idcolumn'}-1] );
1138                 } else {
1139                   if ( $i <= $#secondary ) {
1140                     push( @tmp, $secondary[$i] );
1141                     $i++;
1142                   } else {
1143                     push( @tmp, ['XX'.$dum++,$self -> {'idcolumn'}-1] );
1144                   }
1145                 }
1146               }
1147             }
1148           }
1149         }
1150       }
1151 end prepare_wrap
1152
1153 # }}} prepare_wrap
1154
1155 # {{{ have_missing_data
1156 start have_missing_data
1157       {
1158         # Either I<column> or I<column_head> must be specified.
1159         #
1160         # This method looks through the data column with index I<column> or
1161         # (optional) header name I<column_head> and returns O if no missing
1162         # data indicator was found or 1 otherwise.
1163
1164         $self -> synchronize;
1165         my $first_id = $self -> {'individuals'}[0];
1166         debug -> die( message => "No individuals defined in data object based on ".
1167                       $self -> full_name ) unless ( defined $first_id );
1168         my @data_row = split( /,/ , $first_id -> subject_data -> [0] );
1169         unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1170           unless(defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})){
1171             die "Error in data -> have_missing_data: unknown column: \"$column_head\" or invalid column number: \"$column\"\n";
1172           } else {
1173             $column = $self -> {'column_head_indices'}{$column_head};
1174           }
1175         }
1176         $self -> flush if ( $self -> {'target'} eq 'disk' );
1177
1178         # In case anyone wonders, the ternary statment ( bool ? true :
1179         # false ) below will possibly make a minuscle memory
1180         # optimization. But hey, why not :)
1181
1182         $return_value = defined $self -> {'have_missing_data'} ? $self -> {'have_missing_data'} -> {$column} : 0;
1183       }
1184 end have_missing_data
1185 # }}} have_missing_data
1186
1187 # {{{ merge
1188 start merge
1189       {
1190         #$self -> synchronize;
1191         push( @{$self -> {'individuals'}}, @{$mergeobj -> individuals} );
1192       }
1193 end merge
1194 # }}} merge
1195
1196 # {{{ max
1197
1198 start max
1199       {
1200         # Either column or column_head must be specified. Column_head must be a string that
1201         # identifies a column in the (optional ) data file header.
1202
1203 # The if-statement below used to be a cache of allready calculated
1204 # means. But since individuals can be accessed in so many ways, we
1205 # don't know when this cache should be updated. Its easier to
1206 # recalculate the max. Maybe we can include this optimization in the
1207 # future, if it turns out to be a bottleneck
1208 #       my $tmp_column = $self -> {'column_head_indices'}{$column_head};
1209 #       if ( defined $self -> {'max'}[$tmp_column] ) {
1210 #         $return_value = $self -> {'max'}[$tmp_column] ;
1211 #       } else {
1212           $self -> synchronize;
1213           my $first_id = $self -> {'individuals'}[0];
1214           debug -> die( message => "data -> max: No individuals defined in data object based on " .
1215                         $self -> full_name ) unless defined $first_id;
1216
1217           my @data_row = split( /,/ , $first_id -> subject_data ->[0] );
1218
1219           unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1220             unless (defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})) {
1221               die "Error in data -> max: unknown column: \"$column_head\" or invalid column number: \"$column\"\n";
1222             } else {
1223               $column = $self -> {'column_head_indices'}{$column_head};
1224             }
1225           }
1226           foreach my $individual ( @{$self -> {'individuals'}} ) {
1227             my $ifactors = $individual -> factors( 'column' => $column );
1228             foreach ( keys %{$ifactors} ) {
1229               next if ( $_ == $self -> {'missing_data_token'} );
1230               if ( defined ($return_value) ) {
1231                 $return_value = $_ > $return_value ? $_ : $return_value;
1232               } else {
1233                 $return_value = $_;
1234               }
1235             }
1236           }
1237
1238 #         $self -> {'max'}[$column] = $return_value;
1239           $self -> flush if ( $self -> {'target'} eq 'disk' );
1240 #       }
1241       }
1242 end max
1243
1244 # }}} max
1245
1246 # {{{ min
1247
1248 start min
1249       {
1250         # See L</max>.
1251         my $tmp_column = $self -> {'column_head_indices'}{$column_head};
1252
1253 # The if-statement below used to be a cache of allready calculated
1254 # means. But since individuals can be accessed in so many ways, we
1255 # don't know when this cache should be updated. Its easier to
1256 # recalculate the min. Maybe we can include this optimization in the
1257 # future, if it turns out to be a bottleneck
1258 #       if ( defined $self -> {'min'}[$tmp_column] ) {
1259 #         $return_value = $self -> {'min'}[$tmp_column] ;
1260 #       } else {
1261           $self -> synchronize;
1262           my $first_id = $self -> {'individuals'}[0];
1263           die "data -> min: No individuals defined in data object based on ",
1264             $self -> full_name,"\n" unless defined $first_id;
1265
1266           my @data_row = split( /,/ , $first_id -> subject_data ->[0] );
1267
1268           unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1269             unless (defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})) {
1270               die "Error in data -> min: unknown column: \"$column_head\" or invalid column number: \"$column\"\n";
1271             } else {
1272               $column = $self -> {'column_head_indices'}{$column_head};
1273             }
1274           }
1275           foreach my $individual ( @{$self -> {'individuals'}} ) {
1276             my $ifactors = $individual -> factors( 'column' => $column );
1277             foreach ( keys %{$ifactors} ) {
1278               next if ( $_ == $self -> {'missing_data_token'} );
1279               if ( defined ($return_value) ) {
1280                 $return_value = $_ < $return_value ? $_ : $return_value;
1281               } else {
1282                 $return_value = $_;
1283               }
1284             }
1285           }
1286 #         $self -> {'min'}[$column] = $return_value;
1287           $self -> flush if ( $self -> {'target'} eq 'disk' );
1288 #       }
1289       }
1290 end min
1291
1292 # }}} min
1293
1294 # {{{ median
1295
1296 start median
1297       {
1298           # See L</max>.
1299         $self -> synchronize;
1300         my $first_id = $self -> {'individuals'}[0];
1301         die "data -> median: No individuals defined in data object based on ",
1302           $self -> full_name,"\n" unless defined $first_id;
1303
1304         my @data_row = split( /,/ , $first_id -> subject_data ->[0] );
1305
1306         unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1307           unless(defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})){
1308             die "Error in data -> median: unknown column: \"$column_head\" or invalid column number: \"$column\"\n";
1309           } else {
1310             $column = $self -> {'column_head_indices'}{$column_head};
1311           }
1312         }
1313
1314         if( defined $self -> {'median'}[$column] ){
1315           return $self -> {'median'}[$column];
1316         }
1317
1318         my @median_array;
1319
1320         foreach my $individual ( @{$self -> {'individuals'}} ) {
1321           if( $unique_in_individual ){
1322             my $ifactors = $individual -> factors( 'column' => $column );
1323
1324             foreach ( keys %{$ifactors} ) {
1325               next if ( $_ == $self -> {'missing_data_token'} );
1326               push( @median_array, $_ );
1327             }
1328           } else {
1329             my $ifactors = $individual -> subject_data;
1330
1331             for(my $i=0; $i<=$#{$ifactors}; $i++ ) {
1332               my @data_row = split( /,/ , $ifactors -> [$i] );
1333               next if ( $data_row[$column-1] == $self -> {'missing_data_token'} );
1334               push(@median_array, $data_row[$column-1]);
1335             }
1336           }
1337         }
1338         @median_array = sort {$a <=> $b} @median_array ;
1339
1340         if( @median_array % 2 ){
1341           $return_value = $median_array[$#median_array / 2];
1342         } else {
1343           $return_value = ( $median_array[@median_array / 2] +
1344                             $median_array[(@median_array - 2) / 2] ) / 2;
1345         }
1346
1347         $self -> {'median'}[$column] = $return_value;
1348       }
1349 end median
1350
1351 # }}} median
1352
1353 # {{{ mean
1354
1355 start mean
1356   {
1357      # Returns mean value of a column
1358      # If a individual contains more then 1 value (i.e. if an
1359      # individual has different values in different samples a mean
1360      # value of all individuals if calculate first, then the mean
1361      # value of the column If hi_cutoff is defined the mean function
1362      # will cut all value below the cutoff, and set their value to
1363      # 0. It's used to calculate the HI-mean/LOW-mean of a column for
1364      # e.g. Hockey-stick covariates If both hi_cutoff and low_cutoff
1365      # are defined only the hi_cutoff will be used.  See L</max>.
1366      my $tmp_column = $self -> {'column_head_indices'}{$column_head};
1367      $self -> synchronize;
1368      my $first_id = $self -> {'individuals'}[0];
1369        die "data -> mean: No individuals defined in data object based on ",
1370         $self -> full_name,"\n" unless defined $first_id;
1371
1372      my @data_row = split( /,/, $first_id -> subject_data ->[0] );
1373
1374      unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1375        unless (defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})) {
1376          die "Error in data -> mean: unknown column: \"$column_head\" or invalid column number: \"$column\"\n";
1377        } else {
1378               $column = $self -> {'column_head_indices'}{$column_head};
1379          }
1380      }
1381
1382         ## Here the calculation starts
1383         my $num_individuals = 0;
1384         my $sum = 0;
1385
1386      my $all_data_rows=0;
1387         foreach my $individual ( @{$self ->{'individuals'}} ) {
1388
1389           my $ifactors = $individual -> subject_data;
1390           my $individual_sum = 0;
1391           my $data_rows = 0;
1392           for(my $i=0; $i<=$#{$ifactors}; $i++ ) {
1393
1394             # data is stored in strings. We need to split them into an
1395             # array.
1396
1397             my @data_row = split( /,/, $ifactors -> [$i] );
1398             if ( $data_row[$column-1] == $self -> {'missing_data_token'} ) {
1399 #             print "Skipping row with missing data\n";
1400               next;
1401             }
1402
1403             if( defined $subset_column and not eval ( $data_row[$subset_column-1].$subset_syntax ) ) {
1404 #             print "Skipping row outside subset: syntax: ".($subset_column-1)." $subset_syntax\n";
1405               next;
1406             }
1407
1408             if (defined $hi_cutoff) {
1409               if ($data_row[$column-1]>$hi_cutoff) {
1410                 $individual_sum += $data_row[$column-1]-$hi_cutoff;
1411               }
1412             }
1413             else {
1414               if (defined $low_cutoff) {
1415                 if ($data_row[$column-1]<$low_cutoff) {
1416                   $individual_sum += $low_cutoff - $data_row[$column-1];
1417                 }
1418               }
1419               else {
1420                 $individual_sum += $data_row[$column-1];
1421               }
1422             }
1423             $data_rows++;
1424           }
1425           if( $global_mean ) {
1426             $sum += $individual_sum;
1427             $num_individuals += $data_rows;
1428           } else {
1429             if( $data_rows != 0 ) {
1430               $sum += $individual_sum/$data_rows;
1431             }
1432             $num_individuals ++;
1433           }
1434           $all_data_rows += $data_rows;
1435         }
1436      if( $num_individuals != 0 ) {
1437        $return_value = $sum / $num_individuals;
1438      }
1439 #     print "DR: $all_data_rows\n";
1440 #     print "\nNIM: $num_individuals $return_value\n";
1441 }
1442
1443 end mean
1444
1445 # }}} mean
1446
1447 # {{{ sd
1448
1449 start sd
1450   {
1451     # This sub returns standard deviation for a specific column
1452     # If there are more than one sample/individual the value used for that specific
1453     # individual is the mean value of its samples.
1454     # The cut-offs are for hockey stick variables. I.e. If one individual value is
1455     # lower than the hi-cutoff the individual value will be zero.
1456     # HI_cutoff is used to calculate the HI-mean of a column.
1457     # If cut_off is undef it won't be used
1458     # See L</max>.
1459      my $tmp_column = $self -> {'column_head_indices'}{$column_head};
1460      $self -> synchronize;
1461      my $first_id = $self -> {'individuals'}[0];
1462      debug -> die( message => "No individuals defined in data object based on ".
1463                    $self -> full_name ) unless defined $first_id;
1464
1465      my @data_row = split( /,/ , $first_id -> subject_data ->[0] );
1466
1467      unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1468        unless (defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})) {
1469          debug -> die( message => "Unknown column: \"$column_head\" or "
1470                        ."invalid column number: \"$column\"" );
1471        } else {
1472          $column = $self -> {'column_head_indices'}{$column_head};
1473        }
1474      }
1475
1476      ## Here the calculation starts
1477      my $num_individuals = 0;
1478      my $sum = 0;
1479      my $mean;
1480      if (defined $hi_cutoff) {
1481        $mean = $self->mean(column   => $column,
1482                            hi_cutoff => $hi_cutoff,
1483                            global_mean => $global_sd );
1484      } elsif (defined $low_cutoff) {
1485        $mean = $self->mean(column   => $column,
1486                            low_cutoff => $low_cutoff,
1487                            global_mean => $global_sd );
1488      } else {
1489        $mean = $self->mean( column        => $column,
1490                             subset_column => $subset_column,
1491                             subset_syntax => $subset_syntax,
1492                             global_mean => $global_sd );
1493      }
1494
1495      foreach my $individual ( @{$self -> {'individuals'}} ) {
1496        my $ifactors = $individual -> subject_data;
1497        my $individual_sum = 0;
1498        my $data_rows = 0;
1499        for(my $i=0; $i<=$#{$ifactors}; $i++ ) {
1500
1501          # data is stored in strings. We need to split them into an
1502          # array.
1503
1504          my @data_row = split( /,/, $ifactors -> [$i] );
1505
1506          if ( $data_row[$column-1] == $self -> {'missing_data_token'} ) {
1507 #          print "Skipping row with missing data\n";
1508            next;
1509          }
1510
1511          if( defined $subset_column and not eval ( $data_row[$subset_column-1].$subset_syntax ) ) {
1512 #          print "Skipping row outside subset: syntax: ".($subset_column-1)." $subset_syntax\n";
1513            next;
1514          }
1515
1516          if (defined $hi_cutoff) {
1517            if ($ifactors->[$i]->[$column-1]>$hi_cutoff) {
1518              if( $global_sd ) {
1519                $individual_sum += ($data_row[$column-1] - $hi_cutoff - $mean) ** 2;
1520              } else {
1521                $individual_sum += $data_row[$column-1]-$hi_cutoff;
1522              }
1523            }
1524          } else {
1525            if (defined $low_cutoff) {
1526              if ($ifactors->[$i]->[$column-1]<$low_cutoff) {
1527                if( $global_sd ) {
1528                  $individual_sum += ($low_cutoff - $data_row[$column-1] - $mean) ** 2;
1529                } else {
1530                  $individual_sum += $low_cutoff - $data_row[$column-1];
1531                }
1532              }
1533            } else {
1534              if( $global_sd ) {
1535                $individual_sum += ($data_row[$column-1] - $mean) ** 2;
1536              } else {
1537                $individual_sum += $data_row[$column-1];
1538              }
1539            }
1540          }
1541          $data_rows++;
1542        }
1543        if( $global_sd ) {
1544          $sum += $individual_sum;
1545          $num_individuals += $data_rows;
1546        } else {
1547          if( $data_rows != 0 ) {
1548            $sum += ($individual_sum/$data_rows - $mean) ** 2;
1549          }
1550          $num_individuals++;
1551        }
1552      }
1553      if( $num_individuals < 2 ) {
1554        $return_value = 0;
1555      } else {
1556        if( $num_individuals != 0 ) {
1557          $return_value = (1/($num_individuals-1)*$sum) ** 0.5;
1558        }
1559      }
1560    }
1561
1562 end sd
1563
1564 # }}} sd
1565
1566 # {{{ range
1567 start range
1568       {
1569         # See L</max>.
1570         my $tmp_column = $self -> {'column_head_indices'}{$column_head};
1571         if ( defined $self -> {'range'}[$tmp_column] ) {
1572           $return_value = $self -> {'range'}[$tmp_column];
1573         } else {
1574           my $old_target = $self -> {'target'};
1575           $self -> {'target'} = 'mem';
1576           $self -> synchronize;
1577           $return_value = $self -> max( column      => $column,
1578                                         column_head => $column_head ) -
1579                                           $self -> min( column      => $column,
1580                                                         column_head => $column_head );
1581           $self -> {'range'}[$column] = $return_value;
1582           if ( $old_target eq 'disk' ) {
1583             $self -> flush if ( $self -> {'target'} eq 'disk' );
1584             $self -> {'target'} = 'disk';
1585           }
1586         }
1587       }
1588 end range
1589 # }}} range
1590
1591 # {{{ recalc_column
1592 start recalc_column
1593       {
1594           # Recalculates a column based on expression. Also, see L</max>.
1595         $self -> synchronize;
1596
1597         # Check if $column(-index) is defined and valid, else try to find index using column_head
1598         my $first_id = $self -> {'individuals'}[0];
1599         die "data -> recalc_column: No individuals defined in data object based on ",
1600           $self -> full_name,"\n" unless defined $first_id;
1601
1602         my @data_row = split( /,/ , $first_id -> subject_data ->[0] );
1603
1604         unless ( defined $column  && defined( $data_row[$column-1] ) ) {
1605           if(defined($column_head) && defined($self -> {'column_head_indices'}{$column_head})){
1606             die "Error in data -> recalc_column: unknown column: \"$column_head\" or column number: \"$column\"\n";
1607           } else {
1608             $column = $self -> {'column_head_indices'}{$column_head};
1609           }
1610         }
1611
1612         for my $individual ( @{$self -> {'individuals'}} ) {
1613           $individual -> recalc_column( column     => $column,
1614                                         expression => $expression );
1615         }
1616       }
1617 end recalc_column
1618 # }}} recalc_column
1619
1620 # {{{ renumber_ascending
1621
1622 start renumber_ascending
1623       {
1624 # Renumbers the individuals (changes the subject identifiers) so that
1625 # all have unique integer numbers starting with start_at and
1626 # ascending. The primary use of this
1627 # method is not to order the individuals after their identifiers but to
1628 # ensure that all individuals have unique identifiers.
1629
1630         $self -> synchronize;
1631         foreach my $individual ( @{$self -> {'individuals'}} ) {
1632           $individual -> idnumber ( $start_at++ );
1633         }
1634         $self -> {'synced'} = 0;
1635       }
1636 end renumber_ascending
1637
1638 # }}} renumber_ascending
1639
1640 # {{{ renumber_descending
1641
1642 start renumber_descending
1643       {
1644           # See L</renumber_ascending>.
1645         $self -> synchronize;
1646         foreach my $individual ( @{$self -> {'individuals'}} ) {
1647           $individual -> idnumber ( $start_at-- );
1648         }
1649         $self -> {'synced'} = 0;
1650       }
1651 end renumber_descending
1652
1653 # }}} renumber_descending
1654
1655 # {{{ single_valued_data
1656
1657 start single_valued_data
1658       {
1659         # Usage:
1660         #
1661         # ($single_value_data_set, $remainder, $column_indexes) =
1662         #      $data_object -> single_valued_data( subset_name         => 'subset.dta',
1663         #                                          remainder_name      => 'remainder.dta',
1664         #                                          target              => 'disk',
1665         #                                          do_not_test_columns => [1..18,24,26];
1666         #
1667         # my $single_value_column_indexes = $column_indexes -> [0];
1668         # my $all_other_column_indexes    = $column_indexes -> [1];
1669         #
1670         # Analyses the content of each column, based on the
1671         # ID column, and returns two new data objects: One
1672         # that contains all columns that is has only one value per
1673         # individual and one that contains the
1674         # remainding data. This is useful for creating compact 'extra'
1675         # data sets that can be read in via user-defined sub-routines
1676         # when the number of columns needed exceeds the maximum that
1677         # NONMEM allows (e.g. 20 in NONMEM version V).
1678         #
1679         # The I<do_not_test_columns> argument specifies on which columns
1680         # to skip the single value test
1681
1682         my @multi_value_flags;
1683         my @individuals = @{$self -> {'individuals'}};
1684         # Initiate the flags:
1685         if ( defined $individuals[0] ) {
1686           my @data = @{$individuals[0] -> {'subject_data'}};
1687           my @data_row = split( /,/ , $data[0] );
1688           for ( my $i = 0; $i < scalar @data_row; $i++ ) {
1689             my $dnt_flag = 0;
1690             foreach my $dntc ( @do_not_test_columns ) {
1691               $dnt_flag = 1 if ( $i == $dntc - 1 );
1692             }
1693             $multi_value_flags[$i] = $dnt_flag;
1694           }
1695         } else {
1696           die "data -> single_valued_data: No data in ID number 1\n";
1697         }
1698         # Collect the stats
1699         for ( my $id = 0; $id <= $#individuals; $id++ ) {
1700           my @data = @{$individuals[$id] -> {'subject_data'}};
1701           my @data_row = split( /,/, $data[0] );
1702           for ( my $j = 0; $j < scalar @data_row; $j++ ) {
1703             my %col_unique;
1704             for ( my $i = 0; $i <= $#data; $i++ ) {
1705               my @data_row = split( /,/ , $data[$i] );
1706               $col_unique{$data_row[$j]}++;
1707             }
1708             my $factors = scalar keys %col_unique;
1709             $multi_value_flags[$j]++ if ( $factors > 1 );
1710           }
1711         }
1712         for ( my $i = 0; $i <= $#multi_value_flags; $i++ ) {
1713           if ( $multi_value_flags[$i] ) {
1714             push ( @{$column_indexes[1]}, $i + 1);
1715           } else {
1716             push ( @{$column_indexes[0]}, $i + 1);
1717           }
1718         }
1719         ( $single_value_data_set, $remainder ) =
1720           $self -> subset_vertically( column_indexes      => $column_indexes[0],
1721                                       subset_name         => $subset_name,
1722                                       return_remainder    => 1,
1723                                       remainder_name      => $remainder_name,
1724                                       target              => $target,
1725                                       keep_first_row_only => 1);
1726       }
1727 end single_valued_data
1728
1729 # }}}
1730
1731 # {{{ subset_vertically
1732
1733 start subset_vertically
1734       {
1735         # Usage:
1736         #
1737         # $subset = $data_object -> subset_vertically ( column_indexes => [1,2,6],
1738         #                                               subset_name    => 'subset.dta' );
1739         #
1740         # This basic usage returns a new data object containing
1741         # columns 1,2 and 6 from the original data plus the
1742         # idcolumn. The new data object will be associated with the
1743         # file 'subset.dta'.
1744         #
1745         # You get the remaining data, i.e. the original data minus
1746         # the created subset by specifying
1747         #
1748         # ( $subset, $remainder ) =
1749         #      $data_object -> subset_vertically ( column_indexes   => [1,2,6],
1750         #                                          subset_name      => 'subset.dta',
1751         #                                          return_remainder => 1,
1752         #                                          remainder_name   => 'remainder.dta' );
1753         #
1754         # If you would like to flush the created data sets to disk and
1755         # save memory, set the I<target> argument to 'disk'. The
1756         # default value 'mem' will keep the whole data object in RAM.
1757         #
1758         # The I<keep_first_row_only> argument can be used to reduce
1759         # the size of the subset data obejct by excluding all but the
1760         # first row of data from each individual.
1761         #
1762         my @individuals = @{$self -> {'individuals'}};
1763         # Create remainder index array if necessary
1764         my @remainder_indexes;
1765         if ( defined $individuals[0] ) {
1766           my @data = @{$individuals[0] -> {'subject_data'}};
1767           my $idcolumn = $individuals[0] -> {'idcolumn'};
1768 #         print "IC: $idcolumn\n";
1769           my $id_flag = 0;
1770           foreach my $use_index ( @column_indexes ) {
1771             $id_flag = 1 if ( $use_index == $idcolumn );
1772           }
1773           if ( $return_remainder ) {
1774 #           @remainder_indexes = ( $idcolumn );
1775             for ( my $i = 0; $i < scalar split(/,/,$data[0]); $i++ ) {
1776               my $rem_flag = 1;
1777               foreach my $use_index ( @column_indexes ) {
1778                 $rem_flag = 0 if ( $i == $use_index -1 );
1779 # or
1780 #                                  $i == $idcolumn -1 );
1781               }
1782               push( @remainder_indexes, $i + 1 ) if ( $rem_flag );
1783             }
1784             unshift( @remainder_indexes, $idcolumn ) if ( $id_flag );
1785           }
1786           unshift( @column_indexes, $idcolumn ) unless ( $id_flag );
1787         } else {
1788           die "data -> single_valued_data: No data in ID number 1\n";
1789         }
1790
1791 #       print "SS: @column_indexes\n";
1792 #       print "R : @remainder_indexes\n";
1793
1794         my @new_ids;
1795         my @new_ids_2;
1796         for ( my $id = 0; $id <= $#individuals; $id++ ) {
1797           my $idnumber = $individuals[$id] -> idnumber;
1798           my $idcolumn = $individuals[$id] -> idcolumn;
1799           my @data = @{$individuals[$id] -> {'subject_data'}};
1800           my @new_data;
1801           my @new_data_2;
1802           my $use_rows = $keep_first_row_only ? 0 : $#data;
1803           for ( my $i = 0; $i <= $use_rows; $i++ ) {
1804             my @new_row;
1805             my @data_row = split( /,/, $data[$i] );
1806             foreach my $use_index ( @column_indexes ) {
1807               push( @new_row, $data_row[$use_index-1] );
1808             }
1809 #           print "@new_row $#new_row\n";
1810             push( @new_data, join( ',', @new_row ) );
1811           }
1812           for ( my $i = 0; $i <= $#data; $i++ ) {
1813             if ( $return_remainder ) {
1814               my @new_row_2;
1815               my @data_row = split( /,/, $data[$i] );
1816               foreach my $use_index ( @remainder_indexes ) {
1817                 push( @new_row_2, $data_row[$use_index-1] );
1818               }
1819 #             print "@new_row_2 $#new_row_2\n";
1820               push( @new_data_2, join( ',' , @new_row_2 ) );
1821             }
1822           }
1823           my $new_id = data::individual -> new( idnumber     => $idnumber,
1824                                                 idcolumn     => $idcolumn,
1825                                                 subject_data => \@new_data );
1826           push( @new_ids, $new_id );
1827           if ( $return_remainder ) {
1828             my $new_id_2;
1829             $new_id_2 = data::individual -> new( idnumber     => $idnumber,
1830                                                  idcolumn     => $idcolumn,
1831                                                  subject_data => \@new_data_2 );
1832           push( @new_ids_2, $new_id_2 );
1833           }
1834         }
1835         my @header = @{$self -> {'header'}};
1836         my @new_header;
1837         foreach my $use_index ( @column_indexes ) {
1838           push( @new_header, @header[$use_index-1] );
1839         }
1840         my $comment;
1841         if( defined $self -> {'comment'} ){
1842           my @comment = @{$self -> {'comment'}};
1843           $comment = \@comment;
1844         }
1845         $subset = data -> new ( filename             => $subset_name,
1846                                 directory            => $self -> {'directory'},
1847                                 ignoresign           => $self -> {'ignoresign'},
1848                                 header               => \@new_header,
1849                                 comment              => $comment,
1850                                 individuals          => \@new_ids,
1851                                 target               => $target,
1852                                 ignore_missing_files => 1 );
1853         if ( $return_remainder ) {
1854           my @new_header_2;
1855           foreach my $use_index ( @remainder_indexes ) {
1856             push( @new_header_2, @header[$use_index-1] );
1857           }
1858           $remainder = data -> new ( filename             => $remainder_name,
1859                                      directory            => $self -> {'directory'},
1860                                      ignoresign           => $self -> {'ignoresign'},
1861                                      header               => \@new_header_2,
1862                                      comment              => $comment,
1863                                      individuals          => \@new_ids_2,
1864                                      target               => $target,
1865                                      ignore_missing_files => 1 );
1866         }
1867       }
1868 end subset_vertically
1869
1870 # }}}
1871
1872 # {{{ subsets
1873
1874 start subsets
1875       {
1876 #       if ( defined $expression and defined $bins ) {
1877 #         die "data -> subset: expression and bins may not both be specified\n";
1878 #       }
1879 #       if ( not ( defined $expression or defined $bins ) ) {
1880 #         die "data -> subset: expression or bins must be specified\n";
1881 #       }
1882         $self -> synchronize;
1883         my @header  = @{$self -> {'header'}};
1884         my @comment = defined $self -> {'comment'} ? @{$self -> {'comment'}} : ();
1885         my @subset_ids= ();
1886         my %rnd_ids;
1887         my $key = 0;
1888         my @ids = @{$self -> {'individuals'}};
1889         if ( defined $stratify_on ) {
1890           my $work_data = $self -> copy( filename => 'work_data.dta',
1891                                          target   => 'mem' );
1892           my %strata = %{$work_data -> factors( column => $stratify_on )};
1893 #         $Data::Dumper::Maxdepth = 1;
1894 #         print Dumper \%strata;
1895
1896           while ( my ( $factor, $keys ) = each %strata ) {
1897               foreach my $key ( @{$keys} ) {
1898                   my $rnd_num = rand;
1899                   while ( defined $rnd_ids{$factor}{$rnd_num} ) {
1900                       $rnd_num = rand;
1901                   }
1902                   $rnd_ids{$factor}{$rnd_num} = $ids[$key];
1903               }
1904           }
1905           my $first = 1;
1906           while ( my ( $factor, $rnd_nums ) = each %rnd_ids ) {
1907               my @sort_rnd_nums = sort { $a <=> $b } keys %{$rnd_nums};
1908               for ( my $i = 0; $i <= $#sort_rnd_nums; $i ) {
1909                   for ( my $j = 0; $j < $bins; $j++ ) {
1910                       if ( $first ) {
1911                           push( @subset_ids, [$rnd_ids{$factor}{$sort_rnd_nums[$i]} -> copy] );
1912                           push( @incl_ids, [$rnd_ids{$factor}{$sort_rnd_nums[$i]} -> idnumber] );
1913                       } else {
1914                           push( @{$subset_ids[$j]}, $rnd_ids{$factor}{$sort_rnd_nums[$i]} -> copy );
1915                           push( @{$incl_ids[$j]}, $rnd_ids{$factor}{$sort_rnd_nums[$i]} -> idnumber );
1916                       }
1917                       $i++;
1918                       last if $i > $#sort_rnd_nums;
1919                   }
1920                   $first = 0;
1921               }
1922           }
1923           for ( my $j = 0; $j < $bins; $j++ ) {
1924             my $sdata = data -> new ( header               => \@header,
1925                                       comment              => \@comment,
1926                                       ignoresign           => $self -> {'ignoresign'},
1927                                       individuals          => $subset_ids[$j],
1928                                       ignore_missing_files => 1,
1929                                       target               => 'disk',
1930                                       idcolumn             => $self -> {'idcolumn'},
1931                                       filename             => "subset_$j.dta" );
1932             #$sdata -> _write;
1933             push( @subsets, $sdata );
1934           }
1935         } else {
1936           for ( my $i = 0; $i <= $#ids; $i++ ) {
1937             my $rnd_num = rand;
1938             while ( defined $rnd_ids{$rnd_num} ) {
1939               $rnd_num = rand;
1940             }
1941             $rnd_ids{$rnd_num} = $ids[$i];
1942           }
1943           my @keys = sort { $a <=> $b } keys %rnd_ids;
1944           my $first = 1;
1945           for ( my $i = 0; $i <= $#keys; $i ) {
1946             for ( my $j = 0; $j < $bins; $j++ ) {
1947               if ( $first ) {
1948                 push( @subset_ids, [$rnd_ids{$keys[$i]} -> copy] );
1949                 push( @incl_ids, [$rnd_ids{$keys[$i]} -> idnumber] );
1950               } else {
1951                 push( @{$subset_ids[$j]}, $rnd_ids{$keys[$i]} -> copy );
1952                 push( @{$incl_ids[$j]}, $rnd_ids{$keys[$i]} -> idnumber );
1953               }
1954               $i++;
1955               last if $i > $#keys;
1956             }
1957             $first = 0;
1958           }
1959           for ( my $j = 0; $j < $bins; $j++ ) {
1960             my $sdata = data -> new ( header               => \@header,
1961                                       comment              => \@comment,
1962                                       ignoresign           => $self -> {'ignoresign'},
1963                                       individuals          => $subset_ids[$j],
1964                                       ignore_missing_files => 1,
1965                                       target               => $target,
1966                                       idcolumn             => $self -> {'idcolumn'},
1967                                       filename             => "subset_$j.dta" );
1968             #$sdata -> _write;
1969             push( @subsets, $sdata );
1970           }
1971         }
1972       }
1973 end subsets
1974
1975 # }}} subsets
1976
1977 # {{{ subset
1978
1979 start subset
1980       {
1981         $self -> synchronize;
1982         my @header  = @{$self -> {'header'}};
1983         my @comment = defined $self -> {'comment'} ? @{$self -> {'comment'}} : ();
1984         my @subset_inds = ();
1985         my $key = 0;
1986         foreach my $individual ( @{$self -> {'individuals'}} ) {
1987           if ( $individual -> evaluate_expression( column     => $based_on,
1988                                                    expression => $expression ) ) {
1989             push( @subset_inds, $individual -> copy );
1990             push( @incl_individuals, $individual -> idnumber );
1991             push( @included_keys, $key );
1992           }
1993           $key++;
1994         }
1995         $subset = data -> new ( header      => \@header,
1996                                 comment     => \@comment,
1997                                 ignoresign  => $self -> {'ignoresign'},
1998                                 individuals => \@subset_inds,
1999                                 idcolumn    => $self -> {'idcolumn'},
2000                                 filename    => "subset.dta" );
2001       }
2002 end subset
2003
2004 # }}} subset
2005
2006 # {{{ target
2007
2008 start target
2009       {
2010         if ( $parm eq 'disk' and $self -> {'target'} eq 'mem' ) {
2011           $self -> {'target'} = 'disk';
2012           $self -> flush;
2013         } elsif ( $parm eq 'mem' and $self -> {'target'} eq 'disk' ) {
2014           $self -> {'target'} = 'mem';
2015           $self -> synchronize;
2016         }
2017       }
2018 end target
2019
2020 # }}}
2021
2022 # {{{ _write
2023
2024 start _write
2025     {
2026       die "ERROR: data -> _write: No filename set in data object.\n"
2027           if( $filename eq '' );
2028
2029 #      $Data::Dumper::Maxdepth = 2;
2030 #      die Dumper $self -> {'individuals'};
2031
2032       if( not defined $self -> {'individuals'} ){
2033
2034         # If we don't have any individuals and write to a new
2035         # filename, we must first read individuals from the old
2036         # file. A call to synchronize will do that. There is no risk
2037         # of a infinite loop here since synchronize allways writes to
2038         # "full_name".
2039
2040         unless( $filename eq $self -> full_name ){
2041           $self -> synchronize;
2042         }
2043       }
2044
2045       open(FILE,">$filename") ||
2046           die "Could not create $filename\n";
2047       my $data_ref = $self -> format_data;
2048       my @data = @{$data_ref};
2049       for ( @data ) {
2050         print ( FILE );
2051       }
2052       close(FILE);
2053
2054 #       if ( $PsN::config -> {'_'} -> {'use_database'} and
2055 #          $self -> {'use_data_table'} ) {
2056 #       # Backslashes messes up the sql syntax
2057 #       my $file_str = $self->{'filename'};
2058 #       my $dir_str = $self->{'directory'};
2059 #       $file_str =~ s/\\/\//g;
2060 #       $dir_str =~ s/\\/\//g;
2061
2062 #       # md5sum
2063 #       my $md5sum = md5_hex(OSspecific::slurp_file($self-> full_name ));
2064 #       my ( $date_str, $time_str );
2065 #       if ( $Config{osname} eq 'MSWin32' ) {
2066 #         $date_str = `date /T`;
2067 #         $time_str = ' '.`time /T`;
2068 #       } else {
2069 #         # Assuming UNIX
2070 #         $date_str = `date`;
2071 #       }
2072 #       chomp($date_str);
2073 #       chomp($time_str);
2074 #       my $date_time = $date_str.$time_str;
2075 #       my $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
2076 #                              ";databse=".$PsN::config -> {'_'} -> {'project'},
2077 #                                $PsN::config -> {'_'} -> {'user'},
2078 #                               $PsN::config -> {'_'} -> {'password'},
2079 #                                {
2080 #                                 'RaiseError' => 1});
2081 #       my $sth;
2082 #       if ( defined $self -> {'data_id'} ) {
2083 #         $sth = $dbh -> prepare( "UPDATE ".$PsN::config -> {'_'} -> {'project'}.
2084 #                              ".data ".
2085 #                                 "SET filename='$file_str',date='$date_time',".
2086 #                                 "directory='$dir_str',md5sum='$md5sum' ".
2087 #                                 "WHERE data_id='".$self -> {'data_id'}."'" );
2088 #         $sth -> execute or debug -> die( message => $sth->errstr ) ;
2089 #       } else {
2090 #         $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
2091 #                              ".data (filename,date,directory,md5sum) ".
2092 #                                "VALUES ('$file_str', '$date_time', '$dir_str','".
2093 #                                $md5sum."' )");
2094 #         $sth -> execute;
2095 #         $self -> {'data_id'} = $sth->{'mysql_insertid'};
2096 #       }
2097 #       $sth -> finish;
2098 #       $dbh -> disconnect;
2099 #       }
2100     }
2101 end _write
2102
2103 # }}} _write
2104
2105 # {{{ flush
2106 start flush
2107     {
2108       # synchronizes the object with the file on disk and empties
2109       # most of the objects attributes to save memory.
2110       if( defined $self -> {'individuals'} and
2111           ( !$self -> {'synced'} or $force ) ) {
2112         $self -> _write;
2113       }
2114 #      $self -> {'header'} = undef;
2115       $self -> {'comment'} = undef;
2116       $self -> {'individuals'} = undef;
2117       $self -> {'synced'} = 0;
2118       $self -> {'column_head_indices'} = undef;
2119       $self -> {'have_missing_data'} = undef;
2120     }
2121 end flush
2122 # }}} flush
2123
2124 # {{{ synchronize
2125
2126 start synchronize
2127     {
2128       # synchronizes the object with the file on disk
2129       unless( $self -> {'synced'} ){
2130         if( defined $self -> {'individuals'} and
2131             scalar @{$self -> {'individuals'}} > 0 ){
2132           # We should not read new data from file if we
2133           # have an individuals defined?
2134           # Perhaps there should be an attribute
2135           # 'from_file' that overrides this and reads in
2136           # the data from the file specified in filename
2137           # and overwrites whatever the object already
2138           # contains?
2139 #           if( -e $self -> {'filename'} ){
2140 #             $self -> _read_header;
2141 #             $self -> _read_individuals;
2142 #           }
2143           $self -> _write;
2144         } else {
2145           if( -e $self -> full_name ){
2146             unless( defined $self -> {'header'} and scalar @{$self -> {'header'}} > 0 ){
2147               $self -> _read_header;
2148             }
2149             $self -> _read_individuals;
2150           } else {
2151             debug -> die( message => "Fatal error: datafile: " . $self -> full_name . " does not exist." );
2152             return;
2153           }
2154         }
2155       }
2156       my $i = 1;
2157       foreach my $head ( @{$self -> {'header'}} ){
2158         $self -> {'column_head_indices'} -> {$head} = $i;
2159         $i++;
2160       }
2161       $self -> {'synced'} = 1;
2162     }
2163 end synchronize
2164
2165 # }}} synchronize
2166
2167 # {{{ _fisher_yates_shuffle
2168
2169 start _fisher_yates_shuffle
2170       {
2171         my $arr_ref = $parm{'array'};
2172         debug -> warn( level   => 1,
2173                        message => "Array of zero length received" )
2174           if ( scalar @{$arr_ref} < 1 );
2175         my $i;
2176         for ($i = @$arr_ref; --$i; ) {
2177           my $j = random_uniform_integer(1,0,$i);
2178 #          my $j = int rand ($i+1);
2179 #         print "$j $j_new\n";
2180           @$arr_ref[$i,$j] = @$arr_ref[$j,$i];
2181         }
2182       }
2183 end _fisher_yates_shuffle
2184
2185 # }}} _fisher_yates_shuffle
2186
2187 # {{{ _read_header
2188
2189 start _read_header
2190       {
2191         my $filename   = $self -> full_name;
2192         my $ignoresign = $self -> ignoresign;
2193         my ( @data, @new_record, $row, $tmp_row, @header, $hdrstring );
2194
2195         open(DATAFILE,"$filename") ||
2196           die "Could not open $filename for reading";
2197         my $columns;
2198         while (<DATAFILE>) {
2199           $tmp_row    = $_;
2200           #     @new_record = split(/\,|\s+/,$_);
2201           if ( ! (/^\s*\d+|^\s*\./) ) {
2202             $data[$row] = $tmp_row;
2203             $row++;
2204           } else {
2205             # We have reached the first data-row, return.
2206             $columns = scalar split(/\,\s*|\s+/);
2207             last;
2208           }
2209         }
2210         close(DATAFILE);
2211
2212         if ( defined $self -> {'cont_column'} and not $self -> {'table_file'} ) {
2213           my $data_len = $#data;
2214           for ( my $i = $data_len; $i >= 0; $i-- ) {
2215             my @arr = split(/\,\s*|\s+/,$data[$i]);
2216             if ( $arr[$self -> {'cont_column'}-1] eq 'CONT' ) {
2217               my $start = $i == $data_len ? 0 : 1;
2218               for ( my $j = $start; $j <= $#arr; $j++ ) {
2219                 if ( $j != ($self -> {'cont_column'}-1) ) {
2220                   push( @header, $arr[$j] );
2221                 }
2222               }
2223               pop( @data );
2224             }
2225           }
2226           $header[0] =~ s/$ignoresign//
2227               if ( defined $self->ignoresign );
2228           shift( @header ) if ( $header[0] eq "" );
2229         } else {
2230           chomp( $hdrstring = pop(@data));
2231           @header = split(/\,\s*|\s+/,$hdrstring);
2232           $header[0] =~ s/$ignoresign//
2233               if ( defined $self->ignoresign );
2234           shift( @header ) if ( $header[0] eq "" );
2235           if( $self -> {'table_file'} ) {
2236             my @new_header;
2237             for( my $i = 1; $i <= scalar @header; $i++ ) {
2238               if( $header[$i-1] eq 'CONT' ) {
2239                 if ( defined $self -> {'cont_column'} and not $i == $self -> {'cont_column'} ) {
2240                   debug -> warn( level   => 1,
2241                                  message => "The supplied columns for the CONT data item (".
2242                                  $self -> {'cont_column'}.") does not match the column where the CONT ".
2243                                  "header was found ($i), using $i" );
2244                 }
2245                 $self -> {'cont_column'} = $i;
2246               } else {
2247                 push( @new_header, $header[$i-1] );
2248               }
2249             }
2250             @header = @new_header;
2251             for( my $i = 1; $i <= scalar @header; $i++ ) {
2252               if( $header[$i-1] eq 'ID' ) {
2253                 if ( defined $self -> {'idcolumn'} and not $i == $self -> {'idcolumn'} ) {
2254                   debug -> warn( level   => 1,
2255                                  message => "The supplied columns for the ID data item (".
2256                                  $self -> {'idcolumn'}.") does not match the column where the CONT ".
2257                                  "header was found ($i), using $i" );
2258                 }
2259                 $self -> {'idcolumn'} = $i;
2260               }
2261             }
2262           }
2263         }
2264
2265 # I'm not certain on how to deal with this conflict. I'm leaving it commented because I believe this code should not be here.
2266 #
2267 #<<<<<<< data_subs.pm
2268 #       $header[0] =~ s/$ignoresign//
2269 #         if ( defined $self->ignoresign );
2270 #       shift( @header ) if ( $header[0] eq "" );
2271 #=======
2272 #>>>>>>> 1.28
2273
2274 # It is ok with data sets without a header.
2275 #       unless( scalar @header > 0 ){ debug -> die( message => 'Datafile ' . $self -> full_name . ' is empty.' ); }
2276
2277         $self -> {'header'} = \@header;
2278         $self -> {'comment'} = \@data;
2279 #       if ( $PsN::config -> {'_'} -> {'use_database'} and
2280 #            $self -> {'use_data_table'} ) {
2281 #         my $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
2282 #                              ";databse=".$PsN::config -> {'_'} -> {'project'},
2283 #                                  $PsN::config -> {'_'} -> {'user'},
2284 #                               $PsN::config -> {'_'} -> {'password'},
2285 #                                  {'RaiseError' => 1});
2286 #         if ( scalar @header < 1 ) {
2287 #           for ( my $i = 1; $i <= $columns; $i++ ) {
2288 #             push( @header, $i );
2289 #           }
2290 #         }
2291 #         for ( my $i = 0; $i <= $#header; $i++ ) {
2292 #           my $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
2293 #                              ".data_column ".
2294 #                                     "(name,number,data_id) ".
2295 #                                     "VALUES ('".$header[$i]."', '".($i+1).
2296 #                                     "', '".$self -> {'data_id'}."' )");
2297 #           $sth -> execute;
2298 #           push( @{$self -> {'data_column_ids'}}, $sth->{'mysql_insertid'} );
2299 #           $sth -> finish;
2300 #         }
2301 #         $dbh -> disconnect;
2302 #       }
2303       }
2304 end _read_header
2305
2306 # }}} _read_header
2307
2308 # {{{ _read_individuals
2309
2310 start _read_individuals
2311       {
2312         my $idcol        = $self -> idcolumn;
2313         my $filename     = $self -> full_name;
2314         debug -> warn( level   => 1,
2315                        message => "Building array of individuals from file " . $self -> {'filename'} );
2316         open(DATAFILE,"$filename") ||
2317           die "Could not open $filename for reading";
2318         my ( @new_row, $new_ID, $old_ID, @init_data );
2319         my $buffer;
2320         my $lines = 0;
2321         while (sysread DATAFILE, $buffer, 4096) {
2322           $lines += ($buffer =~ tr/\n//);
2323         }
2324         seek( DATAFILE, 0,0 );
2325
2326         # For status bar:
2327         my $status_bar = status_bar -> new( steps => $lines );
2328
2329         ui -> print( category => 'scm',
2330                      message  => "Reading data file: ".$self -> filename );
2331         ui -> print( category => 'scm',
2332                      message => $status_bar -> print_step(),
2333                      newline => 0);
2334
2335         my ( $sth, $dbh, $first_row_id, $first_value_id );
2336         my $insert = 1;
2337 #       if ( $PsN::config -> {'_'} -> {'use_database'} and
2338 #            $self -> {'use_data_table'} ) {
2339 #         $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
2340 #                              ";databse=".$PsN::config -> {'_'} -> {'project'},
2341 #                               $PsN::config -> {'_'} -> {'user'},
2342 #                               $PsN::config -> {'_'} -> {'password'},
2343 #                               {'RaiseError' => 1});
2344 #         my $sth = $dbh -> prepare( "SELECT data_row_id FROM ".$PsN::config -> {'_'} -> {'project'}.
2345 #                              ".data_row ".
2346 #                                    "WHERE data_id='".$self -> {'data_id'}."'" );
2347 #         $sth -> execute or debug -> die( message => $sth->errstr ) ;
2348 #         my $select_arr = $sth -> fetchall_arrayref;
2349 #         if ( scalar @{$select_arr} > 0 ) {
2350 #           for ( my $i = 0; $i < scalar @{$select_arr}; $i++ ) {
2351 #             push( @{$self -> {'data_row_ids'}}, $select_arr->[$i][0] );
2352 #           }
2353 #           $sth = $dbh -> prepare( "SELECT data_value_id FROM ".$PsN::config -> {'_'} -> {'project'}.
2354 #                              ".data_value ".
2355 #                                   "WHERE data_id='".$self -> {'data_id'}."'" );
2356 #           $sth -> execute or debug -> die( message => $sth->errstr ) ;
2357 #           my $select_val = $sth -> fetchall_arrayref;
2358 #           for ( my $i = 0; $i < scalar @{$select_val}; $i++ ) {
2359 #             push( @{$self -> {'data_value_ids'}}, $select_val->[$i][0] );
2360 #           }
2361 #           $insert = 0;
2362 #           $dbh -> disconnect;
2363 #         } else {
2364 #           $dbh -> do( "LOCK TABLES ".$PsN::config -> {'_'} -> {'project'}.
2365 #                              ".data_row WRITE, ".$PsN::config -> {'_'} -> {'project'}.
2366 #                              ".data_value WRITE" );
2367 #           $sth = $dbh -> prepare( "SELECT MAX(data_row_id) FROM ".$PsN::config -> {'_'} -> {'project'}.
2368 #                              ".data_row" );
2369 #           $sth -> execute or debug -> die( message => $sth->errstr ) ;
2370 #           my $select_arr = $sth -> fetchall_arrayref;
2371 #           $first_row_id = defined $select_arr -> [0][0] ? $select_arr -> [0][0] : 0;
2372 #           $sth = $dbh -> prepare( "SELECT MAX(data_value_id) FROM ".$PsN::config -> {'_'} -> {'project'}.
2373 #                              ".data_value" );
2374 #           $sth -> execute or debug -> die( message => $sth->errstr ) ;
2375 #           my $select_arr = $sth -> fetchall_arrayref;
2376 #           $first_value_id = defined $select_arr -> [0][0] ? $select_arr -> [0][0] : 0;
2377 #         }
2378 #         $sth -> finish;
2379 #       }
2380
2381         my $insert_rows;
2382         my $insert_values;
2383         my $row_counter = 0;
2384         my $full_row;
2385       ROW: while ( <DATAFILE> ) {
2386           s/^ *//;
2387           my @new_row   = split(/\,\s*|\s+/);
2388           # This regexp check is not time consuming.
2389           if ( /^\s*\d+|^\s*\./ ) {
2390             if ( defined $self -> {'cont_column'} ) {
2391               if ( $new_row[$self -> {'cont_column'} - 1] == 1 ) {
2392                 if ( not $self -> {'table_file'} ) { # Skip the CONT=1 rows if this is a table file
2393                   for ( my $i = $#new_row; $i > 0; $i-- ) {
2394                     if ( $i != ($self -> {'cont_column'} - 1) ) {
2395                       unshift( @{$full_row}, $new_row[$i] );
2396                     }
2397                   }
2398                 }
2399                 next ROW;
2400               } else {
2401                 for ( my $i = $#new_row; $i >= 0; $i-- ) {
2402 #                 if ( $i != ($self -> {'cont_column'} - 1) or $self -> {'table_file'} ) {
2403                   if ( $i != ($self -> {'cont_column'} - 1) ) {
2404                     unshift( @{$full_row}, $new_row[$i] );
2405                   }
2406                 }
2407               }
2408             } else {
2409               @{$full_row} = @new_row;
2410             }
2411             $new_ID = $full_row -> [$idcol-1]; # index starts at 0
2412             $old_ID = $new_ID if ( not defined $old_ID );
2413
2414             # Check if column miss data at some row (This adds about 30% of init time)
2415             my $mdt = $self -> {'missing_data_token'};
2416             for( my $i = 0; $i <= $#{$full_row}; $i++ ){
2417                 $self -> {'have_missing_data'} -> {$i+1} = 1
2418                   if( $full_row -> [$i] == $mdt ); # == is slower but safer than eq
2419             }
2420 #           if ( $PsN::config -> {'_'} -> {'use_database'} and
2421 #                $self -> {'use_data_table'} and $insert ) {
2422 #             $row_counter++;
2423 #             $insert_rows = $insert_rows."," if ( defined $insert_rows );
2424 #             $insert_rows = $insert_rows.
2425 #               "('$row_counter', '".$self -> {'data_id'}."' )";
2426 #             for ( my $j = 0; $j <= $#{$full_row}; $j++ ) {
2427 #               $insert_values = $insert_values."," if ( defined $insert_values );
2428 #               $insert_values = $insert_values.
2429 #                 "('".$full_row -> [$j]."', '".
2430 #                   ($first_row_id+$row_counter)."', '".
2431 #                     $self -> {'data_column_ids'}->[$j].
2432 #                       "', '".$self -> {'data_id'}."' )";
2433 #             }
2434 #           }
2435
2436             if ( $new_ID != $old_ID ) {
2437               my @subject_data = @init_data;
2438               my $id = data::individual -> new ( idcolumn     => $idcol,
2439                                                  subject_data => \@subject_data,
2440                                                  data_id      => $self -> {'data_id'} );
2441               push( @{$self -> {'individuals'}}, $id );
2442               @init_data =(join( ",", @{$full_row}));
2443             } else {
2444               push( @init_data, join( ",", @{$full_row}) );
2445             }
2446             $old_ID = $new_ID;
2447             $full_row = undef;
2448           }
2449           if ( $status_bar -> tick() ) {
2450             ui -> print( category => 'scm',
2451                          message  => $status_bar -> print_step(),
2452                          wrap     => 0,
2453                          newline  => 0 );
2454           }
2455         }
2456
2457 #       if ( $PsN::config -> {'_'} -> {'use_database'} and
2458 #            $self -> {'use_data_table'} and $insert ) {
2459 #         $dbh -> do("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
2460 #                    ".data_row ".
2461 #                    "(number,data_id) ".
2462 #                    "VALUES ".$insert_rows);
2463 #         push( @{$self -> {'data_row_ids'}}, ($first_row_id..$first_row_id+$row_counter) );
2464 #         $dbh -> do( "INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
2465 #                     ".data_value ".
2466 #                     "(value,data_row_id,data_column_id,data_id) ".
2467 #                     "VALUES ".$insert_values );
2468 #         push( @{$self -> {'data_value_ids'}},
2469 #               ($first_value_id..$first_value_id+($row_counter*
2470 #                                                  scalar @{$self->{'data_column_ids'}})));
2471 #         $dbh -> do( "UNLOCK TABLES" );
2472 #         $dbh -> disconnect;
2473 #       }
2474
2475         if ( $#init_data >= 0 ) {
2476           push( @{$self -> {'individuals'}},
2477                 data::individual -> new ( idcolumn     => $idcol,
2478                                           subject_data => \@init_data ) );
2479         }
2480         ui -> print( category => 'scm',
2481                      message  => " ... done" );
2482         close(DATAFILE);
2483 #       $self -> _write( filename => 'test.dta' );
2484       }
2485 end _read_individuals
2486
2487 # }}} _read_individuals
2488