lib/tool/bootstrap_subs.pm

   1 # {{{ include
   2
   3 start include statements
   4 use strict;
   5 use File::Copy 'cp';
   6 use data;
   7 use OSspecific;
   8 use tool::llp;
   9 use tool::cdd::jackknife;
  10 use ext::Statistics::Distributions 'udistr', 'uprob';
  11 use Math::Random;
  12 use Data::Dumper;
  13 end include
  14
  15 # }}} include statements
  16
  17 # {{{ new
  18
  19 start new
  20       {
  21         foreach my $attribute ( 'logfile', 'raw_results_file','raw_nonp_file' ) {
  22           if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
  23                     ref($this -> {$attribute}) eq 'HASH' ) ) {
  24             my $tmp = $this -> {$attribute};
  25             $this -> {$attribute} = [];
  26             for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
  27               my $name = $tmp;
  28               if ( $name =~ /\./ ) {
  29                 $name =~ s/\./$i\./;
  30               } else {
  31                 $name = $name.$i;
  32               }
  33               my $ldir;
  34               ( $ldir, $name ) =
  35                 OSspecific::absolute_path( $this -> {'directory'}, $name );
  36               push ( @{$this -> {$attribute}}, $ldir.$name ) ;
  37             }
  38           }
  39         }
  40       }
  41 end new
  42
  43 # }}} new
  44
  45 # {{{ general_pre_fork_setup
  46
  47 start general_pre_fork_setup
  48     {
  49       # These attributes can be given as a
  50       # 1. A scalar       : used for all models and problems (subjects can be a scalar ref to a hash)
  51       # 2. A 1-dim. array : specified per problem but same for all models
  52       # 3. A 2-dim. array : specified per problem and model
  53
  54       foreach my $attribute_string( 'stratify_on', 'samples', 'subjects' ){
  55         my $attribute = $self -> {$attribute_string};
  56         if ( defined $attribute ) {
  57           # If stratification is given we check at what level it is
  58           # given.
  59           unless ( ref( \$attribute ) eq 'SCALAR' or
  60                    ref( \$attribute ) eq 'REF' or
  61                    ( ref( $attribute ) eq 'ARRAY' and scalar @{$attribute} > 0 ) ) {
  62             # Here we know its neither scalar or a correct array. But we
  63             # seem to assume the lenght be equal to the number of
  64             # models.
  65             debug -> die( message => "attribute $attribute_string is " .
  66                           "defined as a " . ref( $attribute ) .
  67                           " but is neither a scalar or a non-zero size array" );
  68           } elsif ( ref( \$attribute ) eq 'SCALAR' or ref( \$attribute ) eq 'REF' ) {
  69             # If it is a scalar we copy the scalar value into an array
  70             # for each model, with one value for each problem.
  71             my @mo_attribute = ();
  72             foreach my $model ( @{$self -> {'models'}} ) {
  73               my @pr_attribute = ();
  74               foreach my $problem ( @{$model -> problems}  ) {
  75                 push( @pr_attribute, $attribute );
  76               }
  77               push( @mo_attribute, \@pr_attribute );
  78             }
  79             $self -> {$attribute_string} = \@mo_attribute;
  80           } elsif ( ref( $attribute ) eq 'ARRAY' ) {
  81             # If it is an array we check if the it is an array of
  82             # scalars or of arrays. If it is of arrays we seem to
  83             # assume that the arrays have a length the matches the
  84             # number of problems. It it is an array of scalars, we
  85             # copy that scalar into arrays for each problem.
  86             unless ( ref( \$attribute -> [0] ) eq 'SCALAR' or
  87                      ( ref( $attribute -> [0] ) eq 'ARRAY' and scalar @{$attribute -> [0]} > 0 ) ) {
  88               debug -> die( message => "attribute $attribute is ",
  89                             "defined as a ",ref( $attribute -> [0] ),
  90                             "and is neither a scalar or a non-zero size array" );
  91             } elsif ( ref(\$attribute -> [0]) eq 'SCALAR' ) {
  92               my @mo_attribute = ();
  93               foreach my $model ( @{$self -> {'models'}} ) {
  94                 push( @mo_attribute, $attribute );
  95               }
  96               $self -> {$attribute_string} = \@mo_attribute;
  97             }
  98           }
  99         } else {
 100           # If stratify is not defined we copy an undefined value to
 101           # arrays corresponding to models and subproblem. I wonder if
 102           # this is really necessary.
 103           my @mo_attribute = ();
 104           foreach my $model ( @{$self -> {'models'}} ) {
 105             my @pr_attribute = ();
 106             foreach my $data ( @{$model -> datas}  ) {
 107               if( $attribute_string eq 'stratify_on' ){
 108                 push( @pr_attribute, undef );
 109               } elsif ( $attribute_string eq 'samples' ){
 110                 push( @pr_attribute, 200 );
 111               } elsif ( $attribute_string eq 'subjects' ){
 112                 push( @pr_attribute, {'default'=>($data -> count_ind)} );
 113               }
 114             }
 115             push( @mo_attribute, \@pr_attribute );
 116           }
 117           $self -> {$attribute_string} = \@mo_attribute;
 118         }
 119       }
 120     }
 121 end general_pre_fork_setup
 122
 123 # }}}
 124
 125 # {{{ modelfit_pre_fork_setup
 126
 127 start modelfit_pre_fork_setup
 128       {
 129         $self -> general_pre_fork_setup( model_number => $model_number );
 130       }
 131 end modelfit_pre_fork_setup
 132
 133 # }}}
 134
 135 # {{{ llp_pre_fork_setup
 136
 137 start llp_pre_fork_setup
 138       {
 139         $self -> general_pre_fork_setup( model_number => $model_number );
 140       }
 141 end llp_pre_fork_setup
 142
 143 # }}}
 144
 145 # {{{ modelfit_setup
 146
 147 start modelfit_setup
 148       {
 149
 150         my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 151           $self -> {'threads'} -> [1]:$self -> {'threads'};
 152         $self -> general_setup( model_number => $model_number,
 153                                 class        => 'tool::modelfit',
 154                                 subm_threads => $subm_threads );
 155       }
 156 end modelfit_setup
 157
 158 # }}}
 159
 160 # {{{ general_setup
 161
 162 start general_setup
 163       {
 164
 165         # If the number of threads are given per tool, e.g. [2,5] meaning 2 threads for
 166         # scm and 5 for the modelfit.
 167         my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 168           $self -> {'threads'} -> [1]:$self -> {'threads'};
 169         my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 170           $self -> {'threads'} -> [0]:$self -> {'threads'};
 171         # More threads than models?
 172         my $num = scalar @{$self -> {'models'}};
 173         $own_threads = $num if ( $own_threads > $num );
 174
 175         # Sub tool threads can be given as scalar or reference to an array?
 176         #my $subm_threads = $parm{'subm_threads'};
 177         #my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 178         #  $self -> {'threads'} -> [0]:$self -> {'threads'};
 179         # Group variable names are matched in the model, not the data!
 180         my $model       = $self   -> {'models'}      -> [$model_number-1];
 181         my @samples     = @{$self -> {'samples'}     -> [$model_number-1]};
 182         my @stratify_on = @{$self -> {'stratify_on'} -> [$model_number-1]};
 183         my @subjects    = @{$self -> {'subjects'}    -> [$model_number-1]};
 184
 185         # Check which models that hasn't been run and run them This
 186         # will be performed each step but will only result in running
 187         # models at the first step, if at all.
 188
 189         # If more than one process is used, there is a VERY high risk
 190         # of interaction between the processes when creating
 191         # directories for model fits. Therefore the {'directory'}
 192         # attribute is given explicitly below.
 193
 194         # ------------------------  Run original run  -------------------------------
 195
 196         # {{{ orig run
 197
 198         unless ( $model -> is_run ) {
 199           my %subargs = ();
 200           if ( defined $self -> {'subtool_arguments'} ) {
 201             %subargs = %{$self -> {'subtool_arguments'}};
 202           }
 203
 204           if( $self -> {'nonparametric_etas'} or
 205               $self -> {'nonparametric_marginals'} ) {
 206             $model -> add_nonparametric_code;
 207           }
 208
 209           my $orig_fit = tool::modelfit ->
 210               new( reference_object      => $self,
 211                    base_directory        => $self -> {'directory'},
 212                    directory             => $self -> {'directory'}.
 213                    '/orig_modelfit_dir'.$model_number,
 214                    models                => [$model],
 215                    threads               => $subm_threads,
 216                    parent_threads        => $own_threads,
 217                    parent_tool_id        => $self -> {'tool_id'},
 218                    logfile               => undef,
 219                    raw_results           => undef,
 220                    prepared_models       => undef,
 221                    top_tool              => 0,
 222                    %subargs );
 223
 224           ui -> print( category => 'bootstrap',
 225                        message => 'Executing base model.' );
 226
 227           $orig_fit -> run;
 228
 229         }
 230
 231         my $output = $model -> outputs -> [0];
 232
 233         # }}} orig run
 234
 235         # ------------------------  Print a log-header  -----------------------------
 236
 237         # {{{ log header
 238
 239         my @orig_datas = @{$model -> datas};
 240         my @problems   = @{$model -> problems};
 241         my @new_models;
 242
 243         # Print a log-header
 244         # Lasse 2005-04-21: The minimization_message print will probably not work anymore
 245         open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
 246         my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME  ').',';
 247         print LOG sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME  '),',';
 248         foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
 249           my $orig_ests   = $model -> outputs -> [0] -> $param;
 250           # Loop the problems
 251           for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
 252             if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
 253               my $label = uc($param)."_".($j+1);
 254               $ui_text = $ui_text.sprintf("%12s",$label).',';
 255               print LOG sprintf("%12s",$label),',';
 256             } else {
 257               # Loop the parameter numbers (skip sub problem level)
 258               for ( my $num = 1; $num <= scalar @{$orig_ests -> [$j][0]}; $num++ ) {
 259                 my $label = uc($param).$num."_".($j+1);
 260                 $ui_text = $ui_text.sprintf("%12s",$label).',';
 261                 print LOG sprintf("%12s",$label),',';
 262               }
 263             }
 264           }
 265         }
 266         print LOG "\n";
 267
 268         # }}} log header
 269
 270         # ------------------------  Log original run  -------------------------------
 271
 272         # {{{ Log original run
 273
 274         # Lasse 2005-04-21: The minimization_message print will probably not work anymore
 275         open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
 276         $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename).',';
 277         print LOG sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename),',';
 278         foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
 279           my $orig_ests   = $model -> outputs -> [0] -> $param;
 280           # Loop the problems
 281           for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
 282             if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
 283               $orig_ests -> [$j][0] =~ s/\n//g;
 284               $ui_text = $ui_text.sprintf("%12s",$orig_ests -> [$j][0]).',';
 285               print LOG sprintf("%12s",$orig_ests -> [$j][0]),',';
 286             } else {
 287               # Loop the parameter numbers (skip sub problem level)
 288               for ( my $num = 0; $num < scalar @{$orig_ests -> [$j][0]}; $num++ ) {
 289                 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
 290                 print LOG sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
 291               }
 292             }
 293           }
 294         }
 295         print LOG "\n";
 296
 297         # }}} Log original run
 298
 299         # TODO: In this loop we loose one dimension (problem) in that
 300         # all new models with new data sets are put in the same array,
 301         # regardless of which problem the initially belonged to. Fix
 302         # this.
 303
 304         if ( $#orig_datas < 0 ) {
 305           debug -> warn( level   => 1,
 306                          message => "No data files to resample from" );
 307         } else {
 308           debug -> warn( level   => 1,
 309                          message => "Starting bootstrap sampling" );
 310         }
 311         for ( my $i = 1; $i <= scalar @orig_datas; $i++ ) {
 312           my $orig_data = $orig_datas[$i-1];
 313
 314           if ( $self -> {'drop_dropped'} ) {
 315             my $model_copy = $model -> copy( copy_data => 1,
 316                                              filename => "drop_copy_$i.mod",
 317                                              directory => $self -> {'directory'} );
 318
 319             $model_copy -> drop_dropped;
 320             $model_copy -> _write( write_data => 1 );
 321             $model_copy -> datas -> [0] -> flush();
 322             $orig_data = $model -> datas -> [0];
 323             $model = $model_copy;
 324           }
 325
 326           my ( @seed, $new_datas, $incl_ids, $incl_keys, $new_mod );
 327
 328           my $done = ( -e $self -> {'directory'}."/m$model_number/done.$i" ) ? 1 : 0;
 329
 330           if ( not $done ) {
 331             ui -> print( category => 'bootstrap',
 332                          message  => "Resampling from ".$orig_data -> filename );
 333
 334             ( $new_datas, $incl_ids, $incl_keys )
 335                 = $orig_data -> bootstrap( directory   => $self -> {'directory'}.'/m'.$model_number,
 336                                            name_stub   => 'bs_pr'.$i,
 337                                            samples     => $samples[$i-1],
 338                                            subjects    => $subjects[$i-1],
 339                                            stratify_on => $stratify_on[$i-1],
 340                                            target            => 'disk',
 341                                            model_ids   => $self -> {'prepared_model_ids'} );
 342
 343
 344             for ( my $j = 0; $j < $samples[$i-1]; $j++ ) {
 345               my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
 346                                                                       'bs_pr'.$i.'_'.($j+1).'.mod' );
 347 #             my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number ,
 348 #                                                                      '/bs_pr'.$i.'_'.$j.'.lst' );
 349               my $prob_copy = Storable::dclone($problems[$i-1]);
 350 #             $Data::Dumper::Maxdepth = 2;
 351 #             print Dumper $model; die;
 352
 353               $new_mod = model ->
 354                   new(reference_object     => $model,
 355                       sde                  => 0,
 356                       outputs              => undef,
 357                       datas                => undef,
 358                       synced               => undef,
 359                       problems             => undef,
 360                       active_problems      => undef,
 361                       directory            => $model_dir,
 362                       filename             => $filename,
 363                       outputfile           => undef,
 364                       problems             => [$prob_copy],
 365                       extra_files          => $model -> extra_files,
 366                       target               => 'disk',
 367                       ignore_missing_files => 1 );
 368
 369               if( $self -> {'shrinkage'} ) {
 370                 $new_mod -> shrinkage_stats( enabled => 1 );
 371                 my @problems = @{$new_mod -> problems};
 372                 for( my $i = 1; $i <= scalar @problems; $i++ ) {
 373                   $problems[ $i-1 ] -> shrinkage_module -> model( $new_mod );
 374                 }
 375               }
 376
 377               my $model_id  = $new_mod -> register_in_database;
 378               push( @{$self -> {'prepared_model_ids'}}, $model_id );
 379
 380               $self -> register_tm_relation( model_ids       => [$model_id],
 381                                              prepared_models => 1 );
 382
 383               $new_mod -> datas ( [$new_datas -> [$j]] );
 384
 385               if( $self -> {'nonparametric_etas'} or
 386                   $self -> {'nonparametric_marginals'} ) {
 387                 $new_mod -> add_nonparametric_code;
 388               }
 389
 390               $new_mod -> update_inits( from_output => $output );
 391               $new_mod -> _write;
 392
 393               push( @new_models, $new_mod );
 394             }
 395             # Create a checkpoint. Log the samples and individuals.
 396             open( DONE, ">".$self -> {'directory'}."/m$model_number/done.$i" ) ;
 397             print DONE "Resampling from ",$orig_data -> filename, " performed\n";
 398             print DONE "$samples[$i-1] samples\n";
 399             while( my ( $strata, $samples ) = each %{$subjects[$i-1]} ) {
 400               print DONE "Strata $strata: $samples sample_size\n";
 401             }
 402             print DONE "Included individuals:\n";
 403             @seed = random_get_seed;
 404             print DONE "seed: @seed\n";
 405             for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
 406               print DONE join(',',@{$incl_ids -> [$k]}),"\n";
 407             }
 408             print DONE "Included keys:\n";
 409             for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
 410               print DONE join(',',@{$incl_keys -> [$k]}),"\n";
 411             }
 412             close( DONE );
 413             open( INCL, ">".$self -> {'directory'}."included_individuals".$model_number.".csv" ) ;
 414             for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
 415               print INCL join(',',@{$incl_ids -> [$k]}),"\n";
 416             }
 417             close( INCL );
 418             open( KEYS, ">".$self -> {'directory'}."included_keys".$model_number.".csv" ) ;
 419             open( SAMPLEKEYS, ">".$self -> {'directory'}."sample_keys".$model_number.".csv" ) ;
 420             my $ninds= ($orig_data -> count_ind());
 421             for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
 422               my %sample_keys;
 423               my $sample_size = scalar @{$incl_keys -> [$k]};
 424               for ( my $l = 0; $l < $ninds; $l++ ) {
 425                 $sample_keys{$incl_keys -> [$k][$l]}++;
 426               }
 427               for ( my $l = 0; $l < $ninds; $l++ ) {
 428                 my $val   = defined $sample_keys{$l} ? $sample_keys{$l} : 0;
 429                 my $extra = ($l == ($ninds-1)) ? "\n" : ',';
 430                 print SAMPLEKEYS $val,$extra;
 431               }
 432               print KEYS join(',',@{$incl_keys -> [$k]}),"\n";
 433             }
 434             close( KEYS );
 435             close( SAMPLEKEYS );
 436           } else {
 437             ui -> print( category => 'bootstrap',
 438                          message  => "Recreating bootstrap from previous run." );
 439
 440             # Recreate the datasets and models from a checkpoint
 441             my ($stored_filename, $stored_samples, %stored_subjects);
 442             my @seed;
 443             my ($stored_filename_found, $stored_samples_found, $stored_subjects_found, $stored_seed_found);
 444             open( DONE, $self -> {'directory'}."/m$model_number/done.$i" );
 445             while( <DONE> ){
 446               if( /^Resampling from (.+) performed$/ ){
 447                 $stored_filename = $1;
 448                 $stored_filename_found = 1;
 449                 next;
 450               }
 451               if( /^(\d+) samples$/ ){
 452                 ui -> print( category => 'bootstrap',
 453                              message  => "Samples: $1" );
 454                 $stored_samples = $1;
 455                 $stored_samples_found = 1;
 456                 next;
 457               }
 458               if( /^(\d+) subjects$/ ){
 459                   # Old format (pre 2.2.2)
 460                 $stored_subjects{'default'} = $1;
 461                 $stored_subjects_found = 1;
 462                 next;
 463               }
 464               if( /^Strata (\w+): (\d+) sample_size$/ ){
 465                 ui -> print( category => 'bootstrap',
 466                              message  => "Strata $1, samples size: $2" );
 467                 $stored_subjects{$1} = $2;
 468                 $stored_subjects_found = 1;
 469                 next;
 470               }
 471               if( /^seed: (\d+) (\d+)$/ ){
 472                 @seed = ($1, $2);
 473                 $stored_seed_found = 1;
 474                 next;
 475               }
 476
 477               if( $stored_filename_found and $stored_samples_found
 478                   and $stored_subjects_found and $stored_seed_found ){
 479                 last;
 480               }
 481             }
 482             close( DONE );
 483             unless( $stored_filename_found and $stored_samples_found
 484                     and $stored_samples_found and $stored_seed_found ){
 485               debug -> die( level => 1,
 486                              message => "The bootstrap/m1/done file could not be parsed." );
 487             }
 488
 489             if ( $stored_samples < $samples[$i-1] ) {
 490               debug -> die( message => "The number of samples saved in previous run ($stored_samples) ".
 491                             "are bigger than the number of samples specified for this run (".
 492                             $samples[$i-1].")" );
 493             }
 494             while( my ( $strata, $samples ) = each %{$subjects[$i-1]} ) {
 495               if ( $stored_subjects{$strata} != $samples ) {
 496                 debug -> die( message => "The number of individuals sampled i strata $strata ".
 497                               "in previous run (".
 498                               $stored_subjects{$strata}.
 499                               ") does not match the number of individuals specified ".
 500                               "for this run (".$samples.")" );
 501               }
 502             }
 503             while( my ( $strata, $samples ) = each %stored_subjects ) {
 504               if ( $subjects[$i-1]->{$strata} != $samples ) {
 505                 debug -> die( message => "The number of individuals sampled i strata $strata ".
 506                               "in previous run (". $samples .
 507                               ") does not match the number of individuals specified ".
 508                               "for this run (".$subjects[$i-1]->{$strata}.")" );
 509               }
 510             }
 511 #           my ( @stored_ids, @stored_keys );
 512 #           for ( my $k = 4; $k < 4+$stored_samples; $k++ ) {
 513 #             chomp($rows[$k]);
 514 #             my @sample_ids = split(',', $rows[$k] );
 515 #             push( @stored_ids, \@sample_ids );
 516 #           }
 517 #           for ( my $k = 5+$stored_samples; $k < 5+2*$stored_samples; $k++ ) {
 518 #             chomp($rows[$k]);
 519 #             my @sample_keys = split(',', $rows[$k] );
 520 #             push( @stored_keys, \@sample_keys );
 521 #           }
 522 #           @seed = split(' ',$rows[5+2*$stored_samples]);
 523 #           $incl_ids = \@stored_ids;
 524 #           $incl_keys = \@stored_keys;
 525 #           shift( @seed ); # get rid of 'seed'-word
 526             # Reinitiate the model objects
 527             for ( my $j = 1; $j <= $samples[$i-1]; $j++ ) {
 528               my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
 529                                                                       $model_number,
 530                                                                       'bs_pr'.$i.'_'.$j.'.mod' );
 531 #             my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
 532 #                                                                      $model_number,
 533 #                                                                      '/bs_pr'.$i.'_'.$j.'.lst' );
 534               $new_mod = model ->
 535                 new( directory   => $model_dir,
 536                      filename    => $filename,
 537 #                    outputfile  => $outfilename,
 538                      extra_files => $model -> extra_files,
 539                      target      => 'disk',
 540                      ignore_missing_files => 1 );
 541 #             $new_mod -> target( 'disk' );
 542               push( @new_models, $new_mod );
 543 #             print "$j\n";
 544 #             print Dumper $new_mod;
 545 #             sleep(10);
 546             }
 547             random_set_seed( @seed );
 548             ui -> print( category => 'bootstrap',
 549                          message  => "Using $stored_samples previously resampled ".
 550                          "bootstrap sets from $stored_filename" )
 551               unless $self -> {'parent_threads'} > 1;
 552           }
 553 #         push( @{$self -> {'included_inds'} -> [$model_number-1]}, $incl_ids );
 554 #         push( @{$self -> {'included_keys'} -> [$model_number-1]}, $incl_keys );
 555         }
 556         $self -> {'prepared_models'}[$model_number-1]{'own'} = \@new_models;
 557
 558         # ---------------------  Create the sub tools  ------------------------------
 559
 560         # {{{ sub tools
 561
 562         my $subdir = $class;
 563         $subdir =~ s/tool:://;
 564         my @subtools = @{$self -> {'subtools'}};
 565         shift( @subtools );
 566         my %subargs = ();
 567         if ( defined $self -> {'subtool_arguments'} ) {
 568           %subargs = %{$self -> {'subtool_arguments'}};
 569         }
 570         push( @{$self -> {'tools'}},
 571               $class ->
 572               new( reference_object      => $self,
 573                    models                => \@new_models,
 574                    threads               => $subm_threads,
 575                    directory             => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
 576                    _raw_results_callback => $self ->
 577                           _modelfit_raw_results_callback( model_number => $model_number ),
 578                    subtools              => \@subtools,
 579                    parent_threads        => $own_threads,
 580                    parent_tool_id        => $self -> {'tool_id'},
 581                    logfile               => $self -> {'logfile'}[$model_number-1],
 582                    raw_results           => undef,
 583                    prepared_models       => undef,
 584                    top_tool              => 0,
 585                    %subargs ) );
 586
 587 #             ( clean            => $self -> {'clean'},
 588 #               base_directory   => $self -> {'directory'},
 589 #               compress         => $self -> {'compress'},
 590 #               directory        => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
 591 #               drop_dropped     => $self -> {'drop_dropped'},
 592 #               wrap_data        => $self -> {'wrap_data'},
 593 #               run_on_nordugrid => $self -> {'run_on_nordugrid'},
 594 #               cpu_time         => $self -> {'cpu_time'},
 595 #               run_on_lsf       => $self -> {'run_on_lsf'},
 596 #               lsf_queue        => $self -> {'lsf_queue'},
 597 #               lsf_options      => $self -> {'lsf_options'},
 598 #               lsf_job_name     => $self -> {'lsf_job_name'},
 599 #               lsf_project_name => $self -> {'lsf_project_name'},
 600
 601 #               parent_tool_id   => $self -> {'tool_id'},
 602
 603 #               models           => \@new_models,
 604 #               grid_batch_size  => $self -> {'grid_batch_size'},
 605 #               nm_version       => $self -> {'nm_version'},
 606 #               picky            => $self -> {'picky'},
 607 #               retries          => $self -> {'retries'},
 608 #               tweak_inits      => $self -> {'tweak_inits'},
 609 #               handle_maxevals  => $self -> {'handle_maxevals'},
 610 #               _raw_results_callback => $self ->
 611 #               _modelfit_raw_results_callback( model_number => $model_number ),
 612 #               threads               => $subm_threads,
 613 #               subtools              => \@subtools,
 614 #               logfile               => $self -> {'logfile'}[$model_number-1],
 615 #               parent_threads        => $own_threads,
 616 #               %subargs ) );
 617
 618         # }}} sub tools
 619
 620       }
 621 end general_setup
 622
 623 # }}}
 624
 625 # {{{ llp_setup
 626 start llp_setup
 627       {
 628         my @subm_threads;
 629         if (ref( $self -> {'threads'} ) eq 'ARRAY') {
 630           @subm_threads = @{$self -> {'threads'}};
 631           unshift(@subm_threads);
 632         } else {
 633           @subm_threads = ($self -> {'threads'});
 634         }
 635         $self -> general_setup( model_number => $model_number,
 636                                 class        => 'tool::llp',
 637                                 subm_threads => \@subm_threads );
 638       }
 639 end llp_setup
 640 # }}} llp_setup
 641
 642 # {{{ _jackknife_raw_results_callback
 643
 644 start _jackknife_raw_results_callback
 645       {
 646         # Use the bootstrap's raw_results file.
 647         my ($dir,$file) =
 648           OSspecific::absolute_path( $self -> {'directory'},
 649                                      $self -> {'raw_results_file'}[$model_number-1] );
 650         my ($dir,$nonp_file) =
 651           OSspecific::absolute_path( $self -> {'directory'},
 652                                      $self -> {'raw_nonp_file'}[$model_number-1] );
 653         $subroutine = sub {
 654           my $jackknife = shift;
 655           my $modelfit  = shift;
 656           $modelfit -> raw_results_file( $dir.$file );
 657           $modelfit -> raw_nonp_file( $dir.$nonp_file );
 658           $modelfit -> raw_results_append( 1 );
 659           my ( @new_header, %param_names );
 660           foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
 661             unshift( @{$row}, 'jackknife' );
 662           }
 663           $modelfit -> {'raw_results_header'} = [];
 664         };
 665         return $subroutine;
 666       }
 667 end _jackknife_raw_results_callback
 668
 669 # }}} _jackknife_raw_results_callback
 670
 671 # {{{ _modelfit_raw_results_callback
 672
 673 start _modelfit_raw_results_callback
 674       {
 675         # Use the bootstrap's raw_results file.
 676         my ($dir,$file) =
 677           OSspecific::absolute_path( $self -> {'directory'},
 678                                      $self -> {'raw_results_file'}[$model_number-1] );
 679         my ($dir,$nonp_file) =
 680           OSspecific::absolute_path( $self -> {'directory'},
 681                                      $self -> {'raw_nonp_file'}[$model_number-1] );
 682         my $orig_mod = $self -> {'models'}[$model_number-1];
 683         my $type = $self -> {'type'};
 684         $subroutine = sub {
 685           my $modelfit = shift;
 686           my $mh_ref   = shift;
 687           my %max_hash = %{$mh_ref};
 688           $modelfit -> raw_results_file( $dir.$file );
 689           $modelfit -> raw_nonp_file( $dir.$nonp_file );
 690
 691           # The prepare_raw_results in the modelfit will fix the
 692           # raw_results for each bootstrap sample model, we must add
 693           # the result for the original model.
 694
 695           my %dummy;
 696
 697           my ($raw_results_row, $nonp_rows) = $self -> create_raw_results_rows( max_hash => $mh_ref,
 698                                                                                 model => $orig_mod,
 699                                                                                 raw_line_structure => \%dummy );
 700
 701           $orig_mod -> outputs -> [0] -> flush;
 702
 703           unshift( @{$modelfit -> {'raw_results'}}, @{$raw_results_row} );
 704
 705           # {{{ New header
 706
 707           if ( $type eq 'bca' ) {
 708             foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
 709               unshift( @{$row}, 'bootstrap' );
 710             }
 711             unshift( @{$modelfit -> {'raw_results_header'}}, 'method' );
 712           }
 713
 714           @{$self -> {'raw_results_header'}} = @{$modelfit -> {'raw_results_header'}};
 715
 716           # }}} New header
 717
 718         };
 719         return $subroutine;
 720       }
 721 end _modelfit_raw_results_callback
 722
 723 # }}} _modelfit_raw_results_callback
 724
 725 # {{{ modelfit_analyze
 726
 727 start modelfit_analyze
 728       {
 729         my @params = @{$self -> {'parameters'}};
 730         my @diagnostic_params = @{$self -> {'diagnostic_parameters'}};
 731         my ( @print_order, @calculation_order );
 732
 733         if ( $self -> {'type'} eq 'bca' ) {
 734
 735           # --------------------------  BCa method  ---------------------------------
 736
 737           # {{{ BCa
 738
 739           @calculation_order = @{$self -> {'bca_calculation_order'}};
 740           @print_order = @{$self -> {'bca_print_order'}};
 741           my $jk_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 742             $self -> {'threads'} -> [1]:$self -> {'threads'};
 743           my $done = ( -e $self -> {'directory'}."/jackknife_done.$model_number" ) ? 1 : 0;
 744           if ( not $done ) {
 745
 746             # {{{ Create Jackknife
 747
 748             ui -> print( category => 'bootstrap',
 749                          message  => "Running a Jackknife for the BCa estimates" );
 750             $self -> {'jackknife'} = tool::cdd::jackknife ->
 751               new( models           => [$self -> models -> [$model_number -1]],
 752                    case_columns     => $self -> models -> [$model_number -1]
 753                    -> datas -> [0] -> idcolumn,
 754                    _raw_results_callback => $self ->
 755                    _jackknife_raw_results_callback( model_number => $model_number ),
 756                    nm_version       => $self -> {'nm_version'},
 757                    parent_tool_id   => $self -> {'tool_id'},
 758                    threads          => $jk_threads,
 759                    bca_mode         => 1,
 760                    shrinkage        => $self -> {'shrinkage'},
 761                    nonparametric_marginals => $self -> {'nonparametric_marginals'},
 762                    nonparametric_etas => $self -> {'nonparametric_etas'},
 763                    adaptive         => $self -> {'adaptive'},
 764                    rerun            => $self -> {'rerun'},
 765                    verbose          => $self -> {'verbose'},
 766                    cross_validate   => 0 );
 767             # Create a checkpoint. Log the samples and individuals.
 768             open( DONE, ">".$self -> {'directory'}."/jackknife_done.$model_number" ) ;
 769             print DONE "Jackknife directory:\n";
 770             print DONE $self -> {'jackknife'} -> directory,"\n";
 771             my @seed = random_get_seed;
 772             print DONE "seed: @seed\n";
 773             close( DONE );
 774
 775             # }}} Create Jackknife
 776
 777           } else {
 778
 779             # {{{ Recreate Jackknife
 780
 781             open( DONE, $self -> {'directory'}."/jackknife_done.$model_number" );
 782             my @rows = <DONE>;
 783             close( DONE );
 784             my ( $stored_directory ) = $rows[1];
 785             chomp( $stored_directory );
 786             if ( not -e $stored_directory ) {
 787               debug -> die( message => "The Jackknife directory ".$stored_directory.
 788                             "indicated by ".$self -> {'directory'}.
 789                             "/jackknife_done.$model_number".
 790                             " from the old bootstrap run in ".
 791                             $self -> {'directory'}." does not exist" );
 792             }
 793             my @seed = split(' ',$rows[2]);
 794             shift( @seed ); # get rid of 'seed'-word
 795             $self -> {'jackknife'} = tool::cdd::jackknife ->
 796               new( models           => [$self -> models -> [$model_number -1]],
 797                    case_columns     => $self -> models -> [$model_number -1]
 798                    -> datas -> [0] -> idcolumn,
 799                    _raw_results_callback => $self ->
 800                    _jackknife_raw_results_callback( model_number => $model_number ),
 801                    threads          => $jk_threads,
 802                    parent_tool_id   => $self -> {'tool_id'},
 803                    directory        => $stored_directory,
 804                    bca_mode         => 1,
 805                    shrinkage        => $self -> {'shrinkage'},
 806                    nm_version       => $self -> {'nm_version'},
 807                    nonparametric_marginals => $self -> {'nonparametric_marginals'},
 808                    nonparametric_etas => $self -> {'nonparametric_etas'},
 809                    adaptive         => $self -> {'adaptive'},
 810                    rerun            => $self -> {'rerun'},
 811                    verbose          => $self -> {'verbose'},
 812                    cross_validate   => 0 );
 813             random_set_seed( @seed );
 814             ui -> print( category => 'bootstrap',
 815                          message  => "Restarting BCa Jackknife from ".
 816                          $stored_directory )
 817               unless $self -> {'parent_threads'} > 1;
 818
 819             # }}} Recreate Jackknife
 820
 821           }
 822
 823           $self -> {'jackknife'} -> run;
 824           $self -> {'jackknife_results'} = $self -> {'jackknife'} -> {'results'};
 825           $self -> {'jackknife_prepared_models'} = $self -> {'jackknife'} -> {'prepared_models'};
 826
 827           $self -> {'jackknife_raw_results'}[$model_number-1] =
 828             $self -> {'jackknife'} -> raw_results;
 829 #           $self -> {'jackknife'} -> raw_results -> [$model_number-1];
 830 #         $Data::Dumper::Maxdepth = 0;
 831 #         print Dumper $self -> {'jackknife_raw_results'};
 832
 833           # }}} BCa
 834
 835         } else {
 836           @calculation_order = @{$self -> {'calculation_order'}};
 837           @print_order = @{$self -> {'print_order'}};
 838           $self -> {'bootstrap_raw_results'}[$model_number-1] =
 839             $self -> {'tools'} -> [0] -> raw_results;
 840 #           $self -> {'tools'} -> [0] -> raw_results -> [$model_number-1];
 841         }
 842         unless( ref($self -> {'raw_results_header'}[0]) eq 'ARRAY' ) {
 843           my $tmp = $self -> {'raw_results_header'};
 844           $self -> {'raw_results_header'} = [];
 845           $self -> {'raw_results_header'}[$model_number-1] = $tmp;
 846         }
 847
 848         my @param_names = @{$self -> models -> [$model_number -1] -> outputs -> [0] -> labels};
 849         my ( @diagnostic_names, @tmp_names );
 850         foreach my $param ( @diagnostic_params ) {
 851           push( @tmp_names, $param );
 852           $tmp_names[$#tmp_names] =~ s/_/\./g;
 853         }
 854         for ( my $i = 0; $i <= $#param_names; $i++ ) {
 855           unshift( @{$param_names[$i]}, 'OFV' );
 856           push( @{$diagnostic_names[$i]}, @tmp_names );
 857         }
 858         if( defined $PsN::config -> {'_'} -> {'R'} and
 859             -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ) {
 860           # copy the bootstrap R-script
 861           cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
 862           # Execute the script
 863           system( $PsN::config -> {'_'} -> {'R'}." CMD BATCH bootstrap.R" );
 864         }
 865       }
 866 end modelfit_analyze
 867
 868 # }}}
 869
 870 # {{{ prepare_results
 871
 872 start prepare_results
 873       {
 874         # {{{ definitions
 875
 876         # The '3' is there to skip model, problem and subproblem numbers
 877         my $skip_mps = 3;
 878         my ( @calculation_order, @print_order, %diag_idx );
 879         if ( $self -> {'type'} eq 'bca' ) {
 880           @calculation_order = @{$self -> {'bca_calculation_order'}};
 881           @print_order = @{$self -> {'bca_print_order'}};
 882         } else {
 883           @calculation_order = @{$self -> {'calculation_order'}};
 884           @print_order = @{$self -> {'print_order'}};
 885         }
 886         if ( $self -> {'type'} eq 'bca' ) {
 887 #         $self -> read_raw_results();
 888           $self -> bca_read_raw_results();
 889 #           if ( not defined $self -> {'bootstrap_raw_results'} );
 890         } else {
 891 #         if ( not defined $self -> {'raw_results'} ) {
 892             $self -> read_raw_results();
 893             $self -> {'bootstrap_raw_results'} = $self -> {'raw_results'};
 894 #         }
 895         }
 896
 897         for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
 898           $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i + $skip_mps;
 899         }
 900
 901         # }}} definitions
 902
 903         # ---------------------  Get data from raw_results  -------------------------
 904
 905         # Divide the data into diagnostics and estimates. Included in estimates are
 906         # the parametric estimates, the standard errors of these, the nonparametric
 907         # estimates, the shrinkage in eta and the shrinkage in wres
 908         # The diagnostics end up in {'bootstrap_diagnostics'} and
 909         # {'jackknife_diagnostics'}. The estimates in {'bootstrap_estimates'} and
 910         # {'jackknife_estimates'}.
 911         # The number of runs that are selected for calculation of the results is
 912         # saved.
 913
 914         # {{{ Get the data from the runs
 915
 916         foreach my $tool ( 'bootstrap', 'jackknife' ) {
 917           if ( defined $self -> {$tool.'_raw_results'} ) {
 918             for ( my $i = 0; $i < scalar @{$self->{$tool.'_raw_results'}}; $i++ ) { # All models
 919
 920               # {{{ Get the number of columns with estimates
 921
 922               my $cols_orig = 0;
 923               foreach my $param ( 'theta', 'omega', 'sigma' ) {
 924                 my $labels =
 925                   $self -> {'models'} -> [$i] -> labels( parameter_type => $param );
 926                 # we can't use labels directly since different models may have different
 927                 # labels (still within the same modelfit)
 928                 my $numpar = scalar @{$labels -> [0]} if ( defined $labels and
 929                                                            defined $labels -> [0] );
 930                 $cols_orig += ( $numpar*3 ); # est + SE + eigen values
 931               }
 932               # nonparametric omegas and shrinkage
 933               my $nomegas = $self -> {'models'} -> [$i] -> nomegas;
 934               my $numpar = $nomegas -> [0];
 935
 936               # shrinkage omega + wres shrinkage
 937               $cols_orig += $numpar + 1;
 938 #             $cols_orig += ($numpar*($numpar+1)/2 + $numpar + 1);
 939
 940               $cols_orig++; # OFV
 941
 942               # }}} Get the number of columns with estimates
 943
 944               # {{{ Loop, choose and set diagnostics and estimates
 945
 946               my %return_section;
 947               $return_section{'name'} = 'Warnings';
 948               my ( $skip_term, $skip_cov, $skip_warn, $skip_bound );
 949               my $included = 0;
 950 #             print Dumper $self->{$tool.'_raw_results'};
 951               for ( my $j = 0; $j < scalar @{$self->{$tool.'_raw_results'}->[$i]}; $j++ ) { # orig model + prepared_models
 952                 my $columns = scalar @{$self->{$tool.'_raw_results'}->[$i][$j]};
 953
 954                 # -----------------------  Diagnostics  -----------------------------
 955
 956                 for ( my $m = $skip_mps; $m < scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m++ ) { # value
 957                   $self->{$tool.'_diagnostics'}->[$i][$j][$m-$skip_mps] =
 958                       $self->{$tool.'_raw_results'}->[$i][$j][$m];
 959                 }
 960                 my $use_run = 1;
 961                 if ( $self -> {'skip_minimization_terminated'} and
 962                      ( not defined $self->{$tool.'_raw_results'}->
 963                        [$i][$j][$diag_idx{'minimization_successful'}]
 964                        or not $self->{$tool.'_raw_results'}->
 965                        [$i][$j][$diag_idx{'minimization_successful'}] ) ) {
 966                   $skip_term++;
 967                   $use_run = 0;
 968                 } elsif ( $self -> {'skip_covariance_step_terminated'} and not
 969                           $self->{$tool.'_raw_results'}->
 970                           [$i][$j][$diag_idx{'covariance_step_successful'}] ) {
 971                   $skip_cov++;
 972                   $use_run = 0;
 973                 } elsif ( $self -> {'skip_with_covstep_warnings'} and
 974                           $self->{$tool.'_raw_results'}->
 975                           [$i][$j][$diag_idx{'covariance_step_warnings'}] ) {
 976                   $skip_warn++;
 977                   $use_run = 0;
 978                 } elsif ( $self -> {'skip_estimate_near_boundary'} and
 979                           $self->{$tool.'_raw_results'}->
 980                           [$i][$j][$diag_idx{'estimate_near_boundary'}] ) {
 981                   $skip_bound++;
 982                   $use_run = 0;
 983                 }
 984
 985                 # ------------------------  Estimates  ------------------------------
 986
 987                 if( $use_run ) {
 988                   for ( my $m = scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m < $columns; $m++ ) { # value
 989                     my $val = $self->{$tool.'_raw_results'}->[$i][$j][$m];
 990                     $self->{$tool.'_estimates'}->
 991                         [$i][$included][$m-(scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps)] = $val;
 992                   }
 993                   $included++;
 994                 }
 995               }
 996
 997               # }}} Loop, choose and set diagnostics and estimates
 998
 999               # {{{ push #runs to results
1000
1001               if ( defined $skip_term ) {
1002                 push( @{$return_section{'values'}}, "$skip_term runs with miminization ".
1003                       "terminated were skipped when calculating the $tool results" );
1004               }
1005               if ( defined $skip_cov ) {
1006                 push( @{$return_section{'values'}}, "$skip_cov runs with aborted ".
1007                       "covariance steps were skipped when calculating the $tool results" );
1008               }
1009               if ( defined $skip_warn ) {
1010                 push( @{$return_section{'values'}}, "$skip_warn runs with errors from ".
1011                       "the covariance step were skipped when calculating the $tool results" );
1012               }
1013               if ( defined $skip_bound ) {
1014                 push( @{$return_section{'values'}}, "$skip_bound runs with estimates ".
1015                       "near a boundary were skipped when calculating the $tool results" );
1016               }
1017               $return_section{'labels'} = [];
1018               push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1019
1020               # }}} push #runs to results
1021
1022             }
1023           }
1024         }
1025
1026 #         $Data::Dumper::Maxdepth = 5;
1027 #         die Dumper $self -> {'bootstrap_diagnostics'};
1028
1029         # }}} Get the data from the runs
1030
1031         # ----------------------  Calculate the results  ----------------------------
1032
1033         # {{{ Result calculations
1034
1035         for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1036
1037           my $mps_offset = $self -> {'bca'} ? 4 : 3; # <- this is the offset to
1038                                                      # diagonstic_parameters,
1039                                                      # which is one more for
1040                                                      # the method column added
1041                                                      # with a bca run.
1042
1043           my @param_names = @{$self -> {'raw_results_header'}[$i]}[($mps_offset + scalar @{$self -> {'diagnostic_parameters'}}) .. (scalar @{$self -> {'raw_results_header'}[$i]} - 1)];
1044           my ( @diagnostic_names, @tmp_names );
1045           foreach my $param ( @{$self -> {'diagnostic_parameters'}} ) {
1046             push( @tmp_names, $param );
1047             $tmp_names[$#tmp_names] =~ s/_/\./g;
1048           }
1049
1050           @diagnostic_names = @tmp_names;
1051           foreach my $result_type ( @calculation_order ) {
1052             my @names = $result_type eq 'diagnostic_means' ?
1053                 @diagnostic_names : @param_names;
1054             my $calc = 'calculate_'.$result_type;
1055             $self -> $calc( model_number    => ($i+1),
1056                             parameter_names => \@names );
1057           }
1058           foreach my $result_type ( @print_order ) {
1059             my $name = $result_type;
1060             $name =~ s/_/\./g;
1061             my %return_section;
1062             $return_section{'name'} = $name;
1063             $return_section{'values'} = $self -> {$result_type} -> [$i];
1064             $return_section{'labels'} = $self -> {$result_type.'_labels'} -> [$i];
1065             push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1066           }
1067         }
1068
1069         # }}} Result calculations
1070
1071       }
1072 end prepare_results
1073
1074 # }}} prepare_results
1075
1076 # {{{ print_summary
1077
1078 start print_summary
1079       {
1080         sub acknowledge {
1081           my $name    = shift;
1082           my $outcome = shift;
1083           my $file    = shift;
1084           my $l = (7 - length( $outcome ))/2;
1085           my $c_num = '00';
1086           $c_num    = '07' if ( $outcome eq 'OK' );
1087           $c_num    = '13' if ( $outcome eq 'WARNING' );
1088           $c_num    = '05' if ( $outcome eq 'ERROR' );
1089           # my $text = sprintf( "%-66s%2s%7s%-5s\n\n", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1090           my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1091           #  cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1092           #  my $text = cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1093           print $text, "\n\n";
1094           print $file $text if defined $file;
1095         }
1096
1097         sub sum {
1098           my $arr = shift;
1099           my $sum = 0;
1100           for ( @{$arr} ) {
1101             $sum += $_;
1102           }
1103           return $sum;
1104         }
1105
1106         my $diag_number = scalar @{$self -> {'diagnostic_parameters'}} - 1;
1107         my %diag_idxs;
1108         for ( my $i = 0; $i <= $diag_number; $i++ ) {
1109           $diag_idxs{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1110         }
1111
1112         open( my $log, ">test.log" );
1113         for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1114           print "MODEL ",$i+1,"\n" if ( scalar @{$self -> {'bootstrap_raw_results'}} > 1 );
1115           if ( $self -> {'diagnostic_means'} ->
1116                [$i][0][$diag_idxs{'minimization_successful'}] >
1117                $self -> {'minimization_successful_limit'} ) {
1118             acknowledge( 'Successful minimization ratio = '.
1119                          $self -> {'diagnostic_means'} ->
1120                          [$i][0][$diag_idxs{'minimization_successful'}], 'OK', $log );
1121           } else {
1122             acknowledge( 'Termination problems in '.
1123                          sprintf("%4.2f", (100-($self -> {'diagnostic_means'} ->
1124                                                 [$i][0][$diag_idxs{'minimization_successful'}]*100)))
1125                          .'% of the bootstrap runs', 'WARNING', $log );
1126           }
1127
1128           if ( $self -> {'diagnostic_means'} ->
1129                [$i][0][$diag_idxs{'covariance_step_successful'}] >
1130                $self -> {'covariance_step_successful_limit'} ) {
1131             acknowledge( 'Successful covariance step ratio = '.$self -> {'diagnostic_means'} ->
1132                          [$i][0][$diag_idxs{'covariance_step_successful'}], 'OK', $log );
1133           } else {
1134             acknowledge( 'Covariance step terminated in '.
1135                          sprintf("%4.2f", (100-($self -> {'diagnostic_means'} ->
1136                                                 [$i][0][$diag_idxs{'covariance_step_successful'}]*100)))
1137                          .'% of the bootstrap runs', 'WARNING', $log );
1138           }
1139
1140           if ( $self -> {'diagnostic_means'} ->
1141                [$i][0][$diag_idxs{'covariance_step_warnings'}] <
1142                $self -> {'covariance_step_warnings_limit'} ) {
1143             acknowledge( 'Covariance step warnings ratio = '.$self -> {'diagnostic_means'} ->
1144                          [$i][0][$diag_idxs{'covariance_step_warnings'}], 'OK', $log );
1145           } else {
1146             acknowledge( 'Covariance step warnings in '.
1147                          sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1148                                            [$i][0][$diag_idxs{'covariance_step_warnings'}]*100))
1149                          .'% of the bootstrap runs', 'WARNING', $log );
1150           }
1151
1152           if ( $self -> {'diagnostic_means'} ->
1153                [$i][0][$diag_idxs{'estimate_near_boundary'}] <
1154                $self -> {'estimate_near_boundary_limit'} ) {
1155             acknowledge( 'Estimate near boundary ratio = '.$self -> {'diagnostic_means'} ->
1156                          [$i][0][$diag_idxs{'estimate_near_boundary'}], 'OK', $log );
1157           } else {
1158             acknowledge( 'Estimate near boundary found in '.
1159                          sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1160                                            [$i][0][$diag_idxs{'estimate_near_boundary'}]*100))
1161                          .'% of the bootstrap runs', 'WARNING', $log );
1162           }
1163           my $sum = sum( $self -> {'within_se_confidence_intervals'}->
1164                          [$i]{$self -> {'se_confidence_intervals_level'}} );
1165           if ( not defined $sum or $sum < 1 ) {
1166             acknowledge( 'No '.(100-$self -> {'se_confidence_intervals_level'}).
1167                          '% confidence intervals based on the'.
1168                          ' bootstrap standard errors include '.
1169                          $self -> {'se_confidence_intervals_check'}, 'OK', $log );
1170           } else {
1171             acknowledge( "$sum ".(100-$self -> {'se_confidence_intervals_level'}).
1172                          "% confidence intervals based on the bootstrap".
1173                          " SE's include ".
1174                          $self -> {'se_confidence_intervals_check'}, 'WARNING', $log );
1175             my $found =
1176               scalar @{$self -> {'within_se_confidence_intervals'}->
1177                          [$i]{$self -> {'se_confidence_intervals_level'}}} - 1;
1178             for ( my $l = 0; $l <= $found ; $l++ ) {
1179               if ( $self -> {'within_se_confidence_intervals'}->
1180                    [$i]{$self -> {'se_confidence_intervals_level'}}[$l] ) {
1181                 printf( "\t%-20s\n",
1182                         $self -> {'standard_error_confidence_intervals_labels'} ->
1183                         [$i][1][$l] );
1184                 print $log
1185                   sprintf( "\t%-20s\n",
1186                            $self -> {'standard_error_confidence_intervals_labels'} ->
1187                            [$i][1][$l] );
1188                 print "\n" if ( $l == $found );
1189                 print $log "\n" if ( $l == $found );
1190               }
1191             }
1192           }
1193
1194           my $sum = sum( $self -> {'large_bias'}-> [$i][0] );
1195           if ( not defined $sum or $sum < 1 ) {
1196             acknowledge( 'No bias larger than '.
1197                          ($self -> {'large_bias_limit'}*100).'% found', 'OK', $log );
1198           } else {
1199             acknowledge( "$sum estimates were found to have a relative bias > ".
1200                          $self -> {'large_bias_limit'}, 'WARNING', $log );
1201             my $found =
1202               scalar @{$self -> {'large_bias'}->
1203                          [$i][0]} - 1;
1204             for ( my $l = 0; $l <= $found ; $l++ ) {
1205               if ( $self -> {'large_bias'}->
1206                    [$i][0][$l] ) {
1207                 printf( "\t%-20s%3.2f %\n", $self -> {'bias_labels'} ->
1208                         [$i][1][$l], ($self -> {'bias'} -> [$i][0][$l]/
1209                                       $self->{'bootstrap_estimates'} -> [$i][0][$l])*100 );
1210                 #                   print $log
1211                 #                     sprintf( "\t%-20s\n",
1212                 #                              $self -> {'percentile_confidence_intervals_labels'} ->
1213                 #                              [$i][1][$l] );
1214                 print "\n" if ( $l == $found );
1215                 print $log "\n" if ( $l == $found );
1216               }
1217             }
1218           }
1219
1220           if ( $self -> {'type'} eq 'bca' ) {
1221             my $sum = sum( $self -> {'within_bca_confidence_intervals'}->
1222                            [$i]{$self -> {'bca_confidence_intervals_level'}} );
1223             if ( not defined $sum or $sum < 1 ) {
1224               acknowledge( 'No '.(100-$self -> {'bca_confidence_intervals_level'}).
1225                            '% BCa confidence intervals include '.
1226                            $self -> {'bca_confidence_intervals_check'}, 'OK', $log );
1227             } else {
1228               acknowledge( "$sum ".(100-$self -> {'bca_confidence_intervals_level'}).
1229                            "% BCa confidence intervals include ".
1230                            $self -> {'bca_confidence_intervals_check'}, 'WARNING', $log );
1231               my $found =
1232                 scalar @{$self -> {'within_bca_confidence_intervals'}->
1233                            [$i]{$self -> {'bca_confidence_intervals_level'}}} - 1;
1234               for ( my $l = 0; $l <= $found ; $l++ ) {
1235                 if ( $self -> {'within_bca_confidence_intervals'}->
1236                      [$i]{$self -> {'bca_confidence_intervals_level'}}[$l] ) {
1237                   printf( "\t%-20s\n",
1238                           $self -> {'bca_confidence_intervals_labels'} ->
1239                           [$i][1][$l] );
1240                   print $log
1241                     sprintf( "\t%-20s\n",
1242                              $self -> {'bca_confidence_intervals_labels'} ->
1243                              [$i][1][$l] );
1244                   print "\n" if ( $l == $found );
1245                   print $log "\n" if ( $l == $found );
1246                 }
1247               }
1248             }
1249           } else {
1250             my $sum = sum( $self -> {'within_percentiles'}->
1251                            [$i]{$self -> {'percentile_confidence_intervals_level'}} );
1252             if ( not defined $sum or $sum < 1 ) {
1253               acknowledge( 'No '.(100-$self -> {'percentile_confidence_intervals_level'}).
1254                            '% confidence intervals based on the'.
1255                            ' bootstrap percentiles include '.
1256                            $self -> {'percentile_confidence_intervals_check'}, 'OK', $log );
1257             } else {
1258               acknowledge( "$sum ".(100-$self -> {'percentile_confidence_intervals_level'}).
1259                            "% confidence intervals based on the percentiles".
1260                            " include ".
1261                            $self -> {'percentile_confidence_intervals_check'}, 'WARNING', $log );
1262               my $found =
1263                 scalar @{$self -> {'within_percentiles'}->
1264                            [$i]{$self -> {'percentile_confidence_intervals_level'}}} - 1;
1265               for ( my $l = 0; $l <= $found ; $l++ ) {
1266                 if ( $self -> {'within_percentiles'}->
1267                      [$i]{$self -> {'percentile_confidence_intervals_level'}}[$l] ) {
1268                   printf( "\t%-20s\n",
1269                           $self -> {'percentile_confidence_intervals_labels'} ->
1270                           [$i][1][$l] );
1271                   print $log
1272                     sprintf( "\t%-20s\n",
1273                              $self -> {'percentile_confidence_intervals_labels'} ->
1274                              [$i][1][$l] );
1275                   print "\n" if ( $l == $found );
1276                   print $log "\n" if ( $l == $found );
1277                 }
1278               }
1279             }
1280           }
1281         }
1282         close ( $log );
1283       }
1284 end print_summary
1285
1286 # }}} print_summary
1287
1288 # {{{ bca_read_raw_results
1289
1290 start bca_read_raw_results
1291       {
1292         $self -> {'raw_results_header'} = [];
1293         for ( my $i = 1; $i <= scalar @{$self->{'models'}}; $i++ ) { # All models
1294           if ( -e $self -> {'directory'}.'raw_results'.$i.'.csv' ) {
1295             open( RRES, $self -> {'directory'}.'raw_results'.$i.'.csv' );
1296             my @file = <RRES>;
1297             close( RRES );
1298             map { chomp; my @tmp = split(',',$_); $_ = \@tmp } @file ;
1299
1300             my $header = shift @file;
1301
1302             # Get rid of 'method' column
1303             my $cols = scalar(@{$header})-1;
1304             @{$self -> {'raw_results_header'}[$i-1]} = @{$header}[1..$cols];
1305             $self -> {'raw_results'} -> [$i-1] = \@file;
1306             for( my $j = 0; $j <= $#file; $j++ ) {
1307               if ( $file[$j][0] eq 'jackknife' ) {
1308                 shift( @{$file[$j]} );
1309 #               $self -> {'jackknife_raw_results'}[$i-1] = \@file;
1310                 push( @{$self -> {'jackknife_raw_results'}[$i-1]}, $file[$j]);
1311               } else {
1312                 shift( @{$file[$j]} );
1313 #               $self -> {'bootstrap_raw_results'}[$i-1] = \@file;
1314                 push( @{$self -> {'bootstrap_raw_results'}[$i-1]}, $file[$j] );
1315               }
1316             }
1317           }
1318         }
1319       }
1320 end bca_read_raw_results
1321
1322 # }}} bca_read_raw_results
1323
1324 # {{{ calculate_diagnostic_means
1325
1326 start calculate_diagnostic_means
1327       {
1328         my ( @sum, @diagsum, %diag_idx );
1329         for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
1330           $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1331         }
1332
1333         my $def = 0;
1334         # Prepared model, skip the first (the original)
1335         for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_diagnostics'} ->
1336                                          [$model_number-1]}; $k++ ) {
1337           # Diagnostics
1338           if( defined $self -> {'bootstrap_diagnostics'} ->
1339               [$model_number-1][$k] ) {
1340             $def++;
1341             for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_diagnostics'} ->
1342                                                [$model_number-1][$k]}; $l++ ) {
1343               $sum[$l] += $self -> {'bootstrap_diagnostics'} ->
1344                   [$model_number-1][$k][$l];
1345             }
1346           }
1347         }
1348
1349         # divide by the number of bootstrap samples (-1 to get rid of the original
1350         # model) The [0] in the index is there to indicate the 'model' level. Mostly
1351         # used for printing
1352         for ( my $l = 0; $l <= $#sum; $l++ ) {
1353           if( $l == $diag_idx{'significant_digits'} ) {
1354             $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1355                 $sum[$l] / $def;
1356           } else {
1357             $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1358                 $sum[$l] / ( scalar @{$self -> {'bootstrap_diagnostics'} ->
1359                                           [$model_number-1]} - 1);
1360           }
1361         }
1362         $self -> {'diagnostic_means_labels'} -> [$model_number-1] =
1363           [[],\@parameter_names];
1364       }
1365 end calculate_diagnostic_means
1366
1367 # }}} calculate_diagnostic_means
1368
1369 # {{{ calculate_means
1370
1371 start calculate_means
1372       {
1373         my ( @sum, @diagsum );
1374         # Prepared model, skip the first (the original)
1375         for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'} ->
1376                                          [$model_number-1]}; $k++ ) {
1377           # Estimates
1378           for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'} ->
1379                                            [$model_number-1][$k]}; $l++ ) {
1380             $sum[$l] += $self -> {'bootstrap_estimates'} ->
1381               [$model_number-1][$k][$l];
1382           }
1383         }
1384         # divide by the number of bootstrap samples (-1 to get rid of the original
1385         # model) The [0] in the index is there to indicate the 'model' level. Mostly
1386         # used for printing
1387         my $samples = scalar @{$self -> {'bootstrap_estimates'} ->
1388                                  [$model_number-1]} - 1;
1389         for ( my $l = 0; $l <= $#sum; $l++ ) {
1390           my $mean = $sum[$l] / $samples;
1391           $self -> {'means'} -> [$model_number-1][0][$l] = $mean;
1392           my $bias = $mean - $self ->
1393             {'bootstrap_estimates'} -> [$model_number-1][0][$l];
1394           $self -> {'bias'} -> [$model_number-1][0][$l] = $bias;
1395           if ( $self->{'bootstrap_estimates'} -> [$model_number-1][0][$l] != 0 and
1396                $bias/$self->{'bootstrap_estimates'} -> [$model_number-1][0][$l]
1397                > $self -> {'large_bias_limit'} ) {
1398             $self -> {'large_bias'} -> [$model_number-1][0][$l] = 1;
1399           } else {
1400             $self -> {'large_bias'} -> [$model_number-1][0][$l] = 0;
1401           }
1402         }
1403         $self -> {'means_labels'} -> [$model_number-1] =
1404           [[],\@parameter_names];
1405
1406         $self -> {'bias_labels'} -> [$model_number-1] =
1407           [[],\@parameter_names];
1408       }
1409 end calculate_means
1410
1411 # }}} calculate_means
1412
1413 # {{{ calculate_jackknife_means
1414
1415 start calculate_jackknife_means
1416       {
1417         my @sum;
1418         # Prepared model, skip the first (the original)
1419         if( defined $self -> {'jackknife_estimates'} ){
1420           for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]}; $k++ ) {
1421             # Estimate
1422             for ( my $l = 0; $l <
1423                   scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][$k]}; $l++ ) {
1424               $sum[$l] += $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1425             }
1426           }
1427           # divide by the number of jackknife samples (-1 to get rid of the original model)
1428           # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1429           for ( my $l = 0; $l <
1430                 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][0]}; $l++ ) {
1431             if( ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1) != 0 ) {
1432               $self -> {'jackknife_means'} -> [$model_number-1][0][$l] =
1433                   $sum[$l] / ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1);
1434             }
1435           }
1436           $self -> {'jackknife_means_labels'} -> [$model_number-1] = [[],\@parameter_names];
1437         }
1438       }
1439 end calculate_jackknife_means
1440
1441 # }}} calculate_jackknife_means
1442
1443 # {{{ calculate_medians
1444 start calculate_medians
1445       {
1446         my @medians;
1447         # Loop the parameters
1448         for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1449                                          [$model_number-1][0]}; $l++ ) {
1450           my @parameter_array;
1451           # From 1 to get rid of original model
1452           for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1453                                            [$model_number-1]}; $k++ ) {
1454             $parameter_array[$k-1] =
1455               $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1456           }
1457           my @sorted = sort {$a <=> $b} @parameter_array;
1458           # median postition is half the ( array length - 1 ).
1459           my $median_position = ( $#sorted ) / 2;
1460           my ($int_med,$frac_med)   = split(/\./, $median_position );
1461           $frac_med = eval("0.".$frac_med);
1462           my $median_low  = $sorted[ $int_med ];
1463           my $median_high = ( $sorted[ $int_med + 1 ] - $sorted[ $int_med ] ) * $frac_med;
1464           $medians[$l] = $median_low + $median_high;
1465         }
1466         # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1467         $self -> {'medians'} -> [$model_number-1][0] = \@medians;
1468         $self -> {'medians_labels'} -> [$model_number-1] = [[],\@parameter_names];
1469       }
1470 end calculate_medians
1471 # }}} calculate_medians
1472
1473 # {{{ calculate_standard_error_confidence_intervals
1474 start calculate_standard_error_confidence_intervals
1475       {
1476         # Sort the limits from the inside out
1477         my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1478         foreach my $limit ( @limits ) {
1479           my ( @lower_limits, @upper_limits, @within_ci );
1480           # Loop the estimates of the first (original) model
1481           for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1482                                            [$model_number-1][0]}; $l++ ) {
1483             my $lower_limit =
1484               $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] -
1485                 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1486                   $self -> {'confidence_limits'} ->{$limit};
1487             my $upper_limit =
1488               $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] +
1489                 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1490                   $self -> {'confidence_limits'} ->{$limit};
1491             push( @lower_limits, $lower_limit );
1492             push(  @upper_limits, $upper_limit );
1493             if ( $self -> {'se_confidence_intervals_check'} < $upper_limit and
1494                  $self -> {'se_confidence_intervals_check'} > $lower_limit ) {
1495               push( @within_ci , 1 );
1496             } else {
1497               push( @within_ci , 0 );
1498             }
1499           }
1500           unshift( @{$self -> {'standard_error_confidence_intervals'} ->
1501                        [$model_number-1]}, \@lower_limits );
1502           push( @{$self -> {'standard_error_confidence_intervals'} ->
1503                     [$model_number-1]}, \@upper_limits );
1504           $self -> {'within_se_confidence_intervals'} ->
1505             [$model_number-1]{$limit} = \@within_ci;
1506           unshift( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1507                        [$model_number-1][0]}, ($limit/2).'%' );
1508           push( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1509                     [$model_number-1][0]}, (100-($limit/2)).'%' );
1510           push( @{$self -> {'within_se_confidence_intervals_labels'} ->
1511                     [$model_number-1][0]}, $limit.'%' );
1512         }
1513         $self -> {'standard_error_confidence_intervals_labels'} -> [$model_number-1][1] =
1514           \@parameter_names;
1515         $self -> {'within_se_confidence_intervals_labels'} -> [$model_number-1][1] =
1516           \@parameter_names;
1517       }
1518 end calculate_standard_error_confidence_intervals
1519 # }}} calculate_standard_error_confidence_intervals
1520
1521 # {{{ calculate_standard_errors
1522
1523 start calculate_standard_errors
1524       {
1525         my @se;
1526         # Prepared model, skip the first (the original)
1527         for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]}; $k++ ) {
1528           # Estimate
1529           for ( my $l = 0; $l <
1530                 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][$k]}; $l++ ) {
1531             $se[$l] += ( $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l] -
1532                          $self -> {'means'}->[$model_number-1][0][$l] )**2;
1533           }
1534         }
1535         # divide by the number of bootstrap samples -1 (-2 to get rid of the original model)
1536         # The [0] in the index is there to indicate the 'model' level.
1537         for ( my $l = 0; $l <
1538               scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][0]}; $l++ ) {
1539           my $div = ( scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]} - 2 );
1540           if( defined $div and not $div == 0 ) {
1541             $self -> {'standard_errors'} -> [$model_number-1][0][$l] =
1542                 ($se[$l] / $div  )**0.5;
1543           } else {
1544             $self -> {'standard_errors'} -> [$model_number-1][0][$l] = undef;
1545           }
1546         }
1547         $self -> {'standard_errors_labels'} -> [$model_number-1] = [[],\@parameter_names];
1548       }
1549 end calculate_standard_errors
1550
1551 # }}} calculate_standard_errors
1552
1553 # {{{ calculate_bca_confidence_intervals
1554
1555 start calculate_bca_confidence_intervals
1556       {
1557         sub c_get_z0 {
1558           my $arr_ref  = shift;
1559           my $orig_value = shift;
1560           my $num_less_than_orig = 0;
1561           my $nvalues = 0;
1562           my $z0;
1563           foreach my $value ( @{$arr_ref} ) {
1564             if ( defined $value and $value ne '' ) {
1565               $num_less_than_orig++ if ( $value < $orig_value );
1566               $nvalues ++;
1567             }
1568           }
1569
1570           unless ( $nvalues == 0 ) {
1571             if ( ($num_less_than_orig / $nvalues ) == 0 ) {
1572               $z0 = -100;
1573             } elsif ( ($num_less_than_orig / $nvalues ) == 1 ) {
1574               $z0 = 100;
1575             } else {
1576               $z0 = udistr( 1 - ($num_less_than_orig / $nvalues ) );
1577             }
1578           }
1579 #         return ( $z0, $nvalues );
1580 #         print sprintf( "%4s:%5.0f,%4s:%15.8g  ",'N:',$num_less_than_orig,'ZO', $z0);
1581           return $z0;
1582         }
1583
1584         sub c_get_acc {
1585           my $arr_ref = shift;
1586           my $jk_mean = shift;
1587           my $acc_upper = 0;
1588           my $acc_lower = 0;
1589           my $nvalues = 0;
1590           my $acc;
1591           foreach my $value ( @{$arr_ref} ){
1592             if ( defined $value and $value ne '' ) {
1593               $acc_upper = $acc_upper + ($jk_mean-$value)**3;
1594               $acc_lower = $acc_lower + ($jk_mean-$value)**2;
1595               $nvalues ++;
1596             }
1597           }
1598           $acc_lower = 6*($acc_lower**(3/2));
1599           unless ( $acc_lower == 0 ) {
1600             $acc = $acc_upper / $acc_lower;
1601           } else {
1602             $acc = $acc_upper / 0.001;
1603           }
1604 #         return ( $acc, $nvalues );
1605 #         print sprintf( "%4s:%15.8g%4s%8.5g\n",'ACC', $acc,'JKm', $jk_mean);
1606           return $acc;
1607         }
1608
1609         sub c_get_alphas {
1610           my $old_alphas = shift;
1611           my $acc = shift;
1612           my $z0 = shift;
1613           my $denom;
1614           my @new_alphas = ();
1615           foreach my $position ( @{$old_alphas} ) {
1616             if ( $position == 0 ){
1617               $denom = -100;
1618             } elsif ( $position == 100 ) {
1619               $denom = 100;
1620             } else {
1621               $denom = $z0 + udistr( 1 - $position/100 );
1622             }
1623             my $nom     = 1 - $acc * $denom;
1624             my $lim = 100*uprob( - ( $z0 + $denom / $nom ) );
1625             push( @new_alphas, $lim );
1626           }
1627 #         print "@new_alphas\n";
1628           return \@new_alphas;
1629         }
1630
1631         my @limits = sort { $a <=> $b } keys %{$self -> {'confidence_limits'}};
1632         # Add the upper limits
1633         my $limnum = $#limits;
1634         for ( my $i = $limnum; $i >= 0; $i-- ) {
1635           $limits[$i] = $limits[$i]/2;
1636           push( @limits, 100-$limits[$i] );
1637         }
1638         my ( @bootstrap_array, @jackknife_array, @new_alphas, @z0, @acc );
1639         # Loop the estimates of the first (original) model
1640         for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1641                                          [$model_number-1][0]}; $l++ ) {
1642           my ( @unsorted_array1, @unsorted_array2 );
1643           # Loop the bootstrap samples from 1 to get rid of original model
1644           for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1645                                            [$model_number-1]}; $k++ ) {
1646             $unsorted_array1[$k-1] =
1647               $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1648           }
1649           @{$bootstrap_array[$l]} = sort {$a <=> $b} @unsorted_array1;
1650
1651           # Loop the jackknife samples from 1 to get rid of original model
1652           for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->
1653                                            [$model_number-1]}; $k++ ) {
1654             $unsorted_array2[$k-1] =
1655               $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1656           }
1657           @{$jackknife_array[$l]} = sort {$a <=> $b} @unsorted_array2;
1658           $z0[$l]         = c_get_z0     ( $bootstrap_array[$l],
1659                                            $self -> {'bootstrap_estimates'} ->
1660                                            [$model_number-1][0][$l] );
1661           $acc[$l]        = c_get_acc    ( $jackknife_array[$l],
1662                                            $self -> {'jackknife_means'} ->
1663                                            [$model_number-1][0][$l] );
1664           $new_alphas[$l] = c_get_alphas ( \@limits, $acc[$l], $z0[$l] );
1665         }
1666         # Loop limits
1667         for ( my $lim_idx = 0; $lim_idx <= $#limits; $lim_idx++ ) {
1668           my @percentiles;
1669           # Loop parameters
1670           for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1671             my $limit = $new_alphas[$l][$lim_idx]/100;
1672             my $position = ( scalar @{$bootstrap_array[$l]} + 1 ) * $limit;
1673             my $percentile;
1674             if ( $position < 1 ) {
1675               $percentile = undef;
1676             } elsif ( $position > scalar @{$bootstrap_array[$l]} ) {
1677               $percentile = undef;
1678             } else {
1679               my ($int_med,$frac_med)   = split(/\./, $position );
1680               $frac_med = eval("0.".$frac_med);
1681               my $percentile_low  = $bootstrap_array[$l][ $int_med - 1];
1682               my $percentile_high = ( $bootstrap_array[$l][ $int_med ] -
1683                                       $bootstrap_array[$l][ $int_med - 1] ) * $frac_med;
1684               $percentile = $percentile_low + $percentile_high;
1685             }
1686             push( @percentiles, $percentile );
1687           }
1688           push( @{$self -> {'bca_confidence_intervals'} -> [$model_number-1]},
1689                 \@percentiles );
1690           push( @{$self -> {'bca_confidence_intervals_labels'}->[$model_number-1][0]},
1691                 $limits[$lim_idx].'%');
1692         }
1693         # Check the intervals
1694         for ( my $lim_idx = 0; $lim_idx <= $limnum; $lim_idx++ ) {
1695           my @within_ci;
1696           for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1697             my $lower_limit = $self -> {'bca_confidence_intervals'} ->
1698               [$model_number-1][$lim_idx][$l];
1699             my $upper_limit = $self -> {'bca_confidence_intervals'} ->
1700               [$model_number-1][($limnum*2+1)-$lim_idx][$l];
1701             if ( $self -> {'bca_confidence_intervals_check'} < $upper_limit and
1702                  $self -> {'bca_confidence_intervals_check'} > $lower_limit ) {
1703               push( @within_ci , 1 );
1704             } else {
1705               push( @within_ci , 0 );
1706             }
1707           }
1708           $self -> {'within_bca_confidence_intervals'} ->
1709             [$model_number-1]{$limits[$lim_idx]*2} = \@within_ci;
1710         }
1711         $self -> {'bca_confidence_intervals_labels'} -> [$model_number-1][1] =
1712           \@parameter_names;
1713       }
1714 end calculate_bca_confidence_intervals
1715
1716 # }}} calculate_bca_confidence_intervals
1717
1718 # {{{ calculate_percentile_confidence_intervals
1719
1720 start calculate_percentile_confidence_intervals
1721       {
1722         # Sort the limits from the inside out
1723         my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1724         foreach my $limit ( @limits ) {
1725           my ( @lower_limits, @upper_limits, @within_ci );
1726           # Loop the estimates of the first (original) model
1727           for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1728                                            [$model_number-1][0]}; $l++ ) {
1729             my @parameter_array;
1730             # Loop the bootstrap samples from 1 to get rid of original model
1731             for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1732                                              [$model_number-1]}; $k++ ) {
1733               my $val = $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1734               # get rid of undefined values (these were probably deleted
1735               # when the bootstrap_estimates was created
1736               push( @parameter_array, $val ) if( defined $val );
1737             }
1738             my @sorted = sort {$a <=> $b} @parameter_array;
1739             for my $side ( 'lower', 'upper' ) {
1740               my $use_limit = $side eq 'lower' ? $limit/200 : 1-($limit/200);
1741               # percentile postition is:
1742               my $percentile_position = ( $#sorted + 2 ) * $use_limit;
1743               my $percentile;
1744               if ( $percentile_position < 1 ) {
1745                 $percentile = undef;
1746               } elsif ( $percentile_position > $#sorted +1) {
1747                 $percentile = undef;
1748               } else {
1749                 my ($int_med,$frac_med)   = split(/\./, $percentile_position );
1750                 $frac_med = eval("0.".$frac_med);
1751                 my $percentile_low  = $sorted[ $int_med - 1];
1752                 my $percentile_high = ( $sorted[ $int_med ] - $sorted[ $int_med - 1] ) * $frac_med;
1753                 $percentile = $percentile_low + $percentile_high;
1754               }
1755               push( @lower_limits, $percentile ) if ( $side eq 'lower' );
1756               push( @upper_limits, $percentile ) if ( $side eq 'upper' );
1757             }
1758             if ( $self -> {'percentile_confidence_intervals_check'} < $upper_limits[$#upper_limits] and
1759                  $self -> {'percentile_confidence_intervals_check'} > $lower_limits[$#lower_limits] ) {
1760               push( @within_ci , 1 );
1761             } else {
1762               push( @within_ci , 0 );
1763             }
1764           }
1765           unshift( @{$self -> {'percentile_confidence_intervals'} ->
1766                        [$model_number-1]}, \@lower_limits );
1767           push( @{$self -> {'percentile_confidence_intervals'} ->
1768                     [$model_number-1]}, \@upper_limits );
1769           unshift( @{$self -> {'percentile_confidence_intervals_labels'}->
1770                        [$model_number-1][0]}, ($limit/2).'%' );
1771           push( @{$self -> {'percentile_confidence_intervals_labels'}->
1772                     [$model_number-1][0]},(100-($limit/2)).'%');
1773           $self -> {'within_percentiles'}->[$model_number-1]{$limit}=\@within_ci;
1774         }
1775         $self -> {'percentile_confidence_intervals_labels'} ->
1776           [$model_number-1][1] = \@parameter_names;
1777       }
1778 end calculate_percentile_confidence_intervals
1779
1780 # }}} calculate_percentile_confidence_intervals
1781
1782 # {{{ modelfit_post_fork_analyze
1783
1784 start modelfit_post_fork_analyze
1785 end modelfit_post_fork_analyze
1786
1787 # }}}
1788
1789 # {{{ resample
1790
1791 start resample
1792       {
1793         my $dataObj = $model -> datas -> [0];
1794         for( my $i = 1; $i <= $self -> {'samples'}; $i++ ) {
1795           my ($bs_dir, $bs_name) = OSspecific::absolute_path( $self -> {'directory'}, "bs$i.dta" );
1796           my $new_name = $bs_dir . $bs_name;
1797           my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1798                                                   'new_name' => $new_name,
1799                                                   'target'   => $target );
1800           my $newmodel = $model -> copy( filename => "bs$i.mod",
1801                                          target   => $target,
1802                                          ignore_missing_files => 1 );
1803           $newmodel -> datafiles( new_names => ["bs$i.dta"] );
1804           $newmodel -> datas -> [0] = $boot_sample ;
1805           $newmodel -> write;
1806           push( @resample_models, $newmodel );
1807         }
1808       }
1809 end resample
1810
1811 # }}} resample
1812
1813 # {{{ _sampleTools
1814
1815 start _sampleTools
1816       {
1817         foreach my $tool ( @{$self -> {'tools'}} ) {
1818           my @models = @{$tool -> models};
1819           foreach my $model (@models){
1820             my $dataObj = $model -> datas -> [0];
1821             for( my $i = 1; $i <= $samples; $i++ ) {
1822               my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1823                                                       'new_name' => "bs$i.dta",
1824                                                       'target' => $target );
1825               my $newmodel;
1826               $newmodel = $model -> copy( filename => "bs$i.mod" );
1827               $newmodel -> datafiles( new_names => ["bs$i.dta"] );
1828               $newmodel -> datas -> [0] = $boot_sample ;
1829               $newmodel -> write;
1830               if( defined( $tool -> models ) ){
1831                 push( @{$tool -> models}, $newmodel );
1832               } else {
1833                 $tool -> models( [ $newmodel ] );
1834               }
1835             }
1836           }
1837         }
1838       }
1839 end _sampleTools
1840
1841 # }}} _sampleTools
1842
1843 # {{{ create_matlab_scripts
1844
1845 start create_matlab_scripts
1846     {
1847       if( defined $PsN::lib_dir ){
1848         unless( -e $PsN::lib_dir . '/histograms.m' and
1849                 -e $PsN::lib_dir . '/bca.m' ){
1850           'debug' -> die( message => 'Bootstrap matlab template scripts are not installed, no matlab scripts will be generated.' );
1851           return;
1852         }
1853
1854         open( PROF, $PsN::lib_dir . '/histograms.m' );
1855         my @file = <PROF>;
1856         close( PROF );
1857         my $found_code;
1858         my $code_area_start=0;
1859         my $code_area_end=0;
1860
1861
1862         for(my $i = 0;$i < scalar(@file); $i++) {
1863           if( $file[$i] =~ /% ---------- Autogenerated code below ----------/ ){
1864             $found_code = 1;
1865             $code_area_start = $i;
1866           }
1867           if( $file[$i] =~ /% ---------- End autogenerated code ----------/ ){
1868             unless( $found_code ){
1869               'debug' -> die ( message => 'Bootstrap matlab template script is malformated, no matlab scripts will be generated' );
1870               return;
1871             }
1872             $code_area_end = $i;
1873           }
1874         }
1875
1876         my @auto_code;
1877         if( $self -> {'type'} eq 'bca' ){
1878           push( @auto_code, "use_bca = 1;                               % Was a BCa-type of\n" );
1879         } else {
1880           push( @auto_code, "use_bca = 0;                               % Was a BCa-type of\n" );
1881         }
1882
1883         push( @auto_code, "                                % bootstrap run?\n" );
1884         if( ref $self -> {'samples'} eq 'ARRAY' ) {
1885           push( @auto_code, "bs_samples = ".$self -> {'samples'}->[0][0].";                     % Number of bootstrap samples\n" );
1886         } else {
1887           push( @auto_code, "bs_samples = ".$self -> {'samples'}.";                     % Number of bootstrap samples\n" );
1888         }
1889         if( $self -> {'type'} eq 'bca' ){
1890           my $ninds = $self -> models -> [0]
1891               -> datas -> [0] -> count_ind;
1892           push( @auto_code, "jk_samples = $ninds;                       % Number of (BCa) jackknife samples\n\n" );
1893         }
1894
1895         push( @auto_code, "col_names = { 'Significant Digits',\n" );
1896         push( @auto_code, "              'Condition Number',\n" );
1897         push( @auto_code, "              'OFV',\n" );
1898
1899         my $nps = $self -> {'models'} -> [0] -> nomegas -> [0];
1900
1901         my %param_names;
1902         my( @par_names, @se_names, @np_names, @sh_names );
1903         foreach my $param ( 'theta','omega','sigma' ) {
1904           my $labels = $self -> {'models'} -> [0] -> labels( parameter_type => $param );
1905           if ( defined $labels ){
1906             foreach my $label ( @{$labels -> [0]} ){
1907               push( @par_names, "                '",$label,"',\n" );
1908               push( @se_names, "                 '",'se-'.$label,"',\n" );
1909             }
1910           }
1911         }
1912
1913         for( my $i = 1; $i <= ($nps*($nps+1)/2); $i++ ) {
1914           push( @np_names, "             '",'np-om'.$i,"',\n" );
1915         }
1916
1917         for( my $i = 1; $i <= $nps; $i++ ) {
1918           push( @sh_names, "             '",'shrinkage-eta'.$i,"',\n" );
1919         }
1920
1921         push( @sh_names, "               '",'shrinkage-iwres',"'\n" );
1922
1923 # NP not used for now
1924
1925         push( @auto_code,(@par_names, @se_names, @sh_names));
1926 #       push( @auto_code,(@par_names, @se_names, @np_names, @sh_names));
1927         push( @auto_code, "           };\n\n" );
1928
1929         my @np_columns = (0) x ($nps*($nps+1)/2);
1930         my @sh_columns = (0) x ($nps+1);
1931
1932         if( $self -> {'type'} eq 'bca' ){
1933           push( @auto_code, "fixed_columns = [ 0, 0, 0, " );
1934         } else {
1935           push( @auto_code, "fixed_columns = [ 0, 0, 0, " );
1936         }
1937         my ( @fixed_columns, @same_columns, @adjust_axes );
1938         foreach my $param ( 'theta','omega','sigma' ) {
1939           my $fixed = $self -> {'models'} -> [0] -> fixed( parameter_type => $param );
1940
1941           if ( defined $fixed ){
1942             push( @fixed_columns, @{$fixed -> [0]} );
1943             if( $param eq 'theta' ) {
1944               push( @same_columns, (0) x scalar( @{$fixed -> [0]} ) );
1945             }
1946           }
1947         }
1948
1949         @adjust_axes = (1) x ( ($#fixed_columns + 1) * 2 +
1950                                $#sh_columns + 1 );
1951 #                              $#np_columns + $#sh_columns + 2 );
1952
1953         push( @auto_code , join( ', ' , @fixed_columns).', '.
1954               join( ', ' , @fixed_columns).', '.
1955 #             join( ', ' , @np_columns).', '.
1956               join( ', ' , @sh_columns)."];\n\n" );
1957
1958         if( $self -> {'type'} eq 'bca' ){
1959           push( @auto_code, "same_columns  = [ 0, 0, 0, " );
1960         } else {
1961           push( @auto_code, "same_columns  = [ 0, 0, 0, " );
1962         }
1963         foreach my $param ( 'omegas','sigmas' ) {
1964           my $parameters = $self -> {'models'} -> [0] -> problems -> [0] -> $param;
1965           foreach my $parameter ( @{$parameters} ){
1966             if( $parameter -> same() ){
1967               push( @same_columns, (1) x $parameter -> size() );
1968             } else {
1969               push( @same_columns, (0) x scalar @{$parameter -> options} );
1970             }
1971           }
1972         }
1973         push( @auto_code , join( ', ' , @same_columns ).', '.
1974               join( ', ' , @same_columns).', '.
1975 #             join( ', ' , @np_columns).', '.
1976               join( ', ' , @sh_columns)."];\n\n" );
1977
1978         push( @auto_code , "adjust_axes   = [ 1, 1, 1, ".join( ', ' , @adjust_axes)."];\n\n" );
1979
1980 #        push( @auto_code , 'npomegas = '.($nps*($nps+1)/2).";\n\n" );
1981         push( @auto_code , "npomegas = 0;\n\n" );
1982
1983
1984         push( @auto_code, "minimization_successful_col    = 5;  % Column number for the\n" );
1985         push( @auto_code, "                                                     % minimization sucessful flag\n" );
1986         push( @auto_code, "covariance_step_successful_col = 6;  % As above for cov-step warnings\n" );
1987         push( @auto_code, "covariance_step_warnings_col   = 7;  % etc\n" );
1988         push( @auto_code, "estimate_near_boundary_col     = 8;  % etc\n" );
1989
1990         push( @auto_code, "not_data_cols = 13;                     % Number of columns in the\n" );
1991         push( @auto_code, "                                        % beginning that are not\n" );
1992         push( @auto_code, "                                        % parameter estimates.\n" );
1993
1994         push( @auto_code, "filename = 'raw_results_matlab.csv';\n" );
1995
1996         splice( @file, $code_area_start, ($code_area_end - $code_area_start), @auto_code );
1997         open( OUTFILE, ">", $self -> {'directory'} . "/histograms.m" );
1998         print OUTFILE "addpath " . $PsN::lib_dir . ";\n";
1999         print OUTFILE @file ;
2000         close OUTFILE;
2001
2002         open( OUTFILE, ">", $self -> {'directory'} . "/raw_results_matlab.csv" );
2003         for( my $i = 0; $i < scalar ( @{$self -> {'raw_results'} -> [0]} ); $i ++ ){
2004 #         $self -> {'raw_results'} -> [0] -> [$i][0] =
2005 #             $self -> {'raw_results'} -> [0] -> [$i][0] eq 'bootstrap' ?
2006 #             1 : $self -> {'raw_results'} -> [0] -> [$i][0];
2007 #         $self -> {'raw_results'} -> [0] -> [$i][0] =
2008 #             $self -> {'raw_results'} -> [0] -> [$i][0] eq 'jackknife' ?
2009 #             2 : $self -> {'raw_results'} -> [0] -> [$i][0];
2010           map( $_ = $_ eq 'NA' ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2011           map( $_ = not( defined $_ ) ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2012           print OUTFILE join( ',', @{$self -> {'raw_results'} -> [0] -> [$i]} ), "\n";
2013         }
2014         close OUTFILE;
2015
2016       } else {
2017         'debug' -> die( message => 'matlab_dir not configured, no matlab scripts will be generated.');
2018         return;
2019       }
2020     }
2021 end create_matlab_scripts
2022
2023 # }}}
2024
2025 # {{{ create_R_scripts
2026 start create_R_scripts
2027     {
2028       unless( -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ){
2029         'debug' -> die( message => 'Bootstrap R-script are not installed, no R-script will be generated.' );
2030         return;
2031       }
2032       cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
2033     }
2034 end create_R_scripts
2035 # }}}