lib/tool/bootstrap_subs.pm

   1 # {{{ include
   2
   3 start include statements
   4 use strict;
   5 use File::Copy 'cp';
   6 use data;
   7 use OSspecific;
   8 use tool::llp;
   9 use tool::cdd::jackknife;
  10 use ext::Statistics::Distributions 'udistr', 'uprob';
  11 use Math::Random;
  12 use Data::Dumper;
  13 end include
  14
  15 # }}} include statements
  16
  17 # {{{ new
  18
  19 start new
  20       {
  21         foreach my $attribute ( 'logfile', 'raw_results_file','raw_nonp_file' ) {
  22           if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
  23                     ref($this -> {$attribute}) eq 'HASH' ) ) {
  24             my $tmp = $this -> {$attribute};
  25             $this -> {$attribute} = [];
  26             for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
  27               my $name = $tmp;
  28               if ( $name =~ /\./ ) {
  29                 $name =~ s/\./$i\./;
  30               } else {
  31                 $name = $name.$i;
  32               }
  33               my $ldir;
  34               ( $ldir, $name ) =
  35                 OSspecific::absolute_path( $this -> {'directory'}, $name );
  36               push ( @{$this -> {$attribute}}, $ldir.$name ) ;
  37             }
  38           }
  39         }
  40       }
  41 end new
  42
  43 # }}} new
  44
  45 # {{{ general_pre_fork_setup
  46
  47 start general_pre_fork_setup
  48     {
  49       # These attributes can be given as a
  50       # 1. A scalar       : used for all models and problems
  51       # 2. A 1-dim. array : specified per problem but same for all models
  52       # 3. A 2-dim. array : specified per problem and model
  53
  54       foreach my $attribute_string( 'stratify_on', 'samples', 'subjects' ){
  55         my $attribute = $self -> {$attribute_string};
  56         if ( defined $attribute ) {
  57           # If stratification is given we check at what level it is
  58           # given.
  59           unless ( ref( \$attribute ) eq 'SCALAR' or
  60                    ( ref( $attribute ) eq 'ARRAY' and scalar @{$attribute} > 0 ) ) {
  61             # Here we know its neither scalar or a correct array. But we
  62             # seem to assume the lenght be equal to the number of
  63             # models.
  64             debug -> die( message => "attribute $attribute is " .
  65                           "defined as a " . ref( $attribute ) .
  66                           "and is neither a scalar or a non-zero size array" );
  67           } elsif ( ref( \$attribute ) eq 'SCALAR' ) {
  68             # If it is a scalar we copy the scalar value into an array
  69             # for each model, with one value for each problem.
  70             my @mo_attribute = ();
  71             foreach my $model ( @{$self -> {'models'}} ) {
  72               my @pr_attribute = ();
  73               foreach my $problem ( @{$model -> problems}  ) {
  74                 push( @pr_attribute, $attribute );
  75               }
  76               push( @mo_attribute, \@pr_attribute );
  77             }
  78             $self -> {$attribute_string} = \@mo_attribute;
  79           } elsif ( ref( $attribute ) eq 'ARRAY' ) {
  80             # If it is an array we check if the it is an array of
  81             # scalars or of arrays. If it is of arrays we seem to
  82             # assume that the arrays have a length the matches the
  83             # number of problems. It it is an array of scalars, we
  84             # copy that scalar into arrays for each problem.
  85             unless ( ref( \$attribute -> [0] ) eq 'SCALAR' or
  86                      ( ref( $attribute -> [0] ) eq 'ARRAY' and scalar @{$attribute -> [0]} > 0 ) ) {
  87               debug -> die( message => "attribute $attribute is ",
  88                             "defined as a ",ref( $attribute -> [0] ),
  89                             "and is neither a scalar or a non-zero size array" );
  90             } elsif ( ref(\$attribute -> [0]) eq 'SCALAR' ) {
  91               my @mo_attribute = ();
  92               foreach my $model ( @{$self -> {'models'}} ) {
  93                 push( @mo_attribute, $attribute );
  94               }
  95               $self -> {$attribute_string} = \@mo_attribute;
  96             }
  97           }
  98         } else {
  99           # If stratify is not defined we copy an undefined value to
 100           # arrays corresponding to models and subproblem. I wonder if
 101           # this is really necessary.
 102           my @mo_attribute = ();
 103           foreach my $model ( @{$self -> {'models'}} ) {
 104             my @pr_attribute = ();
 105             foreach my $data ( @{$model -> datas}  ) {
 106               if( $attribute_string eq 'stratify_on' ){
 107                 push( @pr_attribute, undef );
 108               } elsif ( $attribute_string eq 'samples' ){
 109                 push( @pr_attribute, 200 );
 110               } elsif ( $attribute_string eq 'subjects' ){
 111                 push( @pr_attribute, $data -> count_ind );
 112               }
 113             }
 114             push( @mo_attribute, \@pr_attribute );
 115           }
 116           $self -> {$attribute_string} = \@mo_attribute;
 117         }
 118       }
 119     }
 120 end general_pre_fork_setup
 121
 122 # }}}
 123
 124 # {{{ modelfit_pre_fork_setup
 125
 126 start modelfit_pre_fork_setup
 127       {
 128         $self -> general_pre_fork_setup( model_number => $model_number );
 129       }
 130 end modelfit_pre_fork_setup
 131
 132 # }}}
 133
 134 # {{{ llp_pre_fork_setup
 135
 136 start llp_pre_fork_setup
 137       {
 138         $self -> general_pre_fork_setup( model_number => $model_number );
 139       }
 140 end llp_pre_fork_setup
 141
 142 # }}}
 143
 144 # {{{ modelfit_setup
 145
 146 start modelfit_setup
 147       {
 148
 149         my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 150           $self -> {'threads'} -> [1]:$self -> {'threads'};
 151         $self -> general_setup( model_number => $model_number,
 152                                 class        => 'tool::modelfit',
 153                                 subm_threads => $subm_threads );
 154       }
 155 end modelfit_setup
 156
 157 # }}}
 158
 159 # {{{ general_setup
 160
 161 start general_setup
 162       {
 163
 164         # If the number of threads are given per tool, e.g. [2,5] meaning 2 threads for
 165         # scm and 5 for the modelfit.
 166         my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 167           $self -> {'threads'} -> [1]:$self -> {'threads'};
 168         my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 169           $self -> {'threads'} -> [0]:$self -> {'threads'};
 170         # More threads than models?
 171         my $num = scalar @{$self -> {'models'}};
 172         $own_threads = $num if ( $own_threads > $num );
 173
 174         # Sub tool threads can be given as scalar or reference to an array?
 175         #my $subm_threads = $parm{'subm_threads'};
 176         #my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 177         #  $self -> {'threads'} -> [0]:$self -> {'threads'};
 178         # Group variable names are matched in the model, not the data!
 179         my $model       = $self   -> {'models'}      -> [$model_number-1];
 180         my @samples     = @{$self -> {'samples'}     -> [$model_number-1]};
 181         my @stratify_on = @{$self -> {'stratify_on'} -> [$model_number-1]};
 182         my @subjects    = @{$self -> {'subjects'}    -> [$model_number-1]};
 183
 184         # Check which models that hasn't been run and run them This
 185         # will be performed each step but will only result in running
 186         # models at the first step, if at all.
 187
 188         # If more than one process is used, there is a VERY high risk
 189         # of interaction between the processes when creating
 190         # directories for model fits. Therefore the {'directory'}
 191         # attribute is given explicitly below.
 192
 193         # ------------------------  Run original run  -------------------------------
 194
 195         # {{{ orig run
 196
 197         unless ( $model -> is_run ) {
 198           my %subargs = ();
 199           if ( defined $self -> {'subtool_arguments'} ) {
 200             %subargs = %{$self -> {'subtool_arguments'}};
 201           }
 202
 203           if( $self -> {'nonparametric_etas'} or
 204               $self -> {'nonparametric_marginals'} ) {
 205             $model -> add_nonparametric_code;
 206           }
 207
 208           my $orig_fit = tool::modelfit ->
 209               new( reference_object      => $self,
 210                    base_directory        => $self -> {'directory'},
 211                    directory             => $self -> {'directory'}.
 212                    '/orig_modelfit_dir'.$model_number,
 213                    models                => [$model],
 214                    threads               => $subm_threads,
 215                    parent_threads        => $own_threads,
 216                    parent_tool_id        => $self -> {'tool_id'},
 217                    logfile               => undef,
 218                    raw_results           => undef,
 219                    prepared_models       => undef,
 220                    %subargs );
 221
 222           ui -> print( category => 'bootstrap',
 223                        message => 'Executing base model.' );
 224
 225           $orig_fit -> run;
 226
 227         }
 228
 229         my $output = $model -> outputs -> [0];
 230
 231         # }}} orig run
 232
 233         # ------------------------  Print a log-header  -----------------------------
 234
 235         # {{{ log header
 236
 237         my @orig_datas = @{$model -> datas};
 238         my @problems   = @{$model -> problems};
 239         my @new_models;
 240
 241         # Print a log-header
 242         # Lasse 2005-04-21: The minimization_message print will probably not work anymore
 243         open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
 244         my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME  ').',';
 245         print LOG sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME  '),',';
 246         foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
 247           my $orig_ests   = $model -> outputs -> [0] -> $param;
 248           # Loop the problems
 249           for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
 250             if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
 251               my $label = uc($param)."_".($j+1);
 252               $ui_text = $ui_text.sprintf("%12s",$label).',';
 253               print LOG sprintf("%12s",$label),',';
 254             } else {
 255               # Loop the parameter numbers (skip sub problem level)
 256               for ( my $num = 1; $num <= scalar @{$orig_ests -> [$j][0]}; $num++ ) {
 257                 my $label = uc($param).$num."_".($j+1);
 258                 $ui_text = $ui_text.sprintf("%12s",$label).',';
 259                 print LOG sprintf("%12s",$label),',';
 260               }
 261             }
 262           }
 263         }
 264         print LOG "\n";
 265
 266         # }}} log header
 267
 268         # ------------------------  Log original run  -------------------------------
 269
 270         # {{{ Log original run
 271
 272         # Lasse 2005-04-21: The minimization_message print will probably not work anymore
 273         open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
 274         $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename).',';
 275         print LOG sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename),',';
 276         foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
 277           my $orig_ests   = $model -> outputs -> [0] -> $param;
 278           # Loop the problems
 279           for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
 280             if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
 281               $orig_ests -> [$j][0] =~ s/\n//g;
 282               $ui_text = $ui_text.sprintf("%12s",$orig_ests -> [$j][0]).',';
 283               print LOG sprintf("%12s",$orig_ests -> [$j][0]),',';
 284             } else {
 285               # Loop the parameter numbers (skip sub problem level)
 286               for ( my $num = 0; $num < scalar @{$orig_ests -> [$j][0]}; $num++ ) {
 287                 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
 288                 print LOG sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
 289               }
 290             }
 291           }
 292         }
 293         print LOG "\n";
 294
 295         # }}} Log original run
 296
 297         # TODO: In this loop we loose one dimension (problem) in that
 298         # all new models with new data sets are put in the same array,
 299         # regardless of which problem the initially belonged to. Fix
 300         # this.
 301
 302         if ( $#orig_datas < 0 ) {
 303           debug -> warn( level   => 1,
 304                          message => "No data files to resample from" );
 305         } else {
 306           debug -> warn( level   => 1,
 307                          message => "Starting bootstrap sampling" );
 308         }
 309         for ( my $i = 1; $i <= scalar @orig_datas; $i++ ) {
 310           my $orig_data = $orig_datas[$i-1];
 311
 312           if ( $self -> {'drop_dropped'} ) {
 313             my $model_copy = $model -> copy( copy_data => 1,
 314                                              filename => "drop_copy_$i.mod",
 315                                              directory => $self -> {'directory'} );
 316
 317             $model_copy -> drop_dropped;
 318             $model_copy -> _write( write_data => 1 );
 319             $model_copy -> datas -> [0] -> flush();
 320             $orig_data = $model -> datas -> [0];
 321             $model = $model_copy;
 322           }
 323
 324           my ( @seed, $new_datas, $incl_ids, $incl_keys, $new_mod );
 325
 326           my $done = ( -e $self -> {'directory'}."/m$model_number/done.$i" ) ? 1 : 0;
 327
 328           if ( not $done ) {
 329             ui -> print( category => 'bootstrap',
 330                          message  => "Resampling from ".$orig_data -> filename );
 331
 332             ( $new_datas, $incl_ids, $incl_keys )
 333                 = $orig_data -> bootstrap( directory   => $self -> {'directory'}.'/m'.$model_number,
 334                                            name_stub   => 'bs_pr'.$i,
 335                                            samples     => $samples[$i-1],
 336                                            subjects    => $subjects[$i-1],
 337                                            stratify_on => $stratify_on[$i-1],
 338                                            target            => 'disk',
 339                                            model_ids   => $self -> {'prepared_model_ids'} );
 340
 341
 342             for ( my $j = 0; $j < $samples[$i-1]; $j++ ) {
 343               my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
 344                                                                       'bs_pr'.$i.'_'.($j+1).'.mod' );
 345 #             my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number ,
 346 #                                                                      '/bs_pr'.$i.'_'.$j.'.lst' );
 347               my $prob_copy = Storable::dclone($problems[$i-1]);
 348 #             $Data::Dumper::Maxdepth = 2;
 349 #             print Dumper $model; die;
 350
 351               $new_mod = model ->
 352                   new(reference_object     => $model,
 353                       sde                  => 0,
 354                       outputs              => undef,
 355                       datas                => undef,
 356                       synced               => undef,
 357                       problems             => undef,
 358                       active_problems      => undef,
 359                       directory            => $model_dir,
 360                       filename             => $filename,
 361                       outputfile           => undef,
 362                       problems             => [$prob_copy],
 363                       extra_files          => $model -> extra_files,
 364                       target               => 'disk',
 365                       ignore_missing_files => 1 );
 366
 367               if( $self -> {'shrinkage'} ) {
 368                 $new_mod -> shrinkage_stats( enabled => 1 );
 369                 my @problems = @{$new_mod -> problems};
 370                 for( my $i = 1; $i <= scalar @problems; $i++ ) {
 371                   $problems[ $i-1 ] -> shrinkage_module -> model( $new_mod );
 372                 }
 373               }
 374
 375               my $model_id  = $new_mod -> register_in_database;
 376               push( @{$self -> {'prepared_model_ids'}}, $model_id );
 377
 378               $self -> register_tm_relation( model_ids       => [$model_id],
 379                                              prepared_models => 1 );
 380
 381               $new_mod -> datas ( [$new_datas -> [$j]] );
 382
 383               if( $self -> {'nonparametric_etas'} or
 384                   $self -> {'nonparametric_marginals'} ) {
 385                 $new_mod -> add_nonparametric_code;
 386               }
 387
 388               $new_mod -> update_inits( from_output => $output );
 389               $new_mod -> _write;
 390
 391               push( @new_models, $new_mod );
 392             }
 393             # Create a checkpoint. Log the samples and individuals.
 394             open( DONE, ">".$self -> {'directory'}."/m$model_number/done.$i" ) ;
 395             print DONE "Resampling from ",$orig_data -> filename, " performed\n";
 396             print DONE "$samples[$i-1] samples\n";
 397             print DONE "$subjects[$i-1] subjects\n";
 398             print DONE "Included individuals:\n";
 399             @seed = random_get_seed;
 400             print DONE "seed: @seed\n";
 401             for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
 402               print DONE join(',',@{$incl_ids -> [$k]}),"\n";
 403             }
 404             print DONE "Included keys:\n";
 405             for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
 406               print DONE join(',',@{$incl_keys -> [$k]}),"\n";
 407             }
 408             close( DONE );
 409             open( INCL, ">".$self -> {'directory'}."included_individuals".$model_number.".csv" ) ;
 410             for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
 411               print INCL join(',',@{$incl_ids -> [$k]}),"\n";
 412             }
 413             close( INCL );
 414             open( KEYS, ">".$self -> {'directory'}."included_keys".$model_number.".csv" ) ;
 415             open( SAMPLEKEYS, ">".$self -> {'directory'}."sample_keys".$model_number.".csv" ) ;
 416             for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
 417               my %sample_keys;
 418               my $sample_size = scalar @{$incl_keys -> [$k]};
 419               for ( my $l = 0; $l < $sample_size; $l++ ) {
 420                 $sample_keys{$incl_keys -> [$k][$l]}++;
 421               }
 422               for ( my $l = 0; $l < $sample_size; $l++ ) {
 423                 my $val   = defined $sample_keys{$l} ? $sample_keys{$l} : 0;
 424                 my $extra = ($l == ($sample_size-1)) ? "\n" : ',';
 425                 print SAMPLEKEYS $val,$extra;
 426               }
 427               print KEYS join(',',@{$incl_keys -> [$k]}),"\n";
 428             }
 429             close( KEYS );
 430             close( SAMPLEKEYS );
 431           } else {
 432             ui -> print( category => 'bootstrap',
 433                          message  => "Recreating samples ".$orig_data -> filename." from previous run." );
 434
 435             # Recreate the datasets and models from a checkpoint
 436             my ($stored_filename, $stored_samples, $stored_subjects);
 437             my @seed;
 438             my ($stored_filename_found, $stored_samples_found, $stored_subjects_found, $stored_seed_found);
 439             open( DONE, $self -> {'directory'}."/m$model_number/done.$i" );
 440             while( <DONE> ){
 441               if( /^Resampling from (.+) performed$/ ){
 442                 $stored_filename = $1;
 443                 $stored_filename_found = 1;
 444                 next;
 445               }
 446               if( /^(\d+) samples$/ ){
 447                 $stored_samples = $1;
 448                 $stored_samples_found = 1;
 449                 next;
 450               }
 451               if( /^(\d+) subjects$/ ){
 452                 $stored_subjects = $1;
 453                 $stored_subjects_found = 1;
 454                 next;
 455               }
 456               if( /^seed: (\d+) (\d+)$/ ){
 457                 @seed = ($1, $2);
 458                 $stored_seed_found = 1;
 459                 next;
 460               }
 461
 462               if( $stored_filename_found and $stored_samples_found
 463                   and $stored_subjects_found and $stored_seed_found ){
 464                 last;
 465               }
 466             }
 467             close( DONE );
 468             unless( $stored_filename_found and $stored_samples_found
 469                     and $stored_samples_found and $stored_seed_found ){
 470               debug -> die( level => 1,
 471                              message => "The bootstrap/m1/done file could not be parsed." );
 472             }
 473
 474             if ( $stored_samples < $samples[$i-1] ) {
 475               debug -> die( message => "The number of samples saved in previous run ($stored_samples) ".
 476                             "are bigger than the number of samples specified for this run (".
 477                             $samples[$i-1].")" );
 478             }
 479             if ( $stored_subjects != $subjects[$i-1] ) {
 480               debug -> die( message => "The number of individuals saved in previous run (".
 481                             "$stored_subjects) does not match the number of individuals specified ".
 482                             "for this run (".$subjects[$i-1].")" );
 483             }
 484 #           my ( @stored_ids, @stored_keys );
 485 #           for ( my $k = 4; $k < 4+$stored_samples; $k++ ) {
 486 #             chomp($rows[$k]);
 487 #             my @sample_ids = split(',', $rows[$k] );
 488 #             push( @stored_ids, \@sample_ids );
 489 #           }
 490 #           for ( my $k = 5+$stored_samples; $k < 5+2*$stored_samples; $k++ ) {
 491 #             chomp($rows[$k]);
 492 #             my @sample_keys = split(',', $rows[$k] );
 493 #             push( @stored_keys, \@sample_keys );
 494 #           }
 495 #           @seed = split(' ',$rows[5+2*$stored_samples]);
 496 #           $incl_ids = \@stored_ids;
 497 #           $incl_keys = \@stored_keys;
 498 #           shift( @seed ); # get rid of 'seed'-word
 499             # Reinitiate the model objects
 500             for ( my $j = 1; $j <= $stored_samples; $j++ ) {
 501               my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
 502                                                                       $model_number,
 503                                                                       'bs_pr'.$i.'_'.$j.'.mod' );
 504 #             my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
 505 #                                                                      $model_number,
 506 #                                                                      '/bs_pr'.$i.'_'.$j.'.lst' );
 507               $new_mod = model ->
 508                 new( directory   => $model_dir,
 509                      filename    => $filename,
 510 #                    outputfile  => $outfilename,
 511                      extra_files => $model -> extra_files,
 512                      target      => 'disk',
 513                      ignore_missing_files => 1 );
 514               push( @new_models, $new_mod );
 515             }
 516             random_set_seed( @seed );
 517             ui -> print( category => 'bootstrap',
 518                          message  => "Using $stored_samples previously resampled bootstrap sets of".
 519                          " of $stored_subjects individuals from $stored_filename" )
 520               unless $self -> {'parent_threads'} > 1;
 521           }
 522 #         push( @{$self -> {'included_inds'} -> [$model_number-1]}, $incl_ids );
 523 #         push( @{$self -> {'included_keys'} -> [$model_number-1]}, $incl_keys );
 524         }
 525         $self -> {'prepared_models'}[$model_number-1]{'own'} = \@new_models;
 526
 527         # ---------------------  Create the sub tools  ------------------------------
 528
 529         # {{{ sub tools
 530
 531         my $subdir = $class;
 532         $subdir =~ s/tool:://;
 533         my @subtools = @{$self -> {'subtools'}};
 534         shift( @subtools );
 535         my %subargs = ();
 536         if ( defined $self -> {'subtool_arguments'} ) {
 537           %subargs = %{$self -> {'subtool_arguments'}};
 538         }
 539         push( @{$self -> {'tools'}},
 540               $class ->
 541               new( reference_object      => $self,
 542                    models                => \@new_models,
 543                    threads               => $subm_threads,
 544                    directory             => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
 545                    _raw_results_callback => $self ->
 546                           _modelfit_raw_results_callback( model_number => $model_number ),
 547                    subtools              => \@subtools,
 548                    parent_threads        => $own_threads,
 549                    parent_tool_id        => $self -> {'tool_id'},
 550                    logfile               => $self -> {'logfile'}[$model_number-1],
 551                    raw_results           => undef,
 552                    prepared_models       => undef,
 553                    %subargs ) );
 554
 555 #             ( clean            => $self -> {'clean'},
 556 #               base_directory   => $self -> {'directory'},
 557 #               compress         => $self -> {'compress'},
 558 #               directory        => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
 559 #               drop_dropped     => $self -> {'drop_dropped'},
 560 #               wrap_data        => $self -> {'wrap_data'},
 561 #               run_on_nordugrid => $self -> {'run_on_nordugrid'},
 562 #               cpu_time         => $self -> {'cpu_time'},
 563 #               run_on_lsf       => $self -> {'run_on_lsf'},
 564 #               no_remote_execution => $self -> {'no_remote_execution'},
 565 #               no_remote_compile => $self -> {'no_remote_compile'},
 566 #               lsf_queue        => $self -> {'lsf_queue'},
 567 #               lsf_options      => $self -> {'lsf_options'},
 568 #               lsf_job_name     => $self -> {'lsf_job_name'},
 569 #               lsf_project_name => $self -> {'lsf_project_name'},
 570
 571 #               parent_tool_id   => $self -> {'tool_id'},
 572
 573 #               models           => \@new_models,
 574 #               grid_batch_size  => $self -> {'grid_batch_size'},
 575 #               nm_version       => $self -> {'nm_version'},
 576 #               picky            => $self -> {'picky'},
 577 #               retries          => $self -> {'retries'},
 578 #               tweak_inits      => $self -> {'tweak_inits'},
 579 #               handle_maxevals  => $self -> {'handle_maxevals'},
 580 #               remove_temp_files        => $self -> {'remove_temp_files'},
 581 #               _raw_results_callback => $self ->
 582 #               _modelfit_raw_results_callback( model_number => $model_number ),
 583 #               threads               => $subm_threads,
 584 #               subtools              => \@subtools,
 585 #               logfile               => $self -> {'logfile'}[$model_number-1],
 586 #               parent_threads        => $own_threads,
 587 #               %subargs ) );
 588
 589         # }}} sub tools
 590
 591       }
 592 end general_setup
 593
 594 # }}}
 595
 596 # {{{ llp_setup
 597 start llp_setup
 598       {
 599         my @subm_threads;
 600         if (ref( $self -> {'threads'} ) eq 'ARRAY') {
 601           @subm_threads = @{$self -> {'threads'}};
 602           unshift(@subm_threads);
 603         } else {
 604           @subm_threads = ($self -> {'threads'});
 605         }
 606         $self -> general_setup( model_number => $model_number,
 607                                 class        => 'tool::llp',
 608                                 subm_threads => \@subm_threads );
 609       }
 610 end llp_setup
 611 # }}} llp_setup
 612
 613 # {{{ _jackknife_raw_results_callback
 614
 615 start _jackknife_raw_results_callback
 616       {
 617         # Use the bootstrap's raw_results file.
 618         my ($dir,$file) =
 619           OSspecific::absolute_path( $self -> {'directory'},
 620                                      $self -> {'raw_results_file'}[$model_number-1] );
 621         my ($dir,$nonp_file) =
 622           OSspecific::absolute_path( $self -> {'directory'},
 623                                      $self -> {'raw_nonp_file'}[$model_number-1] );
 624         $subroutine = sub {
 625           my $jackknife = shift;
 626           my $modelfit  = shift;
 627           $modelfit -> raw_results_file( $dir.$file );
 628           $modelfit -> raw_nonp_file( $dir.$nonp_file );
 629           $modelfit -> raw_results_append( 1 );
 630           my ( @new_header, %param_names );
 631           foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
 632             unshift( @{$row}, 'jackknife' );
 633           }
 634           $modelfit -> {'raw_results_header'} = [];
 635         };
 636         return $subroutine;
 637       }
 638 end _jackknife_raw_results_callback
 639
 640 # }}} _jackknife_raw_results_callback
 641
 642 # {{{ _modelfit_raw_results_callback
 643
 644 start _modelfit_raw_results_callback
 645       {
 646         # Use the bootstrap's raw_results file.
 647         my ($dir,$file) =
 648           OSspecific::absolute_path( $self -> {'directory'},
 649                                      $self -> {'raw_results_file'}[$model_number-1] );
 650         my ($dir,$nonp_file) =
 651           OSspecific::absolute_path( $self -> {'directory'},
 652                                      $self -> {'raw_nonp_file'}[$model_number-1] );
 653         my $orig_mod = $self -> {'models'}[$model_number-1];
 654         my $type = $self -> {'type'};
 655         $subroutine = sub {
 656           my $modelfit = shift;
 657           my $mh_ref   = shift;
 658           my %max_hash = %{$mh_ref};
 659           $modelfit -> raw_results_file( $dir.$file );
 660           $modelfit -> raw_nonp_file( $dir.$nonp_file );
 661
 662           # The prepare_raw_results in the modelfit will fix the
 663           # raw_results for each bootstrap sample model, we must add
 664           # the result for the original model.
 665
 666           my ($raw_results_row,$row_structure) = $self -> create_raw_results_rows( max_hash => $mh_ref,
 667                                                                                     model => $orig_mod );
 668
 669           unshift( @{$modelfit -> {'raw_results'}}, @{$raw_results_row} );
 670
 671           # {{{ New header
 672
 673           my ( @new_header, %param_names );
 674           my @params = ( 'theta', 'omega', 'sigma' );
 675           foreach my $param ( @params ) {
 676             my $labels = $orig_mod -> labels( parameter_type => $param );
 677             $param_names{$param} = $labels -> [0] if ( defined $labels );
 678           }
 679
 680           foreach my $name ( @{$modelfit -> {'raw_results_header'}} ) {
 681             my $success;
 682             foreach my $param ( @params, 'eigen', 'shrinkage_etas', 'shrinkage_wres' ) {
 683               if ( $name eq $param ){
 684                 if( defined $param_names{$name} ) {
 685                   push( @new_header , @{$param_names{$name}} );
 686                 } else {
 687                   for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
 688                     push ( @new_header, uc(substr($name,0,2)).$i );
 689                   }
 690                 }
 691                 $success = 1;
 692                 last;
 693               } elsif ( $name eq 'se'.$param ) {
 694                 my @new_names = ();
 695                 for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
 696                   push ( @new_names, uc(substr($param,0,2)).$i );
 697                 }
 698                 map ( $_ = 'se'.$_, @new_names );
 699
 700                 push( @new_header, @new_names );
 701                 $success = 1;
 702                 last;
 703               }
 704             }
 705             unless( $success ){
 706               push( @new_header, $name );
 707             }
 708           }
 709
 710           $modelfit -> {'raw_results_header'} = \@new_header;
 711
 712           if ( $type eq 'bca' ) {
 713             foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
 714               unshift( @{$row}, 'bootstrap' );
 715             }
 716             unshift( @{$modelfit -> {'raw_results_header'}}, 'method' );
 717           }
 718
 719           @{$self -> {'raw_results_header'}} = @{$modelfit -> {'raw_results_header'}};
 720
 721           # }}} New header
 722
 723         };
 724         return $subroutine;
 725       }
 726 end _modelfit_raw_results_callback
 727
 728 # }}} _modelfit_raw_results_callback
 729
 730 # {{{ modelfit_analyze
 731
 732 start modelfit_analyze
 733       {
 734         my @params = @{$self -> {'parameters'}};
 735         my @diagnostic_params = @{$self -> {'diagnostic_parameters'}};
 736         my ( @print_order, @calculation_order );
 737
 738         if ( $self -> {'type'} eq 'bca' ) {
 739
 740           # --------------------------  BCa method  ---------------------------------
 741
 742           # {{{ BCa
 743
 744           @calculation_order = @{$self -> {'bca_calculation_order'}};
 745           @print_order = @{$self -> {'bca_print_order'}};
 746           my $jk_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
 747             $self -> {'threads'} -> [1]:$self -> {'threads'};
 748           my $done = ( -e $self -> {'directory'}."/jackknife_done.$model_number" ) ? 1 : 0;
 749           if ( not $done ) {
 750
 751             # {{{ Create Jackknife
 752
 753             ui -> print( category => 'bootstrap',
 754                          message  => "Running a Jackknife for the BCa estimates" );
 755             $self -> {'jackknife'} = tool::cdd::jackknife ->
 756               new( models           => [$self -> models -> [$model_number -1]],
 757                    case_columns     => $self -> models -> [$model_number -1]
 758                    -> datas -> [0] -> idcolumn,
 759                    _raw_results_callback => $self ->
 760                    _jackknife_raw_results_callback( model_number => $model_number ),
 761                    parent_tool_id   => $self -> {'tool_id'},
 762                    threads          => $jk_threads,
 763                    bca_mode         => 1,
 764                    shrinkage        => $self -> {'shrinkage'},
 765                    nonparametric_marginals => $self -> {'nonparametric_marginals'},
 766                    nonparametric_etas => $self -> {'nonparametric_etas'},
 767                    adaptive         => $self -> {'adaptive'},
 768                    rerun            => $self -> {'rerun'},
 769                    verbose          => $self -> {'verbose'},
 770                    cross_validate   => 0 );
 771             # Create a checkpoint. Log the samples and individuals.
 772             open( DONE, ">".$self -> {'directory'}."/jackknife_done.$model_number" ) ;
 773             print DONE "Jackknife directory:\n";
 774             print DONE $self -> {'jackknife'} -> directory,"\n";
 775             my @seed = random_get_seed;
 776             print DONE "seed: @seed\n";
 777             close( DONE );
 778
 779             # }}} Create Jackknife
 780
 781           } else {
 782
 783             # {{{ Recreate Jackknife
 784
 785             open( DONE, $self -> {'directory'}."/jackknife_done.$model_number" );
 786             my @rows = <DONE>;
 787             close( DONE );
 788             my ( $stored_directory ) = $rows[1];
 789             chomp( $stored_directory );
 790             if ( not -e $stored_directory ) {
 791               debug -> die( message => "The Jackknife directory ".$stored_directory.
 792                             "indicated by ".$self -> {'directory'}.
 793                             "/jackknife_done.$model_number".
 794                             " from the old bootstrap run in ".
 795                             $self -> {'directory'}." does not exist" );
 796             }
 797             my @seed = split(' ',$rows[2]);
 798             shift( @seed ); # get rid of 'seed'-word
 799             $self -> {'jackknife'} = tool::cdd::jackknife ->
 800               new( models           => [$self -> models -> [$model_number -1]],
 801                    case_columns     => $self -> models -> [$model_number -1]
 802                    -> datas -> [0] -> idcolumn,
 803                    _raw_results_callback => $self ->
 804                    _jackknife_raw_results_callback( model_number => $model_number ),
 805                    threads          => $jk_threads,
 806                    parent_tool_id   => $self -> {'tool_id'},
 807                    directory        => $stored_directory,
 808                    bca_mode         => 1,
 809                    shrinkage        => $self -> {'shrinkage'},
 810                    nonparametric_marginals => $self -> {'nonparametric_marginals'},
 811                    nonparametric_etas => $self -> {'nonparametric_etas'},
 812                    adaptive         => $self -> {'adaptive'},
 813                    rerun            => $self -> {'rerun'},
 814                    verbose          => $self -> {'verbose'},
 815                    cross_validate   => 0 );
 816             random_set_seed( @seed );
 817             ui -> print( category => 'bootstrap',
 818                          message  => "Restarting BCa Jackknife from ".
 819                          $stored_directory )
 820               unless $self -> {'parent_threads'} > 1;
 821
 822             # }}} Recreate Jackknife
 823
 824           }
 825
 826           $self -> {'jackknife'} -> run;
 827           $self -> {'jackknife_results'} = $self -> {'jackknife'} -> {'results'};
 828           $self -> {'jackknife_prepared_models'} = $self -> {'jackknife'} -> {'prepared_models'};
 829
 830           $self -> {'jackknife_raw_results'}[$model_number-1] =
 831             $self -> {'jackknife'} -> raw_results;
 832 #           $self -> {'jackknife'} -> raw_results -> [$model_number-1];
 833 #         $Data::Dumper::Maxdepth = 0;
 834 #         print Dumper $self -> {'jackknife_raw_results'};
 835
 836           # }}} BCa
 837
 838         } else {
 839           @calculation_order = @{$self -> {'calculation_order'}};
 840           @print_order = @{$self -> {'print_order'}};
 841           $self -> {'bootstrap_raw_results'}[$model_number-1] =
 842             $self -> {'tools'} -> [0] -> raw_results;
 843 #           $self -> {'tools'} -> [0] -> raw_results -> [$model_number-1];
 844         }
 845         unless( ref($self -> {'raw_results_header'}[0]) eq 'ARRAY' ) {
 846           my $tmp = $self -> {'raw_results_header'};
 847           $self -> {'raw_results_header'} = [];
 848           $self -> {'raw_results_header'}[$model_number-1] = $tmp;
 849         }
 850
 851         my @param_names = @{$self -> models -> [$model_number -1] -> outputs -> [0] -> labels};
 852         my ( @diagnostic_names, @tmp_names );
 853         foreach my $param ( @diagnostic_params ) {
 854           push( @tmp_names, $param );
 855           $tmp_names[$#tmp_names] =~ s/_/\./g;
 856         }
 857         for ( my $i = 0; $i <= $#param_names; $i++ ) {
 858           unshift( @{$param_names[$i]}, 'OFV' );
 859           push( @{$diagnostic_names[$i]}, @tmp_names );
 860         }
 861       }
 862 end modelfit_analyze
 863
 864 # }}}
 865
 866 # {{{ prepare_results
 867
 868 start prepare_results
 869       {
 870         # {{{ definitions
 871
 872         # The '3' is there to skip model, problem and subproblem numbers
 873         my $skip_mps = 3;
 874         my ( @calculation_order, @print_order, %diag_idx );
 875         if ( $self -> {'type'} eq 'bca' ) {
 876           @calculation_order = @{$self -> {'bca_calculation_order'}};
 877           @print_order = @{$self -> {'bca_print_order'}};
 878         } else {
 879           @calculation_order = @{$self -> {'calculation_order'}};
 880           @print_order = @{$self -> {'print_order'}};
 881         }
 882         if ( $self -> {'type'} eq 'bca' ) {
 883 #         $self -> read_raw_results();
 884           $self -> bca_read_raw_results();
 885 #           if ( not defined $self -> {'bootstrap_raw_results'} );
 886         } else {
 887 #         if ( not defined $self -> {'raw_results'} ) {
 888             $self -> read_raw_results();
 889             $self -> {'bootstrap_raw_results'} = $self -> {'raw_results'};
 890 #         }
 891         }
 892
 893         for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
 894           $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i + $skip_mps;
 895         }
 896
 897         # }}} definitions
 898
 899         # ---------------------  Get data from raw_results  -------------------------
 900
 901         # Divide the data into diagnostics and estimates. Included in estimates are
 902         # the parametric estimates, the standard errors of these, the nonparametric
 903         # estimates, the shrinkage in eta and the shrinkage in wres
 904         # The diagnostics end up in {'bootstrap_diagnostics'} and
 905         # {'jackknife_diagnostics'}. The estimates in {'bootstrap_estimates'} and
 906         # {'jackknife_estimates'}.
 907         # The number of runs that are selected for calculation of the results is
 908         # saved.
 909
 910         # {{{ Get the data from the runs
 911
 912         foreach my $tool ( 'bootstrap', 'jackknife' ) {
 913           if ( defined $self -> {$tool.'_raw_results'} ) {
 914             for ( my $i = 0; $i < scalar @{$self->{$tool.'_raw_results'}}; $i++ ) { # All models
 915
 916               # {{{ Get the number of columns with estimates
 917
 918               my $cols_orig = 0;
 919               foreach my $param ( 'theta', 'omega', 'sigma' ) {
 920                 my $labels =
 921                   $self -> {'models'} -> [$i] -> labels( parameter_type => $param );
 922                 # we can't use labels directly since different models may have different
 923                 # labels (still within the same modelfit)
 924                 my $numpar = scalar @{$labels -> [0]} if ( defined $labels and
 925                                                            defined $labels -> [0] );
 926                 $cols_orig += ( $numpar*3 ); # est + SE + eigen values
 927               }
 928               # nonparametric omegas and shrinkage
 929               my $nomegas = $self -> {'models'} -> [$i] -> nomegas;
 930               my $numpar = $nomegas -> [0];
 931
 932               # shrinkage omega + wres shrinkage
 933               $cols_orig += $numpar + 1;
 934 #             $cols_orig += ($numpar*($numpar+1)/2 + $numpar + 1);
 935
 936               $cols_orig++; # OFV
 937
 938               # }}} Get the number of columns with estimates
 939
 940               # {{{ Loop, choose and set diagnostics and estimates
 941
 942               my %return_section;
 943               $return_section{'name'} = 'Warnings';
 944               my ( $skip_term, $skip_cov, $skip_warn, $skip_bound );
 945               my $included = 0;
 946 #             print Dumper $self->{$tool.'_raw_results'};
 947               for ( my $j = 0; $j < scalar @{$self->{$tool.'_raw_results'}->[$i]}; $j++ ) { # orig model + prepared_models
 948                 my $columns = scalar @{$self->{$tool.'_raw_results'}->[$i][$j]};
 949
 950                 # -----------------------  Diagnostics  -----------------------------
 951
 952                 for ( my $m = $skip_mps; $m < scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m++ ) { # value
 953                   $self->{$tool.'_diagnostics'}->[$i][$j][$m-$skip_mps] =
 954                       $self->{$tool.'_raw_results'}->[$i][$j][$m];
 955                 }
 956                 my $use_run = 1;
 957                 if ( $self -> {'skip_minimization_terminated'} and
 958                      ( not defined $self->{$tool.'_raw_results'}->
 959                        [$i][$j][$diag_idx{'minimization_successful'}]
 960                        or not $self->{$tool.'_raw_results'}->
 961                        [$i][$j][$diag_idx{'minimization_successful'}] ) ) {
 962                   $skip_term++;
 963                   $use_run = 0;
 964                 } elsif ( $self -> {'skip_covariance_step_terminated'} and not
 965                           $self->{$tool.'_raw_results'}->
 966                           [$i][$j][$diag_idx{'covariance_step_successful'}] ) {
 967                   $skip_cov++;
 968                   $use_run = 0;
 969                 } elsif ( $self -> {'skip_with_covstep_warnings'} and
 970                           $self->{$tool.'_raw_results'}->
 971                           [$i][$j][$diag_idx{'covariance_step_warnings'}] ) {
 972                   $skip_warn++;
 973                   $use_run = 0;
 974                 } elsif ( $self -> {'skip_estimate_near_boundary'} and
 975                           $self->{$tool.'_raw_results'}->
 976                           [$i][$j][$diag_idx{'estimate_near_boundary'}] ) {
 977                   $skip_bound++;
 978                   $use_run = 0;
 979                 }
 980
 981                 # ------------------------  Estimates  ------------------------------
 982
 983                 if( $use_run ) {
 984                   for ( my $m = scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m < $columns; $m++ ) { # value
 985                     my $val = $self->{$tool.'_raw_results'}->[$i][$j][$m];
 986                     $self->{$tool.'_estimates'}->
 987                         [$i][$included][$m-(scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps)] = $val;
 988                   }
 989                   $included++;
 990                 }
 991               }
 992
 993               # }}} Loop, choose and set diagnostics and estimates
 994
 995               # {{{ push #runs to results
 996
 997               if ( defined $skip_term ) {
 998                 push( @{$return_section{'values'}}, "$skip_term runs with miminization ".
 999                       "terminated were skipped when calculating the $tool results" );
1000               }
1001               if ( defined $skip_cov ) {
1002                 push( @{$return_section{'values'}}, "$skip_cov runs with aborted ".
1003                       "covariance steps were skipped when calculating the $tool results" );
1004               }
1005               if ( defined $skip_warn ) {
1006                 push( @{$return_section{'values'}}, "$skip_warn runs with errors from ".
1007                       "the covariance step were skipped when calculating the $tool results" );
1008               }
1009               if ( defined $skip_bound ) {
1010                 push( @{$return_section{'values'}}, "$skip_bound runs with estimates ".
1011                       "near a boundary were skipped when calculating the $tool results" );
1012               }
1013               $return_section{'labels'} = [];
1014               push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1015
1016               # }}} push #runs to results
1017
1018             }
1019           }
1020         }
1021
1022 #         $Data::Dumper::Maxdepth = 5;
1023 #         die Dumper $self -> {'bootstrap_diagnostics'};
1024
1025         # }}} Get the data from the runs
1026
1027         # ----------------------  Calculate the results  ----------------------------
1028
1029         # {{{ Result calculations
1030
1031         for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1032
1033           my $mps_offset = $self -> {'bca'} ? 4 : 3; # <- this is the offset to
1034                                                      # diagonstic_parameters,
1035                                                      # which is one more for
1036                                                      # the method column added
1037                                                      # with a bca run.
1038
1039           my @param_names = @{$self -> {'raw_results_header'}[$i]}[($mps_offset + scalar @{$self -> {'diagnostic_parameters'}}) .. (scalar @{$self -> {'raw_results_header'}[$i]} - 1)];
1040           my ( @diagnostic_names, @tmp_names );
1041           foreach my $param ( @{$self -> {'diagnostic_parameters'}} ) {
1042             push( @tmp_names, $param );
1043             $tmp_names[$#tmp_names] =~ s/_/\./g;
1044           }
1045
1046           @diagnostic_names = @tmp_names;
1047           foreach my $result_type ( @calculation_order ) {
1048             my @names = $result_type eq 'diagnostic_means' ?
1049                 @diagnostic_names : @param_names;
1050             my $calc = 'calculate_'.$result_type;
1051             $self -> $calc( model_number    => ($i+1),
1052                             parameter_names => \@names );
1053           }
1054           foreach my $result_type ( @print_order ) {
1055             my $name = $result_type;
1056             $name =~ s/_/\./g;
1057             my %return_section;
1058             $return_section{'name'} = $name;
1059             $return_section{'values'} = $self -> {$result_type} -> [$i];
1060             $return_section{'labels'} = $self -> {$result_type.'_labels'} -> [$i];
1061             push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1062           }
1063         }
1064
1065         # }}} Result calculations
1066
1067       }
1068 end prepare_results
1069
1070 # }}} prepare_results
1071
1072 # {{{ print_summary
1073
1074 start print_summary
1075       {
1076         sub acknowledge {
1077           my $name    = shift;
1078           my $outcome = shift;
1079           my $file    = shift;
1080           my $l = (7 - length( $outcome ))/2;
1081           my $c_num = '00';
1082           $c_num    = '07' if ( $outcome eq 'OK' );
1083           $c_num    = '13' if ( $outcome eq 'WARNING' );
1084           $c_num    = '05' if ( $outcome eq 'ERROR' );
1085           # my $text = sprintf( "%-66s%2s%7s%-5s\n\n", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1086           my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1087           #  cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1088           #  my $text = cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1089           print $text, "\n\n";
1090           print $file $text if defined $file;
1091         }
1092
1093         sub sum {
1094           my $arr = shift;
1095           my $sum = 0;
1096           for ( @{$arr} ) {
1097             $sum += $_;
1098           }
1099           return $sum;
1100         }
1101
1102         my $diag_number = scalar @{$self -> {'diagnostic_parameters'}} - 1;
1103         my %diag_idxs;
1104         for ( my $i = 0; $i <= $diag_number; $i++ ) {
1105           $diag_idxs{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1106         }
1107
1108         open( my $log, ">test.log" );
1109         for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1110           print "MODEL ",$i+1,"\n" if ( scalar @{$self -> {'bootstrap_raw_results'}} > 1 );
1111           if ( $self -> {'diagnostic_means'} ->
1112                [$i][0][$diag_idxs{'minimization_successful'}] >
1113                $self -> {'minimization_successful_limit'} ) {
1114             acknowledge( 'Successful minimization ratio = '.
1115                          $self -> {'diagnostic_means'} ->
1116                          [$i][0][$diag_idxs{'minimization_successful'}], 'OK', $log );
1117           } else {
1118             acknowledge( 'Termination problems in '.
1119                          sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1120                                            [$i][0][$diag_idxs{'minimization_successful'}]*100))
1121                          .'% of the bootstrap runs', 'WARNING', $log );
1122           }
1123
1124           if ( $self -> {'diagnostic_means'} ->
1125                [$i][0][$diag_idxs{'covariance_step_successful'}] >
1126                $self -> {'covariance_step_successful_limit'} ) {
1127             acknowledge( 'Successful covariance step ratio = '.$self -> {'diagnostic_means'} ->
1128                          [$i][0][$diag_idxs{'covariance_step_successful'}], 'OK', $log );
1129           } else {
1130             acknowledge( 'Covariance step problems in '.
1131                          sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1132                                            [$i][0][$diag_idxs{'covariance_step_successful'}]*100))
1133                          .'% of the bootstrap runs', 'WARNING', $log );
1134           }
1135
1136           if ( $self -> {'diagnostic_means'} ->
1137                [$i][0][$diag_idxs{'covariance_step_warnings'}] <
1138                $self -> {'covariance_step_warnings_limit'} ) {
1139             acknowledge( 'Covariance step warnings ratio = '.$self -> {'diagnostic_means'} ->
1140                          [$i][0][$diag_idxs{'covariance_step_warnings'}], 'OK', $log );
1141           } else {
1142             acknowledge( 'Covariance step warnings in '.
1143                          sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1144                                            [$i][0][$diag_idxs{'covariance_step_warnings'}]*100))
1145                          .'% of the bootstrap runs', 'WARNING', $log );
1146           }
1147
1148           if ( $self -> {'diagnostic_means'} ->
1149                [$i][0][$diag_idxs{'estimate_near_boundary'}] <
1150                $self -> {'estimate_near_boundary_limit'} ) {
1151             acknowledge( 'Estimate near boundary ratio = '.$self -> {'diagnostic_means'} ->
1152                          [$i][0][$diag_idxs{'estimate_near_boundary'}], 'OK', $log );
1153           } else {
1154             acknowledge( 'Estimate near boundary found in '.
1155                          sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1156                                            [$i][0][$diag_idxs{'estimate_near_boundary'}]*100))
1157                          .'% of the bootstrap runs', 'WARNING', $log );
1158           }
1159           my $sum = sum( $self -> {'within_se_confidence_intervals'}->
1160                          [$i]{$self -> {'se_confidence_intervals_level'}} );
1161           if ( not defined $sum or $sum < 1 ) {
1162             acknowledge( 'No '.(100-$self -> {'se_confidence_intervals_level'}).
1163                          '% confidence intervals based on the'.
1164                          ' bootstrap standard errors include '.
1165                          $self -> {'se_confidence_intervals_check'}, 'OK', $log );
1166           } else {
1167             acknowledge( "$sum ".(100-$self -> {'se_confidence_intervals_level'}).
1168                          "% confidence intervals based on the bootstrap".
1169                          " SE's include ".
1170                          $self -> {'se_confidence_intervals_check'}, 'WARNING', $log );
1171             my $found =
1172               scalar @{$self -> {'within_se_confidence_intervals'}->
1173                          [$i]{$self -> {'se_confidence_intervals_level'}}} - 1;
1174             for ( my $l = 0; $l <= $found ; $l++ ) {
1175               if ( $self -> {'within_se_confidence_intervals'}->
1176                    [$i]{$self -> {'se_confidence_intervals_level'}}[$l] ) {
1177                 printf( "\t%-20s\n",
1178                         $self -> {'standard_error_confidence_intervals_labels'} ->
1179                         [$i][1][$l] );
1180                 print $log
1181                   sprintf( "\t%-20s\n",
1182                            $self -> {'standard_error_confidence_intervals_labels'} ->
1183                            [$i][1][$l] );
1184                 print "\n" if ( $l == $found );
1185                 print $log "\n" if ( $l == $found );
1186               }
1187             }
1188           }
1189
1190           my $sum = sum( $self -> {'large_bias'}-> [$i][0] );
1191           if ( not defined $sum or $sum < 1 ) {
1192             acknowledge( 'No bias larger than '.
1193                          ($self -> {'large_bias_limit'}*100).'% found', 'OK', $log );
1194           } else {
1195             acknowledge( "$sum estimates were found to have a relative bias > ".
1196                          $self -> {'large_bias_limit'}, 'WARNING', $log );
1197             my $found =
1198               scalar @{$self -> {'large_bias'}->
1199                          [$i][0]} - 1;
1200             for ( my $l = 0; $l <= $found ; $l++ ) {
1201               if ( $self -> {'large_bias'}->
1202                    [$i][0][$l] ) {
1203                 printf( "\t%-20s%3.2f %\n", $self -> {'bias_labels'} ->
1204                         [$i][1][$l], ($self -> {'bias'} -> [$i][0][$l]/
1205                                       $self->{'bootstrap_estimates'} -> [$i][0][$l])*100 );
1206                 #                   print $log
1207                 #                     sprintf( "\t%-20s\n",
1208                 #                              $self -> {'percentile_confidence_intervals_labels'} ->
1209                 #                              [$i][1][$l] );
1210                 print "\n" if ( $l == $found );
1211                 print $log "\n" if ( $l == $found );
1212               }
1213             }
1214           }
1215
1216           if ( $self -> {'type'} eq 'bca' ) {
1217             my $sum = sum( $self -> {'within_bca_confidence_intervals'}->
1218                            [$i]{$self -> {'bca_confidence_intervals_level'}} );
1219             if ( not defined $sum or $sum < 1 ) {
1220               acknowledge( 'No '.(100-$self -> {'bca_confidence_intervals_level'}).
1221                            '% BCa confidence intervals include '.
1222                            $self -> {'bca_confidence_intervals_check'}, 'OK', $log );
1223             } else {
1224               acknowledge( "$sum ".(100-$self -> {'bca_confidence_intervals_level'}).
1225                            "% BCa confidence intervals include ".
1226                            $self -> {'bca_confidence_intervals_check'}, 'WARNING', $log );
1227               my $found =
1228                 scalar @{$self -> {'within_bca_confidence_intervals'}->
1229                            [$i]{$self -> {'bca_confidence_intervals_level'}}} - 1;
1230               for ( my $l = 0; $l <= $found ; $l++ ) {
1231                 if ( $self -> {'within_bca_confidence_intervals'}->
1232                      [$i]{$self -> {'bca_confidence_intervals_level'}}[$l] ) {
1233                   printf( "\t%-20s\n",
1234                           $self -> {'bca_confidence_intervals_labels'} ->
1235                           [$i][1][$l] );
1236                   print $log
1237                     sprintf( "\t%-20s\n",
1238                              $self -> {'bca_confidence_intervals_labels'} ->
1239                              [$i][1][$l] );
1240                   print "\n" if ( $l == $found );
1241                   print $log "\n" if ( $l == $found );
1242                 }
1243               }
1244             }
1245           } else {
1246             my $sum = sum( $self -> {'within_percentiles'}->
1247                            [$i]{$self -> {'percentile_confidence_intervals_level'}} );
1248             if ( not defined $sum or $sum < 1 ) {
1249               acknowledge( 'No '.(100-$self -> {'percentile_confidence_intervals_level'}).
1250                            '% confidence intervals based on the'.
1251                            ' bootstrap percentiles include '.
1252                            $self -> {'percentile_confidence_intervals_check'}, 'OK', $log );
1253             } else {
1254               acknowledge( "$sum ".(100-$self -> {'percentile_confidence_intervals_level'}).
1255                            "% confidence intervals based on the percentiles".
1256                            " include ".
1257                            $self -> {'percentile_confidence_intervals_check'}, 'WARNING', $log );
1258               my $found =
1259                 scalar @{$self -> {'within_percentiles'}->
1260                            [$i]{$self -> {'percentile_confidence_intervals_level'}}} - 1;
1261               for ( my $l = 0; $l <= $found ; $l++ ) {
1262                 if ( $self -> {'within_percentiles'}->
1263                      [$i]{$self -> {'percentile_confidence_intervals_level'}}[$l] ) {
1264                   printf( "\t%-20s\n",
1265                           $self -> {'percentile_confidence_intervals_labels'} ->
1266                           [$i][1][$l] );
1267                   print $log
1268                     sprintf( "\t%-20s\n",
1269                              $self -> {'percentile_confidence_intervals_labels'} ->
1270                              [$i][1][$l] );
1271                   print "\n" if ( $l == $found );
1272                   print $log "\n" if ( $l == $found );
1273                 }
1274               }
1275             }
1276           }
1277         }
1278         close ( $log );
1279       }
1280 end print_summary
1281
1282 # }}} print_summary
1283
1284 # {{{ bca_read_raw_results
1285
1286 start bca_read_raw_results
1287       {
1288         $self -> {'raw_results_header'} = [];
1289         for ( my $i = 1; $i <= scalar @{$self->{'models'}}; $i++ ) { # All models
1290           if ( -e $self -> {'directory'}.'raw_results'.$i.'.csv' ) {
1291             open( RRES, $self -> {'directory'}.'raw_results'.$i.'.csv' );
1292             my @file = <RRES>;
1293             close( RRES );
1294             map { chomp; my @tmp = split(',',$_); $_ = \@tmp } @file ;
1295
1296             my $header = shift @file;
1297
1298             # Get rid of 'method' column
1299             my $cols = scalar(@{$header})-1;
1300             @{$self -> {'raw_results_header'}[$i-1]} = @{$header}[1..$cols];
1301             $self -> {'raw_results'} -> [$i-1] = \@file;
1302             for( my $j = 0; $j <= $#file; $j++ ) {
1303               if ( $file[$j][0] eq 'jackknife' ) {
1304                 shift( @{$file[$j]} );
1305 #               $self -> {'jackknife_raw_results'}[$i-1] = \@file;
1306                 push( @{$self -> {'jackknife_raw_results'}[$i-1]}, $file[$j]);
1307               } else {
1308                 shift( @{$file[$j]} );
1309 #               $self -> {'bootstrap_raw_results'}[$i-1] = \@file;
1310                 push( @{$self -> {'bootstrap_raw_results'}[$i-1]}, $file[$j] );
1311               }
1312             }
1313           }
1314         }
1315       }
1316 end bca_read_raw_results
1317
1318 # }}} bca_read_raw_results
1319
1320 # {{{ calculate_diagnostic_means
1321
1322 start calculate_diagnostic_means
1323       {
1324         my ( @sum, @diagsum, %diag_idx );
1325         for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
1326           $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1327         }
1328
1329         my $def = 0;
1330         # Prepared model, skip the first (the original)
1331         for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_diagnostics'} ->
1332                                          [$model_number-1]}; $k++ ) {
1333           # Diagnostics
1334           if( defined $self -> {'bootstrap_diagnostics'} ->
1335               [$model_number-1][$k] ) {
1336             $def++;
1337             for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_diagnostics'} ->
1338                                                [$model_number-1][$k]}; $l++ ) {
1339               $sum[$l] += $self -> {'bootstrap_diagnostics'} ->
1340                   [$model_number-1][$k][$l];
1341             }
1342           }
1343         }
1344
1345         # divide by the number of bootstrap samples (-1 to get rid of the original
1346         # model) The [0] in the index is there to indicate the 'model' level. Mostly
1347         # used for printing
1348         for ( my $l = 0; $l <= $#sum; $l++ ) {
1349           if( $l == $diag_idx{'significant_digits'} ) {
1350             $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1351                 $sum[$l] / $def;
1352           } else {
1353             $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1354                 $sum[$l] / ( scalar @{$self -> {'bootstrap_diagnostics'} ->
1355                                           [$model_number-1]} - 1);
1356           }
1357         }
1358         $self -> {'diagnostic_means_labels'} -> [$model_number-1] =
1359           [[],\@parameter_names];
1360       }
1361 end calculate_diagnostic_means
1362
1363 # }}} calculate_diagnostic_means
1364
1365 # {{{ calculate_means
1366
1367 start calculate_means
1368       {
1369         my ( @sum, @diagsum );
1370         # Prepared model, skip the first (the original)
1371         for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'} ->
1372                                          [$model_number-1]}; $k++ ) {
1373           # Estimates
1374           for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'} ->
1375                                            [$model_number-1][$k]}; $l++ ) {
1376             $sum[$l] += $self -> {'bootstrap_estimates'} ->
1377               [$model_number-1][$k][$l];
1378           }
1379         }
1380         # divide by the number of bootstrap samples (-1 to get rid of the original
1381         # model) The [0] in the index is there to indicate the 'model' level. Mostly
1382         # used for printing
1383         my $samples = scalar @{$self -> {'bootstrap_estimates'} ->
1384                                  [$model_number-1]} - 1;
1385         for ( my $l = 0; $l <= $#sum; $l++ ) {
1386           my $mean = $sum[$l] / $samples;
1387           $self -> {'means'} -> [$model_number-1][0][$l] = $mean;
1388           my $bias = $mean - $self ->
1389             {'bootstrap_estimates'} -> [$model_number-1][0][$l];
1390           $self -> {'bias'} -> [$model_number-1][0][$l] = $bias;
1391           if ( $self->{'bootstrap_estimates'} -> [$model_number-1][0][$l] != 0 and
1392                $bias/$self->{'bootstrap_estimates'} -> [$model_number-1][0][$l]
1393                > $self -> {'large_bias_limit'} ) {
1394             $self -> {'large_bias'} -> [$model_number-1][0][$l] = 1;
1395           } else {
1396             $self -> {'large_bias'} -> [$model_number-1][0][$l] = 0;
1397           }
1398         }
1399         $self -> {'means_labels'} -> [$model_number-1] =
1400           [[],\@parameter_names];
1401
1402         $self -> {'bias_labels'} -> [$model_number-1] =
1403           [[],\@parameter_names];
1404       }
1405 end calculate_means
1406
1407 # }}} calculate_means
1408
1409 # {{{ calculate_jackknife_means
1410
1411 start calculate_jackknife_means
1412       {
1413         my @sum;
1414         # Prepared model, skip the first (the original)
1415         if( defined $self -> {'jackknife_estimates'} ){
1416           for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]}; $k++ ) {
1417             # Estimate
1418             for ( my $l = 0; $l <
1419                   scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][$k]}; $l++ ) {
1420               $sum[$l] += $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1421             }
1422           }
1423           # divide by the number of bootstrap samples (-1 to get rid of the original model)
1424           # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1425           for ( my $l = 0; $l <
1426                 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][0]}; $l++ ) {
1427             if( ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1) != 0 ) {
1428               $self -> {'jackknife_means'} -> [$model_number-1][0][$l] =
1429                   $sum[$l] / ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1);
1430             }
1431           }
1432           $self -> {'jackknife_means_labels'} -> [$model_number-1] = [[],\@parameter_names];
1433         }
1434       }
1435 end calculate_jackknife_means
1436
1437 # }}} calculate_jackknife_means
1438
1439 # {{{ calculate_medians
1440 start calculate_medians
1441       {
1442         my @medians;
1443         # Loop the parameters
1444         for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1445                                          [$model_number-1][0]}; $l++ ) {
1446           my @parameter_array;
1447           # From 1 to get rid of original model
1448           for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1449                                            [$model_number-1]}; $k++ ) {
1450             $parameter_array[$k-1] =
1451               $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1452           }
1453           my @sorted = sort {$a <=> $b} @parameter_array;
1454           # median postition is half the ( array length - 1 ).
1455           my $median_position = ( $#sorted ) / 2;
1456           my ($int_med,$frac_med)   = split(/\./, $median_position );
1457           $frac_med = eval("0.".$frac_med);
1458           my $median_low  = $sorted[ $int_med ];
1459           my $median_high = ( $sorted[ $int_med + 1 ] - $sorted[ $int_med ] ) * $frac_med;
1460           $medians[$l] = $median_low + $median_high;
1461         }
1462         # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1463         $self -> {'medians'} -> [$model_number-1][0] = \@medians;
1464         $self -> {'medians_labels'} -> [$model_number-1] = [[],\@parameter_names];
1465       }
1466 end calculate_medians
1467 # }}} calculate_medians
1468
1469 # {{{ calculate_standard_error_confidence_intervals
1470 start calculate_standard_error_confidence_intervals
1471       {
1472         # Sort the limits from the inside out
1473         my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1474         foreach my $limit ( @limits ) {
1475           my ( @lower_limits, @upper_limits, @within_ci );
1476           # Loop the estimates of the first (original) model
1477           for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1478                                            [$model_number-1][0]}; $l++ ) {
1479             my $lower_limit =
1480               $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] -
1481                 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1482                   $self -> {'confidence_limits'} ->{$limit};
1483             my $upper_limit =
1484               $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] +
1485                 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1486                   $self -> {'confidence_limits'} ->{$limit};
1487             push( @lower_limits, $lower_limit );
1488             push(  @upper_limits, $upper_limit );
1489             if ( $self -> {'se_confidence_intervals_check'} < $upper_limit and
1490                  $self -> {'se_confidence_intervals_check'} > $lower_limit ) {
1491               push( @within_ci , 1 );
1492             } else {
1493               push( @within_ci , 0 );
1494             }
1495           }
1496           unshift( @{$self -> {'standard_error_confidence_intervals'} ->
1497                        [$model_number-1]}, \@lower_limits );
1498           push( @{$self -> {'standard_error_confidence_intervals'} ->
1499                     [$model_number-1]}, \@upper_limits );
1500           $self -> {'within_se_confidence_intervals'} ->
1501             [$model_number-1]{$limit} = \@within_ci;
1502           unshift( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1503                        [$model_number-1][0]}, ($limit/2).'%' );
1504           push( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1505                     [$model_number-1][0]}, (100-($limit/2)).'%' );
1506           push( @{$self -> {'within_se_confidence_intervals_labels'} ->
1507                     [$model_number-1][0]}, $limit.'%' );
1508         }
1509         $self -> {'standard_error_confidence_intervals_labels'} -> [$model_number-1][1] =
1510           \@parameter_names;
1511         $self -> {'within_se_confidence_intervals_labels'} -> [$model_number-1][1] =
1512           \@parameter_names;
1513       }
1514 end calculate_standard_error_confidence_intervals
1515 # }}} calculate_standard_error_confidence_intervals
1516
1517 # {{{ calculate_standard_errors
1518
1519 start calculate_standard_errors
1520       {
1521         my @se;
1522         # Prepared model, skip the first (the original)
1523         for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]}; $k++ ) {
1524           # Estimate
1525           for ( my $l = 0; $l <
1526                 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][$k]}; $l++ ) {
1527             $se[$l] += ( $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l] -
1528                          $self -> {'means'}->[$model_number-1][0][$l] )**2;
1529           }
1530         }
1531         # divide by the number of bootstrap samples -1 (-2 to get rid of the original model)
1532         # The [0] in the index is there to indicate the 'model' level.
1533         for ( my $l = 0; $l <
1534               scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][0]}; $l++ ) {
1535           my $div = ( scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]} - 2 );
1536           if( defined $div and not $div == 0 ) {
1537             $self -> {'standard_errors'} -> [$model_number-1][0][$l] =
1538                 ($se[$l] / $div  )**0.5;
1539           } else {
1540             $self -> {'standard_errors'} -> [$model_number-1][0][$l] = undef;
1541           }
1542         }
1543         $self -> {'standard_errors_labels'} -> [$model_number-1] = [[],\@parameter_names];
1544       }
1545 end calculate_standard_errors
1546
1547 # }}} calculate_standard_errors
1548
1549 # {{{ calculate_bca_confidence_intervals
1550
1551 start calculate_bca_confidence_intervals
1552       {
1553         sub c_get_z0 {
1554           my $arr_ref  = shift;
1555           my $orig_value = shift;
1556           my $num_less_than_orig = 0;
1557           my $nvalues = 0;
1558           my $z0;
1559           foreach my $value ( @{$arr_ref} ) {
1560             if ( defined $value and $value ne '' ) {
1561               $num_less_than_orig++ if ( $value < $orig_value );
1562               $nvalues ++;
1563             }
1564           }
1565
1566           unless ( $nvalues == 0 ) {
1567             if ( ($num_less_than_orig / $nvalues ) == 0 ) {
1568               $z0 = -100;
1569             } elsif ( ($num_less_than_orig / $nvalues ) == 1 ) {
1570               $z0 = 100;
1571             } else {
1572               $z0 = udistr( 1 - ($num_less_than_orig / $nvalues ) );
1573             }
1574           }
1575 #         return ( $z0, $nvalues );
1576           return $z0;
1577         }
1578
1579         sub c_get_acc {
1580           my $arr_ref = shift;
1581           my $jk_mean = shift;
1582           my $acc_upper = 0;
1583           my $acc_lower = 0;
1584           my $nvalues = 0;
1585           my $acc;
1586           foreach my $value ( @{$arr_ref} ){
1587             if ( defined $value and $value ne '' ) {
1588               $acc_upper = $acc_upper + ($jk_mean-$value)**3;
1589               $acc_lower = $acc_lower + ($jk_mean-$value)**2;
1590               $nvalues ++;
1591             }
1592           }
1593           $acc_lower = 6*($acc_lower**(3/2));
1594           unless ( $acc_lower == 0 ) {
1595             $acc = $acc_upper / $acc_lower;
1596           } else {
1597             $acc = $acc_upper / 0.001;
1598           }
1599 #         return ( $acc, $nvalues );
1600           return $acc;
1601         }
1602
1603         sub c_get_alphas {
1604           my $old_alphas = shift;
1605           my $acc = shift;
1606           my $z0 = shift;
1607           my $denom;
1608           my @new_alphas = ();
1609           foreach my $position ( @{$old_alphas} ) {
1610             if ( $position == 0 ){
1611               $denom = -100;
1612             } elsif ( $position == 100 ) {
1613               $denom = 100;
1614             } else {
1615               $denom = $z0 + udistr( 1 - $position/100 );
1616             }
1617             my $nom     = 1 - $acc * $denom;
1618             my $lim = 100*uprob( - ( $z0 + $denom / $nom ) );
1619             push( @new_alphas, $lim );
1620           }
1621           return \@new_alphas;
1622         }
1623
1624         my @limits = sort { $a <=> $b } keys %{$self -> {'confidence_limits'}};
1625         # Add the upper limits
1626         my $limnum = $#limits;
1627         for ( my $i = $limnum; $i >= 0; $i-- ) {
1628           $limits[$i] = $limits[$i]/2;
1629           push( @limits, 100-$limits[$i] );
1630         }
1631         my ( @bootstrap_array, @jackknife_array, @new_alphas, @z0, @acc );
1632         # Loop the estimates of the first (original) model
1633         for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1634                                          [$model_number-1][0]}; $l++ ) {
1635           my ( @unsorted_array1, @unsorted_array2 );
1636           # Loop the bootstrap samples from 1 to get rid of original model
1637           for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1638                                            [$model_number-1]}; $k++ ) {
1639             $unsorted_array1[$k-1] =
1640               $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1641           }
1642           @{$bootstrap_array[$l]} = sort {$a <=> $b} @unsorted_array1;
1643
1644           # Loop the jackknife samples from 1 to get rid of original model
1645           for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->
1646                                            [$model_number-1]}; $k++ ) {
1647             $unsorted_array2[$k-1] =
1648               $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1649           }
1650           @{$jackknife_array[$l]} = sort {$a <=> $b} @unsorted_array2;
1651           $z0[$l]         = c_get_z0     ( $bootstrap_array[$l],
1652                                            $self -> {'bootstrap_estimates'} ->
1653                                            [$model_number-1][0][$l] );
1654           $acc[$l]        = c_get_acc    ( $jackknife_array[$l],
1655                                            $self -> {'jackknife_means'} ->
1656                                            [$model_number-1][0][$l] );
1657           $new_alphas[$l] = c_get_alphas ( \@limits, $acc[$l], $z0[$l] );
1658         }
1659         # Loop limits
1660         for ( my $lim_idx = 0; $lim_idx <= $#limits; $lim_idx++ ) {
1661           my @percentiles;
1662           # Loop parameters
1663           for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1664             my $limit = $new_alphas[$l][$lim_idx]/100;
1665             my $position = ( scalar @{$bootstrap_array[$l]} + 1 ) * $limit;
1666             my $percentile;
1667             if ( $position < 1 ) {
1668               $percentile = undef;
1669             } elsif ( $position > scalar @{$bootstrap_array[$l]} ) {
1670               $percentile = undef;
1671             } else {
1672               my ($int_med,$frac_med)   = split(/\./, $position );
1673               $frac_med = eval("0.".$frac_med);
1674               my $percentile_low  = $bootstrap_array[$l][ $int_med - 1];
1675               my $percentile_high = ( $bootstrap_array[$l][ $int_med ] -
1676                                       $bootstrap_array[$l][ $int_med - 1] ) * $frac_med;
1677               $percentile = $percentile_low + $percentile_high;
1678             }
1679             push( @percentiles, $percentile );
1680           }
1681           push( @{$self -> {'bca_confidence_intervals'} -> [$model_number-1]},
1682                 \@percentiles );
1683           push( @{$self -> {'bca_confidence_intervals_labels'}->[$model_number-1][0]},
1684                 $limits[$lim_idx].'%');
1685         }
1686         # Check the intervals
1687         for ( my $lim_idx = 0; $lim_idx <= $limnum; $lim_idx++ ) {
1688           my @within_ci;
1689           for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1690             my $lower_limit = $self -> {'bca_confidence_intervals'} ->
1691               [$model_number-1][$lim_idx][$l];
1692             my $upper_limit = $self -> {'bca_confidence_intervals'} ->
1693               [$model_number-1][($limnum*2+1)-$lim_idx][$l];
1694             if ( $self -> {'bca_confidence_intervals_check'} < $upper_limit and
1695                  $self -> {'bca_confidence_intervals_check'} > $lower_limit ) {
1696               push( @within_ci , 1 );
1697             } else {
1698               push( @within_ci , 0 );
1699             }
1700           }
1701           $self -> {'within_bca_confidence_intervals'} ->
1702             [$model_number-1]{$limits[$lim_idx]*2} = \@within_ci;
1703         }
1704         $self -> {'bca_confidence_intervals_labels'} -> [$model_number-1][1] =
1705           \@parameter_names;
1706       }
1707 end calculate_bca_confidence_intervals
1708
1709 # }}} calculate_bca_confidence_intervals
1710
1711 # {{{ calculate_percentile_confidence_intervals
1712
1713 start calculate_percentile_confidence_intervals
1714       {
1715         # Sort the limits from the inside out
1716         my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1717         foreach my $limit ( @limits ) {
1718           my ( @lower_limits, @upper_limits, @within_ci );
1719           # Loop the estimates of the first (original) model
1720           for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1721                                            [$model_number-1][0]}; $l++ ) {
1722             my @parameter_array;
1723             # Loop the bootstrap samples from 1 to get rid of original model
1724             for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1725                                              [$model_number-1]}; $k++ ) {
1726               my $val = $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1727               # get rid of undefined values (these were probably deleted
1728               # when the bootstrap_estimates was created
1729               push( @parameter_array, $val ) if( defined $val );
1730             }
1731             my @sorted = sort {$a <=> $b} @parameter_array;
1732             for my $side ( 'lower', 'upper' ) {
1733               my $use_limit = $side eq 'lower' ? $limit/200 : 1-($limit/200);
1734               # percentile postition is:
1735               my $percentile_position = ( $#sorted + 2 ) * $use_limit;
1736               my $percentile;
1737               if ( $percentile_position < 1 ) {
1738                 $percentile = undef;
1739               } elsif ( $percentile_position > $#sorted +1) {
1740                 $percentile = undef;
1741               } else {
1742                 my ($int_med,$frac_med)   = split(/\./, $percentile_position );
1743                 $frac_med = eval("0.".$frac_med);
1744                 my $percentile_low  = $sorted[ $int_med - 1];
1745                 my $percentile_high = ( $sorted[ $int_med ] - $sorted[ $int_med - 1] ) * $frac_med;
1746                 $percentile = $percentile_low + $percentile_high;
1747               }
1748               push( @lower_limits, $percentile ) if ( $side eq 'lower' );
1749               push( @upper_limits, $percentile ) if ( $side eq 'upper' );
1750             }
1751             if ( $self -> {'percentile_confidence_intervals_check'} < $upper_limits[$#upper_limits] and
1752                  $self -> {'percentile_confidence_intervals_check'} > $lower_limits[$#lower_limits] ) {
1753               push( @within_ci , 1 );
1754             } else {
1755               push( @within_ci , 0 );
1756             }
1757           }
1758           unshift( @{$self -> {'percentile_confidence_intervals'} ->
1759                        [$model_number-1]}, \@lower_limits );
1760           push( @{$self -> {'percentile_confidence_intervals'} ->
1761                     [$model_number-1]}, \@upper_limits );
1762           unshift( @{$self -> {'percentile_confidence_intervals_labels'}->
1763                        [$model_number-1][0]}, ($limit/2).'%' );
1764           push( @{$self -> {'percentile_confidence_intervals_labels'}->
1765                     [$model_number-1][0]},(100-($limit/2)).'%');
1766           $self -> {'within_percentiles'}->[$model_number-1]{$limit}=\@within_ci;
1767         }
1768         $self -> {'percentile_confidence_intervals_labels'} ->
1769           [$model_number-1][1] = \@parameter_names;
1770       }
1771 end calculate_percentile_confidence_intervals
1772
1773 # }}} calculate_percentile_confidence_intervals
1774
1775 # {{{ modelfit_post_fork_analyze
1776
1777 start modelfit_post_fork_analyze
1778 end modelfit_post_fork_analyze
1779
1780 # }}}
1781
1782 # {{{ resample
1783
1784 start resample
1785       {
1786         my $dataObj = $model -> datas -> [0];
1787         for( my $i = 1; $i <= $self -> {'samples'}; $i++ ) {
1788           my ($bs_dir, $bs_name) = OSspecific::absolute_path( $self -> {'directory'}, "bs$i.dta" );
1789           my $new_name = $bs_dir . $bs_name;
1790           my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1791                                                   'new_name' => $new_name,
1792                                                   'target'   => $target );
1793           my $newmodel = $model -> copy( filename => "bs$i.mod",
1794                                          target   => $target,
1795                                          ignore_missing_files => 1 );
1796           $newmodel -> datafile( new_name => "bs$i.dta" );
1797           $newmodel -> datas -> [0] = $boot_sample ;
1798           $newmodel -> write;
1799           push( @resample_models, $newmodel );
1800         }
1801       }
1802 end resample
1803
1804 # }}} resample
1805
1806 # {{{ _sampleTools
1807 start _sampleTools
1808       {
1809         foreach my $tool ( @{$self -> {'tools'}} ) {
1810           my @models = @{$tool -> models};
1811           foreach my $model (@models){
1812             my $dataObj = $model -> datas -> [0];
1813             for( my $i = 1; $i <= $samples; $i++ ) {
1814               my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1815                                                       'new_name' => "bs$i.dta",
1816                                                       'target' => $target );
1817               my $newmodel;
1818               $newmodel = $model -> copy( filename => "bs$i.mod" );
1819               $newmodel -> datafile( new_name => "bs$i.dta" );
1820               $newmodel -> datas -> [0] = $boot_sample ;
1821               $newmodel -> write;
1822               if( defined( $tool -> models ) ){
1823                 push( @{$tool -> models}, $newmodel );
1824               } else {
1825                 $tool -> models( [ $newmodel ] );
1826               }
1827             }
1828           }
1829         }
1830       }
1831 end _sampleTools
1832 # }}} _sampleTools
1833
1834 # {{{ print_results
1835
1836 start print_results
1837       {
1838         # Run the print_results specific for the subtool
1839         my $sub_print_results = $self -> {'subtools'} -> [0];
1840         if ( defined $sub_print_results ) {
1841           sub get_dim {
1842             my $arr      = shift;
1843             my $dim      = shift;
1844             my $size_ref = shift;
1845             $dim++;
1846             if ( defined $arr and ref($arr) eq 'ARRAY' ) {
1847               push( @{$size_ref}, scalar @{$arr} );
1848               ( $dim, $size_ref ) = get_dim( $arr->[0], $dim, $size_ref );
1849             }
1850             return ( $dim, $size_ref );
1851           }
1852           sub format_value {
1853             my $val = shift;
1854             if ( not defined $val or $val eq '' ) {
1855               return sprintf("%10s",$PsN::output_style).',';
1856             } else {
1857               $_ = $val;
1858               my $nodot = /.*\..*/ ? 0 : 1;
1859               $_ =~ s/\.//g;
1860               if ( /.*\D+.*/ or $nodot) {
1861                 return sprintf("%10s",$val).',';
1862               } else {
1863                 return sprintf("%10.5f",$val).',';
1864               }
1865             }
1866           }
1867           debug -> die( message => "No results_file defined" )
1868             unless ( defined $self -> {'results_file'} );
1869           open ( RES, ">".$self -> {'directory'}.'/'.$self -> {'results_file'} );
1870           if ( defined $self -> {'results'} ) {
1871             my @all_results = @{$self -> {'results'}};
1872             for ( my $i = 0; $i <= $#all_results; $i++ ) {
1873               if ( defined $all_results[$i]{'own'} ) {
1874                 my @my_results = @{$all_results[$i]{'own'}};
1875                 for ( my $j = 0; $j <= $#my_results; $j++ ) {
1876                   # These size estimates include the problem and sub_problem dimensions:
1877                   my ( $ldim, $lsize_ref ) = get_dim( $my_results[$j]{'labels'}, -1, [] );
1878                   my ( $vdim, $vsize_ref ) = get_dim( $my_results[$j]{'values'}, -1, [] );
1879                   print RES $my_results[$j]{'name'},"\n" if ( $vdim > 1 );
1880                   if ( defined $my_results[$j]{'values'} and
1881                        scalar @{$my_results[$j]{'values'}} >= 0 ) {
1882                     my @values  = @{$my_results[$j]{'values'}};
1883                     my @labels;
1884                     if ( defined $my_results[$j]{'labels'} and
1885                          scalar @{$my_results[$j]{'labels'}} >= 0 ) {
1886                       @labels = @{$my_results[$j]{'labels'}};
1887                     }
1888
1889                     # Print Header Labels
1890                     if ( $ldim == 0 ) {
1891                       my $label = \@labels;
1892                       print RES ','.format_value($label),"\n";
1893                     } elsif ( $ldim == 2 and defined $labels[1] ) {
1894                       print RES ',';
1895                       for ( my $n = 0; $n < scalar @{$labels[1]}; $n++ ) {
1896                         my $label = $labels[1][$n];
1897                         print RES format_value($label);
1898                       }
1899                       print RES "\n" if ( scalar @{$labels[1]} );
1900                     }
1901                     # Print the values:
1902                     if ( $vdim == 0 ) {
1903                       print RES ','.format_value(\@values),"\n";
1904                     } elsif ( $vdim == 1 ) {
1905                       for ( my $m = 0; $m < scalar @{\@values}; $m++ ) {
1906                         my $label = $labels[$m];
1907                         print RES ','.format_value($label);
1908                         my $val = $values[$m];
1909                         print RES ','.format_value($val),"\n";
1910                       }
1911                     } elsif ( $vdim == 2 ) {
1912                       for ( my $m = 0; $m < scalar @{\@values}; $m++ ) {
1913                         my $label;
1914                         if ( $ldim == 1 ) {
1915                           $label = $labels[$m];
1916                         } elsif ( $ldim == 2 ) {
1917                           $label = $labels[0][$m];
1918                         }
1919                         print RES format_value($label);
1920                         if ( defined $values[$m] ) {
1921                           for ( my $n = 0; $n < scalar @{$values[$m]}; $n++ ) {
1922                             print RES format_value($values[$m][$n]);
1923                           }
1924                         }
1925                         print RES "\n";
1926                       }
1927                     }
1928                   }
1929                 }
1930               }
1931             }
1932           }
1933           close( RES );
1934         } else {
1935           debug -> warn( level   => 2,
1936                          message => "No subtools defined".
1937                          ", using default printing routine" );
1938         }
1939       }
1940 end print_results
1941
1942 # }}}
1943
1944 # {{{ create_matlab_scripts
1945
1946 start create_matlab_scripts
1947     {
1948       if( defined $PsN::config -> {'_'} -> {'matlab_dir'} ){
1949         unless( -e $PsN::config -> {'_'} -> {'matlab_dir'} . '/histograms.m' and
1950                 -e $PsN::config -> {'_'} -> {'matlab_dir'} . '/bca.m' ){
1951           'debug' -> die( message => 'Bootstrap matlab template scripts are not installed, no matlab scripts will be generated.' );
1952           return;
1953         }
1954
1955         open( PROF, $PsN::config -> {'_'} -> {'matlab_dir'} . '/histograms.m' );
1956         my @file = <PROF>;
1957         close( PROF );
1958         my $found_code;
1959         my $code_area_start=0;
1960         my $code_area_end=0;
1961
1962
1963         for(my $i = 0;$i < scalar(@file); $i++) {
1964           if( $file[$i] =~ /% ---------- Autogenerated code below ----------/ ){
1965             $found_code = 1;
1966             $code_area_start = $i;
1967           }
1968           if( $file[$i] =~ /% ---------- End autogenerated code ----------/ ){
1969             unless( $found_code ){
1970               'debug' -> die ( message => 'Bootstrap matlab template script is malformated, no matlab scripts will be generated' );
1971               return;
1972             }
1973             $code_area_end = $i;
1974           }
1975         }
1976
1977         my @auto_code;
1978         if( $self -> {'type'} eq 'bca' ){
1979           push( @auto_code, "use_bca = 1;                               % Was a BCa-type of\n" );
1980         } else {
1981           push( @auto_code, "use_bca = 0;                               % Was a BCa-type of\n" );
1982         }
1983
1984         push( @auto_code, "                                % bootstrap run?\n" );
1985         if( ref $self -> {'samples'} eq 'ARRAY' ) {
1986           push( @auto_code, "bs_samples = ".$self -> {'samples'}->[0][0].";                     % Number of bootstrap samples\n" );
1987         } else {
1988           push( @auto_code, "bs_samples = ".$self -> {'samples'}.";                     % Number of bootstrap samples\n" );
1989         }
1990         if( $self -> {'type'} eq 'bca' ){
1991           push( @auto_code, "jk_samples = 36;                   % Number of (BCa) jackknife samples\n\n" );
1992         }
1993
1994         push( @auto_code, "str_format = '%30s';\n\n" );
1995
1996         push( @auto_code, "col_names = [ sprintf(str_format,'Significant Digits');\n" );
1997         push( @auto_code, "           sprintf(str_format,'OFV');\n" );
1998
1999         my $nps = $self -> {'models'} -> [0] -> nomegas -> [0];
2000
2001         my %param_names;
2002         my( @par_names, @se_names, @np_names, @sh_names );
2003         foreach my $param ( 'theta','omega','sigma' ) {
2004           my $labels = $self -> {'models'} -> [0] -> labels( parameter_type => $param );
2005           if ( defined $labels ){
2006             foreach my $label ( @{$labels -> [0]} ){
2007               push( @par_names, "             sprintf(str_format,'",$label,"');\n" );
2008               push( @se_names, "              sprintf(str_format,'",'se-'.$label,"');\n" );
2009             }
2010           }
2011         }
2012
2013         for( my $i = 1; $i <= ($nps*($nps+1)/2); $i++ ) {
2014           push( @np_names, "          sprintf(str_format,'",'np-om'.$i,"');\n" );
2015         }
2016
2017         for( my $i = 1; $i <= $nps; $i++ ) {
2018           push( @sh_names, "          sprintf(str_format,'",'shrinkage-eta'.$i,"');\n" );
2019         }
2020
2021         push( @sh_names, "            sprintf(str_format,'",'shrinkage-iwres',"');\n" );
2022
2023         push( @auto_code,(@par_names, @se_names, @np_names, @sh_names));
2024         push( @auto_code, "           ];\n\n" );
2025
2026         my @np_columns = (0) x ($nps*($nps+1)/2);
2027         my @sh_columns = (0) x ($nps+1);
2028
2029         if( $self -> {'type'} eq 'bca' ){
2030           push( @auto_code, "fixed_columns = [ 0, 0, " );
2031         } else {
2032           push( @auto_code, "fixed_columns = [ 0, 0, " );
2033         }
2034         my @fixed_columns;
2035         my @same_columns;
2036         foreach my $param ( 'theta','omega','sigma' ) {
2037           my $fixed = $self -> {'models'} -> [0] -> fixed( parameter_type => $param );
2038
2039           if ( defined $fixed ){
2040             push( @fixed_columns, @{$fixed -> [0]} );
2041             if( $param eq 'theta' ) {
2042               push( @same_columns, (0) x scalar( @{$fixed -> [0]} ) );
2043             }
2044           }
2045         }
2046         push( @auto_code , join( ', ' , @fixed_columns).', '.
2047               join( ', ' , @fixed_columns).', '.
2048               join( ', ' , @np_columns).', '.
2049               join( ', ' , @sh_columns)."];\n\n" );
2050
2051         if( $self -> {'type'} eq 'bca' ){
2052           push( @auto_code, "same_columns = [ 0, 0, " );
2053         } else {
2054           push( @auto_code, "same_columns = [ 0, 0, " );
2055         }
2056         foreach my $param ( 'omegas','sigmas' ) {
2057           my $parameters = $self -> {'models'} -> [0] -> problems -> [0] -> $param;
2058           foreach my $parameter ( @{$parameters} ){
2059             if( $parameter -> same() ){
2060               push( @same_columns, (1) x $parameter -> size() );
2061             } else {
2062               push( @same_columns, (0) x scalar @{$parameter -> options} );
2063             }
2064           }
2065         }
2066         push( @auto_code , join( ', ' , @same_columns ).', '.
2067               join( ', ' , @same_columns).', '.
2068               join( ', ' , @np_columns).', '.
2069               join( ', ' , @sh_columns)."];\n\n" );
2070
2071         push( @auto_code , 'npomegas = '.($nps*($nps+1)/2).";\n\n" );
2072
2073
2074         push( @auto_code, "minimization_successful_col    = 4 + use_bca;        % Column number for the\n" );
2075         push( @auto_code, "                                                     % minimization sucessful flag\n" );
2076         push( @auto_code, "covariance_step_successful_col = 5 + use_bca;        % As above for cov-step warnings\n" );
2077         push( @auto_code, "covariance_step_warnings_col   = 6 + use_bca;        % etc\n" );
2078         push( @auto_code, "estimate_near_boundary_col     = 7 + use_bca;        % etc\n" );
2079
2080         push( @auto_code, "not_data_cols = 11 + use_bca;                        % Number of columns in the\n" );
2081         push( @auto_code, "                                        % beginning that are not\n" );
2082         push( @auto_code, "                                        % parameter estimates.\n" );
2083
2084         push( @auto_code, "filename = 'raw_results_matlab.csv';\n" );
2085
2086         splice( @file, $code_area_start, ($code_area_end - $code_area_start), @auto_code );
2087         open( OUTFILE, ">", $self -> {'directory'} . "/histograms.m" );
2088         print OUTFILE "addpath " . $PsN::config -> {'_'} -> {'matlab_dir'} . ";\n";
2089         print OUTFILE @file ;
2090         close OUTFILE;
2091
2092         open( OUTFILE, ">", $self -> {'directory'} . "/raw_results_matlab.csv" );
2093         for( my $i = 0; $i < scalar ( @{$self -> {'raw_results'} -> [0]} ); $i ++ ){
2094           $self -> {'raw_results'} -> [0] -> [$i][0] =
2095               $self -> {'raw_results'} -> [0] -> [$i][0] eq 'bootstrap' ?
2096               1 : $self -> {'raw_results'} -> [0] -> [$i][0];
2097           $self -> {'raw_results'} -> [0] -> [$i][0] =
2098               $self -> {'raw_results'} -> [0] -> [$i][0] eq 'jackknife' ?
2099               2 : $self -> {'raw_results'} -> [0] -> [$i][0];
2100           map( $_ = $_ eq 'NA' ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2101           print OUTFILE join( ',', @{$self -> {'raw_results'} -> [0] -> [$i]} ), "\n";
2102         }
2103         close OUTFILE;
2104
2105       } else {
2106         'debug' -> die( message => 'matlab_dir not configured, no matlab scripts will be generated.');
2107         return;
2108       }
2109     }
2110 end create_matlab_scripts
2111
2112 # }}}
2113
2114 # {{{ create_R_scripts
2115 start create_R_scripts
2116     {
2117       unless( -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ){
2118         'debug' -> die( message => 'Bootstrap R-script are not installed, no R-script will be generated.' );
2119         return;
2120       }
2121       cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
2122     }
2123 end create_R_scripts
2124 # }}}