removed "remove_temp_files" option and documented "clean" a bit
[PsN.git] / lib / tool / bootstrap_subs.pm
blob737bc14f288e8053808d703a97713f41864710b0
1 # {{{ include
3 start include statements
4 use strict;
5 use File::Copy 'cp';
6 use data;
7 use OSspecific;
8 use tool::llp;
9 use tool::cdd::jackknife;
10 use ext::Statistics::Distributions 'udistr', 'uprob';
11 use Math::Random;
12 use Data::Dumper;
13 end include
15 # }}} include statements
17 # {{{ new
19 start new
21 foreach my $attribute ( 'logfile', 'raw_results_file','raw_nonp_file' ) {
22 if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
23 ref($this -> {$attribute}) eq 'HASH' ) ) {
24 my $tmp = $this -> {$attribute};
25 $this -> {$attribute} = [];
26 for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
27 my $name = $tmp;
28 if ( $name =~ /\./ ) {
29 $name =~ s/\./$i\./;
30 } else {
31 $name = $name.$i;
33 my $ldir;
34 ( $ldir, $name ) =
35 OSspecific::absolute_path( $this -> {'directory'}, $name );
36 push ( @{$this -> {$attribute}}, $ldir.$name ) ;
41 end new
43 # }}} new
45 # {{{ general_pre_fork_setup
47 start general_pre_fork_setup
49 # These attributes can be given as a
50 # 1. A scalar : used for all models and problems (subjects can be a scalar ref to a hash)
51 # 2. A 1-dim. array : specified per problem but same for all models
52 # 3. A 2-dim. array : specified per problem and model
54 foreach my $attribute_string( 'stratify_on', 'samples', 'subjects' ){
55 my $attribute = $self -> {$attribute_string};
56 if ( defined $attribute ) {
57 # If stratification is given we check at what level it is
58 # given.
59 unless ( ref( \$attribute ) eq 'SCALAR' or
60 ref( \$attribute ) eq 'REF' or
61 ( ref( $attribute ) eq 'ARRAY' and scalar @{$attribute} > 0 ) ) {
62 # Here we know its neither scalar or a correct array. But we
63 # seem to assume the lenght be equal to the number of
64 # models.
65 debug -> die( message => "attribute $attribute_string is " .
66 "defined as a " . ref( $attribute ) .
67 " but is neither a scalar or a non-zero size array" );
68 } elsif ( ref( \$attribute ) eq 'SCALAR' or ref( \$attribute ) eq 'REF' ) {
69 # If it is a scalar we copy the scalar value into an array
70 # for each model, with one value for each problem.
71 my @mo_attribute = ();
72 foreach my $model ( @{$self -> {'models'}} ) {
73 my @pr_attribute = ();
74 foreach my $problem ( @{$model -> problems} ) {
75 push( @pr_attribute, $attribute );
77 push( @mo_attribute, \@pr_attribute );
79 $self -> {$attribute_string} = \@mo_attribute;
80 } elsif ( ref( $attribute ) eq 'ARRAY' ) {
81 # If it is an array we check if the it is an array of
82 # scalars or of arrays. If it is of arrays we seem to
83 # assume that the arrays have a length the matches the
84 # number of problems. It it is an array of scalars, we
85 # copy that scalar into arrays for each problem.
86 unless ( ref( \$attribute -> [0] ) eq 'SCALAR' or
87 ( ref( $attribute -> [0] ) eq 'ARRAY' and scalar @{$attribute -> [0]} > 0 ) ) {
88 debug -> die( message => "attribute $attribute is ",
89 "defined as a ",ref( $attribute -> [0] ),
90 "and is neither a scalar or a non-zero size array" );
91 } elsif ( ref(\$attribute -> [0]) eq 'SCALAR' ) {
92 my @mo_attribute = ();
93 foreach my $model ( @{$self -> {'models'}} ) {
94 push( @mo_attribute, $attribute );
96 $self -> {$attribute_string} = \@mo_attribute;
99 } else {
100 # If stratify is not defined we copy an undefined value to
101 # arrays corresponding to models and subproblem. I wonder if
102 # this is really necessary.
103 my @mo_attribute = ();
104 foreach my $model ( @{$self -> {'models'}} ) {
105 my @pr_attribute = ();
106 foreach my $data ( @{$model -> datas} ) {
107 if( $attribute_string eq 'stratify_on' ){
108 push( @pr_attribute, undef );
109 } elsif ( $attribute_string eq 'samples' ){
110 push( @pr_attribute, 200 );
111 } elsif ( $attribute_string eq 'subjects' ){
112 push( @pr_attribute, {'default'=>($data -> count_ind)} );
115 push( @mo_attribute, \@pr_attribute );
117 $self -> {$attribute_string} = \@mo_attribute;
121 end general_pre_fork_setup
123 # }}}
125 # {{{ modelfit_pre_fork_setup
127 start modelfit_pre_fork_setup
129 $self -> general_pre_fork_setup( model_number => $model_number );
131 end modelfit_pre_fork_setup
133 # }}}
135 # {{{ llp_pre_fork_setup
137 start llp_pre_fork_setup
139 $self -> general_pre_fork_setup( model_number => $model_number );
141 end llp_pre_fork_setup
143 # }}}
145 # {{{ modelfit_setup
147 start modelfit_setup
150 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
151 $self -> {'threads'} -> [1]:$self -> {'threads'};
152 $self -> general_setup( model_number => $model_number,
153 class => 'tool::modelfit',
154 subm_threads => $subm_threads );
156 end modelfit_setup
158 # }}}
160 # {{{ general_setup
162 start general_setup
165 # If the number of threads are given per tool, e.g. [2,5] meaning 2 threads for
166 # scm and 5 for the modelfit.
167 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
168 $self -> {'threads'} -> [1]:$self -> {'threads'};
169 my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
170 $self -> {'threads'} -> [0]:$self -> {'threads'};
171 # More threads than models?
172 my $num = scalar @{$self -> {'models'}};
173 $own_threads = $num if ( $own_threads > $num );
175 # Sub tool threads can be given as scalar or reference to an array?
176 #my $subm_threads = $parm{'subm_threads'};
177 #my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
178 # $self -> {'threads'} -> [0]:$self -> {'threads'};
179 # Group variable names are matched in the model, not the data!
180 my $model = $self -> {'models'} -> [$model_number-1];
181 my @samples = @{$self -> {'samples'} -> [$model_number-1]};
182 my @stratify_on = @{$self -> {'stratify_on'} -> [$model_number-1]};
183 my @subjects = @{$self -> {'subjects'} -> [$model_number-1]};
185 # Check which models that hasn't been run and run them This
186 # will be performed each step but will only result in running
187 # models at the first step, if at all.
189 # If more than one process is used, there is a VERY high risk
190 # of interaction between the processes when creating
191 # directories for model fits. Therefore the {'directory'}
192 # attribute is given explicitly below.
194 # ------------------------ Run original run -------------------------------
196 # {{{ orig run
198 unless ( $model -> is_run ) {
199 my %subargs = ();
200 if ( defined $self -> {'subtool_arguments'} ) {
201 %subargs = %{$self -> {'subtool_arguments'}};
204 if( $self -> {'nonparametric_etas'} or
205 $self -> {'nonparametric_marginals'} ) {
206 $model -> add_nonparametric_code;
209 my $orig_fit = tool::modelfit ->
210 new( reference_object => $self,
211 base_directory => $self -> {'directory'},
212 directory => $self -> {'directory'}.
213 '/orig_modelfit_dir'.$model_number,
214 models => [$model],
215 threads => $subm_threads,
216 parent_threads => $own_threads,
217 parent_tool_id => $self -> {'tool_id'},
218 logfile => undef,
219 raw_results => undef,
220 prepared_models => undef,
221 top_tool => 0,
222 %subargs );
224 ui -> print( category => 'bootstrap',
225 message => 'Executing base model.' );
227 $orig_fit -> run;
231 my $output = $model -> outputs -> [0];
233 # }}} orig run
235 # ------------------------ Print a log-header -----------------------------
237 # {{{ log header
239 my @orig_datas = @{$model -> datas};
240 my @problems = @{$model -> problems};
241 my @new_models;
243 # Print a log-header
244 # Lasse 2005-04-21: The minimization_message print will probably not work anymore
245 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
246 my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME ').',';
247 print LOG sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME '),',';
248 foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
249 my $orig_ests = $model -> outputs -> [0] -> $param;
250 # Loop the problems
251 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
252 if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
253 my $label = uc($param)."_".($j+1);
254 $ui_text = $ui_text.sprintf("%12s",$label).',';
255 print LOG sprintf("%12s",$label),',';
256 } else {
257 # Loop the parameter numbers (skip sub problem level)
258 for ( my $num = 1; $num <= scalar @{$orig_ests -> [$j][0]}; $num++ ) {
259 my $label = uc($param).$num."_".($j+1);
260 $ui_text = $ui_text.sprintf("%12s",$label).',';
261 print LOG sprintf("%12s",$label),',';
266 print LOG "\n";
268 # }}} log header
270 # ------------------------ Log original run -------------------------------
272 # {{{ Log original run
274 # Lasse 2005-04-21: The minimization_message print will probably not work anymore
275 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
276 $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename).',';
277 print LOG sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename),',';
278 foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
279 my $orig_ests = $model -> outputs -> [0] -> $param;
280 # Loop the problems
281 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
282 if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
283 $orig_ests -> [$j][0] =~ s/\n//g;
284 $ui_text = $ui_text.sprintf("%12s",$orig_ests -> [$j][0]).',';
285 print LOG sprintf("%12s",$orig_ests -> [$j][0]),',';
286 } else {
287 # Loop the parameter numbers (skip sub problem level)
288 for ( my $num = 0; $num < scalar @{$orig_ests -> [$j][0]}; $num++ ) {
289 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
290 print LOG sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
295 print LOG "\n";
297 # }}} Log original run
299 # TODO: In this loop we loose one dimension (problem) in that
300 # all new models with new data sets are put in the same array,
301 # regardless of which problem the initially belonged to. Fix
302 # this.
304 if ( $#orig_datas < 0 ) {
305 debug -> warn( level => 1,
306 message => "No data files to resample from" );
307 } else {
308 debug -> warn( level => 1,
309 message => "Starting bootstrap sampling" );
311 for ( my $i = 1; $i <= scalar @orig_datas; $i++ ) {
312 my $orig_data = $orig_datas[$i-1];
314 if ( $self -> {'drop_dropped'} ) {
315 my $model_copy = $model -> copy( copy_data => 1,
316 filename => "drop_copy_$i.mod",
317 directory => $self -> {'directory'} );
319 $model_copy -> drop_dropped;
320 $model_copy -> _write( write_data => 1 );
321 $model_copy -> datas -> [0] -> flush();
322 $orig_data = $model -> datas -> [0];
323 $model = $model_copy;
326 my ( @seed, $new_datas, $incl_ids, $incl_keys, $new_mod );
328 my $done = ( -e $self -> {'directory'}."/m$model_number/done.$i" ) ? 1 : 0;
330 if ( not $done ) {
331 ui -> print( category => 'bootstrap',
332 message => "Resampling from ".$orig_data -> filename );
334 ( $new_datas, $incl_ids, $incl_keys )
335 = $orig_data -> bootstrap( directory => $self -> {'directory'}.'/m'.$model_number,
336 name_stub => 'bs_pr'.$i,
337 samples => $samples[$i-1],
338 subjects => $subjects[$i-1],
339 stratify_on => $stratify_on[$i-1],
340 target => 'disk',
341 model_ids => $self -> {'prepared_model_ids'} );
344 for ( my $j = 0; $j < $samples[$i-1]; $j++ ) {
345 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
346 'bs_pr'.$i.'_'.($j+1).'.mod' );
347 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number ,
348 # '/bs_pr'.$i.'_'.$j.'.lst' );
349 my $prob_copy = Storable::dclone($problems[$i-1]);
350 # $Data::Dumper::Maxdepth = 2;
351 # print Dumper $model; die;
353 $new_mod = model ->
354 new(reference_object => $model,
355 sde => 0,
356 outputs => undef,
357 datas => undef,
358 synced => undef,
359 problems => undef,
360 active_problems => undef,
361 directory => $model_dir,
362 filename => $filename,
363 outputfile => undef,
364 problems => [$prob_copy],
365 extra_files => $model -> extra_files,
366 target => 'disk',
367 ignore_missing_files => 1 );
369 if( $self -> {'shrinkage'} ) {
370 $new_mod -> shrinkage_stats( enabled => 1 );
371 my @problems = @{$new_mod -> problems};
372 for( my $i = 1; $i <= scalar @problems; $i++ ) {
373 $problems[ $i-1 ] -> shrinkage_module -> model( $new_mod );
377 my $model_id = $new_mod -> register_in_database;
378 push( @{$self -> {'prepared_model_ids'}}, $model_id );
380 $self -> register_tm_relation( model_ids => [$model_id],
381 prepared_models => 1 );
383 $new_mod -> datas ( [$new_datas -> [$j]] );
385 if( $self -> {'nonparametric_etas'} or
386 $self -> {'nonparametric_marginals'} ) {
387 $new_mod -> add_nonparametric_code;
390 $new_mod -> update_inits( from_output => $output );
391 $new_mod -> _write;
393 push( @new_models, $new_mod );
395 # Create a checkpoint. Log the samples and individuals.
396 open( DONE, ">".$self -> {'directory'}."/m$model_number/done.$i" ) ;
397 print DONE "Resampling from ",$orig_data -> filename, " performed\n";
398 print DONE "$samples[$i-1] samples\n";
399 while( my ( $strata, $samples ) = each %{$subjects[$i-1]} ) {
400 print DONE "Strata $strata: $samples sample_size\n";
402 print DONE "Included individuals:\n";
403 @seed = random_get_seed;
404 print DONE "seed: @seed\n";
405 for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
406 print DONE join(',',@{$incl_ids -> [$k]}),"\n";
408 print DONE "Included keys:\n";
409 for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
410 print DONE join(',',@{$incl_keys -> [$k]}),"\n";
412 close( DONE );
413 open( INCL, ">".$self -> {'directory'}."included_individuals".$model_number.".csv" ) ;
414 for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
415 print INCL join(',',@{$incl_ids -> [$k]}),"\n";
417 close( INCL );
418 open( KEYS, ">".$self -> {'directory'}."included_keys".$model_number.".csv" ) ;
419 open( SAMPLEKEYS, ">".$self -> {'directory'}."sample_keys".$model_number.".csv" ) ;
420 my $ninds= ($orig_data -> count_ind());
421 for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
422 my %sample_keys;
423 my $sample_size = scalar @{$incl_keys -> [$k]};
424 for ( my $l = 0; $l < $ninds; $l++ ) {
425 $sample_keys{$incl_keys -> [$k][$l]}++;
427 for ( my $l = 0; $l < $ninds; $l++ ) {
428 my $val = defined $sample_keys{$l} ? $sample_keys{$l} : 0;
429 my $extra = ($l == ($ninds-1)) ? "\n" : ',';
430 print SAMPLEKEYS $val,$extra;
432 print KEYS join(',',@{$incl_keys -> [$k]}),"\n";
434 close( KEYS );
435 close( SAMPLEKEYS );
436 } else {
437 ui -> print( category => 'bootstrap',
438 message => "Recreating bootstrap from previous run." );
440 # Recreate the datasets and models from a checkpoint
441 my ($stored_filename, $stored_samples, %stored_subjects);
442 my @seed;
443 my ($stored_filename_found, $stored_samples_found, $stored_subjects_found, $stored_seed_found);
444 open( DONE, $self -> {'directory'}."/m$model_number/done.$i" );
445 while( <DONE> ){
446 if( /^Resampling from (.+) performed$/ ){
447 $stored_filename = $1;
448 $stored_filename_found = 1;
449 next;
451 if( /^(\d+) samples$/ ){
452 ui -> print( category => 'bootstrap',
453 message => "Samples: $1" );
454 $stored_samples = $1;
455 $stored_samples_found = 1;
456 next;
458 if( /^(\d+) subjects$/ ){
459 # Old format (pre 2.2.2)
460 $stored_subjects{'default'} = $1;
461 $stored_subjects_found = 1;
462 next;
464 if( /^Strata (\w+): (\d+) sample_size$/ ){
465 ui -> print( category => 'bootstrap',
466 message => "Strata $1, samples size: $2" );
467 $stored_subjects{$1} = $2;
468 $stored_subjects_found = 1;
469 next;
471 if( /^seed: (\d+) (\d+)$/ ){
472 @seed = ($1, $2);
473 $stored_seed_found = 1;
474 next;
477 if( $stored_filename_found and $stored_samples_found
478 and $stored_subjects_found and $stored_seed_found ){
479 last;
482 close( DONE );
483 unless( $stored_filename_found and $stored_samples_found
484 and $stored_samples_found and $stored_seed_found ){
485 debug -> die( level => 1,
486 message => "The bootstrap/m1/done file could not be parsed." );
489 if ( $stored_samples < $samples[$i-1] ) {
490 debug -> die( message => "The number of samples saved in previous run ($stored_samples) ".
491 "are bigger than the number of samples specified for this run (".
492 $samples[$i-1].")" );
494 while( my ( $strata, $samples ) = each %{$subjects[$i-1]} ) {
495 if ( $stored_subjects{$strata} != $samples ) {
496 debug -> die( message => "The number of individuals sampled i strata $strata ".
497 "in previous run (".
498 $stored_subjects{$strata}.
499 ") does not match the number of individuals specified ".
500 "for this run (".$samples.")" );
503 while( my ( $strata, $samples ) = each %stored_subjects ) {
504 if ( $subjects[$i-1]->{$strata} != $samples ) {
505 debug -> die( message => "The number of individuals sampled i strata $strata ".
506 "in previous run (". $samples .
507 ") does not match the number of individuals specified ".
508 "for this run (".$subjects[$i-1]->{$strata}.")" );
511 # my ( @stored_ids, @stored_keys );
512 # for ( my $k = 4; $k < 4+$stored_samples; $k++ ) {
513 # chomp($rows[$k]);
514 # my @sample_ids = split(',', $rows[$k] );
515 # push( @stored_ids, \@sample_ids );
517 # for ( my $k = 5+$stored_samples; $k < 5+2*$stored_samples; $k++ ) {
518 # chomp($rows[$k]);
519 # my @sample_keys = split(',', $rows[$k] );
520 # push( @stored_keys, \@sample_keys );
522 # @seed = split(' ',$rows[5+2*$stored_samples]);
523 # $incl_ids = \@stored_ids;
524 # $incl_keys = \@stored_keys;
525 # shift( @seed ); # get rid of 'seed'-word
526 # Reinitiate the model objects
527 for ( my $j = 1; $j <= $samples[$i-1]; $j++ ) {
528 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
529 $model_number,
530 'bs_pr'.$i.'_'.$j.'.mod' );
531 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
532 # $model_number,
533 # '/bs_pr'.$i.'_'.$j.'.lst' );
534 $new_mod = model ->
535 new( directory => $model_dir,
536 filename => $filename,
537 # outputfile => $outfilename,
538 extra_files => $model -> extra_files,
539 target => 'disk',
540 ignore_missing_files => 1 );
541 # $new_mod -> target( 'disk' );
542 push( @new_models, $new_mod );
543 # print "$j\n";
544 # print Dumper $new_mod;
545 # sleep(10);
547 random_set_seed( @seed );
548 ui -> print( category => 'bootstrap',
549 message => "Using $stored_samples previously resampled ".
550 "bootstrap sets from $stored_filename" )
551 unless $self -> {'parent_threads'} > 1;
553 # push( @{$self -> {'included_inds'} -> [$model_number-1]}, $incl_ids );
554 # push( @{$self -> {'included_keys'} -> [$model_number-1]}, $incl_keys );
556 $self -> {'prepared_models'}[$model_number-1]{'own'} = \@new_models;
558 # --------------------- Create the sub tools ------------------------------
560 # {{{ sub tools
562 my $subdir = $class;
563 $subdir =~ s/tool:://;
564 my @subtools = @{$self -> {'subtools'}};
565 shift( @subtools );
566 my %subargs = ();
567 if ( defined $self -> {'subtool_arguments'} ) {
568 %subargs = %{$self -> {'subtool_arguments'}};
570 push( @{$self -> {'tools'}},
571 $class ->
572 new( reference_object => $self,
573 models => \@new_models,
574 threads => $subm_threads,
575 directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
576 _raw_results_callback => $self ->
577 _modelfit_raw_results_callback( model_number => $model_number ),
578 subtools => \@subtools,
579 parent_threads => $own_threads,
580 parent_tool_id => $self -> {'tool_id'},
581 logfile => $self -> {'logfile'}[$model_number-1],
582 raw_results => undef,
583 prepared_models => undef,
584 top_tool => 0,
585 %subargs ) );
587 # ( clean => $self -> {'clean'},
588 # base_directory => $self -> {'directory'},
589 # compress => $self -> {'compress'},
590 # directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
591 # drop_dropped => $self -> {'drop_dropped'},
592 # wrap_data => $self -> {'wrap_data'},
593 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
594 # cpu_time => $self -> {'cpu_time'},
595 # run_on_lsf => $self -> {'run_on_lsf'},
596 # lsf_queue => $self -> {'lsf_queue'},
597 # lsf_options => $self -> {'lsf_options'},
598 # lsf_job_name => $self -> {'lsf_job_name'},
599 # lsf_project_name => $self -> {'lsf_project_name'},
601 # parent_tool_id => $self -> {'tool_id'},
603 # models => \@new_models,
604 # grid_batch_size => $self -> {'grid_batch_size'},
605 # nm_version => $self -> {'nm_version'},
606 # picky => $self -> {'picky'},
607 # retries => $self -> {'retries'},
608 # tweak_inits => $self -> {'tweak_inits'},
609 # handle_maxevals => $self -> {'handle_maxevals'},
610 # _raw_results_callback => $self ->
611 # _modelfit_raw_results_callback( model_number => $model_number ),
612 # threads => $subm_threads,
613 # subtools => \@subtools,
614 # logfile => $self -> {'logfile'}[$model_number-1],
615 # parent_threads => $own_threads,
616 # %subargs ) );
618 # }}} sub tools
621 end general_setup
623 # }}}
625 # {{{ llp_setup
626 start llp_setup
628 my @subm_threads;
629 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
630 @subm_threads = @{$self -> {'threads'}};
631 unshift(@subm_threads);
632 } else {
633 @subm_threads = ($self -> {'threads'});
635 $self -> general_setup( model_number => $model_number,
636 class => 'tool::llp',
637 subm_threads => \@subm_threads );
639 end llp_setup
640 # }}} llp_setup
642 # {{{ _jackknife_raw_results_callback
644 start _jackknife_raw_results_callback
646 # Use the bootstrap's raw_results file.
647 my ($dir,$file) =
648 OSspecific::absolute_path( $self -> {'directory'},
649 $self -> {'raw_results_file'}[$model_number-1] );
650 my ($dir,$nonp_file) =
651 OSspecific::absolute_path( $self -> {'directory'},
652 $self -> {'raw_nonp_file'}[$model_number-1] );
653 $subroutine = sub {
654 my $jackknife = shift;
655 my $modelfit = shift;
656 $modelfit -> raw_results_file( $dir.$file );
657 $modelfit -> raw_nonp_file( $dir.$nonp_file );
658 $modelfit -> raw_results_append( 1 );
659 my ( @new_header, %param_names );
660 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
661 unshift( @{$row}, 'jackknife' );
663 $modelfit -> {'raw_results_header'} = [];
665 return $subroutine;
667 end _jackknife_raw_results_callback
669 # }}} _jackknife_raw_results_callback
671 # {{{ _modelfit_raw_results_callback
673 start _modelfit_raw_results_callback
675 # Use the bootstrap's raw_results file.
676 my ($dir,$file) =
677 OSspecific::absolute_path( $self -> {'directory'},
678 $self -> {'raw_results_file'}[$model_number-1] );
679 my ($dir,$nonp_file) =
680 OSspecific::absolute_path( $self -> {'directory'},
681 $self -> {'raw_nonp_file'}[$model_number-1] );
682 my $orig_mod = $self -> {'models'}[$model_number-1];
683 my $type = $self -> {'type'};
684 $subroutine = sub {
685 my $modelfit = shift;
686 my $mh_ref = shift;
687 my %max_hash = %{$mh_ref};
688 $modelfit -> raw_results_file( $dir.$file );
689 $modelfit -> raw_nonp_file( $dir.$nonp_file );
691 # The prepare_raw_results in the modelfit will fix the
692 # raw_results for each bootstrap sample model, we must add
693 # the result for the original model.
695 my %dummy;
697 my ($raw_results_row, $nonp_rows) = $self -> create_raw_results_rows( max_hash => $mh_ref,
698 model => $orig_mod,
699 raw_line_structure => \%dummy );
701 $orig_mod -> outputs -> [0] -> flush;
703 unshift( @{$modelfit -> {'raw_results'}}, @{$raw_results_row} );
705 # {{{ New header
707 if ( $type eq 'bca' ) {
708 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
709 unshift( @{$row}, 'bootstrap' );
711 unshift( @{$modelfit -> {'raw_results_header'}}, 'method' );
714 @{$self -> {'raw_results_header'}} = @{$modelfit -> {'raw_results_header'}};
716 # }}} New header
719 return $subroutine;
721 end _modelfit_raw_results_callback
723 # }}} _modelfit_raw_results_callback
725 # {{{ modelfit_analyze
727 start modelfit_analyze
729 my @params = @{$self -> {'parameters'}};
730 my @diagnostic_params = @{$self -> {'diagnostic_parameters'}};
731 my ( @print_order, @calculation_order );
733 if ( $self -> {'type'} eq 'bca' ) {
735 # -------------------------- BCa method ---------------------------------
737 # {{{ BCa
739 @calculation_order = @{$self -> {'bca_calculation_order'}};
740 @print_order = @{$self -> {'bca_print_order'}};
741 my $jk_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
742 $self -> {'threads'} -> [1]:$self -> {'threads'};
743 my $done = ( -e $self -> {'directory'}."/jackknife_done.$model_number" ) ? 1 : 0;
744 if ( not $done ) {
746 # {{{ Create Jackknife
748 ui -> print( category => 'bootstrap',
749 message => "Running a Jackknife for the BCa estimates" );
750 $self -> {'jackknife'} = tool::cdd::jackknife ->
751 new( models => [$self -> models -> [$model_number -1]],
752 case_columns => $self -> models -> [$model_number -1]
753 -> datas -> [0] -> idcolumn,
754 _raw_results_callback => $self ->
755 _jackknife_raw_results_callback( model_number => $model_number ),
756 nm_version => $self -> {'nm_version'},
757 parent_tool_id => $self -> {'tool_id'},
758 threads => $jk_threads,
759 bca_mode => 1,
760 shrinkage => $self -> {'shrinkage'},
761 nonparametric_marginals => $self -> {'nonparametric_marginals'},
762 nonparametric_etas => $self -> {'nonparametric_etas'},
763 adaptive => $self -> {'adaptive'},
764 rerun => $self -> {'rerun'},
765 verbose => $self -> {'verbose'},
766 cross_validate => 0 );
767 # Create a checkpoint. Log the samples and individuals.
768 open( DONE, ">".$self -> {'directory'}."/jackknife_done.$model_number" ) ;
769 print DONE "Jackknife directory:\n";
770 print DONE $self -> {'jackknife'} -> directory,"\n";
771 my @seed = random_get_seed;
772 print DONE "seed: @seed\n";
773 close( DONE );
775 # }}} Create Jackknife
777 } else {
779 # {{{ Recreate Jackknife
781 open( DONE, $self -> {'directory'}."/jackknife_done.$model_number" );
782 my @rows = <DONE>;
783 close( DONE );
784 my ( $stored_directory ) = $rows[1];
785 chomp( $stored_directory );
786 if ( not -e $stored_directory ) {
787 debug -> die( message => "The Jackknife directory ".$stored_directory.
788 "indicated by ".$self -> {'directory'}.
789 "/jackknife_done.$model_number".
790 " from the old bootstrap run in ".
791 $self -> {'directory'}." does not exist" );
793 my @seed = split(' ',$rows[2]);
794 shift( @seed ); # get rid of 'seed'-word
795 $self -> {'jackknife'} = tool::cdd::jackknife ->
796 new( models => [$self -> models -> [$model_number -1]],
797 case_columns => $self -> models -> [$model_number -1]
798 -> datas -> [0] -> idcolumn,
799 _raw_results_callback => $self ->
800 _jackknife_raw_results_callback( model_number => $model_number ),
801 threads => $jk_threads,
802 parent_tool_id => $self -> {'tool_id'},
803 directory => $stored_directory,
804 bca_mode => 1,
805 shrinkage => $self -> {'shrinkage'},
806 nm_version => $self -> {'nm_version'},
807 nonparametric_marginals => $self -> {'nonparametric_marginals'},
808 nonparametric_etas => $self -> {'nonparametric_etas'},
809 adaptive => $self -> {'adaptive'},
810 rerun => $self -> {'rerun'},
811 verbose => $self -> {'verbose'},
812 cross_validate => 0 );
813 random_set_seed( @seed );
814 ui -> print( category => 'bootstrap',
815 message => "Restarting BCa Jackknife from ".
816 $stored_directory )
817 unless $self -> {'parent_threads'} > 1;
819 # }}} Recreate Jackknife
823 $self -> {'jackknife'} -> run;
824 $self -> {'jackknife_results'} = $self -> {'jackknife'} -> {'results'};
825 $self -> {'jackknife_prepared_models'} = $self -> {'jackknife'} -> {'prepared_models'};
827 $self -> {'jackknife_raw_results'}[$model_number-1] =
828 $self -> {'jackknife'} -> raw_results;
829 # $self -> {'jackknife'} -> raw_results -> [$model_number-1];
830 # $Data::Dumper::Maxdepth = 0;
831 # print Dumper $self -> {'jackknife_raw_results'};
833 # }}} BCa
835 } else {
836 @calculation_order = @{$self -> {'calculation_order'}};
837 @print_order = @{$self -> {'print_order'}};
838 $self -> {'bootstrap_raw_results'}[$model_number-1] =
839 $self -> {'tools'} -> [0] -> raw_results;
840 # $self -> {'tools'} -> [0] -> raw_results -> [$model_number-1];
842 unless( ref($self -> {'raw_results_header'}[0]) eq 'ARRAY' ) {
843 my $tmp = $self -> {'raw_results_header'};
844 $self -> {'raw_results_header'} = [];
845 $self -> {'raw_results_header'}[$model_number-1] = $tmp;
848 my @param_names = @{$self -> models -> [$model_number -1] -> outputs -> [0] -> labels};
849 my ( @diagnostic_names, @tmp_names );
850 foreach my $param ( @diagnostic_params ) {
851 push( @tmp_names, $param );
852 $tmp_names[$#tmp_names] =~ s/_/\./g;
854 for ( my $i = 0; $i <= $#param_names; $i++ ) {
855 unshift( @{$param_names[$i]}, 'OFV' );
856 push( @{$diagnostic_names[$i]}, @tmp_names );
858 if( defined $PsN::config -> {'_'} -> {'R'} and
859 -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ) {
860 # copy the bootstrap R-script
861 cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
862 # Execute the script
863 system( $PsN::config -> {'_'} -> {'R'}." CMD BATCH bootstrap.R" );
866 end modelfit_analyze
868 # }}}
870 # {{{ prepare_results
872 start prepare_results
874 # {{{ definitions
876 # The '3' is there to skip model, problem and subproblem numbers
877 my $skip_mps = 3;
878 my ( @calculation_order, @print_order, %diag_idx );
879 if ( $self -> {'type'} eq 'bca' ) {
880 @calculation_order = @{$self -> {'bca_calculation_order'}};
881 @print_order = @{$self -> {'bca_print_order'}};
882 } else {
883 @calculation_order = @{$self -> {'calculation_order'}};
884 @print_order = @{$self -> {'print_order'}};
886 if ( $self -> {'type'} eq 'bca' ) {
887 # $self -> read_raw_results();
888 $self -> bca_read_raw_results();
889 # if ( not defined $self -> {'bootstrap_raw_results'} );
890 } else {
891 # if ( not defined $self -> {'raw_results'} ) {
892 $self -> read_raw_results();
893 $self -> {'bootstrap_raw_results'} = $self -> {'raw_results'};
897 for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
898 $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i + $skip_mps;
901 # }}} definitions
903 # --------------------- Get data from raw_results -------------------------
905 # Divide the data into diagnostics and estimates. Included in estimates are
906 # the parametric estimates, the standard errors of these, the nonparametric
907 # estimates, the shrinkage in eta and the shrinkage in wres
908 # The diagnostics end up in {'bootstrap_diagnostics'} and
909 # {'jackknife_diagnostics'}. The estimates in {'bootstrap_estimates'} and
910 # {'jackknife_estimates'}.
911 # The number of runs that are selected for calculation of the results is
912 # saved.
914 # {{{ Get the data from the runs
916 foreach my $tool ( 'bootstrap', 'jackknife' ) {
917 if ( defined $self -> {$tool.'_raw_results'} ) {
918 for ( my $i = 0; $i < scalar @{$self->{$tool.'_raw_results'}}; $i++ ) { # All models
920 # {{{ Get the number of columns with estimates
922 my $cols_orig = 0;
923 foreach my $param ( 'theta', 'omega', 'sigma' ) {
924 my $labels =
925 $self -> {'models'} -> [$i] -> labels( parameter_type => $param );
926 # we can't use labels directly since different models may have different
927 # labels (still within the same modelfit)
928 my $numpar = scalar @{$labels -> [0]} if ( defined $labels and
929 defined $labels -> [0] );
930 $cols_orig += ( $numpar*3 ); # est + SE + eigen values
932 # nonparametric omegas and shrinkage
933 my $nomegas = $self -> {'models'} -> [$i] -> nomegas;
934 my $numpar = $nomegas -> [0];
936 # shrinkage omega + wres shrinkage
937 $cols_orig += $numpar + 1;
938 # $cols_orig += ($numpar*($numpar+1)/2 + $numpar + 1);
940 $cols_orig++; # OFV
942 # }}} Get the number of columns with estimates
944 # {{{ Loop, choose and set diagnostics and estimates
946 my %return_section;
947 $return_section{'name'} = 'Warnings';
948 my ( $skip_term, $skip_cov, $skip_warn, $skip_bound );
949 my $included = 0;
950 # print Dumper $self->{$tool.'_raw_results'};
951 for ( my $j = 0; $j < scalar @{$self->{$tool.'_raw_results'}->[$i]}; $j++ ) { # orig model + prepared_models
952 my $columns = scalar @{$self->{$tool.'_raw_results'}->[$i][$j]};
954 # ----------------------- Diagnostics -----------------------------
956 for ( my $m = $skip_mps; $m < scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m++ ) { # value
957 $self->{$tool.'_diagnostics'}->[$i][$j][$m-$skip_mps] =
958 $self->{$tool.'_raw_results'}->[$i][$j][$m];
960 my $use_run = 1;
961 if ( $self -> {'skip_minimization_terminated'} and
962 ( not defined $self->{$tool.'_raw_results'}->
963 [$i][$j][$diag_idx{'minimization_successful'}]
964 or not $self->{$tool.'_raw_results'}->
965 [$i][$j][$diag_idx{'minimization_successful'}] ) ) {
966 $skip_term++;
967 $use_run = 0;
968 } elsif ( $self -> {'skip_covariance_step_terminated'} and not
969 $self->{$tool.'_raw_results'}->
970 [$i][$j][$diag_idx{'covariance_step_successful'}] ) {
971 $skip_cov++;
972 $use_run = 0;
973 } elsif ( $self -> {'skip_with_covstep_warnings'} and
974 $self->{$tool.'_raw_results'}->
975 [$i][$j][$diag_idx{'covariance_step_warnings'}] ) {
976 $skip_warn++;
977 $use_run = 0;
978 } elsif ( $self -> {'skip_estimate_near_boundary'} and
979 $self->{$tool.'_raw_results'}->
980 [$i][$j][$diag_idx{'estimate_near_boundary'}] ) {
981 $skip_bound++;
982 $use_run = 0;
985 # ------------------------ Estimates ------------------------------
987 if( $use_run ) {
988 for ( my $m = scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m < $columns; $m++ ) { # value
989 my $val = $self->{$tool.'_raw_results'}->[$i][$j][$m];
990 $self->{$tool.'_estimates'}->
991 [$i][$included][$m-(scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps)] = $val;
993 $included++;
997 # }}} Loop, choose and set diagnostics and estimates
999 # {{{ push #runs to results
1001 if ( defined $skip_term ) {
1002 push( @{$return_section{'values'}}, "$skip_term runs with miminization ".
1003 "terminated were skipped when calculating the $tool results" );
1005 if ( defined $skip_cov ) {
1006 push( @{$return_section{'values'}}, "$skip_cov runs with aborted ".
1007 "covariance steps were skipped when calculating the $tool results" );
1009 if ( defined $skip_warn ) {
1010 push( @{$return_section{'values'}}, "$skip_warn runs with errors from ".
1011 "the covariance step were skipped when calculating the $tool results" );
1013 if ( defined $skip_bound ) {
1014 push( @{$return_section{'values'}}, "$skip_bound runs with estimates ".
1015 "near a boundary were skipped when calculating the $tool results" );
1017 $return_section{'labels'} = [];
1018 push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1020 # }}} push #runs to results
1026 # $Data::Dumper::Maxdepth = 5;
1027 # die Dumper $self -> {'bootstrap_diagnostics'};
1029 # }}} Get the data from the runs
1031 # ---------------------- Calculate the results ----------------------------
1033 # {{{ Result calculations
1035 for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1037 my $mps_offset = $self -> {'bca'} ? 4 : 3; # <- this is the offset to
1038 # diagonstic_parameters,
1039 # which is one more for
1040 # the method column added
1041 # with a bca run.
1043 my @param_names = @{$self -> {'raw_results_header'}[$i]}[($mps_offset + scalar @{$self -> {'diagnostic_parameters'}}) .. (scalar @{$self -> {'raw_results_header'}[$i]} - 1)];
1044 my ( @diagnostic_names, @tmp_names );
1045 foreach my $param ( @{$self -> {'diagnostic_parameters'}} ) {
1046 push( @tmp_names, $param );
1047 $tmp_names[$#tmp_names] =~ s/_/\./g;
1050 @diagnostic_names = @tmp_names;
1051 foreach my $result_type ( @calculation_order ) {
1052 my @names = $result_type eq 'diagnostic_means' ?
1053 @diagnostic_names : @param_names;
1054 my $calc = 'calculate_'.$result_type;
1055 $self -> $calc( model_number => ($i+1),
1056 parameter_names => \@names );
1058 foreach my $result_type ( @print_order ) {
1059 my $name = $result_type;
1060 $name =~ s/_/\./g;
1061 my %return_section;
1062 $return_section{'name'} = $name;
1063 $return_section{'values'} = $self -> {$result_type} -> [$i];
1064 $return_section{'labels'} = $self -> {$result_type.'_labels'} -> [$i];
1065 push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1069 # }}} Result calculations
1072 end prepare_results
1074 # }}} prepare_results
1076 # {{{ print_summary
1078 start print_summary
1080 sub acknowledge {
1081 my $name = shift;
1082 my $outcome = shift;
1083 my $file = shift;
1084 my $l = (7 - length( $outcome ))/2;
1085 my $c_num = '00';
1086 $c_num = '07' if ( $outcome eq 'OK' );
1087 $c_num = '13' if ( $outcome eq 'WARNING' );
1088 $c_num = '05' if ( $outcome eq 'ERROR' );
1089 # my $text = sprintf( "%-66s%2s%7s%-5s\n\n", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1090 my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1091 # cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1092 # my $text = cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1093 print $text, "\n\n";
1094 print $file $text if defined $file;
1097 sub sum {
1098 my $arr = shift;
1099 my $sum = 0;
1100 for ( @{$arr} ) {
1101 $sum += $_;
1103 return $sum;
1106 my $diag_number = scalar @{$self -> {'diagnostic_parameters'}} - 1;
1107 my %diag_idxs;
1108 for ( my $i = 0; $i <= $diag_number; $i++ ) {
1109 $diag_idxs{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1112 open( my $log, ">test.log" );
1113 for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1114 print "MODEL ",$i+1,"\n" if ( scalar @{$self -> {'bootstrap_raw_results'}} > 1 );
1115 if ( $self -> {'diagnostic_means'} ->
1116 [$i][0][$diag_idxs{'minimization_successful'}] >
1117 $self -> {'minimization_successful_limit'} ) {
1118 acknowledge( 'Successful minimization ratio = '.
1119 $self -> {'diagnostic_means'} ->
1120 [$i][0][$diag_idxs{'minimization_successful'}], 'OK', $log );
1121 } else {
1122 acknowledge( 'Termination problems in '.
1123 sprintf("%4.2f", (100-($self -> {'diagnostic_means'} ->
1124 [$i][0][$diag_idxs{'minimization_successful'}]*100)))
1125 .'% of the bootstrap runs', 'WARNING', $log );
1128 if ( $self -> {'diagnostic_means'} ->
1129 [$i][0][$diag_idxs{'covariance_step_successful'}] >
1130 $self -> {'covariance_step_successful_limit'} ) {
1131 acknowledge( 'Successful covariance step ratio = '.$self -> {'diagnostic_means'} ->
1132 [$i][0][$diag_idxs{'covariance_step_successful'}], 'OK', $log );
1133 } else {
1134 acknowledge( 'Covariance step terminated in '.
1135 sprintf("%4.2f", (100-($self -> {'diagnostic_means'} ->
1136 [$i][0][$diag_idxs{'covariance_step_successful'}]*100)))
1137 .'% of the bootstrap runs', 'WARNING', $log );
1140 if ( $self -> {'diagnostic_means'} ->
1141 [$i][0][$diag_idxs{'covariance_step_warnings'}] <
1142 $self -> {'covariance_step_warnings_limit'} ) {
1143 acknowledge( 'Covariance step warnings ratio = '.$self -> {'diagnostic_means'} ->
1144 [$i][0][$diag_idxs{'covariance_step_warnings'}], 'OK', $log );
1145 } else {
1146 acknowledge( 'Covariance step warnings in '.
1147 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1148 [$i][0][$diag_idxs{'covariance_step_warnings'}]*100))
1149 .'% of the bootstrap runs', 'WARNING', $log );
1152 if ( $self -> {'diagnostic_means'} ->
1153 [$i][0][$diag_idxs{'estimate_near_boundary'}] <
1154 $self -> {'estimate_near_boundary_limit'} ) {
1155 acknowledge( 'Estimate near boundary ratio = '.$self -> {'diagnostic_means'} ->
1156 [$i][0][$diag_idxs{'estimate_near_boundary'}], 'OK', $log );
1157 } else {
1158 acknowledge( 'Estimate near boundary found in '.
1159 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1160 [$i][0][$diag_idxs{'estimate_near_boundary'}]*100))
1161 .'% of the bootstrap runs', 'WARNING', $log );
1163 my $sum = sum( $self -> {'within_se_confidence_intervals'}->
1164 [$i]{$self -> {'se_confidence_intervals_level'}} );
1165 if ( not defined $sum or $sum < 1 ) {
1166 acknowledge( 'No '.(100-$self -> {'se_confidence_intervals_level'}).
1167 '% confidence intervals based on the'.
1168 ' bootstrap standard errors include '.
1169 $self -> {'se_confidence_intervals_check'}, 'OK', $log );
1170 } else {
1171 acknowledge( "$sum ".(100-$self -> {'se_confidence_intervals_level'}).
1172 "% confidence intervals based on the bootstrap".
1173 " SE's include ".
1174 $self -> {'se_confidence_intervals_check'}, 'WARNING', $log );
1175 my $found =
1176 scalar @{$self -> {'within_se_confidence_intervals'}->
1177 [$i]{$self -> {'se_confidence_intervals_level'}}} - 1;
1178 for ( my $l = 0; $l <= $found ; $l++ ) {
1179 if ( $self -> {'within_se_confidence_intervals'}->
1180 [$i]{$self -> {'se_confidence_intervals_level'}}[$l] ) {
1181 printf( "\t%-20s\n",
1182 $self -> {'standard_error_confidence_intervals_labels'} ->
1183 [$i][1][$l] );
1184 print $log
1185 sprintf( "\t%-20s\n",
1186 $self -> {'standard_error_confidence_intervals_labels'} ->
1187 [$i][1][$l] );
1188 print "\n" if ( $l == $found );
1189 print $log "\n" if ( $l == $found );
1194 my $sum = sum( $self -> {'large_bias'}-> [$i][0] );
1195 if ( not defined $sum or $sum < 1 ) {
1196 acknowledge( 'No bias larger than '.
1197 ($self -> {'large_bias_limit'}*100).'% found', 'OK', $log );
1198 } else {
1199 acknowledge( "$sum estimates were found to have a relative bias > ".
1200 $self -> {'large_bias_limit'}, 'WARNING', $log );
1201 my $found =
1202 scalar @{$self -> {'large_bias'}->
1203 [$i][0]} - 1;
1204 for ( my $l = 0; $l <= $found ; $l++ ) {
1205 if ( $self -> {'large_bias'}->
1206 [$i][0][$l] ) {
1207 printf( "\t%-20s%3.2f %\n", $self -> {'bias_labels'} ->
1208 [$i][1][$l], ($self -> {'bias'} -> [$i][0][$l]/
1209 $self->{'bootstrap_estimates'} -> [$i][0][$l])*100 );
1210 # print $log
1211 # sprintf( "\t%-20s\n",
1212 # $self -> {'percentile_confidence_intervals_labels'} ->
1213 # [$i][1][$l] );
1214 print "\n" if ( $l == $found );
1215 print $log "\n" if ( $l == $found );
1220 if ( $self -> {'type'} eq 'bca' ) {
1221 my $sum = sum( $self -> {'within_bca_confidence_intervals'}->
1222 [$i]{$self -> {'bca_confidence_intervals_level'}} );
1223 if ( not defined $sum or $sum < 1 ) {
1224 acknowledge( 'No '.(100-$self -> {'bca_confidence_intervals_level'}).
1225 '% BCa confidence intervals include '.
1226 $self -> {'bca_confidence_intervals_check'}, 'OK', $log );
1227 } else {
1228 acknowledge( "$sum ".(100-$self -> {'bca_confidence_intervals_level'}).
1229 "% BCa confidence intervals include ".
1230 $self -> {'bca_confidence_intervals_check'}, 'WARNING', $log );
1231 my $found =
1232 scalar @{$self -> {'within_bca_confidence_intervals'}->
1233 [$i]{$self -> {'bca_confidence_intervals_level'}}} - 1;
1234 for ( my $l = 0; $l <= $found ; $l++ ) {
1235 if ( $self -> {'within_bca_confidence_intervals'}->
1236 [$i]{$self -> {'bca_confidence_intervals_level'}}[$l] ) {
1237 printf( "\t%-20s\n",
1238 $self -> {'bca_confidence_intervals_labels'} ->
1239 [$i][1][$l] );
1240 print $log
1241 sprintf( "\t%-20s\n",
1242 $self -> {'bca_confidence_intervals_labels'} ->
1243 [$i][1][$l] );
1244 print "\n" if ( $l == $found );
1245 print $log "\n" if ( $l == $found );
1249 } else {
1250 my $sum = sum( $self -> {'within_percentiles'}->
1251 [$i]{$self -> {'percentile_confidence_intervals_level'}} );
1252 if ( not defined $sum or $sum < 1 ) {
1253 acknowledge( 'No '.(100-$self -> {'percentile_confidence_intervals_level'}).
1254 '% confidence intervals based on the'.
1255 ' bootstrap percentiles include '.
1256 $self -> {'percentile_confidence_intervals_check'}, 'OK', $log );
1257 } else {
1258 acknowledge( "$sum ".(100-$self -> {'percentile_confidence_intervals_level'}).
1259 "% confidence intervals based on the percentiles".
1260 " include ".
1261 $self -> {'percentile_confidence_intervals_check'}, 'WARNING', $log );
1262 my $found =
1263 scalar @{$self -> {'within_percentiles'}->
1264 [$i]{$self -> {'percentile_confidence_intervals_level'}}} - 1;
1265 for ( my $l = 0; $l <= $found ; $l++ ) {
1266 if ( $self -> {'within_percentiles'}->
1267 [$i]{$self -> {'percentile_confidence_intervals_level'}}[$l] ) {
1268 printf( "\t%-20s\n",
1269 $self -> {'percentile_confidence_intervals_labels'} ->
1270 [$i][1][$l] );
1271 print $log
1272 sprintf( "\t%-20s\n",
1273 $self -> {'percentile_confidence_intervals_labels'} ->
1274 [$i][1][$l] );
1275 print "\n" if ( $l == $found );
1276 print $log "\n" if ( $l == $found );
1282 close ( $log );
1284 end print_summary
1286 # }}} print_summary
1288 # {{{ bca_read_raw_results
1290 start bca_read_raw_results
1292 $self -> {'raw_results_header'} = [];
1293 for ( my $i = 1; $i <= scalar @{$self->{'models'}}; $i++ ) { # All models
1294 if ( -e $self -> {'directory'}.'raw_results'.$i.'.csv' ) {
1295 open( RRES, $self -> {'directory'}.'raw_results'.$i.'.csv' );
1296 my @file = <RRES>;
1297 close( RRES );
1298 map { chomp; my @tmp = split(',',$_); $_ = \@tmp } @file ;
1300 my $header = shift @file;
1302 # Get rid of 'method' column
1303 my $cols = scalar(@{$header})-1;
1304 @{$self -> {'raw_results_header'}[$i-1]} = @{$header}[1..$cols];
1305 $self -> {'raw_results'} -> [$i-1] = \@file;
1306 for( my $j = 0; $j <= $#file; $j++ ) {
1307 if ( $file[$j][0] eq 'jackknife' ) {
1308 shift( @{$file[$j]} );
1309 # $self -> {'jackknife_raw_results'}[$i-1] = \@file;
1310 push( @{$self -> {'jackknife_raw_results'}[$i-1]}, $file[$j]);
1311 } else {
1312 shift( @{$file[$j]} );
1313 # $self -> {'bootstrap_raw_results'}[$i-1] = \@file;
1314 push( @{$self -> {'bootstrap_raw_results'}[$i-1]}, $file[$j] );
1320 end bca_read_raw_results
1322 # }}} bca_read_raw_results
1324 # {{{ calculate_diagnostic_means
1326 start calculate_diagnostic_means
1328 my ( @sum, @diagsum, %diag_idx );
1329 for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
1330 $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1333 my $def = 0;
1334 # Prepared model, skip the first (the original)
1335 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_diagnostics'} ->
1336 [$model_number-1]}; $k++ ) {
1337 # Diagnostics
1338 if( defined $self -> {'bootstrap_diagnostics'} ->
1339 [$model_number-1][$k] ) {
1340 $def++;
1341 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_diagnostics'} ->
1342 [$model_number-1][$k]}; $l++ ) {
1343 $sum[$l] += $self -> {'bootstrap_diagnostics'} ->
1344 [$model_number-1][$k][$l];
1349 # divide by the number of bootstrap samples (-1 to get rid of the original
1350 # model) The [0] in the index is there to indicate the 'model' level. Mostly
1351 # used for printing
1352 for ( my $l = 0; $l <= $#sum; $l++ ) {
1353 if( $l == $diag_idx{'significant_digits'} ) {
1354 $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1355 $sum[$l] / $def;
1356 } else {
1357 $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1358 $sum[$l] / ( scalar @{$self -> {'bootstrap_diagnostics'} ->
1359 [$model_number-1]} - 1);
1362 $self -> {'diagnostic_means_labels'} -> [$model_number-1] =
1363 [[],\@parameter_names];
1365 end calculate_diagnostic_means
1367 # }}} calculate_diagnostic_means
1369 # {{{ calculate_means
1371 start calculate_means
1373 my ( @sum, @diagsum );
1374 # Prepared model, skip the first (the original)
1375 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'} ->
1376 [$model_number-1]}; $k++ ) {
1377 # Estimates
1378 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'} ->
1379 [$model_number-1][$k]}; $l++ ) {
1380 $sum[$l] += $self -> {'bootstrap_estimates'} ->
1381 [$model_number-1][$k][$l];
1384 # divide by the number of bootstrap samples (-1 to get rid of the original
1385 # model) The [0] in the index is there to indicate the 'model' level. Mostly
1386 # used for printing
1387 my $samples = scalar @{$self -> {'bootstrap_estimates'} ->
1388 [$model_number-1]} - 1;
1389 for ( my $l = 0; $l <= $#sum; $l++ ) {
1390 my $mean = $sum[$l] / $samples;
1391 $self -> {'means'} -> [$model_number-1][0][$l] = $mean;
1392 my $bias = $mean - $self ->
1393 {'bootstrap_estimates'} -> [$model_number-1][0][$l];
1394 $self -> {'bias'} -> [$model_number-1][0][$l] = $bias;
1395 if ( $self->{'bootstrap_estimates'} -> [$model_number-1][0][$l] != 0 and
1396 $bias/$self->{'bootstrap_estimates'} -> [$model_number-1][0][$l]
1397 > $self -> {'large_bias_limit'} ) {
1398 $self -> {'large_bias'} -> [$model_number-1][0][$l] = 1;
1399 } else {
1400 $self -> {'large_bias'} -> [$model_number-1][0][$l] = 0;
1403 $self -> {'means_labels'} -> [$model_number-1] =
1404 [[],\@parameter_names];
1406 $self -> {'bias_labels'} -> [$model_number-1] =
1407 [[],\@parameter_names];
1409 end calculate_means
1411 # }}} calculate_means
1413 # {{{ calculate_jackknife_means
1415 start calculate_jackknife_means
1417 my @sum;
1418 # Prepared model, skip the first (the original)
1419 if( defined $self -> {'jackknife_estimates'} ){
1420 for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]}; $k++ ) {
1421 # Estimate
1422 for ( my $l = 0; $l <
1423 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][$k]}; $l++ ) {
1424 $sum[$l] += $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1427 # divide by the number of jackknife samples (-1 to get rid of the original model)
1428 # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1429 for ( my $l = 0; $l <
1430 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][0]}; $l++ ) {
1431 if( ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1) != 0 ) {
1432 $self -> {'jackknife_means'} -> [$model_number-1][0][$l] =
1433 $sum[$l] / ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1);
1436 $self -> {'jackknife_means_labels'} -> [$model_number-1] = [[],\@parameter_names];
1439 end calculate_jackknife_means
1441 # }}} calculate_jackknife_means
1443 # {{{ calculate_medians
1444 start calculate_medians
1446 my @medians;
1447 # Loop the parameters
1448 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1449 [$model_number-1][0]}; $l++ ) {
1450 my @parameter_array;
1451 # From 1 to get rid of original model
1452 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1453 [$model_number-1]}; $k++ ) {
1454 $parameter_array[$k-1] =
1455 $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1457 my @sorted = sort {$a <=> $b} @parameter_array;
1458 # median postition is half the ( array length - 1 ).
1459 my $median_position = ( $#sorted ) / 2;
1460 my ($int_med,$frac_med) = split(/\./, $median_position );
1461 $frac_med = eval("0.".$frac_med);
1462 my $median_low = $sorted[ $int_med ];
1463 my $median_high = ( $sorted[ $int_med + 1 ] - $sorted[ $int_med ] ) * $frac_med;
1464 $medians[$l] = $median_low + $median_high;
1466 # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1467 $self -> {'medians'} -> [$model_number-1][0] = \@medians;
1468 $self -> {'medians_labels'} -> [$model_number-1] = [[],\@parameter_names];
1470 end calculate_medians
1471 # }}} calculate_medians
1473 # {{{ calculate_standard_error_confidence_intervals
1474 start calculate_standard_error_confidence_intervals
1476 # Sort the limits from the inside out
1477 my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1478 foreach my $limit ( @limits ) {
1479 my ( @lower_limits, @upper_limits, @within_ci );
1480 # Loop the estimates of the first (original) model
1481 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1482 [$model_number-1][0]}; $l++ ) {
1483 my $lower_limit =
1484 $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] -
1485 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1486 $self -> {'confidence_limits'} ->{$limit};
1487 my $upper_limit =
1488 $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] +
1489 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1490 $self -> {'confidence_limits'} ->{$limit};
1491 push( @lower_limits, $lower_limit );
1492 push( @upper_limits, $upper_limit );
1493 if ( $self -> {'se_confidence_intervals_check'} < $upper_limit and
1494 $self -> {'se_confidence_intervals_check'} > $lower_limit ) {
1495 push( @within_ci , 1 );
1496 } else {
1497 push( @within_ci , 0 );
1500 unshift( @{$self -> {'standard_error_confidence_intervals'} ->
1501 [$model_number-1]}, \@lower_limits );
1502 push( @{$self -> {'standard_error_confidence_intervals'} ->
1503 [$model_number-1]}, \@upper_limits );
1504 $self -> {'within_se_confidence_intervals'} ->
1505 [$model_number-1]{$limit} = \@within_ci;
1506 unshift( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1507 [$model_number-1][0]}, ($limit/2).'%' );
1508 push( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1509 [$model_number-1][0]}, (100-($limit/2)).'%' );
1510 push( @{$self -> {'within_se_confidence_intervals_labels'} ->
1511 [$model_number-1][0]}, $limit.'%' );
1513 $self -> {'standard_error_confidence_intervals_labels'} -> [$model_number-1][1] =
1514 \@parameter_names;
1515 $self -> {'within_se_confidence_intervals_labels'} -> [$model_number-1][1] =
1516 \@parameter_names;
1518 end calculate_standard_error_confidence_intervals
1519 # }}} calculate_standard_error_confidence_intervals
1521 # {{{ calculate_standard_errors
1523 start calculate_standard_errors
1525 my @se;
1526 # Prepared model, skip the first (the original)
1527 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]}; $k++ ) {
1528 # Estimate
1529 for ( my $l = 0; $l <
1530 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][$k]}; $l++ ) {
1531 $se[$l] += ( $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l] -
1532 $self -> {'means'}->[$model_number-1][0][$l] )**2;
1535 # divide by the number of bootstrap samples -1 (-2 to get rid of the original model)
1536 # The [0] in the index is there to indicate the 'model' level.
1537 for ( my $l = 0; $l <
1538 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][0]}; $l++ ) {
1539 my $div = ( scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]} - 2 );
1540 if( defined $div and not $div == 0 ) {
1541 $self -> {'standard_errors'} -> [$model_number-1][0][$l] =
1542 ($se[$l] / $div )**0.5;
1543 } else {
1544 $self -> {'standard_errors'} -> [$model_number-1][0][$l] = undef;
1547 $self -> {'standard_errors_labels'} -> [$model_number-1] = [[],\@parameter_names];
1549 end calculate_standard_errors
1551 # }}} calculate_standard_errors
1553 # {{{ calculate_bca_confidence_intervals
1555 start calculate_bca_confidence_intervals
1557 sub c_get_z0 {
1558 my $arr_ref = shift;
1559 my $orig_value = shift;
1560 my $num_less_than_orig = 0;
1561 my $nvalues = 0;
1562 my $z0;
1563 foreach my $value ( @{$arr_ref} ) {
1564 if ( defined $value and $value ne '' ) {
1565 $num_less_than_orig++ if ( $value < $orig_value );
1566 $nvalues ++;
1570 unless ( $nvalues == 0 ) {
1571 if ( ($num_less_than_orig / $nvalues ) == 0 ) {
1572 $z0 = -100;
1573 } elsif ( ($num_less_than_orig / $nvalues ) == 1 ) {
1574 $z0 = 100;
1575 } else {
1576 $z0 = udistr( 1 - ($num_less_than_orig / $nvalues ) );
1579 # return ( $z0, $nvalues );
1580 # print sprintf( "%4s:%5.0f,%4s:%15.8g ",'N:',$num_less_than_orig,'ZO', $z0);
1581 return $z0;
1584 sub c_get_acc {
1585 my $arr_ref = shift;
1586 my $jk_mean = shift;
1587 my $acc_upper = 0;
1588 my $acc_lower = 0;
1589 my $nvalues = 0;
1590 my $acc;
1591 foreach my $value ( @{$arr_ref} ){
1592 if ( defined $value and $value ne '' ) {
1593 $acc_upper = $acc_upper + ($jk_mean-$value)**3;
1594 $acc_lower = $acc_lower + ($jk_mean-$value)**2;
1595 $nvalues ++;
1598 $acc_lower = 6*($acc_lower**(3/2));
1599 unless ( $acc_lower == 0 ) {
1600 $acc = $acc_upper / $acc_lower;
1601 } else {
1602 $acc = $acc_upper / 0.001;
1604 # return ( $acc, $nvalues );
1605 # print sprintf( "%4s:%15.8g%4s%8.5g\n",'ACC', $acc,'JKm', $jk_mean);
1606 return $acc;
1609 sub c_get_alphas {
1610 my $old_alphas = shift;
1611 my $acc = shift;
1612 my $z0 = shift;
1613 my $denom;
1614 my @new_alphas = ();
1615 foreach my $position ( @{$old_alphas} ) {
1616 if ( $position == 0 ){
1617 $denom = -100;
1618 } elsif ( $position == 100 ) {
1619 $denom = 100;
1620 } else {
1621 $denom = $z0 + udistr( 1 - $position/100 );
1623 my $nom = 1 - $acc * $denom;
1624 my $lim = 100*uprob( - ( $z0 + $denom / $nom ) );
1625 push( @new_alphas, $lim );
1627 # print "@new_alphas\n";
1628 return \@new_alphas;
1631 my @limits = sort { $a <=> $b } keys %{$self -> {'confidence_limits'}};
1632 # Add the upper limits
1633 my $limnum = $#limits;
1634 for ( my $i = $limnum; $i >= 0; $i-- ) {
1635 $limits[$i] = $limits[$i]/2;
1636 push( @limits, 100-$limits[$i] );
1638 my ( @bootstrap_array, @jackknife_array, @new_alphas, @z0, @acc );
1639 # Loop the estimates of the first (original) model
1640 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1641 [$model_number-1][0]}; $l++ ) {
1642 my ( @unsorted_array1, @unsorted_array2 );
1643 # Loop the bootstrap samples from 1 to get rid of original model
1644 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1645 [$model_number-1]}; $k++ ) {
1646 $unsorted_array1[$k-1] =
1647 $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1649 @{$bootstrap_array[$l]} = sort {$a <=> $b} @unsorted_array1;
1651 # Loop the jackknife samples from 1 to get rid of original model
1652 for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->
1653 [$model_number-1]}; $k++ ) {
1654 $unsorted_array2[$k-1] =
1655 $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1657 @{$jackknife_array[$l]} = sort {$a <=> $b} @unsorted_array2;
1658 $z0[$l] = c_get_z0 ( $bootstrap_array[$l],
1659 $self -> {'bootstrap_estimates'} ->
1660 [$model_number-1][0][$l] );
1661 $acc[$l] = c_get_acc ( $jackknife_array[$l],
1662 $self -> {'jackknife_means'} ->
1663 [$model_number-1][0][$l] );
1664 $new_alphas[$l] = c_get_alphas ( \@limits, $acc[$l], $z0[$l] );
1666 # Loop limits
1667 for ( my $lim_idx = 0; $lim_idx <= $#limits; $lim_idx++ ) {
1668 my @percentiles;
1669 # Loop parameters
1670 for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1671 my $limit = $new_alphas[$l][$lim_idx]/100;
1672 my $position = ( scalar @{$bootstrap_array[$l]} + 1 ) * $limit;
1673 my $percentile;
1674 if ( $position < 1 ) {
1675 $percentile = undef;
1676 } elsif ( $position > scalar @{$bootstrap_array[$l]} ) {
1677 $percentile = undef;
1678 } else {
1679 my ($int_med,$frac_med) = split(/\./, $position );
1680 $frac_med = eval("0.".$frac_med);
1681 my $percentile_low = $bootstrap_array[$l][ $int_med - 1];
1682 my $percentile_high = ( $bootstrap_array[$l][ $int_med ] -
1683 $bootstrap_array[$l][ $int_med - 1] ) * $frac_med;
1684 $percentile = $percentile_low + $percentile_high;
1686 push( @percentiles, $percentile );
1688 push( @{$self -> {'bca_confidence_intervals'} -> [$model_number-1]},
1689 \@percentiles );
1690 push( @{$self -> {'bca_confidence_intervals_labels'}->[$model_number-1][0]},
1691 $limits[$lim_idx].'%');
1693 # Check the intervals
1694 for ( my $lim_idx = 0; $lim_idx <= $limnum; $lim_idx++ ) {
1695 my @within_ci;
1696 for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1697 my $lower_limit = $self -> {'bca_confidence_intervals'} ->
1698 [$model_number-1][$lim_idx][$l];
1699 my $upper_limit = $self -> {'bca_confidence_intervals'} ->
1700 [$model_number-1][($limnum*2+1)-$lim_idx][$l];
1701 if ( $self -> {'bca_confidence_intervals_check'} < $upper_limit and
1702 $self -> {'bca_confidence_intervals_check'} > $lower_limit ) {
1703 push( @within_ci , 1 );
1704 } else {
1705 push( @within_ci , 0 );
1708 $self -> {'within_bca_confidence_intervals'} ->
1709 [$model_number-1]{$limits[$lim_idx]*2} = \@within_ci;
1711 $self -> {'bca_confidence_intervals_labels'} -> [$model_number-1][1] =
1712 \@parameter_names;
1714 end calculate_bca_confidence_intervals
1716 # }}} calculate_bca_confidence_intervals
1718 # {{{ calculate_percentile_confidence_intervals
1720 start calculate_percentile_confidence_intervals
1722 # Sort the limits from the inside out
1723 my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1724 foreach my $limit ( @limits ) {
1725 my ( @lower_limits, @upper_limits, @within_ci );
1726 # Loop the estimates of the first (original) model
1727 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1728 [$model_number-1][0]}; $l++ ) {
1729 my @parameter_array;
1730 # Loop the bootstrap samples from 1 to get rid of original model
1731 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1732 [$model_number-1]}; $k++ ) {
1733 my $val = $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1734 # get rid of undefined values (these were probably deleted
1735 # when the bootstrap_estimates was created
1736 push( @parameter_array, $val ) if( defined $val );
1738 my @sorted = sort {$a <=> $b} @parameter_array;
1739 for my $side ( 'lower', 'upper' ) {
1740 my $use_limit = $side eq 'lower' ? $limit/200 : 1-($limit/200);
1741 # percentile postition is:
1742 my $percentile_position = ( $#sorted + 2 ) * $use_limit;
1743 my $percentile;
1744 if ( $percentile_position < 1 ) {
1745 $percentile = undef;
1746 } elsif ( $percentile_position > $#sorted +1) {
1747 $percentile = undef;
1748 } else {
1749 my ($int_med,$frac_med) = split(/\./, $percentile_position );
1750 $frac_med = eval("0.".$frac_med);
1751 my $percentile_low = $sorted[ $int_med - 1];
1752 my $percentile_high = ( $sorted[ $int_med ] - $sorted[ $int_med - 1] ) * $frac_med;
1753 $percentile = $percentile_low + $percentile_high;
1755 push( @lower_limits, $percentile ) if ( $side eq 'lower' );
1756 push( @upper_limits, $percentile ) if ( $side eq 'upper' );
1758 if ( $self -> {'percentile_confidence_intervals_check'} < $upper_limits[$#upper_limits] and
1759 $self -> {'percentile_confidence_intervals_check'} > $lower_limits[$#lower_limits] ) {
1760 push( @within_ci , 1 );
1761 } else {
1762 push( @within_ci , 0 );
1765 unshift( @{$self -> {'percentile_confidence_intervals'} ->
1766 [$model_number-1]}, \@lower_limits );
1767 push( @{$self -> {'percentile_confidence_intervals'} ->
1768 [$model_number-1]}, \@upper_limits );
1769 unshift( @{$self -> {'percentile_confidence_intervals_labels'}->
1770 [$model_number-1][0]}, ($limit/2).'%' );
1771 push( @{$self -> {'percentile_confidence_intervals_labels'}->
1772 [$model_number-1][0]},(100-($limit/2)).'%');
1773 $self -> {'within_percentiles'}->[$model_number-1]{$limit}=\@within_ci;
1775 $self -> {'percentile_confidence_intervals_labels'} ->
1776 [$model_number-1][1] = \@parameter_names;
1778 end calculate_percentile_confidence_intervals
1780 # }}} calculate_percentile_confidence_intervals
1782 # {{{ modelfit_post_fork_analyze
1784 start modelfit_post_fork_analyze
1785 end modelfit_post_fork_analyze
1787 # }}}
1789 # {{{ resample
1791 start resample
1793 my $dataObj = $model -> datas -> [0];
1794 for( my $i = 1; $i <= $self -> {'samples'}; $i++ ) {
1795 my ($bs_dir, $bs_name) = OSspecific::absolute_path( $self -> {'directory'}, "bs$i.dta" );
1796 my $new_name = $bs_dir . $bs_name;
1797 my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1798 'new_name' => $new_name,
1799 'target' => $target );
1800 my $newmodel = $model -> copy( filename => "bs$i.mod",
1801 target => $target,
1802 ignore_missing_files => 1 );
1803 $newmodel -> datafiles( new_names => ["bs$i.dta"] );
1804 $newmodel -> datas -> [0] = $boot_sample ;
1805 $newmodel -> write;
1806 push( @resample_models, $newmodel );
1809 end resample
1811 # }}} resample
1813 # {{{ _sampleTools
1815 start _sampleTools
1817 foreach my $tool ( @{$self -> {'tools'}} ) {
1818 my @models = @{$tool -> models};
1819 foreach my $model (@models){
1820 my $dataObj = $model -> datas -> [0];
1821 for( my $i = 1; $i <= $samples; $i++ ) {
1822 my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1823 'new_name' => "bs$i.dta",
1824 'target' => $target );
1825 my $newmodel;
1826 $newmodel = $model -> copy( filename => "bs$i.mod" );
1827 $newmodel -> datafiles( new_names => ["bs$i.dta"] );
1828 $newmodel -> datas -> [0] = $boot_sample ;
1829 $newmodel -> write;
1830 if( defined( $tool -> models ) ){
1831 push( @{$tool -> models}, $newmodel );
1832 } else {
1833 $tool -> models( [ $newmodel ] );
1839 end _sampleTools
1841 # }}} _sampleTools
1843 # {{{ create_matlab_scripts
1845 start create_matlab_scripts
1847 if( defined $PsN::lib_dir ){
1848 unless( -e $PsN::lib_dir . '/histograms.m' and
1849 -e $PsN::lib_dir . '/bca.m' ){
1850 'debug' -> die( message => 'Bootstrap matlab template scripts are not installed, no matlab scripts will be generated.' );
1851 return;
1854 open( PROF, $PsN::lib_dir . '/histograms.m' );
1855 my @file = <PROF>;
1856 close( PROF );
1857 my $found_code;
1858 my $code_area_start=0;
1859 my $code_area_end=0;
1862 for(my $i = 0;$i < scalar(@file); $i++) {
1863 if( $file[$i] =~ /% ---------- Autogenerated code below ----------/ ){
1864 $found_code = 1;
1865 $code_area_start = $i;
1867 if( $file[$i] =~ /% ---------- End autogenerated code ----------/ ){
1868 unless( $found_code ){
1869 'debug' -> die ( message => 'Bootstrap matlab template script is malformated, no matlab scripts will be generated' );
1870 return;
1872 $code_area_end = $i;
1876 my @auto_code;
1877 if( $self -> {'type'} eq 'bca' ){
1878 push( @auto_code, "use_bca = 1; % Was a BCa-type of\n" );
1879 } else {
1880 push( @auto_code, "use_bca = 0; % Was a BCa-type of\n" );
1883 push( @auto_code, " % bootstrap run?\n" );
1884 if( ref $self -> {'samples'} eq 'ARRAY' ) {
1885 push( @auto_code, "bs_samples = ".$self -> {'samples'}->[0][0]."; % Number of bootstrap samples\n" );
1886 } else {
1887 push( @auto_code, "bs_samples = ".$self -> {'samples'}."; % Number of bootstrap samples\n" );
1889 if( $self -> {'type'} eq 'bca' ){
1890 my $ninds = $self -> models -> [0]
1891 -> datas -> [0] -> count_ind;
1892 push( @auto_code, "jk_samples = $ninds; % Number of (BCa) jackknife samples\n\n" );
1895 push( @auto_code, "col_names = { 'Significant Digits',\n" );
1896 push( @auto_code, " 'Condition Number',\n" );
1897 push( @auto_code, " 'OFV',\n" );
1899 my $nps = $self -> {'models'} -> [0] -> nomegas -> [0];
1901 my %param_names;
1902 my( @par_names, @se_names, @np_names, @sh_names );
1903 foreach my $param ( 'theta','omega','sigma' ) {
1904 my $labels = $self -> {'models'} -> [0] -> labels( parameter_type => $param );
1905 if ( defined $labels ){
1906 foreach my $label ( @{$labels -> [0]} ){
1907 push( @par_names, " '",$label,"',\n" );
1908 push( @se_names, " '",'se-'.$label,"',\n" );
1913 for( my $i = 1; $i <= ($nps*($nps+1)/2); $i++ ) {
1914 push( @np_names, " '",'np-om'.$i,"',\n" );
1917 for( my $i = 1; $i <= $nps; $i++ ) {
1918 push( @sh_names, " '",'shrinkage-eta'.$i,"',\n" );
1921 push( @sh_names, " '",'shrinkage-iwres',"'\n" );
1923 # NP not used for now
1925 push( @auto_code,(@par_names, @se_names, @sh_names));
1926 # push( @auto_code,(@par_names, @se_names, @np_names, @sh_names));
1927 push( @auto_code, " };\n\n" );
1929 my @np_columns = (0) x ($nps*($nps+1)/2);
1930 my @sh_columns = (0) x ($nps+1);
1932 if( $self -> {'type'} eq 'bca' ){
1933 push( @auto_code, "fixed_columns = [ 0, 0, 0, " );
1934 } else {
1935 push( @auto_code, "fixed_columns = [ 0, 0, 0, " );
1937 my ( @fixed_columns, @same_columns, @adjust_axes );
1938 foreach my $param ( 'theta','omega','sigma' ) {
1939 my $fixed = $self -> {'models'} -> [0] -> fixed( parameter_type => $param );
1941 if ( defined $fixed ){
1942 push( @fixed_columns, @{$fixed -> [0]} );
1943 if( $param eq 'theta' ) {
1944 push( @same_columns, (0) x scalar( @{$fixed -> [0]} ) );
1949 @adjust_axes = (1) x ( ($#fixed_columns + 1) * 2 +
1950 $#sh_columns + 1 );
1951 # $#np_columns + $#sh_columns + 2 );
1953 push( @auto_code , join( ', ' , @fixed_columns).', '.
1954 join( ', ' , @fixed_columns).', '.
1955 # join( ', ' , @np_columns).', '.
1956 join( ', ' , @sh_columns)."];\n\n" );
1958 if( $self -> {'type'} eq 'bca' ){
1959 push( @auto_code, "same_columns = [ 0, 0, 0, " );
1960 } else {
1961 push( @auto_code, "same_columns = [ 0, 0, 0, " );
1963 foreach my $param ( 'omegas','sigmas' ) {
1964 my $parameters = $self -> {'models'} -> [0] -> problems -> [0] -> $param;
1965 foreach my $parameter ( @{$parameters} ){
1966 if( $parameter -> same() ){
1967 push( @same_columns, (1) x $parameter -> size() );
1968 } else {
1969 push( @same_columns, (0) x scalar @{$parameter -> options} );
1973 push( @auto_code , join( ', ' , @same_columns ).', '.
1974 join( ', ' , @same_columns).', '.
1975 # join( ', ' , @np_columns).', '.
1976 join( ', ' , @sh_columns)."];\n\n" );
1978 push( @auto_code , "adjust_axes = [ 1, 1, 1, ".join( ', ' , @adjust_axes)."];\n\n" );
1980 # push( @auto_code , 'npomegas = '.($nps*($nps+1)/2).";\n\n" );
1981 push( @auto_code , "npomegas = 0;\n\n" );
1984 push( @auto_code, "minimization_successful_col = 5; % Column number for the\n" );
1985 push( @auto_code, " % minimization sucessful flag\n" );
1986 push( @auto_code, "covariance_step_successful_col = 6; % As above for cov-step warnings\n" );
1987 push( @auto_code, "covariance_step_warnings_col = 7; % etc\n" );
1988 push( @auto_code, "estimate_near_boundary_col = 8; % etc\n" );
1990 push( @auto_code, "not_data_cols = 13; % Number of columns in the\n" );
1991 push( @auto_code, " % beginning that are not\n" );
1992 push( @auto_code, " % parameter estimates.\n" );
1994 push( @auto_code, "filename = 'raw_results_matlab.csv';\n" );
1996 splice( @file, $code_area_start, ($code_area_end - $code_area_start), @auto_code );
1997 open( OUTFILE, ">", $self -> {'directory'} . "/histograms.m" );
1998 print OUTFILE "addpath " . $PsN::lib_dir . ";\n";
1999 print OUTFILE @file ;
2000 close OUTFILE;
2002 open( OUTFILE, ">", $self -> {'directory'} . "/raw_results_matlab.csv" );
2003 for( my $i = 0; $i < scalar ( @{$self -> {'raw_results'} -> [0]} ); $i ++ ){
2004 # $self -> {'raw_results'} -> [0] -> [$i][0] =
2005 # $self -> {'raw_results'} -> [0] -> [$i][0] eq 'bootstrap' ?
2006 # 1 : $self -> {'raw_results'} -> [0] -> [$i][0];
2007 # $self -> {'raw_results'} -> [0] -> [$i][0] =
2008 # $self -> {'raw_results'} -> [0] -> [$i][0] eq 'jackknife' ?
2009 # 2 : $self -> {'raw_results'} -> [0] -> [$i][0];
2010 map( $_ = $_ eq 'NA' ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2011 map( $_ = not( defined $_ ) ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2012 print OUTFILE join( ',', @{$self -> {'raw_results'} -> [0] -> [$i]} ), "\n";
2014 close OUTFILE;
2016 } else {
2017 'debug' -> die( message => 'matlab_dir not configured, no matlab scripts will be generated.');
2018 return;
2021 end create_matlab_scripts
2023 # }}}
2025 # {{{ create_R_scripts
2026 start create_R_scripts
2028 unless( -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ){
2029 'debug' -> die( message => 'Bootstrap R-script are not installed, no R-script will be generated.' );
2030 return;
2032 cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
2034 end create_R_scripts
2035 # }}}