bootstrap memleak plugged
[PsN.git] / lib / tool / bootstrap_subs.pm
blobc5a19af07d66e02e861012d75c071ba6ac77b227
1 # {{{ include
3 start include statements
4 use strict;
5 use File::Copy 'cp';
6 use data;
7 use OSspecific;
8 use tool::llp;
9 use tool::cdd::jackknife;
10 use ext::Statistics::Distributions 'udistr', 'uprob';
11 use Math::Random;
12 use Data::Dumper;
13 end include
15 # }}} include statements
17 # {{{ new
19 start new
21 foreach my $attribute ( 'logfile', 'raw_results_file','raw_nonp_file' ) {
22 if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
23 ref($this -> {$attribute}) eq 'HASH' ) ) {
24 my $tmp = $this -> {$attribute};
25 $this -> {$attribute} = [];
26 for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
27 my $name = $tmp;
28 if ( $name =~ /\./ ) {
29 $name =~ s/\./$i\./;
30 } else {
31 $name = $name.$i;
33 my $ldir;
34 ( $ldir, $name ) =
35 OSspecific::absolute_path( $this -> {'directory'}, $name );
36 push ( @{$this -> {$attribute}}, $ldir.$name ) ;
41 end new
43 # }}} new
45 # {{{ general_pre_fork_setup
47 start general_pre_fork_setup
49 # These attributes can be given as a
50 # 1. A scalar : used for all models and problems
51 # 2. A 1-dim. array : specified per problem but same for all models
52 # 3. A 2-dim. array : specified per problem and model
54 foreach my $attribute_string( 'stratify_on', 'samples', 'subjects' ){
55 my $attribute = $self -> {$attribute_string};
56 if ( defined $attribute ) {
57 # If stratification is given we check at what level it is
58 # given.
59 unless ( ref( \$attribute ) eq 'SCALAR' or
60 ( ref( $attribute ) eq 'ARRAY' and scalar @{$attribute} > 0 ) ) {
61 # Here we know its neither scalar or a correct array. But we
62 # seem to assume the lenght be equal to the number of
63 # models.
64 debug -> die( message => "attribute $attribute is " .
65 "defined as a " . ref( $attribute ) .
66 "and is neither a scalar or a non-zero size array" );
67 } elsif ( ref( \$attribute ) eq 'SCALAR' ) {
68 # If it is a scalar we copy the scalar value into an array
69 # for each model, with one value for each problem.
70 my @mo_attribute = ();
71 foreach my $model ( @{$self -> {'models'}} ) {
72 my @pr_attribute = ();
73 foreach my $problem ( @{$model -> problems} ) {
74 push( @pr_attribute, $attribute );
76 push( @mo_attribute, \@pr_attribute );
78 $self -> {$attribute_string} = \@mo_attribute;
79 } elsif ( ref( $attribute ) eq 'ARRAY' ) {
80 # If it is an array we check if the it is an array of
81 # scalars or of arrays. If it is of arrays we seem to
82 # assume that the arrays have a length the matches the
83 # number of problems. It it is an array of scalars, we
84 # copy that scalar into arrays for each problem.
85 unless ( ref( \$attribute -> [0] ) eq 'SCALAR' or
86 ( ref( $attribute -> [0] ) eq 'ARRAY' and scalar @{$attribute -> [0]} > 0 ) ) {
87 debug -> die( message => "attribute $attribute is ",
88 "defined as a ",ref( $attribute -> [0] ),
89 "and is neither a scalar or a non-zero size array" );
90 } elsif ( ref(\$attribute -> [0]) eq 'SCALAR' ) {
91 my @mo_attribute = ();
92 foreach my $model ( @{$self -> {'models'}} ) {
93 push( @mo_attribute, $attribute );
95 $self -> {$attribute_string} = \@mo_attribute;
98 } else {
99 # If stratify is not defined we copy an undefined value to
100 # arrays corresponding to models and subproblem. I wonder if
101 # this is really necessary.
102 my @mo_attribute = ();
103 foreach my $model ( @{$self -> {'models'}} ) {
104 my @pr_attribute = ();
105 foreach my $data ( @{$model -> datas} ) {
106 if( $attribute_string eq 'stratify_on' ){
107 push( @pr_attribute, undef );
108 } elsif ( $attribute_string eq 'samples' ){
109 push( @pr_attribute, 200 );
110 } elsif ( $attribute_string eq 'subjects' ){
111 push( @pr_attribute, $data -> count_ind );
114 push( @mo_attribute, \@pr_attribute );
116 $self -> {$attribute_string} = \@mo_attribute;
120 end general_pre_fork_setup
122 # }}}
124 # {{{ modelfit_pre_fork_setup
126 start modelfit_pre_fork_setup
128 $self -> general_pre_fork_setup( model_number => $model_number );
130 end modelfit_pre_fork_setup
132 # }}}
134 # {{{ llp_pre_fork_setup
136 start llp_pre_fork_setup
138 $self -> general_pre_fork_setup( model_number => $model_number );
140 end llp_pre_fork_setup
142 # }}}
144 # {{{ modelfit_setup
146 start modelfit_setup
149 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
150 $self -> {'threads'} -> [1]:$self -> {'threads'};
151 $self -> general_setup( model_number => $model_number,
152 class => 'tool::modelfit',
153 subm_threads => $subm_threads );
155 end modelfit_setup
157 # }}}
159 # {{{ general_setup
161 start general_setup
164 # If the number of threads are given per tool, e.g. [2,5] meaning 2 threads for
165 # scm and 5 for the modelfit.
166 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
167 $self -> {'threads'} -> [1]:$self -> {'threads'};
168 my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
169 $self -> {'threads'} -> [0]:$self -> {'threads'};
170 # More threads than models?
171 my $num = scalar @{$self -> {'models'}};
172 $own_threads = $num if ( $own_threads > $num );
174 # Sub tool threads can be given as scalar or reference to an array?
175 #my $subm_threads = $parm{'subm_threads'};
176 #my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
177 # $self -> {'threads'} -> [0]:$self -> {'threads'};
178 # Group variable names are matched in the model, not the data!
179 my $model = $self -> {'models'} -> [$model_number-1];
180 my @samples = @{$self -> {'samples'} -> [$model_number-1]};
181 my @stratify_on = @{$self -> {'stratify_on'} -> [$model_number-1]};
182 my @subjects = @{$self -> {'subjects'} -> [$model_number-1]};
184 # Check which models that hasn't been run and run them This
185 # will be performed each step but will only result in running
186 # models at the first step, if at all.
188 # If more than one process is used, there is a VERY high risk
189 # of interaction between the processes when creating
190 # directories for model fits. Therefore the {'directory'}
191 # attribute is given explicitly below.
193 # ------------------------ Run original run -------------------------------
195 # {{{ orig run
197 unless ( $model -> is_run ) {
198 my %subargs = ();
199 if ( defined $self -> {'subtool_arguments'} ) {
200 %subargs = %{$self -> {'subtool_arguments'}};
203 if( $self -> {'nonparametric_etas'} or
204 $self -> {'nonparametric_marginals'} ) {
205 $model -> add_nonparametric_code;
208 my $orig_fit = tool::modelfit ->
209 new( reference_object => $self,
210 base_directory => $self -> {'directory'},
211 directory => $self -> {'directory'}.
212 '/orig_modelfit_dir'.$model_number,
213 models => [$model],
214 threads => $subm_threads,
215 parent_threads => $own_threads,
216 parent_tool_id => $self -> {'tool_id'},
217 logfile => undef,
218 raw_results => undef,
219 prepared_models => undef,
220 %subargs );
222 ui -> print( category => 'bootstrap',
223 message => 'Executing base model.' );
225 $orig_fit -> run;
229 my $output = $model -> outputs -> [0];
231 # }}} orig run
233 # ------------------------ Print a log-header -----------------------------
235 # {{{ log header
237 my @orig_datas = @{$model -> datas};
238 my @problems = @{$model -> problems};
239 my @new_models;
241 # Print a log-header
242 # Lasse 2005-04-21: The minimization_message print will probably not work anymore
243 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
244 my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME ').',';
245 print LOG sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME '),',';
246 foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
247 my $orig_ests = $model -> outputs -> [0] -> $param;
248 # Loop the problems
249 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
250 if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
251 my $label = uc($param)."_".($j+1);
252 $ui_text = $ui_text.sprintf("%12s",$label).',';
253 print LOG sprintf("%12s",$label),',';
254 } else {
255 # Loop the parameter numbers (skip sub problem level)
256 for ( my $num = 1; $num <= scalar @{$orig_ests -> [$j][0]}; $num++ ) {
257 my $label = uc($param).$num."_".($j+1);
258 $ui_text = $ui_text.sprintf("%12s",$label).',';
259 print LOG sprintf("%12s",$label),',';
264 print LOG "\n";
266 # }}} log header
268 # ------------------------ Log original run -------------------------------
270 # {{{ Log original run
272 # Lasse 2005-04-21: The minimization_message print will probably not work anymore
273 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
274 $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename).',';
275 print LOG sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename),',';
276 foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
277 my $orig_ests = $model -> outputs -> [0] -> $param;
278 # Loop the problems
279 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
280 if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
281 $orig_ests -> [$j][0] =~ s/\n//g;
282 $ui_text = $ui_text.sprintf("%12s",$orig_ests -> [$j][0]).',';
283 print LOG sprintf("%12s",$orig_ests -> [$j][0]),',';
284 } else {
285 # Loop the parameter numbers (skip sub problem level)
286 for ( my $num = 0; $num < scalar @{$orig_ests -> [$j][0]}; $num++ ) {
287 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
288 print LOG sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
293 print LOG "\n";
295 # }}} Log original run
297 # TODO: In this loop we loose one dimension (problem) in that
298 # all new models with new data sets are put in the same array,
299 # regardless of which problem the initially belonged to. Fix
300 # this.
302 if ( $#orig_datas < 0 ) {
303 debug -> warn( level => 1,
304 message => "No data files to resample from" );
305 } else {
306 debug -> warn( level => 1,
307 message => "Starting bootstrap sampling" );
309 for ( my $i = 1; $i <= scalar @orig_datas; $i++ ) {
310 my $orig_data = $orig_datas[$i-1];
312 if ( $self -> {'drop_dropped'} ) {
313 my $model_copy = $model -> copy( copy_data => 1,
314 filename => "drop_copy_$i.mod",
315 directory => $self -> {'directory'} );
317 $model_copy -> drop_dropped;
318 $model_copy -> _write( write_data => 1 );
319 $model_copy -> datas -> [0] -> flush();
320 $orig_data = $model -> datas -> [0];
321 $model = $model_copy;
324 my ( @seed, $new_datas, $incl_ids, $incl_keys, $new_mod );
326 my $done = ( -e $self -> {'directory'}."/m$model_number/done.$i" ) ? 1 : 0;
328 if ( not $done ) {
329 ui -> print( category => 'bootstrap',
330 message => "Resampling from ".$orig_data -> filename );
332 ( $new_datas, $incl_ids, $incl_keys )
333 = $orig_data -> bootstrap( directory => $self -> {'directory'}.'/m'.$model_number,
334 name_stub => 'bs_pr'.$i,
335 samples => $samples[$i-1],
336 subjects => $subjects[$i-1],
337 stratify_on => $stratify_on[$i-1],
338 target => 'disk',
339 model_ids => $self -> {'prepared_model_ids'} );
342 for ( my $j = 0; $j < $samples[$i-1]; $j++ ) {
343 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
344 'bs_pr'.$i.'_'.($j+1).'.mod' );
345 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number ,
346 # '/bs_pr'.$i.'_'.$j.'.lst' );
347 my $prob_copy = Storable::dclone($problems[$i-1]);
348 # $Data::Dumper::Maxdepth = 2;
349 # print Dumper $model; die;
351 $new_mod = model ->
352 new(reference_object => $model,
353 sde => 0,
354 outputs => undef,
355 datas => undef,
356 synced => undef,
357 problems => undef,
358 active_problems => undef,
359 directory => $model_dir,
360 filename => $filename,
361 outputfile => undef,
362 problems => [$prob_copy],
363 extra_files => $model -> extra_files,
364 target => 'disk',
365 ignore_missing_files => 1 );
367 if( $self -> {'shrinkage'} ) {
368 $new_mod -> shrinkage_stats( enabled => 1 );
369 my @problems = @{$new_mod -> problems};
370 for( my $i = 1; $i <= scalar @problems; $i++ ) {
371 $problems[ $i-1 ] -> shrinkage_module -> model( $new_mod );
375 my $model_id = $new_mod -> register_in_database;
376 push( @{$self -> {'prepared_model_ids'}}, $model_id );
378 $self -> register_tm_relation( model_ids => [$model_id],
379 prepared_models => 1 );
381 $new_mod -> datas ( [$new_datas -> [$j]] );
383 if( $self -> {'nonparametric_etas'} or
384 $self -> {'nonparametric_marginals'} ) {
385 $new_mod -> add_nonparametric_code;
388 $new_mod -> update_inits( from_output => $output );
389 $new_mod -> _write;
391 push( @new_models, $new_mod );
393 # Create a checkpoint. Log the samples and individuals.
394 open( DONE, ">".$self -> {'directory'}."/m$model_number/done.$i" ) ;
395 print DONE "Resampling from ",$orig_data -> filename, " performed\n";
396 print DONE "$samples[$i-1] samples\n";
397 print DONE "$subjects[$i-1] subjects\n";
398 print DONE "Included individuals:\n";
399 @seed = random_get_seed;
400 print DONE "seed: @seed\n";
401 for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
402 print DONE join(',',@{$incl_ids -> [$k]}),"\n";
404 print DONE "Included keys:\n";
405 for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
406 print DONE join(',',@{$incl_keys -> [$k]}),"\n";
408 close( DONE );
409 open( INCL, ">".$self -> {'directory'}."included_individuals".$model_number.".csv" ) ;
410 for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
411 print INCL join(',',@{$incl_ids -> [$k]}),"\n";
413 close( INCL );
414 open( KEYS, ">".$self -> {'directory'}."included_keys".$model_number.".csv" ) ;
415 open( SAMPLEKEYS, ">".$self -> {'directory'}."sample_keys".$model_number.".csv" ) ;
416 for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
417 my %sample_keys;
418 my $sample_size = scalar @{$incl_keys -> [$k]};
419 for ( my $l = 0; $l < $sample_size; $l++ ) {
420 $sample_keys{$incl_keys -> [$k][$l]}++;
422 for ( my $l = 0; $l < $sample_size; $l++ ) {
423 my $val = defined $sample_keys{$l} ? $sample_keys{$l} : 0;
424 my $extra = ($l == ($sample_size-1)) ? "\n" : ',';
425 print SAMPLEKEYS $val,$extra;
427 print KEYS join(',',@{$incl_keys -> [$k]}),"\n";
429 close( KEYS );
430 close( SAMPLEKEYS );
431 } else {
432 ui -> print( category => 'bootstrap',
433 message => "Recreating samples ".$orig_data -> filename." from previous run." );
435 # Recreate the datasets and models from a checkpoint
436 my ($stored_filename, $stored_samples, $stored_subjects);
437 my @seed;
438 my ($stored_filename_found, $stored_samples_found, $stored_subjects_found, $stored_seed_found);
439 open( DONE, $self -> {'directory'}."/m$model_number/done.$i" );
440 while( <DONE> ){
441 if( /^Resampling from (.+) performed$/ ){
442 $stored_filename = $1;
443 $stored_filename_found = 1;
444 next;
446 if( /^(\d+) samples$/ ){
447 $stored_samples = $1;
448 $stored_samples_found = 1;
449 next;
451 if( /^(\d+) subjects$/ ){
452 $stored_subjects = $1;
453 $stored_subjects_found = 1;
454 next;
456 if( /^seed: (\d+) (\d+)$/ ){
457 @seed = ($1, $2);
458 $stored_seed_found = 1;
459 next;
462 if( $stored_filename_found and $stored_samples_found
463 and $stored_subjects_found and $stored_seed_found ){
464 last;
467 close( DONE );
468 unless( $stored_filename_found and $stored_samples_found
469 and $stored_samples_found and $stored_seed_found ){
470 debug -> die( level => 1,
471 message => "The bootstrap/m1/done file could not be parsed." );
474 if ( $stored_samples < $samples[$i-1] ) {
475 debug -> die( message => "The number of samples saved in previous run ($stored_samples) ".
476 "are bigger than the number of samples specified for this run (".
477 $samples[$i-1].")" );
479 if ( $stored_subjects != $subjects[$i-1] ) {
480 debug -> die( message => "The number of individuals saved in previous run (".
481 "$stored_subjects) does not match the number of individuals specified ".
482 "for this run (".$subjects[$i-1].")" );
484 # my ( @stored_ids, @stored_keys );
485 # for ( my $k = 4; $k < 4+$stored_samples; $k++ ) {
486 # chomp($rows[$k]);
487 # my @sample_ids = split(',', $rows[$k] );
488 # push( @stored_ids, \@sample_ids );
490 # for ( my $k = 5+$stored_samples; $k < 5+2*$stored_samples; $k++ ) {
491 # chomp($rows[$k]);
492 # my @sample_keys = split(',', $rows[$k] );
493 # push( @stored_keys, \@sample_keys );
495 # @seed = split(' ',$rows[5+2*$stored_samples]);
496 # $incl_ids = \@stored_ids;
497 # $incl_keys = \@stored_keys;
498 # shift( @seed ); # get rid of 'seed'-word
499 # Reinitiate the model objects
500 for ( my $j = 1; $j <= $stored_samples; $j++ ) {
501 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
502 $model_number,
503 'bs_pr'.$i.'_'.$j.'.mod' );
504 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
505 # $model_number,
506 # '/bs_pr'.$i.'_'.$j.'.lst' );
507 $new_mod = model ->
508 new( directory => $model_dir,
509 filename => $filename,
510 # outputfile => $outfilename,
511 extra_files => $model -> extra_files,
512 target => 'disk',
513 ignore_missing_files => 1 );
514 push( @new_models, $new_mod );
516 random_set_seed( @seed );
517 ui -> print( category => 'bootstrap',
518 message => "Using $stored_samples previously resampled bootstrap sets of".
519 " of $stored_subjects individuals from $stored_filename" )
520 unless $self -> {'parent_threads'} > 1;
522 # push( @{$self -> {'included_inds'} -> [$model_number-1]}, $incl_ids );
523 # push( @{$self -> {'included_keys'} -> [$model_number-1]}, $incl_keys );
525 $self -> {'prepared_models'}[$model_number-1]{'own'} = \@new_models;
527 # --------------------- Create the sub tools ------------------------------
529 # {{{ sub tools
531 my $subdir = $class;
532 $subdir =~ s/tool:://;
533 my @subtools = @{$self -> {'subtools'}};
534 shift( @subtools );
535 my %subargs = ();
536 if ( defined $self -> {'subtool_arguments'} ) {
537 %subargs = %{$self -> {'subtool_arguments'}};
539 push( @{$self -> {'tools'}},
540 $class ->
541 new( reference_object => $self,
542 models => \@new_models,
543 threads => $subm_threads,
544 directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
545 _raw_results_callback => $self ->
546 _modelfit_raw_results_callback( model_number => $model_number ),
547 subtools => \@subtools,
548 parent_threads => $own_threads,
549 parent_tool_id => $self -> {'tool_id'},
550 logfile => $self -> {'logfile'}[$model_number-1],
551 raw_results => undef,
552 prepared_models => undef,
553 %subargs ) );
555 # ( clean => $self -> {'clean'},
556 # base_directory => $self -> {'directory'},
557 # compress => $self -> {'compress'},
558 # directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
559 # drop_dropped => $self -> {'drop_dropped'},
560 # wrap_data => $self -> {'wrap_data'},
561 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
562 # cpu_time => $self -> {'cpu_time'},
563 # run_on_lsf => $self -> {'run_on_lsf'},
564 # no_remote_execution => $self -> {'no_remote_execution'},
565 # no_remote_compile => $self -> {'no_remote_compile'},
566 # lsf_queue => $self -> {'lsf_queue'},
567 # lsf_options => $self -> {'lsf_options'},
568 # lsf_job_name => $self -> {'lsf_job_name'},
569 # lsf_project_name => $self -> {'lsf_project_name'},
571 # parent_tool_id => $self -> {'tool_id'},
573 # models => \@new_models,
574 # grid_batch_size => $self -> {'grid_batch_size'},
575 # nm_version => $self -> {'nm_version'},
576 # picky => $self -> {'picky'},
577 # retries => $self -> {'retries'},
578 # tweak_inits => $self -> {'tweak_inits'},
579 # handle_maxevals => $self -> {'handle_maxevals'},
580 # remove_temp_files => $self -> {'remove_temp_files'},
581 # _raw_results_callback => $self ->
582 # _modelfit_raw_results_callback( model_number => $model_number ),
583 # threads => $subm_threads,
584 # subtools => \@subtools,
585 # logfile => $self -> {'logfile'}[$model_number-1],
586 # parent_threads => $own_threads,
587 # %subargs ) );
589 # }}} sub tools
592 end general_setup
594 # }}}
596 # {{{ llp_setup
597 start llp_setup
599 my @subm_threads;
600 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
601 @subm_threads = @{$self -> {'threads'}};
602 unshift(@subm_threads);
603 } else {
604 @subm_threads = ($self -> {'threads'});
606 $self -> general_setup( model_number => $model_number,
607 class => 'tool::llp',
608 subm_threads => \@subm_threads );
610 end llp_setup
611 # }}} llp_setup
613 # {{{ _jackknife_raw_results_callback
615 start _jackknife_raw_results_callback
617 # Use the bootstrap's raw_results file.
618 my ($dir,$file) =
619 OSspecific::absolute_path( $self -> {'directory'},
620 $self -> {'raw_results_file'}[$model_number-1] );
621 my ($dir,$nonp_file) =
622 OSspecific::absolute_path( $self -> {'directory'},
623 $self -> {'raw_nonp_file'}[$model_number-1] );
624 $subroutine = sub {
625 my $jackknife = shift;
626 my $modelfit = shift;
627 $modelfit -> raw_results_file( $dir.$file );
628 $modelfit -> raw_nonp_file( $dir.$nonp_file );
629 $modelfit -> raw_results_append( 1 );
630 my ( @new_header, %param_names );
631 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
632 unshift( @{$row}, 'jackknife' );
634 $modelfit -> {'raw_results_header'} = [];
636 return $subroutine;
638 end _jackknife_raw_results_callback
640 # }}} _jackknife_raw_results_callback
642 # {{{ _modelfit_raw_results_callback
644 start _modelfit_raw_results_callback
646 # Use the bootstrap's raw_results file.
647 my ($dir,$file) =
648 OSspecific::absolute_path( $self -> {'directory'},
649 $self -> {'raw_results_file'}[$model_number-1] );
650 my ($dir,$nonp_file) =
651 OSspecific::absolute_path( $self -> {'directory'},
652 $self -> {'raw_nonp_file'}[$model_number-1] );
653 my $orig_mod = $self -> {'models'}[$model_number-1];
654 my $type = $self -> {'type'};
655 $subroutine = sub {
656 my $modelfit = shift;
657 my $mh_ref = shift;
658 my %max_hash = %{$mh_ref};
659 $modelfit -> raw_results_file( $dir.$file );
660 $modelfit -> raw_nonp_file( $dir.$nonp_file );
662 # The prepare_raw_results in the modelfit will fix the
663 # raw_results for each bootstrap sample model, we must add
664 # the result for the original model.
666 my ($raw_results_row,$row_structure) = $self -> create_raw_results_rows( max_hash => $mh_ref,
667 model => $orig_mod );
669 unshift( @{$modelfit -> {'raw_results'}}, @{$raw_results_row} );
671 # {{{ New header
673 my ( @new_header, %param_names );
674 my @params = ( 'theta', 'omega', 'sigma' );
675 foreach my $param ( @params ) {
676 my $labels = $orig_mod -> labels( parameter_type => $param );
677 $param_names{$param} = $labels -> [0] if ( defined $labels );
680 foreach my $name ( @{$modelfit -> {'raw_results_header'}} ) {
681 my $success;
682 foreach my $param ( @params, 'eigen', 'shrinkage_etas', 'shrinkage_wres' ) {
683 if ( $name eq $param ){
684 if( defined $param_names{$name} ) {
685 push( @new_header , @{$param_names{$name}} );
686 } else {
687 for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
688 push ( @new_header, uc(substr($name,0,2)).$i );
691 $success = 1;
692 last;
693 } elsif ( $name eq 'se'.$param ) {
694 my @new_names = ();
695 for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
696 push ( @new_names, uc(substr($param,0,2)).$i );
698 map ( $_ = 'se'.$_, @new_names );
700 push( @new_header, @new_names );
701 $success = 1;
702 last;
705 unless( $success ){
706 push( @new_header, $name );
710 $modelfit -> {'raw_results_header'} = \@new_header;
712 if ( $type eq 'bca' ) {
713 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
714 unshift( @{$row}, 'bootstrap' );
716 unshift( @{$modelfit -> {'raw_results_header'}}, 'method' );
719 @{$self -> {'raw_results_header'}} = @{$modelfit -> {'raw_results_header'}};
721 # }}} New header
724 return $subroutine;
726 end _modelfit_raw_results_callback
728 # }}} _modelfit_raw_results_callback
730 # {{{ modelfit_analyze
732 start modelfit_analyze
734 my @params = @{$self -> {'parameters'}};
735 my @diagnostic_params = @{$self -> {'diagnostic_parameters'}};
736 my ( @print_order, @calculation_order );
738 if ( $self -> {'type'} eq 'bca' ) {
740 # -------------------------- BCa method ---------------------------------
742 # {{{ BCa
744 @calculation_order = @{$self -> {'bca_calculation_order'}};
745 @print_order = @{$self -> {'bca_print_order'}};
746 my $jk_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
747 $self -> {'threads'} -> [1]:$self -> {'threads'};
748 my $done = ( -e $self -> {'directory'}."/jackknife_done.$model_number" ) ? 1 : 0;
749 if ( not $done ) {
751 # {{{ Create Jackknife
753 ui -> print( category => 'bootstrap',
754 message => "Running a Jackknife for the BCa estimates" );
755 $self -> {'jackknife'} = tool::cdd::jackknife ->
756 new( models => [$self -> models -> [$model_number -1]],
757 case_columns => $self -> models -> [$model_number -1]
758 -> datas -> [0] -> idcolumn,
759 _raw_results_callback => $self ->
760 _jackknife_raw_results_callback( model_number => $model_number ),
761 parent_tool_id => $self -> {'tool_id'},
762 threads => $jk_threads,
763 bca_mode => 1,
764 shrinkage => $self -> {'shrinkage'},
765 nonparametric_marginals => $self -> {'nonparametric_marginals'},
766 nonparametric_etas => $self -> {'nonparametric_etas'},
767 adaptive => $self -> {'adaptive'},
768 rerun => $self -> {'rerun'},
769 verbose => $self -> {'verbose'},
770 cross_validate => 0 );
771 # Create a checkpoint. Log the samples and individuals.
772 open( DONE, ">".$self -> {'directory'}."/jackknife_done.$model_number" ) ;
773 print DONE "Jackknife directory:\n";
774 print DONE $self -> {'jackknife'} -> directory,"\n";
775 my @seed = random_get_seed;
776 print DONE "seed: @seed\n";
777 close( DONE );
779 # }}} Create Jackknife
781 } else {
783 # {{{ Recreate Jackknife
785 open( DONE, $self -> {'directory'}."/jackknife_done.$model_number" );
786 my @rows = <DONE>;
787 close( DONE );
788 my ( $stored_directory ) = $rows[1];
789 chomp( $stored_directory );
790 if ( not -e $stored_directory ) {
791 debug -> die( message => "The Jackknife directory ".$stored_directory.
792 "indicated by ".$self -> {'directory'}.
793 "/jackknife_done.$model_number".
794 " from the old bootstrap run in ".
795 $self -> {'directory'}." does not exist" );
797 my @seed = split(' ',$rows[2]);
798 shift( @seed ); # get rid of 'seed'-word
799 $self -> {'jackknife'} = tool::cdd::jackknife ->
800 new( models => [$self -> models -> [$model_number -1]],
801 case_columns => $self -> models -> [$model_number -1]
802 -> datas -> [0] -> idcolumn,
803 _raw_results_callback => $self ->
804 _jackknife_raw_results_callback( model_number => $model_number ),
805 threads => $jk_threads,
806 parent_tool_id => $self -> {'tool_id'},
807 directory => $stored_directory,
808 bca_mode => 1,
809 shrinkage => $self -> {'shrinkage'},
810 nonparametric_marginals => $self -> {'nonparametric_marginals'},
811 nonparametric_etas => $self -> {'nonparametric_etas'},
812 adaptive => $self -> {'adaptive'},
813 rerun => $self -> {'rerun'},
814 verbose => $self -> {'verbose'},
815 cross_validate => 0 );
816 random_set_seed( @seed );
817 ui -> print( category => 'bootstrap',
818 message => "Restarting BCa Jackknife from ".
819 $stored_directory )
820 unless $self -> {'parent_threads'} > 1;
822 # }}} Recreate Jackknife
826 $self -> {'jackknife'} -> run;
827 $self -> {'jackknife_results'} = $self -> {'jackknife'} -> {'results'};
828 $self -> {'jackknife_prepared_models'} = $self -> {'jackknife'} -> {'prepared_models'};
830 $self -> {'jackknife_raw_results'}[$model_number-1] =
831 $self -> {'jackknife'} -> raw_results;
832 # $self -> {'jackknife'} -> raw_results -> [$model_number-1];
833 # $Data::Dumper::Maxdepth = 0;
834 # print Dumper $self -> {'jackknife_raw_results'};
836 # }}} BCa
838 } else {
839 @calculation_order = @{$self -> {'calculation_order'}};
840 @print_order = @{$self -> {'print_order'}};
841 $self -> {'bootstrap_raw_results'}[$model_number-1] =
842 $self -> {'tools'} -> [0] -> raw_results;
843 # $self -> {'tools'} -> [0] -> raw_results -> [$model_number-1];
845 unless( ref($self -> {'raw_results_header'}[0]) eq 'ARRAY' ) {
846 my $tmp = $self -> {'raw_results_header'};
847 $self -> {'raw_results_header'} = [];
848 $self -> {'raw_results_header'}[$model_number-1] = $tmp;
851 my @param_names = @{$self -> models -> [$model_number -1] -> outputs -> [0] -> labels};
852 my ( @diagnostic_names, @tmp_names );
853 foreach my $param ( @diagnostic_params ) {
854 push( @tmp_names, $param );
855 $tmp_names[$#tmp_names] =~ s/_/\./g;
857 for ( my $i = 0; $i <= $#param_names; $i++ ) {
858 unshift( @{$param_names[$i]}, 'OFV' );
859 push( @{$diagnostic_names[$i]}, @tmp_names );
862 end modelfit_analyze
864 # }}}
866 # {{{ prepare_results
868 start prepare_results
870 # {{{ definitions
872 # The '3' is there to skip model, problem and subproblem numbers
873 my $skip_mps = 3;
874 my ( @calculation_order, @print_order, %diag_idx );
875 if ( $self -> {'type'} eq 'bca' ) {
876 @calculation_order = @{$self -> {'bca_calculation_order'}};
877 @print_order = @{$self -> {'bca_print_order'}};
878 } else {
879 @calculation_order = @{$self -> {'calculation_order'}};
880 @print_order = @{$self -> {'print_order'}};
882 if ( $self -> {'type'} eq 'bca' ) {
883 # $self -> read_raw_results();
884 $self -> bca_read_raw_results();
885 # if ( not defined $self -> {'bootstrap_raw_results'} );
886 } else {
887 # if ( not defined $self -> {'raw_results'} ) {
888 $self -> read_raw_results();
889 $self -> {'bootstrap_raw_results'} = $self -> {'raw_results'};
893 for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
894 $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i + $skip_mps;
897 # }}} definitions
899 # --------------------- Get data from raw_results -------------------------
901 # Divide the data into diagnostics and estimates. Included in estimates are
902 # the parametric estimates, the standard errors of these, the nonparametric
903 # estimates, the shrinkage in eta and the shrinkage in wres
904 # The diagnostics end up in {'bootstrap_diagnostics'} and
905 # {'jackknife_diagnostics'}. The estimates in {'bootstrap_estimates'} and
906 # {'jackknife_estimates'}.
907 # The number of runs that are selected for calculation of the results is
908 # saved.
910 # {{{ Get the data from the runs
912 foreach my $tool ( 'bootstrap', 'jackknife' ) {
913 if ( defined $self -> {$tool.'_raw_results'} ) {
914 for ( my $i = 0; $i < scalar @{$self->{$tool.'_raw_results'}}; $i++ ) { # All models
916 # {{{ Get the number of columns with estimates
918 my $cols_orig = 0;
919 foreach my $param ( 'theta', 'omega', 'sigma' ) {
920 my $labels =
921 $self -> {'models'} -> [$i] -> labels( parameter_type => $param );
922 # we can't use labels directly since different models may have different
923 # labels (still within the same modelfit)
924 my $numpar = scalar @{$labels -> [0]} if ( defined $labels and
925 defined $labels -> [0] );
926 $cols_orig += ( $numpar*3 ); # est + SE + eigen values
928 # nonparametric omegas and shrinkage
929 my $nomegas = $self -> {'models'} -> [$i] -> nomegas;
930 my $numpar = $nomegas -> [0];
932 # shrinkage omega + wres shrinkage
933 $cols_orig += $numpar + 1;
934 # $cols_orig += ($numpar*($numpar+1)/2 + $numpar + 1);
936 $cols_orig++; # OFV
938 # }}} Get the number of columns with estimates
940 # {{{ Loop, choose and set diagnostics and estimates
942 my %return_section;
943 $return_section{'name'} = 'Warnings';
944 my ( $skip_term, $skip_cov, $skip_warn, $skip_bound );
945 my $included = 0;
946 # print Dumper $self->{$tool.'_raw_results'};
947 for ( my $j = 0; $j < scalar @{$self->{$tool.'_raw_results'}->[$i]}; $j++ ) { # orig model + prepared_models
948 my $columns = scalar @{$self->{$tool.'_raw_results'}->[$i][$j]};
950 # ----------------------- Diagnostics -----------------------------
952 for ( my $m = $skip_mps; $m < scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m++ ) { # value
953 $self->{$tool.'_diagnostics'}->[$i][$j][$m-$skip_mps] =
954 $self->{$tool.'_raw_results'}->[$i][$j][$m];
956 my $use_run = 1;
957 if ( $self -> {'skip_minimization_terminated'} and
958 ( not defined $self->{$tool.'_raw_results'}->
959 [$i][$j][$diag_idx{'minimization_successful'}]
960 or not $self->{$tool.'_raw_results'}->
961 [$i][$j][$diag_idx{'minimization_successful'}] ) ) {
962 $skip_term++;
963 $use_run = 0;
964 } elsif ( $self -> {'skip_covariance_step_terminated'} and not
965 $self->{$tool.'_raw_results'}->
966 [$i][$j][$diag_idx{'covariance_step_successful'}] ) {
967 $skip_cov++;
968 $use_run = 0;
969 } elsif ( $self -> {'skip_with_covstep_warnings'} and
970 $self->{$tool.'_raw_results'}->
971 [$i][$j][$diag_idx{'covariance_step_warnings'}] ) {
972 $skip_warn++;
973 $use_run = 0;
974 } elsif ( $self -> {'skip_estimate_near_boundary'} and
975 $self->{$tool.'_raw_results'}->
976 [$i][$j][$diag_idx{'estimate_near_boundary'}] ) {
977 $skip_bound++;
978 $use_run = 0;
981 # ------------------------ Estimates ------------------------------
983 if( $use_run ) {
984 for ( my $m = scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m < $columns; $m++ ) { # value
985 my $val = $self->{$tool.'_raw_results'}->[$i][$j][$m];
986 $self->{$tool.'_estimates'}->
987 [$i][$included][$m-(scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps)] = $val;
989 $included++;
993 # }}} Loop, choose and set diagnostics and estimates
995 # {{{ push #runs to results
997 if ( defined $skip_term ) {
998 push( @{$return_section{'values'}}, "$skip_term runs with miminization ".
999 "terminated were skipped when calculating the $tool results" );
1001 if ( defined $skip_cov ) {
1002 push( @{$return_section{'values'}}, "$skip_cov runs with aborted ".
1003 "covariance steps were skipped when calculating the $tool results" );
1005 if ( defined $skip_warn ) {
1006 push( @{$return_section{'values'}}, "$skip_warn runs with errors from ".
1007 "the covariance step were skipped when calculating the $tool results" );
1009 if ( defined $skip_bound ) {
1010 push( @{$return_section{'values'}}, "$skip_bound runs with estimates ".
1011 "near a boundary were skipped when calculating the $tool results" );
1013 $return_section{'labels'} = [];
1014 push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1016 # }}} push #runs to results
1022 # $Data::Dumper::Maxdepth = 5;
1023 # die Dumper $self -> {'bootstrap_diagnostics'};
1025 # }}} Get the data from the runs
1027 # ---------------------- Calculate the results ----------------------------
1029 # {{{ Result calculations
1031 for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1033 my $mps_offset = $self -> {'bca'} ? 4 : 3; # <- this is the offset to
1034 # diagonstic_parameters,
1035 # which is one more for
1036 # the method column added
1037 # with a bca run.
1039 my @param_names = @{$self -> {'raw_results_header'}[$i]}[($mps_offset + scalar @{$self -> {'diagnostic_parameters'}}) .. (scalar @{$self -> {'raw_results_header'}[$i]} - 1)];
1040 my ( @diagnostic_names, @tmp_names );
1041 foreach my $param ( @{$self -> {'diagnostic_parameters'}} ) {
1042 push( @tmp_names, $param );
1043 $tmp_names[$#tmp_names] =~ s/_/\./g;
1046 @diagnostic_names = @tmp_names;
1047 foreach my $result_type ( @calculation_order ) {
1048 my @names = $result_type eq 'diagnostic_means' ?
1049 @diagnostic_names : @param_names;
1050 my $calc = 'calculate_'.$result_type;
1051 $self -> $calc( model_number => ($i+1),
1052 parameter_names => \@names );
1054 foreach my $result_type ( @print_order ) {
1055 my $name = $result_type;
1056 $name =~ s/_/\./g;
1057 my %return_section;
1058 $return_section{'name'} = $name;
1059 $return_section{'values'} = $self -> {$result_type} -> [$i];
1060 $return_section{'labels'} = $self -> {$result_type.'_labels'} -> [$i];
1061 push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1065 # }}} Result calculations
1068 end prepare_results
1070 # }}} prepare_results
1072 # {{{ print_summary
1074 start print_summary
1076 sub acknowledge {
1077 my $name = shift;
1078 my $outcome = shift;
1079 my $file = shift;
1080 my $l = (7 - length( $outcome ))/2;
1081 my $c_num = '00';
1082 $c_num = '07' if ( $outcome eq 'OK' );
1083 $c_num = '13' if ( $outcome eq 'WARNING' );
1084 $c_num = '05' if ( $outcome eq 'ERROR' );
1085 # my $text = sprintf( "%-66s%2s%7s%-5s\n\n", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1086 my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1087 # cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1088 # my $text = cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1089 print $text, "\n\n";
1090 print $file $text if defined $file;
1093 sub sum {
1094 my $arr = shift;
1095 my $sum = 0;
1096 for ( @{$arr} ) {
1097 $sum += $_;
1099 return $sum;
1102 my $diag_number = scalar @{$self -> {'diagnostic_parameters'}} - 1;
1103 my %diag_idxs;
1104 for ( my $i = 0; $i <= $diag_number; $i++ ) {
1105 $diag_idxs{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1108 open( my $log, ">test.log" );
1109 for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1110 print "MODEL ",$i+1,"\n" if ( scalar @{$self -> {'bootstrap_raw_results'}} > 1 );
1111 if ( $self -> {'diagnostic_means'} ->
1112 [$i][0][$diag_idxs{'minimization_successful'}] >
1113 $self -> {'minimization_successful_limit'} ) {
1114 acknowledge( 'Successful minimization ratio = '.
1115 $self -> {'diagnostic_means'} ->
1116 [$i][0][$diag_idxs{'minimization_successful'}], 'OK', $log );
1117 } else {
1118 acknowledge( 'Termination problems in '.
1119 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1120 [$i][0][$diag_idxs{'minimization_successful'}]*100))
1121 .'% of the bootstrap runs', 'WARNING', $log );
1124 if ( $self -> {'diagnostic_means'} ->
1125 [$i][0][$diag_idxs{'covariance_step_successful'}] >
1126 $self -> {'covariance_step_successful_limit'} ) {
1127 acknowledge( 'Successful covariance step ratio = '.$self -> {'diagnostic_means'} ->
1128 [$i][0][$diag_idxs{'covariance_step_successful'}], 'OK', $log );
1129 } else {
1130 acknowledge( 'Covariance step problems in '.
1131 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1132 [$i][0][$diag_idxs{'covariance_step_successful'}]*100))
1133 .'% of the bootstrap runs', 'WARNING', $log );
1136 if ( $self -> {'diagnostic_means'} ->
1137 [$i][0][$diag_idxs{'covariance_step_warnings'}] <
1138 $self -> {'covariance_step_warnings_limit'} ) {
1139 acknowledge( 'Covariance step warnings ratio = '.$self -> {'diagnostic_means'} ->
1140 [$i][0][$diag_idxs{'covariance_step_warnings'}], 'OK', $log );
1141 } else {
1142 acknowledge( 'Covariance step warnings in '.
1143 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1144 [$i][0][$diag_idxs{'covariance_step_warnings'}]*100))
1145 .'% of the bootstrap runs', 'WARNING', $log );
1148 if ( $self -> {'diagnostic_means'} ->
1149 [$i][0][$diag_idxs{'estimate_near_boundary'}] <
1150 $self -> {'estimate_near_boundary_limit'} ) {
1151 acknowledge( 'Estimate near boundary ratio = '.$self -> {'diagnostic_means'} ->
1152 [$i][0][$diag_idxs{'estimate_near_boundary'}], 'OK', $log );
1153 } else {
1154 acknowledge( 'Estimate near boundary found in '.
1155 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1156 [$i][0][$diag_idxs{'estimate_near_boundary'}]*100))
1157 .'% of the bootstrap runs', 'WARNING', $log );
1159 my $sum = sum( $self -> {'within_se_confidence_intervals'}->
1160 [$i]{$self -> {'se_confidence_intervals_level'}} );
1161 if ( not defined $sum or $sum < 1 ) {
1162 acknowledge( 'No '.(100-$self -> {'se_confidence_intervals_level'}).
1163 '% confidence intervals based on the'.
1164 ' bootstrap standard errors include '.
1165 $self -> {'se_confidence_intervals_check'}, 'OK', $log );
1166 } else {
1167 acknowledge( "$sum ".(100-$self -> {'se_confidence_intervals_level'}).
1168 "% confidence intervals based on the bootstrap".
1169 " SE's include ".
1170 $self -> {'se_confidence_intervals_check'}, 'WARNING', $log );
1171 my $found =
1172 scalar @{$self -> {'within_se_confidence_intervals'}->
1173 [$i]{$self -> {'se_confidence_intervals_level'}}} - 1;
1174 for ( my $l = 0; $l <= $found ; $l++ ) {
1175 if ( $self -> {'within_se_confidence_intervals'}->
1176 [$i]{$self -> {'se_confidence_intervals_level'}}[$l] ) {
1177 printf( "\t%-20s\n",
1178 $self -> {'standard_error_confidence_intervals_labels'} ->
1179 [$i][1][$l] );
1180 print $log
1181 sprintf( "\t%-20s\n",
1182 $self -> {'standard_error_confidence_intervals_labels'} ->
1183 [$i][1][$l] );
1184 print "\n" if ( $l == $found );
1185 print $log "\n" if ( $l == $found );
1190 my $sum = sum( $self -> {'large_bias'}-> [$i][0] );
1191 if ( not defined $sum or $sum < 1 ) {
1192 acknowledge( 'No bias larger than '.
1193 ($self -> {'large_bias_limit'}*100).'% found', 'OK', $log );
1194 } else {
1195 acknowledge( "$sum estimates were found to have a relative bias > ".
1196 $self -> {'large_bias_limit'}, 'WARNING', $log );
1197 my $found =
1198 scalar @{$self -> {'large_bias'}->
1199 [$i][0]} - 1;
1200 for ( my $l = 0; $l <= $found ; $l++ ) {
1201 if ( $self -> {'large_bias'}->
1202 [$i][0][$l] ) {
1203 printf( "\t%-20s%3.2f %\n", $self -> {'bias_labels'} ->
1204 [$i][1][$l], ($self -> {'bias'} -> [$i][0][$l]/
1205 $self->{'bootstrap_estimates'} -> [$i][0][$l])*100 );
1206 # print $log
1207 # sprintf( "\t%-20s\n",
1208 # $self -> {'percentile_confidence_intervals_labels'} ->
1209 # [$i][1][$l] );
1210 print "\n" if ( $l == $found );
1211 print $log "\n" if ( $l == $found );
1216 if ( $self -> {'type'} eq 'bca' ) {
1217 my $sum = sum( $self -> {'within_bca_confidence_intervals'}->
1218 [$i]{$self -> {'bca_confidence_intervals_level'}} );
1219 if ( not defined $sum or $sum < 1 ) {
1220 acknowledge( 'No '.(100-$self -> {'bca_confidence_intervals_level'}).
1221 '% BCa confidence intervals include '.
1222 $self -> {'bca_confidence_intervals_check'}, 'OK', $log );
1223 } else {
1224 acknowledge( "$sum ".(100-$self -> {'bca_confidence_intervals_level'}).
1225 "% BCa confidence intervals include ".
1226 $self -> {'bca_confidence_intervals_check'}, 'WARNING', $log );
1227 my $found =
1228 scalar @{$self -> {'within_bca_confidence_intervals'}->
1229 [$i]{$self -> {'bca_confidence_intervals_level'}}} - 1;
1230 for ( my $l = 0; $l <= $found ; $l++ ) {
1231 if ( $self -> {'within_bca_confidence_intervals'}->
1232 [$i]{$self -> {'bca_confidence_intervals_level'}}[$l] ) {
1233 printf( "\t%-20s\n",
1234 $self -> {'bca_confidence_intervals_labels'} ->
1235 [$i][1][$l] );
1236 print $log
1237 sprintf( "\t%-20s\n",
1238 $self -> {'bca_confidence_intervals_labels'} ->
1239 [$i][1][$l] );
1240 print "\n" if ( $l == $found );
1241 print $log "\n" if ( $l == $found );
1245 } else {
1246 my $sum = sum( $self -> {'within_percentiles'}->
1247 [$i]{$self -> {'percentile_confidence_intervals_level'}} );
1248 if ( not defined $sum or $sum < 1 ) {
1249 acknowledge( 'No '.(100-$self -> {'percentile_confidence_intervals_level'}).
1250 '% confidence intervals based on the'.
1251 ' bootstrap percentiles include '.
1252 $self -> {'percentile_confidence_intervals_check'}, 'OK', $log );
1253 } else {
1254 acknowledge( "$sum ".(100-$self -> {'percentile_confidence_intervals_level'}).
1255 "% confidence intervals based on the percentiles".
1256 " include ".
1257 $self -> {'percentile_confidence_intervals_check'}, 'WARNING', $log );
1258 my $found =
1259 scalar @{$self -> {'within_percentiles'}->
1260 [$i]{$self -> {'percentile_confidence_intervals_level'}}} - 1;
1261 for ( my $l = 0; $l <= $found ; $l++ ) {
1262 if ( $self -> {'within_percentiles'}->
1263 [$i]{$self -> {'percentile_confidence_intervals_level'}}[$l] ) {
1264 printf( "\t%-20s\n",
1265 $self -> {'percentile_confidence_intervals_labels'} ->
1266 [$i][1][$l] );
1267 print $log
1268 sprintf( "\t%-20s\n",
1269 $self -> {'percentile_confidence_intervals_labels'} ->
1270 [$i][1][$l] );
1271 print "\n" if ( $l == $found );
1272 print $log "\n" if ( $l == $found );
1278 close ( $log );
1280 end print_summary
1282 # }}} print_summary
1284 # {{{ bca_read_raw_results
1286 start bca_read_raw_results
1288 $self -> {'raw_results_header'} = [];
1289 for ( my $i = 1; $i <= scalar @{$self->{'models'}}; $i++ ) { # All models
1290 if ( -e $self -> {'directory'}.'raw_results'.$i.'.csv' ) {
1291 open( RRES, $self -> {'directory'}.'raw_results'.$i.'.csv' );
1292 my @file = <RRES>;
1293 close( RRES );
1294 map { chomp; my @tmp = split(',',$_); $_ = \@tmp } @file ;
1296 my $header = shift @file;
1298 # Get rid of 'method' column
1299 my $cols = scalar(@{$header})-1;
1300 @{$self -> {'raw_results_header'}[$i-1]} = @{$header}[1..$cols];
1301 $self -> {'raw_results'} -> [$i-1] = \@file;
1302 for( my $j = 0; $j <= $#file; $j++ ) {
1303 if ( $file[$j][0] eq 'jackknife' ) {
1304 shift( @{$file[$j]} );
1305 # $self -> {'jackknife_raw_results'}[$i-1] = \@file;
1306 push( @{$self -> {'jackknife_raw_results'}[$i-1]}, $file[$j]);
1307 } else {
1308 shift( @{$file[$j]} );
1309 # $self -> {'bootstrap_raw_results'}[$i-1] = \@file;
1310 push( @{$self -> {'bootstrap_raw_results'}[$i-1]}, $file[$j] );
1316 end bca_read_raw_results
1318 # }}} bca_read_raw_results
1320 # {{{ calculate_diagnostic_means
1322 start calculate_diagnostic_means
1324 my ( @sum, @diagsum, %diag_idx );
1325 for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
1326 $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1329 my $def = 0;
1330 # Prepared model, skip the first (the original)
1331 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_diagnostics'} ->
1332 [$model_number-1]}; $k++ ) {
1333 # Diagnostics
1334 if( defined $self -> {'bootstrap_diagnostics'} ->
1335 [$model_number-1][$k] ) {
1336 $def++;
1337 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_diagnostics'} ->
1338 [$model_number-1][$k]}; $l++ ) {
1339 $sum[$l] += $self -> {'bootstrap_diagnostics'} ->
1340 [$model_number-1][$k][$l];
1345 # divide by the number of bootstrap samples (-1 to get rid of the original
1346 # model) The [0] in the index is there to indicate the 'model' level. Mostly
1347 # used for printing
1348 for ( my $l = 0; $l <= $#sum; $l++ ) {
1349 if( $l == $diag_idx{'significant_digits'} ) {
1350 $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1351 $sum[$l] / $def;
1352 } else {
1353 $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1354 $sum[$l] / ( scalar @{$self -> {'bootstrap_diagnostics'} ->
1355 [$model_number-1]} - 1);
1358 $self -> {'diagnostic_means_labels'} -> [$model_number-1] =
1359 [[],\@parameter_names];
1361 end calculate_diagnostic_means
1363 # }}} calculate_diagnostic_means
1365 # {{{ calculate_means
1367 start calculate_means
1369 my ( @sum, @diagsum );
1370 # Prepared model, skip the first (the original)
1371 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'} ->
1372 [$model_number-1]}; $k++ ) {
1373 # Estimates
1374 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'} ->
1375 [$model_number-1][$k]}; $l++ ) {
1376 $sum[$l] += $self -> {'bootstrap_estimates'} ->
1377 [$model_number-1][$k][$l];
1380 # divide by the number of bootstrap samples (-1 to get rid of the original
1381 # model) The [0] in the index is there to indicate the 'model' level. Mostly
1382 # used for printing
1383 my $samples = scalar @{$self -> {'bootstrap_estimates'} ->
1384 [$model_number-1]} - 1;
1385 for ( my $l = 0; $l <= $#sum; $l++ ) {
1386 my $mean = $sum[$l] / $samples;
1387 $self -> {'means'} -> [$model_number-1][0][$l] = $mean;
1388 my $bias = $mean - $self ->
1389 {'bootstrap_estimates'} -> [$model_number-1][0][$l];
1390 $self -> {'bias'} -> [$model_number-1][0][$l] = $bias;
1391 if ( $self->{'bootstrap_estimates'} -> [$model_number-1][0][$l] != 0 and
1392 $bias/$self->{'bootstrap_estimates'} -> [$model_number-1][0][$l]
1393 > $self -> {'large_bias_limit'} ) {
1394 $self -> {'large_bias'} -> [$model_number-1][0][$l] = 1;
1395 } else {
1396 $self -> {'large_bias'} -> [$model_number-1][0][$l] = 0;
1399 $self -> {'means_labels'} -> [$model_number-1] =
1400 [[],\@parameter_names];
1402 $self -> {'bias_labels'} -> [$model_number-1] =
1403 [[],\@parameter_names];
1405 end calculate_means
1407 # }}} calculate_means
1409 # {{{ calculate_jackknife_means
1411 start calculate_jackknife_means
1413 my @sum;
1414 # Prepared model, skip the first (the original)
1415 if( defined $self -> {'jackknife_estimates'} ){
1416 for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]}; $k++ ) {
1417 # Estimate
1418 for ( my $l = 0; $l <
1419 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][$k]}; $l++ ) {
1420 $sum[$l] += $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1423 # divide by the number of bootstrap samples (-1 to get rid of the original model)
1424 # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1425 for ( my $l = 0; $l <
1426 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][0]}; $l++ ) {
1427 if( ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1) != 0 ) {
1428 $self -> {'jackknife_means'} -> [$model_number-1][0][$l] =
1429 $sum[$l] / ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1);
1432 $self -> {'jackknife_means_labels'} -> [$model_number-1] = [[],\@parameter_names];
1435 end calculate_jackknife_means
1437 # }}} calculate_jackknife_means
1439 # {{{ calculate_medians
1440 start calculate_medians
1442 my @medians;
1443 # Loop the parameters
1444 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1445 [$model_number-1][0]}; $l++ ) {
1446 my @parameter_array;
1447 # From 1 to get rid of original model
1448 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1449 [$model_number-1]}; $k++ ) {
1450 $parameter_array[$k-1] =
1451 $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1453 my @sorted = sort {$a <=> $b} @parameter_array;
1454 # median postition is half the ( array length - 1 ).
1455 my $median_position = ( $#sorted ) / 2;
1456 my ($int_med,$frac_med) = split(/\./, $median_position );
1457 $frac_med = eval("0.".$frac_med);
1458 my $median_low = $sorted[ $int_med ];
1459 my $median_high = ( $sorted[ $int_med + 1 ] - $sorted[ $int_med ] ) * $frac_med;
1460 $medians[$l] = $median_low + $median_high;
1462 # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1463 $self -> {'medians'} -> [$model_number-1][0] = \@medians;
1464 $self -> {'medians_labels'} -> [$model_number-1] = [[],\@parameter_names];
1466 end calculate_medians
1467 # }}} calculate_medians
1469 # {{{ calculate_standard_error_confidence_intervals
1470 start calculate_standard_error_confidence_intervals
1472 # Sort the limits from the inside out
1473 my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1474 foreach my $limit ( @limits ) {
1475 my ( @lower_limits, @upper_limits, @within_ci );
1476 # Loop the estimates of the first (original) model
1477 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1478 [$model_number-1][0]}; $l++ ) {
1479 my $lower_limit =
1480 $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] -
1481 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1482 $self -> {'confidence_limits'} ->{$limit};
1483 my $upper_limit =
1484 $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] +
1485 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1486 $self -> {'confidence_limits'} ->{$limit};
1487 push( @lower_limits, $lower_limit );
1488 push( @upper_limits, $upper_limit );
1489 if ( $self -> {'se_confidence_intervals_check'} < $upper_limit and
1490 $self -> {'se_confidence_intervals_check'} > $lower_limit ) {
1491 push( @within_ci , 1 );
1492 } else {
1493 push( @within_ci , 0 );
1496 unshift( @{$self -> {'standard_error_confidence_intervals'} ->
1497 [$model_number-1]}, \@lower_limits );
1498 push( @{$self -> {'standard_error_confidence_intervals'} ->
1499 [$model_number-1]}, \@upper_limits );
1500 $self -> {'within_se_confidence_intervals'} ->
1501 [$model_number-1]{$limit} = \@within_ci;
1502 unshift( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1503 [$model_number-1][0]}, ($limit/2).'%' );
1504 push( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1505 [$model_number-1][0]}, (100-($limit/2)).'%' );
1506 push( @{$self -> {'within_se_confidence_intervals_labels'} ->
1507 [$model_number-1][0]}, $limit.'%' );
1509 $self -> {'standard_error_confidence_intervals_labels'} -> [$model_number-1][1] =
1510 \@parameter_names;
1511 $self -> {'within_se_confidence_intervals_labels'} -> [$model_number-1][1] =
1512 \@parameter_names;
1514 end calculate_standard_error_confidence_intervals
1515 # }}} calculate_standard_error_confidence_intervals
1517 # {{{ calculate_standard_errors
1519 start calculate_standard_errors
1521 my @se;
1522 # Prepared model, skip the first (the original)
1523 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]}; $k++ ) {
1524 # Estimate
1525 for ( my $l = 0; $l <
1526 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][$k]}; $l++ ) {
1527 $se[$l] += ( $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l] -
1528 $self -> {'means'}->[$model_number-1][0][$l] )**2;
1531 # divide by the number of bootstrap samples -1 (-2 to get rid of the original model)
1532 # The [0] in the index is there to indicate the 'model' level.
1533 for ( my $l = 0; $l <
1534 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][0]}; $l++ ) {
1535 my $div = ( scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]} - 2 );
1536 if( defined $div and not $div == 0 ) {
1537 $self -> {'standard_errors'} -> [$model_number-1][0][$l] =
1538 ($se[$l] / $div )**0.5;
1539 } else {
1540 $self -> {'standard_errors'} -> [$model_number-1][0][$l] = undef;
1543 $self -> {'standard_errors_labels'} -> [$model_number-1] = [[],\@parameter_names];
1545 end calculate_standard_errors
1547 # }}} calculate_standard_errors
1549 # {{{ calculate_bca_confidence_intervals
1551 start calculate_bca_confidence_intervals
1553 sub c_get_z0 {
1554 my $arr_ref = shift;
1555 my $orig_value = shift;
1556 my $num_less_than_orig = 0;
1557 my $nvalues = 0;
1558 my $z0;
1559 foreach my $value ( @{$arr_ref} ) {
1560 if ( defined $value and $value ne '' ) {
1561 $num_less_than_orig++ if ( $value < $orig_value );
1562 $nvalues ++;
1566 unless ( $nvalues == 0 ) {
1567 if ( ($num_less_than_orig / $nvalues ) == 0 ) {
1568 $z0 = -100;
1569 } elsif ( ($num_less_than_orig / $nvalues ) == 1 ) {
1570 $z0 = 100;
1571 } else {
1572 $z0 = udistr( 1 - ($num_less_than_orig / $nvalues ) );
1575 # return ( $z0, $nvalues );
1576 return $z0;
1579 sub c_get_acc {
1580 my $arr_ref = shift;
1581 my $jk_mean = shift;
1582 my $acc_upper = 0;
1583 my $acc_lower = 0;
1584 my $nvalues = 0;
1585 my $acc;
1586 foreach my $value ( @{$arr_ref} ){
1587 if ( defined $value and $value ne '' ) {
1588 $acc_upper = $acc_upper + ($jk_mean-$value)**3;
1589 $acc_lower = $acc_lower + ($jk_mean-$value)**2;
1590 $nvalues ++;
1593 $acc_lower = 6*($acc_lower**(3/2));
1594 unless ( $acc_lower == 0 ) {
1595 $acc = $acc_upper / $acc_lower;
1596 } else {
1597 $acc = $acc_upper / 0.001;
1599 # return ( $acc, $nvalues );
1600 return $acc;
1603 sub c_get_alphas {
1604 my $old_alphas = shift;
1605 my $acc = shift;
1606 my $z0 = shift;
1607 my $denom;
1608 my @new_alphas = ();
1609 foreach my $position ( @{$old_alphas} ) {
1610 if ( $position == 0 ){
1611 $denom = -100;
1612 } elsif ( $position == 100 ) {
1613 $denom = 100;
1614 } else {
1615 $denom = $z0 + udistr( 1 - $position/100 );
1617 my $nom = 1 - $acc * $denom;
1618 my $lim = 100*uprob( - ( $z0 + $denom / $nom ) );
1619 push( @new_alphas, $lim );
1621 return \@new_alphas;
1624 my @limits = sort { $a <=> $b } keys %{$self -> {'confidence_limits'}};
1625 # Add the upper limits
1626 my $limnum = $#limits;
1627 for ( my $i = $limnum; $i >= 0; $i-- ) {
1628 $limits[$i] = $limits[$i]/2;
1629 push( @limits, 100-$limits[$i] );
1631 my ( @bootstrap_array, @jackknife_array, @new_alphas, @z0, @acc );
1632 # Loop the estimates of the first (original) model
1633 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1634 [$model_number-1][0]}; $l++ ) {
1635 my ( @unsorted_array1, @unsorted_array2 );
1636 # Loop the bootstrap samples from 1 to get rid of original model
1637 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1638 [$model_number-1]}; $k++ ) {
1639 $unsorted_array1[$k-1] =
1640 $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1642 @{$bootstrap_array[$l]} = sort {$a <=> $b} @unsorted_array1;
1644 # Loop the jackknife samples from 1 to get rid of original model
1645 for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->
1646 [$model_number-1]}; $k++ ) {
1647 $unsorted_array2[$k-1] =
1648 $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1650 @{$jackknife_array[$l]} = sort {$a <=> $b} @unsorted_array2;
1651 $z0[$l] = c_get_z0 ( $bootstrap_array[$l],
1652 $self -> {'bootstrap_estimates'} ->
1653 [$model_number-1][0][$l] );
1654 $acc[$l] = c_get_acc ( $jackknife_array[$l],
1655 $self -> {'jackknife_means'} ->
1656 [$model_number-1][0][$l] );
1657 $new_alphas[$l] = c_get_alphas ( \@limits, $acc[$l], $z0[$l] );
1659 # Loop limits
1660 for ( my $lim_idx = 0; $lim_idx <= $#limits; $lim_idx++ ) {
1661 my @percentiles;
1662 # Loop parameters
1663 for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1664 my $limit = $new_alphas[$l][$lim_idx]/100;
1665 my $position = ( scalar @{$bootstrap_array[$l]} + 1 ) * $limit;
1666 my $percentile;
1667 if ( $position < 1 ) {
1668 $percentile = undef;
1669 } elsif ( $position > scalar @{$bootstrap_array[$l]} ) {
1670 $percentile = undef;
1671 } else {
1672 my ($int_med,$frac_med) = split(/\./, $position );
1673 $frac_med = eval("0.".$frac_med);
1674 my $percentile_low = $bootstrap_array[$l][ $int_med - 1];
1675 my $percentile_high = ( $bootstrap_array[$l][ $int_med ] -
1676 $bootstrap_array[$l][ $int_med - 1] ) * $frac_med;
1677 $percentile = $percentile_low + $percentile_high;
1679 push( @percentiles, $percentile );
1681 push( @{$self -> {'bca_confidence_intervals'} -> [$model_number-1]},
1682 \@percentiles );
1683 push( @{$self -> {'bca_confidence_intervals_labels'}->[$model_number-1][0]},
1684 $limits[$lim_idx].'%');
1686 # Check the intervals
1687 for ( my $lim_idx = 0; $lim_idx <= $limnum; $lim_idx++ ) {
1688 my @within_ci;
1689 for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1690 my $lower_limit = $self -> {'bca_confidence_intervals'} ->
1691 [$model_number-1][$lim_idx][$l];
1692 my $upper_limit = $self -> {'bca_confidence_intervals'} ->
1693 [$model_number-1][($limnum*2+1)-$lim_idx][$l];
1694 if ( $self -> {'bca_confidence_intervals_check'} < $upper_limit and
1695 $self -> {'bca_confidence_intervals_check'} > $lower_limit ) {
1696 push( @within_ci , 1 );
1697 } else {
1698 push( @within_ci , 0 );
1701 $self -> {'within_bca_confidence_intervals'} ->
1702 [$model_number-1]{$limits[$lim_idx]*2} = \@within_ci;
1704 $self -> {'bca_confidence_intervals_labels'} -> [$model_number-1][1] =
1705 \@parameter_names;
1707 end calculate_bca_confidence_intervals
1709 # }}} calculate_bca_confidence_intervals
1711 # {{{ calculate_percentile_confidence_intervals
1713 start calculate_percentile_confidence_intervals
1715 # Sort the limits from the inside out
1716 my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1717 foreach my $limit ( @limits ) {
1718 my ( @lower_limits, @upper_limits, @within_ci );
1719 # Loop the estimates of the first (original) model
1720 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1721 [$model_number-1][0]}; $l++ ) {
1722 my @parameter_array;
1723 # Loop the bootstrap samples from 1 to get rid of original model
1724 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1725 [$model_number-1]}; $k++ ) {
1726 my $val = $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1727 # get rid of undefined values (these were probably deleted
1728 # when the bootstrap_estimates was created
1729 push( @parameter_array, $val ) if( defined $val );
1731 my @sorted = sort {$a <=> $b} @parameter_array;
1732 for my $side ( 'lower', 'upper' ) {
1733 my $use_limit = $side eq 'lower' ? $limit/200 : 1-($limit/200);
1734 # percentile postition is:
1735 my $percentile_position = ( $#sorted + 2 ) * $use_limit;
1736 my $percentile;
1737 if ( $percentile_position < 1 ) {
1738 $percentile = undef;
1739 } elsif ( $percentile_position > $#sorted +1) {
1740 $percentile = undef;
1741 } else {
1742 my ($int_med,$frac_med) = split(/\./, $percentile_position );
1743 $frac_med = eval("0.".$frac_med);
1744 my $percentile_low = $sorted[ $int_med - 1];
1745 my $percentile_high = ( $sorted[ $int_med ] - $sorted[ $int_med - 1] ) * $frac_med;
1746 $percentile = $percentile_low + $percentile_high;
1748 push( @lower_limits, $percentile ) if ( $side eq 'lower' );
1749 push( @upper_limits, $percentile ) if ( $side eq 'upper' );
1751 if ( $self -> {'percentile_confidence_intervals_check'} < $upper_limits[$#upper_limits] and
1752 $self -> {'percentile_confidence_intervals_check'} > $lower_limits[$#lower_limits] ) {
1753 push( @within_ci , 1 );
1754 } else {
1755 push( @within_ci , 0 );
1758 unshift( @{$self -> {'percentile_confidence_intervals'} ->
1759 [$model_number-1]}, \@lower_limits );
1760 push( @{$self -> {'percentile_confidence_intervals'} ->
1761 [$model_number-1]}, \@upper_limits );
1762 unshift( @{$self -> {'percentile_confidence_intervals_labels'}->
1763 [$model_number-1][0]}, ($limit/2).'%' );
1764 push( @{$self -> {'percentile_confidence_intervals_labels'}->
1765 [$model_number-1][0]},(100-($limit/2)).'%');
1766 $self -> {'within_percentiles'}->[$model_number-1]{$limit}=\@within_ci;
1768 $self -> {'percentile_confidence_intervals_labels'} ->
1769 [$model_number-1][1] = \@parameter_names;
1771 end calculate_percentile_confidence_intervals
1773 # }}} calculate_percentile_confidence_intervals
1775 # {{{ modelfit_post_fork_analyze
1777 start modelfit_post_fork_analyze
1778 end modelfit_post_fork_analyze
1780 # }}}
1782 # {{{ resample
1784 start resample
1786 my $dataObj = $model -> datas -> [0];
1787 for( my $i = 1; $i <= $self -> {'samples'}; $i++ ) {
1788 my ($bs_dir, $bs_name) = OSspecific::absolute_path( $self -> {'directory'}, "bs$i.dta" );
1789 my $new_name = $bs_dir . $bs_name;
1790 my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1791 'new_name' => $new_name,
1792 'target' => $target );
1793 my $newmodel = $model -> copy( filename => "bs$i.mod",
1794 target => $target,
1795 ignore_missing_files => 1 );
1796 $newmodel -> datafile( new_name => "bs$i.dta" );
1797 $newmodel -> datas -> [0] = $boot_sample ;
1798 $newmodel -> write;
1799 push( @resample_models, $newmodel );
1802 end resample
1804 # }}} resample
1806 # {{{ _sampleTools
1807 start _sampleTools
1809 foreach my $tool ( @{$self -> {'tools'}} ) {
1810 my @models = @{$tool -> models};
1811 foreach my $model (@models){
1812 my $dataObj = $model -> datas -> [0];
1813 for( my $i = 1; $i <= $samples; $i++ ) {
1814 my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1815 'new_name' => "bs$i.dta",
1816 'target' => $target );
1817 my $newmodel;
1818 $newmodel = $model -> copy( filename => "bs$i.mod" );
1819 $newmodel -> datafile( new_name => "bs$i.dta" );
1820 $newmodel -> datas -> [0] = $boot_sample ;
1821 $newmodel -> write;
1822 if( defined( $tool -> models ) ){
1823 push( @{$tool -> models}, $newmodel );
1824 } else {
1825 $tool -> models( [ $newmodel ] );
1831 end _sampleTools
1832 # }}} _sampleTools
1834 # {{{ print_results
1836 start print_results
1838 # Run the print_results specific for the subtool
1839 my $sub_print_results = $self -> {'subtools'} -> [0];
1840 if ( defined $sub_print_results ) {
1841 sub get_dim {
1842 my $arr = shift;
1843 my $dim = shift;
1844 my $size_ref = shift;
1845 $dim++;
1846 if ( defined $arr and ref($arr) eq 'ARRAY' ) {
1847 push( @{$size_ref}, scalar @{$arr} );
1848 ( $dim, $size_ref ) = get_dim( $arr->[0], $dim, $size_ref );
1850 return ( $dim, $size_ref );
1852 sub format_value {
1853 my $val = shift;
1854 if ( not defined $val or $val eq '' ) {
1855 return sprintf("%10s",$PsN::output_style).',';
1856 } else {
1857 $_ = $val;
1858 my $nodot = /.*\..*/ ? 0 : 1;
1859 $_ =~ s/\.//g;
1860 if ( /.*\D+.*/ or $nodot) {
1861 return sprintf("%10s",$val).',';
1862 } else {
1863 return sprintf("%10.5f",$val).',';
1867 debug -> die( message => "No results_file defined" )
1868 unless ( defined $self -> {'results_file'} );
1869 open ( RES, ">".$self -> {'directory'}.'/'.$self -> {'results_file'} );
1870 if ( defined $self -> {'results'} ) {
1871 my @all_results = @{$self -> {'results'}};
1872 for ( my $i = 0; $i <= $#all_results; $i++ ) {
1873 if ( defined $all_results[$i]{'own'} ) {
1874 my @my_results = @{$all_results[$i]{'own'}};
1875 for ( my $j = 0; $j <= $#my_results; $j++ ) {
1876 # These size estimates include the problem and sub_problem dimensions:
1877 my ( $ldim, $lsize_ref ) = get_dim( $my_results[$j]{'labels'}, -1, [] );
1878 my ( $vdim, $vsize_ref ) = get_dim( $my_results[$j]{'values'}, -1, [] );
1879 print RES $my_results[$j]{'name'},"\n" if ( $vdim > 1 );
1880 if ( defined $my_results[$j]{'values'} and
1881 scalar @{$my_results[$j]{'values'}} >= 0 ) {
1882 my @values = @{$my_results[$j]{'values'}};
1883 my @labels;
1884 if ( defined $my_results[$j]{'labels'} and
1885 scalar @{$my_results[$j]{'labels'}} >= 0 ) {
1886 @labels = @{$my_results[$j]{'labels'}};
1889 # Print Header Labels
1890 if ( $ldim == 0 ) {
1891 my $label = \@labels;
1892 print RES ','.format_value($label),"\n";
1893 } elsif ( $ldim == 2 and defined $labels[1] ) {
1894 print RES ',';
1895 for ( my $n = 0; $n < scalar @{$labels[1]}; $n++ ) {
1896 my $label = $labels[1][$n];
1897 print RES format_value($label);
1899 print RES "\n" if ( scalar @{$labels[1]} );
1901 # Print the values:
1902 if ( $vdim == 0 ) {
1903 print RES ','.format_value(\@values),"\n";
1904 } elsif ( $vdim == 1 ) {
1905 for ( my $m = 0; $m < scalar @{\@values}; $m++ ) {
1906 my $label = $labels[$m];
1907 print RES ','.format_value($label);
1908 my $val = $values[$m];
1909 print RES ','.format_value($val),"\n";
1911 } elsif ( $vdim == 2 ) {
1912 for ( my $m = 0; $m < scalar @{\@values}; $m++ ) {
1913 my $label;
1914 if ( $ldim == 1 ) {
1915 $label = $labels[$m];
1916 } elsif ( $ldim == 2 ) {
1917 $label = $labels[0][$m];
1919 print RES format_value($label);
1920 if ( defined $values[$m] ) {
1921 for ( my $n = 0; $n < scalar @{$values[$m]}; $n++ ) {
1922 print RES format_value($values[$m][$n]);
1925 print RES "\n";
1933 close( RES );
1934 } else {
1935 debug -> warn( level => 2,
1936 message => "No subtools defined".
1937 ", using default printing routine" );
1940 end print_results
1942 # }}}
1944 # {{{ create_matlab_scripts
1946 start create_matlab_scripts
1948 if( defined $PsN::config -> {'_'} -> {'matlab_dir'} ){
1949 unless( -e $PsN::config -> {'_'} -> {'matlab_dir'} . '/histograms.m' and
1950 -e $PsN::config -> {'_'} -> {'matlab_dir'} . '/bca.m' ){
1951 'debug' -> die( message => 'Bootstrap matlab template scripts are not installed, no matlab scripts will be generated.' );
1952 return;
1955 open( PROF, $PsN::config -> {'_'} -> {'matlab_dir'} . '/histograms.m' );
1956 my @file = <PROF>;
1957 close( PROF );
1958 my $found_code;
1959 my $code_area_start=0;
1960 my $code_area_end=0;
1963 for(my $i = 0;$i < scalar(@file); $i++) {
1964 if( $file[$i] =~ /% ---------- Autogenerated code below ----------/ ){
1965 $found_code = 1;
1966 $code_area_start = $i;
1968 if( $file[$i] =~ /% ---------- End autogenerated code ----------/ ){
1969 unless( $found_code ){
1970 'debug' -> die ( message => 'Bootstrap matlab template script is malformated, no matlab scripts will be generated' );
1971 return;
1973 $code_area_end = $i;
1977 my @auto_code;
1978 if( $self -> {'type'} eq 'bca' ){
1979 push( @auto_code, "use_bca = 1; % Was a BCa-type of\n" );
1980 } else {
1981 push( @auto_code, "use_bca = 0; % Was a BCa-type of\n" );
1984 push( @auto_code, " % bootstrap run?\n" );
1985 if( ref $self -> {'samples'} eq 'ARRAY' ) {
1986 push( @auto_code, "bs_samples = ".$self -> {'samples'}->[0][0]."; % Number of bootstrap samples\n" );
1987 } else {
1988 push( @auto_code, "bs_samples = ".$self -> {'samples'}."; % Number of bootstrap samples\n" );
1990 if( $self -> {'type'} eq 'bca' ){
1991 push( @auto_code, "jk_samples = 36; % Number of (BCa) jackknife samples\n\n" );
1994 push( @auto_code, "str_format = '%30s';\n\n" );
1996 push( @auto_code, "col_names = [ sprintf(str_format,'Significant Digits');\n" );
1997 push( @auto_code, " sprintf(str_format,'OFV');\n" );
1999 my $nps = $self -> {'models'} -> [0] -> nomegas -> [0];
2001 my %param_names;
2002 my( @par_names, @se_names, @np_names, @sh_names );
2003 foreach my $param ( 'theta','omega','sigma' ) {
2004 my $labels = $self -> {'models'} -> [0] -> labels( parameter_type => $param );
2005 if ( defined $labels ){
2006 foreach my $label ( @{$labels -> [0]} ){
2007 push( @par_names, " sprintf(str_format,'",$label,"');\n" );
2008 push( @se_names, " sprintf(str_format,'",'se-'.$label,"');\n" );
2013 for( my $i = 1; $i <= ($nps*($nps+1)/2); $i++ ) {
2014 push( @np_names, " sprintf(str_format,'",'np-om'.$i,"');\n" );
2017 for( my $i = 1; $i <= $nps; $i++ ) {
2018 push( @sh_names, " sprintf(str_format,'",'shrinkage-eta'.$i,"');\n" );
2021 push( @sh_names, " sprintf(str_format,'",'shrinkage-iwres',"');\n" );
2023 push( @auto_code,(@par_names, @se_names, @np_names, @sh_names));
2024 push( @auto_code, " ];\n\n" );
2026 my @np_columns = (0) x ($nps*($nps+1)/2);
2027 my @sh_columns = (0) x ($nps+1);
2029 if( $self -> {'type'} eq 'bca' ){
2030 push( @auto_code, "fixed_columns = [ 0, 0, " );
2031 } else {
2032 push( @auto_code, "fixed_columns = [ 0, 0, " );
2034 my @fixed_columns;
2035 my @same_columns;
2036 foreach my $param ( 'theta','omega','sigma' ) {
2037 my $fixed = $self -> {'models'} -> [0] -> fixed( parameter_type => $param );
2039 if ( defined $fixed ){
2040 push( @fixed_columns, @{$fixed -> [0]} );
2041 if( $param eq 'theta' ) {
2042 push( @same_columns, (0) x scalar( @{$fixed -> [0]} ) );
2046 push( @auto_code , join( ', ' , @fixed_columns).', '.
2047 join( ', ' , @fixed_columns).', '.
2048 join( ', ' , @np_columns).', '.
2049 join( ', ' , @sh_columns)."];\n\n" );
2051 if( $self -> {'type'} eq 'bca' ){
2052 push( @auto_code, "same_columns = [ 0, 0, " );
2053 } else {
2054 push( @auto_code, "same_columns = [ 0, 0, " );
2056 foreach my $param ( 'omegas','sigmas' ) {
2057 my $parameters = $self -> {'models'} -> [0] -> problems -> [0] -> $param;
2058 foreach my $parameter ( @{$parameters} ){
2059 if( $parameter -> same() ){
2060 push( @same_columns, (1) x $parameter -> size() );
2061 } else {
2062 push( @same_columns, (0) x scalar @{$parameter -> options} );
2066 push( @auto_code , join( ', ' , @same_columns ).', '.
2067 join( ', ' , @same_columns).', '.
2068 join( ', ' , @np_columns).', '.
2069 join( ', ' , @sh_columns)."];\n\n" );
2071 push( @auto_code , 'npomegas = '.($nps*($nps+1)/2).";\n\n" );
2074 push( @auto_code, "minimization_successful_col = 4 + use_bca; % Column number for the\n" );
2075 push( @auto_code, " % minimization sucessful flag\n" );
2076 push( @auto_code, "covariance_step_successful_col = 5 + use_bca; % As above for cov-step warnings\n" );
2077 push( @auto_code, "covariance_step_warnings_col = 6 + use_bca; % etc\n" );
2078 push( @auto_code, "estimate_near_boundary_col = 7 + use_bca; % etc\n" );
2080 push( @auto_code, "not_data_cols = 11 + use_bca; % Number of columns in the\n" );
2081 push( @auto_code, " % beginning that are not\n" );
2082 push( @auto_code, " % parameter estimates.\n" );
2084 push( @auto_code, "filename = 'raw_results_matlab.csv';\n" );
2086 splice( @file, $code_area_start, ($code_area_end - $code_area_start), @auto_code );
2087 open( OUTFILE, ">", $self -> {'directory'} . "/histograms.m" );
2088 print OUTFILE "addpath " . $PsN::config -> {'_'} -> {'matlab_dir'} . ";\n";
2089 print OUTFILE @file ;
2090 close OUTFILE;
2092 open( OUTFILE, ">", $self -> {'directory'} . "/raw_results_matlab.csv" );
2093 for( my $i = 0; $i < scalar ( @{$self -> {'raw_results'} -> [0]} ); $i ++ ){
2094 $self -> {'raw_results'} -> [0] -> [$i][0] =
2095 $self -> {'raw_results'} -> [0] -> [$i][0] eq 'bootstrap' ?
2096 1 : $self -> {'raw_results'} -> [0] -> [$i][0];
2097 $self -> {'raw_results'} -> [0] -> [$i][0] =
2098 $self -> {'raw_results'} -> [0] -> [$i][0] eq 'jackknife' ?
2099 2 : $self -> {'raw_results'} -> [0] -> [$i][0];
2100 map( $_ = $_ eq 'NA' ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2101 print OUTFILE join( ',', @{$self -> {'raw_results'} -> [0] -> [$i]} ), "\n";
2103 close OUTFILE;
2105 } else {
2106 'debug' -> die( message => 'matlab_dir not configured, no matlab scripts will be generated.');
2107 return;
2110 end create_matlab_scripts
2112 # }}}
2114 # {{{ create_R_scripts
2115 start create_R_scripts
2117 unless( -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ){
2118 'debug' -> die( message => 'Bootstrap R-script are not installed, no R-script will be generated.' );
2119 return;
2121 cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
2123 end create_R_scripts
2124 # }}}