Merged raw_results bug fixes from serial_patches branch
[PsN.git] / lib / tool / bootstrap_subs.pm
blob4b09a6ac2eda0750c1a0125cc2b78c542e1a3615
1 # {{{ include
3 start include statements
4 use strict;
5 use File::Copy 'cp';
6 use data;
7 use OSspecific;
8 use tool::llp;
9 use tool::cdd::jackknife;
10 use ext::Statistics::Distributions 'udistr', 'uprob';
11 use Math::Random;
12 use Data::Dumper;
13 end include
15 # }}} include statements
17 # {{{ new
19 start new
21 foreach my $attribute ( 'logfile', 'raw_results_file','raw_nonp_file' ) {
22 if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
23 ref($this -> {$attribute}) eq 'HASH' ) ) {
24 my $tmp = $this -> {$attribute};
25 $this -> {$attribute} = [];
26 for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
27 my $name = $tmp;
28 if ( $name =~ /\./ ) {
29 $name =~ s/\./$i\./;
30 } else {
31 $name = $name.$i;
33 my $ldir;
34 ( $ldir, $name ) =
35 OSspecific::absolute_path( $this -> {'directory'}, $name );
36 push ( @{$this -> {$attribute}}, $ldir.$name ) ;
41 end new
43 # }}} new
45 # {{{ general_pre_fork_setup
47 start general_pre_fork_setup
49 # These attributes can be given as a
50 # 1. A scalar : used for all models and problems (subjects can be a scalar ref to a hash)
51 # 2. A 1-dim. array : specified per problem but same for all models
52 # 3. A 2-dim. array : specified per problem and model
54 foreach my $attribute_string( 'stratify_on', 'samples', 'subjects' ){
55 my $attribute = $self -> {$attribute_string};
56 if ( defined $attribute ) {
57 # If stratification is given we check at what level it is
58 # given.
59 unless ( ref( \$attribute ) eq 'SCALAR' or
60 ref( \$attribute ) eq 'REF' or
61 ( ref( $attribute ) eq 'ARRAY' and scalar @{$attribute} > 0 ) ) {
62 # Here we know its neither scalar or a correct array. But we
63 # seem to assume the lenght be equal to the number of
64 # models.
65 debug -> die( message => "attribute $attribute_string is " .
66 "defined as a " . ref( $attribute ) .
67 " but is neither a scalar or a non-zero size array" );
68 } elsif ( ref( \$attribute ) eq 'SCALAR' or ref( \$attribute ) eq 'REF' ) {
69 # If it is a scalar we copy the scalar value into an array
70 # for each model, with one value for each problem.
71 my @mo_attribute = ();
72 foreach my $model ( @{$self -> {'models'}} ) {
73 my @pr_attribute = ();
74 foreach my $problem ( @{$model -> problems} ) {
75 push( @pr_attribute, $attribute );
77 push( @mo_attribute, \@pr_attribute );
79 $self -> {$attribute_string} = \@mo_attribute;
80 } elsif ( ref( $attribute ) eq 'ARRAY' ) {
81 # If it is an array we check if the it is an array of
82 # scalars or of arrays. If it is of arrays we seem to
83 # assume that the arrays have a length the matches the
84 # number of problems. It it is an array of scalars, we
85 # copy that scalar into arrays for each problem.
86 unless ( ref( \$attribute -> [0] ) eq 'SCALAR' or
87 ( ref( $attribute -> [0] ) eq 'ARRAY' and scalar @{$attribute -> [0]} > 0 ) ) {
88 debug -> die( message => "attribute $attribute is ",
89 "defined as a ",ref( $attribute -> [0] ),
90 "and is neither a scalar or a non-zero size array" );
91 } elsif ( ref(\$attribute -> [0]) eq 'SCALAR' ) {
92 my @mo_attribute = ();
93 foreach my $model ( @{$self -> {'models'}} ) {
94 push( @mo_attribute, $attribute );
96 $self -> {$attribute_string} = \@mo_attribute;
99 } else {
100 # If stratify is not defined we copy an undefined value to
101 # arrays corresponding to models and subproblem. I wonder if
102 # this is really necessary.
103 my @mo_attribute = ();
104 foreach my $model ( @{$self -> {'models'}} ) {
105 my @pr_attribute = ();
106 foreach my $data ( @{$model -> datas} ) {
107 if( $attribute_string eq 'stratify_on' ){
108 push( @pr_attribute, undef );
109 } elsif ( $attribute_string eq 'samples' ){
110 push( @pr_attribute, 200 );
111 } elsif ( $attribute_string eq 'subjects' ){
112 push( @pr_attribute, {'default'=>($data -> count_ind)} );
115 push( @mo_attribute, \@pr_attribute );
117 $self -> {$attribute_string} = \@mo_attribute;
121 end general_pre_fork_setup
123 # }}}
125 # {{{ modelfit_pre_fork_setup
127 start modelfit_pre_fork_setup
129 $self -> general_pre_fork_setup( model_number => $model_number );
131 end modelfit_pre_fork_setup
133 # }}}
135 # {{{ llp_pre_fork_setup
137 start llp_pre_fork_setup
139 $self -> general_pre_fork_setup( model_number => $model_number );
141 end llp_pre_fork_setup
143 # }}}
145 # {{{ modelfit_setup
147 start modelfit_setup
150 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
151 $self -> {'threads'} -> [1]:$self -> {'threads'};
152 $self -> general_setup( model_number => $model_number,
153 class => 'tool::modelfit',
154 subm_threads => $subm_threads );
156 end modelfit_setup
158 # }}}
160 # {{{ general_setup
162 start general_setup
165 # If the number of threads are given per tool, e.g. [2,5] meaning 2 threads for
166 # scm and 5 for the modelfit.
167 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
168 $self -> {'threads'} -> [1]:$self -> {'threads'};
169 my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
170 $self -> {'threads'} -> [0]:$self -> {'threads'};
171 # More threads than models?
172 my $num = scalar @{$self -> {'models'}};
173 $own_threads = $num if ( $own_threads > $num );
175 # Sub tool threads can be given as scalar or reference to an array?
176 #my $subm_threads = $parm{'subm_threads'};
177 #my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
178 # $self -> {'threads'} -> [0]:$self -> {'threads'};
179 # Group variable names are matched in the model, not the data!
180 my $model = $self -> {'models'} -> [$model_number-1];
181 my @samples = @{$self -> {'samples'} -> [$model_number-1]};
182 my @stratify_on = @{$self -> {'stratify_on'} -> [$model_number-1]};
183 my @subjects = @{$self -> {'subjects'} -> [$model_number-1]};
185 # Check which models that hasn't been run and run them This
186 # will be performed each step but will only result in running
187 # models at the first step, if at all.
189 # If more than one process is used, there is a VERY high risk
190 # of interaction between the processes when creating
191 # directories for model fits. Therefore the {'directory'}
192 # attribute is given explicitly below.
194 # ------------------------ Run original run -------------------------------
196 # {{{ orig run
198 unless ( $model -> is_run ) {
199 my %subargs = ();
200 if ( defined $self -> {'subtool_arguments'} ) {
201 %subargs = %{$self -> {'subtool_arguments'}};
204 if( $self -> {'nonparametric_etas'} or
205 $self -> {'nonparametric_marginals'} ) {
206 $model -> add_nonparametric_code;
209 my $orig_fit = tool::modelfit ->
210 new( reference_object => $self,
211 base_directory => $self -> {'directory'},
212 directory => $self -> {'directory'}.
213 '/orig_modelfit_dir'.$model_number,
214 models => [$model],
215 threads => $subm_threads,
216 parent_threads => $own_threads,
217 parent_tool_id => $self -> {'tool_id'},
218 logfile => undef,
219 raw_results => undef,
220 prepared_models => undef,
221 top_tool => 0,
222 %subargs );
224 ui -> print( category => 'bootstrap',
225 message => 'Executing base model.' );
227 $orig_fit -> run;
231 my $output = $model -> outputs -> [0];
233 # }}} orig run
235 # ------------------------ Print a log-header -----------------------------
237 # {{{ log header
239 my @orig_datas = @{$model -> datas};
240 my @problems = @{$model -> problems};
241 my @new_models;
243 # Print a log-header
244 # Lasse 2005-04-21: The minimization_message print will probably not work anymore
245 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
246 my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME ').',';
247 print LOG sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME '),',';
248 foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
249 my $orig_ests = $model -> outputs -> [0] -> $param;
250 # Loop the problems
251 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
252 if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
253 my $label = uc($param)."_".($j+1);
254 $ui_text = $ui_text.sprintf("%12s",$label).',';
255 print LOG sprintf("%12s",$label),',';
256 } else {
257 # Loop the parameter numbers (skip sub problem level)
258 for ( my $num = 1; $num <= scalar @{$orig_ests -> [$j][0]}; $num++ ) {
259 my $label = uc($param).$num."_".($j+1);
260 $ui_text = $ui_text.sprintf("%12s",$label).',';
261 print LOG sprintf("%12s",$label),',';
266 print LOG "\n";
268 # }}} log header
270 # ------------------------ Log original run -------------------------------
272 # {{{ Log original run
274 # Lasse 2005-04-21: The minimization_message print will probably not work anymore
275 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
276 $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename).',';
277 print LOG sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename),',';
278 foreach my $param ( 'ofv', 'minimization_message', 'covariance_step_successful' ) {
279 my $orig_ests = $model -> outputs -> [0] -> $param;
280 # Loop the problems
281 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
282 if ( ref( $orig_ests -> [$j][0] ) ne 'ARRAY' ) {
283 $orig_ests -> [$j][0] =~ s/\n//g;
284 $ui_text = $ui_text.sprintf("%12s",$orig_ests -> [$j][0]).',';
285 print LOG sprintf("%12s",$orig_ests -> [$j][0]),',';
286 } else {
287 # Loop the parameter numbers (skip sub problem level)
288 for ( my $num = 0; $num < scalar @{$orig_ests -> [$j][0]}; $num++ ) {
289 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
290 print LOG sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
295 print LOG "\n";
297 # }}} Log original run
299 # TODO: In this loop we loose one dimension (problem) in that
300 # all new models with new data sets are put in the same array,
301 # regardless of which problem the initially belonged to. Fix
302 # this.
304 if ( $#orig_datas < 0 ) {
305 debug -> warn( level => 1,
306 message => "No data files to resample from" );
307 } else {
308 debug -> warn( level => 1,
309 message => "Starting bootstrap sampling" );
311 for ( my $i = 1; $i <= scalar @orig_datas; $i++ ) {
312 my $orig_data = $orig_datas[$i-1];
314 if ( $self -> {'drop_dropped'} ) {
315 my $model_copy = $model -> copy( copy_data => 1,
316 filename => "drop_copy_$i.mod",
317 directory => $self -> {'directory'} );
319 $model_copy -> drop_dropped;
320 $model_copy -> _write( write_data => 1 );
321 $model_copy -> datas -> [0] -> flush();
322 $orig_data = $model -> datas -> [0];
323 $model = $model_copy;
326 my ( @seed, $new_datas, $incl_ids, $incl_keys, $new_mod );
328 my $done = ( -e $self -> {'directory'}."/m$model_number/done.$i" ) ? 1 : 0;
330 if ( not $done ) {
331 ui -> print( category => 'bootstrap',
332 message => "Resampling from ".$orig_data -> filename );
334 ( $new_datas, $incl_ids, $incl_keys )
335 = $orig_data -> bootstrap( directory => $self -> {'directory'}.'/m'.$model_number,
336 name_stub => 'bs_pr'.$i,
337 samples => $samples[$i-1],
338 subjects => $subjects[$i-1],
339 stratify_on => $stratify_on[$i-1],
340 target => 'disk',
341 model_ids => $self -> {'prepared_model_ids'} );
344 for ( my $j = 0; $j < $samples[$i-1]; $j++ ) {
345 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
346 'bs_pr'.$i.'_'.($j+1).'.mod' );
347 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number ,
348 # '/bs_pr'.$i.'_'.$j.'.lst' );
349 my $prob_copy = Storable::dclone($problems[$i-1]);
350 # $Data::Dumper::Maxdepth = 2;
351 # print Dumper $model; die;
353 $new_mod = model ->
354 new(reference_object => $model,
355 sde => 0,
356 outputs => undef,
357 datas => undef,
358 synced => undef,
359 problems => undef,
360 active_problems => undef,
361 directory => $model_dir,
362 filename => $filename,
363 outputfile => undef,
364 problems => [$prob_copy],
365 extra_files => $model -> extra_files,
366 target => 'disk',
367 ignore_missing_files => 1 );
369 if( $self -> {'shrinkage'} ) {
370 $new_mod -> shrinkage_stats( enabled => 1 );
371 my @problems = @{$new_mod -> problems};
372 for( my $i = 1; $i <= scalar @problems; $i++ ) {
373 $problems[ $i-1 ] -> shrinkage_module -> model( $new_mod );
377 my $model_id = $new_mod -> register_in_database;
378 push( @{$self -> {'prepared_model_ids'}}, $model_id );
380 $self -> register_tm_relation( model_ids => [$model_id],
381 prepared_models => 1 );
383 $new_mod -> datas ( [$new_datas -> [$j]] );
385 if( $self -> {'nonparametric_etas'} or
386 $self -> {'nonparametric_marginals'} ) {
387 $new_mod -> add_nonparametric_code;
390 $new_mod -> update_inits( from_output => $output );
391 $new_mod -> _write;
393 push( @new_models, $new_mod );
395 # Create a checkpoint. Log the samples and individuals.
396 open( DONE, ">".$self -> {'directory'}."/m$model_number/done.$i" ) ;
397 print DONE "Resampling from ",$orig_data -> filename, " performed\n";
398 print DONE "$samples[$i-1] samples\n";
399 while( my ( $strata, $samples ) = each %{$subjects[$i-1]} ) {
400 print DONE "Strata $strata: $samples sample_size\n";
402 print DONE "Included individuals:\n";
403 @seed = random_get_seed;
404 print DONE "seed: @seed\n";
405 for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
406 print DONE join(',',@{$incl_ids -> [$k]}),"\n";
408 print DONE "Included keys:\n";
409 for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
410 print DONE join(',',@{$incl_keys -> [$k]}),"\n";
412 close( DONE );
413 open( INCL, ">".$self -> {'directory'}."included_individuals".$model_number.".csv" ) ;
414 for( my $k = 0; $k < scalar @{$incl_ids}; $k++ ) {
415 print INCL join(',',@{$incl_ids -> [$k]}),"\n";
417 close( INCL );
418 open( KEYS, ">".$self -> {'directory'}."included_keys".$model_number.".csv" ) ;
419 open( SAMPLEKEYS, ">".$self -> {'directory'}."sample_keys".$model_number.".csv" ) ;
420 my $ninds= ($orig_data -> count_ind());
421 for( my $k = 0; $k < scalar @{$incl_keys}; $k++ ) {
422 my %sample_keys;
423 my $sample_size = scalar @{$incl_keys -> [$k]};
424 for ( my $l = 0; $l < $ninds; $l++ ) {
425 $sample_keys{$incl_keys -> [$k][$l]}++;
427 for ( my $l = 0; $l < $ninds; $l++ ) {
428 my $val = defined $sample_keys{$l} ? $sample_keys{$l} : 0;
429 my $extra = ($l == ($ninds-1)) ? "\n" : ',';
430 print SAMPLEKEYS $val,$extra;
432 print KEYS join(',',@{$incl_keys -> [$k]}),"\n";
434 close( KEYS );
435 close( SAMPLEKEYS );
436 } else {
437 ui -> print( category => 'bootstrap',
438 message => "Recreating bootstrap from previous run." );
440 # Recreate the datasets and models from a checkpoint
441 my ($stored_filename, $stored_samples, %stored_subjects);
442 my @seed;
443 my ($stored_filename_found, $stored_samples_found, $stored_subjects_found, $stored_seed_found);
444 open( DONE, $self -> {'directory'}."/m$model_number/done.$i" );
445 while( <DONE> ){
446 if( /^Resampling from (.+) performed$/ ){
447 $stored_filename = $1;
448 $stored_filename_found = 1;
449 next;
451 if( /^(\d+) samples$/ ){
452 ui -> print( category => 'bootstrap',
453 message => "Samples: $1" );
454 $stored_samples = $1;
455 $stored_samples_found = 1;
456 next;
458 if( /^(\d+) subjects$/ ){
459 # Old format (pre 2.2.2)
460 $stored_subjects{'default'} = $1;
461 $stored_subjects_found = 1;
462 next;
464 if( /^Strata (\w+): (\d+) sample_size$/ ){
465 ui -> print( category => 'bootstrap',
466 message => "Strata $1, samples size: $2" );
467 $stored_subjects{$1} = $2;
468 $stored_subjects_found = 1;
469 next;
471 if( /^seed: (\d+) (\d+)$/ ){
472 @seed = ($1, $2);
473 $stored_seed_found = 1;
474 next;
477 if( $stored_filename_found and $stored_samples_found
478 and $stored_subjects_found and $stored_seed_found ){
479 last;
482 close( DONE );
483 unless( $stored_filename_found and $stored_samples_found
484 and $stored_samples_found and $stored_seed_found ){
485 debug -> die( level => 1,
486 message => "The bootstrap/m1/done file could not be parsed." );
489 if ( $stored_samples < $samples[$i-1] ) {
490 debug -> die( message => "The number of samples saved in previous run ($stored_samples) ".
491 "are bigger than the number of samples specified for this run (".
492 $samples[$i-1].")" );
494 while( my ( $strata, $samples ) = each %{$subjects[$i-1]} ) {
495 if ( $stored_subjects{$strata} != $samples ) {
496 debug -> die( message => "The number of individuals sampled i strata $strata ".
497 "in previous run (".
498 $stored_subjects{$strata}.
499 ") does not match the number of individuals specified ".
500 "for this run (".$samples.")" );
503 while( my ( $strata, $samples ) = each %stored_subjects ) {
504 if ( $subjects[$i-1]->{$strata} != $samples ) {
505 debug -> die( message => "The number of individuals sampled i strata $strata ".
506 "in previous run (". $samples .
507 ") does not match the number of individuals specified ".
508 "for this run (".$subjects[$i-1]->{$strata}.")" );
511 # my ( @stored_ids, @stored_keys );
512 # for ( my $k = 4; $k < 4+$stored_samples; $k++ ) {
513 # chomp($rows[$k]);
514 # my @sample_ids = split(',', $rows[$k] );
515 # push( @stored_ids, \@sample_ids );
517 # for ( my $k = 5+$stored_samples; $k < 5+2*$stored_samples; $k++ ) {
518 # chomp($rows[$k]);
519 # my @sample_keys = split(',', $rows[$k] );
520 # push( @stored_keys, \@sample_keys );
522 # @seed = split(' ',$rows[5+2*$stored_samples]);
523 # $incl_ids = \@stored_ids;
524 # $incl_keys = \@stored_keys;
525 # shift( @seed ); # get rid of 'seed'-word
526 # Reinitiate the model objects
527 for ( my $j = 1; $j <= $samples[$i-1]; $j++ ) {
528 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
529 $model_number,
530 'bs_pr'.$i.'_'.$j.'.mod' );
531 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
532 # $model_number,
533 # '/bs_pr'.$i.'_'.$j.'.lst' );
534 $new_mod = model ->
535 new( directory => $model_dir,
536 filename => $filename,
537 # outputfile => $outfilename,
538 extra_files => $model -> extra_files,
539 target => 'disk',
540 ignore_missing_files => 1 );
541 # $new_mod -> target( 'disk' );
542 push( @new_models, $new_mod );
543 # print "$j\n";
544 # print Dumper $new_mod;
545 # sleep(10);
547 random_set_seed( @seed );
548 ui -> print( category => 'bootstrap',
549 message => "Using $stored_samples previously resampled ".
550 "bootstrap sets from $stored_filename" )
551 unless $self -> {'parent_threads'} > 1;
553 # push( @{$self -> {'included_inds'} -> [$model_number-1]}, $incl_ids );
554 # push( @{$self -> {'included_keys'} -> [$model_number-1]}, $incl_keys );
556 $self -> {'prepared_models'}[$model_number-1]{'own'} = \@new_models;
558 # --------------------- Create the sub tools ------------------------------
560 # {{{ sub tools
562 my $subdir = $class;
563 $subdir =~ s/tool:://;
564 my @subtools = @{$self -> {'subtools'}};
565 shift( @subtools );
566 my %subargs = ();
567 if ( defined $self -> {'subtool_arguments'} ) {
568 %subargs = %{$self -> {'subtool_arguments'}};
570 push( @{$self -> {'tools'}},
571 $class ->
572 new( reference_object => $self,
573 models => \@new_models,
574 threads => $subm_threads,
575 directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
576 _raw_results_callback => $self ->
577 _modelfit_raw_results_callback( model_number => $model_number ),
578 subtools => \@subtools,
579 parent_threads => $own_threads,
580 parent_tool_id => $self -> {'tool_id'},
581 logfile => $self -> {'logfile'}[$model_number-1],
582 raw_results => undef,
583 prepared_models => undef,
584 top_tool => 0,
585 %subargs ) );
587 # ( clean => $self -> {'clean'},
588 # base_directory => $self -> {'directory'},
589 # compress => $self -> {'compress'},
590 # directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
591 # drop_dropped => $self -> {'drop_dropped'},
592 # wrap_data => $self -> {'wrap_data'},
593 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
594 # cpu_time => $self -> {'cpu_time'},
595 # run_on_lsf => $self -> {'run_on_lsf'},
596 # no_remote_execution => $self -> {'no_remote_execution'},
597 # no_remote_compile => $self -> {'no_remote_compile'},
598 # lsf_queue => $self -> {'lsf_queue'},
599 # lsf_options => $self -> {'lsf_options'},
600 # lsf_job_name => $self -> {'lsf_job_name'},
601 # lsf_project_name => $self -> {'lsf_project_name'},
603 # parent_tool_id => $self -> {'tool_id'},
605 # models => \@new_models,
606 # grid_batch_size => $self -> {'grid_batch_size'},
607 # nm_version => $self -> {'nm_version'},
608 # picky => $self -> {'picky'},
609 # retries => $self -> {'retries'},
610 # tweak_inits => $self -> {'tweak_inits'},
611 # handle_maxevals => $self -> {'handle_maxevals'},
612 # remove_temp_files => $self -> {'remove_temp_files'},
613 # _raw_results_callback => $self ->
614 # _modelfit_raw_results_callback( model_number => $model_number ),
615 # threads => $subm_threads,
616 # subtools => \@subtools,
617 # logfile => $self -> {'logfile'}[$model_number-1],
618 # parent_threads => $own_threads,
619 # %subargs ) );
621 # }}} sub tools
624 end general_setup
626 # }}}
628 # {{{ llp_setup
629 start llp_setup
631 my @subm_threads;
632 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
633 @subm_threads = @{$self -> {'threads'}};
634 unshift(@subm_threads);
635 } else {
636 @subm_threads = ($self -> {'threads'});
638 $self -> general_setup( model_number => $model_number,
639 class => 'tool::llp',
640 subm_threads => \@subm_threads );
642 end llp_setup
643 # }}} llp_setup
645 # {{{ _jackknife_raw_results_callback
647 start _jackknife_raw_results_callback
649 # Use the bootstrap's raw_results file.
650 my ($dir,$file) =
651 OSspecific::absolute_path( $self -> {'directory'},
652 $self -> {'raw_results_file'}[$model_number-1] );
653 my ($dir,$nonp_file) =
654 OSspecific::absolute_path( $self -> {'directory'},
655 $self -> {'raw_nonp_file'}[$model_number-1] );
656 $subroutine = sub {
657 my $jackknife = shift;
658 my $modelfit = shift;
659 $modelfit -> raw_results_file( $dir.$file );
660 $modelfit -> raw_nonp_file( $dir.$nonp_file );
661 $modelfit -> raw_results_append( 1 );
662 my ( @new_header, %param_names );
663 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
664 unshift( @{$row}, 'jackknife' );
666 $modelfit -> {'raw_results_header'} = [];
668 return $subroutine;
670 end _jackknife_raw_results_callback
672 # }}} _jackknife_raw_results_callback
674 # {{{ _modelfit_raw_results_callback
676 start _modelfit_raw_results_callback
678 # Use the bootstrap's raw_results file.
679 my ($dir,$file) =
680 OSspecific::absolute_path( $self -> {'directory'},
681 $self -> {'raw_results_file'}[$model_number-1] );
682 my ($dir,$nonp_file) =
683 OSspecific::absolute_path( $self -> {'directory'},
684 $self -> {'raw_nonp_file'}[$model_number-1] );
685 my $orig_mod = $self -> {'models'}[$model_number-1];
686 my $type = $self -> {'type'};
687 $subroutine = sub {
688 my $modelfit = shift;
689 my $mh_ref = shift;
690 my %max_hash = %{$mh_ref};
691 $modelfit -> raw_results_file( $dir.$file );
692 $modelfit -> raw_nonp_file( $dir.$nonp_file );
694 # The prepare_raw_results in the modelfit will fix the
695 # raw_results for each bootstrap sample model, we must add
696 # the result for the original model.
698 my ($raw_results_row,$row_structure) = $self -> create_raw_results_rows( max_hash => $mh_ref,
699 model => $orig_mod );
701 unshift( @{$modelfit -> {'raw_results'}}, @{$raw_results_row} );
703 # {{{ New header
705 my ( @new_header, %param_names );
706 my @params = ( 'theta', 'omega', 'sigma' );
707 foreach my $param ( @params ) {
708 my $labels = $orig_mod -> labels( parameter_type => $param );
709 $param_names{$param} = $labels -> [0] if ( defined $labels );
712 foreach my $name ( @{$modelfit -> {'raw_results_header'}} ) {
713 my $success;
714 # foreach my $param ( @params, 'eigen', 'shrinkage_etas', 'shrinkage_wres' ) {
715 foreach my $param ( @params ) {
716 if ( $name eq $param ){
717 if( defined $param_names{$name} ) {
718 push( @new_header , @{$param_names{$name}} );
719 } else {
720 for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
721 push ( @new_header, uc(substr($name,0,2)).$i );
724 $success = 1;
725 last;
726 } elsif ( $name eq 'se'.$param ) {
727 if( defined $param_names{$param} ) {
728 foreach my $pname ( @{$param_names{$param}} ) {
729 push( @new_header , 'se'.$pname );
731 } else {
732 for ( my $i = 1; $i <= $max_hash{ $param }; $i++ ) {
733 push ( @new_header, 'se'.uc(substr($param,0,2)).$i );
736 $success = 1;
737 last;
740 unless( $success ){
741 push( @new_header, $name );
745 $modelfit -> {'raw_results_header'} = \@new_header;
747 if ( $type eq 'bca' ) {
748 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
749 unshift( @{$row}, 'bootstrap' );
751 unshift( @{$modelfit -> {'raw_results_header'}}, 'method' );
754 @{$self -> {'raw_results_header'}} = @{$modelfit -> {'raw_results_header'}};
756 # }}} New header
759 return $subroutine;
761 end _modelfit_raw_results_callback
763 # }}} _modelfit_raw_results_callback
765 # {{{ modelfit_analyze
767 start modelfit_analyze
769 my @params = @{$self -> {'parameters'}};
770 my @diagnostic_params = @{$self -> {'diagnostic_parameters'}};
771 my ( @print_order, @calculation_order );
773 if ( $self -> {'type'} eq 'bca' ) {
775 # -------------------------- BCa method ---------------------------------
777 # {{{ BCa
779 @calculation_order = @{$self -> {'bca_calculation_order'}};
780 @print_order = @{$self -> {'bca_print_order'}};
781 my $jk_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
782 $self -> {'threads'} -> [1]:$self -> {'threads'};
783 my $done = ( -e $self -> {'directory'}."/jackknife_done.$model_number" ) ? 1 : 0;
784 if ( not $done ) {
786 # {{{ Create Jackknife
788 ui -> print( category => 'bootstrap',
789 message => "Running a Jackknife for the BCa estimates" );
790 $self -> {'jackknife'} = tool::cdd::jackknife ->
791 new( models => [$self -> models -> [$model_number -1]],
792 case_columns => $self -> models -> [$model_number -1]
793 -> datas -> [0] -> idcolumn,
794 _raw_results_callback => $self ->
795 _jackknife_raw_results_callback( model_number => $model_number ),
796 nm_version => $self -> {'nm_version'},
797 parent_tool_id => $self -> {'tool_id'},
798 threads => $jk_threads,
799 bca_mode => 1,
800 shrinkage => $self -> {'shrinkage'},
801 nonparametric_marginals => $self -> {'nonparametric_marginals'},
802 nonparametric_etas => $self -> {'nonparametric_etas'},
803 adaptive => $self -> {'adaptive'},
804 rerun => $self -> {'rerun'},
805 verbose => $self -> {'verbose'},
806 cross_validate => 0 );
807 # Create a checkpoint. Log the samples and individuals.
808 open( DONE, ">".$self -> {'directory'}."/jackknife_done.$model_number" ) ;
809 print DONE "Jackknife directory:\n";
810 print DONE $self -> {'jackknife'} -> directory,"\n";
811 my @seed = random_get_seed;
812 print DONE "seed: @seed\n";
813 close( DONE );
815 # }}} Create Jackknife
817 } else {
819 # {{{ Recreate Jackknife
821 open( DONE, $self -> {'directory'}."/jackknife_done.$model_number" );
822 my @rows = <DONE>;
823 close( DONE );
824 my ( $stored_directory ) = $rows[1];
825 chomp( $stored_directory );
826 if ( not -e $stored_directory ) {
827 debug -> die( message => "The Jackknife directory ".$stored_directory.
828 "indicated by ".$self -> {'directory'}.
829 "/jackknife_done.$model_number".
830 " from the old bootstrap run in ".
831 $self -> {'directory'}." does not exist" );
833 my @seed = split(' ',$rows[2]);
834 shift( @seed ); # get rid of 'seed'-word
835 $self -> {'jackknife'} = tool::cdd::jackknife ->
836 new( models => [$self -> models -> [$model_number -1]],
837 case_columns => $self -> models -> [$model_number -1]
838 -> datas -> [0] -> idcolumn,
839 _raw_results_callback => $self ->
840 _jackknife_raw_results_callback( model_number => $model_number ),
841 threads => $jk_threads,
842 parent_tool_id => $self -> {'tool_id'},
843 directory => $stored_directory,
844 bca_mode => 1,
845 shrinkage => $self -> {'shrinkage'},
846 nm_version => $self -> {'nm_version'},
847 nonparametric_marginals => $self -> {'nonparametric_marginals'},
848 nonparametric_etas => $self -> {'nonparametric_etas'},
849 adaptive => $self -> {'adaptive'},
850 rerun => $self -> {'rerun'},
851 verbose => $self -> {'verbose'},
852 cross_validate => 0 );
853 random_set_seed( @seed );
854 ui -> print( category => 'bootstrap',
855 message => "Restarting BCa Jackknife from ".
856 $stored_directory )
857 unless $self -> {'parent_threads'} > 1;
859 # }}} Recreate Jackknife
863 $self -> {'jackknife'} -> run;
864 $self -> {'jackknife_results'} = $self -> {'jackknife'} -> {'results'};
865 $self -> {'jackknife_prepared_models'} = $self -> {'jackknife'} -> {'prepared_models'};
867 $self -> {'jackknife_raw_results'}[$model_number-1] =
868 $self -> {'jackknife'} -> raw_results;
869 # $self -> {'jackknife'} -> raw_results -> [$model_number-1];
870 # $Data::Dumper::Maxdepth = 0;
871 # print Dumper $self -> {'jackknife_raw_results'};
873 # }}} BCa
875 } else {
876 @calculation_order = @{$self -> {'calculation_order'}};
877 @print_order = @{$self -> {'print_order'}};
878 $self -> {'bootstrap_raw_results'}[$model_number-1] =
879 $self -> {'tools'} -> [0] -> raw_results;
880 # $self -> {'tools'} -> [0] -> raw_results -> [$model_number-1];
882 unless( ref($self -> {'raw_results_header'}[0]) eq 'ARRAY' ) {
883 my $tmp = $self -> {'raw_results_header'};
884 $self -> {'raw_results_header'} = [];
885 $self -> {'raw_results_header'}[$model_number-1] = $tmp;
888 my @param_names = @{$self -> models -> [$model_number -1] -> outputs -> [0] -> labels};
889 my ( @diagnostic_names, @tmp_names );
890 foreach my $param ( @diagnostic_params ) {
891 push( @tmp_names, $param );
892 $tmp_names[$#tmp_names] =~ s/_/\./g;
894 for ( my $i = 0; $i <= $#param_names; $i++ ) {
895 unshift( @{$param_names[$i]}, 'OFV' );
896 push( @{$diagnostic_names[$i]}, @tmp_names );
899 end modelfit_analyze
901 # }}}
903 # {{{ prepare_results
905 start prepare_results
907 # {{{ definitions
909 # The '3' is there to skip model, problem and subproblem numbers
910 my $skip_mps = 3;
911 my ( @calculation_order, @print_order, %diag_idx );
912 if ( $self -> {'type'} eq 'bca' ) {
913 @calculation_order = @{$self -> {'bca_calculation_order'}};
914 @print_order = @{$self -> {'bca_print_order'}};
915 } else {
916 @calculation_order = @{$self -> {'calculation_order'}};
917 @print_order = @{$self -> {'print_order'}};
919 if ( $self -> {'type'} eq 'bca' ) {
920 # $self -> read_raw_results();
921 $self -> bca_read_raw_results();
922 # if ( not defined $self -> {'bootstrap_raw_results'} );
923 } else {
924 # if ( not defined $self -> {'raw_results'} ) {
925 $self -> read_raw_results();
926 $self -> {'bootstrap_raw_results'} = $self -> {'raw_results'};
930 for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
931 $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i + $skip_mps;
934 # }}} definitions
936 # --------------------- Get data from raw_results -------------------------
938 # Divide the data into diagnostics and estimates. Included in estimates are
939 # the parametric estimates, the standard errors of these, the nonparametric
940 # estimates, the shrinkage in eta and the shrinkage in wres
941 # The diagnostics end up in {'bootstrap_diagnostics'} and
942 # {'jackknife_diagnostics'}. The estimates in {'bootstrap_estimates'} and
943 # {'jackknife_estimates'}.
944 # The number of runs that are selected for calculation of the results is
945 # saved.
947 # {{{ Get the data from the runs
949 foreach my $tool ( 'bootstrap', 'jackknife' ) {
950 if ( defined $self -> {$tool.'_raw_results'} ) {
951 for ( my $i = 0; $i < scalar @{$self->{$tool.'_raw_results'}}; $i++ ) { # All models
953 # {{{ Get the number of columns with estimates
955 my $cols_orig = 0;
956 foreach my $param ( 'theta', 'omega', 'sigma' ) {
957 my $labels =
958 $self -> {'models'} -> [$i] -> labels( parameter_type => $param );
959 # we can't use labels directly since different models may have different
960 # labels (still within the same modelfit)
961 my $numpar = scalar @{$labels -> [0]} if ( defined $labels and
962 defined $labels -> [0] );
963 $cols_orig += ( $numpar*3 ); # est + SE + eigen values
965 # nonparametric omegas and shrinkage
966 my $nomegas = $self -> {'models'} -> [$i] -> nomegas;
967 my $numpar = $nomegas -> [0];
969 # shrinkage omega + wres shrinkage
970 $cols_orig += $numpar + 1;
971 # $cols_orig += ($numpar*($numpar+1)/2 + $numpar + 1);
973 $cols_orig++; # OFV
975 # }}} Get the number of columns with estimates
977 # {{{ Loop, choose and set diagnostics and estimates
979 my %return_section;
980 $return_section{'name'} = 'Warnings';
981 my ( $skip_term, $skip_cov, $skip_warn, $skip_bound );
982 my $included = 0;
983 # print Dumper $self->{$tool.'_raw_results'};
984 for ( my $j = 0; $j < scalar @{$self->{$tool.'_raw_results'}->[$i]}; $j++ ) { # orig model + prepared_models
985 my $columns = scalar @{$self->{$tool.'_raw_results'}->[$i][$j]};
987 # ----------------------- Diagnostics -----------------------------
989 for ( my $m = $skip_mps; $m < scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m++ ) { # value
990 $self->{$tool.'_diagnostics'}->[$i][$j][$m-$skip_mps] =
991 $self->{$tool.'_raw_results'}->[$i][$j][$m];
993 my $use_run = 1;
994 if ( $self -> {'skip_minimization_terminated'} and
995 ( not defined $self->{$tool.'_raw_results'}->
996 [$i][$j][$diag_idx{'minimization_successful'}]
997 or not $self->{$tool.'_raw_results'}->
998 [$i][$j][$diag_idx{'minimization_successful'}] ) ) {
999 $skip_term++;
1000 $use_run = 0;
1001 } elsif ( $self -> {'skip_covariance_step_terminated'} and not
1002 $self->{$tool.'_raw_results'}->
1003 [$i][$j][$diag_idx{'covariance_step_successful'}] ) {
1004 $skip_cov++;
1005 $use_run = 0;
1006 } elsif ( $self -> {'skip_with_covstep_warnings'} and
1007 $self->{$tool.'_raw_results'}->
1008 [$i][$j][$diag_idx{'covariance_step_warnings'}] ) {
1009 $skip_warn++;
1010 $use_run = 0;
1011 } elsif ( $self -> {'skip_estimate_near_boundary'} and
1012 $self->{$tool.'_raw_results'}->
1013 [$i][$j][$diag_idx{'estimate_near_boundary'}] ) {
1014 $skip_bound++;
1015 $use_run = 0;
1018 # ------------------------ Estimates ------------------------------
1020 if( $use_run ) {
1021 for ( my $m = scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps; $m < $columns; $m++ ) { # value
1022 my $val = $self->{$tool.'_raw_results'}->[$i][$j][$m];
1023 $self->{$tool.'_estimates'}->
1024 [$i][$included][$m-(scalar @{$self -> {'diagnostic_parameters'}} + $skip_mps)] = $val;
1026 $included++;
1030 # }}} Loop, choose and set diagnostics and estimates
1032 # {{{ push #runs to results
1034 if ( defined $skip_term ) {
1035 push( @{$return_section{'values'}}, "$skip_term runs with miminization ".
1036 "terminated were skipped when calculating the $tool results" );
1038 if ( defined $skip_cov ) {
1039 push( @{$return_section{'values'}}, "$skip_cov runs with aborted ".
1040 "covariance steps were skipped when calculating the $tool results" );
1042 if ( defined $skip_warn ) {
1043 push( @{$return_section{'values'}}, "$skip_warn runs with errors from ".
1044 "the covariance step were skipped when calculating the $tool results" );
1046 if ( defined $skip_bound ) {
1047 push( @{$return_section{'values'}}, "$skip_bound runs with estimates ".
1048 "near a boundary were skipped when calculating the $tool results" );
1050 $return_section{'labels'} = [];
1051 push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1053 # }}} push #runs to results
1059 # $Data::Dumper::Maxdepth = 5;
1060 # die Dumper $self -> {'bootstrap_diagnostics'};
1062 # }}} Get the data from the runs
1064 # ---------------------- Calculate the results ----------------------------
1066 # {{{ Result calculations
1068 for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1070 my $mps_offset = $self -> {'bca'} ? 4 : 3; # <- this is the offset to
1071 # diagonstic_parameters,
1072 # which is one more for
1073 # the method column added
1074 # with a bca run.
1076 my @param_names = @{$self -> {'raw_results_header'}[$i]}[($mps_offset + scalar @{$self -> {'diagnostic_parameters'}}) .. (scalar @{$self -> {'raw_results_header'}[$i]} - 1)];
1077 my ( @diagnostic_names, @tmp_names );
1078 foreach my $param ( @{$self -> {'diagnostic_parameters'}} ) {
1079 push( @tmp_names, $param );
1080 $tmp_names[$#tmp_names] =~ s/_/\./g;
1083 @diagnostic_names = @tmp_names;
1084 foreach my $result_type ( @calculation_order ) {
1085 my @names = $result_type eq 'diagnostic_means' ?
1086 @diagnostic_names : @param_names;
1087 my $calc = 'calculate_'.$result_type;
1088 $self -> $calc( model_number => ($i+1),
1089 parameter_names => \@names );
1091 foreach my $result_type ( @print_order ) {
1092 my $name = $result_type;
1093 $name =~ s/_/\./g;
1094 my %return_section;
1095 $return_section{'name'} = $name;
1096 $return_section{'values'} = $self -> {$result_type} -> [$i];
1097 $return_section{'labels'} = $self -> {$result_type.'_labels'} -> [$i];
1098 push( @{$self -> {'results'}[$i]{'own'}},\%return_section );
1102 # }}} Result calculations
1105 end prepare_results
1107 # }}} prepare_results
1109 # {{{ print_summary
1111 start print_summary
1113 sub acknowledge {
1114 my $name = shift;
1115 my $outcome = shift;
1116 my $file = shift;
1117 my $l = (7 - length( $outcome ))/2;
1118 my $c_num = '00';
1119 $c_num = '07' if ( $outcome eq 'OK' );
1120 $c_num = '13' if ( $outcome eq 'WARNING' );
1121 $c_num = '05' if ( $outcome eq 'ERROR' );
1122 # my $text = sprintf( "%-66s%2s%7s%-5s\n\n", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1123 my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1124 # cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1125 # my $text = cprintf( "%-66s%2s\x03$c_num%7s\x030%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
1126 print $text, "\n\n";
1127 print $file $text if defined $file;
1130 sub sum {
1131 my $arr = shift;
1132 my $sum = 0;
1133 for ( @{$arr} ) {
1134 $sum += $_;
1136 return $sum;
1139 my $diag_number = scalar @{$self -> {'diagnostic_parameters'}} - 1;
1140 my %diag_idxs;
1141 for ( my $i = 0; $i <= $diag_number; $i++ ) {
1142 $diag_idxs{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1145 open( my $log, ">test.log" );
1146 for ( my $i = 0; $i < scalar @{$self -> {'bootstrap_raw_results'}} ; $i++ ) { # All models
1147 print "MODEL ",$i+1,"\n" if ( scalar @{$self -> {'bootstrap_raw_results'}} > 1 );
1148 if ( $self -> {'diagnostic_means'} ->
1149 [$i][0][$diag_idxs{'minimization_successful'}] >
1150 $self -> {'minimization_successful_limit'} ) {
1151 acknowledge( 'Successful minimization ratio = '.
1152 $self -> {'diagnostic_means'} ->
1153 [$i][0][$diag_idxs{'minimization_successful'}], 'OK', $log );
1154 } else {
1155 acknowledge( 'Termination problems in '.
1156 sprintf("%4.2f", (100-($self -> {'diagnostic_means'} ->
1157 [$i][0][$diag_idxs{'minimization_successful'}]*100)))
1158 .'% of the bootstrap runs', 'WARNING', $log );
1161 if ( $self -> {'diagnostic_means'} ->
1162 [$i][0][$diag_idxs{'covariance_step_successful'}] >
1163 $self -> {'covariance_step_successful_limit'} ) {
1164 acknowledge( 'Successful covariance step ratio = '.$self -> {'diagnostic_means'} ->
1165 [$i][0][$diag_idxs{'covariance_step_successful'}], 'OK', $log );
1166 } else {
1167 acknowledge( 'Covariance step terminated in '.
1168 sprintf("%4.2f", (100-($self -> {'diagnostic_means'} ->
1169 [$i][0][$diag_idxs{'covariance_step_successful'}]*100)))
1170 .'% of the bootstrap runs', 'WARNING', $log );
1173 if ( $self -> {'diagnostic_means'} ->
1174 [$i][0][$diag_idxs{'covariance_step_warnings'}] <
1175 $self -> {'covariance_step_warnings_limit'} ) {
1176 acknowledge( 'Covariance step warnings ratio = '.$self -> {'diagnostic_means'} ->
1177 [$i][0][$diag_idxs{'covariance_step_warnings'}], 'OK', $log );
1178 } else {
1179 acknowledge( 'Covariance step warnings in '.
1180 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1181 [$i][0][$diag_idxs{'covariance_step_warnings'}]*100))
1182 .'% of the bootstrap runs', 'WARNING', $log );
1185 if ( $self -> {'diagnostic_means'} ->
1186 [$i][0][$diag_idxs{'estimate_near_boundary'}] <
1187 $self -> {'estimate_near_boundary_limit'} ) {
1188 acknowledge( 'Estimate near boundary ratio = '.$self -> {'diagnostic_means'} ->
1189 [$i][0][$diag_idxs{'estimate_near_boundary'}], 'OK', $log );
1190 } else {
1191 acknowledge( 'Estimate near boundary found in '.
1192 sprintf("%4.2f", ($self -> {'diagnostic_means'} ->
1193 [$i][0][$diag_idxs{'estimate_near_boundary'}]*100))
1194 .'% of the bootstrap runs', 'WARNING', $log );
1196 my $sum = sum( $self -> {'within_se_confidence_intervals'}->
1197 [$i]{$self -> {'se_confidence_intervals_level'}} );
1198 if ( not defined $sum or $sum < 1 ) {
1199 acknowledge( 'No '.(100-$self -> {'se_confidence_intervals_level'}).
1200 '% confidence intervals based on the'.
1201 ' bootstrap standard errors include '.
1202 $self -> {'se_confidence_intervals_check'}, 'OK', $log );
1203 } else {
1204 acknowledge( "$sum ".(100-$self -> {'se_confidence_intervals_level'}).
1205 "% confidence intervals based on the bootstrap".
1206 " SE's include ".
1207 $self -> {'se_confidence_intervals_check'}, 'WARNING', $log );
1208 my $found =
1209 scalar @{$self -> {'within_se_confidence_intervals'}->
1210 [$i]{$self -> {'se_confidence_intervals_level'}}} - 1;
1211 for ( my $l = 0; $l <= $found ; $l++ ) {
1212 if ( $self -> {'within_se_confidence_intervals'}->
1213 [$i]{$self -> {'se_confidence_intervals_level'}}[$l] ) {
1214 printf( "\t%-20s\n",
1215 $self -> {'standard_error_confidence_intervals_labels'} ->
1216 [$i][1][$l] );
1217 print $log
1218 sprintf( "\t%-20s\n",
1219 $self -> {'standard_error_confidence_intervals_labels'} ->
1220 [$i][1][$l] );
1221 print "\n" if ( $l == $found );
1222 print $log "\n" if ( $l == $found );
1227 my $sum = sum( $self -> {'large_bias'}-> [$i][0] );
1228 if ( not defined $sum or $sum < 1 ) {
1229 acknowledge( 'No bias larger than '.
1230 ($self -> {'large_bias_limit'}*100).'% found', 'OK', $log );
1231 } else {
1232 acknowledge( "$sum estimates were found to have a relative bias > ".
1233 $self -> {'large_bias_limit'}, 'WARNING', $log );
1234 my $found =
1235 scalar @{$self -> {'large_bias'}->
1236 [$i][0]} - 1;
1237 for ( my $l = 0; $l <= $found ; $l++ ) {
1238 if ( $self -> {'large_bias'}->
1239 [$i][0][$l] ) {
1240 printf( "\t%-20s%3.2f %\n", $self -> {'bias_labels'} ->
1241 [$i][1][$l], ($self -> {'bias'} -> [$i][0][$l]/
1242 $self->{'bootstrap_estimates'} -> [$i][0][$l])*100 );
1243 # print $log
1244 # sprintf( "\t%-20s\n",
1245 # $self -> {'percentile_confidence_intervals_labels'} ->
1246 # [$i][1][$l] );
1247 print "\n" if ( $l == $found );
1248 print $log "\n" if ( $l == $found );
1253 if ( $self -> {'type'} eq 'bca' ) {
1254 my $sum = sum( $self -> {'within_bca_confidence_intervals'}->
1255 [$i]{$self -> {'bca_confidence_intervals_level'}} );
1256 if ( not defined $sum or $sum < 1 ) {
1257 acknowledge( 'No '.(100-$self -> {'bca_confidence_intervals_level'}).
1258 '% BCa confidence intervals include '.
1259 $self -> {'bca_confidence_intervals_check'}, 'OK', $log );
1260 } else {
1261 acknowledge( "$sum ".(100-$self -> {'bca_confidence_intervals_level'}).
1262 "% BCa confidence intervals include ".
1263 $self -> {'bca_confidence_intervals_check'}, 'WARNING', $log );
1264 my $found =
1265 scalar @{$self -> {'within_bca_confidence_intervals'}->
1266 [$i]{$self -> {'bca_confidence_intervals_level'}}} - 1;
1267 for ( my $l = 0; $l <= $found ; $l++ ) {
1268 if ( $self -> {'within_bca_confidence_intervals'}->
1269 [$i]{$self -> {'bca_confidence_intervals_level'}}[$l] ) {
1270 printf( "\t%-20s\n",
1271 $self -> {'bca_confidence_intervals_labels'} ->
1272 [$i][1][$l] );
1273 print $log
1274 sprintf( "\t%-20s\n",
1275 $self -> {'bca_confidence_intervals_labels'} ->
1276 [$i][1][$l] );
1277 print "\n" if ( $l == $found );
1278 print $log "\n" if ( $l == $found );
1282 } else {
1283 my $sum = sum( $self -> {'within_percentiles'}->
1284 [$i]{$self -> {'percentile_confidence_intervals_level'}} );
1285 if ( not defined $sum or $sum < 1 ) {
1286 acknowledge( 'No '.(100-$self -> {'percentile_confidence_intervals_level'}).
1287 '% confidence intervals based on the'.
1288 ' bootstrap percentiles include '.
1289 $self -> {'percentile_confidence_intervals_check'}, 'OK', $log );
1290 } else {
1291 acknowledge( "$sum ".(100-$self -> {'percentile_confidence_intervals_level'}).
1292 "% confidence intervals based on the percentiles".
1293 " include ".
1294 $self -> {'percentile_confidence_intervals_check'}, 'WARNING', $log );
1295 my $found =
1296 scalar @{$self -> {'within_percentiles'}->
1297 [$i]{$self -> {'percentile_confidence_intervals_level'}}} - 1;
1298 for ( my $l = 0; $l <= $found ; $l++ ) {
1299 if ( $self -> {'within_percentiles'}->
1300 [$i]{$self -> {'percentile_confidence_intervals_level'}}[$l] ) {
1301 printf( "\t%-20s\n",
1302 $self -> {'percentile_confidence_intervals_labels'} ->
1303 [$i][1][$l] );
1304 print $log
1305 sprintf( "\t%-20s\n",
1306 $self -> {'percentile_confidence_intervals_labels'} ->
1307 [$i][1][$l] );
1308 print "\n" if ( $l == $found );
1309 print $log "\n" if ( $l == $found );
1315 close ( $log );
1317 end print_summary
1319 # }}} print_summary
1321 # {{{ bca_read_raw_results
1323 start bca_read_raw_results
1325 $self -> {'raw_results_header'} = [];
1326 for ( my $i = 1; $i <= scalar @{$self->{'models'}}; $i++ ) { # All models
1327 if ( -e $self -> {'directory'}.'raw_results'.$i.'.csv' ) {
1328 open( RRES, $self -> {'directory'}.'raw_results'.$i.'.csv' );
1329 my @file = <RRES>;
1330 close( RRES );
1331 map { chomp; my @tmp = split(',',$_); $_ = \@tmp } @file ;
1333 my $header = shift @file;
1335 # Get rid of 'method' column
1336 my $cols = scalar(@{$header})-1;
1337 @{$self -> {'raw_results_header'}[$i-1]} = @{$header}[1..$cols];
1338 $self -> {'raw_results'} -> [$i-1] = \@file;
1339 for( my $j = 0; $j <= $#file; $j++ ) {
1340 if ( $file[$j][0] eq 'jackknife' ) {
1341 shift( @{$file[$j]} );
1342 # $self -> {'jackknife_raw_results'}[$i-1] = \@file;
1343 push( @{$self -> {'jackknife_raw_results'}[$i-1]}, $file[$j]);
1344 } else {
1345 shift( @{$file[$j]} );
1346 # $self -> {'bootstrap_raw_results'}[$i-1] = \@file;
1347 push( @{$self -> {'bootstrap_raw_results'}[$i-1]}, $file[$j] );
1353 end bca_read_raw_results
1355 # }}} bca_read_raw_results
1357 # {{{ calculate_diagnostic_means
1359 start calculate_diagnostic_means
1361 my ( @sum, @diagsum, %diag_idx );
1362 for ( my $i = 0; $i < scalar @{$self -> {'diagnostic_parameters'}}; $i++ ) {
1363 $diag_idx{$self -> {'diagnostic_parameters'} -> [$i]} = $i;
1366 my $def = 0;
1367 # Prepared model, skip the first (the original)
1368 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_diagnostics'} ->
1369 [$model_number-1]}; $k++ ) {
1370 # Diagnostics
1371 if( defined $self -> {'bootstrap_diagnostics'} ->
1372 [$model_number-1][$k] ) {
1373 $def++;
1374 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_diagnostics'} ->
1375 [$model_number-1][$k]}; $l++ ) {
1376 $sum[$l] += $self -> {'bootstrap_diagnostics'} ->
1377 [$model_number-1][$k][$l];
1382 # divide by the number of bootstrap samples (-1 to get rid of the original
1383 # model) The [0] in the index is there to indicate the 'model' level. Mostly
1384 # used for printing
1385 for ( my $l = 0; $l <= $#sum; $l++ ) {
1386 if( $l == $diag_idx{'significant_digits'} ) {
1387 $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1388 $sum[$l] / $def;
1389 } else {
1390 $self -> {'diagnostic_means'} -> [$model_number-1][0][$l] =
1391 $sum[$l] / ( scalar @{$self -> {'bootstrap_diagnostics'} ->
1392 [$model_number-1]} - 1);
1395 $self -> {'diagnostic_means_labels'} -> [$model_number-1] =
1396 [[],\@parameter_names];
1398 end calculate_diagnostic_means
1400 # }}} calculate_diagnostic_means
1402 # {{{ calculate_means
1404 start calculate_means
1406 my ( @sum, @diagsum );
1407 # Prepared model, skip the first (the original)
1408 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'} ->
1409 [$model_number-1]}; $k++ ) {
1410 # Estimates
1411 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'} ->
1412 [$model_number-1][$k]}; $l++ ) {
1413 $sum[$l] += $self -> {'bootstrap_estimates'} ->
1414 [$model_number-1][$k][$l];
1417 # divide by the number of bootstrap samples (-1 to get rid of the original
1418 # model) The [0] in the index is there to indicate the 'model' level. Mostly
1419 # used for printing
1420 my $samples = scalar @{$self -> {'bootstrap_estimates'} ->
1421 [$model_number-1]} - 1;
1422 for ( my $l = 0; $l <= $#sum; $l++ ) {
1423 my $mean = $sum[$l] / $samples;
1424 $self -> {'means'} -> [$model_number-1][0][$l] = $mean;
1425 my $bias = $mean - $self ->
1426 {'bootstrap_estimates'} -> [$model_number-1][0][$l];
1427 $self -> {'bias'} -> [$model_number-1][0][$l] = $bias;
1428 if ( $self->{'bootstrap_estimates'} -> [$model_number-1][0][$l] != 0 and
1429 $bias/$self->{'bootstrap_estimates'} -> [$model_number-1][0][$l]
1430 > $self -> {'large_bias_limit'} ) {
1431 $self -> {'large_bias'} -> [$model_number-1][0][$l] = 1;
1432 } else {
1433 $self -> {'large_bias'} -> [$model_number-1][0][$l] = 0;
1436 $self -> {'means_labels'} -> [$model_number-1] =
1437 [[],\@parameter_names];
1439 $self -> {'bias_labels'} -> [$model_number-1] =
1440 [[],\@parameter_names];
1442 end calculate_means
1444 # }}} calculate_means
1446 # {{{ calculate_jackknife_means
1448 start calculate_jackknife_means
1450 my @sum;
1451 # Prepared model, skip the first (the original)
1452 if( defined $self -> {'jackknife_estimates'} ){
1453 for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]}; $k++ ) {
1454 # Estimate
1455 for ( my $l = 0; $l <
1456 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][$k]}; $l++ ) {
1457 $sum[$l] += $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1460 # divide by the number of jackknife samples (-1 to get rid of the original model)
1461 # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1462 for ( my $l = 0; $l <
1463 scalar @{$self -> {'jackknife_estimates'}->[$model_number-1][0]}; $l++ ) {
1464 if( ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1) != 0 ) {
1465 $self -> {'jackknife_means'} -> [$model_number-1][0][$l] =
1466 $sum[$l] / ( scalar @{$self -> {'jackknife_estimates'}->[$model_number-1]} - 1);
1469 $self -> {'jackknife_means_labels'} -> [$model_number-1] = [[],\@parameter_names];
1472 end calculate_jackknife_means
1474 # }}} calculate_jackknife_means
1476 # {{{ calculate_medians
1477 start calculate_medians
1479 my @medians;
1480 # Loop the parameters
1481 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1482 [$model_number-1][0]}; $l++ ) {
1483 my @parameter_array;
1484 # From 1 to get rid of original model
1485 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1486 [$model_number-1]}; $k++ ) {
1487 $parameter_array[$k-1] =
1488 $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1490 my @sorted = sort {$a <=> $b} @parameter_array;
1491 # median postition is half the ( array length - 1 ).
1492 my $median_position = ( $#sorted ) / 2;
1493 my ($int_med,$frac_med) = split(/\./, $median_position );
1494 $frac_med = eval("0.".$frac_med);
1495 my $median_low = $sorted[ $int_med ];
1496 my $median_high = ( $sorted[ $int_med + 1 ] - $sorted[ $int_med ] ) * $frac_med;
1497 $medians[$l] = $median_low + $median_high;
1499 # The [0] in the index is there to indicate the 'model' level. Mostly used for printing
1500 $self -> {'medians'} -> [$model_number-1][0] = \@medians;
1501 $self -> {'medians_labels'} -> [$model_number-1] = [[],\@parameter_names];
1503 end calculate_medians
1504 # }}} calculate_medians
1506 # {{{ calculate_standard_error_confidence_intervals
1507 start calculate_standard_error_confidence_intervals
1509 # Sort the limits from the inside out
1510 my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1511 foreach my $limit ( @limits ) {
1512 my ( @lower_limits, @upper_limits, @within_ci );
1513 # Loop the estimates of the first (original) model
1514 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1515 [$model_number-1][0]}; $l++ ) {
1516 my $lower_limit =
1517 $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] -
1518 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1519 $self -> {'confidence_limits'} ->{$limit};
1520 my $upper_limit =
1521 $self -> {'bootstrap_estimates'}->[$model_number-1][0][$l] +
1522 $self -> {'standard_errors'}->[$model_number-1][0][$l] *
1523 $self -> {'confidence_limits'} ->{$limit};
1524 push( @lower_limits, $lower_limit );
1525 push( @upper_limits, $upper_limit );
1526 if ( $self -> {'se_confidence_intervals_check'} < $upper_limit and
1527 $self -> {'se_confidence_intervals_check'} > $lower_limit ) {
1528 push( @within_ci , 1 );
1529 } else {
1530 push( @within_ci , 0 );
1533 unshift( @{$self -> {'standard_error_confidence_intervals'} ->
1534 [$model_number-1]}, \@lower_limits );
1535 push( @{$self -> {'standard_error_confidence_intervals'} ->
1536 [$model_number-1]}, \@upper_limits );
1537 $self -> {'within_se_confidence_intervals'} ->
1538 [$model_number-1]{$limit} = \@within_ci;
1539 unshift( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1540 [$model_number-1][0]}, ($limit/2).'%' );
1541 push( @{$self -> {'standard_error_confidence_intervals_labels'} ->
1542 [$model_number-1][0]}, (100-($limit/2)).'%' );
1543 push( @{$self -> {'within_se_confidence_intervals_labels'} ->
1544 [$model_number-1][0]}, $limit.'%' );
1546 $self -> {'standard_error_confidence_intervals_labels'} -> [$model_number-1][1] =
1547 \@parameter_names;
1548 $self -> {'within_se_confidence_intervals_labels'} -> [$model_number-1][1] =
1549 \@parameter_names;
1551 end calculate_standard_error_confidence_intervals
1552 # }}} calculate_standard_error_confidence_intervals
1554 # {{{ calculate_standard_errors
1556 start calculate_standard_errors
1558 my @se;
1559 # Prepared model, skip the first (the original)
1560 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]}; $k++ ) {
1561 # Estimate
1562 for ( my $l = 0; $l <
1563 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][$k]}; $l++ ) {
1564 $se[$l] += ( $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l] -
1565 $self -> {'means'}->[$model_number-1][0][$l] )**2;
1568 # divide by the number of bootstrap samples -1 (-2 to get rid of the original model)
1569 # The [0] in the index is there to indicate the 'model' level.
1570 for ( my $l = 0; $l <
1571 scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1][0]}; $l++ ) {
1572 my $div = ( scalar @{$self -> {'bootstrap_estimates'}->[$model_number-1]} - 2 );
1573 if( defined $div and not $div == 0 ) {
1574 $self -> {'standard_errors'} -> [$model_number-1][0][$l] =
1575 ($se[$l] / $div )**0.5;
1576 } else {
1577 $self -> {'standard_errors'} -> [$model_number-1][0][$l] = undef;
1580 $self -> {'standard_errors_labels'} -> [$model_number-1] = [[],\@parameter_names];
1582 end calculate_standard_errors
1584 # }}} calculate_standard_errors
1586 # {{{ calculate_bca_confidence_intervals
1588 start calculate_bca_confidence_intervals
1590 sub c_get_z0 {
1591 my $arr_ref = shift;
1592 my $orig_value = shift;
1593 my $num_less_than_orig = 0;
1594 my $nvalues = 0;
1595 my $z0;
1596 foreach my $value ( @{$arr_ref} ) {
1597 if ( defined $value and $value ne '' ) {
1598 $num_less_than_orig++ if ( $value < $orig_value );
1599 $nvalues ++;
1603 unless ( $nvalues == 0 ) {
1604 if ( ($num_less_than_orig / $nvalues ) == 0 ) {
1605 $z0 = -100;
1606 } elsif ( ($num_less_than_orig / $nvalues ) == 1 ) {
1607 $z0 = 100;
1608 } else {
1609 $z0 = udistr( 1 - ($num_less_than_orig / $nvalues ) );
1612 # return ( $z0, $nvalues );
1613 # print sprintf( "%4s:%5.0f,%4s:%15.8g ",'N:',$num_less_than_orig,'ZO', $z0);
1614 return $z0;
1617 sub c_get_acc {
1618 my $arr_ref = shift;
1619 my $jk_mean = shift;
1620 my $acc_upper = 0;
1621 my $acc_lower = 0;
1622 my $nvalues = 0;
1623 my $acc;
1624 foreach my $value ( @{$arr_ref} ){
1625 if ( defined $value and $value ne '' ) {
1626 $acc_upper = $acc_upper + ($jk_mean-$value)**3;
1627 $acc_lower = $acc_lower + ($jk_mean-$value)**2;
1628 $nvalues ++;
1631 $acc_lower = 6*($acc_lower**(3/2));
1632 unless ( $acc_lower == 0 ) {
1633 $acc = $acc_upper / $acc_lower;
1634 } else {
1635 $acc = $acc_upper / 0.001;
1637 # return ( $acc, $nvalues );
1638 # print sprintf( "%4s:%15.8g%4s%8.5g\n",'ACC', $acc,'JKm', $jk_mean);
1639 return $acc;
1642 sub c_get_alphas {
1643 my $old_alphas = shift;
1644 my $acc = shift;
1645 my $z0 = shift;
1646 my $denom;
1647 my @new_alphas = ();
1648 foreach my $position ( @{$old_alphas} ) {
1649 if ( $position == 0 ){
1650 $denom = -100;
1651 } elsif ( $position == 100 ) {
1652 $denom = 100;
1653 } else {
1654 $denom = $z0 + udistr( 1 - $position/100 );
1656 my $nom = 1 - $acc * $denom;
1657 my $lim = 100*uprob( - ( $z0 + $denom / $nom ) );
1658 push( @new_alphas, $lim );
1660 # print "@new_alphas\n";
1661 return \@new_alphas;
1664 my @limits = sort { $a <=> $b } keys %{$self -> {'confidence_limits'}};
1665 # Add the upper limits
1666 my $limnum = $#limits;
1667 for ( my $i = $limnum; $i >= 0; $i-- ) {
1668 $limits[$i] = $limits[$i]/2;
1669 push( @limits, 100-$limits[$i] );
1671 my ( @bootstrap_array, @jackknife_array, @new_alphas, @z0, @acc );
1672 # Loop the estimates of the first (original) model
1673 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1674 [$model_number-1][0]}; $l++ ) {
1675 my ( @unsorted_array1, @unsorted_array2 );
1676 # Loop the bootstrap samples from 1 to get rid of original model
1677 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1678 [$model_number-1]}; $k++ ) {
1679 $unsorted_array1[$k-1] =
1680 $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1682 @{$bootstrap_array[$l]} = sort {$a <=> $b} @unsorted_array1;
1684 # Loop the jackknife samples from 1 to get rid of original model
1685 for ( my $k = 1; $k < scalar @{$self -> {'jackknife_estimates'}->
1686 [$model_number-1]}; $k++ ) {
1687 $unsorted_array2[$k-1] =
1688 $self -> {'jackknife_estimates'}->[$model_number-1][$k][$l];
1690 @{$jackknife_array[$l]} = sort {$a <=> $b} @unsorted_array2;
1691 $z0[$l] = c_get_z0 ( $bootstrap_array[$l],
1692 $self -> {'bootstrap_estimates'} ->
1693 [$model_number-1][0][$l] );
1694 $acc[$l] = c_get_acc ( $jackknife_array[$l],
1695 $self -> {'jackknife_means'} ->
1696 [$model_number-1][0][$l] );
1697 $new_alphas[$l] = c_get_alphas ( \@limits, $acc[$l], $z0[$l] );
1699 # Loop limits
1700 for ( my $lim_idx = 0; $lim_idx <= $#limits; $lim_idx++ ) {
1701 my @percentiles;
1702 # Loop parameters
1703 for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1704 my $limit = $new_alphas[$l][$lim_idx]/100;
1705 my $position = ( scalar @{$bootstrap_array[$l]} + 1 ) * $limit;
1706 my $percentile;
1707 if ( $position < 1 ) {
1708 $percentile = undef;
1709 } elsif ( $position > scalar @{$bootstrap_array[$l]} ) {
1710 $percentile = undef;
1711 } else {
1712 my ($int_med,$frac_med) = split(/\./, $position );
1713 $frac_med = eval("0.".$frac_med);
1714 my $percentile_low = $bootstrap_array[$l][ $int_med - 1];
1715 my $percentile_high = ( $bootstrap_array[$l][ $int_med ] -
1716 $bootstrap_array[$l][ $int_med - 1] ) * $frac_med;
1717 $percentile = $percentile_low + $percentile_high;
1719 push( @percentiles, $percentile );
1721 push( @{$self -> {'bca_confidence_intervals'} -> [$model_number-1]},
1722 \@percentiles );
1723 push( @{$self -> {'bca_confidence_intervals_labels'}->[$model_number-1][0]},
1724 $limits[$lim_idx].'%');
1726 # Check the intervals
1727 for ( my $lim_idx = 0; $lim_idx <= $limnum; $lim_idx++ ) {
1728 my @within_ci;
1729 for ( my $l = 0; $l <= $#bootstrap_array; $l++ ) {
1730 my $lower_limit = $self -> {'bca_confidence_intervals'} ->
1731 [$model_number-1][$lim_idx][$l];
1732 my $upper_limit = $self -> {'bca_confidence_intervals'} ->
1733 [$model_number-1][($limnum*2+1)-$lim_idx][$l];
1734 if ( $self -> {'bca_confidence_intervals_check'} < $upper_limit and
1735 $self -> {'bca_confidence_intervals_check'} > $lower_limit ) {
1736 push( @within_ci , 1 );
1737 } else {
1738 push( @within_ci , 0 );
1741 $self -> {'within_bca_confidence_intervals'} ->
1742 [$model_number-1]{$limits[$lim_idx]*2} = \@within_ci;
1744 $self -> {'bca_confidence_intervals_labels'} -> [$model_number-1][1] =
1745 \@parameter_names;
1747 end calculate_bca_confidence_intervals
1749 # }}} calculate_bca_confidence_intervals
1751 # {{{ calculate_percentile_confidence_intervals
1753 start calculate_percentile_confidence_intervals
1755 # Sort the limits from the inside out
1756 my @limits = sort { $b <=> $a } keys %{$self -> {'confidence_limits'}};
1757 foreach my $limit ( @limits ) {
1758 my ( @lower_limits, @upper_limits, @within_ci );
1759 # Loop the estimates of the first (original) model
1760 for ( my $l = 0; $l < scalar @{$self -> {'bootstrap_estimates'}->
1761 [$model_number-1][0]}; $l++ ) {
1762 my @parameter_array;
1763 # Loop the bootstrap samples from 1 to get rid of original model
1764 for ( my $k = 1; $k < scalar @{$self -> {'bootstrap_estimates'}->
1765 [$model_number-1]}; $k++ ) {
1766 my $val = $self -> {'bootstrap_estimates'}->[$model_number-1][$k][$l];
1767 # get rid of undefined values (these were probably deleted
1768 # when the bootstrap_estimates was created
1769 push( @parameter_array, $val ) if( defined $val );
1771 my @sorted = sort {$a <=> $b} @parameter_array;
1772 for my $side ( 'lower', 'upper' ) {
1773 my $use_limit = $side eq 'lower' ? $limit/200 : 1-($limit/200);
1774 # percentile postition is:
1775 my $percentile_position = ( $#sorted + 2 ) * $use_limit;
1776 my $percentile;
1777 if ( $percentile_position < 1 ) {
1778 $percentile = undef;
1779 } elsif ( $percentile_position > $#sorted +1) {
1780 $percentile = undef;
1781 } else {
1782 my ($int_med,$frac_med) = split(/\./, $percentile_position );
1783 $frac_med = eval("0.".$frac_med);
1784 my $percentile_low = $sorted[ $int_med - 1];
1785 my $percentile_high = ( $sorted[ $int_med ] - $sorted[ $int_med - 1] ) * $frac_med;
1786 $percentile = $percentile_low + $percentile_high;
1788 push( @lower_limits, $percentile ) if ( $side eq 'lower' );
1789 push( @upper_limits, $percentile ) if ( $side eq 'upper' );
1791 if ( $self -> {'percentile_confidence_intervals_check'} < $upper_limits[$#upper_limits] and
1792 $self -> {'percentile_confidence_intervals_check'} > $lower_limits[$#lower_limits] ) {
1793 push( @within_ci , 1 );
1794 } else {
1795 push( @within_ci , 0 );
1798 unshift( @{$self -> {'percentile_confidence_intervals'} ->
1799 [$model_number-1]}, \@lower_limits );
1800 push( @{$self -> {'percentile_confidence_intervals'} ->
1801 [$model_number-1]}, \@upper_limits );
1802 unshift( @{$self -> {'percentile_confidence_intervals_labels'}->
1803 [$model_number-1][0]}, ($limit/2).'%' );
1804 push( @{$self -> {'percentile_confidence_intervals_labels'}->
1805 [$model_number-1][0]},(100-($limit/2)).'%');
1806 $self -> {'within_percentiles'}->[$model_number-1]{$limit}=\@within_ci;
1808 $self -> {'percentile_confidence_intervals_labels'} ->
1809 [$model_number-1][1] = \@parameter_names;
1811 end calculate_percentile_confidence_intervals
1813 # }}} calculate_percentile_confidence_intervals
1815 # {{{ modelfit_post_fork_analyze
1817 start modelfit_post_fork_analyze
1818 end modelfit_post_fork_analyze
1820 # }}}
1822 # {{{ resample
1824 start resample
1826 my $dataObj = $model -> datas -> [0];
1827 for( my $i = 1; $i <= $self -> {'samples'}; $i++ ) {
1828 my ($bs_dir, $bs_name) = OSspecific::absolute_path( $self -> {'directory'}, "bs$i.dta" );
1829 my $new_name = $bs_dir . $bs_name;
1830 my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1831 'new_name' => $new_name,
1832 'target' => $target );
1833 my $newmodel = $model -> copy( filename => "bs$i.mod",
1834 target => $target,
1835 ignore_missing_files => 1 );
1836 $newmodel -> datafiles( new_names => ["bs$i.dta"] );
1837 $newmodel -> datas -> [0] = $boot_sample ;
1838 $newmodel -> write;
1839 push( @resample_models, $newmodel );
1842 end resample
1844 # }}} resample
1846 # {{{ _sampleTools
1848 start _sampleTools
1850 foreach my $tool ( @{$self -> {'tools'}} ) {
1851 my @models = @{$tool -> models};
1852 foreach my $model (@models){
1853 my $dataObj = $model -> datas -> [0];
1854 for( my $i = 1; $i <= $samples; $i++ ) {
1855 my $boot_sample = $dataObj -> resample( 'subjects' => $self -> {'subjects'},
1856 'new_name' => "bs$i.dta",
1857 'target' => $target );
1858 my $newmodel;
1859 $newmodel = $model -> copy( filename => "bs$i.mod" );
1860 $newmodel -> datafiles( new_names => ["bs$i.dta"] );
1861 $newmodel -> datas -> [0] = $boot_sample ;
1862 $newmodel -> write;
1863 if( defined( $tool -> models ) ){
1864 push( @{$tool -> models}, $newmodel );
1865 } else {
1866 $tool -> models( [ $newmodel ] );
1872 end _sampleTools
1874 # }}} _sampleTools
1876 # {{{ print_results
1878 start print_results
1880 # Run the print_results specific for the subtool
1881 my $sub_print_results = $self -> {'subtools'} -> [0];
1882 if ( defined $sub_print_results ) {
1883 sub get_dim {
1884 my $arr = shift;
1885 my $dim = shift;
1886 my $size_ref = shift;
1887 $dim++;
1888 if ( defined $arr and ref($arr) eq 'ARRAY' ) {
1889 push( @{$size_ref}, scalar @{$arr} );
1890 ( $dim, $size_ref ) = get_dim( $arr->[0], $dim, $size_ref );
1892 return ( $dim, $size_ref );
1894 sub format_value {
1895 my $val = shift;
1896 if ( not defined $val or $val eq '' ) {
1897 return sprintf("%10s",$PsN::output_style).',';
1898 } else {
1899 $_ = $val;
1900 my $nodot = /.*\..*/ ? 0 : 1;
1901 $_ =~ s/\.//g;
1902 if ( /.*\D+.*/ or $nodot) {
1903 return sprintf("%10s",$val).',';
1904 } else {
1905 return sprintf("%10.5f",$val).',';
1909 debug -> die( message => "No results_file defined" )
1910 unless ( defined $self -> {'results_file'} );
1911 open ( RES, ">".$self -> {'directory'}.'/'.$self -> {'results_file'} );
1912 if ( defined $self -> {'results'} ) {
1913 my @all_results = @{$self -> {'results'}};
1914 for ( my $i = 0; $i <= $#all_results; $i++ ) {
1915 if ( defined $all_results[$i]{'own'} ) {
1916 my @my_results = @{$all_results[$i]{'own'}};
1917 for ( my $j = 0; $j <= $#my_results; $j++ ) {
1918 # These size estimates include the problem and sub_problem dimensions:
1919 my ( $ldim, $lsize_ref ) = get_dim( $my_results[$j]{'labels'}, -1, [] );
1920 my ( $vdim, $vsize_ref ) = get_dim( $my_results[$j]{'values'}, -1, [] );
1921 print RES $my_results[$j]{'name'},"\n" if ( $vdim > 1 );
1922 if ( defined $my_results[$j]{'values'} and
1923 scalar @{$my_results[$j]{'values'}} >= 0 ) {
1924 my @values = @{$my_results[$j]{'values'}};
1925 my @labels;
1926 if ( defined $my_results[$j]{'labels'} and
1927 scalar @{$my_results[$j]{'labels'}} >= 0 ) {
1928 @labels = @{$my_results[$j]{'labels'}};
1931 # Print Header Labels
1932 if ( $ldim == 0 ) {
1933 my $label = \@labels;
1934 print RES ','.format_value($label),"\n";
1935 } elsif ( $ldim == 2 and defined $labels[1] ) {
1936 print RES ',';
1937 for ( my $n = 0; $n < scalar @{$labels[1]}; $n++ ) {
1938 my $label = $labels[1][$n];
1939 print RES format_value($label);
1941 print RES "\n" if ( scalar @{$labels[1]} );
1943 # Print the values:
1944 if ( $vdim == 0 ) {
1945 print RES ','.format_value(\@values),"\n";
1946 } elsif ( $vdim == 1 ) {
1947 for ( my $m = 0; $m < scalar @{\@values}; $m++ ) {
1948 my $label = $labels[$m];
1949 print RES ','.format_value($label);
1950 my $val = $values[$m];
1951 print RES ','.format_value($val),"\n";
1953 } elsif ( $vdim == 2 ) {
1954 for ( my $m = 0; $m < scalar @{\@values}; $m++ ) {
1955 my $label;
1956 if ( $ldim == 1 ) {
1957 $label = $labels[$m];
1958 } elsif ( $ldim == 2 ) {
1959 $label = $labels[0][$m];
1961 print RES format_value($label);
1962 if ( defined $values[$m] ) {
1963 for ( my $n = 0; $n < scalar @{$values[$m]}; $n++ ) {
1964 print RES format_value($values[$m][$n]);
1967 print RES "\n";
1975 close( RES );
1976 } else {
1977 debug -> warn( level => 2,
1978 message => "No subtools defined".
1979 ", using default printing routine" );
1982 end print_results
1984 # }}}
1986 # {{{ create_matlab_scripts
1988 start create_matlab_scripts
1990 if( defined $PsN::lib_dir ){
1991 unless( -e $PsN::lib_dir . '/histograms.m' and
1992 -e $PsN::lib_dir . '/bca.m' ){
1993 'debug' -> die( message => 'Bootstrap matlab template scripts are not installed, no matlab scripts will be generated.' );
1994 return;
1997 open( PROF, $PsN::lib_dir . '/histograms.m' );
1998 my @file = <PROF>;
1999 close( PROF );
2000 my $found_code;
2001 my $code_area_start=0;
2002 my $code_area_end=0;
2005 for(my $i = 0;$i < scalar(@file); $i++) {
2006 if( $file[$i] =~ /% ---------- Autogenerated code below ----------/ ){
2007 $found_code = 1;
2008 $code_area_start = $i;
2010 if( $file[$i] =~ /% ---------- End autogenerated code ----------/ ){
2011 unless( $found_code ){
2012 'debug' -> die ( message => 'Bootstrap matlab template script is malformated, no matlab scripts will be generated' );
2013 return;
2015 $code_area_end = $i;
2019 my @auto_code;
2020 if( $self -> {'type'} eq 'bca' ){
2021 push( @auto_code, "use_bca = 1; % Was a BCa-type of\n" );
2022 } else {
2023 push( @auto_code, "use_bca = 0; % Was a BCa-type of\n" );
2026 push( @auto_code, " % bootstrap run?\n" );
2027 if( ref $self -> {'samples'} eq 'ARRAY' ) {
2028 push( @auto_code, "bs_samples = ".$self -> {'samples'}->[0][0]."; % Number of bootstrap samples\n" );
2029 } else {
2030 push( @auto_code, "bs_samples = ".$self -> {'samples'}."; % Number of bootstrap samples\n" );
2032 if( $self -> {'type'} eq 'bca' ){
2033 my $ninds = $self -> models -> [0]
2034 -> datas -> [0] -> count_ind;
2035 push( @auto_code, "jk_samples = $ninds; % Number of (BCa) jackknife samples\n\n" );
2038 push( @auto_code, "col_names = { 'Significant Digits',\n" );
2039 push( @auto_code, " 'Condition Number',\n" );
2040 push( @auto_code, " 'OFV',\n" );
2042 my $nps = $self -> {'models'} -> [0] -> nomegas -> [0];
2044 my %param_names;
2045 my( @par_names, @se_names, @np_names, @sh_names );
2046 foreach my $param ( 'theta','omega','sigma' ) {
2047 my $labels = $self -> {'models'} -> [0] -> labels( parameter_type => $param );
2048 if ( defined $labels ){
2049 foreach my $label ( @{$labels -> [0]} ){
2050 push( @par_names, " '",$label,"',\n" );
2051 push( @se_names, " '",'se-'.$label,"',\n" );
2056 for( my $i = 1; $i <= ($nps*($nps+1)/2); $i++ ) {
2057 push( @np_names, " '",'np-om'.$i,"',\n" );
2060 for( my $i = 1; $i <= $nps; $i++ ) {
2061 push( @sh_names, " '",'shrinkage-eta'.$i,"',\n" );
2064 push( @sh_names, " '",'shrinkage-iwres',"'\n" );
2066 # NP not used for now
2068 push( @auto_code,(@par_names, @se_names, @sh_names));
2069 # push( @auto_code,(@par_names, @se_names, @np_names, @sh_names));
2070 push( @auto_code, " };\n\n" );
2072 my @np_columns = (0) x ($nps*($nps+1)/2);
2073 my @sh_columns = (0) x ($nps+1);
2075 if( $self -> {'type'} eq 'bca' ){
2076 push( @auto_code, "fixed_columns = [ 0, 0, 0, " );
2077 } else {
2078 push( @auto_code, "fixed_columns = [ 0, 0, 0, " );
2080 my ( @fixed_columns, @same_columns, @adjust_axes );
2081 foreach my $param ( 'theta','omega','sigma' ) {
2082 my $fixed = $self -> {'models'} -> [0] -> fixed( parameter_type => $param );
2084 if ( defined $fixed ){
2085 push( @fixed_columns, @{$fixed -> [0]} );
2086 if( $param eq 'theta' ) {
2087 push( @same_columns, (0) x scalar( @{$fixed -> [0]} ) );
2092 @adjust_axes = (1) x ( ($#fixed_columns + 1) * 2 +
2093 $#sh_columns + 1 );
2094 # $#np_columns + $#sh_columns + 2 );
2096 push( @auto_code , join( ', ' , @fixed_columns).', '.
2097 join( ', ' , @fixed_columns).', '.
2098 # join( ', ' , @np_columns).', '.
2099 join( ', ' , @sh_columns)."];\n\n" );
2101 if( $self -> {'type'} eq 'bca' ){
2102 push( @auto_code, "same_columns = [ 0, 0, 0, " );
2103 } else {
2104 push( @auto_code, "same_columns = [ 0, 0, 0, " );
2106 foreach my $param ( 'omegas','sigmas' ) {
2107 my $parameters = $self -> {'models'} -> [0] -> problems -> [0] -> $param;
2108 foreach my $parameter ( @{$parameters} ){
2109 if( $parameter -> same() ){
2110 push( @same_columns, (1) x $parameter -> size() );
2111 } else {
2112 push( @same_columns, (0) x scalar @{$parameter -> options} );
2116 push( @auto_code , join( ', ' , @same_columns ).', '.
2117 join( ', ' , @same_columns).', '.
2118 # join( ', ' , @np_columns).', '.
2119 join( ', ' , @sh_columns)."];\n\n" );
2121 push( @auto_code , "adjust_axes = [ 1, 1, 1, ".join( ', ' , @adjust_axes)."];\n\n" );
2123 # push( @auto_code , 'npomegas = '.($nps*($nps+1)/2).";\n\n" );
2124 push( @auto_code , "npomegas = 0;\n\n" );
2127 push( @auto_code, "minimization_successful_col = 5; % Column number for the\n" );
2128 push( @auto_code, " % minimization sucessful flag\n" );
2129 push( @auto_code, "covariance_step_successful_col = 6; % As above for cov-step warnings\n" );
2130 push( @auto_code, "covariance_step_warnings_col = 7; % etc\n" );
2131 push( @auto_code, "estimate_near_boundary_col = 8; % etc\n" );
2133 push( @auto_code, "not_data_cols = 13; % Number of columns in the\n" );
2134 push( @auto_code, " % beginning that are not\n" );
2135 push( @auto_code, " % parameter estimates.\n" );
2137 push( @auto_code, "filename = 'raw_results_matlab.csv';\n" );
2139 splice( @file, $code_area_start, ($code_area_end - $code_area_start), @auto_code );
2140 open( OUTFILE, ">", $self -> {'directory'} . "/histograms.m" );
2141 print OUTFILE "addpath " . $PsN::lib_dir . ";\n";
2142 print OUTFILE @file ;
2143 close OUTFILE;
2145 open( OUTFILE, ">", $self -> {'directory'} . "/raw_results_matlab.csv" );
2146 for( my $i = 0; $i < scalar ( @{$self -> {'raw_results'} -> [0]} ); $i ++ ){
2147 # $self -> {'raw_results'} -> [0] -> [$i][0] =
2148 # $self -> {'raw_results'} -> [0] -> [$i][0] eq 'bootstrap' ?
2149 # 1 : $self -> {'raw_results'} -> [0] -> [$i][0];
2150 # $self -> {'raw_results'} -> [0] -> [$i][0] =
2151 # $self -> {'raw_results'} -> [0] -> [$i][0] eq 'jackknife' ?
2152 # 2 : $self -> {'raw_results'} -> [0] -> [$i][0];
2153 map( $_ = $_ eq 'NA' ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2154 map( $_ = not( defined $_ ) ? 'NaN' : $_, @{$self -> {'raw_results'} -> [0] -> [$i]} );
2155 print OUTFILE join( ',', @{$self -> {'raw_results'} -> [0] -> [$i]} ), "\n";
2157 close OUTFILE;
2159 } else {
2160 'debug' -> die( message => 'matlab_dir not configured, no matlab scripts will be generated.');
2161 return;
2164 end create_matlab_scripts
2166 # }}}
2168 # {{{ create_R_scripts
2169 start create_R_scripts
2171 unless( -e $PsN::lib_dir . '/R-scripts/bootstrap.R' ){
2172 'debug' -> die( message => 'Bootstrap R-script are not installed, no R-script will be generated.' );
2173 return;
2175 cp ( $PsN::lib_dir . '/R-scripts/bootstrap.R', $self -> {'directory'} );
2177 end create_R_scripts
2178 # }}}