functionalized version 2, implemented version 1 using functions in version 2
[PsN.git] / bin / nonp_bootstrap_v2
blob2e30b0391840a0c7cfa42d9c049a46fea1882569
1 #!/usr/local/bin/perl
3 # Only for Development
4 #use FindBin qw($Bin);
5 #use lib "$Bin/../PsN_2_2_0_patches_serial/lib";
7 # Don't edit the line below, it must look exactly like this.
8 # Everything above this line will be replaced #
10 # Perl includes #
11 use strict;
12 use Getopt::Long;
13 # External modules #
14 use Math::Random;
15 # PsN includes #
16 use PsN;
17 use tool::modelfit;
18 use tool::bootstrap;
19 use model;
20 use debug;
21 use common_options;
22 use ui;
23 use File::Copy 'cp';
24 use table_file;
26 ################################################################################
28 # This is the nonp_bootstrap version 2. First the command line options
29 # are parsed using Getopt::Long. PsN "common options" are used to fine
30 # tune the running.
32 # Prerequisites for the nonp_bootstrap is a that the first $TABLE
33 # record prints the wished value and its probability per individual.
35 ################################################################################
37 # {{{ options handling
39 my $cmd_line = $0 . " " . join( " ", @ARGV );
41 my %options;
43 my %optional_options = ('theta_names:s' => '',
44 'samples:i' => '',
45 'result_file' => '',
46 "bca"=>'',
47 "mplots"=>'',
48 "rplots"=>'',
49 "sample_size:i"=>'',
50 "stratify_on:s"=>'',
51 "skip_minimization_terminated"=>'',
52 "skip_covariance_step_terminated"=>'',
53 "skip_with_covstep_warnings"=>'',
54 "skip_estimate_near_boundary"=>'',
55 "minimization_successful_limit:f"=>'',
56 "covariance_step_successful_limit:f"=>'',
57 "covariance_step_warnings_limit:f"=>'',
58 "estimate_near_boundary_limit:f"=>'',
59 "estimate_near_boundary_limit:f"=>'',
60 "se_confidence_intervals_level:f"=>'',
61 "percentile_confidence_intervals_level:f"=>'',
62 "bca_confidence_intervals_level:f"=>'',
63 "se_confidence_intervals_check:f"=>'',
64 "percentile_confidence_intervals_check:f"=>'',
65 "bca_confidence_intervals_check:f"=>'',
66 "large_bias_limit:f"=>''
69 my $res = GetOptions( \%options,
70 @common_options::get_opt_strings,
71 keys(%optional_options) );
73 exit unless $res;
75 common_options::set_globals( \%options, 'nonp_bootstrap' );
76 common_options::get_defaults( \%options, 'nonp_bootstrap' );
77 common_options::sanity_checks( \%options, 'nonp_bootstrap' );
78 common_options::online_help('nonp_bootstrap', \%options, undef,{},{});
80 unless( $options{'samples'} ){
81 $options{'samples'} = 200;
84 unless( $options{'result_file'} ){
85 $options{'result_file'} = 'result.csv';
88 if ( scalar( @ARGV ) < 1 ) {
89 print "A model file must be specified. Use 'nonp_bootstrap -h' for help.\n";
90 exit;
93 my $eval_string = common_options::model_parameters(\%options);
95 # }}}
97 ################################################################################
99 # The original model file is parsed and bootstrapped.
101 ################################################################################
103 # {{{ orig+bootstrap
105 my $model;
107 $model = model -> new ( eval( $eval_string ),
108 extra_output => ['fort.80'],
109 filename => $ARGV[0],
110 ignore_missing_output_files => 1 );
112 my( $base_dir, $dummy) = OSspecific::absolute_path( '', '' );
114 my $directory ;
116 if ( defined $options{'directory'} ) {
117 my $dummy;
118 ( $directory, $dummy ) = OSspecific::absolute_path( $options{'directory'}, '');
119 } else {
120 $directory = OSspecific::unique_path( 'nonp_bootstrap_dir' ,
121 $base_dir );
124 unless( -d $directory ){
125 mkdir( $directory );
127 open(CMD, ">", $directory . "/command.txt");
128 print CMD $cmd_line, "\n";
129 close(CMD);
132 chdir( $directory );
134 my $type = defined $options{'bca'} ? 'bca' : undef;
136 my $bs = tool::bootstrap ->
137 new ( eval( $common_options::parameters ),
138 directory => 'bootstrap',
139 models => [ $model ],
140 samples => $options{'samples'},
141 subjects => $options{'sample_size'},
142 type => $type,
143 stratify_on => $options{'stratify_on'},
144 skip_minimization_terminated => $options{'skip_minimization_terminated'},
145 skip_covariance_step_terminated => $options{'skip_covariance_step_terminated'},
146 skip_with_covstep_warnings => $options{'skip_with_covstep_warnings'},
147 skip_estimate_near_boundary => $options{'skip_estimate_near_boundary'},
148 minimization_successful_limit => $options{'minimization_successful_limit'},
149 covariance_step_successful_limit => $options{'covariance_step_successful_limit'},
150 covariance_step_warnings_limit => $options{'covariance_step_warnings_limit'},
151 estimate_near_boundary_limit => $options{'estimate_near_boundary_limit'},
152 estimate_near_boundary_limit => $options{'estimate_near_boundary_limit'},
153 se_confidence_intervals_level => $options{'se_confidence_intervals_level'},
154 percentile_confidence_intervals_level => $options{'percentile_confidence_intervals_level'},
155 bca_confidence_intervals_level => $options{'bca_confidence_intervals_level'},
156 se_confidence_intervals_check => $options{'se_confidence_intervals_check'},
157 percentile_confidence_intervals_check => $options{'percentile_confidence_intervals_check'},
158 bca_confidence_intervals_check => $options{'bca_confidence_intervals_check'},
159 large_bias_limit => $options{'large_bias_limit'} );
161 $bs -> run;
162 $bs -> prepare_results();
163 $bs -> print_results();
165 # }}}
167 ################################################################################
169 # Collect the bootstrap results and create $samples number of new
170 # models. Each new model should have MAXE=0, $NONP and initial
171 # estimates fixed also it should use the original dataset. This is to
172 # get table files with P in them.
174 ################################################################################
176 # {{{ read bootstrap + create new models
178 my @bs_models = @{$bs -> {'prepared_models'}[0]{'own'}};
179 my @new_bs_models;
181 my $id = 1;
183 my $nr_of_old_thetas = $model -> nthetas;
185 foreach my $bs_model ( @bs_models ) {
186 my $copy = $bs_model -> copy( filename => 'bs_model_'.$id++.'.mod' );
188 $copy -> update_inits( from_output => $bs_model -> outputs -> [0] );
190 $copy -> datafiles( new_names => $model -> datafiles(absolute_path => 1) );
192 $copy -> maxeval( new_values => [[0]] );
194 my $nomegas = $copy -> nomegas -> [0];
196 $copy -> add_records( type => 'nonparametric',
197 record_strings => ['MARGINALS UNCONDITIONAL'] );
199 $copy -> set_option( record_name => 'abbreviated',
200 option_name => 'COMRES',
201 option_value => ($nomegas+1) );
203 my $table_string;
205 my $code_block;
206 my $use_pk = 0;
208 if( defined $copy -> pk ){
209 $code_block = $copy -> pk;
210 $use_pk = 1;
211 } elsif( defined $copy -> pred ){
212 $code_block = $copy -> pred;
213 } else {
214 die "Error: No \$PK or \$PRED found... \n";
217 push( @{$code_block},' JD = DEN_');
219 for( 1..$nomegas ){
220 $table_string = 'ETA' . ($nomegas - $_ + 1) ." ". $table_string . " DN$_";
221 push(@{$code_block}," DN$_=CDEN_($_)" );
224 if( $use_pk ){
225 $copy -> pk( new_pk => $code_block );
226 } else {
227 $copy -> pred( new_pred => $code_block );
230 if( defined $options{'theta_names'} ){
231 my @names = split( /,/ , $options{'theta_names'} );
232 my $nr_names = scalar( @names );
233 unless( $nr_names == $nr_of_old_thetas ){
234 die "There are $nr_of_old_thetas thetas in the model, but you only specified $nr_names in the 'theta_names' options\n" ;
236 $table_string .= ' ' . join( ' ', @names );
237 } else {
238 my $theta_labels = $model -> labels( 'parameter_type' => 'theta' );
240 foreach ( @{$theta_labels -> [0]} ){
241 if( $id == 2 and /TH\d+/ ){
242 print "\nWarning: There is a generic theta label($_) in the model.(you might want to use the 'theta_names' option).\n\n" ;
246 $table_string .= ' ' . join( ' ', @{$theta_labels -> [0]} );
249 $copy -> add_records( type => 'table',
250 record_strings => ["ID JD $table_string NOPRINT ONEHEADER FIRSTONLY FILE=patab"] );
252 $copy -> fixed( parameter_type => 'theta',
253 new_values => [[(1) x $copy -> nthetas ]] );
254 $copy -> fixed( parameter_type => 'omega',
255 new_values => [[(1) x $nomegas ]] );
256 $copy -> fixed( parameter_type => 'sigma',
257 new_values => [[(1) x $copy -> nsigmas -> [0] ]] );
259 $copy -> remove_records( type => 'covariance' );
261 $copy -> extra_output( ['patab'] );
263 $copy -> _write;
265 push( @new_bs_models, $copy );
268 my $modelfit;
270 $modelfit = tool::modelfit ->
271 new ( eval( $common_options::parameters ),
272 prepend_model_file_name => 1,
273 directory => 'tables',
274 models => \@new_bs_models );
276 $modelfit -> run;
278 # }}}
280 ################################################################################
282 # Do one run per individual initial clearence and get fort.80 from
283 # each run.
285 ################################################################################
287 # {{{ Fort.80 creation
289 open( KEYS, "<", $bs -> directory() . "/included_keys1.csv" );
291 my @parameter_P_sums;
292 my @result_header;
293 my $batch_nr = 0;
295 # {{{ create iofvconf
297 unless( -e 'iofvcont.f' ){
298 open(IOFVCONT , '>', 'iofvcont.f');
300 print IOFVCONT <<'EOF';
301 subroutine contr (icall,cnt,ier1,ier2)
302 parameter (no=1000)
303 common /rocm1/ y(no),data(no,3),nobs
304 integer nobs, un
305 double precision cnt,y
306 DATA un /80/
307 if (icall.le.1) return
308 call ncontr (cnt,ier1,ier2,l2r)
309 C individual obj. funct. value for indiv. jj = cnt
310 write(un,10) data(1,1),cnt
311 10 FORMAT(1F6.2,1F19.4)
312 return
315 close( IOFVCONT );
318 # }}}
320 # TODO update all initial values
322 foreach my $ofv_model ( @new_bs_models ) {
323 $batch_nr++;
324 my @new_ofv_models;
326 unless ( -e "ofv_b$batch_nr" ){
327 mkdir( "ofv_b$batch_nr" );
329 chdir( "ofv_b$batch_nr" );
330 cp( '../iofvcont.f', './iofvcont.f' ) or print "Warning: unable to copy iofvcont.f\n";
332 my $table = table_file -> new( filename => "bs_model_$batch_nr.patab",
333 directory => '../' );
335 my $nomegas = $ofv_model -> nomegas -> [0];
337 my @individual_etas;
338 my @individual_etas_probabilities;
340 my $joint_densities = $table -> column_to_array( column => 1 ); # 1 is JD
342 for( 0..($nomegas-1) ){
344 my $individual_eta = $table -> column_to_array( column => $_ + 1 + 1 ); # first plus 1 is for ID column, second is for JD
346 push( @individual_etas , $individual_eta );
347 push( @individual_etas_probabilities , $joint_densities );
352 my @individual_thetas;
354 for( 0..($nr_of_old_thetas-1) ){
355 my $individual_theta = $table -> column_to_array( column => ($nomegas) + ($nomegas) + 1 + 1 + $_ );
357 push( @individual_thetas, $individual_theta );
361 my $number_of_individuals = @{$individual_etas[0]};
363 unless( -e 'model.done' ){
364 foreach my $id( 0..($number_of_individuals-1) ){
366 my $copy = $ofv_model -> copy( filename => 'ofv_model_'.($id+1).'.mod' );
368 $copy -> update_inits( from_output => $ofv_model -> outputs -> [0] );
371 my @theta_inits;
373 for( 0..($nr_of_old_thetas-1) ) {
374 push( @theta_inits, $individual_thetas[$_]->[$id] );
377 $copy -> lower_bounds( parameter_type => 'theta',
378 parameter_numbers => [[1..$nr_of_old_thetas]],
379 new_values => [[(-10000) x $nr_of_old_thetas]]) ;
381 $copy -> upper_bounds( parameter_type => 'theta',
382 parameter_numbers => [[1..$nr_of_old_thetas]],
383 new_values => [[(10000) x $nr_of_old_thetas]]) ;
385 $copy -> initial_values( parameter_type => 'theta',
386 parameter_numbers => [[1..$nr_of_old_thetas]],
387 new_values => [\@theta_inits]) ;
389 $copy -> add_records( type => 'contr',
390 record_strings => ['DATA=(ID)'] );
392 $copy -> add_option( record_name => 'subroutine',
393 option_name => 'CONTR',
394 option_value => 'iofvcont.f' );
396 $copy -> lower_bounds( parameter_type => 'omega',
397 parameter_numbers => [[1..$copy -> nomegas -> [0]]],
398 new_values => [[(-1) x $copy -> nomegas -> [0]]] );
400 $copy -> initial_values( parameter_type => 'omega',
401 parameter_numbers => [[1..$copy -> nomegas -> [0]]],
402 new_values => [[(0) x $copy -> nomegas -> [0]]] );
404 $copy -> extra_output( ['fort.80'] );
406 $copy -> remove_records( type => 'table' );
407 $copy -> remove_records( type => 'abbreviated' );
408 $copy -> remove_records( type => 'nonparametric' );
410 $copy -> _write;
412 push( @new_ofv_models, $copy );
415 my $new_modelfit = tool::modelfit ->
416 new ( eval( $common_options::parameters ),
417 prepend_model_file_name => 1,
418 directory => 'modelfit',
419 models => \@new_ofv_models );
421 open(CMD, ">", $new_modelfit -> directory . "/command.txt");
422 print CMD $cmd_line, "\n";
423 close(CMD);
425 $new_modelfit -> run;
427 open( TMP, ">", 'models.done' );
428 print TMP "1";
429 close( TMP );
431 # Post process fort.80
433 my $id = 1;
434 my @all_ind_ofv;
436 foreach( 1..$number_of_individuals ){
437 open (IND, '<', "ofv_model_$id.fort.80") || die("Couldn't open ofv_model_$id.mod.fort.80 for reading: $! \n");
438 $id++;
439 my @ind_ofv;
440 while(<IND>) {
441 chomp;
442 my ($junk,$ind_ofv) = split(' ',$_);
443 push @ind_ofv, $ind_ofv;
445 close IND;
446 @ind_ofv = splice(@ind_ofv,-$number_of_individuals);
447 push( @all_ind_ofv, \@ind_ofv );
450 dump_csv( "ind_ofv.csv", \@all_ind_ofv );
452 dump_csv( "ind_etas.csv", \@individual_etas_probabilities );
454 foreach my $parameter( 0..($nomegas - 1) ){
456 my @result;
457 my @columns;
458 for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
460 my $p = $individual_etas_probabilities[$parameter] -> [$ind_ofv];
462 for( my $individual=0;$individual < $number_of_individuals; $individual++) {
463 my $ofv = $all_ind_ofv[$ind_ofv] -> [$individual];
464 my $LP = exp((-$ofv)/2)*$p;
465 $result[$ind_ofv] -> [$individual] = $LP;
466 push(@{$columns[$individual]},$LP);
471 my @column_sums;
473 foreach my $column( @columns ){
474 my $bad_sum=0;
475 foreach my $val ( @{$column} ){
476 $bad_sum+=$val;
478 my $sum=0;
479 foreach my $val( sort( {$a <=> $b} @{$column} ) ) {
480 $sum+=$val;
483 push(@column_sums, $sum);
487 dump_csv( "LP_sum_$parameter.csv",[\@column_sums]);
489 dump_csv( "L_times_P_ETA$parameter.csv", \@result );
491 for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
493 for( my $individual=0;$individual <= $#{$result[$ind_ofv]}; $individual++ ) {
494 if( $column_sums[$individual] == 0 ){
495 print "Warning: Sum of probabilties is zero\n";
496 $result[$ind_ofv] -> [$individual] = 'NaN';
497 } else {
498 $result[$ind_ofv] -> [$individual] = (($result[$ind_ofv] -> [$individual] / $column_sums[$individual])
499 / $number_of_individuals);
504 dump_csv( "P_values_ETA$parameter.csv", \@result );
506 my $included_keys_string = <KEYS>;
508 my @included_keys = split( /,/ , $included_keys_string );
510 my @bootstrapped_set;
511 for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
512 for( my $id = 0; $id < $number_of_individuals ; $id ++ ) {
513 $bootstrapped_set[$ind_ofv]->[$id] = $result[$ind_ofv]->[$included_keys[$id]];
517 dump_csv( "bootstrapped_P_values_ETA$parameter.csv", \@bootstrapped_set );
519 my @parameter_P_sum;
521 for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
522 for( my $id = 0 ; $id < $number_of_individuals ; $id++ ){
523 $parameter_P_sum[$ind_ofv] += $bootstrapped_set[$ind_ofv]->[$id];
528 # Sort bootstrapped_set together with parameter_P_sums
530 my %index;
532 for( 0 .. $number_of_individuals-1 ){
533 $index{$_} = $individual_etas[$parameter] -> [$_];
536 my @order = sort( {$index{$a} <=> $index{$b}} keys %index );
538 my $previous_probability = 0;
540 my (@new_p_sum, @new_etas);
542 my $p_cumulative = 0;
544 foreach my $idx ( @order ){
545 $p_cumulative += $parameter_P_sum[$idx];
547 push( @new_p_sum , $p_cumulative );
548 push( @new_etas , $individual_etas[$parameter] -> [$idx] );
551 @parameter_P_sum = @new_p_sum;
552 @individual_etas[$parameter] = \@new_etas;
554 # Add header and push on big results table
556 unshift( @new_etas, "BS$batch_nr ETA$parameter" );
557 unshift( @new_p_sum, "BS$batch_nr P_sum" );
558 push( @parameter_P_sums, \@new_etas );
559 push( @parameter_P_sums, \@new_p_sum );
562 chdir( '..' );
565 dump_T_csv( $options{'result_file'}, \@parameter_P_sums );
567 close( KEYS );
569 # }}}
571 # {{{ helper routines
573 sub dump_csv {
574 my $file_name = shift;
575 my $arr = shift;
577 open( FILE, ">", $file_name );
579 foreach my $sub_arr( @{$arr} ){
580 print( FILE join( ',', @{$sub_arr} ), "\n" );
583 close( FILE );
587 sub dump_T_csv {
588 my $file_name = shift;
589 my $arr = shift;
591 open( FILE, ">", $file_name );
593 for( my $x = 0;$x < @{$arr->[0]}; $x++ ){
594 for( my $y = 0;$y < @{$arr}; $y++ ){
595 if( $y > 0 and $y < @{$arr} ){
596 print( FILE ',' );
598 print( FILE $arr->[$y][$x] );
600 print( FILE "\n" );
603 close( FILE );
606 sub dump_arrays {
607 my ($filename,$header,$ids,$first,$second) = @_;
609 open FILE , ">", $filename;
610 print FILE $header;
611 for( 0..$#{$ids} ){
612 print FILE join(',',(($ids->[$_]+1), $first -> [$_], $second -> [$_])), "\n";
614 close FILE;
617 # }}}