Added extended_grid and nonp_bootstrap_v2
[PsN.git] / bin / extended_grid
blob7801bbb4cc658a606fb98685a98d3fa731b4fbef
1 #!/usr/local/bin/perl
3 # Only for Development
4 use FindBin qw($Bin);
5 use lib "$Bin/../sfPsN/lib";
7 # Don't edit the line below, it must look exactly like this.
8 # Everything above this line will be replaced #
10 # Perl includes #
11 use strict;
12 use Getopt::Long;
13 # External modules #
14 use Math::Random;
15 # PsN includes #
16 use PsN;
17 use tool::modelfit;
18 use model;
19 use debug;
20 use common_options;
21 use ui;
22 use File::Copy 'cp';
23 use Data::Dumper;
25 ################################################################################
27 # This is the extended_gri. First the command line options are parsed
28 # using Getopt::Long. PsN "common options" are used to fine tune the
29 # running.
31 ################################################################################
33 # {{{ options handling
35 my $cmd_line = $0 . " " . join( " ", @ARGV );
37 my %options;
39 my %optional_options = ( 'ids_to_simulate:i' => undef,
40 'id_column:s' => undef,
41 'dv_column:s' => undef );
43 my $res = GetOptions( \%options,
44 @common_options::get_opt_strings,
45 keys(%optional_options) );
47 exit unless $res;
49 common_options::set_globals( \%options );
50 common_options::get_defaults( \%options, 'extended_grid' );
51 my %help_text;
52 $help_text{Pre_help_message} = <<'EOF';
53 <h3 class="heading1">extended_grid</h3>
55 Perl script for getting standard errors of etas.
57 <h3 class="heading1">Usage:</h3>
58 EOF
60 $help_text{Description} = <<'EOF';
61 <h3 class="heading1">Description:</h3>
63 extended_grid requires "MAXEVALS" to be abreviated to "MAX" if it
64 exists in the model file.
66 EOF
67 $help_text{Examples} = <<'EOF';
68 <h3 class="heading1">Example:</h3>
70 <p class="style2">extended_grid pheno.mod</p>
72 This will create a new directory extended_grid_dirX where X is a
73 number increased every time you run the program. Inside that
74 directory it will create a new model file for each OMEGA in the
75 file. You will be able to retrieve the results from the
76 corresponding table file.
77 EOF
79 common_options::online_help('extended_grid',\%options, \%help_text, {}, {});
81 if ( scalar( @ARGV ) < 1 ) {
82 print "A model file must be specified. Use '$0 -h' for help.\n";
83 exit;
86 unless( defined $options{'seed'} ){
87 $options{'seed'} = 1;
90 unless( defined $options{'dv_column'} ){
91 $options{'dv_column'} = 'DV';
94 unless( defined $options{'id_column'} ){
95 $options{'id_column'} = 'ID';
98 my( $base_dir, $dummy) = OSspecific::absolute_path( '', '' );
100 my $directory;
102 if ( defined $options{'directory'} ) {
103 my $dummy;
104 ( $directory, $dummy ) = OSspecific::absolute_path( $options{'directory'} );
105 } else {
106 $directory = OSspecific::unique_path( 'extended_grid_dir' ,
107 $base_dir );
110 ui -> category( 'modelfit' );
112 random_set_seed_from_phrase( $options{'seed'} ) if ( defined $options{'seed'} );
114 debug -> level( $options{'debug'} );
115 debug -> package( $options{'debug_package'} );
116 debug -> subroutine( $options{'debug_subroutine'} );
117 debug -> warn_with_trace( $options{'warn_with_trace'} );
119 my $eval_string = common_options::model_parameters(\%options);
121 unless( -d $directory ){
122 mkdir( $directory );
124 open(CMD, ">", $directory . "/command.txt");
125 print CMD $cmd_line, "\n";
126 close(CMD);
129 # }}}
131 ################################################################################
133 # The original model file is run and simulation started.
135 ################################################################################
137 # {{{ Orig + simulation
139 my $model;
141 $model = model -> new ( eval( $eval_string ),
142 filename => $ARGV[0],
143 ignore_missing_output_files => 1 );
145 my $datafile_name = $model -> datas -> [0] -> full_name;
147 chdir( $directory );
149 my $modelfit;
151 $modelfit = tool::modelfit ->
152 new ( eval( $common_options::parameters ),
153 directory => 'orig_modelfit_dir',
154 models => [$model] );
156 $modelfit -> run;
158 my $model_copy = $model -> copy( filename => 'simulation.mod',
159 copy_data => 1,
160 data_file_names => ['simulation.dta']);
162 $model_copy -> update_inits( from_output => $model -> outputs -> [0] );
164 $model_copy -> remove_records( type => 'estimation' );
165 $model_copy -> remove_records( type => 'covariance' );
166 $model_copy -> remove_records( type => 'table' );
168 $model_copy -> set_records( type => 'simulation',
169 record_strings => ['('.$options{'seed'}.') ONLYSIM']);
171 my $nr_of_old_etas = $model -> nomegas -> [0];
172 my $nr_of_old_sigmas = $model -> nsigmas -> [0];
173 my @table_string;
174 for( 1..$nr_of_old_etas ){
176 push( @table_string, "ETA$_" );
180 $model_copy -> set_records( type => 'table',
181 record_strings => [ "ID ",join( " ",@table_string ), " FIRSTONLY FILE=sim" ] );
183 $model_copy -> _write;
185 my $id_to_sim;
187 if( defined $options{'ids_to_simulate'} ){
188 $id_to_sim = $options{'ids_to_simulate'};
190 my $data_set = $model_copy -> datas -> [0];
192 if( $id_to_sim <= scalar @{$data_set -> individuals} ){
193 my $individuals = $data_set -> individuals;
194 $data_set -> individuals( \@{$individuals}[0..$id_to_sim] );
195 } else {
197 my $nr_of_new_ids = $id_to_sim - scalar @{$data_set -> individuals};
199 for( 1..$nr_of_new_ids ){
200 my $new_individual = $data_set -> individuals -> copy;
202 push( @{$data_set -> individuals}, $new_individual );
208 $data_set -> _write;
210 } else {
211 $id_to_sim = scalar @{$model_copy -> datas -> [0] -> individuals};
214 $modelfit = tool::modelfit ->
215 new ( eval( $common_options::parameters ),
216 directory => 'simulation_dir',
217 models => [$model_copy] );
219 $modelfit -> run;
221 # }}}
223 # {{{ create Dataset
225 my $extended_model = $model -> copy( filename => 'extended.mod',
226 copy_data => 1,
227 data_file_names => ['extended.dta']);
229 my $data_copy = $extended_model -> datas -> [0];
231 $extended_model -> update_inits( from_output => $model -> outputs -> [0] );
233 my $ids = $data_copy -> individuals;
235 #my $omegas = $models[0] -> problems -> [0] -> omegas();
236 #my @L2_column;
237 #my $L2_value = 0;
239 #foreach my $omega( @{$omegas} ){
240 # $L2_value++;
241 # for( 1..$omega -> size() ){
242 # push( @L2_column, $L2_value );
246 foreach my $id( @{$ids} ){
247 my @last_row_of_id = split(/,/,$id -> subject_data -> [$#{$id -> subject_data}]);
249 foreach my $row( @{$id -> subject_data} ){
250 $row .= ",0";
254 my $id_template = $ids->[$#{$ids}];
256 my @last_row_of_id = split(/,/,$id_template -> subject_data -> [$#{$id_template -> subject_data}]);
257 my $row_length = scalar @last_row_of_id;
259 my $id_column = $data_copy -> column_head_indices -> {$options{'id_column'}};
260 my $dv_column = $data_copy -> column_head_indices -> {$options{'dv_column'}};
261 my $evid_column = $data_copy -> column_head_indices -> {'EVID'};
262 my $last_id = $id_template -> idnumber();
264 my $table_data = data -> new( filename => 'sim' );
266 my $table_ids = $table_data -> individuals();
268 foreach my $id_num ( 1..$id_to_sim ){
269 my @new_id_rows;
270 my @row_template = ('.') x $row_length;
271 my $type = 1;
273 my $eta_from_sim = $table_ids -> [$id_num-1] -> subject_data -> [0];
275 foreach my $eta_num ( 1..$nr_of_old_etas ){
276 my @eta_from_sim = split( /,/ , $eta_from_sim );
277 my @new_row = @row_template;
278 $new_row[$id_column-1] = ($last_id + $id_num );
279 $new_row[$dv_column-1] = $eta_from_sim[$eta_num]; # eta begins with 1, but there is an ID-column in $eta_from_sim
280 $new_row[$evid_column-1] = 0;
281 $new_row[$#new_row] = $type++;
282 push( @new_id_rows, join(',',@new_row ) );
284 my $new_id = data::individual -> new( idcolumn => $last_id + $id_num,
285 subject_data => \@new_id_rows,
286 data_id => $data_copy -> {'data_id'} );
287 push( @{$data_copy -> individuals}, $new_id );
290 push( @{$data_copy -> {'header'}}, 'TYPE' );
291 $data_copy -> synchronize;
293 $data_copy -> _write;
295 # }}}
297 # {{{ create the second model
299 # Get error block to replace EPS(n) with ETA(n + $nr_of_old_eta) and
300 # replace ERR(n) with ERR(n+$nr_of_old_eta)
302 my $error = $extended_model -> record( record_name => 'error' );
304 my %seen_eps;
305 my %seen_err;
307 my $error_has_if = 0;
309 my $loop_end = $#{$error -> [0]};
310 for( my $row = 0 ; $row <= $loop_end; $row ++ ){
311 $error -> [0] -> [$row] =~ s/
312 (.)EPS\((\d+)
313 (?{$seen_eps{$2}=1;$dummy=$2+$nr_of_old_etas;})\)
314 /$1ETA($dummy)/gx;
316 if( $error -> [0] -> [$row] =~ /IF\s*\(/ ){
317 $error_has_if = 1;
320 # $error -> [0] -> [$row] =~ s/
321 # (.)ERR\((\d+)
322 # (?{$seen_err{$2}=1;$dummy=$2+$nr_of_old_etas;})\)
323 # /$1ETA($dummy)/gx;
326 # if( $error -> [0] -> [$row] =~ /(.*)Y(\s*)=(.*)/ ){
329 # @{$error -> [0]} = ( @{$error -> [0]}[0..($row-1)],
330 # ' WT=1/100000',
331 # ' IF( TYPE.LE.0 ) ' . $error -> [0] -> [$row],
332 # @{$error -> [0]}[$row+1..$#{$error->[0]}] );
333 # $row++;
339 unless( $error_has_if ){
340 unshift( @{$error->[0]}, 'IF(TYPE.LE.0) THEN' );
341 push( @{$error -> [0]}, 'ENDIF' );
344 my $nr_of_added_etas = scalar keys %seen_eps;
346 my $input = $extended_model -> record( record_name => 'input' );
348 $input->[0]->[0] =~ s/\$INPUT//;
349 chomp( $input->[0]->[ $#{$input->[0]} ] );
350 $input->[0]->[ $#{$input->[0]} ] .= ' TYPE';
352 $extended_model -> set_records( type => 'input',
353 record_strings => $input->[0]);
356 foreach( 1..$nr_of_old_etas ){
357 push(@{$error -> [0]}, ' IF( TYPE.EQ.'.($_).' ) Y = ETA('.($_).')+ERR(2)*W/100000');
361 $extended_model -> set_records( type => 'error',
362 record_strings => $error -> [0] );
364 $extended_model -> initial_values( parameter_type => 'sigma',
365 add_if_absent => 1,
366 parameter_numbers => [[$nr_of_old_sigmas+1]],
367 new_values => [[1]] );
369 $extended_model -> initial_values( parameter_type => 'omega',
370 add_if_absent => 1,
371 parameter_numbers => [[$nr_of_old_etas+1 .. $nr_of_old_etas+$nr_of_added_etas]],
372 new_values => [[(1) x $nr_of_added_etas]] );
374 $extended_model -> fixed( parameter_type => 'sigma',
375 parameter_numbers => [[$nr_of_old_sigmas+1]],
376 new_values => [[1]] );
378 $extended_model -> set_records( type => 'estimation',
379 record_strings => ['MAX=0 POSTHOC'] );
381 $extended_model -> set_records( type => 'nonparametric',
382 record_strings => ['MARGINALS MSFO=MSF01 UNCONDITIONAL'] );
384 $extended_model -> remove_records( type => 'covariance' );
386 my $code_block;
387 my $use_pk = 0;
388 # Find PK or PRED block
389 if( defined $extended_model -> pk ){
390 $code_block = $extended_model -> pk;
391 $use_pk = 1;
392 } elsif( defined $extended_model -> pred ){
393 $code_block = $extended_model -> pred;
394 } else {
395 die "Error: No \$PK or \$PRED found... \n";
398 push( @{$code_block},' JD = DEN_');
400 my @JD_string;
402 for( 1..$nr_of_old_etas ){
403 push(@{$code_block}," DN$_=CDEN_($_)" );
404 push( @JD_string, "DN$_" );
407 if( $use_pk ){
408 $extended_model -> pk( new_pk => $code_block );
409 } else {
410 $extended_model -> pred( new_pred => $code_block );
413 $extended_model -> set_records( type => 'table',
414 record_strings => ['ID '.join( ' ', @table_string, 'JD', @JD_string,'NOPRINT NOAPPEND FIRSTONLY FILE=nptab' ) ] );
416 $extended_model -> extra_output( ['nptab'] );
418 $extended_model -> _write;
420 $modelfit = tool::modelfit ->
421 new ( eval( $common_options::parameters ),
422 prepend_model_file_name => 1,
423 directory => 'extended_dir',
424 models => [$extended_model] );
426 $modelfit -> run;
428 # }}}
430 # {{{ Results handling (Cut'n'Paste from nonp_bootstrap_v2)
432 ################################################################################
434 # Do one run per individual initial clearence and get fort.80 from
435 # each run.
437 ################################################################################
439 # {{{ Fort.80 creation
441 my @parameter_P_sums;
442 my @result_header;
443 my $batch_nr = 0;
445 # {{{ create iofvcont.f
447 unless( -e 'iofvcont.f' ){
448 open(IOFVCONT , '>', 'iofvcont.f');
450 print IOFVCONT <<'EOF';
451 subroutine contr (icall,cnt,ier1,ier2)
452 parameter (no=1000)
453 common /rocm1/ y(no),data(no,3),nobs
454 integer nobs, un
455 double precision cnt,y
456 DATA un /80/
457 if (icall.le.1) return
458 call ncontr (cnt,ier1,ier2,l2r)
459 C individual obj. funct. value for indiv. jj = cnt
460 write(un,10) data(1,1),cnt
461 10 FORMAT(1F6.2,1F19.4)
462 return
465 close( IOFVCONT );
468 # }}}
470 # TODO update all initial values
472 my @new_ofv_models;
474 unless ( -e "ofv_dir" ){
475 mkdir( "ofv_dir" );
477 chdir( "ofv_dir" );
478 cp( '../iofvcont.f', './iofvcont.f' ) or print "Warning: unable to copy iofvcont.f\n";
480 my $table = table_file -> new( filename => "extended.nptab",
481 directory => '../' );
483 my $nomegas = $extended_model -> nomegas -> [0];
485 my @individual_etas;
486 my @individual_etas_probabilities;
488 for( 0..($nomegas-1) ){
490 my $individual_eta = $table -> column_to_array( column => $_ + 1 ); # plus 1 is for ID column
491 my $individual_eta_probabilities = $table -> column_to_array( column => ($nomegas-1) + $_ + 1 + 1 ); # first plus 1 is for ID column. Second is for JD
493 push( @individual_etas , $individual_eta );
494 push( @individual_etas_probabilities , $individual_eta_probabilities );
498 my $number_of_individuals = @{$individual_etas[0]};
500 foreach my $id( 0..($number_of_individuals-1) ){
502 my $copy = $extended_model -> copy( filename => 'ofv_model_'.($id+1).'.mod' );
504 # Not needed ?
505 # $copy -> update_inits( from_output => $extended_model -> outputs -> [0] );
507 my @new_values;
509 for my $parm ( 0..$nomegas-1 ){
511 push( @new_values, $individual_etas[$parm][$id] );
515 # $copy -> lower_bounds( parameter_type => 'theta',
516 # parameter_numbers => [[1..$nomegas]],
517 # new_values => [[(-10000) x $nomegas]]) ;
519 # $copy -> upper_bounds( parameter_type => 'theta',
520 # parameter_numbers => [[1..$nomegas]],
521 # new_values => [[(10000) x $nomegas]]) ;
523 # $copy -> initial_values( parameter_type => 'theta',
524 # parameter_numbers => [[1..$nomegas]],
525 # new_values => [\@new_values]) ;
527 $copy -> add_records( type => 'contr',
528 record_strings => ['DATA=(ID)'] );
530 $copy -> add_option( record_name => 'subroutine',
531 option_name => 'CONTR',
532 option_value => 'iofvcont.f' );
534 #$copy -> add_option( record_name => 'DATA',
535 # option_name => 'ACCEPT',
536 # option_value => "ID.LE.$ids" );
538 $copy -> lower_bounds( parameter_type => 'omega',
539 parameter_numbers => [[1..$copy -> nomegas -> [0]]],
540 new_values => [[(-1) x $copy -> nomegas -> [0]]] );
542 $copy -> initial_values( parameter_type => 'omega',
543 parameter_numbers => [[1..$copy -> nomegas -> [0]]],
544 new_values => [[(0) x $copy -> nomegas -> [0]]] );
546 # $copy -> initial_values( parameter_type => 'omega',
547 # parameter_numbers => [[1..$copy -> nomegas -> [0]]],
548 # new_values => [\@new_values] );
550 $copy -> fixed( parameter_type => 'theta',
551 new_values => [[(1) x $copy -> nthetas ]] );
553 $copy -> fixed( parameter_type => 'omega',
554 new_values => [[(1) x $copy -> nomegas -> [0] ]] );
556 $copy -> fixed( parameter_type => 'sigma',
557 new_values => [[(1) x $copy -> nsigmas -> [0] ]] );
559 $copy -> extra_output( ['fort.80'] );
561 $copy -> remove_records( type => 'table' );
562 $copy -> remove_records( type => 'abbreviated' );
563 $copy -> remove_records( type => 'nonparametric' );
565 $copy -> _write;
567 push( @new_ofv_models, $copy );
570 my $new_modelfit = tool::modelfit ->
571 new ( eval( $common_options::parameters ),
572 prepend_model_file_name => 1,
573 directory => 'modelfit',
574 models => \@new_ofv_models );
576 $new_modelfit -> run;
578 # Post process fort.80
580 my $id = 1;
581 my @all_ind_ofv;
583 foreach( 1..$number_of_individuals ){
584 open (IND, '<', "ofv_model_$id.fort.80") || die("Couldn't open ofv_model_$id.mod.fort.80 for reading: $! \n");
585 $id++;
586 my @ind_ofv;
587 while(<IND>) {
588 chomp;
589 my ($junk,$ind_ofv) = split(' ',$_);
590 push @ind_ofv, $ind_ofv;
592 close IND;
593 @ind_ofv = splice(@ind_ofv,-$number_of_individuals);
594 push( @all_ind_ofv, \@ind_ofv );
597 dump_csv( "ind_ofv.csv", \@all_ind_ofv );
599 dump_csv( "ind_etas.csv", \@individual_etas_probabilities );
601 my @etas_and_sums;
603 foreach my $parameter( 0..($nomegas - 1) ){
604 my @result;
605 my @row_sums = (0) x scalar @{$individual_etas[0]};
607 for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
608 for( my $individual=0;$individual < $number_of_individuals; $individual++) {
609 my $ofv = $all_ind_ofv[$ind_ofv] -> [$individual];
610 my $p = $individual_etas_probabilities[$parameter] -> [$individual];
611 my $LP = exp((-$ofv)/2)*$p;
612 $result[$ind_ofv] -> [$individual] = $LP;
613 $row_sums[$individual]+=$LP;
617 dump_csv( "L_times_P_ETA$parameter.csv", \@result );
619 for( my $ind_ofv=0; $ind_ofv < $number_of_individuals ; $ind_ofv ++) {
620 for( my $individual=0;$individual <= $#{$result[$ind_ofv]}; $individual++ ) {
621 $result[$ind_ofv] -> [$individual] = ($result[$ind_ofv] -> [$individual] / $row_sums[$individual]) / $number_of_individuals;
625 my $big_sum=0;
627 my @sums;
629 for( my $row = 0; $row < scalar @result; $row++ ){
630 my $row_length = $#{$result[$row]};
631 @{$result[$row]} = @{$result[$row]}[0..$id_to_sim];
633 my $scale = ($number_of_individuals/$id_to_sim);
635 my $sum = 0;
637 for(my $col = 0; $col < scalar @{$result[$row]}; $col++ ){
638 $result[$row] -> [$col] *= $scale;
639 $sum += $result[$row] -> [$col];
642 $big_sum += $sum;
644 #push( @{$result[$row]}, $sum );
646 push( @sums, $sum );
650 my @order = 0..scalar(@{$individual_etas[$parameter]})-1 ;
652 @order = sort( { $sums[$a] <=> $sums[$b] } @order );
654 my @etas_reordered;
655 my @sums_reordered;
656 my $i = 0;
658 foreach my $index( @order ){
659 $etas_reordered[$i] = $individual_etas[$parameter] -> [$index];
660 $sums_reordered[$i] = $sums[$index];
661 $i++;
664 push( @etas_and_sums, \@etas_reordered );
665 push( @etas_and_sums, \@sums_reordered );
666 if( $big_sum > 1.01 or $big_sum < 0.99 ){
667 print "Warning, sum of probabilties is outside of range [0.99..1.01]: $big_sum\n";
670 dump_csv( "P_values_ETA$parameter.csv", \@result );
674 chdir( '..' );
676 dump_T_csv('results.csv', \@etas_and_sums);
678 # }}}
680 # {{{ helper routines
682 sub dump_csv {
683 my $file_name = shift;
684 my $arr = shift;
686 open( FILE, ">", $file_name );
688 foreach my $sub_arr( @{$arr} ){
689 print( FILE join( ',', @{$sub_arr} ), "\n" );
692 close( FILE );
696 sub dump_T_csv {
697 my $file_name = shift;
698 my $arr = shift;
700 open( FILE, ">", $file_name );
702 for( my $x = 0;$x < @{$arr->[0]}; $x++ ){
703 for( my $y = 0;$y < @{$arr}; $y++ ){
704 if( $y > 0 and $y < @{$arr} ){
705 print( FILE ',' );
707 print( FILE $arr->[$y][$x] );
709 print( FILE "\n" );
712 close( FILE );
715 # }}}
717 # }}}