Rewrote much of the raw_results handling to save memory and enable intermediate raw_r...
[PsN.git] / lib / tool / mc_subs.pm
blobc8c7b1538e75f7619ce7e9ad8f63ad530d71231b
1 # {{{ include statements
3 start include statements
4 use tool::cdd;
5 use tool::modelfit;
6 use Math::Random;
7 use Data::Dumper;
8 end include statements
10 # }}} include statements
12 # {{{ new
14 start new
15 foreach my $attribute ( 'logfile', 'raw_results_file' ) {
16 if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
17 ref($this -> {$attribute}) eq 'HASH' ) ) {
18 my $tmp = $this -> {$attribute};
19 if ( not defined $tmp and $attribute eq 'logfile' ) {
20 $tmp = 'mcs.log';
22 $this -> {$attribute} = [];
23 for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
24 my $name = $tmp;
25 if ( $name =~ /\./ ) {
26 $name =~ s/\./$i\./;
27 } else {
28 $name = $name.$i;
30 my $ldir;
31 ( $ldir, $name ) =
32 OSspecific::absolute_path( $this -> {'directory'}, $name );
33 push ( @{$this -> {$attribute}}, $ldir.$name ) ;
37 end new
39 # }}}
41 # {{{ cdd_setup
43 start cdd_setup
45 my @subm_threads;
46 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
47 @subm_threads = @{$self -> {'threads'}};
48 shift(@subm_threads);
49 } else {
50 @subm_threads = ($self -> {'threads'});
52 $self -> general_setup( model_number => $model_number,
53 class => 'tool::cdd',
54 subm_threads => \@subm_threads );
56 end cdd_setup
58 # }}} cdd_setup
60 # {{{ modelfit_setup
62 start modelfit_setup
64 my @subm_threads;
65 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
66 @subm_threads = @{$self -> {'threads'}};
67 shift(@subm_threads);
68 } else {
69 @subm_threads = ($self -> {'threads'});
71 $self -> general_setup( model_number => $model_number,
72 class => 'tool::modelfit',
73 subm_threads => $subm_threads[$model_number-1] );
75 end modelfit_setup
77 # }}} modelfit_setup
79 # {{{ general_setup
81 start general_setup
83 my $model = $self -> {'models'} -> [$model_number-1];
84 #my $alternative = $self -> {'alternative_models'} -> [$model_number-1];
85 my @alternatives = @{$self -> {'alternative_models'}}; #may be empty
86 my $subm_threads = $parm{'subm_threads'};
87 my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
88 $self -> {'threads'} -> [0]:$self -> {'threads'};
89 my $done = ( -e $self -> {'directory'}."/m$model_number/done" ) ? 1 : 0;
90 my ( @seed, $new_datas, $skip_ids, $skip_keys, $skip_values );
91 my ( @orig_est_models, @alt_est_models );
92 my ( $sim_model, $est_original, $est_alternative );
93 my $alternative;
95 if ( not $done ) {
96 my @sim_models;
97 my @table_header;
98 my @all_simulated_files;
99 my ( @orig_table_names, @alt_table_names );
101 # Copy the model to a new simulation models
102 for( my $sim_no = 1; $sim_no <= $self -> {'samples'}; $sim_no++ ) {
103 my $sim_name = "mc-$sim_no.sim";
104 my $orig_name = "mc-original-$sim_no.mod";
105 my $out_name = "mc-$sim_no.lst";
106 my $orig_out = "mc-original-$sim_no.lst";
107 if( $sim_no == 1 ) {
108 $sim_model = $model ->
109 copy( filename => $self -> {'directory'}.'m'.$model_number.'/'.$sim_name,
110 target => 'disk',
111 copy_data => 1,
112 copy_output => 0);
113 $sim_model -> drop_dropped;
114 $est_original = $model ->
115 copy( filename => $self -> {'directory'}.'m'.$model_number.'/'.$orig_name,
116 target => 'disk',
117 copy_data => 0,
118 copy_output => 0);
119 $est_original -> drop_dropped;
120 $est_original -> remove_records( type => 'simulation' );
122 my $tbl_nm_ref = $model -> table_names();
123 @orig_table_names = @{$tbl_nm_ref} if( defined $tbl_nm_ref );
124 } else {
125 $sim_model = $sim_models[0] ->
126 copy( filename => $self -> {'directory'}.'m'.$model_number.'/'.$sim_name,
127 target => 'disk',
128 copy_data => 0,
129 copy_output => 0);
130 $est_original = $orig_est_models[0] ->
131 copy( filename => $self -> {'directory'}.'m'.$model_number.'/'.$orig_name,
132 target => 'disk',
133 copy_data => 0,
134 copy_output => 0);
136 $sim_model -> ignore_missing_files( 1 );
137 $sim_model -> outputfile( $self -> {'directory'}.'m'.$model_number.'/'.$out_name );
138 $sim_model -> ignore_missing_files( 0 );
139 $est_original -> ignore_missing_files( 1 );
140 $est_original -> outputfile( $self -> {'directory'}.'m'.$model_number.'/'.$orig_out );
141 $est_original -> ignore_missing_files( 0 );
142 my @new_orig_table_names;
143 for( my $pr = 0; $pr <= $#orig_table_names; $pr++ ) {
144 for( my $tbl = 0; $tbl < scalar @{$orig_table_names[$pr]}; $tbl++ ) {
145 $new_orig_table_names[$pr][$tbl] = $orig_table_names[$pr][$tbl].'-'.$sim_no;
148 if( $#new_orig_table_names >= 0 ) {
149 $est_original -> table_names( new_names => \@new_orig_table_names );
151 if( $self -> shrinkage() ) {
152 $est_original -> shrinkage_stats( enabled => 1 );
153 $est_alternative -> shrinkage_stats( enabled => 1 );
154 my @problems = @{$est_original -> problems};
155 for( my $i = 1; $i <= scalar @problems; $i++ ) {
156 $problems[ $i-1 ] -> shrinkage_module -> model( $est_original );
160 my @simulated_files;
161 for( my $j = 0; $j < scalar @{$sim_model -> problems}; $j++ ) {
162 my $prob = $sim_model -> problems -> [$j];
164 # set $SIMULATION record
166 my $sim_record = $sim_model -> record( problem_number => $j,
167 record_name => 'simulation' );
169 if( scalar(@{$sim_record}) > 0 ){
171 my @new_record;
172 foreach my $sim_line ( @{$sim_record -> [0]} ){
173 my $new_line;
174 while( $sim_line =~ /([^()]*)(\([^()]+\))(.*)/g ){
175 my $seed = random_uniform_integer( 1, 0, 1000000 ); # Upper limit is from nmhelp
177 my $head = $1;
178 my $old_seed = $2;
179 $sim_line = $3;
181 if( $old_seed =~ /\(\d+\s+(NORMAL|UNIFORM|NONPARAMETRIC)\)/ ){
182 $new_line .= "$head($seed $1)";
183 } else {
184 $new_line .= "$head($seed)";
189 push( @new_record, $new_line.$sim_line );
192 $prob -> set_records( type => 'simulation',
193 record_strings => \@new_record );
195 } else {
197 my $seed = random_uniform_integer( 1, 0, 1000000 ); # Upper limit is from nmhelp
198 $prob -> set_records( type => 'simulation',
199 record_strings => [ '(' . $seed .
200 ') ONLYSIMULATION' ] );
203 if( $sim_no == 1 ) {
204 # remove $EST and $COV
206 if( $sim_model -> is_option_set( problem_number => $j,
207 record => 'estimation',
208 name => 'LIKELIHOOD',
209 fuzzy_match => 1 )
211 $sim_model -> is_option_set( problem_number => $j,
212 record => 'estimation',
213 name => '-2LOGLIKELIHOOD',
214 fuzzy_match => 1 )
216 $sim_model -> is_option_set( problem_number => $j,
217 record => 'estimation',
218 name => '-2LLIKELIHOOD',
219 fuzzy_match => 1 )
222 $sim_model -> set_option( problem_numbers => [$j],
223 record_name => 'estimation',
224 option_name => 'MAXEVAL',
225 fuzzy_match => 1,
226 option_value => 0 );
228 } else {
230 $prob -> remove_records(type => 'estimation');
234 $prob -> remove_records(type => 'covariance');
236 # set $TABLE record
237 if( defined $prob -> inputs and defined $prob -> inputs -> [0] -> options ) {
238 foreach my $option ( @{$prob -> inputs -> [0] -> options} ) {
239 push( @table_header, $option -> name );
241 } else {
242 debug -> die( message => "Trying to construct table for monte-carlo simulation".
243 " but no headers were found in \$model_number-1NPUT" );
246 my $simulated_file = "mc-sim-".($j+1)."-$sim_no.dat";
247 $prob -> set_records( type => 'table',
248 record_strings => [ join( ' ', @table_header ).
249 ' NOPRINT NOAPPEND ONEHEADER FILE='.
250 $simulated_file ] );
251 push( @simulated_files, $self -> {'directory'}.'m'.$model_number.'/'.
252 $simulated_file );
254 $sim_model -> _write( write_data => $sim_no == 1 ? 1 : 0 );
255 push( @sim_models, $sim_model );
256 push( @orig_est_models, $est_original );
257 push( @all_simulated_files, \@simulated_files );
259 my $mod_sim = tool::modelfit -> new( reference_object => $self,
260 models => \@sim_models,
261 base_directory => $self -> {'directory'},
262 directory => $self -> {'directory'}.
263 'simulation_dir'.$model_number,
264 parent_tool_id => $self -> {'tool_id'},
265 retries => 1,
266 logfile => undef,
267 raw_results => undef,
268 prepared_models => undef,
269 threads => 1 );
270 $mod_sim -> run;
271 for( my $j = 0; $j <= $#orig_est_models; $j++ ) {
272 $orig_est_models[$j] -> datafiles( new_names => $all_simulated_files[$j] );
273 $orig_est_models[$j] -> _write;
274 $orig_est_models[$j] -> flush_data();
276 @{$self -> {'mc_models'}} = ( @orig_est_models );
278 foreach $alternative (@alternatives){
279 @alt_table_names = ();
280 @alt_est_models =();
281 my $filestem = 'mc-alt-' . $alternative -> filename();
282 $filestem =~ s/\.[^.]*$//;
283 for( my $sim_no = 1; $sim_no <= $self -> {'samples'}; $sim_no++ ) {
284 my $alt_name = "$filestem-$sim_no.mod";
285 my $alt_out = "$filestem-$sim_no.lst";
287 if( $sim_no == 1 ) {
288 $est_alternative = $alternative ->
289 copy( filename => $self -> {'directory'}.'m'.$model_number.'/'.$alt_name,
290 target => 'disk',
291 copy_data => 0,
292 copy_output => 0);
293 $est_alternative -> drop_dropped;
294 $est_alternative -> remove_records( type => 'simulation' );
295 my $tbl_nm_ref = $alternative -> table_names();
296 @alt_table_names = @{$tbl_nm_ref} if( defined $tbl_nm_ref );
297 } else {
298 $est_alternative = $alt_est_models[0] ->
299 copy( filename => $self -> {'directory'}.'m'.$model_number.'/'.$alt_name,
300 target => 'disk',
301 copy_data => 0,
302 copy_output => 0);
304 $est_alternative -> ignore_missing_files( 1 );
305 $est_alternative -> outputfile( $self -> {'directory'}.'m'.$model_number.'/'.$alt_out );
306 $est_alternative -> ignore_missing_files( 0 );
307 my @new_alt_table_names;
308 for( my $pr = 0; $pr <= $#alt_table_names; $pr++ ) {
309 for( my $tbl = 0; $tbl < scalar @{$alt_table_names[$pr]}; $tbl++ ) {
310 $new_alt_table_names[$pr][$tbl] = $alt_table_names[$pr][$tbl].'-'.$sim_no;
313 if( $#new_alt_table_names >= 0 ) {
314 $est_alternative -> table_names( new_names => \@new_alt_table_names );
316 if( $self -> shrinkage() ) {
317 my @problems = @{$est_alternative -> problems};
318 for( my $i = 1; $i <= scalar @problems; $i++ ) {
319 $problems[ $i-1 ] -> shrinkage_module -> model( $est_alternative );
322 push( @alt_est_models, $est_alternative );
323 } #end loop over sim_no
324 #is all_simulated_files still in scope? should be, declared inside if not $done
325 for( my $j = 0; $j <= $#alt_est_models; $j++ ) {
326 $alt_est_models[$j] -> datafiles( new_names => $all_simulated_files[$j] );
327 $alt_est_models[$j] -> _write;
328 $alt_est_models[$j] -> flush_data();
330 push (@{$self -> {'mc_models'}}, @alt_est_models);
333 }#end loop over alternatives
335 # Create a checkpoint.
336 open( DONE, ">".$self -> {'directory'}."/m$model_number/done" ) ;
337 print DONE "Simulation from ",$sim_models[0] -> filename," through ",
338 $sim_models[$#sim_models] -> filename," performed\n";
339 print DONE $self -> {'samples'}." samples\n";
340 @seed = random_get_seed;
341 print DONE "seed: @seed\n";
342 close( DONE );
343 } else {
344 #$done=true
345 # Recreate the datasets and models from a checkpoint
346 ui -> print( category => 'mc',
347 message => "Recreating models from a previous run" );
348 open( DONE, $self -> {'directory'}."/m$model_number/done" );
349 my @rows = <DONE>;
350 close( DONE );
351 my ( $junk, $junk, $stored_filename1, $junk,
352 $stored_filename2, $junk ) = split(' ',$rows[0],4);
353 my ( $stored_samples, $junk ) = split(' ',$rows[1],2);
354 @seed = split(' ',$rows[2]);
355 shift( @seed ); # get rid of 'seed'-word
356 # Reinitiate the model objects
357 for ( my $j = 1; $j <= $stored_samples; $j++ ) {
358 my ($model_dir, $orig_name) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
359 $model_number,
360 "mc-original-$j.mod" );
361 my ($out_dir, $orig_out) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
362 $model_number,
363 "mc-original-$j.lst" );
364 my $est_original = model ->
365 new( directory => $model_dir,
366 filename => $orig_name,
367 outputfile => $orig_out,
368 extra_files => $model -> extra_files,
369 target => 'disk',
370 ignore_missing_files => 1,
371 extra_output => $model -> extra_output(),
372 cwres => $model -> cwres(),
373 quick_reload => 1 );
374 push( @orig_est_models, $est_original );
375 my $nl = $j == $stored_samples ? "" : "\r";
376 ui -> print( category => 'mc',
377 message => ui -> status_bar( sofar => $j+1,
378 goal => $stored_samples+1 ).$nl,
379 wrap => 0,
380 newline => 0 );
381 } #end loop over samples
382 @{$self -> {'mc_models'}} = ( @orig_est_models);
384 #start alternatives section
385 foreach $alternative (@alternatives){
386 my $filestem = 'mc-alt-' . $alternative -> filename();
387 $filestem =~ s/\.[^.]*$//;
388 @alt_est_models =();
389 for ( my $j = 1; $j <= $stored_samples; $j++ ) {
390 my ($model_dir, $alt_name) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
391 $model_number,
392 "$filestem-$j.mod" );
393 my ($out_dir, $alt_out) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
394 $model_number,
395 "$filestem-$j.lst" );
396 my $est_alternative = model ->
397 new( directory => $model_dir,
398 filename => $alt_name,
399 outputfile => $alt_out,
400 extra_files => $alternative -> extra_files,
401 target => 'disk',
402 ignore_missing_files => 1,
403 extra_output => $model -> extra_output(),
404 cwres => $model -> cwres(),
405 quick_reload => 1 );
406 push( @alt_est_models, $est_alternative );
407 my $nl = $j == $stored_samples ? "" : "\r";
408 ui -> print( category => 'mc',
409 message => ui -> status_bar( sofar => $j+1,
410 goal => $stored_samples+1 ).$nl,
411 wrap => 0,
412 newline => 0 );
413 } #end loop over samples
414 push (@{$self -> {'mc_models'}}, @alt_est_models );
416 }#end loop over alternatives
418 ui -> print( category => 'mc', message => " ... done." );
419 random_set_seed( @seed );
420 ui -> print( category => 'mc',
421 message => "Using $stored_samples data sets, previously simulated ".
422 "from $stored_filename1 through $stored_filename2" )
423 unless $self -> {'parent_threads'} > 1;
424 } #end $done
425 my $subdir = $class;
426 $subdir =~ s/tool:://;
427 my @subtools = @{$self -> {'subtools'}};
428 shift( @subtools );
429 my %subargs = ();
430 if ( defined $self -> {'subtool_arguments'} ) {
431 %subargs = %{$self -> {'subtool_arguments'}};
434 push( @{$self -> {'tools'}},
435 $class -> new
436 ( models => $self -> {'mc_models'},
437 nm_version => $self -> {'nm_version'},
439 run_on_lsf => $self -> {'run_on_lsf'},
440 lsf_queue => $self -> {'lsf_queue'},
441 lsf_options => $self -> {'lsf_options'},
442 lsf_job_name => $self -> {'lsf_job_name'},
443 lsf_project_name => $self -> {'lsf_project_name'},
445 run_on_nordugrid => $self -> {'run_on_nordugrid'},
446 cpu_time => $self -> {'cpu_time'},
448 parent_tool_id => $self -> {'tool_id'},
450 min_retries => $self -> {'min_retries'},
451 picky => $self -> {'picky'},
452 compress => $self -> {'compress'},
453 threads => $subm_threads,
454 retries => $self -> {'retries'},
455 base_directory => $self -> {'directory'},
456 directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
457 subtools => $#subtools >= 0 ? \@subtools : undef,
458 parent_threads => $own_threads,
459 shrinkage => $self -> {'shrinkage'},
460 _raw_results_callback => $self ->
461 _modelfit_raw_results_callback( model_number => $model_number ),
462 %subargs ) );
464 end general_setup
466 # }}} general_setup
468 # {{{ cdd_analyze
469 start cdd_analyze
470 $self -> {'tools'}[0] -> print_results;
471 end cdd_analyze
472 # }}} cdd_analyze
474 # {{{ modelfit_analyze
476 start modelfit_analyze
477 $self -> {'tools'}[0] -> print_results;
478 end modelfit_analyze
480 # }}}
482 # {{{ _modelfit_raw_results_callback
484 start _modelfit_raw_results_callback
486 # Use the mc's raw_results file.
487 my ($dir,$file) =
488 OSspecific::absolute_path( $self -> {'directory'},
489 $self -> {'raw_results_file'}[$model_number-1] );
490 my $orig_mod = $self -> {'models'}[$model_number-1];
491 $subroutine = sub {
492 my $modelfit = shift;
493 my $mh_ref = shift;
494 my %max_hash = %{$mh_ref};
495 $modelfit -> raw_results_file( $dir.$file );
497 # {{{ New header
499 if ( defined $modelfit -> raw_results() ) {
500 my @rows = @{$modelfit -> raw_results()};
501 #the first 'samples' (a scalar) number of rows is original model
502 for( my $i = 0; $i < $self -> {'samples'}; $i++ ) {
503 unshift( @{$rows[$i]}, 'null' );
505 #remaining rows, if any, belong to alternatives model(s)
506 my $offset = $self -> {'samples'};
507 foreach my $alternative (@{$self -> {'alternative_models'}}){
508 if (($offset + $self -> {'samples'}-1)> $#rows){
509 debug -> die( message => "Too few rows in raw results file." );
511 my $altname = 'alt-' . $alternative -> filename();
512 $altname =~ s/\.[^.]*$//;
513 for( my $i = 0; $i < $self -> {'samples'}; $i++ ) {
514 unshift( @{$rows[($offset+$i)]}, $altname );
516 $offset += $self -> {'samples'};
518 if (($offset -1)< $#rows){
519 debug -> die( message => "Too many rows in raw results file." );
522 unshift( @{$modelfit -> {'raw_results_header'}}, 'hypothesis' );
525 @{$self -> {'raw_results_header'}} = @{$modelfit -> {'raw_results_header'}};
526 # }}} New header
529 return $subroutine;
531 end _modelfit_raw_results_callback
533 # }}} _modelfit_raw_results_callback