This is a massive update that merges all changes from PsN_2_2_0_patches_serial. It...
[PsN.git] / lib / tool / cdd_subs.pm
blob39ce861a896e918d7b69004c424269a00fcb188b
1 # {{{ include statements
3 start include statements
4 use Cwd;
5 use File::Copy 'cp';
6 use tool::llp;
7 use tool::modelfit;
8 use Math::Random;
9 use ext::Math::MatrixReal;
10 use Data::Dumper;
11 if ( $PsN::config -> {'_'} -> {'use_database'} ) {
12 # Testing DBD::mysql:
13 require DBI;
15 end include statements
17 # }}} include statements
19 # {{{ new
21 start new
23 foreach my $attribute ( 'logfile', 'raw_results_file' ) {
24 if ( not( ref($this -> {$attribute}) eq 'ARRAY' or
25 ref($this -> {$attribute}) eq 'HASH' ) ) {
26 my $tmp = $this -> {$attribute};
27 if ( not defined $tmp and $attribute eq 'logfile' ) {
28 $tmp = 'cdd.log';
30 $this -> {$attribute} = [];
31 for ( my $i = 1; $i <= scalar @{$this -> {'models'}}; $i++ ) {
32 my $name = $tmp;
33 if ( $name =~ /\./ ) {
34 $name =~ s/\./$i\./;
35 } else {
36 $name = $name.$i;
38 my $ldir;
39 ( $ldir, $name ) =
40 OSspecific::absolute_path( $this -> {'directory'}, $name );
41 push ( @{$this -> {$attribute}}, $ldir.$name ) ;
45 if ( $PsN::config -> {'_'} -> {'use_database'} ) {
46 my( $found_log, $found_cdd_id ) = $this -> read_cdd_log;
48 $this -> register_cdd_in_database unless ( $found_cdd_id );
50 $this -> log_object unless ( $found_log and $found_cdd_id );
51 print "Found ",$this -> {'cdd_id'},"\n";
54 end new
56 # }}} new
58 # {{{ register_cdd_in_database
60 start register_cdd_in_database
62 if ( $PsN::config -> {'_'} -> {'use_database'} ) {
63 my $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
64 ";databse=".$PsN::config -> {'_'} -> {'project'},
65 $PsN::config -> {'_'} -> {'user'},
66 $PsN::config -> {'_'} -> {'password'},
67 {'RaiseError' => 1});
68 my $sth;
69 # bins and case_column can be defined for more than one model. Skip
70 # registration of these for now.
71 # $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
72 # ".cdd ( tool_id, bins, case_column ) ".
73 # "VALUES (".$self -> {'tool_id'}.", '".$self -> {'bins'}.
74 # "', '".$self -> {'case_column'}."' )");
75 $sth = $dbh -> prepare("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
76 ".cdd ( tool_id ) ".
77 "VALUES (".$self -> {'tool_id'}." )");
78 $sth -> execute;
79 $self -> {'cdd_id'} = $sth->{'mysql_insertid'};
80 $sth -> finish;
81 $dbh -> disconnect;
84 end register_cdd_in_database
86 # }}} register_cdd_in_database
88 # {{{ register_mfit_results
90 start register_mfit_results
92 if ( $PsN::config -> {'_'} -> {'use_database'} ) {
93 my $dbh = DBI -> connect("DBI:mysql:host=".$PsN::config -> {'_'} -> {'database_server'}.
94 ";databse=".$PsN::config -> {'_'} -> {'project'},
95 $PsN::config -> {'_'} -> {'user'},
96 $PsN::config -> {'_'} -> {'password'},
97 {'RaiseError' => 1});
98 $dbh -> do( "LOCK TABLES ".$PsN::config -> {'_'} -> {'project'}.
99 ".cdd_modelfit_results WRITE" );
100 my $sth = $dbh -> prepare( "SELECT MAX(cdd_modelfit_results_id)".
101 " FROM ".$PsN::config -> {'_'} -> {'project'}.
102 ".cdd_modelfit_results" );
103 $sth -> execute or debug -> die( message => $sth->errstr ) ;
104 my $select_arr = $sth -> fetchall_arrayref;
105 $first_res_id = defined $select_arr -> [0][0] ? ($select_arr -> [0][0] + 1) : 0;
106 $last_res_id = $first_res_id + $#cook_score;
107 $sth -> finish;
109 my $insert_values;
110 for( my $i = 0; $i <= $#cook_score; $i++ ) {
111 $insert_values = $insert_values."," if ( defined $insert_values );
112 $insert_values = $insert_values.
113 "('".$self -> {'cdd_id'}."', '".$self -> {'model_ids'}[$model_number-1].
114 "', '".$self -> {'prepared_model_ids'}[($model_number-1)*($#cook_score+1)+$i].
115 "', '".($i+1).
116 "','$cook_score[$i]', '$covariance_ratio[$i]', '$projections[$i][0]', '$projections[$i][01]', '$outside_n_sd[$i]' )";
118 $dbh -> do("INSERT INTO ".$PsN::config -> {'_'} -> {'project'}.
119 ".cdd_modelfit_results ".
120 "( cdd_id, orig_model_id, model_id, ".
121 "bin, cook_score, covariance_ratio, ".
122 "pca_component_1, pca_component_2, outside_n_sd ) ".
123 "VALUES $insert_values");
124 $dbh -> do( "UNLOCK TABLES" );
125 $dbh -> disconnect;
128 end register_mfit_results
130 # }}} register_mfit_results
132 # {{{ read_cdd_log
133 start read_cdd_log
135 if( -e $self -> {'directory'}.'object.txt' ) {
136 $found_log = 1;
137 open( OLOG, '<'.$self -> {'directory'}.'object.txt' );
138 my @olog = <OLOG>;
139 my $str = "(";
140 for ( my $i = 1; $i < $#olog; $i++ ) {
141 $str = $str.$olog[$i];
143 $str = $str.")";
144 my %tmp = eval( $str );
146 if( exists $tmp{'cdd_id'} ) {
147 $self -> {'cdd_id'} = $tmp{'cdd_id'};
148 $found_cdd_id = 1;
150 close( OLOG );
153 end read_cdd_log
154 # }}} read_cdd_log
156 # {{{ llp_pre_fork_setup
158 start llp_pre_fork_setup
160 $self -> modelfit_pre_fork_setup;
162 end llp_pre_fork_setup
164 # }}} llp_pre_fork_setup
166 # {{{ modelfit_pre_fork_setup
168 start modelfit_pre_fork_setup
170 # These attributes can be given as a
171 # 1. A scalar : used for all models and problems
172 # 2. A 1-dim. array : specified per problem but same for all models
173 # 3. A 2-dim. array : specified per problem and model
174 my $bins = $self -> {'bins'};
175 # my $idxs = $self -> {'grouping_indexes'};
176 my $case_columns = $self -> {'case_columns'};
178 if ( defined $bins ) {
179 unless ( ref( \$bins ) eq 'SCALAR' or
180 ( ref( $bins ) eq 'ARRAY' and scalar @{$bins} > 0 ) ) {
181 debug -> die( message => "Attribute bins is ",
182 "defined as a ",ref( $bins ),
183 "and is neither a scalar or a non-zero size array." );
184 } elsif ( ref( \$bins ) eq 'SCALAR' ) {
185 my @mo_bins = ();
186 foreach my $model ( @{$self -> {'models'}} ) {
187 my @pr_bins = ();
188 foreach my $problem ( @{$model -> problems} ) {
189 push( @pr_bins, $bins );
191 push( @mo_bins, \@pr_bins );
193 $self -> {'bins'} = \@mo_bins;
194 } elsif ( ref( $bins ) eq 'ARRAY' ) {
195 unless ( ref( \$bins -> [0] ) eq 'SCALAR' or
196 ( ref( $bins -> [0] ) eq 'ARRAY' and scalar @{$bins -> [0]} > 0 ) ) {
197 debug -> die( message => "Attribute bins is ",
198 "defined as a ",ref( $bins -> [0] ),
199 "and is neither a scalar or a non-zero size array." );
200 } elsif ( ref(\$bins -> [0]) eq 'SCALAR' ) {
201 my @mo_bins = ();
202 foreach my $model ( @{$self -> {'models'}} ) {
203 push( @mo_bins, $bins );
205 $self -> {'bins'} = \@mo_bins;
208 } else {
209 my @mo_bins = ();
210 foreach my $model ( @{$self -> {'models'}} ) {
211 my @pr_bins = ();
212 foreach my $data ( @{$model -> datas} ) {
213 push( @pr_bins, $data -> count_ind );
215 push( @mo_bins, \@pr_bins );
217 $self -> {'bins'} = \@mo_bins;
220 if ( defined $case_columns ) {
221 if ( defined $case_columns ) {
222 unless ( ref( \$case_columns ) eq 'SCALAR' or
223 ( ref( $case_columns ) eq 'ARRAY' and scalar @{$case_columns} > 0 ) ) {
224 debug -> die( message => "Attribute case_columns is ",
225 "defined as a ",ref( $case_columns ),
226 "and is neither a scalar or a non-zero size array." );
227 } elsif ( ref( \$case_columns ) eq 'SCALAR' ) {
228 # SCALAR!
229 my @mo_case_columns = ();
230 foreach my $model ( @{$self -> {'models'}} ) {
231 my @pr_case_columns = ();
232 for( my $i = 1; $i <= scalar @{$model -> problems}; $i++ ) {
233 if ( not $case_columns =~ /^\d/ ) {
234 # STRING
235 my ( $junk, $column_position ) = $model ->
236 _get_option_val_pos( name => $case_columns,
237 record_name => 'input',
238 problem_numbers => [$i] );
239 # We assume that there is no duplicate column names
240 push ( @pr_case_columns, $column_position->[0][0] );
241 } else {
242 # NUMBER
243 push ( @pr_case_columns, $case_columns );
246 push( @mo_case_columns, \@pr_case_columns );
248 $self -> {'case_columns'} = \@mo_case_columns;
249 } elsif ( ref( $case_columns ) eq 'ARRAY' ) {
250 # ARRAY!
251 unless ( ref( \$case_columns -> [0] ) eq 'SCALAR' or
252 ( ref( $case_columns -> [0] ) eq 'ARRAY' and
253 scalar @{$case_columns -> [0]} > 0 ) ) {
254 debug -> die( message => "Second dimension of attribute case_columns is ",
255 "defined as a ",ref( $case_columns -> [0]),
256 "and is neither a scalar or a non-zero size array." );
257 } elsif ( ref(\$case_columns -> [0]) eq 'SCALAR' ) {
258 # ARRAY -> SCALAR!
259 my @mo_case_columns = ();
260 foreach my $model ( @{$self -> {'models'}} ) {
261 my @pr_case_columns = ();
262 for( my $i = 1; $i <= scalar @{$model -> problems}; $i++ ) {
263 if ( not $case_columns =~ /^\d/ ) {
264 # STRING
265 my ( $junk, $column_position ) = $model ->
266 _get_option_val_pos( name => $case_columns->[$i-1],
267 record_name => 'input',
268 problem_numbers => [$i] );
269 # We assume that there is no duplicate column names
270 push ( @pr_case_columns, $column_position->[0][0] );
271 } else {
272 # NUMBER
273 push ( @pr_case_columns, $case_columns -> [$i-1] );
276 push( @mo_case_columns, \@pr_case_columns );
278 $self -> {'case_columns'} = \@mo_case_columns;
279 } elsif ( ref($case_columns -> [0]) eq 'ARRAY' ) {
280 # ARRAY -> ARRAY!
281 my @mo_case_columns = ();
282 for( my $j = 0; $j < scalar @{$self -> {'models'}}; $j++ ) {
283 my @pr_case_columns = ();
284 for( my $i = 1; $i <= scalar @{$self -> {'models'} -> problems}; $i++ ) {
285 if ( not $case_columns =~ /^\d/ ) {
286 # STRING
287 my ( $junk, $column_position ) = $self -> {'models'} -> [$j] ->
288 _get_option_val_pos( name => $case_columns->[$j]->[$i-1],
289 record_name => 'input',
290 problem_numbers => [$i] );
291 # We assume that there is no duplicate column names
292 push ( @pr_case_columns, $column_position->[0][0] );
293 } else {
294 # NUMBER
295 push ( @pr_case_columns, $case_columns -> [$j]->[$i-1] );
298 push( @mo_case_columns, \@pr_case_columns );
300 $self -> {'case_columns'} = \@mo_case_columns;
303 } else {
304 debug -> die( message => "case_columns is not specified for model $model_number" );
308 end modelfit_pre_fork_setup
310 # }}} modelfit_pre_fork_setup
312 # {{{ modelfit_setup
314 start modelfit_setup
316 my $subm_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
317 $self -> {'threads'} -> [1]:$self -> {'threads'};
318 $self -> general_setup( model_number => $model_number,
319 class => 'tool::modelfit',
320 subm_threads => $subm_threads );
322 end modelfit_setup
324 # }}} modelfit_setup
326 # {{{ llp_setup
327 start llp_setup
329 my @subm_threads;
330 if (ref( $self -> {'threads'} ) eq 'ARRAY') {
331 @subm_threads = @{$self -> {'threads'}};
332 unshift(@subm_threads);
333 } else {
334 @subm_threads = ($self -> {'threads'});
336 $self -> general_setup( model_number => $model_number,
337 class => 'tool::llp',
338 subm_threads => \@subm_threads );
340 end llp_setup
341 # }}} llp_setup
343 # {{{ general_setup
345 start general_setup
347 # Sub tool threads can be given as scalar or reference to an array?
348 my $subm_threads = $parm{'subm_threads'};
349 my $own_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
350 $self -> {'threads'} -> [0]:$self -> {'threads'};
351 # case_column names are matched in the model, not the data!
353 my $model = $self -> {'models'} -> [$model_number-1];
354 my @bins = @{$self -> {'bins'} -> [$model_number-1]};
356 # Check which models that hasn't been run and run them
357 # This will be performed each step but will only result in running
358 # models at the first step, if at all.
360 # If more than one process is used, there is a VERY high risk of interaction
361 # between the processes when creating directories for model fits. Therefore
362 # the {'directory'} attribute is given explicitly below.
365 unless ( $model -> is_run ) {
367 # ----------------------- Run original run ------------------------------
369 # {{{ orig run
371 my %subargs = ();
372 if ( defined $self -> {'subtool_arguments'} ) {
373 %subargs = %{$self -> {'subtool_arguments'}};
375 if( $self -> {'nonparametric_etas'} or
376 $self -> {'nonparametric_marginals'} ) {
377 $model -> add_nonparametric_code;
380 my $orig_fit = tool::modelfit ->
381 new( reference_object => $self,
382 models => [$model],
383 threads => $subm_threads,
384 directory => $self -> {'directory'}.'/orig_modelfit_dir'.$model_number,
385 subtools => [],
386 parent_threads => $own_threads,
387 parent_tool_id => $self -> {'tool_id'},
388 logfile => undef,
389 raw_results => undef,
390 prepared_models => undef,
391 top_tool => 0,
392 %subargs );
394 # $Data::Dumper::Maxdepth=1;
395 # die Dumper $orig_fit;
396 # ( models => [$model],
398 # run_on_lsf => $self -> {'run_on_lsf'},
399 # no_remote_execution => $self -> {'no_remote_execution'},
400 # no_remote_compile => $self -> {'no_remote_compile'},
401 # lsf_queue => $self -> {'lsf_queue'},
402 # lsf_options => $self -> {'lsf_options'},
403 # lsf_job_name => $self -> {'lsf_job_name'},
404 # lsf_project_name => $self -> {'lsf_project_name'},
406 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
407 # cpu_time => $self -> {'cpu_time'},
409 # parent_tool_id => $self -> {'tool_id'},
411 # subtools => [],
412 # nm_version => $self -> {'nm_version'},
413 # picky => $self -> {'picky'},
414 # compress => $self -> {'compress'},
415 # threads => $subm_threads,
416 # retries => $self -> {'retries'},
417 # remove_temp_files => $self -> {'remove_temp_files'},
418 # base_directory => $self -> {'directory'},
419 # directory => $self -> {'directory'}.'/orig_modelfit_dir'.$model_number,
420 # parent_threads => $own_threads );
422 ui -> print( category => 'cdd',
423 message => 'Executing base model.' );
426 $orig_fit -> run;
428 # }}} orig run
432 # ------------------------ Print a log-header -----------------------------
434 # {{{ log header
436 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
437 my $ui_text = sprintf("%-5s",'RUN').','.sprintf("%20s",'FILENAME ').',';
438 print LOG sprintf("%-5s",'RUN'),',',sprintf("%20s",'FILENAME '),',';
439 foreach my $param ( 'ofv', 'theta', 'omega', 'sigma' ) {
440 my $accessor = $param eq 'ofv' ? $param : $param.'s';
441 my $orig_ests = $model -> outputs -> [0] -> $accessor;
442 # Loop the problems
443 if( defined $orig_ests ){
444 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
445 if ( $param eq 'ofv' ) {
446 my $label = uc($param)."_".($j+1);
447 $ui_text = $ui_text.sprintf("%12s",$label).',';
448 print LOG sprintf("%12s",$label),',';
449 } else {
450 # Loop the parameter numbers (skip sub problem level)
451 if( defined $orig_ests -> [$j] and
452 defined $orig_ests -> [$j][0] ){
453 for ( my $num = 1; $num <= scalar @{$orig_ests -> [$j][0]}; $num++ ) {
454 my $label = uc($param).$num."_".($j+1);
455 $ui_text = $ui_text.sprintf("%12s",$label).',';
456 print LOG sprintf("%12s",$label),',';
464 print LOG "\n";
466 # }}} log header
468 # ------------------------ Log original run -------------------------------
470 # {{{ log orig
472 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
473 $ui_text = sprintf("%5s",'0').','.sprintf("%20s",$model -> filename).',';
474 print LOG sprintf("%5s",'0'),',',sprintf("%20s",$model -> filename),',';
475 foreach my $param ( 'ofv', 'theta', 'omega', 'sigma' ) {
476 my $accessor = $param eq 'ofv' ? $param : $param.'s';
477 my $orig_ests = $model -> outputs -> [0] -> $accessor;
478 # Loop the problems
479 if( defined $orig_ests ) {
480 for ( my $j = 0; $j < scalar @{$orig_ests}; $j++ ) {
481 if ( $param eq 'ofv' ) {
482 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0]).',';
483 print LOG sprintf("%12f",$orig_ests -> [$j][0]),',';
484 } else {
485 # Loop the parameter numbers (skip sub problem level)
486 if( defined $orig_ests -> [$j] and
487 defined $orig_ests -> [$j][0] ){
488 for ( my $num = 0; $num < scalar @{$orig_ests -> [$j][0]}; $num++ ) {
489 $ui_text = $ui_text.sprintf("%12f",$orig_ests -> [$j][0][$num]).',';
490 print LOG sprintf("%12f",$orig_ests -> [$j][0][$num]),',';
498 print LOG "\n";
500 # }}} log orig
502 # --------------------- Initiate some variables ----------------------------
504 # {{{ inits
506 my $case_column = $self -> {'case_columns'} -> [$model_number-1] -> [0];
508 # Case-deletion Diagnostics will only work for models with one problem.
509 my $orig_data = $model -> datas -> [0];
511 if ( not defined $orig_data ) {
512 debug -> die( message => "No data file to resample from found in ".$model -> full_name );
515 my @problems = @{$model -> problems};
516 my @new_models = ();
518 my ( @skipped_ids, @skipped_keys, @skipped_values );
520 my %orig_factors = %{$orig_data -> factors( column => $case_column )};
521 my $maxbins = scalar keys %orig_factors;
522 my $pr_bins = ( defined $bins[0] and $bins[0] <= $maxbins ) ?
523 $bins[0] : $maxbins;
525 my $done = ( -e $self -> {'directory'}."/m$model_number/done" ) ? 1 : 0;
527 my ( @seed, $new_datas, $skip_ids, $skip_keys, $skip_values, $remainders );
529 # }}} inits
531 if ( not $done ) {
533 # -------------- Create new case-deleted data sets ----------------------
535 # {{{ create new
537 # Keep the new sample data objects i memory and write them to disk later
538 # with a proper name.
540 ( $new_datas, $skip_ids, $skip_keys, $skip_values, $remainders )
541 = $orig_data -> case_deletion( case_column => $case_column,
542 selection => $self -> {'selection_method'},
543 bins => $pr_bins,
544 target => 'mem',
545 directory => $self -> {'directory'}.'/m'.$model_number );
546 my $ndatas = scalar @{$new_datas};
547 open( DB, ">".$self -> {'directory'}."m$model_number/done.database.models" );
548 my @model_ids;
549 for ( my $j = 1; $j <= $ndatas; $j++ ) {
550 my @names = ( 'cdd_'.$j, 'rem_'.$j );
551 my @datasets = ( $new_datas -> [$j-1], $remainders -> [$j-1] );
552 foreach my $i ( 0, 1 ) {
553 my $dataset = $datasets[$i];
554 my ($data_dir, $data_file) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
555 $names[$i].'.dta' );
556 # $dataset -> directory( $data_dir );
557 # $dataset -> filename( $data_file );
558 # $dataset -> flush;
559 my $newmodel = $model -> copy( filename => $data_dir.$names[$i].'.mod',
560 copy_data => 0,
561 copy_output => 0);
562 $newmodel -> ignore_missing_files(1);
563 $newmodel -> datafiles( new_names => [$names[$i].'.dta'] );
564 $newmodel -> outputfile( $data_dir.$names[$i].".lst" );
565 $newmodel -> datas -> [0] = $dataset;
566 if( $i == 1 ) {
567 # set MAXEVAL=0. Again, CDD will only work for one $PROBLEM
568 $newmodel -> maxeval( new_values => [[0]] );
571 if( $self -> {'nonparametric_etas'} or
572 $self -> {'nonparametric_marginals'} ) {
573 $newmodel -> add_nonparametric_code;
576 $newmodel -> _write;
577 push( @{$new_models[$i]}, $newmodel );
578 $model_ids[$j*$i+$j-1] = $newmodel -> register_in_database( force => 1 ) ;
579 print DB $model_ids[$j*$i+$j-1],"\n";
580 $self -> {'prepared_model_ids'}[($model_number-1)*$ndatas*2+$j*$i+$j-1] =
581 $model_ids[$j*$i+$j-1];
585 # my $new_data = $new_datas -> [$j-1];
586 # my ($data_dir, $data_file) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.$model_number,
587 # 'cdd_'.$j.'.dta' );
588 # $new_data -> directory( $data_dir );
589 # $new_data -> filename( $data_file );
590 # $new_data -> flush;
591 # my $newmodel = $model -> copy( filename => $data_dir."cdd_$j.mod",
592 # copy_data => 0,
593 # copy_output => 0);
594 # $newmodel -> ignore_missing_files(1);
595 # $newmodel -> datafiles( new_names => ["cdd_$j.dta"] );
596 # $newmodel -> outputfile( $data_dir."cdd_$j.lst" );
597 # $newmodel -> datas -> [0] = $new_data;
598 # $newmodel -> _write;
599 # push( @new_models, $newmodel );
600 # $model_ids[$j-1] = $newmodel -> register_in_database( force => 1 );
601 # print DB $model_ids[$j-1],"\n";
602 # $self -> {'prepared_model_ids'}[($model_number-1)*$ndatas+$j-1] =
603 # $model_ids[$j-1];
605 close( DB );
606 if ( not -e $self -> {'directory'}."m$model_number/done.database.tool_models" ) {
607 $self -> register_tm_relation( model_ids => \@model_ids,
608 prepared_models => 1 );
609 open( DB, ">".$self -> {'directory'}."m$model_number/done.database.tool_models" );
610 print DB "";
611 close( DB );
613 # Create a checkpoint. Log the samples and individuals.
614 open( DONE, ">".$self -> {'directory'}."/m$model_number/done" ) ;
615 print DONE "Sampling from ",$orig_data -> filename, " performed\n";
616 print DONE "$pr_bins bins\n";
617 print DONE "Skipped individuals:\n";
618 for( my $k = 0; $k < scalar @{$skip_ids}; $k++ ) {
619 print DONE join(',',@{$skip_ids -> [$k]}),"\n";
621 print DONE "Skipped keys:\n";
622 for( my $k = 0; $k < scalar @{$skip_keys}; $k++ ) {
623 print DONE join(',',@{$skip_keys -> [$k]}),"\n";
625 print DONE "Skipped values:\n";
626 for( my $k = 0; $k < scalar @{$skip_values}; $k++ ) {
627 print DONE join(',',@{$skip_values -> [$k]}),"\n";
629 @seed = random_get_seed;
630 print DONE "seed: @seed\n";
631 close( DONE );
633 open( SKIP, ">".$self -> {'directory'}."skipped_individuals".$model_number.".csv" ) ;
634 for( my $k = 0; $k < scalar @{$skip_ids}; $k++ ) {
635 print SKIP join(',',@{$skip_ids -> [$k]}),"\n";
637 close( SKIP );
638 open( SKIP, ">".$self -> {'directory'}."skipped_keys".$model_number.".csv" ) ;
639 for( my $k = 0; $k < scalar @{$skip_keys}; $k++ ) {
640 print SKIP join(',',@{$skip_keys -> [$k]}),"\n";
642 close( SKIP );
644 # }}} create new
646 } else {
648 # --------- Recreate the datasets and models from a checkpoint ----------
650 # {{{ resume
652 ui -> print( category => 'cdd',
653 message => "Recreating models from a previous run" );
654 open( DONE, $self -> {'directory'}."/m$model_number/done" );
655 my @rows = <DONE>;
656 close( DONE );
657 my ( $junk, $junk, $stored_filename, $junk ) = split(' ',$rows[0],4);
658 my ( $stored_bins, $junk ) = split(' ',$rows[1],2);
659 my ( @stored_ids, @stored_keys, @stored_values );
660 for ( my $k = 3; $k < 3+$stored_bins; $k++ ) {
661 chomp($rows[$k]);
662 my @bin_ids = split(',', $rows[$k] );
663 push( @stored_ids, \@bin_ids );
665 for ( my $k = 4+$stored_bins; $k < 4+2*$stored_bins; $k++ ) {
666 chomp($rows[$k]);
667 my @bin_keys = split(',', $rows[$k] );
668 push( @stored_keys, \@bin_keys );
670 for ( my $k = 5+2*$stored_bins; $k < 5+3*$stored_bins; $k++ ) {
671 chomp($rows[$k]);
672 my @bin_values = split(',', $rows[$k] );
673 push( @stored_values, \@bin_values );
675 @seed = split(' ',$rows[5+3*$stored_bins]);
676 $skip_ids = \@stored_ids;
677 $skip_keys = \@stored_keys;
678 $skip_values = \@stored_values;
679 shift( @seed ); # get rid of 'seed'-word
681 # Reinitiate the model objects
682 my @model_ids;
683 my $reg_relations = 0;
684 if ( -e $self -> {'directory'}."m$model_number/done.database.models" ) {
685 open( DB, $self -> {'directory'}."m$model_number/done.database.models" );
686 @model_ids = <DB>;
687 chomp( @model_ids );
688 } else {
689 open( DB, ">".$self -> {'directory'}."m$model_number/done.database.models" );
690 $reg_relations = 1;
692 for ( my $j = 1; $j <= $stored_bins; $j++ ) {
693 my @names = ( 'cdd_'.$j, 'rem_'.$j );
694 foreach my $i ( 0, 1 ) {
695 my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
696 $model_number,
697 $names[$i].'.mod' );
698 my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
699 $model_number,
700 $names[$i].'.lst' );
701 my $new_mod = model ->
702 new( directory => $model_dir,
703 filename => $filename,
704 outputfile => $outfilename,
705 extra_files => $model -> extra_files,
706 target => 'disk',
707 ignore_missing_files => 1,
708 quick_reload => 1,
709 model_id => $model_ids[$j*$i+$j-1] );
710 push( @{$new_models[$i]}, $new_mod );
711 my $model_id;
712 if( not defined $model_ids[$j*$i+$j-1] ) {
713 $model_ids[$j*$i+$j-1] = $new_mod -> register_in_database;
714 print DB $model_ids[$j-1],"\n";
716 $self -> {'prepared_model_ids'}[($model_number-1)*
717 $stored_bins+$j*$i+$j-1] =
718 $model_ids[$j*$i+$j-1];
721 # my ($model_dir, $filename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
722 # $model_number,
723 # 'cdd_'.$j.'.mod' );
724 # my ($out_dir, $outfilename) = OSspecific::absolute_path( $self -> {'directory'}.'/m'.
725 # $model_number,
726 # '/cdd_'.$j.'.lst' );
727 # my $new_mod = model ->
728 # new( directory => $model_dir,
729 # filename => $filename,
730 # outputfile => $outfilename,
731 # extra_files => $model -> extra_files,
732 # target => 'disk',
733 # ignore_missing_files => 1,
734 # quick_reload => 1,
735 # model_id => $model_ids[$j-1] );
736 # push( @new_models, $new_mod );
737 # my $model_id;
738 # if( not defined $model_ids[$j-1] ) {
739 # $model_ids[$j-1] = $new_mod -> register_in_database;
740 # print DB $model_ids[$j-1],"\n";
742 # $self -> {'prepared_model_ids'}[($model_number-1)*$stored_bins+$j-1] =
743 # $model_ids[$j-1];
744 my $nl = $j == $stored_bins ? "" : "\r";
745 ui -> print( category => 'cdd',
746 message => ui -> status_bar( sofar => $j+1,
747 goal => $stored_bins+1 ).$nl,
748 wrap => 0,
749 newline => 0 );
751 close( DB );
752 if ( not -e $self -> {'directory'}."m$model_number/done.database.tool_models" ) {
753 $self -> register_tm_relation( model_ids => \@model_ids,
754 prepared_models => 1 ) if ( $reg_relations );
755 open( DB, ">".$self -> {'directory'}."m$model_number/done.database.tool_models" );
756 print DB "";
757 close( DB );
759 ui -> print( category => 'cdd',
760 message => " ... done." );
761 random_set_seed( @seed );
762 ui -> print( category => 'cdd',
763 message => "Using $stored_bins previously sampled case-deletion sets ".
764 "from $stored_filename" )
765 unless $self -> {'parent_threads'} > 1;
767 # }}} resume
770 push( @skipped_ids, $skip_ids );
771 push( @skipped_keys, $skip_keys );
772 push( @skipped_values, $skip_values );
774 # Use only the first half (the case-deleted) of the data sets.
775 $self -> {'prepared_models'}[$model_number-1]{'own'} = $new_models[0];
777 # The remainders are left until the analyze step
778 $self -> {'prediction_models'}[$model_number-1]{'own'} = $new_models[1];
780 # --------------------- Create the sub tools ------------------------------
782 # {{{ sub tools
784 my $subdir = $class;
785 $subdir =~ s/tool:://;
786 my @subtools = @{$self -> {'subtools'}};
787 shift( @subtools );
788 my %subargs = ();
789 if ( defined $self -> {'subtool_arguments'} ) {
790 %subargs = %{$self -> {'subtool_arguments'}};
792 push( @{$self -> {'tools'}},
793 $class ->
794 new( reference_object => $self,
795 models => $new_models[0],
796 threads => $subm_threads,
797 directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
798 _raw_results_callback => $self ->
799 _modelfit_raw_results_callback( model_number => $model_number ),
800 subtools => \@subtools,
801 parent_threads => $own_threads,
802 parent_tool_id => $self -> {'tool_id'},
803 logfile => undef,
804 raw_results => undef,
805 prepared_models => undef,
806 top_tool => 0,
807 %subargs ) );
810 # ( models => $new_models[0],
811 # nm_version => $self -> {'nm_version'},
812 # run_on_lsf => $self -> {'run_on_lsf'},
813 # no_remote_execution => $self -> {'no_remote_execution'},
814 # no_remote_compile => $self -> {'no_remote_compile'},
815 # lsf_queue => $self -> {'lsf_queue'},
816 # lsf_options => $self -> {'lsf_options'},
817 # lsf_job_name => $self -> {'lsf_job_name'},
818 # lsf_project_name => $self -> {'lsf_project_name'},
820 # run_on_nordugrid => $self -> {'run_on_nordugrid'},
821 # cpu_time => $self -> {'cpu_time'},
823 # parent_tool_id => $self -> {'tool_id'},
825 # picky => $self -> {'picky'},
826 # compress => $self -> {'compress'},
827 # threads => $subm_threads,
828 # retries => $self -> {'retries'},
829 # remove_temp_files => $self -> {'remove_temp_files'},
830 # base_directory => $self -> {'directory'},
831 # directory => $self -> {'directory'}.'/'.$subdir.'_dir'.$model_number,
832 # _raw_results_callback => $self ->
833 # _modelfit_raw_results_callback( model_number => $model_number ),
834 # subtools => \@subtools,
835 # parent_threads => $own_threads,
836 # %subargs ) );
838 # }}} sub tools
840 open( SKIP, ">".$self -> {'directory'}."skipped_values".$model_number.".csv" ) ;
841 for( my $k = 0; $k < scalar @{$skip_values}; $k++ ) {
842 print SKIP join(',',@{$skip_values -> [$k]}),"\n";
844 close( SKIP );
847 end general_setup
849 # }}} general_setup
851 # {{{ llp_analyze
853 start llp_analyze
855 print "POSTFORK\n";
856 my %proc_results;
857 # $proc_results{'skipped.section.identifiers'} = $self -> {'skipped.section.identifiers'};
858 # $proc_results{'skipped_ids'} = $self -> {'skipped_ids'};
859 # $proc_results{'skipped_keys'} = $self -> {'skipped_keys'};
860 # $proc_results{'skipped_values'} = $self -> {'skipped_values'};
862 push( @{$self -> {'results'} -> {'own'}}, \%proc_results );
864 end llp_analyze
866 # }}} llp_analyze
868 # {{{ _modelfit_raw_results_callback
870 start _modelfit_raw_results_callback
873 # Use the cdd's raw_results file.
874 # The cdd and the bootstrap's callback methods are identical
875 # in the beginning, then the cdd callback adds cook.scores and
876 # cov.ratios.
878 my ($dir,$file) =
879 OSspecific::absolute_path( $self -> {'directory'},
880 $self -> {'raw_results_file'}[$model_number-1] );
881 my $orig_mod = $self -> {'models'}[$model_number-1];
882 my $xv = $self -> {'cross_validate'};
883 $subroutine = sub {
884 my $modelfit = shift;
885 my $mh_ref = shift;
886 my %max_hash = %{$mh_ref};
887 $modelfit -> raw_results_file( $dir.$file );
888 if( $cross_validation_set ) {
889 $modelfit -> raw_results_append( 1 ) if( not $self -> {'bca_mode'} ); # overwrite when doing a jackknife
890 my ( @new_header, %param_names );
891 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
892 unshift( @{$row}, 'cross_validation' );
894 $modelfit -> {'raw_results_header'} = undef; # May be a bit silly to do...
895 } else {
896 # my @orig_res = ( 0, 1, 1 ); # Model zero: original run. Problem 1 and subproblem 1
897 # foreach my $param ( @{$modelfit -> raw_results_header} ){
898 # next if( $param eq 'model' or $param eq 'problem' or $param eq 'subproblem' );
899 # my ( $accessor, $res );
900 # if ( $param eq 'theta' or $param eq 'omega' or $param eq 'sigma' or
901 # $param eq 'setheta' or $param eq 'seomega' or $param eq 'sesigma' or
902 # $param eq 'npomega' ) {
904 # $accessor = $param.'s';
905 # $res = $orig_mod -> {'outputs'} -> [0] -> $accessor;
907 # } elsif ( $param eq 'shrinkage_etas' ) {
909 # # Shrinkage does not work for subproblems right now.
910 # $res = $orig_mod -> eta_shrinkage;
912 # } elsif ( $param eq 'shrinkage_wres' ) {
914 # $res = $orig_mod -> wres_shrinkage;
916 # } else {
918 # $res = $orig_mod -> {'outputs'} -> [0] -> $param;
922 # # {{{ Push results
924 # $res = defined $res ? $res -> [0][0] : undef;
925 # if( defined $res ) {
926 # if ( ref $res eq 'ARRAY' ) {
927 # push( @orig_res, @{$res} );
928 # push( @orig_res, (undef) x ($max_hash{$param}- scalar @{$res}) );
929 # } else {
930 # push( @orig_res, $res );
932 # } else {
933 # push( @orig_res, (undef) x $max_hash{$param} );
936 # # }}} Push results
940 my ($raw_results_row,$row_structure) = $self -> create_raw_results_rows( max_hash => $mh_ref,
941 model => $orig_mod );
943 unshift( @{$modelfit -> {'raw_results'}}, @{$raw_results_row} );
945 &{$self -> {'_raw_results_callback'}}( $self, $modelfit )
946 if ( defined $self -> {'_raw_results_callback'} );
948 my ( @new_header, %param_names );
949 my @params = ( 'theta', 'omega', 'sigma' );
950 foreach my $param ( @params ) {
951 my $labels = $orig_mod -> labels( parameter_type => $param );
952 $param_names{$param} = $labels -> [0] if ( defined $labels );
955 foreach my $name ( @{$modelfit -> {'raw_results_header'}} ) {
956 my $success;
957 foreach my $param ( @params, 'eigen', 'shrinkage_etas', 'shrinkage_wres' ) {
958 if ( $name eq $param ){
959 if( defined $param_names{$name} ) {
960 push( @new_header , @{$param_names{$name}} );
961 } else {
962 for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
963 push ( @new_header, uc(substr($name,0,2)).$i );
966 $success = 1;
967 last;
968 } elsif ( $name eq 'se'.$param ) {
969 my @new_names = ();
970 for ( my $i = 1; $i <= $max_hash{ $name }; $i++ ) {
971 push ( @new_names, uc(substr($param,0,2)).$i );
973 map ( $_ = 'se'.$_, @new_names );
975 push( @new_header, @new_names );
976 $success = 1;
977 last;
980 unless( $success ){
981 push( @new_header, $name );
985 $modelfit -> {'raw_results_header'} = \@new_header;
987 if( $xv and not $self -> {'bca_mode'} ) {
988 foreach my $row ( @{$modelfit -> {'raw_results'}} ) {
989 unshift( @{$row}, 'cdd' );
991 unshift( @{$modelfit -> {'raw_results_header'}}, 'method' );
995 return $subroutine;
997 end _modelfit_raw_results_callback
999 # }}} _modelfit_raw_results_callback
1001 # {{{ modelfit_analyze
1003 start modelfit_analyze
1005 # Only valid for one problem and one sub problem.
1007 if ( $self -> {'cross_validate'} ) {
1009 # --- Evaluate the models on the remainder data sets ----
1011 # {{{ eval models
1013 for ( my $i = 0;
1014 $i < scalar @{$self -> {'prediction_models'}[$model_number-1]{'own'}};
1015 $i++ ) {
1016 $self -> {'prediction_models'}[$model_number-1]{'own'}[$i] ->
1017 update_inits( from_model => $self ->
1018 {'prepared_models'}[$model_number-1]{'own'}[$i]);
1019 $self -> {'prediction_models'}[$model_number-1]{'own'}[$i] ->
1020 remove_records( type => 'covariance' );
1021 $self -> {'prediction_models'}[$model_number-1]{'own'}[$i] -> _write;
1023 my ($dir,$file) =
1024 OSspecific::absolute_path( $self -> {'directory'},
1025 $self -> {'raw_results_file'}[$model_number-1] );
1026 my $xv_threads = ref( $self -> {'threads'} ) eq 'ARRAY' ?
1027 $self -> {'threads'} -> [1]:$self -> {'threads'};
1028 my $mod_eval = tool::modelfit ->
1029 new( reference_object => $self,
1030 models => $self ->
1031 {'prediction_models'}[$model_number-1]{'own'},
1032 base_directory => $self -> {'directory'},
1033 directory => $self -> {'directory'}.
1034 'evaluation_dir'.$model_number,
1035 threads => $xv_threads,
1036 _raw_results_callback => $self ->
1037 _modelfit_raw_results_callback( model_number => $model_number,
1038 cross_validation_set => 1 ),
1039 parent_tool_id => $self -> {'tool_id'},
1040 logfile => undef,
1041 raw_results => undef,
1042 prepared_models => undef,
1043 top_tool => 0,
1044 retries => 1 );
1045 $Data::Dumper::Maxdepth = 2;
1046 print "Running xv runs\n";
1047 $mod_eval -> run;
1049 # }}} eval models
1053 # ------------ Cook-scores and Covariance-Ratios ----------
1055 # {{{ Cook-scores and Covariance-Ratios
1057 # ---------------------- Cook-score -----------------------
1059 # {{{ Cook-score
1061 my ( @cook_score, @cov_ratio );
1062 if( $self -> models -> [$model_number-1] ->
1063 outputs -> [0] -> covariance_step_successful -> [0][0]) {
1065 ui -> print( category => 'cdd',
1066 message => "Calculating the cook-scores" );
1067 my @orig_ests;
1068 my @changes;
1070 my $output_harvest = $self ->
1071 harvest_output( accessors => ['est_thetas','est_omegas','est_sigmas'],
1072 search_output => 1 );
1074 # Calculate the changes
1075 foreach my $param ( 'est_thetas', 'est_omegas', 'est_sigmas' ) {
1076 my $orig_est = $self -> models -> [$model_number-1] -> outputs -> [0] -> $param;
1077 my @mod_ests;
1078 my $est = defined $output_harvest -> {$param} ?
1079 $output_harvest -> {$param} -> [$model_number-1]{'own'} : [];
1080 if( defined $est ) {
1081 for ( my $i = 0; $i < scalar @{$est}; $i++ ) {
1082 if( defined $est->[$i][0][0] ) {
1083 my $n_par = scalar @{$est->[$i][0][0]};
1084 # Since we use the _estimated_ parameters there should be no undefined elements
1085 # Not sure what to do if we find one... /Lasse
1086 for( my $j = 0; $j < $n_par; $j++ ) {
1087 push( @{$changes[$i]}, $orig_est->[0][0][$j]-$est->[$i][0][0][$j]);
1094 my $inverse_covariance_matrix = $self -> models -> [$model_number-1] ->
1095 outputs -> [0] -> inverse_covariance_matrix -> [0][0];
1097 # Equation: sqrt((orig_est-est(i))'*inv_cov_matrix*(orig_est-est(i)))
1098 for ( my $i = 0; $i <= $#changes; $i++ ) {
1099 if( defined $changes[$i] and
1100 scalar @{$changes[$i]} > 0 and
1101 defined $inverse_covariance_matrix ) {
1102 my $vec_changes = Math::MatrixReal ->
1103 new_from_cols( [$changes[$i]] );
1104 $cook_score[$i] = $inverse_covariance_matrix*$vec_changes;
1105 $cook_score[$i] = ~$vec_changes*$cook_score[$i];
1106 } else {
1107 $cook_score[$i] = undef;
1110 my $nl = $i == $#changes ? "" : "\r";
1111 ui -> print( category => 'cdd',
1112 message => ui -> status_bar( sofar => $i+1,
1113 goal => $#changes+1 ).$nl,
1114 wrap => 0,
1115 newline => 0 );
1118 # Calculate the square root
1119 # The matrixreal object holds a 1x1 matrix in the first position of its array.
1120 for ( my $i = 0; $i <= $#cook_score; $i++ ) {
1121 if( defined $cook_score[$i] and
1122 $cook_score[$i][0][0][0] >= 0 ) {
1123 $cook_score[$i] = sqrt($cook_score[$i][0][0][0]);
1124 } else {
1125 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
1126 my $mes;
1127 if( defined $cook_score[$i] ) {
1128 $mes = "Negative squared cook-score ",$cook_score[$i][0][0][0];
1129 } else {
1130 $mes = "Undefined squared cook-score";
1132 $mes .= "; can't take the square root.\n",
1133 "The cook-score for model $model_number and cdd bin $i was set to zero\n";
1134 print LOG $mes;
1135 close( LOG );
1136 debug -> warn( level => 1,
1137 message => $mes );
1139 $cook_score[$i] = 0;
1140 # $cook_score[$i] = undef;
1145 $self -> {'cook_scores'} = \@cook_score;
1147 ui -> print( category => 'cdd',
1148 message => " ... done." );
1150 # }}} Cook-score
1152 # ------------------- Covariance Ratio --------------------
1154 # {{{ Covariance Ratio
1156 if( $self -> models -> [$model_number-1] ->
1157 outputs -> [0] -> covariance_step_successful -> [0][0]) {
1159 # {{{ sub clear dots
1161 sub clear_dots {
1162 my $m_ref = shift;
1163 my @matrix = @{$m_ref};
1164 # get rid of '........'
1165 my @clear;
1166 foreach ( @matrix ) {
1167 push( @clear, $_ ) unless ( $_ eq '.........' );
1169 # print Dumper \@clear;
1170 return \@clear;
1173 # }}}
1175 # {{{ sub make square
1177 sub make_square {
1178 my $m_ref = shift;
1179 my @matrix = @{$m_ref};
1180 # Make the matrix square:
1181 my $elements = scalar @matrix; # = M*(M+1)/2
1182 my $M = -0.5 + sqrt( 0.25 + 2 * $elements );
1183 my @square;
1184 for ( my $m = 1; $m <= $M; $m++ ) {
1185 for ( my $n = 1; $n <= $m; $n++ ) {
1186 push( @{$square[$m-1]}, $matrix[($m-1)*$m/2 + $n - 1] );
1187 unless ( $m == $n ) {
1188 push( @{$square[$n-1]}, $matrix[($m-1)*$m/2 + $n - 1] );
1192 return \@square;
1195 # }}}
1197 ui -> print( category => 'cdd',
1198 message => "Calculating the covariance-ratios" );
1200 # Equation: sqrt(det(cov_matrix(i))/det(cov_matrix(orig)))
1201 my $cov_linear = $self -> models -> [$model_number-1] ->
1202 outputs -> [0] -> raw_covmatrix -> [0][0];
1203 my $orig_det;
1204 if( defined $cov_linear ) {
1205 my $orig_cov = Math::MatrixReal ->
1206 new_from_cols( make_square( clear_dots( $cov_linear ) ) );
1207 $orig_det = $orig_cov -> det();
1209 # AUTOLOAD: raw_covmatrix
1211 my $output_harvest = $self -> harvest_output( accessors => ['raw_covmatrix'],
1212 search_output => 1 );
1214 my $est_cov = defined $output_harvest -> {'raw_covmatrix'} ? $output_harvest -> {'raw_covmatrix'} -> [$model_number-1]{'own'} : [];
1216 my $mods = scalar @{$est_cov};
1217 for ( my $i = 0; $i < scalar @{$est_cov}; $i++ ) {
1218 if ( $orig_det != 0 and defined $est_cov->[$i][0][0] ) {
1219 my $cov = Math::MatrixReal ->
1220 new_from_cols( make_square( clear_dots( $est_cov->[$i][0][0] ) ) );
1221 my $ratio = $cov -> det() / $orig_det;
1222 if( $ratio > 0 ) {
1223 push( @cov_ratio, sqrt( $ratio ) );
1224 } else {
1225 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
1226 print LOG "Negative covariance ratio ",$ratio,
1227 "; can't take the square root.\n",
1228 "The covariance ratio for model $model_number and cdd bin $i was set to one (1)\n";
1229 # "The covariance ratio for model $model_number and cdd bin $i was set to undef\n";
1230 close( LOG );
1231 push( @cov_ratio, 1 );
1232 # push( @cov_ratio, undef );
1234 } else {
1235 open( LOG, ">>".$self -> {'logfile'}[$model_number-1] );
1236 print LOG "The determinant of the cov-matrix of the original run was zero\n",
1237 "or the determinant of cdd bin $i was undefined\n",
1238 "The covariance ratio for model $model_number and cdd bin $i was set to one (1)\n";
1239 # "The covariance ratio for model $model_number and cdd bin $i was set to undef\n";
1240 close( LOG );
1241 push( @cov_ratio, 1 );
1242 # push( @cov_ratio, undef );
1245 my $nl = $i == $mods-1 ? "" : "\r";
1246 ui -> print( category => 'cdd',
1247 message => ui -> status_bar( sofar => $i+1,
1248 goal => $mods ).$nl,
1249 wrap => 0,
1250 newline => 0 );
1254 $self -> {'covariance_ratios'} = \@cov_ratio;
1256 ui -> print( category => 'cdd',
1257 message => " ... done." );
1259 # }}} Covariance Ratio
1261 # - Perform a PCA on the cook-score:covariance-ratio data --
1263 # {{{ PCA
1265 my ( @outside_n_sd, $eig_ref, $eig_vec_ref, $proj_ref, $std_ref );
1267 if( $self -> models -> [$model_number-1] ->
1268 outputs -> [0] -> covariance_step_successful -> [0][0]) {
1270 ( $eig_ref, $eig_vec_ref, $proj_ref, $std_ref ) =
1271 # die Dumper [\@cook_score,\@cov_ratio];
1272 $self -> pca( data_matrix => [\@cook_score,\@cov_ratio] );
1273 my @projections = @{$proj_ref};
1274 my @standard_deviation = @{$std_ref};
1276 # }}}
1278 # ---- Mark the runs with CS-CR outside N standard deviations of the PCA ----
1280 # {{{ mark runs
1282 for( my $i = 0; $i <= $#projections; $i++ ) {
1283 my $vector_length = 0;
1284 for( my $j = 0; $j <= 1; $j++ ) {
1285 $vector_length += $projections[$i][$j]**2;
1287 $vector_length = sqrt( $vector_length );
1288 my $n_sd = 0;
1289 for( my $j = 0; $j <= 1; $j++ ) {
1290 $n_sd += (($projections[$i][$j]/$vector_length)*$standard_deviation[$j])**2;
1292 $n_sd = ( $self -> {'outside_n_sd_check'} * sqrt( $n_sd ) );
1293 $outside_n_sd[$i] = $vector_length > $n_sd ? 1 : 0;
1297 $self -> {'outside_n_sd'} = \@outside_n_sd;
1299 # }}} mark runs
1301 my %covariance_return_section;
1302 $covariance_return_section{'name'} = 'Diagnostics';
1303 $covariance_return_section{'labels'} = [[],['cook.scores','covariance.ratios','outside.n.sd']];
1305 my @res_array;
1306 for( my $i = 0; $i <= $#cov_ratio; $i ++ ){
1307 push( @res_array , [$cook_score[$i],$cov_ratio[$i],$outside_n_sd[$i]] );
1310 $covariance_return_section{'values'} = \@res_array;
1312 push( @{$self -> {'results'}[$model_number-1]{'own'}},\%covariance_return_section );
1314 # }}}
1316 # --------- Relative estimate change and Jackknife bias ----------
1318 # {{{ Relative change of the parameter estimates
1320 my $output_harvest = $self -> harvest_output( accessors => ['ofv', 'thetas', 'omegas', 'sigmas','sethetas', 'seomegas', 'sesigmas'],
1321 search_output => 1 );
1323 my %return_section;
1324 $return_section{'name'} = 'relative.changes';
1325 $return_section{'labels'} = [[],[]];
1327 my %bias_return_section;
1328 $bias_return_section{'name'} = 'Jackknife.bias.estimate';
1329 $bias_return_section{'labels'} = [['bias','relative.bias'],[]];
1331 my ( @bias, @bias_num, @b_orig, @rel_bias );
1332 my $k = 0;
1333 foreach my $param ( 'thetas', 'omegas', 'sigmas' ) {
1334 my $orig_est = $self -> {'models'} -> [$model_number-1] -> outputs -> [0] -> $param;
1335 if ( defined $orig_est->[0][0] ) {
1336 for ( my $j = 0; $j < scalar @{$orig_est->[0][0]}; $j++ ) {
1337 $b_orig[$k++] = $orig_est->[0][0][$j];
1342 my @rel_ests;
1344 for ( my $i = 0; $i < scalar @{$output_harvest -> {'ofv'} -> [$model_number-1]{'own'}}; $i++ ) {
1345 my @values;
1346 my $k = 0;
1347 foreach my $param ( 'ofv', 'thetas', 'omegas', 'sigmas',
1348 'sethetas', 'seomegas', 'sesigmas',) {
1350 my $orig_est = $self -> {'models'} -> [$model_number-1] -> outputs -> [0] -> $param;
1351 my $est = defined $output_harvest -> {$param} ? $output_harvest -> {$param} -> [$model_number-1]{'own'} : [];
1353 if ( $param eq 'ofv' ) {
1354 if ( defined $orig_est->[0][0] and $orig_est->[0][0] != 0 ) {
1355 push( @values, ($est->[$i][0][0]-$orig_est->[0][0])/$orig_est->[0][0]*100 );
1356 } else {
1357 push( @values, 'INF' );
1359 if( $i == 0 ){
1360 push( @{$return_section{'labels'} -> [1]}, $param );
1362 } else {
1363 my @in_rel_ests;
1364 if( defined $est->[$i][0][0] ){
1365 for ( my $j = 0; $j < scalar @{$est->[$i][0][0]}; $j++ ) {
1366 if ( defined $orig_est->[0][0][$j] and $orig_est->[0][0][$j] != 0 ) {
1367 push( @values, ($est->[$i][0][0][$j]-$orig_est->[0][0][$j])/$orig_est->[0][0][$j]*100);
1368 if( substr($param,0,2) ne 'se' ) {
1369 $bias[$k] += $est->[$i][0][0][$j];
1370 $bias_num[$k++]++;
1372 } else {
1373 push( @values, 'INF' );
1374 if( substr($param,0,2) ne 'se' ) {
1375 $k++;
1377 if( $i == 0 ){
1378 if( substr($param,0,2) eq 'se' ) {
1379 push( @{$return_section{'labels'} -> [1]}, uc(substr($param,0,4)).($j+1) );
1380 } else {
1381 my $lbl = uc(substr($param,0,2)).($j+1);
1382 push( @{$bias_return_section{'labels'} -> [1]}, $lbl );
1383 push( @{$return_section{'labels'} -> [1]}, $lbl );
1391 push( @rel_ests, \@values );
1394 # Jackknife bias
1396 for( my $i = 0; $i <= $#bias_num; $i++ ) {
1397 # The [0] is there to handle the fact that thw bins
1398 # attribute is restructured per model and problem
1399 next if( not defined $bias[$i] );
1400 $bias[$i] = ($self -> {'bins'}[$model_number-1][0]-1)*
1401 ($bias[$i]/$bias_num[$i]-$b_orig[$i]);
1402 if( defined $b_orig[$i] and $b_orig[$i] != 0 ) {
1403 $rel_bias[$i] = $bias[$i]/$b_orig[$i]*100;
1404 } else {
1405 $rel_bias[$i] = undef;
1408 $bias_return_section{'values'} = [\@bias,\@rel_bias];
1410 $return_section{'values'} = \@rel_ests ;
1411 push( @{$self -> {'results'}[$model_number-1]{'own'}},\%return_section );
1412 push( @{$self -> {'results'}[$model_number-1]{'own'}},\%bias_return_section );
1414 # }}} Relative change of the parameter estimates
1417 $self -> update_raw_results(model_number => $model_number);
1419 # ------------- Register the results in a Database ----------------
1421 if( not -e $self -> {'directory'}."m$model_number/done.database.results" ) {
1422 open( DB, ">".$self -> {'directory'}."m$model_number/done.database.results" );
1423 my ( $start_id, $last_id ) = $self ->
1424 register_mfit_results( model_number => $model_number,
1425 cook_score => \@cook_score,
1426 covariance_ratio => \@cov_ratio,
1427 projections => $proj_ref,
1428 outside_n_sd => \@outside_n_sd );
1429 print DB "$start_id-$last_id\n";
1430 close( DB );
1433 # experimental: to save memory
1434 $self -> {'prepared_models'}[$model_number-1]{'own'} = undef;
1435 if( defined $PsN::config -> {'_'} -> {'R'} and
1436 -e $PsN::lib_dir . '/R-scripts/cdd.R' ) {
1437 # copy the cdd R-script
1438 cp ( $PsN::lib_dir . '/R-scripts/cdd.R', $self -> {'directory'} );
1439 # Execute the script
1440 system( $PsN::config -> {'_'} -> {'R'}." CMD BATCH cdd.R" );
1443 end modelfit_analyze
1445 # }}} modelfit_analyze
1447 # {{{ pca
1448 start pca
1450 my $D = Math::MatrixReal ->
1451 new_from_rows( \@data_matrix );
1452 my @n_dim = @{$data_matrix[0]};
1453 my @d_dim = @data_matrix;
1454 my $n = scalar @n_dim;
1455 my $d = scalar @d_dim;
1456 map( $_=(1/$n), @n_dim );
1457 my $frac_vec_n = Math::MatrixReal ->
1458 new_from_cols( [\@n_dim] );
1459 map( $_=1, @n_dim );
1460 map( $_=1, @d_dim );
1461 my $one_vec_n = Math::MatrixReal ->
1462 new_from_cols( [\@n_dim] );
1463 my $one_vec_d = Math::MatrixReal ->
1464 new_from_cols( [\@d_dim] );
1465 my $one_vec_d_n = $one_vec_d * ~$one_vec_n;
1466 my $M = $D*$frac_vec_n;
1467 my $M_matrix = $M * ~$one_vec_n;
1469 # Calculate the mean-subtracted data
1470 my $S = $D-$M_matrix;
1472 # compue the empirical covariance matrix
1473 my $C = $S * ~$S;
1475 # compute the eigenvalues and vectors
1476 my ($l, $V) = $C -> sym_diagonalize();
1478 # Project the original data on the eigenvectors
1479 my $P = ~$V * $S;
1482 # l, V and projections are all MatrixReal objects.
1483 # We need to return the normal perl equivalents.
1484 @eigenvalues = @{$l->[0]};
1485 @eigenvectors = @{$V->[0]};
1486 @std = @{$self -> std( data_matrix => $P -> [0] )};
1487 # Make $P a n * d matrix
1488 $P = ~$P;
1489 @projections = @{$P->[0]};
1491 end pca
1492 # }}} pca
1494 # {{{ std
1495 start std
1497 my ( @sum, @pow_2_sum );
1498 if ( defined $data_matrix[0] ) {
1499 my $n = scalar @{$data_matrix[0]};
1500 for( my $i = 0; $i <= $#data_matrix; $i++ ) {
1501 for( my $j = 0; $j < $n; $j++ ) {
1502 $sum[$i] = $sum[$i]+$data_matrix[$i][$j];
1503 $pow_2_sum[$i] += $data_matrix[$i][$j]*$data_matrix[$i][$j];
1505 $std[$i] = sqrt( ( $n*$pow_2_sum[$i] - $sum[$i]*$sum[$i] ) / ($n*$n) );
1509 end std
1510 # }}} std
1512 # {{{ modelfit_post_fork_analyze
1514 start modelfit_post_fork_analyze
1516 # my @modelfit_results = @{ $self -> {'results'} -> {'subtools'} };
1517 my @modelfit_results = @{ $self -> {'results'} };
1519 ui -> print( category => 'cdd',
1520 message => "Soon done" );
1522 end modelfit_post_fork_analyze
1524 # }}} modelfit_post_fork_analyze
1526 # {{{ modelfit_results
1528 start modelfit_results
1530 my @orig_models = @{$self -> {'models'}};
1531 my @orig_raw_results = ();
1532 foreach my $orig_model ( @orig_models ) {
1533 my $orig_output = $orig_model -> outputs -> [0];
1534 push( @orig_raw_results, $orig_output -> $accessor );
1536 # my @models = @{$self -> {'prepared_models'}};
1537 my @outputs = @{$self -> {'results'}};
1539 my @raw_results = ();
1541 foreach my $mod ( @outputs ) {
1542 my @raw_inner = ();
1543 foreach my $output ( @{$mod -> {'subset_outputs'}} ) {
1544 push( @raw_inner, $output -> $accessor );
1546 push( @raw_results, \@raw_inner );
1548 if ( $format eq 'relative' or $format eq 'relative_percent' ) {
1549 @results = ();
1550 for ( my $i = 0; $i <= $#orig_raw_results; $i++ ) {
1551 print "Model\t$i\n";
1552 my @rel_subset = ();
1553 for ( my $i2 = 0; $i2 < scalar @{$raw_results[$i]}; $i2++ ) {
1554 print "Subset Model\t$i2\n";
1555 my @rel_prob = ();
1556 for ( my $j = 0; $j < scalar @{$orig_raw_results[$i]}; $j++ ) {
1557 print "Problem\t$j\n";
1558 if( ref( $orig_raw_results[$i][$j] ) eq 'ARRAY' ) {
1559 my @rel_subprob = ();
1560 for ( my $k = 0; $k < scalar @{$orig_raw_results[$i][$j]}; $k++ ) {
1561 print "Subprob\t$k\n";
1562 if( ref( $orig_raw_results[$i][$j][$k] ) eq 'ARRAY' ) {
1563 my @rel_instance = ();
1564 for ( my $l = 0; $l < scalar @{$orig_raw_results[$i][$j][$k]}; $l++ ) {
1565 print "Instance\t$l\n";
1566 my $orig = $orig_raw_results[$i][$j][$k][$l];
1567 my $res = $raw_results[$i][$i2][$j][$k][$l];
1568 if( defined $orig and ! $orig == 0 ) {
1569 print "ORIGINAL $orig\n";
1570 print "SUBSET $res\n";
1571 print "RELATIVE ",$res/$orig,"\n";
1572 if ( $format eq 'relative_percent' ) {
1573 push( @rel_instance, ($res/$orig-1)*100 );
1574 } else {
1575 push( @rel_instance, $res/$orig );
1577 } else {
1578 push( @rel_instance, 'NA' );
1580 push( @rel_subprob,\@rel_instance );
1582 } elsif( ref( $orig_raw_results[$i][$j][$k] ) eq 'SCALAR' ) {
1583 print "One instance per problem\n";
1584 my $orig = $orig_raw_results[$i][$j][$k];
1585 my $res = $raw_results[$i][$i2][$j][$k];
1586 if( defined $orig and ! $orig == 0 ) {
1587 print "ORIGINAL $orig\n";
1588 print "SUBSET $res\n";
1589 print "RELATIVE ",$res/$orig,"\n";
1590 if ( $format eq 'relative_percent' ) {
1591 push( @rel_subprob, ($res/$orig-1)*100 );
1592 } else {
1593 push( @rel_subprob, $res/$orig );
1595 } else {
1596 push( @rel_subprob, 'NA' );
1598 } else {
1599 print "WARNING: tool::cdd -> modelfit_results: neither\n\t".
1600 "array or scalar reference found at layer 4 in result data\n\t".
1601 "structure (found ",ref( $orig_raw_results[$i][$j][$k] ),")\n";
1604 push( @rel_prob, \@rel_subprob );
1605 } elsif( ref( $orig_raw_results[$i][$j] ) eq 'SCALAR' ) {
1606 print "One instance per problem\n";
1607 my $orig = $orig_raw_results[$i][$j];
1608 my $res = $raw_results[$i][$i2][$j];
1609 if( defined $orig and ! $orig == 0 ) {
1610 print "ORIGINAL $orig\n";
1611 print "SUBSET $res\n";
1612 print "RELATIVE ",$res/$orig,"\n";
1613 if ( $format eq 'relative_percent' ) {
1614 push( @rel_prob, ($res/$orig-1)*100 );
1615 } else {
1616 push( @rel_prob, $res/$orig );
1618 } else {
1619 push( @rel_prob, 'NA' );
1621 } else {
1622 print "WARNING: tool::cdd -> modelfit_results: neither\n\t".
1623 "array or scalar reference found at layer 3 in result data\n\t".
1624 "structure (found ",ref( $orig_raw_results[$i][$j] ),")\n";
1627 push( @rel_subset, \@rel_prob );
1629 push( @results, \@rel_subset );
1631 } else {
1632 @results = @raw_results;
1635 end modelfit_results
1637 # }}} modelfit_results
1639 # {{{ relative_estimates
1641 start relative_estimates
1643 my $accessor = $parameter.'s';
1644 my @params = $self -> $accessor;
1646 # print "Parameter: $parameter\n";
1647 # sub process_inner_results {
1648 # my $res_ref = shift;
1649 # my $pad = shift;
1650 # $pad++;
1651 # foreach my $res ( @{$res_ref} ) {
1652 # if ( ref ( $res ) eq 'ARRAY' ) {
1653 # process_inner_results( $res, $pad );
1654 # } else {
1655 # print "RELEST $pad\t$res\n";
1659 # process_inner_results( \@params, 0 );
1661 my @orig_params = $self -> $accessor( original_models => 1 );
1662 # [?][model][prob][subp][#]
1663 # print "ORIG TH1: ",$orig_params[0][0][0][0][0],"\n";
1664 for ( my $i = 0; $i < scalar @params; $i++ ) {
1665 # Loop over models
1666 my @mod = ();
1667 for ( my $j = 0; $j < scalar @{$params[$i]}; $j++ ) {
1668 # Loop over data sets
1669 my @prep = ();
1670 for ( my $k = 1; $k < scalar @{$params[$i]->[$j]}; $k++ ) {
1671 # Loop over problems (sort of, at least)
1672 my @prob = ();
1673 for ( my $l = 0; $l < scalar @{$params[$i]->[$j]->[$k]}; $l++ ) {
1674 # Loop over sub problems (sort of, at least)
1675 my @sub = ();
1676 for ( my $m = 0; $m < scalar @{$params[$i]->[$j]->[$k]->[$l]}; $m++ ) {
1677 # Loop over the params
1678 my @par = ();
1679 for ( my $n = 0; $n < scalar @{$params[$i][$j][$k][$l][$m]}; $n++ ) {
1680 my $orig = $orig_params[$i][$j][$l][$m][$n];
1681 # my $orig = $params[$i][$j][0][$l][$m][$n];
1682 my $prep = $params[$i][$j][$k][$l][$m][$n];
1683 if ( $orig != 0 ) {
1684 if ( $percentage ) {
1685 push( @par, ($prep/$orig*100)-100 );
1686 } else {
1687 push( @par, $prep/$orig );
1689 } else {
1690 push( @par, $PsN::out_miss_data );
1693 push( @sub, \@par );
1695 push( @prob, \@sub );
1697 push( @prep, \@prob );
1699 push( @mod, \@prep );
1701 push( @relative_estimates, \@mod );
1704 end relative_estimates
1706 # }}} relative_estimates
1708 # {{{ relative_confidence_limits
1710 start relative_confidence_limits
1712 my @params = @{$self -> confidence_limits( class => 'tool::llp',
1713 parameter => $parameter )};
1714 for ( my $i = 0; $i < scalar @params; $i++ ) {
1715 # Loop over models
1716 my @mod = ();
1717 for ( my $j = 1; $j < scalar @{$params[$i]}; $j++ ) {
1718 # Loop over data sets
1719 my %num_lim;
1720 my @nums = sort {$a <=> $b} keys %{$params[$i][$j]};
1721 foreach my $num ( @nums ) {
1722 my @prob_lim = ();
1723 for ( my $n = 0; $n < scalar @{$params[$i][$j]->{$num}}; $n++ ) {
1724 my @side_lim = ();
1725 for ( my $o = 0; $o < scalar @{$params[$i][$j]->{$num}->[$n]}; $o++ ) {
1726 # OBS: the [0] in the $j position points at the first element i.e
1727 # the results of the tool run on the original model
1728 my $orig = $params[$i][0]->{$num}->[$n][$o];
1729 my $prep = $params[$i][$j]->{$num}->[$n][$o];
1730 print "ORIG: $orig, PREP: $prep\n";
1731 if ( $orig != 0 ) {
1732 if ( $percentage ) {
1733 push( @side_lim, ($prep/$orig*100)-100 );
1734 } else {
1735 push( @side_lim, $prep/$orig );
1737 } else {
1738 push( @side_lim, $PsN::out_miss_data );
1741 push( @prob_lim, \@side_lim );
1743 $num_lim{$num} = \@prob_lim;
1745 push( @mod, \%num_lim );
1747 push( @relative_limits, \@mod );
1750 end relative_confidence_limits
1752 # }}} relative_confidence_limits
1754 # {{{ llp_print_results
1756 start llp_print_results
1758 # NOTE! Only valid for models with one problem and one sub problem!
1760 my %relative_values;
1761 $relative_values{'theta_cis'} = $self ->
1762 relative_confidence_limits( parameter => 'theta',
1763 percentage => 1 );
1764 $relative_values{'omega_cis'} = $self ->
1765 relative_confidence_limits( parameter => 'omega',
1766 percentage => 1 );
1767 $relative_values{'sigma_cis'} = $self ->
1768 relative_confidence_limits( parameter => 'sigma',
1769 percentage => 1 );
1770 $relative_values{'thetas'} = $self ->
1771 relative_estimates( parameter => 'theta',
1772 percentage => 1 );
1773 $relative_values{'omegas'} = $self ->
1774 relative_estimates( parameter => 'omega',
1775 percentage => 1 );
1776 $relative_values{'sigmas'} = $self ->
1777 relative_estimates( parameter => 'sigma',
1778 percentage => 1 );
1779 $relative_values{'sethetas'} = $self ->
1780 relative_estimates( parameter => 'setheta',
1781 percentage => 1 );
1782 $relative_values{'seomegas'} = $self ->
1783 relative_estimates( parameter => 'seomega',
1784 percentage => 1 );
1785 $relative_values{'sesigmas'} = $self ->
1786 relative_estimates( parameter => 'sesigma',
1787 percentage => 1 );
1789 my %prep_values;
1790 $prep_values{'theta_cis'} = $self -> confidence_limits( class => 'tool::llp',
1791 parameter => 'theta' );;
1792 $prep_values{'omega_cis'} = $self -> confidence_limits( class => 'tool::llp',
1793 parameter => 'omega' );;
1794 $prep_values{'sigma_cis'} = $self -> confidence_limits( class => 'tool::llp',
1795 parameter => 'sigma' );;
1796 $prep_values{'thetas'} = $self -> thetas;
1797 $prep_values{'omegas'} = $self -> omegas;
1798 $prep_values{'sigmas'} = $self -> sigmas;
1799 $prep_values{'sethetas'} = $self -> sethetas;
1800 $prep_values{'seomegas'} = $self -> seomegas;
1801 $prep_values{'sesigmas'} = $self -> sesigmas;
1805 open( RES, ">".$self -> {'results_file'} );
1806 print RES "Case-Deletion Diagnostic with Log-Likelihood Profiling\n";
1807 # Loop over models
1808 for ( my $i = 0; $i < scalar @{$relative_values{'theta_cis'}}; $i++ ) {
1809 print RES "MODEL:;",$i+1,"\n";
1810 foreach my $param ( 'theta_cis', 'omega_cis', 'sigma_cis' ) {
1811 print RES "\n",uc($param),":\n";
1812 # Loop over data sets
1814 my @nums = sort {$a <=> $b} keys %{$relative_values{$param}[$i][0]};
1815 print RES ";";
1816 foreach my $num ( @nums ) {
1817 printf RES "$num;;;;";
1819 print RES "\n";
1820 foreach my $num ( @nums ) {
1821 for ( my $o = 0; $o < scalar @{$relative_values{$param}[$i][0]->{$num}[0]}; $o++ ) {
1822 my $side = $o == 0 ? 'lower' : 'upper';
1823 printf RES ";$side;rel diff (%)";
1826 print RES "\n";
1827 print RES "orig";
1828 foreach my $num ( @nums ) {
1829 for ( my $o = 0; $o < scalar @{$relative_values{$param}[$i][0]->{$num}[0]}; $o++ ) {
1830 printf RES ";%7.5f",$prep_values{$param}[$i][0]->{$num}[0][$o];
1831 print RES ";0";
1834 print RES "\n";
1835 for ( my $j = 0; $j < scalar @{$relative_values{$param}[$i]}; $j++ ) {
1836 printf RES "%-7d",$j+1;
1837 my @nums = sort {$a <=> $b} keys %{$relative_values{$param}[$i][$j]};
1838 foreach my $num ( @nums ) {
1839 for ( my $n = 0; $n < scalar @{$relative_values{$param}[$i][$j]->{$num}}; $n++ ) {
1840 for ( my $o = 0; $o < scalar @{$relative_values{$param}[$i][$j]->{$num}[$n]}; $o++ ) {
1841 my $rel = $relative_values{$param}[$i][$j]->{$num}[$n][$o];
1842 my $prep = $prep_values{$param}[$i][$j+1]->{$num}[$n][$o];
1843 printf RES ";%7.5f",$prep;
1844 printf RES ";%3.0f",$rel;
1848 print RES "\n";
1852 # Skipped id's, keys and values:
1853 # Loop over models
1856 # sub process_inner_results {
1857 # my $res_ref = shift;
1858 # my $pad = shift;
1859 # $pad++;
1860 # foreach my $res ( @{$res_ref} ) {
1861 # if ( ref ( $res ) eq 'ARRAY' ) {
1862 # print "$pad ARRAY size ",scalar @{$res},"\n";
1863 # process_inner_results( $res, $pad );
1864 # } elsif ( ref ( $res ) eq 'HASH' ) {
1865 # print "$pad HASH keys ",keys %{$res},"\n";
1866 # } else {
1867 # print "$pad OTHER\n";
1871 # process_inner_results( $self -> {'results'}, 0 );
1873 my $i = 1;
1874 foreach my $own ( @{$self -> {'results'} -> {'own'}} ) {
1875 # print "REF1: ",ref($mod),"\n";
1876 # foreach my $prob ( @{$mod} ) {
1877 # print "REF2: ",ref($prob),"\n";
1878 # foreach my $subprob ( @{$prob} ) {
1879 # print "REF3: ",ref($subprob),"\n";
1880 # print "KEYS: ",keys %{$subprob},"\n";
1883 print RES "MODEL $i\n";
1884 foreach my $param ( 'skipped_ids', 'skipped_keys', 'skipped_values' ) {
1885 print RES uc($param),"\n";
1886 my $j = 1;
1887 foreach my $prep ( @{$own -> {$param}} ) {
1888 print RES "Bin no;$j;";
1889 foreach my $val ( @{$prep} ) {
1890 print RES ";$val";
1892 print RES "\n";
1893 $j++;
1896 $i++;
1900 # for ( my $j = 0; $j < scalar @{$relative_values{'thetas_cis'}->[$i]}; $j++ ) {
1901 # print RES "MODEL:;",$j+1,"\n";
1902 # # Loop over problems (sort of, at least)
1903 # for ( my $l = 0; $l < scalar @{$relative_values{'thetas_cis'}->[$i]->[$j]->[0]}; $l++ ) {
1904 # # Loop over sub problems (sort of, at least)
1905 # for ( my $m = 0; $m < scalar @{$relative_values{'thetas_cis'}->[$i]->[$j]->[0]->[$l]}; $m++ ) {
1906 # # foreach my $param ( 'thetas', 'omegas', 'sigmas',
1907 # # 'sethetas', 'seomegas', 'sesigmas' ) {
1908 # foreach my $param ( 'theta_cis' ) {
1909 # print RES uc($param),":\n\n";
1910 # # Here one could add printing of parameter names, i.e. 'CL V...' or 'TH1 TH2...'
1911 # # Loop over data sets
1912 # for ( my $k = 0; $k < scalar @{$relative_values{$param}->[$i]->[$j]}; $k++ ) {
1913 # printf RES "%-7d",$k+1;
1914 # for ( my $n = 0; $n < scalar @{$relative_values{$param}[$i][$j][$k][$l][$m]}; $n++ ) {
1915 # for ( my $o = 0; $o < scalar @{$relative_values{$param}[$i][$j][$k][$l][$m][$n]}; $o++ ) {
1916 # my $rel = $relative_values{$param}->[$i][$j][$k][$l][$m][$n];
1917 # my $prep = $prep_values{$param}->[$j][$k][$l][$m][$n];
1918 # printf RES ";%7.5f",$prep;
1919 # printf RES ";%3.0f",$rel;
1921 # print RES "\n";
1923 # print RES "\n";
1925 # print RES "\n";
1928 # print "\n";
1930 # print "\n\n";
1933 close( RES );
1935 end llp_print_results
1937 # }}} llp_print_results
1939 # {{{ general_print_results
1941 start general_print_results
1943 unless ( defined $self -> {'results'} ) {
1944 print "WARNING: cdd->general_print_results: no return values defined;\n"
1945 ."cannot print results\n";
1946 return;
1948 my %results = %{$self -> {'results'}};
1950 open( RES, ">".$self -> {'results_file'} );
1951 print RES "Case-Deletion Diagnostic\n";
1953 # Print meta data
1955 unless ( defined $results{'own'} ) {
1956 print "WARNING: cdd->general_print_results: no own return values defined;\n"
1957 ."cannot print results\n";
1958 return;
1962 my @own_results = @{$results{'own'}};
1963 foreach my $result_unit ( @own_results ) {
1964 print RES $result_unit -> {'name'},"\n";
1965 print RES $result_unit -> {'comment'},"\n";
1966 my @values = defined $result_unit{'values'} ? @{$result_unit{'values'}} : ();
1967 my @labels = defined $result_unit{'labels'} ? @{$result_unit{'labels'}} : ();
1968 # Loop the models
1969 for ( my $i = 0; $i <= $#values; $i++ ) {
1970 # Loop the problems
1971 for ( my $j = 0; $j <= $#values[$i]; $j++ ) {
1972 # Loop the sub problems
1973 for ( my $k = 0; $k <= $#values[$i][$j]; $k++ ) {
1974 # Loop the first result dimension
1975 for (my $l = 0; $l <= $#values[$i][$j][$k]; $l++ ) {
1976 # Loop the second result dimension
1977 for ( my $m = 0; $m <= $#values[$i][$j][$k][$l]; $m++ ) {
1978 # Loop the second result dimension
1979 for ( my $m = 0; $m <= $#values[$i][$j]$k][$l]; $m++ ) {
1980 foreach my $model_res ( @values ) {
1981 foreach my $prob_res ( @{$model_unit} ) {
1982 foreach my $subprob_res ( @{$prob_unit} ) {
1983 foreach my $subprob_res ( @{$prob_unit} ) {
1985 close ( RES );
1987 end general_print_results
1989 # }}} general_print_results
1991 # {{{ modelfit_print_results
1993 start modelfit_print_results
1995 my @parameters = ( 'theta', 'omega', 'sigma',
1996 'setheta', 'seomega', 'sesigma' );
1997 my %relative_values;
1998 my %prep_values;
1999 my %orig_values;
2000 foreach my $parameter ( @parameters ) {
2001 my $accessor = $parameter.'s';
2002 $relative_values{$parameter} = $self ->
2003 relative_estimates( parameter => $parameter,
2004 percentage => 1 );
2005 $prep_values{$parameter} = $self -> $accessor;
2006 $orig_values{$parameter} = $self -> $accessor( original_models => 1 );
2008 # sub process_results {
2009 # my $res_ref = shift;
2010 # my $pad = shift;
2011 # $pad++;
2012 # foreach my $res ( @{$res_ref} ) {
2013 # if ( ref ( $res ) eq 'ARRAY' ) {
2014 # process_results( $res, $pad );
2015 # } else {
2016 # print "final $pad\t$res\n";
2020 # process_results( $relative_values{'thetas'}, 0 );
2023 my %nparam;
2024 print "Calling nthetas\n";
2025 $nparam{'thetas'} = $self -> nthetas( original_models => 1 );
2026 print "Done that\n";
2027 open( RES, ">".$self -> {'results_file'} );
2028 print RES "Case-Deletion Diagnostic\n";
2029 # Date information to be added
2030 # print RES "Date:;;;;",$self -> {'date'},"\n";
2032 print RES "Modelfiles:";
2033 foreach my $model ( @{$self -> {'models'}} ) {
2034 print RES ";;;;",$model -> filename,"\n";
2037 # Based on columns and number of datasets might better be shown if split by
2038 # model and problem:
2039 print RES "Based on columns:";
2040 my $vars = $self -> {'case_columns'};
2041 if ( ref( $vars ) eq 'ARRAY' ) {
2042 foreach my $vars2 ( @{$vars} ) {
2043 if ( ref( $vars2 ) eq 'ARRAY' ) {
2044 foreach my $vars3 ( @{$vars2} ) {
2045 print RES ";;;;$vars3\n";
2047 } else {
2048 print RES ";;;;$vars2\n";
2051 } else {
2052 print RES ";;;;$vars\n";
2055 print RES "Number of data sets:";
2056 my $bins = $self -> {'bins'};
2057 if ( ref( $bins ) eq 'ARRAY' ) {
2058 foreach my $bins2 ( @{$bins} ) {
2059 if ( ref( $bins2 ) eq 'ARRAY' ) {
2060 foreach my $bins3 ( @{$bins2} ) {
2061 print RES ";;;;$bins3\n";
2063 } else {
2064 print RES ";;;;$bins2\n";
2067 } else {
2068 print RES ";;;;$bins\n";
2071 print RES "Selection:;;;;",$self-> {'selection_method'},"\n";
2072 if ( defined $self -> {'seed'} ) {
2073 print RES "Seed number:;;;;",$self -> {'seed'},"\n";
2074 } else {
2075 print RES "No seed number specified\n";
2078 # TODO: $skip_keys etc from data->case_deletion must be transferred back to
2079 # the main process and appropriate attributes set.
2081 print RES "\n\n\n\n";
2083 # process_results( $relative_values{'thetas'}->[0]->[0]->[0], 0 );
2085 for ( my $i = 0; $i < scalar @{$relative_values{'theta'}}; $i++ ) {
2086 # Loop over models
2087 for ( my $j = 0; $j < scalar @{$relative_values{'theta'}->[$i]}; $j++ ) {
2088 print RES "MODEL:;",$j+1,"\n";
2089 # Loop over problems (sort of, at least)
2090 for ( my $l = 0; $l < scalar @{$relative_values{'theta'}->[$i]->[$j]->[0]}; $l++ ) {
2091 # Loop over sub problems (sort of, at least)
2092 for ( my $m = 0; $m < scalar @{$relative_values{'theta'}->[$i]->[$j]->[0]->[$l]}; $m++ ) {
2093 foreach my $param ( @parameters ) {
2094 print RES uc($param),":\n\n";
2095 # Here one could add printing of parameter names, i.e. 'CL V...' or 'TH1 TH2...'
2096 # Loop over data sets
2097 print RES ";";
2098 for ( my $n = 1; $n <=scalar @{$relative_values{$param}[$i][$j][0][$l][$m]}; $n++ ) {
2099 printf RES "estimate;rel diff (%);";
2101 print RES "\n;";
2102 for ( my $n = 1; $n <= scalar @{$relative_values{$param}[$i][$j][0][$l][$m]}; $n++ ) {
2103 printf RES "$n;;";
2105 print RES "\n";
2106 print RES "orig";
2107 for ( my $n = 0; $n < scalar @{$relative_values{$param}[$i][$j][0][$l][$m]}; $n++ ) {
2108 printf RES ";%7.5f",$orig_values{$param}[$j][$l][$m][$n];
2109 print RES ";0";
2111 print RES "\n";
2112 for ( my $k = 0; $k < scalar @{$relative_values{$param}->[$i]->[$j]}; $k++ ) {
2113 printf RES "%-7d",$k+1;
2114 for ( my $n = 0; $n < scalar @{$relative_values{$param}[$i][$j][$k][$l][$m]}; $n++ ) {
2115 my $rel = $relative_values{$param}->[$i][$j][$k][$l][$m][$n];
2116 my $prep = $prep_values{$param}->[$j][$k+1][$l][$m][$n];
2117 printf RES ";%7.5f",$prep;
2118 printf RES ";%3.0f",$rel;
2120 print RES "\n";
2122 print RES "\n";
2125 print "\n";
2127 print "\n\n";
2131 # sub process_inner_results {
2132 # my $res_ref = shift;
2133 # my $pad = shift;
2134 # $pad++;
2135 # foreach my $res ( @{$res_ref} ) {
2136 # if ( ref ( $res ) eq 'ARRAY' ) {
2137 # print "$pad ARRAY size ",scalar @{$res},"\n";
2138 # process_inner_results( $res, $pad );
2139 # } elsif ( ref ( $res ) eq 'HASH' ) {
2140 # print "$pad HASH keys ",keys %{$res},"\n";
2141 # } else {
2142 # print "$pad OTHER\n";
2146 # process_inner_results( $self -> {'results'}, 0 );
2147 # die;
2148 # Skipped id's, keys and values:
2149 # Loop over models
2150 my $i = 1;
2151 foreach my $own ( @{$self -> {'results'} -> {'own'}} ) {
2152 print RES "MODEL $i\n";
2153 foreach my $param ( 'skipped_ids', 'skipped_keys', 'skipped_values' ) {
2154 print RES uc($param),"\n";
2155 my $j = 1;
2156 foreach my $prep ( @{$own -> {$param}} ) {
2157 print RES "Bin no;$j;";
2158 foreach my $val ( @{$prep} ) {
2159 print RES ";$val";
2161 print RES "\n";
2162 $j++;
2165 $i++;
2168 close ( RES );
2170 end modelfit_print_results
2172 # }}} modelfit_print_results
2174 # {{{ prepare_results
2176 start prepare_results
2178 if ( not defined $self -> {'raw_results'} ) {
2179 $self -> read_raw_results();
2182 end prepare_results
2184 # }}}
2186 # {{{ print_summary
2187 start print_summary
2189 my ($outside_n_sd, $cook, $covrat );
2190 for( my $i = 0; $i < scalar @{$self -> {'raw_results_header'} -> [0]} ; $i++) {
2191 if( $self -> {'raw_results_header'} -> [0][$i] eq 'outside.n.sd' ){
2192 $outside_n_sd = $i;
2194 if( $self -> {'raw_results_header'} -> [0][$i] eq 'cook.scores' ){
2195 $cook = $i;
2197 if( $self -> {'raw_results_header'} -> [0][$i] eq 'cov.ratios' ){
2198 $covrat = $i;
2202 sub acknowledge {
2203 my $name = shift;
2204 my $outcome = shift;
2205 my $file = shift;
2206 my $l = (7 - length( $outcome ))/2;
2207 my $text = sprintf( "%-66s%2s%7s%-5s", $name, '[ ', $outcome. ' ' x $l, ' ]' );
2208 print $text, "\n\n";
2209 print $file $text if defined $file;
2212 my ( @num, @cs, @cr ) ;
2213 for( my $model_i = 0; $model_i <= $#{$self -> {'raw_results'}}; $model_i++ ){
2214 for( my $prep_mod_i = 0; $prep_mod_i <= $#{$self -> {'raw_results'} -> [$model_i]}; $prep_mod_i++ ){
2215 my $test_val = $self -> {'raw_results'} -> [$model_i] -> [$prep_mod_i] -> [$outside_n_sd];
2216 if( defined($test_val) and $test_val == 1 ){
2217 push( @num, ($prep_mod_i) ); # prep_mod_i includes the original run as 0
2218 push( @cs, $self -> {'raw_results'} -> [$model_i] -> [$prep_mod_i] -> [$cook] );
2219 push( @cr, $self -> {'raw_results'} -> [$model_i] -> [$prep_mod_i] -> [$covrat] );
2224 if ( $#num < 0 ) {
2225 acknowledge( 'No outlying case-deleted data set was found', 'OK' );
2226 } else {
2227 print "\n";
2228 acknowledge( (scalar @num).' case-deleted data sets were marked as outliers', 'WARNING' );
2229 printf( "\t%-20s%20s\t%20s\n", 'Data set', 'cook-score', 'covariance-ratio' );
2230 for( my $i = 0; $i <= $#num; $i++ ) {
2231 printf( "\t%-20s%14s%3.3f\t%14s%3.3f\n", $num[$i], ' ', $cs[$i], ' ', $cr[$i] );
2233 print "\n";
2237 end print_summary
2238 # }}} print_summary
2240 # {{{ update_raw_results
2241 start update_raw_results
2243 my $cook_scores;
2244 my $cov_ratios;
2245 my $outside_n_sd;
2247 # foreach my $section( @{$self -> {'results'}[0] -> {'own'}} ){
2248 # if( $section -> {'name'} eq 'cook.scores' ){
2249 # $cook_scores = $section -> {'values'};
2251 # if( $section -> {'name'} eq 'cov.ratio' ){
2252 # $cov_ratios = $section -> {'values'};
2254 # if( $section -> {'name'} eq 'outside.n.sd' ){
2255 # $outside_n_sd = $section -> {'values'};
2259 my ($dir,$file) =
2260 OSspecific::absolute_path( $self -> {'directory'},
2261 $self -> {'raw_results_file'}[$model_number-1] );
2262 open( RRES, $dir.$file );
2263 my @rres = <RRES>;
2264 close( RRES );
2265 open( RRES, '>',$dir.$file );
2267 chomp( $rres[0] );
2268 print RRES $rres[0] . ",cook.scores,cov.ratios,outside.n.sd\n";
2269 chomp( $rres[1] );
2270 print RRES $rres[1] . ",0,1,0\n";
2272 my @new_rres;
2273 for( my $i = 2 ; $i <= $#rres; $i ++ ) {
2274 my $row_str = $rres[$i];
2275 chomp( $row_str );
2276 $row_str .= sprintf( ",%.5f,%.5f,%1f\n" ,
2277 $self -> {'cook_scores'} -> [$i-2],
2278 $self -> {'covariance_ratios'} -> [$i-2],
2279 $self -> {'outside_n_sd'} -> [$i-2] );
2280 print RRES $row_str;
2282 close( RRES );
2284 end update_raw_results
2286 # }}} update_raw_results
2288 # {{{ create_R_scripts
2289 start create_R_scripts
2291 unless( -e $PsN::lib_dir . '/R-scripts/cdd.R' ){
2292 'debug' -> die( message => 'CDD R-script are not installed, no matlab scripts will be generated.' );
2293 return;
2295 cp ( $PsN::lib_dir . '/R-scripts/cdd.R', $self -> {'directory'} );
2297 end create_R_scripts
2298 # }}}