lib/tool/npc_subs.pm

   1 # {{{ include_statements
   2 start include statements
   3 use Data::Dumper;
   4 use Math::Random;
   5 use strict;
   6 use tool::modelfit;
   7 use model;
   8 use ui;
   9 end include
  10 # }}}
  11
  12 # {{{ new
  13
  14 start new
  15 {
  16
  17   if (defined $this->{'samples'}){
  18     if ( $this->{'samples'} < 20){
  19       'debug' -> die(message => "Program does not allow less than 20 samples.");
  20     }
  21   } else {
  22     'debug' -> die(message => "Option -samples is required.");
  23   }
  24
  25   if ( scalar (@{$this -> {'models'}->[0]-> problems}) != 1 ){
  26     'debug' -> die(message => 'Tool can only handle modelfiles with exactly one problem.');
  27   }
  28
  29   unless ($this->{'dv'} =~ /^(IPRED|IWRES|CWRES)$/ ){
  30     if (length($this->{'dv'})>4){
  31       'debug' -> die(message => "The dependent variable name (option -dv) can have at most".
  32                      " 4 letters, unless it is IPRED/IWRES/CWRES, due to a limitation in NONMEM.");
  33     }
  34   }
  35
  36   my @check_cols;
  37   push (@check_cols, $this->{'bin_on'}) if (defined $this->{'bin_on'});
  38   push (@check_cols, $this->{'stratify_on'})
  39       if ((defined $this->{'stratify_on'}) && ('STRT' ne $this->{'stratify_on'}));
  40   foreach my $col (@check_cols){
  41     my $var_text = ($col eq $this->{'stratify_on'})? 'stratification' : 'independent';
  42     if ($col =~ /^(IPRED|IWRES|IRES|RES|WRES|DV|CWRES)$/){
  43       'debug' -> die(message => "It is not allowed to use $col as the $var_text ".
  44                      "variable, since it it is dependent and hence differs between ".
  45                      "simulations of the same observation.");
  46     }
  47
  48     unless ($col =~ /^(PRED)$/){
  49       #may be synonym
  50       if ($this -> {'models'}->[0]-> is_option_set(record=>'input',name=>$col)){
  51         my $val = $this -> {'models'}->[0]-> get_option_value(record_name => 'input',
  52                                                               option_name => $col);
  53         if ((defined $val) && ($val =~ /(SKIP|DROP)/ )){
  54           'debug' -> die(message => "Cannot SKIP/DROP the $var_text variable ".
  55                          "in the \$INPUT record.");
  56         }
  57       } else {
  58         #check synonyms
  59         my $input_record = $this -> {'models'}->[0]-> record(record_name => 'input' );
  60         my $found=0;
  61         if( scalar(@{$input_record}) > 0 ){ #always true
  62           foreach my $line ( @{$input_record -> [0]} ){
  63             if ( $line =~ /([\w]+)=([\w]+)[^\w]/ ){
  64               if ($col eq $2){
  65                 $found=1;
  66                 last;
  67               }
  68             }
  69           }
  70         }
  71         'debug' -> die(message => "In order to use $col as the $var_text variable, ".
  72                        "it must be in the \$INPUT record ") if !($found);
  73       }
  74     }
  75   }
  76
  77
  78   if ($this->{'dv'} =~ /^(PRED|MDV|TIME|DATE|EVID|AMT|RATE|ID)$/){
  79     'debug' -> die(message => "It is not allowed to use $this->{'dv'} as the dependent".
  80                    " variable,\nbecause it".
  81                    " is the same for all simulations of a single observation.");
  82   }
  83
  84   my @needed_variables=();
  85   push (@needed_variables,('IWRES','IPRED','IRES')) if ($this->{'do_estimation'});
  86   push (@needed_variables,'STRT') if ($this->{'stratify_on'} eq 'STRT');
  87   push (@needed_variables,$this->{'dv'}) unless
  88       ($this->{'dv'} =~ /^(IPRED|IWRES|IRES|RES|WRES|DV|CWRES)$/);
  89
  90   if (scalar(@needed_variables)>0){
  91     my @line_array;
  92     push (@line_array,@{$this ->{'models'}->[0]->problems->[0]->errors->[0]->code})
  93         if (defined ($this ->{'models'}->[0]->problems->[0]->errors));
  94
  95     push (@line_array,@{$this ->{'models'}->[0]->problems->[0]->preds->[0]->code})
  96         if (defined $this ->{'models'}->[0]->problems->[0]->preds);
  97
  98     push (@line_array,@{$this ->{'models'}->[0]->problems->[0]->pks->[0]->code})
  99         if (defined $this ->{'models'}->[0]->problems->[0]->pks);
 100
 101     my $missing_variables='';
 102     foreach my $var (@needed_variables){
 103       my $found=0;
 104       foreach my $line (@line_array){
 105         #variable must be left hand side, i.e. first set of non-blank characters
 106         if ($line =~ /^ *$var[ =]/){
 107           $found=1;
 108           last;
 109         }
 110       }
 111       $missing_variables = "$missing_variables $var" unless $found;
 112     }
 113     if (length($missing_variables)>0){
 114       'debug' -> die(message => "The selected input options require the user to define".
 115                      "$missing_variables in the modelfile,\nbut this was not done in ".
 116                      "neither \$ERROR, \$PRED nor \$PK.");
 117     }
 118   }
 119
 120 #check for ICALL .EQ. 4
 121
 122   my @line_array;
 123   push (@line_array,@{$this ->{'models'}->[0]->problems->[0]->errors->[0]->code})
 124       if (defined ($this ->{'models'}->[0]->problems->[0]->errors));
 125
 126   push (@line_array,@{$this ->{'models'}->[0]->problems->[0]->preds->[0]->code})
 127       if (defined $this ->{'models'}->[0]->problems->[0]->preds);
 128
 129   push (@line_array,@{$this ->{'models'}->[0]->problems->[0]->pks->[0]->code})
 130       if (defined $this ->{'models'}->[0]->problems->[0]->pks);
 131
 132   push (@line_array,@{$this ->{'models'}->[0]-> record(record_name => 'des')->[0]})
 133       if (defined $this ->{'models'}->[0]-> record(record_name => 'des')->[0]);
 134
 135   my $found=0;
 136   foreach my $line (@line_array){
 137     if ($line =~ /ICALL\s?\.EQ\.\s?4/){
 138       $found=1;
 139       last;
 140     }
 141   }
 142
 143   if ($found){
 144     print "\n\nWarning: String ICALL.EQ.4 found in modelfile. Simulation statements\n".
 145         "will influence NPC/VPC simulations and might produce unwanted results.\n\n";
 146   }
 147
 148  ##end ICALL check
 149
 150
 151
 152   if (defined $this->{'mirrors'}){
 153     if ( $this->{'mirrors'} > 20){
 154       'debug' -> die(message => "Program does not allow more than 20 mirror plots.");
 155     }
 156   }
 157
 158   if ( scalar (@{$this -> {'models'}->[0]-> record(record_name => 'simulation' )}) > 1 ){
 159     'debug' -> die(message => "Tool does not allow more than one \$SIMULATION".
 160                    " record in the modelfile.");
 161   }
 162
 163   unless ( scalar (@{$this -> {'models'}->[0]-> record( record_name => 'estimation' )}) == 1 ){
 164     'debug' -> die(message => 'Tool requires exactly one $ESTIMATION record in '.
 165                    'the modelfile.');
 166   }
 167
 168   if ( scalar (@{$this -> {'models'}->[0]-> record(record_name => 'table' )}) > 0 ){
 169     'debug' -> warn(level => 1,
 170                     message => 'Tool will delete existing $TABLE records in the modelfile.');
 171   }
 172
 173   my $np_record = $this -> {'models'}->[0]-> record(record_name => 'nonparametric' );
 174 #    if(( scalar(@{$np_record}) > 0 )&&($this->{'dv'} eq 'CWRES')){
 175 #       'debug' -> die(message => "CWRES cannot be computed when there is a \$NONPARAMETRIC record".
 176 #                      ' in the modelfile.');
 177 #    }
 178
 179   if (defined $this->{'msfo_file'}){
 180     if (defined $this->{'lst_file'}){
 181       'debug' -> die(message => 'Tool does not allow using both -lst '.
 182                      'and -msfo options simultaneously.');
 183     }
 184   } else {
 185 #       my $np_record = $this -> {'models'}->[0]-> record(record_name => 'nonparametric' );
 186     if( scalar(@{$np_record}) > 0 ){
 187       'debug' -> die(message => "Tool requires an msfo file when there is a \$NONPARAMETRIC record".
 188                      ' in the modelfile.');
 189     }
 190   }
 191
 192   #Synonym forbidden for MDV,ID
 193
 194   foreach my $variable ('MDV','ID'){
 195     if ($this -> {'models'}->[0]-> is_option_set(record=>'input',name=>$variable)){
 196       my $value = $this -> {'models'}->[0]-> get_option_value(record_name => 'input',
 197                                                               option_name => $variable);
 198       if ((defined $value) && !($value =~ /(SKIP|DROP)/ )){
 199         'debug' -> die(message => "It is forbidden to use a synonym for $variable ".
 200                        'in the $INPUT record.');
 201       }
 202     } else {
 203       my $input_record = $this -> {'models'}->[0]-> record(record_name => 'input' );
 204       if( scalar(@{$input_record}) > 0 ){ #always true
 205         foreach my $line ( @{$input_record -> [0]} ){
 206           if ( $line =~ /([\w]+)=(MDV|ID)[^\w]/ ){
 207             if (($variable eq $2) && !($1 =~ /(SKIP|DROP)/ )){
 208               'debug' -> die(message => "It is forbidden to use a synonym ".
 209                              "for $variable in the \$INPUT record.");
 210             }
 211             last;
 212           }
 213         }
 214       }
 215     }
 216
 217   }
 218
 219   ##Checks for VPC
 220   if ($this->{'is_vpc'}){
 221
 222     unless (defined $this->{'bin_on'}){
 223       'debug' -> die(message => "VPC requires an independent variable column.");
 224     }
 225
 226     my $option_count=0;
 227     if (defined $this->{'no_of_bins'}){
 228       $option_count++;
 229       unless (defined $this->{'bin_by_count'}){
 230         'debug' -> die(message =>"Option bin_by_count must be defined when ".
 231                        "option no_of_bins is used.");
 232       }
 233     }
 234     if (scalar(@{$this->{'bin_array'}}>0)){
 235       if ($option_count>0){
 236         'debug' -> die(message =>"Incompatible set of binning options. Use vpc -h for help.");
 237       }
 238       $option_count++;
 239       unless (defined $this->{'bin_by_count'}){
 240         'debug' -> die(message =>"Option bin_by_count must be defined when ".
 241                        "option bin_array is used.");
 242       }
 243       if ($this->{'bin_by_count'}==1){
 244         #check at least two values and all counts larger than 0
 245         unless (scalar(@{$this->{'bin_array'}}) > 1){
 246           'debug' -> die(message =>"Must define at least two counts in bin_array ".
 247                          "when binning by count.");
 248         }
 249         foreach my $c ($this->{'bin_array'}){
 250           if ($c<1){
 251             'debug' -> die(message =>"Number of observations in each bin must ".
 252                            "be at least 1.");
 253           }
 254         }
 255       }else {
 256         #check at least one value and sorted in ascending order and increasing
 257         unless (scalar(@{$this->{'bin_array'}}) > 0){
 258           'debug' -> die(message =>"Must define at least one boundary in bin_array ".
 259                          "when binning by width.");
 260         }
 261         for (my $i=1; $i< scalar(@{$this->{'bin_array'}}); $i++){
 262           unless ($this->{'bin_array'}->[$i] > $this->{'bin_array'}->[$i-1]){
 263             'debug' -> die(message =>"List of bin boundaries must be sorted and ".
 264                            "increasing.");
 265           }
 266         }
 267       }
 268
 269     }
 270
 271     if (defined $this->{'single_bin_size'}){
 272       if ($option_count>0){
 273         'debug' -> die(message =>"Incompatible set of binning options. Use vpc -h for help.");
 274       }
 275       $option_count++;
 276       unless ($this->{'single_bin_size'}>0){
 277         'debug' -> die(message =>"Option single_bin_size must be larger than 0.");
 278       }
 279       unless (defined $this->{'bin_by_count'}){
 280         'debug' -> die(message =>"Option bin_by_count must be defined when ".
 281                        "option single_bin_size is used.");
 282       }
 283       if (defined $this->{'overlap_percent'}){
 284         unless ($this->{'overlap_percent'}>0){
 285           'debug' -> die(message =>"Option overlap must be larger than 0\%.");
 286         }
 287         unless ($this->{'overlap_percent'}<100){
 288           'debug' -> die(message =>"Option overlap must be smaller than 100\%.");
 289         }
 290       }
 291       #else handle by translating to no_of_bins-alternative after know width/no of observations
 292     } else {
 293       if (defined $this->{'overlap_percent'}){
 294         'debug' -> die(message =>"Option single_bin_size must be defined when ".
 295                        "option overlap is used.");
 296       }
 297     }
 298
 299     if (defined $this->{'bin_by_count'}){
 300       unless ($option_count > 0){
 301         'debug' -> die(message =>"Option bin_by_count is forbidden without ".
 302                        "additional binning options.");
 303       }
 304       unless ($this->{'bin_by_count'} eq '0' || $this->{'bin_by_count'} eq '1'){
 305         'debug' -> die(message =>"Option bin_by_count must be either 1 or 0.");
 306       }
 307     }
 308   }
 309
 310   my $command_file = $this -> {'directory'}."/command.txt";
 311   if ( -e $command_file ){
 312     #this is a restart run, check if calls are compatible
 313     open( CMD, $command_file ) or
 314         'debug'->die(message=> "Could not open command.txt in restart directory.");
 315     my $row = <CMD>;
 316     close(CMD);
 317     my $str = $this -> {'models'}->[0]->filename();
 318     unless ($row =~ /($str)/){
 319       'debug'->die(message=> "Name of modelfile $str on command line does not match name of ".
 320                    "modelfile \nin restart directory ".$this -> {'directory'});
 321     }
 322     if (defined $this->{'lst_file'}){
 323       unless ($row =~ /($this->{'lst'})/){
 324         'debug'->die(message=> "lst-file on command line was not used in call\n$row in".
 325                      "restart directory ".$this -> {'directory'});
 326       }
 327     } elsif ($row =~ /-lst=/){
 328       'debug'->die(message=> "No lst-file was specified on command line, but an lst-file was used ".
 329                    "in call\n$row in restart directory ".$this -> {'directory'});
 330     }
 331     if (defined $this->{'msfo_file'}){
 332       unless ($row =~ /($this->{'msfo'})/){
 333         'debug'->die(message=> "msfo-file on command line was not used in call\n$row".
 334                      "in restart directory ".$this -> {'directory'});
 335       }
 336     } elsif ($row =~ /-msfo=/){
 337       'debug'->die(message=> "No msfo-file was specified on command line, but an msfo-file was used ".
 338                    "in call\n$row in restart directory ".$this -> {'directory'});
 339     }
 340     if (defined $this->{'samples'}){
 341       my $str="-samples=$this->{'samples'}";
 342       unless ($row =~ /$str\s/){
 343         'debug'->die(message=> "$str on command line is different from call\n$row".
 344                      "in restart directory ".$this -> {'directory'});
 345       }
 346     }
 347     if ($this->{'do_estimation'}){
 348       my $str="-dv=";
 349       unless (($row =~ /$str/)||($row =~ /=PRED\s/)){
 350         'debug'->die(message=> "Estimation was not requested by call\n$row".
 351                      "in restart directory ".$this -> {'directory'}.
 352                      "\n(i.e. neither -dv option used nor binning/stratifying on PRED)".
 353                      "\nand therefore cannot be used here.");
 354       }
 355     }
 356     unless ($this->{'dv'} =~ /^(IPRED|IWRES|IRES|RES|WRES|DV)$/ ){
 357       my $str="-dv=$this->{'dv'}";
 358       unless ($row =~ /$str/){
 359         'debug'->die(message=> "$this->{'dv'} is not part of default variable set from estimation,\n".
 360                      "nor was $str on command line used in call\n$row".
 361                      "in restart directory ".$this -> {'directory'}.
 362                      "\nTherefore $this->{'dv'} is not available.");
 363       }
 364     }
 365   }
 366
 367 }
 368 end new
 369
 370 # }}}
 371
 372 # {{{ modelfit_setup
 373 start modelfit_setup
 374 {
 375   my $type='npc';
 376   if ($self->{'is_vpc'}){
 377     $type='vpc';
 378     $self->logfile('vpc.log');
 379     $self->results_file('vpc_results.csv');
 380
 381   }
 382
 383
 384   my $model_orig = $self-> {'models'}->[0] -> copy(filename=>$type.'_original.mod',
 385                                                    directory=>'m1');
 386
 387 #Always force rerun of original vpc model by removing lst file of original model
 388 #For NPC only force rerun if are reusing old runs, but want to do stratify,
 389
 390   if ($self->{'is_vpc'}){
 391     my $oldlst = $self -> {'directory'}."/m1/vpc_original.lst";
 392     unlink $oldlst;
 393     unlink $self -> {'directory'}."NM_run1/psn-1.lst";
 394     unlink $self -> {'directory'}."NM_run1/psn.lst";
 395   } else {
 396     if (defined $self -> {'stratify_on'}){
 397       my $oldlst = $self -> {'directory'}."/m1/npc_original.lst";
 398       unlink $oldlst;
 399       unlink $self -> {'directory'}."NM_run1/psn-1.lst";
 400       unlink $self -> {'directory'}."NM_run1/psn.lst";
 401     }
 402   }
 403
 404 #logic for use of MDV column as basis for finding which rows are observations
 405 #request MDV in $TABLE if no $PRED record or if there is a $PRED record and
 406 #MDV is in the input
 407 #if there is a $PRED but no MDV in input then all rows will be observations
 408
 409   my $MDV='';
 410   if (scalar @{$model_orig-> record( record_name => 'pred' )} < 1){
 411     $MDV='MDV';
 412   } else {
 413     if ($model_orig-> is_option_set(record=>'input',name=>'MDV')){
 414       my $mdv_val = $model_orig -> get_option_value(record_name => 'input',
 415                                                     option_name => 'MDV');
 416       #have already checked no synonym. If value defined then SKIP/DROP ->don't include
 417       unless (defined $mdv_val){
 418         $MDV='MDV'; #MDV in input, no synynom
 419       }
 420     }
 421     #have already checked no synonym. If value defined then SKIP/DROP ->don't include
 422   }
 423
 424 ######## fix $TABLE record: remove any existing. create a new correct one.
 425
 426   #store $TABLE if FILE=cwtab<>.deriv, i.e. dv is CWRES, add it back to models later
 427   my @extra_table_record;
 428   if ($self->{'dv'} eq 'CWRES'){
 429     my $table_record = $model_orig -> record(record_name => 'table',
 430                                              problem_number => 0);
 431
 432     my $rec_count = scalar(@{$table_record});
 433     if( $rec_count > 0 ){
 434       for (my $i=0; $i<$rec_count; $i++){
 435         @extra_table_record=();
 436         my $keep=0;
 437         foreach my $table_line ( @{$table_record -> [$i]} ){
 438           push(@extra_table_record,$table_line);
 439           if ( $table_line =~ /(cwtab[0-9]*)\.deriv/){
 440             $self->dv_table_name($1);
 441             $keep=1;
 442           }
 443         }
 444         last if ($keep==1);
 445       }
 446     }
 447   }
 448
 449   $model_orig -> remove_records(type => 'table');
 450
 451
 452
 453   my @rec_strings = ('ID',$MDV);
 454   if (defined $self->{'stratify_on'}){
 455     push (@rec_strings,$self->{'stratify_on'}) unless ($self->{'stratify_on'} eq 'PRED');
 456   }
 457   if ($self->{'is_vpc'}){
 458     push (@rec_strings,$self->{'bin_on'}) unless ($self->{'bin_on'} eq 'PRED');
 459   }
 460   if ($self->{'do_estimation'}){
 461     unless ($self->{'dv'} =~ /^(IPRED|IWRES|IRES|RES|WRES|DV|CWRES)$/ ){
 462       push (@rec_strings,$self->{'dv'});
 463     }
 464     push (@rec_strings,('IPRED','IWRES','IRES','ONEHEADER','NOPRINT'));
 465   }else{
 466     push (@rec_strings,$self->{'dv'});
 467     push (@rec_strings,('NOAPPEND','ONEHEADER','NOPRINT'));
 468   }
 469   push (@rec_strings,('FILE=npctab.dta'));
 470   $model_orig -> set_records(type => 'table',
 471                              record_strings => \@rec_strings);
 472
 473   $model_orig -> add_records(type => 'table',
 474                              record_strings => \@extra_table_record)
 475       if (scalar(@extra_table_record)>0);
 476
 477
 478   #check if synonyms used for DV/strat/bin. If so, replace reserved variable with synonym.
 479
 480   my @reserved_labels=('ID','L1','L2','MDV','RAW_','MRG_','RPT_','TIME','DATE');
 481   push (@reserved_labels,('DAT1','DAT2','DAT3','EVID','AMT','RATE','SS','II','ADDL'));
 482   push (@reserved_labels,('CMT','PCMT','CALL','CONT','SKIP','DROP'));
 483
 484   my @check_var;
 485   push (@check_var,'DV') if ($self->{'dv'} eq 'DV');
 486
 487   #collect variables to check
 488   my @cvar;
 489   push (@cvar,$self->{'stratify_on'}) if ((defined $self->{'stratify_on'})
 490                                           && !($self->{'stratify_on'} =~ /(STRT|PRED)/));
 491   push (@cvar,$self->{'bin_on'}) if ((defined $self->{'bin_on'})
 492                                      && ($self->{'bin_on'} ne 'PRED'));
 493   #check if strat/bin reserved
 494   foreach my $var (@cvar){
 495     foreach my $lab (@reserved_labels){
 496       if ($var eq $lab){
 497         push (@check_var,$var) ;
 498         last;
 499       }
 500     }
 501   }
 502
 503   foreach my $reserved_name (@check_var){
 504     my $synonym=$reserved_name;
 505     if ($model_orig-> is_option_set(record=>'input',name=>$reserved_name)){
 506       my $value = $model_orig -> get_option_value(record_name => 'input',
 507                                                   option_name => $reserved_name);
 508       if (defined $value){
 509         unless ($value =~ /(SKIP|DROP)/ ){
 510           $synonym=$value;
 511         }
 512       }
 513     } else {
 514       my $input_record = $model_orig -> record(record_name => 'input' );
 515       if( scalar(@{$input_record}) > 0 ){ #always true
 516         foreach my $line ( @{$input_record -> [0]} ){
 517           if ( $line =~ /([\w]+)=([\w]+)[^\w]/ ){
 518             unless ($1 =~ /(SKIP|DROP)/ ){ #synonym
 519               $synonym=$1 if ($2 eq $reserved_name);
 520             }
 521             last;
 522           }
 523         }
 524       }
 525     }
 526     if ($synonym ne $reserved_name){
 527       #found synonym
 528       if ('DV' eq $reserved_name){
 529         $self->{'dv'}=$synonym;
 530       } elsif ($reserved_name eq $self->{'bin_on'}){
 531         $self->{'bin_on'}=$synonym;
 532       } elsif ($reserved_name eq $self->{'stratify_on'}){
 533         $self->{'stratify_on'}=$synonym;
 534       }
 535     }
 536   }
 537
 538 ##### fix $ESTIMATION for original model
 539
 540   $model_orig -> set_option(record_name => 'estimation',
 541                             option_name => 'MAXEVALS',
 542                             fuzzy_match => 1,
 543                             option_value => '0');
 544
 545   $model_orig -> remove_option(record_name => 'estimation',
 546                                option_name => 'MSFO',
 547                                fuzzy_match => 1);
 548
 549   if ($self->{'do_estimation'}){
 550     $model_orig -> set_option(record_name => 'estimation',
 551                               option_name => 'POSTHOC',
 552                               fuzzy_match => 1);
 553   }
 554
 555
 556 ##### remove $COVARIANCE
 557   $model_orig -> remove_records(type => 'covariance');
 558
 559 ###update initial estimates/input data, if given###
 560
 561   if (defined $self->{'lst_file'}){
 562     #create output object to check that can be parsed correctly, and to
 563     #extract data for error checking
 564     my $outputObject= output -> new(filename => '../'.$self -> {'lst_file'});
 565     unless ($outputObject->parsed_successfully()){
 566       'debug'-> die (message=>"lst file $self->{'lst_file'} could not be parsed.");
 567     }
 568     #update initial values in model
 569     $model_orig -> update_inits ( from_output => $outputObject);
 570
 571   } elsif (defined $self->{'msfo_file'}){
 572     #add msfi record, and remove $THETA, $OMEGA, $SIGMA
 573     $model_orig -> set_records(type => 'msfi',
 574                                record_strings => [$self -> {'msfo_file'}]);
 575     $model_orig -> remove_records(type => 'theta');
 576     $model_orig -> remove_records(type => 'omega');
 577     $model_orig -> remove_records(type => 'sigma');
 578   }
 579
 580 ####end update initial estimates############
 581
 582
 583 #####Copy to simulation model
 584
 585   my $model_sim = $model_orig -> copy(filename=>$type.'_simulation.mod',
 586                                       directory=>'m1');
 587
 588   $model_orig -> remove_records(type => 'simulation');
 589
 590   unless ($self->{'do_estimation'}){
 591     $model_sim -> remove_records(type => 'estimation');
 592   }
 593
 594 ###Create simulation record######
 595
 596
 597   #Check if existing simulation record
 598   my $sim_record = $model_sim-> record(record_name => 'simulation',problem_number => 0);
 599
 600   if (scalar (@{$sim_record}) > 0){
 601     #this option will be set below
 602     foreach my $altopt ('SUBPROBLEMS','SUBPROBS','NSUBPROBLEMS','NSUBPROBS','NSUBS'){
 603       #NONMEM accepts a heck of a lot of alternatives...
 604       $model_sim -> remove_option(record_name => 'simulation',
 605                                   option_name => $altopt,
 606                                   fuzzy_match => 1);
 607     }
 608
 609     #unless $do_simulation this option will be set below
 610     $model_sim -> remove_option(record_name => 'simulation',
 611                                 option_name => 'ONLYSIMULATION',
 612                                 fuzzy_match => 1);
 613
 614     if (defined $self->{'msfo_file'}){ #always if $nonp, but even if not $nonp
 615       my $val= $model_sim -> get_option_value(record_name => 'simulation',
 616                                               option_name => 'TRUE');
 617       unless ((defined $val)&& ($val eq 'FINAL')){
 618         'debug'->die(message=>"Error in \$SIMULATION record in modelfile: when using an msfo-file\n".
 619                      "the option TRUE=FINAL must be set.");
 620
 621       }
 622     }
 623
 624     #find and replace seeds
 625     my $short_record = $model_sim-> record(record_name => 'simulation',problem_number => 0);
 626     @rec_strings =();
 627     my $set_seeds=0;
 628     #Simply look for numbers. Since got rid of NSUBPROBS this is ok.
 629     foreach my $sim_line ( @{$short_record -> [0]} ){
 630       my $new_line;
 631       while ( $sim_line =~ /(\D*)(\d+)(\D.*)/g ){
 632         my $seed = random_uniform_integer(1,0,2147483560 );
 633         $new_line .= "$1$seed";
 634         $sim_line = $3;
 635         $set_seeds += 1;
 636       }
 637       push (@rec_strings,$new_line.$sim_line);
 638     }
 639     if (($set_seeds < 1)||($set_seeds > 2)){
 640       'debug'->die(message=>"Error in \$SIMULATION record in modelfile: wrong number of seeds.");
 641     }
 642   } else {
 643     my $seed1 = random_uniform_integer(1,0,2147483560 );
 644     my $seed2 = random_uniform_integer(1,0,2147483560 );
 645     @rec_strings = ('('.$seed1.')');
 646     my $np_record = $model_sim -> record(record_name => 'nonparametric' );
 647     if( scalar(@{$np_record}) > 0 ){
 648       push (@rec_strings ,'('.$seed2.' NONPARAMETRIC)');
 649       $model_sim -> remove_records(type => 'nonparametric');
 650     }
 651     if (defined $self->{'msfo_file'}){ #always if $nonp, but even if not $nonp
 652       push (@rec_strings ,'TRUE=FINAL');
 653     }
 654   }
 655
 656   unless ($self->{'do_estimation'}){
 657     push (@rec_strings,('ONLYSIMULATION'));
 658   }
 659   push (@rec_strings,('NSUBPROBLEMS='.$self->{'samples'}));
 660   $model_sim -> set_records(type => 'simulation',
 661                             record_strings => \@rec_strings);
 662
 663
 664
 665 ###end simulation record######
 666
 667
 668 #det som koer modellerna aer ett modelfit-objekt. Detta maoste skapas explicit.
 669
 670   $self->{'original_model'} = $model_orig;
 671   $self->{'simulation_model'} = $model_sim;
 672
 673   push( @{$self -> {'tools'}},
 674         tool::modelfit ->
 675         new( reference_object      => $self,
 676              models              => [$model_orig,$model_sim],
 677              parent_threads        => 1,
 678              raw_results           => undef,
 679              prepared_models       => undef,
 680              top_tool              => 0,
 681              prepend_model_file_name => 1
 682              ) );
 683
 684
 685
 686 }
 687 end modelfit_setup
 688 # }}}
 689
 690 # {{{ _modelfit_raw_results_callback
 691
 692 #start _modelfit_raw_results_callback
 693
 694 #end _modelfit_raw_results_callback
 695
 696 # }}}
 697
 698 # {{{ create_unique_values_hash
 699
 700 start create_unique_values_hash
 701 {
 702   #in @data_column
 703   #out %value_hash
 704   my $value_index = 0;
 705
 706   foreach my $val (sort {$a <=> $b} @data_column){
 707     if ($value_index == 0){
 708       $value_hash{$value_index}=$val;
 709       $value_index++;
 710       next;
 711     }
 712     unless ($val == $value_hash{($value_index-1)}){
 713       $value_hash{$value_index}=$val;
 714       $value_index++;
 715     }
 716   }
 717
 718 }
 719 end create_unique_values_hash
 720
 721 # }}}
 722
 723 # {{{ get_bin_boundaries_overlap_count
 724
 725 start get_bin_boundaries_overlap_count
 726 {
 727   # input value_hash
 728   # overlap_percent and scalar count
 729   # data_column, and data_indices
 730   # return bin_floors, lower boundary not part of upper bin
 731   # and bin_ceilings, list of upperlimits, upperlimit is part of lower bin!!!
 732
 733   my $value_index;
 734   my $n_values = scalar(keys %value_hash);
 735   my @obs_count=(0) x $n_values;
 736   my $check_count=0;
 737
 738   #Count how many values we have in input for each unique value
 739   for ($value_index=0; $value_index < $n_values;$value_index++){
 740     foreach my $index (@data_indices){
 741       if ($data_column[$index] == $value_hash{$value_index}){
 742         $obs_count[$value_index] +=1;
 743         $check_count+=1;
 744       }
 745     }
 746   }
 747
 748   unless ($check_count == scalar(@data_indices)){
 749     'debug'->die(message=>"Did not find all data values in hash.");
 750   }
 751
 752   my $local_error=-$count;
 753   my $overlap=$count*$overlap_percent/100;
 754   my $overlap_error;
 755   push(@bin_floors,($value_hash{0}));
 756   $value_index = 0;
 757   my $prev_start_index=0;
 758
 759   while ($value_index < $n_values ) {
 760     if ($obs_count[$value_index] == 0){
 761       $value_index++; #no observations for this value, skip to next
 762     } elsif ($local_error == -$count){
 763       #bin empty since error equals initial error, must add observations
 764       $local_error += $obs_count[$value_index];
 765       $prev_start_index=$value_index;
 766       $value_index++;
 767     }elsif (abs($local_error)>abs($local_error+$obs_count[$value_index])){
 768       #bin not empty and adding observations will reduce error
 769       $local_error += $obs_count[$value_index];
 770       $value_index++;
 771     } else {
 772       #bin not empty and best to switch to new bin
 773       push(@bin_ceilings,$value_hash{$value_index-1}); #last index was previous index
 774       $local_error=-$count;
 775       $overlap_error=-$overlap;
 776       #compute overlap error if starting at $prev_start_index+1
 777       for (my $k=($value_index-1);$k>$prev_start_index;$k--){
 778         $overlap_error += $obs_count[$k];
 779       }
 780       $value_index= $prev_start_index+1; #must have nonzero overlap
 781       #check if increasing value_index, i.e. moving start of next bin, will reduce overlap errror
 782       while (abs($overlap_error)>abs($overlap_error-$obs_count[$value_index])){
 783         $overlap_error -= $obs_count[$value_index];
 784         $value_index++;
 785       }
 786       push(@bin_floors,$value_hash{$value_index-1});
 787     }
 788   }
 789   push(@bin_ceilings,$value_hash{$n_values-1});
 790
 791 }
 792 end get_bin_boundaries_overlap_count
 793
 794 # }}}
 795
 796 # {{{ get_bin_ceilings_from_count
 797
 798 start get_bin_ceilings_from_count
 799 {
 800   # input value_hash
 801   # n_bins or list_counts or single_bin_size
 802   # data_column, and data_indices
 803   # return bin_ceilings, list of upperlimits, upperlimit is part of lower bin!!!
 804
 805   my @ideal_count=();
 806   my $value_index;
 807   my $n_values = scalar(keys %value_hash);
 808   my @obs_count=(0) x $n_values;
 809   my $nonzero_counts=0;
 810
 811
 812   #Count how many values we have in input for each unique value
 813   #and how many of the unique values have nonzero count
 814   for ($value_index=0; $value_index < $n_values;$value_index++){
 815     foreach my $index (@data_indices){
 816       if ($data_column[$index] == $value_hash{$value_index}){
 817         $obs_count[$value_index] +=1;
 818       }
 819     }
 820   }
 821   foreach my $c (@obs_count){
 822     if ($c > 0){
 823       $nonzero_counts++;
 824     }
 825   }
 826
 827
 828
 829   if (defined $n_bins){
 830     if ($n_bins < 1){
 831       'debug'->die(message=>"Number of bins must be at least 1.");
 832     }
 833     if (scalar(@list_counts)>0){
 834       'debug'->die(message=>"Cannot input both number of bins and set of bin counts.");
 835     }
 836
 837     if ($n_bins >= $nonzero_counts){#one bin per unique value with nonzero obs => no binning
 838                                         return;
 839                                       }
 840     my $count = scalar(@data_indices)/$n_bins;
 841     @ideal_count = ($count) x $n_bins;
 842
 843   } elsif (defined $single_bin_size){
 844     #translate to $n_bins
 845     my $nb = int (scalar(@data_indices)/$single_bin_size); #round down
 846     $nb++ if ((scalar(@data_indices)/$single_bin_size)-$nb >= 0.5);
 847     if ($nb >= $nonzero_counts){#one bin per unique value with nonzero obs => no binning
 848                                     return;
 849                                   }
 850     my $count = scalar(@data_indices)/$nb;
 851     @ideal_count = ($count) x $nb;
 852   } elsif (scalar(@list_counts)>0 && scalar(@list_counts)<$nonzero_counts ){
 853     #check that requested counts matches total number of observations
 854     my $checksum=0;
 855     foreach my $c (@list_counts){
 856       $checksum += $c;
 857     }
 858     unless ($checksum == scalar(@data_indices)){
 859       'debug'->die(message=>"Sum of observations requested in each bin must equal ".
 860                    "total number of observations (in strata).");
 861     }
 862
 863     push (@ideal_count,@list_counts);
 864   }  else {
 865     return;
 866   }
 867
 868   my $global_error=0;
 869   my $bin_index=0;
 870   my $local_error=-$ideal_count[$bin_index];
 871
 872   for ($value_index = 0; $value_index < $n_values; $value_index++) {
 873 #       print "index=$value_index obs=$obs_count[$value_index] local=$local_error global=$global_error \n";
 874     if ($bin_index == $#ideal_count){
 875       #have reached last bin
 876       #last upper limit is largest unique value, include all remaining observations
 877       push(@bin_ceilings,$value_hash{($n_values-1)});
 878       last;
 879     }elsif ($local_error == -$ideal_count[$bin_index]){
 880       #bin empty since error equals initial error
 881       $local_error += $obs_count[$value_index];
 882     }elsif ($obs_count[$value_index] == 0){
 883       next;
 884     }elsif (abs($global_error+$local_error)>
 885             abs($global_error+$local_error+$obs_count[$value_index])
 886             && (($n_values-$value_index-1)> ($#ideal_count - $bin_index))){
 887       #adding observations will reduce global error
 888       #and more than one unique value remaining per remaining bin.
 889       $local_error += $obs_count[$value_index];
 890
 891     } else {
 892       #bin not empty and best add observations to new bin
 893       push(@bin_ceilings,$value_hash{$value_index-1});
 894       $global_error += $local_error;
 895       $bin_index++;
 896       $local_error=-$ideal_count[$bin_index]+$obs_count[$value_index];
 897     }
 898
 899
 900   }
 901
 902 }
 903
 904 end get_bin_ceilings_from_count
 905
 906 # }}}
 907
 908 # {{{ get_bin_boundaries_overlap_value
 909
 910 start get_bin_boundaries_overlap_value
 911 {
 912   # @data_column, @data_indices
 913   # width, overlap_percent
 914   #return bin_floors, bin_ceilings
 915
 916   my @local_values=();
 917   for (my $i=0; $i< scalar(@data_indices); $i++){
 918     push(@local_values,$data_column[$data_indices[$i]]);
 919   }
 920
 921   my @temp = sort {$a <=> $b} @local_values;
 922   my $minval = $temp[0];
 923   my $maxval = $temp[$#temp];
 924   my $non_overlap_width=$width*(100-$overlap_percent)/100;
 925
 926   my $next_ceiling=$minval+$width;
 927   my $next_floor=$minval;
 928   while ($next_ceiling < $maxval){
 929     push(@bin_floors,$next_floor);
 930     push(@bin_ceilings,$next_ceiling);
 931     $next_ceiling += $non_overlap_width;
 932     $next_floor = $next_ceiling - $width;
 933   }
 934   push(@bin_floors,$next_floor);
 935   push(@bin_ceilings,$maxval);
 936
 937 }
 938 end get_bin_boundaries_overlap_value
 939
 940 # }}}
 941
 942 # {{{ get_bin_ceilings_from_value
 943
 944 start get_bin_ceilings_from_value
 945 {
 946   # @data_column, @data_indices
 947   # n_bins or list_boundaries or single_bin_size
 948   #return bin_ceilings, upper limits of intervals, upperlimit part of lower bin
 949
 950   my @local_values=();
 951   for (my $i=0; $i< scalar(@data_indices); $i++){
 952     push(@local_values,$data_column[$data_indices[$i]]);
 953   }
 954
 955   my @temp = sort {$a <=> $b} @local_values;
 956   my $minval = $temp[0];
 957   my $maxval = $temp[$#temp];
 958
 959   if (defined $n_bins){
 960     if (scalar(@list_boundaries)>0){
 961       'debug'->die(message=>"Cannot input both number of intervals and set of boundaries");
 962     }
 963     my $width = ($maxval-$minval)/$n_bins;
 964     for (my $i=1; $i<=$n_bins; $i++){
 965       push (@bin_ceilings,($width*$i+$minval));
 966     }
 967   } elsif (defined $single_bin_size){
 968     #translate to $n_bins
 969     my $nb = int(($maxval-$minval)/$single_bin_size); #round down
 970     $nb++ if ((($maxval-$minval)/$single_bin_size)-$nb >= 0.5);
 971     my $width = ($maxval-$minval)/$nb;
 972     for (my $i=1; $i<=$nb; $i++){
 973       push (@bin_ceilings,($width*$i+$minval));
 974     }
 975
 976   } elsif (scalar(@list_boundaries)>0){
 977     push (@bin_ceilings,@list_boundaries);
 978     if ($maxval > $list_boundaries[$#list_boundaries]){
 979       push (@bin_ceilings,$maxval);
 980     }
 981   }
 982
 983 }
 984 end get_bin_ceilings_from_value
 985
 986 # }}}
 987
 988
 989 # {{{ index_matrix_binned_values
 990
 991 start index_matrix_binned_values
 992 {
 993   #based on either unique values hash or bin_ceilings,
 994   #create array of arrays of data matrix row indices
 995   #one index array for each unique value
 996   #input %value_hash, @data_column, @data_indices,
 997   #optional input @bin_ceilings, if empty then unique values
 998   #optional input @bin_floors, only allowed if ceilings given.
 999   #If floors empty then ceilings are floors , if given then first value is inclusive, the rest exclusive
1000   #output index_matrix
1001
1002   my @ceilings=();
1003   my @floors=();
1004   if (scalar(@bin_ceilings)>0){
1005     push(@ceilings,@bin_ceilings);
1006     if (scalar(@bin_floors)>0){
1007       push(@floors,@bin_floors);
1008     }else {
1009       push(@floors,$value_hash{0});
1010       for (my $i=0; $i<$#ceilings; $i++){
1011         push(@floors,$ceilings[$i]);
1012       }
1013     }
1014   }else {
1015     #check that floors not given as input
1016     if (scalar(@bin_floors)>0){
1017       'debug'->die(message=>"Cannot give bin floor input to index_matrix_binned_values ".
1018                    "unless ceilings given.");
1019     }
1020     #no_binning, base on unique values
1021     my $no_of_values = scalar (keys %value_hash);
1022     for (my $value_index=0; $value_index<$no_of_values; $value_index++){
1023       push(@ceilings,$value_hash{$value_index});
1024     }
1025     push(@floors,$value_hash{0} - 0.2);
1026     for (my $i=0; $i<$#ceilings; $i++){
1027       push(@floors,$ceilings[$i]);
1028     }
1029   }
1030
1031   for (my $bin_index=0; $bin_index<scalar(@ceilings); $bin_index++){
1032     my @index_row=();
1033     my $this_floor=$floors[$bin_index];
1034     $this_floor -= 0.2 if ($bin_index==0); #first floor is inclusive
1035     foreach my $row_index (@data_indices){
1036       if ($data_column[$row_index]>$this_floor &&
1037           $data_column[$row_index]<=$ceilings[$bin_index] ){
1038         push (@index_row,$row_index);
1039       }
1040     }
1041     push (@index_matrix,\@index_row);
1042   }
1043
1044 }
1045 end index_matrix_binned_values
1046
1047 # }}}
1048
1049
1050 # {{{ get_data_matrix
1051
1052 start get_data_matrix
1053 {
1054   my $signal_file = $self -> {'directory'}."/m1/".$self -> {'dv'}."_matrix_saved";
1055   my $matrix_saved = ( -e $signal_file ) ? 1 : 0;
1056   my $no_sim=$self->{'samples'};
1057   my $type = ($self->{'is_vpc'}) ? 'vpc' : 'npc';
1058
1059   if ($matrix_saved){
1060     open( MATRIX, $self -> {'directory'}."/m1/".$self -> {'dv'}."_matrix.csv" ) or
1061         'debug'->die(message=> "Could not find saved matrix.");
1062     my $nsim=0;
1063     while (my $row = <MATRIX>){
1064       chomp $row;
1065       push (@matrix,$row);
1066       $nsim= (split(/,/,$row)) -1 if ($nsim==0);
1067     }
1068     close(MATRIX);
1069     #check that found correct number of columns in matrix
1070     unless ($nsim == $no_sim){
1071       'debug'->die(message=> "Number of simulated data sets $nsim in saved matrix\n".
1072                    "is different from number $no_sim in input (-samples option).");
1073     }
1074     return \@matrix;
1075   }
1076
1077
1078   my @datearr=localtime;
1079   my $the_time=sprintf "%2.2d:%2.2d:%2.2d",$datearr[2],$datearr[1],$datearr[0];
1080   my $prev_sec=$datearr[2]*3600+$datearr[1]*60+$datearr[0];
1081   my $this_sec;
1082   ui -> print (category=>'npc',
1083                message=>"\nReading and formatting $self->{'dv'} data. This can take a while...");
1084
1085   my $orig_file = (defined $self->{'dv_table_name'}) ? $self->{'dv_table_name'}:
1086       $self->{'original_model'}-> get_option_value(record_name => 'table',
1087                                                    option_name => 'FILE');
1088
1089   $orig_file = $self->{'original_model'}->directory().$type.'_original.'.$orig_file;
1090
1091   unless ( -e $orig_file ){
1092     my $file_to_check = $self -> {'directory'}."NM_run1/compute_cwres.Rout";
1093     print "\nCheck installation of R and XPose, check file $file_to_check\n"
1094         if ($self->{'dv'} eq 'CWRES');
1095     $file_to_check = $self -> {'directory'}."NM_run1/psn-1.lst";
1096     'debug'->die(message=> "File $orig_file \nwith table output for original data does not exist. ".
1097                  "It is recommended to check lst-file $file_to_check for NONMEM error messages.");
1098   }
1099   my $d = data -> new(filename=>$orig_file); #läser in allt
1100
1101   my $no_individuals= scalar(@{$d->{'individuals'}});
1102
1103   my $filt = $d -> create_row_filter('no_individuals'=>$no_individuals);
1104   my $all_rows_observations = (scalar(@{$filt}) > 0)? 0 : 1;
1105
1106   #NONMEM only prints first 4 letters of variable name as header
1107   #exception for CWRES
1108   my $dv_header = ($self->{'dv'} eq 'CWRES') ? $self->{'dv'} : substr($self->{'dv'},0,4);
1109
1110   @matrix = @{$d -> column_to_array('column'=>$dv_header,'filter'=>$filt)};
1111   my $no_observations = scalar(@matrix);
1112
1113   unless ($no_observations >0){
1114     'debug' -> die(message => "No $self->{'dv'} values found after filtering original data.");
1115   }
1116
1117   #want to always read whole simulation, i.e. whole table. Compute how many
1118   #simulations can be read in each round given this condition and a limit of 400000 lines
1119   #count lines in original tablefile
1120   my $linecount=0;
1121   open(ORIG, $orig_file ) or 'debug'->die(message=> "Could not open tablefile.");
1122   while (<ORIG>){
1123     $linecount++;
1124   }
1125   close(ORIG);
1126   my $max_read_sim = ($linecount >= 400000) ? 1 : int (400000/$linecount);
1127   my $max_ind=$max_read_sim*$no_individuals; #individuals to read in one round
1128
1129   $filt = undef;
1130   $d = undef;
1131
1132   my $sim_file = (defined $self->{'dv_table_name'}) ? $self->{'dv_table_name'}:
1133       $self->{'simulation_model'}-> get_option_value(record_name => 'table',
1134                                                      option_name => 'FILE');
1135
1136   $sim_file = $self->{'simulation_model'}->directory().$type.'_simulation.'.$sim_file;
1137   unless ( -e $sim_file ){
1138     my $file_to_check = $self -> {'directory'}."NM_run2/compute_cwres.Rout";
1139     print "\nCheck installation of R and XPose, check file $file_to_check\n"
1140         if ($self->{'dv'} eq 'CWRES');
1141     $file_to_check = $self -> {'directory'}."NM_run2/psn-1.lst";
1142     'debug'->die(message=> "File $sim_file \nwith table output for original data does not exist. ".
1143                  "It is recommended to check lst-file\n$file_to_check \nfor NONMEM error messages.");
1144   }
1145
1146   $d = data -> new(filename=>$sim_file,target =>'disk');
1147   $d -> _read_header();
1148
1149   #read portions of the simdatafiles, until all expected records read
1150   #build data matrix so that there is one column for each simulation
1151   #(real data is first column) and one row for each data point.
1152   #Store as one-dimensional array of rows, each row is comma-separated string of values
1153
1154   my $skip_tab=0;
1155   while ( $skip_tab<$no_sim){
1156
1157     @datearr=localtime;
1158     $the_time=sprintf "%2.2d:%2.2d:%2.2d",$datearr[2],$datearr[1],$datearr[0];
1159     $this_sec=$datearr[2]*3600+$datearr[1]*60+$datearr[0];
1160     unless ($skip_tab == 0){
1161       my $sec_rem = $self->ceil('number'=>(($no_sim-$skip_tab)/$max_read_sim))*
1162           ($this_sec-$prev_sec);
1163       my $percent_done=int(100*$skip_tab/$no_sim);
1164       my $minutes_remaining=$self->ceil('number'=>($sec_rem/60));
1165       my $plural = ($minutes_remaining == 1)? '' : 's';
1166       my $mess="Time $the_time, $percent_done\% done. ".
1167           "Approximately $minutes_remaining minute$plural of formatting remaining.";
1168       ui -> print (category=>'npc', message=>$mess);
1169     }
1170     $prev_sec=$this_sec;
1171
1172
1173 #       my $mess=sprintf "Time $the_time. Processing %d to %d out of %d simulated datasets...",
1174 #       ($skip_tab+1),
1175 #       (($skip_tab+$max_read_sim)>$no_sim) ? $no_sim :($skip_tab+$max_read_sim),$no_sim;
1176
1177     $d-> _read_individuals('skip_tables'=>$skip_tab,'max_individuals'=>$max_ind);
1178
1179     ##error check
1180     my $tables_to_read= ($max_read_sim > ($no_sim-$skip_tab))? ($no_sim-$skip_tab): $max_read_sim;
1181     my $individuals_expected = $tables_to_read*$no_individuals;
1182     unless (scalar(@{$d->{'individuals'}}) == $individuals_expected){
1183       my $file_to_check = $self -> {'directory'}."NM_run2/psn-1.lst";
1184       'debug' -> die(message => "Did not find the expected number of individuals to read in \n".
1185                      "$sim_file. \nA possible cause is that some simulations failed. ".
1186                      "Check lst-file\n $file_to_check  \nfor error messages. Also check in \n$sim_file \n".
1187                      "that number of TABLE rows is equal to -samples ".
1188                      "requested on command-line, and that all TABLE sections contain ".
1189                      "observations for all individuals.");
1190     }
1191
1192     $d -> synced(1);
1193
1194     if ($skip_tab == 0){ #if first read from simdata, create filter
1195       $filt = $d -> create_row_filter('no_individuals'=>$no_individuals);
1196       my $test_count=0;
1197       unless ($all_rows_observations){
1198         foreach (@{$filt}){
1199           $test_count++ if ($_ > 0);
1200         }
1201         unless ($test_count == $no_observations){
1202           'debug' -> die(message => "Number of observations $test_count after filtering ".
1203                          "simulated data set were not equal to number after ".
1204                          "filtering $no_observations original data set.");
1205         }
1206       }
1207     }
1208     my $column_data = $d -> column_to_array('column'=>$dv_header,'filter'=>$filt);
1209
1210     unless (scalar @{$column_data} >0){
1211       'debug' -> die(message => "No $self->{'dv'} values found after filtering simulated data portion.");
1212     }
1213
1214     $d-> flush();
1215
1216     data -> append_columns_to_matrix('matrix'=>\@matrix,'columns'=>$column_data);
1217     $column_data = undef;
1218     $skip_tab += $max_read_sim;
1219   }
1220
1221   #check simulations read
1222   my $no_read_sim= (split(/,/,$matrix[0])) -1;
1223   unless ($no_sim == $no_read_sim){
1224     'debug'-> die(message =>"Number of read simulated datasets $no_read_sim is\n".
1225                   "different from expected number $no_sim.");
1226   }
1227
1228
1229   #print matrix
1230   open( MATRIX, ">".$self -> {'directory'}."/m1/".$self -> {'dv'}."_matrix.csv" ) ;
1231   foreach my $row (@matrix){
1232     print MATRIX "$row"."\n";
1233   }
1234   close (MATRIX);
1235   #create signal file
1236   open( DONE, ">".$signal_file ) ;
1237   my $tmp = $no_sim+1;
1238   print DONE "Created $no_observations rows by $tmp columns data matrix ".
1239       "and wrote to file $self -> {'dv'}_matrix.csv\n";
1240   print DONE "First column is original data, columns 2 to $tmp are simulated data.\n";
1241   close( DONE );
1242
1243
1244   ui -> print (category=>'npc', message=> "Done reading and formatting $self->{'dv'} data, finishing run.");
1245
1246 }
1247 end get_data_matrix
1248
1249 # }}}
1250
1251
1252 # {{{ modelfit_analyze
1253 start modelfit_analyze
1254 {
1255
1256   my $type='npc';
1257   if ($self->{'is_vpc'}){
1258     $type='vpc';
1259   }
1260   my $matrix = $self -> get_data_matrix();
1261   my $no_sim= (split(/,/,$matrix ->[0])) -1;
1262   unless ($no_sim == $self->{'samples'}){
1263     'debug'-> die(message =>"Number of simulated datasets in matrix file $no_sim is\n".
1264                   "different from number $self->{'samples'} in input (option -samples).");
1265   }
1266   print "$no_sim simulations \n" if ($self->{'verbose'});
1267
1268   my $no_of_strata = 1;
1269   my $strat_hash;
1270   my $strat_array;
1271   my $strat_matrix;
1272   my @strata_labels=();
1273   my $no_observations = scalar(@{$matrix});
1274   my @all_indices= (0 .. ($no_observations-1));
1275
1276   if (defined $self -> {'stratify_on'}){
1277     my $col = $self -> {'stratify_on'};
1278     #NONMEM only prints first 4 letters of variable name as header
1279     #no CWRES exception since not allowed stratify on CWRES
1280
1281     my $strat_header = substr($self->{'stratify_on'},0,4);
1282
1283     #add new: If ((defined $self->{dv_table_name }) && (not $self->{'dv} eq 'CWRES'))
1284     #use dv_table_name instead
1285     #OR require that stratification and binning variables are in vpc-generated table
1286
1287     my $orig_file =  $self->{'original_model'}->directory().$type.'_original.'.
1288         $self->{'original_model'}-> get_option_value(record_name => 'table',
1289                                                      option_name => 'FILE');
1290
1291     my $d = data -> new(filename=>$orig_file); #läser in allt
1292
1293     my $no_individuals= scalar(@{$d->{'individuals'}});
1294     my $filt = $d -> create_row_filter('no_individuals'=>$no_individuals);
1295
1296     $strat_array = $d -> column_to_array('column'=>$strat_header,'filter'=>$filt);
1297     unless (scalar @{$strat_array} > 0){
1298       'debug'-> die(message=>"Could not find column to stratify on in original data file.");
1299     }
1300     unless (scalar @{$strat_array} == $no_observations){
1301       'debug'-> die(message=>"Number of observations in stratification column after filtering ".
1302                     "does not match number of observations in matrix file.");
1303     }
1304
1305     $strat_hash = $self -> create_unique_values_hash('data_column'=>$strat_array);
1306     #compute strata limits. Currently only no_of_strata based on counts allowed.
1307     #ok with undefined no_of_strata, will return empty array, which
1308     #index_matrix_binned_values interprets as stratify on unique values.
1309
1310     my $strata_ceilings = $self -> get_bin_ceilings_from_count('data_column'=>$strat_array,
1311                                                                'value_hash'=>$strat_hash,
1312                                                                'data_indices'=>\@all_indices,
1313                                                                'n_bins'=>$self->{'no_of_strata'});
1314     $strat_matrix = $self -> index_matrix_binned_values('data_column'=>$strat_array,
1315                                                         'value_hash'=>$strat_hash,
1316                                                         'data_indices'=>\@all_indices,
1317                                                         'bin_ceilings'=>$strata_ceilings);
1318
1319     #create label array for printed report.
1320     if (defined $self->{'no_of_strata'} && $self->{'no_of_strata'}< scalar(keys %{$strat_hash})){
1321       $no_of_strata=$self->{'no_of_strata'};
1322       my $low= sprintf "[%g;",${$strat_hash}{0};
1323       foreach my $high (@{$strata_ceilings}){
1324         my $lab = sprintf "strata $col %s %g]",$low,$high;
1325         push (@strata_labels,$lab);
1326         $low=sprintf "(%g;",$high;
1327       }
1328     } else{
1329       $no_of_strata = scalar (keys %{$strat_hash});
1330       for (my $strat_ind=0; $strat_ind<$no_of_strata; $strat_ind++){
1331         my $lab = sprintf "strata $col = %g",${$strat_hash}{$strat_ind};
1332         push (@strata_labels,$lab);
1333       }
1334     }
1335
1336
1337     if ($self->{'verbose'}){
1338       print "\n strata $no_of_strata\n";
1339       foreach my $k (keys %{$strat_hash}){
1340         print "key $k and value ${$strat_hash}{$k}\n";
1341       }
1342     }
1343     $d-> flush();
1344   } else {
1345     $strat_matrix = [\@all_indices];
1346     push (@strata_labels," ");
1347
1348   }
1349
1350
1351   if ($no_of_strata > 10){
1352     ui -> print (category=>'npc', message=>"Warning: The number of stratification levels is $no_of_strata");
1353   }
1354
1355   if ($self->{'is_vpc'}){
1356     $self->vpc_analyze('matrix'=>$matrix,
1357                        'strat_matrix'=>$strat_matrix,
1358                        'strata_labels'=>\@strata_labels,
1359                        'strata_variable'=> $strat_array);
1360   } else {
1361     $self->npc_analyze('matrix'=>$matrix,
1362                        'strat_matrix'=>$strat_matrix,
1363                        'strata_labels'=>\@strata_labels);
1364   }
1365
1366
1367 }
1368 end modelfit_analyze
1369 # }}} modelfit_analyze
1370
1371 # {{{ round
1372 start round
1373 {
1374   my $floor=int($number);
1375   my $rem=$number-$floor;
1376   if ($rem >= 0){
1377     $integer_out = ($rem >= 0.5)? $floor+1 : $floor;
1378   } else {
1379     $integer_out = (abs($rem) >= 0.5)? $floor-1 : $floor;
1380   }
1381
1382 }
1383 end round
1384 #}}} round
1385
1386 # {{{ ceil
1387 start ceil
1388 {
1389   my $floor=int($number);
1390   my $rem=$number-$floor;
1391   if ($rem > 0){
1392     $integer_out = $floor+1;
1393   } else {
1394     #equal or  neg
1395     $integer_out = $floor;
1396   }
1397
1398 }
1399 end ceil
1400 #}}} ceil
1401
1402 # {{{ median
1403 start median
1404 {
1405   my $len= scalar( @{$sorted_array} );
1406
1407   if( $len  % 2 ){
1408     $result = $sorted_array->[($len-1)/2];
1409   } else {
1410     $result = ($sorted_array->[$len/2]+$sorted_array->[($len-2)/2])/ 2;
1411   }
1412 }
1413 end median
1414 #}}} median
1415
1416 # {{{ vpc_analyze
1417 start vpc_analyze
1418 {
1419   #input refs strat_matrix, strata_labels(fix no_of_strata from labels)
1420   #matrix (fix no_observations and no_sim from matrix)
1421
1422   my $no_sim= (split(/,/,$matrix ->[0])) -1;
1423   my $no_observations = scalar(@{$matrix});
1424   my $no_of_strata = scalar(@{$strata_labels});
1425
1426
1427   #Recreate basic observation filter, and hash for binning. Copy paste from modelfit_analyze
1428
1429   my $col = $self -> {'bin_on'};
1430   #NONMEM only prints first 4 letters of variable name as header
1431   #no exception for CWRES since not allowed to bin on CWRES
1432   my $bin_header = substr($self->{'bin_on'},0,4);
1433
1434   #add new: If ((defined $self->{dv_table_name }) && (not $self->{'dv} eq 'CWRES'))
1435   #use dv_table_name instead
1436   #OR require that stratification and binning variables are in vpc-generated table
1437
1438   my $orig_file =  $self->{'original_model'}->directory().'vpc_original.'.
1439       $self->{'original_model'}-> get_option_value(record_name => 'table',
1440                                                    option_name => 'FILE');
1441
1442   my $d = data -> new(filename=>$orig_file); #läser in allt
1443
1444   my $no_individuals= scalar(@{$d->{'individuals'}});
1445   my $filter = $d -> create_row_filter('no_individuals'=>$no_individuals);
1446
1447   my $bin_array = $d -> column_to_array('column'=>$bin_header,'filter'=>$filter);
1448   unless (scalar @{$bin_array} > 0){
1449     'debug'-> die(message=>"Could not find column to bin on in original data file.");
1450   }
1451   unless (scalar @{$bin_array} == $no_observations){
1452     'debug'-> die(message=>"Number of observations in binning column after filtering ".
1453                   "does not match number of observations in matrix file.");
1454   }
1455
1456 ###### print data for VPC plots, prepare mirror parameters
1457   #add ID
1458   my $id_array = $d -> column_to_array('column'=>'ID','filter'=>$filter);
1459   unless (scalar @{$id_array} > 0){
1460     'debug'-> die(message=>"Could not find ID column in original data file.");
1461   }
1462   unless (scalar @{$id_array} == $no_observations){
1463     'debug'-> die(message=>"Number of observations in ID column after filtering ".
1464                   "does not match number of observations in matrix file.");
1465   }
1466
1467   #add translated stratacol
1468   my @translated_strata = (0) x $no_observations;
1469   for (my $strat_ind=0; $strat_ind<$no_of_strata; $strat_ind++){
1470     foreach my $j (@{$strat_matrix->[$strat_ind]}){
1471       $translated_strata[$j] = ($strat_ind +1);
1472     }
1473   }
1474
1475   #filename vpctabN - find number N
1476   my $tabno='';
1477   if ($self -> {'models'}->[0]->filename() =~ /run(\d+)\.mod/){
1478     $tabno=$1;
1479   }
1480
1481
1482   #mirrors
1483   my @mirror_set=();
1484   my @mirror_labels=();
1485   if (defined $self->{'mirrors'}){
1486     my @rand_indices=random_permuted_index($self -> {'samples'});
1487     @mirror_set = @rand_indices[0 .. ($self->{'mirrors'}-1)];
1488     for my $j (1 .. $self->{'mirrors'}){
1489       push (@mirror_labels,"mirror_$j");
1490     }
1491   }
1492
1493   open( ST, ">".$self -> {'directory'}."/vpctab".$tabno);
1494   #header row
1495   print ST "ID,".$self->{'dv'}.",".$self->{'bin_on'};
1496   print ST ",strata_no,".$self->{'stratify_on'} if (defined $self->{'stratify_on'});
1497   foreach my $j (@mirror_labels){
1498     print ST ",$j";
1499   }
1500   print ST "\n";
1501
1502   for (my $i=0;$i<$no_observations;$i++){
1503     my $idno = sprintf "%d",$id_array->[$i] ;
1504     my $row = $matrix->[$i];
1505     my $idv=$bin_array->[$i];
1506     my $orig_value;
1507     my @tmp;
1508     ($orig_value,@tmp) = split(/,/,$row);
1509     print ST "$idno,$orig_value,$idv";
1510     print ST ",$translated_strata[$i],".$strata_variable->[$i] if (defined $self->{'stratify_on'});
1511     foreach my $j (@mirror_set){
1512       print ST ",$tmp[$j]";
1513     }
1514     print ST "\n";
1515   }
1516   close (ST);
1517
1518 #################
1519
1520   my $bin_hash = $self -> create_unique_values_hash('data_column'=>$bin_array);
1521
1522
1523   my @pred_int = sort {$a <=> $b} 0,40,80,90,95;
1524
1525   my @perc_limit;
1526   foreach my $pi (@pred_int){
1527     if ($pi == 0){
1528       push (@perc_limit,50);
1529     }else {
1530       push (@perc_limit,(100-$pi)/2);
1531       push (@perc_limit,(100-(100-$pi)/2));
1532     }
1533   }
1534
1535   my $no_perc_limits = scalar(@perc_limit);
1536   my @limit = (0) x $no_perc_limits;
1537   my @lower_limit_ci = (0) x $no_perc_limits;
1538   my @upper_limit_ci = (0) x $no_perc_limits;
1539   my @limit_real = (0) x $no_perc_limits;
1540   my @limit_index= (0) x $no_perc_limits;
1541   my @limit_singlesim;
1542   for (my $i=0; $i<$no_perc_limits; $i++){
1543     @{$limit_singlesim[$i]} = (0) x ($no_sim);
1544   }
1545   my @limit_mirrors;
1546   if (defined $self->{'mirrors'}){
1547     for (my $i=0; $i<$no_perc_limits; $i++){
1548       @{$limit_mirrors[$i]} = (0) x ($self->{'mirrors'});
1549     }
1550   }
1551
1552
1553   my $c_i=95;
1554   my $low_ci_ind = $self->round('number'=>((100-$c_i)*($no_sim-1)/200));
1555 #    my $low_ci_ind = int(((100-$c_i)*($no_sim-1)/200)+0.5); #index of start of ci % interval
1556   my $high_ci_ind = $no_sim - $low_ci_ind - 1; #index of end  of  c_i% interval
1557
1558
1559   ## Prepare general run info for output file, and bin labels
1560   my %return_section;
1561   $return_section{'name'} = 'VPC run info';
1562   my $modelname=$self-> {'models'}->[0] ->filename();
1563   my $extra_value=$modelname;
1564   if (defined $self->{'msfo_file'}){
1565     $extra_value='msfo-file '.$self->{'msfo_file'};
1566   } elsif (defined $self->{'lst_file'}){
1567     $extra_value='lst-file '.$self->{'lst_file'};
1568   }
1569
1570   $return_section{'labels'} = [[],['Date','observations','simulations','Modelfile',
1571                                    'parameter values from','Independent variable','Dependent variable',
1572                                    'PsN version','NONMEM version']];
1573   my @datearr=localtime;
1574   my $the_date=($datearr[5]+1900).'-'.($datearr[4]+1).'-'.($datearr[3]);
1575
1576   $return_section{'values'} = [[$the_date,$no_observations,$no_sim,$modelname,$extra_value,
1577                                 $self->{'bin_on'},$self->{'dv'},$PsN::version,$self->{'nm_version'}]];
1578   push( @{$self -> {'results'}[0]{'own'}},\%return_section );
1579
1580   my @result_column_labels=("< $col",'<=','no. of obs');
1581   for (my $i=0; $i<$no_perc_limits; $i++){
1582     push (@result_column_labels,"$perc_limit[$i]\% real");
1583     foreach my $lab (@mirror_labels){
1584       push (@result_column_labels,"$perc_limit[$i]\% ".$lab);
1585     }
1586     push (@result_column_labels,"$perc_limit[$i]\% sim","$c_i\%CI for $perc_limit[$i]\% from",
1587           "$c_i\%CI for $perc_limit[$i]\% to");
1588   }
1589
1590   #For VPC diagnostics
1591
1592   my $npc_pi_offset=0;
1593   foreach my $pi (@pred_int){
1594     if ($pi == 0){
1595       $npc_pi_offset=1;
1596     }else {
1597       push (@result_column_labels,"PI $pi\% False pos (\%)","PI $pi\% False neg (\%)")
1598         }
1599   }
1600
1601   my ($npc_lower_index,$npc_upper_index,$npc_low_ind,$npc_high_ind);
1602
1603   ($npc_lower_index,$npc_upper_index,$npc_low_ind,$npc_high_ind)=
1604       $self->get_npc_indices('ci' => $c_i,
1605                              'no_sim' => $no_sim,
1606                              'pred_intervals' => \@pred_int);
1607
1608   my ($npc_result_column_labels,$npc_result_row_labels);
1609   ($npc_result_column_labels,$npc_result_row_labels) =
1610       $self->get_npc_result_labels('ci' => $c_i,'pred_intervals' => \@pred_int);
1611   my @npc_result_labels = ($npc_result_row_labels,$npc_result_column_labels);
1612 #    my @npc_section_array; have insdie strata loop
1613
1614   #end prep VPC diagnostics
1615
1616   ##done general run info and labels
1617
1618   #Loop over strata.
1619   #Do the binning. Copy paste from modelfit_analyze stratification,
1620   #very similar but more options and parameters...
1621
1622   for (my $strat_ind=0; $strat_ind<$no_of_strata; $strat_ind++){
1623     #report strata header, use labels...
1624     my @result_row_labels = ('first interval is closed');
1625     my %return_section;
1626     my @result_values=();
1627     my $no_strata_obs=scalar(@{$strat_matrix->[$strat_ind]});
1628     my $result_name = "\nVPC results ".$strata_labels -> [$strat_ind].
1629         "\n$no_strata_obs observations out of $no_observations";
1630     $return_section{'name'} = $result_name;
1631     my @npc_section_array; #possibly move out again
1632
1633
1634     my ($bin_floors,$bin_ceilings);
1635     if ($self->{'bin_by_count'} eq '1') {
1636 #           print "Binning by count\n";
1637       if (defined $self->{'overlap_percent'}){
1638         ($bin_floors,$bin_ceilings) =
1639             $self -> get_bin_boundaries_overlap_count('data_column'=>$bin_array,
1640                                                       'value_hash'=>$bin_hash,
1641                                                       'data_indices'=>$strat_matrix->[$strat_ind],
1642                                                       'count'=>$self->{'single_bin_size'},
1643                                                       'overlap_percent' =>$self->{'overlap_percent'});
1644       } else {
1645         #add input param single_bin_size
1646         $bin_ceilings =
1647             $self -> get_bin_ceilings_from_count('data_column'=>$bin_array,
1648                                                  'value_hash'=>$bin_hash,
1649                                                  'data_indices'=>$strat_matrix->[$strat_ind],
1650                                                  'n_bins'=>$self->{'no_of_bins'},
1651                                                  'single_bin_size'=>$self->{'single_bin_size'},
1652                                                  'list_counts' =>$self->{'bin_array'});
1653       }
1654     } elsif ($self->{'bin_by_count'} eq '0') {
1655       if (defined $self->{'overlap_percent'}){
1656         ($bin_floors,$bin_ceilings) =
1657             $self -> get_bin_boundaries_overlap_value('data_column'=>$bin_array,
1658                                                       'data_indices'=>$strat_matrix->[$strat_ind],
1659                                                       'width'=>$self->{'single_bin_size'},
1660                                                       'overlap_percent' =>$self->{'overlap_percent'});
1661       } else {
1662         #add input param single_bin_size
1663         $bin_ceilings =
1664             $self -> get_bin_ceilings_from_value('data_column'=>$bin_array,
1665                                                  'data_indices'=>$strat_matrix->[$strat_ind],
1666                                                  'n_bins'=>$self->{'no_of_bins'},
1667                                                  'single_bin_size'=>$self->{'single_bin_size'},
1668                                                  'list_boundaries' =>$self->{'bin_array'});
1669       }
1670     }
1671     my $bin_matrix = $self -> index_matrix_binned_values('data_column'=>$bin_array,
1672                                                          'value_hash'=>$bin_hash,
1673                                                          'data_indices'=>$strat_matrix->[$strat_ind],
1674                                                          'bin_floors'=>$bin_floors,
1675                                                          'bin_ceilings'=>$bin_ceilings);
1676     my $no_bins = scalar(@{$bin_matrix});
1677
1678 ###
1679     if (defined $bin_ceilings ){
1680       unless (defined $bin_floors ){
1681         $bin_floors->[0]=${$bin_hash}{0};
1682         for (my $bi=1; $bi<$no_bins; $bi++){
1683           $bin_floors->[$bi]=$bin_ceilings->[$bi-1];
1684         }
1685       }
1686     } else{ #unique value binning
1687       for (my $bi=0; $bi<$no_bins; $bi++){
1688         $bin_ceilings->[$bi]=${$bin_hash}{$bi};
1689 #               my $lab = sprintf "\n$type results bin $col = %g",${$bin_hash}{$bi};
1690 #               push (@bin_labels,$lab);
1691       }
1692     }
1693
1694
1695 ###
1696
1697
1698
1699     my @sum_falsepos= (0) x scalar(@pred_int);
1700     my @sum_falseneg= (0) x scalar(@pred_int);
1701     my $sum_no_obs=0;
1702
1703     for (my $bin_index=0;$bin_index<$no_bins;$bin_index++){
1704       #Loop over bins. Check URS
1705
1706       #push (@result_row_labels,' ') unless ($bin_index==0); #avoid NA in labels
1707
1708       my $no_simvalues=0;
1709       my @simvalues=();
1710       my $orig_value;
1711       my @tmp=();
1712       my @singleset=();
1713       my $no_bin_observations=0;
1714
1715       #a) pool all simulated values in bin
1716       #sort simvalues, compute indices for this particular $no_simvalues;
1717       #and save perclimit
1718       foreach my $row_index (@{$bin_matrix->[$bin_index]}){
1719         my $row = $matrix->[$row_index];
1720         ($orig_value,@tmp) = split(/,/,$row);
1721         push(@simvalues,@tmp);
1722         push(@singleset,$orig_value);
1723         $no_simvalues += $no_sim;
1724         $no_bin_observations++;
1725       }
1726       next if ($no_bin_observations == 0);
1727
1728       my @sorted_sim_values = sort {$a <=> $b} @simvalues;
1729       for (my $i=0; $i<$no_perc_limits; $i++){
1730         if ($perc_limit[$i]==50){
1731 #               if (int($limit_index[$i]+0.5)>int($limit_index[$i])){
1732           #take median
1733           $limit[$i]= $self->median('sorted_array' => \@sorted_sim_values);
1734 #                       ($sorted_sim_values[int($limit_index[$i]+0.5)]+$sorted_sim_values[int($limit_index[$i])])/2;
1735         }else{
1736           $limit_index[$i] = $self->round('number'=>$perc_limit[$i]*($no_simvalues-1)/100);
1737           $limit[$i]=$sorted_sim_values[$limit_index[$i]];
1738         }
1739       }
1740
1741       for (my $i=0; $i<$no_perc_limits; $i++){
1742         $limit_index[$i] = $self->round('number'=>($perc_limit[$i]*($no_bin_observations-1)/100));
1743       }
1744
1745
1746       ##VPC diagnostics. Before b,c) because need singleset of real values
1747
1748       #outside bin-loop: call get_npc_indices with correct PI:s and no_sim (and ci)
1749       #here call subset_npc_analyze with this bin
1750       my ($npc_result_values,$npc_realpos,$npc_stats_warnings);
1751       ($npc_result_values,$npc_realpos,$npc_stats_warnings)=
1752           $self->subset_npc_analyze('matrix' => $matrix,
1753                                     'row_indices' => $bin_matrix->[$bin_index],
1754                                     'low_ind' => $npc_low_ind,
1755                                     'high_ind' => $npc_high_ind,
1756                                     'lower_index' => $npc_lower_index,
1757                                     'upper_index' => $npc_upper_index,
1758                                     'pred_intervals' => \@pred_int);
1759       #realpos has one arrayref per PI, each array one integer per obs. -1 below, 0 in, 1 above
1760
1761       my @diagnostics=();
1762       #the offset is to skip interval 50-50 which is of course empty
1763       for (my $i=$npc_pi_offset; $i<scalar(@pred_int); $i++){
1764         my $false_pos=0;
1765         my $false_neg=0;
1766         for (my $j=0; $j<$no_bin_observations; $j++){
1767           if ($npc_realpos->[$i]->[$j] == 0){
1768             #NPC inside
1769             if (($singleset[$j] < $limit[($i*2-$npc_pi_offset)]) ||
1770                 ($singleset[$j] > $limit[($i*2-$npc_pi_offset+1)])){
1771               #VPC outside
1772               $false_pos++;
1773             }
1774           } else {
1775             #NPC outside
1776             if (($singleset[$j] >= $limit[($i*2-$npc_pi_offset)]) &&
1777                 ($singleset[$j] <= $limit[($i*2-$npc_pi_offset+1)])){
1778               #VPC inside
1779               $false_neg++;
1780             }
1781           }
1782         }
1783         push (@diagnostics,($false_pos*100/$no_bin_observations,$false_neg*100/$no_bin_observations));
1784         $sum_falsepos[$i] += $false_pos;
1785         $sum_falseneg[$i] += $false_neg;
1786       }
1787       $sum_no_obs += $no_bin_observations;
1788
1789       my %npc_return_section;
1790       $npc_return_section{'name'} = "\nNPC results ".$strata_labels -> [$strat_ind].
1791           "\nbin ".($bin_index+1).": ".$no_bin_observations." observations";
1792       $npc_return_section{'labels'} = \@npc_result_labels;
1793       $npc_return_section{'values'} = $npc_result_values;
1794       push( @npc_section_array,\%npc_return_section );
1795
1796
1797       #end VPC diagnostics (print below)
1798
1799
1800       #b) compute PI:s for original, sort values and save lower and upper limits
1801       my @sorted_singleset = sort {$a <=> $b} @singleset;
1802
1803       for (my $i=0; $i<$no_perc_limits; $i++){
1804         if ($perc_limit[$i]==50){
1805           $limit_real[$i] = $self->median('sorted_array' => \@sorted_singleset);
1806 #               if (int($limit_index[$i]+0.5)>int($limit_index[$i])){
1807 #                   $limit_real[$i] =
1808 #                       ($sorted_singleset[int($limit_index[$i]+0.5)]+$sorted_singleset[int($limit_index[$i])])/2;
1809         }else{
1810           $limit_real[$i] = $sorted_singleset[$limit_index[$i]];
1811         }
1812       }
1813
1814
1815       #c) compute PI:s for each simset. Use clever indexing in @simvalues to extract values.
1816       #then determine and save ci:s for each PI boundary
1817       for (my $col=0;$col<$no_sim; $col++){
1818         @singleset=();
1819         for (my $row=0; $row<$no_bin_observations; $row++){
1820           push(@singleset,$simvalues[$col+$row*$no_sim]);
1821         }
1822         @sorted_singleset = sort {$a <=> $b} @singleset;
1823         for (my $i=0; $i<$no_perc_limits; $i++){
1824           if ($perc_limit[$i]==50){
1825             $limit_singlesim[$i]->[$col]  = $self->median('sorted_array' => \@sorted_singleset);
1826 #                   if (int($limit_index[$i]+0.5)>int($limit_index[$i])){
1827 #                       $limit_singlesim[$i]->[$col] =
1828 #                           ($sorted_singleset[int($limit_index[$i]+0.5)]+$sorted_singleset[int($limit_index[$i])])/2;
1829           }else{
1830 #                       $limit_singlesim[$i]->[$col] = $sorted_singleset[int($limit_index[$i])];
1831             $limit_singlesim[$i]->[$col] = $sorted_singleset[$limit_index[$i]];
1832           }
1833         }
1834       }
1835
1836       #for mirror plots
1837       if (defined $self->{'mirrors'}){
1838         for my $mi (0 .. ($self->{'mirrors'}-1)){
1839           my $col = $mirror_set[$mi];
1840           for (my $i=0; $i<$no_perc_limits; $i++){
1841             $limit_mirrors[$i]->[$mi]=$limit_singlesim[$i]->[$col];
1842           }
1843           $mi++;
1844         }
1845       }
1846
1847       for (my $i=0; $i<$no_perc_limits; $i++){
1848         my @val_arr = sort {$a <=> $b} @{$limit_singlesim[$i]};
1849         $lower_limit_ci[$i] = $val_arr[$low_ci_ind];
1850         $upper_limit_ci[$i] = $val_arr[$high_ci_ind];
1851       }
1852
1853       my $st= ' ';
1854       $st = $bin_floors->[$bin_index] if (defined $bin_floors );
1855       my @result_row_values=($st,$bin_ceilings->[$bin_index],$no_bin_observations);
1856
1857       for (my $i=0; $i<$no_perc_limits; $i++){
1858         push (@result_row_values,$limit_real[$i]);
1859         for my $mi (0 .. ($self->{'mirrors'}-1)){
1860           push (@result_row_values,$limit_mirrors[$i]->[$mi]);
1861         }
1862         push (@result_row_values,($limit[$i],$lower_limit_ci[$i],$upper_limit_ci[$i]));
1863       }
1864
1865       #VPC diagnostics
1866       push (@result_row_values,@diagnostics);
1867       #end diagnostics
1868
1869
1870       push (@result_values,\@result_row_values);
1871
1872     } #end loop over bins
1873
1874     my @result_labels = (\@result_row_labels,\@result_column_labels);
1875     $return_section{'labels'} = \@result_labels;
1876     $return_section{'values'} = \@result_values;
1877     push( @{$self -> {'results'}[0]{'own'}},\%return_section );
1878
1879     my @diag_labels=();
1880     my @diag_vals=();
1881
1882     my %diag_section;
1883     $diag_section{'name'}="\nDiagnostics VPC ".$strata_labels -> [$strat_ind];
1884     for (my $i=$npc_pi_offset; $i<scalar(@pred_int); $i++){
1885       push (@diag_labels,"PI $pred_int[$i]\% ");
1886       push (@diag_vals,[$sum_falsepos[$i]*100/$sum_no_obs,$sum_falseneg[$i]*100/$sum_no_obs]);
1887     }
1888     $diag_section{'labels'}=[\@diag_labels,["False pos (\%)","False neg (\%)"]];
1889     $diag_section{'values'}=\@diag_vals;
1890     push( @{$self -> {'results'}[0]{'own'}},\%diag_section );
1891     push( @{$self -> {'results'}[0]{'own'}},@npc_section_array);
1892
1893   } #end loop over strata
1894
1895
1896 }
1897 end vpc_analyze
1898 # }}} vpc_analyze
1899
1900
1901 # {{{ subset_npc_analyze
1902 start subset_npc_analyze
1903 {
1904   #start function, in is row_indices,matrix,lower_index,upper_index,
1905   #pred_intervals,$high_ind,$low_ind
1906   #out is ref to result_values and real_positions and stats_warnings
1907   #npoints not needed, length of input @row_indices
1908
1909   my $no_sim= (split(/,/,$matrix ->[0])) -1;
1910   my $point_counter=scalar(@{$row_indices});
1911   if ($point_counter < 1){ #empty set of observations
1912     return;
1913   }
1914
1915   #need to guarantee sorted pred_ints
1916   my @pred_int = sort {$a <=> $b} @{$pred_intervals};
1917   my $no_pred_ints = scalar(@pred_int);
1918   my @upper_limit = (0) x $no_pred_ints;
1919   my @lower_limit = (0) x $no_pred_ints;
1920   my (@values,@sorted_sim_values);
1921   my $orig_value;
1922   my @sum_warnings = (0) x ($no_sim+1);
1923   my (@upper_count, @lower_count);
1924   for (my $i=0; $i<$no_pred_ints; $i++){
1925     @{$upper_count[$i]} = (0) x ($no_sim+1);
1926     @{$lower_count[$i]} = (0) x ($no_sim+1);
1927     @{$real_positions[$i]} = (0) x ($point_counter);
1928   }
1929
1930   my $obs_counter=0;
1931
1932 #    print "\n\n\n";
1933   foreach my $row_index (@{$row_indices}){
1934     my $row = $matrix->[$row_index];
1935     ($orig_value,@values) = split(/,/,$row);
1936
1937     @sorted_sim_values = sort {$a <=> $b} @values; #sort numerically ascending
1938
1939     if ($self->{'verbose'}){
1940       printf "\n%.3f ",$orig_value;
1941       foreach my $v (@values){
1942         printf "%.3f ",$v;
1943       }
1944       printf "\n%.3f ",$orig_value;
1945       foreach my $v (@sorted_sim_values){
1946         printf "%.3f ",$v;
1947       }
1948       print "\n\n";
1949     }
1950
1951     for (my $i=0; $i<$no_pred_ints; $i++){
1952       $lower_limit[$i] = $sorted_sim_values[($lower_index->[$i])];
1953       $upper_limit[$i] = $sorted_sim_values[($upper_index->[$i])];
1954     }
1955
1956     unshift @values,($orig_value); #put original value back at the beginning
1957
1958     #Since the pred_ints are sorted in ascending order (e.g 80, 90, 95),
1959     #if value is below limit of first prediction interval, cannot be above
1960     #limit of any interval. Then if value is not below limit of 2nd, 3rd...
1961     #interval it cannot be below limit of any of the remaining.
1962
1963     for (my $j=0; $j<= $no_sim; $j++){
1964       if ($values[$j] < $lower_limit[0]){
1965         $lower_count[0]->[$j] +=1;
1966         for (my $i=1; $i<$no_pred_ints; $i++){
1967           if ($values[$j] < $lower_limit[$i]){
1968             $lower_count[$i]->[$j] +=1;
1969           } else {
1970             last; #goto next column (next value in @values)
1971           }
1972         }
1973       } elsif ($values[$j] > $upper_limit[0]){
1974         $upper_count[0]->[$j] +=1;
1975         for (my $i=1; $i<$no_pred_ints; $i++){
1976           if ($values[$j] > $upper_limit[$i]){
1977             $upper_count[$i]->[$j] +=1;
1978           } else {
1979             last; #goto next $value
1980           }
1981         }
1982       }
1983
1984     }
1985
1986
1987     ##For VPC diagnostics: build integer matrix w/ under/above/in info for real data only
1988     # 0 means inside.
1989
1990     if ($values[0] < $lower_limit[0]){
1991       $real_positions[0]->[$obs_counter]=-1;
1992       for (my $i=1; $i<$no_pred_ints; $i++){
1993         if ($values[0] < $lower_limit[$i]){
1994           $real_positions[$i]->[$obs_counter]=-1;
1995         } else {
1996           last;
1997         }
1998       }
1999     } elsif ($values[0] > $upper_limit[0]){
2000       $real_positions[0]->[$obs_counter]= 1;
2001       for (my $i=1; $i<$no_pred_ints; $i++){
2002         if ($values[0] > $upper_limit[$i]){
2003           $real_positions[$i]->[$obs_counter]= 1;
2004         } else {
2005           last;
2006         }
2007       }
2008     }
2009
2010
2011     $obs_counter++;
2012     ### end VPC diagnostics
2013
2014     if ($self->{'verbose'}){
2015       for (my $i=0; $i<$no_pred_ints; $i++){
2016         printf "%.0f %.3f %.3f\n",$pred_int[$i],$lower_limit[$i],$upper_limit[$i] if ($self->{'verbose'});
2017         for (my $j=0; $j<= $no_sim; $j++){
2018           print "$lower_count[$i]->[$j] ";
2019         }
2020         print "\n";
2021         for (my $j=0; $j<= $no_sim; $j++){
2022           print "$upper_count[$i]->[$j] ";
2023         }
2024         print "\n\n";
2025       }
2026       print "\n";
2027     }
2028   } #end foreach rowindex
2029
2030   my $const=100/$point_counter; #conversion from count to percent
2031
2032   print "$low_ind $high_ind $const\n" if ($self->{'verbose'});
2033
2034   #loop over prediction intervals, compute results for each interval
2035   for (my $i=0; $i<$no_pred_ints; $i++){
2036     my $warn=' ';
2037     my $realcount = shift @{$lower_count[$i]};
2038     my @count_arr = sort {$a <=> $b} @{$lower_count[$i]};
2039
2040     if (( $realcount<$count_arr[$low_ind]) ||($realcount>$count_arr[$high_ind])){
2041       $warn='*';
2042       $sum_warnings[0] += 1; #NPC diagnostics
2043     }
2044
2045     #For NPC diagnostics:
2046     for (my $si=0; $si<$no_sim; $si++){
2047       if (( $lower_count[$i]->[$si] < $count_arr[$low_ind]) ||($lower_count[$i]->[$si] > $count_arr[$high_ind])){
2048         $sum_warnings[(1+$si)] += 1; # +1 because 0 is real data.
2049       }
2050     }
2051     #end NPC diagnostics
2052
2053
2054     my @result_row_values =($realcount,$realcount*$const,$warn,$count_arr[$low_ind]*$const,
2055                             $count_arr[$high_ind]*$const);
2056     if ($self->{'verbose'}){
2057       print "$realcount ";
2058       for (my $j=0; $j<= $no_sim; $j++){
2059         print "$lower_count[$i]->[$j] ";
2060       }
2061       print "\n  ";
2062       for (my $j=0; $j<$no_sim; $j++){
2063         print "$count_arr[$j] ";
2064       }
2065       print "\n\n";
2066     }
2067
2068     $warn=' ';
2069     $realcount = shift @{$upper_count[$i]};
2070     @count_arr = sort {$a <=> $b} @{$upper_count[$i]};
2071     unless (scalar(@count_arr)==$no_sim){  #kolla no_sim vaerden!
2072       'debug' -> die(message => "Crash.");
2073     }
2074
2075     if (( $realcount<$count_arr[$low_ind]) ||($realcount>$count_arr[$high_ind])){
2076       $warn='*';
2077       $sum_warnings[0] += 1; #NPC diagnostics
2078     }
2079     #For NPC diagnostics:
2080     for (my $si=0; $si<$no_sim; $si++){
2081       if (( $upper_count[$i]->[$si] < $count_arr[$low_ind]) ||($upper_count[$i]->[$si] > $count_arr[$high_ind])){
2082         $sum_warnings[(1+$si)] += 1; # 1+ because 0 is real data
2083       }
2084     }
2085     #end NPC diagnostics
2086
2087     push (@result_row_values,($realcount, $realcount*$const,$warn,$count_arr[$low_ind]*$const,
2088                               $count_arr[$high_ind]*$const));
2089
2090     if ($self->{'verbose'}){
2091       print "$realcount ";
2092       for (my $j=0; $j<= $no_sim; $j++){
2093         print "$upper_count[$i]->[$j] ";
2094       }
2095       print "\n  ";
2096       for (my $j=0; $j<$no_sim; $j++){
2097         print "$count_arr[$j] ";
2098       }
2099       print "\n\n";
2100     }
2101
2102     push (@result_values,\@result_row_values);
2103
2104   } #end loop over prediction intervals
2105
2106   #warning statistics
2107   my $i = shift @sum_warnings; #real count
2108   push(@stats_warnings,$i);
2109   my $sum_sums=0;
2110   foreach my $i (@sum_warnings){
2111     $sum_sums += $i;
2112   }
2113   push(@stats_warnings,$sum_sums/$no_sim); #mean
2114   push(@stats_warnings,($no_pred_ints*2*2*$low_ind/$no_sim)); #theoretical mean
2115
2116   my @sorted_sums = sort {$a <=> $b} @sum_warnings;
2117 #    print "\n $sum_sums $low_ind $high_ind\n";
2118 #    foreach my $i (@sorted_sums){
2119 #       print "$i ";
2120 #    }
2121 #    print "\n";
2122
2123   #median
2124   push(@stats_warnings,$self->median('sorted_array'=>\@sorted_sums));
2125 #
2126 #    if( scalar( @sorted_sums ) % 2 ){
2127 #       push(@stats_warnings,$sorted_sums[$#sorted_sums/2]);
2128 #    } else {
2129 #       push(@stats_warnings,($sorted_sums[@sorted_sums/2]+$sorted_sums[(@sorted_sums-2)/2]) / 2);
2130 #    }
2131
2132   push(@stats_warnings,$sorted_sums[$low_ind]); #start 95% CI
2133   push(@stats_warnings,$sorted_sums[$high_ind]); #end 95% CI
2134
2135 }
2136 end subset_npc_analyze
2137 # }}} subset_npc_analyze
2138
2139 # {{{ get_npc_indices
2140 start get_npc_result_labels
2141 {
2142   #in pred_intervals ci
2143   #out result_column_labels result_row_labels
2144
2145   my @pred_int = sort {$a <=> $b} @{$pred_intervals};
2146   my $no_pred_ints = scalar(@pred_int);
2147
2148   @result_column_labels=('points below PI (count)','points below PI (%)','outside CI for below PI',
2149                          "$ci\% CI below: from (\%)","$ci\% CI below: to (\%)" );
2150   push (@result_column_labels,('points above PI (count)','points above PI (%)','outside CI for above PI',
2151                                "$ci\% CI above: from (\%)","$ci\% CI above: to (\%)"));
2152
2153   for (my $i=0; $i<$no_pred_ints; $i++){
2154     push (@result_row_labels,"$pred_int[$i]% PI");
2155   }
2156
2157 }
2158 end get_npc_result_labels
2159 # }}} get_npc_result_labels
2160
2161 # {{{ get_npc_indices
2162 start get_npc_indices
2163 {
2164   #in pred_intervals ci no_sim
2165   #out lower_index upper_index low_ind high_ind
2166
2167   #For each prediction interval pred_int: Compute lower_index, i.e. index
2168   #of first value in sorted array which is inside this interval.
2169   #Compute upper index, index of last value inside interval
2170
2171   my @pred_int = sort {$a <=> $b} @{$pred_intervals};
2172   my $no_pred_ints = scalar(@pred_int);
2173
2174 #    $low_ind = int(((100-$ci)*($no_sim-1)/200)+0.5); #index of start of ci % interval
2175   $low_ind = $self->round('number'=>((100-$ci)*($no_sim-1)/200)); #index of start of ci % interval
2176   $high_ind = $no_sim - $low_ind - 1; #index of end  of  c_i% interval
2177
2178   for (my $i=0; $i<$no_pred_ints; $i++){
2179     push (@lower_index, $self->round('number'=>((100-$pred_int[$i])*($no_sim-1)/200)));
2180     push (@upper_index, $no_sim - $lower_index[$i] -1);
2181   }
2182
2183 }
2184 end get_npc_indices
2185 # }}} get_npc_indices
2186
2187
2188 # {{{ npc_analyze
2189 start npc_analyze
2190 {
2191   #input refs strat_matrix, strata_labels(fix no_of_strata from labels)
2192   #matrix (fix no_observations and no_sim from matrix)
2193
2194   my $c_i = 95;
2195   my @pred_int = sort {$a <=> $b} 0,20,40,50,60,80,90,95;
2196   my $no_sim= (split(/,/,$matrix ->[0])) -1;
2197   my $no_observations = scalar(@{$matrix});
2198   my $no_of_strata = scalar(@{$strata_labels});
2199
2200   my $no_pred_ints = scalar(@pred_int);
2201   my ($lower_index,$upper_index,$low_ind,$high_ind);
2202   my ($result_column_labels,$result_row_labels);
2203
2204   ## Prepare general run info for output file
2205   my %return_sec;
2206   $return_sec{'name'} = 'NPC run info';
2207   my $modelname=$self-> {'models'}->[0] ->filename();
2208
2209   my $extra_value=$modelname;
2210   if (defined $self->{'msfo_file'}){
2211     $extra_value='msfo-file '.$self->{'msfo_file'};
2212   } elsif (defined $self->{'lst_file'}){
2213     $extra_value='lst-file '.$self->{'lst_file'};
2214   }
2215
2216   $return_sec{'labels'} = [[],['Date','observations','simulations','Modelfile',
2217                                'parameter values from','Dependent variable','PsN version','NONMEM version']];
2218
2219   my @datearr=localtime;
2220   my $the_date=($datearr[5]+1900).'-'.($datearr[4]+1).'-'.($datearr[3]);
2221
2222   $return_sec{'values'} = [[$the_date,$no_observations,$no_sim,$modelname,$extra_value,
2223                             $self->{'dv'},$PsN::version,$self->{'nm_version'}]];
2224
2225   push( @{$self -> {'results'}[0]{'own'}},\%return_sec );
2226
2227   ##done general run info
2228
2229   ($result_column_labels,$result_row_labels) = $self->get_npc_result_labels('ci' => $c_i,
2230                                                                             'pred_intervals' => \@pred_int);
2231   my @result_labels = ($result_row_labels,$result_column_labels);
2232
2233
2234   ($lower_index,$upper_index,$low_ind,$high_ind)= $self->get_npc_indices('ci' => $c_i,
2235                                                                          'no_sim' => $no_sim,
2236                                                                          'pred_intervals' => \@pred_int);
2237
2238   if ($self->{'verbose'}){
2239     for (my $i=0; $i<$no_pred_ints; $i++){
2240       print "$pred_int[$i] $lower_index->[$i] $upper_index->[$i]\n";
2241     }
2242     print "\n" ;
2243   }
2244
2245   my $analyzed_points=0;
2246   my $realpos; #dirt
2247   for (my $strat_ind=0; $strat_ind<$no_of_strata; $strat_ind++){
2248     my %return_section;
2249     my ($result_values,$stats_warnings);
2250     ($result_values,$realpos,$stats_warnings)=$self->subset_npc_analyze('matrix' => $matrix,
2251                                                                         'row_indices' => $strat_matrix->[$strat_ind],
2252                                                                         'low_ind' => $low_ind,
2253                                                                         'high_ind' => $high_ind,
2254                                                                         'lower_index' => $lower_index,
2255                                                                         'upper_index' => $upper_index,
2256                                                                         'pred_intervals' => \@pred_int);
2257
2258     my $point_counter=scalar(@{$strat_matrix->[$strat_ind]});
2259     my $result_name = "\nNPC results ".$strata_labels -> [$strat_ind].
2260         "\n$point_counter observations out of $no_observations";
2261     $return_section{'name'} = $result_name;
2262
2263     $return_section{'labels'} = \@result_labels;
2264     $return_section{'values'} = $result_values;
2265     push( @{$self -> {'results'}[0]{'own'}},\%return_section );
2266
2267     my %diag_section;
2268     $diag_section{'name'} = "\nNPC * (warning) statistics";
2269     $diag_section{'labels'} = [[],['Real data * count','Sim. data * count mean','Theoretical mean','Sim. data * count median',
2270                                    'Sim. data * count 95% CI from','to']];
2271     $diag_section{'values'} = [$stats_warnings];
2272     push( @{$self -> {'results'}[0]{'own'}},\%diag_section );
2273
2274     $analyzed_points+=$point_counter;
2275   }
2276
2277   #check that all individuals were grouped to a strata
2278   unless ($analyzed_points == $no_observations){
2279     'debug' -> die(message => "Sum of observations $analyzed_points in each strata does not\n".
2280                    "equal total number of observations $no_observations. Something went wrong.");
2281   }
2282 }
2283 end npc_analyze
2284 # }}}
2285