# fmda/moisture_rnn.py
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import logging
import copy
from abc import ABC, abstractmethod
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import LSTM, SimpleRNN, Input, Dropout, Dense
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Local modules
import reproducibility
from utils import print_dict_summary, hash2
from data_funcs import load_and_fix_data, rmse, plot_data, compare_dicts
def staircase(x, y, timesteps, datapoints, return_sequences=False, verbose=False):
    # x [datapoints,features]  all inputs
    # y [datapoints,outputs]
    # timesteps: split x and y into samples of length timesteps, shifted by 1
    # datapoints: number of timesteps to use for training, no more than y.shape[0]
    print('staircase: shape x = ', x.shape)
    print('staircase: shape y = ', y.shape)
    print('staircase: timesteps=', timesteps)
    print('staircase: datapoints=', datapoints)
    print('staircase: return_sequences=', return_sequences)
    outputs = y.shape[1]
    features = x.shape[1]
    samples = datapoints - timesteps + 1
    print('staircase: samples=', samples, 'timesteps=', timesteps, 'features=', features)
    x_train = np.empty([samples, timesteps, features])
    if return_sequences:
        print('returning all timesteps in a sample')
        y_train = np.empty([samples, timesteps, outputs])  # all
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
                y_train[i, k, :] = y[i + k, :]
    else:
        print('returning only the last timestep in a sample')
        y_train = np.empty([samples, outputs])
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
            y_train[i, :] = y[i + timesteps - 1, :]

    return x_train, y_train
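
# Minimal usage sketch of staircase() on synthetic data, for illustration only:
# the array sizes below are arbitrary example values, not anything prescribed by the
# fmda workflow. It shows how the function reshapes [datapoints, features] into
# overlapping samples of shape [samples, timesteps, features].
def _demo_staircase():
    x = np.arange(20, dtype=float).reshape(10, 2)   # 10 datapoints, 2 features
    y = np.arange(10, dtype=float).reshape(10, 1)   # 10 datapoints, 1 output
    x_train, y_train = staircase(x, y, timesteps=3, datapoints=10)
    # With 10 datapoints and timesteps=3 there are 10-3+1 = 8 overlapping samples,
    # so x_train has shape (8, 3, 2) and y_train has shape (8, 1).
    print(x_train.shape, y_train.shape)
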
def staircase_2(x, y, timesteps, batch_size=None, trainsteps=np.inf, return_sequences=False, verbose=False):
    # create RNN training data in multiple batches
    # input:
    #     x (,features)
    #     y (,outputs)
    #     timesteps: split x and y into sequences of length timesteps
    #                a.k.a. lookback or sequence_length

    # print params if verbose

    if batch_size is None:
        raise ValueError('staircase_2 requires batch_size')
    print('staircase_2: shape x = ', x.shape)
    print('staircase_2: shape y = ', y.shape)
    print('staircase_2: timesteps=', timesteps)
    print('staircase_2: batch_size=', batch_size)
    print('staircase_2: return_sequences=', return_sequences)

    nx, features = x.shape
    ny, outputs = y.shape
    datapoints = min(nx, ny, trainsteps)
    print('staircase_2: datapoints=', datapoints)

    # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
    # https://www.tensorflow.org/guide/keras/working_with_rnns  Cross-batch statefulness

    # example with timesteps=3 batch_size=3 datapoints=15
    #     batch 0: [0 1 2]      [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]      [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]      [7 8 9]      [8 9 10]
    #     batch 3: [9 10 11]    [10 11 12]   [11 12 13]
    #     batch 4: [12 13 14]   [13 14 15]   when the data runs out this is the last batch, and it can be shorter

    # TODO: implement for multiple locations, same starting time for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]      [0 1 2]      [0 1 2]
    #     batch 1: [3 4 5]      [3 4 5]      [3 4 5]
    #     batch 2: [6 7 8]      [6 7 8]      [6 7 8]
    # TODO: on the second epoch, shift the starting time at batch 0 in time

    # TODO: implement for multiple locations, different starting times for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]      [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]      [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]      [7 8 9]      [8 9 10]

    # the first sample in batch j starts from timesteps*j and ends with timesteps*(j+1)-1
    # e.g. the final hidden state of the rnn after the sequence of steps [0 1 2] in batch 0
    # becomes the starting hidden state of the rnn in the sequence of steps [3 4 5] in batch 1, etc.

    # sample [0 1 2] means the rnn is used twice to map state 0 -> 1 -> 2
    # the state at time 0 is fixed but the state is considered a variable at times 1 and 2
    # the loss is computed from the output at time 2, and the gradient of the loss function by the chain rule ends at time 0 because the state there is a constant -> derivative is zero
    # sample [3 4 5] means the rnn is used twice to map state 3 -> 4 -> 5
    # the state at time 3 is fixed to the output of the first sequence [0 1 2]
    # the loss is computed from the output at time 5, and the gradient of the loss function by the chain rule ends at time 3 because the state there is considered constant -> derivative is zero
    # how is the gradient computed? I suppose keras adds the gradients wrt the weights at 2 5 8 ..., 3 6 9 ..., 4 7 ... and uses that to update the weights
    # there is only one set of weights: h(2) = f(h(1),w), h(1) = f(h(0),w), but w is always the same
    # each column is one successive evaluation of h(n+1) = f(h(n),w) for n = n_start, n_start+1, ...
    # a single column cannot be evaluated efficiently on a gpu because a gpu is a parallel processor
    # think of it as each column being served by one thread; the threads are independent because they execute in parallel, and there needs to be a large number of threads (32 is a good number)
    # each batch consists of independent calculations
    # but it can depend on the result of the previous batch (that's the recurrent part)

    max_batches = datapoints // timesteps
    max_sequences = max_batches * batch_size

    print('staircase_2: max_batches=', max_batches)
    print('staircase_2: max_sequences=', max_sequences)

    x_train = np.zeros((max_sequences, timesteps, features))
    if return_sequences:
        y_train = np.empty((max_sequences, timesteps, outputs))
    else:
        y_train = np.empty((max_sequences, outputs))

    # build the sequences
    k = 0
    for i in range(max_batches):
        for j in range(batch_size):
            begin = i * timesteps + j
            next = begin + timesteps
            if next > datapoints:
                break
            if verbose:
                print('sequence', k, 'batch', i, 'sample', j, 'data', begin, 'to', next - 1)
            x_train[k, :, :] = x[begin:next, :]
            if return_sequences:
                y_train[k, :, :] = y[begin:next, :]
            else:
                y_train[k, :] = y[next - 1, :]
            k += 1

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)
    print('staircase_2: sequences generated', k)
    print('staircase_2: batch_size=', batch_size)
    k = (k // batch_size) * batch_size
    print('staircase_2: removing partial and empty batches at the end, keeping', k)
    x_train = x_train[:k, :, :]
    if return_sequences:
        y_train = y_train[:k, :, :]
    else:
        y_train = y_train[:k, :]

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)

    return x_train, y_train
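
# Minimal sketch, for illustration only, reproducing the batch layout described in the
# comments above with timesteps=3, batch_size=3, datapoints=15. Sequence j of batch i
# starts at index i*timesteps + j, so batch 0 holds [0 1 2], [1 2 3], [2 3 4],
# batch 1 holds [3 4 5], [4 5 6], [5 6 7], and so on; the trailing partial batch is dropped.
def _demo_staircase_2():
    t = np.arange(15, dtype=float)
    x = t.reshape(-1, 1)                 # single feature = the time index
    y = t.reshape(-1, 1)                 # single output, same values for readability
    x_train, y_train = staircase_2(x, y, timesteps=3, batch_size=3, verbose=True)
    # Each row of x_train is one sequence; printing the feature values shows the
    # [0 1 2], [1 2 3], [2 3 4], [3 4 5], ... pattern described above.
    print(x_train[:, :, 0])
    print(x_train.shape, y_train.shape)
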
# Dictionary of scalers, used to avoid multiple object creation and to avoid multiple if statements
scalers = {
    'minmax': MinMaxScaler(),
    'standard': StandardScaler()
}

# def scale_transform(X, method='minmax'):
#     # Function to scale data in place
#     # Inputs:
#     #     X: (ndarray) data to be scaled
#     #     method: (str) one of keys in scalers dictionary above
#     scaler = scalers[method]
#     scaler.fit(X)
#     # Modify X in-place
#     X[:] = scaler.transform(X)
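
# Minimal sketch, for illustration only, of how the shared `scalers` dictionary is used:
# fit on training rows only, then apply the same fitted transform to later data.
# The array values are arbitrary placeholders.
def _demo_scalers(method='minmax'):
    X_train = np.array([[0.0, 10.0], [1.0, 20.0], [2.0, 30.0]])
    X_test = np.array([[1.5, 25.0]])
    sc = scalers[method]
    sc.fit(X_train)                      # learn min/max (or mean/std) from training data only
    X_train_s = sc.transform(X_train)
    X_test_s = sc.transform(X_test)      # reuse the fitted statistics for test data
    return X_train_s, X_test_s
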
def create_rnn_data2(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None):
    # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
    # Inputs:
    #     dict1: (dict) fmda dictionary
    #     params: (dict) hyperparameters
    #     atm_dict: (str) string specifying name of subdictionary for atmospheric vars
    #     train_ind: (int) optional index of last training observation; computed from params['train_frac'] if None
    #     test_ind: (int) optional index of first test observation; computed from params['val_frac'] if None
    # Returns: (dict) formatted data used in RNN
    logging.info('create_rnn_data start')
    # Copy Dictionary
    d = copy.deepcopy(dict1)
    scale = params['scale']
    scaler = params['scaler']
    features_list = params["features_list"]

    # Extract desired features based on params, combine into matrix
    # Extract response vector
    fm = d['y']
    y = np.reshape(fm, [fm.shape[0], 1])
    # Extract Features matrix
    X = d['X']

    # Check total observed hours
    hours = d['hours']
    assert hours == y.shape[0]  # Check that it matches response

    logging.info('create_rnn_data: total_hours=%s', hours)
    logging.info('feature matrix X shape %s', np.shape(X))
    logging.info('target matrix Y shape %s', np.shape(y))
    logging.info('features_list: %s', features_list)

    logging.info('splitting train/val/test')
    if train_ind is None:
        train_ind = round(hours * params['train_frac'])  # index of last training observation
    if test_ind is None:
        test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; if there is no validation data it equals train_ind
    logging.info('Final index of training data=%s', train_ind)
    logging.info('First index of Test data=%s', test_ind)
    # Training data from 0 to train_ind
    X_train = X[:train_ind]
    y_train = y[:train_ind].reshape(-1, 1)
    # Validation data from train_ind to test_ind
    X_val = X[train_ind:test_ind]
    y_val = y[train_ind:test_ind].reshape(-1, 1)
    # Test data from test_ind to end
    X_test = X[test_ind:]
    y_test = y[test_ind:].reshape(-1, 1)

    # Scale Data if required
    # TODO:
    #     Remove need for "scale_fm" param
    #     Reset reproducibility with this scaling
    if scale:
        logging.info('Scaling feature data with scaler: %s', scaler)
        # scale=1
        if scaler == "reproducibility":
            scale_fm = 17.076346687085564
        else:
            scale_fm = 1.0
        # Fit scaler to training data
        scalers[scaler].fit(X_train)
        # Apply scaling to all data using in-place operations
        X_train[:] = scalers[scaler].transform(X_train)
        if X_val.shape[0] > 0:
            X_val[:] = scalers[scaler].transform(X_val)
        X_test[:] = scalers[scaler].transform(X_test)
    else:
        print("Not scaling data")
        scale_fm = 1.0
        scaler = None

    logging.info('X_train shape=%s', X_train.shape)
    logging.info('y_train shape=%s', y_train.shape)
    if test_ind == train_ind:
        logging.info('No validation data')
    elif X_val.shape[0] != 0:
        logging.info('X_val shape=%s', X_val.shape)
        logging.info('y_val shape=%s', y_val.shape)
    logging.info('X_test shape=%s', X_test.shape)
    logging.info('y_test shape=%s', y_test.shape)

    # Set up return dictionary
    rnn_dat = {
        'case': d['case'],
        'hours': hours,
        'features_list': features_list,
        'features': len(features_list),
        'scaler': scaler,
        'train_ind': train_ind,
        'test_ind': test_ind,
        'X': X,
        'y': y,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }

    if X_val.shape[0] > 0:
        rnn_dat.update({
            'X_val': X_val,
            'y_val': y_val
        })

    # Update RNN params using data attributes
    logging.info('Updating model params based on data')
    timesteps = params['timesteps']
    batch_size = params['batch_size']
    logging.info('batch_size=%s', batch_size)
    logging.info('timesteps=%s', timesteps)
    features = len(features_list)
    params.update({
        'features': features,
        'batch_shape': (params["batch_size"], params["timesteps"], features),
        'pred_input_shape': (hours, features),
        'scaler': scaler,
        'scale_fm': scale_fm
    })
    rnn_dat.update({'scaler': scaler, 'scale_fm': scale_fm})

    logging.info('create_rnn_data2 done')
    return rnn_dat
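
# Minimal sketch, for illustration only, of the inputs create_rnn_data2 expects.
# The dict1 fields ('X', 'y', 'hours', 'case') and the params keys follow the usage in
# this file; the numeric values and feature names are made-up placeholders, not real
# fmda data.
def _demo_create_rnn_data2():
    hours = 100
    dict1 = {
        'case': 'demo_case',
        'hours': hours,
        'X': np.random.rand(hours, 2),
        'y': np.random.rand(hours),
    }
    params = {
        'scale': False,
        'scaler': None,
        'features_list': ['Ed', 'Ew'],
        'train_frac': 0.5,
        'val_frac': 0.1,
        'timesteps': 5,
        'batch_size': 32,
    }
    rnn_dat = create_rnn_data2(dict1, params)
    # rnn_dat now holds X_train/y_train, optional X_val/y_val, X_test/y_test and the
    # indices train_ind/test_ind; params gains 'batch_shape' and 'pred_input_shape'.
    return rnn_dat
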
# def create_rnn_data(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
#     # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
#     # Inputs:
#     #     d: (dict) fmda dictionary
#     #     params: (dict) hyperparameters
#     #     atm_dict: (str) string specifying name of subdictionary for atmospheric vars
#     #     train_frac: (float) fraction of data to use for training (starting from time 0)
#     #     val_frac: (float) fraction of data to use for validation data (starting from end of train)
#     # Returns: (dict) formatted data used in RNN
#     logging.info('create_rnn_data start')
#     # Copy Dictionary
#     d = copy.deepcopy(dict1)
#     scale = params['scale']
#     features_list = params["features_list"]
#
#     # Check if reproducibility case
#     if d['case'] == "reproducibility":
#         params.update({'scale': 1})
#         atm_dict = "RAWS"
#
#     # Scale Data if required
#     if scale:
#         scale = 1
#         if d['case'] == "reproducibility":
#             # Note: this was calculated from the max observed fm, Ed, Ew in a whole timeseries, originally using data from the test period
#             scale_fm = 17.076346687085564
#             logging.info("REPRODUCIBILITY scaling moisture features: using %s", scale_fm)
#             logging.info('create_rnn_data: scaling to range 0 to 1')
#             d[atm_dict]['Ed'] = d[atm_dict]['Ed'] / scale_fm
#             d[atm_dict]['Ew'] = d[atm_dict]['Ew'] / scale_fm
#             d[atm_dict]['fm'] = d[atm_dict]['fm'] / scale_fm
#             scaler = 'reproducibility'
#     else:
#         scale_fm = 1.0
#         scaler = None
#     # Extract desired features based on params, combine into matrix
#     fm = d[atm_dict]['fm']
#     values = [d[atm_dict][key] for key in features_list]
#     X = np.vstack(values).T
#     # Extract response vector
#     y = np.reshape(fm, [fm.shape[0], 1])
#     # Calculate total observed hours
#     hours = X.shape[0]
#     assert hours == y.shape[0]  # Check that it matches response
#
#     logging.info('create_rnn_data: total_hours=%s', hours)
#     logging.info('feature matrix X shape %s', np.shape(X))
#     logging.info('target matrix Y shape %s', np.shape(y))
#     logging.info('features_list: %s', features_list)
#
#     logging.info('splitting train/val/test')
#     if train_ind is None:
#         train_ind = round(hours * params['train_frac'])  # index of last training observation
#     test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; if no validation data it is equal to train_ind
#     logging.info('Final index of training data=%s', train_ind)
#     logging.info('First index of Test data=%s', test_ind)
#     # Training data from 0 to train_ind
#     X_train = X[:train_ind]
#     y_train = y[:train_ind].reshape(-1, 1)
#     # Validation data from train_ind to test_ind
#     X_val = X[train_ind:test_ind]
#     y_val = y[train_ind:test_ind].reshape(-1, 1)
#     # Test data from test_ind to end
#     X_test = X[test_ind:]
#     y_test = y[test_ind:].reshape(-1, 1)
#
#     logging.info('X_train shape=%s', X_train.shape)
#     logging.info('y_train shape=%s', y_train.shape)
#     if test_ind == train_ind:
#         logging.info('No validation data')
#     elif X_val.shape[0] != 0:
#         logging.info('X_val shape=%s', X_val.shape)
#         logging.info('y_val shape=%s', y_val.shape)
#     logging.info('X_test shape=%s', X_test.shape)
#     logging.info('y_test shape=%s', y_test.shape)
#
#     # Set up return dictionary
#     rnn_dat = {
#         'case': d['case'],
#         'hours': hours,
#         'features_list': features_list,
#         'features': len(features_list),
#         'scaler': scaler,
#         'train_ind': train_ind,
#         'test_ind': test_ind,
#         'X': X,
#         'y': y,
#         'X_train': X_train,
#         'y_train': y_train,
#         'X_test': X_test,
#         'y_test': y_test
#     }
#     if rnn_dat['scaler'] == "reproducibility":
#         rnn_dat['scale_fm'] = 17.076346687085564
#     if X_val.shape[0] > 0:
#         rnn_dat.update({
#             'X_val': X_val,
#             'y_val': y_val
#         })
#
#     # Update RNN params using data attributes
#     logging.info('Updating model params based on data')
#     timesteps = params['timesteps']
#     batch_size = params['batch_size']
#     logging.info('batch_size=%s', batch_size)
#     logging.info('timesteps=%s', timesteps)
#     features = len(features_list)
#     params.update({
#         'features': features,
#         'batch_shape': (params["batch_size"], params["timesteps"], features),
#         'pred_input_shape': (hours, features),
#         'scaler': scaler
#     })
#     if params['scaler'] == "reproducibility":
#         params['scale_fm'] = 17.076346687085564
#
#     logging.info('create_rnn_data done')
#     return rnn_dat
repro_hashes = {
    'phys_initialize': {
        'fitted_weight_hash': 4.2030588308041834e+19,
        'predictions_hash': 3.59976005554199219
    },
    'rand_initialize': {
        'fitted_weight_hash': 4.4965532557938975e+19,
        'predictions_hash': 3.71594738960266113
    },
    'params': {
        'id': 0,
        'purpose': 'reproducibility',
        'batch_size': 32,
        'training': 5,
        'cases': ['case11'],
        'scale': 1,                  # every feature in [0, scale]
        'rain_do': True,
        'verbose': False,
        'timesteps': 5,
        'activation': ['linear', 'linear'],
        'hidden_units': 20,
        'dense_units': 1,            # do not change
        'dense_layers': 1,           # do not change
        'centering': [0.0, 0.0],     # should be activation at 0
        'DeltaE': [0, -1],           # bias correction
        'synthetic': False,          # run also synthetic cases
        'T1': 0.1,                   # 1/fuel class (10)
        'fm_raise_vs_rain': 0.2,     # fm increase per mm rain
        'train_frac': 0.5,           # time fraction to spend on training
        'epochs': 200,
        'verbose_fit': 0,
        'verbose_weights': False,
        'initialize': True,
        'learning_rate': 0.001       # default learning rate
    }
}
class RNNModel(ABC):
    def __init__(self, params: dict):
        self.params = params
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def _build_model_train(self, return_sequences=False):
        pass

    @abstractmethod
    def _build_model_predict(self, return_sequences=True):
        pass

    def fit(self, X_train, y_train, plot=True, plot_title='',
            weights=None, callbacks=[], verbose_fit=None, validation_data=None, *args, **kwargs):
        # verbose_fit argument is for printing out an update after each epoch, which gets very long
        # These print statements at the top could be turned off with a verbose argument, but then
        # there would be a bunch of different verbose params
        print(f"Training simple RNN with params: {self.params}")
        X_train, y_train = self.format_train_data(X_train, y_train)
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        if validation_data is not None:
            X_val, y_val = self.format_train_data(validation_data[0], validation_data[1])
            print(f"X_val hash: {hash2(X_val)}")
            print(f"y_val hash: {hash2(y_val)}")
        print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")
        # Setup callbacks
        if self.params["reset_states"]:
            callbacks = callbacks + [ResetStatesCallback()]

        # Note: we overload the params here so that verbose_fit can be easily turned on/off at the .fit call
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Evaluate Model once to set nonzero initial state
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_train(X_train)
        if validation_data is not None:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                validation_data=(X_val, y_val),
                *args, **kwargs
            )
        else:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                *args, **kwargs
            )
        if plot:
            self.plot_history(history, plot_title)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

        # Update Weights for Prediction Model
        w_fitted = self.model_train.get_weights()
        self.model_predict.set_weights(w_fitted)

    def predict(self, X_test):
        print("Predicting with simple RNN")
        X_test = self.format_pred_data(X_test)
        preds = self.model_predict.predict(X_test).flatten()
        return preds

    def format_train_data(self, X, y, verbose=False):
        X, y = staircase_2(X, y, timesteps=self.params["timesteps"], batch_size=self.params["batch_size"], verbose=verbose)
        return X, y

    def format_pred_data(self, X):
        return np.reshape(X, (1, X.shape[0], self.params['features']))

    def plot_history(self, history, plot_title):
        plt.figure()
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(f'{plot_title} Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()

    def run_model(self, dict0):
        # Make copy to prevent changing in place
        dict1 = copy.deepcopy(dict0)
        # Extract Fields
        X_train, y_train, X_test, y_test = dict1['X_train'].copy(), dict1['y_train'].copy(), dict1["X_test"].copy(), dict1['y_test'].copy()
        if 'X_val' in dict1:
            X_val, y_val = dict1['X_val'].copy(), dict1['y_val'].copy()
        else:
            X_val = None
        case_id = dict1['case']

        # Fit model
        if X_val is None:
            self.fit(X_train, y_train)
        else:
            self.fit(X_train, y_train, validation_data=(X_val, y_val))
        # Generate Predictions:
        # run through training to get the hidden state set properly for the forecast period
        if X_val is None:
            X = np.concatenate((X_train, X_test))
            y = np.concatenate((y_train, y_test)).flatten()
        else:
            X = np.concatenate((X_train, X_val, X_test))
            y = np.concatenate((y_train, y_val, y_test)).flatten()
        # Predict
        print(f"Predicting Training through Test \n features hash: {hash2(X)} \n response hash: {hash2(y)} ")
        m = self.predict(X).flatten()
        dict1['m'] = m
        dict0['m'] = m  # add to outside env dictionary, should be the only place this happens
        if self.params['scale']:
            print(f"Rescaling data using {self.params['scaler']}")
            if self.params['scaler'] == "reproducibility":
                m *= self.params['scale_fm']
                y *= self.params['scale_fm']
                y_train *= self.params['scale_fm']
                y_test *= self.params['scale_fm']
        # Check Reproducibility. TODO: the old dict calls it hidden_units not rnn_units, so this doesn't check that
        if (case_id == "reproducibility") and compare_dicts(self.params, repro_hashes['params'], ['epochs', 'batch_size', 'scale', 'activation', 'learning_rate']):
            print("Checking Reproducibility")
            checkm = m[350]
            hv = hash2(self.model_predict.get_weights())
            if self.params['phys_initialize']:
                hv5 = repro_hashes['phys_initialize']['fitted_weight_hash']
                mv = repro_hashes['phys_initialize']['predictions_hash']
            else:
                hv5 = repro_hashes['rand_initialize']['fitted_weight_hash']
                mv = repro_hashes['rand_initialize']['predictions_hash']

            print(f"Fitted weights hash (check 5): {hv} \n Reproducibility weights hash: {hv5} \n Error: {hv5-hv}")
            print(f"Model predictions hash: {checkm} \n Reproducibility preds hash: {mv} \n Error: {mv-checkm}")

        # print(dict1.keys())
        # Plot final fit and data
        # TODO: make plot_data specific to this context
        dict1['y'] = y
        plot_data(dict1, title="RNN", title2=dict1['case'])

        # Calculate Errors
        err = rmse(m, y)
        train_ind = dict1["train_ind"]  # index of final training set value
        test_ind = dict1["test_ind"]    # index of first test set value
        err_train = rmse(m[:train_ind], y_train.flatten())
        err_pred = rmse(m[test_ind:], y_test.flatten())
        rmse_dict = {
            'all': err,
            'training': err_train,
            'prediction': err_pred
        }
        return m, rmse_dict
class ResetStatesCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Clear the hidden states of a stateful model at the end of each epoch
        self.model.reset_states()
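
# Minimal sketch, for illustration only, of ResetStatesCallback used directly with a tiny
# stateful SimpleRNN fit on random data. The shapes and unit counts are arbitrary example
# values; the point is only that the hidden states are cleared after every epoch.
def _demo_reset_states_callback():
    batch_size, timesteps, features = 4, 3, 2
    inputs = tf.keras.Input(batch_shape=(batch_size, timesteps, features))
    x = SimpleRNN(5, stateful=True)(inputs)
    outputs = Dense(1)(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mean_squared_error', optimizer='adam')
    X = np.random.rand(2 * batch_size, timesteps, features)
    y = np.random.rand(2 * batch_size, 1)
    model.fit(X, y, epochs=2, batch_size=batch_size, verbose=0,
              callbacks=[ResetStatesCallback()])
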
class RNN(RNNModel):
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        inputs = tf.keras.Input(shape=(None, self.params['features']))
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(self.params['rnn_units'], activation=self.params['activation'][0],
                          stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        # Copy weights from model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model
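
# Minimal sketch, for illustration only, of building an RNN from a small params dictionary
# and running the fit/predict pair on random data. The parameter values are made-up
# placeholders chosen only so that the shapes are consistent; real runs build these
# through create_rnn_data2.
def _demo_rnn():
    features, timesteps, batch_size, hours = 2, 5, 8, 200
    params = {
        'batch_shape': (batch_size, timesteps, features),
        'features': features,
        'timesteps': timesteps,
        'batch_size': batch_size,
        'rnn_layers': 1, 'rnn_units': 6,
        'dense_layers': 1, 'dense_units': 1,
        'activation': ['linear', 'linear'],
        'dropout': [0.0, 0.0],
        'stateful': True,
        'learning_rate': 0.001,
        'epochs': 2,
        'centering': [0.0, 0.0],
        'reset_states': True,
        'verbose_fit': 0,
        'verbose_weights': False,
    }
    mod = RNN(params)
    X = np.random.rand(hours, features)
    y = np.random.rand(hours, 1)
    mod.fit(X, y, plot=False)
    preds = mod.predict(X)   # hidden state is carried over the whole series
    return preds
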
class RNN_LSTM(RNNModel):
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = LSTM(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                recurrent_activation=self.params["recurrent_activation"],
                stateful=self.params['stateful'],
                return_sequences=return_sequences)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        inputs = tf.keras.Input(shape=(None, self.params['features']))
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = LSTM(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss='mean_squared_error', optimizer=optimizer)

        # Copy weights from model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model