Add validation data compatibility to run_model top level command
[notebooks.git] / fmda / moisture_rnn.py
# Environment
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
import copy
import logging
from abc import ABC, abstractmethod
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.layers import Input, SimpleRNN, Dropout, Dense
from sklearn.metrics import mean_squared_error
# Local modules
import reproducibility
from utils import print_dict_summary, hash2
from data_funcs import load_and_fix_data, rmse, plot_data, compare_dicts

def staircase(x, y, timesteps, datapoints, return_sequences=False, verbose=False):
    # x [datapoints, features]  all inputs
    # y [datapoints, outputs]
    # timesteps: split x and y into samples of length timesteps, shifted by 1
    # datapoints: number of timesteps to use for training, no more than y.shape[0]
    print('staircase: shape x = ', x.shape)
    print('staircase: shape y = ', y.shape)
    print('staircase: timesteps=', timesteps)
    print('staircase: datapoints=', datapoints)
    print('staircase: return_sequences=', return_sequences)
    outputs = y.shape[1]
    features = x.shape[1]
    samples = datapoints - timesteps + 1
    print('staircase: samples=', samples, 'timesteps=', timesteps, 'features=', features)
    x_train = np.empty([samples, timesteps, features])
    if return_sequences:
        print('returning all timesteps in a sample')
        y_train = np.empty([samples, timesteps, outputs])  # all timesteps
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
                y_train[i, k, :] = y[i + k, :]
    else:
        print('returning only the last timestep in a sample')
        y_train = np.empty([samples, outputs])
        for i in range(samples):
            for k in range(timesteps):
                x_train[i, k, :] = x[i + k, :]
            y_train[i, :] = y[i + timesteps - 1, :]
    return x_train, y_train
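
# Usage sketch (kept as a comment so importing this module has no side effects):
# with 10 datapoints, 2 features and timesteps=3 there are 10-3+1 = 8 overlapping samples.
#
#   x = np.arange(20.0).reshape(10, 2)
#   y = np.arange(10.0).reshape(10, 1)
#   x_train, y_train = staircase(x, y, timesteps=3, datapoints=10)
#   # x_train.shape == (8, 3, 2), y_train.shape == (8, 1)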

def staircase_2(x, y, timesteps, batch_size=None, trainsteps=np.inf, return_sequences=False, verbose=False):
    # create RNN training data in multiple batches
    # input:
    #     x (,features)
    #     y (,outputs)
    #     timesteps: split x and y into sequences of length timesteps
    #                a.k.a. lookback or sequence_length
    # check arguments and print params
    if batch_size is None:
        raise ValueError('staircase_2 requires batch_size')
    print('staircase_2: shape x = ', x.shape)
    print('staircase_2: shape y = ', y.shape)
    print('staircase_2: timesteps=', timesteps)
    print('staircase_2: batch_size=', batch_size)
    print('staircase_2: return_sequences=', return_sequences)

    nx, features = x.shape
    ny, outputs = y.shape
    datapoints = min(nx, ny, trainsteps)
    print('staircase_2: datapoints=', datapoints)
    # sequence j in a given batch is assumed to be the continuation of sequence j in the previous batch
    # https://www.tensorflow.org/guide/keras/working_with_rnns  Cross-batch statefulness

    # example with timesteps=3 batch_size=3 datapoints=15
    #     batch 0: [0 1 2]     [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]     [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]     [7 8 9]      [8 9 10]
    #     batch 3: [9 10 11]   [10 11 12]   [11 12 13]
    #     batch 4: [12 13 14]  [13 14 15]   when the data runs out this last batch can be shorter

    # TODO: implement for multiple locations, same starting time for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]     [0 1 2]      [0 1 2]
    #     batch 1: [3 4 5]     [3 4 5]      [3 4 5]
    #     batch 2: [6 7 8]     [6 7 8]      [6 7 8]
    # TODO: on the second epoch, shift the starting time of batch 0 in time

    # TODO: implement for multiple locations, different starting times for each batch
    #              Loc 1        Loc 2        Loc 3
    #     batch 0: [0 1 2]     [1 2 3]      [2 3 4]
    #     batch 1: [3 4 5]     [4 5 6]      [5 6 7]
    #     batch 2: [6 7 8]     [7 8 9]      [8 9 10]

    # the first sample in batch j starts at timesteps*j and ends at timesteps*(j+1)-1
    # e.g. the final hidden state of the rnn after the sequence of steps [0 1 2] in batch 0
    # becomes the starting hidden state of the rnn for the sequence of steps [3 4 5] in batch 1, etc.

    # sample [0 1 2] means the rnn is used twice to map state 0 -> 1 -> 2
    # the state at time 0 is fixed, but the state is considered a variable at times 1 and 2
    # the loss is computed from the output at time 2, and the chain rule for its gradient ends
    # at time 0 because the state there is a constant -> derivative is zero
    # sample [3 4 5] means the rnn is used twice to map state 3 -> 4 -> 5
    # the state at time 3 is fixed to the output of the first sequence [0 1 2]
    # the loss is computed from the output at time 5, and the chain rule for its gradient ends
    # at time 3 because the state there is considered constant -> derivative is zero
    # how is the gradient computed? presumably keras adds the gradients wrt the weights
    # at steps 2 5 8 ..., 3 6 9 ..., 4 7 ... and uses the sum to update the weights
    # there is only one set of weights: h(2) = f(h(1),w), h(1) = f(h(0),w), but w is always the same
    # each column is one successive evaluation of h(n+1) = f(h(n),w) for n = n_start, n_start+1, ...
    # a single column cannot be evaluated efficiently on a gpu because a gpu is a parallel processor
    # think of each column as served by one thread; the threads are independent because they
    # execute in parallel, and there needs to be a large number of threads (32 is a good number)
    # each batch consists of independent calculations
    # but it can depend on the result of the previous batch (that's the recurrent part)
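
    # In Keras, this cross-batch statefulness corresponds to building the network on a
    # fixed batch shape with stateful RNN layers (see _build_model_train below), e.g.:
    #     inputs = tf.keras.Input(batch_shape=(batch_size, timesteps, features))
    #     x = SimpleRNN(units, stateful=True)(inputs)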
    max_batches = datapoints // timesteps
    max_sequences = max_batches * batch_size

    print('staircase_2: max_batches=', max_batches)
    print('staircase_2: max_sequences=', max_sequences)

    x_train = np.zeros((max_sequences, timesteps, features))
    if return_sequences:
        y_train = np.empty((max_sequences, timesteps, outputs))
    else:
        y_train = np.empty((max_sequences, outputs))

    # build the sequences
    k = 0  # number of sequences generated so far
    for i in range(max_batches):
        for j in range(batch_size):
            begin = i * timesteps + j
            next = begin + timesteps
            if next > datapoints:
                break
            if verbose:
                print('sequence', k, 'batch', i, 'sample', j, 'data', begin, 'to', next - 1)
            x_train[k, :, :] = x[begin:next, :]
            if return_sequences:
                y_train[k, :, :] = y[begin:next, :]
            else:
                y_train[k, :] = y[next - 1, :]
            k += 1

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)
    print('staircase_2: sequences generated', k)
    print('staircase_2: batch_size=', batch_size)
    k = (k // batch_size) * batch_size
    print('staircase_2: removing partial and empty batches at the end, keeping', k)
    x_train = x_train[:k, :, :]
    if return_sequences:
        y_train = y_train[:k, :, :]
    else:
        y_train = y_train[:k, :]

    print('staircase_2: shape x_train = ', x_train.shape)
    print('staircase_2: shape y_train = ', y_train.shape)

    return x_train, y_train
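
# Usage sketch (kept as a comment so importing this module has no side effects):
# this reproduces the timesteps=3, batch_size=3, datapoints=15 walkthrough above.
# 13 sequences are generated; the trailing partial batch is dropped, keeping 12.
#
#   x = np.arange(15.0).reshape(15, 1)
#   y = np.arange(15.0).reshape(15, 1)
#   x_train, y_train = staircase_2(x, y, timesteps=3, batch_size=3)
#   # x_train.shape == (12, 3, 1), y_train.shape == (12, 1)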

def create_rnn_data2(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
    # Given fmda data and hyperparameters, return a formatted dictionary to be used in RNN
    # Inputs:
    #     dict1: (dict) fmda dictionary
    #     params: (dict) hyperparameters; params['train_frac'] and params['val_frac'] give the
    #         fractions of data used for training (starting from time 0) and validation
    #         (starting from the end of train)
    #     atm_dict: (str) name of the subdictionary for atmospheric vars
    #     train_ind, test_ind: (int) optional indices of the last training observation and the
    #         first test observation; computed from the fractions above when not supplied
    # Returns: (dict) formatted data used in RNN
    logging.info('create_rnn_data2 start')
    # Copy dictionary to prevent changes in place
    d = copy.deepcopy(dict1)
    scale = params['scale']
    features_list = params["features_list"]
    # Scale data if required
    # TODO: reconcile scaling with moisture_rnn_pkl
    scale_fm = 1.0  # default, so scale_fm is defined even when scaling is off
    if scale:
        scale = 1
        if d['case'] == "reproducibility":
            scale_fm = 17.076346687085564
            scaler = 'reproducibility'
        else:
            scaler = None
    # Extract response vector
    fm = d['y']
    y = np.reshape(fm, [fm.shape[0], 1])
    # Extract features matrix
    X = d['X']

    # Check total observed hours
    hours = d['hours']
    assert hours == y.shape[0]  # check that it matches the response

    logging.info('create_rnn_data: total_hours=%s', hours)
    logging.info('feature matrix X shape %s', np.shape(X))
    logging.info('target matrix y shape %s', np.shape(y))
    logging.info('features_list: %s', features_list)

    logging.info('splitting train/val/test')
    if train_ind is None:
        train_ind = round(hours * params['train_frac'])  # index of last training observation
    if test_ind is None:
        test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; equals train_ind if there is no validation data
    logging.info('Final index of training data=%s', train_ind)
    logging.info('First index of test data=%s', test_ind)
    # Training data from 0 to train_ind
    X_train = X[:train_ind]
    y_train = y[:train_ind].reshape(-1, 1)
    # Validation data from train_ind to test_ind
    X_val = X[train_ind:test_ind]
    y_val = y[train_ind:test_ind].reshape(-1, 1)
    # Test data from test_ind to end
    X_test = X[test_ind:]
    y_test = y[test_ind:].reshape(-1, 1)

    logging.info('X_train shape=%s', X_train.shape)
    logging.info('y_train shape=%s', y_train.shape)
    if test_ind == train_ind:
        logging.info('No validation data')
    elif X_val.shape[0] != 0:
        logging.info('X_val shape=%s', X_val.shape)
        logging.info('y_val shape=%s', y_val.shape)
    logging.info('X_test shape=%s', X_test.shape)
    logging.info('y_test shape=%s', y_test.shape)

    # Set up return dictionary
    rnn_dat = {
        'case': d['case'],
        'hours': hours,
        'features_list': features_list,
        'features': len(features_list),
        'scaler': scaler,
        'train_ind': train_ind,
        'test_ind': test_ind,
        'X': X,
        'y': y,
        'X_train': X_train,
        'y_train': y_train,
        'X_test': X_test,
        'y_test': y_test
    }
    if X_val.shape[0] > 0:
        rnn_dat.update({
            'X_val': X_val,
            'y_val': y_val
        })

    # Update RNN params using data attributes
    logging.info('Updating model params based on data')
    timesteps = params['timesteps']
    batch_size = params['batch_size']
    logging.info('batch_size=%s', batch_size)
    logging.info('timesteps=%s', timesteps)
    features = len(features_list)
    params.update({
        'features': features,
        'batch_shape': (params["batch_size"], params["timesteps"], features),
        'pred_input_shape': (hours, features),
        'scaler': scaler,
        'scale_fm': scale_fm
    })
    rnn_dat.update({'scaler': scaler, 'scale_fm': scale_fm})

    logging.info('create_rnn_data2 done')
    return rnn_dat
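
# Usage sketch (comment only; the case dictionary and parameter values are illustrative,
# a real fmda case dict needs at least the keys 'X', 'y', 'hours' and 'case'):
#
#   params = {'scale': 0, 'features_list': ['Ed', 'Ew', 'rain'],
#             'train_frac': 0.5, 'val_frac': 0.1, 'timesteps': 5, 'batch_size': 32}
#   case = {'case': 'demo', 'hours': 100,
#           'X': np.random.rand(100, 3), 'y': np.random.rand(100)}
#   rnn_dat = create_rnn_data2(case, params)
#   # rnn_dat contains X_train/y_train, X_val/y_val (when val_frac > 0) and X_test/y_test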

# Old version, superseded by create_rnn_data2 above; retained for reference.
# def create_rnn_data(dict1, params, atm_dict="HRRR", verbose=False, train_ind=None, test_ind=None, scaler=None):
#     # Given fmda data and hyperparameters, return formatted dictionary to be used in RNN
#     # Inputs:
#     #     d: (dict) fmda dictionary
#     #     params: (dict) hyperparameters
#     #     atm_dict: (str) string specifying name of subdictionary for atmospheric vars
#     #     train_frac: (float) fraction of data to use for training (starting from time 0)
#     #     val_frac: (float) fraction of data to use for validation data (starting from end of train)
#     # Returns: (dict) formatted data used in RNN
#     logging.info('create_rnn_data start')
#     # Copy Dictionary
#     d = copy.deepcopy(dict1)
#     scale = params['scale']
#     features_list = params["features_list"]
#
#     # Check if reproducibility case
#     if d['case'] == "reproducibility":
#         params.update({'scale': 1})
#         atm_dict = "RAWS"
#
#     # Scale Data if required
#     if scale:
#         scale = 1
#         if d['case'] == "reproducibility":
#             # Note: this was calculated from the max observed fm, Ed, Ew over a whole timeseries,
#             # originally using data from the test period
#             scale_fm = 17.076346687085564
#             logging.info("REPRODUCIBILITY scaling moisture features: using %s", scale_fm)
#             logging.info('create_rnn_data: scaling to range 0 to 1')
#             d[atm_dict]['Ed'] = d[atm_dict]['Ed'] / scale_fm
#             d[atm_dict]['Ew'] = d[atm_dict]['Ew'] / scale_fm
#             d[atm_dict]['fm'] = d[atm_dict]['fm'] / scale_fm
#             scaler = 'reproducibility'
#     else:
#         scale_fm = 1.0
#         scaler = None
#     # Extract desired features based on params, combine into matrix
#     fm = d[atm_dict]['fm']
#     values = [d[atm_dict][key] for key in features_list]
#     X = np.vstack(values).T
#     # Extract response vector
#     y = np.reshape(fm, [fm.shape[0], 1])
#     # Calculate total observed hours
#     hours = X.shape[0]
#     assert hours == y.shape[0]  # Check that it matches response
#
#     logging.info('create_rnn_data: total_hours=%s', hours)
#     logging.info('feature matrix X shape %s', np.shape(X))
#     logging.info('target matrix y shape %s', np.shape(y))
#     logging.info('features_list: %s', features_list)
#
#     logging.info('splitting train/val/test')
#     if train_ind is None:
#         train_ind = round(hours * params['train_frac'])  # index of last training observation
#     test_ind = train_ind + round(hours * params['val_frac'])  # index of first test observation; equals train_ind if there is no validation data
#     logging.info('Final index of training data=%s', train_ind)
#     logging.info('First index of test data=%s', test_ind)
#     # Training data from 0 to train_ind
#     X_train = X[:train_ind]
#     y_train = y[:train_ind].reshape(-1, 1)
#     # Validation data from train_ind to test_ind
#     X_val = X[train_ind:test_ind]
#     y_val = y[train_ind:test_ind].reshape(-1, 1)
#     # Test data from test_ind to end
#     X_test = X[test_ind:]
#     y_test = y[test_ind:].reshape(-1, 1)
#
#     logging.info('X_train shape=%s', X_train.shape)
#     logging.info('y_train shape=%s', y_train.shape)
#     if test_ind == train_ind:
#         logging.info('No validation data')
#     elif X_val.shape[0] != 0:
#         logging.info('X_val shape=%s', X_val.shape)
#         logging.info('y_val shape=%s', y_val.shape)
#     logging.info('X_test shape=%s', X_test.shape)
#     logging.info('y_test shape=%s', y_test.shape)
#
#     # Set up return dictionary
#     rnn_dat = {
#         'case': d['case'],
#         'hours': hours,
#         'features_list': features_list,
#         'features': len(features_list),
#         'scaler': scaler,
#         'train_ind': train_ind,
#         'test_ind': test_ind,
#         'X': X,
#         'y': y,
#         'X_train': X_train,
#         'y_train': y_train,
#         'X_test': X_test,
#         'y_test': y_test
#     }
#     if rnn_dat['scaler'] == "reproducibility":
#         rnn_dat['scale_fm'] = 17.076346687085564
#     if X_val.shape[0] > 0:
#         rnn_dat.update({
#             'X_val': X_val,
#             'y_val': y_val
#         })
#
#     # Update RNN params using data attributes
#     logging.info('Updating model params based on data')
#     timesteps = params['timesteps']
#     batch_size = params['batch_size']
#     logging.info('batch_size=%s', batch_size)
#     logging.info('timesteps=%s', timesteps)
#     features = len(features_list)
#     params.update({
#         'features': features,
#         'batch_shape': (params["batch_size"], params["timesteps"], features),
#         'pred_input_shape': (hours, features),
#         'scaler': scaler
#     })
#     if params['scaler'] == "reproducibility":
#         params['scale_fm'] = 17.076346687085564
#
#     logging.info('create_rnn_data done')
#     return rnn_dat

# Reference hashes and hyperparameters for the reproducibility test case,
# checked against fitted results in RNNModel.run_model below.
repro_hashes = {
    'phys_initialize': {
        'fitted_weight_hash': 4.2030588308041834e+19,
        'predictions_hash': 3.59976005554199219
    },
    'rand_initialize': {
        'fitted_weight_hash': 4.4965532557938975e+19,
        'predictions_hash': 3.71594738960266113
    },
    'params': {
        'id': 0,
        'purpose': 'reproducibility',
        'batch_size': 32,
        'training': 5,
        'cases': ['case11'],
        'scale': 1,               # every feature in [0, scale]
        'rain_do': True,
        'verbose': False,
        'timesteps': 5,
        'activation': ['linear', 'linear'],
        'hidden_units': 20,
        'dense_units': 1,         # do not change
        'dense_layers': 1,        # do not change
        'centering': [0.0, 0.0],  # should be activation at 0
        'DeltaE': [0, -1],        # bias correction
        'synthetic': False,       # run also synthetic cases
        'T1': 0.1,                # 1/fuel class (10)
        'fm_raise_vs_rain': 0.2,  # fm increase per mm rain
        'train_frac': 0.5,        # time fraction to spend on training
        'epochs': 200,
        'verbose_fit': 0,
        'verbose_weights': False,
        'initialize': True,
        'learning_rate': 0.001    # default learning rate
    }
}

class RNNModel(ABC):
    def __init__(self, params: dict):
        self.params = params
        if type(self) is RNNModel:
            raise TypeError("RNNModel is an abstract class and cannot be instantiated directly")
        super().__init__()

    @abstractmethod
    def fit(self, X_train, y_train, weights=None):
        pass

    @abstractmethod
    def predict(self, X):
        pass

    def run_model(self, dict0):
        # Make copy to prevent changing in place
        dict1 = copy.deepcopy(dict0)
        # Extract fields
        X_train, y_train, X_test, y_test = dict1['X_train'].copy(), dict1['y_train'].copy(), dict1["X_test"].copy(), dict1['y_test'].copy()
        if 'X_val' in dict1:
            X_val, y_val = dict1['X_val'].copy(), dict1['y_val'].copy()
        else:
            X_val = None
        case_id = dict1['case']
        # Fit model
        if X_val is None:
            self.fit(X_train, y_train)
        else:
            self.fit(X_train, y_train, validation_data=(X_val, y_val))
        # Generate predictions,
        # run through training to get the hidden state set properly for the forecast period
        if X_val is None:
            X = np.concatenate((X_train, X_test))
            y = np.concatenate((y_train, y_test)).flatten()
        else:
            X = np.concatenate((X_train, X_val, X_test))
            y = np.concatenate((y_train, y_val, y_test)).flatten()
        # Predict
        print(f"Predicting Training through Test \n features hash: {hash2(X)} \n response hash: {hash2(y)} ")
        m = self.predict(X).flatten()
        dict1['m'] = m
        dict0['m'] = m  # add to outside env dictionary, should be the only place this happens
        if self.params['scale']:
            print(f"Rescaling data using {self.params['scaler']}")
            if self.params['scaler'] == "reproducibility":
                m *= self.params['scale_fm']
                y *= self.params['scale_fm']
                y_train *= self.params['scale_fm']
                y_test *= self.params['scale_fm']
        # Check reproducibility. TODO: the old dict calls it hidden_units, not rnn_units, so this doesn't check that
        if (case_id == "reproducibility") and compare_dicts(self.params, repro_hashes['params'], ['epochs', 'batch_size', 'scale', 'activation', 'learning_rate']):
            print("Checking Reproducibility")
            checkm = m[350]
            hv = hash2(self.model_predict.get_weights())
            if self.params['phys_initialize']:
                hv5 = repro_hashes['phys_initialize']['fitted_weight_hash']
                mv = repro_hashes['phys_initialize']['predictions_hash']
            else:
                hv5 = repro_hashes['rand_initialize']['fitted_weight_hash']
                mv = repro_hashes['rand_initialize']['predictions_hash']
            print(f"Fitted weights hash (check 5): {hv}, Reproducibility weights hash: {hv5}, Error: {hv5-hv}")
            print(f"Model predictions hash: {checkm}, Reproducibility preds hash: {mv}, Error: {mv-checkm}")

        # Plot final fit and data
        # TODO: make plot_data specific to this context
        dict1['y'] = y
        plot_data(dict1, title="RNN", title2=dict1['case'])

        # Calculate errors
        err = rmse(m, y)
        train_ind = dict1["train_ind"]  # index of final training set value
        test_ind = dict1["test_ind"]    # index of first test set value
        err_train = rmse(m[:train_ind], y_train.flatten())
        err_pred = rmse(m[test_ind:], y_test.flatten())
        rmse_dict = {
            'all': err,
            'training': err_train,
            'prediction': err_pred
        }
        return rmse_dict
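
# Sketch of a minimal concrete subclass (illustrative only; any real subclass must
# implement fit and predict, as the RNN class below does):
#
#   class MeanModel(RNNModel):
#       def fit(self, X_train, y_train, weights=None, **kwargs):
#           self.mean = y_train.mean()          # "training" is just storing the mean
#       def predict(self, X):
#           return np.full(X.shape[0], self.mean)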

class ResetStatesCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        self.model.reset_states()
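
# Usage sketch: pass an instance in the callbacks list so the hidden state of a
# stateful RNN is cleared between epochs, e.g.
#
#   model.fit(X_train, y_train, epochs=10, callbacks=[ResetStatesCallback()])
#
# RNN.fit below appends this callback automatically when params['reset_states'] is set.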

class RNN(RNNModel):
    def __init__(self, params, loss='mean_squared_error'):
        super().__init__(params)
        self.loss = loss
        self.model_train = self._build_model_train()
        self.model_predict = self._build_model_predict()

    def _build_model_train(self, return_sequences=False):
        inputs = tf.keras.Input(batch_shape=self.params['batch_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(
                units=self.params['rnn_units'],
                activation=self.params['activation'][0],
                dropout=self.params["dropout"][0],
                stateful=self.params['stateful'],
                # all but the last RNN layer must return sequences so stacked layers get 3-D input
                return_sequences=return_sequences or i < self.params['rnn_layers'] - 1)(x)
        if self.params["dropout"][1] > 0:
            x = Dropout(self.params["dropout"][1])(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        if self.params["verbose_weights"]:
            print(f"Initial Weights Hash: {hash2(model.get_weights())}")
        return model

    def _build_model_predict(self, return_sequences=True):
        inputs = tf.keras.Input(shape=self.params['pred_input_shape'])
        x = inputs
        for i in range(self.params['rnn_layers']):
            x = SimpleRNN(self.params['rnn_units'], activation=self.params['activation'][0],
                          stateful=False, return_sequences=return_sequences)(x)
        for i in range(self.params['dense_layers']):
            x = Dense(self.params['dense_units'], activation=self.params['activation'][1])(x)
        model = tf.keras.Model(inputs=inputs, outputs=x)
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.params['learning_rate'])
        model.compile(loss=self.loss, optimizer=optimizer)

        # Copy weights over from model_train
        w_fitted = self.model_train.get_weights()
        model.set_weights(w_fitted)

        return model

    def format_train_data(self, X, y, verbose=False):
        X, y = staircase_2(X, y, timesteps=self.params["timesteps"], batch_size=self.params["batch_size"], verbose=verbose)
        return X, y

    def format_pred_data(self, X):
        return np.reshape(X, (1, X.shape[0], self.params['features']))

    def fit(self, X_train, y_train, plot=True, plot_title='',
            weights=None, callbacks=None, verbose_fit=None, validation_data=None, *args, **kwargs):
        # verbose_fit argument is for printing out an update after each epoch, which gets very long.
        # The print statements at the top could be turned off with a verbose argument, but then
        # there would be a bunch of different verbose params.
        callbacks = [] if callbacks is None else list(callbacks)
        print(f"Training simple RNN with params: {self.params}")
        X_train, y_train = self.format_train_data(X_train, y_train)
        print(f"X_train hash: {hash2(X_train)}")
        print(f"y_train hash: {hash2(y_train)}")
        if validation_data is not None:
            X_val, y_val = self.format_train_data(validation_data[0], validation_data[1])
            print(f"X_val hash: {hash2(X_val)}")
            print(f"y_val hash: {hash2(y_val)}")
        print(f"Initial weights before training hash: {hash2(self.model_train.get_weights())}")
        # Set up callbacks
        if self.params["reset_states"]:
            callbacks = callbacks + [ResetStatesCallback()]

        # Note: we overload the params here so that verbose_fit can be easily turned on/off at the .fit call
        if verbose_fit is None:
            verbose_fit = self.params['verbose_fit']
        # Evaluate the model once to set a nonzero initial state
        if self.params["batch_size"] >= X_train.shape[0]:
            self.model_train(X_train)
        if validation_data is not None:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                validation_data=(X_val, y_val),
                *args, **kwargs
            )
        else:
            history = self.model_train.fit(
                X_train, y_train + self.params['centering'][1],
                epochs=self.params['epochs'],
                batch_size=self.params['batch_size'],
                callbacks=callbacks,
                verbose=verbose_fit,
                *args, **kwargs
            )
        if plot:
            self.plot_history(history, plot_title)
        if self.params["verbose_weights"]:
            print(f"Fitted Weights Hash: {hash2(self.model_train.get_weights())}")

        # Update weights for the prediction model
        w_fitted = self.model_train.get_weights()
        self.model_predict.set_weights(w_fitted)

    def predict(self, X_test):
        print("Predicting with simple RNN")
        X_test = self.format_pred_data(X_test)
        preds = self.model_predict.predict(X_test).flatten()
        return preds

    def plot_history(self, history, plot_title):
        plt.figure()
        plt.semilogy(history.history['loss'], label='Training loss')
        if 'val_loss' in history.history:
            plt.semilogy(history.history['val_loss'], label='Validation loss')
        plt.title(f'{plot_title} Model loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.legend(loc='upper left')
        plt.show()
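
# Minimal smoke test (a sketch, not part of the module API): builds an RNN on
# synthetic data with hypothetical hyperparameter values and checks that fit and
# predict run end to end. Guarded so importing this module has no side effects.
if __name__ == '__main__':
    demo_params = {
        'batch_size': 8, 'timesteps': 5, 'features': 2,
        'batch_shape': (8, 5, 2),       # (batch_size, timesteps, features)
        'pred_input_shape': (100, 2),   # (hours, features)
        'rnn_layers': 1, 'rnn_units': 6,
        'dense_layers': 1, 'dense_units': 1,
        'activation': ['linear', 'linear'], 'dropout': [0.0, 0.0],
        'stateful': True, 'learning_rate': 0.001,
        'epochs': 2, 'centering': [0.0, 0.0],
        'reset_states': True, 'verbose_fit': 0, 'verbose_weights': False,
    }
    rng = np.random.default_rng(0)
    X_demo = rng.random((100, 2))   # 100 hours, 2 features
    y_demo = rng.random((100, 1))   # 100 hours, 1 output
    model = RNN(demo_params)
    model.fit(X_demo, y_demo, plot=False)
    preds = model.predict(X_demo)
    print('demo predictions shape:', preds.shape)  # expect (100,)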